Example #1
0
def token_contexts_to_features(token_contexts, feature_extractors, workers=1):
    """Extract numeric feature matrices for every token's contexts.

    Parameters
    ----------
    token_contexts : dict
        Maps each token to a list of its contexts (opaque objects consumed
        by ``map_feature_extractor`` — schema assumed, TODO confirm).
    feature_extractors : list
        Feature extractor objects; each is paired with every context.
    workers : int, optional
        Number of worker processes. 1 (default) runs in-process.

    Returns
    -------
    dict
        token -> 2D numpy array of shape (num_contexts, total_feature_dim),
        built by stacking each extractor's per-context output horizontally.
    """
    # single process: build the whole result dict in one comprehension
    if workers == 1:
        return {token: np.vstack([np.hstack([map_feature_extractor((context, extractor))
                                             for extractor in feature_extractors])
                                  for context in contexts])
                for token, contexts in token_contexts.items()}

    # multiple processes: fan contexts out over a worker pool
    res_dict = {}
    pool = multi.Pool(workers)
    try:
        # was a bare print(); use the module logger like the rest of the function
        logger.info("Feature extractors: %s", feature_extractors)
        for token, contexts in token_contexts.items():
            # lazy %-style args avoid building the message when INFO is disabled
            logger.info('Multithreaded - Extracting contexts for token: %s -- with %s contexts...',
                        token, len(contexts))
            # each extractor maps over all contexts; results stacked row-wise,
            # then the per-extractor matrices are concatenated column-wise
            extractors_output = []
            for extractor in feature_extractors:
                context_list = [(cont, extractor) for cont in contexts]
                extractors_output.append(np.vstack(pool.map(map_feature_extractor, context_list)))
            res_dict[token] = np.hstack(extractors_output)
    finally:
        # the original leaked the pool's worker processes; always release them
        pool.close()
        pool.join()
    return res_dict
Example #2
0
def token_contexts_to_features_categorical(token_contexts, feature_extractors, workers=1):
    """Extract categorical (mixed-type) feature lists for every token's contexts.

    Unlike :func:`token_contexts_to_features`, outputs stay plain Python lists
    because extractor outputs may mix types and cannot be np.hstack/vstack-ed.

    Parameters
    ----------
    token_contexts : dict
        Maps each token to a list of its contexts (opaque objects consumed
        by ``map_feature_extractor`` — schema assumed, TODO confirm).
    feature_extractors : list
        Feature extractor objects; each is paired with every context.
    workers : int, optional
        Number of worker processes. 1 (default) runs in-process.

    Returns
    -------
    dict
        token -> list (one entry per context) of flat feature lists, with all
        extractors' outputs for that context concatenated in extractor order.
    """
    # single process: flatten all extractors' outputs per context in one pass
    if workers == 1:
        return {token: [[x for a_list in [map_feature_extractor((context, extractor))
                                          for extractor in feature_extractors]
                         for x in a_list]
                        for context in contexts]
                for token, contexts in token_contexts.items()}

    # multiple processes: fan contexts out over a worker pool
    res_dict = {}
    pool = multi.Pool(workers)
    try:
        # was a bare print(); use the module logger like the rest of the function
        logger.info("Feature extractors: %s", feature_extractors)
        for token, contexts in token_contexts.items():
            # lazy %-style args avoid building the message when INFO is disabled
            logger.info('Multithreaded - Extracting categorical contexts for token: %s -- with %s contexts...',
                        token, len(contexts))
            extractors_output = []
            for extractor in feature_extractors:
                context_list = [(cont, extractor) for cont in contexts]
                extractors_output.append(pool.map(map_feature_extractor, context_list))
            # np.hstack/np.vstack can't be used: lists hold objects of different types.
            # Transpose extractor-major output to context-major, then flatten per context.
            # Guard the empty-extractors case (original crashed on extractors_output[0]).
            if extractors_output:
                intermediate = [[x[i] for x in extractors_output]
                                for i in range(len(extractors_output[0]))]
                res_dict[token] = [flatten(sl) for sl in intermediate]
            else:
                res_dict[token] = []
    finally:
        # the original leaked the pool's worker processes; always release them
        pool.close()
        pool.join()
    return res_dict