Beispiel #1
0
def featurize_file(filename):
    """Extract features and the distinct labels found in ``filename``.

    The file is opened for reading and passed to ``crfutils.get_features``
    along with ``feature_extractor`` and ``fields`` (both resolved from the
    enclosing module scope), using a single space as the field separator.

    Args:
        filename: Path of the input file to read.

    Returns:
        A ``(X, labels)`` pair. ``X`` is the object returned by
        ``crfutils.get_features``; ``labels`` is a list of the distinct
        ``entry['y']`` values seen while iterating ``X``. The labels are
        gathered in a set, so their order in the list is arbitrary.
    """
    # Use a context manager so the file is closed even if feature extraction
    # raises. The label scan stays inside the block so the handle is still
    # open in case get_features reads from it lazily.
    with open(filename, 'r') as fi_handler:
        X = crfutils.get_features(feature_extractor, fields=fields, sep=' ',
                                  fi=fi_handler)
        labels = {entry['y'] for x in X for entry in x}
    return X, list(labels)
Beispiel #2
0
    X = crfutils.get_features(feature_extractor, fields=fields, sep=' ', fi=fi_handler)
    # X = crfutils.get_features(feature_extractor, fields=fields, sep=separator)
    # doc = []
    Y = set()
    for x in X:
        for entry in x:
            Y.add(entry['y'])
            # doc.append(entry['F'])
    # return X, hf.transform(doc)
    return X, list(Y)


if __name__ == '__main__':
    # Script entry point: extract features with the module's extractor,
    # field list and separator, then print their hashed representation.
    X = crfutils.get_features(feature_extractor, fields=fields, sep=separator)

    # Hashing trick: map string features to a fixed-width vector space.
    hf = FeatureHasher(input_type='string', non_negative=True)

    # NOTE(review): with input_type='string', FeatureHasher.transform expects
    # an iterable of samples, each itself an iterable of strings. If
    # item['F'] is a list of strings, `doc` ends up as a flat list of strings
    # and every string is hashed as its own sample -- confirm this is intended.
    doc = []
    for group in X:
        for item in group:
            doc.extend(item['F'])
        # `doc` is never reset, so each printout covers every feature
        # accumulated so far, not only those of the current group.
        print(hf.transform(doc))