Beispiel #1
0
def get_lexicon_count_pipeline(tokenizer):
    """Return a two-step pipeline for lexicon count features.

    The first step ('lexfeatures') is a ``CountBasedTransformer`` built from
    *tokenizer*; the second step ('lexvect') is a ``DictVectorizer``.
    """
    steps = [
        ('lexfeatures', CountBasedTransformer(tokenizer)),
        ('lexvect', dv.DictVectorizer()),
    ]
    return skpipeline.Pipeline(steps)
def get_named_entity_weight_pipeline(language):
    """Return a two-step pipeline for named-entity rate features.

    The first step ('neratefeat') is an ``NNEsTransformer`` built from
    *language*; the second step ('neratevect') is a ``DictVectorizer``.
    """
    steps = [
        ('neratefeat', NNEsTransformer(language)),
        ('neratevect', dv.DictVectorizer()),
    ]
    return skpipeline.Pipeline(steps)
def get_keyword_pipeline(word):
    """Return a two-step pipeline for keyword-presence features.

    The first step ('wordpresfeat') is a ``TermPresenceTransformer`` built
    from *word*; the second step ('wordpresvect') is a ``DictVectorizer``.
    """
    steps = [
        ('wordpresfeat', TermPresenceTransformer(word)),
        ('wordpresvect', dv.DictVectorizer()),
    ]
    return skpipeline.Pipeline(steps)
def get_polylglot_polarity_count_pipe(lang):
    """Return a two-step pipeline for polarity count features.

    The first step ('polyglotpolaritycfeat') is a
    ``PolyglotPolarityCountTransformer`` built from *lang*; the second step
    ('polyglotpolaritycvect') is a ``DictVectorizer``.
    """
    steps = [
        ('polyglotpolaritycfeat', PolyglotPolarityCountTransformer(lang)),
        ('polyglotpolaritycvect', dv.DictVectorizer()),
    ]
    return skpipeline.Pipeline(steps)
def get_polylglot_polarity_value_pipe(lang):
    """Return a two-step pipeline for polarity value features.

    The first step ('polyglotpolarityvfeat') is a
    ``PolyglotPolarityValueTransformer`` built from *lang*; the second step
    ('polyglotpolarityvvect') is a ``DictVectorizer``.
    """
    steps = [
        ('polyglotpolarityvfeat', PolyglotPolarityValueTransformer(lang)),
        ('polyglotpolarityvvect', dv.DictVectorizer()),
    ]
    return skpipeline.Pipeline(steps)
Beispiel #6
0
def vectorizer():
    """Demonstrate vectorizing dict records with ``DictVectorizer``.

    Fits a dense (``sparse=False``) ``DictVectorizer`` on three sample
    records and prints, in order: the learned feature names, the transformed
    data, and the type of the transformed data.

    Returns:
        None. The function only prints.
    """
    dict_vec = dict_vectorizer.DictVectorizer(sparse=False)

    records = [
        {'name': 'jiujue', 'age': 10},
        {'name': 'mmp', 'age': 11},
        {'name': 'sam', 'age': 12},
    ]
    data = dict_vec.fit_transform(records)

    # ``get_feature_names`` was deprecated in scikit-learn 1.0 and removed in
    # 1.2. Prefer the replacement when it exists and convert it to a plain
    # list so the printed output matches the legacy method; fall back to the
    # legacy method on older releases.
    if hasattr(dict_vec, 'get_feature_names_out'):
        feature_names = dict_vec.get_feature_names_out().tolist()
    else:
        feature_names = dict_vec.get_feature_names()
    print(feature_names)

    print(data)
    print(type(data))