Python CountVectorizer.analyzer Examples

Programming Language: Python

Namespace/Package Name: sklearn.feature_extraction.text

Class/Type: CountVectorizer

Method/Function: analyzer

Examples at hotexamples.com: 4

Python CountVectorizer.analyzer - 4 examples found. These are the top-rated real-world Python examples of sklearn.feature_extraction.text.CountVectorizer.analyzer, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

CountVectorizer(30)

_validate_vocabulary(30)

fit_transform(30)

fit(30)

build_tokenizer(30)

build_analyzer(30)

get_stop_words(30)

get_params(21)

get_feature_names_out(15)

build_preprocessor(13)

__init__(10)

get_feature_names(9)

dictionary_freeze(6)

count(4)

analyzer(4)

fixed_vocabulary(3)

astype(3)

_count_vocab(2)

copy(2)

fit_trainsform(2)

get_features_names(2)

append(2)

_word_ngrams(2)

get_feature_name(1)

getSenVec(1)

_sort_features(1)

get_features(1)

get_sentence_vector(1)

get_shape(1)

getOutputCol(1)

fit_Transform(1)

fit_trasform(1)

fit_transfrom(1)

fit_transforn(1)

__repr__(1)

fir_transform(1)

__dict__(1)

extract_ngrams(1)

delete_temporary_training_data(1)

count_features(1)

_limit_features(1)

fir(1)

Example #1

Show file

File: sample.py Project: wararaki718/scrapbox2

def main():
    """Fit a ``CountVectorizer`` once, then run ``check`` while swapping analyzers.

    The vectorizer is fitted on ``corpus`` with ``space_analyzer``. Its
    ``analyzer`` attribute is then reassigned (comma, then space again) and
    ``check`` is called after each change; the vectorizer is not refitted
    in between.

    :return: ``0``.
    """
    # modeling
    vectorizer = CountVectorizer(analyzer=space_analyzer)
    vectorizer.fit(corpus)

    # ``get_feature_names`` was removed in scikit-learn 1.2. Prefer its
    # replacement when available and fall back on older releases; ``tolist``
    # keeps the printed value a plain list, as before.
    if hasattr(vectorizer, 'get_feature_names_out'):
        feature_names = vectorizer.get_feature_names_out().tolist()
    else:
        feature_names = vectorizer.get_feature_names()
    print(feature_names)
    print('')

    print('[space vectorizer]')
    check(vectorizer)

    # Swap the analyzer on the already-fitted vectorizer.
    print('[comma vectorizer]')
    vectorizer.analyzer = comma_analyzer
    check(vectorizer)

    # Restore the analyzer that was used for fitting.
    print('[space vectorizer]')
    vectorizer.analyzer = space_analyzer
    check(vectorizer)

    return 0

Example #2

Show file

def build_matrix_count(bmt__document_list, input_type='filename', with_analyzer=False, amr_tool=None):
    """
    Build a term-document count matrix using a custom analyzer.

    :param bmt__document_list: documents passed to ``fit_transform``.
    :param input_type: forwarded as ``CountVectorizer(input=...)``.
    :param with_analyzer: if truthy, terms are the Porter-stemmed tokens of
        the vectorizer's default analyzer; otherwise terms are the graph
        nodes obtained through ``amr_tool``.
    :param amr_tool: object providing ``amr_graph_reader(doc)`` and
        ``parse_graph(graph_str)``. It is only used when ``with_analyzer``
        is falsy, and is required in that case (the default ``None`` has
        neither method).
    :return: tuple ``(term_document_matrix, vocabulary)``.
    """

    vectorizer = CountVectorizer(input=input_type, dtype=np.float64)

    # Capture the default analyzer before ``vectorizer.analyzer`` is replaced.
    analyzer = vectorizer.build_analyzer()

    if with_analyzer:
        # One stemmer for the whole call instead of one per document.
        stemmer = PorterStemmer()

        def stemm(doc):
            # NOTE(review): newer scikit-learn releases appear to pass a
            # callable analyzer the already-decoded document when input is
            # 'filename'/'file' -- confirm ``analyzer(doc)`` does not try to
            # decode the document a second time.
            return (stemmer.stem(word) for word in analyzer(doc))

        vectorizer.analyzer = stemm
    else:

        def nodes(doc):
            graph_str = amr_tool.amr_graph_reader(doc)
            graph_list = amr_tool.parse_graph(graph_str)
            # Flatten the nodes of every parsed graph into a single list.
            return [node for graph in graph_list for node in graph.nodes]

        vectorizer.analyzer = nodes

    term_document_matrix = vectorizer.fit_transform(bmt__document_list)

    vocabulary = vectorizer.vocabulary_

    return term_document_matrix, vocabulary

Example #3

Show file

def generate_bag_of_words(bdtm__document_list, input_type='filename'):
    """
    Fit a count vectorizer on Porter-stemmed tokens and return its vocabulary.

    :param bdtm__document_list: documents passed to ``fit``.
    :param input_type: forwarded as ``CountVectorizer(input=...)``.
    :return: the fitted ``vocabulary_`` of the vectorizer.
    """

    vectorizer = CountVectorizer(input=input_type)

    # Capture the default analyzer before ``vectorizer.analyzer`` is replaced.
    analyzer = vectorizer.build_analyzer()

    # One stemmer for the whole call instead of one per document.
    stemmer = PorterStemmer()

    def stemm(doc):
        # NOTE(review): newer scikit-learn releases appear to pass a callable
        # analyzer the already-decoded document when input is
        # 'filename'/'file' -- confirm ``analyzer(doc)`` does not try to
        # decode the document a second time.
        return (stemmer.stem(word) for word in analyzer(doc))

    vectorizer.analyzer = stemm

    vectorizer.fit(bdtm__document_list)

    vocabulary = vectorizer.vocabulary_

    return vocabulary

Example #4

Show file

    def generate_bag_of_words(self, generate_bow__path_list):
        """
        Build a count matrix of Porter-stemmed tokens from a list of files.

        :param generate_bow__path_list: file paths passed to
            ``fit_transform`` (the vectorizer uses ``input='filename'``).
        :return: tuple ``(term_document_matrix, vocabulary)``.
        """

        vectorizer = CountVectorizer(input='filename')

        # Capture the default analyzer before ``vectorizer.analyzer`` is
        # replaced.
        analyzer = vectorizer.build_analyzer()

        # One stemmer for the whole call instead of one per document.
        stemmer = PorterStemmer()

        def stemm(doc):
            # NOTE(review): newer scikit-learn releases appear to pass a
            # callable analyzer the already-decoded document when input is
            # 'filename' -- confirm ``analyzer(doc)`` does not try to decode
            # the document a second time.
            return (stemmer.stem(word) for word in analyzer(doc))

        vectorizer.analyzer = stemm

        term_document_matrix = vectorizer.fit_transform(
            generate_bow__path_list)

        vocabulary = vectorizer.vocabulary_

        return term_document_matrix, vocabulary