def main():
    """Demo: fit one CountVectorizer, then swap its analyzer and re-check.

    Relies on module-level ``corpus``, ``check``, ``space_analyzer`` and
    ``comma_analyzer``. Reassigning ``vectorizer.analyzer`` works because
    sklearn builds the analyzer lazily from the attribute at (re)fit time.
    """
    # modeling
    vectorizer = CountVectorizer(analyzer=space_analyzer)
    vectorizer.fit(corpus)
    print(vectorizer.get_feature_names())
    print('')

    # Run the same check under each analyzer in turn; the first pass uses
    # the analyzer the vectorizer was constructed with.
    passes = (
        ('[space vectorizer]', space_analyzer),
        ('[comma vectorizer]', comma_analyzer),
        ('[space vectorizer]', space_analyzer),
    )
    for label, tokenizer in passes:
        print(label)
        vectorizer.analyzer = tokenizer
        check(vectorizer)
    return 0
def build_matrix_count(bmt__document_list, input_type='filename',
                       with_analyzer=False, amr_tool=None):
    """Build a float64 term-document count matrix and its vocabulary.

    :param bmt__document_list: documents to vectorize (interpreted per
        ``input_type``: filenames by default).
    :param input_type: CountVectorizer ``input`` mode ('filename', 'file'
        or 'content').
    :param with_analyzer: if True, tokenize with the vectorizer's default
        analyzer and Porter-stem each token; otherwise use ``amr_tool`` to
        read each document as AMR graphs and count graph node labels.
    :param amr_tool: object exposing ``amr_graph_reader`` and
        ``parse_graph``; required when ``with_analyzer`` is False.
    :return: ``(term_document_matrix, vocabulary)`` tuple.
    :raises ValueError: if ``with_analyzer`` is False and ``amr_tool`` is
        None.
    """
    vectorizer = CountVectorizer(input=input_type, dtype=np.float64)
    analyzer = vectorizer.build_analyzer()

    # One stemmer for the whole corpus instead of a fresh instance per
    # document (the original rebuilt PorterStemmer inside every call).
    stemmer = PorterStemmer()

    def stemm(doc):
        # Lazily stem the tokens produced by the default analyzer.
        return (stemmer.stem(word) for word in analyzer(doc))

    def nodes(doc):
        # Flatten the node labels of every AMR graph found in the document.
        graph_str = amr_tool.amr_graph_reader(doc)
        graph_list = amr_tool.parse_graph(graph_str)
        _nodes = []
        for graph in graph_list:
            _nodes.extend(graph.nodes)
        return _nodes

    if with_analyzer:
        vectorizer.analyzer = stemm
    else:
        if amr_tool is None:
            # Fail fast: the original crashed later with an opaque
            # AttributeError inside fit_transform when amr_tool was None.
            raise ValueError(
                'amr_tool is required when with_analyzer is False')
        vectorizer.analyzer = nodes

    term_document_matrix = vectorizer.fit_transform(bmt__document_list)
    vocabulary = vectorizer.vocabulary_
    return term_document_matrix, vocabulary
def generate_bag_of_words(bdtm__document_list, input_type='filename'):
    """Fit a stemming CountVectorizer and return its vocabulary mapping.

    :param bdtm__document_list: documents to fit on (interpreted per
        ``input_type``: filenames by default).
    :param input_type: CountVectorizer ``input`` mode ('filename', 'file'
        or 'content').
    :return: ``vocabulary_`` dict mapping stemmed term -> feature index.
    """
    vectorizer = CountVectorizer(input=input_type)
    analyzer = vectorizer.build_analyzer()

    # One stemmer for the whole corpus instead of a fresh instance per
    # document (the original rebuilt PorterStemmer inside every call).
    stemmer = PorterStemmer()

    def stemm(doc):
        # Stem each token emitted by the default analyzer.
        return (stemmer.stem(word) for word in analyzer(doc))

    vectorizer.analyzer = stemm
    vectorizer.fit(bdtm__document_list)
    vocabulary = vectorizer.vocabulary_
    return vocabulary
def generate_bag_of_words(self, generate_bow__path_list):
    """Fit a stemming CountVectorizer over files and return matrix + vocab.

    :param generate_bow__path_list: list of file paths to vectorize
        (``input='filename'``).
    :return: ``(term_document_matrix, vocabulary)`` tuple, where
        ``vocabulary`` maps stemmed term -> feature index.
    """
    vectorizer = CountVectorizer(input='filename')
    analyzer = vectorizer.build_analyzer()

    # One stemmer for the whole corpus instead of a fresh instance per
    # document (the original rebuilt PorterStemmer inside every call).
    stemmer = PorterStemmer()

    def stemm(doc):
        # Stem each token emitted by the default analyzer.
        return (stemmer.stem(word) for word in analyzer(doc))

    vectorizer.analyzer = stemm
    term_document_matrix = vectorizer.fit_transform(
        generate_bow__path_list)
    vocabulary = vectorizer.vocabulary_
    return term_document_matrix, vocabulary