Example #1
0
 def set_params(self, **kwargs):
     """Update estimator parameters and rebuild the vectorizer state.

     Delegates to ``CountVectorizer.set_params`` first, then re-runs
     ``CountVectorizer.__init__`` so the preprocessor and n-gram range
     reflect the (possibly changed) ``column``/``size``/``terminator``
     attributes.
     """
     CountVectorizer.set_params(self, **kwargs)
     prep = get_preprocessor(self.column, self.size, self.terminator)
     CountVectorizer.__init__(
         self,
         preprocessor=prep,
         ngram_range=(1, self.size),
         analyzer='char',
         binary=self.binary,
     )
Example #2
0
 def set_params(self, **kwargs):
     """Set parameters, then re-initialise the base CountVectorizer.

     Re-initialisation keeps the character n-gram configuration in sync
     with the updated instance attributes.
     """
     CountVectorizer.set_params(self, **kwargs)
     init_kwargs = {
         'preprocessor': get_preprocessor(self.column, self.size,
                                          self.terminator),
         'ngram_range': (1, self.size),
         'analyzer': 'char',
         'binary': self.binary,
     }
     CountVectorizer.__init__(self, **init_kwargs)
Example #3
0
    def __init__(self, lang, **kwargs):
        """Build a stemming vectorizer for *lang*.

        Parameters
        ----------
        lang : str
            Language name understood by ``SnowballStemmer``
            (matched case-insensitively via ``lang.lower()``).
        **kwargs
            Forwarded as keyword arguments to ``CountVectorizer.__init__``.
        """
        # BUG FIX: the original called ``CountVectorizer.__init__(self, kwargs)``,
        # passing the kwargs dict as the FIRST POSITIONAL argument instead of
        # expanding it as keyword arguments.
        CountVectorizer.__init__(self, **kwargs)

        try:
            self.stemmer = SnowballStemmer(lang.lower()).stem
            self.vect = CountVectorizer()
            self.analyzer = self.analyzer_nltk
        except ValueError:
            # Unsupported language: deliberately fall back to the plain
            # CountVectorizer analyzer rather than failing construction.
            pass
 def __init__(self, large_file=False):
     """Select a hashing or counting backend at construction time.

     For large inputs a ``HashingVectorizer`` is initialised; otherwise a
     ``CountVectorizer`` is configured to leave pre-tokenised input alone.
     """
     if not large_file:
         # Input is already tokenised: override the built-in string
         # processing by disabling the preprocessor and lowercasing and
         # supplying the identity function as the tokenizer.
         CountVectorizer.__init__(
             self, tokenizer=identity, preprocessor=None, lowercase=False)
     else:
         HashingVectorizer.__init__(self)
Example #5
0
 def __init__(self, column, binary=False, size=3, terminator='$'):
     """Character n-gram vectorizer over a single column.

     Parameters
     ----------
     column : column identifier handed to ``get_preprocessor``.
     binary : bool, forwarded to ``CountVectorizer``.
     size : int, maximum n-gram length (range is ``(1, size)``).
     terminator : str, terminator symbol handed to ``get_preprocessor``.
     """
     self.column = column
     self.size = size
     self.terminator = terminator
     prep = get_preprocessor(self.column, self.size, self.terminator)
     CountVectorizer.__init__(
         self,
         preprocessor=prep,
         ngram_range=(1, size),
         analyzer='char',
         binary=binary,
     )
Example #6
0
 def __init__(self, column, binary=False, size=3, terminator='$'):
     """Initialise a char n-gram CountVectorizer for one column.

     Stores ``column``/``size``/``terminator`` on the instance and
     configures the base class with a character analyzer.
     """
     self.column = column
     self.size = size
     self.terminator = terminator
     base_kwargs = dict(
         preprocessor=get_preprocessor(self.column, self.size,
                                       self.terminator),
         ngram_range=(1, size),
         analyzer='char',
         binary=binary,
     )
     CountVectorizer.__init__(self, **base_kwargs)
Example #7
0
    def __init__(
            self,
            lowercase: Boolean(),
            stopwords_remove: Boolean(),
            binary: Boolean(),
            inner_tokenizer: algorithm(Sentence(), List(Word())),
            inner_stemmer: algorithm(Word(), Stem()),
            inner_stopwords: algorithm(List(Word()), List(Word())),
    ):
        """Compose a pluggable tokenizer/stemmer/stopword pipeline.

        ``lowercase`` and ``binary`` are not stored here; they are passed
        straight through to ``_CountVectorizer`` below.
        """
        # Keep the pluggable components on the instance.
        self.inner_stopwords = inner_stopwords
        self.inner_stemmer = inner_stemmer
        self.inner_tokenizer = inner_tokenizer
        self.stopwords_remove = stopwords_remove

        SklearnTransformer.__init__(self)
        _CountVectorizer.__init__(self, lowercase=lowercase, binary=binary)
Example #8
0
    def __init__(self,
                 input="content",
                 encoding="utf-8",
                 decode_error="strict",
                 strip_accents=None,
                 lowercase=True,
                 preprocessor=None,
                 tokenizer=None,
                 stop_words=None,
                 # BUG FIX: must be a raw string. In the original non-raw
                 # literal, "\b" was a backspace character (\x08), so the
                 # default token pattern could never match word boundaries.
                 # r"(?u)\b\w\w+\b" matches sklearn's documented default.
                 token_pattern=r"(?u)\b\w\w+\b",
                 ngram_range=(1, 1),
                 analyzer="word",
                 max_df=1.0,
                 min_df=1,
                 max_features=None,
                 vocabulary=None,
                 binary=False,
                 dtype=numpy.int64,
                 progress_bar_resolution_seconds=.333,
                 progress_bar_clear_when_done=False):
        """Progress-bar-aware CountVectorizer.

        All vectorizer parameters mirror ``CountVectorizer`` and are
        forwarded to it unchanged; the two ``progress_bar_*`` parameters
        configure the ``ProgressBarVectorizer`` mixin.
        """
        CountVectorizer.__init__(self,
                                 input=input,
                                 encoding=encoding,
                                 decode_error=decode_error,
                                 strip_accents=strip_accents,
                                 lowercase=lowercase,
                                 preprocessor=preprocessor,
                                 tokenizer=tokenizer,
                                 stop_words=stop_words,
                                 token_pattern=token_pattern,
                                 ngram_range=ngram_range,
                                 analyzer=analyzer,
                                 max_df=max_df,
                                 min_df=min_df,
                                 max_features=max_features,
                                 vocabulary=vocabulary,
                                 binary=binary,
                                 dtype=dtype)

        ProgressBarVectorizer.__init__(self, progress_bar_resolution_seconds,
                                       progress_bar_clear_when_done)
Example #9
0
 def __init__(self, stopwords_list=None, max_features=None):
     """Word vectorizer with unicode accent stripping plus a WordNet lemmatizer.

     Parameters
     ----------
     stopwords_list : list of str, optional
         Stop words forwarded to ``CountVectorizer``.
     max_features : int, optional
         Vocabulary-size cap forwarded to ``CountVectorizer``.
     """
     # Dropped the original backslash line continuations: they are redundant
     # inside parentheses and fragile (trailing whitespace after a backslash
     # is a SyntaxError).
     CountVectorizer.__init__(self,
                              analyzer="word",
                              strip_accents="unicode",
                              stop_words=stopwords_list,
                              max_features=max_features)
     self.en_lemmatizer = nltk.stem.WordNetLemmatizer()
Example #10
0
	def __init__(self, analyzer=BOWAnalyzer, max_df=None):
		"""Delegate construction to CountVectorizer with a custom analyzer.

		NOTE(review): ``max_df=None`` differs from sklearn's documented
		default of ``1.0`` and may be rejected by its validation — confirm
		this default is intentional for the CountVectorizer in use here.
		"""
		CountVectorizer.__init__(self, analyzer=analyzer, max_df=max_df)
Example #11
0
 def __init__(self, n_grams=1, first_last_sentence_only=False):
     """Fixed-size n-gram counter.

     Parameters
     ----------
     n_grams : int
         Exact n-gram size; the base range is ``(n_grams, n_grams)``.
     first_last_sentence_only : bool
         Flag stored for later use; not consumed here.
     """
     CountVectorizer.__init__(self, ngram_range=(n_grams, n_grams))
     self.first_last_sentence_only = first_last_sentence_only
     # Mapping populated elsewhere; starts empty.
     self.term_dict = {}
Example #12
0
 def __init__(self, **kwargs):
     """Thin wrapper: forward all keyword arguments to CountVectorizer."""
     CountVectorizer.__init__(self, **kwargs)
 def __init__(self):
     """Binary presence/absence vectorizer over pre-tokenised input."""
     # Input is already tokenised, so string preprocessing and lowercasing
     # are disabled and the identity function serves as the tokenizer.
     CountVectorizer.__init__(self,
                              tokenizer=identity,
                              preprocessor=None,
                              lowercase=False,
                              binary=True)
 def __init__(self, window=None, sentence_splitter="\n", directional=False, **args):
     """Store window/splitter/direction settings, then initialise the base.

     Parameters
     ----------
     window : optional window size stored for later use.
     sentence_splitter : str used to split sentences (default newline).
     directional : bool flag stored for later use.
     **args : forwarded to ``CountVectorizer.__init__``.
     """
     self.directional = directional
     self.sentence_splitter = sentence_splitter
     self.window = window
     CountVectorizer.__init__(self, **args)