def __init__(self, large_file=False):
     """Initialise this vectorizer through one of two base initialisers.

     Args:
         large_file: When truthy, ``HashingVectorizer.__init__`` is called
             with no extra arguments. Otherwise ``CountVectorizer.__init__``
             is called with the built-in string processing overridden.
     """
     if not large_file:
         # Override the built-in string processing: supply our own
         # tokenizer, drop the preprocessor and disable lowercasing.
         # NOTE(review): `identity` is defined outside this block;
         # presumably it returns its input unchanged -- confirm.
         count_options = dict(
             tokenizer=identity,
             preprocessor=None,
             lowercase=False,
         )
         CountVectorizer.__init__(self, **count_options)
         return

     # Large-file mode: the hashing initialiser runs with its own defaults.
     HashingVectorizer.__init__(self)
Example #2
0
 def __init__(self, chunksize=100000, **kwargs):
     """Record the chunk size, then run the hashing base initialiser.

     Args:
         chunksize: Value stored on the instance as ``self.chunksize``.
             How it is consumed is not visible in this block.
         **kwargs: Forwarded unchanged to ``HashingVectorizer.__init__``.
     """
     # Store our own setting before handing over to the base initialiser.
     self.chunksize = chunksize

     # Every remaining keyword argument belongs to the base class.
     HashingVectorizer.__init__(self, **kwargs)