Beispiel #1
0
 def parse(self, text):
     """Count every n-gram of order 1..``self.max_order`` found in ``text``.

     The text is split on runs of whitespace. For each start position,
     every window of 1 to ``self.max_order`` consecutive tokens that fits
     inside the token list is looked up in ``self.storage_``: a missing
     entry is created with ``Ngram(1)``, an existing one has its ``count``
     incremented, and the entry is written back with ``set_n_gram``.

     Args:
         text: The string to tokenize and count n-grams from.

     Returns:
         None. Results are recorded through ``self.storage_``.
     """
     # re.split yields '' for leading/trailing whitespace; drop those so an
     # empty string is never treated as a word.
     tokens = [tok for tok in re.split(r'\s+', text) if tok]
     total = len(tokens)
     for wnum in range(total):
         for ng_ord in range(1, self.max_order + 1):
             end = wnum + ng_ord
             # `end` is an exclusive slice bound, so end == total is still a
             # valid window (it is the one that contains the last token).
             # Longer orders from this start cannot fit either, so stop.
             if end > total:
                 break
             words_tuple = tuple(tokens[wnum:end])
             ngram = self.storage_.get_n_gram(words_tuple)
             if ngram is None:
                 # NOTE(review): presumably the argument is the initial
                 # count -- confirm against Ngram's constructor.
                 ngram = Ngram(1)
             else:
                 ngram.count = ngram.count + 1
             self.storage_.set_n_gram(words_tuple, ngram)
Beispiel #2
0
 def mock_ngram(self, string, count, frequency, sig_score):
     """Build an ``Ngram`` from ``string`` with the given attributes set.

     Args:
         string: Value passed to the ``Ngram`` constructor.
         count: Value assigned to the ``count`` attribute.
         frequency: Value assigned to the ``frequency`` attribute.
         sig_score: Value assigned to the ``sig_score`` attribute.

     Returns:
         The newly created ``Ngram`` instance.
     """
     mocked = Ngram(string)
     # Targets are assigned left to right, matching the original order.
     mocked.count, mocked.frequency, mocked.sig_score = (
         count,
         frequency,
         sig_score,
     )
     return mocked