Exemple #1
0
 def func(text):
     """Lowercase and tokenize *text*, strip stopwords, and return n-grams.

     The text is split on whitespace after lowercasing, then passed through
     ``remove_stopwords`` with ``ignore_case=True`` and ``remove_punc=True``.
     The surviving tokens are handed to ``n_gram_strings`` together with
     ``n``, which is read from the enclosing scope (not visible here).
     """
     # Lowercase up front: ignore_case only affects stopword matching and
     # does not return the tokens lower cased.
     lowered_tokens = text.lower().split()
     kept_tokens = remove_stopwords(
         lowered_tokens,
         ignore_case=True,
         remove_punc=True,
     )
     return n_gram_strings(list(kept_tokens), n)
Exemple #2
0
 def func(text):
     """Tokenize *text*, drop punctuation tokens, and return n-grams.

     The text is split on whitespace; every token that is not found in
     ``string.punctuation`` is lowercased and kept. The resulting token
     list is passed to ``n_gram_strings`` together with ``n``, which is
     read from the enclosing scope (not visible here).
     """
     # Test each individual token against the punctuation set. The previous
     # code tested the whole input string, so punctuation tokens were never
     # filtered out for ordinary multi-token input.
     tokens = [
         token.lower()
         for token in text.split()
         if token not in string.punctuation
     ]
     return n_gram_strings(tokens, n)
Exemple #3
0
 def func(text):
     """Return the n-grams built over the individual items of *text*.

     ``list(text)`` splits the input into its elements (single characters
     when *text* is a string); ``n`` comes from the enclosing scope.
     """
     items = list(text)
     return n_gram_strings(items, n)
Exemple #4
0
 def func(text):
     """Return the n-grams built over the whitespace-separated tokens of *text*.

     ``n`` comes from the enclosing scope.
     """
     words = text.split()
     return n_gram_strings(words, n)
Exemple #5
0
 def test_n_gram_strings_bigram_padded(self):
     """Bigrams with ``pad_size=1`` carry a "_" entry at both ends."""
     # presumably self.tokens is ["a", "nice", "sentence"]; verify in setUp
     expected = ["_ a", "a nice", "nice sentence", "sentence _"]
     bigrams = n_gram_strings(self.tokens, 2, pad_size=1)
     self.assertEqual(bigrams, expected)
Exemple #6
0
 def test_n_gram_strings_bigram(self):
     """Unpadded bigrams are the space-joined adjacent token pairs."""
     # presumably self.tokens is ["a", "nice", "sentence"]; verify in setUp
     expected = ["a nice", "nice sentence"]
     bigrams = n_gram_strings(self.tokens, 2)
     self.assertEqual(bigrams, expected)
Exemple #7
0
 def test_n_gram_strings_bigram_padded(self):
     """Check the padded bigram output of ``n_gram_strings``.

     With ``pad_size=1`` the expected result starts with "_ a" and ends
     with "sentence _".
     """
     result = n_gram_strings(self.tokens, 2, pad_size=1)
     self.assertEqual(
         result,
         [
             "_ a",
             "a nice",
             "nice sentence",
             "sentence _",
         ],
     )
Exemple #8
0
 def test_n_gram_strings_bigram(self):
     """Check the unpadded bigram output of ``n_gram_strings``."""
     result = n_gram_strings(self.tokens, 2)
     self.assertEqual(
         result,
         [
             "a nice",
             "nice sentence",
         ],
     )