def func(text):
    """Return n-gram strings over the lower-cased, stopword-filtered words of ``text``.

    ``n`` is not a parameter; it is resolved from the surrounding scope.
    """
    # Lower-case before filtering: ignore_case only ignores case when
    # removing stopwords, it does not return the tokens lower-cased.
    lowered_words = text.lower().split()
    kept_words = remove_stopwords(
        lowered_words,
        ignore_case=True,
        remove_punc=True,
    )
    return n_gram_strings(list(kept_words), n)
def func(text):
    """Return n-gram strings over the lower-cased whitespace tokens of ``text``.

    Tokens found in ``string.punctuation`` (e.g. a stand-alone ``","`` or
    ``"!"``) are dropped before the n-grams are built. ``n`` is not a
    parameter; it is resolved from the surrounding scope.
    """
    # The filter must look at each token. Testing the whole ``text`` here
    # would give the same answer for every token, so punctuation tokens
    # would never be removed individually.
    tokens = [
        token.lower()
        for token in text.split()
        if token not in string.punctuation
    ]
    return n_gram_strings(tokens, n)
def func(text):
    """Return n-gram strings built from the individual items of ``text``.

    Iterating ``text`` yields its characters when it is a ``str``
    (presumably the case here; confirm against the caller). ``n`` is not a
    parameter; it is resolved from the surrounding scope.
    """
    characters = [*text]
    return n_gram_strings(characters, n)
def func(text):
    """Return n-gram strings built from the whitespace-separated tokens of ``text``.

    ``n`` is not a parameter; it is resolved from the surrounding scope.
    """
    words = text.split()
    return n_gram_strings(words, n)
def test_n_gram_strings_bigram_padded(self):
    # With pad_size=1 the expected bigrams carry a "_" pad token at both ends.
    # NOTE(review): self.tokens is presumably ["a", "nice", "sentence"], set
    # up by the fixture outside this block; confirm.
    expected = ["_ a", "a nice", "nice sentence", "sentence _"]
    padded_bigrams = n_gram_strings(self.tokens, 2, pad_size=1)
    self.assertEqual(padded_bigrams, expected)
def test_n_gram_strings_bigram(self):
    # Without padding, only bigrams of adjacent tokens are expected.
    # NOTE(review): self.tokens is presumably ["a", "nice", "sentence"], set
    # up by the fixture outside this block; confirm.
    expected = ["a nice", "nice sentence"]
    bigrams = n_gram_strings(self.tokens, 2)
    self.assertEqual(bigrams, expected)
def test_n_gram_strings_bigram_padded(self):
    # Bigrams requested with pad_size=1: the expected output starts and ends
    # with a bigram that contains the "_" pad token.
    result = n_gram_strings(self.tokens, 2, pad_size=1)
    self.assertEqual(
        result,
        [
            "_ a",
            "a nice",
            "nice sentence",
            "sentence _",
        ],
    )
def test_n_gram_strings_bigram(self):
    # Plain bigrams (no padding argument): one string per adjacent token pair.
    result = n_gram_strings(self.tokens, 2)
    self.assertEqual(
        result,
        [
            "a nice",
            "nice sentence",
        ],
    )