def test_trigram(self):
    """Check ``n_gram`` output for ``n=3`` against the class fixtures.

    The expected list is ``tokensA`` followed by the literal bigrams and
    trigram for the first segment, then ``tokensB`` followed by the literal
    bigrams and trigram for the second segment.
    """
    fixture = TestNGramTransformer

    # Expected '+'-joined n-grams, spelled out literally per segment.
    bigrams_a = ['a+b', 'b+c']
    trigrams_a = ['a+b+c']
    bigrams_b = ['A+B', 'B+C']
    trigrams_b = ['A+B+C']

    expected = (
        fixture.tokensA + bigrams_a + trigrams_a
        + fixture.tokensB + bigrams_b + trigrams_b
    )

    transformer = NGramTransformer(Sentinel(), n=3)
    # n_gram's result is materialized with list() so it can be compared
    # element by element.
    actual = list(transformer.n_gram(fixture.segments))

    self.assertListEqual(expected, actual)
def test_bigram(self):
    """Check ``n_gram`` output for ``n=2`` against the class fixtures.

    The expected list is ``tokensA`` followed by the literal bigrams for the
    first segment, then ``tokensB`` followed by the literal bigrams for the
    second segment.
    """
    fixture = TestNGramTransformer

    # Expected '+'-joined bigrams, spelled out literally per segment.
    bigrams_a = ['a+b', 'b+c']
    bigrams_b = ['A+B', 'B+C']

    expected = fixture.tokensA + bigrams_a + fixture.tokensB + bigrams_b

    transformer = NGramTransformer(Sentinel(), n=2)
    # n_gram's result is materialized with list() so it can be compared
    # element by element.
    actual = list(transformer.n_gram(fixture.segments))

    self.assertListEqual(expected, actual)
def test_unigram(self):
    """Check ``n_gram`` output for ``n=1`` against the class fixtures.

    The expected list is simply ``tokensA`` followed by ``tokensB``; no
    combined '+'-joined entries are expected.
    """
    fixture = TestNGramTransformer

    expected = fixture.tokensA + fixture.tokensB

    transformer = NGramTransformer(Sentinel(), n=1)
    # n_gram's result is materialized with list() so it can be compared
    # element by element.
    actual = list(transformer.n_gram(fixture.segments))

    self.assertListEqual(expected, actual)