Example #1
0
 def test_bigram(self):
     """Check ``n_gram`` output for ``n=2`` against the fixture segments.

     The expected sequence is ``tokensA`` followed by ``'a+b', 'b+c'``,
     then ``tokensB`` followed by ``'A+B', 'B+C'``.
     """
     fixture = TestNGramTransformer
     wanted = (fixture.tokensA + ['a+b', 'b+c']
               + fixture.tokensB + ['A+B', 'B+C'])
     transformer = NGramTransformer(Sentinel(), n=2)
     # n_gram's result is materialised with list() before comparing.
     actual = list(transformer.n_gram(fixture.segments))
     self.assertListEqual(wanted, actual)
Example #2
0
    def test_extract(self):
        """Check that ``_extract`` rewrites each cell of a row with ``n=1``.

        The first cell starts as the fixture segments and is expected to
        end up as ``tokensA + tokensB``; the second cell starts as an empty
        list and is expected to end up as ``['~void~']``.
        """
        fixture = TestNGramTransformer
        cells = [fixture.segments, []]
        wanted = [fixture.tokensA + fixture.tokensB, ['~void~']]
        transformer = NGramTransformer(Sentinel(), n=1)

        # _extract takes the row plus a position and its return value is
        # ignored here, so the index range is fixed up front and the row
        # itself is compared afterwards.
        positions = range(len(cells))
        for position in positions:
            transformer._extract(cells, position)

        self.assertListEqual(wanted, cells)
Example #3
0
 def test_unigram(self):
     """Check ``n_gram`` output for ``n=1`` against the fixture segments.

     The expected sequence is ``tokensA`` followed directly by ``tokensB``.
     """
     fixture = TestNGramTransformer
     wanted = fixture.tokensA + fixture.tokensB
     transformer = NGramTransformer(Sentinel(), n=1)
     # n_gram's result is materialised with list() before comparing.
     actual = list(transformer.n_gram(fixture.segments))
     self.assertListEqual(wanted, actual)