def test_extract(self):
    """Apply ``_extract`` to every index of a two-cell row and check the row.

    The row starts as the shared segments fixture followed by an empty
    list. After the calls, the first cell is expected to equal
    ``tokensA + tokensB`` and the second to equal ``['~void~']``.
    """
    cells = [TestNGramTransformer.segments, []]
    wanted = [
        TestNGramTransformer.tokensA + TestNGramTransformer.tokensB,
        ['~void~'],
    ]
    transformer = NGramTransformer(Sentinel(), n=1)
    # Snapshot the length once: the assertion below inspects ``cells``
    # after the calls, so ``_extract`` is expected to modify it.
    cell_count = len(cells)
    for position in range(cell_count):
        transformer._extract(cells, position)
    self.assertListEqual(wanted, cells)
def test_unigram(self):
    """With ``n=1``, ``n_gram`` over the segments fixture yields
    ``tokensA`` followed by ``tokensB``.
    """
    wanted = TestNGramTransformer.tokensA + TestNGramTransformer.tokensB
    transformer = NGramTransformer(Sentinel(), n=1)
    produced = transformer.n_gram(TestNGramTransformer.segments)
    # Materialize the result so it can be compared as a list.
    self.assertListEqual(wanted, [*produced])
def test_setup(self):
    """A transformer built from only a rows object exposes that object as
    ``rows``, has ``N`` equal to 1 and ``text_columns`` equal to ``None``.
    """
    source = Sentinel()
    transformer = NGramTransformer(source)
    expectations = (
        ("rows", source),
        ("N", 1),
        ("text_columns", None),
    )
    # Attributes are read one at a time, in the listed order, right
    # before each comparison.
    for attribute, wanted in expectations:
        self.assertEqual(wanted, getattr(transformer, attribute))