Ejemplo n.º 1
0
 def test_boilerpipe_extractor_str(self):
     """Run ``self.d2`` through ``WShingle(2)`` and check the shingles produced.

     The first processed result is iterated directly and must contain
     exactly the three expected integer values, in any order.
     """
     # NOTE(review): the test name mentions boilerpipe, but the body only
     # exercises WShingle -- confirm whether the name is a copy/paste leftover.
     expected_hashes = [
         4511874163119075276,
         586875170268770749,
         1339662791857901318,
     ]

     # presumably 2 is the shingle width -- verify against WShingle's signature
     shingler = WShingle(2)
     processed = list(shingler.process([self.d2]))
     first_result = list(processed[0])

     self.assertEqual(len(first_result), 3)
     self.assertItemsEqual(first_result, expected_hashes)
Ejemplo n.º 2
0
 def test_boilerpipe_extractor_with_attribute(self):
     """Run ``self.docs`` through ``WShingle(2, "w_shingles")`` and check the output.

     The shingles are read from the ``"w_shingles"`` entry of the first
     processed result's ``document`` mapping and must contain exactly the
     three expected integer values, in any order.
     """
     # NOTE(review): the test name mentions boilerpipe, but the body only
     # exercises WShingle -- confirm whether the name is a copy/paste leftover.
     attribute = "w_shingles"
     expected_hashes = [
         4511874163119075276,
         586875170268770749,
         1339662791857901318,
     ]

     # presumably 2 is the shingle width and the second argument names the
     # attribute the result is stored under -- verify against WShingle
     shingler = WShingle(2, attribute)
     processed = list(shingler.process(self.docs))
     first_result = list(processed[0].document[attribute])

     self.assertEqual(len(first_result), 3)
     self.assertItemsEqual(first_result, expected_hashes)