def test_boilerpipe_extractor_str(self):
    """Check the values produced by ``WShingle(2)`` for ``self.d2``.

    The ``self.d2`` fixture is wrapped in a one-element list and passed to
    ``process``; the first result is then iterated directly and must
    contain exactly the three expected values, in any order.

    NOTE(review): the method name mentions a boilerpipe extractor, but the
    body only exercises ``WShingle`` -- confirm the name is intended.
    """
    expected_values = [
        4511874163119075276,
        586875170268770749,
        1339662791857901318,
    ]

    shingler = WShingle(2)
    # Materialise the whole output of process() before inspecting it.
    processed = list(shingler.process([self.d2]))
    first_result = processed[0]
    produced = list(first_result)

    self.assertEqual(len(produced), 3)
    # Order-insensitive comparison of the produced values.
    self.assertItemsEqual(produced, expected_values)
def test_boilerpipe_extractor_with_attribute(self):
    """Check the values ``WShingle(2, "w_shingles")`` stores per document.

    ``self.docs`` is passed to ``process``; the first result's
    ``document["w_shingles"]`` entry is then iterated and must contain
    exactly the three expected values, in any order.

    NOTE(review): the method name mentions a boilerpipe extractor, but the
    body only exercises ``WShingle`` -- confirm the name is intended.
    """
    attribute = "w_shingles"
    expected_values = [
        4511874163119075276,
        586875170268770749,
        1339662791857901318,
    ]

    shingler = WShingle(2, attribute)
    # Materialise the whole output of process() before inspecting it.
    processed = list(shingler.process(self.docs))
    stored = processed[0].document[attribute]
    produced = list(stored)

    self.assertEqual(len(produced), 3)
    # Order-insensitive comparison of the stored values.
    self.assertItemsEqual(produced, expected_values)