def test_boilerpipe_extractor_strings(self):
    """Feed a single raw body string through BoilerpipeExtractor.

    The input is a one-element list holding ``self.d1["body"]``. The first
    result is expected to contain 52 items, with specific items at
    positions 32-34.
    """
    extractor = BoilerpipeExtractor()
    raw_inputs = [self.d1["body"]]
    # Materialize the full output before indexing into it.
    tokens = list(extractor.process(raw_inputs))[0]

    self.assertEqual(len(tokens), 52)
    # (position, expected item) pairs, checked in ascending order.
    expectations = ((32, "."), (33, "\n"), (34, "A"))
    for position, expected in expectations:
        self.assertEqual(tokens[position], expected)
def test_boilerpipe_extractor_docs(self):
    """Feed the ``self.docs`` fixture through BoilerpipeExtractor.

    The first result's ``words()`` output is expected to yield 10 items,
    with specific items at positions 5-7.
    """
    extractor = BoilerpipeExtractor()
    # Materialize the full output before indexing into it.
    first_doc = list(extractor.process(self.docs))[0]
    tokens = list(first_doc.words())

    self.assertEqual(len(tokens), 10)
    # (position, expected item) pairs, checked in ascending order.
    expectations = ((5, "extractor"), (6, "."), (7, "another"))
    for position, expected in expectations:
        self.assertEqual(tokens[position], expected)