def test_stringio(self):
    # Assert that lexicon, morphology and context rules
    # can be loaded from file-like strings instead of file paths.
    parser = text.Parser(
            lexicon = {"to": "TO", "saw": "VBD"},
         morphology = StringIO(u"NN s fhassuf 1 NNS x"),
            context = StringIO(u"VBD VB PREVTAG TO"))
    # Morphology rule: unknown "cats" ends in -s => NNS.
    self.assertEqual(parser.parse("cats"), "cats/NNS/B-NP/O")
    # Context rule: VBD after TO => VB ("to saw" = infinitive).
    self.assertEqual(parser.parse("to saw"), "to/TO/B-VP/O saw/VB/I-VP/O")
def test_find_chunks(self): # Assert the default phrase chunker and its optional parameters. p = text.Parser() v1 = p.find_chunks([["", "DT"], ["", "JJ"], ["", "NN"]], language="en") v2 = p.find_chunks([["", "DT"], ["", "JJ"], ["", "NN"]], language="es") v3 = p.find_chunks([["", "DT"], ["", "NN"], ["", "JJ"]], language="en") v4 = p.find_chunks([["", "DT"], ["", "NN"], ["", "JJ"]], language="es") self.assertEqual(v1, [["", "DT", "B-NP", "O"], ["", "JJ", "I-NP", "O"], ["", "NN", "I-NP", "O"]]) self.assertEqual(v2, [["", "DT", "B-NP", "O"], ["", "JJ", "I-NP", "O"], ["", "NN", "I-NP", "O"]]) self.assertEqual(v3, [["", "DT", "B-NP", "O"], ["", "NN", "I-NP", "O"], ["", "JJ", "B-ADJP", "O"]]) self.assertEqual(v4, [["", "DT", "B-NP", "O"], ["", "NN", "I-NP", "O"], ["", "JJ", "I-NP", "O"]]) print "pattern.text.Parser.find_chunks()"
def test_find_tokens(self): # Assert the default tokenizer and its optional parameters. p = text.Parser() v1 = p.find_tokens(u"Schrödinger's cat is alive!", punctuation="", replace={}) v2 = p.find_tokens(u"Schrödinger's cat is dead!", punctuation="!", replace={"'s": " 's"}) v3 = p.find_tokens(u"etc.", abbreviations=set()) v4 = p.find_tokens(u"etc.", abbreviations=set(("etc.",))) self.assertEqual(v1[0], u"Schrödinger's cat is alive!") self.assertEqual(v2[0], u"Schrödinger 's cat is dead !") self.assertEqual(v3[0], "etc .") self.assertEqual(v4[0], "etc.") print "pattern.text.Parser.find_tokens()"
def test_find_tags(self): # Assert the default part-of-speech tagger and its optional parameters. p = text.Parser() v1 = p.find_tags([u"Schrödinger", "cat", "1.0"], lexicon={}, default=("NN?", "NNP?", "CD?")) v2 = p.find_tags([u"Schrödinger", "cat", "1.0"], lexicon={"1.0": "CD?"}) v3 = p.find_tags([u"Schrödinger", "cat", "1.0"], map=lambda token, tag: (token, tag+"!")) v4 = p.find_tags(["observer", "observable"], language="fr") v5 = p.find_tags(["observer", "observable"], language="en") self.assertEqual(v1, [[u"Schr\xf6dinger", "NNP?"], ["cat", "NN?"], ["1.0", "CD?"]]) self.assertEqual(v2, [[u"Schr\xf6dinger", "NNP" ], ["cat", "NN" ], ["1.0", "CD?"]]) self.assertEqual(v3, [[u"Schr\xf6dinger", "NNP!"], ["cat", "NN!"], ["1.0", "CD!"]]) self.assertEqual(v4, [["observer", "NN"], ["observable", "NN"]]) self.assertEqual(v5, [["observer", "NN"], ["observable", "JJ"]]) print "pattern.text.Parser.find_tags()"
def test_find_keywords(self): # Assert the intrinsic keyword extraction algorithm. p = text.Parser() p.lexicon["the"] = "DT" p.lexicon["cat"] = "NN" p.lexicon["dog"] = "NN" v1 = p.find_keywords("the cat") v2 = p.find_keywords("cat. cat. dog.") v3 = p.find_keywords("cat. dog. dog.") v4 = p.find_keywords("the. cat. dog.", frequency={"cat": 1.0, "dog": 0.0}) self.assertEqual(v1, ["cat"]) self.assertEqual(v2, ["cat", "dog"]) self.assertEqual(v3, ["dog", "cat"]) self.assertEqual(v3, ["dog", "cat"]) print "pattern.text.Parser.find_keywords()"