Example #1
0
 def test_stringio(self):
     """Assert that Parser loads lexicon, morphology and context data
     from file-like string objects instead of files on disk.
     """
     lexicon = {"to": "TO", "saw": "VBD"}
     morphology = StringIO(u"NN s fhassuf 1 NNS x")
     context = StringIO(u"VBD VB PREVTAG TO")
     parser = text.Parser(lexicon=lexicon, morphology=morphology, context=context)
     # The morphology rule yields NNS for "cats"; the context rule
     # retags the VBD after TO as VB.
     for sentence, expected in (
             ("cats", "cats/NNS/B-NP/O"),
             ("to saw", "to/TO/B-VP/O saw/VB/I-VP/O")):
         self.assertEqual(parser.parse(sentence), expected)
Example #2
0
 def test_find_chunks(self):
     """Assert the default phrase chunker and its optional parameters."""
     parser = text.Parser()
     # Each case: tagged tokens, language, expected chunked output.
     # Note the asserted difference: a JJ following a NN starts a new
     # ADJP chunk for "en" but stays inside the NP chunk for "es".
     cases = (
         ([["", "DT"], ["", "JJ"], ["", "NN"]], "en",
          [["", "DT", "B-NP", "O"], ["", "JJ", "I-NP", "O"], ["", "NN", "I-NP", "O"]]),
         ([["", "DT"], ["", "JJ"], ["", "NN"]], "es",
          [["", "DT", "B-NP", "O"], ["", "JJ", "I-NP", "O"], ["", "NN", "I-NP", "O"]]),
         ([["", "DT"], ["", "NN"], ["", "JJ"]], "en",
          [["", "DT", "B-NP", "O"], ["", "NN", "I-NP", "O"], ["", "JJ", "B-ADJP", "O"]]),
         ([["", "DT"], ["", "NN"], ["", "JJ"]], "es",
          [["", "DT", "B-NP", "O"], ["", "NN", "I-NP", "O"], ["", "JJ", "I-NP", "O"]]),
     )
     for tagged, language, expected in cases:
         self.assertEqual(parser.find_chunks(tagged, language=language), expected)
     print("pattern.text.Parser.find_chunks()")
Example #3
0
 def test_find_tokens(self):
     """Assert the default tokenizer and its optional parameters."""
     parser = text.Parser()
     # With no punctuation and no replacements, the sentence is kept intact.
     untouched = parser.find_tokens(u"Schrödinger's cat is alive!", punctuation="", replace={})
     self.assertEqual(untouched[0], u"Schrödinger's cat is alive!")
     # "!" is split off as punctuation and "'s" is rewritten to " 's".
     rewritten = parser.find_tokens(u"Schrödinger's cat is dead!", punctuation="!", replace={"'s": " 's"})
     self.assertEqual(rewritten[0], u"Schrödinger 's cat is dead !")
     # A trailing period is split off, unless the word is a known abbreviation.
     self.assertEqual(parser.find_tokens(u"etc.", abbreviations=set())[0], "etc .")
     self.assertEqual(parser.find_tokens(u"etc.", abbreviations=set(("etc.",)))[0], "etc.")
     print("pattern.text.Parser.find_tokens()")
Example #4
0
 def test_find_tags(self):
     """Assert the default part-of-speech tagger and its optional parameters."""
     parser = text.Parser()
     words = [u"Schrödinger", "cat", "1.0"]
     # Custom default tags (unknown word, proper noun, number) with an empty lexicon.
     self.assertEqual(
         parser.find_tags(words, lexicon={}, default=("NN?", "NNP?", "CD?")),
         [[u"Schr\xf6dinger", "NNP?"], ["cat", "NN?"], ["1.0", "CD?"]])
     # A lexicon entry overrides the tag for "1.0" only; the others get the defaults.
     self.assertEqual(
         parser.find_tags(words, lexicon={"1.0": "CD?"}),
         [[u"Schr\xf6dinger", "NNP"], ["cat", "NN"], ["1.0", "CD?"]])
     # The map function post-processes every (token, tag) pair.
     self.assertEqual(
         parser.find_tags(words, map=lambda token, tag: (token, tag + "!")),
         [[u"Schr\xf6dinger", "NNP!"], ["cat", "NN!"], ["1.0", "CD!"]])
     # Tagging is language-aware: "observable" is JJ in English, NN in French.
     self.assertEqual(
         parser.find_tags(["observer", "observable"], language="fr"),
         [["observer", "NN"], ["observable", "NN"]])
     self.assertEqual(
         parser.find_tags(["observer", "observable"], language="en"),
         [["observer", "NN"], ["observable", "JJ"]])
     print("pattern.text.Parser.find_tags()")
Example #5
0
 def test_find_keywords(self):
     """Assert the intrinsic keyword extraction algorithm."""
     p = text.Parser()
     p.lexicon["the"] = "DT"
     p.lexicon["cat"] = "NN"
     p.lexicon["dog"] = "NN"
     v1 = p.find_keywords("the cat")
     v2 = p.find_keywords("cat. cat. dog.")
     v3 = p.find_keywords("cat. dog. dog.")
     v4 = p.find_keywords("the. cat. dog.", frequency={"cat": 1.0, "dog": 0.0})
     self.assertEqual(v1, ["cat"])         # function words ("the", DT) are excluded
     self.assertEqual(v2, ["cat", "dog"])  # more frequent nouns rank first
     self.assertEqual(v3, ["dog", "cat"])
     # BUG FIX: the original asserted v3 twice, so v4 — the only call that
     # exercises the frequency parameter — was never checked. A corpus
     # frequency of 1.0 marks "cat" as commonplace (weighted down) while the
     # rare "dog" (0.0) is weighted up, so "dog" should rank first.
     # NOTE(review): expected order inferred from v3's behavior — confirm
     # against the pattern library's reference test suite.
     self.assertEqual(v4, ["dog", "cat"])
     print("pattern.text.Parser.find_keywords()")