def test_lexicon(self):
    # Assert that a lexicon can be loaded both from a plain string and from
    # a file-like object holding the same entries, and that a tagged word
    # can then be looked up by key.
    # NOTE(review): another method named test_lexicon is defined right after
    # this one; if both belong to the same class, the later definition
    # replaces this one and this test never runs -- confirm.
    entries = u";;; Comments. \n schrödinger NNP \n cat NN"
    sources = (entries, StringIO.StringIO(entries))
    # Build both lexicons up front, then check each one.
    lexicons = [text.Lexicon(path=source) for source in sources]
    for lexicon in lexicons:
        self.assertEqual(lexicon[u"schrödinger"], "NNP")
    print("pattern.text.Lexicon")
def test_lexicon(self):
    # Assert loading a lexicon together with its morphology, context and
    # entity rule sets, and applying each rule set to tagged tokens.
    # NOTE(review): another method named test_lexicon is defined right before
    # this one; if both belong to the same class, this definition replaces
    # the earlier one -- confirm.
    lexicon_entries = u";;; Comments. \n schrödinger NNP \n cat NN"
    morphology_rules = StringIO.StringIO(u"NN s fhassuf 1 NNS x")
    context_rules = StringIO.StringIO(u"VBD VB PREVTAG TO")
    entity_rules = StringIO.StringIO(u"Schrödinger's cat PERS")
    lexicon = text.Lexicon(
        path=lexicon_entries,
        morphology=morphology_rules,
        context=context_rules,
        entities=entity_rules,
    )

    # Plain dictionary-style lookup of a word's tag.
    self.assertEqual(lexicon[u"schrödinger"], "NNP")

    # The morphology rule is expected to retag "cats" from NN to NNS.
    retagged_token = lexicon.morphology.apply(["cats", "NN"])
    self.assertEqual(retagged_token, ["cats", "NNS"])

    # The context rule is expected to retag VBD as VB after a TO tag.
    retagged_sentence = lexicon.context.apply([["to", "TO"], ["be", "VBD"]])
    self.assertEqual(retagged_sentence, [["to", "TO"], ["be", "VB"]])

    # The entity rule is expected to mark both tokens as NNP-PERS.
    annotated = lexicon.entities.apply(
        [[u"Schrödinger's", "NNP"], ["cat", "NN"]])
    self.assertEqual(
        annotated,
        [[u"Schrödinger's", "NNP-PERS"], ["cat", "NNP-PERS"]])

    for label in (
        "pattern.text.Lexicon",
        "pattern.text.Morphology",
        "pattern.text.Context",
        "pattern.text.Entities",
    ):
        print(label)