def setUp(self):
    """Create a new ``Lemmatizer`` and store it on ``self.l``."""
    # NOTE(review): this module-level function has the same body as
    # TestLemmatizer.setUp; it looks like a stray duplicate -- confirm
    # whether anything calls it before removing.
    self.l = Lemmatizer()
class TestLemmatizer(unittest.TestCase):
    """Checks ``Lemmatizer`` output against hand-listed expected lemmas.

    Two entry points are exercised: ``lemmatize(form, tag)`` and
    ``lemmatize_vn(form)``.  Each case is a single helper call on its own
    line so that a failing assertion's traceback identifies the case.
    """

    def setUp(self):
        """Give every test its own ``Lemmatizer`` instance on ``self.l``."""
        self.l = Lemmatizer()

    def tearDown(self):
        """Drop the reference to the lemmatizer created in ``setUp``."""
        self.l = None

    def _lemma(self, form, tag, expected):
        """Assert that ``lemmatize(form, tag)`` equals ``expected``."""
        self.assertEqual(self.l.lemmatize(form, tag), expected)

    def nv(self, vn, root):
        """Assert that ``lemmatize_vn(vn)`` equals ``root``."""
        self.assertEqual(self.l.lemmatize_vn(vn), root)

    def test_bi(self):
        # Every form listed here is expected to lemmatize to "bi".
        self._lemma("tha", "V-p", "bi")
        self._lemma("thà", "V-p", "bi")
        self._lemma("Tha", "V-p", "bi")
        self._lemma("th'", "V-p", "bi")
        self._lemma("bha", "V-s", "bi")
        self._lemma("bh'", "V-s", "bi")
        self._lemma("eil", "V-p--d", "bi")
        self._lemma("robh", "V-s--d", "bi")
        self._lemma("bhith", "Nv", "bi")
        self._lemma("bhiodh", "V-h", "bi")
        self._lemma("bhi", "V-f--d", "bi")
        self._lemma("bi", "V-f--d", "bi")
        self._lemma("bhios", "V-f--r", "bi")
        self._lemma("bidh", "V-f", "bi")
        self._lemma("biodh", "V-h--d", "bi")

    def test_copula(self):
        # Every form listed here is expected to lemmatize to "is".
        self._lemma("an", "Wpdqa", "is")
        self._lemma("B'", "Ws", "is")
        self._lemma("b'", "Ws", "is")
        self._lemma("bu", "Ws", "is")
        self._lemma("cha", "Wp-in", "is")
        self._lemma("chan", "Wp-in", "is")
        self._lemma("gur", "Wpdia", "is")
        self._lemma("'S", "Wp-i", "is")
        self._lemma("'s", "Wp-i", "is")
        self._lemma("is", "Wp-i", "is")
        self._lemma("nach", "Wpdqn", "is")
        self._lemma("'se", "Wp-i-3", "is")
        self._lemma("as", "Wpr", "is")

    def test_irregulars(self):
        # Forms whose expected lemma is listed explicitly, grouped by lemma.

        # abair
        self._lemma("Thuirt", "V-s", "abair")
        self._lemma("thuirt", "V-s", "abair")
        self._lemma("ràdh", "Nv", "abair")

        # arsa
        # NOTE(review): the escapes below are the UTF-8 byte sequence of
        # U+2019 (right single quotation mark).  In a Python 3 str literal
        # they are three separate code points rather than one character --
        # confirm this is the input the lemmatizer is meant to receive.
        self._lemma("ars\xe2\x80\x99", "V-s", "arsa")
        self._lemma("ars'", "V-s", "arsa")
        self._lemma("as", "V-s", "arsa")

        # cluinn
        self._lemma("chuala", "V-s", "cluinn")

        # dèan
        self._lemma("dèanamh", "Nv", "dèan")
        self._lemma("dhèanamh", "Nv", "dèan")
        self._lemma("nì", "V-f", "dèan")
        self._lemma("Rinn", "V-s", "dèan")
        self._lemma("rinn", "V-s", "dèan")

        # faic
        self._lemma("chunnaic", "V-s", "faic")
        self._lemma("faicinn", "Nv", "faic")
        self._lemma("fhaicinn", "Nv", "faic")

        # faigh
        self._lemma("faigheadh", "V-h--d", "faigh")
        self._lemma("faighinn", "Nv", "faigh")
        self._lemma("fhuair", "V-s", "faigh")
        self._lemma("gheibh", "V-f", "faigh")

        # rach
        self._lemma("Chaidh", "V-s", "rach")
        self._lemma("chaidh", "V-s", "rach")
        self._lemma("deach", "V-s--d", "rach")
        self._lemma("dhol", "Nv", "rach")
        self._lemma("dol", "Nv", "rach")
        self._lemma("Thèid", "V-f", "rach")
        self._lemma("thèid", "V-f", "rach")
        self._lemma("tèid", "V-f--d", "rach")

        # thig
        self._lemma("thàinig", "V-s", "thig")
        self._lemma("tighinn", "Nv", "thig")

        # thoir
        self._lemma("thug", "V-s", "thoir")
        self._lemma("tug", "V-s--d", "thoir")
        self._lemma("bheir", "V-f", "thoir")
        self._lemma("thoirt", "Nv", "thoir")
        self._lemma("toirt", "Nv", "thoir")

    def test_regulars(self):
        # (form, tag) pairs checked through lemmatize().
        self._lemma("bhuail", "V-s", "buail")
        self._lemma("choinnich", "V-s", "coinnich")
        self._lemma("chuir", "V-s", "cuir")
        self._lemma("fhàg", "V-s", "fàg")
        self._lemma("faodaidh", "V-f", "faod")
        self._lemma("feuch", "Vm-2s", "feuch")
        self._lemma("fheuch", "V-s", "feuch")
        self._lemma("feumaidh", "V-f", "feum")
        self._lemma("ghabh", "V-s", "gabh")
        self._lemma("ràinig", "V-s", "ruig")
        self._lemma("thachair", "V-s", "tachair")
        self._lemma("thòisich", "V-s", "tòisich")

    def test_nvs(self):
        # Mixes the two entry points on purpose: self.nv(...) goes through
        # lemmatize_vn(form), self._lemma(..., "Nv", ...) goes through
        # lemmatize(form, "Nv").  The order of the cases is kept as-is.
        self.nv("àicheadh", "àicheidh")
        self.nv("amas", "amais")
        self.nv("amharc", "amhairc")
        self.nv("agairt", "agair")
        self.nv("bagairt", "bagair")
        self.nv("baisteadh", "baist")
        self.nv("blasad", "blais")
        self.nv("blasadh", "blais")
        self.nv("brìodal", "brìodail")
        self.nv("briseadh", "bris")
        self.nv("bristeadh", "bris")
        self.nv("bruich", "bruich")
        self._lemma("bruidhinn", "Nv", "bruidhinn")
        self.nv("bruthadh", "brùth")
        self.nv("buntainn", "buin")
        self.nv("cinntinn", "cinn")
        self.nv("cagar", "cagair")
        self.nv("cagarsaich", "cagair")
        self.nv("cagartaich", "cagair")
        self.nv("casgairt", "casgair")
        self.nv("casgradh", "casgair")
        self._lemma("coimhead", "Nv", "coimhead")
        self._lemma("cur", "Nv", "cuir")
        self._lemma("chur", "Nv", "cuir")
        self.nv("cluiche", "cluich")
        self._lemma("creidsinn", "Nv", "creid")
        self._lemma("cumail", "Nv", "cùm")
        self._lemma("falbh", "Nv", "falbh")
        self._lemma("fàs", "Nv", "fàs")
        self._lemma("feuchainn", "Nv", "feuch")
        self._lemma("fhalbh", "Nv", "falbh")
        self._lemma("fuireach", "Nv", "fuirich")
        self._lemma("gabhail", "Nv", "gabh")
        self._lemma("gluasad", "Nv", "gluais")
        self._lemma("iarraidh", "Nv", "iarr")
        self._lemma("innse", "Nv", "inns")
        self.nv("ithe", "ith")
        self.nv("laighe", "laigh")
        self._lemma("leigeil", "Nv", "leig")
        self.nv("leigheas", "leighis")
        self._lemma("obair", "Nv", "obraich")
        self._lemma("ruith", "Nv", "ruith")
        self._lemma("sealltainn", "Nv", "seall")
        self._lemma("smaoineachadh", "Nv", "smaoinich")
        self._lemma("tachairt", "Nv", "tachair")
        self.nv("tòiseachadh", "tòisich")
        self.nv("tionndadh", "tionndaidh")