def test_parse(self):
    # Assert parsed output with Penn Treebank II tags (slash-formatted).
    # 1) "de zwarte kat" is a noun phrase, "op de mat" is a prepositional
    #    noun phrase.
    v = nl.parser.parse("De zwarte kat zat op de mat.")
    self.assertEqual(v,
        "De/DT/B-NP/O zwarte/JJ/I-NP/O kat/NN/I-NP/O " +
        "zat/VBD/B-VP/O " +
        "op/IN/B-PP/B-PNP de/DT/B-NP/I-PNP mat/NN/I-NP/I-PNP ././O/O"
    )
    # 2) "jaagt" and "vogels" lemmata are "jagen" and "vogel".
    #    With lemmata=True every token carries one extra trailing field.
    v = nl.parser.parse("De zwarte kat jaagt op vogels.", lemmata=True)
    self.assertEqual(v,
        "De/DT/B-NP/O/de zwarte/JJ/I-NP/O/zwart kat/NN/I-NP/O/kat " +
        "jaagt/VBZ/B-VP/O/jagen " +
        "op/IN/B-PP/B-PNP/op vogels/NNS/B-NP/I-PNP/vogel ././O/O/."
    )
    # Assert the accuracy of the Dutch tagger.
    # io.open() yields UTF-8 decoded text on both Python 2 and Python 3
    # (str has no decode() method on Python 3), and the with-block closes
    # the corpus file even when an assertion or parse error is raised.
    import io
    i, n = 0, 0  # i = tags that match the corpus, n = tags compared.
    corpus = os.path.join(PATH, "corpora", "tagged-nl-twnc.txt")
    with io.open(corpus, encoding="utf-8") as f:
        for sentence in f:
            sentence = sentence.strip()
            # Each corpus line is a sentence of space-separated "word/tag"
            # tokens.
            s1 = [w.split("/") for w in sentence.split(" ")]
            # Translate the corpus tags so they are comparable with the
            # parser output.
            s1 = [nl.wotan2penntreebank(w, tag) for w, tag in s1]
            # Re-tag the bare words; tokenize=False keeps the corpus
            # tokenization so both sequences line up by index.
            s2 = [[w for w, pos in s1]]
            s2 = nl.parse(s2, tokenize=False)
            s2 = [w.split("/") for w in s2.split(" ")]
            for j in range(len(s1)):
                if s1[j][1] == s2[j][1]:
                    i += 1
                n += 1
    self.assertTrue(float(i) / n > 0.90)
    print("pattern.nl.parser.parse()")
def test_parse(self):
    # Assert parsed output with Penn Treebank II tags (slash-formatted).
    # 1) "de zwarte kat" is a noun phrase, "op de mat" is a prepositional
    #    noun phrase.
    v = nl.parser.parse("De zwarte kat zat op de mat.")
    self.assertEqual(
        v,
        "De/DT/B-NP/O zwarte/JJ/I-NP/O kat/NN/I-NP/O " +
        "zat/VBD/B-VP/O " +
        "op/IN/B-PP/B-PNP de/DT/B-NP/I-PNP mat/NN/I-NP/I-PNP ././O/O")
    # 2) "jaagt" and "vogels" lemmata are "jagen" and "vogel".
    v = nl.parser.parse("De zwarte kat jaagt op vogels.", lemmata=True)
    self.assertEqual(
        v,
        "De/DT/B-NP/O/de zwarte/JJ/I-NP/O/zwart kat/NN/I-NP/O/kat " +
        "jaagt/VBZ/B-VP/O/jagen " +
        "op/IN/B-PP/B-PNP/op vogels/NNS/B-NP/I-PNP/vogel ././O/O/.")
    # Assert the accuracy of the Dutch tagger.
    # The corpus is read inside a with-block so the file handle is always
    # released, and it is decoded explicitly as UTF-8 instead of relying on
    # the platform's default encoding. io.open() accepts the encoding
    # argument on both Python 2 and Python 3.
    import io
    i, n = 0, 0  # i = tags that match the corpus, n = tags compared.
    path = os.path.join(PATH, "corpora", "tagged-nl-twnc.txt")
    with io.open(path, encoding="utf-8") as corpus:
        for sentence in corpus:
            sentence = sentence.strip()
            # A corpus line holds space-separated "word/tag" tokens.
            s1 = [w.split("/") for w in sentence.split(" ")]
            # Translate the corpus tags so they can be compared with the
            # tags in the parser output.
            s1 = [nl.wotan2penntreebank(w, tag) for w, tag in s1]
            # Parse the bare words as one pre-tokenized sentence.
            s2 = [[w for w, pos in s1]]
            s2 = nl.parse(s2, tokenize=False)
            s2 = [w.split("/") for w in s2.split(" ")]
            # Field 1 of every token is its part-of-speech tag.
            for j in range(len(s1)):
                if s1[j][1] == s2[j][1]:
                    i += 1
                n += 1
    self.assertTrue(float(i) / n > 0.90)
    print("pattern.nl.parser.parse()")
def test_wotan2penntreebank(self):
    # Check that every Wotan tag below is translated to the expected tag.
    # Each entry is (expected Penn Treebank tag, Wotan tag).
    expectations = (
        ("NNP", "N(eigen,ev,neut)"),
        ("NNPS", "N(eigen,mv,neut)"),
        ("NN", "N(soort,ev,neut)"),
        ("NNS", "N(soort,mv,neut)"),
        ("VBZ", "V(refl,ott,3,ev)"),
        ("VBP", "V(intrans,ott,1_of_2_of_3,mv)"),
        ("VBD", "V(trans,ovt,1_of_2_of_3,mv)"),
        ("VBN", "V(trans,verl_dw,onverv)"),
        ("VBG", "V(intrans,teg_dw,onverv)"),
        ("VB", "V(intrans,inf)"),
        ("MD", "V(hulp_of_kopp,ott,3,ev)"),
        ("JJ", "Adj(attr,stell,onverv)"),
        ("JJR", "Adj(adv,vergr,onverv)"),
        ("JJS", "Adj(attr,overtr,verv_neut)"),
        ("RP", "Adv(deel_v)"),
        ("RB", "Adv(gew,geen_func,stell,onverv)"),
        ("DT", "Art(bep,zijd_of_mv,neut)"),
        ("CC", "Conj(neven)"),
        ("CD", "Num(hoofd,bep,zelfst,onverv)"),
        ("TO", "Prep(voor_inf)"),
        ("IN", "Prep(voor)"),
        ("PRP", "Pron(onbep,neut,attr)"),
        ("PRP$", "Pron(bez,2,ev,neut,attr)"),
        (",", "Punc(komma)"),
        ("(", "Punc(haak_open)"),
        (")", "Punc(haak_sluit)"),
        (".", "Punc(punt)"),
        ("UH", "Int"),
        ("SYM", "Misc(symbool)"),
    )
    for expected_tag, wotan_tag in expectations:
        # The word argument is left empty; only item [1] of the result
        # (the translated tag) is compared.
        translated = nl.wotan2penntreebank("", wotan_tag)
        self.assertEqual(translated[1], expected_tag)
    print("pattern.nl.wotan2penntreebank()")
def test_wotan2penntreebank(self):
    # Assert tag translation.
    # Each pair is (expected Penn Treebank tag, Wotan tag to translate).
    for penntreebank, wotan in (
            ("NNP", "N(eigen,ev,neut)"),
            ("NNPS", "N(eigen,mv,neut)"),
            ("NN", "N(soort,ev,neut)"),
            ("NNS", "N(soort,mv,neut)"),
            ("VBZ", "V(refl,ott,3,ev)"),
            ("VBP", "V(intrans,ott,1_of_2_of_3,mv)"),
            ("VBD", "V(trans,ovt,1_of_2_of_3,mv)"),
            ("VBN", "V(trans,verl_dw,onverv)"),
            ("VBG", "V(intrans,teg_dw,onverv)"),
            ("VB", "V(intrans,inf)"),
            ("MD", "V(hulp_of_kopp,ott,3,ev)"),
            ("JJ", "Adj(attr,stell,onverv)"),
            ("JJR", "Adj(adv,vergr,onverv)"),
            ("JJS", "Adj(attr,overtr,verv_neut)"),
            ("RP", "Adv(deel_v)"),
            ("RB", "Adv(gew,geen_func,stell,onverv)"),
            ("DT", "Art(bep,zijd_of_mv,neut)"),
            ("CC", "Conj(neven)"),
            ("CD", "Num(hoofd,bep,zelfst,onverv)"),
            ("TO", "Prep(voor_inf)"),
            ("IN", "Prep(voor)"),
            ("PRP", "Pron(onbep,neut,attr)"),
            ("PRP$", "Pron(bez,2,ev,neut,attr)"),
            (",", "Punc(komma)"),
            ("(", "Punc(haak_open)"),
            (")", "Punc(haak_sluit)"),
            (".", "Punc(punt)"),
            ("UH", "Int"),
            ("SYM", "Misc(symbool)")):
        # The word argument is empty; item [1] of the result is the
        # translated tag.
        self.assertEqual(nl.wotan2penntreebank("", wotan)[1], penntreebank)
    # Call form of print: the print statement is a SyntaxError on Python 3,
    # while a single parenthesized string prints the same text on Python 2.
    print("pattern.nl.wotan2penntreebank()")