def test_ds_project(self):
    """
    Project dependency structures for the first two instances of
    ``xigt/index_error.xml`` and compare each projected language-line
    tree against an expected PTB-style tree.
    """
    xc = xc_load(os.path.join(testfile_dir, 'xigt/index_error.xml'), do_basic_processing=True)

    # --- First instance: align, parse the translation, then project. ---
    inst = xc[0]
    heur_align_inst(inst)
    parse_translation_line(inst, dt=True)
    project_ds_tier(inst)
    proj_t = get_lang_ds(inst)

    tgt_t = DepTree.fromstring("""(ROOT[0] (salli-i[2] (Jumala[1]) (sata-a[4] ([[3])) (rake-i-ta[5]) (ja[6]) (tuhka-a[7] (].[8]))))""", stype=DEPSTR_PTB)
    self.assertTrue(tgt_t.similar(proj_t))

    # --- Second instance: same pipeline. ---
    inst2 = xc[1]
    heur_align_inst(inst2)
    parse_translation_line(inst2, dt=True)
    project_ds_tier(inst2)
    proj2_t = get_lang_ds(inst2)

    tgt2_t = DepTree.fromstring("""(ROOT[0] (unohta-a[2] (*Minua[1]) (unohda-n[4] (/Minä[3]) (/laula-tta-a[6] (pelo-tta-a[5])) ) )) """, stype=DEPSTR_PTB)

    # assertTrue(expr, msg) only checks the truthiness of `expr`; passing the
    # target tree as the second argument made it the failure message, so the
    # trees were never compared. Compare them the same way as the first
    # instance instead.
    self.assertTrue(tgt2_t.similar(proj2_t))
def test_projection_2(self):
    """
    Project the ``ds3str`` tree onto a German word tier through an explicit
    word alignment and check the result against the expected tree.
    """
    source_tree = DepTree.fromstring(self.ds3str)

    # English sentence:
    #      1       2    3    4    5
    # "Tomorrow  Mary will meet Hans"
    #
    # Den Hans wird Maria morgen treffen
    #  1   2    3     4     5       6
    word_pairs = [(1,5),(2,4),(3,3),(4,6),(5,2)]
    alignment = Alignment(word_pairs)
    german_words = create_words_tier_from_string("Den Hans wird Maria morgen treffen")

    projected = project_ds(source_tree, german_words, alignment)

    expected = DepTree.fromstring(""" (ROOT[0] (treffen[6] (Hans[2] (Den[1])) (wird[3]) (Maria[4]) (morgen[5]) )) """, stype=DEPSTR_PTB)

    trees_match = projected.structurally_eq(expected)
    self.assertTrue(trees_match)
def test_conll_read(self):
    """
    Parse the CoNLL fixture ``self.s`` and check that the resulting tree is
    structurally equal to the same tree written in PTB-style notation.
    """
    expected = DepTree.fromstring(""" (ROOT[0] (zag[2] (Cathy[1]) (hen[3]) (zwaaien[5] (wild[4]) (.[6])) ))""", stype=DEPSTR_PTB)
    from_conll = DepTree.fromstring(self.s, stype=DEPSTR_CONLL)

    same_structure = from_conll.structurally_eq(expected)
    self.assertTrue(same_structure)
def test_stanford_ds_string(self):
    """
    Unit tests for parsing the stanford dependency format,
    and ensuring it is written back out correctly.
    """
    ds1 = DepTree.fromstring(self.ds1str)

    # Normalise the fixture: every run of two or more whitespace characters
    # becomes a single newline, matching the '\n' separator requested from
    # stanford_str(). The pattern is a raw string so that `\s` is not
    # treated as an (invalid) string escape sequence.
    expected = re.sub(r'\s\s+', '\n', self.ds1str.strip())
    actual = ds1.stanford_str(separator='\n').strip()

    # assertEqual reports both strings on failure, unlike assertTrue(a == b).
    self.assertEqual(actual, expected)
def test_ds_cycle(self):
    """
    The tree in the ds_cycle file has "woman" depend both
    on "arriving" and "browse."

    Checks that the tree read from the instance matches the expected
    structure (in which "woman" appears under two heads), and that
    project_ds_tier() returns None for this instance.
    """
    xc = xc_load(ds_cycle)
    inst = xc[0]

    # 1   2      4       5         7     8  9
    # The woman, (after) arriving, began to browse.
    # (The commas count as words, hence the skipping)

    # Raw string: the literal contains `\(` and `\)`, which are not valid
    # escape sequences in a normal string literal. The runtime value is
    # unchanged.
    tgt_t = DepTree.fromstring(r""" (ROOT[0] (began[7] (woman[2] (The[1]) (\(after\)[4] (arriving[5]))) (browse[9] (woman[2]) (to[8]) ) )) """, stype=DEPSTR_PTB)

    ds = get_ds(inst, trans(inst))
    self.assertTrue(tgt_t.structurally_eq(ds))

    self.assertIsNone(project_ds_tier(inst))
def test_projection_1(self):
    """
    Testcase for the DS projection in Fei/Will's paper.

    Projects the ``ds1str`` tree onto the Welsh sentence via an explicit
    alignment and compares the result with the ``ds2str`` tree.
    """
    english_tree = DepTree.fromstring(self.ds1str)
    expected_tree = DepTree.fromstring(self.ds2str, stype=DEPSTR_PTB)

    # -----------------------------------------------------------------------------
    #    1      2    3     4     5      6      7
    # Rhoddodd  yr  athro lyfr  i'r  bachgen ddoe
    # gave-3sg  the teacher book to-the boy  yesterday
    #
    # The teacher gave a book to the boy yesterday
    #  1     2     3   4   5   6  7   8     9
    welsh_words = create_words_tier_from_string("Rhoddodd yr athro lyfr i'r bachgen ddoe")

    # Judging by the sentences above, each pair reads
    # (English word index, Welsh word index).
    index_pairs = [(1,2),(2,3),(3,1),(5,4),(6,5),(7,5),(8,6),(9,7)]
    alignment = Alignment(index_pairs)

    # And now, project...
    projected = project_ds(english_tree, welsh_words, alignment)

    is_equal = expected_tree.structurally_eq(projected)
    self.assertTrue(is_equal)
def test_read_proj_ds_tree(self):
    """
    Read the translation-line dependency tree of ``self.inst2``, project it
    onto the language line through the trans/gloss alignment, and compare
    the projection with the expected tree.
    """
    instance = self.inst2

    source_tree = get_ds(instance, trans(instance))
    lang_words = lang(instance)
    alignment = get_trans_gloss_alignment(instance)

    expected = DepTree.fromstring(""" (ROOT[0] (glaubst[2] (Was[1]) (Du[3]) (wer[4]) (angerufen[5] (hat[6])) )) """, stype=DEPSTR_PTB)

    projected = project_ds(source_tree, lang_words, alignment)

    matches = projected.structurally_eq(expected)
    self.assertTrue(matches)
def test_conll_write(self):
    """
    Smoke test: parse the CoNLL fixture ``self.s`` and print its
    ``to_conll()`` output. Nothing is asserted about the output.
    """
    tree = DepTree.fromstring(self.s, stype=DEPSTR_CONLL)
    conll_output = tree.to_conll()
    print(conll_output)
def test_ptb_stanford_equiv(self):
    """
    The Stanford-format fixture (``ds1str``) and the PTB-format fixture
    (``ds1bstr``) should parse to structurally equal trees.
    """
    stanford_tree = DepTree.fromstring(self.ds1str, stype=DEPSTR_STANFORD)
    ptb_tree = DepTree.fromstring(self.ds1bstr, stype=DEPSTR_PTB)

    equivalent = stanford_tree.structurally_eq(ptb_tree)
    self.assertTrue(equivalent)
def test_ptb_ds_string(self):
    """
    Unit test for parsing the PTB-style format.

    No assertion is made on the result; the test passes as long as
    parsing ``self.ds1bstr`` does not raise.
    """
    parsed_tree = DepTree.fromstring(
        self.ds1bstr,
        stype=DEPSTR_PTB,
    )
def test_cycle(self):
    """
    Parse a dependency list in which ``rice-5`` and ``day,-7`` are each
    listed as a dependent of the other, and check that the label of the
    tree's first child is 'did'.
    """
    dependency_text = '''nsubj(did-2, And-1) root(ROOT-0, did-2) dobj(did-2, you-3) dep(did-2, make-4) dobj(make-4, rice-5) nsubj(day,-7, rice-5) rcmod(rice-5, day,-7) dep(did-2, eat-9) conj_and(make-4, eat-9) dobj(eat-9, it?-10)'''

    tree = DepTree.fromstring(dependency_text)

    first_child_label = tree[0].label()
    self.assertEqual(first_child_label, 'did')
def setUp(self):
    """
    Parse a small dependency list and store the resulting tree on
    ``self.dt`` for the tests in this class.
    """
    dependency_text = '''nsubj(ran-2, John-1) root(ROOT-0, ran-2) det(woods-5, the-4) prep_into(ran-2, woods-5)'''
    parsed = DepTree.fromstring(dependency_text)
    self.dt = parsed
def test_read_ds_tree(self):
    """
    Read the translation-line dependency tree stored on ``self.inst1`` and
    check that it is structurally equal to the expected tree.
    """
    instance = self.inst1
    stored_tree = get_ds(instance, trans(instance))

    expected = DepTree.fromstring("""(ROOT[0] (found[2] (Someone[1]) (them[3]) (boring[4])))""", stype=DEPSTR_PTB)

    same_structure = expected.structurally_eq(stored_tree)
    self.assertTrue(same_structure)