Example #1
0
    def test_projection_2(self):
        """
        Project the ``ds3str`` tree onto a six-word target tier and check
        that the result is structurally equal to the expected tree.
        """
        # English sentence:
        #    1        2     3    4   5
        # "Tomorrow Mary  will meet Hans"
        #
        # Den   Hans  wird Maria morgen treffen
        #  1     2     3     4      5      6
        source_tree = DepTree.fromstring(self.ds3str)
        word_alignment = Alignment([(1,5),(2,4),(3,3),(4,6),(5,2)])
        target_words = create_words_tier_from_string("Den Hans wird Maria morgen treffen")

        # Expected outcome, written in the DEPSTR_PTB bracket notation.
        expected = DepTree.fromstring("""
                                        (ROOT[0]
                                            (treffen[6]
                                                (Hans[2] (Den[1]))
                                                (wird[3])
                                                (Maria[4])
                                                (morgen[5])
                                        ))
                                                """,
                                      stype=DEPSTR_PTB)

        projected = project_ds(source_tree, target_words, word_alignment)
        self.assertTrue(projected.structurally_eq(expected))
Example #2
0
    def test_ds_project(self):
        """
        Run alignment, translation parsing and DS projection on the first two
        instances of ``xigt/index_error.xml`` and compare each projected
        language-line tree with its expected tree.
        """
        xc = xc_load(os.path.join(testfile_dir, 'xigt/index_error.xml'), do_basic_processing=True)

        # --- First instance ---------------------------------------------
        inst = xc[0]
        heur_align_inst(inst)
        parse_translation_line(inst, dt=True)
        project_ds_tier(inst)
        proj_t = get_lang_ds(inst)

        tgt_t = DepTree.fromstring("""(ROOT[0] (salli-i[2] (Jumala[1]) (sata-a[4] ([[3])) (rake-i-ta[5]) (ja[6]) (tuhka-a[7] (].[8]))))""", stype=DEPSTR_PTB)

        self.assertTrue(tgt_t.similar(proj_t))

        # --- Second instance --------------------------------------------
        inst2 = xc[1]
        heur_align_inst(inst2)
        parse_translation_line(inst2, dt=True)
        project_ds_tier(inst2)

        print(inst2)

        tgt2_t = DepTree.fromstring("""(ROOT[0]
                                            (unohta-a[2] (*Minua[1])
                                                (unohda-n[4]
                                                    (/Minä[3])
                                                    (/laula-tta-a[6] (pelo-tta-a[5]))
                                                )
                                            ))
                                        """, stype=DEPSTR_PTB)

        # assertTrue(x, y) treats y as the failure message and only checks the
        # truthiness of x, so the expected tree was never actually compared.
        # Use the same ``similar`` comparison as for the first instance.
        self.assertTrue(tgt2_t.similar(get_lang_ds(inst2)))
Example #3
0
        def test_conll_read(self):
            """
            Parse ``self.s`` with stype=DEPSTR_CONLL and check that it is
            structurally equal to the same tree written with stype=DEPSTR_PTB.
            """
            expected = DepTree.fromstring("""
                            (ROOT[0]
                                (zag[2]
                                    (Cathy[1])
                                    (hen[3])
                                    (zwaaien[5] (wild[4])
                                                (.[6]))
                                ))""", stype=DEPSTR_PTB)

            parsed = DepTree.fromstring(self.s, stype=DEPSTR_CONLL)
            matches = parsed.structurally_eq(expected)

            self.assertTrue(matches)
Example #4
0
    def test_stanford_ds_string(self):
        """
        Unit tests for parsing the stanford dependency format, and ensuring it is written back out correctly.
        """
        ds1 = DepTree.fromstring(self.ds1str)

        # The fixture separates dependencies with runs of whitespace; collapse
        # each run to a single newline to match ``stanford_str(separator='\\n')``.
        # The pattern must be a raw string: '\s' in a plain literal is an
        # invalid escape sequence.
        expected = re.sub(r'\s\s+', '\n', self.ds1str.strip())
        actual = ds1.stanford_str(separator='\n').strip()

        # assertEqual reports both strings on failure, unlike assertTrue(a == b).
        self.assertEqual(actual, expected)
Example #5
0
    def test_ds_cycle(self):
        """
        The tree in the ds_cycle file has "woman" depend both
        on "arriving" and "browse."

        Checks that the translation-line tree read from the instance matches
        the expected tree, and that ``project_ds_tier`` returns None for it.
        """
        xc = xc_load(ds_cycle)
        inst = xc[0]

        #  1    2       4        5       7    8    9
        # The woman, (after) arriving, began to browse.

        # (The commas count as words, hence the skipping)

        # Raw literal: the escaped parentheses around "after" contain
        # backslashes, which are invalid escape sequences in a plain string.
        # The resulting string value is unchanged.
        tgt_t = DepTree.fromstring(r"""
        (ROOT[0]
            (began[7]
                (woman[2]
                    (The[1])
                    (\(after\)[4] (arriving[5])))
                (browse[9]
                    (woman[2])
                    (to[8])
                )
            ))
        """, stype=DEPSTR_PTB)

        ds = get_ds(inst, trans(inst))
        self.assertTrue(tgt_t.structurally_eq(ds))

        self.assertIsNone(project_ds_tier(inst))
Example #6
0
    def test_projection_1(self):
        """
        Projection test case taken from the DS projection in Fei/Will's paper.

        Projects ``ds1str`` through a hand-written alignment and expects the
        result to be structurally equal to ``ds2str``.
        """
        # -----------------------------------------------------------------------------
        #    1       2      3        4    5        6      7
        # Rhoddodd   yr   athro    lyfr  i'r     bachgen  ddoe
        # gave-3sg   the  teacher  book  to-the  boy      yesterday
        #
        # The     teacher  gave  a book   to the  boy      yesterday
        #  1         2     3     4  5     6  7    8           9

        source_tree = DepTree.fromstring(self.ds1str)
        expected_tree = DepTree.fromstring(self.ds2str, stype=DEPSTR_PTB)

        target_words = create_words_tier_from_string("Rhoddodd yr athro lyfr i'r bachgen ddoe")
        word_alignment = Alignment([(1,2),(2,3),(3,1),(5,4),(6,5),(7,5),(8,6),(9,7)])

        # Carry the source tree over to the target words.
        projected_tree = project_ds(source_tree, target_words, word_alignment)

        self.assertTrue(expected_tree.structurally_eq(projected_tree))
Example #7
0
def _assemble_ds(sent, index_pairs, cur_head = -1, parent_node = None, seen_indices=set(())):
    """
    :type sent: Sentence
    """
    # Get all the words that depend on the current index,
    # starting with the root (-1)

    dep_orders = [i[1] for i in index_pairs if i[0] == cur_head]
    if not dep_orders:
        return None
    elif cur_head in seen_indices:
        return None
    else:
        if parent_node is None:
            parent_node = DepTree.root()

        for dep_order in dep_orders:
            word = sent.getorder(dep_order)
            dt = DepTree(word.text, word_index=int(dep_order), pos=word.pos)
            parent_node.append(dt)
            _assemble_ds(sent, index_pairs, cur_head = dep_order, parent_node=dt, seen_indices=seen_indices|set([cur_head]))
            dt.sort(key=lambda x: x.word_index)

        return parent_node
Example #8
0
    def test_read_proj_ds_tree(self):
        """
        Project the translation-line tree of ``inst2`` onto its language
        words and compare the result with the expected tree.
        """
        expected = DepTree.fromstring("""
        (ROOT[0]
            (glaubst[2]
                (Was[1])
                (Du[3])
                (wer[4])
                (angerufen[5] (hat[6]))
            ))
        """, stype=DEPSTR_PTB)

        # Arguments: source tree, target words, translation/gloss alignment.
        projected = project_ds(
            get_ds(self.inst2, trans(self.inst2)),
            lang(self.inst2),
            get_trans_gloss_alignment(self.inst2),
        )

        self.assertTrue(projected.structurally_eq(expected))
Example #9
0
        def test_conll_write(self):
            """Parse ``self.s`` with stype=DEPSTR_CONLL and print what ``to_conll()`` returns."""
            parsed = DepTree.fromstring(self.s, stype=DEPSTR_CONLL)
            conll_output = parsed.to_conll()

            # No assertion here: the output is only printed.
            print(conll_output)
Example #10
0
    def test_ptb_stanford_equiv(self):
        """
        ``ds1str`` parsed with stype=DEPSTR_STANFORD and ``ds1bstr`` parsed
        with stype=DEPSTR_PTB should be structurally equal.
        """
        stanford_tree = DepTree.fromstring(self.ds1str,  stype=DEPSTR_STANFORD)
        ptb_tree = DepTree.fromstring(self.ds1bstr, stype=DEPSTR_PTB)

        same_structure = stanford_tree.structurally_eq(ptb_tree)
        self.assertTrue(same_structure)
Example #11
0
 def test_ptb_ds_string(self):
     """
     Unit test for parsing the PTB-style format.

     Only checks that parsing ``ds1bstr`` does not raise; the resulting
     tree is not inspected.
     """
     DepTree.fromstring(self.ds1bstr, stype=DEPSTR_PTB)
Example #12
0
    def test_cycle(self):
        """
        Parse a dependency string in which ``rice-5`` and ``day,-7`` depend
        on each other, and check that the first child of the tree is 'did'.
        """
        cyclic_deps = '''nsubj(did-2, And-1) root(ROOT-0, did-2) dobj(did-2, you-3) dep(did-2, make-4) dobj(make-4, rice-5) nsubj(day,-7, rice-5) rcmod(rice-5, day,-7) dep(did-2, eat-9) conj_and(make-4, eat-9) dobj(eat-9, it?-10)'''

        tree = DepTree.fromstring(cyclic_deps)
        first_child = tree[0]

        self.assertEqual(first_child.label(), 'did')
Example #13
0
 def setUp(self):
     """Parse a small four-dependency fixture and store the tree on ``self.dt``."""
     fixture = '''nsubj(ran-2, John-1)
                    root(ROOT-0, ran-2)
                    det(woods-5, the-4)
                    prep_into(ran-2, woods-5)'''
     parsed = DepTree.fromstring(fixture)
     self.dt = parsed
Example #14
0
    def test_read_ds_tree(self):
        """
        The translation-line tree read from ``inst1`` should be structurally
        equal to the expected tree.
        """
        expected = DepTree.fromstring("""(ROOT[0] (found[2] (Someone[1]) (them[3]) (boring[4])))""", stype=DEPSTR_PTB)
        actual = get_ds(self.inst1, trans(self.inst1))

        self.assertTrue(expected.structurally_eq(actual))