Example #1
0
def extract_sents_from_inst(inst: Igt, out_src, out_tgt, aln_method=None, no_alignment_heur=True, sent_type=SENT_TYPE_T_G):
    """
    Extract one pair of parallel sentences from an instance and write it out.

    The source side is always the translation tier. The target side is either:

    1) the gloss tier (``SENT_TYPE_T_G``, Translation--Gloss), or
    2) the language tier (``SENT_TYPE_T_L``, Translation--Language).

    Both strings are lowercased and written as one line each, after which both
    outputs are flushed. When heuristic alignment is enabled, every word pair
    returned by ``get_trans_aligned_wordpairs`` is appended as an additional
    lowercased line pair and the outputs are flushed again.

    :param inst: The IGT instance whose tiers are read.
    :param out_src: Object with ``write()`` and ``flush()`` that receives the
                    source (translation) lines.
    :param out_tgt: Object with ``write()`` and ``flush()`` that receives the
                    target (gloss or language) lines.
    :param aln_method: Passed through unchanged to
                       ``get_trans_aligned_wordpairs``.
    :param no_alignment_heur: When true (the default), the heuristic word-pair
                              lines are skipped.
    :param sent_type: ``SENT_TYPE_T_G`` or ``SENT_TYPE_T_L``; selects the
                      target tier.
    :raises ValueError: If ``sent_type`` is neither of the two supported
                        values. Nothing has been written at that point.
    """

    # -------------------------------------------
    # 1) Get the source string (translation)
    # -------------------------------------------
    src_str = tier_text(trans(inst), remove_whitespace_inside_tokens=True).lower()

    # -------------------------------------------
    # 2) Decide whether the target string is gloss or language.
    # -------------------------------------------
    if sent_type == SENT_TYPE_T_L:
        tgt_str = tier_text(lang(inst), remove_whitespace_inside_tokens=True).lower()
    elif sent_type == SENT_TYPE_T_G:
        tgt_str = tier_text(gloss(inst), remove_whitespace_inside_tokens=True).lower()
    else:
        # ValueError is a subclass of Exception, so callers that caught the
        # previous bare Exception keep working.
        raise ValueError("Invalid sent type: {!r}".format(sent_type))

    # -------------------------------------------
    # 3) Write the sentence pair to the two outputs.
    # -------------------------------------------
    out_src.write(src_str + '\n')
    out_tgt.write(tgt_str + '\n')
    out_src.flush()
    out_tgt.flush()

    # -------------------------------------------
    # 4) Add heuristic alignments, if asked for.
    # -------------------------------------------
    if not no_alignment_heur:

        pairs = get_trans_aligned_wordpairs(inst, aln_method=aln_method, add_align=True, sent_type=sent_type)
        for src_word, tgt_word in pairs:
            out_src.write(src_word.lower() + '\n')
            out_tgt.write(tgt_word.lower() + '\n')

        # Flush again so the word-pair lines are not left buffered while the
        # sentence lines above have already been pushed out.
        out_src.flush()
        out_tgt.flush()
Example #2
0
 def glosses_test(self):
     """
     Check the gloss tokens of the fixture instance: there must be ten of
     them, and their rendered text must match the expected string.
     """
     gloss_count = len(glosses(self.igt))
     self.assertEqual(gloss_count, 10)

     rendered = tier_text(glosses(self.igt))
     self.assertEqual(rendered, 'I Nom child Dat rice Acc eat Caus Pst Dec')
Example #3
0
 def line_test(self):
     """
     Check the rendered text of the gloss line and of the translation line
     of the fixture instance against the expected strings.
     """
     gloss_line = tier_text(gloss(self.igt))
     self.assertEqual(gloss_line, 'I-Nom child-Dat rice-Acc eat-Caus-Pst-Dec')

     trans_line = tier_text(trans(self.igt))
     self.assertEqual(trans_line, 'I made the child eat rice')