def extract_sents_from_inst(inst: Igt, out_src, out_tgt, aln_method=None, no_alignment_heur=True,
                            sent_type=SENT_TYPE_T_G):
    """
    Extract parallel sentences from an instance and write them out.

    The pair written is either:

    1) Translation--Gloss    (sent_type == SENT_TYPE_T_G)
    2) Translation--Language (sent_type == SENT_TYPE_T_L)

    Both sides are rendered with tier_text(..., remove_whitespace_inside_tokens=True)
    and lowercased before being written, one sentence per line.

    :param inst: Instance to extract the sentence pair from.
    :param out_src: Object with write()/flush() that receives the translation side.
    :param out_tgt: Object with write()/flush() that receives the gloss or language side.
    :param aln_method: Passed through to get_trans_aligned_wordpairs; only
                       consulted when heuristic pairs are requested.
    :param no_alignment_heur: When False, the word pairs returned by
                              get_trans_aligned_wordpairs are appended after
                              the sentence pair, one word per line on each side.
    :param sent_type: SENT_TYPE_T_G or SENT_TYPE_T_L; selects the target tier.
    :raises ValueError: If sent_type is neither of the two supported values.
    """

    # -------------------------------------------
    # 1) Get the source string (translation)
    # -------------------------------------------
    src_str = tier_text(trans(inst), remove_whitespace_inside_tokens=True).lower()

    # -------------------------------------------
    # 2) Decide whether the target string is gloss or language.
    # -------------------------------------------
    if sent_type == SENT_TYPE_T_L:
        tgt_str = tier_text(lang(inst), remove_whitespace_inside_tokens=True).lower()
    elif sent_type == SENT_TYPE_T_G:
        tgt_str = tier_text(gloss(inst), remove_whitespace_inside_tokens=True).lower()
    else:
        # ValueError is still caught by any existing `except Exception`, and
        # the offending value is reported so the bad call site can be found.
        raise ValueError("Invalid sent type: {!r}".format(sent_type))

    # -------------------------------------------
    # 3) Write the choice out to disk.
    # -------------------------------------------
    out_src.write(src_str + '\n')
    out_tgt.write(tgt_str + '\n')
    out_src.flush()
    out_tgt.flush()

    # -------------------------------------------
    # 4) Add heuristic alignments, if asked for.
    # -------------------------------------------
    if not no_alignment_heur:
        pairs = get_trans_aligned_wordpairs(inst, aln_method=aln_method, add_align=True, sent_type=sent_type)
        for src_word, tgt_word in pairs:
            out_src.write(src_word.lower() + '\n')
            out_tgt.write(tgt_word.lower() + '\n')

        # Flush the word pairs as well, so they are pushed out like the
        # sentence pair above instead of lingering in the stream buffers.
        out_src.flush()
        out_tgt.flush()
def glosses_test(self):
    """
    Check the number of items returned by glosses() for the fixture and
    the text they render to.
    """
    # The fixture is expected to yield ten items from glosses().
    gloss_count = len(glosses(self.igt))
    self.assertEqual(gloss_count, 10)

    # Rendered through tier_text, those items are space-separated.
    rendered = tier_text(glosses(self.igt))
    self.assertEqual(rendered, 'I Nom child Dat rice Acc eat Caus Pst Dec')
def line_test(self):
    """
    Check that the gloss and translation lines of the fixture render to
    the expected text.
    """
    # Gloss line: hyphens inside each token are kept.
    gloss_text = tier_text(gloss(self.igt))
    self.assertEqual(gloss_text, 'I-Nom child-Dat rice-Acc eat-Caus-Pst-Dec')

    # Translation line.
    trans_text = tier_text(trans(self.igt))
    self.assertEqual(trans_text, 'I made the child eat rice')