def project_aln_select_test(self):
    """Check that the projection tag tier reports the alignment method used.

    A single copy of the first instance in ``self.xc`` is reused for every
    alignment method. For each method, ``project_trans_pos_to_gloss`` is
    run with that method, the gloss tag tier for ``INTENT_POS_PROJ`` is
    looked up, and the tier must both exist and report the same method via
    ``get_intent_proj_aln_method``.
    """
    inst = copy_xigt(self.xc[0])

    def check_proj_method(method):
        project_trans_pos_to_gloss(inst, aln_method=method)
        gtt = gloss_tag_tier(inst, tag_method=INTENT_POS_PROJ)
        # The tier is the object under test. Passing it as the second
        # argument would only make it the failure message while checking
        # ``inst``, which can never be None at this point.
        self.assertIsNotNone(gtt)
        self.assertEqual(get_intent_proj_aln_method(gtt), method)

    # Order matters only in that each projection runs on the same instance
    # after the previous one; it is kept as originally written.
    for method in (INTENT_ALN_GIZA,
                   INTENT_ALN_HEUR,
                   INTENT_ALN_GIZAHEUR,
                   INTENT_ALN_HEURPOS):
        check_proj_method(method)
def test_align_extract(self):
    """Compare the alignments returned for each method with fixed expectations.

    Works on a copy of the first instance in ``self.xc``. The alignment for
    each of the four methods is fetched with ``get_trans_gloss_alignment``
    and must equal the corresponding hard-coded ``Alignment``.
    """
    inst = copy_xigt(self.xc[0])

    methods = (
        INTENT_ALN_HEUR,
        INTENT_ALN_HEURPOS,
        INTENT_ALN_GIZA,
        INTENT_ALN_GIZAHEUR,
    )
    # Fetch every alignment up front, before any expectation is built or
    # any assertion has a chance to fail.
    found = [get_trans_gloss_alignment(inst, aln_method=m) for m in methods]

    # Expected alignments, listed in the same order as ``methods``.
    expected = [
        Alignment([(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (8, 7), (11, 8)]),
        Alignment([(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 12),
                   (8, 7), (9, 10), (11, 8), (12, 10), (13, 11)]),
        Alignment([(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 5), (7, 5),
                   (9, 7), (10, 12), (11, 8), (12, 12), (13, 11), (14, 12)]),
        Alignment([(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 5), (7, 5),
                   (9, 7), (10, 12), (11, 8), (12, 12), (13, 11), (14, 11)]),
    ]

    for actual, wanted in zip(found, expected):
        self.assertEqual(actual, wanted)
def evaluate_heuristic_methods_on_file(f, xc, mas, classifier_obj, tagger_obj, lang, pool=None, lock=None):
    """Run a fixed series of heuristic alignment settings over a corpus.

    Every instance in ``xc`` for which ``get_trans_gloss_alignment`` returns
    a manual (``INTENT_ALN_MANUAL``) alignment is aligned once per setting,
    each time on a fresh copy, and the result is recorded in ``mas`` under
    that setting's label. Instances without a manual alignment are skipped.

    :param f: path of the file being evaluated; only its basename is logged.
    :param xc: iterable of instances to evaluate.
    :param mas: collector exposing ``add_alignment(label, lang, inst_id, aln)``.
    :param classifier_obj: passed to ``classify_gloss_pos`` for the POS run.
    :param tagger_obj: passed to ``tag_trans_pos`` for the POS run.
    :param lang: language key forwarded to ``mas.add_alignment``.
    :param pool: not used by this function.
    :param lock: not used by this function.
    """
    EVAL_LOG.info('Evaluating heuristic methods on file "{}"'.format(os.path.basename(f)))

    # Label and keyword settings for each run that needs no POS tags, in the
    # order they are recorded. Only the last entry passes ``grams``.
    plain_runs = (
        ('baseline',
         dict(lowercase=False, stem=False, tokenize=False, no_multiples=True, use_pos=False)),
        ('lowercasing',
         dict(lowercase=True, stem=False, tokenize=False, no_multiples=True, use_pos=False)),
        ('Tokenization',
         dict(lowercase=True, stem=False, tokenize=True, no_multiples=True, use_pos=False)),
        ('Multiple Matches',
         dict(lowercase=True, stem=False, tokenize=True, no_multiples=False, use_pos=False)),
        ('Morphing',
         dict(lowercase=True, stem=True, tokenize=True, no_multiples=False, use_pos=False)),
        ('Grams',
         dict(lowercase=True, stem=True, tokenize=True, no_multiples=False, grams=True, use_pos=False)),
    )

    for inst in xc:
        # Only evaluate against instances that have a gold alignment.
        if get_trans_gloss_alignment(inst, aln_method=INTENT_ALN_MANUAL) is None:
            continue

        EVAL_LOG.debug('Running heuristic alignments on instance "{}"'.format(inst.id))

        for label, settings in plain_runs:
            aligned = heur_align_inst(copy_xigt(inst), **settings)
            mas.add_alignment(label, lang, inst.id, aligned)

        # The POS run classifies and tags its own copy before aligning.
        tagged = copy_xigt(inst)
        classify_gloss_pos(tagged, classifier_obj)
        tag_trans_pos(tagged, tagger_obj)
        aligned = heur_align_inst(tagged, lowercase=True, stem=True, tokenize=True,
                                  no_multiples=False, grams=True, use_pos=True)
        mas.add_alignment('POS', lang, inst.id, aligned)
def test_inst_pos_heur(self):
    """Print the result of each POS-related step run on a copy of ``self.inst``.

    The steps run in order on the same copy: ``classify_gloss_pos``,
    ``tag_trans_pos``, then ``heur_align_inst`` with ``use_pos=True``.
    Nothing is asserted; the return values are only printed.
    """
    inst = copy_xigt(self.inst)

    gloss_result = classify_gloss_pos(inst)
    print(gloss_result)

    trans_result = tag_trans_pos(inst)
    print(trans_result)

    alignment = heur_align_inst(inst, use_pos=True)
    print(alignment)
def giza_align_test(self):
    """Run ``giza_align_t_g`` on a corpus copy and check the first instance.

    The ``INTENT_ALN_GIZA`` alignment fetched from the first instance of the
    copied corpus is printed and must equal a fixed expected ``Alignment``.
    """
    corpus = copy_xigt(self.xc)
    giza_align_t_g(corpus)

    first_inst = corpus[0]
    found = get_trans_glosses_alignment(first_inst, aln_method=INTENT_ALN_GIZA)
    print(found)

    expected = Alignment([(5, 7), (1, 1), (4, 3), (6, 5)])
    self.assertEqual(found, expected)
def heur_align_test(self):
    """Run ``heur_align_corp`` on a corpus copy and check the first instance.

    The ``INTENT_ALN_HEUR`` alignment fetched from the first instance of the
    copied corpus must equal a fixed expected ``Alignment``.
    """
    new_c = copy_xigt(self.xc)
    heur_align_corp(new_c)
    aln = get_trans_glosses_alignment(new_c[0], aln_method=INTENT_ALN_HEUR)
    expected = Alignment([(5, 7), (6, 5), (1, 1), (4, 3)])
    # ``assertEquals`` is a deprecated alias that no longer exists on
    # ``unittest.TestCase`` from Python 3.12; ``assertEqual`` is the
    # supported spelling. Argument order is kept as it was.
    self.assertEqual(expected, aln)