Beispiel #1
0
    def project_aln_select_test(self):
        """Check that POS projection records the alignment method it was given.

        Works on a copy of the first instance in ``self.xc``. For each
        alignment method, the translation POS tags are projected onto the
        gloss line with that method, the projected gloss tag tier is fetched,
        and the alignment method reported for that tier is compared with the
        one that was requested.
        """
        inst = copy_xigt(self.xc[0])

        def test_proj_method(method):
            project_trans_pos_to_gloss(inst, aln_method=method)
            gtt = gloss_tag_tier(inst, tag_method=INTENT_POS_PROJ)
            # The tier is the thing under test. The second positional
            # argument of assertIsNotNone is the failure message, so passing
            # (inst, gtt) would only ever check the instance.
            self.assertIsNotNone(gtt)
            self.assertEqual(get_intent_proj_aln_method(gtt), method)

        test_proj_method(INTENT_ALN_GIZA)
        test_proj_method(INTENT_ALN_HEUR)
        test_proj_method(INTENT_ALN_GIZAHEUR)
        test_proj_method(INTENT_ALN_HEURPOS)
Beispiel #2
0
    def test_align_extract(self):
        """Compare the stored translation/gloss alignments with fixed values.

        The alignment for each method is read from a copy of the first
        instance in ``self.xc`` and compared, method by method, with a
        hard-coded ``Alignment``.
        """
        inst = copy_xigt(self.xc[0])

        # Retrieve every alignment up front, in a fixed order, before any
        # assertion runs.
        methods = (
            INTENT_ALN_HEUR,
            INTENT_ALN_HEURPOS,
            INTENT_ALN_GIZA,
            INTENT_ALN_GIZAHEUR,
        )
        found = [get_trans_gloss_alignment(inst, aln_method=m) for m in methods]

        # Expected alignments, listed in the same order as ``methods``.
        expected = [
            Alignment([(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (8, 7), (11, 8)]),
            Alignment([(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 12),
                       (8, 7), (9, 10), (11, 8), (12, 10), (13, 11)]),
            Alignment([(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 5), (7, 5),
                       (9, 7), (10, 12), (11, 8), (12, 12), (13, 11), (14, 12)]),
            Alignment([(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 5), (7, 5),
                       (9, 7), (10, 12), (11, 8), (12, 12), (13, 11), (14, 11)]),
        ]

        for actual_aln, expected_aln in zip(found, expected):
            self.assertEqual(actual_aln, expected_aln)
Beispiel #3
0
def evaluate_heuristic_methods_on_file(f, xc, mas, classifier_obj, tagger_obj, lang, pool=None, lock=None):
    """Run each heuristic alignment configuration over the instances in *xc*.

    Instances for which no manual alignment can be retrieved are skipped.
    For every remaining instance, a series of heuristic alignments is
    computed, each on a fresh copy of the instance, and recorded in *mas*
    under a label naming the configuration.

    :param f: path of the file being evaluated; only its basename is logged.
    :param xc: iterable of instances to evaluate.
    :param mas: collector that receives results through ``add_alignment``.
    :param classifier_obj: passed to ``classify_gloss_pos`` for the POS run.
    :param tagger_obj: passed to ``tag_trans_pos`` for the POS run.
    :param lang: language identifier forwarded to ``mas.add_alignment``.
    :param pool: accepted but not used in this function.
    :param lock: accepted but not used in this function.
    """
    EVAL_LOG.info('Evaluating heuristic methods on file "{}"'.format(os.path.basename(f)))

    # (label, keyword arguments for heur_align_inst), in recording order.
    # Each step switches on one more feature than the step before it.
    incremental_settings = (
        ('baseline',
         dict(lowercase=False, stem=False, tokenize=False, no_multiples=True, use_pos=False)),
        ('lowercasing',
         dict(lowercase=True, stem=False, tokenize=False, no_multiples=True, use_pos=False)),
        ('Tokenization',
         dict(lowercase=True, stem=False, tokenize=True, no_multiples=True, use_pos=False)),
        ('Multiple Matches',
         dict(lowercase=True, stem=False, tokenize=True, no_multiples=False, use_pos=False)),
        ('Morphing',
         dict(lowercase=True, stem=True, tokenize=True, no_multiples=False, use_pos=False)),
        ('Grams',
         dict(lowercase=True, stem=True, tokenize=True, no_multiples=False, grams=True, use_pos=False)),
    )
    pos_settings = dict(lowercase=True, stem=True, tokenize=True, no_multiples=False, grams=True, use_pos=True)

    for inst in xc:
        # Only evaluate against instances that have a gold alignment.
        if get_trans_gloss_alignment(inst, aln_method=INTENT_ALN_MANUAL) is None:
            continue

        EVAL_LOG.debug('Running heuristic alignments on instance "{}"'.format(inst.id))

        for label, settings in incremental_settings:
            aligned = heur_align_inst(copy_xigt(inst), **settings)
            mas.add_alignment(label, lang, inst.id, aligned)

        # The POS run needs tags on both lines, so the copy is classified
        # and tagged before it is aligned.
        tagged_copy = copy_xigt(inst)
        classify_gloss_pos(tagged_copy, classifier_obj)
        tag_trans_pos(tagged_copy, tagger_obj)
        aligned = heur_align_inst(tagged_copy, **pos_settings)
        mas.add_alignment('POS', lang, inst.id, aligned)
Beispiel #4
0
 def test_inst_pos_heur(self):
     """Smoke-run gloss classification, translation tagging and POS alignment.

     Each step runs on the same copy of ``self.inst`` and its return value
     is printed. No assertions are made.
     """
     working_inst = copy_xigt(self.inst)

     gloss_result = classify_gloss_pos(working_inst)
     print(gloss_result)

     trans_result = tag_trans_pos(working_inst)
     print(trans_result)

     alignment_result = heur_align_inst(working_inst, use_pos=True)
     print(alignment_result)
Beispiel #5
0
 def giza_align_test(self):
     """Run GIZA alignment on a corpus copy and check the first instance.

     The GIZA alignment retrieved from the first instance is printed and
     then compared with a fixed expected ``Alignment``.
     """
     corpus = copy_xigt(self.xc)
     giza_align_t_g(corpus)

     first_inst = corpus[0]
     found = get_trans_glosses_alignment(first_inst, aln_method=INTENT_ALN_GIZA)
     print(found)

     expected = Alignment([(5, 7), (1, 1), (4, 3), (6, 5)])
     self.assertEqual(found, expected)
Beispiel #6
0
 def heur_align_test(self):
     """Run heuristic alignment on a corpus copy and check the first instance.

     The heuristic alignment retrieved from the first instance is compared
     with a fixed expected ``Alignment``.
     """
     new_c = copy_xigt(self.xc)
     heur_align_corp(new_c)
     aln = get_trans_glosses_alignment(new_c[0], aln_method=INTENT_ALN_HEUR)
     expected = Alignment([(5, 7), (6, 5), (1, 1), (4, 3)])
     # assertEqual rather than assertEquals: the alias is deprecated and was
     # removed from unittest in Python 3.12.
     self.assertEqual(aln, expected)