Esempio n. 1
0
    def set_bilingual_align_test(self):
        """
        Set the bilingual alignment manually, and ensure that it is read back correctly.

        A hand-built alignment is stored on ``self.igt`` between the tiers
        returned by ``trans(self.igt)`` and ``glosses(self.igt)`` under the
        ``INTENT_ALN_MANUAL`` method label. It is then fetched again with the
        same label and compared against what was stored.
        """

        expected = Alignment([(1, 1), (1, 2), (2, 8), (4, 3), (5, 7), (6, 5)])
        set_bilingual_alignment(self.igt, trans(self.igt), glosses(self.igt), expected, INTENT_ALN_MANUAL)

        # Compare the round-tripped value: merely calling the getter would
        # let an incorrect read-back pass silently.
        actual = get_trans_glosses_alignment(self.igt, aln_method=INTENT_ALN_MANUAL)
        self.assertEqual(expected, actual)
Esempio n. 2
0
def gather_gloss_pos_stats(inst, subword_dict, feat_list):
    """
    Record (gloss word, POS tag) observations, with neighbouring words,
    for a single instance.

    If the instance has no gloss POS tier but does have a language-line POS
    tier, gloss/language alignments are added and the language tags are
    projected to the gloss line before the gloss POS tier is looked up again.
    Observations are only written to ``subword_dict`` when
    ``CLASS_FEATS_ALN`` is present in ``feat_list``.

    :param inst: Instance to process.
    :type inst: RGIgt
    :param subword_dict: Receives one ``add_word_tag`` call per gloss word that
                          has an aligned tag: lowercased word, tag value, and the
                          lowercased previous/next gloss words (``None`` at the edges).
    :type subword_dict: SubwordDict
    :param feat_list: Collection of enabled features; only membership of
                      ``CLASS_FEATS_ALN`` is checked here.
    """

    # Fetch the tiers up front, before any alignment or projection happens.
    gloss_pos = gloss_tag_tier(inst)
    lang_pos = lang_tag_tier(inst)
    gloss_words = gloss(inst)

    use_aln_feats = CLASS_FEATS_ALN in feat_list
    if use_aln_feats:
        heur_align_inst(inst)
        get_trans_glosses_alignment(inst, aln_method=INTENT_ALN_HEUR)

    # POS tags exist on the language line only: project them onto the gloss line.
    if gloss_pos is None and lang_pos is not None:
        add_gloss_lang_alignments(inst)
        project_lang_to_gloss(inst)
        gloss_pos = gloss_tag_tier(inst)

    # Without a gloss POS tier there is nothing to collect.
    if gloss_pos is None:
        return

    for idx, word in enumerate(gloss_words):
        pos_tag = xigt_find(inst, alignment=word.id)
        if pos_tag is None:
            continue

        # Neighbouring gloss words serve as context; None marks the tier edges.
        if idx > 0:
            prev_word = gloss_words[idx - 1].value().lower()
        else:
            prev_word = None

        if idx < len(gloss_words) - 1:
            next_word = gloss_words[idx + 1].value().lower()
        else:
            next_word = None

        if use_aln_feats:
            subword_dict.add_word_tag(word.value().lower(), pos_tag.value(), prev_word, next_word)
Esempio n. 3
0
 def giza_align_test(self):
     """
     Run GIZA alignment on the corpus returned by ``copy_xigt(self.xc)`` and
     check the alignment read back from its first instance.
     """
     corpus = copy_xigt(self.xc)
     giza_align_t_g(corpus)

     first_inst = corpus[0]
     giza_aln = get_trans_glosses_alignment(first_inst, aln_method=INTENT_ALN_GIZA)
     print(giza_aln)

     expected = Alignment([(5, 7), (1, 1), (4, 3), (6, 5)])
     self.assertEqual(giza_aln, expected)
Esempio n. 4
0
 def heur_align_test(self):
     """
     Run heuristic alignment on the corpus returned by ``copy_xigt(self.xc)``
     and check the alignment read back from its first instance.
     """
     new_c = copy_xigt(self.xc)
     heur_align_corp(new_c)
     aln = get_trans_glosses_alignment(new_c[0], aln_method=INTENT_ALN_HEUR)
     a = Alignment([(5, 7), (6, 5), (1, 1), (4, 3)])
     # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual(a, aln)