Beispiel #1
0
def generate_gloss_words(inst, create=True):
    """
    Find or create the word-level gloss tier for an IGT instance.

    1. If a tier of type ``GLOSS_WORD_TYPE`` that is marked word-level and
       carries the ODIN gloss tag already exists, return it. When that tier
       has an alignment, its word-level metadata is removed first, because
       the alignment already indicates the type.
    2. If no such tier exists and ``create`` is true, tokenize the first line
       returned by ``retrieve_normal_lines`` for the gloss tag on whitespace,
       append the resulting tier to the instance, and return it.
    3. If no such tier exists and ``create`` is false, return None.

    :param inst: Instance which to create the tiers from.
    :type inst: RGIgt
    :param create: Whether to build the tier when it is not already present.
    :type create: bool
    :rtype: RGWordTier or None
    :raises EmptyGlossException: If there is no gloss line, or its value is
        None or consists only of whitespace.
    """

    # 1. Look for an existing gloss-word tier. The extra predicates narrow
    #    the match to tiers that are at the word level and are tagged as
    #    gloss lines.
    # TODO FIXME: Find more elegant solution
    gloss_tier = xigt_find(inst, type=GLOSS_WORD_TYPE,
                           others=[is_word_level_gloss,
                                   lambda x: ODIN_GLOSS_TAG in odin_tags(x)])

    # 2. If it exists, return it.
    if gloss_tier is not None:
        # If we have alignment, we can remove the metadata, because
        # that indicates the type for us.
        if gloss_tier.alignment is not None:
            remove_word_level_info(gloss_tier)
        return gloss_tier

    if not create:
        return None

    # Called only for its effect; the return value is not needed here.
    # NOTE(review): presumably ensures the normalized tier exists before the
    # gloss lines are looked up -- confirm against generate_normal_tier.
    generate_normal_tier(inst)

    # Take the first gloss line, treating "no gloss lines at all" the same
    # way as an empty one instead of leaking a bare IndexError.
    gloss_lines = retrieve_normal_lines(inst, ODIN_GLOSS_TAG)
    try:
        gloss_line_item = gloss_lines[0]
    except IndexError:
        gloss_line_item = None

    # If the gloss line is missing, its value is None, or it's simply an
    # empty/whitespace-only string, there is nothing to tokenize.
    gloss_value = None if gloss_line_item is None else gloss_line_item.value()
    if gloss_value is None or not gloss_value.strip():
        raise EmptyGlossException()

    gloss_tier = create_words_tier(gloss_line_item, GLOSS_WORD_ID,
                                   GLOSS_WORD_TYPE, aln_attribute=CONTENT,
                                   tokenizer=whitespace_tokenizer)

    # Set the "gloss type" to the "word-level"
    add_word_level_info(gloss_tier, INTENT_GLOSS_WORD)
    inst.append(gloss_tier)
    return gloss_tier
Beispiel #2
0
def glosses(inst) -> Tier:
    """
    Return the sub-word-level glosses tier for ``inst``, creating it if needed.

    An existing tier of type ``GLOSS_MORPH_TYPE`` that is not a word-level
    gloss tier is reused. Otherwise a new tier is derived from the result of
    ``gloss(inst)``, flagged as a morph-level gloss, and appended to the
    instance. In either case, if the tier has an alignment, its word-level
    metadata attribute is removed before the tier is returned.

    :param inst: Instance to search and, if necessary, extend.
    :rtype: Tier
    """
    # Exclude word-level gloss tiers so the gloss-word tier is never picked
    # up by accident.
    morph_tier = xigt_find(
        inst,
        type=GLOSS_MORPH_TYPE,
        others=[lambda x: not is_word_level_gloss(x)],
    )

    if morph_tier is None:
        # No sub-token-level glosses tier yet: build one. The new tier is
        # aligned via CONTENT, not SEGMENTATION.
        word_tier = gloss(inst)
        morph_tier = words_to_morph_tier(
            word_tier, GLOSS_MORPH_TYPE, GLOSS_MORPH_ID, CONTENT
        )

        # Record that this is not a word-level gloss, then attach the tier.
        add_word_level_info(morph_tier, INTENT_GLOSS_MORPH)
        inst.append(morph_tier)

    # The metadata attribute is dropped once the tier has an alignment.
    has_alignment = morph_tier.alignment is not None
    if has_alignment:
        remove_word_level_info(morph_tier)

    return morph_tier