예제 #1
0
    def process_file(self, file_name):
        """
        Link the gloss tiers of one EAF file to the ASL Signbank ECV.

        The file is loaded with ``Eaf``, the "Gloss Child" and "Gloss Adult"
        linguistic types are registered, each tier returned by
        ``find_gloss_tiers`` whose name contains "Adult" or "Child" is
        pointed at the matching type, both types are tied to the external
        controlled vocabulary, and the result is written into
        ``self.output_dir`` under the base name of the input.

        An IOError is not propagated: a message is written to stderr and
        the exception class is printed to stdout.

        :param file_name: location of the EAF file; the base name of its
            URL path component is reused for the output file
        :return: None
        """

        child_type = "Gloss Child"
        adult_type = "Gloss Adult"
        ecv_ref_id = "ecv_ref"
        cv_id = "ASL Signbank lexicon"
        ecv_url = \
            "http://applejack.science.ru.nl/asl-signbank/static/ecv/asl.ecv"

        try:
            eaf = Eaf(file_name)

            # Register the two gloss types, child first.
            for type_id in (child_type, adult_type):
                eaf.add_linguistic_type(type_id, constraints=None)

            # "Adult" takes precedence when a tier name contains both
            # markers; tiers with neither marker are left untouched.
            for tier_name in self.find_gloss_tiers(eaf):
                if "Adult" in tier_name:
                    type_ref = adult_type
                elif "Child" in tier_name:
                    type_ref = child_type
                else:
                    continue
                eaf.tiers[tier_name][2]['LINGUISTIC_TYPE_REF'] = type_ref

            # External reference first, then the vocabulary that uses it.
            eaf.add_external_ref(ecv_ref_id, "ecv", ecv_url)
            eaf.add_controlled_vocabulary(cv_id, ecv_ref_id)

            # Tie both gloss types to the controlled vocabulary.
            for type_id in (child_type, adult_type):
                type_attributes = eaf.linguistic_types[type_id]
                type_attributes['CONTROLLED_VOCABULARY_REF'] = cv_id

            out_name = os.path.basename(urlparse(file_name).path)
            eaf.to_file(self.output_dir + os.sep + out_name, pretty=True)
        except IOError:
            failure = "The EAF %s could not be processed." % file_name
            print(failure, file=sys.stderr)
            exc_class = sys.exc_info()[0]
            print(exc_class)
예제 #2
0
    def process_file(self, file_name):
        """
        Add "gloss-append" child tiers to the gloss tiers of one EAF file.

        Loads the file with ``Eaf``, registers the "gloss-append" linguistic
        type (constraint "Symbolic_Association"), hands the tiers returned by
        ``find_gloss_tiers`` to ``add_gloss_tier_children`` and writes the
        result into ``self.output_dir`` under the base name of the input.

        An IOError is not propagated: a message is written to stderr and the
        exception class is printed to stdout.

        :param file_name: location of the EAF file; the base name of its URL path is reused for the output
        :return: None
        """

        append_type_id = "gloss-append"

        try:
            document = Eaf(file_name)
            document.add_linguistic_type(append_type_id, constraints="Symbolic_Association")

            tiers_with_glosses = self.find_gloss_tiers(document)
            self.add_gloss_tier_children(document, tiers_with_glosses, append_type_id, file_name)

            # Output keeps the input's base name inside the output directory.
            base_name = os.path.basename(urlparse(file_name).path)
            destination = self.output_dir + os.sep + base_name
            document.to_file(destination, pretty=True)
        except IOError:
            exc_class = sys.exc_info()[0]
            print("The EAF %s could not be processed." % file_name, file=sys.stderr)
            print(exc_class)
예제 #3
0
    def process_file(self, file_name):
        """
        Connect the gloss tiers of one EAF file to the ASL Signbank ECV.

        Steps, in order: load the file with ``Eaf``; register the
        "Gloss Child" and "Gloss Adult" linguistic types; give every tier
        returned by ``find_gloss_tiers`` whose name contains "Adult" or
        "Child" the matching type; add the external ECV reference and a
        controlled vocabulary that uses it; reference that vocabulary from
        both types; write the file into ``self.output_dir`` under the base
        name of the input.

        An IOError is not propagated: a message is written to stderr and the
        exception class is printed to stdout.

        :param file_name: location of the EAF file; the base name of its URL path is reused for the output
        :return: None
        """

        # Registration order: child type first, then adult type.
        gloss_type_ids = ("Gloss Child", "Gloss Adult")
        # Lookup order matters: a tier name containing both markers is treated as "Adult".
        type_for_marker = (("Adult", "Gloss Adult"), ("Child", "Gloss Child"))
        reference_id = "ecv_ref"
        vocabulary_id = "ASL Signbank lexicon"

        try:
            eaf = Eaf(file_name)

            for type_id in gloss_type_ids:
                eaf.add_linguistic_type(type_id, constraints=None)

            for tier_id in self.find_gloss_tiers(eaf):
                for marker, type_id in type_for_marker:
                    if marker in tier_id:
                        eaf.tiers[tier_id][2]['LINGUISTIC_TYPE_REF'] = type_id
                        break

            eaf.add_external_ref(reference_id, "ecv", "http://applejack.science.ru.nl/asl-signbank/static/ecv/asl.ecv")
            eaf.add_controlled_vocabulary(vocabulary_id, reference_id)

            for type_id in gloss_type_ids:
                eaf.linguistic_types[type_id]['CONTROLLED_VOCABULARY_REF'] = vocabulary_id

            output_name = os.path.basename(urlparse(file_name).path)
            eaf.to_file(self.output_dir + os.sep + output_name, pretty=True)
        except IOError:
            print("The EAF %s could not be processed." % file_name, file=sys.stderr)
            exc_class, _, _ = sys.exc_info()
            print(exc_class)
예제 #4
0
    def process_file(self, file_name):
        """
        Give the gloss tiers of one EAF file "gloss-append" child tiers.

        The file is loaded with ``Eaf``; a "gloss-append" linguistic type
        with the "Symbolic_Association" constraint is registered; the
        tiers returned by ``find_gloss_tiers`` are passed to
        ``add_gloss_tier_children``; the result is written into
        ``self.output_dir`` under the base name of the input.

        An IOError is not propagated: a message is written to stderr and
        the exception class is printed to stdout.

        :param file_name: location of the EAF file; the base name of its
            URL path component is reused for the output file
        :return: None
        """

        child_lingtype = "gloss-append"

        try:
            eaf = Eaf(file_name)
            eaf.add_linguistic_type(
                child_lingtype, constraints="Symbolic_Association")

            parent_tiers = self.find_gloss_tiers(eaf)
            self.add_gloss_tier_children(
                eaf, parent_tiers, child_lingtype, file_name)

            # Same base name as the input, inside the output directory.
            target_name = os.path.basename(urlparse(file_name).path)
            eaf.to_file(self.output_dir + os.sep + target_name,
                        pretty=True)
        except IOError:
            message = "The EAF %s could not be processed." % file_name
            print(message, file=sys.stderr)
            print(sys.exc_info()[0])
예제 #5
0
def main():
    """
    Merge tiers from two EAF files into a newly created EAF file.

    File 1 has the utterance and utterance translation
    File 2 has the utterance ids, words, morphs and glosses
    File 3 is the destination

    The utterance id, utterance, utterance translation and word tiers are
    copied with the module's tier-copy helpers. The gloss tier is built by
    hand: for every word in file 2 the glosses of its morphs are joined
    with "-" so that there is one gloss annotation per word annotation.

    Utterances that have no word annotations are skipped when the glosses
    are collected (they have nothing to contribute and would otherwise
    cause a division by zero).

    :return: None
    """
    # Input files
    file_1 = 'input/file-1.eaf'
    file_2 = 'input/file-2.eaf'
    file_3 = 'input/new.eaf'

    # Tier names
    utterance_id_source_tier = "A_phrase-segnum-en"
    utterance_id_target_tier = "utterance_id"
    utterance_source_tier = "DDD_Transcription-txt-qaa-fonipa-x-eib"
    utterance_target_tier = "utterance"
    utterance_translation_source_tier = "DDD_Translation-gls-en"
    utterance_translation_target_tier = "utterance_translation"
    word_source_tier = "A_word-txt-qaa-fonipa-x-eib"
    word_target_tier = "grammatical_words"
    morph_source_tier = "A_morph-txt-qaa-fonipa-x-eib"
    gloss_source_tier = "A_morph-gls-en"
    gloss_target_tier = "gloss"

    # Set up the eaf objects
    eaf_1 = Eaf(file_1)
    eaf_2 = Eaf(file_2)
    eaf_3 = Eaf()

    # Remove default tier.
    eaf_3.remove_tier("default")
    # TODO: media is not copied yet: eaf_3 = copy_media(eaf_1, eaf_3)

    # Copy annotation number tier from file 2
    # tier-type default-lt
    # <LINGUISTIC_TYPE GRAPHIC_REFERENCES="false" LINGUISTIC_TYPE_ID="default-lt" TIME_ALIGNABLE="true"/>
    # No linguistic type is added here; presumably a fresh Eaf() already
    # defines "default-lt" -- confirm against the Eaf implementation.
    print("Copying annotation numbers from file 2")
    utterance_id_tier_params = {
        'LINGUISTIC_TYPE_REF': 'default-lt',
        'TIER_ID': utterance_id_target_tier
    }
    _tier_copy(source_eaf=eaf_2,
               target_eaf=eaf_3,
               source_tier_name=utterance_id_source_tier,
               target_tier_name=utterance_id_target_tier,
               override_params=utterance_id_tier_params)

    # Copy utterance tier from file 1
    # LINGUISTIC_TYPE_REF="Blank"
    # <LINGUISTIC_TYPE CONSTRAINTS="Symbolic_Association" GRAPHIC_REFERENCES="false" LINGUISTIC_TYPE_ID="Blank" TIME_ALIGNABLE="false"/>
    print("Copying utterance tier from file 1")
    blank_type_params = {
        'LINGUISTIC_TYPE_ID': 'Blank',
        'CONSTRAINTS': 'Symbolic_Association',
        'TIME_ALIGNABLE': 'false'
    }
    eaf_3.add_linguistic_type('Blank', param_dict=blank_type_params)
    utterance_tier_params = {
        'LINGUISTIC_TYPE_REF': 'Blank',
        'PARENT_REF': utterance_id_target_tier,
        'TIER_ID': utterance_target_tier
    }
    _tier_copy_to_ref(source_eaf=eaf_1,
                      target_eaf=eaf_3,
                      source_tier_name=utterance_source_tier,
                      target_tier_name=utterance_target_tier,
                      target_parent_tier_name=utterance_id_target_tier,
                      override_params=utterance_tier_params)

    # Copy utterance translation tier from file 1
    # LINGUISTIC_TYPE_REF="Blank"
    # <LINGUISTIC_TYPE CONSTRAINTS="Symbolic_Association" GRAPHIC_REFERENCES="false" LINGUISTIC_TYPE_ID="Blank" TIME_ALIGNABLE="false"/>
    # <TIER LINGUISTIC_TYPE_REF="Blank" PARENT_REF="utterance" PARTICIPANT="DDD" TIER_ID="utterance_translation">
    print("Copying utterance translation tier from file 1")
    utterance_translation_tier_params = {
        'LINGUISTIC_TYPE_REF': 'Blank',
        'PARENT_REF': utterance_target_tier,
        'TIER_ID': utterance_translation_target_tier
    }
    _ref_tier_copy(source_eaf=eaf_1,
                   target_eaf=eaf_3,
                   source_tier_name=utterance_translation_source_tier,
                   target_tier_name=utterance_translation_target_tier,
                   target_parent_tier_name=utterance_target_tier,
                   override_params=utterance_translation_tier_params)

    # Copy the word tier from file 2
    # <LINGUISTIC_TYPE CONSTRAINTS="Symbolic_Subdivision" GRAPHIC_REFERENCES="false" LINGUISTIC_TYPE_ID="word" TIME_ALIGNABLE="false"/>
    # <TIER DEFAULT_LOCALE="qaa-fonipa-x-eib" LINGUISTIC_TYPE_REF="word" PARENT_REF="A_phrase-segnum-en" PARTICIPANT="DDD" TIER_ID="A_word-txt-qaa-fonipa-x-eib">
    print("Copying word tier from file 2")
    word_type_params = {
        'LINGUISTIC_TYPE_ID': 'word',
        'CONSTRAINTS': 'Symbolic_Subdivision',
        'TIME_ALIGNABLE': 'false'
    }
    eaf_3.add_linguistic_type('word', param_dict=word_type_params)

    word_tier_params = {
        'LINGUISTIC_TYPE_REF': 'word',
        'PARENT_REF': utterance_target_tier,
        'TIER_ID': word_target_tier
    }

    # NOTE(review): PARENT_REF above names the utterance tier while the
    # helper is given the utterance id tier as parent -- confirm that this
    # difference is intended.
    _copy_symbolic_subdivision_tier(
        source_eaf=eaf_2,
        target_eaf=eaf_3,
        source_tier_name=word_source_tier,
        target_tier_name=word_target_tier,
        target_parent_tier_name=utterance_id_target_tier,
        override_params=word_tier_params)

    # Get all the annotations from -2 gloss tier (gloss_source_tier A_morph-gls-en)
    # Join the glosses with "-" so there is a 1:1 match with word annotations
    # <LINGUISTIC_TYPE CONSTRAINTS="Symbolic_Association" GRAPHIC_REFERENCES="false" LINGUISTIC_TYPE_ID="Blank" TIME_ALIGNABLE="false"/>
    # <TIER LINGUISTIC_TYPE_REF="Blank" PARENT_REF="grammatical_words" TIER_ID="gloss">
    print("Epic battle with words to get glosses from file 2")
    gloss_tier_params = {
        'LINGUISTIC_TYPE_REF': 'Blank',
        'PARENT_REF': word_target_tier,
        'TIER_ID': gloss_target_tier
    }
    # None of the pympi methods will suit this task, so let's do it manually.
    # Get all the data
    eaf_2_tiers = eaf_2.tiers
    eaf_2_timeslots = eaf_2.timeslots
    # A tier is of the form: {tier_name -> (aligned_annotations, reference_annotations, attributes, ordinal)},
    # Word and gloss tiers are ref_annotations, the second item in the tiers dict. See docs for more info about format.
    word_tier = eaf_2_tiers[word_source_tier][1]
    morph_tier = eaf_2_tiers[morph_source_tier][1]
    gloss_tier = eaf_2_tiers[gloss_source_tier][1]

    # Each reference annotation is of the form: [{id -> (reference, value, previous, svg_ref)}].
    # Start at the top of the hierarchy
    utterance_id_tier = eaf_2_tiers[utterance_id_source_tier][0]

    # Index every child annotation by the id of its parent, once, instead of
    # rescanning the whole word/morph/gloss tiers inside nested loops. The
    # lists keep the tiers' own iteration order, so glosses are joined in the
    # same order as a full scan would produce.
    words_by_utterance = {}
    for word_id, word in word_tier.items():
        words_by_utterance.setdefault(word[0], []).append((word_id, word[1]))
    morphs_by_word = {}
    for morph_id, morph in morph_tier.items():
        morphs_by_word.setdefault(morph[0], []).append(morph_id)
    glosses_by_morph = {}
    for gloss in gloss_tier.values():
        glosses_by_morph.setdefault(gloss[0], []).append(gloss[1])

    new_dict = dict()
    # For each utterance, get the words. For each word, get the glosses. Merge glosses for each word
    for utterance_id, utterance in utterance_id_tier.items():
        utt_start = eaf_2_timeslots[utterance[0]]
        utt_end = eaf_2_timeslots[utterance[1]]
        word_gloss = []
        for word_id, word_value in words_by_utterance.get(utterance_id, ()):
            glosses = []
            for morph_id in morphs_by_word.get(word_id, ()):
                glosses.extend(glosses_by_morph.get(morph_id, ()))
            # Join glosses for this word with a dash
            word_gloss.append([word_value, '-'.join(glosses)])
        if not word_gloss:
            # An utterance without words has no glosses to add, and dividing
            # its duration by zero segments would raise ZeroDivisionError.
            continue
        # Now, work out word duration (it is an even division of parent utterance duration)
        # Make these values the first items in the data list eg [utt_start, word_duration, [word, gloss], [word, gloss], ...]
        word_dur = int((utt_end - utt_start) / len(word_gloss))
        word_gloss = [utt_start, word_dur] + word_gloss
        print("word gloss", word_gloss)
        new_dict[utterance_id] = word_gloss

    # Having worked all that out, now we can add a ref annotation tier.
    # but parent seems to now bubble all the way to the top.
    eaf_3.add_tier(gloss_target_tier,
                   ling='Blank',
                   parent=word_target_tier,
                   tier_dict=gloss_tier_params)
    # And some annotations. The glosses are reference annotations, so the
    # timing values stored at the front of each entry are not needed here.
    target_words = eaf_3.tiers[word_target_tier][1]
    target_glosses = eaf_3.tiers[gloss_target_tier][1]
    for annotation in new_dict.values():
        for word_value, gloss_value in annotation[2:]:
            # NOTE(review): target words are matched by their text only, so
            # a word that occurs several times receives a gloss annotation
            # for every occurrence in the source -- confirm whether matching
            # by position within the utterance is wanted instead.
            for aid, (_ref_id, target_value, _prev, _svg) in target_words.items():
                if word_value == target_value:
                    new_aid = eaf_3.generate_annotation_id()
                    target_glosses[new_aid] = (aid, gloss_value, None, None)

    # Save the new file
    print("Saving object to file")
    eaf_3.to_file(file_3)