예제 #1
0
 def infer_pos(cls, pos, pos_dict):
     """Map a raw POS tag to its target label.

     Args:
         pos: The raw POS tag.
         pos_dict: Mapping consulted for tags that are neither suffix
             nor prefix; it is queried with the cleaned tag.

     Returns:
         'sfx' when ``cls.is_suffix(pos)`` holds, 'pfx' when
         ``cls.is_prefix(pos)`` holds, otherwise the ``pos_dict`` entry
         for the cleaned tag, or '' when there is no such entry.
     """
     # The suffix check deliberately runs before the prefix check.
     if cls.is_suffix(pos):
         return 'sfx'
     if cls.is_prefix(pos):
         return 'pfx'

     cleaned_pos = ToolboxMorphemeCleaner.clean(pos)
     return pos_dict.get(cleaned_pos, '')
예제 #2
0
 def map(cls, pos, ud=False):
     """Map a raw POS tag to its target label.

     Args:
         pos: The raw POS tag.
         ud: When true, look the tag up in ``cls.pos_ud_dict`` instead
             of ``cls.pos_dict``.

     Returns:
         'sfx' for a tag starting with '-', 'pfx' for a tag ending with
         '-', otherwise the entry of the selected dictionary for the
         cleaned tag, or '' when there is no such entry.
     """
     # A leading hyphen wins over a trailing one.
     if pos.startswith('-'):
         return 'sfx'
     if pos.endswith('-'):
         return 'pfx'

     cleaned_pos = ToolboxMorphemeCleaner.clean(pos)
     mapping = cls.pos_ud_dict if ud else cls.pos_dict
     return mapping.get(cleaned_pos, '')
예제 #3
0
    def infer_gloss(cls, gloss):
        """Infer the mapped gloss for a raw gloss string.

        The gloss is first normalized (``ToolboxMorphemeCleaner.clean``,
        then ``cls.replace_colons``, ``cls.remove_bt_tp`` and
        ``cls.replace_many_to_one``) and then resolved in this order:

        1. An entry in ``cls.gloss_dict`` is returned directly.
        2. A number-person combination (e.g. ``1SG``) is returned as is.
        3. An all-lowercase (lexical) gloss yields ''.
        4. The markers PERSON, PLACE and TRIBE yield ''.
        5. A dotted gloss is mapped category by category and re-joined
           with '.'; categories missing from ``cls.gloss_dict`` become
           '???', and a lowercase category makes the whole result ''.

        Anything else yields ''.
        """
        normalized = ToolboxMorphemeCleaner.clean(gloss)
        normalized = cls.replace_colons(normalized)
        normalized = cls.remove_bt_tp(normalized)
        normalized = cls.replace_many_to_one(normalized)

        is_number_person = re.compile(r'[1-3/]+(SG|DU|PL)').fullmatch

        # Case 1: direct mapping.
        if normalized in cls.gloss_dict:
            return cls.gloss_dict[normalized]

        # Case 2: number-person combinations pass through unchanged.
        if is_number_person(normalized):
            return normalized

        # Cases 3 and 4 (lexical gloss, named-entity marker) and every
        # remaining gloss without a '.' all map to the empty string.
        if (normalized.islower()
                or normalized in ('PERSON', 'PLACE', 'TRIBE')
                or '.' not in normalized):
            return ''

        # Case 5: multi-category morpheme.
        mapped_parts = []
        for part in normalized.split('.'):
            if is_number_person(part):
                mapped_parts.append(part)
            elif part.islower():
                # A lexical component invalidates the whole gloss.
                return ''
            else:
                mapped_parts.append(cls.gloss_dict.get(part, '???'))

        return '.'.join(mapped_parts)
예제 #4
0
파일: cleaner.py 프로젝트: acqdiv/acqdiv
    def clean_id(cls, id_):
        """Clean a morpheme ID.

        The ID is passed through
        ``ToolboxMorphemeCleaner.remove_morpheme_delimiters`` and the
        result through ``cls.unify_unknown_morpheme``.
        """
        without_delimiters = (
            ToolboxMorphemeCleaner.remove_morpheme_delimiters(id_))
        return cls.unify_unknown_morpheme(without_delimiters)
예제 #5
0
파일: cleaner.py 프로젝트: acqdiv/acqdiv
 def clean_morpheme(cls, morpheme):
     """Return the morpheme with its morpheme delimiters removed.

     Delegates to ``ToolboxMorphemeCleaner.remove_morpheme_delimiters``.
     """
     cleaned_morpheme = ToolboxMorphemeCleaner.remove_morpheme_delimiters(
         morpheme)
     return cleaned_morpheme
예제 #6
0
파일: pos_mapper.py 프로젝트: acqdiv/acqdiv
 def clean_pos(cls, pos):
     """Clean a POS tag.

     The tag is passed through ``cls.unify_unknowns_morpheme`` and the
     result through
     ``ToolboxMorphemeCleaner.remove_morpheme_delimiters``.
     """
     unified = cls.unify_unknowns_morpheme(pos)
     return ToolboxMorphemeCleaner.remove_morpheme_delimiters(unified)
예제 #7
0
 def map(cls, gloss):
     """Map a raw gloss via ``cls.gloss_dict``.

     Morpheme delimiters are removed before the lookup; a gloss without
     an entry in the dictionary maps to ''.
     """
     lookup_key = ToolboxMorphemeCleaner.remove_morpheme_delimiters(gloss)
     mapped_gloss = cls.gloss_dict.get(lookup_key, '')
     return mapped_gloss
예제 #8
0
 def map(cls, gloss):
     """Map a raw gloss by delegating to ``cls.infer_gloss``.

     Morpheme delimiters are removed from the gloss before it is handed
     to ``cls.infer_gloss``, whose result is returned unchanged.
     """
     without_delimiters = ToolboxMorphemeCleaner.remove_morpheme_delimiters(
         gloss)
     return cls.infer_gloss(without_delimiters)