def infer_pos(cls, pos, pos_dict):
    """Infer the POS tag for a raw POS value.

    Affixes are tagged directly; every other value is cleaned with
    ``ToolboxMorphemeCleaner.clean`` and looked up in ``pos_dict``.

    Args:
        pos: The raw POS value.
        pos_dict: Mapping from cleaned POS values to target tags.

    Returns:
        'sfx' if ``cls.is_suffix(pos)`` holds, 'pfx' if
        ``cls.is_prefix(pos)`` holds, otherwise the ``pos_dict`` entry for
        the cleaned value, or '' when there is no such entry.
    """
    # The suffix test takes precedence over the prefix test.
    if cls.is_suffix(pos):
        return 'sfx'
    if cls.is_prefix(pos):
        return 'pfx'

    cleaned = ToolboxMorphemeCleaner.clean(pos)
    return pos_dict.get(cleaned, '')
def map(cls, pos, ud=False):
    """Map a raw POS value to its target tag.

    Args:
        pos: The raw POS value.
        ud: If truthy, look the cleaned value up in ``cls.pos_ud_dict``;
            otherwise in ``cls.pos_dict``.

    Returns:
        'sfx' if ``pos`` starts with '-', 'pfx' if it ends with '-',
        otherwise the entry of the selected dictionary for the value
        returned by ``ToolboxMorphemeCleaner.clean``, or '' when there is
        no such entry.
    """
    # A leading hyphen wins over a trailing one (e.g. '-x-' is 'sfx').
    if pos.startswith('-'):
        return 'sfx'
    if pos.endswith('-'):
        return 'pfx'

    cleaned = ToolboxMorphemeCleaner.clean(pos)
    table = cls.pos_ud_dict if ud else cls.pos_dict
    return table.get(cleaned, '')
def infer_gloss(cls, gloss):
    """Infer the target gloss for a raw gloss.

    The raw gloss is first passed through ``ToolboxMorphemeCleaner.clean``,
    ``cls.replace_colons``, ``cls.remove_bt_tp`` and
    ``cls.replace_many_to_one`` (in that order). The result is then
    resolved by the first rule that applies:

    1. It is a key of ``cls.gloss_dict``: return the mapped value.
    2. It is a number-person combination (digits 1-3 and/or '/', followed
       by SG, DU or PL): return it unchanged.
    3. It is all lower case, or one of 'PERSON', 'PLACE', 'TRIBE':
       return ''.
    4. It contains '.': map each dot-separated category on its own and
       re-join the results with '.'.
    5. Anything else: return ''.

    Args:
        gloss: The raw gloss.

    Returns:
        The inferred gloss, or '' if none could be inferred.
    """
    number_person = re.compile(r'[1-3/]+(SG|DU|PL)')

    # Normalisation pipeline; each step consumes the previous step's output.
    gloss = ToolboxMorphemeCleaner.clean(gloss)
    gloss = cls.replace_colons(gloss)
    gloss = cls.remove_bt_tp(gloss)
    gloss = cls.replace_many_to_one(gloss)

    # Direct mapping.
    if gloss in cls.gloss_dict:
        return cls.gloss_dict[gloss]

    # Number-person combinations are kept as they are.
    if number_person.fullmatch(gloss):
        return gloss

    # Lower-case glosses and the NER labels yield no gloss.
    if gloss.islower() or gloss in ('PERSON', 'PLACE', 'TRIBE'):
        return ''

    # Without a '.' there is nothing left to decompose.
    if '.' not in gloss:
        return ''

    # Multi-category morpheme: resolve every category separately.
    parts = []
    for piece in gloss.split('.'):
        if number_person.fullmatch(piece):
            parts.append(piece)
        elif piece.islower():
            # A single lower-case category voids the whole gloss.
            return ''
        else:
            # Categories missing from gloss_dict are marked with '???'.
            parts.append(cls.gloss_dict.get(piece, '???'))

    return '.'.join(parts)
def clean_id(cls, id_):
    """Clean a morpheme ID.

    The ID is passed through
    ``ToolboxMorphemeCleaner.remove_morpheme_delimiters`` and the result
    is then passed through ``cls.unify_unknown_morpheme``.

    Args:
        id_: The raw morpheme ID.

    Returns:
        The value returned by ``cls.unify_unknown_morpheme``.
    """
    without_delimiters = ToolboxMorphemeCleaner.remove_morpheme_delimiters(
        id_)
    return cls.unify_unknown_morpheme(without_delimiters)
def clean_morpheme(cls, morpheme):
    """Clean the morpheme.

    Args:
        morpheme: The raw morpheme.

    Returns:
        The value returned by
        ``ToolboxMorphemeCleaner.remove_morpheme_delimiters`` for
        ``morpheme``.
    """
    cleaned = ToolboxMorphemeCleaner.remove_morpheme_delimiters(morpheme)
    return cleaned
def clean_pos(cls, pos):
    """Clean a POS value.

    The value is first passed through ``cls.unify_unknowns_morpheme`` and
    the result is then passed through
    ``ToolboxMorphemeCleaner.remove_morpheme_delimiters``.

    Args:
        pos: The raw POS value.

    Returns:
        The value returned by
        ``ToolboxMorphemeCleaner.remove_morpheme_delimiters``.
    """
    # NOTE(review): `clean_id` in this file calls `unify_unknown_morpheme`
    # (without the 's'); confirm that `unify_unknowns_morpheme` is the
    # helper this class actually defines.
    unified = cls.unify_unknowns_morpheme(pos)
    return ToolboxMorphemeCleaner.remove_morpheme_delimiters(unified)
def map(cls, gloss):
    """Map a raw gloss to its target gloss via ``cls.gloss_dict``.

    Args:
        gloss: The raw gloss.

    Returns:
        The ``cls.gloss_dict`` entry for the value returned by
        ``ToolboxMorphemeCleaner.remove_morpheme_delimiters``, or '' when
        there is no such entry.
    """
    key = ToolboxMorphemeCleaner.remove_morpheme_delimiters(gloss)
    return cls.gloss_dict.get(key, '')
def map(cls, gloss):
    """Map a raw gloss to its target gloss via ``cls.infer_gloss``.

    Args:
        gloss: The raw gloss.

    Returns:
        The result of ``cls.infer_gloss`` applied to the value returned by
        ``ToolboxMorphemeCleaner.remove_morpheme_delimiters``.
    """
    stripped = ToolboxMorphemeCleaner.remove_morpheme_delimiters(gloss)
    return cls.infer_gloss(stripped)