def __init__(
    self,
    result: types.Result,
    *,
    search_run: core.SearchRun,
    display_mode="community",
    animate_emoji=AnimateEmoji.default,
    show_emoji=ShowEmoji.default,
    dict_source=None,
):
    """
    Collect everything needed to present a single search result.

    :param result: the search result being presented; its wordform,
        lemma wordform, ``is_lemma`` flag and source-language match are
        copied onto this object
    :param search_run: the search run associated with ``result``
    :param display_mode: key used to select the relabeller
    :param animate_emoji: emoji preference, stored and forwarded to
        ``get_lexical_info``
    :param show_emoji: emoji visibility preference, stored and forwarded
        to ``get_lexical_info``
    :param dict_source: stored and forwarded to ``get_lexical_info``
    :raises Exception: if ``settings.MORPHODICT_TAG_STYLE`` is neither
        ``"Plus"`` nor ``"Bracket"``
    """
    self._result = result
    self._search_run = search_run

    # Read the label tables once rather than once per dictionary entry.
    labels = read_labels()
    # NOTE(review): the default display_mode "community" is not a key of
    # this table, so the lookup falls through to DisplayMode.default --
    # confirm that this fallback is a usable relabeller.
    self._relabeller = {
        "english": labels.english,
        "linguistic": labels.linguistic_long,
        "source_language": labels.source_language,
    }.get(display_mode, DisplayMode.default)

    self._animate_emoji = animate_emoji
    self._show_emoji = show_emoji

    self.wordform = result.wordform
    self.lemma_wordform = result.lemma_wordform
    self.is_lemma = result.is_lemma
    self.source_language_match = result.source_language_match
    self.dict_source = dict_source

    # Look the analysis up once; it is needed in several places below.
    analysis = result.wordform.analysis

    if settings.MORPHODICT_TAG_STYLE == "Plus":
        (
            self.linguistic_breakdown_head,
            _,
            self.linguistic_breakdown_tail,
        ) = analysis or [[], None, []]
    elif settings.MORPHODICT_TAG_STYLE == "Bracket":
        # Arapaho has some head tags that the Plus-style FSTs put at the
        # tail. For now, move them; later on elaboration could be a
        # language-specific function.
        head, _, tail = analysis or [[], None, []]

        new_head = []
        new_tail_prefix = []
        for tag in head:
            # Bracketed head tags are moved to the front of the tail.
            if tag.startswith("["):
                new_tail_prefix.append(tag)
            else:
                new_head.append(tag)

        self.linguistic_breakdown_head = new_head
        self.linguistic_breakdown_tail = new_tail_prefix + list(tail)
    else:
        raise Exception(f"Unknown {settings.MORPHODICT_TAG_STYLE=}")

    self.lexical_info = get_lexical_info(
        analysis,
        animate_emoji,
        self._show_emoji,
        self.dict_source,
    )

    # NOTE: self.morphemes is only assigned when an analysis exists.
    if analysis:
        self.morphemes = analysis.generate_with_morphemes(
            result.wordform.text)
def relabel_linguistic_long(pos: str):
    """
    Look up the long linguistic label for a class.

    So if I pass in "VTA-1", I should get back:
    transitive animate verb – class 1: regular

    :param pos: the class to relabel
    :return: whatever the ``linguistic_long`` label table's ``get``
        yields for ``pos``
    """
    long_labels = read_labels().linguistic_long
    return long_labels.get(pos)
def relabel_linguistic_short(pos: str):
    """
    Look up the short linguistic label for a class.

    So if I pass in "VTA-1", I should get back:
    VTA-1

    :param pos: the class to relabel
    :return: whatever the ``linguistic_short`` label table's ``get``
        yields for ``pos``
    """
    short_labels = read_labels().linguistic_short
    return short_labels.get(pos)
def relabel_source(pos: str):
    """
    Look up the source-language label for a class.

    So if I pass in "VTA-1", I should get back:
    tâpiskôc: wîcihêw

    :param pos: the class to relabel
    :return: whatever the ``source_language`` label table's ``get``
        yields for ``pos``
    """
    source_labels = read_labels().source_language
    return source_labels.get(pos)
def relabel_plain_english(pos: str):
    """
    Look up the plain English label for a class.

    So if I pass in "VTA-1", I should get back:
    like: wîcihêw

    :param pos: the class to relabel
    :return: whatever the ``english`` label table's ``get`` yields for
        ``pos``
    """
    english_labels = read_labels().english
    return english_labels.get(pos)
def get_emoji_for_cree_wordclass(
        word_class: Optional[str],
        animate_emoji: str = AnimateEmoji.default) -> Optional[str]:
    """
    Attempts to get an emoji description of the full wordclass.
    e.g., "👤👵🏽" for "nôhkom"

    :param word_class: the word class to describe; ``None`` or an empty
        string yields ``None``
    :param animate_emoji: preferred animate emoji, applied to the emoji
        that was looked up via ``use_preferred_animate_emoji``
    :return: the emoji for the word class, or the falsy lookup result
        unchanged when no emoji is found
    """
    # An empty string has no first character to dispatch on, so treat it
    # like a missing word class instead of raising IndexError.
    if not word_class:
        return None

    def to_fst_output_style(value):
        # "N..." -> one upper-case tag per character
        if value.startswith("N"):
            return list(value.upper())
        # "V..." -> "V" followed by the upper-cased remainder as one tag
        if value.startswith("V"):
            return ["V", value[1:].upper()]
        # anything else -> a single title-cased tag
        return [value.title()]

    tags = to_fst_output_style(word_class)
    original = read_labels().emoji.get_longest(tags)
    if not original:
        # Nothing found: hand back the lookup result untouched.
        return original
    return use_preferred_animate_emoji(original, animate_emoji)
def label_setting_to_relabeller(label_setting: str):
    """
    Pick the label table that corresponds to a label setting.

    :param label_setting: one of "english", "linguistic" or
        "source_language"; any other value selects the English labels
    :return: the matching label table from ``read_labels()``
    """
    labels = read_labels()
    relabellers = {
        "english": labels.english,
        "linguistic": labels.linguistic_short,
        "source_language": labels.source_language,
    }
    if label_setting in relabellers:
        return relabellers[label_setting]
    # Unrecognised settings fall back to the English labels.
    return labels.english
def serialize_wordform(wordform: Wordform, animate_emoji: str,
                       dict_source: list) -> SerializedWordform:
    """
    Intended to be passed in a JSON API or into templates.

    :param wordform: the wordform to serialize
    :param animate_emoji: emoji preference forwarded to
        ``get_emoji_for_cree_wordclass``
    :param dict_source: forwarded to ``serialize_definitions``
    :return: json parsable result
    """
    result = model_to_dict(wordform)
    result["definitions"] = serialize_definitions(
        wordform.definitions.all(), dict_source=dict_source)
    result["lemma_url"] = wordform.get_absolute_url()

    linguist_info = wordform.linguist_info
    if linguist_info:
        if inflectional_category := linguist_info.get(
                "inflectional_category", None):
            # Read the label tables once for both relabellings.
            labels = read_labels()
            result.update({
                "inflectional_category_plain_english":
                    labels.english.get(inflectional_category),
                "inflectional_category_linguistic":
                    labels.linguistic_long.get(inflectional_category),
            })
        if wordclass := linguist_info.get("wordclass"):
            result["wordclass_emoji"] = get_emoji_for_cree_wordclass(
                wordclass, animate_emoji)

    # Hand the serialized mapping back to the caller; without this the
    # function would implicitly return None.
    return result
def __init__(
    self,
    result: types.Result,
    *,
    search_run: core.SearchRun,
    display_mode="community",
    animate_emoji=AnimateEmoji.default,
    dict_source=None,
):
    """
    Collect everything needed to present a single search result.

    :param result: the search result being presented; its wordform,
        lemma wordform, ``is_lemma`` flag and source-language match are
        copied onto this object
    :param search_run: the search run associated with ``result``
    :param display_mode: "community" or "linguistic"; any other value
        makes the relabeller fall back to ``DisplayMode.default``
    :param animate_emoji: emoji preference, stored and forwarded to
        ``get_lexical_info``
    :param dict_source: stored and forwarded to ``get_lexical_info``
    :raises Exception: if ``settings.MORPHODICT_TAG_STYLE`` is neither
        ``"Plus"`` nor ``"Bracket"``
    """
    self._result = result
    self._search_run = search_run

    # Read the label tables once rather than once per dictionary entry.
    labels = read_labels()
    self._relabeller = {
        "community": labels.english,
        "linguistic": labels.linguistic_long,
    }.get(display_mode, DisplayMode.default)

    self._animate_emoji = animate_emoji

    self.wordform = result.wordform
    self.lemma_wordform = result.lemma_wordform
    self.is_lemma = result.is_lemma
    self.source_language_match = result.source_language_match
    self.dict_source = dict_source

    # Look the analysis up once; it is needed in several places below.
    analysis = result.wordform.analysis

    if settings.MORPHODICT_TAG_STYLE == "Plus":
        (
            self.linguistic_breakdown_head,
            _,
            self.linguistic_breakdown_tail,
        ) = analysis or [[], None, []]
    elif settings.MORPHODICT_TAG_STYLE == "Bracket":
        # Arapaho has some head tags that the Plus-style FSTs put at the
        # tail. For now, move them; later on elaboration could be a
        # language-specific function.
        head, _, tail = analysis or [[], None, []]

        new_head = []
        new_tail_prefix = []
        for tag in head:
            # Bracketed head tags are moved to the front of the tail.
            if tag.startswith("["):
                new_tail_prefix.append(tag)
            else:
                new_head.append(tag)

        self.linguistic_breakdown_head = new_head
        self.linguistic_breakdown_tail = new_tail_prefix + list(tail)
    else:
        raise Exception(f"Unknown {settings.MORPHODICT_TAG_STYLE=}")

    self.lexical_info = get_lexical_info(analysis, animate_emoji,
                                         self.dict_source)

    # Split the lexical entries out by their type.
    self.preverbs = [
        lexical_entry["entry"]
        for lexical_entry in self.lexical_info
        if lexical_entry["type"] == "Preverb"
    ]
    self.reduplication = [
        lexical_entry["entry"]
        for lexical_entry in self.lexical_info
        if lexical_entry["type"] == "Reduplication"
    ]

    self.friendly_linguistic_breakdown_head = replace_user_friendly_tags(
        to_list_of_fst_tags(self.linguistic_breakdown_head))
    self.friendly_linguistic_breakdown_tail = replace_user_friendly_tags(
        to_list_of_fst_tags(self.linguistic_breakdown_tail))
def get_lexical_info(result_analysis: RichAnalysis, animate_emoji: str,
                     dict_source: list) -> List[Dict]:
    """
    Build the serialized lexical entries for an analysis' prefix tags.

    Reduplication tags ("RdplW+", "RdplS+"), the initial change tag
    ("IC+") and preverb tags ("PV/...") can each contribute one entry;
    every other tag is skipped.

    :param result_analysis: the analysis whose ``prefix_tags`` are
        inspected; a falsy value yields an empty list
    :param animate_emoji: forwarded to ``serialize_wordform`` for preverbs
    :param dict_source: forwarded to ``serialize_wordform`` for preverbs
    :return: serialized lexical entries, in prefix-tag order
    """
    if not result_analysis:
        return []

    prefix_tags = result_analysis.prefix_tags
    first_letters = extract_first_letters(result_analysis)

    collected: List[Dict] = []
    for position, tag in enumerate(prefix_tags):
        entry_type: Optional[LexicalEntryType] = None
        entry: Optional[_ReduplicationResult | SerializedWordform
                        | _InitialChangeResult] = None

        if tag in ["RdplW+", "RdplS+"]:
            reduplication_string: Optional[str] = (
                generate_reduplication_string(
                    tag, first_letters[position + 1]))
            if reduplication_string is not None:
                if tag == "RdplS+":
                    gloss = "Strong reduplication: intermittent, repeatedly, iteratively; again and again; here and there"
                else:
                    gloss = "Weak Reduplication: ongoing, continuing"
                entry = _ReduplicationResult(
                    text=reduplication_string,
                    definitions=[{"text": gloss}],
                ).serialize()
                entry_type = "Reduplication"
        elif tag == "IC+":
            change_types = get_initial_change_types()
            entry_type = "Initial Change"
            entry = _InitialChangeResult(
                text=" ", definitions=change_types).serialize()
        elif tag.startswith("PV/"):
            # use altlabel.tsv to figure out the preverb
            # the short linguistic label looks like: "Preverb: âpihci-"
            short_label = read_labels().linguistic_short.get(
                cast(FSTTag, tag.rstrip("+")))
            if short_label:
                # drop the leading "Preverb: " to get the preverb text
                normative_preverb_text = short_label[len("Preverb: "):]
                candidates = Wordform.objects.filter(
                    text=normative_preverb_text,
                    raw_analysis__isnull=True)

                preverb: Optional[Preverb]
                if candidates:
                    # find the one that looks the most similar
                    preverb = min(
                        candidates,
                        key=lambda candidate: get_modified_distance(
                            normative_preverb_text,
                            candidate.text.strip("-"),
                        ),
                    )
                else:
                    # Can't find a match for the preverb in the database.
                    # This happens when searching against the test
                    # database for ê-kî-nitawi-kâh-kîmôci-kotiskâwêyâhk,
                    # as the test database lacks ê and kî.
                    preverb = Wordform(text=normative_preverb_text,
                                       is_lemma=True)

                entry = serialize_wordform(preverb, animate_emoji,
                                           dict_source)
                entry_type = "Preverb"

        # Only tags that produced both an entry and a type are recorded.
        if entry and entry_type:
            lexical_entry = _LexicalEntry(entry=entry,
                                          type=entry_type,
                                          original_tag=tag)
            collected.append(serialize_lexical_entry(lexical_entry))

    return collected
def replace_user_friendly_tags(fst_tags: List[FSTTag]) -> List[Label]:
    """
    Relabel FST tags with the English label table.

    :param fst_tags: the FST tags to relabel
    :return: the result of the English table's ``get_full_relabelling``
    """
    english_labels = read_labels().english
    return english_labels.get_full_relabelling(fst_tags)