def populate_wordform_definitions(self, wf, senses):
    """Create definitions for wordform *wf* from *senses* and, when
    auto-translation is enabled, generate the inflected wordforms of the
    paradigm with machine-translated definitions attached.

    Args:
        wf: the (lemma) Wordform to define.
        senses: the dictionary senses used to build definitions.

    Side effects: saves definitions via ``self._add_definition`` and buffers
    newly created inflected wordforms in ``self.wordform_buffer``.
    """
    should_do_translation = self.translate_wordforms

    if should_do_translation:
        # Generating inflections requires a paradigm plus an analysis
        # (or an FST lemma when FST lemma support is enabled).
        if settings.MORPHODICT_ENABLE_FST_LEMMA_SUPPORT:
            has_analysis_and_paradigm = bool(wf.fst_lemma and wf.paradigm)
        else:
            has_analysis_and_paradigm = bool(wf.analysis and wf.paradigm)

        if not has_analysis_and_paradigm:
            should_do_translation = False

    # The plain (human-written) definitions are always created, even when
    # we won't auto-translate inflections below.
    definitions_and_sources = self.create_definitions(wf, senses)

    if not should_do_translation:
        return

    lemma_text = (
        wf.fst_lemma
        if settings.MORPHODICT_ENABLE_FST_LEMMA_SUPPORT
        else wf.text
    )

    for (
        prefix_tags,
        suffix_tags,
    ) in self.paradigm_manager.all_analysis_template_tags(wf.paradigm):
        analysis = RichAnalysis((prefix_tags, lemma_text, suffix_tags))

        # Skip re-instantiating the lemma itself. This check depends only
        # on `analysis`, so it is done before (not inside) the generator
        # loop, also avoiding a pointless FST lookup for the lemma.
        if analysis == wf.analysis:
            continue

        for generated in strict_generator().lookup(analysis.smushed()):
            inflected_wordform = Wordform(
                # For now, leaving paradigm and linguist_info empty;
                # code can get that info from the lemma instead.
                text=generated,
                raw_analysis=analysis.tuple,
                lemma=wf,
                is_lemma=False,
            )

            for d, sources in definitions_and_sources:
                translation = translate_single_definition(
                    inflected_wordform, d.text, self.translation_stats
                )
                if translation is None:
                    continue

                # Buffer the wordform for saving the first time any
                # definition translation for it succeeds.
                if inflected_wordform.id is None:
                    self.wordform_buffer.add(inflected_wordform)

                self._add_definition(
                    inflected_wordform,
                    translation,
                    ("🤖" + source for source in sources),
                    auto_translation_source=d,
                )
def test_fst_generation():
    """The strict generator produces the expected surface form."""
    generated_forms = strict_generator().lookup("PV/ta+PV/pe+kîwêmakan+V+II+Ind+4Sg")
    assert "ta-pê-kîwêmakaniyiw" in set(generated_forms)
# Beispiel #3 (example separator from the source page; the stray "0" was a vote-count artifact)
def fetch_results(search_run: core.SearchRun):
    """Add all matches for the query in *search_run* to its result set.

    Strategy: first collect keyword matches in both languages, then use
    relaxed FST analysis of the raw query to find wordforms by analysis,
    and finally synthesize wordforms for analyses with no database entry.
    """
    fetch_results_from_target_language_keywords(search_run)
    fetch_results_from_source_language_keywords(search_run)

    # Use the spelling relaxation to try to decipher the query
    #   e.g., "atchakosuk" becomes "acâhkos+N+A+Pl" --
    #         thus, we can match "acâhkos" in the dictionary!
    fst_analyses = set(rich_analyze_relaxed(search_run.internal_query))

    db_matches = list(
        Wordform.objects.filter(raw_analysis__in=[a.tuple for a in fst_analyses])
    )

    for wf in db_matches:
        search_run.add_result(
            Result(
                wf,
                source_language_match=wf.text,
                query_wordform_edit_distance=get_modified_distance(
                    wf.text, search_run.internal_query
                ),
            )
        )

        # An exact match here means we’re done with this analysis.
        fst_analyses.discard(wf.analysis)

    # fst_analyses has now been thinned by the `fst_analyses.discard()`
    # calls above; remaining items are analyses which are not in the
    # database, although their lemmas should be.
    for analysis in fst_analyses:
        # When the user query is outside of paradigm tables
        # e.g. mad preverb and reduplication: ê-mâh-misi-nâh-nôcihikocik
        # e.g. Initial change: nêpât: {'IC+nipâw+V+AI+Cnj+3Sg'}

        normatized_form_for_analysis = strict_generator().lookup(analysis.smushed())
        if not normatized_form_for_analysis:
            logger.error(
                "Cannot generate normative form for analysis: %s (query: %s)",
                analysis,
                search_run.internal_query,
            )
            continue

        # If there are multiple forms for this analysis, use the one that is
        # closest to what the user typed.
        normatized_user_query = min(
            normatized_form_for_analysis,
            key=lambda f: get_modified_distance(f, search_run.internal_query),
        )

        possible_lemma_wordforms = best_lemma_matches(
            analysis,
            Wordform.objects.filter(text=analysis.lemma, is_lemma=True),
        )

        for lemma_wordform in possible_lemma_wordforms:
            # Not persisted: exists only to carry this result.
            synthetic_wordform = Wordform(
                text=normatized_user_query,
                raw_analysis=analysis.tuple,
                lemma=lemma_wordform,
            )
            search_run.add_result(
                Result(
                    synthetic_wordform,
                    analyzable_inflection_match=True,
                    query_wordform_edit_distance=get_modified_distance(
                        search_run.internal_query,
                        normatized_user_query,
                    ),
                )
            )
def test_generate_non_word():
    """A nonsense analysis generates no wordforms."""
    results = list(strict_generator().lookup("pîpîpôpô+Ipc"))
    assert results == []
def test_generate(analysis, wordform):
    """The expected wordform is among those generated for the analysis."""
    generated = list(strict_generator().lookup(analysis))
    assert wordform in generated