def do_cvd_search(search_run: SearchRun):
    """Use cosine vector distance to add results to the search run.

    Keywords from the query string are turned into vectors from Google News,
    added together, and then compared against pre-computed definition vectors.
    """
    keys = extract_keyed_words(search_run.query.query_string, google_news_vectors())
    if not keys:
        return
    search_run.add_verbose_message(cvd_extracted_keys=keys)
    query_vector = vector_for_keys(google_news_vectors(), keys)

    try:
        closest = definition_vectors().similar_by_vector(query_vector, 50)
    except DefinitionVectorsNotFoundException:
        logger.exception("")
        return

    wordform_queries = [cvd_key_to_wordform_query(cvd_key) for cvd_key, _ in closest]
    similarities = [similarity for cvd_key, similarity in closest]

    # Get all possible wordforms in one big query. We will select more than we
    # need, then filter it down later, but this will have to do until we get
    # better homonym handling.
    wordform_results = Wordform.objects.filter(
        text__in=set(wf["text"] for wf in wordform_queries)
    )

    # Now match back up
    wordforms_by_text = {
        text: list(wordforms)
        for text, wordforms in itertools.groupby(wordform_results, key=lambda x: x.text)
    }

    for similarity, wordform_query in zip(similarities, wordform_queries):
        # gensim uses the terminology, similarity = 1 - distance. Its
        # similarity is a number from 0 to 1, with more similar items having
        # similarity closer to 1. A distance should be small for things that
        # are close together.
        distance = 1 - similarity

        wordforms_for_query = wordforms_by_text.get(wordform_query["text"], None)
        if wordforms_for_query is None:
            logger.warning(
                f"Wordform {wordform_query['text']} not found in CVD; mismatch between "
                "definition vector model file and definitions in database?"
            )
        else:
            for wf in wordforms_for_query:
                if wordform_query_matches(wordform_query, wf):
                    search_run.add_result(Result(wf, cosine_vector_distance=distance))
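# A minimal, self-contained sketch of the vector math that do_cvd_search()
# relies on, assuming gensim KeyedVectors models for both the Google News word
# vectors and the pre-computed definition vectors. The function name and
# arguments here are illustrative assumptions, not the project's actual API.
import numpy as np
from gensim.models import KeyedVectors


def sketch_cvd(query: str, news_vectors: KeyedVectors, defn_vectors: KeyedVectors):
    # Keep only the query words that the word-vector model knows about.
    keys = [word for word in query.lower().split() if word in news_vectors]
    if not keys:
        return []
    # Add the word vectors together to form a single query vector.
    query_vector = np.sum([news_vectors[key] for key in keys], axis=0)
    # gensim returns (key, similarity) pairs, most similar first; convert
    # similarity to a distance so that smaller means closer.
    return [
        (key, 1 - similarity)
        for key, similarity in defn_vectors.similar_by_vector(query_vector, topn=50)
    ]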
def test_espt_search_doesnt_crash_when_no_analysis(db):
    search_run = SearchRun("my little bears")
    espt_search = EsptSearch(search_run)
    espt_search.analyze_query()

    wordform = Wordform(text="pê-")
    wordform.lemma = wordform
    wordform.is_lemma = True
    search_run.add_result(
        Result(wordform=wordform, target_language_keyword_match=["bear"])
    )

    # This will crash if the espt code doesn't handle results without an analysis
    espt_search.inflect_search_results()
def search(*, query: str, include_affixes=True, include_auto_definitions=False) -> SearchRun:
    """
    Perform an actual search, using the provided options.

    This function encapsulates the logic of which search methods to try, and
    in which order, to build up results in a SearchRun.
    """
    search_run = SearchRun(query=query, include_auto_definitions=include_auto_definitions)

    if search_run.query.espt:
        espt_search = EsptSearch(search_run)
        espt_search.analyze_query()

    if settings.MORPHODICT_ENABLE_CVD:
        cvd_search_type = cast_away_optional(
            first_non_none_value(search_run.query.cvd, default=CvdSearchType.DEFAULT)
        )

        # For when you type 'cvd:exclusive' in a query to debug ONLY CVD results!
        if cvd_search_type == CvdSearchType.EXCLUSIVE:

            def sort_by_cvd(r: Result):
                return r.cosine_vector_distance

            search_run.sort_function = sort_by_cvd
            do_cvd_search(search_run)
            return search_run

    fetch_results(search_run)

    if (
        settings.MORPHODICT_ENABLE_AFFIX_SEARCH
        and include_affixes
        and not query_would_return_too_many_results(search_run.internal_query)
    ):
        do_source_language_affix_search(search_run)
        do_target_language_affix_search(search_run)

    if settings.MORPHODICT_ENABLE_CVD:
        if cvd_search_type.should_do_search() and not is_almost_certainly_cree(search_run):
            do_cvd_search(search_run)

    if search_run.query.espt:
        espt_search.inflect_search_results()

    return search_run
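# A hypothetical usage sketch for the search() entry point above. The query
# strings are assumptions for illustration; unsorted_results(), wordform.text,
# and cosine_vector_distance are the only accessors taken from the surrounding
# code, and the exact result-reading pattern is not guaranteed to match the
# project's views.
def sketch_search_usage():
    # An ordinary target-language query: keyword results first, then affix
    # and CVD results, depending on the MORPHODICT_ENABLE_* settings.
    run = search(query="little bear")

    # 'cvd:exclusive' skips every other step and sorts by cosine vector
    # distance, which is useful for debugging CVD results on their own.
    cvd_only = search(query="cvd:exclusive little bear")

    for result in cvd_only.unsorted_results():
        print(result.wordform.text, result.cosine_vector_distance)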
def find_pos_matches(search_run: SearchRun) -> None:
    analyzed_query = AnalyzedQuery(search_run.internal_query)
    # print(search_run.verbose_messages["new_tags"])

    if len(search_run.verbose_messages) <= 1:
        return
    tags = search_run.verbose_messages[1].get("tags")

    for result in search_run.unsorted_results():
        pos_match(result, tags)
def is_almost_certainly_cree(search_run: SearchRun) -> bool:
    """
    Heuristics intended to AVOID doing an English search.
    """
    query = search_run.query

    # If there is a word with two or more dashes in it, it's probably Cree:
    if any(term.count("-") >= 2 for term in query.query_terms):
        search_run.add_verbose_message("Skipping CVD because query has too many hyphens")
        return True

    if CREE_LONG_VOWEL.search(query.query_string):
        search_run.add_verbose_message("Skipping CVD because query has Cree diacritics")
        return True

    return False
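# CREE_LONG_VOWEL is defined elsewhere in the project; a plausible stand-in,
# assuming it matches the circumflex or macron long vowels used in Cree SRO
# orthography, might look like the pattern below. This is an assumption for
# illustration, not the project's actual regular expression.
import re

CREE_LONG_VOWEL = re.compile(r"[êîôâēīōā]")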
def test_espt_search(db, search, params):
    search_run = SearchRun(search)
    espt_search = EsptSearch(search_run)
    espt_search.analyze_query()
    assert search_run.query.query_terms == params["expected_query_terms"]
    assert search_run.query.query_string == " ".join(params["expected_query_terms"])
    assert espt_search.new_tags == params["expected_new_tags"]

    lemma1 = Wordform.objects.get(slug=params["slug"], is_lemma=True)
    search_run.add_result(
        Result(
            wordform=lemma1,
            target_language_keyword_match=params["expected_query_terms"],
        )
    )
    espt_search.inflect_search_results()

    assert params["expected_inflection"] in [
        entry.wordform.text for entry in search_run.unsorted_results()
    ]