Example #1
0
def test_search_for_exact_lemma(lemma: Wordform):
    """
    Searching for a lemma's own text must yield exactly one result whose
    lemma wordform is that lemma.

    That single result must report the lemma text as the matched Cree form
    and carry no preverbs, reduplication tags, or initial change tags.
    """

    assert lemma.is_lemma
    # The lemma portion of the analysis is everything before the first "+".
    analysis_head = lemma.analysis.partition("+")[0]
    assert not any(char != char.lower() for char in analysis_head)
    # Only consider lemmas whose text agrees with the analysis.
    assume(lemma.text == analysis_head)

    hits = set()
    for candidate in Wordform.search(lemma.text):
        if candidate.is_lemma and candidate.lemma_wordform == lemma:
            hits.add(candidate)
    assert len(hits) == 1

    # Inspect the one and only exact match in more detail.
    (hit,) = hits
    assert hit.matched_cree == lemma.text
    assert not hit.preverbs
    assert not hit.reduplication_tags
    assert not hit.initial_change_tags
Example #2
0
def test_lemma_and_syncretic_form_ranking(lemma):
    """
    Check that the lemma is ranked ahead of any forms that are syncretic
    with it. In other words, make sure THIS never happens:

        sheep [Plural]
        form of sheep [Singular]

        (no definition found for sheep [Plural])

        sheep [Singular]
        1. a fluffy mammal that appears in dreams

    Note: expect this test to be **FLAKY** when the implementation is buggy
    and relies on a **non-stable** sort or comparison.
    """

    all_results = Wordform.search(lemma)
    assert len(all_results) >= 2

    # Keep only the results whose lemma is the word that was searched for.
    same_lemma_results = []
    for result in all_results:
        if result.lemma_wordform.text == lemma:
            same_lemma_results.append(result)
    assert len(same_lemma_results) >= 2
    assert any(result.is_lemma for result in same_lemma_results)

    # Among those, the lemma itself must come first.
    top_result = same_lemma_results[0]
    assert top_result.is_lemma, f"unexpected first result: {top_result}"
Example #3
0
def test_search_for_stored_non_lemma():
    """
    Search for a "stored non-lemma": a wordform in the database that is NOT
    a lemma, and check that the result points back at its lemma.
    """
    # "S/he would tell us stories."
    query = "ê-kî-âcimikoyâhk"
    expected_lemma = "âcimêw"

    hits = Wordform.search(query)
    assert len(hits) >= 1

    matching = [hit for hit in hits if hit.matched_cree == query]
    assert len(matching) >= 1

    # Inspect the first exact match in more detail.
    first = matching[0]
    assert not first.is_lemma
    assert first.lemma_wordform.text == expected_lemma
    # todo: tags are not implemented
    # assert not first.preverbs
    # assert not first.reduplication_tags
    # assert not first.initial_change_tags

    # The lemma must have at least one definition, each with a source.
    assert len(first.lemma_wordform.definitions.all()) >= 1
    for dfn in first.lemma_wordform.definitions.all():
        assert len(dfn.source_ids) >= 1
def index(request):  # pragma: no cover
    """
    Render the homepage, optionally with initial search results to display.

    When the request carries a non-empty ``q`` GET parameter, that value is
    passed to ``Wordform.search`` and the serialized results are handed to
    the template along with the query itself.

    :param request: the incoming request; its ``GET`` mapping is read for ``q``
    :return: the result of rendering ``CreeDictionary/index.html``
    """

    user_query = request.GET.get("q")

    if user_query:
        search_results = [
            search_result.serialize() for search_result in Wordform.search(user_query)
        ]
        did_search = True
    else:
        # No (or empty) query: show the bare homepage.
        search_results = []
        did_search = False

    context = {
        "word_search_form": WordSearchForm(),
        # when we have initial query word to search and display
        "query_string": user_query,
        "search_results": search_results,
        "did_search": did_search,
    }
    # Return the rendered response directly instead of wrapping it in a
    # second HttpResponse; this matches how search_results() returns render().
    return render(request, "CreeDictionary/index.html", context)
Example #5
0
def test_search_for_pronoun() -> None:
    """
    Searching for the unaccented "oma" must surface the common pronoun
    "ôma" as the matched Cree form of at least one result.
    """

    matched_forms = set()
    for hit in Wordform.search("oma"):
        matched_forms.add(hit.matched_cree)
    assert "ôma" in matched_forms
Example #6
0
def test_search_for_english() -> None:
    """
    A query that is definitely English must have its top result marked as
    matched by English.
    """

    # This should match "âcimowin" and related words:
    top_result = Wordform.search("story")[0]

    assert top_result.matched_by == Language.ENGLISH
Example #7
0
def test_search_words_with_preverbs():
    """
    Preverbs in the query should be extracted and exposed on the resulting
    SearchResult instance.
    """
    hits = Wordform.search("nitawi-nipâw")
    assert len(hits) == 1

    # Exactly one preverb is expected on the single result: "nitawi-".
    preverbs = hits.pop().preverbs
    assert len(preverbs) == 1
    assert preverbs[0].text == "nitawi-"
Example #8
0
def test_when_linguistic_breakdown_absent():
    """
    A result without an FST analysis must expose an empty linguistic
    breakdown (both head and tail).
    """
    # pê- is a preverb
    # it's not analyzable by the fst and should not have a linguistic breakdown
    hits = Wordform.search("pe-")
    assert len(hits) == 1

    only_hit = hits[0]
    assert only_hit.linguistic_breakdown_head == ()
    assert only_hit.linguistic_breakdown_tail == ()
Example #9
0
def test_search_text_with_ambiguous_word_classes():
    """
    When a query is ambiguous, results from every word class should be
    returned.
    """
    # pipon can be viewed as a Verb as well as a Noun
    parts_of_speech = set()
    for hit in Wordform.search("pipon"):
        if hit.matched_cree == "pipon":
            parts_of_speech.add(hit.lemma_wordform.pos)

    assert parts_of_speech == {"N", "V"}
def search_results(request, query_string: str):  # pragma: no cover
    """
    Render the search-result boxes for the given user query.

    The query is passed to ``Wordform.search`` and each result is serialized
    before being handed to the ``CreeDictionary/word-entries.html`` template.
    """
    serialized_hits = [hit.serialize() for hit in Wordform.search(query_string)]
    context = {
        "query_string": query_string,
        "search_results": serialized_hits,
    }
    return render(request, "CreeDictionary/word-entries.html", context)
Example #11
0
def test_search_serialization_json_parsable(query):
    """
    Test that SearchResult.serialize produces JSON-compatible results.

    Every result returned for ``query`` is serialized and passed through
    ``json.dumps``; any error raised there fails the test.
    """
    results = Wordform.search(query)
    for result in results:
        serialized = result.serialize()
        try:
            json.dumps(serialized)
        except Exception as error:
            # Any error from json.dumps counts as "not JSON compatible".
            # Put the cause in the failure message itself rather than
            # printing it, so it is visible in the test report.
            pytest.fail(
                "SearchResult.serialized method failed to be json compatible: "
                f"{error!r}"
            )
Example #12
0
def test_lemma_ranking_most_frequent_word():
    """
    The most frequent lemma should be ranked first for an English query.
    """
    # The English word "sleep" should match many Cree words, but nipâw should
    # show up first because it undoubtedly has the highest frequency.
    top_result = Wordform.search("sleep")[0]
    assert top_result.matched_cree == "nipâw"