Beispiel #1
0
def test_derived_terms_pitaa():
    """
    Check how many heads tagged "deriv" are parsed for the page "pitää".

    The page text comes from read_data("pitaa"); the number of heads whose
    "tag" is "deriv" must fall in the inclusive range 23..27.
    """
    _, all_heads = parse_enwiktionary_page("pitää", read_data("pitaa"))
    deriv_count = sum(1 for entry in all_heads if entry["tag"] == "deriv")
    assert 23 <= deriv_count <= 27
Beispiel #2
0
def test_parse_min_results(entry):
    """
    Smoke test: parsing ``entry`` must produce at least ``MIN_LENGTHS[entry]``
    items once the definitions are passed through ``flat_roundtrip_senses``.
    """
    parsed_defns, _ = parse_enwiktionary_page(entry, read_data(entry))

    sense_count = len(flat_roundtrip_senses(parsed_defns))
    required = MIN_LENGTHS[entry]
    # The failure message reports the required count, the entry and the actual count.
    assert (
        sense_count >= required
    ), "Needed {} senses for {} but got {}".format(required, entry, sense_count)
Beispiel #3
0
def test_compound_fi(compound, subwords):
    """
    Check the etymology heading parsed for ``compound``.

    Exactly one head tagged "etymology-heading" must be present. That head
    must have ``ety_idx`` set to None, exactly one entry in ``etys``, and one
    "bits" item per element of ``subwords``; each bit's "headword" must match
    the corresponding ``subwords`` element when used as a ``re.match`` pattern.
    """
    _, heads = parse_enwiktionary_page(compound, read_data(compound))
    # Lazy filter so heads are inspected one at a time, in order.
    ety_heads = (item for item in heads if item["tag"] == "etymology-heading")
    seen = 0
    for seen, ety_head in enumerate(ety_heads, start=1):
        assert ety_head["ety_idx"] is None
        etys = ety_head["etys"]
        assert len(etys) == 1
        bits = etys[0]["bits"]
        assert len(bits) == len(subwords)
        for pattern, bit in zip(subwords, bits):
            assert re.match(pattern, bit["headword"])
    assert seen == 1
Beispiel #4
0
def test_parse_no_exceptions(entry):
    """Smoke test: parsing ``entry`` with ``skip_ety=True`` must not raise."""
    page_text = read_data(entry)
    parse_enwiktionary_page(entry, page_text, skip_ety=True)
Beispiel #5
0
def test_gram_note_has_formatting():
    """
    Check the cleaned text of the first "Noun" definition parsed from THING.

    It must contain "thing" and must not contain "elative".
    """
    defns, _heads = parse_enwiktionary_page("test", THING)
    first_noun_text = defns["Noun"][0].cleaned_defn
    assert "thing" in first_noun_text
    assert "elative" not in first_noun_text
Beispiel #6
0
def test_maki_not_gram_note():
    """
    Check that the first "Noun" definition of "maki" keeps its gloss text in
    ``cleaned_defn``.
    """
    defns, _heads = parse_enwiktionary_page("maki", read_data("maki"))
    expected_gloss = "a relatively large, usually rounded elevation of earth"
    first_noun = defns["Noun"][0]
    assert expected_gloss in first_noun.cleaned_defn
Beispiel #7
0
def test_saattaa():
    """
    Check the cleaned text of the first subsense of the fourth "Verb"
    definition of "saattaa": it must contain both "might" and
    "do, probably do".
    """
    defns, _heads = parse_enwiktionary_page("saattaa", read_data("saattaa"))
    fourth_verb = defns["Verb"][3]
    subsense_text = fourth_verb.subsenses[0].cleaned_defn
    for expected in ("might", "do, probably do"):
        assert expected in subsense_text
Beispiel #8
0
def test_pitaa_gram_rm():
    """
    Check the cleaned text of the third "Verb" definition parsed from the
    "pitaa" data: it must contain "like" and must not contain "elative".
    """
    # NOTE(review): the title passed here is ASCII "pitaa", while
    # test_derived_terms_pitaa passes "pitää" for the same data -- confirm
    # whether the difference is intentional.
    parsed_defns, _ = parse_enwiktionary_page("pitaa", read_data("pitaa"))
    cleaned = parsed_defns["Verb"][2].cleaned_defn
    assert "like" in cleaned
    assert "elative" not in cleaned
Beispiel #9
0
def test_vuotta_head_gram():
    """
    Check the ``morph`` of the first "Noun" definition under each etymology
    of "vuotta": "Etymology 1" must have case "abessive" and "Etymology 2"
    must have case "partitive".
    """
    parsed_defns, _ = parse_enwiktionary_page("vuotta", read_data("vuotta"))
    expected_cases = (
        ("Etymology 1", "abessive"),
        ("Etymology 2", "partitive"),
    )
    for section, case in expected_cases:
        form = parsed_defns[section]["Noun"][0].morph
        # A falsy ``morph`` fails the check before the "case" lookup.
        assert form and form["case"] == case
Beispiel #10
0
def parse_file(filename):
    """
    Parse the contents of ``filename`` and pretty-print the result.

    ``filename`` must provide a ``read()`` method; the object itself is also
    passed as the first argument to ``parse_enwiktionary_page``.
    """
    # NOTE(review): other callers pass a page-title string as the first
    # argument; here the readable object itself is passed -- confirm intended.
    page_text = filename.read()
    result = parse_enwiktionary_page(filename, page_text)
    if result is None:
        print("No definitions found")
    # NOTE(review): a None result is still pretty-printed after the message
    # above -- confirm whether an early return was intended.
    pprint(result)