Esempio n. 1
0
def test_generation(test_file, lexicon_file):
    """Check generated forms against the expected forms in a YAML test file.

    Each entry loaded from ``test_file`` is treated as a mapping with a
    ``lemma`` key, an optional ``location`` key (default ``""``), and any
    number of remaining ``parse: form`` pairs. For every pair, the lexicon
    built from ``lexicon_file`` is asked to generate the form (passing the
    location as ``context``) and the outcome is tallied on a ``Counter``:

    * fail    -- nothing was generated, or the expected form is not among
                 the ``/``-separated alternatives that were generated;
    * success -- the generated form equals the expected form;
    * skip    -- the expected form is one of several generated alternatives.

    Both sides are passed through ``strip_length`` before being compared.
    The tallies are reported via ``counter.results()`` once all entries have
    been processed.
    """
    lexicon = Lexicon(lexicon_file)
    counter = Counter()

    with open(test_file) as f:
        # yaml.load() without an explicit Loader is deprecated, is rejected by
        # PyYAML >= 6, and can construct arbitrary Python objects; safe_load
        # restricts parsing to plain YAML data. An empty document loads as
        # None, so fall back to an empty list of tests.
        tests = yaml.safe_load(f) or []

    for test in tests:
        lemma = test.pop("lemma")
        location = test.pop("location", "")
        # Whatever is left after popping the metadata keys is parse -> form.
        for parse, form in test.items():
            predicted = lexicon.generate(lemma, parse, context=location)
            if predicted is None:
                counter.fail("didn't know how to work out {} {} {}".format(lemma, parse, form))
                continue

            expected = strip_length(form)
            if expected == strip_length(predicted):
                counter.success()
            elif expected not in [strip_length(p) for p in predicted.split("/")]:
                counter.fail("{} {} got {} instead of {} in {}".format(lemma, parse, predicted, form, location))
            else:
                # The expected form is only one of the generated alternatives.
                counter.skip("{} {} {} {} {}".format(lemma, parse, form, predicted, location))

    counter.results()
Esempio n. 2
0
#!/usr/bin/env python3

from pysblgnt import morphgnt_rows

from characters import strip_length

from utils import Counter

from verbs import Lexicon

# Verb lexicon built from the MorphGNT lexicon file.
lexicon = Lexicon("lexicons/morphgnt.yaml")

# Tally object (Counter comes from the project's utils module).
counter = Counter()

# Visit every row that morphgnt_rows() yields for book numbers 1 through 27.
for book_num in range(1, 28):
    for row in morphgnt_rows(book_num):
        ccat_pos = row["ccat-pos"]
        ccat_parse = row["ccat-parse"]
        norm = row["norm"]
        lemma = row["lemma"]

        # Only rows whose part-of-speech tag is "V-" are processed.
        if ccat_pos != "V-":
            continue

        # Build a parse code from the ccat-parse field. The character at
        # index 3 selects the layout; rows with any value other than "N",
        # "P" or "I" are skipped.
        # NOTE(review): `parse` is not used in the visible part of this
        # script -- confirm whether later code depends on it.
        if ccat_parse[3] == "N":
            parse = ccat_parse[1:4]
        elif ccat_parse[3] == "P":
            parse = ccat_parse[1:4] + "." + ccat_parse[4:7]
        elif ccat_parse[3] == "I":
            parse = ccat_parse[1:4] + "." + ccat_parse[0] + ccat_parse[5]
        else:
            continue

        # Record the normalised form under a per-lemma key chosen by the
        # three-character code in ccat_parse[1:4].
        # NOTE(review): `words` is not defined in the visible part of this
        # script; the usage below needs a mapping of lemma -> key -> set-like
        # object (presumably a nested defaultdict of sets) -- confirm.
        if ccat_parse[1:4] == "PPN":
            words[lemma]["present.mp1.actual"].add(norm)

        if ccat_parse[1:4] == "AAN":
            words[lemma]["aorist.act.actual"].add(norm)

        if ccat_parse[1:4] == "AMN":
            words[lemma]["aorist.mp1.actual"].add(norm)

        if ccat_parse[1:4] == "APN":
            words[lemma]["aorist.mp2.actual"].add(norm)


# NOTE(review): `lexicon` is already bound to a Lexicon built from this same
# path earlier in the script -- confirm whether this second construction is
# needed.
lexicon = Lexicon("lexicons/morphgnt.yaml")

# Visit the collected lemmas in the order given by collator.sort_key.
# NOTE(review): `collator` is not defined in the visible part of this script.
for k in sorted(words.keys(), key=collator.sort_key):
    # Ask the lexicon for a form for each of six three-letter parse codes.
    # NOTE(review): only the PAN and PMN results are used in the visible
    # code; the others are presumably consumed further down -- confirm.
    PAN_generated = lexicon.generate(k, "PAN")
    PMN_generated = lexicon.generate(k, "PMN")
    PPN_generated = lexicon.generate(k, "PPN")
    AAN_generated = lexicon.generate(k, "AAN")
    AMN_generated = lexicon.generate(k, "AMN")
    APN_generated = lexicon.generate(k, "APN")

    # A generated form is recorded (after strip_length) only when the lemma
    # has no entry under the corresponding ".actual" key.
    if PAN_generated:
        if "present.act.actual" not in words[k]:
            words[k]["present.act.generated"].add(strip_length(PAN_generated))
    if PMN_generated:
        if "present.mp1.actual" not in words[k]:
            words[k]["present.mp1.generated"].add(strip_length(PMN_generated))
Esempio n. 4
0
        # File the normalised form under the per-lemma key selected by the
        # three-character code; the codes are mutually exclusive, so at most
        # one branch runs.
        parse_code = ccat_parse[1:4]
        if parse_code in ("PMN", "PPN"):
            # Both codes are stored under the same "present.mp1.actual" key.
            words[lemma]["present.mp1.actual"].add(norm)
        elif parse_code == "AAN":
            words[lemma]["aorist.act.actual"].add(norm)
        elif parse_code == "AMN":
            words[lemma]["aorist.mp1.actual"].add(norm)
        elif parse_code == "APN":
            words[lemma]["aorist.mp2.actual"].add(norm)

# Verb lexicon built from the MorphGNT lexicon file.
lexicon = Lexicon("lexicons/morphgnt.yaml")

# Visit the collected lemmas in the order given by collator.sort_key.
# NOTE(review): `words` and `collator` are not defined in the visible part of
# this excerpt -- confirm where they are created.
for k in sorted(words.keys(), key=collator.sort_key):
    # Ask the lexicon for a form for each of six three-letter parse codes.
    # NOTE(review): only the PAN and PMN results are used in the visible
    # code; the others are presumably consumed further down -- confirm.
    PAN_generated = lexicon.generate(k, "PAN")
    PMN_generated = lexicon.generate(k, "PMN")
    PPN_generated = lexicon.generate(k, "PPN")
    AAN_generated = lexicon.generate(k, "AAN")
    AMN_generated = lexicon.generate(k, "AMN")
    APN_generated = lexicon.generate(k, "APN")

    # A generated form is recorded (after strip_length) only when the lemma
    # has no entry under the corresponding ".actual" key.
    if PAN_generated:
        if "present.act.actual" not in words[k]:
            words[k]["present.act.generated"].add(strip_length(PAN_generated))
    if PMN_generated:
        if "present.mp1.actual" not in words[k]:
            words[k]["present.mp1.generated"].add(strip_length(PMN_generated))
#!/usr/bin/env python3

from pysblgnt import morphgnt_rows

from characters import strip_length

from utils import Counter

from verbs import Lexicon


# Verb lexicon built from the MorphGNT lexicon file.
lexicon = Lexicon("lexicons/morphgnt.yaml")

# Tally object (Counter is imported from the project's utils module).
counter = Counter()

for book_num in range(1, 28):
    for row in morphgnt_rows(book_num):
        ccat_pos = row["ccat-pos"]
        ccat_parse = row["ccat-parse"]
        norm = row["norm"]
        lemma = row["lemma"]
        if ccat_pos != "V-":
            continue

        if ccat_parse[3] == "N":
            parse = ccat_parse[1:4]
        elif ccat_parse[3] == "P":
            parse = ccat_parse[1:4] + "." + ccat_parse[4:7]
        elif ccat_parse[3] == "I":
            parse = ccat_parse[1:4] + "." + ccat_parse[0] + ccat_parse[5]
        else: