Esempio n. 1
0
#!/usr/bin/env python3

import yaml

from characters import strip_length

from parse import Lexicon, Endings

# Relative paths to the data files this script works with: the YAML test
# cases read by the main block, and the files handed to Lexicon / Endings.
TEST_FILE = "tests/pratt.yaml"
LEXICON_FILE = "lexicons/pratt.yaml"
ENDINGS_FILE = "stemming.yaml"

# Constructed at module level, i.e. on import as well as when run as a
# script. NOTE(review): presumably each constructor loads its file eagerly —
# confirm in parse.py.
lexicon = Lexicon(LEXICON_FILE)
endings = Endings(ENDINGS_FILE)

if __name__ == "__main__":

    # Each entry in TEST_FILE is a mapping with a "lemma", optional
    # "test_length" and "location" settings, and parse -> form pairs.
    # The file is read as UTF-8 (YAML's default encoding) rather than the
    # platform locale, so non-ASCII forms decode the same way everywhere.
    with open(TEST_FILE, encoding="utf-8") as f:
        # yaml.load() without an explicit Loader is deprecated since
        # PyYAML 5.1 and raises TypeError in PyYAML 6; safe_load() works on
        # every version and only builds plain Python objects.
        # `or []` makes an empty test file run as zero tests instead of
        # failing on iteration over None.
        for test in yaml.safe_load(f) or []:
            lemma = strip_length(test.pop("lemma"))
            test_length = test.pop("test_length", True)
            location = test.pop("location", None)

            # After the pops above only the parse -> form pairs remain.
            for parse, form in test.items():
                stem_info = lexicon.stem_info(lemma, parse, context=location)
                if stem_info is None:
                    # Report the gap and move on to the next form.
                    print("couldn't get stem info for {} {}".format(
                        lemma, parse))
                    continue
                ending_info = endings.ending_info(form, parse)
                valid_stems = (
from parse import Lexicon, Endings


# Forms/lemmas singled out for special handling.
# NOTE(review): the code that consults this list is outside the visible
# part of the file; the name suggests these entries are skipped — confirm.
IGNORE_LIST = [
    "σαβαχθάνι",
    "ἔνι",
    "χρή",
]


# Relative paths to the data files handed to Lexicon / Endings below.
LEXICON_FILE = "lexicons/morphgnt.yaml"
ENDINGS_FILE = "stemming.yaml"


# Constructed at module level, i.e. on import as well as when run as a
# script. NOTE(review): presumably each constructor loads its file eagerly —
# confirm in parse.py.
lexicon = Lexicon(LEXICON_FILE)
endings = Endings(ENDINGS_FILE)


if __name__ == "__main__":

    # Walk book numbers 1 through 27 (the range end is exclusive).
    for book_num in range(1, 28):
        # NOTE(review): morphgnt_rows is not imported in the visible part of
        # this file; it is expected to be brought into scope above.
        for row in morphgnt_rows(book_num):
            # Pull the fields used below out of the row mapping.
            ccat_pos = row["ccat-pos"]
            ccat_parse = row["ccat-parse"]
            form = row["norm"]
            lemma = row["lemma"]

            # Only rows tagged "V-" are processed further (presumably
            # verbs — confirm against the MorphGNT tag set).
            if ccat_pos != "V-":
                continue
Esempio n. 3
0
#!/usr/bin/env python3

import yaml

from characters import strip_length

from parse import Lexicon, Endings


# Relative paths to the data files this script works with: the YAML test
# cases read by the main block, and the files handed to Lexicon / Endings.
TEST_FILE = "tests/dik.yaml"
LEXICON_FILE = "lexicons/dik.yaml"
ENDINGS_FILE = "stemming.yaml"


# Constructed at module level, i.e. on import as well as when run as a
# script. NOTE(review): presumably each constructor loads its file eagerly —
# confirm in parse.py.
lexicon = Lexicon(LEXICON_FILE)
endings = Endings(ENDINGS_FILE)


if __name__ == "__main__":

    # Each entry in TEST_FILE is a mapping with a "lemma", optional
    # "test_length" and "location" settings, and parse -> form pairs.
    # The file is read as UTF-8 (YAML's default encoding) rather than the
    # platform locale, so non-ASCII forms decode the same way everywhere.
    with open(TEST_FILE, encoding="utf-8") as f:
        # yaml.load() without an explicit Loader is deprecated since
        # PyYAML 5.1 and raises TypeError in PyYAML 6; safe_load() works on
        # every version and only builds plain Python objects.
        # `or []` makes an empty test file run as zero tests instead of
        # failing on iteration over None.
        for test in yaml.safe_load(f) or []:
            lemma = strip_length(test.pop("lemma"))
            test_length = test.pop("test_length", True)
            location = test.pop("location", None)

            # After the pops above only the parse -> form pairs remain.
            for parse, form in test.items():
                stem_info = lexicon.stem_info(lemma, parse, context=location)
                if stem_info is None:
                    # Report the gap and move on to the next form.
                    print("couldn't get stem info for {} {}".format(lemma, parse))
                    continue
Esempio n. 4
0
from characters import strip_length

from pysblgnt import morphgnt_rows

from parse import Lexicon, Endings

# Lemmas checked by the main loop (`lemma in IGNORE_LIST`).
# NOTE(review): the branch taken on a match is cut off in this view; the
# name suggests these lemmas are skipped — confirm.
IGNORE_LIST = [
    "σαβαχθάνι",
    "ἔνι",
    "χρή",
]

# Relative paths to the data files handed to Lexicon / Endings below.
LEXICON_FILE = "lexicons/morphgnt.yaml"
ENDINGS_FILE = "stemming.yaml"

# Constructed at module level, i.e. on import as well as when run as a
# script. NOTE(review): presumably each constructor loads its file eagerly —
# confirm in parse.py.
lexicon = Lexicon(LEXICON_FILE)
endings = Endings(ENDINGS_FILE)

if __name__ == "__main__":

    # Walk book numbers 1 through 27 (the range end is exclusive).
    for book_num in range(1, 28):
        # morphgnt_rows comes from pysblgnt and yields one mapping per row
        # for the given book number.
        for row in morphgnt_rows(book_num):
            # Pull the fields used below out of the row mapping.
            ccat_pos = row["ccat-pos"]
            ccat_parse = row["ccat-parse"]
            form = row["norm"]
            lemma = row["lemma"]

            # Only rows tagged "V-" are processed further (presumably
            # verbs — confirm against the MorphGNT tag set).
            if ccat_pos != "V-":
                continue

            if lemma in IGNORE_LIST: