#!/usr/bin/env python3 import yaml from characters import strip_length from parse import Lexicon, Endings TEST_FILE = "tests/pratt.yaml" LEXICON_FILE = "lexicons/pratt.yaml" ENDINGS_FILE = "stemming.yaml" lexicon = Lexicon(LEXICON_FILE) endings = Endings(ENDINGS_FILE) if __name__ == "__main__": with open(TEST_FILE) as f: for test in yaml.load(f): lemma = strip_length(test.pop("lemma")) test_length = test.pop("test_length", True) location = test.pop("location", None) for parse, form in test.items(): stem_info = lexicon.stem_info(lemma, parse, context=location) if stem_info is None: print("couldn't get stem info for {} {}".format( lemma, parse)) continue ending_info = endings.ending_info(form, parse) valid_stems = (
from parse import Lexicon, Endings

# Entries to be ignored; presumably consulted further on in the script to
# skip these lemmas -- confirm against the code that reads this list.
IGNORE_LIST = ["σαβαχθάνι", "ἔνι", "χρή"]

LEXICON_FILE = "lexicons/morphgnt.yaml"
ENDINGS_FILE = "stemming.yaml"

lexicon = Lexicon(LEXICON_FILE)
endings = Endings(ENDINGS_FILE)

if __name__ == "__main__":
    # Walk every row of books 1 through 27.
    for book_num in range(1, 28):
        for row in morphgnt_rows(book_num):
            # Pull the four fields used by this script, looked up in the
            # order: part-of-speech code, parse code, normalized form, lemma.
            ccat_pos, ccat_parse, form, lemma = (
                row[field]
                for field in ("ccat-pos", "ccat-parse", "norm", "lemma")
            )

            # Only rows whose part-of-speech code is "V-" are processed
            # (presumably verbs -- confirm against the MorphGNT tag set).
            if ccat_pos != "V-":
                continue
#!/usr/bin/env python3
"""Look up stem information for every case listed in the Dik test file.

Each entry of TEST_FILE is a mapping with a required ``lemma`` key, optional
``test_length`` (default True) and ``location`` (default None) keys, and a
parse -> form pair for every remaining key.
"""

import yaml

from characters import strip_length
from parse import Lexicon, Endings

TEST_FILE = "tests/dik.yaml"
LEXICON_FILE = "lexicons/dik.yaml"
ENDINGS_FILE = "stemming.yaml"

lexicon = Lexicon(LEXICON_FILE)
endings = Endings(ENDINGS_FILE)

if __name__ == "__main__":
    # The test data contains Greek text, so decode it explicitly as UTF-8
    # instead of relying on the platform's default encoding.
    with open(TEST_FILE, encoding="utf-8") as f:
        # safe_load: yaml.load() without an explicit Loader is deprecated
        # (and rejected outright by PyYAML >= 6); the test file is plain
        # data, so the safe loader is sufficient.
        for test in yaml.safe_load(f):
            # Pop the control keys first so that only parse -> form pairs
            # remain in the mapping for the loop below.
            lemma = strip_length(test.pop("lemma"))
            test_length = test.pop("test_length", True)
            location = test.pop("location", None)

            for parse, form in test.items():
                stem_info = lexicon.stem_info(lemma, parse, context=location)
                if stem_info is None:
                    # Report the missing entry and move on to the next case.
                    print("couldn't get stem info for {} {}".format(lemma, parse))
                    continue
from characters import strip_length from pysblgnt import morphgnt_rows from parse import Lexicon, Endings IGNORE_LIST = [ "σαβαχθάνι", "ἔνι", "χρή", ] LEXICON_FILE = "lexicons/morphgnt.yaml" ENDINGS_FILE = "stemming.yaml" lexicon = Lexicon(LEXICON_FILE) endings = Endings(ENDINGS_FILE) if __name__ == "__main__": for book_num in range(1, 28): for row in morphgnt_rows(book_num): ccat_pos = row["ccat-pos"] ccat_parse = row["ccat-parse"] form = row["norm"] lemma = row["lemma"] if ccat_pos != "V-": continue if lemma in IGNORE_LIST: