def test_generate(stemming_file, lexicon_file, test_file, global_tags=None, debug=False):
    """
    Generate every form listed in test_file and report discrepancies.

    A GreekInflexion is built from stemming_file and lexicon_file. Each
    entry loaded from the YAML test_file is a mapping: its "lemma" entry
    (required) and its optional "source", "tags" and "test_length" entries
    are popped, and every remaining key/value pair is treated as a parse
    key and the expected form(s), with alternatives separated by "/".

    The tags used for an entry are its own "tags", plus its "source" (if
    truthy), plus everything in global_tags (if given).

    A row is passed to output_item when the generated forms do not match
    the expected ones, or for every row when debug is true. Nothing is
    returned.
    """
    ginflexion = GreekInflexion(stemming_file, lexicon_file)

    # The test data is Greek text; do not depend on the locale's default
    # encoding when reading it.
    with open(test_file, encoding="utf-8") as f:
        # safe_load returns None for an empty document
        tests = yaml.safe_load(f) or []

    for test in tests:
        source = test.pop("source", None)
        test.pop("test_length", False)  # popped so it is not treated as a parse key
        lemma = test.pop("lemma")

        tags = set(test.pop("tags", []))
        if source:
            tags.add(source)
        if global_tags:
            tags.update(global_tags)

        segmented_lemma = ginflexion.segmented_lemmas.get(lemma)

        # whatever is left in the mapping is parse key -> expected form(s)
        for key, form in sorted(test.items()):
            stem = ginflexion.find_stems(lemma, key, tags)
            generated = ginflexion.generate(lemma, key, tags)

            if stem:
                stem_guess = None
            else:
                # no stem found: collect the stems possible_stems yields for
                # this form, restricted to exactly this parse key
                stem_guess = [
                    guess
                    for _, guess in ginflexion.possible_stems(
                        form, "^" + key + "$")
                ]

            # both sides are sorted first and only then passed through
            # strip_length, exactly as before
            actual = [strip_length(w) for w in sorted(generated)]
            expected = [strip_length(w) for w in sorted(form.split("/"))]
            correct = "✓" if actual == expected else "✕"

            if debug or correct == "✕":
                output_item(
                    lemma, segmented_lemma, key, None, form, None,
                    stem, stem_guess, None, None, generated, correct)
def test_generate(
    stemming_file, lexicon_file, test_file, global_tags=None, debug=False
):
    """
    Generate every form listed in test_file and report discrepancies.

    A GreekInflexion is built from stemming_file and lexicon_file. Each
    entry loaded from the YAML test_file is a mapping: its "lemma" entry
    (required) and its optional "source", "tags" and "test_length" entries
    are popped, and every remaining key/value pair is treated as a parse
    key and the expected form(s), with alternatives separated by "/".

    The tags used for an entry are its own "tags", plus its "source" (if
    truthy), plus everything in global_tags (if given).

    A row is passed to output_item when the generated forms do not match
    the expected ones, or for every row when debug is true. Nothing is
    returned.
    """
    ginflexion = GreekInflexion(stemming_file, lexicon_file)

    # The test data is Greek text; do not depend on the locale's default
    # encoding when reading it.
    with open(test_file, encoding="utf-8") as f:
        # yaml.load() without an explicit Loader raises TypeError on
        # PyYAML >= 6 and can construct arbitrary objects on older
        # versions; the test files only need plain YAML data.
        # safe_load returns None for an empty document.
        tests = yaml.safe_load(f) or []

    for test in tests:
        source = test.pop("source", None)
        test.pop("test_length", False)  # popped so it is not treated as a parse key
        lemma = test.pop("lemma")

        tags = set(test.pop("tags", []))
        if source:
            tags.add(source)
        if global_tags:
            tags.update(global_tags)

        # whatever is left in the mapping is parse key -> expected form(s)
        for key, form in sorted(test.items()):
            stem = ginflexion.find_stems(lemma, key, tags)
            generated = ginflexion.generate(lemma, key, tags)

            if stem:
                stem_guess = None
            else:
                # no stem found: collect the stems possible_stems yields for
                # this form, restricted to exactly this parse key
                stem_guess = [
                    guess
                    for _, guess in ginflexion.possible_stems(
                        form, "^" + key + "$")
                ]

            # both sides are sorted first and only then passed through
            # strip_length, exactly as before
            actual = [strip_length(w) for w in sorted(generated)]
            expected = [strip_length(w) for w in sorted(form.split("/"))]
            correct = "✓" if actual == expected else "✕"

            if debug or correct == "✕":
                output_item(
                    lemma, key, None, form, None, stem, stem_guess,
                    None, None, generated, correct)
def test_generate(
    stemming_file, lexicon_file, test_file, global_tags=None, debug=False
):
    """
    Generate every form listed in test_file and print discrepancies.

    A GreekInflexion is built from stemming_file and lexicon_file. Each
    entry loaded from the YAML test_file is a mapping: its "lemma" entry
    (required) and its optional "source", "tags" and "test_length" entries
    are popped, and every remaining key/value pair is treated as a parse
    key and the expected form(s), with alternatives separated by "/".

    The tags used for an entry are its own "tags", plus its "source" (if
    truthy), plus everything in global_tags (if given).

    A report is printed for each row whose generated forms do not match
    the expected ones, or for every row when debug is true. Nothing is
    returned.
    """
    ginflexion = GreekInflexion(stemming_file, lexicon_file)

    # The test data is Greek text; do not depend on the locale's default
    # encoding when reading it.
    with open(test_file, encoding="utf-8") as f:
        # yaml.load() without an explicit Loader raises TypeError on
        # PyYAML >= 6 and can construct arbitrary objects on older
        # versions; the test files only need plain YAML data.
        # safe_load returns None for an empty document.
        tests = yaml.safe_load(f) or []

    for test in tests:
        source = test.pop("source", None)
        test.pop("test_length", False)  # popped so it is not treated as a parse key
        lemma = test.pop("lemma")

        tags = set(test.pop("tags", []))
        if source:
            tags.add(source)
        if global_tags:
            tags.update(global_tags)

        # whatever is left in the mapping is parse key -> expected form(s)
        for key, form in sorted(test.items()):
            # number of "/"-separated alternatives in the expected form
            expected_count = form.count("/") + 1
            stem = ginflexion.find_stems(lemma, key, tags)
            generated = ginflexion.generate(lemma, key, tags)

            # both sides are sorted first and only then passed through
            # strip_length, exactly as before
            actual = [strip_length(w) for w in sorted(generated)]
            expected = [strip_length(w) for w in sorted(form.split("/"))]
            correct = "✓" if actual == expected else "✕"

            if debug or correct == "✕":
                print()
                print(lemma, key, form)
                print("stem: {}".format(stem))
                print("generate[{}/{}{}]:".format(
                    len(generated), expected_count, correct))
                for generated_form, details in generated.items():
                    print(" - {}".format(generated_form))
                    for detail in details:
                        print(" {}".format(detail))
argparser.add_argument("--lexicon", dest="lexicon", default="STEM_DATA/morphgnt_lexicon.yaml", help="path to stem lexicon file " "(defaults to morphgnt_lexicon.yaml)") argparser.add_argument("--stemming", dest="stemming", default="stemming.yaml", help="path to stemming rules file " "(defaults to stemming.yaml)") args = argparser.parse_args() ginflexion = GreekInflexion(args.stemming, args.lexicon) debug = False incorrect_count = 0 total_count = 0 IGNORE_LIST = [ "κουμ", "εφφαθα", "σαβαχθάνι", "θά", ] for book_num in args.books: for row in morphgnt_rows(book_num):
argparser.add_argument("--lexicon", dest="lexicon", default="STEM_DATA/morphgnt_lexicon.yaml", help="path to initial stem lexicon file " "(defaults to morphgnt_lexicon.yaml)") argparser.add_argument("--stemming", dest="stemming", default="stemming.yaml", help="path to stemming rules file " "(defaults to stemming.yaml)") args = argparser.parse_args() ginflexion = GreekInflexion(args.stemming, args.lexicon, strip_length=True) STEM_GUESSES = defaultdict(lambda: defaultdict(set)) for book_num in args.books: for row in morphgnt_rows(book_num): b, c, v = bcv_tuple(row["bcv"]) if row["ccat-pos"] == "V-": lemma = row["lemma"] key = convert_parse(row["ccat-parse"]) form = row["norm"] tags = set([ "final-nu-aai.3s", "oida-yai3p-variant", "no-final-nu-yai.3s",
#!/usr/bin/env python3 from collections import defaultdict from accent import strip_length from greek_inflexion import GreekInflexion from morphgnt_utils import key_to_part from normalise import convert as norm_convert from lxxmorph_utils import get_words, convert_parse, trim_multiples ginflexion = GreekInflexion("stemming.yaml", "STEM_DATA/lxx_lexicon.yaml", strip_length=True) LXX_FILENAME = "lxxmorph/12.1Sam.mlxx" STEM_GUESSES = defaultdict(lambda: defaultdict(set)) for row in get_words(LXX_FILENAME): form = row["word"] preverb = row["preverb"] lemma = row["lemma"] key = convert_parse(row["parse"]) if preverb: lemma = "+".join(preverb.split()) + "++" + lemma form = norm_convert(form, lemma, key) tags = set([ "final-nu-aai.3s", "alt-apo-pl",
#!/usr/bin/env python3 from collections import defaultdict from accent import strip_length from greek_inflexion import GreekInflexion from homer_utils import key_to_part, trim_multiples ginflexion = GreekInflexion("stemming.yaml", "STEM_DATA/homer_lexicon.yaml") STEM_GUESSES = defaultdict(lambda: defaultdict(set)) with open("homer-data/verbs.tsv") as f: for row in f: lemma, key, form = row.strip().split("\t") tags = set([ "fixed-final-nu-aai.3s", "no-final-nu-aai.3s", "no-final-nu-aao.3s", "no-final-nu-fai.3p", "no-final-nu-pai.3p", "no-final-nu-iai.3s", "no-final-nu-xai.3s", "no-final-nu-xai.3p", "no-final-nu-yai.3s", "no-final-nu-aps.3p", "no-final-nu-pai.3s", "no-final-nu-aas.3p", "no-final-nu-xas.3p", "no-sigma-loss-imi.2s",
from accent import strip_length from greek_inflexion import GreekInflexion from test_generate import output_item from homer_utils import key_to_part debug = False incorrect_count = 0 total_count = 0 summary_by_lemma = defaultdict(set) ginflexion = GreekInflexion( "stemming.yaml", "STEM_DATA/homer_lexicon.yaml" ) first = True FILENAME = "homer-data/paradigms.tsv" with open(FILENAME) as f: for row in f: total_count += 1 lemma, key, form = row.strip().split() tags = set([ "fixed-final-nu-aai.3s",
def setUp(self):
    """Create the GreekInflexion instance stored on self.inflexion."""
    stemming_path = "stemming.yaml"
    lexicon_path = "STEM_DATA/pratt_lexicon.yaml"
    self.inflexion = GreekInflexion(stemming_path, lexicon_path)
class InflexionTest(unittest.TestCase):
    """Checks of GreekInflexion built from stemming.yaml and the Pratt lexicon."""

    def setUp(self):
        """Create the GreekInflexion instance the test methods use."""
        stemming_path = "stemming.yaml"
        lexicon_path = "STEM_DATA/pratt_lexicon.yaml"
        self.inflexion = GreekInflexion(stemming_path, lexicon_path)

    def test_generate(self):
        # @@@ (original marker) the result is not asserted on
        self.inflexion.generate("λύω", "AAI.1S")

    def test_find_stems(self):
        stems = self.inflexion.find_stems("λύω", "AAI.1S")
        self.assertEqual(stems, {"ἐλυσ"})

    def test_parse1(self):
        parses = self.inflexion.parse("ἔλυσα")
        self.assertEqual(parses, {("λύω", "AAI.1S")})

    def test_parse2(self):
        parses = self.inflexion.parse("ποιοῦμαι")
        self.assertEqual(parses, set())

    def test_possible_stems1(self):
        # no key pattern given
        candidates = sorted(self.inflexion.possible_stems("ποιοῦμαι"))
        wanted = [
            ("AAN", "ποιουμ"),
            ("AAO.3S", "ποιουμ"),
            ("AMD.2S", "ποιουμ"),
            ("FMI.1S", "ποι{contract}"),
            ("PMI.1S", "ποιε"),
            ("PMI.1S", "ποιο"),
            ("XMI.1S", "ποιου"),
            ("ZMI.1S", "ποι{contract}"),
        ]
        self.assertEqual(candidates, wanted)

    def test_possible_stems2(self):
        # same form, with the key pattern ".+1S$" passed as second argument
        candidates = sorted(
            self.inflexion.possible_stems("ποιοῦμαι", ".+1S$")
        )
        wanted = [
            ("FMI.1S", "ποι{contract}"),
            ("PMI.1S", "ποιε"),
            ("PMI.1S", "ποιο"),
            ("XMI.1S", "ποιου"),
            ("ZMI.1S", "ποι{contract}"),
        ]
        self.assertEqual(candidates, wanted)

    def test_conjugate(self):
        # @@@ (original marker) the result is not asserted on
        self.inflexion.conjugate(
            "λύω", "PAI", "AAI", tags={"final-nu-aai.3s"}
        )

    def test_decline(self):
        # the result is not asserted on
        self.inflexion.decline("λύω", "PAP")
def setUp(self):
    """Create the GreekInflexion instance stored on self.inflexion."""
    stemming_path = "stemming.yaml"
    lexicon_path = "test_data/pratt_lexicon.yaml"
    self.inflexion = GreekInflexion(stemming_path, lexicon_path)
#!/usr/bin/env python3

import sys

from greek_inflexion import GreekInflexion

# Inflexion engine built from the MorphGNT lexicon; passed to every check below.
mi = GreekInflexion("stemming.yaml", "STEM_DATA/morphgnt_lexicon.yaml")

# Number of failed checks; incremented by test().
incorrect_count = 0


def test(ref, inflexion, lemma, key, expected):
    """
    Check that inflexion.generate(lemma, key) yields exactly `expected`.

    The result of generate() is converted to a set and compared with the
    set `expected`. On a mismatch a "failed ..." line is printed and the
    module-level incorrect_count is incremented. `ref` is only echoed in
    the failure message (presumably an issue reference such as "#3" --
    confirm against the project's tracker).
    """
    global incorrect_count
    result = set(inflexion.generate(lemma, key))
    if result != expected:
        # the message previously ended with an unbalanced extra ")"
        print(f"failed {ref} {lemma} {key} {expected} (got {result})")
        incorrect_count += 1


test("#3", mi, "ἀνίστημι", "AMD.2S", {"ἀνάστησαι"})
test("#3", mi, "ἀνίστημι", "AMD.3S", {"ἀναστησάσθω"})
test("#3", mi, "ἀνίστημι", "AMD.2P", {"ἀναστήσασθε"})
test("#3", mi, "ἀνίστημι", "AMD.3P", {"ἀναστησάσθων"})

test("#29", mi, "δίδωμι", "PAP.DPF", {"διδούσαις"})

test("#30", mi, "τίθημι", "AAS.3P", {"θῶσι(ν)", "θήσωσι(ν)"})
test("#30", mi, "τίθημι", "AMP.APF", {"θεμένᾱς", "θησαμένᾱς"})
test("#30", mi, "τίθημι", "AMP.APM", {"θεμένους", "θησαμένους"})
test("#30", mi, "τίθημι", "AMP.APN", {"θέμενα", "θησάμενα"})
from greek_inflexion import GreekInflexion
import paradigm_tools as pu

# Lemma used by every decline/conjugate call below.
LEMMA = "λύω"

inflexion = GreekInflexion("stemming.yaml", "STEM_DATA/pratt_lexicon.yaml")
labels = pu.load_labels("labels.yaml", "el")

# Declension output, HTML then Markdown. Return values are discarded here
# and in every call that follows.
pu.decline_html(LEMMA, "PAP")
pu.decline_md(LEMMA, "PAP")

# HTML conjugation tables with various key combinations.
pu.conjugate_html(
    LEMMA, "PAD", "AAD",
    tags={"final-nu-aai.3s"},
    merge_paradigms=False,
)
pu.conjugate_html(LEMMA, "PAI", "AAI", tags={"final-nu-aai.3s"})
pu.conjugate_html(
    LEMMA, "PAI", "PMI", "FAI", "FMI",
    tags={"final-nu-aai.3s"},
)
pu.conjugate_html(
    LEMMA, "PAI", "PMI",
    tags={"final-nu-aai.3s"},
    merge_paradigms=False,
)

# Layout helpers called directly with six placeholder cells.
pu.layout_merged_verb_paradigm_html(
    [["1", "2", "3", "4", "5", "6"]], ["Random"], labels
)
pu.layout_merged_verb_paradigm_md(
    [["1", "2", "3", "4", "5", "6"]], ["Random"], labels
)
pu.layout_non_merged_verb_paradigm_md(
    ["1", "2", "3", "4", "5", "6"], "Random", labels
)

# Markdown conjugation: merged first, then non-merged (same order as before).
for merged in (True, False):
    pu.conjugate_md(
        LEMMA, "PAI", "PMI",
        tags={"final-nu-aai.3s"},
        merge_paradigms=merged,
    )
def setUp(self):
    """Create the GreekInflexion instance stored on self.inflexion."""
    stemming_path = "stemming.yaml"
    lexicon_path = "test_data/pratt_lexicon.yaml"
    self.inflexion = GreekInflexion(stemming_path, lexicon_path)
class InflexionTest(unittest.TestCase):
    """Checks of GreekInflexion built from stemming.yaml and the Pratt lexicon."""

    def setUp(self):
        """Create the GreekInflexion instance the test methods use."""
        stemming_path = "stemming.yaml"
        lexicon_path = "test_data/pratt_lexicon.yaml"
        self.inflexion = GreekInflexion(stemming_path, lexicon_path)

    def test_generate(self):
        # @@@ (original marker) the result is not asserted on
        self.inflexion.generate("λύω", "AAI.1S")

    def test_find_stems(self):
        stems = self.inflexion.find_stems("λύω", "AAI.1S")
        self.assertEqual(stems, {"ἐλυσ"})

    def test_parse1(self):
        parses = self.inflexion.parse("ἔλυσα")
        self.assertEqual(parses, {("λύω", "AAI.1S")})

    def test_parse2(self):
        parses = self.inflexion.parse("ποιοῦμαι")
        self.assertEqual(parses, set())

    def test_possible_stems1(self):
        # no key pattern given
        candidates = sorted(self.inflexion.possible_stems("ποιοῦμαι"))
        wanted = [
            ("AAN", "ποιουμ"),
            ("AAO.3S", "ποιουμ"),
            ("AMD.2S", "ποιουμ"),
            ("FMI.1S", "ποι{contract}"),
            ("PMI.1S", "ποιε"),
            ("PMI.1S", "ποιο"),
            ("XMI.1S", "ποιου"),
        ]
        self.assertEqual(candidates, wanted)

    def test_possible_stems2(self):
        # same form, with the key pattern ".+1S$" passed as second argument
        candidates = sorted(
            self.inflexion.possible_stems("ποιοῦμαι", ".+1S$")
        )
        wanted = [
            ("FMI.1S", "ποι{contract}"),
            ("PMI.1S", "ποιε"),
            ("PMI.1S", "ποιο"),
            ("XMI.1S", "ποιου"),
        ]
        self.assertEqual(candidates, wanted)

    def test_conjugate(self):
        # @@@ (original marker) the result is not asserted on
        self.inflexion.conjugate(
            "λύω", "PAI", "AAI", tags={"final-nu-aai.3s"}
        )

    def test_decline(self):
        # the result is not asserted on
        self.inflexion.decline("λύω", "PAP")