def test_generate(stemming_file, lexicon_file, test_file, global_tags=None,
                  debug=False):
    """
    Generate every form listed in `test_file` and report discrepancies.

    A `GreekInflexion` is built from `stemming_file` and `lexicon_file`.
    `test_file` is a YAML document holding a sequence of mappings. Each
    mapping must have a "lemma" entry and may have "source", "tags" and
    "test_length" entries; every remaining entry maps a parse key to the
    expected form(s), with alternatives separated by "/".

    For each key the forms are generated and compared with the expected
    ones after passing both through `strip_length`. Mismatches are handed to
    `output_item`; when `debug` is true, matches are reported as well.

    Parameters:
        stemming_file: passed to `GreekInflexion`.
        lexicon_file: passed to `GreekInflexion`.
        test_file: path of the YAML test file (read as UTF-8).
        global_tags: optional iterable of tags added to every test's tags.
        debug: report every item, not only the failing ones.
    """
    ginflexion = GreekInflexion(stemming_file, lexicon_file)

    # Decode explicitly as UTF-8: the file holds Greek text and the platform
    # default encoding is not guaranteed to be able to read it.
    with open(test_file, encoding="utf-8") as f:
        # An empty YAML document loads as None; treat it as "no tests".
        tests = yaml.safe_load(f) or []

    for test in tests:
        # Remove the bookkeeping entries so only key -> form pairs remain.
        source = test.pop("source", None)
        test.pop("test_length", False)  # dropped; not consulted by this check
        lemma = test.pop("lemma")
        tags = set(test.pop("tags", []))
        if source:
            tags.add(source)
        if global_tags:
            tags.update(global_tags)

        segmented_lemma = ginflexion.segmented_lemmas.get(lemma)

        for key, form in sorted(test.items()):
            stem = ginflexion.find_stems(lemma, key, tags)
            generated = ginflexion.generate(lemma, key, tags)

            if stem:
                stem_guess = None
            else:
                # No stem found: collect candidate stems for exactly this key.
                # Distinct names are used so the outer `key` / `stem` are not
                # visually (or, on Python 2, actually) clobbered.
                stem_guess = [
                    guess
                    for _, guess in ginflexion.possible_stems(
                        form, "^" + key + "$")
                ]

            # Normalise first and sort afterwards, so the comparison does not
            # depend on how the un-normalised strings happen to order.
            produced = sorted(strip_length(w) for w in generated)
            expected = sorted(strip_length(w) for w in form.split("/"))
            correct = "✓" if produced == expected else "✕"

            if debug or correct == "✕":
                output_item(
                    lemma, segmented_lemma, key, None, form, None, stem,
                    stem_guess, None, None, generated, correct)
def test_generate(
    stemming_file, lexicon_file, test_file, global_tags=None, debug=False
):
    """
    Generate every form listed in `test_file` and report discrepancies.

    A `GreekInflexion` is built from `stemming_file` and `lexicon_file`.
    `test_file` is a YAML document holding a sequence of mappings. Each
    mapping must have a "lemma" entry and may have "source", "tags" and
    "test_length" entries; every remaining entry maps a parse key to the
    expected form(s), with alternatives separated by "/".

    For each key the forms are generated and compared with the expected
    ones after passing both through `strip_length`. Mismatches are handed to
    `output_item`; when `debug` is true, matches are reported as well.

    Parameters:
        stemming_file: passed to `GreekInflexion`.
        lexicon_file: passed to `GreekInflexion`.
        test_file: path of the YAML test file (read as UTF-8).
        global_tags: optional iterable of tags added to every test's tags.
        debug: report every item, not only the failing ones.
    """
    ginflexion = GreekInflexion(stemming_file, lexicon_file)

    # Decode explicitly as UTF-8: the file holds Greek text and the platform
    # default encoding is not guaranteed to be able to read it.
    with open(test_file, encoding="utf-8") as f:
        # NOTE: this used to be `yaml.load(f)`. Without a Loader argument
        # that call fails on PyYAML >= 6 and can construct arbitrary objects
        # on older releases; the test data is plain mappings, lists and
        # strings, so the safe loader is sufficient.
        # An empty YAML document loads as None; treat it as "no tests".
        tests = yaml.safe_load(f) or []

    for test in tests:
        # Remove the bookkeeping entries so only key -> form pairs remain.
        source = test.pop("source", None)
        test.pop("test_length", False)  # dropped; not consulted by this check
        lemma = test.pop("lemma")
        tags = set(test.pop("tags", []))
        if source:
            tags.add(source)
        if global_tags:
            tags.update(global_tags)

        for key, form in sorted(test.items()):
            stem = ginflexion.find_stems(lemma, key, tags)
            generated = ginflexion.generate(lemma, key, tags)

            if stem:
                stem_guess = None
            else:
                # No stem found: collect candidate stems for exactly this key.
                # Distinct names are used so the outer `key` / `stem` are not
                # visually (or, on Python 2, actually) clobbered.
                stem_guess = [
                    guess
                    for _, guess in ginflexion.possible_stems(
                        form, "^" + key + "$")
                ]

            # Normalise first and sort afterwards, so the comparison does not
            # depend on how the un-normalised strings happen to order.
            produced = sorted(strip_length(w) for w in generated)
            expected = sorted(strip_length(w) for w in form.split("/"))
            correct = "✓" if produced == expected else "✕"

            if debug or correct == "✕":
                output_item(
                    lemma, key, None, form, None, stem, stem_guess, None,
                    None, generated, correct)
def test_generate(
    stemming_file, lexicon_file, test_file, global_tags=None, debug=False
):
    """
    Generate every form listed in `test_file` and print discrepancies.

    A `GreekInflexion` is built from `stemming_file` and `lexicon_file`.
    `test_file` is a YAML document holding a sequence of mappings. Each
    mapping must have a "lemma" entry and may have "source", "tags" and
    "test_length" entries; every remaining entry maps a parse key to the
    expected form(s), with alternatives separated by "/".

    For each key the forms are generated and compared with the expected
    ones after passing both through `strip_length`. For every mismatch (or
    for every item when `debug` is true) the lemma, key, expected form,
    stem, counts and each generated form with its details are printed.

    Parameters:
        stemming_file: passed to `GreekInflexion`.
        lexicon_file: passed to `GreekInflexion`.
        test_file: path of the YAML test file (read as UTF-8).
        global_tags: optional iterable of tags added to every test's tags.
        debug: print every item, not only the failing ones.
    """
    ginflexion = GreekInflexion(stemming_file, lexicon_file)

    # Decode explicitly as UTF-8: the file holds Greek text and the platform
    # default encoding is not guaranteed to be able to read it.
    with open(test_file, encoding="utf-8") as f:
        # NOTE: this used to be `yaml.load(f)`. Without a Loader argument
        # that call fails on PyYAML >= 6 and can construct arbitrary objects
        # on older releases; the test data is plain mappings, lists and
        # strings, so the safe loader is sufficient.
        # An empty YAML document loads as None; treat it as "no tests".
        tests = yaml.safe_load(f) or []

    for test in tests:
        # Remove the bookkeeping entries so only key -> form pairs remain.
        source = test.pop("source", None)
        test.pop("test_length", False)  # dropped; not consulted by this check
        lemma = test.pop("lemma")
        tags = set(test.pop("tags", []))
        if source:
            tags.add(source)
        if global_tags:
            tags.update(global_tags)

        for key, form in sorted(test.items()):
            # Number of "/"-separated alternatives in the expected form.
            expected_count = form.count("/") + 1
            stem = ginflexion.find_stems(lemma, key, tags)
            # `generated` is used as a mapping: form -> iterable of details.
            generated = ginflexion.generate(lemma, key, tags)

            # Normalise first and sort afterwards, so the comparison does not
            # depend on how the un-normalised strings happen to order.
            produced = sorted(strip_length(w) for w in generated)
            expected = sorted(strip_length(w) for w in form.split("/"))
            correct = "✓" if produced == expected else "✕"

            if not (debug or correct == "✕"):
                continue

            print()
            print(lemma, key, form)
            print("stem: {}".format(stem))
            print("generate[{}/{}{}]:".format(
                len(generated), expected_count, correct))
            for generated_form, details in generated.items():
                print(" - {}".format(generated_form))
                for detail in details:
                    print(" {}".format(detail))
continue tags = set([ "final-nu-aai.3s", "oida-yai3p-variant", "no-final-nu-yai.3s", "late-pluperfect-singulars", "sigma-loss-pmd.2s", "HGrk", ]) c = form.count("/") + 1 stem = ginflexion.find_stems(lemma, key, tags) generated = ginflexion.generate(lemma, key, tags) if strip_length(form) in [ strip_length(w) for w in sorted(generated) ]: correct = "✓" stem_guess = None else: correct = "✕" incorrect_count += 1 possible_stems = [(key_to_part(a), b, a) for a, b in ginflexion.possible_stems(form)] likely_stems = [(key_to_part(a), b) for a, b in ginflexion.possible_stems( form, "^" + key + "$")] possible_parses = ginflexion.parse(form) if debug or correct == "✕":
"HGrk", ]) c = form.count("/") + 1 stem = ginflexion.find_stems(lemma, key, tags) generated = ginflexion.generate(lemma, key, tags) if stem: stem_guess = None else: stem_guess = [ stem for key, stem in ginflexion.possible_stems( form, "^" + key + "$") ] if [strip_length(w) for w in sorted(generated)] == \ [strip_length(w) for w in sorted(form.split("/"))]: correct = "✓" else: correct = "✕" if correct == "✕": if stem_guess: STEM_GUESSES[lemma][key_to_part(key)].add( frozenset(stem_guess)) for lemma, parts in sorted(STEM_GUESSES.items()): print() print("{}:".format(lemma)) print(" stems:".format(lemma)) for part, stem_sets in sorted(parts.items()): stem = set.intersection(*(set(s) for s in stem_sets))