class InflexionTest(unittest.TestCase):
    """Tests for ``GreekInflexion`` built from the stemming and lexicon files."""

    def setUp(self):
        # Build a fresh engine for every test from the two YAML files.
        stemming_path = "stemming.yaml"
        lexicon_path = "STEM_DATA/pratt_lexicon.yaml"
        self.inflexion = GreekInflexion(stemming_path, lexicon_path)

    def test_generate(self):
        # Smoke test: the call must not raise; its result is not checked.
        self.inflexion.generate('λύω', 'AAI.1S')  # @@@

    def test_find_stems(self):
        found = self.inflexion.find_stems('λύω', 'AAI.1S')
        self.assertEqual(found, {'ἐλυσ'})

    def test_parse1(self):
        parses = self.inflexion.parse('ἔλυσα')
        self.assertEqual(parses, {('λύω', 'AAI.1S')})

    def test_parse2(self):
        # This form is expected to yield no parse at all.
        parses = self.inflexion.parse('ποιοῦμαι')
        self.assertEqual(parses, set())

    def test_possible_stems1(self):
        # Without a key filter, every candidate (key, stem) pair is returned.
        candidates = sorted(self.inflexion.possible_stems('ποιοῦμαι'))
        expected = [
            ('AAN', 'ποιουμ'),
            ('AAO.3S', 'ποιουμ'),
            ('AMD.2S', 'ποιουμ'),
            ('FMI.1S', 'ποι{contract}'),
            ('PMI.1S', 'ποιε'),
            ('PMI.1S', 'ποιο'),
            ('PMI.1S', 'ποιου{athematic}'),
            ('XMI.1S', 'ποιου'),
            ('ZMI.1S', 'ποι{contract}'),
        ]
        self.assertEqual(candidates, expected)

    def test_possible_stems2(self):
        # The second argument is a regex that restricts the parse keys.
        candidates = sorted(
            self.inflexion.possible_stems('ποιοῦμαι', '.+1S$')
        )
        expected = [
            ('FMI.1S', 'ποι{contract}'),
            ('PMI.1S', 'ποιε'),
            ('PMI.1S', 'ποιο'),
            ('PMI.1S', 'ποιου{athematic}'),
            ('XMI.1S', 'ποιου'),
            ('ZMI.1S', 'ποι{contract}'),
        ]
        self.assertEqual(candidates, expected)

    def test_conjugate(self):
        # Smoke test: only checks that conjugation runs without raising.
        extra_tags = {"final-nu-aai.3s"}
        self.inflexion.conjugate("λύω", "PAI", "AAI", tags=extra_tags)  # @@@

    def test_decline(self):
        # Smoke test: only checks that declension runs without raising.
        self.inflexion.decline("λύω", "PAP")
def test_generate(stemming_file, lexicon_file, test_file, global_tags=None,
                  debug=False):
    """
    generates all the forms in the test_file using the lexicon_file and
    stemming_file and outputs any discrepancies (or everything if debug on)

    Args:
        stemming_file: passed to ``GreekInflexion`` as its first argument.
        lexicon_file: passed to ``GreekInflexion`` as its second argument.
        test_file: path of a UTF-8 YAML file holding a sequence of mappings.
            Each mapping needs a ``lemma`` entry and may have ``source``,
            ``tags`` and ``test_length`` entries; every remaining entry maps
            a parse key to the expected form(s), alternatives being
            separated by ``/``.
        global_tags: optional iterable of tags added to every test's tags.
        debug: when true, output every item instead of only the mismatches.

    Returns:
        None; results are reported through ``output_item``.
    """
    ginflexion = GreekInflexion(stemming_file, lexicon_file)

    # The test data is Greek text: decode it as UTF-8 explicitly instead of
    # depending on the platform's default encoding.
    with open(test_file, encoding="utf-8") as f:
        # An empty YAML document loads as None; treat it as "no tests".
        tests = yaml.safe_load(f) or []

    for test in tests:
        # Remove the metadata entries so that only key -> form pairs remain.
        source = test.pop("source", None)
        test.pop("test_length", False)  # not used here, just discarded
        lemma = test.pop("lemma")
        tags = set(test.pop("tags", []))
        if source:
            tags.add(source)
        if global_tags:
            tags.update(global_tags)

        segmented_lemma = ginflexion.segmented_lemmas.get(lemma)

        for key, form in sorted(test.items()):
            stem = ginflexion.find_stems(lemma, key, tags)
            generated = ginflexion.generate(lemma, key, tags)

            if stem:
                stem_guess = None
            else:
                # No stem found: collect the candidate stems for exactly
                # this key. Distinct names keep the comprehension from
                # shadowing the loop's `key` and `stem`.
                stem_guess = [
                    guessed_stem
                    for _, guessed_stem in ginflexion.possible_stems(
                        form, "^" + key + "$")
                ]

            # Compare as sorted lists after strip_length normalisation.
            actual = [strip_length(w) for w in sorted(generated)]
            expected = [strip_length(w) for w in sorted(form.split("/"))]
            correct = "✓" if actual == expected else "✕"

            if debug or correct == "✕":
                output_item(lemma, segmented_lemma, key, None, form, None,
                            stem, stem_guess, None, None, generated, correct)
def test_generate(
    stemming_file, lexicon_file, test_file, global_tags=None, debug=False
):
    """
    generates all the forms in the test_file using the lexicon_file and
    stemming_file and outputs any discrepancies (or everything if debug on)

    Args:
        stemming_file: passed to ``GreekInflexion`` as its first argument.
        lexicon_file: passed to ``GreekInflexion`` as its second argument.
        test_file: path of a UTF-8 YAML file holding a sequence of mappings.
            Each mapping needs a ``lemma`` entry and may have ``source``,
            ``tags`` and ``test_length`` entries; every remaining entry maps
            a parse key to the expected form(s), alternatives being
            separated by ``/``.
        global_tags: optional iterable of tags added to every test's tags.
        debug: when true, output every item instead of only the mismatches.

    Returns:
        None; results are reported through ``output_item``.
    """
    ginflexion = GreekInflexion(stemming_file, lexicon_file)

    # The test data is Greek text: decode it as UTF-8 explicitly instead of
    # depending on the platform's default encoding.
    with open(test_file, encoding="utf-8") as f:
        # yaml.load() without an explicit Loader is deprecated and raises
        # TypeError on PyYAML >= 6; safe_load also refuses to construct
        # arbitrary Python objects from the file.
        # An empty YAML document loads as None; treat it as "no tests".
        tests = yaml.safe_load(f) or []

    for test in tests:
        # Remove the metadata entries so that only key -> form pairs remain.
        source = test.pop("source", None)
        test.pop("test_length", False)  # not used here, just discarded
        lemma = test.pop("lemma")
        tags = set(test.pop("tags", []))
        if source:
            tags.add(source)
        if global_tags:
            tags.update(global_tags)

        for key, form in sorted(test.items()):
            stem = ginflexion.find_stems(lemma, key, tags)
            generated = ginflexion.generate(lemma, key, tags)

            if stem:
                stem_guess = None
            else:
                # No stem found: collect the candidate stems for exactly
                # this key. Distinct names keep the comprehension from
                # shadowing the loop's `key` and `stem`.
                stem_guess = [
                    guessed_stem
                    for _, guessed_stem in ginflexion.possible_stems(
                        form, "^" + key + "$")
                ]

            # Compare as sorted lists after strip_length normalisation.
            actual = [strip_length(w) for w in sorted(generated)]
            expected = [strip_length(w) for w in sorted(form.split("/"))]
            correct = "✓" if actual == expected else "✕"

            if debug or correct == "✕":
                output_item(
                    lemma, key, None, form, None, stem, stem_guess,
                    None, None, generated, correct)
"late-pluperfect-singulars", "sigma-loss-pmd.2s", "HGrk", ]) c = form.count("/") + 1 stem = ginflexion.find_stems(lemma, key, tags) generated = ginflexion.generate(lemma, key, tags) if strip_length(form) in [ strip_length(w) for w in sorted(generated) ]: correct = "✓" stem_guess = None else: correct = "✕" incorrect_count += 1 possible_stems = [(key_to_part(a), b, a) for a, b in ginflexion.possible_stems(form)] likely_stems = [(key_to_part(a), b) for a, b in ginflexion.possible_stems( form, "^" + key + "$")] possible_parses = ginflexion.parse(form) if debug or correct == "✕": output_item(lemma, key, key_to_part(key), form, None, stem, possible_stems, likely_stems, possible_parses, generated, correct) print("{}/{} incorrect".format(incorrect_count, total_count))
"oida-yai3p-variant", "no-final-nu-yai.3s", "late-pluperfect-singulars", "sigma-loss-pmd.2s", "HGrk", ]) c = form.count("/") + 1 stem = ginflexion.find_stems(lemma, key, tags) generated = ginflexion.generate(lemma, key, tags) if stem: stem_guess = None else: stem_guess = [ stem for key, stem in ginflexion.possible_stems( form, "^" + key + "$") ] if [strip_length(w) for w in sorted(generated)] == \ [strip_length(w) for w in sorted(form.split("/"))]: correct = "✓" else: correct = "✕" if correct == "✕": if stem_guess: STEM_GUESSES[lemma][key_to_part(key)].add( frozenset(stem_guess)) for lemma, parts in sorted(STEM_GUESSES.items()): print() print("{}:".format(lemma))
class InflexionTest(unittest.TestCase):
    """Tests for ``GreekInflexion`` built from the stemming and lexicon files."""

    def setUp(self):
        # Build a fresh engine for every test from the two YAML files.
        stemming_path = "stemming.yaml"
        lexicon_path = "test_data/pratt_lexicon.yaml"
        self.inflexion = GreekInflexion(stemming_path, lexicon_path)

    def test_generate(self):
        # Smoke test: the call must not raise; its result is not checked.
        self.inflexion.generate('λύω', 'AAI.1S')  # @@@

    def test_find_stems(self):
        found = self.inflexion.find_stems('λύω', 'AAI.1S')
        self.assertEqual(found, {'ἐλυσ'})

    def test_parse1(self):
        parses = self.inflexion.parse('ἔλυσα')
        self.assertEqual(parses, {('λύω', 'AAI.1S')})

    def test_parse2(self):
        # This form is expected to yield no parse at all.
        parses = self.inflexion.parse('ποιοῦμαι')
        self.assertEqual(parses, set())

    def test_possible_stems1(self):
        # Without a key filter, every candidate (key, stem) pair is returned.
        candidates = sorted(self.inflexion.possible_stems('ποιοῦμαι'))
        expected = [
            ('AAN', 'ποιουμ'),
            ('AAO.3S', 'ποιουμ'),
            ('AMD.2S', 'ποιουμ'),
            ('FMI.1S', 'ποι{contract}'),
            ('PMI.1S', 'ποιε'),
            ('PMI.1S', 'ποιο'),
            ('XMI.1S', 'ποιου'),
        ]
        self.assertEqual(candidates, expected)

    def test_possible_stems2(self):
        # The second argument is a regex that restricts the parse keys.
        candidates = sorted(
            self.inflexion.possible_stems('ποιοῦμαι', '.+1S$')
        )
        expected = [
            ('FMI.1S', 'ποι{contract}'),
            ('PMI.1S', 'ποιε'),
            ('PMI.1S', 'ποιο'),
            ('XMI.1S', 'ποιου'),
        ]
        self.assertEqual(candidates, expected)

    def test_conjugate(self):
        # Smoke test: only checks that conjugation runs without raising.
        extra_tags = {"final-nu-aai.3s"}
        self.inflexion.conjugate("λύω", "PAI", "AAI", tags=extra_tags)  # @@@

    def test_decline(self):
        # Smoke test: only checks that declension runs without raising.
        self.inflexion.decline("λύω", "PAP")