def test_invalid_invocation(labels, shared_datadir):
    """An unsupported ``labels`` argument must raise ValueError."""
    fomabin = shared_datadir / "eat.fomabin"
    with pytest.raises(ValueError):
        FST.from_file(fomabin, labels=labels)
def test_cree_hfstol_generation_in_bulk(cree_hfstol_generator: FST, surface_forms, analyses):
    """Bulk generation with the Cree hfstol generator yields the expected surface forms."""
    results = cree_hfstol_generator.generate_in_bulk(analyses)
    assert {tuple(form) for form in results} == set(surface_forms)
def test_analyze_concatenation(english_ipa_fst: FST):
    """Concatenating an analysis works even when every element contains epsilons."""
    # Unpacking asserts there is exactly one analysis.
    (result,) = english_ipa_fst.analyze('rough')
    assert result == ('ɹʌf',)
def test_cree_hfstol_analysis(cree_hfstol_analyzer: FST, surface_form, analyses):
    """The Cree hfstol analyzer yields every expected (joined) analysis."""
    expected = {''.join(analysis) for analysis in analyses}
    assert set(cree_hfstol_analyzer.analyze(surface_form)) == expected
def test_cree_hfstol_analysis_in_bulk(cree_hfstol_analyzer: FST, surface_forms, analyses):
    """Bulk analysis with the Cree hfstol analyzer matches the expected analyses.

    Each word's analyses are sorted into a canonical tuple before comparing,
    because the FST may yield them in any order.
    """
    # The original wrapped sorted()'s argument in a redundant tuple() and used
    # map+lambda; set comprehensions are the idiomatic, equivalent form.
    actual = {
        tuple(sorted(result))
        for result in cree_hfstol_analyzer.analyze_in_bulk(surface_forms)
    }
    expected = {tuple(sorted(analysis)) for analysis in analyses}
    assert actual == expected
def test_cree_foma_generation_in_bulk(cree_foma_generator: FST, surface_forms, analyses):
    """Bulk generation with the Cree fomabin generator yields the expected forms."""
    actual = set(map(tuple, cree_foma_generator.generate_in_bulk(analyses)))
    assert actual == set(surface_forms)
def test_load_from_file(shared_datadir):
    """Integration test: load an FST from a fomabin file and transduce both ways."""
    fst = FST.from_file(shared_datadir / "eat.fomabin")

    # Analysis ("apply up") can yield several readings for one surface form.
    expected_analyses = {
        ("eat", "+N", "+Mass"),
        ("eat", "+V", "+3P", "+Sg"),
    }
    assert set(fst.analyze("eats")) == expected_analyses

    # Generation ("apply down") maps an analysis back to its surface form.
    assert set(fst.generate("eat+V+Past")) == {"ate"}
def test_load_from_file(shared_datadir):
    """Integration test: round-trip a file-loaded FST through analyze and generate."""
    fst = FST.from_file(shared_datadir / 'eat.fomabin')
    # Analysis may yield more than one reading for the same surface form.
    analyses = set(fst.analyze('eats'))
    assert analyses == {('eat', '+N', '+Mass'), ('eat', '+V', '+3P', '+Sg')}
    # And generation maps an analysis string back to a surface form.
    assert set(fst.generate('eat+V+Past')) == {'ate'}
def make_fst(*custom_arcs: str, a_and_b='positive') -> FST:
    """Build a complete test FST from the given extra arcs.

    The extra arcs should connect state 1 to state 2; fixed arcs already
    reach state 1 by setting x <- a, setting x <- b, or leaving x unset.
    ``a_and_b`` selects the flag style: 'positive' for @P.x.V@ flags or
    'unify' for @U.x.V@ flags.
    """
    if a_and_b == 'unify':
        flag_arcs = UNIFY_ARCS
    else:
        flag_arcs = POSITIVE_SET_ARCS
    all_arcs = (ACCEPT_C, ACCEPTING_STATE) + tuple(flag_arcs) + custom_arcs
    return FST.from_text(HEADER + '\n'.join(all_arcs) + FOOTER)
def test_concatenate_lemma(shared_datadir):
    """Regression test for https://github.com/eddieantonio/fst-lookup/issues/4

    Skipped when the required FST file is unavailable.
    """
    fst_file = shared_datadir / "crk-descriptive-analyzer.fomabin"
    if not fst_file.exists():
        pytest.skip("cannot find " + str(fst_file))
    fst = FST.from_file(fst_file)
    expected = [("pimitâskosin", "+V", "+AI", "+Ind", "+Prs", "+3Sg")]
    assert expected == list(fst.analyze("pimitâskosin"))
def test_concatenate_lemma(shared_datadir):
    """Regression test for https://github.com/eddieantonio/fst-lookup/issues/4

    Skips when the fixture file cannot be found on disk.
    """
    fst_file = shared_datadir / 'crk-descriptive-analyzer.fomabin'
    if not fst_file.exists():
        pytest.skip('cannot find ' + str(fst_file))
    analyses = list(FST.from_file(fst_file).analyze('pimitâskosin'))
    assert analyses == [('pimitâskosin', '+V', '+AI', '+Ind', '+Prs', '+3Sg')]
def test_load_from_file_flipped(shared_datadir):
    """Load an FST whose UPPER side is the surface form (HFST convention
    rather than XFST); labels='invert' should flip it so it behaves exactly
    like the un-flipped FST in test_load_from_file().
    """
    fst = FST.from_file(shared_datadir / 'tae.fomabin', labels='invert')

    # Same assertions as test_load_from_file(), intentionally, but the
    # underlying FST file is different.
    analyses = set(fst.analyze('eats'))
    assert analyses == {('eat', '+N', '+Mass'), ('eat', '+V', '+3P', '+Sg')}
    assert set(fst.generate('eat+V+Past')) == {'ate'}
def test_load_from_file_flipped(shared_datadir):
    """Load an FST stored with its surface form on the UPPER side (HFST
    convention rather than XFST); labels="invert" should flip it so the
    assertions from test_load_from_file() hold verbatim.
    """
    fst = FST.from_file(shared_datadir / "tae.fomabin", labels="invert")
    # These checks intentionally mirror test_load_from_file(), though the
    # underlying FST file differs.
    expected = {
        ("eat", "+N", "+Mass"),
        ("eat", "+V", "+3P", "+Sg"),
    }
    assert set(fst.analyze("eats")) == expected
    assert set(fst.generate("eat+V+Past")) == {"ate"}
def test_flag_fst(surface_form: str, analysis, english_flags_fst: FST):
    """Round-trip analyze/generate on an FST **WITH** simple flag diacritics."""
    assert {analysis} == set(english_flags_fst.analyze(surface_form))
    generated = english_flags_fst.generate(''.join(analysis))
    assert {surface_form} == set(generated)
def test_generate_eat_fst(analysis: str, surface_form: str, eat_fst: FST):
    """Apply down (generate) on an FST **WITHOUT** flag diacritics.

    Exactly one surface form is expected per analysis.
    """
    # NOTE(review): surface_form was annotated `set`, but it is compared for
    # equality against a single generated string, so it must be a str.
    # Tuple-unpacking asserts that generate() yields exactly one result.
    actual, = eat_fst.generate(analysis)
    assert actual == surface_form
def test_cree_hfstol_generation(cree_hfstol_generator: FST, word, analysis):
    """Generation with the Cree hfstol generator yields the expected word(s)."""
    analysis_string = ''.join(analysis)
    assert set(word) == set(cree_hfstol_generator.generate(analysis_string))
def cree_hfstol_generator(shared_datadir: Path) -> FST:
    """Fixture: the Cree normative generator, loaded from hfstol format."""
    path = shared_datadir / 'crk-normative-generator.hfstol'
    return FST.from_file(path, labels='hfstol')
def test_generate_form_outside_of_alphabet(eat_fst: FST):
    """Generation rejects instantly when the input contains symbols outside
    the FST's upper alphabet."""
    results = eat_fst.generate('wug+N+Pl')
    assert set(results) == set()
def cree_foma_analyzer(shared_datadir: Path) -> FST:
    """Fixture: the Cree descriptive analyzer, loaded from fomabin format."""
    path = shared_datadir / 'crk-descriptive-analyzer.fomabin'
    return FST.from_file(path)
def test_cree_foma_generation(cree_foma_generator: FST, word, analysis):
    """Generation with the Cree fomabin generator yields the expected word(s)."""
    joined = ''.join(analysis)
    assert set(word) == set(cree_foma_generator.generate(joined))
def test_cree_foma_analysis(cree_foma_analyzer: FST, surface_form, analyses):
    """Analysis with the Cree fomabin analyzer yields exactly the expected set."""
    actual = set(cree_foma_analyzer.analyze(surface_form))
    assert actual == set(analyses)
def english_ipa_fst(shared_datadir: Path) -> FST:
    """Fixture: an FST that transcribes -ough words to IPA, covering
    through/though/enough/plough/trough/tough/rough/cough/dough."""
    return FST.from_file(shared_datadir / 'english-ipa.fomabin')
def english_flags_fst(english_flags_fst_txt: str) -> FST:
    """Fixture: a flag-diacritic FST that transduces
    pay/payable/unpayable/do/undo/doable/undoable."""
    return FST.from_text(english_flags_fst_txt)
def eat_fst(eat_fst_txt: str) -> FST:
    """Fixture: the FST that analyzes eat/eats/eaten/eating/ate."""
    return FST.from_text(eat_fst_txt)
def test_unacceptable_forms_in_flag_fst(unacceptable_form: str, english_flags_fst: FST):
    """Forms blocked by the flag diacritics must not transduce at all."""
    results = set(english_flags_fst.analyze(unacceptable_form))
    assert results == set()
def test_analyze_eat_fst(surface_form: str, analyses: set, eat_fst: FST):
    """Apply up (analyze) on an FST **WITHOUT** flag diacritics."""
    actual = set(eat_fst.analyze(surface_form))
    assert actual == analyses
def test_analyze_form_outside_of_alphabet(eat_fst: FST):
    """Analysis rejects instantly for characters outside the lower alphabet."""
    results = eat_fst.analyze('mîcisow')
    assert set(results) == set()
def cree_hfstol_analyzer(shared_datadir: Path) -> FST:
    """Fixture: the Cree descriptive analyzer, loaded from hfstol format."""
    path = shared_datadir / 'crk-descriptive-analyzer.hfstol'
    return FST.from_file(path, labels='hfstol')
from fst_lookup import FST
import re

# Module-level FSTs, loaded once at import time from local .fst files.
Nounfst = FST.from_file('./module/Nouns2.fst')
Verbfst = FST.from_file('./module/verbs.fst')
Guesserfst = FST.from_file('./module/NounGuesser.fst')
Adjfst = FST.from_file('./module/Adjectives.fst')
Partfst = FST.from_file('./module/particles.fst')

# Matches one complete HTML/XML tag, e.g. "<b>" or "</span>".
TAG_RE = re.compile(r'<[^>]+>')

def remove_tags(text):
    # Strip every HTML/XML tag from the given text.
    return TAG_RE.sub('', text)

def preprocess_text(sen):
    # Normalize a sentence before FST lookup: strip tags, drop zero-width
    # and punctuation characters.
    # NOTE(review): this function appears truncated here (it ends on a
    # comment with no return) — confirm the remainder in the full file.
    # Removing html tags
    sentence = remove_tags(sen)
    # Drop zero-width space and zero-width joiner.
    sentence=sentence.replace("\u200b","")
    sentence=sentence.replace("\u200d","")
    # Sentence punctuation becomes a space so words stay separated...
    sentence=sentence.replace("."," ")
    sentence=sentence.replace(","," ")
    sentence=sentence.replace("?"," ")
    # ...while quote characters are removed outright.
    sentence=sentence.replace("\'","")
    sentence=sentence.replace("\"","")
    sentence=sentence.replace("‘","")
    sentence=sentence.replace("’","")
    # Removing multiple spaces
def cree_foma_generator(shared_datadir: Path) -> FST:
    """Fixture: the Cree normative generator (fomabin, with inverted labels)."""
    path = shared_datadir / 'crk-normative-generator.fomabin'
    return FST.from_file(path, labels='invert')