def test_invalid_invocation(labels, shared_datadir):
    """
    Loading an FST with an unsupported ``labels`` argument must fail.

    ``labels`` is whatever value the caller supplies (presumably through
    test parametrization declared outside this function -- confirm).  For
    each such value, ``FST.from_file`` is expected to raise ``ValueError``.
    """
    fst_path = shared_datadir / "eat.fomabin"
    with pytest.raises(ValueError):
        FST.from_file(fst_path, labels=labels)
def test_load_from_file(shared_datadir):
    """
    Integration test: load ``eat.fomabin`` from disk and transduce with it
    in both directions.
    """
    fst = FST.from_file(shared_datadir / "eat.fomabin")

    # One surface form may produce several analyses; compare as a set so
    # the order in which they are yielded does not matter.
    analyses = set(fst.analyze("eats"))
    assert analyses == {
        ("eat", "+N", "+Mass"),
        ("eat", "+V", "+3P", "+Sg"),
    }

    # Now go the other way.  The adjacent string literals are joined by
    # Python into the single input "eat+V+Past"; they are kept separate
    # only to make the lemma and its tags easy to read.
    generated = set(fst.generate("eat" "+V" "+Past"))
    assert generated == {"ate"}
def test_load_from_file(shared_datadir):
    """
    Integration test: load ``eat.fomabin`` from disk and transduce with it
    in both directions.
    """
    fst = FST.from_file(shared_datadir / 'eat.fomabin')

    # One surface form may produce several analyses; compare as a set so
    # the order in which they are yielded does not matter.
    analyses = set(fst.analyze('eats'))
    assert analyses == {
        ('eat', '+N', '+Mass'),
        ('eat', '+V', '+3P', '+Sg'),
    }

    # Now go the other way.  The adjacent string literals are joined by
    # Python into the single input 'eat+V+Past'; they are kept separate
    # only to make the lemma and its tags easy to read.
    generated = set(fst.generate('eat' '+V' '+Past'))
    assert generated == {'ate'}
def test_concatenate_lemma(shared_datadir):
    """
    Regression test for
    https://github.com/eddieantonio/fst-lookup/issues/4

    The test is skipped when ``crk-descriptive-analyzer.fomabin`` is not
    present in the shared data directory.
    """
    fst_file = shared_datadir / "crk-descriptive-analyzer.fomabin"
    if not fst_file.exists():
        reason = "cannot find " + str(fst_file)
        pytest.skip(reason)

    fst = FST.from_file(fst_file)
    actual = list(fst.analyze("pimitâskosin"))

    # Exactly one analysis is expected, with the lemma as a single element
    # followed by its tags.
    expected = [("pimitâskosin", "+V", "+AI", "+Ind", "+Prs", "+3Sg")]
    assert expected == actual
def test_concatenate_lemma(shared_datadir):
    """
    Regression test for
    https://github.com/eddieantonio/fst-lookup/issues/4

    The test is skipped when ``crk-descriptive-analyzer.fomabin`` is not
    present in the shared data directory.
    """
    fst_file = shared_datadir / 'crk-descriptive-analyzer.fomabin'
    if not fst_file.exists():
        reason = 'cannot find ' + str(fst_file)
        pytest.skip(reason)

    fst = FST.from_file(fst_file)
    actual = list(fst.analyze('pimitâskosin'))

    # Exactly one analysis is expected, with the lemma as a single element
    # followed by its tags.
    expected = [('pimitâskosin', '+V', '+AI', '+Ind', '+Prs', '+3Sg')]
    assert expected == actual
def test_load_from_file_flipped(shared_datadir):
    """
    Integration test: load an FST whose UPPER side is the surface form and
    whose LOWER side is the deep form (following HFST conventions rather
    than XFST conventions).

    The file is loaded with ``labels="invert"``.
    """
    fst = FST.from_file(shared_datadir / "tae.fomabin", labels="invert")

    # The assertions below are INTENTIONALLY identical to those in
    # test_load_from_file(); only the FST being loaded is different.

    # One surface form may produce several analyses.
    analyses = set(fst.analyze("eats"))
    assert analyses == {
        ("eat", "+N", "+Mass"),
        ("eat", "+V", "+3P", "+Sg"),
    }

    # Now go the other way.  The adjacent string literals are joined by
    # Python into the single input "eat+V+Past".
    generated = set(fst.generate("eat" "+V" "+Past"))
    assert generated == {"ate"}
def test_load_from_file_flipped(shared_datadir):
    """
    Integration test: load an FST whose UPPER side is the surface form and
    whose LOWER side is the deep form (following HFST conventions rather
    than XFST conventions).

    The file is loaded with ``labels='invert'``.
    """
    fst = FST.from_file(shared_datadir / 'tae.fomabin', labels='invert')

    # The assertions below are INTENTIONALLY identical to those in
    # test_load_from_file(); only the FST being loaded is different.

    # One surface form may produce several analyses.
    analyses = set(fst.analyze('eats'))
    assert analyses == {
        ('eat', '+N', '+Mass'),
        ('eat', '+V', '+3P', '+Sg'),
    }

    # Now go the other way.  The adjacent string literals are joined by
    # Python into the single input 'eat+V+Past'.
    generated = set(fst.generate('eat' '+V' '+Past'))
    assert generated == {'ate'}
def english_ipa_fst(shared_datadir: Path) -> FST:
    """
    Load the FST that transcribes -ough words to IPA, including
    through/though/enough/plough/trough/tough/rough/cough/dough.

    The transducer is read from ``english-ipa.fomabin`` inside
    ``shared_datadir``.
    """
    fst_path = shared_datadir / 'english-ipa.fomabin'
    return FST.from_file(fst_path)
def cree_hfstol_generator(shared_datadir: Path) -> FST:
    """
    Load the Cree FST stored in ``crk-normative-generator.hfstol``.

    The file is looked up inside ``shared_datadir`` and loaded with
    ``labels='hfstol'``.
    """
    fst_path = shared_datadir / 'crk-normative-generator.hfstol'
    return FST.from_file(fst_path, labels='hfstol')
def cree_foma_generator(shared_datadir: Path) -> FST:
    """
    Load the Cree FST stored in ``crk-normative-generator.fomabin``.

    The file is looked up inside ``shared_datadir`` and loaded with
    ``labels='invert'``.
    """
    fst_path = shared_datadir / 'crk-normative-generator.fomabin'
    return FST.from_file(fst_path, labels='invert')
def cree_foma_analyzer(shared_datadir: Path) -> FST:
    """
    Load the Cree FST stored in ``crk-descriptive-analyzer.fomabin``.

    The file is looked up inside ``shared_datadir`` and loaded without an
    explicit ``labels`` argument.
    """
    fst_path = shared_datadir / 'crk-descriptive-analyzer.fomabin'
    return FST.from_file(fst_path)
def cree_hfstol_analyzer(shared_datadir: Path) -> FST:
    """
    Load the Cree FST stored in ``crk-descriptive-analyzer.hfstol``.

    The file is looked up inside ``shared_datadir`` and loaded with
    ``labels='hfstol'``.
    """
    fst_path = shared_datadir / 'crk-descriptive-analyzer.hfstol'
    return FST.from_file(fst_path, labels='hfstol')
from fst_lookup import FST import re Nounfst = FST.from_file('./module/Nouns2.fst') Verbfst = FST.from_file('./module/verbs.fst') Guesserfst = FST.from_file('./module/NounGuesser.fst') Adjfst = FST.from_file('./module/Adjectives.fst') Partfst = FST.from_file('./module/particles.fst') TAG_RE = re.compile(r'<[^>]+>') def remove_tags(text): return TAG_RE.sub('', text) def preprocess_text(sen): # Removing html tags sentence = remove_tags(sen) sentence=sentence.replace("\u200b","") sentence=sentence.replace("\u200d","") sentence=sentence.replace("."," ") sentence=sentence.replace(","," ") sentence=sentence.replace("?"," ") sentence=sentence.replace("\'","") sentence=sentence.replace("\"","") sentence=sentence.replace("‘","") sentence=sentence.replace("’","") # Removing multiple spaces