예제 #1
0
def test_invalid_invocation(labels, shared_datadir):
    """
    Loading an FST with an unsupported ``labels`` argument must raise
    ValueError.
    """

    fomabin_path = shared_datadir / "eat.fomabin"
    with pytest.raises(ValueError):
        FST.from_file(fomabin_path, labels=labels)
예제 #2
0
def test_cree_hfstol_generation_in_bulk(cree_hfstol_generator: FST,
                                        surface_forms, analyses):
    """
    Bulk generation with the Cree hfstol generator must yield the expected
    surface forms.
    """
    # Each bulk result is converted to a tuple so it can live in a set.
    generated = {
        tuple(forms)
        for forms in cree_hfstol_generator.generate_in_bulk(analyses)
    }
    assert generated == set(surface_forms)
예제 #3
0
def test_analyze_concatenation(english_ipa_fst: FST):
    """
    Check that the pieces of an analysis are concatenated as expected when
    all elements include epsilons.
    """
    analyses = english_ipa_fst.analyze('rough')
    # Unpacking fails unless exactly one analysis is produced.
    (only_analysis,) = analyses
    assert only_analysis == ('ɹʌf', )
예제 #4
0
def test_cree_hfstol_analysis(cree_hfstol_analyzer: FST, surface_form,
                              analyses):
    """
    Analyzing a surface form with the Cree hfstol analyzer must return the
    expected analyses, each compared as a single joined string.
    """
    actual = set(cree_hfstol_analyzer.analyze(surface_form))
    expected = set(map(''.join, analyses))
    assert actual == expected
예제 #5
0
def test_cree_hfstol_analysis_in_bulk(cree_hfstol_analyzer: FST, surface_forms,
                                      analyses):
    """
    Bulk analysis with the Cree hfstol analyzer must return the expected
    analyses for every surface form.
    """
    # Sort each group before comparing so the order of analyses within a
    # group does not matter; tuples make the groups hashable.
    actual = {
        tuple(sorted(group))
        for group in cree_hfstol_analyzer.analyze_in_bulk(surface_forms)
    }
    expected = {tuple(sorted(group)) for group in analyses}
    assert actual == expected
예제 #6
0
def test_cree_foma_generation_in_bulk(cree_foma_generator: FST, surface_forms,
                                      analyses):
    """
    Bulk generation with the Cree fomabin generator must yield the expected
    surface forms.
    """
    bulk_results = cree_foma_generator.generate_in_bulk(analyses)
    # Tuples are hashable, so the results can be compared as a set.
    generated = set(map(tuple, bulk_results))
    assert generated == set(surface_forms)
예제 #7
0
def test_load_from_file(shared_datadir):
    """
    Integration test: load an FST from a file and transduce in both
    directions.
    """
    fomabin_path = shared_datadir / "eat.fomabin"
    fst = FST.from_file(fomabin_path)

    # A single surface form can have more than one analysis.
    expected_analyses = {
        ("eat", "+N", "+Mass"),
        ("eat", "+V", "+3P", "+Sg"),
    }
    assert set(fst.analyze("eats")) == expected_analyses

    # Now run the transducer in the other direction.
    generated = set(fst.generate("eat" "+V" "+Past"))
    assert generated == {"ate"}
예제 #8
0
def test_load_from_file(shared_datadir):
    """
    Integration test: an FST loaded from a file analyzes and generates
    correctly.
    """
    fst = FST.from_file(shared_datadir / 'eat.fomabin')

    # This transduction produces multiple results.
    analyses = set(fst.analyze('eats'))
    assert analyses == {
        ('eat', '+N', '+Mass'),
        ('eat', '+V', '+3P', '+Sg'),
    }

    # Going from the analysis back to the surface form.
    surface_forms = set(fst.generate('eat' '+V' '+Past'))
    assert surface_forms == {'ate'}
예제 #9
0
def make_fst(*custom_arcs: str, a_and_b='positive') -> FST:
    """
    Build an FST from the shared header/footer plus the given arcs.

    To make a complete FST, pass one or more arcs that go from state 1 to
    state 2. Arcs into state 1 already exist: they set x <- a, set x <- b,
    or leave x undefined.

    a_and_b selects how those existing arcs set x: 'unify' uses @U.x.V@
    flags; any other value (default 'positive') uses @P.x.V@ flags.
    """

    if a_and_b == 'unify':
        setter_arcs = UNIFY_ARCS
    else:
        setter_arcs = POSITIVE_SET_ARCS

    # Fixed arcs first, then the flag-setting arcs, then the caller's arcs.
    lines = [ACCEPT_C, ACCEPTING_STATE]
    lines.extend(setter_arcs)
    lines.extend(custom_arcs)

    body = '\n'.join(lines)
    return FST.from_text(HEADER + body + FOOTER)
예제 #10
0
def test_concatenate_lemma(shared_datadir):
    """
    Regression test for https://github.com/eddieantonio/fst-lookup/issues/4

    The test is skipped when the analyzer file is not present.
    """
    fst_file = shared_datadir / "crk-descriptive-analyzer.fomabin"
    if not fst_file.exists():
        pytest.skip(f"cannot find {fst_file!s}")

    fst = FST.from_file(fst_file)

    expected = [("pimitâskosin", "+V", "+AI", "+Ind", "+Prs", "+3Sg")]
    actual = list(fst.analyze("pimitâskosin"))
    assert expected == actual
def test_concatenate_lemma(shared_datadir):
    """
    Regression test for https://github.com/eddieantonio/fst-lookup/issues/4

    Skipped when the Cree analyzer file cannot be found.
    """
    fst_file = shared_datadir / 'crk-descriptive-analyzer.fomabin'
    if not fst_file.exists():
        pytest.skip(f'cannot find {fst_file!s}')

    analyzer = FST.from_file(fst_file)
    actual = list(analyzer.analyze('pimitâskosin'))

    # The lemma must come back as one element, followed by the tags.
    expected = [('pimitâskosin', '+V', '+AI', '+Ind', '+Prs', '+3Sg')]
    assert expected == actual
예제 #12
0
def test_load_from_file_flipped(shared_datadir):
    """
    Integration test: load an FST from a file whose
    UPPER side is the surface form and whose
    LOWER side is the deep form
    (following HFST conventions rather than XFST conventions).
    """
    flipped_path = shared_datadir / 'tae.fomabin'
    fst = FST.from_file(flipped_path, labels='invert')

    # The assertions below are INTENTIONALLY the same as in
    # test_load_from_file(); only the FST being loaded differs.

    # This transduction produces multiple results.
    expected_analyses = {
        ('eat', '+N', '+Mass'),
        ('eat', '+V', '+3P', '+Sg'),
    }
    assert set(fst.analyze('eats')) == expected_analyses

    # Now run the transducer in the other direction.
    generated = set(fst.generate('eat' '+V' '+Past'))
    assert generated == {'ate'}
예제 #13
0
def test_load_from_file_flipped(shared_datadir):
    """
    Integration test for an FST file with flipped sides: its
    UPPER side is the surface form and its
    LOWER side is the deep form
    (following HFST conventions rather than XFST conventions).
    """
    fst = FST.from_file(shared_datadir / "tae.fomabin", labels="invert")

    # These checks are INTENTIONALLY identical to test_load_from_file();
    # the FST loaded here is a different one.

    # One surface form, several analyses.
    analyses = set(fst.analyze("eats"))
    assert analyses == {
        ("eat", "+N", "+Mass"),
        ("eat", "+V", "+3P", "+Sg"),
    }

    # Going from the analysis back to the surface form.
    surface_forms = set(fst.generate("eat" "+V" "+Past"))
    assert surface_forms == {"ate"}
예제 #14
0
def test_flag_fst(surface_form: str, analysis, english_flags_fst: FST):
    """
    Round-trip a form through an FST **WITH** simple flag diacritics:
    analyzing gives exactly the expected analysis, and generating from
    that analysis gives back the surface form.
    """
    analyses = set(english_flags_fst.analyze(surface_form))
    assert analyses == {analysis}

    # generate() is given the analysis joined into one string.
    deep_form = ''.join(analysis)
    generated = set(english_flags_fst.generate(deep_form))
    assert generated == {surface_form}
예제 #15
0
def test_generate_eat_fst(analysis: str, surface_form: set, eat_fst: FST):
    """
    Apply down (generate) on an FST **WITHOUT** flag diacritics and check
    the single generated form.
    """
    # Unpacking fails unless generation yields exactly one form.
    (generated,) = eat_fst.generate(analysis)
    assert generated == surface_form
예제 #16
0
def test_cree_hfstol_generation(cree_hfstol_generator: FST, word, analysis):
    """
    Generating from an analysis with the Cree hfstol generator must give
    the expected word(s).
    """
    deep_form = ''.join(analysis)
    generated = set(cree_hfstol_generator.generate(deep_form))
    assert generated == set(word)
예제 #17
0
def cree_hfstol_generator(shared_datadir: Path) -> FST:
    """
    Load the Cree normative generator from its hfstol file in the shared
    data directory.
    """
    hfstol_path = shared_datadir / 'crk-normative-generator.hfstol'
    return FST.from_file(hfstol_path, labels='hfstol')
예제 #18
0
def test_generate_form_outside_of_alphabet(eat_fst: FST):
    """
    A form containing characters outside of the upper alphabet should be
    rejected instantly, so generation produces no results.
    """
    generated = set(eat_fst.generate('wug' '+N' '+Pl'))
    assert generated == set()
예제 #19
0
def cree_foma_analyzer(shared_datadir: Path) -> FST:
    """
    Load the Cree descriptive analyzer from its fomabin file in the shared
    data directory.
    """
    fomabin_path = shared_datadir / 'crk-descriptive-analyzer.fomabin'
    return FST.from_file(fomabin_path)
예제 #20
0
def test_cree_foma_generation(cree_foma_generator: FST, word, analysis):
    """
    Generating from an analysis with the Cree fomabin generator must give
    the expected word(s).
    """
    deep_form = ''.join(analysis)
    generated = set(cree_foma_generator.generate(deep_form))
    assert generated == set(word)
예제 #21
0
def test_cree_foma_analysis(cree_foma_analyzer: FST, surface_form, analyses):
    """
    Analyzing a surface form with the Cree fomabin analyzer must return the
    expected analyses.
    """
    actual = set(cree_foma_analyzer.analyze(surface_form))
    expected = set(analyses)
    assert actual == expected
예제 #22
0
def english_ipa_fst(shared_datadir: Path) -> FST:
    """
    Load the FST that transcribes -ough words to IPA, including
    through/though/enough/plough/trough/tough/rough/cough/dough.
    """
    fomabin_path = shared_datadir / 'english-ipa.fomabin'
    return FST.from_file(fomabin_path)
예제 #23
0
def english_flags_fst(english_flags_fst_txt: str) -> FST:
    """
    Build, from its text source, the FST that uses flag diacritics and
    transduces pay/payable/unpayable/do/undo/doable/undoable.
    """
    flags_fst = FST.from_text(english_flags_fst_txt)
    return flags_fst
예제 #24
0
def eat_fst(eat_fst_txt: str) -> FST:
    """
    Build, from its text source, the FST that analyzes
    eat/eats/eaten/eating/ate.
    """
    parsed_fst = FST.from_text(eat_fst_txt)
    return parsed_fst
예제 #25
0
def test_unacceptable_forms_in_flag_fst(unacceptable_form: str,
                                        english_flags_fst: FST):
    """
    Forms that should not transduce must produce no analyses on an FST
    **WITH** simple flag diacritics.
    """
    analyses = set(english_flags_fst.analyze(unacceptable_form))
    assert analyses == set()
예제 #26
0
def test_analyze_eat_fst(surface_form: str, analyses: set, eat_fst: FST):
    """
    Apply up (analyze) on an FST **WITHOUT** flag diacritics and compare
    against the expected set of analyses.
    """
    actual = set(eat_fst.analyze(surface_form))
    assert actual == analyses
예제 #27
0
def test_analyze_form_outside_of_alphabet(eat_fst: FST):
    """
    A form containing characters outside of the lower alphabet should be
    rejected instantly, so analysis produces no results.
    """
    analyses = set(eat_fst.analyze('mîcisow'))
    assert analyses == set()
예제 #28
0
def cree_hfstol_analyzer(shared_datadir: Path) -> FST:
    """
    Load the Cree descriptive analyzer from its hfstol file in the shared
    data directory.
    """
    hfstol_path = shared_datadir / 'crk-descriptive-analyzer.hfstol'
    return FST.from_file(hfstol_path, labels='hfstol')
예제 #29
0
from fst_lookup import FST
import re



# Load every transducer once, when the module is imported. The paths are
# relative, so they resolve against the process's current working directory.
# File names indicate one FST each for nouns, verbs, a noun guesser,
# adjectives and particles.
Nounfst = FST.from_file('./module/Nouns2.fst')
Verbfst = FST.from_file('./module/verbs.fst')
Guesserfst = FST.from_file('./module/NounGuesser.fst')
Adjfst = FST.from_file('./module/Adjectives.fst')
Partfst = FST.from_file('./module/particles.fst')

# Matches "<", one or more characters other than ">", then ">".
TAG_RE = re.compile(r'<[^>]+>')


def remove_tags(text):
    """Return ``text`` with every ``<...>`` tag-like span deleted."""
    without_tags = re.sub(TAG_RE, '', text)
    return without_tags

def preprocess_text(sen):
    """
    Strip markup and selected punctuation from ``sen``.

    Visible steps: tag-like spans are removed via ``remove_tags``; the
    zero-width characters U+200B and U+200D are deleted; ``.``, ``,`` and
    ``?`` are each replaced by a space; straight single and double quotes
    and curly single quotes are deleted.

    NOTE(review): the visible block ends at the "Removing multiple spaces"
    comment. No whitespace-collapsing code and no ``return`` statement are
    shown here; confirm against the complete source.
    """
    # Removing html tags
    sentence = remove_tags(sen)
    # Delete zero-width space and zero-width joiner.
    sentence=sentence.replace("\u200b","")
    sentence=sentence.replace("\u200d","")
    # Sentence punctuation becomes a space.
    sentence=sentence.replace("."," ")
    sentence=sentence.replace(","," ")
    sentence=sentence.replace("?"," ")
    # Quote characters are dropped outright (no space inserted).
    sentence=sentence.replace("\'","")
    sentence=sentence.replace("\"","")
    sentence=sentence.replace("‘","")
    sentence=sentence.replace("’","")


    # Removing multiple spaces
예제 #30
0
def cree_foma_generator(shared_datadir: Path) -> FST:
    """
    Load the Cree normative generator from its fomabin file, passing
    ``labels='invert'`` to the loader.
    """
    fomabin_path = shared_datadir / 'crk-normative-generator.fomabin'
    return FST.from_file(fomabin_path, labels='invert')