Code Example #1
def test_invalid_invocation(labels, shared_datadir):
    """
    Try using an incorrect value for the labels argument.
    """

    with pytest.raises(ValueError):
        FST.from_file(shared_datadir / "eat.fomabin", labels=labels)
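In the source test suite the labels argument of this test is presumably supplied by pytest parametrization, which the snippet above does not show. Below is a minimal sketch of how invalid values for labels could be parametrized, assuming the standard pytest.mark.parametrize decorator; the specific values are illustrative assumptions, not taken from the source.

import pytest
from fst_lookup import FST

# Hypothetical parametrization: each value is assumed to be rejected by FST.from_file().
@pytest.mark.parametrize("labels", ["not-a-real-option", "bogus"])
def test_invalid_invocation(labels, shared_datadir):
    with pytest.raises(ValueError):
        FST.from_file(shared_datadir / "eat.fomabin", labels=labels)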
Code Example #2
def test_load_from_file(shared_datadir):
    """
    Integration test for loading the FST from a file.
    """
    fst = FST.from_file(shared_datadir / "eat.fomabin")
    # Do a transduction that outputs multiple results.
    assert set(fst.analyze("eats")) == {
        ("eat", "+N", "+Mass"),
        ("eat", "+V", "+3P", "+Sg"),
    }
    # Transduce the other way!
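    # Note: the adjacent string literals below concatenate to 'eat+V+Past'.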
    assert set(fst.generate("eat" "+V" "+Past")) == {"ate"}
Code Example #3
def test_load_from_file(shared_datadir):
    """
    Integration test for loading the FST from a file.
    """
    fst = FST.from_file(shared_datadir / 'eat.fomabin')
    # Do a transduction that outputs multiple results.
    assert set(fst.analyze('eats')) == {
            ('eat', '+N', '+Mass'),
            ('eat', '+V', '+3P', '+Sg'),
    }
    # Transduce the other way!
    assert set(fst.generate('eat' '+V' '+Past')) == {'ate'}
Code Example #4
def test_concatenate_lemma(shared_datadir):
    """
    Test https://github.com/eddieantonio/fst-lookup/issues/4

    Skips if the file is not found.
    """
    fst_file = shared_datadir / "crk-descriptive-analyzer.fomabin"
    if not fst_file.exists():
        pytest.skip("cannot find " + str(fst_file))

    fst = FST.from_file(fst_file)

    actual = list(fst.analyze("pimitâskosin"))
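    # The lemma should come back as a single concatenated string followed by its tags (see issue #4 above).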
    assert [("pimitâskosin", "+V", "+AI", "+Ind", "+Prs", "+3Sg")] == actual
Code Example #5
def test_concatenate_lemma(shared_datadir):
    """
    Test https://github.com/eddieantonio/fst-lookup/issues/4

    Skips if the file is not found.
    """
    fst_file = shared_datadir / 'crk-descriptive-analyzer.fomabin'
    if not fst_file.exists():
        pytest.skip('cannot find ' + str(fst_file))

    fst = FST.from_file(fst_file)

    actual = list(fst.analyze('pimitâskosin'))
    assert [('pimitâskosin', '+V', '+AI', '+Ind', '+Prs', '+3Sg')] == actual
Code Example #6
def test_load_from_file_flipped(shared_datadir):
    """
    Integration test for loading an FST from a file whose
    UPPER side is the surface form and whose
    LOWER side is the deep form
    (following HFST conventions rather than XFST conventions).
    """
    fst = FST.from_file(shared_datadir / "tae.fomabin", labels="invert")

    # The following tests are INTENTIONALLY the same as for
    # test_load_from_file(). However, the FST is different than in that test.

    # Do a transduction that outputs multiple results.
    assert set(fst.analyze("eats")) == {
        ("eat", "+N", "+Mass"),
        ("eat", "+V", "+3P", "+Sg"),
    }
    # Transduce the other way!
    assert set(fst.generate("eat" "+V" "+Past")) == {"ate"}
Code Example #7
def test_load_from_file_flipped(shared_datadir):
    """
    Integration test for loading an FST from a file whose
    UPPER side is the surface form and whose
    LOWER side is the deep form
    (following HFST conventions rather than XFST conventions).
    """
    fst = FST.from_file(shared_datadir / 'tae.fomabin', labels='invert')

    # The following tests are INTENTIONALLY the same as for
    # test_load_from_file(). However, the FST is different than in that test.

    # Do a transduction that outputs multiple results.
    assert set(fst.analyze('eats')) == {
            ('eat', '+N', '+Mass'),
            ('eat', '+V', '+3P', '+Sg'),
    }
    # Transduce the other way!
    assert set(fst.generate('eat' '+V' '+Past')) == {'ate'}
Code Example #8
File: conftest.py Project: Madoshakalaka/fst-lookup
def english_ipa_fst(shared_datadir: Path) -> FST:
    """
    Return the FST that transcribes -ough words to IPA, including
    through/though/enough/plough/trough/tough/rough/cough/dough.
    """
    return FST.from_file(shared_datadir / 'english-ipa.fomabin')
Code Example #9
File: conftest.py Project: Madoshakalaka/fst-lookup
def cree_hfstol_generator(shared_datadir: Path) -> FST:
    """
    Return the Cree normative generator FST (HFSTOL format).
    """
    return FST.from_file(shared_datadir / 'crk-normative-generator.hfstol', labels='hfstol')
Code Example #10
File: conftest.py Project: Madoshakalaka/fst-lookup
def cree_foma_generator(shared_datadir: Path) -> FST:
    """
    Return the Cree normative generator FST (foma format).
    """
    return FST.from_file(shared_datadir / 'crk-normative-generator.fomabin', labels='invert')
Code Example #11
File: conftest.py Project: Madoshakalaka/fst-lookup
def cree_foma_analyzer(shared_datadir: Path) -> FST:
    """
    Return the Cree descriptive analyzer FST (foma format).
    """
    return FST.from_file(shared_datadir / 'crk-descriptive-analyzer.fomabin')
Code Example #12
File: conftest.py Project: Madoshakalaka/fst-lookup
def cree_hfstol_analyzer(shared_datadir: Path) -> FST:
    """
    Return the Cree descriptive analyzer FST (HFSTOL format).
    """
    return FST.from_file(shared_datadir / 'crk-descriptive-analyzer.hfstol', labels='hfstol')
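The conftest.py functions in the examples above are presumably registered as pytest fixtures; the decorator is not shown in the snippets (shared_datadir is the fixture provided by the pytest-datadir plugin). Below is a minimal sketch of how such a fixture would be declared and then consumed by a test, assuming the usual @pytest.fixture decoration; the consuming test is an illustration, not from the source.

import pytest
from pathlib import Path
from fst_lookup import FST


@pytest.fixture  # assumed decoration; the snippets above omit it
def cree_foma_analyzer(shared_datadir: Path) -> FST:
    """
    Return the Cree descriptive analyzer FST (foma format).
    """
    return FST.from_file(shared_datadir / 'crk-descriptive-analyzer.fomabin')


def test_analyzer_fixture(cree_foma_analyzer: FST) -> None:
    # pytest injects the fixture by name; the exact analyses depend on the FST file.
    analyses = list(cree_foma_analyzer.analyze('pimitâskosin'))
    assert analyses  # at least one analysis is expected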
Code Example #13
File: pre_processing.py Project: avcjeewantha/SinSRL
from fst_lookup import FST
import re

Nounfst = FST.from_file('./module/Nouns2.fst')
Verbfst = FST.from_file('./module/verbs.fst')
Guesserfst = FST.from_file('./module/NounGuesser.fst')
Adjfst = FST.from_file('./module/Adjectives.fst')
Partfst = FST.from_file('./module/particles.fst')

TAG_RE = re.compile(r'<[^>]+>')

def remove_tags(text):
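    # Strip anything matching TAG_RE (i.e., HTML/XML tags such as '<p>' or '</div>') from the text.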
    return TAG_RE.sub('', text)

def preprocess_text(sen):
    # Removing html tags
    sentence = remove_tags(sen)
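    # Drop zero-width space/joiner characters, turn basic punctuation into spaces, and strip quote marks.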
    sentence = sentence.replace("\u200b", "")
    sentence = sentence.replace("\u200d", "")
    sentence = sentence.replace(".", " ")
    sentence = sentence.replace(",", " ")
    sentence = sentence.replace("?", " ")
    sentence = sentence.replace("\'", "")
    sentence = sentence.replace("\"", "")
    sentence = sentence.replace("‘", "")
    sentence = sentence.replace("’", "")


    # Removing multiple spaces