Beispiel #1
0
def symbol_pair_to_fst(insym, outsym):
    """Return a FST which accepts the one-pair string 'insym:outsym'.

    insym -- the input side symbol of the pair

    outsym -- the output side symbol of the pair
    """
    bfst = hfst.HfstBasicTransducer()
    # NOTE(review): this is a single (insym, outsym) tuple, exactly as in the
    # original; confirm whether disjunct() expects a sequence of such pairs,
    # i.e. ((insym, outsym),).
    string_pair_path = (insym, outsym)
    # The path must be added to 'bfst', the transducer created above; the
    # earlier code referred to an undefined name here and raised NameError.
    bfst.disjunct(string_pair_path, 0)
    fst = hfst.fst(bfst)
    return fst
Beispiel #2
0
def symbol_to_fsa(sym):
    """Build a FSA which accepts only the one-symbol string 'sym'.

    'sym' need not be a single character: it may be, for instance, a
    composed Unicode grapheme written with two or more characters.
    """
    basic = hfst.HfstBasicTransducer()
    identity_path = (sym, sym)
    basic.disjunct(identity_path, 0)
    return hfst.fst(basic)
Beispiel #3
0
def main():
    """Invoke a simple CLI analyser.

    Lines are read from the file given with -i/--input, or from standard
    input when that option is missing.  An empty line prints a sentence
    separator, a line starting with '#' is echoed as is, and any other
    line is split on tabs and its second column is analysed with the
    analyser loaded from -a/--analyser.  Every surface/analysis pair is
    printed; when there are no analyses a fixed NOUN guess is printed
    instead.
    """
    argp = ArgumentParser()
    argp.add_argument('-a', '--analyser', metavar='FSA', required=True,
                      help="Path to FSA analyser")
    argp.add_argument('-i', '--input', metavar="INFILE", type=open,
                      dest="infile", help="source of analysis data in CONLLU")
    options = argp.parse_args()
    analyser = load_analyser(options.analyser)
    # NOTE(review): 'sentence' is built up below but never written out or
    # reset within this function -- confirm what it is meant to be used for.
    sentence = hfst.epsilon_fst()
    if not options.infile:
        options.infile = stdin
    for line in options.infile:
        line = line.strip()
        if not line:
            print("@SENTENCE_SEPARATOR@")
        elif line.startswith('#'):
            print(line)
        else:
            # The line is already stripped; the surface form is column two.
            surf = line.split('\t')[1]
            anals = analyse(analyser, surf)
            if anals:
                # Alternative analyses of one token are disjuncted together.
                lattice = hfst.empty_fst()
                for anal in anals:
                    deep = anal[0]
                    print(surf, deep)
                    lattice.disjunct(hfst.fst({surf: deep}))
                sentence.concatenate(lattice)
            else:
                deep = surf + "|NOUN|Case=Nom|Number=Sing|Guess=Yes|nsubj"
                print(surf, deep)
                sentence.concatenate(hfst.fst({surf: deep}))
            # NOTE(review): the printed marker contains a space while the
            # symbol added to the FST contains an underscore -- confirm
            # which spelling is intended.
            print("@TOKEN SEPARATOR@")
            sentence.concatenate(hfst.fst("@TOKEN_SEPARATOR@"))
    exit(0)
Beispiel #4
0
def align_two_words(in_word, out_word, aligner_fst, zero, number):
    """Align two words through 'aligner_fst' and return the raw paths.

    in_word -- the word on the input side of the composition

    out_word -- the word on the output side of the composition

    aligner_fst -- the transducer composed between the two word automata

    zero -- the symbol inserted freely (as the pair zero:zero) into both words

    number -- the argument passed to n_best()

    Returns the value of extract_paths(output='raw') for the composed,
    pruned and minimized transducer.
    """
    def with_free_zeros(word):
        # Word automaton in which zero:zero may occur anywhere.
        fsa = hfst.fst(word)
        fsa.insert_freely((zero, zero))
        fsa.minimize()
        return fsa

    source_fsa = with_free_zeros(in_word)
    target_fsa = with_free_zeros(out_word)

    # Compose a copy of the source automaton, so it is not modified itself.
    alignments = hfst.HfstTransducer(source_fsa)
    alignments.compose(aligner_fst)
    alignments.compose(target_fsa)

    alignments.n_best(number)
    alignments.minimize()

    raw_paths = alignments.extract_paths(output='raw')
    if cfg.verbosity >= 10:
        print("raw_paths:", raw_paths)
    return raw_paths
Beispiel #5
0
def shuffle_with_zeros(string, target_length):
    """Return a fsa in which zeros are shuffled into 'string'

    string -- the string into which zero symbols 'Ø' are inserted

    target_length -- the length the strings must have after the insertions

    When 'string' is shorter than target_length, the missing number of
    zero symbols is shuffled into it in all possible ways; otherwise no
    zeros are added.  The resulting fsa is minimized and named after
    'string'.
    """
    padded_fsa = hfst.fst(string)
    missing = target_length - len(string)
    if missing > 0:
        # One 'Ø' per missing position, separated by spaces for the regex.
        zeros_regex = ' '.join(['Ø'] * missing)
        padded_fsa.shuffle(hfst.regex(zeros_regex))
    padded_fsa.minimize()
    padded_fsa.set_name(string)
    if cfg.verbosity >= 30:
        print("shuffle_with_zeros:")
        print(padded_fsa)
    return padded_fsa
Beispiel #6
0
 def to_fst(self) -> hfst.HfstTransducer:
     """Return the transducer that hfst.fst() builds from self.symstr."""
     symbols = self.symstr
     return hfst.fst(symbols)
Beispiel #7
0
 def test_fst(input, result):
     """Check that hfst.fst(input) equals the regex 'result'.

     input -- the argument handed to hfst.fst()

     result -- a regular expression compiled with hfst.regex()

     Raises RuntimeError when the two transducers do not compare equal.
     """
     built = hfst.fst(input)
     expected = hfst.regex(result)
     if not built.compare(expected):
         # 'input' is not necessarily a str (e.g. a list, tuple or dict), so
         # it is converted explicitly; plain concatenation would raise
         # TypeError and hide the actual test failure.
         raise RuntimeError('test_fst failed with input: ' + str(input))
Beispiel #8
0
    f.close()

    # Each test_fst(input, regex) call passes 'input' to hfst.fst() and a
    # regular expression describing the expected transducer.

    # Create automaton:
    # unweighted
    test_fst('foobar', '[f o o b a r]')
    test_fst(['foobar'], '[f o o b a r]')
    test_fst(['foobar', 'foobaz'], '[f o o b a [r|z]]')
    # with weights
    test_fst(('foobar', 0.3), '[f o o b a r]::0.3')
    test_fst([('foobar', 0.5)], '[f o o b a r]::0.5')
    test_fst(['foobar', ('foobaz', -2)], '[ f o o b a [r|[z::-2]] ]')
    # Special inputs
    test_fst('*** FOO ***', '{*** FOO ***}')

    # The empty string must give a transducer equal to the epsilon transducer.
    foo = hfst.fst('')
    eps = hfst.epsilon_fst()
    assert(foo.compare(eps))
    # Disabled alternative check, which expected hfst.fst('') to raise a
    # RuntimeError with the message 'Empty word.':
    #try:
    #    foo = hfst.fst('')
    #    raise RuntimeError(get_linenumber())
    #except RuntimeError as e:
    #    if not e.__str__() == 'Empty word.':
    #        raise RuntimeError(get_linenumber())

    # Create transducer:
    # unweighted
    # A dict maps each input string to one output string or to a list/tuple
    # of alternative output strings.
    test_fst({'foobar':'foobaz'}, '[f o o b a r:z]')
    test_fst({'foobar':['foobar','foobaz']}, '[f o o b a [r|r:z]]')
    test_fst({'foobar':('foobar','foobaz')}, '[f o o b a [r|r:z]]')
    test_fst({'foobar':'foobaz', 'FOOBAR':('foobar','FOOBAR'), 'Foobar':['Foo','bar','Foobar']}, '[f o o b a r:z] | [F O O B A R] | [F:f O:o O:o B:b A:a R:r] | [F o o b:0 a:0 r:0] | [F:b o:a o:r b:0 a:0 r:0] | [F o o b a r]')
Beispiel #9
0
        # TransducerTypeMismatchException:
        # Only exercised when FOMA_TYPE is among 'types': tr1 is compiled with
        # the default type set to TROPICAL_OPENFST_TYPE, tr2 is converted to
        # FOMA_TYPE, and disjuncting the two is attempted.
        if hfst.ImplementationType.FOMA_TYPE in types:
            hfst.set_default_fst_type(hfst.ImplementationType.TROPICAL_OPENFST_TYPE)
            tr1 = hfst.regex('foo')
            tr2 = hfst.regex('bar')
            tr2.convert(hfst.ImplementationType.FOMA_TYPE)
            try:
                tr1.disjunct(tr2)
            except hfst.exceptions.TransducerTypeMismatchException:
                print('The implementation types of transducers must be the same.')

            # 'type' is defined outside this excerpt -- presumably the
            # implementation type currently under test; verify in the caller.
            hfst.set_default_fst_type(type)

        # fst
        # Each check compares hfst.fst(...) with the equivalent regex and
        # raises RuntimeError (with an empty message) when they differ.
        # One unweighted identity path:
        if not hfst.fst('foo').compare(hfst.regex('{foo}')):
            raise RuntimeError('')
        # Weighted path: a tuple of string and number, e.g.
        if not hfst.fst(('foo',1.4)).compare(hfst.regex('{foo}::1.4')):
            raise RuntimeError('')
        if not hfst.fst(('bar',-3)).compare(hfst.regex('{bar}::-3')):
            raise RuntimeError('')
        if not hfst.fst(('baz',0)).compare(hfst.regex('{baz}')):
            raise RuntimeError('')
        # Several paths: a list or a tuple of paths and/or weighted paths, e.g.
        if not hfst.fst(['foo', 'bar']).compare(hfst.regex('{foo}|{bar}')):
            raise RuntimeError('')
        if not hfst.fst(('foo', ('bar',5.0))).compare(hfst.regex('{foo}|{bar}::5.0')):
            raise RuntimeError('')
        if not hfst.fst(('foo', ('bar',5.0), 'baz', 'Foo', ('Bar',2.4))).compare(hfst.regex('{foo}|{bar}::5.0|{baz}|{Foo}|{Bar}::2.4')):
            raise RuntimeError('')
Beispiel #10
0
 def test_fst(input, result):
     """Check that hfst.fst(input) equals the regex 'result'.

     input -- the argument handed to hfst.fst()

     result -- a regular expression compiled with hfst.regex()

     Raises RuntimeError when the two transducers do not compare equal.
     """
     built = hfst.fst(input)
     expected = hfst.regex(result)
     if not built.compare(expected):
         # 'input' is not necessarily a str (e.g. a list, tuple or dict), so
         # it is converted explicitly; plain concatenation would raise
         # TypeError and hide the actual test failure.
         raise RuntimeError('test_fst failed with input: ' + str(input))
Beispiel #11
0
    f.close()

    # Each test_fst(input, regex) call passes 'input' to hfst.fst() and a
    # regular expression describing the expected transducer.

    # Create automaton:
    # unweighted
    test_fst('foobar', '[f o o b a r]')
    test_fst(['foobar'], '[f o o b a r]')
    test_fst(['foobar', 'foobaz'], '[f o o b a [r|z]]')
    # with weights
    test_fst(('foobar', 0.3), '[f o o b a r]::0.3')
    test_fst([('foobar', 0.5)], '[f o o b a r]::0.5')
    test_fst(['foobar', ('foobaz', -2)], '[ f o o b a [r|[z::-2]] ]')
    # Special inputs
    test_fst('*** FOO ***', '{*** FOO ***}')

    # The empty string must give a transducer equal to the epsilon transducer.
    foo = hfst.fst('')
    eps = hfst.epsilon_fst()
    assert(foo.compare(eps))
    # Disabled alternative check, which expected hfst.fst('') to raise a
    # RuntimeError with the message 'Empty word.':
    #try:
    #    foo = hfst.fst('')
    #    raise RuntimeError(get_linenumber())
    #except RuntimeError as e:
    #    if not e.__str__() == 'Empty word.':
    #        raise RuntimeError(get_linenumber())

    # Create transducer:
    # unweighted
    # A dict maps each input string to one output string or to a list/tuple
    # of alternative output strings.
    test_fst({'foobar':'foobaz'}, '[f o o b a r:z]')
    test_fst({'foobar':['foobar','foobaz']}, '[f o o b a [r|r:z]]')
    test_fst({'foobar':('foobar','foobaz')}, '[f o o b a [r|r:z]]')
    test_fst({'foobar':'foobaz', 'FOOBAR':('foobar','FOOBAR'), 'Foobar':['Foo','bar','Foobar']}, '[f o o b a r:z] | [F O O B A R] | [F:f O:o O:o B:b A:a R:r] | [F o o b:0 a:0 r:0] | [F:b o:a o:r b:0 a:0 r:0] | [F o o b a r]')
Beispiel #12
0
        # TransducerTypeMismatchException:
        # Only exercised when FOMA_TYPE is among 'types': tr1 is compiled with
        # the default type set to TROPICAL_OPENFST_TYPE, tr2 is converted to
        # FOMA_TYPE, and disjuncting the two is attempted.
        if hfst.ImplementationType.FOMA_TYPE in types:
            hfst.set_default_fst_type(hfst.ImplementationType.TROPICAL_OPENFST_TYPE)
            tr1 = hfst.regex('foo')
            tr2 = hfst.regex('bar')
            tr2.convert(hfst.ImplementationType.FOMA_TYPE)
            try:
                tr1.disjunct(tr2)
            except hfst.exceptions.TransducerTypeMismatchException:
                print('The implementation types of transducers must be the same.')

            # 'type' is defined outside this excerpt -- presumably the
            # implementation type currently under test; verify in the caller.
            hfst.set_default_fst_type(type)

        # fst
        # Each check compares hfst.fst(...) with the equivalent regex and
        # raises RuntimeError (with an empty message) when they differ.
        # One unweighted identity path:
        if not hfst.fst('foo').compare(hfst.regex('{foo}')):
            raise RuntimeError('')
        # Weighted path: a tuple of string and number, e.g.
        if not hfst.fst(('foo',1.4)).compare(hfst.regex('{foo}::1.4')):
            raise RuntimeError('')
        if not hfst.fst(('bar',-3)).compare(hfst.regex('{bar}::-3')):
            raise RuntimeError('')
        if not hfst.fst(('baz',0)).compare(hfst.regex('{baz}')):
            raise RuntimeError('')
        # Several paths: a list or a tuple of paths and/or weighted paths, e.g.
        if not hfst.fst(['foo', 'bar']).compare(hfst.regex('{foo}|{bar}')):
            raise RuntimeError('')
        if not hfst.fst(('foo', ('bar',5.0))).compare(hfst.regex('{foo}|{bar}::5.0')):
            raise RuntimeError('')
        if not hfst.fst(('foo', ('bar',5.0), 'baz', 'Foo', ('Bar',2.4))).compare(hfst.regex('{foo}|{bar}::5.0|{baz}|{Foo}|{Bar}::2.4')):
            raise RuntimeError('')
Beispiel #13
0
for line in istr:
    line = line.rstrip()
    weight = None
    line_and_weight = line.split('\t')
    if len(line_and_weight) == 2:
        weight = float(line_and_weight[1])
        line = line_and_weight[0]
    tr = None
    if not pairstrings:
        input_and_output = line.split(':')
        if len(input_and_output) == 2:
            input_and_output[0] = input_and_output[0].strip().rstrip()
            input_and_output[1] = input_and_output[1].strip().rstrip()
            if not has_spaces:
                tr = hfst.fst(input_and_output[0])
                tr2 = hfst.fst(input_and_output[1])
                tr.cross_product(tr2)
            else:
                inputstr = input_and_output[0].split(' ')
                outputstr = input_and_output[1].split(' ')
                tr = hfst.tokenized_fst(inputstr)
                tr2 = hfst.tokenized_fst(outputstr)
                tr.cross_product(tr2)
        else:
            if not has_spaces:
                tr = hfst.fst(line)
            else:
                line = line.split(' ')
                tr = hfst.tokenized_fst(line)
    elif has_spaces:
Beispiel #14
0
    f.close()

    # Create automaton:
    # unweighted
    test_fst('foobar', '[f o o b a r]')
    test_fst(['foobar'], '[f o o b a r]')
    test_fst(['foobar', 'foobaz'], '[f o o b a [r|z]]')
    # with weights
    test_fst(('foobar', 0.3), '[f o o b a r]::0.3')
    test_fst([('foobar', 0.5)], '[f o o b a r]::0.5')
    test_fst(['foobar', ('foobaz', -2)], '[ f o o b a [r|[z::-2]] ]')
    # Special inputs
    test_fst('*** FOO ***', '{*** FOO ***}')

    try:
        foo = hfst.fst('')
        raise RuntimeError(get_linenumber())
    except RuntimeError as e:
        if not e.__str__() == 'Empty word.':
            raise RuntimeError(get_linenumber())

    # Create transducer:
    # unweighted
    test_fst({'foobar': 'foobaz'}, '[f o o b a r:z]')
    test_fst({'foobar': ['foobar', 'foobaz']}, '[f o o b a [r|r:z]]')
    test_fst({'foobar': ('foobar', 'foobaz')}, '[f o o b a [r|r:z]]')
    test_fst(
        {
            'foobar': 'foobaz',
            'FOOBAR': ('foobar', 'FOOBAR'),
            'Foobar': ['Foo', 'bar', 'Foobar']
Beispiel #15
0
import sys, hfst
# Read the aligner transducer from "chardist.fst" and close the stream
# straight away: only the transducer read from it is needed below.
algfile = hfst.HfstInputStream("chardist.fst")
align = algfile.read()
algfile.close()

# Each input line is either "word1:word2" or, when it does not split into
# exactly two fields at ':', its first field is aligned with itself.
for line in sys.stdin:
    lst = line.strip().split(sep=":")
    if len(lst) == 2:
        f1, f2 = lst
    else:
        f1, f2 = lst[0], lst[0]

    # Both words may contain the zero pair Ø:Ø anywhere.
    w1 = hfst.fst(f1)
    w1.insert_freely(("Ø", "Ø"))
    w1.minimize()

    w2 = hfst.fst(f2)
    w2.insert_freely(("Ø", "Ø"))
    w2.minimize()

    # Compose a copy of w1 with the aligner and w2, then keep one best path.
    w3 = hfst.HfstTransducer(w1)
    w3.compose(align)
    w3.compose(w2)

    w3.n_best(1)
    w3.minimize()

    paths = w3.extract_paths(output='text')
    print(paths.strip())