Esempio n. 1
0
def fst_to_fsa(fst, separator=''):
    """
    (Experimental)

    Encode the transducer *fst* as an automaton: every transition <in:out>
    becomes <inSout:inSout>, where 'S' stands for *separator*. Transitions
    that carry the same special symbol on both sides (hfst.EPSILON,
    hfst.IDENTITY or hfst.UNKNOWN) are left untouched.

    The work is done on an hfst.HfstBasicTransducer copy of *fst*; only
    transition symbols are rewritten. Every compound symbol created during
    encoding is added to the alphabet of the result.

    Parameters
    ----------
    * `fst` :
        The transducer to encode.
    * `separator` :
        The string placed between the input and the output symbol in each
        compound symbol. Defaults to the empty string.

    Returns
    -------
    An hfst.HfstTransducer if the type name of *fst* contains
    'HfstTransducer', otherwise the hfst.HfstBasicTransducer working copy.

    Examples:

        import hfst
        foo2bar = hfst.fst({'foo':'bar'})

    creates a transducer [f:b o:a o:r]. Calling

        foobar = hfst.fst_to_fsa(foo2bar)

    will create the transducer [fb:fb oa:oa or:or] and

        foobar = hfst.fst_to_fsa(foo2bar, '^')

    the transducer [f^b:f^b o^a:o^a o^r:o^r].

    """
    new_symbols = libhfst.StringSet()
    result = hfst.HfstBasicTransducer(fst)
    for state in result.states():
        for transition in result.transitions(state):
            isym = transition.get_input_symbol()
            osym = transition.get_output_symbol()
            # A special symbol mapped to itself is never encoded.
            keep_as_is = isym == osym and isym in (
                hfst.EPSILON, hfst.UNKNOWN, hfst.IDENTITY)
            if not keep_as_is:
                compound = isym + separator + osym
                transition.set_input_symbol(compound)
                transition.set_output_symbol(compound)
                new_symbols.insert(compound)
    result.add_symbols_to_alphabet(new_symbols)
    # Hand back the same flavour of transducer the caller passed in.
    wants_full_type = 'HfstTransducer' in str(type(fst))
    return hfst.HfstTransducer(result) if wants_full_type else result
Esempio n. 2
0
def fsa_to_fst(fsa, separator=''):
    """
    (Experimental)

    Decode an encoded automaton back into a transducer, i.e. create a
    transducer where each transition <inSout:inSout> of *fsa*, where 'S' is
    the first *separator* found in the compound symbol 'inSout', is replaced
    with a transition <in:out>.

    If a symbol has no output part (no *separator* occurs in it or, with the
    empty separator, nothing follows the input part), the transition is
    copied as such. The work is done on an hfst.HfstBasicTransducer copy of
    *fsa*; only transition symbols are rewritten. Encoded symbols that were
    split into two parts are removed from the alphabet of the result.

    NOTE(review): this function does not itself add the decoded 'in' and
    'out' symbols to the alphabet; whether set_input_symbol() and
    set_output_symbol() register them is not visible here -- confirm against
    the HFST API.

    If *separator* is the empty string, 'in' must either be a
    single-character symbol or a special symbol of the form '@...@'.

    Parameters
    ----------
    * `fsa` :
        The encoded transducer. Must be an automaton, i.e. for each
        transition, the input and output symbols must be the same.
    * `separator` :
        The symbol separating input and output symbol parts in *fsa*. If it
        is the empty string, each encoded transition symbol must be of the
        form 'x...' (single-character input symbol 'x') or '@...@...'
        (special symbol as input symbol).

    Returns
    -------
    An hfst.HfstTransducer if the type name of *fsa* contains
    'HfstTransducer', otherwise the hfst.HfstBasicTransducer working copy.

    Raises
    ------
    RuntimeError
        If the input and output symbols of a transition differ, if a
        transition symbol is the empty string or, with the empty separator,
        if a symbol starts with '@' but contains no closing '@'.

    Examples:

        import hfst
        foo2bar = hfst.fst({'foo':'bar'})  # creates transducer [f:b o:a o:r]
        foobar = hfst.fst_to_fsa(foo2bar, '^')

    creates the transducer [f^b:f^b o^a:o^a o^r:o^r]. Then calling

        foo2bar = hfst.fsa_to_fst(foobar, '^')

    will create again the original transducer [f:b o:a o:r].
    """
    retval = hfst.HfstBasicTransducer(fsa)
    encoded_symbols = libhfst.StringSet()
    for state in retval.states():
        for arc in retval.transitions(state):
            symbol = arc.get_input_symbol()
            if symbol != arc.get_output_symbol():
                raise RuntimeError('Transition input and output symbols differ.')
            parts = _split_encoded_symbol(symbol, separator)
            # With a single part, first and last coincide and the transition
            # keeps its symbol unchanged.
            arc.set_input_symbol(parts[0])
            arc.set_output_symbol(parts[-1])
            # Only symbols that were actually split are dropped from the
            # alphabet of the result.
            if len(parts) > 1:
                encoded_symbols.insert(symbol)
    retval.remove_symbols_from_alphabet(encoded_symbols)
    # Hand back the same flavour of transducer the caller passed in.
    if 'HfstTransducer' in str(type(fsa)):
        return hfst.HfstTransducer(retval)
    return retval


def _split_encoded_symbol(symbol, separator):
    """Split an encoded transition symbol into [in] or [in, out] parts.

    With a non-empty *separator* the symbol is split at its first
    occurrence. With an empty *separator* the input part is either the
    leading '@...@' special symbol or the first character, and the rest (if
    any) is the output part. A one-element list means there is nothing to
    decode.

    Raises RuntimeError for the empty symbol and, with the empty separator,
    for a symbol that starts with '@' but has no closing '@'.
    """
    if symbol == "":
        raise RuntimeError('Transition symbol cannot be the empty string.')
    if separator:
        return symbol.split(separator, 1)
    if symbol.startswith('@'):
        closing = symbol.find('@', 1)
        if closing == -1:
            raise RuntimeError('Transition symbol cannot have only one "@" sign.')
        head_length = closing + 1
    else:
        head_length = 1
    # Slice rather than index: a slice past the end yields '' instead of
    # raising IndexError, so symbols without an output part are handled.
    head, tail = symbol[:head_length], symbol[head_length:]
    return [head, tail] if tail else [head]