def fst_to_fsa(fst, separator=''):
    """
    (Experimental) Encode a transducer as an automaton.

    A copy of *fst* is made in which every transition <in:out> is rewritten
    as <inSout:inSout>, 'S' standing for *separator*. A transition is left
    untouched when its input and output symbols are equal and that symbol
    is hfst.EPSILON, hfst.IDENTITY or hfst.UNKNOWN. States, transition
    weights and end-state weights are carried over as such. The alphabet is
    copied, and every compound symbol created while encoding is inserted
    into it.

    Parameters
    ----------
    * `fst` :
        The transducer.
    * `separator` :
        The separator symbol inserted between input and output symbols.

    Returns
    -------
    An hfst.HfstTransducer if the type name of *fst* contains
    'HfstTransducer', otherwise the encoded hfst.HfstBasicTransducer.

    Examples:

        import hfst
        foo2bar = hfst.fst({'foo':'bar'})

    creates a transducer [f:b o:a o:r]. Calling

        foobar = hfst.fst_to_fsa(foo2bar)

    will create the transducer [fb:fb oa:oa or:or] and

        foobar = hfst.fst_to_fsa(foo2bar, '^')

    the transducer [f^b:f^b o^a:o^a o^r:o^r].
    """
    new_symbols = libhfst.StringSet()
    result = hfst.HfstBasicTransducer(fst)
    # Symbols that are kept as such when they occur on both sides of an arc.
    untouched = (hfst.EPSILON, hfst.UNKNOWN, hfst.IDENTITY)

    for state in result.states():
        for transition in result.transitions(state):
            isymbol = transition.get_input_symbol()
            osymbol = transition.get_output_symbol()
            if isymbol == osymbol and isymbol in untouched:
                continue
            compound = isymbol + separator + osymbol
            transition.set_input_symbol(compound)
            transition.set_output_symbol(compound)
            new_symbols.insert(compound)

    result.add_symbols_to_alphabet(new_symbols)

    # Hand back the same kind of object the caller passed in.
    if 'HfstTransducer' not in str(type(fst)):
        return result
    return hfst.HfstTransducer(result)
def _split_encoded_symbol(symbol, separator):
    """
    Split the encoded transition symbol *symbol* into its decoded parts.

    Returns a list of one element (nothing to decode, the symbol is used as
    such) or two elements (input part, output part). *symbol* must not be
    the empty string.

    With a non-empty *separator* the symbol is split at the first occurrence
    of *separator*. With an empty *separator* the input part is either the
    leading special symbol of form '@...@' or, if the symbol does not start
    with '@', its first character; whatever follows is the output part.

    Raises RuntimeError if *separator* is empty and *symbol* starts with '@'
    but contains no second '@'.
    """
    if separator:
        return symbol.split(separator, 1)

    if symbol.startswith('@'):
        closing = symbol.find('@', 1)
        if closing == -1:
            raise RuntimeError('Transition symbol cannot have only one "@" sign.')
        head_end = closing + 1
    else:
        head_end = 1

    # Slicing (unlike indexing) yields '' when nothing follows the input
    # part, so a lone 'x' or a lone '@...@' is handled without IndexError.
    head, tail = symbol[:head_end], symbol[head_end:]
    return [head, tail] if tail else [head]


def fsa_to_fst(fsa, separator=''):
    """
    (Experimental) Decode an encoded automaton back into a transducer, i.e.
    create a transducer where each transition <inSout:inSout> of *fsa*,
    where 'S' is the first *separator* found in the compound symbol
    'inSout', is replaced with a transition <in:out>. If no *separator* is
    found in the symbol, the transition is copied as such. All states and
    weights of transitions and end states are copied as such. The alphabet
    is copied, omitting encoded symbols which were decoded according to
    *separator*.

    If *separator* is the empty string, 'in' must either be a
    single-character symbol or a special symbol of form '@...@'. A symbol
    that consists of nothing but such an 'in' part (e.g. a lone special
    symbol left unencoded by fst_to_fsa) is copied as such.

    Parameters
    ----------
    * `fsa` :
        The encoded transducer. Must be an automaton, i.e. for each
        transition, the input and output symbols must be the same. Else, a
        RuntimeError is thrown.
    * `separator` :
        The symbol separating input and output symbol parts in *fsa*. If it
        is the empty string, each encoded transition symbol must be of form
        'x...' (single-character input symbol 'x') or '@...@...' (special
        symbol as input symbol). Else, a RuntimeError is thrown.

    Returns
    -------
    An hfst.HfstTransducer if the type name of *fsa* contains
    'HfstTransducer', otherwise the decoded hfst.HfstBasicTransducer.

    Raises
    ------
    RuntimeError
        If a transition has differing input and output symbols, if a
        transition symbol is the empty string, or if *separator* is empty
        and a symbol starts with '@' but has no second '@'.

    Examples:

        import hfst
        foo2bar = hfst.fst({'foo':'bar'})  # creates transducer [f:b o:a o:r]
        foobar = hfst.fst_to_fsa(foo2bar, '^')

    creates the transducer [f^b:f^b o^a:o^a o^r:o^r]. Then calling

        foo2bar = hfst.fsa_to_fst(foobar, '^')

    will create again the original transducer [f:b o:a o:r].
    """
    retval = hfst.HfstBasicTransducer(fsa)
    encoded_symbols = libhfst.StringSet()

    for state in retval.states():
        for arc in retval.transitions(state):
            symbol = arc.get_input_symbol()
            if symbol != arc.get_output_symbol():
                raise RuntimeError('Transition input and output symbols differ.')
            if symbol == "":
                raise RuntimeError('Transition symbol cannot be the empty string.')

            parts = _split_encoded_symbol(symbol, separator)
            # With a single part, first and last element coincide and the
            # transition keeps its symbol on both sides.
            # NOTE(review): the decoded symbols are not inserted into the
            # alphabet explicitly here; presumably the arc setters or the
            # transducer take care of that -- confirm.
            arc.set_input_symbol(parts[0])
            arc.set_output_symbol(parts[-1])

            # encoded symbol to be removed from alphabet of result
            if len(parts) > 1:
                encoded_symbols.insert(symbol)

    retval.remove_symbols_from_alphabet(encoded_symbols)

    # Hand back the same kind of object the caller passed in.
    if 'HfstTransducer' in str(type(fsa)):
        return hfst.HfstTransducer(retval)
    return retval