def intersect(transducers):
    """
    Return an intersection of *transducers*.

    A new HfstTransducer is constructed from the first element of
    *transducers*; every following element is passed to its ``intersect``
    method (whose return value is ignored, so the method is relied on to
    update the accumulator). The accumulator is minimized before it is
    returned.

    Raises AttributeError if *transducers* yields no elements, because
    there is then no accumulator to minimize.
    """
    retval = None
    for tr in transducers:
        # Identity test: do not route the "first element?" check through
        # whatever equality operator the transducer class may define.
        if retval is None:
            retval = HfstTransducer(tr)
        else:
            retval.intersect(tr)
    retval.minimize()
    return retval
def cross_product(transducers):
    """
    Return a cross product of *transducers*.

    A new HfstTransducer is constructed from the first element of
    *transducers*; every following element is passed to its
    ``cross_product`` method (whose return value is ignored, so the method
    is relied on to update the accumulator). The accumulator is minimized
    before it is returned.

    Raises AttributeError if *transducers* yields no elements, because
    there is then no accumulator to minimize.
    """
    retval = None
    for tr in transducers:
        # Identity test: do not route the "first element?" check through
        # whatever equality operator the transducer class may define.
        if retval is None:
            retval = HfstTransducer(tr)
        else:
            retval.cross_product(tr)
    retval.minimize()
    return retval
def compose(transducers):
    """
    Return a composition of *transducers*.

    A new HfstTransducer is constructed from the first element of
    *transducers*; every following element is passed, in iteration order,
    to its ``compose`` method (whose return value is ignored, so the method
    is relied on to update the accumulator). The accumulator is minimized
    before it is returned.

    Raises AttributeError if *transducers* yields no elements, because
    there is then no accumulator to minimize.
    """
    retval = None
    for tr in transducers:
        # Identity test: do not route the "first element?" check through
        # whatever equality operator the transducer class may define.
        if retval is None:
            retval = HfstTransducer(tr)
        else:
            retval.compose(tr)
    retval.minimize()
    return retval
def read_prolog_transducer(f, linecount=[0]):
    """
    Create a transducer as defined in prolog format in file *f*.

    *linecount* keeps track of the current line in the file. It is a
    one-element list: ``linecount[0]`` is used as the offset when reporting
    line numbers in exceptions, and on success it is increased by the
    number of lines consumed by this call. Note that the default list is
    created once, so calls that omit *linecount* share (and keep
    accumulating into) the same counter.

    Blank lines and lines starting with ``#`` that precede the network
    line are skipped. Reading stops at the first blank line after the
    network line (the prolog separator) or at end of file.

    Returns an HfstTransducer built from the parsed data with the default
    transducer type, named after the parsed network.

    Raises hfst.exceptions.EndOfStreamException if the end of *f* is
    reached before a network line is found, and
    hfst.exceptions.NotValidPrologFormatException if a line cannot be
    parsed.
    """
    linecount_ = 0
    fsm = HfstBasicTransducer()

    # Find the network line, skipping anything that may legally precede it.
    while True:
        line = f.readline()
        linecount_ += 1
        if line == "":
            raise hfst.exceptions.EndOfStreamException(
                "", "", linecount[0] + linecount_)
        line = line.rstrip()
        if line == "":
            # Allow extra prolog separator(s). This must skip to the next
            # line: indexing line[0] on an empty string raises IndexError.
            continue
        if line[0] == '#':
            continue  # comment line
        break

    if not libhfst.parse_prolog_network_line(line, fsm):
        raise hfst.exceptions.NotValidPrologFormatException(
            line, "", linecount[0] + linecount_)

    # Read arc, final-state and symbol lines until a separator or EOF.
    while True:
        line = f.readline()
        if line != "":
            # End of file is not counted as a line; everything else is.
            linecount_ += 1
            line = line.rstrip()
        if line == "":
            # Either end of file or a blank line (prolog separator).
            retval = HfstTransducer(fsm, get_default_fst_type())
            retval.set_name(fsm.name)
            linecount[0] = linecount[0] + linecount_
            return retval
        # Try each line parser in turn; the first one that accepts wins.
        if not (libhfst.parse_prolog_arc_line(line, fsm)
                or libhfst.parse_prolog_final_line(line, fsm)
                or libhfst.parse_prolog_symbol_line(line, fsm)):
            raise hfst.exceptions.NotValidPrologFormatException(
                line, "", linecount[0] + linecount_)
def read_att_transducer(f, epsilonstr=EPSILON, linecount=[0]):
    """
    Create a transducer as defined in AT&T format in file *f*.

    *epsilonstr* defines how epsilons are represented; it is forwarded to
    the line parser. *linecount* keeps track of the current line in the
    file: it is a one-element list whose value is used as the offset for
    line numbers reported in format errors, and which is increased by the
    number of reads performed by this call (the read that hits end of file
    is counted too).

    Reading stops at end of file or at a line whose first character is
    ``-``.

    Raises hfst.exceptions.EndOfStreamException if *f* is already at end of
    file on the first read, and hfst.exceptions.NotValidAttFormatException
    if a line is rejected by the line parser.
    """
    reads = 0
    basic = HfstBasicTransducer()

    while True:
        text = f.readline()
        at_eof = text == ""

        # Nothing at all could be read: there is no transducer here.
        if at_eof and reads == 0:
            raise hfst.exceptions.EndOfStreamException("", "", 0)

        reads += 1

        # End of file or a separator line terminates this transducer.
        if at_eof or text[0] == '-':
            break

        if not _parse_att_line(text, basic, epsilonstr):
            raise hfst.exceptions.NotValidAttFormatException(
                text, "", linecount[0] + reads)

    linecount[0] += reads
    return HfstTransducer(basic, get_default_fst_type())
def fsa(arg):
    """
    Get a transducer (automaton in this case) that recognizes one or more
    paths.

    Parameters
    ----------
    * `arg` :
        See example below

    Possible inputs:

      One unweighted identity path:
        'foo'  ->  [f o o]

      Weighted path: a tuple of string and number, e.g.
        ('foo',1.4)
        ('bar',-3)
        ('baz',0)

      Several paths: a list or a tuple of paths and/or weighted paths, e.g.
        ['foo', 'bar']
        ('foo', ('bar',5.0))
        ('foo', ('bar',5.0), 'baz', 'Foo', ('Bar',2.4))
        [('foo',-1), ('bar',0), ('baz',3.5)]

    An empty string (weighted or not) does not add a path; instead state 0
    is made final with the given weight (zero when unweighted).

    Raises RuntimeError if *arg*, or an element of a tuple/list *arg*, is
    neither a string nor a weighted word.
    """
    deftok = HfstTokenizer()
    retval = HfstBasicTransducer()

    def add_path(word, weight):
        # Add one path for the string *word* with *weight* to retval.
        if len(word) == 0:
            # Epsilon transducer: the start state itself becomes final.
            retval.set_final_weight(0, weight)
        else:
            retval.disjunct(deftok.tokenize(_check_word(word)), weight)

    if isinstance(arg, str):
        add_path(arg, 0)
    elif _is_weighted_word(arg):
        # The emptiness test must look at the string arg[0], not at the
        # (string, weight) pair, which is never empty.
        add_path(arg[0], arg[1])
    elif isinstance(arg, (tuple, list)):
        for word in arg:
            if _is_weighted_word(word):
                add_path(word[0], word[1])
            elif isinstance(word, str):
                add_path(word, 0)
            else:
                raise RuntimeError(
                    'Tuple/list element not a string or tuple of string and weight.'
                )
    else:
        raise RuntimeError('Not a string or tuple/list of strings.')

    return HfstTransducer(retval, get_default_fst_type())