Beispiel #1
0
def fsa(arg):
    """
    Get a transducer (automaton in this case) that recognizes one or more paths.

    Parameters
    ----------
    * `arg` :
        See example below

    Possible inputs:

      One unweighted identity path:
        'foo'  ->  [f o o]

      Weighted path: a tuple of string and number, e.g.
        ('foo',1.4)
        ('bar',-3)
        ('baz',0)

      Several paths: a list or a tuple of paths and/or weighted paths, e.g.
        ['foo', 'bar']
        ('foo', ('bar',5.0))
        ('foo', ('bar',5.0), 'baz', 'Foo', ('Bar',2.4))
        [('foo',-1), ('bar',0), ('baz',3.5)]

    """
    deftok = HfstTokenizer()
    retval = HfstBasicTransducer()
    if isinstance(arg, str):
       if len(arg) == 0:
           retval.set_final_weight(0, 0) # epsilon transducer with zero weight
       else:
           retval.disjunct(deftok.tokenize(_check_word(arg)), 0)
    elif _is_weighted_word(arg):
       if len(arg) == 0:
           retval.set_final_weight(0, arg[1]) # epsilon transducer with weight
       else:
           retval.disjunct(deftok.tokenize(_check_word(arg[0])), arg[1])
    elif isinstance(arg, tuple) or isinstance(arg, list):
       for word in arg:
           if _is_weighted_word(word):
              if len(word) == 0:
                  retval.set_final_weight(0, word[1]) # epsilon transducer with weight
              else:
                  retval.disjunct(deftok.tokenize(_check_word(word[0])), word[1])
           elif isinstance(word, str):
              if len(word) == 0:
                  retval.set_final_weight(0, 0) # epsilon transducer with zero weight
              else:
                  retval.disjunct(deftok.tokenize(_check_word(word)), 0)
           else:
              raise RuntimeError('Tuple/list element not a string or tuple of string and weight.')
    else:
       raise RuntimeError('Not a string or tuple/list of strings.')
    return HfstTransducer(retval, get_default_fst_type())
Beispiel #2
0
def fsa(arg):
    """
    Get a transducer (automaton in this case) that recognizes one or more paths.

    Parameters
    ----------
    * `arg` :
        See example below

    Possible inputs:

      One unweighted identity path:
        'foo'  ->  [f o o]

      Weighted path: a tuple of string and number, e.g.
        ('foo',1.4)
        ('bar',-3)
        ('baz',0)

      Several paths: a list or a tuple of paths and/or weighted paths, e.g.
        ['foo', 'bar']
        ('foo', ('bar',5.0))
        ('foo', ('bar',5.0), 'baz', 'Foo', ('Bar',2.4))
        [('foo',-1), ('bar',0), ('baz',3.5)]

    """
    deftok = HfstTokenizer()
    retval = HfstBasicTransducer()
    if isinstance(arg, str):
       if len(arg) == 0:
           retval.set_final_weight(0, 0) # epsilon transducer with zero weight
       else:
           retval.disjunct(deftok.tokenize(_check_word(arg)), 0)
    elif _is_weighted_word(arg):
       if len(arg) == 0:
           retval.set_final_weight(0, arg[1]) # epsilon transducer with weight
       else:
           retval.disjunct(deftok.tokenize(_check_word(arg[0])), arg[1])
    elif isinstance(arg, tuple) or isinstance(arg, list):
       for word in arg:
           if _is_weighted_word(word):
              if len(word) == 0:
                  retval.set_final_weight(0, word[1]) # epsilon transducer with weight
              else:
                  retval.disjunct(deftok.tokenize(_check_word(word[0])), word[1])
           elif isinstance(word, str):
              if len(word) == 0:
                  retval.set_final_weight(0, 0) # epsilon transducer with zero weight
              else:
                  retval.disjunct(deftok.tokenize(_check_word(word)), 0)
           else:
              raise RuntimeError('Tuple/list element not a string or tuple of string and weight.')
    else:
       raise RuntimeError('Not a string or tuple/list of strings.')
    return HfstTransducer(retval, get_default_fst_type())
Beispiel #3
0
def tokenized_fst(arg, weight=0):
    """
    Get a transducer that recognizes the concatenation of symbols or symbol pairs in
    *arg*.

    Parameters
    ----------
    * `arg` :
        The symbols or symbol pairs that form the path to be recognized.

    Example

       import hfst
       tok = hfst.HfstTokenizer()
       tok.add_multichar_symbol('foo')
       tok.add_multichar_symbol('bar')
       tr = hfst.tokenized_fst(tok.tokenize('foobar', 'foobaz'))

    will create the transducer [foo:foo bar:b 0:a 0:z].
    """
    retval = HfstBasicTransducer()
    state = 0
    if isinstance(arg, list) or isinstance(arg, tuple):
        for token in arg:
            if isinstance(token, str):
                new_state = retval.add_state()
                retval.add_transition(state, new_state, token, token, 0)
                state = new_state
            elif isinstance(token, list) or isinstance(token, tuple):
                if len(token) == 2:
                    new_state = retval.add_state()
                    retval.add_transition(state, new_state, token[0], token[1],
                                          0)
                    state = new_state
                elif len(token) == 1:
                    new_state = retval.add_state()
                    retval.add_transition(state, new_state, token, token, 0)
                    state = new_state
                else:
                    raise RuntimeError('Symbol or symbol pair must be given.')
        retval.set_final_weight(state, weight)
        return HfstTransducer(retval, get_default_fst_type())
    else:
        raise RuntimeError('Argument must be a list or a tuple')
Beispiel #4
0
def tokenized_fst(arg, weight=0):
    """
    Get a transducer that recognizes the concatenation of symbols or symbol pairs in
    *arg*.

    Parameters
    ----------
    * `arg` :
        The symbols or symbol pairs that form the path to be recognized.

    Example

       import hfst
       tok = hfst.HfstTokenizer()
       tok.add_multichar_symbol('foo')
       tok.add_multichar_symbol('bar')
       tr = hfst.tokenized_fst(tok.tokenize('foobar', 'foobaz'))

    will create the transducer [foo:foo bar:b 0:a 0:z].
    """
    retval = HfstBasicTransducer()
    state = 0
    if isinstance(arg, list) or isinstance(arg, tuple):
       for token in arg:
           if isinstance(token, str):
              new_state = retval.add_state()
              retval.add_transition(state, new_state, token, token, 0)
              state = new_state
           elif isinstance(token, list) or isinstance(token, tuple):
              if len(token) == 2:
                 new_state = retval.add_state()
                 retval.add_transition(state, new_state, token[0], token[1], 0)
                 state = new_state
              elif len(token) == 1:
                 new_state = retval.add_state()
                 retval.add_transition(state, new_state, token, token, 0)
                 state = new_state
              else:
                 raise RuntimeError('Symbol or symbol pair must be given.')
       retval.set_final_weight(state, weight)
       return HfstTransducer(retval, get_default_fst_type())
    else:
       raise RuntimeError('Argument must be a list or a tuple')