コード例 #1
0
ファイル: __init__.py プロジェクト: unhammer/hfst
def intersect(transducers):
    """
    Return an intersection of *transducers*.

    A new ``HfstTransducer`` is constructed from the first element of
    *transducers*; every following element is intersected into it in
    iteration order, and the result is minimized before it is returned.
    The input transducers themselves are only passed to the constructor
    and to ``intersect``.

    Raises ``ValueError`` if *transducers* yields no elements.
    """
    retval = None
    for tr in transducers:
        # Identity check: ``retval`` is either None or the accumulator.
        if retval is None:
            retval = HfstTransducer(tr)
        else:
            retval.intersect(tr)
    if retval is None:
        # Without this guard an empty input ended in ``None.minimize()``,
        # i.e. an AttributeError that said nothing about the real cause.
        raise ValueError("intersect() requires at least one transducer")
    retval.minimize()
    return retval
コード例 #2
0
ファイル: __init__.py プロジェクト: hfst/hfst
def cross_product(transducers):
    """
    Return a cross product of *transducers*.

    A new ``HfstTransducer`` is constructed from the first element of
    *transducers*; ``cross_product`` is then applied to it with every
    following element in iteration order, and the result is minimized
    before it is returned.

    Raises ``ValueError`` if *transducers* yields no elements.
    """
    retval = None
    for tr in transducers:
        # Identity check: ``retval`` is either None or the accumulator.
        if retval is None:
            retval = HfstTransducer(tr)
        else:
            retval.cross_product(tr)
    if retval is None:
        # Without this guard an empty input ended in ``None.minimize()``,
        # i.e. an AttributeError that said nothing about the real cause.
        raise ValueError("cross_product() requires at least one transducer")
    retval.minimize()
    return retval
コード例 #3
0
ファイル: __init__.py プロジェクト: hfst/hfst
def compose(transducers):
    """
    Return a composition of *transducers*.

    A new ``HfstTransducer`` is constructed from the first element of
    *transducers*; every following element is composed onto it in
    iteration order (so the order of *transducers* matters), and the
    result is minimized before it is returned.

    Raises ``ValueError`` if *transducers* yields no elements.
    """
    retval = None
    for tr in transducers:
        # Identity check: ``retval`` is either None or the accumulator.
        if retval is None:
            retval = HfstTransducer(tr)
        else:
            retval.compose(tr)
    if retval is None:
        # Without this guard an empty input ended in ``None.minimize()``,
        # i.e. an AttributeError that said nothing about the real cause.
        raise ValueError("compose() requires at least one transducer")
    retval.minimize()
    return retval
コード例 #4
0
ファイル: __init__.py プロジェクト: unhammer/hfst
def read_prolog_transducer(f, linecount=[0]):
    """
    Create a transducer as defined in prolog format in file *f*. *linecount*
    keeps track of the current line in the file.

    Reading starts at the current position of *f*. Leading blank lines
    (extra prolog separators) and lines starting with '#' are skipped; the
    first other line must parse as a prolog network line. Subsequent lines
    are parsed as arc, final or symbol lines until a blank line or the end
    of the file is reached.

    *linecount* is a one-element list; on success ``linecount[0]`` is
    increased by the number of lines consumed by this call. It is not
    updated when an exception is raised.

    Returns an ``HfstTransducer`` of the default type, named after the
    parsed network.

    Raises ``hfst.exceptions.EndOfStreamException`` if the end of *f* is hit
    before a network line is found, and
    ``hfst.exceptions.NotValidPrologFormatException`` if a line cannot be
    parsed.
    """
    # NOTE: the mutable default of *linecount* is part of the existing
    # interface and is kept as is; callers that omit the argument all share
    # the same list object.
    linecount_ = 0
    fsm = HfstBasicTransducer()

    def _finish():
        # Convert the collected network and publish the consumed line count.
        retval = HfstTransducer(fsm, get_default_fst_type())
        retval.set_name(fsm.name)
        linecount[0] = linecount[0] + linecount_
        return retval

    # Locate the network line, skipping separators and comments.
    while True:
        line = f.readline()
        linecount_ = linecount_ + 1
        if line == "":
            raise hfst.exceptions.EndOfStreamException("", "", linecount[0] + linecount_)
        line = line.rstrip()
        if line == "":
            # Allow extra prolog separator(s). This has to skip the rest of
            # the iteration: indexing ``line[0]`` on the empty string
            # raised IndexError before.
            continue
        if line[0] == '#':
            continue  # comment line
        break

    if not libhfst.parse_prolog_network_line(line, fsm):
        raise hfst.exceptions.NotValidPrologFormatException(line, "", linecount[0] + linecount_)

    # Read the body of the network.
    while True:
        line = f.readline()
        if line == "":
            # End of file: not counted as a line.
            return _finish()
        line = line.rstrip()
        linecount_ = linecount_ + 1
        if line == "":
            # Prolog separator: the transducer is complete.
            return _finish()
        # The parsers are tried in this order; the first one that accepts
        # the line wins.
        if not (libhfst.parse_prolog_arc_line(line, fsm)
                or libhfst.parse_prolog_final_line(line, fsm)
                or libhfst.parse_prolog_symbol_line(line, fsm)):
            raise hfst.exceptions.NotValidPrologFormatException(line, "", linecount[0] + linecount_)
コード例 #5
0
def read_att_transducer(f, epsilonstr=EPSILON, linecount=[0]):
    """
    Build a transducer from its AT&T description read from file *f*.

    *epsilonstr* is the string that stands for epsilon in the input.
    *linecount* is a one-element list tracking the current line in the
    file; on success its element is increased by the number of lines this
    call accounted for.

    Lines are read from the current position of *f* until a line whose
    first character is '-' or the end of the file. Raises
    ``hfst.exceptions.EndOfStreamException`` when *f* is already at its
    end, and ``hfst.exceptions.NotValidAttFormatException`` for a line
    that ``_parse_att_line`` rejects.
    """
    basic = HfstBasicTransducer()
    consumed = 0
    # ``readline`` returns "" only at end of file, which ends the iteration.
    for row in iter(f.readline, ""):
        consumed += 1
        if row[0] == '-':
            # Separator line: this transducer is complete.
            break
        if not _parse_att_line(row, basic, epsilonstr):
            raise hfst.exceptions.NotValidAttFormatException(
                row, "", linecount[0] + consumed)
    else:
        # Reached the end of the file without seeing a separator.
        if consumed == 0:
            raise hfst.exceptions.EndOfStreamException("", "", 0)
        consumed += 1
    linecount[0] += consumed
    return HfstTransducer(basic, get_default_fst_type())
コード例 #6
0
def fsa(arg):
    """
    Get a transducer (automaton in this case) that recognizes one or more paths.

    Parameters
    ----------
    * `arg` :
        See example below

    Possible inputs:

      One unweighted identity path:
        'foo'  ->  [f o o]

      Weighted path: a tuple of string and number, e.g.
        ('foo',1.4)
        ('bar',-3)
        ('baz',0)

      Several paths: a list or a tuple of paths and/or weighted paths, e.g.
        ['foo', 'bar']
        ('foo', ('bar',5.0))
        ('foo', ('bar',5.0), 'baz', 'Foo', ('Bar',2.4))
        [('foo',-1), ('bar',0), ('baz',3.5)]

    An empty string, weighted or not, makes the start state final with the
    given weight (zero for unweighted strings) instead of adding a path.

    Returns
    -------
    An ``HfstTransducer`` of the default type built from the given paths.

    Raises
    ------
    RuntimeError
        If *arg* is not a string, a weighted word or a tuple/list, or if an
        element of the tuple/list is neither a string nor a weighted word.
    """
    deftok = HfstTokenizer()
    retval = HfstBasicTransducer()

    def add_path(word, weight):
        # Add one path to the automaton under construction.
        if len(word) == 0:
            # Empty string: epsilon transducer with the given weight.
            retval.set_final_weight(0, weight)
        else:
            retval.disjunct(deftok.tokenize(_check_word(word)), weight)

    # The emptiness test must look at the string of a weighted word, not at
    # the (string, weight) pair itself: per the description above the pair
    # always has two elements, so testing ``len(arg) == 0`` could never
    # select the epsilon branch.
    if isinstance(arg, str):
        add_path(arg, 0)
    elif _is_weighted_word(arg):
        add_path(arg[0], arg[1])
    elif isinstance(arg, (tuple, list)):
        for word in arg:
            if _is_weighted_word(word):
                add_path(word[0], word[1])
            elif isinstance(word, str):
                add_path(word, 0)
            else:
                raise RuntimeError(
                    'Tuple/list element not a string or tuple of string and weight.'
                )
    else:
        raise RuntimeError('Not a string or tuple/list of strings.')
    return HfstTransducer(retval, get_default_fst_type())