Ejemplo n.º 1
0
def read_prolog_transducer(f, linecount=[0]):
    """
    Create a transducer as defined in prolog format in file *f*.

    Parameters
    ----------
    * `f` :
        An object with a ``readline()`` method that returns an empty string
        at end of input (e.g. an open text file).
    * `linecount` :
        A one-element list used as a line counter. The number of lines
        consumed by this call is added to ``linecount[0]`` on success.
        NOTE: the default list is created once and is shared by every call
        that omits the argument; pass your own list to count per file.

    Returns
    -------
    An HfstTransducer of the default implementation type whose name is
    copied from the parsed network.

    Raises
    ------
    * `hfst.exceptions.EndOfStreamException` :
        If the input ends before a network line is found.
    * `hfst.exceptions.NotValidPrologFormatException` :
        If a line cannot be parsed as a network, arc, final or symbol line.
    """
    linecount_ = 0
    fsm = HfstBasicTransducer()

    # Skip blank lines (extra prolog separators) and '#' comment lines until
    # the first candidate network line is found.
    while True:
        line = f.readline()
        linecount_ = linecount_ + 1
        if line == "":
            raise hfst.exceptions.EndOfStreamException(
                "", "", linecount[0] + linecount_)
        line = line.rstrip()
        # Test for the empty line first: indexing line[0] on an empty string
        # would raise IndexError.
        if line == "" or line[0] == '#':
            continue
        break

    if not libhfst.parse_prolog_network_line(line, fsm):
        raise hfst.exceptions.NotValidPrologFormatException(
            line, "", linecount[0] + linecount_)

    while True:
        line = f.readline()
        if line != "":
            # A real line was read (end of input is not counted as a line).
            linecount_ = linecount_ + 1
            line = line.rstrip()
        if line == "":
            # End of input or a blank separator line: the network is complete.
            retval = HfstTransducer(fsm, get_default_fst_type())
            retval.set_name(fsm.name)
            linecount[0] = linecount[0] + linecount_
            return retval
        # Each parser is tried in turn; the first that accepts the line wins.
        if not (libhfst.parse_prolog_arc_line(line, fsm)
                or libhfst.parse_prolog_final_line(line, fsm)
                or libhfst.parse_prolog_symbol_line(line, fsm)):
            raise hfst.exceptions.NotValidPrologFormatException(
                line, "", linecount[0] + linecount_)
Ejemplo n.º 2
0
def read_prolog_transducer(f, linecount=[0]):
    """
    Create a transducer as defined in prolog format in file *f*.

    Parameters
    ----------
    * `f` :
        An object with a ``readline()`` method that returns an empty string
        at end of input (e.g. an open text file).
    * `linecount` :
        A one-element list used as a line counter. The number of lines
        consumed by this call is added to ``linecount[0]`` on success.
        NOTE: the default list is created once and is shared by every call
        that omits the argument; pass your own list to count per file.

    Returns
    -------
    An HfstTransducer of the default implementation type whose name is
    copied from the parsed network.

    Raises
    ------
    * `hfst.exceptions.EndOfStreamException` :
        If the input ends before a network line is found.
    * `hfst.exceptions.NotValidPrologFormatException` :
        If a line cannot be parsed as a network, arc, final or symbol line.
    """
    linecount_ = 0
    fsm = HfstBasicTransducer()

    # Skip blank lines (extra prolog separators) and '#' comment lines until
    # the first candidate network line is found.
    while True:
        line = f.readline()
        linecount_ = linecount_ + 1
        if line == "":
            raise hfst.exceptions.EndOfStreamException("", "", linecount[0] + linecount_)
        line = line.rstrip()
        # Test for the empty line first: indexing line[0] on an empty string
        # would raise IndexError.
        if line == "" or line[0] == '#':
            continue
        break

    if not libhfst.parse_prolog_network_line(line, fsm):
        raise hfst.exceptions.NotValidPrologFormatException(line, "", linecount[0] + linecount_)

    while True:
        line = f.readline()
        if line != "":
            # A real line was read (end of input is not counted as a line).
            linecount_ = linecount_ + 1
            line = line.rstrip()
        if line == "":
            # End of input or a blank separator line: the network is complete.
            retval = HfstTransducer(fsm, get_default_fst_type())
            retval.set_name(fsm.name)
            linecount[0] = linecount[0] + linecount_
            return retval
        # Each parser is tried in turn; the first that accepts the line wins.
        if not (libhfst.parse_prolog_arc_line(line, fsm)
                or libhfst.parse_prolog_final_line(line, fsm)
                or libhfst.parse_prolog_symbol_line(line, fsm)):
            raise hfst.exceptions.NotValidPrologFormatException(line, "", linecount[0] + linecount_)
Ejemplo n.º 3
0
def fsa(arg):
    """
    Get a transducer (automaton in this case) that recognizes one or more paths.

    Parameters
    ----------
    * `arg` :
        See example below

    Possible inputs:

      One unweighted identity path:
        'foo'  ->  [f o o]

      Weighted path: a tuple of string and number, e.g.
        ('foo',1.4)
        ('bar',-3)
        ('baz',0)

      Several paths: a list or a tuple of paths and/or weighted paths, e.g.
        ['foo', 'bar']
        ('foo', ('bar',5.0))
        ('foo', ('bar',5.0), 'baz', 'Foo', ('Bar',2.4))
        [('foo',-1), ('bar',0), ('baz',3.5)]

    An empty string (weighted or not) denotes the epsilon path: the start
    state is made final with the given weight.

    Raises
    ------
    * `RuntimeError` :
        If *arg*, or an element of *arg*, is neither a string nor a weighted
        word.
    """
    deftok = HfstTokenizer()
    retval = HfstBasicTransducer()

    def add_path(word, weight):
        # The emptiness test must look at the string itself; testing the
        # length of the (string, weight) pair would never detect epsilon.
        if len(word) == 0:
            retval.set_final_weight(0, weight)  # epsilon transducer
        else:
            retval.disjunct(deftok.tokenize(_check_word(word)), weight)

    if isinstance(arg, str):
        add_path(arg, 0)
    elif _is_weighted_word(arg):
        # NOTE(review): assumes _is_weighted_word accepts only
        # (string, number) pairs - confirm against its definition.
        add_path(arg[0], arg[1])
    elif isinstance(arg, (tuple, list)):
        for word in arg:
            if _is_weighted_word(word):
                add_path(word[0], word[1])
            elif isinstance(word, str):
                add_path(word, 0)
            else:
                raise RuntimeError('Tuple/list element not a string or tuple of string and weight.')
    else:
        raise RuntimeError('Not a string or tuple/list of strings.')
    return HfstTransducer(retval, get_default_fst_type())
Ejemplo n.º 4
0
def fsa(arg):
    """
    Get a transducer (automaton in this case) that recognizes one or more paths.

    Parameters
    ----------
    * `arg` :
        See example below

    Possible inputs:

      One unweighted identity path:
        'foo'  ->  [f o o]

      Weighted path: a tuple of string and number, e.g.
        ('foo',1.4)
        ('bar',-3)
        ('baz',0)

      Several paths: a list or a tuple of paths and/or weighted paths, e.g.
        ['foo', 'bar']
        ('foo', ('bar',5.0))
        ('foo', ('bar',5.0), 'baz', 'Foo', ('Bar',2.4))
        [('foo',-1), ('bar',0), ('baz',3.5)]

    An empty string (weighted or not) denotes the epsilon path: the start
    state is made final with the given weight.

    Raises
    ------
    * `RuntimeError` :
        If *arg*, or an element of *arg*, is neither a string nor a weighted
        word.
    """
    deftok = HfstTokenizer()
    retval = HfstBasicTransducer()

    def add_path(word, weight):
        # The emptiness test must look at the string itself; testing the
        # length of the (string, weight) pair would never detect epsilon.
        if len(word) == 0:
            retval.set_final_weight(0, weight)  # epsilon transducer
        else:
            retval.disjunct(deftok.tokenize(_check_word(word)), weight)

    if isinstance(arg, str):
        add_path(arg, 0)
    elif _is_weighted_word(arg):
        # NOTE(review): assumes _is_weighted_word accepts only
        # (string, number) pairs - confirm against its definition.
        add_path(arg[0], arg[1])
    elif isinstance(arg, (tuple, list)):
        for word in arg:
            if _is_weighted_word(word):
                add_path(word[0], word[1])
            elif isinstance(word, str):
                add_path(word, 0)
            else:
                raise RuntimeError('Tuple/list element not a string or tuple of string and weight.')
    else:
        raise RuntimeError('Not a string or tuple/list of strings.')
    return HfstTransducer(retval, get_default_fst_type())
Ejemplo n.º 5
0
def read_att_string(att):
    """
    Build a transducer from the AT&T-format description held in string *att*.

    The string is split on newline characters and every resulting line is
    handed to the AT&T line parser. The first line that the parser rejects
    raises hfst.exceptions.NotValidAttFormatException carrying the line text
    and its 1-based line number.
    """
    basic = HfstBasicTransducer()
    for lineno, text in enumerate(att.split('\n'), 1):
        if _parse_att_line(text, basic):
            continue
        raise hfst.exceptions.NotValidAttFormatException(text, "", lineno)
    return HfstTransducer(basic, get_default_fst_type())
Ejemplo n.º 6
0
def read_att_string(att):
    """
    Build a transducer from the AT&T-format description held in string *att*.

    The string is split on newline characters and every resulting line is
    handed to the AT&T line parser. The first line that the parser rejects
    raises hfst.exceptions.NotValidAttFormatException carrying the line text
    and its 1-based line number.
    """
    basic = HfstBasicTransducer()
    for lineno, text in enumerate(att.split('\n'), 1):
        if _parse_att_line(text, basic):
            continue
        raise hfst.exceptions.NotValidAttFormatException(text, "", lineno)
    return HfstTransducer(basic, get_default_fst_type())
Ejemplo n.º 7
0
def compile_twolc_file(inputfilename, outputfilename, **kvargs):
    """
    Compile the twolc file *inputfilename* and write the result to the file
    *outputfilename*.

    Parameters
    ----------
    * `inputfilename` :
        The name of the twolc input file.
    * `outputfilename` :
        The name of the transducer output file.
    * `kvargs` :
        Recognized keys: silent, verbose, resolve_right_conflicts,
        resolve_left_conflicts, type. Any other key is reported with a
        warning on standard output and otherwise ignored.
    * `silent` :
        Whether compilation is performed in silent mode, defaults to False.
    * `verbose` :
        Whether compilation is performed in verbose mode, defaults to False.
    * `resolve_right_conflicts` :
        Whether right arrow conflicts are resolved, defaults to True.
    * `resolve_left_conflicts` :
        Whether left arrow conflicts are resolved, defaults to False.
    * `type` :
        Implementation type of the compiler, defaults to hfst.get_default_fst_type().

    Returns
    -------
    On success zero, else an integer other than zero.
    """
    # Defaults for every recognized option, keyed by the keyword name.
    options = {
        'silent': False,
        'verbose': False,
        'resolve_right_conflicts': True,
        'resolve_left_conflicts': False,
        'type': get_default_fst_type(),
    }

    for key, value in kvargs.items():
        if key in options:
            options[key] = value
        else:
            print('Warning: ignoring unknown argument %s.' % (key))

    return libhfst.TwolcCompiler.compile(
        inputfilename,
        outputfilename,
        options['silent'],
        options['verbose'],
        options['resolve_right_conflicts'],
        options['resolve_left_conflicts'],
        options['type'])
Ejemplo n.º 8
0
def compile_twolc_file(inputfilename, outputfilename, **kwargs):
    """
    Compile the twolc file *inputfilename* and write the result to the file
    *outputfilename*.

    Parameters
    ----------
    * `inputfilename` :
        The name of the twolc input file.
    * `outputfilename` :
        The name of the transducer output file.
    * `kwargs` :
        Recognized keys: silent, verbose, resolve_right_conflicts,
        resolve_left_conflicts, type. Any other key is reported with a
        warning on standard output and otherwise ignored.
    * `silent` :
        Whether compilation is performed in silent mode, defaults to False.
    * `verbose` :
        Whether compilation is performed in verbose mode, defaults to False.
    * `resolve_right_conflicts` :
        Whether right arrow conflicts are resolved, defaults to True.
    * `resolve_left_conflicts` :
        Whether left arrow conflicts are resolved, defaults to False.
    * `type` :
        Implementation type of the compiler, defaults to hfst.get_default_fst_type().

    Returns
    -------
    On success zero, else an integer other than zero.
    """
    # Defaults for every recognized option, keyed by the keyword name.
    options = {
        'silent': False,
        'verbose': False,
        'resolve_right_conflicts': True,
        'resolve_left_conflicts': False,
        'type': get_default_fst_type(),
    }

    for key, value in kwargs.items():
        if key in options:
            options[key] = value
        else:
            print('Warning: ignoring unknown argument %s.' % (key))

    return libhfst.TwolcCompiler.compile(
        inputfilename,
        outputfilename,
        options['silent'],
        options['verbose'],
        options['resolve_right_conflicts'],
        options['resolve_left_conflicts'],
        options['type'])
Ejemplo n.º 9
0
def compile_sfst_file(filename, **kvargs):
    """
    Compile sfst file *filename* into a transducer.

    Parameters
    ----------
    * `filename` :
        The name of the sfst file.
    * `kvargs` :
        Arguments recognized are: verbose, output, output_to_console. Any
        other key is reported with a warning and ignored.
    * `verbose` :
        Whether sfst file is processed in verbose mode, defaults to False.
    * `output` :
        Where output is printed. Possible values are sys.stdout, sys.stderr
        or an object with a ``write()`` method such as a StringIO. If
        omitted (None), an empty stream name is passed to the compiler and
        nothing is written back by this function.
    * `output_to_console` :
        Passed through to the compiler, defaults to
        get_output_to_console().

    Returns
    -------
    On success the resulting transducer, else None.
    """
    import sys

    verbosity = False
    output = None
    to_console = get_output_to_console()

    for k, v in kvargs.items():
        if k == 'verbose':
            verbosity = v
        elif k == 'output':
            output = v
        elif k == 'output_to_console':
            to_console = v
        else:
            print('Warning: ignoring unknown argument %s.' % (k))

    # The standard streams are identified by object identity and mapped to
    # the stream names understood by the native compiler.
    if output is None:
        return libhfst.hfst_compile_sfst(filename, "", verbosity, to_console)
    if output is sys.stdout:
        return libhfst.hfst_compile_sfst(filename, "cout", verbosity,
                                         to_console)
    if output is sys.stderr:
        return libhfst.hfst_compile_sfst(filename, "cerr", verbosity,
                                         to_console)

    # Any other object: compile with an empty stream name, then copy the
    # compiler's collected output into the caller's stream.
    # NOTE(review): `unicode` is assumed to be available at module level
    # (builtin on Python 2, compatibility shim on Python 3) - confirm.
    retval = libhfst.hfst_compile_sfst(filename, "", verbosity, to_console)
    output.write(unicode(libhfst.get_hfst_sfst_output(), 'utf-8'))
    return retval
Ejemplo n.º 10
0
def read_att_input():
    """
    Build a transducer from AT&T-format lines typed by the user.

    Lines are read with input() and stripped of trailing whitespace; the
    first empty line ends the input. A line rejected by the AT&T line parser
    raises hfst.exceptions.NotValidAttFormatException with the line text and
    its 1-based number.
    """
    lineno = 0
    basic = HfstBasicTransducer()
    text = input().rstrip()
    while text != "":
        lineno += 1
        if not _parse_att_line(text, basic):
            raise hfst.exceptions.NotValidAttFormatException(text, "", lineno)
        text = input().rstrip()
    return HfstTransducer(basic, get_default_fst_type())
Ejemplo n.º 11
0
def read_att_input():
    """
    Build a transducer from AT&T-format lines typed by the user.

    Lines are read with input() and stripped of trailing whitespace; the
    first empty line ends the input. A line rejected by the AT&T line parser
    raises hfst.exceptions.NotValidAttFormatException with the line text and
    its 1-based number.
    """
    lineno = 0
    basic = HfstBasicTransducer()
    text = input().rstrip()
    while text != "":
        lineno += 1
        if not _parse_att_line(text, basic):
            raise hfst.exceptions.NotValidAttFormatException(text, "", lineno)
        text = input().rstrip()
    return HfstTransducer(basic, get_default_fst_type())
Ejemplo n.º 12
0
def compile_sfst_file(filename, **kvargs):
    """
    Compile sfst file *filename* into a transducer.

    Parameters
    ----------
    * `filename` :
        The name of the sfst file.
    * `kvargs` :
        Arguments recognized are: verbose, output, output_to_console. Any
        other key is reported with a warning and ignored.
    * `verbose` :
        Whether sfst file is processed in verbose mode, defaults to False.
    * `output` :
        Where output is printed. Possible values are sys.stdout, sys.stderr
        or an object with a ``write()`` method such as a StringIO. If
        omitted (None), an empty stream name is passed to the compiler and
        nothing is written back by this function.
    * `output_to_console` :
        Passed through to the compiler, defaults to
        get_output_to_console().

    Returns
    -------
    On success the resulting transducer, else None.
    """
    import sys

    verbosity = False
    output = None
    to_console = get_output_to_console()

    for k, v in kvargs.items():
        if k == 'verbose':
            verbosity = v
        elif k == 'output':
            output = v
        elif k == 'output_to_console':
            to_console = v
        else:
            print('Warning: ignoring unknown argument %s.' % (k))

    # The standard streams are identified by object identity and mapped to
    # the stream names understood by the native compiler.
    if output is None:
        return libhfst.hfst_compile_sfst(filename, "", verbosity, to_console)
    if output is sys.stdout:
        return libhfst.hfst_compile_sfst(filename, "cout", verbosity, to_console)
    if output is sys.stderr:
        return libhfst.hfst_compile_sfst(filename, "cerr", verbosity, to_console)

    # Any other object: compile with an empty stream name, then copy the
    # compiler's collected output into the caller's stream.
    # NOTE(review): `unicode` is assumed to be available at module level
    # (builtin on Python 2, compatibility shim on Python 3) - confirm.
    retval = libhfst.hfst_compile_sfst(filename, "", verbosity, to_console)
    output.write(unicode(libhfst.get_hfst_sfst_output(), 'utf-8'))
    return retval
Ejemplo n.º 13
0
def tokenized_fst(arg, weight=0):
    """
    Get a transducer that recognizes the concatenation of symbols or symbol pairs in
    *arg*.

    Parameters
    ----------
    * `arg` :
        The symbols or symbol pairs that form the path to be recognized: a
        list or tuple whose elements are strings (identity symbols),
        two-element lists/tuples (input, output) or one-element
        lists/tuples (identity symbol).
    * `weight` :
        The final weight of the path, defaults to 0. Transitions themselves
        get weight 0.

    Example

       import hfst
       tok = hfst.HfstTokenizer()
       tok.add_multichar_symbol('foo')
       tok.add_multichar_symbol('bar')
       tr = hfst.tokenized_fst(tok.tokenize('foobar', 'foobaz'))

    will create the transducer [foo:foo bar:b 0:a 0:z].

    Raises
    ------
    * `RuntimeError` :
        If *arg* is not a list or tuple, or a list/tuple token does not have
        one or two elements.
    """
    if not isinstance(arg, (list, tuple)):
        raise RuntimeError('Argument must be a list or a tuple')

    retval = HfstBasicTransducer()
    state = 0
    for token in arg:
        if isinstance(token, str):
            isymbol = osymbol = token
        elif isinstance(token, (list, tuple)):
            if len(token) == 2:
                isymbol, osymbol = token[0], token[1]
            elif len(token) == 1:
                # Use the contained symbol, not the one-element container.
                isymbol = osymbol = token[0]
            else:
                raise RuntimeError('Symbol or symbol pair must be given.')
        else:
            # Tokens of any other type are skipped without adding a
            # transition (kept for backward compatibility).
            continue
        new_state = retval.add_state()
        retval.add_transition(state, new_state, isymbol, osymbol, 0)
        state = new_state
    retval.set_final_weight(state, weight)
    return HfstTransducer(retval, get_default_fst_type())
Ejemplo n.º 14
0
def tokenized_fst(arg, weight=0):
    """
    Get a transducer that recognizes the concatenation of symbols or symbol pairs in
    *arg*.

    Parameters
    ----------
    * `arg` :
        The symbols or symbol pairs that form the path to be recognized: a
        list or tuple whose elements are strings (identity symbols),
        two-element lists/tuples (input, output) or one-element
        lists/tuples (identity symbol).
    * `weight` :
        The final weight of the path, defaults to 0. Transitions themselves
        get weight 0.

    Example

       import hfst
       tok = hfst.HfstTokenizer()
       tok.add_multichar_symbol('foo')
       tok.add_multichar_symbol('bar')
       tr = hfst.tokenized_fst(tok.tokenize('foobar', 'foobaz'))

    will create the transducer [foo:foo bar:b 0:a 0:z].

    Raises
    ------
    * `RuntimeError` :
        If *arg* is not a list or tuple, or a list/tuple token does not have
        one or two elements.
    """
    if not isinstance(arg, (list, tuple)):
        raise RuntimeError('Argument must be a list or a tuple')

    retval = HfstBasicTransducer()
    state = 0
    for token in arg:
        if isinstance(token, str):
            isymbol = osymbol = token
        elif isinstance(token, (list, tuple)):
            if len(token) == 2:
                isymbol, osymbol = token[0], token[1]
            elif len(token) == 1:
                # Use the contained symbol, not the one-element container.
                isymbol = osymbol = token[0]
            else:
                raise RuntimeError('Symbol or symbol pair must be given.')
        else:
            # Tokens of any other type are skipped without adding a
            # transition (kept for backward compatibility).
            continue
        new_state = retval.add_state()
        retval.add_transition(state, new_state, isymbol, osymbol, 0)
        state = new_state
    retval.set_final_weight(state, weight)
    return HfstTransducer(retval, get_default_fst_type())
Ejemplo n.º 15
0
def regex(re, **kvargs):
    """
    Get a transducer as defined by regular expression *re*.

    Parameters
    ----------
    * `re` :
        The regular expression defined with Xerox transducer notation.
    * `kvargs` :
        Arguments recognized are: 'error', 'output_to_console'. Any other
        key is reported with a warning and ignored.
    * `error` :
        Where warnings and errors are printed. Possible values are
        sys.stdout, sys.stderr, an object with a ``write()`` method such as
        a StringIO, or None. If omitted or None, an empty stream name is
        passed to the compiler and no message is written back.
    * `output_to_console` :
        Passed through to the compiler, defaults to
        get_output_to_console().

    Returns
    -------
    Whatever libhfst.hfst_regex returns for the compiled expression.
    """
    import sys

    fst_type = get_default_fst_type()
    to_console = get_output_to_console()
    err = None

    for k, v in kvargs.items():
        if k == 'output_to_console':
            to_console = v
        # Must be `elif`: with a plain `if`, 'output_to_console' fell through
        # to the `else` branch and was reported as an unknown argument.
        elif k == 'error':
            err = v
        else:
            print('Warning: ignoring unknown argument %s.' % (k))

    comp = XreCompiler(fst_type)
    comp.setOutputToConsole(to_console)

    # The standard streams are identified by object identity and mapped to
    # the stream names understood by the native compiler.
    if err is None:
        return libhfst.hfst_regex(comp, re, "")
    if err is sys.stdout:
        return libhfst.hfst_regex(comp, re, "cout")
    if err is sys.stderr:
        return libhfst.hfst_regex(comp, re, "cerr")

    # Any other object: compile with an empty stream name, then copy the
    # collected error message into the caller's stream.
    retval = libhfst.hfst_regex(comp, re, "")
    err.write(libhfst.get_hfst_regex_error_message())
    return retval
Ejemplo n.º 16
0
def read_att_transducer(f, epsilonstr=EPSILON, linecount=[0]):
    """
    Build a transducer from the AT&T-format lines read from file *f*.

    Reading stops at end of input or at the first line that begins with '-'.
    *epsilonstr* is forwarded to the AT&T line parser as the epsilon
    representation. On success the number of lines counted by this call
    (the terminating line or end of input included) is added to
    ``linecount[0]``.

    Raises hfst.exceptions.EndOfStreamException if the input is already
    exhausted at the first read, and
    hfst.exceptions.NotValidAttFormatException for a line the parser rejects.
    """
    consumed = 0
    basic = HfstBasicTransducer()
    while True:
        text = f.readline()
        exhausted = (text == "")
        if exhausted and consumed == 0:
            # Nothing at all could be read for this transducer.
            raise hfst.exceptions.EndOfStreamException("", "", 0)
        consumed += 1
        if exhausted or text[0] == '-':
            break
        if not _parse_att_line(text, basic, epsilonstr):
            raise hfst.exceptions.NotValidAttFormatException(text, "", linecount[0] + consumed)
    linecount[0] = linecount[0] + consumed
    return HfstTransducer(basic, get_default_fst_type())
Ejemplo n.º 17
0
def read_att_transducer(f, epsilonstr=EPSILON, linecount=[0]):
    """
    Build a transducer from the AT&T-format lines read from file *f*.

    Reading stops at end of input or at the first line that begins with '-'.
    *epsilonstr* is forwarded to the AT&T line parser as the epsilon
    representation. On success the number of lines counted by this call
    (the terminating line or end of input included) is added to
    ``linecount[0]``.

    Raises hfst.exceptions.EndOfStreamException if the input is already
    exhausted at the first read, and
    hfst.exceptions.NotValidAttFormatException for a line the parser rejects.
    """
    consumed = 0
    basic = HfstBasicTransducer()
    while True:
        text = f.readline()
        exhausted = (text == "")
        if exhausted and consumed == 0:
            # Nothing at all could be read for this transducer.
            raise hfst.exceptions.EndOfStreamException("", "", 0)
        consumed += 1
        if exhausted or text[0] == '-':
            break
        if not _parse_att_line(text, basic, epsilonstr):
            raise hfst.exceptions.NotValidAttFormatException(text, "", linecount[0] + consumed)
    linecount[0] = linecount[0] + consumed
    return HfstTransducer(basic, get_default_fst_type())
Ejemplo n.º 18
0
        tr1 = istr.read()
        numtr += 1
        tr2 = istr.read()
        numtr += 1
        tr3 = istr.read()
        numtr += 1
    except libhfst.EndOfStreamException:
        pass
    except:
        raise RuntimeError(get_linenumber())
    istr.close()

    if numtr != 2:
        raise RuntimeError(get_linenumber())

    tr1.convert(libhfst.get_default_fst_type())
    tr2.convert(libhfst.get_default_fst_type())

    ostr = libhfst.HfstOutputStream(filename='foobar2.hfst')
    ostr.write(tr1)
    ostr.write(tr2)
    ostr.flush()
    ostr.close()

    TR1 = None
    TR2 = None
    TR3 = None

    istr = libhfst.HfstInputStream('foobar2.hfst')
    numtr = 0
    try:
Ejemplo n.º 19
0
def compile_lexc_file(filename, **kwargs):
    """
    Compile lexc file *filename* into a transducer.

    Parameters
    ----------
    * `filename` :
        The name of the lexc file.
    * `kwargs` :
        Arguments recognized are: verbosity, with_flags, align_strings,
        output, output_to_console. Any other key is reported with a warning
        and ignored.
    * `verbosity` :
        The verbosity of the compiler, defaults to 0 (silent). Possible values are:
        0, 1, 2.
    * `with_flags` :
        Whether lexc flags are used when compiling, defaults to False.
    * `align_strings` :
        Passed to the LexcCompiler constructor, defaults to False.
    * `output` :
        Where output is printed. Possible values are sys.stdout, sys.stderr
        or an object with a ``write()`` method such as a StringIO. If
        omitted (None), an empty stream name is passed to the compiler and
        nothing is written back by this function.
    * `output_to_console` :
        Passed through to the compiler, defaults to
        get_output_to_console().

    Returns
    -------
    On success the resulting transducer, else None.
    """
    import sys

    verbosity = 0
    withflags = False
    alignstrings = False
    fst_type = get_default_fst_type()
    output = None
    to_console = get_output_to_console()

    for k, v in kwargs.items():
        if k == 'verbosity':
            verbosity = v
        elif k == 'with_flags':
            # Only a value equal to True switches flags on; anything else
            # leaves the default in place.
            if v == True:
                withflags = v
        elif k == 'align_strings':
            alignstrings = v
        elif k == 'output':
            output = v
        elif k == 'output_to_console':
            to_console = v
        else:
            print('Warning: ignoring unknown argument %s.' % (k))

    lexccomp = LexcCompiler(fst_type, withflags, alignstrings)
    lexccomp.setVerbosity(verbosity)
    lexccomp.setOutputToConsole(to_console)

    # The standard streams are identified by object identity and mapped to
    # the stream names understood by the native compiler.
    if output is None:
        return libhfst.hfst_compile_lexc(lexccomp, filename, "")
    if output is sys.stdout:
        return libhfst.hfst_compile_lexc(lexccomp, filename, "cout")
    if output is sys.stderr:
        return libhfst.hfst_compile_lexc(lexccomp, filename, "cerr")

    # Any other object: compile with an empty stream name, then copy the
    # compiler's collected output into the caller's stream.
    # NOTE(review): `unicode` is assumed to be available at module level
    # (builtin on Python 2, compatibility shim on Python 3) - confirm.
    retval = libhfst.hfst_compile_lexc(lexccomp, filename, "")
    output.write(unicode(libhfst.get_hfst_lexc_output(), 'utf-8'))
    return retval
Ejemplo n.º 20
0
def start_xfst(**kwargs):
    """
    Start interactive xfst compiler.

    Commands are read with input() until the compiler reports that quitting
    was requested. A line ending in a backslash is continued on the next
    line. "apply down" / "apply up" (when the readline module is available)
    read words until end-of-file and apply the compiler to each of them.

    Parameters
    ----------
    * `kwargs` :
        Arguments recognized are: type, quit_on_fail, output_to_console. Any
        other key is reported with a warning and ignored.
    * `quit_on_fail` :
        Whether the compiler exits on any error, defaults to False.
    * `type` :
        Implementation type of the compiler, defaults to
        hfst.get_default_fst_type().
    * `output_to_console` :
        Passed through to the compiler, defaults to
        get_output_to_console(); forced to False when running under IDLE.
    """
    import sys
    idle = 'idlelib' in sys.modules
    if idle:
        print('It seems that you are running python in in IDLE. Note that all output from xfst will be buffered.')
        print('This means that all warnings, e.g. about time-consuming operations, will be printed only after the operation is carried out.')
        print('Consider running python from shell, for example command prompt, if you wish to see output with no delays.')

    fst_type = get_default_fst_type()
    quit_on_fail = 'OFF'
    to_console = get_output_to_console()
    for k, v in kwargs.items():
        if k == 'type':
            fst_type = v
        elif k == 'output_to_console':
            to_console = v
        elif k == 'quit_on_fail':
            if v == True:
                quit_on_fail = 'ON'
        else:
            print('Warning: ignoring unknown argument %s.' % (k))

    comp = XfstCompiler(fst_type)
    comp.setReadInteractiveTextFromStdin(True)
    comp.setReadline(False)  # do not mix python and c++ readline

    if to_console and idle:
        print('Cannot output to console when running libhfst from IDLE.')
        to_console = False
    comp.setOutputToConsole(to_console)
    comp.set('quit-on-fail', quit_on_fail)

    # Remember the readline history length so that everything typed during
    # the session can be removed from the history afterwards.
    rl_length_1 = 0
    rl_found = False
    try:
        import readline
        rl_found = True
        rl_length_1 = readline.get_current_history_length()
    except ImportError:
        pass

    expression = ""
    while True:
        expression += input(comp.get_prompt()).strip()
        if len(expression) == 0:
            continue
        if expression[-1] == '\\':
            # Line continuation: replace only the trailing backslash with a
            # newline (slicing off two characters also dropped the character
            # in front of the backslash).
            expression = expression[:-1] + '\n'
            continue
        retval = -1
        if idle:
            retval = libhfst.hfst_compile_xfst_to_string_one(comp, expression)
            # Qualified through the locally imported `sys`; a bare `stdout`
            # is not bound anywhere in this function.
            sys.stdout.write(libhfst.get_hfst_xfst_string_one())
        else:
            # interactive command
            if (expression == "apply down" or expression == "apply up") and rl_found:
                rl_length_2 = readline.get_current_history_length()
                while True:
                    try:
                        line = input().strip()
                    except EOFError:
                        break
                    if expression == "apply down":
                        comp.apply_down(line)
                    elif expression == "apply up":
                        comp.apply_up(line)
                # Drop the applied words from the readline history.
                for _ in range(readline.get_current_history_length() - rl_length_2):
                    readline.remove_history_item(rl_length_2)
                retval = 0
            elif expression == "inspect" or expression == "inspect net":
                print('inspect net not supported')
                retval = 0
            else:
                retval = comp.parse_line(expression + "\n")
        if retval != 0:
            print("expression '%s' could not be parsed" % expression)
            if comp.get("quit-on-fail") == "ON":
                return
        if comp.quit_requested():
            break
        expression = ""

    # Remove everything typed during the session from the readline history.
    if rl_found:
        for _ in range(readline.get_current_history_length() - rl_length_1):
            readline.remove_history_item(rl_length_1)
Ejemplo n.º 21
0
def compile_xfst_file(filename, **kwargs):
    """
    Compile (run) xfst file *filename*.

    Parameters
    ----------
    * `filename` :
        The name of the xfst file. It is read as UTF-8 text.
    * `kwargs` :
        Arguments recognized are: verbosity, quit_on_fail, output, error,
        type, output_to_console. Any other key is reported with a warning
        and ignored.
    * `verbosity` :
        The verbosity of the compiler, defaults to 0 (silent). Possible values are:
        0, 1, 2.
    * `quit_on_fail` :
        Whether the script is exited on any error, defaults to True.
    * `output` :
        Where output is printed. Possible values are sys.stdout, sys.stderr
        or a StringIO. If omitted (None), standard output is used.
    * `error` :
        Where errors are printed. Possible values are sys.stderr, sys.stdout
        or a StringIO. If omitted (None), standard error is used.
    * `type` :
        Implementation type of the compiler, defaults to
        hfst.get_default_fst_type().
        NOTE(review): this key is documented but not handled by the option
        loop below (it is reported as unknown) - confirm intended behavior.
    * `output_to_console` :
        Passed through to the compiler, defaults to
        get_output_to_console().

    Returns
    -------
    On success 0, else an integer greater than 0.

    Raises
    ------
    * `RuntimeError` :
        If the major version read from the module-level `version` is not
        greater than 2.
    """
    # NOTE(review): `version` is assumed to be the interpreter version
    # string bound at module level - confirm.
    if not int(version[0]) > 2:
        raise RuntimeError('hfst.compile_xfst_file not supported for python version 2')

    import sys
    from io import StringIO

    verbosity = 0
    quit_on_fail = 'ON'
    fst_type = get_default_fst_type()
    output = None
    error = None
    to_console = get_output_to_console()

    for k, v in kwargs.items():
        if k == 'verbosity':
            verbosity = v
        elif k == 'quit_on_fail':
            if v == False:
                quit_on_fail = 'OFF'
        elif k == 'output':
            output = v
        elif k == 'error':
            error = v
        elif k == 'output_to_console':
            to_console = v
        else:
            print('Warning: ignoring unknown argument %s.' % (k))

    if verbosity > 1:
        print('Compiling with %s implementation...' % fst_type_to_string(fst_type))
    xfstcomp = XfstCompiler(fst_type)
    xfstcomp.setOutputToConsole(to_console)
    xfstcomp.setVerbosity(verbosity > 0)
    xfstcomp.set('quit-on-fail', quit_on_fail)
    if verbosity > 1:
        print('Opening xfst file %s...' % filename)
    # The context manager closes the file even if reading fails.
    with open(filename, 'r', encoding='utf-8') as f:
        data = f.read()
    if verbosity > 1:
        print('File closed...')

    # Special case: output and error go to the very same StringIO, so both
    # are collected into one string.
    if isinstance(output, StringIO) and isinstance(error, StringIO) and output is error:
        retval = libhfst.hfst_compile_xfst_to_string_one(xfstcomp, data)
        output.write(unicode(libhfst.get_hfst_xfst_string_one(), 'utf-8'))
    else:
        # Map the standard streams to the stream names understood by the
        # native compiler; anything else keeps the empty name.
        arg1 = ""
        arg2 = ""
        if output is None or output is sys.stdout:
            arg1 = "cout"
        elif output is sys.stderr:
            arg1 = "cerr"  # was a no-op comparison (`==`) before
        if error is None or error is sys.stderr:
            arg2 = "cerr"
        elif error is sys.stdout:
            arg2 = "cout"  # was a no-op comparison (`==`) before

        retval = libhfst.hfst_compile_xfst(xfstcomp, data, arg1, arg2)

        # NOTE(review): `unicode` is assumed to be available at module level
        # (compatibility shim on Python 3) - confirm.
        if isinstance(output, StringIO):
            output.write(unicode(libhfst.get_hfst_xfst_string_one(), 'utf-8'))
        if isinstance(error, StringIO):
            error.write(unicode(libhfst.get_hfst_xfst_string_two(), 'utf-8'))

    if verbosity > 1:
        print('Parsed file with return value %i (0 indicating succesful parsing).' % retval)
    return retval
Ejemplo n.º 22
0
def compile_lexc_file(filename, **kvargs):
    """
    Compile lexc file *filename* with a LexcCompiler.

    Parameters
    ----------
    * `filename` :
        The name of the lexc file.
    * `kvargs` :
        Arguments recognized are: verbosity, with_flags, align_strings,
        output, output_to_console. Any other key only triggers a warning.
    * `verbosity` :
        Passed to the compiler's setVerbosity, defaults to 0.
    * `with_flags` :
        Passed to the LexcCompiler constructor, defaults to False. Only a
        value that compares equal to True changes the default.
    * `align_strings` :
        Passed to the LexcCompiler constructor, defaults to False.
    * `output` :
        Where output is printed: sys.stdout, sys.stderr, or an object with a
        write() method (e.g. a StringIO). Defaults to None, in which case an
        empty stream name is handed to libhfst and nothing is written here.
    * `output_to_console` :
        Passed to the compiler's setOutputToConsole, defaults to
        get_output_to_console().

    Returns
    -------
    The value returned by libhfst.hfst_compile_lexc (described upstream as
    the resulting transducer on success, else None).
    """
    import sys

    fst_type = get_default_fst_type()
    settings = {
        'verbosity': 0,
        'with_flags': False,
        'align_strings': False,
        'output': None,
        'output_to_console': get_output_to_console(),
    }

    for key, value in kvargs.items():
        if key not in settings:
            print('Warning: ignoring unknown argument %s.' % (key))
        elif key == 'with_flags':
            # Only an explicit truthy-equal-to-True value enables flags.
            if value == True:
                settings[key] = value
        else:
            settings[key] = value

    compiler = LexcCompiler(fst_type, settings['with_flags'],
                            settings['align_strings'])
    compiler.setVerbosity(settings['verbosity'])
    compiler.setOutputToConsole(settings['output_to_console'])

    sink = settings['output']
    if sink is None:
        return libhfst.hfst_compile_lexc(compiler, filename, "")
    if sink == sys.stdout:
        return libhfst.hfst_compile_lexc(compiler, filename, "cout")
    if sink == sys.stderr:
        return libhfst.hfst_compile_lexc(compiler, filename, "cerr")

    # Any other sink: compile with an empty stream name, then copy the
    # compiler output collected by libhfst into the caller's object.
    result = libhfst.hfst_compile_lexc(compiler, filename, "")
    sink.write(unicode(libhfst.get_hfst_lexc_output(), 'utf-8'))
    return result
Ejemplo n.º 23
0
import libhfst
# Build the HFST basic transducer [a:b]: one transition 0 -> 1 with weight
# 0.3, and state 1 final with weight 0.5.
basic = libhfst.HfstBasicTransducer()
basic.add_state(1)
basic.add_transition(0, 1, 'a', 'b', 0.3)
basic.set_final_weight(1, 0.5)

# Convert to the default implementation type and push the weights toward
# the final state.
pushed = libhfst.HfstTransducer(basic, libhfst.get_default_fst_type())
pushed.push_weights(libhfst.TO_FINAL_STATE)

# Convert back to an HFST basic transducer so the final weight can be read.
roundtrip = libhfst.HfstBasicTransducer(pushed)
try:
    final_weight = roundtrip.get_final_weight(1)
except libhfst.HfstException:
    # Raised if the state does not exist or is not final.
    print("TEST FAILED: An exception thrown.")
    exit(1)

# Rounding might affect the precision, so accept a small interval around 0.8.
if 0.79 < final_weight < 0.81:
    print("TEST PASSED")
    exit(0)
else:
    print("TEST FAILED")
    exit(1)
Ejemplo n.º 24
0
def regex(re, **kwargs):
    r"""
    Get a transducer as defined by regular expression *re*.

    Parameters
    ----------
    * `re` :
        The regular expression defined with Xerox transducer notation.
    * `kwargs` :
        Arguments recognized are: 'error', 'definitions' and
        'output_to_console'. Any other key only triggers a warning.
    * `error` :
        Where warnings and errors are printed. Possible values are sys.stdout,
        sys.stderr, an object with a write() method (e.g. a StringIO) or
        None. The code default is None, in which case an empty stream name
        is passed to libhfst and nothing is written by this function.
    * `definitions` :
        A dictionary mapping variable names into transducers. Values whose
        type name does not contain "HfstTransducer" are silently skipped.
    * `output_to_console` :
        Passed to the compiler's setOutputToConsole, defaults to
        get_output_to_console().

    Returns
    -------
    The value returned by libhfst.hfst_regex.


    Regular expression operators:

    ~   complement
    \   term complement
    &   intersection
    -   minus

    $.  contains once
    $?  contains optionally
    $   contains once or more
    ( ) optionality

    +   Kleene plus
    *   Kleene star

    ./. ignore internally (not yet implemented)
    /   ignoring

    |   union

    <>  shuffle
    <   before
    >   after

    .o.   composition
    .O.   lenient composition
    .m>.  merge right
    .<m.  merge left
    .x.   cross product
    .P.   input priority union
    .p.   output priority union
    .-u.  input minus
    .-l.  output minus
    `[ ]  substitute

    ^n,k  catenate from n to k times, inclusive
    ^>n   catenate more than n times
    ^<n   catenate less than n times
    ^n    catenate n times

    .r   reverse
    .i   invert
    .u   input side
    .l   output side

    \\\  left quotient

    Two-level rules:

     \<=   left restriction
     <=>   left and right arrow
     <=    left arrow
     =>    right arrow

    Replace rules:

     ->    replace right
     (->)  optionally replace right
     <-    replace left
     (<-)  optionally replace left
     <->   replace left and right
     (<->) optionally replace left and right
     @->   left-to-right longest match
     @>    left-to-right shortest match
     ->@   right-to-left longest match
     >@    right-to-left shortest match

    Rule contexts, markers and separators:

     ||   match contexts on input sides
     //   match left context on output side and right context on input side
     \\   match left context on input side and right context on output side
     \/   match contexts on output sides
     _    center marker
     ...  markup marker
     ,,   rule separator in parallel rules
     ,    context separator
     [. .]  match epsilons only once

    Read from file:

     @bin" "  read binary transducer
     @txt" "  read transducer in att text format
     @stxt" " read spaced text
     @pl" "   read transducer in prolog text format
     @re" "   read regular expression

    Symbols:

     .#.  word boundary symbol in replacements, restrictions
     0    the epsilon
     ?    any token
     %    escape character
     { }  concatenate symbols
     " "  quote symbol

    :    pair separator
    ::   weight

    ;   end of expression
    !   starts a comment until end of line
    #   starts a comment until end of line
    """
    import sys

    type_ = get_default_fst_type()
    to_console = get_output_to_console()
    err = None
    defs = None

    # A single if/elif chain: the warning must fire only for keys that match
    # none of the recognized options.
    for k, v in kwargs.items():
        if k == 'output_to_console':
            to_console = v
        elif k == 'error':
            err = v
        elif k == 'definitions':
            defs = v
        else:
            print('Warning: ignoring unknown argument %s.' % (k))

    comp = XreCompiler(type_)
    comp.setOutputToConsole(to_console)
    if defs is not None:
        for name, value in defs.items():
            # Transducers are recognized by their type name; any other value
            # is skipped without a message.
            if "HfstTransducer" in str(type(value)):
                comp.define_transducer(name, value)

    if err is None:
        return libhfst.hfst_regex(comp, re, "")
    if err == sys.stdout:
        return libhfst.hfst_regex(comp, re, "cout")
    if err == sys.stderr:
        return libhfst.hfst_regex(comp, re, "cerr")

    # Any other object: compile with an empty stream name, then copy the
    # collected error message into the caller's object.
    retval = libhfst.hfst_regex(comp, re, "")
    err.write(unicode(libhfst.get_hfst_regex_error_message(), 'utf-8'))
    return retval
Ejemplo n.º 25
0
def start_xfst(**kvargs):
    """
    Start interactive xfst compiler.

    Reads commands with input() until the compiler reports that quitting was
    requested, or until a command fails while 'quit-on-fail' is ON. History
    entries added to the readline module during the session are removed
    before returning (when readline is available).

    Parameters
    ----------
    * `kvargs` :
        Arguments recognized are: type, quit_on_fail, output_to_console.
        Any other key only triggers a warning.
    * `quit_on_fail` :
        Whether the compiler exits on any error, defaults to False.
    * `type` :
        Implementation type of the compiler, defaults to
        hfst.get_default_fst_type().
    * `output_to_console` :
        Passed to the compiler's setOutputToConsole, defaults to
        get_output_to_console(). Forced to False when running under IDLE.
    """
    import sys
    idle = 'idlelib' in sys.modules
    if idle:
        print('It seems that you are running python in in IDLE. Note that all output from xfst will be buffered.')
        print('This means that all warnings, e.g. about time-consuming operations, will be printed only after the operation is carried out.')
        print('Consider running python from shell, for example command prompt, if you wish to see output with no delays.')

    fst_type = get_default_fst_type()
    quit_on_fail = 'OFF'
    to_console = get_output_to_console()
    for k, v in kvargs.items():
        if k == 'type':
            fst_type = v
        elif k == 'output_to_console':
            to_console = v
        elif k == 'quit_on_fail':
            if v == True:
                quit_on_fail = 'ON'
        else:
            print('Warning: ignoring unknown argument %s.' % (k))

    comp = XfstCompiler(fst_type)
    comp.setReadInteractiveTextFromStdin(True)
    comp.setReadline(False) # do not mix python and c++ readline

    if to_console and idle:
        print('Cannot output to console when running libhfst from IDLE.')
        to_console = False
    comp.setOutputToConsole(to_console)
    comp.set('quit-on-fail', quit_on_fail)

    # Remember the history length at start so that everything typed during
    # the session can be removed from the python readline history afterwards.
    rl_length_1 = 0
    rl_found = False
    try:
        import readline
        rl_found = True
        rl_length_1 = readline.get_current_history_length()
    except ImportError:
        pass

    expression = ""
    while True:
        expression += input(comp.get_prompt()).strip()
        if len(expression) == 0:
            continue
        if expression[-1] == '\\':
            # Line continuation: drop only the trailing backslash (the input
            # is already stripped, so it is the very last character) and
            # keep reading into the same expression.
            expression = expression[:-1] + '\n'
            continue
        retval = -1
        if idle:
            retval = libhfst.hfst_compile_xfst_to_string_one(comp, expression)
            sys.stdout.write(libhfst.get_hfst_xfst_string_one())
        else:
            # interactive command
            if (expression == "apply down" or expression == "apply up") and rl_found:
                rl_length_2 = readline.get_current_history_length()
                # Feed lines to the compiler until end of input (EOF).
                while True:
                    try:
                        line = input().strip()
                    except EOFError:
                        break
                    if expression == "apply down":
                        comp.apply_down(line)
                    elif expression == "apply up":
                        comp.apply_up(line)
                # Forget the strings typed during the apply session.
                for _ in range(readline.get_current_history_length() - rl_length_2):
                    readline.remove_history_item(rl_length_2)
                retval = 0
            elif expression == "inspect" or expression == "inspect net":
                print('inspect net not supported')
                retval = 0
            else:
                retval = comp.parse_line(expression + "\n")
        if retval != 0:
            print("expression '%s' could not be parsed" % expression)
            if comp.get("quit-on-fail") == "ON":
                # Leave the loop instead of returning directly so that the
                # history cleanup below still runs.
                break
        if comp.quit_requested():
            break
        expression = ""

    if rl_found:
        for _ in range(readline.get_current_history_length() - rl_length_1):
            readline.remove_history_item(rl_length_1)
Ejemplo n.º 26
0
def compile_xfst_file(filename, **kvargs):
    """
    Compile (run) xfst file *filename*.

    The file is read as UTF-8 text and its whole content is handed to an
    XfstCompiler.

    Parameters
    ----------
    * `filename` :
        The name of the xfst file.
    * `kvargs` :
        Arguments recognized are: verbosity, quit_on_fail, output, error,
        type, output_to_console. Any other key only triggers a warning.
    * `verbosity` :
        The verbosity of the compiler, defaults to 0 (silent). Possible values are:
        0, 1, 2. Values above 1 additionally print progress messages from
        this function.
    * `quit_on_fail` :
        Whether the script is exited on any error, defaults to True.
    * `output` :
        Where output is printed. Possible values are sys.stdout, sys.stderr, a
        StringIO, sys.stdout being used when the argument is omitted or None.
    * `error` :
        Where errors are printed. Possible values are sys.stdout, sys.stderr, a
        StringIO, sys.stderr being used when the argument is omitted or None.
        If `output` and `error` are the same StringIO object, both are
        collected into it together.
    * `type` :
        Implementation type of the compiler, defaults to
        hfst.get_default_fst_type().
    * `output_to_console` :
        Passed to the compiler's setOutputToConsole, defaults to
        get_output_to_console().

    Returns
    -------
    On success 0, else an integer greater than 0.

    Raises
    ------
    RuntimeError
        If run under python version 2.
    """
    if int(version[0]) <= 2:
        raise RuntimeError('hfst.compile_xfst_file not supported for python version 2')

    import sys
    from io import StringIO

    verbosity = 0
    quit_on_fail = 'ON'
    fst_type = get_default_fst_type()
    output = None
    error = None
    to_console = get_output_to_console()

    for k, v in kvargs.items():
        if k == 'verbosity':
            verbosity = v
        elif k == 'quit_on_fail':
            if v == False:
                quit_on_fail = 'OFF'
        elif k == 'output':
            output = v
        elif k == 'error':
            error = v
        elif k == 'type':
            # Documented option; honour it instead of warning about it.
            fst_type = v
        elif k == 'output_to_console':
            to_console = v
        else:
            print('Warning: ignoring unknown argument %s.' % (k))

    if verbosity > 1:
        print('Compiling with %s implementation...' % fst_type_to_string(fst_type))
    xfstcomp = XfstCompiler(fst_type)
    xfstcomp.setOutputToConsole(to_console)
    xfstcomp.setVerbosity(verbosity > 0)
    xfstcomp.set('quit-on-fail', quit_on_fail)
    if verbosity > 1:
        print('Opening xfst file %s...' % filename)
    # The context manager closes the file even if reading fails.
    with open(filename, 'r', encoding='utf-8') as f:
        data = f.read()
    if verbosity > 1:
        print('File closed...')

    # Special case: output and error go to one and the same StringIO.
    if isinstance(output, StringIO) and isinstance(error, StringIO) and output is error:
        retval = libhfst.hfst_compile_xfst_to_string_one(xfstcomp, data)
        output.write(unicode(libhfst.get_hfst_xfst_string_one(), 'utf-8'))
    else:
        # Stream names understood by libhfst; an empty name is used for
        # StringIO targets, whose content is copied over after compiling.
        arg1 = ""
        arg2 = ""
        if output is None or output == sys.stdout:
            arg1 = "cout"
        if output == sys.stderr:
            arg1 = "cerr"
        if error is None or error == sys.stderr:
            arg2 = "cerr"
        if error == sys.stdout:
            arg2 = "cout"

        retval = libhfst.hfst_compile_xfst(xfstcomp, data, arg1, arg2)

        if isinstance(output, StringIO):
            output.write(unicode(libhfst.get_hfst_xfst_string_one(), 'utf-8'))
        if isinstance(error, StringIO):
            error.write(unicode(libhfst.get_hfst_xfst_string_two(), 'utf-8'))

    if verbosity > 1:
        print('Parsed file with return value %i (0 indicating succesful parsing).' % retval)
    return retval
Ejemplo n.º 27
0
def regex(re, **kvargs):
    r"""
    Get a transducer as defined by regular expression *re*.

    Parameters
    ----------
    * `re` :
        The regular expression defined with Xerox transducer notation.
    * `kvargs` :
        Arguments recognized are: 'error', 'definitions' and
        'output_to_console'. Any other key only triggers a warning.
    * `error` :
        Where warnings and errors are printed. Possible values are sys.stdout,
        sys.stderr, an object with a write() method (e.g. a StringIO) or
        None. The code default is None, in which case an empty stream name
        is passed to libhfst and nothing is written by this function.
    * `definitions` :
        A dictionary mapping variable names into transducers. Values whose
        type name does not contain "HfstTransducer" are silently skipped.
    * `output_to_console` :
        Passed to the compiler's setOutputToConsole, defaults to
        get_output_to_console().

    Returns
    -------
    The value returned by libhfst.hfst_regex.


    Regular expression operators:

    ~   complement
    \   term complement
    &   intersection
    -   minus

    $.  contains once
    $?  contains optionally
    $   contains once or more
    ( ) optionality

    +   Kleene plus
    *   Kleene star

    ./. ignore internally (not yet implemented)
    /   ignoring

    |   union

    <>  shuffle
    <   before
    >   after

    .o.   composition
    .O.   lenient composition
    .m>.  merge right
    .<m.  merge left
    .x.   cross product
    .P.   input priority union
    .p.   output priority union
    .-u.  input minus
    .-l.  output minus
    `[ ]  substitute

    ^n,k  catenate from n to k times, inclusive
    ^>n   catenate more than n times
    ^<n   catenate less than n times
    ^n    catenate n times

    .r   reverse
    .i   invert
    .u   input side
    .l   output side

    \\\  left quotient

    Two-level rules:

     \<=   left restriction
     <=>   left and right arrow
     <=    left arrow
     =>    right arrow

    Replace rules:

     ->    replace right
     (->)  optionally replace right
     <-    replace left
     (<-)  optionally replace left
     <->   replace left and right
     (<->) optionally replace left and right
     @->   left-to-right longest match
     @>    left-to-right shortest match
     ->@   right-to-left longest match
     >@    right-to-left shortest match

    Rule contexts, markers and separators:

     ||   match contexts on input sides
     //   match left context on output side and right context on input side
     \\   match left context on input side and right context on output side
     \/   match contexts on output sides
     _    center marker
     ...  markup marker
     ,,   rule separator in parallel rules
     ,    context separator
     [. .]  match epsilons only once

    Read from file:

     @bin" "  read binary transducer
     @txt" "  read transducer in att text format
     @stxt" " read spaced text
     @pl" "   read transducer in prolog text format
     @re" "   read regular expression

    Symbols:

     .#.  word boundary symbol in replacements, restrictions
     0    the epsilon
     ?    any token
     %    escape character
     { }  concatenate symbols
     " "  quote symbol

    :    pair separator
    ::   weight

    ;   end of expression
    !   starts a comment until end of line
    #   starts a comment until end of line
    """
    import sys

    type_ = get_default_fst_type()
    to_console = get_output_to_console()
    err = None
    defs = None

    # A single if/elif chain: the warning must fire only for keys that match
    # none of the recognized options.
    for k, v in kvargs.items():
        if k == 'output_to_console':
            to_console = v
        elif k == 'error':
            err = v
        elif k == 'definitions':
            defs = v
        else:
            print('Warning: ignoring unknown argument %s.' % (k))

    comp = XreCompiler(type_)
    comp.setOutputToConsole(to_console)
    if defs is not None:
        for name, value in defs.items():
            # Transducers are recognized by their type name; any other value
            # is skipped without a message.
            if "HfstTransducer" in str(type(value)):
                comp.define_transducer(name, value)

    if err is None:
        return libhfst.hfst_regex(comp, re, "")
    if err == sys.stdout:
        return libhfst.hfst_regex(comp, re, "cout")
    if err == sys.stderr:
        return libhfst.hfst_regex(comp, re, "cerr")

    # Any other object: compile with an empty stream name, then copy the
    # collected error message into the caller's object.
    retval = libhfst.hfst_regex(comp, re, "")
    err.write(unicode(libhfst.get_hfst_regex_error_message(), 'utf-8'))
    return retval