Exemple #1
0
def regex(re, **kvargs):
    """
    Get a transducer as defined by regular expression *re*.

    Parameters
    ----------
    * `re` :
        The regular expression defined with Xerox transducer notation.
    * `kvargs` :
        Arguments recognized are: 'error' and 'output_to_console'. Any other
        argument is ignored with a warning printed to standard output.
    * `error` :
        Where warnings and errors are printed. Possible values are sys.stdout,
        sys.stderr, an object with a write() method (e.g. a StringIO) or None.
        None is the value used when the argument is omitted; in that case this
        function does not write the messages anywhere.
    * `output_to_console` :
        Value passed to XreCompiler.setOutputToConsole(). When omitted, the
        value returned by get_output_to_console() is used.

    Returns
    -------
    The value returned by libhfst.hfst_regex() for *re*.
    """
    import sys

    fst_type = get_default_fst_type()
    to_console = get_output_to_console()
    err = None

    # The branches must be mutually exclusive: otherwise a recognized
    # argument falls through to the "unknown argument" warning.
    for k, v in kvargs.items():
        if k == 'output_to_console':
            to_console = v
        elif k == 'error':
            err = v
        else:
            print('Warning: ignoring unknown argument %s.' % (k))

    comp = XreCompiler(fst_type)
    comp.setOutputToConsole(to_console)

    # The third argument of hfst_regex selects the message destination by
    # name: "cout", "cerr", or "" for neither of the standard streams.
    if err is None:
        return libhfst.hfst_regex(comp, re, "")
    if err is sys.stdout:
        return libhfst.hfst_regex(comp, re, "cout")
    if err is sys.stderr:
        return libhfst.hfst_regex(comp, re, "cerr")

    # Any other object: compile first, then forward the message reported by
    # libhfst to the caller-supplied stream.
    retval = libhfst.hfst_regex(comp, re, "")
    err.write(libhfst.get_hfst_regex_error_message())
    return retval
Exemple #2
0
def regex(re, **kwargs):
    r"""
    Get a transducer as defined by regular expression *re*.

    Parameters
    ----------
    * `re` :
        The regular expression defined with Xerox transducer notation.
    * `kwargs` :
        Arguments recognized are: 'error', 'definitions' and
        'output_to_console'. Any other argument is ignored with a warning
        printed to standard output.
    * `error` :
        Where warnings and errors are printed. Possible values are sys.stdout,
        sys.stderr, an object with a write() method (e.g. a StringIO) or None.
        None is the value used when the argument is omitted; in that case this
        function does not write the messages anywhere.
    * `definitions` :
        A dictionary mapping variable names into transducers. Values whose
        type name does not contain "HfstTransducer" are skipped silently.
    * `output_to_console` :
        Value passed to XreCompiler.setOutputToConsole(). When omitted, the
        value returned by get_output_to_console() is used.

    Returns
    -------
    The value returned by libhfst.hfst_regex() for *re*.


    Regular expression operators:

    ~   complement
    \   term complement
    &   intersection
    -   minus

    $.  contains once
    $?  contains optionally
    $   contains once or more
    ( ) optionality

    +   Kleene plus
    *   Kleene star

    ./. ignore internally (not yet implemented)
    /   ignoring

    |   union

    <>  shuffle
    <   before
    >   after

    .o.   composition
    .O.   lenient composition
    .m>.  merge right
    .<m.  merge left
    .x.   cross product
    .P.   input priority union
    .p.   output priority union
    .-u.  input minus
    .-l.  output minus
    `[ ]  substitute

    ^n,k  catenate from n to k times, inclusive
    ^>n   catenate more than n times
    ^<n   catenate less than n times
    ^n    catenate n times

    .r   reverse
    .i   invert
    .u   input side
    .l   output side

    \\\  left quotient

    Two-level rules:

     \<=   left restriction
     <=>   left and right arrow
     <=    left arrow
     =>    right arrow

    Replace rules:

     ->    replace right
     (->)  optionally replace right
     <-    replace left
     (<-)  optionally replace left
     <->   replace left and right
     (<->) optionally replace left and right
     @->   left-to-right longest match
     @>    left-to-right shortest match
     ->@   right-to-left longest match
     >@    right-to-left shortest match

    Rule contexts, markers and separators:

     ||   match contexts on input sides
     //   match left context on output side and right context on input side
     \\   match left context on input side and right context on output side
     \/   match contexts on output sides
     _    center marker
     ...  markup marker
     ,,   rule separator in parallel rules
     ,    context separator
     [. .]  match epsilons only once

    Read from file:

     @bin" "  read binary transducer
     @txt" "  read transducer in att text format
     @stxt" " read spaced text
     @pl" "   read transducer in prolog text format
     @re" "   read regular expression

    Symbols:

     .#.  word boundary symbol in replacements, restrictions
     0    the epsilon
     ?    any token
     %    escape character
     { }  concatenate symbols
     " "  quote symbol

    :    pair separator
    ::   weight

    ;   end of expression
    !   starts a comment until end of line
    #   starts a comment until end of line
    """
    import sys

    type_ = get_default_fst_type()
    to_console = get_output_to_console()
    err = None
    defs = None

    # The branches must be mutually exclusive: otherwise a recognized
    # argument falls through to the "unknown argument" warning.
    for k, v in kwargs.items():
        if k == 'output_to_console':
            to_console = v
        elif k == 'error':
            err = v
        elif k == 'definitions':
            defs = v
        else:
            print('Warning: ignoring unknown argument %s.' % (k))

    comp = XreCompiler(type_)
    comp.setOutputToConsole(to_console)

    if defs is not None:
        for name, value in defs.items():
            # Transducers are recognized by their type name; every other
            # kind of value is skipped without a message.
            if "HfstTransducer" in str(type(value)):
                comp.define_transducer(name, value)

    # The third argument of hfst_regex selects the message destination by
    # name: "cout", "cerr", or "" for neither of the standard streams.
    if err is None:
        return libhfst.hfst_regex(comp, re, "")
    if err is sys.stdout:
        return libhfst.hfst_regex(comp, re, "cout")
    if err is sys.stderr:
        return libhfst.hfst_regex(comp, re, "cerr")

    # Any other object: compile first, then forward the message reported by
    # libhfst to the caller-supplied stream.
    retval = libhfst.hfst_regex(comp, re, "")
    message = libhfst.get_hfst_regex_error_message()
    # Decode byte strings as UTF-8 (what unicode(message, 'utf-8') did on
    # Python 2) without relying on the Python 2-only `unicode` builtin.
    if isinstance(message, bytes):
        message = message.decode('utf-8')
    err.write(message)
    return retval
Exemple #3
0
def regex(re, **kvargs):
    r"""
    Get a transducer as defined by regular expression *re*.

    Parameters
    ----------
    * `re` :
        The regular expression defined with Xerox transducer notation.
    * `kvargs` :
        Arguments recognized are: 'error', 'definitions' and
        'output_to_console'. Any other argument is ignored with a warning
        printed to standard output.
    * `error` :
        Where warnings and errors are printed. Possible values are sys.stdout,
        sys.stderr, an object with a write() method (e.g. a StringIO) or None.
        None is the value used when the argument is omitted; in that case this
        function does not write the messages anywhere.
    * `definitions` :
        A dictionary mapping variable names into transducers. Values whose
        type name does not contain "HfstTransducer" are skipped silently.
    * `output_to_console` :
        Value passed to XreCompiler.setOutputToConsole(). When omitted, the
        value returned by get_output_to_console() is used.

    Returns
    -------
    The value returned by libhfst.hfst_regex() for *re*.


    Regular expression operators:

    ~   complement
    \   term complement
    &   intersection
    -   minus

    $.  contains once
    $?  contains optionally
    $   contains once or more
    ( ) optionality

    +   Kleene plus
    *   Kleene star

    ./. ignore internally (not yet implemented)
    /   ignoring

    |   union

    <>  shuffle
    <   before
    >   after

    .o.   composition
    .O.   lenient composition
    .m>.  merge right
    .<m.  merge left
    .x.   cross product
    .P.   input priority union
    .p.   output priority union
    .-u.  input minus
    .-l.  output minus
    `[ ]  substitute

    ^n,k  catenate from n to k times, inclusive
    ^>n   catenate more than n times
    ^<n   catenate less than n times
    ^n    catenate n times

    .r   reverse
    .i   invert
    .u   input side
    .l   output side

    \\\  left quotient

    Two-level rules:

     \<=   left restriction
     <=>   left and right arrow
     <=    left arrow
     =>    right arrow

    Replace rules:

     ->    replace right
     (->)  optionally replace right
     <-    replace left
     (<-)  optionally replace left
     <->   replace left and right
     (<->) optionally replace left and right
     @->   left-to-right longest match
     @>    left-to-right shortest match
     ->@   right-to-left longest match
     >@    right-to-left shortest match

    Rule contexts, markers and separators:

     ||   match contexts on input sides
     //   match left context on output side and right context on input side
     \\   match left context on input side and right context on output side
     \/   match contexts on output sides
     _    center marker
     ...  markup marker
     ,,   rule separator in parallel rules
     ,    context separator
     [. .]  match epsilons only once

    Read from file:

     @bin" "  read binary transducer
     @txt" "  read transducer in att text format
     @stxt" " read spaced text
     @pl" "   read transducer in prolog text format
     @re" "   read regular expression

    Symbols:

     .#.  word boundary symbol in replacements, restrictions
     0    the epsilon
     ?    any token
     %    escape character
     { }  concatenate symbols
     " "  quote symbol

    :    pair separator
    ::   weight

    ;   end of expression
    !   starts a comment until end of line
    #   starts a comment until end of line
    """
    import sys

    type_ = get_default_fst_type()
    to_console = get_output_to_console()
    err = None
    defs = None

    # The branches must be mutually exclusive: otherwise a recognized
    # argument falls through to the "unknown argument" warning.
    for k, v in kvargs.items():
        if k == 'output_to_console':
            to_console = v
        elif k == 'error':
            err = v
        elif k == 'definitions':
            defs = v
        else:
            print('Warning: ignoring unknown argument %s.' % (k))

    comp = XreCompiler(type_)
    comp.setOutputToConsole(to_console)

    if defs is not None:
        for name, value in defs.items():
            # Transducers are recognized by their type name; every other
            # kind of value is skipped without a message.
            if "HfstTransducer" in str(type(value)):
                comp.define_transducer(name, value)

    # The third argument of hfst_regex selects the message destination by
    # name: "cout", "cerr", or "" for neither of the standard streams.
    if err is None:
        return libhfst.hfst_regex(comp, re, "")
    if err is sys.stdout:
        return libhfst.hfst_regex(comp, re, "cout")
    if err is sys.stderr:
        return libhfst.hfst_regex(comp, re, "cerr")

    # Any other object: compile first, then forward the message reported by
    # libhfst to the caller-supplied stream.
    retval = libhfst.hfst_regex(comp, re, "")
    message = libhfst.get_hfst_regex_error_message()
    # Decode byte strings as UTF-8 (what unicode(message, 'utf-8') did on
    # Python 2) without relying on the Python 2-only `unicode` builtin.
    if isinstance(message, bytes):
        message = message.decode('utf-8')
    err.write(message)
    return retval