def regex(re, **kvargs):
    """
    Get a transducer as defined by regular expression *re*.

    Parameters
    ----------
    * `re` :
        The regular expression defined with Xerox transducer notation.
    * `kvargs` :
        Arguments recognized are: 'error' and 'output_to_console'. Any other
        argument is ignored with a warning printed to standard output.
    * `error` :
        Where warnings and errors are printed. Possible values are sys.stdout,
        sys.stderr, a StringIO or None, indicating a quiet mode. When the
        argument is omitted, None is used.
    * `output_to_console` :
        Value handed to the compiler's setOutputToConsole(). When omitted,
        the value of get_output_to_console() is used.

    Returns
    -------
    The value returned by libhfst.hfst_regex() for *re*.
    """
    import sys

    fst_type = get_default_fst_type()
    to_console = get_output_to_console()
    err = None

    # A single if/elif chain: every recognized key is handled exactly once and
    # only genuinely unknown keys reach the warning branch.
    for key, value in kvargs.items():
        if key == 'output_to_console':
            to_console = value
        elif key == 'error':
            err = value
        else:
            print('Warning: ignoring unknown argument %s.' % (key))

    comp = XreCompiler(fst_type)
    comp.setOutputToConsole(to_console)

    if err is None:
        return libhfst.hfst_regex(comp, re, "")
    if err is sys.stdout:
        return libhfst.hfst_regex(comp, re, "cout")
    if err is sys.stderr:
        return libhfst.hfst_regex(comp, re, "cerr")

    # Any other target is treated as a file-like object: compile with the
    # empty stream name, then hand the collected message to its write().
    retval = libhfst.hfst_regex(comp, re, "")
    err.write(libhfst.get_hfst_regex_error_message())
    return retval
def regex(re, **kwargs):
    r"""
    Get a transducer as defined by regular expression *re*.

    Parameters
    ----------
    * `re` :
        The regular expression defined with Xerox transducer notation.
    * `kwargs` :
        Arguments recognized are: 'error', 'definitions' and
        'output_to_console'. Any other argument is ignored with a warning
        printed to standard output.
    * `error` :
        Where warnings and errors are printed. Possible values are sys.stdout,
        sys.stderr, a StringIO or None, indicating a quiet mode. When the
        argument is omitted, None is used.
    * `definitions` :
        A dictionary mapping variable names into transducers. Values whose
        type name does not contain "HfstTransducer" are silently skipped.
    * `output_to_console` :
        Value handed to the compiler's setOutputToConsole(). When omitted,
        the value of get_output_to_console() is used.

    Returns
    -------
    The value returned by libhfst.hfst_regex() for *re*.

    Regular expression operators:

     ~     complement
     \     term complement
     &     intersection
     -     minus

     $.    contains once
     $?    contains optionally
     $     contains once or more
     ( )   optionality

     +     Kleene plus
     *     Kleene star

     ./.   ignore internally (not yet implemented)
     /     ignoring

     |     union

     <>    shuffle
     <     before
     >     after

     .o.   composition
     .O.   lenient composition
     .m>.  merge right
     .<m.  merge left
     .x.   cross product
     .P.   input priority union
     .p.   output priority union
     .-u.  input minus
     .-l.  output minus
     `[ ]  substitute

     ^n,k  catenate from n to k times, inclusive
     ^>n   catenate more than n times
     ^<n   catenate less than n times
     ^n    catenate n times

     .r    reverse
     .i    invert
     .u    input side
     .l    output side

     \\\   left quotient

    Two-level rules:

     \<=   left restriction
     <=>   left and right arrow
     <=    left arrow
     =>    right arrow

    Replace rules:

     ->    replace right
     (->)  optionally replace right
     <-    replace left
     (<-)  optionally replace left
     <->   replace left and right
     (<->) optionally replace left and right
     @->   left-to-right longest match
     @>    left-to-right shortest match
     ->@   right-to-left longest match
     >@    right-to-left shortest match

    Rule contexts, markers and separators:

     ||    match contexts on input sides
     //    match left context on output side and right context on input side
     \\    match left context on input side and right context on output side
     \/    match contexts on output sides
     _     center marker
     ...   markup marker
     ,,    rule separator in parallel rules
     ,     context separator
     [. .] match epsilons only once

    Read from file:

     @bin" "   read binary transducer
     @txt" "   read transducer in att text format
     @stxt" "  read spaced text
     @pl" "    read transducer in prolog text format
     @re" "    read regular expression

    Symbols:

     .#.   word boundary symbol in replacements, restrictions
     0     the epsilon
     ?     any token
     %     escape character
     { }   concatenate symbols
     " "   quote symbol
     :     pair separator
     ::    weight
     ;     end of expression
     !     starts a comment until end of line
     #     starts a comment until end of line
    """
    import sys

    type_ = get_default_fst_type()
    to_console = get_output_to_console()
    err = None
    defs = None

    # A single if/elif chain: every recognized key is handled exactly once and
    # only genuinely unknown keys reach the warning branch.
    for key, value in kwargs.items():
        if key == 'output_to_console':
            to_console = value
        elif key == 'error':
            err = value
        elif key == 'definitions':
            defs = value
        else:
            print('Warning: ignoring unknown argument %s.' % (key))

    comp = XreCompiler(type_)
    comp.setOutputToConsole(to_console)

    if defs is not None:
        for name, value in defs.items():
            # The check is done on the type's string form, exactly as before;
            # anything that is not recognized as a transducer is skipped.
            if "HfstTransducer" in str(type(value)):
                comp.define_transducer(name, value)

    if err is None:
        return libhfst.hfst_regex(comp, re, "")
    if err is sys.stdout:
        return libhfst.hfst_regex(comp, re, "cout")
    if err is sys.stderr:
        return libhfst.hfst_regex(comp, re, "cerr")

    # Any other target is treated as a file-like object: compile with the
    # empty stream name, then hand the collected message to its write().
    retval = libhfst.hfst_regex(comp, re, "")
    message = libhfst.get_hfst_regex_error_message()
    # Byte strings are decoded as UTF-8 (what unicode(..., 'utf-8') did on
    # Python 2); text is written unchanged, so this also runs on Python 3
    # where the name `unicode` does not exist.
    if isinstance(message, bytes):
        message = message.decode('utf-8')
    err.write(message)
    return retval
def regex(re, **kvargs):
    r"""
    Get a transducer as defined by regular expression *re*.

    Parameters
    ----------
    * `re` :
        The regular expression defined with Xerox transducer notation.
    * `kvargs` :
        Arguments recognized are: 'error', 'definitions' and
        'output_to_console'. Any other argument is ignored with a warning
        printed to standard output.
    * `error` :
        Where warnings and errors are printed. Possible values are sys.stdout,
        sys.stderr, a StringIO or None, indicating a quiet mode. When the
        argument is omitted, None is used.
    * `definitions` :
        A dictionary mapping variable names into transducers. Values whose
        type name does not contain "HfstTransducer" are silently skipped.
    * `output_to_console` :
        Value handed to the compiler's setOutputToConsole(). When omitted,
        the value of get_output_to_console() is used.

    Returns
    -------
    The value returned by libhfst.hfst_regex() for *re*.

    Regular expression operators:

     ~     complement
     \     term complement
     &     intersection
     -     minus

     $.    contains once
     $?    contains optionally
     $     contains once or more
     ( )   optionality

     +     Kleene plus
     *     Kleene star

     ./.   ignore internally (not yet implemented)
     /     ignoring

     |     union

     <>    shuffle
     <     before
     >     after

     .o.   composition
     .O.   lenient composition
     .m>.  merge right
     .<m.  merge left
     .x.   cross product
     .P.   input priority union
     .p.   output priority union
     .-u.  input minus
     .-l.  output minus
     `[ ]  substitute

     ^n,k  catenate from n to k times, inclusive
     ^>n   catenate more than n times
     ^<n   catenate less than n times
     ^n    catenate n times

     .r    reverse
     .i    invert
     .u    input side
     .l    output side

     \\\   left quotient

    Two-level rules:

     \<=   left restriction
     <=>   left and right arrow
     <=    left arrow
     =>    right arrow

    Replace rules:

     ->    replace right
     (->)  optionally replace right
     <-    replace left
     (<-)  optionally replace left
     <->   replace left and right
     (<->) optionally replace left and right
     @->   left-to-right longest match
     @>    left-to-right shortest match
     ->@   right-to-left longest match
     >@    right-to-left shortest match

    Rule contexts, markers and separators:

     ||    match contexts on input sides
     //    match left context on output side and right context on input side
     \\    match left context on input side and right context on output side
     \/    match contexts on output sides
     _     center marker
     ...   markup marker
     ,,    rule separator in parallel rules
     ,     context separator
     [. .] match epsilons only once

    Read from file:

     @bin" "   read binary transducer
     @txt" "   read transducer in att text format
     @stxt" "  read spaced text
     @pl" "    read transducer in prolog text format
     @re" "    read regular expression

    Symbols:

     .#.   word boundary symbol in replacements, restrictions
     0     the epsilon
     ?     any token
     %     escape character
     { }   concatenate symbols
     " "   quote symbol
     :     pair separator
     ::    weight
     ;     end of expression
     !     starts a comment until end of line
     #     starts a comment until end of line
    """
    import sys

    type_ = get_default_fst_type()
    to_console = get_output_to_console()
    err = None
    defs = None

    # A single if/elif chain: every recognized key is handled exactly once and
    # only genuinely unknown keys reach the warning branch.
    for key, value in kvargs.items():
        if key == 'output_to_console':
            to_console = value
        elif key == 'error':
            err = value
        elif key == 'definitions':
            defs = value
        else:
            print('Warning: ignoring unknown argument %s.' % (key))

    comp = XreCompiler(type_)
    comp.setOutputToConsole(to_console)

    if defs is not None:
        for name, value in defs.items():
            # The check is done on the type's string form, exactly as before;
            # anything that is not recognized as a transducer is skipped.
            # No progress message is printed while defining, so library
            # callers do not get stray output on stdout.
            if "HfstTransducer" in str(type(value)):
                comp.define_transducer(name, value)

    if err is None:
        return libhfst.hfst_regex(comp, re, "")
    if err is sys.stdout:
        return libhfst.hfst_regex(comp, re, "cout")
    if err is sys.stderr:
        return libhfst.hfst_regex(comp, re, "cerr")

    # Any other target is treated as a file-like object: compile with the
    # empty stream name, then hand the collected message to its write().
    retval = libhfst.hfst_regex(comp, re, "")
    message = libhfst.get_hfst_regex_error_message()
    # Byte strings are decoded as UTF-8 (what unicode(..., 'utf-8') did on
    # Python 2); text is written unchanged, so this also runs on Python 3
    # where the name `unicode` does not exist.
    if isinstance(message, bytes):
        message = message.decode('utf-8')
    err.write(message)
    return retval