def to_fst(self) -> hfst.HfstTransducer:
    """Compile this object's entries into an HFST transducer.

    The entries are written to a temporary lexc file named after
    ``shared.filenames['lexicon-tr']`` (with ``.lex`` appended), that file
    is compiled with ``hfst.compile_lexc_file`` and then deleted.

    Returns:
        Whatever ``hfst.compile_lexc_file`` returns for the generated file.

    Raises:
        Any exception raised by the file helpers or by the lexc compiler.
        The temporary file is removed even when compilation fails.
    """
    lexc_file = shared.filenames['lexicon-tr'] + '.lex'

    # Every tag used by any entry is declared as a multichar symbol, in
    # addition to the globally configured ones.
    tags = {tag for entry in self.items for tag in entry.tag}
    symbols = shared.multichar_symbols + list(tags)

    with open_to_write(lexc_file) as lexfp:
        lexfp.write(
            'Multichar_Symbols '
            + ' '.join(self._lexc_escape(s) for s in symbols)
            + '\n\n')
        # A single Root lexicon with one line per entry, each of the form
        # "<escaped symstr> # ;".
        lexfp.write('LEXICON Root\n')
        for entry in self.items:
            lexfp.write('\t' + self._lexc_escape(entry.symstr) + ' # ;\n')

    # NOTE(review): the file is written/removed by its bare name but
    # compiled via full_path(); presumably the helpers resolve names
    # against the same working directory -- confirm.
    try:
        return hfst.compile_lexc_file(full_path(lexc_file))
    finally:
        # Clean up the temporary lexc file even if compilation raised.
        remove_file(lexc_file)
raise RuntimeError(get_linenumber()) if not (TR1.compare(tr1)): raise RuntimeError(get_linenumber()) if not (TR2.compare(tr2)): raise RuntimeError(get_linenumber()) # Copy constructor transducer = hfst.HfstTransducer(TR1) if not (TR1.compare(transducer)): raise RuntimeError(get_linenumber()) if not (transducer.compare(TR1)): raise RuntimeError(get_linenumber()) # Read lexc tr = hfst.compile_lexc_file('test.lexc') tr.insert_freely(tr1) tr.minimize() tr.insert_freely(('A','B')) tr.minimize() # Read sfst tr = hfst.compile_sfst_file('test.sfstpl') assert(not (tr == None)) # Substitute tr = hfst.regex('a a:b b;') tr.substitute('a', 'A', input=True, output=False) eq = hfst.regex('A:a A:b b;') if not (tr.compare(eq)): raise RuntimeError(get_linenumber())
WithFlags=True elif arg == '-o' or arg == '--output': ofile = '<next>' elif arg == '-i' or arg == '--input': ifile = '<next>' elif arg == '-f' or arg == '--format': output_format = '<next>' elif ofile == '<next>': ofile = arg elif ifile == '<next>': ifile = arg elif output_format == '<next>': if arg == 'openfst-tropical' or arg == 'openfst': output_format = hfst.ImplementationType.TROPICAL_OPENFST_TYPE elif arg == 'foma': output_format = hfst.ImplementationType.FOMA_TYPE elif arg == 'sfst': output_format = hfst.ImplementationType.SFST_TYPE else: raise RuntimeError('Error: hfst-lexc.py: format ' + arg + ' not recognized.') hfst.set_default_fst_type(output_format) elif ifile == None: ifile = arg elif ofile == None: ofile = arg else: raise RuntimeError('Error: hfst-lexc.py: unknown option: ' + arg) tr = hfst.compile_lexc_file(ifile, with_flags=WithFlags) tr.write_to_file(ofile)
# Command-line lexc compiler: reads one lexc source, compiles it with
# hfst.compile_lexc_file and writes the resulting transducer out.
import hfst
import hfst_commandline

# Defaults; both can be overridden from the command line.
output_format = hfst.ImplementationType.TROPICAL_OPENFST_TYPE
WithFlags = False

shortopts = 'f:Fi:o:'
longopts = ['format=', 'withFlags', 'Werror', 'input=', 'output=']
options = hfst_commandline.hfst_getopt(shortopts, longopts, 1)

for opt in options[0]:
    flag = opt[0]
    if flag in ('-F', '--withFlags'):
        WithFlags = True
    elif flag in ('-f', '--format'):
        output_format = hfst_commandline.get_implementation_type(opt[1])
    # Any other option is ignored in this loop. In particular --Werror is
    # accepted by the parser but has no effect: treating warnings as
    # errors is not implemented here.

istr = hfst_commandline.get_one_input_text_stream(options)
ostr = hfst_commandline.get_one_output_hfst_stream(options, output_format)

# Only element [1] of each pair is used below (presumably the file name --
# confirm against hfst_commandline); the stream objects in element [0] are
# closed right away, input first.
for stream in (istr[0], ostr[0]):
    stream.close()

# Select the implementation type before compiling.
hfst.set_default_fst_type(output_format)
tr = hfst.compile_lexc_file(istr[1], with_flags=WithFlags)
tr.write_to_file(ostr[1])
# Demo: download the FinnTreeBank lexc source, compile it with HFST,
# invert and minimize the transducer, and look up a few sample words.
print('Fetching lexc file...')
import urllib.request

# The context manager closes the HTTP response even if reading or
# decoding fails.
with urllib.request.urlopen(
        'http://hfst.github.io/downloads/finntreebank.lexc') as data:
    s = data.read().decode('utf-8')

# todo: implement hfst.compile_lexc(lexc_string)
# The text was decoded as UTF-8, so write it back as UTF-8 explicitly
# instead of relying on the platform's default encoding.
with open('finntreebank.lexc', 'w', encoding='utf-8') as f:
    f.write(s)

import hfst

print('Compiling the file...')
tr = hfst.compile_lexc_file('finntreebank.lexc')
# Identity test: avoids going through any comparison operators the
# transducer class may define.
assert tr is not None

print('Inverting the transducer...')
tr.invert()
tr.minimize()

print('Testing the result:')
print('')
for word in ('testi', 'xtesti', 'alusta'):
    print(word + ':')
    print(tr.lookup(word, output='text'))
    # todo: empty result should contain a newline?
    # todo: the indentation of weights when there are several results