Esempio n. 1
0
 def to_fst(self) -> hfst.HfstTransducer:
     """Compile the entries of this object into an HFST transducer.

     A temporary lexc source file is written, compiled with
     ``hfst.compile_lexc_file`` and then removed again.  The file declares
     ``shared.multichar_symbols`` plus every tag found on the entries as
     multicharacter symbols, followed by a single ``Root`` lexicon holding
     one line per entry (its escaped ``symstr``).

     Returns:
         The transducer produced by ``hfst.compile_lexc_file``.
     """
     lexc_file = shared.filenames['lexicon-tr'] + '.lex'

     # Every distinct tag carried by any entry must be declared as a
     # multicharacter symbol in the lexc header.
     tags = {t for entry in self.items for t in entry.tag}

     with open_to_write(lexc_file) as lexfp:
         declared = shared.multichar_symbols + list(tags)
         escaped = ' '.join(self._lexc_escape(s) for s in declared)
         lexfp.write('Multichar_Symbols ' + escaped + '\n\n')
         lexfp.write('LEXICON Root\n')
         for entry in self.items:
             line = '\t' + self._lexc_escape(entry.symstr) + ' # ;\n'
             lexfp.write(line)

     # The lexc file is only an intermediate artifact: compile, then delete.
     transducer = hfst.compile_lexc_file(full_path(lexc_file))
     remove_file(lexc_file)
     return transducer
Esempio n. 2
0
        raise RuntimeError(get_linenumber())
    
    if not (TR1.compare(tr1)):
        raise RuntimeError(get_linenumber())
    if not (TR2.compare(tr2)):
        raise RuntimeError(get_linenumber())

    # Copy constructor
    transducer = hfst.HfstTransducer(TR1)
    if not (TR1.compare(transducer)):
        raise RuntimeError(get_linenumber())
    if not (transducer.compare(TR1)):
        raise RuntimeError(get_linenumber())

    # Read lexc
    tr = hfst.compile_lexc_file('test.lexc')
    tr.insert_freely(tr1)
    tr.minimize()
    tr.insert_freely(('A','B'))
    tr.minimize()

    # Read sfst
    tr = hfst.compile_sfst_file('test.sfstpl')
    assert(not (tr == None))

    # Substitute
    tr = hfst.regex('a a:b b;')
    tr.substitute('a', 'A', input=True, output=False)
    eq = hfst.regex('A:a A:b b;')
    if not (tr.compare(eq)):
        raise RuntimeError(get_linenumber())
Esempio n. 3
0
        WithFlags=True
    elif arg == '-o' or arg == '--output':
        ofile = '<next>'
    elif arg == '-i' or arg == '--input':
        ifile = '<next>'
    elif arg == '-f' or arg == '--format':
        output_format = '<next>'
    elif ofile == '<next>':
        ofile = arg
    elif ifile == '<next>':
        ifile = arg
    elif output_format == '<next>':
        if arg == 'openfst-tropical' or arg == 'openfst':
            output_format = hfst.ImplementationType.TROPICAL_OPENFST_TYPE
        elif arg == 'foma':
            output_format = hfst.ImplementationType.FOMA_TYPE
        elif arg == 'sfst':
            output_format = hfst.ImplementationType.SFST_TYPE
        else:
            raise RuntimeError('Error: hfst-lexc.py: format ' + arg + ' not recognized.')
        hfst.set_default_fst_type(output_format)
    elif ifile == None:
        ifile = arg
    elif ofile == None:
        ofile = arg
    else:
        raise RuntimeError('Error: hfst-lexc.py: unknown option: ' + arg)

tr = hfst.compile_lexc_file(ifile, with_flags=WithFlags)
tr.write_to_file(ofile)
Esempio n. 4
0
import hfst
import hfst_commandline

# Command-line front end: compile a lexc source file into an HFST transducer
# and write the result in the requested implementation format.
#
# NOTE: '--Werror' is accepted by the option parser (see longopts) but is
# currently ignored; no warnings-as-errors handling is implemented here.

# Defaults, possibly overridden by the command-line options below.
output_format = hfst.ImplementationType.TROPICAL_OPENFST_TYPE
WithFlags = False

shortopts = 'f:Fi:o:'
longopts = ['format=', 'withFlags', 'Werror', 'input=', 'output=']
options = hfst_commandline.hfst_getopt(shortopts, longopts, 1)

# options[0] holds the parsed options; each item is indexed as
# (option name, option value).  Options not handled here are left alone.
for opt in options[0]:
    if opt[0] in ('-F', '--withFlags'):
        WithFlags = True
    elif opt[0] in ('-f', '--format'):
        output_format = hfst_commandline.get_implementation_type(opt[1])

# Only the second element of each stream result is used for compilation and
# writing, so the stream objects themselves are closed right away.
istr = hfst_commandline.get_one_input_text_stream(options)
ostr = hfst_commandline.get_one_output_hfst_stream(options, output_format)
for opened in (istr, ostr):
    opened[0].close()

# The default type must be set before compiling so the transducer is built
# in the requested format.
hfst.set_default_fst_type(output_format)
tr = hfst.compile_lexc_file(istr[1], with_flags=WithFlags)
tr.write_to_file(ostr[1])
Esempio n. 5
0
        raise RuntimeError(get_linenumber())
    
    if not (TR1.compare(tr1)):
        raise RuntimeError(get_linenumber())
    if not (TR2.compare(tr2)):
        raise RuntimeError(get_linenumber())

    # Copy constructor
    transducer = hfst.HfstTransducer(TR1)
    if not (TR1.compare(transducer)):
        raise RuntimeError(get_linenumber())
    if not (transducer.compare(TR1)):
        raise RuntimeError(get_linenumber())

    # Read lexc
    tr = hfst.compile_lexc_file('test.lexc')
    tr.insert_freely(tr1)
    tr.minimize()
    tr.insert_freely(('A','B'))
    tr.minimize()

    # Read sfst
    tr = hfst.compile_sfst_file('test.sfstpl')
    assert(not (tr == None))

    # Substitute
    tr = hfst.regex('a a:b b;')
    tr.substitute('a', 'A', input=True, output=False)
    eq = hfst.regex('A:a A:b b;')
    if not (tr.compare(eq)):
        raise RuntimeError(get_linenumber())
Esempio n. 6
0
# Demo script: download the FinnTreeBank lexc source, compile it with HFST,
# invert the resulting transducer and look up a few sample words.
import urllib.request

import hfst

print('Fetching lexc file...')
# The context manager closes the HTTP response even if reading or decoding
# fails.
with urllib.request.urlopen(
        'http://hfst.github.io/downloads/finntreebank.lexc') as data:
    s = data.read().decode('utf-8')

# todo: implement hfst.compile_lexc(lexc_string)
# Write the file explicitly as UTF-8: the text was decoded as UTF-8 above, and
# relying on the platform default encoding can fail (or corrupt non-ASCII
# characters) on systems whose locale is not UTF-8.
with open('finntreebank.lexc', 'w', encoding='utf-8') as f:
    f.write(s)

print('Compiling the file...')
tr = hfst.compile_lexc_file('finntreebank.lexc')
# Explicit check instead of `assert`, which is stripped when Python runs
# with -O; identity comparison is the correct test against None.
if tr is None:
    raise RuntimeError('Compiling finntreebank.lexc failed')
print('Inverting the transducer...')
tr.invert()
tr.minimize()

print('Testing the result:')
print('')
for word in ('testi', 'xtesti', 'alusta'):
    print(word + ':')
    print(tr.lookup(word, output='text'))

# todo: empty result should contain a newline?
# todo: the indentation of weights when there are several results