def get_fst(start_rule, end_rule, *args):
    """Build a lookup-optimized FST from a subset of the rules in g2p.twolc.

    The twolc source ``g2p.twolc`` is compiled into
    ``g2p_test_from_py.tmp.hfst``. The compiled transducers are read back
    and filtered by their position in that file, then compose-intersected
    with the universal language ``?*``. Progress messages go to stderr.

    Args:
        start_rule: Position of the first transducer to keep (inclusive).
        end_rule: Position of the last transducer to keep (inclusive).
        *args: Optional second inclusive range, given as two further
            integers ``(start, end)``. Values after the first two are
            ignored.

    Returns:
        The composed transducer, after ``lookup_optimize()`` has been
        called on it.

    Raises:
        ValueError: If exactly one extra positional argument is given, so
            the second range has no end.
        RuntimeError: If ``hfst.compile_twolc_file`` reports a failure.
    """
    # Fail fast, before the expensive compilation, instead of hitting an
    # IndexError on args[1] later.
    if len(args) == 1:
        raise ValueError(
            'the optional second rule range needs both a start and an end; '
            'got only %r' % (args[0],))

    src = Path('g2p.twolc')
    tmp = Path('g2p_test_from_py.tmp.hfst')
    status = hfst.compile_twolc_file(src.name, tmp.name,
                                     resolve_left_conflicts=True)
    # A non-zero status means the output file is missing or stale; do not
    # go on to read it.
    if status != 0:
        raise RuntimeError(
            'compile_twolc_file failed for %s (status %r)' % (src.name, status))

    print('Preparing rule transducers for composition...', file=sys.stderr)
    # Position 0 is always kept, whatever ranges were requested.
    rule_numbers = {0}
    rule_numbers.update(range(start_rule, end_rule + 1))
    if args:
        rule_numbers.update(range(args[0], args[1] + 1))

    rule_fsts_stream = hfst.HfstInputStream(tmp.name)
    try:
        rule_fsts = [rule
                     for index, rule in enumerate(rule_fsts_stream)
                     if index in rule_numbers]
    finally:
        # Release the underlying file even if reading fails part-way.
        rule_fsts_stream.close()

    print('Creating universal language FST...', file=sys.stderr)
    output = hfst.regex('?* ;')
    print('Compose-intersecting rules with universal FST...', file=sys.stderr)
    output.compose_intersect(rule_fsts)
    print('Optimizing for fast lookup...', file=sys.stderr)
    output.lookup_optimize()
    return output
def get_fst():
    """Compile g2p.twolc and return it as one lookup-optimized FST.

    Steps, each announced on stderr:

    1. Compile ``g2p.twolc`` into ``g2p_from_py.tmp.hfst``.
    2. Read every rule transducer back from that file.
    3. Compose-intersect the rules with the universal language ``?*``.
    4. Optimize the result for lookup and write it to
       ``g2p_from_py.hfstol``.

    Returns:
        The composed transducer, after ``lookup_optimize()`` has been
        called on it.

    Raises:
        RuntimeError: If ``hfst.compile_twolc_file`` reports a failure.
    """
    src = Path('g2p.twolc')
    tmp = Path('g2p_from_py.tmp.hfst')
    final = Path('g2p_from_py.hfstol')

    # NOTE: an mtime-based cache used to be sketched here in commented-out
    # code (skip recompilation when tmp is newer than src, and reload
    # `final` when it is newer than both). It was disabled, so every call
    # recompiles and rebuilds from scratch.
    print('Compiling twolc rules...', file=sys.stderr)
    status = hfst.compile_twolc_file(src.name, tmp.name,
                                     resolve_left_conflicts=True)
    # A non-zero status means the output file is missing or stale; do not
    # go on to read it.
    if status != 0:
        raise RuntimeError(
            'compile_twolc_file failed for %s (status %r)' % (src.name, status))

    print('Preparing rule transducers for composition...', file=sys.stderr)
    rule_fsts_stream = hfst.HfstInputStream(tmp.name)
    try:
        rule_fsts = list(rule_fsts_stream)
    finally:
        # Release the underlying file even if reading fails part-way.
        rule_fsts_stream.close()

    print('Creating universal language FST...', file=sys.stderr)
    output = hfst.regex('?* ;')
    print('Compose-intersecting rules with universal FST...', file=sys.stderr)
    output.compose_intersect(rule_fsts)
    print('Optimizing for fast lookup...', file=sys.stderr)
    output.lookup_optimize()
    print('Writing out final FST...', file=sys.stderr)
    output.write_to_file(final.name)
    return output
def get_fst(src):
    """Compile the twolc file *src* and return it as one lookup-optimized FST.

    The rules are compiled to an intermediate ``g2p_from_py.hfst`` file,
    read back, and compose-intersected with the universal language ``?*``.
    Progress messages go to stderr.

    Args:
        src: Object whose ``.name`` attribute is used as the twolc input
            filename. For a ``pathlib.Path`` that is the final path
            component only.

    Returns:
        The composed transducer, after ``lookup_optimize()`` has been
        called on it.

    Raises:
        RuntimeError: If ``hfst.compile_twolc_file`` reports a failure.
    """
    tmp = Path('../res/g2p_from_py.hfst')
    # NOTE(review): `tmp.name` is only 'g2p_from_py.hfst', so the
    # intermediate file is written to and read from the current working
    # directory, not ../res/. Behaviour is kept as-is; confirm whether
    # str(tmp) was intended.
    print('Compiling twolc rules...', file=sys.stderr)
    status = hfst.compile_twolc_file(src.name, tmp.name,
                                     resolve_left_conflicts=True)
    # A non-zero status means the output file is missing or stale; do not
    # go on to read it.
    if status != 0:
        raise RuntimeError(
            'compile_twolc_file failed for %s (status %r)' % (src.name, status))

    print('Preparing rule transducers for composition...', file=sys.stderr)
    rule_fsts_stream = hfst.HfstInputStream(tmp.name)
    try:
        rule_fsts = list(rule_fsts_stream)
    finally:
        # Release the underlying file even if reading fails part-way.
        rule_fsts_stream.close()

    print('Creating universal language FST...', file=sys.stderr)
    output = hfst.regex('?* ;')
    print('Compose-intersecting rules with universal FST...', file=sys.stderr)
    output.compose_intersect(rule_fsts)
    print('Optimizing for fast lookup...', file=sys.stderr)
    output.lookup_optimize()
    return output
# -*- coding: utf-8 -*-
# Smoke test for hfst.compile_twolc_file: compiles test1.twolc .. test3.twolc
# with default options and, when the foma implementation is available, once
# more with verbose output and the foma implementation type.
import sys

# An optional first command-line argument names a directory to search for
# the hfst module; it must be on sys.path before hfst is imported.
if len(sys.argv) > 1:
    sys.path.insert(0, sys.argv[1])
import hfst


def _compile_all(**options):
    """Compile test1..test3 and fail on the first non-zero status."""
    for n in [1, 2, 3]:
        source = 'test' + str(n) + '.twolc'
        target = 'test' + str(n) + '.hfst'
        status = hfst.compile_twolc_file(source, target, **options)
        # Raise explicitly instead of wrapping the call in `assert`: under
        # `python -O` an assert would be stripped together with the
        # compilation itself, so the test would silently do nothing.
        if status != 0:
            raise AssertionError(
                'compile_twolc_file(%r, %r) returned %r'
                % (source, target, status))


_compile_all()

if hfst.HfstTransducer.is_implementation_type_available(
        hfst.ImplementationType.FOMA_TYPE):
    _compile_all(verbose=True, type=hfst.ImplementationType.FOMA_TYPE)
# -*- coding: utf-8 -*-
# Checks that hfst.compile_twolc_file returns 0 for test1.twolc, test2.twolc
# and test3.twolc: first with default options, then (only if the foma
# implementation is available) with verbose=True and the foma type.
import sys

# The hfst module may live in a directory passed as the first command-line
# argument, so extend sys.path before importing it.
if len(sys.argv) > 1:
    sys.path.insert(0, sys.argv[1])
import hfst


def _check_compiles(n, **options):
    """Compile test<n>.twolc to test<n>.hfst; raise unless the status is 0."""
    twolc_file = 'test' + str(n) + '.twolc'
    hfst_file = 'test' + str(n) + '.hfst'
    status = hfst.compile_twolc_file(twolc_file, hfst_file, **options)
    # The call is kept out of an `assert` so that it still runs, and is
    # still checked, when Python is started with -O.
    if status != 0:
        raise AssertionError(
            'compiling %s failed with status %r (options: %r)'
            % (twolc_file, status, options))


for n in [1, 2, 3]:
    _check_compiles(n)

if hfst.HfstTransducer.is_implementation_type_available(
        hfst.ImplementationType.FOMA_TYPE):
    for n in [1, 2, 3]:
        _check_compiles(n, verbose=True,
                        type=hfst.ImplementationType.FOMA_TYPE)