コード例 #1
0
def get_fst(start_rule, end_rule, *args):
    """Compile ``g2p.twolc`` and build a lookup FST from selected rules.

    The transducers written by ``hfst.compile_twolc_file`` are read back in
    stream order; only those whose zero-based stream position is selected
    are compose-intersected with the universal language ``?*``. Position 0
    is always kept, in addition to the requested ranges.

    Args:
        start_rule: First stream position (inclusive) of the main range.
        end_rule: Last stream position (inclusive) of the main range.
        *args: Optional second inclusive range, given as two further
            positions ``args[0]`` and ``args[1]``. Anything after those
            two values is ignored.

    Returns:
        The composed transducer after ``lookup_optimize()`` was applied.

    Raises:
        IndexError: If exactly one extra positional argument is supplied,
            because the second range then has no upper bound.
    """
    # Work out the wanted stream positions first so that malformed *args
    # fail before any file is compiled or opened.
    rule_numbers = {0}
    rule_numbers.update(range(start_rule, end_rule + 1))
    if args:
        rule_numbers.update(range(args[0], args[1] + 1))

    src = Path('g2p.twolc')
    tmp = Path('g2p_test_from_py.tmp.hfst')
    # NOTE(review): the status returned by compile_twolc_file is ignored
    # here -- confirm whether a non-zero value should abort.
    hfst.compile_twolc_file(src.name, tmp.name, resolve_left_conflicts=True)
    print('Preparing rule transducers for composition...', file=sys.stderr)
    rule_fsts_stream = hfst.HfstInputStream(tmp.name)
    try:
        rule_fsts = [rule for index, rule in enumerate(rule_fsts_stream)
                     if index in rule_numbers]
    finally:
        # Release the stream even if reading a transducer fails.
        rule_fsts_stream.close()

    print('Creating universal language FST...', file=sys.stderr)
    output = hfst.regex('?* ;')
    print('Compose-intersecting rules with universal FST...', file=sys.stderr)
    output.compose_intersect(rule_fsts)
    print('Optimizing for fast lookup...', file=sys.stderr)
    output.lookup_optimize()
    return output
コード例 #2
0
def get_fst():
    """Compile ``g2p.twolc``, compose all of its rules and save the result.

    Every transducer produced by ``hfst.compile_twolc_file`` is
    compose-intersected with the universal language ``?*``; the result is
    optimized for lookup and written to ``g2p_from_py.hfstol``.

    Returns:
        The composed transducer after ``lookup_optimize()`` was applied.
    """
    src = Path('g2p.twolc')
    tmp = Path('g2p_from_py.tmp.hfst')
    final = Path('g2p_from_py.hfstol')
    # NOTE: the rules are recompiled and recomposed on every call. An
    # mtime-based freshness check (src vs. tmp vs. final) that would reuse
    # the saved .hfstol file was sketched here earlier but is disabled.
    print('Compiling twolc rules...', file=sys.stderr)
    # NOTE(review): the status returned by compile_twolc_file is ignored
    # here -- confirm whether a non-zero value should abort.
    hfst.compile_twolc_file(src.name, tmp.name, resolve_left_conflicts=True)
    print('Preparing rule transducers for composition...', file=sys.stderr)
    rule_fsts_stream = hfst.HfstInputStream(tmp.name)
    try:
        rule_fsts = list(rule_fsts_stream)
    finally:
        # Release the stream even if reading a transducer fails.
        rule_fsts_stream.close()
    print('Creating universal language FST...', file=sys.stderr)
    output = hfst.regex('?* ;')
    print('Compose-intersecting rules with universal FST...', file=sys.stderr)
    output.compose_intersect(rule_fsts)
    print('Optimizing for fast lookup...', file=sys.stderr)
    output.lookup_optimize()
    print('Writing out final FST...', file=sys.stderr)
    output.write_to_file(final.name)
    return output
コード例 #3
0
def get_fst(src):
    """Compile the twolc rules named by *src* and compose them into one FST.

    Args:
        src: Object whose ``name`` attribute is passed to
            ``hfst.compile_twolc_file`` as the input file name.

    Returns:
        The composed transducer after ``lookup_optimize()`` was applied.
    """
    tmp = Path('../res/g2p_from_py.hfst')
    # NOTE(review): ``tmp.name`` is only the final path component
    # ('g2p_from_py.hfst'), so the intermediate file is created in the
    # current working directory rather than in ../res -- confirm whether
    # that is intended before switching to str(tmp).
    print('Compiling twolc rules...', file=sys.stderr)
    # NOTE(review): the status returned by compile_twolc_file is ignored
    # here -- confirm whether a non-zero value should abort.
    hfst.compile_twolc_file(src.name, tmp.name, resolve_left_conflicts=True)
    print('Preparing rule transducers for composition...', file=sys.stderr)
    rule_fsts_stream = hfst.HfstInputStream(tmp.name)
    try:
        rule_fsts = list(rule_fsts_stream)
    finally:
        # Release the stream even if reading a transducer fails.
        rule_fsts_stream.close()
    print('Creating universal language FST...', file=sys.stderr)
    output = hfst.regex('?* ;')
    print('Compose-intersecting rules with universal FST...', file=sys.stderr)
    output.compose_intersect(rule_fsts)
    print('Optimizing for fast lookup...', file=sys.stderr)
    output.lookup_optimize()
    return output
コード例 #4
0
ファイル: test_twolc.py プロジェクト: hfst/hfst
# -*- coding: utf-8 -*-
import sys
# An optional first command-line argument is put at the front of the module
# search path before hfst is imported.
if sys.argv[1:]:
    sys.path.insert(0, sys.argv[1])
import hfst

# Each of the three twolc test grammars must compile with a return value of 0.
for n in (1, 2, 3):
    assert hfst.compile_twolc_file(
        'test{}.twolc'.format(n), 'test{}.hfst'.format(n)) == 0

# Repeat the compilation verbosely with the foma backend when it is available.
if hfst.HfstTransducer.is_implementation_type_available(
        hfst.ImplementationType.FOMA_TYPE):
    for n in (1, 2, 3):
        assert hfst.compile_twolc_file(
            'test{}.twolc'.format(n),
            'test{}.hfst'.format(n),
            verbose=True,
            type=hfst.ImplementationType.FOMA_TYPE) == 0
# -*- coding: utf-8 -*-
import sys
# When a directory is given on the command line, search it first for hfst.
if len(sys.argv) > 1:
    sys.path.insert(0, sys.argv[1])
import hfst

# Compile test1..test3 with the default settings; each call must return 0.
for n in range(1, 4):
    assert 0 == hfst.compile_twolc_file('test%d.twolc' % n, 'test%d.hfst' % n)

# Same three grammars again, verbosely, using the foma implementation type
# if this hfst build reports it as available.
if hfst.HfstTransducer.is_implementation_type_available(
        hfst.ImplementationType.FOMA_TYPE):
    for n in range(1, 4):
        assert 0 == hfst.compile_twolc_file(
            'test%d.twolc' % n,
            'test%d.hfst' % n,
            type=hfst.ImplementationType.FOMA_TYPE,
            verbose=True)