Esempio n. 1
0
def get_struct_feat(labels, weight):
    """Build the rule for a structured feature from a label sequence.

    The last element of ``labels`` is passed, together with ``weight``, to
    ``get_struct_center``, which yields the center and its context. Every
    earlier label is mapped through ``get_label_context``. The rule string
    is the space-joined sequence: center, left context, center context.

    Returns:
        A pair ``(regex(rule_str), rule_str)``: the compiled rule and the
        string it was compiled from.
    """
    center, center_context = get_struct_center(labels[-1], weight)
    left_context = ' '.join(get_label_context(label) for label in labels[:-1])
    rule_str = ' '.join((center, left_context, center_context))
    return regex(rule_str), rule_str
Esempio n. 2
0
 def test_tokenized(tok, pathin, pathout, exp, weight=0):
     """Check that tokenizing a path (pair) yields the transducer ``exp``.

     Args:
         tok: Tokenizer object; ``tokenize_one_level(pathin)`` is used when
             ``pathout`` is None, otherwise ``tokenize(pathin, pathout)``.
         pathin: Input-side string to tokenize.
         pathout: Output-side string, or None for one-level tokenization.
         exp: Regular expression the tokenized transducer must equal.
         weight: Weight passed to ``libhfst.tokenized_fst``.

     Raises:
         RuntimeError: If the tokenized transducer does not compare equal to
             ``libhfst.regex(exp)``.
     """
     # Identity test: None is a singleton, so ``is`` is the correct check.
     if pathout is None:
         tokenized = tok.tokenize_one_level(pathin)
     else:
         tokenized = tok.tokenize(pathin, pathout)

     if libhfst.tokenized_fst(tokenized, weight).compare(libhfst.regex(exp)):
         return

     # Build the message only on failure, mentioning pathout when given.
     failed_input = pathin if pathout is None else pathin + ", " + pathout
     raise RuntimeError('test_tokenized failed with input: ' + failed_input)
Esempio n. 3
0
def get_top_outputs(outputs, str_model, top_n):
    """Return the best-scoring output strings after applying ``str_model``.

    Args:
        outputs: List of positions; each position is a list of
            ``[symbol, score]`` candidates.
        str_model: Transducer the candidate lattice is composed with.
        top_n: Number of best paths kept via ``n_best``.

    Returns:
        A list of ``(string, weight)`` pairs sorted by ascending weight
        (ties broken by the raw path string), with ``remove_markers``
        applied to each string.
    """
    # Two boundary positions are added on each side of the real outputs.
    boundary = [['_#_', 0]]
    padded = [boundary] * 2 + outputs + [boundary] * 2

    # Start from the "0" expression and append one disjunction of
    # candidates per position.
    fst = regex("0")
    for candidates in padded:
        position_fst = empty_fst()
        for symbol, score in candidates:
            position_fst.disjunct(
                regex("%s %s::%f %s" % (LS, get_symbs(symbol), score, LS)))
        fst.concatenate(position_fst)

    fst.compose(str_model)
    fst.remove_epsilons()
    fst.determinize()
    fst.n_best(top_n)

    # Only the first entry of each extracted value is used; it is read as
    # (string, weight) and flipped so sorting orders by weight first.
    weighted = sorted(
        (p[0][1], p[0][0]) for p in fst.extract_paths().values())
    return [(remove_markers(string), weight) for weight, string in weighted]
Esempio n. 4
0
import libhfst

# Compile all three test expressions first, then write them, in order, to a
# default-constructed output stream.
compiled = [libhfst.regex(expression) for expression in ('föö:bär', '0', '0-0')]

ostr = libhfst.HfstOutputStream()
for transducer in compiled:
    ostr.write(transducer)
ostr.flush()
ostr.close()
Esempio n. 5
0
import libhfst
tr = libhfst.regex('[a::1 a:b::0.3 (b::0)]::0.7;')
# Push the weights in each direction in turn (initial state first, then
# final state) and print the transducer after every push.
for direction in (libhfst.TO_INITIAL_STATE, libhfst.TO_FINAL_STATE):
    tr.push_weights(direction)
    print(tr)
Esempio n. 6
0
import libhfst

# Collect every transducer offered by a default-constructed input stream.
istr = libhfst.HfstInputStream()
transducers = []
while not istr.is_eof():
    transducers.append(istr.read())
istr.close()

if len(transducers) != 3:
    raise RuntimeError('Wrong number of transducers read.')

# Each transducer must be equivalent to the expression at the same position.
for transducer, expression in zip(transducers, ('föö:bär', '0', '0-0')):
    if not transducer.compare(libhfst.regex(expression)):
        raise RuntimeError('Transducers are not equivalent.')

# Write the transducers in AT&T format, with a '--' line between
# consecutive ones (none before the first, none after the last).
if transducers:
    f = libhfst.hfst_stdout()
    for position, transducer in enumerate(transducers):
        if position > 0:
            f.write('--\n')
        transducer.write_att(f)
Esempio n. 7
0
 def test_fst(input, result):
     """Check that ``libhfst.fst(input)`` equals the regex ``result``.

     Raises:
         RuntimeError: If the two transducers do not compare equal.
     """
     actual = libhfst.fst(input)
     expected = libhfst.regex(result)
     if actual.compare(expected):
         return
     raise RuntimeError('test_fst failed with input: ' + input)
Esempio n. 8
0
        raise RuntimeError(get_linenumber())

    # Copy constructor
    transducer = libhfst.HfstTransducer(TR1)
    if not (TR1.compare(transducer)):
        raise RuntimeError(get_linenumber())
    if not (transducer.compare(TR1)):
        raise RuntimeError(get_linenumber())

    # Read lexc
    tr = libhfst.compile_lexc_file('test.lexc')
    tr.insert_freely(tr1).minimize()
    tr.insert_freely(('A','B')).minimize()

    # Substitute
    tr = libhfst.regex('a a:b b;')
    tr.substitute('a', 'A', input=True, output=False)
    eq = libhfst.regex('A:a A:b b;')
    if not (tr.compare(eq)):
        raise RuntimeError(get_linenumber())

    tr = libhfst.regex('a a:b b;')
    tr.substitute('a', 'A', input=False, output=True)
    eq = libhfst.regex('a:A a:b b;')
    if not (tr.compare(eq)):
        raise RuntimeError(get_linenumber())

    tr = libhfst.regex('a a:b b;')
    tr.substitute('a','A')
    eq = libhfst.regex('A A:b b;')
    if not (tr.compare(eq)):
Esempio n. 9
0
import libhfst

# Collect every transducer offered by a default-constructed input stream.
istr = libhfst.HfstInputStream()
transducers = []
while not istr.is_eof():
    transducers.append(istr.read())
istr.close()

if len(transducers) != 3:
    raise RuntimeError('Wrong number of transducers read.')

# Each transducer must be equivalent to the expression at the same position.
for transducer, expression in zip(transducers, ('föö:bär', '0', '0-0')):
    if not transducer.compare(libhfst.regex(expression)):
        raise RuntimeError('Transducers are not equivalent.')

# Write the transducers in AT&T format, with a '--' line between
# consecutive ones (none before the first, none after the last).
if transducers:
    f = libhfst.hfst_stdout()
    for position, transducer in enumerate(transducers):
        if position > 0:
            f.write('--\n')
        transducer.write_att(f)
Esempio n. 10
0
    center, center_context = get_struct_center(center_label, weight)

    context_labels = labels[:-1]
    left_context = ' '.join([get_label_context(l) for l in context_labels])

    rule_str = ' '.join([center, left_context, center_context])

    return regex(rule_str), rule_str


if __name__ == '__main__':
    is_structured = 0

    unstructured_model = {}

    structured_rules = regex('?*')
    structured_model = regex('?*')

    oustr = open(argv[1] + '.ustr', 'wb')

    ostr = create_hfst_output_stream(argv[1] + '.str', TROPICAL_OPENFST_TYPE,
                                     1)

    seen_struct_feats = set()

    for i, line in enumerate(map(lambda x: x.strip(), stdin)):
        if line == '':
            continue
        if line == STRUCTID:
            stderr.write("Structured features.\n")
            is_structured = 1
Esempio n. 11
0
import libhfst
# Compile a weighted a:b transducer and write it through an output stream
# created with hfst_format=False (presumably the backend's own format
# rather than the HFST one; confirm against the HFST docs).
weighted_ab = libhfst.regex('a:b::2.8')
stream = libhfst.HfstOutputStream(hfst_format=False)
stream.write(weighted_ab)
stream.flush()
stream.close()
Esempio n. 12
0
# -*- coding: utf-8 -*-

import libhfst
from sys import argv, stderr

# Base expression every accumulator transducer starts from. The backslashes
# are written as "\\" because "\£" is not a recognised escape sequence;
# the resulting string value is unchanged.
base = "[\\£ \\£ £]*"

# Collected unstructured / structured transducers.
all_ustr_fsts = []
all_str_fsts = []

# Accumulators, each starting from the base expression.
ustr_fst = libhfst.regex(base)
str_fst = libhfst.regex(base)
ustr_fsts = libhfst.regex(base)
str_fsts = libhfst.regex(base)

# Counter reported in the progress message while reading the input.
fst_count = 0

# 1 while reading unstructured features, 0 while reading structured ones;
# unstructured is the default until a section header changes it.
ustr = 1

for line in open(argv[1], "r"):
    stderr.write("LINE: %u\r" % fst_count)
    line = line.strip()

    if line == "":
        continue

    if line == "UNSTRUCTURED FEATURES":
        ustr = 1
    elif line == "STRUCTURED FEATURES":
        ustr = 0
    else:
        if ustr:
            if fst_count % 100 == 0:
Esempio n. 13
0
File: test7.py Progetto: hfst/hfst
import libhfst

# Each case: (final weight of state 1,
#             (input, output) symbol pairs added between states 0 and 1,
#             regex the resulting transducer must be equivalent to).
CASES = (
    (2.0, (("foo", libhfst.EPSILON),), 'foo:0::2.0'),
    (-0.5, (("foo", libhfst.UNKNOWN), ("foo", "foo")), 'foo:?::-0.5'),
    (1.5, ((libhfst.IDENTITY, libhfst.IDENTITY),), '?::1.5'),
)

for final_weight, symbol_pairs, expected in CASES:
    fsm = libhfst.HfstBasicTransducer()
    fsm.add_state(1)
    fsm.set_final_weight(1, final_weight)
    for input_symbol, output_symbol in symbol_pairs:
        fsm.add_transition(0, 1, input_symbol, output_symbol)
    if not libhfst.HfstTransducer(fsm).compare(libhfst.regex(expected)):
        raise RuntimeError('')
Esempio n. 14
0
# The boundary marker gets itself added to its list of outputs.
outputs['_#_'].append('_#_')

out = libhfst.create_hfst_output_stream("",
                                        libhfst.TROPICAL_OPENFST_TYPE, 1)

# The two models are read from the files named by the 2nd and 3rd arguments.
ustr_model = libhfst.HfstInputStream(argv[2]).read()
str_model = libhfst.HfstInputStream(argv[3]).read()

for line_no, stripped in enumerate(raw.strip() for raw in stdin):
    stderr.write("LINE: %u\r" % line_no)

    if stripped == '':
        continue

    # Surround the line with boundary symbols and quote every literal 0.
    padded = '_#_ _#_ # ' + stripped.replace('0','"0"') + ' # _#_ _#_'

    # One "<symbol> [<out1>|<out2>|...] £ " segment per space-separated symbol.
    expr = ''.join(
        '%s [%s] £ ' % (escape(char),
                        '|'.join([escape(c) for c in outputs[char]]))
        for char in padded.split(' '))

    candidates = libhfst.regex(expr)
    candidates.compose(ustr_model)
    candidates.minimize()
    candidates.compose(str_model)
    candidates.minimize()
    candidates.n_best(NBEST)

    # A parenthesised single argument prints the same whether print is a
    # statement or a function.
    for p in get_sorted_paths(candidates):
        print(p[1])
    print("<SEP>")
    stdout.flush()
stderr.write('\n')
Esempio n. 15
0
# The examples given in doxygen documentation

import libhfst

# StreamIsClosedException
# The stream is closed before the write, so the write is the call expected
# to raise StreamIsClosedException; the handler just reports it.
try:
    tr = libhfst.regex('foo')
    outstr = libhfst.HfstOutputStream(filename='testfile')
    outstr.close()
    # Deliberate write to an already-closed stream.
    outstr.write(tr)
except libhfst.StreamIsClosedException:
    print("Could not write transducer: stream to file was closed.")

# TransducerIsCyclicException
# First try to extract paths without any cycle limit; if that raises
# TransducerIsCyclicException, retry with max_cycles=5 and print the result.
transducer = libhfst.regex('[a:b]*')
try:
    results = transducer.extract_paths(output='text')
    # NOTE(review): with output='text' the result may be a single string, in
    # which case len() counts characters rather than paths; confirm.
    print("The transducer has %i paths:" % len(results))
    print(results)
except libhfst.TransducerIsCyclicException:
    print(
        "The transducer is cyclic and has an infinite number of paths. Some of them:"
    )
    # Same extraction, but with a cycle limit.
    results = transducer.extract_paths(output='text', max_cycles=5)
    print(results)

# NotTransducerStreamException
# Create a plain text file (not a transducer stream) for the example below.
# The context manager closes the file even if the write raises.
with open('foofile', 'w') as f:
    f.write('This is an ordinary text file.\n')
try:
Esempio n. 16
0
import libhfst
# Make foma the default transducer type before anything is compiled.
libhfst.set_default_fst_type(libhfst.FOMA_TYPE)

# Compile a:b and write it through an output stream created with
# hfst_format=False (presumably the backend's own format rather than the
# HFST one; confirm against the HFST docs).
foma_ab = libhfst.regex('a:b')
stream = libhfst.HfstOutputStream(hfst_format=False)
stream.write(foma_ab)
stream.flush()
stream.close()
Esempio n. 17
0
# The examples given in doxygen documentation

import libhfst

# StreamIsClosedException
# The stream is closed before the write, so the write is the call expected
# to raise StreamIsClosedException; the handler just reports it.
try:
    tr = libhfst.regex("foo")
    outstr = libhfst.HfstOutputStream(filename="testfile")
    outstr.close()
    # Deliberate write to an already-closed stream.
    outstr.write(tr)
except libhfst.StreamIsClosedException:
    print("Could not write transducer: stream to file was closed.")

# TransducerIsCyclicException
# First try to extract paths without any cycle limit; if that raises
# TransducerIsCyclicException, retry with max_cycles=5 and print the result.
transducer = libhfst.regex("[a:b]*")
try:
    results = transducer.extract_paths(output="text")
    # NOTE(review): with output="text" the result may be a single string, in
    # which case len() counts characters rather than paths; confirm.
    print("The transducer has %i paths:" % len(results))
    print(results)
except libhfst.TransducerIsCyclicException:
    print("The transducer is cyclic and has an infinite number of paths. Some of them:")
    # Same extraction, but with a cycle limit.
    results = transducer.extract_paths(output="text", max_cycles=5)
    print(results)

# NotTransducerStreamException
# Create a plain text file (not a transducer stream) for the example below.
# The context manager closes the file even if the write raises.
with open("foofile", "w") as f:
    f.write("This is an ordinary text file.\n")
try:
    instr = libhfst.HfstInputStream("foofile")
    tr = instr.read()