Beispiel #1
0
            if fst_count % 10 == 0:
                str_fst.minimize()

        fst_count += 1
# End the current stderr line with a newline.
# NOTE(review): presumably the loop above (outside this excerpt) prints
# "\r"-style progress to stderr -- confirm.
stderr.write("\n")

# Intersect `ustr_fst` into the current `ustr_fsts` batch and store the
# batch in `all_ustr_fsts`.
# NOTE(review): the loop that builds these objects is not visible here;
# verify that `ustr_fst` has not already been intersected into the batch.
ustr_fsts.intersect(ustr_fst)
all_ustr_fsts.append(ustr_fsts)

# Same two steps for the `str_*` counterparts.
str_fsts.intersect(str_fst)
all_str_fsts.append(str_fsts)

# Build the two final models.  Each starts from its own transducer compiled
# from `base` (two separate regex() calls, so the in-place intersections
# below do not share an object), and is then intersected with every
# transducer collected in the corresponding all_*_fsts list.
ustr_model = libhfst.regex(base)
str_model = libhfst.regex(base)

for i, fst in enumerate(all_ustr_fsts):
    # A single parenthesised argument prints the same text whether `print`
    # is the Python 2 statement or the Python 3 function.
    print("USTR: %u of %u" % (i, len(all_ustr_fsts)))
    ustr_model.intersect(fst)
    # Minimize after every intersection rather than once at the end.
    ustr_model.minimize()

for i, fst in enumerate(all_str_fsts):
    print("STR: %u of %u" % (i, len(all_str_fsts)))
    str_model.intersect(fst)
    str_model.minimize()

# Write each model to <argv[1]>.ustr / <argv[1]>.str.  The streams are
# closed explicitly so the data is flushed to disk deterministically instead
# of whenever the stream object happens to be garbage-collected (`out` is
# rebound below, and the second stream otherwise lives until exit).
out = libhfst.create_hfst_output_stream(argv[1] + ".ustr", libhfst.TROPICAL_OPENFST_TYPE, 1)
try:
    out.write(ustr_model)
finally:
    out.close()

out = libhfst.create_hfst_output_stream(argv[1] + ".str", libhfst.TROPICAL_OPENFST_TYPE, 1)
try:
    out.write(str_model)
finally:
    out.close()
Beispiel #2
0
    rule_str = ' '.join([center, left_context, center_context])

    return regex(rule_str), rule_str


# Script entry point: opens two outputs named after argv[1] ('.ustr' and
# '.str') and then processes stdin line by line, switching between a
# "structured" and an "unstructured" mode on marker lines.
# NOTE(review): this block continues past the visible excerpt; the comments
# here describe only the lines shown.
if __name__ == '__main__':
    # Mode flag: set to 1 after a STRUCTID line, back to 0 after an
    # UNSTRUCTID line (see the loop below).  Starts in unstructured mode.
    is_structured = 0

    unstructured_model = {}

    # Both start out as the transducer compiled from the regex '?*'.
    # NOTE(review): presumably '?*' is the "accept any string" language so
    # that later intersections can only narrow it -- confirm.
    structured_rules = regex('?*')
    structured_model = regex('?*')

    # Plain binary file for the '.ustr' output.
    # NOTE(review): no close() is visible in this excerpt -- confirm the
    # file is closed further down.
    oustr = open(argv[1] + '.ustr', 'wb')

    # HFST output stream for the '.str' output.
    ostr = create_hfst_output_stream(argv[1] + '.str', TROPICAL_OPENFST_TYPE,
                                     1)

    seen_struct_feats = set()

    # Every stdin line is stripped of surrounding whitespace before it is
    # examined.
    # NOTE(review): under Python 2 `map` builds the full list first, i.e.
    # all of stdin is read before the first iteration -- confirm which
    # interpreter this script targets.
    for i, line in enumerate(map(lambda x: x.strip(), stdin)):
        # Blank lines carry no information.
        if line == '':
            continue
        # Marker lines only switch the mode; they are not features.
        if line == STRUCTID:
            stderr.write("Structured features.\n")
            is_structured = 1
        elif line == UNSTRUCTID:
            stderr.write("Unstructured features.\n")
            is_structured = 0
        else:
            if is_structured:
                # Structured feature lines are split on single spaces.
                fields = line.split(' ')
Beispiel #3
0
# For every input symbol, copy its observed output symbols into
# outputs[<input>], most frequent first.
for i in iocounts:
    # (count, output) pairs in descending order; equal counts are ordered by
    # the output symbol itself.
    odistr = sorted([(iocounts[i][o], o) for o in iocounts[i]],
                    reverse=True)

    # NOTE(review): `tot` accumulates the counts taken so far, but the stop
    # condition below looks at the individual count `m`, not at `tot`.
    # Confirm which of the two was intended before changing it.
    tot = 0

    for m, o in odistr:
        outputs[i].append(o)
        tot = tot + m
        # Stop right after taking an output whose own count reaches TH.
        if m >= TH:
            break

# The two boundary symbols always map to themselves.
for boundary in ('#', '_#_'):
    outputs[boundary].append(boundary)

# Output stream created with an empty file name.
# NOTE(review): presumably an empty name makes HFST write to standard
# output -- confirm against the libhfst documentation.
out = libhfst.create_hfst_output_stream("",
                                        libhfst.TROPICAL_OPENFST_TYPE, 1)

# Read one transducer from each of the files named by argv[2] and argv[3].
ustr_model = libhfst.HfstInputStream(argv[2]).read()
str_model = libhfst.HfstInputStream(argv[3]).read()

# Process stdin one stripped line at a time.
# NOTE(review): `imap` is presumably itertools.imap (Python 2, lazy) -- the
# import is outside this excerpt.
for i, line in enumerate(imap(lambda x: x.strip(), stdin)):
    # Progress indicator; "\r" keeps rewriting the same stderr line.
    stderr.write("LINE: %u\r" % i)
    expr = ''

    # Blank lines are skipped (after the progress message was written).
    if line == '':
        continue
    # Split the line on single spaces and surround it with the boundary
    # symbols '_#_ _#_ #' ... '# _#_ _#_'.  Every '0' in the line is wrapped
    # in double quotes first.
    # NOTE(review): presumably because a bare 0 means epsilon in HFST regex
    # syntax -- confirm.
    chars = ('_#_ _#_ # ' + line.replace('0','"0"') + ' # _#_ _#_').split(' ')
    
    # For each symbol append "<escaped symbol> [<alt1>|<alt2>|...] £ " to
    # expr, where the alternatives are the escaped entries of outputs[symbol].
    for char in chars:
        expr += ('%s [%s] £ ' % (escape(char),
                                 '|'.join([escape(c) for c in outputs[char]])))