Esempio n. 1
0
def seq_to_transducer(alignment, weight=0.0, type=None, alphabet=None):
    """Build a linear transducer from a sequence of symbol pairs.

    One new state is appended for every pair in ``alignment``.  An ordinary
    pair ``(x, y)`` contributes a single ``x:y`` transition from the previous
    state.  The pair ``(hfst.IDENTITY, hfst.IDENTITY)`` is expanded instead:
    the new state is entered by, and loops on, the identity symbol and every
    alphabet symbol that does not start with ``@_``.

    Args:
        alignment: iterable of ``(input_symbol, output_symbol)`` pairs.  It is
            traversed twice, so it must be re-iterable.
        weight: final weight assigned to the last state of the chain.
        type: implementation type handed to ``hfst.HfstTransducer``; when
            ``None`` it is read from ``shared.config['FST']`` under the key
            ``'transducer_type'``.  (The name shadows the builtin but is kept
            for keyword-argument compatibility.)
        alphabet: optional extra symbols to add to the transducer alphabet.

    Returns:
        An ``hfst.HfstTransducer`` of the requested type.
    """
    if type is None:
        type = shared.config['FST'].getint('transducer_type')
    # Flatten the pairs with a comprehension: ``sum(alignment, ())`` copies
    # the growing tuple on every step and rejects pairs that are not tuples.
    symbols = {symbol for pair in alignment for symbol in pair}
    if alphabet is not None:
        symbols.update(alphabet)
    tr = hfst.HfstBasicTransducer()
    tr.add_symbols_to_alphabet(tuple(sorted(symbols)))
    last_state_id = 0
    for (x, y) in alignment:
        state_id = tr.add_state()
        if (x, y) == (hfst.IDENTITY, hfst.IDENTITY):
            # Entering arc plus self-loop, so the new state consumes one or
            # more symbols.
            for source in (last_state_id, state_id):
                tr.add_transition(
                    source,
                    hfst.HfstBasicTransition(
                        state_id, hfst.IDENTITY, hfst.IDENTITY, 0.0))
            for a in tr.get_alphabet():
                # Symbols of the form '@_...' are skipped here.
                if not a.startswith('@_'):
                    for source in (last_state_id, state_id):
                        tr.add_transition(
                            source,
                            hfst.HfstBasicTransition(state_id, a, a, 0.0))
        else:
            tr.add_transition(
                last_state_id,
                hfst.HfstBasicTransition(state_id, x, y, 0.0))
        last_state_id = state_id
    tr.set_final_weight(last_state_id, weight)
    return hfst.HfstTransducer(tr, type)
Esempio n. 2
0
 def add_identity_loop(tr, alphabet, state):
     """Add arcs from ``state`` into ``state + 1`` plus self-loops there.

     For every symbol of ``alphabet`` other than ``hfst.EPSILON``,
     ``hfst.IDENTITY`` and ``hfst.UNKNOWN``, an identity transition
     (symbol:symbol, weight 0.0) is added from ``state`` to ``state + 1``
     and from ``state + 1`` to itself.

     Returns the number of the loop state, i.e. ``state + 1``.
     """
     target = state + 1
     for symbol in alphabet:
         if symbol in (hfst.EPSILON, hfst.IDENTITY, hfst.UNKNOWN):
             continue
         # First the entering arc, then the self-loop on the target state.
         for source in (state, target):
             tr.add_transition(
                 source,
                 hfst.HfstBasicTransition(target, symbol, symbol, 0.0))
     return target
Esempio n. 3
0
def tag_absorber(alphabet):
    """Build a two-state transducer that copies symbols and erases tags.

    Alphabet entries matching ``shared.compiled_patterns['symbol']`` loop on
    state 0 unchanged.  Entries that instead match
    ``shared.compiled_patterns['tag']`` are mapped to ``hfst.EPSILON`` on an
    arc from state 0 to state 1 and on a self-loop at state 1.  Entries that
    match neither pattern are ignored.  Both states are final with weight 0.0.

    Returns the result wrapped in an ``hfst.HfstTransducer``.
    """
    absorber = hfst.HfstBasicTransducer()
    for label in alphabet:
        if shared.compiled_patterns['symbol'].match(label):
            arcs = ((0, 0, label),)
        elif shared.compiled_patterns['tag'].match(label):
            # Once a tag has been seen, only further tags may follow.
            arcs = ((0, 1, hfst.EPSILON), (1, 1, hfst.EPSILON))
        else:
            continue
        for source, target, output in arcs:
            absorber.add_transition(
                source,
                hfst.HfstBasicTransition(target, label, output, 0.0))
    for final_state in (0, 1):
        absorber.set_final_weight(final_state, 0.0)
    return hfst.HfstTransducer(absorber)
Esempio n. 4
0
 def add_deletion_chain(tr, alphabet, state, length):
     """Append a chain of up to ``length`` deletion arcs starting at ``state``.

     The chain is opened by an arc ``hfst.EPSILON:deletion_slot_symbol`` from
     ``state`` to ``state + 1``.  Each of the next ``length`` states maps any
     alphabet symbol (except ``hfst.EPSILON``, ``hfst.IDENTITY`` and
     ``hfst.UNKNOWN``) to ``deletion_symbol`` on an arc to its successor.
     Every state from ``state`` through ``state + length`` also gets an
     epsilon arc to the closing state, so the chain can be left at any point.

     ``deletion_symbol`` and ``deletion_slot_symbol`` are not parameters; they
     are taken from the surrounding scope (not visible here).

     Returns the closing state, ``state + length + 1``.
     """
     last_state = state + length + 1
     tr.add_transition(
         state,
         hfst.HfstBasicTransition(
             state + 1, hfst.EPSILON, deletion_slot_symbol, 0.0))
     for source in range(state + 1, last_state):
         for symbol in alphabet:
             if symbol in (hfst.EPSILON, hfst.IDENTITY, hfst.UNKNOWN):
                 continue
             tr.add_transition(
                 source,
                 hfst.HfstBasicTransition(
                     source + 1, symbol, deletion_symbol, 0.0))
     # Epsilon exits from every state of the chain, including the entry state.
     for source in range(state, last_state):
         tr.add_transition(
             source,
             hfst.HfstBasicTransition(
                 last_state, hfst.EPSILON, hfst.EPSILON, 0.0))
     return last_state
Esempio n. 5
0
def tag_acceptor(tag, alphabet):
    """Build a transducer accepting any symbol string followed by ``tag``.

    The first part is a single final state looping on every alphabet entry
    that matches ``shared.compiled_patterns['symbol']``.  It is concatenated
    with the transducer produced by ``seq_to_transducer`` for the identity
    pairs ``(t, t)`` of the elements of ``tag``.

    Returns the concatenated ``hfst.HfstTransducer``.
    """
    prefix = hfst.HfstBasicTransducer()
    for symbol in alphabet:
        if not shared.compiled_patterns['symbol'].match(symbol):
            continue
        prefix.add_transition(
            0, hfst.HfstBasicTransition(0, symbol, symbol, 0.0))
    prefix.set_final_weight(0, 0.0)

    identity_pairs = tuple(zip(tag, tag))
    acceptor = hfst.HfstTransducer(prefix)
    acceptor.concatenate(seq_to_transducer(identity_pairs))
    return acceptor
Esempio n. 6
0
def rootgen_transducer(rootdist):
    """Create a one-state weighted automaton for word generation.

    The weights come from ``rootdist.features[0].log_probs``, a mapping keyed
    by 1-tuples.  The entry for ``('#',)`` becomes the final weight of state
    0; every other entry becomes a self-loop on state 0 labelled with the
    tuple's single element and carrying the entry's weight.

    Raises:
        NotImplementedError: if the ``rootdist_n`` setting in
            ``shared.config['Features']`` is not 1.
    """
    if shared.config['Features'].getint('rootdist_n') != 1:
        raise NotImplementedError('Not implemented for rootdist_n != 1')
    log_probs = rootdist.features[0].log_probs
    end_marker = ('#',)
    generator = hfst.HfstBasicTransducer()
    generator.set_final_weight(0, log_probs[end_marker])
    for ngram, cost in log_probs.items():
        if ngram == end_marker:
            continue
        symbol = ngram[0]
        generator.add_transition(
            0, hfst.HfstBasicTransition(0, symbol, symbol, cost))
    return hfst.HfstTransducer(generator)
Esempio n. 7
0
 def to_hfst(self) -> hfst.HfstTransducer:
     """Convert this frequency-annotated automaton to an HFST transducer.

     Each transition becomes an identity arc whose weight is the negative
     natural logarithm of its frequency relative to the state's total
     frequency (``state.get_total_freq()``).  A state with a positive
     ``final_freq`` is made final, weighted the same way.
     """
     basic = hfst.HfstBasicTransducer()
     for state in self.states.values():
         total_freq = state.get_total_freq()
         for transition in state.transitions.values():
             arc_weight = -math.log(transition.freq / total_freq)
             arc = hfst.HfstBasicTransition(
                 transition.target_state_id,
                 transition.symbol,
                 transition.symbol,
                 arc_weight)
             basic.add_transition(state.id, arc)
         # States that were never final keep no final weight at all.
         if state.final_freq > 0:
             basic.set_final_weight(
                 state.id, -math.log(state.final_freq / total_freq))
     return hfst.HfstTransducer(basic)
Esempio n. 8
0
def delfilter(alphabet, length, deletion_symbol='@_DEL_@',
              deletion_slot_symbol='@_DELSLOT_@'):
    """Build a filter transducer that erases deletion marks.

    State 0 is the final start state.  States ``1..length`` (all final) are
    reached by reading ordinary symbols and stepped back from by reading
    ``deletion_symbol`` (output ``hfst.EPSILON``).  States from
    ``length + 1`` onwards are reached by reading ``deletion_symbol`` first
    and stepped back from by reading ordinary symbols; they are not made
    final.  ``deletion_slot_symbol`` is copied unchanged by self-loops.

    "Ordinary symbols" are the members of ``alphabet`` other than
    ``hfst.EPSILON``, ``hfst.IDENTITY``, ``hfst.UNKNOWN`` and
    ``deletion_symbol``.

    Args:
        alphabet: iterable of symbols.
        length: number of states built on each side of state 0.
        deletion_symbol: input symbol that is mapped to epsilon.
        deletion_slot_symbol: symbol passed through by self-loops.

    Returns:
        The resulting ``hfst.HfstTransducer``.
    """
    tr = hfst.HfstBasicTransducer()

    def connect(source, target, in_sym, out_sym):
        # All arcs of the filter are unweighted.
        tr.add_transition(
            source, hfst.HfstBasicTransition(target, in_sym, out_sym, 0.0))

    tr.set_final_weight(0, 0.0)
    connect(0, 0, deletion_slot_symbol, deletion_slot_symbol)
    reserved = {hfst.EPSILON, hfst.IDENTITY, hfst.UNKNOWN, deletion_symbol}
    printable_chars = set(alphabet) - reserved

    # Positive side: ordinary symbols read before their deletion marks.
    for current in range(1, length + 1):
        previous = current - 1
        for c in printable_chars:
            connect(previous, current, c, c)
        connect(current, previous, deletion_symbol, hfst.EPSILON)
        connect(current, current, deletion_slot_symbol, deletion_slot_symbol)
        tr.set_final_weight(current, 0.0)

    # Negative side: deletion marks read before the ordinary symbols.
    first_negative_state = length + 1
    connect(0, first_negative_state, deletion_symbol, hfst.EPSILON)
    for c in printable_chars:
        connect(first_negative_state, 0, c, c)
    for shallower in range(first_negative_state,
                           first_negative_state + length - 1):
        deeper = shallower + 1
        connect(shallower, deeper, deletion_symbol, hfst.EPSILON)
        connect(deeper, deeper, deletion_slot_symbol, deletion_slot_symbol)
        for c in printable_chars:
            connect(deeper, shallower, c, c)

    return hfst.HfstTransducer(tr)
Esempio n. 9
0
def accum_input_labels(fst, separator=""):
    """Encode, weight and prune a transducer.

    fst -- transducer to be processed; input labels are strings of alphabet
    symbols and output labels are single alphabet symbols

    separator -- null string or a symbol not part of the alphabet

    Returns a minimized transducer with the same states and final weights.
    Each kept transition is labelled, on both the input and the output side,
    with the concatenation of the original input label, the separator and
    the original output label.  Its weight is ``alphabet.mphon_weight`` of
    that label.  Transitions whose combined label is rejected by
    ``alphabet.mphon_is_valid`` are discarded.
    """
    if cfg.verbosity >= 10:
        print("to be accumulated:\n", fst)
    source = hfst.HfstBasicTransducer(fst)
    accumulated = hfst.HfstBasicTransducer()
    for state in source.states():
        accumulated.add_state(state)
        if source.is_final_state(state):
            accumulated.set_final_weight(
                state, source.get_final_weight(state))
        for arc in source.transitions(state):
            tostate = arc.get_target_state()
            insym = arc.get_input_symbol()
            outsym = arc.get_output_symbol()
            # The original weight is only reported; it is not carried over.
            weight = arc.get_weight()
            mphon = separator.join((insym, outsym))
            if cfg.verbosity >= 25:
                print("arc", state, tostate, insym, outsym, weight)
            if alphabet.mphon_is_valid(mphon):
                accumulated.add_transition(
                    state,
                    hfst.HfstBasicTransition(
                        tostate, mphon, mphon, alphabet.mphon_weight(mphon)))
                if cfg.verbosity >= 25:
                    print("after addition of transition:\n", accumulated)
    result_fst = hfst.HfstTransducer(accumulated)
    result_fst.minimize()
    if cfg.verbosity >= 10:
        print("accumulated fst:\n", result_fst)
    return result_fst
Esempio n. 10
0
    assert(tr.compare(hfst.empty_fst()))

    defs = {'foo':hfst.regex('Foo'), 'bar':hfst.regex('Bar')}
    tr = hfst.regex('foo bar', definitions=defs)
    assert(tr.compare(hfst.regex('Foo Bar')))
    tr = hfst.regex('foo bar')
    assert(tr.compare(hfst.regex('foo bar')))

# print('\n--- Testing HfstBasicTransducer ---\n')

# Create basic transducer, write it to file, read it, and test equivalence
fsm = hfst.HfstBasicTransducer()
fsm.add_state(0)
fsm.add_state(1)
fsm.set_final_weight(1, 0.3)
tr = hfst.HfstBasicTransition(1, 'foo', 'bar', 0.5)
fsm.add_transition(0, tr)
fsm.add_transition(0, 0, 'baz', 'baz')
fsm.add_transition(0, 0, 'baz', 'BAZ', 0.1)

# Context managers close the file even if writing or reading raises.
with open('foo_basic', 'w') as f:
    fsm.write_att(f)

with open('foo_basic', 'r') as f:
    fsm2 = hfst.HfstBasicTransducer(hfst.read_att_transducer(f, hfst.EPSILON))

# Modify weights of a basic transducer
fsm = hfst.HfstBasicTransducer()
fsm.add_state(0)
Esempio n. 11
0
        tok.add_multichar_symbol('foo')
        tok.add_multichar_symbol('bar')
        tr = hfst.tokenized_fst(tok.tokenize('foobar', 'foobaz'))
        if not tr.compare(hfst.regex('[foo:foo bar:b 0:a 0:z]')):
            raise RuntimeError('')

        # HfstBasicTransducer
        # Create an empty transducer
        # The transducer has initially one start state (number zero)
        # that is not final
        fsm = hfst.HfstBasicTransducer()
        # Add two states to the transducer
        fsm.add_state(1)
        fsm.add_state(2)
        # Create a transition [foo:bar] leading to state 1 with weight 0.1
        tr = hfst.HfstBasicTransition(1, 'foo', 'bar', 0.1)
        # and add it to state zero
        fsm.add_transition(0, tr)
        # Add a transition [baz:baz] with weight 0 from state 1 to state 2
        fsm.add_transition(1, hfst.HfstBasicTransition(2, 'baz', 'baz', 0.0))
        # Set state 2 as final with weight 0.3
        fsm.set_final_weight(2, 0.3)
        # Go through all states
        for state, arcs in enumerate(fsm):
            for arc in arcs:
                print('%i ' % (state), end='')
                print(arc)
            if fsm.is_final_state(state):
                print('%i %f' % (state, fsm.get_final_weight(state)) )

        for state in fsm.states():
Esempio n. 12
0
 def _generator_for_seq(seq):
     """Return a transducer that accepts exactly the symbol sequence ``seq``.

     State ``k`` is connected to state ``k + 1`` by an unweighted identity arc
     on the ``k``-th element of ``seq``; state ``len(seq)`` is final with
     weight 0.0.
     """
     chain = hfst.HfstBasicTransducer()
     for target, symbol in enumerate(seq, start=1):
         arc = hfst.HfstBasicTransition(target, symbol, symbol, 0.0)
         chain.add_transition(target - 1, arc)
     chain.set_final_weight(len(seq), 0.0)
     return hfst.HfstTransducer(chain)