Esempi in Python per HfstBasicTransition, esempi in Python per hfst.HfstBasicTransition

Esempio n. 1

0

Mostra file

def seq_to_transducer(alignment, weight=0.0, type=None, alphabet=None):
    """Build a linear transducer that follows ``alignment`` pair by pair.

    One new state is added per pair and connected to the previous state
    (starting from state 0). A pair of two ``hfst.IDENTITY`` symbols is
    expanded: besides the identity arc into the new state and an identity
    self-loop on it, the same two arcs are added for every alphabet symbol
    that does not start with ``'@_'``.

    Args:
        alignment: sequence of ``(input, output)`` tuples; the tuples are
            concatenated with ``sum(..., ())`` to collect the symbols.
        weight: final weight of the last state.
        type: passed as the second argument to ``hfst.HfstTransducer``; when
            ``None`` it is read from ``shared.config['FST']`` under
            ``'transducer_type'``.
        alphabet: optional extra symbols to add to the transducer alphabet.

    Returns:
        An ``hfst.HfstTransducer`` built from the basic transducer.
    """
    if type is None:
        type = shared.config['FST'].getint('transducer_type')
    basic = hfst.HfstBasicTransducer()
    extra_symbols = () if alphabet is None else alphabet
    alphabet = tuple(sorted(set(extra_symbols) | set(sum(alignment, ()))))
    basic.add_symbols_to_alphabet(alphabet)

    def connect(source, target, in_sym, out_sym):
        # Every arc created here carries weight 0.0.
        basic.add_transition(
            source, hfst.HfstBasicTransition(target, in_sym, out_sym, 0.0))

    previous = 0
    for in_sym, out_sym in alignment:
        current = basic.add_state()
        if (in_sym, out_sym) == (hfst.IDENTITY, hfst.IDENTITY):
            connect(previous, current, hfst.IDENTITY, hfst.IDENTITY)
            connect(current, current, hfst.IDENTITY, hfst.IDENTITY)
            # The alphabet is queried after the identity arcs were added;
            # symbols starting with '@_' are skipped.
            for symbol in basic.get_alphabet():
                if symbol.startswith('@_'):
                    continue
                connect(previous, current, symbol, symbol)
                connect(current, current, symbol, symbol)
        else:
            connect(previous, current, in_sym, out_sym)
        previous = current
    basic.set_final_weight(previous, weight)
    return hfst.HfstTransducer(basic, type)

Esempio n. 2

0

Mostra file

 def add_identity_loop(tr, alphabet, state):
     """Add identity arcs from ``state`` into a looping state ``state + 1``.

     For every symbol of ``alphabet`` other than ``hfst.EPSILON``,
     ``hfst.IDENTITY`` and ``hfst.UNKNOWN``, a ``symbol:symbol`` arc with
     weight 0.0 is added from ``state`` to ``state + 1`` and as a self-loop
     on ``state + 1``.

     Returns:
         ``state + 1``, the id of the looping state.
     """
     target = state + 1
     for symbol in alphabet:
         if symbol in (hfst.EPSILON, hfst.IDENTITY, hfst.UNKNOWN):
             continue
         # First the entering arc, then the self-loop.
         for source in (state, target):
             tr.add_transition(
                 source,
                 hfst.HfstBasicTransition(target, symbol, symbol, 0.0))
     return target

Esempio n. 3

0

Mostra file

def tag_absorber(alphabet):
    """Build a two-state transducer that copies symbols and drops tags.

    Alphabet entries matching ``shared.compiled_patterns['symbol']`` become
    ``c:c`` self-loops on state 0. Entries that instead match
    ``shared.compiled_patterns['tag']`` are mapped to ``hfst.EPSILON`` on an
    arc from state 0 to state 1 and on a self-loop on state 1. Entries
    matching neither pattern are ignored. Both states are final with
    weight 0.0.

    Returns:
        An ``hfst.HfstTransducer`` built from the basic transducer.
    """
    basic = hfst.HfstBasicTransducer()

    def arc(source, target, in_sym, out_sym):
        basic.add_transition(
            source, hfst.HfstBasicTransition(target, in_sym, out_sym, 0.0))

    for entry in alphabet:
        if shared.compiled_patterns['symbol'].match(entry):
            arc(0, 0, entry, entry)
            continue
        if shared.compiled_patterns['tag'].match(entry):
            arc(0, 1, entry, hfst.EPSILON)
            arc(1, 1, entry, hfst.EPSILON)
    for final_state in (0, 1):
        basic.set_final_weight(final_state, 0.0)
    return hfst.HfstTransducer(basic)

Esempio n. 4

0

Mostra file

 def add_deletion_chain(tr, alphabet, state, length):
     """Append a chain of ``length`` deletion steps after ``state``.

     ``deletion_slot_symbol`` and ``deletion_symbol`` are not parameters;
     they are read from the surrounding scope, which is not visible here.

     Arcs added (all with weight 0.0):
       * ``state -> state + 1`` mapping ``hfst.EPSILON`` to
         ``deletion_slot_symbol``;
       * for each of the next ``length`` states, one arc to its successor
         per alphabet symbol (except ``hfst.EPSILON``, ``hfst.IDENTITY`` and
         ``hfst.UNKNOWN``), mapping the symbol to ``deletion_symbol``;
       * an epsilon:epsilon arc from ``state`` and from every chain state
         before the last one to the last state.

     Returns:
         ``state + length + 1``, the id of the last state of the chain.
     """
     slot_state = state + 1
     tr.add_transition(
         state,
         hfst.HfstBasicTransition(
             slot_state, hfst.EPSILON, deletion_slot_symbol, 0.0))
     for source in range(slot_state, slot_state + length):
         for symbol in alphabet:
             if symbol in (hfst.EPSILON, hfst.IDENTITY, hfst.UNKNOWN):
                 continue
             tr.add_transition(
                 source,
                 hfst.HfstBasicTransition(
                     source + 1, symbol, deletion_symbol, 0.0))
     last_state = slot_state + length
     # Every state before the last one may jump straight to the end.
     for source in range(state, last_state):
         tr.add_transition(
             source,
             hfst.HfstBasicTransition(
                 last_state, hfst.EPSILON, hfst.EPSILON, 0.0))
     return last_state

Esempio n. 5

0

Mostra file

def tag_acceptor(tag, alphabet):
    """Build a transducer accepting any symbol sequence followed by ``tag``.

    A single final state 0 gets a ``c:c`` self-loop for each alphabet entry
    matching ``shared.compiled_patterns['symbol']``. The result is then
    concatenated with ``seq_to_transducer`` applied to the pairs produced by
    ``zip(tag, tag)``.

    Returns:
        The concatenated ``hfst.HfstTransducer``.
    """
    basic = hfst.HfstBasicTransducer()
    for entry in alphabet:
        if not shared.compiled_patterns['symbol'].match(entry):
            continue
        basic.add_transition(
            0, hfst.HfstBasicTransition(0, entry, entry, 0.0))
    basic.set_final_weight(0, 0.0)
    acceptor = hfst.HfstTransducer(basic)
    tag_pairs = tuple(zip(tag, tag))
    acceptor.concatenate(seq_to_transducer(tag_pairs))
    return acceptor

Esempio n. 6

0

Mostra file

def rootgen_transducer(rootdist):
    """Create a one-state weighted automaton for word generation.

    The weights come from ``rootdist.features[0].log_probs``, a mapping
    keyed by 1-tuples. The entry for ``('#',)`` becomes the final weight of
    state 0; every other entry becomes a self-loop on state 0 labelled with
    the key's first element and carrying the entry's weight.

    Raises:
        NotImplementedError: if ``rootdist_n`` in
            ``shared.config['Features']`` is not 1.

    Returns:
        An ``hfst.HfstTransducer`` built from the basic transducer.
    """
    if shared.config['Features'].getint('rootdist_n') != 1:
        raise NotImplementedError('Not implemented for rootdist_n != 1')
    end_key = ('#',)
    log_probs = rootdist.features[0].log_probs
    basic = hfst.HfstBasicTransducer()
    basic.set_final_weight(0, log_probs[end_key])
    for ngram, cost in log_probs.items():
        if ngram == end_key:
            continue
        symbol = ngram[0]
        basic.add_transition(
            0, hfst.HfstBasicTransition(0, symbol, symbol, cost))
    return hfst.HfstTransducer(basic)

Esempio n. 7

0

Mostra file

File: alergia.py Progetto: maciejjan/morle

 def to_hfst(self) -> hfst.HfstTransducer:
     """Convert this automaton into a weighted ``hfst.HfstTransducer``.

     Each transition becomes a ``symbol:symbol`` arc whose weight is the
     negative log of its frequency divided by the source state's total
     frequency. A state whose ``final_freq`` is positive becomes final, with
     its weight computed the same way from ``final_freq``.
     """
     basic = hfst.HfstBasicTransducer()
     for node in self.states.values():
         denominator = node.get_total_freq()
         for edge in node.transitions.values():
             cost = -math.log(edge.freq / denominator)
             arc = hfst.HfstBasicTransition(
                 edge.target_state_id, edge.symbol, edge.symbol, cost)
             basic.add_transition(node.id, arc)
         if node.final_freq > 0:
             basic.set_final_weight(
                 node.id, -math.log(node.final_freq / denominator))
     return hfst.HfstTransducer(basic)

Esempio n. 8

0

Mostra file

def delfilter(alphabet, length, deletion_symbol='@_DEL_@',
              deletion_slot_symbol='@_DELSLOT_@'):
    """Build a filter transducer over deletion and deletion-slot symbols.

    "Printable" symbols are the alphabet minus ``hfst.EPSILON``,
    ``hfst.IDENTITY``, ``hfst.UNKNOWN`` and ``deletion_symbol``. All arcs
    have weight 0.0.

    Layout, as built below:
      * States ``0..length`` are final. A printable symbol moves from state
        ``i`` to ``i + 1``; ``deletion_symbol`` (mapped to epsilon) moves
        from ``i + 1`` back to ``i``.
      * States ``length + 1 ..  2 * length`` are not made final.
        ``deletion_symbol`` (mapped to epsilon) moves from state 0 into the
        first of them and then one state further each time; a printable
        symbol moves one state back, and from the first of them to state 0.
      * State 0 and every state reached above carry a
        ``deletion_slot_symbol`` self-loop.

    Returns:
        An ``hfst.HfstTransducer`` built from the basic transducer.
    """
    basic = hfst.HfstBasicTransducer()

    def arc(source, target, in_sym, out_sym):
        basic.add_transition(
            source, hfst.HfstBasicTransition(target, in_sym, out_sym, 0.0))

    def slot_loop(state_id):
        arc(state_id, state_id, deletion_slot_symbol, deletion_slot_symbol)

    basic.set_final_weight(0, 0.0)
    slot_loop(0)
    printable_chars = set(alphabet) - {
        hfst.EPSILON, hfst.IDENTITY, hfst.UNKNOWN, deletion_symbol}

    # Final chain: states 0..length.
    for lower in range(length):
        upper = lower + 1
        for symbol in printable_chars:
            arc(lower, upper, symbol, symbol)
        arc(upper, lower, deletion_symbol, hfst.EPSILON)
        slot_loop(upper)
        basic.set_final_weight(upper, 0.0)

    # Non-final chain: starts right after the final chain.
    first_negative_state = length + 1
    arc(0, first_negative_state, deletion_symbol, hfst.EPSILON)
    for symbol in printable_chars:
        arc(first_negative_state, 0, symbol, symbol)
    for shallow in range(first_negative_state,
                         first_negative_state + length - 1):
        deep = shallow + 1
        arc(shallow, deep, deletion_symbol, hfst.EPSILON)
        slot_loop(deep)
        for symbol in printable_chars:
            arc(deep, shallow, symbol, symbol)
    return hfst.HfstTransducer(basic)

Esempio n. 9

0

Mostra file

def accum_input_labels(fst, separator=""):
    """Encode, weight and prune a transducer.

    fst -- transducer to be processed; input labels are strings of alphabet
    symbols and output labels are single alphabet symbols

    separator -- null string or a symbol not part of the alphabet

    Returns a minimized transducer with the same states and final weights.
    Each kept transition is labelled on both sides with the original input
    label + separator + original output label, and weighted with
    ``alphabet.mphon_weight`` of that label. Transitions whose combined
    label fails ``alphabet.mphon_is_valid`` are discarded.
    """
    if cfg.verbosity >= 10:
        print("to be accumulated:\n", fst)
    source = hfst.HfstBasicTransducer(fst)
    target = hfst.HfstBasicTransducer()
    for state in source.states():
        target.add_state(state)
        if source.is_final_state(state):
            target.set_final_weight(state, source.get_final_weight(state))
        for arc in source.transitions(state):
            tostate = arc.get_target_state()
            insym = arc.get_input_symbol()
            outsym = arc.get_output_symbol()
            weight = arc.get_weight()
            combined = separator.join((insym, outsym))
            if cfg.verbosity >= 25:
                print("arc", state, tostate, insym, outsym, weight)
            if alphabet.mphon_is_valid(combined):
                target.add_transition(
                    state,
                    hfst.HfstBasicTransition(
                        tostate, combined, combined,
                        alphabet.mphon_weight(combined)))
                if cfg.verbosity >= 25:
                    print("after addition of transition:\n", target)
    result_fst = hfst.HfstTransducer(target)
    result_fst.minimize()
    if cfg.verbosity >= 10:
        print("accumulated fst:\n", result_fst)
    return result_fst

Esempio n. 10

0

Mostra file

File: test_hfst.py Progetto: busrabozoglu/hfst

    assert(tr.compare(hfst.empty_fst()))

    defs = {'foo':hfst.regex('Foo'), 'bar':hfst.regex('Bar')}
    tr = hfst.regex('foo bar', definitions=defs)
    assert(tr.compare(hfst.regex('Foo Bar')))
    tr = hfst.regex('foo bar')
    assert(tr.compare(hfst.regex('foo bar')))

# print('\n--- Testing HfstBasicTransducer ---\n')

# Create basic transducer, write it to file, read it, and test equivalence
fsm = hfst.HfstBasicTransducer()
fsm.add_state(0)
fsm.add_state(1)
fsm.set_final_weight(1, 0.3)
tr = hfst.HfstBasicTransition(1, 'foo', 'bar', 0.5)
fsm.add_transition(0, tr)
fsm.add_transition(0, 0, 'baz', 'baz')
fsm.add_transition(0, 0, 'baz', 'BAZ', 0.1)

# Use context managers so the file is closed even if writing or reading
# raises; the written data is flushed before the file is reopened.
with open('foo_basic', 'w') as f:
    fsm.write_att(f)

with open('foo_basic', 'r') as f:
    fsm2 = hfst.HfstBasicTransducer(hfst.read_att_transducer(f, hfst.EPSILON))

# Modify weights of a basic transducer
fsm = hfst.HfstBasicTransducer()
fsm.add_state(0)

Esempio n. 11

0

Mostra file

        tok.add_multichar_symbol('foo')
        tok.add_multichar_symbol('bar')
        tr = hfst.tokenized_fst(tok.tokenize('foobar', 'foobaz'))
        if not tr.compare(hfst.regex('[foo:foo bar:b 0:a 0:z]')):
            raise RuntimeError('')

        # HfstBasicTransducer
        # Create an empty transducer
        # The transducer has initially one start state (number zero)
        # that is not final
        fsm = hfst.HfstBasicTransducer()
        # Add two states to the transducer
        fsm.add_state(1)
        fsm.add_state(2)
        # Create a transition [foo:bar] leading to state 1 with weight 0.1
        tr = hfst.HfstBasicTransition(1, 'foo', 'bar', 0.1)
        # and add it to state zero
        fsm.add_transition(0, tr)
        # Add a transition [baz:baz] with weight 0 from state 1 to state 2
        fsm.add_transition(1, hfst.HfstBasicTransition(2, 'baz', 'baz', 0.0))
        # Set state 2 as final with weight 0.3
        fsm.set_final_weight(2, 0.3)
        # Go through all states
        for state, arcs in enumerate(fsm):
            for arc in arcs:
                print('%i ' % (state), end='')
                print(arc)
            if fsm.is_final_state(state):
                print('%i %f' % (state, fsm.get_final_weight(state)) )

        for state in fsm.states():

Esempio n. 12

0

Mostra file

 def _generator_for_seq(seq):
     """Build a linear transducer that maps ``seq`` to itself.

     State ``i`` is linked to state ``i + 1`` by an arc labelled with the
     ``i``-th element of ``seq`` on both sides, weight 0.0. State
     ``len(seq)`` is final with weight 0.0.
     """
     chain = hfst.HfstBasicTransducer()
     position = 0
     for symbol in seq:
         following = position + 1
         chain.add_transition(
             position,
             hfst.HfstBasicTransition(following, symbol, symbol, 0.0))
         position = following
     chain.set_final_weight(len(seq), 0.0)
     return hfst.HfstTransducer(chain)