예제 #1
0
    def _make_transducer(self):
        """Build the two-state transducer for this constraint.

        Both states are final and ``Contiguity1`` is the initial state.
        Identity arcs always cost 0: reading the stress mark ``'`` leads to
        ``Contiguity2``, any other supported segment leads to ``Contiguity1``.
        Insertion and deletion arcs cost 0 when leaving ``Contiguity1`` and 1
        when leaving ``Contiguity2``; they always end in ``Contiguity1``.

        Returns:
            The populated ``Transducer``.

        Raises:
            ConstraintError: if a segment is neither in ``yimas_vowels``, the
                stress mark, nor in ``yimas_cons``.
        """
        alphabet = self.feature_table.get_segments()
        transducer = Transducer(alphabet, name=str(self))

        start = State('Contiguity1')
        after_stress = State('Contiguity2')
        both_states = (start, after_stress)
        for state in both_states:
            transducer.add_state(state)
        transducer.initial_state = start
        for state in both_states:
            transducer.add_final_state(state)

        for segment in alphabet:
            # Insertion then deletion, first from Contiguity1 (cost 0) and
            # then from Contiguity2 (cost 1); all of them land in Contiguity1.
            for origin, penalty in ((start, 0), (after_stress, 1)):
                transducer.add_arc(Arc(origin, NULL_SEGMENT, segment, CostVector([penalty]), start))
                transducer.add_arc(Arc(origin, segment, NULL_SEGMENT, CostVector([penalty]), start))

            # Only the destination of the identity arcs depends on the segment class.
            symbol = segment.get_symbol()
            if symbol in yimas_vowels:      # vowel
                destination = start
            elif symbol == "'":             # stress mark
                destination = after_stress
            elif symbol in yimas_cons:      # consonant
                destination = start
            else:
                raise ConstraintError("{} not supported in this constraint".format(symbol))

            for origin in both_states:
                transducer.add_arc(Arc(origin, segment, segment, CostVector([0]), destination))

        return transducer
예제 #2
0
    def _make_transducer(self):
        """Build the two-state transducer for this constraint.

        ``Precede1`` is the initial state and ``Precede2`` is entered once a
        stress mark ``'`` has been output; both states are final. Every arc
        has ``JOKER_SEGMENT`` as input. Outputting a vowel costs 1 in
        ``Precede1`` and 0 in ``Precede2``; every other arc costs 0. Each
        state also gets a cost-free self-loop that outputs ``NULL_SEGMENT``.

        Returns:
            The populated ``Transducer``.

        Raises:
            ConstraintError: if a segment is neither in ``yimas_vowels``, the
                stress mark, nor in ``yimas_cons``.
        """
        alphabet = self.feature_table.get_segments()
        transducer = Transducer(alphabet, name=str(self))

        before_stress = State('Precede1')
        after_stress = State('Precede2')   # stress already seen, vowels are free from here on
        transducer.add_state(before_stress)
        transducer.add_state(after_stress)
        transducer.initial_state = before_stress
        transducer.add_final_state(before_stress)
        transducer.add_final_state(after_stress)

        for segment in alphabet:
            symbol = segment.get_symbol()
            # Cost and target of the arc leaving Precede1; the arc leaving
            # Precede2 is always a cost-free self-loop.
            if symbol in yimas_vowels:      # vowel before any stress: penalised
                cost, target = 1, before_stress
            elif symbol == "'":             # stress mark: switch state
                cost, target = 0, after_stress
            elif symbol in yimas_cons:      # consonant: neutral
                cost, target = 0, before_stress
            else:
                raise ConstraintError("{} not supported in this constraint".format(symbol))

            transducer.add_arc(Arc(before_stress, JOKER_SEGMENT, segment, CostVector([cost]), target))
            transducer.add_arc(Arc(after_stress, JOKER_SEGMENT, segment, CostVector([0]), after_stress))

        # Cost-free self-loop with NULL_SEGMENT output on every state.
        for state in transducer.states:
            transducer.add_arc(Arc(state, JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]), state))

        return transducer
예제 #3
0
파일: lexicon.py 프로젝트: taucompling/otml
    def _make_transducer(self):
        """Build a linear transducer with one state per position in the word.

        States ``q0`` .. ``qn`` are created, where ``n`` is
        ``len(self.word_string)``. Every state gets a self-loop with
        ``NULL_SEGMENT`` input and ``JOKER_SEGMENT`` output, and each state
        ``qi`` with ``i < n`` gets an arc to ``q(i+1)`` whose input is the
        i-th word segment and whose output is ``JOKER_SEGMENT``. All arcs
        carry an empty cost vector. ``q0`` is initial and ``qn`` is the only
        final state.

        Returns:
            The populated ``Transducer``.
        """
        transducer = Transducer(self.feature_table.get_segments(), length_of_cost_vectors=0)
        word_segments = self.get_segments()
        # NOTE(review): assumes word_segments has at least len(word_string) items - confirm.
        last = len(self.word_string)

        chain = []
        for position in range(last + 1):
            chain.append(State("q{}".format(position), position))

        for position, current in enumerate(chain):
            transducer.add_state(current)
            # Self-loop: NULL_SEGMENT input, JOKER_SEGMENT output.
            transducer.add_arc(Arc(current, NULL_SEGMENT, JOKER_SEGMENT, CostVector.get_empty_vector(), current))
            if position == last:
                continue
            # Advance along the chain on the next word segment.
            transducer.add_arc(Arc(current, word_segments[position], JOKER_SEGMENT, CostVector.get_empty_vector(), chain[position + 1]))

        transducer.initial_state = chain[0]
        transducer.add_final_state(chain[last])
        return transducer
예제 #4
0
    def _make_transducer(self):
        """Build a chain-shaped transducer that reads the word segment by segment.

        One state is created per position ``0 .. n`` with
        ``n = len(self.word_string)``. Each state has a self-loop
        (``NULL_SEGMENT`` input, ``JOKER_SEGMENT`` output), and each
        non-final position has an arc to the next state that takes the
        corresponding word segment as input and ``JOKER_SEGMENT`` as output.
        Every arc uses an empty cost vector. The first state is initial and
        the last one is the single final state.

        Returns:
            The populated ``Transducer``.
        """
        transducer = Transducer(self.feature_table.get_segments(), length_of_cost_vectors=0)
        word_segments = self.get_segments()
        # NOTE(review): assumes word_segments has at least len(word_string) items - confirm.
        word_length = len(self.word_string)

        positions = [State("q{}".format(idx), idx) for idx in range(word_length + 1)]

        for idx, here in enumerate(positions):
            transducer.add_state(here)
            stay = Arc(here, NULL_SEGMENT, JOKER_SEGMENT, CostVector.get_empty_vector(), here)
            transducer.add_arc(stay)
            if idx < word_length:
                advance = Arc(here, word_segments[idx], JOKER_SEGMENT,
                              CostVector.get_empty_vector(), positions[idx + 1])
                transducer.add_arc(advance)

        transducer.initial_state = positions[0]
        transducer.add_final_state(positions[word_length])
        return transducer
def optimize_transducer_grammar_for_word(word, eval):
    """Build a pruned copy of ``eval`` that keeps only the arcs chosen by ``_best_arcs``.

    The arcs of ``eval`` are grouped by the ``index`` of their origin state.
    For each index from 0 up to (but excluding) the number of segments in
    ``word``, ``_best_arcs`` selects arcs from that group; each selected arc
    and its terminal state are added to the new transducer, and the terminal
    state's accumulated cost is recorded as the origin's cost plus the arc's
    cost vector.

    Args:
        word: object exposing ``get_segments()``; only the number of segments
            is used here.
        eval: the transducer to prune. The name shadows the builtin ``eval``
            but is kept so that keyword callers keep working.

    Returns:
        A new ``Transducer`` over ``eval``'s alphabet, sharing state and arc
        objects with ``eval``.

    Raises:
        KeyError: if no arc originates from a state with one of the visited
            indices, or if a final state of ``eval`` never received a cost.
        IndexError: if ``eval`` has no final states.
    """
    # Group arcs by the index of their origin state. A plain dict is used on
    # purpose so that a missing index still raises KeyError below.
    arcs_by_index = {}
    for arc in eval._arcs:
        arcs_by_index.setdefault(arc.origin_state.index, []).append(arc)

    new_transducer = Transducer(eval.get_alphabet())

    # Accumulated cost per reached state, seeded with the initial state.
    state_costs = {}
    new_transducer.add_state(eval.initial_state)
    new_transducer.initial_state = eval.initial_state
    state_costs[eval.initial_state] = CostVector.get_vector(eval.get_length_of_cost_vectors(), 0)

    for index in range(len(word.get_segments())):
        new_arcs = _best_arcs(arcs_by_index[index], state_costs)
        for arc in new_arcs:
            new_transducer.add_arc(arc)
            new_transducer.add_state(arc.terminal_state)
            state_costs[arc.terminal_state] = state_costs[arc.origin_state] + arc.cost_vector

    # Keep the final state(s) whose accumulated cost is greatest under
    # CostVector's ``>``; ties are all kept.
    # NOTE(review): whether ``>`` means "better" depends on CostVector's
    # comparison operators, which are defined elsewhere - confirm.
    new_final_states = [eval.final_states[0]]
    for state in eval.final_states[1:]:
        state_cost = state_costs[state]
        final_cost = state_costs[new_final_states[0]]
        if state_cost > final_cost:
            new_final_states = [state]
        elif state_cost == final_cost:
            new_final_states.append(state)

    for state in new_final_states:
        new_transducer.add_final_state(state)

    #new_transducer.clear_dead_states(with_impasse_states=True) #TODO give it a try

    return new_transducer
예제 #6
0
 def test_transducer_clear_dead_states(self):
     # Four states: q1 is initial and q2 is the only final state. No arc
     # leads from q1 or q2 to q3 or q4. After clear_dead_states() the
     # transducer must equal the pickled reference.
     transducer = Transducer(self.feature_table.get_segments())
     q1, q2, q3, q4 = [State(label) for label in ('q1', 'q2', 'q3', 'q4')]
     for state in (q1, q2, q3, q4):
         transducer.add_state(state)
     transducer.initial_state = q1
     transducer.add_final_state(q2)

     # (origin, terminal) pairs; every arc is JOKER_SEGMENT -> NULL_SEGMENT
     # with an empty cost vector.
     connections = (
         (q1, q2),
         (q1, q1),
         (q2, q2),
         (q3, q3),
         (q4, q3),
     )
     for origin, terminal in connections:
         transducer.add_arc(Arc(origin, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), terminal))

     transducer.clear_dead_states()
     expected = get_pickle("clear_dead_states_test_transducer")
     self.assertEqual(transducer, expected)