def remove_suboptimal_paths(transducer):
    """Prune ``transducer`` in place so only arcs on best-cost paths remain.

    Every state starts with the infinite cost vector, except the initial
    state, which starts with the all-zero vector. States are then settled one
    at a time in the order chosen by ``get_cheapest_state``. Each settled
    state offers ``its cost + arc cost`` to every still-unsettled state it has
    an arc to, and the ``max`` of the current and the offered cost is kept.

    NOTE(review): keeping the ``max`` presumes ``CostVector`` compares the
    better (more harmonic) vector as greater -- confirm against ``CostVector``.

    Afterwards the final state picked by ``get_cheapest_state`` becomes the
    transducer's final state, and only arcs for which
    ``cost[origin] + arc cost == cost[terminal]`` are kept.

    :param transducer: the transducer to prune; it is mutated.
    :return: the same ``transducer`` object.
    :raises KeyError: whatever ``get_cheapest_state`` raises while choosing
        the final state propagates unchanged to the caller.
    """
    pending = set(transducer.states)
    costs = {state: CostVector.get_inf_vector() for state in pending}
    costs[transducer.initial_state] = CostVector.get_vector(
        transducer.get_length_of_cost_vectors(), 0)

    while pending:
        cheapest_state = get_cheapest_state(list(pending), costs)
        pending.remove(cheapest_state)
        # Relax every arc leaving the newly settled state towards states that
        # are still pending.
        for state in pending:
            for arc in transducer.get_arcs_by_origin_and_terminal_state(cheapest_state, state):
                costs[state] = max(costs[state], costs[cheapest_state] + arc.cost_vector)

    # The former try/except around this call only re-raised the same KeyError,
    # so the call is made directly.
    most_harmonic_final = get_cheapest_state(transducer.get_final_states(), costs)
    transducer.set_final_state(most_harmonic_final)

    # Keep an arc only if taking it reproduces exactly the cost recorded for
    # its terminal state.
    transducer.set_arcs([
        arc for arc in transducer.get_arcs()
        if costs[arc.origin_state] + arc.cost_vector == costs[arc.terminal_state]
    ])

    return transducer
コード例 #2
0
    def _make_transducer(self):
        """Build the transducer for this Dep constraint.

        All arcs loop on the state returned by
        ``_base_faithfulness_transducer``. Per segment, three arcs are added:
        identity (segment -> segment) and deletion (segment -> NULL_SEGMENT)
        at cost 0, then insertion (NULL_SEGMENT -> segment) at cost 1 when the
        segment has ``self.feature_bundle`` and cost 0 otherwise. When the
        ``ALLOW_CANDIDATES_WITH_CHANGED_SEGMENTS`` configuration is truthy, a
        cost-0 arc is added for every pair from ``permutations(segments, 2)``.
        """
        base = super(DepConstraint, self)._base_faithfulness_transducer()
        transducer, segments, state = base

        def add_self_loop(arc_input, arc_output, cost):
            # Every arc of this constraint starts and ends on the same state.
            transducer.add_arc(
                Arc(state, arc_input, arc_output,
                    CostVector.get_vector(1, cost), state))

        for segment in segments:
            add_self_loop(segment, segment, 0)
            add_self_loop(segment, NULL_SEGMENT, 0)
            # Only inserting a segment that carries the bundle is penalised.
            insertion_cost = (
                1 if segment.has_feature_bundle(self.feature_bundle) else 0)
            add_self_loop(NULL_SEGMENT, segment, insertion_cost)

        if get_configuration("ALLOW_CANDIDATES_WITH_CHANGED_SEGMENTS"):
            for source_segment, target_segment in permutations(segments, 2):
                add_self_loop(source_segment, target_segment, 0)

        return transducer
コード例 #3
0
ファイル: constraint.py プロジェクト: taucompling/otml
    def _make_transducer(self):
        """Build the transducer for this Faith constraint.

        All arcs loop on the state returned by
        ``_base_faithfulness_transducer``. Per segment: insertion
        (NULL_SEGMENT -> segment) and deletion (segment -> NULL_SEGMENT) cost
        1, identity (segment -> segment) costs 0. When
        ``configurations["ALLOW_CANDIDATES_WITH_CHANGED_SEGMENTS"]`` is truthy,
        every pair from ``permutations(segments, 2)`` gets a cost-1 arc.
        """
        transducer, segments, state = super(FaithConstraint, self)._base_faithfulness_transducer()

        for segment in segments:
            arc_specs = (
                (NULL_SEGMENT, segment, 1),  # insertion
                (segment, NULL_SEGMENT, 1),  # deletion
                (segment, segment, 0),       # faithful mapping
            )
            for arc_input, arc_output, cost in arc_specs:
                transducer.add_arc(Arc(state, arc_input, arc_output, CostVector.get_vector(1, cost), state))

        changed_segments_allowed = configurations["ALLOW_CANDIDATES_WITH_CHANGED_SEGMENTS"]
        if changed_segments_allowed:
            for source_segment, target_segment in permutations(segments, 2):
                changed_arc = Arc(state, source_segment, target_segment, CostVector.get_vector(1, 1), state)
                transducer.add_arc(changed_arc)

        return transducer
コード例 #4
0
ファイル: lexicon.py プロジェクト: taucompling/otml
    def _make_transducer(self):
        """Build a linear transducer that walks through this word's segments.

        States ``q0`` .. ``qn`` are created, where ``n = len(self.word_string)``.
        Each state gets a NULL_SEGMENT -> JOKER_SEGMENT self-loop, and every
        state except the last gets an arc with input ``word_segments[i]`` and
        output JOKER_SEGMENT leading to the next state. All arcs carry the
        empty cost vector. ``q0`` is set as the initial state and ``qn`` is
        added as a final state.
        """
        transducer = Transducer(self.feature_table.get_segments(), length_of_cost_vectors=0)
        word_segments = self.get_segments()
        last_index = len(self.word_string)
        states = [State("q" + str(position), position) for position in range(last_index + 1)]

        for position, state in enumerate(states):
            transducer.add_state(state)
            transducer.add_arc(Arc(state, NULL_SEGMENT, JOKER_SEGMENT, CostVector.get_empty_vector(), state))
            if position == last_index:
                # The last state has no following segment to consume.
                continue
            transducer.add_arc(Arc(state, word_segments[position], JOKER_SEGMENT, CostVector.get_empty_vector(), states[position + 1]))

        transducer.initial_state = states[0]
        transducer.add_final_state(states[-1])
        return transducer
コード例 #5
0
    def _make_transducer(self):
        """Create the chain-shaped transducer for this word.

        One state per position ``0 .. len(self.word_string)`` is created and
        labelled ``q<position>``. Each state receives a self-loop mapping
        NULL_SEGMENT to JOKER_SEGMENT; each state but the last also receives
        an arc that reads the word segment at that position, outputs
        JOKER_SEGMENT and moves to the following state. Every arc uses
        ``CostVector.get_empty_vector()``. The first state becomes the initial
        state and the last one is added as a final state.
        """
        alphabet = self.feature_table.get_segments()
        transducer = Transducer(alphabet, length_of_cost_vectors=0)
        word_segments = self.get_segments()
        word_length = len(self.word_string)

        states = []
        for index in range(word_length + 1):
            states.append(State("q{}".format(index), index))

        for index, state in enumerate(states):
            transducer.add_state(state)
            stay_arc = Arc(state, NULL_SEGMENT, JOKER_SEGMENT, CostVector.get_empty_vector(), state)
            transducer.add_arc(stay_arc)
            if index < word_length:
                advance_arc = Arc(state, word_segments[index], JOKER_SEGMENT, CostVector.get_empty_vector(), states[index + 1])
                transducer.add_arc(advance_arc)

        transducer.initial_state = states[0]
        transducer.add_final_state(states[word_length])
        return transducer
コード例 #6
0
 def _make_transducer(self):
     """Build the transducer for this Ident constraint.

     All arcs loop on the state returned by
     ``_base_faithfulness_transducer``. Per segment, identity, deletion
     (segment -> NULL_SEGMENT) and insertion (NULL_SEGMENT -> segment) arcs
     are added at cost 0. Then an arc from that segment to every segment is
     added: it costs 1 only when the input segment has
     ``self.feature_bundle`` and the output segment does not, otherwise 0.
     """
     transducer, segments, state = super(
         IdentConstraint, self)._base_faithfulness_transducer()

     def add_self_loop(arc_input, arc_output, cost):
         # Every arc of this constraint starts and ends on the same state.
         transducer.add_arc(
             Arc(state, arc_input, arc_output,
                 CostVector.get_vector(1, cost), state))

     for input_segment in segments:
         add_self_loop(input_segment, input_segment, 0)
         add_self_loop(input_segment, NULL_SEGMENT, 0)
         add_self_loop(NULL_SEGMENT, input_segment, 0)

         input_has_bundle = input_segment.has_feature_bundle(
             self.feature_bundle)
         for output_segment in segments:
             # The output segment is only inspected when the input segment
             # carries the bundle.
             bundle_lost = input_has_bundle and not (
                 output_segment.has_feature_bundle(self.feature_bundle))
             add_self_loop(input_segment, output_segment,
                           1 if bundle_lost else 0)
     return transducer
def _get_path_cost(transducer):
    """Return the accumulated cost of a path from the initial to a final state.

    The path is walked backwards. Starting from
    ``transducer.get_a_final_state()``, the first arc returned by
    ``get_arcs_by_terminal_state`` is followed to its origin state and its
    cost vector is added to the running total, until the initial state is
    reached.

    :param transducer: transducer to walk; it is only read here.
    :return: the sum of the cost vectors of the arcs followed, starting from
        an all-zero vector of the transducer's cost-vector length.
    :raises TransducerOptimizationError: if a state on the way has no
        incoming arcs, or if the walk comes back to a state it has already
        visited. The latter covers cycles of any length, not only a
        self-loop; with a stable arc order such a cycle would otherwise keep
        the loop running forever.
    """
    current_state = transducer.get_a_final_state()
    path_cost = CostVector.get_vector(transducer.get_length_of_cost_vectors(), 0)
    initial_state = transducer.initial_state
    # States already stepped through, used to detect multi-state cycles.
    visited_states = {current_state}

    while current_state != initial_state:
        arcs_to_current_state = transducer.get_arcs_by_terminal_state(current_state)
        if not arcs_to_current_state:
            raise TransducerOptimizationError("No arcs leading to the current state. It is a dead state.")

        arc = arcs_to_current_state[0]
        previous_state = arc.origin_state
        if previous_state == current_state or previous_state in visited_states:
            raise TransducerOptimizationError('Cyclic Transducer')

        visited_states.add(previous_state)
        current_state = previous_state
        path_cost += arc.cost_vector

    return path_cost
def optimize_transducer_grammar_for_word(word, eval):
    """Return a new transducer holding only the best arcs of ``eval`` for ``word``.

    Arcs of ``eval`` are grouped by ``arc.origin_state.index``. For each index
    ``0 .. len(word.get_segments()) - 1``, in order, ``_best_arcs`` selects
    arcs from that group given the state costs known so far. Each selected arc
    and its terminal state are added to the new transducer, and the terminal
    state's cost is recorded as ``origin cost + arc cost``.

    Of ``eval.final_states``, the ones whose recorded cost compares greatest
    (ties are all kept) become the final states of the result.

    :param word: object whose ``get_segments()`` length bounds the indices
        that are processed.
    :param eval: the transducer to optimise. The name shadows the builtin but
        is kept so existing callers are unaffected.
    :return: the newly built ``Transducer``.
    :raises KeyError: if no arc leaves a state with one of the processed
        indices, or if a final state of ``eval`` never received a cost.
    :raises IndexError: if ``eval.final_states`` is an empty sequence.
    """
    # A plain dict (not a defaultdict) is used on purpose: a missing index
    # must still raise KeyError in the loop below.
    arcs_by_index = {}
    for arc in eval._arcs:
        arcs_by_index.setdefault(arc.origin_state.index, []).append(arc)

    new_transducer = Transducer(eval.get_alphabet())

    state_costs = {}
    new_transducer.add_state(eval.initial_state)
    new_transducer.initial_state = eval.initial_state
    state_costs[eval.initial_state] = CostVector.get_vector(eval.get_length_of_cost_vectors(), 0)

    for index in range(len(word.get_segments())):
        new_arcs = _best_arcs(arcs_by_index[index], state_costs)
        for arc in new_arcs:
            new_transducer.add_arc(arc)
            new_transducer.add_state(arc.terminal_state)
            state_costs[arc.terminal_state] = state_costs[arc.origin_state] + arc.cost_vector

    # Keep the final state(s) with the greatest recorded cost.
    # NOTE(review): "greater" presumably means "more harmonic" for CostVector
    # -- confirm against its comparison operators.
    new_final_states = [eval.final_states[0]]
    for state in eval.final_states[1:]:
        state_cost = state_costs[state]
        final_cost = state_costs[new_final_states[0]]
        if state_cost > final_cost:
            new_final_states = [state]
        elif state_cost == final_cost:
            new_final_states.append(state)

    for state in new_final_states:
        new_transducer.add_final_state(state)

    # TODO: try new_transducer.clear_dead_states(with_impasse_states=True)

    return new_transducer
コード例 #9
0
ファイル: constraint.py プロジェクト: taucompling/otml
 def _make_transducer(self):
     """Build the transducer for this Ident constraint.

     Every arc loops on the state returned by
     ``_base_faithfulness_transducer``. For each input segment, zero-cost
     identity, deletion and insertion arcs are added first. Then one arc per
     (input segment, output segment) pair is added, costing 1 exactly when
     the input segment has ``self.feature_bundle`` and the output segment
     lacks it, and 0 in every other case.
     """
     transducer, segments, state = super(IdentConstraint, self)._base_faithfulness_transducer()
     for input_segment in segments:
         free_arcs = (
             (input_segment, input_segment),  # identity
             (input_segment, NULL_SEGMENT),   # deletion
             (NULL_SEGMENT, input_segment),   # insertion
         )
         for arc_input, arc_output in free_arcs:
             transducer.add_arc(Arc(state, arc_input, arc_output, CostVector.get_vector(1, 0), state))

         if not input_segment.has_feature_bundle(self.feature_bundle):
             # Nothing to preserve: every mapping of this segment is free.
             for output_segment in segments:
                 transducer.add_arc(Arc(state, input_segment, output_segment, CostVector.get_vector(1, 0), state))
             continue

         for output_segment in segments:
             violation = 0 if output_segment.has_feature_bundle(self.feature_bundle) else 1
             transducer.add_arc(Arc(state, input_segment, output_segment, CostVector.get_vector(1, violation), state))
     return transducer
コード例 #10
0
    def _make_transducer(self):
        """Build the transducer for a constraint over ``self.feature_bundles``.

        With a single feature bundle (``n == 0``) the transducer has one state
        whose arcs cost 1 for segments that have that bundle and 0 otherwise.
        With more bundles, states ``q{i}|{j}`` (``0 <= j < i <= n``) are
        created in addition to ``q0|0``; only arcs leaving a level-``n`` state
        can carry a non-zero cost, namely when the segment has the last
        bundle. Dead states are cleared at the end and every remaining state
        gets a zero-cost JOKER_SEGMENT -> NULL_SEGMENT self-loop.

        NOTE(review): the meaning of the two state indices (called "level" and
        "mem" below) is inferred from local variable names only -- confirm.

        :return: the constructed ``Transducer``.
        """
        def compute_num_of_max_satisfied_bundle(segment):
            """Count how many of the first ``n`` bundles ``segment`` has in a row, from bundle 0."""
            i = 0
            while i < n and symbol_bundle_characteristic_matrix[segment][i]:
                i += 1
            return i

        def compute_highest_num_of_satisfied_bundle(segment, j):
            """Return the largest ``k`` in ``1 .. j + 1`` with bundle ``k - 1`` present on ``segment``, or 0."""
            for k in range(j + 1, 0, -1):
                if symbol_bundle_characteristic_matrix[segment][k - 1]:
                    return k
            else:
                # Reached only when no bundle in the range matched.
                return 0

        # n is the index of the last feature bundle.
        n = len(self.feature_bundles) - 1
        segments = self.feature_table.get_segments()
        transducer = Transducer(segments, name=str(self))

        # For each segment, one has_feature_bundle() result per bundle.
        symbol_bundle_characteristic_matrix = {
            segment: [
                segment.has_feature_bundle(self.feature_bundles[i])
                for i in range(n + 1)
            ]
            for segment in segments
        }

        # states[i][j] is pre-filled with the placeholder 0 for 0 <= j < i;
        # states[0] starts empty and receives the initial state below.
        states = {i: {j: 0 for j in range(i)} for i in range(n + 1)}

        initial_state = State(
            'q0|0'
        )  # NOTE(review): the label is a plain string and is not changed later in this function
        states[0][0] = initial_state

        # NOTE(review): presumably registers initial_state as the only state
        # (initial and final) -- confirm against Transducer.
        transducer.set_as_single_state(initial_state)

        if not n:
            # Single bundle: a segment costs 1 if it has the bundle, else 0.
            for segment in segments:
                transducer.add_arc(
                    Arc(
                        states[0][0], JOKER_SEGMENT, segment,
                        CostVector([
                            int(symbol_bundle_characteristic_matrix[segment]
                                [0])
                        ]), states[0][0]))
            transducer.add_arc(
                Arc(states[0][0], JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]),
                    states[0][0]))

        else:
            # Create and register the states q{i}|{j} for 0 <= j < i <= n.
            for i in range(0, n + 1):
                for j in range(i):
                    state = State('q{0}|{1}'.format(i, j))
                    states[i][j] = state
                    transducer.add_state(state)
            max_num_of_satisfied_bundle_by_segment = {
                segment: compute_num_of_max_satisfied_bundle(segment)
                for segment in segments
            }
            # From q0|0: the boolean "segment has bundle 0" is used directly as
            # the dict key (True == 1, False == 0), so the arc leads to
            # states[1][0] or loops back to states[0][0].
            for segment in segments:
                transducer.add_arc(
                    Arc(
                        states[0][0], JOKER_SEGMENT, segment, CostVector([0]),
                        states[symbol_bundle_characteristic_matrix[segment]
                               [0]][0]))
            for i in range(n + 1):
                for j in range(i):
                    state = states[i][j]
                    transducer.add_final_state(state)
                    if i != n:
                        for segment in segments:
                            if symbol_bundle_characteristic_matrix[segment][i]:
                                # Segment has bundle i: move one level up.
                                new_state_level = i + 1
                                new_state_mem = min([
                                    j + 1,
                                    max_num_of_satisfied_bundle_by_segment[
                                        segment]
                                ])
                            else:
                                # Segment lacks bundle i: fall back to the
                                # level computed from j.
                                new_state_level = compute_highest_num_of_satisfied_bundle(
                                    segment, j)
                                new_state_mem = min([
                                    max_num_of_satisfied_bundle_by_segment[
                                        segment],
                                    abs(new_state_level - 1)
                                ])
                            new_terminus = states[new_state_level][
                                new_state_mem]
                            # Zero-cost transition, plus a zero-cost self-loop
                            # on the target state for the same segment.
                            transducer.add_arc(
                                Arc(state, JOKER_SEGMENT, segment,
                                    CostVector([0]), new_terminus))
                            transducer.add_arc(
                                Arc(new_terminus, JOKER_SEGMENT, segment,
                                    CostVector([0]), new_terminus))
                    else:  # i = n
                        # Top level: the arc costs 1 when the segment has the
                        # last bundle (index n), 0 otherwise.
                        for segment in segments:
                            new_state_level = compute_highest_num_of_satisfied_bundle(
                                segment, j)
                            new_state_mem = min([
                                max_num_of_satisfied_bundle_by_segment[
                                    segment],
                                abs(new_state_level - 1)
                            ])
                            new_terminus = states[new_state_level][
                                new_state_mem]
                            transducer.add_arc(
                                Arc(
                                    state, JOKER_SEGMENT, segment,
                                    CostVector([
                                        int(symbol_bundle_characteristic_matrix[
                                            segment][i])
                                    ]), new_terminus))

        transducer.clear_dead_states()
        # Every remaining state gets a zero-cost JOKER_SEGMENT -> NULL_SEGMENT
        # self-loop. NOTE(review): in the single-bundle case the same arc was
        # already added above -- confirm duplicates are harmless.
        for state in transducer.states:
            transducer.add_arc(
                Arc(state, JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]),
                    state))
        return transducer
コード例 #11
0
ファイル: test_transducer.py プロジェクト: taucompling/otml
 def test_costVector_get_vector_with_size_n_and_number_m(self):
     """Check ``get_vector`` and ``get_empty_vector`` against explicit ``CostVector`` lists."""
     four_zeros = CostVector.get_vector(4, 0)
     self.assertEqual(four_zeros, CostVector([0, 0, 0, 0]))

     single_one = CostVector.get_vector(1, 1)
     self.assertEqual(single_one, CostVector([1]))

     zero_length = CostVector.get_vector(0, 0)
     self.assertEqual(zero_length, CostVector([]))

     empty = CostVector.get_empty_vector()
     self.assertEqual(empty, CostVector([]))
コード例 #12
0
ファイル: test_transducer.py プロジェクト: taucompling/otml
 def test_costVector_comparison(self):
     """Check the ``>`` operator on cost vectors, including the infinite vector."""
     # A vector with no violations compares greater than one with a violation.
     all_zeros = CostVector([0, 0, 0, 0, 0])
     one_violation = CostVector([0, 0, 1, 0, 0])
     self.assertTrue(all_zeros > one_violation)

     left = CostVector([1, 0, 1])
     right = CostVector([0, 2, 0])
     self.assertFalse(left > right)

     # Any finite vector compares greater than the infinite vector ...
     finite = CostVector([1000, 0, 76])
     self.assertTrue(finite > CostVector.get_inf_vector())

     # ... and the infinite vector is greater than nothing, itself included.
     infinite = CostVector.get_inf_vector()
     self.assertFalse(infinite > CostVector([0, 1, 2]))

     first_infinite = CostVector.get_inf_vector()
     second_infinite = CostVector.get_inf_vector()
     self.assertFalse(first_infinite > second_infinite)