Exemple #1
0
    def _make_transducer(self):
        """Build the two-state transducer for this constraint.

        The machine starts in ``Precede1`` and moves to ``Precede2`` on a
        stress segment (``'``). A vowel arc taken from ``Precede1`` carries a
        cost of 1; every other arc carries a cost of 0. Both states are final,
        and each state also gets a zero-cost ``NULL_SEGMENT`` self-loop.

        Returns:
            The constructed ``Transducer``.

        Raises:
            ConstraintError: if a segment's symbol is not in ``yimas_vowels``,
                is not the stress mark and is not in ``yimas_cons``.
        """
        alphabet = self.feature_table.get_segments()
        transducer = Transducer(alphabet, name=str(self))

        before_stress = State('Precede1')
        after_stress = State('Precede2')  # stress already seen: vowels are free from here on
        for node in (before_stress, after_stress):
            transducer.add_state(node)
        transducer.initial_state = before_stress
        for node in (before_stress, after_stress):
            transducer.add_final_state(node)

        for segment in alphabet:
            symbol = segment.get_symbol()
            # Only the arc leaving Precede1 depends on the segment class.
            if symbol in yimas_vowels:
                first_cost, first_target = 1, before_stress
            elif symbol == "'":
                first_cost, first_target = 0, after_stress
            elif symbol in yimas_cons:
                first_cost, first_target = 0, before_stress
            else:
                raise ConstraintError("{} not supported in this constraint".format(symbol))

            transducer.add_arc(
                Arc(before_stress, JOKER_SEGMENT, segment, CostVector([first_cost]), first_target))
            transducer.add_arc(
                Arc(after_stress, JOKER_SEGMENT, segment, CostVector([0]), after_stress))

        for node in transducer.states:
            transducer.add_arc(Arc(node, JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]), node))

        return transducer
Exemple #2
0
    def _make_transducer(self):
        """Build the two-state transducer for this constraint.

        ``Contiguity1`` is the initial state; ``Contiguity2`` is entered by the
        identity arc of the stress segment (``'``). Arcs that pair a segment
        with ``NULL_SEGMENT`` cost 0 from ``Contiguity1`` and 1 from
        ``Contiguity2`` and always lead to ``Contiguity1``. Identity arcs
        (segment paired with itself) are always free. Both states are final.

        Returns:
            The constructed ``Transducer``.

        Raises:
            ConstraintError: if a segment's symbol is not in ``yimas_vowels``,
                is not the stress mark and is not in ``yimas_cons``.
        """
        alphabet = self.feature_table.get_segments()
        transducer = Transducer(alphabet, name=str(self))

        plain = State('Contiguity1')
        stressed = State('Contiguity2')
        for node in (plain, stressed):
            transducer.add_state(node)
        transducer.initial_state = plain
        for node in (plain, stressed):
            transducer.add_final_state(node)

        for segment in alphabet:
            # Arcs pairing the segment with NULL_SEGMENT, in both directions.
            for origin, cost in ((plain, 0), (stressed, 1)):
                transducer.add_arc(Arc(origin, NULL_SEGMENT, segment, CostVector([cost]), plain))
                transducer.add_arc(Arc(origin, segment, NULL_SEGMENT, CostVector([cost]), plain))

            symbol = segment.get_symbol()
            # Identity arcs: only the stress mark leads to Contiguity2.
            if symbol in yimas_vowels:
                identity_target = plain
            elif symbol == "'":
                identity_target = stressed
            elif symbol in yimas_cons:
                identity_target = plain
            else:
                raise ConstraintError("{} not supported in this constraint".format(symbol))

            for origin in (plain, stressed):
                transducer.add_arc(Arc(origin, segment, segment, CostVector([0]), identity_target))

        return transducer
Exemple #3
0
    def _make_transducer(self):
        """Build a linear transducer with one state per position in the word.

        States ``q0`` .. ``qn`` (``n = len(self.word_string)``) form a chain:
        state ``i`` is linked to state ``i + 1`` by an arc labelled with the
        i-th word segment and ``JOKER_SEGMENT``. Every state also carries a
        ``NULL_SEGMENT``/``JOKER_SEGMENT`` self-loop. All arcs use the empty
        cost vector. ``q0`` is initial and ``qn`` is the only final state.

        Returns:
            The constructed ``Transducer``.
        """
        alphabet = self.feature_table.get_segments()
        transducer = Transducer(alphabet, length_of_cost_vectors=0)
        word_segments = self.get_segments()
        last_position = len(self.word_string)
        chain = [State("q{}".format(position), position) for position in range(last_position + 1)]

        for position, current in enumerate(chain):
            transducer.add_state(current)
            transducer.add_arc(
                Arc(current, NULL_SEGMENT, JOKER_SEGMENT, CostVector.get_empty_vector(), current))
            if position == last_position:
                continue  # the last state has no successor
            transducer.add_arc(
                Arc(current, word_segments[position], JOKER_SEGMENT,
                    CostVector.get_empty_vector(), chain[position + 1]))

        transducer.initial_state = chain[0]
        transducer.add_final_state(chain[last_position])
        return transducer
    def _make_transducer(self):
        """Build the chain-shaped transducer that spells out this word.

        One state is created per position ``0 .. len(self.word_string)``.
        Each state gets a ``NULL_SEGMENT``/``JOKER_SEGMENT`` self-loop, and
        each state but the last gets an arc to its successor labelled with the
        word segment at that position. All cost vectors are empty. The first
        state is initial and the last one is final.

        Returns:
            The constructed ``Transducer``.
        """
        table_segments = self.feature_table.get_segments()
        transducer = Transducer(table_segments, length_of_cost_vectors=0)
        word_segments = self.get_segments()
        word_length = len(self.word_string)
        positions = range(word_length + 1)
        states = [State("q{}".format(idx), idx) for idx in positions]

        for idx, here in enumerate(states):
            transducer.add_state(here)
            self_loop = Arc(here, NULL_SEGMENT, JOKER_SEGMENT, CostVector.get_empty_vector(), here)
            transducer.add_arc(self_loop)
            if idx < word_length:
                step = Arc(here, word_segments[idx], JOKER_SEGMENT,
                           CostVector.get_empty_vector(), states[idx + 1])
                transducer.add_arc(step)

        transducer.initial_state = states[0]
        transducer.add_final_state(states[word_length])
        return transducer
def optimize_transducer_grammar_for_word(word, eval):
    """Return a pruned copy of ``eval`` that keeps only the best arcs for ``word``.

    Arcs of ``eval`` are grouped by the ``index`` of their origin state. For
    each position of ``word`` the arcs chosen by ``_best_arcs`` are copied to
    the new transducer and the accumulated cost of every reached state is
    recorded. Finally, the final states of ``eval`` whose accumulated cost
    ranks highest under ``CostVector``'s ``>`` are marked final in the result.

    Args:
        word: object exposing ``get_segments()``; only the number of segments
            is used here.
        eval: the transducer to prune. The name shadows the builtin but is
            kept because it is part of the public signature.

    Returns:
        A new ``Transducer`` over the alphabet of ``eval``.

    Raises:
        KeyError: if some position has no arcs in ``eval`` or a final state of
            ``eval`` was never reached.
        IndexError: if ``eval`` has no final states.
    """
    # Group arcs by the index of the state they leave.
    arcs_by_index = {}
    for arc in eval._arcs:
        arcs_by_index.setdefault(arc.origin_state.index, []).append(arc)

    new_transducer = Transducer(eval.get_alphabet())

    state_costs = {}
    new_transducer.add_state(eval.initial_state)
    new_transducer.initial_state = eval.initial_state
    state_costs[eval.initial_state] = CostVector.get_vector(eval.get_length_of_cost_vectors(), 0)

    for index in range(len(word.get_segments())):
        for arc in _best_arcs(arcs_by_index[index], state_costs):
            new_transducer.add_arc(arc)
            new_transducer.add_state(arc.terminal_state)
            state_costs[arc.terminal_state] = state_costs[arc.origin_state] + arc.cost_vector

    # Keep the final state(s) ranked highest by CostVector's ``>``.
    # NOTE(review): this assumes ``>`` means "strictly better" - confirm
    # against CostVector's comparison semantics.
    new_final_states = [eval.final_states[0]]
    for state in eval.final_states[1:]:
        state_cost = state_costs[state]
        final_cost = state_costs[new_final_states[0]]
        if state_cost > final_cost:
            new_final_states = [state]
        elif state_cost == final_cost:
            new_final_states.append(state)

    for state in new_final_states:
        new_transducer.add_final_state(state)

    # TODO: try new_transducer.clear_dead_states(with_impasse_states=True)

    return new_transducer
Exemple #6
0
 def test_transducer_clear_dead_states(self):
     """clear_dead_states() on a four-state machine must match the pickled fixture.

     q1 is initial and q2 is the only final state; q3 and q4 have arcs only
     among themselves (q3 -> q3, q4 -> q3).
     """
     transducer = Transducer(self.feature_table.get_segments())
     q1, q2, q3, q4 = [State(label) for label in ('q1', 'q2', 'q3', 'q4')]
     for node in (q1, q2, q3, q4):
         transducer.add_state(node)
     transducer.initial_state = q1
     transducer.add_final_state(q2)

     # (origin, terminus) pairs; every arc is JOKER_SEGMENT / NULL_SEGMENT with an empty cost.
     links = ((q1, q2), (q1, q1), (q2, q2), (q3, q3), (q4, q3))
     for origin, terminus in links:
         transducer.add_arc(Arc(origin, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), terminus))

     transducer.clear_dead_states()
     expected = get_pickle("clear_dead_states_test_transducer")
     self.assertEqual(transducer, expected)
Exemple #7
0
    def _make_transducer(self):
        """Build the transducer that tracks ``self.feature_bundles`` in sequence.

        States are labelled ``q{level}|{mem}``. With a single feature bundle
        the machine has one state, and a segment arc costs 1 when the segment
        has that bundle. With several bundles, arcs leaving a state at the top
        level ``n`` cost 1 when the segment has the last bundle; all other
        arcs cost 0. After dead states are cleared, every remaining state gets
        a zero-cost ``NULL_SEGMENT`` self-loop.

        Returns:
            The constructed ``Transducer``.
        """

        def compute_num_of_max_satisfied_bundle(segment):
            """Return the length of the leading run of bundles the segment has, capped at ``n``."""
            i = 0
            while i < n and symbol_bundle_characteristic_matrix[segment][i]:
                i += 1
            return i

        def compute_highest_num_of_satisfied_bundle(segment, j):
            """Return the largest ``k`` in ``1..j+1`` such that the segment has bundle ``k-1``, else 0."""
            for k in range(j + 1, 0,-1):
                if symbol_bundle_characteristic_matrix[segment][k-1]:
                    return k
            else:
                # for/else: reached only when the loop found no satisfied bundle
                return 0

        # n is the index of the last feature bundle
        n = len(self.feature_bundles) - 1
        segments = self.feature_table.get_segments()
        transducer = Transducer(segments, name=str(self))

        # segment -> one has_feature_bundle() result per bundle, in bundle order
        symbol_bundle_characteristic_matrix = {segment: [segment.has_feature_bundle(self.feature_bundles[i])
                                                         for i in range(n+1)]
                                               for segment in segments}


        # states[i][j] placeholders for 0 <= j < i; filled with State objects below
        states = {i: {j: 0 for j in range(i)} for i in range(n+1)}

        initial_state = State('q0|0')    # NOTE(review): the label is a plain string here and is never changed later in this function
        states[0][0] = initial_state

        # NOTE(review): presumably registers the state as both initial and final - confirm in Transducer
        transducer.set_as_single_state(initial_state)


        if not n:
            # single bundle: one state; a segment arc costs 1 when the segment has the bundle
            for segment in segments:
                transducer.add_arc(Arc(states[0][0], JOKER_SEGMENT, segment, CostVector([int(symbol_bundle_characteristic_matrix[segment][0])]), states[0][0]))
            transducer.add_arc(Arc(states[0][0], JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]), states[0][0]))

        else:
            # create q{i}|{j} for every level i >= 1 and memory j < i
            for i in range(0, n+1):
                for j in range(i):
                    state = State('q{0}|{1}'.format(i,j))
                    states[i][j] = state
                    transducer.add_state(state)
            max_num_of_satisfied_bundle_by_segment = {segment: compute_num_of_max_satisfied_bundle(segment)
                                                      for segment in segments}
            # arcs leaving the initial state: the has-bundle-0 result is used
            # directly as the level key (a True result selects states[1])
            for segment in segments:
                transducer.add_arc(Arc(states[0][0], JOKER_SEGMENT, segment, CostVector([0]),
                                       states[symbol_bundle_characteristic_matrix[segment][0]][0]))
            for i in range(n+1):
                for j in range(i):
                    state = states[i][j]
                    transducer.add_final_state(state)
                    if i != n:
                        for segment in segments:
                            if symbol_bundle_characteristic_matrix[segment][i]:
                                # segment has bundle i: advance one level
                                new_state_level = i+1
                                new_state_mem = min([j+1, max_num_of_satisfied_bundle_by_segment[segment]])
                            else:
                                # fall back to the highest level this segment supports within j+1.
                                # When that level is 0 the segment lacks bundle 0, so its
                                # leading-run count is 0 and min() picks 0 despite abs(-1) == 1.
                                new_state_level = compute_highest_num_of_satisfied_bundle(segment, j)
                                new_state_mem = min([max_num_of_satisfied_bundle_by_segment[segment],
                                                     abs(new_state_level - 1)])
                            new_terminus = states[new_state_level][new_state_mem]
                            transducer.add_arc(Arc(state, JOKER_SEGMENT, segment, CostVector([0]), new_terminus))
                    else:  # i = n
                        # top level: the arc costs 1 when the segment has the last bundle
                        for segment in segments:
                            new_state_level = compute_highest_num_of_satisfied_bundle(segment, j)
                            new_state_mem = min([max_num_of_satisfied_bundle_by_segment[segment],
                                                 abs(new_state_level - 1)])
                            new_terminus = states[new_state_level][new_state_mem]
                            transducer.add_arc(Arc(state, JOKER_SEGMENT, segment,
                                                   CostVector([int(symbol_bundle_characteristic_matrix[segment][i])]), new_terminus))

        transducer.clear_dead_states()
        # zero-cost NULL_SEGMENT self-loop on every state that survived the cleanup
        for state in transducer.states:
            transducer.add_arc(Arc( state, JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]), state))

        return transducer
    def _make_transducer(self):
        """Build the transducer that tracks ``self.feature_bundles`` in sequence.

        States are labelled ``q{level}|{mem}``. With a single feature bundle
        the machine has one state, and a segment arc costs 1 when the segment
        has that bundle. With several bundles, arcs leaving a state at the top
        level ``n`` cost 1 when the segment has the last bundle; all other
        arcs cost 0. Below the top level, each transition also adds a
        zero-cost self-loop on its target state for the same segment. After
        dead states are cleared, every remaining state gets a zero-cost
        ``NULL_SEGMENT`` self-loop.

        Returns:
            The constructed ``Transducer``.
        """
        def compute_num_of_max_satisfied_bundle(segment):
            """Return the length of the leading run of bundles the segment has, capped at ``n``."""
            i = 0
            while i < n and symbol_bundle_characteristic_matrix[segment][i]:
                i += 1
            return i

        def compute_highest_num_of_satisfied_bundle(segment, j):
            """Return the largest ``k`` in ``1..j+1`` such that the segment has bundle ``k-1``, else 0."""
            for k in range(j + 1, 0, -1):
                if symbol_bundle_characteristic_matrix[segment][k - 1]:
                    return k
            else:
                # for/else: reached only when the loop found no satisfied bundle
                return 0

        # n is the index of the last feature bundle
        n = len(self.feature_bundles) - 1
        segments = self.feature_table.get_segments()
        transducer = Transducer(segments, name=str(self))

        # segment -> one has_feature_bundle() result per bundle, in bundle order
        symbol_bundle_characteristic_matrix = {
            segment: [
                segment.has_feature_bundle(self.feature_bundles[i])
                for i in range(n + 1)
            ]
            for segment in segments
        }

        # states[i][j] placeholders for 0 <= j < i; filled with State objects below
        states = {i: {j: 0 for j in range(i)} for i in range(n + 1)}

        initial_state = State(
            'q0|0'
        )  # NOTE(review): the label is a plain string here and is never changed later in this function
        states[0][0] = initial_state

        # NOTE(review): presumably registers the state as both initial and final - confirm in Transducer
        transducer.set_as_single_state(initial_state)

        if not n:
            # single bundle: one state; a segment arc costs 1 when the segment has the bundle
            for segment in segments:
                transducer.add_arc(
                    Arc(
                        states[0][0], JOKER_SEGMENT, segment,
                        CostVector([
                            int(symbol_bundle_characteristic_matrix[segment]
                                [0])
                        ]), states[0][0]))
            transducer.add_arc(
                Arc(states[0][0], JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]),
                    states[0][0]))

        else:
            # create q{i}|{j} for every level i >= 1 and memory j < i
            for i in range(0, n + 1):
                for j in range(i):
                    state = State('q{0}|{1}'.format(i, j))
                    states[i][j] = state
                    transducer.add_state(state)
            max_num_of_satisfied_bundle_by_segment = {
                segment: compute_num_of_max_satisfied_bundle(segment)
                for segment in segments
            }
            # arcs leaving the initial state: the has-bundle-0 result is used
            # directly as the level key (a True result selects states[1])
            for segment in segments:
                transducer.add_arc(
                    Arc(
                        states[0][0], JOKER_SEGMENT, segment, CostVector([0]),
                        states[symbol_bundle_characteristic_matrix[segment]
                               [0]][0]))
            for i in range(n + 1):
                for j in range(i):
                    state = states[i][j]
                    transducer.add_final_state(state)
                    if i != n:
                        for segment in segments:
                            if symbol_bundle_characteristic_matrix[segment][i]:
                                # segment has bundle i: advance one level
                                new_state_level = i + 1
                                new_state_mem = min([
                                    j + 1,
                                    max_num_of_satisfied_bundle_by_segment[
                                        segment]
                                ])
                            else:
                                # fall back to the highest level this segment supports within j+1.
                                # When that level is 0 the segment lacks bundle 0, so its
                                # leading-run count is 0 and min() picks 0 despite abs(-1) == 1.
                                new_state_level = compute_highest_num_of_satisfied_bundle(
                                    segment, j)
                                new_state_mem = min([
                                    max_num_of_satisfied_bundle_by_segment[
                                        segment],
                                    abs(new_state_level - 1)
                                ])
                            new_terminus = states[new_state_level][
                                new_state_mem]
                            transducer.add_arc(
                                Arc(state, JOKER_SEGMENT, segment,
                                    CostVector([0]), new_terminus))
                            # also add a zero-cost self-loop on the target for this segment.
                            # NOTE(review): the same self-loop is added again each time another
                            # state maps to this target - confirm add_arc tolerates duplicates
                            transducer.add_arc(
                                Arc(new_terminus, JOKER_SEGMENT, segment,
                                    CostVector([0]), new_terminus))
                    else:  # i = n
                        # top level: the arc costs 1 when the segment has the last bundle
                        for segment in segments:
                            new_state_level = compute_highest_num_of_satisfied_bundle(
                                segment, j)
                            new_state_mem = min([
                                max_num_of_satisfied_bundle_by_segment[
                                    segment],
                                abs(new_state_level - 1)
                            ])
                            new_terminus = states[new_state_level][
                                new_state_mem]
                            transducer.add_arc(
                                Arc(
                                    state, JOKER_SEGMENT, segment,
                                    CostVector([
                                        int(symbol_bundle_characteristic_matrix[
                                            segment][i])
                                    ]), new_terminus))

        transducer.clear_dead_states()
        # zero-cost NULL_SEGMENT self-loop on every state that survived the cleanup
        for state in transducer.states:
            transducer.add_arc(
                Arc(state, JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]),
                    state))
        return transducer
Exemple #9
0
class TestTransducer(unittest.TestCase):
    """Unit tests for ``Transducer``, ``State``, ``Arc`` and ``CostVector``."""

    def setUp(self):
        """Load the feature tables and build the transducers shared by the tests."""
        self.feature_table = FeatureTable.load(get_feature_table_fixture("feature_table.json"))
        self.phonotactic_test_feature_table = FeatureTable.load(get_feature_table_fixture(
            "phonotactic_test_feature_table.json"))
        self.transducer = Transducer(self.feature_table.get_segments())
        self.state1 = State('q1')
        self.state2 = State('q2')
        self.transducer.add_state(self.state1)
        self.transducer.add_state(self.state2)
        self.transducer.initial_state = self.state1
        self.transducer.add_final_state(self.state2)
        self.cost_vector1 = CostVector([3, 1, 0])
        self.cost_vector2 = CostVector([2, 0, 0])
        self.arc = Arc(self.state1, Segment('a', self.feature_table), Segment('b', self.feature_table),
                       CostVector([0, 1, 0]), self.state2)
        self.transducer.add_arc(self.arc)

        self.simple_transducer = self.transducer
        # Copy of the simple transducer with extra self-loops on both states.
        self.loops_transducer = deepcopy(self.transducer)
        zero_cost_vector = CostVector([0])
        segment_a = Segment('a', self.feature_table)
        segment_b = Segment('b', self.feature_table)
        self.loops_transducer.add_arc(Arc(self.state1, JOKER_SEGMENT, segment_a, zero_cost_vector, self.state1))
        self.loops_transducer.add_arc(Arc(self.state1, JOKER_SEGMENT, segment_b, zero_cost_vector, self.state1))
        self.loops_transducer.add_arc(Arc(self.state2, NULL_SEGMENT, segment_a, zero_cost_vector, self.state2))
        self.loops_transducer.add_arc(Arc(self.state2, NULL_SEGMENT, segment_b, zero_cost_vector, self.state2))

        phonotactic = PhonotacticConstraint([{'cons': '+'}, {'voice': '+'}, {'labial': '+'}],
                                            self.phonotactic_test_feature_table).get_transducer()
        dep = DepConstraint([{'labial': '-'}], self.phonotactic_test_feature_table).get_transducer()
        # Named max_transducer so the builtin max() is not shadowed.
        max_transducer = MaxConstraint([{'voice': '-'}], self.phonotactic_test_feature_table).get_transducer()

        self.intersection_test_transducer = Transducer.intersection(phonotactic, dep, max_transducer)

    # Transducer tests:
    def test_transducer_equality(self):
        """A three-way intersection equals the same intersection built in two steps."""
        feature_table = FeatureTable.load(get_feature_table_fixture("a_b_and_cons_feature_table.json"))
        faith = FaithConstraint([], feature_table).get_transducer()
        phonotactic = PhonotacticConstraint([{'cons': '+'}], feature_table).get_transducer()
        max_transducer = MaxConstraint([{'cons': '+'}], feature_table).get_transducer()

        # All three constraints intersected in one call.
        transducer1 = Transducer.intersection(faith, phonotactic, max_transducer)
        # The same intersection assembled manually in two steps.
        temp_transducer = Transducer.intersection(phonotactic, max_transducer)
        transducer2 = Transducer.intersection(faith, temp_transducer)

        self.assertEqual(transducer1, transducer2)

    def test_transducer_equality_with_deepcopy(self):
        """A deep copy of a transducer compares equal to the original."""
        phonotactic_transducer = PhonotacticConstraint([{'cons': '+'}, {'voice': '+'}, {'labial': '+'}],
                                                       self.phonotactic_test_feature_table).get_transducer()
        phonotactic_transducer_copy = deepcopy(phonotactic_transducer)
        self.assertEqual(phonotactic_transducer, phonotactic_transducer_copy)

    def test_transducer_equality_with_pickle(self):
        """A freshly built transducer compares equal to its pickled fixture."""
        phonotactic_transducer = PhonotacticConstraint([{'cons': '+'}, {'voice': '+'}, {'labial': '+'}],
                                                       self.phonotactic_test_feature_table).get_transducer()
        pickled_phonotactic_transducer = get_pickle("equality_with_pickle_transducer")
        self.assertEqual(phonotactic_transducer, pickled_phonotactic_transducer)

    def test_transducer_intersection(self):
        """The intersection built in setUp matches the pickled fixture."""
        self.assertEqual(self.intersection_test_transducer, get_pickle("intersection_test_transducer"))

    def test_transducer_clear_dead_states(self):
        """clear_dead_states() on a four-state machine matches the pickled fixture."""
        transducer = Transducer(self.feature_table.get_segments())
        state1 = State('q1')
        state2 = State('q2')
        state3 = State('q3')
        state4 = State('q4')
        transducer.add_state(state1)
        transducer.add_state(state2)
        transducer.add_state(state3)
        transducer.add_state(state4)
        transducer.initial_state = state1
        transducer.add_final_state(state2)
        transducer.add_arc(Arc(state1, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state2))
        transducer.add_arc(Arc(state1, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state1))
        transducer.add_arc(Arc(state2, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state2))
        # state3 and state4 only have arcs among themselves.
        transducer.add_arc(Arc(state3, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state3))
        transducer.add_arc(Arc(state4, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state3))
        transducer.clear_dead_states()
        self.assertEqual(transducer, get_pickle("clear_dead_states_test_transducer"))

    def test_get_arcs_by_origin_state(self):
        """Arcs leaving the initial state match the pickled arc list."""
        initial_state = self.intersection_test_transducer.initial_state
        arc_list = self.intersection_test_transducer.get_arcs_by_origin_state(initial_state)
        pickled_arc_list = get_pickle("get_arcs_by_origin_state_arc_list")
        self.assertTrue(_are_lists_equal(arc_list, pickled_arc_list))

    def test_get_arcs_by_terminal_state(self):
        """Compare an arc list of the initial state with the 'terminal state' fixture."""
        initial_state = self.intersection_test_transducer.initial_state
        # NOTE(review): this calls get_arcs_by_origin_state although the test
        # and its fixture are named for the terminal state - looks like a
        # copy-paste slip; confirm the intended call and regenerate the
        # fixture if it changes.
        arc_list = self.intersection_test_transducer.get_arcs_by_origin_state(initial_state)
        pickled_arc_list = get_pickle("get_arcs_by_terminal_state_arc_list")
        self.assertTrue(_are_lists_equal(arc_list, pickled_arc_list))

    def test_get_range(self):
        """Placeholder; the behaviour is covered by another suite."""
        pass  # see TestingParserSuite.test_geneare

    # State tests:
    def test_state_str(self):
        """str(State) renders the label and index as "(label,index)"."""
        self.assertEqual(str(self.state1), "(q1,0)")

    def test_states_addition(self):
        """states_addition joins the labels with '|' and gives the same result when repeated."""
        new_state = State.states_addition(self.state1, self.state2)
        self.assertEqual(str(new_state), "(q1|q2,0)")
        new_state = State.states_addition(self.state1, self.state2)
        self.assertEqual(str(new_state), "(q1|q2,0)")

    # Arcs tests:
    def test_arc_str(self):
        """str(Arc) lists origin, both segment labels, cost vector and terminus."""
        self.assertEqual(str(self.arc), "['(q1,0)', 'a', 'b', '[0, 1, 0]', '(q2,0)']")

    # CostVector tests:
    def test_costVector_operations(self):
        """'+' and '-' work element-wise; '*' concatenates the two vectors."""
        self.assertEqual(self.cost_vector1 + self.cost_vector2, CostVector([5, 1, 0]))
        self.assertEqual(self.cost_vector1 * self.cost_vector2, CostVector([3, 1, 0, 2, 0, 0]))
        self.assertEqual(self.cost_vector1 - self.cost_vector2, CostVector([1, 1, 0]))

    def test_costVector_comparison(self):
        """Pin the expected results of '>' on plain and infinite cost vectors."""
        self.assertTrue(CostVector([0, 0, 0, 0, 0]) > CostVector([0, 0, 1, 0, 0]))
        self.assertFalse(CostVector([1, 0, 1]) > CostVector([0, 2, 0]))
        self.assertTrue(CostVector([1000, 0, 76]) > CostVector.get_inf_vector())
        self.assertFalse(CostVector.get_inf_vector() > CostVector([0, 1, 2]))
        self.assertFalse(CostVector.get_inf_vector() > CostVector.get_inf_vector())

    def test_costVector_get_vector_with_size_n_and_number_m(self):
        """get_vector(n, m) yields n copies of m; get_empty_vector() yields an empty vector."""
        self.assertEqual(CostVector.get_vector(4, 0), CostVector([0, 0, 0, 0]))
        self.assertEqual(CostVector.get_vector(1, 1), CostVector([1]))
        self.assertEqual(CostVector.get_vector(0, 0), CostVector([]))
        self.assertEqual(CostVector.get_empty_vector(), CostVector([]))

    def test_costVector_str(self):
        """str(CostVector) looks like the list of its costs."""
        self.assertEqual(str(CostVector([1, 1, 0])), "[1, 1, 0]")

    def test_costVector_illegal_operation(self):
        """Adding vectors of different lengths raises CostVectorOperationError."""
        with self.assertRaises(CostVectorOperationError):
            CostVector([1, 1]) + CostVector([1])

    def test_costVector_concatenation_with_empty_vector(self):
        """Concatenating with an empty vector on either side leaves the vector unchanged."""
        cost_vector3 = CostVector([])
        self.assertEqual(self.cost_vector1 * cost_vector3, CostVector([3, 1, 0]))
        self.assertEqual(cost_vector3 * self.cost_vector1, CostVector([3, 1, 0]))