Exemple #1
0
def time_test():
    """Benchmark the Transducer (RNN-T) loss on a large random batch.

    Builds random joint-network activations of shape
    (batch, input_len, output_len + 1, vocab) plus random labels, then
    times 10 loss evaluations and prints the mean seconds per iteration.
    """
    blank = 0
    batch_size = 32
    vocab_size = 30
    input_len = 400
    output_len = 80
    acts = np.random.rand(batch_size, input_len, output_len + 1, vocab_size)
    # Labels avoid 0 because 0 is reserved for the blank symbol.
    labels = np.random.randint(1, vocab_size, (batch_size, output_len))

    acts = torch.FloatTensor(acts)
    # Every item in the batch uses the full input length.
    lengths = [acts.shape[1]] * acts.shape[0]
    label_lengths = [len(l) for l in labels]
    # Flatten the label matrix into one concatenated 1-D target vector.
    labels = np.array([l for label in labels for l in label])
    labels = torch.IntTensor(labels)
    lengths = torch.IntTensor(lengths)
    label_lengths = torch.IntTensor(label_lengths)
    log_probs = nn.functional.log_softmax(acts, dim=3)

    start = time.time()
    iters = 10
    for _ in range(iters):
        # Construction is intentionally included in the timed region.
        # Fix: use the `blank` constant instead of re-hardcoding 0
        # (the original defined `blank` and never used it).
        tfn = Transducer(blank_label=blank)
        costs = tfn.apply(log_probs, labels, lengths, label_lengths)
    end = time.time()

    print("Time per iteration: {:.3f}(s)".format((end-start)/iters))
    def test_fwd_trivial(self):
        """Forward score of deterministic emissions should be exactly 0.

        Each frame puts probability 1.0 on a single token, so the
        log-probability of the matching label sequence is log(1) = 0,
        with or without optional blanks and with repeats disallowed.
        """
        T = 3  # number of time frames
        N = 2  # number of tokens
        emissions = torch.FloatTensor([1.0, 0.0, 0.0, 1.0, 1.0,
                                       0.0]).view(1, T, N)
        log_probs = torch.log(emissions)

        # Check without blank:
        labels = [[0, 1, 0]]
        transducer = Transducer(tokens=["a", "b"],
                                graphemes_to_idx={
                                    "a": 0,
                                    "b": 1
                                })
        self.assertAlmostEqual(transducer(log_probs, labels).item(), 0.0)

        # Check with blank:
        labels = [[0, 0]]
        transducer = Transducer(tokens=["a"],
                                graphemes_to_idx={"a": 0},
                                blank="optional")
        self.assertAlmostEqual(transducer(log_probs, labels).item(), 0.0)

        # Check with repeats not allowed:
        labels = [[0, 0]]
        transducer = Transducer(
            tokens=["a"],
            graphemes_to_idx={"a": 0},
            blank="optional",
            allow_repeats=False,
        )
        self.assertAlmostEqual(transducer(log_probs, labels).item(), 0.0)
Exemple #3
0
    def _construct_hypothesis(self):
        """
        Utilize the observation table to construct a Mealy Machine.

        Returns:
            Transducer: A mealy machine built from a closed and consistent
            observation table, or None if the table turns out not to be
            closed.
        """
        mm = Transducer()
        for access_string in self.ot.access_strings:
            # The source state id is the same for every input symbol,
            # so compute it once per access string.
            src_id = self.ot.access_strings.index(access_string)
            for i in self.I:
                dst = self.ot.equiv_classes[access_string + (i, )]
                # If dst is None then the table is not closed.
                if dst is None:
                    logging.debug('Conjecture attempt on non closed table.')
                    return None
                dst_id = self.ot.access_strings.index(dst)
                # Look up the output cell once; the original queried the
                # same cell up to three times and had a dead first
                # assignment to `out` that was always overwritten.
                cell = self.ot[access_string, (i, )]
                out = [int(x) for x in cell] if cell else [EPSILON]
                mm.add_arc(src_id, dst_id, [int(i)], out)

        # This is for format compatibility with the DFA/SFAs.
        for state in mm.states:
            state.final = True
        return mm
Exemple #4
0
    def _construct_hypothesis(self):
        """
        Utilize the observation table to construct a Mealy Machine.

        Returns:
            Transducer: A mealy machine built from a closed and consistent
            observation table, or None if the table turns out not to be
            closed.
        """
        mm = Transducer()
        for access_string in self.ot.access_strings:
            # The source state id is the same for every input symbol,
            # so compute it once per access string.
            src_id = self.ot.access_strings.index(access_string)
            for i in self.I:
                dst = self.ot.equiv_classes[access_string + (i,)]
                # If dst is None then the table is not closed.
                if dst is None:
                    logging.debug('Conjecture attempt on non closed table.')
                    return None
                dst_id = self.ot.access_strings.index(dst)
                # Look up the output cell once; the original queried the
                # same cell up to three times and had a dead first
                # assignment to `out` that was always overwritten.
                cell = self.ot[access_string, (i, )]
                out = [int(x) for x in cell] if cell else [EPSILON]
                mm.add_arc(src_id, dst_id, [int(i)], out)

        # This is for format compatibility with the DFA/SFAs.
        for state in mm.states:
            state.final = True
        return mm
Exemple #5
0
    def setUp(self):
        """Build the transducer fixtures shared by the tests.

        Creates:
          * simple_transducer: two states with one (a -> b) arc.
          * loops_transducer: the same machine plus four zero-cost loops.
          * intersection_test_transducer: intersection of three constraint
            transducers built from the phonotactic test feature table.
        """
        self.feature_table = FeatureTable.load(get_feature_table_fixture("feature_table.json"))
        self.phonotactic_test_feature_table = FeatureTable.load(get_feature_table_fixture(
            "phonotactic_test_feature_table.json"))
        self.transducer = Transducer(self.feature_table.get_segments())
        self.state1 = State('q1')
        self.state2 = State('q2')
        self.transducer.add_state(self.state1)
        self.transducer.add_state(self.state2)
        self.transducer.initial_state = self.state1
        self.transducer.add_final_state(self.state2)
        self.cost_vector1 = CostVector([3, 1, 0])
        self.cost_vector2 = CostVector([2, 0, 0])
        # The single arc maps segment 'a' to segment 'b' with cost [0, 1, 0].
        self.arc = Arc(self.state1, Segment('a', self.feature_table), Segment('b', self.feature_table), CostVector([0, 1, 0]), self.state2)
        self.transducer.add_arc(self.arc)

        self.simple_transducer = self.transducer
        # deepcopy so the loops added below do not affect simple_transducer.
        self.loops_transducer = deepcopy(self.transducer)
        zero_cost_vector = CostVector([0])
        segment_a = Segment('a', self.feature_table)
        segment_b = Segment('b', self.feature_table)
        self.loops_transducer.add_arc(Arc(self.state1, JOKER_SEGMENT, segment_a, zero_cost_vector, self.state1))
        self.loops_transducer.add_arc(Arc(self.state1, JOKER_SEGMENT, segment_b, zero_cost_vector,self.state1))
        self.loops_transducer.add_arc(Arc(self.state2, NULL_SEGMENT, segment_a, zero_cost_vector,self.state2))
        self.loops_transducer.add_arc(Arc(self.state2, NULL_SEGMENT, segment_b, zero_cost_vector,self.state2))

        phonotactic = PhonotacticConstraint([{'cons': '+'}, {'voice': '+'}, {'labial': '+'}],
                                                         self.phonotactic_test_feature_table).get_transducer()
        dep = DepConstraint([{'labial': '-'}], self.phonotactic_test_feature_table).get_transducer()
        # NOTE(review): local name "max" shadows the builtin; left unchanged
        # in this doc-only pass.
        max = MaxConstraint([{'voice': '-'}], self.phonotactic_test_feature_table).get_transducer()

        self.intersection_test_transducer = Transducer.intersection(phonotactic, dep, max)
Exemple #6
0
    def test_transducer_equality(self):
        """Intersection is associative: (f . p) . m equals f . (p . m)."""
        feature_table = FeatureTable.load(get_feature_table_fixture("a_b_and_cons_feature_table.json"))
        faith = FaithConstraint([],feature_table).get_transducer()
        phonotactic = PhonotacticConstraint([{'cons': '+'}], feature_table).get_transducer()
        # Renamed from "max" so the builtin is not shadowed.
        max_constraint = MaxConstraint([{'cons': '+'}], feature_table).get_transducer()
        transducer1 = Transducer.intersection(faith, phonotactic, max_constraint)
        temp_transducer = Transducer.intersection(phonotactic, max_constraint)
        transducer2 = Transducer.intersection(faith, temp_transducer)

        self.assertEqual(transducer1, transducer2)
 def __init__(self, filename):
     '''Read a transducer from filename

     Parses the header and alphabet, then the weighted or unweighted
     transducer body.
     '''
     # "with" guarantees the file is closed even if parsing raises;
     # the original open()/close() pair leaked the handle on error.
     with open(filename, "rb") as handle:
         self.header = Header(handle)
         self.alphabet = Alphabet(handle, self.header.number_of_symbols)
         if self.header.weighted:
             self.transducer = TransducerW(handle, self.header, self.alphabet)
         else:
             self.transducer = Transducer(handle, self.header, self.alphabet)
Exemple #8
0
 def __init__(self, filename):
     '''Read a transducer from filename

     Parses the header and alphabet, then the weighted or unweighted
     transducer body.
     '''
     # "with" guarantees the file is closed even if parsing raises;
     # the original open()/close() pair leaked the handle on error.
     with open(filename, "rb") as handle:
         self.header = Header(handle)
         self.alphabet = Alphabet(handle, self.header.number_of_symbols)
         if self.header.weighted:
             self.transducer = TransducerW(handle, self.header, self.alphabet)
         else:
             self.transducer = Transducer(handle, self.header, self.alphabet)
def launch_decoder_through_transducer(coder):
    """Receive data from a Transducer forever and decode each chunk.

    Each non-empty chunk is converted to an ASCII string, printed, and
    passed to the keyboard handler. Never returns.
    """
    print("Listening...")
    receiver = Transducer(mode=2, debug=0, coder=coder)
    while True:
        chunk = receiver.receive()
        if len(chunk) > 0:
            decoded = data_to_ascii_string(chunk)
            print("Decoded: %s" % decoded)
            operate_keyboard_if_necessary(decoded)
Exemple #10
0
def main():
    """
    Simple interface to convert transducers from text format to BEK programs
    """
    # Default input file; overridable by the first CLI argument.
    filename = 'transducer.txt'
    if len(argv) > 1:
        filename = argv[1]

    trans = Transducer()
    trans.load(filename)

    bek = BekProgram()
    bek.create_from_transducer(trans)
    # Fix: "print x" is a SyntaxError on Python 3; print(x) with a single
    # argument behaves identically on Python 2 and 3.
    print(bek.bek_program)
Exemple #11
0
def main():
    """
    Simple interface to convert transducers from text format to BEK programs
    """
    # Default input file; overridable by the first CLI argument.
    filename = 'transducer.txt'
    if len(argv) > 1:
        filename = argv[1]

    trans = Transducer()
    trans.load(filename)

    bek = BekProgram()
    bek.create_from_transducer(trans)
    # Fix: "print x" is a SyntaxError on Python 3; print(x) with a single
    # argument behaves identically on Python 2 and 3.
    print(bek.bek_program)
Exemple #12
0
    def _make_transducer(self):
        """Build a linear transducer over this word's segments.

        One state per position plus a final state. Every state gets a
        (NULL -> JOKER) self-loop with an empty cost vector; consecutive
        states are linked by an arc whose input is the word's segment at
        that position.
        """
        segments = self.feature_table.get_segments()
        transducer = Transducer(segments, length_of_cost_vectors=0)
        word_segments = self.get_segments()
        length = len(self.word_string)
        states = [State("q{}".format(idx), idx) for idx in range(length + 1)]
        for idx, state in enumerate(states):
            transducer.add_state(state)
            transducer.add_arc(Arc(state, NULL_SEGMENT, JOKER_SEGMENT, CostVector.get_empty_vector(), state))
            if idx < length:
                transducer.add_arc(Arc(state, word_segments[idx], JOKER_SEGMENT, CostVector.get_empty_vector(), states[idx + 1]))

        transducer.initial_state = states[0]
        transducer.add_final_state(states[length])
        return transducer
 def test_backoff_transitions(self):
     """Compare analytic gradients of the transition parameters against
     central finite differences for a transducer with backoff transitions."""
     transitions = gtn.loadtxt("trans_backoff_test.txt")
     T = 4  # time frames
     N = 5  # tokens
     inputs = torch.randn(1, T, N, dtype=torch.float, requires_grad=True)
     labels = [[0, 1, 0]]
     tokens = [(n, ) for n in range(N)]
     graphemes_to_idx = {n: n for n in range(N)}
     transducer = Transducer(
         tokens=tokens,
         graphemes_to_idx=graphemes_to_idx,
         blank="optional",
         allow_repeats=False,
         transitions=transitions,
     )
     loss = transducer(inputs, labels)
     loss.backward()
     trans_p = transducer.transition_params
     analytic_grad = trans_p.grad
     # Central finite differences, perturbing one parameter at a time.
     epsilon = 1e-3
     numerical_grad = []
     with torch.no_grad():
         for i in range(trans_p.numel()):
             transducer.transition_params.data[i] += epsilon
             loss_up = transducer(inputs, labels).item()
             transducer.transition_params.data[i] -= 2 * epsilon
             loss_down = transducer(inputs, labels).item()
             numerical_grad.append((loss_up - loss_down) / (2 * epsilon))
             # Restore the parameter before perturbing the next one.
             transducer.transition_params.data[i] += epsilon
     numerical_grad = torch.tensor(numerical_grad)
     self.assertTrue(
         torch.allclose(analytic_grad, numerical_grad, rtol=1e-3,
                        atol=1e-3))
Exemple #14
0
def reference_rnnt_loss(input_data, target_data, input_lengths,
                        target_lengths):
    """Run the reference RNN-T implementation on the given batch.

    Returns the (cost, grads_wlogits, grads_wlogprobs) tuple produced by
    wrap_and_call with a blank label of 0.
    """
    reference_fn = Transducer(blank_label=0)
    return wrap_and_call(reference_fn, input_data, input_lengths,
                         target_data, target_lengths)
def make_optimal_paths(transducer_input):
    """Replace the arcs of a transducer with optimal-path arcs.

    For each alphabet segment and each ordered pair of states, intersects
    the single-segment word transducer with the input machine, keeps only
    the optimal paths between the pair, and records the resulting arc.

    Args:
        transducer_input: the transducer to process; it is not modified
            (a deep copy is returned).

    Returns:
        A new transducer whose arcs are the computed optimal-path arcs.
    """
    # pickle round-trip is used here as a fast deep copy.
    transducer = pickle.loads(pickle.dumps(transducer_input, -1))
    alphabet = transducer.get_alphabet()
    new_arcs = list()
    for segment in alphabet:
        word = Word(segment.get_symbol())
        word_transducer = word.get_transducer()
        intersected_machine = Transducer.intersection(word_transducer, transducer)
        states = transducer.get_states()
        for state1, state2 in itertools.product(states, states):
            initial_state = word_transducer.initial_state & state1
            final_state = word_transducer.get_a_final_state() & state2
            temp_transducer = pickle.loads(pickle.dumps(intersected_machine, -1))
            temp_transducer.initial_state = initial_state
            temp_transducer.set_final_state(final_state)
            temp_transducer.clear_dead_states()
            if final_state in temp_transducer.get_final_states():  # otherwise no path.
                try:
                    temp_transducer = remove_suboptimal_paths(temp_transducer)
                    # Renamed from "range" so the builtin is not shadowed.
                    output_range = temp_transducer.get_range()
                    arc = Arc(state1, segment, output_range, _get_path_cost(temp_transducer), state2)
                    new_arcs.append(arc)
                except KeyError:
                    # No optimal path between this state pair; skip the arc.
                    pass

    transducer.set_arcs(new_arcs)
    return transducer
Exemple #16
0
def small_test():
    """Check RNN-T cost and gradients on a tiny hand-computed example.

    A single (T=2, U=2, V=5) example is compared against precomputed
    expected cost and gradient values; raises AssertionError on mismatch.
    """
    acts = np.array([[[0.1, 0.6, 0.1, 0.1, 0.1],
                      [0.1, 0.1, 0.6, 0.1, 0.1],
                      [0.1, 0.1, 0.2, 0.8, 0.1]],
                     [[0.1, 0.6, 0.1, 0.1, 0.1],
                      [0.1, 0.1, 0.2, 0.1, 0.1],
                      [0.7, 0.1, 0.2, 0.1, 0.1]]])
    labels = [[1, 2]]
    print("Acts.shape", acts.shape)
    # Prepend a batch dimension of size 1.
    acts = acts[None, ...]
    print("Acts.shape", acts.shape)

    tfn = Transducer(blank_label=0)
    cost, grads = wrap_and_call(tfn, acts, labels)
    expected_cost = 4.495666
    expected_grads = np.array([[[-0.308198071906, -0.6918019280939998, 0.0, 0.0, 0.0],
                                [-0.308198071906, 0.0, -0.3836038561880001, 0.0, 0.0],
                                [-0.3836038561880001, 0.0, 0.0, 0.0, 0.0]],
                               [[0.0, -0.308198071906, 0.0, 0.0, 0.0],
                                [0.0, 0.0, -0.6163961438119995, 0.0, 0.0],
                                [-0.9999999999999991, 0.0, 0.0, 0.0, 0.0]]])
    assert np.allclose(cost, expected_cost, rtol=1e-6), \
        "small_test costs mismatch."
    assert np.allclose(grads, expected_grads), \
        "small_test gradient mismatch."
    def test_simple_decomposition(self):
        """Transducer loss must equal a hand-built alignment graph's loss.

        The label [a, b, a] can be decomposed with the multi-grapheme
        tokens "ab", "ba", "aba"; all decompositions are enumerated in a
        manually constructed gtn graph and the losses and emission
        gradients are compared.
        """
        T = 5
        tokens = ["a", "b", "ab", "ba", "aba"]
        scores = torch.randn((1, T, len(tokens)), requires_grad=True)
        labels = [[0, 1, 0]]
        transducer = Transducer(tokens=tokens,
                                graphemes_to_idx={
                                    "a": 0,
                                    "b": 1
                                })

        # Hand construct the alignment graph with all of the decompositions
        alignments = gtn.Graph(False)
        alignments.add_node(True)

        # Add the path ['a', 'b', 'a']
        alignments.add_node()
        alignments.add_arc(0, 1, 0)
        alignments.add_arc(1, 1, 0)
        alignments.add_node()
        alignments.add_arc(1, 2, 1)
        alignments.add_arc(2, 2, 1)
        alignments.add_node(False, True)
        alignments.add_arc(2, 3, 0)
        alignments.add_arc(3, 3, 0)

        # Add the path ['a', 'ba']
        alignments.add_node(False, True)
        alignments.add_arc(1, 4, 3)
        alignments.add_arc(4, 4, 3)

        # Add the path ['ab', 'a']
        alignments.add_node()
        alignments.add_arc(0, 5, 2)
        alignments.add_arc(5, 5, 2)
        alignments.add_arc(5, 3, 0)

        # Add the path ['aba']
        alignments.add_node(False, True)
        alignments.add_arc(0, 6, 4)
        alignments.add_arc(6, 6, 4)

        emissions = gtn.linear_graph(T, len(tokens), True)

        emissions.set_weights(scores.data_ptr())
        # Loss = -log p(labels) = logsumexp(all paths) - logsumexp(aligned).
        expected_loss = gtn.subtract(
            gtn.forward_score(emissions),
            gtn.forward_score(gtn.intersect(emissions, alignments)),
        )

        loss = transducer(scores, labels)
        self.assertAlmostEqual(loss.item(), expected_loss.item(), places=5)
        loss.backward()
        gtn.backward(expected_loss)

        expected_grad = torch.tensor(emissions.grad().weights_to_numpy())
        expected_grad = expected_grad.view((1, T, len(tokens)))
        self.assertTrue(
            torch.allclose(scores.grad, expected_grad, rtol=1e-4, atol=1e-5))
Exemple #18
0
    def _get_outputs(self, word):
        """Return the output range for *word* under this grammar.

        Intersects the word's transducer with the grammar's transducer,
        removes dead states, optimizes the result for the word, and
        returns its range. Also dumps both machines to dot files for
        inspection.
        """
        grammar_transducer = self.get_transducer()
        word_transducer = word.get_transducer()
        # Debug output: visualize both machines before intersecting.
        write_to_dot(grammar_transducer, "grammar_transducer")
        write_to_dot(word_transducer, "word_transducer")
        intersected_transducer = Transducer.intersection(word_transducer,    # a transducer with NULLs on inputs and JOKERs on outputs
                                                         grammar_transducer) # a transducer with segments on inputs and sets on outputs

        intersected_transducer.clear_dead_states()
        intersected_transducer = optimize_transducer_grammar_for_word(word, intersected_transducer)
        outputs = intersected_transducer.get_range()
        return outputs
    def _get_outputs(self, word):
        """Return the output range for *word* under this grammar.

        Intersects the word's transducer with the grammar's transducer,
        removes dead states, optimizes the result for the word, and
        returns its range.
        """
        grammar_transducer = self.get_transducer()
        word_transducer = word.get_transducer()
        intersected_transducer = Transducer.intersection(
            word_transducer,  # a transducer with NULLs on inputs and JOKERs on outputs
            grammar_transducer
        )  # a transducer with segments on inputs and sets on outputs

        intersected_transducer.clear_dead_states()
        intersected_transducer = optimize_transducer_grammar_for_word(
            word, intersected_transducer)
        #dot(intersected_transducer, 'intersected')
        outputs = intersected_transducer.get_range()
        return outputs
 def test_fwd(self):
     """Forward score for uniform emissions matches the closed form.

     After the softmax every token gets probability 0.25 per frame; the
     expected score is -log(5 * 0.25^3), matching the assertion below.
     """
     T = 3
     N = 4
     labels = [[1, 2]]
     emissions = torch.FloatTensor([1.0] * T * N).view(1, T, N)
     # Fix: removed a dead assignment — the original stored
     # torch.log(emissions) in log_probs and immediately overwrote it.
     log_probs = torch.nn.functional.log_softmax(torch.log(emissions), 2)
     transducer = Transducer(
         tokens=["a", "b", "c"],
         graphemes_to_idx={
             "a": 0,
             "b": 1,
             "c": 2
         },
         blank="optional",
     )
     fwd = transducer(log_probs, labels)
     self.assertAlmostEqual(fwd.item(), -math.log(0.25 * 0.25 * 0.25 * 5))
class OlTransducer:
    """Reads a (possibly weighted) transducer file and runs analyses on it."""

    def __init__(self, filename):
        '''Read a transducer from filename
        '''
        # "with" guarantees the file is closed even if parsing raises;
        # the original open()/close() pair leaked the handle on error.
        with open(filename, "rb") as handle:
            self.header = Header(handle)
            self.alphabet = Alphabet(handle, self.header.number_of_symbols)
            if self.header.weighted:
                self.transducer = TransducerW(handle, self.header, self.alphabet)
            else:
                self.transducer = Transducer(handle, self.header, self.alphabet)

    def analyse(self, string):
        '''Take string to analyse, return a vector of (string, weight) pairs.
        '''
        if self.transducer.analyze(string):
            return self.transducer.displayVector
        else:
            # No analysis found.
            return []
    def test_ctc_compare(self):
        """Transducer configured CTC-style must match the CTCLoss reference.

        With optional blank and repeats disallowed, the transducer loss and
        its input gradients are compared against CTCLoss for a batch of
        variable-length targets, under both "none" and "mean" reductions.
        """
        T = 20  # time frames
        N = 15  # tokens (last index is the blank)
        B = 5   # batch size
        tgt = [
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            [1, 1],
            [0, 2, 3],
            [0, 0, 0, 0, 0],
            [0, 4, 8, 12],
        ]

        tokens = list((t, ) for t in range(N - 1))
        graphemes_to_idx = {t: t for t in range(N - 1)}
        inputs = torch.randn(B, T, N, dtype=torch.float, requires_grad=True)

        # With and without target length reduction:
        for reduction in ["none", "mean"]:
            transducer = Transducer(
                tokens=tokens,
                graphemes_to_idx=graphemes_to_idx,
                blank="optional",
                allow_repeats=False,
                reduction=reduction,
            )
            ctc_inputs = torch.nn.functional.log_softmax(inputs, 2)
            ctc_result = CTCLoss(ctc_inputs, tgt, N - 1, reduction)
            ctc_result.backward()
            ctc_grad = inputs.grad
            # Reset so the transducer backward pass starts from zero.
            inputs.grad = None

            transducer_result = transducer(inputs, tgt)
            transducer_result.backward()
            transducer_grad = inputs.grad
            inputs.grad = None

            self.assertAlmostEqual(ctc_result.item(),
                                   transducer_result.item(),
                                   places=4)
            self.assertTrue(
                torch.allclose(ctc_grad, transducer_grad, rtol=1e-4,
                               atol=1e-5))
Exemple #23
0
class OlTransducer:
    """Reads a (possibly weighted) transducer file and runs analyses on it."""

    def __init__(self, filename):
        '''Read a transducer from filename
        '''
        # "with" guarantees the file is closed even if parsing raises;
        # the original open()/close() pair leaked the handle on error.
        with open(filename, "rb") as handle:
            self.header = Header(handle)
            self.alphabet = Alphabet(handle, self.header.number_of_symbols)
            if self.header.weighted:
                self.transducer = TransducerW(handle, self.header, self.alphabet)
            else:
                self.transducer = Transducer(handle, self.header, self.alphabet)

    def analyse(self, string):
        '''Take string to analyse, return a vector of (string, weight) pairs.
        '''
        if self.transducer.analyze(string):
            return self.transducer.displayVector
        else:
            # No analysis found.
            return []
 def test_asg_viterbi(self):
     """Viterbi decoding with ASG-style transitions yields the expected path."""
     T = 4  # time frames
     N = 3  # tokens
     inputs = torch.tensor([0, 0, 7, 5, 4, 3, 5, 8, 5, 5, 4, 3],
                           dtype=torch.float32).view(1, T, N)
     transitions = torch.tensor([0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 0, 0],
                                dtype=torch.float32)
     expected_path = [2, 1, 0]
     tokens = [(n, ) for n in range(N)]
     graphemes_to_idx = {n: n for n in range(N)}
     asg_transitions = ASGLossFunction.create_transitions_graph(
         torch.zeros(N + 1, N))
     transducer = Transducer(
         tokens=tokens,
         graphemes_to_idx=graphemes_to_idx,
         transitions=asg_transitions,
     )
     # Overwrite the zero-initialized transition weights with the fixture.
     transducer.transition_params.data = transitions
     path = transducer.viterbi(inputs)[0].tolist()
     self.assertTrue(path == expected_path)
Exemple #25
0
def big_test():
    """Check RNN-T costs and gradients on a two-example batch.

    Activations of shape (2, T=4, U=3, V=3) are compared against
    precomputed expected per-example costs and gradients; raises
    AssertionError on mismatch.
    """

    # minibatch x T x U x alphabet_size
    activations = [
            [[[0.06535690384862791, 0.7875301411923206, 0.08159176605666074],
              [0.5297155426466327, 0.7506749639230854, 0.7541348379087998],
              [0.6097641124736383, 0.8681404965673826, 0.6225318186056529]],

             [[0.6685222872103057, 0.8580392805336061, 0.16453892311765583],
              [0.989779515236694, 0.944298460961015, 0.6031678586829663],
              [0.9467833543605416, 0.666202507295747, 0.28688179752461884]],

             [[0.09418426230195986, 0.3666735970751962, 0.736168049462793],
              [0.1666804425271342, 0.7141542198635192, 0.3993997272216727],
              [0.5359823524146038, 0.29182076440286386, 0.6126422611507932]],

             [[0.3242405528768486, 0.8007644367291621, 0.5241057606558068],
              [0.779194617063042, 0.18331417220174862, 0.113745182072432],
              [0.24022162381327106, 0.3394695622533106, 0.1341595066017014]]],


            [[[0.5055615569388828, 0.051597282072282646, 0.6402903936686337],
              [0.43073311517251, 0.8294731834714112, 0.1774668847323424],
              [0.3207001991262245, 0.04288308912457006, 0.30280282975568984]],

             [[0.6751777088333762, 0.569537369330242, 0.5584738347504452],
              [0.08313242153985256, 0.06016544344162322, 0.10795752845152584],
              [0.7486153608562472, 0.943918041459349, 0.4863558118797222]],

             [[0.4181986264486809, 0.6524078485043804, 0.024242983423721887],
              [0.13458171554507403, 0.3663418070512402, 0.2958297395361563],
              [0.9236695822497084, 0.6899291482654177, 0.7418981733448822]],

             [[0.25000547599982104, 0.6034295486281007, 0.9872887878887768],
              [0.5926057265215715, 0.8846724004467684, 0.5434495396894328],
              [0.6607698886038497, 0.3771277082495921, 0.3580209022231813]]]]
    print("Acts2", len(activations), len(activations[0]), len(activations[0][0]), len(activations[0][0][0]))

    # Reference values (one cost per batch item; gradients match the
    # activation layout above).
    expected_costs = [4.2806528590890736, 3.9384369822503591]
    expected_grads = [
            [[[-0.4322264564338117, -0.5677735435661883, 0.0],
              [-0.36565009313836844, 0.0, -0.20212345042782007],
              [-0.20212345042782007, 0.0, 0.0]],

             [[-0.16521672442463506, -0.2670097320091765, 0.0],
              [-0.3943653886107811, 0.0, -0.2382944365367636],
              [-0.44041788696458367, 0.0, 0.0]],

             [[-0.052129794015740985, -0.11308693040889405, 0.0],
              [-0.18313786985332664, 0.0, -0.3243144491663483],
              [-0.7647323361309323, 0.0, 0.0]],

             [[0.0, -0.052129794015740985, 0.0],
              [0.0, 0.0, -0.23526766386906767],
              [-1.0, 0.0, 0.0]]],

            [[[-0.7161424128232795, -0.2838575871767207, 0.0],
              [-0.18382932237365335, -0.10002826480306751, 0.0],
              [-0.10002826480306751, 0.0, 0.0]],

             [[-0.41121794618117213, -0.3049244666421072, 0.0],
              [-0.3295759402552584, -0.15917784876050195, 0.0],
              [-0.2592061135635692, 0.0, 0.0]],

             [[-0.11607642141651396, -0.29514152476465827, 0.0],
              [-0.2865333615432337, -0.3381841034766833, 0.0],
              [-0.5973902170402529, 0.0, 0.0]],

             [[0.0, -0.11607642141651396, 0.0],
              [0.0, -0.4026097829597475, 0.0],
              [-1.0, 0.0, 0.0]]]]

    activations = np.array(activations)
    labels = [[1, 2],
              [1, 1]]

    tfn = Transducer(blank_label=0)
    costs, grads = wrap_and_call(tfn, activations, labels)

    assert np.allclose(costs, expected_costs), \
        "big_test average costs mismatch."

    assert np.allclose(grads, expected_grads), \
        "big_test grads for average cost mismatch."
Exemple #26
0
class TestTransducer(unittest.TestCase):
    """Tests for Transducer construction/intersection and the State, Arc
    and CostVector helper classes, mostly checked against pickled fixtures."""

    def setUp(self):
        """Build a two-state fixture transducer, a looped copy of it, and an
        intersection of three constraint transducers."""
        self.feature_table = FeatureTable.load(get_feature_table_fixture("feature_table.json"))
        self.phonotactic_test_feature_table = FeatureTable.load(get_feature_table_fixture(
            "phonotactic_test_feature_table.json"))
        self.transducer = Transducer(self.feature_table.get_segments())
        self.state1 = State('q1')
        self.state2 = State('q2')
        self.transducer.add_state(self.state1)
        self.transducer.add_state(self.state2)
        self.transducer.initial_state = self.state1
        self.transducer.add_final_state(self.state2)
        self.cost_vector1 = CostVector([3, 1, 0])
        self.cost_vector2 = CostVector([2, 0, 0])
        # single a:b arc with cost [0, 1, 0] from q1 to q2
        self.arc = Arc(self.state1, Segment('a', self.feature_table), Segment('b', self.feature_table), CostVector([0, 1, 0]), self.state2)
        self.transducer.add_arc(self.arc)

        self.simple_transducer = self.transducer
        self.loops_transducer = deepcopy(self.transducer)
        zero_cost_vector = CostVector([0])
        segment_a = Segment('a', self.feature_table)
        segment_b = Segment('b', self.feature_table)
        self.loops_transducer.add_arc(Arc(self.state1, JOKER_SEGMENT, segment_a, zero_cost_vector, self.state1))
        self.loops_transducer.add_arc(Arc(self.state1, JOKER_SEGMENT, segment_b, zero_cost_vector, self.state1))
        self.loops_transducer.add_arc(Arc(self.state2, NULL_SEGMENT, segment_a, zero_cost_vector, self.state2))
        self.loops_transducer.add_arc(Arc(self.state2, NULL_SEGMENT, segment_b, zero_cost_vector, self.state2))

        phonotactic = PhonotacticConstraint([{'cons': '+'}, {'voice': '+'}, {'labial': '+'}],
                                                         self.phonotactic_test_feature_table).get_transducer()
        dep = DepConstraint([{'labial': '-'}], self.phonotactic_test_feature_table).get_transducer()
        # renamed from `max` so the builtin is not shadowed
        max_constraint = MaxConstraint([{'voice': '-'}], self.phonotactic_test_feature_table).get_transducer()

        self.intersection_test_transducer = Transducer.intersection(phonotactic, dep, max_constraint)


    #Transducer tests:
    def test_transducer_equality(self):
        """Intersection should be associative: (f & p & m) == (f & (p & m))."""
        feature_table = FeatureTable.load(get_feature_table_fixture("a_b_and_cons_feature_table.json"))
        faith = FaithConstraint([], feature_table).get_transducer()
        phonotactic = PhonotacticConstraint([{'cons': '+'}], feature_table).get_transducer()
        # renamed from `max` so the builtin is not shadowed
        max_constraint = MaxConstraint([{'cons': '+'}], feature_table).get_transducer()
        transducer1 = Transducer.intersection(faith, phonotactic, max_constraint)
        temp_transducer = Transducer.intersection(phonotactic, max_constraint)
        transducer2 = Transducer.intersection(faith, temp_transducer)

        self.assertEqual(transducer1, transducer2)
        #write_to_dot_to_file(transducer1, "transducer1")
        #write_to_dot_to_file(transducer2, "transducer2")




    #one with constraint set

    #create with manual intersection


    def test_transducer_equality_with_deepcopy(self):
        """A deep copy must compare equal to the original transducer."""
        phonotactic_transducer = PhonotacticConstraint([{'cons': '+'}, {'voice': '+'}, {'labial': '+'}],
                                                         self.phonotactic_test_feature_table).get_transducer()
        phonotactic_transducer_copy = deepcopy(phonotactic_transducer)
        self.assertEqual(phonotactic_transducer, phonotactic_transducer_copy)

    def test_transducer_equality_with_pickle(self):
        """A freshly built transducer must equal the pickled fixture."""
        phonotactic_transducer = PhonotacticConstraint([{'cons': '+'}, {'voice': '+'}, {'labial': '+'}],
                                                         self.phonotactic_test_feature_table).get_transducer()
        pickled_phonotactic_transducer = get_pickle("equality_with_pickle_transducer")
        # (a bare `==` expression whose result was discarded has been removed;
        # assertEqual below performs the actual check)
        self.assertEqual(phonotactic_transducer, pickled_phonotactic_transducer)

    def test_transducer_intersection(self):
        self.assertEqual(self.intersection_test_transducer, get_pickle("intersection_test_transducer"))

    def test_transducer_clear_dead_states(self):
        """States q3/q4 are disconnected from the initial/final states and
        must be removed by clear_dead_states()."""
        transducer = Transducer(self.feature_table.get_segments())
        state1 = State('q1')
        state2 = State('q2')
        state3 = State('q3')
        state4 = State('q4')
        transducer.add_state(state1)
        transducer.add_state(state2)
        transducer.add_state(state3)
        transducer.add_state(state4)
        transducer.initial_state = state1
        transducer.add_final_state(state2)
        transducer.add_arc(Arc(state1, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state2))
        transducer.add_arc(Arc(state1, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state1))
        transducer.add_arc(Arc(state2, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state2))
        transducer.add_arc(Arc(state3, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state3))
        transducer.add_arc(Arc(state4, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), state3))
        transducer.clear_dead_states()
        self.assertEqual(transducer, get_pickle("clear_dead_states_test_transducer"))

    def test_get_arcs_by_origin_state(self):
        initial_state = self.intersection_test_transducer.initial_state
        arc_list = self.intersection_test_transducer.get_arcs_by_origin_state(initial_state)
        pickled_arc_list = get_pickle("get_arcs_by_origin_state_arc_list")
        self.assertTrue(_are_lists_equal(arc_list, pickled_arc_list))

    def test_get_arcs_by_terminal_state(self):
        # FIX(review): this previously called get_arcs_by_origin_state (a
        # copy-paste of the test above) while comparing against the
        # *terminal*-state fixture; regenerate the fixture if it was
        # produced by the buggy version.
        initial_state = self.intersection_test_transducer.initial_state
        arc_list = self.intersection_test_transducer.get_arcs_by_terminal_state(initial_state)
        pickled_arc_list = get_pickle("get_arcs_by_terminal_state_arc_list")
        self.assertTrue(_are_lists_equal(arc_list, pickled_arc_list))

    def test_get_range(self):
        pass  # see TestingParserSuite.test_geneare

    #State tests:
    def test_state_str(self):
        self.assertEqual(str(self.state1), "(q1,0)")

    def test_states_addition(self):
        # (a verbatim duplicate of this pair of statements was removed)
        new_state = State.states_addition(self.state1, self.state2)
        self.assertEqual(str(new_state), "(q1|q2,0)")

    #Arcs tests:
    def test_arc_str(self):
        self.assertEqual(str(self.arc), "['(q1,0)', 'a', 'b', '[0, 1, 0]', '(q2,0)']")

    #CostVector tests:
    def test_costVector_operations(self):
        self.assertEqual(self.cost_vector1 + self.cost_vector2, CostVector([5, 1, 0]))
        # `*` concatenates cost vectors, it is not element-wise multiplication
        self.assertEqual(self.cost_vector1 * self.cost_vector2, CostVector([3, 1, 0, 2, 0, 0]))
        self.assertEqual(self.cost_vector1 - self.cost_vector2, CostVector([1, 1, 0]))

    def test_costVector_comparison(self):
        # NOTE: `>` means "better" (lower cost), per CostVector's ordering
        self.assertTrue(CostVector([0, 0, 0, 0, 0]) > CostVector([0, 0, 1, 0, 0]))
        self.assertFalse(CostVector([1, 0, 1]) > CostVector([0, 2, 0]))
        self.assertTrue(CostVector([1000, 0, 76]) > CostVector.get_inf_vector())
        self.assertFalse(CostVector.get_inf_vector() > CostVector([0, 1, 2]))
        self.assertFalse(CostVector.get_inf_vector() > CostVector.get_inf_vector())

    def test_costVector_get_vector_with_size_n_and_number_m(self):
        self.assertEqual(CostVector.get_vector(4, 0), CostVector([0, 0, 0, 0]))
        self.assertEqual(CostVector.get_vector(1, 1), CostVector([1]))
        self.assertEqual(CostVector.get_vector(0, 0), CostVector([]))
        self.assertEqual(CostVector.get_empty_vector(), CostVector([]))

    def test_costVector_str(self):
        self.assertEqual(str(CostVector([1, 1, 0])), "[1, 1, 0]")

    def test_costVector_illegal_operation(self):
        # adding vectors of different lengths must raise
        with self.assertRaises(CostVectorOperationError):
            CostVector([1, 1]) + CostVector([1])

    def test_costVector_concatenation_with_empty_vector(self):
        cost_vector3 = CostVector([])
        self.assertEqual(self.cost_vector1 * cost_vector3, CostVector([3, 1, 0]))
        self.assertEqual(cost_vector3 * self.cost_vector1, CostVector([3, 1, 0]))
Exemple #27
0
            if c not in Sigma:
                Sigma[c] = len(Sigma)

    Sigma_inv = {}
    for x, y in Sigma.items():
        Sigma_inv[y] = x


    # test training data
    train = numerize(train_str, Sigma)

    # number of total insertions per string
    INSERTION_LIMIT = 3
    
    # transducer
    t = Transducer(len(Sigma), INSERTION_LIMIT)
    string1 = train[0][0]
    string2 = train[0][1]


    features = Features(Sigma, Sigma_inv)
    for upper, lower in train_str:
        #print upper, lower, len(features.features)
        features.extract(upper, URC=0, ULC=0, create=True)

    # get tensor
    # This is equivalent to the earlier tensor.
    # tensor_features is a list of sparse W tensors.
    # Every element of tensor_feature is 5 dimensional
    # where the first 4 are the same as the W tensor.
    # And the last dimension is feature_index into set of features.
Exemple #28
0
 def _make_transducer(self):
     """Return the transducer for this constraint set.

     With a single constraint, return a deep copy of its transducer (the
     pickle round-trip is a fast deep copy); otherwise return the
     intersection of all constraints' transducers.
     """
     # FIX: `is 1` compared object identity (works only by CPython's
     # small-int caching accident); use a value comparison.
     if len(self.constraints) == 1:                             # if there is only on constraint in the
         return pickle.loads(pickle.dumps(self.constraints[0].get_transducer(), -1))  # constraint set there is no need to intersect
     else:
         constraints_transducers = [constraint.get_transducer() for constraint in self.constraints]
         return Transducer.intersection(*constraints_transducers)
Exemple #29
0
 def test_transducer_clear_dead_states(self):
     """clear_dead_states() should remove q3/q4, which are disconnected
     from the initial/final component; compare against a pickled fixture."""
     transducer = Transducer(self.feature_table.get_segments())
     q1, q2, q3, q4 = State('q1'), State('q2'), State('q3'), State('q4')
     for state in (q1, q2, q3, q4):
         transducer.add_state(state)
     transducer.initial_state = q1
     transducer.add_final_state(q2)
     # q3 and q4 form a dead component: only q1 and q2 are live.
     for origin, terminal in ((q1, q2), (q1, q1), (q2, q2), (q3, q3), (q4, q3)):
         transducer.add_arc(Arc(origin, JOKER_SEGMENT, NULL_SEGMENT, CostVector([]), terminal))
     transducer.clear_dead_states()
     self.assertEqual(transducer, get_pickle("clear_dead_states_test_transducer"))
    def _make_transducer(self):
        """Build the linear acceptor for this word.

        State q_i means "the first i segments of the word have been read";
        every state also carries an epsilon (NULL input) self-loop.
        """
        alphabet = self.feature_table.get_segments()
        transducer = Transducer(alphabet, length_of_cost_vectors=0)
        word_segments = self.get_segments()
        word_length = len(self.word_string)
        chain = [State("q{}".format(index), index) for index in range(word_length + 1)]
        for index, state in enumerate(chain):
            transducer.add_state(state)
            # free insertion self-loop (NULL input, any output)
            transducer.add_arc(Arc(state, NULL_SEGMENT, JOKER_SEGMENT, CostVector.get_empty_vector(), state))
            if index < word_length:
                # consume segment `index` and advance along the chain
                transducer.add_arc(Arc(state, word_segments[index], JOKER_SEGMENT, CostVector.get_empty_vector(), chain[index + 1]))

        transducer.initial_state = chain[0]
        transducer.add_final_state(chain[word_length])
        return transducer
Exemple #31
0
            return []


if __name__ == "__main__":
    # Python 2 command-line driver: load an HFST runtime transducer from the
    # file given on the command line and analyze strings read from stdin,
    # one per line, until EOF.
    if len(sys.argv) != 2:
        print "Usage: python HfstRuntimeReader FILE"
        sys.exit()
    # open in binary mode: the header/alphabet/transition tables are binary
    transducerfile = open(sys.argv[1], "rb")
    header = Header(transducerfile)
    print "header read"
    alphabet = Alphabet(transducerfile, header.number_of_symbols)
    print "alphabet read"
    # the header flags whether the transducer carries weights; pick the
    # matching reader class
    if header.weighted:
        transducer = TransducerW(transducerfile, header, alphabet)
    else:
        transducer = Transducer(transducerfile, header, alphabet)
    print "transducer ready"
    print

    # interactive loop: exit cleanly on EOF (Ctrl-D)
    while True:
        try:
            string = raw_input()
        except EOFError:
            sys.exit(0)
        print string + ":"
        if transducer.analyze(string):
            transducer.printAnalyses()
            print
        else:
            # tokenization failed
            pass
    def test_viterbi(self):
        """viterbi() should return the best-scoring token sequence for each
        batch element, with and without an optional blank token."""
        T = 5  # time steps
        N = 4  # number of tokens
        B = 2  # batch size (not referenced below; kept from the original)

        # fmt: off
        emissions1 = torch.tensor(
            (
                0,
                4,
                0,
                1,
                0,
                2,
                1,
                1,
                0,
                0,
                0,
                2,
                0,
                0,
                0,
                2,
                8,
                0,
                0,
                2,
            ),
            dtype=torch.float,
        ).view(T, N)
        emissions2 = torch.tensor(
            (
                0,
                2,
                1,
                7,
                0,
                2,
                9,
                1,
                0,
                0,
                0,
                2,
                0,
                0,
                5,
                2,
                1,
                0,
                0,
                2,
            ),
            dtype=torch.float,
        ).view(T, N)
        # fmt: on

        # Test without blank:
        labels = [[1, 3, 0], [3, 2, 3, 2, 3]]
        transducer = Transducer(
            tokens=["a", "b", "c", "d"],
            graphemes_to_idx={
                "a": 0,
                "b": 1,
                "c": 2,
                "d": 3
            },
            blank="none",
        )
        emissions = torch.stack([emissions1, emissions2], dim=0)
        predictions = transducer.viterbi(emissions)
        # predictions come back as tensors; compare as plain lists
        self.assertEqual([p.tolist() for p in predictions], labels)

        # Test with blank without repeats:
        labels = [[1, 0], [2, 2]]
        transducer = Transducer(
            tokens=["a", "b", "c"],
            graphemes_to_idx={
                "a": 0,
                "b": 1,
                "c": 2
            },
            blank="optional",
            allow_repeats=False,
        )
        emissions = torch.stack([emissions1, emissions2], dim=0)
        predictions = transducer.viterbi(emissions)
        self.assertEqual([p.tolist() for p in predictions], labels)
Exemple #33
0
    def _make_transducer(self):
        """Build the Precede-constraint transducer: a vowel read before any
        stress mark costs 1; after stress has been seen, vowels are free."""
        segments = self.feature_table.get_segments()
        transducer = Transducer(segments, name=str(self))

        before_stress = State('Precede1')
        after_stress = State('Precede2')   # After seeing +stress (now it is okay to see +vowel)
        transducer.add_state(before_stress)
        transducer.add_state(after_stress)
        transducer.initial_state = before_stress
        transducer.add_final_state(before_stress)
        transducer.add_final_state(after_stress)

        for segment in segments:
            symbol = segment.get_symbol()
            if symbol in yimas_vowels:
                # vowel before stress: one violation; after stress: free
                transducer.add_arc(Arc(before_stress, JOKER_SEGMENT, segment, CostVector([1]), before_stress))
                transducer.add_arc(Arc(after_stress, JOKER_SEGMENT, segment, CostVector([0]), after_stress))
            elif symbol == "'":
                # stress mark: enter (or stay in) the after-stress state
                transducer.add_arc(Arc(before_stress, JOKER_SEGMENT, segment, CostVector([0]), after_stress))
                transducer.add_arc(Arc(after_stress, JOKER_SEGMENT, segment, CostVector([0]), after_stress))
            elif symbol in yimas_cons:
                # consonants are neutral: free self-loops in both states
                transducer.add_arc(Arc(before_stress, JOKER_SEGMENT, segment, CostVector([0]), before_stress))
                transducer.add_arc(Arc(after_stress, JOKER_SEGMENT, segment, CostVector([0]), after_stress))
            else:
                raise ConstraintError("{} not supported in this constraint".format(symbol))
        # free deletion (NULL output) self-loop on every state
        for state in transducer.states:
            transducer.add_arc(Arc(state, JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]), state))

        return transducer
Exemple #34
0
    def _make_transducer(self):
        """Build the transducer for this multi-bundle constraint.

        State q_i|j tracks how many of the constraint's feature bundles have
        been matched by consecutive segments (i) together with a memory
        index (j). NOTE(review): this high-level reading is inferred from
        the construction below — confirm against the constraint docs.
        """

        # length of the longest prefix of bundles that `segment` satisfies
        def compute_num_of_max_satisfied_bundle(segment):
            i = 0
            while i < n and symbol_bundle_characteristic_matrix[segment][i]:
                i += 1
            return i

        # highest k <= j+1 such that `segment` satisfies bundle k-1, else 0
        # (the for/else is redundant here: the loop never breaks, so `else`
        # runs exactly when no `return k` fired)
        def compute_highest_num_of_satisfied_bundle(segment, j):
            for k in range(j + 1, 0,-1):
                if symbol_bundle_characteristic_matrix[segment][k-1]:
                    return k
            else:
                return 0

        n = len(self.feature_bundles) - 1
        segments = self.feature_table.get_segments()
        transducer = Transducer(segments, name=str(self))

        # matrix[segment][i] == does `segment` satisfy feature bundle i
        symbol_bundle_characteristic_matrix = {segment: [segment.has_feature_bundle(self.feature_bundles[i])
                                                         for i in range(n+1)]
                                               for segment in segments}


        states = {i: {j: 0 for j in range(i)} for i in range(n+1)}

        initial_state = State('q0|0')    # here we use a tuple as label. it will change at the end of this function
        states[0][0] = initial_state

        transducer.set_as_single_state(initial_state)


        if not n:
            # single-bundle constraint: one state, cost 1 whenever the
            # segment satisfies the bundle
            for segment in segments:
                transducer.add_arc(Arc(states[0][0], JOKER_SEGMENT, segment, CostVector([int(symbol_bundle_characteristic_matrix[segment][0])]), states[0][0]))
            transducer.add_arc(Arc(states[0][0], JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]), states[0][0]))

        else:
            # create the q_i|j state grid
            for i in range(0, n+1):
                for j in range(i):
                    state = State('q{0}|{1}'.format(i,j))
                    states[i][j] = state
                    transducer.add_state(state)
            max_num_of_satisfied_bundle_by_segment = {segment: compute_num_of_max_satisfied_bundle(segment)
                                                      for segment in segments}
            # arcs out of the initial state: advance to level 1 iff the
            # segment satisfies bundle 0 (bool indexes as 0/1)
            for segment in segments:
                transducer.add_arc(Arc(states[0][0], JOKER_SEGMENT, segment, CostVector([0]),
                                       states[symbol_bundle_characteristic_matrix[segment][0]][0]))
            for i in range(n+1):
                for j in range(i):
                    state = states[i][j]
                    transducer.add_final_state(state)
                    if i != n:
                        # not yet at the last bundle: advance or fall back
                        for segment in segments:
                            if symbol_bundle_characteristic_matrix[segment][i]:
                                new_state_level = i+1
                                new_state_mem = min([j+1, max_num_of_satisfied_bundle_by_segment[segment]])
                            else:
                                new_state_level = compute_highest_num_of_satisfied_bundle(segment, j)
                                new_state_mem = min([max_num_of_satisfied_bundle_by_segment[segment],
                                                     abs(new_state_level - 1)])
                            new_terminus = states[new_state_level][new_state_mem]
                            transducer.add_arc(Arc(state, JOKER_SEGMENT, segment, CostVector([0]), new_terminus))
                    else:  # i = n
                        # at the last bundle: cost 1 iff the segment also
                        # satisfies bundle n (full match completed)
                        for segment in segments:
                            new_state_level = compute_highest_num_of_satisfied_bundle(segment, j)
                            new_state_mem = min([max_num_of_satisfied_bundle_by_segment[segment],
                                                 abs(new_state_level - 1)])
                            new_terminus = states[new_state_level][new_state_mem]
                            transducer.add_arc(Arc(state, JOKER_SEGMENT, segment,
                                                   CostVector([int(symbol_bundle_characteristic_matrix[segment][i])]), new_terminus))

        transducer.clear_dead_states()
        # free deletion (NULL output) self-loop on every surviving state
        for state in transducer.states:
            transducer.add_arc(Arc( state, JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]), state))

        return transducer
def send_by_transducer(coder, target_str):
    """Encode target_str (as a list of character codes) with coder and
    transmit it through a Transducer."""
    print("Encoding...")
    encoder = Transducer(mode=1, debug=0, coder=coder)
    payload = [ord(character) for character in target_str]
    encoder.send(payload)
def write_by_transducer(coder, target_str, file_name):
    """Encode target_str (as a list of character codes) with coder and
    write the result to file_name via a Transducer."""
    print("Encoding...")
    encoder = Transducer(mode=1, debug=0, coder=coder)
    payload = [ord(character) for character in target_str]
    encoder.write_to_file(payload, file_name)
def optimize_transducer_grammar_for_word(word, eval):
    """Prune `eval` (a transducer) to only the best-cost arcs for `word`.

    Walks the word's segment indices, keeping at each index only the arcs
    chosen by _best_arcs and the states they reach, then keeps the final
    state(s) with the best cost. Returns the pruned transducer.

    Note: the parameter name `eval` shadows the builtin but is kept for
    caller compatibility. (A `states_by_index` grouping that was computed
    here and never used has been removed.)
    """
    # Group arcs by the index of their origin state. setdefault avoids the
    # `if key in d.keys()` double-lookup pattern of the original.
    arcs_by_index = {}
    for arc in eval._arcs:
        arcs_by_index.setdefault(arc.origin_state.index, []).append(arc)

    new_transducer = Transducer(eval.get_alphabet())

    # best known cumulative cost vector for each retained state
    state_costs = {}
    new_transducer.add_state(eval.initial_state)
    new_transducer.initial_state = eval.initial_state
    state_costs[eval.initial_state] = CostVector.get_vector(eval.get_length_of_cost_vectors(), 0)

    for index in range(len(word.get_segments())):
        new_arcs = _best_arcs(arcs_by_index[index], state_costs)
        for arc in new_arcs:
            new_transducer.add_arc(arc)
            new_transducer.add_state(arc.terminal_state)
            state_costs[arc.terminal_state] = state_costs[arc.origin_state] + arc.cost_vector

    # Keep every final state tied for the best cost (`>` is CostVector's
    # "better than" ordering).
    new_final_states = [eval.final_states[0]]
    for state in eval.final_states[1:]:
        state_cost = state_costs[state]
        final_cost = state_costs[new_final_states[0]]
        if state_cost > final_cost:
            new_final_states = [state]
        elif state_cost == final_cost:
            new_final_states.append(state)

    for state in new_final_states:
        new_transducer.add_final_state(state)

    #new_transducer.clear_dead_states(with_impasse_states=True) #TODO give it a try

    return new_transducer
    def _make_transducer(self):
        """Build the transducer for this multi-bundle constraint.

        State q_i|j tracks how many of the constraint's feature bundles have
        been matched by consecutive segments (i) together with a memory
        index (j). NOTE(review): this high-level reading is inferred from
        the construction below — confirm against the constraint docs.
        """
        # length of the longest prefix of bundles that `segment` satisfies
        def compute_num_of_max_satisfied_bundle(segment):
            i = 0
            while i < n and symbol_bundle_characteristic_matrix[segment][i]:
                i += 1
            return i

        # highest k <= j+1 such that `segment` satisfies bundle k-1, else 0
        # (the for/else is redundant: the loop never breaks)
        def compute_highest_num_of_satisfied_bundle(segment, j):
            for k in range(j + 1, 0, -1):
                if symbol_bundle_characteristic_matrix[segment][k - 1]:
                    return k
            else:
                return 0

        n = len(self.feature_bundles) - 1
        segments = self.feature_table.get_segments()
        transducer = Transducer(segments, name=str(self))

        # matrix[segment][i] == does `segment` satisfy feature bundle i
        symbol_bundle_characteristic_matrix = {
            segment: [
                segment.has_feature_bundle(self.feature_bundles[i])
                for i in range(n + 1)
            ]
            for segment in segments
        }

        states = {i: {j: 0 for j in range(i)} for i in range(n + 1)}

        initial_state = State(
            'q0|0'
        )  # here we use a tuple as label. it will change at the end of this function
        states[0][0] = initial_state

        transducer.set_as_single_state(initial_state)

        if not n:
            # single-bundle constraint: one state, cost 1 whenever the
            # segment satisfies the bundle (bool cast to 0/1)
            for segment in segments:
                transducer.add_arc(
                    Arc(
                        states[0][0], JOKER_SEGMENT, segment,
                        CostVector([
                            int(symbol_bundle_characteristic_matrix[segment]
                                [0])
                        ]), states[0][0]))
            transducer.add_arc(
                Arc(states[0][0], JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]),
                    states[0][0]))

        else:
            # create the q_i|j state grid
            for i in range(0, n + 1):
                for j in range(i):
                    state = State('q{0}|{1}'.format(i, j))
                    states[i][j] = state
                    transducer.add_state(state)
            max_num_of_satisfied_bundle_by_segment = {
                segment: compute_num_of_max_satisfied_bundle(segment)
                for segment in segments
            }
            # arcs out of the initial state: advance to level 1 iff the
            # segment satisfies bundle 0 (bool indexes as 0/1)
            for segment in segments:
                transducer.add_arc(
                    Arc(
                        states[0][0], JOKER_SEGMENT, segment, CostVector([0]),
                        states[symbol_bundle_characteristic_matrix[segment]
                               [0]][0]))
            for i in range(n + 1):
                for j in range(i):
                    state = states[i][j]
                    transducer.add_final_state(state)
                    if i != n:
                        # not yet at the last bundle: advance or fall back
                        for segment in segments:
                            if symbol_bundle_characteristic_matrix[segment][i]:
                                new_state_level = i + 1
                                new_state_mem = min([
                                    j + 1,
                                    max_num_of_satisfied_bundle_by_segment[
                                        segment]
                                ])
                            else:
                                new_state_level = compute_highest_num_of_satisfied_bundle(
                                    segment, j)
                                new_state_mem = min([
                                    max_num_of_satisfied_bundle_by_segment[
                                        segment],
                                    abs(new_state_level - 1)
                                ])
                            new_terminus = states[new_state_level][
                                new_state_mem]
                            transducer.add_arc(
                                Arc(state, JOKER_SEGMENT, segment,
                                    CostVector([0]), new_terminus))
                            # NOTE(review): this variant also adds a free
                            # self-loop on the target state — confirm this
                            # is intended and not an accidental duplicate
                            transducer.add_arc(
                                Arc(new_terminus, JOKER_SEGMENT, segment,
                                    CostVector([0]), new_terminus))
                    else:  # i = n
                        # at the last bundle: cost 1 iff the segment also
                        # satisfies bundle n (full match completed)
                        for segment in segments:
                            new_state_level = compute_highest_num_of_satisfied_bundle(
                                segment, j)
                            new_state_mem = min([
                                max_num_of_satisfied_bundle_by_segment[
                                    segment],
                                abs(new_state_level - 1)
                            ])
                            new_terminus = states[new_state_level][
                                new_state_mem]
                            transducer.add_arc(
                                Arc(
                                    state, JOKER_SEGMENT, segment,
                                    CostVector([
                                        int(symbol_bundle_characteristic_matrix[
                                            segment][i])
                                    ]), new_terminus))

        transducer.clear_dead_states()
        # free deletion (NULL output) self-loop on every surviving state
        for state in transducer.states:
            transducer.add_arc(
                Arc(state, JOKER_SEGMENT, NULL_SEGMENT, CostVector([0]),
                    state))
        return transducer
    def test_ctc(self):
        """CTC-style loss and gradient checks against fixed reference values
        (loss via assertAlmostEqual, gradients via allclose)."""
        T = 5  # time steps
        N = 6  # number of tokens (5 labels + blank)

        # Test 1
        labels = [[0, 1, 2, 1, 0]]
        # fmt: off
        emissions = torch.tensor(
            (
                0.633766,
                0.221185,
                0.0917319,
                0.0129757,
                0.0142857,
                0.0260553,
                0.111121,
                0.588392,
                0.278779,
                0.0055756,
                0.00569609,
                0.010436,
                0.0357786,
                0.633813,
                0.321418,
                0.00249248,
                0.00272882,
                0.0037688,
                0.0663296,
                0.643849,
                0.280111,
                0.00283995,
                0.0035545,
                0.00331533,
                0.458235,
                0.396634,
                0.123377,
                0.00648837,
                0.00903441,
                0.00623107,
            ),
            requires_grad=True,
        )
        # fmt: on
        # the transducer consumes log-probabilities; retain_grad so the
        # non-leaf log tensor records its gradient for the check below
        log_emissions = torch.log(emissions.view(1, T, N))
        log_emissions.retain_grad()
        transducer = Transducer(
            tokens=["a", "b", "c", "d", "e"],
            graphemes_to_idx={
                "a": 0,
                "b": 1,
                "c": 2,
                "d": 3,
                "e": 4
            },
            blank="optional",
        )

        loss = transducer(log_emissions, labels)
        self.assertAlmostEqual(loss.item(), 3.34211, places=4)
        # retain_graph=True: the same graph is reused by the second test
        loss.backward(retain_graph=True)
        # fmt: off
        expected_grad = torch.tensor((
            -0.366234,
            0.221185,
            0.0917319,
            0.0129757,
            0.0142857,
            0.0260553,
            0.111121,
            -0.411608,
            0.278779,
            0.0055756,
            0.00569609,
            0.010436,
            0.0357786,
            0.633813,
            -0.678582,
            0.00249248,
            0.00272882,
            0.0037688,
            0.0663296,
            -0.356151,
            0.280111,
            0.00283995,
            0.0035545,
            0.00331533,
            -0.541765,
            0.396634,
            0.123377,
            0.00648837,
            0.00903441,
            0.00623107,
        )).view(1, T, N)
        # fmt: on
        self.assertTrue(log_emissions.grad.allclose(expected_grad))

        # Test 2: same loss but with allow_repeats=False
        labels = [[0, 1, 1, 0]]
        # fmt: off
        emissions = torch.tensor(
            (
                0.30176,
                0.28562,
                0.0831517,
                0.0862751,
                0.0816851,
                0.161508,
                0.24082,
                0.397533,
                0.0557226,
                0.0546814,
                0.0557528,
                0.19549,
                0.230246,
                0.450868,
                0.0389607,
                0.038309,
                0.0391602,
                0.202456,
                0.280884,
                0.429522,
                0.0326593,
                0.0339046,
                0.0326856,
                0.190345,
                0.423286,
                0.315517,
                0.0338439,
                0.0393744,
                0.0339315,
                0.154046,
            ),
            requires_grad=True,
        )
        # fmt: on
        log_emissions = torch.log(emissions.view(1, T, N))
        log_emissions.retain_grad()
        transducer = Transducer(
            tokens=["a", "b", "c", "d", "e"],
            graphemes_to_idx={
                "a": 0,
                "b": 1,
                "c": 2,
                "d": 3,
                "e": 4
            },
            blank="optional",
            allow_repeats=False,
        )
        loss = transducer(log_emissions, labels)
        self.assertAlmostEqual(loss.item(), 5.42262, places=4)
        loss.backward()

        # fmt: off
        expected_grad = torch.tensor((
            -0.69824,
            0.28562,
            0.0831517,
            0.0862751,
            0.0816851,
            0.161508,
            0.24082,
            -0.602467,
            0.0557226,
            0.0546814,
            0.0557528,
            0.19549,
            0.230246,
            0.450868,
            0.0389607,
            0.038309,
            0.0391602,
            -0.797544,
            0.280884,
            -0.570478,
            0.0326593,
            0.0339046,
            0.0326856,
            0.190345,
            -0.576714,
            0.315517,
            0.0338439,
            0.0393744,
            0.0339315,
            0.154046,
        )).view(1, T, N)
        # fmt: on
        self.assertTrue(log_emissions.grad.allclose(expected_grad))
Exemple #40
0
 def _make_transducer(self):
     """Return the transducer for this constraint set.

     With a single constraint, return a deep copy of its transducer (the
     pickle round-trip is a fast deep copy); otherwise return the
     intersection of all constraints' transducers.
     """
     # FIX: `is 1` compared object identity (works only by CPython's
     # small-int caching accident); use a value comparison.
     if len(self.constraints) == 1:                             # if there is only on constraint in the
         return pickle.loads(pickle.dumps(self.constraints[0].get_transducer(), -1))  # constraint set there is no need to intersect
     else:
         constraints_transducers = [constraint.get_transducer() for constraint in self.constraints]
         return Transducer.intersection(*constraints_transducers)
Exemple #41
0
    def _make_transducer(self):
        """Build the Contiguity-constraint transducer: insertions and
        deletions are free before stress ("'") but cost 1 once stress has
        been read (arcs leaving Contiguity2)."""
        segments = self.feature_table.get_segments()
        transducer = Transducer(segments, name=str(self))

        base = State('Contiguity1')
        after_stress = State('Contiguity2')
        transducer.add_state(base)
        transducer.add_state(after_stress)
        transducer.initial_state = base
        transducer.add_final_state(base)
        transducer.add_final_state(after_stress)

        for segment in segments:
            # insertion / deletion arcs: free from base, cost 1 after stress
            transducer.add_arc(Arc(base, NULL_SEGMENT, segment, CostVector([0]), base))
            transducer.add_arc(Arc(base, segment, NULL_SEGMENT, CostVector([0]), base))
            transducer.add_arc(Arc(after_stress, NULL_SEGMENT, segment, CostVector([1]), base))
            transducer.add_arc(Arc(after_stress, segment, NULL_SEGMENT, CostVector([1]), base))
            symbol = segment.get_symbol()
            if symbol in yimas_vowels:
                # faithful vowel: return to (or stay in) the base state
                transducer.add_arc(Arc(base, segment, segment, CostVector([0]), base))
                transducer.add_arc(Arc(after_stress, segment, segment, CostVector([0]), base))
            elif symbol == "'":
                # faithful stress: move into the after-stress state
                transducer.add_arc(Arc(base, segment, segment, CostVector([0]), after_stress))
                transducer.add_arc(Arc(after_stress, segment, segment, CostVector([0]), after_stress))
            elif symbol in yimas_cons:
                # faithful consonant: return to (or stay in) the base state
                transducer.add_arc(Arc(base, segment, segment, CostVector([0]), base))
                transducer.add_arc(Arc(after_stress, segment, segment, CostVector([0]), base))
            else:
                raise ConstraintError("{} not supported in this constraint".format(symbol))


        return transducer
Exemple #42
0
    test = numerize(test_str, Sigma)

def g(theta):
    theta_g = zeros_like(theta)
    for i, (x, y) in enumerate(train):
        t.grad_features(x, y, i, theta, theta_g, features, threshold)
    return theta_g

    # number of total insertions per string
    INSERTION_LIMIT = 5

    # transducer
    t = Transducer(len(Sigma), INSERTION_LIMIT, features)
    #string1 = train[0][0]
    #string2 = train[0][1]

    theta = zeros((features.num_features))
    theta[0] = 10.0
    #theta = npr.rand(features.num_features)

    def f(theta):
        val = 0.0
        for i, (x, y) in enumerate(train):
            val += t.func_features(x, y, i, theta, features, 20)
        return val
        #return np.asarray(t.func_features(string1, string2, 0, theta, features))

    def f_tropical(theta):
    def test_asg(self):
        """Check the Transducer loss against precomputed ASG reference values.

        Runs a batch of B=3 utterances (T=5 frames, N=6 tokens) with fixed
        emissions and zero-initialized ASG transitions through the
        Transducer, then verifies the scalar loss, the emission gradients,
        and the transition gradients against hard-coded reference numbers
        (presumably generated by a reference ASG implementation — the
        source of the constants is not visible here).
        """
        T = 5  # frames per utterance
        N = 6  # alphabet size (number of tokens)
        B = 3  # batch size
        # Target label sequences, one per batch element (variable length).
        labels = [[2, 1, 5, 1, 3], [4, 3, 5], [3, 2, 2, 1]]
        # Fixed emission scores, shape (B, T, N); grad-enabled so we can
        # check backward() below.
        emissions = torch.tensor(
            [
                [
                    [-0.4340, -0.0254, 0.3667, 0.4180, -0.3805, -0.1707],
                    [0.1060, 0.3631, -0.1122, -0.3825, -0.0031, -0.3801],
                    [0.0443, -0.3795, 0.3194, -0.3130, 0.0094, 0.1560],
                    [0.1252, 0.2877, 0.1997, -0.4554, 0.2774, -0.2526],
                    [-0.4001, -0.2402, 0.1295, 0.0172, 0.1805, -0.3299],
                ],
                [
                    [0.3298, -0.2259, -0.0959, 0.4909, 0.2996, -0.2543],
                    [-0.2863, 0.3239, -0.3988, 0.0732, -0.2107, -0.4739],
                    [-0.0906, 0.0480, -0.1301, 0.3975, -0.3317, -0.1967],
                    [0.4372, -0.2006, 0.0094, 0.3281, 0.1873, -0.2945],
                    [0.2399, 0.0320, -0.3768, -0.2849, -0.2248, 0.3186],
                ],
                [
                    [0.0225, -0.3867, -0.1929, -0.2904, -0.4958, -0.2533],
                    [0.4001, -0.1517, -0.2799, -0.2915, 0.4198, 0.4506],
                    [0.1446, -0.4753, -0.0711, 0.2876, -0.1851, -0.1066],
                    [0.2081, -0.1190, -0.3902, -0.1668, 0.1911, -0.2848],
                    [-0.3846, 0.1175, 0.1052, 0.2172, -0.0362, 0.3055],
                ],
            ],
            requires_grad=True,
        )

        # One single-grapheme token per index, identity grapheme mapping.
        tokens = [(n, ) for n in range(N)]
        graphemes_to_idx = {n: n for n in range(N)}
        # ASG transition graph initialized to all-zero scores; shape (N+1, N)
        # as required by create_transitions_graph.
        asg_transitions = ASGLossFunction.create_transitions_graph(
            torch.zeros(N + 1, N))
        transducer = Transducer(
            tokens=tokens,
            graphemes_to_idx=graphemes_to_idx,
            transitions=asg_transitions,
        )

        # Forward: scalar loss averaged over the batch (hence places=4
        # against the reference value below).
        loss = transducer(emissions, labels)
        self.assertAlmostEqual(loss.item(), 7.47995, places=4)

        # Backward: emission gradients must match the reference, scaled by
        # 1/B because the loss is batch-averaged.
        loss.backward()
        expected_grad = torch.tensor([
            [
                [0.1060, 0.1595, -0.7639, 0.2485, 0.1118, 0.1380],
                [0.1915, -0.7524, 0.1539, 0.1175, 0.1717, 0.1178],
                [0.1738, 0.1137, 0.2288, 0.1216, 0.1678, -0.8057],
                [0.1766, -0.7923, 0.1902, 0.0988, 0.2056, 0.1210],
                [0.1212, 0.1422, 0.2059, -0.8160, 0.2166, 0.1300],
            ],
            [
                [0.2029, 0.1164, 0.1325, 0.2383, -0.8032, 0.1131],
                [0.1414, 0.2602, 0.1263, -0.3441, -0.3009, 0.1172],
                [0.1557, 0.1788, 0.1496, -0.5498, 0.0140, 0.0516],
                [0.2306, 0.1219, 0.1503, -0.4244, 0.1796, -0.2579],
                [0.2149, 0.1745, 0.1160, 0.1271, 0.1350, -0.7675],
            ],
            [
                [0.2195, 0.1458, 0.1770, -0.8395, 0.1307, 0.1666],
                [0.2148, 0.1237, -0.6613, -0.1223, 0.2191, 0.2259],
                [0.2002, 0.1077, -0.8386, 0.2310, 0.1440, 0.1557],
                [0.2197, -0.1466, -0.5742, 0.1510, 0.2160, 0.1342],
                [0.1050, -0.8265, 0.1714, 0.1917, 0.1488, 0.2094],
            ],
        ])
        expected_grad = expected_grad / B
        self.assertTrue(emissions.grad.allclose(expected_grad, rtol=1e-03))
        # Transition gradients: only the bigram part is compared — the slice
        # [N:] skips the first N transition parameters (presumably the
        # start/unigram scores — TODO confirm against the ASG layout).
        expected_trans_grad = (torch.tensor([
            [0.3990, 0.3396, 0.3486, 0.3922, 0.3504, 0.3155],
            [0.3666, 0.0116, -1.6678, 0.3737, 0.3361, -0.7152],
            [0.3468, 0.3163, -1.1583, -0.6803, 0.3216, 0.2722],
            [0.3694, -0.6688, 0.3047, -0.8531, -0.6571, 0.2870],
            [0.3866, 0.3321, 0.3447, 0.3664, -0.2163, 0.3039],
            [0.3640, -0.6943, 0.2988, -0.6722, 0.3215, -0.1860],
        ]).view(N, N) / B)
        trans_grad = transducer.transition_params.grad[N:].view(N, N)
        self.assertTrue(trans_grad.allclose(expected_trans_grad, rtol=1e-02))
Exemple #44
0
 def _base_faithfulness_transducer(self):
     """Build the common skeleton for faithfulness constraint transducers.

     Creates a single-state transducer over the feature table's segment
     inventory, named after this constraint's string representation.

     Returns:
         tuple: ``(transducer, segments, state)`` — the fresh transducer,
         its segment inventory, and the lone state ``q0``.
     """
     inventory = self.feature_table.get_segments()
     machine = Transducer(inventory, name=str(self))
     q0 = State('q0')
     machine.set_as_single_state(q0)
     return machine, inventory, q0
Exemple #45
0
#!/usr/bin/env python
"""
This module implements the BekProgram class which is used to convert
Transducer() objects into BEK programs.
"""
from sys import argv
from collections import defaultdict
from operator import attrgetter
from transducer import Transducer, EPSILON


class _BekState(object):
    """
    Simple storage class, it holds information regarding the lookahead path
    each BEK state process.
    """
    def __init__(self):
        self.la_trans_list = set([])
        self.prefix = []
        self.prefix_out = []


class BekProgram(object):
    """
    Implements a compiler to transform Transducer objects into BEK programs
    which can then be further analyzed using the BEK infrastructure. For more
    information see http://rise4fun.com/Bek/tutorial.

    The main public method is create_from_transducer() which will compile the
    BEK program from a transducer which can then be accessed in the bek_program
    public variable.
 def _base_faithfulness_transducer(self):
     """Return the shared starting point for faithfulness transducers.

     Returns:
         tuple: ``(transducer, segments, state)`` where the transducer is a
         new single-state machine over this constraint's segment inventory
         and ``state`` is its only state, ``q0``.
     """
     segs = self.feature_table.get_segments()
     base = Transducer(segs, name=str(self))
     sole_state = State('q0')
     base.set_as_single_state(sole_state)
     return base, segs, sole_state