Ejemplo n.º 1
0
 def test_sequence_matcher2(self):
     """Check distance() when the sequences are supplied after construction.

     The matcher is created without arguments and populated through
     ``set_seq1`` / ``set_seq2``; it is then repopulated with the operands
     swapped through ``set_seqs``. A distance of 2 is expected both ways.
     """
     a = ['a', 'b']
     b = ['a', 'b', 'd', 'c']
     sm = SequenceMatcher()
     sm.set_seq1(a)
     sm.set_seq2(b)
     # assertEqual reports both operands on failure; assertTrue(x == y)
     # would only report "False is not true".
     self.assertEqual(sm.distance(), 2)
     sm.set_seqs(b, a)
     self.assertEqual(sm.distance(), 2)
Ejemplo n.º 2
0
 def test_sequence_matcher(self):
     """Exercise the SequenceMatcher API on a small pair of sequences.

     Covers distance, the three ratio variants, the opcode list and the
     matching blocks for ``['a', 'b']`` against ``['a', 'b', 'd', 'c']``.
     """
     a = ['a', 'b']
     b = ['a', 'b', 'd', 'c']
     sm = SequenceMatcher(a=a, b=b)
     opcodes = [
         ['equal', 0, 1, 0, 1],
         ['equal', 1, 2, 1, 2],
         ['insert', 1, 1, 2, 3],
         ['insert', 1, 1, 3, 4],
     ]
     self.assertEqual(sm.distance(), 2)
     # Ratios are floats: compare approximately rather than bit-for-bit.
     self.assertAlmostEqual(sm.ratio(), 2 / 3)
     self.assertAlmostEqual(sm.quick_ratio(), 2 / 3)
     self.assertAlmostEqual(sm.real_quick_ratio(), 2 / 3)
     # Asked a second time on purpose, after the ratio calls.
     self.assertEqual(sm.distance(), 2)
     # This doesn't return anything, saves the value in the sm cache.
     self.assertFalse(sm._compute_distance_fast())
     self.assertEqual(sm.get_opcodes(), opcodes)
     self.assertEqual(
         list(sm.get_matching_blocks()),
         [[0, 0, 1], [1, 1, 1]],
     )
Ejemplo n.º 3
0
 def test_issue4(self):
     """Regression test for issue #4.

     See https://github.com/belambert/edit-distance/issues/4
     """
     eos = '<EOS>'
     # 18 distinct tokens followed by 11 end-of-sequence markers (29 total).
     reference = [
         'that', 'continuous', 'sanction', ':=', '(', 'flee', 'U',
         'complain', ')', 'E', 'attendance', 'eye', '^', 'flowery',
         'revelation', '^', 'ridiculous', 'destination',
     ] + [eos] * 11
     # 8 distinct tokens followed by 21 end-of-sequence markers (29 total).
     hypothesis = [
         'continuous', ':=', '(', 'sanction', '^', 'flee', '^', 'attendance',
     ] + [eos] * 21

     expected_opcodes = [
         ['delete', 0, 1, 0, 0],
         ['equal', 1, 2, 0, 1],
         ['delete', 2, 3, 0, 0],
         ['equal', 3, 4, 1, 2],
         ['equal', 4, 5, 2, 3],
         ['insert', 4, 4, 3, 4],
         ['insert', 4, 4, 4, 5],
         ['equal', 5, 6, 5, 6],
     ]
     # Positions 6..17 are one-for-one replacements, 18..28 one-for-one
     # matches on the trailing end-of-sequence markers.
     expected_opcodes.extend(
         ['replace', k, k + 1, k, k + 1] for k in range(6, 18)
     )
     expected_opcodes.extend(
         ['equal', k, k + 1, k, k + 1] for k in range(18, 29)
     )

     matcher = SequenceMatcher(a=reference, b=hypothesis)
     self.assertEqual(matcher.distance(), 16)
     self.assertEqual(matcher.get_opcodes(), expected_opcodes)
Ejemplo n.º 4
0
 def test_issue4_simpler(self):
     """Reduced reproduction of issue #4.

     See https://github.com/belambert/edit-distance/issues/4
     """
     expected_opcodes = [
         ['delete', 0, 1, 0, 0],
         ['equal', 1, 2, 0, 1],
         ['delete', 2, 3, 0, 0],
         ['equal', 3, 4, 1, 2],
         ['equal', 4, 5, 2, 3],
         ['insert', 4, 4, 3, 4],
         ['insert', 4, 4, 4, 5],
     ]
     matcher = SequenceMatcher(
         a=['that', 'continuous', 'sanction', ':=', '('],
         b=['continuous', ':=', '(', 'sanction', '^'],
     )
     self.assertEqual(matcher.distance(), 4)
     self.assertEqual(matcher.get_opcodes(), expected_opcodes)
Ejemplo n.º 5
0
 def test_issue4_simpler(self):
     """Reduced reproduction of issue #4.

     See https://github.com/belambert/edit-distance/issues/4
     """
     # Whitespace-separated tokens; split() yields the two five-token lists.
     reference = "that continuous sanction := (".split()
     hypothesis = "continuous := ( sanction ^".split()

     expected_rows = (
         ("delete", 0, 1, 0, 0),
         ("equal", 1, 2, 0, 1),
         ("delete", 2, 3, 0, 0),
         ("equal", 3, 4, 1, 2),
         ("equal", 4, 5, 2, 3),
         ("insert", 5, 5, 3, 4),
         ("insert", 5, 5, 4, 5),
     )
     # Converted to lists so the comparison below uses lists of lists, as
     # the original literal did.
     expected_opcodes = [list(row) for row in expected_rows]

     matcher = SequenceMatcher(a=reference, b=hypothesis)
     self.assertEqual(matcher.distance(), 4)
     self.assertEqual(matcher.get_opcodes(), expected_opcodes)
Ejemplo n.º 6
0
 def test_issue4(self):
     """Regression test for issue #4.

     See https://github.com/belambert/edit-distance/issues/4
     """
     padding_token = '<EOS>'

     # Both sequences are 29 tokens long: words first, then padding.
     source_words = [
         'that', 'continuous', 'sanction', ':=', '(', 'flee', 'U',
         'complain', ')', 'E', 'attendance', 'eye', '^', 'flowery',
         'revelation', '^', 'ridiculous', 'destination',
     ]
     target_words = [
         'continuous', ':=', '(', 'sanction', '^', 'flee', '^',
         'attendance',
     ]
     a = source_words + [padding_token] * 11
     b = target_words + [padding_token] * 21

     def aligned(tag, start, stop):
         """Build one-step opcodes with identical indices on both sides."""
         return [[tag, k, k + 1, k, k + 1] for k in range(start, stop)]

     head = [
         ['delete', 0, 1, 0, 0],
         ['equal', 1, 2, 0, 1],
         ['delete', 2, 3, 0, 0],
         ['equal', 3, 4, 1, 2],
         ['equal', 4, 5, 2, 3],
         ['insert', 4, 4, 3, 4],
         ['insert', 4, 4, 4, 5],
     ]
     target_opcodes = (
         head
         + aligned('equal', 5, 6)
         + aligned('replace', 6, 18)
         + aligned('equal', 18, 29)
     )

     sm = SequenceMatcher(a=a, b=b)
     self.assertEqual(sm.distance(), 16)
     self.assertEqual(sm.get_opcodes(), target_opcodes)
Ejemplo n.º 7
0
    def _update_state(self, y_true, y_pred, mask=None):
        """Accumulate edit distance and reference length over a batch.

        For every utterance the optional mask is applied, both sequences are
        passed through ``_merge_consequent_states``, and the edit distance
        between the results is added to ``self.edit_distance`` while the
        length of the merged reference is added to ``self.length``.

        Args:
            y_true: Indexable batch of reference sequences.
            y_pred: Indexable batch of predicted sequences; each entry must
                have the same length as the matching ``y_true`` entry.
            mask: Optional per-utterance selector used to drop padding,
                applied as ``seq[mask[i]]`` (presumably a boolean array per
                utterance; confirm against the caller). When ``None`` no
                padding is removed.

        Raises:
            AssertionError: If a reference and its prediction differ in
                length before masking.
        """
        for i, y_true_ in enumerate(y_true):
            # select utterance
            y_pred_ = y_pred[i]
            assert len(y_true_) == len(y_pred_)

            # remove padding; skipped when no mask is given, since the
            # signature's default ``mask=None`` used to raise TypeError here
            if mask is not None:
                y_true_ = y_true_[mask[i]]
                y_pred_ = y_pred_[mask[i]]

            # merge consecutive states
            y_true_ = _merge_consequent_states(y_true_)
            y_pred_ = _merge_consequent_states(y_pred_)

            # compute edit distance
            sm = SequenceMatcher(a=y_true_, b=y_pred_)
            edit_distance = sm.distance()

            # update state
            self.edit_distance.assign_add(edit_distance)
            self.length.assign_add(len(y_true_))
Ejemplo n.º 8
0
 def test_issue4(self):
     """Regression test for issue #4.

     See https://github.com/belambert/edit-distance/issues/4
     """
     eos = "<EOS>"

     # 18 words + 11 end-of-sequence markers = 29 tokens.
     a = [
         "that", "continuous", "sanction", ":=", "(", "flee", "U",
         "complain", ")", "E", "attendance", "eye", "^", "flowery",
         "revelation", "^", "ridiculous", "destination",
     ]
     a.extend([eos] * 11)

     # 8 words + 21 end-of-sequence markers = 29 tokens.
     b = [
         "continuous", ":=", "(", "sanction", "^", "flee", "^",
         "attendance",
     ]
     b.extend([eos] * 21)

     target_opcodes = [
         ["delete", 0, 1, 0, 0],
         ["equal", 1, 2, 0, 1],
         ["delete", 2, 3, 0, 0],
         ["equal", 3, 4, 1, 2],
         ["equal", 4, 5, 2, 3],
         ["insert", 5, 5, 3, 4],
         ["insert", 5, 5, 4, 5],
         ["equal", 5, 6, 5, 6],
     ]
     # From index 6 onwards both sides advance in lockstep: replacements
     # up to index 17, then matches on the trailing markers.
     for pos in range(6, 29):
         tag = "replace" if pos < 18 else "equal"
         target_opcodes.append([tag, pos, pos + 1, pos, pos + 1])

     sm = SequenceMatcher(a=a, b=b)
     self.assertEqual(sm.distance(), 16)
     self.assertEqual(sm.get_opcodes(), target_opcodes)
Ejemplo n.º 9
0
# Frame-by-frame evaluation at the phoneme level: map both state sequences
# through states2phones, then report accuracy and plot the confusion matrix.
y_pred_phones = states2phones(y_pred, phones, stateList)
y_true_phones = states2phones(y_true, phones, stateList)
accuracy.reset_states()
accuracy.update_state(y_true_phones, y_pred_phones)
print('Frame-by-frame accuracy at the phoneme level: {:.2f}%'.format(
    accuracy.result().numpy() * 100))
plt.figure()
plot_confusion_matrix(y_true_phones, y_pred_phones)
plt.title('Frame-by-frame confusion matrix at the phoneme level')

# PER at the state level
N = 10000  # number of frames to consider (distance computation is expensive)
y_pred_merged = merge_consequent_states(y_pred[:N])
y_true_merged = merge_consequent_states(y_true[:N])
sm = SequenceMatcher(a=y_true_merged, b=y_pred_merged)
edit_distance = sm.distance()
# Normalise by the length of the merged reference sequence, not by the
# number of frames N: the distance is computed between merged sequences,
# so dividing by N under-reports the rate. max(..., 1) avoids a division
# by zero on an empty reference.
print('PER at the state level: {:.2f}%'.format(
    edit_distance / max(len(y_true_merged), 1) * 100))

# PER at the phoneme level
y_pred_merged = merge_consequent_states(y_pred_phones[:N])
y_true_merged = merge_consequent_states(y_true_phones[:N])
sm = SequenceMatcher(a=y_true_merged, b=y_pred_merged)
edit_distance = sm.distance()
print('PER at the phoneme level: {:.2f}%'.format(
    edit_distance / max(len(y_true_merged), 1) * 100))

# posteriors for first utterance
utterance = testdata[0]
x, y = prepare_matrices([utterance],
                        K,
                        feature_type,
                        dynamic_features=dynamic_features,
Ejemplo n.º 10
0
        # Run transcribe() on the reference and on both model predictions.
        # Earlier two-argument call, kept for reference:
        # pred_four_layer_trans = transcribe(y_sample, prediction_four_layer)
        y_transcribed = transcribe(y_sample)
        pred_one_layer_trans = transcribe(prediction_one_layer)
        pred_four_layer_trans = transcribe(prediction_four_layer)

        # Show the reference and the two predictions in three stacked panels.
        fig, axs = plt.subplots(3)
        axs[0].set_title("Correct output, state level merged")
        axs[0].pcolormesh(y_transcribed.T)
        axs[1].set_title(name + " 1 layer")
        axs[1].pcolormesh(pred_one_layer_trans.T)
        axs[2].set_title(name + " 4 layers")
        axs[2].pcolormesh(pred_four_layer_trans.T)
        plt.show()

        # Edit distance between the transcribed reference and the 4-layer
        # prediction, scaled to a percentage.
        # NOTE(review): the second operand is prediction_four_layer_merged,
        # not pred_four_layer_trans computed above -- confirm this is intended.
        # NOTE(review): 324 is a hard-coded divisor, presumably the length of
        # the reference sequence for this sample -- confirm, and consider
        # deriving it from the data.
        seq1 = SequenceMatcher(y_transcribed, prediction_four_layer_merged)
        distance = seq1.distance() / 324 * 100

        # TODO: Then measure the Phone Error Rate (PER),
        #  that is the length normalised edit distance between the sequence
        #  of states from the DNN and the correct transcription

        # TODO: Use SequenceMatcher from edit distance to quickly calculate PER

        # Part 4: phoneme-level edit distance
        # Earlier two-argument calls, kept for reference:
        # y_transcribed_merged = transcribe(y_sample_merged, y_sample_merged)
        # pred_one_layer_trans_merged = transcribe(y_sample_merged, prediction_one_layer_merged)
        # pred_four_layer_trans_merged = transcribe(y_sample_merged, prediction_four_layer_merged)
        y_transcribed_merged = transcribe(y_sample_merged)
        pred_one_layer_trans_merged = transcribe(prediction_one_layer_merged)
        pred_four_layer_trans_merged = transcribe(prediction_four_layer_merged)