Esempio n. 1
0
 def test_as_training_data_error(self):
     """Check that ``as_training_data`` raises ``ValueError`` on bad input.

     Two cases are asserted: a pair whose label is ``None``, and the mode
     string ``"words+character"``.
     """
     # Build the pair outside the context manager so that only the
     # ``as_training_data`` call itself can satisfy ``assertRaises``; a
     # ValueError from the constructor would otherwise mask a regression.
     unlabelled_feature = IndexedPairFeature(
         [IndexedFeatureWord(1, [1, 2]),
          IndexedFeatureWord(4, [1, 2, 6])],
         [IndexedFeatureWord(1, [1, 2]),
          IndexedFeatureWord(3, [5])],
         None)
     with self.assertRaises(ValueError):
         unlabelled_feature.as_training_data()

     with self.assertRaises(ValueError):
         self.feature.as_training_data(mode="words+character")
Esempio n. 2
0
class TestIndexedPairFeature(DuplicateTestCase):
    """Tests for ``IndexedPairFeature``: lengths, padding, arrays, ordering."""

    def setUp(self):
        """Create ``self.feature``: a 5-word / 4-word pair labelled ``[0, 1]``."""
        super(TestIndexedPairFeature, self).setUp()
        first_sentence = [
            IndexedFeatureWord(1, [1, 2]),
            IndexedFeatureWord(2, [3, 4]),
            IndexedFeatureWord(3, [5]),
            IndexedFeatureWord(5, [1, 4, 1]),
            IndexedFeatureWord(4, [1, 2, 6]),
        ]
        second_sentence = [
            IndexedFeatureWord(1, [1, 2]),
            IndexedFeatureWord(8, [3, 1, 2, 1]),
            IndexedFeatureWord(2, [3, 4]),
            IndexedFeatureWord(3, [5]),
        ]
        self.feature = IndexedPairFeature(first_sentence, second_sentence,
                                          [0, 1])

    def test_get_lengths(self):
        """``get_lengths`` reports 5 sentence words and 4 word characters."""
        assert self.feature.get_lengths() == {
            "num_sentence_words": 5,
            'num_word_characters': 4
        }

    def test_pad_adds_padding_words(self):
        """Padding beyond the fixture's lengths fills words and chars with 0."""
        self.feature.pad({"num_sentence_words": 6, 'num_word_characters': 5})
        first_words, second_words = self.feature.get_int_word_indices()
        first_chars, second_chars = self.feature.get_int_char_indices()

        assert first_words == [1, 2, 3, 5, 4, 0]
        assert second_words == [1, 8, 2, 3, 0, 0]
        assert first_chars == [
            [1, 2, 0, 0, 0],
            [3, 4, 0, 0, 0],
            [5, 0, 0, 0, 0],
            [1, 4, 1, 0, 0],
            [1, 2, 6, 0, 0],
            [0, 0, 0, 0, 0],
        ]
        assert second_chars == [
            [1, 2, 0, 0, 0],
            [3, 1, 2, 1, 0],
            [3, 4, 0, 0, 0],
            [5, 0, 0, 0, 0],
            [0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0],
        ]
        assert self.feature.label == [0, 1]

    def test_pad_truncates(self):
        """Padding below the fixture's lengths keeps only the leading items."""
        self.feature.pad({"num_sentence_words": 2, 'num_word_characters': 3})
        first_words, second_words = self.feature.get_int_word_indices()
        first_chars, second_chars = self.feature.get_int_char_indices()

        assert first_words == [1, 2]
        assert second_words == [1, 8]
        assert first_chars == [[1, 2, 0], [3, 4, 0]]
        assert second_chars == [[1, 2, 0], [3, 1, 2]]
        assert self.feature.label == [0, 1]

    def test_pad_general(self):
        """Padding to ``get_lengths()`` pads only the shorter entries."""
        self.feature.pad(self.feature.get_lengths())
        first_words, second_words = self.feature.get_int_word_indices()
        first_chars, second_chars = self.feature.get_int_char_indices()

        assert first_words == [1, 2, 3, 5, 4]
        assert second_words == [1, 8, 2, 3, 0]
        assert first_chars == [
            [1, 2, 0, 0],
            [3, 4, 0, 0],
            [5, 0, 0, 0],
            [1, 4, 1, 0],
            [1, 2, 6, 0],
        ]
        assert second_chars == [
            [1, 2, 0, 0],
            [3, 1, 2, 1],
            [3, 4, 0, 0],
            [5, 0, 0, 0],
            [0, 0, 0, 0],
        ]
        assert self.feature.label == [0, 1]

    def test_as_training_data_produces_correct_numpy_arrays(self):
        """``as_training_data`` yields the expected arrays in every mode."""
        self.feature.pad({'num_sentence_words': 3, 'num_word_characters': 2})

        # Default mode: one word-index array per sentence.
        inputs, label = self.feature.as_training_data()
        assert_allclose(label[0], np.asarray([0, 1]))
        assert len(inputs) == 2
        assert_allclose(inputs[0], np.asarray([1, 2, 3]))
        assert_allclose(inputs[1], np.asarray([1, 8, 2]))

        # Character mode: one character-index matrix per sentence.
        inputs, label = self.feature.as_training_data(mode="character")
        assert_allclose(label[0], np.asarray([0, 1]))
        assert len(inputs) == 2
        assert_allclose(inputs[0], np.asarray([[1, 2], [3, 4], [5, 0]]))
        assert_allclose(inputs[1], np.asarray([[1, 2], [3, 1], [3, 4]]))

        # Combined mode: words then characters, for each sentence in turn.
        inputs, label = self.feature.as_training_data(mode="word+character")
        assert_allclose(label[0], np.asarray([0, 1]))
        assert len(inputs) == 4
        assert_allclose(inputs[0], np.asarray([1, 2, 3]))
        assert_allclose(inputs[1], np.asarray([[1, 2], [3, 4], [5, 0]]))
        assert_allclose(inputs[2], np.asarray([1, 8, 2]))
        assert_allclose(inputs[3], np.asarray([[1, 2], [3, 1], [3, 4]]))

    def test_as_training_data_error(self):
        """``as_training_data`` raises ``ValueError`` on bad label or mode.

        Two cases are asserted: a pair whose label is ``None``, and the mode
        string ``"words+character"``.
        """
        # Build the pair outside the context manager so that only the
        # ``as_training_data`` call itself can satisfy ``assertRaises``; a
        # ValueError from the constructor would otherwise mask a regression.
        unlabelled_feature = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(4, [1, 2, 6])],
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(3, [5])],
            None)
        with self.assertRaises(ValueError):
            unlabelled_feature.as_training_data()

        with self.assertRaises(ValueError):
            self.feature.as_training_data(mode="words+character")

    def test_as_testing_data_produces_correct_numpy_arrays(self):
        """``as_testing_data`` yields the expected arrays and no labels.

        Every mode goes through ``as_testing_data`` and re-binds ``labels``,
        so each ``len(labels) == 0`` assertion checks the result of the call
        immediately above it rather than a stale value.
        """
        self.feature.pad({'num_sentence_words': 4, 'num_word_characters': 2})

        # Default mode: one word-index array per sentence.
        inputs, labels = self.feature.as_testing_data()
        assert len(labels) == 0
        assert len(inputs) == 2
        assert_allclose(inputs[0], np.asarray([1, 2, 3, 5]))
        assert_allclose(inputs[1], np.asarray([1, 8, 2, 3]))

        # Character mode: one character-index matrix per sentence.
        inputs, labels = self.feature.as_testing_data(mode="character")
        assert len(labels) == 0
        assert len(inputs) == 2
        assert_allclose(inputs[0],
                        np.asarray([[1, 2], [3, 4], [5, 0], [1, 4]]))
        assert_allclose(inputs[1],
                        np.asarray([[1, 2], [3, 1], [3, 4], [5, 0]]))

        # Combined mode: words then characters, for each sentence in turn.
        inputs, labels = self.feature.as_testing_data(mode="word+character")
        assert len(labels) == 0
        assert len(inputs) == 4
        assert_allclose(inputs[0], np.asarray([1, 2, 3, 5]))
        assert_allclose(inputs[1],
                        np.asarray([[1, 2], [3, 4], [5, 0], [1, 4]]))
        assert_allclose(inputs[2], np.asarray([1, 8, 2, 3]))
        assert_allclose(inputs[3],
                        np.asarray([[1, 2], [3, 1], [3, 4], [5, 0]]))

    def test_as_testing_data_error(self):
        """``as_testing_data`` raises ``ValueError`` for "words+character"."""
        with self.assertRaises(ValueError):
            self.feature.as_testing_data(mode="words+character")

    def test_equals(self):
        """Equality holds for identical pairs and fails otherwise."""
        feature_1 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(4, [1, 2, 6])],
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(3, [5])], None)

        # Same contents as feature_1.
        feature_2 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(4, [1, 2, 6])],
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(3, [5])], None)

        # Differs from feature_1 in the second word of the first sentence.
        feature_3 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(1, [2, 2])],
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(3, [5])], None)

        # Differs from feature_1 in sentence lengths.
        feature_4 = IndexedPairFeature([IndexedFeatureWord(1, [1, 2])],
                                       [IndexedFeatureWord(1, [2, 2])], None)

        # assertEqual/assertNotEqual replace the deprecated
        # assertEquals/assertNotEquals aliases (removed in Python 3.12).
        self.assertNotEqual(feature_1, feature_4)
        self.assertNotEqual(feature_1, feature_3)
        self.assertFalse(feature_1.__eq__(0))
        self.assertEqual(feature_1, feature_2)

    def test_less_than(self):
        """``__lt__`` is false against ``0`` and orders the sample pairs."""
        feature_1 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(4, [1, 2, 6])],
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(3, [5])], None)

        # Same first sentence as feature_1; second sentence starts differently.
        feature_2 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(4, [1, 2, 6])],
            [IndexedFeatureWord(2, [2, 2]),
             IndexedFeatureWord(3, [5])], None)

        # One word per sentence.
        feature_3 = IndexedPairFeature([IndexedFeatureWord(1, [1, 2])],
                                       [IndexedFeatureWord(1, [2, 2])], None)

        self.assertFalse(feature_1.__lt__(0))
        self.assertFalse(feature_2.__lt__(feature_1))
        self.assertLess(feature_1, feature_2)
        self.assertLess(feature_3, feature_2)