Esempio n. 1
0
 def test_as_training_data_error(self):
     """Check that as_training_data() raises ValueError on invalid use.

     Two cases are exercised: a feature whose third constructor argument
     (presumably the label) is None, and a mode string that is expected
     to be rejected.
     """
     # Build the feature outside the assertRaises block so that a
     # ValueError raised by a constructor cannot make the test pass
     # without as_training_data() ever being called.
     feature = IndexedPairFeature([
         IndexedFeatureWord(1, [1, 2]),
         IndexedFeatureWord(4, [1, 2, 6])
     ], [IndexedFeatureWord(1, [1, 2]),
         IndexedFeatureWord(3, [5])], None)
     with self.assertRaises(ValueError):
         feature.as_training_data()
     # "words+character" must be rejected; the spelling used elsewhere in
     # these tests for the combined mode is "word+character".
     with self.assertRaises(ValueError):
         self.feature.as_training_data(mode="words+character")
Esempio n. 2
0
 def setUp(self):
     """Create the pair feature (5 words vs. 4 words, third arg [0, 1]) used by the tests."""
     super(TestIndexedPairFeature, self).setUp()
     # Each tuple holds the two positional arguments of IndexedFeatureWord.
     first_specs = (
         (1, [1, 2]),
         (2, [3, 4]),
         (3, [5]),
         (5, [1, 4, 1]),
         (4, [1, 2, 6]),
     )
     second_specs = (
         (1, [1, 2]),
         (8, [3, 1, 2, 1]),
         (2, [3, 4]),
         (3, [5]),
     )
     first_words = [IndexedFeatureWord(*spec) for spec in first_specs]
     second_words = [IndexedFeatureWord(*spec) for spec in second_specs]
     self.feature = IndexedPairFeature(first_words, second_words, [0, 1])
Esempio n. 3
0
    def test_less_than(self):
        """Exercise IndexedPairFeature.__lt__ against a non-feature and other features."""
        def build(first_specs, second_specs):
            # Each spec holds the two positional arguments of
            # IndexedFeatureWord; the third feature argument is always None.
            return IndexedPairFeature(
                [IndexedFeatureWord(*spec) for spec in first_specs],
                [IndexedFeatureWord(*spec) for spec in second_specs],
                None)

        base = build([(1, [1, 2]), (4, [1, 2, 6])],
                     [(1, [1, 2]), (3, [5])])
        # Differs from `base` only in the first word of the second sentence.
        variant = build([(1, [1, 2]), (4, [1, 2, 6])],
                        [(2, [2, 2]), (3, [5])])
        # One word per sentence.
        single = build([(1, [1, 2])], [(1, [2, 2])])

        # Direct dunder call: comparing with a non-feature must be falsy.
        self.assertFalse(base.__lt__(0))
        self.assertFalse(variant.__lt__(base))
        self.assertLess(base, variant)
        self.assertLess(single, variant)
Esempio n. 4
0
 def test_sort(self):
     """Check that IndexedDataset.sort() reorders the features in place.

     The expected list is compared against self.indexed_dataset.features
     before the sort (must differ) and after it (must match).
     """
     # lengths: 3, 4, 1, 2, 2
     # NOTE(review): the comment above is kept from the original; it does
     # not obviously match the features below -- confirm or remove.
     sorted_features = [IndexedPairFeature([IndexedFeatureWord(3, [1, 4, 1]),
                                             IndexedFeatureWord(1, [1, 5])],
                                            [IndexedFeatureWord(3, [1, 4, 1]),
                                             IndexedFeatureWord(1, [1, 5]),
                                             IndexedFeatureWord(3, [1, 4, 1]),
                                             IndexedFeatureWord(2, [2, 1])],
                                            [1, 0]),
                         IndexedPairFeature([IndexedFeatureWord(1, [1, 5]),
                                             IndexedFeatureWord(2, [2, 1]),
                                             IndexedFeatureWord(3, [1, 4, 1])],
                                            [IndexedFeatureWord(2, [2, 1]),
                                             IndexedFeatureWord(3, [1, 4, 1])],
                                            [0, 1])]
     self.assertNotEqual(sorted_features, self.indexed_dataset.features)
     self.indexed_dataset.sort()
     # assertEqual replaces the deprecated alias assertEquals, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(sorted_features, self.indexed_dataset.features)
Esempio n. 5
0
    def test_as_testing_data(self):
        """Verify as_testing_data() output for every mode on padded features.

        Two features built with None as their third argument are padded to
        the dataset's max lengths, then checked in the default mode,
        "character" mode and "word+character" mode. An unknown mode string
        must raise ValueError.
        """
        pair_a = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 4, 4]),
             IndexedFeatureWord(2, [2, 3]),
             IndexedFeatureWord(3, [5, 1])],
            [IndexedFeatureWord(2, [2, 3]),
             IndexedFeatureWord(3, [5, 1])],
            None)
        pair_b = IndexedPairFeature(
            [IndexedFeatureWord(3, [5, 1]),
             IndexedFeatureWord(1, [1, 4, 4])],
            [IndexedFeatureWord(3, [5, 1]),
             IndexedFeatureWord(1, [1, 4, 4]),
             IndexedFeatureWord(3, [5, 1]),
             IndexedFeatureWord(2, [2, 3])],
            None)
        dataset = IndexedDataset([pair_a, pair_b])
        dataset.pad_features(dataset.max_lengths())

        # Expected zero-padded outputs, one (first sentence, second sentence)
        # pair per feature, in dataset order.
        expected_words = [
            (np.array([1, 2, 3, 0]), np.array([2, 3, 0, 0])),
            (np.array([3, 1, 0, 0]), np.array([3, 1, 3, 2])),
        ]
        expected_chars = [
            (np.array([[1, 4, 4], [2, 3, 0], [5, 1, 0], [0, 0, 0]]),
             np.array([[2, 3, 0], [5, 1, 0], [0, 0, 0], [0, 0, 0]])),
            (np.array([[5, 1, 0], [1, 4, 4], [0, 0, 0], [0, 0, 0]]),
             np.array([[5, 1, 0], [1, 4, 4], [5, 1, 0], [2, 3, 0]])),
        ]

        # Default mode: each input is a pair of word-index arrays.
        inputs, labels = dataset.as_testing_data()
        assert len(labels) == 0
        for row, (want_first, want_second) in enumerate(expected_words):
            got_first, got_second = inputs[row]
            assert_allclose(got_first, want_first)
            assert_allclose(got_second, want_second)

        # "character" mode: each input is a pair of character-index matrices.
        inputs, labels = dataset.as_testing_data(mode="character")
        assert len(labels) == 0
        for row, (want_first, want_second) in enumerate(expected_chars):
            got_first, got_second = inputs[row]
            assert_allclose(got_first, want_first)
            assert_allclose(got_second, want_second)

        # "word+character" mode: each input is a 4-tuple ordered as
        # (first words, first characters, second words, second characters).
        inputs, labels = dataset.as_testing_data(mode="word+character")
        assert len(labels) == 0
        for row in range(2):
            want_first_words, want_second_words = expected_words[row]
            want_first_chars, want_second_chars = expected_chars[row]
            (got_first_words, got_first_chars,
             got_second_words, got_second_chars) = inputs[row]
            assert_allclose(got_first_words, want_first_words)
            assert_allclose(got_second_words, want_second_words)
            assert_allclose(got_first_chars, want_first_chars)
            assert_allclose(got_second_chars, want_second_chars)

        # An unrecognised mode string must be rejected.
        with self.assertRaises(ValueError):
            dataset.as_testing_data(mode="char")
Esempio n. 6
0
 def setUp(self):
     """Build two pair features and the IndexedDataset wrapping them."""
     super(TestIndexedDataset, self).setUp()
     # Each entry is (first-sentence specs, second-sentence specs, third
     # constructor argument); a spec holds the two positional arguments
     # of IndexedFeatureWord.
     feature_specs = (
         ([(1, [1, 5]), (2, [2, 1]), (3, [1, 4, 1])],
          [(2, [2, 1]), (3, [1, 4, 1])],
          [0, 1]),
         ([(3, [1, 4, 1]), (1, [1, 5])],
          [(3, [1, 4, 1]), (1, [1, 5]), (3, [1, 4, 1]), (2, [2, 1])],
          [1, 0]),
     )
     self.features = [
         IndexedPairFeature(
             [IndexedFeatureWord(*spec) for spec in first_specs],
             [IndexedFeatureWord(*spec) for spec in second_specs],
             third_arg)
         for first_specs, second_specs, third_arg in feature_specs
     ]
     self.indexed_dataset = IndexedDataset(self.features)
Esempio n. 7
0
    def test_equals(self):
        """Check IndexedPairFeature equality.

        Covers a feature built from identical arguments (equal), a feature
        differing in one word, a feature with fewer words, and a
        non-feature value (all unequal).
        """
        feature_1 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(4, [1, 2, 6])],
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(3, [5])], None)

        # Built from the same arguments as feature_1.
        feature_2 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(4, [1, 2, 6])],
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(3, [5])], None)

        # Differs from feature_1 in the second word of the first sentence.
        feature_3 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(1, [2, 2])],
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(3, [5])], None)
        # One word per sentence.
        feature_4 = IndexedPairFeature([IndexedFeatureWord(1, [1, 2])],
                                       [IndexedFeatureWord(1, [2, 2])], None)
        # assertNotEqual / assertEqual replace the deprecated aliases
        # assertNotEquals / assertEquals, removed in Python 3.12.
        self.assertNotEqual(feature_1, feature_4)
        self.assertNotEqual(feature_1, feature_3)
        # Direct dunder call: comparing with a non-feature must be falsy.
        self.assertFalse(feature_1.__eq__(0))
        self.assertEqual(feature_1, feature_2)