def test_as_training_data_error(self):
    """Check that ``as_training_data`` raises ``ValueError`` in two cases.

    The first case calls it on a feature constructed with ``None`` as its
    third argument; the second calls it on ``self.feature`` with the mode
    string ``"words+character"``.
    """
    # Build the feature outside the ``assertRaises`` block so that only the
    # ``as_training_data`` call itself can satisfy the expectation.
    feature = IndexedPairFeature(
        [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(4, [1, 2, 6])],
        [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(3, [5])],
        None)
    with self.assertRaises(ValueError):
        feature.as_training_data()

    with self.assertRaises(ValueError):
        self.feature.as_training_data(mode="words+character")
def setUp(self):
    """Create ``self.feature``: a five-word / four-word pair with ``[0, 1]`` as its third argument."""
    super(TestIndexedPairFeature, self).setUp()

    first_words = [
        IndexedFeatureWord(1, [1, 2]),
        IndexedFeatureWord(2, [3, 4]),
        IndexedFeatureWord(3, [5]),
        IndexedFeatureWord(5, [1, 4, 1]),
        IndexedFeatureWord(4, [1, 2, 6]),
    ]
    second_words = [
        IndexedFeatureWord(1, [1, 2]),
        IndexedFeatureWord(8, [3, 1, 2, 1]),
        IndexedFeatureWord(2, [3, 4]),
        IndexedFeatureWord(3, [5]),
    ]
    self.feature = IndexedPairFeature(first_words, second_words, [0, 1])
def setUp(self):
    """Create ``self.features`` (two pair features) and ``self.indexed_dataset`` wrapping them."""
    super(TestIndexedDataset, self).setUp()

    # First pair: three words against two words, third argument [0, 1].
    three_vs_two = IndexedPairFeature(
        [
            IndexedFeatureWord(1, [1, 5]),
            IndexedFeatureWord(2, [2, 1]),
            IndexedFeatureWord(3, [1, 4, 1]),
        ],
        [
            IndexedFeatureWord(2, [2, 1]),
            IndexedFeatureWord(3, [1, 4, 1]),
        ],
        [0, 1])

    # Second pair: two words against four words, third argument [1, 0].
    two_vs_four = IndexedPairFeature(
        [
            IndexedFeatureWord(3, [1, 4, 1]),
            IndexedFeatureWord(1, [1, 5]),
        ],
        [
            IndexedFeatureWord(3, [1, 4, 1]),
            IndexedFeatureWord(1, [1, 5]),
            IndexedFeatureWord(3, [1, 4, 1]),
            IndexedFeatureWord(2, [2, 1]),
        ],
        [1, 0])

    self.features = [three_vs_two, two_vs_four]
    self.indexed_dataset = IndexedDataset(self.features)
def test_sort(self):
    """Check that ``sort()`` reorders ``self.indexed_dataset.features`` in place.

    The expected list differs from the dataset's features before sorting
    and equals them afterwards.
    """
    # NOTE(review): the original comment here read "lengths: 3, 4, 1, 2, 2";
    # it does not obviously correspond to the two features below -- confirm
    # what ordering key sort() uses.
    sorted_features = [
        IndexedPairFeature(
            [IndexedFeatureWord(3, [1, 4, 1]),
             IndexedFeatureWord(1, [1, 5])],
            [IndexedFeatureWord(3, [1, 4, 1]),
             IndexedFeatureWord(1, [1, 5]),
             IndexedFeatureWord(3, [1, 4, 1]),
             IndexedFeatureWord(2, [2, 1])],
            [1, 0]),
        IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 5]),
             IndexedFeatureWord(2, [2, 1]),
             IndexedFeatureWord(3, [1, 4, 1])],
            [IndexedFeatureWord(2, [2, 1]),
             IndexedFeatureWord(3, [1, 4, 1])],
            [0, 1]),
    ]
    self.assertNotEqual(sorted_features, self.indexed_dataset.features)
    self.indexed_dataset.sort()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(sorted_features, self.indexed_dataset.features)
def test_less_than(self):
    """Exercise ``<`` on pair features, including a comparison against a non-feature."""
    base_first = [IndexedFeatureWord(1, [1, 2]),
                  IndexedFeatureWord(4, [1, 2, 6])]
    base_second = [IndexedFeatureWord(1, [1, 2]),
                   IndexedFeatureWord(3, [5])]
    base = IndexedPairFeature(base_first, base_second, None)

    # Same first word list as ``base``; the second list starts with a
    # different word.
    other_first = [IndexedFeatureWord(1, [1, 2]),
                   IndexedFeatureWord(4, [1, 2, 6])]
    other_second = [IndexedFeatureWord(2, [2, 2]),
                    IndexedFeatureWord(3, [5])]
    other = IndexedPairFeature(other_first, other_second, None)

    # A pair whose word lists each hold a single word.
    short = IndexedPairFeature([IndexedFeatureWord(1, [1, 2])],
                               [IndexedFeatureWord(1, [2, 2])],
                               None)

    self.assertFalse(base.__lt__(0))
    self.assertFalse(other.__lt__(base))
    self.assertLess(base, other)
    self.assertLess(short, other)
def test_equals(self):
    """Exercise ``==`` / ``!=`` on pair features, including comparison with a non-feature."""
    feature_1 = IndexedPairFeature(
        [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(4, [1, 2, 6])],
        [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(3, [5])],
        None)
    # Built from the same values as feature_1.
    feature_2 = IndexedPairFeature(
        [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(4, [1, 2, 6])],
        [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(3, [5])],
        None)
    # Differs from feature_1 in the second word of the first list.
    feature_3 = IndexedPairFeature(
        [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(1, [2, 2])],
        [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(3, [5])],
        None)
    # Each word list holds a single word.
    feature_4 = IndexedPairFeature(
        [IndexedFeatureWord(1, [1, 2])],
        [IndexedFeatureWord(1, [2, 2])],
        None)

    # assertEqual / assertNotEqual replace the deprecated
    # assertEquals / assertNotEquals aliases.
    self.assertNotEqual(feature_1, feature_4)
    self.assertNotEqual(feature_1, feature_3)
    self.assertFalse(feature_1.__eq__(0))
    self.assertEqual(feature_1, feature_2)
def test_as_testing_data(self):
    """Check ``IndexedDataset.as_testing_data`` in every mode used here.

    Two pair features built with ``None`` as their third argument are
    padded to the dataset's ``max_lengths()``. The default, "character" and
    "word+character" modes are then compared against the expected arrays,
    and the mode "char" is expected to raise ``ValueError``.
    """
    pair_a = IndexedPairFeature(
        [IndexedFeatureWord(1, [1, 4, 4]),
         IndexedFeatureWord(2, [2, 3]),
         IndexedFeatureWord(3, [5, 1])],
        [IndexedFeatureWord(2, [2, 3]),
         IndexedFeatureWord(3, [5, 1])],
        None)
    pair_b = IndexedPairFeature(
        [IndexedFeatureWord(3, [5, 1]),
         IndexedFeatureWord(1, [1, 4, 4])],
        [IndexedFeatureWord(3, [5, 1]),
         IndexedFeatureWord(1, [1, 4, 4]),
         IndexedFeatureWord(3, [5, 1]),
         IndexedFeatureWord(2, [2, 3])],
        None)
    features = [pair_a, pair_b]
    indexed_dataset = IndexedDataset(features)
    indexed_dataset.pad_features(indexed_dataset.max_lengths())

    # Expected (first, second) word index arrays, one tuple per feature.
    expected_words = [
        (np.array([1, 2, 3, 0]), np.array([2, 3, 0, 0])),
        (np.array([3, 1, 0, 0]), np.array([3, 1, 3, 2])),
    ]
    # Expected (first, second) character index arrays, one tuple per feature.
    expected_chars = [
        (np.array([[1, 4, 4], [2, 3, 0], [5, 1, 0], [0, 0, 0]]),
         np.array([[2, 3, 0], [5, 1, 0], [0, 0, 0], [0, 0, 0]])),
        (np.array([[5, 1, 0], [1, 4, 4], [0, 0, 0], [0, 0, 0]]),
         np.array([[5, 1, 0], [1, 4, 4], [5, 1, 0], [2, 3, 0]])),
    ]

    # Default mode: word indices only.
    inputs, labels = indexed_dataset.as_testing_data()
    assert len(labels) == 0
    for row, (want_first, want_second) in enumerate(expected_words):
        got_first, got_second = inputs[row]
        assert_allclose(got_first, want_first)
        assert_allclose(got_second, want_second)

    # Character mode: character indices only.
    inputs, labels = indexed_dataset.as_testing_data(mode="character")
    assert len(labels) == 0
    for row, (want_first, want_second) in enumerate(expected_chars):
        got_first, got_second = inputs[row]
        assert_allclose(got_first, want_first)
        assert_allclose(got_second, want_second)

    # Combined mode: each entry unpacks into words and characters for the
    # first sentence, then words and characters for the second.
    inputs, labels = indexed_dataset.as_testing_data(mode="word+character")
    assert len(labels) == 0
    for row in range(2):
        (got_first_words, got_first_chars,
         got_second_words, got_second_chars) = inputs[row]
        assert_allclose(got_first_words, expected_words[row][0])
        assert_allclose(got_second_words, expected_words[row][1])
        assert_allclose(got_first_chars, expected_chars[row][0])
        assert_allclose(got_second_chars, expected_chars[row][1])

    with self.assertRaises(ValueError):
        indexed_dataset.as_testing_data(mode="char")
class TestIndexedPairFeature(DuplicateTestCase):
    """Tests for ``IndexedPairFeature``: lengths, padding, array conversion and comparisons."""

    def setUp(self):
        """Create ``self.feature``: a five-word / four-word pair with ``[0, 1]`` as its third argument."""
        super(TestIndexedPairFeature, self).setUp()
        self.feature = IndexedPairFeature([
            IndexedFeatureWord(1, [1, 2]),
            IndexedFeatureWord(2, [3, 4]),
            IndexedFeatureWord(3, [5]),
            IndexedFeatureWord(5, [1, 4, 1]),
            IndexedFeatureWord(4, [1, 2, 6])
        ], [
            IndexedFeatureWord(1, [1, 2]),
            IndexedFeatureWord(8, [3, 1, 2, 1]),
            IndexedFeatureWord(2, [3, 4]),
            IndexedFeatureWord(3, [5])
        ], [0, 1])

    def test_get_lengths(self):
        """``get_lengths`` reports 5 sentence words and 4 word characters for the fixture."""
        assert self.feature.get_lengths() == {
            "num_sentence_words": 5,
            'num_word_characters': 4
        }

    def test_pad_adds_padding_words(self):
        """Padding beyond the fixture's lengths fills word and character indices with zeros."""
        self.feature.pad({"num_sentence_words": 6, 'num_word_characters': 5})
        first_sent_word_idxs, second_sent_word_idxs = (
            self.feature.get_int_word_indices())
        first_sent_char_idxs, second_sent_char_idxs = (
            self.feature.get_int_char_indices())

        assert first_sent_word_idxs == [1, 2, 3, 5, 4, 0]
        assert second_sent_word_idxs == [1, 8, 2, 3, 0, 0]
        assert first_sent_char_idxs == [[1, 2, 0, 0, 0],
                                        [3, 4, 0, 0, 0],
                                        [5, 0, 0, 0, 0],
                                        [1, 4, 1, 0, 0],
                                        [1, 2, 6, 0, 0],
                                        [0, 0, 0, 0, 0]]
        assert second_sent_char_idxs == [[1, 2, 0, 0, 0],
                                         [3, 1, 2, 1, 0],
                                         [3, 4, 0, 0, 0],
                                         [5, 0, 0, 0, 0],
                                         [0, 0, 0, 0, 0],
                                         [0, 0, 0, 0, 0]]
        assert self.feature.label == [0, 1]

    def test_pad_truncates(self):
        """Padding below the fixture's lengths keeps only the leading words and characters."""
        self.feature.pad({"num_sentence_words": 2, 'num_word_characters': 3})
        first_sent_word_idxs, second_sent_word_idxs = (
            self.feature.get_int_word_indices())
        first_sent_char_idxs, second_sent_char_idxs = (
            self.feature.get_int_char_indices())

        assert first_sent_word_idxs == [1, 2]
        assert second_sent_word_idxs == [1, 8]
        assert first_sent_char_idxs == [[1, 2, 0], [3, 4, 0]]
        assert second_sent_char_idxs == [[1, 2, 0], [3, 1, 2]]
        assert self.feature.label == [0, 1]

    def test_pad_general(self):
        """Padding to the feature's own ``get_lengths()`` zero-fills only the shorter parts."""
        self.feature.pad(self.feature.get_lengths())
        first_sent_word_idxs, second_sent_word_idxs = (
            self.feature.get_int_word_indices())
        first_sent_char_idxs, second_sent_char_idxs = (
            self.feature.get_int_char_indices())

        assert first_sent_word_idxs == [1, 2, 3, 5, 4]
        assert second_sent_word_idxs == [1, 8, 2, 3, 0]
        assert first_sent_char_idxs == [[1, 2, 0, 0],
                                        [3, 4, 0, 0],
                                        [5, 0, 0, 0],
                                        [1, 4, 1, 0],
                                        [1, 2, 6, 0]]
        assert second_sent_char_idxs == [[1, 2, 0, 0],
                                         [3, 1, 2, 1],
                                         [3, 4, 0, 0],
                                         [5, 0, 0, 0],
                                         [0, 0, 0, 0]]
        assert self.feature.label == [0, 1]

    def test_as_training_data_produces_correct_numpy_arrays(self):
        """``as_training_data`` returns the expected inputs and label in each mode."""
        self.feature.pad({'num_sentence_words': 3, 'num_word_characters': 2})

        inputs, label = self.feature.as_training_data()
        assert_allclose(label[0], np.asarray([0, 1]))
        assert len(inputs) == 2
        assert_allclose(inputs[0], np.asarray([1, 2, 3]))
        assert_allclose(inputs[1], np.asarray([1, 8, 2]))

        inputs, label = self.feature.as_training_data(mode="character")
        assert_allclose(label[0], np.asarray([0, 1]))
        assert len(inputs) == 2
        assert_allclose(inputs[0], np.asarray([[1, 2], [3, 4], [5, 0]]))
        assert_allclose(inputs[1], np.asarray([[1, 2], [3, 1], [3, 4]]))

        inputs, label = self.feature.as_training_data(mode="word+character")
        assert_allclose(label[0], np.asarray([0, 1]))
        assert len(inputs) == 4
        assert_allclose(inputs[0], np.asarray([1, 2, 3]))
        assert_allclose(inputs[1], np.asarray([[1, 2], [3, 4], [5, 0]]))
        assert_allclose(inputs[2], np.asarray([1, 8, 2]))
        assert_allclose(inputs[3], np.asarray([[1, 2], [3, 1], [3, 4]]))

    def test_as_training_data_error(self):
        """``as_training_data`` raises ``ValueError`` for a ``None`` third argument and for the mode "words+character"."""
        # Build the feature outside the ``assertRaises`` block so that only
        # the ``as_training_data`` call itself can satisfy the expectation.
        feature = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(4, [1, 2, 6])],
            [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(3, [5])],
            None)
        with self.assertRaises(ValueError):
            feature.as_training_data()

        with self.assertRaises(ValueError):
            self.feature.as_training_data(mode="words+character")

    def test_as_testing_data_produces_correct_numpy_arrays(self):
        """``as_testing_data`` returns the expected inputs and no labels in each mode."""
        self.feature.pad({'num_sentence_words': 4, 'num_word_characters': 2})

        inputs, labels = self.feature.as_testing_data()
        assert len(labels) == 0
        assert len(inputs) == 2
        assert_allclose(inputs[0], np.asarray([1, 2, 3, 5]))
        assert_allclose(inputs[1], np.asarray([1, 8, 2, 3]))

        # The original called as_training_data here and bound the result to
        # ``label``, so ``len(labels)`` re-checked the stale value from the
        # first call; call the method under test and rebind ``labels``.
        inputs, labels = self.feature.as_testing_data(mode="character")
        assert len(labels) == 0
        assert len(inputs) == 2
        assert_allclose(inputs[0],
                        np.asarray([[1, 2], [3, 4], [5, 0], [1, 4]]))
        assert_allclose(inputs[1],
                        np.asarray([[1, 2], [3, 1], [3, 4], [5, 0]]))

        inputs, labels = self.feature.as_testing_data(mode="word+character")
        assert len(labels) == 0
        assert len(inputs) == 4
        assert_allclose(inputs[0], np.asarray([1, 2, 3, 5]))
        assert_allclose(inputs[1],
                        np.asarray([[1, 2], [3, 4], [5, 0], [1, 4]]))
        assert_allclose(inputs[2], np.asarray([1, 8, 2, 3]))
        assert_allclose(inputs[3],
                        np.asarray([[1, 2], [3, 1], [3, 4], [5, 0]]))

    def test_as_testing_data_error(self):
        """``as_testing_data`` raises ``ValueError`` for the mode "words+character"."""
        with self.assertRaises(ValueError):
            self.feature.as_testing_data(mode="words+character")

    def test_equals(self):
        """Exercise ``==`` / ``!=`` on pair features, including comparison with a non-feature."""
        feature_1 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(4, [1, 2, 6])],
            [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(3, [5])],
            None)
        feature_2 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(4, [1, 2, 6])],
            [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(3, [5])],
            None)
        feature_3 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(1, [2, 2])],
            [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(3, [5])],
            None)
        feature_4 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2])],
            [IndexedFeatureWord(1, [2, 2])],
            None)

        # assertEqual / assertNotEqual replace the deprecated
        # assertEquals / assertNotEquals aliases.
        self.assertNotEqual(feature_1, feature_4)
        self.assertNotEqual(feature_1, feature_3)
        self.assertFalse(feature_1.__eq__(0))
        self.assertEqual(feature_1, feature_2)

    def test_less_than(self):
        """Exercise ``<`` on pair features, including a comparison against a non-feature."""
        feature_1 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(4, [1, 2, 6])],
            [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(3, [5])],
            None)
        feature_2 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]), IndexedFeatureWord(4, [1, 2, 6])],
            [IndexedFeatureWord(2, [2, 2]), IndexedFeatureWord(3, [5])],
            None)
        feature_3 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2])],
            [IndexedFeatureWord(1, [2, 2])],
            None)

        self.assertFalse(feature_1.__lt__(0))
        self.assertFalse(feature_2.__lt__(feature_1))
        self.assertLess(feature_1, feature_2)
        self.assertLess(feature_3, feature_2)