def test_as_training_data_error(self):
    """Check that ``as_training_data`` raises ``ValueError`` on bad input.

    Two cases are covered: a feature constructed with ``None`` as its
    label, and a ``mode`` string ("words+character") that is not one of
    the modes used elsewhere in these tests.
    """
    # Build the feature outside the assertRaises block so that only the
    # as_training_data() call itself can satisfy the expectation.
    unlabeled_feature = IndexedPairFeature(
        [IndexedFeatureWord(1, [1, 2]),
         IndexedFeatureWord(4, [1, 2, 6])],
        [IndexedFeatureWord(1, [1, 2]),
         IndexedFeatureWord(3, [5])],
        None)
    with self.assertRaises(ValueError):
        unlabeled_feature.as_training_data()

    with self.assertRaises(ValueError):
        self.feature.as_training_data(mode="words+character")
class TestIndexedPairFeature(DuplicateTestCase):
    """Tests for ``IndexedPairFeature``: lengths, padding, array export
    and comparison operators."""

    def setUp(self):
        """Create a shared feature with a 5-word and a 4-word sentence."""
        super(TestIndexedPairFeature, self).setUp()
        self.feature = IndexedPairFeature(
            [
                IndexedFeatureWord(1, [1, 2]),
                IndexedFeatureWord(2, [3, 4]),
                IndexedFeatureWord(3, [5]),
                IndexedFeatureWord(5, [1, 4, 1]),
                IndexedFeatureWord(4, [1, 2, 6]),
            ],
            [
                IndexedFeatureWord(1, [1, 2]),
                IndexedFeatureWord(8, [3, 1, 2, 1]),
                IndexedFeatureWord(2, [3, 4]),
                IndexedFeatureWord(3, [5]),
            ],
            [0, 1])

    def test_get_lengths(self):
        """``get_lengths`` reports the longest sentence and longest word."""
        assert self.feature.get_lengths() == {
            "num_sentence_words": 5,
            "num_word_characters": 4,
        }

    def test_pad_adds_padding_words(self):
        """Padding beyond the current lengths fills with zeros."""
        self.feature.pad({"num_sentence_words": 6,
                          "num_word_characters": 5})
        (first_sent_word_idxs,
         second_sent_word_idxs) = self.feature.get_int_word_indices()
        (first_sent_char_idxs,
         second_sent_char_idxs) = self.feature.get_int_char_indices()

        assert first_sent_word_idxs == [1, 2, 3, 5, 4, 0]
        assert second_sent_word_idxs == [1, 8, 2, 3, 0, 0]
        assert first_sent_char_idxs == [[1, 2, 0, 0, 0],
                                        [3, 4, 0, 0, 0],
                                        [5, 0, 0, 0, 0],
                                        [1, 4, 1, 0, 0],
                                        [1, 2, 6, 0, 0],
                                        [0, 0, 0, 0, 0]]
        assert second_sent_char_idxs == [[1, 2, 0, 0, 0],
                                         [3, 1, 2, 1, 0],
                                         [3, 4, 0, 0, 0],
                                         [5, 0, 0, 0, 0],
                                         [0, 0, 0, 0, 0],
                                         [0, 0, 0, 0, 0]]
        assert self.feature.label == [0, 1]

    def test_pad_truncates(self):
        """Padding to shorter lengths truncates words and characters."""
        self.feature.pad({"num_sentence_words": 2,
                          "num_word_characters": 3})
        (first_sent_word_idxs,
         second_sent_word_idxs) = self.feature.get_int_word_indices()
        (first_sent_char_idxs,
         second_sent_char_idxs) = self.feature.get_int_char_indices()

        assert first_sent_word_idxs == [1, 2]
        assert second_sent_word_idxs == [1, 8]
        assert first_sent_char_idxs == [[1, 2, 0], [3, 4, 0]]
        assert second_sent_char_idxs == [[1, 2, 0], [3, 1, 2]]
        assert self.feature.label == [0, 1]

    def test_pad_general(self):
        """Padding to the feature's own lengths evens out both sentences."""
        self.feature.pad(self.feature.get_lengths())
        (first_sent_word_idxs,
         second_sent_word_idxs) = self.feature.get_int_word_indices()
        (first_sent_char_idxs,
         second_sent_char_idxs) = self.feature.get_int_char_indices()

        assert first_sent_word_idxs == [1, 2, 3, 5, 4]
        assert second_sent_word_idxs == [1, 8, 2, 3, 0]
        assert first_sent_char_idxs == [[1, 2, 0, 0],
                                        [3, 4, 0, 0],
                                        [5, 0, 0, 0],
                                        [1, 4, 1, 0],
                                        [1, 2, 6, 0]]
        assert second_sent_char_idxs == [[1, 2, 0, 0],
                                         [3, 1, 2, 1],
                                         [3, 4, 0, 0],
                                         [5, 0, 0, 0],
                                         [0, 0, 0, 0]]
        assert self.feature.label == [0, 1]

    def test_as_training_data_produces_correct_numpy_arrays(self):
        """``as_training_data`` returns the expected arrays in each mode."""
        self.feature.pad({"num_sentence_words": 3,
                          "num_word_characters": 2})

        # Default mode: one word-index array per sentence.
        inputs, label = self.feature.as_training_data()
        assert_allclose(label[0], np.asarray([0, 1]))
        assert len(inputs) == 2
        assert_allclose(inputs[0], np.asarray([1, 2, 3]))
        assert_allclose(inputs[1], np.asarray([1, 8, 2]))

        # Character mode: one character-index matrix per sentence.
        inputs, label = self.feature.as_training_data(mode="character")
        assert_allclose(label[0], np.asarray([0, 1]))
        assert len(inputs) == 2
        assert_allclose(inputs[0], np.asarray([[1, 2], [3, 4], [5, 0]]))
        assert_allclose(inputs[1], np.asarray([[1, 2], [3, 1], [3, 4]]))

        # Combined mode: (words, chars) for sentence one, then sentence two.
        inputs, label = self.feature.as_training_data(mode="word+character")
        assert_allclose(label[0], np.asarray([0, 1]))
        assert len(inputs) == 4
        assert_allclose(inputs[0], np.asarray([1, 2, 3]))
        assert_allclose(inputs[1], np.asarray([[1, 2], [3, 4], [5, 0]]))
        assert_allclose(inputs[2], np.asarray([1, 8, 2]))
        assert_allclose(inputs[3], np.asarray([[1, 2], [3, 1], [3, 4]]))

    def test_as_training_data_error(self):
        """``as_training_data`` raises ``ValueError`` for a ``None`` label
        and for an unrecognised mode string."""
        # Build the feature outside the assertRaises block so that only the
        # as_training_data() call itself can satisfy the expectation.
        unlabeled_feature = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(4, [1, 2, 6])],
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(3, [5])],
            None)
        with self.assertRaises(ValueError):
            unlabeled_feature.as_training_data()

        with self.assertRaises(ValueError):
            self.feature.as_training_data(mode="words+character")

    def test_as_testing_data_produces_correct_numpy_arrays(self):
        """``as_testing_data`` returns the expected arrays and no labels
        in each mode."""
        self.feature.pad({"num_sentence_words": 4,
                          "num_word_characters": 2})

        # Default mode: one word-index array per sentence.
        inputs, labels = self.feature.as_testing_data()
        assert len(labels) == 0
        assert len(inputs) == 2
        assert_allclose(inputs[0], np.asarray([1, 2, 3, 5]))
        assert_allclose(inputs[1], np.asarray([1, 8, 2, 3]))

        # Character mode. This previously called as_training_data() and
        # re-checked the stale ``labels`` from the call above, so the
        # testing-data path was never exercised here.
        inputs, labels = self.feature.as_testing_data(mode="character")
        assert len(labels) == 0
        assert len(inputs) == 2
        assert_allclose(inputs[0],
                        np.asarray([[1, 2], [3, 4], [5, 0], [1, 4]]))
        assert_allclose(inputs[1],
                        np.asarray([[1, 2], [3, 1], [3, 4], [5, 0]]))

        # Combined mode: (words, chars) for sentence one, then sentence two.
        inputs, labels = self.feature.as_testing_data(mode="word+character")
        assert len(labels) == 0
        assert len(inputs) == 4
        assert_allclose(inputs[0], np.asarray([1, 2, 3, 5]))
        assert_allclose(inputs[1],
                        np.asarray([[1, 2], [3, 4], [5, 0], [1, 4]]))
        assert_allclose(inputs[2], np.asarray([1, 8, 2, 3]))
        assert_allclose(inputs[3],
                        np.asarray([[1, 2], [3, 1], [3, 4], [5, 0]]))

    def test_as_testing_data_error(self):
        """``as_testing_data`` raises ``ValueError`` for an unrecognised
        mode string."""
        with self.assertRaises(ValueError):
            self.feature.as_testing_data(mode="words+character")

    def test_equals(self):
        """Equality holds for identical features and fails otherwise,
        including against a non-feature value."""
        feature_1 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(4, [1, 2, 6])],
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(3, [5])],
            None)
        feature_2 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(4, [1, 2, 6])],
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(3, [5])],
            None)
        feature_3 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(1, [2, 2])],
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(3, [5])],
            None)
        feature_4 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2])],
            [IndexedFeatureWord(1, [2, 2])],
            None)

        # assertEqual / assertNotEqual replace the deprecated
        # assertEquals / assertNotEquals aliases (removed in Python 3.12).
        self.assertNotEqual(feature_1, feature_4)
        self.assertNotEqual(feature_1, feature_3)
        self.assertFalse(feature_1.__eq__(0))
        self.assertEqual(feature_1, feature_2)

    def test_less_than(self):
        """``<`` orders features as expected and is falsy against a
        non-feature value."""
        feature_1 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(4, [1, 2, 6])],
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(3, [5])],
            None)
        feature_2 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2]),
             IndexedFeatureWord(4, [1, 2, 6])],
            [IndexedFeatureWord(2, [2, 2]),
             IndexedFeatureWord(3, [5])],
            None)
        feature_3 = IndexedPairFeature(
            [IndexedFeatureWord(1, [1, 2])],
            [IndexedFeatureWord(1, [2, 2])],
            None)

        self.assertFalse(feature_1.__lt__(0))
        self.assertFalse(feature_2.__lt__(feature_1))
        self.assertLess(feature_1, feature_2)
        self.assertLess(feature_3, feature_2)