import numpy as np

from io import BytesIO
from nose.tools import assert_equal, assert_not_equal, assert_true, \
    assert_in, assert_almost_equal, raises
from chainer import links, optimizers, training
from chainer.dataset import Iterator
from chainer.serializers import DictionarySerializer

# NOTE: the module paths of the project-specific imports below (LtrDataset,
# LtrIterator, Ranker, listnet, get_dataset and the eval helper) are
# assumptions; adjust them to match the actual project layout
from shoelace.dataset import LtrDataset, LtrIterator
from shoelace.loss.listwise import listnet
from shoelace.evaluation import Ranker, eval
from test.utils import get_dataset


def test_serialize():
    # Sample dataset
    dataset = get_dataset()

    # Iterator
    it = LtrIterator(dataset, repeat=True, shuffle=False)

    # Set up serializer
    serializer = DictionarySerializer()

    # Serialize the iterator's current state
    it.serialize(serializer)

    # Perform one epoch of iteration
    it.next()
    it.next()
    it.next()

    # The previously serialized epoch should now differ from the iterator's
    assert_not_equal(serializer.target['epoch'], it.epoch)

    # After serializing again they should be equal
    it.serialize(serializer)
    assert_equal(serializer.target['epoch'], it.epoch)

def test_linear_network():
    # Seed the RNG to ensure repeatability of experiments
    np.random.seed(1042)

    # Load data set
    dataset = get_dataset(True)
    iterator = LtrIterator(dataset, repeat=True, shuffle=True)
    eval_iterator = LtrIterator(dataset, repeat=False, shuffle=False)

    # Create a linear network with chainer and apply our loss function
    predictor = links.Linear(None, 1)
    loss = Ranker(predictor, listnet)

    # Build optimizer, updater and trainer
    optimizer = optimizers.Adam(alpha=0.2)
    optimizer.setup(loss)
    updater = training.StandardUpdater(iterator, optimizer)
    trainer = training.Trainer(updater, (10, 'epoch'))

    # Evaluate loss before training
    before_loss = eval(loss, eval_iterator)

    # Train the neural network
    trainer.run()

    # Evaluate loss after training
    after_loss = eval(loss, eval_iterator)

    # Assert that the loss matches precomputed values
    assert_almost_equal(before_loss, 0.26958397)
    assert_almost_equal(after_loss, 0.2326711)

def test_len():
    # Get sample data set
    dataset = get_dataset()

    # Assert that the len operator works
    assert_equal(len(dataset), 3)
    assert_equal(len(dataset), dataset.nr_queries)
    assert_equal(len(dataset[0]), 1)

# The expected exception type is an assumption; adjust it to whatever the
# dataset's __getitem__ actually raises for out-of-range indices
@raises(IndexError)
def test_get_sample_out_of_range():
    # Get sample data set
    dataset = get_dataset()

    # Raise an exception by trying to get an element out of range
    dataset[-1]

    # This state should never be reached
    assert_true(False)

def test_normalize_false():
    # Get sample data set without normalization
    dataset = get_dataset(normalize=False)

    # Check that no feature was scaled to the [0.0, 1.0] range
    for i in range(len(dataset)):
        per_feature_max = np.max(dataset[i].feature_vectors, axis=0)
        assert_not_equal(np.min(per_feature_max), 1.0)

        per_feature_min = np.min(dataset[i].feature_vectors, axis=0)
        assert_not_equal(np.max(per_feature_min), 0.0)

def test_normalize_true():
    # Get sample data set with normalization
    dataset = get_dataset(normalize=True)

    # Check every feature has a correct maximum and minimum
    for i in range(len(dataset)):
        per_feature_max = np.max(dataset[i].feature_vectors, axis=0)

        # The maximum can be zero in cases where the data has no range to
        # normalize by (i.e. when a feature is always zero)
        assert_in(np.min(per_feature_max), (0.0, 1.0))

        per_feature_min = np.min(dataset[i].feature_vectors, axis=0)
        assert_equal(np.max(per_feature_min), 0.0)

def test_repeat_true():
    # Sample dataset
    dataset = get_dataset()

    # An iterator with repeat=True should iterate beyond a single epoch
    it = LtrIterator(dataset, repeat=True, shuffle=False)
    counter = 0
    while it.epoch_detail <= 2.0:
        counter += 1
        it.next()

    # Two full epochs of 3 queries each, plus the call that pushes
    # epoch_detail past 2.0
    assert_equal(counter, 7)

def test_iterations():
    # Sample dataset
    dataset = get_dataset()

    # Iterator
    it = LtrIterator(dataset, repeat=False, shuffle=False)
    assert_true(isinstance(it, Iterator))
    assert_true(isinstance(it, LtrIterator))

    # Get all items from the iterator and check their values
    items = list(it)
    assert_equal(len(items), 3)
    assert_equal(len(items[0]), 6)
    assert_equal(len(items[1]), 9)
    assert_equal(len(items[2]), 10)

@raises(StopIteration)
def test_repeat_false():
    # Sample dataset
    dataset = get_dataset()

    # Iterator
    it = LtrIterator(dataset, repeat=False, shuffle=False)

    # Attempt to iterate beyond the dataset size with repeat set to False
    counter = 0
    while it.epoch_detail <= 2.0:
        counter += 1
        it.next()

    # We should never reach this state, a StopIteration should've been raised
    assert_true(False)

def test_get_sample():
    # Get sample data set
    dataset = get_dataset()

    # Assert that indexing individual queries works
    assert_equal(dataset[0].feature_vectors.shape, (6, 45))
    assert_equal(dataset[0].relevance_scores.shape, (6, 1))
    assert_equal(dataset[0].nr_queries, 1)
    assert_equal(dataset[0].query_ids, ['1'])

    assert_equal(dataset[1].feature_vectors.shape, (9, 45))
    assert_equal(dataset[1].relevance_scores.shape, (9, 1))
    assert_equal(dataset[1].nr_queries, 1)
    assert_equal(dataset[1].query_ids, ['16'])

    assert_equal(dataset[2].feature_vectors.shape, (10, 45))
    assert_equal(dataset[2].relevance_scores.shape, (10, 1))
    assert_equal(dataset[2].nr_queries, 1)
    assert_equal(dataset[2].query_ids, ['63'])

def test_shuffle_false():
    # Sample dataset
    dataset = get_dataset()

    # Seed randomness for repeatability
    np.random.seed(4100)

    # With shuffle=False the queries should come back in the same fixed
    # order, epoch after epoch
    it = LtrIterator(dataset, repeat=True, shuffle=False)
    for _ in range(3):
        assert_equal(len(it.next()), 6)
        assert_equal(len(it.next()), 9)
        assert_equal(len(it.next()), 10)

def test_save_and_load():
    # Get sample data set
    dataset = get_dataset()

    # Get in-memory binary handle
    with BytesIO() as handle:

        # Save binary to handle
        dataset.save(handle)
        handle.seek(0)

        # Load binary from handle
        dataset2 = LtrDataset.load(handle)

        # Assert that everything loaded correctly
        assert_true(np.array_equal(dataset.feature_vectors,
                                   dataset2.feature_vectors))
        assert_true(np.array_equal(dataset.relevance_scores,
                                   dataset2.relevance_scores))
        assert_true(np.array_equal(dataset.query_pointer,
                                   dataset2.query_pointer))
        assert_true(np.array_equal(dataset.query_ids, dataset2.query_ids))

def test_slicing():
    # Get sample data set
    dataset = get_dataset()

    # Grab a slice
    dataset_slice = dataset[0:2]

    # Assert that the slice indexed the correct elements
    assert_equal(dataset_slice[0].feature_vectors.shape, (6, 45))
    assert_equal(dataset_slice[0].relevance_scores.shape, (6, 1))
    assert_equal(dataset_slice[0].nr_queries, 1)
    assert_equal(dataset_slice[0].query_ids, ['1'])
    assert_equal(dataset_slice[1].feature_vectors.shape, (9, 45))
    assert_equal(dataset_slice[1].relevance_scores.shape, (9, 1))
    assert_equal(dataset_slice[1].nr_queries, 1)
    assert_equal(dataset_slice[1].query_ids, ['16'])
    assert_equal(len(dataset_slice), 2)

    # Grab another slice
    dataset_slice = dataset[1:3]

    # Assert that the slice indexed the correct elements
    assert_equal(dataset_slice[0].feature_vectors.shape, (9, 45))
    assert_equal(dataset_slice[0].relevance_scores.shape, (9, 1))
    assert_equal(dataset_slice[0].nr_queries, 1)
    assert_equal(dataset_slice[0].query_ids, ['16'])
    assert_equal(dataset_slice[1].feature_vectors.shape, (10, 45))
    assert_equal(dataset_slice[1].relevance_scores.shape, (10, 1))
    assert_equal(dataset_slice[1].nr_queries, 1)
    assert_equal(dataset_slice[1].query_ids, ['63'])
    assert_equal(len(dataset_slice), 2)