def test_bigger_len(self):
    """Check the batch count on a 9888-sample dataset with 10 positives.

    The generator is built with positive_sample_perc=0.5, np_ratio=1 and
    negative_perc=1, and must report ceil(9878 / 5) batches.
    """
    features, labels = self.get_dataset(n_samples=9888, n_positives=10)
    generator = BalancedGenerator(
        features,
        labels,
        positive_sample_perc=0.5,
        np_ratio=1,
        negative_perc=1,
    )
    # Presumably 9878 is the negative count (9888 - 10) and 5 the number of
    # positives drawn per batch (0.5 * 10) -- confirm against BalancedGenerator.
    expected_batches = np.ceil(9878 / 5)
    # __len__ is invoked directly: unlike len(), this places no int
    # requirement on the value being compared.
    self.assertEqual(generator.__len__(), expected_batches)
def test_len(self):
    """Check the batch count on a 30-sample dataset with 5 positives.

    With positive_sample_perc=0.1, np_ratio=1 and negative_perc=1 the
    generator must report exactly 25 batches.
    """
    features, labels = self.get_dataset(n_samples=30, n_positives=5)
    generator = BalancedGenerator(
        features,
        labels,
        positive_sample_perc=0.1,
        np_ratio=1,
        negative_perc=1,
    )
    # Presumably 25 is the number of negatives (30 - 5), consumed one per
    # batch -- confirm against BalancedGenerator.
    expected_batches = 25
    self.assertEqual(generator.__len__(), expected_batches)
def test_negative_perc_len(self):
    """Check the batch count when negative_perc is below 1.

    Uses 1,000,000 samples with 300 positives, positive_sample_perc=0.6,
    np_ratio=1 and negative_perc=0.7. The expected length is
    ceil(((n_samples - n_positives) * 0.7) / 180).
    """
    total_samples = 1000000
    positive_count = 300
    features, labels = self.get_dataset(total_samples, positive_count)
    generator = BalancedGenerator(
        features,
        labels,
        positive_sample_perc=0.6,
        np_ratio=1,
        negative_perc=0.7,
    )
    # Presumably 180 is the number of positives per batch (0.6 * 300) --
    # confirm against BalancedGenerator.
    negative_count = total_samples - positive_count
    expected_batches = np.ceil((negative_count * 0.7) / 180)
    self.assertEqual(generator.__len__(), expected_batches)
def test_bootstrapping_len(self):
    """Check the batch count when positive_sample_perc is 1.

    Uses 1,000,000 samples with 300 positives, np_ratio=1 and
    negative_perc=1. The expected length is
    ceil((n_samples - n_positives) / 300).
    """
    total_samples = 1000000
    positive_count = 300
    features, labels = self.get_dataset(total_samples, positive_count)
    generator = BalancedGenerator(
        features,
        labels,
        positive_sample_perc=1,
        np_ratio=1,
        negative_perc=1,
    )
    # Presumably the divisor 300 is the number of positives placed in every
    # batch (1 * 300) -- confirm against BalancedGenerator.
    negative_count = total_samples - positive_count
    expected_batches = np.ceil(negative_count / 300)
    self.assertEqual(generator.__len__(), expected_batches)
def test_get_item(self):
    """Check that the first batch passes assertDatasetContainsBatch.

    Builds a 100-sample dataset with 10 positives, fetches batch 0 and
    hands its (x, y) pairs, together with the dataset's (x, y) pairs, to
    the assertDatasetContainsBatch helper.
    """
    total_samples = 100
    positive_count = 10
    features, labels = self.get_dataset(total_samples, positive_count)
    # Pair the samples up before the generator is constructed.
    all_pairs = list(zip(features, labels))
    generator = BalancedGenerator(
        features,
        labels,
        positive_sample_perc=1,
        np_ratio=1,
        negative_perc=1,
    )
    first_X, first_y = generator[0]
    self.assertDatasetContainsBatch(all_pairs, list(zip(first_X, first_y)))
def test_get_all_items(self):
    """Check that every batch passes assertDatasetContainsBatch.

    Builds a 100-sample dataset with 10 positives, then walks all batch
    indices reported by the generator (positive_sample_perc=1, np_ratio=1,
    negative_perc=0.5) and checks each batch's (x, y) pairs against the
    dataset's (x, y) pairs.
    """
    print("started test")
    total_samples = 100
    positive_count = 10
    features, labels = self.get_dataset(total_samples, positive_count)
    # Pair the samples up before the generator is constructed.
    all_pairs = list(zip(features, labels))
    generator = BalancedGenerator(
        features,
        labels,
        positive_sample_perc=1,
        np_ratio=1,
        negative_perc=0.5,
    )
    batch_count = generator.__len__()
    for index in range(batch_count):
        current_X, current_y = generator[index]
        current_pairs = list(zip(current_X, current_y))
        self.assertDatasetContainsBatch(all_pairs, current_pairs)