def test_bigger_len(self):
        """Check the generator length on a dataset of 9888 samples.

        The dataset has 10 positives; the generator is built with
        ``positive_sample_perc=0.5``, ``np_ratio=1`` and ``negative_perc=1``.
        """
        n_samples = 9888
        n_positives = 10
        X, y = self.get_dataset(n_samples=n_samples, n_positives=n_positives)

        gen = BalancedGenerator(
            X,
            y,
            positive_sample_perc=0.5,
            np_ratio=1,
            negative_perc=1,
        )

        # NOTE(review): the divisor 5 is presumably the per-batch sample count
        # (10 positives * 0.5) -- confirm against BalancedGenerator.
        expected_len = np.ceil((n_samples - n_positives) / 5)
        # len() goes through the length protocol instead of calling the
        # dunder directly, so it also checks that a valid integer is returned.
        self.assertEqual(len(gen), expected_len)
    def test_len(self):
        """Check the generator length on a dataset of 30 samples.

        The dataset has 5 positives; the generator is built with
        ``positive_sample_perc=0.1``, ``np_ratio=1`` and ``negative_perc=1``.
        """
        X, y = self.get_dataset(n_samples=30, n_positives=5)

        gen = BalancedGenerator(
            X,
            y,
            positive_sample_perc=0.1,
            np_ratio=1,
            negative_perc=1,
        )

        # NOTE(review): 25 equals the number of non-positive samples (30 - 5),
        # which presumably means one sample per batch -- confirm against
        # BalancedGenerator.
        # len() goes through the length protocol instead of calling the
        # dunder directly, so it also checks that a valid integer is returned.
        self.assertEqual(len(gen), 25)
    def test_negative_perc_len(self):
        """Check the generator length when ``negative_perc`` is below 1.

        Uses 1,000,000 samples with 300 positives and a generator built with
        ``positive_sample_perc=0.6``, ``np_ratio=1`` and ``negative_perc=0.7``.
        """
        n_samples = 1000000
        n_positives = 300
        X, y = self.get_dataset(n_samples, n_positives)

        gen = BalancedGenerator(
            X,
            y,
            positive_sample_perc=0.6,
            np_ratio=1,
            negative_perc=0.7,
        )

        # NOTE(review): the divisor 180 is presumably the per-batch sample
        # count (300 positives * 0.6) -- confirm against BalancedGenerator.
        n_negatives = n_samples - n_positives
        expected_len = np.ceil((n_negatives * 0.7) / 180)
        # len() goes through the length protocol instead of calling the
        # dunder directly, so it also checks that a valid integer is returned.
        self.assertEqual(len(gen), expected_len)
    def test_bootstrapping_len(self):
        """Check the generator length when every positive is sampled.

        Uses 1,000,000 samples with 300 positives and a generator built with
        ``positive_sample_perc=1``, ``np_ratio=1`` and ``negative_perc=1``.
        """
        n_samples = 1000000
        n_positives = 300
        X, y = self.get_dataset(n_samples, n_positives)

        gen = BalancedGenerator(
            X,
            y,
            positive_sample_perc=1,
            np_ratio=1,
            negative_perc=1,
        )

        # NOTE(review): the divisor 300 matches n_positives (all positives
        # sampled at perc=1); presumably the per-batch sample count --
        # confirm against BalancedGenerator.
        expected_len = np.ceil((n_samples - n_positives) / 300)
        # len() goes through the length protocol instead of calling the
        # dunder directly, so it also checks that a valid integer is returned.
        self.assertEqual(len(gen), expected_len)
 def test_get_item(self):
     """Check the first batch against the source dataset.

     Builds a generator over 100 samples (10 positives), fetches batch 0
     and passes its ``(x, y)`` pairs, together with the dataset's pairs,
     to ``assertDatasetContainsBatch``.
     """
     n_samples = 100
     n_positives = 10
     X, y = self.get_dataset(n_samples, n_positives)
     dataset = list(zip(X, y))

     gen = BalancedGenerator(
         X,
         y,
         positive_sample_perc=1,
         np_ratio=1,
         negative_perc=1,
     )

     # Subscript the generator rather than calling __getitem__ directly.
     batch_X, batch_y = gen[0]
     batch = list(zip(batch_X, batch_y))
     self.assertDatasetContainsBatch(dataset, batch)
 def test_get_all_items(self):
     """Check every batch of the generator against the source dataset.

     Builds a generator over 100 samples (10 positives) with
     ``negative_perc=0.5`` and, for each index below ``len(gen)``, passes
     the batch's ``(x, y)`` pairs, together with the dataset's pairs, to
     ``assertDatasetContainsBatch``.
     """
     n_samples = 100
     n_positives = 10
     X, y = self.get_dataset(n_samples, n_positives)
     dataset = list(zip(X, y))

     gen = BalancedGenerator(
         X,
         y,
         positive_sample_perc=1,
         np_ratio=1,
         negative_perc=0.5,
     )

     # Index explicitly instead of iterating over gen: whether (and how)
     # BalancedGenerator supports iteration is not visible from this test.
     for batch_index in range(len(gen)):
         batch_X, batch_y = gen[batch_index]
         batch = list(zip(batch_X, batch_y))
         self.assertDatasetContainsBatch(dataset, batch)