Example #1
def testMaxModelsOnDisc2(ensemble_backend):
    # Test for Extreme scenarios
    # Make sure that the best predictions are kept
    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=50,
        max_models_on_disc=10000.0,
    )
    ensbuilder.read_preds = {}
    for i in range(50):
        ensbuilder.read_scores['pred' + str(i)] = {
            'ens_score': i * 10,
            'num_run': i,
            'loaded': 1,
            "seed": 1,
            "disc_space_cost_mb": 50 * i,
        }
        ensbuilder.read_preds['pred' + str(i)] = {Y_ENSEMBLE: True}
    sel_keys = ensbuilder.get_n_best_preds()
    assert ['pred49', 'pred48', 'pred47'] == sel_keys

    # Make sure at least one model is kept alive
    ensbuilder.max_models_on_disc = 0.0
    sel_keys = ensbuilder.get_n_best_preds()
    assert ['pred49'] == sel_keys
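
The pytest-style tests in these examples take an ensemble_backend argument supplied by a fixture defined elsewhere in the test suite and not shown on this page. A minimal, hypothetical sketch of such a fixture (a mock object that only exposes the temporary_directory attribute the tests read; this is not auto-sklearn's actual conftest):

import os
import unittest.mock

import pytest


@pytest.fixture
def ensemble_backend(tmp_path):
    # Hypothetical stand-in: the real fixture prepares a Backend whose
    # temporary_directory already contains the .auto-sklearn prediction files
    # (e.g. .auto-sklearn/runs/*/predictions_ensemble_*.npy) read by the tests.
    backend = unittest.mock.Mock()
    backend.temporary_directory = str(tmp_path)
    os.makedirs(os.path.join(str(tmp_path), ".auto-sklearn", "runs"), exist_ok=True)
    return backend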
Example #2
    def testPerformanceRangeThresholdMaxBest(self):
        to_test = ((0.0, 1, 1), (0.0, 1.0, 4), (0.1, 2, 2), (0.3, 4, 3),
                   (0.5, 1, 1), (0.6, 10, 2), (0.8, 0.5, 1), (1, 1.0, 1))
        for performance_range_threshold, ensemble_nbest, exp in to_test:
            ensbuilder = EnsembleBuilder(
                backend=self.backend,
                dataset_name="TEST",
                task_type=1,  # Binary Classification
                metric=roc_auc,
                limit=-1,  # not used,
                seed=0,  # important to find the test files
                ensemble_nbest=ensemble_nbest,
                performance_range_threshold=performance_range_threshold,
                max_models_on_disc=None,
            )
            ensbuilder.read_preds = {
                'A': {'ens_score': 1, 'num_run': 1, 0: True, 'loaded': -1, "seed": 1},
                'B': {'ens_score': 2, 'num_run': 2, 0: True, 'loaded': -1, "seed": 1},
                'C': {'ens_score': 3, 'num_run': 3, 0: True, 'loaded': -1, "seed": 1},
                'D': {'ens_score': 4, 'num_run': 4, 0: True, 'loaded': -1, "seed": 1},
                'E': {'ens_score': 5, 'num_run': 5, 0: True, 'loaded': -1, "seed": 1},
            }
            sel_keys = ensbuilder.get_n_best_preds()

            self.assertEqual(len(sel_keys), exp)
    def testFallBackNBest(self):

        ensbuilder = EnsembleBuilder(
            backend=self.backend,
            dataset_name="TEST",
            task_type=1,  # Binary Classification
            metric=roc_auc,
            limit=-1,  # not used,
            seed=0,  # important to find the test files
            ensemble_nbest=1,
        )

        ensbuilder.read_ensemble_preds()

        filename = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_2.npy"
        )
        ensbuilder.read_preds[filename]["ens_score"] = -1

        filename = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_1.npy"
        )
        ensbuilder.read_preds[filename]["ens_score"] = -1

        sel_keys = ensbuilder.get_n_best_preds()

        fixture = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_1.npy"
        )
        self.assertEqual(sel_keys[0], fixture)
Example #6
    def testNBest(self):
        for ensemble_nbest, models_on_disc, exp in (
            (1, None, 1),
            (1.0, None, 2),
            (0.1, None, 1),
            (0.9, None, 1),
            (1, 2, 1),
            (2, 1, 1),
        ):
            ensbuilder = EnsembleBuilder(
                backend=self.backend,
                dataset_name="TEST",
                task_type=1,  # Binary Classification
                metric=roc_auc,
                limit=-1,  # not used,
                seed=0,  # important to find the test files
                ensemble_nbest=ensemble_nbest,
                max_models_on_disc=models_on_disc,
            )

            ensbuilder.score_ensemble_preds()
            sel_keys = ensbuilder.get_n_best_preds()

            self.assertEqual(len(sel_keys), exp)

            fixture = os.path.join(
                self.backend.temporary_directory,
                ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_2_0.0.npy"
            )
            self.assertEqual(sel_keys[0], fixture)
    def testGetValidTestPreds(self):

        ensbuilder = EnsembleBuilder(
            backend=self.backend,
            dataset_name="TEST",
            task_type=1,  # Binary Classification
            metric=roc_auc,
            limit=-1,  # not used,
            seed=0,  # important to find the test files
            ensemble_nbest=1,
        )

        ensbuilder.read_ensemble_preds()

        d2 = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_2.npy"
        )
        d1 = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_1.npy"
        )

        sel_keys = ensbuilder.get_n_best_preds()

        ensbuilder.get_valid_test_preds(selected_keys=sel_keys)

        # selected --> read valid and test predictions
        self.assertIsNotNone(ensbuilder.read_preds[d2][Y_VALID])
        self.assertIsNotNone(ensbuilder.read_preds[d2][Y_TEST])

        # not selected --> should still be None
        self.assertIsNone(ensbuilder.read_preds[d1][Y_VALID])
        self.assertIsNone(ensbuilder.read_preds[d1][Y_TEST])
Example #8
    def testEntireEnsembleBuilder(self):

        ensbuilder = EnsembleBuilder(
            backend=self.backend,
            dataset_name="TEST",
            task_type=1,  # Binary Classification
            metric=roc_auc,
            limit=-1,  # not used,
            seed=0,  # important to find the test files
            ensemble_nbest=2,
        )
        ensbuilder.SAVE2DISC = False

        ensbuilder.score_ensemble_preds()

        d2 = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_2_0.0.npy"
        )

        sel_keys = ensbuilder.get_n_best_preds()
        self.assertGreater(len(sel_keys), 0)

        ensemble = ensbuilder.fit_ensemble(selected_keys=sel_keys)
        print(ensemble, sel_keys)

        n_sel_valid, n_sel_test = ensbuilder.get_valid_test_preds(
            selected_keys=sel_keys)

        # both valid and test prediction files are available
        self.assertGreater(len(n_sel_valid), 0)
        self.assertEqual(n_sel_valid, n_sel_test)

        y_valid = ensbuilder.predict(
            set_="valid",
            ensemble=ensemble,
            selected_keys=n_sel_valid,
            n_preds=len(sel_keys),
            index_run=1,
        )
        y_test = ensbuilder.predict(
            set_="test",
            ensemble=ensemble,
            selected_keys=n_sel_test,
            n_preds=len(sel_keys),
            index_run=1,
        )

        # predictions for valid and test are the same
        # --> should result in the same predictions
        np.testing.assert_array_almost_equal(y_valid, y_test)

        # since d2 provides perfect predictions
        # it should get a higher weight
        # so that y_valid should be exactly y_valid_d2
        y_valid_d2 = ensbuilder.read_preds[d2][Y_VALID][:, 1]
        np.testing.assert_array_almost_equal(y_valid, y_valid_d2)
    def testEntireEnsembleBuilder(self):
        
        ensbuilder = EnsembleBuilder(
            backend=self.backend,
            dataset_name="TEST",
            task_type=1,  # Binary Classification
            metric=roc_auc,
            limit=-1,  # not used,
            seed=0,  # important to find the test files
            ensemble_nbest=2,
        )
        ensbuilder.SAVE2DISC = False
        
        ensbuilder.read_ensemble_preds()

        d2 = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_2.npy"
        )

        sel_keys = ensbuilder.get_n_best_preds()
        self.assertGreater(len(sel_keys), 0)
        
        ensemble = ensbuilder.fit_ensemble(selected_keys=sel_keys)
        print(ensemble, sel_keys)
        
        n_sel_valid, n_sel_test = ensbuilder.get_valid_test_preds(selected_keys=sel_keys)
        
        # both valid and test prediction files are available
        self.assertGreater(len(n_sel_valid), 0)
        self.assertEqual(n_sel_valid, n_sel_test)

        y_valid = ensbuilder.predict(
            set_="valid",
            ensemble=ensemble,
            selected_keys=n_sel_valid,
            n_preds=len(sel_keys),
            index_run=1,
        )
        y_test = ensbuilder.predict(
            set_="test",
            ensemble=ensemble,
            selected_keys=n_sel_test,
            n_preds=len(sel_keys),
            index_run=1,
        )

        # predictions for valid and test are the same
        # --> should result in the same predictions
        np.testing.assert_array_almost_equal(y_valid, y_test)

        # since d2 provides perfect predictions
        # it should get a higher weight
        # so that y_valid should be exactly y_valid_d2
        y_valid_d2 = ensbuilder.read_preds[d2][Y_VALID][:, 1]
        np.testing.assert_array_almost_equal(y_valid, y_valid_d2)
def testEntireEnsembleBuilder(ensemble_backend):

    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=2,
    )
    ensbuilder.SAVE2DISC = False

    ensbuilder.compute_loss_per_model()

    d2 = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy")

    sel_keys = ensbuilder.get_n_best_preds()
    assert len(sel_keys) > 0

    ensemble = ensbuilder.fit_ensemble(selected_keys=sel_keys)
    print(ensemble, sel_keys)

    n_sel_valid, n_sel_test = ensbuilder.get_valid_test_preds(
        selected_keys=sel_keys)

    # both valid and test prediction files are available
    assert len(n_sel_valid) > 0
    assert n_sel_valid == n_sel_test

    y_valid = ensbuilder.predict(
        set_="valid",
        ensemble=ensemble,
        selected_keys=n_sel_valid,
        n_preds=len(sel_keys),
        index_run=1,
    )
    y_test = ensbuilder.predict(
        set_="test",
        ensemble=ensemble,
        selected_keys=n_sel_test,
        n_preds=len(sel_keys),
        index_run=1,
    )

    # predictions for valid and test are the same
    # --> should result in the same predictions
    np.testing.assert_array_almost_equal(y_valid, y_test)

    # since d2 provides perfect predictions
    # it should get a higher weight
    # so that y_valid should be exactly y_valid_d2
    y_valid_d2 = ensbuilder.read_preds[d2][Y_VALID][:, 1]
    np.testing.assert_array_almost_equal(y_valid, y_valid_d2)
Example #11
def testPerformanceRangeThresholdMaxBest(ensemble_backend,
                                         performance_range_threshold,
                                         ensemble_nbest, exp):
    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=ensemble_nbest,
        performance_range_threshold=performance_range_threshold,
        max_models_on_disc=None,
    )
    ensbuilder.read_scores = {
        'A': {
            'ens_score': 1,
            'num_run': 1,
            'loaded': -1,
            "seed": 1
        },
        'B': {
            'ens_score': 2,
            'num_run': 2,
            'loaded': -1,
            "seed": 1
        },
        'C': {
            'ens_score': 3,
            'num_run': 3,
            'loaded': -1,
            "seed": 1
        },
        'D': {
            'ens_score': 4,
            'num_run': 4,
            'loaded': -1,
            "seed": 1
        },
        'E': {
            'ens_score': 5,
            'num_run': 5,
            'loaded': -1,
            "seed": 1
        },
    }
    ensbuilder.read_preds = {
        key: {key_2: True
              for key_2 in (Y_ENSEMBLE, Y_VALID, Y_TEST)}
        for key in ensbuilder.read_scores
    }
    sel_keys = ensbuilder.get_n_best_preds()

    assert len(sel_keys) == exp
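
The pytest.mark.parametrize decorator that supplies performance_range_threshold, ensemble_nbest and exp is not part of this snippet. A sketch of an assumed parametrization, reusing the to_test triples from the unittest variant in Example #2 (the exact values used with the pytest fixture may differ):

import pytest

# Assumed parametrization, mirroring the to_test tuples of the unittest variant.
@pytest.mark.parametrize(
    "performance_range_threshold,ensemble_nbest,exp",
    [(0.0, 1, 1), (0.0, 1.0, 4), (0.1, 2, 2), (0.3, 4, 3),
     (0.5, 1, 1), (0.6, 10, 2), (0.8, 0.5, 1), (1, 1.0, 1)],
)
def testPerformanceRangeThresholdMaxBest(ensemble_backend,
                                         performance_range_threshold,
                                         ensemble_nbest, exp):
    ...  # body as in the snippet above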
Example #12
    def testGetValidTestPreds(self):

        ensbuilder = EnsembleBuilder(
            backend=self.backend,
            dataset_name="TEST",
            task_type=1,  # Binary Classification
            metric=roc_auc,
            limit=-1,  # not used,
            seed=0,  # important to find the test files
            ensemble_nbest=1)

        ensbuilder.score_ensemble_preds()

        d1 = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_1_0.0.npy"
        )
        d2 = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_2_0.0.npy"
        )
        d3 = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_3_100.0.npy"
        )

        sel_keys = ensbuilder.get_n_best_preds()
        self.assertEqual(len(sel_keys), 1)
        ensbuilder.get_valid_test_preds(selected_keys=sel_keys)

        # Number of read files should be three and
        # predictions_ensemble_0_4_0.0.npy must not be in there
        self.assertEqual(len(ensbuilder.read_preds), 3)
        self.assertNotIn(
            os.path.join(
                self.backend.temporary_directory,
                ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_4_0.0.npy"
            ), ensbuilder.read_preds)

        # not selected --> should still be None
        self.assertIsNone(ensbuilder.read_preds[d1][Y_VALID])
        self.assertIsNone(ensbuilder.read_preds[d1][Y_TEST])
        self.assertIsNone(ensbuilder.read_preds[d3][Y_VALID])
        self.assertIsNone(ensbuilder.read_preds[d3][Y_TEST])

        # selected --> read valid and test predictions
        self.assertIsNotNone(ensbuilder.read_preds[d2][Y_VALID])
        self.assertIsNotNone(ensbuilder.read_preds[d2][Y_TEST])
Example #13
def testMaxModelsOnDisc(ensemble_backend, test_case, exp):
    ensemble_nbest = 4
    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=ensemble_nbest,
        max_models_on_disc=test_case,
    )

    with unittest.mock.patch('os.path.getsize') as mock:
        mock.return_value = 100 * 1024 * 1024
        ensbuilder.score_ensemble_preds()
        sel_keys = ensbuilder.get_n_best_preds()
        assert len(sel_keys) == exp, test_case
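
As above, the decorator that provides test_case and exp is not shown. A sketch of an assumed parametrization, reusing the (test_case, exp) pairs from the unittest variant in Example #17 (the values actually paired with the pytest fixture may differ):

import pytest

# Assumed parametrization, mirroring the loop of the unittest variant.
@pytest.mark.parametrize("test_case,exp", [
    (None, 2),    # None: no reduction
    (4, 2),       # int: limit only applies once exceeded
    (1, 1),
    (700.0, 1),   # float: disc-space budget in MB
    (800.0, 2),
    (9999.0, 2),
])
def testMaxModelsOnDisc(ensemble_backend, test_case, exp):
    ...  # body as in the snippet above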
def testGetValidTestPreds(ensemble_backend):

    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=1)

    ensbuilder.compute_loss_per_model()

    # d1 is a dummy prediction. d2 and d3 contain the same prediction under
    # different names; num_run=2 is selected when doing sorted()
    d1 = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy")
    d2 = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy")
    d3 = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy")

    sel_keys = ensbuilder.get_n_best_preds()
    assert len(sel_keys) == 1
    ensbuilder.get_valid_test_preds(selected_keys=sel_keys)

    # Number of read files should be three and
    # predictions_ensemble_0_4_0.0.npy must not be in there
    assert len(ensbuilder.read_preds) == 3
    assert os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_4_0.0/predictions_ensemble_0_4_0.0.npy"
    ) not in ensbuilder.read_preds

    # not selected --> should still be None
    assert ensbuilder.read_preds[d1][Y_VALID] is None
    assert ensbuilder.read_preds[d1][Y_TEST] is None
    assert ensbuilder.read_preds[d3][Y_VALID] is None
    assert ensbuilder.read_preds[d3][Y_TEST] is None

    # selected --> read valid and test predictions
    assert ensbuilder.read_preds[d2][Y_VALID] is not None
    assert ensbuilder.read_preds[d2][Y_TEST] is not None
Example #15
def testNBest(ensemble_backend, ensemble_nbest, max_models_on_disc, exp):
    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=ensemble_nbest,
        max_models_on_disc=max_models_on_disc,
    )

    ensbuilder.score_ensemble_preds()
    sel_keys = ensbuilder.get_n_best_preds()

    assert len(sel_keys) == exp

    fixture = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy")
    assert sel_keys[0] == fixture
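
The decorator supplying ensemble_nbest, max_models_on_disc and exp is likewise not shown. A sketch of an assumed parametrization, reusing the triples from the unittest variant in Example #6 (the values used with the pytest fixture may differ):

import pytest

# Assumed parametrization, mirroring the loop of the unittest variant.
@pytest.mark.parametrize("ensemble_nbest,max_models_on_disc,exp", [
    (1, None, 1),
    (1.0, None, 2),
    (0.1, None, 1),
    (0.9, None, 1),
    (1, 2, 1),
    (2, 1, 1),
])
def testNBest(ensemble_backend, ensemble_nbest, max_models_on_disc, exp):
    ...  # body as in the snippet above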
Example #16
def testFallBackNBest(ensemble_backend):

    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=1)

    ensbuilder.score_ensemble_preds()
    print()
    print(ensbuilder.read_preds.keys())
    print(ensbuilder.read_scores.keys())
    print(ensemble_backend.temporary_directory)

    filename = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy")
    ensbuilder.read_scores[filename]["ens_score"] = -1

    filename = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy")
    ensbuilder.read_scores[filename]["ens_score"] = -1

    filename = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy")
    ensbuilder.read_scores[filename]["ens_score"] = -1

    sel_keys = ensbuilder.get_n_best_preds()

    fixture = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy")
    assert len(sel_keys) == 1
    assert sel_keys[0] == fixture
Example #17
    def testMaxModelsOnDisc(self):

        ensemble_nbest = 4
        for (test_case, exp) in [
            # If None, no reduction
            (None, 2),
            # If int, the limit only applies once it is exceeded
            (4, 2),
            (1, 1),
            # If float, interpret it as a disc-space budget in MB and translate
            # it into a number of models. Below, every file is mocked to 100 MB
            # and each run keeps 4 files on disc (.model plus the
            # test/valid/ensemble .npy predictions), i.e. roughly 400 MB per model.
            (700.0, 1),
            (800.0, 2),
            (9999.0, 2),
        ]:
            ensbuilder = EnsembleBuilder(
                backend=self.backend,
                dataset_name="TEST",
                task_type=1,  # Binary Classification
                metric=roc_auc,
                limit=-1,  # not used,
                seed=0,  # important to find the test files
                ensemble_nbest=ensemble_nbest,
                max_models_on_disc=test_case,
            )

            with unittest.mock.patch('os.path.getsize') as mock:
                mock.return_value = 100 * 1024 * 1024
                ensbuilder.score_ensemble_preds()
                sel_keys = ensbuilder.get_n_best_preds()
                self.assertEqual(len(sel_keys), exp)

        # Test for Extreme scenarios
        # Make sure that the best predictions are kept
        ensbuilder = EnsembleBuilder(
            backend=self.backend,
            dataset_name="TEST",
            task_type=1,  # Binary Classification
            metric=roc_auc,
            limit=-1,  # not used,
            seed=0,  # important to find the test files
            ensemble_nbest=50,
            max_models_on_disc=10000.0,
        )
        ensbuilder.read_preds = {}
        for i in range(50):
            ensbuilder.read_preds['pred' + str(i)] = {
                'ens_score': i * 10,
                'num_run': i,
                0: True,
                'loaded': 1,
                "seed": 1,
                "disc_space_cost_mb": 50 * i,
            }
        sel_keys = ensbuilder.get_n_best_preds()
        self.assertListEqual(['pred49', 'pred48', 'pred47', 'pred46'],
                             sel_keys)

        # Make sure at least one model is kept alive
        ensbuilder.max_models_on_disc = 0.0
        sel_keys = ensbuilder.get_n_best_preds()
        self.assertListEqual(['pred49'], sel_keys)