Example #1
0
def testMaxModelsOnDisc2(ensemble_backend):
    # Test for Extreme scenarios
    # Make sure that the best predictions are kept
    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=50,
        max_models_on_disc=10000.0,
    )
    ensbuilder.read_preds = {}
    for i in range(50):
        ensbuilder.read_scores['pred' + str(i)] = {
            'ens_score': i * 10,
            'num_run': i,
            'loaded': 1,
            "seed": 1,
            "disc_space_cost_mb": 50 * i,
        }
        ensbuilder.read_preds['pred' + str(i)] = {Y_ENSEMBLE: True}
    sel_keys = ensbuilder.get_n_best_preds()
    assert ['pred49', 'pred48', 'pred47'] == sel_keys

    # Make sure at least one model is kept alive
    ensbuilder.max_models_on_disc = 0.0
    sel_keys = ensbuilder.get_n_best_preds()
    assert ['pred49'] == sel_keys
Example #2
0
    def testMaxModelsOnDisc(self):

        ensemble_nbest = 4
        for (test_case, exp) in [
                # If None, no reduction
            (None, 2),
                # If Int, limit only on exceed
            (4, 2),
            (1, 1),
                # If Float, translate float to # models.
                # below, mock of each file is 100 Mb and
                # 4 files .model and .npy (test/val/pred) exist
            (700.0, 1),
            (800.0, 2),
            (9999.0, 2),
        ]:
            ensbuilder = EnsembleBuilder(
                backend=self.backend,
                dataset_name="TEST",
                task_type=1,  # Binary Classification
                metric=roc_auc,
                limit=-1,  # not used,
                seed=0,  # important to find the test files
                ensemble_nbest=ensemble_nbest,
                max_models_on_disc=test_case,
            )

            with unittest.mock.patch('os.path.getsize') as mock:
                mock.return_value = 100 * 1024 * 1024
                ensbuilder.score_ensemble_preds()
                sel_keys = ensbuilder.get_n_best_preds()
                self.assertEqual(len(sel_keys), exp)

        # Test for Extreme scenarios
        # Make sure that the best predictions are kept
        ensbuilder = EnsembleBuilder(
            backend=self.backend,
            dataset_name="TEST",
            task_type=1,  # Binary Classification
            metric=roc_auc,
            limit=-1,  # not used,
            seed=0,  # important to find the test files
            ensemble_nbest=50,
            max_models_on_disc=10000.0,
        )
        ensbuilder.read_preds = {}
        for i in range(50):
            ensbuilder.read_preds['pred' + str(i)] = {
                'ens_score': i * 10,
                'num_run': i,
                0: True,
                'loaded': 1,
                "seed": 1,
                "disc_space_cost_mb": 50 * i,
            }
        sel_keys = ensbuilder.get_n_best_preds()
        self.assertListEqual(['pred49', 'pred48', 'pred47', 'pred46'],
                             sel_keys)

        # Make sure at least one model is kept alive
        ensbuilder.max_models_on_disc = 0.0
        sel_keys = ensbuilder.get_n_best_preds()
        self.assertListEqual(['pred49'], sel_keys)