def testEntireEnsembleBuilder(ensemble_backend):

    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=2,
    )
    ensbuilder.SAVE2DISC = False

    ensbuilder.compute_loss_per_model()

    d2 = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy")

    sel_keys = ensbuilder.get_n_best_preds()
    assert len(sel_keys) > 0

    ensemble = ensbuilder.fit_ensemble(selected_keys=sel_keys)
    print(ensemble, sel_keys)

    n_sel_valid, n_sel_test = ensbuilder.get_valid_test_preds(
        selected_keys=sel_keys)

    # both valid and test prediction files are available
    assert len(n_sel_valid) > 0
    assert n_sel_valid == n_sel_test

    y_valid = ensbuilder.predict(
        set_="valid",
        ensemble=ensemble,
        selected_keys=n_sel_valid,
        n_preds=len(sel_keys),
        index_run=1,
    )
    y_test = ensbuilder.predict(
        set_="test",
        ensemble=ensemble,
        selected_keys=n_sel_test,
        n_preds=len(sel_keys),
        index_run=1,
    )

    # predictions for valid and test are the same
    # --> should results in the same predictions
    np.testing.assert_array_almost_equal(y_valid, y_test)

    # since d2 provides perfect predictions
    # it should get a higher weight
    # so that y_valid should be exactly y_valid_d2
    y_valid_d2 = ensbuilder.read_preds[d2][Y_VALID][:, 1]
    np.testing.assert_array_almost_equal(y_valid, y_valid_d2)
def testMaxModelsOnDisc(ensemble_backend, test_case, exp):
    ensemble_nbest = 4
    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=ensemble_nbest,
        max_models_on_disc=test_case,
    )

    with unittest.mock.patch('os.path.getsize') as mock:
        mock.return_value = 100 * 1024 * 1024
        ensbuilder.compute_loss_per_model()
        sel_keys = ensbuilder.get_n_best_preds()
        assert len(sel_keys) == exp, test_case
def testGetValidTestPreds(ensemble_backend):

    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=1)

    ensbuilder.compute_loss_per_model()

    # d1 is a dummt prediction. d2 and d3 have the same prediction with
    # different name. num_run=2 is selected when doing sorted()
    d1 = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy")
    d2 = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy")
    d3 = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy")

    sel_keys = ensbuilder.get_n_best_preds()
    assert len(sel_keys) == 1
    ensbuilder.get_valid_test_preds(selected_keys=sel_keys)

    # Number of read files should be three and
    # predictions_ensemble_0_4_0.0.npy must not be in there
    assert len(ensbuilder.read_preds) == 3
    assert os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_4_0.0/predictions_ensemble_0_4_0.0.npy"
    ) not in ensbuilder.read_preds

    # not selected --> should still be None
    assert ensbuilder.read_preds[d1][Y_VALID] is None
    assert ensbuilder.read_preds[d1][Y_TEST] is None
    assert ensbuilder.read_preds[d3][Y_VALID] is None
    assert ensbuilder.read_preds[d3][Y_TEST] is None

    # selected --> read valid and test predictions
    assert ensbuilder.read_preds[d2][Y_VALID] is not None
    assert ensbuilder.read_preds[d2][Y_TEST] is not None
def testNBest(ensemble_backend, ensemble_nbest, max_models_on_disc, exp):
    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=ensemble_nbest,
        max_models_on_disc=max_models_on_disc,
    )

    ensbuilder.compute_loss_per_model()
    sel_keys = ensbuilder.get_n_best_preds()

    assert len(sel_keys) == exp

    fixture = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy")
    assert sel_keys[0] == fixture
def testFallBackNBest(ensemble_backend):

    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=1)

    ensbuilder.compute_loss_per_model()
    print()
    print(ensbuilder.read_preds.keys())
    print(ensbuilder.read_losses.keys())
    print(ensemble_backend.temporary_directory)

    filename = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy")
    ensbuilder.read_losses[filename]["ens_loss"] = -1

    filename = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy")
    ensbuilder.read_losses[filename]["ens_loss"] = -1

    filename = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy")
    ensbuilder.read_losses[filename]["ens_loss"] = -1

    sel_keys = ensbuilder.get_n_best_preds()

    fixture = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy")
    assert len(sel_keys) == 1
    assert sel_keys[0] == fixture
def testRead(ensemble_backend):

    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
    )

    success = ensbuilder.compute_loss_per_model()
    assert success, str(ensbuilder.read_preds)
    assert len(ensbuilder.read_preds) == 3, ensbuilder.read_preds.keys()
    assert len(ensbuilder.read_losses) == 3, ensbuilder.read_losses.keys()

    filename = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy")
    assert ensbuilder.read_losses[filename]["ens_loss"] == 0.5

    filename = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy")
    assert ensbuilder.read_losses[filename]["ens_loss"] == 0.0