Ejemplo n.º 1
0
    def test_fit_and_predict(self):
        """Fit AutoML, then check that a JSON round-trip still predicts well.

        Trains an Xgboost-only search on ``self.X``/``self.y`` and requires the
        logloss of the ``p_1`` predictions to be below 0.7. A second AutoML
        instance is then restored from ``to_json()`` output and must pass the
        same check; the ``_threshold`` attribute of both instances must be
        almost equal.
        """
        metric = Metric({"name": "logloss"})

        automl = AutoML(
            total_time_limit=5,
            algorithms=["Xgboost"],
            start_random_models=5,
            hill_climbing_steps=0,
            seed=13,
        )
        automl.fit(self.X, self.y)

        y_predicted = automl.predict(self.X)["p_1"]
        # Dedicated assert methods report the offending values on failure,
        # unlike assertTrue(<expression>), which only reports "False is not true".
        self.assertIsNotNone(y_predicted)
        loss = metric(self.y, y_predicted)
        self.assertLess(loss, 0.7)

        # Serialize the fitted search and restore it into a fresh instance.
        params = automl.to_json()
        automl2 = AutoML()
        automl2.from_json(params)

        y_predicted2 = automl2.predict(self.X)["p_1"]
        self.assertIsNotNone(y_predicted2)
        loss2 = metric(self.y, y_predicted2)
        self.assertLess(loss2, 0.7)

        assert_almost_equal(automl._threshold, automl2._threshold)
    def test_fit_and_predict(self):
        """Smoke-test AutoML on the housing regression CSV with missing data.

        Fits an Xgboost-only search (``train_ensemble`` enabled) on a 70/30
        split of the file and prints the prediction for the held-out rows.
        Nothing is asserted.
        """
        split_seed = 1709

        frame = pd.read_csv(
            "./tests/data/housing_regression_missing_values_missing_target.csv"
        )
        print(frame.columns)

        # Everything except the MEDV column is used as a predictor.
        feature_names = [name for name in frame.columns if name != "MEDV"]
        features = frame[feature_names]
        target = frame["MEDV"]

        X_train, X_test, y_train, _ = sklearn.model_selection.train_test_split(
            features, target, test_size=0.3, random_state=split_seed
        )

        search = AutoML(
            total_time_limit=10,
            # other candidates: "LightGBM", "RF", "NN", "CatBoost"
            algorithms=["Xgboost"],
            start_random_models=1,
            hill_climbing_steps=0,
            top_models_to_improve=0,
            train_ensemble=True,
            verbose=True,
        )
        search.fit(X_train, y_train)

        print("Response", search.predict(X_test))
    def test_fit_and_predict(self):
        """Fit a multi-algorithm AutoML per dataset and print test logloss.

        For each dataset id the data is split 70/30, AutoML is fitted on the
        training part, and the logloss of the AutoML prediction and of every
        entry in ``automl._models`` is printed. Nothing is asserted.
        """
        split_seed = 1706 + 1
        dataset_ids = [31]  # other ids tried: 720, 31, 44, 737

        for dataset_id in dataset_ids:
            frame = pd.read_csv("./tests/data/data/{0}.csv".format(dataset_id))
            feature_names = [name for name in frame.columns if name != "target"]
            features = frame[feature_names]
            target = frame["target"]

            X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
                features, target, test_size=0.3, random_state=split_seed
            )

            automl = AutoML(
                total_time_limit=60 * 6000,
                algorithms=["LightGBM", "RF", "NN", "CatBoost", "Xgboost"],
                start_random_models=10,
                hill_climbing_steps=3,
                top_models_to_improve=3,
                train_ensemble=True,
                verbose=True,
            )
            automl.fit(X_train, y_train)

            # Logloss of the overall AutoML prediction on the test split.
            overall_ll = log_loss(y_test, automl.predict(X_test))
            print("(*) Dataset id {} logloss {}".format(dataset_id, overall_ll))

            # Logloss of each individual model kept by the search.
            for position, model in enumerate(automl._models):
                model_ll = log_loss(y_test, model.predict(X_test))
                print("{}) Dataset id {} logloss {}".format(
                    position, dataset_id, model_ll))
Ejemplo n.º 4
0
def run(dataset, config):
    """Train mljar-supervised AutoML on a benchmark dataset and report results.

    Args:
        dataset: Benchmark dataset exposing ``columns`` (pairs of name and
            dtype), ``problem_type`` and ``train``/``test`` splits with ``X``
            and ``y`` arrays.
        config: Benchmark configuration providing ``type``, ``seed``,
            ``max_runtime_seconds``, ``framework_params`` and
            ``output_predictions_file``.

    Returns:
        Whatever ``result(...)`` returns for the collected predictions.
    """
    log.info("\n**** mljar-supervised ****\n")

    column_names, _ = zip(*dataset.columns)
    column_types = dict(dataset.columns)

    def _to_typed_frame(values):
        # Rebuild a DataFrame with the benchmark's column names and dtypes.
        frame = pd.DataFrame(values, columns=column_names)
        return frame.astype(column_types, copy=False)

    X_train = _to_typed_frame(dataset.train.X)
    X_test = _to_typed_frame(dataset.test.X)

    y_train = dataset.train.y.flatten()
    y_test = dataset.test.y.flatten()

    problem_mapping = {
        "binary": "binary_classification",
        "multiclass": "multiclass_classification",
        "regression": "regression",
    }
    is_classification = config.type == "classification"
    # A missing key yields None, in which case AutoML guesses the ML task.
    ml_task = problem_mapping.get(dataset.problem_type)
    results_path = output_subdir("results", config)

    # Parameters whose name starts with "_" are benchmark-internal and are
    # not forwarded to AutoML.
    training_params = {
        name: value
        for name, value in config.framework_params.items()
        if not name.startswith("_")
    }

    automl = AutoML(
        results_path=results_path,
        total_time_limit=config.max_runtime_seconds,
        seed=config.seed,
        ml_task=ml_task,
        **training_params
    )

    with Timer() as training:
        automl.fit(X_train, y_train)

    preds = automl.predict(X_test)

    if is_classification:
        predictions = preds["label"].values
        # Every column except the last one is passed on as probabilities.
        probabilities = preds[preds.columns[:-1]].values
    else:
        predictions = preds["prediction"].values
        probabilities = None

    # clean the results
    keep_artifacts = config.framework_params.get("_save_artifacts", False)
    if not keep_artifacts:
        shutil.rmtree(results_path, ignore_errors=True)

    return result(
        output_file=config.output_predictions_file,
        predictions=predictions,
        truth=y_test,
        probabilities=probabilities,
        models_count=len(automl._models),
        training_duration=training.duration,
    )
Ejemplo n.º 5
0
    def run(self):
        """Run an mljar-supervised search and publish up to four models.

        Reads the dataset named in ``self.specification['input']``, drops rows
        with missing values, preprocesses it, fits AutoML and wraps the four
        models with the lowest ``get_final_loss()`` in ``ModelSklearn``. Each
        wrapped model is saved and passed to the configured found-model
        callback.

        Returns:
            dict: success flag, message and the search id / system name.
        """
        from supervised.automl import AutoML

        dataframe = Dataset(self.specification['input']).get_dataframe().dropna()

        problem = self.specification['problem']
        X = problem['predictors']
        y = problem['targets'][0]

        stimulus, preprocessor = preprocess(dataframe, self.specification)

        # Copy optional time bounds from the specification into the AutoML
        # parameters; missing or falsy bounds are skipped.
        time_bounds = (
            ('timeBoundSearch', 'total_time_limit'),
            ('timeBoundRun', 'learner_time_limit'),
        )
        for spec_key, param_name in time_bounds:
            if self.specification.get(spec_key):
                self.system_params[param_name] = self.specification[spec_key]

        automl = AutoML(**self.system_params)

        # mljar seems kind of fragile?
        # Hand it a DataFrame whose column labels are stripped strings.
        stimulus = pandas.DataFrame(stimulus)
        stimulus.columns = [str(label).strip() for label in stimulus.columns]

        automl.fit(stimulus, dataframe[y])

        ranked_models = sorted(automl._models,
                               key=lambda candidate: candidate.get_final_loss())
        for model_mljar in ranked_models[:4]:
            model = ModelSklearn(
                model_mljar,
                system='mljar-supervised',
                search_id=self.search_id,
                predictors=X,
                targets=[y],
                preprocess=preprocessor,
                task=self.specification['problem']['taskType'])

            model.save()

            from tworaven_apps.solver_interfaces.tasks import FOUND_MODEL_CALLBACKS
            notify = FOUND_MODEL_CALLBACKS[self.callback_found]
            notify(model, **(self.callback_arguments or {}))

        payload = {
            'search_id': self.search_id,
            'system': 'mljar-supervised'
        }
        return {
            KEY_SUCCESS: True,
            KEY_MESSAGE: 'search complete',
            KEY_DATA: payload
        }
Ejemplo n.º 6
0
 def test_fit_optimize_auc(self):
     """Check that optimizing for AUC is reflected in the leaderboard.

     The first leaderboard row must report ``"auc"`` as metric type and every
     row must have a metric value above 0.5.
     """
     search = AutoML(
         total_time_limit=5,
         algorithms=["Xgboost"],
         start_random_models=2,
         hill_climbing_steps=0,
         optimize_metric="auc",
         seed=16,
     )
     search.fit(self.X, self.y)

     leaderboard = search.get_leaderboard()
     self.assertEqual(leaderboard["metric_type"][0], "auc")

     # all better than 0.5 AUC
     above_chance = np.sum(leaderboard["metric_value"] > 0.5)
     self.assertEqual(above_chance, leaderboard.shape[0])
Ejemplo n.º 7
0
    def test_predict_labels(self):
        """Check that predictions carry the original string labels.

        Fits AutoML on the 500-row adult CSV (last column is the target) and
        requires both 'A' and 'B' to appear in the predicted ``label`` column.
        """
        df = pd.read_csv(
            'tests/data/adult_missing_values_missing_target_500rows.csv')
        X = df[df.columns[:-1]]
        y = df[df.columns[-1]]
        automl = AutoML(total_time_limit=15,
                        algorithms=["Xgboost"],
                        start_random_models=5,
                        hill_climbing_steps=0,
                        train_ensemble=True)
        automl.fit(X, y)

        y_predicted = automl.predict(X)
        # Compute the distinct labels once; assertIn shows them on failure.
        predicted_labels = np.unique(y_predicted['label'])
        self.assertIn('A', predicted_labels)
        self.assertIn('B', predicted_labels)
Ejemplo n.º 8
0
def train_titanic(train_data):
    """Fit AutoML on a Titanic CSV and print the hold-out accuracy.

    Args:
        train_data: Location of the training CSV, passed to ``pd.read_csv``.
    """
    passengers = pd.read_csv(train_data)

    # Predictors are every column from the third one onwards; the target is
    # the 'Survived' column.
    target_name = 'Survived'
    feature_names = passengers.columns[2:]

    features = passengers[feature_names]
    target = passengers[target_name]
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.25
    )

    automl = AutoML(results_path="AutoML_titanic")
    automl.fit(X_train, y_train)

    predictions = automl.predict(X_test)
    print(f"Accuracy: {accuracy_score(y_test, predictions) * 100.0:.2f}%")
Ejemplo n.º 9
0
 def test_predict_labels(self):
     """Check the shape and columns of the leaderboard after fitting.

     The leaderboard must have one row per entry in ``automl._models`` and
     contain the uid, model type, metric and train-time columns.
     """
     automl = AutoML(
         total_time_limit=15,
         algorithms=["Xgboost"],
         start_random_models=5,
         hill_climbing_steps=0,
         train_ensemble=True,
         seed=15,
     )
     automl.fit(self.X, self.y)
     ldb = automl.get_leaderboard()
     self.assertEqual(ldb.shape[0], len(automl._models))
     for col in [
             "uid", "model_type", "metric_type", "metric_value",
             "train_time"
     ]:
         # assertIn names the missing column and the available ones on failure.
         self.assertIn(col, ldb.columns)
Ejemplo n.º 10
0
 def test_reproduce_fit(self):
     """Two fits with the same seed must give (almost) the same logloss."""
     metric = Metric({"name": "logloss"})

     def fit_and_score():
         automl = AutoML(
             # the time limit should be big enough to not interrupt the training
             total_time_limit=10000,
             algorithms=["Xgboost"],
             start_random_models=2,
             hill_climbing_steps=1,
             train_ensemble=True,
             verbose=True,
             seed=12,
         )
         automl.fit(self.X, self.y)
         y_predicted = automl.predict(self.X)["p_1"]
         return metric(self.y, y_predicted)

     losses = []
     for _ in range(2):
         losses.append(fit_and_score())
     assert_almost_equal(losses[0], losses[1], decimal=4)
Ejemplo n.º 11
0
def train_digits():
    """Fit AutoML on the digits dataset, print test accuracy and plot results.

    Uses a stratified 75/25 split with a fixed random state and stores the
    AutoML results under ``AutoML_digits``.
    """
    digits = load_digits()
    pixels = pd.DataFrame(digits.data)
    X_train, X_test, y_train, y_test = train_test_split(
        pixels,
        digits.target,
        stratify=digits.target,
        test_size=0.25,
        random_state=123,
    )

    # train models with AutoML
    automl = AutoML(mode="Perform", results_path="AutoML_digits")
    automl.fit(X_train, y_train)

    # predict_all output is indexed by "label" below
    predictions = automl.predict_all(X_test)
    print(predictions.head())

    predicted_digits = predictions["label"].astype(int)
    print("Test accuracy:", accuracy_score(y_test, predicted_digits))

    plot_digits(X_test, predictions)
    def test_fit_and_predict(self):
        """Benchmark AutoML on several datasets and log the scores.

        For every dataset id the data is split 70/30, an Xgboost-only AutoML
        is fitted, and the test logloss, F1 score and ``automl._fit_time`` are
        printed and appended to ``./result.txt``. Nothing is asserted.
        """
        for dataset_id in [3, 24, 31, 38, 44, 179, 737, 720]:
            df = pd.read_csv("./tests/data/{0}.csv".format(dataset_id))
            x_cols = [c for c in df.columns if c != "target"]
            X = df[x_cols]
            y = df["target"]

            for repeat in range(1):

                X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
                    X, y, test_size=0.3, random_state=1706 + repeat)
                automl = AutoML(
                    total_time_limit=60 * 1,  # 60 seconds
                    algorithms=[
                        "Xgboost"
                    ],  # ["LightGBM", "CatBoost", "Xgboost", "RF", "NN"],
                    start_random_models=3,
                    hill_climbing_steps=1,
                    top_models_to_improve=1,
                    train_ensemble=True,
                    verbose=True,
                )
                automl.fit(X_train, y_train)

                # Predict once and read both columns from the same frame
                # instead of running inference twice.
                predictions = automl.predict(X_test)
                response = predictions["p_1"]
                labels = predictions["label"]

                # Compute the logloss on test dataset
                ll = log_loss(y_test, response)
                f1 = f1_score(y_test, labels)
                print("iter: {}) id:{} logloss:{} f1:{} time:{}".format(
                    repeat, dataset_id, ll, f1, automl._fit_time))
                with open("./result.txt", "a") as f_result:
                    f_result.write("{} {} {} {} {}\n".format(
                        repeat, dataset_id, ll, f1, automl._fit_time))
    category=pd.core.common.SettingWithCopyWarning)  # message="*ndarray*")

# df = pd.read_csv("tests/data/iris_classes_missing_values_missing_target.csv")
# Load the iris CSV variant with missing values; the four feature_* columns
# are the predictors and "class" is the target.
df = pd.read_csv("tests/data/iris_missing_values_missing_target.csv")
X = df[["feature_1", "feature_2", "feature_3", "feature_4"]]
y = df["class"]

# Only `mode` is set; the commented-out keyword arguments below are options
# that were tried earlier and are currently disabled.
automl = AutoML(

    # results_path="AutoML_41",
    # algorithms=["CatBoost"],
    #algorithms=["Neural Network"],
    #    "Linear",
    #    "Xgboost",
    #    "Random Forest"
    # ],
    #total_time_limit=100,
    #tuning_mode="Normal",
    #explain_level=0,
    mode="Perform")
# automl.set_advanced(start_random_models=1)
automl.fit(X, y)

# Predict on the training data itself (no hold-out split is made here).
predictions = automl.predict(X)

# Show the first and last predictions.
print(predictions.head())
print(predictions.tail())

# Print both shapes so input and output sizes can be compared.
print(X.shape)
print(predictions.shape)
Ejemplo n.º 14
0
    if (len(ratedf) == 0):
        continue
    featvals_stoch.append("stoch20" + "(" + str(0.1 * (i + 1)) + ")")
    ratedf = k1[(k1.stoch20 > i * 0.1) & (k1.stoch20 < (i + 1) * 0.1)]
    featrate_stoch.append(len(ratedf[ratedf.rets_long == 1]) / (len(ratedf)))

# Drop rows with missing values before building the training arrays.
k1 = k1.dropna()
# Names of the columns used as model inputs.
feats = [
    'close_diff', 'gap1', 'rsi5', 'rsi5_smoothed', 'gap', 'stoch20', 'stoch14',
    'rsi14', 'rsi20', 'sine', 'bandpass', 'cci', 'decycle', 'quadlead',
    'velacc', 'VIX_Close', 'VIX_Close_diff', 'h', 'rsi20_diff', 'rsi14_diff',
    'stoch20_diff', 'res1', 'res2', 'res3', 'res4', 'res5'
]
feats1 = feats  # alias of the same list object, not a copy
xtrain = np.array(k1[feats1])
ytrain = np.array(k1.rets_long)
# NOTE(review): `tr`/`clf` are created but not used in the lines visible
# here; the model that is fitted below is `automl` - confirm they are still
# needed.
tr = RandomForestClassifier(n_estimators=550,
                            max_depth=6,
                            min_samples_split=10)
clf = tr
#clf = AdaBoostClassifier(base_estimator=tr,n_estimators=80,random_state=50,learning_rate=1.0)
# `automl` and `k2` are not defined in the visible code; presumably they are
# created earlier in the file - verify.
automl.fit(xtrain, ytrain)
k2['predictions'] = automl.predict(np.array(k2[feats1]))
# Coin-flip column of random 0/1 values, one per row of k2.
k2['rand_preds'] = [random.choice([0, 1]) for _ in range(len(k2))]
# Fraction of rows where the prediction equals k2.rets1.
print("accs:")
print(len(k2[k2.predictions == k2.rets1]) / (len(k2)))
# Sum of k2.rets over all rows.
print("rets original:")
print(k2.rets.sum())
# Sum of k2.rets weighted by the prediction (rows predicted 0 contribute 0).
print("rets model")
print((k2.predictions * k2.rets).sum())
Ejemplo n.º 15
0
#y=y+.1
# Reshape y to a 1-D array of length len(y).
y = y.reshape(len(y), )
# Hold out 30% of the rows for evaluation (no random_state is set, so the
# split differs between runs).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.3)
# Earlier modelling attempts, currently disabled:
#model = Sequential([
#Dense(input_shape=(x_train.shape[1],), units=1,kernel_initializer=tf.constant_initializer(1),
#          bias_initializer=tf.constant_initializer(0),kernel_constraint=MinMaxNorm(min_value=-1,max_value=1)),
#    tfpl.DistributionLambda(lambda t:tfd.Exponential(rate=t),
#                           convert_to_tensor_fn=tfd.Distribution.sample)
#])
#model.compile(loss=nll,
#optimizer=RMSprop(learning_rate=0.01))
#model.fit(x_train, y_train, epochs=1000, verbose=True);
#tr = RandomForestRegressor(n_estimators=50,max_depth=6,min_samples_split=10)
#clf=tr
#clf = AdaBoostClassifier(base_estimator=tr,n_estimators=80,random_state=50,learning_rate=1.0)
# `automl` is not defined in the visible code; presumably created earlier.
automl.fit(x_train, y_train)
# NOTE(review): predictions are passed first and the true values second. If
# this is sklearn.metrics.mean_absolute_percentage_error, its signature is
# (y_true, y_pred) and the metric is not symmetric - confirm the order.
print("MAP: ", mean_absolute_percentage_error(automl.predict(x_test), y_test))
# Summary table filled cell by cell below; `ind` supplies the row labels and
# `cols` the column labels (both defined outside the visible code).
dftt = pd.DataFrame()

# Column cols[0]: SOW documents with pages < sowmed.
# Row ind[0] = mean `tat` where qornot == 'Yes', row ind[1] = mean `tat`
# where qornot == 'No', row ind[2] = ttest(...) of the two `tat` samples.
dftt.loc[ind[0], cols[0]] = df[(df.doctype == 'SOW') & (df.qornot == 'Yes') &
                               (df.pages < sowmed)].tat.mean()
dftt.loc[ind[1], cols[0]] = df[(df.doctype == 'SOW') & (df.qornot == 'No') &
                               (df.pages < sowmed)].tat.mean()
dftt.loc[ind[2], cols[0]] = ttest(
    df[(df.doctype == 'SOW') & (df.qornot == 'Yes') & (df.pages < sowmed)].tat,
    df[(df.doctype == 'SOW') & (df.qornot == 'No') & (df.pages < sowmed)].tat)
# Column cols[1]: SOW documents with pages > sowmed.
# NOTE(review): rows with pages == sowmed fall into neither column - confirm
# this is intended.
dftt.loc[ind[0], cols[1]] = df[(df.doctype == 'SOW') & (df.qornot == 'Yes') &
                               (df.pages > sowmed)].tat.mean()
dftt.loc[ind[1], cols[1]] = df[(df.doctype == 'SOW') & (df.qornot == 'No') &
                               (df.pages > sowmed)].tat.mean()
dftt.loc[ind[2], cols[1]] = ttest(
Ejemplo n.º 16
0
def run(dataset, config):
    """Train mljar-supervised AutoML on a benchmark dataset and report results.

    Args:
        dataset: Benchmark dataset exposing ``columns`` (pairs of name and
            dtype), ``target.name``, ``problem_type`` and ``train``/``test``
            splits with a ``data`` array.
        config: Benchmark configuration providing ``metric``, ``type``,
            ``seed``, ``max_runtime_seconds``, ``framework_params`` and
            ``output_predictions_file``.

    Returns:
        Whatever ``result(...)`` returns for the collected predictions and
        timings.
    """
    log.info(f"\n**** mljar-supervised [v{supervised.__version__}] ****\n")
    save_metadata(config, version=supervised.__version__)

    # Mapping of benchmark metrics to MLJAR metrics; anything else is "auto".
    metrics_mapping = {'auc': 'auc', 'logloss': 'logloss', 'rmse': 'rmse'}
    eval_metric = metrics_mapping.get(config.metric, "auto")

    # Mapping of benchmark task to MLJAR ML task
    problem_mapping = {
        "binary": "binary_classification",
        "multiclass": "multiclass_classification",
        "regression": "regression",
    }
    # if None the AutoML will guess about the ML task
    ml_task = problem_mapping.get(dataset.problem_type)
    is_classification = config.type == "classification"
    results_path = output_subdir("results", config)

    # Parameters whose name starts with "_" are not forwarded to AutoML.
    training_params = {
        name: value
        for name, value in config.framework_params.items()
        if not name.startswith("_")
    }

    column_names, _ = zip(*dataset.columns)
    column_types = dict(dataset.columns)
    label = dataset.target.name

    def _split_features_target(split):
        # Rebuild a typed DataFrame and separate the target column from it.
        frame = pd.DataFrame(split.data, columns=column_names)
        frame = frame.astype(column_types, copy=False)
        return frame.drop(columns=label), frame[label]

    X_train, y_train = _split_features_target(dataset.train)
    X_test, y_test = _split_features_target(dataset.test)

    automl = AutoML(
        results_path=results_path,
        total_time_limit=config.max_runtime_seconds,
        random_state=config.seed,
        ml_task=ml_task,
        eval_metric=eval_metric,
        **training_params
    )

    with utils.Timer() as training:
        automl.fit(X_train, y_train)

    with utils.Timer() as predict:
        preds = automl.predict_all(X_test)

    if is_classification:
        predictions = preds["label"].values
        # One probability column per distinct training label.
        proba_columns = [f"prediction_{c}" for c in np.unique(y_train)]
        probabilities = preds[proba_columns].values
    else:
        predictions = preds["prediction"].values
        probabilities = None

    # clean the results
    keep_artifacts = config.framework_params.get("_save_artifacts", False)
    if not keep_artifacts:
        shutil.rmtree(results_path, ignore_errors=True)

    return result(
        output_file=config.output_predictions_file,
        predictions=predictions,
        truth=y_test,
        probabilities=probabilities,
        models_count=len(automl._models),
        training_duration=training.duration,
        predict_duration=predict.duration,
    )
Ejemplo n.º 17
0
#
# automl.train(y="Hall_of_Fame", x=['At_bats', 'Runs'], training_frame=data, fold_column='TWORAVENS_FOLD_COLUMN')
#
# best_model = h2o.get_model(automl.leaderboard.as_data_frame()['model_id'][0])
#
# print(best_model.cross_validation_fold_assignment())
#

from supervised.automl import AutoML
import pandas as pd

# `data_path` is not defined in the visible code; presumably set earlier.
dataframe = pd.read_csv(data_path)
# Drop the rows whose Hall_of_Fame value is 2.
dataframe = dataframe[dataframe['Hall_of_Fame'] != 2]

# Fit AutoML with a 30-unit total time limit on two predictors.
automl_mljar = AutoML(total_time_limit=30)
automl_mljar.fit(dataframe[['Runs', 'At_bats']], dataframe['Hall_of_Fame'])

# Take the first model kept by AutoML (a private attribute) and call its
# train/predict methods directly on the same data.
mljar_model = automl_mljar._models[0]

mljar_model.train({
    "train": {
        "X": dataframe[['Runs', 'At_bats']],
        "y": dataframe['Hall_of_Fame']
    }
})
mljar_model.predict(dataframe[['Runs', 'At_bats']])

# import mlbox.model.classification
# import mlbox.model.regression
#
#
import pandas as pd
from supervised.automl import AutoML
import os

# Download the adult income CSV; skipinitialspace strips the blanks that
# follow each delimiter.
df = pd.read_csv(
    "https://raw.githubusercontent.com/pplonski/datasets-for-start/master/adult/data.csv",
    skipinitialspace=True,
)

# All columns except the last one are predictors; "income" is the target.
X = df[df.columns[:-1]]
y = df["income"]

automl = AutoML()
# results_path = "AutoML_8",
# total_time_limit=5,
# start_random_models=1,
# hill_climbing_steps=0,
# top_models_to_improve=3,
# train_ensemble=True)

print(X)
print(y)

# A stray `automl.fit(s, )` call used to follow this line. `s` is not defined
# in this script and the call passed no target, so it was removed.
automl.fit(X, y)
print(X)
print(y)

predictions = automl.predict(X)
print(predictions.head())
Ejemplo n.º 19
0
# tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2)
# tpot.fit(under_sampled_X,under_sampled_Y)
# print(tpot.score(test_X,test_Y))
# tpot.export('tpot_model_churn.py')

#%%

from supervised.automl import AutoML

# Configure the search: three algorithms, 10 random starting models and
# 4 hill-climbing steps.
automl = AutoML(total_time_limit=180 * 60,
                top_models_to_improve=3,
                learner_time_limit=240,
                algorithms=["Xgboost", "RF", "LightGBM"],
                start_random_models=10,
                hill_climbing_steps=4)
# `under_sampled_X`/`under_sampled_Y` and `test_X`/`test_Y` are defined
# outside the visible code.
automl.fit(under_sampled_X, under_sampled_Y)

predictions = automl.predict(test_X)

#%%
# Evaluate the predicted labels against the test targets.
accuracy = accuracy_score(test_Y.values, predictions['label'].values)

print("\n Classification report : \n",
      classification_report(test_Y, predictions['label'].values))
print("Accuracy   Score : ", accuracy)
#confusion matrix
# NOTE(review): conf_matrix is computed but not used in the visible lines.
conf_matrix = confusion_matrix(test_Y, predictions['label'].values)
#roc_auc_score
# NOTE(review): the AUC is computed from the 'label' column rather than from
# a score/probability column - confirm this is intended.
model_roc_auc = roc_auc_score(test_Y, predictions['label'].values)
print("Area under curve : ", model_roc_auc, "\n")
Ejemplo n.º 20
0
        train_ensemble=False,
        golden_features=False,
        features_selection=False,
        ml_task="regression",
    )
    nn = AutoML(
        algorithms=["Neural Network"],
        mode="Perform",
        explain_level=0,
        train_ensemble=False,
        golden_features=False,
        features_selection=False,
        ml_task="regression",
    )

    mlp.fit(train_X, train_y)
    mlp_time = np.round(time.time() - mlp._start_time, 2)
    nn.fit(train_X, train_y)
    nn_time = np.round(time.time() - nn._start_time, 2)

    mlp_mse = mean_squared_error(test_y, mlp.predict(test_X))
    nn_mse = mean_squared_error(test_y, nn.predict(test_X))

    print(dataset, X.shape, np.unique(y), mlp_mse, nn_mse)

    results += [{
        "dataset": dataset,
        "nrows": X.shape[0],
        "ncols": X.shape[1],
        "mlp_mse": mlp_mse,
        "nn_mse": nn_mse,
Ejemplo n.º 21
0
import pandas as pd

# scikit learn utilites
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# mljar-supervised package
from supervised.automl import AutoML

# Load the digits data and make a stratified 75/25 train/test split.
# No random_state is passed, so the split differs between runs.
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(pd.DataFrame(digits.data),
                                                    digits.target,
                                                    stratify=digits.target,
                                                    test_size=0.25)

# train models
automl = AutoML(mode="Perform")
automl.fit(X_train, y_train)

# compute the accuracy on test data
# The "label" column of the prediction is cast to int before scoring.
predictions = automl.predict(X_test)
print(predictions.head())
print("Test accuracy:", accuracy_score(y_test,
                                       predictions["label"].astype(int)))
Ejemplo n.º 22
0
def run(dataset, config):
    """Train mljar-supervised AutoML on a benchmark dataset and report results.

    Args:
        dataset: Benchmark dataset exposing ``problem_type`` and
            ``train``/``test`` splits with ``X`` and ``y``.
        config: Benchmark configuration providing ``metric``, ``type``,
            ``seed``, ``max_runtime_seconds``, ``framework_params`` and
            ``output_predictions_file``.

    Returns:
        Whatever ``result(...)`` returns for the collected predictions,
        probability labels and timings.
    """
    log.info(f"\n**** mljar-supervised [v{supervised.__version__}] ****\n")

    # Mapping of benchmark metrics to MLJAR metrics; anything else is "auto".
    metrics_mapping = {'auc': 'auc', 'logloss': 'logloss', 'rmse': 'rmse'}
    eval_metric = metrics_mapping.get(config.metric, "auto")

    # Mapping of benchmark task to MLJAR ML task
    problem_mapping = {
        "binary": "binary_classification",
        "multiclass": "multiclass_classification",
        "regression": "regression",
    }
    # if None the AutoML will guess about the ML task
    ml_task = problem_mapping.get(dataset.problem_type)
    is_classification = config.type == "classification"
    results_path = output_subdir("results", config)

    # Parameters whose name starts with "_" are not forwarded to AutoML.
    training_params = {
        name: value
        for name, value in config.framework_params.items()
        if not name.startswith("_")
    }

    X_train, y_train = dataset.train.X, dataset.train.y.squeeze()
    X_test, y_test = dataset.test.X, dataset.test.y.squeeze()

    automl = AutoML(
        results_path=results_path,
        total_time_limit=config.max_runtime_seconds,
        random_state=config.seed,
        ml_task=ml_task,
        eval_metric=eval_metric,
        **training_params
    )

    with Timer() as training:
        automl.fit(X_train, y_train)

    with Timer() as predict:
        preds = automl.predict_all(X_test)

    predictions, probabilities, probabilities_labels = None, None, None
    if not is_classification:
        predictions = preds["prediction"].values
    else:
        # preds is a dataframe with columns ["prediction_LABEL", .., "label"]
        bool_target_as_int = (y_train.dtype == bool
                              and preds["label"].dtype == int)
        if bool_target_as_int:
            # boolean target produces integer predictions for mljar-supervised <= 0.10.6
            # https://github.com/mljar/mljar-supervised/issues/442
            bool_names = {"prediction_0": "False", "prediction_1": "True"}
            preds = preds.rename(bool_names, axis=1)
            preds["label"] = preds["label"].astype(bool)
        else:
            # Strip the leading "prediction_" so columns are named by label.
            preds.columns = [
                name.replace("prediction_", "", 1) for name in preds.columns
            ]

        predictions = preds["label"].values
        # Every column except the last one is a probability column.
        probabilities_labels = list(preds.columns)[:-1]
        probabilities = preds[probabilities_labels].values

    # clean the results
    keep_artifacts = config.framework_params.get("_save_artifacts", False)
    if not keep_artifacts:
        shutil.rmtree(results_path, ignore_errors=True)

    return result(
        output_file=config.output_predictions_file,
        predictions=predictions,
        truth=y_test,
        probabilities=probabilities,
        probabilities_labels=probabilities_labels,
        models_count=len(automl._models),
        training_duration=training.duration,
        predict_duration=predict.duration,
    )