def test_autosklearn2_classification_methods_returns_self_sparse(dask_client):
    """fit, fit_ensemble and refit must each return the estimator itself.

    Uses the 'breast_cancer' dataset requested with ``make_sparse=True`` and
    additionally checks prediction accuracy, the configuration space contents
    and that the fitted estimator can be pickled.
    """
    X_train, y_train, X_test, y_test = putil.get_dataset(
        'breast_cancer', make_sparse=True)

    automl = AutoSklearn2Classifier(
        time_left_for_this_task=60,
        ensemble_size=0,
        delete_tmp_folder_after_terminate=False,
        dask_client=dask_client,
    )

    # Each of the three training entry points is expected to be chainable,
    # i.e. to hand back the very object it was called on.
    automl_fitted = automl.fit(X_train, y_train)
    assert automl is automl_fitted

    ensemble_result = automl.fit_ensemble(y_train, ensemble_size=5)
    assert automl is ensemble_result

    refit_result = automl.refit(X_train.copy(), y_train.copy())
    assert automl is refit_result

    # Debug information is only produced when the accuracy check fails.
    predictions = automl_fitted.predict(X_test)
    accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
    assert accuracy >= 2 / 3, print_debug_information(automl)

    # The textual form of the configuration space must not mention "boosting".
    space_description = str(
        automl.get_configuration_space(X=X_train, y=y_train))
    assert "boosting" not in space_description

    # Serialising must not raise.
    pickle.dumps(automl_fitted)
Exemple #2
0
    def test_classification_methods_returns_self(self):
        """fit, fit_ensemble and refit must each return the estimator itself."""
        X_train, y_train, _, _ = putil.get_dataset('iris')
        automl = AutoSklearn2Classifier(
            time_left_for_this_task=60,
            ensemble_size=0,
        )

        # The calls are deferred so they run one after another, in this
        # order, with the identity check performed right after each call.
        training_steps = (
            lambda: automl.fit(X_train, y_train),
            lambda: automl.fit_ensemble(y_train, ensemble_size=5),
            lambda: automl.refit(X_train.copy(), y_train.copy()),
        )
        for run_step in training_steps:
            self.assertIs(automl, run_step())
Exemple #3
0
def init(df, param):
    """Create the model container holding a configured AutoSklearn2Classifier.

    Args:
        df: Not used by this function.
        param: Nested mapping; ``param['options']['params']`` supplies the
            keyword arguments for the classifier. The entries ``algo``,
            ``mode`` and ``dataset_name`` are dropped, and values whose
            ``isdigit()`` is true are converted with ``int``.

    Returns:
        A dict with the single key ``"model"`` mapped to the new classifier.
    """
    # Work on a deep copy so the caller's options are never modified.
    params = deepcopy(param['options']['params'])
    for reserved in ('algo', 'mode', 'dataset_name'):
        params.pop(reserved, None)

    for key, value in params.items():
        try:
            if value.isdigit():
                params[key] = int(value)
        except (AttributeError, TypeError, ValueError):
            # Best-effort coercion: values without a usable ``isdigit`` (or
            # digit-like text that ``int`` rejects) are passed through as-is.
            # Only these errors are ignored, so KeyboardInterrupt/SystemExit
            # are no longer swallowed as they were by the bare ``except``.
            continue

    return {"model": AutoSklearn2Classifier(**params)}
def test_autosklearn2_classification_methods_returns_self(dask_client):
    """fit, fit_ensemble and refit must each return the estimator itself.

    Runs on the 'iris' dataset and also checks prediction accuracy and that
    the fitted estimator can be pickled.
    """
    X_train, y_train, X_test, y_test = putil.get_dataset('iris')

    automl = AutoSklearn2Classifier(
        time_left_for_this_task=60,
        ensemble_size=0,
        dask_client=dask_client,
    )

    # Every training entry point should hand back the object it was called on.
    automl_fitted = automl.fit(X_train, y_train)
    assert automl is automl_fitted

    ensemble_result = automl.fit_ensemble(y_train, ensemble_size=5)
    assert automl is ensemble_result

    refit_result = automl.refit(X_train.copy(), y_train.copy())
    assert automl is refit_result

    predictions = automl_fitted.predict(X_test)
    accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
    assert accuracy >= 2 / 3

    # Serialising must not raise.
    pickle.dumps(automl_fitted)
Exemple #5
0
# Columns that are replaced by an integer code per distinct value.
to_encode = [
    "Gender",
    "Customer Type",
    "Type of Travel",
    "Class",
]

# A fresh encoder per column, so each column gets its own code mapping.
for col in to_encode:
    orig_data[col] = LabelEncoder().fit_transform(orig_data[col])

# Turn the satisfaction labels into 1 / 0. The result is assigned back to the
# column: calling ``replace(..., inplace=True)`` on ``orig_data[...]`` is a
# chained in-place modification, which pandas warns about and which leaves
# ``orig_data`` unchanged when Copy-on-Write is enabled.
orig_data["satisfaction"] = orig_data["satisfaction"].replace("satisfied", 1)
orig_data["satisfaction"] = orig_data["satisfaction"].replace(
    "neutral or dissatisfied", 0)

# Standardise every column except the target, one column at a time.
# NOTE(review): `target` is defined outside this excerpt - presumably
# "satisfaction"; confirm.
# NOTE(review): the scaler is fitted before the train/test split, so test rows
# influence the scaling statistics - confirm this is intended.
scaler = StandardScaler()
for col in orig_data.columns:
    if col != target:
        orig_data[col] = scaler.fit_transform(orig_data[[col]])

# Split into features / target, then hold out 30% of the rows for testing.
data_X = orig_data.loc[:, [x for x in orig_data.columns if x != target]]
data_Y = orig_data.loc[:, target]
data_X_train, data_X_test, data_y_train, data_y_test = train_test_split(
    data_X, data_Y, test_size=0.3, random_state=1)

cls = AutoSklearn2Classifier(time_left_for_this_task=60)
print("Fitting..")
cls.fit(data_X_train, data_y_train)
pred = cls.predict(data_X_test)

# All three metrics are computed from the predicted labels in `pred`.
print("AutoML precision_score", precision_score(data_y_test, pred))
print("AutoML recall_score", recall_score(data_y_test, pred))
print("AutoML roc_auc_score", roc_auc_score(data_y_test, pred))