Ejemplo n.º 1
0
    ml_memory_limit=1024 * 8,
    time_left_for_this_task=3600,
    resampling_strategy='cv',
    #                              ensemble_size=1,
    #                              initial_configurations_via_metalearning=0,
    resampling_strategy_arguments={'folds': 5})
# Time the search with a monotonic clock: perf_counter() is unaffected by
# system clock adjustments, which a time.time() difference is not.
start = time.perf_counter()

#X_train = X_train.astype('float') # when?
automl.fit(X_train, y_train,
           dataset_name='boston_housing')  #change dataset name accordingly
# Refit on copies of the training data before predicting. NOTE(review):
# presumably required because the search uses 'cv' resampling -- confirm
# against the auto-sklearn version in use.
automl.refit(X_train.copy(), y_train.copy())
# The reported duration covers both fit() and refit().
elapsed = time.perf_counter() - start
print('[INFO] Elapsed time finding best model: {} seconds.'.format(elapsed))

predictions = automl.predict(X_test)
#print('--- CLASSIFICATION REPORT: ---')        #not for regression
#print(classification_report(y_test, predictions, digits=5))
print('\n\n--- MODELS: ---')
print(automl.show_models())
print('\n\n--- STATISTICS: ---')
print(automl.sprint_statistics())

#-----CLASSIFIER-----
# Kept as a commented-out alternative for classification tasks.
#print('\n\n--- SCORE: ---')
#print("Balanced error score", 1 - balanced_accuracy_score(y_test, predictions))

#-----REGRESSION-----
# Score the held-out predictions with R2.
print('\n\n--- SCORE: ---')
print("R2 score", r2_score(y_test, predictions))
Ejemplo n.º 2
0
if __name__ == '__main__':

    # Download the auto-insurance CSV (it has no header row) and view the
    # whole table as a float32 array.
    source_url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/auto-insurance.csv'
    table = read_csv(source_url, header=None).values.astype('float32')

    # Every column except the last is an input; the last column is the target.
    X = table[:, :-1]
    y = table[:, -1]

    # Hold out 33% of the rows for testing, with a fixed seed.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=1)

    # Configure the search: 20 minutes overall, 45 seconds per candidate run,
    # six parallel jobs, optimising auto_mean_absolute_error.
    model = AutoSklearnRegressor(
        time_left_for_this_task=20 * 60,
        per_run_time_limit=45,
        n_jobs=6,
        metric=auto_mean_absolute_error,
    )

    # Run the search, then print its summary statistics.
    model.fit(X_train, y_train)
    print(model.sprint_statistics())

    # Score the fitted model on the held-out rows.
    predicted = model.predict(X_test)
    print("MAE: %.3f" % mean_absolute_error(y_test, predicted))

    # Print whatever show_models() reports for the fitted search.
    print("Show models")
    print(model.show_models())
Ejemplo n.º 3
0
            output_folder=outpath + 'output_folder',
        )
    print("start searching")

    # Build the run tag once so the auto-sklearn dataset name and the log
    # file name cannot drift apart.
    run_tag = (ml_type + '_t' + str(time_left_for_this_task) + '_lead' +
               str(l))

    # perform the search
    model.fit(X_train, y_train, dataset_name=run_tag)

    # summarize: write the search statistics and the model description to
    # the per-run log. The context manager closes the file even if one of
    # the writes raises.
    with open('log_files/' + run_tag + '.txt', 'w') as log_file:
        log_file.write(model.sprint_statistics())
        log_file.write('\n')
        log_file.write(model.show_models())

    print(model.sprint_statistics())
    print(model.show_models())
    # evaluate best model on the validation split
    y_hat = model.predict(X_val)
    metric = calc_metrics(y_val, y_hat, ml_type)
    # Regression stores the result at index l; classification assigns it
    # across row l. NOTE(review): presumably calc_metrics returns a scalar
    # for regression and a vector for classification -- confirm.
    if ml_type == 'regression':
        metrics[l] = metric
    elif ml_type == 'classification':
        metrics[l, :] = metric
    print("************************************")
    print("lead:" + str(l) + ", metric: " + str(metric))
Ejemplo n.º 4
0
                                    include_preprocessors=preprocessing_to_use,
                                    exclude_preprocessors=None,
                                    ml_memory_limit=6156,
                                    resampling_strategy="cv",
                                    resampling_strategy_arguments={"folds": 5})

# Train models
# Run the search on copies of the training data, passing mean_squared_error
# as the metric.
auto_sklearn.fit(X=X_train.copy(), y=y_train.copy(), metric=mean_squared_error)
# Refit on fresh copies of the training data and keep the return value.
# NOTE(review): presumably required before predict() because the estimator
# is configured with "cv" resampling -- confirm against the auto-sklearn
# version in use.
it_fits = auto_sklearn.refit(X=X_train.copy(), y=y_train.copy())

# Predict
# Predictions for the held-out inputs.
y_hat = auto_sklearn.predict(X_test)

# Show results
# NOTE(review): the four lines below are bare expressions, so their values
# are discarded when this runs as a plain script. They only display output
# in an interactive session or notebook -- confirm the intended context.
auto_sklearn.cv_results_
auto_sklearn.sprint_statistics()
auto_sklearn.show_models()
auto_sklearn.get_models_with_weights()

# TPOT

from tpot import TPOTRegressor

# Restricted TPOT search space, keyed by estimator import path. Each value is
# that estimator's hyperparameter grid; every grid is left empty here.
# The tree ensembles are the *Regressor variants: this configuration sits
# next to a TPOTRegressor, and the *Classifier estimators cannot be fitted
# to a continuous target.
# NOTE(review): presumably passed as config_dict to the TPOTRegressor
# below -- confirm.
tpot_config = {
    "sklearn.linear_model.Ridge": {},
    "sklearn.ensemble.RandomForestRegressor": {},
    "sklearn.ensemble.ExtraTreesRegressor": {},
    "sklearn.ensemble.GradientBoostingRegressor": {},
}

auto_tpot = TPOTRegressor(generations=100,