예제 #1
0
def AB_random_search(X_train, y_train):
    """Tune an AdaBoostRegressor with a randomized search and return it unfitted.

    A 3-fold shuffled ``KFold`` randomized search (100 candidates, scored by
    negative mean absolute error) is run on the given training data. The
    winning hyper-parameters are printed and applied to the regressor.

    Args:
        X_train: Training features passed to ``RandomizedSearchCV.fit``.
        y_train: Training targets passed to ``RandomizedSearchCV.fit``.

    Returns:
        The ``AdaBoostRegressor`` configured with the best parameters found.
        The search uses ``refit=False``, so the returned estimator has NOT
        been fitted; the caller must call ``fit`` on it.
    """
    regressor = AdaBoostRegressor(random_state=0)

    # Candidate values sampled by the randomized search.
    search_space = dict(
        n_estimators=np.arange(10, 500, 10),
        learning_rate=np.arange(0.005, 0.5, 0.001),
    )

    inner_folds = KFold(n_splits=3, shuffle=True, random_state=1)
    tuner = RandomizedSearchCV(
        regressor,
        search_space,
        n_iter=100,
        scoring='neg_mean_absolute_error',
        cv=inner_folds,
        refit=False,
        n_jobs=-1,
        verbose=0,
        random_state=999,
    )
    tuner.fit(X_train, y_train)

    best = tuner.best_params_
    regressor.set_params(**best)
    print(best)

    return regressor
def ab_cv(cv_outer, data):
    """Run the outer loop of a nested cross-validation for AdaBoostRegressor.

    For every ``(train_index, test_index)`` pair in ``cv_outer`` the
    hyper-parameters are tuned with a 3-fold randomized search on the
    training rows, a model with the best parameters is fitted on those rows,
    and its predictions on the held-out rows are scored.

    Args:
        cv_outer: Iterable of ``(train_index, test_index)`` pairs of
            positional row indices into ``data``.
        data: Object indexed through ``.iloc`` (presumably a pandas
            DataFrame). Column 3 is used as the target and columns 6 onward
            as the features.

    Returns:
        A tuple ``(MAE_results, RMSE_results, MedAE_results, r2_results,
        model_params)`` of lists holding one entry per outer fold: mean
        absolute error, root mean squared error, median absolute error,
        R^2 score, and the best hyper-parameter dict. All lists are empty
        when ``cv_outer`` yields no folds.
    """
    MAE_results = []
    RMSE_results = []
    MedAE_results = []
    r2_results = []
    model_params = []

    # The search space does not depend on the fold, so build it only once.
    params = {
        'n_estimators': np.arange(10, 500, 10),
        'learning_rate': np.arange(0.005, 0.5, 0.001),
    }

    for train_index, test_index in cv_outer:
        X_train = data.iloc[train_index, 6:].values
        y_train = data.iloc[train_index, 3].values
        X_test = data.iloc[test_index, 6:].values
        y_test = data.iloc[test_index, 3].values

        cv_inner = KFold(n_splits=3, shuffle=True, random_state=1)
        model = AdaBoostRegressor(random_state=0)

        search = RandomizedSearchCV(model,
                                    params,
                                    cv=cv_inner,
                                    scoring='neg_mean_absolute_error',
                                    verbose=0,
                                    n_jobs=-1,
                                    n_iter=100,
                                    refit=False,
                                    random_state=999)
        search.fit(X_train, y_train)

        best_params = search.best_params_
        model_params.append(best_params)

        # refit=False above, so the winning configuration has to be fitted
        # explicitly on the full outer-training split before predicting.
        model.set_params(**best_params)
        model.fit(X_train, y_train)
        print(best_params)
        y_pred = model.predict(X_test)

        MAE_results.append(MAE(y_test, y_pred))

        # `squared=False` was deprecated in scikit-learn 1.4 and removed in
        # 1.6; taking the root explicitly works on every version and gives
        # the same value for the single-output target used here.
        RMSE_results.append(np.sqrt(mean_squared_error(y_test, y_pred)))

        MedAE_results.append(median_absolute_error(y_test, y_pred))
        r2_results.append(r2_score(y_test, y_pred))

    return MAE_results, RMSE_results, MedAE_results, r2_results, model_params
예제 #3
0
def train_and_save_final_model(X, y, X_train, y_train, params,
                               save_model_file_path, test_data):
    """Fit an AdaBoostRegressor with ``params`` and pickle it to disk.

    Args:
        X: Full feature set; used for fitting when ``test_data`` is given.
        y: Full target set; used for fitting when ``test_data`` is given.
        X_train: Training features; used when ``test_data`` is ``None``.
        y_train: Training targets; used when ``test_data`` is ``None``.
        params: Mapping of hyper-parameters forwarded to
            ``AdaBoostRegressor.set_params``.
        save_model_file_path: String prefix for the output file. The file
            name ``'adbr.sav'`` is appended verbatim, so a directory must
            end with its path separator.
        test_data: Only checked against ``None`` to choose the fitting data;
            its contents are not read here.

    Returns:
        None. The fitted model is written to
        ``save_model_file_path + 'adbr.sav'``.
    """
    adbr = AdaBoostRegressor(random_state=0)
    adbr.set_params(**params)

    # Identity check on purpose: `== None` is evaluated element-wise for a
    # DataFrame/ndarray and the resulting object cannot be used as a
    # condition (raises "truth value ... is ambiguous").
    if test_data is None:
        adbr.fit(X_train, y_train)
    else:
        adbr.fit(X, y)

    # Save the model; the context manager guarantees the file is flushed
    # and closed even if pickling fails.
    model_file_path = save_model_file_path + 'adbr.sav'
    with open(model_file_path, 'wb') as model_file:
        pickle.dump(adbr, model_file)
def predict_validation(model_params, X_train, y_train, X_valid, y_valid):
    """Average the validation predictions of several AdaBoost configurations.

    One ``AdaBoostRegressor`` is fitted on the training data per entry of
    ``model_params`` and used to predict ``X_valid``. The predictions are
    then averaged per validation sample across all models.

    Args:
        model_params: Iterable of hyper-parameter dicts, one per model.
        X_train: Training features used to fit every model.
        y_train: Training targets used to fit every model.
        X_valid: Features to predict.
        y_valid: Not used by this function.

    Returns:
        A list with the mean prediction for each validation sample; an empty
        list when ``model_params`` is empty.
    """

    def _fit_and_predict(hyper_params):
        # Fit a fresh regressor with this configuration and predict X_valid.
        regressor = AdaBoostRegressor(random_state=0)
        regressor.set_params(**hyper_params)
        regressor.fit(X_train, y_train)
        return regressor.predict(X_valid)

    per_model = [_fit_and_predict(hyper_params) for hyper_params in model_params]

    # Transpose so that each row holds every model's prediction for one sample.
    per_sample = np.array(per_model).T

    return [sum(sample) / len(sample) for sample in per_sample]