def bo_tpe_knn(X, y):
    """Search ``n_neighbors`` for a KNeighborsRegressor with hyperopt TPE.

    Ten TPE evaluations are run; each candidate is scored by the mean
    3-fold cross-validated MSE on ``(X, y)``.

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Regression targets handed to ``cross_val_score``.

    Returns:
        A tuple ``(best_mse, elapsed, best)``: the lowest loss recorded in
        the trials, the wall-clock ``datetime.timedelta`` of the search,
        and the dict returned by ``fmin``.
    """
    started_at = datetime.datetime.now()

    def objective(sample):
        # The sampled value is coerced to a non-negative int before use.
        neighbours = abs(int(sample['n_neighbors']))
        model = KNeighborsRegressor(n_neighbors=neighbours)
        fold_scores = cross_val_score(model,
                                      X,
                                      y,
                                      cv=3,
                                      n_jobs=-1,
                                      scoring="neg_mean_squared_error")
        # The scorer reports negated MSE; flip the sign to get a loss.
        return {'loss': -np.mean(fold_scores), 'status': STATUS_OK}

    search_space = {'n_neighbors': hp.quniform('n_neighbors', 1, 20, 1)}

    trials_knn = Trials()
    best_knn = fmin(fn=objective,
                    space=search_space,
                    algo=tpe.suggest,
                    max_evals=10,
                    trials=trials_knn)
    lowest_loss = min(trials_knn.losses())
    print("KNN MSE score:%.4f" % lowest_loss)
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", best_knn)
    save_model_object(best_knn, 'BO-TPE', 'KNN', 'KNN')
    return lowest_loss, elapsed, best_knn
def optuna_xgb(X, y):
    """Tune an ``xgb.XGBRegressor`` with Optuna (10 trials, minimising MSE).

    Each trial is scored by the mean 3-fold cross-validated MSE on
    ``(X, y)``.

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Regression targets handed to ``cross_val_score``.

    Returns:
        A tuple ``(best_mse, duration, best_params)``. ``duration`` is a
        ``"h:mm:ss.ffffff"`` string covering the best trial only (its
        start-to-complete time), not the whole study.
    """
    # References:
    # https://xgboost.readthedocs.io/en/latest/tutorials/rf.html
    # https://data-analysis-stats.jp/%e6%a9%9f%e6%a2%b0%e5%ad%a6%e7%bf%92/python%e3%81%a7xgboost/
    def objective(trial):
        params = {
            'learning_rate':
            trial.suggest_float("learning_rate", 1e-4, 1, log=True),
            'max_depth':
            trial.suggest_int("max_depth", 1, 46, step=5),
            # Registered under its real name (it was "n_neighbors") so that
            # study.best_params maps directly onto XGBRegressor arguments.
            'n_estimators':
            trial.suggest_int("n_estimators", 100, 220, step=30),
            'objective':
            'reg:squarederror',
        }
        clf = xgb.XGBRegressor(**params)
        # The scorer reports negated MSE; flip the sign to get a loss.
        score = -np.mean(
            cross_val_score(clf, X, y, cv=3, scoring="neg_mean_squared_error"))
        return score

    study_xgb = optuna.create_study(direction="minimize")
    study_xgb.optimize(objective, n_trials=10)
    optuna_xgb_mse_score = study_xgb.best_value
    optuna_xgb_time = (study_xgb.best_trial.datetime_complete -
                       study_xgb.best_trial.datetime_start).total_seconds()
    # Convert the number of seconds into an h:mm:ss string.
    m, s = divmod(optuna_xgb_time, 60)
    h, m = divmod(m, 60)
    optuna_xgb_time = "%d:%02d:%09f" % (h, m, s)
    print("XGBoost MSE score:%.4f" % optuna_xgb_mse_score)
    print("程序执行时间(秒):{}".format(optuna_xgb_time))
    print("最佳超参数值集合:", study_xgb.best_params)
    # Save under Optuna/XGBoost labels; the previous 'BO-TPE'/'NGBoost'
    # labels did not describe this study.
    save_model_object(study_xgb, 'Optuna', 'XGBoost', 'XGBoost')
    return optuna_xgb_mse_score, optuna_xgb_time, study_xgb.best_params
def optuna_svr(X, y):
    """Tune an ``SVR`` (kernel and C) with Optuna over 5 trials.

    Each trial is scored by the mean 3-fold cross-validated MSE on
    ``(X, y)``; ``gamma`` is fixed to ``"scale"``.

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Regression targets handed to ``cross_val_score``.

    Returns:
        A tuple ``(best_mse, duration, best_params)``. ``duration`` is a
        ``"h:mm:ss.ffffff"`` string covering the best trial only (its
        start-to-complete time), not the whole study.
    """
    def objective(trial):
        params = {
            "kernel":
            trial.suggest_categorical("kernel", ["linear", "poly", "rbf"]),
            # suggest_float(log=True) replaces the deprecated
            # suggest_loguniform; the sampled distribution is the same.
            "C":
            trial.suggest_float("C", 1e-5, 1e2, log=True),
            # 'degree': trial.suggest_int("degree", 1,10,step=1),
            # 'epsilon': trial.suggest_float("epsilon", 0.1,0.5,step=0.1),
        }
        clf = SVR(**params, gamma="scale")
        # The scorer reports negated MSE; flip the sign to get a loss.
        score = -np.mean(
            cross_val_score(clf, X, y, cv=3, scoring="neg_mean_squared_error"))
        return score

    study_svr = optuna.create_study(direction="minimize")
    study_svr.optimize(objective, n_trials=5)
    optuna_svr_mse_score = study_svr.best_value
    optuna_svr_time = (study_svr.best_trial.datetime_complete -
                       study_svr.best_trial.datetime_start).total_seconds()
    # Convert the number of seconds into an h:mm:ss string.
    m, s = divmod(optuna_svr_time, 60)
    h, m = divmod(m, 60)
    optuna_svr_time = "%d:%02d:%09f" % (h, m, s)
    print("SVR MSE score:%.4f" % optuna_svr_mse_score)
    print("程序执行时间(秒):{}".format(optuna_svr_time))
    print("最佳超参数值集合:", study_svr.best_params)
    save_model_object(study_svr, 'Optuna', 'SVR', 'SVR')
    return optuna_svr_mse_score, optuna_svr_time, study_svr.best_params
def bo_tpe_svr(X, y):
    """Search ``C``, ``kernel`` and ``epsilon`` for an SVR with hyperopt TPE.

    Twenty TPE evaluations are run; each candidate is scored by the mean
    3-fold cross-validated MSE on ``(X, y)`` with ``gamma='scale'``.

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Regression targets handed to ``cross_val_score``.

    Returns:
        A tuple ``(best_mse, elapsed, best)``: the lowest loss recorded in
        the trials, the wall-clock ``datetime.timedelta`` of the search,
        and the dict returned by ``fmin``. That dict is returned as-is,
        i.e. without the ``abs``/``str`` normalisation the objective
        applies before fitting.
    """
    started_at = datetime.datetime.now()

    def objective(sample):
        # C and epsilon are drawn from zero-centred normals, so the
        # magnitude is taken before they reach the estimator.
        svr_kwargs = {
            'C': abs(float(sample['C'])),
            "kernel": str(sample['kernel']),
            'epsilon': abs(float(sample['epsilon'])),
        }
        model = SVR(gamma='scale', **svr_kwargs)
        fold_scores = cross_val_score(model,
                                      X,
                                      y,
                                      cv=3,
                                      n_jobs=-1,
                                      scoring="neg_mean_squared_error")
        # The scorer reports negated MSE; flip the sign to get a loss.
        return {'loss': -np.mean(fold_scores), 'status': STATUS_OK}

    search_space = {
        'C': hp.normal('C', 0, 50),
        "kernel": hp.choice('kernel', ['poly', 'rbf', 'sigmoid']),
        'epsilon': hp.normal('epsilon', 0, 1),
    }

    trials_svr = Trials()
    best_svr = fmin(fn=objective,
                    space=search_space,
                    algo=tpe.suggest,
                    max_evals=20,
                    trials=trials_svr)
    lowest_loss = min(trials_svr.losses())
    print("SVM MSE score:%.4f" % lowest_loss)
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", best_svr)
    save_model_object(best_svr, 'BO-TPE', 'SVR', 'SVR')
    return lowest_loss, elapsed, best_svr
# Example #5  (listing marker left by the code-sample scrape; counter: 0)
def bo_ANN(X, y):
    """Tune the Keras ANN regressor with ``BayesSearchCV`` (10 iterations).

    Candidates are scored with 3-fold cross-validated negated MSE. After
    the search, a model is built by calling ``ANN`` with the best
    parameters and handed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``fit``.
        y: Regression targets passed to ``fit``.

    Returns:
        A tuple ``(best_mse_str, elapsed, best_params)`` where
        ``best_mse_str`` is the best MSE rendered with ``str`` and
        ``elapsed`` is a ``datetime.timedelta``.
    """
    search_space = {
        'activation': ['relu', 'tanh'],
        'loss': ['mse'],
        'batch_size': [32, 64, 128],
        'neurons': Integer(256, 1024),
        'epochs': [20, 30, 50, 60]
        # 'patience': Integer(3, 20)
    }
    started_at = datetime.datetime.now()
    # ``verbose`` is a name defined outside this function.
    wrapped_model = KerasRegressor(build_fn=ANN, verbose=verbose)
    Bayes_ann = BayesSearchCV(wrapped_model,
                              search_space,
                              cv=3,
                              n_iter=10,
                              scoring='neg_mean_squared_error')
    Bayes_ann.fit(X, y)
    # best_score_ is negated MSE; flip the sign for reporting.
    print("ANN MSE score:" + str(-Bayes_ann.best_score_))
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    winning_params = Bayes_ann.best_params_
    print("最佳超参数值集合:", winning_params)
    model_bo_ann = ANN(**winning_params)
    save_model_object(model_bo_ann, 'BO-GP', 'ANN', 'ANN')
    return str(-Bayes_ann.best_score_), elapsed, Bayes_ann.best_params_
# Example #6  (listing marker left by the code-sample scrape; counter: 0)
def bo_RandomForestRegressor(X, y):
    """Tune a RandomForestRegressor with ``BayesSearchCV`` (20 iterations).

    Candidates are scored with 3-fold cross-validated negated MSE; the
    fitted search object itself is passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``fit``.
        y: Regression targets passed to ``fit``.

    Returns:
        A tuple ``(best_mse_str, elapsed, best_params)`` where
        ``best_mse_str`` is the best MSE rendered with ``str`` and
        ``elapsed`` is a ``datetime.timedelta``.
    """
    # Hyperparameter configuration space.
    search_space = {
        'n_estimators': Integer(10, 100),
        "max_features": Integer(1, 13),
        'max_depth': Integer(5, 50),
        "min_samples_split": Integer(2, 11),
        "min_samples_leaf": Integer(1, 11),
        "criterion": ['mse', 'mae']
    }
    started_at = datetime.datetime.now()
    forest = RandomForestRegressor(random_state=0)
    # n_iter is 20; raise it if more search time is available.
    Bayes_rf = BayesSearchCV(forest,
                             search_space,
                             cv=3,
                             n_iter=20,
                             scoring='neg_mean_squared_error')
    Bayes_rf.fit(X, y)
    # best_score_ is negated MSE; flip the sign for reporting.
    print("RandomForestRegressor MSE score:" + str(-Bayes_rf.best_score_))
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", Bayes_rf.best_params_)
    save_model_object(Bayes_rf, 'BO-GP', 'RandomForestRegressor',
                      'RandomForestRegressor')
    return str(-Bayes_rf.best_score_), elapsed, Bayes_rf.best_params_
# Example #7  (listing marker left by the code-sample scrape; counter: 0)
def gpminimize_RandomForestRegressor(X, y):
    """Tune a RandomForestRegressor with ``gp_minimize`` (20 calls).

    One shared estimator is reconfigured via ``set_params`` for every
    call and scored by the mean 3-fold cross-validated MSE on ``(X, y)``.

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Regression targets handed to ``cross_val_score``.

    Returns:
        A tuple ``(best_mse, elapsed, best_point)`` taken from the
        optimisation result's ``fun`` and ``x`` attributes; ``elapsed`` is
        a ``datetime.timedelta``.
    """
    started_at = datetime.datetime.now()
    forest = RandomForestRegressor()
    # Hyperparameter configuration space; the order here is the order of
    # the values in the result's ``x``.
    dimensions = [
        Integer(10, 100, name='n_estimators'),
        Integer(5, 50, name='max_depth'),
        Integer(1, 13, name='max_features'),
        Integer(2, 11, name='min_samples_split'),
        Integer(1, 11, name='min_samples_leaf'),
        Categorical(['mse', 'mae'], name='criterion')
    ]

    @use_named_args(dimensions)
    def objective(**candidate):
        forest.set_params(**candidate)
        fold_scores = cross_val_score(forest,
                                      X,
                                      y,
                                      cv=3,
                                      n_jobs=-1,
                                      scoring="neg_mean_squared_error")
        # The scorer reports negated MSE; flip the sign to get a loss.
        return -np.mean(fold_scores)

    # n_calls is 20; raise it if more search time is available.
    res_gp_rf = gp_minimize(objective, dimensions, n_calls=20, random_state=0)
    print("RandomForestRegressor MSE score:%.4f" % res_gp_rf.fun)
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", res_gp_rf.x)
    save_model_object(res_gp_rf.models, 'gp_minimize', 'RandomForestRegressor',
                      'RandomForestRegressor')
    return res_gp_rf.fun, elapsed, res_gp_rf.x
def rs_ANN(X, y):
    """Tune the Keras ANN regressor with ``RandomizedSearchCV`` (10 draws).

    Candidates are scored with 3-fold cross-validated negated MSE. After
    the search, a model is built by calling ``ANN`` with the best
    parameters and handed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``fit``.
        y: Regression targets passed to ``fit``.

    Returns:
        A tuple ``(best_mse_str, elapsed, best_params)`` where
        ``best_mse_str`` is the best MSE rendered with ``str`` and
        ``elapsed`` is a ``datetime.timedelta``.
    """
    distributions = {
        'activation': ['relu', 'tanh'],
        'loss': ['mse'],
        'batch_size': [32, 64, 128],
        'neurons': sp_randint(256, 1024),
        'epochs': [30, 50, 80]
        # 'patience': sp_randint(3, 20)
    }
    draws = 10
    started_at = datetime.datetime.now()
    # ``verbose`` is a name defined outside this function.
    wrapped_model = KerasRegressor(build_fn=ANN, verbose=verbose)
    Random_ann = RandomizedSearchCV(wrapped_model,
                                    param_distributions=distributions,
                                    n_iter=draws,
                                    cv=3,
                                    scoring='neg_mean_squared_error')
    Random_ann.fit(X, y)
    # best_score_ is negated MSE; flip the sign for reporting.
    print("ANN MSE score:" + str(-Random_ann.best_score_))
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    winning_params = Random_ann.best_params_
    print("最佳超参数值集合:", winning_params)
    model_random_ann = ANN(**winning_params)
    save_model_object(model_random_ann, 'random_search', 'ANN', 'ANN')
    return str(-Random_ann.best_score_), elapsed, Random_ann.best_params_
def grid_RandomForestRegressor(X, y):
    """Tune a RandomForestRegressor with an exhaustive ``GridSearchCV``.

    Candidates are scored with 3-fold cross-validated negated MSE; the
    fitted search object itself is passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``fit``.
        y: Regression targets passed to ``fit``.

    Returns:
        A tuple ``(best_mse_str, elapsed, best_params)`` where
        ``best_mse_str`` is the best MSE rendered with ``str`` and
        ``elapsed`` is a ``datetime.timedelta``.
    """
    # Hyperparameter grid.
    grid = {
        'n_estimators': [10, 20, 30],
        'max_features': ['sqrt', 0.5],
        'max_depth': [15, 20, 30, 50],
        'min_samples_leaf': [1, 2, 4, 8],
        "bootstrap": [True, False],
        "criterion": ['mse', 'mae']
    }
    started_at = datetime.datetime.now()
    forest = RandomForestRegressor(random_state=0)
    grid_rf = GridSearchCV(forest,
                           grid,
                           cv=3,
                           scoring='neg_mean_squared_error')
    grid_rf.fit(X, y)
    # The best parameters are printed once bare here and once more, with
    # a label, further down.
    print(grid_rf.best_params_)
    # best_score_ is negated MSE; flip the sign for reporting.
    print("RandomForestRegressor MSE score:" + str(-grid_rf.best_score_))
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", grid_rf.best_params_)
    save_model_object(grid_rf, 'grid_search', 'RandomForestRegressor',
                      'RandomForestRegressor')
    return str(-grid_rf.best_score_), elapsed, grid_rf.best_params_
def optuna_RandomForestRegressor(X, y):
    """Tune a RandomForestRegressor with Optuna over 5 trials.

    ``n_estimators`` and ``max_depth`` are searched; each trial is scored
    by the mean 3-fold cross-validated MSE on ``(X, y)``.

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Regression targets handed to ``cross_val_score``.

    Returns:
        A tuple ``(best_mse, duration, best_params)``. ``duration`` is a
        ``"h:mm:ss.ffffff"`` string covering the best trial only (its
        start-to-complete time), not the whole study.
    """
    def objective(trial):
        forest = RandomForestRegressor(
            n_estimators=trial.suggest_int("n_estimators", 1, 100),
            max_depth=trial.suggest_int("max_depth", 1, 20),
            random_state=0)
        fold_scores = cross_val_score(forest,
                                      X,
                                      y,
                                      cv=3,
                                      scoring="neg_mean_squared_error")
        # The scorer reports negated MSE; flip the sign to get a loss.
        return -np.mean(fold_scores)

    # The goal is the lowest MSE, hence direction="minimize".
    study_rf = optuna.create_study(direction="minimize")
    study_rf.optimize(objective, n_trials=5)
    optuna_rf_mse_score = study_rf.best_value
    winner = study_rf.best_trial
    total_seconds = (winner.datetime_complete -
                     winner.datetime_start).total_seconds()
    # Convert the number of seconds into an h:mm:ss string.
    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    optuna_rf_time = "%d:%02d:%09f" % (hours, minutes, seconds)
    print("RandomForestRegressor MSE score:%.4f" % optuna_rf_mse_score)
    print("程序执行时间(秒):{}".format(optuna_rf_time))
    print("最佳超参数值集合:", study_rf.best_params)
    save_model_object(study_rf, 'Optuna', 'RandomForestRegressor',
                      'RandomForestRegressor')
    return optuna_rf_mse_score, optuna_rf_time, study_rf.best_params
def rs_RandomForestRegressor(X, y):
    """Tune a RandomForestRegressor with ``RandomizedSearchCV`` (20 draws).

    Candidates are scored with 3-fold cross-validated negated MSE; the
    fitted search object itself is passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``fit``.
        y: Regression targets passed to ``fit``.

    Returns:
        A tuple ``(best_mse_str, elapsed, best_params)`` where
        ``best_mse_str`` is the best MSE rendered with ``str`` and
        ``elapsed`` is a ``datetime.timedelta``.
    """
    # Hyperparameter sampling distributions.
    distributions = {
        'n_estimators': sp_randint(10, 100),
        "max_features": sp_randint(1, 13),
        'max_depth': sp_randint(5, 50),
        "min_samples_split": sp_randint(2, 11),
        "min_samples_leaf": sp_randint(1, 11),
        "criterion": ['mse', 'mae']
    }
    # 20 draws; raise this if more search time is available.
    draws = 20
    started_at = datetime.datetime.now()
    forest = RandomForestRegressor(random_state=0)
    Random_rf = RandomizedSearchCV(forest,
                                   param_distributions=distributions,
                                   n_iter=draws,
                                   cv=3,
                                   scoring='neg_mean_squared_error')
    Random_rf.fit(X, y)
    # best_score_ is negated MSE; flip the sign for reporting.
    print("RandomForestRegressor MSE score:" + str(-Random_rf.best_score_))
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", Random_rf.best_params_)
    save_model_object(Random_rf, 'random_search', 'RandomForestRegressor',
                      'RandomForestRegressor')
    return str(-Random_rf.best_score_), elapsed, Random_rf.best_params_
def bo_tpe_ANN(X, y):
    """Tune the Keras ANN regressor with hyperopt TPE (10 evaluations).

    Each candidate is scored by the mean 3-fold cross-validated MSE on
    ``(X, y)``. After the search, the raw ``fmin`` result is decoded into
    concrete parameters, a model is built by calling ``ANN`` with them,
    and that model is handed to ``save_model_object``.

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Regression targets handed to ``cross_val_score``.

    Returns:
        A tuple ``(best_mse, elapsed, best)``: the lowest loss recorded in
        the trials, the wall-clock ``datetime.timedelta`` of the search,
        and the raw dict returned by ``fmin`` (not the decoded one).
    """
    started_at = datetime.datetime.now()

    text_keys = ("activation", "loss")
    whole_number_keys = ('batch_size', 'neurons', 'epochs')

    def objective(sample):
        # Normalise the sampled values to the types the builder is given.
        ann_kwargs = {key: str(sample[key]) for key in text_keys}
        for key in whole_number_keys:
            ann_kwargs[key] = abs(int(sample[key]))
        ann_kwargs['learning_rate'] = abs(float(sample['learning_rate']))
        # ``verbose`` is a name defined outside this function.
        model = KerasRegressor(build_fn=ANN, **ann_kwargs, verbose=verbose)
        fold_scores = cross_val_score(model,
                                      X,
                                      y,
                                      cv=3,
                                      scoring="neg_mean_squared_error")
        # The scorer reports negated MSE; flip the sign to get a loss.
        return {'loss': -np.mean(fold_scores), 'status': STATUS_OK}

    space_activation = ['relu', 'tanh']
    space_loss = ['mse', 'mae']
    search_space = {
        "activation": hp.choice('activation', space_activation),
        "loss": hp.choice('loss', space_loss),
        'batch_size': hp.quniform('batch_size', 32, 128, 32),
        'neurons': hp.quniform('neurons', 256, 1024, 256),
        'epochs': hp.quniform('epochs', 30, 60, 10),
        'learning_rate': hp.uniform('learning_rate', 1e-5, 1e-2)
    }

    trials_ann = Trials()
    best_ann = fmin(fn=objective,
                    space=search_space,
                    algo=tpe.suggest,
                    max_evals=10,
                    trials=trials_ann)
    lowest_loss = min(trials_ann.losses())
    print("ANN MSE score:%.4f" % lowest_loss)
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", best_ann)
    # The hp.choice entries in the result are used as list indices here,
    # so they are mapped back onto the option lists.
    best_params_ann = {
        'activation': space_activation[best_ann['activation']],
        'loss': space_loss[best_ann['loss']],
    }
    for key in whole_number_keys:
        best_params_ann[key] = int(best_ann[key])
    best_params_ann['learning_rate'] = float(best_ann['learning_rate'])
    model_bo_tpe_ann = ANN(**best_params_ann)
    save_model_object(model_bo_tpe_ann, 'BO-TPE', 'ANN', 'ANN')
    return lowest_loss, elapsed, best_ann
def bs_ANN(X, y):
    """Score the Keras ANN regressor with its default settings (baseline).

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Regression targets handed to ``cross_val_score``.

    Returns:
        A tuple ``(mse, elapsed)``: the mean 3-fold cross-validated MSE
        and the wall-clock ``datetime.timedelta`` of the evaluation.
    """
    started_at = datetime.datetime.now()
    base_ann = KerasRegressor(build_fn=ANN, verbose=0)
    fold_scores = cross_val_score(base_ann,
                                  X,
                                  y,
                                  cv=3,
                                  scoring='neg_mean_squared_error')
    # The scorer reports negated MSE; flip the sign for the result.
    base_ann_score = -fold_scores.mean()
    elapsed = datetime.datetime.now() - started_at
    print("ANN MSE score {}".format(str(-fold_scores.mean())))
    print("程序执行时间(秒):{}".format(elapsed))
    save_model_object(base_ann, 'baseline', 'ann', 'ann')
    return base_ann_score, elapsed
def bs_svr(X, y):
    """Score an ``SVR`` with its default settings (baseline).

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Regression targets handed to ``cross_val_score``.

    Returns:
        A tuple ``(mse, elapsed)``: the mean 3-fold cross-validated MSE
        and the wall-clock ``datetime.timedelta`` of the evaluation.
    """
    started_at = datetime.datetime.now()
    base_svr = SVR()
    fold_scores = cross_val_score(base_svr,
                                  X,
                                  y,
                                  cv=3,
                                  scoring='neg_mean_squared_error')
    # The scorer reports negated MSE; flip the sign for the result.
    base_svr_score = -fold_scores.mean()
    elapsed = datetime.datetime.now() - started_at
    print("SVR MSE score {}".format(-fold_scores.mean()))
    print("程序执行时间(秒):{}".format(elapsed))
    save_model_object(base_svr, 'baseline', 'svr', 'svr')
    return base_svr_score, elapsed
def bs_KNN(X, y):
    """Score a ``KNeighborsRegressor`` with its default settings (baseline).

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Regression targets handed to ``cross_val_score``.

    Returns:
        A tuple ``(mse, elapsed)``: the mean 3-fold cross-validated MSE
        and the wall-clock ``datetime.timedelta`` of the evaluation.
    """
    started_at = datetime.datetime.now()
    base_knn = KNeighborsRegressor()
    fold_scores = cross_val_score(base_knn,
                                  X,
                                  y,
                                  cv=3,
                                  scoring='neg_mean_squared_error')
    # The scorer reports negated MSE; flip the sign for the result.
    base_knn_score = -fold_scores.mean()
    elapsed = datetime.datetime.now() - started_at
    print("KNN MSE score {}".format(-fold_scores.mean()))
    print("程序执行时间(秒):{}".format(elapsed))
    save_model_object(base_knn, 'baseline', 'knn', 'knn')
    return base_knn_score, elapsed
def bs_random_forest_regressor(X, y):
    """Score a ``RandomForestRegressor`` with default settings (baseline).

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Regression targets handed to ``cross_val_score``.

    Returns:
        A tuple ``(mse, elapsed)``: the mean 3-fold cross-validated MSE
        and the wall-clock ``datetime.timedelta`` of the evaluation.
    """
    started_at = datetime.datetime.now()
    base_rf = RandomForestRegressor()
    fold_scores = cross_val_score(base_rf,
                                  X,
                                  y,
                                  cv=3,
                                  scoring='neg_mean_squared_error')
    # The scorer reports negated MSE; flip the sign for the result.
    base_rf_score = -fold_scores.mean()
    elapsed = datetime.datetime.now() - started_at
    print(" RandomForestRegressor MSE score {}".format(-fold_scores.mean()))
    print("程序执行时间(秒):{}".format(elapsed))
    save_model_object(base_rf, 'baseline', 'randomforest', 'randomforest')
    return base_rf_score, elapsed
def grid_knn(X, y):
    """Tune ``n_neighbors`` of a KNeighborsRegressor with ``GridSearchCV``.

    Candidates are scored with 3-fold cross-validated negated MSE; the
    fitted search object itself is passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``fit``.
        y: Regression targets passed to ``fit``.

    Returns:
        A tuple ``(best_mse_str, elapsed, best_params)`` where
        ``best_mse_str`` is the best MSE rendered with ``str`` and
        ``elapsed`` is a ``datetime.timedelta``.
    """
    grid = {'n_neighbors': [2, 3, 5, 7, 10]}
    started_at = datetime.datetime.now()
    # Named ``search`` so the local does not shadow this function's name.
    search = GridSearchCV(KNeighborsRegressor(),
                          grid,
                          cv=3,
                          scoring='neg_mean_squared_error')
    search.fit(X, y)
    # best_score_ is negated MSE; flip the sign for reporting.
    print("KNN MSE score:" + str(-search.best_score_))
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", search.best_params_)
    save_model_object(search, 'grid_search', 'KNN', 'KNN')
    return str(-search.best_score_), elapsed, search.best_params_
def bo_tpe_RandomForestRegressor(X, y):
    """Tune a RandomForestRegressor with hyperopt TPE (20 evaluations).

    Each candidate is scored by the mean 3-fold cross-validated MSE on
    ``(X, y)``.

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Regression targets handed to ``cross_val_score``.

    Returns:
        A tuple ``(best_mse, elapsed, best)``: the lowest loss recorded in
        the trials, the wall-clock ``datetime.timedelta`` of the search,
        and the dict returned by ``fmin``.
    """
    started_at = datetime.datetime.now()

    whole_number_keys = ('n_estimators', 'max_depth', 'max_features',
                         "min_samples_split", "min_samples_leaf")

    # Objective function.
    def objective(sample):
        # Sampled values are cast to int before reaching the estimator.
        forest_kwargs = {key: int(sample[key]) for key in whole_number_keys}
        forest_kwargs["criterion"] = str(sample['criterion'])
        forest = RandomForestRegressor(**forest_kwargs)
        fold_scores = cross_val_score(forest,
                                      X,
                                      y,
                                      cv=3,
                                      n_jobs=-1,
                                      scoring="neg_mean_squared_error")
        # The scorer reports negated MSE; flip the sign to get a loss.
        return {'loss': -np.mean(fold_scores), 'status': STATUS_OK}

    # Hyperparameter configuration space.
    search_space = {
        'n_estimators': hp.quniform('n_estimators', 10, 150, 1),
        'max_depth': hp.quniform('max_depth', 5, 50, 1),
        "max_features": hp.quniform('max_features', 1, 13, 1),
        "min_samples_split": hp.quniform('min_samples_split', 2, 11, 1),
        "min_samples_leaf": hp.quniform('min_samples_leaf', 1, 11, 1),
        "criterion": hp.choice('criterion', ['mse', 'mae'])
    }
    trials_rf = Trials()
    best_rf = fmin(fn=objective,
                   space=search_space,
                   algo=tpe.suggest,
                   max_evals=20,
                   trials=trials_rf)
    lowest_loss = min(trials_rf.losses())
    print("Random Forest MSE score:%.4f" % lowest_loss)
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", best_rf)
    save_model_object(best_rf, 'BO-TPE', 'RandomForestRegressor',
                      'RandomForestRegressor')
    return lowest_loss, elapsed, best_rf
def optuna_ANN(X, y):
    """Tune the Keras ANN regressor with Optuna over 10 trials.

    Each trial is scored by the mean 3-fold cross-validated MSE on
    ``(X, y)``. After the study, a model is built by calling ``ANN`` with
    the best parameters and handed to ``save_model_object``.

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Regression targets handed to ``cross_val_score``.

    Returns:
        A tuple ``(best_mse, duration, best_params)``. ``duration`` is a
        ``"h:mm:ss.ffffff"`` string covering the best trial only (its
        start-to-complete time), not the whole study.
    """
    # The official Optuna examples all use models predefined in sklearn;
    # driving a custom model through Optuna takes more plumbing.
    # Some parameters are left at their defaults and not tuned; the
    # defaults generally already give the lowest MSE.

    def objective(trial):
        params = {
            # suggest_float(log=True) replaces the deprecated
            # suggest_loguniform; the sampled distribution is the same.
            "learning_rate": trial.suggest_float('learning_rate',
                                                 1e-5,
                                                 1e-2,
                                                 log=True),
            "batch_size": trial.suggest_categorical("batch_size", [128, 256]),
            "activation": trial.suggest_categorical("activation",
                                                    ['relu', 'tanh']),
            'neurons': trial.suggest_int("neurons", 512, 2048, step=128),
            'epochs': trial.suggest_int("epochs", 40, 100, step=10),
        }

        # ``verbose`` is a name defined outside this function.
        clf = KerasRegressor(build_fn=ANN, **params, verbose=verbose)
        score = cross_val_score(clf,
                                X,
                                y,
                                cv=3,
                                scoring='neg_mean_squared_error')
        # The scorer reports negated MSE; flip the sign to get a loss.
        return -score.mean()

    study_name_ann = 'optuna-ann'  # Unique identifier of the study.
    study_ann = optuna.create_study(direction="minimize",
                                    study_name=study_name_ann)
    study_ann.optimize(objective, n_trials=10)
    optuna_ann_mse_score = study_ann.best_value
    optuna_ann_time = (study_ann.best_trial.datetime_complete -
                       study_ann.best_trial.datetime_start).total_seconds()
    # Convert the number of seconds into an h:mm:ss string.
    m, s = divmod(optuna_ann_time, 60)
    h, m = divmod(m, 60)
    optuna_ann_time = "%d:%02d:%09f" % (h, m, s)
    print("ANN MSE score:%.4f" % optuna_ann_mse_score)
    print("程序执行时间(秒):{}".format(optuna_ann_time))
    print("最佳超参数值集合:", study_ann.best_params)
    model_optuna_ann = ANN(**study_ann.best_params)
    save_model_object(model_optuna_ann, 'Optuna', 'ANN', 'ANN')
    return optuna_ann_mse_score, optuna_ann_time, study_ann.best_params
# Example #20  (listing marker left by the code-sample scrape; counter: 0)
def bo_knn(X, y):
    """Tune ``n_neighbors`` of a KNeighborsRegressor with ``BayesSearchCV``.

    Ten iterations are run; candidates are scored with 3-fold
    cross-validated negated MSE. The fitted search object itself is
    passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``fit``.
        y: Regression targets passed to ``fit``.

    Returns:
        A tuple ``(best_mse_str, elapsed, best_params)`` where
        ``best_mse_str`` is the best MSE rendered with ``str`` and
        ``elapsed`` is a ``datetime.timedelta``.
    """
    search_space = {'n_neighbors': Integer(1, 20)}
    started_at = datetime.datetime.now()
    Bayes_knn = BayesSearchCV(KNeighborsRegressor(),
                              search_space,
                              cv=3,
                              n_iter=10,
                              scoring='neg_mean_squared_error')
    Bayes_knn.fit(X, y)
    # best_score_ is negated MSE; flip the sign for reporting.
    print("KNN MSE score:" + str(-Bayes_knn.best_score_))
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", Bayes_knn.best_params_)
    save_model_object(Bayes_knn, 'BO-GP', 'KNN', 'KNN')
    return str(-Bayes_knn.best_score_), elapsed, Bayes_knn.best_params_
# Example #21  (listing marker left by the code-sample scrape; counter: 0)
def gpminimize_knn(X, y):
    """Tune ``n_neighbors`` of a KNeighborsRegressor with ``gp_minimize``.

    Ten calls are made; one shared estimator is reconfigured via
    ``set_params`` for each and scored by the mean 3-fold cross-validated
    MSE on ``(X, y)``.

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Regression targets handed to ``cross_val_score``.

    Returns:
        A tuple ``(best_mse, elapsed, best_point)`` taken from the
        optimisation result's ``fun`` and ``x`` attributes; ``elapsed`` is
        a ``datetime.timedelta``.
    """
    started_at = datetime.datetime.now()
    knn = KNeighborsRegressor()
    dimensions = [Integer(1, 20, name='n_neighbors')]

    @use_named_args(dimensions)
    def objective(**candidate):
        knn.set_params(**candidate)
        fold_scores = cross_val_score(knn,
                                      X,
                                      y,
                                      cv=3,
                                      n_jobs=-1,
                                      scoring="neg_mean_squared_error")
        # The scorer reports negated MSE; flip the sign to get a loss.
        return -np.mean(fold_scores)

    res_gp_knn = gp_minimize(objective, dimensions, n_calls=10, random_state=0)
    print("KNN MSE score:%.4f" % res_gp_knn.fun)
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", res_gp_knn.x)
    save_model_object(res_gp_knn.models, 'gp_minimize', 'KNN', 'KNN')
    return res_gp_knn.fun, elapsed, res_gp_knn.x
def rs_knn(X, y):
    """Tune ``n_neighbors`` of a KNeighborsRegressor by random search.

    ``RandomizedSearchCV`` draws 10 candidates, scored with 3-fold
    cross-validated negated MSE. The fitted search object itself is
    passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``fit``.
        y: Regression targets passed to ``fit``.

    Returns:
        A tuple ``(best_mse_str, elapsed, best_params)`` where
        ``best_mse_str`` is the best MSE rendered with ``str`` and
        ``elapsed`` is a ``datetime.timedelta``.
    """
    distributions = {'n_neighbors': sp_randint(1, 20)}
    draws = 10
    started_at = datetime.datetime.now()
    Random_knn = RandomizedSearchCV(KNeighborsRegressor(),
                                    param_distributions=distributions,
                                    n_iter=draws,
                                    cv=3,
                                    scoring='neg_mean_squared_error')
    Random_knn.fit(X, y)
    # best_score_ is negated MSE; flip the sign for reporting.
    print("KNN MSE score:" + str(-Random_knn.best_score_))
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", Random_knn.best_params_)
    save_model_object(Random_knn, 'random_search', 'KNN', 'KNN')
    return str(-Random_knn.best_score_), elapsed, Random_knn.best_params_
def grid_svr(X, y):
    """Tune an ``SVR`` with an exhaustive ``GridSearchCV``.

    The grid covers ``C``, ``kernel``, ``degree`` and ``epsilon``;
    candidates are scored with 3-fold cross-validated negated MSE. The
    fitted search object itself is passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``fit``.
        y: Regression targets passed to ``fit``.

    Returns:
        A tuple ``(best_mse_str, elapsed, best_params)`` where
        ``best_mse_str`` is the best MSE rendered with ``str`` and
        ``elapsed`` is a ``datetime.timedelta``.
    """
    # Hyperparameter grid.
    grid = {
        'C': [1, 10, 100],
        "kernel": ['poly', 'rbf', 'sigmoid'],
        "degree": np.arange(1, 10, 1),
        "epsilon": [0.01, 0.1, 1]
    }
    started_at = datetime.datetime.now()
    # Named ``search`` so the local does not shadow this function's name.
    search = GridSearchCV(SVR(),
                          grid,
                          cv=3,
                          scoring='neg_mean_squared_error')
    search.fit(X, y)
    # best_score_ is negated MSE; flip the sign for reporting.
    print("SVR MSE score:" + str(-search.best_score_))
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", search.best_params_)
    save_model_object(search, 'grid_search', 'SVR', 'SVR')
    return str(-search.best_score_), elapsed, search.best_params_
# Example #24  (listing marker left by the code-sample scrape; counter: 0)
def bo_svr(X, y):
    """Tune an ``SVR`` with ``BayesSearchCV`` (20 iterations).

    ``C``, ``kernel`` and ``epsilon`` are searched with ``gamma='scale'``;
    candidates are scored with 3-fold cross-validated negated MSE. The
    fitted search object itself is passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``fit``.
        y: Regression targets passed to ``fit``.

    Returns:
        A tuple ``(best_mse_str, elapsed, best_params)`` where
        ``best_mse_str`` is the best MSE rendered with ``str`` and
        ``elapsed`` is a ``datetime.timedelta``.
    """
    search_space = {
        'C': Real(1, 50),
        "kernel": ['poly', 'rbf', 'sigmoid'],
        'epsilon': Real(0, 1)
    }
    started_at = datetime.datetime.now()
    Bayes_svr = BayesSearchCV(SVR(gamma='scale'),
                              search_space,
                              cv=3,
                              n_iter=20,
                              scoring='neg_mean_squared_error')
    Bayes_svr.fit(X, y)
    # best_score_ is negated MSE; flip the sign for reporting.
    print("SVR MSE score:" + str(-Bayes_svr.best_score_))
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", Bayes_svr.best_params_)
    save_model_object(Bayes_svr, 'BO-GP', 'SVR', 'SVR')
    return str(-Bayes_svr.best_score_), elapsed, Bayes_svr.best_params_
def rs_svr(X, y):
    """Tune an ``SVR`` by random search (20 draws).

    ``C``, ``kernel`` and ``epsilon`` are sampled with ``gamma='scale'``;
    candidates are scored with 3-fold cross-validated negated MSE. The
    fitted search object itself is passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``fit``.
        y: Regression targets passed to ``fit``.

    Returns:
        A tuple ``(best_mse_str, elapsed, best_params)`` where
        ``best_mse_str`` is the best MSE rendered with ``str`` and
        ``elapsed`` is a ``datetime.timedelta``.
    """
    distributions = {
        'C': stats.uniform(0, 50),
        "kernel": ['poly', 'rbf', 'sigmoid'],
        "epsilon": stats.uniform(0, 1)
    }
    draws = 20
    started_at = datetime.datetime.now()
    Random_svr = RandomizedSearchCV(SVR(gamma='scale'),
                                    param_distributions=distributions,
                                    n_iter=draws,
                                    cv=3,
                                    scoring='neg_mean_squared_error')
    Random_svr.fit(X, y)
    # best_score_ is negated MSE; flip the sign for reporting.
    print("SVR MSE score:" + str(-Random_svr.best_score_))
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", Random_svr.best_params_)
    save_model_object(Random_svr, 'random_search', 'SVR', 'SVR')
    return str(-Random_svr.best_score_), elapsed, Random_svr.best_params_
# Example #26  (listing marker left by the code-sample scrape; counter: 0)
def gpminimize_svr(X, y):
    """Tune an ``SVR`` with ``gp_minimize`` (20 calls).

    One shared estimator (``gamma='scale'``) is reconfigured via
    ``set_params`` for each call and scored by the mean 3-fold
    cross-validated MSE on ``(X, y)``.

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Regression targets handed to ``cross_val_score``.

    Returns:
        A tuple ``(best_mse, elapsed, best_point)`` taken from the
        optimisation result's ``fun`` and ``x`` attributes; ``elapsed`` is
        a ``datetime.timedelta``.
    """
    started_at = datetime.datetime.now()
    svr = SVR(gamma='scale')
    # The order here is the order of the values in the result's ``x``.
    dimensions = [
        Real(1, 50, name='C'),
        Categorical(['poly', 'rbf', 'sigmoid'], name='kernel'),
        Real(0, 1, name='epsilon'),
    ]

    @use_named_args(dimensions)
    def objective(**candidate):
        svr.set_params(**candidate)
        fold_scores = cross_val_score(svr,
                                      X,
                                      y,
                                      cv=3,
                                      n_jobs=-1,
                                      scoring="neg_mean_squared_error")
        # The scorer reports negated MSE; flip the sign to get a loss.
        return -np.mean(fold_scores)

    res_gp_svr = gp_minimize(objective, dimensions, n_calls=20, random_state=0)
    print("SVR MSE score:%.4f" % res_gp_svr.fun)
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", res_gp_svr.x)
    save_model_object(res_gp_svr.models, 'gp_minimize', 'SVR', 'SVR')
    return res_gp_svr.fun, elapsed, res_gp_svr.x
def optuna_knn(X, y):
    """Tune ``n_neighbors`` of a KNeighborsRegressor with Optuna (5 trials).

    Each trial is scored by the mean 3-fold cross-validated MSE on
    ``(X, y)``.

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Regression targets handed to ``cross_val_score``.

    Returns:
        A tuple ``(best_mse, duration, best_params)``. ``duration`` is a
        ``"h:mm:ss.ffffff"`` string covering the best trial only (its
        start-to-complete time), not the whole study.
    """
    def objective(trial):
        neighbours = trial.suggest_int("n_neighbors", 1, 20, step=1)
        model = KNeighborsRegressor(n_neighbors=neighbours)
        fold_scores = cross_val_score(model,
                                      X,
                                      y,
                                      cv=3,
                                      scoring="neg_mean_squared_error")
        # The scorer reports negated MSE; flip the sign to get a loss.
        return -np.mean(fold_scores)

    study_knn = optuna.create_study(direction="minimize")
    study_knn.optimize(objective, n_trials=5)
    optuna_knn_mse_score = study_knn.best_value
    winner = study_knn.best_trial
    total_seconds = (winner.datetime_complete -
                     winner.datetime_start).total_seconds()
    # Convert the number of seconds into an h:mm:ss string.
    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    optuna_knn_time = "%d:%02d:%09f" % (hours, minutes, seconds)
    print("KNN MSE score:%.4f" % optuna_knn_mse_score)
    print("程序执行时间(秒):{}".format(optuna_knn_time))
    print("最佳超参数值集合:", study_knn.best_params)
    save_model_object(study_knn, 'Optuna', 'KNN', 'KNN')
    return optuna_knn_mse_score, optuna_knn_time, study_knn.best_params
def optuna_GradientBoostingRegressor(X, y):
    """Tune a GradientBoostingRegressor with Optuna over 5 trials.

    Each trial is scored by the mean 3-fold cross-validated MSE on
    ``(X, y)``.

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Regression targets handed to ``cross_val_score``.

    Returns:
        A tuple ``(best_mse, duration, best_params)``. ``duration`` is a
        ``"h:mm:ss.ffffff"`` string covering the best trial only (its
        start-to-complete time), not the whole study. ``best_params`` uses
        the Optuna parameter names, so the learning rate is under ``"lr"``.
    """
    def objective(trial):
        # Four search ranges: subsample, n_estimators, max_depth, lr.
        # suggest_float(step=...) and suggest_float(log=True) replace the
        # deprecated suggest_discrete_uniform / suggest_loguniform; the
        # parameter names and ranges are unchanged.
        subsample = trial.suggest_float("subsample", 0.1, 1.0, step=0.1)
        n_estimators = trial.suggest_int("n_estimators", 50, 200)
        max_depth = trial.suggest_int("max_depth", 1, 20)
        lr = trial.suggest_float("lr", 1e-4, 1e-1, log=True)
        clf = GradientBoostingRegressor(n_estimators=n_estimators,
                                        subsample=subsample,
                                        learning_rate=lr,
                                        max_depth=max_depth,
                                        random_state=0)
        # The scorer reports negated MSE; flip the sign to get a loss.
        score = -np.mean(
            cross_val_score(clf, X, y, cv=3, scoring="neg_mean_squared_error"))
        return score

    study_name_gbr = 'optuna-gbr'  # Unique identifier of the study.
    study_gbr = optuna.create_study(direction="minimize",
                                    study_name=study_name_gbr)
    # A study can also be loaded from a sqlite3 database and optimised
    # straight away, e.g.:
    # study = optuna.create_study(study_name='example-study', storage='sqlite:///example.db', load_if_exists=True)
    study_gbr.optimize(objective, n_trials=5)
    optuna_gbr_mse_score = study_gbr.best_value
    optuna_gbr_time = (study_gbr.best_trial.datetime_complete -
                       study_gbr.best_trial.datetime_start).total_seconds()
    # Convert the number of seconds into an h:mm:ss string.
    m, s = divmod(optuna_gbr_time, 60)
    h, m = divmod(m, 60)
    optuna_gbr_time = "%d:%02d:%09f" % (h, m, s)
    print("GradientBoostingRegressor MSE score:%.4f" % optuna_gbr_mse_score)
    print("程序执行时间(秒):{}".format(optuna_gbr_time))
    print("最佳超参数值集合:", study_gbr.best_params)
    save_model_object(study_gbr, 'Optuna', 'GradientBoostingRegressor',
                      'GradientBoostingRegressor')
    return optuna_gbr_mse_score, optuna_gbr_time, study_gbr.best_params
def grid_ANN(X, y):
    """Tune the Keras ANN regressor with an exhaustive ``GridSearchCV``.

    Candidates are scored with 3-fold cross-validated negated MSE. After
    the search, a model is built by calling ``ANN`` with the best
    parameters and handed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``fit``.
        y: Regression targets passed to ``fit``.

    Returns:
        A tuple ``(best_mse_str, elapsed, best_params)`` where
        ``best_mse_str`` is the best MSE rendered with ``str`` and
        ``elapsed`` is a ``datetime.timedelta``.
    """
    grid = {
        "neurons": [512, 1028],
        "batch_size": [128],
        "epochs": [60, 80],
        # "activation": ['sigmoid', 'relu', 'tanh'],
        "patience": [3],
        "loss": ['mse']
    }
    started_at = datetime.datetime.now()
    # ``verbose`` is a name defined outside this function.
    wrapped_model = KerasRegressor(build_fn=ANN, verbose=verbose)
    search = GridSearchCV(wrapped_model,
                          grid,
                          cv=3,
                          scoring='neg_mean_squared_error')
    search.fit(X, y)
    # best_score_ is negated MSE; flip the sign for reporting.
    print("ANN MSE score:" + str(-search.best_score_))
    elapsed = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(elapsed))
    print("最佳超参数值集合:", search.best_params_)
    model_grid_ann = ANN(**search.best_params_)
    save_model_object(model_grid_ann, 'grid_search', 'ANN', 'ANN')
    return str(-search.best_score_), elapsed, search.best_params_
def bo_tpe_lightgbm(X, y):
    """Tune a LightGBM regressor with hyperopt TPE and score it on a test split.

    The data is split into train, validation and test parts. TPE runs 50
    evaluations, each scored by the mean 3-fold cross-validated MSE on the
    training part. The best configuration is then refit on the training
    part, using the validation part for early stopping, and evaluated on
    the held-out test part.

    Args:
        X: Feature matrix.
        y: Regression targets.

    Returns:
        A tuple ``(test_mse, elapsed, best_params)``: the MSE on the test
        split, the wall-clock ``datetime.timedelta`` measured from just
        before the search starts, and the tuned values decoded with
        ``space_eval`` (the fixed defaults are not included).
    """
    # Reference:
    # https://qiita.com/TomokIshii/items/3729c1b9c658cc48b5cb

    data = X
    target = y
    # Two successive splits give three parts: test, train and validation.
    X_intermediate, X_test, y_intermediate, y_test = train_test_split(
        data, target, shuffle=True, test_size=0.2, random_state=1)

    # train/validation split (gives us train and validation sets)
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_intermediate,
        y_intermediate,
        shuffle=False,
        test_size=0.25,
        random_state=1)

    # delete intermediate variables
    del X_intermediate, y_intermediate

    # Report the share of the data that went to each part.
    print('train: {}% | validation: {}% | test {}%'.format(
        round((len(y_train) / len(target)) * 100, 2),
        round((len(y_validation) / len(target)) * 100, 2),
        round((len(y_test) / len(target)) * 100, 2)))

    starttime = datetime.datetime.now()

    space = {
        # 'learning_rate': hp.uniform('learning_rate', 0.001, 0.5),
        # 'minibatch_frac': hp.choice('minibatch_frac', [1.0, 0.5]),
        # 'Base': hp.choice('Base', [b1, b2, b3])
        "lambda_l1": hp.uniform("lambda_l1", 1e-8, 1.0),
        "lambda_l2": hp.uniform("lambda_l2", 1e-8, 1.0),
        "min_child_samples": hp.uniformint("min_child_samples", 5, 100),
        'learning_rate': hp.uniform("learning_rate", 0.001, 0.5),
        "n_estimators": hp.uniformint("n_estimators", 10, 100),
        "num_leaves": hp.uniformint("num_leaves", 5, 35)
    }

    # Fixed settings shared by every candidate. (n_estimators, the number
    # of boosting iterations, is tuned in ``space`` instead.)
    default_params = {
        # "n_estimators": 80,
        "random_state": 1,
        "objective": "regression",
        "boosting_type": "gbdt",
        # "num_leaves": 30,
        # "learning_rate": 0.3,
        "feature_fraction": 0.9,
        "bagging_fraction": 0.8,
        "bagging_freq": 5,
        "verbose": -1,
    }

    def objective(params):
        # Merge into a fresh dict rather than mutating the sample that
        # hyperopt passed in; the fixed defaults take precedence.
        clf = lgb.LGBMRegressor(**{**params, **default_params})
        # The scorer reports negated MSE; flip the sign to get a loss.
        score = -np.mean(
            cross_val_score(clf,
                            X_train,
                            y_train,
                            cv=3,
                            n_jobs=-1,
                            scoring="neg_mean_squared_error"))
        return {'loss': score, 'status': STATUS_OK}

    trials_lgb = Trials()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        best = fmin(
            fn=objective,
            space=space,
            algo=tpe.suggest,
            # max_evals is the number of parameter sets tried: more may
            # improve accuracy but lengthens the search.
            max_evals=50,
            trials=trials_lgb)

    best_params = space_eval(space, best)
    # Refit with the same fixed defaults the search used; building the
    # final model from the tuned values alone would train a different
    # configuration from the one that was scored.
    final_params = {**best_params, **default_params}
    lgb_model = lgb.LGBMRegressor(**final_params).fit(
        X_train,
        y_train,
        eval_set=[(X_validation, y_validation)],
        verbose=-1,
        # With early stopping, training may finish before all
        # n_estimators iterations have run.
        early_stopping_rounds=2)

    y_pred = lgb_model.predict(X_test)
    test_MSE_lgb = mean_squared_error(y_test, y_pred)
    print("LightGBM MSE score:%.4f" % test_MSE_lgb)
    endtime = datetime.datetime.now()
    process_time_lgb = endtime - starttime
    print("程序执行时间(秒):{}".format(process_time_lgb))
    print("最佳超参数值集合:", best_params)
    # Save under a LightGBM label; the previous 'NGBoost' label did not
    # describe this model.
    save_model_object(lgb_model, 'BO-TPE', 'LightGBM', 'LightGBM')
    return test_MSE_lgb, process_time_lgb, best_params