def run_cross_validation(train, train_target, folds):
    """Grid-search XGBoost hyper-parameters for a multi-output regressor.

    An ``xgb.XGBRegressor`` built from the fixed parameters below is wrapped
    in a ``MultiOutputRegressor`` and tuned with ``GridSearchCV`` (``r2``
    scoring, one job, verbose output) over the grid defined in ``cv_params``.

    Args:
        train: Training features, passed unchanged to ``GridSearchCV.fit``.
        train_target: Training targets, passed unchanged to
            ``GridSearchCV.fit``.
        folds: Value forwarded as the ``cv`` argument of ``GridSearchCV``.

    Returns:
        dict: A copy of the fixed parameters updated with the search's
        ``best_params_``. The tuned entries are keyed as in the grid, i.e.
        they keep the ``estimator__`` prefix, while the fixed entries are
        unprefixed.
    """
    # Grid of values explored for the wrapped estimator; the ``estimator__``
    # prefix routes each name through MultiOutputRegressor to the regressor.
    cv_params = {
        'estimator__min_child_weight': [1, 3, 5],
        'estimator__subsample': [0.7, 0.8, 0.9],
        'estimator__learning_rate': [0.1, 0.01],
        'estimator__max_depth': [3, 5, 7],
        'estimator__n_estimators': [1000],  # try 100
    }
    # Parameters held constant for every candidate.
    # NOTE(review): newer XGBoost releases reportedly replace 'silent' with
    # 'verbosity' and 'reg:linear' with 'reg:squarederror' -- confirm against
    # the installed version before changing these values.
    ind_params = {
        'colsample_bytree': 0.8,
        'silent': 1,
        'seed': 0,
        'objective': 'reg:linear',
    }

    pipeline = MultiOutputRegressor(xgb.XGBRegressor(**ind_params))

    # A parenthesised single-argument print is valid on both Python 2 (as a
    # statement) and Python 3 (as a function call).
    print(sorted(pipeline.get_params()))

    optimized_GBM = GridSearchCV(pipeline,
                                 cv_params,
                                 scoring='r2',
                                 cv=folds,
                                 n_jobs=1,
                                 verbose=4)

    optimized_GBM.fit(train, train_target)

    # ``grid_scores_`` no longer exists on current scikit-learn; keep using it
    # where it is still available (unchanged output) and fall back to
    # ``cv_results_`` otherwise instead of raising AttributeError.
    grid_scores = getattr(optimized_GBM, 'grid_scores_', None)
    if grid_scores is None:
        grid_scores = optimized_GBM.cv_results_
    print("best scores: " + str(grid_scores))

    our_params = ind_params.copy()
    our_params.update(optimized_GBM.best_params_)

    return our_params
def generate_joint_model(single_model):
    """Fit a multi-output wrapper around ``single_model``, score it, save it.

    Relies on names defined outside this function: ``X_train``, ``Y_train``,
    ``X_test``, ``Y_test``, ``model_folder``, ``joblib`` and
    ``MultiOutputRegressor``.

    Args:
        single_model: Estimator handed to ``MultiOutputRegressor``.

    Returns:
        tuple: ``(model, model_path)`` -- the fitted wrapper and the path it
        was dumped to with ``joblib.dump``.
    """
    model = MultiOutputRegressor(single_model)
    model.fit(X_train, Y_train)

    # Scores are printed as percentages rounded to one decimal place.
    train_score = model.score(X_train, Y_train)
    train_percent = round(train_score * 100, 1)
    print('Score of train', train_percent, "%")

    test_score = model.score(X_test, Y_test)
    test_percent = round(test_score * 100, 1)
    print('Score of test', test_percent, "%")

    # File name layout: <test score, '.' replaced by '_'>_<estimator>.joblib
    # where <estimator> is the estimator's string form up to the first '('.
    score_tag = str(round(test_score, 3)).replace('.', '_')
    estimator_text = str(model.get_params()['estimator'])
    estimator_name = estimator_text.partition('(')[0]
    model_path = "".join(
        (model_folder, "/", score_tag, "_", estimator_name, '.joblib')
    )

    joblib.dump(model, model_path)
    print("Save model file", model_path)

    return model, model_path
예제 #3
0
    score_rows_list = []
    scores_dict = {}
    mse_dict = {}
    mae_dict = {}
    mape_dict = {}
    
    scores_dict_f3 = {}
    mse_dict_f3 = {}
    mae_dict_f3 = {}
    mape_dict_f3 = {}
    test_start_time = time.clock()

    # try to save the trees attribute
    if tree_regressor_check_cond == True:
        print("aux_reg_regressor estimator_params", aux_reg_regressor.estimators_)
    getparams_dict = aux_reg_regressor.get_params(deep=True)
    print(getparams_dict)
    getparams_df = pd.DataFrame.from_dict(data=getparams_dict, orient='index')
    getparams_df.to_csv(analysis_path + model_id + "getparams.csv")
    model_as_pkl_filename = analysis_path + model_id + ".pkl"
    joblib.dump(aux_reg_regressor, filename=model_as_pkl_filename)
    # np.savetxt(analysis_path + "rf5getparams.txt",fmt='%s',X=str(aux_reg_regressor.get_params(deep=True)))
    # np.savetxt(analysis_path + "rf5estimatorparams.txt",fmt='%s',X=aux_reg_regressor.estimator_params) USELESS
    # np.savetxt(analysis_path + "rf5classes.txt",fmt='%s',X=aux_reg_regressor.classes_)
    # np.savetxt(analysis_path + "rf5baseestim.txt",fmt='%s',X=aux_reg_regressor.base_estimator_)

    # TODO: CHANGE THIS BACK!!
    for files in combined_filenames:
        print("filename", files)
        i += 1
        data_load_path = test_path + '/data/' + files[0]