def run_cross_validation(train, train_target, folds):
    """Grid-search XGBoost hyper-parameters for a multi-output regressor.

    An ``xgb.XGBRegressor`` built from a fixed parameter set is wrapped in
    ``MultiOutputRegressor`` and tuned with ``GridSearchCV`` using the
    ``'r2'`` scoring string. The sorted parameter names of the wrapped model
    and the per-candidate search results are printed along the way.

    Args:
        train: Training features, passed unchanged to ``GridSearchCV.fit``.
        train_target: Training targets, passed unchanged to
            ``GridSearchCV.fit``.
        folds: Forwarded as the ``cv`` argument of ``GridSearchCV``.

    Returns:
        dict: A copy of the fixed parameters updated with the search's
        ``best_params_``. The tuned entries keep the ``estimator__`` prefix
        used in the grid, so the result mixes prefixed and un-prefixed keys.
    """
    # Keys are prefixed with ``estimator__`` because the tuned regressor is
    # nested inside the MultiOutputRegressor wrapper.
    cv_params = {
        'estimator__min_child_weight': [1, 3, 5],
        'estimator__subsample': [0.7, 0.8, 0.9],
        'estimator__learning_rate': [0.1, 0.01],
        'estimator__max_depth': [3, 5, 7],
        'estimator__n_estimators': [1000],  # try 100
    }
    # Parameters held constant for every candidate in the grid.
    # NOTE(review): newer xgboost releases appear to have replaced 'silent'
    # and 'reg:linear' with other names - confirm against the installed version.
    ind_params = {
        'colsample_bytree': 0.8,
        'silent': 1,
        'seed': 0,
        'objective': 'reg:linear',
    }

    pipeline = MultiOutputRegressor(xgb.XGBRegressor(**ind_params))
    # Single-argument print() calls produce the same output on Python 2 and 3.
    print(sorted(pipeline.get_params().keys()))

    optimized_GBM = GridSearchCV(pipeline, cv_params, scoring='r2',
                                 cv=folds, n_jobs=1, verbose=4)
    optimized_GBM.fit(train, train_target)

    # ``grid_scores_`` is absent from current scikit-learn; fall back to
    # ``cv_results_`` so a finished (expensive) search is not lost to an
    # AttributeError raised by a diagnostic print.
    search_scores = getattr(optimized_GBM, 'grid_scores_', None)
    if search_scores is None:
        search_scores = optimized_GBM.cv_results_
    print("best scores: " + str(search_scores))

    our_params = ind_params.copy()
    our_params.update(optimized_GBM.best_params_)
    return our_params
def generate_joint_model(single_model):
    """Fit a multi-output wrapper around ``single_model`` and save it to disk.

    The wrapper is trained on the module-level ``X_train`` / ``Y_train``,
    scored on both the training data and ``X_test`` / ``Y_test`` (both scores
    are printed as percentages), and dumped with ``joblib`` into
    ``model_folder``. The file name is built from the test score rounded to
    three decimals (with ``.`` replaced by ``_``) and the text of the
    estimator's string form that precedes its first ``(``.

    Args:
        single_model: Estimator passed to ``MultiOutputRegressor``.

    Returns:
        tuple: ``(fitted_model, model_path)``.
    """
    joint = MultiOutputRegressor(single_model)
    joint.fit(X_train, Y_train)

    train_score = joint.score(X_train, Y_train)
    print('Score of train', round(train_score * 100, 1), "%")

    test_score = joint.score(X_test, Y_test)
    print('Score of test', round(test_score * 100, 1), "%")

    # File name pieces: "<score with '_' for '.'>_<estimator name>.joblib".
    score_tag = str(round(test_score, 3)).replace('.', '_')
    estimator_name = str(joint.get_params()['estimator']).partition('(')[0]
    model_path = model_folder + "/" + score_tag + "_" + estimator_name + '.joblib'

    joblib.dump(joint, model_path)
    print("Save model file", model_path)
    return joint, model_path
# Empty accumulators created before the test files are processed. Nothing in
# this fragment fills them.
# NOTE(review): presumably populated by code further down - confirm.
score_rows_list = []
scores_dict = {}
mse_dict = {}
mae_dict = {}
mape_dict = {}
scores_dict_f3 = {}
mse_dict_f3 = {}
mae_dict_f3 = {}
mape_dict_f3 = {}

# Timestamp taken before the per-file loop starts.
# NOTE(review): time.clock() was removed in Python 3.8. Any replacement must
# be applied together with whatever code later reads test_start_time.
test_start_time = time.clock()

# try to save the trees attribute
# NOTE(review): the extent of this `if` body was reconstructed from
# whitespace-collapsed source - confirm against the original layout.
if tree_regressor_check_cond == True:
    # Print the regressor's `estimators_` attribute.
    print("aux_reg_regressor estimator_params", aux_reg_regressor.estimators_)
    # Dump the regressor's (deep) parameters to "<analysis_path><model_id>getparams.csv",
    # one row per parameter name.
    getparams_dict = aux_reg_regressor.get_params(deep=True)
    print(getparams_dict)
    getparams_df = pd.DataFrame.from_dict(data=getparams_dict, orient='index')
    getparams_df.to_csv(analysis_path + model_id + "getparams.csv")
    # Persist the regressor itself next to the CSV as "<model_id>.pkl".
    model_as_pkl_filename = analysis_path + model_id + ".pkl"
    joblib.dump(aux_reg_regressor, filename=model_as_pkl_filename)
    # np.savetxt(analysis_path + "rf5getparams.txt",fmt='%s',X=str(aux_reg_regressor.get_params(deep=True)))
    # np.savetxt(analysis_path + "rf5estimatorparams.txt",fmt='%s',X=aux_reg_regressor.estimator_params) USELESS
    # np.savetxt(analysis_path + "rf5classes.txt",fmt='%s',X=aux_reg_regressor.classes_)
    # np.savetxt(analysis_path + "rf5baseestim.txt",fmt='%s',X=aux_reg_regressor.base_estimator_)

# TODO: CHANGE THIS BACK!!
# Walk the test files; `i` is a counter that must already exist before this
# fragment, and element 0 of each entry is used as the file name under
# "<test_path>/data/".
for files in combined_filenames:
    print("filename", files)
    i += 1
    data_load_path = test_path + '/data/' + files[0]