# Ejemplo n.º 1
param_grid = {
    'max_depth': [100, 200, None],
    'max_features': [5, 10],
    'n_estimators': [600, 1000]
}
# Grid-search the random forest over param_grid, scored by negative MSE.
rf_CV = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring="neg_mean_squared_error",
    cv=3,
    n_jobs=-1,
    verbose=2,
)

# Fit on the single feature set; targets are reshaped to a column vector.
# rf_CV.fit(np.column_stack((feat_07_corr, feat_04_corr)), halo_mass_training.reshape(-1, 1))
rf_CV.fit(feat_04_corr, halo_mass_training.reshape(-1, 1))

# Persist the CV summary and the fitted search object side by side.
out_dir = ("/share/data1/lls/regression/in_halos_only/log_m_output/"
           "larger_training_set/04_only/")
ml.write_to_file_cv_results(out_dir + "cv_results.txt", rf_CV)
joblib.dump(rf_CV, out_dir + "clf.pkl")

# predictions

# Build a mock test feature set: replicate each true test value 50 times
# (rows = samples, columns = realisations) and add Gaussian scatter
# (sigma = 2.7) on top.
dup = np.copy(halo_mass_testing)
dup1 = np.tile(dup, (50, 1)).T

noise_04 = np.random.normal(loc=0, scale=2.7,
                            size=(len(halo_mass_testing), 50))
test_feat_04_corr = dup1 + noise_04

# noise_07 = np.random.normal(0, 1.2, [len(halo_mass_testing), 50])

param_grid = {
    "max_depth": 5,
    "max_features": 10,
    "warm_start": True,
}

# Train the gradient-boosting model (grid-searched when cv_i requests it),
# predict the held-out trajectories, then persist both outputs.
fit_result = gbm_fun.train_and_test_gradboost(
    training_features, traj_testing, param_grid=param_grid, cv=cv_i)
gbm_CV, pred_test = fit_result

np.save(saving_path + "predicted_test_set.npy", pred_test)
joblib.dump(gbm_CV, saving_path + "clf.pkl")

# predictions

# When grid-search CV was run, unwrap the winning estimator and log the CV
# table; otherwise gbm_CV is already the fitted estimator itself.
# NOTE(review): `is True` matches only the boolean True on purpose — an
# integer fold count (also truthy) must take the plain-estimator branch.
if cv_i is not True:
    alg = gbm_CV
else:
    alg = gbm_CV.best_estimator_
    ml.write_to_file_cv_results(saving_path + "cv_results.txt", gbm_CV)

# Staged R^2 on the training set: one score per boosting iteration.
# (The last feature column is dropped — presumably it is not an input;
# TODO confirm against how training_features is assembled.)
ada_r2_train = np.zeros(len(alg.estimators_))
for stage, stage_pred in enumerate(
        alg.staged_predict(training_features[:, :-1])):
    ada_r2_train[stage] = r2_score(log_halo_training, stage_pred)

np.save(saving_path + "r2_train_staged_scores.npy", ada_r2_train)

# Staged R^2 on the held-out trajectories, mirroring the training curve.
ada_r2_test = np.zeros(len(alg.estimators_))
for stage, stage_pred in enumerate(alg.staged_predict(traj_testing)):
    ada_r2_test[stage] = r2_score(y_test, stage_pred)

np.save(saving_path + "r2_test_staged_scores.npy", ada_r2_test)
# Ejemplo n.º 3
param_grid = {
    "n_estimators": [2000, 3000, 4000],
    "learning_rate": [0.001, 0.01],
    "base_estimator__max_depth": [2, 5]
}
# AdaBoost over regression trees, tuned with a 3-fold grid search on R^2.
base_estimator = DecisionTreeRegressor(max_depth=5)
ada_b = AdaBoostRegressor(base_estimator=base_estimator, random_state=20)
ada_CV = GridSearchCV(
    estimator=ada_b,
    param_grid=param_grid,
    scoring="r2",
    cv=3,
    n_jobs=-1,
    verbose=2,
)

ada_CV.fit(traj_training, log_halo_training)

# Persist CV summary and the fitted search object.
# NOTE(review): "/cv_results.txt" carries a leading slash while "clf.pkl"
# does not — confirm saving_path's trailing-slash convention is consistent.
ml.write_to_file_cv_results(saving_path + "/cv_results.txt", ada_CV)
joblib.dump(ada_CV, saving_path + "clf.pkl")

# predictions

# Predict the test trajectories with the best grid-search model and save.
pred_test = ada_CV.predict(traj_testing)
np.save(saving_path + "predicted_test_set.npy", pred_test)

# Staged R^2 on the training trajectories for the best AdaBoost model:
# one score per boosting iteration.
best_ada = ada_CV.best_estimator_
ada_r2_train = np.zeros(len(best_ada.estimators_))
for stage, stage_pred in enumerate(best_ada.staged_predict(traj_training)):
    ada_r2_train[stage] = r2_score(log_halo_training, stage_pred)

np.save(saving_path + "r2_train_staged_scores.npy", ada_r2_train)

# Pre-allocate staged test-set R^2 scores (the filling loop continues
# beyond this chunk of the file).
ada_r2_test = np.zeros(len(ada_CV.best_estimator_.estimators_), )