import os
from pathlib import Path

from hpo_framework.hp_spaces import space_keras, space_rf, space_svr, space_xgb, \
    space_ada, space_dt, space_linr, space_knn_r
from hpo_framework.hpo_metrics import root_mean_squared_error
import datasets.dummy.preprocessing as pp
from hpo_framework.trial import Trial

# Loading data and preprocessing
# >>> Linux OS and Windows require different path representations -> use pathlib <<<
abs_folder_path = os.path.abspath(path='datasets/dummy')
data_folder = Path(abs_folder_path)
train_file = "train.csv"
test_file = "test.csv"
submission_file = "sample_submission.csv"

train_raw = pp.load_data(data_folder, train_file)
test_raw = pp.load_data(data_folder, test_file)

X_train, y_train, X_val, y_val, X_test = pp.process(train_raw, test_raw, standardization=False,
                                                    logarithmic=False, count_encoding=False)

# Flag for debug mode (yes/no)
# yes (True) -> set parameters for this trial in source code (below)
# no (False) -> call script via terminal and pass arguments via argparse
debug = False

if debug:
    # Set parameters manually
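    # (Note: assumption, not from the original source) The hard-coded trial
    # parameters would be set here; with debug = False the same values are
    # instead expected to arrive as argparse command-line arguments, as the
    # comments above describe.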
from skopt.optimizer import gp_minimize
from skopt.optimizer import forest_minimize
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt
from tensorflow import keras
import matplotlib.pyplot as plt

from datasets.dummy import preprocessing as pp

FOLDER = r'C:\Users\Max\Documents\GitHub\housing_regression\datasets'
TRAIN_FILE = 'train.csv'
TEST_FILE = 'test.csv'
SAMPLE_SUB = 'sample_submission.csv'

train_raw = pp.load_data(FOLDER, TRAIN_FILE)
test_raw = pp.load_data(FOLDER, TEST_FILE)

X_train, y_train, X_val, y_val, X_test = pp.process(train_raw, test_raw, standardization=False,
                                                    logarithmic=False, count_encoding=False)

# ML-algorithm
ALGORITHM = 'RandomForestRegressor'  # 'RandomForestRegressor', 'Keras'

# HPO-method
OPTIMIZER = 'SMAC'  # 'GPBO', 'SMAC'


def train_evaluate_rf(X_train, y_train, X_val, y_val, params):
    rf_reg = RandomForestRegressor(**params, random_state=0)
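    # (Sketch, assumption rather than the original implementation) The function
    # presumably continues by fitting rf_reg on (X_train, y_train), predicting on
    # X_val, and returning the validation RMSE, e.g.
    # sqrt(mean_squared_error(y_val, y_pred)), which is consistent with the sqrt
    # and mean_squared_error imports above.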