예제 #1
0
from hpo_framework.hp_spaces import space_keras, space_rf, space_svr, space_xgb, space_ada, space_dt, space_linr, space_knn_r

from hpo_framework.hpo_metrics import root_mean_squared_error
import datasets.dummy.preprocessing as pp
from hpo_framework.trial import Trial

# --- Data loading and preprocessing ---------------------------------------
# The dataset folder is given relative to the current working directory,
# made absolute, and then wrapped in a pathlib.Path so the same code copes
# with the different path representations of Linux and Windows.
abs_folder_path = os.path.abspath('datasets/dummy')
data_folder = Path(abs_folder_path)

# File names inside the dataset folder.
train_file, test_file = "train.csv", "test.csv"
submission_file = "sample_submission.csv"

# Read the raw training data first, then the raw test data.
train_raw, test_raw = (
    pp.load_data(data_folder, file_name)
    for file_name in (train_file, test_file)
)

# pp.process yields five objects: training features/targets, validation
# features/targets and the test features. All three optional preprocessing
# switches are turned off here.
X_train, y_train, X_val, y_val, X_test = pp.process(
    train_raw,
    test_raw,
    standardization=False,
    logarithmic=False,
    count_encoding=False,
)

# --- Debug switch -----------------------------------------------------------
# True  -> the parameters of this trial are set directly in the source below
# False -> the script is called from a terminal and receives its arguments
#          via argparse
debug = False

if debug:
    # Set parameters manually
예제 #2
0
from skopt.optimizer import gp_minimize
from skopt.optimizer import forest_minimize
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt
from tensorflow import keras
import matplotlib.pyplot as plt

from datasets.dummy import preprocessing as pp

# --- Input data location ----------------------------------------------------
# NOTE(review): FOLDER is an absolute, machine-specific Windows path; the
# script will only find the data on a machine with this directory layout.
FOLDER = r'C:\Users\Max\Documents\GitHub\housing_regression\datasets'
TRAIN_FILE, TEST_FILE = 'train.csv', 'test.csv'
SAMPLE_SUB = 'sample_submission.csv'

# Read the raw training data first, then the raw test data.
train_raw, test_raw = (
    pp.load_data(FOLDER, csv_name)
    for csv_name in (TRAIN_FILE, TEST_FILE)
)

# pp.process yields five objects: training features/targets, validation
# features/targets and the test features. All three optional preprocessing
# switches are turned off here.
X_train, y_train, X_val, y_val, X_test = pp.process(
    train_raw,
    test_raw,
    standardization=False,
    logarithmic=False,
    count_encoding=False,
)

# --- Experiment configuration -----------------------------------------------
# ML algorithm to tune; options listed by the author:
# 'RandomForestRegressor', 'Keras'
ALGORITHM = 'RandomForestRegressor'
# Hyperparameter-optimization method; options listed by the author:
# 'GPBO', 'SMAC'
OPTIMIZER = 'SMAC'


def train_evaluate_rf(X_train, y_train, X_val, y_val, params):
    rf_reg = RandomForestRegressor(**params, random_state=0)