formats=["hdf"], type="train", verbose=False)[1].values()) validation = pd.concat( manager.read_data(global_dirs.splitted_data_path, formats=["hdf"], type="validation", verbose=False)[1].values()) manager.assign_sets(train=train) tup = manager.create_mask( train.iloc[:, :-1], global_dirs.variable_selection[0], select=global_dirs.variable_selection[1] ) # This tuple shouldn't take care about y_column index scalers = manager.preprocess_train(tup, scale_Y=True) #max_depth_tuning=np.arange(1,50) #best_depth=1 #best_r2=0 #history_r2=[] #for md in max_depth_tuning: # manager.fit_rf_regression(max_depth=md) # r2=manager.predict_rf_regression(validation, tup)["r2"] # if abs(r2) > abs(best_r2): # best_depth=md # best_r2=r2 # history_r2.append(r2) # print("{}: {}".format(md,r2)) #rf_model = manager.fit_rf_regression(max_depth=best_depth)
type="train", verbose=False)[1].values()) validation = pd.concat( manager.read_data(global_dirs.splitted_data_path, formats=["hdf"], type="validation", verbose=False)[1].values()) manager.assign_sets(train=train, val=validation) #tup = manager.create_mask(train.iloc[:,:-1], [0, 1, 2], select=False) #This tuple shouldn't take care about y_column index tup = manager.create_mask( train.iloc[:, :-1], global_dirs.variable_selection[0], select=global_dirs.variable_selection[1] ) #This tuple shouldn't take care about y_column index scalers = manager.preprocess_train(tup, scale_Y=False) xgb_model = manager.fit_xgboost_regression() if not os.path.isdir(global_dirs.results_path): os.mkdir(global_dirs.results_path) if not os.path.isdir(global_dirs.xgboost_path): os.mkdir(global_dirs.xgboost_path) if not os.path.isdir(global_dirs.xgboost_path + "scalers/"): os.mkdir(global_dirs.xgboost_path + "scalers/") if not os.path.isdir(global_dirs.xgboost_path + "model/"): os.mkdir(global_dirs.xgboost_path + "model/") if not os.path.isdir(global_dirs.xgboost_path + "results/"): os.mkdir(global_dirs.xgboost_path + "results/") if (isinstance(scalers, tuple)):