def _get_RF(self, X_values, y_values, model_num, random_state):
    """Collect out-of-fold random-forest residuals and model errors.

    Runs 5-fold cross-validation repeated 4 times (20 train/test splits).
    A fresh ``rf.RF`` model is trained on each training fold and used to
    predict the matching held-out fold.

    Args:
        X_values: Feature data, indexed with the integer index arrays
            yielded by ``RepeatedKFold.split``.
        y_values: Target data, indexed the same way as ``X_values``.
        model_num: Forwarded unchanged to ``RF.train``.
        random_state: Seed forwarded to ``RepeatedKFold``.

    Returns:
        A ``(residuals, model_errors)`` tuple of flattened arrays. For every
        held-out point of every split they hold ``y_test - prediction`` and
        the second value returned by ``RF.predict(X_test, True)``
        (presumably the model's per-point error estimate -- confirm against
        ``rf.RF``).
    """
    splitter = RepeatedKFold(
        n_splits=5, n_repeats=4, random_state=random_state
    )

    # Each list starts with an empty float array so that the final flattened
    # concatenation has the same dtype promotion as growing an initially
    # empty array fold by fold.
    residual_chunks = [np.asarray([])]
    error_chunks = [np.asarray([])]

    for fit_idx, holdout_idx in splitter.split(X_values):
        X_fit, X_holdout = X_values[fit_idx], X_values[holdout_idx]
        y_fit, y_holdout = y_values[fit_idx], y_values[holdout_idx]

        forest = rf.RF()
        forest.train(X_fit, y_fit, model_num)
        predicted, fold_errors = forest.predict(X_holdout, True)

        residual_chunks.append(y_holdout - predicted)
        error_chunks.append(fold_errors)

    # axis=None flattens every chunk before joining them.
    residuals = np.concatenate(residual_chunks, axis=None)
    model_errors = np.concatenate(error_chunks, axis=None)
    return residuals, model_errors
def find_stats(X_values, y_values, stdev):
    """Gather cross-validated RF absolute residuals and model errors.

    Uses 5-fold cross-validation repeated 4 times with a fixed seed
    (91936274). On each split a fresh ``rf.RF`` model is trained with
    ``train_synth`` and evaluated on the held-out fold with
    ``predict_no_divide``.

    Args:
        X_values: Feature data, indexed with the integer index arrays
            yielded by ``RepeatedKFold.split``.
        y_values: Target data, indexed the same way as ``X_values``.
        stdev: Forwarded to ``RF.train_synth`` as its ``std`` argument.

    Returns:
        A ``(abs_residuals, model_errors)`` tuple of flattened arrays:
        ``|y_test - prediction|`` for every held-out point of every split,
        and the matching second value returned by
        ``RF.predict_no_divide(X_test, True)``.
    """
    # define cross-validation splits
    splitter = RepeatedKFold(n_splits=5, n_repeats=4, random_state=91936274)

    print("finding rf scale factors")

    # Each list starts with an empty float array so that the final flattened
    # concatenation has the same dtype promotion as growing an initially
    # empty array fold by fold.
    residual_chunks = [np.asarray([])]
    error_chunks = [np.asarray([])]

    for fit_idx, holdout_idx in splitter.split(X_values):
        X_fit, X_holdout = X_values[fit_idx], X_values[holdout_idx]
        y_fit, y_holdout = y_values[fit_idx], y_values[holdout_idx]

        forest = rf.RF()
        forest.train_synth(X_fit, y_fit, std=stdev)
        predicted, fold_errors = forest.predict_no_divide(X_holdout, True)

        residual_chunks.append(y_holdout - predicted)
        error_chunks.append(fold_errors)

    # axis=None flattens every chunk before joining them.
    signed_residuals = np.concatenate(residual_chunks, axis=None)
    model_errors = np.concatenate(error_chunks, axis=None)
    return np.abs(signed_residuals), model_errors
def _get_RF(self, X_train, y_train, X_test, y_test, model_num):
    """Fit one random-forest model and score it on a held-out set.

    Args:
        X_train: Training features passed to ``RF.train``.
        y_train: Training targets passed to ``RF.train``.
        X_test: Features to predict on.
        y_test: True targets for ``X_test``.
        model_num: Forwarded unchanged to ``RF.train``.

    Returns:
        A ``(residuals, model_errors)`` tuple, where ``residuals`` is
        ``y_test - predictions`` and ``model_errors`` is the second value
        returned by ``RF.predict(X_test, True)``.
    """
    forest = rf.RF()
    forest.train(X_train, y_train, model_num)
    predicted, estimated_errors = forest.predict(X_test, True)
    return y_test - predicted, estimated_errors
y_train, y_test = Y.iloc[train_index_2], Y.iloc[test_index_2] testGroup2 = np.delete(groups2, train_index_2) if checkAlreadyDone(testGroup2[0], alreadyDone): continue frames = [X_test_1, X_test] twoTest = pd.concat(frames) yTest = [y_test_1, y_test] yFrames = pd.concat(yTest) testFinal = np.concatenate((testGroup, testGroup2)) RF = rf.RF() RF.train(X_train, y_train, std=y_std) GPR = gpr.GPR() GPR.train(X_train, y_train, userkernel=gprsavedkernel, std=y_std, optimizer_restarts=0) # Here instead of res, sigma try calculating domain prediction for the test data. gpr_pred, GPR_errors = GPR.predict(twoTest, True) rf_pred, RF_errors = RF.predict(twoTest, True) RF_errors = rfslope * RF_errors + rfintercept # Start measuring on different thresholds
def find_stats(X_values, y_values):
    """Cross-validate GPR and RF, keeping RF stats where GPR error is low.

    Uses 5-fold cross-validation repeated 4 times with a fixed seed
    (91936274), giving 20 train/test splits. The function:

    1. Trains a ``gpr.GPR`` model per split and collects its held-out model
       errors, each divided by ``standard_deviation``.
    2. Sets ``cutoff`` to the mean of those scaled GPR model errors.
    3. Trains an ``rf.RF`` model per split on the same folds and collects
       the absolute residuals and RF model errors of the held-out points.
    4. Keeps only the RF entries whose GPR model error at the same position
       is strictly below ``cutoff``.

    ``standard_deviation`` and ``outerctr`` are not parameters; they are
    read from the enclosing or module scope and must be defined there.

    Args:
        X_values: Feature data, indexed with the integer index arrays
            yielded by ``RepeatedKFold.split``.
        y_values: Target data, indexed the same way as ``X_values``.

    Returns:
        A ``(cutoff, res, sigma)`` tuple: the mean scaled GPR model error,
        the retained absolute RF residuals, and the retained RF model
        errors.
    """
    # define cross-validation splits
    rkf = RepeatedKFold(n_splits=5, n_repeats=4, random_state=91936274)
    # Materialise the folds once so that the GPR pass and the RF pass see
    # exactly the same splits in the same order; the final filter pairs
    # entries of the two passes by position.
    splits = list(rkf.split(X_values))

    # GPR
    # Each list starts with an empty float array so the flattened result is
    # at least float64 regardless of what the model wrappers return.
    gpr_error_chunks = [np.asarray([])]
    for ctr, (train_index, test_index) in enumerate(splits, start=1):
        print("GPR: {}/20 (iteration: {}/10)".format(ctr, outerctr))
        X_train, X_test = X_values[train_index], X_values[test_index]
        y_train = y_values[train_index]
        GPR = gpr.GPR()
        GPR.train_synth(
            X_train,
            y_train,
            std=standard_deviation,
            kernelchoice=1,
            optimizer_restarts=10,
        )
        # Only the model errors are needed below; the GPR predictions
        # themselves are not used by this function.
        _, gpr_errors = GPR.predict_no_divide(X_test, True)
        gpr_error_chunks.append(gpr_errors / standard_deviation)
    GPR_model_errors = np.concatenate(gpr_error_chunks, axis=None)

    # define quantities to return
    GPR_model_error_std = np.std(GPR_model_errors)
    print("GPR standard deviation of model errors: {}".format(
        GPR_model_error_std))
    GPR_model_error_mean = np.mean(GPR_model_errors)
    print("GPR mean of model errors: {}".format(GPR_model_error_mean))
    cutoff = GPR_model_error_mean

    # RF
    print("finding rf scale factors")
    rf_residual_chunks = [np.asarray([])]
    rf_error_chunks = [np.asarray([])]
    for train_index, test_index in splits:
        X_train, X_test = X_values[train_index], X_values[test_index]
        y_train, y_test = y_values[train_index], y_values[test_index]
        RF = rf.RF()
        RF.train_synth(X_train, y_train, std=standard_deviation)
        rf_pred, RF_errors = RF.predict_no_divide(X_test, True)
        rf_residual_chunks.append(y_test - rf_pred)
        rf_error_chunks.append(RF_errors)
    abs_residuals = np.abs(np.concatenate(rf_residual_chunks, axis=None))
    RF_model_errors = np.concatenate(rf_error_chunks, axis=None)

    # remove rf model errors and residuals that have gpr model error over
    # cutoff. A single boolean mask replaces element-by-element np.append,
    # which copied the whole array on every kept point.
    keep = GPR_model_errors < cutoff
    res = abs_residuals[:keep.size][keep]
    sigma = RF_model_errors[:keep.size][keep]
    return cutoff, res, sigma