lmbd = lmbd, input_activation = 'sigmoid', output_activation = 'linear', cost_function = 'MSE') ffnn.add_layer(20, activation_method = 'sigmoid') ffnn.add_layer(20, activation_method = 'sigmoid') #Train network ffnn.train() #Save predictions y_tilde_train = ffnn.predict(X_train) y_tilde_test = ffnn.predict(X_test) #Save metrics into exportable matrices train_mse[i][j], train_R2[i][j] = statistics.calc_statistics(y_train, y_tilde_train) test_mse[i][j], test_R2[i][j] = statistics.calc_statistics(y_test, y_tilde_test) if best_train_mse > train_mse[i][j]: best_train_mse = train_mse[i][j] best_y_tilde_train = y_tilde_train if best_test_mse > test_mse[i][j]: best_test_mse = test_mse[i][j] best_y_tilde_test = y_tilde_test #print metrics print('Learning rate: ', eta) print('lambda: ', lmbd) print('Train. mse = ', train_mse[i][j], 'R2 = ', train_R2[i][j])
def kfold_cross_validation(self, k, method, deg=5, lambd=1):
    """Run k-fold cross-validation with the chosen regression method.

    For every fold index ``i`` in ``range(self.inst.k)`` the dataset held
    in ``self.inst`` is re-split with ``sort_training_test_kfold(i)``, a
    model is fitted on the training part and scored on the held-out part.

    Parameters
    ----------
    k
        Accepted for interface compatibility but not read by this method;
        the number of folds is taken from ``self.inst.k``.
    method : str
        ``"least squares"`` fits with ``fit_design_matrix_numpy``,
        ``"ridge"`` with ``fit_design_matrix_ridge`` and ``"lasso"`` with
        ``fit_design_matrix_lasso``.
    deg
        Forwarded as ``deg`` to ``create_design_matrix``
        (presumably the polynomial degree -- confirm against ``fit``).
    lambd
        Passed to the ridge and lasso fitting routines; ignored for
        ``"least squares"``.

    Results
    -------
    Nothing is returned. One entry per fold is appended to:

    * ``self.mse`` / ``self.R2`` -- ``calc_statistics(z_test, z_pred)``
    * ``self.mse_train`` / ``self.R2_train`` --
      ``calc_statistics(inst.z_1d, z_train)``
    * ``self.bias`` / ``self.variance`` --
      ``calc_bias_variance(z_test, z_pred)``

    ``self.best_predicting_beta`` is set to the coefficients of the fold
    with the smallest ``abs(mse)``; it is only assigned when that value is
    below the initial threshold of ``1e5``.

    Raises
    ------
    SystemExit
        If ``method`` is not one of the three recognised names.
    """
    inst = self.inst
    lowest_mse = 1e5

    # Per-fold results; reset on every call.
    self.mse = []
    self.R2 = []
    self.mse_train = []
    self.R2_train = []
    self.bias = []
    self.variance = []

    design_matrix = fit(inst)
    # Design matrix built before any fold split has been applied.
    whole_DM = design_matrix.create_design_matrix(deg=deg).copy()

    for i in range(self.inst.k):
        # Pick the i-th set as test.
        inst.sort_training_test_kfold(i)
        inst.fill_array_test_training()

        # Create the design matrix for the training set, and evaluate.
        design_matrix.create_design_matrix(deg=deg)
        if method == "least squares":
            z_train, beta_train = design_matrix.fit_design_matrix_numpy()
        elif method == "ridge":
            z_train, beta_train = design_matrix.fit_design_matrix_ridge(lambd)
        elif method == "lasso":
            z_train, beta_train = design_matrix.fit_design_matrix_lasso(lambd)
        else:
            # sys.exit() takes a single argument; passing the pieces
            # separately raised TypeError instead of exiting cleanly.
            sys.exit("Wrongly designated method: " + str(method)
                     + " not found")

        # Find out which values get predicted by the training set.
        X_test = design_matrix.create_design_matrix(x=inst.test_x_1d,
                                                    y=inst.test_y_1d,
                                                    z=inst.test_z_1d,
                                                    N=inst.N_testing,
                                                    deg=deg)
        z_pred = design_matrix.test_design_matrix(beta_train, X=X_test)

        # Take the real values from the dataset for comparison.
        z_test = inst.test_z_1d

        # Prediction for the whole dataset. The result is not used below;
        # the call is retained because test_design_matrix is opaque from
        # here and may update state on design_matrix.
        whole_z_pred = design_matrix.test_design_matrix(beta_train,
                                                        X=whole_DM)

        # Score the held-out fold against its prediction.
        mse, calc_r2 = statistics.calc_statistics(z_test, z_pred)

        # Score the training data against its own fit.
        mse_train, calc_r2_train = statistics.calc_statistics(inst.z_1d,
                                                              z_train)

        # Get the values for the bias and the variance.
        bias, variance = statistics.calc_bias_variance(z_test, z_pred)

        self.mse.append(mse)
        self.R2.append(calc_r2)
        self.mse_train.append(mse_train)
        self.R2_train.append(calc_r2_train)
        self.bias.append(bias)
        self.variance.append(variance)

        # Remember the coefficients of the best-scoring fold.
        if abs(mse) < lowest_mse:
            lowest_mse = abs(mse)
            self.best_predicting_beta = beta_train
def kfold_cross_validation(self, method, descent_method='SGD-skl', deg=0,
                           Niterations=100, lambd=0.01, eta=0.000005, m=5,
                           verbose=False):
    """Run k-fold cross-validation with the chosen fitting method.

    For every fold index ``i`` in ``range(self.inst.k)`` the dataset held
    in ``self.inst`` is re-split with ``sort_training_test_kfold(i)``, a
    model is fitted on the training part and scored on the held-out part.

    Parameters
    ----------
    method : str
        ``'OLS'`` fits with ``fit_design_matrix_numpy``, ``'Ridge'`` with
        ``fit_design_matrix_ridge``, ``'LASSO'`` with
        ``fit_design_matrix_lasso`` and ``'logreg'`` with
        ``fit_design_matrix_logistic_regression``. The names are
        case-sensitive.
    descent_method, eta, m, verbose
        Forwarded unchanged to the logistic-regression fit; unused for the
        other methods.
    deg
        Forwarded as ``deg`` to ``create_design_matrix``.
    Niterations
        Passed as ``maxiter`` to the LASSO fit and as ``Niteration`` to
        the logistic-regression fit.
    lambd
        Passed to the Ridge and LASSO fitting routines.

    Results
    -------
    Nothing is returned; all result lists are reset on every call and
    receive one entry per fold.

    * ``method == 'logreg'``: ``self.accuracy``, ``self.rocaucs`` and
      ``self.area_ratios``.
    * any other method: ``self.mse``, ``self.R2``, ``self.mse_train``,
      ``self.R2_train``, ``self.bias`` and ``self.variance``.
      ``self.best_predicting_beta`` is set to the coefficients of the fold
      with the smallest ``abs(mse)``, provided it is below ``1e5``.

    The fitted helper object is kept on ``self.design_matrix``.

    Raises
    ------
    SystemExit
        If ``method`` is not one of the recognised names.
    """
    inst = self.inst
    lowest_mse = 1e5

    # Per-fold results; reset on every call.
    self.mse = []
    self.R2 = []
    self.mse_train = []
    self.R2_train = []
    self.bias = []
    self.variance = []
    self.accuracy = []
    self.design_matrix = fit(inst)
    self.rocaucs = []
    self.area_ratios = []

    #whole_DM = self.design_matrix.create_design_matrix(deg=deg).copy() #design matrix for the whole dataset
    #whole_y = inst.y_1d.copy() #save the whole output

    for i in range(self.inst.k):
        # Pick the i-th set as test.
        inst.sort_training_test_kfold(i)
        inst.fill_array_test_training()

        # Create the design matrix for the training set, and evaluate.
        self.design_matrix.create_design_matrix(deg=deg)
        if method == 'OLS':
            y_train, beta_train = \
                self.design_matrix.fit_design_matrix_numpy()
        elif method == "Ridge":
            y_train, beta_train = \
                self.design_matrix.fit_design_matrix_ridge(lambd)
        elif method == "LASSO":
            y_train, beta_train = \
                self.design_matrix.fit_design_matrix_lasso(
                    lambd, maxiter=Niterations)
        elif method == 'logreg':
            y_train, beta_train = \
                self.design_matrix.fit_design_matrix_logistic_regression(
                    descent_method=descent_method,
                    eta=eta,
                    Niteration=Niterations,
                    m=m,
                    verbose=verbose)
        else:
            # sys.exit() takes a single argument; passing the pieces
            # separately raised TypeError instead of exiting cleanly.
            sys.exit("Wrongly designated method: " + str(method)
                     + " not found")

        # Find out which values get predicted by the training set.
        X_test = self.design_matrix.create_design_matrix(
            x=inst.test_x_1d, N=inst.N_testing, deg=deg)
        y_pred = self.design_matrix.test_design_matrix(beta_train,
                                                       X=X_test)

        # Take the real target values from the test dataset for
        # comparison (and also a rescaled set).
        y_test = inst.test_y_1d
        _, y_test_rescaled = inst.rescale_back(x=inst.test_x_1d,
                                               y=inst.test_y_1d,
                                               split=True)
        target = y_test_rescaled.astype(int)

        #Calculate the prediction for the whole dataset
        #whole_y_pred = self.design_matrix.test_design_matrix(beta_train, X=whole_DM)

        if method == 'logreg':
            # Classification metrics on the held-out fold.
            y_pred_onehot = np.column_stack((1 - y_pred, y_pred))
            accuracy_batch = statistics.calc_accuracy(target, y_pred)
            rocaucs_batch = statistics.calc_rocauc(target, y_pred)

            # Area ratio: cumulative AUC of the prediction relative to the
            # cumulative AUC obtained when the targets themselves are used
            # as the prediction, both measured from the 0.5 offset.
            max_area_test = statistics.calc_cumulative_auc(
                target, make_onehot(target))
            area_ratio_batch = (
                statistics.calc_cumulative_auc(target, y_pred_onehot) - 0.5
            ) / (max_area_test - 0.5)

            self.accuracy.append(accuracy_batch)
            self.rocaucs.append(rocaucs_batch)
            self.area_ratios.append(area_ratio_batch)
        else:
            # Score the held-out fold against its prediction.
            mse, calc_r2 = statistics.calc_statistics(y_test, y_pred)

            # Score the training data against its own fit.
            mse_train, calc_r2_train = statistics.calc_statistics(
                inst.y_1d, y_train)

            # Get the values for the bias and the variance.
            bias, variance = statistics.calc_bias_variance(y_test, y_pred)

            self.mse.append(mse)
            self.R2.append(calc_r2)
            self.mse_train.append(mse_train)
            self.R2_train.append(calc_r2_train)
            self.bias.append(bias)
            self.variance.append(variance)

            # Remember the coefficients of the best-scoring fold. Kept in
            # this branch because mse is only computed for regression.
            if abs(mse) < lowest_mse:
                lowest_mse = abs(mse)
                self.best_predicting_beta = beta_train
# Or you can generate directly. #dataset = data_generate() #dataset.generate_franke(n=100, noise=0.2) # Normalize the dataset dataset.normalize_dataset() # Fit design matrix fitted_model = fit(dataset) # Ordinary least square fitting fitted_model.create_design_matrix(deg) z_model_norm, beta = fitted_model.fit_design_matrix_numpy() # Statistical evaluation mse, calc_r2 = statistics.calc_statistics(dataset.z_1d, z_model_norm) print("Mean square error: ", mse, "\n", "R2 score: ", calc_r2) # Scale back the dataset rescaled_dataset = dataset.rescale_back(z=z_model_norm) #x_model = rescaled_dataset[0] #y_model = rescaled_dataset[1] z_model = rescaled_dataset[2] # Generate analytical solution for plotting purposes analytical = data_generate() analytical.generate_franke(n, noise=0) # Plot solutions and analytical for comparison plot_3d(dataset.x_unscaled, dataset.y_unscaled, z_model, analytical.x_mesh, analytical.y_mesh, analytical.z_mesh, ["surface", "scatter"])