def test(self, dataset, repetitions=1, preliminary_search=False,
         to_fix=None):
    """
    This function implements the testing procedure for the MONK's
    datasets. It permits two kinds of search for the best
    hyperparameters. The first kind simply searches one HyperGrid and
    selects the set of hyperparameters which returns the best result.
    The second kind performs a deeper search: it first searches the best
    value for some hyperparameters, fixes them, and then searches again
    for the values of the remaining ones.

    Parameters
    ----------
    dataset: int or list
        either a single index or a list of indexes, each one representing
        a dataset in self.monks

    repetitions: int
        cross validation's repetitions
        (Default value = 1)

    preliminary_search: bool
        whether or not to execute a preliminary search for the best value
        for some hyperparameters, fix them, and search again for the
        remaining hyperparameters
        (Default value = False)

    to_fix: list
        a list of hyperparameters that must be fixed during the
        preliminary search
        (Default value = None)

    Returns
    -------
    """
    if to_fix is None:
        to_fix = []

    if isinstance(dataset, int):
        assert 0 <= dataset <= 2
        dataset = [dataset]
    else:
        assert 0 < len(dataset) <= 3

    for ds in dataset:
        print('TESTING MONK DATASET {}\n'.format(ds + 1))

        self.train_set = pd.\
            read_csv(self.monks[ds][0],
                     names=['class'] +
                     ['x{}'.format(j) for j in range(17)]).values
        self.test_set = pd.\
            read_csv(self.monks[ds][1],
                     names=['class'] +
                     ['x{}'.format(j) for j in range(17)]).values

        self.grid = val.HyperGrid(self.param_ranges, size=self.grid_size,
                                  seed=datetime.now())
        self.selection = val.ModelSelectionCV(self.grid,
                                              repetitions=repetitions)

        # PRELIMINARY SEARCH FOR SOME OF THE PARAMETERS ###################

        if preliminary_search:
            assert len(to_fix) != 0
            self.preliminary_search(ds, to_fix, repetitions)

        # SEARCHING FOR THE OTHER PARAMETERS ##############################

        self.selection.search(
            self.train_set[:, 1:], self.train_set[:, 0].reshape(-1, 1),
            save_results=True,
            fname='../data/model_selection_results_monk_{}.json'.format(
                ds + 1),
            par_name='REMAINING PARAMETERS')

        best_model = self.selection.\
            select_best_model(
                self.train_set[:, 1:],
                self.train_set[:, 0].reshape(-1, 1),
                fname='../data/model_selection_results_monk_{}.json'.
                format(ds + 1))

        y_pred = best_model.predict(self.test_set[:, 1:])
        y_pred = np.apply_along_axis(lambda x: 0 if x < .5 else 1,
                                     1, y_pred).reshape(-1, 1)

        print('\n\n\n')

        bca = metrics.BinaryClassifierAssessment(
            self.test_set[:, 0].reshape(-1, 1), y_pred)

        self.save_best_result(ds, best_model, bca)
        self.plot_best_result(ds, best_model)

        self.param_ranges = self.param_ranges_backup.copy()
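# Usage sketch (illustrative only): the `benchmark` object below is hypothetical
# and simply shows how `test` is meant to be driven; the hyperparameter names
# passed through `to_fix` are assumptions, not taken from the original code.
#
#   benchmark.test(0, repetitions=3)              # single-grid search on MONK's 1
#   benchmark.test([0, 1, 2], repetitions=3,      # two-stage search on all datasets:
#                  preliminary_search=True,       # fix 'eta' and 'alpha' first,
#                  to_fix=['eta', 'alpha'])       # then search the remaining ranges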
def validate(self, X, y, neural_net, nfolds, plot_curves=False, **kwargs):
    """
    This function implements the core of the k-fold cross validation
    algorithm. For each fold, the neural network is trained on the
    training set built for that fold and tested on the respective
    validation set. Finally, the error between the validation targets
    and the predicted ones is collected.

    Parameters
    ----------
    X: numpy.ndarray
        the design matrix

    y: numpy.ndarray
        the target column vector

    neural_net: nn.NeuralNetwork
        the neural network that has to be cross validated

    nfolds: int
        the number of folds to be applied in the algorithm

    plot_curves: bool
        whether or not to plot the learning curve for each one of the
        cross validation's iterations

    kwargs: dict
        a dictionary which contains the parameters for the neural
        network's initialization

    Returns
    -------
    """
    for i in tqdm(np.arange(nfolds),
                  desc='{}-FOLD CROSS VALIDATION PROGRESS'.format(nfolds)):
        train_set = np.vstack(
            [self.folds[j] for j in np.arange(len(self.folds)) if j != i])
        X_train, y_train = np.hsplit(train_set, [X.shape[1]])
        X_va, y_va = np.hsplit(self.folds[i], [X.shape[1]])

        neural_net.train(X_train, y_train, X_va, y_va)

        # assessment = self.model_assessment(X_va, y_va, model=neural_net)
        assessment = {'mse': neural_net.error_per_epochs_va[-1]}
        self.results.append(assessment)
        # self.results.append(loss)

        fold_results = {
            'id_fold': i + 1,
            'mse_tr': neural_net.error_per_epochs[-1],
            'mse_va': neural_net.error_per_epochs_va[-1],
            'mee_tr': neural_net.mee_per_epochs[-1],
            'mee_va': neural_net.mee_per_epochs_va[-1],
            'error_per_epochs': neural_net.error_per_epochs,
            'error_per_epochs_va': neural_net.error_per_epochs_va,
            'mee_per_epochs': neural_net.mee_per_epochs,
            'mee_per_epochs_va': neural_net.mee_per_epochs_va,
            # 'accuracy_per_epochs': neural_net.accuracy_per_epochs,
            # 'accuracy_per_epochs_va': neural_net.accuracy_per_epochs_va,
            'hyperparams': neural_net.get_params()
        }

        if neural_net.task == 'classifier':
            y_pred = neural_net.predict(X_va)
            y_pred = np.apply_along_axis(lambda x: 0 if x < .5 else 1,
                                         1, y_pred).reshape(-1, 1)
            # y_pred = np.round(y_pred)
            bca = metrics.BinaryClassifierAssessment(y_pred, y_va,
                                                     printing=False)
            fold_results['accuracy'] = bca.accuracy
            fold_results['f1_score'] = bca.f1_score

        if neural_net.task == 'regression':
            # add mean euclidean error
            pass

        self.fold_results.append(fold_results)
        neural_net.reset()

        if plot_curves:
            plt.plot(range(len(neural_net.error_per_epochs)),
                     neural_net.error_per_epochs,
                     label='FOLD {}, VALIDATION ERROR: {}'.format(
                         i, round(assessment['mse'], 2)))

    self.aggregated_results = self.aggregate_assessments()

    if plot_curves:
        plt.title('LEARNING CURVES FOR A {}-FOLD CROSS VALIDATION.\nMEAN '
                  'VALIDATION ERROR {}, VARIANCE {}.'.format(
                      nfolds,
                      round(self.aggregated_results['mse']['mean'], 2),
                      round(self.aggregated_results['mse']['std'], 2)),
                  fontsize=8)
        plt.ylabel('ERROR PER EPOCH')
        plt.xlabel('EPOCHS')
        plt.grid()
        plt.legend(fontsize=8)
        plt.savefig('../images/{}_fold_cross_val_lcs.pdf'.format(nfolds),
                    bbox_inches='tight')
        plt.close()

    return self.aggregated_results
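# Minimal standalone sketch of the fold handling used in `validate` above, under
# the assumption that the folds are built by column-stacking X and y and splitting
# row-wise; the function name `kfold_splits` is illustrative, not part of the
# original class.
import numpy as np


def kfold_splits(X, y, nfolds):
    """Yield (X_train, y_train, X_va, y_va) for each fold, mirroring `validate`."""
    dataset = np.hstack((X, y))
    folds = np.array_split(dataset, nfolds)

    for i in range(nfolds):
        train = np.vstack([folds[j] for j in range(nfolds) if j != i])
        X_tr, y_tr = np.hsplit(train, [X.shape[1]])
        X_va, y_va = np.hsplit(folds[i], [X.shape[1]])
        yield X_tr, y_tr, X_va, y_va


# example on a toy dataset:
#   for X_tr, y_tr, X_va, y_va in kfold_splits(np.random.rand(20, 3),
#                                              np.random.rand(20, 1), 5):
#       print(X_tr.shape, X_va.shape)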
def optimize(self, nn, X, y, X_va, y_va, max_epochs, error_goal,
             plus=True, strong=False, **kwargs):
    """
    This function implements the optimization procedure following the
    Conjugate Gradient Descent, as described in the paper 'A new
    conjugate gradient algorithm for training neural networks based on a
    modified secant equation.'.

    Parameters
    ----------
    nn: nn.NeuralNetwork
        the neural network which has to be optimized

    X: numpy.ndarray
        the design matrix

    y: numpy.ndarray
        the target column vector

    X_va: numpy.ndarray
        the design matrix used for the validation

    y_va: numpy.ndarray
        the target column vector used for the validation

    max_epochs: int
        the maximum number of iterations for optimizing the network

    error_goal: float
        the stopping criterion based on a threshold for the maximum error
        allowed

    plus: bool
        whether or not to use the modified HS formula
        (Default value = True)

    strong: bool
        whether or not to use the strong Armijo-Wolfe condition
        (Default value = False)

    kwargs: dict
        additional parameters

    Returns
    -------
    """
    start_time = dt.datetime.now()
    k = 0
    g_prev = 0
    y_pred, y_pred_va = None, None
    bin_assess, bin_assess_va = None, None

    while True:
        start_iteration = dt.datetime.now()

        dataset = np.hstack((X, y))
        np.random.shuffle(dataset)
        X, y = np.hsplit(dataset, [X.shape[1]])

        # BACK-PROPAGATION ALGORITHM ######################################

        self.error = self.forward_propagation(nn, X, y)
        self.back_propagation(nn, X, y)
        self.error_per_epochs.append(self.error)
        y_pred = self.h[-1].reshape(-1, 1)

        g = self.flat_weights(self.delta_W, self.delta_b)

        if self.error < error_goal:
            self.statistics['epochs'] = (k + 1)
            self.statistics['time_train'] = \
                (dt.datetime.now() - start_time).total_seconds() * 1000
            self.time_per_epochs.append(
                (dt.datetime.now() - start_time).total_seconds() * 1000)
            self.error_per_epochs_va.append(self.error_per_epochs_va[-1])
            return 1
        elif np.all(g <= 1e-10):
            self.statistics['epochs'] = (k + 1)
            self.statistics['time_train'] = \
                (dt.datetime.now() - start_time).total_seconds() * 1000
            self.time_per_epochs.append(
                (dt.datetime.now() - start_time).total_seconds() * 1000)
            return None

        flatted_weights = self.flat_weights(nn.W, nn.b)
        flatted_copies = self.flat_weights(nn.W_copy, nn.b_copy)

        if k == 0:
            self.error_prev = self.error
            g_prev, d_prev, w_prev = 0, -g, 0
            if self.beta_m == 'cd':
                g_prev = g

        # TODO: refactor the call that computes beta
        if self.beta_m in ('fr', 'pr', 'cd'):
            beta = self.get_beta(g, g_prev, self.beta_m, plus=plus)
        elif self.beta_m in ('hs', 'dy'):
            beta = self.get_beta(g, g_prev, self.beta_m, plus=plus,
                                 d_prev=d_prev)
        elif self.beta_m == 'dl':
            beta = self.get_beta(g, g_prev, self.beta_m, plus=plus,
                                 w=flatted_weights, w_prev=w_prev,
                                 d_prev=d_prev, t=self.t)
        else:
            beta = self.get_beta(g, g_prev, self.beta_m, plus=plus,
                                 d_prev=d_prev, error=self.error,
                                 error_prev=self.error_prev,
                                 w=flatted_weights, w_prev=w_prev,
                                 rho=self.rho)

        d = self.get_direction(k, g, beta, d_prev=d_prev, method=self.d_m)
        eta = self.line_search(nn, X, y, flatted_weights, d,
                               np.asscalar(g.T.dot(d)), self.error)

        # WEIGHTS' UPDATE #################################################

        new_W = flatted_copies + (eta * d)
        nn.W, nn.b = self.unflat_weights(new_W, nn.n_layers, nn.topology)
        nn.update_copies()

        g_prev, d_prev, w_prev = g, d, flatted_copies
        self.error_prev = self.error

        # IN LOCO VALIDATION ##############################################

        if X_va is not None:
            error_va = self.forward_propagation(nn, X_va, y_va)
            self.error_per_epochs_va.append(error_va)
            y_pred_va = self.h[-1].reshape(-1, 1)

        # ACCURACY ESTIMATION #############################################

        if nn.task == 'classifier':
            y_pred_bin = np.apply_along_axis(lambda x: 0 if x < .5 else 1,
                                             1, y_pred).reshape(-1, 1)
            y_pred_bin_va = np.apply_along_axis(
                lambda x: 0 if x < .5 else 1, 1, y_pred_va).reshape(-1, 1)

            bin_assess = metrics.BinaryClassifierAssessment(y, y_pred_bin,
                                                            printing=False)
            bin_assess_va = metrics.BinaryClassifierAssessment(
                y_va, y_pred_bin_va, printing=False)

            self.accuracy_per_epochs.append(bin_assess.accuracy)
            self.accuracy_per_epochs_va.append(bin_assess_va.accuracy)
            self.f1_score_per_epochs.append(bin_assess.f1_score)
            self.f1_score_per_epochs_va.append(bin_assess_va.f1_score)

            if bin_assess_va.accuracy > self.max_accuracy:
                self.max_accuracy = bin_assess_va.accuracy
                self.statistics['acc_epoch'] = k

        norm_gradient = np.linalg.norm(self.g)
        self.gradient_norm_per_epochs.append(norm_gradient)

        if (k > 0 and norm_gradient <= self.convergence_goal) or \
                (max_epochs is not None and k == max_epochs):
            self.statistics['epochs'] = (k + 1)
            self.statistics['ls'] = self.ls_it / (k + 1)
            self.statistics['time_train'] = \
                (dt.datetime.now() - start_time).total_seconds() * 1000
            self.time_per_epochs.append(
                (dt.datetime.now() - start_time).total_seconds() * 1000)
            return 1

        self.statistics['epochs'] = (k + 1)
        self.statistics['ls'] = self.ls_it / (k + 1)
        self.time_per_epochs.append(
            (dt.datetime.now() - start_time).total_seconds() * 1000)
        self.statistics['time_train'] = \
            (dt.datetime.now() - start_time).total_seconds() * 1000

        k += 1

    return 0
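# Standalone sketch of the textbook conjugate-gradient betas dispatched through
# `get_beta` above (Fletcher-Reeves 'fr', Polak-Ribiere 'pr', Hestenes-Stiefel
# 'hs'); this is an assumption about their standard form, not a copy of the
# original implementation, and the 'plus' clipping mirrors the modified variants
# mentioned in the docstring.
import numpy as np


def beta_textbook(g, g_prev, d_prev, method='hs', plus=True):
    """Compute beta for column-vector gradients g, g_prev and direction d_prev."""
    y_k = g - g_prev  # gradient difference

    if method == 'fr':    # Fletcher-Reeves
        beta = float(g.T.dot(g) / g_prev.T.dot(g_prev))
    elif method == 'pr':  # Polak-Ribiere
        beta = float(g.T.dot(y_k) / g_prev.T.dot(g_prev))
    elif method == 'hs':  # Hestenes-Stiefel
        beta = float(g.T.dot(y_k) / d_prev.T.dot(y_k))
    else:
        raise ValueError('unknown method {}'.format(method))

    # the '+' variants clip negative betas so the search direction
    # remains a descent direction
    return max(beta, 0.0) if plus else beta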
def optimize(self, nn, X, y, X_va, y_va, epochs=None):
    """
    This function implements the optimization routine.

    Parameters
    ----------
    nn: nn.NeuralNetwork
        a reference to the network

    X: numpy.ndarray
        the design matrix

    y: numpy.ndarray
        the target column vector

    X_va: numpy.ndarray
        the design matrix used for the validation

    y_va: numpy.ndarray
        the target column vector used for the validation

    epochs: int
        the optimization routine's maximum number of epochs
        (Default value = None)
    """
    bin_assess, bin_assess_va = None, None
    start_time = dt.datetime.now()
    e = 0

    while True:
        start_iteration = dt.datetime.now()
        error_per_batch = []
        y_pred, y_pred_va = None, None

        dataset = np.hstack((X, y))
        np.random.shuffle(dataset)
        X, y = np.hsplit(dataset, [X.shape[1]])

        for b_start in np.arange(0, X.shape[0], X.shape[0]):
            # BACK-PROPAGATION ALGORITHM ##################################

            x_batch = X[b_start:b_start + X.shape[0], :]
            y_batch = y[b_start:b_start + X.shape[0], :]

            # MOMENTUM CHECK ##############################################

            if self.momentum['type'] == 'nesterov':
                for layer in range(nn.n_layers):
                    nn.W[layer] += self.momentum['alpha'] * \
                        self.velocity_W[layer]

            error = self.forward_propagation(nn, x_batch, y_batch)
            self.error_per_batch.append(error)
            error_per_batch.append(error)
            y_pred = self.h[-1].reshape(-1, 1)

            if error < self.convergence_goal or e >= 10000:
                self.statistics['epochs'] = e
                self.statistics['time_train'] = \
                    (dt.datetime.now() - start_time).total_seconds() * 1000
                return 1

            self.back_propagation(nn, x_batch, y_batch)

            # WEIGHTS' UPDATE #############################################

            for layer in range(nn.n_layers):
                weight_decay = reg.regularization(nn.W[layer],
                                                  self.reg_lambda,
                                                  self.reg_method)

                self.velocity_b[layer] = (self.momentum['alpha'] *
                                          self.velocity_b[layer]) \
                    - (self.eta / x_batch.shape[0]) * self.delta_b[layer]
                nn.b[layer] += self.velocity_b[layer]

                self.velocity_W[layer] = (self.momentum['alpha'] *
                                          self.velocity_W[layer]) \
                    - (self.eta / x_batch.shape[0]) * self.delta_W[layer]
                nn.W[layer] += self.velocity_W[layer] - weight_decay

        self.error_per_epochs.append(np.sum(error_per_batch))

        # IN LOCO VALIDATION ##############################################

        if X_va is not None:
            error_va = self.forward_propagation(nn, X_va, y_va)
            self.error_per_epochs_va.append(error_va)
            y_pred_va = self.h[-1].reshape(-1, 1)

        # PERFORMANCE ESTIMATION ##########################################

        if nn.task == 'classifier':
            y_pred_bin = np.apply_along_axis(lambda x: 0 if x < .5 else 1,
                                             1, y_pred).reshape(-1, 1)
            y_pred_bin_va = np.apply_along_axis(
                lambda x: 0 if x < .5 else 1, 1, y_pred_va).reshape(-1, 1)

            bin_assess = metrics.BinaryClassifierAssessment(y, y_pred_bin,
                                                            printing=False)
            bin_assess_va = metrics.BinaryClassifierAssessment(
                y_va, y_pred_bin_va, printing=False)

            self.accuracy_per_epochs.append(bin_assess.accuracy)
            self.accuracy_per_epochs_va.append(bin_assess_va.accuracy)
            self.f1_score_per_epochs.append(bin_assess.f1_score)
            self.f1_score_per_epochs_va.append(bin_assess_va.f1_score)

            if bin_assess_va.accuracy > self.max_accuracy:
                self.max_accuracy = bin_assess_va.accuracy
                self.statistics['acc_epoch'] = e

        # GRADIENT'S NORM STORING #########################################

        norm_gradient = np.linalg.norm(self.g)
        self.gradient_norm_per_epochs.append(norm_gradient)

        self.time_per_epochs.append(
            (dt.datetime.now() - start_time).total_seconds() * 1000)
        e += 1

        if (norm_gradient <= self.convergence_goal) or \
                (epochs is not None and e == epochs):
            self.statistics['epochs'] = e
            self.statistics['time_train'] = \
                (dt.datetime.now() - start_time).total_seconds() * 1000
            return 0

    self.statistics['epochs'] = e
    self.statistics['time_train'] = \
        (dt.datetime.now() - start_time).total_seconds() * 1000
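# Standalone sketch of the per-layer momentum update performed in the weights'
# update loop above; the function names and argument order are illustrative
# only, not part of the original optimizer.
def nesterov_lookahead(W, velocity, alpha):
    """Look-ahead step applied before back-propagation when momentum is 'nesterov'."""
    return W + alpha * velocity


def momentum_update(W, velocity, grad, eta, alpha, batch_size, weight_decay=0.0):
    """Velocity and weight update applied after back-propagation, per layer."""
    velocity = alpha * velocity - (eta / batch_size) * grad
    return W + velocity - weight_decay, velocity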
def train(self, X, y, X_va=None, y_va=None):
    """
    This function implements the neural network's training routine.

    Parameters
    ----------
    X: numpy.ndarray
        the design matrix

    y: numpy.ndarray
        the target column vector

    X_va: numpy.ndarray
        the design matrix used for the validation
        (Default value = None)

    y_va: numpy.ndarray
        the target column vector used for the validation
        (Default value = None)

    Returns
    -------
    """
    velocity_W = [0 for i in range(self.n_layers)]
    velocity_b = [0 for i in range(self.n_layers)]

    self.error_per_epochs = []
    self.error_per_epochs_old = []
    self.error_per_batch = []
    self.mee_per_epochs = []
    if X_va is not None:
        self.error_per_epochs_va = []
        self.mee_per_epochs_va = []
    else:
        self.error_per_epochs_va = None
        self.mee_per_epochs_va = None
    if self.task == 'classifier':
        self.accuracy_per_epochs = []
        self.accuracy_per_epochs_va = []

    self.stop_GL = None
    self.stop_PQ = None
    stop_GL = False
    stop_PQ = False

    # for e in tqdm(range(self.epochs), desc='TRAINING'):
    for e in range(self.epochs):
        error_per_batch = []

        dataset = np.hstack((X, y))
        np.random.shuffle(dataset)
        X, y = np.hsplit(dataset, [X.shape[1]])

        for b_start in np.arange(0, X.shape[0], self.batch_size):
            # BACK-PROPAGATION ALGORITHM ##################################

            x_batch = X[b_start:b_start + self.batch_size, :]
            y_batch = y[b_start:b_start + self.batch_size, :]

            error = self.forward_propagation(x_batch, y_batch)
            self.error_per_batch.append(error)
            error_per_batch.append(error)

            self.back_propagation(x_batch, y_batch)

            # WEIGHTS' UPDATE #############################################

            for layer in range(self.n_layers):
                weight_decay = reg.regularization(self.W[layer],
                                                  self.reg_lambda,
                                                  self.reg_method)

                velocity_b[layer] = (self.alpha * velocity_b[layer]) \
                    - (self.eta / x_batch.shape[0]) * self.delta_b[layer]
                self.b[layer] += velocity_b[layer]

                velocity_W[layer] = (self.alpha * velocity_W[layer]) \
                    - (self.eta / x_batch.shape[0]) * self.delta_W[layer]
                self.W[layer] += velocity_W[layer] - weight_decay
            ###################################################################

        # COMPUTING OVERALL MSE ###########################################

        self.error_per_epochs_old.append(
            np.sum(error_per_batch) / X.shape[0])

        y_pred = self.predict(X)
        self.error_per_epochs.append(metrics.mse(y, y_pred))
        self.mee_per_epochs.append(metrics.mee(y, y_pred))

        if X_va is not None:
            y_pred_va = self.predict(X_va)
            self.error_per_epochs_va.append(metrics.mse(y_va, y_pred_va))
            self.mee_per_epochs_va.append(metrics.mee(y_va, y_pred_va))

        if self.task == 'classifier':
            y_pred_bin = np.apply_along_axis(
                lambda x: 0 if x < .5 else 1, 1, y_pred).reshape(-1, 1)
            y_pred_bin_va = np.apply_along_axis(
                lambda x: 0 if x < .5 else 1, 1, y_pred_va).reshape(-1, 1)

            bin_assess = metrics.BinaryClassifierAssessment(
                y, y_pred_bin, printing=False)
            bin_assess_va = metrics.BinaryClassifierAssessment(
                y_va, y_pred_bin_va, printing=False)

            self.accuracy_per_epochs.append(bin_assess.accuracy)
            self.accuracy_per_epochs_va.append(bin_assess_va.accuracy)

        # CHECKING FOR EARLY STOPPING #####################################

        if self.early_stop is not None \
                and e > self.early_stop_min_epochs \
                and (e + 1) % 5 == 0:

            # GL method: relative increase of the current validation error
            # over the minimum one observed so far
            generalization_loss = 100 \
                * ((self.error_per_epochs_va[e]
                    / min(self.error_per_epochs_va)) - 1)

            if generalization_loss > self.epsilon:
                stop_GL = True

            # PQ method: GL divided by the training progress computed over
            # the last strip of 5 epochs
            if self.early_stop != 'GL':  # PQ or 'testing'
                min_e_per_strip = min(
                    self.error_per_epochs_va[e - 4:e + 1])
                sum_per_strip = sum(self.error_per_epochs_va[e - 4:e + 1])
                progress = 1000 * \
                    ((sum_per_strip / (5 * min_e_per_strip)) - 1)

                progress_quotient = generalization_loss / progress

                if progress_quotient > self.epsilon:
                    stop_PQ = True

            # stopping
            if stop_GL and self.stop_GL is None:
                self.stop_GL = e
            if stop_PQ and self.stop_PQ is None:
                self.stop_PQ = e

            if self.early_stop != 'testing' and (stop_GL or stop_PQ):
                break
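# Standalone sketch of the GL and PQ early-stopping quantities computed above;
# it mirrors the code (which evaluates the 5-epoch strip on the validation
# error series), and the function names are illustrative only.
def generalization_loss(errors_va):
    """GL: percentage increase of the current validation error over the best one."""
    return 100.0 * (errors_va[-1] / min(errors_va) - 1.0)


def training_progress(errors_va, strip=5):
    """P_k: how much the error still decreases within the last strip (per mil)."""
    last = errors_va[-strip:]
    return 1000.0 * (sum(last) / (strip * min(last)) - 1.0)


# PQ criterion: stop when GL / P_k exceeds the chosen threshold (self.epsilon above)
#   stop_PQ = generalization_loss(errs_va) / training_progress(errs_va) > epsilon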
nn.error_per_epochs_va[:epochs_plot],
nn.get_params(),
task='validation',
accuracy_h_plot=True,
accuracy_per_epochs=nn.accuracy_per_epochs[:epochs_plot],
accuracy_per_epochs_va=nn.accuracy_per_epochs_va[:epochs_plot],
fname=preliminary_name)

# u.plot_learning_curve(nn, fname='../images/monks_learning_curve.pdf')
# u.plot_learning_curve(nn, fname='../images/monks_{}_{}_{}.pdf'.format(dataset, 'stochastic', 'notearly', 'relu'))

y_pred = nn.predict(X_test)
y_pred = np.apply_along_axis(lambda x: 0 if x < .5 else 1,
                             1, y_pred).reshape(-1, 1)
# y_pred = np.round(y_pred)

bca = metrics.BinaryClassifierAssessment(y_pred, y_test, printing=True)

y_pred_test = np.round(nn.predict(X_test))
metrics.BinaryClassifierAssessment(y_test, y_pred_test)

###########################################################
# interactive inspection of the network's internals
nn.h[0].shape
nn.h[1]
nn.W[0]
np.round(nn.h[0], 2)
###########################################################

# EXPERIMENT GRID SEARCH