# Right-hand panel of the current figure: training vs. validation error as a
# function of the regularization factor (both axes logarithmic).
# The exponent is rounded so the title stays readable, matching the
# multinomial-regression cell below.
plt.title('Optimal lambda: 1e{0}'.format(np.round(np.log10(opt_lambda), 4)))
plt.loglog(lambdas, train_err_vs_lambda.T, 'b.-',
           lambdas, test_err_vs_lambda.T, 'r.-')
plt.xlabel('Regularization factor')
plt.ylabel('Squared error (crossvalidation)')
plt.legend(['Train error', 'Validation error'])
plt.grid()

print("Optimal regularization strenght is: {0}".format(round(opt_lambda, 4)))

#%%
#------- REGULARIZED MULTINOMIAL REGRESSION ---------------------------

# Parameters
lambdas = np.logspace(-5, 5, 20)  # 20 candidates, log-spaced from 1e-5 to 1e5
cvf = 10                          # fold count forwarded to the helper

# The helper returns, in order: optimal validation error, optimal lambda,
# mean coefficients per lambda, training error per lambda and
# validation error per lambda.
(opt_val_err,
 opt_lambda,
 mean_w_vs_lambda,
 train_err_vs_lambda,
 test_err_vs_lambda) = regmultinominal_regression(xIn, y_class, lambdas,
                                                  cvf=cvf)

# Display the results for the last cross-validation fold
plt.figure(1, figsize=(12, 8))

# Left panel: coefficient values against the regularization factor.
plt.subplot(1, 2, 1)
plt.semilogx(lambdas, mean_w_vs_lambda.T[:, 1:], '.-')  # Don't plot the bias term
plt.xlabel('Regularization factor')
plt.ylabel('Mean Coefficient Values')
plt.grid()
plt.legend(attributeNames[1:], loc='best')

# Right panel: training vs. validation error against the regularization factor.
plt.subplot(1, 2, 2)
plt.title('Optimal lambda: 1e{0}'.format(np.round(np.log10(opt_lambda), 4)))
plt.loglog(lambdas, train_err_vs_lambda.T, 'b.-',
           lambdas, test_err_vs_lambda.T, 'r.-')
plt.xlabel('Regularization factor')
plt.ylabel('Squared error (crossvalidation)')
def _misclassification_rate(y_true, y_pred):
    """Return the fraction of predictions that differ from the true labels.

    Both inputs are flattened first, so a column-shaped label array (N, 1)
    is compared element-wise with a flat prediction array (N,) instead of
    being broadcast to an (N, N) matrix.
    """
    y_true = np.ravel(y_true)
    y_pred = np.ravel(y_pred)
    return np.sum(y_pred != y_true) / len(y_true)


def twoLevelCV_classification(xIn, yIn, models, K1, K2, lambdas, hidden_units,
                              CV_ann, n_replicates, max_iter, tolerance):
    """Compare three classifiers with two-level (nested) cross-validation.

    The model at each position of ``models`` is fixed by its index:
    0 = L2-regularized multinomial logistic regression, 1 = feed-forward
    neural network, 2 = majority-class baseline. Only ``len(models)`` is
    used; the entries themselves are never read.

    For every outer fold the partition data is split again into K2 inner
    folds. In each inner fold every model is tuned and trained on the inner
    training split and scored on the inner validation split. The inner fold
    with the lowest validation error supplies the model that is then scored
    on the outer test split.

    Parameters
    ----------
    xIn : 2-D numpy array of observations (rows) by attributes (columns).
    yIn : class labels, indexable by row. The baseline and the network are
        hard-coded for the three classes 0, 1 and 2.
    models : sized collection; its length sets the number of result columns.
        The ``r`` matrix requires at least three entries.
    K1, K2 : number of outer and inner folds.
    lambdas : candidate regularization strengths, forwarded to
        ``regmultinominal_regression``.
    hidden_units, CV_ann : forwarded unchanged to ``ann_multiclass_validate``
        (presumably the candidate hidden-layer sizes and the splitter used
        for that search -- confirm against that helper).
    n_replicates, max_iter, tolerance : forwarded to
        ``ann_multiclass_validate`` and ``train_neural_net``.

    Returns
    -------
    error_test : (K1, len(models)) misclassification rate on each outer
        test fold.
    outer_lambdas : (K1,) regularization strength of the logistic regression
        model selected in each outer fold.
    outer_hidden_units : (K1,) hidden-unit count of the network selected in
        each outer fold.
    r : (K1, len(models)) per-fold differences in test error.
        Column 0: ANN - logistic regression, column 1: ANN - baseline,
        column 2: logistic regression - baseline.
    estimatedGenError : mean of ``error_test`` over the outer folds, rounded
        to four decimals.
    """
    M = xIn.shape[1]
    n_models = len(models)
    CV_outer = model_selection.KFold(n_splits=K1, shuffle=True)
    CV_inner = model_selection.KFold(n_splits=K2, shuffle=True)

    # NaN-filled buffers make an unfilled cell obvious instead of exposing
    # whatever happened to be in memory.
    error_train = np.full((K2, n_models), np.nan)
    error_val = np.full((K2, n_models), np.nan)
    error_test = np.full((K1, n_models), np.nan)
    inner_lambdas = np.zeros(K2)       # optimal lambda per inner fold
    outer_lambdas = np.zeros(K1)       # lambda of the selected model per outer fold
    inner_hidden_units = np.zeros(K2)  # optimal hidden units per inner fold
    outer_hidden_units = np.zeros(K1)  # hidden units of the selected model per outer fold

    # Per-fold error differences used by the correlated t-test
    r = np.full((K1, n_models), np.nan)

    # Outer cross-validation loop. Performance evaluation
    for k1, (par_index, test_index) in enumerate(CV_outer.split(xIn)):
        # Extract partition and test set for the current outer fold
        X_par = xIn[par_index, :]
        y_par = yIn[par_index]
        X_test = xIn[test_index, :]
        y_test = yIn[test_index]

        # Trained models of this outer fold, one entry per inner fold
        models_rmr = []
        models_ann = []
        models_baseline = []

        # Inner cross-validation loop. Model selection and parameter optimization
        for k2, (train_index, val_index) in enumerate(CV_inner.split(X_par)):
            print("\nOuter Iteration {0}/{1} -----------------------------".
                  format(k1 + 1, K1))
            print("\nInner Iteration {0}/{1} -----------------------------".
                  format(k2 + 1, K2))

            # Extract train and validation set for the current inner fold
            X_train = X_par[train_index, :]
            y_train = y_par[train_index]
            X_val = X_par[val_index, :]
            y_val = y_par[val_index]

            for s in range(n_models):
                if s == 0:  # REGULARIZED MULTINOMINAL LOGISTIC REGRESSION
                    print(
                        "\nInner {}/{} - Regularized Multinominal Regression".
                        format(k2 + 1, K2))
                    # Tune lambda on the inner training split only. Using the
                    # full data set here would let the validation and test
                    # observations influence model selection.
                    opt_lambda = regmultinominal_regression(
                        X_train, y_train, lambdas, cvf=10)[1]
                    # Save the value of the optimal regularization strength
                    inner_lambdas[k2] = opt_lambda
                    print("Optimal Lambda = {}".format(np.round(opt_lambda, 3)))

                    # Fit multinomial logistic regression model
                    modelRMR = lm.LogisticRegression(solver='lbfgs',
                                                     multi_class='multinomial',
                                                     tol=1e-4,
                                                     random_state=1,
                                                     penalty='l2',
                                                     C=1 / opt_lambda,
                                                     max_iter=1000)
                    m = modelRMR.fit(X_train, y_train)
                    # Save the trained model
                    models_rmr.append(m)

                    # Error rate = misclassified observations / observations
                    error_train[k2, s] = _misclassification_rate(
                        y_train, m.predict(X_train))
                    error_val[k2, s] = _misclassification_rate(
                        y_val, m.predict(X_val))

                elif s == 1:  # ANN MULTI-CLASSIFICATION
                    print("\nInner {}/{} - ANN MultiClassification".format(
                        k2 + 1, K2))
                    opt_n_hidden_units = ann_multiclass_validate(
                        X_train,
                        y_train,
                        3,
                        hidden_units,
                        CV_ann,
                        n_replicates=n_replicates,
                        max_iter=max_iter,
                        tolerance=tolerance)[0]
                    inner_hidden_units[k2] = opt_n_hidden_units
                    # Layer sizes must be plain integers
                    n_hidden = int(opt_n_hidden_units)

                    # Factory for a fresh network; train_neural_net is given
                    # the callable rather than an instance.
                    def build_net(n_hidden=n_hidden):
                        return torch.nn.Sequential(
                            torch.nn.Linear(M, n_hidden),
                            torch.nn.Tanhshrink(),
                            torch.nn.Linear(n_hidden, 3),
                            torch.nn.Softmax(dim=1))

                    # Training the ann model with the optimal number of hidden units
                    print(
                        "\n\tTraining the model with the optimal number of hidden units"
                    )
                    loss_fn = torch.nn.CrossEntropyLoss()
                    net = train_neural_net(
                        build_net,
                        loss_fn,
                        X=torch.from_numpy(X_train).float(),
                        y=torch.from_numpy(y_train).long().squeeze(),
                        n_replicates=n_replicates,
                        max_iter=max_iter,
                        tolerance=tolerance)[0]
                    # Save the trained model
                    models_ann.append(net)

                    # Class probabilities from the trained network
                    softmax_logits_train = net(
                        torch.from_numpy(X_train).float())
                    softmax_logits_val = net(torch.from_numpy(X_val).float())

                    # Estimated class = class with the highest probability
                    y_train_est = (torch.max(softmax_logits_train,
                                             dim=1)[1]).data.numpy()
                    y_val_est = (torch.max(softmax_logits_val,
                                           dim=1)[1]).data.numpy()

                    error_train[k2, s] = _misclassification_rate(
                        y_train, y_train_est)
                    error_val[k2, s] = _misclassification_rate(
                        y_val, y_val_est)

                elif s == 2:  # BASELINE CLASSIFICATION
                    print("\nInner {}/{} - Baseline Classification".format(
                        k2 + 1, K2))
                    # Number of training observations in each of the classes
                    y_train_flat = y_train.squeeze()
                    baseline_class = np.array(
                        [np.sum(y_train_flat == c) for c in range(3)])
                    models_baseline.append(baseline_class)

                    # The baseline always predicts the largest training class
                    majority_class = np.argmax(baseline_class)
                    error_train[k2, s] = _misclassification_rate(
                        y_train, np.full(y_train.shape[0], majority_class))
                    error_val[k2, s] = _misclassification_rate(
                        y_val, np.full(y_val.shape[0], majority_class))

                print("Validation error - Model {0}: {1}".format(
                    s + 1, np.round(error_val[k2, s], 4)))

        print("\nSummary Optimal models Outer {}/{}".format(k1 + 1, K1))

        for s in range(n_models):
            # Inner fold whose model had the lowest validation error
            best_fold = int(error_val[:, s].argmin())
            print("\n- The best model {0} was: CV number {1}".format(
                s + 1, best_fold + 1))

            if s == 0:
                modelrmr_opt = models_rmr[best_fold]
                # Misclassification rate, the same metric as the other two
                # models, so the columns of error_test are comparable.
                error_test[k1, s] = _misclassification_rate(
                    y_test, modelrmr_opt.predict(X_test))
                outer_lambdas[k1] = inner_lambdas[best_fold]

            elif s == 1:
                net_opt = models_ann[best_fold]
                softmax_logits_test = net_opt(torch.from_numpy(X_test).float())
                y_test_est = (torch.max(softmax_logits_test,
                                        dim=1)[1]).data.numpy()
                error_test[k1, s] = _misclassification_rate(y_test, y_test_est)
                outer_hidden_units[k1] = inner_hidden_units[best_fold]

            elif s == 2:
                modelbaseline_opt = models_baseline[best_fold]
                baseline_prediction_opt = np.full(
                    y_test.shape[0], np.argmax(modelbaseline_opt))
                error_test[k1, s] = _misclassification_rate(
                    y_test, baseline_prediction_opt)

        # Differences in test error between pairs of models on THIS outer
        # fold. Only row k1 of error_test is used: later rows have not been
        # filled yet.
        # column 0: ann vs lrl - column 1: ann vs baseline - column 2: lrl vs baseline
        r[k1, 0] = error_test[k1, 1] - error_test[k1, 0]
        r[k1, 1] = error_test[k1, 1] - error_test[k1, 2]
        r[k1, 2] = error_test[k1, 0] - error_test[k1, 2]

    print("\n")
    estimatedGenError = np.round(np.mean(error_test, axis=0), 4)
    print("\n")
    for s in range(n_models):
        print("Estimated Generalization Error for Model {0}: {1}".format(
            s + 1, estimatedGenError[s]))

    return error_test, outer_lambdas, outer_hidden_units, r, estimatedGenError