def worker(user_rating, train, test, Itrain, Itest, movies_data, epochs, status, user_id=2):
    """Train a factorisation model, report its errors and print recommendations.

    Parameters
    ----------
    user_rating, train, test, Itrain, Itest
        Forwarded unchanged to the training, ``accuracy`` and ``recommend``
        helpers; their exact layout is defined by those helpers.
    movies_data
        Object indexed with ``.iloc[i]`` whose rows expose ``"movieId"`` and
        ``"title"``.
    epochs
        Forwarded to the training routine.
    status : str
        ``"GD"`` trains with ``MMF`` and plots to ``Gradient_error.png``;
        ``"R_GD"`` trains with ``MMF_r`` and plots to
        ``Randomize_gradient_error.png``.
    user_id : int, optional
        User for whom recommendations are printed. Defaults to 2, the value
        that used to be hard-coded.

    Raises
    ------
    ValueError
        If ``status`` is neither ``"GD"`` nor ``"R_GD"``. Previously this
        surfaced later as an unbound-variable error.
    """
    if status == "GD":
        error, users, movies = MMF(user_rating, train, Itrain, epochs)
        ploting = plot_error(error, "Gradient_error.png")
    elif status == "R_GD":
        # NOTE(review): `beta` is not a parameter of this function, so it has
        # to exist in the enclosing module scope - confirm.
        error, users, movies = MMF_r(user_rating, train, Itrain, beta, epochs)
        ploting = plot_error(error, "Randomize_gradient_error.png")
    else:
        raise ValueError(
            "Unknown status {!r}; expected 'GD' or 'R_GD'.".format(status))

    # Deliberately `== False` rather than `not ploting`: a plot helper that
    # returns None must not be reported as a failure.
    if ploting == False:  # noqa: E712
        print("Something is wrong with errors.")
        print("Error you have : ", error)

    print("Training error : ", accuracy(users, movies, train, Itrain))
    print("Testing error : ", accuracy(users, movies, test, Itest))
    print("\n**********************************************************\n")
    print("It's time to recommend : \n")
    print("Enter User ID : ")
    movie_index = recommend(users, movies, user_id, user_rating)
    for i in movie_index:
        temp = movies_data.iloc[i]
        print("\n", temp["movieId"], "\t\t\t", temp["title"])
def model_selection(self, grid, plot=False, fpath='../images/'):
    """
    Holdout model selection

    Trains one network per hyperparameter set, scores each on the
    validation split, then retrains the best configuration on the design
    set.

    Parameters
    ----------
    grid : instance of HyperRandomGrid class
        hyperparameter grid; it is consumed exactly once, so the
        parameters stored in ``self.df_pars`` are the ones actually trained

    plot : bool
        if plot=True plots the learning curve for each grid parameter

    fpath : str
        path for images storing

    Returns
    -------
    neural network object
        the network retrained on the design set (also stored in
        ``self.model``)

    Raises
    ------
    ValueError
        if the grid yields no hyperparameter set

    Notes
    -----
    Sets ``self.fpath``, ``self.best_index``, ``self.best_hyperparams``
    (without ``'hidden_sizes'``, which is popped to build the retrained
    network), ``self.df_pars`` and ``self.model``.
    """
    self.fpath = fpath

    # Materialise the grid once. Iterating it a second time for the summary
    # table could yield different draws (random grid) or nothing at all
    # (one-shot iterator), desynchronising parameters and errors.
    grid_pars = list(grid)
    if not grid_pars:
        raise ValueError('hyperparameter grid is empty')

    params = []
    errors_va = []
    for i, pars in enumerate(grid_pars):
        net = nn.NeuralNetwork(self.X_train, self.y_train, **pars)
        net.train(self.X_train, self.y_train)
        print('trained')
        params.append(net.get_params())

        # assess on validation set
        errors_va.append(
            net.predict(self.X_va, self.y_va) / (self.X_va.shape[0]))

        if plot is True:
            u.plot_error(net, fname=fpath
                         + 'learning_curve_{}.png'.format(i))

    # choosing the best hyperparameters
    self.best_index = np.argmin(errors_va)
    best_hyperparams = params[self.best_index]

    # retraining on design set
    net_retrained = nn.NeuralNetwork(
        hidden_sizes=best_hyperparams.pop('hidden_sizes'))
    net_retrained.train(self.X_design, self.y_design, **best_hyperparams)

    df_pars = pd.DataFrame(grid_pars)
    df_pars['error'] = errors_va

    self.best_hyperparams = best_hyperparams
    self.df_pars = df_pars
    self.model = net_retrained

    return self.model
def _report_and_recommend(errors, users, movies, plot_file, user_rating,
                          train, test, Itrain, Itest, movies_data):
    """Plot the error history, print train/test errors and recommendations."""
    ploting = plot_error(errors, plot_file)
    # Deliberately `== False` rather than `not ploting`: a plot helper that
    # returns None must not be reported as a failure.
    if ploting == False:  # noqa: E712
        print("Something is wrong with errors.")
        print("Error you have : ", errors)

    print("Training error : ", accuracy(users, movies, train, Itrain))
    print("Testing error : ", accuracy(users, movies, test, Itest))
    print("\n**********************************************************\n")
    print("It's time to recommend : \n")
    print("Enter User ID : ")
    user_id = 2
    movie_index = recommend(users, movies, user_id, user_rating)
    for i in movie_index:
        temp = movies_data.iloc[i]
        print("\n", temp["movieId"], "\t\t\t", temp["title"])


def optimizer_function(user_rating, train, test, Itrain, Itest, movies_data):
    """Run the sliding-window and line-search optimisers and report on each.

    For each optimiser the error history is plotted
    (``Sliding_window_error.png`` / ``Line_search_error.png``), the training
    and testing errors are printed, and recommendations for user 2 are
    listed from ``movies_data``.

    Parameters
    ----------
    user_rating, train, test, Itrain, Itest
        Forwarded unchanged to the optimiser, ``accuracy`` and ``recommend``
        helpers.
    movies_data
        Object indexed with ``.iloc[i]`` whose rows expose ``"movieId"`` and
        ``"title"``.
    """
    print("\nInitiating sliding window optimizer : \n")
    errors, users, movies = MMF_sliding_window(user_rating, train, Itrain, 10000, 5)
    _report_and_recommend(errors, users, movies, "Sliding_window_error.png",
                          user_rating, train, test, Itrain, Itest, movies_data)

    print("\nInitiating line search optimizer : \n")
    errors, users, movies = MMF_line_search(user_rating, train, Itrain, 10000)
    _report_and_recommend(errors, users, movies, "Line_search_error.png",
                          user_rating, train, test, Itrain, Itest, movies_data)
def plots_info(opt, name):
    """Run ``opt`` and draw its trajectory, error and f-evolution plots.

    ``name`` is embedded in every plot title and handed to ``print_info``.
    ``error`` and ``error_to_optim`` are not parameters; they are taken from
    the enclosing scope.
    """
    axis_limit = 1.1

    plt.close('all')
    k_res, x_res, e_res, fx_res = calc_trajectory(
        opt, error, error_to_optim, more_data=True)

    # Trajectory on a large square canvas, clipped to the same range on
    # both axes.
    plt.figure(figsize=(13, 13))
    aux()
    plot_trayectory(
        x_res,
        title=f'Trayectoria {name}',
        k=k_res,
        with_lines=True,
        step_numbers=1000000,
    )
    plt.xlim(-axis_limit, axis_limit)
    plt.ylim(-axis_limit, axis_limit)

    # Error curve, textual summary and f evolution follow on a fresh figure.
    plt.figure()
    plot_error(e_res, title=f'Error {name}')
    print_info(k_res, x_res, e_res, error, f'{name}')
    plot_f_evolution(fx_res, title=f'Evolucion de f {name}')
# Row 0 of `res` is used as the x axis, rows 1 and 2 as mean / spread.
max_parents = res[0].astype(int)
mean_ll, std_ll = res[1], res[2]

#from_size, to_size, n_sample, n_restart, max_condset, alpha = parameters
#fig_title = curve.capitalize() + " for " + method + " on " + distribution \
#            + " data " + "generated from " + structure + " network\n" \
#            + "Mode: " + mode + ", Restarts: " + n_restart \
#            + ", Alpha: " + str(int(alpha)/100) \
#            + ", MaxCondSet: " + max_condset
fig_title = "Wine Train"
alpha_t = 0.4

fig, ax = plt.subplots()
# One batched call instead of five individual setters.
ax.set(
    xlabel='Maximum number of parents',
    ylabel='10-fold train log-probability / instance',
    xlim=[0, 4],
    ylim=(0, 3),
    title=fig_title,
)

ax.plot(max_parents, mean_ll)
ut.plot_error(max_parents, mean_ll, std_ll, alpha_t, ax=ax)

# Build the output path once; it is used for both saving and reporting.
pdf_path = path.join(fig_directory, res_file_name + ".pdf")
plt.savefig(pdf_path, transparent=True)
print("Saving figure in ", pdf_path)
plt.show()
#ax.yaxis.tick_right()
#nameOfPlot = 'GDP per hour (constant prices, indexed to 2007)'
#plt.ylabel(nameOfPlot,rotation=0)
#ax.legend(frameon=False, loc='upper left',ncol=2,handlelength=4)

alpha_t = 0.4

# Line style and colour used for each method's curve and error band.
curve_styles = {
    "cpc": ("-.", "green"),
    "elidan": ("--", "orange"),
}

if method in curve_styles:
    line_style, colour = curve_styles[method]
    ax.plot(sizes, res[3], linestyle=line_style, linewidth=1.25,
            color=colour, label=method)
    ut.plot_error(sizes, mean_fscore, std_fscore, alpha_t, ax=ax,
                  color=colour)
def _estimate_gaussian_params(x1, x2):
    """Return ``(mu1, sigma1, mu2, sigma2)`` estimated from two class samples."""
    return (np.mean(x1), math.sqrt(np.var(x1)),
            np.mean(x2), math.sqrt(np.var(x2)))


def dataset_test(classifier, validation, sample_estimate=False, shuffle=True, real_dataset=False):
    """
    Run 10 repetitions for each sample size in a fixed list, compute the
    class-1, class-2 and overall error rates with the chosen validation
    scheme, and record the results.

    For every sample size the average and variance of the three error rates
    are written to rows 29-34 of sheet ``Foglio1`` in ``error-estimates.xlsx``
    (one column per size, starting at column 3), the workbook is saved and
    ``plot_error`` is called. For the first size (100) the individual
    per-repetition errors are also written from row 14 in columns 2-4.

    :param classifier: choose between bayes, kNN, MLP and Tree
    :param validation: choose between resub, holdout and cross
    :param sample_estimate: works only with bayes classifier; when True the
        Gaussian parameters given to ``bayes_rule`` are estimated from the
        samples instead of using the generating parameters
    :param shuffle: if False dataset is composed by all class1 elements
        followed by all class2 elements, otherwise all samples are mixed-up
    :param real_dataset: if True use the bank loan dataset, else dataset is
        generated from two 1-d gaussian distribution, does not work with bayes
        classifier
    :return: print results in a excel file and save the plots in the plot
        folder
    """
    wb = load_workbook("error-estimates.xlsx")
    sheet1 = wb['Foglio1']
    row = 29
    column = 3
    # each entry is the number of samples per class
    test_list = [100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000]
    e1, e2, b1, b2, tmp1, tmp2, tmp = 0, 0, 0, 0, 0, 0, 0

    # Parameters of the two generating Gaussians. They are never overwritten:
    # sample estimates are kept separate so that every repetition draws from
    # the same distributions.
    mu1 = 0
    sigma1 = math.sqrt(1)
    mu2 = 0
    sigma2 = math.sqrt(0.25)
    true_params = (mu1, sigma1, mu2, sigma2)

    clf = ClassifierSelector(classifier)

    for test in test_list:
        # Fresh accumulators per sample size, so the statistics written below
        # describe this size only.
        error1, error2, error = [], [], []

        for _ in range(10):
            if real_dataset:
                x, y = dataset_loader(test)
                if shuffle:
                    shuffle_idx = np.arange(len(y))
                    np.random.shuffle(shuffle_idx)
                    x = x[shuffle_idx, :]
                    y = y[shuffle_idx]
            else:
                x1 = np.random.normal(mu1, sigma1, test)
                x2 = np.random.normal(mu2, sigma2, test)
                x = np.concatenate((x1, x2), axis=0)
                y1 = np.zeros(test)
                y2 = np.full(test, 1)
                y = np.concatenate((y1, y2), axis=0)
                if shuffle:
                    shuffle_idx = np.arange(len(y))
                    np.random.shuffle(shuffle_idx)
                    x = x[shuffle_idx]
                    y = y[shuffle_idx]

            if validation == 'resub':
                if classifier == 'bayes':
                    if sample_estimate:
                        # x1/x2 only exist for the synthetic dataset.
                        params = _estimate_gaussian_params(x1, x2)
                    else:
                        params = true_params
                    y_pred, e1, e2, b1, b2 = bayes_rule(x, *params, 0.5, 0.5)
                else:
                    clf.fit(x, y)
                    y_pred = clf.predict(x)
                conf_matrix = metrics.confusion_matrix(y, y_pred)
                tmp1 = conf_matrix[0, 1] / list(y).count(0)
                tmp2 = conf_matrix[1, 0] / list(y).count(1)
                tmp = (conf_matrix[0, 1] + conf_matrix[1, 0]) / len(y)

            if validation == 'holdout':
                x_train, x_test, y_train, y_test = train_test_split(
                    x, y, test_size=0.4, random_state=0, stratify=y)
                if classifier == 'bayes':
                    if sample_estimate:
                        params = _estimate_gaussian_params(
                            x_train[y_train == 0], x_train[y_train == 1])
                    else:
                        params = true_params
                    y_pred, e1, e2, b1, b2 = bayes_rule(
                        x_test, *params, 0.5, 0.5)
                else:
                    clf.fit(x_train, y_train)
                    y_pred = clf.predict(x_test)
                conf_matrix = metrics.confusion_matrix(y_test, y_pred)
                tmp1 = conf_matrix[0, 1] / list(y_test).count(0)
                tmp2 = conf_matrix[1, 0] / list(y_test).count(1)
                tmp = (conf_matrix[0, 1] + conf_matrix[1, 0]) / len(y_test)

            if validation == 'cross':
                cross1 = []
                cross2 = []
                cross = []
                # The splitter is given a 2-d feature array.
                if len(x.shape) == 1:
                    x = np.reshape(x, [len(x), 1])
                skf = StratifiedKFold(n_splits=10)
                for train_index, test_index in skf.split(x, y):
                    x_train, x_test = x[train_index], x[test_index]
                    y_train, y_test = y[train_index], y[test_index]
                    if classifier == 'bayes':
                        if sample_estimate:
                            params = _estimate_gaussian_params(
                                x_train[y_train == 0], x_train[y_train == 1])
                        else:
                            params = true_params
                        y_pred, e1, e2, b1, b2 = bayes_rule(
                            x_test, *params, 0.5, 0.5)
                    else:
                        clf.fit(x_train, y_train)
                        y_pred = clf.predict(x_test)
                    conf_matrix = metrics.confusion_matrix(y_test, y_pred)
                    c1 = conf_matrix[0, 1] / list(y_test).count(0)
                    c2 = conf_matrix[1, 0] / list(y_test).count(1)
                    c = (conf_matrix[0, 1] + conf_matrix[1, 0]) / len(y_test)
                    cross1.append(c1)
                    cross2.append(c2)
                    cross.append(c)
                # The fold average is the error of this repetition.
                tmp1 = np.average(cross1)
                tmp2 = np.average(cross2)
                tmp = np.average(cross)

            error1.append(tmp1)
            error2.append(tmp2)
            error.append(tmp)

        # Summary statistics for this sample size: one column per size.
        sheet1.cell(row=row, column=column).value = np.average(error1)
        sheet1.cell(row=row + 1, column=column).value = np.var(error1)
        sheet1.cell(row=row + 2, column=column).value = np.average(error2)
        sheet1.cell(row=row + 3, column=column).value = np.var(error2)
        sheet1.cell(row=row + 4, column=column).value = np.average(error)
        sheet1.cell(row=row + 5, column=column).value = np.var(error)

        if test == 100:
            # Raw per-repetition errors, written only for the first size.
            for col1, values in ((2, error1), (3, error2), (4, error)):
                for offset, er in enumerate(values):
                    sheet1.cell(row=14 + offset, column=col1).value = er

        wb.save("error-estimates.xlsx")
        column += 1

        if classifier == 'bayes' and validation == 'resub' and not sample_estimate:
            info = [classifier, 'generic', test]
        else:
            info = [classifier, validation, test]
        # plot_hist(x1, x2, info)
        plot_error(error, e1 + e2, info)
        # plot_distr(mu1, sigma1, mu2, sigma2, b1, b2, info)

    print("Bayes error1", e1)
    print("Bayes error2", e2)
    print("Bayes error", e1 + e2)
    print("Bayes border: {}, {}".format(b1, b2))
# Objective built from A and b, the error measures shared by every run below,
# and the common starting point.
f = MinSquareRoot(A, b)
error_to_optim = ErrorToOptim(f.min())
error = Error()
# NOTE(review): an integer fill value makes this an integer array; if the
# optimizer updates x0 in place, fractional steps would be truncated - confirm.
x0 = np.full((2, 1), 0)

#%%
# Constant step size: 1 / (2 * ||A||^2), using the ord=2 norm of A.
plt.close('all')
step = 1 / (2 * np.linalg.norm(A, 2)**2)
opt = Optimizer(f, 'constante', step=step, x0=x0)
k, x, e = calc_trajectory(opt, error, error_to_optim)
plt.figure()
plot_trayectory(x, title='Trayectoria Paso constante', k=k)
plt.figure()
plot_error(e, title='Paso constante')
print_info(k, x, e, error, 'Paso constante')

#%%
# Decreasing step size.
opt = Optimizer(f, 'decreciente', constant=0.001, x0=x0)
k, x, e = calc_trajectory(opt, error, error_to_optim)
plt.figure()
plot_trayectory(x, title='Trayectoria Paso decreciente', k=k)
plt.figure()
plot_error(e, title='Paso decreciente')
# Label fixed: this run was previously reported as 'Paso constante'.
print_info(k, x, e, error, 'Paso decreciente')

#%%
# Line search.
opt = Optimizer(f, 'line_search', n_points=100, long=0.001, x0=x0)
k, x, e = calc_trajectory(opt, error, error_to_optim)
plt.figure()
# --- L1 regularisation: plots and test-set prediction ----------------------
print("Training Completed using L1 Regularization")
print("Plot Accuracy")
model_l1.plot_accuracy("L1 Regularization")
print("Plot Error")
model_l1.plot_error("L1 Regularization")

# Read the test CSV into the shared `data` object, then run the L1 model on it.
data.read(config.TEST_PATH)
print("Test Data Read Successfully")
model_l1.test(data)
print("Predicted test values using L1 Regularization!!!!")
#"""

# --- L2 regularisation -----------------------------------------------------
# `data` was last read from the test path above, so the training file is read
# again before the L2 model is trained.
data.read(config.TRAIN_PATH)
print("train data read successfully")
# The model is built from data.size[1]
# (presumably the number of input columns - TODO confirm).
model_l2 = model_L2.Model(data.size[1])
acc_list_L2, error_list_L2 = model_l2.train(data)
print("Training Completed using L2 Regularization")
print("Plot Accuracy")
model_l2.plot_accuracy("L2 Regularization")
print("Plot Error")
model_l2.plot_error("L2 Regularization")

# Read the test CSV again and run the L2 model on it.
data.read(config.TEST_PATH)
print("Test Data Read Successfully")
model_l2.test(data)
print("Predicted test values using L2 Regularization!!!!")
#"""

# Combined plots of three accuracy histories and three error histories;
# acc_list / error_list and the *_L1 lists are defined earlier in the script.
utils.plot_accuracy(acc_list, acc_list_L1, acc_list_L2)
utils.plot_error(error_list, error_list_L1, error_list_L2)
from_size, to_size, n_sample, n_restart, max_condset, alpha = parameters fig_title = curve.capitalize() + " for " + method + " on " + distribution \ + " data " + "generated from " + structure + " network\n" \ + ", Restarts: " + n_restart \ + ", Alpha: " + str(int(alpha)/100) \ + ", MaxCondSet: " + max_condset elif method == "elidan": from_size, to_size, n_sample, n_restart, max_parents, hc_restart = parameters fig_title = curve.capitalize() + " for " + method + " on " + distribution \ + " data " + "generated from " + structure + " network\n" \ + ", Restarts: " + n_restart \ + ", HCRestarts: " + hc_restart \ + ", MaxParents: " + max_parents fig, ax = plt.subplots() ax.set_xlabel('Size') ax.set_ylabel('Log-probability / instance') ax.set_title(fig_title) alpha_t = 0.4 ax.set_xlim([int(from_size), int(to_size)]) #ax.set_ylim(0.2,0.7) ax.plot(sizes, mean_ll) ut.plot_error(sizes, mean_ll, std_ll, alpha_t, ax=ax) plt.savefig(path.join(fig_directory, res_file_name + ".pdf"), transparent=True) print("Saving figure in ", path.join(fig_directory, res_file_name + ".pdf"))
# Row 0 of each result array is used as the x axis, rows 1 and 2 as the
# curve and its spread.
sizes_elidan = res_elidan[0].astype(int)
mean_shd_cpc, std_shd_cpc = res_cpc[1], res_cpc[2]
mean_shd_elidan, std_shd_elidan = res_elidan[1], res_elidan[2]

fig, ax = plt.subplots()
ax.set_xlabel('')
ax.set_ylabel('')

alpha_t = 0.4

# Line style and colour used for each method's curve and error band.
curve_styles = {
    "cpc": ("-.", "green"),
    "elidan": ("--", "orange"),
}

# Each entry: (label, x values, curve values, band mean, band std).
if method == "both":
    curves = [
        ("cpc", sizes_cpc, res_cpc[1], mean_shd_cpc, std_shd_cpc),
        ("elidan", sizes_elidan, res_elidan[1],
         mean_shd_elidan, std_shd_elidan),
    ]
elif method in curve_styles:
    curves = [(method, sizes, res[1], mean_shd, std_shd)]
else:
    curves = []

for curve_label, curve_x, curve_y, band_mean, band_std in curves:
    line_style, colour = curve_styles[curve_label]
    ax.plot(curve_x, curve_y, linestyle=line_style, linewidth=1.25,
            color=colour, label=curve_label)
    ut.plot_error(curve_x, band_mean, band_std, alpha_t, ax=ax, color=colour)

# Pin the lower y limit to 0 while keeping the current upper limit.
ax.set_ylim([0, ax.get_ylim()[1]])
ax.set_xlim([int(from_size), int(to_size)])

# NOTE(review): the legend is drawn for a single method only, not for
# "both", although both curves are labelled there - confirm this is intended.
if method in curve_styles:
    ax.legend()

plt.savefig(path.join(fig_directory, res_file_name + ".pdf"), transparent=True)