def worker(user_rating, train, test, Itrain, Itest, movies_data, epochs,
           status):
    """Train a factorisation model, report its errors and print recommendations.

    Parameters
    ----------
    user_rating, train, test, Itrain, Itest
        Forwarded unchanged to the training routine, ``accuracy`` and
        ``recommend``.
    movies_data
        Indexed with ``.iloc[i]``; each row must expose ``"movieId"`` and
        ``"title"``.
    epochs
        Forwarded to the selected training routine.
    status : str
        ``"GD"`` trains with ``MMF``; ``"R_GD"`` trains with ``MMF_r``.

    Raises
    ------
    ValueError
        If ``status`` is neither ``"GD"`` nor ``"R_GD"``.
    """
    if status == "GD":
        error, users, movies = MMF(user_rating, train, Itrain, epochs)
        ploting = plot_error(error, "Gradient_error.png")

    elif status == "R_GD":
        # NOTE(review): ``beta`` is neither a parameter nor a local of this
        # function -- presumably a module-level global; confirm it is defined.
        error, users, movies = MMF_r(user_rating, train, Itrain, beta, epochs)
        ploting = plot_error(error, "Randomize_gradient_error.png")

    else:
        # Without this guard an unknown status fell through and crashed
        # further down on the unbound ``ploting`` / ``error`` names.
        raise ValueError(
            "status must be 'GD' or 'R_GD', got {!r}".format(status))

    # Comparison kept as ``== False`` on purpose: a ``None`` return from
    # plot_error must not be reported as a failure.
    if ploting == False:
        print("Something is wrong with errors.")
        print("Error you have : ", error)

    print("Training error : ", accuracy(users, movies, train, Itrain))
    print("Testing error : ", accuracy(users, movies, test, Itest))

    print("\n**********************************************************\n")
    print("It's time to recommend : \n")
    print("Enter User ID : ")

    # The prompt above is only printed; no input is read and the user id is
    # fixed.
    user_id = 2

    movie_index = recommend(users, movies, user_id, user_rating)

    for i in movie_index:
        temp = movies_data.iloc[i]
        print("\n", temp["movieId"], "\t\t\t", temp["title"])
Example #2
0
    def model_selection(self, grid, plot=False, fpath='../images/'):
        """
        Holdout model selection.

        Trains one network per hyperparameter set in ``grid`` on the training
        split, scores each one on the validation split, then retrains a new
        network on the design set with the lowest-error hyperparameters.

        Parameters
        ----------
        grid : instance of HyperRandomGrid class
            hyperparameter grid; iterated exactly once
        plot : bool
            if plot=True plots the learning curve for each grid parameter

        fpath : str
            path for images storing

        Returns
        -------
        neural network object
            the network retrained on the design set (also stored in
            ``self.model``)

        Notes
        -----
        Also sets ``self.fpath``, ``self.best_index``,
        ``self.best_hyperparams`` and ``self.df_pars``.
        """

        self.fpath = fpath

        # Materialise the grid once. It used to be iterated a second time
        # when building ``df_pars``; for a random or one-shot iterable that
        # second pass yields different (or no) entries, so the table would
        # not line up with ``errors_va``.
        grid_pars = list(grid)

        params = []
        errors_va = []
        for i, pars in enumerate(grid_pars):

            net = nn.NeuralNetwork(self.X_train, self.y_train, **pars)
            net.train(self.X_train, self.y_train)
            print('trained')
            params.append(net.get_params())
            # assess on validation set; the value returned by predict is
            # divided by the number of validation rows
            errors_va.append(
                net.predict(self.X_va, self.y_va) / (self.X_va.shape[0]))
            if plot is True:
                u.plot_error(net,
                             fname=fpath + 'learning_curve_{}.png'.format(i))

        # choosing the best hyperparameters (lowest validation error)
        self.best_index = np.argmin(errors_va)
        best_hyperparams = params[self.best_index]

        # retraining on design set
        # NOTE: ``pop`` removes 'hidden_sizes' from the dict, so the value
        # stored in ``self.best_hyperparams`` below does not contain it.
        net_retrained = nn.NeuralNetwork(
            hidden_sizes=best_hyperparams.pop('hidden_sizes'))
        net_retrained.train(self.X_design, self.y_design, **best_hyperparams)

        df_pars = pd.DataFrame(grid_pars)
        df_pars['error'] = errors_va

        self.best_hyperparams = best_hyperparams
        self.df_pars = df_pars
        self.model = net_retrained

        return self.model
def optimizer_function(user_rating, train, test, Itrain, Itest, movies_data):
    """Run the sliding-window and line-search optimizers one after another.

    For each optimizer the error curve is plotted, training and testing
    errors are printed, and recommendations for a fixed user are listed.

    Parameters
    ----------
    user_rating, train, test, Itrain, Itest
        Forwarded unchanged to the optimizers, ``accuracy`` and ``recommend``.
    movies_data
        Indexed with ``.iloc[i]``; each row must expose ``"movieId"`` and
        ``"title"``.
    """
    print("\nInitiating sliding window optimizer : \n")
    errors, users, movies = MMF_sliding_window(user_rating, train, Itrain,
                                               10000, 5)
    _report_and_recommend(errors, "Sliding_window_error.png", users, movies,
                          user_rating, train, test, Itrain, Itest,
                          movies_data)

    print("\nInitiating line search optimizer : \n")
    errors, users, movies = MMF_line_search(user_rating, train, Itrain, 10000)
    _report_and_recommend(errors, "Line_search_error.png", users, movies,
                          user_rating, train, test, Itrain, Itest,
                          movies_data)


def _report_and_recommend(errors, plot_name, users, movies, user_rating,
                          train, test, Itrain, Itest, movies_data):
    """Plot ``errors``, print train/test errors and list recommendations."""
    ploting = plot_error(errors, plot_name)

    # ``== False`` is kept so that a ``None`` return is not treated as failure.
    if ploting == False:
        print("Something is wrong with errors.")
        print("Error you have : ", errors)

    print("Training error : ", accuracy(users, movies, train, Itrain))
    print("Testing error : ", accuracy(users, movies, test, Itest))

    print("\n**********************************************************\n")
    print("It's time to recommend : \n")
    print("Enter User ID : ")

    # The prompt is only printed; the user id is fixed.
    user_id = 2

    movie_index = recommend(users, movies, user_id, user_rating)

    for i in movie_index:
        temp = movies_data.iloc[i]
        print("\n", temp["movieId"], "\t\t\t", temp["title"])
Example #4
0
def plots_info(opt, name):
    """Draw the trajectory, error and f-evolution plots for ``opt``.

    ``name`` is interpolated into every plot title and passed to
    ``print_info`` as the run label. Reads the module-level ``error`` and
    ``error_to_optim`` objects.
    """
    plt.close('all')

    # With more_data=True calc_trajectory yields four values.
    steps, points, errs, f_values = calc_trajectory(
        opt, error, error_to_optim, more_data=True)

    # First figure: trajectory on a fixed [-1.1, 1.1] square window.
    plt.figure(figsize=(13, 13))
    aux()
    plot_trayectory(
        points,
        title=f'Trayectoria {name}',
        k=steps,
        with_lines=True,
        step_numbers=1000000,
    )
    plt.xlim(-1.1, 1.1)
    plt.ylim(-1.1, 1.1)

    # Second figure: error curve, followed by the textual summary and the
    # f-evolution plot.
    plt.figure()
    plot_error(errs, title=f'Error {name}')
    print_info(steps, points, errs, error, f'{name}')
    plot_f_evolution(f_values, title=f'Evolucion de f {name}')
Example #5
0
# Rows of ``res``: x values (cast to int), then mean and std of the metric.
max_parents = res[0].astype(int)
mean_ll = res[1]
std_ll = res[2]

# A parameter-derived title was previously composed here; kept for reference.
#from_size, to_size, n_sample, n_restart, max_condset, alpha = parameters
#fig_title = curve.capitalize() + " for " + method + " on " + distribution \
#                  + " data " + "generated from " + structure + " network\n" \
#                  + "Mode: " + mode + ", Restarts: " + n_restart \
#                  + ", Alpha: " + str(int(alpha)/100) \
#                  + ", MaxCondSet: " + max_condset
fig_title = "Wine Train"

# Value handed to ut.plot_error as its fourth positional argument.
alpha_t = 0.4

fig, ax = plt.subplots()

# Labels, fixed axis ranges and title in a single call.
ax.set(
    xlabel='Maximum number of parents',
    ylabel='10-fold train log-probability / instance',
    xlim=[0, 4],
    ylim=(0, 3),
    title=fig_title,
)

# Mean curve first, then the error drawn by ut.plot_error on the same axes.
ax.plot(max_parents, mean_ll)
ut.plot_error(max_parents, mean_ll, std_ll, alpha_t, ax=ax)

# Save as PDF, report the location, then show the figure.
plt.savefig(path.join(fig_directory, res_file_name + ".pdf"), transparent=True)
print("Saving figure in ", path.join(fig_directory, res_file_name + ".pdf"))
plt.show()
Example #6
0
#ax.yaxis.tick_right()
#nameOfPlot = 'GDP per hour (constant prices, indexed to 2007)'
#plt.ylabel(nameOfPlot,rotation=0)
#ax.legend(frameon=False, loc='upper left',ncol=2,handlelength=4)

# Value handed to ut.plot_error as its fourth positional argument.
alpha_t = 0.4

# "cpc" is drawn dash-dotted in green, "elidan" dashed in orange; any other
# method draws nothing here. The curve label is the method name itself.
if method in ("cpc", "elidan"):
    ax.plot(
        sizes,
        res[3],
        linestyle="-." if method == "cpc" else "--",
        linewidth=1.25,
        color="green" if method == "cpc" else "orange",
        label=method,
    )
    ut.plot_error(
        sizes,
        mean_fscore,
        std_fscore,
        alpha_t,
        ax=ax,
        color="green" if method == "cpc" else "orange",
    )
Example #7
0
def dataset_test(classifier,
                 validation,
                 sample_estimate=False,
                 shuffle=True,
                 real_dataset=False):
    """
    Estimate error rates of a classifier over growing sample sizes.

    For every sample size in a fixed list, ten datasets are generated (or
    loaded), the classifier is evaluated with the requested validation scheme
    and the mean/variance of the per-class and overall error rates of those
    ten runs are written to ``error-estimates.xlsx`` (sheet ``Foglio1``).
    ``plot_error`` is called once per sample size.

    :param classifier: choose between bayes, kNN, MLP and Tree
    :param validation: choose between resub, holdout and cross
    :param sample_estimate: works only with bayes classifier
    :param shuffle: if False dataset is composed by all class1 elements followed by all class2 elements, otherwise all
           samples are mixed-up
    :param real_dataset: if True use the bank loan dataset, else dataset is generated from two 1-d gaussian
           distribution, does not work with bayes classifier
    :return: print results in a excel file and save the plots in the plot folder
    :raises ValueError: if ``validation`` is not 'resub', 'holdout' or 'cross'
    """
    # An unknown scheme used to run every iteration without evaluating
    # anything and logged zeros; fail early instead.
    if validation not in ('resub', 'holdout', 'cross'):
        raise ValueError(
            "validation must be 'resub', 'holdout' or 'cross', "
            "got {!r}".format(validation))

    wb = load_workbook("error-estimates.xlsx")
    sheet1 = wb['Foglio1']
    row = 29
    column = 3
    test_list = [100, 200, 500, 1000, 2000, 5000, 10000, 20000,
                 50000]  # single element correspond to sample per class
    e1, e2, b1, b2 = 0, 0, 0, 0

    # Parameters of the two generating gaussians. They are kept apart from
    # the per-run estimates so that sample estimates never leak into the
    # generation of the next synthetic dataset.
    true_mu1 = 0
    true_sigma1 = math.sqrt(1)
    true_mu2 = 0
    true_sigma2 = math.sqrt(0.25)
    clf = ClassifierSelector(classifier)

    for test in test_list:
        # All three lists are per sample size; previously only ``error`` was
        # reset, so the class-wise statistics accumulated across sizes.
        error1 = []
        error2 = []
        error = []

        for i in range(10):
            # Start every run from the true parameters; they are replaced
            # below only when sample estimates are requested.
            mu1, sigma1 = true_mu1, true_sigma1
            mu2, sigma2 = true_mu2, true_sigma2

            if real_dataset:
                x, y = dataset_loader(test)
                if shuffle:
                    shuffle_idx = np.arange(len(y))
                    np.random.shuffle(shuffle_idx)
                    x = x[shuffle_idx, :]
                    y = y[shuffle_idx]
            else:
                x1 = np.random.normal(true_mu1, true_sigma1, test)
                x2 = np.random.normal(true_mu2, true_sigma2, test)
                x = np.concatenate((x1, x2), axis=0)
                y1 = np.zeros(test)
                y2 = np.full(test, 1)
                y = np.concatenate((y1, y2), axis=0)
                if shuffle:
                    shuffle_idx = np.arange(len(y))
                    np.random.shuffle(shuffle_idx)
                    x = x[shuffle_idx]
                    y = y[shuffle_idx]

            if validation == 'resub':
                # Train and evaluate on the same samples.
                if classifier == 'bayes':
                    if sample_estimate:
                        # Uses the generated class samples; x1/x2 do not
                        # exist when real_dataset is True.
                        mu1, sigma1, mu2, sigma2 = _gaussian_estimates(x1, x2)
                    y_pred, e1, e2, b1, b2 = bayes_rule(
                        x, mu1, sigma1, mu2, sigma2, 0.5, 0.5)
                else:
                    clf.fit(x, y)
                    y_pred = clf.predict(x)
                tmp1, tmp2, tmp = _confusion_error_rates(y, y_pred)

            elif validation == 'holdout':
                x_train, x_test, y_train, y_test = train_test_split(
                    x, y, test_size=0.4, random_state=0, stratify=y)

                if classifier == 'bayes':
                    if sample_estimate:
                        x1 = x_train[y_train == 0]
                        x2 = x_train[y_train == 1]
                        mu1, sigma1, mu2, sigma2 = _gaussian_estimates(x1, x2)
                    y_pred, e1, e2, b1, b2 = bayes_rule(
                        x_test, mu1, sigma1, mu2, sigma2, 0.5, 0.5)
                else:
                    clf.fit(x_train, y_train)
                    y_pred = clf.predict(x_test)

                tmp1, tmp2, tmp = _confusion_error_rates(y_test, y_pred)

            else:  # validation == 'cross'
                cross1 = []
                cross2 = []
                cross = []
                # Make 1-d data a single-column matrix before splitting.
                if len(x.shape) == 1:
                    x = np.reshape(x, [len(x), 1])
                skf = StratifiedKFold(n_splits=10)
                skf.get_n_splits(x, y)

                for train_index, test_index in skf.split(x, y):
                    x_train, x_test = x[train_index], x[test_index]
                    y_train, y_test = y[train_index], y[test_index]

                    if classifier == 'bayes':
                        if sample_estimate:
                            x1 = x_train[y_train == 0]
                            x2 = x_train[y_train == 1]
                            mu1, sigma1, mu2, sigma2 = _gaussian_estimates(
                                x1, x2)
                        y_pred, e1, e2, b1, b2 = bayes_rule(
                            x_test, mu1, sigma1, mu2, sigma2, 0.5, 0.5)
                    else:
                        clf.fit(x_train, y_train)
                        y_pred = clf.predict(x_test)

                    c1, c2, c = _confusion_error_rates(y_test, y_pred)
                    cross1.append(c1)
                    cross2.append(c2)
                    cross.append(c)

                # One value per run: the average over the ten folds.
                tmp1 = np.average(cross1)
                tmp2 = np.average(cross2)
                tmp = np.average(cross)

            error1.append(tmp1)
            error2.append(tmp2)
            error.append(tmp)

        # Six summary cells per sample size, stacked from ``row`` downwards.
        summary = (np.average(error1), np.var(error1),
                   np.average(error2), np.var(error2),
                   np.average(error), np.var(error))
        for offset, value in enumerate(summary):
            sheet1.cell(row=row + offset, column=column).value = value

        # For the 100-sample case the individual run errors are logged too,
        # in columns 2-4 starting at row 14.
        if test == 100:
            for col1, values in ((2, error1), (3, error2), (4, error)):
                for row1, er in enumerate(values, start=14):
                    sheet1.cell(row=row1, column=col1).value = er

        wb.save("error-estimates.xlsx")
        column += 1
        if classifier == 'bayes' and validation == 'resub' and not sample_estimate:
            info = [classifier, 'generic', test]
        else:
            info = [classifier, validation, test]
        # plot_hist(x1, x2, info)
        plot_error(error, e1 + e2, info)
        # plot_distr(mu1, sigma1, mu2, sigma2, b1, b2, info)
    print("Bayes error1", e1)
    print("Bayes error2", e2)
    print("Bayes error", e1 + e2)
    print("Bayes border: {}, {}".format(b1, b2))


def _gaussian_estimates(x1, x2):
    """Return (mean1, std1, mean2, std2) estimated from the two samples."""
    return (np.mean(x1), math.sqrt(np.var(x1)),
            np.mean(x2), math.sqrt(np.var(x2)))


def _confusion_error_rates(y_true, y_pred):
    """Return (class-0 error rate, class-1 error rate, overall error rate)."""
    conf_matrix = metrics.confusion_matrix(y_true, y_pred)
    zero_as_one = conf_matrix[0, 1]
    one_as_zero = conf_matrix[1, 0]
    labels = list(y_true)
    return (zero_as_one / labels.count(0),
            one_as_zero / labels.count(1),
            (zero_as_one + one_as_zero) / len(y_true))
Example #8
0
# Objective built from A and b, plus the two error objects handed to
# calc_trajectory for every run below.
f = MinSquareRoot(A, b)
error_to_optim = ErrorToOptim(f.min())
error = Error()

# Common starting point for all runs: a 2x1 column of zeros.
# NOTE(review): np.full with an integer fill value creates an integer array;
# confirm the optimizer does not update x0 in place.
x0 = np.full((2, 1), 0)

#%%
# Run 1: constant step, set to 1 / (2 * ||A||_2^2).
plt.close('all')
step = 1 / (2 * np.linalg.norm(A, 2)**2)
opt = Optimizer(f, 'constante', step=step, x0=x0)
k, x, e = calc_trajectory(opt, error, error_to_optim)
plt.figure()
plot_trayectory(x, title='Trayectoria Paso constante', k=k)
plt.figure()
plot_error(e, title='Paso constante')
print_info(k, x, e, error, 'Paso constante')

#%%
# Run 2: decreasing step.
opt = Optimizer(f, 'decreciente', constant=0.001, x0=x0)
k, x, e = calc_trajectory(opt, error, error_to_optim)
plt.figure()
plot_trayectory(x, title='Trayectoria Paso decreciente', k=k)
plt.figure()
plot_error(e, title='Paso decreciente')
# The label was copy-pasted from the constant-step run; it now matches the
# plot titles of this run.
print_info(k, x, e, error, 'Paso decreciente')

#%%
# Run 3: line search.
opt = Optimizer(f, 'line_search', n_points=100, long=0.001, x0=x0)
k, x, e = calc_trajectory(opt, error, error_to_optim)
plt.figure()
Example #9
0
    print("Training Completed using L1 Regularization")
    print("Plot Accuracy")
    model_l1.plot_accuracy("L1 Regularization")
    print("Plot Error")
    model_l1.plot_error("L1 Regularization")
    #Read test csv file
    data.read(config.TEST_PATH)
    print("Test Data Read Successfully")
    model_l1.test(data)
    print("Predicted test values using L1 Regularization!!!!")

    #"""
    #L2 Regularization
    data.read(config.TRAIN_PATH)
    print("train data read successfully")
    model_l2 = model_L2.Model(data.size[1])
    acc_list_L2, error_list_L2 = model_l2.train(data)
    print("Training Completed using L2 Regularization")
    print("Plot Accuracy")
    model_l2.plot_accuracy("L2 Regularization")
    print("Plot Error")
    model_l2.plot_error("L2 Regularization")
    #Read test csv file
    data.read(config.TEST_PATH)
    print("Test Data Read Successfully")
    model_l2.test(data)
    print("Predicted test values using L2 Regularization!!!!")
    #"""
    utils.plot_accuracy(acc_list, acc_list_L1, acc_list_L2)
    utils.plot_error(error_list, error_list_L1, error_list_L2)
Example #10
0
    from_size, to_size, n_sample, n_restart, max_condset, alpha = parameters
    fig_title = curve.capitalize() + " for " + method + " on " + distribution \
                  + " data " + "generated from " + structure + " network\n" \
                  + ", Restarts: " + n_restart \
                  + ", Alpha: " + str(int(alpha)/100) \
                  + ", MaxCondSet: " + max_condset
elif method == "elidan":
    from_size, to_size, n_sample, n_restart, max_parents, hc_restart = parameters
    fig_title = curve.capitalize() + " for " + method + " on " + distribution \
                  + " data " + "generated from " + structure + " network\n" \
                  + ", Restarts: " + n_restart \
                  + ", HCRestarts: " + hc_restart \
                  + ", MaxParents: " + max_parents

fig, ax = plt.subplots()

# Value handed to ut.plot_error as its fourth positional argument.
alpha_t = 0.4

# Labels, title and x range in a single call; the y range is left automatic.
ax.set(
    xlabel='Size',
    ylabel='Log-probability / instance',
    title=fig_title,
    xlim=[int(from_size), int(to_size)],
)
#ax.set_ylim(0.2,0.7)

# Mean curve first, then the error drawn by ut.plot_error on the same axes.
ax.plot(sizes, mean_ll)
ut.plot_error(sizes, mean_ll, std_ll, alpha_t, ax=ax)

# Save as PDF and report where it went.
plt.savefig(path.join(fig_directory, res_file_name + ".pdf"), transparent=True)
print("Saving figure in ", path.join(fig_directory, res_file_name + ".pdf"))
Example #11
0
    sizes_elidan = res_elidan[0].astype(int)
    
    mean_shd_cpc, std_shd_cpc = res_cpc[1], res_cpc[2]
    mean_shd_elidan, std_shd_elidan = res_elidan[1], res_elidan[2]
    

fig, ax = plt.subplots()

# Both axis labels are deliberately left empty.
ax.set_xlabel('')
ax.set_ylabel('')


# Value handed to ut.plot_error as its fourth positional argument.
alpha_t = 0.4

# "cpc" is drawn dash-dotted in green, "elidan" dashed in orange; "both"
# overlays the two from their separate result tables.
if method == "cpc":
    ax.plot(sizes, res[1], linestyle="-.", linewidth=1.25, color="green", label='cpc')
    ut.plot_error(sizes, mean_shd, std_shd, alpha_t, ax=ax, color="green")
elif method == "elidan":
    ax.plot(sizes, res[1], linestyle="--", linewidth=1.25, color="orange", label='elidan')
    ut.plot_error(sizes, mean_shd, std_shd, alpha_t, ax=ax, color="orange")
elif method == "both":
    ax.plot(sizes_cpc, res_cpc[1], linestyle="-.", linewidth=1.25, color="green", label='cpc')
    ut.plot_error(sizes_cpc, mean_shd_cpc, std_shd_cpc, alpha_t, ax=ax, color="green")
    ax.plot(sizes_elidan, res_elidan[1], linestyle="--", linewidth=1.25, color="orange", label='elidan')
    ut.plot_error(sizes_elidan, mean_shd_elidan, std_shd_elidan, alpha_t, ax=ax, color="orange")

# Pin the lower y limit to 0 and keep the current upper limit. The upper
# limit is read with the getter instead of calling the setter with no
# arguments.
ax.set_ylim([0, ax.get_ylim()[1]])
ax.set_xlim([int(from_size), int(to_size)])

if (method == "cpc") or (method == "elidan"):
    ax.legend()
    plt.savefig(path.join(fig_directory, res_file_name + ".pdf"), transparent=True)