Example #1
def second_order_expansion(loss_old, proj_S, proj_B, S, B, gradient_old, L,
                           lambda_1, lambda_2):
    """Quadratic upper bound Q_L used in FISTA's backtracking line search:
    f(y) + <x - y, grad f(y)> + (L/2)*||x - y||_F^2 + lambda_1*||S||_1 + lambda_2*||B||_{1,inf}."""
    S = S.reshape(proj_S.shape[0], proj_S.shape[1])
    B = B.reshape(proj_B.shape[0], proj_B.shape[1])
    x_k = (proj_S + proj_B) - (S + B)
    first_term = loss_old
    second_term = np.inner(x_k.ravel(), gradient_old.ravel())
    # Frobenius (entrywise Euclidean) norm of the step; np.linalg.norm(x_k, ord=2)
    # on a 2-D array would return the spectral norm instead.
    third_term = L / 2 * np.linalg.norm(x_k, ord='fro')**2
    fourth_term = lambda_1 * norm_11(proj_S) + lambda_2 * norm_1_infinity(proj_B)
    return first_term + second_term + third_term + fourth_term
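Note: the helpers norm_11 and norm_1_infinity used throughout these examples are not shown on this page. A minimal sketch of what they presumably compute, assuming norm_11 is the entrywise L1 norm and norm_1_infinity is the row-wise L1/L-infinity norm common in multi-task regularization:

import numpy as np

def norm_11(W):
    # Entrywise L1 norm: sum of absolute values of all entries (assumed definition).
    return np.abs(W).sum()

def norm_1_infinity(W):
    # L1/L-infinity norm: sum over rows of the largest absolute entry per row (assumed definition).
    return np.abs(W).max(axis=1).sum()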
Example #2
def logistic_loss_function(X1, Y1, S1, B1, lambda_1, lambda_2, n_var, n_task):
    """Cross-entropy loss with L1 and L1/L-infinity regularization for the model
    sigmoid(X1 @ (B1 + S1)); returns (regularized loss, data loss, gradient of the data term)."""
    N = len(X1)
    Y2 = sigmoid(np.matmul(X1, (B1 + S1)))
    # Binary cross-entropy: -[y*log(p) + (1 - y)*log(1 - p)], averaged over samples.
    class_cost_1 = np.multiply(-Y1, np.log(Y2))
    class_cost_2 = np.multiply((1 - Y1), np.log(1 - Y2))
    total_loss = (class_cost_1 - class_cost_2).sum() / N
    final_loss = (total_loss
                  + lambda_1 * norm_11(S1.reshape((n_var, n_task)))
                  + lambda_2 * norm_1_infinity(B1.reshape((n_var, n_task))))
    # Gradient of the cross-entropy term: X^T (p - y) / N.
    gradient = np.matmul(X1.T, np.subtract(Y2, Y1)) / N
    return (final_loss, total_loss, gradient)
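Example #2 also relies on a sigmoid helper that is not defined above. A minimal sketch under that assumption, with a clip to keep np.exp from overflowing:

import numpy as np

def sigmoid(z):
    # Elementwise logistic function; clipping avoids overflow for large |z|.
    z = np.clip(z, -500, 500)
    return 1.0 / (1.0 + np.exp(-z))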
Example #3
def first_condition(proj_S, proj_B, X1, Y1, lambda_1, lambda_2, loss_option,
                    n_var, n_task):
    """Evaluate the objective and gradient at the projected point (proj_S, proj_B)."""
    s1 = proj_S.ravel().reshape(-1, 1)
    b1 = proj_B.ravel().reshape(-1, 1)
    if loss_option == "logistic":
        loss, log_loss, gradient_updated = logistic_loss_function(
            X1, Y1, s1, b1, lambda_1, lambda_2, n_var, n_task)
        return (loss, log_loss, gradient_updated)
    else:
        # Squared-error loss plus the two regularizers.
        activation = np.matmul(X1, (s1 + b1))
        log_loss_2 = np.linalg.norm(Y1 - activation)**2
        loss = log_loss_2 + lambda_1 * norm_11(
            proj_S) + lambda_2 * norm_1_infinity(proj_B)
        # Gradient of the squared-error term: 2 * X^T (X(s + b) - y).
        gradient_updated = 2 * np.matmul(X1.T, np.subtract(activation, Y1))
        gradient_updated = gradient_updated.reshape(proj_S.shape[0],
                                                    proj_S.shape[1])
        return (loss, log_loss_2, gradient_updated)
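The least-squares branch uses the gradient 2 * X1.T @ (X1 @ (s1 + b1) - Y1). A small self-contained check of that formula against central finite differences (shapes and data here are illustrative only):

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(20, 5))
y = rng.normal(size=(20, 1))
w = rng.normal(size=(5, 1))

def f(w):
    # Squared-error loss ||y - X w||^2.
    return np.linalg.norm(y - X @ w) ** 2

analytic = 2 * X.T @ (X @ w - y)

# Central finite differences, one coordinate at a time.
eps = 1e-6
numeric = np.zeros_like(w)
for i in range(w.shape[0]):
    e = np.zeros_like(w)
    e[i, 0] = eps
    numeric[i, 0] = (f(w + e) - f(w - e)) / (2 * eps)

print(np.max(np.abs(analytic - numeric)))  # expected to be tiny (~1e-6 or below)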
Example #4
def FISTA_algorithm_multitasking(X1,
                                 Y1,
                                 n_var,
                                 n_task,
                                 max_iteration,
                                 loss_option=None):
    n_obs = X1.shape[0]
    lambda_1 = 1
    lambda_2 = 1
    eta = 3               # factor by which the Lipschitz estimate grows during backtracking
    L_new = 10            # initial Lipschitz estimate
    rand_iteration = 50   # maximum number of backtracking steps per iteration
    precision = 1e-3      # convergence tolerance on the change in loss
    t_1 = 1               # FISTA momentum parameter t_k
    S1 = np.random.normal(0, 1, (n_var, n_task)).flatten()
    B1 = np.random.normal(0, 1, (n_var, n_task)).flatten()
    S_old = S1
    B_old = B1
    loss_old = 0
    Y2 = np.zeros((Y1.shape[0], Y1.shape[1]))
    final_loss_list = []
    total_loss_list = []
    norm1_list = []
    norminf_list = []
    for i in range(max_iteration):
        if loss_option == "logistic":
            final_loss, total_loss, gradient = logistic_loss_function(
                X1, Y1, S1, B1, lambda_1, lambda_2, n_var, n_task)
        else:
            Y2 = np.matmul(X1, (B1 + S1))
            Y2 = Y2.reshape(-1, 1)
            total_loss = np.linalg.norm(Y2 - Y1)**2
            total_loss_list.append(total_loss)
            final_loss = total_loss + lambda_1 * norm_11(
                S1.reshape((n_var, n_task))) + lambda_2 * norm_1_infinity(
                    B1.reshape((n_var, n_task)))
            print(final_loss)
            final_loss_list.append(final_loss)
            a1 = np.subtract(Y2, Y1)
            gradient = 2 * np.matmul(X1.T, a1)
        if abs(loss_old - final_loss) <= precision:
            return (S1, B1)
        norm1_list.append(norm_11(S1.reshape((n_var, n_task))))
        norminf_list.append(norm_1_infinity(B1.reshape((n_var, n_task))))
        """
        Now trying to find best possible Lipchitz constant for the given function
        """
        for j in range(rand_iteration):
            L_new = (eta**j) * L_new  # increase the Lipschitz estimate geometrically
            proj_S = project_L1_ball((S1.reshape(
                (n_var, n_task)) - gradient.reshape((n_var, n_task)) / L_new),
                                     lambda_1 / L_new)
            proj_B = project_L1_infinity_ball((B1.reshape(
                (n_var, n_task)) - gradient.reshape((n_var, n_task)) / L_new),
                                              lambda_2 / L_new)
            # Backtracking check: F(prox point) <= Q_L(prox point, current point).
            loss_proj, log_loss_2, gradient_old = first_condition(
                proj_S, proj_B, X1, Y1, lambda_1, lambda_2, loss_option, n_var,
                n_task)
            b = second_order_expansion(total_loss, proj_S, proj_B, S1, B1,
                                       gradient, L_new, lambda_1, lambda_2)
            if loss_proj <= b:
                break
            print("Condition not satisfied; increasing L")
        """Now since we have found the Lipchitz constant we update the parameters"""
        proj_S = project_L1_ball((S1.reshape(
            (n_var, n_task)) - gradient.reshape((n_var, n_task)) / L_new),
                                 lambda_1 / L_new)
        proj_B = project_L1_infinity_ball((B1.reshape(
            (n_var, n_task)) - gradient.reshape((n_var, n_task)) / L_new),
                                          lambda_2 / L_new)
        # Standard FISTA update: t_{k+1} = (1 + sqrt(1 + 4*t_k^2)) / 2, and the
        # extrapolation is taken from the previous proximal iterate.
        S_prox = proj_S.ravel()
        B_prox = proj_B.ravel()
        t_new = (1 + np.sqrt(1 + 4 * (t_1**2))) / 2
        eta_1 = (t_1 - 1) / t_new
        S1 = S_prox + eta_1 * (S_prox - S_old)
        B1 = B_prox + eta_1 * (B_prox - B_old)
        S_old = S_prox
        B_old = B_prox
        loss_old = final_loss
        t_1 = t_new
#    plt.matshow(S1.reshape((n_var, n_task)))
#    plt.matshow(B1.reshape((n_var, n_task)))
    return (S1, B1)
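For reference, the step-size recursion t_{k+1} = (1 + sqrt(1 + 4*t_k^2)) / 2 and the extrapolation step used above follow the standard FISTA scheme. A minimal self-contained version for a plain lasso objective, with elementwise soft-thresholding as the proximal operator and a fixed step size instead of backtracking (a sketch, not the multitask routine above):

import numpy as np

def soft_threshold(v, tau):
    # Proximal operator of tau * ||.||_1 (elementwise soft-thresholding).
    return np.sign(v) * np.maximum(np.abs(v) - tau, 0.0)

def fista_lasso(X, y, lam, n_iter=200):
    # Minimize 0.5 * ||X w - y||^2 + lam * ||w||_1 with step 1 / L,
    # where L = sigma_max(X)^2 is a Lipschitz constant of the gradient.
    L = np.linalg.norm(X, 2) ** 2
    w = np.zeros(X.shape[1])      # extrapolated point y_k
    w_old = np.zeros(X.shape[1])  # previous proximal iterate x_{k-1}
    t = 1.0
    for _ in range(n_iter):
        grad = X.T @ (X @ w - y)
        w_new = soft_threshold(w - grad / L, lam / L)    # proximal gradient step
        t_new = (1 + np.sqrt(1 + 4 * t ** 2)) / 2
        w = w_new + ((t - 1) / t_new) * (w_new - w_old)  # extrapolation
        w_old, t = w_new, t_new
    return w_old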
Example #5
def ADMM_with_least_square(X_train, y_train):
    """ADMM for the squared loss with an entrywise L1 penalty on S and an
    L1/L-infinity penalty on B, using the consensus variable theta_1 = S + B."""
    loss_old = 0
    lambda_1 = 0.009
    lambda_2 = 0.009
    U_k = np.random.normal(0, 1, (X_train.shape[1], y_train.shape[1]))
    rho = 0.008
    S = np.random.normal(0, 1, (X_train.shape[1], y_train.shape[1]))
    B = np.random.normal(0, 1, (X_train.shape[1], y_train.shape[1]))
    total_loss_list = []
    activation = np.zeros((y_train.shape[0], y_train.shape[1]))
    #    activation_1 = np.zeros((y_train.shape[0], y_train.shape[1]))
    max_iteration = 150
    precision = 1e-8
    theta_1 = S + B
    Z_k = S + B
    N = len(X_train)
    bias = np.ones((X_train.shape[0], 1))  # not used in the updates below
    for j in range(max_iteration):
        # Predictions for the current parameter matrix theta_1.
        activation = np.matmul(X_train, theta_1)
        loss_1 = np.linalg.norm(np.subtract(y_train, activation))**2
        total_loss = ((1 / 2) * loss_1 + lambda_1 * norm_11(S) +
                      lambda_2 * norm_1_infinity(B))
        print(total_loss)
        total_loss_list.append(total_loss)
        if abs(total_loss - loss_old) <= precision:
            return (theta_1, S, B, total_loss_list, activation)
        # theta-update: minimize the least-squares term plus the augmented Lagrangian penalty.
        theta_1 = solvin_argmin_least_square(activation, X_train, y_train,
                                             theta_1, Z_k, U_k, rho)
        # z-update, split into the sparse part S and the block-structured part B.
        proj_S = project_L1_ball((theta_1 - B + U_k), lambda_1 / rho)
        proj_B = projected_L1_infinity_ball((theta_1 - S + U_k),
                                            lambda_2 / rho)
        Z_k = proj_S + proj_B
        # Scaled dual variable update.
        U_k = U_k + theta_1 - Z_k
        S = proj_S
        B = proj_B
        loss_old = total_loss
    print(j)
    return (theta_1, S, B, total_loss_list, activation)
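The helpers solvin_argmin_least_square, project_L1_ball, and projected_L1_infinity_ball are defined elsewhere. For the theta-update specifically, the usual scaled-form ADMM subproblem min_theta 0.5*||y - X theta||^2 + (rho/2)*||theta - Z + U||^2 has the closed-form solution below; the function name and signature here are illustrative and differ from the snippet's solvin_argmin_least_square:

import numpy as np

def least_squares_theta_update(X, y, Z, U, rho):
    # Solves (X^T X + rho I) theta = X^T y + rho (Z - U), the stationarity
    # condition of 0.5*||y - X theta||^2 + (rho/2)*||theta - Z + U||_F^2.
    d = X.shape[1]
    A = X.T @ X + rho * np.eye(d)
    rhs = X.T @ y + rho * (Z - U)
    return np.linalg.solve(A, rhs)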


#theta_1, S, B, total_loss_list, activation = ADMM_with_least_square(X_train, y_train)
#plt.plot(np.array(total_loss_list))
#plt.show()
#%%

#def fista_function(X, y):
#    loss_old = 0
#    lambda_1 = 1
#    lambda_2 = 9
#    L_0 = 15
#    eta = 10
#    random_number = 30
#    S = np.random.normal(0, 1,(X_train.shape[1], y_train.shape[1]))
#    B = np.random.normal(0,1, (X_train.shape[1], y_train.shape[1]))
#    print(S.shape)
#    print(B.shape)
#    total_loss_list = []
#    activation = np.zeros((y_train.shape[0], y_train.shape[1]))
#    activation_1 = np.zeros((y_train.shape[0], y_train.shape[1]))
#    max_iteration = 100
#    precision = 1e-1
#    p =0
#    for j in range(max_iteration):
#        for p in range(0,len(X_train)):
#            hypothesis = np.dot(np.transpose(S+B), X_train[p])
#            activation[p]= sigmoid_activation(hypothesis)
#        loss_1 = log_loss(y_train, activation)
#        print("This is first regularizer", norm_11(S))
#        print("This is second regularizer", norm_1_infinity(B))
#        total_loss = loss_1 + lambda_1*norm_11(S)+ lambda_2*norm_1_infinity(B)
#        total_loss_list.append(total_loss)
#        print(total_loss)
#        if abs(loss_old - total_loss) <= precision:
#            return(S, B, total_loss_list, p, activation)
#        gradient = gradient_logistic_function(activation, X_train, y_train)
#        for i in range(random_number):
#            if i == 0:
#                L_new = (eta**i)*(L_0)
#                print(L_new)
#                t = 1
#            else:
#                L_new = (eta**i)*L_old
#                print(L_new)
#                t = 1/L_new
#            proj_S = project_L1_ball((S - t*gradient).T, lambda_1)
#            proj_B = projected_L1_infinity_ball((B - t*gradient).T, lambda_2)
#            proj_S = proj_S.T
#            proj_B = proj_B.T
#            loss_old,log_loss_2,gradient_old = first_condition(proj_S, proj_B, X_train,y_train, lambda_1, lambda_2)
#            b = second_order_expansion(log_loss_2, proj_S, proj_B, S, B, gradient_old, L_new, lambda_1, lambda_2)
#            print("This is loss_old", loss_old)
#            print("This is second_part", b)
#            if loss_old <= b:
#                S = proj_S - S
#                B = proj_B - B
#                break
#            else:
#                L_old = L_new
#                S = proj_S - S
#                B = proj_B - B
##                if L_new == L_old:
##                    break
#        print("This is the new L", L_new)
#        t_1 = 1/L_new
#        t_new = (1 + np.sqrt(1 + 4 *(t_1**2)))/2
#        eta_1= (t_1-1)/t_new
#        if j == 0:
#            S_old = 0
#            B_old = 0
#        else:
#            S = S + eta_1*(S - S_old)
#            B = B + eta_1*(B - B_old)
#        S_old = S
#        B_old = B
#        loss_old = total_loss
#    return(S, B, total_loss_list, p, activation)
#S, B, total_loss_list, p, activation_1 = fista_function(X_train, y_train)
#print(p)
#plt.plot(np.array(total_loss_list))
#plt.show()

#%%