Esempio n. 1
0
def objective(x_train, y_train, W1, b1, z1, a1, W2, b2, z2, a2, W3, b3, z3, u, v1, v2, rho):
    """Evaluate the augmented objective for the current three-layer variables.

    The returned value is the sum of:

    * ``common.cross_entropy_with_softmax(y_train, z3)``;
    * ``trace((z3 - W3 a2 - b3) u^T)``, the multiplier term on the last layer;
    * ``rho / 2`` times the summed squares of each linear residual
      ``z_l - W_l a_{l-1} - b_l`` (layer 1 uses ``x_train`` as its input);
    * ``rho / 2`` times the summed squares of ``a_l - relu(z_l) + v_l`` for
      layers 1 and 2.

    Args:
        x_train: Input fed to the first layer.
        y_train: Labels passed to the cross-entropy helper.
        W1, b1, z1, a1: Weight, bias, pre-activation and activation of layer 1.
        W2, b2, z2, a2: Same quantities for layer 2.
        W3, b3, z3: Weight, bias and output of layer 3.
        u: Multiplier paired with the layer-3 linear residual.
        v1, v2: Offsets added to the layer-1 / layer-2 activation residuals.
        rho: Penalty weight applied to every quadratic term.

    Returns:
        The objective value as produced by the arithmetic above.
    """
    # Every residual is reused by several terms; form each one exactly once so
    # the matrix products and relu evaluations are not repeated.
    lin_res1 = z1 - mul(W1, x_train) - b1
    lin_res2 = z2 - mul(W2, a1) - b2
    lin_res3 = z3 - mul(W3, a2) - b3
    act_res1 = a1 - common.relu(z1) + v1
    act_res2 = a2 - common.relu(z2) + v2

    r1 = np.sum(lin_res1 * lin_res1)
    r2 = np.sum(lin_res2 * lin_res2)
    r3 = np.sum(lin_res3 * lin_res3)

    loss = common.cross_entropy_with_softmax(y_train, z3)
    obj = loss + np.trace(mul(lin_res3, np.transpose(u)))
    obj = obj + rho / 2 * r1 + rho / 2 * r2 + rho / 2 * r3
    obj = obj + rho / 2 * np.sum(act_res1 * act_res1) + rho / 2 * np.sum(act_res2 * act_res2)
    return obj
Esempio n. 2
0
def update_zl(W, a_last, zl_old, label, u, rho):
    """Iteratively update the last-layer output ``zl``.

    Each iteration evaluates

        cross_entropy_with_softmax(label, zl)
            + rho / 2 * sum((zl - W a_last + u / rho) ** 2)

    and stops as soon as that value changes by less than ``TOLERANCE`` between
    consecutive iterations, or after ``MAX_ITER`` iterations. Otherwise it
    takes a step that combines ``softmax(zl) - label`` (presumably the
    gradient of the cross-entropy term -- confirm against the helper) with
    the quadratic term, then extrapolates between the new and previous
    iterates using the ``t`` recurrence below.

    Args:
        W: Last-layer weight.
        a_last: Activation fed into the last layer.
        zl_old: Starting value for ``zl``.
        label: Target passed to the cross-entropy helper and subtracted from
            ``softmax(zl)``.
        u: Multiplier; enters only as ``u / rho``.
        rho: Penalty weight.

    Returns:
        The extrapolated iterate ``zl`` at the time the loop stops.
    """
    MAX_ITER = 500
    TOLERANCE = 1e-3
    eta = 4

    # These quantities do not change inside the loop; computing them here
    # avoids repeating the matrix product on every iteration.
    linear = mul(W, a_last)
    dual_shift = u / rho
    anchor = rho * (linear - dual_shift)
    denom = rho + 1 / eta

    fzl = 10e10  # large sentinel so the first convergence check cannot pass
    zl = zl_old
    zeta = zl
    t = 1
    for _ in range(MAX_ITER):
        fzl_old = fzl
        residual = zl - linear + dual_shift
        fzl = cross_entropy_with_softmax(label, zl) + rho / 2 * np.sum(residual * residual)
        if abs(fzl - fzl_old) < TOLERANCE:
            break
        t_old = t
        t = (1 + np.sqrt(1 + 4 * t * t)) / 2
        theta = (1 - t_old) / t
        gradients2 = softmax(zl) - label
        zeta_old = zeta
        zeta = (anchor + (zl - eta * gradients2) / eta) / denom
        # theta is <= 0, so this moves past zeta in the direction away from
        # zeta_old (an extrapolation rather than an average).
        zl = (1 - theta) * zeta + theta * zeta_old
    return zl
Esempio n. 3
0
def update_zl(a_last, W, b, label, zl_old, u,rho):
    """Iteratively update the last-layer output ``zl`` (variant with bias).

    Each iteration evaluates

        cross_entropy_with_softmax(label, zl)
            + rho / 2 * sum((zl - W a_last - b + u / rho) ** 2)

    and stops as soon as that value changes by less than ``TOLERANCE`` between
    consecutive iterations, or after ``MAX_ITER`` iterations. Otherwise it
    takes a step that combines ``softmax(zl) - label`` (presumably the
    gradient of the cross-entropy term -- confirm against the helper) with
    the quadratic term, then extrapolates between the new and previous
    iterates using the ``lamda`` recurrence below.

    Args:
        a_last: Activation fed into the last layer.
        W: Last-layer weight.
        b: Last-layer bias.
        label: Target passed to the cross-entropy helper and subtracted from
            ``softmax(zl)``.
        zl_old: Starting value for ``zl``.
        u: Multiplier; enters only as ``u / rho``.
        rho: Penalty weight.

    Returns:
        The extrapolated iterate ``zl`` at the time the loop stops.
    """
    MAX_ITER = 500
    TOLERANCE = 10e-5  # note: this literal equals 1e-4
    eta = 4

    # These quantities do not change inside the loop; computing them here
    # avoids repeating the matrix product on every iteration.
    affine = mul(W, a_last)
    dual_shift = u / rho
    anchor = rho * (affine + b - dual_shift)
    denom = rho + 1 / eta

    fzl = 10e10  # large sentinel so the first convergence check cannot pass
    zl = zl_old
    zeta = zl
    lamda = 1
    for _ in range(MAX_ITER):
        fzl_old = fzl
        residual = zl - affine - b + dual_shift
        fzl = cross_entropy_with_softmax(label, zl) + rho / 2 * np.sum(residual * residual)
        if abs(fzl - fzl_old) < TOLERANCE:
            break
        lamda_old = lamda
        lamda = (1 + np.sqrt(1 + 4 * lamda * lamda)) / 2
        gamma = (1 - lamda_old) / lamda
        gradients2 = softmax(zl) - label
        zeta_old = zeta
        zeta = (anchor + (zl - eta * gradients2) / eta) / denom
        # gamma is <= 0, so this moves past zeta in the direction away from
        # zeta_old (an extrapolation rather than an average).
        zl = (1 - gamma) * zeta + gamma * zeta_old
    return zl
Esempio n. 4
0
def admm_train(x_train, y_train, W1, z1, a1, W2, z2, a2, W3, z3, u, rho,
               gamma):
    """Run one sweep of variable updates over a three-layer network.

    Variables are refreshed in a fixed order (W1, a1, z1, W2, a2, z2, W3, z3)
    through the ``para_func`` helpers, and each update sees the values already
    refreshed earlier in the same sweep. The multiplier ``u`` is then advanced
    by ``rho`` times the last-layer residual ``z3 - W3 a2``.

    Args:
        x_train: Input to the first layer.
        y_train: Labels, used only by the ``z3`` update.
        W1, W2, W3: Layer weights.
        z1, z2, z3: Layer outputs before activation.
        a1, a2: Hidden-layer activations.
        u: Multiplier tied to the last layer.
        rho: Penalty weight forwarded to every helper.
        gamma: Second weight forwarded to the ``update_a`` / ``update_z``
            helpers.

    Returns:
        Tuple ``(W1, z1, a1, W2, z2, a2, W3, z3, u)`` with the updated values.
    """
    # Layer 1. A literal 0 is passed where the last-layer calls pass ``u``
    # (presumably: no multiplier is attached to this layer -- confirm in
    # para_func).
    W1 = para_func.update_W(W1, x_train, z1, 0, rho)
    a1 = para_func.update_a(W2, a1, z2, z1, 0, rho, gamma)
    z1 = para_func.update_z(W1, x_train, a1, z1, rho, gamma)

    # Layer 2. The a2 update involves W3 / z3 and therefore receives ``u``.
    W2 = para_func.update_W(W2, a1, z2, 0, rho)
    a2 = para_func.update_a(W3, a2, z3, z2, u, rho, gamma)
    z2 = para_func.update_z(W2, a1, a2, z2, rho, gamma)

    # Layer 3 (output): the only layer whose z update uses the labels.
    W3 = para_func.update_W(W3, a2, z3, u, rho)
    z3 = para_func.update_zl(W3, a2, z3, y_train, u, rho)

    # Multiplier step on the last-layer residual.
    u = u + rho * (z3 - mul(W3, a2))

    return W1, z1, a1, W2, z2, a2, W3, z3, u
Esempio n. 5
0
def update_z(W, a_last, a, z, rho, gamma):
    """Choose, element by element, the cheaper of two candidate values for z.

    With ``m = W a_last`` the per-element cost is
    ``gamma * (a - relu(z)) ** 2 + rho * (z - m) ** 2``. Two candidates are
    built: the weighted average ``(gamma * a + rho * m) / (gamma + rho)`` kept
    only where it is non-negative (zero elsewhere), and ``m`` kept only where
    it is non-positive (zero elsewhere). The candidate with the lower cost is
    selected for each element; ties go to the first candidate.

    Args:
        W: Layer weight.
        a_last: Input to the layer.
        a: Activation of the layer; also fixes the shape/dtype of the result.
        z: Ignored -- the incoming value is never read.
        rho: Weight on the ``(z - m)`` term.
        gamma: Weight on the ``(a - relu(z))`` term.

    Returns:
        Array shaped like ``a`` holding the selected values.
    """
    linear = mul(W, a_last)

    def elementwise_cost(candidate):
        # Cost of a candidate under the two quadratic terms.
        return gamma * np.power(a - relu(candidate), 2) + rho * np.power(candidate - linear, 2)

    blended = (gamma * a + rho * linear) / (gamma + rho)
    keep_blended = blended >= 0.
    keep_linear = linear <= 0.

    # Candidate restricted to the non-negative side.
    cand_pos = np.zeros_like(a)
    cand_pos[keep_blended] = blended[keep_blended]

    # Candidate restricted to the non-positive side.
    cand_neg = np.zeros_like(a)
    cand_neg[keep_linear] = linear[keep_linear]

    cost_pos = elementwise_cost(cand_pos)
    cost_neg = elementwise_cost(cand_neg)

    pick_pos = cost_pos <= cost_neg
    pick_neg = cost_neg < cost_pos

    chosen = np.zeros_like(a)
    chosen[pick_pos] = cand_pos[pick_pos]
    chosen[pick_neg] = cand_neg[pick_neg]
    return chosen
Esempio n. 6
0
def eq1_z(a, W_next, b_next, z_next, u_next, rho):
    """Return ``rho * (z_next - b_next - W_next a + u_next / rho)``.

    Args:
        a: Input to the next layer.
        W_next: Weight of the next layer.
        b_next: Bias of the next layer.
        z_next: Output of the next layer.
        u_next: Multiplier; enters as ``u_next / rho``.
        rho: Penalty weight.

    Returns:
        The scaled residual, with the shape produced by the arithmetic above.
    """
    projected = mul(W_next, a)
    scaled_dual = u_next / rho
    residual = z_next - b_next - projected + scaled_dual
    return rho * residual
Esempio n. 7
0
def eq1_b(a, W_next, b_next, z_next, u_next, rho):
    """Return the row-wise mean of the scaled residual as a column vector.

    The residual is ``W_next a + b_next - z_next - u_next / rho``; it is
    multiplied by ``rho``, averaged along axis 1 and reshaped to ``(-1, 1)``.

    Args:
        a: Input to the next layer.
        W_next: Weight of the next layer.
        b_next: Bias of the next layer.
        z_next: Output of the next layer.
        u_next: Multiplier; enters as ``u_next / rho``.
        rho: Penalty weight.

    Returns:
        Column vector with one entry per row of the residual.
    """
    residual = mul(W_next, a) + b_next - z_next - u_next / rho
    row_means = np.mean(rho * residual, axis=1)
    return row_means.reshape(-1, 1)
Esempio n. 8
0
def eq1_W(a, W_next, b_next, z_next, u_next, rho):
    """Return ``rho * (W_next a + b_next - z_next - u_next / rho) a^T``.

    Args:
        a: Input to the next layer.
        W_next: Weight of the next layer.
        b_next: Bias of the next layer.
        z_next: Output of the next layer.
        u_next: Multiplier; enters as ``u_next / rho``.
        rho: Penalty weight.

    Returns:
        The product of the residual with ``a.T``, scaled by ``rho``.
    """
    residual = mul(W_next, a) + b_next - z_next - u_next / rho
    return rho * mul(residual, a.T)
Esempio n. 9
0
def eq1_a(a, W_next, b_next, z_next, u_next, rho):
    """Return ``rho * W_next^T (W_next a + b_next - z_next - u_next / rho)``.

    Args:
        a: Input to the next layer.
        W_next: Weight of the next layer.
        b_next: Bias of the next layer.
        z_next: Output of the next layer.
        u_next: Multiplier; enters as ``u_next / rho``.
        rho: Penalty weight.

    Returns:
        The residual multiplied on the left by the transposed weight and
        scaled by ``rho``.
    """
    weight_t = np.transpose(W_next)
    residual = mul(W_next, a) + b_next - z_next - u_next / rho
    return rho * mul(weight_t, residual)
Esempio n. 10
0
def eq1(a, W_next, b_next, z_next, u_next, rho):
    """Return ``rho / 2`` times the summed squares of the shifted residual.

    The residual is ``z_next - W_next a - b_next + u_next / rho``.

    Args:
        a: Input to the next layer.
        W_next: Weight of the next layer.
        b_next: Bias of the next layer.
        z_next: Output of the next layer.
        u_next: Multiplier; enters as ``u_next / rho``.
        rho: Penalty weight.

    Returns:
        The penalty value.
    """
    gap = z_next - mul(W_next, a) - b_next + u_next / rho
    squared_norm = np.sum(gap * gap)
    return rho / 2 * squared_norm
Esempio n. 11
0
def z_obj(a_last, W, b, z, u, v, a, rho):
    """Return the element-wise sum of two squared residuals for ``z``.

    The first residual is ``z - W a_last - b + u / rho`` and the second is
    ``a - relu(z) + v``. No reduction is applied: the result keeps the
    element-wise shape of those residuals.

    Args:
        a_last: Input to the layer.
        W: Layer weight.
        b: Layer bias.
        z: Layer output being evaluated.
        u: Multiplier; enters as ``u / rho``.
        v: Offset added to the activation residual.
        a: Layer activation.
        rho: Divides ``u``; it does not scale either squared term here.

    Returns:
        Element-wise ``linear_gap ** 2 + activation_gap ** 2``.
    """
    # Build each residual once instead of evaluating the matrix product and
    # relu twice apiece.
    linear_gap = z - mul(W, a_last) - b + u / rho
    activation_gap = a - relu(z) + v
    f = linear_gap * linear_gap + activation_gap * activation_gap
    return f
Esempio n. 12
0
    a1 = common.update_a(W2, b2, z2, z1, a1, 0, 0, rho, tau)
    z1 = common.update_z(x_train, W1, b1, a1, z1, 0, 0, rho)
    b1 = common.update_b(x_train, W1, z1, b1, 0, rho)
    W1 = common.update_W(x_train, b1, z1, W1, 0, rho, theta)
    W1 = common.update_W(x_train, b1, z1, W1, 0, rho, theta)
    b1 = common.update_b(x_train, W1, z1, b1, 0, rho)
    z1 = common.update_z(x_train, W1, b1, a1, z1, 0, 0, rho)
    a1 = common.update_a(W2, b2, z2, z1, a1, 0, 0, rho, tau)
    W2 = common.update_W(a1, b2, z2, W2, 0, rho, theta)
    b2 = common.update_b(a1, W2, z2, b2, 0, rho)
    z2 = common.update_z(a1, W2, b2, a2, z2, 0, 0, rho)
    a2 = common.update_a(W3, b3, z3, z2, a2, u, 0, rho, tau)
    W3 = common.update_W(a2, b3, z3, W3, u, rho, theta)
    b3 = common.update_b(a2, W3, z3, b3, u, rho)
    z3 = common.update_zl(a2, W3, b3, y_train, z3, u, rho)
    u = u + rho * (z3 - mul(W3, a2) - b3)
    print("Time per iteration:", time.time() - pre)
    r1 = np.sum((z1 - mul(W1, x_train) - b1) * (z1 - mul(W1, x_train) - b1))
    r2 = np.sum((z2 - mul(W2, a1) - b2) * (z2 - mul(W2, a1) - b2))
    r3 = np.sum((z3 - mul(W3, a2) - b3) * (z3 - mul(W3, a2) - b3))
    linear_r[i] = r3

    obj = objective(x_train, y_train, W1, b1, z1, a1, W2, b2, z2, a2, W3, b3,
                    z3, u, 0, 0, rho)
    print("obj=", obj)
    objective_value[i] = obj
    print("r1=", r1)
    print("r2=", r2)
    print("r3=", r3)
    print("rho=", rho)
    (train_acc[i], train_cost[i]) = test_accuracy(W1, b1, W2, b2, W3, b3,
Esempio n. 13
0
def grad_a(W_next, a, z, z_next, u_next, rho, gamma):
    """Return the sum of two terms that depend on the activation ``a``.

    The first term is ``rho * W_next^T (W_next a - z_next - u_next / rho)``
    and the second is ``gamma * (a - relu(z))``.

    Args:
        W_next: Weight of the next layer.
        a: Activation being updated.
        z: Output of the current layer, passed through ``relu``.
        z_next: Output of the next layer.
        u_next: Multiplier; enters as ``u_next / rho``.
        rho: Weight on the first term.
        gamma: Weight on the second term.

    Returns:
        The combined value, shaped like ``a``.
    """
    linear_gap = mul(W_next, a) - z_next - u_next / rho
    coupling_term = rho * mul(np.transpose(W_next), linear_gap)
    activation_term = gamma * (a - relu(z))
    return coupling_term + activation_term
Esempio n. 14
0
def obj_a(W_next, a, z, z_next, u_next, rho, gamma):
    """Return the sum of two quadratic penalties that depend on ``a``.

    The value is ``rho / 2 * sum((z_next - W_next a + u_next / rho) ** 2)``
    plus ``gamma / 2 * sum((a - relu(z)) ** 2)``.

    Args:
        W_next: Weight of the next layer.
        a: Activation being evaluated.
        z: Output of the current layer, passed through ``relu``.
        z_next: Output of the next layer.
        u_next: Multiplier; enters as ``u_next / rho``.
        rho: Weight on the first penalty.
        gamma: Weight on the second penalty.

    Returns:
        The combined penalty value.
    """
    linear_gap = z_next - mul(W_next, a) + u_next / rho
    coupling_penalty = rho / 2 * np.sum(linear_gap * linear_gap)

    activation_gap = a - relu(z)
    activation_penalty = gamma / 2 * np.sum(activation_gap * activation_gap)

    return coupling_penalty + activation_penalty