Example #1
def stf_4dim_time_day(tensor, r, random_seed=0, num_iter=100, eps=1e-8, lr=1):
    np.random.seed(random_seed)
    args_num = [1, 2, 3, 4]

    def cost(tensor, home, appliance, day, hour):
        pred = np.einsum('Hr, Ar, ADr, ATr ->HADT', home, appliance, day, hour)
        mask = ~np.isnan(tensor)
        error = (pred - tensor)[mask].flatten()

        return np.sqrt((error**2).mean())

    mg = multigrad(cost, argnums=args_num)
    sizes = [(x, r) for x in tensor.shape]
    # ADr
    sizes[-2] = (tensor.shape[1], tensor.shape[-2], r)
    # ATr
    sizes[-1] = (tensor.shape[1], tensor.shape[-1], r)
    home = np.random.rand(*sizes[0])
    appliance = np.random.rand(*sizes[1])
    day = np.random.rand(*sizes[2])
    hour = np.random.rand(*sizes[3])

    sum_home = np.zeros_like(home)
    sum_appliance = np.zeros_like(appliance)
    sum_day = np.zeros_like(day)
    sum_hour = np.zeros_like(hour)

    # GD procedure
    for i in range(num_iter):
        del_home, del_appliance, del_day, del_hour = mg(
            tensor, home, appliance, day, hour)

        sum_home += eps + np.square(del_home)
        lr_home = np.divide(lr, np.sqrt(sum_home))
        home -= lr_home * del_home

        sum_appliance += eps + np.square(del_appliance)
        lr_appliance = np.divide(lr, np.sqrt(sum_appliance))
        appliance -= lr_appliance * del_appliance

        sum_day += eps + np.square(del_day)
        lr_day = np.divide(lr, np.sqrt(sum_day))
        day -= lr_day * del_day

        sum_hour += eps + np.square(del_hour)
        lr_hour = np.divide(lr, np.sqrt(sum_hour))
        hour -= lr_hour * del_hour

        # Projection to non-negative space
        home[home < 0] = 1e-8
        appliance[appliance < 0] = 1e-8
        day[day < 0] = 1e-8
        hour[hour < 0] = 1e-8

        if i % 50 == 0:
            print(cost(tensor, home, appliance, day, hour), i)
            sys.stdout.flush()

    return home, appliance, day, hour
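These snippets rely on multigrad(fun, argnums=...), a convenience wrapper from older autograd releases; current autograd exposes grad, value_and_grad, and related operators instead. A minimal stand-in, assuming autograd is installed; the name multigrad_sketch is made up here, and it simply bundles one grad per requested argument index.

import autograd.numpy as np
from autograd import grad

def multigrad_sketch(fun, argnums=(0,)):
    # One grad() per requested argument; returns gradients in argnums order.
    gradfuns = [grad(fun, n) for n in argnums]
    def gradfun(*args, **kwargs):
        return tuple(g(*args, **kwargs) for g in gradfuns)
    return gradfun

# Quick check on a toy scalar-valued function of two arrays.
f = lambda x, y: np.sum(x * np.sin(y))
gx, gy = multigrad_sketch(f, argnums=[0, 1])(np.ones(3), np.zeros(3))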
Example #2
def factorization(tensor,
                  num_latent,
                  num_iter=2000,
                  lr=1,
                  dis=False,
                  random_seed=0,
                  eps=1e-8,
                  T_known=None):
    np.random.seed(random_seed)
    cost = cost_abs

    args_num = [0, 1, 2]
    mg = autograd.multigrad(cost, argnums=args_num)
    M, N, K = tensor.shape

    H = np.random.rand(M, num_latent)
    A = np.random.rand(N, num_latent)
    T = np.random.rand(K, num_latent)

    sum_square_gradients_A = np.zeros_like(A)
    sum_square_gradients_H = np.zeros_like(H)
    sum_square_gradients_T = np.zeros_like(T)
    if T_known is not None:
        T = set_known(T, T_known)

    # GD procedure
    for i in range(num_iter):
        del_h, del_a, del_t = mg(H, A, T, tensor)

        sum_square_gradients_A += eps + np.square(del_a)
        lr_a = np.divide(lr, np.sqrt(sum_square_gradients_A))
        A -= lr_a * del_a

        sum_square_gradients_H += eps + np.square(del_h)
        sum_square_gradients_T += eps + np.square(del_t)

        lr_h = np.divide(lr, np.sqrt(sum_square_gradients_H))
        lr_t = np.divide(lr, np.sqrt(sum_square_gradients_T))

        H -= lr_h * del_h
        T -= lr_t * del_t

        if T_known is not None:
            T = set_known(T, T_known)

        # Projection to non-negative space
        H[H < 0] = 1e-8
        A[A < 0] = 1e-8
        T[T < 0] = 1e-8

        if i % 500 == 0:
            if dis:
                print(cost(H, A, T, tensor))

    return H, A, T
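cost_abs and set_known are defined elsewhere in that project. A minimal stand-in for the four-argument cost used above, assuming a masked RMSE over the observed (non-NaN) entries; the rank-r einsum contraction is a guess at how the factors are multiplied (the project's own cost_abs, shown in Example #16 below, goes through a case-specific multiply instead).

import autograd.numpy as np
from autograd import grad

def cost_abs_sketch(H, A, T, tensor):
    # Reconstruct the (M, N, K) tensor from the three rank-r factors and
    # score only the observed (non-NaN) entries with an RMSE.
    pred = np.einsum('Mr,Nr,Kr->MNK', H, A, T)
    mask = ~np.isnan(tensor)
    error = (pred - tensor)[mask].flatten()
    return np.sqrt((error ** 2).mean())

# Toy usage: gradient of the cost w.r.t. H on a partially observed tensor.
H, A, T = np.random.rand(4, 2), np.random.rand(3, 2), np.random.rand(5, 2)
tensor = np.einsum('Mr,Nr,Kr->MNK', H, A, T)
tensor[0, 0, 0] = np.nan  # pretend one entry is missing
del_h = grad(cost_abs_sketch, 0)(H, A, T, tensor)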
Example #3
def test_multigrad():
    def complicated_fun(a, b, c, d, e, f=1.1, g=9.0):
        return a + np.sin(b) + np.cosh(c) + np.cos(d) + np.tan(e) + f + g

    def complicated_fun_3_1(d, b):
        return complicated_fun(A, b, C, d, E, f=F, g=G)

    A = 0.5
    B = -0.3
    C = 0.2
    D = -1.1
    E = 0.7
    F = 0.6
    G = -0.1

    exact = multigrad(complicated_fun, argnums=[3, 1])(A, B, C, D, E, f=F, g=G)
    numeric = nd(complicated_fun_3_1, D, B)
    check_equivalent(exact, numeric)
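nd and check_equivalent come from autograd's old test utilities and are not shown in these snippets. A rough stand-in sufficient for this test, assuming scalar arguments: nd as a central-difference gradient and check_equivalent as a tolerance check. Both names carry a _sketch suffix here to mark them as guesses.

import numpy as onp

def nd_sketch(f, *args, eps=1e-4):
    # Central-difference approximation of df/d(arg_i) for scalar arguments.
    grads = []
    for i in range(len(args)):
        plus = [a + eps if j == i else a for j, a in enumerate(args)]
        minus = [a - eps if j == i else a for j, a in enumerate(args)]
        grads.append((f(*plus) - f(*minus)) / (2 * eps))
    return tuple(grads)

def check_equivalent_sketch(a, b, tol=1e-6):
    assert onp.allclose(a, b, atol=tol), (a, b)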
Example #4
def test_multigrad():
    def complicated_fun(a, b, c, d, e, f=1.1, g=9.0):
        return a + np.sin(b) + np.cosh(c) + np.cos(d) + np.tan(e) + f + g

    def complicated_fun_3_1(d, b):
        return complicated_fun(A, b, C, d, E, f=F, g=G)

    A = 0.5
    B = -0.3
    C = 0.2
    D = -1.1
    E = 0.7
    F = 0.6
    G = -0.1

    exact = multigrad(complicated_fun, argnums=[3, 1])(A, B, C, D, E, f=F, g=G)
    numeric = nd(complicated_fun_3_1, D, B)
    check_equivalent(exact, numeric)
Example #5
def test_value_and_multigrad():
    def complicated_fun(a, b, c, d, e, f=1.1, g=9.0):
        return a + np.sin(b) + np.cosh(c) + np.cos(d) + np.tan(e) + f + g

    A = 0.5
    B = -0.3
    C = 0.2
    D = -1.1
    E = 0.7
    F = 0.6
    G = -0.1

    dfun = multigrad(complicated_fun, argnums=[3, 1])
    dfun_both = value_and_multigrad(complicated_fun, argnums=[3, 1])

    check_equivalent(complicated_fun(A, B, C, D, E, f=F, g=G),
                     dfun_both(A, B, C, D, E, f=F, g=G)[0])

    check_equivalent(dfun(A, B, C, D, E, f=F, g=G),
                     dfun_both(A, B, C, D, E, f=F, g=G)[1])
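value_and_multigrad is likewise part of the old autograd API. A simple stand-in built on value_and_grad, assuming it is acceptable to re-evaluate the function once per argument index (fine for a test, wasteful in a training loop); the name value_and_multigrad_sketch is made up here.

from autograd import value_and_grad

def value_and_multigrad_sketch(fun, argnums=(0,)):
    # Returns (value, (grad w.r.t. argnums[0], grad w.r.t. argnums[1], ...)).
    vg_funs = [value_and_grad(fun, n) for n in argnums]
    def both(*args, **kwargs):
        results = [vg(*args, **kwargs) for vg in vg_funs]
        return results[0][0], tuple(g for _, g in results)
    return both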
Example #6
def test_value_and_multigrad():
    def complicated_fun(a, b, c, d, e, f=1.1, g=9.0):
        return a + np.sin(b) + np.cosh(c) + np.cos(d) + np.tan(e) + f + g

    A = 0.5
    B = -0.3
    C = 0.2
    D = -1.1
    E = 0.7
    F = 0.6
    G = -0.1

    dfun = multigrad(complicated_fun, argnums=[3, 1])
    dfun_both = value_and_multigrad(complicated_fun, argnums=[3, 1])

    check_equivalent(complicated_fun(A, B, C, D, E, f=F, g=G),
                     dfun_both(A, B, C, D, E, f=F, g=G)[0])

    check_equivalent(dfun(A, B, C, D, E, f=F, g=G),
                     dfun_both(A, B, C, D, E, f=F, g=G)[1])
Example #7
    init_dsc_params = init_random_params(param_scale, dsc_layer_sizes)

    num_batches = int(np.ceil(len(train_images) / batch_size))
    def batch_indices(iter):
        idx = iter % num_batches
        return slice(idx * batch_size, (idx+1) * batch_size)

    # Define training objective
    seed = npr.RandomState(0)
    def objective(gen_params, dsc_params, iter):
        idx = batch_indices(iter)
        return gan_objective(gen_params, dsc_params, train_images[idx],
                             batch_size, noise_dim, seed)

    # Get gradients of objective using autograd.
    both_objective_grad = multigrad(objective, argnums=[0,1])

    print("     Epoch     |    Objective  |       Fake probability | Real Probability  ")
    def print_perf(gen_params, dsc_params, iter, gen_gradient, dsc_gradient):
        if iter % 10 == 0:
            ability = np.mean(objective(gen_params, dsc_params, iter))
            fake_data = generate_from_noise(gen_params, 20, noise_dim, seed)
            real_data = train_images[batch_indices(iter)]
            probs_fake = np.mean(sigmoid(neural_net_predict(dsc_params, fake_data)))
            probs_real = np.mean(sigmoid(neural_net_predict(dsc_params, real_data)))
            print("{:15}|{:20}|{:20}|{:20}".format(iter//num_batches, ability, probs_fake, probs_real))
            save_images(fake_data, 'gan_samples.png', vmin=0, vmax=1)

    # The optimizers provided can optimize lists, tuples, or dicts of parameters.
    optimized_params = adam_minimax(both_objective_grad,
                                    init_gen_params, init_dsc_params,
Example #8
    rprime = lambda r: grad(loglike)(r, p(r))
    r = newton(rprime, r_guess)
    return r, p(r)


if __name__ == "__main__":
    # generate data
    npr.seed(0)
    data = negbin_sample(r=5, p=0.5, size=1000)

    # fit likelihood-extremizing parameters
    r, p = fit_maxlike(data, r_guess=1)

    # report fit
    print('Fit parameters:')
    print('r={r}, p={p}'.format(r=r, p=p))

    print('Check that we are at a local stationary point:')
    loglike = lambda r, p: np.sum(negbin_loglike(r, p, data))
    print(multigrad(loglike, argnums=[0, 1])(r, p))

    import matplotlib.pyplot as plt
    xm = data.max()
    plt.figure()
    plt.hist(data, bins=np.arange(xm+1)-0.5, density=True, label='normed data counts')
    plt.xlim(0, xm)
    plt.plot(np.arange(xm), np.exp(negbin_loglike(r, p, np.arange(xm))), label='maxlike fit')
    plt.xlabel('k')
    plt.ylabel('p(k)')
    plt.legend(loc='best')
    plt.show()
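negbin_sample, negbin_loglike, fit_maxlike, and newton are defined earlier in that script (it follows autograd's negative-binomial example). A sketch of the two distribution helpers under one common parameterization, in which p is the per-trial "success" probability and the mean is r*p/(1-p); gammaln needs scipy installed. The maximum-likelihood and Newton pieces are omitted.

import autograd.numpy as np
import numpy.random as npr
from autograd.scipy.special import gammaln

def negbin_loglike(r, p, x):
    # log pmf of NB(r, p):  Gamma(x+r) / (Gamma(r) * x!) * (1-p)^r * p^x
    return (gammaln(x + r) - gammaln(r) - gammaln(x + 1)
            + x * np.log(p) + r * np.log(1 - p))

def negbin_sample(r, p, size):
    # Draw samples via the gamma-Poisson mixture representation:
    # lam ~ Gamma(shape=r, scale=p/(1-p)), x | lam ~ Poisson(lam).
    return npr.poisson(npr.gamma(r, p / (1 - p), size=size))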
Example #9
                                  seed) - \
               c2 * entropy_objective(gen_params,
                                      batch_size,
                                      noise_dimZ,
                                      seed,
                                      neighbors_function)

    def c1c2_schedule(iter):
        if iter < 50:
            return 0, 1
        return 1, 0.2

    # Get gradients of objective using autograd.
    both_objective_grad = multigrad(objective, argnums=[0, 1])

    print(
        "     Epoch     |    Objective  |       Fake probability | Real Probability  "
    )

    def print_perf(gen_params, dsc_params, iter, gen_gradient, dsc_gradient):
        if iter % 10 == 0:
            ability = np.mean(
                objective(gen_params, dsc_params, iter, neighbors_function))

            fake_z = generate_from_noise(gen_params, 10000, noise_dimZ, seed)
            noiseX = seed.randn(10000, noise_dimX)
            fake_data = igp_hat(fake_z, noiseX)

            # fake_data = fake_z
Example #10
	sum_square_gradients_G3 = np.zeros_like(G3)
	sum_square_gradients_U0 = np.zeros_like(U0)
	sum_square_gradients_U1 = np.zeros_like(U1)
	sum_square_gradients_U2 = np.zeros_like(U2)
	sum_square_gradients_I0 = np.zeros_like(I0)
	sum_square_gradients_I1 = np.zeros_like(I1)
	sum_square_gradients_I2 = np.zeros_like(I2)
	sum_square_gradients_F0 = np.zeros_like(F0)
	sum_square_gradients_F1 = np.zeros_like(F1)
	sum_square_gradients_F2 = np.zeros_like(F2)
	sum_square_gradients_F3 = np.zeros_like(F3)
	sum_square_gradients_W0 = np.zeros_like(W0)
	sum_square_gradients_W1 = np.zeros_like(W1)
	sum_square_gradients_W2 = np.zeros_like(W2)

	mg = multigrad(cost, argnums=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])

	# SGD procedure
	for i in range(num_iter):
		starttime = time.time()
		print(i+1)

		#print('?')
		del_g1, del_g2, del_g3, del_u0, del_u1, del_u2, del_i0, del_i1, del_i2, del_f0, del_f1, del_f2, del_f3, del_w0, del_w1, del_w2 = mg(G1, G2, G3, U0, U1, U2, I0, I1, I2, F0, F1, F2, F3, W0, W1, W2, 
												sps_tensor_useritemf, sps_tensor_userwordf, sps_tensor_itemwordf, 
												element_list_useritemf, element_list_userwordf, element_list_itemwordf, overall_rating_matrix, I_num, F_num, 
												U0_dim, U1_dim, U2_dim, I0_dim, I1_dim, I2_dim, F0_dim, F1_dim, F2_dim, F3_dim, W0_dim, W1_dim, W2_dim, lmd_BPR, case)

		sum_square_gradients_G1 += eps + np.square(del_g1)
		sum_square_gradients_G2 += eps + np.square(del_g2)
		sum_square_gradients_G3 += eps + np.square(del_g3)
Example #11
def minibatch_adagradSGD_train(uiaw_list, uw_frequency_mat,
                               ui_rating_dic, uia_senti_dic, iaw_frequency_dic,
                               U_dim, I_dim, F_dim, W_dim, U_num, I_num, F_num_1more, W_num, num_iter,
                               lmd_reg, lmd_r, lmd_s, lmd_o, neg_sample_rate, lmd_bpr, minibatch,
                               lr, ui_rating_dic_test, uia_senti_dic_test, random_seed=0, eps=1e-8):
    np.random.seed(random_seed)
    cost = minibath_sparse_cost

    U_dim_initial = (U_num, U_dim)
    I_dim_initial = (I_num, I_dim)
    F_dim_initial = (F_num_1more, F_dim)
    W_dim_initial = (W_num, W_dim)

    U = np.random.rand(*U_dim_initial)
    I = np.random.rand(*I_dim_initial)
    F = np.random.rand(*F_dim_initial)
    W = np.random.rand(*W_dim_initial)

    sum_square_gradients_U = np.zeros_like(U)
    sum_square_gradients_I = np.zeros_like(I)
    sum_square_gradients_F = np.zeros_like(F)
    sum_square_gradients_W = np.zeros_like(W)

    # mg = multigrad(cost, argnums=[0, 1, 2, 3])
    mg = multigrad(cost, argnums=[0, 1, 2])
    # mg = multigrad_dict(cost)

    # SGD procedure
    for i in range(num_iter):
        starttime = time.time()
        Print = False
        if i % 100 == 0:
            print(i)
            Print = True
        # del_u, del_i, del_f, del_w = mg(U, I, F, W, uiaw_list, uw_frequency_mat,
        #                                 ui_rating_dic, uia_senti_dic, iaw_frequency_dic,
        #                                 lmd_reg, lmd_r, lmd_s, lmd_o, neg_sample_rate, lmd_bpr, minibatch, Print)

        del_u, del_i, del_f = mg(U, I, F, W, uiaw_list, uw_frequency_mat,
                                        ui_rating_dic, uia_senti_dic, iaw_frequency_dic,
                                        lmd_reg, lmd_r, lmd_s, lmd_o, neg_sample_rate, lmd_bpr, minibatch, Print)
        # eps+del_g**2
        sum_square_gradients_U += eps + np.square(del_u)
        sum_square_gradients_I += eps + np.square(del_i)
        sum_square_gradients_F += eps + np.square(del_f)
        # sum_square_gradients_W += eps + np.square(del_w)

        # AdaGrad step size: lr / sqrt(accumulated squared gradients), i.e. 0.1 / ((eps + del_g**2) ** 0.5)
        lr_u = np.divide(lr, np.sqrt(sum_square_gradients_U))
        lr_i = np.divide(lr, np.sqrt(sum_square_gradients_I))
        lr_f = np.divide(lr, np.sqrt(sum_square_gradients_F))
        # lr_w = np.divide(lr, np.sqrt(sum_square_gradients_W))

        # AdaGrad update: G1 = G1 - 0.1 / sqrt(accumulated squared gradients) * del_g
        U -= lr_u * del_u
        I -= lr_i * del_i
        F -= lr_f * del_f
        # W -= lr_w * del_w

        # Projection to non-negative space
        U[U < 0] = 0
        I[I < 0] = 0
        F[F < 0] = 0
        # W[W < 0] = 0

        nowtime = time.time()
        timeleft = (nowtime - starttime) * (num_iter - i - 1)

        if i % config.print_every_times == 0:

            if timeleft / 60 > 60:
                print('time left: ' + str(int(timeleft / 3600)) + ' hr ' + str(int(timeleft / 60 % 60)) + ' min ' + str(
                    int(timeleft % 60)) + ' s')
            else:
                print("time left: " + str(int(timeleft / 60)) + ' min ' + str(int(timeleft % 60)) + ' s')

            # ---------Evaluate  or Not------------
            print('Evaluate...')
            evaluate_res = []
            evaluate_senti_res = []
            rec_item = np.einsum('ma,na ->mn ', U, np.hstack((I, np.tile(F[104], (I_num, 1)))))
            for key in ui_rating_dic_test.keys():
                real_rating = ui_rating_dic_test[key]
                key = key[1:-1].split(",")
                u_id = int(key[0])
                i_id = int(key[1])
                rec_rating = rec_item[u_id][i_id]
                evaluate_res.append([u_id, i_id, real_rating, rec_rating])
            for key in uia_senti_dic_test.keys():
                real_senti = uia_senti_dic_test[key]
                key = key[1:-1].split(",")
                u_id = int(key[0])
                i_id = int(key[1])
                a_id = int(key[2])
                A_ = np.hstack((I[i_id], F[a_id]))
                rec_senti = np.einsum("a,a->", U[u_id], A_)
                evaluate_senti_res.append([u_id, (i_id, a_id), real_senti, rec_senti])
            from FSER.Metric import metric
            cur_time = time.time()
            train_time = cur_time - starttime
            metric = metric.Metric()
            print("MAE:")
            MAEv = metric.MAE(evaluate_res)
            MAEs = metric.MAE(evaluate_senti_res)
            print(str(round(MAEv, 4)) + "\t" + str(round(MAEs, 6)))
            print("RMSE:")
            RMSEv = metric.RMSE(evaluate_res)
            RMSEs = metric.RMSE(evaluate_senti_res)
            print(str(round(RMSEv, 4)) + "\t" + str(round(RMSEs, 6)))
            with open("./Result/FSER_/print_every_"+str(config.print_every_times)+"in_" + str(num_iter)+"."+config.dataset_name, "a") as rf:
                rf.write("MAE:{}\t".format(str(MAEv)))
                rf.write("MAEs:{}".format(str(MAEs)))
                rf.write("\n")
                rf.write("RMSE:{}\t".format(str(RMSEv)))
                rf.write("RMSEs:{}".format(str(RMSEs)))
                rf.write("\n")
                rf.write("train_time:{}".format(str(train_time)))
                rf.write("\n")



    return U, I, F, W
Example #12
if __name__ == "__main__":
    # generate data
    npr.seed(0)
    data = negbin_sample(r=5, p=0.5, size=1000)

    # fit likelihood-extremizing parameters
    r, p = fit_maxlike(data, r_guess=1)

    # report fit
    print('Fit parameters:')
    print('r={r}, p={p}'.format(r=r, p=p))

    print('Check that we are at a local stationary point:')
    loglike = lambda r, p: np.sum(negbin_loglike(r, p, data))
    grad_both = multigrad(loglike, argnums=[0, 1])
    print(grad_both(r, p))

    import matplotlib.pyplot as plt
    xm = data.max()
    plt.figure()
    plt.hist(data,
             bins=np.arange(xm + 1) - 0.5,
             density=True,
             label='normed data counts')
    plt.xlim(0, xm)
    plt.plot(np.arange(xm),
             np.exp(negbin_loglike(r, p, np.arange(xm))),
             label='maxlike fit')
    plt.xlabel('k')
    plt.ylabel('p(k)')
Example #13
        HBATs.append(multiply_HBAT(H, B, A, T))
        if i % 100 == 0:
            if dis:
                print(cost(H, B, A, T, tensor))

    return H, B, A, T, Hs, Bs, As, Ts, HBATs, costs


def learn_HAT_adagrad_graph(case, tensor, L, num_home_factors, num_season_factors, num_iter=2000, lr=0.01, dis=False,
                            lam=1, random_seed=0, eps=1e-8, A_known = None, T_known = None):
    np.random.seed(random_seed)
    cost = cost_graph_laplacian
    
    args_num = [0, 1, 2]
    mg = multigrad(cost, argnums=args_num)

    params = {}
    params['M'], params['N'], params['O'] = tensor.shape
    params['a'] = num_home_factors
    params['b'] = num_season_factors
    H_dim_chars = list(cases[case]['HA'].split(",")[0].strip())
    H_dim = tuple(params[x] for x in H_dim_chars)
    A_dim_chars = list(cases[case]['HA'].split(",")[1].split("-")[0].strip())
    A_dim = tuple(params[x] for x in A_dim_chars)
    T_dim_chars = list(cases[case]['HAT'].split(",")[1].split("-")[0].strip())
    T_dim = tuple(params[x] for x in T_dim_chars)

    H = np.random.rand(*H_dim)
    A = np.random.rand(*A_dim)
    T = np.random.rand(*T_dim)
Example #14
def test_multigrad_onearg():
    fun = lambda x, y: np.sum(x + np.sin(y))
    packed_fun = lambda xy: np.sum(xy[0] + np.sin(xy[1]))
    A, B = npr.randn(3), npr.randn(3)
    check_equivalent(multigrad(fun)(A, B), grad(packed_fun)((A, B)))
Example #15
def test_multigrad_onearg():
    fun = lambda x, y: np.sum(x + np.sin(y))
    packed_fun = lambda xy: np.sum(xy[0] + np.sin(xy[1]))
    A, B = npr.randn(3), npr.randn(3)
    check_equivalent(multigrad(fun)(A, B), (grad(packed_fun)((A, B))[0], ))
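The "packed" form used in these two tests also works with plain grad on its own: autograd differentiates through container arguments, so the gradient with respect to a tuple is itself a tuple of per-element gradients. A small standalone check:

import autograd.numpy as np
import numpy.random as npr
from autograd import grad

fun = lambda xy: np.sum(xy[0] + np.sin(xy[1]))
A, B = npr.randn(3), npr.randn(3)

gA, gB = grad(fun)((A, B))   # gradient w.r.t. a tuple is a tuple
assert np.allclose(gA, np.ones(3))
assert np.allclose(gB, np.cos(B))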
Example #16


def cost_abs(H, A, T, E_np_masked, case):
    HAT = multiply_case(H, A, T, case)
    mask = ~np.isnan(E_np_masked)
    error = (HAT - E_np_masked)[mask].flatten()
    return np.sqrt((error ** 2).mean())


def learn_HAT_adagrad(case, tensor, num_home_factors, num_season_factors, num_iter=2000, lr=0.01, dis=False, random_seed=0, eps=1e-8, A_known=None, T_known=None, cost=cost_abs):
    np.random.seed(random_seed)
    args_num = [0, 1, 2]

    mg = multigrad(cost, argnums=args_num)

    params = {}
    params['M'], params['N'], params['O'] = tensor.shape
    params['a'] = num_home_factors
    params['b'] = num_season_factors
    H_dim_chars = list(cases[case]['HA'].split(",")[0].strip())
    H_dim = tuple(params[x] for x in H_dim_chars)
    A_dim_chars = list(cases[case]['HA'].split(",")[1].split("-")[0].strip())
    A_dim = tuple(params[x] for x in A_dim_chars)
    T_dim_chars = list(cases[case]['HAT'].split(",")[1].split("-")[0].strip())
    T_dim = tuple(params[x] for x in T_dim_chars)

    H = np.random.rand(*H_dim)
    A = np.random.rand(*A_dim)
    T = np.random.rand(*T_dim)
Example #17
def plot_results(data, r, p):
    xm = data.max()
    plt.figure()
    plt.hist(data, bins=np.arange(xm+1)-0.5, density=True, label='normed data counts')
    plt.xlim(0, xm)
    plt.plot(np.arange(xm), np.exp(negbin_loglike(r, p, np.arange(xm))), label='maxlike fit')
    plt.xlabel('k')
    plt.ylabel('p(k)')
    plt.legend(loc='best')


if __name__ == "__main__":
    # generate data
    npr.seed(0)
    data = negbin_sample(r=5, p=0.5, size=1000)

    # fit likelihood-extremizing parameters
    r, p = fit_maxlike(data, r_guess=1)

    # report fit
    print('Fit parameters:')
    print('r={r}, p={p}'.format(r=r, p=p))

    print('Check that we are at a local stationary point:')
    loglike = lambda r, p: np.sum(negbin_loglike(r, p, data))
    grad_both = multigrad(loglike, argnums=[0,1])
    print(grad_both(r, p))

    plot_results(data, r, p)
    plt.show()
Example #18
def learn_HAT_SGD_adagrad(sps_tensor_useritemf,
                          sps_tensor_ifw,
                          sps_overall_rating,
                          U_dim,
                          I_dim,
                          F_dim,
                          W_dim,
                          U_num,
                          I_num,
                          F_num_1more,
                          W_num,
                          num_iter=100000,
                          lr=0.1,
                          dis=False,
                          cost_function='abs',
                          random_seed=0,
                          eps=1e-8):
    F_num = F_num_1more - 1
    np.random.seed(random_seed)
    cost = cost_abs_sparse_BPR_SGD
    element_list_useritemf = list(sps_tensor_useritemf)
    element_list_ifw = list(sps_tensor_ifw)
    element_list_ifw_2 = []
    for item in sps_tensor_ifw.items():
        if item[1] > 3.93:
            element_list_ifw_2.append(item[0])
    element_list_overall_rating = list(sps_overall_rating)

    params = {}
    params['M'], params['N'], params['F'], params['W'] = (U_num, I_num, F_num,
                                                          W_num)
    '''
	params['a'] = U0_dim
	params['b'] = U1_dim
	params['c'] = U2_dim
	params['d'] = I0_dim
	params['e'] = I1_dim
	params['f'] = I2_dim
	params['g'] = F_dim
	params['h'] = W_dim
	'''
    print("users:" + str(params['M']))
    print("items:" + str(params['N']))
    print("features:" + str(params['F']))
    print("words:" + str(params['W']))

    U_dim_initial = (U_num, U_dim)
    I_dim_initial = (I_num, I_dim)
    F_dim_initial = (F_num_1more, F_dim)
    W_dim_initial = (W_num, W_dim)

    U = np.random.rand(*U_dim_initial)
    I = np.random.rand(*I_dim_initial)
    F = np.random.rand(*F_dim_initial)
    W = np.random.rand(*W_dim_initial)

    sum_square_gradients_U = np.zeros_like(U)
    sum_square_gradients_I = np.zeros_like(I)
    sum_square_gradients_F = np.zeros_like(F)
    sum_square_gradients_W = np.zeros_like(W)

    mg = multigrad(cost, argnums=[0, 1, 2, 3])

    # SGD procedure
    for i in range(num_iter):
        starttime = time.time()
        print(i + 1)
        # print('?')
        del_u, del_i, del_f, del_w = mg(U, I, F, W, sps_tensor_useritemf,
                                        sps_tensor_ifw, element_list_useritemf,
                                        element_list_ifw, element_list_ifw_2,
                                        sps_overall_rating,
                                        element_list_overall_rating)

        # eps+del_g**2

        sum_square_gradients_U += eps + np.square(del_u)
        sum_square_gradients_I += eps + np.square(del_i)
        sum_square_gradients_F += eps + np.square(del_f)
        sum_square_gradients_W += eps + np.square(del_w)

        # AdaGrad step size: lr / sqrt(accumulated squared gradients), i.e. 0.1 / ((eps + del_g**2) ** 0.5)

        lr_u = np.divide(lr, np.sqrt(sum_square_gradients_U))

        lr_i = np.divide(lr, np.sqrt(sum_square_gradients_I))

        lr_f = np.divide(lr, np.sqrt(sum_square_gradients_F))

        lr_w = np.divide(lr, np.sqrt(sum_square_gradients_W))

        # Gradient descent update: G1 = G1 - 0.1 / ((eps + del_g**2) ** 0.5) * del_g

        U -= lr_u * del_u
        I -= lr_i * del_i
        F -= lr_f * del_f
        W -= lr_w * del_w

        # Projection to non-negative space

        U[U < 0] = 0
        I[I < 0] = 0
        F[F < 0] = 0
        W[W < 0] = 0

        nowtime = time.time()
        timeleft = (nowtime - starttime) * (num_iter - i - 1)

        if timeleft / 60 > 60:
            print('time left: ' + str(int(timeleft / 3600)) + ' hr ' +
                  str(int(timeleft / 60 % 60)) + ' min ' +
                  str(int(timeleft % 60)) + ' s')
        else:
            print("time left: " + str(int(timeleft / 60)) + ' min ' +
                  str(int(timeleft % 60)) + ' s')

    return U, I, F, W
Example #19
def learn_HBAT_adagrad_graph(tensor, num_home_factors, num_season_factors, num_iter=2000, lr=0.01, dis=False,
                             random_seed=0, eps=1e-8, B_known=None, A_known=None, T_known=None):


    def multiply_HBAT(H, B, A, T):
        return np.einsum('mh, hn, ns, ts ->mnt', H, B, A, T)

    def cost(H, B, A, T, tensor):
        mask = ~np.isnan(tensor)
        HBAT = multiply_HBAT(H, B, A, T)
        error = (HBAT - tensor)[mask].flatten()
        return np.sqrt((error ** 2).mean())

    np.random.seed(random_seed)


    args_num = [0, 1, 2, 3]
    mg = multigrad(cost, argnums=args_num)

    m, n, t = tensor.shape
    h, s = num_home_factors, num_season_factors

    H = np.random.rand(m, h)
    B = np.random.rand(h, n)
    A = np.random.rand(n, s)
    T = np.random.rand(t, s)

    if A_known is not None:
        A = set_known(A, A_known)
    if B_known is not None:
        B = set_known(B, B_known)
    sum_square_gradients_A = np.zeros_like(A)
    sum_square_gradients_B = np.zeros_like(B)
    sum_square_gradients_H = np.zeros_like(H)
    sum_square_gradients_T = np.zeros_like(T)
    Hs = [H.copy()]
    Bs = [B.copy()]
    Ts = [T.copy()]
    As = [A.copy()]
    HBATs = [multiply_HBAT(H, B, A, T)]
    costs = [cost(H, B, A, T, tensor)]

    # GD procedure
    for i in range(num_iter):
        del_h, del_b, del_a, del_t = mg(H, B, A, T, tensor)

        sum_square_gradients_A += eps + np.square(del_a)
        lr_a = np.divide(lr, np.sqrt(sum_square_gradients_A))
        A -= lr_a * del_a

        sum_square_gradients_H += eps + np.square(del_h)
        sum_square_gradients_B += eps + np.square(del_b)
        sum_square_gradients_T += eps + np.square(del_t)

        lr_h = np.divide(lr, np.sqrt(sum_square_gradients_H))
        lr_b = np.divide(lr, np.sqrt(sum_square_gradients_B))
        lr_t = np.divide(lr, np.sqrt(sum_square_gradients_T))

        H -= lr_h * del_h
        B -= lr_b * del_b
        T -= lr_t * del_t


        if A_known is not None:
            A = set_known(A, A_known)
        if B_known is not None:
            B = set_known(B, B_known)
        if T_known is not None:
            T = set_known(T, T_known)

        # Projection to non-negative space
        H[H < 0] = 1e-8
        A[A < 0] = 1e-8
        T[T < 0] = 1e-8
        B[B < 0] = 1e-8

        As.append(A.copy())
        Ts.append(T.copy())
        Hs.append(H.copy())
        Bs.append(B.copy())

        costs.append(cost(H, B, A, T,  tensor))

        HBATs.append(multiply_HBAT(H, B, A, T))
        if i % 100 == 0:
            if dis:
                print(cost(H, B, A, T, tensor))

    return H, B, A, T, Hs, Bs, As, Ts, HBATs, costs
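set_known, used here and in Example #2 above, is not shown in any of these snippets. A guess at its intent, assuming the known factor has the same shape as the learned one with NaN marking unknown entries: copy the observed values back into the factor so they are re-imposed after every gradient step. The name set_known_sketch marks this as an assumption.

import numpy as np

def set_known_sketch(A, A_known):
    # Assumed behavior: wherever A_known holds an observed (non-NaN) value,
    # overwrite the corresponding entry of A with it.
    mask = ~np.isnan(A_known)
    A[mask] = A_known[mask]
    return A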