# Common imports assumed by these snippets: multigrad / value_and_multigrad are
# convenience wrappers that existed in pre-1.2 releases of autograd.
import sys
import time
import autograd
import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import grad, multigrad


def stf_4dim_time_day(tensor, r, random_seed=0, num_iter=100, eps=1e-8, lr=1):
    np.random.seed(random_seed)
    args_num = [1, 2, 3, 4]

    def cost(tensor, home, appliance, day, hour):
        pred = np.einsum('Hr, Ar, ADr, ATr ->HADT', home, appliance, day, hour)
        mask = ~np.isnan(tensor)
        error = (pred - tensor)[mask].flatten()
        return np.sqrt((error ** 2).mean())

    mg = multigrad(cost, argnums=args_num)
    sizes = [(x, r) for x in tensor.shape]
    # ADr
    sizes[-2] = (tensor.shape[1], tensor.shape[-2], r)
    # ATr
    sizes[-1] = (tensor.shape[1], tensor.shape[-1], r)
    home = np.random.rand(*sizes[0])
    appliance = np.random.rand(*sizes[1])
    day = np.random.rand(*sizes[2])
    hour = np.random.rand(*sizes[3])
    sum_home = np.zeros_like(home)
    sum_appliance = np.zeros_like(appliance)
    sum_day = np.zeros_like(day)
    sum_hour = np.zeros_like(hour)

    # GD procedure
    for i in range(num_iter):
        del_home, del_appliance, del_day, del_hour = mg(tensor, home, appliance, day, hour)
        sum_home += eps + np.square(del_home)
        lr_home = np.divide(lr, np.sqrt(sum_home))
        home -= lr_home * del_home
        sum_appliance += eps + np.square(del_appliance)
        lr_appliance = np.divide(lr, np.sqrt(sum_appliance))
        appliance -= lr_appliance * del_appliance
        sum_day += eps + np.square(del_day)
        lr_day = np.divide(lr, np.sqrt(sum_day))
        day -= lr_day * del_day
        sum_hour += eps + np.square(del_hour)
        lr_hour = np.divide(lr, np.sqrt(sum_hour))
        hour -= lr_hour * del_hour
        # Projection to non-negative space
        home[home < 0] = 1e-8
        appliance[appliance < 0] = 1e-8
        day[day < 0] = 1e-8
        hour[hour < 0] = 1e-8
        if i % 50 == 0:
            print(cost(tensor, home, appliance, day, hour), i)
            sys.stdout.flush()
    return home, appliance, day, hour
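# Minimal usage sketch for stf_4dim_time_day (not from the original source): it
# assumes a (home x appliance x day x hour) reading tensor with NaNs marking
# missing entries, and recovers four non-negative rank-r factors.
demo = np.random.rand(5, 3, 7, 24)                 # 5 homes, 3 appliances, 7 days, 24 hours
demo[np.random.rand(*demo.shape) < 0.2] = np.nan   # hide ~20% of the readings
home, appliance, day, hour = stf_4dim_time_day(demo, r=2, num_iter=100)
recon = np.einsum('Hr, Ar, ADr, ATr ->HADT', home, appliance, day, hour)
print(recon.shape)                                 # (5, 3, 7, 24)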
def factorization(tensor, num_latent, num_iter=2000, lr=1, dis=False,
                  random_seed=0, eps=1e-8, T_known=None):
    np.random.seed(random_seed)
    cost = cost_abs
    args_num = [0, 1, 2]
    mg = autograd.multigrad(cost, argnums=args_num)
    M, N, K = tensor.shape
    H = np.random.rand(M, num_latent)
    A = np.random.rand(N, num_latent)
    T = np.random.rand(K, num_latent)
    sum_square_gradients_A = np.zeros_like(A)
    sum_square_gradients_H = np.zeros_like(H)
    sum_square_gradients_T = np.zeros_like(T)
    if T_known is not None:
        T = set_known(T, T_known)

    # GD procedure
    for i in range(num_iter):
        del_h, del_a, del_t = mg(H, A, T, tensor)
        sum_square_gradients_A += eps + np.square(del_a)
        lr_a = np.divide(lr, np.sqrt(sum_square_gradients_A))
        A -= lr_a * del_a
        sum_square_gradients_H += eps + np.square(del_h)
        sum_square_gradients_T += eps + np.square(del_t)
        lr_h = np.divide(lr, np.sqrt(sum_square_gradients_H))
        lr_t = np.divide(lr, np.sqrt(sum_square_gradients_T))
        H -= lr_h * del_h
        T -= lr_t * del_t
        if T_known is not None:
            T = set_known(T, T_known)
        # Projection to non-negative space
        H[H < 0] = 1e-8
        A[A < 0] = 1e-8
        T[T < 0] = 1e-8
        if i % 500 == 0:
            if dis:
                print(cost(H, A, T, tensor))
    return H, A, T
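# Minimal usage sketch for factorization (not from the original source); it assumes
# the enclosing module defines cost_abs taking (H, A, T, tensor) as bound above, and
# set_known(T, T_known) overwriting the known entries of T.
E = np.random.rand(10, 4, 12)
E[np.random.rand(*E.shape) < 0.3] = np.nan   # simulate missing readings
H, A, T = factorization(E, num_latent=3, num_iter=500, dis=True)
print(H.shape, A.shape, T.shape)             # (10, 3) (4, 3) (12, 3)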
def test_multigrad():
    # nd (numerical derivative) and check_equivalent come from autograd's test
    # utilities of the same era (autograd.util).
    def complicated_fun(a, b, c, d, e, f=1.1, g=9.0):
        return a + np.sin(b) + np.cosh(c) + np.cos(d) + np.tan(e) + f + g

    def complicated_fun_3_1(d, b):
        return complicated_fun(A, b, C, d, E, f=F, g=G)

    A = 0.5
    B = -0.3
    C = 0.2
    D = -1.1
    E = 0.7
    F = 0.6
    G = -0.1

    exact = multigrad(complicated_fun, argnums=[3, 1])(A, B, C, D, E, f=F, g=G)
    numeric = nd(complicated_fun_3_1, D, B)
    check_equivalent(exact, numeric)
def test_value_and_multigrad():
    def complicated_fun(a, b, c, d, e, f=1.1, g=9.0):
        return a + np.sin(b) + np.cosh(c) + np.cos(d) + np.tan(e) + f + g

    A = 0.5
    B = -0.3
    C = 0.2
    D = -1.1
    E = 0.7
    F = 0.6
    G = -0.1

    dfun = multigrad(complicated_fun, argnums=[3, 1])
    dfun_both = value_and_multigrad(complicated_fun, argnums=[3, 1])
    check_equivalent(complicated_fun(A, B, C, D, E, f=F, g=G),
                     dfun_both(A, B, C, D, E, f=F, g=G)[0])
    check_equivalent(dfun(A, B, C, D, E, f=F, g=G),
                     dfun_both(A, B, C, D, E, f=F, g=G)[1])
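# A small standalone sketch (assuming the pre-1.2 autograd API used throughout)
# showing what the tests above verify: multigrad(f, argnums=[3, 1]) returns a tuple
# of gradients ordered as argnums, i.e. (df/dd, df/db), and value_and_multigrad
# additionally returns f's value.
f = lambda a, b, c, d: a * np.sin(b) + c * np.cos(d)
grads = multigrad(f, argnums=[3, 1])(1.0, 0.5, 2.0, -0.3)
print(grads)  # (df/dd, df/db) = (-c*sin(d), a*cos(b)) ~= (0.5910, 0.8776)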
init_dsc_params = init_random_params(param_scale, dsc_layer_sizes)

num_batches = int(np.ceil(len(train_images) / batch_size))

def batch_indices(iter):
    idx = iter % num_batches
    return slice(idx * batch_size, (idx + 1) * batch_size)

# Define training objective
seed = npr.RandomState(0)

def objective(gen_params, dsc_params, iter):
    idx = batch_indices(iter)
    return gan_objective(gen_params, dsc_params, train_images[idx],
                         batch_size, noise_dim, seed)

# Get gradients of objective using autograd.
both_objective_grad = multigrad(objective, argnums=[0, 1])

print(" Epoch | Objective | Fake probability | Real Probability ")

def print_perf(gen_params, dsc_params, iter, gen_gradient, dsc_gradient):
    if iter % 10 == 0:
        ability = np.mean(objective(gen_params, dsc_params, iter))
        fake_data = generate_from_noise(gen_params, 20, noise_dim, seed)
        real_data = train_images[batch_indices(iter)]
        probs_fake = np.mean(sigmoid(neural_net_predict(dsc_params, fake_data)))
        probs_real = np.mean(sigmoid(neural_net_predict(dsc_params, real_data)))
        print("{:15}|{:20}|{:20}|{:20}".format(iter // num_batches, ability,
                                               probs_fake, probs_real))
        save_images(fake_data, 'gan_samples.png', vmin=0, vmax=1)

# The optimizers provided can optimize lists, tuples, or dicts of parameters.
optimized_params = adam_minimax(both_objective_grad, init_gen_params, init_dsc_params,
    # (tail of fit_maxlike)
    rprime = lambda r: grad(loglike)(r, p(r))
    r = newton(rprime, r_guess)
    return r, p(r)


if __name__ == "__main__":
    # generate data
    npr.seed(0)
    data = negbin_sample(r=5, p=0.5, size=1000)

    # fit likelihood-extremizing parameters
    r, p = fit_maxlike(data, r_guess=1)

    # report fit
    print('Fit parameters:')
    print('r={r}, p={p}'.format(r=r, p=p))

    print('Check that we are at a local stationary point:')
    # argnums=[0, 1] so the stationarity check covers both r and p
    print(multigrad(lambda r, p: np.sum(negbin_loglike(r, p, data)), argnums=[0, 1])(r, p))

    import matplotlib.pyplot as plt
    xm = data.max()
    plt.figure()
    # 'density' replaces the 'normed' kwarg removed from matplotlib
    plt.hist(data, bins=np.arange(xm + 1) - 0.5, density=True, label='normed data counts')
    plt.xlim(0, xm)
    plt.plot(np.arange(xm), np.exp(negbin_loglike(r, p, np.arange(xm))), label='maxlike fit')
    plt.xlabel('k')
    plt.ylabel('p(k)')
    plt.legend(loc='best')
    plt.show()
                                 seed) - \
           c2 * entropy_objective(gen_params, batch_size, noise_dimZ, seed,
                                  neighbors_function)

def c1c2_schedule(iter):
    # schedule the mixing weights of the two objective terms
    if iter < 50:
        return 0, 1
    else:
        return 1, 0.2

# Get gradients of objective using autograd.
both_objective_grad = multigrad(objective, argnums=[0, 1])

print(" Epoch | Objective | Fake probability | Real Probability ")

def print_perf(gen_params, dsc_params, iter, gen_gradient, dsc_gradient):
    if iter % 10 == 0:
        ability = np.mean(objective(gen_params, dsc_params, iter, neighbors_function))
        fake_z = generate_from_noise(gen_params, 10000, noise_dimZ, seed)
        noiseX = seed.randn(10000, noise_dimX)
        fake_data = igp_hat(fake_z, noiseX)
        # fake_data = fake_z
sum_square_gradients_G3 = np.zeros_like(G3)
sum_square_gradients_U0 = np.zeros_like(U0)
sum_square_gradients_U1 = np.zeros_like(U1)
sum_square_gradients_U2 = np.zeros_like(U2)
sum_square_gradients_I0 = np.zeros_like(I0)
sum_square_gradients_I1 = np.zeros_like(I1)
sum_square_gradients_I2 = np.zeros_like(I2)
sum_square_gradients_F0 = np.zeros_like(F0)
sum_square_gradients_F1 = np.zeros_like(F1)
sum_square_gradients_F2 = np.zeros_like(F2)
sum_square_gradients_F3 = np.zeros_like(F3)
sum_square_gradients_W0 = np.zeros_like(W0)
sum_square_gradients_W1 = np.zeros_like(W1)
sum_square_gradients_W2 = np.zeros_like(W2)

mg = multigrad(cost, argnums=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])

# SGD procedure
for i in range(num_iter):
    starttime = time.time()
    print(i + 1)
    (del_g1, del_g2, del_g3, del_u0, del_u1, del_u2, del_i0, del_i1, del_i2,
     del_f0, del_f1, del_f2, del_f3, del_w0, del_w1, del_w2) = mg(
        G1, G2, G3, U0, U1, U2, I0, I1, I2, F0, F1, F2, F3, W0, W1, W2,
        sps_tensor_useritemf, sps_tensor_userwordf, sps_tensor_itemwordf,
        element_list_useritemf, element_list_userwordf, element_list_itemwordf,
        overall_rating_matrix, I_num, F_num,
        U0_dim, U1_dim, U2_dim, I0_dim, I1_dim, I2_dim,
        F0_dim, F1_dim, F2_dim, F3_dim, W0_dim, W1_dim, W2_dim,
        lmd_BPR, case)
    sum_square_gradients_G1 += eps + np.square(del_g1)
    sum_square_gradients_G2 += eps + np.square(del_g2)
    sum_square_gradients_G3 += eps + np.square(del_g3)
def minibatch_adagradSGD_train(uiaw_list, uw_frequency_mat, ui_rating_dic, uia_senti_dic,
                               iaw_frequency_dic, U_dim, I_dim, F_dim, W_dim,
                               U_num, I_num, F_num_1more, W_num, num_iter,
                               lmd_reg, lmd_r, lmd_s, lmd_o, neg_sample_rate, lmd_bpr,
                               minibatch, lr, ui_rating_dic_test, uia_senti_dic_test,
                               random_seed=0, eps=1e-8):
    np.random.seed(random_seed)
    cost = minibath_sparse_cost

    U_dim_initial = (U_num, U_dim)
    I_dim_initial = (I_num, I_dim)
    F_dim_initial = (F_num_1more, F_dim)
    W_dim_initial = (W_num, W_dim)
    U = np.random.rand(*U_dim_initial)
    I = np.random.rand(*I_dim_initial)
    F = np.random.rand(*F_dim_initial)
    W = np.random.rand(*W_dim_initial)

    sum_square_gradients_U = np.zeros_like(U)
    sum_square_gradients_I = np.zeros_like(I)
    sum_square_gradients_F = np.zeros_like(F)
    sum_square_gradients_W = np.zeros_like(W)

    # mg = multigrad(cost, argnums=[0, 1, 2, 3])
    mg = multigrad(cost, argnums=[0, 1, 2])
    # mg = multigrad_dict(cost)

    # SGD procedure
    for i in range(num_iter):
        starttime = time.time()
        Print = False
        if i % 100 == 0:
            print(i)
            Print = True
        # del_u, del_i, del_f, del_w = mg(U, I, F, W, uiaw_list, uw_frequency_mat,
        #                                 ui_rating_dic, uia_senti_dic, iaw_frequency_dic,
        #                                 lmd_reg, lmd_r, lmd_s, lmd_o, neg_sample_rate,
        #                                 lmd_bpr, minibatch, Print)
        del_u, del_i, del_f = mg(U, I, F, W, uiaw_list, uw_frequency_mat,
                                 ui_rating_dic, uia_senti_dic, iaw_frequency_dic,
                                 lmd_reg, lmd_r, lmd_s, lmd_o, neg_sample_rate, lmd_bpr,
                                 minibatch, Print)

        # AdaGrad accumulators: eps + del_g**2 each step
        sum_square_gradients_U += eps + np.square(del_u)
        sum_square_gradients_I += eps + np.square(del_i)
        sum_square_gradients_F += eps + np.square(del_f)
        # sum_square_gradients_W += eps + np.square(del_w)

        # Per-element step size: lr / sqrt(accumulated squared gradients),
        # i.e. lr / ((eps + del_g**2) ** (1/2)) summed over iterations
        lr_u = np.divide(lr, np.sqrt(sum_square_gradients_U))
        lr_i = np.divide(lr, np.sqrt(sum_square_gradients_I))
        lr_f = np.divide(lr, np.sqrt(sum_square_gradients_F))
        # lr_w = np.divide(lr, np.sqrt(sum_square_gradients_W))

        # Adaptive gradient descent: G1 = G1 - lr / sqrt(adagrad) * del_g
        U -= lr_u * del_u
        I -= lr_i * del_i
        F -= lr_f * del_f
        # W -= lr_w * del_w

        # Projection to non-negative space
        U[U < 0] = 0
        I[I < 0] = 0
        F[F < 0] = 0
        # W[W < 0] = 0

        nowtime = time.time()
        timeleft = (nowtime - starttime) * (num_iter - i - 1)
        if i % config.print_every_times == 0:
            if timeleft / 60 > 60:
                print('time left: ' + str(int(timeleft / 3600)) + ' hr '
                      + str(int(timeleft / 60 % 60)) + ' min '
                      + str(int(timeleft % 60)) + ' s')
            else:
                print("time left: " + str(int(timeleft / 60)) + ' min '
                      + str(int(timeleft % 60)) + ' s')

            # ---------Evaluate or Not------------
            print('Evaluate...')
            evaluate_res = []
            evaluate_senti_res = []
            rec_item = np.einsum('ma,na ->mn ', U,
                                 np.hstack((I, np.tile(F[104], (I_num, 1)))))
            for key in ui_rating_dic_test.keys():
                real_rating = ui_rating_dic_test[key]
                key = key[1:-1].split(",")
                u_id = int(key[0])
                i_id = int(key[1])
                rec_rating = rec_item[u_id][i_id]
                evaluate_res.append([u_id, i_id, real_rating, rec_rating])
            for key in uia_senti_dic_test.keys():
                real_senti = uia_senti_dic_test[key]
                key = key[1:-1].split(",")
                u_id = int(key[0])
                i_id = int(key[1])
                a_id = int(key[2])
                A_ = np.hstack((I[i_id], F[a_id]))
                rec_senti = np.einsum("a,a->", U[u_id], A_)
                evaluate_senti_res.append([u_id, (i_id, a_id), real_senti, rec_senti])

            from FSER.Metric import metric
            cur_time = time.time()
            train_time = cur_time - starttime
            metric = metric.Metric()
            print("MAE:")
            MAEv = metric.MAE(evaluate_res)
            MAEs = metric.MAE(evaluate_senti_res)
            print(str(round(MAEv, 4)) + "\t" + str(round(MAEs, 6)))
            print("RMSE:")
            RMSEv = metric.RMSE(evaluate_res)
            RMSEs = metric.RMSE(evaluate_senti_res)
            print(str(round(RMSEv, 4)) + "\t" + str(round(RMSEs, 6)))
            with open("./Result/FSER_/print_every_" + str(config.print_every_times) + "in_"
                      + str(num_iter) + "." + config.dataset_name, "a") as rf:
                rf.write("MAE:{}\t".format(str(MAEv)))
                rf.write("MAEs:{}".format(str(MAEs)))
                rf.write("\n")
                rf.write("RMSE:{}\t".format(str(RMSEv)))
                rf.write("RMSEs:{}".format(str(RMSEs)))
                rf.write("\n")
                rf.write("train_time:{}".format(str(train_time)))
                rf.write("\n")
    return U, I, F, W
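# The update pattern repeated in every training loop in these snippets, distilled
# into a standalone helper (a sketch, not part of the original code). AdaGrad keeps
# a per-element running sum of squared gradients and scales the step by its inverse
# square root; following these snippets, eps is added on every step, and negative
# entries are clipped to keep the factors non-negative.
def adagrad_step(param, grad_val, accumulator, lr=0.1, eps=1e-8, floor=0.0):
    accumulator += eps + np.square(grad_val)                 # running sum of squared gradients
    param -= np.divide(lr, np.sqrt(accumulator)) * grad_val  # adaptive per-element step
    param[param < floor] = floor                             # projection to non-negative space
    return param, accumulator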
if __name__ == "__main__":
    # generate data
    npr.seed(0)
    data = negbin_sample(r=5, p=0.5, size=1000)

    # fit likelihood-extremizing parameters
    r, p = fit_maxlike(data, r_guess=1)

    # report fit
    print('Fit parameters:')
    print('r={r}, p={p}'.format(r=r, p=p))

    print('Check that we are at a local stationary point:')
    loglike = lambda r, p: np.sum(negbin_loglike(r, p, data))
    grad_both = multigrad(loglike, argnums=[0, 1])
    print(grad_both(r, p))

    import matplotlib.pyplot as plt
    xm = data.max()
    plt.figure()
    # 'density' replaces the 'normed' kwarg removed from matplotlib
    plt.hist(data, bins=np.arange(xm + 1) - 0.5, density=True, label='normed data counts')
    plt.xlim(0, xm)
    plt.plot(np.arange(xm), np.exp(negbin_loglike(r, p, np.arange(xm))), label='maxlike fit')
    plt.xlabel('k')
    plt.ylabel('p(k)')
        HBATs.append(multiply_HBAT(H, B, A, T))
        if i % 100 == 0:
            if dis:
                print(cost(H, B, A, T, tensor))
    return H, B, A, T, Hs, Bs, As, Ts, HBATs, costs


def learn_HAT_adagrad_graph(case, tensor, L, num_home_factors, num_season_factors,
                            num_iter=2000, lr=0.01, dis=False, lam=1,
                            random_seed=0, eps=1e-8, A_known=None, T_known=None):
    np.random.seed(random_seed)
    cost = cost_graph_laplacian
    args_num = [0, 1, 2]
    mg = multigrad(cost, argnums=args_num)

    params = {}
    params['M'], params['N'], params['O'] = tensor.shape
    params['a'] = num_home_factors
    params['b'] = num_season_factors
    H_dim_chars = list(cases[case]['HA'].split(",")[0].strip())
    H_dim = tuple(params[x] for x in H_dim_chars)
    A_dim_chars = list(cases[case]['HA'].split(",")[1].split("-")[0].strip())
    A_dim = tuple(params[x] for x in A_dim_chars)
    T_dim_chars = list(cases[case]['HAT'].split(",")[1].split("-")[0].strip())
    T_dim = tuple(params[x] for x in T_dim_chars)
    H = np.random.rand(*H_dim)
    A = np.random.rand(*A_dim)
    T = np.random.rand(*T_dim)
def test_multigrad_onearg():
    fun = lambda x, y: np.sum(x + np.sin(y))
    packed_fun = lambda xy: np.sum(xy[0] + np.sin(xy[1]))
    A, B = npr.randn(3), npr.randn(3)
    check_equivalent(multigrad(fun)(A, B), grad(packed_fun)((A, B)))
def test_multigrad_onearg():
    fun = lambda x, y: np.sum(x + np.sin(y))
    packed_fun = lambda xy: np.sum(xy[0] + np.sin(xy[1]))
    A, B = npr.randn(3), npr.randn(3)
    check_equivalent(multigrad(fun)(A, B), (grad(packed_fun)((A, B))[0], ))
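# Note on the two variants above (an observation, not original code): in the old
# autograd convenience wrapper, multigrad defaults to argnums=[0], so
# multigrad(fun)(A, B) returns a 1-tuple containing only the gradient with respect
# to x. The second variant therefore compares against just the first component of
# the packed gradient; the first variant only matches the full packed gradient if
# both arguments are requested, e.g. multigrad(fun, argnums=[0, 1])(A, B).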
def cost_abs(H, A, T, E_np_masked, case):
    HAT = multiply_case(H, A, T, case)
    mask = ~np.isnan(E_np_masked)
    error = (HAT - E_np_masked)[mask].flatten()
    return np.sqrt((error ** 2).mean())


def learn_HAT_adagrad(case, tensor, num_home_factors, num_season_factors,
                      num_iter=2000, lr=0.01, dis=False, random_seed=0, eps=1e-8,
                      A_known=None, T_known=None, cost=cost_abs):
    np.random.seed(random_seed)
    args_num = [0, 1, 2]
    mg = multigrad(cost, argnums=args_num)

    params = {}
    params['M'], params['N'], params['O'] = tensor.shape
    params['a'] = num_home_factors
    params['b'] = num_season_factors
    H_dim_chars = list(cases[case]['HA'].split(",")[0].strip())
    H_dim = tuple(params[x] for x in H_dim_chars)
    A_dim_chars = list(cases[case]['HA'].split(",")[1].split("-")[0].strip())
    A_dim = tuple(params[x] for x in A_dim_chars)
    T_dim_chars = list(cases[case]['HAT'].split(",")[1].split("-")[0].strip())
    T_dim = tuple(params[x] for x in T_dim_chars)
    H = np.random.rand(*H_dim)
    A = np.random.rand(*A_dim)
    T = np.random.rand(*T_dim)
def plot_results(data, r, p):
    xm = data.max()
    plt.figure()
    # 'density' replaces the 'normed' kwarg removed from matplotlib
    plt.hist(data, bins=np.arange(xm + 1) - 0.5, density=True, label='normed data counts')
    plt.xlim(0, xm)
    plt.plot(np.arange(xm), np.exp(negbin_loglike(r, p, np.arange(xm))), label='maxlike fit')
    plt.xlabel('k')
    plt.ylabel('p(k)')
    plt.legend(loc='best')


if __name__ == "__main__":
    # generate data
    npr.seed(0)
    data = negbin_sample(r=5, p=0.5, size=1000)

    # fit likelihood-extremizing parameters
    r, p = fit_maxlike(data, r_guess=1)

    # report fit
    print('Fit parameters:')
    print('r={r}, p={p}'.format(r=r, p=p))

    print('Check that we are at a local stationary point:')
    loglike = lambda r, p: np.sum(negbin_loglike(r, p, data))
    grad_both = multigrad(loglike, argnums=[0, 1])
    print(grad_both(r, p))

    plot_results(data, r, p)
    plt.show()
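# negbin_loglike and negbin_sample are defined elsewhere in this example. For
# reference, a sketch of an autograd-differentiable negative-binomial
# log-likelihood in the same (r, p) parameterization (an assumption -- check it
# against the original definition before relying on it):
from autograd.scipy.special import gammaln

def negbin_loglike(r, p, x):
    # log P(x | r, p) for the negative binomial distribution
    return gammaln(r + x) - gammaln(r) - gammaln(x + 1) + x * np.log(p) + r * np.log(1 - p)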
def learn_HAT_SGD_adagrad(sps_tensor_useritemf, sps_tensor_ifw, sps_overall_rating,
                          U_dim, I_dim, F_dim, W_dim, U_num, I_num, F_num_1more, W_num,
                          num_iter=100000, lr=0.1, dis=False, cost_function='abs',
                          random_seed=0, eps=1e-8):
    F_num = F_num_1more - 1
    np.random.seed(random_seed)
    cost = cost_abs_sparse_BPR_SGD

    element_list_useritemf = list(sps_tensor_useritemf)
    element_list_ifw = list(sps_tensor_ifw)
    element_list_ifw_2 = []
    for item in sps_tensor_ifw.items():
        if item[1] > 3.93:
            element_list_ifw_2.append(item[0])
    element_list_overall_rating = list(sps_overall_rating)

    params = {}
    params['M'], params['N'], params['F'], params['W'] = (U_num, I_num, F_num, W_num)
    '''
    params['a'] = U0_dim
    params['b'] = U1_dim
    params['c'] = U2_dim
    params['d'] = I0_dim
    params['e'] = I1_dim
    params['f'] = I2_dim
    params['g'] = F_dim
    params['h'] = W_dim
    '''
    print("users:" + str(params['M']))
    print("items:" + str(params['N']))
    print("features:" + str(params['F']))
    print("words:" + str(params['W']))

    U_dim_initial = (U_num, U_dim)
    I_dim_initial = (I_num, I_dim)
    F_dim_initial = (F_num_1more, F_dim)
    W_dim_initial = (W_num, W_dim)
    U = np.random.rand(*U_dim_initial)
    I = np.random.rand(*I_dim_initial)
    F = np.random.rand(*F_dim_initial)
    W = np.random.rand(*W_dim_initial)

    sum_square_gradients_U = np.zeros_like(U)
    sum_square_gradients_I = np.zeros_like(I)
    sum_square_gradients_F = np.zeros_like(F)
    sum_square_gradients_W = np.zeros_like(W)

    mg = multigrad(cost, argnums=[0, 1, 2, 3])

    # SGD procedure
    for i in range(num_iter):
        starttime = time.time()
        print(i + 1)
        del_u, del_i, del_f, del_w = mg(U, I, F, W, sps_tensor_useritemf, sps_tensor_ifw,
                                        element_list_useritemf, element_list_ifw,
                                        element_list_ifw_2, sps_overall_rating,
                                        element_list_overall_rating)

        # AdaGrad accumulators: eps + del_g**2 each step
        sum_square_gradients_U += eps + np.square(del_u)
        sum_square_gradients_I += eps + np.square(del_i)
        sum_square_gradients_F += eps + np.square(del_f)
        sum_square_gradients_W += eps + np.square(del_w)

        # Per-element step size: lr / sqrt(accumulated squared gradients)
        lr_u = np.divide(lr, np.sqrt(sum_square_gradients_U))
        lr_i = np.divide(lr, np.sqrt(sum_square_gradients_I))
        lr_f = np.divide(lr, np.sqrt(sum_square_gradients_F))
        lr_w = np.divide(lr, np.sqrt(sum_square_gradients_W))

        # Gradient descent: G1 = G1 - lr / ((eps + del_g**2) ** (1/2)) * del_g
        U -= lr_u * del_u
        I -= lr_i * del_i
        F -= lr_f * del_f
        W -= lr_w * del_w

        # Projection to non-negative space
        U[U < 0] = 0
        I[I < 0] = 0
        F[F < 0] = 0
        W[W < 0] = 0

        nowtime = time.time()
        timeleft = (nowtime - starttime) * (num_iter - i - 1)
        if timeleft / 60 > 60:
            print('time left: ' + str(int(timeleft / 3600)) + ' hr '
                  + str(int(timeleft / 60 % 60)) + ' min '
                  + str(int(timeleft % 60)) + ' s')
        else:
            print("time left: " + str(int(timeleft / 60)) + ' min '
                  + str(int(timeleft % 60)) + ' s')

    return U, I, F, W
def learn_HBAT_adagrad_graph(tensor, num_home_factors, num_season_factors,
                             num_iter=2000, lr=0.01, dis=False, random_seed=0, eps=1e-8,
                             B_known=None, A_known=None, T_known=None):
    def multiply_HBAT(H, B, A, T):
        return np.einsum('mh, hn, ns, ts ->mnt', H, B, A, T)

    def cost(H, B, A, T, tensor):
        mask = ~np.isnan(tensor)
        HBAT = multiply_HBAT(H, B, A, T)
        error = (HBAT - tensor)[mask].flatten()
        return np.sqrt((error ** 2).mean())

    np.random.seed(random_seed)
    args_num = [0, 1, 2, 3]
    mg = multigrad(cost, argnums=args_num)

    m, n, t = tensor.shape
    h, s = num_home_factors, num_season_factors
    H = np.random.rand(m, h)
    B = np.random.rand(h, n)
    A = np.random.rand(n, s)
    T = np.random.rand(t, s)
    if A_known is not None:
        A = set_known(A, A_known)
    if B_known is not None:
        B = set_known(B, B_known)

    sum_square_gradients_A = np.zeros_like(A)
    sum_square_gradients_B = np.zeros_like(B)
    sum_square_gradients_H = np.zeros_like(H)
    sum_square_gradients_T = np.zeros_like(T)

    Hs = [H.copy()]
    Bs = [B.copy()]
    Ts = [T.copy()]
    As = [A.copy()]
    HBATs = [multiply_HBAT(H, B, A, T)]
    costs = [cost(H, B, A, T, tensor)]

    # GD procedure
    for i in range(num_iter):
        del_h, del_b, del_a, del_t = mg(H, B, A, T, tensor)
        sum_square_gradients_A += eps + np.square(del_a)
        lr_a = np.divide(lr, np.sqrt(sum_square_gradients_A))
        A -= lr_a * del_a
        sum_square_gradients_H += eps + np.square(del_h)
        sum_square_gradients_B += eps + np.square(del_b)
        sum_square_gradients_T += eps + np.square(del_t)
        lr_h = np.divide(lr, np.sqrt(sum_square_gradients_H))
        lr_t = np.divide(lr, np.sqrt(sum_square_gradients_T))
        H -= lr_h * del_h
        T -= lr_t * del_t
        if A_known is not None:
            A = set_known(A, A_known)
        if B_known is not None:
            B = set_known(B, B_known)
        if T_known is not None:
            T = set_known(T, T_known)
        # Projection to non-negative space
        H[H < 0] = 1e-8
        A[A < 0] = 1e-8
        T[T < 0] = 1e-8
        B[B < 0] = 1e-8
        As.append(A.copy())
        Ts.append(T.copy())
        Hs.append(H.copy())
        Bs.append(B.copy())
        costs.append(cost(H, B, A, T, tensor))
        HBATs.append(multiply_HBAT(H, B, A, T))
        if i % 100 == 0:
            if dis:
                print(cost(H, B, A, T, tensor))
    return H, B, A, T, Hs, Bs, As, Ts, HBATs, costs
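# Minimal usage sketch for learn_HBAT_adagrad_graph (not from the original source):
# factor a (home x appliance x time) tensor with missing entries marked as NaN into
# H (m x h), B (h x n), A (n x s), T (t x s), and inspect the per-iteration cost trace.
demo_tensor = np.random.rand(20, 5, 12)
demo_tensor[np.random.rand(*demo_tensor.shape) < 0.25] = np.nan  # simulate missing data
H, B, A, T, Hs, Bs, As, Ts, HBATs, costs = learn_HBAT_adagrad_graph(
    demo_tensor, num_home_factors=3, num_season_factors=2, num_iter=200)
print(costs[0], costs[-1])  # the masked RMSE should typically decrease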