def sparse_update(T, factors, Lambda, sizes, rank, stepSize, sample_rate, times):
    """Apply one sweep of regularized gradient-style updates to every factor.

    For each mode ``i`` the residual ``R = T - TTTP(omega, factors)`` is
    rebuilt from the current factors, and ``factors[i]`` is then updated in
    place: it is scaled by ``1 - 2 * stepSize * Lambda * sample_rate`` and
    ``stepSize * (product of the other factors) * R`` is accumulated into it.

    NOTE(review): a second ``sparse_update`` taking an extra ``use_MTTKRP``
    argument appears right after this one; if both live in the same module
    the later definition replaces this one -- confirm which is intended.

    Args:
        T: tensor being factorized; ``getOmega(T)`` supplies its mask.
        factors: list of factor matrices, one per mode; modified in place.
        Lambda: regularization weight.
        sizes: per-mode sizes; only ``len(sizes)`` is used here.
        rank: unused in this function (kept for interface compatibility).
        stepSize: step length of the update.
        sample_rate: multiplier applied to the regularization shrinkage.
        times: mutable sequence; entries 2..6 are incremented with elapsed
            wall-clock seconds of the individual phases.

    Returns:
        None.
    """
    starting_time = time.time()
    t_go = ctf.timer("SGD_getOmega")
    t_go.start()
    omega = getOmega(T)
    t_go.stop()
    dimension = len(sizes)
    indexes = INDEX_STRING[:dimension]
    R = ctf.tensor(copy=T)
    times[2] += time.time() - starting_time

    for i in range(dimension):
        # Use a dedicated comprehension variable: reusing ``i`` here would
        # clobber the mode index on interpreters where list-comprehension
        # variables leak into the enclosing scope (Python 2).
        tup_list = [factors[d].i(indexes[d] + "r") for d in range(dimension)]

        # R starts as a copy of T, so accumulating -TTTP yields
        # R = T - omega * (product of all factors).
        starting_time = time.time()
        R.i(indexes) << -1. * ctf.TTTP(omega, factors).i(indexes)
        times[3] += time.time() - starting_time

        # This phase is currently empty (its former allocation was removed);
        # the slot is still touched so the layout of ``times`` is unchanged.
        starting_time = time.time()
        times[4] += time.time() - starting_time

        # Product of every factor except the one being updated.
        starting_time = time.time()
        Hterm = reduce(lambda x, y: x * y, tup_list[:i] + tup_list[i + 1:])
        times[5] += time.time() - starting_time

        starting_time = time.time()
        t_ctr = ctf.timer("SGD_main_contraction")
        t_ctr.start()
        # Shrink the current factor, then accumulate the step into it.
        (1 - stepSize * 2 * Lambda * sample_rate) * factors[i].i(indexes[i] + "r") << stepSize * Hterm * R.i(indexes)
        t_ctr.stop()
        times[6] += time.time() - starting_time

        # Reset the residual buffer for the next mode; skipped after the last.
        if i < dimension - 1:
            R = ctf.tensor(copy=T)
def sparse_update(T, factors, Lambda, sizes, rank, stepSize, sample_rate, times, use_MTTKRP):
    """Apply one sweep of regularized gradient-style updates to every factor.

    For each mode ``i`` the residual ``R = T - TTTP(omega, factors)`` is
    rebuilt from the current factors and ``factors[i]`` is updated in place
    to ``(1 - 2*stepSize*Lambda*sample_rate) * factors[i] + stepSize * G``,
    where ``G`` is the contraction of ``R`` with all the other factors.
    ``G`` is obtained either through ``ctf.MTTKRP`` or through an explicit
    index-notation contraction, depending on ``use_MTTKRP``.

    Args:
        T: tensor being factorized; ``getOmega(T)`` supplies its mask.
        factors: list of factor matrices, one per mode; modified in place.
        Lambda: regularization weight.
        sizes: per-mode sizes; only ``len(sizes)`` is used here.
        rank: unused in this function (kept for interface compatibility).
        stepSize: step length of the update.
        sample_rate: multiplier applied to the regularization shrinkage.
        times: mutable sequence; entries 2..6 are incremented with elapsed
            wall-clock seconds of the individual phases.
        use_MTTKRP: when true, use ``ctf.MTTKRP`` for the main contraction.

    Returns:
        None.
    """
    starting_time = time.time()
    t_go = ctf.timer("SGD_getOmega")
    t_go.start()
    omega = getOmega(T)
    t_go.stop()
    dimension = len(sizes)
    indexes = INDEX_STRING[:dimension]
    R = ctf.tensor(copy=T)
    times[2] += time.time() - starting_time

    for i in range(dimension):
        # R starts as a copy of T, so accumulating -TTTP yields
        # R = T - omega * (product of all factors).
        starting_time = time.time()
        R.i(indexes) << -1. * ctf.TTTP(omega, factors).i(indexes)
        times[3] += time.time() - starting_time

        # Slots 4 and 5 time phases that are empty in this version; they are
        # still touched so the layout of ``times`` stays the same.
        starting_time = time.time()
        times[4] += time.time() - starting_time
        starting_time = time.time()
        times[5] += time.time() - starting_time

        starting_time = time.time()
        t_ctr = ctf.timer("SGD_main_contraction")
        t_ctr.start()
        if use_MTTKRP:
            # Save the shrunken old factor first: MTTKRP is expected to
            # write its result into factors[i] (TODO confirm against the
            # CTF docs), after which the two parts are recombined.
            new_fi = (1 - stepSize * 2 * Lambda * sample_rate) * factors[i]
            ctf.MTTKRP(R, factors, i)
            stepSize * factors[i].i("ir") << new_fi.i("ir")
        else:
            # Use a dedicated comprehension variable: reusing ``i`` here
            # would clobber the mode index on interpreters where
            # list-comprehension variables leak (Python 2).
            tup_list = [factors[d].i(indexes[d] + "r") for d in range(dimension)]
            # Product of every factor except the one being updated.
            Hterm = reduce(lambda x, y: x * y, tup_list[:i] + tup_list[i + 1:])
            (1 - stepSize * 2 * Lambda * sample_rate) * factors[i].i(indexes[i] + "r") << stepSize * Hterm * R.i(indexes)
        t_ctr.stop()
        times[6] += time.time() - starting_time

        # Reset the residual buffer for the next mode; skipped after the last.
        if i < dimension - 1:
            R = ctf.tensor(copy=T)
def getOmega(T):
    """Return a sparse tensor shaped like ``T`` holding 1.0 at T's stored entries.

    The locally stored (index, value) pairs of ``T`` are read, every value is
    overwritten with 1.0, and the pairs are written into a fresh sparse
    tensor of the same shape.
    """
    timer = ctf.timer("ccd_getOmega")
    timer.start()

    nnz_indices, nnz_values = T.read_local_nnz()
    nnz_values[:] = 1.  # keep the sparsity pattern, discard the actual values

    mask = ctf.tensor(T.shape, sp=True)
    mask.write(nnz_indices, nnz_values)

    timer.stop()
    return mask
def get_objective(T,U,V,W,omega,regParam):
    """Evaluate the regularized objective and the RMSE of the current factors.

    The residual is ``T - TTTP(omega, [U, V, W])``.  The objective is the
    residual norm plus ``regParam`` times the summed norms of the three
    factors; the RMSE is the residual norm divided by the square root of the
    number of observed entries (``T.nnz_tot`` for a sparse ``T``, otherwise
    ``ctf.sum(omega)``).

    Returns:
        ``[objective, RMSE]``.
    """
    timer = ctf.timer("ccd_get_objective")
    timer.start()

    residual = ctf.tensor(T.shape, sp=T.sp)
    t_begin = time.time()
    residual.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U,V,W]).i("ijk")
    t_residual_done = time.time()

    residual_norm = ctf.vecnorm(residual)
    observed = T.nnz_tot if T.sp == True else ctf.sum(omega)
    RMSE = residual_norm/(observed**.5)

    factor_norms = ctf.vecnorm(U) + ctf.vecnorm(V) + ctf.vecnorm(W)
    objective = residual_norm + factor_norms * regParam
    t_objective_done = time.time()

    if glob_comm.rank() == 0 and status_prints == True:
        print('generate L takes {}'.format(t_residual_done - t_begin))
        print('calc objective takes {}'.format(t_objective_done - t_residual_done))

    timer.stop()
    return [objective, RMSE]
def CG(A, b, x0, r, regParam, I, is_implicit=False):
    """Batched conjugate-gradient solve of ``(A + regParam * Id) x = b``.

    ``I`` independent systems of size ``r`` are iterated together: step
    lengths ``alpha`` and ``beta`` are computed per row ``i``, while the
    stopping test uses the norm of the whole residual block.

    Args:
        A: the operator.  When ``is_implicit`` is false it is indexed as
            ``A.i("irl")`` and contracted with the iterate; when true it
            must provide ``A.mul("ir", x)`` returning the product.
        b: right-hand sides, combined with ``(I, r)`` tensors.
        x0: initial guess.
        r: size of each system (second extent of the work tensors).
        regParam: diagonal shift added to the operator.
        I: number of systems in the batch (first extent of work tensors).
        is_implicit: selects how ``A`` is applied (see ``A``).

    Returns:
        The last iterate ``xk``.  At most ``sk.shape[-1]`` iterations run;
        the loop stops early once ``ctf.vecnorm(rk) < CG_thresh``.
    """
    t_batch_cg = ctf.timer("ALS_exp_cg")
    t_batch_cg.start()

    # Initial residual r0 = b - (A + regParam * Id) x0.
    Ax0 = ctf.tensor((I, r))
    if is_implicit:
        Ax0.i("ir") << A.mul("ir", x0)
    else:
        Ax0.i("ir") << A.i("irl") * x0.i("il")
    Ax0 += regParam * x0
    rk = b - Ax0
    sk = rk
    xk = x0

    for _ in range(sk.shape[-1]):
        # Apply the (shifted) operator to the search direction.
        Ask = ctf.tensor((I, r))
        t_cg_bmvec = ctf.timer("ALS_exp_cg_mvec")
        t_cg_bmvec.start()
        t0 = time.time()
        if is_implicit:
            Ask.i("ir") << A.mul("ir", sk)
        else:
            Ask.i("ir") << A.i("irl") * sk.i("il")
        t1 = time.time()
        # ``rank`` is a method; comparing the bound method itself to 0 was
        # always False, which silently disabled this status message.
        if ctf.comm().rank() == 0 and status_prints == True:
            print('form Ask takes {}'.format(t1 - t0))
        t_cg_bmvec.stop()
        Ask += regParam * sk

        # Per-row step length alpha_i = <r_i, r_i> / <s_i, (A s)_i>;
        # the 1.e-30 guards against division by zero.
        rnorm = ctf.tensor(I)
        rnorm.i("i") << rk.i("ir") * rk.i("ir")
        skAsk = ctf.tensor(I)
        skAsk.i("i") << sk.i("ir") * Ask.i("ir")
        alpha = rnorm / (skAsk + 1.e-30)

        # x_{k+1} = x_k + alpha * s_k
        alphask = ctf.tensor((I, r))
        alphask.i("ir") << alpha.i("i") * sk.i("ir")
        xk1 = xk + alphask

        # r_{k+1} = r_k - alpha * A s_k
        alphaask = ctf.tensor((I, r))
        alphaask.i("ir") << alpha.i("i") * Ask.i("ir")
        rk1 = rk - alphaask

        # beta_i = <r_{k+1,i}, r_{k+1,i}> / <r_{k,i}, r_{k,i}>
        rk1norm = ctf.tensor(I)
        rk1norm.i("i") << rk1.i("ir") * rk1.i("ir")
        beta = rk1norm / (rnorm + 1.e-30)

        # s_{k+1} = r_{k+1} + beta * s_k
        betask = ctf.tensor((I, r))
        betask.i("ir") << beta.i("i") * sk.i("ir")
        sk1 = rk1 + betask

        rk = rk1
        xk = xk1
        sk = sk1
        if ctf.vecnorm(rk) < CG_thresh:
            break

    t_batch_cg.stop()
    return xk
def getALS_CG(T, U, V, W, regParam, omega, I, J, K, r, block_size, num_iter=100, err_thresh=.001, time_limit=600, use_implicit=True):
    """Run alternating least squares sweeps, solving each factor with CG.

    Each sweep updates U, V and W in turn through ``updateFactor`` and then
    re-evaluates the objective (residual norm plus ``regParam`` times the
    summed factor norms) and the RMSE, printing both on rank 0.  Sweeps stop
    when the objective changes by less than ``err_thresh``, when ``num_iter``
    sweeps have run, or when the time spent outside objective evaluation
    exceeds ``time_limit`` seconds.  A non-positive ``block_size`` is
    replaced by ``max(I, J, K)``.

    Returns:
        None; progress and the time per sweep are printed.
    """
    if use_implicit == True:
        epoch = ctf.timer_epoch("als_CG_implicit")
        if ctf.comm().rank() == 0:
            print(
                "--------------------------------ALS with implicit CG------------------------"
            )
    else:
        epoch = ctf.timer_epoch("als_CG_explicit")
        if ctf.comm().rank() == 0:
            print(
                "--------------------------------ALS with explicit CG------------------------"
            )

    # Number of observed entries, used to normalize the RMSE.
    nnz_tot = T.nnz_tot if T.sp == True else ctf.sum(omega)

    epoch.begin()
    it = 0
    if block_size <= 0:
        block_size = max(I, J, K)

    # Objective of the initial factors.
    init_timer = ctf.timer("ALS_init_error_tensor_norm")
    init_timer.start()
    t_start = time.time()
    E = ctf.tensor((I, J, K), sp=T.sp)
    E.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U, V, W]).i("ijk")
    t_residual = time.time()
    initial_reg = (ctf.vecnorm(U) + ctf.vecnorm(V) + ctf.vecnorm(W)) * regParam
    curr_err_norm = ctf.vecnorm(E) + initial_reg
    t_norms = time.time()
    init_timer.stop()

    if ctf.comm().rank() == 0 and status_prints == True:
        print('ctf.TTTP() takes {}'.format(t_residual - t_start))
        print('ctf.vecnorm {}'.format(t_norms - t_residual))

    t_before_loop = time.time()
    t_obj_calc = 0.
    ctf.random.seed(42)

    while True:
        sweep_timer = ctf.timer("ALS_upd_cg")
        sweep_timer.start()

        U = updateFactor(T, U, V, W, regParam, omega, I, J, K, r, block_size, "U", use_implicit)
        V = updateFactor(T, U, V, W, regParam, omega, I, J, K, r, block_size, "V", use_implicit)
        W = updateFactor(T, U, V, W, regParam, omega, I, J, K, r, block_size, "W", use_implicit)

        # Elapsed solve time, excluding time spent evaluating the objective.
        duration = time.time() - t_before_loop - t_obj_calc

        t_b_obj = time.time()
        E.set_zero()
        E.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U, V, W]).i("ijk")
        diff_norm = ctf.vecnorm(E)
        RMSE = diff_norm / (nnz_tot**.5)
        reg_term = (ctf.vecnorm(U) + ctf.vecnorm(V) + ctf.vecnorm(W)) * regParam
        next_err_norm = diff_norm + reg_term
        t_obj_calc += time.time() - t_b_obj

        sweep_timer.stop()
        it += 1

        if ctf.comm().rank() == 0:
            print('Objective after', duration, 'seconds (', it, 'iterations) is: {}'.format(next_err_norm))
            print('RMSE after', duration, 'seconds (', it, 'iterations) is: {}'.format(RMSE))

        converged = abs(curr_err_norm - next_err_norm) < err_thresh
        budget_exhausted = it >= num_iter or duration > time_limit
        if converged or budget_exhausted:
            break
        curr_err_norm = next_err_norm

    epoch.end()
    duration = time.time() - t_before_loop - t_obj_calc
    if glob_comm.rank() == 0:
        print('ALS (implicit =', use_implicit, ') time per sweep: {}'.format(duration / it))
def updateFactor(T, U, V, W, regParam, omega, I, J, K, r, block_size, string, use_implicit):
    """Re-solve one factor matrix block by block and return it.

    The rows of the selected factor are processed in blocks of at most
    ``block_size`` rows.  For each block the right-hand side is the
    contraction of the matching slice of ``T`` with the two other factors,
    and the block is overwritten with the result of ``CG`` started from a
    random initial guess.  With ``use_implicit`` the operator is an
    ``implicit_ATA`` object; otherwise the ``(bsize, r, r)`` normal-equation
    tensor is formed explicitly from the slice of ``omega``.

    Args:
        T: tensor being completed.
        U, V, W: factor matrices for the first, second and third mode.
        regParam: regularization weight forwarded to ``CG``.
        omega: mask tensor, sliced along the updated mode.
        I, J, K: mode sizes.
        r: number of columns of the factors.
        block_size: maximum number of rows solved per CG call.
        string: which factor to update: ``"U"``, ``"V"`` or ``"W"``.
        use_implicit: choose the implicit operator instead of forming the
            normal equations.

    Returns:
        The updated factor (the same object that was passed in, modified in
        place through slice assignment).

    Raises:
        ValueError: if ``string`` is not ``"U"``, ``"V"`` or ``"W"``.
    """
    t_RHS = ctf.timer("ALS_imp_cg_RHS")
    t_o_slice = ctf.timer("ALS_imp_omega_slice")
    t_form_EQs = ctf.timer("ALS_exp_form_EQs")

    # NOTE: CG computes the initial product A*x0 itself, so no such product
    # is precomputed here.

    if string == "U":
        # Integer ceiling division: number of row blocks.
        num_blocks = (I + block_size - 1) // block_size
        for n in range(num_blocks):
            I_start = n * block_size
            I_end = min(I, I_start + block_size)
            bsize = I_end - I_start

            t_o_slice.start()
            nomega = omega[I_start:I_end, :, :]
            t_o_slice.stop()

            x0 = ctf.random.random((bsize, r))
            b = ctf.tensor((bsize, r))
            t_RHS.start()
            b.i("ir") << V.i("Jr") * W.i("Kr") * T[I_start:I_end, :, :].i("iJK")  # RHS; ATb
            t_RHS.stop()

            if use_implicit:
                U[I_start:I_end, :] = CG(implicit_ATA(V, W, nomega, "U"), b, x0, r, regParam, bsize, True)
            else:
                A = ctf.tensor((bsize, r, r))
                t_form_EQs.start()
                A.i("iuv") << V.i("Ju") * W.i("Ku") * nomega.i("iJK") * V.i("Jv") * W.i("Kv")
                t_form_EQs.stop()
                U[I_start:I_end, :] = CG(A, b, x0, r, regParam, bsize)
        return U

    if string == "V":
        num_blocks = (J + block_size - 1) // block_size
        for n in range(num_blocks):
            J_start = n * block_size
            J_end = min(J, J_start + block_size)
            bsize = J_end - J_start

            t_o_slice.start()
            nomega = omega[:, J_start:J_end, :]
            t_o_slice.stop()

            x0 = ctf.random.random((bsize, r))
            b = ctf.tensor((bsize, r))
            t_RHS.start()
            b.i("jr") << U.i("Ir") * W.i("Kr") * T[:, J_start:J_end, :].i("IjK")  # RHS; ATb
            t_RHS.stop()

            if use_implicit:
                V[J_start:J_end, :] = CG(implicit_ATA(U, W, nomega, "V"), b, x0, r, regParam, bsize, True)
            else:
                A = ctf.tensor((bsize, r, r))
                t_form_EQs.start()
                A.i("juv") << U.i("Iu") * W.i("Ku") * nomega.i("IjK") * U.i("Iv") * W.i("Kv")
                t_form_EQs.stop()
                V[J_start:J_end, :] = CG(A, b, x0, r, regParam, bsize)
        return V

    if string == "W":
        num_blocks = (K + block_size - 1) // block_size
        for n in range(num_blocks):
            K_start = n * block_size
            K_end = min(K, K_start + block_size)
            bsize = K_end - K_start

            t_o_slice.start()
            nomega = omega[:, :, K_start:K_end]
            t_o_slice.stop()

            x0 = ctf.random.random((bsize, r))
            b = ctf.tensor((bsize, r))
            t_RHS.start()
            b.i("kr") << U.i("Ir") * V.i("Jr") * T[:, :, K_start:K_end].i("IJk")  # RHS; ATb
            t_RHS.stop()

            if use_implicit:
                W[K_start:K_end, :] = CG(implicit_ATA(U, V, nomega, "W"), b, x0, r, regParam, bsize, True)
            else:
                A = ctf.tensor((bsize, r, r))
                t_form_EQs.start()
                # LHS; ATA using matrix-vector multiplication
                A.i("kuv") << U.i("Iu") * V.i("Ju") * nomega.i("IJk") * U.i("Iv") * V.i("Jv")
                t_form_EQs.stop()
                W[K_start:K_end, :] = CG(A, b, x0, r, regParam, bsize)
        return W

    # Previously an unknown selector fell through and silently returned None.
    raise ValueError('updateFactor: string must be "U", "V" or "W", got {!r}'.format(string))
def run_CCD(T,U,V,W,omega,regParam,num_iter,time_limit,objective_frequency,use_MTTKRP=True):
    """Run cyclic coordinate descent sweeps over the columns of U, V and W.

    Every sweep rebuilds the residual ``R`` so that it excludes column 0's
    contribution, then for each column ``f`` updates ``U[:,f]``, ``V[:,f]``
    and ``W[:,f]`` as ``alphas / (regParam + betas)``, where ``alphas``
    contracts ``R`` and ``betas`` contracts ``omega`` with the other two
    (squared, for ``betas``) column vectors.  After a column is updated its
    new contribution is subtracted from ``R`` and the next column's is added
    back.  The contractions use ``ctf.MTTKRP`` when ``use_MTTKRP`` is true
    and ``ctf.einsum`` otherwise.

    The objective and RMSE are printed on rank 0 every
    ``objective_frequency`` sweeps and once more at the end.  Sweeps stop
    when ``num_iter`` sweeps have run or the time spent outside objective
    evaluation exceeds ``time_limit`` seconds.

    Returns:
        None; ``U``, ``V`` and ``W`` are updated in place.
    """
    r = U.shape[1]
    # Working copies of each factor's columns.
    U_vec_list = [U[:,f] for f in range(r)]
    V_vec_list = [V[:,f] for f in range(r)]
    W_vec_list = [W[:,f] for f in range(r)]

    sweeps = 0
    objectives = []
    t_before_loop = time.time()
    t_obj_calc = 0.

    t_CCD = ctf.timer_epoch("ccd_CCD")
    t_CCD.begin()
    while True:
        # R = T - TTTP(omega, all columns) + TTTP(omega, column 0)
        t_iR_upd = ctf.timer("ccd_init_R_upd")
        t_iR_upd.start()
        t_copy_begin = time.time()
        R = ctf.copy(T)
        t_copy_end = time.time()
        R -= ctf.TTTP(omega, [U,V,W])
        t_sub_end = time.time()
        R += ctf.TTTP(omega, [U[:,0], V[:,0], W[:,0]])
        t_add_end = time.time()
        t_iR_upd.stop()

        # Objective evaluation is timed separately and excluded from durations.
        t_b_obj = time.time()
        if sweeps % objective_frequency == 0:
            duration = time.time() - t_before_loop - t_obj_calc
            [objective, RMSE] = get_objective(T,U,V,W,omega,regParam)
            objectives.append(objective)
            if glob_comm.rank() == 0:
                print('Objective after',duration,'seconds (',sweeps,'iterations) is: {}'.format(objective))
                print('RMSE after',duration,'seconds (',sweeps,'iterations) is: {}'.format(RMSE))
        t_obj_calc += time.time() - t_b_obj

        if glob_comm.rank() == 0 and status_prints == True:
            print('ctf.copy() takes {}'.format(t_copy_end-t_copy_begin))
            print('ctf.TTTP() takes {}'.format(t_sub_end-t_copy_end))
            print('ctf.TTTP() takes {}'.format(t_add_end-t_sub_end))

        for f in range(r):
            v_f = V_vec_list[f]
            w_f = W_vec_list[f]

            # ---- update U[:,f] (the only update with per-step timing) ----
            if glob_comm.rank() == 0 and status_prints == True:
                print('updating U[:,{}]'.format(f))

            t_alpha_begin = time.time()
            if use_MTTKRP:
                alphas = ctf.tensor(R.shape[0])
                ctf.MTTKRP(R, [alphas, v_f, w_f], 0)
            else:
                alphas = ctf.einsum('ijk, j, k -> i', R, v_f, w_f)
            t_alpha_end = time.time()
            if use_MTTKRP:
                betas = ctf.tensor(R.shape[0])
                ctf.MTTKRP(omega, [betas, v_f*v_f, w_f*w_f], 0)
            else:
                betas = ctf.einsum('ijk, j, j, k, k -> i', omega, v_f, v_f, w_f, w_f)
            t_beta_end = time.time()

            U_vec_list[f] = alphas / (regParam + betas)
            U[:,f] = U_vec_list[f]
            u_f = U_vec_list[f]

            if glob_comm.rank() == 0 and status_prints == True:
                print('ctf.einsum() takes {}'.format(t_alpha_end-t_alpha_begin))
                print('ctf.einsum() takes {}'.format(t_beta_end-t_alpha_end))

            # ---- update V[:,f] ----
            if glob_comm.rank() == 0 and status_prints == True:
                print('updating V[:,{}]'.format(f))

            if use_MTTKRP:
                alphas = ctf.tensor(R.shape[1])
                ctf.MTTKRP(R, [u_f, alphas, w_f], 1)
                betas = ctf.tensor(R.shape[1])
                ctf.MTTKRP(omega, [u_f*u_f, betas, w_f*w_f], 1)
            else:
                alphas = ctf.einsum('ijk, i, k -> j', R, u_f, w_f)
                betas = ctf.einsum('ijk, i, i, k, k -> j', omega, u_f, u_f, w_f, w_f)

            V_vec_list[f] = alphas / (regParam + betas)
            V[:,f] = V_vec_list[f]
            v_f = V_vec_list[f]

            # ---- update W[:,f] ----
            if glob_comm.rank() == 0 and status_prints == True:
                print('updating W[:,{}]'.format(f))

            if use_MTTKRP:
                alphas = ctf.tensor(R.shape[2])
                ctf.MTTKRP(R, [u_f, v_f, alphas], 2)
                betas = ctf.tensor(R.shape[2])
                ctf.MTTKRP(omega, [u_f*u_f, v_f*v_f, betas], 2)
            else:
                alphas = ctf.einsum('ijk, i, j -> k', R, u_f, v_f)
                betas = ctf.einsum('ijk, i, i, j, j -> k', omega, u_f, u_f, v_f, v_f)

            W_vec_list[f] = alphas / (regParam + betas)
            W[:,f] = W_vec_list[f]

            # Swap the residual from "without column f" to "without column f+1".
            t_tttp = ctf.timer("ccd_TTTP")
            t_tttp.start()
            R -= ctf.TTTP(omega, [U_vec_list[f], V_vec_list[f], W_vec_list[f]])
            if f+1 < r:
                R += ctf.TTTP(omega, [U_vec_list[f+1], V_vec_list[f+1], W_vec_list[f+1]])
            t_tttp.stop()

        # NOTE(review): this timer was already stopped after R was built.
        t_iR_upd.stop()

        sweeps += 1
        elapsed = time.time() - t_before_loop - t_obj_calc
        if sweeps == num_iter or elapsed > time_limit:
            break

    t_CCD.end()
    duration = time.time() - t_before_loop - t_obj_calc
    [objective, RMSE] = get_objective(T,U,V,W,omega,regParam)

    if glob_comm.rank() == 0:
        print('CCD amortized seconds per sweep: {}'.format(duration/sweeps))
        print('Time/CCD Iteration: {}'.format(duration/sweeps))
        print('Objective after',duration,'seconds (',sweeps,'iterations) is: {}'.format(objective))
        print('RMSE after',duration,'seconds (',sweeps,'iterations) is: {}'.format(RMSE))