def sparse_SGD(T, U, V, W, Lambda, omega, I, J, K, r, stepSize, sample_rate, num_iter, errThresh, time_limit, work_cycle, use_MTTKRP):
    """Fit the factors U, V, W to T with sampled stochastic gradient descent.

    Every iteration copies ``T``, calls ``sample(sample_rate)`` on the copy and
    hands it to ``sparse_update``, which modifies the factor matrices in
    place.  Every ``work_cycle`` iterations the objective

        ||T - TTTP(omega, [U, V, W])|| + Lambda * (||U|| + ||V|| + ||W||)

    and the RMSE over the observed entries are evaluated and printed on
    rank 0.  Time spent on these evaluations is excluded from the time budget.

    Args:
        T: tensor being completed (sparse or dense, see ``T.sp``).
        U, V, W: factor matrices, updated in place by ``sparse_update``.
        Lambda: regularization weight.
        omega: indicator tensor of observed entries.
        I, J, K: mode lengths of ``T``.
        r: rank, forwarded to ``sparse_update``.
        stepSize: SGD step size.
        sample_rate: value passed to ``tensor.sample`` for each iteration.
        num_iter: maximum number of SGD iterations.
        errThresh: stop when two consecutive objective evaluations differ by
            less than this.
        time_limit: budget in seconds (objective evaluations not counted).
        work_cycle: number of iterations between objective evaluations.
        use_MTTKRP: forwarded to ``sparse_update``.

    Returns:
        list: the initial objective followed by every evaluated objective
        that did not trigger the convergence stop.
    """
    times = [0 for _ in range(7)]
    iteration_count = 0
    R = ctf.tensor((I, J, K), sp=T.sp)
    if T.sp == True:
        nnz_tot = T.nnz_tot
    else:
        nnz_tot = ctf.sum(omega)

    start_time = time.time()
    starting_time = time.time()
    R.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U, V, W]).i("ijk")
    curr_err_norm = ctf.vecnorm(R) + (ctf.vecnorm(U) + ctf.vecnorm(V) + ctf.vecnorm(W)) * Lambda
    times[0] += time.time() - starting_time
    norm = [curr_err_norm]

    # The two halves add up to stepSize; `step` is kept separate so that a
    # decay schedule (currently disabled) can shrink only one half.
    step = stepSize * 0.5
    t_obj_calc = 0.
    while iteration_count < num_iter and time.time() - start_time - t_obj_calc < time_limit:
        iteration_count += 1

        starting_time = time.time()
        sampled_T = T.copy()
        sampled_T.sample(sample_rate)
        times[1] += time.time() - starting_time

        sparse_update(sampled_T, [U, V, W], Lambda, [I, J, K], r, stepSize * 0.5 + step, sample_rate, times, use_MTTKRP)
        # step *= 0.99
        sampled_T.set_zero()

        if iteration_count % work_cycle == 0:
            duration = time.time() - start_time - t_obj_calc
            t_b_obj = time.time()
            R.set_zero()
            R.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U, V, W]).i("ijk")
            diff_norm = ctf.vecnorm(R)
            RMSE = diff_norm/(nnz_tot**.5)
            next_err_norm = diff_norm + (ctf.vecnorm(U) + ctf.vecnorm(V) + ctf.vecnorm(W)) * Lambda
            if glob_comm.rank() == 0:
                print('Objective after',duration,'seconds (',iteration_count,'iterations) is: {}'.format(next_err_norm))
                print('RMSE after',duration,'seconds (',iteration_count,'iterations) is: {}'.format(RMSE))
            t_obj_calc += time.time() - t_b_obj

            if abs(curr_err_norm - next_err_norm) < errThresh:
                break
            curr_err_norm = next_err_norm
            norm.append(curr_err_norm)

    duration = time.time() - start_time - t_obj_calc
    # Skip the per-iteration statistics when nothing ran (num_iter <= 0 or an
    # exhausted time budget); dividing by zero here used to raise on rank 0
    # only, leaving the other ranks to return normally.
    if ctf.comm().rank() == 0 and iteration_count > 0:
        if sample_rate:
            print('SGD amortized seconds per sweep: {}'.format(duration/(iteration_count*sample_rate)))
        print("Time/SGD iteration: {}".format(duration/iteration_count))
    return norm
def mul(self, idx, sk):
    """Build the indexed expression for applying the implicit operator to ``sk``.

    ``self.string`` selects which factor ("U", "V" or "W") ``sk`` stands in
    for; ``self.f1`` and ``self.f2`` are the two remaining factors and
    ``self.omega`` the tensor handed to ``ctf.TTTP``.

    Args:
        idx: two-character index string; ``idx[0]`` labels the rows of the
            result and ``idx[1]`` the shared column index.
        sk: operand placed in the TTTP factor list at the position selected
            by ``self.string``.

    Returns:
        The product expression ``f1 * f2 * TTTP(omega, [...])``, or ``None``
        when ``self.string`` is not one of "U", "V", "W".
    """
    which = self.string
    if which == "U":
        first_label, second_label = "J", "K"
        operands = [sk, self.f1, self.f2]
        tensor_labels = idx[0] + "JK"
    elif which == "V":
        first_label, second_label = "I", "K"
        operands = [self.f1, sk, self.f2]
        tensor_labels = "I" + idx[0] + "K"
    elif which == "W":
        first_label, second_label = "I", "J"
        operands = [self.f1, self.f2, sk]
        tensor_labels = "IJ" + idx[0]
    else:
        return None

    left = self.f1.i(first_label + idx[1])
    right = self.f2.i(second_label + idx[1])
    masked = ctf.TTTP(self.omega, operands).i(tensor_labels)
    return left * right * masked
def sparse_update(T, factors, Lambda, sizes, rank, stepSize, sample_rate, times, use_MTTKRP):
    """Apply one gradient step to every factor matrix for the sampled tensor T.

    For each mode the residual ``T - TTTP(omega, factors)`` is rebuilt from a
    fresh copy of ``T`` and then used to update ``factors[mode]`` in place,
    either through ``ctf.MTTKRP`` or through an explicit index contraction.

    Args:
        T: sampled tensor for this step.
        factors: list of factor matrices, modified in place.
        Lambda: regularization weight.
        sizes: mode lengths; only ``len(sizes)`` is used.
        rank: unused here.
        stepSize: step size of the update.
        sample_rate: sampling rate, scales the regularization shrinkage.
        times: list of accumulated wall-clock timings; slots 2..6 are
            incremented.
        use_MTTKRP: choose the ``ctf.MTTKRP`` path instead of the explicit
            contraction.
    """
    tick = time.time()
    omega_timer = ctf.timer("SGD_getOmega")
    omega_timer.start()
    omega = getOmega(T)
    omega_timer.stop()
    order = len(sizes)
    labels = INDEX_STRING[:order]
    residual = ctf.tensor(copy=T)
    times[2] += time.time() - tick

    # Shrinkage applied to the current factor by the regularization term.
    decay = 1 - stepSize * 2 * Lambda * sample_rate

    for mode in range(order):
        tick = time.time()
        residual.i(labels) << -1. * ctf.TTTP(omega, factors).i(labels)
        times[3] += time.time() - tick

        # Slots 4 and 5 bracket no work in this variant; they are still
        # touched so the layout of `times` is unchanged.
        tick = time.time()
        times[4] += time.time() - tick
        tick = time.time()
        times[5] += time.time() - tick

        tick = time.time()
        contraction_timer = ctf.timer("SGD_main_contraction")
        contraction_timer.start()
        if not use_MTTKRP:
            terms = [factors[d].i(labels[d] + "r") for d in range(order)]
            hterm = reduce(lambda acc, term: acc * term, terms[:mode] + terms[mode + 1:])
            decay * factors[mode].i(labels[mode] + "r") << stepSize * hterm * residual.i(labels)
        else:
            # Keep the shrunken factor aside before MTTKRP is applied to
            # factors[mode], then combine both below.
            shrunk = decay * factors[mode]
            ctf.MTTKRP(residual, factors, mode)
            stepSize * factors[mode].i("ir") << shrunk.i("ir")
        contraction_timer.stop()
        times[6] += time.time() - tick

        if mode < order - 1:
            residual = ctf.tensor(copy=T)
def sparse_update(T, factors, Lambda, sizes, rank, stepSize, sample_rate, times):
    """Apply one gradient step to every factor matrix for the sampled tensor T.

    For each mode the residual ``T - TTTP(omega, factors)`` is rebuilt from a
    fresh copy of ``T`` and contracted with the other factors to update
    ``factors[mode]`` in place.

    Args:
        T: sampled tensor for this step.
        factors: list of factor matrices, modified in place.
        Lambda: regularization weight.
        sizes: mode lengths; only ``len(sizes)`` is used.
        rank: unused here.
        stepSize: step size of the update.
        sample_rate: sampling rate, scales the regularization shrinkage.
        times: list of accumulated wall-clock timings; slots 2..6 are
            incremented.
    """
    tick = time.time()
    omega_timer = ctf.timer("SGD_getOmega")
    omega_timer.start()
    omega = getOmega(T)
    omega_timer.stop()
    order = len(sizes)
    labels = INDEX_STRING[:order]
    residual = ctf.tensor(copy=T)
    times[2] += time.time() - tick

    # Shrinkage applied to the current factor by the regularization term.
    decay = 1 - stepSize * 2 * Lambda * sample_rate

    for mode in range(order):
        # Indexed views of all factors; rebuilt every pass because the
        # previous pass changed one of them.
        terms = [factors[d].i(labels[d] + "r") for d in range(order)]

        tick = time.time()
        residual.i(labels) << -1. * ctf.TTTP(omega, factors).i(labels)
        times[3] += time.time() - tick

        # Slot 4 brackets no work; it is still touched so the layout of
        # `times` is unchanged.
        tick = time.time()
        times[4] += time.time() - tick

        tick = time.time()
        others = terms[:mode] + terms[mode + 1:]
        hterm = reduce(lambda acc, term: acc * term, others)
        times[5] += time.time() - tick

        tick = time.time()
        contraction_timer = ctf.timer("SGD_main_contraction")
        contraction_timer.start()
        decay * factors[mode].i(labels[mode] + "r") << stepSize * hterm * residual.i(labels)
        contraction_timer.stop()
        times[6] += time.time() - tick

        if mode < order - 1:
            residual = ctf.tensor(copy=T)
def create_lowr_tensor(I, J, K, r, sp_frac, use_sp_rep):
    """Create an I x J x K tensor from random rank-``r`` factors on a random pattern.

    Three random factor matrices of shapes (I, r), (J, r) and (K, r) are
    drawn, a tensor is filled through ``fill_sp_random(1, 1, sp_frac)`` and
    the two are combined with ``ctf.TTTP``.

    Args:
        I, J, K: mode lengths.
        r: number of columns of each random factor.
        sp_frac: fraction argument passed to ``fill_sp_random``.
        use_sp_rep: whether the pattern tensor uses the sparse representation.

    Returns:
        The tensor returned by ``ctf.TTTP``.
    """
    # Drawn in the order U, V, W so the random stream is consumed as before.
    factors = [ctf.random.random((extent, r)) for extent in (I, J, K)]
    pattern = ctf.tensor((I, J, K), sp=use_sp_rep)
    pattern.fill_sp_random(1, 1, sp_frac)
    return ctf.TTTP(pattern, factors)
def test_TTTP_vec(self):
    """TTTP with vector factors and a skipped (None) mode matches numpy.einsum."""
    dense = numpy.random.random((4, 3, 5))
    first = numpy.random.random((4, ))
    last = numpy.random.random((5, ))
    expected = numpy.einsum("ijk,i,k->ijk", dense, first, last)

    c_dense = ctf.astensor(dense)
    c_first = ctf.astensor(first)
    c_last = ctf.astensor(last)
    # The middle mode is left unscaled by passing None for its factor.
    actual = ctf.TTTP(c_dense, [c_first, None, c_last])

    self.assertTrue(allclose(expected, actual))
def getALS_CG(T, U, V, W, regParam, omega, I, J, K, r, block):
    """Run alternating least squares, solving each factor update with CG.

    U, V and W are updated in turn through ``updateFactor_CG`` until the
    objective

        ||T - TTTP(omega, [U, V, W])|| + regParam * (||U|| + ||V|| + ||W||)

    changes by less than 0.001 between sweeps or more than 100 sweeps have
    been performed.  Rank 0 prints the previous and new objective each sweep.

    Args:
        T: tensor being completed.
        U, V, W: factor matrices; the asserts require them to stay sparse.
        regParam: regularization weight.
        omega: indicator tensor of observed entries.
        I, J, K: mode lengths of ``T``.
        r: number of columns of the factors.
        block: blocking argument forwarded to ``updateFactor_CG``.

    Returns:
        tuple: ``(it, nt)`` with the number of sweeps performed and the
        elapsed wall-clock seconds rounded to four decimals.
    """
    it = 0
    E = ctf.tensor((I, J, K), sp=True)
    E.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U, V, W]).i("ijk")
    assert (E.sp == 1)
    curr_err_norm = ctf.vecnorm(E) + (ctf.vecnorm(U) + ctf.vecnorm(V) + ctf.vecnorm(W)) * regParam

    t = time.time()
    while True:
        U = updateFactor_CG(T, U, V, W, regParam, omega, I, J, K, r, block, "U")
        assert (U.sp == 1)
        V = updateFactor_CG(T, U, V, W, regParam, omega, I, J, K, r, block, "V")
        assert (V.sp == 1)
        W = updateFactor_CG(T, U, V, W, regParam, omega, I, J, K, r, block, "W")
        assert (W.sp == 1)

        # `<<` accumulates, so the residual has to be cleared before reuse.
        E.set_zero()
        E.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U, V, W]).i("ijk")
        assert (E.sp == 1)
        next_err_norm = ctf.vecnorm(E) + (ctf.vecnorm(U) + ctf.vecnorm(V) + ctf.vecnorm(W)) * regParam

        if ctf.comm().rank() == 0:
            print(curr_err_norm, next_err_norm)

        it += 1
        if abs(curr_err_norm - next_err_norm) < .001 or it > 100:
            break
        curr_err_norm = next_err_norm

    # np.round_ was removed in NumPy 2.0; np.round is the supported spelling
    # and returns the same value.
    nt = np.round(time.time() - t, 4)
    return it, nt
def test_sp_TTTP_mat(self):
    """Sparse order-5 TTTP with matrix factors matches the equivalent einsum."""
    shape = (5, 1, 4, 2, 3)
    sparse_input = ctf.tensor(shape, sp=True)
    sparse_input.fill_sp_random(0., 1., .2)

    # One (extent, 3) factor per mode, drawn in mode order.
    factors = [ctf.random.random((extent, 3)) for extent in shape]

    expected = ctf.einsum("ijklm,ia,ja,ka,la,ma->ijklm", sparse_input, *factors)
    actual = ctf.TTTP(sparse_input, factors)
    self.assertTrue(allclose(expected, actual))
def MTTKRP_TTTP(self, sk, out):
    """Accumulate the implicit operator applied to ``sk`` into ``out``.

    ``ctf.TTTP(self.omega, ...)`` is formed with ``sk`` in the factor slot
    selected by ``self.string`` ("U" -> 0, "V" -> 1, "W" -> 2) and
    ``self.f1`` / ``self.f2`` in the other two slots.  The result is then
    reduced against ``f1`` and ``f2`` into ``out``, either with
    ``ctf.MTTKRP`` (``self.use_MTTKRP`` true) or with an explicit index
    contraction.

    Args:
        sk: operand standing in for the factor being solved for.
        out: output tensor; written by ``ctf.MTTKRP`` or accumulated into
            with ``<<``.

    An unknown ``self.string`` prints a diagnostic and leaves ``out``
    untouched.  Previously only the MTTKRP path reported this; the
    contraction path silently did nothing.
    """
    mode = {"U": 0, "V": 1, "W": 2}.get(self.string)
    if mode is None:
        print("Invalid string for implicit MTTKRP_TTTP")
        return

    # [sk, f1, f2], [f1, sk, f2] or [f1, f2, sk] depending on the mode.
    tttp_factors = [self.f1, self.f2]
    tttp_factors.insert(mode, sk)
    masked = ctf.TTTP(self.omega, tttp_factors)

    if self.use_MTTKRP:
        mttkrp_factors = [self.f1, self.f2]
        mttkrp_factors.insert(mode, out)
        ctf.MTTKRP(masked, mttkrp_factors, mode)
        return

    # Explicit contraction: upper-case labels are summed over, "i" is the
    # row label of the output, "r" the shared column label.
    summed = "IJK"
    label_f1, label_f2 = [c for k, c in enumerate(summed) if k != mode]
    tensor_labels = summed[:mode] + "i" + summed[mode + 1:]
    out.i("ir") << self.f1.i(label_f1 + "r") \
        * self.f2.i(label_f2 + "r") \
        * masked.i(tensor_labels)
def test_TTTP_mat(self):
    """Dense order-5 TTTP with matrix factors matches numpy.einsum."""
    shape = (5, 1, 4, 2, 3)
    dense = numpy.random.random(shape)
    # One (extent, 3) factor per mode, drawn in mode order.
    mats = [numpy.random.random((extent, 3)) for extent in shape]
    expected = numpy.einsum("ijklm,ia,ja,ka,la,ma->ijklm", dense, *mats)

    c_dense = ctf.astensor(dense)
    c_mats = [ctf.astensor(mat) for mat in mats]
    actual = ctf.TTTP(c_dense, c_mats)

    self.assertTrue(allclose(expected, actual))
def get_objective(T,U,V,W,omega,regParam):
    """Evaluate the regularized objective and the RMSE of the current factors.

    The residual ``T - TTTP(omega, [U, V, W])`` is formed; its norm plus
    ``regParam * (||U|| + ||V|| + ||W||)`` is the objective, and its norm
    divided by the square root of the observed-entry count is the RMSE.  The
    count is ``T.nnz_tot`` for a sparse ``T`` and ``ctf.sum(omega)`` otherwise.

    Returns:
        list: ``[objective, RMSE]``.
    """
    obj_timer = ctf.timer("ccd_get_objective")
    obj_timer.start()

    residual = ctf.tensor(T.shape, sp=T.sp)
    began = time.time()
    residual.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U,V,W]).i("ijk")
    residual_done = time.time()

    residual_norm = ctf.vecnorm(residual)
    observed = T.nnz_tot if T.sp == True else ctf.sum(omega)
    RMSE = residual_norm/(observed**.5)
    penalty = (ctf.vecnorm(U) + ctf.vecnorm(V) + ctf.vecnorm(W)) * regParam
    objective = residual_norm + penalty
    finished = time.time()

    if glob_comm.rank() == 0 and status_prints == True:
        print('generate L takes {}'.format(residual_done - began))
        print('calc objective takes {}'.format(finished - residual_done))

    obj_timer.stop()
    return [objective, RMSE]
def create_function_tensor(I, J, K, sp_frac, use_sp_rep):
    """Create an I x J x K tensor sampled from a fixed smooth function.

    A pattern tensor is filled with ``fill_sp_random(1, 1, sp_frac)`` and
    multiplied through ``ctf.TTTP`` by the squares of three ``linspace(-1, 1,
    n)`` grids, one per mode.  The locally stored values are then replaced by
    the negative of their square root and written into a fresh tensor of the
    same shape.

    Args:
        I, J, K: mode lengths.
        sp_frac: fraction argument passed to ``fill_sp_random``.
        use_sp_rep: whether the tensors use the sparse representation.

    Returns:
        The newly written tensor.
    """
    T = ctf.tensor((I, J, K), sp=use_sp_rep)
    # presumably draws values in [1, 1], i.e. a 0/1 pattern — TODO confirm
    T.fill_sp_random(1, 1, sp_frac)

    # One squared grid per mode, built directly from the mode lengths.
    vs = [ctf.astensor(np.linspace(-1, 1, n) ** 2) for n in (I, J, K)]
    T = ctf.TTTP(T, vs)

    [inds, data] = T.read_local_nnz()
    data[:] **= .5
    data[:] *= -1.

    T = ctf.tensor(T.shape, sp=use_sp_rep)
    T.write(inds, data)
    return T
def TTTP(T, A):
    """Tensor Times Tensor Product: thin wrapper around ``ctf.TTTP``.

    Args:
        T: tensor operand, passed through unchanged.
        A: list of factors, passed through unchanged.

    Returns:
        Whatever ``ctf.TTTP(T, A)`` returns.
    """
    product = ctf.TTTP(T, A)
    return product
def _cg_block_bounds(extent, block):
    """Split ``range(extent)`` into at most ``block`` contiguous (lo, hi) pieces."""
    size = max(1, -(-extent // block))  # ceiling division, never a zero step
    return [(lo, min(extent, lo + size)) for lo in range(0, extent, size)]


def updateFactor_CG(T, U, V, W, regParam, omega, I, J, K, r, block, string):
    """Re-solve one factor matrix block by block with conjugate gradient.

    ``string`` selects the factor ("U", "V" or "W").  Its rows are split into
    ``block`` contiguous blocks; for each block a random start ``x0`` is
    drawn, the regularized operator is applied to it to get ``Ax0``, the
    right-hand side ``b`` is formed from the matching slice of ``T``, and the
    block is overwritten with the result of ``CG``.

    Unlike the previous ``int(extent / block)`` blocking, the bounds use
    ceiling division and the last block is clipped.  Rows beyond
    ``block * (extent // block)`` are therefore no longer skipped, and
    ``block > extent`` no longer produces empty blocks.  When ``extent`` is
    divisible by ``block`` the blocks are the same as before.

    Args:
        T: tensor being completed.
        U, V, W: factor matrices; the selected one is updated in place.
        regParam: regularization weight.
        omega: indicator tensor of observed entries.
        I, J, K: mode lengths of ``T``.
        r: number of columns of the factors.
        block: number of row blocks (must be positive).
        string: "U", "V" or "W".

    Returns:
        The updated factor, or ``None`` for any other ``string``.
    """
    if string not in ("U", "V", "W"):
        return None
    mode = {"U": 0, "V": 1, "W": 2}[string]

    factors = [U, V, W]
    target = factors[mode]
    f1, f2 = [factors[k] for k in range(3) if k != mode]

    # Index labels: the solved mode is lower case, summed modes upper case,
    # e.g. "ir" / "Jr" / "Kr" / "iJK" for U.
    row = "ijk"[mode]
    label_f1, label_f2 = [c for k, c in enumerate("IJK") if k != mode]
    tensor_labels = "IJK"[:mode] + row + "IJK"[mode + 1:]
    out_labels = row + "r"

    for lo, hi in _cg_block_bounds((I, J, K)[mode], block):
        bsize = hi - lo
        window = [slice(None)] * 3
        window[mode] = slice(lo, hi)
        window = tuple(window)

        nomega = omega[window]
        x0 = ctf.random.random((bsize, r))

        # LHS: A^T A applied to x0 through TTTP, plus the regularization.
        operands = [f1, f2]
        operands.insert(mode, x0)
        Ax0 = ctf.tensor((bsize, r), sp=True)
        Ax0.i(out_labels) << f1.i(label_f1 + "r") * f2.i(label_f2 + "r") \
            * ctf.TTTP(nomega, operands).i(tensor_labels)
        Ax0 += regParam * x0
        assert (Ax0.sp == 1)

        # RHS: A^T b from the matching slice of T.
        b = ctf.tensor((bsize, r), sp=True)
        b.i(out_labels) << f1.i(label_f1 + "r") * f2.i(label_f2 + "r") \
            * T[window].i(tensor_labels)
        assert (b.sp == 1)

        target[lo:hi, :].set_zero()
        target[lo:hi, :] = CG(Ax0, b, x0, f1, f2, r, regParam, nomega, bsize, string)
        assert (target.sp == 1)

    return target
def _ccd_solve_mode(R, omega, columns, mode, regParam, use_MTTKRP):
    """Solve the one-column CCD subproblem for ``mode``.

    ``columns`` holds the current column of U, V and W.  Returns the new
    column for ``mode`` together with the seconds spent on the numerator
    (alphas) and on the denominator (betas).
    """
    out_label = "ijk"[mode]
    label_a, label_b = [c for c in "ijk" if c != out_label]
    vec_a, vec_b = [columns[k] for k in range(3) if k != mode]

    t_start = time.time()
    if use_MTTKRP:
        alphas = ctf.tensor(R.shape[mode])
        operands = [vec_a, vec_b]
        operands.insert(mode, alphas)
        ctf.MTTKRP(R, operands, mode)
    else:
        alphas = ctf.einsum('ijk, {}, {} -> {}'.format(label_a, label_b, out_label), R, vec_a, vec_b)
    t_mid = time.time()
    if use_MTTKRP:
        betas = ctf.tensor(R.shape[mode])
        operands = [vec_a * vec_a, vec_b * vec_b]
        operands.insert(mode, betas)
        ctf.MTTKRP(omega, operands, mode)
    else:
        betas = ctf.einsum('ijk, {0}, {0}, {1}, {1} -> {2}'.format(label_a, label_b, out_label),
                           omega, vec_a, vec_a, vec_b, vec_b)
    t_end = time.time()

    return alphas / (regParam + betas), t_mid - t_start, t_end - t_mid


def run_CCD(T,U,V,W,omega,regParam,num_iter,time_limit,objective_frequency,use_MTTKRP=True):
    """Run cyclic coordinate descent on the columns of U, V and W.

    Each sweep rebuilds the residual ``R = T - TTTP(omega, [U, V, W])`` with
    the first rank-one term added back.  Then, for every column ``f``, the
    f-th column of U, V and W is re-solved in turn and the rank-one term in
    ``R`` is swapped for the next column's.  Every ``objective_frequency``
    sweeps the objective and RMSE are printed on rank 0; the final values are
    printed once more after the loop.

    Args:
        T: tensor being completed.
        U, V, W: factor matrices, updated in place column by column.
        omega: indicator tensor of observed entries.
        regParam: regularization weight.
        num_iter: the loop stops once exactly this many sweeps have run.
        time_limit: budget in seconds (objective evaluations not counted).
        objective_frequency: sweeps between objective evaluations.
        use_MTTKRP: use ``ctf.MTTKRP`` instead of ``ctf.einsum``.
    """
    def verbose():
        return glob_comm.rank() == 0 and status_prints == True

    r = U.shape[1]
    U_vec_list = []
    V_vec_list = []
    W_vec_list = []
    for f in range(r):
        U_vec_list.append(U[:,f])
        V_vec_list.append(V[:,f])
        W_vec_list.append(W[:,f])

    # (status message, column cache, factor matrix) per mode, in solve order.
    modes = (
        ('updating U[:,{}]', U_vec_list, U),
        ('updating V[:,{}]', V_vec_list, V),
        ('updating W[:,{}]', W_vec_list, W),
    )

    ite = 0
    # NOTE(review): collected but never returned or read — confirm whether
    # callers are meant to receive it.
    objectives = []
    t_before_loop = time.time()
    t_obj_calc = 0.

    t_CCD = ctf.timer_epoch("ccd_CCD")
    t_CCD.begin()
    while True:
        t_iR_upd = ctf.timer("ccd_init_R_upd")
        t_iR_upd.start()
        t_copy_start = time.time()
        R = ctf.copy(T)
        t_copy_end = time.time()
        R -= ctf.TTTP(omega, [U,V,W])
        t_sub_end = time.time()
        R += ctf.TTTP(omega, [U[:,0], V[:,0], W[:,0]])
        t_add_end = time.time()
        t_iR_upd.stop()

        t_b_obj = time.time()
        if ite % objective_frequency == 0:
            duration = time.time() - t_before_loop - t_obj_calc
            [objective, RMSE] = get_objective(T,U,V,W,omega,regParam)
            objectives.append(objective)
            if glob_comm.rank() == 0:
                print('Objective after',duration,'seconds (',ite,'iterations) is: {}'.format(objective))
                print('RMSE after',duration,'seconds (',ite,'iterations) is: {}'.format(RMSE))
        t_obj_calc += time.time() - t_b_obj

        if verbose():
            print('ctf.copy() takes {}'.format(t_copy_end - t_copy_start))
            print('ctf.TTTP() takes {}'.format(t_sub_end - t_copy_end))
            print('ctf.TTTP() takes {}'.format(t_add_end - t_sub_end))

        for f in range(r):
            for mode, (message, column_cache, factor) in enumerate(modes):
                if verbose():
                    print(message.format(f))
                current = (U_vec_list[f], V_vec_list[f], W_vec_list[f])
                column_cache[f], t_alpha, t_beta = _ccd_solve_mode(
                    R, omega, current, mode, regParam, use_MTTKRP)
                factor[:,f] = column_cache[f]
                # Only the U update reports its timings.
                if mode == 0 and verbose():
                    print('ctf.einsum() takes {}'.format(t_alpha))
                    print('ctf.einsum() takes {}'.format(t_beta))

            t_tttp = ctf.timer("ccd_TTTP")
            t_tttp.start()
            # Remove the refreshed rank-one term and expose the next one.
            R -= ctf.TTTP(omega, [U_vec_list[f], V_vec_list[f], W_vec_list[f]])
            if f+1 < r:
                R += ctf.TTTP(omega, [U_vec_list[f+1], V_vec_list[f+1], W_vec_list[f+1]])
            t_tttp.stop()

        # NOTE(review): this timer was already stopped right after R was
        # initialised; the second stop looks redundant — confirm.
        t_iR_upd.stop()

        ite += 1
        if ite == num_iter or time.time() - t_before_loop - t_obj_calc > time_limit:
            break
    t_CCD.end()

    duration = time.time() - t_before_loop - t_obj_calc
    [objective, RMSE] = get_objective(T,U,V,W,omega,regParam)
    if glob_comm.rank() == 0:
        print('CCD amortized seconds per sweep: {}'.format(duration/ite))
        print('Time/CCD Iteration: {}'.format(duration/ite))
        print('Objective after',duration,'seconds (',ite,'iterations) is: {}'.format(objective))
        print('RMSE after',duration,'seconds (',ite,'iterations) is: {}'.format(RMSE))
def updateFactor(T, U, V, W, regParam, omega, I, J, K, r, block_size, string, use_implicit):
    """Re-solve one factor matrix in row blocks of ``block_size`` with CG.

    ``string`` selects the factor ("U", "V" or "W").  For every block the
    matching slice of ``omega`` is taken, a random start ``x0`` is drawn and
    the right-hand side ``b`` is formed from the matching slice of ``T``.
    With ``use_implicit`` the system is handed to ``CG`` as an
    ``implicit_ATA`` operator; otherwise the (bsize, r, r) normal-equation
    tensor ``A`` is built explicitly first.

    Returns:
        The updated factor, or ``None`` for any other ``string``.
    """
    t_RHS = ctf.timer("ALS_imp_cg_RHS")
    t_cg_TTTP = ctf.timer("ALS_imp_cg_TTTP")
    t_o_slice = ctf.timer("ALS_imp_omega_slice")
    t_form_EQs = ctf.timer("ALS_exp_form_EQs")
    t_form_RHS = ctf.timer("ALS_exp_form_RHS")  # created but never started

    if string not in ("U", "V", "W"):
        return None
    mode = {"U": 0, "V": 1, "W": 2}[string]

    factors = [U, V, W]
    target = factors[mode]
    first, second = [factors[k] for k in range(3) if k != mode]
    extent = (I, J, K)[mode]

    # Index labels: the solved mode is lower case, summed modes upper case,
    # e.g. "ir" / "Jr" / "Kr" / "iJK" for U.
    row = "ijk"[mode]
    label_a, label_b = [c for k, c in enumerate("IJK") if k != mode]
    tensor_labels = "IJK"[:mode] + row + "IJK"[mode + 1:]

    num_blocks = int((extent + block_size - 1) / block_size)
    for n in range(num_blocks):
        lo = n * block_size
        hi = min(extent, lo + block_size)
        bsize = hi - lo
        window = [slice(None)] * 3
        window[mode] = slice(lo, hi)
        window = tuple(window)

        t_o_slice.start()
        nomega = omega[window]
        t_o_slice.stop()

        x0 = ctf.random.random((bsize, r))
        b = ctf.tensor((bsize, r))
        t_RHS.start()
        b.i(row + "r") << first.i(label_a + "r") * second.i(label_b + "r") \
            * T[window].i(tensor_labels)  # RHS; ATb
        t_RHS.stop()

        if use_implicit:
            # NOTE(review): Ax0 is computed and timed but not passed to CG
            # below — confirm whether it is kept only for profiling.
            Ax0 = ctf.tensor((bsize, r))
            operands = [first, second]
            operands.insert(mode, x0)
            t_cg_TTTP.start()
            Ax0.i(row + "r") << first.i(label_a + "r") * second.i(label_b + "r") \
                * ctf.TTTP(nomega, operands).i(tensor_labels)
            t_cg_TTTP.stop()
            Ax0 += regParam * x0
            target[lo:hi, :] = CG(implicit_ATA(first, second, nomega, string),
                                  b, x0, r, regParam, bsize, True)
        else:
            A = ctf.tensor((bsize, r, r))
            t_form_EQs.start()
            A.i(row + "uv") << first.i(label_a + "u") * second.i(label_b + "u") \
                * nomega.i(tensor_labels) \
                * first.i(label_a + "v") * second.i(label_b + "v")
            t_form_EQs.stop()
            target[lo:hi, :] = CG(A, b, x0, r, regParam, bsize)

    return target
def TTTP(T, A):
    """Thin wrapper that forwards both arguments to ``ctf.TTTP``.

    Args:
        T: tensor operand, passed through unchanged.
        A: list of factors, passed through unchanged.

    Returns:
        Whatever ``ctf.TTTP(T, A)`` returns.
    """
    result = ctf.TTTP(T, A)
    return result
def getALS_CG(T, U, V, W, regParam, omega, I, J, K, r, block_size, num_iter=100, err_thresh=.001, time_limit=600, use_implicit=True):
    """Run alternating least squares with (implicit or explicit) CG solves.

    U, V and W are re-solved in turn with ``updateFactor`` until the
    objective changes by less than ``err_thresh``, ``num_iter`` sweeps have
    run, or the time measured before the latest objective evaluation exceeds
    ``time_limit``.  The objective and RMSE are printed on rank 0 after every
    sweep; time spent evaluating them is excluded from the reported duration.

    Args:
        T: tensor being completed.
        U, V, W: factor matrices.
        regParam: regularization weight.
        omega: indicator tensor of observed entries.
        I, J, K: mode lengths of ``T``.
        r: number of columns of the factors.
        block_size: rows per CG block; values <= 0 select ``max(I, J, K)``.
        num_iter: maximum number of sweeps.
        err_thresh: convergence threshold on the objective change.
        time_limit: budget in seconds.
        use_implicit: forwarded to ``updateFactor``.
    """
    if use_implicit == True:
        epoch_name = "als_CG_implicit"
        banner = "--------------------------------ALS with implicit CG------------------------"
    else:
        epoch_name = "als_CG_explicit"
        banner = "--------------------------------ALS with explicit CG------------------------"
    t_ALS_CG = ctf.timer_epoch(epoch_name)
    if ctf.comm().rank() == 0:
        print(banner)

    nnz_tot = T.nnz_tot if T.sp == True else ctf.sum(omega)

    t_ALS_CG.begin()
    it = 0
    if block_size <= 0:
        block_size = max(I, J, K)

    t_init_error_norm = ctf.timer("ALS_init_error_tensor_norm")
    t_init_error_norm.start()
    t_begin = time.time()
    E = ctf.tensor((I, J, K), sp=T.sp)
    E.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U, V, W]).i("ijk")
    t_residual = time.time()
    curr_err_norm = ctf.vecnorm(E) + (ctf.vecnorm(U) + ctf.vecnorm(V) + ctf.vecnorm(W)) * regParam
    t_norms = time.time()
    t_init_error_norm.stop()

    if ctf.comm().rank() == 0 and status_prints == True:
        print('ctf.TTTP() takes {}'.format(t_residual - t_begin))
        print('ctf.vecnorm {}'.format(t_norms - t_residual))

    t_before_loop = time.time()
    t_obj_calc = 0.
    # Fixed seed for the random CG starting points drawn in updateFactor.
    ctf.random.seed(42)
    while True:
        t_upd_cg = ctf.timer("ALS_upd_cg")
        t_upd_cg.start()
        U = updateFactor(T, U, V, W, regParam, omega, I, J, K, r, block_size, "U", use_implicit)
        V = updateFactor(T, U, V, W, regParam, omega, I, J, K, r, block_size, "V", use_implicit)
        W = updateFactor(T, U, V, W, regParam, omega, I, J, K, r, block_size, "W", use_implicit)

        duration = time.time() - t_before_loop - t_obj_calc
        t_b_obj = time.time()
        # `<<` accumulates, so the residual has to be cleared before reuse.
        E.set_zero()
        E.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U, V, W]).i("ijk")
        diff_norm = ctf.vecnorm(E)
        RMSE = diff_norm / (nnz_tot**.5)
        next_err_norm = diff_norm + (ctf.vecnorm(U) + ctf.vecnorm(V) + ctf.vecnorm(W)) * regParam
        t_obj_calc += time.time() - t_b_obj
        t_upd_cg.stop()

        it += 1
        if ctf.comm().rank() == 0:
            print('Objective after', duration, 'seconds (', it, 'iterations) is: {}'.format(next_err_norm))
            print('RMSE after', duration, 'seconds (', it, 'iterations) is: {}'.format(RMSE))

        converged = abs(curr_err_norm - next_err_norm) < err_thresh
        if converged or it >= num_iter or duration > time_limit:
            break
        curr_err_norm = next_err_norm
    t_ALS_CG.end()

    duration = time.time() - t_before_loop - t_obj_calc
    if glob_comm.rank() == 0:
        print('ALS (implicit =', use_implicit, ') time per sweep: {}'.format(duration / it))
def CG(Ax0, b, x0, f1, f2, r, regParam, omega, I, string):
    """Run row-wise conjugate gradient for one block of a factor matrix.

    Every row of the (I, r) unknown is treated as its own system: the step
    sizes ``alpha`` and ``beta`` are length-I vectors.  The operator applied
    to the search direction is the TTTP-based product with ``f1`` and ``f2``
    plus ``regParam`` times the direction.  The number of iterations equals
    the last dimension of the initial residual.

    Args:
        Ax0: operator applied to ``x0``.
        b: right-hand side.
        x0: starting point.
        f1, f2: the two fixed factor matrices.
        r: number of columns.
        regParam: regularization weight.
        omega: block of the observed-entry indicator tensor.
        I: number of rows in the block.
        string: "U", "V" or "W", selecting where the direction enters TTTP.

    Returns:
        The iterate after the last CG step.
    """
    def row_dot(lhs, rhs):
        # Per-row inner product of two (I, r) tensors.
        out = ctf.tensor(I, sp=True)
        out.i("i") << lhs.i("ir") * rhs.i("ir")
        assert (out.sp == 1)
        return out

    def row_scale(coeff, mat):
        # Scale every row of `mat` by the matching entry of `coeff`.
        out = ctf.tensor((I, r), sp=True)
        out.i("ir") << coeff.i("i") * mat.i("ir")
        assert (out.sp == 1)
        return out

    residual = b - Ax0
    direction = residual
    iterate = x0

    for _ in range(direction.shape[-1]):
        applied = ctf.tensor((I, r), sp=True)
        if string == "U":
            applied.i("ir") << f1.i("Jr") * f2.i("Kr") * ctf.TTTP(
                omega, [direction, f1, f2]).i("iJK")
        elif string == "V":
            applied.i("jr") << f1.i("Ir") * f2.i("Kr") * ctf.TTTP(
                omega, [f1, direction, f2]).i("IjK")
        elif string == "W":
            applied.i("kr") << f1.i("Ir") * f2.i("Jr") * ctf.TTTP(
                omega, [f1, f2, direction]).i("IJk")
        applied += regParam * direction
        assert (applied.sp == 1)

        res_sq = row_dot(residual, residual)
        curvature = row_dot(direction, applied)
        # The tiny offset keeps the division defined for exhausted rows.
        alpha = res_sq / (curvature + 1.e-30)

        next_iterate = iterate + row_scale(alpha, direction)
        next_residual = residual - row_scale(alpha, applied)

        next_res_sq = row_dot(next_residual, next_residual)
        beta = next_res_sq / (res_sq + 1.e-30)
        next_direction = next_residual + row_scale(beta, direction)

        residual = next_residual
        iterate = next_iterate
        direction = next_direction

    return iterate
def _tttp_kernel(T, U, V, W, s, R, sp, use_tttp, matrix_factors):
    """Evaluate one TTTP, either with ``ctf.TTTP`` or with the baseline path."""
    if use_tttp:
        return ctf.TTTP(T, [U, V, W])
    if not sp:
        spec = "ijk,iR,jR,kR->ijk" if matrix_factors else "ijk,i,j,k->ijk"
        return ctf.einsum(spec, T, U, V, W)

    S = ctf.tensor((s, s, s), sp=sp)
    if matrix_factors:
        Z = ctf.tensor((s, s, s, R), sp=sp)
        Z.i("ijkr") << T.i("ijk") * U.i("ir")
        # Scale the output by 0.0 so the product overwrites Z instead of
        # being added to it, as the vector baseline below already does.
        0.0 * Z.i("ijkr") << Z.i("ijkr") * V.i("jr")
        S.i("ijk") << Z.i("ijkr") * W.i("kr")
    else:
        S.i("ijk") << T.i("ijk") * U.i("i")
        0.0 * S.i("ijk") << S.i("ijk") * V.i("j")
        0.0 * S.i("ijk") << S.i("ijk") * W.i("k")
    return S


def run_bench(num_iter, s_start, s_end, mult, R, sp, sp_init, use_tttp):
    """Benchmark TTTP on s x s x s tensors for a growing sequence of sizes.

    The size starts at ``s_start`` and is multiplied by ``mult`` (truncated
    to an int) until it exceeds ``s_end``.  The fill fraction is chosen so
    that the expected number of nonzeros, ``s_start**3 * sp_init``, is the
    same for every size.  Each size is timed ``num_iter`` times; rank 0
    prints every sample, the average, the min/max and a two-standard-
    deviation interval, then a summary table.

    The sweep also stops as soon as the next size would not be larger than
    the current one (``mult <= 1`` or truncation), which previously looped
    forever.

    Args:
        num_iter: timed repetitions per size; a warm-up run is performed
            first when this is greater than 1.
        s_start, s_end: first size and inclusive upper bound.
        mult: growth factor between sizes.
        R: number of factor columns; vectors are used when ``R <= 1``.
        sp: whether the tensors use the sparse representation.
        sp_init: fill fraction at ``s_start``.
        use_tttp: time ``ctf.TTTP`` instead of the contraction/einsum baseline.

    Raises:
        ValueError: if ``num_iter`` is smaller than 1 (raised on every rank
            instead of failing during the statistics on rank 0 only).
    """
    if num_iter < 1:
        raise ValueError("num_iter must be at least 1")

    wrld = ctf.comm()
    is_root = wrld.rank() == 0
    s = s_start
    nnz = float(s_start * s_start * s_start) * sp_init

    agg_s = []
    agg_avg_times = []
    agg_min_times = []
    agg_max_times = []
    agg_min_95 = []
    agg_max_95 = []

    if num_iter > 1:
        if is_root:
            print("Performing TTTP WARMUP with s =", s, "nnz =", nnz, "sp", sp, "sp_init is", sp_init, "use_tttp", use_tttp)
        T = ctf.tensor((s, s, s), sp=sp)
        T.fill_sp_random(-1., 1., float(nnz) / float(s * s * s))
        U = ctf.random.random((s, R))
        V = ctf.random.random((s, R))
        W = ctf.random.random((s, R))
        # The warm-up always uses matrix factors, whatever R is.
        _tttp_kernel(T, U, V, W, s, R, sp, use_tttp, True)
        if is_root:
            print("Completed TTTP WARMUP with s =", s, "nnz =", nnz, "sp", sp, "sp_init is", sp_init, "use_tttp", use_tttp)

    while s <= s_end:
        agg_s.append(s)
        if is_root:
            print("Performing TTTP with s =", s, "nnz =", nnz, "sp", sp, "sp_init is", sp_init, "use_tttp", use_tttp)
        T = ctf.tensor((s, s, s), sp=sp)
        T.fill_sp_random(-1., 1., float(nnz) / float(s * s * s))

        matrix_factors = R > 1
        factor_shape = (s, R) if matrix_factors else (s)
        U = ctf.random.random(factor_shape)
        V = ctf.random.random(factor_shape)
        W = ctf.random.random(factor_shape)

        te1 = 0.
        times = []
        for _ in range(num_iter):
            t0 = time.time()
            _tttp_kernel(T, U, V, W, s, R, sp, use_tttp, matrix_factors)
            ite1 = time.time() - t0
            te1 += ite1
            times.append(ite1)
            if is_root:
                print(ite1)

        if is_root:
            avg_time = te1 / num_iter
            agg_avg_times.append(avg_time)
            print("TTTP", avg_time, "seconds on average with s =", s, "nnz =", nnz, "sp", sp, "sp_init is", sp_init, "use_tttp", use_tttp)
            min_time = np.min(times)
            max_time = np.max(times)
            agg_min_times.append(min_time)
            agg_max_times.append(max_time)
            print("min/max interval is [", min_time, ",", max_time, "]")
            stddev = np.std(times)
            min_95 = avg_time - 2 * stddev
            max_95 = avg_time + 2 * stddev
            agg_min_95.append(min_95)
            agg_max_95.append(max_95)
            print("95% confidence interval is [", min_95, ",", max_95, "]")

        next_s = int(s * mult)
        if next_s <= s:
            # The size would not grow; stop instead of repeating this size
            # forever.
            break
        s = next_s

    if is_root:
        print("s min_time min_95 avg_time max_95 max_time")
        for row in zip(agg_s, agg_min_times, agg_min_95, agg_avg_times, agg_max_95, agg_max_times):
            print(*row)