Python timer_epochの例、ctf.timer_epoch Pythonの例

コード例 #1

0

ファイルを表示

def run_bench(num_iter, s, k):
    """Benchmark ``ctf.solve_spd`` against ``ctf.cholesky`` + ``ctf.solve_tri``.

    A random ``s`` x ``s`` matrix is decomposed with ``ctf.svd``; its left
    factor ``U`` and the values ``1..s`` are recombined as
    ``ctf.dot(U * S, U.T())`` to form the system matrix. A random ``k`` x ``s``
    right-hand side is then solved ``num_iter`` times with each approach.
    Every approach runs inside its own ``ctf.timer_epoch`` and rank 0 prints
    the mean and the per-iteration wall-clock times.

    Args:
        num_iter: number of timed repetitions per approach.
        s: dimension of the square system matrix.
        k: leading dimension of the right-hand side.
    """
    world = ctf.comm()
    M = ctf.random.random((s, s))
    X = ctf.random.random((k, s))
    U, _, _ = ctf.svd(M)
    spectrum = np.arange(0, s) + 1
    M = ctf.dot(U * spectrum, U.T())

    # --- Approach 1: library SPD solve -----------------------------------
    epoch = ctf.timer_epoch("BENCHMARK: SPD SOLVE")
    epoch.begin()
    elapsed = []
    for _ in range(num_iter):
        tic = time.time()
        X = ctf.solve_spd(M, X)
        elapsed.append(time.time() - tic)
    epoch.end()
    if ctf.comm().rank() == 0:
        print("ctf.solve_spd average time:", np.sum(elapsed) / num_iter, "sec")
        print("ctf.solve_spd iteration timings:", elapsed)

    # --- Approach 2: explicit Cholesky followed by a triangular solve ----
    epoch = ctf.timer_epoch("BENCHMARK: Manual Cholesky+TRSM SPD SOLVE")
    epoch.begin()
    elapsed = []
    for _ in range(num_iter):
        tic = time.time()
        chol_factor = ctf.cholesky(M)
        # NOTE(review): the factor computed above is not used; the triangular
        # solve is handed M itself. Confirm whether the factor was intended.
        X = ctf.solve_tri(M, X, from_left=False)
        elapsed.append(time.time() - tic)
    epoch.end()
    if ctf.comm().rank() == 0:
        print("ctf.cholesky+solve_tri average time:", np.sum(elapsed) / num_iter, "sec")
        print("ctf.cholesky+solve_tri iteration timings:", elapsed)

コード例 #2

0

ファイルを表示

ファイル: ccd_new.py プロジェクト: navjo2323/Tensor_completion

def ccd(tenpy, T_in, T, O, X, reg_als, num_iter_als, tol, csv_file):
    """Run CCD sweeps until ``num_iter_als`` is reached or the RMSE drops below ``tol``.

    Each iteration calls ``ccd_Completer.step`` (timed with both ``time.time``
    and the ``"ccd"`` CTF timer epoch), then evaluates
    ``vecnorm(TTTP(O, X) - T_in) / sqrt(nnz_tot)`` as the RMSE.

    Args:
        tenpy: backend module (provides ``name``, ``printf``, ``TTTP``,
            ``vecnorm``, ``is_master_proc``).
        T_in: observed tensor; ``T_in.nnz_tot`` is read for the ctf backend.
        T: not used by this function (kept for interface compatibility).
        O: observation mask passed to ``TTTP``; summed for non-ctf backends.
        X: initial factors handed to the completer.
        reg_als: regularization value forwarded to every ``step`` call.
        num_iter_als: maximum number of sweeps.
        tol: RMSE threshold that ends the iteration early.
        csv_file: open file object for logging rows, or ``None`` to disable.

    Returns:
        The factors returned by the last ``step`` call.
    """
    opt = ccd_Completer(tenpy, T_in, O, X)
    if tenpy.name() == 'ctf':
        nnz_tot = T_in.nnz_tot
    else:
        nnz_tot = np.sum(O)
    t_ccd = ctf.timer_epoch("ccd")

    regu = reg_als
    tenpy.printf("--------------------------------ccd-----------------------")
    start = time.time()
    it = 0
    time_all = 0

    if csv_file is not None:
        csv_writer = csv.writer(csv_file,
                                delimiter=',',
                                quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)

    for i in range(num_iter_als):
        it += 1
        s = time.time()
        t_ccd.begin()
        X = opt.step(regu)
        t_ccd.end()
        e = time.time()
        # Only the factor update counts towards the logged time.
        time_all += e - s
        M = tenpy.TTTP(O, X)
        ctf.Sparse_add(M, T_in, beta=-1)
        rmse = tenpy.vecnorm(M) / (nnz_tot)**0.5
        M.set_zero()
        if tenpy.is_master_proc():
            tenpy.printf("After " + str(it) + " iterations,")
            tenpy.printf("RMSE is", rmse)
            if csv_file is not None:
                csv_writer.writerow([i, time_all, rmse, i, 'CCD'])
                csv_file.flush()
        # The stopping decision is taken on every process, not only on the
        # master: rmse is computed above outside the master-only block, and if
        # only the master left the loop the remaining processes would go on to
        # the next sweep's tensor operations without it.
        # NOTE(review): assumes vecnorm yields the same value on all ranks.
        if rmse < tol:
            if tenpy.is_master_proc():
                tenpy.printf("Ending algo due to tolerance")
            break

    end = time.time()

    tenpy.printf('ccd time taken is ', end - start)

    return X

コード例 #3

0

ファイルを表示

ファイル: run_als.py プロジェクト: LinjianMa/tensor_decompositions

def run_als(args):
    """Open the CSV log, select the tensor backend and run the chosen ALS driver.

    Args:
        args: parsed options; this function reads ``profile``, ``backend``,
            ``seed`` and ``decomposition`` and prints every attribute on the
            master process.

    Returns:
        Whatever the selected ``run_als_*`` driver returns, or ``None`` when
        ``args.decomposition`` matches none of the four known names.
    """
    # The existence check must precede open(): append mode creates the file.
    csv_path = join(results_dir, arg_defs.get_file_prefix(args) + '.csv')
    is_new_log = not Path(csv_path).exists()
    # NOTE(review): the handle is passed on to the driver and never closed here.
    csv_file = open(csv_path, 'a')
    csv_writer = csv.writer(csv_file,
                            delimiter=',',
                            quotechar='|',
                            quoting=csv.QUOTE_MINIMAL)

    profiler.do_profile(args.profile)

    # NOTE(review): any other backend value leaves `tenpy` unbound below.
    if args.backend == "numpy":
        import backend.numpy_ext as tenpy
    elif args.backend == "ctf":
        import backend.ctf_ext as tenpy
        import ctf
        # NOTE(review): this epoch is started but never ended in this function.
        tepoch = ctf.timer_epoch("ALS")
        tepoch.begin()

    if tenpy.is_master_proc():
        # Echo the full configuration, one "name: value" line per option.
        for option in vars(args):
            print(option + ':', getattr(args, option))
        if is_new_log:
            header = [
                'iterations', 'time', 'residual', 'fitness', 'flag_dt',
                'fitness_diff'
            ]
            csv_writer.writerow(header)

    tenpy.seed(args.seed)

    # Dispatch on the requested decomposition; each driver gets the same trio.
    if args.decomposition == "CP":
        return run_als_cpd(args, tenpy, csv_file)
    if args.decomposition == "Tucker":
        return run_als_tucker(args, tenpy, csv_file)
    if args.decomposition == "Tucker_simulate":
        return run_als_tucker_simulate(args, tenpy, csv_file)
    if args.decomposition == "CP_simulate":
        return run_als_cp_simulate(args, tenpy, csv_file)

コード例 #4

0

ファイルを表示

ファイル: run_als3.py プロジェクト: LinjianMa/tensor_decompositions

def run_als(args):
    """Run an order-3 ALS variant and log residual/fitness per checkpoint.

    The input tensor is selected by ``args.tensor`` (``'random'``,
    ``'random_col'``, ``'scf'`` or ``'graph'``), complex random factors are
    initialised, and the optimizer named by ``args.method`` (``'DT-quad'``,
    ``'PP-quad'``, ``'DT'`` or ``'PP'``) is stepped for at most
    ``args.num_iter`` sweeps. Residual and fitness are evaluated every
    ``args.res_calc_freq`` sweeps, on the last sweep, and whenever
    ``flag_dt`` is false; on multiples of ``res_calc_freq`` the run stops early
    once the fitness change is at most ``args.stopping_tol * res_calc_freq``.

    Args:
        args: parsed options (``backend``, ``R``, ``s``, ``res_calc_freq``,
            ``seed``, ``tensor``, ``method``, ``num_iter``, ``stopping_tol``
            and tensor-specific fields are read here).

    Returns:
        ``(ret_list, optimizer.num_iters_map, optimizer.time_map,
        optimizer.pp_init_iter)`` where ``ret_list`` holds the rows
        ``[i, time_all, res, fitness, flag_dt, fitness_diff]`` collected on the
        master process.
    """
    ret_list = []

    if args.backend == 'numpy':
        import backend.numpy_ext as tenpy
    elif args.backend == 'ctf':
        import backend.ctf_ext as tenpy

    flag_dt = True

    R = args.R
    s = args.s
    res_calc_freq = args.res_calc_freq

    # The existence check must precede open(): append mode creates the file.
    csv_path = join(results_dir, get_file_prefix(args) + '.csv')
    is_new_log = not Path(csv_path).exists()
    csv_file = open(csv_path, 'a')
    csv_writer = csv.writer(csv_file,
                            delimiter=',',
                            quotechar='|',
                            quoting=csv.QUOTE_MINIMAL)

    if tenpy.is_master_proc():
        for arg in vars(args):
            print(arg + ':', getattr(args, arg))
        if is_new_log:
            csv_writer.writerow([
                'iterations', 'time', 'residual', 'fitness', 'flag_dt',
                'fitness_diff'
            ])

    tenpy.seed(args.seed)
    if args.tensor == 'random':
        X = tenpy.random((R, s))
        Y = tenpy.random((R, s))
        Z = tenpy.random((R, s))
        T = khatri_rao_product_chain(tenpy, [X, Y, Z])
    elif args.tensor == 'random_col':
        T = synthetic_tensors.init_const_collinearity_tensor(tenpy,
                                                             s,
                                                             3,
                                                             R,
                                                             col=args.col,
                                                             seed=args.seed)
    elif args.tensor == 'scf':
        filename = f'saved-tensors/scf_{args.num_molecule}_mol.npy'
        if not os.path.exists(filename):
            T = real_tensors.get_scf_tensor(args.num_molecule)
            with open(filename, 'wb') as f:
                np.save(f, T)
                print(f"file {filename} saved.")
                # Deliberate abort after caching the tensor (kept as in the
                # original): the run has to be restarted to use the file.
                assert 0
        with open(filename, 'rb') as f:
            T = np.load(f)
            if tenpy.name() == 'ctf':
                T = tenpy.from_nparray(T)
    elif args.tensor == "graph":
        T = real_tensors.graph_state_5_party(tenpy)

    tenpy.printf(f"The shape of the input tensor is: {T.shape}")

    # Complex-valued random initial factors, one per tensor mode.
    X = tenpy.random((R, T.shape[0])) + 1j * tenpy.random((R, T.shape[0]))
    Y = tenpy.random((R, T.shape[1])) + 1j * tenpy.random((R, T.shape[1]))
    Z = tenpy.random((R, T.shape[2])) + 1j * tenpy.random((R, T.shape[2]))

    optimizer_list = {
        'DT-quad': quad_als_optimizer(tenpy, T, X, Y),
        'PP-quad': quad_pp_optimizer(tenpy, T, X, Y, args),
        'DT': als_optimizer(tenpy, T, X, Y, Z, args),
        'PP': als_pp_optimizer(tenpy, T, X, Y, Z, args),
    }
    optimizer = optimizer_list[args.method]

    normT = tenpy.vecnorm(T)
    time_all = 0.
    fitness_old = 0.

    if args.backend == 'ctf':
        import backend.ctf_ext as tenpy
        import ctf
        tepoch = ctf.timer_epoch("ALS")
        tepoch.begin()

    def _finish():
        # Single exit point: end the timer epoch (only started for the ctf
        # backend) on every return path, including the early convergence
        # returns, then hand back the result tuple.
        if args.backend == 'ctf':
            tepoch.end()
        return ret_list, optimizer.num_iters_map, optimizer.time_map, optimizer.pp_init_iter

    for i in range(args.num_iter):
        if args.method == 'PP-quad' or args.method == 'DT-quad':
            t0 = time.time()
            if args.method == 'PP-quad':
                X, Y, pp_restart = optimizer.step()
                flag_dt = not pp_restart
            else:
                X, Y = optimizer.step()
            t1 = time.time()
            tenpy.printf(f"[ {i} ] Sweep took {t1 - t0} seconds")
            time_all += t1 - t0
            if (i % res_calc_freq == 0 or i == args.num_iter - 1
                    or not flag_dt):
                # The quad variants carry only two factors; Y is passed for
                # both the second and third mode.
                res = get_residual(tenpy, optimizer.mttkrp_last_mode,
                                   [X, Y, Y], normT)
                fitness = 1 - res / normT
                fitness_diff = abs(fitness - fitness_old)
                fitness_old = fitness
                if tenpy.is_master_proc():
                    ret_list.append(
                        [i, time_all, res, fitness, flag_dt, fitness_diff])
                    print(
                        f"[ {i} ] Residual is {res}, fitness is: {fitness}, fitness diff is: {fitness_diff}"
                    )
                    if csv_file is not None:
                        csv_writer.writerow(
                            [i, time_all, res, fitness, flag_dt, fitness_diff])
                        csv_file.flush()
                # check the fitness difference
                if (i % res_calc_freq == 0):
                    if abs(fitness_diff) <= args.stopping_tol * res_calc_freq:
                        return _finish()
        elif args.method == 'PP' or args.method == 'DT':
            t0 = time.time()
            if args.method == 'PP':
                X, Y, Z, pp_restart = optimizer.step()
                flag_dt = not pp_restart
            else:
                X, Y, Z = optimizer.step()
            t1 = time.time()
            tenpy.printf(f"[ {i} ] Sweep took {t1 - t0} seconds")
            time_all += t1 - t0
            if (i % res_calc_freq == 0 or i == args.num_iter - 1
                    or not flag_dt):
                # Residual from the explicitly reconstructed tensor.
                res = tenpy.norm(
                    T - tenpy.einsum("ka,kb,kc->abc", X, Y, Z)
                )
                fitness = 1 - res / normT
                fitness_diff = abs(fitness - fitness_old)
                fitness_old = fitness
                if tenpy.is_master_proc():
                    ret_list.append(
                        [i, time_all, res, fitness, flag_dt, fitness_diff])
                    print(
                        f"[ {i} ] Residual is {res}, fitness is: {fitness}, fitness diff is: {fitness_diff}, timeall is: {time_all}"
                    )
                    if csv_file is not None:
                        csv_writer.writerow(
                            [i, time_all, res, fitness, flag_dt, fitness_diff])
                        csv_file.flush()
                # check the fitness difference
                if (i % res_calc_freq == 0):
                    if abs(fitness_diff) <= args.stopping_tol * res_calc_freq:
                        print("timeall", time_all)
                        return _finish()

    print("timeall", time_all)
    return _finish()

コード例 #5

0

ファイルを表示

    ham = return_op(Nx, Ny, sym=None, backend=backend)
else:
    ham = return_op(Nx, Ny, sym='Z2', backend=backend)

# Build the PEPS object from the lattice size (Nx, Ny) and the parameters
# d, D, chi, Zn and backend defined earlier in the script.
peps = PEPS(Nx,
            Ny,
            d,
            D,
            chi,
            Zn=Zn,
            chi_norm=10,
            chi_op=10,
            backend=backend,
            normalize=False)

# Set up profiling: the cProfile stats file name encodes the run parameters.
profile_fname = 'calc_norm_stats_Nx{}_Ny{}_d{}_D{}_chi{}_Zn{}_{}'.format(
    Nx, Ny, d, D, chi, Zn, backend)
# For the ctf backend, additionally wrap the measured region in a CTF timer
# epoch (ended below, after the wall-clock stop time has been taken).
if backend == 'ctf':
    from ctf import timer_epoch
    te = timer_epoch('1')
    te.begin()
t0 = time.time()
# Evaluate the norm under cProfile; stats are written to profile_fname, so the
# wall-clock interval measured here includes the profiler's overhead.
cProfile.run('val = peps.calc_norm(chi=chi)', profile_fname)
tf = time.time()
# Close the timer epoch (ctf only) and print the elapsed wall-clock seconds.
if backend == 'ctf': te.end()
print(tf - t0)

コード例 #6

0

ファイルを表示

def _pcpgn_objective(tenpy, T_in, O, X, val2, nnz_tot):
    """Evaluate the logged objective for the current factors ``X``.

    With ``M = TTTP(O, X)`` this applies, in order, ``Sparse_mul`` of a copy of
    ``M`` with ``T_in``, ``Sparse_exp`` on ``M`` and ``Sparse_add`` with
    ``beta=-1``, then returns ``(ctf.sum(M) + val2) / nnz_tot``. Per the
    original inline comment this corresponds to
    ``sum(exp(M) - T_in * M)``. Both temporaries are zeroed before returning.
    """
    M = tenpy.TTTP(O, X)
    P = M.copy()
    ctf.Sparse_mul(P, T_in)
    ctf.Sparse_exp(M)
    ctf.Sparse_add(M, P, beta=-1)
    val = ctf.sum(M)
    P.set_zero()
    M.set_zero()
    return (val + val2) / nnz_tot


def getPCPGN(tenpy, T_in, T, O, X, reg_GN, num_iter_GN,tol,csv_file):
    """Run Poisson CP Gauss-Newton sweeps and log the objective after each one.

    The regularization starts at ``reg_GN`` and is halved after every sweep.
    The value printed as "RMSE" is the objective computed by
    ``_pcpgn_objective``; iteration stops early once its absolute value drops
    below ``tol``.

    Args:
        tenpy: backend module (``name``, ``printf``, ``TTTP``,
            ``is_master_proc`` are used).
        T_in: observed tensor; ``T_in.nnz_tot`` is read for the ctf backend.
        T: not used by this function (kept for interface compatibility).
        O: observation mask passed to ``TTTP``; summed for non-ctf backends.
        X: initial factors handed to the completer.
        reg_GN: initial regularization value.
        num_iter_GN: maximum number of sweeps.
        tol: threshold on ``abs(objective)`` that ends the iteration early.
        csv_file: open file object for logging rows, or ``None`` to disable.

    Returns:
        The factors returned by the last ``step`` call (the input ``X`` when
        ``num_iter_GN`` is 0).
    """
    opt = Poisson_CP_GN_Completer(tenpy, T_in, O, X)
    if tenpy.name() == 'ctf':
        nnz_tot = T_in.nnz_tot
    else:
        nnz_tot = np.sum(O)
    regu = reg_GN
    tenpy.printf("--------------------------------Poisson GN WIth  CG-----------------------------")
    t_ALS = ctf.timer_epoch("Poisson_GN")
    start = time.time()
    it = 0
    time_all = 0

    # Constant part of the objective that depends only on the data:
    # Sparse_log, Sparse_mul with T_in, then Sparse_add with beta=-1, summed.
    P = T_in.copy()
    ctf.Sparse_log(P)
    ctf.Sparse_mul(P, T_in)
    ctf.Sparse_add(P, T_in, beta=-1)
    val2 = ctf.sum(P)
    # Drop the reference, as the original did by rebinding P at this point.
    del P

    # Objective of the initial factors (reported as iteration 0).
    rmse = _pcpgn_objective(tenpy, T_in, O, X, val2, nnz_tot)
    if tenpy.is_master_proc():
        tenpy.printf("After " + str(it) + " iterations,")
        tenpy.printf("RMSE is", rmse)
    if csv_file is not None:
        csv_writer = csv.writer(
            csv_file, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)

    for i in range(num_iter_GN):
        it += 1
        s = time.time()
        t_ALS.begin()
        X = opt.step(regu)
        t_ALS.end()
        e = time.time()
        # Only the factor update counts towards the logged time.
        time_all += e - s
        rmse = _pcpgn_objective(tenpy, T_in, O, X, val2, nnz_tot)
        regu = regu/2
        if tenpy.is_master_proc():
            tenpy.printf("After " + str(it) + " iterations,")
            tenpy.printf("RMSE is", rmse)
            if csv_file is not None:
                csv_writer.writerow([i, time_all, rmse, i, 'PGN'])
                csv_file.flush()
        # The stopping decision is taken on every process, not only on the
        # master; if only the master left the loop the other processes would
        # start the next sweep's tensor operations without it.
        # NOTE(review): assumes ctf.sum yields the same value on all ranks.
        if abs(rmse) < tol:
            if tenpy.is_master_proc():
                tenpy.printf("Ending algo due to tolerance")
            break

    end = time.time()

    tenpy.printf('Poisson_GN time taken is ', end - start)

    return X

コード例 #7

0

ファイルを表示

    fact = args.varying_fact
    lower = args.lower
    upper = args.upper
    diag = args.diag
    Arm = args.arm
    c = args.c
    tau = args.tau
    arm_iters=args.arm_iters
    

    if tlib == "numpy":
        import backend.numpy_ext as tenpy
    elif tlib == "ctf":
        import backend.ctf_ext as tenpy
        import ctf
        tepoch = ctf.timer_epoch("ALS")
        tepoch.begin();

    if tenpy.is_master_proc():
        # print the arguments
        for arg in vars(args) :
            print( arg+':', getattr(args, arg))
        # initialize the csv file
        if is_new_log:
            csv_writer.writerow([
                'iterations', 'time', 'residual', 'fitness'
            ])

    tenpy.seed(args.seed)

    if args.load_tensor is not '':

コード例 #8

0

ファイルを表示

ファイル: Poisson_sgd.py プロジェクト: navjo2323/Tensor_completion

def sgd_poisson(tenpy, T_in, T, O, U, V, W, reg_als, I, J, K, R, num_iter_als,
                tol, csv_file):
    """Run Poisson SGD steps, evaluating the objective every 20 iterations.

    Each iteration calls ``Poisson_sgd_Completer.step`` with a fixed step size
    of 0.03. Every 20th iteration the objective
    ``(ctf.sum(M) + val2) / nnz_tot`` is computed from ``M = TTTP(O, [U, V, W])``
    (after ``Sparse_mul`` / ``Sparse_exp`` / ``Sparse_add``), printed as "RMSE"
    and optionally logged; iteration stops once its absolute value is below
    ``tol``.

    Args:
        tenpy: backend module (``name``, ``printf``, ``TTTP``,
            ``is_master_proc`` are used).
        T_in: observed tensor; ``T_in.nnz_tot`` is read for the ctf backend.
        T, I, J, K, R: not used by this function (kept for interface
            compatibility).
        O: observation mask passed to ``TTTP``; summed for non-ctf backends.
        U, V, W: initial factors.
        reg_als: regularization value forwarded to every ``step`` call.
        num_iter_als: maximum number of iterations.
        tol: threshold on ``abs(objective)`` that ends the iteration early.
        csv_file: open file object for logging rows, or ``None`` to disable.

    Returns:
        ``[U, V, W]`` as returned by the last ``step`` call.
    """
    step_size = 0.03
    opt = Poisson_sgd_Completer(tenpy, T_in, O, [U, V, W], step_size)
    if tenpy.name() == 'ctf':
        nnz_tot = T_in.nnz_tot
    else:
        nnz_tot = np.sum(O)
    # The epoch object is created but its begin()/end() calls are disabled
    # in the loop below, as in the original.
    t_ALS = ctf.timer_epoch("poisson_sgd")

    regu = reg_als
    tenpy.printf(
        "--------------------------------Poisson_sgd-----------------------")
    start = time.time()
    it = 0
    time_all = 0

    # Constant part of the objective that depends only on the data.
    P = T_in.copy()

    ctf.Sparse_log(P)
    ctf.Sparse_mul(P, T_in)
    ctf.Sparse_add(P, T_in, beta=-1)
    val2 = ctf.sum(P)
    P.set_zero()

    if csv_file is not None:
        csv_writer = csv.writer(csv_file,
                                delimiter=',',
                                quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)

    for i in range(num_iter_als):
        it += 1
        s = time.time()
        #t_ALS.begin()
        [U, V, W] = opt.step(regu)
        #t_ALS.end()
        e = time.time()
        # Only the factor update counts towards the logged time.
        time_all += e - s
        if it % 20 == 0:
            M = tenpy.TTTP(O, [U, V, W])
            P = M.copy()
            ctf.Sparse_mul(P, T_in)
            ctf.Sparse_exp(M)

            ctf.Sparse_add(M, P, beta=-1)
            val = ctf.sum(M)
            P.set_zero()
            M.set_zero()
            rmse = (val + val2) / nnz_tot
            if tenpy.is_master_proc():
                tenpy.printf("After " + str(it) + " iterations, and time is",
                             time_all)
                tenpy.printf("RMSE is", rmse)
                if csv_file is not None:
                    csv_writer.writerow([i, time_all, rmse, i, 'PALS'])
                    csv_file.flush()
            # The stopping decision is taken on every process, not only on
            # the master; if only the master left the loop the other processes
            # would keep issuing tensor operations without it.
            # NOTE(review): assumes ctf.sum yields the same value on all ranks.
            if abs(rmse) < tol:
                if tenpy.is_master_proc():
                    tenpy.printf("Ending algo due to tolerance")
                break

    end = time.time()

    tenpy.printf('Poisson sgd time taken is ', end - start)

    return [U, V, W]

コード例 #9

0

ファイルを表示

def getALS_CG(T, U, V, W, regParam, omega, I, J, K, r, block_size,
              num_iter=100, err_thresh=.001, time_limit=600,
              use_implicit=True):
    """Sweep ``updateFactor`` over U, V and W until a stopping rule fires.

    After every sweep the residual ``T - TTTP(omega, [U, V, W])`` is rebuilt
    and the objective ``vecnorm(E) + (vecnorm(U) + vecnorm(V) + vecnorm(W)) *
    regParam`` is printed together with the RMSE. The loop ends when the
    objective changes by less than ``err_thresh``, after ``num_iter`` sweeps,
    or once the update time exceeds ``time_limit`` seconds. Time spent on the
    objective evaluation is excluded from the reported durations. The whole
    run is enclosed in a CTF timer epoch named after the CG variant.

    The visible body has no return statement; results are reported via print.
    """
    # Pick the epoch label and banner for the chosen CG variant.
    if use_implicit == True:
        epoch_label = "als_CG_implicit"
        banner = "--------------------------------ALS with implicit CG------------------------"
    else:
        epoch_label = "als_CG_explicit"
        banner = "--------------------------------ALS with explicit CG------------------------"
    t_ALS_CG = ctf.timer_epoch(epoch_label)
    if ctf.comm().rank() == 0:
        print(banner)

    nnz_tot = T.nnz_tot if T.sp == True else ctf.sum(omega)
    t_ALS_CG.begin()

    it = 0

    # A non-positive block size means "use the largest mode dimension".
    if block_size <= 0:
        block_size = max(I, J, K)

    init_timer = ctf.timer("ALS_init_error_tensor_norm")
    init_timer.start()
    tic = time.time()
    E = ctf.tensor((I, J, K), sp=T.sp)
    E.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U, V, W]).i("ijk")
    after_tttp = time.time()
    curr_err_norm = ctf.vecnorm(E) + (ctf.vecnorm(U) + ctf.vecnorm(V) +
                                      ctf.vecnorm(W)) * regParam
    after_norm = time.time()

    init_timer.stop()
    if ctf.comm().rank() == 0 and status_prints == True:
        print('ctf.TTTP() takes {}'.format(after_tttp - tic))
        print('ctf.vecnorm {}'.format(after_norm - after_tttp))

    t_before_loop = time.time()
    t_obj_calc = 0.
    ctf.random.seed(42)
    while True:

        sweep_timer = ctf.timer("ALS_upd_cg")
        sweep_timer.start()

        # One sweep: each factor is refreshed using the latest other two.
        U = updateFactor(T, U, V, W, regParam, omega, I, J, K, r, block_size,
                         "U", use_implicit)
        V = updateFactor(T, U, V, W, regParam, omega, I, J, K, r, block_size,
                         "V", use_implicit)
        W = updateFactor(T, U, V, W, regParam, omega, I, J, K, r, block_size,
                         "W", use_implicit)

        # Elapsed update time so far, net of all objective evaluations.
        duration = time.time() - t_before_loop - t_obj_calc
        obj_tic = time.time()
        E.set_zero()
        E.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U, V, W]).i("ijk")
        diff_norm = ctf.vecnorm(E)
        RMSE = diff_norm / (nnz_tot**.5)
        next_err_norm = diff_norm + (ctf.vecnorm(U) + ctf.vecnorm(V) +
                                     ctf.vecnorm(W)) * regParam
        t_obj_calc += time.time() - obj_tic

        sweep_timer.stop()

        it += 1
        if ctf.comm().rank() == 0:
            print('Objective after', duration, 'seconds (', it,
                  'iterations) is: {}'.format(next_err_norm))
            print('RMSE after', duration, 'seconds (', it,
                  'iterations) is: {}'.format(RMSE))

        stalled = abs(curr_err_norm - next_err_norm) < err_thresh
        if stalled or it >= num_iter or duration > time_limit:
            break

        curr_err_norm = next_err_norm

    t_ALS_CG.end()
    duration = time.time() - t_before_loop - t_obj_calc

    if glob_comm.rank() == 0:
        print('ALS (implicit =', use_implicit,
              ') time per sweep: {}'.format(duration / it))

コード例 #10

0

ファイルを表示

def run_CCD(T,U,V,W,omega,regParam,num_iter,time_limit,objective_frequency,use_MTTKRP=True):
    """Run CCD sweeps that update U, V and W one column at a time.

    Each outer iteration rebuilds the residual
    ``R = T - TTTP(omega, [U, V, W]) + TTTP(omega, [U[:,0], V[:,0], W[:,0]])``
    and then, for every column ``f``, sets the ``f``-th column of U, V and W in
    turn to ``alphas / (regParam + betas)``, where ``alphas``/``betas`` come
    from ``ctf.MTTKRP`` (``use_MTTKRP=True``) or the equivalent ``ctf.einsum``
    contraction. The loop ends after ``num_iter`` iterations or once the
    elapsed time, net of objective evaluations, exceeds ``time_limit``.
    The objective is evaluated every ``objective_frequency`` iterations and
    once more after the loop; results are printed on rank 0.

    Relies on ``glob_comm``, ``status_prints`` and ``get_objective`` from
    outside this function. The visible body has no return statement; U, V and
    W are written in place through column assignment.
    """
    # Per-column working copies of the factors; kept in sync with U, V, W below.
    U_vec_list = []
    V_vec_list = []
    W_vec_list = []
    r = U.shape[1]
    for f in range(r):
        U_vec_list.append(U[:,f])
        V_vec_list.append(V[:,f])
        W_vec_list.append(W[:,f])


    # print(T)
    # T.write_to_file('tensor_out.txt')
    # assert(T.sp == 1)

    ite = 0
    objectives = []

    t_before_loop = time.time()
    # Accumulated time spent evaluating the objective; subtracted from durations.
    t_obj_calc = 0.

    t_CCD = ctf.timer_epoch("ccd_CCD")
    t_CCD.begin()
    while True:

        # Rebuild the residual for this sweep, with column 0's term added back.
        t_iR_upd = ctf.timer("ccd_init_R_upd")
        t_iR_upd.start()
        t0 = time.time()
        R = ctf.copy(T)
        t1 = time.time()
        # R -= ctf.einsum('ijk, ir, jr, kr -> ijk', omega, U, V, W)
        R -= ctf.TTTP(omega, [U,V,W])
        t2 = time.time()
        # R += ctf.einsum('ijk, i, j, k -> ijk', omega, U[:,0], V[:,0], W[:,0])
        R += ctf.TTTP(omega, [U[:,0], V[:,0], W[:,0]])
        t3 = time.time()

        t_iR_upd.stop()

        # Periodic objective report; its cost is tracked in t_obj_calc.
        t_b_obj = time.time()
        if ite % objective_frequency == 0:
            duration = time.time() - t_before_loop - t_obj_calc
            [objective, RMSE] = get_objective(T,U,V,W,omega,regParam)
            objectives.append(objective)
            if glob_comm.rank() == 0:
                print('Objective after',duration,'seconds (',ite,'iterations) is: {}'.format(objective))
                print('RMSE after',duration,'seconds (',ite,'iterations) is: {}'.format(RMSE))
        t_obj_calc += time.time() - t_b_obj

        if glob_comm.rank() == 0 and status_prints == True:
            print('ctf.copy() takes {}'.format(t1-t0))
            print('ctf.TTTP() takes {}'.format(t2-t1))
            print('ctf.TTTP() takes {}'.format(t3-t2))


        for f in range(r):

            # update U[:,f]
            if glob_comm.rank() == 0 and status_prints == True:
                print('updating U[:,{}]'.format(f))

            # Only the U update is wall-clock timed (t0..t2); V and W are not.
            t0 = time.time()
            if use_MTTKRP:
                alphas = ctf.tensor(R.shape[0])
                #ctf.einsum('ijk -> i', ctf.TTTP(R, [None, V_vec_list[f], W_vec_list[f]]),out=alphas)
                ctf.MTTKRP(R, [alphas, V_vec_list[f], W_vec_list[f]], 0)
            else:
                alphas = ctf.einsum('ijk, j, k -> i', R, V_vec_list[f], W_vec_list[f])

            t1 = time.time()

            if use_MTTKRP:
                betas = ctf.tensor(R.shape[0])
                #ctf.einsum('ijk -> i', ctf.TTTP(omega, [None, V_vec_list[f]*V_vec_list[f], W_vec_list[f]*W_vec_list[f]]),out=betas)
                ctf.MTTKRP(omega, [betas, V_vec_list[f]*V_vec_list[f], W_vec_list[f]*W_vec_list[f]], 0)
            else:
                betas = ctf.einsum('ijk, j, j, k, k -> i', omega, V_vec_list[f], V_vec_list[f], W_vec_list[f], W_vec_list[f])

            t2 = time.time()

            # New column value, written to both the working copy and U itself.
            U_vec_list[f] = alphas / (regParam + betas)
            U[:,f] = U_vec_list[f]

            if glob_comm.rank() == 0 and status_prints == True:
                print('ctf.einsum() takes {}'.format(t1-t0))
                print('ctf.einsum() takes {}'.format(t2-t1))


            # update V[:,f]
            if glob_comm.rank() == 0 and status_prints == True:
                print('updating V[:,{}]'.format(f))
            if use_MTTKRP:
                alphas = ctf.tensor(R.shape[1])
                #ctf.einsum('ijk -> j', ctf.TTTP(R, [U_vec_list[f], None, W_vec_list[f]]),out=alphas)
                ctf.MTTKRP(R, [U_vec_list[f], alphas, W_vec_list[f]], 1)
            else:
                alphas = ctf.einsum('ijk, i, k -> j', R, U_vec_list[f], W_vec_list[f])

            if use_MTTKRP:
                betas = ctf.tensor(R.shape[1])
                #ctf.einsum('ijk -> j', ctf.TTTP(omega, [U_vec_list[f]*U_vec_list[f], None, W_vec_list[f]*W_vec_list[f]]),out=betas)
                ctf.MTTKRP(omega, [U_vec_list[f]*U_vec_list[f], betas, W_vec_list[f]*W_vec_list[f]], 1)
            else:
                betas = ctf.einsum('ijk, i, i, k, k -> j', omega, U_vec_list[f], U_vec_list[f], W_vec_list[f], W_vec_list[f])

            V_vec_list[f] = alphas / (regParam + betas)
            V[:,f] = V_vec_list[f]


            # update W[:,f]
            if glob_comm.rank() == 0 and status_prints == True:
                print('updating W[:,{}]'.format(f))
            if use_MTTKRP:
                alphas = ctf.tensor(R.shape[2])
                #ctf.einsum('ijk -> k', ctf.TTTP(R, [U_vec_list[f], V_vec_list[f], None]),out=alphas)
                ctf.MTTKRP(R, [U_vec_list[f], V_vec_list[f], alphas], 2)
            else:
                alphas = ctf.einsum('ijk, i, j -> k', R, U_vec_list[f], V_vec_list[f])

            if use_MTTKRP:
                betas = ctf.tensor(R.shape[2])
                #ctf.einsum('ijk -> k', ctf.TTTP(omega, [U_vec_list[f]*U_vec_list[f], V_vec_list[f]*V_vec_list[f], None]),out=betas)
                ctf.MTTKRP(omega, [U_vec_list[f]*U_vec_list[f], V_vec_list[f]*V_vec_list[f], betas], 2)
            else:
                betas = ctf.einsum('ijk, i, i, j, j -> k', omega, U_vec_list[f], U_vec_list[f], V_vec_list[f], V_vec_list[f])

            W_vec_list[f] = alphas / (regParam + betas)
            W[:,f] = W_vec_list[f]



            # Move the residual on to the next column: subtract the freshly
            # updated column f and, unless it was the last, add back column f+1.
            t_tttp = ctf.timer("ccd_TTTP")
            t_tttp.start()
            R -= ctf.TTTP(omega, [U_vec_list[f], V_vec_list[f], W_vec_list[f]])

            if f+1 < r:
                R += ctf.TTTP(omega, [U_vec_list[f+1], V_vec_list[f+1], W_vec_list[f+1]])

            t_tttp.stop()
        # NOTE(review): t_iR_upd was already stopped right after the residual
        # rebuild above; this is a second stop() on the same timer object.
        # Confirm whether a different timer was meant here.
        t_iR_upd.stop()

        ite += 1

        # Stop on the iteration budget or the time budget (net of objective time).
        if ite == num_iter or time.time() - t_before_loop - t_obj_calc > time_limit:
            break

    t_CCD.end()
    duration = time.time() - t_before_loop - t_obj_calc
    [objective, RMSE] = get_objective(T,U,V,W,omega,regParam)

    if glob_comm.rank() == 0:
        print('CCD amortized seconds per sweep: {}'.format(duration/ite))
        print('Time/CCD Iteration: {}'.format(duration/ite))
        print('Objective after',duration,'seconds (',ite,'iterations) is: {}'.format(objective))
        print('RMSE after',duration,'seconds (',ite,'iterations) is: {}'.format(RMSE))