Beispiel #1
0
def run_bench(num_iter, s, k):
    """Time ``ctf.solve_spd`` against a manual Cholesky + triangular solve.

    Builds an ``s x s`` matrix ``U * diag(1..s) * U^T`` from the left
    singular vectors of a random matrix, then applies each solver
    ``num_iter`` times to a ``k x s`` right-hand side.  The right-hand side
    is overwritten by each solve, so successive iterations operate on the
    previous result.  Rank 0 prints the average and per-iteration timings.

    Args:
        num_iter: number of timed repetitions per solver.
        s: dimension of the square system matrix.
        k: number of rows of the right-hand side ``X``.
    """
    comm = ctf.comm()
    M = ctf.random.random((s,s))
    X = ctf.random.random((k,s))
    [U,S,VT] = ctf.svd(M)
    # Replace the singular values by 1..s to obtain a well-conditioned matrix.
    S = np.arange(0,s)+1
    M = ctf.dot(U*S,U.T())

    te = ctf.timer_epoch("BENCHMARK: SPD SOLVE")
    te.begin()
    times = []
    for i in range(num_iter):
        t0 = time.time()
        X = ctf.solve_spd(M,X)
        times.append(time.time()-t0)
    te.end()
    if comm.rank() == 0:
        print("ctf.solve_spd average time:",np.sum(times)/num_iter,"sec")
        print("ctf.solve_spd iteration timings:",times)

    te = ctf.timer_epoch("BENCHMARK: Manual Cholesky+TRSM SPD SOLVE")
    te.begin()
    times = []
    for i in range(num_iter):
        t0 = time.time()
        L = ctf.cholesky(M)
        # Solve against the Cholesky factor; the original passed M here and
        # left L unused.
        # NOTE(review): only one triangular solve is timed; a complete SPD
        # solve would need a second solve with the transposed factor.
        X = ctf.solve_tri(L,X,from_left=False)
        times.append(time.time()-t0)
    te.end()
    if comm.rank() == 0:
        print("ctf.cholesky+solve_tri average time:",np.sum(times)/num_iter,"sec")
        print("ctf.cholesky+solve_tri iteration timings:",times)
def getALS_SVD(T, U, V, W, regParam, omega, I, J, K, r):
    """Run alternating updates of U, V, W via ``updateFactor_SVD``.

    After every sweep the masked residual ``T - omega * [[U, V, W]]`` is
    rebuilt and the objective (residual norm plus ``regParam`` times the sum
    of the factor norms) is compared with the previous one.  The loop stops
    when the change is below 0.001 or the sweep counter exceeds 20.

    Returns:
        The updated factors ``(U, V, W)``.
    """

    def fill_residual(residual, A, B, C):
        # Accumulate T - omega * (A x B x C) into the residual tensor.
        residual.i("ijk") << T.i("ijk") - omega.i("ijk") * A.i("iu") * B.i(
            "ju") * C.i("ku")

    def objective(residual, A, B, C):
        factor_norms = ctf.vecnorm(A) + ctf.vecnorm(B) + ctf.vecnorm(C)
        return ctf.vecnorm(residual) + factor_norms * regParam

    sweeps = 0
    residual = ctf.tensor((I, J, K))
    fill_residual(residual, U, V, W)
    prev_obj = objective(residual, U, V, W)

    finished = False
    while not finished:
        U = updateFactor_SVD(T, U, V, W, regParam, omega, I, J, K, r, "U")
        V = updateFactor_SVD(T, U, V, W, regParam, omega, I, J, K, r, "V")
        W = updateFactor_SVD(T, U, V, W, regParam, omega, I, J, K, r, "W")

        residual.set_zero()
        fill_residual(residual, U, V, W)
        new_obj = objective(residual, U, V, W)

        if ctf.comm().rank() == 0:
            print(prev_obj, new_obj)

        finished = abs(prev_obj - new_obj) < .001 or sweeps > 20
        if not finished:
            prev_obj = new_obj
            sweeps += 1

    if ctf.comm().rank() == 0:
        print("Number of iterations: ", sweeps)
    return U, V, W
Beispiel #3
0
def run_tests():
    """Run the ``KnowValues`` test case on every rank and return the result.

    All ranks execute the suite, but only rank 0 reports to the terminal;
    the other ranks send the runner output to ``os.devnull``.

    Returns:
        The ``unittest.TestResult`` produced by the runner.
    """
    numpy.random.seed(5330)
    suite = unittest.TestSuite(
        unittest.TestLoader().loadTestsFromTestCase(KnowValues))
    if ctf.comm().rank() != 0:
        # Close the null stream once the run finishes instead of leaking it.
        with open(os.devnull, 'w') as devnull:
            return unittest.TextTestRunner(stream=devnull).run(suite)
    print("Tests for basic numpy ndarray functionality")
    return unittest.TextTestRunner().run(suite)
def getALS_CG(T, U, V, W, regParam, omega, I, J, K, r, block):
    """Run alternating CG updates of U, V, W and report sweeps and time.

    After every sweep the sparse masked residual ``T - omega * [[U, V, W]]``
    is rebuilt and the objective (residual norm plus ``regParam`` times the
    sum of the factor norms) is compared with the previous one.  The loop
    stops when the change is below 0.001 or more than 100 sweeps have run.

    Returns:
        ``(it, nt)``: the number of sweeps performed and the elapsed
        wall-clock time in seconds, rounded to 4 decimals.
    """

    t0 = time.time()
    it = 0
    E = ctf.tensor((I, J, K), sp=True)
    E.i("ijk"
        ) << T.i("ijk") - omega.i("ijk") * U.i("iu") * V.i("ju") * W.i("ku")
    if ctf.comm().rank() == 0:
        print("contraction to form the error tensor cost %f seconds" %
              (np.round(time.time() - t0, 4)))
    assert (E.sp == 1)
    curr_err_norm = ctf.vecnorm(E) + (ctf.vecnorm(U) + ctf.vecnorm(V) +
                                      ctf.vecnorm(W)) * regParam

    while True:

        t = time.time()

        U = updateFactor_CG(T, U, V, W, regParam, omega, I, J, K, r, block,
                            "U")
        assert (U.sp == 1)
        V = updateFactor_CG(T, U, V, W, regParam, omega, I, J, K, r, block,
                            "V")
        assert (V.sp == 1)
        W = updateFactor_CG(T, U, V, W, regParam, omega, I, J, K, r, block,
                            "W")
        assert (W.sp == 1)
        if ctf.comm().rank() == 0:
            print(
                "CG update factor matrices in the following iteration cost %f seconds"
                % np.round(time.time() - t, 4))

        E.set_zero()
        E.i("ijk") << T.i(
            "ijk") - omega.i("ijk") * U.i("iu") * V.i("ju") * W.i("ku")
        assert (E.sp == 1)
        next_err_norm = ctf.vecnorm(E) + (ctf.vecnorm(U) + ctf.vecnorm(V) +
                                          ctf.vecnorm(W)) * regParam

        if ctf.comm().rank() == 0:
            print(curr_err_norm, next_err_norm)

        # Count the sweep on every rank.  The counter used to be incremented
        # only on rank 0, so the other ranks never reached the iteration cap
        # and could keep looping after rank 0 had left.
        it += 1

        if abs(curr_err_norm - next_err_norm) < .001 or it > 100:
            break

        curr_err_norm = next_err_norm

    nt = np.round(time.time() - t0, 4)

    return it, nt
Beispiel #5
0
def run_tests():
    """Run the ``KnowValues`` partition tests on every rank.

    All ranks execute the suite, but only rank 0 reports to the terminal;
    the other ranks send the runner output to ``os.devnull``.

    Returns:
        The ``unittest.TestResult`` produced by the runner.
    """
    numpy.random.seed(5330)
    print("I am rank")
    suite = unittest.TestSuite(
        unittest.TestLoader().loadTestsFromTestCase(KnowValues))
    if ctf.comm().rank() != 0:
        # Close the null stream once the run finishes instead of leaking it.
        with open(os.devnull, 'w') as devnull:
            return unittest.TextTestRunner(stream=devnull).run(suite)
    print("Tests for partition")
    return unittest.TextTestRunner().run(suite)
Beispiel #6
0
def main():
    """Read a tensor from file and run ``sparse_SGD`` on it.

    Command-line arguments (``sys.argv[1:8]``):
        file_name, I, J, K, stepSize, r, sample_rate

    ``I``, ``J``, ``K`` are the tensor dimensions passed to
    ``read_from_frostt`` and ``r`` is the number of columns of the random
    initial factors.
    """
    regParam = 0.00001

    file_name = sys.argv[1]
    I = int(sys.argv[2])
    J = int(sys.argv[3])
    K = int(sys.argv[4])
    stepSize = int(sys.argv[5])
    r = int(sys.argv[6])
    sample_rate = float(sys.argv[7])

    T = read_from_frostt(file_name, I, J, K)
    Omega = getOmega(T)

    # Named so that the local does not shadow the ``os`` module.
    tensor_sum = T.sum()
    omega_sum = Omega.sum()
    if ctf.comm().rank() == 0:
        print(tensor_sum, omega_sum)
        print(sys.argv)

    U = ctf.random.random((I, r))
    V = ctf.random.random((J, r))
    W = ctf.random.random((K, r))

    # Trailing positional arguments: num_iter, errThresh, time_limit,
    # work_cycle, use_MTTKRP.
    sparse_SGD(T, U, V, W, regParam, Omega, I, J, K, r, stepSize, sample_rate,
               100, 1.e-5, 1.E3, 30, 10)
Beispiel #7
0
def sparse_SGD(T, U, V, W, Lambda, omega, I, J, K, r, stepSize, sample_rate, num_iter, errThresh, time_limit, work_cycle, use_MTTKRP):
    """Run sampled gradient updates on the factors U, V, W.

    Each iteration samples a copy of ``T`` at ``sample_rate`` and hands it to
    ``sparse_update``.  Every ``work_cycle`` iterations the masked residual
    ``T - TTTP(omega, [U, V, W])`` is rebuilt, the objective and RMSE are
    printed on rank 0, and the loop stops if the objective changed by less
    than ``errThresh``.  The loop also stops after ``num_iter`` iterations or
    once the elapsed time (excluding objective evaluation) reaches
    ``time_limit`` seconds.

    Returns:
        List of objective values: the initial one followed by one entry per
        objective evaluation that did not end the loop.
    """
    times = [0 for i in range(7)]

    iteration_count = 0
    R = ctf.tensor((I, J, K), sp=T.sp)
    if T.sp == True:
        nnz_tot = T.nnz_tot
    else:
        nnz_tot = ctf.sum(omega)
    start_time = time.time()
    starting_time = time.time()
    R.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U, V, W]).i("ijk")
    curr_err_norm = ctf.vecnorm(R) + (ctf.vecnorm(U) + ctf.vecnorm(V) + ctf.vecnorm(W)) * Lambda
    times[0] += time.time() - starting_time
    norm = [curr_err_norm]
    step = stepSize * 0.5
    # Time spent evaluating the objective; excluded from the reported durations.
    t_obj_calc = 0.

    while iteration_count < num_iter and time.time() - start_time - t_obj_calc < time_limit:
        iteration_count += 1
        starting_time = time.time()
        sampled_T = T.copy()
        sampled_T.sample(sample_rate)
        times[1] += time.time() - starting_time

        sparse_update(sampled_T, [U, V, W], Lambda, [I, J, K], r, stepSize * 0.5 + step, sample_rate, times, use_MTTKRP)
        sampled_T.set_zero()

        if iteration_count % work_cycle == 0:
            duration = time.time() - start_time - t_obj_calc
            t_b_obj = time.time()
            R.set_zero()
            R.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U, V, W]).i("ijk")
            diff_norm = ctf.vecnorm(R)
            RMSE = diff_norm/(nnz_tot**.5)
            next_err_norm = diff_norm + (ctf.vecnorm(U) + ctf.vecnorm(V) + ctf.vecnorm(W)) * Lambda
            # Same communicator query as the rest of the function, rather than
            # a module-level ``glob_comm``.
            if ctf.comm().rank() == 0:
                print('Objective after',duration,'seconds (',iteration_count,'iterations) is: {}'.format(next_err_norm))
                print('RMSE after',duration,'seconds (',iteration_count,'iterations) is: {}'.format(RMSE))
            t_obj_calc += time.time() - t_b_obj

            if abs(curr_err_norm - next_err_norm) < errThresh:
                break

            curr_err_norm = next_err_norm
            norm.append(curr_err_norm)

    duration = time.time() - start_time - t_obj_calc
    # No per-iteration statistics exist when the loop never ran
    # (num_iter <= 0 or time_limit <= 0); skip them to avoid dividing by zero.
    if ctf.comm().rank() == 0 and iteration_count > 0:
        print('SGD amortized seconds per sweep: {}'.format(duration/(iteration_count*sample_rate)))
        print("Time/SGD iteration: {}".format(duration/iteration_count))
    return norm
Beispiel #8
0
def run_tests():
    """Run the ``KnowValues`` linear-algebra tests on every rank.

    All ranks execute the suite, but only rank 0 reports to the terminal;
    the other ranks send the runner output to ``os.devnull``.

    Returns:
        The ``unittest.TestResult`` produced by the runner.
    """
    np.random.seed(5330)
    wrld = ctf.comm()
    suite = unittest.TestSuite(
        unittest.TestLoader().loadTestsFromTestCase(KnowValues))
    if wrld.rank() != 0:
        # Close the null stream once the run finishes instead of leaking it.
        with open(os.devnull, 'w') as devnull:
            return unittest.TextTestRunner(stream=devnull).run(suite)
    print("Tests for linear algebra functionality")
    return unittest.TextTestRunner().run(suite)
Beispiel #9
0
def getALS_CG(T, U, V, W, regParam, omega, I, J, K, r, block):
    """Run alternating CG updates and record the objective history.

    After every sweep the sparse masked residual ``T - omega * [[U, V, W]]``
    is rebuilt and the objective (residual norm plus ``regParam`` times the
    sum of the factor norms, divided by ``NNZ``) is compared with the
    previous one.  The loop stops when the change is below 0.0001 or the
    sweep counter exceeds 100.

    Returns:
        ``(norm, it, timeList)``: the objective history, the number of
        completed sweeps that did not end the loop, and the elapsed seconds
        (rounded to 4 decimals) recorded after each of those sweeps.
    """

    it = 0
    E = ctf.tensor((I, J, K), sp=True)
    E.i("ijk"
        ) << T.i("ijk") - omega.i("ijk") * U.i("iu") * V.i("ju") * W.i("ku")
    # Number of nonzero entries, i.e. sample size.
    # NOTE(review): read_local_nnz() presumably returns only this rank's
    # nonzeros, so NNZ (and the normalised objective) may differ between
    # ranks in a multi-process run -- confirm whether T.nnz_tot was intended.
    NNZ = T.read_local_nnz()[1].shape[0]
    curr_err_norm = (
        ctf.vecnorm(E) +
        (ctf.vecnorm(U) + ctf.vecnorm(V) + ctf.vecnorm(W)) * regParam) / NNZ
    norm = [curr_err_norm]
    timeList = [0]
    t = time.time()

    while True:

        U = updateFactor_CG(T, U, V, W, regParam, omega, I, J, K, r, block,
                            "U")
        V = updateFactor_CG(T, U, V, W, regParam, omega, I, J, K, r, block,
                            "V")
        W = updateFactor_CG(T, U, V, W, regParam, omega, I, J, K, r, block,
                            "W")

        E.set_zero()
        E.i("ijk") << T.i(
            "ijk") - omega.i("ijk") * U.i("iu") * V.i("ju") * W.i("ku")
        next_err_norm = (ctf.vecnorm(E) +
                         (ctf.vecnorm(U) + ctf.vecnorm(V) + ctf.vecnorm(W)) *
                         regParam) / NNZ

        if ctf.comm().rank() == 0:
            print(curr_err_norm, next_err_norm)

        if abs(curr_err_norm - next_err_norm) < .0001 or it > 100:
            break

        curr_err_norm = next_err_norm
        norm.append(curr_err_norm)
        # np.round replaces np.round_, which NumPy 2.0 removed.
        timeList.append(np.round(time.time() - t, 4))
        it += 1

    return norm, it, timeList
def getALS_CG(T, U, V, W, regParam, omega, I, J, K, r, block):
    """Run alternating CG updates of U, V, W and report sweeps and time.

    After every sweep the sparse masked residual
    ``T - TTTP(omega, [U, V, W])`` is rebuilt and the objective (residual
    norm plus ``regParam`` times the sum of the factor norms) is compared
    with the previous one.  The loop stops when the change is below 0.001 or
    more than 100 sweeps have run.

    Returns:
        ``(it, nt)``: the number of sweeps performed and the elapsed
        wall-clock time of the loop in seconds, rounded to 4 decimals.
    """

    it = 0
    E = ctf.tensor((I, J, K), sp=True)
    E.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U, V, W]).i("ijk")
    assert (E.sp == 1)
    curr_err_norm = ctf.vecnorm(E) + (ctf.vecnorm(U) + ctf.vecnorm(V) +
                                      ctf.vecnorm(W)) * regParam
    t = time.time()

    while True:

        U = updateFactor_CG(T, U, V, W, regParam, omega, I, J, K, r, block,
                            "U")
        assert (U.sp == 1)
        V = updateFactor_CG(T, U, V, W, regParam, omega, I, J, K, r, block,
                            "V")
        assert (V.sp == 1)
        W = updateFactor_CG(T, U, V, W, regParam, omega, I, J, K, r, block,
                            "W")
        assert (W.sp == 1)

        E.set_zero()
        E.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U, V, W]).i("ijk")
        assert (E.sp == 1)
        next_err_norm = ctf.vecnorm(E) + (ctf.vecnorm(U) + ctf.vecnorm(V) +
                                          ctf.vecnorm(W)) * regParam

        if ctf.comm().rank() == 0:
            print(curr_err_norm, next_err_norm)

        # Count the sweep on every rank.  The counter used to be incremented
        # only on rank 0, so the other ranks never reached the iteration cap
        # and could keep looping after rank 0 had left.
        it += 1

        if abs(curr_err_norm - next_err_norm) < .001 or it > 100:
            break

        curr_err_norm = next_err_norm

    # np.round replaces np.round_, which NumPy 2.0 removed.
    nt = np.round(time.time() - t, 4)

    return it, nt
Beispiel #11
0
 def nproc(self):
     """Return the value of ``np()`` from a fresh ``ctf.comm()``."""
     communicator = ctf.comm()
     return communicator.np()
def is_master_proc():
    """Return True when the calling process has CTF rank 0, else False."""
    return ctf.comm().rank() == 0
def printf(*string):
    """Print the given arguments on rank 0 only; other ranks stay silent.

    The arguments are forwarded to ``print`` individually, so
    ``printf("a", 1)`` prints ``a 1`` rather than the tuple ``('a', 1)``.
    """
    if ctf.comm().rank() == 0:
        print(*string)
def print_CG_time_summary():
    """Print the module-level ``blockCG_contraction_time`` on rank 0.

    The value is formatted with ``%d``, i.e. as an integer.
    """
    if ctf.comm().rank() != 0:
        return
    summary = "total block CG contraction time %d" % blockCG_contraction_time
    print(summary)
Beispiel #15
0
import cProfile

# Positional command-line inputs: Nx Ny D chi Zn backend
Nx = int(argv[1])
Ny = int(argv[2])
D = int(argv[3])
chi = int(argv[4])
# A symmetry order of 0 on the command line means "no symmetry".
Zn = int(argv[5]) or None
backend = argv[6]
d = 2

# The CTF backend is imported lazily so other backends do not require it.
if backend == 'ctf':
    import ctf
    wrld = ctf.comm()

# TEBD schedule: step sizes and the number of steps for each
step_sizes = [0.1]
n_step = [5]

# Build the Hamiltonian, with Z2 symmetry whenever a symmetry was requested
ham = return_op(Nx, Ny, sym=(None if Zn is None else 'Z2'), backend=backend)

# Create PEPS
peps = PEPS(Nx,
            Ny,
            d,
Beispiel #16
0
        a2 = ctf.astensor(numpy.ones((2,5)))
        self.assertTrue(ctf.hstack((a1, a2)).shape == (2,9))

        a1 = ctf.astensor(numpy.ones((2,4)))
        a2 = ctf.astensor(numpy.ones((2,5))+0j)
        self.assertTrue(ctf.hstack((a1, a2)).shape == (2,9))
        self.assertTrue(ctf.hstack((a1, a2)).dtype == numpy.complex128)

        a1 = numpy.ones((2,4))
        a2 = ctf.astensor(numpy.ones((2,5))+0j)
        self.assertTrue(ctf.hstack((a1, a2)).shape == (2,9))
        na2 = numpy.ones((2,5))+0j
        self.assertTrue(ctf.all(ctf.hstack((a1, a2)) == numpy.hstack((a1,na2))))

        a1 = ctf.astensor(numpy.ones(4))
        self.assertTrue(ctf.hstack((a1, 1.5)).shape == (5,))

        a1 = ctf.astensor(numpy.ones((2,4,2)))
        a2 = ctf.astensor(numpy.ones((2,5,2)))
        self.assertTrue(ctf.hstack((a1, a2)).shape == (2,9,2))


if __name__ == "__main__":
    # Every rank runs the KnowValues suite; only rank 0 reports to the
    # terminal, the other ranks write the runner output to os.devnull.
    suite = unittest.TestSuite(unittest.TestLoader().loadTestsFromTestCase(KnowValues))
    if ctf.comm().rank() != 0:
        # Close the null stream once the run finishes instead of leaking it.
        with open(os.devnull, 'w') as devnull:
            result = unittest.TextTestRunner(stream=devnull).run(suite)
    else:
        print("Tests for basic numpy ndarray functionality")
        result = unittest.TextTestRunner().run(suite)
    ctf.MPI_Stop()
    # A TestResult object is always truthy, so `not result` was always False
    # and the process exited with 0 even when tests failed.
    sys.exit(not result.wasSuccessful())
Beispiel #17
0
def run_bench(num_iter, s_start, s_end, mult, R, sp, sp_init):
    """Time the three einsum contractions of an MTTKRP sweep.

    For each cube side ``s`` (starting at ``s_start`` and multiplied by
    ``mult`` until it exceeds ``s_end``) a random ``s x s x s`` tensor with a
    fixed target of ``s_start**3 * sp_init`` nonzeros is created, and the
    contractions that rebuild U, V and W are timed ``num_iter`` times.
    Rank 0 prints per-iteration timings, a per-size summary and a final
    table.  When ``num_iter > 1`` one untimed warm-up sweep runs first.

    Args:
        num_iter: timed sweeps per tensor size.
        s_start, s_end: first and largest tensor side length.
        mult: growth factor applied to the side length after each size.
        R: number of columns of each factor matrix.
        sp: passed as ``sp=`` when creating the tensor.
        sp_init: nonzero fraction at the initial size.
    """
    comm = ctf.comm()
    s = s_start
    nnz = float(s_start * s_start * s_start) * sp_init
    sweep_specs = ("ijk,jr,kr->ir", "ijk,ir,kr->jr", "ijk,ir,jr->kr")

    def random_tensor():
        # Reads the current value of s from the enclosing scope.
        tensor = ctf.tensor((s, s, s), sp=sp)
        tensor.fill_sp_random(-1., 1., float(nnz) / float(s * s * s))
        return tensor

    def sweep(tensor, factors):
        # Update the factors in place, one mode at a time; return the lap times.
        laps = []
        for mode, spec in enumerate(sweep_specs):
            others = [f for pos, f in enumerate(factors) if pos != mode]
            tic = time.time()
            factors[mode] = ctf.einsum(spec, tensor, *others)
            laps.append(time.time() - tic)
        return laps

    agg_s = []
    agg_avg_times = []
    agg_min_times = []
    agg_max_times = []
    agg_min_95 = []
    agg_max_95 = []

    if num_iter > 1:
        if comm.rank() == 0:
            print("Performing MTTKRP WARMUP with s =", s, "nnz =", nnz, "sp =",
                  sp, "sp_init =", sp_init)
        T = random_tensor()
        sweep(T, [ctf.random.random((s, R)) for _ in range(3)])
        if comm.rank() == 0:
            print("Completed MTTKRP WARMUP with s =", s, "nnz =", nnz, "sp =",
                  sp, "sp_init =", sp_init)

    while s <= s_end:
        agg_s.append(s)
        T = random_tensor()
        if comm.rank() == 0:
            print("Performing MTTKRP with s =", s, "nnz =", nnz, "sp =", sp,
                  "sp_init =", sp_init)
        factors = [ctf.random.random((s, R)) for _ in range(3)]
        totals = [0., 0., 0.]
        avg_times = []
        for _ in range(num_iter):
            laps = sweep(T, factors)
            for mode, lap in enumerate(laps):
                totals[mode] += lap
            lap_mean = (laps[0] + laps[1] + laps[2]) / 3.
            if comm.rank() == 0:
                print(laps[0], laps[1], laps[2], "avg:", lap_mean)
            avg_times.append(lap_mean)

        if comm.rank() == 0:
            print("Completed", num_iter, "iterations, took",
                  totals[0] / num_iter, totals[1] / num_iter,
                  totals[2] / num_iter, "seconds on average for 3 variants.")
            avg_time = (totals[0] + totals[1] + totals[2]) / (3 * num_iter)
            agg_avg_times.append(avg_time)
            print("MTTKRP took", avg_times,
                  "seconds on average across variants with s =", s, "nnz =",
                  nnz, "sp =", sp, "sp_init =", sp_init)
            min_time = np.min(avg_times)
            max_time = np.max(avg_times)
            agg_min_times.append(min_time)
            agg_max_times.append(max_time)
            print("min/max interval is [", min_time, ",", max_time, "]")
            # Mean +/- two standard deviations of the per-iteration averages.
            stddev = np.std(avg_times)
            min_95 = avg_time - 2 * stddev
            max_95 = avg_time + 2 * stddev
            agg_min_95.append(min_95)
            agg_max_95.append(max_95)
            print("95% confidence interval is [", min_95, ",", max_95, "]")
        s = int(s * mult)

    if comm.rank() == 0:
        print("s min_time min_95 avg_time max_95 max_time")
        rows = zip(agg_s, agg_min_times, agg_min_95, agg_avg_times,
                   agg_max_95, agg_max_times)
        for side, t_min, lo_95, t_avg, hi_95, t_max in rows:
            print(side, t_min, lo_95, t_avg, hi_95, t_max)
def main():
    """Compare ``getALS_CG`` and ``getALS_Kressner`` on a random sparse tensor.

    Creates a sparse 1000 x 1000 x 1000 tensor, derives its mask with
    ``getOmega``, draws sparse random factors with two columns each (seed
    42), and gives each solver its own copy of the tensor and factors.
    Rank 0 prints the iteration count and time reported by each solver.
    """
    I = 1000
    J = 1000
    K = 1000
    r = 2
    sparsity = .000001
    regParam = .1
    block = 100

    # Sparse 3rd-order tensor and the mask built from it
    T_SVD = ctf.tensor((I, J, K), sp=True)
    T_SVD.fill_sp_random(0, 1, sparsity)
    assert (T_SVD.sp == 1)

    omega = getOmega(T_SVD)
    assert (omega.sp == 1)

    # Reference initial factors
    ctf.random.seed(42)
    U_SVD = ctf.random.random((I, r), sp=True)
    V_SVD = ctf.random.random((J, r), sp=True)
    W_SVD = ctf.random.random((K, r), sp=True)

    # Inputs for the CG solver
    U_CG = ctf.copy(U_SVD)
    V_CG = ctf.copy(V_SVD)
    W_CG = ctf.copy(W_SVD)
    T_CG = ctf.copy(T_SVD)

    # Inputs for the Kressner solver
    U_CG2 = ctf.copy(U_SVD)
    V_CG2 = ctf.copy(V_SVD)
    W_CG2 = ctf.copy(W_SVD)
    T_CG2 = ctf.copy(T_SVD)

    def on_root():
        return ctf.comm().rank() == 0

    if on_root():
        print(
            "--------------------------------ALS iterative CG------------------------"
        )
    blockCGit, blockCGtime = getALS_CG(T_CG, U_CG, V_CG, W_CG, regParam, omega,
                                       I, J, K, r, block)
    if on_root():
        print("Number of iterations: %d" % (blockCGit))
        print("CG block size: %d " % (block))
        print("ALS iterative CG costs time: %f" % (blockCGtime))

    if on_root():
        print(
            "--------------------------------ALS direct SVD------------------------"
        )
    kressnerit, kressnertime = getALS_Kressner(T_CG2, U_CG2, V_CG2, W_CG2,
                                               regParam, omega, I, J, K, r)
    if on_root():
        print("Number of iterations: %d" % (kressnerit))
        print("ALS direct CG costs time: %f" % (kressnertime))
Beispiel #19
0
def getALS_CG(T,
              U,
              V,
              W,
              regParam,
              omega,
              I,
              J,
              K,
              r,
              block_size,
              num_iter=100,
              err_thresh=.001,
              time_limit=600,
              use_implicit=True):
    """Run alternating updates of U, V, W via ``updateFactor``.

    After every sweep the masked residual ``T - TTTP(omega, [U, V, W])`` is
    rebuilt, and the objective (residual norm plus ``regParam`` times the sum
    of the factor norms) and RMSE are printed on rank 0.  The loop stops when
    the objective changes by less than ``err_thresh``, after ``num_iter``
    sweeps, or once the elapsed time (excluding objective evaluation)
    exceeds ``time_limit`` seconds.

    Args:
        block_size: forwarded to ``updateFactor``; a value <= 0 is replaced
            by ``max(I, J, K)``.
        use_implicit: forwarded to ``updateFactor``; also selects the timer
            name and the banner text.

    Returns:
        None.  The timing summary is printed on rank 0.
    """

    if use_implicit:
        t_ALS_CG = ctf.timer_epoch("als_CG_implicit")
        if ctf.comm().rank() == 0:
            print(
                "--------------------------------ALS with implicit CG------------------------"
            )
    else:
        t_ALS_CG = ctf.timer_epoch("als_CG_explicit")
        if ctf.comm().rank() == 0:
            print(
                "--------------------------------ALS with explicit CG------------------------"
            )
    if T.sp:
        nnz_tot = T.nnz_tot
    else:
        nnz_tot = ctf.sum(omega)
    t_ALS_CG.begin()

    it = 0

    if block_size <= 0:
        block_size = max(I, J, K)

    t_init_error_norm = ctf.timer("ALS_init_error_tensor_norm")
    t_init_error_norm.start()
    t0 = time.time()
    E = ctf.tensor((I, J, K), sp=T.sp)
    E.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U, V, W]).i("ijk")
    t1 = time.time()
    curr_err_norm = ctf.vecnorm(E) + (ctf.vecnorm(U) + ctf.vecnorm(V) +
                                      ctf.vecnorm(W)) * regParam
    t2 = time.time()

    t_init_error_norm.stop()
    if ctf.comm().rank() == 0 and status_prints == True:
        print('ctf.TTTP() takes {}'.format(t1 - t0))
        print('ctf.vecnorm {}'.format(t2 - t1))

    t_before_loop = time.time()
    # Time spent evaluating the objective; excluded from reported durations.
    t_obj_calc = 0.
    ctf.random.seed(42)
    while True:

        t_upd_cg = ctf.timer("ALS_upd_cg")
        t_upd_cg.start()

        U = updateFactor(T, U, V, W, regParam, omega, I, J, K, r, block_size,
                         "U", use_implicit)
        V = updateFactor(T, U, V, W, regParam, omega, I, J, K, r, block_size,
                         "V", use_implicit)
        W = updateFactor(T, U, V, W, regParam, omega, I, J, K, r, block_size,
                         "W", use_implicit)

        duration = time.time() - t_before_loop - t_obj_calc
        t_b_obj = time.time()
        E.set_zero()
        E.i("ijk") << T.i("ijk") - ctf.TTTP(omega, [U, V, W]).i("ijk")
        diff_norm = ctf.vecnorm(E)
        RMSE = diff_norm / (nnz_tot**.5)
        next_err_norm = diff_norm + (ctf.vecnorm(U) + ctf.vecnorm(V) +
                                     ctf.vecnorm(W)) * regParam
        t_obj_calc += time.time() - t_b_obj

        t_upd_cg.stop()

        it += 1
        if ctf.comm().rank() == 0:
            print('Objective after', duration, 'seconds (', it,
                  'iterations) is: {}'.format(next_err_norm))
            print('RMSE after', duration, 'seconds (', it,
                  'iterations) is: {}'.format(RMSE))

        if abs(curr_err_norm - next_err_norm
               ) < err_thresh or it >= num_iter or duration > time_limit:
            break

        curr_err_norm = next_err_norm

    t_ALS_CG.end()
    duration = time.time() - t_before_loop - t_obj_calc

    # Same communicator query as the rest of the function, rather than a
    # module-level ``glob_comm``.  ``it`` is at least 1 here because the loop
    # body always runs once.
    if ctf.comm().rank() == 0:
        print('ALS (implicit =', use_implicit,
              ') time per sweep: {}'.format(duration / it))
Beispiel #20
0
def CG(A, b, x0, r, regParam, I, is_implicit=False):
    """Run batched conjugate-gradient iterations on ``(A + regParam) x = b``.

    The step sizes ``alpha`` and ``beta`` are computed separately for each
    index ``i`` of the ``(I, r)`` iterates, so every row is treated as its
    own system.  At most ``b.shape[-1]`` iterations are performed (the bound
    is taken from the initial residual's last dimension); the loop ends
    early once the overall residual norm drops below the module-level
    ``CG_thresh``.

    Args:
        A: operator.  With ``is_implicit`` it must provide
            ``A.mul("ir", x)``; otherwise it is contracted as
            ``A.i("irl") * x.i("il")``.
        b: right-hand side.
        x0: initial guess.
        r: second dimension of the iterates.
        regParam: scalar added to the operator as ``regParam * x``.
        I: first dimension of the iterates.
        is_implicit: selects how ``A`` is applied (see ``A``).

    Returns:
        The final iterate ``xk``.
    """

    t_batch_cg = ctf.timer("ALS_exp_cg")
    t_batch_cg.start()

    Ax0 = ctf.tensor((I, r))
    if is_implicit:
        Ax0.i("ir") << A.mul("ir", x0)
    else:
        Ax0.i("ir") << A.i("irl") * x0.i("il")
    Ax0 += regParam * x0
    rk = b - Ax0
    sk = rk
    xk = x0
    for i in range(sk.shape[-1]):
        Ask = ctf.tensor((I, r))
        t_cg_bmvec = ctf.timer("ALS_exp_cg_mvec")
        t_cg_bmvec.start()
        t0 = time.time()
        if is_implicit:
            Ask.i("ir") << A.mul("ir", sk)
        else:
            Ask.i("ir") << A.i("irl") * sk.i("il")
        t1 = time.time()
        # rank is a method: without the call parentheses the comparison was
        # always False and this status message could never be printed.
        if ctf.comm().rank() == 0 and status_prints == True:
            print('form Ask takes {}'.format(t1 - t0))
        t_cg_bmvec.stop()

        Ask += regParam * sk

        # Per-row squared residual norm.
        rnorm = ctf.tensor(I)
        rnorm.i("i") << rk.i("ir") * rk.i("ir")

        skAsk = ctf.tensor(I)
        skAsk.i("i") << sk.i("ir") * Ask.i("ir")

        # The 1.e-30 offset guards against division by zero.
        alpha = rnorm / (skAsk + 1.e-30)

        alphask = ctf.tensor((I, r))
        alphask.i("ir") << alpha.i("i") * sk.i("ir")
        xk1 = xk + alphask

        alphaask = ctf.tensor((I, r))
        alphaask.i("ir") << alpha.i("i") * Ask.i("ir")
        rk1 = rk - alphaask

        rk1norm = ctf.tensor(I)
        rk1norm.i("i") << rk1.i("ir") * rk1.i("ir")

        beta = rk1norm / (rnorm + 1.e-30)

        betask = ctf.tensor((I, r))
        betask.i("ir") << beta.i("i") * sk.i("ir")
        sk1 = rk1 + betask
        rk = rk1
        xk = xk1
        sk = sk1
        if ctf.vecnorm(rk) < CG_thresh:
            break

    t_batch_cg.stop()
    return xk
Beispiel #21
0
def train(model,
          features,
          adj,
          y_train,
          y_val,
          train_mask,
          val_mask,
          lr=0.1,
          epochs=200,
          patience=0,
          save_best=False):
    """Train ``model`` for up to ``epochs`` epochs with optional early stop.

    Each epoch calls ``model.backward`` (the only part counted as
    computation time) and then evaluates loss and accuracy on the training
    and validation masks.  Progress lines are printed by every process for
    non-CTF models and by rank 0 only when ``model.package == "ctf"``.

    With ``patience != 0`` the validation losses are tracked: an epoch whose
    loss beats the best earlier loss optionally saves the model to
    ``params/best.pkl`` (when ``save_best`` is True); otherwise training
    stops once the best earlier epoch lies ``patience`` or more epochs back.

    Returns:
        None.
    """

    def should_report():
        # Non-CTF back ends always report; under CTF only rank 0 does.
        return model.package != "ctf" or ctf.comm().rank() == 0

    wall_start = time.time()
    seen_val_losses = []
    backward_seconds = 0
    for epoch in range(epochs):

        # time only the backpropagation step
        tic = time.time()
        model.backward(features, y_train, adj, train_mask, lr)
        toc = time.time()

        train_loss = model.loss(adj, features, y_train, train_mask)
        val_loss = model.loss(adj, features, y_val, val_mask)

        train_accuracy = model.accuracy(adj, features, y_train, train_mask)
        val_accuracy = model.accuracy(adj, features, y_val, val_mask)

        if should_report():
            print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
                  "{:.5f}".format(train_loss), "train_acc=",
                  "{:.5f}".format(train_accuracy), "val_loss=",
                  "{:.5f}".format(val_loss), "val_acc=",
                  "{:.5f}".format(val_accuracy), "time=",
                  "{:.5f}".format(toc - tic))

        backward_seconds += (toc - tic)

        # early stopping is disabled entirely when patience is 0
        if patience == 0:
            continue

        # the first epoch only seeds the loss history
        if not seen_val_losses:
            seen_val_losses.append(val_loss)
            continue

        best_epoch = np.argmin(np.array(seen_val_losses))
        best_loss = seen_val_losses[best_epoch]
        seen_val_losses.append(val_loss)
        if val_loss < best_loss:
            if save_best == True:
                model.save('params/best.pkl')
        elif epoch - best_epoch >= patience:
            print("Validation loss has not been improved for",
                  '%04d' % patience,
                  "epochs, reaching the specified patience.")
            break

    # total wall-clock time, including evaluation
    wall_stop = time.time()
    if should_report():
        print("Total time: {:.4f}s, ".format(wall_stop - wall_start),
              "Computation time: {:.4f}s".format(backward_seconds))
def updateFactor_CG(T, U, V, W, regParam, omega, I, J, K, r, block, string):
    """Update one factor matrix block by block using ``CG``.

    ``string`` selects the factor ("U", "V" or "W").  The corresponding mode
    is cut into ``block`` slices of ``int(dim / block)`` rows each; for every
    slice the left-hand side applied to a random initial guess and the
    right-hand side are formed by contraction, and the slice of the factor
    is overwritten with the result of ``CG``.

    NOTE(review): when the mode size is not a multiple of ``block`` the
    trailing ``dim - block * size`` rows are never visited -- confirm that
    callers always pass a divisor.

    Returns:
        The updated factor matrix, or None if ``string`` is not one of
        "U", "V", "W".
    """

    if (string == "U"):
        size = int(I / block)
        for n in range(block):
            t = time.time()
            nomega = omega[n * size:(n + 1) * size, :, :]
            # np.round replaces np.round_, which NumPy 2.0 removed.
            if print_flag and ctf.comm().rank() == 0:
                print("slicing omega cost %f seconds" %
                      (np.round(time.time() - t, 4)))
            # The original author noted that sparsity is not preserved by the
            # slicing above, which is why nomega.sp is not asserted.

            x0 = ctf.random.random((size, r))
            Ax0 = ctf.tensor((size, r), sp=True)
            Ax0.i("ir") << V.i("Jr") * W.i("Kr") * nomega.i("iJK") * V.i(
                "JR") * W.i("KR") * x0.i(
                    "iR")  # LHS; ATA using matrix-vector multiplication
            Ax0 += regParam * x0
            if print_flag and ctf.comm().rank() == 0:
                print("contraction to form LHS cost %f seconds" %
                      (np.round(time.time() - t, 4)))
            assert (Ax0.sp == 1)

            b = ctf.tensor((size, r), sp=True)
            b.i("ir") << V.i("Jr") * W.i("Kr") * T[n * size:
                                                   (n + 1) * size, :, :].i(
                                                       "iJK")  # RHS; ATb
            if print_flag and ctf.comm().rank() == 0:
                print("contraction to form RHS cost %f seconds" %
                      (np.round(time.time() - t, 4)))
            assert (b.sp == 1)

            U[n * size:(n + 1) * size, :].set_zero()
            U[n * size:(n + 1) * size, :] = CG(Ax0, b, x0, V, W, r, regParam,
                                               nomega, size, "U")
            assert (U.sp == 1)

        return U

    if (string == "V"):
        size = int(J / block)
        for n in range(block):
            nomega = omega[:, n * size:(n + 1) * size, :]
            x0 = ctf.random.random((size, r))
            Ax0 = ctf.tensor((size, r), sp=True)
            Ax0.i("jr") << U.i("Ir") * W.i("Kr") * nomega.i("IjK") * U.i(
                "IR") * W.i("KR") * x0.i(
                    "jR")  # LHS; ATA using matrix-vector multiplication
            Ax0 += regParam * x0
            assert (Ax0.sp == 1)
            b = ctf.tensor((size, r), sp=True)
            b.i("jr") << U.i("Ir") * W.i(
                "Kr") * T[:, n * size:(n + 1) * size, :].i("IjK")  # RHS; ATb
            assert (b.sp == 1)
            V[n * size:(n + 1) * size, :].set_zero()
            V[n * size:(n + 1) * size, :] = CG(Ax0, b, x0, U, W, r, regParam,
                                               nomega, size, "V")

        assert (V.sp == 1)

        return V

    if (string == "W"):
        size = int(K / block)
        for n in range(block):
            nomega = omega[:, :, n * size:(n + 1) * size]
            x0 = ctf.random.random((size, r))
            Ax0 = ctf.tensor((size, r), sp=True)
            Ax0.i("kr") << U.i("Ir") * V.i("Jr") * nomega.i("IJk") * U.i(
                "IR") * V.i("JR") * x0.i(
                    "kR")  # LHS; ATA using matrix-vector multiplication
            Ax0 += regParam * x0
            assert (Ax0.sp == 1)
            b = ctf.tensor((size, r), sp=True)
            b.i("kr") << U.i("Ir") * V.i("Jr") * T[:, :, n * size:(n + 1) *
                                                   size].i("IJk")  # RHS; ATb
            assert (b.sp == 1)
            W[n * size:(n + 1) * size, :].set_zero()
            W[n * size:(n + 1) * size, :] = CG(Ax0, b, x0, U, V, r, regParam,
                                               nomega, size, "W")

        assert (W.sp == 1)

        return W
Beispiel #23
0
        self.ovov = None
        self.foo = None
        self.fvv = None
        self.fov = None


if __name__ == '__main__':
    assert (len(sys.argv) <= 4)
    assert (len(sys.argv) >= 3)
    nocc = int(sys.argv[1])
    nvir = int(sys.argv[2])
    cutoff = None
    if (len(sys.argv) > 3):
        cutoff = float(sys.argv[3])

    cm = ctf.comm()
    eris = integrals()
    NS = ctf.SYM.NS
    SY = ctf.SYM.SY
    eris.oovv = ctf.tensor([nocc, nocc, nvir, nvir], sym=[NS, NS, SY, NS])
    eris.ovvv = ctf.tensor([nocc, nvir, nvir, nvir], sym=[NS, NS, SY, NS])
    eris.vvvv = ctf.tensor([nvir, nvir, nvir, nvir], sym=[SY, NS, SY, NS])
    eris.ovov = ctf.tensor([nocc, nvir, nocc, nvir], sym=[NS, NS, NS, NS])
    eris.ooov = ctf.tensor([nocc, nocc, nocc, nvir], sym=[NS, NS, NS, NS])
    eris.oooo = ctf.tensor([nocc, nocc, nocc, nocc], sym=[NS, NS, NS, NS])
    eris.fvv = ctf.tensor([nvir, nvir])
    eris.fov = ctf.tensor([nocc, nvir])
    eris.foo = ctf.tensor([nocc, nocc])

    for e in [
            eris.ovvv, eris.oovv, eris.oooo, eris.ooov, eris.vvvv, eris.ovov,
Beispiel #24
0
#!/usr/bin/env python

import ctf

from ctf import random

# Factor a random 32x32 matrix, then rebuild it from its SVD factors.
A = ctf.random.random((32, 32))
U, S, VT = ctf.svd(A)
reconstruction = ctf.dot(U, ctf.dot(ctf.diag(S), VT))
err = A - reconstruction

# The check passes unless the residual 2-norm exceeds the 1e-6 tolerance.
err_nrm = err.norm2()
success = not (err_nrm > 1.E-6)

# Only rank 0 reports the outcome.
if ctf.comm().rank() == 0:
    verdict = "success, norm is " if success else "failure, norm is "
    print(verdict, err_nrm)

ctf.MPI_Stop()

Beispiel #25
0
 def rank(self):
     """Return the rank reported by the ctf communicator."""
     communicator = ctf.comm()
     return communicator.rank()
    print("95% confidence interval for fast is [", avg_fast - 2 * stddev_fast,
          ",", avg_fast + 2 * stddev_fast, "]")


if __name__ == "__main__":
    # (flag, default, help text) for each integer command-line option.
    option_table = (
        ('--n', 4, 'Dimension of symmetric modes (default: 4)'),
        ('--b', 10, 'Dimension of nonsymmetric modes (default: 10)'),
        ('--niter', 10,
         'Number of iterations for benchmarking (default: 10)'),
    )

    parser = argparse.ArgumentParser()
    for flag, default_value, help_text in option_table:
        parser.add_argument(flag,
                            type=int,
                            default=default_value,
                            metavar='int',
                            help=help_text)

    # parse_known_args returns unrecognised arguments separately instead of
    # rejecting them; they are discarded here.
    args, _ = parser.parse_known_args()
    n, b, niter = args.n, args.b, args.niter
    w = ctf.comm()

    # Run the test first, then the benchmark.
    test(n, b)
    bench(n, b, niter)
from ctf import random as crandom
import gzip
import shutil
import os
import argparse
import arg_defs as arg_defs

sys.path.insert(0, '../SGD')
from gradient1 import sparse_SGD
sys.path.insert(0, '../ALS')
from als_sp import getALS_CG
sys.path.insert(0, '../CCD')
from ccd_sp import run_CCD
from ccd_sp import get_objective

glob_comm = ctf.comm()


def getOmega(T):
    """Return a tensor shaped like ``T`` holding 1.0 at T's local entries.

    The index/value pairs come from ``T.read_local_nnz()``; every value is
    overwritten with 1.0 and the pairs are written into a fresh tensor
    created with the same shape and ``sp`` flag as ``T``.
    """
    indices, values = T.read_local_nnz()
    # Keep the positions, replace every stored value by one.
    values[:] = 1.
    mask = ctf.tensor(T.shape, sp=T.sp)
    mask.write(indices, values)
    return mask


def read_frostt_tensor(file_name, I, J, K, use_sp_rep):
    unzipped_file_name = file_name + '.tns'
    exists = os.path.isfile(unzipped_file_name)

    if not exists:
Beispiel #28
0
def _plot_trial_curves(time_lists, err_lists, title, file_name):
    """Draw one error-versus-time curve per trial and save the figure.

    Opens a new pyplot figure, plots ``err_lists[k]`` against
    ``time_lists[k]`` for every trial k, then adds the legend, title and
    axis labels once and writes the figure to ``file_name``.
    """
    plt.figure()
    for trial, (times, errs) in enumerate(zip(time_lists, err_lists),
                                          start=1):
        plt.plot(times, errs, label="trail %d" % trial)
    plt.legend()
    plt.title(title)
    plt.xlabel("Time[s]")
    plt.ylabel("Training Error Norm")
    plt.savefig(file_name)


def main():
    """Compare block-CG ALS with Kressner ALS on a small function tensor.

    Builds an 8x8x8 tensor with ``function_tensor`` and its mask with
    ``updateOmega``, then runs ``getALS_CG`` and ``getALS_Kressner`` for
    several trials. Each trial seeds ``ctf.random`` with ``42 + i`` and
    gives both solvers copies of the same random initial factors. Rank 0
    prints iteration counts and wall-clock times, and finally saves one
    plot per solver ('iterative_block_CG.png' and 'direct_CG.png').

    Assumes both solvers return ``(error history, iteration count, time
    history)`` with the histories usable as plot data -- TODO confirm
    against their definitions.
    """
    I = 8
    J = 8
    K = 8
    r = 2
    sparsity = .1
    regParam = .1
    block = 4
    ntrials = 5

    # 3rd-order tensor and its mask; both are expected to be sparse.
    T_SVD = function_tensor(I, J, K, sparsity)
    assert (T_SVD.sp == 1)

    omega = updateOmega(T_SVD, I, J, K)
    assert (omega.sp == 1)

    blockCGerrList = []
    blockCGtimeList = []
    KressnererrList = []
    KressnertimeList = []
    for i in range(ntrials):

        # Same seed sequence on every run so the trials are reproducible.
        ctf.random.seed(42 + i)
        U_SVD = ctf.random.random((I, r), sp=True)
        V_SVD = ctf.random.random((J, r), sp=True)
        W_SVD = ctf.random.random((K, r), sp=True)

        # Each solver gets its own copies of the starting point.
        U_CG = ctf.copy(U_SVD)
        V_CG = ctf.copy(V_SVD)
        W_CG = ctf.copy(W_SVD)
        T_CG = ctf.copy(T_SVD)

        U_CG2 = ctf.copy(U_SVD)
        V_CG2 = ctf.copy(V_SVD)
        W_CG2 = ctf.copy(W_SVD)
        T_CG2 = ctf.copy(T_SVD)

        t = time.time()
        blockCGnorm, blockCGit, blockCGtime = getALS_CG(
            T_CG, U_CG, V_CG, W_CG, regParam, omega, I, J, K, r, block)
        blockCGerrList.append(blockCGnorm)
        blockCGtimeList.append(blockCGtime)
        if ctf.comm().rank() == 0:
            print("Number of iterations: %d" % (blockCGit))
            print("CG block size: %d " % (block))
            # np.round rather than np.round_: the alias was removed in
            # NumPy 2.0.
            print("ALS iterative CG costs time: %f" %
                  (np.round(time.time() - t, 4)))

        t = time.time()
        kressnernorm, kressnerit, kressnertime = getALS_Kressner(
            T_CG2, U_CG2, V_CG2, W_CG2, regParam, omega, I, J, K, r)
        KressnererrList.append(kressnernorm)
        KressnertimeList.append(kressnertime)
        if ctf.comm().rank() == 0:
            print("Number of iterations: %d" % (kressnerit))
            print("ALS direct CG costs time: %f" %
                  (np.round(time.time() - t, 4)))

    # Plotting happens on rank 0 only; each figure is decorated and saved
    # once, after all of its curves have been drawn.
    if ctf.comm().rank() == 0:
        _plot_trial_curves(
            blockCGtimeList, blockCGerrList,
            "Function tensor(%d*%d*%d), iterative block CG, block size %d, rank %d, sparsity %f"
            % (I, J, K, block, r, sparsity), 'iterative_block_CG.png')
        _plot_trial_curves(
            KressnertimeList, KressnererrList,
            "Funtion tensor(%d*%d*%d),direct CG (Kressner), rank %d, sparsity %f "
            % (I, J, K, r, sparsity), 'direct_CG.png')
Beispiel #29
0
            max_95 = (te1 + te2 + te3) / (3 * num_iter) + 2 * stddev
            agg_min_95.append(min_95)
            agg_max_95.append(max_95)
            print("95% confidence interval is [", min_95, ",", max_95, "]")
        s = int(s * mult)
    if ctf.comm().rank() == 0:
        print("s min_time min_95 avg_time max_95 max_time")
        for i in range(len(agg_s)):
            print(agg_s[i], agg_min_times[i], agg_min_95[i], agg_avg_times[i],
                  agg_max_95[i], agg_max_times[i])


if __name__ == "__main__":
    # The option definitions are registered on the parser by sargs.
    parser = argparse.ArgumentParser()
    sargs.add_arguments(parser)
    args, _ = parser.parse_known_args()

    num_iter, s_start, s_end = args.num_iter, args.s_start, args.s_end
    mult, R = args.mult, args.R
    sp, sp_init = args.sp, args.sp_init

    # Rank 0 echoes the configuration as "<name> is <value>" pairs on a
    # single line before the benchmark starts.
    if ctf.comm().rank() == 0:
        labelled = (
            ("num_iter", num_iter),
            ("s_start", s_start),
            ("s_end", s_end),
            ("mult", mult),
            ("R", R),
            ("sp", sp),
            ("sp_init", sp_init),
        )
        print(*[
            part for label, value in labelled
            for part in (label + " is", value)
        ])

    run_bench(num_iter, s_start, s_end, mult, R, sp, sp_init)
import ctf,time,random
import numpy as np
import numpy.linalg as la
from ctf import random as crandom
glob_comm = ctf.comm()
from scipy.sparse.linalg import lsqr as lsqr

# In[6]:


class UnitTests:
        
    def test_3d_purturb1(self):
        
        I = random.randint(3,5)
        J = random.randint(3,5)
        K = random.randint(3,5)
        r = 2 
        sparsity = .2
        regParam = 10
        
        ctf.random.seed(42)
        U = ctf.random.random((I,r))
        V= ctf.random.random((J,r))
        W= ctf.random.random((K,r))
    
    
        # 3rd-order tensor
        T = ctf.tensor((I,J,K))
        T.fill_random(0,1)
Beispiel #31
0
        self.assertTrue(ctf.hstack((a1, a2)).shape == (2, 9))
        self.assertTrue(ctf.hstack((a1, a2)).dtype == numpy.complex128)

        a1 = numpy.ones((2, 4))
        a2 = ctf.astensor(numpy.ones((2, 5)) + 0j)
        self.assertTrue(ctf.hstack((a1, a2)).shape == (2, 9))
        na2 = numpy.ones((2, 5)) + 0j
        self.assertTrue(
            ctf.all(ctf.hstack((a1, a2)) == numpy.hstack((a1, na2))))

        a1 = ctf.astensor(numpy.ones(4))
        self.assertTrue(ctf.hstack((a1, 1.5)).shape == (5, ))

        a1 = ctf.astensor(numpy.ones((2, 4, 2)))
        a2 = ctf.astensor(numpy.ones((2, 5, 2)))
        self.assertTrue(ctf.hstack((a1, a2)).shape == (2, 9, 2))


if __name__ == "__main__":
    # Every rank runs the same suite, so build it once.
    suite = unittest.TestSuite(
        unittest.TestLoader().loadTestsFromTestCase(KnowValues))

    if ctf.comm().rank() != 0:
        # Non-root ranks send the runner's report to os.devnull; the
        # stream is closed as soon as the run finishes.
        with open(os.devnull, 'w') as devnull:
            result = unittest.TextTestRunner(stream=devnull).run(suite)
    else:
        print("Tests for basic numpy ndarray functionality")
        result = unittest.TextTestRunner().run(suite)

    ctf.MPI_Stop()
    # run() returns a TestResult object, which is always truthy, so
    # `not result` would exit with status 0 even when tests failed.
    # Exit non-zero unless every test passed.
    sys.exit(not result.wasSuccessful())
def _print_elapsed(message, start):
    """Print ``message`` filled with the seconds elapsed since ``start``.

    Prints only when the module-level ``print_flag`` is true and the
    caller is rank 0. ``message`` must contain a single ``%f`` placeholder.
    """
    if print_flag and ctf.comm().rank() == 0:
        # np.round rather than np.round_: the alias was removed in
        # NumPy 2.0.
        print(message % (np.round(time.time() - start, 4)))


def updateFactor_Kressner(T, U, V, W, regParam, omega, I, J, K, r, string):
    """Update one factor matrix row by row and return it.

    ``string`` selects the factor: "U" loops over the ``I`` rows of U,
    "V" over the ``J`` rows of V and "W" over the ``K`` rows of W. For
    each row:

    * an ``(r, r)`` sparse tensor ``A`` is contracted from the other two
      factors and the matching slice of ``omega``,
    * a length-``r`` sparse tensor ``b`` is contracted from the other two
      factors and the matching slice of ``T``,
    * the row is overwritten with
      ``Kressner(A, b, row, r, regParam)``.

    The updated factor is asserted to be sparse (``sp == 1``) before it is
    returned. Any other value of ``string`` falls through and the function
    returns None.

    When ``print_flag`` is set, the "U" branch reports timings on rank 0.
    Each figure is the time elapsed since the start of the current row's
    iteration, so the reported values are cumulative, not per step.
    """
    if (string == "U"):
        for i in range(I):
            t = time.time()
            A = ctf.tensor((r, r), sp=True)
            sliced_omega = omega[i, :, :]
            _print_elapsed("slicing omega cost %f seconds", t)
            A.i("uv") << V.i("Ju") * W.i("Ku") * sliced_omega.i("JK") * V.i(
                "Jv") * W.i("Kv")
            _print_elapsed("contraction to form LHS cost %f seconds", t)
            assert (A.sp == 1)
            # TODO: also assert that the omega slice is sparse
            # (was: assert(omega[i,:,:].sp==1)).

            b = ctf.tensor(r, sp=True)
            sliced_T = T[i, :, :]
            _print_elapsed("slicing original tensor cost %f seconds", t)
            b.i("r") << V.i("Jr") * W.i("Kr") * sliced_T.i("JK")  # RHS; ATb
            _print_elapsed("contraction to form RHS cost %f seconds", t)
            assert (b.sp == 1)
            # NOTE(review): whether set_zero() on a slice clears the row of
            # U itself depends on ctf's slicing semantics -- confirm.
            U[i, :].set_zero()
            U[i, :] = Kressner(A, b, U[i, :], r, regParam)

        assert (U.sp == 1)
        return U

    if (string == "V"):
        for j in range(J):
            A = ctf.tensor((r, r), sp=True)
            omega_slice = omega[:, j, :]
            A.i("uv") << U.i("Iu") * W.i("Ku") * omega_slice.i("IK") * U.i(
                "Iv") * W.i("Kv")
            assert (A.sp == 1)

            b = ctf.tensor(r, sp=True)
            T_slice = T[:, j, :]
            b.i("r") << U.i("Ir") * W.i("Kr") * T_slice.i("IK")  # RHS; ATb
            assert (b.sp == 1)
            V[j, :].set_zero()
            V[j, :] = Kressner(A, b, V[j, :], r, regParam)

        assert (V.sp == 1)
        return V

    if (string == "W"):
        for k in range(K):
            A = ctf.tensor((r, r), sp=True)
            omega_slice = omega[:, :, k]
            # LHS; ATA using matrix-vector multiplication
            A.i("uv") << U.i("Iu") * V.i("Ju") * omega_slice.i("IJ") * U.i(
                "Iv") * V.i("Jv")
            assert (A.sp == 1)

            b = ctf.tensor(r, sp=True)
            T_slice = T[:, :, k]
            b.i("r") << U.i("Ir") * V.i("Jr") * T_slice.i("IJ")  # RHS; ATb
            assert (b.sp == 1)
            W[k, :].set_zero()
            W[k, :] = Kressner(A, b, W[k, :], r, regParam)

        assert (W.sp == 1)
        return W