def als(X, rank, **kwargs):
    """
    Alternating least-squares algorithm to compute the CP decomposition.

    Parameters
    ----------
    X : tensor_mixin
        The tensor to be decomposed.
    rank : int
        Tensor rank of the decomposition.
    init : {'random', 'nvecs'}, optional
        The initialization method to use.
            - random : Factor matrices are initialized randomly.
            - nvecs : Factor matrices are initialized via HOSVD.
        (default 'nvecs')
    max_iter : int, optional
        Maximum number of iterations of the ALS algorithm.
        (default 500)
    fit_method : {'full', None}
        The method to compute the fit of the factorization
            - 'full' : Compute least-squares fit of the dense approximation
                       of X and X.
            - None : Do not compute the fit of the factorization, but iterate
                     until ``max_iter`` (Useful for large-scale tensors).
        (default 'full')
    conv : float
        Convergence tolerance on difference of fit between iterations
        (default 1e-5)
    dtype : optional
        Data type handed to the initializer and used for the Gram
        accumulator (default ``_DEF_TYPE``).

    Returns
    -------
    P : ktensor
        Rank ``rank`` factorization of X. ``P.U[i]`` corresponds to the factor
        matrix for the i-th mode; the column weights are the ``lmbda`` vector
        passed to ``ktensor``.
    fit : float
        Fit of the factorization compared to ``X``. When ``fit_method`` is
        not 'full' this is simply the index of the last iteration.
    itr : int
        Number of iterations that were needed until convergence
    exectimes : ndarray of floats
        Time needed for each single iteration

    Raises
    ------
    ValueError
        If an unknown keyword argument is passed.

    Examples
    --------
    Create random dense tensor

    >>> from sktensor import dtensor, ktensor
    >>> U = [np.random.rand(i,3) for i in (20, 10, 14)]
    >>> T = dtensor(ktensor(U).toarray())

    Compute rank-3 CP decomposition of ``T`` with ALS

    >>> P, fit, itr, _ = als(T, 3)

    Result is a decomposed tensor stored as a Kruskal operator

    >>> type(P)
    <class 'sktensor.ktensor.ktensor'>

    Factorization should be close to original data

    >>> np.allclose(T, P.totensor())
    True

    References
    ----------
    .. [1] Kolda, T. G. & Bader, B. W.
           Tensor Decompositions and Applications.
           SIAM Rev. 51, 455–500 (2009).
    .. [2] Harshman, R. A.
           Foundations of the PARAFAC procedure: models and conditions for an
           'explanatory' multimodal factor analysis.
           UCLA Working Papers in Phonetics 16, (1970).
    .. [3] Carroll, J. D.,  Chang, J. J.
           Analysis of individual differences in multidimensional scaling via
           an N-way generalization of 'Eckart-Young' decomposition.
           Psychometrika 35, 283–319 (1970).
    """

    # init options
    ainit = kwargs.pop('init', _DEF_INIT)
    maxiter = kwargs.pop('max_iter', _DEF_MAXITER)
    fit_method = kwargs.pop('fit_method', _DEF_FIT_METHOD)
    conv = kwargs.pop('conv', _DEF_CONV)
    dtype = kwargs.pop('dtype', _DEF_TYPE)
    if kwargs:
        raise ValueError('Unknown keywords (%s)' % (list(kwargs.keys())))

    # time.clock() was removed in Python 3.8; fall back to it only where
    # perf_counter does not exist.
    timer = getattr(time, 'perf_counter', None) or time.clock

    N = X.ndim
    normX = norm(X)

    U = _init(ainit, X, N, rank, dtype)
    fit = 0
    exectimes = []
    for itr in range(maxiter):
        tic = timer()
        fitold = fit

        for n in range(N):
            Unew = X.uttkrp(U, n)
            # Hadamard product of the Gram matrices of all other modes
            Y = ones((rank, rank), dtype=dtype)
            for i in (list(range(n)) + list(range(n + 1, N))):
                Y = Y * dot(U[i].T, U[i])
            Unew = Unew.dot(pinv(Y))
            # Normalize: 2-norm of the columns in the first sweep, afterwards
            # the column maximum (clamped to >= 1).
            if itr == 0:
                lmbda = sqrt((Unew**2).sum(axis=0))
            else:
                lmbda = Unew.max(axis=0)
                lmbda[lmbda < 1] = 1
            U[n] = Unew / lmbda

        P = ktensor(U, lmbda)
        if fit_method == 'full':
            normresidual = normX**2 + P.norm()**2 - 2 * P.innerprod(X)
            fit = 1 - (normresidual / normX**2)
        else:
            # No fit requested: use the iteration counter so that the change
            # is never below ``conv`` and the loop runs until ``max_iter``.
            fit = itr
        fitchange = abs(fitold - fit)
        exectimes.append(timer() - tic)
        _log.debug('[%3d] fit: %.5f | delta: %7.1e | secs: %.5f' %
                   (itr, fit, fitchange, exectimes[-1]))
        if itr > 0 and fitchange < conv:
            break

    return P, fit, itr, array(exectimes)
def als(X, Yl, rank, **kwargs):
    """
    Alternating least-squares algorithm to compute the CP decomposition taking
    into consideration the labels of the set.

    In addition to the factor matrices a weight vector ``W`` is fitted by
    least squares so that ``D.dot(U[1]).dot(W)`` approximates ``Yl``, where
    ``D`` selects the first ``l`` rows of the mode-1 factor. The mode-1
    update solves a system that combines the tensor term with this label
    term.

    Parameters
    ----------
    X : tensor_mixin
        The tensor to be decomposed (the original note describes it as
        ``p x u x u`` -- TODO confirm which mode carries the samples).
    Yl : array_like
        Labels; reshaped to a column vector of shape ``(l, 1)``.
    rank : int
        Tensor rank of the decomposition.
    init, max_iter, fit_method, conv, dtype : optional
        Keyword options; their defaults are the module constants
        ``_DEF_INIT``, ``_DEF_MAXITER``, ``_DEF_FIT_METHOD``, ``_DEF_CONV``
        and ``_DEF_TYPE``.

    Returns
    -------
    P : ktensor
        Factorization built from the final factor matrices and ``lmbda``.
    ypred : ndarray
        ``U[1][146:].dot(W)`` with entries whose magnitude exceeds 0.5 set to
        1 and entries whose magnitude is below 0.5 set to 0.
    fit : scalar or ndarray
        Combined residual of the tensor and label terms when ``fit_method``
        is 'full', otherwise the index of the last iteration.
    itr : int
        Index of the last iteration that was run.

    Raises
    ------
    ValueError
        If an unknown keyword argument is passed.
    """

    # init options
    ainit = kwargs.pop('init', _DEF_INIT)
    maxiter = kwargs.pop('max_iter', _DEF_MAXITER)
    fit_method = kwargs.pop('fit_method', _DEF_FIT_METHOD)
    conv = kwargs.pop('conv', _DEF_CONV)
    dtype = kwargs.pop('dtype', _DEF_TYPE)
    if kwargs:
        raise ValueError('Unknown keywords (%s)' % (list(kwargs.keys())))

    N = X.ndim
    normX = norm(X)

    Yl = np.reshape(np.asarray(Yl), (-1, 1))
    normYl = np.linalg.norm(Yl)

    U = _init(ainit, X, N, rank, dtype)
    fit = 0

    W = ones((rank, 1), dtype=dtype)

    l = Yl.shape[0]
    # The products below only conform when p equals the number of rows of the
    # mode-1 factor, so derive it instead of hard-coding 182.
    p = U[1].shape[0]
    # Selector of the first l rows (ones on the main diagonal of an l x p
    # matrix).
    D = np.eye(l, p)

    for itr in range(maxiter):
        fitold = fit
        for n in range(N):
            Unew = X.uttkrp(U, n)
            # Y is ZtZ
            Y = ones((rank, rank), dtype=dtype)
            for i in (list(range(n)) + list(range(n + 1, N))):
                Y = Y * dot(U[i].T, U[i])
            if n != 1:
                # Updates remain the same for U0,U2
                Unew = Unew.dot(pinv(Y))
            else:
                # Label-coupled update of the mode-1 factor.
                # NOTE(review): the right-hand side is built from the current
                # U[1] (not from the MTTKRP result) and a row-major vec is
                # paired with kron(Y, I) -- confirm against the derivation.
                GtG = np.kron(Y, np.identity(p))
                vecA = np.reshape(U[1], (-1, 1))
                GtvecX1 = dot(GtG, vecA)

                L = np.kron(W.T, D)
                LtL = dot(L.T, L)

                vecA = dot(inv(GtG + LtL), GtvecX1 + dot(L.T, Yl))
                Unew = np.reshape(vecA, (p, rank))

            # Normalize
            if itr == 0:
                lmbda = sqrt((Unew**2).sum(axis=0))
            else:
                lmbda = Unew.max(axis=0)
                lmbda[lmbda < 1] = 1

            U[n] = Unew / lmbda

        # update W: least-squares solution of D.A.W = Yl
        DA = dot(D, U[1])
        AtDt = DA.T
        W = dot(inv(dot(AtDt, DA)), dot(AtDt, Yl))

        P = ktensor(U, lmbda)
        A = U[1]
        # NOTE(review): rows from index 146 on are used for prediction;
        # presumably 146 is the number of labelled samples -- confirm.
        Ai = A[146:]

        ypred = dot(Ai, W)
        ypred[abs(ypred) > 0.5] = 1
        ypred[abs(ypred) < 0.5] = 0

        DAW = dot(DA, W)
        normDAW = np.linalg.norm(DAW)

        if fit_method == 'full':
            normresidual1 = normX**2 + P.norm()**2 - 2 * P.innerprod(X)
            normresidual2 = normYl**2 + normDAW**2 - 2 * dot(Yl.T, DAW)
            fit = normresidual1 + normresidual2
        else:
            fit = itr

        # Relative change; the previous value is 0 before the first sweep
        # (and after it when no fit is computed), so avoid dividing by zero.
        if fitold != 0:
            fitchange = abs(fitold - fit) / abs(fitold)
        else:
            fitchange = float('inf')

        if itr > 0 and fitchange < conv:
            break

    return P, ypred, fit, itr
def NN_Factorize(rank, Iters, odir, lambdas, FIT):
	"""Factorize the tensors X1, X2 and X3 jointly and write text reports.

	Three factor matrices (``Factors``) and three core tensors (``G``) obtained
	from ``Initialize(rank)`` are refined for ``Iters`` iterations with
	multiplicative updates of the form ``M = M * (Num / Denom)``. Afterwards
	the highest-scoring rows of every factor and the highest-scoring core
	entries per predicate are written to files in the output directory.

	Parameters
	----------
	rank : sequence
		``rank[0]``, ``rank[1]`` and ``rank[2]`` are used as loop bounds and
		in the name of the output directory.
	Iters : int
		Number of update iterations.
	odir : str
		NOTE(review): this argument is overwritten from ``args.odir`` before
		it is ever read.
	lambdas : sequence
		The first three entries scale the factor term that is added to the
		denominators of the A, B and C updates.
	FIT : str
		The fit is computed and written to ``Fit.txt`` when this equals 'Y'.

	Notes
	-----
	The body reads names that are not defined in this function: ``X1``,
	``X2``, ``X3``, ``args``, ``agents``, ``patnts``, ``instmnts``,
	``predicates``, ``dictIDS``, ``Initialize``, ``getSchema`` and
	``mergeSchemas``. It uses Python 2 ``print`` statements. No ``return``
	statement is visible.

	NOTE(review): several ``if`` statements below have no body and the
	triple-quoted block of disabled code is never closed in this view --
	lines appear to be missing; restore them from version control.
	"""

	gstart = datetime.datetime.now()

	Factors, G = Initialize(rank)

	print "Initialization Done\n" 

	# NOTE(review): I, J and K are not used in the visible code.
	I = X1.shape[0]
	J = X1.shape[1]
	K = X2.shape[1]

	print "*** Input Summary ***\n"
	print "X1 : "+ str(X1.shape)
	print "X2 : "+ str(X2.shape)
	print "X3 : "+ str(X3.shape)
	print "Factorization rank: "+str(rank)

	lambda_a = lambdas[0]
	lambda_b = lambdas[1]
	lambda_c = lambdas[2]
	# The output directory is derived from args.odir, the ranks and the
	# lambdas; the odir parameter is discarded here.
	odir = os.path.join(args.odir,"_"+str(rank[0])+"_"+str(rank[1])+"_"+str(rank[2])+"_"+str(lambda_a)+"_"+str(lambda_b)+"_"+str(lambda_c))
	# ep is added to every denominator below so that divisions never hit zero.
	ep = 1e-9
	conv = 1e-9

	Logs = os.path.join(odir,"Logs")
	# NOTE(review): the body of this condition is missing in this view
	# (presumably the directory is created here -- confirm).
	if not os.path.exists(Logs):

	normX1 = norm(X1)
	normX2 = norm(X2)
	normX3 = norm(X3)

	fit = np.zeros(3)
	avgFit = 0

	for iter in range(1,Iters+1):
		print "\n Starting Iteration: "+str(iter)+"\n"

		# NOTE(review): the body of this condition is missing in this view.
		if iter%5 == 0:

		start = datetime.datetime.now()

		# Gram matrices of the current factors, used by the core updates.
		FactorsT_T = [np.dot(M.T,M) for M in Factors]

		G_1 = dtensor(G[0])
		G_2 = dtensor(G[1])
		G_3 = dtensor(G[2])
		## Updating A
		# Numerator and denominator for Factors[0] are built from X1 (with
		# core G_1 and Factors[1]) and X3 (with core G_3 and Factors[2]).

		GB = G_1.ttm(Factors[1], mode =1, transp=False)
		GB = GB.unfold(0)		
		GC = G_3.ttm(Factors[2], mode =1, transp=False)
		GC = GC.unfold(0)
		Num_A = X1.unfold(0,transp=False).tocsr()
		Num_A = Num_A.dot(GB.T)
		Num_A += X3.unfold(0,transp=False).tocsr().dot(GC.T)
		Denom_A = np.dot(GB, GB.T) + np.dot(GC, GC.T)
		Denom_A = np.dot(Factors[0], Denom_A)
		Denom_A += np.multiply(lambda_a, Factors[0])
		Denom_A += ep

		## updating B
		# Same scheme for Factors[1], using X1 (mode 1) and X2 (mode 0).

		GA = G_1.ttm(Factors[0], mode =0, transp=False)
		GA = GA.unfold(1)
		GC = G_2.ttm(Factors[2], mode =1, transp=False)
		GC = GC.unfold(0)
		Num_B = X1.unfold(1,transp=False).tocsr()
		Num_B = Num_B.dot(GA.T)
		Num_B += X2.unfold(0,transp=False).tocsr().dot(GC.T)
		Denom_B = np.dot(GA, GA.T) + np.dot(GC, GC.T)  
		Denom_B = np.dot(Factors[1],Denom_B)
		Denom_B += np.multiply(lambda_b, Factors[1])
		Denom_B += ep

		## updating C
		# Same scheme for Factors[2], using X2 and X3 (both mode 1).

		GB = G_2.ttm(Factors[1], mode =0, transp=False)
		GB = GB.unfold(1)
		GA = G_3.ttm(Factors[0], mode =0, transp=False)
		GA = GA.unfold(1)
		Num_C = X2.unfold(1,transp=False).tocsr()
		Num_C = Num_C.dot(GB.T)
		Num_C += X3.unfold(1,transp=False).tocsr().dot(GA.T)
		Denom_C = np.dot(GA, GA.T) + np.dot(GB, GB.T)
		Denom_C = np.dot(Factors[2],Denom_C)
		Denom_C += np.multiply(lambda_c, Factors[2])
		Denom_C += ep

		## Updating Cores 
		# Each core is scaled element-wise by Num / Denom, where Num projects
		# the data tensor onto the two factors and Denom applies the factor
		# Gram matrices to the current core.

		Num = X1.ttm(Factors[0],mode = 0,transp=True)
		Num = Num.ttm(Factors[1], mode=1, transp=True)
		Denom = G_1.ttm(FactorsT_T[0],mode=0,transp=True)
		Denom = Denom.ttm(FactorsT_T[1], mode=1, transp=True)
		Denom += ep

		G[0] = np.multiply(G[0], np.divide(Num, Denom))

		Num = X2.ttm(Factors[1],mode = 0,transp=True)
		Num = Num.ttm(Factors[2], mode=1, transp=True)
		Denom = G_2.ttm(FactorsT_T[1],mode=0,transp=True)
		Denom = Denom.ttm(FactorsT_T[2], mode=1, transp=True)
		Denom += ep

		G[1] = np.multiply(G[1], np.divide(Num, Denom))

		Num = X3.ttm(Factors[0],mode = 0,transp=True)
		Num = Num.ttm(Factors[2], mode=1, transp=True)
		Denom = G_3.ttm(FactorsT_T[0],mode=0,transp=True)
		Denom = Denom.ttm(FactorsT_T[2], mode=1, transp=True)
		Denom += ep

		G[2] = np.multiply(G[2], np.divide(Num, Denom))

		# The factors are updated last, so every numerator and denominator
		# above was computed from the factors of the previous iteration.
		Factors[0] = np.multiply(Factors[0], np.divide(Num_A, Denom_A))
		Factors[1] = np.multiply(Factors[1], np.divide(Num_B, Denom_B))
		Factors[2] = np.multiply(Factors[2], np.divide(Num_C, Denom_C))

		# The triple-quoted string below is a disabled per-iteration fit check.
		# NOTE(review): its closing delimiter is not visible in this view.
		'''if FIT =='Y':

			G_1 = dtensor(G[0])
			G_2 = dtensor(G[1])
			G_3 = dtensor(G[2])

			normRes1 = sqrt(np.abs(normX1 ** 2 - norm(G[0]) ** 2))
			normRes2 = sqrt(np.abs(normX2 ** 2 - norm(G[1]) ** 2))
			normRes3 = sqrt(np.abs(normX3 ** 2 - norm(G[2]) ** 2))

			fit[0] = 1 - (normRes1/normX1)
			fit[1] = 1 - (normRes2/normX2)
			fit[2] = 1 - (normRes3/normX3)

			avgFit = sum(fit)/3

			print "\n Average Fit: "+str(avgFit)
			print "\n Fit: "+str(fit)

			if abs(avgFit-avgFitOld) < conv:

		end = datetime.datetime.now()

		print "Time taken for Iteration: "+str(end-start)

	end = datetime.datetime.now()
	total_time = str(end-gstart)
	print "\n Total factorization time: "+total_time


	## Writing Schemas
	# Map a flat index into a (rank[a] x rank[b]) core slice to the string
	# "row,col" (row-major order).
	dictIDS1 = {}
	dictIDS2 = {}
	dictIDS3 = {}
	idx = 0
	for i in range(rank[0]):
		for j in range(rank[1]):			
			dictIDS1[idx] = str(i)+","+str(j)
			idx += 1

	idx = 0
	for i in range(rank[1]):
		for j in range(rank[2]):			
			dictIDS2[idx] = str(i)+","+str(j)
			idx += 1

	idx = 0
	for i in range(rank[0]):
		for j in range(rank[2]):			
			dictIDS3[idx] = str(i)+","+str(j)
			idx += 1

	# NOTE(review): dictIDS is not defined in this function; it must already
	# exist at module level and support item assignment.
	dictIDS[0] = dictIDS1
	dictIDS[1] = dictIDS2
	dictIDS[2] = dictIDS3

	tops = 6
	Agents =[]
	Patients = []
	Instruments = []

	# For every column of a factor, write its `tops` largest entries.
	# NOTE(review): none of the files opened below is closed in the visible
	# code.
	fp = open(os.path.join(odir,'Agents.txt'), 'w')
	for col in range(rank[0]):
		topIds = np.argsort(Factors[0][:,col])[::-1][:tops]
		l = []
		for id in topIds:
			fp.write("\t"+agents[id]+" ,"+str(Factors[0][id,col]))




	fp = open(os.path.join(odir,'Patients.txt'), 'w')
	for col in range(rank[1]):
		topIds = np.argsort(Factors[1][:,col])[::-1][:tops]
		l = []
		for id in topIds:
			fp.write("\t"+patnts[id]+" ,"+str(Factors[1][id,col]))



	fp = open(os.path.join(odir,'Instruments.txt'), 'w')
	for col in range(rank[2]):
		topIds = np.argsort(Factors[2][:,col])[::-1][:tops]
		l = []
		for id in topIds:
			fp.write("\t"+instmnts[id]+" ,"+str(Factors[2][id,col]))




	# NOTE(review): sp is opened but nothing is written to it in the visible
	# code; the schemas are only printed.
	sp = open(os.path.join(odir, "Schemas.txt"),'w')
	for p in range(len(predicates)):
		# Flat indices of the 8 largest entries of each core slice.
		ids1 = np.argsort(G[0][:,:,p], axis=None)[::-1][:8]
		ids2 = np.argsort(G[1][:,:,p], axis=None)[::-1][:8]
		ids3 = np.argsort(G[2][:,:,p], axis=None)[::-1][:8]

		idx1 = [dictIDS1[idx] for idx in ids1]
		idx2 = [dictIDS2[idx] for idx in ids2]
		idx3 = [dictIDS3[idx] for idx in ids3]

		# Try the top 3 of each list first; fall back to all 8 when that
		# yields no schema.
		schemas = getSchema(idx1[:3],idx2[:3],idx3[:3])

		if len(schemas) == 0:
			schemas = getSchema(idx1,idx2,idx3)

		h_schemas = mergeSchemas(schemas)
		if len(h_schemas)>0:
			for s in range(len(h_schemas)):
				schm = h_schemas[s]
				print schm

		if len(schemas)>0:
			for s  in range(len(schemas)):
				schm = schemas[s]
				print schm



	for g in range(len(G)):
		np_file = os.path.join(odir, "Schemas_"+str(g)+".txt")
		fp = open(np_file,'w')
		for p in range(len(predicates)):			
			# NOTE(review): ids is computed but never used or written in the
			# visible code.
			ids = np.argsort(G[g][:,:,p], axis=None)[::-1][:5]


	start = datetime.datetime.now()
	## Fit Computation
	# For every predicate slice the model slice Factor_a . G[:,:,p] . Factor_b^T
	# is formed, the entries listed in X.subs are replaced by the difference
	# to X.vals, and the matrix norms of the slices are summed into f.
	if FIT == 'Y':
		print "Computing Fit ...\n"
		f = 0
		A = np.array(X1.subs)
		for p in range(len(predicates)):			
			sidx = np.where(A[2]==p)
			G_p = np.dot(Factors[0], np.dot(G[0][:,:,p], Factors[1].T))
			for idx in sidx:
				G_p[A[0,idx],A[1,idx]] = X1.vals[idx] - G_p[A[0,idx],A[1,idx]]
			f += np.linalg.norm(G_p)

		fit[0] = 1 - (f/normX1)

		f = 0
		A = np.array(X2.subs)		
		for p in range(len(predicates)):			
			sidx = np.where(A[2]==p)
			G_p = np.dot(Factors[1], np.dot(G[1][:,:,p], Factors[2].T))
			for idx in sidx:
				G_p[A[0,idx],A[1,idx]] = X2.vals[idx] - G_p[A[0,idx],A[1,idx]]
			f += np.linalg.norm(G_p)

		fit[1] = 1 - (f/normX2)

		f = 0		
		A = np.array(X3.subs)		
		for p in range(len(predicates)):			
			sidx = np.where(A[2]==p)
			G_p = np.dot(Factors[0], np.dot(G[2][:,:,p], Factors[2].T))
			for idx in sidx:
				G_p[A[0,idx],A[1,idx]] = X3.vals[idx] - G_p[A[0,idx],A[1,idx]]
			f += np.linalg.norm(G_p)

		fit[2] = 1 - (f/normX3)
		avgFit = sum(fit)/3

		fp = open(os.path.join(odir,"Fit.txt"),'w')
		fp.write("Average Fit: "+str(avgFit))

		end = datetime.datetime.now()
		print "Fit Computation Time: "+str(end-start)
def fold_in(X, Uold, rank, **kwargs):
    """
    Re-estimate the mode-1 factor matrix of a CP model for the tensor ``X``.

    The factor matrices are obtained from ``_init(ainit, X, N, Uold, rank,
    dtype)``; only ``U[1]`` is updated in the loop, the other factors are
    left as initialized.

    Parameters
    ----------
    X : tensor_mixin
        Tensor to fold in. It must be three-way, since ``X.shape`` is
        unpacked into three values.
    Uold :
        Passed through to ``_init`` (presumably the factor matrices of an
        existing decomposition -- confirm against ``_init``).
    rank : int
        Tensor rank of the decomposition.
    init, max_iter, fit_method, conv, dtype : optional
        Keyword options; their defaults are the module constants
        ``_DEF_INIT``, ``_DEF_MAXITER``, ``_DEF_FIT_METHOD``, ``_DEF_CONV``
        and ``_DEF_TYPE``.

    Returns
    -------
    U1 : ndarray
        The updated (normalized) mode-1 factor matrix.
    P : ktensor
        Kruskal tensor built from the factor matrices and the weights.

    Raises
    ------
    ValueError
        If an unknown keyword argument is passed.
    """

    # init options
    ainit = kwargs.pop('init', _DEF_INIT)
    maxiter = kwargs.pop('max_iter', _DEF_MAXITER)
    fit_method = kwargs.pop('fit_method', _DEF_FIT_METHOD)
    conv = kwargs.pop('conv', _DEF_CONV)
    dtype = kwargs.pop('dtype', _DEF_TYPE)

    if kwargs:
        raise ValueError('Unknown keywords (%s)' % (list(kwargs.keys())))

    N = X.ndim
    normX = norm(X)

    U = _init(ainit, X, N, Uold, rank, dtype)
    fit = 0
    O = U[1]
    mode = 1  # Mode 1 is the array that changes
    for itr in range(maxiter):
        fitold = fit
        Unew = X.uttkrp(U, mode)
        # Can't implement because of memory error
        # The row counts get their own names: they used to overwrite the mode
        # index, which then selected the wrong factors in the Gram loop.
        rows0, p = U[0].shape
        rows2 = U[2].shape[0]
        print('n -> U[0], m-> U[2] ROWS', rows0, rows2)
        C = np.einsum('ij, kj -> ikj', U[0], U[2]).reshape(rows2 * rows0, p)
        nk, ni, nj = X.shape
        jk = nk * nj
        # Close the session once the reshaped data has been evaluated; one is
        # opened per iteration and was previously never released.
        sess = tf.Session()
        try:
            with sess.as_default():
                Xnew = tf.reshape(X, [1, jk])
                Xnew = Xnew.eval()
        finally:
            sess.close()
        print('C shape:', C.shape)
        print('Xnew shape:', Xnew.shape)
        # NOTE(review): the reshape above drops the ni axis and inv() needs a
        # square Unew -- the shapes in this step look inconsistent; confirm
        # the intended formula before relying on it.
        Z = Xnew.dot(pinv(C))
        Unew = (Unew.dot(Z)).dot(inv(Unew))
        # Hadamard product of the Gram matrices of all modes except ``mode``
        Y = ones((rank, rank), dtype=dtype)
        for i in (list(range(mode)) + list(range(mode + 1, N))):
            Y = Y * dot(U[i].T, U[i])
        Unew = Unew.dot(pinv(Y))

        # Normalize. The weights are derived from the initial mode-1 factor
        # ``O``; an earlier computation from ``Unew`` was always overwritten
        # by this one and has been dropped.
        if itr == 0:
            lmbda = sqrt((O ** 2).sum(axis=0))
        else:
            lmbda = O.max(axis=0)
            lmbda[lmbda < 1] = 1
        U[1] = Unew / lmbda
        P = ktensor(U, lmbda)

        if fit_method == 'full':
            normresidual = normX**2 + P.norm()**2 - 2 * P.innerprod(X)
            fit = 1 - (normresidual / normX**2)
        else:
            fit = itr

        fitchange = abs(fitold - fit)

        if itr > 0 and fitchange < conv:
            break

    return U[1], P
def orth_als(X, rank, **kwargs):
    """
    Orthogonalized alternating least-squares algorithm to compute the CP
    decomposition.

    Orth-ALS is a variant of standard ALS where the factor estimates are
    orthogonalized before the ALS step. The orthogonalization may be continued
    till the end, or up to a fixed number of iterations.

    For more details about Orth-ALS, see reference [4].

    Parameters
    ----------
    X : tensor_mixin
        The tensor to be decomposed.
    rank : int
        Tensor rank of the decomposition.
    init : {'random', 'nvecs'}, optional
        The initialization method to use.
            - random : Factor matrices are initialized randomly.
            - nvecs :  Factor matrices are initialized via HOSVD.
        (default 'nvecs')
    max_iter : int, optional
        Maximum number of iterations of the ALS algorithm.
        (default 500)
    stop_orth: int, optional
        Number of iterations till which orthogonalization is to be continued
        (default 5)
    fit_method : {'full', None}
        The method to compute the fit of the factorization
            - 'full' : Compute least-squares fit of the dense approximation
                       of X and X.
            - None : Do not compute the fit of the factorization, but iterate
                     until ``max_iter`` (Useful for large-scale tensors).
        (default 'full')
    conv : float
        Convergence tolerance on difference of fit between iterations
        (default 1e-5)
    dtype : optional
        Data type handed to the initializer and used for the Gram
        accumulator (default ``_DEF_TYPE``).

    Returns
    -------
    P : ktensor
        Rank ``rank`` factorization of X. ``P.U[i]`` corresponds to the factor
        matrix for the i-th mode; the column weights are the ``lmbda`` vector
        passed to ``ktensor``.
    fit : float
        Fit of the factorization compared to ``X``. When ``fit_method`` is
        not 'full' this is simply the index of the last iteration.
    itr : int
        Number of iterations that were needed until convergence
    exectimes : ndarray of floats
        Time needed for each single iteration

    Raises
    ------
    ValueError
        If an unknown keyword argument is passed.

    Examples
    --------
    Create random dense tensor

    >>> from sktensor import dtensor, ktensor
    >>> U = [np.random.rand(i,3) for i in (20, 10, 14)]
    >>> T = dtensor(ktensor(U).toarray())

    Compute rank-3 CP decomposition of ``T`` with Orth-ALS/ Hybrid-ALS

    >>> P, fit, itr, _ = orth_als(T, 3)

    Result is a decomposed tensor stored as a Kruskal operator

    >>> type(P)
    <class 'sktensor.ktensor.ktensor'>

    Factorization should be close to original data

    >>> np.allclose(T, P.totensor())
    True

    References
    ----------
    .. [1] Kolda, T. G. & Bader, B. W.
           Tensor Decompositions and Applications.
           SIAM Rev. 51, 455–500 (2009).
    .. [2] Harshman, R. A.
           Foundations of the PARAFAC procedure: models and conditions for an
           'explanatory' multimodal factor analysis.
           UCLA Working Papers in Phonetics 16, (1970).
    .. [3] Carroll, J. D.,  Chang, J. J.
           Analysis of individual differences in multidimensional scaling
           via an N-way generalization of 'Eckart-Young' decomposition.
           Psychometrika 35, 283–319 (1970).
    .. [4] V. Sharan, G. Valiant
           Orthogonalized ALS: A Theoretically Principled Tensor Decomposition
           Algorithm for Practical Use
           arXiv:1703.01804, 2017
    """

    # init options
    ainit = kwargs.pop('init', _DEF_INIT)
    maxiter = kwargs.pop('max_iter', _DEF_MAXITER)
    fit_method = kwargs.pop('fit_method', _DEF_FIT_METHOD)
    conv = kwargs.pop('conv', _DEF_CONV)
    stop_orth = kwargs.pop('stop_orth', _DEF_STOP_ORTH)
    dtype = kwargs.pop('dtype', _DEF_TYPE)
    if kwargs:
        raise ValueError('Unknown keywords (%s)' % (list(kwargs.keys())))

    # time.clock() was removed in Python 3.8; fall back to it only where
    # perf_counter does not exist.
    timer = getattr(time, 'perf_counter', None) or time.clock

    N = X.ndim
    normX = norm(X)

    U = _init(ainit, X, N, rank, dtype)
    fit = 0
    exectimes = []
    for itr in range(maxiter):
        tic = timer()
        fitold = fit

        if itr != 0 and itr < stop_orth:
            # Columns with an index above the smallest mode dimension are not
            # used as projection directions.
            dim_max = min(U[n].shape[0] for n in range(N)) - 1

            for n in range(N):
                Q = U[n]
                n_rows, n_cols = Q.shape
                count = 0
                for i in range(n_cols):
                    col_norm = LA.norm(Q[:, i])
                    if col_norm == 0:
                        # Zero column: replace it by a random unit vector.
                        count += 1
                        Q[:, i] = rand(n_rows)
                        Q[:, i] = Q[:, i] / LA.norm(Q[:, i])
                    else:
                        Q[:, i] = Q[:, i] / col_norm
                    if i <= dim_max:
                        # Remove the component along column i from all later
                        # columns.
                        for j in range(i + 1, n_cols):
                            Q[:, j] = Q[:, j] - np.inner(Q[:, j],
                                                         Q[:, i]) * Q[:, i]
                U[n] = Q

                _log.debug('Zero norm, mode %d, count %d' % (n, count))

        for n in range(N):
            Unew = X.uttkrp(U, n)
            # Hadamard product of the Gram matrices of all other modes
            Y = ones((rank, rank), dtype=dtype)
            for i in (list(range(n)) + list(range(n + 1, N))):
                Y = Y * dot(U[i].T, U[i])
            Unew = Unew.dot(pinv(Y))
            # Normalize: 2-norm of the columns in the first sweep, afterwards
            # the column maximum (clamped to >= 1).
            if itr == 0:
                lmbda = sqrt((Unew**2).sum(axis=0))
            else:
                lmbda = Unew.max(axis=0)
                lmbda[lmbda < 1] = 1
            U[n] = Unew / lmbda

        P = ktensor(U, lmbda)
        if fit_method == 'full':
            normresidual = normX**2 + P.norm()**2 - 2 * P.innerprod(X)
            fit = 1 - (normresidual / normX**2)
        else:
            # No fit requested: use the iteration counter so that the change
            # is never below ``conv`` and the loop runs until ``max_iter``.
            fit = itr
        fitchange = abs(fitold - fit)
        exectimes.append(timer() - tic)
        _log.debug('[%3d] fit: %.5f | delta: %7.1e | secs: %.5f' %
                   (itr, fit, fitchange, exectimes[-1]))
        if itr > 0 and fitchange < conv:
            break

    return P, fit, itr, array(exectimes)
def hooi(X, rank=0, **kwargs):
    r"""
    Compute Tucker decomposition of a tensor using Higher-Order Orthogonal
    Iterations.

    Parameters
    ----------
    X : tensor_mixin
        The tensor to be decomposed
    rank : number or array_like
        The rank of the decomposition for each mode of the tensor.
        The length of ``rank`` must match the number of modes of ``X``.
        A single non-zero number is used for every mode. The default ``0``
        skips the iterations and takes the factors from a full SVD of every
        mode unfolding instead.
    init : {'random', 'nvecs'}, optional
        The initialization method to use.
            - random : Factor matrices are initialized randomly.
            - nvecs : Factor matrices are initialized via HOSVD.
        default : 'nvecs'
    maxIter : int, optional
        Maximum number of iterations (default ``__DEF_MAXITER``).
    conv : float, optional
        Convergence tolerance on the change of fit between iterations
        (default ``__DEF_CONV``).
    dtype : optional
        Data type handed to the initializer (default ``X.dtype``).

    Returns
    -------
    core : tensor
        The core tensor of the decomposition.
    U : list
        The factor matrices, one per mode.

    Raises
    ------
    ValueError
        If an unknown keyword argument is passed.

    Examples
    --------
    Create dense tensor

    >>> T = np.zeros((3, 4, 2))
    >>> T[:, :, 0] = [[ 1,  4,  7, 10], [ 2,  5,  8, 11], [3,  6,  9, 12]]
    >>> T[:, :, 1] = [[13, 16, 19, 22], [14, 17, 20, 23], [15, 18, 21, 24]]
    >>> T = dtensor(T)

    Compute Tucker decomposition of ``T`` with n-rank [2, 3, 1] via higher-order
    orthogonal iterations

    >>> core, U = hooi(T, [2, 3, 1], init='nvecs')

    Shape of the core tensor matches n-rank of the decomposition.

    >>> core.shape
    (2, 3, 1)
    >>> U[0].shape
    (3, 2)

    References
    ----------
    .. [1] L. De Lathauwer, B. De Moor, J. Vandewalle: On the best rank-1 and
           rank-(R_1, R_2, \ldots, R_N) approximation of higher order tensors;
           IEEE Trans. Signal Process. 49 (2001), pp. 2262-2271
    """
    # init options
    ainit = kwargs.pop('init', __DEF_INIT)
    maxIter = kwargs.pop('maxIter', __DEF_MAXITER)
    conv = kwargs.pop('conv', __DEF_CONV)
    dtype = kwargs.pop('dtype', X.dtype)
    if kwargs:
        raise ValueError('Unknown keywords (%s)' % (list(kwargs.keys())))

    # time.clock() was removed in Python 3.8; fall back to it only where
    # perf_counter does not exist.
    timer = getattr(time, 'perf_counter', None) or time.clock

    use_full_svd = False

    ndims = X.ndim
    if is_number(rank):
        if rank == 0:
            use_full_svd = True
        else:
            # Same rank for every mode
            rank = rank * ones(ndims)

    if use_full_svd:
        core = X
        U = []
        for dimension in np.arange(ndims):
            # from tensor_hosvd.m of MATLAB tensor_toolkit (MTT) for rank 0 for all dimensions
            # http://www.sandia.gov/~tgkolda/TensorToolbox/
            M = tenmat(X, dimension).as_ndarray()
            U_dim, S, Vh = np.linalg.svd(M)
            # Keep the factor; otherwise an empty list would be returned.
            U.append(U_dim)
            # NOTE(review): U_dim.T is passed together with transp=True --
            # confirm that this yields the intended projection.
            core = ttm(core, U_dim.T, dimension, transp=True)
    else:
        normX = norm(X)

        U = __init(ainit, X, ndims, rank, dtype)
        fit = 0
        exectimes = []
        for itr in range(maxIter):
            tic = timer()
            fitold = fit

            for n in range(ndims):
                Utilde = ttm(X, U, n, transp=True, without=True)
                U[n] = nvecs(Utilde, n, rank[n])

            # compute core tensor to get fit
            core = ttm(Utilde, U, n, transp=True)

            # since factors are orthonormal, compute fit on core tensor
            normresidual = sqrt(normX**2 - norm(core)**2)

            # fraction explained by model
            fit = 1 - (normresidual / normX)
            fitchange = abs(fitold - fit)
            exectimes.append(timer() - tic)

            _log.debug('[%3d] fit: %.5f | delta: %7.1e | secs: %.5f' %
                       (itr, fit, fitchange, exectimes[-1]))
            if itr > 1 and fitchange < conv:
                break
    return core, U