Ejemplo n.º 1
0
def generateSolution(sz, R, AFill, LambdaHat):
    """
    Generate a random sparse rank-R ktensor plus a rank-1 "noise bias" ktensor.

    Parameters
    ----------
    sz : sequence with the size of each mode
    R : the number of components (columns of every factor matrix)
    AFill : sequence with the number of nonzero entries to place in every
        column of the factor matrix of each mode
    LambdaHat : the weight assigned to the rank-1 bias ktensor

    Returns
    -------
    M : ktensor built from the sparse random factors, after normalize_sort(1)
    Mhat : rank-1 ktensor built from dense random factors, after normalize(1),
        with its single weight set to LambdaHat
    """
    A = []
    for n in range(len(sz)):
        factor = np.zeros((sz[n], R))
        # about 10% of the mode size is boosted; fall back to a single entry
        # (or none) when fewer nonzeros than that are available to pick from
        bigSamp = int(0.1 * sz[n])
        if bigSamp > AFill[n]:
            bigSamp = min(1, AFill[n])
        for r in range(R):
            # randomly select AFill[n] row indices of this column to be nonzero
            nnz = random.sample(range(sz[n]), AFill[n])
            factor[nnz, r] = np.random.random(size=AFill[n])
            # scale a random subset of the nonzero entries by 10
            big = random.sample(nnz, bigSamp)
            factor[big, r] = 10 * factor[big, r]
        A.append(factor)
    # random_integers(low=1, high=1) is deprecated; randint(1, 2) is the
    # documented equivalent and likewise yields a vector of ones
    lmbda = np.random.randint(low=1, high=2, size=R)
    M = ktensor.ktensor(lmbda, A)
    M.normalize_sort(1)
    ## generate the noise bias
    U = []
    for n in range(len(sz)):
        U.append(np.zeros((sz[n], 1)))
        U[n][:, 0] = np.random.random(size=sz[n])
    # float weight so that storing LambdaHat below cannot be truncated to int
    Mhat = ktensor.ktensor(np.ones(1), U)
    Mhat.normalize(1)
    Mhat.lmbda[0] = LambdaHat
    return M, Mhat
Ejemplo n.º 2
0
def generateSolution(sz, R, AFill, LambdaHat):
    """
    Build a random sparse ktensor of R components and a rank-1 bias ktensor.

    Parameters
    ----------
    sz : sequence with the size of each mode
    R : the number of components
    AFill : sequence giving, per mode, how many entries of each factor column
        are made nonzero
    LambdaHat : the weight stored on the rank-1 bias ktensor

    Returns
    -------
    M : the sparse random ktensor after normalize_sort(1)
    Mhat : the rank-1 bias ktensor after normalize(1), weight set to LambdaHat
    """
    A = []
    for n, modeSize in enumerate(sz):
        fill = AFill[n]
        A.append(np.zeros((modeSize, R)))
        for r in range(R):
            # randomly select `fill` entries of this column to be nonzero
            nnz = random.sample(range(modeSize), fill)
            A[n][nnz, r] = np.random.random(size=fill)
            # boost roughly 10% of the mode size; when that exceeds the number
            # of nonzeros, boost one entry (or none if the column is empty)
            bigSamp = int(0.1 * modeSize)
            if bigSamp > fill:
                bigSamp = min(1, fill)
            big = random.sample(nnz, bigSamp)
            A[n][big, r] = 10 * A[n][big, r]
    # randint(1, 2) replaces the deprecated random_integers(low=1, high=1);
    # both produce a vector of ones
    lmbda = np.random.randint(low=1, high=2, size=R)
    M = ktensor.ktensor(lmbda, A)
    M.normalize_sort(1)
    ## generate the noise bias
    U = []
    for n, modeSize in enumerate(sz):
        U.append(np.zeros((modeSize, 1)))
        U[n][:, 0] = np.random.random(size=modeSize)
    # use a float weight vector so LambdaHat is not truncated on assignment
    Mhat = ktensor.ktensor(np.ones(1), U)
    Mhat.normalize(1)
    Mhat.lmbda[0] = LambdaHat
    return M, Mhat
Ejemplo n.º 3
0
def generateOriginalTensor(L, A, U, tensorModes, alpha):
    """
    Build one full tensor per entry of tensorModes as signal plus bias.

    For each set of modes, the factors A[n] (weighted by L) and U[n]
    (weighted by alpha) of the listed modes are each wrapped in a ktensor,
    expanded with toTensor(), and added together.

    Returns
    -------
    list with one summed tensor per entry of tensorModes, in order
    """
    MFull = []
    for modes in tensorModes:
        signal = ktensor.ktensor(L, [A[m] for m in modes])
        bias = ktensor.ktensor(np.array([alpha]), [U[m] for m in modes])
        MFull.append(signal.toTensor() + bias.toTensor())
    return MFull
Ejemplo n.º 4
0
def generateOriginalTensor(L, A, U, tensorModes, alpha):
    """
    Construct M + Mhat as a full tensor for every set of modes.

    Parameters
    ----------
    L : weights of the signal ktensor
    A : factor matrices of the signal, indexed by mode
    U : factor matrices of the bias, indexed by mode
    tensorModes : iterable of mode-index collections, one per output tensor
    alpha : the single weight of the bias ktensor

    Returns
    -------
    list of the summed full tensors, one per entry of tensorModes
    """
    def _fullTensor(modeSet):
        # pick the factors belonging to this set of modes
        signalFactors = [A[n] for n in modeSet]
        biasFactors = [U[n] for n in modeSet]
        M = ktensor.ktensor(L, signalFactors)
        Mhat = ktensor.ktensor(np.array([alpha]), biasFactors)
        return M.toTensor() + Mhat.toTensor()

    return [_fullTensor(modeSet) for modeSet in tensorModes]
Ejemplo n.º 5
0
def useHier(topX, regX, R, hierIters, hierInner, regIters, regInner,
            tensorInfo):
    """
    Factorize the top-level tensor, then use it to initialize the regular one.

    Parameters
    ----------
    topX : the tensor factorized first (the top level of the hierarchy)
    regX : the tensor factorized second, initialized from the first result
    R : the rank of both factorizations
    hierIters, hierInner : maxiters / maxinner for the factorization of topX
    regIters, regInner : maxiters / maxinner for the factorization of regX
    tensorInfo : dictionary with the keys 'diag', 'med', 'diagHier',
        'diagHierCount', 'medHier' and 'medHierCount'

    Returns
    -------
    Y1, topY1, top1stats, top1mstats, ystats, mstats : the regX model, the
        topX model, the two statistics of the topX run and the two
        statistics of the regX run
    """
    topY1, top1stats, top1mstats = CP_APR.cp_apr(topX,
                                                 R,
                                                 maxiters=hierIters,
                                                 maxinner=hierInner)
    # reduce them to probability and then just sort them
    topY1.normalize_sort(1)
    topY1 = pmdTools.zeroSmallFactors(topY1, 1e-4)
    ### Use the top-level factors to populate the regular factors
    Udiag = np.zeros((len(tensorInfo['diag']), R))
    Umed = np.zeros((len(tensorInfo['med']), R))
    ### Patient factors stay the same
    for idx, diag in enumerate(tensorInfo['diag']):
        # each item takes the row of its top-level entry divided by that
        # entry's count
        topDiagIdx = tensorInfo['diagHier'][diag]
        diagCount = tensorInfo['diagHierCount'][topDiagIdx]
        Udiag[idx, :] = topY1.U[1][topDiagIdx, :] / diagCount
    for idx, med in enumerate(tensorInfo['med']):
        topMedIdx = tensorInfo['medHier'][med]
        medCount = tensorInfo['medHierCount'][topMedIdx]
        Umed[idx, :] = topY1.U[2][topMedIdx, :] / medCount
    Mtop = ktensor.ktensor(np.ones(R), [topY1.U[0].copy(), Udiag, Umed])
    # factorize the regX argument; the original referenced an undefined X1
    Y1, ystats, mstats = CP_APR.cp_apr(regX,
                                       R,
                                       Minit=Mtop,
                                       maxiters=regIters,
                                       maxinner=regInner)
    return Y1, topY1, top1stats, top1mstats, ystats, mstats
Ejemplo n.º 6
0
 def initialize(self, M=None):
     """
     Initialize the tensor decomposition

     Parameters
     ----------
     M : optional sequence of exactly two ktensors used as the starting
         point; if None, both entries are randomly initialized from the
         shape of self.X (rank self.R at REG_LOCATION, rank 1 at
         AUG_LOCATION)

     Raises
     ------
     ValueError : if M does not have exactly two elements or if either
         element is not a ktensor
     """
     if M is None:
         AU = tensorTools.randomInit(self.X.shape, 1)
         F = tensorTools.randomInit(self.X.shape, self.R)
         self.M[REG_LOCATION] = ktensor.ktensor(np.ones(self.R), F)
         self.M[AUG_LOCATION] = ktensor.ktensor(np.ones(1), AU)
         return
     ## do a quick sanity check
     if len(M) != 2:
         raise ValueError("Initialization needs to be of size 2")
     # reject when EITHER entry is not a ktensor (the original only
     # rejected when both were of the wrong type)
     if not (isinstance(M[0], ktensor.ktensor)
             and isinstance(M[1], ktensor.ktensor)):
         raise ValueError("Not ktensor type")
     self.M = M
Ejemplo n.º 7
0
 def projectSlice(self, X, n, iters=100, epsilon=1e-10, convTol=1e-4):
     """
     Project a slice, solving for the factors of the nth mode

     Parameters
     ------------
     X : the tensor to project onto the basis
     n : the mode to project onto
     iters : the max number of inner iterations
     epsilon : parameter to avoid dividing by zero
     convTol : the convergence tolerance

     Output
     -----------
     the projection matrix for mode n, with every row scaled to sum to 1

     Raises
     -----------
     ValueError : if a mode other than n does not match the size of the
         stored basis for that mode
     """
     ## Setup the 'initial guess'
     F = []
     for m in range(X.ndims()):
         if m == n:
             # the mode being solved for starts from a random guess
             F.append(np.random.rand(X.shape[m], self.R))
         else:
             ## double check the shape is the right dimensions
             if self.basis[m].shape[0] != X.shape[m]:
                 raise ValueError("Shape of the tensor X is incorrect")
             F.append(self.basis[m])
     M = ktensor.ktensor(np.ones(self.R), F)
     ## Solve for the subproblem (only the updated model is needed here)
     M, _, _, _ = CP_APR.solveForModeB(X, M, n, iters, epsilon, convTol)
     ## scale by summing across the rows
     totWeight = np.sum(M.U[n], axis=1)
     zeroIdx = np.where(totWeight < 1e-100)[0]
     if len(zeroIdx) > 0:
         # rows with (numerically) no weight are spread evenly over the
         # R components so the division below is well defined
         M.U[n][zeroIdx, :] = 1.0 / self.R
         totWeight = np.sum(M.U[n], axis=1)
     twMat = np.repeat(totWeight, self.R).reshape(X.shape[n], self.R)
     M.U[n] = M.U[n] / twMat
     return M.U[n]
Ejemplo n.º 8
0
def useHier(topX, regX, R, hierIters, hierInner, regIters, regInner, tensorInfo):
    """
    Run CP-APR on topX and use the result to seed CP-APR on regX.

    Parameters
    ----------
    topX : the tensor factorized first
    regX : the tensor factorized second, starting from the expanded factors
    R : the rank used for both factorizations
    hierIters, hierInner : maxiters / maxinner of the first run
    regIters, regInner : maxiters / maxinner of the second run
    tensorInfo : dictionary providing 'diag', 'med', 'diagHier',
        'diagHierCount', 'medHier' and 'medHierCount'

    Returns
    -------
    Y1, topY1, top1stats, top1mstats, ystats, mstats
    """
    topY1, top1stats, top1mstats = CP_APR.cp_apr(topX, R, maxiters=hierIters, maxinner=hierInner)
    # reduce them to probability and then just sort them
    topY1.normalize_sort(1)
    topY1 = pmdTools.zeroSmallFactors(topY1, 1e-4)
    ### Expand the top-level factors: every item gets the row of its
    ### top-level entry divided by that entry's count
    Udiag = np.zeros((len(tensorInfo['diag']), R))
    Umed = np.zeros((len(tensorInfo['med']), R))
    ### Patient factors stay the same
    for idx, diag in enumerate(tensorInfo['diag']):
        topDiagIdx = tensorInfo['diagHier'][diag]
        diagCount = tensorInfo['diagHierCount'][topDiagIdx]
        Udiag[idx, :] = topY1.U[1][topDiagIdx, :] / diagCount
    for idx, med in enumerate(tensorInfo['med']):
        topMedIdx = tensorInfo['medHier'][med]
        medCount = tensorInfo['medHierCount'][topMedIdx]
        Umed[idx, :] = topY1.U[2][topMedIdx, :] / medCount
    Mtop = ktensor.ktensor(np.ones(R), [topY1.U[0].copy(), Udiag, Umed])
    # the second factorization runs on regX (was the undefined name X1)
    Y1, ystats, mstats = CP_APR.cp_apr(regX, R, Minit=Mtop, maxiters=regIters, maxinner=regInner)
    return Y1, topY1, top1stats, top1mstats, ystats, mstats
Ejemplo n.º 9
0
 def projectSlice(self, X, n, iters=10, epsilon=1e-10, convTol=1e-4):
     """
     Solve for the mode-n factors of X against the stored basis.

     Parameters
     ------------
     X : the tensor to project onto the basis
     n : the mode to project onto
     iters : the max number of inner iterations
     epsilon : parameter to avoid dividing by zero
     convTol : the convergence tolerance

     Output
     -----------
     the projection matrix (mode-n factor with rows scaled to sum to 1)
     """
     rank = self.R
     ## Assemble the starting factors: random for mode n, basis elsewhere
     factors = []
     for mode in range(X.ndims()):
         if mode != n:
             fixed = self.basis[mode]
             ## the stored basis must match the tensor along this mode
             if fixed.shape[0] != X.shape[mode]:
                 raise ValueError("Shape of the tensor X is incorrect")
             factors.append(fixed)
             continue
         factors.append(np.random.rand(X.shape[mode], rank))
     model = ktensor.ktensor(np.ones(rank), factors)
     ## Solve for the subproblem
     model, _phi, _nIters, _kkt = CP_APR.solveForModeB(
         X, model, n, iters, epsilon, convTol)
     ## Row sums are the normalisation constants
     rowSums = np.sum(model.U[n], axis=1)
     emptyRows = np.where(rowSums < 1e-100)[0]
     if len(emptyRows) > 0:
         # rows without weight get an even distribution over the components
         model.U[n][emptyRows, :] = np.full((len(emptyRows), rank), 1.0 / rank)
         rowSums = np.sum(model.U[n], axis=1)
     divisor = np.repeat(rowSums, rank).reshape(X.shape[n], rank)
     model.U[n] = model.U[n] / divisor
     return model.U[n]
Ejemplo n.º 10
0
import ktensor
import numpy as np

R = 4
A = ktensor.ktensor(
    np.ones(R),
    [np.random.rand(5, R),
     np.random.rand(5, R),
     np.random.rand(2, R)])
B = ktensor.ktensor(
    np.ones(R),
    [np.random.rand(5, R),
     np.random.rand(5, R),
     np.random.rand(2, R)])

rawFMS = A.fms(B)
topFMS = A.top_fms(B, 2)
greedFMS = A.greedy_fms(B)

print rawFMS, topFMS, greedFMS

np.random.seed(10)
A = ktensor.ktensor(
    np.ones(R),
    [np.random.randn(5, R),
     np.random.randn(5, R),
     np.random.randn(2, R)])
A.U = [np.multiply((A.U[n] > 0).astype(int), A.U[n]) for n in range(A.ndims())]
B = ktensor.ktensor(
    np.ones(R),
    [np.random.randn(5, R),
Ejemplo n.º 11
0
def cp_apr(X,
           R,
           Minit=None,
           tol=1e-4,
           maxiters=1000,
           maxinner=10,
           epsilon=1e-10,
           kappatol=1e-10,
           kappa=1e-2):
    """
    Compute nonnegative CP with alternative Poisson regression.
    Code is the python implementation of cp_apr in the MATLAB Tensor Toolbox

    Parameters
    ----------
    X : input tensor of the class tensor or sptensor
    R : the rank of the CP
    Minit : the initial guess (in the form of a ktensor), if None random guess.
        NOTE: normalize(1) is called on this object, so a supplied Minit is
        presumably modified in place.
    tol : tolerance on the inner KKT violation
    maxiters : maximum number of iterations
    maxinner : maximum number of inner iterations
    epsilon : parameter to avoid dividing by zero
    kappatol : tolerance on complementary slackness
    kappa : offset to fix complementary slackness

    Returns
    -------
    M : the CP model as a ktensor
    cpStats: the statistics array; per-mode recording is disabled in this
        version, so it carries no measurements
    modelStats: a dictionary item with the final statistics for this tensor
        factorization ("Iters", "LS", "LL", "KKT")
    """
    N = X.ndims()

    ## Random initialization
    if Minit is None:
        F = tensorTools.randomInit(X.shape, R)
        Minit = ktensor.ktensor(np.ones(R), F)

    nInnerIters = np.zeros(maxiters)

    ## Initialize M and Phi for iterations
    M = Minit
    M.normalize(1)
    Phi = [[] for _ in range(N)]
    kktModeViolations = np.zeros(N)
    kktViolations = -np.ones(maxiters)
    nViolations = np.zeros(maxiters)

    ## statistics
    cpStats = np.zeros(7)

    for iteration in range(maxiters):
        startIter = time.time()
        isConverged = True
        for n in range(N):
            ## Make adjustments to M[n] entries that violate complementary slackness
            if iteration > 0:
                V = np.logical_and(Phi[n] > 1, M.U[n] < kappatol)
                if np.count_nonzero(V) > 0:
                    nViolations[iteration] = nViolations[iteration] + 1
                    M.U[n][V] = M.U[n][V] + kappa
            M, Phi[n], inner, kktModeViolations[
                n], isConverged = __solveSubproblem(X, M, n, maxinner,
                                                    isConverged, epsilon, tol)
            # accumulate the inner iterations so the per-iteration and total
            # counts printed below are no longer always zero
            nInnerIters[iteration] = nInnerIters[iteration] + inner

        kktViolations[iteration] = np.max(kktModeViolations)
        elapsed = time.time() - startIter
        print(
            "Iteration {0}: Inner Its={1} with KKT violation={2}, nViolations={3}, and elapsed time={4}"
            .format(iteration, nInnerIters[iteration],
                    kktViolations[iteration], nViolations[iteration], elapsed))
        if isConverged:
            break

    # cpStats is still the 1-D placeholder here (no rows were stacked), so
    # this drops its first element rather than a superfluous first row
    cpStats = np.delete(cpStats, (0), axis=0)
    ### Print the statistics
    fit = tensorTools.lsqrFit(X, M)
    ll = tensorTools.loglikelihood(X, [M])
    print("Number of iterations = {0}".format(iteration))
    print("Final least squares fit = {0}".format(fit))
    print("Final log-likelihood = {0}".format(ll))
    print("Final KKT Violation = {0}".format(kktViolations[iteration]))
    print("Total inner iterations = {0}".format(np.sum(nInnerIters)))

    modelStats = {
        # index of the last outer iteration (was the builtin function `iter`)
        "Iters": iteration,
        "LS": fit,
        "LL": ll,
        "KKT": kktViolations[iteration]
    }
    return M, cpStats, modelStats
Ejemplo n.º 12
0
def als(X, rank, **kwargs):
    """
    Alternating least-squares algorithm to compute the CP decomposition.

    Parameters
    ----------
    X : tensor_mixin
        The tensor to be decomposed.
    rank : int
        Tensor rank of the decomposition.
    init : {'random', 'nvecs'}, optional
        The initialization method to use.
            - random : Factor matrices are initialized randomly.
            - nvecs : Factor matrices are initialized via HOSVD.
        (default 'nvecs')
    max_iter : int, optional
        Maximum number of iterations of the ALS algorithm.
        (default 500)
    fit_method : {'full', None}
        The method to compute the fit of the factorization
            - 'full' : Compute least-squares fit of the dense approximation of.
                       X and X.
            - None : Do not compute the fit of the factorization, but iterate
                     until ``max_iter`` (Useful for large-scale tensors).
        (default 'full')
    conv : float
        Convergence tolerance on difference of fit between iterations
        (default 1e-5)
    dtype : dtype, optional
        Data type handed to the initializer and used for the accumulated
        Gram matrix (default is the module-level ``_DEF_TYPE``).

    Returns
    -------
    P : ktensor
        Rank ``rank`` factorization of X. ``P.U[i]`` corresponds to the factor
        matrix for the i-th mode. ``P.lambda[i]`` corresponds to the weight
        of the i-th mode.
    fit : float
        Fit of the factorization compared to ``X``
    itr : int
        Number of iterations that were needed until convergence
    exectimes : ndarray of floats
        Time needed for each single iteration

    Raises
    ------
    ValueError
        If an unknown keyword argument is passed.

    Examples
    --------
    Create random dense tensor

    >>> from sktensor import dtensor, ktensor
    >>> U = [np.random.rand(i,3) for i in (20, 10, 14)]
    >>> T = dtensor(ktensor(U).toarray())

    Compute rank-3 CP decomposition of ``T`` with ALS

    >>> P, fit, itr, _ = als(T, 3)

    Result is a decomposed tensor stored as a Kruskal operator

    >>> type(P)
    <class 'sktensor.ktensor.ktensor'>

    Factorization should be close to original data

    >>> np.allclose(T, P.totensor())
    True

    References
    ----------
    .. [1] Kolda, T. G. & Bader, B. W.
           Tensor Decompositions and Applications.
           SIAM Rev. 51, 455–500 (2009).
    .. [2] Harshman, R. A.
           Foundations of the PARAFAC procedure: models and conditions for an 'explanatory' multimodal factor analysis.
           UCLA Working Papers in Phonetics 16, (1970).
    .. [3] Carroll, J. D.,  Chang, J. J.
           Analysis of individual differences in multidimensional scaling via an N-way generalization of 'Eckart-Young' decomposition.
           Psychometrika 35, 283–319 (1970).
    """

    # init options
    ainit = kwargs.pop('init', _DEF_INIT)
    maxiter = kwargs.pop('max_iter', _DEF_MAXITER)
    fit_method = kwargs.pop('fit_method', _DEF_FIT_METHOD)
    conv = kwargs.pop('conv', _DEF_CONV)
    dtype = kwargs.pop('dtype', _DEF_TYPE)
    if kwargs:
        raise ValueError('Unknown keywords (%s)' % (kwargs.keys()))

    N = X.ndim
    normX = norm(X)

    U = _init(ainit, X, N, rank, dtype)
    fit = 0
    exectimes = []
    for itr in range(maxiter):
        # time.clock() was removed in Python 3.8; time.time() works everywhere
        tic = time.time()
        fitold = fit

        for n in range(N):
            Unew = X.uttkrp(U, n)
            # Hadamard product of the Gram matrices of all modes except n
            Y = ones((rank, rank), dtype=dtype)
            for i in (list(range(n)) + list(range(n + 1, N))):
                Y = Y * dot(U[i].T, U[i])
            Unew = Unew.dot(pinv(Y))
            # Normalize: 2-norm on the first sweep, max-norm (floored at 1)
            # afterwards
            if itr == 0:
                lmbda = sqrt((Unew**2).sum(axis=0))
            else:
                lmbda = Unew.max(axis=0)
                lmbda[lmbda < 1] = 1
            U[n] = Unew / lmbda

        P = ktensor(U, lmbda)
        if fit_method == 'full':
            normresidual = normX**2 + P.norm()**2 - 2 * P.innerprod(X)
            fit = 1 - (normresidual / normX**2)
        else:
            # without a fit the "fit" changes by 1 each sweep, so the loop
            # only stops at max_iter (assuming conv < 1)
            fit = itr
        fitchange = abs(fitold - fit)
        exectimes.append(time.time() - tic)
        _log.debug('[%3d] fit: %.5f | delta: %7.1e | secs: %.5f' %
                   (itr, fit, fitchange, exectimes[-1]))
        if itr > 0 and fitchange < conv:
            break

    return P, fit, itr, array(exectimes)
Ejemplo n.º 13
0
def cp_apr(X, R, Minit=None, tol=1e-4, maxiters=1000, maxinner=10,
           epsilon=1e-10, kappatol=1e-10, kappa=1e-2):
    """
    Compute nonnegative CP with alternative Poisson regression.
    Code is the python implementation of cp_apr in the MATLAB Tensor Toolbox

    Parameters
    ----------
    X : input tensor of the class tensor or sptensor
    R : the rank of the CP
    Minit : the initial guess (in the form of a ktensor), if None random guess.
        NOTE: normalize(1) is called on this object, so a supplied Minit is
        presumably modified in place.
    tol : tolerance on the inner KKT violation
    maxiters : maximum number of iterations
    maxinner : maximum number of inner iterations
    epsilon : parameter to avoid dividing by zero
    kappatol : tolerance on complementary slackness
    kappa : offset to fix complementary slackness

    Returns
    -------
    M : the CP model as a ktensor
    cpStats: one row per (outer iteration, mode) holding iteration, mode,
        inner value returned by the subproblem, least squares fit,
        log-likelihood, mode KKT violation and elapsed time
    modelStats: a dictionary item with the final statistics for this tensor
        factorization ("Iters", "LS", "LL", "KKT")
    """
    N = X.ndims()

    ## Random initialization
    if Minit is None:
        F = tensorTools.randomInit(X.shape, R)
        Minit = ktensor.ktensor(np.ones(R), F)

    nInnerIters = np.zeros(maxiters)

    ## Initialize M and Phi for iterations
    M = Minit
    M.normalize(1)
    Phi = [[] for _ in range(N)]
    kktModeViolations = np.zeros(N)
    kktViolations = -np.ones(maxiters)
    nViolations = np.zeros(maxiters)

    ## statistics (the all-zero first row is a placeholder removed below)
    cpStats = np.zeros(7)

    for iteration in range(maxiters):
        startIter = time.time()
        isConverged = True
        for n in range(N):
            startMode = time.time()
            ## Make adjustments to M[n] entries that violate complementary slackness
            if iteration > 0:
                V = np.logical_and(Phi[n] > 1, M.U[n] < kappatol)
                if np.count_nonzero(V) > 0:
                    nViolations[iteration] = nViolations[iteration] + 1
                    M.U[n][V] = M.U[n][V] + kappa
            M, Phi[n], inner, kktModeViolations[n], isConverged = __solveSubproblem(
                X, M, n, maxinner, isConverged, epsilon, tol)
            # accumulate the inner iterations so the counts printed below
            # are no longer always zero
            nInnerIters[iteration] = nInnerIters[iteration] + inner
            elapsed = time.time() - startMode
            cpStats = np.vstack((cpStats, np.array([
                iteration, n, inner, tensorTools.lsqrFit(X, M),
                tensorTools.loglikelihood(X, [M]), kktModeViolations[n],
                elapsed])))

        kktViolations[iteration] = np.max(kktModeViolations)
        elapsed = time.time() - startIter
        print("Iteration {0}: Inner Its={1} with KKT violation={2}, nViolations={3}, and elapsed time={4}".format(iteration, nInnerIters[iteration], kktViolations[iteration], nViolations[iteration], elapsed))
        if isConverged:
            break

    cpStats = np.delete(cpStats, (0), axis=0)  # delete the first row which was superfluous
    ### Print the statistics
    fit = tensorTools.lsqrFit(X, M)
    ll = tensorTools.loglikelihood(X, [M])
    print("Number of iterations = {0}".format(iteration))
    print("Final least squares fit = {0}".format(fit))
    print("Final log-likelihood = {0}".format(ll))
    print("Final KKT Violation = {0}".format(kktViolations[iteration]))
    print("Total inner iterations = {0}".format(np.sum(nInnerIters)))

    # "Iters" is the index of the last outer iteration (was the builtin `iter`)
    modelStats = {"Iters": iteration, "LS": fit, "LL": ll, "KKT": kktViolations[iteration]}
    return M, cpStats, modelStats
Ejemplo n.º 14
0
def als(X, rank, dtype=float, **kwargs):
    """
    Alternating least-squares algorithm to compute the CP decomposition.

    Parameters
    ----------
    X : tensor_mixin
        The tensor to be decomposed.
    rank : int
        Tensor rank of the decomposition.
    dtype : dtype, optional
        Data type handed to the initializer and used for the accumulated
        Gram matrix (default ``float``, which is what the removed alias
        ``np.float`` referred to).
    init : {'random', 'nvecs'}, optional
        The initialization method to use.
            - random : Factor matrices are initialized randomly.
            - nvecs : Factor matrices are initialized via HOSVD.
        (default 'nvecs')
    max_iter : int, optional
        Maximum number of iterations of the ALS algorithm. The legacy
        spelling ``maxIter`` is still accepted; ``max_iter`` wins if both
        are given.
        (default 500)
    fit_method : {'full', None}
        The method to compute the fit of the factorization
            - 'full' : Compute least-squares fit of the dense approximation of.
                       X and X.
            - None : Do not compute the fit of the factorization, but iterate
                     until ``max_iter`` (Useful for large-scale tensors).
        (default 'full')
    conv : float
        Convergence tolerance on difference of fit between iterations
        (default 1e-5)

    Returns
    -------
    P : ktensor
        Rank ``rank`` factorization of X. ``P.U[i]`` corresponds to the factor
        matrix for the i-th mode. ``P.lambda[i]`` corresponds to the weight
        of the i-th mode.
    fit : float
        Fit of the factorization compared to ``X``
    itr : int
        Number of iterations that were needed until convergence
    exectimes : ndarray of floats
        Time needed for each single iteration

    Raises
    ------
    ValueError
        If an unknown keyword argument is passed.

    Examples
    --------
    Create random dense tensor

    >>> from sktensor import dtensor, ktensor
    >>> U = [np.random.rand(i,3) for i in (20, 10, 14)]
    >>> T = dtensor(ktensor(U).toarray())

    Compute rank-3 CP decomposition of ``T`` with ALS

    >>> P, fit, itr, _ = als(T, 3)

    Result is a decomposed tensor stored as a Kruskal operator

    >>> type(P)
    <class 'sktensor.ktensor.ktensor'>

    Factorization should be close to original data

    >>> np.allclose(T, P.totensor())
    True

    References
    ----------
    .. [1] Kolda, T. G. & Bader, B. W.
           Tensor Decompositions and Applications.
           SIAM Rev. 51, 455–500 (2009).
    .. [2] Harshman, R. A.
           Foundations of the PARAFAC procedure: models and conditions for an 'explanatory' multimodal factor analysis.
           UCLA Working Papers in Phonetics 16, (1970).
    .. [3] Carroll, J. D.,  Chang, J. J.
           Analysis of individual differences in multidimensional scaling via an N-way generalization of 'Eckart-Young' decomposition.
           Psychometrika 35, 283–319 (1970).
    """
    N = len(X.shape)
    normX = norm(X)

    # init options
    ainit = kwargs.pop('init', __DEF_INIT)
    # accept the documented 'max_iter' as well as the legacy 'maxIter'
    legacy_maxiter = kwargs.pop('maxIter', __DEF_MAXITER)
    maxiter = kwargs.pop('max_iter', legacy_maxiter)
    fit_method = kwargs.pop('fit_method', __DEF_FIT_METHOD)
    conv = kwargs.pop('conv', __DEF_CONV)
    if kwargs:
        raise ValueError('Unknown keywords (%s)' % (kwargs.keys()))

    U = __init(ainit, X, N, rank, dtype)
    fit = 0
    exectimes = []
    # range instead of xrange so the loop runs on Python 2 and 3
    for itr in range(maxiter):
        # time.clock() was removed in Python 3.8; time.time() works everywhere
        tic = time.time()
        fitold = fit

        for n in range(N):
            Unew = X.uttkrp(U, n)
            # Hadamard product of the Gram matrices of all modes except n
            Y = ones((rank, rank), dtype=dtype)
            # list(...) is required on Python 3, where ranges cannot be added
            for i in (list(range(n)) + list(range(n + 1, N))):
                Y = Y * dot(U[i].T, U[i])
            Unew = Unew.dot(pinv(Y))
            # Normalize: 2-norm on the first sweep, max-norm (floored at 1)
            # afterwards
            if itr == 0:
                lmbda = sqrt((Unew ** 2).sum(axis=0))
            else:
                lmbda = Unew.max(axis=0)
                lmbda[lmbda < 1] = 1
            U[n] = Unew / lmbda

        P = ktensor(U, lmbda)
        if fit_method == 'full':
            normresidual = normX ** 2 + P.norm() ** 2 - 2 * P.innerprod(X)
            fit = 1 - (normresidual / normX ** 2)
        else:
            # without a fit the "fit" changes by 1 each sweep, so the loop
            # only stops at the iteration limit (assuming conv < 1)
            fit = itr
        fitchange = abs(fitold - fit)
        exectimes.append(time.time() - tic)
        _log.debug(
            '[%3d] fit: %.5f | delta: %7.1e | secs: %.5f' %
            (itr, fit, fitchange, exectimes[-1])
        )
        if itr > 0 and fitchange < conv:
            break

    return P, fit, itr, array(exectimes)
Ejemplo n.º 15
0
def cp_als(X, R, tol=1e-4, maxiters=50):
    """
    Compute an estimate of the best rank-R CP model of a tensor X using an alternating
    least-squares algorithm. The fit is defined as 1 - norm(X - full(P))/norm(X) and is
    loosely the proportion of data described by the CP model.

    Parameters
    ----------
    X - input tensor of the class tensor or sptensor
    R - the rank of the CP
    tol - stop once the fit changes by less than this between iterations
    maxiters - maximum number of ALS iterations

    Returns
    -------
    P : the CP model as a ktensor
    normresidual : the residual norm of the final iteration
    """
    N = X.ndims()  # number of dimensions
    normX = X.norm()  # norm

    # for initialization we ignore the first one: mode 0 is solved first and
    # its update only reads the factors of the other modes
    Uinit = [None]
    for idx in range(1, N):
        Uinit.append(np.random.rand(X.shape[idx], R))

    ## Setup for iterations, initializing U and the fit
    U = Uinit
    fit = 0

    # `iteration` rather than `iter`, to avoid shadowing the builtin
    for iteration in range(maxiters):
        fitold = fit
        # iterate over all the modes
        for n in range(N):
            # Calculate Unew = X_(n) * khatrirao(all U except n, 'r').
            Unew = X.mttkrp(U, n)

            # Compute the linear system coefficients
            Y = np.ones((R, R))
            for i in range(N):
                if i == n:
                    continue
                Y = np.multiply(Y, np.dot(U[i].transpose(), U[i]))

            Unew = np.linalg.solve(Y, Unew.transpose()).transpose()

            # Normalize each vector to prevent singularities
            if iteration == 0:
                lmda = np.sqrt(np.sum(np.square(Unew), axis=0))
            else:
                # max-norm on magnitudes, floored at 1 (as in the Tensor
                # Toolbox cp_als) so a zero or negative column maximum can
                # neither divide by zero nor flip the sign of a column
                lmda = np.maximum(np.abs(Unew).max(axis=0), 1)

            U[n] = Unew * sparse.spdiags(1 / lmda, 0, R, R, format='csr')

        P = ktensor.ktensor(lmda, U)
        normresidual = np.sqrt(
            np.square(normX) + np.square(P.norm()) - 2 * P.innerprod(X))
        fit = 1 - (normresidual / normX)
        # fraction of residual explained by model
        fitchange = abs(fitold - fit)
        print("Iteration {0}: fit={1} with delta={2}".format(
            iteration, fit, fitchange))
        if iteration > 0 and fitchange < tol:
            break

    ## Clean up the final result by normalizing the tensor
    P.arrange()
    P.fixsigns()

    return P, normresidual
Ejemplo n.º 16
0
def cp_apr(X, Y1, R, Minit=None, tol=1e-4, maxiters=1000, maxinner=50,
           epsilon=1e-10, kappatol=1e-10, kappa=1e-2):
    """
    Compute nonnegative CP with alternative Poisson regression.
    Code is the python implementation of cp_apr in the MATLAB Tensor Toolbox

    In this variant the mode-0 update is coupled with a logistic model:
    before mode 0 is solved, __solveLogis refits the coefficient vector from
    the current mode-0 factor and Y1, and both are passed to the mode-0
    subproblem. The other modes use the plain subproblem.

    Parameters
    ----------
    X : input tensor of the class tensor or sptensor
    Y1 : data handed to __solveLogis and to the mode-0 subproblem
        (presumably the per-row targets of mode 0 -- TODO confirm)
    R : the rank of the CP
    Minit : the initial guess (in the form of a ktensor), if None random guess.
        NOTE: normalize(1) is called on this object, so a supplied Minit is
        presumably modified in place.
    tol : tolerance on the inner KKT violation
    maxiters : maximum number of iterations
    maxinner : maximum number of inner iterations
    epsilon : parameter to avoid dividing by zero
    kappatol : tolerance on complementary slackness
    kappa : offset to fix complementary slackness

    Returns
    -------
    M : the CP model as a ktensor
    cpStats: the statistics array; nothing is recorded into it in this
        version, so it carries no measurements
    """
    N = X.ndims()

    ## Random initialization
    if Minit is None:
        F = tensorTools.randomInit(X.shape, R)
        Minit = ktensor.ktensor(np.ones(R), F)
    nInnerIters = np.zeros(maxiters)

    ## Initialize M and Phi for iterations
    M = Minit
    M.normalize(1)
    Phi = [[] for _ in range(N)]
    kktModeViolations = np.zeros(N)
    kktViolations = -np.ones(maxiters)
    nViolations = np.zeros(maxiters)

    # fixed weights passed to the logistic solver / mode-0 subproblem
    lambda2 = 1
    lambda3 = 1
    # random start for the R + 1 logistic coefficients
    sita = np.random.rand(R + 1, 1)
    ## statistics
    cpStats = np.zeros(7)
    for iteration in range(maxiters):
        startIter = time.time()
        isConverged = True
        for n in range(N):
            ## Make adjustments to M[n] entries that violate complementary slackness
            if iteration > 0:
                V = np.logical_and(Phi[n] > 1, M.U[n] < kappatol)
                if np.count_nonzero(V) > 0:
                    nViolations[iteration] = nViolations[iteration] + 1
                    M.U[n][V] = M.U[n][V] + kappa
            if n == 0:
                sita = __solveLogis(M.U[n], Y1, 200, epsilon, lambda2, lambda3, sita)
                M, Phi[n], inner, kktModeViolations[n], isConverged = __solveSubproblem1(
                    X, M, n, maxinner, isConverged, epsilon, tol, sita, Y1, lambda2)
            else:
                M, Phi[n], inner, kktModeViolations[n], isConverged = __solveSubproblem0(
                    X, M, n, maxinner, isConverged, epsilon, tol)
            # accumulate the inner iterations so the counts printed below
            # are no longer always zero
            nInnerIters[iteration] = nInnerIters[iteration] + inner

        kktViolations[iteration] = np.max(kktModeViolations)
        elapsed = time.time() - startIter
        print("Iteration {0}: Inner Its={1} with KKT violation={2}, nViolations={3}, and elapsed time={4}".format(iteration, nInnerIters[iteration], kktViolations[iteration], nViolations[iteration], elapsed))
        if isConverged:
            break

    # cpStats is still the 1-D placeholder here (no rows were stacked), so
    # this drops its first element rather than a superfluous first row
    cpStats = np.delete(cpStats, (0), axis=0)
    ### Print the statistics
    print("Number of iterations = {0}".format(iteration))
    print("Final KKT Violation = {0}".format(kktViolations[iteration]))
    print("Total inner iterations = {0}".format(np.sum(nInnerIters)))

    return M, cpStats
Ejemplo n.º 17
0
 def __randomInitialization(shape, R):
     """
     Return a ktensor with unit weights and one uniformly random factor
     matrix of R columns per entry of shape.
     """
     factors = [np.random.rand(dim, R) for dim in shape]
     return ktensor.ktensor(np.ones(R), factors)
Ejemplo n.º 18
0
import CP_APR
import ktensor
""" 
Test file associated with the CP decomposition using APR
"""
""" Test factorization of sparse matrix """
# NOTE(review): `np`, `sptensor` and `tensor` are used below but are not
# imported in this snippet -- confirm they are brought into scope elsewhere.
# Five subscript rows with one value per row, for a tensor of size 5x5x2;
# presumably coordinate-format input -- TODO confirm against sptensor.sptensor.
subs = np.array([[0, 3, 1], [1, 0, 1], [1, 2, 1], [1, 3, 1], [3, 0, 0]])
vals = np.array([[1], [1], [1], [1], [3]])
siz = np.array([5, 5, 2])  # 5x5x2 tensor
X = sptensor.sptensor(subs, vals, siz)
# Hard-coded starting factor matrices (5x4, 5x4 and 2x4) so the starting
# point of the factorization is fixed rather than drawn at random.
U0 = np.array([[0.7689, 0.8843, 0.7487, 0.0900],
               [0.1673, 0.5880, 0.8256, 0.1117],
               [0.8620, 0.1548, 0.7900, 0.1363],
               [0.9899, 0.1999, 0.3185, 0.6787],
               [0.5144, 0.4070, 0.5341, 0.4952]])
U1 = np.array([[0.1897, 0.5606, 0.8790, 0.9900],
               [0.4950, 0.9296, 0.9889, 0.5277],
               [0.1476, 0.6967, 0.0006, 0.4795],
               [0.0550, 0.5828, 0.8654, 0.8013],
               [0.8507, 0.8154, 0.6126, 0.2278]])
U2 = np.array([[0.4981, 0.5747, 0.7386, 0.2467],
               [0.9009, 0.8452, 0.5860, 0.6664]])
# Initial model: unit weights for the 4 components plus the matrices above.
Minit = ktensor.ktensor(np.ones(4), [U0, U1, U2])
# Scores the initial model against itself; `fms` is not read again below.
fms = Minit.fms(Minit)

# Rank-4 factorization started from Minit, with maxiters=100.
Y, cpstats, modelStats = CP_APR.cp_apr(X, 4, Minit=Minit, maxiters=100)
Y.normalize_sort(1)
""" Test factorization of regular matrix """
# Tensor built from the values 1..24 with shape [3, 4, 2].
X = tensor.tensor(range(1, 25), [3, 4, 2])
# Python 2 print statement; no Minit is passed here, so the starting point is
# whatever cp_apr chooses by default.
print CP_APR.cp_apr(X, 4)
Ejemplo n.º 19
0
def cp_als(X, R, tol=1e-4, maxiters=50):
    """
    Estimate a rank-R CP model of a tensor X with alternating least squares.

    The fit is defined as 1 - norm(X - full(P))/norm(X) and is loosely the
    proportion of the data described by the CP model.

    Parameters
    ----------
    X : input tensor of the class tensor or sptensor
    R : the rank of the CP
    tol : stop once the fit changes by less than this between two iterations
    maxiters : maximum number of outer iterations (must be at least 1)

    Returns
    -------
    P : the CP model as a ktensor
    normresidual : the residual norm computed for the last iteration

    Raises
    ------
    ValueError : if maxiters is smaller than 1, since no model would be built
    """
    if maxiters < 1:
        raise ValueError("maxiters must be at least 1")

    N = X.ndims()       # number of dimensions
    normX = X.norm()    # norm of the data, reused by every fit computation

    # Mode 0 gets no starting matrix: it is the first mode solved for and the
    # Gram products below skip the mode being updated.
    # NOTE(review): this assumes X.mttkrp(U, n) never reads U[n] -- confirm.
    U = [None]
    for idx in np.arange(1, N):
        U.append(np.random.rand(X.shape[idx], R))

    fit = 0

    for iteration in range(maxiters):
        fitold = fit
        # update each mode in turn while the others are held fixed
        for n in np.arange(N):
            # Calculate Unew = X_(n) * khatrirao(all U except n, 'r').
            Unew = X.mttkrp(U, n)

            # Coefficient matrix of the linear system: elementwise product of
            # the Gram matrices of every factor except mode n.
            Y = np.ones((R, R))
            for i in np.concatenate((np.arange(0, n), np.arange(n + 1, N))):
                Y = np.multiply(Y, np.dot(U[i].transpose(), U[i]))

            Unew = np.linalg.solve(Y, Unew.transpose()).transpose()

            # Normalize each column to prevent singularities: 2-norm on the
            # first pass, max-norm afterwards.
            if iteration == 0:
                lmda = np.sqrt(np.sum(np.square(Unew), axis=0))
            else:
                # Use the largest magnitude and never scale by less than 1.
                # A signed maximum could be zero or negative, which would
                # divide by zero or flip the sign of a whole column.
                lmda = np.maximum(np.abs(Unew).max(axis=0), 1)

            # Broadcasting divides column j by lmda[j].
            U[n] = Unew / lmda

        P = ktensor.ktensor(lmda, U)
        # ||X - P||^2 expanded as ||X||^2 + ||P||^2 - 2<P, X>
        normresidual = np.sqrt(np.square(normX) + np.square(P.norm()) - 2 * P.innerprod(X))
        fit = 1 - (normresidual / normX)  # fraction of the data explained by the model
        fitchange = abs(fitold - fit)
        print("Iteration {0}: fit={1} with delta={2}".format(iteration, fit, fitchange))
        if iteration > 0 and fitchange < tol:
            break

    ## Clean up the final result by normalizing the tensor
    P.arrange()
    P.fixsigns()

    return P, normresidual
Ejemplo n.º 20
0
import ktensor
import numpy as np

# Number of components (columns) in every factor matrix below.
R = 4
# Two independent random models with unit weights and factor matrices of
# shapes 5xR, 5xR and 2xR, drawn with np.random.rand (uniform on [0, 1)).
A = ktensor.ktensor(np.ones(R), [np.random.rand(5,R), np.random.rand(5,R), np.random.rand(2,R)])
B = ktensor.ktensor(np.ones(R), [np.random.rand(5,R), np.random.rand(5,R), np.random.rand(2,R)])

# Three scores of A against B; what each variant measures is defined in ktensor.
rawFMS = A.fms(B)
topFMS = A.top_fms(B, 2)
greedFMS = A.greedy_fms(B)

# Python 2 print statement: prints the three values separated by spaces.
print rawFMS, topFMS, greedFMS

# Seed so the normally distributed draws below are reproducible; the order of
# the randn calls therefore matters.
np.random.seed(10)
A = ktensor.ktensor(np.ones(R), [np.random.randn(5,R), np.random.randn(5,R), np.random.randn(2,R)])
# Multiply each factor matrix by the 0/1 mask of its strictly positive entries,
# which zeroes every non-positive entry.
A.U = [np.multiply((A.U[n] > 0).astype(int), A.U[n])  for n in range(A.ndims())]
B = ktensor.ktensor(np.ones(R), [np.random.randn(5,R), np.random.randn(5,R), np.random.randn(2,R)])
B.U = [np.multiply((B.U[n] > 0).astype(int), B.U[n]) for n in range(B.ndims())]

# Same comparison as above using the fos family of scores.
rawFOS = A.fos(B)
topFOS = A.top_fos(B, 2)
greedFOS = A.greedy_fos(B)

print rawFOS, topFOS, greedFOS
Ejemplo n.º 21
0
import numpy as np;
import CP_APR
import ktensor
import KLProjection

""" 
Test file associated with the CP decomposition using APR
"""

""" Test factorization of sparse matrix """
# NOTE(review): `sptensor` is used below but is not imported in this snippet --
# confirm it is brought into scope elsewhere.
# Five subscript rows with one value per row, for a tensor of size 5x5x2;
# presumably coordinate-format input -- TODO confirm against sptensor.sptensor.
subs = np.array([[0,3,1], [1,0,1], [1,2,1], [1,3,1], [3,0,0]]);
vals = np.array([[1],[1],[1],[1],[3]]);
siz = np.array([5,5,2]) # 5x5x2 tensor
X = sptensor.sptensor(subs, vals, siz)
# Hard-coded starting factor matrices (5x4, 5x4 and 2x4) so the starting
# point of the factorization is fixed rather than drawn at random.
U0 = np.array([[0.7689, 0.8843, 0.7487, 0.0900], [0.1673, 0.5880, 0.8256, 0.1117], [0.8620, 0.1548, 0.7900, 0.1363], [0.9899, 0.1999, 0.3185, 0.6787], [0.5144, 0.4070, 0.5341, 0.4952]])
U1 = np.array([[0.1897, 0.5606, 0.8790, 0.9900], [0.4950, 0.9296, 0.9889, 0.5277], [0.1476, 0.6967, 0.0006, 0.4795], [0.0550, 0.5828, 0.8654, 0.8013], [0.8507, 0.8154, 0.6126, 0.2278]])
U2 = np.array([[0.4981, 0.5747, 0.7386, 0.2467], [0.9009, 0.8452, 0.5860, 0.6664]])
# Initial model: unit weights for the 4 components plus the matrices above.
Minit = ktensor.ktensor(np.ones(4), [U0, U1, U2])
# Scores the initial model against itself; `fms` is not read again below.
fms = Minit.fms(Minit)

# Rank-4 factorization started from Minit, with maxiters=100.
Y, cpstats, modelStats = CP_APR.cp_apr(X,4, Minit=Minit, maxiters=100);
Y.normalize_sort(1)

# A second sparse tensor of size 2x5x2 with two entries; modes 1 and 2 have the
# same sizes as X, only mode 0 differs.
subs2 = np.array([[0,3,1], [1,2,0]])
vals2 = np.array([[1], [1]])
siz2 = np.array([2,5,2])
Xhat = sptensor.sptensor(subs2, vals2, siz2)

# Build the projection from the fitted factor matrices and apply it to Xhat
# along mode 0. Presumably this fits new mode-0 rows against the learned
# factors -- TODO confirm against KLProjection.
klproj = KLProjection.KLProjection(Y.U, 4)
# Seeded immediately before the call so any randomness inside projectSlice is repeatable.
np.random.seed(10)
klproj.projectSlice(Xhat, 0)
Ejemplo n.º 22
0
# load the sparse tensor information
# The three arrays are read one after another from the same handle, so the
# order of these calls must match the order in which they were saved
# (presumably subscripts, values, size -- TODO confirm against the writer).
subs = np.load(infile)
vals = np.load(infile)
siz = np.load(infile)
infile.close()
# now factor it
X = sptensor.sptensor(subs, vals, siz)
# Create a random initialization; the fixed seed makes the run repeatable
N = X.ndims()
np.random.seed(0)
F = []
for n in range(N):
    F.append(np.random.rand(X.shape[n], R))

Minit = ktensor.ktensor(np.ones(R), F)
Y, ystats, fmsStats, mstats = cp_apr(X, R, Minit=Minit, outputfile=outfile, maxiters=iter)

## automate the creation of the sql file
# Prefix every statistics row with the model id and dump it pipe-delimited,
# matching the `coldel|` option of the load statements written below.
ystats = np.column_stack((np.repeat(modelID, ystats.shape[0]), ystats))
np.savetxt(statsFile, ystats, delimiter="|")

fmsStats = np.column_stack((np.repeat(modelID, fmsStats.shape[0]), fmsStats))
np.savetxt(fmsFile, fmsStats, delimiter="|")

# Write the load script inside a `with` block so the file is flushed and
# closed even if formatting or writing fails part-way through. `open` replaces
# the Python 2-only `file` builtin and behaves the same for text writing.
with open(sqlLoadFile, "w") as sqlLoad:
    # one load statement per iteration output file
    for i in range(iter):
        dbFile = outfile.format(i)
        sqlLoad.write("load client from /home/joyceho/workspace/tensor/{0} of del modified by coldel| insert into joyceho.tensor_iter_factors;\n".format(dbFile))

    sqlLoad.write("load client from /home/joyceho/workspace/tensor/{0} of del modified by coldel| insert into joyceho.tensor_iter_results;\n".format(statsFile))
Ejemplo n.º 23
0
# Python 2 script fragment: runs `totalIter` seeded decompositions of the same
# input and writes each result to disk plus the SQL statements that load it.
print "Running Uniqueness Experiment with ID {0} and iterations {1}".format(exptID, maxIters)
# NOTE(review): modelOut is not closed within the lines visible here --
# confirm it is closed later in the script.
modelOut = file(sqlOutfile, "w")

for i in range(totalIter):
    # initialize the seed for repeatability
    np.random.seed(seedArray[i])
    print "Random Start with seed {0}".format(seedArray[i])
    Y, ystats, mstats = decompTools.decomposeCountTensor(inputFile, R=R, outerIters=maxIters, innerIters=innerIters, zeroTol=1e-4)
    Y.writeRawFile(rawfilePattern.format(exptID,i))
    dbYFile = outfilePattern.format(exptID, i)
    dbOut = decompTools.getDBOutput(Y, yaxis)
    # Prepend the experiment id and the run index as the first two columns.
    dbOut = np.column_stack((np.repeat(exptID, dbOut.shape[0]), np.repeat(i, dbOut.shape[0]), dbOut))
    # Insert a constant -100 column at column index 4; presumably a placeholder
    # for a database field -- TODO confirm against the table schema.
    dbOut = np.insert(dbOut, 4, np.repeat(-100, dbOut.shape[0]), axis=1)
    np.savetxt(dbYFile, dbOut, fmt="%s", delimiter="|")
    # Keep an independent copy of the weights and factor matrices of this run.
    yFactor.append(ktensor.ktensor(Y.lmbda.copy(), [Y.U[n].copy() for n in range(Y.ndims())]))
    # write to the sequel file for ease
    modelOut.write("insert into joyceho.tensor_uniq_models values({0},{1},{2},\'{3}\',{4},{5},{6},{7},{8});\n".format(exptID, i, labelID, exptDesc, maxIters, innerIters, mstats['LS'], mstats['LL'], mstats['KKT']))
    modelOut.write("load client from /home/joyceho/workspace/tensor/{0} of del modified by coldel| insert into joyceho.tensor_uniq_results;\n".format(dbYFile))

## Calculate all the scores
def __generateInfo(n, exptID, type, method, i, k):
    """
    Return an (n, 5) array whose every row is [exptID, type, method, i, k].

    The five values are stored as byte strings of at most 20 characters
    (dtype "S20"), so the result can sit next to other string columns.
    """
    descriptor = np.array([exptID, type, method, i, k], dtype="S20")
    # repeat the 5-entry row n times, then fold the flat result into n rows
    return np.tile(descriptor, n).reshape((n, 5))

scoreResults = np.empty((1,9), dtype="S20")
for i in range(totalIter):
    for k in range(i+1, totalIter):
        A = yFactor[i]
        B = yFactor[k]