Exemple #1
0
    def reconstructLowRank(U, s, V, k):
        """
        Partially reconstruct the matrix (U*s) V^T on a subset of its entries.

        Let m = U.shape[0] and n = V.shape[0]. The entries to compute are
        selected by k, which may be one of:

        * a numpy.ndarray of flat (row-major) indices in [0, m*n - 1];
          duplicate indices are removed before use;
        * a tuple (rowInds, colInds), which is passed on unchanged;
        * anything else, which is handed to numpy.random.randint as the
          sample size: that many flat indices are drawn with replacement and
          then de-duplicated, so at most k entries are reconstructed.

        :param U: array with one row per row of the reconstructed matrix.
        :param s: scaling applied to the columns of U (used as ``U * s``).
        :param V: array with one row per column of the reconstructed matrix.
        :param k: index array, index tuple or sample size as described above.
        :return: the result of SparseUtilsCython.partialReconstructPQ
            (presumably a sparse matrix -- confirm against that helper).
        """
        (m, n) = (U.shape[0], V.shape[0])

        # isinstance (rather than an exact type comparison) so that subclasses
        # of ndarray/tuple are treated as indices instead of falling through
        # to the random-sampling branch.
        if isinstance(k, tuple):
            rowInds, colInds = k
        else:
            if isinstance(k, numpy.ndarray):
                inds = k
            else:
                # The upper bound of randint is exclusive, so the indices lie
                # in [0, m*n - 1].
                inds = numpy.random.randint(0, n * m, k)

            inds = numpy.unique(inds)
            rowInds, colInds = numpy.unravel_index(inds, (m, n))

        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)
        X = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), U * s,
                                                   V)

        return X
Exemple #2
0
    def reconstructLowRankPQ(P, Q, inds):
        """
        Partially reconstruct $P*Q^T$ on the entries selected by inds.

        :param P: array with one row per row of the reconstructed matrix.
        :param Q: array with one row per column of the reconstructed matrix.
        :param inds: either a tuple (rowInds, colInds), or an array of flat
            (row-major) indices in [0, P.shape[0]*Q.shape[0]-1]. The original
            documentation requires the indices to be unique; this is not
            checked here.
        :return: the result of SparseUtilsCython.partialReconstructPQ, which
            the original documentation describes as a scipy csc_matrix.
        """
        (m, n) = (P.shape[0], Q.shape[0])

        if isinstance(inds, tuple):
            rowInds, colInds = inds
            # numpy.int was only an alias of the builtin int and has been
            # removed from NumPy; the builtin gives the same dtype.
            rowInds = numpy.array(rowInds, int)
            colInds = numpy.array(colInds, int)
        else:
            rowInds, colInds = numpy.unravel_index(inds, (m, n))

        X = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), P, Q)

        return X
Exemple #3
0
    def reconstructLowRankPQ(P, Q, inds):
        """
        Compute the entries of $P*Q^T$ selected by inds.

        :param P: left factor; P.shape[0] is the number of rows of the product.
        :param Q: right factor; Q.shape[0] is the number of columns of the
            product.
        :param inds: a (rowInds, colInds) tuple, or an array of flat
            (row-major) indices in [0, P.shape[0]*Q.shape[0]-1]. Uniqueness of
            the indices is expected by the original documentation but is not
            enforced here.
        :return: the result of SparseUtilsCython.partialReconstructPQ
            (a scipy csc_matrix according to the original documentation).
        """
        (m, n) = (P.shape[0], Q.shape[0])

        if isinstance(inds, tuple):
            rowInds, colInds = inds
            # The builtin int replaces the removed numpy.int alias; the
            # resulting dtype is unchanged.
            rowInds = numpy.array(rowInds, int)
            colInds = numpy.array(colInds, int)
        else:
            rowInds, colInds = numpy.unravel_index(inds, (m, n))

        X = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), P, Q)

        return X
Exemple #4
0
    def reconstructLowRank(U, s, V, k):
        """
        Take the factors U, s, V of a low rank matrix and compute only some of
        the entries of (U*s) V^T.

        With m = U.shape[0] and n = V.shape[0], the argument k chooses the
        entries:

        * numpy.ndarray: flat (row-major) indices in [0, m*n - 1]; repeated
          indices are dropped;
        * tuple: a (rowInds, colInds) pair that is used as given;
        * otherwise: k is the sample size given to numpy.random.randint, so
          indices are drawn with replacement and de-duplicated, giving at most
          k reconstructed entries.

        :param U: array with one row per row of the reconstructed matrix.
        :param s: scaling applied to the columns of U (used as ``U*s``).
        :param V: array with one row per column of the reconstructed matrix.
        :param k: index array, index tuple or sample size as described above.
        :return: the result of SparseUtilsCython.partialReconstructPQ
            (presumably a sparse matrix -- confirm against that helper).
        """
        (m, n) = (U.shape[0], V.shape[0])

        # isinstance keeps ndarray/tuple subclasses out of the random-sampling
        # branch, which an exact type comparison would send them to.
        if isinstance(k, tuple):
            rowInds, colInds = k
        else:
            if isinstance(k, numpy.ndarray):
                inds = k
            else:
                # randint excludes the upper bound: indices are in [0, m*n - 1]
                inds = numpy.random.randint(0, n*m, k)

            inds = numpy.unique(inds)
            rowInds, colInds = numpy.unravel_index(inds, (m, n))

        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)
        X = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), U*s, V)

        return X
Exemple #5
0
            def next(self):
                """
                Fetch the next matrix X from ``self.XIterator`` and compute a
                soft-thresholded low-rank factorisation of it.

                The iteration is warm-started from ``self.oldU``,
                ``self.oldS`` and ``self.oldV``. These are zeros on the first
                call (``self.j == 0``); on later calls they are resized to the
                shape of X ("initial" update algorithm) or reset to zeros
                ("zero" update algorithm). The loop runs until the relative
                change ``gamma`` is no larger than ``iterativeSoftImpute.eps``
                or ``iterativeSoftImpute.maxIterations`` iterations were done.

                :return: the tuple ``(newU, newS, newV)``. When
                    ``iterativeSoftImpute.postProcess`` is set, newU and newV
                    have one extra column holding the row and column means of
                    X, and newS is the result of ``unshrink``.
                :raises ValueError: if X is not a csc_matrix, or if the
                    configured SVD update algorithm or SVD algorithm is not
                    recognised.
                """
                X = self.XIterator.next()
                logging.debug("Learning on matrix with shape: " +
                              str(X.shape) + " and " + str(X.nnz) +
                              " non-zeros")

                if self.iterativeSoftImpute.weighted:
                    #Compute row and col probabilities
                    up, vp = SparseUtils.nonzeroRowColsProbs(X)
                    nzuInds = up == 0
                    nzvInds = vp == 0
                    #Add 1 where the probability is zero so the division is
                    #defined; those entries are zeroed just below. The builtin
                    #int replaces the removed numpy.int alias.
                    u = numpy.sqrt(1 / (up + numpy.array(nzuInds, int)))
                    v = numpy.sqrt(1 / (vp + numpy.array(nzvInds, int)))
                    u[nzuInds] = 0
                    v[nzvInds] = 0

                if self.rhos is not None:
                    self.iterativeSoftImpute.setRho(self.rhos.next())

                if not scipy.sparse.isspmatrix_csc(X):
                    raise ValueError("X must be a csc_matrix not " +
                                     str(type(X)))

                #Figure out what lambda should be
                #PROPACK has problems with convergence
                Y = scipy.sparse.csc_matrix(X, dtype=float)
                U, s, V = ExpSU.SparseUtils.svdArpack(Y, 1, kmax=20)
                del Y
                maxS = s[0]
                logging.debug("Largest singular value : " + str(maxS))

                (n, m) = X.shape

                if self.j == 0:
                    self.oldU = numpy.zeros((n, 1))
                    self.oldS = numpy.zeros(1)
                    self.oldV = numpy.zeros((m, 1))
                else:
                    oldN = self.oldU.shape[0]
                    oldM = self.oldV.shape[0]

                    if self.iterativeSoftImpute.updateAlg == "initial":
                        #Resize the previous factors to the shape of X
                        if n > oldN:
                            self.oldU = Util.extendArray(
                                self.oldU, (n, self.oldU.shape[1]))
                        elif n < oldN:
                            self.oldU = self.oldU[0:n, :]

                        if m > oldM:
                            self.oldV = Util.extendArray(
                                self.oldV, (m, self.oldV.shape[1]))
                        elif m < oldM:
                            #Compare with the old column count (oldM), not the
                            #old row count
                            self.oldV = self.oldV[0:m, :]
                    elif self.iterativeSoftImpute.updateAlg == "zero":
                        self.oldU = numpy.zeros((n, 1))
                        self.oldS = numpy.zeros(1)
                        self.oldV = numpy.zeros((m, 1))
                    else:
                        #Report the attribute that was actually tested above
                        raise ValueError(
                            "Unknown SVD update algorithm: " +
                            str(self.iterativeSoftImpute.updateAlg))

                rowInds, colInds = X.nonzero()

                #Start above eps so that the loop body runs at least once
                gamma = self.iterativeSoftImpute.eps + 1
                i = 0

                self.iterativeSoftImpute.measures = numpy.zeros(
                    (self.iterativeSoftImpute.maxIterations, 4))

                while gamma > self.iterativeSoftImpute.eps:
                    if i == self.iterativeSoftImpute.maxIterations:
                        logging.debug("Maximum number of iterations reached")
                        break

                    #Residual of X against the current estimate on the
                    #non-zero entries of X
                    ZOmega = SparseUtilsCython.partialReconstructPQ(
                        (rowInds, colInds), self.oldU * self.oldS, self.oldV)
                    Y = X - ZOmega
                    Y = csarray(Y, storagetype="row")
                    gc.collect()

                    if self.iterativeSoftImpute.svdAlg == "propack":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=False)
                        newU, newS, newV = SparseUtils.svdPropack(
                            L,
                            k=self.iterativeSoftImpute.k,
                            kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg == "arpack":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=False)
                        newU, newS, newV = SparseUtils.svdArpack(
                            L,
                            k=self.iterativeSoftImpute.k,
                            kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg == "svdUpdate":
                        newU, newS, newV = SVDUpdate.addSparseProjected(
                            self.oldU, self.oldS, self.oldV, Y,
                            self.iterativeSoftImpute.k)
                    elif self.iterativeSoftImpute.svdAlg == "rsvd":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=True)
                        newU, newS, newV = RandomisedSVD.svd(
                            L,
                            self.iterativeSoftImpute.k,
                            p=self.iterativeSoftImpute.p,
                            q=self.iterativeSoftImpute.q)
                    elif self.iterativeSoftImpute.svdAlg == "rsvdUpdate":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=True)
                        if self.j == 0:
                            newU, newS, newV = RandomisedSVD.svd(
                                L,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p,
                                q=self.iterativeSoftImpute.q)
                        else:
                            newU, newS, newV = RandomisedSVD.svd(
                                L,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p,
                                q=self.iterativeSoftImpute.qu,
                                omega=self.oldV)
                    elif self.iterativeSoftImpute.svdAlg == "rsvdUpdate2":

                        if self.j == 0:
                            L = LinOperatorUtils.sparseLowRankOp(Y,
                                                                 self.oldU,
                                                                 self.oldS,
                                                                 self.oldV,
                                                                 parallel=True)
                            newU, newS, newV = RandomisedSVD.svd(
                                L,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p,
                                q=self.iterativeSoftImpute.q)
                        else:
                            #Need linear operator which is U s V
                            L = LinOperatorUtils.lowRankOp(
                                self.oldU, self.oldS, self.oldV)
                            Y = GeneralLinearOperator.asLinearOperator(
                                Y, parallel=True)
                            newU, newS, newV = RandomisedSVD.updateSvd(
                                L,
                                self.oldU,
                                self.oldS,
                                self.oldV,
                                Y,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p)
                    else:
                        raise ValueError("Unknown SVD algorithm: " +
                                         self.iterativeSoftImpute.svdAlg)

                    #In the weighted case the thresholds are computed on the
                    #first iteration only and reused afterwards
                    if self.iterativeSoftImpute.weighted and i == 0:
                        delta = numpy.diag((u * newU.T).dot(newU))
                        pi = numpy.diag((v * newV.T).dot(newV))
                        lmbda = (maxS / numpy.max(
                            delta * pi)) * self.iterativeSoftImpute.rho
                        lmbdav = lmbda * delta * pi
                    elif not self.iterativeSoftImpute.weighted:
                        lmbda = maxS * self.iterativeSoftImpute.rho
                        if i == 0:
                            logging.debug("lambda: " + str(lmbda))
                        lmbdav = lmbda

                    newS = newS - lmbdav
                    #Soft threshold
                    newS = numpy.clip(newS, 0, numpy.max(newS))

                    #Squared norms of the old estimate and of the change,
                    #computed from the factors only
                    normOldZ = (self.oldS**2).sum()
                    normNewZmOldZ = (self.oldS**2).sum() + (
                        newS**2).sum() - 2 * numpy.trace(
                            (self.oldV.T.dot(newV * newS)).dot(
                                newU.T.dot(self.oldU * self.oldS)))

                    #We can get newZ == oldZ in which case we break
                    if normNewZmOldZ < self.tol:
                        gamma = 0
                    elif abs(normOldZ) < self.tol:
                        #Old estimate is (near) zero: avoid dividing by it and
                        #force another iteration
                        gamma = self.iterativeSoftImpute.eps + 1
                    else:
                        gamma = normNewZmOldZ / normOldZ

                    if self.iterativeSoftImpute.verbose:
                        theta1 = (
                            self.iterativeSoftImpute.k -
                            numpy.linalg.norm(self.oldU.T.dot(newU), 'fro')**
                            2) / self.iterativeSoftImpute.k
                        theta2 = (
                            self.iterativeSoftImpute.k -
                            numpy.linalg.norm(self.oldV.T.dot(newV), 'fro')**
                            2) / self.iterativeSoftImpute.k
                        thetaS = numpy.linalg.norm(
                            newS - self.oldS)**2 / numpy.linalg.norm(newS)**2
                        self.iterativeSoftImpute.measures[i, :] = numpy.array(
                            [gamma, theta1, theta2, thetaS])

                    self.oldU = newU.copy()
                    self.oldS = newS.copy()
                    self.oldV = newV.copy()

                    logging.debug("Iteration " + str(i) + " gamma=" +
                                  str(gamma))
                    i += 1

                if self.iterativeSoftImpute.postProcess:
                    #Add the mean vectors
                    previousS = newS
                    newU = numpy.c_[newU, numpy.array(X.mean(1)).ravel()]
                    newV = numpy.c_[newV, numpy.array(X.mean(0)).ravel()]
                    newS = self.iterativeSoftImpute.unshrink(X, newU, newV)

                    #Note that this increases the rank of U and V by 1
                    logging.debug("Difference in s after postprocessing: " +
                                  str(numpy.linalg.norm(previousS -
                                                        newS[0:-1])))

                logging.debug("Number of iterations for rho=" +
                              str(self.iterativeSoftImpute.rho) + ": " +
                              str(i))
                self.j += 1
                return (newU, newS, newV)
Exemple #6
0
    def learnModel(self, X, fullMatrices=True):
        """
        Learn the matrix completion using a sparse matrix X. This is the simple
        version of the soft impute algorithm in which we store the entire
        matrices, newZ and oldZ.

        One solution is computed for every value in self.rhos; each solution
        is warm-started from the factors of the previous one.

        :param X: the input matrix; must be a scipy.sparse csc_matrix.
        :param fullMatrices: if True each solution is returned as a lil_matrix
            holding the full product (newU*newS) newV^T, otherwise as the
            tuple (newU, newS, newV).
        :return: the list of solutions, or the single solution itself when
            self.rhos has exactly one element.
        :raises ValueError: if X is not a csc_matrix.
        """
        if not scipy.sparse.isspmatrix_csc(X):
            raise ValueError("Input matrix must be csc_matrix")

        (n, m) = X.shape
        oldU = numpy.zeros((n, 1))
        oldS = numpy.zeros(1)
        oldV = numpy.zeros((m, 1))
        omega = X.nonzero()
        tol = 10**-6

        # The builtin int/float replace the numpy.int/numpy.float aliases,
        # which have been removed from NumPy; the resulting dtypes are the same.
        rowInds = numpy.array(omega[0], int)
        colInds = numpy.array(omega[1], int)

        ZList = []

        for rho in self.rhos:
            # Start above eps so that the inner loop runs at least once
            gamma = self.eps + 1
            i = 0

            # The threshold is rho times the largest singular value of X
            Y = scipy.sparse.csc_matrix(X, dtype=float)
            U, s, V = ExpSU.SparseUtils.svdArpack(Y, 1, kmax=20)
            lmbda = rho*numpy.max(s)

            while gamma > self.eps:
                # Residual of X against the current estimate on the non-zero
                # entries of X
                ZOmega = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), oldU*oldS, oldV)
                Y = X - ZOmega
                Y = Y.tocsc()

                newU, newS, newV = ExpSU.SparseUtils.svdSparseLowRank(Y, oldU, oldS, oldV)

                #Soft threshold
                newS = newS - lmbda
                newS = numpy.clip(newS, 0, numpy.max(newS))

                # Squared norms of the old estimate and of the change,
                # computed from the factors only
                normOldZ = (oldS**2).sum()
                normNewZmOldZ = (oldS**2).sum() + (newS**2).sum() - 2*numpy.trace((oldV.T.dot(newV*newS)).dot(newU.T.dot(oldU*oldS)))

                #We can get newZ == oldZ in which case we break
                if normNewZmOldZ < tol:
                    gamma = 0
                elif abs(normOldZ) < tol:
                    # Old estimate is (near) zero: avoid dividing by it and
                    # force another iteration
                    gamma = self.eps + 1
                else:
                    gamma = normNewZmOldZ/normOldZ

                oldU = newU.copy()
                oldS = newS.copy()
                oldV = newV.copy()

                logging.debug("Iteration " + str(i) + " gamma="+str(gamma))
                i += 1

            logging.debug("Number of iterations for lambda="+str(rho) + ": " + str(i))

            if fullMatrices:
                newZ = scipy.sparse.lil_matrix((newU*newS).dot(newV.T))
                ZList.append(newZ)
            else:
                ZList.append((newU,newS,newV))

        if self.rhos.shape[0] != 1:
            return ZList
        else:
            return ZList[0]
            def next(self):
                """
                Take the next matrix X from ``self.XIterator`` and return a
                soft-thresholded low-rank factorisation of it.

                ``self.oldU``, ``self.oldS`` and ``self.oldV`` provide the
                starting point: zeros on the first call (``self.j == 0``),
                otherwise the previous factors resized to the shape of X
                ("initial" update algorithm) or zeros again ("zero" update
                algorithm). Iteration stops once the relative change ``gamma``
                is no larger than ``iterativeSoftImpute.eps`` or after
                ``iterativeSoftImpute.maxIterations`` iterations.

                :return: the tuple ``(newU, newS, newV)``. When
                    ``iterativeSoftImpute.postProcess`` is set, newU and newV
                    have one extra column holding the row and column means of
                    X, and newS is the result of ``unshrink``.
                :raises ValueError: if X is not a csc_matrix, or if the
                    configured SVD update algorithm or SVD algorithm is not
                    recognised.
                """
                X = self.XIterator.next()
                logging.debug("Learning on matrix with shape: " + str(X.shape) + " and " + str(X.nnz) + " non-zeros")

                if self.iterativeSoftImpute.weighted:
                    #Compute row and col probabilities
                    up, vp = SparseUtils.nonzeroRowColsProbs(X)
                    nzuInds = up==0
                    nzvInds = vp==0
                    #Add 1 where the probability is zero so the division is
                    #defined; those entries are zeroed just below. The builtin
                    #int replaces the removed numpy.int alias.
                    u = numpy.sqrt(1/(up + numpy.array(nzuInds, int)))
                    v = numpy.sqrt(1/(vp + numpy.array(nzvInds, int)))
                    u[nzuInds] = 0
                    v[nzvInds] = 0

                if self.rhos is not None:
                    self.iterativeSoftImpute.setRho(self.rhos.next())

                if not scipy.sparse.isspmatrix_csc(X):
                    raise ValueError("X must be a csc_matrix not " + str(type(X)))

                #Figure out what lambda should be
                #PROPACK has problems with convergence
                Y = scipy.sparse.csc_matrix(X, dtype=float)
                U, s, V = ExpSU.SparseUtils.svdArpack(Y, 1, kmax=20)
                del Y
                maxS = s[0]
                logging.debug("Largest singular value : " + str(maxS))

                (n, m) = X.shape

                if self.j == 0:
                    self.oldU = numpy.zeros((n, 1))
                    self.oldS = numpy.zeros(1)
                    self.oldV = numpy.zeros((m, 1))
                else:
                    oldN = self.oldU.shape[0]
                    oldM = self.oldV.shape[0]

                    if self.iterativeSoftImpute.updateAlg == "initial":
                        #Resize the previous factors to the shape of X
                        if n > oldN:
                            self.oldU = Util.extendArray(self.oldU, (n, self.oldU.shape[1]))
                        elif n < oldN:
                            self.oldU = self.oldU[0:n, :]

                        if m > oldM:
                            self.oldV = Util.extendArray(self.oldV, (m, self.oldV.shape[1]))
                        elif m < oldM:
                            #Compare with the old column count (oldM), not the
                            #old row count
                            self.oldV = self.oldV[0:m, :]
                    elif self.iterativeSoftImpute.updateAlg == "zero":
                        self.oldU = numpy.zeros((n, 1))
                        self.oldS = numpy.zeros(1)
                        self.oldV = numpy.zeros((m, 1))
                    else:
                        #Report the attribute that was actually tested above
                        raise ValueError("Unknown SVD update algorithm: " + str(self.iterativeSoftImpute.updateAlg))

                rowInds, colInds = X.nonzero()

                #Start above eps so that the loop body runs at least once
                gamma = self.iterativeSoftImpute.eps + 1
                i = 0

                self.iterativeSoftImpute.measures = numpy.zeros((self.iterativeSoftImpute.maxIterations, 4))

                while gamma > self.iterativeSoftImpute.eps:
                    if i == self.iterativeSoftImpute.maxIterations:
                        logging.debug("Maximum number of iterations reached")
                        break

                    #Residual of X against the current estimate on the
                    #non-zero entries of X
                    ZOmega = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), self.oldU*self.oldS, self.oldV)
                    Y = X - ZOmega
                    Y = csarray(Y, storagetype="row")
                    gc.collect()

                    if self.iterativeSoftImpute.svdAlg=="propack":
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=False)
                        newU, newS, newV = SparseUtils.svdPropack(L, k=self.iterativeSoftImpute.k, kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg=="arpack":
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=False)
                        newU, newS, newV = SparseUtils.svdArpack(L, k=self.iterativeSoftImpute.k, kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg=="svdUpdate":
                        newU, newS, newV = SVDUpdate.addSparseProjected(self.oldU, self.oldS, self.oldV, Y, self.iterativeSoftImpute.k)
                    elif self.iterativeSoftImpute.svdAlg=="rsvd":
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                        newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.q)
                    elif self.iterativeSoftImpute.svdAlg=="rsvdUpdate":
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                        if self.j == 0:
                            newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.q)
                        else:
                            newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.qu, omega=self.oldV)
                    elif self.iterativeSoftImpute.svdAlg=="rsvdUpdate2":

                        if self.j == 0:
                            L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                            newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.q)
                        else:
                            #Need linear operator which is U s V
                            L = LinOperatorUtils.lowRankOp(self.oldU, self.oldS, self.oldV)
                            Y = GeneralLinearOperator.asLinearOperator(Y, parallel=True)
                            newU, newS, newV = RandomisedSVD.updateSvd(L, self.oldU, self.oldS, self.oldV, Y, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p)
                    else:
                        raise ValueError("Unknown SVD algorithm: " + self.iterativeSoftImpute.svdAlg)

                    #In the weighted case the thresholds are computed on the
                    #first iteration only and reused afterwards
                    if self.iterativeSoftImpute.weighted and i==0:
                        delta = numpy.diag((u*newU.T).dot(newU))
                        pi = numpy.diag((v*newV.T).dot(newV))
                        lmbda = (maxS/numpy.max(delta*pi))*self.iterativeSoftImpute.rho
                        lmbdav = lmbda*delta*pi
                    elif not self.iterativeSoftImpute.weighted:
                        lmbda = maxS*self.iterativeSoftImpute.rho
                        if i==0:
                            logging.debug("lambda: " + str(lmbda))
                        lmbdav = lmbda

                    newS = newS - lmbdav
                    #Soft threshold
                    newS = numpy.clip(newS, 0, numpy.max(newS))

                    #Squared norms of the old estimate and of the change,
                    #computed from the factors only
                    normOldZ = (self.oldS**2).sum()
                    normNewZmOldZ = (self.oldS**2).sum() + (newS**2).sum() - 2*numpy.trace((self.oldV.T.dot(newV*newS)).dot(newU.T.dot(self.oldU*self.oldS)))

                    #We can get newZ == oldZ in which case we break
                    if normNewZmOldZ < self.tol:
                        gamma = 0
                    elif abs(normOldZ) < self.tol:
                        #Old estimate is (near) zero: avoid dividing by it and
                        #force another iteration
                        gamma = self.iterativeSoftImpute.eps + 1
                    else:
                        gamma = normNewZmOldZ/normOldZ

                    if self.iterativeSoftImpute.verbose:
                        theta1 = (self.iterativeSoftImpute.k - numpy.linalg.norm(self.oldU.T.dot(newU), 'fro')**2)/self.iterativeSoftImpute.k
                        theta2 = (self.iterativeSoftImpute.k - numpy.linalg.norm(self.oldV.T.dot(newV), 'fro')**2)/self.iterativeSoftImpute.k
                        thetaS = numpy.linalg.norm(newS - self.oldS)**2/numpy.linalg.norm(newS)**2
                        self.iterativeSoftImpute.measures[i, :] = numpy.array([gamma, theta1, theta2, thetaS])

                    self.oldU = newU.copy()
                    self.oldS = newS.copy()
                    self.oldV = newV.copy()

                    logging.debug("Iteration " + str(i) + " gamma="+str(gamma))
                    i += 1

                if self.iterativeSoftImpute.postProcess:
                    #Add the mean vectors
                    previousS = newS
                    newU = numpy.c_[newU, numpy.array(X.mean(1)).ravel()]
                    newV = numpy.c_[newV, numpy.array(X.mean(0)).ravel()]
                    newS = self.iterativeSoftImpute.unshrink(X, newU, newV)

                    #Note that this increases the rank of U and V by 1
                    logging.debug("Difference in s after postprocessing: " + str(numpy.linalg.norm(previousS - newS[0:-1])))

                logging.debug("Number of iterations for rho="+str(self.iterativeSoftImpute.rho) + ": " + str(i))
                self.j += 1
                return (newU, newS, newV)