Example #1
0
 def testUpdateSvd(self): 
     """
     Check that RandomisedSVD.updateSvd improves on a stale decomposition.

     For several random matrices X and perturbations E, a rank-k SVD of X
     is updated with E, and the reconstruction error against X + E must
     not exceed that of the un-updated factors. The same property is
     checked when X and E are passed as linear operators.
     """
     numRuns = 10

     for _ in range(numRuns):
         m, n = numpy.random.randint(10, 100), numpy.random.randint(10, 100)
         k = 3

         X = numpy.random.rand(m, n)
         U, s, V = RandomisedSVD.svd(X, k)

         E = numpy.random.randn(m, n) * 0.2
         XE = X + E

         U3, s3, V3 = RandomisedSVD.updateSvd(X, U, s, V, E, k)

         # error1: stale factors of X measured against the perturbed matrix
         # error3: factors returned by the update
         error1 = numpy.linalg.norm(XE - (U*s).dot(V.T))
         error3 = numpy.linalg.norm(XE - (U3*s3).dot(V3.T))
         self.assertGreaterEqual(error1, error3)

         # Test use of linear operators. The dense X and E are kept intact
         # so the error is still measured against the explicit matrix XE.
         XOp = GeneralLinearOperator.asLinearOperator(X)
         EOp = GeneralLinearOperator.asLinearOperator(E)

         U4, s4, V4 = RandomisedSVD.updateSvd(XOp, U, s, V, EOp, k)
         # Previously this error was computed from the factors of a separate
         # direct SVD and compared with itself, so the linear-operator
         # result was never actually checked.
         error4 = numpy.linalg.norm(XE - (U4*s4).dot(V4.T))
         self.assertGreaterEqual(error1, error4)
Example #2
0
            def next(self):
                """
                Fetch the next matrix from XIterator and fit a soft-impute
                decomposition to it, warm-starting from the factors stored
                on this object by the previous call.

                Returns a tuple (newU, newS, newV). When postProcess is set,
                the row and column means of X are appended as an extra
                column of newU and newV and newS is recomputed by unshrink.

                Raises ValueError if X is not a scipy csc_matrix, or if the
                configured updateAlg or svdAlg is not recognised.
                """
                X = self.XIterator.next()
                learner = self.iterativeSoftImpute
                logging.debug("Learning on matrix with shape: " +
                              str(X.shape) + " and " + str(X.nnz) +
                              " non-zeros")

                if learner.weighted:
                    #Compute row and col probabilities
                    up, vp = SparseUtils.nonzeroRowColsProbs(X)
                    nzuInds = up == 0
                    nzvInds = vp == 0
                    # Adding the zero-mask keeps the division finite; the
                    # affected entries are overwritten with 0 just below.
                    # The builtin int replaces numpy.int, which was removed
                    # from NumPy.
                    u = numpy.sqrt(1 / (up + numpy.array(nzuInds, int)))
                    v = numpy.sqrt(1 / (vp + numpy.array(nzvInds, int)))
                    u[nzuInds] = 0
                    v[nzvInds] = 0

                if self.rhos is not None:
                    learner.setRho(self.rhos.next())

                if not scipy.sparse.isspmatrix_csc(X):
                    raise ValueError("X must be a csc_matrix not " +
                                     str(type(X)))

                #Figure out what lambda should be
                #PROPACK has problems with convergence
                Y = scipy.sparse.csc_matrix(X, dtype=float)
                _, s, _ = ExpSU.SparseUtils.svdArpack(Y, 1, kmax=20)
                del Y
                maxS = s[0]
                logging.debug("Largest singular value : " + str(maxS))

                (n, m) = X.shape

                if self.j == 0:
                    # First matrix: start from an all-zero rank-1 solution
                    self.oldU = numpy.zeros((n, 1))
                    self.oldS = numpy.zeros(1)
                    self.oldV = numpy.zeros((m, 1))
                else:
                    oldN = self.oldU.shape[0]
                    oldM = self.oldV.shape[0]

                    if learner.updateAlg == "initial":
                        # Resize the previous factors to the new shape
                        if n > oldN:
                            self.oldU = Util.extendArray(
                                self.oldU, (n, self.oldU.shape[1]))
                        elif n < oldN:
                            self.oldU = self.oldU[0:n, :]

                        if m > oldM:
                            self.oldV = Util.extendArray(
                                self.oldV, (m, self.oldV.shape[1]))
                        elif m < oldM:
                            # Compare columns with the old column count
                            # (this used to compare against oldN)
                            self.oldV = self.oldV[0:m, :]
                    elif learner.updateAlg == "zero":
                        self.oldU = numpy.zeros((n, 1))
                        self.oldS = numpy.zeros(1)
                        self.oldV = numpy.zeros((m, 1))
                    else:
                        raise ValueError("Unknown SVD update algorithm: " +
                                         learner.updateAlg)

                rowInds, colInds = X.nonzero()

                # Start above eps so the loop body runs at least once
                # (unless maxIterations is 0)
                gamma = learner.eps + 1
                i = 0

                learner.measures = numpy.zeros((learner.maxIterations, 4))

                while gamma > learner.eps:
                    if i == learner.maxIterations:
                        logging.debug("Maximum number of iterations reached")
                        break

                    # Residual of the current low-rank solution on the
                    # non-zero entries of X
                    ZOmega = SparseUtilsCython.partialReconstructPQ(
                        (rowInds, colInds), self.oldU * self.oldS, self.oldV)
                    Y = X - ZOmega
                    Y = csarray(Y, storagetype="row")
                    gc.collect()

                    if learner.svdAlg == "propack":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=False)
                        newU, newS, newV = SparseUtils.svdPropack(
                            L, k=learner.k, kmax=learner.kmax)
                    elif learner.svdAlg == "arpack":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=False)
                        newU, newS, newV = SparseUtils.svdArpack(
                            L, k=learner.k, kmax=learner.kmax)
                    elif learner.svdAlg == "svdUpdate":
                        newU, newS, newV = SVDUpdate.addSparseProjected(
                            self.oldU, self.oldS, self.oldV, Y, learner.k)
                    elif learner.svdAlg == "rsvd":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=True)
                        newU, newS, newV = RandomisedSVD.svd(
                            L, learner.k, p=learner.p, q=learner.q)
                    elif learner.svdAlg == "rsvdUpdate":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=True)
                        if self.j == 0:
                            newU, newS, newV = RandomisedSVD.svd(
                                L, learner.k, p=learner.p, q=learner.q)
                        else:
                            # Later matrices seed the randomised SVD with
                            # the previous V and use the qu setting
                            newU, newS, newV = RandomisedSVD.svd(
                                L,
                                learner.k,
                                p=learner.p,
                                q=learner.qu,
                                omega=self.oldV)
                    elif learner.svdAlg == "rsvdUpdate2":
                        if self.j == 0:
                            L = LinOperatorUtils.sparseLowRankOp(
                                Y,
                                self.oldU,
                                self.oldS,
                                self.oldV,
                                parallel=True)
                            newU, newS, newV = RandomisedSVD.svd(
                                L, learner.k, p=learner.p, q=learner.q)
                        else:
                            #Need linear operator which is U s V
                            L = LinOperatorUtils.lowRankOp(
                                self.oldU, self.oldS, self.oldV)
                            Y = GeneralLinearOperator.asLinearOperator(
                                Y, parallel=True)
                            newU, newS, newV = RandomisedSVD.updateSvd(
                                L,
                                self.oldU,
                                self.oldS,
                                self.oldV,
                                Y,
                                learner.k,
                                p=learner.p)
                    else:
                        raise ValueError("Unknown SVD algorithm: " +
                                         learner.svdAlg)

                    if learner.weighted and i == 0:
                        # The weighted threshold vector is computed on the
                        # first iteration only and reused afterwards
                        delta = numpy.diag((u * newU.T).dot(newU))
                        pi = numpy.diag((v * newV.T).dot(newV))
                        lmbda = (maxS / numpy.max(delta * pi)) * learner.rho
                        lmbdav = lmbda * delta * pi
                    elif not learner.weighted:
                        lmbda = maxS * learner.rho
                        if i == 0:
                            logging.debug("lambda: " + str(lmbda))
                        lmbdav = lmbda

                    #Soft threshold: shift and floor at zero. numpy.maximum
                    #is used because clip(newS, 0, max(newS)) yields negative
                    #values when every shifted value is below zero.
                    newS = numpy.maximum(newS - lmbdav, 0)

                    # Squared norms of oldZ and of newZ - oldZ computed from
                    # the factors (presumably relies on U and V having
                    # orthonormal columns - TODO confirm)
                    normOldZ = (self.oldS**2).sum()
                    normNewZmOldZ = (self.oldS**2).sum() + (
                        newS**2).sum() - 2 * numpy.trace(
                            (self.oldV.T.dot(newV * newS)).dot(
                                newU.T.dot(self.oldU * self.oldS)))

                    #We can get newZ == oldZ in which case we break
                    if normNewZmOldZ < self.tol:
                        gamma = 0
                    elif abs(normOldZ) < self.tol:
                        # Avoid dividing by ~0; force another iteration
                        gamma = learner.eps + 1
                    else:
                        gamma = normNewZmOldZ / normOldZ

                    if learner.verbose:
                        theta1 = (
                            learner.k -
                            numpy.linalg.norm(self.oldU.T.dot(newU), 'fro')**
                            2) / learner.k
                        theta2 = (
                            learner.k -
                            numpy.linalg.norm(self.oldV.T.dot(newV), 'fro')**
                            2) / learner.k
                        thetaS = numpy.linalg.norm(
                            newS - self.oldS)**2 / numpy.linalg.norm(newS)**2
                        learner.measures[i, :] = numpy.array(
                            [gamma, theta1, theta2, thetaS])

                    self.oldU = newU.copy()
                    self.oldS = newS.copy()
                    self.oldV = newV.copy()

                    logging.debug("Iteration " + str(i) + " gamma=" +
                                  str(gamma))
                    i += 1

                if learner.postProcess:
                    #Add the mean vectors
                    previousS = newS
                    newU = numpy.c_[newU, numpy.array(X.mean(1)).ravel()]
                    newV = numpy.c_[newV, numpy.array(X.mean(0)).ravel()]
                    newS = learner.unshrink(X, newU, newV)

                    #Note that this increases the rank of U and V by 1
                    logging.debug("Difference in s after postprocessing: " +
                                  str(numpy.linalg.norm(previousS -
                                                        newS[0:-1])))

                logging.debug("Number of iterations for rho=" +
                              str(learner.rho) + ": " + str(i))
                self.j += 1
                return (newU, newS, newV)
            def next(self):
                """
                Take the next matrix from XIterator and compute its
                soft-impute decomposition, starting from the factors kept
                on this object by the previous call.

                Returns the tuple (newU, newS, newV). If postProcess is
                enabled, the row/column means of X are appended as an extra
                column to newU/newV and newS comes from unshrink.

                Raises ValueError when X is not a scipy csc_matrix, or when
                updateAlg or svdAlg holds an unrecognised value.
                """
                X = self.XIterator.next()
                learner = self.iterativeSoftImpute
                logging.debug("Learning on matrix with shape: " + str(X.shape) + " and " + str(X.nnz) + " non-zeros")

                if learner.weighted:
                    #Compute row and col probabilities
                    up, vp = SparseUtils.nonzeroRowColsProbs(X)
                    nzuInds = up==0
                    nzvInds = vp==0
                    #The zero-mask is added so the division stays finite; those
                    #entries are reset to 0 below. Builtin int is used because
                    #the numpy.int alias was removed from NumPy.
                    u = numpy.sqrt(1/(up + numpy.array(nzuInds, int)))
                    v = numpy.sqrt(1/(vp + numpy.array(nzvInds, int)))
                    u[nzuInds] = 0
                    v[nzvInds] = 0

                if self.rhos is not None:
                    learner.setRho(self.rhos.next())

                if not scipy.sparse.isspmatrix_csc(X):
                    raise ValueError("X must be a csc_matrix not " + str(type(X)))

                #Figure out what lambda should be
                #PROPACK has problems with convergence
                Y = scipy.sparse.csc_matrix(X, dtype=float)
                _, s, _ = ExpSU.SparseUtils.svdArpack(Y, 1, kmax=20)
                del Y
                maxS = s[0]
                logging.debug("Largest singular value : " + str(maxS))

                (n, m) = X.shape

                if self.j == 0:
                    #First matrix: begin with an all-zero rank-1 solution
                    self.oldU = numpy.zeros((n, 1))
                    self.oldS = numpy.zeros(1)
                    self.oldV = numpy.zeros((m, 1))
                else:
                    oldN = self.oldU.shape[0]
                    oldM = self.oldV.shape[0]

                    if learner.updateAlg == "initial":
                        #Grow or truncate the previous factors to fit X
                        if n > oldN:
                            self.oldU = Util.extendArray(self.oldU, (n, self.oldU.shape[1]))
                        elif n < oldN:
                            self.oldU = self.oldU[0:n, :]

                        if m > oldM:
                            self.oldV = Util.extendArray(self.oldV, (m, self.oldV.shape[1]))
                        elif m < oldM:
                            #Column count is compared with oldM (was oldN)
                            self.oldV = self.oldV[0:m, :]
                    elif learner.updateAlg == "zero":
                        self.oldU = numpy.zeros((n, 1))
                        self.oldS = numpy.zeros(1)
                        self.oldV = numpy.zeros((m, 1))
                    else:
                        raise ValueError("Unknown SVD update algorithm: " + learner.updateAlg)

                rowInds, colInds = X.nonzero()

                #gamma starts above eps so the loop is entered
                gamma = learner.eps + 1
                i = 0

                learner.measures = numpy.zeros((learner.maxIterations, 4))

                while gamma > learner.eps:
                    if i == learner.maxIterations:
                        logging.debug("Maximum number of iterations reached")
                        break

                    #Residual of the current solution on the non-zeros of X
                    ZOmega = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), self.oldU*self.oldS, self.oldV)
                    Y = X - ZOmega
                    Y = csarray(Y, storagetype="row")
                    gc.collect()

                    if learner.svdAlg=="propack":
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=False)
                        newU, newS, newV = SparseUtils.svdPropack(L, k=learner.k, kmax=learner.kmax)
                    elif learner.svdAlg=="arpack":
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=False)
                        newU, newS, newV = SparseUtils.svdArpack(L, k=learner.k, kmax=learner.kmax)
                    elif learner.svdAlg=="svdUpdate":
                        newU, newS, newV = SVDUpdate.addSparseProjected(self.oldU, self.oldS, self.oldV, Y, learner.k)
                    elif learner.svdAlg=="rsvd":
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                        newU, newS, newV = RandomisedSVD.svd(L, learner.k, p=learner.p, q=learner.q)
                    elif learner.svdAlg=="rsvdUpdate":
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                        if self.j == 0:
                            newU, newS, newV = RandomisedSVD.svd(L, learner.k, p=learner.p, q=learner.q)
                        else:
                            #Later matrices seed the sketch with the previous V
                            newU, newS, newV = RandomisedSVD.svd(L, learner.k, p=learner.p, q=learner.qu, omega=self.oldV)
                    elif learner.svdAlg=="rsvdUpdate2":
                        if self.j == 0:
                            L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                            newU, newS, newV = RandomisedSVD.svd(L, learner.k, p=learner.p, q=learner.q)
                        else:
                            #Need linear operator which is U s V
                            L = LinOperatorUtils.lowRankOp(self.oldU, self.oldS, self.oldV)
                            Y = GeneralLinearOperator.asLinearOperator(Y, parallel=True)
                            newU, newS, newV = RandomisedSVD.updateSvd(L, self.oldU, self.oldS, self.oldV, Y, learner.k, p=learner.p)
                    else:
                        raise ValueError("Unknown SVD algorithm: " + learner.svdAlg)

                    if learner.weighted and i==0:
                        #Weighted thresholds are computed once and then reused
                        delta = numpy.diag((u*newU.T).dot(newU))
                        pi = numpy.diag((v*newV.T).dot(newV))
                        lmbda = (maxS/numpy.max(delta*pi))*learner.rho
                        lmbdav = lmbda*delta*pi
                    elif not learner.weighted:
                        lmbda = maxS*learner.rho
                        if i==0:
                            logging.debug("lambda: " + str(lmbda))
                        lmbdav = lmbda

                    #Soft threshold: shift then floor at zero. numpy.maximum is
                    #used since clip(newS, 0, max(newS)) gives negative values
                    #whenever all shifted values are below zero.
                    newS = numpy.maximum(newS - lmbdav, 0)

                    #Squared norms of oldZ and newZ - oldZ from the factors
                    #(presumably relies on orthonormal columns in U and V - TODO confirm)
                    normOldZ = (self.oldS**2).sum()
                    normNewZmOldZ = (self.oldS**2).sum() + (newS**2).sum() - 2*numpy.trace((self.oldV.T.dot(newV*newS)).dot(newU.T.dot(self.oldU*self.oldS)))

                    #We can get newZ == oldZ in which case we break
                    if normNewZmOldZ < self.tol:
                        gamma = 0
                    elif abs(normOldZ) < self.tol:
                        #Avoid dividing by ~0; force another iteration
                        gamma = learner.eps + 1
                    else:
                        gamma = normNewZmOldZ/normOldZ

                    if learner.verbose:
                        theta1 = (learner.k - numpy.linalg.norm(self.oldU.T.dot(newU), 'fro')**2)/learner.k
                        theta2 = (learner.k - numpy.linalg.norm(self.oldV.T.dot(newV), 'fro')**2)/learner.k
                        thetaS = numpy.linalg.norm(newS - self.oldS)**2/numpy.linalg.norm(newS)**2
                        learner.measures[i, :] = numpy.array([gamma, theta1, theta2, thetaS])

                    self.oldU = newU.copy()
                    self.oldS = newS.copy()
                    self.oldV = newV.copy()

                    logging.debug("Iteration " + str(i) + " gamma="+str(gamma))
                    i += 1

                if learner.postProcess:
                    #Add the mean vectors
                    previousS = newS
                    newU = numpy.c_[newU, numpy.array(X.mean(1)).ravel()]
                    newV = numpy.c_[newV, numpy.array(X.mean(0)).ravel()]
                    newS = learner.unshrink(X, newU, newV)

                    #Note that this increases the rank of U and V by 1
                    logging.debug("Difference in s after postprocessing: " + str(numpy.linalg.norm(previousS - newS[0:-1])))

                logging.debug("Number of iterations for rho="+str(learner.rho) + ": " + str(i))
                self.j += 1
                return (newU, newS, newV)