예제 #1
0
    def _addSparseRSVD(U, s, V, X, k=10, kX=None, kRand=None, q=None):
        """
        Perform a randomised SVD of the matrix X + U diag(s) V.T. We use the
        top singular vectors of X plus random Gaussian vectors to build the
        range approximation, followed by q power iterations.

        :param U: current left singular vectors (m x r)
        :param s: current singular values (length r)
        :param V: current right singular vectors (n x r)
        :param X: sparse matrix to add to the decomposition
        :param k: rank of the returned decomposition
        :param kX: rank of the SVD of X used to seed the projection (defaults to k)
        :param kRand: number of extra random projection vectors (defaults to k)
        :param q: number of power iterations (defaults to 1)
        :return: updated (U, s, V) of rank k
        """
        # PEP 8: compare to None with "is", never "==" — "==" can misbehave
        # for objects overriding __eq__ (e.g. numpy arrays)
        if kX is None:
            kX = k
        if kRand is None:
            kRand = k
        if q is None:
            q = 1

        m, n = X.shape
        Us = U*s

        kX = numpy.min([m, n, kX])
        UX, sX, VX = SparseUtils.svdPropack(X, kX)
        omega = numpy.c_[V, VX, numpy.random.randn(n, kRand)]

        def rMultA(x):
            # Right-multiply by A = U diag(s) V.T + X without forming A
            return Us.dot(V.T.dot(x)) + X.dot(x)
        def rMultAT(x):
            # Right-multiply by A.T
            return V.dot(Us.T.dot(x)) + X.T.dot(x)

        # Power iterations sharpen the range estimate Y
        Y = rMultA(omega)
        for i in range(q):
            Y = rMultAT(Y)
            Y = rMultA(Y)

        Q, R = numpy.linalg.qr(Y)
        B = rMultAT(Q).T
        U, s, VT = numpy.linalg.svd(B, full_matrices=False)
        # Keep the k largest singular triplets
        U, s, V = Util.indSvd(U, s, VT, numpy.flipud(numpy.argsort(s))[:k])
        U = Q.dot(U)

        return U, s, V
예제 #2
0
    def profilePropackSvd(self):
        """Run PROPACK SVD on the contacts training matrix and print the singular values."""
        matrixPath = PathDefaults.getDataDir() + "erasm/contacts/" + "contacts_train"

        A = scipy.io.mmread(matrixPath)
        A = scipy.sparse.csc_matrix(A, dtype=numpy.int8)

        k = 500
        U, s, V = SparseUtils.svdPropack(A, k, kmax=k * 5)

        print(s)

        #Memory consumption is dependent on kmax
        print("All done")
예제 #3
0
    def profilePropackSvd(self):
        """Profile the memory behaviour of the PROPACK SVD on the contacts data."""
        baseDir = PathDefaults.getDataDir() + "erasm/contacts/"
        fileName = baseDir + "contacts_train"

        A = scipy.sparse.csc_matrix(scipy.io.mmread(fileName), dtype=numpy.int8)

        rank = 500
        U, s, V = SparseUtils.svdPropack(A, rank, kmax=rank * 5)

        print(s)

        # Memory consumption is dependent on kmax
        print("All done")
예제 #4
0
    def testSvdPropack(self):
        """Check svdPropack against numpy's dense SVD on a random low-rank matrix."""
        shape = (500, 100)
        rank = 5
        numInds = 1000

        X, U, s, V = SparseUtils.generateSparseLowRank(shape, rank, numInds, verbose=True)

        numSingVals = 10
        U, s, V = SparseUtils.svdPropack(X, numSingVals)

        denseU, denseS, denseVT = numpy.linalg.svd(X.todense())
        denseV = denseVT.T

        # Singular values should agree; vectors only up to sign
        nptst.assert_array_almost_equal(s, denseS[0:numSingVals])
        nptst.assert_array_almost_equal(numpy.abs(U), numpy.abs(denseU[:, 0:numSingVals]), 3)
        nptst.assert_array_almost_equal(numpy.abs(V), numpy.abs(denseV[:, 0:numSingVals]), 3)
예제 #5
0
    def testSvdPropack(self):
        """Compare the PROPACK SVD with the full dense SVD of the same matrix."""
        m, n = 500, 100
        X, U, s, V = SparseUtils.generateSparseLowRank((m, n), 5, 1000, verbose=True)

        topK = 10
        U, s, V = SparseUtils.svdPropack(X, topK)

        fullU, fullS, fullVT = numpy.linalg.svd(X.todense())
        fullV = fullVT.T

        # Values must match; vectors are compared in absolute value since
        # singular vectors are only defined up to sign
        nptst.assert_array_almost_equal(s, fullS[0:topK])
        nptst.assert_array_almost_equal(numpy.abs(U), numpy.abs(fullU[:, 0:topK]), 3)
        nptst.assert_array_almost_equal(numpy.abs(V), numpy.abs(fullV[:, 0:topK]), 3)
예제 #6
0
  def _addSparseProjected(U, s, V, X, k=10):
      """
      Update the decomposition U diag(s) V.T after adding the sparse matrix X,
      by projecting X onto the current singular subspaces and re-diagonalising
      a small core matrix W.

      :param U: left singular vectors (m x kk)
      :param s: singular values (length kk)
      :param V: right singular vectors (n x kk)
      :param X: sparse update matrix (m x n)
      :param k: number of singular triplets to return
      :return: updated (Ures, sres, Vres)
      """
      kk = len(s)
      m, n = X.shape
      
      # decompose X as X1 X2.T using only the columns with non-zeros
      inds = scipy.unique(X.nonzero()[1])
      if len(inds) == 0:
          # X is the zero matrix: nothing to add. The original code left X1/X2
          # unbound here and crashed; return the truncated input instead
          # (assumes s is sorted in descending order — TODO confirm with callers)
          return U[:, :k], s[:k], V[:, :k]

      X1 = numpy.array(X[:,inds].todense())
      X2 = numpy.zeros((n, len(inds)))
      X2[(inds, numpy.arange(len(inds)))] = 1
      
      nptst.assert_array_almost_equal(X.todense(), X1.dot(X2.T))        
      
      # svd decomposition of projections of X1 and X2
      UTX1 = U.T.dot(X1)
      Q1, R1 = numpy.linalg.qr(X1-U.dot(UTX1))
      k1 = Q1.shape[1]
      VTX2 = V.T.dot(X2)
      Q2, R2 = numpy.linalg.qr(X2-V.dot(VTX2))
      k2 = Q2.shape[1]
      
      # construct W
      W = scipy.zeros((kk+k1, kk+k2))
      # bug fix: the diagonal of existing singular values has length kk
      # (= len(s)), not k — using arange(k) failed whenever k != kk
      W[(numpy.arange(kk), numpy.arange(kk))] = s
      W[:kk,:kk] += UTX1.dot(VTX2.T)
      W[:kk,kk:] = UTX1.dot(R2.T)
      W[kk:,:kk] = R1.dot(VTX2.T)
      W[kk:,kk:] = R1.dot(R2.T)
      
      # svd of W
      W = scipy.sparse.csc_matrix(W)
      UW, sW, VW = SparseUtils.svdPropack(W, k)
      VWT = VW.T
 
      # reconstruct the correct decomposition
      Ures = numpy.c_[U, Q1].dot(UW)
      sres = sW
      Vres = numpy.c_[V, Q2].dot(VWT.T)
      
      return Ures, sres, Vres 
예제 #7
0
    def _addSparseProjected(U, s, V, X, k=10):
        """
        Update the decomposition U diag(s) V.T after adding the sparse matrix
        X, by projecting X onto the current singular subspaces and
        re-diagonalising a small core matrix W.

        :param U: left singular vectors (m x kk)
        :param s: singular values (length kk)
        :param V: right singular vectors (n x kk)
        :param X: sparse update matrix (m x n)
        :param k: number of singular triplets to return
        :return: updated (Ures, sres, Vres)
        """
        kk = len(s)
        m, n = X.shape

        # decompose X as X1 X2.T using only the columns with non-zeros
        inds = scipy.unique(X.nonzero()[1])
        if len(inds) == 0:
            # X is the zero matrix: nothing to add. The original code left
            # X1/X2 unbound here and crashed; return the truncated input
            # (assumes s sorted in descending order — TODO confirm with callers)
            return U[:, :k], s[:k], V[:, :k]

        X1 = numpy.array(X[:, inds].todense())
        X2 = numpy.zeros((n, len(inds)))
        X2[(inds, numpy.arange(len(inds)))] = 1

        nptst.assert_array_almost_equal(X.todense(), X1.dot(X2.T))

        # svd decomposition of projections of X1 and X2
        UTX1 = U.T.dot(X1)
        Q1, R1 = numpy.linalg.qr(X1 - U.dot(UTX1))
        k1 = Q1.shape[1]
        VTX2 = V.T.dot(X2)
        Q2, R2 = numpy.linalg.qr(X2 - V.dot(VTX2))
        k2 = Q2.shape[1]

        # construct W
        W = scipy.zeros((kk + k1, kk + k2))
        # bug fix: the diagonal of existing singular values has length kk
        # (= len(s)), not k — using arange(k) failed whenever k != kk
        W[(numpy.arange(kk), numpy.arange(kk))] = s
        W[:kk, :kk] += UTX1.dot(VTX2.T)
        W[:kk, kk:] = UTX1.dot(R2.T)
        W[kk:, :kk] = R1.dot(VTX2.T)
        W[kk:, kk:] = R1.dot(R2.T)

        # svd of W
        W = scipy.sparse.csc_matrix(W)
        UW, sW, VW = SparseUtils.svdPropack(W, k)
        VWT = VW.T

        # reconstruct the correct decomposition
        Ures = numpy.c_[U, Q1].dot(UW)
        sres = sW
        Vres = numpy.c_[V, Q2].dot(VWT.T)

        return Ures, sres, Vres
예제 #8
0
    def _addSparseRSVD(U, s, V, X, k=10, kX=None, kRand=None, q=None):
        """
        Perform a randomised SVD of the matrix X + U diag(s) V.T. We use the
        top singular vectors of X plus random Gaussian vectors to build the
        range approximation, followed by q power iterations.

        :param U: current left singular vectors (m x r)
        :param s: current singular values (length r)
        :param V: current right singular vectors (n x r)
        :param X: sparse matrix to add to the decomposition
        :param k: rank of the returned decomposition
        :param kX: rank of the SVD of X used to seed the projection (defaults to k)
        :param kRand: number of extra random projection vectors (defaults to k)
        :param q: number of power iterations (defaults to 1)
        :return: updated (U, s, V) of rank k
        """
        # PEP 8: compare to None with "is", never "==" — "==" can misbehave
        # for objects overriding __eq__ (e.g. numpy arrays)
        if kX is None:
            kX = k
        if kRand is None:
            kRand = k
        if q is None:
            q = 1

        m, n = X.shape
        Us = U * s

        kX = numpy.min([m, n, kX])
        UX, sX, VX = SparseUtils.svdPropack(X, kX)
        omega = numpy.c_[V, VX, numpy.random.randn(n, kRand)]

        def rMultA(x):
            # Right-multiply by A = U diag(s) V.T + X without forming A
            return Us.dot(V.T.dot(x)) + X.dot(x)

        def rMultAT(x):
            # Right-multiply by A.T
            return V.dot(Us.T.dot(x)) + X.T.dot(x)

        # Power iterations sharpen the range estimate Y
        Y = rMultA(omega)
        for i in range(q):
            Y = rMultAT(Y)
            Y = rMultA(Y)

        Q, R = numpy.linalg.qr(Y)
        B = rMultAT(Q).T
        U, s, VT = numpy.linalg.svd(B, full_matrices=False)
        # Keep the k largest singular triplets
        U, s, V = Util.indSvd(U, s, VT, numpy.flipud(numpy.argsort(s))[:k])
        U = Q.dot(U)

        return U, s, V
예제 #9
0
            def next(self):
                """
                Run soft-impute on the next matrix from self.XIterator and
                return the soft-thresholded decomposition (newU, newS, newV).
                Iterates an SVD + soft-threshold step until the relative
                change in the reconstruction falls below eps, warm-starting
                from the previous call's factors (self.oldU/oldS/oldV).
                """
                X = self.XIterator.next()
                logging.debug("Learning on matrix with shape: " +
                              str(X.shape) + " and " + str(X.nnz) +
                              " non-zeros")

                if self.iterativeSoftImpute.weighted:
                    #Compute row and col probabilities
                    up, vp = SparseUtils.nonzeroRowColsProbs(X)
                    nzuInds = up == 0
                    nzvInds = vp == 0
                    # Inverse-sqrt weights; zero-probability rows/cols get
                    # weight 0 (the +nzuInds term only avoids divide-by-zero)
                    u = numpy.sqrt(1 / (up + numpy.array(nzuInds, numpy.int)))
                    v = numpy.sqrt(1 / (vp + numpy.array(nzvInds, numpy.int)))
                    u[nzuInds] = 0
                    v[nzvInds] = 0

                # NOTE(review): "!= None" should be "is not None" (PEP 8);
                # behaviour kept as-is here
                if self.rhos != None:
                    self.iterativeSoftImpute.setRho(self.rhos.next())

                if not scipy.sparse.isspmatrix_csc(X):
                    raise ValueError("X must be a csc_matrix not " +
                                     str(type(X)))

                #Figure out what lambda should be
                #PROPACK has problems with convergence
                Y = scipy.sparse.csc_matrix(X, dtype=numpy.float)
                U, s, V = ExpSU.SparseUtils.svdArpack(Y, 1, kmax=20)
                del Y
                #U, s, V = SparseUtils.svdPropack(X, 1, kmax=20)
                maxS = s[0]
                logging.debug("Largest singular value : " + str(maxS))

                (n, m) = X.shape

                # Initialise or resize the warm-start factors
                if self.j == 0:
                    self.oldU = numpy.zeros((n, 1))
                    self.oldS = numpy.zeros(1)
                    self.oldV = numpy.zeros((m, 1))
                else:
                    oldN = self.oldU.shape[0]
                    oldM = self.oldV.shape[0]

                    if self.iterativeSoftImpute.updateAlg == "initial":
                        if n > oldN:
                            self.oldU = Util.extendArray(
                                self.oldU, (n, self.oldU.shape[1]))
                        elif n < oldN:
                            self.oldU = self.oldU[0:n, :]

                        if m > oldM:
                            self.oldV = Util.extendArray(
                                self.oldV, (m, self.oldV.shape[1]))
                        # NOTE(review): comparing m against oldN looks like a
                        # typo — oldM expected; confirm before changing
                        elif m < oldN:
                            self.oldV = self.oldV[0:m, :]
                    elif self.iterativeSoftImpute.updateAlg == "zero":
                        self.oldU = numpy.zeros((n, 1))
                        self.oldS = numpy.zeros(1)
                        self.oldV = numpy.zeros((m, 1))
                    else:
                        # NOTE(review): self.updateAlg may be undefined here —
                        # presumably self.iterativeSoftImpute.updateAlg was
                        # intended; confirm
                        raise ValueError("Unknown SVD update algorithm: " +
                                         self.updateAlg)

                rowInds, colInds = X.nonzero()

                gamma = self.iterativeSoftImpute.eps + 1
                i = 0

                self.iterativeSoftImpute.measures = numpy.zeros(
                    (self.iterativeSoftImpute.maxIterations, 4))

                # Main soft-impute loop: stop when the relative change gamma
                # drops below eps or maxIterations is reached
                while gamma > self.iterativeSoftImpute.eps:
                    if i == self.iterativeSoftImpute.maxIterations:
                        logging.debug("Maximum number of iterations reached")
                        break

                    # Y = X minus the current reconstruction restricted to
                    # the observed entries
                    ZOmega = SparseUtilsCython.partialReconstructPQ(
                        (rowInds, colInds), self.oldU * self.oldS, self.oldV)
                    Y = X - ZOmega
                    #Y = Y.tocsc()
                    #del ZOmega
                    Y = csarray(Y, storagetype="row")
                    gc.collect()

                    #os.system('taskset -p 0xffffffff %d' % os.getpid())

                    # Dispatch on the configured SVD backend
                    if self.iterativeSoftImpute.svdAlg == "propack":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=False)
                        newU, newS, newV = SparseUtils.svdPropack(
                            L,
                            k=self.iterativeSoftImpute.k,
                            kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg == "arpack":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=False)
                        newU, newS, newV = SparseUtils.svdArpack(
                            L,
                            k=self.iterativeSoftImpute.k,
                            kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg == "svdUpdate":
                        newU, newS, newV = SVDUpdate.addSparseProjected(
                            self.oldU, self.oldS, self.oldV, Y,
                            self.iterativeSoftImpute.k)
                    elif self.iterativeSoftImpute.svdAlg == "rsvd":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=True)
                        newU, newS, newV = RandomisedSVD.svd(
                            L,
                            self.iterativeSoftImpute.k,
                            p=self.iterativeSoftImpute.p,
                            q=self.iterativeSoftImpute.q)
                    elif self.iterativeSoftImpute.svdAlg == "rsvdUpdate":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=True)
                        # Warm-start the randomised SVD with the previous V
                        # after the first matrix
                        if self.j == 0:
                            newU, newS, newV = RandomisedSVD.svd(
                                L,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p,
                                q=self.iterativeSoftImpute.q)
                        else:
                            newU, newS, newV = RandomisedSVD.svd(
                                L,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p,
                                q=self.iterativeSoftImpute.qu,
                                omega=self.oldV)
                    elif self.iterativeSoftImpute.svdAlg == "rsvdUpdate2":

                        if self.j == 0:
                            L = LinOperatorUtils.sparseLowRankOp(Y,
                                                                 self.oldU,
                                                                 self.oldS,
                                                                 self.oldV,
                                                                 parallel=True)
                            newU, newS, newV = RandomisedSVD.svd(
                                L,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p,
                                q=self.iterativeSoftImpute.q)
                        else:
                            #Need linear operator which is U s V
                            L = LinOperatorUtils.lowRankOp(
                                self.oldU, self.oldS, self.oldV)
                            Y = GeneralLinearOperator.asLinearOperator(
                                Y, parallel=True)
                            newU, newS, newV = RandomisedSVD.updateSvd(
                                L,
                                self.oldU,
                                self.oldS,
                                self.oldV,
                                Y,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p)
                    else:
                        raise ValueError("Unknown SVD algorithm: " +
                                         self.iterativeSoftImpute.svdAlg)

                    # Compute the shrinkage amount lambda (scalar, or a
                    # per-value vector in the weighted case)
                    if self.iterativeSoftImpute.weighted and i == 0:
                        delta = numpy.diag((u * newU.T).dot(newU))
                        pi = numpy.diag((v * newV.T).dot(newV))
                        lmbda = (maxS / numpy.max(
                            delta * pi)) * self.iterativeSoftImpute.rho
                        lmbdav = lmbda * delta * pi
                    elif not self.iterativeSoftImpute.weighted:
                        lmbda = maxS * self.iterativeSoftImpute.rho
                        if i == 0:
                            logging.debug("lambda: " + str(lmbda))
                        lmbdav = lmbda

                    newS = newS - lmbdav
                    #Soft threshold
                    newS = numpy.clip(newS, 0, numpy.max(newS))

                    # gamma = ||newZ - oldZ||^2 / ||oldZ||^2, computed via
                    # the factorised forms without materialising Z
                    normOldZ = (self.oldS**2).sum()
                    normNewZmOldZ = (self.oldS**2).sum() + (
                        newS**2).sum() - 2 * numpy.trace(
                            (self.oldV.T.dot(newV * newS)).dot(
                                newU.T.dot(self.oldU * self.oldS)))

                    #We can get newZ == oldZ in which case we break
                    if normNewZmOldZ < self.tol:
                        gamma = 0
                    elif abs(normOldZ) < self.tol:
                        gamma = self.iterativeSoftImpute.eps + 1
                    else:
                        gamma = normNewZmOldZ / normOldZ

                    if self.iterativeSoftImpute.verbose:
                        # Subspace-change diagnostics recorded per iteration
                        theta1 = (
                            self.iterativeSoftImpute.k -
                            numpy.linalg.norm(self.oldU.T.dot(newU), 'fro')**
                            2) / self.iterativeSoftImpute.k
                        theta2 = (
                            self.iterativeSoftImpute.k -
                            numpy.linalg.norm(self.oldV.T.dot(newV), 'fro')**
                            2) / self.iterativeSoftImpute.k
                        thetaS = numpy.linalg.norm(
                            newS - self.oldS)**2 / numpy.linalg.norm(newS)**2
                        self.iterativeSoftImpute.measures[i, :] = numpy.array(
                            [gamma, theta1, theta2, thetaS])

                    self.oldU = newU.copy()
                    self.oldS = newS.copy()
                    self.oldV = newV.copy()

                    logging.debug("Iteration " + str(i) + " gamma=" +
                                  str(gamma))
                    i += 1

                if self.iterativeSoftImpute.postProcess:
                    #Add the mean vectors
                    previousS = newS
                    newU = numpy.c_[newU, numpy.array(X.mean(1)).ravel()]
                    newV = numpy.c_[newV, numpy.array(X.mean(0)).ravel()]
                    newS = self.iterativeSoftImpute.unshrink(X, newU, newV)

                    #Note that this increases the rank of U and V by 1
                    #print("Difference in s after postprocessing: " + str(numpy.linalg.norm(previousS - newS[0:-1])))
                    logging.debug("Difference in s after postprocessing: " +
                                  str(numpy.linalg.norm(previousS -
                                                        newS[0:-1])))

                logging.debug("Number of iterations for rho=" +
                              str(self.iterativeSoftImpute.rho) + ": " +
                              str(i))
                self.j += 1
                return (newU, newS, newV)
예제 #10
0
            def next(self):
                """
                Run soft-impute on the next matrix from self.XIterator and
                return the soft-thresholded decomposition (newU, newS, newV).
                Iterates an SVD + soft-threshold step until the relative
                change in the reconstruction falls below eps, warm-starting
                from the previous call's factors (self.oldU/oldS/oldV).
                """
                X = self.XIterator.next()
                logging.debug("Learning on matrix with shape: " + str(X.shape) + " and " + str(X.nnz) + " non-zeros")    
                
                if self.iterativeSoftImpute.weighted: 
                    #Compute row and col probabilities 
                    up, vp = SparseUtils.nonzeroRowColsProbs(X)
                    nzuInds = up==0
                    nzvInds = vp==0
                    # Inverse-sqrt weights; zero-probability rows/cols get
                    # weight 0 (the +nzuInds term only avoids divide-by-zero)
                    u = numpy.sqrt(1/(up + numpy.array(nzuInds, numpy.int))) 
                    v = numpy.sqrt(1/(vp + numpy.array(nzvInds, numpy.int)))
                    u[nzuInds] = 0 
                    v[nzvInds] = 0 
                
                # NOTE(review): "!= None" should be "is not None" (PEP 8);
                # behaviour kept as-is here
                if self.rhos != None: 
                    self.iterativeSoftImpute.setRho(self.rhos.next())

                if not scipy.sparse.isspmatrix_csc(X):
                    raise ValueError("X must be a csc_matrix not " + str(type(X)))
                    
                #Figure out what lambda should be 
                #PROPACK has problems with convergence 
                Y = scipy.sparse.csc_matrix(X, dtype=numpy.float)
                U, s, V = ExpSU.SparseUtils.svdArpack(Y, 1, kmax=20)
                del Y
                #U, s, V = SparseUtils.svdPropack(X, 1, kmax=20)
                maxS = s[0]
                logging.debug("Largest singular value : " + str(maxS))

                (n, m) = X.shape

                # Initialise or resize the warm-start factors
                if self.j == 0:
                    self.oldU = numpy.zeros((n, 1))
                    self.oldS = numpy.zeros(1)
                    self.oldV = numpy.zeros((m, 1))
                else:
                    oldN = self.oldU.shape[0]
                    oldM = self.oldV.shape[0]

                    if self.iterativeSoftImpute.updateAlg == "initial":
                        if n > oldN:
                            self.oldU = Util.extendArray(self.oldU, (n, self.oldU.shape[1]))
                        elif n < oldN:
                            self.oldU = self.oldU[0:n, :]

                        if m > oldM:
                            self.oldV = Util.extendArray(self.oldV, (m, self.oldV.shape[1]))
                        # NOTE(review): comparing m against oldN looks like a
                        # typo — oldM expected; confirm before changing
                        elif m < oldN:
                            self.oldV = self.oldV[0:m, :]
                    elif self.iterativeSoftImpute.updateAlg == "zero":
                        self.oldU = numpy.zeros((n, 1))
                        self.oldS = numpy.zeros(1)
                        self.oldV = numpy.zeros((m, 1))
                    else:
                        # NOTE(review): self.updateAlg may be undefined here —
                        # presumably self.iterativeSoftImpute.updateAlg was
                        # intended; confirm
                        raise ValueError("Unknown SVD update algorithm: " + self.updateAlg)

                rowInds, colInds = X.nonzero()

                gamma = self.iterativeSoftImpute.eps + 1
                i = 0

                self.iterativeSoftImpute.measures = numpy.zeros((self.iterativeSoftImpute.maxIterations, 4))

                # Main soft-impute loop: stop when the relative change gamma
                # drops below eps or maxIterations is reached
                while gamma > self.iterativeSoftImpute.eps:
                    if i == self.iterativeSoftImpute.maxIterations: 
                        logging.debug("Maximum number of iterations reached")
                        break 
                    
                    # Y = X minus the current reconstruction restricted to
                    # the observed entries
                    ZOmega = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), self.oldU*self.oldS, self.oldV)
                    Y = X - ZOmega
                    #Y = Y.tocsc()
                    #del ZOmega
                    Y = csarray(Y, storagetype="row")
                    gc.collect()
                    
                    #os.system('taskset -p 0xffffffff %d' % os.getpid())

                    # Dispatch on the configured SVD backend
                    if self.iterativeSoftImpute.svdAlg=="propack":
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=False)                        
                        newU, newS, newV = SparseUtils.svdPropack(L, k=self.iterativeSoftImpute.k, kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg=="arpack":
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=False)                        
                        newU, newS, newV = SparseUtils.svdArpack(L, k=self.iterativeSoftImpute.k, kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg=="svdUpdate":
                        newU, newS, newV = SVDUpdate.addSparseProjected(self.oldU, self.oldS, self.oldV, Y, self.iterativeSoftImpute.k)
                    elif self.iterativeSoftImpute.svdAlg=="rsvd":
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                        newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.q)
                    elif self.iterativeSoftImpute.svdAlg=="rsvdUpdate": 
                        L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                        # Warm-start the randomised SVD with the previous V
                        # after the first matrix
                        if self.j == 0: 
                            newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.q)
                        else: 
                            newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.qu, omega=self.oldV)
                    elif self.iterativeSoftImpute.svdAlg=="rsvdUpdate2":
                        
                        if self.j == 0: 
                            L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                            newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.q)
                        else: 
                            #Need linear operator which is U s V 
                            L = LinOperatorUtils.lowRankOp(self.oldU, self.oldS, self.oldV)
                            Y = GeneralLinearOperator.asLinearOperator(Y, parallel=True)
                            newU, newS, newV = RandomisedSVD.updateSvd(L, self.oldU, self.oldS, self.oldV, Y, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p)
                    else:
                        raise ValueError("Unknown SVD algorithm: " + self.iterativeSoftImpute.svdAlg)

                    # Compute the shrinkage amount lambda (scalar, or a
                    # per-value vector in the weighted case)
                    if self.iterativeSoftImpute.weighted and i==0: 
                        delta = numpy.diag((u*newU.T).dot(newU))
                        pi = numpy.diag((v*newV.T).dot(newV))
                        lmbda = (maxS/numpy.max(delta*pi))*self.iterativeSoftImpute.rho
                        lmbdav = lmbda*delta*pi
                    elif not self.iterativeSoftImpute.weighted: 
                        lmbda = maxS*self.iterativeSoftImpute.rho
                        if i==0: 
                            logging.debug("lambda: " + str(lmbda))
                        lmbdav = lmbda
                        
                    newS = newS - lmbdav                    
                    #Soft threshold
                    newS = numpy.clip(newS, 0, numpy.max(newS))
                    

                    # gamma = ||newZ - oldZ||^2 / ||oldZ||^2, computed via the
                    # factorised forms without materialising Z
                    normOldZ = (self.oldS**2).sum()
                    normNewZmOldZ = (self.oldS**2).sum() + (newS**2).sum() - 2*numpy.trace((self.oldV.T.dot(newV*newS)).dot(newU.T.dot(self.oldU*self.oldS)))

                    #We can get newZ == oldZ in which case we break
                    if normNewZmOldZ < self.tol:
                        gamma = 0
                    elif abs(normOldZ) < self.tol:
                        gamma = self.iterativeSoftImpute.eps + 1
                    else:
                        gamma = normNewZmOldZ/normOldZ
                        
                    if self.iterativeSoftImpute.verbose: 
                        # Subspace-change diagnostics recorded per iteration
                        theta1 = (self.iterativeSoftImpute.k - numpy.linalg.norm(self.oldU.T.dot(newU), 'fro')**2)/self.iterativeSoftImpute.k
                        theta2 = (self.iterativeSoftImpute.k - numpy.linalg.norm(self.oldV.T.dot(newV), 'fro')**2)/self.iterativeSoftImpute.k
                        thetaS = numpy.linalg.norm(newS - self.oldS)**2/numpy.linalg.norm(newS)**2
                        self.iterativeSoftImpute.measures[i, :] = numpy.array([gamma, theta1, theta2, thetaS])

                    self.oldU = newU.copy()
                    self.oldS = newS.copy()
                    self.oldV = newV.copy()

                    logging.debug("Iteration " + str(i) + " gamma="+str(gamma))
                    i += 1

                if self.iterativeSoftImpute.postProcess: 
                    #Add the mean vectors 
                    previousS = newS
                    newU = numpy.c_[newU, numpy.array(X.mean(1)).ravel()]
                    newV = numpy.c_[newV, numpy.array(X.mean(0)).ravel()]
                    newS = self.iterativeSoftImpute.unshrink(X, newU, newV)  
                    
                    #Note that this increases the rank of U and V by 1 
                    #print("Difference in s after postprocessing: " + str(numpy.linalg.norm(previousS - newS[0:-1]))) 
                    logging.debug("Difference in s after postprocessing: " + str(numpy.linalg.norm(previousS - newS[0:-1]))) 

                logging.debug("Number of iterations for rho="+str(self.iterativeSoftImpute.rho) + ": " + str(i))
                self.j += 1
                return (newU, newS, newV)
예제 #11
0
# Oversampling values to try for the randomised SVD
ps = [0] 
# Numbers of power iterations to compare
qs = [1, 2]

# Per-method relative reconstruction errors and wall-clock times,
# accumulated over the matrices in the iterator below
errors = []
times = []

for i, X in enumerate(trainIterator):
    print(i)
    if i == 10: 
        break 
    
    tempTimes = []
    tempErrors = []
    
    startTime = time.time()
    U, s, V = SparseUtils.svdPropack(X, k)
    tempTimes.append(time.time()-startTime)
    tempErrors.append(numpy.linalg.norm(numpy.array(X.todense()) - (U*s).dot(V.T))/numpy.linalg.norm(X.todense()))
    
    for p in ps: 
        for q in qs: 
            startTime = time.time()
            U2, s2, V2 = RandomisedSVD.svd(X, k, p, q)
            tempTimes.append(time.time()-startTime)
            tempErrors.append(numpy.linalg.norm(numpy.array(X.todense()) - (U2*s2).dot(V2.T))/numpy.linalg.norm(X.todense()) )
            
            startTime = time.time()
            if i == 0: 
                U3, s3, V3 = RandomisedSVD.svd(X, k, p, q)
            else: 
                U3, s3, V3 = RandomisedSVD.svd(X, k, p, q, omega=lastV)