예제 #1
0
    def testUncentre(self):
        """Check that SparseUtils.uncenter undoes row and column centering.

        A sparse low-rank matrix is centered by rows and then by columns,
        uncentered again, and compared with the original to 3 decimals.
        SoftImpute is then fitted on the centered matrix, and the RMSE
        measured in the centered space is required to match the RMSE
        measured after uncentering both the data and the prediction.
        """
        shape = (50, 10)
        r = 5
        k = 100

        # verbose=True makes the generator return the factors as well; only
        # the sparse matrix itself is used here.
        X, U, s, V = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)

        # Keep the original so the centre/uncentre round trip can be checked.
        Y = X.copy()

        # The non-zero pattern is captured before centering and handed to
        # centerCols explicitly.
        inds = X.nonzero()
        X, mu1 = SparseUtils.centerRows(X)
        X, mu2 = SparseUtils.centerCols(X, inds=inds)

        # Snapshot of the centered matrix, taken before uncenter is called.
        cX = X.copy()

        Y2 = SparseUtils.uncenter(X, mu1, mu2)

        nptst.assert_array_almost_equal(Y.todense(), Y2.todense(), 3)

        #We try softImpute on a centered matrix and check if the results are the same
        lmbdas = numpy.array([0.1])
        softImpute = SoftImpute(lmbdas)

        Z = softImpute.learnModel(cX, fullMatrices=False)
        Z = softImpute.predict([Z], cX.nonzero())[0]

        error1 = MCEvaluator.rootMeanSqError(cX, Z)

        X = SparseUtils.uncenter(cX, mu1, mu2)
        Z2 = SparseUtils.uncenter(Z, mu1, mu2)

        error2 = MCEvaluator.rootMeanSqError(X, Z2)

        # assertAlmostEquals is a deprecated alias that was removed in
        # Python 3.12; assertAlmostEqual is the supported spelling.
        self.assertAlmostEqual(error1, error2)
예제 #2
0
 def testLearnModel2(self):
     """Compare IterativeSoftImpute (svdAlg="rsvd") against SoftImpute.

     With lmbda=0 every reconstructed matrix must match the corresponding
     input in self.matrixList. For each rho in a small grid, the iterative
     solution must agree with SoftImpute run on each matrix separately and
     must approximately satisfy the fixed-point condition checked below.
     """
     #Test the SVD updating solution in the case where we get an exact solution
     lmbda = 0.0
     eps = 0.1
     k = 20

     iterativeSoftImpute = IterativeSoftImpute(lmbda, k=k, eps=eps, svdAlg="rsvd")
     ZList = iterativeSoftImpute.learnModel(iter(self.matrixList))

     #Check that ZList is the same as XList
     for i, (U, s, V) in enumerate(ZList):
         Xhat = (U*s).dot(V.T)
         nptst.assert_array_almost_equal(Xhat, self.matrixList[i].todense())

     #Compare solution with that of SoftImpute class
     for rho in (0.1, 0.2, 0.5, 1.0):
         iterativeSoftImpute = IterativeSoftImpute(rho, k=k, eps=eps, svdAlg="rsvd", updateAlg="zero")
         ZList = iterativeSoftImpute.learnModel(iter(self.matrixList))

         # Reference solutions: SoftImpute fitted on the first three matrices.
         softImpute = SoftImpute(numpy.array([rho]), k=k, eps=eps)
         ZList2 = [softImpute.learnModel(self.matrixList[idx]) for idx in range(3)]

         for j, (U, s, V) in enumerate(ZList):
             Z = (U*s).dot(V.T)
             nptst.assert_array_almost_equal(Z, ZList2[j].todense())

             #Also test with true solution Z = S_lambda(X + Z^\bot_\omega)
             Xj = self.matrixList[j]
             Zomega = numpy.zeros(Xj.shape)

             # Copy Z onto the observed (non-zero) entries of Xj only.
             rowInds, colInds = Xj.nonzero()
             for row, col in zip(rowInds, colInds):
                 Zomega[row, col] = Z[row, col]

             # The threshold is rho times the largest singular value
             # returned by svdArpack for Xj.
             P, sigma, Q = ExpSU.SparseUtils.svdArpack(Xj, 1, kmax=20)
             lmbda = rho*numpy.max(sigma)

             P, sigma, Q = ExpSU.SparseUtils.svdSoft(numpy.array(Xj-Zomega+Z), lmbda)

             residual = numpy.linalg.norm(Z - (P*sigma).dot(Q.T))**2
             self.assertTrue(residual < 0.1)
예제 #3
0
 def testLearnModel(self):
     """Compare IterativeSoftImpute (svdAlg="propack") against SoftImpute.

     With lmbda=0 every reconstructed matrix must match the corresponding
     input in self.matrixList. For each lmbda in a small grid, the
     iterative solution must agree with SoftImpute run on each matrix
     separately and must approximately satisfy the fixed-point condition
     checked below.
     """
     lmbda = 0.0
     eps = 0.1
     k = 10

     iterativeSoftImpute = IterativeSoftImpute(lmbda, k=k, eps=eps, svdAlg="propack")
     ZList = iterativeSoftImpute.learnModel(iter(self.matrixList))

     #Check that ZList is the same as XList
     for i, (U, s, V) in enumerate(ZList):
         Xhat = (U*s).dot(V.T)
         nptst.assert_array_almost_equal(Xhat, numpy.array(self.matrixList[i].todense()))

     #Compare solution with that of SoftImpute class
     for lmbda in (0.1, 0.2, 0.5, 1.0):
         iterativeSoftImpute = IterativeSoftImpute(lmbda, k=k, eps=eps, svdAlg="propack", updateAlg="zero")
         ZList = iterativeSoftImpute.learnModel(iter(self.matrixList))

         # Reference solutions: SoftImpute fitted on the first three matrices.
         softImpute = SoftImpute(numpy.array([lmbda]), k=k, eps=eps)
         ZList2 = [softImpute.learnModel(self.matrixList[idx]) for idx in range(3)]

         for j, (U, s, V) in enumerate(ZList):
             Z = (U*s).dot(V.T)
             nptst.assert_array_almost_equal(Z, ZList2[j].todense())

             #Also test with true solution Z = S_lambda(X + Z^\bot_\omega)
             Xj = self.matrixList[j]
             Zomega = numpy.zeros(Xj.shape)

             # Copy Z onto the observed (non-zero) entries of Xj only.
             rowInds, colInds = Xj.nonzero()
             for row, col in zip(rowInds, colInds):
                 Zomega[row, col] = Z[row, col]

             P, sigma, Q = ExpSU.SparseUtils.svdSoft(numpy.array(Xj-Zomega+Z), lmbda)

             residual = numpy.linalg.norm(Z - (P*sigma).dot(Q.T))**2
             self.assertTrue(residual < 0.1)
예제 #4
0
    def testModelSelect(self):
        """Check IterativeSoftImpute.modelSelect against a manual CV loop.

        modelSelect is run over a grid of rho values with a single rank k
        and 3-fold cross validation on the non-zero entries of X. The same
        folds are then evaluated by hand with SoftImpute, and the mean test
        errors of the two procedures are compared to 1 decimal place.
        """
        lmbda = 0.1
        shape = (20, 20)
        r = 20
        numInds = 100
        noise = 0.2
        X = ExpSU.SparseUtils.generateSparseLowRank(shape, r, numInds, noise)

        k = 15

        iterativeSoftImpute = IterativeSoftImpute(lmbda, k=None, svdAlg="propack", updateAlg="zero")
        iterativeSoftImpute.numProcesses = 1
        rhos = numpy.linspace(0.5, 0.001, 20)
        # numpy.int was only an alias of the builtin int and was removed in
        # NumPy 1.24, so the builtin is used directly.
        ks = numpy.array([k], int)
        folds = 3
        cvInds = Sampling.randCrossValidation(folds, X.nnz)
        meanTestErrors, meanTrainErrors = iterativeSoftImpute.modelSelect(X, rhos, ks, cvInds)

        #Now do model selection manually
        (rowInds, colInds) = X.nonzero()
        trainErrors = numpy.zeros((rhos.shape[0], len(cvInds)))
        testErrors = numpy.zeros((rhos.shape[0], len(cvInds)))

        for i, rho in enumerate(rhos):
            for j, (trainInds, testInds) in enumerate(cvInds):
                # Rebuild the train/test matrices for this fold; the fold
                # indices address positions in (rowInds, colInds).
                trainX = scipy.sparse.csc_matrix(X.shape)
                testX = scipy.sparse.csc_matrix(X.shape)

                for p in trainInds:
                    trainX[rowInds[p], colInds[p]] = X[rowInds[p], colInds[p]]

                for p in testInds:
                    testX[rowInds[p], colInds[p]] = X[rowInds[p], colInds[p]]

                softImpute = SoftImpute(numpy.array([rho]), k=ks[0])
                ZList = [softImpute.learnModel(trainX, fullMatrices=False)]

                predTrainX = softImpute.predict(ZList, trainX.nonzero())[0]
                predX = softImpute.predict(ZList, testX.nonzero())[0]

                testErrors[i, j] = MCEvaluator.rootMeanSqError(testX, predX)
                trainErrors[i, j] = MCEvaluator.rootMeanSqError(trainX, predTrainX)

        meanTestErrors2 = testErrors.mean(1)
        # NOTE(review): the train errors are computed but never compared
        # with meanTrainErrors; only the test errors are asserted below.
        meanTrainErrors2 = trainErrors.mean(1)

        nptst.assert_array_almost_equal(meanTestErrors.ravel(), meanTestErrors2, 1)
예제 #5
0
    def testPredict(self):
        """Check that SoftImpute.predict fills exactly the requested entries.

        For every lambda, the non-zero pattern of the predicted matrix must
        equal the index arrays passed to predict, which here are the
        non-zero indices of X.
        """
        eps = 0.001
        k = 9
        lmbdas = numpy.array([2.0, 1.5, 1.0, 0.5, 0.2, 0.1])
        X = scipy.sparse.rand(10, 10, 0.2).tocsc()

        #Check out singular values
        U, s, V = sparsesvd(X.tocsc(), k)

        softImpute = SoftImpute(lmbdas, eps, k)
        ZList = softImpute.learnModel(X, fullMatrices=False)

        inds = X.nonzero()
        predXList = softImpute.predict(ZList, inds)

        # Unpacking requires the first model to be a three-item sequence.
        U, s, V = ZList[0]

        expectedRows, expectedCols = inds[0], inds[1]
        for predX in predXList:
            predRows, predCols = predX.nonzero()
            nptst.assert_array_equal(predRows, expectedRows)
            nptst.assert_array_equal(predCols, expectedCols)
예제 #6
0
    def testPredict(self):
        """Check that SoftImpute.predict fills exactly the requested entries.

        For every lambda, the non-zero pattern of the predicted matrix must
        equal the index arrays passed to predict, which here are the
        non-zero indices of X.
        """
        eps = 0.001
        k = 9
        lmbdas = numpy.array([2.0, 1.5, 1.0, 0.5, 0.2, 0.1])
        X = scipy.sparse.rand(10, 10, 0.2).tocsc()

        #Check out singular values
        U, s, V = sparsesvd(X.tocsc(), k)

        softImpute = SoftImpute(lmbdas, eps, k)
        ZList = softImpute.learnModel(X, fullMatrices=False)

        inds = X.nonzero()
        predXList = softImpute.predict(ZList, inds)

        # Unpacking requires the first model to be a three-item sequence.
        U, s, V = ZList[0]

        expectedRows, expectedCols = inds[0], inds[1]
        for predX in predXList:
            predRows, predCols = predX.nonzero()
            nptst.assert_array_equal(predRows, expectedRows)
            nptst.assert_array_equal(predCols, expectedCols)
예제 #7
0
    def testLearnModel2(self):
        """Exercise SoftImpute.learnModel2 on a random 10x10 sparse matrix.

        Three things are checked: lambda=0 gives back (approximately) the
        input matrix, lambda equal to the largest singular value gives a
        zero matrix, and for intermediate lambdas the solution approximately
        satisfies the fixed-point condition Z = S_lambda(X - Z_omega + Z).
        """
        X = scipy.sparse.rand(10, 10, 0.2)
        X = X.tocsc()
        lmbdas = numpy.array([10.0, 0.0])
        eps = 0.01
        k = 9

        softImpute = SoftImpute(lmbdas, eps, k)
        ZList = softImpute.learnModel2(X)

        #Test that when lambda=0 get approx original matrix back
        X2 = ZList[1].todense()
        nptst.assert_almost_equal(X.todense(), X2)

        #When lambda is greater or equal to largest singular value, get 0
        U, s, V = sparsesvd(X.tocsc(), k)
        lmbdas = numpy.array([numpy.max(s)])
        softImpute = SoftImpute(lmbdas, eps, k)
        Z = softImpute.learnModel2(X)
        # assertEquals is a deprecated alias that was removed in Python 3.12.
        self.assertEqual(numpy.linalg.norm(Z.todense()), 0)

        #Check solution for medium values of lambda
        eps = 0.1
        lmbdas = numpy.array([0.1, 0.2, 0.5, 1.0])
        softImpute = SoftImpute(lmbdas, eps, k)
        ZList = softImpute.learnModel2(X)

        # The observed pattern of X does not change, so compute it once.
        rowInds, colInds = X.nonzero()
        tol = 0.1

        for j, Z in enumerate(ZList):
            Z = Z.todense()

            # Zomega holds Z restricted to the observed entries of X.
            Zomega = numpy.zeros(X.shape)
            for row, col in zip(rowInds, colInds):
                Zomega[row, col] = Z[row, col]

            U, s, V = ExpSU.SparseUtils.svdSoft(numpy.array(X - Zomega + Z),
                                                lmbdas[j])

            self.assertTrue(numpy.linalg.norm(Z - (U * s).dot(V.T))**2 < tol)
예제 #8
0
    def testUncentre(self):
        """Check that SparseUtils.uncenter undoes row and column centering.

        A sparse low-rank matrix is centered by rows and then by columns,
        uncentered again, and compared with the original to 3 decimals.
        SoftImpute is then fitted on the centered matrix, and the RMSE
        measured in the centered space is required to match the RMSE
        measured after uncentering both the data and the prediction.
        """
        shape = (50, 10)
        r = 5
        k = 100

        # verbose=True makes the generator return the factors as well; only
        # the sparse matrix itself is used here.
        X, U, s, V = SparseUtils.generateSparseLowRank(shape,
                                                       r,
                                                       k,
                                                       verbose=True)

        # Keep the original so the centre/uncentre round trip can be checked.
        Y = X.copy()

        # The non-zero pattern is captured before centering and handed to
        # centerCols explicitly.
        inds = X.nonzero()
        X, mu1 = SparseUtils.centerRows(X)
        X, mu2 = SparseUtils.centerCols(X, inds=inds)

        # Snapshot of the centered matrix, taken before uncenter is called.
        cX = X.copy()

        Y2 = SparseUtils.uncenter(X, mu1, mu2)

        nptst.assert_array_almost_equal(Y.todense(), Y2.todense(), 3)

        #We try softImpute on a centered matrix and check if the results are the same
        lmbdas = numpy.array([0.1])
        softImpute = SoftImpute(lmbdas)

        Z = softImpute.learnModel(cX, fullMatrices=False)
        Z = softImpute.predict([Z], cX.nonzero())[0]

        error1 = MCEvaluator.rootMeanSqError(cX, Z)

        X = SparseUtils.uncenter(cX, mu1, mu2)
        Z2 = SparseUtils.uncenter(Z, mu1, mu2)

        error2 = MCEvaluator.rootMeanSqError(X, Z2)

        # assertAlmostEquals is a deprecated alias that was removed in
        # Python 3.12; assertAlmostEqual is the supported spelling.
        self.assertAlmostEqual(error1, error2)
예제 #9
0
    def testLearnModel2(self):
        """Exercise SoftImpute.learnModel2 on a random 10x10 sparse matrix.

        Three things are checked: lambda=0 gives back (approximately) the
        input matrix, lambda equal to the largest singular value gives a
        zero matrix, and for intermediate lambdas the solution approximately
        satisfies the fixed-point condition Z = S_lambda(X - Z_omega + Z).
        """
        X = scipy.sparse.rand(10, 10, 0.2)
        X = X.tocsc()
        lmbdas = numpy.array([10.0, 0.0])
        eps = 0.01
        k = 9

        softImpute = SoftImpute(lmbdas, eps, k)
        ZList = softImpute.learnModel2(X)

        #Test that when lambda=0 get approx original matrix back
        X2 = ZList[1].todense()
        nptst.assert_almost_equal(X.todense(), X2)

        #When lambda is greater or equal to largest singular value, get 0
        U, s, V = sparsesvd(X.tocsc(), k)
        lmbdas = numpy.array([numpy.max(s)])
        softImpute = SoftImpute(lmbdas, eps, k)
        Z = softImpute.learnModel2(X)
        # assertEquals is a deprecated alias that was removed in Python 3.12.
        self.assertEqual(numpy.linalg.norm(Z.todense()), 0)

        #Check solution for medium values of lambda
        eps = 0.1
        lmbdas = numpy.array([0.1, 0.2, 0.5, 1.0])
        softImpute = SoftImpute(lmbdas, eps, k)
        ZList = softImpute.learnModel2(X)

        # The observed pattern of X does not change, so compute it once.
        rowInds, colInds = X.nonzero()
        tol = 0.1

        for j, Z in enumerate(ZList):
            Z = Z.todense()

            # Zomega holds Z restricted to the observed entries of X.
            Zomega = numpy.zeros(X.shape)
            for row, col in zip(rowInds, colInds):
                Zomega[row, col] = Z[row, col]

            U, s, V = ExpSU.SparseUtils.svdSoft(numpy.array(X-Zomega+Z), lmbdas[j])

            self.assertTrue(numpy.linalg.norm(Z -(U*s).dot(V.T))**2 < tol)
예제 #10
0
    def profileLearnModel(self):
        """Profile SoftImpute.learnModel on self.X with a single lambda of 0.5."""
        lmbdas = numpy.array([0.5])
        softImpute = SoftImpute(lmbdas)

        # The command is passed as a string together with globals() and
        # locals(), so the names `softImpute` and `self` inside the string
        # must match the local variable names in this method.
        ProfileUtils.profile('softImpute.learnModel(self.X, False)', globals(),
                             locals())