コード例 #1
0
ファイル: SparseUtilsTest.py プロジェクト: rezaarmand/sandbox
    def testCentreCols(self):
        """Check centerCols against independently computed column means.

        The expected mean of each column is its sum divided by its number of
        nonzero entries, with 0 used for columns that have no nonzeros. The
        test asserts that the matrix returned by centerCols has column means
        of (almost) zero and that the returned means match the expected ones.
        """
        shape = (50, 10)
        r = 5
        k = 100

        X, U, s, V = SparseUtils.generateSparseLowRank(shape,
                                                       r,
                                                       k,
                                                       verbose=True)
        rowInds, colInds = X.nonzero()

        mu2 = numpy.array(X.sum(0)).ravel()
        # Count the nonzeros per column directly from the nonzero indices
        # rather than probing every (i, j) cell of the sparse matrix.
        numNnz = numpy.bincount(colInds, minlength=X.shape[1])

        # Divide only where a column has entries, so empty columns never
        # trigger a division by zero; their mean is defined as 0.
        hasEntries = numNnz != 0
        mu2[hasEntries] /= numNnz[hasEntries]
        mu2[~hasEntries] = 0

        X, mu = SparseUtils.centerCols(X)
        nptst.assert_array_almost_equal(
            numpy.array(X.mean(0)).ravel(), numpy.zeros(X.shape[1]))
        nptst.assert_array_almost_equal(mu, mu2)
コード例 #2
0
ファイル: SparseUtilsTest.py プロジェクト: charanpald/sandbox
    def testCentreRows(self):
        """Check centerRows against independently computed row means.

        First verifies that element access at each nonzero position agrees
        with fancy-indexing the matrix by its nonzero indices. Then the
        expected mean of each row is computed as its sum divided by its number
        of nonzero entries (0 for rows with no nonzeros), and the test asserts
        that the matrix returned by centerRows has row means of (almost) zero
        and that the returned means match the expected ones.
        """
        shape = (50, 10)
        r = 5
        k = 100

        X, U, s, V = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)
        rowInds, colInds = X.nonzero()

        # Fancy-index the nonzero values once; this is loop-invariant.
        nonzeroVals = numpy.array(X[rowInds, colInds]).ravel()

        for i in range(rowInds.shape[0]):
            self.assertEqual(X[rowInds[i], colInds[i]], nonzeroVals[i])

        mu2 = numpy.array(X.sum(1)).ravel()
        # Count the nonzeros per row directly from the nonzero indices
        # rather than probing every (i, j) cell of the sparse matrix.
        numNnz = numpy.bincount(rowInds, minlength=X.shape[0])

        # Divide only where a row has entries, so empty rows never trigger a
        # division by zero; their mean is defined as 0.
        hasEntries = numNnz != 0
        mu2[hasEntries] /= numNnz[hasEntries]
        mu2[~hasEntries] = 0

        X, mu = SparseUtils.centerRows(X)
        nptst.assert_array_almost_equal(numpy.array(X.mean(1)).ravel(), numpy.zeros(X.shape[0]))
        nptst.assert_array_almost_equal(mu, mu2)
コード例 #3
0
ファイル: SparseUtilsTest.py プロジェクト: charanpald/sandbox
    def testUncentre(self):
        """Check that uncenter undoes centerRows followed by centerCols.

        Also runs SoftImpute on the centred matrix and asserts that the RMSE
        between the centred matrix and its prediction equals the RMSE between
        their uncentred counterparts.
        """
        shape = (50, 10)
        r = 5
        k = 100

        X, U, s, V = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)

        Y = X.copy()

        # The nonzero positions are captured before centring and handed to
        # centerCols explicitly.
        inds = X.nonzero()
        X, mu1 = SparseUtils.centerRows(X)
        X, mu2 = SparseUtils.centerCols(X, inds=inds)

        # Snapshot of the centred matrix, used for the SoftImpute check below.
        cX = X.copy()

        Y2 = SparseUtils.uncenter(X, mu1, mu2)

        nptst.assert_array_almost_equal(Y.todense(), Y2.todense(), 3)

        # We try softImpute on a centered matrix and check if the results are the same
        lmbdas = numpy.array([0.1])
        softImpute = SoftImpute(lmbdas)

        Z = softImpute.learnModel(cX, fullMatrices=False)
        Z = softImpute.predict([Z], cX.nonzero())[0]

        error1 = MCEvaluator.rootMeanSqError(cX, Z)

        X = SparseUtils.uncenter(cX, mu1, mu2)
        Z2 = SparseUtils.uncenter(Z, mu1, mu2)

        error2 = MCEvaluator.rootMeanSqError(X, Z2)

        self.assertAlmostEqual(error1, error2)
コード例 #4
0
 def setUp(self):
     """Seed numpy's RNG and build the matrix fixtures.

     Stores three generated matrices in self.matrixList and a copy of each,
     in the same order, in self.testMatrixList.
     """
     numpy.set_printoptions(suppress=True, precision=3, linewidth=150)
     numpy.random.seed(21)

     dims = (20, 10)
     rank = 5
     numEntries = 100
     numMatrices = 3

     # Each call is made in sequence after the seed above.
     self.matrixList = [
         SparseUtils.generateSparseLowRank(dims, rank, numEntries)
         for _ in range(numMatrices)
     ]
     self.testMatrixList = [matrix.copy() for matrix in self.matrixList]
コード例 #5
0
    def profileSubmatrix(self):
        """Profile SparseUtils.submatrix on a generated 100000 x 15000 matrix.

        The index argument is the first 1000000 entries of a random
        permutation of range(X.nnz).
        """
        dims = (100000, 15000)
        rank = 50
        numEntries = 5000000
        numSamples = 1000000

        # X and inds must keep these names: the profiled statement refers to
        # them and is handed locals().
        X = SparseUtils.generateSparseLowRank(dims, rank, numEntries)
        print(X.nnz, type(X))

        shuffled = numpy.random.permutation(X.nnz)
        inds = shuffled[:numSamples]

        ProfileUtils.profile("SparseUtils.submatrix(X, inds)", globals(), locals())
コード例 #6
0
    def profileGetOmegaList(self):
        """Profile SparseUtils.getOmegaList on a generated matrix converted to sppy.csarray."""
        dims = (20000, 15000)
        rank = 50
        numEntries = 1000000

        generated = SparseUtils.generateSparseLowRank(dims, rank, numEntries)
        import sppy

        # X must keep this name: the profiled statement refers to it and is
        # handed locals().
        X = sppy.csarray(generated)

        ProfileUtils.profile("SparseUtils.getOmegaList(X)", globals(), locals())
コード例 #7
0
    def profileGetOmegaList(self):
        """Run the profiler over SparseUtils.getOmegaList for a 20000 x 15000 input.

        The input is produced by generateSparseLowRank and then wrapped in
        sppy.csarray before profiling.
        """
        numRows, numCols = 20000, 15000
        rank, numEntries = 50, 1000000

        # The name X is referenced by the profiled statement below, so it is
        # rebound rather than renamed.
        X = SparseUtils.generateSparseLowRank((numRows, numCols), rank,
                                              numEntries)
        import sppy
        X = sppy.csarray(X)

        command = 'SparseUtils.getOmegaList(X)'
        ProfileUtils.profile(command, globals(), locals())
コード例 #8
0
ファイル: SoftImputeProfile.py プロジェクト: kentwang/sandbox
    def __init__(self):
        """Seed numpy's RNG and store a generated sparse low-rank matrix.

        Sets self.r to the rank passed to the generator and self.X to the
        generated matrix, then prints its number of stored nonzeros.
        """
        numpy.random.seed(21)

        # Matrix dimensions and the entry-count argument for the generator
        # (10 ** 5 is a smaller alternative for the latter).
        numRows = numCols = 100000
        numEntries = 5 * 10 ** 6
        self.r = 50

        dims = (numRows, numCols)
        self.X = SparseUtils.generateSparseLowRank(dims, self.r, numEntries)
        print(self.X.nnz)
コード例 #9
0
    def profileSubmatrix(self):
        """Run the profiler over SparseUtils.submatrix(X, inds).

        X comes from generateSparseLowRank with shape (100000, 15000); inds is
        a slice of at most 1000000 values from a random permutation of
        range(X.nnz).
        """
        numRows, numCols = 100000, 15000
        rank, numEntries = 50, 5000000

        # The names X and inds are referenced by the profiled statement, so
        # they are kept as-is.
        X = SparseUtils.generateSparseLowRank((numRows, numCols), rank,
                                              numEntries)
        print(X.nnz, type(X))

        inds = numpy.random.permutation(X.nnz)[:1000000]

        command = 'SparseUtils.submatrix(X, inds)'
        ProfileUtils.profile(command, globals(), locals())
コード例 #10
0
    def __init__(self):
        """Fix the numpy random seed, then generate and store the matrix self.X.

        self.r holds the rank argument used for generation; the nonzero count
        of self.X is printed once it has been built.
        """
        numpy.random.seed(21)

        # Create a low rank matrix
        dims = (100000, 100000)
        self.r = 50
        # Entry-count argument for the generator; 10**5 is a smaller alternative.
        numEntries = 5 * 10**6

        self.X = SparseUtils.generateSparseLowRank(dims, self.r, numEntries)
        print(self.X.nnz)
コード例 #11
0
ファイル: SparseUtilsTest.py プロジェクト: charanpald/sandbox
    def testGenerateSparseLowRank(self):
        """Check the shapes and values returned by generateSparseLowRank.

        Asserts that U and V have r columns and row counts matching the
        requested shape, that X stores at most k nonzeros, and that every
        nonzero of X matches the corresponding entry of (U*s).dot(V.T).
        """
        shape = (5000, 1000)
        r = 5
        k = 10

        X, U, s, V = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)

        self.assertEqual(U.shape, (shape[0], r))
        self.assertEqual(V.shape, (shape[1], r))
        self.assertLessEqual(X.nnz, k)

        Y = (U*s).dot(V.T)
        rowInds, colInds = X.nonzero()

        for i, j in zip(rowInds, colInds):
            self.assertAlmostEqual(X[i, j], Y[i, j])
コード例 #12
0
 def profilePrecisionAtK(self):
     """Profile MCEvaluator.precisionAtK on a binarised generated matrix.

     The stored values of X are thresholded at their mean (values above the
     mean become 1, the rest 0) and X is converted to sppy.csarray before
     profiling.
     """
     m = 1000
     n = 500000
     r = 30
     k = m*100

     X, U, s, V = SparseUtils.generateSparseLowRank((m,n), r, k, verbose=True)
     mean = X.data.mean()

     # Compute both masks before mutating X.data. If the second mask were
     # taken after zeroing, a negative mean would make every zeroed entry
     # satisfy "> mean" and be turned into 1 as well.
     atOrBelowMean = X.data <= mean
     aboveMean = X.data > mean
     X.data[atOrBelowMean] = 0
     X.data[aboveMean] = 1

     import sppy
     X = sppy.csarray(X)

     # X, U and V are referenced by name in the profiled statement.
     ProfileUtils.profile("MCEvaluator.precisionAtK(X, U, V, 10)", globals(), locals())
コード例 #13
0
ファイル: SparseUtilsTest.py プロジェクト: charanpald/sandbox
    def testSvdArpack(self):
        """Compare svdArpack with the first k2 components of numpy's dense SVD.

        Singular values are compared directly; singular vectors are compared
        by absolute value to 3 decimal places.
        """
        shape = (500, 100)
        r = 5
        k = 1000
        k2 = 10

        X, _, _, _ = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)
        U, s, V = SparseUtils.svdArpack(X, k2)

        # numpy returns the right factor already transposed.
        denseU, denseS, denseVt = numpy.linalg.svd(X.todense())
        denseV = denseVt.T

        nptst.assert_array_almost_equal(s, denseS[0:k2])
        for approx, exact in ((U, denseU), (V, denseV)):
            nptst.assert_array_almost_equal(numpy.abs(approx), numpy.abs(exact[:, 0:k2]), 3)
コード例 #14
0
    def profilePrecisionAtK(self):
        """Profile MCEvaluator.precisionAtK on a binarised generated matrix.

        The stored values of X are thresholded at their mean (values above the
        mean become 1, the rest 0) and X is converted to sppy.csarray before
        profiling.
        """
        m = 1000
        n = 500000
        r = 30
        k = m * 100

        X, U, s, V = SparseUtils.generateSparseLowRank((m, n),
                                                       r,
                                                       k,
                                                       verbose=True)
        mean = X.data.mean()

        # Compute both masks before mutating X.data. If the second mask were
        # taken after zeroing, a negative mean would make every zeroed entry
        # satisfy "> mean" and be turned into 1 as well.
        atOrBelowMean = X.data <= mean
        aboveMean = X.data > mean
        X.data[atOrBelowMean] = 0
        X.data[aboveMean] = 1

        import sppy
        X = sppy.csarray(X)

        # X, U and V are referenced by name in the profiled statement.
        ProfileUtils.profile("MCEvaluator.precisionAtK(X, U, V, 10)",
                             globals(), locals())
コード例 #15
0
ファイル: SparseUtilsTest.py プロジェクト: rezaarmand/sandbox
    def testGenerateSparseLowRank(self):
        """Check the shapes and values returned by generateSparseLowRank.

        Asserts that U and V have r columns and row counts matching the
        requested shape, that X stores at most k nonzeros, and that every
        nonzero of X matches the corresponding entry of (U * s).dot(V.T).
        """
        shape = (5000, 1000)
        r = 5
        k = 10

        X, U, s, V = SparseUtils.generateSparseLowRank(shape,
                                                       r,
                                                       k,
                                                       verbose=True)

        self.assertEqual(U.shape, (shape[0], r))
        self.assertEqual(V.shape, (shape[1], r))
        self.assertLessEqual(X.nnz, k)

        Y = (U * s).dot(V.T)
        rowInds, colInds = X.nonzero()

        for i, j in zip(rowInds, colInds):
            self.assertAlmostEqual(X[i, j], Y[i, j])
コード例 #16
0
ファイル: SparseUtilsTest.py プロジェクト: charanpald/sandbox
    def testCentreRows2(self):
        """Check that centring two matrices with the same row means keeps their squared error.

        Y shares X's sparsity pattern but holds random values. Both are
        centred with the means computed from X, and the summed squared
        difference of their stored values must be unchanged. The same quantity
        is then cross-checked against the squared norm of the dense difference.
        """
        shape = (50, 10)
        r = 5
        k = 100

        # Test if centering rows changes the RMSE
        X, U, s, V = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)

        Y = X.copy()
        Y.data = numpy.random.rand(X.nnz)

        error = ((X.data - Y.data)**2).sum()

        # Y is centred with the means returned for X, not its own.
        X, mu = SparseUtils.centerRows(X)
        Y, mu = SparseUtils.centerRows(Y, mu)

        error2 = ((X.data - Y.data)**2).sum()
        self.assertAlmostEqual(error, error2)

        error3 = numpy.linalg.norm(X.todense()- Y.todense())**2
        self.assertAlmostEqual(error2, error3)
コード例 #17
0
ファイル: SparseUtilsTest.py プロジェクト: rezaarmand/sandbox
    def testSvdArpack(self):
        """Validate svdArpack against numpy.linalg.svd on the densified matrix.

        The k2 singular values must agree with the first k2 from numpy, and
        the absolute values of the singular vectors must agree with numpy's
        first k2 columns to 3 decimals.
        """
        dims = (500, 100)
        rank = 5
        numEntries = 1000
        k2 = 10

        X, _, _, _ = SparseUtils.generateSparseLowRank(dims,
                                                       rank,
                                                       numEntries,
                                                       verbose=True)

        U, s, V = SparseUtils.svdArpack(X, k2)

        fullU, fullS, fullVt = numpy.linalg.svd(X.todense())
        # Transpose so columns of fullV line up with columns of V.
        fullV = fullVt.T

        expectedS = fullS[0:k2]
        expectedAbsU = numpy.abs(fullU[:, 0:k2])
        expectedAbsV = numpy.abs(fullV[:, 0:k2])

        nptst.assert_array_almost_equal(s, expectedS)
        nptst.assert_array_almost_equal(numpy.abs(U), expectedAbsU, 3)
        nptst.assert_array_almost_equal(numpy.abs(V), expectedAbsV, 3)
コード例 #18
0
ファイル: SparseUtilsTest.py プロジェクト: rezaarmand/sandbox
    def testUncentre(self):
        """Check that uncenter undoes centerRows followed by centerCols.

        Also runs SoftImpute on the centred matrix and asserts that the RMSE
        between the centred matrix and its prediction equals the RMSE between
        their uncentred counterparts.
        """
        shape = (50, 10)
        r = 5
        k = 100

        X, U, s, V = SparseUtils.generateSparseLowRank(shape,
                                                       r,
                                                       k,
                                                       verbose=True)

        Y = X.copy()

        # The nonzero positions are captured before centring and handed to
        # centerCols explicitly.
        inds = X.nonzero()
        X, mu1 = SparseUtils.centerRows(X)
        X, mu2 = SparseUtils.centerCols(X, inds=inds)

        # Snapshot of the centred matrix, used for the SoftImpute check below.
        cX = X.copy()

        Y2 = SparseUtils.uncenter(X, mu1, mu2)

        nptst.assert_array_almost_equal(Y.todense(), Y2.todense(), 3)

        # We try softImpute on a centered matrix and check if the results are the same
        lmbdas = numpy.array([0.1])
        softImpute = SoftImpute(lmbdas)

        Z = softImpute.learnModel(cX, fullMatrices=False)
        Z = softImpute.predict([Z], cX.nonzero())[0]

        error1 = MCEvaluator.rootMeanSqError(cX, Z)

        X = SparseUtils.uncenter(cX, mu1, mu2)
        Z2 = SparseUtils.uncenter(Z, mu1, mu2)

        error2 = MCEvaluator.rootMeanSqError(X, Z2)

        self.assertAlmostEqual(error1, error2)
コード例 #19
0
ファイル: SparseUtilsTest.py プロジェクト: rezaarmand/sandbox
    def testCentreRows2(self):
        """Check that centring two matrices with the same row means keeps their squared error.

        Y shares X's sparsity pattern but holds random values. Both are
        centred with the means computed from X, and the summed squared
        difference of their stored values must be unchanged. The same quantity
        is then cross-checked against the squared norm of the dense difference.
        """
        shape = (50, 10)
        r = 5
        k = 100

        # Test if centering rows changes the RMSE
        X, U, s, V = SparseUtils.generateSparseLowRank(shape,
                                                       r,
                                                       k,
                                                       verbose=True)

        Y = X.copy()
        Y.data = numpy.random.rand(X.nnz)

        error = ((X.data - Y.data)**2).sum()

        # Y is centred with the means returned for X, not its own.
        X, mu = SparseUtils.centerRows(X)
        Y, mu = SparseUtils.centerRows(Y, mu)

        error2 = ((X.data - Y.data)**2).sum()
        self.assertAlmostEqual(error, error2)

        error3 = numpy.linalg.norm(X.todense() - Y.todense())**2
        self.assertAlmostEqual(error2, error3)