예제 #1
0
    def testParallelSparseLowRankOp(self):
        """
        Compare the operator built by LinOperatorUtils.parallelSparseLowRankOp
        with the dense matrix A + U diag(s) V^T on randomly sized problems.
        """
        numTrials = 10
        numCols = 10

        for _ in range(numTrials):
            # Sparse part with random shape and density
            m = numpy.random.randint(10, 100)
            n = numpy.random.randint(10, 100)
            density = numpy.random.rand()
            sparsePart = scipy.sparse.rand(m, n, density).tocsc()

            # Low rank part given by the factors U, s, V
            rank = numpy.random.randint(10, 100)
            U, s, V = SparseUtils.generateLowRank((m, n), rank)

            op = LinOperatorUtils.parallelSparseLowRankOp(sparsePart, U, s, V)

            # Random vectors and matrices to multiply from either side
            leftVec = numpy.random.rand(m)
            rightVec = numpy.random.rand(n)
            leftMat = numpy.random.rand(m, numCols)
            rightMat = numpy.random.rand(n, numCols)

            # Dense reference matrix and its transpose
            dense = numpy.array(sparsePart + (U * s).dot(V.T))
            denseT = dense.T

            nptst.assert_array_almost_equal(op.matvec(rightVec), dense.dot(rightVec))
            nptst.assert_array_almost_equal(op.rmatvec(leftVec), denseT.dot(leftVec))
            nptst.assert_array_almost_equal(op.matmat(rightMat), dense.dot(rightMat))
            nptst.assert_array_almost_equal(op.rmatmat(leftMat), denseT.dot(leftMat))
예제 #2
0
 def testParallelSparseLowRankOp(self):
     """
     Check matvec, rmatvec, matmat and rmatmat of the operator returned by
     LinOperatorUtils.parallelSparseLowRankOp against the dense matrix
     A + U diag(s) V^T, for several random shapes.
     """
     repetitions = 10
     blockWidth = 10

     for _ in range(repetitions):
         numRows = numpy.random.randint(10, 100)
         numCols = numpy.random.randint(10, 100)
         fillRatio = numpy.random.rand()
         # Random sparse matrix stored in CSC format
         A = scipy.sparse.rand(numRows, numCols, fillRatio).tocsc()

         lowRank = numpy.random.randint(10, 100)
         U, s, V = SparseUtils.generateLowRank((numRows, numCols), lowRank)

         linOp = LinOperatorUtils.parallelSparseLowRankOp(A, U, s, V)

         u = numpy.random.rand(numRows)
         v = numpy.random.rand(numCols)
         W = numpy.random.rand(numRows, blockWidth)
         X = numpy.random.rand(numCols, blockWidth)

         # Dense matrix the operator is expected to represent
         expected = numpy.array(A + (U * s).dot(V.T))

         # Each pair is (operator result, dense reference result)
         comparisons = [
             (linOp.matvec(v), expected.dot(v)),
             (linOp.rmatvec(u), expected.T.dot(u)),
             (linOp.matmat(X), expected.dot(X)),
             (linOp.rmatmat(W), expected.T.dot(W)),
         ]

         for actual, reference in comparisons:
             nptst.assert_array_almost_equal(actual, reference)
 def profilePartialReconstructValsPQ(self):
     """
     Profile SparseUtilsCython.partialReconstructValsPQ on one million random
     (row, column) positions of a 5000 x 10000 matrix, using the factors
     returned by SparseUtils.generateLowRank with r = 100.
     """
     shape = 5000, 10000
     r = 100
     U, s, V = SparseUtils.generateLowRank(shape, r)

     k = 1000000
     flatInds = numpy.random.randint(0, shape[0]*shape[1], k)
     # The shape is passed positionally: the ``dims`` keyword is not accepted
     # by current numpy releases, whereas the positional form always works.
     inds = numpy.unravel_index(flatInds, shape)

     # The profiled statement refers to inds, U and V by name through
     # locals(), so these local names must not be changed.
     ProfileUtils.profile('SparseUtilsCython.partialReconstructValsPQ(inds[0], inds[1], U, V)', globals(), locals())
예제 #4
0
 def testReconstructLowRank(self):
     """
     Check that SparseUtils.reconstructLowRank, called with inds = [0],
     yields a matrix whose (0, 0) entry equals that of U diag(s) V^T.
     """
     shape = (5000, 1000)
     r = 5

     U, s, V = SparseUtils.generateLowRank(shape, r)

     inds = numpy.array([0])
     X = SparseUtils.reconstructLowRank(U, s, V, inds)

     # assertAlmostEqual is used because the assertAlmostEquals alias no
     # longer exists in recent Python versions.
     self.assertAlmostEqual(X[0, 0], (U[0, :]*s).dot(V[0, :]))
예제 #5
0
    def testReconstructLowRank(self):
        """
        Check that SparseUtils.reconstructLowRank, called with inds = [0],
        yields a matrix whose (0, 0) entry equals that of U diag(s) V^T.
        """
        shape = (5000, 1000)
        r = 5

        U, s, V = SparseUtils.generateLowRank(shape, r)

        inds = numpy.array([0])
        X = SparseUtils.reconstructLowRank(U, s, V, inds)

        # assertAlmostEqual is used because the assertAlmostEquals alias no
        # longer exists in recent Python versions.
        self.assertAlmostEqual(X[0, 0], (U[0, :] * s).dot(V[0, :]))
예제 #6
0
 def testGenerateLowRank(self):
     """
     Check the factors returned by SparseUtils.generateLowRank: U and V have
     orthonormal columns, the factor sizes match the requested shape and rank,
     and the entries of U diag(s) V^T span roughly the interval [-1, 1].
     """
     shape = (5000, 1000)
     r = 5

     U, s, V = SparseUtils.generateLowRank(shape, r)

     nptst.assert_array_almost_equal(U.T.dot(U), numpy.eye(r))
     nptst.assert_array_almost_equal(V.T.dot(V), numpy.eye(r))

     # assertEqual is used because the assertEquals alias no longer exists
     # in recent Python versions.
     self.assertEqual(U.shape[0], shape[0])
     self.assertEqual(V.shape[0], shape[1])
     self.assertEqual(s.shape[0], r)

     # Check the largest and smallest entries of the reconstructed matrix
     # are within 0.5 of 1 and -1 respectively
     shape = (500, 500)
     r = 100
     U, s, V = SparseUtils.generateLowRank(shape, r)
     X = (U*s).dot(V.T)

     self.assertTrue(abs(numpy.max(X) - 1) < 0.5)
     self.assertTrue(abs(numpy.min(X) + 1) < 0.5)
예제 #7
0
    def testGenerateLowRank(self):
        """
        Check the factors returned by SparseUtils.generateLowRank: U and V have
        orthonormal columns, the factor sizes match the requested shape and rank,
        and the entries of U diag(s) V^T span roughly the interval [-1, 1].
        """
        shape = (5000, 1000)
        r = 5

        U, s, V = SparseUtils.generateLowRank(shape, r)

        nptst.assert_array_almost_equal(U.T.dot(U), numpy.eye(r))
        nptst.assert_array_almost_equal(V.T.dot(V), numpy.eye(r))

        # assertEqual is used because the assertEquals alias no longer exists
        # in recent Python versions.
        self.assertEqual(U.shape[0], shape[0])
        self.assertEqual(V.shape[0], shape[1])
        self.assertEqual(s.shape[0], r)

        # Check the largest and smallest entries of the reconstructed matrix
        # are within 0.5 of 1 and -1 respectively
        shape = (500, 500)
        r = 100
        U, s, V = SparseUtils.generateLowRank(shape, r)
        X = (U * s).dot(V.T)

        self.assertTrue(abs(numpy.max(X) - 1) < 0.5)
        self.assertTrue(abs(numpy.min(X) + 1) < 0.5)
예제 #8
0
    def generateMatrices(self):
        """
        Generate train/test matrices for incremental collaborative filtering.

        Entries are sampled from the matrix given by the factors of
        SparseUtils.generateLowRank (shape (endM, endN), r = 50), divided by
        their standard deviation and perturbed with Gaussian noise scaled by
        self.noise. Each sampled entry is put in the training set when a
        uniform random draw is <= self.trainSplit, otherwise in the test set.

        Two phases of 10 matrices each are produced. In the first the matrices
        are cropped to (startM, startN) while the number of sampled entries
        grows from startNumInds to endNumInds. In the second all sampled
        entries are used while the size grows from (startM, startN) to
        (endM, endN).

        Side effects: seeds the global numpy random generator with 21 and sets
        self.startNumInds and self.endNumInds.

        :return: The pair (trainXList, testXList) of parallel lists, each
        holding 20 sparse matrices.
        """
        numpy.random.seed(21)
        r = 50

        U, s, V = SparseUtils.generateLowRank((self.endM, self.endN), r, normalise=False)

        numEntries = self.endM*self.endN
        self.startNumInds = self.pnz*self.startM*self.startN
        # numpy needs an integer sample size, so the fractional part is dropped
        self.endNumInds = int(self.pnz*numEntries)

        # NOTE(review): the upper bound of randint is exclusive, so the last
        # flat index is never drawn in the uniform case. Kept as is so that the
        # seeded data set does not change.
        if not self.nonUniform:
            inds = numpy.random.randint(0, numEntries-1, self.endNumInds)
        else:
            logging.debug("Using non uniform dataset")
            inds = numpy.array(numpy.random.randn(self.endNumInds)*(numEntries-1)/4 + (numEntries-1)/2, int)
            inds = numpy.clip(inds, 0, (numEntries-1))

        # Duplicates are removed, so fewer entries than requested may remain
        inds = numpy.unique(inds)
        numpy.random.shuffle(inds)
        self.endNumInds = inds.shape[0]

        rowInds, colInds = numpy.unravel_index(inds, (self.endM, self.endN))
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)
        vals = SparseUtilsCython.partialReconstructValsPQ(rowInds, colInds, U*s, V)
        vals /= vals.std()
        vals += numpy.random.randn(vals.shape[0])*self.noise

        # The realised train fraction is random and only approximates
        # trainSplit, hence no assertion is made on it.
        isTrainInd = numpy.array(numpy.random.rand(inds.shape[0]) <= self.trainSplit, bool)

        XMaskTrain = scipy.sparse.csc_matrix((isTrainInd, (rowInds, colInds)), dtype=bool, shape=(self.endM, self.endN))
        XMaskTest = scipy.sparse.csc_matrix((numpy.logical_not(isTrainInd), (rowInds, colInds)), dtype=bool, shape=(self.endM, self.endN))

        #In the first phase, the matrices stay the same size but there are more nonzero
        #entries
        numMatrices = 10
        # Slice bounds must be integers, so the steps are truncated
        stepList = numpy.linspace(self.startNumInds, self.endNumInds, numMatrices).astype(int)
        trainXList = []
        testXList = []

        for numInds in stepList:
            currentVals = vals[0:numInds]
            currentRowInds = rowInds[0:numInds]
            currentColInds = colInds[0:numInds]

            X = scipy.sparse.csc_matrix((currentVals, (currentRowInds, currentColInds)), dtype=float, shape=(self.endM, self.endN))

            trainX = X.multiply(XMaskTrain)[0:self.startM, 0:self.startN]
            trainX.eliminate_zeros()
            trainX.prune()

            testX = X.multiply(XMaskTest)[0:self.startM, 0:self.startN]
            testX.eliminate_zeros()
            testX.prune()

            trainXList.append(trainX)
            testXList.append(testX)

        #Now we increase the size of matrix
        numMatrices = 10
        mStepList = numpy.linspace(self.startM, self.endM, numMatrices).astype(int)
        nStepList = numpy.linspace(self.startN, self.endN, numMatrices).astype(int)

        X = scipy.sparse.csc_matrix((vals, (rowInds, colInds)), dtype=float, shape=(self.endM, self.endN))

        for numRows, numCols in zip(mStepList, nStepList):
            trainX = X.multiply(XMaskTrain)[0:numRows, :][:, 0:numCols]
            trainX.eliminate_zeros()
            trainX.prune()

            testX = X.multiply(XMaskTest)[0:numRows, :][:, 0:numCols]
            testX.eliminate_zeros()
            testX.prune()

            trainXList.append(trainX)
            testXList.append(testX)

        return trainXList, testXList