Example #1
 def testPartialReconstructValsPQ(self):
     n = 10
     Y = numpy.random.rand(n, n)
     
     U, s, V = numpy.linalg.svd(Y)
     V = V.T 
     
     V = numpy.ascontiguousarray(V)
     
     rowInds, colInds = numpy.nonzero(Y)  
     rowInds = numpy.array(rowInds, numpy.int32)
     colInds = numpy.array(colInds, numpy.int32)
     vals = SparseUtilsCython.partialReconstructValsPQ(rowInds, colInds, numpy.ascontiguousarray(U*s), V)
     X = numpy.reshape(vals, Y.shape)
     
     nptst.assert_almost_equal(X, Y)
     
     #Try just some indices 
     density = 0.2
     A = scipy.sparse.rand(n, n, density)
     inds = A.nonzero()
     rowInds = numpy.array(inds[0], numpy.int32)
     colInds = numpy.array(inds[1], numpy.int32)
     
     vals = SparseUtilsCython.partialReconstructValsPQ(rowInds, colInds, numpy.ascontiguousarray(U*s), V)
     
     for i in range(inds[0].shape[0]): 
         j = inds[0][i]
         k = inds[1][i]
         
         self.assertAlmostEqual(vals[i], Y[j, k])
         
     
     self.assertEqual(A.nnz, inds[0].shape[0])
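
For reference, the behaviour these assertions pin down can be reproduced with plain NumPy: with P = U*s and Q = V, partialReconstructValsPQ(rowInds, colInds, P, Q) returns the entries of P.dot(Q.T) at the supplied (row, col) index pairs. A minimal standalone sketch of an equivalent helper (hypothetical name partialReconstructValsPQNumpy; only numpy is assumed):

    import numpy

    def partialReconstructValsPQNumpy(rowInds, colInds, P, Q):
        """Sketch: vals[i] = P[rowInds[i], :].dot(Q[colInds[i], :])."""
        #Row-wise dot products between the selected rows of P and Q
        return numpy.einsum("ij,ij->i", P[rowInds, :], Q[colInds, :])

Since Y = U.dot(numpy.diag(s)).dot(V.T) equals (U*s).dot(V.T), these values reproduce Y[rowInds, colInds], which is exactly what the tests above check.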
Example #2
 def testPartialReconstructValsPQ2(self): 
     numRuns = 10         
     
     for i in range(numRuns): 
         m = numpy.random.randint(5, 50)
         n = numpy.random.randint(5, 50)
         Y = numpy.random.rand(m, n)
         
         U, s, V = numpy.linalg.svd(Y, full_matrices=False)
         V = V.T 
         
         V = numpy.ascontiguousarray(V)
         
         rowInds, colInds = numpy.nonzero(Y)  
         rowInds = numpy.array(rowInds, numpy.int32)
         colInds = numpy.array(colInds, numpy.int32)
         #print(U.shape, V.shape)
         vals = SparseUtilsCython.partialReconstructValsPQ(rowInds, colInds, numpy.ascontiguousarray(U*s), V)
         X = numpy.reshape(vals, Y.shape)
         
         nptst.assert_almost_equal(X, Y)
Example #3
    def generateMatrices(self):
        """
        This function returns a list of 20 train/test matrices for incremental 
        collaborative filtering. Each item in the list is (trainX, testX).
        """    
        numpy.random.seed(21)    
        r = 50 
        
        U, s, V = SparseUtils.generateLowRank((self.endM, self.endN), r, normalise=False)
        
        self.startNumInds = int(self.pnz*self.startM*self.startN)
        self.endNumInds = int(self.pnz*self.endM*self.endN)
        
        if not self.nonUniform: 
            inds = numpy.random.randint(0, self.endM*self.endN-1, self.endNumInds)
        else:
            logging.debug("Using non uniform dataset")
            inds = numpy.array(numpy.random.randn(self.endNumInds)*(self.endM*self.endN-1)/4 + (self.endM*self.endN-1)/2, numpy.int64)
            inds = numpy.clip(inds, 0, (self.endM*self.endN-1))
            
        inds = numpy.unique(inds)
        numpy.random.shuffle(inds)
        self.endNumInds = inds.shape[0]
        
        rowInds, colInds = numpy.unravel_index(inds, (self.endM, self.endN))
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)
        vals = SparseUtilsCython.partialReconstructValsPQ(rowInds, colInds, U*s, V)
        vals /= vals.std()
        vals += numpy.random.randn(vals.shape[0])*self.noise
        
        isTrainInd = numpy.array(numpy.random.rand(inds.shape[0]) <= self.trainSplit, bool)
        
        #Sanity check: the empirical train fraction should be close to trainSplit (loose tolerance)
        assert abs(self.trainSplit - isTrainInd.sum()/float(isTrainInd.shape[0])) < 0.1
        
        XMaskTrain = scipy.sparse.csc_matrix((isTrainInd, (rowInds, colInds)), dtype=bool, shape=(self.endM, self.endN))
        XMaskTest = scipy.sparse.csc_matrix((numpy.logical_not(isTrainInd), (rowInds, colInds)), dtype=bool, shape=(self.endM, self.endN))

        #In the first phase, the matrices stay the same size but there are more nonzero 
        #entries   
        numMatrices = 10 
        stepList = numpy.linspace(self.startNumInds, self.endNumInds, numMatrices).astype(numpy.int64)
        trainXList = []
        testXList = []    
        
        for i in range(numMatrices):  
            currentVals = vals[0:stepList[i]]
            currentRowInds = rowInds[0:stepList[i]]
            currentColInds = colInds[0:stepList[i]]
            
            X = scipy.sparse.csc_matrix((currentVals, (currentRowInds, currentColInds)), dtype=numpy.float64, shape=(self.endM, self.endN))
            #print("pnz=" + str(X.nnz/float(X.shape[0]*X.shape[1])))
            
            trainX = X.multiply(XMaskTrain)[0:self.startM, 0:self.startN]
            trainX.eliminate_zeros()
            trainX.prune() 
            
            testX = X.multiply(XMaskTest)[0:self.startM, 0:self.startN]
            testX.eliminate_zeros()
            testX.prune() 
            
            trainXList.append(trainX)
            testXList.append(testX)
            
        #Now we increase the size of matrix 
        numMatrices = 10 
        mStepList = numpy.linspace(self.startM, self.endM, numMatrices).astype(numpy.int64)
        nStepList = numpy.linspace(self.startN, self.endN, numMatrices).astype(numpy.int64)
    
        X = scipy.sparse.csc_matrix((vals, (rowInds, colInds)), dtype=numpy.float64, shape=(self.endM, self.endN))
    
        for i in range(numMatrices): 
            trainX = X.multiply(XMaskTrain)[0:mStepList[i], :][:, 0:nStepList[i]]
            trainX.eliminate_zeros()
            trainX.prune() 
            
            testX = X.multiply(XMaskTest)[0:mStepList[i], :][:, 0:nStepList[i]]
            testX.eliminate_zeros()
            testX.prune() 
            
            trainXList.append(trainX)
            testXList.append(testX)
                    
        return trainXList, testXList
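
The train/test split above can be isolated into a compact standalone sketch: boolean masks are built over the same sparsity pattern as the data, and X.multiply(mask) keeps only the entries each mask selects. The snippet below uses toy sizes and hypothetical variable names; only numpy and scipy.sparse are assumed:

    import numpy
    import scipy.sparse

    m, n = 5, 4
    rowInds = numpy.array([0, 1, 2, 3, 4], numpy.int32)
    colInds = numpy.array([1, 0, 3, 2, 1], numpy.int32)
    vals = numpy.random.rand(rowInds.shape[0])

    #Assign each observed entry to the train set with probability 0.8
    isTrain = numpy.random.rand(rowInds.shape[0]) <= 0.8
    maskTrain = scipy.sparse.csc_matrix((isTrain, (rowInds, colInds)), dtype=bool, shape=(m, n))
    maskTest = scipy.sparse.csc_matrix((numpy.logical_not(isTrain), (rowInds, colInds)), dtype=bool, shape=(m, n))

    X = scipy.sparse.csc_matrix((vals, (rowInds, colInds)), dtype=numpy.float64, shape=(m, n))
    trainX = X.multiply(maskTrain).tocsc()
    testX = X.multiply(maskTest).tocsc()
    trainX.eliminate_zeros()
    testX.eliminate_zeros()

    #Every observed entry ends up in exactly one of the two matrices
    assert trainX.nnz + testX.nnz == X.nnz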