def testParallelSparseLowRankOp(self):
    """
    Check the operator built by LinOperatorUtils.parallelSparseLowRankOp
    against an explicit dense matrix.

    For 10 random problem sizes, a random sparse matrix A and low-rank
    factors (U, s, V) are generated, and the operator's matvec, rmatvec,
    matmat and rmatmat results are compared with the same products computed
    from the dense matrix A + (U*s)V^T.
    """
    for _ in range(10):
        numRows = numpy.random.randint(10, 100)
        numCols = numpy.random.randint(10, 100)
        density = numpy.random.rand()
        A = scipy.sparse.rand(numRows, numCols, density).tocsc()

        rank = numpy.random.randint(10, 100)
        U, s, V = SparseUtils.generateLowRank((numRows, numCols), rank)

        L = LinOperatorUtils.parallelSparseLowRankOp(A, U, s, V)

        # Random vectors and 10-column blocks to multiply with
        u = numpy.random.rand(numRows)
        v = numpy.random.rand(numCols)
        blockWidth = 10
        W = numpy.random.rand(numRows, blockWidth)
        X = numpy.random.rand(numCols, blockWidth)

        # Dense reference matrix
        lowRankPart = (U * s).dot(V.T)
        B = numpy.array(A + lowRankPart)

        nptst.assert_array_almost_equal(L.matvec(v), B.dot(v))
        nptst.assert_array_almost_equal(L.rmatvec(u), B.T.dot(u))
        nptst.assert_array_almost_equal(L.matmat(X), B.dot(X))
        nptst.assert_array_almost_equal(L.rmatmat(W), B.T.dot(W))
def testParallelSparseLowRankOp(self):
    """
    Verify LinOperatorUtils.parallelSparseLowRankOp on random inputs.

    Each of the 10 runs draws a random sparse matrix plus low-rank factors,
    builds the operator, and checks that its four products (matvec, rmatvec,
    matmat, rmatmat) agree with those of the equivalent dense matrix.
    """
    runCount = 10

    for _run in range(runCount):
        m = numpy.random.randint(10, 100)
        n = numpy.random.randint(10, 100)
        fillRatio = numpy.random.rand()
        sparsePart = scipy.sparse.rand(m, n, fillRatio)
        sparsePart = sparsePart.tocsc()

        numFactors = numpy.random.randint(10, 100)
        U, s, V = SparseUtils.generateLowRank((m, n), numFactors)

        operator = LinOperatorUtils.parallelSparseLowRankOp(sparsePart, U, s, V)

        leftVec = numpy.random.rand(m)
        rightVec = numpy.random.rand(n)
        width = 10
        leftBlock = numpy.random.rand(m, width)
        rightBlock = numpy.random.rand(n, width)

        # Explicit dense version of the sparse + low-rank matrix
        dense = numpy.array(sparsePart + (U * s).dot(V.T))
        denseT = dense.T

        nptst.assert_array_almost_equal(operator.matvec(rightVec), dense.dot(rightVec))
        nptst.assert_array_almost_equal(operator.rmatvec(leftVec), denseT.dot(leftVec))
        nptst.assert_array_almost_equal(operator.matmat(rightBlock), dense.dot(rightBlock))
        nptst.assert_array_almost_equal(operator.rmatmat(leftBlock), denseT.dot(leftBlock))
def profilePartialReconstructValsPQ(self):
    """
    Profile SparseUtilsCython.partialReconstructValsPQ.

    Low-rank factors for a 5000 x 10000 matrix are generated with r = 100,
    one million random (row, column) positions are drawn, and the call is
    run under ProfileUtils.profile.
    """
    shape = 5000, 10000
    r = 100
    U, s, V = SparseUtils.generateLowRank(shape, r)

    k = 1000000
    # The shape is passed positionally: the old ``dims=`` keyword has been
    # removed from numpy.unravel_index (it is now called ``shape``), while the
    # positional form works on every NumPy version.
    inds = numpy.unravel_index(numpy.random.randint(0, shape[0]*shape[1], k), shape)

    # The statement is given as a string and evaluated with locals(), so the
    # local names ``inds``, ``U`` and ``V`` must keep exactly these names.
    ProfileUtils.profile('SparseUtilsCython.partialReconstructValsPQ(inds[0], inds[1], U, V)', globals(), locals())
def testReconstructLowRank(self):
    """
    Check one entry returned by SparseUtils.reconstructLowRank.

    The value at position (0, 0) of the reconstruction must match the
    product of the first row of U (scaled by s) with the first row of V.
    """
    shape = (5000, 1000)
    r = 5

    U, s, V = SparseUtils.generateLowRank(shape, r)

    # Presumably a flattened index selecting entry (0, 0) -- confirm against
    # SparseUtils.reconstructLowRank.
    inds = numpy.array([0])
    X = SparseUtils.reconstructLowRank(U, s, V, inds)

    # assertAlmostEqual replaces the deprecated alias assertAlmostEquals,
    # which no longer exists in Python 3.12+.
    self.assertAlmostEqual(X[0, 0], (U[0, :]*s).dot(V[0, :]))
def testReconstructLowRank(self):
    """
    Check one entry returned by SparseUtils.reconstructLowRank.

    The value at position (0, 0) of the reconstruction must match the
    product of the first row of U (scaled by s) with the first row of V.
    """
    shape = (5000, 1000)
    r = 5

    U, s, V = SparseUtils.generateLowRank(shape, r)

    # Presumably a flattened index selecting entry (0, 0) -- confirm against
    # SparseUtils.reconstructLowRank.
    inds = numpy.array([0])
    X = SparseUtils.reconstructLowRank(U, s, V, inds)

    # assertAlmostEqual replaces the deprecated alias assertAlmostEquals,
    # which no longer exists in Python 3.12+.
    self.assertAlmostEqual(X[0, 0], (U[0, :] * s).dot(V[0, :]))
def testGenerateLowRank(self):
    """
    Check the factors returned by SparseUtils.generateLowRank.

    U and V must have orthonormal columns and the expected number of rows,
    s must have r entries, and the entries of the reconstructed matrix
    (U*s)V^T must have a maximum near 1 and a minimum near -1.
    """
    shape = (5000, 1000)
    r = 5

    U, s, V = SparseUtils.generateLowRank(shape, r)

    # Columns of U and V are orthonormal
    nptst.assert_array_almost_equal(U.T.dot(U), numpy.eye(r))
    nptst.assert_array_almost_equal(V.T.dot(V), numpy.eye(r))

    # assertEqual replaces the deprecated alias assertEquals, which no longer
    # exists in Python 3.12+.
    self.assertEqual(U.shape[0], shape[0])
    self.assertEqual(V.shape[0], shape[1])
    self.assertEqual(s.shape[0], r)

    # Check the range of the reconstructed entries: the largest must be
    # within 0.5 of 1 and the smallest within 0.5 of -1.
    shape = (500, 500)
    r = 100
    U, s, V = SparseUtils.generateLowRank(shape, r)
    X = (U*s).dot(V.T)

    self.assertTrue(abs(numpy.max(X) - 1) < 0.5)
    self.assertTrue(abs(numpy.min(X) + 1) < 0.5)
def testGenerateLowRank(self):
    """
    Check the factors returned by SparseUtils.generateLowRank.

    U and V must have orthonormal columns and the expected number of rows,
    s must have r entries, and the entries of the reconstructed matrix
    (U*s)V^T must have a maximum near 1 and a minimum near -1.
    """
    shape = (5000, 1000)
    r = 5

    U, s, V = SparseUtils.generateLowRank(shape, r)

    # Columns of U and V are orthonormal
    nptst.assert_array_almost_equal(U.T.dot(U), numpy.eye(r))
    nptst.assert_array_almost_equal(V.T.dot(V), numpy.eye(r))

    # assertEqual replaces the deprecated alias assertEquals, which no longer
    # exists in Python 3.12+.
    self.assertEqual(U.shape[0], shape[0])
    self.assertEqual(V.shape[0], shape[1])
    self.assertEqual(s.shape[0], r)

    # Check the range of the reconstructed entries: the largest must be
    # within 0.5 of 1 and the smallest within 0.5 of -1.
    shape = (500, 500)
    r = 100
    U, s, V = SparseUtils.generateLowRank(shape, r)
    X = (U * s).dot(V.T)

    self.assertTrue(abs(numpy.max(X) - 1) < 0.5)
    self.assertTrue(abs(numpy.min(X) + 1) < 0.5)
def generateMatrices(self):
    """
    Generate 20 train/test matrix pairs for incremental collaborative
    filtering.

    Values of a low-rank matrix of shape (self.endM, self.endN) are computed
    at a random set of positions, scaled by their standard deviation and
    perturbed with Gaussian noise scaled by self.noise. Every sampled
    position is assigned to either the training or the test set according to
    self.trainSplit.

    The first 10 pairs are all cropped to (self.startM, self.startN) and use
    an increasing number of the sampled entries. The last 10 pairs use all
    sampled entries and grow from (self.startM, self.startN) to
    (self.endM, self.endN).

    The numpy random seed is set to 21 at the start. As a side effect
    self.startNumInds and self.endNumInds are set.

    :return: A tuple (trainXList, testXList) of two lists with 20 sparse
        matrices each; trainXList[i] and testXList[i] form the i-th pair.
    """
    def cropAndClean(maskedX, numRows, numCols):
        # Crop to the leading numRows x numCols block and drop explicit zeros
        cropped = maskedX[0:numRows, :][:, 0:numCols]
        cropped.eliminate_zeros()
        cropped.prune()
        return cropped

    numpy.random.seed(21)
    r = 50
    U, s, V = SparseUtils.generateLowRank((self.endM, self.endN), r, normalise=False)

    self.startNumInds = self.pnz*self.startM*self.startN
    # Sample counts must be integers: current NumPy rejects a float ``size``
    self.endNumInds = int(self.pnz*self.endM*self.endN)

    if not self.nonUniform:
        # NOTE(review): the upper bound of randint is exclusive, so the last
        # flat index is never drawn. Left unchanged so the seeded data stays
        # the same.
        inds = numpy.random.randint(0, self.endM*self.endN-1, self.endNumInds)
    else:
        logging.debug("Using non uniform dataset")
        inds = numpy.array(numpy.random.randn(self.endNumInds)*(self.endM*self.endN-1)/4 + (self.endM*self.endN-1)/2, int)
        inds = numpy.clip(inds, 0, (self.endM*self.endN-1))

    # Remove repeated positions, then randomise their order
    inds = numpy.unique(inds)
    numpy.random.shuffle(inds)
    self.endNumInds = inds.shape[0]

    rowInds, colInds = numpy.unravel_index(inds, (self.endM, self.endN))
    rowInds = numpy.array(rowInds, numpy.int32)
    colInds = numpy.array(colInds, numpy.int32)
    vals = SparseUtilsCython.partialReconstructValsPQ(rowInds, colInds, U*s, V)
    vals /= vals.std()
    vals += numpy.random.randn(vals.shape[0])*self.noise

    # The builtin bool/float/int replace the numpy.bool/float/int aliases,
    # which have been removed from NumPy.
    isTrainInd = numpy.array(numpy.random.rand(inds.shape[0]) <= self.trainSplit, bool)

    # The original ``assert (trainSplit - fraction)`` passed for any non-zero
    # difference and failed only on a perfect split, so it is replaced by a
    # log of the realised fraction.
    logging.debug("Train fraction: %s (target %s)", isTrainInd.sum()/float(isTrainInd.shape[0]), self.trainSplit)

    XMaskTrain = scipy.sparse.csc_matrix((isTrainInd, (rowInds, colInds)), dtype=bool, shape=(self.endM, self.endN))
    XMaskTest = scipy.sparse.csc_matrix((numpy.logical_not(isTrainInd), (rowInds, colInds)), dtype=bool, shape=(self.endM, self.endN))

    #In the first phase, the matrices stay the same size but there are more nonzero
    #entries
    numMatrices = 10
    # Truncate to integers so the values can be used as slice bounds
    stepList = numpy.linspace(self.startNumInds, self.endNumInds, numMatrices).astype(int)
    trainXList = []
    testXList = []

    for i in range(numMatrices):
        currentVals = vals[0:stepList[i]]
        currentRowInds = rowInds[0:stepList[i]]
        currentColInds = colInds[0:stepList[i]]

        X = scipy.sparse.csc_matrix((currentVals, (currentRowInds, currentColInds)), dtype=float, shape=(self.endM, self.endN))

        trainXList.append(cropAndClean(X.multiply(XMaskTrain), self.startM, self.startN))
        testXList.append(cropAndClean(X.multiply(XMaskTest), self.startM, self.startN))

    #Now we increase the size of matrix
    numMatrices = 10
    mStepList = numpy.linspace(self.startM, self.endM, numMatrices).astype(int)
    nStepList = numpy.linspace(self.startN, self.endN, numMatrices).astype(int)

    X = scipy.sparse.csc_matrix((vals, (rowInds, colInds)), dtype=float, shape=(self.endM, self.endN))
    # The masked matrices do not depend on the loop index, so build them once
    maskedTrainX = X.multiply(XMaskTrain)
    maskedTestX = X.multiply(XMaskTest)

    for i in range(numMatrices):
        trainXList.append(cropAndClean(maskedTrainX, mStepList[i], nStepList[i]))
        testXList.append(cropAndClean(maskedTestX, mStepList[i], nStepList[i]))

    return trainXList, testXList