def testHadamard(self):
    """
    Check that the element-wise (Hadamard) product agrees with dense numpy
    arithmetic for the shared fixtures and for every storage type.
    """
    # The Hadamard product of a matrix with itself is its element-wise square
    for M in (self.A, self.B, self.C, self.D, self.F, self.G, self.H):
        nptst.assert_array_equal(M.hadamard(M).toarray(), M.toarray() ** 2)

    for storagetype in self.storagetypes:
        left = csarray((5, 5), storagetype=storagetype)
        left[0, 1] = 4
        left[2, 3] = -1.2
        left[1, 3] = 2
        left[3, 3] = 1

        right = csarray((5, 5), storagetype=storagetype)
        right[0, 2] = 9.2
        right[2, 3] = -5
        right[3, 4] = 12
        right[3, 3] = 12

        # An empty matrix: multiplying by it must give an empty result
        empty = csarray((5, 5), storagetype=storagetype)

        nptst.assert_array_equal(left.hadamard(right).toarray(), left.toarray() * right.toarray())
        nptst.assert_array_equal(left.hadamard(empty).toarray(), empty.toarray())

    # 1-D fixtures
    for v in (self.a, self.b, self.c):
        nptst.assert_array_equal(v.hadamard(v).toarray(), v.toarray() ** 2)
def testAdd(self):
    """
    Check that adding csarrays matches dense numpy addition.
    """
    # Adding an array to itself must double every entry
    for M in (self.A, self.B, self.C, self.D, self.F, self.G, self.H):
        nptst.assert_array_equal((M + M).toarray(), M.toarray() * 2)

    # Two matrices with different sparsity patterns
    left = csarray((5, 5))
    left[0, 1] = 4
    left[1, 3] = 2
    left[3, 3] = 1

    right = csarray((5, 5))
    right[0, 2] = 9.2
    right[2, 3] = -5
    right[3, 4] = 12

    nptst.assert_array_equal((left + right).toarray(), left.toarray() + right.toarray())

    # 1-D fixtures
    for v in (self.a, self.b, self.c):
        nptst.assert_array_equal((v + v).toarray(), v.toarray() * 2)
def testSub(self):
    """
    Check that subtracting csarrays matches dense numpy subtraction.
    """
    # Subtracting an array from itself must give all zeros
    for M in (self.A, self.B, self.C, self.D, self.F):
        nptst.assert_array_equal((M - M).toarray(), M.toarray() * 0)

    doubled = self.B * 2
    nptst.assert_array_equal((doubled - self.B).toarray(), self.B.toarray())

    # Two matrices with different sparsity patterns
    left = csarray((5, 5))
    left[0, 1] = 4
    left[1, 3] = 2
    left[3, 3] = 1

    right = csarray((5, 5))
    right[0, 2] = 9.2
    right[2, 3] = -5
    right[3, 4] = 12

    nptst.assert_array_equal((left - right).toarray(), left.toarray() - right.toarray())
def testStr(self):
    """
    Check the string representation of 2-D arrays for both storage types and
    of the 1-D fixtures.
    """
    nrow = 5
    ncol = 7

    storagetypes = ["col", "row"]

    for storagetype in storagetypes:
        A = csarray((nrow, ncol), storagetype=storagetype)
        A[0, 1] = 1
        A[1, 3] = 5.2
        A[3, 3] = -0.2

        outputStr = "csarray dtype:float64 shape:(5, 7) non-zeros:3 storage:" + A.storagetype + "\n"
        outputStr += "(0, 1) 1.0\n"
        outputStr += "(1, 3) 5.2\n"
        outputStr += "(3, 3) -0.2"
        # assertEqual: the assertEquals alias was removed in Python 3.12
        self.assertEqual(str(A), outputStr)

        # An empty array only prints the header line
        B = csarray((5, 5), storagetype=storagetype)
        outputStr = "csarray dtype:float64 shape:(5, 5) non-zeros:0 storage:" + B.storagetype + "\n"
        self.assertEqual(str(B), outputStr)

    # 1-D arrays print no storage type
    outputStr = "csarray dtype:float64 shape:(10,) non-zeros:3\n"
    outputStr += "(0) 23.0\n"
    outputStr += "(3) 1.2\n"
    outputStr += "(4) -8.0"
    self.assertEqual(str(self.a), outputStr)

    outputStr = "csarray dtype:float64 shape:(3,) non-zeros:0\n"
    self.assertEqual(str(self.c), outputStr)
def testBiCGSTAB(self):
    """
    Solve random dense square systems with sppy.linalg.biCGSTAB and check that
    malformed inputs raise ValueError.
    """
    # This doesn't always converge, so the solution is only compared when the
    # solver returns output == 0 (presumably its success code)
    trials = 10

    for _ in range(trials):
        size = numpy.random.randint(5, 20)
        dense = numpy.random.rand(size, size)
        xTrue = numpy.random.rand(size)
        rhs = dense.dot(xTrue)

        xSolved, output = sppy.linalg.biCGSTAB(sppy.csarray(dense), rhs, tol=10**-6, maxIter=size)

        if output == 0:
            nptst.assert_array_almost_equal(xTrue, xSolved, 3)

    # Try with bad input: a non-square matrix
    numCols = 3
    numRows = 5
    rect = sppy.csarray(numpy.random.rand(numRows, numCols))
    rhs = rect.dot(numpy.random.rand(numCols))
    self.assertRaises(ValueError, sppy.linalg.biCGSTAB, rect, rhs)

    # Bad right-hand side for a square matrix
    square = sppy.csarray(numpy.random.rand(numRows, numRows))
    # NOTE(review): numpy.array(n + 1) builds a 0-d array holding the value
    # n + 1, not a vector of length n + 1 - confirm which was intended
    rhs = numpy.array(numRows + 1)
    self.assertRaises(ValueError, sppy.linalg.biCGSTAB, square, rhs)
def testAdd(self):
    """
    Check that adding csarrays matches dense numpy addition.
    """
    # Adding a matrix to itself must double every entry
    for M in (self.A, self.B, self.C, self.D, self.F):
        nptst.assert_array_equal((M + M).toarray(), M.toarray() * 2)

    # Two matrices with different sparsity patterns
    left = csarray((5, 5))
    left[0, 1] = 4
    left[1, 3] = 2
    left[3, 3] = 1

    right = csarray((5, 5))
    right[0, 2] = 9.2
    right[2, 3] = -5
    right[3, 4] = 12

    nptst.assert_array_equal((left + right).toarray(), left.toarray() + right.toarray())
def testGetOmegaListPtr(self):
    """
    Check that SparseUtils.getOmegaListPtr returns, for each row, the column
    indices of the non-zero entries, for scipy and sppy inputs.
    """
    import sppy
    m = 10
    n = 5
    X = scipy.sparse.rand(m, n, 0.1)
    X = X.tocsr()
    # Dense copy computed once instead of once per row
    Xdense = X.toarray()

    indPtr, colInds = SparseUtils.getOmegaListPtr(X)

    for i in range(m):
        omegai = colInds[indPtr[i]:indPtr[i+1]]
        nptst.assert_array_almost_equal(omegai, Xdense[i, :].nonzero()[0])

    Xsppy = sppy.csarray(X)
    indPtr, colInds = SparseUtils.getOmegaListPtr(Xsppy)

    for i in range(m):
        omegai = colInds[indPtr[i]:indPtr[i+1]]
        nptst.assert_array_almost_equal(omegai, Xdense[i, :].nonzero()[0])

    #Test a zero array (scipy doesn't work in this case)
    X = sppy.csarray((m, n))

    indPtr, colInds = SparseUtils.getOmegaListPtr(X)

    for i in range(m):
        omegai = colInds[indPtr[i]:indPtr[i+1]]
        # A zero array has no non-zero columns in any row; previously this
        # loop computed the slice without asserting anything
        self.assertEqual(len(omegai), 0)
def testStr(self):
    """
    Check the string representation of 2-D and 1-D arrays, with and without
    non-zero entries.
    """
    nrow = 5
    ncol = 7

    A = csarray((nrow, ncol))
    A[0, 1] = 1
    A[1, 3] = 5.2
    A[3, 3] = -0.2

    outputStr = "csarray dtype:float64 shape:(5, 7) non-zeros:3\n"
    outputStr += "(0, 1) 1.0\n"
    outputStr += "(1, 3) 5.2\n"
    outputStr += "(3, 3) -0.2"
    # assertEqual: the assertEquals alias was removed in Python 3.12
    self.assertEqual(str(A), outputStr)

    # An empty array only prints the header line
    B = csarray((5, 5))
    outputStr = "csarray dtype:float64 shape:(5, 5) non-zeros:0\n"
    self.assertEqual(str(B), outputStr)

    outputStr = "csarray dtype:float64 shape:(10,) non-zeros:3\n"
    outputStr += "(0) 23.0\n"
    outputStr += "(3) 1.2\n"
    outputStr += "(4) -8.0"
    self.assertEqual(str(self.a), outputStr)

    outputStr = "csarray dtype:float64 shape:(3,) non-zeros:0\n"
    self.assertEqual(str(self.c), outputStr)
def testHadamard(self):
    """
    Check that the element-wise (Hadamard) product agrees with dense numpy
    arithmetic.
    """
    # The Hadamard product of a matrix with itself is its element-wise square
    for M in (self.A, self.B, self.C, self.D, self.F):
        nptst.assert_array_equal(M.hadamard(M).toarray(), M.toarray() ** 2)

    left = csarray((5, 5))
    left[0, 1] = 4
    left[2, 3] = -1.2
    left[1, 3] = 2
    left[3, 3] = 1

    right = csarray((5, 5))
    right[0, 2] = 9.2
    right[2, 3] = -5
    right[3, 4] = 12
    right[3, 3] = 12

    # An empty matrix: multiplying by it must give an empty result
    empty = csarray((5, 5))

    nptst.assert_array_equal(left.hadamard(right).toarray(), left.toarray() * right.toarray())
    nptst.assert_array_equal(left.hadamard(empty).toarray(), empty.toarray())
def testGetOmegaListPtr(self):
    """
    Check that SparseUtils.getOmegaListPtr returns, for each row, the column
    indices of the non-zero entries, for scipy and sppy inputs.
    """
    import sppy
    m = 10
    n = 5
    X = scipy.sparse.rand(m, n, 0.1)
    X = X.tocsr()
    # Dense copy computed once instead of once per row
    Xdense = X.toarray()

    indPtr, colInds = SparseUtils.getOmegaListPtr(X)

    for i in range(m):
        omegai = colInds[indPtr[i]:indPtr[i + 1]]
        nptst.assert_array_almost_equal(omegai, Xdense[i, :].nonzero()[0])

    Xsppy = sppy.csarray(X)
    indPtr, colInds = SparseUtils.getOmegaListPtr(Xsppy)

    for i in range(m):
        omegai = colInds[indPtr[i]:indPtr[i + 1]]
        nptst.assert_array_almost_equal(omegai, Xdense[i, :].nonzero()[0])

    #Test a zero array (scipy doesn't work in this case)
    X = sppy.csarray((m, n))

    indPtr, colInds = SparseUtils.getOmegaListPtr(X)

    for i in range(m):
        omegai = colInds[indPtr[i]:indPtr[i + 1]]
        # A zero array has no non-zero columns in any row; previously this
        # loop computed the slice without asserting anything
        self.assertEqual(len(omegai), 0)
def testSub(self):
    """
    Check that subtracting csarrays matches dense numpy subtraction.
    """
    # Subtracting an array from itself must give all zeros
    for M in (self.A, self.B, self.C, self.D, self.F, self.G, self.H):
        nptst.assert_array_equal((M - M).toarray(), M.toarray() * 0)

    doubled = self.B * 2
    nptst.assert_array_equal((doubled - self.B).toarray(), self.B.toarray())

    # Two matrices with different sparsity patterns
    left = csarray((5, 5))
    left[0, 1] = 4
    left[1, 3] = 2
    left[3, 3] = 1

    right = csarray((5, 5))
    right[0, 2] = 9.2
    right[2, 3] = -5
    right[3, 4] = 12

    nptst.assert_array_equal((left - right).toarray(), left.toarray() - right.toarray())

    # 1-D fixtures
    for v in (self.a, self.b, self.c):
        nptst.assert_array_equal((v - v).toarray(), v.toarray() * 0)
def testDot(self):
    """
    Check csarray.dot against dense numpy results for square, rectangular
    and 1-D operands, and that incompatible shapes raise ValueError.
    """
    A = csarray((5, 5))
    A[0, 1] = 4
    A[2, 3] = -1.2
    A[1, 3] = 2
    A[3, 3] = 1

    B = A.dot(A)
    nptst.assert_array_equal(B.toarray(), A.toarray().dot(A.toarray()))

    B = self.D.dot(self.D)
    nptst.assert_array_equal(B.toarray(), self.D.toarray().dot(self.D.toarray()))

    # A fully populated 5 x 2 matrix
    C = csarray((5, 2))
    for i in range(5):
        for j in range(2):
            C[i, j] = 1

    # (5 x 2) . (5 x 2) has mismatched inner dimensions
    self.assertRaises(ValueError, C.dot, C)
    B = A.dot(C)
    nptst.assert_array_equal(B.toarray(), A.toarray().dot(C.toarray()))

    # The 1-D checks used to compare v.dot(v) with itself, which can never
    # fail; compare with the dense inner product instead
    for v in (self.a, self.b, self.c):
        dense = v.toarray()
        self.assertAlmostEqual(v.dot(v), dense.dot(dense))
def testHadamard(self):
    """
    Check that the element-wise (Hadamard) product agrees with dense numpy
    arithmetic for 2-D and 1-D arrays.
    """
    # The Hadamard product of a matrix with itself is its element-wise square
    for M in (self.A, self.B, self.C, self.D, self.F):
        nptst.assert_array_equal(M.hadamard(M).toarray(), M.toarray() ** 2)

    left = csarray((5, 5))
    left[0, 1] = 4
    left[2, 3] = -1.2
    left[1, 3] = 2
    left[3, 3] = 1

    right = csarray((5, 5))
    right[0, 2] = 9.2
    right[2, 3] = -5
    right[3, 4] = 12
    right[3, 3] = 12

    # An empty matrix: multiplying by it must give an empty result
    empty = csarray((5, 5))

    nptst.assert_array_equal(left.hadamard(right).toarray(), left.toarray() * right.toarray())
    nptst.assert_array_equal(left.hadamard(empty).toarray(), empty.toarray())

    # 1-D fixtures
    for v in (self.a, self.b, self.c):
        nptst.assert_array_equal(v.hadamard(v).toarray(), v.toarray() ** 2)
def testNDim(self):
    """
    Check that ndim is 2 for matrices (including an empty one) and 1 for the
    1-D fixtures.
    """
    A = csarray((5, 7))
    # assertEqual: the assertEquals alias was removed in Python 3.12
    self.assertEqual(A.ndim, 2)

    A = csarray((0, 0))
    self.assertEqual(A.ndim, 2)

    self.assertEqual(self.a.ndim, 1)
    self.assertEqual(self.b.ndim, 1)
def loadMatrix(filename):
    """
    Read a Matrix Market file with scipy.io.mmread and return its contents
    as a sppy.csarray.

    :param filename: The file to pass to scipy.io.mmread

    :returns: A sppy.csarray holding the matrix that was read

    :raises TypeError: If mmread returns neither a numpy array nor a scipy
        sparse matrix
    """
    M = scipy.io.mmread(filename)

    if isinstance(M, numpy.ndarray):
        return sppy.csarray(M)

    if scipy.sparse.issparse(M):
        M = M.tocoo()
        # Drop explicitly stored zeros. The old code paired M.nonzero(),
        # which skips them, with M.data, which keeps them, so the index and
        # value arrays could differ in length
        keep = M.data != 0
        M2 = sppy.csarray(M.shape, dtype=M.dtype)
        M2[(M.row[keep], M.col[keep])] = M.data[keep]
        return M2

    # Previously this case failed with UnboundLocalError on the return
    raise TypeError("Unsupported matrix type: " + str(type(M)))
def testNonZeroInds(self):
    """
    Check that nonzero() returns exactly the indices of the non-zero
    elements, for populated, empty and zero-size arrays in 1-D and 2-D.
    """
    for M in (self.B, self.C, self.F):
        (rowInds, colInds) = M.nonzero()

        for i in range(rowInds.shape[0]):
            self.assertNotEqual(M[rowInds[i], colInds[i]], 0)

        # assertEqual: the assertEquals alias was removed in Python 3.12
        self.assertEqual(M.getnnz(), rowInds.shape[0])
        # The returned indices must account for the whole sum
        self.assertEqual(M.sum(), M[rowInds, colInds].sum())

    (inds, ) = self.a.nonzero()
    for i in range(inds.shape[0]):
        self.assertNotEqual(self.a[inds[i]], 0)

    #Try an array with no non zeros
    nrow = 5
    ncol = 7
    A = csarray((nrow, ncol))
    (rowInds, colInds) = A.nonzero()

    self.assertEqual(A.getnnz(), rowInds.shape[0])
    self.assertEqual(rowInds.shape[0], 0)
    self.assertEqual(colInds.shape[0], 0)

    (inds, ) = self.c.nonzero()
    self.assertEqual(inds.shape[0], 0)

    #Zero size array
    nrow = 0
    ncol = 0
    A = csarray((nrow, ncol))
    (rowInds, colInds) = A.nonzero()
    self.assertEqual(A.getnnz(), rowInds.shape[0])
    self.assertEqual(rowInds.shape[0], 0)
    self.assertEqual(colInds.shape[0], 0)

    (inds, ) = self.d.nonzero()
    self.assertEqual(inds.shape[0], 0)
def testNDim(self):
    """
    Check that ndim is 2 for matrices (default and row storage, and an empty
    one) and 1 for the 1-D fixtures.
    """
    A = csarray((5, 7))
    # assertEqual: the assertEquals alias was removed in Python 3.12
    self.assertEqual(A.ndim, 2)

    A = csarray((5, 7), storagetype="row")
    self.assertEqual(A.ndim, 2)

    A = csarray((0, 0))
    self.assertEqual(A.ndim, 2)

    self.assertEqual(self.a.ndim, 1)
    self.assertEqual(self.b.ndim, 1)
def setUp(self):
    """
    Build the csarray fixtures shared by the tests: matrices A-F and the
    1-D arrays a-d.
    """
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

    # Empty 5 x 5 matrix
    self.A = csarray((5, 5))

    nrow = 5
    ncol = 7
    self.B = csarray((nrow, ncol))
    self.B[0, 1] = 1
    self.B[1, 3] = 5.2
    self.B[3, 3] = -0.2
    self.B[0, 6] = -1.23
    self.B[4, 4] = 12.2

    nrow = 100
    ncol = 100
    self.C = csarray((nrow, ncol))
    self.C[0, 1] = 1
    self.C[10, 3] = 5.2
    self.C[30, 34] = -0.2
    self.C[0, 62] = -1.23
    self.C[4, 41] = 12.2

    self.D = csarray((5, 5))
    self.D[0, 0] = 23.1
    self.D[2, 0] = -3.1
    self.D[3, 0] = -10.0
    self.D[2, 1] = -5
    self.D[3, 1] = 5

    # Zero-size matrix
    self.E = csarray((0, 0))

    # numpy.int and numpy.float were plain aliases of the builtins and were
    # removed in NumPy 1.24, so the builtins are used directly
    self.F = csarray((6, 6), dtype=int)
    self.F[0, 0] = 23
    self.F[2, 0] = -3
    self.F[3, 0] = -10
    self.F[2, 1] = -5
    self.F[3, 1] = 5

    self.a = csarray(10, dtype=float)
    self.a[0] = 23
    self.a[3] = 1.2
    self.a[4] = -8

    self.b = csarray(10, dtype=int)
    self.b[0] = 23
    self.b[5] = 1
    self.b[8] = -8

    # Empty and zero-size 1-D arrays
    self.c = csarray((3, ), dtype=float)

    self.d = csarray((0, ), dtype=float)
def testSetItem(self):
    """
    Check element assignment: single values, overwriting, invalid indices
    and values, and assignment through index arrays.
    """
    nrow = 5
    ncol = 7
    nonZeroInds = [(0, 1), (1, 3), (3, 3)]

    A = csarray((nrow, ncol))
    A[0, 1] = 1
    A[1, 3] = 5.2
    A[3, 3] = -0.2

    # assertEqual / assertAlmostEqual: the assertEquals and
    # assertAlmostEquals aliases were removed in Python 3.12
    self.assertEqual(A[0, 1], 1)
    self.assertAlmostEqual(A[1, 3], 5.2)
    self.assertAlmostEqual(A[3, 3], -0.2)

    # Every other element must still be zero
    for i in range(nrow):
        for j in range(ncol):
            if (i, j) not in nonZeroInds:
                self.assertEqual(A[i, j], 0)

    # Out-of-range and negative indices raise ValueError, bad values TypeError
    self.assertRaises(ValueError, A.__setitem__, (20, 1), 1)
    self.assertRaises(TypeError, A.__setitem__, (1, 1), "a")
    self.assertRaises(ValueError, A.__setitem__, (1, 100), 1)
    self.assertRaises(ValueError, A.__setitem__, (-1, 1), 1)
    self.assertRaises(ValueError, A.__setitem__, (0, -1), 1)

    result = A[(numpy.array([0, 1, 3]), numpy.array([1, 3, 3]))]
    self.assertEqual(result[0], 1)
    self.assertEqual(result[1], 5.2)
    self.assertEqual(result[2], -0.2)

    #Replace value of A
    A[0, 1] = 2
    self.assertEqual(A[0, 1], 2)
    self.assertAlmostEqual(A[1, 3], 5.2)
    self.assertAlmostEqual(A[3, 3], -0.2)

    for i in range(nrow):
        for j in range(ncol):
            if (i, j) not in nonZeroInds:
                self.assertEqual(A[i, j], 0)

    #Try setting items with arrays
    A = csarray((nrow, ncol))
    A[numpy.array([0, 1]), numpy.array([2, 3])] = numpy.array([1.2, 2.4])

    self.assertEqual(A.getnnz(), 2)
    self.assertEqual(A[0, 2], 1.2)
    self.assertEqual(A[1, 3], 2.4)

    # A scalar is assigned to every indexed position
    A[numpy.array([2, 4]), numpy.array([2, 3])] = 5
    self.assertEqual(A[2, 2], 5)
    self.assertEqual(A[4, 3], 5)
def time_ns():
    """
    Time several truncated SVD implementations on random sparse n x n
    matrices of fixed density as n varies, and save a plot of the timings to
    "time_ns.png".

    Relies on the module-level names var_range, k, p, n_iter, reps,
    time_reps, svds, svdp and truncated_svd.
    """
    density = 10**-3
    ns = var_range * 10**4
    # One row of timings per method; row 2 stays zero because the SparseSVD
    # benchmark is disabled
    times = numpy.zeros((5, ns.shape[0]))

    for i, n in enumerate(ns):
        # Generate random sparse matrix. n * n * density is a float, which
        # numpy rejects as an array size, so convert it to an int first
        nnz = int(n * n * density)
        inds = numpy.random.randint(n, size=(2, nnz))
        data = numpy.random.rand(nnz)
        A = scipy.sparse.csc_matrix((data, inds), (n, n))
        A_sppy = sppy.csarray(A, storagetype="row")
        L = GeneralLinearOperator.asLinearOperator(A_sppy, parallel=True)
        print(A.shape, A.nnz)

        times[0, i] = time_reps(svds, (A, k), reps)
        times[1, i] = time_reps(svdp, (A, k), reps)
        times[3, i] = time_reps(truncated_svd.fit, (A,), reps)
        times[4, i] = time_reps(sppy.linalg.rsvd, (L, k, p, n_iter), reps)
        print(n, density, times[:, i])

    plt.figure(1)
    plt.plot(ns, times[0, :], 'k-', label="ARPACK")
    plt.plot(ns, times[1, :], 'r-', label="PROPACK")
    plt.plot(ns, times[3, :], 'k--', label="sklearn RSVD")
    plt.plot(ns, times[4, :], 'r--', label="sppy RSVD")
    plt.legend(loc="upper left")
    plt.xlabel("n")
    plt.ylabel("time (s)")
    plt.savefig("time_ns.png", format="png")
def testSplitNnz(self):
    """
    Check that the two parts returned by SparseUtils.splitNnz add back up to
    the original matrix, for scipy and sppy inputs.
    """
    import sppy
    numRuns = 100

    def randomCsc():
        # Random matrix of random size, with half of the entries non-zero
        numRows = numpy.random.randint(5, 50)
        numCols = numpy.random.randint(5, 50)
        return scipy.sparse.rand(numRows, numCols, 0.5).tocsc()

    for _ in range(numRuns):
        X = randomCsc()

        split = numpy.random.rand()
        X1, X2 = SparseUtils.splitNnz(X, split)

        nptst.assert_array_almost_equal((X1 + X2).todense(), X.todense())

    for _ in range(numRuns):
        X = sppy.csarray(randomCsc())

        split = numpy.random.rand()
        X1, X2 = SparseUtils.splitNnz(X, split)

        nptst.assert_array_almost_equal((X1 + X2).toarray(), X.toarray())
def testMean(self):
    """
    Check mean() over the whole array and along each axis, including empty
    and zero-size arrays.
    """
    # assertEqual / assertAlmostEqual: the assertEquals and
    # assertAlmostEquals aliases were removed in Python 3.12
    self.assertEqual(self.A.mean(), 0)
    self.assertAlmostEqual(self.B.mean(), 0.4848571428571428)
    self.assertAlmostEqual(self.C.mean(), 0.001697)
    self.assertAlmostEqual(self.H.mean(), 0.4848571428571428)

    # The mean of a zero-size array is undefined
    D = csarray((0, 0))
    self.assertTrue(math.isnan(D.mean()))

    self.assertEqual(self.F.mean(), 10 / float(36))

    for axis in (0, 1):
        for M in (self.A, self.B, self.C, self.D, self.G, self.H):
            nptst.assert_array_equal(M.mean(axis), M.sum(axis) / M.shape[axis])

        # F has an integer dtype, so force a floating point division
        nptst.assert_array_equal(self.F.mean(axis), self.F.sum(axis) / float(self.F.shape[axis]))

    self.assertEqual(self.a.mean(), 1.6199999999999999)
    self.assertEqual(self.b.mean(), 1.6)
    self.assertEqual(self.c.mean(), 0.0)
    self.assertTrue(math.isnan(self.d.mean()))
def submatrix(X, inds):
    """
    Pick out a subset of the non-zero elements of a sparse matrix.

    :param X: A scipy sparse matrix, or otherwise an object assumed to be a
        sppy csarray

    :param inds: Either a numpy array of positions into the non-zero
        elements of X, or a number of non-zero elements to select at random

    :returns: A matrix with the shape of X containing only the selected
        elements. A CSR input gives a CSR matrix, any other scipy sparse
        input gives a CSC matrix, and a sppy array gives a sppy array with
        the same storage type and dtype.
    """
    if not isinstance(inds, numpy.ndarray):
        # inds is a count: choose that many stored elements at random
        inds = numpy.random.permutation(X.nnz)[0:inds]

    if scipy.sparse.issparse(X):
        if scipy.sparse.isspmatrix_csr(X):
            resultType = scipy.sparse.csr_matrix
        else:
            resultType = scipy.sparse.csc_matrix

            if not scipy.sparse.isspmatrix_csc(X):
                # Other formats used to fall through and return None.
                # Convert to CSR, which supports the index-array lookup below
                X = X.tocsr()

        rowInds, colInds = X.nonzero()
        # Read the values before the indices are cut down to the selection
        vals = numpy.array(X[rowInds, colInds]).ravel()[inds]
        rowInds = rowInds[inds]
        colInds = colInds[inds]

        return resultType((vals, (rowInds, colInds)), X.shape)

    #Assume a sppy array
    rowInds, colInds = X.nonzero()
    rowInds = rowInds[inds]
    colInds = colInds[inds]
    vals = X.values()[inds]

    import sppy
    Y = sppy.csarray(X.shape, storagetype=X.storagetype, dtype=X.dtype)
    Y.put(vals, rowInds, colInds, init=True)
    return Y
def sparseMatrix(vals, rowInds, colInds, shape, mattype, storagetype="col"):
    """
    Create a sparse matrix of the given mattype with
    X[rowInds, colInds] = vals.

    :param vals: The values to store (a numpy array)

    :param rowInds: The row index of each value

    :param colInds: The column index of each value

    :param shape: The shape of the matrix

    :param mattype: Either "csarray" (a sppy array) or "scipy"

    :param storagetype: "row" or "col"; for "scipy" this selects a CSR or a
        CSC matrix respectively

    :raises ValueError: If mattype is unknown, or if mattype is "scipy" and
        storagetype is unknown
    """
    if mattype == "csarray":
        # Imported here so that the scipy path works without sppy installed
        import sppy

        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        X = sppy.csarray(shape, dtype=vals.dtype, storagetype=storagetype)
        X.put(vals, rowInds, colInds, True)
    elif mattype == "scipy":
        if storagetype == "row":
            X = scipy.sparse.csr_matrix((vals, (rowInds, colInds)), shape=shape)
        elif storagetype == "col":
            X = scipy.sparse.csc_matrix((vals, (rowInds, colInds)), shape=shape)
        else:
            # str() keeps this a ValueError even for non-string arguments
            raise ValueError("Unknown storagetype: " + str(storagetype))
    else:
        raise ValueError("Unknown mattype: " + str(mattype))

    return X
def rand(shape, density, dtype=float, storagetype="col"):
    """
    Generate a random sparse array with the given shape, density and dtype.
    Positions are drawn with replacement, so the result can contain fewer
    distinct non-zero positions than int(size*density).

    :param shape: The shape of the output array (m, n)

    :param density: The proportion of non zero elements to create

    :param dtype: The data type of the output array (only supports floats at the moment)

    :param storagetype: The storage type of the csarray ("row" or "col")
    :type storagetype: `str`
    """
    # The default dtype is the builtin float: numpy.float was an alias of it
    # and was removed in NumPy 1.24, which made this definition fail
    result = csarray(shape, dtype, storagetype=storagetype)
    size = result.size
    numEntries = int(size * density)
    # Flat positions of the entries to fill
    inds = numpy.random.randint(0, size, numEntries)

    if result.ndim == 2:
        rowInds, colInds = numpy.unravel_index(inds, shape)
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)
        result.put(numpy.array(numpy.random.rand(numEntries), dtype), rowInds, colInds, init=True)
    elif result.ndim == 1:
        result[inds] = numpy.array(numpy.random.rand(numEntries), dtype)

    return result
def generateSparseBinaryMatrix(shape, p, w=0.9, sd=0, csarray=False, verbose=False, indsPerRow=50):
    """
    Create an underlying matrix Z = UsV.T of rank p and then threshold each
    row to give a 0/1 matrix: in row i, entries greater than or equal to
    r[i] become 1 and the others become 0. The thresholds r are computed by
    SparseUtilsCython.computeR2 from per-row quantiles wv, which are drawn
    around w with standard deviation sd and clipped to [0, 1]. According to
    the original description, w=0 keeps all numbers and w=1.0 keeps none.

    :param shape: The shape (m, n) of the matrix

    :param p: The rank of the underlying matrix

    :param w: The mean quantile used for the row thresholds

    :param sd: The standard deviation of the per-row quantiles

    :param csarray: If True return a sppy csarray with row storage,
        otherwise a scipy CSR matrix

    :param verbose: If True return (X, U, s, V, wv) rather than just X

    :param indsPerRow: Passed through to SparseUtilsCython.computeR2
    """
    m, n = shape
    U, s, V = SparseUtils.generateLowRank(shape, p)

    X = (U*s).dot(V.T)

    wv = numpy.random.randn(m)*sd + w
    wv = numpy.clip(wv, 0, 1)
    r = SparseUtilsCython.computeR2((U*s), V, wv, indsPerRow=indsPerRow)

    for i in range(m):
        # Compute both masks before writing. The old code tested "< r[i]"
        # after the ones had been written, so whenever r[i] > 1 those ones
        # were immediately reset to 0
        above = X[i, :] >= r[i]
        below = X[i, :] < r[i]
        X[i, above] = 1
        X[i, below] = 0

    if csarray:
        import sppy
        X = sppy.csarray(X, storagetype="row")
    else:
        X = scipy.sparse.csr_matrix(X)

    if verbose:
        return X, U, s, V, wv
    else:
        return X
def testReserve(self):
    """
    Check that elements can be assigned after calling reserve().
    """
    A = csarray((5, 5))
    A.reserve(5)

    # (index, value) pairs, assigned in this order
    entries = [((0, 1), 4), ((2, 3), -1.2), ((1, 3), 2), ((3, 3), 1)]
    for ind, val in entries:
        A[ind] = val
def testCopy(self):
    """
    Check that copy() returns an independent array: changing the original
    afterwards must not change the copy, and vice versa.
    """
    A = csarray((5, 5))
    A[0, 0] = 1
    A[1, 0] = 2
    A[4, 2] = 3

    # assertEqual: the assertEquals alias was removed in Python 3.12
    self.assertEqual(A[0, 0], 1)
    self.assertEqual(A[1, 0], 2)
    self.assertEqual(A[4, 2], 3)

    B = A.copy()

    # Modify the original after the copy was taken
    A[0, 0] = 2
    A[1, 0] = 3
    A[4, 2] = 4
    A[4, 4] = 5

    self.assertEqual(A[0, 0], 2)
    self.assertEqual(A[1, 0], 3)
    self.assertEqual(A[4, 2], 4)
    self.assertEqual(A[4, 4], 5)
    self.assertEqual(A.getnnz(), 4)

    # The copy still holds the old values
    self.assertEqual(B[0, 0], 1)
    self.assertEqual(B[1, 0], 2)
    self.assertEqual(B[4, 2], 3)
    self.assertEqual(B.getnnz(), 3)

    # Modifying a copy must leave the original alone
    F = self.F.copy()
    F[0, 0] = -15
    self.assertEqual(F[0, 0], -15)
    self.assertEqual(self.F[0, 0], 23)
def testDiag(self):
    """
    Check diag() on square, rectangular, zero-size and integer matrices.
    """
    expectedDiags = [
        (self.A, numpy.zeros(5)),
        (self.B, numpy.array([0, 0, 0, -0.2, 12.2])),
        (self.C, numpy.zeros(100)),
    ]
    for M, expected in expectedDiags:
        nptst.assert_array_equal(M.diag(), expected)

    # A matrix with a fully populated diagonal
    full = csarray((3, 3))
    for i, val in enumerate([-1, 3.2, 34]):
        full[i, i] = val
    nptst.assert_array_equal(full.diag(), numpy.array([-1, 3.2, 34]))

    # A zero-size matrix has an empty diagonal
    empty = csarray((0, 0))
    nptst.assert_array_equal(empty.diag(), numpy.array([]))

    nptst.assert_array_equal(self.F.diag(), numpy.array([23, 0, 0, 0, 0, 0]))
def testSum(self):
    """
    Check sum() over the whole array and along each axis.
    """
    nrow = 5
    ncol = 7
    A = csarray((nrow, ncol))
    A[0, 1] = 1
    A[1, 3] = 5.2
    A[3, 3] = -0.2

    # assertEqual / assertAlmostEqual: the assertEquals and
    # assertAlmostEquals aliases were removed in Python 3.12
    self.assertEqual(A.sum(), 6.0)

    A[3, 4] = -1.2
    self.assertEqual(A.sum(), 4.8)

    A[0, 0] = 1.34
    self.assertEqual(A.sum(), 6.14)

    # Setting an element back to zero removes its contribution
    A[0, 0] = 0
    self.assertEqual(A.sum(), 4.8)

    self.assertEqual(self.A.sum(), 0.0)
    self.assertEqual(self.B.sum(), 16.97)
    self.assertEqual(self.C.sum(), 16.97)
    self.assertAlmostEqual(self.D.sum(), 10)
    self.assertEqual(self.F.sum(), 10)

    #Test sum along axes
    nptst.assert_array_equal(self.A.sum(0), numpy.zeros(5))
    nptst.assert_array_equal(self.B.sum(0), numpy.array([0, 1, 0, 5, 12.2, 0, -1.23]))
    nptst.assert_array_equal(self.D.sum(0), numpy.array([10, 0, 0, 0, 0]))

    nptst.assert_array_equal(self.A.sum(1), numpy.zeros(5))
    nptst.assert_array_almost_equal(self.B.sum(1), numpy.array([-0.23, 5.2, 0, -0.2, 12.2]))
    nptst.assert_array_equal(self.D.sum(1), numpy.array([23.1, 0, -8.1, -5, 0]))
def setDiff(self, graph):
    """
    Find the edges in the current graph which are not present in the input
    graph. The result is built from the adjacency matrices of the two
    graphs, so edge weights are replaced with adjacencies.

    :param graph: the input graph.
    :type graph: :class:`apgl.graph.CsArrayGraph`

    :returns: A new graph which is the set difference of the edges of this graph and graph.

    :raises ValueError: If the graphs differ in their number of vertices or
        in whether they are directed
    """
    Parameter.checkClass(graph, CsArrayGraph)
    if graph.getNumVertices() != self.getNumVertices():
        # The old message was copied from elsewhere and talked about adding
        # edges
        raise ValueError(
            "Can only take the set difference with a graph with the same number of vertices")
    if self.undirected != graph.undirected:
        raise ValueError(
            "Both graphs must be either undirected or directed")

    A1 = self.adjacencyMatrix()
    A2 = graph.adjacencyMatrix()
    # For 0/1 adjacencies the difference is 1 for an edge only in this
    # graph, -1 for an edge only in the input graph and 0 otherwise
    A1 = A1 - A2
    # (x + x**2) / 2 maps 1 to 1 and both 0 and -1 to 0
    A1 = (A1 + numpy.abs(A1**2)) / 2

    newGraph = CsArrayGraph(self.vList, self.undirected)
    newGraph.W = sppy.csarray(A1)
    return newGraph
def profileDot2(self):
    """
    Profile the product of a random row-storage csarray with a
    column-storage csarray built from its transpose.
    """
    shape = (10000, 10000)
    density = 0.01

    # These local names are referenced by the profiled expression below, so
    # they must not be renamed
    a_sppy = sppy.rand(shape, density, storagetype='row')
    a_sppy_T = sppy.csarray(a_sppy.T, storagetype="col")

    command = 'a_sppy.dot(a_sppy_T)'
    ProfileUtils.profile(command, globals(), locals())
def rand(shape, density, dtype=float, storagetype="col"):
    """
    Generate a random sparse array with the given shape, density and dtype.
    Positions are drawn with replacement, so the result can contain fewer
    distinct non-zero positions than int(size*density).

    :param shape: The shape of the output array (m, n)

    :param density: The proportion of non zero elements to create

    :param dtype: The data type of the output array (only supports floats at the moment)

    :param storagetype: The storage type of the csarray ("row" or "col")
    :type storagetype: `str`
    """
    # The default dtype is the builtin float: numpy.float was an alias of it
    # and was removed in NumPy 1.24, which made this definition fail
    result = csarray(shape, dtype, storagetype=storagetype)

    size = result.size
    numEntries = int(size*density)

    # Flat positions of the entries to fill
    inds = numpy.random.randint(0, size, numEntries)

    if result.ndim == 2:
        rowInds, colInds = numpy.unravel_index(inds, shape)
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)
        result.put(numpy.array(numpy.random.rand(numEntries), dtype), rowInds, colInds, init=True)
    elif result.ndim == 1:
        result[inds] = numpy.array(numpy.random.rand(numEntries), dtype)

    return result
def profileDot(self):
    """
    Profile A.dot(W) and A.pdot(W) for a large random sparse matrix A with
    row storage and a dense numpy matrix W.
    """
    #Create random sparse matrix and numpy array
    #Test speed of array creation
    numpy.random.seed(21)
    m = 1000000
    n = 1000000
    numInds = 10000000

    inds = numpy.random.randint(0, m*n, numInds)
    # Remove repeated positions
    inds = numpy.unique(inds)
    vals = numpy.random.randn(inds.shape[0])

    # numpy only accepts "C" or "F" as the order; "FORTRAN" is not valid
    rowInds, colInds = numpy.unravel_index(inds, (m, n), order="F")
    rowInds = numpy.array(rowInds, numpy.int32)
    colInds = numpy.array(colInds, numpy.int32)

    # storagetype="row" is the spelling every other csarray call uses; this
    # one passed storageType="rowMajor"
    A = csarray((m, n), storagetype="row")
    A.put(vals, rowInds, colInds, True)
    A.compress()

    p = 500
    W = numpy.random.rand(n, p)

    # The profiled expressions refer to the local names A and W
    ProfileUtils.profile('A.dot(W)', globals(), locals())

    #Compare versus pdot
    ProfileUtils.profile('A.pdot(W)', globals(), locals())
def testCompress(self):
    """
    Check that compress() can be called on a populated array.
    """
    A = csarray((5, 5))

    # (index, value) pairs, assigned in this order
    entries = [((0, 1), 4), ((2, 3), -1.2), ((1, 3), 2), ((3, 3), 1)]
    for ind, val in entries:
        A[ind] = val

    A.compress()
def epinions(minNnzRows=10, minNnzCols=3, quantile=90):
    """
    Load the Epinions ratings from "epinions/rating.mat" in the data
    directory as a binary user-item matrix, with a 1 where the rating is
    greater than 3.

    :param minNnzRows: Passed to SparseUtils.pruneMatrixRowAndCols

    :param minNnzCols: Passed to SparseUtils.pruneMatrixRowAndCols

    :param quantile: Not used by this function

    :returns: The pruned matrix
    """
    matrixFileName = PathDefaults.getDataDir() + "epinions/rating.mat"
    A = scipy.io.loadmat(matrixFileName)["rating"]

    userIndexer = IdIndexer("i")
    itemIndexer = IdIndexer("i")

    # Column 0 is fed to the user indexer and column 1 to the item indexer
    for i in range(A.shape[0]):
        userIndexer.append(A[i, 0])
        itemIndexer.append(A[i, 1])

    rowInds = userIndexer.getArray()
    colInds = itemIndexer.getArray()
    ratings = A[:, 3]

    # The builtin int replaces numpy.int, an alias removed in NumPy 1.24
    X = sppy.csarray((len(userIndexer.getIdDict()), len(itemIndexer.getIdDict())), storagetype="row", dtype=int)
    X.put(numpy.array(ratings > 3, int), numpy.array(rowInds, numpy.int32), numpy.array(colInds, numpy.int32), init=True)
    X.prune()

    X = SparseUtils.pruneMatrixRowAndCols(X, minNnzRows, minNnzCols)

    logging.debug("Read file: " + matrixFileName)
    logging.debug("Non zero elements: " + str(X.nnz) + " shape: " + str(X.shape))

    return X
def testMean(self):
    """
    Check mean() over the whole array and along each axis, including a
    zero-size array.
    """
    # assertEqual / assertAlmostEqual: the assertEquals and
    # assertAlmostEquals aliases were removed in Python 3.12
    self.assertEqual(self.A.mean(), 0)
    self.assertAlmostEqual(self.B.mean(), 0.4848571428571428)
    self.assertAlmostEqual(self.C.mean(), 0.001697)

    # The mean of a zero-size array is undefined
    D = csarray((0, 0))
    self.assertTrue(math.isnan(D.mean()))

    self.assertEqual(self.F.mean(), 10 / float(36))

    for axis in (0, 1):
        for M in (self.A, self.B, self.C, self.D):
            nptst.assert_array_equal(M.mean(axis), M.sum(axis) / M.shape[axis])

        # F has an integer dtype, so force a floating point division
        nptst.assert_array_equal(self.F.mean(axis), self.F.sum(axis) / float(self.F.shape[axis]))
def flixster(minNnzRows=10, minNnzCols=2, quantile=90):
    """
    Load the Flixster ratings from "flixster/Ratings.timed.txt" in the data
    directory as a binary user-movie matrix, with a 1 where the rating is
    greater than 3.

    :param minNnzRows: Passed to SparseUtils.pruneMatrixRowAndCols

    :param minNnzCols: Passed to SparseUtils.pruneMatrixRowAndCols

    :param quantile: Not used by this function

    :returns: The pruned matrix
    """
    matrixFileName = PathDefaults.getDataDir() + "flixster/Ratings.timed.txt"

    userIndexer = IdIndexer("i")
    movieIndexer = IdIndexer("i")

    ratings = array.array("f")
    logging.debug("Loading ratings from " + matrixFileName)

    # The with block closes the file; it used to be left open
    with open(matrixFileName) as matrixFile:
        # Skip the first line (presumably a header - confirm against the data)
        matrixFile.readline()

        for i, line in enumerate(matrixFile):
            if i % 1000000 == 0:
                logging.debug("Iteration: " + str(i))
            vals = line.split()

            # Fields are read as: user id, movie id, rating
            userIndexer.append(vals[0])
            movieIndexer.append(vals[1])
            ratings.append(float(vals[2]))

    rowInds = userIndexer.getArray()
    colInds = movieIndexer.getArray()
    ratings = numpy.array(ratings)

    # The builtin int replaces numpy.int, an alias removed in NumPy 1.24
    X = sppy.csarray((len(userIndexer.getIdDict()), len(movieIndexer.getIdDict())), storagetype="row", dtype=int)
    X.put(numpy.array(ratings > 3, int), numpy.array(rowInds, numpy.int32), numpy.array(colInds, numpy.int32), init=True)
    X.prune()

    X = SparseUtils.pruneMatrixRowAndCols(X, minNnzRows, minNnzCols)

    logging.debug("Read file: " + matrixFileName)
    logging.debug("Non zero elements: " + str(X.nnz) + " shape: " + str(X.shape))

    return X
def testSplitNnz(self):
    """
    Check that the two parts returned by SparseUtils.splitNnz add back up to
    the original matrix, for scipy and sppy inputs.
    """
    import sppy
    numRuns = 100

    def randomCsc():
        # Random matrix of random size, with half of the entries non-zero
        numRows = numpy.random.randint(5, 50)
        numCols = numpy.random.randint(5, 50)
        return scipy.sparse.rand(numRows, numCols, 0.5).tocsc()

    for _ in range(numRuns):
        X = randomCsc()

        split = numpy.random.rand()
        X1, X2 = SparseUtils.splitNnz(X, split)

        nptst.assert_array_almost_equal((X1 + X2).todense(), X.todense())

    for _ in range(numRuns):
        X = sppy.csarray(randomCsc())

        split = numpy.random.rand()
        X1, X2 = SparseUtils.splitNnz(X, split)

        nptst.assert_array_almost_equal((X1 + X2).toarray(), X.toarray())
def time_ns():
    """
    Time several truncated SVD implementations on random sparse n x n
    matrices of fixed density as n varies, and save a plot of the timings to
    "time_ns.png".

    Relies on the module-level names var_range, k, p, n_iter, reps,
    time_reps, svds, svdp and truncated_svd.
    """
    density = 10**-3
    ns = var_range * 10**4
    # One row of timings per method; row 2 stays zero because the SparseSVD
    # benchmark is disabled
    times = numpy.zeros((5, ns.shape[0]))

    for i, n in enumerate(ns):
        # Generate random sparse matrix. n * n * density is a float, which
        # numpy rejects as an array size, so convert it to an int first
        nnz = int(n * n * density)
        inds = numpy.random.randint(n, size=(2, nnz))
        data = numpy.random.rand(nnz)
        A = scipy.sparse.csc_matrix((data, inds), (n, n))
        A_sppy = sppy.csarray(A, storagetype="row")
        L = GeneralLinearOperator.asLinearOperator(A_sppy, parallel=True)
        print(A.shape, A.nnz)

        times[0, i] = time_reps(svds, (A, k), reps)
        times[1, i] = time_reps(svdp, (A, k), reps)
        times[3, i] = time_reps(truncated_svd.fit, (A, ), reps)
        times[4, i] = time_reps(sppy.linalg.rsvd, (L, k, p, n_iter), reps)
        print(n, density, times[:, i])

    plt.figure(1)
    plt.plot(ns, times[0, :], 'k-', label="ARPACK")
    plt.plot(ns, times[1, :], 'r-', label="PROPACK")
    plt.plot(ns, times[3, :], 'k--', label="sklearn RSVD")
    plt.plot(ns, times[4, :], 'r--', label="sppy RSVD")
    plt.legend(loc="upper left")
    plt.xlabel("n")
    plt.ylabel("time (s)")
    plt.savefig("time_ns.png", format="png")
def ones(shape, dtype=float):
    """
    Create a ones matrix of the given shape and dtype. Generally a bad idea
    for large matrices.

    :param shape: The shape of the output array

    :param dtype: The data type of the output array
    """
    # The default dtype is the builtin float: numpy.float was an alias of it
    # and was removed in NumPy 1.24, which made this definition fail
    result = csarray(shape, dtype)
    result.ones()
    return result
def diag(x):
    """
    Build a square sparse array with the entries of the 1D numpy array x on
    its diagonal. The result has the dtype of x.
    """
    n = x.shape[0]
    result = csarray((n, n), x.dtype)

    # Entry i of x goes to position (i, i)
    rowInds = numpy.arange(n)
    colInds = numpy.arange(n)
    result[(rowInds, colInds)] = x

    return result
def testStr(self):
    """
    Check the string representation of a populated and an empty 2-D array.
    """
    nrow = 5
    ncol = 7

    A = csarray((nrow, ncol))
    A[0, 1] = 1
    A[1, 3] = 5.2
    A[3, 3] = -0.2

    outputStr = "csarray dtype:float64 shape:(5, 7) non-zeros:3\n"
    outputStr += "(0, 1) 1.0\n"
    outputStr += "(1, 3) 5.2\n"
    outputStr += "(3, 3) -0.2"
    # assertEqual: the assertEquals alias was removed in Python 3.12
    self.assertEqual(str(A), outputStr)

    # An empty array only prints the header line
    B = csarray((5, 5))
    outputStr = "csarray dtype:float64 shape:(5, 5) non-zeros:0\n"
    self.assertEqual(str(B), outputStr)
def testDiag(self):
    """Compare diag() of the setUp fixtures and of local arrays with the expected values."""
    # Fixtures created in setUp
    nptst.assert_array_equal(self.A.diag(), numpy.zeros(5))
    nptst.assert_array_equal(self.B.diag(), numpy.array([0, 0, 0, -0.2, 12.2]))
    nptst.assert_array_equal(self.C.diag(), numpy.zeros(100))

    # Array with only diagonal entries set
    diagValues = [-1, 3.2, 34]
    fullDiag = csarray((3, 3))
    for ind, value in enumerate(diagValues):
        fullDiag[ind, ind] = value
    nptst.assert_array_equal(fullDiag.diag(), numpy.array(diagValues))

    # Zero-size array
    emptyArray = csarray((0, 0))
    nptst.assert_array_equal(emptyArray.diag(), numpy.array([]))

    nptst.assert_array_equal(self.F.diag(), numpy.array([23, 0, 0, 0, 0, 0]))
def setUp(self):
    """
    Configure logging to stdout and build the csarray fixtures used by the
    tests: A-F are created from 2-tuples of dimensions, a and b from the
    scalar shape 10, and c from the 1-tuple (3, ).
    """
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

    # Empty 5 x 5 array
    self.A = csarray((5, 5))

    # 5 x 7 array with five stored entries
    nrow = 5
    ncol = 7
    self.B = csarray((nrow, ncol))
    self.B[0, 1] = 1
    self.B[1, 3] = 5.2
    self.B[3, 3] = -0.2
    self.B[0, 6] = -1.23
    self.B[4, 4] = 12.2

    # 100 x 100 array with five stored entries
    nrow = 100
    ncol = 100
    self.C = csarray((nrow, ncol))
    self.C[0, 1] = 1
    self.C[10, 3] = 5.2
    self.C[30, 34] = -0.2
    self.C[0, 62] = -1.23
    self.C[4, 41] = 12.2

    # 5 x 5 array with entries in the first two columns only
    self.D = csarray((5, 5))
    self.D[0, 0] = 23.1
    self.D[2, 0] = -3.1
    self.D[3, 0] = -10.0
    self.D[2, 1] = -5
    self.D[3, 1] = 5

    # Zero-size array
    self.E = csarray((0, 0))

    # numpy.int and numpy.float were aliases of the builtin int and float and
    # were removed in NumPy 1.24, so the builtins are used directly.
    self.F = csarray((6, 6), dtype=int)
    self.F[0, 0] = 23
    self.F[2, 0] = -3
    self.F[3, 0] = -10
    self.F[2, 1] = -5
    self.F[3, 1] = 5

    self.a = csarray(10, dtype=float)
    self.a[0] = 23
    self.a[3] = 1.2
    self.a[4] = -8

    self.b = csarray(10, dtype=int)
    self.b[0] = 23
    self.b[5] = 1
    self.b[8] = -8

    # No entries assigned
    self.c = csarray((3, ), dtype=float)
def testNonZeroInds(self):
    """
    Check that nonzero() returns indices of non-zero entries only, that their
    number matches getnnz() and that they account for the full sum, and that
    arrays without stored entries yield empty index arrays.
    """
    # assertEqual is used throughout: the assertEquals alias was removed in
    # Python 3.12.
    for array in (self.B, self.C, self.F):
        (rowInds, colInds) = array.nonzero()

        for rowInd, colInd in zip(rowInds, colInds):
            self.assertNotEqual(array[rowInd, colInd], 0)

        self.assertEqual(array.getnnz(), rowInds.shape[0])
        self.assertEqual(array.sum(), array[rowInds, colInds].sum())

    # An array with no non zeros, then a zero size array
    for shape in ((5, 7), (0, 0)):
        A = csarray(shape)
        (rowInds, colInds) = A.nonzero()
        self.assertEqual(A.getnnz(), rowInds.shape[0])
        self.assertEqual(rowInds.shape[0], 0)
        self.assertEqual(colInds.shape[0], 0)
def testRecommendAtk(self):
    """
    Compare MCEvaluatorCython.recommendAtk (given a sparse matrix X) with
    MCEvaluator.recommendAtk (given the equivalent omegaList), and check that
    none of the items listed for a row in omegaList is recommended for it.
    """
    import sppy

    m = 20
    n = 50
    r = 3

    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)
    X = sppy.csarray(X)

    k = 10

    # Replace X with a matrix holding exactly 5 random ones per row; only the
    # shape of the generated X is reused.
    X = numpy.zeros(X.shape)
    omegaList = []
    for i in range(m):
        omegaList.append(numpy.random.permutation(n)[0:5])
        X[i, omegaList[i]] = 1

    X = sppy.csarray(X)

    orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
    orderedItems2 = MCEvaluator.recommendAtk(U, V, k, omegaList=omegaList)

    # Positions holding -1 in the second result are left out of the comparison.
    nptst.assert_array_equal(orderedItems[orderedItems2 != -1], orderedItems2[orderedItems2 != -1])

    for i in range(m):
        items = numpy.intersect1d(omegaList[i], orderedItems[i, :])
        # assertEqual replaces the assertEquals alias removed in Python 3.12.
        self.assertEqual(items.shape[0], 0)

        #items = numpy.union1d(omegaList[i], orderedItems[i, :])
        #items = numpy.intersect1d(items, orderedItems2[i, :])
        #nptst.assert_array_equal(items, numpy.sort(orderedItems2[i, :]))

    #Now let's have an all zeros X
    X = sppy.csarray(X.shape)
    orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
    orderedItems2 = MCEvaluator.recommendAtk(U, V, k)

    nptst.assert_array_equal(orderedItems, orderedItems2)
def profileGetOmegaList(self):
    """
    Profile SparseUtils.getOmegaList on a matrix produced by
    SparseUtils.generateSparseLowRank and converted to an sppy csarray.
    """
    import sppy

    # Arguments for generateSparseLowRank
    shape = (20000, 15000)
    r = 50
    k = 10**6

    # The local name X must be kept: the profiled statement below refers to
    # it through locals().
    X = sppy.csarray(SparseUtils.generateSparseLowRank(shape, r, k))

    ProfileUtils.profile('SparseUtils.getOmegaList(X)', globals(), locals())
def addVertices(self, n):
    """
    Grow the graph by n vertices. A new, larger weight matrix is allocated
    and the existing weights are copied into it, so this is not an efficient
    operation. The existing vertices keep their indices and come first in the
    enlarged graph.
    """
    newSize = self.W.shape[0] + n

    # Allocate the enlarged matrix and copy over the stored weights.
    expanded = sppy.csarray((newSize, newSize), self.W.dtype)
    expanded[self.W.nonzero()] = self.W.values()
    self.W = expanded

    self.vList.addVertices(n)
def testPrecisionAtK(self):
    """
    Check MCEvaluator.precisionAtK against a direct per-row computation, for
    the factors returned by the generator and for random factors.
    """
    import sppy

    m = 10
    n = 5
    r = 3

    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)
    X = sppy.csarray(X)
    # X is not modified below, so the dense copy is made once instead of once
    # per row.
    denseX = X.toarray()

    #print(MCEvaluator.precisionAtK(X, U*s, V, 2))

    # With k = n the expected precision is the fraction of non-zeros in X.
    # assertAlmostEqual/assertEqual replace the aliases removed in Python 3.12.
    orderedItems = MCEvaluator.recommendAtk(U, V, n)
    self.assertAlmostEqual(MCEvaluator.precisionAtK(X, orderedItems, n), X.nnz / float(m * n))

    k = 2
    orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
    precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)

    precisions = numpy.zeros(m)
    for i in range(m):
        nonzeroRow = denseX[i, :].nonzero()[0]
        precisions[i] = numpy.intersect1d(scoreInds[i, :], nonzeroRow).shape[0] / float(k)

    self.assertEqual(precision.mean(), precisions.mean())

    #Now try random U and V
    U = numpy.random.rand(m, 3)
    # NOTE(review): V has m rows here although X has n columns - confirm this
    # is intended.
    V = numpy.random.rand(m, 3)

    orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
    precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)

    precisions = numpy.zeros(m)
    for i in range(m):
        nonzeroRow = denseX[i, :].nonzero()[0]
        precisions[i] = numpy.intersect1d(scoreInds[i, :], nonzeroRow).shape[0] / float(k)

    self.assertEqual(precision.mean(), precisions.mean())
def testGetnnz(self):
    """Check getnnz() while entries are assigned, and on the setUp fixtures."""
    # assertEqual is used throughout: the assertEquals alias was removed in
    # Python 3.12.
    A = csarray((5, 7))
    self.assertEqual(A.getnnz(), 0)
    A[0, 0] = 1.0

    self.assertEqual(A.getnnz(), 1)

    A[2, 1] = 1.0
    self.assertEqual(A.getnnz(), 2)

    A[2, 5] = 1.0
    A[3, 5] = 1.0
    self.assertEqual(A.getnnz(), 4)

    # Assigning zero to an entry that was never set does not change the count
    A[4, 4] = 0.0
    self.assertEqual(A.getnnz(), 4)

    # Overwriting a stored entry with zero still leaves it counted
    # (original note: can call prune)
    A[3, 5] = 0.0
    self.assertEqual(A.getnnz(), 4)

    # Assignment through a pair of index arrays
    B = csarray((5, 7))
    B[(numpy.array([1, 2, 3]), numpy.array([4, 5, 6]))] = 1
    self.assertEqual(B.getnnz(), 3)

    # Fill every entry
    for i in range(5):
        for j in range(7):
            B[i, j] = 1

    self.assertEqual(B.getnnz(), 35)

    # Fixtures created in setUp
    self.assertEqual(self.A.getnnz(), 0)
    self.assertEqual(self.B.getnnz(), 5)
    self.assertEqual(self.C.getnnz(), 5)
    self.assertEqual(self.F.getnnz(), 5)

    self.assertEqual(self.a.getnnz(), 3)
    self.assertEqual(self.b.getnnz(), 3)
    self.assertEqual(self.c.getnnz(), 0)
def testRecallAtK(self):
    """
    Check MCEvaluator.recallAtK against a direct per-row computation, for the
    factors returned by the generator and for random factors.
    """
    import sppy

    m = 10
    n = 5
    r = 3

    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)
    X = sppy.csarray(X)
    # X is not modified below, so the dense copy is made once instead of once
    # per row.
    denseX = X.toarray()

    # With k = n the test expects a recall of 1.
    # assertAlmostEqual/assertEqual replace the aliases removed in Python 3.12.
    orderedItems = MCEvaluator.recommendAtk(U, V, n)
    self.assertAlmostEqual(MCEvaluator.recallAtK(X, orderedItems, n), 1.0)

    k = 2
    orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
    recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)

    recalls = numpy.zeros(m)
    for i in range(m):
        nonzeroRow = denseX[i, :].nonzero()[0]
        recalls[i] = numpy.intersect1d(scoreInds[i, :], nonzeroRow).shape[0] / float(
            nonzeroRow.shape[0])

    self.assertEqual(recall.mean(), recalls.mean())

    #Now try random U and V
    U = numpy.random.rand(m, 3)
    # NOTE(review): V has m rows here although X has n columns - confirm this
    # is intended.
    V = numpy.random.rand(m, 3)

    orderedItems = MCEvaluator.recommendAtk(U, V, k)
    recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)

    recalls = numpy.zeros(m)
    for i in range(m):
        nonzeroRow = denseX[i, :].nonzero()[0]
        recalls[i] = numpy.intersect1d(scoreInds[i, :], nonzeroRow).shape[0] / float(
            nonzeroRow.shape[0])

    self.assertEqual(recall.mean(), recalls.mean())