def testCentreCols(self):
    """
    Check SparseUtils.centerCols against a brute-force computation.

    The expected per-column means are the column sums divided by the number
    of nonzero elements in each column, with 0 used for columns that have no
    nonzero elements. After centering, every column of X must sum (and hence
    average) to zero and the returned means must match the expected ones.
    """
    shape = (50, 10)
    r = 5
    k = 100

    X, U, s, V = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)

    mu2 = numpy.array(X.sum(0)).ravel()
    numNnz = numpy.zeros(X.shape[1])

    #Count the nonzero elements of each column one entry at a time so the
    #expected value does not rely on the code under test
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            if X[i, j] != 0:
                numNnz[j] += 1

    #A column without nonzero elements gives 0/0 here: silence the expected
    #warning and overwrite the resulting nan just below
    with numpy.errstate(divide="ignore", invalid="ignore"):
        mu2 /= numNnz
    mu2[numNnz == 0] = 0

    X, mu = SparseUtils.centerCols(X)
    nptst.assert_array_almost_equal(numpy.array(X.mean(0)).ravel(), numpy.zeros(X.shape[1]))
    nptst.assert_array_almost_equal(mu, mu2)
def testCentreRows(self):
    """
    Check SparseUtils.centerRows against a brute-force computation.

    First verifies that indexing X with the tuple returned by nonzero() gives
    the same values as element-by-element access. Then the expected per-row
    means are computed as the row sums divided by the number of nonzero
    elements in each row (0 for rows without nonzero elements). After
    centering, every row of X must average to zero and the returned means
    must match the expected ones.
    """
    shape = (50, 10)
    r = 5
    k = 100

    X, U, s, V = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)
    rowInds, colInds = X.nonzero()

    #The fancy-indexed values do not depend on the loop index, so extract
    #them once instead of re-indexing the whole matrix on every iteration
    nonzeroVals = numpy.array(X[X.nonzero()]).ravel()
    for i in range(rowInds.shape[0]):
        self.assertEqual(X[rowInds[i], colInds[i]], nonzeroVals[i])

    mu2 = numpy.array(X.sum(1)).ravel()
    numNnz = numpy.zeros(X.shape[0])

    #Count the nonzero elements of each row one entry at a time so the
    #expected value does not rely on the code under test
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            if X[i, j] != 0:
                numNnz[i] += 1

    #A row without nonzero elements gives 0/0 here: silence the expected
    #warning and overwrite the resulting nan just below
    with numpy.errstate(divide="ignore", invalid="ignore"):
        mu2 /= numNnz
    mu2[numNnz == 0] = 0

    X, mu = SparseUtils.centerRows(X)
    nptst.assert_array_almost_equal(numpy.array(X.mean(1)).ravel(), numpy.zeros(X.shape[0]))
    nptst.assert_array_almost_equal(mu, mu2)
def testUncentre(self):
    """
    Check that SparseUtils.uncenter undoes centerRows followed by centerCols.

    Also learns a SoftImpute model on the centered matrix and checks that the
    root mean squared error between matrix and prediction is the same before
    and after both are uncentered with the same means.
    """
    shape = (50, 10)
    r = 5
    k = 100

    X, U, s, V = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)

    Y = X.copy()

    #The nonzero indices of the original matrix are handed to centerCols
    inds = X.nonzero()
    X, mu1 = SparseUtils.centerRows(X)
    X, mu2 = SparseUtils.centerCols(X, inds=inds)

    #Keep a copy of the centered matrix, taken before uncenter is called
    #NOTE(review): presumably uncenter may modify its argument - confirm
    cX = X.copy()

    Y2 = SparseUtils.uncenter(X, mu1, mu2)

    nptst.assert_array_almost_equal(Y.todense(), Y2.todense(), 3)

    #We try softImpute on a centered matrix and check if the results are the same
    lmbdas = numpy.array([0.1])
    softImpute = SoftImpute(lmbdas)

    Z = softImpute.learnModel(cX, fullMatrices=False)
    Z = softImpute.predict([Z], cX.nonzero())[0]

    error1 = MCEvaluator.rootMeanSqError(cX, Z)

    X = SparseUtils.uncenter(cX, mu1, mu2)
    Z2 = SparseUtils.uncenter(Z, mu1, mu2)

    error2 = MCEvaluator.rootMeanSqError(X, Z2)

    self.assertAlmostEqual(error1, error2)
def setUp(self):
    """
    Configure numpy printing, seed the random number generator and build the
    fixtures: self.matrixList holds three generated matrices and
    self.testMatrixList holds a copy of each of them.
    """
    numpy.set_printoptions(suppress=True, precision=3, linewidth=150)
    numpy.random.seed(21)

    dims = (20, 10)
    rank = 5
    numSamples = 100

    #Generate the three matrices one after the other, then copy each one
    self.matrixList = [SparseUtils.generateSparseLowRank(dims, rank, numSamples) for _ in range(3)]
    self.testMatrixList = [M.copy() for M in self.matrixList]
def profileSubmatrix(self):
    """
    Profile SparseUtils.submatrix on a generated matrix, using a random
    selection of at most 1000000 positions out of range(X.nnz).
    """
    numRows, numCols = 100000, 15000
    rank = 50
    numSamples = 5000000
    maxInds = 1000000

    #The profiled command refers to X and inds by name (passed via locals()),
    #so these two names must not change
    X = SparseUtils.generateSparseLowRank((numRows, numCols), rank, numSamples)
    print(X.nnz, type(X))

    shuffled = numpy.random.permutation(X.nnz)
    inds = shuffled[:maxInds]

    command = "SparseUtils.submatrix(X, inds)"
    ProfileUtils.profile(command, globals(), locals())
def profileGetOmegaList(self):
    """
    Profile SparseUtils.getOmegaList on a generated matrix that has been
    converted to an sppy.csarray.
    """
    dims = (20000, 15000)
    rank = 50
    numSamples = 1000000

    generated = SparseUtils.generateSparseLowRank(dims, rank, numSamples)

    import sppy
    #The profiled command refers to X by name (passed via locals())
    X = sppy.csarray(generated)

    command = "SparseUtils.getOmegaList(X)"
    ProfileUtils.profile(command, globals(), locals())
def profileGetOmegaList(self):
    """
    Generate a matrix, wrap it in an sppy.csarray and profile a call to
    SparseUtils.getOmegaList on it.
    """
    numRows, numCols = 20000, 15000
    rank = 50
    numSamples = 1000000

    lowRank = SparseUtils.generateSparseLowRank((numRows, numCols), rank, numSamples)

    import sppy
    #X is the name used inside the profiled command string below
    X = sppy.csarray(lowRank)

    ProfileUtils.profile(
        'SparseUtils.getOmegaList(X)',
        globals(),
        locals(),
    )
def __init__(self):
    """
    Seed numpy's random number generator, generate the matrix stored in
    self.X (with the rank argument kept in self.r) and print its nnz.
    """
    numpy.random.seed(21)

    # Create a low rank matrix
    self.r = 50
    numRows = numCols = 100000
    # Alternative value left by the author: 10**5
    numSamples = 5000000

    self.X = SparseUtils.generateSparseLowRank((numRows, numCols), self.r, numSamples)
    print(self.X.nnz)
def profileSubmatrix(self):
    """
    Generate a matrix, draw up to 1000000 shuffled positions from
    range(X.nnz) and profile SparseUtils.submatrix on them.
    """
    dims = (100000, 15000)
    rank = 50
    numSamples = 5000000
    sampleSize = 1000000

    #X and inds are the names used inside the profiled command string below
    X = SparseUtils.generateSparseLowRank(dims, rank, numSamples)
    print(X.nnz, type(X))

    permutation = numpy.random.permutation(X.nnz)
    inds = permutation[:sampleSize]

    ProfileUtils.profile(
        'SparseUtils.submatrix(X, inds)',
        globals(),
        locals(),
    )
def __init__(self):
    """
    Fix the numpy random seed, then build the matrix kept in self.X using
    the rank argument kept in self.r, and print the matrix's nnz.
    """
    numpy.random.seed(21)

    #Create a low rank matrix
    dims = (100000, 100000)
    self.r = 50
    #Alternative value left by the author: 10**5
    numSamples = 5000000

    self.X = SparseUtils.generateSparseLowRank(dims, self.r, numSamples)
    print(self.X.nnz)
def testGenerateSparseLowRank(self):
    """
    Check the output of SparseUtils.generateSparseLowRank with verbose=True.

    U and V must have one row per matrix row/column and r columns, X must
    hold at most k nonzero elements, and every nonzero element of X must
    agree with the corresponding entry of (U*s).dot(V.T).
    """
    shape = (5000, 1000)
    r = 5
    k = 10

    X, U, s, V = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)

    self.assertEqual(U.shape, (shape[0], r))
    self.assertEqual(V.shape, (shape[1], r))
    self.assertLessEqual(X.nnz, k)

    #Dense product of the factors, compared with X at its nonzero positions
    Y = (U*s).dot(V.T)
    rowInds, colInds = X.nonzero()

    for i, j in zip(rowInds, colInds):
        self.assertAlmostEqual(X[i, j], Y[i, j])
def profilePrecisionAtK(self):
    """
    Profile MCEvaluator.precisionAtK on a binarised generated matrix.

    The stored values of X are thresholded at their mean: values at or below
    the mean become 0 and values above it become 1. The matrix is then
    converted to an sppy.csarray before profiling.
    """
    m = 1000
    n = 500000
    r = 30
    k = m*100

    #X, U and V are referred to by name in the profiled command string
    X, U, s, V = SparseUtils.generateSparseLowRank((m, n), r, k, verbose=True)
    mean = X.data.mean()

    #Take both masks before writing anything: if the second mask were
    #computed after the first assignment, the freshly zeroed entries would be
    #compared with the mean again and all flipped to 1 when the mean is negative
    atOrBelowMean = X.data <= mean
    aboveMean = X.data > mean
    X.data[atOrBelowMean] = 0
    X.data[aboveMean] = 1

    import sppy
    X = sppy.csarray(X)

    ProfileUtils.profile("MCEvaluator.precisionAtK(X, U, V, 10)", globals(), locals())
def testSvdArpack(self):
    """
    Compare SparseUtils.svdArpack with numpy's dense SVD on a generated
    matrix: the leading singular values must agree, and the leading singular
    vectors must agree in absolute value to 3 decimals.
    """
    dims = (500, 100)
    rank = 5
    numSamples = 1000

    X, _, _, _ = SparseUtils.generateSparseLowRank(dims, rank, numSamples, verbose=True)

    numVecs = 10
    U, s, V = SparseUtils.svdArpack(X, numVecs)

    #Reference decomposition; the third output of numpy.linalg.svd is
    #transposed so that it can be compared with V column by column
    denseU, denseS, denseVt = numpy.linalg.svd(X.todense())
    denseV = denseVt.T

    leading = slice(0, numVecs)
    nptst.assert_array_almost_equal(s, denseS[leading])
    #Absolute values are compared, so the sign of each vector is ignored
    nptst.assert_array_almost_equal(numpy.abs(U), numpy.abs(denseU[:, leading]), 3)
    nptst.assert_array_almost_equal(numpy.abs(V), numpy.abs(denseV[:, leading]), 3)
def profilePrecisionAtK(self):
    """
    Profile MCEvaluator.precisionAtK on a binarised generated matrix.

    The stored values of X are thresholded at their mean (0 for values at or
    below the mean, 1 for values above it) and the result is converted to an
    sppy.csarray before the profiled call.
    """
    m = 1000
    n = 500000
    r = 30
    k = m * 100

    # X, U and V are referred to by name in the profiled command string
    X, U, s, V = SparseUtils.generateSparseLowRank((m, n), r, k, verbose=True)
    mean = X.data.mean()

    # Compute both masks from the untouched data. Evaluating "> mean" after
    # the zeros have been written would turn every zeroed entry into 1
    # whenever the mean is negative.
    atOrBelowMean = X.data <= mean
    aboveMean = X.data > mean
    X.data[atOrBelowMean] = 0
    X.data[aboveMean] = 1

    import sppy
    X = sppy.csarray(X)

    ProfileUtils.profile("MCEvaluator.precisionAtK(X, U, V, 10)", globals(), locals())
def testGenerateSparseLowRank(self):
    """
    Check the output of SparseUtils.generateSparseLowRank with verbose=True.

    Verifies the shapes of the returned factors U and V, that X has no more
    than k nonzero elements, and that each nonzero element of X matches the
    same entry of the dense product (U * s).dot(V.T).
    """
    shape = (5000, 1000)
    r = 5
    k = 10

    X, U, s, V = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)

    self.assertEqual(U.shape, (shape[0], r))
    self.assertEqual(V.shape, (shape[1], r))
    self.assertLessEqual(X.nnz, k)

    # Dense product of the factors, compared with X at its nonzero positions
    Y = (U * s).dot(V.T)
    rowInds, colInds = X.nonzero()

    for i, j in zip(rowInds, colInds):
        self.assertAlmostEqual(X[i, j], Y[i, j])
def testCentreRows2(self):
    """
    Check that centering two matrices with the same row means leaves the
    squared error between them unchanged.

    Y shares the sparsity structure of X but holds random values. X is
    centered first, and the means returned for X are then passed to
    centerRows for Y. The squared error over the stored values must be the
    same before and after, and must equal the squared norm of the dense
    difference.
    """
    shape = (50, 10)
    r = 5
    k = 100

    #Test if centering rows changes the RMSE
    X, U, s, V = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)

    Y = X.copy()
    Y.data = numpy.random.rand(X.nnz)

    error = ((X.data - Y.data)**2).sum()

    #Y is centered with the means returned for X, not with its own
    X, mu = SparseUtils.centerRows(X)
    Y, mu = SparseUtils.centerRows(Y, mu)

    error2 = ((X.data - Y.data)**2).sum()
    self.assertAlmostEqual(error, error2)

    error3 = numpy.linalg.norm(X.todense() - Y.todense())**2
    self.assertAlmostEqual(error2, error3)
def testCentreRows2(self):
    """
    Check that the squared error between two matrices is unchanged when both
    are row-centered with the same means.

    Y is a copy of X whose stored values are replaced by random numbers. The
    means returned when centering X are passed to centerRows for Y. The
    squared error over the stored values is compared before and after
    centering, and against the squared norm of the dense difference.
    """
    shape = (50, 10)
    r = 5
    k = 100

    #Test if centering rows changes the RMSE
    X, U, s, V = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)

    Y = X.copy()
    Y.data = numpy.random.rand(X.nnz)

    error = ((X.data - Y.data)**2).sum()

    #The means computed from X are the ones passed in for Y
    X, mu = SparseUtils.centerRows(X)
    Y, mu = SparseUtils.centerRows(Y, mu)

    error2 = ((X.data - Y.data)**2).sum()
    self.assertAlmostEqual(error, error2)

    error3 = numpy.linalg.norm(X.todense() - Y.todense())**2
    self.assertAlmostEqual(error2, error3)