def testUncentre(self):
    shape = (50, 10)
    r = 5
    k = 100

    X, U, s, V = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)

    rowInds, colInds = X.nonzero()
    Y = X.copy()
    inds = X.nonzero()

    X, mu1 = SparseUtils.centerRows(X)
    X, mu2 = SparseUtils.centerCols(X, inds=inds)
    cX = X.copy()

    Y2 = SparseUtils.uncenter(X, mu1, mu2)
    nptst.assert_array_almost_equal(Y.todense(), Y2.todense(), 3)

    #Run SoftImpute on the centered matrix and check that the error is unchanged after uncentering
    lmbdas = numpy.array([0.1])
    softImpute = SoftImpute(lmbdas)
    Z = softImpute.learnModel(cX, fullMatrices=False)
    Z = softImpute.predict([Z], cX.nonzero())[0]
    error1 = MCEvaluator.rootMeanSqError(cX, Z)

    X = SparseUtils.uncenter(cX, mu1, mu2)
    Z2 = SparseUtils.uncenter(Z, mu1, mu2)
    error2 = MCEvaluator.rootMeanSqError(X, Z2)

    self.assertAlmostEquals(error1, error2)
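# A minimal sketch of the relationship exercised above (an assumption about the
# SparseUtils API, not taken from its source): centerRows/centerCols subtract the
# row means mu1 and column means mu2 from the observed entries,
#
#   cX[i, j] = X[i, j] - mu1[i] - mu2[j]    for observed (i, j),
#
# and uncenter adds them back, recovering X. Because uncentering shifts the data
# and the predictions by the same offsets, the residuals and hence the RMSE are
# unchanged, which is what the final assertAlmostEquals verifies.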
def testLearnModel2(self):
    #Test the SVD updating solution in the case where we get an exact solution
    lmbda = 0.0
    eps = 0.1
    k = 20

    matrixIterator = iter(self.matrixList)
    iterativeSoftImpute = IterativeSoftImpute(lmbda, k=k, eps=eps, svdAlg="rsvd")
    ZList = iterativeSoftImpute.learnModel(matrixIterator)

    #Check that ZList is the same as XList
    for i, Z in enumerate(ZList):
        U, s, V = Z
        Xhat = (U*s).dot(V.T)
        nptst.assert_array_almost_equal(Xhat, self.matrixList[i].todense())

    #Compare solution with that of SoftImpute class
    rhoList = [0.1, 0.2, 0.5, 1.0]

    for rho in rhoList:
        iterativeSoftImpute = IterativeSoftImpute(rho, k=k, eps=eps, svdAlg="rsvd", updateAlg="zero")
        matrixIterator = iter(self.matrixList)
        ZList = iterativeSoftImpute.learnModel(matrixIterator)

        rhos = numpy.array([rho])
        softImpute = SoftImpute(rhos, k=k, eps=eps)
        Z1 = softImpute.learnModel(self.matrixList[0])
        Z2 = softImpute.learnModel(self.matrixList[1])
        Z3 = softImpute.learnModel(self.matrixList[2])
        ZList2 = [Z1, Z2, Z3]

        for j, Zhat in enumerate(ZList):
            U, s, V = Zhat
            Z = (U*s).dot(V.T)
            nptst.assert_array_almost_equal(Z, ZList2[j].todense())

            #Also test against the fixed-point condition Z = S_lambda(P_Omega(X) + P_Omega^perp(Z))
            Zomega = numpy.zeros(self.matrixList[j].shape)
            rowInds, colInds = self.matrixList[j].nonzero()
            for i in range(self.matrixList[j].nonzero()[0].shape[0]):
                Zomega[rowInds[i], colInds[i]] = Z[rowInds[i], colInds[i]]

            U, s, V = ExpSU.SparseUtils.svdArpack(self.matrixList[j], 1, kmax=20)
            lmbda = rho*numpy.max(s)

            U, s, V = ExpSU.SparseUtils.svdSoft(numpy.array(self.matrixList[j] - Zomega + Z), lmbda)
            tol = 0.1
            self.assertTrue(numpy.linalg.norm(Z - (U*s).dot(V.T))**2 < tol)
def testLearnModel(self):
    lmbda = 0.0
    eps = 0.1
    k = 10

    matrixIterator = iter(self.matrixList)
    iterativeSoftImpute = IterativeSoftImpute(lmbda, k=k, eps=eps, svdAlg="propack")
    ZList = iterativeSoftImpute.learnModel(matrixIterator)

    #Check that ZList is the same as XList
    for i, Z in enumerate(ZList):
        U, s, V = Z
        Xhat = (U*s).dot(V.T)
        nptst.assert_array_almost_equal(Xhat, numpy.array(self.matrixList[i].todense()))

    #Compare solution with that of SoftImpute class
    lmbdaList = [0.1, 0.2, 0.5, 1.0]

    for lmbda in lmbdaList:
        iterativeSoftImpute = IterativeSoftImpute(lmbda, k=k, eps=eps, svdAlg="propack", updateAlg="zero")
        matrixIterator = iter(self.matrixList)
        ZList = iterativeSoftImpute.learnModel(matrixIterator)

        lmbdas = numpy.array([lmbda])
        softImpute = SoftImpute(lmbdas, k=k, eps=eps)
        Z1 = softImpute.learnModel(self.matrixList[0])
        Z2 = softImpute.learnModel(self.matrixList[1])
        Z3 = softImpute.learnModel(self.matrixList[2])
        ZList2 = [Z1, Z2, Z3]

        for j, Zhat in enumerate(ZList):
            U, s, V = Zhat
            Z = (U*s).dot(V.T)
            nptst.assert_array_almost_equal(Z, ZList2[j].todense())

            #Also test against the fixed-point condition Z = S_lambda(P_Omega(X) + P_Omega^perp(Z))
            Zomega = numpy.zeros(self.matrixList[j].shape)
            rowInds, colInds = self.matrixList[j].nonzero()
            for i in range(self.matrixList[j].nonzero()[0].shape[0]):
                Zomega[rowInds[i], colInds[i]] = Z[rowInds[i], colInds[i]]

            U, s, V = ExpSU.SparseUtils.svdSoft(numpy.array(self.matrixList[j] - Zomega + Z), lmbda)
            tol = 0.1
            self.assertTrue(numpy.linalg.norm(Z - (U*s).dot(V.T))**2 < tol)
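# The fixed-point checks above use the singular value soft-thresholding operator
# S_lambda via ExpSU.SparseUtils.svdSoft. A minimal dense NumPy sketch of what that
# operator is assumed to compute (an illustration, not the library implementation):
#
#   def svdSoftSketch(A, lmbda):
#       #Shrink each singular value by lmbda and truncate at zero
#       U, s, VT = numpy.linalg.svd(A, full_matrices=False)
#       s = numpy.maximum(s - lmbda, 0)
#       return U, s, VT.T
#
# With this operator the SoftImpute solution satisfies
# Z = S_lambda(P_Omega(X) + P_Omega^perp(Z)); the quantity matrixList[j] - Zomega + Z
# formed in the loops above is precisely P_Omega(X) + P_Omega^perp(Z).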
def testModelSelect(self):
    lmbda = 0.1
    shape = (20, 20)
    r = 20
    numInds = 100
    noise = 0.2
    X = ExpSU.SparseUtils.generateSparseLowRank(shape, r, numInds, noise)

    U, s, V = numpy.linalg.svd(X.todense())

    k = 15
    iterativeSoftImpute = IterativeSoftImpute(lmbda, k=None, svdAlg="propack", updateAlg="zero")
    iterativeSoftImpute.numProcesses = 1
    rhos = numpy.linspace(0.5, 0.001, 20)
    ks = numpy.array([k], numpy.int)
    folds = 3
    cvInds = Sampling.randCrossValidation(folds, X.nnz)
    meanTestErrors, meanTrainErrors = iterativeSoftImpute.modelSelect(X, rhos, ks, cvInds)

    #Now do model selection manually
    (rowInds, colInds) = X.nonzero()
    trainErrors = numpy.zeros((rhos.shape[0], len(cvInds)))
    testErrors = numpy.zeros((rhos.shape[0], len(cvInds)))

    for i, rho in enumerate(rhos):
        for j, (trainInds, testInds) in enumerate(cvInds):
            trainX = scipy.sparse.csc_matrix(X.shape)
            testX = scipy.sparse.csc_matrix(X.shape)

            for p in trainInds:
                trainX[rowInds[p], colInds[p]] = X[rowInds[p], colInds[p]]

            for p in testInds:
                testX[rowInds[p], colInds[p]] = X[rowInds[p], colInds[p]]

            softImpute = SoftImpute(numpy.array([rho]), k=ks[0])
            ZList = [softImpute.learnModel(trainX, fullMatrices=False)]

            predTrainX = softImpute.predict(ZList, trainX.nonzero())[0]
            predX = softImpute.predict(ZList, testX.nonzero())[0]

            testErrors[i, j] = MCEvaluator.rootMeanSqError(testX, predX)
            trainErrors[i, j] = MCEvaluator.rootMeanSqError(trainX, predTrainX)

    meanTestErrors2 = testErrors.mean(1)
    meanTrainErrors2 = trainErrors.mean(1)

    nptst.assert_array_almost_equal(meanTestErrors.ravel(), meanTestErrors2, 1)
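# Note on the manual check above: modelSelect is expected to run the same
# per-entry cross-validation, training on the observed entries indexed by
# trainInds and scoring RMSE on those indexed by testInds for each (rho, fold)
# pair, so averaging testErrors over folds should reproduce meanTestErrors up to
# the one-decimal tolerance of the final assertion.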
def testPredict(self):
    X = scipy.sparse.rand(10, 10, 0.2)
    X = X.tocsc()

    lmbdas = numpy.array([2.0, 1.5, 1.0, 0.5, 0.2, 0.1])
    eps = 0.001
    k = 9

    #Check out singular values
    U, s, V = sparsesvd(X.tocsc(), k)

    softImpute = SoftImpute(lmbdas, eps, k)
    ZList = softImpute.learnModel(X, fullMatrices=False)

    inds = X.nonzero()
    predXList = softImpute.predict(ZList, inds)

    U, s, V = ZList[0]

    for predX in predXList:
        nptst.assert_array_equal(predX.nonzero()[0], inds[0])
        nptst.assert_array_equal(predX.nonzero()[1], inds[1])
def testLearnModel2(self):
    X = scipy.sparse.rand(10, 10, 0.2)
    X = X.tocsc()

    lmbdas = numpy.array([10.0, 0.0])
    eps = 0.01
    k = 9

    #Check out singular values
    U, s, V = sparsesvd(X.tocsc(), k)

    softImpute = SoftImpute(lmbdas, eps, k)
    ZList = softImpute.learnModel2(X)

    #Test that when lambda=0 we get approximately the original matrix back
    X2 = ZList[1].todense()
    nptst.assert_almost_equal(X.todense(), X2)

    #When lambda is greater than or equal to the largest singular value, we get 0
    U, s, V = sparsesvd(X.tocsc(), k)
    lmbdas = numpy.array([numpy.max(s)])
    softImpute = SoftImpute(lmbdas, eps, k)
    Z = softImpute.learnModel2(X)
    self.assertEquals(numpy.linalg.norm(Z.todense()), 0)

    #Check solution for medium values of lambda
    eps = 0.1
    lmbdas = numpy.array([0.1, 0.2, 0.5, 1.0])
    softImpute = SoftImpute(lmbdas, eps, k)
    ZList = softImpute.learnModel2(X)

    for j, Z in enumerate(ZList):
        Z = Z.todense()
        Zomega = numpy.zeros(X.shape)

        rowInds, colInds = X.nonzero()
        for i in range(X.nonzero()[0].shape[0]):
            Zomega[rowInds[i], colInds[i]] = Z[rowInds[i], colInds[i]]

        U, s, V = ExpSU.SparseUtils.svdSoft(numpy.array(X - Zomega + Z), lmbdas[j])
        tol = 0.1
        self.assertTrue(numpy.linalg.norm(Z - (U*s).dot(V.T))**2 < tol)
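# The boundary cases above follow directly from soft-thresholding of the singular
# values: with lambda = 0 nothing is shrunk and the observed matrix is reproduced,
# while any lambda >= max(s) shrinks every singular value to zero, giving Z = 0.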
def profileLearnModel(self):
    lmbdas = numpy.array([0.5])
    softImpute = SoftImpute(lmbdas)

    ProfileUtils.profile('softImpute.learnModel(self.X, False)', globals(), locals())