def testModelSelect(self):
    """
    Compare IterativeSoftImpute.modelSelect with a hand-written
    cross-validation loop based on SoftImpute.

    For each rho and each (train, test) fold in cvInds, the nonzero entries
    of X are split into a train and a test matrix, a SoftImpute model is
    learnt on the train matrix, and the root mean squared error is recorded
    on both parts. The per-rho mean test errors are then required to match
    the ones returned by modelSelect to 1 decimal place.
    """
    lmbda = 0.1
    shape = (20, 20)
    r = 20
    numInds = 100
    noise = 0.2
    X = ExpSU.SparseUtils.generateSparseLowRank(shape, r, numInds, noise)

    k = 15
    iterativeSoftImpute = IterativeSoftImpute(lmbda, k=None, svdAlg="propack", updateAlg="zero")
    iterativeSoftImpute.numProcesses = 1
    rhos = numpy.linspace(0.5, 0.001, 20)
    # The builtin int replaces the numpy.int alias, which newer NumPy
    # releases no longer provide.
    ks = numpy.array([k], int)
    folds = 3
    cvInds = Sampling.randCrossValidation(folds, X.nnz)
    meanTestErrors, meanTrainErrors = iterativeSoftImpute.modelSelect(X, rhos, ks, cvInds)

    # Now do model selection manually
    (rowInds, colInds) = X.nonzero()
    trainErrors = numpy.zeros((rhos.shape[0], len(cvInds)))
    testErrors = numpy.zeros((rhos.shape[0], len(cvInds)))

    for i, rho in enumerate(rhos):
        for j, (trainInds, testInds) in enumerate(cvInds):
            # Copy the observed entries of each fold into its own matrix
            trainX = scipy.sparse.csc_matrix(X.shape)
            testX = scipy.sparse.csc_matrix(X.shape)

            for p in trainInds:
                trainX[rowInds[p], colInds[p]] = X[rowInds[p], colInds[p]]

            for p in testInds:
                testX[rowInds[p], colInds[p]] = X[rowInds[p], colInds[p]]

            softImpute = SoftImpute(numpy.array([rho]), k=ks[0])
            ZList = [softImpute.learnModel(trainX, fullMatrices=False)]

            predTrainX = softImpute.predict(ZList, trainX.nonzero())[0]
            predX = softImpute.predict(ZList, testX.nonzero())[0]

            testErrors[i, j] = MCEvaluator.rootMeanSqError(testX, predX)
            trainErrors[i, j] = MCEvaluator.rootMeanSqError(trainX, predTrainX)

    meanTestErrors2 = testErrors.mean(1)
    # NOTE(review): the manual train errors are computed but never compared
    # with meanTrainErrors -- confirm whether an assertion is missing here.
    meanTrainErrors2 = trainErrors.mean(1)

    nptst.assert_array_almost_equal(meanTestErrors.ravel(), meanTestErrors2, 1)
def testUncentre(self):
    """
    Check that centering followed by SparseUtils.uncenter restores the
    original matrix, and that the SoftImpute RMSE is unchanged by
    uncentering both the data and the prediction.
    """
    shape = (50, 10)
    r = 5
    k = 100

    X, U, s, V = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)

    Y = X.copy()

    # Center rows, then columns, reusing the original nonzero positions
    inds = X.nonzero()
    X, mu1 = SparseUtils.centerRows(X)
    X, mu2 = SparseUtils.centerCols(X, inds=inds)

    cX = X.copy()

    Y2 = SparseUtils.uncenter(X, mu1, mu2)

    nptst.assert_array_almost_equal(Y.todense(), Y2.todense(), 3)

    # We try softImpute on a centered matrix and check if the results are the same
    lmbdas = numpy.array([0.1])
    softImpute = SoftImpute(lmbdas)

    Z = softImpute.learnModel(cX, fullMatrices=False)
    Z = softImpute.predict([Z], cX.nonzero())[0]

    error1 = MCEvaluator.rootMeanSqError(cX, Z)

    X = SparseUtils.uncenter(cX, mu1, mu2)
    Z2 = SparseUtils.uncenter(Z, mu1, mu2)

    error2 = MCEvaluator.rootMeanSqError(X, Z2)

    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias,
    # which newer Python versions no longer provide.
    self.assertAlmostEqual(error1, error2)
def testPredict(self):
    """
    Learn SoftImpute models on a random 10x10 sparse matrix and verify that
    every predicted matrix has nonzero entries at exactly the row/column
    indices that were requested (the nonzero positions of the input).
    """
    matrix = scipy.sparse.rand(10, 10, 0.2).tocsc()

    lmbdaValues = numpy.array([2.0, 1.5, 1.0, 0.5, 0.2, 0.1])
    epsilon = 0.001
    numSingular = 9

    # Check out singular values
    _U, _s, _V = sparsesvd(matrix.tocsc(), numSingular)

    learner = SoftImpute(lmbdaValues, epsilon, numSingular)
    decompositions = learner.learnModel(matrix, fullMatrices=False)

    requested = matrix.nonzero()
    predictions = learner.predict(decompositions, requested)

    # The first learnt model must unpack into three parts
    _U, _s, _V = decompositions[0]

    expectedRows, expectedCols = requested[0], requested[1]
    for predicted in predictions:
        predictedInds = predicted.nonzero()
        nptst.assert_array_equal(predictedInds[0], expectedRows)
        nptst.assert_array_equal(predictedInds[1], expectedCols)