def testAverageRocCurve(self):
    """Smoke test for MCEvaluator.averageRocCurve, with and without a
    training set whose entries are excluded from the curve."""
    m = 50
    n = 20
    k = 8
    u = 20.0 / m
    w = 1 - u
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True, verbose=True, indsPerRow=200)
    fpr, tpr = MCEvaluator.averageRocCurve(X, U, V)
    import matplotlib
    # Fix: "GTK3Agg" requires a GUI stack and fails on headless machines;
    # the non-interactive "Agg" backend is sufficient since plotting is
    # commented out below.
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    #plt.plot(fpr, tpr)
    #plt.show()
    #Now try case where we have a training set
    folds = 1
    testSize = 5
    trainTestXs = Sampling.shuffleSplitRows(X, folds, testSize)
    trainX, testX = trainTestXs[0]
    fpr, tpr = MCEvaluator.averageRocCurve(testX, U, V, trainX=trainX)
def testUncentre(self):
    """Check that SparseUtils.uncenter inverts centerRows + centerCols, and
    that SoftImpute's RMSE is the same whether measured on the centered or
    the uncentered data."""
    shape = (50, 10)
    r = 5
    k = 100
    X, U, s, V = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)
    rowInds, colInds = X.nonzero()
    Y = X.copy()  # keep the original for comparison after uncentering
    inds = X.nonzero()
    X, mu1 = SparseUtils.centerRows(X)
    X, mu2 = SparseUtils.centerCols(X, inds=inds)
    cX = X.copy()  # centered copy used for learning below
    Y2 = SparseUtils.uncenter(X, mu1, mu2)
    nptst.assert_array_almost_equal(Y.todense(), Y2.todense(), 3)
    #We try softImpute on a centered matrix and check if the results are the same
    lmbdas = numpy.array([0.1])
    softImpute = SoftImpute(lmbdas)
    Z = softImpute.learnModel(cX, fullMatrices=False)
    Z = softImpute.predict([Z], cX.nonzero())[0]
    error1 = MCEvaluator.rootMeanSqError(cX, Z)
    # Uncenter both the data and the prediction; errors should coincide
    X = SparseUtils.uncenter(cX, mu1, mu2)
    Z2 = SparseUtils.uncenter(Z, mu1, mu2)
    error2 = MCEvaluator.rootMeanSqError(X, Z2)
    self.assertAlmostEquals(error1, error2)
def testPredict(self):
    """With lmbda=0 IterativeSoftImpute.predict should reproduce the training
    matrices exactly; with moderate lmbda predictions should equal
    (U*s).V^T on the requested indices."""
    #Create a set of indices
    lmbda = 0.0
    iterativeSoftImpute = IterativeSoftImpute(lmbda, k=10)
    matrixIterator = iter(self.matrixList)
    ZList = iterativeSoftImpute.learnModel(matrixIterator)
    XhatList = iterativeSoftImpute.predict(ZList, self.indsList)
    #Check we get the exact matrices returned
    for i, Xhat in enumerate(XhatList):
        nptst.assert_array_almost_equal(numpy.array(Xhat.todense()), self.matrixList[i].todense())
        self.assertEquals(Xhat.nnz, self.indsList[i].shape[0])
        self.assertAlmostEquals(MCEvaluator.meanSqError(Xhat, self.matrixList[i]), 0)
        self.assertAlmostEquals(MCEvaluator.rootMeanSqError(Xhat, self.matrixList[i]), 0)
    #Try moderate lambda
    lmbda = 0.1
    iterativeSoftImpute = IterativeSoftImpute(lmbda, k=10)
    matrixIterator = iter(self.matrixList)
    ZList = list(iterativeSoftImpute.learnModel(matrixIterator))
    XhatList = iterativeSoftImpute.predict(iter(ZList), self.indsList)
    for i, Xhat in enumerate(XhatList):
        for ind in self.indsList[i]:
            # indsList holds flat indices; compare against the dense
            # reconstruction at the same (row, col) position
            U, s, V = ZList[i]
            Z = (U*s).dot(V.T)
            self.assertEquals(Xhat[numpy.unravel_index(ind, Xhat.shape)], Z[numpy.unravel_index(ind, Xhat.shape)])
        self.assertEquals(Xhat.nnz, self.indsList[i].shape[0])
def testAverageRocCurve(self):
    """Smoke test for MCEvaluator.averageRocCurve, with and without a
    training set whose entries are excluded from the curve."""
    m = 50
    n = 20
    k = 8
    u = 20.0 / m
    w = 1 - u
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix(
        (m, n), k, w, csarray=True, verbose=True, indsPerRow=200
    )
    fpr, tpr = MCEvaluator.averageRocCurve(X, U, V)
    import matplotlib
    # Fix: "GTK3Agg" requires a GUI stack and fails on headless machines;
    # the non-interactive "Agg" backend is sufficient since plotting is
    # commented out below.
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    # plt.plot(fpr, tpr)
    # plt.show()
    # Now try case where we have a training set
    folds = 1
    testSize = 5
    trainTestXs = Sampling.shuffleSplitRows(X, folds, testSize)
    trainX, testX = trainTestXs[0]
    fpr, tpr = MCEvaluator.averageRocCurve(testX, U, V, trainX=trainX)
def testLocalAucApprox(self):
    """localAUCApprox should approach the exact localAUC as the number of
    AUC samples grows, for both w=1.0 and w=0.5."""
    m = 100
    n = 200
    k = 2
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True, verbose=True)
    w = 1.0
    localAuc = MCEvaluator.localAUC(X, U, V, w)
    samples = numpy.arange(150, 200, 10)
    for i, sampleSize in enumerate(samples):
        numAucSamples = sampleSize
        localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, numAucSamples)
        # only 1 decimal place: the approximation is Monte Carlo based
        self.assertAlmostEqual(localAuc2, localAuc, 1)
    # Try smaller w
    w = 0.5
    localAuc = MCEvaluator.localAUC(X, U, V, w)
    samples = numpy.arange(50, 200, 10)
    for i, sampleSize in enumerate(samples):
        numAucSamples = sampleSize
        localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, numAucSamples)
        self.assertAlmostEqual(localAuc2, localAuc, 1)
def testLocalAUC(self):
    """localAUC with u=0 should equal the mean per-row sklearn ROC AUC, and
    with w=1.0 (every score above the quantile threshold) it should be 0."""
    m = 10
    n = 20
    k = 2
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, 0.5, verbose=True, csarray=True)
    Z = U.dot(V.T)
    # Reference value: average of the standard AUC over rows
    localAuc = numpy.zeros(m)
    for i in range(m):
        localAuc[i] = sklearn.metrics.roc_auc_score(numpy.ravel(X[i, :].toarray()), Z[i, :])
    localAuc = localAuc.mean()
    u = 0.0
    localAuc2 = MCEvaluator.localAUC(X, U, V, u)
    self.assertEquals(localAuc, localAuc2)
    # Now try a large r
    w = 1.0
    localAuc2 = MCEvaluator.localAUC(X, U, V, w)
    self.assertEquals(localAuc2, 0)
def testLocalAUC(self):
    """localAUC with u=0 should equal the mean per-row sklearn ROC AUC, and
    with w=1.0 (every score above the quantile threshold) it should be 0."""
    m = 10
    n = 20
    k = 2
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, 0.5, verbose=True, csarray=True)
    Z = U.dot(V.T)
    # Reference value: average of the standard AUC over rows
    localAuc = numpy.zeros(m)
    for i in range(m):
        localAuc[i] = sklearn.metrics.roc_auc_score(
            numpy.ravel(X[i, :].toarray()), Z[i, :])
    localAuc = localAuc.mean()
    u = 0.0
    localAuc2 = MCEvaluator.localAUC(X, U, V, u)
    self.assertEquals(localAuc, localAuc2)
    #Now try a large r
    w = 1.0
    localAuc2 = MCEvaluator.localAUC(X, U, V, w)
    self.assertEquals(localAuc2, 0)
def testLocalAucApprox(self):
    """localAUCApprox should approach the exact localAUC as the number of
    AUC samples grows, for both w=1.0 and w=0.5."""
    m = 100
    n = 200
    k = 2
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True, verbose=True)
    w = 1.0
    localAuc = MCEvaluator.localAUC(X, U, V, w)
    samples = numpy.arange(150, 200, 10)
    for i, sampleSize in enumerate(samples):
        numAucSamples = sampleSize
        localAuc2 = MCEvaluator.localAUCApprox(
            SparseUtils.getOmegaListPtr(X), U, V, w, numAucSamples)
        # only 1 decimal place: the approximation is Monte Carlo based
        self.assertAlmostEqual(localAuc2, localAuc, 1)
    #Try smaller w
    w = 0.5
    localAuc = MCEvaluator.localAUC(X, U, V, w)
    samples = numpy.arange(50, 200, 10)
    for i, sampleSize in enumerate(samples):
        numAucSamples = sampleSize
        localAuc2 = MCEvaluator.localAUCApprox(
            SparseUtils.getOmegaListPtr(X), U, V, w, numAucSamples)
        self.assertAlmostEqual(localAuc2, localAuc, 1)
def testWeightedLearning(self):
    """Compare weighted and unweighted IterativeSoftImpute.

    With rho=0 the two must coincide; with rho=0.5 on a non-uniform
    matrix the errors are computed for manual inspection.

    Fixes: Python 2-only iterator.next() replaced by the next() builtin
    (available since 2.6, required in 3.x); deprecated numpy.int alias
    replaced by int; unused iterTestX locals removed.
    """
    #See if the weighted learning has any effect
    shape = (20, 20)
    r = 20
    numInds = 100
    noise = 0.2
    X = ExpSU.SparseUtils.generateSparseLowRank(shape, r, numInds, noise)
    rho = 0.0
    iterativeSoftImpute = IterativeSoftImpute(rho, k=10, weighted=True)
    iterX = iter([X])
    resultIter = iterativeSoftImpute.learnModel(iterX)
    Z = next(resultIter)
    iterativeSoftImpute = IterativeSoftImpute(rho, k=10, weighted=False)
    iterX = iter([X])
    resultIter = iterativeSoftImpute.learnModel(iterX)
    Z2 = next(resultIter)
    #Check results when rho=0: weighting must have no effect
    nptst.assert_array_almost_equal((Z[0]*Z[1]).dot(Z[2].T), (Z2[0]*Z2[1]).dot(Z2[2].T))
    nptst.assert_array_almost_equal(Z[1], Z2[1])
    #Then check non-uniform matrix - entries clustered around middle indices
    shape = (20, 15)
    numInds = 200
    maxInd = (shape[0]*shape[1]-1)
    nzInds = numpy.array(numpy.random.randn(numInds)*maxInd/4 + maxInd/2, int)
    trainInds = nzInds[0:int(nzInds.shape[0]/2)]
    testInds = nzInds[int(nzInds.shape[0]/2):]
    trainInds = numpy.unique(numpy.clip(trainInds, 0, maxInd))
    testInds = numpy.unique(numpy.clip(testInds, 0, maxInd))
    trainX = ExpSU.SparseUtils.generateSparseLowRank(shape, r, trainInds, noise)
    testX = ExpSU.SparseUtils.generateSparseLowRank(shape, r, testInds, noise)
    #Error using weighted soft impute
    #print("Running weighted soft impute")
    rho = 0.5
    iterativeSoftImpute = IterativeSoftImpute(rho, k=10, weighted=True)
    iterX = iter([trainX])
    resultIter = iterativeSoftImpute.learnModel(iterX)
    Z = next(resultIter)
    predX = iterativeSoftImpute.predictOne(Z, testX.nonzero())
    error = MCEvaluator.rootMeanSqError(testX, predX)
    #print(error)
    iterativeSoftImpute = IterativeSoftImpute(rho, k=10, weighted=False)
    iterX = iter([trainX])
    resultIter = iterativeSoftImpute.learnModel(iterX)
    Z = next(resultIter)
    predX = iterativeSoftImpute.predictOne(Z, testX.nonzero())
    error = MCEvaluator.rootMeanSqError(testX, predX)
def computeTestAuc(args):
    """Train maxLocalAuc on trainX (warm-started from U, V) and return the
    average ROC curves on the train and test sets as
    (fprTrain, tprTrain, fprTest, tprTest)."""
    trainX, testX, maxLocalAuc, U, V = args
    numpy.random.seed(21)  # deterministic across parallel workers
    logging.debug(maxLocalAuc)
    #maxLocalAuc.learningRateSelect(trainX)
    # NOTE(review): the local name "time" shadows the time module inside
    # this function — confirm nothing below needs the module.
    U, V, trainMeasures, testMeasures, iterations, time = maxLocalAuc.learnModel(trainX, U=U, V=V, verbose=True)
    fprTrain, tprTrain = MCEvaluator.averageRocCurve(trainX, U, V)
    fprTest, tprTest = MCEvaluator.averageRocCurve(testX, U, V)
    return fprTrain, tprTrain, fprTest, tprTest
def learnPredictRanking(args):
    """
    A function to train on a training set and test on a test set, for a number
    of values of rho.

    args is a tuple (learner, trainX, testX, rhos); returns one ranking
    metric (MRR@k or F1@k, depending on learner.metric) per rho.
    """
    learner, trainX, testX, rhos = args
    logging.debug("k=" + str(learner.getK()))
    logging.debug(learner)
    testInds = testX.nonzero()
    # The learner consumes one copy of trainX per rho value
    trainXIter = []
    testIndList = []
    for rho in rhos:
        trainXIter.append(trainX)
        testIndList.append(testInds)
    trainXIter = iter(trainXIter)
    ZIter = learner.learnModel(trainXIter, iter(rhos))
    metrics = numpy.zeros(rhos.shape[0])
    for j, Z in enumerate(ZIter):
        U, s, V = Z
        U = U * s
        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)
        # Training entries are excluded from the recommendations
        testOrderedItems = MCEvaluatorCython.recommendAtk(U, V, learner.recommendSize, trainX)
        if learner.metric == "mrr":
            metrics[j] = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize)
            logging.debug("MRR@" + str(learner.recommendSize) + ": " + str('%.4f' % metrics[j]) + " " + str(learner))
        elif learner.metric == "f1":
            # Fix: previously this branch computed mrrAtK while logging it
            # as F1 (copy-paste from the branch above).
            metrics[j] = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize)
            logging.debug("F1@" + str(learner.recommendSize) + ": " + str('%.4f' % metrics[j]) + " " + str(learner))
        else:
            raise ValueError("Unknown metric " + learner.metric)
    gc.collect()
    return metrics
def testModelSelect(self):
    """IterativeSoftImpute.modelSelect errors should match a manual
    cross-validation over the same rho grid using plain SoftImpute."""
    lmbda = 0.1
    shape = (20, 20)
    r = 20
    numInds = 100
    noise = 0.2
    X = ExpSU.SparseUtils.generateSparseLowRank(shape, r, numInds, noise)
    U, s, V = numpy.linalg.svd(X.todense())
    k = 15
    iterativeSoftImpute = IterativeSoftImpute(lmbda, k=None, svdAlg="propack", updateAlg="zero")
    iterativeSoftImpute.numProcesses = 1
    rhos = numpy.linspace(0.5, 0.001, 20)
    ks = numpy.array([k], numpy.int)
    folds = 3
    cvInds = Sampling.randCrossValidation(folds, X.nnz)
    meanTestErrors, meanTrainErrors = iterativeSoftImpute.modelSelect(X, rhos, ks, cvInds)
    #Now do model selection manually
    (rowInds, colInds) = X.nonzero()
    trainErrors = numpy.zeros((rhos.shape[0], len(cvInds)))
    testErrors = numpy.zeros((rhos.shape[0], len(cvInds)))
    for i, rho in enumerate(rhos):
        for j, (trainInds, testInds) in enumerate(cvInds):
            # Rebuild train/test matrices entry by entry from the fold indices
            trainX = scipy.sparse.csc_matrix(X.shape)
            testX = scipy.sparse.csc_matrix(X.shape)
            for p in trainInds:
                trainX[rowInds[p], colInds[p]] = X[rowInds[p], colInds[p]]
            for p in testInds:
                testX[rowInds[p], colInds[p]] = X[rowInds[p], colInds[p]]
            softImpute = SoftImpute(numpy.array([rho]), k=ks[0])
            ZList = [softImpute.learnModel(trainX, fullMatrices=False)]
            predTrainX = softImpute.predict(ZList, trainX.nonzero())[0]
            predX = softImpute.predict(ZList, testX.nonzero())[0]
            testErrors[i, j] = MCEvaluator.rootMeanSqError(testX, predX)
            trainErrors[i, j] = MCEvaluator.rootMeanSqError(trainX, predTrainX)
    meanTestErrors2 = testErrors.mean(1)
    meanTrainErrors2 = trainErrors.mean(1)
    nptst.assert_array_almost_equal(meanTestErrors.ravel(), meanTestErrors2, 1)
def learnPredict(self, trainX, testX, k, lmbda, gamma, maxNTry=1):
    """
    A function to train on a training set and test on a test set. Use a copy
    of the base learner (allow to run several parameter sets in parallel).

    Returns the RMSE on the test entries, or float("inf") when SGD failed to
    converge after maxNTry attempts.
    """
    logging.debug("k = " + str(k) + " lmbda = " + str(lmbda) + " gamma = " + str(gamma))
    learner = self.baseLearner.copy()
    learner.k = k
    learner.lmbda = lmbda
    learner.gamma = gamma
    testInds = testX.nonzero()
    # train (retry several times if a floating point error is raised)
    haveRes = False
    nTry = 0
    while not haveRes and nTry < maxNTry:
        nTry += 1
        try:
            ZIter = learner.learnModel(trainX, storeAll=False)
            haveRes = True
        except (FloatingPointError, ValueError, SGDNorm2Reg.ArithmeticError):
            pass
    if haveRes:
        logging.debug("result obtained in " + str(nTry) + " try(ies)")
        predX = learner.predict(ZIter, testX.nonzero())
        error = MCEvaluator.rootMeanSqError(testX, predX)
    else:
        # Fix: log message previously read "enable to make SGD converge"
        logging.debug("unable to make SGD converge")
        error = float("inf")
    return error
def testAverageAuc(self):
    """averageAuc should agree with localAUC at threshold u=0."""
    m = 50
    n = 20
    k = 8
    u = 20.0 / m
    w = 1 - u
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix(
        (m, n), k, w, csarray=True, verbose=True, indsPerRow=200
    )
    auc = MCEvaluator.averageAuc(X, U, V)
    # With u=0 every entry is above the threshold, so localAUC reduces to
    # the plain average AUC
    u = 0.0
    auc2 = MCEvaluator.localAUC(X, U, V, u)
    self.assertAlmostEquals(auc, auc2)
def testMeanSqError(self):
    """meanSqError is zero for identical matrices and otherwise matches the
    dense formula ((testX - predX)**2).sum() / numEntries."""
    numExamples = 10
    testX = scipy.sparse.rand(numExamples, numExamples)
    testX = testX.tocsr()
    predX = testX.copy()
    error = MCEvaluator.meanSqError(testX, predX)
    self.assertEquals(error, 0.0)
    # Dense random case with a known closed-form answer
    testX = numpy.random.rand(numExamples, numExamples)
    predX = testX + numpy.random.rand(numExamples, numExamples) * 0.5
    error2 = ((testX - predX) ** 2).sum() / (numExamples ** 2)
    error = MCEvaluator.meanSqError(scipy.sparse.csr_matrix(testX), scipy.sparse.csr_matrix(predX))
    self.assertEquals(error, error2)
def learnPredictMSE(args):
    """
    A function to train on a training set and test on a test set, for a number
    of values of rho.

    args is a tuple (learner, trainX, testX, rhos); returns one test RMSE per
    rho value.
    """
    learner, trainX, testX, rhos = args
    logging.debug("k=" + str(learner.getK()))
    logging.debug(learner)
    testInds = testX.nonzero()
    # The learner consumes one copy of trainX per rho value
    trainXIter = []
    testIndList = []
    for rho in rhos:
        trainXIter.append(trainX)
        testIndList.append(testInds)
    trainXIter = iter(trainXIter)
    ZIter = learner.learnModel(trainXIter, iter(rhos))
    predXIter = learner.predict(ZIter, testIndList)
    errors = numpy.zeros(rhos.shape[0])
    for j, predX in enumerate(predXIter):
        errors[j] = MCEvaluator.rootMeanSqError(testX, predX)
        logging.debug("Error = " + str(errors[j]))
        del predX  # free the prediction before the next iteration
    gc.collect()
    return errors
def testF1Atk(self):
    """f1AtK should match the harmonic mean of precisionAtK and recallAtK,
    including rows where precision+recall is zero."""
    m = 10
    n = 5
    r = 3
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)
    import sppy
    X = sppy.csarray(X)
    orderedItems = MCEvaluator.recommendAtk(U * s, V, n)
    # With all n items recommended, precision = r/n and recall = 1, so
    # F1 = 2*(r/n)/(1 + r/n)
    self.assertAlmostEquals(
        MCEvaluator.f1AtK(X, orderedItems, n, verbose=False), 2 * r / float(n) / (1 + r / float(n))
    )
    m = 20
    n = 50
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)
    k = 5
    orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
    precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)
    recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)
    f1s = numpy.zeros(m)
    for i in range(m):
        f1s[i] = 2 * precision[i] * recall[i] / (precision[i] + recall[i])
    # NOTE(review): orderedItems is recomputed with n items but f1AtK is
    # called with k — presumably f1AtK truncates to k; confirm.
    orderedItems = MCEvaluator.recommendAtk(U * s, V, n)
    f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)
    nptst.assert_array_equal(f1s, f1s2)
    # Test case where we get a zero precision or recall
    orderedItems[5, :] = -1
    precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)
    recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)
    f1s = numpy.zeros(m)
    for i in range(m):
        if precision[i] + recall[i] != 0:
            f1s[i] = 2 * precision[i] * recall[i] / (precision[i] + recall[i])
    f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)
    nptst.assert_array_equal(f1s, f1s2)
def testMeanSqError(self):
    """meanSqError on identical sparse matrices is zero; on random dense
    data it equals the elementwise MSE computed directly."""
    size = 10
    original = scipy.sparse.rand(size, size).tocsr()
    reconstruction = original.copy()
    self.assertEquals(MCEvaluator.meanSqError(original, reconstruction), 0.0)
    # Perturbed dense case with a directly computable expected value
    dense = numpy.random.rand(size, size)
    noisy = dense + numpy.random.rand(size, size) * 0.5
    expected = ((dense - noisy)**2).sum() / (size**2)
    observed = MCEvaluator.meanSqError(scipy.sparse.csr_matrix(dense), scipy.sparse.csr_matrix(noisy))
    self.assertEquals(observed, expected)
def testRecommendAtk(self):
    """recommendAtk should return the top-k columns of U.V^T with their
    scores, and honour omegaList by excluding those items."""
    m = 20
    n = 50
    r = 3
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)
    import sppy
    X = sppy.csarray(X)
    k = 10
    orderedItems, scores = MCEvaluator.recommendAtk(U, V, k, verbose=True)
    #Now do it manually
    Z = U.dot(V.T)
    orderedItems2 = Util.argmaxN(Z, k)
    scores2 = numpy.fliplr(numpy.sort(Z, 1))[:, 0:k]
    nptst.assert_array_equal(orderedItems, orderedItems2)
    nptst.assert_array_equal(scores, scores2)
    #Test case where we have a set of training indices to remove
    #Let's create a random omegaList
    omegaList = []
    for i in range(m):
        omegaList.append(numpy.random.permutation(n)[0:5])
    orderedItems = MCEvaluator.recommendAtk(U, V, k, omegaList=omegaList)
    orderedItems2 = MCEvaluator.recommendAtk(U, V, k)
    #print(omegaList)
    #print(orderedItems)
    #print(orderedItems2)
    for i in range(m):
        # Excluded items must not appear in the filtered recommendations
        items = numpy.intersect1d(omegaList[i], orderedItems[i, :])
        self.assertEquals(items.shape[0], 0)
        # Filtered + excluded items together should cover the unfiltered list
        items = numpy.union1d(omegaList[i], orderedItems[i, :])
        items = numpy.intersect1d(items, orderedItems2[i, :])
        nptst.assert_array_equal(items, numpy.sort(orderedItems2[i, :]))
def testRecommendAtk(self):
    """The Cython recommendAtk (which takes a sparse X of seen items) should
    agree with the Python version given the equivalent omegaList."""
    m = 20
    n = 50
    r = 3
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)
    import sppy
    X = sppy.csarray(X)
    k = 10
    # Rebuild X so that its nonzeros are exactly the omegaList entries
    X = numpy.zeros(X.shape)
    omegaList = []
    for i in range(m):
        omegaList.append(numpy.random.permutation(n)[0:5])
        X[i, omegaList[i]] = 1
    X = sppy.csarray(X)
    orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
    orderedItems2 = MCEvaluator.recommendAtk(U, V, k, omegaList=omegaList)
    # -1 marks padding rows; compare only the valid entries
    nptst.assert_array_equal(orderedItems[orderedItems2 != -1], orderedItems2[orderedItems2 != -1])
    for i in range(m):
        items = numpy.intersect1d(omegaList[i], orderedItems[i, :])
        self.assertEquals(items.shape[0], 0)
        #items = numpy.union1d(omegaList[i], orderedItems[i, :])
        #items = numpy.intersect1d(items, orderedItems2[i, :])
        #nptst.assert_array_equal(items, numpy.sort(orderedItems2[i, :]))
    #Now let's have an all zeros X
    X = sppy.csarray(X.shape)
    orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
    orderedItems2 = MCEvaluator.recommendAtk(U, V, k)
    nptst.assert_array_equal(orderedItems, orderedItems2)
def learnPredictRanking(args):
    """
    A function to train on a training set and test on a test set, for a number
    of values of rho.

    args is a tuple (learner, trainX, testX, rhos); returns one ranking
    metric (MRR@k or F1@k, depending on learner.metric) per rho.
    """
    learner, trainX, testX, rhos = args
    logging.debug("k=" + str(learner.getK()))
    logging.debug(learner)
    testInds = testX.nonzero()
    # The learner consumes one copy of trainX per rho value
    trainXIter = []
    testIndList = []
    for rho in rhos:
        trainXIter.append(trainX)
        testIndList.append(testInds)
    trainXIter = iter(trainXIter)
    ZIter = learner.learnModel(trainXIter, iter(rhos))
    metrics = numpy.zeros(rhos.shape[0])
    for j, Z in enumerate(ZIter):
        U, s, V = Z
        U = U*s
        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)
        # Training entries are excluded from the recommendations
        testOrderedItems = MCEvaluatorCython.recommendAtk(U, V, learner.recommendSize, trainX)
        if learner.metric == "mrr":
            metrics[j] = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize)
            logging.debug("MRR@" + str(learner.recommendSize) + ": " + str('%.4f' % metrics[j]) + " " + str(learner))
        elif learner.metric == "f1":
            # Fix: previously this branch computed mrrAtK while logging it
            # as F1 (copy-paste from the branch above).
            metrics[j] = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize)
            logging.debug("F1@" + str(learner.recommendSize) + ": " + str('%.4f' % metrics[j]) + " " + str(learner))
        else:
            raise ValueError("Unknown metric " + learner.metric)
    gc.collect()
    return metrics
def testRecommendAtk(self):
    """recommendAtk should return the top-k columns of U.V^T with their
    scores, and honour omegaList by excluding those items."""
    m = 20
    n = 50
    r = 3
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)
    import sppy
    X = sppy.csarray(X)
    k = 10
    orderedItems, scores = MCEvaluator.recommendAtk(U, V, k, verbose=True)
    # Now do it manually
    Z = U.dot(V.T)
    orderedItems2 = Util.argmaxN(Z, k)
    scores2 = numpy.fliplr(numpy.sort(Z, 1))[:, 0:k]
    nptst.assert_array_equal(orderedItems, orderedItems2)
    nptst.assert_array_equal(scores, scores2)
    # Test case where we have a set of training indices to remove
    # Let's create a random omegaList
    omegaList = []
    for i in range(m):
        omegaList.append(numpy.random.permutation(n)[0:5])
    orderedItems = MCEvaluator.recommendAtk(U, V, k, omegaList=omegaList)
    orderedItems2 = MCEvaluator.recommendAtk(U, V, k)
    # print(omegaList)
    # print(orderedItems)
    # print(orderedItems2)
    for i in range(m):
        # Excluded items must not appear in the filtered recommendations
        items = numpy.intersect1d(omegaList[i], orderedItems[i, :])
        self.assertEquals(items.shape[0], 0)
        # Filtered + excluded items together should cover the unfiltered list
        items = numpy.union1d(omegaList[i], orderedItems[i, :])
        items = numpy.intersect1d(items, orderedItems2[i, :])
        nptst.assert_array_equal(items, numpy.sort(orderedItems2[i, :]))
def testAverageAuc(self):
    """averageAuc should agree with localAUC at threshold u=0."""
    m = 50
    n = 20
    k = 8
    u = 20.0 / m
    w = 1 - u
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True, verbose=True, indsPerRow=200)
    auc = MCEvaluator.averageAuc(X, U, V)
    # With u=0 every entry is above the threshold, so localAUC reduces to
    # the plain average AUC
    u = 0.0
    auc2 = MCEvaluator.localAUC(X, U, V, u)
    self.assertAlmostEquals(auc, auc2)
def testRecommendAtk(self):
    """The Cython recommendAtk (which takes a sparse X of seen items) should
    agree with the Python version given the equivalent omegaList."""
    m = 20
    n = 50
    r = 3
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), r, 0.5, verbose=True)
    import sppy
    X = sppy.csarray(X)
    k = 10
    # Rebuild X so that its nonzeros are exactly the omegaList entries
    X = numpy.zeros(X.shape)
    omegaList = []
    for i in range(m):
        omegaList.append(numpy.random.permutation(n)[0:5])
        X[i, omegaList[i]] = 1
    X = sppy.csarray(X)
    orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
    orderedItems2 = MCEvaluator.recommendAtk(U, V, k, omegaList=omegaList)
    # -1 marks padding rows; compare only the valid entries
    nptst.assert_array_equal(orderedItems[orderedItems2!=-1], orderedItems2[orderedItems2!=-1])
    for i in range(m):
        items = numpy.intersect1d(omegaList[i], orderedItems[i, :])
        self.assertEquals(items.shape[0], 0)
        #items = numpy.union1d(omegaList[i], orderedItems[i, :])
        #items = numpy.intersect1d(items, orderedItems2[i, :])
        #nptst.assert_array_equal(items, numpy.sort(orderedItems2[i, :]))
    #Now let's have an all zeros X
    X = sppy.csarray(X.shape)
    orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
    orderedItems2 = MCEvaluator.recommendAtk(U, V, k)
    nptst.assert_array_equal(orderedItems, orderedItems2)
def computePrecision(args):
    """Train the learner on trainX and return precision@20 on testX.

    args is a tuple (trainX, testX, testOmegaList, learner); testOmegaList
    is unpacked but not used here.
    """
    trainX, testX, testOmegaList, learner = args
    (m, n) = trainX.shape
    learner.learnModel(trainX)
    maxItems = 20
    orderedItems = learner.predict(maxItems)
    #print(orderedItems)
    precision = MCEvaluator.precisionAtK(testX, orderedItems, maxItems)
    logging.debug("Precision@" + str(maxItems) + ": " + str(precision) + " with k = " + str(learner.k))
    return precision
def localAucsLmbdas(args):
    """For each lmbda in learner.lmbdas, fit on trainX and compute the
    approximate local AUC on testX; returns one AUC per lmbda.

    args is a tuple (trainX, testX, testOmegaList, learner). Note: learner
    is mutated (learner.lmbda is set in the loop).
    """
    trainX, testX, testOmegaList, learner = args
    (m, n) = trainX.shape
    localAucs = numpy.zeros(learner.lmbdas.shape[0])
    for j, lmbda in enumerate(learner.lmbdas):
        learner.lmbda = lmbda
        U, V = learner.learnModel(trainX)
        # r gives the per-row quantile thresholds for the local AUC
        r = SparseUtilsCython.computeR(U, V, 1-learner.u, learner.numAucSamples)
        localAucs[j] = MCEvaluator.localAUCApprox(testX, U, V, testOmegaList, learner.numAucSamples, r)
        logging.debug("Local AUC: " + str(localAucs[j]) + " with k = " + str(learner.k) + " and lmbda= " + str(learner.lmbda))
    return localAucs
def testPrecisionAtK(self):
    """precisionAtK should equal the fraction of recommended items that are
    nonzero in X, computed manually per row."""
    m = 10
    n = 5
    r = 3
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)
    import sppy
    X = sppy.csarray(X)
    #print(MCEvaluator.precisionAtK(X, U*s, V, 2))
    orderedItems = MCEvaluator.recommendAtk(U, V, n)
    # Recommending all n items gives precision nnz/(m*n)
    self.assertAlmostEquals(MCEvaluator.precisionAtK(X, orderedItems, n), X.nnz / float(m * n))
    k = 2
    orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
    precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)
    precisions = numpy.zeros(m)
    for i in range(m):
        nonzeroRow = X.toarray()[i, :].nonzero()[0]
        precisions[i] = numpy.intersect1d(scoreInds[i, :], nonzeroRow).shape[0] / float(k)
    self.assertEquals(precision.mean(), precisions.mean())
    #Now try random U and V
    U = numpy.random.rand(m, 3)
    # NOTE(review): V is drawn with m rows here; item factors would normally
    # be (n, 3) — confirm this is intentional (k=2 keeps indices in range).
    V = numpy.random.rand(m, 3)
    orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
    precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)
    precisions = numpy.zeros(m)
    for i in range(m):
        nonzeroRow = X.toarray()[i, :].nonzero()[0]
        precisions[i] = numpy.intersect1d(scoreInds[i, :], nonzeroRow).shape[0] / float(k)
    self.assertEquals(precision.mean(), precisions.mean())
def testRecallAtK(self):
    """recallAtK should equal the fraction of each row's relevant items that
    appear among the top-k recommendations, computed manually."""
    m = 10
    n = 5
    r = 3
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)
    import sppy
    X = sppy.csarray(X)
    orderedItems = MCEvaluator.recommendAtk(U, V, n)
    # Recommending all n items recovers every relevant item: recall = 1
    self.assertAlmostEquals(MCEvaluator.recallAtK(X, orderedItems, n), 1.0)
    k = 2
    orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
    recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)
    recalls = numpy.zeros(m)
    for i in range(m):
        nonzeroRow = X.toarray()[i, :].nonzero()[0]
        recalls[i] = numpy.intersect1d(scoreInds[i, :], nonzeroRow).shape[0] / float(
            nonzeroRow.shape[0])
    self.assertEquals(recall.mean(), recalls.mean())
    #Now try random U and V
    U = numpy.random.rand(m, 3)
    # NOTE(review): V is drawn with m rows here; item factors would normally
    # be (n, 3) — confirm this is intentional (k=2 keeps indices in range).
    V = numpy.random.rand(m, 3)
    orderedItems = MCEvaluator.recommendAtk(U, V, k)
    recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)
    recalls = numpy.zeros(m)
    for i in range(m):
        nonzeroRow = X.toarray()[i, :].nonzero()[0]
        recalls[i] = numpy.intersect1d(scoreInds[i, :], nonzeroRow).shape[0] / float(
            nonzeroRow.shape[0])
    self.assertEquals(recall.mean(), recalls.mean())
def testPredict(self):
    """Smoke test: IterativeSGDNorm2Reg.predict on the training indices
    should run and produce finite RMSEs (printed for inspection)."""
    k = 5
    lmbda = 0.01
    eps = 0.000001
    tmax = 1000
    learner = IterativeSGDNorm2Reg(k, lmbda, eps, tmax)
    ZList = learner.learnModel(iter(self.matrixList))
    indList = []
    for X in self.matrixList:
        indList.append(X.nonzero())
    XList = learner.predict(ZList, indList)
    for i, Xhat in enumerate(XList):
        #print(Xhat)
        print(MCEvaluator.rootMeanSqError(Xhat, self.matrixList[i]))
def computeTestMRR(args):
    """
    A simple function for outputing MRR for a learner in conjunction e.g.
    with parallel model selection.

    args is a tuple (trainX, testX, learner); returns MRR@recommendSize on
    testX, with training items excluded from the recommendations.
    """
    trainX, testX, learner = args
    learner.learnModel(trainX)
    testOrderedItems = MCEvaluatorCython.recommendAtk(learner.U, learner.V, learner.recommendSize, trainX)
    mrr = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize)
    try:
        learnerStr = learner.modelParamsStr()
    # Fix: a bare except also swallowed KeyboardInterrupt/SystemExit; the
    # fallback is only meant for learners without modelParamsStr.
    except Exception:
        learnerStr = str(learner)
    logging.debug("MRR@" + str(learner.recommendSize) + ": " + str("%.4f" % mrr) + " " + learnerStr)
    return mrr
def testTrain(self):
    """Smoke test for BPR.train, printing the average AUC and exercising the
    regularisation parameters."""
    args = BPRArgs()
    args.learning_rate = 0.1
    k = 5
    learner = BPR(k, args)
    maxIterations = 10
    # assigned but not passed to train() in this test
    sample_negative_items_empirically = True
    sampler = UniformUserUniformItem()
    user_factors, item_factors = learner.train(self.X, sampler, maxIterations)
    print(MCEvaluator.averageAuc(self.X, user_factors, item_factors))
    #Let's try regularisation
    args.user_regularization = 1
    learner.train(self.X, sampler, maxIterations)
    #Let's try regularisation
    args.positive_item_regularization = 1
def localAucsLmbdas(args):
    """For each lmbda in learner.lmbdas, fit on trainX and compute the
    approximate local AUC on testX; returns one AUC per lmbda.

    args is a tuple (trainX, testX, testOmegaList, learner). Note: learner
    is mutated (learner.lmbda is set in the loop).
    """
    trainX, testX, testOmegaList, learner = args
    (m, n) = trainX.shape
    localAucs = numpy.zeros(learner.lmbdas.shape[0])
    for j, lmbda in enumerate(learner.lmbdas):
        learner.lmbda = lmbda
        U, V = learner.learnModel(trainX)
        # r gives the per-row quantile thresholds for the local AUC
        r = SparseUtilsCython.computeR(U, V, 1 - learner.u, learner.numAucSamples)
        localAucs[j] = MCEvaluator.localAUCApprox(testX, U, V, testOmegaList, learner.numAucSamples, r)
        logging.debug("Local AUC: " + str(localAucs[j]) + " with k = " + str(learner.k) + " and lmbda= " + str(learner.lmbda))
    return localAucs
def testModelSelect2(self):
    """modelSelect with identical train/test folds should have zero error
    std-dev and match errors computed by fitting each (rho, k) manually.

    Fixes: Python 2-only outIterator.next() replaced by the next() builtin;
    deprecated numpy.int alias replaced by int (removed in NumPy >= 1.24).
    """
    rho = 0.1
    shape = (20, 20)
    r = 20
    numInds = 100
    noise = 0.2
    X = ExpSU.SparseUtils.generateSparseLowRank(shape, r, numInds, noise)
    X = X.tocsc()
    U, s, V = numpy.linalg.svd(X.todense())
    k = 15
    iterativeSoftImpute = IterativeSoftImpute(rho, k=None, svdAlg="propack", updateAlg="initial")
    rhos = numpy.linspace(0.5, 0.001, 5)
    ks = numpy.array([5, 10, 15], int)
    folds = 3
    # Each fold trains and tests on the full set of entries, so the error
    # should be identical across folds
    cvInds = []
    for i in range(folds):
        cvInds.append((numpy.arange(X.nnz), numpy.arange(X.nnz)))
    meanTestErrors, stdTestErrors = iterativeSoftImpute.modelSelect(X, rhos, ks, cvInds)
    self.assertAlmostEquals(numpy.linalg.norm(stdTestErrors), 0, 3)
    meanTestErrors2 = numpy.zeros((rhos.shape[0], ks.shape[0]))
    #Now compute errors manually
    for j, k in enumerate(ks):
        iterativeSoftImpute.setK(k)
        for i, rho in enumerate(rhos):
            iterativeSoftImpute.setRho(rho)
            ZIter = iterativeSoftImpute.learnModel(iter([X]))
            indList = [X.nonzero()]
            outIterator = iterativeSoftImpute.predict(ZIter, indList)
            Xhat = next(outIterator)
            meanTestErrors2[i, j] = MCEvaluator.rootMeanSqError(X, Xhat)
    nptst.assert_array_almost_equal(meanTestErrors, meanTestErrors2, 2)
def testLearnModel(self):
    """NimfaFactorise.learnModel should return a prediction of the same
    shape as X, and a list of predictions when several ranks are given."""
    numpy.random.seed(21)
    X = scipy.sparse.rand(10, 10, 0.5)
    X = X.tocsr()
    method = "lsnmf"
    nimfaFactorise = NimfaFactorise(method, maxIter=50)
    predX = nimfaFactorise.learnModel(X)
    self.assertEquals(predX.shape, X.shape)
    #Test the case where we specify many ranks
    ranks = numpy.array([10, 8, 5, 2])
    nimfaFactorise = NimfaFactorise(method, ranks)
    predXList = nimfaFactorise.learnModel(X)
    #Let's look at the errors
    for predX in predXList:
        error = MCEvaluator.meanSqError(X, predX)
        print(error)
def testLocalAucApprox2(self):
    """localAUCApprox at w=0.5 should converge towards the exact localAUC,
    and agree with a high-sample-count approximation of itself."""
    m = 100
    n = 200
    k = 5
    numInds = 100  # NOTE(review): unused — confirm it can be removed
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True, verbose=True)
    r = numpy.ones(m) * -10  # NOTE(review): unused — confirm
    w = 0.5
    localAuc = MCEvaluator.localAUC(X, U, V, w)
    samples = numpy.arange(50, 200, 10)
    for i, sampleSize in enumerate(samples):
        localAuc2 = MCEvaluator.localAUCApprox(
            SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)
        self.assertAlmostEqual(localAuc2, localAuc, 1)
    #Test more accurately
    sampleSize = 1000
    localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)
    self.assertAlmostEqual(localAuc2, localAuc, 2)
    #Now set a high r
    Z = U.dot(V.T)  # NOTE(review): unused — confirm
    # The reference value is itself an approximation with 1000 samples here
    localAuc = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)
    for i, sampleSize in enumerate(samples):
        localAuc2 = MCEvaluator.localAUCApprox(
            SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)
        self.assertAlmostEqual(localAuc2, localAuc, 1)
    #Test more accurately
    sampleSize = 1000
    localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)
    self.assertAlmostEqual(localAuc2, localAuc, 2)
def computeTestF1(args):
    """
    A simple function for outputing F1 for a learner in conjunction e.g. with
    parallel model selection.

    args is a tuple (trainX, testX, learner); returns F1@recommendSize on
    testX, with training items excluded from the recommendations.
    """
    trainX, testX, learner = args
    learner.learnModel(trainX)
    testOrderedItems = MCEvaluatorCython.recommendAtk(learner.U, learner.V, learner.recommendSize, trainX)
    f1 = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize)
    try:
        learnerStr = learner.modelParamsStr()
    # Fix: a bare except also swallowed KeyboardInterrupt/SystemExit; the
    # fallback is only meant for learners without modelParamsStr.
    except Exception:
        learnerStr = str(learner)
    logging.debug("F1@" + str(learner.recommendSize) + ": " + str('%.4f' % f1) + " " + learnerStr)
    return f1
def learnPredict(self, trainX, testX, k, lmbda, gamma, maxNTry=1):
    """
    A function to train on a training set and test on a test set. Use a copy
    of the base learner (allow to run several parameter sets in parallel).

    Returns the RMSE on the test entries, or float("inf") when SGD failed to
    converge after maxNTry attempts.
    """
    logging.debug("k = " + str(k) + " lmbda = " + str(lmbda) + " gamma = " + str(gamma))
    learner = self.baseLearner.copy()
    learner.k = k
    learner.lmbda = lmbda
    learner.gamma = gamma
    testInds = testX.nonzero()
    # train (retry several times if a floating point error is raised)
    haveRes = False
    nTry = 0
    while not haveRes and nTry < maxNTry:
        nTry += 1
        try:
            ZIter = learner.learnModel(trainX, storeAll=False)
            haveRes = True
        except (FloatingPointError, ValueError, SGDNorm2Reg.ArithmeticError):
            pass
    if haveRes:
        logging.debug("result obtained in " + str(nTry) + " try(ies)")
        predX = learner.predict(ZIter, testX.nonzero())
        error = MCEvaluator.rootMeanSqError(testX, predX)
    else:
        # Fix: log message previously read "enable to make SGD converge"
        logging.debug("unable to make SGD converge")
        error = float("inf")
    return error
def testPrecisionAtK(self):
    """precisionAtK should equal the fraction of recommended items that are
    nonzero in X, computed manually per row."""
    m = 10
    n = 5
    r = 3
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)
    import sppy
    X = sppy.csarray(X)
    # print(MCEvaluator.precisionAtK(X, U*s, V, 2))
    orderedItems = MCEvaluator.recommendAtk(U, V, n)
    # Recommending all n items gives precision nnz/(m*n)
    self.assertAlmostEquals(MCEvaluator.precisionAtK(X, orderedItems, n), X.nnz / float(m * n))
    k = 2
    orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
    precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)
    precisions = numpy.zeros(m)
    for i in range(m):
        nonzeroRow = X.toarray()[i, :].nonzero()[0]
        precisions[i] = numpy.intersect1d(scoreInds[i, :], nonzeroRow).shape[0] / float(k)
    self.assertEquals(precision.mean(), precisions.mean())
    # Now try random U and V
    U = numpy.random.rand(m, 3)
    # NOTE(review): V is drawn with m rows here; item factors would normally
    # be (n, 3) — confirm this is intentional (k=2 keeps indices in range).
    V = numpy.random.rand(m, 3)
    orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
    precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)
    precisions = numpy.zeros(m)
    for i in range(m):
        nonzeroRow = X.toarray()[i, :].nonzero()[0]
        precisions[i] = numpy.intersect1d(scoreInds[i, :], nonzeroRow).shape[0] / float(k)
    self.assertEquals(precision.mean(), precisions.mean())
def testRecallAtK(self):
    """recallAtK should equal the fraction of each row's relevant items that
    appear among the top-k recommendations, computed manually."""
    m = 10
    n = 5
    r = 3
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)
    import sppy
    X = sppy.csarray(X)
    orderedItems = MCEvaluator.recommendAtk(U, V, n)
    # Recommending all n items recovers every relevant item: recall = 1
    self.assertAlmostEquals(MCEvaluator.recallAtK(X, orderedItems, n), 1.0)
    k = 2
    orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
    recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)
    recalls = numpy.zeros(m)
    for i in range(m):
        nonzeroRow = X.toarray()[i, :].nonzero()[0]
        recalls[i] = numpy.intersect1d(scoreInds[i, :], nonzeroRow).shape[0] / float(nonzeroRow.shape[0])
    self.assertEquals(recall.mean(), recalls.mean())
    # Now try random U and V
    U = numpy.random.rand(m, 3)
    # NOTE(review): V is drawn with m rows here; item factors would normally
    # be (n, 3) — confirm this is intentional (k=2 keeps indices in range).
    V = numpy.random.rand(m, 3)
    orderedItems = MCEvaluator.recommendAtk(U, V, k)
    recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)
    recalls = numpy.zeros(m)
    for i in range(m):
        nonzeroRow = X.toarray()[i, :].nonzero()[0]
        recalls[i] = numpy.intersect1d(scoreInds[i, :], nonzeroRow).shape[0] / float(nonzeroRow.shape[0])
    self.assertEquals(recall.mean(), recalls.mean())
def testLocalAucApprox2(self):
    """Check that localAUCApprox converges to the exact localAUC as the
    number of AUC samples grows (coarse check over a range of sample sizes,
    then a tighter check at 1000 samples)."""
    m = 100
    n = 200
    k = 5
    numInds = 100
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True, verbose=True)

    # NOTE(review): r is never passed to any call below — dead assignment?
    r = numpy.ones(m) * -10
    w = 0.5
    localAuc = MCEvaluator.localAUC(X, U, V, w)

    samples = numpy.arange(50, 200, 10)

    # Approximation should be within ~0.05 of the exact value at each size.
    for i, sampleSize in enumerate(samples):
        localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)
        self.assertAlmostEqual(localAuc2, localAuc, 1)

    # Test more accurately
    sampleSize = 1000
    localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)
    self.assertAlmostEqual(localAuc2, localAuc, 2)

    # Now set a high r
    # NOTE(review): despite the comment, r is never changed here and Z is
    # unused; the "exact" baseline below is itself an approximation, so this
    # second section compares approximations against each other — confirm
    # whether an `r = ...` assignment went missing.
    Z = U.dot(V.T)
    localAuc = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)

    for i, sampleSize in enumerate(samples):
        localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)
        self.assertAlmostEqual(localAuc2, localAuc, 1)

    # Test more accurately
    sampleSize = 1000
    localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)
    self.assertAlmostEqual(localAuc2, localAuc, 2)
def predict(self, maxItems):
    """Return the top ``maxItems`` recommendations per row.

    NOTE(review): both factor arguments are ``self.U``, so scores come from
    U U^T rather than U V^T — presumably deliberate (user-user similarity);
    confirm against callers.
    """
    userFactors = self.U
    return MCEvaluator.recommendAtk(userFactors, userFactors, maxItems)
def recordResults(self, ZIter, learner, fileName):
    """
    Save results for a particular recommendation.

    Iterates the learned models in ZIter in lockstep with the train/test
    matrix iterators, computes test (and optionally train) error measures
    for each, and saves measures plus per-model metadata to fileName via
    numpy.savez.
    """
    trainIterator = self.getTrainIterator()
    testIterator = self.testXIteratorFunc()
    measures = []
    metadata = []
    vectorMetaData = []

    logging.debug("Computing recommendation errors")

    while True:
        # Time how long fetching/learning the next model takes.
        try:
            start = time.time()
            Z = next(ZIter)
            learnTime = time.time()-start
        except StopIteration:
            break

        if self.algoArgs.verbose:
            print(learner.measures)
        vectorMetaData.append(learner.measures)

        trainX = next(trainIterator)
        # Free the training matrix immediately when train error isn't wanted,
        # to keep peak memory down on large datasets.
        if not self.algoArgs.trainError:
            del trainX
            gc.collect()

        testX = next(testIterator)
        predTestX = learner.predictOne(Z, testX.nonzero())
        predTestX.eliminate_zeros()
        # Predictions are made in the (centered) training space; undo the
        # centering before comparing against the raw test entries.
        predTestX = trainIterator.uncenter(predTestX)

        currentMeasures = [MCEvaluator.rootMeanSqError(testX, predTestX), MCEvaluator.meanAbsError(testX, predTestX)]

        if self.algoArgs.trainError:
            assert trainX.shape == testX.shape
            predTrainX = learner.predictOne(Z, trainX.nonzero())
            predTrainX.eliminate_zeros()
            predTrainX = trainIterator.uncenter(predTrainX)
            # Uncenter the training matrix itself so both sides of the RMSE
            # are in the original value space.
            trainX.eliminate_zeros()
            trainX = trainIterator.uncenter(trainX)
            currentMeasures.append(MCEvaluator.rootMeanSqError(trainX, predTrainX))
            del trainX
            gc.collect()

        logging.debug("Error measures: " + str(currentMeasures))
        logging.debug("Standard deviation of test set " + str(testX.data.std()))
        measures.append(currentMeasures)

        #Store some metadata about the learning process
        if type(learner) == IterativeSoftImpute:
            metadata.append([Z[0].shape[1], learner.getRho(), learnTime])
        elif type(learner) == IterativeSGDNorm2Reg:
            metadata.append([Z[0][0].shape[1], learner.getLambda(), learnTime])

    measures = numpy.array(measures)
    metadata = numpy.array(metadata)
    vectorMetaData = numpy.array(vectorMetaData)

    logging.debug(measures)
    # Arrays are stored positionally (arr_0=measures, arr_1=metadata,
    # arr_2=vectorMetaData).
    numpy.savez(fileName, measures, metadata, vectorMetaData)
    logging.debug("Saved file as " + fileName)
def run():
    """Benchmark helper: evaluate the approximate local AUC numRuns times.

    All inputs (X, U, V, omegaList, numAucSamples, r, numRuns) are taken
    from the enclosing scope; the return value is discarded.
    """
    for _ in range(numRuns):
        MCEvaluator.localAUCApprox(X, U, V, omegaList, numAucSamples, r)
def recordResults(self, muU, muV, trainMeasures, testMeasures, loopInd, rowSamples, indPtr, colInds, testIndPtr, testColInds, allIndPtr, allColInds, gi, gp, gq, trainX, startTime):
    """
    Record train/validation measures for the current optimisation iteration.

    Appends [objective, local AUC, elapsed time, iteration] to trainMeasures
    and, when a validation set is supplied via testIndPtr, appends
    [objective, local AUC, f1@k..., mrr@k...] to testMeasures. Returns a
    human-readable progress string for logging.
    """
    sigmaU = self.getSigma(loopInd, self.alpha, muU.shape[0])
    # NOTE(review): sigmaV is also derived from muU.shape[0] — confirm this
    # should not be muV.shape[0].
    sigmaV = self.getSigma(loopInd, self.alpha, muU.shape[0])

    r = SparseUtilsCython.computeR(muU, muV, self.w, self.numRecordAucSamples)
    objArr = self.objectiveApprox((indPtr, colInds), muU, muV, r, gi, gp, gq, full=True)

    # Fixed: compare to None with "is", not "==".
    if trainMeasures is None:
        trainMeasures = []
    trainMeasures.append([
        objArr.sum(),
        MCEvaluator.localAUCApprox((indPtr, colInds), muU, muV, self.w, self.numRecordAucSamples, r),
        time.time() - startTime,
        loopInd
    ])

    printStr = "iter " + str(loopInd) + ":"
    printStr += " sigmaU=" + str('%.4f' % sigmaU)
    printStr += " sigmaV=" + str('%.4f' % sigmaV)
    printStr += " train: obj~" + str('%.4f' % trainMeasures[-1][0])
    printStr += " LAUC~" + str('%.4f' % trainMeasures[-1][1])

    if testIndPtr is not None:
        testMeasuresRow = []
        testMeasuresRow.append(
            self.objectiveApprox((testIndPtr, testColInds), muU, muV, r, gi, gp, gq, allArray=(allIndPtr, allColInds)))
        testMeasuresRow.append(
            MCEvaluator.localAUCApprox((testIndPtr, testColInds), muU, muV, self.w, self.numRecordAucSamples, r, allArray=(allIndPtr, allColInds)))
        testOrderedItems = MCEvaluatorCython.recommendAtk(
            muU, muV, numpy.max(self.recommendSize), trainX)
        printStr += " validation: obj~" + str('%.4f' % testMeasuresRow[0])
        printStr += " LAUC~" + str('%.4f' % testMeasuresRow[1])

        # self.recommendSize may be an array of cutoffs or a single scalar;
        # iterating a scalar raises TypeError, in which case we fall back to
        # the scalar form.
        try:
            for p in self.recommendSize:
                f1Array, orderedItems = MCEvaluator.f1AtK(
                    (testIndPtr, testColInds), testOrderedItems, p, verbose=True)
                testMeasuresRow.append(f1Array[rowSamples].mean())
        except TypeError:  # Fixed: was a bare except, which hid real errors from f1AtK
            f1Array, orderedItems = MCEvaluator.f1AtK(
                (testIndPtr, testColInds), testOrderedItems, self.recommendSize, verbose=True)
            testMeasuresRow.append(f1Array[rowSamples].mean())

        printStr += " f1@" + str(self.recommendSize) + "=" + str(
            '%.4f' % testMeasuresRow[-1])

        try:
            for p in self.recommendSize:
                mrr, orderedItems = MCEvaluator.mrrAtK(
                    (testIndPtr, testColInds), testOrderedItems, p, verbose=True)
                testMeasuresRow.append(mrr[rowSamples].mean())
        except TypeError:  # Fixed: narrowed from a bare except for the same reason
            mrr, orderedItems = MCEvaluator.mrrAtK(
                (testIndPtr, testColInds), testOrderedItems, self.recommendSize, verbose=True)
            testMeasuresRow.append(mrr[rowSamples].mean())

        printStr += " mrr@" + str(self.recommendSize) + "=" + str(
            '%.4f' % testMeasuresRow[-1])
        testMeasures.append(testMeasuresRow)

    printStr += " ||U||=" + str('%.3f' % numpy.linalg.norm(muU))
    printStr += " ||V||=" + str('%.3f' % numpy.linalg.norm(muV))

    if self.bound:
        trainObj = objArr.sum()
        expectationBound = self.computeBound(trainX, muU, muV, trainObj, self.delta)
        printStr += " bound=" + str('%.3f' % expectationBound)
        trainMeasures[-1].append(expectationBound)

    return printStr
def predict(self, maxItems):
    """Return the top ``maxItems`` recommendations per row, scored via U V^T."""
    userFactors = self.U
    itemFactors = self.V
    return MCEvaluator.recommendAtk(userFactors, itemFactors, maxItems)
def recordResults(
    self,
    muU,
    muV,
    trainMeasures,
    testMeasures,
    loopInd,
    rowSamples,
    indPtr,
    colInds,
    testIndPtr,
    testColInds,
    allIndPtr,
    allColInds,
    gi,
    gp,
    gq,
    trainX,
    startTime,
):
    """
    Record train/validation measures for the current optimisation iteration.

    Appends [objective, local AUC, elapsed time, iteration] to trainMeasures
    and, when a validation set is supplied via testIndPtr, appends
    [objective, local AUC, f1@k..., mrr@k...] to testMeasures. Returns a
    human-readable progress string for logging.
    """
    sigmaU = self.getSigma(loopInd, self.alpha, muU.shape[0])
    # NOTE(review): sigmaV is also derived from muU.shape[0] — confirm this
    # should not be muV.shape[0].
    sigmaV = self.getSigma(loopInd, self.alpha, muU.shape[0])

    r = SparseUtilsCython.computeR(muU, muV, self.w, self.numRecordAucSamples)
    objArr = self.objectiveApprox((indPtr, colInds), muU, muV, r, gi, gp, gq, full=True)

    # Fixed: compare to None with "is", not "==".
    if trainMeasures is None:
        trainMeasures = []
    trainMeasures.append(
        [
            objArr.sum(),
            MCEvaluator.localAUCApprox((indPtr, colInds), muU, muV, self.w, self.numRecordAucSamples, r),
            time.time() - startTime,
            loopInd,
        ]
    )

    printStr = "iter " + str(loopInd) + ":"
    printStr += " sigmaU=" + str("%.4f" % sigmaU)
    printStr += " sigmaV=" + str("%.4f" % sigmaV)
    printStr += " train: obj~" + str("%.4f" % trainMeasures[-1][0])
    printStr += " LAUC~" + str("%.4f" % trainMeasures[-1][1])

    if testIndPtr is not None:
        testMeasuresRow = []
        testMeasuresRow.append(
            self.objectiveApprox(
                (testIndPtr, testColInds), muU, muV, r, gi, gp, gq, allArray=(allIndPtr, allColInds)
            )
        )
        testMeasuresRow.append(
            MCEvaluator.localAUCApprox(
                (testIndPtr, testColInds),
                muU,
                muV,
                self.w,
                self.numRecordAucSamples,
                r,
                allArray=(allIndPtr, allColInds),
            )
        )
        testOrderedItems = MCEvaluatorCython.recommendAtk(muU, muV, numpy.max(self.recommendSize), trainX)
        printStr += " validation: obj~" + str("%.4f" % testMeasuresRow[0])
        printStr += " LAUC~" + str("%.4f" % testMeasuresRow[1])

        # self.recommendSize may be an array of cutoffs or a single scalar;
        # iterating a scalar raises TypeError, in which case we fall back to
        # the scalar form.
        try:
            for p in self.recommendSize:
                f1Array, orderedItems = MCEvaluator.f1AtK(
                    (testIndPtr, testColInds), testOrderedItems, p, verbose=True
                )
                testMeasuresRow.append(f1Array[rowSamples].mean())
        except TypeError:  # Fixed: was a bare except, which hid real errors from f1AtK
            f1Array, orderedItems = MCEvaluator.f1AtK(
                (testIndPtr, testColInds), testOrderedItems, self.recommendSize, verbose=True
            )
            testMeasuresRow.append(f1Array[rowSamples].mean())

        printStr += " f1@" + str(self.recommendSize) + "=" + str("%.4f" % testMeasuresRow[-1])

        try:
            for p in self.recommendSize:
                mrr, orderedItems = MCEvaluator.mrrAtK((testIndPtr, testColInds), testOrderedItems, p, verbose=True)
                testMeasuresRow.append(mrr[rowSamples].mean())
        except TypeError:  # Fixed: narrowed from a bare except for the same reason
            mrr, orderedItems = MCEvaluator.mrrAtK(
                (testIndPtr, testColInds), testOrderedItems, self.recommendSize, verbose=True
            )
            testMeasuresRow.append(mrr[rowSamples].mean())

        printStr += " mrr@" + str(self.recommendSize) + "=" + str("%.4f" % testMeasuresRow[-1])
        testMeasures.append(testMeasuresRow)

    printStr += " ||U||=" + str("%.3f" % numpy.linalg.norm(muU))
    printStr += " ||V||=" + str("%.3f" % numpy.linalg.norm(muV))

    if self.bound:
        trainObj = objArr.sum()
        expectationBound = self.computeBound(trainX, muU, muV, trainObj, self.delta)
        printStr += " bound=" + str("%.3f" % expectationBound)
        trainMeasures[-1].append(expectationBound)

    return printStr
trainX = X if modelSelect: modelSelectX, userInds = Sampling.sampleUsers2(trainX, modelSelectSamples) meanMetrics, stdMetrics = learner.modelSelect(modelSelectX) learner.learnModel(X) U = learner.U V = learner.V if type(learner) != CosineKNNRecommender: U = numpy.ascontiguousarray(U) V = numpy.ascontiguousarray(V) #Note that we compute UU^T for recommendations orderedItems, scores = MCEvaluator.recommendAtk(U, U, maxItems, verbose=True) orderedItems2, scores2 = MCEvaluator.recommendAtk(U.dot(V.T.dot(V)), U, maxItems, verbose=True) else: orderedItems2 = orderedItems scores2 = scores #Normalise scores scores /= numpy.max(scores) meanStatsContacts, meanStatsInterests = saveResults(orderedItems, scores, dataset, similaritiesFileName, contactsFilename, interestsFilename, minScore, minContacts, minAcceptableSims) meanStatsContacts2, meanStatsInterests2 = saveResults(orderedItems2, scores2, dataset, similaritiesFileName, contactsFilename, interestsFilename, minScore, minContacts, minAcceptableSims) numpy.savez(outputFilename, meanStatsContacts, meanStatsInterests, meanStatsContacts2, meanStatsInterests2) logging.debug("Saved precisions/recalls on contacts/interests as " + outputFilename) finally:
def testF1Atk(self):
    """f1AtK should equal the harmonic mean of precisionAtK and recallAtK.

    Verifies the closed-form value when all n items are recommended, then
    compares f1AtK row-by-row against 2pr/(p+r) computed from precisionAtK
    and recallAtK, including a row forced to have zero precision/recall.
    """
    numRows = 10
    numCols = 5
    rank = 3
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((numRows, numCols), rank, 0.5, verbose=True)

    import sppy
    X = sppy.csarray(X)

    # Recommending all items: precision = rank/n, recall = 1, so
    # f1 = 2*(rank/n) / (1 + rank/n).
    orderedItems = MCEvaluator.recommendAtk(U * s, V, numCols)
    expectedF1 = 2 * rank / float(numCols) / (1 + rank / float(numCols))
    self.assertAlmostEquals(MCEvaluator.f1AtK(X, orderedItems, numCols, verbose=False), expectedF1)

    # Larger matrix: compare per-row f1 against the harmonic mean of the
    # separately computed precision and recall.
    numRows = 20
    numCols = 50
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((numRows, numCols), rank, 0.5, verbose=True)

    cutoff = 5
    orderedItems = MCEvaluator.recommendAtk(U * s, V, cutoff)
    precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, cutoff, verbose=True)
    recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, cutoff, verbose=True)

    expectedF1s = numpy.zeros(numRows)
    for row, (p, q) in enumerate(zip(precision, recall)):
        expectedF1s[row] = 2 * p * q / (p + q)

    orderedItems = MCEvaluator.recommendAtk(U * s, V, numCols)
    actualF1s, scoreInds = MCEvaluator.f1AtK(X, orderedItems, cutoff, verbose=True)
    nptst.assert_array_equal(expectedF1s, actualF1s)

    # Test case where we get a zero precision or recall
    orderedItems[5, :] = -1
    precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, cutoff, verbose=True)
    recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, cutoff, verbose=True)

    expectedF1s = numpy.zeros(numRows)
    for row, (p, q) in enumerate(zip(precision, recall)):
        if p + q != 0:
            expectedF1s[row] = 2 * p * q / (p + q)

    actualF1s, scoreInds = MCEvaluator.f1AtK(X, orderedItems, cutoff, verbose=True)
    nptst.assert_array_equal(expectedF1s, actualF1s)