Ejemplo n.º 1
0
    def testF1Atk(self):
        """
        Check MCEvaluator.f1AtK against values derived independently.

        First the mean F1 obtained when all n items are recommended is compared
        with the closed form 2*(r/n) / (1 + r/n). Then the per-row F1 scores are
        compared with the harmonic mean of the outputs of precisionAtK and
        recallAtK, both before and after row 5 of the recommendations is
        overwritten with -1.
        """
        m = 10
        n = 5
        r = 3
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)

        import sppy

        X = sppy.csarray(X)
        orderedItems = MCEvaluator.recommendAtk(U * s, V, n)

        # assertAlmostEquals was a deprecated alias and is gone in Python 3.12
        expectedF1 = 2 * r / float(n) / (1 + r / float(n))
        self.assertAlmostEqual(MCEvaluator.f1AtK(X, orderedItems, n, verbose=False), expectedF1)

        m = 20
        n = 50
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)
        k = 5

        orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
        precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)
        recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)
        f1s = numpy.zeros(m)

        for i in range(m):
            f1s[i] = 2 * precision[i] * recall[i] / (precision[i] + recall[i])

        # f1AtK is given the top-n list but is still evaluated at cutoff k
        orderedItems = MCEvaluator.recommendAtk(U * s, V, n)
        f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)

        nptst.assert_array_equal(f1s, f1s2)

        # Test case where we get a zero precision or recall
        orderedItems[5, :] = -1
        precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)
        recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)

        f1s = numpy.zeros(m)

        for i in range(m):
            # Leave the entry at zero rather than dividing by zero
            if precision[i] + recall[i] != 0:
                f1s[i] = 2 * precision[i] * recall[i] / (precision[i] + recall[i])

        f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)

        nptst.assert_array_equal(f1s, f1s2)
Ejemplo n.º 2
0
def computeTestF1(args):
    """
    A simple function for outputting F1 for a learner in conjunction e.g. with
    parallel model selection.

    :param args: a tuple (trainX, testX, learner). The learner is fitted on
        trainX, and its top learner.recommendSize recommendations are scored
        against testX.

    :return: the value returned by MCEvaluator.f1AtK at learner.recommendSize
    """
    trainX, testX, learner = args

    learner.learnModel(trainX)

    # trainX is handed to recommendAtk together with the factors
    testOrderedItems = MCEvaluatorCython.recommendAtk(learner.U, learner.V, learner.recommendSize, trainX)
    f1 = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize)

    # Fall back to str(learner) when no parameter summary is available, but
    # do not swallow KeyboardInterrupt/SystemExit as a bare except would.
    try:
        learnerStr = learner.modelParamsStr()
    except Exception:
        learnerStr = str(learner)

    logging.debug("F1@" + str(learner.recommendSize) + ": " + ("%.4f" % f1) + " " + learnerStr)

    return f1
Ejemplo n.º 3
0
def computeTestF1(args):
    """
    A simple function for outputting F1 for a learner in conjunction e.g. with
    parallel model selection.

    :param args: a tuple (trainX, testX, learner). The learner is fitted on
        trainX, and its top learner.recommendSize recommendations are scored
        against testX.

    :return: the value returned by MCEvaluator.f1AtK at learner.recommendSize
    """
    trainX, testX, learner = args

    learner.learnModel(trainX)

    # trainX is handed to recommendAtk together with the factors
    testOrderedItems = MCEvaluatorCython.recommendAtk(learner.U, learner.V,
                                                      learner.recommendSize,
                                                      trainX)
    f1 = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX),
                           testOrderedItems, learner.recommendSize)

    # Fall back to str(learner) when no parameter summary is available, but
    # do not swallow KeyboardInterrupt/SystemExit as a bare except would.
    try:
        learnerStr = learner.modelParamsStr()
    except Exception:
        learnerStr = str(learner)

    logging.debug("F1@" + str(learner.recommendSize) + ": " +
                  ('%.4f' % f1) + " " + learnerStr)

    return f1
Ejemplo n.º 4
0
    def testF1Atk(self):
        """
        Check MCEvaluator.f1AtK against values derived independently.

        First the mean F1 obtained when all n items are recommended is compared
        with the closed form 2*(r/n) / (1 + r/n). Then the per-row F1 scores are
        compared with the harmonic mean of the outputs of precisionAtK and
        recallAtK, both before and after row 5 of the recommendations is
        overwritten with -1.
        """
        m = 10
        n = 5
        r = 3
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                r,
                                                                0.5,
                                                                verbose=True)

        import sppy
        X = sppy.csarray(X)
        orderedItems = MCEvaluator.recommendAtk(U * s, V, n)

        # assertAlmostEquals was a deprecated alias and is gone in Python 3.12
        self.assertAlmostEqual(
            MCEvaluator.f1AtK(X, orderedItems, n, verbose=False),
            2 * r / float(n) / (1 + r / float(n)))

        m = 20
        n = 50
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                r,
                                                                0.5,
                                                                verbose=True)
        k = 5

        orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
        precision, scoreInds = MCEvaluator.precisionAtK(X,
                                                        orderedItems,
                                                        k,
                                                        verbose=True)
        recall, scoreInds = MCEvaluator.recallAtK(X,
                                                  orderedItems,
                                                  k,
                                                  verbose=True)
        f1s = numpy.zeros(m)

        for i in range(m):
            f1s[i] = 2 * precision[i] * recall[i] / (precision[i] + recall[i])

        # f1AtK is given the top-n list but is still evaluated at cutoff k
        orderedItems = MCEvaluator.recommendAtk(U * s, V, n)
        f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)

        nptst.assert_array_equal(f1s, f1s2)

        # Test case where we get a zero precision or recall
        orderedItems[5, :] = -1
        precision, scoreInds = MCEvaluator.precisionAtK(X,
                                                        orderedItems,
                                                        k,
                                                        verbose=True)
        recall, scoreInds = MCEvaluator.recallAtK(X,
                                                  orderedItems,
                                                  k,
                                                  verbose=True)

        f1s = numpy.zeros(m)

        for i in range(m):
            # Leave the entry at zero rather than dividing by zero
            if precision[i] + recall[i] != 0:
                f1s[i] = 2 * precision[i] * recall[i] / (precision[i] +
                                                         recall[i])

        f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)

        nptst.assert_array_equal(f1s, f1s2)
Ejemplo n.º 5
0
    def recordResults(
        self,
        muU,
        muV,
        trainMeasures,
        testMeasures,
        loopInd,
        rowSamples,
        indPtr,
        colInds,
        testIndPtr,
        testColInds,
        allIndPtr,
        allColInds,
        gi,
        gp,
        gq,
        trainX,
        startTime,
    ):
        """
        Record train (and optionally validation) measures for one iteration.

        A row [objective, approximate local AUC, elapsed time, loopInd] is
        appended to trainMeasures, with the bound appended to that row when
        self.bound is set. When testIndPtr is not None a row holding the
        validation objective, approximate local AUC, then the F1 and MRR
        values for each recommendation size is appended to testMeasures.

        :param trainMeasures: list to append the train row to. If None a new
            list is created, but it is local to this call and not returned.

        :param testMeasures: list to append the validation row to; only used
            when testIndPtr is not None.

        :param rowSamples: indices of the rows over which F1 and MRR are averaged.

        :return: a string summarising the recorded measures.
        """
        sigmaU = self.getSigma(loopInd, self.alpha, muU.shape[0])
        # NOTE(review): sigmaV is also computed from muU.shape[0] - confirm
        # whether muV.shape[0] was intended.
        sigmaV = self.getSigma(loopInd, self.alpha, muU.shape[0])
        r = SparseUtilsCython.computeR(muU, muV, self.w, self.numRecordAucSamples)
        objArr = self.objectiveApprox((indPtr, colInds), muU, muV, r, gi, gp, gq, full=True)
        if trainMeasures is None:
            trainMeasures = []
        trainMeasures.append(
            [
                objArr.sum(),
                MCEvaluator.localAUCApprox((indPtr, colInds), muU, muV, self.w, self.numRecordAucSamples, r),
                time.time() - startTime,
                loopInd,
            ]
        )

        printStr = "iter " + str(loopInd) + ":"
        printStr += " sigmaU=" + str("%.4f" % sigmaU)
        printStr += " sigmaV=" + str("%.4f" % sigmaV)
        printStr += " train: obj~" + str("%.4f" % trainMeasures[-1][0])
        printStr += " LAUC~" + str("%.4f" % trainMeasures[-1][1])

        if testIndPtr is not None:
            testOmega = (testIndPtr, testColInds)
            allOmega = (allIndPtr, allColInds)

            testMeasuresRow = []
            testMeasuresRow.append(
                self.objectiveApprox(testOmega, muU, muV, r, gi, gp, gq, allArray=allOmega)
            )
            testMeasuresRow.append(
                MCEvaluator.localAUCApprox(
                    testOmega, muU, muV, self.w, self.numRecordAucSamples, r, allArray=allOmega
                )
            )
            testOrderedItems = MCEvaluatorCython.recommendAtk(muU, muV, numpy.max(self.recommendSize), trainX)

            printStr += " validation: obj~" + str("%.4f" % testMeasuresRow[0])
            printStr += " LAUC~" + str("%.4f" % testMeasuresRow[1])

            # recommendSize may be a single value or a sequence of values.
            # Only the iterability probe is guarded, so errors raised by the
            # evaluators themselves are no longer silently retried.
            try:
                recommendSizes = list(self.recommendSize)
            except TypeError:
                recommendSizes = [self.recommendSize]

            for p in recommendSizes:
                f1Array, orderedItems = MCEvaluator.f1AtK(testOmega, testOrderedItems, p, verbose=True)
                testMeasuresRow.append(f1Array[rowSamples].mean())

            # Only the most recently appended value is printed
            printStr += " f1@" + str(self.recommendSize) + "=" + str("%.4f" % testMeasuresRow[-1])

            for p in recommendSizes:
                mrr, orderedItems = MCEvaluator.mrrAtK(testOmega, testOrderedItems, p, verbose=True)
                testMeasuresRow.append(mrr[rowSamples].mean())

            printStr += " mrr@" + str(self.recommendSize) + "=" + str("%.4f" % testMeasuresRow[-1])
            testMeasures.append(testMeasuresRow)

        printStr += " ||U||=" + str("%.3f" % numpy.linalg.norm(muU))
        printStr += " ||V||=" + str("%.3f" % numpy.linalg.norm(muV))

        if self.bound:
            trainObj = objArr.sum()

            expectationBound = self.computeBound(trainX, muU, muV, trainObj, self.delta)
            printStr += " bound=" + str("%.3f" % expectationBound)
            trainMeasures[-1].append(expectationBound)

        return printStr
Ejemplo n.º 6
0
    def recordResults(self, muU, muV, trainMeasures, testMeasures, loopInd,
                      rowSamples, indPtr, colInds, testIndPtr, testColInds,
                      allIndPtr, allColInds, gi, gp, gq, trainX, startTime):
        """
        Record train (and optionally validation) measures for one iteration.

        A row [objective, approximate local AUC, elapsed time, loopInd] is
        appended to trainMeasures, with the bound appended to that row when
        self.bound is set. When testIndPtr is not None a row holding the
        validation objective, approximate local AUC, then the F1 and MRR
        values for each recommendation size is appended to testMeasures.

        :param trainMeasures: list to append the train row to. If None a new
            list is created, but it is local to this call and not returned.

        :param testMeasures: list to append the validation row to; only used
            when testIndPtr is not None.

        :param rowSamples: indices of the rows over which F1 and MRR are averaged.

        :return: a string summarising the recorded measures.
        """
        sigmaU = self.getSigma(loopInd, self.alpha, muU.shape[0])
        # NOTE(review): sigmaV is also computed from muU.shape[0] - confirm
        # whether muV.shape[0] was intended.
        sigmaV = self.getSigma(loopInd, self.alpha, muU.shape[0])
        r = SparseUtilsCython.computeR(muU, muV, self.w,
                                       self.numRecordAucSamples)
        objArr = self.objectiveApprox((indPtr, colInds),
                                      muU,
                                      muV,
                                      r,
                                      gi,
                                      gp,
                                      gq,
                                      full=True)
        if trainMeasures is None:
            trainMeasures = []
        trainMeasures.append([
            objArr.sum(),
            MCEvaluator.localAUCApprox((indPtr, colInds), muU, muV, self.w,
                                       self.numRecordAucSamples, r),
            time.time() - startTime, loopInd
        ])

        printStr = "iter " + str(loopInd) + ":"
        printStr += " sigmaU=" + str('%.4f' % sigmaU)
        printStr += " sigmaV=" + str('%.4f' % sigmaV)
        printStr += " train: obj~" + str('%.4f' % trainMeasures[-1][0])
        printStr += " LAUC~" + str('%.4f' % trainMeasures[-1][1])

        if testIndPtr is not None:
            testOmega = (testIndPtr, testColInds)
            allOmega = (allIndPtr, allColInds)

            testMeasuresRow = []
            testMeasuresRow.append(
                self.objectiveApprox(testOmega,
                                     muU,
                                     muV,
                                     r,
                                     gi,
                                     gp,
                                     gq,
                                     allArray=allOmega))
            testMeasuresRow.append(
                MCEvaluator.localAUCApprox(testOmega,
                                           muU,
                                           muV,
                                           self.w,
                                           self.numRecordAucSamples,
                                           r,
                                           allArray=allOmega))
            testOrderedItems = MCEvaluatorCython.recommendAtk(
                muU, muV, numpy.max(self.recommendSize), trainX)

            printStr += " validation: obj~" + str('%.4f' % testMeasuresRow[0])
            printStr += " LAUC~" + str('%.4f' % testMeasuresRow[1])

            # recommendSize may be a single value or a sequence of values.
            # Only the iterability probe is guarded, so errors raised by the
            # evaluators themselves are no longer silently retried.
            try:
                recommendSizes = list(self.recommendSize)
            except TypeError:
                recommendSizes = [self.recommendSize]

            for p in recommendSizes:
                f1Array, orderedItems = MCEvaluator.f1AtK(testOmega,
                                                          testOrderedItems,
                                                          p,
                                                          verbose=True)
                testMeasuresRow.append(f1Array[rowSamples].mean())

            # Only the most recently appended value is printed
            printStr += " f1@" + str(self.recommendSize) + "=" + str(
                '%.4f' % testMeasuresRow[-1])

            for p in recommendSizes:
                mrr, orderedItems = MCEvaluator.mrrAtK(testOmega,
                                                       testOrderedItems,
                                                       p,
                                                       verbose=True)
                testMeasuresRow.append(mrr[rowSamples].mean())

            printStr += " mrr@" + str(self.recommendSize) + "=" + str(
                '%.4f' % testMeasuresRow[-1])
            testMeasures.append(testMeasuresRow)

        printStr += " ||U||=" + str('%.3f' % numpy.linalg.norm(muU))
        printStr += " ||V||=" + str('%.3f' % numpy.linalg.norm(muV))

        if self.bound:
            trainObj = objArr.sum()

            expectationBound = self.computeBound(trainX, muU, muV, trainObj,
                                                 self.delta)
            printStr += " bound=" + str('%.3f' % expectationBound)
            trainMeasures[-1].append(expectationBound)

        return printStr
Ejemplo n.º 7
0
# Look at the distribution of the train and test objectives.
# density=True replaces the "normed" keyword, which was removed from
# numpy.histogram in NumPy 1.24; the two agree for equal-width bins.
plt.figure(5)
hist, edges = numpy.histogram(trainObjVec, bins=50, density=True)
xvals = (edges[0:-1]+edges[1:])/2
plt.plot(xvals, hist, label="train")

# Reuse the train bin edges so that both curves share the same x values
hist, e = numpy.histogram(testObjVec, bins=edges, density=True)
plt.plot(xvals, hist, label="test")
plt.legend()

# Train objective against the row sums of trainX
plt.figure(6)
plt.scatter(trainObjVec, trainX.sum(1))

# See the distinct F1 values and how many rows attain each of them
f1s, orderedItems = MCEvaluator.f1AtK(testOmegaPtr, testOrderedItems, maxLocalAuc.recommendSize, verbose=True)
uniqp, inverse = numpy.unique(f1s, return_inverse=True)
print(uniqp, numpy.bincount(inverse))

# Correlation of F1 with the row sums of trainX and with the objectives
numItems = trainX.sum(1)
print(numpy.corrcoef(numItems, f1s))
print(numpy.corrcoef(trainObjVec, f1s))
print(numpy.corrcoef(testObjVec, f1s))

#fprTrain, tprTrain = MCEvaluator.averageRocCurve(trainX, U, V)
#fprTest, tprTest = MCEvaluator.averageRocCurve(testX, U, V)
#
#plt.figure(7)
#plt.plot(fprTrain, tprTrain, label="train")
#plt.plot(fprTest, tprTest, label="test")
#plt.xlabel("mean false positive rate")
Ejemplo n.º 8
0
    def recordResults(self, X, trainX, testX, learner, fileName):
        """
        Save results for a particular recommendation.

        The learner is fitted on trainX self.algoArgs.recordFolds times. After
        each fit precision, recall, F1 and MRR are computed at every cutoff in
        self.ps, followed by two approximate local AUC values (with
        w = 1 - self.algoArgs.u and with w = 0), for both trainX and testX. The
        measures and the learning time are averaged over the folds, logged,
        and written with numpy.savez together with the ordered items of the
        last fold.

        Nothing is done when self.algoArgs.skipRecordResults is set.

        :param X: the complete matrix, used as allArray for the test local AUC.

        :param trainX: the matrix the learner is fitted on.

        :param testX: the matrix the test measures are computed on.

        :param learner: the learner; apart from IterativeSoftImpute it must
            expose U and V after learnModel.

        :param fileName: the file passed to numpy.savez.
        """
        if self.algoArgs.skipRecordResults:
            logging.debug("Skipping final evaluation of algorithm")
            return

        # Ranking measures evaluated at every cutoff in self.ps, in this order
        rankingMetrics = [
            ("precision", MCEvaluator.precisionAtK),
            ("recall", MCEvaluator.recallAtK),
            ("f1", MCEvaluator.f1AtK),
            ("mrr", MCEvaluator.mrrAtK),
        ]
        numAucSamples = self.algoArgs.numRecordAucSamples

        allTrainMeasures = []
        allTestMeasures = []
        allMetaData = []

        for _ in range(self.algoArgs.recordFolds):
            metaData = []
            w = 1-self.algoArgs.u
            logging.debug("Computing recommendation errors")
            maxItems = self.ps[-1]

            start = time.time()
            if type(learner) == IterativeSoftImpute:
                trainIterator = iter([trainX])
                ZList = learner.learnModel(trainIterator)
                # The next() builtin works on Python 2.6+ and 3, whereas the
                # .next() method only exists on Python 2 iterators.
                U, s, V = next(ZList)
                U = U*s

                U = numpy.ascontiguousarray(U)
                V = numpy.ascontiguousarray(V)
            else:
                learner.learnModel(trainX)
                U = learner.U
                V = learner.V

            learnTime = time.time()-start
            metaData.append(learnTime)

            logging.debug("Getting all omega")
            allOmegaPtr = SparseUtils.getOmegaListPtr(X)
            logging.debug("Getting train omega")
            trainOmegaPtr = SparseUtils.getOmegaListPtr(trainX)
            logging.debug("Getting test omega")
            testOmegaPtr = SparseUtils.getOmegaListPtr(testX)
            logging.debug("Getting recommendations")

            # The test recommendations are additionally given trainX
            trainOrderedItems = MCEvaluator.recommendAtk(U, V, maxItems)
            testOrderedItems = MCEvaluatorCython.recommendAtk(U, V, maxItems, trainX)

            colNames = []
            trainMeasures = []
            testMeasures = []
            for metricName, metric in rankingMetrics:
                for p in self.ps:
                    trainMeasures.append(metric(trainOmegaPtr, trainOrderedItems, p))
                    testMeasures.append(metric(testOmegaPtr, testOrderedItems, p))

                    colNames.append(metricName + "@" + str(p))

            try:
                r = SparseUtilsCython.computeR(U, V, w, numAucSamples)
                trainMeasures.append(MCEvaluator.localAUCApprox(trainOmegaPtr, U, V, w, numAucSamples, r=r))
                testMeasures.append(MCEvaluator.localAUCApprox(testOmegaPtr, U, V, w, numAucSamples, allArray=allOmegaPtr, r=r))

                # Repeated with w = 0 for the column labelled "AUC"
                w = 0.0
                r = SparseUtilsCython.computeR(U, V, w, numAucSamples)
                trainMeasures.append(MCEvaluator.localAUCApprox(trainOmegaPtr, U, V, w, numAucSamples, r=r))
                testMeasures.append(MCEvaluator.localAUCApprox(testOmegaPtr, U, V, w, numAucSamples, allArray=allOmegaPtr, r=r))

                colNames.append("LAUC@" + str(self.algoArgs.u))
                colNames.append("AUC")
            except Exception:
                logging.debug("Could not compute AUCs")
                raise

            allTrainMeasures.append(numpy.array(trainMeasures))
            allTestMeasures.append(numpy.array(testMeasures))
            allMetaData.append(numpy.array(metaData))

        allTrainMeasures = numpy.array(allTrainMeasures)
        allTestMeasures = numpy.array(allTestMeasures)
        allMetaData = numpy.array(allMetaData)

        # Average every measure over the folds
        meanTrainMeasures = numpy.mean(allTrainMeasures, 0)
        meanTestMeasures = numpy.mean(allTestMeasures, 0)
        meanMetaData = numpy.mean(allMetaData, 0)

        logging.debug("Mean metrics")
        for i, colName in enumerate(colNames):
            logging.debug(colName + ":" + str('%.4f' % meanTrainMeasures[i]) + "/" + str('%.4f' % meanTestMeasures[i]))

        numpy.savez(fileName, meanTrainMeasures, meanTestMeasures, meanMetaData, trainOrderedItems, testOrderedItems)
        logging.debug("Saved file as " + fileName)