def testRecommendAtk(self):
        """Check the Cython recommendAtk against the pure-Python version.

        The Cython version takes the training matrix X and must exclude
        each row's observed (training) items from its recommendations.
        """
        m = 20
        n = 50
        r = 3

        # Random rank-r binary matrix plus its factor matrices.
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                r,
                                                                0.5,
                                                                verbose=True)

        import sppy
        X = sppy.csarray(X)

        k = 10

        # Rebuild X so that every row has exactly 5 random observed items,
        # recorded in omegaList for the Python recommender.
        X = numpy.zeros(X.shape)
        omegaList = []
        for i in range(m):
            omegaList.append(numpy.random.permutation(n)[0:5])
            X[i, omegaList[i]] = 1

        X = sppy.csarray(X)

        orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
        orderedItems2 = MCEvaluator.recommendAtk(U, V, k, omegaList=omegaList)

        # Wherever the Python version produced a real item (!= -1 marker)
        # the two implementations must agree.
        nptst.assert_array_equal(orderedItems[orderedItems2 != -1],
                                 orderedItems2[orderedItems2 != -1])

        for i in range(m):
            # Recommendations must not contain any training item of row i.
            items = numpy.intersect1d(omegaList[i], orderedItems[i, :])
            # assertEqual: assertEquals is a deprecated unittest alias.
            self.assertEqual(items.shape[0], 0)

        #Now let's have an all zeros X
        X = sppy.csarray(X.shape)
        orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
        orderedItems2 = MCEvaluator.recommendAtk(U, V, k)

        nptst.assert_array_equal(orderedItems, orderedItems2)
    def testRecommendAtk(self): 
        m = 20 
        n = 50 
        r = 3 

        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), r, 0.5, verbose=True)

        import sppy 
        X = sppy.csarray(X)  
        
        k = 10        
        
        X = numpy.zeros(X.shape)
        omegaList = []
        for i in range(m): 
            omegaList.append(numpy.random.permutation(n)[0:5])
            X[i, omegaList[i]] = 1
            
        X = sppy.csarray(X)            
        
        
        orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
        orderedItems2 = MCEvaluator.recommendAtk(U, V, k, omegaList=omegaList)
                
        nptst.assert_array_equal(orderedItems[orderedItems2!=-1], orderedItems2[orderedItems2!=-1])

        for i in range(m): 
            items = numpy.intersect1d(omegaList[i], orderedItems[i, :])
            self.assertEquals(items.shape[0], 0)
            
            #items = numpy.union1d(omegaList[i], orderedItems[i, :])
            #items = numpy.intersect1d(items, orderedItems2[i, :])
            #nptst.assert_array_equal(items, numpy.sort(orderedItems2[i, :]))
            
        #Now let's have an all zeros X 
        X = sppy.csarray(X.shape)
        orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
        orderedItems2 = MCEvaluator.recommendAtk(U, V, k) 
        
        nptst.assert_array_equal(orderedItems, orderedItems2)
# Example #3
def learnPredictRanking(args):
    """
    A function to train on a training set and test on a test set, for a number 
    of values of rho. 

    args is the tuple (learner, trainX, testX, rhos); returns a numpy array
    with one metric value (MRR@k or F1@k) per rho.
    """
    learner, trainX, testX, rhos = args
    logging.debug("k=" + str(learner.getK()))
    logging.debug(learner)

    testInds = testX.nonzero()
    # learnModel consumes iterators: repeat the same training matrix per rho.
    trainXIter = []
    testIndList = []

    for rho in rhos:
        trainXIter.append(trainX)
        testIndList.append(testInds)

    trainXIter = iter(trainXIter)

    ZIter = learner.learnModel(trainXIter, iter(rhos))

    metrics = numpy.zeros(rhos.shape[0])

    for j, Z in enumerate(ZIter):
        U, s, V = Z
        U = U * s
        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)

        # Recommendations exclude items already observed in trainX.
        testOrderedItems = MCEvaluatorCython.recommendAtk(
            U, V, learner.recommendSize, trainX)

        if learner.metric == "mrr":
            metrics[j] = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX),
                                            testOrderedItems,
                                            learner.recommendSize)
            logging.debug("MRR@" + str(learner.recommendSize) + ": " +
                          str('%.4f' % metrics[j]) + " " + str(learner))
        elif learner.metric == "f1":
            # Bug fix: this branch previously computed mrrAtK while logging
            # "F1@k"; use f1AtK so the metric matches its label.
            metrics[j] = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX),
                                           testOrderedItems,
                                           learner.recommendSize)
            logging.debug("F1@" + str(learner.recommendSize) + ": " +
                          str('%.4f' % metrics[j]) + " " + str(learner))
        else:
            raise ValueError("Unknown metric " + learner.metric)

        # Free the factor matrices of this iteration before the next one.
        gc.collect()

    return metrics
def learnPredictRanking(args): 
    """
    A function to train on a training set and test on a test set, for a number 
    of values of rho. 

    args is the tuple (learner, trainX, testX, rhos); returns a numpy array
    with one metric value (MRR@k or F1@k) per rho.
    """
    learner, trainX, testX, rhos = args 
    logging.debug("k=" + str(learner.getK()))
    logging.debug(learner) 
    
    testInds = testX.nonzero()
    # learnModel consumes iterators: repeat the same training matrix per rho.
    trainXIter = []
    testIndList = []    
    
    for rho in rhos: 
        trainXIter.append(trainX)
        testIndList.append(testInds)
    
    trainXIter = iter(trainXIter)

    ZIter = learner.learnModel(trainXIter, iter(rhos))
    
    metrics = numpy.zeros(rhos.shape[0])
    
    for j, Z in enumerate(ZIter): 
        U, s, V = Z
        U = U*s
        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)
        
        # Recommendations exclude items already observed in trainX.
        testOrderedItems = MCEvaluatorCython.recommendAtk(U, V, learner.recommendSize, trainX)
        
        if learner.metric == "mrr": 
            metrics[j] = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize) 
            logging.debug("MRR@" + str(learner.recommendSize) +  ": " + str('%.4f' % metrics[j]) + " " + str(learner))
        elif learner.metric == "f1": 
            # Bug fix: this branch previously computed mrrAtK while logging
            # "F1@k"; use f1AtK so the metric matches its label.
            metrics[j] = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize) 
            logging.debug("F1@" + str(learner.recommendSize) +  ": " + str('%.4f' % metrics[j]) + " " + str(learner))
        else: 
            raise ValueError("Unknown metric " + learner.metric)
            
        # Free the factor matrices of this iteration before the next one.
        gc.collect()
        
    return metrics 
# Example #5
def computeTestMRR(args):
    """
    A simple function for outputing MRR for a learner in conjunction e.g. with 
    parallel model selection. 

    args is the tuple (trainX, testX, learner); trains on trainX and returns
    MRR@recommendSize measured on testX.
    """
    trainX, testX, learner = args

    learner.learnModel(trainX)

    # Recommendations exclude items already observed in trainX.
    testOrderedItems = MCEvaluatorCython.recommendAtk(learner.U, learner.V, learner.recommendSize, trainX)
    mrr = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize)

    try:
        learnerStr = learner.modelParamsStr()
    except Exception:
        # Fall back to the default representation when the learner does not
        # provide modelParamsStr(); avoid a bare except.
        learnerStr = str(learner)

    logging.debug("MRR@" + str(learner.recommendSize) + ": " + str("%.4f" % mrr) + " " + learnerStr)

    return mrr
# Example #6
def computeTestF1(args):
    """
    A simple function for outputing F1 for a learner in conjunction e.g. with 
    parallel model selection. 

    args is the tuple (trainX, testX, learner); trains on trainX and returns
    F1@recommendSize measured on testX.
    """
    trainX, testX, learner = args

    learner.learnModel(trainX)

    # Recommendations exclude items already observed in trainX.
    testOrderedItems = MCEvaluatorCython.recommendAtk(learner.U, learner.V,
                                                      learner.recommendSize,
                                                      trainX)
    f1 = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX),
                           testOrderedItems, learner.recommendSize)

    try:
        learnerStr = learner.modelParamsStr()
    except Exception:
        # Fall back to the default representation when the learner does not
        # provide modelParamsStr(); avoid a bare except.
        learnerStr = str(learner)

    logging.debug("F1@" + str(learner.recommendSize) + ": " +
                  str('%.4f' % f1) + " " + learnerStr)

    return f1
# Example #7
    def recordResults(
        self,
        muU,
        muV,
        trainMeasures,
        testMeasures,
        loopInd,
        rowSamples,
        indPtr,
        colInds,
        testIndPtr,
        testColInds,
        allIndPtr,
        allColInds,
        gi,
        gp,
        gq,
        trainX,
        startTime,
    ):
        """
        Record train/validation measures for the current optimisation step.

        Appends [objective sum, approx local AUC, elapsed seconds, loopInd]
        to trainMeasures and, when validation data (testIndPtr) is given,
        appends [objective, local AUC, f1@k..., mrr@k...] to testMeasures.
        Returns a one-line summary string suitable for logging.
        """
        sigmaU = self.getSigma(loopInd, self.alpha, muU.shape[0])
        # NOTE(review): sigmaV is also computed from muU.shape[0] — confirm
        # this is intentional rather than a muV.shape[0] typo.
        sigmaV = self.getSigma(loopInd, self.alpha, muU.shape[0])
        r = SparseUtilsCython.computeR(muU, muV, self.w, self.numRecordAucSamples)
        objArr = self.objectiveApprox((indPtr, colInds), muU, muV, r, gi, gp, gq, full=True)
        # Identity test with "is None" rather than "== None" (PEP 8).
        if trainMeasures is None:
            trainMeasures = []
        trainMeasures.append(
            [
                objArr.sum(),
                MCEvaluator.localAUCApprox((indPtr, colInds), muU, muV, self.w, self.numRecordAucSamples, r),
                time.time() - startTime,
                loopInd,
            ]
        )

        printStr = "iter " + str(loopInd) + ":"
        printStr += " sigmaU=" + str("%.4f" % sigmaU)
        printStr += " sigmaV=" + str("%.4f" % sigmaV)
        printStr += " train: obj~" + str("%.4f" % trainMeasures[-1][0])
        printStr += " LAUC~" + str("%.4f" % trainMeasures[-1][1])

        if testIndPtr is not None:
            testMeasuresRow = []
            testMeasuresRow.append(
                self.objectiveApprox(
                    (testIndPtr, testColInds), muU, muV, r, gi, gp, gq, allArray=(allIndPtr, allColInds)
                )
            )
            testMeasuresRow.append(
                MCEvaluator.localAUCApprox(
                    (testIndPtr, testColInds),
                    muU,
                    muV,
                    self.w,
                    self.numRecordAucSamples,
                    r,
                    allArray=(allIndPtr, allColInds),
                )
            )
            testOrderedItems = MCEvaluatorCython.recommendAtk(muU, muV, numpy.max(self.recommendSize), trainX)

            printStr += " validation: obj~" + str("%.4f" % testMeasuresRow[0])
            printStr += " LAUC~" + str("%.4f" % testMeasuresRow[1])

            # recommendSize may be an array of cutoffs or a single scalar;
            # iterating a scalar raises TypeError, handled below.
            try:
                for p in self.recommendSize:
                    f1Array, orderedItems = MCEvaluator.f1AtK(
                        (testIndPtr, testColInds), testOrderedItems, p, verbose=True
                    )
                    testMeasuresRow.append(f1Array[rowSamples].mean())
            except TypeError:
                f1Array, orderedItems = MCEvaluator.f1AtK(
                    (testIndPtr, testColInds), testOrderedItems, self.recommendSize, verbose=True
                )
                testMeasuresRow.append(f1Array[rowSamples].mean())

            printStr += " f1@" + str(self.recommendSize) + "=" + str("%.4f" % testMeasuresRow[-1])

            try:
                for p in self.recommendSize:
                    mrr, orderedItems = MCEvaluator.mrrAtK((testIndPtr, testColInds), testOrderedItems, p, verbose=True)
                    testMeasuresRow.append(mrr[rowSamples].mean())
            except TypeError:
                mrr, orderedItems = MCEvaluator.mrrAtK(
                    (testIndPtr, testColInds), testOrderedItems, self.recommendSize, verbose=True
                )
                testMeasuresRow.append(mrr[rowSamples].mean())

            printStr += " mrr@" + str(self.recommendSize) + "=" + str("%.4f" % testMeasuresRow[-1])
            testMeasures.append(testMeasuresRow)

        printStr += " ||U||=" + str("%.3f" % numpy.linalg.norm(muU))
        printStr += " ||V||=" + str("%.3f" % numpy.linalg.norm(muV))

        if self.bound:
            trainObj = objArr.sum()

            expectationBound = self.computeBound(trainX, muU, muV, trainObj, self.delta)
            printStr += " bound=" + str("%.3f" % expectationBound)
            trainMeasures[-1].append(expectationBound)

        return printStr
# Example #8
    def recordResults(self, muU, muV, trainMeasures, testMeasures, loopInd,
                      rowSamples, indPtr, colInds, testIndPtr, testColInds,
                      allIndPtr, allColInds, gi, gp, gq, trainX, startTime):
        """
        Record train/validation measures for the current optimisation step.

        Appends [objective sum, approx local AUC, elapsed seconds, loopInd]
        to trainMeasures and, when validation data (testIndPtr) is given,
        appends [objective, local AUC, f1@k..., mrr@k...] to testMeasures.
        Returns a one-line summary string suitable for logging.
        """
        sigmaU = self.getSigma(loopInd, self.alpha, muU.shape[0])
        # NOTE(review): sigmaV is also computed from muU.shape[0] — confirm
        # this is intentional rather than a muV.shape[0] typo.
        sigmaV = self.getSigma(loopInd, self.alpha, muU.shape[0])
        r = SparseUtilsCython.computeR(muU, muV, self.w,
                                       self.numRecordAucSamples)
        objArr = self.objectiveApprox((indPtr, colInds),
                                      muU,
                                      muV,
                                      r,
                                      gi,
                                      gp,
                                      gq,
                                      full=True)
        # Identity test with "is None" rather than "== None" (PEP 8).
        if trainMeasures is None:
            trainMeasures = []
        trainMeasures.append([
            objArr.sum(),
            MCEvaluator.localAUCApprox((indPtr, colInds), muU, muV, self.w,
                                       self.numRecordAucSamples, r),
            time.time() - startTime, loopInd
        ])

        printStr = "iter " + str(loopInd) + ":"
        printStr += " sigmaU=" + str('%.4f' % sigmaU)
        printStr += " sigmaV=" + str('%.4f' % sigmaV)
        printStr += " train: obj~" + str('%.4f' % trainMeasures[-1][0])
        printStr += " LAUC~" + str('%.4f' % trainMeasures[-1][1])

        if testIndPtr is not None:
            testMeasuresRow = []
            testMeasuresRow.append(
                self.objectiveApprox((testIndPtr, testColInds),
                                     muU,
                                     muV,
                                     r,
                                     gi,
                                     gp,
                                     gq,
                                     allArray=(allIndPtr, allColInds)))
            testMeasuresRow.append(
                MCEvaluator.localAUCApprox((testIndPtr, testColInds),
                                           muU,
                                           muV,
                                           self.w,
                                           self.numRecordAucSamples,
                                           r,
                                           allArray=(allIndPtr, allColInds)))
            testOrderedItems = MCEvaluatorCython.recommendAtk(
                muU, muV, numpy.max(self.recommendSize), trainX)

            printStr += " validation: obj~" + str('%.4f' % testMeasuresRow[0])
            printStr += " LAUC~" + str('%.4f' % testMeasuresRow[1])

            # recommendSize may be an array of cutoffs or a single scalar;
            # iterating a scalar raises TypeError, handled below.
            try:
                for p in self.recommendSize:
                    f1Array, orderedItems = MCEvaluator.f1AtK(
                        (testIndPtr, testColInds),
                        testOrderedItems,
                        p,
                        verbose=True)
                    testMeasuresRow.append(f1Array[rowSamples].mean())
            except TypeError:
                f1Array, orderedItems = MCEvaluator.f1AtK(
                    (testIndPtr, testColInds),
                    testOrderedItems,
                    self.recommendSize,
                    verbose=True)
                testMeasuresRow.append(f1Array[rowSamples].mean())

            printStr += " f1@" + str(self.recommendSize) + "=" + str(
                '%.4f' % testMeasuresRow[-1])

            try:
                for p in self.recommendSize:
                    mrr, orderedItems = MCEvaluator.mrrAtK(
                        (testIndPtr, testColInds),
                        testOrderedItems,
                        p,
                        verbose=True)
                    testMeasuresRow.append(mrr[rowSamples].mean())
            except TypeError:
                mrr, orderedItems = MCEvaluator.mrrAtK(
                    (testIndPtr, testColInds),
                    testOrderedItems,
                    self.recommendSize,
                    verbose=True)
                testMeasuresRow.append(mrr[rowSamples].mean())

            printStr += " mrr@" + str(self.recommendSize) + "=" + str(
                '%.4f' % testMeasuresRow[-1])
            testMeasures.append(testMeasuresRow)

        printStr += " ||U||=" + str('%.3f' % numpy.linalg.norm(muU))
        printStr += " ||V||=" + str('%.3f' % numpy.linalg.norm(muV))

        if self.bound:
            trainObj = objArr.sum()

            expectationBound = self.computeBound(trainX, muU, muV, trainObj,
                                                 self.delta)
            printStr += " bound=" + str('%.3f' % expectationBound)
            trainMeasures[-1].append(expectationBound)

        return printStr
# Reset the CPU affinity mask so the process may run on all cores (some
# libraries restrict affinity on import); Linux-specific.
os.system('taskset -p 0xffffffff %d' % os.getpid())

logging.debug("Starting training")
logging.debug(maxLocalAuc)

#modelSelectX = trainX[0:100, :]
#maxLocalAuc.learningRateSelect(trainX)
#maxLocalAuc.modelSelect(trainX)
#ProfileUtils.profile('U, V, trainObjs, trainAucs, testObjs, testAucs, iterations, time = maxLocalAuc.learnModel(trainX, testX=testX, verbose=True)', globals(), locals())

# NOTE(review): this is an excerpt of a longer script — trainX, maxLocalAuc,
# X, trainOmegaPtr, testOmegaPtr and allOmegaPtr are presumably defined
# earlier; verify against the full file.
U, V, trainMeasures, testMeasures, iterations, time = maxLocalAuc.learnModel(trainX, verbose=True)

p = 10

# Train recommendations come from the Python evaluator; test recommendations
# use the Cython version, which is given trainX to exclude training items.
trainOrderedItems = MCEvaluator.recommendAtk(U, V, p)
testOrderedItems = MCEvaluatorCython.recommendAtk(U, V, p, trainX)

r = SparseUtilsCython.computeR(U, V, maxLocalAuc.w, maxLocalAuc.numRecordAucSamples)
trainObjVec = maxLocalAuc.objectiveApprox(trainOmegaPtr, U, V, r, maxLocalAuc.gi, maxLocalAuc.gp, maxLocalAuc.gq, full=True)
testObjVec = maxLocalAuc.objectiveApprox(testOmegaPtr, U, V, r, maxLocalAuc.gi, maxLocalAuc.gp, maxLocalAuc.gq, allArray=allOmegaPtr, full=True)

# Per-item observation counts (+1 smoothing), used by stratifiedRecallAtK.
itemCounts = numpy.array(X.sum(0)+1, numpy.int32)
beta = 0.5

# Log precision at several cutoffs.
for p in [1, 3, 5, 10]:
    trainPrecision = MCEvaluator.precisionAtK(trainOmegaPtr, trainOrderedItems, p)
    testPrecision = MCEvaluator.precisionAtK(testOmegaPtr, testOrderedItems, p)
    logging.debug("Train/test precision@" + str(p) + "=" + str(trainPrecision) + "/" + str(testPrecision)) 
    
# NOTE(review): this loop appears truncated in the excerpt — it computes
# trainRecall but the remainder is cut off.
for p in [1, 3, 5, 10]:
    trainRecall = MCEvaluator.stratifiedRecallAtK(trainOmegaPtr, trainOrderedItems, p, itemCounts, beta)
# Example #10
    def recordResults(self, X, trainX, testX, learner, fileName):
        """
        Save results for a particular recommendation.

        Trains the learner over recordFolds folds, computes precision, recall,
        f1 and MRR at each cutoff in self.ps plus local AUC / AUC, averages
        the measures over the folds and saves them to fileName with numpy.savez.
        """
        if self.algoArgs.skipRecordResults:
            logging.debug("Skipping final evaluation of algorithm")
            return

        allTrainMeasures = []
        allTestMeasures = []
        allMetaData = []

        for i in range(self.algoArgs.recordFolds):
            metaData = []
            w = 1-self.algoArgs.u
            logging.debug("Computing recommendation errors")
            # Generate recommendations once at the largest cutoff requested.
            maxItems = self.ps[-1]

            start = time.time()
            if type(learner) == IterativeSoftImpute:
                trainIterator = iter([trainX])
                ZList = learner.learnModel(trainIterator)
                # Use next() instead of the Python 2-only .next() method.
                U, s, V = next(ZList)
                U = U*s

                U = numpy.ascontiguousarray(U)
                V = numpy.ascontiguousarray(V)
            else:
                learner.learnModel(trainX)
                U = learner.U
                V = learner.V

            learnTime = time.time()-start
            metaData.append(learnTime)

            logging.debug("Getting all omega")
            allOmegaPtr = SparseUtils.getOmegaListPtr(X)
            logging.debug("Getting train omega")
            trainOmegaPtr = SparseUtils.getOmegaListPtr(trainX)
            logging.debug("Getting test omega")
            testOmegaPtr = SparseUtils.getOmegaListPtr(testX)
            logging.debug("Getting recommendations")

            # Test recommendations exclude items observed in trainX.
            trainOrderedItems = MCEvaluator.recommendAtk(U, V, maxItems)
            testOrderedItems = MCEvaluatorCython.recommendAtk(U, V, maxItems, trainX)

            colNames = []
            trainMeasures = []
            testMeasures = []
            for p in self.ps:
                trainMeasures.append(MCEvaluator.precisionAtK(trainOmegaPtr, trainOrderedItems, p))
                testMeasures.append(MCEvaluator.precisionAtK(testOmegaPtr, testOrderedItems, p))

                colNames.append("precision@" + str(p))

            for p in self.ps:
                trainMeasures.append(MCEvaluator.recallAtK(trainOmegaPtr, trainOrderedItems, p))
                testMeasures.append(MCEvaluator.recallAtK(testOmegaPtr, testOrderedItems, p))

                colNames.append("recall@" + str(p))

            for p in self.ps:
                trainMeasures.append(MCEvaluator.f1AtK(trainOmegaPtr, trainOrderedItems, p))
                testMeasures.append(MCEvaluator.f1AtK(testOmegaPtr, testOrderedItems, p))

                colNames.append("f1@" + str(p))

            for p in self.ps:
                trainMeasures.append(MCEvaluator.mrrAtK(trainOmegaPtr, trainOrderedItems, p))
                testMeasures.append(MCEvaluator.mrrAtK(testOmegaPtr, testOrderedItems, p))

                colNames.append("mrr@" + str(p))

            try:
                # Local AUC at the requested quantile u, then plain AUC (w=0).
                r = SparseUtilsCython.computeR(U, V, w, self.algoArgs.numRecordAucSamples)
                trainMeasures.append(MCEvaluator.localAUCApprox(trainOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, r=r))
                testMeasures.append(MCEvaluator.localAUCApprox(testOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, allArray=allOmegaPtr, r=r))

                w = 0.0
                r = SparseUtilsCython.computeR(U, V, w, self.algoArgs.numRecordAucSamples)
                trainMeasures.append(MCEvaluator.localAUCApprox(trainOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, r=r))
                testMeasures.append(MCEvaluator.localAUCApprox(testOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, allArray=allOmegaPtr, r=r))

                colNames.append("LAUC@" + str(self.algoArgs.u))
                colNames.append("AUC")
            except Exception:
                # Log for context, then re-raise: the failure is not swallowed.
                logging.debug("Could not compute AUCs")
                raise

            trainMeasures = numpy.array(trainMeasures)
            testMeasures = numpy.array(testMeasures)
            metaData = numpy.array(metaData)

            allTrainMeasures.append(trainMeasures)
            allTestMeasures.append(testMeasures)
            allMetaData.append(metaData)

        allTrainMeasures = numpy.array(allTrainMeasures)
        allTestMeasures = numpy.array(allTestMeasures)
        allMetaData = numpy.array(allMetaData)

        # Average over the folds.
        meanTrainMeasures = numpy.mean(allTrainMeasures, 0)
        meanTestMeasures = numpy.mean(allTestMeasures, 0)
        meanMetaData = numpy.mean(allMetaData, 0)

        logging.debug("Mean metrics")
        for i, colName in enumerate(colNames):
            logging.debug(colName + ":" + str('%.4f' % meanTrainMeasures[i]) + "/" + str('%.4f' % meanTestMeasures[i]))

        numpy.savez(fileName, meanTrainMeasures, meanTestMeasures, meanMetaData, trainOrderedItems, testOrderedItems)
        logging.debug("Saved file as " + fileName)