Example #1
    def localAUCApprox(positiveArray,
                       U,
                       V,
                       w,
                       numAucSamples=50,
                       r=None,
                       allArray=None):
        """
        Compute the estimated local AUC for the score function UV^T relative to X with 
        quantile w. The AUC is computed using positiveArray, which is a tuple (indPtr, colInds),
        assuming allArray is None. If allArray is not None then positive items are chosen 
        from positiveArray and negative ones are chosen from the complement of allArray.
        """

        if type(positiveArray) != tuple:
            positiveArray = SparseUtils.getOmegaListPtr(positiveArray)

        indPtr, colInds = positiveArray
        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)

        if r is None:
            r = SparseUtilsCython.computeR(U, V, w, numAucSamples)

        if allArray is None:
            return MCEvaluatorCython.localAUCApprox(indPtr, colInds, indPtr,
                                                    colInds, U, V,
                                                    numAucSamples, r)
        else:
            allIndPtr, allColInd = allArray
            return MCEvaluatorCython.localAUCApprox(indPtr, colInds, allIndPtr,
                                                    allColInd, U, V,
                                                    numAucSamples, r)
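The quantile w and the threshold vector r control which part of the ranking the local AUC is restricted to (in recordResults further down, w = 0 is recorded as the plain AUC). The following pure-numpy sketch of the sampled estimate is illustrative only: the per-user percentile threshold and the pairing rule scores[i, p] > r[i] are assumptions here, and the exact definition lives in MCEvaluatorCython.localAUCApprox.

import numpy

def localAUCSketch(X, U, V, w, numAucSamples=50):
    # X is a dense 0/1 relevance matrix; scores are given by UV^T.
    m, n = X.shape
    scores = U.dot(V.T)
    # Per-user threshold at quantile w of the scores (assumed reading of computeR).
    r = numpy.percentile(scores, w * 100, axis=1)
    hits, total = 0, 0
    for i in range(m):
        pos = numpy.nonzero(X[i, :])[0]
        neg = numpy.setdiff1d(numpy.arange(n), pos)
        if pos.shape[0] == 0 or neg.shape[0] == 0:
            continue
        p = numpy.random.choice(pos, numAucSamples)
        q = numpy.random.choice(neg, numAucSamples)
        # Credit a sampled pair when the positive item outranks the negative one
        # and also exceeds the per-user threshold r[i].
        hits += numpy.sum((scores[i, p] > scores[i, q]) & (scores[i, p] > r[i]))
        total += numAucSamples
    return hits / float(total) if total > 0 else 0.0

X = numpy.random.rand(10, 20) < 0.2
U = numpy.random.randn(10, 3)
V = numpy.random.randn(20, 3)
print(localAUCSketch(X, U, V, w=0.9))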
Example #2
 def f1AtK(positiveArray, orderedItems, k, verbose=False): 
     """
     Return the F1@k measure for each row of the predicted matrix UV.T 
     using real values in positiveArray. positiveArray is a tuple (indPtr, colInds)
     
     :param orderedItems: The ordered items for each user (users are rows, items are cols)  
     
     :param verbose: If true, return the per-row F1 scores and the first k recommendations, otherwise just the mean F1
     """
     if type(positiveArray) != tuple: 
         positiveArray = SparseUtils.getOmegaListPtr(positiveArray)        
     
     orderedItems = orderedItems[:, 0:k]
     indPtr, colInds = positiveArray
     
     precisions = MCEvaluatorCython.precisionAtk(indPtr, colInds, orderedItems)
     recalls = MCEvaluatorCython.recallAtk(indPtr, colInds, orderedItems)
     
     denominator = precisions+recalls
     denominator += denominator == 0      
     
     f1s = 2*precisions*recalls/denominator
     
     if verbose: 
         return f1s, orderedItems
     else: 
         return f1s.mean()
Example #3
    def f1AtK(positiveArray, orderedItems, k, verbose=False):
        """
        Return the F1@k measure for each row of the predicted matrix UV.T 
        using real values in positiveArray. positiveArray is a tuple (indPtr, colInds)
        
        :param orderedItems: The ordered items for each user (users are rows, items are cols)  
        
        :param verbose: If true, return the per-row F1 scores and the first k recommendations, otherwise just the mean F1
        """
        if type(positiveArray) != tuple:
            positiveArray = SparseUtils.getOmegaListPtr(positiveArray)

        orderedItems = orderedItems[:, 0:k]
        indPtr, colInds = positiveArray

        precisions = MCEvaluatorCython.precisionAtk(indPtr, colInds,
                                                    orderedItems)
        recalls = MCEvaluatorCython.recallAtk(indPtr, colInds, orderedItems)

        denominator = precisions + recalls
        denominator += denominator == 0

        f1s = 2 * precisions * recalls / denominator

        if verbose:
            return f1s, orderedItems
        else:
            return f1s.mean()
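For reference, the same F1@k computation on small dense inputs, written directly in numpy rather than against the (indPtr, colInds) format. f1_at_k and the toy arrays below are purely illustrative and not part of MCEvaluator.

import numpy

def f1_at_k(relevant, orderedItems, k):
    # relevant: list of arrays of relevant item indices, one per user.
    # orderedItems: (numUsers, >= k) array of recommended items, best first.
    orderedItems = orderedItems[:, 0:k]
    f1s = numpy.zeros(len(relevant))
    for i, omega in enumerate(relevant):
        hits = numpy.intersect1d(orderedItems[i], omega).shape[0]
        precision = hits / float(k)
        recall = hits / float(omega.shape[0]) if omega.shape[0] > 0 else 0.0
        denom = precision + recall
        f1s[i] = 2 * precision * recall / denom if denom > 0 else 0.0
    return f1s.mean()

relevant = [numpy.array([0, 2]), numpy.array([1, 3, 4])]
orderedItems = numpy.array([[2, 1, 0], [4, 0, 3]])
print(f1_at_k(relevant, orderedItems, k=2))  # (0.5 + 0.4) / 2 = 0.45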
Example #4
    def testStratifiedRecallAtk(self):
        m = 20
        n = 50
        r = 3
        alpha = 1

        X, U, V = SparseUtilsCython.generateSparseBinaryMatrixPL((m, n),
                                                                 r,
                                                                 density=0.2,
                                                                 alpha=alpha,
                                                                 csarray=True)

        itemCounts = numpy.array(X.sum(0) + 1, numpy.int32)

        (indPtr, colInds) = X.nonzeroRowsPtr()

        indPtr = numpy.array(indPtr, numpy.uint32)
        colInds = numpy.array(colInds, numpy.uint32)

        k = 5
        orderedItems = numpy.random.randint(0, n, m * k)
        orderedItems = numpy.reshape(orderedItems, (m, k))
        orderedItems = numpy.array(orderedItems, numpy.int32)
        beta = 0.5

        recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(
            indPtr, colInds, orderedItems, itemCounts, beta)

        recalls2 = numpy.zeros(m)

        #Now compute recalls from scratch
        for i in range(m):
            omegai = colInds[indPtr[i]:indPtr[i + 1]]

            numerator = 0
            for j in range(k):
                if orderedItems[i, j] in omegai:
                    numerator += 1 / itemCounts[orderedItems[i, j]]**beta

            denominator = 0

            for j in omegai:
                denominator += 1 / itemCounts[j]**beta

            recalls2[i] = numerator / denominator

        nptst.assert_array_equal(recalls, recalls2)

        #Now try to match with normal recall
        itemCounts = numpy.ones(n, numpy.int32)
        recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(
            indPtr, colInds, orderedItems, itemCounts, beta)
        recalls2 = MCEvaluatorCython.recallAtk(indPtr, colInds, orderedItems)

        nptst.assert_array_equal(recalls, recalls2)
Example #5
    def testStratifiedRecallAtk(self):
        m = 20 
        n = 50 
        r = 3     
        alpha = 1
        
        X, U, V = SparseUtilsCython.generateSparseBinaryMatrixPL((m,n), r, density=0.2, alpha=alpha, csarray=True)
        
        itemCounts = numpy.array(X.sum(0)+1, numpy.int32) 
        
        (indPtr, colInds) = X.nonzeroRowsPtr()
        
        indPtr = numpy.array(indPtr, numpy.uint32)
        colInds = numpy.array(colInds, numpy.uint32)
        
        k = 5
        orderedItems = numpy.random.randint(0, n, m*k)
        orderedItems = numpy.reshape(orderedItems, (m, k))
        orderedItems = numpy.array(orderedItems, numpy.int32)        
        beta = 0.5
        
        recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(indPtr, colInds, orderedItems, itemCounts, beta)
        
        
        recalls2 = numpy.zeros(m)        
            
        #Now compute recalls from scratch 
        for i in range(m):
            omegai = colInds[indPtr[i]:indPtr[i+1]]            
            
            numerator = 0 
            for j in range(k):
                if orderedItems[i, j] in omegai: 
                    numerator += 1/itemCounts[orderedItems[i, j]]**beta
            
            denominator = 0

            for j in omegai: 
                denominator += 1/itemCounts[j]**beta
                
            recalls2[i] = numerator/denominator
            
        nptst.assert_array_equal(recalls, recalls2)
                                
                
        #Now try to match with normal recall 
        itemCounts = numpy.ones(n, numpy.int32)
        recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(indPtr, colInds, orderedItems, itemCounts, beta)
        recalls2 = MCEvaluatorCython.recallAtk(indPtr, colInds, orderedItems)
        
        nptst.assert_array_equal(recalls, recalls2)
Example #6
    def stratifiedRecallAtK(positiveArray,
                            orderedItems,
                            k,
                            itemCounts,
                            beta=0.5,
                            verbose=False):
        """
        Compute the average recall@k score for each row of the predicted matrix UV.T 
        using real values in positiveArray. positiveArray is a tuple (indPtr, colInds)
        
        :param orderedItems: The ordered items for each user (users are rows, items are cols)  
        
        :param verbose: If true, return the per-row recalls and the first k recommendations, otherwise the weighted average recall
        """
        if type(positiveArray) != tuple:
            positiveArray = SparseUtils.getOmegaListPtr(positiveArray)

        orderedItems = orderedItems[:, 0:k]
        indPtr, colInds = positiveArray
        recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(
            indPtr, colInds, orderedItems, itemCounts, beta)

        if verbose:
            return recalls, orderedItems
        else:
            return numpy.average(recalls, weights=denominators)
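The role of itemCounts and beta is easiest to see for a single user: every relevant item carries a weight itemCounts**(-beta), so frequently rated items contribute less to both the numerator (hits in the top k) and the denominator (all relevant items). Below is a minimal illustrative snippet mirroring the from-scratch loops in the tests above; stratifiedRecallRow is a hypothetical helper, not part of the library.

import numpy

def stratifiedRecallRow(relevant, topK, itemCounts, beta=0.5):
    # Weight each item by itemCounts**(-beta): popular items count for less.
    weights = itemCounts.astype(numpy.float64) ** -beta
    hits = numpy.intersect1d(topK, relevant)
    return weights[hits].sum() / weights[relevant].sum()

itemCounts = numpy.array([10, 1, 5, 2])   # occurrences of each item (plus one)
relevant = numpy.array([0, 1])            # items the user actually liked
topK = numpy.array([1, 3])                # top-k recommendations
print(stratifiedRecallRow(relevant, topK, itemCounts))  # 1.0 / (10**-0.5 + 1.0)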
Example #7
    def testReciprocalRankAtk(self):
        m = 20
        n = 50
        r = 3
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                r,
                                                                0.5,
                                                                verbose=True,
                                                                csarray=True)

        k = 5
        orderedItems = numpy.random.randint(0, n, m * k)
        orderedItems = numpy.reshape(orderedItems, (m, k))
        orderedItems = numpy.array(orderedItems, numpy.int32)

        (indPtr, colInds) = X.nonzeroRowsPtr()
        indPtr = numpy.array(indPtr, numpy.uint32)
        colInds = numpy.array(colInds, numpy.uint32)
        rrs = MCEvaluatorCython.reciprocalRankAtk(indPtr, colInds,
                                                  orderedItems)

        rrs2 = numpy.zeros(m)
        for i in range(m):
            omegai = colInds[indPtr[i]:indPtr[i + 1]]
            for j in range(k):
                if orderedItems[i, j] in omegai:
                    rrs2[i] = 1 / float(1 + j)
                    break

        nptst.assert_array_equal(rrs, rrs2)

        #Test case where no items are in ranking
        orderedItems = numpy.ones((m, k), numpy.int32) * (n + 1)
        rrs = MCEvaluatorCython.reciprocalRankAtk(indPtr, colInds,
                                                  orderedItems)
        nptst.assert_array_equal(rrs, numpy.zeros(m))

        #Now, make all items rank 2
        for i in range(m):
            omegai = colInds[indPtr[i]:indPtr[i + 1]]
            orderedItems[i, 1] = omegai[0]

        rrs = MCEvaluatorCython.reciprocalRankAtk(indPtr, colInds,
                                                  orderedItems)
        nptst.assert_array_equal(rrs, numpy.ones(m) * 0.5)
Example #8
    def testRecommendAtk(self):
        m = 20
        n = 50
        r = 3

        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                r,
                                                                0.5,
                                                                verbose=True)

        import sppy
        X = sppy.csarray(X)

        k = 10

        X = numpy.zeros(X.shape)
        omegaList = []
        for i in range(m):
            omegaList.append(numpy.random.permutation(n)[0:5])
            X[i, omegaList[i]] = 1

        X = sppy.csarray(X)

        orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
        orderedItems2 = MCEvaluator.recommendAtk(U, V, k, omegaList=omegaList)

        nptst.assert_array_equal(orderedItems[orderedItems2 != -1],
                                 orderedItems2[orderedItems2 != -1])

        for i in range(m):
            items = numpy.intersect1d(omegaList[i], orderedItems[i, :])
            self.assertEquals(items.shape[0], 0)

            #items = numpy.union1d(omegaList[i], orderedItems[i, :])
            #items = numpy.intersect1d(items, orderedItems2[i, :])
            #nptst.assert_array_equal(items, numpy.sort(orderedItems2[i, :]))

        #Now let's have an all zeros X
        X = sppy.csarray(X.shape)
        orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
        orderedItems2 = MCEvaluator.recommendAtk(U, V, k)

        nptst.assert_array_equal(orderedItems, orderedItems2)
Example #9
    def testRecommendAtk(self):
        m = 20 
        n = 50 
        r = 3 

        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), r, 0.5, verbose=True)

        import sppy 
        X = sppy.csarray(X)  
        
        k = 10        
        
        X = numpy.zeros(X.shape)
        omegaList = []
        for i in range(m): 
            omegaList.append(numpy.random.permutation(n)[0:5])
            X[i, omegaList[i]] = 1
            
        X = sppy.csarray(X)            
        
        
        orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
        orderedItems2 = MCEvaluator.recommendAtk(U, V, k, omegaList=omegaList)
                
        nptst.assert_array_equal(orderedItems[orderedItems2!=-1], orderedItems2[orderedItems2!=-1])

        for i in range(m): 
            items = numpy.intersect1d(omegaList[i], orderedItems[i, :])
            self.assertEquals(items.shape[0], 0)
            
            #items = numpy.union1d(omegaList[i], orderedItems[i, :])
            #items = numpy.intersect1d(items, orderedItems2[i, :])
            #nptst.assert_array_equal(items, numpy.sort(orderedItems2[i, :]))
            
        #Now let's have an all zeros X 
        X = sppy.csarray(X.shape)
        orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
        orderedItems2 = MCEvaluator.recommendAtk(U, V, k) 
        
        nptst.assert_array_equal(orderedItems, orderedItems2)
Example #10
 def testReciprocalRankAtk(self):
     m = 20 
     n = 50 
     r = 3 
     X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), r, 0.5, verbose=True, csarray=True)
     
     k = 5
     orderedItems = numpy.random.randint(0, n, m*k)
     orderedItems = numpy.reshape(orderedItems, (m, k))
     orderedItems = numpy.array(orderedItems, numpy.int32)
     
     (indPtr, colInds) = X.nonzeroRowsPtr()
     indPtr = numpy.array(indPtr, numpy.uint32)
     colInds = numpy.array(colInds, numpy.uint32)
     rrs = MCEvaluatorCython.reciprocalRankAtk(indPtr, colInds, orderedItems)
     
     rrs2 = numpy.zeros(m)
     for i in range(m): 
         omegai = colInds[indPtr[i]:indPtr[i+1]]
         for j in range(k): 
             if orderedItems[i, j] in omegai: 
                 rrs2[i] = 1/float(1+j)
                 break 
     
     nptst.assert_array_equal(rrs, rrs2)
     
     #Test case where no items are in ranking 
     orderedItems = numpy.ones((m, k), numpy.int32) * (n+1)
     rrs = MCEvaluatorCython.reciprocalRankAtk(indPtr, colInds, orderedItems)
     nptst.assert_array_equal(rrs, numpy.zeros(m))
     
     #Now, make all items rank 2
     for i in range(m): 
         omegai = colInds[indPtr[i]:indPtr[i+1]]
         orderedItems[i, 1] = omegai[0]
     
     rrs = MCEvaluatorCython.reciprocalRankAtk(indPtr, colInds, orderedItems)
     nptst.assert_array_equal(rrs, numpy.ones(m)*0.5)     
Example #11
def learnPredictRanking(args):
    """
    A function to train on a training set and test on a test set, for a number 
    of values of rho. 
    """
    learner, trainX, testX, rhos = args
    logging.debug("k=" + str(learner.getK()))
    logging.debug(learner)

    testInds = testX.nonzero()
    trainXIter = []
    testIndList = []

    for rho in rhos:
        trainXIter.append(trainX)
        testIndList.append(testInds)

    trainXIter = iter(trainXIter)

    ZIter = learner.learnModel(trainXIter, iter(rhos))

    metrics = numpy.zeros(rhos.shape[0])

    for j, Z in enumerate(ZIter):
        U, s, V = Z
        U = U * s
        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)

        testOrderedItems = MCEvaluatorCython.recommendAtk(
            U, V, learner.recommendSize, trainX)

        if learner.metric == "mrr":
            metrics[j] = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX),
                                            testOrderedItems,
                                            learner.recommendSize)
            logging.debug("MRR@" + str(learner.recommendSize) + ": " +
                          str('%.4f' % metrics[j]) + " " + str(learner))
        elif learner.metric == "f1":
            metrics[j] = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX),
                                           testOrderedItems,
                                           learner.recommendSize)
            logging.debug("F1@" + str(learner.recommendSize) + ": " +
                          str('%.4f' % metrics[j]) + " " + str(learner))
        else:
            raise ValueError("Unknown metric " + learner.metric)

        gc.collect()

    return metrics
Example #12
 def localAUCApprox(positiveArray, U, V, w, numAucSamples=50, r=None, allArray=None): 
     """
     Compute the estimated local AUC for the score function UV^T relative to X with 
     quantile w. The AUC is computed using positiveArray, which is a tuple (indPtr, colInds),
     assuming allArray is None. If allArray is not None then positive items are chosen 
     from positiveArray and negative ones are chosen from the complement of allArray.
     """
     
     if type(positiveArray) != tuple: 
         positiveArray = SparseUtils.getOmegaListPtr(positiveArray)          
     
     indPtr, colInds = positiveArray
     U = numpy.ascontiguousarray(U)
     V = numpy.ascontiguousarray(V)        
     
     if r is None: 
         r = SparseUtilsCython.computeR(U, V, w, numAucSamples)
     
     if allArray is None: 
         return MCEvaluatorCython.localAUCApprox(indPtr, colInds, indPtr, colInds, U, V, numAucSamples, r)
     else:
         allIndPtr, allColInd = allArray
         return MCEvaluatorCython.localAUCApprox(indPtr, colInds, allIndPtr, allColInd, U, V, numAucSamples, r)
Example #13
def learnPredictRanking(args):
    """
    A function to train on a training set and test on a test set, for a number 
    of values of rho. 
    """
    learner, trainX, testX, rhos = args 
    logging.debug("k=" + str(learner.getK()))
    logging.debug(learner) 
    
    testInds = testX.nonzero()
    trainXIter = []
    testIndList = []    
    
    for rho in rhos: 
        trainXIter.append(trainX)
        testIndList.append(testInds)
    
    trainXIter = iter(trainXIter)

    ZIter = learner.learnModel(trainXIter, iter(rhos))
    
    metrics = numpy.zeros(rhos.shape[0])
    
    for j, Z in enumerate(ZIter): 
        U, s, V = Z
        U = U*s
        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)
        
        testOrderedItems = MCEvaluatorCython.recommendAtk(U, V, learner.recommendSize, trainX)
        
        if learner.metric == "mrr": 
            metrics[j] = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize) 
            logging.debug("MRR@" + str(learner.recommendSize) +  ": " + str('%.4f' % metrics[j]) + " " + str(learner))
        elif learner.metric == "f1": 
            metrics[j] = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize)
            logging.debug("F1@" + str(learner.recommendSize) +  ": " + str('%.4f' % metrics[j]) + " " + str(learner))
        else: 
            raise ValueError("Unknown metric " + learner.metric)
            
        gc.collect()
        
    return metrics 
Example #14
 def stratifiedRecallAtK(positiveArray, orderedItems, k, itemCounts, beta=0.5, verbose=False): 
     """
     Compute the average recall@k score for each row of the predicted matrix UV.T 
     using real values in positiveArray. positiveArray is a tuple (indPtr, colInds)
     
     :param orderedItems: The ordered items for each user (users are rows, items are cols)  
     
     :param verbose: If true, return the per-row recalls and the first k recommendations, otherwise the weighted average recall
     """
     if type(positiveArray) != tuple: 
         positiveArray = SparseUtils.getOmegaListPtr(positiveArray)        
     
     orderedItems = orderedItems[:, 0:k]
     indPtr, colInds = positiveArray
     recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(indPtr, colInds, orderedItems, itemCounts, beta)
     
     if verbose: 
         return recalls, orderedItems
     else: 
         return numpy.average(recalls, weights=denominators)
Example #15
 def precisionAtK(positiveArray, orderedItems, k, verbose=False): 
     """
     Compute the average precision@k score for each row of the predicted matrix UV.T 
     using real values in positiveArray. positiveArray is a tuple (indPtr, colInds)
     
     :param orderedItems: The ordered items for each user (users are rows, items are cols)       
     
     :param verbose: If true, return the per-row precisions and the first k recommendations, otherwise the mean precision
     """
     if type(positiveArray) != tuple: 
         positiveArray = SparseUtils.getOmegaListPtr(positiveArray)
     
     orderedItems = orderedItems[:, 0:k]
     indPtr, colInds = positiveArray
     precisions = MCEvaluatorCython.precisionAtk(indPtr, colInds, orderedItems)
     
     if verbose: 
         return precisions, orderedItems
     else: 
         return precisions.mean()
Example #16
def computeTestMRR(args):
    """
    A simple function for outputting the MRR for a learner, e.g. in conjunction with 
    parallel model selection. 
    """
    trainX, testX, learner = args

    learner.learnModel(trainX)

    testOrderedItems = MCEvaluatorCython.recommendAtk(learner.U, learner.V, learner.recommendSize, trainX)
    mrr = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize)

    try:
        learnerStr = learner.modelParamsStr()
    except:
        learnerStr = str(learner)

    logging.debug("MRR@" + str(learner.recommendSize) + ": " + str("%.4f" % mrr) + " " + learnerStr)

    return mrr
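For completeness, the reciprocal-rank computation on dense toy inputs; mrr_at_k below is an illustrative helper, not the Cython routine. Each user scores 1/rank of the first relevant item within the top k (0 if none appears), and MRR@k is the mean over users.

import numpy

def mrr_at_k(relevant, orderedItems, k):
    rrs = numpy.zeros(len(relevant))
    for i, omega in enumerate(relevant):
        for j, item in enumerate(orderedItems[i, 0:k]):
            if item in omega:
                rrs[i] = 1.0 / (j + 1)
                break
    return rrs.mean()

relevant = [numpy.array([3]), numpy.array([0, 2])]
orderedItems = numpy.array([[1, 3, 4], [2, 0, 1]])
print(mrr_at_k(relevant, orderedItems, k=3))  # (1/2 + 1/1) / 2 = 0.75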
Example #17
    def precisionAtK(positiveArray, orderedItems, k, verbose=False):
        """
        Compute the average precision@k score for each row of the predicted matrix UV.T 
        using real values in positiveArray. positiveArray is a tuple (indPtr, colInds)
        
        :param orderedItems: The ordered items for each user (users are rows, items are cols)       
        
        :param verbose: If true, return the per-row precisions and the first k recommendations, otherwise the mean precision
        """
        if type(positiveArray) != tuple:
            positiveArray = SparseUtils.getOmegaListPtr(positiveArray)

        orderedItems = orderedItems[:, 0:k]
        indPtr, colInds = positiveArray
        precisions = MCEvaluatorCython.precisionAtk(indPtr, colInds,
                                                    orderedItems)

        if verbose:
            return precisions, orderedItems
        else:
            return precisions.mean()
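A quick sanity check of the same quantity on dense arrays (illustrative only): precision@k is simply the fraction of the top-k recommendations that are relevant.

import numpy

relevant = numpy.array([0, 2, 5])   # relevant items for one user
topK = numpy.array([2, 4, 5])       # top-3 recommendations
precision = numpy.intersect1d(topK, relevant).shape[0] / float(topK.shape[0])
print(precision)  # 2 of the 3 recommended items are relevant -> 0.666...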
Example #18
def computeTestF1(args):
    """
    A simple function for outputting the F1 score for a learner, e.g. in conjunction with 
    parallel model selection. 
    """
    trainX, testX, learner = args

    learner.learnModel(trainX)

    testOrderedItems = MCEvaluatorCython.recommendAtk(learner.U, learner.V,
                                                      learner.recommendSize,
                                                      trainX)
    f1 = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX),
                           testOrderedItems, learner.recommendSize)

    try:
        learnerStr = learner.modelParamsStr()
    except:
        learnerStr = str(learner)

    logging.debug("F1@" + str(learner.recommendSize) + ": " +
                  str('%.4f' % f1) + " " + learnerStr)

    return f1
Example #19
    def recordResults(self, X, trainX, testX, learner, fileName):
        """
        Save results for a particular recommendation
        """
        if self.algoArgs.skipRecordResults:
            logging.debug("Skipping final evaluation of algorithm")
            return

        allTrainMeasures = []
        allTestMeasures = []
        allMetaData = []

        for i in range(self.algoArgs.recordFolds):
            metaData = []
            w = 1-self.algoArgs.u
            logging.debug("Computing recommendation errors")
            maxItems = self.ps[-1]

            start = time.time()
            if type(learner) == IterativeSoftImpute:
                trainIterator = iter([trainX])
                ZList = learner.learnModel(trainIterator)
                U, s, V = ZList.next()
                U = U*s

                #trainX = sppy.csarray(trainX)
                #testX = sppy.csarray(testX)
                U = numpy.ascontiguousarray(U)
                V = numpy.ascontiguousarray(V)
            else:
                learner.learnModel(trainX)
                U = learner.U
                V = learner.V

            learnTime = time.time()-start
            metaData.append(learnTime)

            logging.debug("Getting all omega")
            allOmegaPtr = SparseUtils.getOmegaListPtr(X)
            logging.debug("Getting train omega")
            trainOmegaPtr = SparseUtils.getOmegaListPtr(trainX)
            logging.debug("Getting test omega")
            testOmegaPtr = SparseUtils.getOmegaListPtr(testX)
            logging.debug("Getting recommendations")

            trainOrderedItems = MCEvaluator.recommendAtk(U, V, maxItems)
            testOrderedItems = MCEvaluatorCython.recommendAtk(U, V, maxItems, trainX)

            colNames = []
            trainMeasures = []
            testMeasures = []
            for p in self.ps:
                trainMeasures.append(MCEvaluator.precisionAtK(trainOmegaPtr, trainOrderedItems, p))
                testMeasures.append(MCEvaluator.precisionAtK(testOmegaPtr, testOrderedItems, p))

                colNames.append("precision@" + str(p))

            for p in self.ps:
                trainMeasures.append(MCEvaluator.recallAtK(trainOmegaPtr, trainOrderedItems, p))
                testMeasures.append(MCEvaluator.recallAtK(testOmegaPtr, testOrderedItems, p))

                colNames.append("recall@" + str(p))

            for p in self.ps:
                trainMeasures.append(MCEvaluator.f1AtK(trainOmegaPtr, trainOrderedItems, p))
                testMeasures.append(MCEvaluator.f1AtK(testOmegaPtr, testOrderedItems, p))

                colNames.append("f1@" + str(p))

            for p in self.ps:
                trainMeasures.append(MCEvaluator.mrrAtK(trainOmegaPtr, trainOrderedItems, p))
                testMeasures.append(MCEvaluator.mrrAtK(testOmegaPtr, testOrderedItems, p))

                colNames.append("mrr@" + str(p))

            try:
                r = SparseUtilsCython.computeR(U, V, w, self.algoArgs.numRecordAucSamples)
                trainMeasures.append(MCEvaluator.localAUCApprox(trainOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, r=r))
                testMeasures.append(MCEvaluator.localAUCApprox(testOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, allArray=allOmegaPtr, r=r))

                w = 0.0
                r = SparseUtilsCython.computeR(U, V, w, self.algoArgs.numRecordAucSamples)
                trainMeasures.append(MCEvaluator.localAUCApprox(trainOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, r=r))
                testMeasures.append(MCEvaluator.localAUCApprox(testOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, allArray=allOmegaPtr, r=r))

                colNames.append("LAUC@" + str(self.algoArgs.u))
                colNames.append("AUC")
            except:
                logging.debug("Could not compute AUCs")
                raise

            trainMeasures = numpy.array(trainMeasures)
            testMeasures = numpy.array(testMeasures)
            metaData = numpy.array(metaData)

            allTrainMeasures.append(trainMeasures)
            allTestMeasures.append(testMeasures)
            allMetaData.append(metaData)

        allTrainMeasures = numpy.array(allTrainMeasures)
        allTestMeasures = numpy.array(allTestMeasures)
        allMetaData = numpy.array(allMetaData)

        meanTrainMeasures = numpy.mean(allTrainMeasures, 0)
        meanTestMeasures = numpy.mean(allTestMeasures, 0)
        meanMetaData = numpy.mean(allMetaData, 0)

        logging.debug("Mean metrics")
        for i, colName in enumerate(colNames):
            logging.debug(colName + ":" + str('%.4f' % meanTrainMeasures[i]) + "/" + str('%.4f' % meanTestMeasures[i]))

        numpy.savez(fileName, meanTrainMeasures, meanTestMeasures, meanMetaData, trainOrderedItems, testOrderedItems)
        logging.debug("Saved file as " + fileName)
Example #20
    def recordResults(self, muU, muV, trainMeasures, testMeasures, loopInd,
                      rowSamples, indPtr, colInds, testIndPtr, testColInds,
                      allIndPtr, allColInds, gi, gp, gq, trainX, startTime):

        sigmaU = self.getSigma(loopInd, self.alpha, muU.shape[0])
        sigmaV = self.getSigma(loopInd, self.alpha, muU.shape[0])
        r = SparseUtilsCython.computeR(muU, muV, self.w,
                                       self.numRecordAucSamples)
        objArr = self.objectiveApprox((indPtr, colInds),
                                      muU,
                                      muV,
                                      r,
                                      gi,
                                      gp,
                                      gq,
                                      full=True)
        if trainMeasures is None:
            trainMeasures = []
        trainMeasures.append([
            objArr.sum(),
            MCEvaluator.localAUCApprox((indPtr, colInds), muU, muV, self.w,
                                       self.numRecordAucSamples, r),
            time.time() - startTime, loopInd
        ])

        printStr = "iter " + str(loopInd) + ":"
        printStr += " sigmaU=" + str('%.4f' % sigmaU)
        printStr += " sigmaV=" + str('%.4f' % sigmaV)
        printStr += " train: obj~" + str('%.4f' % trainMeasures[-1][0])
        printStr += " LAUC~" + str('%.4f' % trainMeasures[-1][1])

        if testIndPtr is not None:
            testMeasuresRow = []
            testMeasuresRow.append(
                self.objectiveApprox((testIndPtr, testColInds),
                                     muU,
                                     muV,
                                     r,
                                     gi,
                                     gp,
                                     gq,
                                     allArray=(allIndPtr, allColInds)))
            testMeasuresRow.append(
                MCEvaluator.localAUCApprox((testIndPtr, testColInds),
                                           muU,
                                           muV,
                                           self.w,
                                           self.numRecordAucSamples,
                                           r,
                                           allArray=(allIndPtr, allColInds)))
            testOrderedItems = MCEvaluatorCython.recommendAtk(
                muU, muV, numpy.max(self.recommendSize), trainX)

            printStr += " validation: obj~" + str('%.4f' % testMeasuresRow[0])
            printStr += " LAUC~" + str('%.4f' % testMeasuresRow[1])

            try:
                for p in self.recommendSize:
                    f1Array, orderedItems = MCEvaluator.f1AtK(
                        (testIndPtr, testColInds),
                        testOrderedItems,
                        p,
                        verbose=True)
                    testMeasuresRow.append(f1Array[rowSamples].mean())
            except:
                f1Array, orderedItems = MCEvaluator.f1AtK(
                    (testIndPtr, testColInds),
                    testOrderedItems,
                    self.recommendSize,
                    verbose=True)
                testMeasuresRow.append(f1Array[rowSamples].mean())

            printStr += " f1@" + str(self.recommendSize) + "=" + str(
                '%.4f' % testMeasuresRow[-1])

            try:
                for p in self.recommendSize:
                    mrr, orderedItems = MCEvaluator.mrrAtK(
                        (testIndPtr, testColInds),
                        testOrderedItems,
                        p,
                        verbose=True)
                    testMeasuresRow.append(mrr[rowSamples].mean())
            except:
                mrr, orderedItems = MCEvaluator.mrrAtK(
                    (testIndPtr, testColInds),
                    testOrderedItems,
                    self.recommendSize,
                    verbose=True)
                testMeasuresRow.append(mrr[rowSamples].mean())

            printStr += " mrr@" + str(self.recommendSize) + "=" + str(
                '%.4f' % testMeasuresRow[-1])
            testMeasures.append(testMeasuresRow)

        printStr += " ||U||=" + str('%.3f' % numpy.linalg.norm(muU))
        printStr += " ||V||=" + str('%.3f' % numpy.linalg.norm(muV))

        if self.bound:
            trainObj = objArr.sum()

            expectationBound = self.computeBound(trainX, muU, muV, trainObj,
                                                 self.delta)
            printStr += " bound=" + str('%.3f' % expectationBound)
            trainMeasures[-1].append(expectationBound)

        return printStr
Example #21
os.system('taskset -p 0xffffffff %d' % os.getpid())

logging.debug("Starting training")
logging.debug(maxLocalAuc)

#modelSelectX = trainX[0:100, :]
#maxLocalAuc.learningRateSelect(trainX)
#maxLocalAuc.modelSelect(trainX)
#ProfileUtils.profile('U, V, trainObjs, trainAucs, testObjs, testAucs, iterations, time = maxLocalAuc.learnModel(trainX, testX=testX, verbose=True)', globals(), locals())

U, V, trainMeasures, testMeasures, iterations, time = maxLocalAuc.learnModel(trainX, verbose=True)

p = 10

trainOrderedItems = MCEvaluator.recommendAtk(U, V, p)
testOrderedItems = MCEvaluatorCython.recommendAtk(U, V, p, trainX)

r = SparseUtilsCython.computeR(U, V, maxLocalAuc.w, maxLocalAuc.numRecordAucSamples)
trainObjVec = maxLocalAuc.objectiveApprox(trainOmegaPtr, U, V, r, maxLocalAuc.gi, maxLocalAuc.gp, maxLocalAuc.gq, full=True)
testObjVec = maxLocalAuc.objectiveApprox(testOmegaPtr, U, V, r, maxLocalAuc.gi, maxLocalAuc.gp, maxLocalAuc.gq, allArray=allOmegaPtr, full=True)

itemCounts = numpy.array(X.sum(0)+1, numpy.int32)
beta = 0.5

for p in [1, 3, 5, 10]:
    trainPrecision = MCEvaluator.precisionAtK(trainOmegaPtr, trainOrderedItems, p)
    testPrecision = MCEvaluator.precisionAtK(testOmegaPtr, testOrderedItems, p)
    logging.debug("Train/test precision@" + str(p) + "=" + str(trainPrecision) + "/" + str(testPrecision)) 
    
for p in [1, 3, 5, 10]:
    trainRecall = MCEvaluator.stratifiedRecallAtK(trainOmegaPtr, trainOrderedItems, p, itemCounts, beta)
Example #22
    def recordResults(
        self,
        muU,
        muV,
        trainMeasures,
        testMeasures,
        loopInd,
        rowSamples,
        indPtr,
        colInds,
        testIndPtr,
        testColInds,
        allIndPtr,
        allColInds,
        gi,
        gp,
        gq,
        trainX,
        startTime,
    ):

        sigmaU = self.getSigma(loopInd, self.alpha, muU.shape[0])
        sigmaV = self.getSigma(loopInd, self.alpha, muU.shape[0])
        r = SparseUtilsCython.computeR(muU, muV, self.w, self.numRecordAucSamples)
        objArr = self.objectiveApprox((indPtr, colInds), muU, muV, r, gi, gp, gq, full=True)
        if trainMeasures is None:
            trainMeasures = []
        trainMeasures.append(
            [
                objArr.sum(),
                MCEvaluator.localAUCApprox((indPtr, colInds), muU, muV, self.w, self.numRecordAucSamples, r),
                time.time() - startTime,
                loopInd,
            ]
        )

        printStr = "iter " + str(loopInd) + ":"
        printStr += " sigmaU=" + str("%.4f" % sigmaU)
        printStr += " sigmaV=" + str("%.4f" % sigmaV)
        printStr += " train: obj~" + str("%.4f" % trainMeasures[-1][0])
        printStr += " LAUC~" + str("%.4f" % trainMeasures[-1][1])

        if testIndPtr is not None:
            testMeasuresRow = []
            testMeasuresRow.append(
                self.objectiveApprox(
                    (testIndPtr, testColInds), muU, muV, r, gi, gp, gq, allArray=(allIndPtr, allColInds)
                )
            )
            testMeasuresRow.append(
                MCEvaluator.localAUCApprox(
                    (testIndPtr, testColInds),
                    muU,
                    muV,
                    self.w,
                    self.numRecordAucSamples,
                    r,
                    allArray=(allIndPtr, allColInds),
                )
            )
            testOrderedItems = MCEvaluatorCython.recommendAtk(muU, muV, numpy.max(self.recommendSize), trainX)

            printStr += " validation: obj~" + str("%.4f" % testMeasuresRow[0])
            printStr += " LAUC~" + str("%.4f" % testMeasuresRow[1])

            try:
                for p in self.recommendSize:
                    f1Array, orderedItems = MCEvaluator.f1AtK(
                        (testIndPtr, testColInds), testOrderedItems, p, verbose=True
                    )
                    testMeasuresRow.append(f1Array[rowSamples].mean())
            except:
                f1Array, orderedItems = MCEvaluator.f1AtK(
                    (testIndPtr, testColInds), testOrderedItems, self.recommendSize, verbose=True
                )
                testMeasuresRow.append(f1Array[rowSamples].mean())

            printStr += " f1@" + str(self.recommendSize) + "=" + str("%.4f" % testMeasuresRow[-1])

            try:
                for p in self.recommendSize:
                    mrr, orderedItems = MCEvaluator.mrrAtK((testIndPtr, testColInds), testOrderedItems, p, verbose=True)
                    testMeasuresRow.append(mrr[rowSamples].mean())
            except:
                mrr, orderedItems = MCEvaluator.mrrAtK(
                    (testIndPtr, testColInds), testOrderedItems, self.recommendSize, verbose=True
                )
                testMeasuresRow.append(mrr[rowSamples].mean())

            printStr += " mrr@" + str(self.recommendSize) + "=" + str("%.4f" % testMeasuresRow[-1])
            testMeasures.append(testMeasuresRow)

        printStr += " ||U||=" + str("%.3f" % numpy.linalg.norm(muU))
        printStr += " ||V||=" + str("%.3f" % numpy.linalg.norm(muV))

        if self.bound:
            trainObj = objArr.sum()

            expectationBound = self.computeBound(trainX, muU, muV, trainObj, self.delta)
            printStr += " bound=" + str("%.3f" % expectationBound)
            trainMeasures[-1].append(expectationBound)

        return printStr