Code Example #1
File: SparseUtilsTest.py (Project: rezaarmand/sandbox)
    def testGetOmegaListPtr(self):
        import sppy
        m = 10
        n = 5
        X = scipy.sparse.rand(m, n, 0.1)
        X = X.tocsr()

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        for i in range(m):
            omegai = colInds[indPtr[i]:indPtr[i + 1]]
            nptst.assert_array_almost_equal(omegai,
                                            X.toarray()[i, :].nonzero()[0])

        Xsppy = sppy.csarray(X)
        indPtr, colInds = SparseUtils.getOmegaListPtr(Xsppy)

        for i in range(m):
            omegai = colInds[indPtr[i]:indPtr[i + 1]]
            nptst.assert_array_almost_equal(omegai,
                                            X.toarray()[i, :].nonzero()[0])

        #Test a zero array (scipy doesn't work in this case)
        X = sppy.csarray((m, n))

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        for i in range(m):
            omegai = colInds[indPtr[i]:indPtr[i + 1]]
            #The zero array has no observed entries, so every row must be empty
            self.assertEqual(omegai.shape[0], 0)
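
The (indPtr, colInds) pair exercised here follows the CSR layout: row i's observed columns are colInds[indPtr[i]:indPtr[i + 1]]. Below is a minimal sketch of that contract, assuming only what the test shows; getOmegaListPtrSketch is a hypothetical stand-in, not the library function.

import numpy
import scipy.sparse

def getOmegaListPtrSketch(X):
    #scipy's CSR matrices already store the (indPtr, colInds) pair the test
    #above relies on; the uint32 cast here is just for compactness
    X = X.tocsr()
    return numpy.array(X.indptr, numpy.uint32), numpy.array(X.indices, numpy.uint32)

X = scipy.sparse.rand(10, 5, 0.1).tocsr()
indPtr, colInds = getOmegaListPtrSketch(X)
for i in range(10):
    assert (colInds[indPtr[i]:indPtr[i + 1]] == X.toarray()[i, :].nonzero()[0]).all()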
Code Example #2
    def testLocalAucApprox(self):
        m = 100
        n = 200
        k = 2
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                k,
                                                                csarray=True,
                                                                verbose=True)

        w = 1.0
        localAuc = MCEvaluator.localAUC(X, U, V, w)

        samples = numpy.arange(150, 200, 10)

        for i, sampleSize in enumerate(samples):
            numAucSamples = sampleSize
            localAuc2 = MCEvaluator.localAUCApprox(
                SparseUtils.getOmegaListPtr(X), U, V, w, numAucSamples)
            self.assertAlmostEqual(localAuc2, localAuc, 1)

        #Try smaller w
        w = 0.5
        localAuc = MCEvaluator.localAUC(X, U, V, w)

        samples = numpy.arange(50, 200, 10)

        for i, sampleSize in enumerate(samples):
            numAucSamples = sampleSize
            localAuc2 = MCEvaluator.localAUCApprox(
                SparseUtils.getOmegaListPtr(X), U, V, w, numAucSamples)

            self.assertAlmostEqual(localAuc2, localAuc, 1)
Code Example #3
File: MCEvaluatorTest.py (Project: kentwang/sandbox)
    def testLocalAucApprox(self):
        m = 100
        n = 200
        k = 2
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True, verbose=True)

        w = 1.0
        localAuc = MCEvaluator.localAUC(X, U, V, w)

        samples = numpy.arange(150, 200, 10)

        for i, sampleSize in enumerate(samples):
            numAucSamples = sampleSize
            localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, numAucSamples)
            self.assertAlmostEqual(localAuc2, localAuc, 1)

        # Try smaller w
        w = 0.5
        localAuc = MCEvaluator.localAUC(X, U, V, w)

        samples = numpy.arange(50, 200, 10)

        for i, sampleSize in enumerate(samples):
            numAucSamples = sampleSize
            localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, numAucSamples)

            self.assertAlmostEqual(localAuc2, localAuc, 1)
Code Example #4
File: SparseUtilsTest.py (Project: charanpald/sandbox)
    def testGetOmegaListPtr(self): 
        import sppy 
        m = 10 
        n = 5
        X = scipy.sparse.rand(m, n, 0.1)
        X = X.tocsr()
        
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        for i in range(m): 
            omegai = colInds[indPtr[i]:indPtr[i+1]]
            nptst.assert_array_almost_equal(omegai, X.toarray()[i, :].nonzero()[0])
        
        Xsppy = sppy.csarray(X)
        indPtr, colInds  = SparseUtils.getOmegaListPtr(Xsppy)
        
        for i in range(m):
            omegai = colInds[indPtr[i]:indPtr[i+1]]
            nptst.assert_array_almost_equal(omegai, X.toarray()[i, :].nonzero()[0])
        
        #Test a zero array (scipy doesn't work in this case)
        X = sppy.csarray((m,n))
        
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
   
        for i in range(m): 
            omegai = colInds[indPtr[i]:indPtr[i+1]]
            #The zero array has no observed entries, so every row must be empty
            self.assertEqual(omegai.shape[0], 0)
Code Example #5
def learnPredictRanking(args):
    """
    A function to train on a training set and test on a test set, for a number 
    of values of rho. 
    """
    learner, trainX, testX, rhos = args
    logging.debug("k=" + str(learner.getK()))
    logging.debug(learner)

    testInds = testX.nonzero()
    trainXIter = []
    testIndList = []

    for rho in rhos:
        trainXIter.append(trainX)
        testIndList.append(testInds)

    trainXIter = iter(trainXIter)

    ZIter = learner.learnModel(trainXIter, iter(rhos))

    metrics = numpy.zeros(rhos.shape[0])

    for j, Z in enumerate(ZIter):
        U, s, V = Z
        U = U * s
        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)

        testOrderedItems = MCEvaluatorCython.recommendAtk(
            U, V, learner.recommendSize, trainX)

        if learner.metric == "mrr":
            metrics[j] = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX),
                                            testOrderedItems,
                                            learner.recommendSize)
            logging.debug("MRR@" + str(learner.recommendSize) + ": " +
                          str('%.4f' % metrics[j]) + " " + str(learner))
        elif learner.metric == "f1":
            metrics[j] = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX),
                                            testOrderedItems,
                                            learner.recommendSize)
            logging.debug("F1@" + str(learner.recommendSize) + ": " +
                          str('%.4f' % metrics[j]) + " " + str(learner))
        else:
            raise ValueError("Unknown metric " + learner.metric)

        gc.collect()

    return metrics
Code Example #6
File: MCEvaluator.py (Project: rezaarmand/sandbox)
    def localAUCApprox(positiveArray,
                       U,
                       V,
                       w,
                       numAucSamples=50,
                       r=None,
                       allArray=None):
        """
        Compute the estimated local AUC for the score functions UV^T relative to X with 
        quantile w. The AUC is computed using positiveArray which is a tuple (indPtr, colInds)
        assuming allArray is None. If allArray is not None then positive items are chosen 
        from positiveArray and negative ones are chosen from the complement of allArray.
        """

        if type(positiveArray) != tuple:
            positiveArray = SparseUtils.getOmegaListPtr(positiveArray)

        indPtr, colInds = positiveArray
        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)

        if r is None:
            r = SparseUtilsCython.computeR(U, V, w, numAucSamples)

        if allArray is None:
            return MCEvaluatorCython.localAUCApprox(indPtr, colInds, indPtr,
                                                    colInds, U, V,
                                                    numAucSamples, r)
        else:
            allIndPtr, allColInd = allArray
            return MCEvaluatorCython.localAUCApprox(indPtr, colInds, allIndPtr,
                                                    allColInd, U, V,
                                                    numAucSamples, r)
Code Example #7
File: MCEvaluator.py (Project: rezaarmand/sandbox)
    def f1AtK(positiveArray, orderedItems, k, verbose=False):
        """
        Return the F1@k measure for each row of the predicted matrix UV.T 
        using real values in positiveArray. positiveArray is a tuple (indPtr, colInds)
        
        :param orderedItems: The ordered items for each user (users are rows, items are cols)  
        
        :param verbose: If true return the F1 scores and first k recommendations for each row, otherwise the mean F1
        """
        if type(positiveArray) != tuple:
            positiveArray = SparseUtils.getOmegaListPtr(positiveArray)

        orderedItems = orderedItems[:, 0:k]
        indPtr, colInds = positiveArray

        precisions = MCEvaluatorCython.precisionAtk(indPtr, colInds,
                                                    orderedItems)
        recalls = MCEvaluatorCython.recallAtk(indPtr, colInds, orderedItems)

        denominator = precisions + recalls
        denominator += denominator == 0

        f1s = 2 * precisions * recalls / denominator

        if verbose:
            return f1s, orderedItems
        else:
            return f1s.mean()
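
The line "denominator += denominator == 0" is a guard against division by zero: rows with precision + recall == 0 get a denominator of 1, so their F1 comes out as 0 rather than NaN. A small self-contained numpy illustration with made-up values:

import numpy

precisions = numpy.array([0.5, 0.0, 0.2])
recalls = numpy.array([0.25, 0.0, 0.0])

denominator = precisions + recalls   #[0.75, 0.0, 0.2]
denominator += denominator == 0      #zero entries become 1, so 0/1 = 0, not NaN
f1s = 2 * precisions * recalls / denominator
print(f1s)                           #[0.33333333, 0.0, 0.0]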
Code Example #8
File: MCEvaluator.py (Project: rezaarmand/sandbox)
    def stratifiedRecallAtK(positiveArray,
                            orderedItems,
                            k,
                            itemCounts,
                            beta=0.5,
                            verbose=False):
        """
        Compute the average recall@k score for each row of the predicted matrix UV.T 
        using real values in positiveArray. positiveArray is a tuple (indPtr, colInds)
        
        :param orderedItems: The ordered items for each user (users are rows, items are cols)  
        
        :param verbose: If true return the recalls and first k recommendations for each row, otherwise the weighted average recall
        """
        if type(positiveArray) != tuple:
            positiveArray = SparseUtils.getOmegaListPtr(positiveArray)

        orderedItems = orderedItems[:, 0:k]
        indPtr, colInds = positiveArray
        recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(
            indPtr, colInds, orderedItems, itemCounts, beta)

        if verbose:
            return recalls, orderedItems
        else:
            return numpy.average(recalls, weights=denominators)
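
The weighted average on the last line means users with larger denominators (more relevant test items) contribute proportionally more to the overall recall. A tiny numpy illustration with made-up values:

import numpy

recalls = numpy.array([1.0, 0.0])
denominators = numpy.array([3.0, 1.0])
#(1.0*3 + 0.0*1) / (3 + 1) = 0.75
print(numpy.average(recalls, weights=denominators))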
Code Example #9
    def profileDerivativeUiApprox(self):
        k = 10
        U = numpy.random.rand(self.m, k)
        V = numpy.random.rand(self.n, k)

        indPtr, colInds = SparseUtils.getOmegaListPtr(self.X)

        gp = numpy.random.rand(self.n)
        gp /= gp.sum()
        gq = numpy.random.rand(self.n)
        gq /= gq.sum()

        j = 3
        numRowSamples = 100
        numAucSamples = 10

        permutedRowInds = numpy.array(numpy.random.permutation(self.m), numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(self.n), numpy.uint32)

        maxLocalAuc = MaxLocalAUC(k, w=0.9)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, self.m)

        lmbda = 0.001
        normalise = True

        learner = MaxLocalAUCCython()

        def run():
            numRuns = 10
            for j in range(numRuns):
                for i in range(self.m):
                    learner.derivativeUiApprox(indPtr, colInds, U, V, gp, gq, permutedColInds, i)

        ProfileUtils.profile("run()", globals(), locals())
Code Example #10
    def profileObjective(self):

        k = 10
        U = numpy.random.rand(self.m, k)
        V = numpy.random.rand(self.n, k)

        indPtr, colInds = SparseUtils.getOmegaListPtr(self.X)
        colIndsProbabilities = numpy.ones(colInds.shape[0])

        for i in range(self.m):
            colIndsProbabilities[indPtr[i] : indPtr[i + 1]] /= colIndsProbabilities[indPtr[i] : indPtr[i + 1]].sum()
            colIndsProbabilities[indPtr[i] : indPtr[i + 1]] = numpy.cumsum(
                colIndsProbabilities[indPtr[i] : indPtr[i + 1]]
            )

        r = numpy.zeros(self.m)
        lmbda = 0.001
        rho = 1.0
        numAucSamples = 100

        def run():
            numRuns = 10
            for i in range(numRuns):
                objectiveApprox(indPtr, colInds, indPtr, colInds, U, V, r, numAucSamples, lmbda, rho, False)

        ProfileUtils.profile("run()", globals(), locals())
Code Example #11
    def profileObjective(self):

        k = 10
        U = numpy.random.rand(self.m, k)
        V = numpy.random.rand(self.n, k)

        indPtr, colInds = SparseUtils.getOmegaListPtr(self.X)
        colIndsProbabilities = numpy.ones(colInds.shape[0])

        for i in range(self.m):
            colIndsProbabilities[indPtr[i]:indPtr[
                i + 1]] /= colIndsProbabilities[indPtr[i]:indPtr[i + 1]].sum()
            colIndsProbabilities[indPtr[i]:indPtr[i + 1]] = numpy.cumsum(
                colIndsProbabilities[indPtr[i]:indPtr[i + 1]])

        r = numpy.zeros(self.m)
        lmbda = 0.001
        rho = 1.0
        numAucSamples = 100

        def run():
            numRuns = 10
            for i in range(numRuns):
                objectiveApprox(indPtr, colInds, indPtr, colInds, U, V, r,
                                numAucSamples, lmbda, rho, False)

        ProfileUtils.profile('run()', globals(), locals())
Code Example #12
File: MCEvaluator.py (Project: charanpald/sandbox)
 def f1AtK(positiveArray, orderedItems, k, verbose=False): 
     """
     Return the F1@k measure for each row of the predicted matrix UV.T 
     using real values in positiveArray. positiveArray is a tuple (indPtr, colInds)
     
     :param orderedItems: The ordered items for each user (users are rows, items are cols)  
     
      :param verbose: If true return the F1 scores and first k recommendations for each row, otherwise the mean F1
     """
     if type(positiveArray) != tuple: 
         positiveArray = SparseUtils.getOmegaListPtr(positiveArray)        
     
     orderedItems = orderedItems[:, 0:k]
     indPtr, colInds = positiveArray
     
     precisions = MCEvaluatorCython.precisionAtk(indPtr, colInds, orderedItems)
     recalls = MCEvaluatorCython.recallAtk(indPtr, colInds, orderedItems)
     
     denominator = precisions+recalls
     denominator += denominator == 0      
     
     f1s = 2*precisions*recalls/denominator
     
     if verbose: 
         return f1s, orderedItems
     else: 
         return f1s.mean()
Code Example #13
    def testLocalAucApprox2(self):
        m = 100
        n = 200
        k = 5
        numInds = 100
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                k,
                                                                csarray=True,
                                                                verbose=True)

        r = numpy.ones(m) * -10

        w = 0.5
        localAuc = MCEvaluator.localAUC(X, U, V, w)

        samples = numpy.arange(50, 200, 10)

        for i, sampleSize in enumerate(samples):
            localAuc2 = MCEvaluator.localAUCApprox(
                SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)

            self.assertAlmostEqual(localAuc2, localAuc, 1)

        #Test more accurately
        sampleSize = 1000
        localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X),
                                               U, V, w, sampleSize)
        self.assertAlmostEqual(localAuc2, localAuc, 2)

        #Now set a high r
        Z = U.dot(V.T)
        localAuc = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X),
                                              U, V, w, sampleSize)

        for i, sampleSize in enumerate(samples):
            localAuc2 = MCEvaluator.localAUCApprox(
                SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)

            self.assertAlmostEqual(localAuc2, localAuc, 1)

        #Test more accurately
        sampleSize = 1000
        localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X),
                                               U, V, w, sampleSize)
        self.assertAlmostEqual(localAuc2, localAuc, 2)
Code Example #14
File: MaxAUCTanhTest.py (Project: rezaarmand/sandbox)
    def testScale(self):
        """
        Look at the scales of the unnormalised gradients. 
        """

        m = 100
        n = 400
        k = 3
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        w = 0.1
        eps = 0.001
        learner = MaxAUCTanh(k, w)
        learner.normalise = False
        learner.lmbdaU = 1.0
        learner.lmbdaV = 1.0
        learner.rho = 1.0
        learner.numAucSamples = 100

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
        r = numpy.random.rand(m)

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        gi = numpy.random.rand(m)
        gi /= gi.sum()
        gp = numpy.random.rand(n)
        gp /= gp.sum()
        gq = numpy.random.rand(n)
        gq /= gq.sum()

        permutedRowInds = numpy.array(numpy.random.permutation(m),
                                      numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(n),
                                      numpy.uint32)

        maxLocalAuc = MaxLocalAUC(k, w)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, m)

        normDui = 0
        for i in range(m):
            du = learner.derivativeUi(indPtr, colInds, U, V, r, gi, gp, gq, i)
            normDui += numpy.linalg.norm(du)

        normDui /= float(m)
        print(normDui)

        normDvi = 0

        for i in range(n):
            dv = learner.derivativeVi(indPtr, colInds, U, V, r, gi, gp, gq, i)
            normDvi += numpy.linalg.norm(dv)

        normDvi /= float(n)
        print(normDvi)
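
In symbols, the two printed quantities are the average unnormalised gradient norms over users and items respectively:

\[
\overline{\|\nabla_U\|} = \frac{1}{m}\sum_{i=1}^{m}\Big\|\frac{\partial f}{\partial u_i}\Big\|_2,
\qquad
\overline{\|\nabla_V\|} = \frac{1}{n}\sum_{j=1}^{n}\Big\|\frac{\partial f}{\partial v_j}\Big\|_2 .
\]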
Code Example #15
def learnPredictRanking(args): 
    """
    A function to train on a training set and test on a test set, for a number 
    of values of rho. 
    """
    learner, trainX, testX, rhos = args 
    logging.debug("k=" + str(learner.getK()))
    logging.debug(learner) 
    
    testInds = testX.nonzero()
    trainXIter = []
    testIndList = []    
    
    for rho in rhos: 
        trainXIter.append(trainX)
        testIndList.append(testInds)
    
    trainXIter = iter(trainXIter)

    ZIter = learner.learnModel(trainXIter, iter(rhos))
    
    metrics = numpy.zeros(rhos.shape[0])
    
    for j, Z in enumerate(ZIter): 
        U, s, V = Z
        U = U*s
        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)
        
        testOrderedItems = MCEvaluatorCython.recommendAtk(U, V, learner.recommendSize, trainX)
        
        if learner.metric == "mrr": 
            metrics[j] = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize) 
            logging.debug("MRR@" + str(learner.recommendSize) +  ": " + str('%.4f' % metrics[j]) + " " + str(learner))
        elif learner.metric == "f1": 
            metrics[j] = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize) 
            logging.debug("F1@" + str(learner.recommendSize) +  ": " + str('%.4f' % metrics[j]) + " " + str(learner))
        else: 
            raise ValueError("Unknown metric " + learner.metric)
            
        gc.collect()
        
    return metrics 
Code Example #16
File: MaxAUCTanhTest.py (Project: charanpald/sandbox)
    def testScale(self): 
        """
        Look at the scales of the unnormalised gradients. 
        """        
        
        m = 100 
        n = 400 
        k = 3 
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)
        
        w = 0.1
        eps = 0.001
        learner = MaxAUCTanh(k, w)
        learner.normalise = False
        learner.lmbdaU = 1.0
        learner.lmbdaV = 1.0
        learner.rho = 1.0
        learner.numAucSamples = 100
        
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
        r = numpy.random.rand(m)

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)
        
        gi = numpy.random.rand(m)
        gi /= gi.sum()        
        gp = numpy.random.rand(n)
        gp /= gp.sum()        
        gq = numpy.random.rand(n)
        gq /= gq.sum()     
        
        permutedRowInds = numpy.array(numpy.random.permutation(m), numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(n), numpy.uint32)
        
        maxLocalAuc = MaxLocalAUC(k, w)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, m)
        
        normDui = 0
        for i in range(m): 
            du = learner.derivativeUi(indPtr, colInds, U, V, r, gi, gp, gq, i) 
            normDui += numpy.linalg.norm(du)
            
        normDui /= float(m)
        print(normDui)        
        
        normDvi = 0         
        
        for i in range(n): 
            dv = learner.derivativeVi(indPtr, colInds, U, V, r, gi, gp, gq, i) 
            normDvi += numpy.linalg.norm(dv)
            
        normDvi /= float(n)
        print(normDvi)
Code Example #17
    def profileRestrictOmega(self):
        X, U, V = DatasetUtils.syntheticDataset1(u=0.01, m=1000, n=2000)
        m, n = X.shape
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        colIndsSubset = numpy.random.choice(n, 500, replace=False)

        def run():
            for i in range(100):
                newIndPtr, newColInds = restrictOmega(indPtr, colInds,
                                                      colIndsSubset)

        ProfileUtils.profile('run()', globals(), locals())
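
Judging from testRestrictOmega further below, restrictOmega keeps, for each row, only the observed columns that lie in the given subset and rebuilds the pointer array. A hedged pure-numpy sketch of that behaviour (restrictOmegaSketch is hypothetical, not the routine being profiled):

import numpy

def restrictOmegaSketch(indPtr, colInds, colIndsSubset):
    newIndPtr = numpy.zeros(indPtr.shape[0], indPtr.dtype)
    newColInds = []
    for i in range(indPtr.shape[0] - 1):
        omegai = colInds[indPtr[i]:indPtr[i + 1]]
        kept = omegai[numpy.in1d(omegai, colIndsSubset)]
        newColInds.append(kept)
        newIndPtr[i + 1] = newIndPtr[i] + kept.shape[0]
    return newIndPtr, numpy.concatenate(newColInds)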
Code Example #18
File: MCEvaluatorTest.py (Project: kentwang/sandbox)
    def testLocalAucApprox2(self):
        m = 100
        n = 200
        k = 5
        numInds = 100
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True, verbose=True)

        r = numpy.ones(m) * -10

        w = 0.5
        localAuc = MCEvaluator.localAUC(X, U, V, w)

        samples = numpy.arange(50, 200, 10)

        for i, sampleSize in enumerate(samples):
            localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)

            self.assertAlmostEqual(localAuc2, localAuc, 1)

        # Test more accurately
        sampleSize = 1000
        localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)
        self.assertAlmostEqual(localAuc2, localAuc, 2)

        # Now set a high r
        Z = U.dot(V.T)
        localAuc = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)

        for i, sampleSize in enumerate(samples):
            localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)

            self.assertAlmostEqual(localAuc2, localAuc, 1)

        # Test more accurately
        sampleSize = 1000
        localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)
        self.assertAlmostEqual(localAuc2, localAuc, 2)
Code Example #19
File: RecommenderUtils.py (Project: kentwang/sandbox)
def computeTestMRR(args):
    """
    A simple function for outputting the MRR for a learner in conjunction e.g. with 
    parallel model selection. 
    """
    trainX, testX, learner = args

    learner.learnModel(trainX)

    testOrderedItems = MCEvaluatorCython.recommendAtk(learner.U, learner.V, learner.recommendSize, trainX)
    mrr = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize)

    try:
        learnerStr = learner.modelParamsStr()
    except AttributeError:
        #Fall back for learners without modelParamsStr
        learnerStr = str(learner)

    logging.debug("MRR@" + str(learner.recommendSize) + ": " + str("%.4f" % mrr) + " " + learnerStr)

    return mrr
Code Example #20
File: MCEvaluator.py (Project: charanpald/sandbox)
 def stratifiedRecallAtK(positiveArray, orderedItems, k, itemCounts, beta=0.5, verbose=False): 
     """
     Compute the average recall@k score for each row of the predicted matrix UV.T 
     using real values in positiveArray. positiveArray is a tuple (indPtr, colInds)
     
     :param orderedItems: The ordered items for each user (users are rows, items are cols)  
     
      :param verbose: If true return the recalls and first k recommendations for each row, otherwise the weighted average recall
     """
     if type(positiveArray) != tuple: 
         positiveArray = SparseUtils.getOmegaListPtr(positiveArray)        
     
     orderedItems = orderedItems[:, 0:k]
     indPtr, colInds = positiveArray
     recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(indPtr, colInds, orderedItems, itemCounts, beta)
     
     if verbose: 
         return recalls, orderedItems
     else: 
         return numpy.average(recalls, weights=denominators)
Code Example #21
File: MCEvaluator.py (Project: charanpald/sandbox)
 def precisionAtK(positiveArray, orderedItems, k, verbose=False): 
     """
     Compute the average precision@k score for each row of the predicted matrix UV.T 
     using real values in positiveArray. positiveArray is a tuple (indPtr, colInds)
     
     :param orderedItems: The ordered items for each user (users are rows, items are cols)       
     
      :param verbose: If true return the precisions and first k recommendations for each row, otherwise the mean precision
     """
     if type(positiveArray) != tuple: 
         positiveArray = SparseUtils.getOmegaListPtr(positiveArray)
     
     orderedItems = orderedItems[:, 0:k]
     indPtr, colInds = positiveArray
     precisions = MCEvaluatorCython.precisionAtk(indPtr, colInds, orderedItems)
     
     if verbose: 
         return precisions, orderedItems
     else: 
         return precisions.mean()
Code Example #22
File: MCEvaluator.py (Project: rezaarmand/sandbox)
    def precisionAtK(positiveArray, orderedItems, k, verbose=False):
        """
        Compute the average precision@k score for each row of the predicted matrix UV.T 
        using real values in positiveArray. positiveArray is a tuple (indPtr, colInds)
        
        :param orderedItems: The ordered items for each user (users are rows, items are cols)       
        
        :param verbose: If true return the precisions and first k recommendations for each row, otherwise the mean precision
        """
        if type(positiveArray) != tuple:
            positiveArray = SparseUtils.getOmegaListPtr(positiveArray)

        orderedItems = orderedItems[:, 0:k]
        indPtr, colInds = positiveArray
        precisions = MCEvaluatorCython.precisionAtk(indPtr, colInds,
                                                    orderedItems)

        if verbose:
            return precisions, orderedItems
        else:
            return precisions.mean()
Code Example #23
    def profileDerivativeVjApprox(self):
        k = 10
        U = numpy.random.rand(self.m, k)
        V = numpy.random.rand(self.n, k)

        indPtr, colInds = SparseUtils.getOmegaListPtr(self.X)

        gp = numpy.random.rand(self.n)
        gp /= gp.sum()
        gq = numpy.random.rand(self.n)
        gq /= gq.sum()

        j = 3
        numRowSamples = 100
        numAucSamples = 10

        permutedRowInds = numpy.array(numpy.random.permutation(self.m),
                                      numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(self.n),
                                      numpy.uint32)

        maxLocalAuc = MaxLocalAUC(k, w=0.9)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq,
                                                    self.m)

        lmbda = 0.001
        normalise = True

        learner = MaxLocalAUCCython()

        def run():
            numRuns = 1
            for i in range(numRuns):
                #Profile the derivative for each item j (the original passed
                #the constant outer index i instead of j)
                for j in range(self.n):
                    learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq,
                                               normGp, normGq, permutedRowInds,
                                               permutedColInds, j)

        ProfileUtils.profile('run()', globals(), locals())
Code Example #24
File: MCEvaluator.py (Project: rezaarmand/sandbox)
    def localAUC(positiveArray, U, V, w, numRowInds=None):
        """
        Compute the local AUC for the score functions UV^T relative to X with 
        quantile w. 
        """
        if numRowInds is None:
            numRowInds = V.shape[0]

        if type(positiveArray) != tuple:
            positiveArray = SparseUtils.getOmegaListPtr(positiveArray)

        #For now let's compute the full matrix
        Z = U.dot(V.T)

        r = SparseUtilsCython.computeR(U, V, w, numRowInds)

        localAuc = numpy.zeros(U.shape[0])
        allInds = numpy.arange(V.shape[0])
        indPtr, colInds = positiveArray

        for i in range(U.shape[0]):
            omegai = colInds[indPtr[i]:indPtr[i + 1]]
            omegaBari = numpy.setdiff1d(allInds, omegai, assume_unique=True)

            if omegai.shape[0] * omegaBari.shape[0] != 0:
                partialAuc = 0

                for p in omegai:
                    for q in omegaBari:
                        if Z[i, p] > Z[i, q] and Z[i, p] > r[i]:
                            partialAuc += 1

                localAuc[i] = partialAuc / float(
                    omegai.shape[0] * omegaBari.shape[0])

        localAuc = localAuc.mean()

        return localAuc
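
Written out, the double loop computes, for each row i with observed items \(\omega_i\) and complement \(\bar\omega_i\) (rows where either side is empty contribute zero):

\[
\mathrm{LAUC} = \frac{1}{m}\sum_{i=1}^{m}
\frac{1}{|\omega_i|\,|\bar\omega_i|}
\sum_{p\in\omega_i}\sum_{q\in\bar\omega_i}
\mathbf{1}\big[Z_{ip} > Z_{iq}\ \text{and}\ Z_{ip} > r_i\big],
\qquad Z = UV^{T}.
\]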
Code Example #25
    def testRestrictOmega(self):
        m = 50
        n = 100
        k = 5

        u = 0.5
        w = 1 - u
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
        runs = 100

        for i in range(runs):
            colSubset = numpy.random.choice(n, 20, replace=False)

            newIndPtr, newColInds = restrictOmega(indPtr, colInds, colSubset)

            for i in range(m):
                omegai = colInds[indPtr[i]:indPtr[i + 1]]
                omegai2 = newColInds[newIndPtr[i]:newIndPtr[i + 1]]

                a = numpy.setdiff1d(omegai, omegai2)
                self.assertEqual(numpy.intersect1d(a, colSubset).shape[0], 0)
Code Example #26
File: MCEvaluator.py (Project: charanpald/sandbox)
 def localAUCApprox(positiveArray, U, V, w, numAucSamples=50, r=None, allArray=None): 
     """
     Compute the estimated local AUC for the score functions UV^T relative to X with 
     quantile w. The AUC is computed using positiveArray which is a tuple (indPtr, colInds)
     assuming allArray is None. If allArray is not None then positive items are chosen 
      from positiveArray and negative ones are chosen from the complement of allArray.
     """
     
     if type(positiveArray) != tuple: 
         positiveArray = SparseUtils.getOmegaListPtr(positiveArray)          
     
     indPtr, colInds = positiveArray
     U = numpy.ascontiguousarray(U)
     V = numpy.ascontiguousarray(V)        
     
     if r is None: 
         r = SparseUtilsCython.computeR(U, V, w, numAucSamples)
     
     if allArray is None: 
         return MCEvaluatorCython.localAUCApprox(indPtr, colInds, indPtr, colInds, U, V, numAucSamples, r)
     else:
         allIndPtr, allColInd = allArray
         return MCEvaluatorCython.localAUCApprox(indPtr, colInds, allIndPtr, allColInd, U, V, numAucSamples, r)
Code Example #27
File: MaxLocalAUCTest.py (Project: charanpald/sandbox)
 def testRestrictOmega(self):
     m = 50 
     n = 100 
     k = 5 
     
     u = 0.5
     w = 1-u
     X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)
     
     indPtr, colInds = SparseUtils.getOmegaListPtr(X)
     runs = 100 
     
     for i in range(runs): 
         colSubset = numpy.random.choice(n, 20, replace=False)
 
         newIndPtr, newColInds = restrictOmega(indPtr, colInds, colSubset)
         
         for i in range(m): 
             omegai = colInds[indPtr[i]:indPtr[i+1]]
             omegai2 = newColInds[newIndPtr[i]:newIndPtr[i+1]]
             
             a = numpy.setdiff1d(omegai, omegai2)
              self.assertEqual(numpy.intersect1d(a, colSubset).shape[0], 0)
Code Example #28
File: MCEvaluator.py (Project: charanpald/sandbox)
 def localAUC(positiveArray, U, V, w, numRowInds=None): 
     """
     Compute the local AUC for the score functions UV^T relative to X with 
     quantile w. 
     """
      if numRowInds is None: 
         numRowInds = V.shape[0]
         
     if type(positiveArray) != tuple: 
         positiveArray = SparseUtils.getOmegaListPtr(positiveArray)  
     
     #For now let's compute the full matrix 
     Z = U.dot(V.T)
     
     r = SparseUtilsCython.computeR(U, V, w, numRowInds)
     
     localAuc = numpy.zeros(U.shape[0]) 
     allInds = numpy.arange(V.shape[0])
     indPtr, colInds = positiveArray
     
     for i in range(U.shape[0]): 
         omegai = colInds[indPtr[i]:indPtr[i+1]]
         omegaBari = numpy.setdiff1d(allInds, omegai, assume_unique=True)
         
         if omegai.shape[0] * omegaBari.shape[0] != 0: 
             partialAuc = 0                
             
             for p in omegai: 
                 for q in omegaBari: 
                     if Z[i, p] > Z[i, q] and Z[i, p] > r[i]: 
                         partialAuc += 1 
                         
             localAuc[i] = partialAuc/float(omegai.shape[0] * omegaBari.shape[0])
     
     localAuc = localAuc.mean()        
     
     return localAuc
Code Example #29
def computeTestF1(args):
    """
    A simple function for outputting F1 for a learner in conjunction e.g. with 
    parallel model selection. 
    """
    trainX, testX, learner = args

    learner.learnModel(trainX)

    testOrderedItems = MCEvaluatorCython.recommendAtk(learner.U, learner.V,
                                                      learner.recommendSize,
                                                      trainX)
    f1 = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX),
                           testOrderedItems, learner.recommendSize)

    try:
        learnerStr = learner.modelParamsStr()
    except AttributeError:
        #Fall back for learners without modelParamsStr
        learnerStr = str(learner)

    logging.debug("F1@" + str(learner.recommendSize) + ": " +
                  str('%.4f' % f1) + " " + learnerStr)

    return f1
Code Example #30
    def testDerivativeUiApprox(self): 
        """
        We test the case in which we approximate the AUC using a large number 
        of samples and check that we get close to the exact derivative. 
        """
        m = 20 
        n = 30 
        k = 3 
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)
        
        w = 0.1
        learner = MaxAUCSigmoid(k, w)
        learner.normalise = False
        learner.lmbdaU = 0
        learner.lmbdaV = 0
        learner.rho = 1.0
        learner.numAucSamples = 100

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        gp = numpy.random.rand(n)
        gp /= gp.sum()        
        gq = numpy.random.rand(n)
        gq /= gq.sum()     

        
        numRuns = 200 
        numTests = 5
        
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
        permutedColInds = numpy.arange(n, dtype=numpy.uint32)

        #Test with small number of AUC samples, but normalise 
        learner.numAucSamples = n
        numRuns = 1000
        
        for i in numpy.random.permutation(m)[0:numTests]:  
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)            
            
            du1 = numpy.zeros(k)
            for j in range(numRuns): 
                du1 += learner.derivativeUiApprox(indPtr, colInds, U, V, gp, gq, permutedColInds, i)
            du1 /= numRuns
            du2 = learner.derivativeUi(indPtr, colInds, U, V, gp, gq, i) 
            #print(du1, du2)
            print(du1/numpy.linalg.norm(du1), du2/numpy.linalg.norm(du2))
            #print(numpy.linalg.norm(du1 - du2)/numpy.linalg.norm(du1))
            self.assertTrue(numpy.linalg.norm(du1 - du2)/numpy.linalg.norm(du1) < 0.5)

        #Let's compare against using the exact derivative 
        for i in numpy.random.permutation(m)[0:numTests]:  
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)            
            
            du1 = numpy.zeros(k)
            for j in range(numRuns): 
                du1 += learner.derivativeUiApprox(indPtr, colInds, U, V, gp, gq, permutedColInds, i)
            du1 /= numRuns
            du2 = learner.derivativeUi(indPtr, colInds, U, V, gp, gq, i)   
            
            print(du1/numpy.linalg.norm(du1), du2/numpy.linalg.norm(du2))
            nptst.assert_array_almost_equal(du1, du2, 2)
            
            
        learner.lmbdaV = 0.5 
        learner.rho = 0.5
        
        for i in numpy.random.permutation(m)[0:numTests]:  
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)            
            
            du1 = numpy.zeros(k)
            for j in range(numRuns): 
                du1 += learner.derivativeUiApprox(indPtr, colInds, U, V, gp, gq, permutedColInds, i)
            du1 /= numRuns
            du2 = learner.derivativeUi(indPtr, colInds, U, V, gp, gq, i)   
            nptst.assert_array_almost_equal(du1, du2, 2)
            print(du1/numpy.linalg.norm(du1), du2/numpy.linalg.norm(du2))
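
Each block above is the same Monte Carlo check: average numRuns stochastic gradients and compare against the exact one, either loosely through a relative-error bound or tightly through assert_array_almost_equal. In symbols, with T = numRuns:

\[
\hat g_i = \frac{1}{T}\sum_{t=1}^{T}\widehat{\nabla}^{(t)}_{u_i} \approx \nabla_{u_i} f,
\qquad
\frac{\|\hat g_i - \nabla_{u_i} f\|_2}{\|\hat g_i\|_2} < 0.5 .
\]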
Code Example #31
    def testComputeV1V2(self):
        m = 10
        n = 20
        nnzPerRow = 5
        X = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                   nnzPerRow,
                                                   csarray=True)

        k = 5
        learner = MaxAUCSquare(k)
        learner.normalise = False
        learner.lmbdaU = 0
        learner.lmbdaV = 0
        learner.rho = 1.0
        learner.numAucSamples = 20

        numRuns = 500
        gp = numpy.random.rand(n)
        gp /= gp.sum()
        gq = numpy.random.rand(n)
        gq /= gq.sum()

        permutedRowInds = numpy.arange(m, dtype=numpy.uint32)
        permutedColInds = numpy.arange(n, dtype=numpy.uint32)

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        U = numpy.random.randn(m, k)
        V = numpy.random.randn(n, k)

        V11 = numpy.zeros((m, k))
        V21 = numpy.zeros((m, k))

        W11 = numpy.zeros((m, k))
        W21 = numpy.zeros((m, k))

        for i in range(numRuns):
            tempV1, tempV2, tempW1, tempW2 = learner.computeMeansVW(
                indPtr, colInds, U, V, permutedRowInds, permutedColInds, gp,
                gq)
            V11 += tempV1
            V21 += tempV2

            W11 += tempW1
            W21 += tempW2

        V11 /= numRuns
        V21 /= numRuns

        W11 /= numRuns
        W21 /= numRuns

        #print(V11)
        #print(V21)

        #Now compute real solution
        V12 = numpy.zeros((m, k))
        V22 = numpy.zeros((m, k))
        W12 = numpy.zeros((m, k))
        W22 = numpy.zeros((m, k))

        #The bootstrap sampling causes slight errors
        for i in range(m):
            normGp = 0
            omegai = colInds[indPtr[i]:indPtr[i + 1]]
            for j in omegai:
                V12[i, :] += V[j, :] * gp[j]
                W12[i, :] += V[j, :] * gp[j] * (U[i, :].dot(V[j, :]))
                normGp += gp[j]

            V12[i, :] /= normGp
            W12[i, :] /= normGp

            normGq = 0
            omegaBari = numpy.setdiff1d(numpy.arange(n, dtype=numpy.uint32),
                                        omegai,
                                        assume_unique=True)

            for j in omegaBari:
                V22[i, :] += V[j, :] * gq[j]
                W22[i, :] += V[j, :] * gq[j] * (U[i, :].dot(V[j, :]))
                normGq += gq[j]

            V22[i, :] /= normGq
            W22[i, :] /= normGq

        #print(W21)
        #print(W22)
        nptst.assert_array_almost_equal(V11, V12, 1)
        nptst.assert_array_almost_equal(V21, V22, 1)

        nptst.assert_array_almost_equal(W11, W12, 1)
        nptst.assert_array_almost_equal(W21, W22, 1)
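
The exact quantities the bootstrap means are checked against are weighted averages over each row's observed set \(\omega_i\) (weights \(g_p\)) and its complement \(\bar\omega_i\) (weights \(g_q\)):

\[
V^{(1)}_{i\cdot} = \frac{\sum_{j\in\omega_i} g_p[j]\,v_j}{\sum_{j\in\omega_i} g_p[j]},
\qquad
W^{(1)}_{i\cdot} = \frac{\sum_{j\in\omega_i} g_p[j]\,(u_i^{T}v_j)\,v_j}{\sum_{j\in\omega_i} g_p[j]},
\]

with \(V^{(2)}\) and \(W^{(2)}\) defined analogously over \(\bar\omega_i\) using \(g_q\).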
Code Example #32
    def parallelLearnModel(self, X, verbose=False, U=None, V=None):
        """
        Max local AUC with Frobenius norm penalty on V. Solve with parallel (stochastic) gradient descent. 
        The input is a sparse array. 
        """
        #Convert to a csarray for faster access
        if scipy.sparse.issparse(X):
            logging.debug("Converting to csarray")
            X2 = sppy.csarray(X, storagetype="row")
            X = X2

        m, n = X.shape

        #We keep a validation set in order to determine when to stop
        if self.validationUsers != 0:
            numValidationUsers = int(m * self.validationUsers)
            trainX, testX, rowSamples = Sampling.shuffleSplitRows(
                X, 1, self.validationSize, numRows=numValidationUsers)[0]
            testIndPtr, testColInds = SparseUtils.getOmegaListPtr(testX)
        else:
            trainX = X
            testX = None
            rowSamples = None
            testIndPtr, testColInds = None, None

        #Note that to compute the test AUC we pick i \in X and j \notin X \cup testX
        indPtr, colInds = SparseUtils.getOmegaListPtr(trainX)
        allIndPtr, allColInds = SparseUtils.getOmegaListPtr(X)

        if U is None or V is None:
            U, V = self.initUV(trainX)

        if self.metric == "f1":
            metricInd = 2
        elif self.metric == "mrr":
            metricInd = 3
        else:
            raise ValueError("Unknown metric: " + self.metric)

        bestMetric = 0
        bestU = 0
        bestV = 0
        trainMeasures = []
        testMeasures = []
        loopInd = 0
        lastObj = 0
        currentObj = lastObj - 2 * self.eps

        numBlocks = self.numProcesses + 1
        gi, gp, gq = self.computeGipq(X)
        normGp, normGq = self.computeNormGpq(indPtr, colInds, gp, gq, m)

        #Some shared variables
        rowIsFree = sharedmem.ones(numBlocks, dtype=numpy.bool_)
        colIsFree = sharedmem.ones(numBlocks, dtype=numpy.bool_)

        #Create shared factors
        U2 = sharedmem.zeros((m, self.k))
        V2 = sharedmem.zeros((n, self.k))
        muU2 = sharedmem.zeros((m, self.k))
        muV2 = sharedmem.zeros((n, self.k))

        U2[:] = U[:]
        V2[:] = V[:]
        muU2[:] = U[:]
        muV2[:] = V[:]
        del U, V

        rowBlockSize = int(numpy.ceil(float(m) / numBlocks))
        colBlockSize = int(numpy.ceil(float(n) / numBlocks))

        lock = multiprocessing.Lock()
        startTime = time.time()
        loopInd = 0
        iterationsPerBlock = sharedmem.zeros((numBlocks, numBlocks))

        self.learnerCython = self.getCythonLearner()
        nextRecord = 0

        while loopInd < self.maxIterations and abs(lastObj -
                                                   currentObj) > self.eps:
            if loopInd >= nextRecord:
                if loopInd != 0:
                    print("")

                printStr = self.recordResults(muU2, muV2, trainMeasures,
                                              testMeasures, loopInd,
                                              rowSamples, indPtr, colInds,
                                              testIndPtr, testColInds,
                                              allIndPtr, allColInds, gi, gp,
                                              gq, trainX, startTime)
                logging.debug(printStr)

                if testIndPtr is not None and testMeasures[-1][
                        metricInd] >= bestMetric:
                    bestMetric = testMeasures[-1][metricInd]
                    bestU = muU2.copy()
                    bestV = muV2.copy()
                elif testIndPtr is None:
                    bestU = muU2.copy()
                    bestV = muV2.copy()

                #Compute objective averaged over last 5 recorded steps
                trainMeasuresArr = numpy.array(trainMeasures)
                lastObj = currentObj
                currentObj = numpy.mean(trainMeasuresArr[-5:, 0])

                nextRecord += self.recordStep

            iterationsPerBlock = sharedmem.zeros((numBlocks, numBlocks))
            self.parallelUpdateUV(X, U2, V2, muU2, muV2, numBlocks,
                                  rowBlockSize, colBlockSize, rowIsFree,
                                  colIsFree, indPtr, colInds, lock, gi, gp, gq,
                                  normGp, normGq, iterationsPerBlock, loopInd)
            loopInd += numpy.floor(iterationsPerBlock.mean())

        totalTime = time.time() - startTime

        #Compute quantities for last U and V
        print("")
        totalTime = time.time() - startTime
        printStr = "Finished, time=" + str('%.1f' % totalTime) + " "
        printStr += self.recordResults(muU2, muV2, trainMeasures, testMeasures,
                                       loopInd, rowSamples, indPtr, colInds,
                                       testIndPtr, testColInds, allIndPtr,
                                       allColInds, gi, gp, gq, trainX,
                                       startTime)
        printStr += " delta obj" + "%.3e" % abs(lastObj - currentObj)
        logging.debug(printStr)

        self.U = bestU
        self.V = bestV
        self.gi = gi
        self.gp = gp
        self.gq = gq

        if verbose:
            return self.U, self.V, numpy.array(trainMeasures), numpy.array(
                testMeasures), loopInd, totalTime
        else:
            return self.U, self.V
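
The stopping rule deserves spelling out: every recordStep iterations the objective is re-estimated as the mean of the last five recorded values, and the outer loop ends once two consecutive estimates differ by at most eps (or maxIterations is reached):

\[
\bar f_t = \frac{1}{5}\sum_{s=t-4}^{t} f_s,
\qquad
\text{stop when } |\bar f_{t-1} - \bar f_t| \le \epsilon .
\]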
Code Example #33
    def testComputeV1V2(self): 
        m = 10 
        n = 20 
        nnzPerRow = 5 
        X = SparseUtils.generateSparseBinaryMatrix((m, n), nnzPerRow, csarray=True)
        
        k = 5
        learner = MaxAUCSquare(k)
        learner.normalise = False
        learner.lmbdaU = 0
        learner.lmbdaV = 0
        learner.rho = 1.0
        learner.numAucSamples = 20
        
        numRuns = 500      
        gp = numpy.random.rand(n)
        gp /= gp.sum()        
        gq = numpy.random.rand(n)
        gq /= gq.sum()    

        permutedRowInds = numpy.arange(m, dtype=numpy.uint32)        
        permutedColInds = numpy.arange(n, dtype=numpy.uint32)
        
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)        
        
        U = numpy.random.randn(m, k)
        V = numpy.random.randn(n, k)        
        
        V11 = numpy.zeros((m, k))
        V21 = numpy.zeros((m, k))    
        
        W11 = numpy.zeros((m, k))
        W21 = numpy.zeros((m, k)) 
        
        for i in range(numRuns):
            tempV1, tempV2, tempW1, tempW2 = learner.computeMeansVW(indPtr, colInds, U, V, permutedRowInds, permutedColInds, gp, gq)
            V11 += tempV1
            V21 += tempV2
            
            W11 += tempW1 
            W21 += tempW2
        
        V11 /= numRuns 
        V21 /= numRuns 

        W11 /= numRuns 
        W21 /= numRuns          
        
        #print(V11)
        #print(V21)        
        
        #Now compute real solution 
        V12 = numpy.zeros((m, k))
        V22 = numpy.zeros((m, k))   
        W12 = numpy.zeros((m, k))
        W22 = numpy.zeros((m, k))                 
                 
                 
        #The bootstrap sampling causes slight errors 
        for i in range(m):
            normGp = 0
            omegai = colInds[indPtr[i]:indPtr[i+1]]
            for j in omegai: 
                V12[i, :] += V[j, :]*gp[j]
                W12[i, :] += V[j, :]*gp[j]*(U[i, :].dot(V[j, :]))
                normGp += gp[j]
                
            V12[i, :] /= normGp
            W12[i, :] /= normGp


            normGq = 0             
            omegaBari = numpy.setdiff1d(numpy.arange(n, dtype=numpy.uint32), omegai, assume_unique=True)
            
            for j in omegaBari: 
                V22[i, :] += V[j, :]*gq[j]
                W22[i, :] += V[j, :]*gq[j]*(U[i, :].dot(V[j, :]))
                normGq += gq[j]
                
            V22[i, :] /= normGq
            W22[i, :] /= normGq
                

        #print(W21)
        #print(W22)
        nptst.assert_array_almost_equal(V11, V12, 1)
        nptst.assert_array_almost_equal(V21, V22, 1)
        
        nptst.assert_array_almost_equal(W11, W12, 1)
        nptst.assert_array_almost_equal(W21, W22, 1)
Code Example #34
File: MaxLocalAUC.py (Project: kentwang/sandbox)
    def parallelLearnModel(self, X, verbose=False, U=None, V=None):
        """
        Max local AUC with Frobenius norm penalty on V. Solve with parallel (stochastic) gradient descent. 
        The input is a sparse array. 
        """
        # Convert to a csarray for faster access
        if scipy.sparse.issparse(X):
            logging.debug("Converting to csarray")
            X2 = sppy.csarray(X, storagetype="row")
            X = X2

        m, n = X.shape

        # We keep a validation set in order to determine when to stop
        if self.validationUsers != 0:
            numValidationUsers = int(m * self.validationUsers)
            trainX, testX, rowSamples = Sampling.shuffleSplitRows(
                X, 1, self.validationSize, numRows=numValidationUsers
            )[0]
            testIndPtr, testColInds = SparseUtils.getOmegaListPtr(testX)
        else:
            trainX = X
            testX = None
            rowSamples = None
            testIndPtr, testColInds = None, None

        # Note that to compute the test AUC we pick i \in X and j \notin X \cup testX
        indPtr, colInds = SparseUtils.getOmegaListPtr(trainX)
        allIndPtr, allColInds = SparseUtils.getOmegaListPtr(X)

        if U is None or V is None:
            U, V = self.initUV(trainX)

        if self.metric == "f1":
            metricInd = 2
        elif self.metric == "mrr":
            metricInd = 3
        else:
            raise ValueError("Unknown metric: " + self.metric)

        bestMetric = 0
        bestU = 0
        bestV = 0
        trainMeasures = []
        testMeasures = []
        loopInd = 0
        lastObj = 0
        currentObj = lastObj - 2 * self.eps

        numBlocks = self.numProcesses + 1
        gi, gp, gq = self.computeGipq(X)
        normGp, normGq = self.computeNormGpq(indPtr, colInds, gp, gq, m)

        # Some shared variables
        rowIsFree = sharedmem.ones(numBlocks, dtype=numpy.bool_)
        colIsFree = sharedmem.ones(numBlocks, dtype=numpy.bool_)

        # Create shared factors
        U2 = sharedmem.zeros((m, self.k))
        V2 = sharedmem.zeros((n, self.k))
        muU2 = sharedmem.zeros((m, self.k))
        muV2 = sharedmem.zeros((n, self.k))

        U2[:] = U[:]
        V2[:] = V[:]
        muU2[:] = U[:]
        muV2[:] = V[:]
        del U, V

        rowBlockSize = int(numpy.ceil(float(m) / numBlocks))
        colBlockSize = int(numpy.ceil(float(n) / numBlocks))

        lock = multiprocessing.Lock()
        startTime = time.time()
        loopInd = 0
        iterationsPerBlock = sharedmem.zeros((numBlocks, numBlocks))

        self.learnerCython = self.getCythonLearner()
        nextRecord = 0

        while loopInd < self.maxIterations and abs(lastObj - currentObj) > self.eps:
            if loopInd >= nextRecord:
                if loopInd != 0:
                    print("")

                printStr = self.recordResults(
                    muU2,
                    muV2,
                    trainMeasures,
                    testMeasures,
                    loopInd,
                    rowSamples,
                    indPtr,
                    colInds,
                    testIndPtr,
                    testColInds,
                    allIndPtr,
                    allColInds,
                    gi,
                    gp,
                    gq,
                    trainX,
                    startTime,
                )
                logging.debug(printStr)

                if testIndPtr is not None and testMeasures[-1][metricInd] >= bestMetric:
                    bestMetric = testMeasures[-1][metricInd]
                    bestU = muU2.copy()
                    bestV = muV2.copy()
                elif testIndPtr is None:
                    bestU = muU2.copy()
                    bestV = muV2.copy()

                # Compute objective averaged over last 5 recorded steps
                trainMeasuresArr = numpy.array(trainMeasures)
                lastObj = currentObj
                currentObj = numpy.mean(trainMeasuresArr[-5:, 0])

                nextRecord += self.recordStep

            iterationsPerBlock = sharedmem.zeros((numBlocks, numBlocks))
            self.parallelUpdateUV(
                X,
                U2,
                V2,
                muU2,
                muV2,
                numBlocks,
                rowBlockSize,
                colBlockSize,
                rowIsFree,
                colIsFree,
                indPtr,
                colInds,
                lock,
                gi,
                gp,
                gq,
                normGp,
                normGq,
                iterationsPerBlock,
                loopInd,
            )
            loopInd += numpy.floor(iterationsPerBlock.mean())

        totalTime = time.time() - startTime

        # Compute quantities for last U and V
        print("")
        totalTime = time.time() - startTime
        printStr = "Finished, time=" + str("%.1f" % totalTime) + " "
        printStr += self.recordResults(
            muU2,
            muV2,
            trainMeasures,
            testMeasures,
            loopInd,
            rowSamples,
            indPtr,
            colInds,
            testIndPtr,
            testColInds,
            allIndPtr,
            allColInds,
            gi,
            gp,
            gq,
            trainX,
            startTime,
        )
        printStr += " delta obj" + "%.3e" % abs(lastObj - currentObj)
        logging.debug(printStr)

        self.U = bestU
        self.V = bestV
        self.gi = gi
        self.gp = gp
        self.gq = gq

        if verbose:
            return self.U, self.V, numpy.array(trainMeasures), numpy.array(testMeasures), loopInd, totalTime
        else:
            return self.U, self.V
Code Example #35
    def testDerivativeViApprox(self): 
        """
        We test the case in which we approximate the AUC using a large number 
        of samples and check that we get close to the exact derivative. 
        """
        m = 20 
        n = 30 
        k = 3 
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)
        
        for i in range(m):
            X[i, 0] = 1
            X[i, 1] = 0
        
        w = 0.1
        eps = 0.001
        learner = MaxAUCSigmoid(k, w)
        learner.normalise = False
        learner.lmbdaU = 0
        learner.lmbdaV = 0
        learner.numAucSamples = n
        
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)
             
        gp = numpy.random.rand(n)
        gp /= gp.sum()        
        gq = numpy.random.rand(n)
        gq /= gq.sum()     
        
        permutedRowInds = numpy.array(numpy.random.permutation(m), numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(n), numpy.uint32)
        
        maxLocalAuc = MaxLocalAUC(k, w)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, m)
        
        numRuns = 200 
        numTests = 5

        #Let's compare against using the exact derivative 
        for i in numpy.random.permutation(m)[0:numTests]: 
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)
            dv1 = numpy.zeros(k)
            for j in range(numRuns): 
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)   
            
            
            dv3 = numpy.zeros(k)
            for j in range(k): 
                eps = 10**-6
                tempV = V.copy() 
                tempV[i,j] += eps
                obj1 = learner.objective(indPtr, colInds, indPtr, colInds, U, tempV, gp, gq)
                
                tempV = V.copy() 
                tempV[i,j] -= eps
                obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U, tempV, gp, gq)
                
                dv3[j] = (obj1-obj2)/(2*eps)            
            
            print(dv1, dv2, dv3)
            
            nptst.assert_array_almost_equal(dv1, dv2, 3)
            
        learner.lmbdaV = 0.5 
        learner.rho = 0.5
        
        for i in numpy.random.permutation(m)[0:numTests]: 
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)            
    
            dv1 = numpy.zeros(k)
            for j in range(numRuns): 
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V,  gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i) 
            print(dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)
            
        learner.numRowSamples = 10 
        numRuns = 1000
        
        for i in numpy.random.permutation(m)[0:numTests]: 
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)            
            
            dv1 = numpy.zeros(k)
            for j in range(numRuns): 
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)  
            print(dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)

        maxLocalAuc.numRowSamples = m 
        maxLocalAuc.numAucSamples = 20 
        maxLocalAuc.lmbdaV = 0
        numRuns = 1000
        print("Final test")
        
        #for i in numpy.random.permutation(m)[0:numTests]: 
        for i in range(m): 
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)            
            
            dv1 = numpy.zeros(k)
            for j in range(numRuns): 
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
            dv1 /= numRuns
            #dv1 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i) 
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)   
                      
            
            print(i, dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)
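
The pattern above, checking an analytic derivative against a central finite difference of the objective, is worth isolating. A minimal self-contained sketch of the same check, with a toy objective rather than the sandbox API (check_gradient, f and grad_f are hypothetical names):

import numpy

def check_gradient(f, grad_f, x, eps=1e-6):
    """Compare an analytic gradient against a central finite difference of f."""
    approx = numpy.zeros_like(x)
    for j in range(x.shape[0]):
        xp = x.copy()
        xp[j] += eps
        xm = x.copy()
        xm[j] -= eps
        approx[j] = (f(xp) - f(xm)) / (2 * eps)
    return numpy.linalg.norm(grad_f(x) - approx)

#Toy objective: f(x) = ||x||^2 has gradient 2x
f = lambda x: numpy.dot(x, x)
grad_f = lambda x: 2 * x
x = numpy.random.rand(5)
assert check_gradient(f, grad_f, x) < 1e-6
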
Code example #36
0
    def testDerivativeViApprox(self):
        """
        We'll test the case in which we approximate the AUC using a large number
        of samples, and check that we get close to the exact derivative
        """
        m = 20
        n = 30
        k = 3
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        for i in range(m):
            X[i, 0] = 1
            X[i, 1] = 0

        w = 0.1
        eps = 0.001
        learner = MaxAUCSigmoid(k, w)
        learner.normalise = False
        learner.lmbdaU = 0
        learner.lmbdaV = 0
        learner.numAucSamples = n

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        gp = numpy.random.rand(n)
        gp /= gp.sum()
        gq = numpy.random.rand(n)
        gq /= gq.sum()

        permutedRowInds = numpy.array(numpy.random.permutation(m),
                                      numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(n),
                                      numpy.uint32)

        maxLocalAuc = MaxLocalAUC(k, w)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, m)

        numRuns = 200
        numTests = 5

        #Compare against the exact derivative
        for i in numpy.random.permutation(m)[0:numTests]:
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)
            dv1 = numpy.zeros(k)
            for j in range(numRuns):
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp,
                                                  gq, normGp, normGq,
                                                  permutedRowInds,
                                                  permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)

            dv3 = numpy.zeros(k)
            for j in range(k):
                eps = 10**-6
                tempV = V.copy()
                tempV[i, j] += eps
                obj1 = learner.objective(indPtr, colInds, indPtr, colInds, U,
                                         tempV, gp, gq)

                tempV = V.copy()
                tempV[i, j] -= eps
                obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U,
                                         tempV, gp, gq)

                dv3[j] = (obj1 - obj2) / (2 * eps)

            print(dv1, dv2, dv3)

            nptst.assert_array_almost_equal(dv1, dv2, 3)

        learner.lmbdaV = 0.5
        learner.rho = 0.5

        for i in numpy.random.permutation(m)[0:numTests]:
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)

            dv1 = numpy.zeros(k)
            for j in range(numRuns):
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp,
                                                  gq, normGp, normGq,
                                                  permutedRowInds,
                                                  permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)
            print(dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)

        learner.numRowSamples = 10
        numRuns = 1000

        for i in numpy.random.permutation(m)[0:numTests]:
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)

            dv1 = numpy.zeros(k)
            for j in range(numRuns):
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp,
                                                  gq, normGp, normGq,
                                                  permutedRowInds,
                                                  permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)
            print(dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)

        maxLocalAuc.numRowSamples = m
        maxLocalAuc.numAucSamples = 20
        maxLocalAuc.lmbdaV = 0
        numRuns = 1000
        print("Final test")

        #for i in numpy.random.permutation(m)[0:numTests]:
        for i in range(m):
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)

            dv1 = numpy.zeros(k)
            for j in range(numRuns):
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp,
                                                  gq, normGp, normGq,
                                                  permutedRowInds,
                                                  permutedColInds, i)
            dv1 /= numRuns
            #dv1 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)

            print(i, dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)
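
Both copies of the test lean on derivativeViApprox being an unbiased estimator of derivativeVi, so the mean over numRuns draws converges to the exact derivative at roughly the 1/sqrt(numRuns) Monte Carlo rate. A toy sketch with a hypothetical noisy estimator makes the rate visible:

import numpy

numpy.random.seed(21)
exact = numpy.array([1.0, -2.0, 0.5])

def noisyEstimate():
    #Unbiased: the exact vector plus zero-mean noise
    return exact + numpy.random.randn(3)

for numRuns in [10, 100, 10000]:
    est = numpy.zeros(3)
    for j in range(numRuns):
        est += noisyEstimate()
    est /= numRuns
    #The error shrinks at roughly the 1/sqrt(numRuns) rate
    print(numRuns, numpy.linalg.norm(est - exact))
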
Code example #37
0
    def testDerivativeV(self):
        m = 10
        n = 20
        nnzPerRow = 5
        X = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                   nnzPerRow,
                                                   csarray=True)

        for i in range(m):
            X[i, 0] = 1
            X[i, 1] = 0

        k = 5
        u = 0.1
        w = 1 - u
        eps = 0.05
        learner = MaxAUCSigmoid(k, w)
        learner.normalise = False
        learner.lmbdaU = 0
        learner.lmbdaV = 0
        learner.rho = 1.0
        learner.numAucSamples = 100

        numRuns = 20
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        gp = numpy.random.rand(n)
        gp /= gp.sum()
        gq = numpy.random.rand(n)
        gq /= gq.sum()

        for s in range(numRuns):
            U = numpy.random.randn(m, k)
            V = numpy.random.randn(n, k)

            deltaV = numpy.zeros(V.shape)
            for j in range(n):
                deltaV[j, :] = learner.derivativeVi(indPtr, colInds, U, V, gp,
                                                    gq, j)

            deltaV2 = numpy.zeros(V.shape)

            eps = 0.00001

            for i in range(n):
                for j in range(k):
                    tempV = V.copy()
                    tempV[i, j] += eps
                    obj1 = learner.objective(indPtr, colInds, indPtr, colInds,
                                             U, tempV, gp, gq)

                    tempV = V.copy()
                    tempV[i, j] -= eps
                    obj2 = learner.objective(indPtr, colInds, indPtr, colInds,
                                             U, tempV, gp, gq)

                    deltaV2[i, j] = (obj1 - obj2) / (2 * eps)
                #deltaV2[i,:] = deltaV2[i,:]/numpy.linalg.norm(deltaV2[i,:])

            nptst.assert_almost_equal(deltaV, deltaV2, 3)

        #Repeat the check with rho = 1.0
        for s in range(numRuns):
            U = numpy.random.randn(m, k)
            V = numpy.random.randn(n, k)
            learner.rho = 1.0

            deltaV = numpy.zeros(V.shape)
            for j in range(n):
                deltaV[j, :] = learner.derivativeVi(indPtr, colInds, U, V, gp,
                                                    gq, j)

            deltaV2 = numpy.zeros(V.shape)

            for i in range(n):
                for j in range(k):
                    tempV = V.copy()
                    tempV[i, j] += eps
                    obj1 = learner.objective(indPtr, colInds, indPtr, colInds,
                                             U, tempV, gp, gq)

                    tempV = V.copy()
                    tempV[i, j] -= eps
                    obj2 = learner.objective(indPtr, colInds, indPtr, colInds,
                                             U, tempV, gp, gq)

                    deltaV2[i, j] = (obj1 - obj2) / (2 * eps)
                #deltaV2[i,:] = deltaV2[i,:]/numpy.linalg.norm(deltaV2[i,:])

            nptst.assert_almost_equal(deltaV, deltaV2, 3)

        #Try lmbdaV > 0 and a smaller rho
        for s in range(numRuns):
            U = numpy.random.randn(m, k)
            V = numpy.random.randn(n, k)

            learner.lmbdaV = 100
            learner.rho = 0.1

            deltaV = numpy.zeros(V.shape)
            for j in range(n):
                deltaV[j, :] = learner.derivativeVi(indPtr, colInds, U, V, gp,
                                                    gq, j)

            deltaV2 = numpy.zeros(V.shape)

            for i in range(n):
                for j in range(k):
                    tempV = V.copy()
                    tempV[i, j] += eps
                    obj1 = learner.objective(indPtr, colInds, indPtr, colInds,
                                             U, tempV, gp, gq)

                    tempV = V.copy()
                    tempV[i, j] -= eps
                    obj2 = learner.objective(indPtr, colInds, indPtr, colInds,
                                             U, tempV, gp, gq)

                    deltaV2[i, j] = (obj1 - obj2) / (2 * eps)
                #deltaV2[i,:] = deltaV2[i,:]/numpy.linalg.norm(deltaV2[i,:])

            nptst.assert_almost_equal(deltaV, deltaV2, 3)
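
The tests above switch eps between 0.05, 1e-5 and 1e-8 depending on the scale of the objective, and the choice matters: a central difference has O(eps^2) truncation error, but too small an eps amplifies floating-point round-off. A quick illustration on f(x) = x^3, where f'(2) = 12 exactly:

f = lambda x: x ** 3
for eps in [1e-1, 1e-3, 1e-6, 1e-9, 1e-12]:
    approx = (f(2.0 + eps) - f(2.0 - eps)) / (2 * eps)
    #The error first falls (truncation ~ eps**2), then rises again (round-off ~ 1/eps)
    print(eps, abs(approx - 12.0))
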
Code example #38
0
    def testDerivativeU(self):
        m = 10
        n = 20
        nnzPerRow = 5
        X = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                   nnzPerRow,
                                                   csarray=True)

        k = 5
        eps = 0.05
        learner = MaxAUCSigmoid(k)
        learner.normalise = False
        learner.lmbdaU = 0
        learner.lmbdaV = 0
        learner.rho = 1.0
        learner.numAucSamples = n

        numRuns = 20
        gi = numpy.random.rand(m)
        gi /= gi.sum()
        gp = numpy.random.rand(n)
        gp /= gp.sum()
        gq = numpy.random.rand(n)
        gq /= gq.sum()

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        for s in range(numRuns):
            U = numpy.random.randn(m, k)
            V = numpy.random.randn(n, k)
            deltaU = numpy.zeros(U.shape)
            for i in range(X.shape[0]):
                deltaU[i, :] = learner.derivativeUi(indPtr, colInds, U, V, gp,
                                                    gq, i)

            deltaU2 = numpy.zeros(U.shape)
            eps = 10**-8

            for i in range(m):
                for j in range(k):
                    tempU = U.copy()
                    tempU[i, j] += eps
                    obj1 = learner.objective(indPtr, colInds, indPtr, colInds,
                                             tempU, V, gp, gq)

                    tempU = U.copy()
                    tempU[i, j] -= eps
                    obj2 = learner.objective(indPtr, colInds, indPtr, colInds,
                                             tempU, V, gp, gq)

                    deltaU2[i, j] = (obj1 - obj2) / (2 * eps)

                #deltaU2[i,:] = deltaU2[i,:]/numpy.linalg.norm(deltaU2[i,:])

            #print(deltaU*100)
            #print(deltaU2*100)
            nptst.assert_almost_equal(deltaU, deltaU2, 3)

        #Try a smaller rho
        for s in range(numRuns):
            U = numpy.random.randn(m, k)
            V = numpy.random.randn(n, k)
            learner.rho = 0.1

            deltaU = numpy.zeros(U.shape)
            for i in range(X.shape[0]):
                deltaU[i, :] = learner.derivativeUi(indPtr, colInds, U, V, gp,
                                                    gq, i)

            deltaU2 = numpy.zeros(U.shape)
            eps = 10**-9

            for i in range(m):
                for j in range(k):
                    tempU = U.copy()
                    tempU[i, j] += eps
                    obj1 = learner.objective(indPtr, colInds, indPtr, colInds,
                                             tempU, V, gp, gq)

                    tempU = U.copy()
                    tempU[i, j] -= eps
                    obj2 = learner.objective(indPtr, colInds, indPtr, colInds,
                                             tempU, V, gp, gq)

                    deltaU2[i, j] = (obj1 - obj2) / (2 * eps)

            nptst.assert_almost_equal(deltaU, deltaU2, 3)

        #Try lmbda > 0

        for s in range(numRuns):
            U = numpy.random.randn(m, k)
            V = numpy.random.randn(n, k)
            learner.lmbdaU = 0.5

            deltaU = numpy.zeros(U.shape)
            for i in range(X.shape[0]):
                deltaU[i, :] = learner.derivativeUi(indPtr, colInds, U, V, gp,
                                                    gq, i)

            deltaU2 = numpy.zeros(U.shape)
            eps = 10**-9

            for i in range(m):
                for j in range(k):
                    tempU = U.copy()
                    tempU[i, j] += eps
                    obj1 = learner.objective(indPtr, colInds, indPtr, colInds,
                                             tempU, V, gp, gq)

                    tempU = U.copy()
                    tempU[i, j] -= eps
                    obj2 = learner.objective(indPtr, colInds, indPtr, colInds,
                                             tempU, V, gp, gq)

                    deltaU2[i, j] = (obj1 - obj2) / (2 * eps)

            nptst.assert_almost_equal(deltaU, deltaU2, 3)
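
The lmbdaU > 0 case also exercises the penalty term. Assuming a Frobenius penalty of the form lmbdaU*||U||_F^2 (the exact form inside MaxAUCSigmoid is not shown in these excerpts), its contribution to the gradient is 2*lmbdaU*U, which the same finite-difference machinery confirms in isolation:

import numpy

lmbda = 0.5
U = numpy.random.randn(10, 5)

penalty = lambda W: lmbda * numpy.linalg.norm(W) ** 2  #assumed Frobenius form
analytic = 2 * lmbda * U

eps = 1e-6
numeric = numpy.zeros(U.shape)
for i in range(U.shape[0]):
    for j in range(U.shape[1]):
        tempU = U.copy()
        tempU[i, j] += eps
        obj1 = penalty(tempU)
        tempU = U.copy()
        tempU[i, j] -= eps
        obj2 = penalty(tempU)
        numeric[i, j] = (obj1 - obj2) / (2 * eps)

#Agreement up to round-off
print(numpy.abs(analytic - numeric).max())
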
Code example #39
0
    def testDerivativeUiApprox(self):
        """
        We'll test the case in which we approximate the AUC using a large number
        of samples, and check that we get close to the exact derivative
        """
        m = 20
        n = 30
        k = 3
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        w = 0.1
        learner = MaxAUCSigmoid(k, w)
        learner.normalise = False
        learner.lmbdaU = 0
        learner.lmbdaV = 0
        learner.rho = 1.0
        learner.numAucSamples = 100

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        gp = numpy.random.rand(n)
        gp /= gp.sum()
        gq = numpy.random.rand(n)
        gq /= gq.sum()

        numRuns = 200
        numTests = 5

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
        permutedColInds = numpy.arange(n, dtype=numpy.uint32)

        #Test with a small number of AUC samples
        learner.numAucSamples = n
        numRuns = 1000

        for i in numpy.random.permutation(m)[0:numTests]:
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)

            du1 = numpy.zeros(k)
            for j in range(numRuns):
                du1 += learner.derivativeUiApprox(indPtr, colInds, U, V, gp,
                                                  gq, permutedColInds, i)
            du1 /= numRuns
            du2 = learner.derivativeUi(indPtr, colInds, U, V, gp, gq, i)
            #print(du1, du2)
            print(du1 / numpy.linalg.norm(du1), du2 / numpy.linalg.norm(du2))
            #print(numpy.linalg.norm(du1 - du2)/numpy.linalg.norm(du1))
            self.assertTrue(
                numpy.linalg.norm(du1 - du2) / numpy.linalg.norm(du1) < 0.5)

        #Compare against the exact derivative
        for i in numpy.random.permutation(m)[0:numTests]:
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)

            du1 = numpy.zeros(k)
            for j in range(numRuns):
                du1 += learner.derivativeUiApprox(indPtr, colInds, U, V, gp,
                                                  gq, permutedColInds, i)
            du1 /= numRuns
            du2 = learner.derivativeUi(indPtr, colInds, U, V, gp, gq, i)

            print(du1 / numpy.linalg.norm(du1), du2 / numpy.linalg.norm(du2))
            nptst.assert_array_almost_equal(du1, du2, 2)

        learner.lmbdaV = 0.5
        learner.rho = 0.5

        for i in numpy.random.permutation(m)[0:numTests]:
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)

            du1 = numpy.zeros(k)
            for j in range(numRuns):
                du1 += learner.derivativeUiApprox(indPtr, colInds, U, V, gp,
                                                  gq, permutedColInds, i)
            du1 /= numRuns
            du2 = learner.derivativeUi(indPtr, colInds, U, V, gp, gq, i)
            nptst.assert_array_almost_equal(du1, du2, 2)
            print(du1 / numpy.linalg.norm(du1), du2 / numpy.linalg.norm(du2))
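
When the estimator is noisy (few AUC samples per draw), the test above falls back to a scale-free relative-error check, norm(du1 - du2)/norm(du1) < 0.5, instead of assert_array_almost_equal, so the same threshold works whatever the magnitude of the gradient. A minimal version of that helper (relativeError is a hypothetical name):

import numpy

def relativeError(approx, exact):
    return numpy.linalg.norm(approx - exact) / numpy.linalg.norm(approx)

exact = numpy.array([10.0, -20.0, 5.0])
approx = exact + 0.1 * numpy.random.randn(3)
assert relativeError(approx, exact) < 0.5
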
Code example #40
0
    def singleLearnModel(self, X, verbose=False, U=None, V=None):
        """
        Max local AUC with Frobenius norm penalty on V. Solve with (stochastic) gradient descent. 
        The input is a sparse array. 
        """
        #Convert to a csarray for faster access
        if scipy.sparse.issparse(X):
            logging.debug("Converting to csarray")
            X2 = sppy.csarray(X, storagetype="row")
            X = X2

        m, n = X.shape

        #We keep a validation set in order to determine when to stop
        if self.validationUsers != 0:
            numValidationUsers = int(m * self.validationUsers)
            trainX, testX, rowSamples = Sampling.shuffleSplitRows(
                X, 1, self.validationSize, numRows=numValidationUsers)[0]

            testIndPtr, testColInds = SparseUtils.getOmegaListPtr(testX)

            logging.debug("Train X shape and nnz: " + str(trainX.shape) + " " +
                          str(trainX.nnz))
            logging.debug("Validation X shape and nnz: " + str(testX.shape) +
                          " " + str(testX.nnz))
        else:
            trainX = X
            testX = None
            rowSamples = None
            testIndPtr, testColInds = None, None

        #Note that to compute the test AUC we pick i \in X and j \notin X \cup testX
        indPtr, colInds = SparseUtils.getOmegaListPtr(trainX)
        allIndPtr, allColInds = SparseUtils.getOmegaListPtr(X)

        if type(U) != numpy.ndarray and type(V) != numpy.ndarray:
            U, V = self.initUV(trainX)

        if self.metric == "f1":
            metricInd = 2
        elif self.metric == "mrr":
            metricInd = 3
        else:
            raise ValueError("Unknown metric: " + self.metric)

        muU = U.copy()
        muV = V.copy()
        bestMetric = 0
        bestU = 0
        bestV = 0
        trainMeasures = []
        testMeasures = []
        loopInd = 0
        lastObj = 0
        currentObj = lastObj - 2 * self.eps

        #Try alternative number of iterations
        #numIterations = trainX.nnz/self.numAucSamples
        numIterations = max(m, n)

        self.learnerCython = self.getCythonLearner()

        #Set up order of indices for stochastic methods
        permutedRowInds = numpy.array(numpy.random.permutation(m),
                                      numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(n),
                                      numpy.uint32)

        startTime = time.time()

        gi, gp, gq = self.computeGipq(X)
        normGp, normGq = self.computeNormGpq(indPtr, colInds, gp, gq, m)

        while loopInd < self.maxIterations and abs(lastObj -
                                                   currentObj) > self.eps:
            sigmaU = self.getSigma(loopInd, self.alpha, m)
            sigmaV = self.getSigma(loopInd, self.alpha, m)

            if loopInd % self.recordStep == 0:
                if loopInd != 0 and self.stochastic:
                    print("")

                printStr = self.recordResults(muU, muV, trainMeasures,
                                              testMeasures, loopInd,
                                              rowSamples, indPtr, colInds,
                                              testIndPtr, testColInds,
                                              allIndPtr, allColInds, gi, gp,
                                              gq, trainX, startTime)
                logging.debug(printStr)

                if testIndPtr is not None and testMeasures[-1][
                        metricInd] >= bestMetric:
                    bestMetric = testMeasures[-1][metricInd]
                    logging.debug("Current best metric=" + str(bestMetric))
                    bestU = muU.copy()
                    bestV = muV.copy()
                elif testIndPtr is None:
                    bestU = muU.copy()
                    bestV = muV.copy()

                #Compute objective averaged over last 5 recorded steps
                trainMeasuresArr = numpy.array(trainMeasures)
                lastObj = currentObj
                currentObj = numpy.mean(trainMeasuresArr[-5:, 0])

            U = numpy.ascontiguousarray(U)
            self.updateUV(indPtr, colInds, U, V, muU, muV, permutedRowInds,
                          permutedColInds, gp, gq, normGp, normGq, loopInd,
                          sigmaU, sigmaV, numIterations)
            loopInd += 1

        #Compute quantities for last U and V
        totalTime = time.time() - startTime
        printStr = "\nFinished, time=" + str('%.1f' % totalTime) + " "
        printStr += self.recordResults(muU, muV, trainMeasures, testMeasures,
                                       loopInd, rowSamples, indPtr, colInds,
                                       testIndPtr, testColInds, allIndPtr,
                                       allColInds, gi, gp, gq, trainX,
                                       startTime)
        printStr += " delta obj=" + "%.3e" % abs(lastObj - currentObj)
        logging.debug(printStr)

        self.U = bestU
        self.V = bestV
        self.gi = gi
        self.gp = gp
        self.gq = gq

        trainMeasures = numpy.array(trainMeasures)
        testMeasures = numpy.array(testMeasures)

        if verbose:
            return self.U, self.V, trainMeasures, testMeasures, loopInd, totalTime
        else:
            return self.U, self.V
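
Two pieces of control flow in singleLearnModel deserve isolating: the stopping test compares objectives averaged over the last five recorded steps, which smooths out stochastic noise, and the returned factors are the best ones seen on the validation metric rather than the final iterates. A stripped-down, runnable skeleton of that loop with toy stand-ins for objective, validationMetric and step:

import numpy

def objective(model):
    return float(numpy.dot(model, model))  #toy objective: ||model||^2

def validationMetric(model):
    return 1.0 / (1.0 + objective(model))  #toy held-out metric, higher is better

def step(model):
    return model - 0.1 * 2 * model  #gradient step on the toy objective

def train(maxIterations=1000, eps=10**-4, recordStep=10):
    numpy.random.seed(21)
    model = numpy.random.rand(5)
    objectives = []
    bestMetric, bestModel = 0.0, model.copy()
    lastObj, currentObj = 0.0, -2 * eps
    loopInd = 0

    while loopInd < maxIterations and abs(lastObj - currentObj) > eps:
        if loopInd % recordStep == 0:
            objectives.append(objective(model))
            #Average the last 5 recorded objectives to smooth the stopping test
            lastObj, currentObj = currentObj, float(numpy.mean(objectives[-5:]))
            #Keep the best model seen on the validation metric, not the last one
            if validationMetric(model) >= bestMetric:
                bestMetric, bestModel = validationMetric(model), model.copy()
        model = step(model)
        loopInd += 1

    return bestModel

print(train())
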
Code example #41
0
    def recordResults(self, X, trainX, testX, learner, fileName):
        """
        Save results for a particular recommendation
        """
        if self.algoArgs.skipRecordResults:
            logging.debug("Skipping final evaluation of algorithm")
            return

        allTrainMeasures = []
        allTestMeasures = []
        allMetaData = []

        for i in range(self.algoArgs.recordFolds):
            metaData = []
            w = 1-self.algoArgs.u
            logging.debug("Computing recommendation errors")
            maxItems = self.ps[-1]

            start = time.time()
            if type(learner) == IterativeSoftImpute:
                trainIterator = iter([trainX])
                ZList = learner.learnModel(trainIterator)
                U, s, V = next(ZList)
                U = U*s

                #trainX = sppy.csarray(trainX)
                #testX = sppy.csarray(testX)
                U = numpy.ascontiguousarray(U)
                V = numpy.ascontiguousarray(V)
            else:
                learner.learnModel(trainX)
                U = learner.U
                V = learner.V

            learnTime = time.time()-start
            metaData.append(learnTime)

            logging.debug("Getting all omega")
            allOmegaPtr = SparseUtils.getOmegaListPtr(X)
            logging.debug("Getting train omega")
            trainOmegaPtr = SparseUtils.getOmegaListPtr(trainX)
            logging.debug("Getting test omega")
            testOmegaPtr = SparseUtils.getOmegaListPtr(testX)
            logging.debug("Getting recommendations")

            trainOrderedItems = MCEvaluator.recommendAtk(U, V, maxItems)
            testOrderedItems = MCEvaluatorCython.recommendAtk(U, V, maxItems, trainX)

            colNames = []
            trainMeasures = []
            testMeasures = []
            for p in self.ps:
                trainMeasures.append(MCEvaluator.precisionAtK(trainOmegaPtr, trainOrderedItems, p))
                testMeasures.append(MCEvaluator.precisionAtK(testOmegaPtr, testOrderedItems, p))

                colNames.append("precision@" + str(p))

            for p in self.ps:
                trainMeasures.append(MCEvaluator.recallAtK(trainOmegaPtr, trainOrderedItems, p))
                testMeasures.append(MCEvaluator.recallAtK(testOmegaPtr, testOrderedItems, p))

                colNames.append("recall@" + str(p))

            for p in self.ps:
                trainMeasures.append(MCEvaluator.f1AtK(trainOmegaPtr, trainOrderedItems, p))
                testMeasures.append(MCEvaluator.f1AtK(testOmegaPtr, testOrderedItems, p))

                colNames.append("f1@" + str(p))

            for p in self.ps:
                trainMeasures.append(MCEvaluator.mrrAtK(trainOmegaPtr, trainOrderedItems, p))
                testMeasures.append(MCEvaluator.mrrAtK(testOmegaPtr, testOrderedItems, p))

                colNames.append("mrr@" + str(p))

            try:
                r = SparseUtilsCython.computeR(U, V, w, self.algoArgs.numRecordAucSamples)
                trainMeasures.append(MCEvaluator.localAUCApprox(trainOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, r=r))
                testMeasures.append(MCEvaluator.localAUCApprox(testOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, allArray=allOmegaPtr, r=r))

                w = 0.0
                r = SparseUtilsCython.computeR(U, V, w, self.algoArgs.numRecordAucSamples)
                trainMeasures.append(MCEvaluator.localAUCApprox(trainOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, r=r))
                testMeasures.append(MCEvaluator.localAUCApprox(testOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, allArray=allOmegaPtr, r=r))

                colNames.append("LAUC@" + str(self.algoArgs.u))
                colNames.append("AUC")
            except Exception:
                logging.debug("Could not compute AUCs")
                raise

            trainMeasures = numpy.array(trainMeasures)
            testMeasures = numpy.array(testMeasures)
            metaData = numpy.array(metaData)

            allTrainMeasures.append(trainMeasures)
            allTestMeasures.append(testMeasures)
            allMetaData.append(metaData)

        allTrainMeasures = numpy.array(allTrainMeasures)
        allTestMeasures = numpy.array(allTestMeasures)
        allMetaData = numpy.array(allMetaData)

        meanTrainMeasures = numpy.mean(allTrainMeasures, 0)
        meanTestMeasures = numpy.mean(allTestMeasures, 0)
        meanMetaData = numpy.mean(allMetaData, 0)

        logging.debug("Mean metrics")
        for i, colName in enumerate(colNames):
            logging.debug(colName + ":" + str('%.4f' % meanTrainMeasures[i]) + "/" + str('%.4f' % meanTestMeasures[i]))

        numpy.savez(fileName, meanTrainMeasures, meanTestMeasures, meanMetaData, trainOrderedItems, testOrderedItems)
        logging.debug("Saved file as " + fileName)
Code example #42
0
    def testDerivativeV(self): 
        m = 10 
        n = 20 
        nnzPerRow = 5 
        X = SparseUtils.generateSparseBinaryMatrix((m, n), nnzPerRow, csarray=True)
        
        for i in range(m):
            X[i, 0] = 1
            X[i, 1] = 0
        
        k = 5
        u = 0.1
        w = 1-u
        eps = 0.05
        learner = MaxAUCSigmoid(k, w)
        learner.normalise = False
        learner.lmbdaU = 0
        learner.lmbdaV = 0
        learner.rho = 1.0
        learner.numAucSamples = 100

        numRuns = 20
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
            
        gp = numpy.random.rand(n)
        gp /= gp.sum()        
        gq = numpy.random.rand(n)
        gq /= gq.sum()            
        
        for s in range(numRuns):
            U = numpy.random.randn(m, k)
            V = numpy.random.randn(n, k)            
            
            deltaV = numpy.zeros(V.shape)
            for j in range(n): 
                deltaV[j, :] = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, j)   
            
            deltaV2 = numpy.zeros(V.shape)    
            
            eps = 0.00001        
            
            for i in range(n): 
                for j in range(k):
                    tempV = V.copy() 
                    tempV[i,j] += eps
                    obj1 = learner.objective(indPtr, colInds, indPtr, colInds, U, tempV, gp, gq)
                    
                    tempV = V.copy() 
                    tempV[i,j] -= eps
                    obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U, tempV, gp, gq)
                    
                    deltaV2[i,j] = (obj1-obj2)/(2*eps)
                #deltaV2[i,:] = deltaV2[i,:]/numpy.linalg.norm(deltaV2[i,:])                   
                        

            nptst.assert_almost_equal(deltaV, deltaV2, 3)

        #Repeat the check with rho = 1.0
        for s in range(numRuns):
            U = numpy.random.randn(m, k)
            V = numpy.random.randn(n, k)   
            learner.rho = 1.0    
            
            deltaV = numpy.zeros(V.shape)
            for j in range(n): 
                deltaV[j, :] = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, j)    
            
            deltaV2 = numpy.zeros(V.shape)
            
            for i in range(n): 
                for j in range(k):
                    tempV = V.copy() 
                    tempV[i,j] += eps
                    obj1 = learner.objective(indPtr, colInds, indPtr, colInds, U, tempV, gp, gq)
                    
                    tempV = V.copy() 
                    tempV[i,j] -= eps
                    obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U, tempV, gp, gq)
                    
                    deltaV2[i,j] = (obj1-obj2)/(2*eps)
                #deltaV2[i,:] = deltaV2[i,:]/numpy.linalg.norm(deltaV2[i,:])
                           
            nptst.assert_almost_equal(deltaV, deltaV2, 3)
        
        
        #Try lmbdaV > 0 and a smaller rho
        for s in range(numRuns):
            U = numpy.random.randn(m, k)
            V = numpy.random.randn(n, k)              
            
            learner.lmbdaV = 100   
            learner.rho = 0.1
            
            deltaV = numpy.zeros(V.shape)
            for j in range(n): 
                deltaV[j, :] = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, j)
            
            deltaV2 = numpy.zeros(V.shape)
            
            for i in range(n): 
                for j in range(k):
                    tempV = V.copy() 
                    tempV[i,j] += eps
                    obj1 = learner.objective(indPtr, colInds, indPtr, colInds, U, tempV, gp, gq)
                    
                    tempV = V.copy() 
                    tempV[i,j] -= eps
                    obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U, tempV,  gp, gq)
                    
                    deltaV2[i,j] = (obj1-obj2)/(2*eps)
                #deltaV2[i,:] = deltaV2[i,:]/numpy.linalg.norm(deltaV2[i,:])
              
            nptst.assert_almost_equal(deltaV, deltaV2, 3)         
Code example #43
0
    def testObjectiveApprox(self):
        """
        We'll test the case in which we approximate the AUC using a large number
        of samples, and check that we get close to the exact objective
        """
        m = 20
        n = 30
        k = 3
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        learner = MaxAUCSigmoid(k)
        learner.normalise = False
        learner.lmbdaU = 0
        learner.lmbdaV = 0
        learner.rho = 1.0
        learner.numAucSamples = n

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        numRuns = 100
        numTests = 5

        gi = numpy.random.rand(m)
        gi /= gi.sum()
        gp = numpy.random.rand(n)
        gp /= gp.sum()
        gq = numpy.random.rand(n)
        gq /= gq.sum()
        #gi = numpy.ones(m)
        #gp = numpy.ones(n)
        #gq = numpy.ones(n)

        #Compare against the exact objective
        for i in range(numTests):
            obj = 0

            for j in range(numRuns):
                obj += learner.objectiveApprox(indPtr, colInds, indPtr,
                                               colInds, U, V, gp, gq)
            obj /= numRuns

            obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U, V,
                                     gp, gq)
            self.assertAlmostEqual(obj, obj2, 2)

        learner.rho = 0.2

        for i in range(numTests):
            obj = 0
            for j in range(numRuns):
                obj += learner.objectiveApprox(indPtr, colInds, indPtr,
                                               colInds, U, V, gp, gq)
            obj /= numRuns

            obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U, V,
                                     gp, gq)
            self.assertAlmostEqual(obj, obj2, 2)

        learner.lmbdaV = 0.2

        for i in range(numTests):
            obj = 0
            for j in range(numRuns):
                obj += learner.objectiveApprox(indPtr, colInds, indPtr,
                                               colInds, U, V, gp, gq)
            obj /= numRuns

            obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U, V,
                                     gp, gq)
            self.assertAlmostEqual(obj, obj2, 2)

        #Check full and summary versions are the same
        obj = learner.objective(indPtr, colInds, indPtr, colInds, U, V, gp, gq)
        obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U, V, gp,
                                 gq)
        self.assertAlmostEqual(obj, obj2, 2)
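
The test works because objectiveApprox is, up to sampling noise, an unbiased estimate of objective; the same device underlies the localAUCApprox calls in recordResults. A toy sketch of estimating an AUC-like quantity by sampling positive/negative score pairs instead of enumerating all of them:

import numpy

numpy.random.seed(21)
posScores = numpy.random.randn(50) + 1.0  #scores of observed (positive) items
negScores = numpy.random.randn(200)  #scores of unobserved items

#Exact AUC: fraction of (positive, negative) pairs ranked correctly
exact = numpy.mean(posScores[:, None] > negScores[None, :])

#Sampled estimate: average over randomly drawn pairs
numAucSamples = 1000
i = numpy.random.randint(0, 50, numAucSamples)
j = numpy.random.randint(0, 200, numAucSamples)
approx = numpy.mean(posScores[i] > negScores[j])

#The two agree to roughly 1/sqrt(numAucSamples)
print(exact, approx)
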
Code example #44
0
    def testDerivativeU(self): 
        m = 10 
        n = 20 
        nnzPerRow = 5 
        X = SparseUtils.generateSparseBinaryMatrix((m, n), nnzPerRow, csarray=True)
        
        k = 5
        eps = 0.05
        learner = MaxAUCSigmoid(k)
        learner.normalise = False
        learner.lmbdaU = 0
        learner.lmbdaV = 0
        learner.rho = 1.0
        learner.numAucSamples = n

        numRuns = 20
        gi = numpy.random.rand(m)
        gi /= gi.sum()        
        gp = numpy.random.rand(n)
        gp /= gp.sum()        
        gq = numpy.random.rand(n)
        gq /= gq.sum()     
        
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        for s in range(numRuns):
            U = numpy.random.randn(m, k)
            V = numpy.random.randn(n, k)
            deltaU = numpy.zeros(U.shape)
            for i in range(X.shape[0]): 
                deltaU[i, :] = learner.derivativeUi(indPtr, colInds, U, V, gp, gq, i)      
    
            deltaU2 = numpy.zeros(U.shape) 
            eps = 10**-8         
            
            for i in range(m): 
                for j in range(k):
                    tempU = U.copy() 
                    tempU[i,j] += eps
                    obj1 = learner.objective(indPtr, colInds, indPtr, colInds, tempU, V, gp, gq)
                    
                    tempU = U.copy() 
                    tempU[i,j] -= eps
                    obj2 = learner.objective(indPtr, colInds, indPtr, colInds, tempU, V, gp, gq)
                    
                    deltaU2[i,j] = (obj1-obj2)/(2*eps)
    
                #deltaU2[i,:] = deltaU2[i,:]/numpy.linalg.norm(deltaU2[i,:])
            
            #print(deltaU*100)
            #print(deltaU2*100)
            nptst.assert_almost_equal(deltaU, deltaU2, 3)
        
        #Try a smaller rho
        for s in range(numRuns):
            U = numpy.random.randn(m, k)
            V = numpy.random.randn(n, k)
            learner.rho = 0.1
            
            deltaU = numpy.zeros(U.shape)
            for i in range(X.shape[0]): 
                deltaU[i, :] = learner.derivativeUi(indPtr, colInds, U, V, gp, gq, i)
            
            deltaU2 = numpy.zeros(U.shape) 
            eps = 10**-9        
            
            for i in range(m): 
                for j in range(k):
                    tempU = U.copy() 
                    tempU[i,j] += eps
                    obj1 = learner.objective(indPtr, colInds, indPtr, colInds, tempU, V, gp, gq)
                    
                    tempU = U.copy() 
                    tempU[i,j] -= eps
                    obj2 = learner.objective(indPtr, colInds, indPtr, colInds, tempU, V, gp, gq)
                    
                    deltaU2[i,j] = (obj1-obj2)/(2*eps)
                                
            nptst.assert_almost_equal(deltaU, deltaU2, 3)
        
        #Try lmbda > 0
        
        for s in range(numRuns):
            U = numpy.random.randn(m, k)
            V = numpy.random.randn(n, k)
            learner.lmbdaU = 0.5
            
            deltaU = numpy.zeros(U.shape)
            for i in range(X.shape[0]): 
                deltaU[i, :] = learner.derivativeUi(indPtr, colInds, U, V, gp, gq, i) 
            
            deltaU2 = numpy.zeros(U.shape) 
            eps = 10**-9        
            
            for i in range(m): 
                for j in range(k):
                    tempU = U.copy() 
                    tempU[i,j] += eps
                    obj1 = learner.objective(indPtr, colInds, indPtr, colInds, tempU, V, gp, gq)
                    
                    tempU = U.copy() 
                    tempU[i,j] -= eps
                    obj2 = learner.objective(indPtr, colInds, indPtr, colInds, tempU, V, gp, gq)
                    
                    deltaU2[i,j] = (obj1-obj2)/(2*eps)
                                
            nptst.assert_almost_equal(deltaU, deltaU2, 3)
Code example #45
0
learningRateParams = [(4.0, 1.0), (4.0, 0.5), (4.0, 0.1), (1.0, 1.0), (1.0, 0.5), (1.0, 0.1), (0.25, 1.0), (0.25, 0.5), (0.25, 0.1)]
print(startAverages)

def computeTestObj(args): 
    trainX, maxLocalAuc  = args 
    numpy.random.seed(21)
    U, V, trainMeasures, testMeasures, iterations, totalTime = maxLocalAuc.learnModel(trainX, verbose=True)
    return U, V, trainMeasures[-1, 0], testMeasures[-1, 0]

if saveResults:    
    trainObjectives = numpy.zeros((startAverages.shape[0], len(learningRateParams)))
    testObjectives = numpy.zeros((startAverages.shape[0], len(learningRateParams)))
    
    for trainX, testX in trainTestXs: 
        trainOmegaPtr = SparseUtils.getOmegaListPtr(trainX)
        testOmegaPtr = SparseUtils.getOmegaListPtr(testX)
        allOmegaPtr = SparseUtils.getOmegaListPtr(X)
        logging.debug("Number of non-zero elements: " + str((trainX.nnz, testX.nnz)))        
        
        paramList = []      
        
        for j, startAverage in enumerate(startAverages): 
            for i, (alpha, t0) in enumerate(learningRateParams):
                maxLocalAuc.startAverage = startAverage
                maxLocalAuc.alpha = alpha 
                maxLocalAuc.t0 = t0
                logging.debug(maxLocalAuc)
                
                learner = maxLocalAuc.copy()
                paramList.append((trainX, learner))
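
The snippet stops after building paramList, but the intent is clear from computeTestObj: evaluate each (alpha, t0) learning-rate setting, presumably in parallel, and keep the one with the lowest objective. A hedged sketch of that pattern with multiprocessing and a stand-in objective:

import multiprocessing

def computeTestObj(args):
    #Stand-in for learnModel: pretend smaller alpha*t0 gives a lower objective
    alpha, t0 = args
    return alpha, t0, alpha * t0

if __name__ == "__main__":
    learningRateParams = [(4.0, 1.0), (4.0, 0.5), (1.0, 1.0), (0.25, 0.1)]
    pool = multiprocessing.Pool(processes=4)
    results = pool.map(computeTestObj, learningRateParams)
    pool.close()
    best = min(results, key=lambda r: r[2])
    print("best (alpha, t0):", best[:2])
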
Code example #46
0
    def testObjectiveApprox(self): 
        """
        We'll test the case in which we approximate the AUC using a large number
        of samples, and check that we get close to the exact objective
        """
        m = 20 
        n = 30 
        k = 3 
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)
        
        learner = MaxAUCSigmoid(k)
        learner.normalise = False
        learner.lmbdaU = 0
        learner.lmbdaV = 0
        learner.rho = 1.0
        learner.numAucSamples = n
        
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)
        
        numRuns = 100 
        numTests = 5
        
        gi = numpy.random.rand(m)
        gi /= gi.sum()        
        gp = numpy.random.rand(n)
        gp /= gp.sum()        
        gq = numpy.random.rand(n)
        gq /= gq.sum()
        #gi = numpy.ones(m)
        #gp = numpy.ones(n)
        #gq = numpy.ones(n)

        #Compare against the exact objective
        for i in range(numTests): 
            obj = 0

            for j in range(numRuns): 
                obj += learner.objectiveApprox(indPtr, colInds, indPtr, colInds, U, V, gp, gq)
            obj /= numRuns

            obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U, V, gp, gq)    
            self.assertAlmostEqual(obj, obj2, 2)
            
        learner.rho = 0.2

        for i in range(numTests): 
            obj = 0
            for j in range(numRuns): 
                obj += learner.objectiveApprox(indPtr, colInds, indPtr, colInds, U, V, gp, gq)
            obj /= numRuns
            
            obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U, V, gp, gq)    
            self.assertAlmostEqual(obj, obj2, 2)

        learner.lmbdaV = 0.2

        for i in range(numTests): 
            obj = 0
            for j in range(numRuns): 
                obj += learner.objectiveApprox(indPtr, colInds, indPtr, colInds, U, V, gp, gq)
            obj /= numRuns
            
            obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U, V, gp, gq)    
            self.assertAlmostEqual(obj, obj2, 2)
        
        #Check full and summary versions are the same 
        obj = learner.objective(indPtr, colInds, indPtr, colInds, U, V, gp, gq) 
        obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U, V, gp, gq) 
        self.assertAlmostEqual(obj, obj2, 2)
Code example #47
0
File: MaxLocalAUC.py Project: kentwang/sandbox
    def singleLearnModel(self, X, verbose=False, U=None, V=None):
        """
        Max local AUC with Frobenius norm penalty on V. Solve with (stochastic) gradient descent. 
        The input is a sparse array. 
        """
        # Convert to a csarray for faster access
        if scipy.sparse.issparse(X):
            logging.debug("Converting to csarray")
            X2 = sppy.csarray(X, storagetype="row")
            X = X2

        m, n = X.shape

        # We keep a validation set in order to determine when to stop
        if self.validationUsers != 0:
            numValidationUsers = int(m * self.validationUsers)
            trainX, testX, rowSamples = Sampling.shuffleSplitRows(
                X, 1, self.validationSize, numRows=numValidationUsers
            )[0]

            testIndPtr, testColInds = SparseUtils.getOmegaListPtr(testX)

            logging.debug("Train X shape and nnz: " + str(trainX.shape) + " " + str(trainX.nnz))
            logging.debug("Validation X shape and nnz: " + str(testX.shape) + " " + str(testX.nnz))
        else:
            trainX = X
            testX = None
            rowSamples = None
            testIndPtr, testColInds = None, None

        # Note that to compute the test AUC we pick i \in X and j \notin X \cup testX
        indPtr, colInds = SparseUtils.getOmegaListPtr(trainX)
        allIndPtr, allColInds = SparseUtils.getOmegaListPtr(X)

        if type(U) != numpy.ndarray and type(V) != numpy.ndarray:
            U, V = self.initUV(trainX)

        if self.metric == "f1":
            metricInd = 2
        elif self.metric == "mrr":
            metricInd = 3
        else:
            raise ValueError("Unknown metric: " + self.metric)

        muU = U.copy()
        muV = V.copy()
        bestMetric = 0
        bestU = 0
        bestV = 0
        trainMeasures = []
        testMeasures = []
        loopInd = 0
        lastObj = 0
        currentObj = lastObj - 2 * self.eps

        # Try alternative number of iterations
        # numIterations = trainX.nnz/self.numAucSamples
        numIterations = max(m, n)

        self.learnerCython = self.getCythonLearner()

        # Set up order of indices for stochastic methods
        permutedRowInds = numpy.array(numpy.random.permutation(m), numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(n), numpy.uint32)

        startTime = time.time()

        gi, gp, gq = self.computeGipq(X)
        normGp, normGq = self.computeNormGpq(indPtr, colInds, gp, gq, m)

        while loopInd < self.maxIterations and abs(lastObj - currentObj) > self.eps:
            sigmaU = self.getSigma(loopInd, self.alpha, m)
            sigmaV = self.getSigma(loopInd, self.alpha, m)

            if loopInd % self.recordStep == 0:
                if loopInd != 0 and self.stochastic:
                    print("")

                printStr = self.recordResults(
                    muU,
                    muV,
                    trainMeasures,
                    testMeasures,
                    loopInd,
                    rowSamples,
                    indPtr,
                    colInds,
                    testIndPtr,
                    testColInds,
                    allIndPtr,
                    allColInds,
                    gi,
                    gp,
                    gq,
                    trainX,
                    startTime,
                )
                logging.debug(printStr)

                if testIndPtr is not None and testMeasures[-1][metricInd] >= bestMetric:
                    bestMetric = testMeasures[-1][metricInd]
                    logging.debug("Current best metric=" + str(bestMetric))
                    bestU = muU.copy()
                    bestV = muV.copy()
                elif testIndPtr is None:
                    bestU = muU.copy()
                    bestV = muV.copy()

                # Compute objective averaged over last 5 recorded steps
                trainMeasuresArr = numpy.array(trainMeasures)
                lastObj = currentObj
                currentObj = numpy.mean(trainMeasuresArr[-5:, 0])

            U = numpy.ascontiguousarray(U)
            self.updateUV(
                indPtr,
                colInds,
                U,
                V,
                muU,
                muV,
                permutedRowInds,
                permutedColInds,
                gp,
                gq,
                normGp,
                normGq,
                loopInd,
                sigmaU,
                sigmaV,
                numIterations,
            )
            loopInd += 1

        # Compute quantities for last U and V
        totalTime = time.time() - startTime
        printStr = "\nFinished, time=" + str("%.1f" % totalTime) + " "
        printStr += self.recordResults(
            muU,
            muV,
            trainMeasures,
            testMeasures,
            loopInd,
            rowSamples,
            indPtr,
            colInds,
            testIndPtr,
            testColInds,
            allIndPtr,
            allColInds,
            gi,
            gp,
            gq,
            trainX,
            startTime,
        )
        printStr += " delta obj=" + "%.3e" % abs(lastObj - currentObj)
        logging.debug(printStr)

        self.U = bestU
        self.V = bestV
        self.gi = gi
        self.gp = gp
        self.gq = gq

        trainMeasures = numpy.array(trainMeasures)
        testMeasures = numpy.array(testMeasures)

        if verbose:
            return self.U, self.V, trainMeasures, testMeasures, loopInd, totalTime
        else:
            return self.U, self.V
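
Finally, both copies of singleLearnModel carry averaged iterates muU, muV alongside the raw SGD iterates U, V, and example #45 tunes a startAverage parameter. This looks like Polyak-Ruppert iterate averaging, though the updateUV internals are not shown in these excerpts, so treat the following as an assumption: after a burn-in, the running mean of the SGD iterates is less noisy than the last iterate. A toy sketch:

import numpy

numpy.random.seed(21)
x = numpy.random.rand(5)  #raw SGD iterate
mu = x.copy()  #averaged iterate (assumed role of muU/muV)
startAverage = 100
numIterations = 1000

for t in range(numIterations):
    sigma = 1.0 / (1.0 + t)  #decaying step size
    #Noisy gradient step on the toy objective ||x||^2
    x = x - sigma * (2 * x + 0.5 * numpy.random.randn(5))
    if t >= startAverage:
        #Running mean of the iterates after the burn-in
        mu += (x - mu) / (t - startAverage + 1)
    else:
        mu = x.copy()

#mu is typically closer to the optimum (zero) than the last iterate
print(numpy.linalg.norm(x), numpy.linalg.norm(mu))
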