def testStratifiedRecallAtk(self):
    """
    Compare MCEvaluatorCython.stratifiedRecallAtk with a plain Python
    computation of the same quantity, then check that it coincides with
    MCEvaluatorCython.recallAtk when every item count is 1.
    """
    numRows, numCols = 20, 50
    rank = 3
    alpha = 1
    X, U, V = SparseUtilsCython.generateSparseBinaryMatrixPL(
        (numRows, numCols), rank, density=0.2, alpha=alpha, csarray=True)

    # Column sums shifted by one, so no item has a zero count
    itemCounts = numpy.array(X.sum(0) + 1, numpy.int32)

    rowPtr, rowCols = X.nonzeroRowsPtr()
    indPtr = numpy.array(rowPtr, numpy.uint32)
    colInds = numpy.array(rowCols, numpy.uint32)

    # Random "top k" item lists, one row of k item indices per row of X
    k = 5
    randomItems = numpy.random.randint(0, numCols, numRows * k).reshape((numRows, k))
    orderedItems = numpy.array(randomItems, numpy.int32)

    beta = 0.5
    recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(
        indPtr, colInds, orderedItems, itemCounts, beta)

    # Now compute recalls from scratch
    expectedRecalls = numpy.zeros(numRows)

    for row in range(numRows):
        start, stop = indPtr[row], indPtr[row + 1]
        omegai = colInds[start:stop]

        # Weighted count of the recommended items that are nonzero in this row
        hitWeight = 0
        for item in orderedItems[row]:
            if item in omegai:
                hitWeight += 1 / itemCounts[item]**beta

        # Weighted count of all nonzero items in this row
        totalWeight = 0
        for item in omegai:
            totalWeight += 1 / itemCounts[item]**beta

        expectedRecalls[row] = hitWeight / totalWeight

    nptst.assert_array_equal(recalls, expectedRecalls)

    # Now try to match with normal recall
    itemCounts = numpy.ones(numCols, numpy.int32)
    recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(
        indPtr, colInds, orderedItems, itemCounts, beta)
    plainRecalls = MCEvaluatorCython.recallAtk(indPtr, colInds, orderedItems)
    nptst.assert_array_equal(recalls, plainRecalls)
def testStratifiedRecallAtk(self):
    """
    Verify MCEvaluatorCython.stratifiedRecallAtk against a reference
    implementation written in pure Python, and against
    MCEvaluatorCython.recallAtk for the case where all item counts equal 1.
    """
    m = 20
    n = 50
    r = 3
    alpha = 1
    k = 5
    beta = 0.5

    X, U, V = SparseUtilsCython.generateSparseBinaryMatrixPL(
        (m, n), r, density=0.2, alpha=alpha, csarray=True
    )
    itemCounts = numpy.array(X.sum(0) + 1, numpy.int32)

    # Both arrays returned by nonzeroRowsPtr are converted to uint32
    indPtr, colInds = (numpy.array(arr, numpy.uint32) for arr in X.nonzeroRowsPtr())

    # m rows of k randomly drawn item indices in [0, n)
    orderedItems = numpy.array(
        numpy.reshape(numpy.random.randint(0, n, m * k), (m, k)),
        numpy.int32,
    )

    recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(
        indPtr, colInds, orderedItems, itemCounts, beta
    )

    def weight(item):
        # Contribution of one item: the inverse of its count raised to beta
        return 1 / itemCounts[item] ** beta

    # Now compute recalls from scratch
    recalls2 = numpy.zeros(m)
    for i, topItems in enumerate(orderedItems):
        omegai = colInds[indPtr[i]:indPtr[i + 1]]
        numerator = sum(weight(item) for item in topItems if item in omegai)
        denominator = sum(weight(item) for item in omegai)
        recalls2[i] = numerator / denominator

    nptst.assert_array_equal(recalls, recalls2)

    # Now try to match with normal recall
    itemCounts = numpy.ones(n, numpy.int32)
    recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(
        indPtr, colInds, orderedItems, itemCounts, beta
    )
    recalls2 = MCEvaluatorCython.recallAtk(indPtr, colInds, orderedItems)
    nptst.assert_array_equal(recalls, recalls2)
def testGenerateSparseBinaryMatrixPL(self):
    """
    Generate a 200 x 100 matrix with
    SparseUtilsCython.generateSparseBinaryMatrixPL and check that its
    density matches the requested value to 2 decimal places and that its
    shape is as requested.

    The row and column sum histograms are only printed for manual
    inspection; nothing is asserted about them.
    """
    m = 200
    n = 100
    k = 3
    density = 0.1
    # Fixed seed so the generated matrix is reproducible between runs
    numpy.random.seed(21)
    X, U, V = SparseUtilsCython.generateSparseBinaryMatrixPL(
        (m, n), k, density=density, csarray=True)

    # Just check that the distributions are roughly power law
    print(numpy.histogram(X.sum(0)))
    print(numpy.histogram(X.sum(1)))

    self.assertAlmostEqual(X.nnz / float(m * n), density, places=2)
    # assertEquals is a deprecated alias that newer unittest versions no
    # longer provide; assertEqual is the supported spelling
    self.assertEqual(X.shape, (m, n))
import os
import sys
import sppy.io
import numpy
import logging
from sandbox.util.SparseUtilsCython import SparseUtilsCython
from sandbox.util.SparseUtils import SparseUtils
from sandbox.util.PathDefaults import PathDefaults

# Script: generate a synthetic sparse binary matrix, prune it and save it
# in Matrix Market format under the project's data directory.

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
numpy.random.seed(21)

# Matrix dimensions (m x n), the third generator argument k, and target density
m, n = 600, 300
k = 8
density = 0.1

X, U, V = SparseUtilsCython.generateSparseBinaryMatrixPL(
    (m, n), k, density=density, alpha=1, csarray=True)
X = SparseUtils.pruneMatrixRows(X, minNnzRows=10)

# Create the output directory on first use
resultsDir = PathDefaults.getDataDir() + "syntheticRanking/"
if not os.path.exists(resultsDir):
    os.mkdir(resultsDir)

matrixFileName = resultsDir + "dataset1.mtx"
sppy.io.mmwrite(matrixFileName, X)

nnzText = str(X.nnz)
shapeText = str(X.shape)
logging.debug("Non-zero elements: " + nnzText + " shape: " + shapeText)
logging.debug("Saved file: " + matrixFileName)