def testPartialReconstructValsPQ(self):
    """
    Check partialReconstructValsPQ recovers a dense matrix from its SVD
    factors, first at every index and then at a random subset of indices.
    """
    n = 10
    Y = numpy.random.rand(n, n)

    U, s, V = numpy.linalg.svd(Y)
    V = V.T
    # The Cython routine requires C-contiguous inputs
    V = numpy.ascontiguousarray(V)

    # Reconstruct at every nonzero index of Y (all n*n entries, since Y is
    # dense random) and compare against Y itself.
    rowInds, colInds = numpy.nonzero(Y)
    rowInds = numpy.array(rowInds, numpy.int32)
    colInds = numpy.array(colInds, numpy.int32)
    vals = SparseUtilsCython.partialReconstructValsPQ(rowInds, colInds, numpy.ascontiguousarray(U*s), V)
    X = numpy.reshape(vals, Y.shape)

    nptst.assert_almost_equal(X, Y)

    # Try just some indices, drawn from a sparse random mask
    density = 0.2
    A = scipy.sparse.rand(n, n, density)
    inds = A.nonzero()
    rowInds = numpy.array(inds[0], numpy.int32)
    colInds = numpy.array(inds[1], numpy.int32)

    vals = SparseUtilsCython.partialReconstructValsPQ(rowInds, colInds, numpy.ascontiguousarray(U*s), V)

    for i in range(inds[0].shape[0]):
        j = inds[0][i]
        k = inds[1][i]
        # assertAlmostEqual/assertEqual: the plural "...Equals" aliases were
        # deprecated and removed in Python 3.12.
        self.assertAlmostEqual(vals[i], Y[j, k])

    self.assertEqual(A.nnz, inds[0].shape[0])
def testPartialReconstructValsPQ2(self):
    """
    Reconstruct random rectangular matrices of varying sizes from their
    (thin) SVD factors and check the result matches the original matrix.
    """
    numRuns = 10

    for run in range(numRuns):
        m = numpy.random.randint(5, 50)
        n = numpy.random.randint(5, 50)
        Y = numpy.random.rand(m, n)

        U, s, V = numpy.linalg.svd(Y, full_matrices=0)
        # Transpose and make C-contiguous for the Cython routine
        V = numpy.ascontiguousarray(V.T)

        rowInds, colInds = numpy.nonzero(Y)
        rowInds = numpy.array(rowInds, numpy.int32)
        colInds = numpy.array(colInds, numpy.int32)

        vals = SparseUtilsCython.partialReconstructValsPQ(rowInds, colInds, numpy.ascontiguousarray(U*s), V)
        X = numpy.reshape(vals, Y.shape)

        nptst.assert_almost_equal(X, Y)
def generateMatrices(self):
    """
    This function returns a list of 20 train/test matrices for incremental
    collaborative filtering. Each item in the list is (trainX, testX).
    """
    numpy.random.seed(21)
    r = 50

    U, s, V = SparseUtils.generateLowRank((self.endM, self.endN), r, normalise=False)

    self.startNumInds = self.pnz * self.startM * self.startN
    self.endNumInds = self.pnz * self.endM * self.endN

    if not self.nonUniform:
        inds = numpy.random.randint(0, self.endM*self.endN - 1, self.endNumInds)
    else:
        logging.debug("Using non uniform dataset")
        # Gaussian-centred flat indices; numpy.int was removed in NumPy 1.24,
        # so use the explicit numpy.int64 dtype.
        inds = numpy.array(numpy.random.randn(self.endNumInds)*(self.endM*self.endN - 1)/4 + (self.endM*self.endN - 1)/2, numpy.int64)
        inds = numpy.clip(inds, 0, (self.endM*self.endN - 1))

    inds = numpy.unique(inds)
    numpy.random.shuffle(inds)
    self.endNumInds = inds.shape[0]

    rowInds, colInds = numpy.unravel_index(inds, (self.endM, self.endN))
    rowInds = numpy.array(rowInds, numpy.int32)
    colInds = numpy.array(colInds, numpy.int32)
    vals = SparseUtilsCython.partialReconstructValsPQ(rowInds, colInds, U*s, V)
    vals /= vals.std()
    vals += numpy.random.randn(vals.shape[0])*self.noise

    # numpy.bool was removed in NumPy 1.24; the builtin bool is the dtype.
    isTrainInd = numpy.array(numpy.random.rand(inds.shape[0]) <= self.trainSplit, bool)
    # The original asserted the raw (truthy) float difference, which is
    # vacuous; check the empirical split is close to the requested one.
    assert abs(self.trainSplit - isTrainInd.sum()/float(isTrainInd.shape[0])) < 0.1

    XMaskTrain = scipy.sparse.csc_matrix((isTrainInd, (rowInds, colInds)), dtype=bool, shape=(self.endM, self.endN))
    XMaskTest = scipy.sparse.csc_matrix((numpy.logical_not(isTrainInd), (rowInds, colInds)), dtype=bool, shape=(self.endM, self.endN))

    # In the first phase, the matrices stay the same size but there are more
    # nonzero entries
    numMatrices = 10
    stepList = numpy.linspace(self.startNumInds, self.endNumInds, numMatrices)
    trainXList = []
    testXList = []

    for i in range(numMatrices):
        # linspace yields floats; slice bounds must be integers
        step = int(stepList[i])
        currentVals = vals[0:step]
        currentRowInds = rowInds[0:step]
        currentColInds = colInds[0:step]

        # numpy.float was removed in NumPy 1.24; builtin float is the dtype.
        X = scipy.sparse.csc_matrix((currentVals, (currentRowInds, currentColInds)), dtype=float, shape=(self.endM, self.endN))

        #print("pnz=" + str(X.nnz/float(X.shape[0]*X.shape[1])))

        trainX = X.multiply(XMaskTrain)[0:self.startM, 0:self.startN]
        trainX.eliminate_zeros()
        trainX.prune()
        testX = X.multiply(XMaskTest)[0:self.startM, 0:self.startN]
        testX.eliminate_zeros()
        testX.prune()

        trainXList.append(trainX)
        testXList.append(testX)

    # Now we increase the size of matrix
    numMatrices = 10
    mStepList = numpy.linspace(self.startM, self.endM, numMatrices)
    nStepList = numpy.linspace(self.startN, self.endN, numMatrices)
    X = scipy.sparse.csc_matrix((vals, (rowInds, colInds)), dtype=float, shape=(self.endM, self.endN))

    for i in range(numMatrices):
        # Cast the linspace values to ints before slicing rows/columns
        mStep = int(mStepList[i])
        nStep = int(nStepList[i])
        trainX = X.multiply(XMaskTrain)[0:mStep, :][:, 0:nStep]
        trainX.eliminate_zeros()
        trainX.prune()
        testX = X.multiply(XMaskTest)[0:mStep, :][:, 0:nStep]
        testX.eliminate_zeros()
        testX.prune()

        trainXList.append(trainX)
        testXList.append(testX)

    return trainXList, testXList