def testPartialOuterProduct(self):
    """
    Check SparseUtilsCython.partialOuterProduct against numpy.outer, first
    on every entry of the outer product and then on a random subset of
    (row, col) index pairs.
    """
    m = 15
    n = 10

    u = numpy.random.rand(m)
    v = numpy.random.rand(n)
    Y = numpy.outer(u, v)

    #Ask for every nonzero entry of Y. The reshape below relies on Y having
    #no exact zeros, so that vals holds m*n elements in row-major order
    inds = numpy.nonzero(Y)
    rowInds = numpy.array(inds[0], numpy.int32)
    colInds = numpy.array(inds[1], numpy.int32)
    vals = SparseUtilsCython.partialOuterProduct(rowInds, colInds, u, v)
    X = numpy.reshape(vals, Y.shape)

    nptst.assert_almost_equal(X, Y)

    #Try just some indices. A is n x n, so its indices are valid positions
    #in the m x n matrix Y because n <= m
    density = 0.2
    A = scipy.sparse.rand(n, n, density)
    inds = A.nonzero()
    rowInds = numpy.array(inds[0], numpy.int32)
    colInds = numpy.array(inds[1], numpy.int32)

    vals = SparseUtilsCython.partialOuterProduct(rowInds, colInds, u, v)

    #assertAlmostEqual/assertEqual replace the deprecated assert*Equals
    #aliases, which no longer exist in Python 3.12
    for i, (j, k) in enumerate(zip(inds[0], inds[1])):
        self.assertAlmostEqual(vals[i], Y[j, k])

    self.assertEqual(A.nnz, inds[0].shape[0])
def uncenter(X, mu1, mu2):
    """
    Uncenter a matrix with mu1 and mu2, the row and columns means of the
    original matrix. X is the centered matrix.

    :param X: The centered matrix; its nonzero entries are updated in place.
    :param mu1: Row means, one value per row of X.
    :param mu2: Column means, one value per column of X.
    :return: X, with mu1[i] + mu2[j] added to every nonzero entry X[i, j].
    """
    rowInds, colInds = X.nonzero()
    rowInds = numpy.array(rowInds, numpy.int32)
    colInds = numpy.array(colInds, numpy.int32)

    #The builtin float is used as the dtype: the numpy.float alias was
    #removed in NumPy 1.24
    #Outer product with a vector of ones picks out mu1[i] for each entry
    vals1 = SparseUtilsCython.partialOuterProduct(
        rowInds, colInds, numpy.array(mu1, float), numpy.ones(X.shape[1]))
    #... and likewise mu2[j] for each entry
    vals2 = SparseUtilsCython.partialOuterProduct(
        rowInds, colInds, numpy.ones(X.shape[0]), numpy.array(mu2, float))
    X[rowInds, colInds] = X[rowInds, colInds] + vals1 + vals2

    return X
def centerRows(X, mu=None, inds=None):
    """
    Simply subtract the mean value of a row from each non-zero element.

    :param X: The matrix to center; it is modified in place.
    :param mu: Optional array of row means to subtract. If None, the mean of
        the indexed entries of each row is computed (0 for rows without any).
    :param inds: Optional (rowInds, colInds) pair giving the entries to
        center. If None, X.nonzero() is used.
    :return: The tuple (X, mu).
    """
    #Use identity tests here: "== None" is evaluated elementwise when an
    #array is passed, which makes the "if" ambiguous
    if inds is None:
        rowInds, colInds = X.nonzero()
    else:
        rowInds, colInds = inds

    rowInds = numpy.array(rowInds, numpy.int32)
    colInds = numpy.array(colInds, numpy.int32)

    if mu is None:
        #This is the mean of the nonzero values in each row
        nonZeroCounts = numpy.bincount(rowInds, minlength=X.shape[0])
        emptyRows = nonZeroCounts == 0
        #Bump empty rows to a count of 1 so the division below is defined
        nonZeroCounts += emptyRows

        #This is required because when we do X.sum(1) for centering it uses the same
        #dtype as X to store the sum, and this can result in overflow for e.g. uint8
        if X.dtype == numpy.uint8:
            sumCol = SparseUtilsCython.sumCols(
                rowInds, numpy.array(X[rowInds, colInds]).flatten(), X.shape[0])
        else:
            sumCol = numpy.array(X.sum(1)).flatten()

        mu = sumCol / nonZeroCounts
        mu[emptyRows] = 0

    #The builtin float is used as the dtype: the numpy.float alias was
    #removed in NumPy 1.24
    vals = SparseUtilsCython.partialOuterProduct(
        rowInds, colInds, numpy.array(mu, float), numpy.ones(X.shape[1]))
    #Write back through the same index arrays vals was computed for, so the
    #values and positions are guaranteed to line up
    X[rowInds, colInds] = numpy.array(X[rowInds, colInds] - vals, float)

    return X, mu
def centerCols(X, mu=None, inds=None):
    """
    Simply subtract the mean value of a column from each non-zero element.

    :param X: The matrix to center; it is modified in place.
    :param mu: Optional array of column means to subtract. If None, the mean
        of the indexed entries of each column is computed (0 for columns
        without any).
    :param inds: Optional (rowInds, colInds) pair giving the entries to
        center. If None, X.nonzero() is used.
    :return: The tuple (X, mu).
    """
    #Use identity tests here: "== None" is evaluated elementwise when an
    #array is passed, which makes the "if" ambiguous
    if inds is None:
        rowInds, colInds = X.nonzero()
    else:
        rowInds, colInds = inds

    rowInds = numpy.array(rowInds, numpy.int32)
    colInds = numpy.array(colInds, numpy.int32)

    #The builtin float is used as the dtype throughout: the numpy.float
    #alias was removed in NumPy 1.24
    if mu is None:
        #This is the mean of the nonzero values in each col
        nonZeroCounts = numpy.bincount(colInds, minlength=X.shape[1])
        emptyCols = nonZeroCounts == 0
        #Bump empty columns to a count of 1 so the division below is defined
        nonZeroCounts += emptyCols

        mu = numpy.array(X.sum(0), float).ravel() / nonZeroCounts
        mu[emptyCols] = 0

    vals = SparseUtilsCython.partialOuterProduct(
        rowInds, colInds, numpy.ones(X.shape[0]), numpy.array(mu, float))
    #Write back through the same index arrays vals was computed for, so the
    #values and positions are guaranteed to line up
    X[rowInds, colInds] = numpy.array(X[rowInds, colInds] - vals, float)

    return X, mu
def centerRows(X, mu=None, inds=None):
    """
    Simply subtract the mean value of a row from each non-zero element.

    :param X: The matrix to center; it is modified in place.
    :param mu: Optional array of row means to subtract. If None, the mean of
        the indexed entries of each row is computed (0 for rows without any).
    :param inds: Optional (rowInds, colInds) pair giving the entries to
        center. If None, X.nonzero() is used.
    :return: The tuple (X, mu).
    """
    #Use identity tests here: "== None" is evaluated elementwise when an
    #array is passed, which makes the "if" ambiguous
    if inds is None:
        rowInds, colInds = X.nonzero()
    else:
        rowInds, colInds = inds

    rowInds = numpy.array(rowInds, numpy.int32)
    colInds = numpy.array(colInds, numpy.int32)

    if mu is None:
        #This is the mean of the nonzero values in each row
        nonZeroCounts = numpy.bincount(rowInds, minlength=X.shape[0])
        emptyRows = nonZeroCounts == 0
        #Bump empty rows to a count of 1 so the division below is defined
        nonZeroCounts += emptyRows

        #This is required because when we do X.sum(1) for centering it uses the same
        #dtype as X to store the sum, and this can result in overflow for e.g. uint8
        if X.dtype == numpy.uint8:
            sumCol = SparseUtilsCython.sumCols(
                rowInds, numpy.array(X[rowInds, colInds]).flatten(), X.shape[0])
        else:
            sumCol = numpy.array(X.sum(1)).flatten()

        mu = sumCol / nonZeroCounts
        mu[emptyRows] = 0

    #The builtin float is used as the dtype: the numpy.float alias was
    #removed in NumPy 1.24
    vals = SparseUtilsCython.partialOuterProduct(
        rowInds, colInds, numpy.array(mu, float), numpy.ones(X.shape[1]))
    #Write back through the same index arrays vals was computed for, so the
    #values and positions are guaranteed to line up
    X[rowInds, colInds] = numpy.array(X[rowInds, colInds] - vals, float)

    return X, mu
def unshrink(self, X, U, V):
    """
    Perform post-processing on a factorisation of a matrix X using the factor
    vectors U and V.

    A random sample of at most self.postProcessSamples nonzero entries of X
    is fitted, in the least squares sense, by a weighted sum of the rank-one
    terms formed from matching columns of U and V.

    :param X: The factorised matrix; only its nonzero entries are sampled.
    :param U: Left factor vectors, one per column.
    :param V: Right factor vectors, one per column.
    :return: The array s of fitted weights, one per column of U.
    """
    logging.debug("Post processing singular values")

    allRowInds, allColInds = X.nonzero()
    #Sample positions within the nonzero entries. X.data can also hold
    #explicitly stored zeros, which X.nonzero() skips, so its length is not
    #a safe upper bound for indexing the arrays above
    numNonzeros = allRowInds.shape[0]

    #Fix for versions of numpy < 1.7
    #Draw with replacement then de-duplicate (presumably a stand-in for
    #numpy.random.choice, which older numpy lacks)
    inds = numpy.unique(
        numpy.random.randint(
            0, numNonzeros,
            numpy.min([self.postProcessSamples, numNonzeros])))
    a = numpy.array(X[allRowInds, allColInds]).ravel()[inds]

    B = numpy.zeros((a.shape[0], U.shape[1]))

    rowInds = numpy.array(allRowInds[inds], numpy.int32)
    colInds = numpy.array(allColInds[inds], numpy.int32)

    #Populate B
    for i in range(U.shape[1]):
        B[:, i] = SparseUtilsCython.partialOuterProduct(
            rowInds, colInds, U[:, i], V[:, i])

    #Least squares solution of B s = a via the normal equations
    s = numpy.linalg.pinv(B.T.dot(B)).dot(B.T).dot(a)

    return s
def uncenter(X, mu1, mu2):
    """
    Uncenter a matrix with mu1 and mu2, the row and columns means of the
    original matrix. X is the centered matrix.

    :param X: The centered matrix; its nonzero entries are updated in place.
    :param mu1: Row means, one value per row of X.
    :param mu2: Column means, one value per column of X.
    :return: X, with mu1[i] + mu2[j] added to every nonzero entry X[i, j].
    """
    rowInds, colInds = X.nonzero()
    rowInds = numpy.array(rowInds, numpy.int32)
    colInds = numpy.array(colInds, numpy.int32)

    #The builtin float is used as the dtype: the numpy.float alias was
    #removed in NumPy 1.24
    #Outer product with a vector of ones picks out mu1[i] for each entry
    vals1 = SparseUtilsCython.partialOuterProduct(
        rowInds, colInds, numpy.array(mu1, float), numpy.ones(X.shape[1]))
    #... and likewise mu2[j] for each entry
    vals2 = SparseUtilsCython.partialOuterProduct(
        rowInds, colInds, numpy.ones(X.shape[0]), numpy.array(mu2, float))
    X[rowInds, colInds] = X[rowInds, colInds] + vals1 + vals2

    return X
def uncenterRows(X, mu):
    """
    Take a matrix with rows centered using mu, and return them to their
    original state. Note that one should call X.eliminate_zeros() beforehand.

    :param X: The row-centered matrix; its nonzero entries are updated in
        place.
    :param mu: Array of row means, with one entry per row of X.
    :return: X, with mu[i] added to every nonzero entry in row i.
    :raises ValueError: If mu does not have one entry per row of X.
    """
    if X.shape[0] != mu.shape[0]:
        raise ValueError("Invalid number of rows")

    rowInds, colInds = X.nonzero()
    rowInds = numpy.array(rowInds, numpy.int32)
    colInds = numpy.array(colInds, numpy.int32)

    #The builtin float is used as the dtype: the numpy.float alias was
    #removed in NumPy 1.24
    #Outer product with a vector of ones picks out mu[i] for each entry
    vals = SparseUtilsCython.partialOuterProduct(
        rowInds, colInds, numpy.array(mu, float), numpy.ones(X.shape[1]))
    X[rowInds, colInds] = numpy.array(X[rowInds, colInds] + vals, float)

    return X
def uncenterRows(X, mu):
    """
    Take a matrix with rows centered using mu, and return them to their
    original state. Note that one should call X.eliminate_zeros() beforehand.

    :param X: The row-centered matrix; its nonzero entries are updated in
        place.
    :param mu: Array of row means, with one entry per row of X.
    :return: X, with mu[i] added to every nonzero entry in row i.
    :raises ValueError: If mu does not have one entry per row of X.
    """
    if X.shape[0] != mu.shape[0]:
        raise ValueError("Invalid number of rows")

    rowInds, colInds = X.nonzero()
    rowInds = numpy.array(rowInds, numpy.int32)
    colInds = numpy.array(colInds, numpy.int32)

    #The builtin float is used as the dtype: the numpy.float alias was
    #removed in NumPy 1.24
    #Outer product with a vector of ones picks out mu[i] for each entry
    vals = SparseUtilsCython.partialOuterProduct(
        rowInds, colInds, numpy.array(mu, float), numpy.ones(X.shape[1]))
    X[rowInds, colInds] = numpy.array(X[rowInds, colInds] + vals, float)

    return X
def centerCols(X, mu=None, inds=None):
    """
    Simply subtract the mean value of a column from each non-zero element.

    :param X: The matrix to center; it is modified in place.
    :param mu: Optional array of column means to subtract. If None, the mean
        of the indexed entries of each column is computed (0 for columns
        without any).
    :param inds: Optional (rowInds, colInds) pair giving the entries to
        center. If None, X.nonzero() is used.
    :return: The tuple (X, mu).
    """
    #Use identity tests here: "== None" is evaluated elementwise when an
    #array is passed, which makes the "if" ambiguous
    if inds is None:
        rowInds, colInds = X.nonzero()
    else:
        rowInds, colInds = inds

    rowInds = numpy.array(rowInds, numpy.int32)
    colInds = numpy.array(colInds, numpy.int32)

    #The builtin float is used as the dtype throughout: the numpy.float
    #alias was removed in NumPy 1.24
    if mu is None:
        #This is the mean of the nonzero values in each col
        nonZeroCounts = numpy.bincount(colInds, minlength=X.shape[1])
        emptyCols = nonZeroCounts == 0
        #Bump empty columns to a count of 1 so the division below is defined
        nonZeroCounts += emptyCols

        mu = numpy.array(X.sum(0), float).ravel() / nonZeroCounts
        mu[emptyCols] = 0

    vals = SparseUtilsCython.partialOuterProduct(
        rowInds, colInds, numpy.ones(X.shape[0]), numpy.array(mu, float))
    #Write back through the same index arrays vals was computed for, so the
    #values and positions are guaranteed to line up
    X[rowInds, colInds] = numpy.array(X[rowInds, colInds] - vals, float)

    return X, mu
def unshrink(self, X, U, V):
    """
    Perform post-processing on a factorisation of a matrix X using the factor
    vectors U and V.

    A random sample of at most self.postProcessSamples nonzero entries of X
    is fitted, in the least squares sense, by a weighted sum of the rank-one
    terms formed from matching columns of U and V.

    :param X: The factorised matrix; only its nonzero entries are sampled.
    :param U: Left factor vectors, one per column.
    :param V: Right factor vectors, one per column.
    :return: The array s of fitted weights, one per column of U.
    """
    logging.debug("Post processing singular values")

    allRowInds, allColInds = X.nonzero()
    #Sample positions within the nonzero entries. X.data can also hold
    #explicitly stored zeros, which X.nonzero() skips, so its length is not
    #a safe upper bound for indexing the arrays above
    numNonzeros = allRowInds.shape[0]

    #Fix for versions of numpy < 1.7
    #Draw with replacement then de-duplicate (presumably a stand-in for
    #numpy.random.choice, which older numpy lacks)
    inds = numpy.unique(
        numpy.random.randint(
            0, numNonzeros,
            numpy.min([self.postProcessSamples, numNonzeros])))
    a = numpy.array(X[allRowInds, allColInds]).ravel()[inds]

    B = numpy.zeros((a.shape[0], U.shape[1]))

    rowInds = numpy.array(allRowInds[inds], numpy.int32)
    colInds = numpy.array(allColInds[inds], numpy.int32)

    #Populate B
    for i in range(U.shape[1]):
        B[:, i] = SparseUtilsCython.partialOuterProduct(
            rowInds, colInds, U[:, i], V[:, i])

    #Least squares solution of B s = a via the normal equations
    s = numpy.linalg.pinv(B.T.dot(B)).dot(B.T).dot(a)

    return s