Example #1
0
    def testGenerateSparseBinaryMatrix(self):
        """
        Exercise SparseUtils.generateSparseBinaryMatrix for both output
        formats (scipy sparse matrix and sppy csarray) and check, when per-row
        quantiles vary, that the generated matrix matches a reference built by
        thresholding Z = (U*s).dot(V.T) at the exact row percentiles.
        """
        m = 5 
        n = 10 
        k = 3
        quantile = 0.7

        #Default (scipy) output: 3 of the n=10 entries are expected per row
        numpy.random.seed(21)
        X = SparseUtils.generateSparseBinaryMatrix((m,n), k, quantile)
        Xscipy = numpy.array(X.todense()) 
        nptst.assert_array_equal(numpy.array(X.sum(1)).flatten(), numpy.ones(m)*3)

        #With quantile 0 the matrix should be (almost) all ones. X is 0/1 so a 
        #norm below 1.1 tolerates at most one entry differing from 1. 
        quantile = 0.0 
        X = SparseUtils.generateSparseBinaryMatrix((m,n), k, quantile)
        self.assertTrue(numpy.linalg.norm(X.toarray() - numpy.ones((m,n))) < 1.1)

        #Same checks for the csarray output, using the same seed 
        quantile = 0.7
        numpy.random.seed(21)
        X = SparseUtils.generateSparseBinaryMatrix((m,n), k, quantile, csarray=True)
        Xcsarray = X.toarray()
        nptst.assert_array_equal(numpy.array(X.sum(1)).flatten(), numpy.ones(m)*3)

        quantile = 0.0 
        X = SparseUtils.generateSparseBinaryMatrix((m,n), k, quantile, csarray=True)
        self.assertTrue(numpy.linalg.norm(X.toarray() - numpy.ones((m,n))) < 1.1)

        #Identical seeds must give identical matrices in both formats 
        nptst.assert_array_equal(Xcsarray, Xscipy)

        #Test variation in the quantiles, first with a small and then with a 
        #larger standard deviation 
        w = 0.7
        for sd in (0.1, 0.5): 
            X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), k, w, sd=sd, csarray=True, verbose=True)

            Z = (U*s).dot(V.T)
            X2 = numpy.zeros((m, n))
            r2 = numpy.zeros(m)
            for i in range(m): 
                r2[i] = numpy.percentile(Z[i, :], wv[i]*100)
                #Use >= as generateSparseBinaryMatrix does, so that entries 
                #equal to the threshold are kept in the reference as well 
                X2[i, Z[i, :]>=r2[i]] = 1 
            r = SparseUtilsCython.computeR2(U*s, V, wv)

            nptst.assert_array_almost_equal(X.toarray(), X2)
            nptst.assert_array_almost_equal(r, r2)
Example #2
0
 def generateSparseBinaryMatrix(shape, p, w=0.9, sd=0, csarray=False, verbose=False, indsPerRow=50):
     """
     Create an underlying matrix Z = UsV.T of rank p and then threshold each 
     row to obtain a 0/1 matrix. Row i is thresholded at the value r[i] 
     returned by SparseUtilsCython.computeR2 for the quantile wv[i]: entries 
     greater than or equal to r[i] become 1 and entries below it become 0. 
     Hence a small w keeps most entries of a row and a w near 1 keeps few. 

     :param shape: the (m, n) shape of the generated matrix 
     :param p: passed to SparseUtils.generateLowRank as the rank of Z 
     :param w: the mean quantile used for the rows 
     :param sd: standard deviation of the Gaussian noise added to w for each 
         row; the resulting quantiles are clipped to [0, 1] 
     :param csarray: if True return a sppy.csarray with row storage, otherwise 
         a scipy.sparse.csr_matrix 
     :param verbose: if True return (X, U, s, V, wv) rather than just X 
     :param indsPerRow: forwarded to SparseUtilsCython.computeR2 
     """
     m, n = shape
     U, s, V = SparseUtils.generateLowRank(shape, p)
     
     X = (U*s).dot(V.T)
     
     wv = numpy.random.randn(m)*sd + w
     wv = numpy.clip(wv, 0, 1)
     r = SparseUtilsCython.computeR2((U*s), V, wv, indsPerRow=indsPerRow)
     
     for i in range(m):
         #Compute both masks from the original values before writing. Testing 
         #the row again after the 1s are written would zero them whenever 
         #r[i] > 1, wiping out the whole row. 
         keepInds = X[i, :] >= r[i]
         dropInds = X[i, :] < r[i]
         X[i, keepInds] = 1
         X[i, dropInds] = 0
     
     if csarray:
         import sppy
         X = sppy.csarray(X, storagetype="row")
     else:
         X = scipy.sparse.csr_matrix(X)
         
     if verbose: 
         return X, U, s, V, wv 
     else: 
         return X
Example #3
0
    def testComputeR2(self): 
        """
        Compare the thresholds returned by SparseUtilsCython.computeR2 with 
        the exact row quantiles of Z = U.dot(V.T): the row maxima for w=1, the 
        row minima for w=0 and numpy.percentile for intermediate values. 
        """
        def relativeError(expected, actual): 
            #Error of actual, normalised by the norm of the expected values 
            return numpy.linalg.norm(expected - actual)/numpy.linalg.norm(expected)

        def rowPercentiles(Z, w): 
            #Exact w[i]-quantile of each row of Z 
            return numpy.array([numpy.percentile(Z[i, :], w[i]*100.0) for i in range(Z.shape[0])])

        m = 10 
        n = 15
        U = numpy.random.rand(m, 5)
        V = numpy.random.rand(n, 5)
        
        Z = U.dot(V.T)
        tol = 0.1
        
        #A quantile of 1 should give the row maxima 
        w = numpy.ones(m)*1.0
        r = SparseUtilsCython.computeR2(U, V, w, indsPerRow=1000)
        self.assertTrue(relativeError(Z.max(1), r) < tol)
        
        #A quantile of 0 should give the row minima 
        w =  numpy.zeros(m)
        r = SparseUtilsCython.computeR2(U, V, w, indsPerRow=1000)
        self.assertTrue(relativeError(Z.min(1), r) < tol)
        
        #Mixed quantiles: minima for the first 5 rows, maxima for the rest 
        w = numpy.zeros(m)
        w[5:10] = 1
        r = SparseUtilsCython.computeR2(U, V, w, indsPerRow=1000)
        self.assertTrue(relativeError(Z[0:5, :].min(1), r[0:5]) < tol)
        #Normalise by the row maxima, the quantity being compared 
        self.assertTrue(relativeError(Z[5:, :].max(1), r[5:]) < tol)
        
        #A constant intermediate quantile 
        w =  numpy.ones(m)*0.3
        r = SparseUtilsCython.computeR2(U, V, w, indsPerRow=1000) 
        r2 = rowPercentiles(Z, w)
        self.assertTrue(relativeError(r2, r) < tol)
        
        #Random quantiles with the default indsPerRow 
        w =  numpy.random.rand(m)
        r = SparseUtilsCython.computeR2(U, V, w) 
        r2 = rowPercentiles(Z, w)
        self.assertTrue(relativeError(r2, r) < tol)       
        
        #Try a larger matrix 
        m = 100 
        n = 105
        U = numpy.random.rand(m, 5)
        V = numpy.random.rand(n, 5)
        
        Z = U.dot(V.T)
        w =  numpy.random.rand(m)
        r = SparseUtilsCython.computeR2(U, V, w, indsPerRow=10000) 
        r2 = rowPercentiles(Z, w)
        
        self.assertTrue(numpy.linalg.norm(r-r2) < 0.4)
Example #4
0
    def generateSparseBinaryMatrix(shape,
                                   p,
                                   w=0.9,
                                   sd=0,
                                   csarray=False,
                                   verbose=False,
                                   indsPerRow=50):
        """
        Create an underlying matrix Z = UsV.T of rank p and then threshold
        each row to obtain a 0/1 matrix. Row i is thresholded at the value
        r[i] returned by SparseUtilsCython.computeR2 for the quantile wv[i]:
        entries greater than or equal to r[i] become 1 and entries below it
        become 0. Hence a small w keeps most entries of a row and a w near 1
        keeps few.

        :param shape: the (m, n) shape of the generated matrix
        :param p: passed to SparseUtils.generateLowRank as the rank of Z
        :param w: the mean quantile used for the rows
        :param sd: standard deviation of the Gaussian noise added to w for
            each row; the resulting quantiles are clipped to [0, 1]
        :param csarray: if True return a sppy.csarray with row storage,
            otherwise a scipy.sparse.csr_matrix
        :param verbose: if True return (X, U, s, V, wv) rather than just X
        :param indsPerRow: forwarded to SparseUtilsCython.computeR2
        """
        m, n = shape
        U, s, V = SparseUtils.generateLowRank(shape, p)

        X = (U * s).dot(V.T)

        wv = numpy.random.randn(m) * sd + w
        wv = numpy.clip(wv, 0, 1)
        r = SparseUtilsCython.computeR2((U * s), V, wv, indsPerRow=indsPerRow)

        for i in range(m):
            # Build both masks from the untouched row first. Re-testing the
            # row after the 1s are written would zero them again whenever
            # r[i] > 1, leaving the row empty.
            atOrAbove = X[i, :] >= r[i]
            below = X[i, :] < r[i]
            X[i, atOrAbove] = 1
            X[i, below] = 0

        if csarray:
            import sppy
            X = sppy.csarray(X, storagetype="row")
        else:
            X = scipy.sparse.csr_matrix(X)

        if verbose:
            return X, U, s, V, wv
        else:
            return X
Example #5
0
    def testGenerateSparseBinaryMatrix(self):
        """
        Exercise SparseUtils.generateSparseBinaryMatrix for both output
        formats (scipy sparse matrix and sppy csarray) and check, when per-row
        quantiles vary, that the generated matrix matches a reference built by
        thresholding Z = (U * s).dot(V.T) at the exact row percentiles.
        """
        m = 5
        n = 10
        k = 3
        quantile = 0.7

        # Default (scipy) output: 3 of the n=10 entries are expected per row
        numpy.random.seed(21)
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, quantile)
        Xscipy = numpy.array(X.todense())

        nptst.assert_array_equal(
            numpy.array(X.sum(1)).flatten(),
            numpy.ones(m) * 3)

        # With quantile 0 the matrix should be (almost) all ones. X is 0/1 so
        # a norm below 1.1 tolerates at most one entry differing from 1.
        quantile = 0.0
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, quantile)
        self.assertTrue(
            numpy.linalg.norm(X.toarray() - numpy.ones((m, n))) < 1.1)

        # Same checks for the csarray output, using the same seed
        quantile = 0.7
        numpy.random.seed(21)
        X = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                   k,
                                                   quantile,
                                                   csarray=True)
        Xcsarray = X.toarray()

        nptst.assert_array_equal(
            numpy.array(X.sum(1)).flatten(),
            numpy.ones(m) * 3)

        quantile = 0.0
        X = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                   k,
                                                   quantile,
                                                   csarray=True)
        self.assertTrue(
            numpy.linalg.norm(X.toarray() - numpy.ones((m, n))) < 1.1)

        # Identical seeds must give identical matrices in both formats
        nptst.assert_array_equal(Xcsarray, Xscipy)

        # Test variation in the quantiles, first with a small and then with a
        # larger standard deviation
        w = 0.7
        for sd in (0.1, 0.5):
            X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix(
                (m, n), k, w, sd=sd, csarray=True, verbose=True)

            Z = (U * s).dot(V.T)
            X2 = numpy.zeros((m, n))
            r2 = numpy.zeros(m)
            for i in range(m):
                r2[i] = numpy.percentile(Z[i, :], wv[i] * 100)
                # Use >= as generateSparseBinaryMatrix does, so that entries
                # equal to the threshold are kept in the reference as well
                X2[i, Z[i, :] >= r2[i]] = 1
            r = SparseUtilsCython.computeR2(U * s, V, wv)

            nptst.assert_array_almost_equal(X.toarray(), X2)
            nptst.assert_array_almost_equal(r, r2)