Example #1
def solve(c,G,h,dims,A=None,b=None, **kwargs):
    """ This Python routine "unpacks" scipy sparse matrices G and A into the
        data structures that we need for calling ECOS' csolve routine.

        If G and h are both None, then we will automatically create an "empty"
        CSC matrix to use with ECOS.

        It is *not* compatible with CVXOPT spmatrix and matrix, although
        it would not be very difficult to make it compatible. We put the
        onus on the user to convert CVXOPT matrix types into numpy, scipy
        array types.
    """
    if G is not None and not sparse.issparse(G):
        raise TypeError("G is required to be a sparse matrix")
    if A is not None and not sparse.issparse(A):
        raise TypeError("A is required to be a sparse matrix")

    if G is not None and not sparse.isspmatrix_csc(G):
        warn("Converting G to a CSC matrix; may take a while.")
        G = G.tocsc()
    if A is not None and not sparse.isspmatrix_csc(A):
        warn("Converting A to a CSC matrix; may take a while.")
        A = A.tocsc()

    # set the dimensions
    # note that we forcibly coerce the shape values to Python ints
    # (C longs) in case of shenanigans with the underlying storage
    m,n1 = (0,len(c)) if G is None else map(int, G.get_shape())
    p,n2 = (0,n1) if A is None else map(int, A.shape)

    if n1 != n2:
        raise TypeError("Columns of A and G don't match")


    # G.sort_indices() # ECHU: performance hit? do we need this?
    # if A is not None: A.sort_indices()

    if (G is None and h is not None) or (G is not None and h is None):
        raise TypeError("G and h must be supplied together")

    if (A is None and b is not None) or (A is not None and b is None):
        raise TypeError("A and b must be supplied together")

    if G is None:
        data = np.zeros((0,), dtype=np.double)
        indices = np.zeros((0,), dtype=int)   # np.int was removed from NumPy
        colptr = np.zeros((n1 + 1,), dtype=int)
        h = np.zeros((0,))
    else:
        data, indices, colptr = G.data, G.indices, G.indptr

    if A is None:
        return _ecos.csolve((m, n1, p), c, data, indices, colptr, h, dims, **kwargs)
    else:
        return _ecos.csolve((m, n1, p), c, data, indices, colptr, h, dims, A.data, A.indices, A.indptr, b, **kwargs)
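
A minimal usage sketch for the wrapper above, assuming it is exposed as the `ecos` package's `solve` and that `dims` follows ECOS' `{'l': ..., 'q': [...]}` convention for linear and second-order cone sizes:

import numpy as np
import scipy.sparse as sparse
import ecos

# minimize x1 + x2  subject to  -x <= 0  (i.e. x >= 0)
c = np.array([1.0, 1.0])
G = sparse.csc_matrix(-np.eye(2))
h = np.zeros(2)
dims = {'l': 2, 'q': []}  # two linear-cone rows, no second-order cone blocks

sol = ecos.solve(c, G, h, dims)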
Example #2
def spsolve(A, b, permc_spec=None, use_umfpack=True):
    """Solve the sparse linear system Ax=b
    """
    if isspmatrix( b ):
        b = b.toarray()

    if b.ndim > 1:
        if max( b.shape ) == b.size:
            b = b.squeeze()
        else:
            raise ValueError("rhs must be a vector (has shape %s)" % (b.shape,))

    if not (isspmatrix_csc(A) or isspmatrix_csr(A)):
        A = csc_matrix(A)
        warn('spsolve requires CSC or CSR matrix format', SparseEfficiencyWarning)

    A.sort_indices()
    A = A.asfptype()  #upcast to a floating point format

    M, N = A.shape
    if (M != N):
        raise ValueError("matrix must be square (has shape %s)" % ((M, N),))
    if M != b.size:
        raise ValueError("matrix - rhs size mismatch (%s - %s)"
              % (A.shape, b.size))

    use_umfpack = use_umfpack and useUmfpack

    if isUmfpack and use_umfpack:
        if noScikit:
            warn( 'scipy.sparse.linalg.dsolve.umfpack will be removed,'
                    ' install scikits.umfpack instead', DeprecationWarning )
        if A.dtype.char not in 'dD':
            raise ValueError("convert matrix data to double, please, using"
                  " .astype(), or set linsolve.useUmfpack = False")

        b = asarray(b, dtype=A.dtype).reshape(-1)

        family = {'d' : 'di', 'D' : 'zi'}
        umf = umfpack.UmfpackContext( family[A.dtype.char] )
        return umf.linsolve( umfpack.UMFPACK_A, A, b,
                             autoTranspose = True )

    else:
        if isspmatrix_csc(A):
            flag = 1 # CSC format
        elif isspmatrix_csr(A):
            flag = 0 # CSR format
        else:
            A = csc_matrix(A)
            flag = 1

        b = asarray(b, dtype=A.dtype)
        options = dict(ColPerm=permc_spec)
        return _superlu.gssv(N, A.nnz, A.data, A.indices, A.indptr, b, flag,
                             options=options)[0]
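
For comparison, a small self-contained call through the public scipy.sparse.linalg.spsolve wrapper shown above:

import numpy as np
from scipy.sparse import csc_matrix
from scipy.sparse.linalg import spsolve

A = csc_matrix(np.array([[3.0, 0.0], [1.0, 2.0]]))
b = np.array([6.0, 8.0])
x = spsolve(A, b)            # solves A @ x = b
assert np.allclose(A @ x, b)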
Example #3
def solve(c, G, h, dims, A=None, b=None, verbose=True):
    """ This Python routine "unpacks" scipy sparse matrices G and A into the
        data structures that we need for calling ECOS' csolve routine.
        
        If G and h are both None, then we will automatically create an "empty"
        CSC matrix to use with ECOS.
        
        It is *not* compatible with CVXOPT spmatrix and matrix, although
        it would not be very difficult to make it compatible. We put the
        onus on the user to convert CVXOPT matrix types into numpy, scipy
        array types.
    """
    if G is not None and not sparse.issparse(G):
        raise TypeError("G is required to be a sparse matrix")
    if A is not None and not sparse.issparse(A):
        raise TypeError("A is required to be a sparse matrix")

    if G is not None and not sparse.isspmatrix_csc(G):
        warn("Converting G to a CSC matrix; may take a while.")
        G = G.tocsc()
    if A is not None and not sparse.isspmatrix_csc(A):
        warn("Converting A to a CSC matrix; may take a while.")
        A = A.tocsc()

    if G is None:
        m, n1 = 0, len(c)
    else:
        m, n1 = G.shape
    if A is None:
        p, n2 = 0, n1
    else:
        p, n2 = A.shape

    if n1 != n2:
        raise TypeError("Columns of A and G don't match")

    # G.sort_indices() # ECHU: performance hit? do we need this?
    # if A is not None: A.sort_indices()

    if G is None:
        if h is not None:
            raise TypeError("G and h must be supplied together")
        data = np.zeros((0,), dtype=np.double)
        indices = np.zeros((0,), dtype=int)   # np.int was removed from NumPy
        colptr = np.zeros((n1 + 1,), dtype=int)
        h = np.zeros((0,))
    else:
        data, indices, colptr = G.data, G.indices, G.indptr

    if A is None:
        if b is not None:
            raise TypeError("A and b must be supplied together")
        return _ecos.csolve((m, n1, p), c, data, indices, colptr, h, dims, verbose=verbose)
    else:
        return _ecos.csolve((m, n1, p), c, data, indices, colptr, h, dims, A.data, A.indices, A.indptr, b, verbose)
Example #4
    def _getIndx(self, mtx):

        if sp.isspmatrix_csc(mtx):
            indx = mtx.indices
            self.isCSR = 0
        elif sp.isspmatrix_csr(mtx):
            indx = mtx.indices
            self.isCSR = 1
        else:
            raise TypeError("must be a CSC/CSR matrix (is %s)" % mtx.__class__)

        ##
        # Should check types of indices to correspond to familyTypes.
        if self.family[1] == "i":
            if (indx.dtype != np.dtype("i")) or mtx.indptr.dtype != np.dtype("i"):
                raise ValueError("matrix must have int indices")
        else:
            if (indx.dtype != np.dtype("l")) or mtx.indptr.dtype != np.dtype("l"):
                raise ValueError("matrix must have long indices")

        if self.isReal:
            if mtx.data.dtype != np.dtype("f8"):
                raise ValueError("matrix must have float64 values")
        else:
            if mtx.data.dtype != np.dtype("c16"):
                raise ValueError("matrix must have complex128 values")

        return indx
    def PreparePageRank(self, r, a, c):
        """ inititalizes Pagerank """
        
        A = self.get(a)
        colsum = self.get(c)

        # do A = I - p*A, p = 0.85
        # we want to work with columns
        A = A.tocsc()
        if sparse.isspmatrix_csc(A):
            for j in np.arange(colsum.size):
                if colsum[j] != 0:
                    # divide all elements in that column by colsum[j]:
                    ptr1 = A.indptr[j]
                    ptr2 = A.indptr[j+1]
                    A.data[ptr1:ptr2] /= colsum[j]
        
            p = 0.85
            A = -p*A
        
            # Add 1 to all elements on the diagonal:
            A = A.tolil() # because making structural changes to a lil_matrix is more efficient
            row = 0
            for col in range(self.rows[0], self.rows[1]):
                A[row, col] += 1.0
                row += 1
        
        self.set(r, A.tocsr())
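
The column-normalization loop above relies on the CSC layout: A.data[A.indptr[j]:A.indptr[j+1]] holds exactly the stored entries of column j. A standalone sketch of the same trick:

import numpy as np
from scipy import sparse

A = sparse.csc_matrix(np.array([[1.0, 2.0], [3.0, 0.0]]))
colsum = np.asarray(A.sum(axis=0)).ravel()
for j in range(A.shape[1]):
    if colsum[j] != 0:
        # scale every stored entry of column j in place
        A.data[A.indptr[j]:A.indptr[j + 1]] /= colsum[j]
assert np.allclose(np.asarray(A.sum(axis=0)).ravel(), 1.0)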
Example #6
def sparse_matrix_to_hdf(obj, path, name):
	if (sparse.isspmatrix_csr(obj) or sparse.isspmatrix_csc(obj)):
		sparse_csx_matrix_to_hdf(obj, path, name)
	elif (sparse.isspmatrix_coo(obj)):
		sparse_coo_matrix_to_hdf(obj, path, name)
	else:
		raise ValueError('Type {} not yet supported for serialisation!'.format(type(obj)))
    def __init__(self, A):
        if not (isspmatrix_csc(A) or isspmatrix_csr(A)):
            A = csc_matrix(A)
            warn('spsolve requires A be CSC or CSR matrix format',
                    SparseEfficiencyWarning)

        A.sort_indices()
        A = A.asfptype()  # upcast to a floating point format

        M, N = A.shape
        if (M != N):
            raise ValueError("matrix must be square (has shape %s)" % ((M, N),))

        f_type = np.sctypeDict[A.dtype.name]
        i_type = np.sctypeDict[A.indices.dtype.name]
        try:
            family = _families[(f_type, i_type)]

        except KeyError:
            msg = 'only float64 or complex128 matrices with int32 or int64' \
                ' indices are supported! (got: matrix: %s, indices: %s)' \
                % (f_type, i_type)
            raise ValueError(msg)

        self.umf = UmfpackContext(family)
        self.umf.numeric(A)

        self._A = A
        self._L = None
        self._U = None
        self._P = None
        self._Q = None
        self._R = None
Example #8
def breadth_first_search(A, start):
    """
    Breadth-First-Search (BFS) of a graph in CSR or CSC matrix format starting
    from a given node (row).  Takes Qobjs and CSR or CSC matrices as inputs.

    This function requires a matrix with symmetric structure.
    Use A+trans(A) if original matrix is not symmetric or not sure.

    Parameters
    ----------
    A : csc_matrix, csr_matrix
        Input graph in CSC or CSR matrix format
    start : int
        Starting node for BFS traversal.

    Returns
    -------
    order : array
        Order in which nodes are traversed from starting node.
    levels : array
        Level of the nodes in the order that they are traversed.

    """
    if not (sp.isspmatrix_csc(A) or sp.isspmatrix_csr(A)):
        raise TypeError('Input must be CSC or CSR sparse matrix.')

    num_rows = A.shape[0]
    start = int(start)
    order, levels = _breadth_first_search(A.indices, A.indptr, num_rows, start)
    # since maybe not all nodes are in the search, check for unused entries
    # in the arrays
    return order[order != -1], levels[levels != -1]
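
A hypothetical call, assuming breadth_first_search and its compiled _breadth_first_search helper are importable; the matrix is symmetrized first, as the docstring advises:

import numpy as np
import scipy.sparse as sp

# path graph 0 - 1 - 2
A = sp.csr_matrix(np.array([[0, 1, 0],
                            [1, 0, 1],
                            [0, 1, 0]]))
order, levels = breadth_first_search(A + A.T, start=0)
# for this graph: order is [0, 1, 2] and levels is [0, 1, 2]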
Example #9
    def inverse_transform(self, X):
        """Return terms per document with nonzero entries in X.

        Parameters
        ----------
        X : {array, sparse matrix}, shape = [n_samples, n_features]

        Returns
        -------
        X_inv : list of arrays, len = n_samples
            List of arrays of terms.
        """
        if sp.isspmatrix_coo(X) or sp.isspmatrix_csc(X):
            # COO matrix is not indexable, CSC is slow for row manipulations
            X = X.tocsr()
        elif not sp.issparse(X):
            # We need to convert X to a matrix, so that the indexing
            # returns 2D objects
            X = np.asmatrix(X)
        n_samples = X.shape[0]

        terms = np.array(list(self.vocabulary_.keys()))
        indices = np.array(list(self.vocabulary_.values()))
        inverse_vocabulary = terms[np.argsort(indices)]

        return [inverse_vocabulary[X[i, :].nonzero()[1]].ravel()
                for i in range(n_samples)]
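
This method mirrors scikit-learn's CountVectorizer.inverse_transform; the same round trip through the public API:

from sklearn.feature_extraction.text import CountVectorizer

docs = ["sparse matrices are fun", "csc matrices index columns"]
vec = CountVectorizer()
X = vec.fit_transform(docs)        # sparse document-term matrix
terms = vec.inverse_transform(X)   # one array of terms per document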
Example #10
    def _getIndx(self, mtx):

        if sp.isspmatrix_csc(mtx):
            indx = mtx.indices
            self.isCSR = 0
        elif sp.isspmatrix_csr(mtx):
            indx = mtx.indices
            self.isCSR = 1
        else:
            raise TypeError('must be a CSC/CSR matrix (is %s)' % mtx.__class__)

        ##
        # Should check types of indices to correspond to familyTypes.
        if self.family[1] == 'i':
            if (indx.dtype != np.dtype('i')) \
                   or mtx.indptr.dtype != np.dtype('i'):
                raise ValueError('matrix must have int indices')
        else:
            if (indx.dtype != np.dtype('l')) \
                   or mtx.indptr.dtype != np.dtype('l'):
                raise ValueError('matrix must have long indices')

        if self.isReal:
            if mtx.data.dtype != np.dtype('f8'):
                raise ValueError('matrix must have float64 values')
        else:
            if mtx.data.dtype != np.dtype('c16'):
                raise ValueError('matrix must have complex128 values')

        return indx
Example #11
 def init(self, data, attr, bpr_k=None, bpr_args=None, bpr_model=None):
     assert sp.isspmatrix_csc(data)
     self.data = data
     self.num_users, self.num_items = data.shape
     self.attr = attr
     assert attr.shape[0] >= self.num_items
     _, self.num_attrs = attr.shape
     if bpr_model is None:
         self.bpr_k = self.num_users // 5 if bpr_k is None else bpr_k
         if bpr_args is None:
             self.bpr_args = bpr.BPRArgs(0.01, 1.0, 0.02125, 0.00355, 0.00355)
         else:
             self.bpr_args = bpr_args
         self.bpr_model = bpr.BPR(self.bpr_k, self.bpr_args)
     else:
         self.bpr_model = bpr_model
         self.bpr_k = bpr_model.D
         self.bpr_args = bpr.BPRArgs(
             bpr_model.learning_rate,
             bpr_model.bias_regularization,
             bpr_model.user_regularization,
             bpr_model.positive_item_regularization,
             bpr_model.negative_item_regularization,
             bpr_model.update_negative_item_factors,
         )
     self.sampler = bpr.UniformUserUniformItem()
Example #12
def splu(A, permc_spec=2, diag_pivot_thresh=1.0,
         drop_tol=0.0, relax=1, panel_size=10):
    """
    A linear solver, for a sparse, square matrix A, using LU decomposition where
    L is a lower triangular matrix and U is an upper triangular matrix.

    Returns a factored_lu object. (scipy.sparse.linalg.dsolve._superlu.SciPyLUType)

    See scipy.sparse.linalg.dsolve._superlu.dgstrf for more info.
    """

    if not isspmatrix_csc(A):
        A = csc_matrix(A)
        warn('splu requires CSC matrix format', SparseEfficiencyWarning)

    A.sort_indices()
    A = A.asfptype()  #upcast to a floating point format

    M, N = A.shape
    if (M != N):
        raise ValueError, "can only factor square matrices" #is this true?

    ftype = superLU_transtabl[A.dtype.char]

    gstrf = getattr(_superlu, ftype + 'gstrf')
    return gstrf(N, A.nnz, A.data, A.indices, A.indptr, permc_spec,
                 diag_pivot_thresh, drop_tol, relax, panel_size)
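
The Python 2-era wrapper above predates the current interface; through today's public API, scipy.sparse.linalg.splu returns a SuperLU object with a solve method:

import numpy as np
from scipy.sparse import csc_matrix
from scipy.sparse.linalg import splu

A = csc_matrix(np.array([[4.0, 1.0], [1.0, 3.0]]))
lu = splu(A)                        # LU factorization of A
x = lu.solve(np.array([1.0, 2.0]))
assert np.allclose(A @ x, [1.0, 2.0])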
Example #13
    def __init__(self, A):
        if not (isspmatrix_csc(A) or isspmatrix_csr(A)):
            A = csc_matrix(A)
            warn('spsolve requires A be CSC or CSR matrix format',
                    SparseEfficiencyWarning)

        A.sort_indices()
        A = A.asfptype()  # upcast to a floating point format

        M, N = A.shape
        if (M != N):
            raise ValueError("matrix must be square (has shape %s)" % ((M, N),))

        if A.dtype.char not in 'dD':
            raise ValueError("Only double precision matrices supported")

        family = {'d': 'di', 'D': 'zi'}
        self.umf = UmfpackContext(family[A.dtype.char])
        self.umf.numeric(A)

        self._A = A
        self._L = None
        self._U = None
        self._P = None
        self._Q = None
        self._R = None
Example #14
    def fit(self, X, y=None):
        """Learn the catent vector (global term weights)
        
        Parameters
        ----------
        X : sparse matrix, [n_samples, n_features]
        a matrix of term/token counts        
        """
        if not sp.isspmatrix_csc(X):
            X = sp.csc_matrix(X)
        
        n_samples, n_features = X.shape
        df = _document_frequency(X)
        cats = self.cats
        cat_vals, cat_indices = np.unique(cats, return_inverse=True)
        cat_count = np.zeros((cat_vals.size, n_features))
               
        # perform smoothing if required
        df += int(self.smooth)
        n_samples += int(self.smooth)
        cat_count += int(self.smooth)
        
        # count appearance of each term for each category
        cx = X.tocoo()
        for i, j in zip(cx.row, cx.col):  # itertools.izip is Python 2 only
#            print(i, cats[i], cat_indices[i])
            cat_count[cat_indices[i], j] += 1
            
        # compute entropy of p(c|t) as
        # log(df(t)) - 1/df(t) * sum_c (count(c,t) * log(count(c,t)))
        catent = np.log(df) - 1/df * np.sum(cat_count * np.log(cat_count), axis=0)
#        catent = 1.0 / (catent + 1)
        self._catent_diag = sp.spdiags(catent, diags=0, m=n_features, n=n_features)
        
        return self
Example #15
def sp_profile(A):
    """Returns the total, lower, and upper profiles of a sparse matrix.

    If the matrix is symmetric then the upper and lower profiles are
    identical. Diagonal matrices have zero profile.

    Parameters
    ----------
    A : csr_matrix, csc_matrix
        Input matrix
    """
    if sp.isspmatrix_csr(A):
        up = _sparse_profile(A.indices, A.indptr, A.shape[0])
        A = A.tocsc()
        lp = _sparse_profile(A.indices, A.indptr, A.shape[0])

    elif sp.isspmatrix_csc(A):
        lp = _sparse_profile(A.indices, A.indptr, A.shape[0])
        A = A.tocsr()
        up = _sparse_profile(A.indices, A.indptr, A.shape[0])

    else:
        raise TypeError('Input sparse matrix must be in CSR or CSC format.')

    return up+lp, lp, up
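
A hypothetical call, assuming sp_profile and its compiled _sparse_profile kernel are available:

import numpy as np
import scipy.sparse as sp

A = sp.csr_matrix(np.array([[1.0, 0.0, 2.0],
                            [0.0, 3.0, 0.0],
                            [4.0, 0.0, 5.0]]))
total, lower, upper = sp_profile(A)  # total == lower + upper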
Example #16
def weighted_bipartite_matching(A, perm_type="row"):
    """
    Returns an array of row permutations that attempts to maximize
    the product of the ABS values of the diagonal elements in
    a nonsingular square CSC sparse matrix. Such a permutation is
    always possible provided that the matrix is nonsingular.

    This function looks at both the structure and ABS values of the
    underlying matrix.

    Parameters
    ----------
    A : csc_matrix
        Input matrix

    perm_type : str {'row', 'column'}
        Type of permutation to generate.

    Returns
    -------
    perm : array
        Array of row or column permutations.

    Notes
    -----
    This function uses a weighted maximum cardinality bipartite matching
    algorithm based on breadth-first search (BFS).  The columns are weighted
    according to the element of max ABS value in the associated rows and
    are traversed in descending order by weight.  When performing the BFS
    traversal, the row associated to a given column is the one with maximum
    weight. Unlike other techniques[1]_, this algorithm does not guarantee the
    product of the diagonal is maximized.  However, this limitation is offset
    by the substantially faster runtime of this method.

    References
    ----------
    .. [1] I. S. Duff and J. Koster, "The design and use of algorithms for
    permuting large entries to the diagonal of sparse matrices", SIAM J.
    Matrix Anal. and Applics. 20, no. 4, 889 (1997).

    """
    nrows = A.shape[0]
    if A.shape[0] != A.shape[1]:
        raise ValueError("weighted_bfs_matching requires a square matrix.")

    if sp.isspmatrix_csr(A) or sp.isspmatrix_coo(A):
        A = A.tocsc()
    elif not sp.isspmatrix_csc(A):
        raise TypeError("matrix must be in CSC, CSR, or COO format.")

    if perm_type == "column":
        A = A.transpose().tocsc()

    perm = _weighted_bipartite_matching(np.asarray(np.abs(A.data), dtype=float), A.indices, A.indptr, nrows)

    if np.any(perm == -1):
        raise Exception("Possibly singular input matrix.")

    return perm
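
A sketch of a call, assuming the module's compiled _weighted_bipartite_matching kernel is built; applying the returned permutation to the rows concentrates large entries on the diagonal:

import numpy as np
import scipy.sparse as sp

A = sp.csc_matrix(np.array([[0.0, 2.0],
                            [5.0, 0.0]]))
perm = weighted_bipartite_matching(A)
A_perm = sp.csc_matrix(A.toarray()[perm, :])  # rows reordered by perm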
Example #17
File: graph.py Project: pyamg/pyamg
def asgraph(G):
    if not (sparse.isspmatrix_csr(G) or sparse.isspmatrix_csc(G)):
        G = sparse.csr_matrix(G)

    if G.shape[0] != G.shape[1]:
        raise ValueError('expected square matrix')

    return G
Example #18
 def test_init(self, test_data, test_attr):
     assert sp.isspmatrix_csc(test_data)
     self.num_test_items, _ = test_attr.shape
     tmp = test_data.tocsr()
     self.test_attr = test_attr
     self.test_dataidx = []
     for u in range(self.num_users):
         self.test_dataidx.append(tmp[u].indices)
Example #19
 def __init__(self, datamat, attrmat, k):
     assert sp.isspmatrix_csc(datamat)
     self.datamat = datamat
     self.attrmat = attrmat
     self.k = k
     _, self.num_items = datamat.shape
     assert self.k<=self.num_items
     self.index = [i for i in range(self.num_items)]
Example #20
def sparse_stump_fit(f, y, prob, pplus):
    # Will only work with CSC column matrices.
    # Results are identical with the dense stump fit if there exists at least
    # one data point with f_i = 0. If not, the result is no worse. We also
    # take advantage of the convexity of H by stopping the search when
    # Hc < Hthresh.
    if not (sp.isspmatrix_csc(f) and f.shape[1] == 1):
        raise ValueError

    nr = f.shape[0]; nz = f.nnz
    #
    sarg = argsort(f.data)
    sdat = f.data[sarg]
    zidx = sdat.searchsorted(0.0)
    sidx = f.indices[sarg]
    #
    Hc = []; Hcur = 1.0; Hthresh = 1e-6

    pl = 0.0; plt = 0.0
    pr = 1.0; prt = pplus
    for i in range(zidx):
        pele = prob[sidx[i]]; cele = 0.5 * (y[sidx[i]] + 1)
        pl += pele; plt += cele * pele
        pr -= pele; prt -= cele * pele
        if i != zidx-1:
            dv = mean(sdat[i:i+2])
        else:
            dv = sdat[i]/2.0
        Hcur = pl * h2(plt/pl) + pr * h2(prt/pr)
        Hc.append([dv, Hcur])
        if Hcur < Hthresh:
            break

    if Hcur > Hthresh:
        pl = 1.0; plt = pplus
        pr = 0.0; prt = 0.0
        for i in range(nz - 1, zidx - 1, -1):
            pele = prob[sidx[i]]; cele = 0.5 * (y[sidx[i]] + 1)
            pl -= pele; plt -= cele * pele
            pr += pele; prt += cele * pele
            if i != zidx:
                dv = mean(sdat[i-1:i+1])
            else:
                dv = sdat[i]/2.0
            Hcur = pl * h2(plt/pl) + pr * h2(prt/pr)
            Hc.append([dv, Hcur])
            if Hcur < Hthresh:
                break

    Hc = array(Hc)
    (dv, Hm) = Hc[Hc[:, 1].argmin(), :]
    didx = sdat.searchsorted(dv)
    if dv >= 0:
        err = pplus - (prob[sidx[didx:]] * (y[sidx[didx:]] > 0)).sum() + (prob[sidx[didx:]] * (y[sidx[didx:]] < 0)).sum()
    else:
        err = 1 - pplus + (prob[sidx[:didx]] * (y[sidx[:didx]] > 0)).sum() - (prob[sidx[:didx]] * (y[sidx[:didx]] < 0)).sum()

    return (dv, err)
Example #21
def add_dummy_feature(X, value=1.0):
    """Augment dataset with an additional dummy feature.

    This is useful for fitting an intercept term with implementations which
    cannot otherwise fit it directly.

    Parameters
    ----------
    X : array or scipy.sparse matrix with shape [n_samples, n_features]
        Data.

    value : float
        Value to use for the dummy feature.

    Returns
    -------

    X : array or scipy.sparse matrix with shape [n_samples, n_features + 1]
        Same data with dummy feature added as first column.

    Examples
    --------

    >>> from sklearn.preprocessing import add_dummy_feature
    >>> add_dummy_feature([[0, 1], [1, 0]])
    array([[ 1.,  0.,  1.],
           [ 1.,  1.,  0.]])
    """
    X = safe_asarray(X)
    n_samples, n_features = X.shape
    shape = (n_samples, n_features + 1)
    if sp.issparse(X):
        if sp.isspmatrix_coo(X):
            # Shift columns to the right.
            col = X.col + 1
            # Column indices of dummy feature are 0 everywhere.
            col = np.concatenate((np.zeros(n_samples), col))
            # Row indices of dummy feature are 0, ..., n_samples-1.
            row = np.concatenate((np.arange(n_samples), X.row))
            # Prepend the dummy feature n_samples times.
            data = np.concatenate((np.ones(n_samples) * value, X.data))
            return sp.coo_matrix((data, (row, col)), shape)
        elif sp.isspmatrix_csc(X):
            # Shift index pointers since we need to add n_samples elements.
            indptr = X.indptr + n_samples
            # indptr[0] must be 0.
            indptr = np.concatenate((np.array([0]), indptr))
            # Row indices of dummy feature are 0, ..., n_samples-1.
            indices = np.concatenate((np.arange(n_samples), X.indices))
            # Prepend the dummy feature n_samples times.
            data = np.concatenate((np.ones(n_samples) * value, X.data))
            return sp.csc_matrix((data, indices, indptr), shape)
        else:
            klass = X.__class__
            X = klass(add_dummy_feature(X.tocoo(), value))
            return klass(X)
    else:
        return np.hstack((np.ones((n_samples, 1)) * value, X))
Example #22
 def _validate_matrix(X):
     '''glmnet only accepts sparse matrices in compressed sparse column
     format.  Note: while the glmnet documentation says it wants sparse row
     format, it lies.
     '''
     if issparse(X) and not isspmatrix_csc(X):
         raise ValueError("Sparse matrix detected, but not in compressed "
                          "sparse column format.")
Example #23
def factorized(A):
    """
    Return a function for solving a sparse linear system, with A pre-factorized.

    Parameters
    ----------
    A : (N, N) array_like
        Input.

    Returns
    -------
    solve : callable
        To solve the linear system of equations given in `A`, the `solve`
        callable should be passed an ndarray of shape (N,).

    Examples
    --------
    >>> A = np.array([[ 3. ,  2. , -1. ],
                      [ 2. , -2. ,  4. ],
                      [-1. ,  0.5, -1. ]])

    >>> solve = factorized( A ) # Makes LU decomposition.

    >>> rhs1 = np.array([1,-2,0])
    >>> x1 = solve( rhs1 ) # Uses the LU factors.
    array([ 1., -2., -2.])

    """
    if isUmfpack and useUmfpack:
        if noScikit:
            warn('scipy.sparse.linalg.dsolve.umfpack will be removed,'
                    ' install scikits.umfpack instead', DeprecationWarning)

        if not isspmatrix_csc(A):
            A = csc_matrix(A)
            warn('splu requires CSC matrix format', SparseEfficiencyWarning)

        A.sort_indices()
        A = A.asfptype()  # upcast to a floating point format

        if A.dtype.char not in 'dD':
            raise ValueError("convert matrix data to double, please, using"
                  " .astype(), or set linsolve.useUmfpack = False")

        family = {'d': 'di', 'D': 'zi'}
        umf = umfpack.UmfpackContext(family[A.dtype.char])

        # Make LU decomposition.
        umf.numeric(A)

        def solve(b):
            return umf.solve(umfpack.UMFPACK_A, A, b, autoTranspose=True)

        return solve
    else:
        return splu(A).solve
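
The pattern this function enables, shown through the public scipy.sparse.linalg.factorized: factor once, solve many times:

import numpy as np
from scipy.sparse import csc_matrix
from scipy.sparse.linalg import factorized

A = csc_matrix(np.array([[3.0, 2.0], [2.0, -2.0]]))
solve = factorized(A)              # the LU decomposition happens once here
x1 = solve(np.array([1.0, -2.0]))  # each call reuses the factors
x2 = solve(np.array([0.0, 1.0]))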
Example #24
def solve(probdata, cone, **opts):
    """
    solves convex cone problems

    @return dictionary with solution with keys:
         'x' - primal solution
         's' - primal slack solution
         'y' - dual solution
         'info' - information dictionary
    """
    if 'A' not in probdata or 'b' not in probdata or 'c' not in probdata:
        raise TypeError("Missing one or more of A, b, c from data dictionary")
    A = probdata['A']
    b = probdata['b']
    c = probdata['c']

    warm = {}
    if 'x' in probdata:
        warm['x'] = probdata['x']
    if 'y' in probdata:
        warm['y'] = probdata['y']
    if 's' in probdata:
        warm['s'] = probdata['s']

    if A is None or b is None or c is None:
        raise TypeError("Incomplete data specification")
    if not sparse.issparse(A):
        raise TypeError("A is required to be a sparse matrix")
    if not sparse.isspmatrix_csc(A):
        warn("Converting A to a CSC (compressed sparse column) matrix; may take a while.")
        A = A.tocsc()

    if sparse.issparse(b):
        b = b.toarray()  # sparse matrices have todense/toarray, not toDense

    if sparse.issparse(c):
        c = c.toarray()

    # Set m and n based on b and c rather than A.
    m = b.size
    n = c.size

    Adata, Aindices, Acolptr = A.data, A.indices, A.indptr
    # Convert keys to upper case.
    new_opts = {}
    for key, val in opts.items():
        new_opts[key.upper()] = opts[key]
    opts = new_opts
    if opts.get("USE_INDIRECT", False):
        # HACK transfer from probdata to opts.
        for key in ["Amul", "ATmul", "getDE", "getM"]:
            if key in probdata:
                opts[key] = probdata[key]
        return _mat_free_scs_indirect.csolve((m, n), Adata, Aindices, Acolptr, b, c, cone, opts, warm)
    else:
        return _mat_free_scs_direct.csolve((m, n), Adata, Aindices, Acolptr, b, c, cone, opts, warm)
Example #25
def matrix_conflicts(L):
    """
    Given an N x M matrix where L_{i,j} is the label given by the jth LF to the ith candidate:
    Return the **fraction of candidates that each LF _conflicts with other LFs on_.**
    """
    B = L.copy()
    if not sparse.issparse(B):
        for row in range(B.shape[0]):
            if np.unique(np.array(B[row][np.nonzero(B[row])])).size == 1:
                B[row] = 0
        return matrix_coverage(sparse_nonzero(B))
    if not (sparse.isspmatrix_csc(B) or sparse.isspmatrix_lil(B) or sparse.isspmatrix_csr(B)):
        raise ValueError("Only supports CSR/CSC and LIL matrices")
    if sparse.isspmatrix_csc(B) or sparse.isspmatrix_lil(B):
        B = B.tocsr()
    for row in range(B.shape[0]):
        if np.unique(B.getrow(row).data).size == 1:
            B.data[B.indptr[row]:B.indptr[row+1]] = 0
    return matrix_coverage(sparse_nonzero(B))
def abs_sparse(X):
  """ Element-wise absolute value of sparse matrix """
  X_abs = X.copy()
  if sparse.isspmatrix_csr(X) or sparse.isspmatrix_csc(X):
    X_abs.data = np.abs(X_abs.data)
  elif sparse.isspmatrix_lil(X):
    X_abs.data = np.array([np.abs(L) for L in X_abs.data])
  else:
    raise ValueError("Only supports CSR/CSC and LIL matrices")
  return X_abs
Example #27
 def get_values(self, X):
     from Orange.classification import _tree_scorers
     if sp.isspmatrix_csc(X):
         func = _tree_scorers.compute_predictions_csc
     elif sp.issparse(X):
         func = _tree_scorers.compute_predictions_csr
         X = X.tocsr()
     else:
         func = _tree_scorers.compute_predictions
     return func(X, self._code, self._values, self._thresholds)
Example #28
def check_data(data, cone):
    """ Check the correctness of input data.
    A is CSC with int64 indices and float64 values
    b,c are float64 vectors, with correct sizes

    If all datatypes are OK, returns *new* dictionary with *same* A, b, c objects.

    Raises an *error* if b, or c are incorrectly formatted.

    If A is incorrect, but can be converted, returns a *new* dict with the
    same b,c arrays, but a *new* A matrix, so as not to modify the original A
    matrix.
    """
    # data has elements A, b, c
    if not_met('A' in data, 'b' in data, 'c' in data):
        raise TypeError("Missing one or more of A, b, or c from data dictionary.")

    A = data['A']
    b = data['b']
    c = data['c']

    if A is None or b is None or c is None:
        raise TypeError("Incomplete data specification.")

    if not sp.issparse(A):
        raise TypeError("A is required to be a scipy sparse matrix.")

    if not sp.isspmatrix_csc(A):
        warn("Converting A to a scipy CSC (compressed sparse column) matrix; may take a while.")
        A = A.tocsc()

    m,n = A.shape
    check_bc(b,c,m,n)

    if not_met(A.indptr.dtype == np.int64, A.indices.dtype == np.int64):
        warn("Converting A.indptr and A.indices to arrays with dtype = numpy.int64")
        # copy the matrix to avoid modifying original
        A = sp.csc_matrix(A)
        A.indptr = A.indptr.astype(np.int64)
        A.indices = A.indices.astype(np.int64)

    if not_met(A.data.dtype == np.float64):
        warn("Converting A.data to array with dtype = numpy.float64")
        # copy the matrix to avoid modifying original
        A = sp.csc_matrix(A)
        A.data = A.data.astype(np.float64)

    if not_met(cone_len(cone) > 0, A.shape[0] == cone_len(cone)):
        raise ValueError('The cones must match the number of rows of A.')


    # return a dict of (possibly modified) problem data
    # we do not modify the original dictionary or the original numpy arrays or matrices
    # if no modifications are needed, these are the *same* A, b, c matrices
    return dict(A=A,b=b,c=c)
Example #29
def maximum_bipartite_matching(A, perm_type='row'):
    """
    Returns an array of row or column permutations that removes nonzero
    elements from the diagonal of a nonsingular square CSC sparse matrix. Such
    a permutation is always possible provided that the matrix is nonsingular.
    This function looks at the structure of the matrix only.

    The input matrix will be converted to CSC matrix format if
    necessary.

    Parameters
    ----------
    A : sparse matrix
        Input matrix

    perm_type : str {'row', 'column'}
        Type of permutation to generate.

    Returns
    -------
    perm : array
        Array of row or column permutations.

    Notes
    -----
    This function relies on a maximum cardinality bipartite matching algorithm
    based on a breadth-first search (BFS) of the underlying graph[1]_.

    References
    ----------
    .. [1] I. S. Duff, K. Kaya, and B. Ucar, "Design, Implementation, and
    Analysis of Maximum Transversal Algorithms", ACM Trans. Math. Softw.
    38, no. 2, (2011).

    """
    nrows = A.shape[0]
    if A.shape[0] != A.shape[1]:
        raise ValueError(
            'Maximum bipartite matching requires a square matrix.')

    if sp.isspmatrix_csr(A) or sp.isspmatrix_coo(A):
        A = A.tocsc()
    elif not sp.isspmatrix_csc(A):
        raise TypeError("matrix must be in CSC, CSR, or COO format.")

    if perm_type == 'column':
        A = A.transpose().tocsc()

    perm = _maximum_bipartite_matching(A.indices, A.indptr, nrows)

    if np.any(perm == -1):
        raise Exception('Possibly singular input matrix.')

    return perm
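
SciPy now ships an equivalent routine in its public graph module; a minimal call:

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import maximum_bipartite_matching

A = csr_matrix(np.array([[0, 1],
                         [1, 0]]))
perm = maximum_bipartite_matching(A, perm_type='row')  # array([1, 0])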
Example #30
def _toCS_umfpack( A ):
    if isspmatrix_csr( A ) or isspmatrix_csc( A ):
        mat = A
    else:
        if hasattr(A, 'tocsc'):
            mat = A.tocsc()
        elif hasattr(A, 'tocsr'):
            mat = A.tocsr()
        else:
            raise ValueError, "matrix cannot be converted to CSC/CSR"
    return mat
Example #31
def sparse_nonzero(X):
    """Sparse matrix with value 1 for i,jth entry !=0"""
    X_nonzero = X.copy()
    if not sparse.issparse(X):
        X_nonzero[X_nonzero != 0] = 1
        return X_nonzero
    if sparse.isspmatrix_csr(X) or sparse.isspmatrix_csc(X):
        X_nonzero.data[X_nonzero.data != 0] = 1
    elif sparse.isspmatrix_lil(X):
        X_nonzero.data = [np.ones(len(L)) for L in X_nonzero.data]
    else:
        raise ValueError("Only supports CSR/CSC and LIL matrices")
    return X_nonzero
Example #32
def py2cellref(arr):
    if type(arr) is list:
        arr = np.asarray(arr)

    if sp.isspmatrix_csc(arr):
        # test the sparse case first: a CSC matrix is not an ndarray, so the
        # original nested check could never be reached
        return byref(csc(arr))
    elif type(arr) is np.ndarray:
        if arr.size == 0:
            return None  # turn an empty ndarray into a NULL pointer; do not use 0
        else:
            return byref(cell(arr))
    else:
        return byref(arr)
Example #33
    def test_getcol(self):
        # This test is adapted from Scipy's CSC tests
        N = 10
        X = testing.shaped_random((N, N), cupy, seed=0)
        X[X > 0.7] = 0
        Xcsc = sparse.csc_matrix(X)

        for i in range(N):
            arr_col = X[:, i:i + 1]
            csc_col = Xcsc.getcol(i)

            assert sparse.isspmatrix_csc(csc_col)
            assert (arr_col == csc_col.toarray()).all()
Example #34
def validate_graph(csgraph, directed, dtype=DTYPE,
                   csr_output=True, dense_output=True,
                   copy_if_dense=False, copy_if_sparse=False,
                   null_value_in=0, null_value_out=np.inf,
                   infinity_null=True, nan_null=True):
    """Routine for validation and conversion of csgraph inputs"""

    DTYPE = csgraph.dtype

    if not (csr_output or dense_output):
        raise ValueError("Internal: dense or csr output must be true")

    # if undirected and csc storage, then transposing in-place
    # is quicker than later converting to csr.
    if (not directed) and isspmatrix_csc(csgraph):
        csgraph = csgraph.T

    if isspmatrix(csgraph):
        if csr_output:
            csgraph = csr_matrix(csgraph, dtype=DTYPE, copy=copy_if_sparse)
        else:
            csgraph = csgraph_to_dense(csgraph, null_value=null_value_out)
    elif np.ma.isMaskedArray(csgraph):
        if dense_output:
            mask = csgraph.mask
            csgraph = np.array(csgraph.data, dtype=DTYPE, copy=copy_if_dense)
            csgraph[mask] = null_value_out
        else:
            csgraph = csgraph_from_masked(csgraph)
    else:
        if dense_output:
            csgraph = csgraph_masked_from_dense(csgraph,
                                                copy=copy_if_dense,
                                                null_value=null_value_in,
                                                nan_null=nan_null,
                                                infinity_null=infinity_null)
            mask = csgraph.mask
            csgraph = np.asarray(csgraph.data, dtype=DTYPE)
            csgraph[mask] = null_value_out
        else:
            csgraph = csgraph_from_dense(csgraph, null_value=null_value_in,
                                         infinity_null=infinity_null,
                                         nan_null=nan_null)

    if csgraph.ndim != 2:
        raise ValueError("compressed-sparse graph must be two dimensional")

    if csgraph.shape[0] != csgraph.shape[1]:
        raise ValueError("compressed-sparse graph must be shape (N, N)")

    return csgraph
Example #35
def object_size(x, memo=None):
    """Estimate the size of a reasonable python object.

    Parameters
    ----------
    x : object
        Object to approximate the size of.
        Can be anything comprised of nested versions of:
        {dict, list, tuple, ndarray, str, bytes, float, int, None}.
    memo : dict | None
        The memodict.

    Returns
    -------
    size : int
        The estimated size in bytes of the object.
    """
    from scipy import sparse
    # Note: this will not process object arrays properly (since those only
    # hold references)
    if memo is None:
        memo = dict()
    id_ = id(x)
    if id_ in memo:
        return 0  # do not add already existing ones
    if isinstance(x, (bytes, str, int, float, type(None))):
        size = sys.getsizeof(x)
    elif isinstance(x, np.ndarray):
        # On newer versions of NumPy, just doing sys.getsizeof(x) works,
        # but on older ones you always get something small :(
        size = sys.getsizeof(np.array([]))
        if x.base is None or id(x.base) not in memo:
            size += x.nbytes
    elif isinstance(x, np.generic):
        size = x.nbytes
    elif isinstance(x, dict):
        size = sys.getsizeof(x)
        for key, value in x.items():
            size += object_size(key, memo)
            size += object_size(value, memo)
    elif isinstance(x, (list, tuple)):
        size = sys.getsizeof(x) + sum(object_size(xx, memo) for xx in x)
    elif isinstance(x, datetime):
        size = object_size(_dt_to_stamp(x), memo)
    elif sparse.isspmatrix_csc(x) or sparse.isspmatrix_csr(x):
        size = sum(
            sys.getsizeof(xx) for xx in [x, x.data, x.indices, x.indptr])
    else:
        raise RuntimeError('unsupported type: %s (%s)' % (type(x), x))
    memo[id_] = size
    return size
Example #36
def concatenate_csc_matrices_by_columns(m1, m2):
    """ 自定义 csc_matrices 合并函数,以替代 hstack
    Notes
    -----
    原因是 hstack 在合并的时候占内存太大,并且效率不高。对于所有的稀疏矩阵,它都将
    其转化为统一的coo_matrix再做合并,然后再转化为指定格式。
    :param m1: csc_matrice
    :param m2: csc_matrice
    :return: 合并的结果
    """
    # 确保 m1, m2 为 csc_matrix,不然要出问题
    from scipy.sparse import isspmatrix_csc
    if not isspmatrix_csc(m1):
        m1 = m1.tocsc()
        gc.collect()
    if not isspmatrix_csc(m2):
        m2 = m2.tocsc()
        gc.collect()
    # 分解合成
    data = np.concatenate((m1.data, m2.data))
    indices = np.concatenate((m1.indices, m2.indices))
    indptr = m2.indptr + len(m1.data)
    indptr = indptr[1:]
    indptr = np.concatenate((m1.indptr, indptr))
    # 手动释放内存
    del m1
    del m2
    gc.collect()
    # 生成结果
    res = csc_matrix((data, indices, indptr))

    # 手动释放内存
    del data
    del indices
    del indptr
    gc.collect()

    return res
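
A quick sanity check of the manual CSC concatenation against scipy.sparse.hstack, assuming the module's own imports (gc, np, csc_matrix) are in place:

import numpy as np
from scipy import sparse

m1 = sparse.csc_matrix(np.array([[1.0, 0.0], [0.0, 2.0]]))
m2 = sparse.csc_matrix(np.array([[0.0, 3.0], [4.0, 0.0]]))
res = concatenate_csc_matrices_by_columns(m1.copy(), m2.copy())
ref = sparse.hstack([m1, m2]).tocsc()
assert np.allclose(res.toarray(), ref.toarray())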
Example #37
 def __init__(self, Y, cliques):
     assert isspmatrix_csc(Y)
     M, N = Y.shape
     assert np.all(np.arange(N) == np.asarray(sorted([k for clq in cliques for k in clq])))
     U = len(cliques)
     self.init = False
     self.Ys = []
     self.Mplus = Y.sum(axis=0).A.reshape(-1)
     self.Q = 1. / (N * self.Mplus)
     for u in range(U):
         clq = cliques[u]
         Yu = Y[:, clq]
         self.Ys.append(Yu.tocoo())
     self.init = True
Example #38
    def factorized(A, piv_tol=1.0, sym_piv_tol=1.0):
        """
        Return a function for solving a sparse linear system, with A
        pre-factorized.

        Parameters
        ----------
        A : csc_matrix
            matrix to be factorized
        piv_tol : float, 0 <= piv_tol <= 1.0
        sym_piv_tol : float, 0 <= piv_tol <= 1.0
            thresholds used by UMFPACK for pivoting. 0 means no pivoting, 1.0
            means full pivoting as in dense matrices (guaranteeing stability,
            but possibly reducing sparsity). Defaults of UMFPACK are 0.1 and
            0.001 respectively. Whether piv_tol or sym_piv_tol are used is
            decided internally by UMFPACK, depending on whether the matrix is
            "symmetric" enough.

        Examples
        --------
        solve = factorized(A) # Makes LU decomposition.
        x1 = solve(rhs1) # Uses the LU factors.
        x2 = solve(rhs2) # Uses again the LU factors.
        """
        umfpack = linsolve.umfpack

        if not sp.isspmatrix_csc(A):
            A = sp.csc_matrix(A)

        A.sort_indices()
        A = A.asfptype()  # upcast to a floating point format

        if A.dtype.char not in 'dD':
            raise ValueError("convert matrix data to double, please, using"
                             " .astype()")

        family = {'d': 'di', 'D': 'zi'}
        umf = umfpack.UmfpackContext(family[A.dtype.char])

        # adjust pivot thresholds
        umf.control[umfpack.UMFPACK_PIVOT_TOLERANCE] = piv_tol
        umf.control[umfpack.UMFPACK_SYM_PIVOT_TOLERANCE] = sym_piv_tol

        # Make LU decomposition.
        umf.numeric(A)

        def solve(b):
            return umf.solve(umfpack.UMFPACK_A, A, b, autoTranspose=True)

        return solve
Example #39
    def test_loading_and_storing_empty_containers(self):
        filename = make_temp_dir('empty_containers.hdf5')
        traj = Trajectory(filename=filename, add_time=True)

        # traj.f_add_parameter('empty.dict', {})
        # traj.f_add_parameter('empty.list', [])
        traj.f_add_parameter(ArrayParameter, 'empty.tuple', ())
        traj.f_add_parameter(ArrayParameter, 'empty.array', np.array([], dtype=float))

        spsparse_csc = spsp.csc_matrix((2,10))
        spsparse_csr = spsp.csr_matrix((6660,660))
        spsparse_bsr = spsp.bsr_matrix((3330,2220))
        spsparse_dia = spsp.dia_matrix((1230,1230))

        traj.f_add_parameter(SparseParameter, 'empty.csc', spsparse_csc)
        traj.f_add_parameter(SparseParameter, 'empty.csr', spsparse_csr)
        traj.f_add_parameter(SparseParameter, 'empty.bsr', spsparse_bsr)
        traj.f_add_parameter(SparseParameter, 'empty.dia', spsparse_dia)

        traj.f_add_result(SparseResult, 'empty.all', dict={}, list=[],
                          series = pd.Series(),
                          frame = pd.DataFrame(),
                          **traj.par.f_to_dict(short_names=True, fast_access=True))

        traj.f_store()

        newtraj = load_trajectory(index=-1, filename=filename)

        newtraj.f_load(load_data=2)

        epg = newtraj.par.empty
        self.assertTrue(type(epg.tuple) is tuple)
        self.assertTrue(len(epg.tuple) == 0)

        self.assertTrue(type(epg.array) is np.ndarray)
        self.assertTrue(epg.array.size == 0)

        self.assertTrue(spsp.isspmatrix_csr(epg.csr))
        self.assertTrue(epg.csr.size == 0)

        self.assertTrue(spsp.isspmatrix_csc(epg.csc))
        self.assertTrue(epg.csc.size == 0)

        self.assertTrue(spsp.isspmatrix_bsr(epg.bsr))
        self.assertTrue(epg.bsr.size == 0)

        self.assertTrue(spsp.isspmatrix_dia(epg.dia))
        self.assertTrue(epg.dia.size == 0)

        self.compare_trajectories(traj, newtraj)
Example #40
def maximum_bipartite_matching(A, perm_type='row'):
    """
    Returns an array of row or column permutations that removes nonzero
    elements from the diagonal of a nonsingular square CSC sparse matrix. Such
    a permutation is always possible provided that the matrix is nonsingular.
    This function looks at the structure of the matrix only.

    The input matrix will be converted to CSC matrix format if necessary.

    Parameters
    ----------
    A : sparse matrix
        Input matrix

    perm_type : str {'row', 'column'}
        Type of permutation to generate.

    Returns
    -------
    perm : array
        Array of row or column permutations.

    Notes
    -----
    This function relies on a maximum cardinality bipartite matching algorithm
    based on a breadth-first search (BFS) of the underlying graph[1]_.

    References
    ----------
    I. S. Duff, K. Kaya, and B. Ucar, "Design, Implementation, and
    Analysis of Maximum Transversal Algorithms", ACM Trans. Math. Softw.
    38, no. 2, (2011).
    """
    _deprecate()
    nrows = A.shape[0]
    if A.shape[0] != A.shape[1]:
        raise ValueError(
            'Maximum bipartite matching requires a square matrix.')
    if sp.isspmatrix_csr(A) or sp.isspmatrix_coo(A):
        A = A.tocsc()
    elif not sp.isspmatrix_csc(A):
        raise TypeError("matrix must be in CSC, CSR, or COO format.")

    if perm_type == 'column':
        A = A.transpose().tocsc()
    perm = _maximum_bipartite_matching(A.indices, A.indptr, nrows)
    if np.any(perm == -1):
        raise Exception('Possibly singular input matrix.')
    return perm
Example #41
def should_enforce_sparse(m,
                          sparse_format: SparseFormat,
                          policy: SparsePolicy,
                          dtype,
                          sparse_values: bool = True) -> bool:
    """
    Returns whether it is preferable to convert a given matrix into a `scipy.sparse.csr_matrix`,
    `scipy.sparse.csc_matrix` or `scipy.sparse.dok_matrix`, depending on the format of the given matrix and a given
    `SparsePolicy`:

    If the given policy is `SparsePolicy.AUTO`, the matrix will be converted into the given sparse format, if possible,
    if the sparse matrix is expected to occupy less memory than a dense matrix. To be able to convert the matrix into a
    sparse format, it must be a `scipy.sparse.lil_matrix`, `scipy.sparse.dok_matrix` or `scipy.sparse.coo_matrix`. If
    the given sparse format is `csr` or `csc` and the matrix is already in that format, it will not be converted.

    If the given policy is `SparsePolicy.FORCE_SPARSE`, the matrix will always be converted into the specified sparse
    format, if possible.

    If the given policy is `SparsePolicy.FORCE_DENSE`, the matrix will always be converted into a dense matrix.

    :param m:               A `np.ndarray` or `scipy.sparse.matrix` to be checked
    :param sparse_format:   The `SparseFormat` to be used
    :param policy:          The `SparsePolicy` to be used
    :param dtype:           The type of the values that should be stored in the matrix
    :param sparse_values:   True, if the values must explicitly be stored when using a sparse format, False otherwise
    :return:                True, if it is preferable to convert the matrix into a sparse matrix of the given format,
                            False otherwise
    """
    if not issparse(m):
        # Given matrix is dense
        if policy != SparsePolicy.FORCE_SPARSE:
            return False
    elif (isspmatrix_csr(m) and sparse_format == SparseFormat.CSR) or (
            isspmatrix_csc(m) and sparse_format == SparseFormat.CSC):
        # Matrix is a `scipy.sparse.csr_matrix` or `scipy.sparse.csc_matrix` and is already in the given sparse format
        return policy != SparsePolicy.FORCE_DENSE
    elif isspmatrix_lil(m) or isspmatrix_coo(m) or isspmatrix_dok(m):
        # Given matrix is in a format that might be converted into the specified sparse format
        if policy == SparsePolicy.AUTO:
            return is_sparse(m,
                             sparse_format=sparse_format,
                             dtype=dtype,
                             sparse_values=sparse_values)
        else:
            return policy == SparsePolicy.FORCE_SPARSE

    raise ValueError('Matrix of type ' + type(m).__name__ +
                     ' cannot be converted to format "' + str(sparse_format) +
                     '"')
Example #42
def D_spPoisson(w, A, x, b, g=1, A_sum=None, b_sum=None):
    f = 0.0
    assert sp.isspmatrix_csc(x), 'D defined only for sparse X'

    xhat_ind = A[x.indices].dot(w)
    if (A_sum is None): A_sum = np.sum(A, 0)
    if np.isscalar(b):
        xhat_ind = xhat_ind + b
        if b_sum is None: b_sum = b * A.shape[0]
        b_ind = b
    else:
        xhat_ind = xhat_ind + b[x.indices]
        if b_sum is None: b_sum = np.sum(b)
        b_ind = b[x.indices]

    if g:
        if (np.any(xhat_ind == 0)):
            f = maxval
            gradF = np.zeros(w.shape)
            weps = np.copy(w)
            for i in range(len(w)):
                #print 'in here print', np.shape(weps)
                weps[i] += eps
                #print 'in here'
                xhateps_ind = A[x.indices].dot(weps) + b_ind
                #print 'successfully added bias'
                if np.any(xhateps_ind == 0):
                    feps = maxval
                else:
                    feps = A_sum.dot(weps) + b_sum - np.sum(x.data) + np.sum(
                        x.data * np.log(x.data / xhateps_ind))
                gradF[i] = (feps - f) / eps
                weps[i] -= eps
        else:
            t = (x.data / xhat_ind)
            f = A_sum.dot(w) + b_sum - np.sum(x.data) + np.sum(
                x.data * np.log(t))
            f = min(f, maxval)
            gradF = A_sum - A[x.indices].T.dot(t)
        return f, gradF
    else:
        if (np.any(xhat_ind == 0)):
            f = maxval
        else:
            t = (x.data / xhat_ind)
            f = A_sum.dot(w) + b_sum - np.sum(x.data) + np.sum(
                x.data * np.log(t))
            f = min(f, maxval)
        return f
Example #43
def spsolve(A,
            b,
            factorize=True,
            squeeze=True,
            solver=pypardiso_solver,
            *args,
            **kwargs):
    """
    This function mimics scipy.sparse.linalg.spsolve, but uses the Pardiso solver instead of SuperLU/UMFPACK
    
        solve Ax=b for x
        
        --- Parameters ---
        A: sparse square CSR or CSC matrix (scipy.sparse.csr.csr_matrix)
        b: numpy ndarray
           right-hand side(s), b.shape[0] needs to be the same as A.shape[0]
        factorize: boolean, default True
                   matrix A is factorized by default, so the factorization can be reused
        squeeze: default True
                 a strange quirk of scipy spsolve is that it always returns x.squeeze();
                 this feature is kept in order to stay compatible with implementations
                 that rely on this behaviour
        solver: instance of PyPardisoSolver, default pypardiso_solver
                you can supply your own instance of PyPardisoSolver, but using several instances
                of PyPardisoSolver in parallel can lead to errors
           
        --- Returns ---
        x: numpy ndarray
           solution of the system of linear equations, same shape as b (but returns shape (n,) if b has shape (n,1))
           
        --- Notes ---
        The computation time increases only minimally if the factorization and the solve phase are carried out 
        in two steps, therefore it is factorized by default. Subsequent calls to spsolve with the same matrix A 
        will be drastically faster. This makes the "factorized" method obsolete, but it is kept for compatibility.
    """
    if sp.isspmatrix_csc(A):
        A = A.tocsr()  # fixes issue with brightway2 technosphere matrix

    solver._check_A(A)
    if factorize and not solver._is_already_factorized(A):
        solver.factorize(A)

    x = solver.solve(A, b)

    if squeeze:
        # scipy spsolve always returns vectors with shape (n,) instead of (n,1)
        return x.squeeze()
    else:
        return x
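
Typical use of this wrapper, assuming the pypardiso package layout in which pypardiso.spsolve mirrors scipy.sparse.linalg.spsolve:

import numpy as np
import scipy.sparse as sp
from pypardiso import spsolve

A = sp.csr_matrix(np.array([[4.0, 1.0],
                            [1.0, 3.0]]))
b = np.array([1.0, 2.0])
x = spsolve(A, b)   # the factorization of A is cached for later solves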
Example #44
 def _validate_data_types(self):
     # The backed API does not support interrogation of the underlying sparsity or sparse matrix type
     # Fake it by asking for a small subarray and testing it.   NOTE: if the user has ignored our
     # anndata <= 0.7 warning, opted for the --backed option, and specified a large, sparse dataset,
     # this "small" indexing request will load the entire X array. This is due to a bug in anndata<=0.7
      # which will load the entire X matrix to fulfill any slicing request if X is sparse.  See
     # user warning in _load_data().
     X0 = self.data.X[0, 0:1]
     if sparse.isspmatrix(X0) and not sparse.isspmatrix_csc(X0):
         warnings.warn(
             "Anndata data matrix is sparse, but not a CSC (columnar) matrix.  "
             "Performance may be improved by using CSC.")
     if self.data.X.dtype > np.dtype(np.float32):
         warnings.warn(
             f"Anndata data matrix is in {self.data.X.dtype} format not float32. "
             f"Precision may be truncated.")
     if self.data.X.dtype < np.float32:
         if self.data.isbacked:
             raise DatasetAccessError(
                 f"Data matrix in {self.data.X.dtype} format is not supported in backed mode."
                 " Please reload without --backed, or convert matrix to float32"
             )
         warnings.warn(
             f"Anndata data matrix is in unsupported {self.data.X.dtype} format -- will be cast to float32"
         )
         self.data.X = self.data.X.astype(np.float32)
     for ax in Axis:
         curr_axis = getattr(self.data, str(ax))
         for ann in curr_axis:
             datatype = curr_axis[ann].dtype
             downcast_map = {
                 "int64": "int32",
                 "uint32": "int32",
                 "uint64": "int32",
                 "float64": "float32",
             }
             if datatype in downcast_map:
                 warnings.warn(
                     f"Anndata annotation {ax}:{ann} is in unsupported format: {datatype}. "
                     f"Data will be downcast to {downcast_map[datatype]}.")
             if isinstance(datatype, CategoricalDtype):
                 category_num = len(curr_axis[ann].dtype.categories)
                 if category_num > 500 and category_num > self.dataset_config.presentation__max_categories:
                     warnings.warn(
                         f"{str(ax).title()} annotation '{ann}' has {category_num} categories, this may be "
                         f"cumbersome or slow to display. We recommend setting the "
                         f"--max-category-items option to 500, this will hide categorical "
                         f"annotations with more than 500 categories in the UI"
                     )
Example #45
def _pfscm_grad(weights, x, centroids, memberships, m=2., v=2.):
    c, d = weights.shape
    n, d2 = x.shape
    assert (d == d2)
    if sp.issparse(x):
        weights = weights.tocsc()
        assert sp.isspmatrix_csc(
            x), "Only the CSC format is supported (iteration on dimensions)"
        # Supports `weights` being sparse or not but produces a new sparse matrix
        # Do we have to use a d*c loop however?
        # Can we "fusion" this with the prox operator in order to remove useless dims?..
        w = sp.dok_matrix((c, d))
        for j in range(d):
            #for r in range(c):
            temps = np.zeros((c, ))
            for iptr in range(x.indptr[j], x.indptr[j + 1]):
                i = x.indices[iptr]
                if sp.issparse(weights):
                    for rptr in range(weights.indptr[j],
                                      weights.indptr[j + 1]):
                        r = weights.indices[rptr]
                        temps[r] += v * memberships[
                            r, i]**m * weights.data[rptr] * (np.linalg.norm(
                                x.data[iptr] - centroids[r, j]))**2
                else:
                    for r in range(c):
                        # accumulate (+=) across the nonzero rows i, matching
                        # the sparse-weights branch above
                        temps[r] += v * memberships[r, i]**m * weights[r, j] * (
                            x.data[iptr] - centroids[r, j])**2
#                        for jptr in range(weights.indptr[r], weights.indptr[r+1]):
#                            # Or should it be the other way around:
#                            #  * iteration on the weights to get pointers to the clusters
#                            #  * loop first on the weights then on the data X
#                            j2 = weights.indices[jptr]
#                            if j2 == j:
#                                t += v * memberships[r, i] ** m * weights.data[jptr] * (x.data[iptr] - centroids[r, j])**2
#                                break
#                    else:
#                        t += v * memberships[r,i] ** m * weights[r,j] * (x.data[iptr] - centroids[r, j])**2
                for r2 in range(c):
                    w[r2, j] = temps[r2]
        return w.tocsr()
    else:
        w = np.zeros((c, d))
        for j in range(d):
            for r in range(c):
                w[r, j] = v * sum(memberships[r, i]**m * weights[r, j] *
                                  (x[i, j] - centroids[r, j])**2
                                  for i in range(n))
        return w
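
The indptr/indices bookkeeping in the sparse branch is the standard way to visit a CSC matrix one column at a time; isolated here as a sketch:

import numpy as np
import scipy.sparse as sp

X = sp.csc_matrix(np.array([[1.0, 0.0], [0.0, 2.0], [3.0, 0.0]]))
for j in range(X.shape[1]):                        # over columns
    for ptr in range(X.indptr[j], X.indptr[j + 1]):
        i, v = X.indices[ptr], X.data[ptr]         # stored row and value
        print(f"X[{i}, {j}] = {v}")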
Example #46
0
    def transform(self, X, y=None, copy=True):
        if not sp.isspmatrix_csr(X) and not sp.isspmatrix_csc(X):
            X = sp.csr_matrix(X)
        elif copy:
            X = X.copy()

        cond = X.data > self.threshold
        not_cond = np.logical_not(cond)

        X.data[cond] = 1
        # FIXME: if enough values became 0, it may be worth changing
        #        the sparsity structure
        X.data[not_cond] = 0

        return X
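
Because the transform only touches X.data, the same binarization can be written as a small free function; a sketch with the hypothetical name binarize_data:

import numpy as np
import scipy.sparse as sp

def binarize_data(X, threshold=0.0):
    # Threshold only the stored entries of a CSR/CSC matrix, on a copy.
    X = X.copy()
    X.data = (X.data > threshold).astype(X.dtype)
    return X

X = sp.csr_matrix(np.array([[0.2, 0.0], [1.5, 0.7]]))
print(binarize_data(X, threshold=0.5).toarray())  # [[0. 0.] [1. 1.]]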
Example #47
0
def train_test_split(x, p=0.5):
    if ss.issparse(x):
        data = np.random.binomial(n=x.data.astype(int),
                                  p=p,
                                  size=x.data.shape)
        if ss.isspmatrix_csr(x):
            train = ss.csr_matrix((data, x.indices, x.indptr), shape=x.shape)
        elif ss.isspmatrix_csc(x):
            train = ss.csc_matrix((data, x.indices, x.indptr), shape=x.shape)
        else:
            raise NotImplementedError('sparse matrix type not supported')
    else:
        train = np.random.binomial(n=x, p=p, size=x.shape)
    test = x - train
    return train, test
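
Drawing the binomial samples from x.data alone means train and test share the input's sparsity pattern and always sum back to it; a quick usage check:

import numpy as np
import scipy.sparse as ss

counts = ss.csr_matrix(np.array([[4, 0], [0, 10]]))
train, test = train_test_split(counts, p=0.5)
assert np.array_equal((train + test).toarray(), counts.toarray())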
Example #48
0
def inf_norm(X):
    """
    Infinity norm of a matrix (maximum absolute row sum).

    :param X: Input matrix.
    :type X: :class:`scipy.sparse.csr_matrix`, :class:`scipy.sparse.csc_matrix` or :class:`numpy.matrix`
    """
    if sp.isspmatrix_csr(X) or sp.isspmatrix_csc(X):
        # avoid copying index and ptr arrays
        abs_X = X.__class__((abs(X.data), X.indices, X.indptr), shape=X.shape)
        return (abs_X * np.ones((X.shape[1]), dtype=X.dtype)).max()
    elif sp.isspmatrix(X):
        return (abs(X) * np.ones((X.shape[1]), dtype=X.dtype)).max()
    else:
        return nla.norm(np.asmatrix(X), float('inf'))
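
Rebuilding the matrix from (abs(X.data), X.indices, X.indptr) reuses the index arrays, so only the values are copied; a quick check of the result:

import numpy as np
import scipy.sparse as sp

A = sp.csr_matrix(np.array([[1.0, -5.0], [2.0, 0.0]]))
print(inf_norm(A))  # 6.0, the maximum absolute row sum (|1| + |-5|)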
Example #49
0
        def _serialize_scipy_sparse(obj):
            if isspmatrix_coo(obj):
                return 'coo', pa.SparseCOOTensor.from_scipy(obj)

            elif isspmatrix_csr(obj):
                return 'csr', pa.SparseCSRMatrix.from_scipy(obj)

            elif isspmatrix_csc(obj):
                return 'csc', pa.SparseCSCMatrix.from_scipy(obj)

            elif isspmatrix(obj):
                return 'coo', pa.SparseCOOTensor.from_scipy(obj.tocoo())

            else:
                raise NotImplementedError(
                    "Serialization of {} is not supported.".format(type(obj)))
Example #50
0
def sparse_isone(X):
    """Sparse matrix with value 1 for i,jth entry ==1, duplicated and modified from sparse_nonzero"""
    X_isone = X.copy()
    if not sparse.issparse(X):
        X_isone[X_isone == 1] = 1
        X_isone[X_isone != 1] = 0
        return X_isone
    if sparse.isspmatrix_csr(X) or sparse.isspmatrix_csc(X):
        X_isone.data[X_isone.data == 1] = 1
        X_isone.data[X_isone.data != 1] = 0
    # elif sparse.isspmatrix_lil(X):
    #     print("is lil (linked list) sparse matrix, double check!")
    #     X_isone.data = [np.ones(len(L)) for L in X_isone.data]
    else:
        raise ValueError("Only CSR/CSC sparse matrices are supported (not LIL)")
    return X_isone
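
Note that masking X_isone.data to 0 leaves explicit zeros in the stored structure; if that matters, eliminate_zeros() compacts the result, as in this sketch:

import numpy as np
from scipy import sparse

X1 = sparse_isone(sparse.csr_matrix(np.array([[1, 2], [0, 1]])))
X1.eliminate_zeros()  # drop the entries the masking set to 0
print(X1.toarray())   # [[1 0]
                      #  [0 1]]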
Example #51
0
    def __init__(self, X_train, Y_train, C, cliques, verbose=0):
        assert C > 0
        assert X_train.shape[0] == Y_train.shape[0]
        assert isspmatrix_csc(Y_train)

        self.X = X_train
        self.Y = Y_train
        self.C = C
        self.cliques = cliques
        self.verbose = verbose

        self.M, self.D = self.X.shape
        self.N = self.Y.shape[1]
        self.U = len(self.cliques)
        self.data_helper = DataHelper(self.Y, self.cliques)
        self.trained = False
Example #52
0
def sparse_logdet(A):
    '''
    Calculate the log-determinant using a sparse LU decomposition: the product
    of the diagonal values of the L and U factors gives the determinant up to
    a sign change. The determinant is known to be positive, so take the log of
    the absolute value of each diagonal entry and sum them.

    See link for math and implementation details:
    http://stackoverflow.com/a/19616987  (first comment w/ Wikipedia link)
    '''
    if not isspmatrix_csc(A):  # needed for splu() decomposition to work
        A = csc_matrix(A)

    aux = splu(A)
    return np.sum(
        np.log(np.abs(aux.L.diagonal())) + np.log(np.abs(aux.U.diagonal())))
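
A quick sanity check against the dense log-determinant, assuming a small symmetric positive-definite test matrix:

import numpy as np
from scipy.sparse import csc_matrix

A = csc_matrix(np.array([[4.0, 1.0], [1.0, 3.0]]))  # SPD, det = 11
print(sparse_logdet(A))  # ~2.3979
print(np.log(11.0))      # matches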
Example #53
0
def Cholesky( A, b, options={'SolutionType': 'double', 'Ordering': 'amd'}): 
    ## A is a scipy.sparse(data must be float64) #technically it only needs to be numerical
    ## b is a numpy.array (data must be float64)
    ## options is a dictionary that specifies what type the solution should be; the default is double
    
    ##--------------------------------------------------------------------------
    ## Verify inputs
    ##--------------------------------------------------------------------------
    if not isspmatrix(A):
        raise TypeError("Input matrix must be scipy.sparse")
    ## If the sparse input matrix is not in csc form, convert it into csc form
    if not isspmatrix_csc(A):
        A = A.tocsc()  # tocsc() returns a new matrix; the result must be kept
    ## Check symmetry    
    tol = 1e-8
    if scipy.sparse.linalg.norm(A - A.T, float('inf')) > tol:
        raise TypeError("Input matrix is not symmetric")
    
    ##--------------------------------------------------------------------------
    ## Ordering
    ##--------------------------------------------------------------------------
    if options['Ordering']=="none":
        order=0
    elif options['Ordering']=="colamd":
        order=1
    elif options['Ordering']=="amd": ##amd is the default ordering for Cholesky
        order=2
    else:
        raise ValueError("Invalid 'Ordering' option")
        
    ##--------------------------------------------------------------------------
    ## Call the correct function depending on the desired output type
    ##--------------------------------------------------------------------------    
    if options['SolutionType']=="double":
        charOut=0
    elif options['SolutionType']=="string":
        charOut=1
    else:
        raise ValueError("Invalid 'SolutionType' option")

    x=spex_chol_backslash(A,b,order,charOut)

    return x
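
A hedged usage sketch, assuming the SPEX binding spex_chol_backslash used above is importable:

import numpy as np
from scipy.sparse import csc_matrix

A = csc_matrix(np.array([[4.0, 1.0], [1.0, 3.0]]))  # symmetric positive definite
b = np.array([1.0, 2.0])
x = Cholesky(A, b)  # default {'SolutionType': 'double', 'Ordering': 'amd'}
print(x)            # solution of A x = b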
Example #54
0
def poisson_cluster(data, k, init=None, max_iters=100):
    """
    Performs Poisson hard EM on the given data.

    Args:
        data (array): A 2d array- genes x cells. Can be dense or sparse; for best performance, sparse matrices should be in CSC format.
        k (int): Number of clusters
        init (array, optional): Initial centers - genes x k array. Default: None, use kmeans++
        max_iters (int, optional): Maximum number of iterations. Default: 100

    Returns:
        a tuple of two arrays: a cells x 1 vector of cluster assignments,
        and a genes x k array of cluster means.
    """
    # TODO: be able to use a combination of fixed and unknown starting points
    # e.g., have init values only for certain genes, have a row of all
    # zeros indicating that kmeans++ should be used for that row.
    genes, cells = data.shape
    #print 'starting: ', centers
    if sparse.issparse(data) and not sparse.isspmatrix_csc(data):
        data = sparse.csc_matrix(data)
    init, assignments = kmeans_pp(data, k, centers=init)
    centers = np.copy(init)
    assignments = np.zeros(cells)
    for it in range(max_iters):
        lls = poisson_ll(data, centers)
        #cluster_dists = np.zeros((cells, k))
        new_assignments = np.argmax(lls, 1)
        if np.equal(assignments, new_assignments).all():
            #print 'ending: ', centers
            return new_assignments, centers
        for c in range(k):
            if sparse.issparse(data):
                if data[:,new_assignments==c].shape[1]==0:  # empty cluster (no cells)
                    # re-initialize centers?
                    new_c, _ = kmeans_pp(data, k, centers[:,:c])
                    centers[:,c] = new_c[:,c]
                else:
                    centers[:,c] = np.asarray(data[:,new_assignments==c].mean(1)).flatten()
            else:
                if data[:,new_assignments==c].shape[1]==0:  # empty cluster (no cells)
                    new_c, _ = kmeans_pp(data, k, centers[:,:c])
                    centers[:,c] = new_c[:,c]
                else:
                    centers[:,c] = np.mean(data[:,new_assignments==c], 1)
        assignments = new_assignments
    return assignments, centers
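
A hedged usage sketch, assuming poisson_ll and kmeans_pp come from the same module as poisson_cluster:

import numpy as np
import scipy.sparse as sparse

rng = np.random.default_rng(0)
data = sparse.csc_matrix(rng.poisson(lam=2.0, size=(50, 30)))  # genes x cells
assignments, centers = poisson_cluster(data, k=3)
print(assignments.shape, centers.shape)  # (30,) and (50, 3)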
Example #55
0
 def __init__(self, name, items, terms, counts, col_names=None):
     self.name = name  # name of feature
     assert type(items) == list
     self.items = items  # list of items
     assert type(terms) == list
     self.terms = terms  # list of terms (feature names)
     if sparse.issparse(counts):
         self.sparse = True
         assert sparse.isspmatrix_csc(counts)
     else:
         self.sparse = False
     self.counts = counts
     # add an additional member to be used primarily for the case of vector transformations
     if col_names is None:
         self.col_names = terms[:]
     else:
         self.col_names = col_names
Example #56
0
    def get_Q2TJbp_T_Q2TJbp_blockdiag(self):
        assert self._state == self.STATE_MARGINALIZED

        # TODO: don't use dense Hpp

        if 0:
            # too slow
            Hpp = self._get_Q2TJbp_T_Q2TJbp_blockdiag_sp(
                self.nb, self.np, self._pose_size, self._pose_damping,
                self._blocks, self.dtype)
        elif NUMBA_LEVEL >= 3:
            # no extra memory used
            Hpp = DictArray2D((self.nb + self.np, self.nb + self.np),
                              dtype=self.dtype)
            self._get_Q2TJbp_T_Q2TJbp_blockdiag_lv3(self.nb, self.np,
                                                    self._pose_size,
                                                    self._pose_damping,
                                                    self._blocks, Hpp)
        else:
            # uses way too much memory for large problems due to dense Hpp
            Hpp = self._get_Q2TJbp_T_Q2TJbp_blockdiag(
                self.nb, self.np, self._pose_size, self._pose_damping,
                nb.typed.List([blk.Q2T_Jbp for blk in self._blocks]),
                nb.typed.List([blk.pose_idxs for blk in self._blocks]))

        for blk in self.non_lm_blocks:
            blk_Hpp = blk.Q2T_Jbp.T.dot(blk.Q2T_Jbp)
            tmp = np.arange(len(blk.pose_idxs))
            _, _, bi, bj = self._block_indexing(tmp, tmp, self._pose_size,
                                                self._pose_size)
            _, _, i, j = self._block_indexing(blk.pose_idxs, blk.pose_idxs,
                                              self._pose_size, self._pose_size)
            if is_own_sp_mx(Hpp):
                Hpp.idx_isum_arr(i + self.nb, j + self.nb,
                                 np.array(blk_Hpp[bi, bj]).flatten())
            else:
                Hpp[i + self.nb,
                    j + self.nb] += np.array(blk_Hpp[bi, bj]).flatten()

        if sp.issparse(Hpp) and sp.isspmatrix_csc(Hpp):
            return Hpp
        if sp.issparse(Hpp):
            return Hpp.tocsc()
        if is_own_sp_mx(Hpp):
            return own_sp_mx_to_coo(Hpp).tocsc()
        return sp.csc_matrix(Hpp)
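
The tail of this method is a general "coerce anything to CSC" cascade; as a standalone sketch without the project-specific own-sparse type:

import scipy.sparse as sp

def as_csc(H):
    # Return H in CSC format, converting only when necessary.
    if sp.issparse(H):
        return H if sp.isspmatrix_csc(H) else H.tocsc()
    return sp.csc_matrix(H)  # dense (or array-like) fallback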
Example #57
0
def infinity_norm(A):
    """
    Infinity norm of a matrix (maximum absolute row sum).

    Parameters
    ----------
    A : csr_matrix, csc_matrix, sparse, or numpy matrix
        Sparse or dense matrix

    Returns
    -------
    n : float
        Infinity norm of the matrix

    Notes
    -----
    - This serves as an upper bound on spectral radius.
    - csr and csc avoid a deep copy
    - dense calls scipy.linalg.norm

    See Also
    --------
    scipy.linalg.norm : dense matrix norms

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.sparse import spdiags
    >>> from pyamg.util.linalg import infinity_norm
    >>> n=10
    >>> e = np.ones((n,1)).ravel()
    >>> data = [ -1*e, 2*e, -1*e ]
    >>> A = spdiags(data,[-1,0,1],n,n)
    >>> print(infinity_norm(A))
    4.0
    """

    if sparse.isspmatrix_csr(A) or sparse.isspmatrix_csc(A):
        # avoid copying index and ptr arrays
        abs_A = A.__class__((np.abs(A.data), A.indices, A.indptr),
                            shape=A.shape)
        return (abs_A * np.ones((A.shape[1]), dtype=A.dtype)).max()
    elif sparse.isspmatrix(A):
        return (abs(A) * np.ones((A.shape[1]), dtype=A.dtype)).max()
    else:
        return np.dot(np.abs(A), np.ones((A.shape[1], ), dtype=A.dtype)).max()
Example #58
0
def reverse_cuthill_mckee(A, sym=False):
    """
    Returns the permutation array that orders a sparse CSR or CSC matrix
    in Reverse-Cuthill McKee ordering. Since the input matrix must be
    symmetric, this routine works on the matrix A+Trans(A) if the sym flag is
    set to False (Default).

    It is assumed by default (*sym=False*) that the input matrix is not
    symmetric. This is because it is faster to do A+Trans(A) than it is to
    check for symmetry of a generic matrix. If you are guaranteed that the
    matrix is symmetric in structure (the values of the matrix elements do not
    matter), then set *sym=True*.

    Parameters
    ----------
    A : csc_matrix, csr_matrix
        Input sparse CSC or CSR sparse matrix format.
    sym : bool {False, True}
        Flag to set whether input matrix is symmetric.

    Returns
    -------
    perm : array
        Array of permuted row and column indices.

    Notes
    -----
    This routine is used primarily for internal reordering of Lindblad
    superoperators for use in iterative solver routines.

    References
    ----------
    E. Cuthill and J. McKee, "Reducing the Bandwidth of Sparse Symmetric
    Matrices", ACM '69 Proceedings of the 1969 24th national conference,
    (1969).
    
    """
    if not (sp.isspmatrix_csc(A) or sp.isspmatrix_csr(A)):
        raise TypeError('Input must be CSC or CSR sparse matrix.')

    nrows = A.shape[0]

    if not sym:
        A = A + A.transpose()

    return _reverse_cuthill_mckee(A.indices, A.indptr, nrows)
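
For comparison, SciPy ships an equivalent routine in scipy.sparse.csgraph, whose symmetric_mode flag plays the role of sym above; a sketch:

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import reverse_cuthill_mckee

A = csr_matrix(np.array([[1, 0, 1], [0, 1, 0], [1, 0, 1]]))
perm = reverse_cuthill_mckee(A, symmetric_mode=True)
print(perm)  # a bandwidth-reducing row/column permutation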
Example #59
0
 def __init__(self, X_train, Y_train, C1, C2, C3, cliques):
     assert C1 > 0
     assert C2 > 0
     assert C3 > 0
     assert X_train.shape[0] == Y_train.shape[0]
     assert isspmatrix_csc(Y_train)
     self.X = X_train
     self.Y = Y_train
     self.C1 = C1
     self.C2 = C2
     self.C3 = C3
     self.cliques = cliques
     self.M, self.D = self.X.shape
     self.N = self.Y.shape[1]
     self.U = len(self.cliques)
     self.data_helper = DataHelper(self.Y, self.cliques)
     self.trained = False
Example #60
0
def bfs_matching(A):
    """
    Returns an array of row permutations that removes nonzero elements
    from the diagonal of a nonsingular square CSC sparse matrix.  Such
    a permutation is always possible provided that the matrix is 
    nonsingular.
    
    This function looks at the structure of the matrix only.
    
    Parameters
    ----------
    A : csc_matrix
        Input matrix
    
    Returns
    -------
    perm : array
        Array of row permutations.
    
    Notes
    -----
    This function relies on a maximum cardinality bipartite matching algorithm
    based on a breadth-first search (BFS) of the underlying graph [1]_.
    
    References
    ----------
    .. [1] I. S. Duff, K. Kaya, and B. Ucar, "Design, Implementation, and 
    Analysis of Maximum Transversal Algorithms", ACM Trans. Math. Softw.
    38, no. 2, (2011).
    
    """
    nrows = A.shape[0]
    if A.shape[0] != A.shape[1]:
        raise ValueError('bfs_matching requires a square matrix.')
    if A.__class__.__name__ == 'Qobj':
        A = A.data.tocsc()
    elif not sp.isspmatrix_csc(A):
        A = sp.csc_matrix(A)
        warn('bfs_matching requires CSC matrix format.',
             sp.SparseEfficiencyWarning)

    perm = _bfs_matching(A.indices, A.indptr, nrows)
    if np.any(perm == -1):
        raise Exception('Possibly singular input matrix.')
    return perm
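
SciPy's csgraph module provides the same zero-free-diagonal permutation; a hedged equivalent of the call above:

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import maximum_bipartite_matching

A = csr_matrix(np.array([[0, 1], [1, 0]]))
perm = maximum_bipartite_matching(A, perm_type='row')
print(perm)  # [1 0]: permuting the rows puts nonzeros on the diagonal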