def from_spmatrix(cls, data, index=None, columns=None): """ Create a new DataFrame from a scipy sparse matrix. .. versionadded:: 0.25.0 Parameters ---------- data : scipy.sparse.spmatrix Must be convertible to csc format. index, columns : Index, optional Row and column labels to use for the resulting DataFrame. Defaults to a RangeIndex. Returns ------- DataFrame Each column of the DataFrame is stored as a :class:`arrays.SparseArray`. Examples -------- >>> import scipy.sparse >>> mat = scipy.sparse.eye(3) >>> pd.DataFrame.sparse.from_spmatrix(mat) 0 1 2 0 1.0 0.0 0.0 1 0.0 1.0 0.0 2 0.0 0.0 1.0 """ from pandas._libs.sparse import IntIndex from pandas import DataFrame data = data.tocsc() index, columns = cls._prep_index(data, index, columns) n_rows, n_columns = data.shape # We need to make sure indices are sorted, as we create # IntIndex with no input validation (i.e. check_integrity=False ). # Indices may already be sorted in scipy in which case this adds # a small overhead. data.sort_indices() indices = data.indices indptr = data.indptr array_data = data.data dtype = SparseDtype(array_data.dtype, 0) arrays = [] for i in range(n_columns): sl = slice(indptr[i], indptr[i + 1]) idx = IntIndex(n_rows, indices[sl], check_integrity=False) arr = SparseArray._simple_new(array_data[sl], idx, dtype) arrays.append(arr) return DataFrame._from_arrays(arrays, columns=columns, index=index, verify_integrity=False)
def from_spmatrix(cls, data, index=None, columns=None): """ Create a new DataFrame from a scipy sparse matrix. .. versionadded:: 0.25.0 Parameters ---------- data : scipy.sparse.spmatrix Must be convertible to csc format. index, columns : Index, optional Row and column labels to use for the resulting DataFrame. Defaults to a RangeIndex. Returns ------- DataFrame Each column of the DataFrame is stored as a :class:`arrays.SparseArray`. Examples -------- >>> import scipy.sparse >>> mat = scipy.sparse.eye(3) >>> pd.DataFrame.sparse.from_spmatrix(mat) 0 1 2 0 1.0 0.0 0.0 1 0.0 1.0 0.0 2 0.0 0.0 1.0 """ from pandas import DataFrame data = data.tocsc() index, columns = cls._prep_index(data, index, columns) sparrays = [ SparseArray.from_spmatrix(data[:, i]) for i in range(data.shape[1]) ] data = dict(enumerate(sparrays)) result = DataFrame(data, index=index) result.columns = columns return result