def sqrt(data, copy=False, chunked=False, chunk_size=None):
    """Take the element-wise square root of a data matrix.

    Computes `X = sqrt(X)`.

    Parameters
    ----------
    data : :class:`~scanpy.api.AnnData`, `np.ndarray`, `sp.sparse`
        The (annotated) data matrix of shape `n_obs` × `n_vars`. Rows
        correspond to cells and columns to genes.
    copy : `bool`, optional (default: `False`)
        Only relevant for :class:`~scanpy.api.AnnData` input: work on a
        copy and return it instead of modifying `data`.
    chunked : `bool`, optional (default: `False`)
        Only relevant for :class:`~scanpy.api.AnnData` input: iterate over
        `adata.chunked_X(chunk_size)` and write each transformed chunk
        back into the matching rows of `.X`.
    chunk_size : optional (default: `None`)
        Forwarded unchanged to `chunked_X`.

    Returns
    -------
    For an :class:`~scanpy.api.AnnData`: the transformed copy if `copy`
    is set, otherwise `None` (the object is updated). For a dense or
    sparse matrix: the square-rooted matrix.
    """
    # Raw matrices are handled first; this is also the recursion target
    # for the AnnData branch below.
    if not isinstance(data, AnnData):
        if issparse(data):
            return data.sqrt()
        return np.sqrt(data)

    if copy:
        target = data.copy()
    else:
        target = data

    if not chunked:
        target.X = sqrt(data.X)
    else:
        for block, lo, hi in target.chunked_X(chunk_size):
            target.X[lo:hi] = sqrt(block)

    if copy:
        return target
    return None
def _scale(X, zero_center=True):
    """Scale `X` in place by the square root of its variance.

    Parameters
    ----------
    X
        Dense array or sparse matrix; it is modified in place.
        Presumably laid out as observations × variables so that the
        statistics from `_get_mean_var` are per column -- confirm against
        that helper.
    zero_center
        If `True`, subtract the mean before scaling. Not possible for
        sparse input, because centering would destroy sparsity.

    Returns
    -------
    None. `X` is updated in place.

    Raises
    ------
    ValueError
        If `X` is sparse and `zero_center` is true.
    """
    # Implementation notes carried over from the original author:
    # - using sklearn.StandardScaler throws an error related to
    #   int to long trafo for very large matrices
    # - using X.multiply is slower
    # - the result differs very slightly from StandardScaler, why?
    sparse_input = issparse(X)
    # Fail before the (potentially expensive) mean/variance pass.
    if sparse_input and zero_center:
        raise ValueError('Cannot zero-center sparse matrix.')

    mean, var = _get_mean_var(X)
    scale = np.sqrt(var)
    # NOTE(review): entries with zero variance give scale == 0, so the
    # divisions below yield non-finite values; confirm that callers
    # filter such variables beforehand.
    if sparse_input:
        sparsefuncs.inplace_column_scale(X, 1/scale)
    else:
        # Honor `zero_center` for dense input as well; previously the
        # mean was subtracted unconditionally.
        if zero_center:
            X -= mean
        X /= scale
def sqrt(
    data: AnnData,
    copy: bool = False,
    chunked: bool = False,
    chunk_size: Optional[int] = None,
):
    """Apply an element-wise square root to the data matrix.

    Computes :math:`X = \\sqrt(X)`.

    Parameters
    ----------
    data
        The (annotated) data matrix of shape ``n_obs`` × ``n_vars``.
        Rows correspond to cells and columns to genes. A plain dense
        array or sparse matrix is handled as well; in that case the
        transformed matrix is returned directly.
    copy
        For :class:`~scanpy.api.AnnData` input, operate on a copy and
        return it rather than updating ``data``.
    chunked
        Process the data matrix chunk by chunk, which saves memory.
        Applies only to :class:`~anndata.AnnData`.
    chunk_size
        ``n_obs`` of the chunks to process the data in.

    Returns
    -------
    AnnData, None
        For :class:`~anndata.AnnData` input, the transformed copy when
        ``copy`` is set, else ``None`` after updating ``data``.
    """
    if isinstance(data, AnnData):
        result = data.copy() if copy else data
        if chunked:
            # Each chunk is a raw matrix, so the recursive call takes the
            # non-AnnData path at the bottom of this function.
            for piece, first, last in result.chunked_X(chunk_size):
                rooted = sqrt(piece)
                result.X[first:last] = rooted
        else:
            rooted = sqrt(data.X)
            result.X = rooted
        if copy:
            return result
        return None

    # Raw matrix: sparse matrices provide their own ``sqrt`` method.
    if issparse(data):
        return data.sqrt()
    return np.sqrt(data)