def download(self, fpath: Optional[PathLike] = None, **kwargs: Any) -> Any:
    """
    Fetch the dataset, storing it under ``fpath``, and return the loaded data.

    Parameters
    ----------
    fpath
        Destination file. When `None`, ``self.path`` is used. The dataset's
        extension (``self._extension``) is appended if the path lacks it.
    kwargs
        Forwarded to ``self._download``.

    Returns
    -------
    The object returned by ``self._download``.

    Raises
    ------
    ValueError
        If ``self.shape`` is set and differs from the shape of the data.
    """
    target = str(fpath if fpath is not None else self.path)
    if not target.endswith(self._extension):
        target = target + self._extension

    if not os.path.isfile(target):
        logg.debug(f"Downloading dataset `{self.name}` from `{self.url}` as `{target}`")
    else:
        logg.debug(f"Loading dataset `{self.name}` from `{target}`")

    parent_dir = Path(target).parent
    try:
        if not parent_dir.is_dir():
            logg.info(f"Creating directory `{parent_dir}`")
            parent_dir.mkdir(parents=True, exist_ok=True)
    except OSError as err:
        # Failure is only logged; the download is still attempted.
        logg.error(f"Unable to create directory `{parent_dir}`. Reason `{err}`")

    data = self._download(fpath=target, backup_url=self.url, **kwargs)

    shape_mismatch = self.shape is not None and data.shape != self.shape
    if shape_mismatch:
        raise ValueError(f"Expected the data to have shape `{self.shape}`, found `{data.shape}`.")

    return data
def density_normalize(
    self, other: Union[ndarray, spmatrix]
) -> Union[ndarray, spmatrix]:
    """
    Density normalization by the underlying KNN graph.

    Computes ``Q @ other @ Q``, where ``Q`` is the diagonal matrix holding the
    reciprocals of the column sums of the connectivities (``self._conn``).

    Params
    ------
    other:
        Matrix to normalize.

    Returns
    -------
    :class:`np.ndarray` or :class:`scipy.sparse.spmatrix`
        Density normalized transition matrix.
    """
    logg.debug("DEBUG: Density-normalizing the transition matrix")

    # Summing a sparse matrix gives a `(1, n)` result. It must be flattened,
    # otherwise `np.diag` would *extract* a 1-element diagonal from it instead
    # of *building* an `(n, n)` diagonal matrix.
    q = np.asarray(self._conn.sum(axis=0)).ravel()
    inv_q = 1.0 / q

    if issparse(other):
        Q = spdiags(inv_q, 0, other.shape[0], other.shape[0])
    else:
        Q = np.diag(inv_q)

    return Q @ other @ Q
def _set_iroot_via_xroot(self, xroot):
    """Find the observation closest to ``xroot`` and store its index.

    Rows of ``self._adata.X`` are scanned in order; the first row with the
    smallest squared Euclidean distance to ``xroot`` becomes ``self.iroot``.
    The scan stops early once a row is (numerically) identical to ``xroot``.

    Parameters
    ----------
    xroot : np.ndarray
        Vector that marks the root cell, i.e. the initial condition; only
        relevant for computing pseudotime.

    Raises
    ------
    ValueError
        If ``xroot`` does not have one entry per variable.
    """
    if xroot.size != self._adata.shape[1]:
        raise ValueError('The root vector you provided does not have the '
                         'correct dimension.')

    # Squared distance of the best candidate so far; if no row beats this
    # initial value, index 0 is kept.
    best_dsq = 1e10
    best_idx = 0
    for idx in range(self._adata.shape[0]):
        delta = self._adata.X[idx, :] - xroot
        candidate = delta @ delta
        if not candidate < best_dsq:
            continue
        best_dsq, best_idx = candidate, idx
        if np.sqrt(best_dsq) < 1e-10:
            # exact match, no closer row can exist
            break

    logg.debug(f'setting root index to {best_idx}')
    previous = self.iroot
    if previous is not None and best_idx != previous:
        logg.warning(
            f'Changing index of iroot from {self.iroot} to {best_idx}.')
    self.iroot = best_idx
def write_to_adata(self, key_added: Optional[str] = None):
    """
    Store the transition matrix and its parameters in ``adata.uns``.

    Params
    ------
    key_added
        Optional postfix appended (after an underscore) to the key in
        ``adata.uns`` under which the data is saved.

    Returns
    -------
    None
        Sets ``adata.uns[key]`` to a dictionary with:

            - ``'T'`` - the transition matrix
            - ``'params'`` - string representation of this object

    Raises
    ------
    ValueError
        If the transition matrix has not been computed yet.
    """
    if self.transition_matrix is None:
        raise ValueError(
            "Compute transition matrix first as `.compute_transition_matrix()`.`"
        )

    key = _transition(self._direction)
    if key_added is not None:
        key = key + f"_{key_added}"

    already_present = self.adata.uns.get(key, None) is not None
    if already_present:
        logg.debug(f"DEBUG: Overwriting key `{key!r}` in `adata.uns`")

    entry = dict()
    self.adata.uns[key] = entry
    for field, value in (("params", str(self)), ("T", self.transition_matrix)):
        self.adata.uns[key][field] = value

    logg.debug(f"DEBUG: Added `{key!r}` to `adata.uns`")
def _(self, img: xr.DataArray, copy: bool = True, **_: Any) -> xr.DataArray:
    """
    Load an image given as an :class:`xarray.DataArray`.

    A 2D array gets a trailing ``channels`` dimension. Missing ``y``/``x``
    dimensions are assumed to be the first/second dimension and are renamed,
    and the index of the remaining (channel) dimension is dropped if present.

    Parameters
    ----------
    img
        Array with 2 or 3 dimensions.
    copy
        Whether to return a copy of the resulting array.

    Returns
    -------
    The array with dimensions named ``y`` and ``x`` plus one channel dimension.

    Raises
    ------
    ValueError
        If the array does not end up with 3 dimensions.
    """
    logg.debug(f"Loading data `xarray.DataArray` of shape `{img.shape}`")

    if img.ndim == 2:
        img = img.expand_dims("channels", -1)
    if img.ndim != 3:
        raise ValueError(f"Expected image to have `3` dimensions, found `{img.ndim}`.")

    renames: Dict[Hashable, str] = {}
    for position, wanted in enumerate(("y", "x")):
        if wanted in img.dims:
            continue
        logg.warning(f"Dimension `{wanted}` not found in the data. Assuming it's `{img.dims[position]}`")
        renames[img.dims[position]] = wanted

    img = img.rename(renames)

    non_spatial = [dim for dim in img.dims if dim not in ("y", "x")]
    channel_dim = non_spatial[0]
    try:
        img = img.reset_index(dims_or_levels=channel_dim, drop=True)
    except KeyError:
        # the channel dimension might carry no index, nothing to drop then
        pass

    if copy:
        return img.copy()
    return img
def __init__(
    self,
    adata: AnnData,
    *,
    metric: Literal["alignment", "identity", "levenshtein"] = "identity",
    cutoff: float = 0,
    receptor_arms: Literal["TRA", "TRB", "all", "any"] = "all",
    dual_tcr: Literal["primary_only", "all", "any"] = "primary_only",
    sequence: Literal["aa", "nt"] = "aa",
):
    """Set up the computation of neighborhood graphs of CDR3 sequences.

    The parameters are validated, stored on the instance and the index
    dictionary is built right away.
    For documentation of the parameters, see :func:`tcr_neighbors`.

    Raises
    ------
    ValueError
        If the identity metric is combined with a non-zero cutoff, or the
        alignment metric with nucleotide sequences.
    """
    identity_with_cutoff = metric == "identity" and cutoff != 0
    if identity_with_cutoff:
        raise ValueError("Identity metric only works with cutoff = 0")

    nt_alignment = sequence == "nt" and metric == "alignment"
    if nt_alignment:
        raise ValueError(
            "Using nucleotide sequences with alignment metric is not supported. "
        )

    self.adata = adata
    self.metric, self.cutoff = metric, cutoff
    self.receptor_arms, self.dual_tcr = receptor_arms, dual_tcr
    self.sequence = sequence

    # the index relies on the attributes assigned above
    self._build_index_dict()
    self._dist_mat = None

    logging.debug("Finished initalizing TcrNeighbors object. ")
def _(self, img: np.ndarray, **_: Any) -> xr.DataArray:
    """
    Load an image given as a :class:`numpy.ndarray`.

    A 2D array gets a trailing channel axis; the result is wrapped in a
    :class:`xarray.DataArray` with dimensions ``y``, ``x`` and ``channels``.

    Raises
    ------
    ValueError
        If the array has neither 2 nor 3 dimensions.
    """
    logg.debug(f"Loading data `numpy.array` of shape `{img.shape}`")

    if img.ndim == 2:
        img = img[..., np.newaxis]

    if img.ndim == 3:
        return xr.DataArray(img, dims=["y", "x", "channels"])

    raise ValueError(f"Expected image to have `3` dimensions, found `{img.ndim}`.")
def _trim_data(self) -> None:
    """
    Subset genes :attr:`_data` to those present in interactions.

    The columns of :attr:`_data` named in either the source or the target
    column of :attr:`interactions` are stored in ``self._filtered_data``.
    """
    if TYPE_CHECKING:
        assert isinstance(self._data, pd.DataFrame)
        assert isinstance(self.interactions, pd.DataFrame)

    logg.debug("DEBUG: Removing genes not in any interaction")

    genes = set(self.interactions[SOURCE]) | set(self.interactions[TARGET])
    # `.loc` no longer accepts a set as indexer (deprecated in pandas 1.5,
    # an error since pandas 2.0), so pass a list instead.
    self._filtered_data = self._data.loc[:, list(genes)]
def _read_from_adata(self, time_key: str, **kwargs):
    """
    Read the base data and the pseudotime from the underlying adata object.

    Params
    ------
    time_key
        Key in ``adata.obs`` where the pseudotime is stored.
    kwargs
        Forwarded to the parent's ``_read_from_adata``.

    Raises
    ------
    KeyError
        If ``time_key`` is not present in ``adata.obs``.
    ValueError
        If the pseudotime contains negative values.
    """
    super()._read_from_adata(variance_key="palantir", **kwargs)

    if time_key not in self.adata.obs.keys():
        raise KeyError(f"Could not find time key `{time_key!r}` in `adata.obs`.")

    # Validate before assigning, so a failed call does not leave an invalid
    # pseudotime on the object.
    pseudotime = np.array(self.adata.obs[time_key]).astype(_dtype)
    if np.min(pseudotime) < 0:
        # zero is accepted, hence "non-negative" rather than "positive"
        raise ValueError("Pseudotime must be non-negative.")

    logg.debug("Adding `.pseudotime`")
    self.pseudotime = pseudotime
def _write_eig_to_adata(self, eig):
    """
    Store the eigendecomposition on the object and in ``adata.uns``.

    Params
    ------
    eig
        Eigendecomposition to save as ``self._eig`` and under the
        direction-specific ``eig_*`` key of ``adata.uns``.
    """
    first_write = self._eig is None
    if first_write:
        logg.debug(
            f"DEBUG: Adding `.eigendecomposition` and `adata.uns['eig_{self._direction}']`"
        )
    else:
        logg.debug("DEBUG: Overwriting `.eigendecomposition`")

    # keep the class attribute and the AnnData entry in sync
    self._eig = eig
    uns_key = f"eig_{self._direction}"
    self._adata.uns[uns_key] = eig
def _(self, img: Pathlike_t, chunks: Optional[int] = None, **_: Any) -> Optional[xr.DataArray]:
    """
    Load image data from a path.

    JPEG and TIFF files are read and handed to ``self._load_img``. A directory
    is opened as a Zarr store and ``.nc``/``.cdf`` files as NetCDF; both
    replace ``self._data`` and are only allowed when the container is empty.

    Parameters
    ----------
    img
        Path to the image file or to the directory of a Zarr store.
    chunks
        Chunk size passed on to the Zarr/NetCDF/TIFF readers.

    Returns
    -------
    The loaded array for JPEG/TIFF inputs, `None` when ``self._data`` was
    replaced.

    Raises
    ------
    OSError
        If the path does not exist.
    ValueError
        If the suffix is unknown, or a dataset is loaded into a non-empty
        container.
    """

    def fill_default_attrs(dataset: xr.Dataset) -> xr.Dataset:
        # Convert stored tuples into the crop objects and add missing metadata.
        attrs = dataset.attrs
        attrs[Key.img.coords] = CropCoords.from_tuple(attrs.get(Key.img.coords, _NULL_COORDS.to_tuple()))
        attrs[Key.img.padding] = CropPadding.from_tuple(attrs.get(Key.img.padding, _NULL_PADDING.to_tuple()))
        attrs.setdefault(Key.img.mask_circle, False)
        attrs.setdefault(Key.img.scale, 1)
        return dataset

    img = Path(img)
    logg.debug(f"Loading data from `{img}`")

    if not img.exists():
        raise OSError(f"Path `{img}` does not exist.")

    suffix = img.suffix.lower()

    if suffix in (".jpg", ".jpeg"):
        return self._load_img(imread(str(img)))

    if img.is_dir():
        if len(self._data):
            raise ValueError("Loading data from `Zarr` store is disallowed if the container is not empty.")
        self._data = fill_default_attrs(xr.open_zarr(str(img), chunks=chunks))
        return None

    if suffix in (".nc", ".cdf"):
        if len(self._data):
            raise ValueError("Loading data from `NetCDF` is disallowed if the container is not empty.")
        self._data = fill_default_attrs(xr.open_dataset(img, chunks=chunks))
        return None

    if suffix in (".tif", ".tiff"):
        pages = [
            _open_rasterio(f"GTIFF_DIR:{page}:{img}", chunks=chunks, parse_coordinates=False)
            for page in range(1, _num_pages(img) + 1)
        ]
        stacked = xr.concat(pages, dim="band")
        # going through _load_img ensures we can safely do the transpose
        return self._load_img(stacked, copy=False).transpose("y", "x", ...)

    raise ValueError(f"Unknown suffix `{img.suffix}`.")
def test_formats(capsys, logging_state):
    """Every log level writes its message to stderr with the expected prefix."""
    s.logfile = sys.stderr
    s.verbosity = Verbosity.debug

    # (logging function, message, text expected on stderr)
    cases = [
        (l.error, '0', 'ERROR: 0\n'),
        (l.warning, '1', 'WARNING: 1\n'),
        (l.info, '2', '2\n'),
        (l.hint, '3', '--> 3\n'),
        (l.debug, '4', ' 4\n'),
    ]
    for emit, message, expected in cases:
        emit(message)
        assert capsys.readouterr().err == expected
def _filter_interactions_by_genes(self) -> None:
    """
    Subset :attr:`interactions` to only those for which we have the data.

    An interaction is kept when both its source and its target are columns of
    :attr:`_data`.

    Raises
    ------
    ValueError
        If no interactions remain after the filtering.
    """
    if TYPE_CHECKING:
        assert isinstance(self.interactions, pd.DataFrame)

    logg.debug("DEBUG: Removing interactions with no genes in the data")

    has_source = self.interactions[SOURCE].isin(self._data.columns)
    has_target = self.interactions[TARGET].isin(self._data.columns)
    self._interactions = self.interactions[has_source & has_target]

    if self.interactions.empty:
        raise ValueError("After filtering by genes, no interactions remain.")
def compute_transitions(self):
    """Compute the confidence of directed transitions between groups.

    The boolean velocity graph stored in ``adata.uns[self.vkey + '_graph']``
    is aggregated per group, the net flow between each pair of groups is
    computed and non-negative flows are scaled by ``sqrt(n_i * n_j)``, where
    ``n_i`` is the group size times the number of neighbors. The transposed
    result is stored in ``self.transitions_confidence``.

    Raises
    ------
    ValueError
        If the velocity graph is missing from ``adata.uns`` or does not have
        shape ``(n_obs, n_obs)``.
    """
    graph_key = self.vkey + '_graph'

    if graph_key not in self._adata.uns:
        if 'velocyto_transitions' not in self._adata.uns:
            raise ValueError(
                'The passed AnnData needs to have an `uns` annotation '
                "with key 'velocity_graph' - a sparse matrix from RNA velocity."
            )
        # fall back to the legacy key
        self._adata.uns[graph_key] = self._adata.uns['velocyto_transitions']
        sclogg.debug(
            "The key 'velocyto_transitions' has been changed to 'velocity_graph'."
        )

    expected_shape = (self._adata.n_obs, self._adata.n_obs)
    if self._adata.uns[graph_key].shape != expected_shape:
        raise ValueError(
            f"The passed 'velocity_graph' have shape {self._adata.uns[graph_key].shape} "
            f"but shoud have shape {expected_shape}"
        )

    # TODO: a check for 'expected_n_edges_random' in `adata.uns['paga']`
    # (i.e. requiring a previous PAGA run without RNA velocity) used to live
    # here and should be restored at some point.
    import igraph
    from scanpy.utils import get_igraph_from_adjacency, get_sparse_from_igraph

    adjacency = self._adata.uns[graph_key].astype('bool')
    cell_graph = get_igraph_from_adjacency(adjacency, directed=True)
    group_codes = self._adata.obs[self._groups_key].cat.codes.values
    clustering = igraph.VertexClustering(cell_graph, membership=group_codes)

    # set combine_edges to False if you want self loops
    group_graph = clustering.cluster_graph(combine_edges='sum')
    net_flow = get_sparse_from_igraph(group_graph, weight_attr='weight')
    net_flow = net_flow - net_flow.T
    confidence = net_flow.copy()
    net_flow = net_flow.tocoo()

    total_n = self._neighbors.n_neighbors * np.array(clustering.sizes())
    # NOTE: a reference based on the expected number of random edges was
    # planned here; currently the geometric mean of both group totals is used.
    for src, dst, flow in zip(net_flow.row, net_flow.col, net_flow.data):
        reference = np.sqrt(total_n[src] * total_n[dst])
        if flow < 0:
            confidence[src, dst] = 0
        else:
            confidence[src, dst] = flow / reference

    confidence.eliminate_zeros()
    # transpose in order to match convention of stochastic matrices
    # entry ij means transition from j to i
    self.transitions_confidence = confidence.T
def _read_from_adata(self, **kwargs):
    """
    Import the base KNN graph and, optionally, the variances.

    The connectivities are read from the adata object and checked for
    connectedness and symmetry; a failed check only produces a warning.

    Params
    ------
    kwargs
        May contain ``variance_key``; if given, the variances are loaded from
        ``adata.uns['{variance_key}_variances']`` as a sparse matrix, or set
        to `None` when that key is missing.

    Raises
    ------
    KeyError
        If the KNN graph has not been computed.
    """
    if not has_neighs(self.adata):
        raise KeyError("Compute KNN graph first as `scanpy.pp.neighbors()`.")

    self._conn = get_neighs(self.adata, "connectivities").astype(_dtype)

    start = logg.debug("Checking the KNN graph for connectedness")
    if not is_connected(self._conn):
        logg.warning("KNN graph is not connected", time=start)

    start = logg.debug("Checking the KNN graph for symmetry")
    if not is_symmetric(self._conn):
        logg.warning("KNN graph is not symmetric", time=start)

    variance_key = kwargs.pop("variance_key", None)
    if variance_key is None:
        logg.debug("DEBUG: No variance key specified")
        return

    logg.debug(f"DEBUG: Loading variances from `adata.uns[{variance_key!r}]`")
    uns_key = f"{variance_key}_variances"

    if uns_key not in self.adata.uns.keys():
        self._variances = None
        logg.debug(
            f"DEBUG: Unable to load variances`{uns_key}` from `adata.uns`"
        )
        return

    # keep it sparse
    self._variances = csr_matrix(self.adata.uns[uns_key].astype(_dtype))
def partition(conn: Union[nx.DiGraph, np.ndarray, spmatrix],
              sort: bool = True) -> Tuple[List[List[Any]], List[List[Any]]]:
    """
    Partition a directed graph into its transient and recurrent classes.

    In a directed graph *G*, node *j* is accessible from node *i* if there
    exists a path from *i* to *j*. If *i* is accessible from *j* and the
    converse holds as well, then *i* and *j* communicate. Communication forms
    an equivalence relation on directed graphs, so every directed graph can be
    uniquely partitioned into its communication classes (also called strongly
    connected components).

    If *G* describes the state space of a Markov chain, then communication
    classes are often characterized as either recurrent or transient.
    Intuitively, once the process enters a recurrent class, it will never
    leave it again. See [Tolver16]_ for more formal definition.

    Params
    ------
    conn
        Directed graph to partition.
    sort
        Whether to sort the nodes within each class and to order the classes
        by decreasing size (ties broken by the first node).

    Returns
    -------
    (:class:`list`, :class:`list`)
        Recurrent and transient classes respectively.
    """
    start = logg.debug(
        "Partitioning the graph into recurrent and transient classes")

    g = conn if isinstance(conn, nx.DiGraph) else nx.DiGraph(conn)

    def _is_closed(scc) -> bool:
        # A class is recurrent iff no node outside is reachable from it. Any
        # path leaving the class must use an edge that leaves it, so checking
        # the direct successors is equivalent to (and far cheaper than)
        # testing `nx.has_path` for every (inside, outside) pair.
        return all(succ in scc for node in scc for succ in g.successors(node))

    def _maybe_sort(classes):
        if sort:
            return sorted(classes, key=lambda x: (-len(x), x[0]))
        return list(map(list, classes))

    rec_classes, trans_classes = [], []
    for scc in nx.strongly_connected_components(g):
        members = sorted(scc) if sort else scc
        (rec_classes if _is_closed(scc) else trans_classes).append(members)

    # log only after the classes have actually been computed
    logg.debug(" Finish", time=start)

    return _maybe_sort(rec_classes), _maybe_sort(trans_classes)
def _cell_dist_mat_reduce(self):
    """Compute the distance matrix by using custom reduction functions.

    More flexible than `_build_cell_dist_mat_min`, but requires more memory.
    Reduce dual is called before reduce arms.

    A nested dictionary ``{(cell_row, cell_col): {arm: {(c1, c2): value}}}`` is
    built from the per-arm entries of ``self.index_dict`` and handed to
    ``self._reduce_coord_dict``, whose items this generator yields.
    """
    coord_dict = dict()

    def _add_to_dict(d, c1, c2, cell_row, cell_col, value):
        """Add a value to the nested coord dict.

        ``arm`` is not a parameter; it is taken from the enclosing loop below.
        """
        try:
            tmp_dict = d[(cell_row, cell_col)]
            try:
                tmp_dict2 = tmp_dict[arm]
                try:
                    if (c1, c2) in tmp_dict2:
                        # can be in arbitrary order apparently: if the pair is
                        # already present, the value is also stored under the
                        # swapped key
                        assert (c2, c1) not in tmp_dict2
                        tmp_dict2[(c2, c1)] = value
                    tmp_dict2[(c1, c2)] = value
                # NOTE(review): nothing in the try body above looks able to
                # raise KeyError, so this handler appears unreachable - confirm.
                except KeyError:
                    tmp_dict2 = {(c1, c2): value}
            except KeyError:
                # first value of this arm for the cell pair
                tmp_dict[arm] = {(c1, c2): value}
        except KeyError:
            # first value for this cell pair at all
            d[(cell_row, cell_col)] = {arm: {(c1, c2): value}}

    for arm, arm_info in self.index_dict.items():
        dist_mat, seq_to_cell, chain_inds = (
            arm_info["dist_mat"],
            arm_info["seq_to_cell"],
            arm_info["chain_inds"],
        )
        # `dist_mat` is accessed via `.row`/`.col`/`.data`
        # (presumably a scipy COO matrix - TODO confirm)
        for row, col, value in zip(dist_mat.row, dist_mat.col, dist_mat.data):
            for c1, c2 in itertools.product(chain_inds, repeat=2):
                for cell_row, cell_col in itertools.product(
                    seq_to_cell[c1][row], seq_to_cell[c2][col]
                ):
                    # fill upper diagonal. Important: these are dist-mat row,cols
                    # not cell-mat row cols. This is required, because the
                    # itertools.product returns all combinations for the diagonal
                    # but not for the other values.
                    _add_to_dict(coord_dict, c1, c2, cell_row, cell_col, value)
                    if row != col:
                        # mirror the entry for the swapped cell pair
                        _add_to_dict(coord_dict, c1, c2, cell_col, cell_row, value)

    logging.debug("Finished constructing coord-dictionary")
    yield from self._reduce_coord_dict(coord_dict)
def maybe_create_lineage(direction: Direction):
    """
    Turn the lineage data in ``adata.obsm`` into a `Lineage` object, if present.

    Names and colors are taken from ``adata.uns`` when they exist and fit the
    number of lineages; otherwise dummy names/new colors are created. The
    names and colors actually used are written back to ``adata.uns``.

    Params
    ------
    direction
        Direction whose lineages should be loaded.
    """
    is_forward = direction == Direction.FORWARD
    direction_name = 'forward' if is_forward else 'backward'
    lin_key = str(LinKey.FORWARD if is_forward else LinKey.BACKWARD)
    names_key, colors_key = _lin_names(lin_key), _colors(lin_key)

    if lin_key not in adata.obsm.keys():
        logg.debug(
            f"DEBUG: Unable to load {direction_name} "
            f"`Lineage` from `adata.obsm[{lin_key!r}]`")
        return

    n_cells, n_lineages = adata.obsm[lin_key].shape
    logg.info(f"Creating {direction_name} `Lineage` object")

    # --- names ---
    if names_key not in adata.uns.keys():
        logg.warning(
            f"Lineage names not found in `adata.uns[{names_key!r}]`, creating dummy names"
        )
        names = [f"Lineage {i}" for i in range(n_lineages)]
    elif len(adata.uns[names_key]) != n_lineages:
        logg.warning(
            f"Lineage names are don't have the required length ({n_lineages}), creating dummy names"
        )
        names = [f"Lineage {i}" for i in range(n_lineages)]
    else:
        logg.info("Succesfully loaded names")
        names = adata.uns[names_key]

    # --- colors ---
    if colors_key not in adata.uns.keys():
        logg.warning(
            f"Lineage colors not found in `adata.uns[{colors_key!r}]`, creating new colors"
        )
        colors = _create_categorical_colors(n_lineages)
    elif len(adata.uns[colors_key]) != n_lineages or not all(
            is_color_like(c) for c in adata.uns[colors_key]):
        logg.warning(
            f"Lineage colors don't have the required length ({n_lineages}) "
            f"or are not color-like, creating new colors")
        colors = _create_categorical_colors(n_lineages)
    else:
        logg.info("Succesfully loaded colors")
        colors = adata.uns[colors_key]

    adata.obsm[lin_key] = Lineage(adata.obsm[lin_key], names=names, colors=colors)
    adata.uns[colors_key] = colors
    adata.uns[names_key] = names
def _read_from_adata(self, vkey: str, **kwargs):
    """
    Read the base data and the velocity correlations from the adata object.

    Params
    ------
    vkey
        Prefix of the keys in ``adata.uns``; the graphs are expected under
        ``'{vkey}_graph'`` and ``'{vkey}_graph_neg'``.
    kwargs
        Forwarded to the parent's ``_read_from_adata``.

    Raises
    ------
    KeyError
        If either of the two graphs is missing from ``adata.uns``.
    """
    super()._read_from_adata(variance_key="velocity", **kwargs)

    pos_key = vkey + "_graph"
    neg_key = vkey + "_graph_neg"
    missing = (pos_key not in self.adata.uns.keys()) or (
        neg_key not in self.adata.uns.keys()
    )
    if missing:
        raise KeyError(
            "Compute cosine correlations first as `scvelo.tl.velocity_graph()`."
        )

    positive = csr_matrix(self.adata.uns[pos_key]).copy()
    negative = csr_matrix(self.adata.uns[neg_key]).copy()

    logg.debug("Adding `.velo_corr`, the velocity correlations")
    # the two graphs are stored separately, combine them by summation
    self.velo_corr = (positive + negative).astype(_dtype)
def test_logfile(tmp_path, logging_state):
    """Logging can be redirected to a stream and afterwards to a file path."""
    s.verbosity = Verbosity.hint

    # 1) log into an in-memory stream
    buffer = StringIO()
    s.logfile = buffer
    assert s.logfile is buffer
    assert s.logpath is None
    l.error('test!')
    assert buffer.getvalue() == 'ERROR: test!\n'

    # 2) switch to a file; setting the path also replaces the log file
    log_path = tmp_path / 'test.log'
    s.logpath = log_path
    assert s.logpath == log_path
    assert s.logfile.name == str(log_path)
    l.hint('test2')
    # debug messages are below the `hint` verbosity and must not be written
    l.debug('invisible')
    assert s.logpath.read_text() == '--> test2\n'
def _chain_pairing(
    adata: AnnData,
    mask_ambiguous: np.ndarray,
    mask_has_ir: np.ndarray,
    mask_multichain: np.ndarray,
) -> np.ndarray:
    """Annotate chain pairing categories.

    Categories are assigned from least to most specific, so later assignments
    overwrite earlier ones; ``ambiguous`` and ``multichain`` take precedence
    over everything else.

    Parameters
    ----------
    adata
        Annotated data; the ``IR_*_junction_aa`` columns of `adata.obs` are read.
    mask_ambiguous
        Boolean array of the same length as `adata.obs`, marking which cells
        have an ambiguous receptor configuration.
    mask_has_ir
        Boolean array marking the cells with an immune receptor.
    mask_multichain
        Boolean array marking the multichain cells.

    Returns
    -------
    Array of strings with one category per cell.
    """
    obs = adata.obs
    longest_label = "two full chains"
    results = np.empty(shape=(obs.shape[0], ), dtype=f"<U{len(longest_label)}")

    logging.debug("Done initalizing")

    has_vj1, has_vdj1, has_vj2, has_vdj2 = (
        ~_is_na(obs[column].values)
        for column in (
            "IR_VJ_1_junction_aa",
            "IR_VDJ_1_junction_aa",
            "IR_VJ_2_junction_aa",
            "IR_VDJ_2_junction_aa",
        )
    )

    logging.debug("Done with masks")

    ir_positions = np.where(mask_has_ir)[0]
    for chain_mask in (has_vj1, has_vdj1, has_vj2, has_vdj2):
        # no cell can have a junction_aa sequence but no TCR
        assert np.setdiff1d(np.where(chain_mask)[0], ir_positions).size == 0

    paired = has_vj1 & has_vdj1
    results[~mask_has_ir] = "no IR"
    results[has_vj1] = "orphan VJ"
    results[has_vdj1] = "orphan VDJ"
    results[paired] = "single pair"
    results[paired & has_vj2] = "extra VJ"
    results[paired & has_vdj2] = "extra VDJ"
    results[paired & has_vj2 & has_vdj2] = "two full chains"
    results[mask_ambiguous] = "ambiguous"
    results[mask_multichain] = "multichain"

    assert "" not in results, "One or more chains are not characterized"

    return results
def _check_and_create_colors(self):
    """
    Make sure there is exactly one color per metastable-state category.

    Existing colors of the right length are kept. If none are set, they are
    loaded from ``adata.uns`` when the stored ones fit; in all other cases new
    colors are created and written to ``adata.uns``.
    """
    n_cats = len(self._meta_states.cat.categories)
    color_key = _colors(self._rc_key)

    def _regenerate():
        # create fresh colors and mirror them in the adata object
        self._meta_states_colors = _create_categorical_colors(n_cats)
        self._adata.uns[color_key] = self._meta_states_colors

    if self._meta_states_colors is not None:
        if len(self._meta_states_colors) != n_cats:
            _regenerate()
        return

    stored_fit = color_key in self._adata.uns and n_cats == len(
        self._adata.uns[color_key]
    )
    if not stored_fit:
        _regenerate()
        return

    logg.debug("DEBUG: Loading colors from `.adata` object")
    self._meta_states_colors = _convert_to_hex_colors(
        self._adata.uns[color_key]
    )
def scale_array(
    X,
    *,
    zero_center: bool = True,
    max_value: Optional[float] = None,
    copy: bool = False,
    return_mean_std: bool = False,
):
    """
    Scale ``X`` by the standard deviation and optionally center and clip it.

    Mean and variance come from ``_get_mean_var`` (presumably computed per
    column - TODO confirm); standard deviations of zero are replaced by one.

    Parameters
    ----------
    X
        Dense or sparse matrix. Integer input is cast to float.
    zero_center
        Whether to subtract the mean. Not possible for sparse input.
    max_value
        If given, values above it are clipped to it after scaling.
    copy
        Whether to operate on a copy of ``X``.
    return_mean_std
        Whether to also return the mean and the standard deviation.

    Returns
    -------
    The scaled matrix, or the tuple ``(X, mean, std)`` if ``return_mean_std``.

    Raises
    ------
    ValueError
        If ``zero_center`` is requested for a sparse matrix.
    """
    if copy:
        X = X.copy()

    if max_value is not None and not zero_center:
        logg.info(  # TODO: be careful of what? This should be more specific
            "... be careful when using `max_value` "
            "without `zero_center`."
        )

    if np.issubdtype(X.dtype, np.integer):
        logg.info(
            '... as scaling leads to float results, integer '
            'input is cast to float, returning copy.'
        )
        X = X.astype(float)

    mean, var = _get_mean_var(X)
    std = np.sqrt(var)
    # constant features would cause a division by zero otherwise
    std[std == 0] = 1

    if not issparse(X):
        if zero_center:
            X -= mean
        X /= std
    elif zero_center:
        raise ValueError("Cannot zero-center sparse matrix.")
    else:
        sparsefuncs.inplace_column_scale(X, 1 / std)

    # do the clipping
    if max_value is not None:
        logg.debug(f"... clipping at max_value {max_value}")
        X[X > max_value] = max_value

    if not return_mean_std:
        return X
    return X, mean, std
def save_fig(fig: Figure, path: Union[str, Path], make_dir: bool = True, ext: str = "png", **kwargs: Any) -> None:
    """
    Save a figure.

    Parameters
    ----------
    fig
        Figure to save.
    path
        Path where to save the figure. If path is relative, save it under :attr:`scanpy.settings.figdir`.
    make_dir
        Whether to try making the directory if it does not exist.
    ext
        Extension to use if none is provided.
    kwargs
        Keyword arguments for :meth:`matplotlib.figure.Figure.savefig`.
        ``bbox_inches='tight'`` and ``transparent=True`` are used unless overridden.

    Returns
    -------
    None
        Just saves the plot.
    """
    if os.path.splitext(path)[1] == "":
        path = f"{path}.{ext}"

    path = Path(path)

    if not path.is_absolute():
        path = Path(settings.figdir) / path

    if make_dir:
        # Use the parent of *this* path. `Path.parent` on the class is a
        # property object, which made the old code create a bogus directory.
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            # best effort: `savefig` below fails if the directory is missing
            logg.debug(f"Unable to create directory `{path.parent}`. Reason: `{e}`")

    logg.debug(f"Saving figure to `{path!r}`")

    kwargs.setdefault("bbox_inches", "tight")
    kwargs.setdefault("transparent", True)

    fig.savefig(path, **kwargs)
def _get_categorical(
    adata: AnnData,
    key: str,
    palette: Optional[str] = None,
    vec: Optional[pd.Series] = None,
) -> np.ndarray:
    """
    Get the RGB colors of a categorical annotation, one per observation.

    Parameters
    ----------
    adata
        Annotated data object. ``adata.obs[key]`` is overwritten when ``vec`` is given.
    key
        Key in ``adata.obs`` of the categorical annotation.
    palette
        Palette to use; passing one forces the colors to be updated.
    vec
        Categorical values to store under ``adata.obs[key]`` before the lookup.

    Returns
    -------
    Array with the RGB color of every observation.

    Raises
    ------
    TypeError
        If ``vec`` is given but is not categorical.
    """
    if vec is not None:
        if not is_categorical_dtype(vec):
            raise TypeError(f"Expected a `categorical` type, found `{infer_dtype(vec)}`.")
        if key in adata.obs:
            logg.debug(f"Overwriting `adata.obs[{key!r}]`")
        adata.obs[key] = vec.values

    add_colors_for_categorical_sample_annotation(
        adata, key=key, force_update_colors=palette is not None, palette=palette
    )

    categories = adata.obs[key].cat.categories
    rgb_colors = [to_rgb(color) for color in adata.uns[Key.uns.colors(key)]]
    color_of = dict(zip(categories, rgb_colors))

    return np.array([color_of[value] for value in adata.obs[key]])
def _knn_smooth(diff_kernel, velo_graph, trans_graph, weight_diffusion):
    """
    Combine the KNN (similarity) kernel with the velocity kernel.

    Params
    ------
    diff_kernel
        How to combine the graphs. Valid options are:

            - `'mult'` - element-wise product of both graphs
            - `'sum'` - convex combination of both normalized graphs
            - `'both'` - mask and multiply first, then take the convex
              combination of the normalized graphs
    velo_graph
        Velocity graph.
    trans_graph
        Similarity (KNN) graph. It is not modified.
    weight_diffusion
        Weight in `[0, 1]` given to the similarity graph in the combination.

    Returns
    -------
    The combined graph.

    Raises
    ------
    ValueError
        If ``diff_kernel`` is not a valid option.
    """
    assert weight_diffusion >= 0, "Weight diffusion must be non-negative."
    assert weight_diffusion <= 1, "Weight diffusion must be <= 1."

    # work on a copy, the caller's graph must stay un-normalized (density correction)
    G_sim = trans_graph.copy()

    if diff_kernel == "mult":
        logg.debug("DEBUG: Using a multiplicative diffusion kernel")
        # element wise multiplication
        velo_graph = velo_graph.multiply(G_sim)
    elif diff_kernel == "sum":
        logg.debug("DEBUG: Using an additive diffusion kernel")
        velo_graph, G_sim = _normalize(velo_graph), _normalize(G_sim)
        velo_graph = (1 - weight_diffusion) * velo_graph + weight_diffusion * G_sim
    elif diff_kernel == "both":
        logg.debug(
            "DEBUG: Using first a multiplicative and then an additive diffusion kernel"
        )
        G_sim = G_sim.multiply(velo_graph > 0)
        velo_graph = velo_graph.multiply(G_sim)
        # Both graphs must be normalized before mixing. A misspelled variable
        # used to drop the normalized similarity graph here.
        velo_graph, G_sim = _normalize(velo_graph), _normalize(G_sim)
        velo_graph = (1 - weight_diffusion) * velo_graph + weight_diffusion * G_sim
    else:
        raise ValueError(
            f"Invalid kernel type `{diff_kernel}`. Valid options are: `'mult', 'sum', 'both'`."
        )

    return velo_graph
def compute_transition_matrix(
    self, density_normalize: bool = True, **kwargs
) -> "ConnectivityKernel":
    """
    Compute transition matrix based on transcriptomic similarity.

    Uses the symmetric, weighted KNN graph as computed by
    :func:`scanpy.pp.neighbors`. Depending on the parameters used there, the
    connectivities can be UMAP connectivities or gaussian-kernel-based
    connectivities with adaptive kernel width. If the parameters are the same
    as in the previous call, the cached transition matrix is kept.

    Params
    ------
    density_normalize
        Whether or not to use the underlying KNN graph for density normalization.

    Returns
    -------
    :class:`ConnectivityKernel`
        Self; makes :paramref:`transition_matrix` available.
    """
    start = logg.info("Computing transition matrix based on connectivities")

    params = {"dnorm": density_normalize}
    if self._params == params:
        # nothing changed since the last computation
        assert self.transition_matrix is not None, _ERROR_EMPTY_CACHE_MSG
        logg.debug(_LOG_USING_CACHE)
        logg.info(" Finish", time=start)
        return self

    self._params = params

    conn = self._conn.copy()
    conn = self.density_normalize(conn) if density_normalize else conn
    logg.info(" Finish", time=start)

    self.transition_matrix = csr_matrix(conn)

    return self
def connectivities(self):
    """Get the weighted adjacency matrix derived from the distance matrix.

    The cutoff is used to normalize the distances. With a cutoff of ``0`` the
    distance matrix itself is returned; otherwise a converted copy is.
    """
    if self.cutoff == 0:
        return self._dist_mat

    start = logging.debug(
        "Started converting distances to connectivities. ")

    weighted = self._dist_mat.copy()
    # the stored values are offset by one w.r.t. the actual distances
    actual_dist = weighted.data - 1
    # the structure of the matrix stays the same, only the data is replaced
    weighted.data = (self.cutoff - actual_dist) / self.cutoff
    weighted.eliminate_zeros()

    logging.debug("Finished converting distances to connectivities. ",
                  time=start)
    return weighted
def compute_transition_matrix(self, *args, **kwargs) -> "SimpleNaryExpression":
    """
    Compute the transition matrix of this expression from its operands.

    Operands without a transition matrix are computed first, unless they are
    plain kernels, which must have been computed by the user beforehand.

    Returns
    -------
    :class:`SimpleNaryExpression`
        Self; makes ``transition_matrix`` available.

    Raises
    ------
    RuntimeError
        If an operand is a kernel whose transition matrix is not computed.
    """
    # must be done before, because the underlying expressions don't have to be normed
    if isinstance(self, KernelSimpleAdd):
        self._maybe_recalculate_constants(Constant)
    elif isinstance(self, KernelAdaptiveAdd):
        self._maybe_recalculate_constants(ConstantMatrix)

    for kexpr in self:
        is_kernel = isinstance(kexpr, Kernel)

        if kexpr.transition_matrix is not None:
            if is_kernel:
                logg.debug(_LOG_USING_CACHE)
            continue

        if is_kernel:
            raise RuntimeError(
                f"Kernel `{kexpr}` is uninitialized. "
                f"Compute its transition matrix as `.compute_transition_matrix()`."
            )
        kexpr.compute_transition_matrix()

    operand_matrices = [kexpr.transition_matrix for kexpr in self]
    self.transition_matrix = csr_matrix(self._fn(operand_matrices))

    return self
def save_fig(fig, path: Union[str, os.PathLike], make_dir: bool = True, ext: str = "png") -> None:
    """
    Save a plot.

    Params
    ------
    fig: :class:`matplotlib.figure.Figure`
        Figure to save.
    path:
        Path where to save the figure. If path is relative, save it under `sc.settings.figdir`.
    make_dir:
        Whether to try making the directory if it does not exist.
    ext:
        Extension to use if the path has none.

    Returns
    -------
    None
        Just saves the plot.
    """
    _, suffix = os.path.splitext(path)
    if suffix == "":
        path = f"{path}.{ext}"

    if not os.path.isabs(path):
        # relative paths live under scanpy's figure directory
        path = os.path.join(sc.settings.figdir, path)

    if make_dir:
        directory, _ = os.path.split(path)
        _maybe_create_dir(directory)

    logg.debug(f"Saving figure to `{path!r}`")

    fig.savefig(path, bbox_inches="tight", transparent=True)