def _knn_smooth(diff_kernel, velo_graph, trans_graph, weight_diffusion):
    """
    Combine the velocity kernel with the transcriptomic KNN kernel.

    Params
    ------
    diff_kernel
        How to combine the kernels. One of `'mult'`, `'sum'` or `'both'`.
    velo_graph
        Sparse matrix of velocity-based transition weights.
    trans_graph
        Sparse transcriptomic KNN connectivity matrix.
    weight_diffusion
        Relative weight in `[0, 1]` given to the diffusion (KNN) kernel.
        Only used for `'sum'` and `'both'`.

    Returns
    -------
    The combined velocity graph (not row-normalized for `'mult'`).
    """
    # raise instead of assert: asserts are stripped under `python -O`
    if not (0 <= weight_diffusion <= 1):
        raise ValueError("Weight diffusion must be in `[0, 1]`.")

    # work on a copy because we don't want to normalize the original
    # graph (it is needed later for density correction)
    G_sim = trans_graph.copy()

    if diff_kernel == "mult":
        logg.debug("DEBUG: Using a multiplicative diffusion kernel")
        # element wise multiplication
        velo_graph = velo_graph.multiply(G_sim)
    elif diff_kernel == "sum":
        logg.debug("DEBUG: Using an additive diffusion kernel")
        velo_norm, trans_norm = _normalize(velo_graph), _normalize(G_sim)
        velo_graph = (1 - weight_diffusion) * velo_norm + weight_diffusion * trans_norm
    elif diff_kernel == "both":
        logg.debug(
            "DEBUG: Using first a multiplicative and then an additive diffusion kernel"
        )
        # restrict the KNN graph to the support of the velocity graph
        G_sim = G_sim.multiply(velo_graph > 0)
        velo_graph = velo_graph.multiply(G_sim)
        # BUG FIX: a typo (`trans_grap`) previously discarded the normalized KNN
        # graph and the *un-normalized* `G_sim` was mixed in instead
        velo_norm, trans_norm = _normalize(velo_graph), _normalize(G_sim)
        velo_graph = (1 - weight_diffusion) * velo_norm + weight_diffusion * trans_norm
    else:
        raise ValueError(
            f"Invalid kernel type `{diff_kernel}`. Valid options are: `'mult', 'sum', 'both'`."
        )

    return velo_graph
def test_backward_manual_dense_norm(self, adata):
    """Backward kernel combination should match the deprecated `transition_matrix`."""
    # build both kernels without density normalization, in the backward direction
    velo = VelocityKernel(adata, backward=True).compute_transition_matrix(
        density_normalize=False
    )
    conn_kernel = ConnectivityKernel(adata, backward=True).compute_transition_matrix(
        density_normalize=False
    )

    # combine the kernels, then density-normalize manually
    combined = 0.8 * velo + 0.2 * conn_kernel
    conn = _get_neighs(adata, "connectivities")
    expected = _normalize(
        density_normalization(combined.transition_matrix, conn)
    )

    # legacy code path writes its result into `adata.uns`
    transition_matrix(
        adata,
        diff_kernel="sum",
        weight_diffusion=0.2,
        density_normalize=True,
        backward=True,
    )
    legacy = adata.uns[_transition(Direction.BACKWARD)]["T"]

    np.testing.assert_allclose(expected.A, legacy.A, rtol=_rtol)
def transition_matrix(self, value: Union[np.ndarray, spmatrix]) -> None:
    """
    Set a new value of the transition matrix.

    Params
    ------
    value
        The new transition matrix.

    Returns
    -------
    None
    """
    # a kernel without a parent always stores a row-normalized matrix;
    # a child kernel normalizes only when configured to do so
    should_normalize = self._parent is None or self._normalize
    self._transition_matrix = _normalize(value) if should_normalize else value
def test_addition_adaptive(self, adata):
    """Adaptive combination `vk ^ ck` weights each kernel by its stored variances."""
    n = adata.n_obs
    velo_var = np.random.random(size=(n, n))
    conn_var = np.random.random(size=(n, n))
    adata.uns["velocity_variances"] = velo_var
    adata.uns["connectivity_variances"] = conn_var

    vk, ck = create_kernels(adata)
    combined = vk ^ ck

    expected = _normalize(
        0.5 * velo_var * vk.transition_matrix + 0.5 * conn_var * ck.transition_matrix
    )

    np.testing.assert_allclose(combined.transition_matrix.A, expected)
def test_addition_adaptive_wrong_variances(self, adata):
    """Adaptive combination must NOT reduce to plain constant-weight addition."""
    w1, w2 = np.random.uniform(0, 10, 2)
    total = w1 + w2
    n = adata.n_obs
    adata.uns["velocity_variances"] = np.random.random(size=(n, n))
    adata.uns["connectivity_variances"] = np.random.random(size=(n, n))

    vk, ck = create_kernels(adata)
    combined = w1 * vk ^ w2 * ck

    # this ignores the variances, so it should differ from the adaptive result
    naive = _normalize(
        w1 / total * vk.transition_matrix + w2 / total * ck.transition_matrix
    )

    assert not np.allclose(combined.transition_matrix.A, naive.A)
def test_addition_adaptive_constants(self, adata):
    """Constant weights and variances should both enter the adaptive combination."""
    w1, w2 = np.random.uniform(0, 10, 2)
    total = w1 + w2
    n = adata.n_obs
    velo_var = np.random.random(size=(n, n))
    conn_var = np.random.random(size=(n, n))
    adata.uns["velocity_variances"] = velo_var
    adata.uns["connectivity_variances"] = conn_var

    vk, ck = create_kernels(adata)
    combined = w1 * vk ^ w2 * ck

    expected = _normalize(
        w1 / total * velo_var * vk.transition_matrix
        + w2 / total * conn_var * ck.transition_matrix
    )

    np.testing.assert_allclose(combined.transition_matrix.A, expected)
def test_palantir(self, adata):
    """PalantirKernel should equal a manually pseudotime-biased, normalized KNN graph."""
    conn = _get_neighs(adata, "connectivities")
    n_neighbors = _get_neighs_params(adata)["n_neighbors"]
    pseudotime = adata.obs["latent_time"]

    # manual reference: bias the KNN graph along pseudotime, then row-normalize
    expected = _normalize(bias_knn(conn, pseudotime, n_neighbors))

    actual = (
        PalantirKernel(adata, time_key="latent_time")
        .compute_transition_matrix(density_normalize=False)
        .transition_matrix
    )

    np.testing.assert_allclose(expected.A, actual.A, rtol=_rtol)
def test_addition_adaptive_4_kernels(self, adata):
    """Adaptive combination should generalize to four weighted kernels."""
    weights = np.random.uniform(0, 10, 4)
    total = weights.sum()
    n = adata.n_obs
    velo_var = np.random.random(size=(n, n))
    conn_var = np.random.random(size=(n, n))
    adata.uns["velocity_variances"] = velo_var
    adata.uns["connectivity_variances"] = conn_var

    vk, ck = create_kernels(adata)
    vk1, ck1 = create_kernels(adata)
    w1, w2, w3, w4 = weights

    combined = w1 * vk ^ w2 * ck ^ w3 * vk1 ^ w4 * ck1

    # variance arrays are shared per kernel type (velocity vs. connectivity)
    expected = _normalize(
        w1 / total * velo_var * vk.transition_matrix
        + w2 / total * conn_var * ck.transition_matrix
        + w3 / total * velo_var * vk1.transition_matrix
        + w4 / total * conn_var * ck1.transition_matrix
    )

    np.testing.assert_allclose(combined.transition_matrix.A, expected)
def transition_matrix(
    adata: AnnData,
    vkey: str = "velocity",
    backward: bool = False,
    self_transitions: Optional[str] = None,
    sigma_corr: Optional[float] = None,
    diff_kernel: Optional[str] = None,
    weight_diffusion: float = 0.2,
    density_normalize: bool = True,
    backward_mode: str = "transpose",
    inplace: bool = True,
) -> csr_matrix:
    """
    Computes transition probabilities from velocity graph.

    THIS FUNCTION HAS BEEN DEPRECATED.
    Interact with kernels via the Kernel class or via cellrank.tools_transition_matrix.transition_matrix

    Employs ideas of both scvelo as well as velocyto.

    Parameters
    --------
    adata : :class:`anndata.AnnData`
        Annotated Data Matrix
    vkey
        Name of the velocity estimates to be used
    backward
        Whether to use the transition matrix to push forward (`False`) or to pull backward (`True`)
    self_transitions
        How to fill the diagonal. Can be either 'velocyto' or 'scvelo'. Two different
        heuristics are used. Can prevent dividing by zero in unlucky situations for the
        reverse process
    sigma_corr
        Kernel width for exp kernel to be used to compute transition probabilities
        from the velocity graph. If `None`, the inverse of the median absolute cosine
        correlation will be used
    diff_kernel
        Whether to multiply the velocity connectivities with transcriptomic distances to make them more robust.
        Options are ('sum', 'mult', 'both')
    weight_diffusion
        Relative weight given to the diffusion kernel. Must be in `[0, 1]`. Only matters when
        using 'sum' or 'both' for the diffusion kernel
    density_normalize
        Whether to use the transcriptomic KNN graph for density normalization as performed in scanpy
        when computing diffusion maps
    backward_mode
        Options are ['transpose', 'negate']
    inplace
        If True, adds to adata. Otherwise returns.

    Returns
    --------
    T: :class:`scipy.sparse.csr_matrix`
        Transition matrix
    """
    logg.info("Computing transition probability from velocity graph")
    # NOTE(review): removed a leftover `print(datetime.now())` debug statement

    # get the direction of the process
    direction = Direction.BACKWARD if backward else Direction.FORWARD

    # get the velocity correlations
    if (vkey + "_graph" not in adata.uns.keys()) or (
        vkey + "_graph_neg" not in adata.uns.keys()
    ):
        raise ValueError(
            "You need to run `tl.velocity_graph` first to compute cosine correlations"
        )
    velo_corr, velo_corr_neg = (
        csr_matrix(adata.uns[vkey + "_graph"]).copy(),
        csr_matrix(adata.uns[vkey + "_graph_neg"]).copy(),
    )
    velo_corr_comb_ = (velo_corr + velo_corr_neg).astype(np.float64)
    if backward:
        if backward_mode == "negate":
            # reverse process: negate the cosine correlations
            velo_corr_comb = velo_corr_comb_.multiply(-1)
        elif backward_mode == "transpose":
            # reverse process: transpose the correlation graph
            velo_corr_comb = velo_corr_comb_.T
        else:
            raise ValueError(f"Unknown backward_mode `{backward_mode}`.")
    else:
        velo_corr_comb = velo_corr_comb_
    med_corr = np.median(np.abs(velo_corr_comb.data))

    # compute the raw transition matrix. At the moment, this is just an exponential kernel
    logg.debug("DEBUG: Computing the raw transition matrix")
    if sigma_corr is None:
        sigma_corr = 1 / med_corr
    velo_graph = velo_corr_comb.copy()
    velo_graph.data = np.exp(velo_graph.data * sigma_corr)

    # fetch the KNN graph when needed for smoothing or density correction
    if diff_kernel is not None or density_normalize:
        params = _get_neighs_params(adata)
        # BUG FIX: the second string was missing its `f` prefix, so the neighbor
        # count was never interpolated
        logg.debug(
            f'DEBUG: Using KNN graph computed in basis {params.get("use_rep", "Unknown")!r} '
            f'with {params["n_neighbors"]} neighbors'
        )
        trans_graph = _get_neighs(adata, "connectivities")
        dev = norm((trans_graph - trans_graph.T), ord="fro")
        if dev > 1e-4:
            # BUG FIX: missing `f` prefix - `dev` was never interpolated
            logg.warning(f"KNN base graph not symmetric, `dev={dev}`")

    # KNN smoothing
    if diff_kernel is not None:
        logg.debug("DEBUG: Smoothing KNN graph with diffusion kernel")
        velo_graph = _knn_smooth(diff_kernel, velo_graph, trans_graph, weight_diffusion)

    # set the diagonal elements. This is important especially for the backwards direction
    logg.debug("DEBUG: Setting diagonal elements")
    velo_graph = _self_loops(self_transitions, velo_graph)

    # density normalisation - taken from scanpy
    if density_normalize:
        logg.debug("DEBUG: Density correcting the velocity graph")
        velo_graph = density_normalization(velo_graph, trans_graph)

    # row-normalize to get a stochastic matrix
    T = _normalize(velo_graph)
    if not inplace:
        logg.info("Computed transition matrix")
        return T

    if _transition(direction) in adata.uns.keys():
        logg.warning(
            f"`.uns` already contains a field `{_transition(direction)!r}`. Overwriting"
        )

    params = {
        "backward": backward,
        "self_transitions": self_transitions,
        "sigma_corr": np.round(sigma_corr, 3),
        "diff_kernel": diff_kernel,
        "weight_diffusion": weight_diffusion,
        "density_normalize": density_normalize,
    }
    adata.uns[_transition(direction)] = {"T": T, "params": params}
    logg.info(
        f"Computed transition matrix and added the key `{_transition(direction)!r}` to `adata.uns`"
    )
def compute_lin_probs(
    self,
    keys: Optional[Sequence[str]] = None,
    check_irred: bool = False,
    norm_by_frequ: bool = False,
) -> None:
    """
    Compute absorption probabilities for a Markov chain.

    For each cell, this computes the probability of it reaching any of the approximate
    recurrent classes. This also computes the entropy over absorption probabilities,
    which is a measure of cell plasticity, see [Setty19]_.

    Params
    ------
    keys
        Comma separated sequence of keys defining the recurrent classes.
    check_irred
        Check whether the matrix restricted to the given transient states is irreducible.
    norm_by_frequ
        Divide absorption probabilities for `rc_i` by `|rc_i|`.

    Returns
    -------
    None
        Nothing, but updates the following fields: :paramref:`lineage_probabilities`,
        :paramref:`diff_potential`.
    """
    # metastable states must exist before absorption probabilities can be computed
    if self._meta_states is None:
        raise RuntimeError(
            "Compute approximate recurrent classes first as `.compute_metastable_states()`"
        )
    if keys is not None:
        # deduplicate and fix the order of the requested classes
        keys = sorted(set(keys))

    # Note: There are three relevant data structures here
    # - self.metastable_states: pd.Series which contains annotations for approx rcs. Associated colors in
    #   self.metastable_states_colors
    # - self.lin_probs: Lineage object which contains the lineage probabilities with associated names and colors
    # - metastable_states_: pd.Series, temporary copy of self.approx rcs used in the context of this function.
    #   In this copy, some metastable_states may be removed or combined with others
    start = logg.info("Computing absorption probabilities")

    # we don't expect the abs. probs. to be sparse, therefore, make T dense. See scipy docs about sparse lin solve.
    t = self._T.A if self._is_sparse else self._T

    # colors are created in `compute_metastable_states`, this is just in case
    self._check_and_create_colors()

    # process the current annotations according to `keys` (may merge/drop classes)
    metastable_states_, colors_ = _process_series(
        series=self._meta_states, keys=keys, colors=self._meta_states_colors
    )

    # create empty lineage object; the real probabilities are filled in at the end
    if self._lin_probs is not None:
        logg.debug("DEBUG: Overwriting `.lin_probs`")
    self._lin_probs = Lineage(
        np.empty((1, len(colors_))),
        names=metastable_states_.cat.categories,
        colors=colors_,
    )

    # warn in case only one state is left
    keys = list(metastable_states_.cat.categories)
    if len(keys) == 1:
        logg.warning(
            "There is only one recurrent class, all cells will have probability 1 of going there"
        )

    # create arrays of all recurrent and transient indices
    # (a cell is recurrent iff it is annotated with any remaining category)
    mask = np.repeat(False, len(metastable_states_))
    for cat in metastable_states_.cat.categories:
        mask = np.logical_or(mask, metastable_states_ == cat)
    rec_indices, trans_indices = np.where(mask)[0], np.where(~mask)[0]

    # create Q (restriction transient-transient), S (restriction transient-recurrent) and I (Q-sized identity)
    q = t[trans_indices, :][:, trans_indices]
    s = t[trans_indices, :][:, rec_indices]
    eye = np.eye(len(trans_indices))

    if check_irred:
        if self._is_irreducible is None:
            self.compute_partition()
        if not self._is_irreducible:
            logg.warning("Restriction Q is not irreducible")

    # compute abs probs. Since we don't expect sparse solution, dense computation is faster.
    # solves (I - Q) X = S, the classic absorbing-chain fundamental-matrix system
    logg.debug("DEBUG: Solving the linear system to find absorption probabilities")
    abs_states = solve(eye - q, s)

    # aggregate to class level by summing over columns belonging to the same metastable_states
    approx_rc_red = metastable_states_[mask]
    rec_classes_red = {
        key: np.where(approx_rc_red == key)[0] for key in approx_rc_red.cat.categories
    }
    _abs_classes = np.concatenate(
        [
            np.sum(abs_states[:, rec_classes_red[key]], axis=1)[:, None]
            for key in approx_rc_red.cat.categories
        ],
        axis=1,
    )

    if norm_by_frequ:
        # divide each class column by the class size |rc_i|
        logg.debug("DEBUG: Normalizing by frequency")
        _abs_classes /= [len(value) for value in rec_classes_red.values()]
    _abs_classes = _normalize(_abs_classes)

    # for recurrent states, set their self-absorption probability to one
    abs_classes = np.zeros((self._n_states, len(rec_classes_red)))
    rec_classes_full = {
        cl: np.where(metastable_states_ == cl)
        for cl in metastable_states_.cat.categories
    }
    for col, cl_indices in enumerate(rec_classes_full.values()):
        # NOTE(review): `cl_indices` is the 1-tuple returned by `np.where`;
        # indexing with it relies on numpy unpacking the tuple — confirm intended
        abs_classes[trans_indices, col] = _abs_classes[:, col]
        abs_classes[cl_indices, col] = 1

    # entropy over absorption probabilities = differentiation potential per cell
    self._dp = entropy(abs_classes.T)
    self._lin_probs = Lineage(
        abs_classes,
        names=list(self._lin_probs.names),
        colors=list(self._lin_probs.colors),
    )

    # write results back to the AnnData object
    self._adata.obsm[self._lin_key] = self._lin_probs
    self._adata.obs[_dp(self._lin_key)] = self._dp
    self._adata.uns[_lin_names(self._lin_key)] = self._lin_probs.names
    self._adata.uns[_colors(self._lin_key)] = self._lin_probs.colors

    logg.info(" Finish", time=start)