def _reconstruct_lineage(self, attr: PrettyEnum, obsm_key: str):
    """
    Rebuild the :class:`Lineage` held under ``attr`` and write it back to ``adata``.

    Names and colors are obtained through ``_set_or_debug`` using the keys derived from
    ``self._term_key``. If their length does not match the number of columns of the stored
    values (or a color is not a color-like string), new ones are created.

    Parameters
    ----------
    attr
        Attribute holding the values from which the lineage is rebuilt.
    obsm_key
        Key in ``adata.obsm`` under which the rebuilt lineage is saved.

    Returns
    -------
    None
        Nothing, just updates ``attr`` and the matching ``adata.obsm``/``adata.uns`` entries.
        Does nothing further when ``attr`` holds ``None``.
    """
    self._set_or_debug(obsm_key, self.adata.obsm, attr)
    names = self._set_or_debug(_lin_names(self._term_key), self.adata.uns)
    colors = self._set_or_debug(_colors(self._term_key), self.adata.uns)

    # choosing this instead of property because GPCCA doesn't have property for FIN_ABS_PROBS
    probs = self._get(attr)
    if probs is None:
        return

    n_lineages = probs.shape[1]

    if len(names) != n_lineages:
        logg.debug(
            f"Expected lineage names to be of length `{n_lineages}`, found `{len(names)}`. "
            f"Creating new names"
        )
        names = [f"Lineage {i}" for i in range(n_lineages)]

    if len(colors) != n_lineages or not all(
        isinstance(c, str) and is_color_like(c) for c in colors
    ):
        # report the length of the colors here, not the length of the names
        logg.debug(
            f"Expected lineage colors to be of length `{n_lineages}`, found `{len(colors)}`. "
            f"Creating new colors"
        )
        colors = _create_categorical_colors(n_lineages)

    self._set(attr, Lineage(probs, names=names, colors=colors))

    self.adata.obsm[obsm_key] = self._get(attr)
    self.adata.uns[_lin_names(self._term_key)] = names
    self.adata.uns[_colors(self._term_key)] = colors
def test_compute_absorption_probabilities_normal_run(self, adata_large: AnnData):
    """Run CFLARE with 2 final states and compare `P.DIFF_POT`/`P.ABS_PROBS` with what is stored in `adata`."""
    velocity = VelocityKernel(adata_large).compute_transition_matrix(softmax_scale=4)
    connectivity = ConnectivityKernel(adata_large).compute_transition_matrix()
    final_kernel = 0.8 * velocity + 0.2 * connectivity

    mc = cr.tl.estimators.CFLARE(final_kernel)
    mc.compute_eigendecomposition(k=5)
    mc.compute_final_states(use=2)
    mc.compute_absorption_probabilities()

    # `P.DIFF_POT` must be a series mirrored in `adata.obs`
    diff_pot = mc._get(P.DIFF_POT)
    assert isinstance(diff_pot, pd.Series)
    assert f"{AbsProbKey.FORWARD}_dp" in mc.adata.obs.keys()
    np.testing.assert_array_equal(diff_pot, mc.adata.obs[f"{AbsProbKey.FORWARD}_dp"])

    # `P.ABS_PROBS` must be a lineage with one column per final state, mirrored in `adata.obsm`
    abs_probs = mc._get(P.ABS_PROBS)
    assert isinstance(abs_probs, cr.tl.Lineage)
    assert abs_probs.shape == (mc.adata.n_obs, 2)
    assert f"{AbsProbKey.FORWARD}" in mc.adata.obsm.keys()
    np.testing.assert_array_equal(abs_probs.X, mc.adata.obsm[f"{AbsProbKey.FORWARD}"])

    # names and colors are saved in `adata.uns`
    names_key = _lin_names(AbsProbKey.FORWARD)
    assert names_key in mc.adata.uns.keys()
    np.testing.assert_array_equal(abs_probs.names, mc.adata.uns[names_key])

    colors_key = _colors(AbsProbKey.FORWARD)
    assert colors_key in mc.adata.uns.keys()
    np.testing.assert_array_equal(abs_probs.colors, mc.adata.uns[colors_key])

    # every row has to sum to 1
    np.testing.assert_allclose(abs_probs.X.sum(1), 1)
def _reconstruct_lineage(self, attr: PrettyEnum, obsm_key: str):
    """
    Rebuild the :class:`Lineage` held under ``attr`` and write it back to ``adata``.

    Names and colors are obtained through ``_set_or_debug`` using the keys derived from
    ``self._term_key``. If they don't fit the number of columns of the stored values (or a
    color is not a color-like string), they are taken from the stored object when it already
    is a :class:`Lineage`; otherwise new ones are created and a warning is logged.

    Parameters
    ----------
    attr
        Attribute holding the values from which the lineage is rebuilt.
    obsm_key
        Key in ``adata.obsm`` under which the rebuilt lineage is saved.

    Returns
    -------
    None
        Nothing, just updates ``attr`` and the matching ``adata.obsm``/``adata.uns`` entries.
        Does nothing further when ``attr`` holds ``None``.
    """
    self._set_or_debug(obsm_key, self.adata.obsm, attr)
    names = self._set_or_debug(_lin_names(self._term_key), self.adata.uns)
    colors = self._set_or_debug(_colors(self._term_key), self.adata.uns)

    probs = self._get(attr)
    if probs is None:
        return

    n_lineages = probs.shape[1]

    if len(names) != n_lineages:
        if isinstance(probs, Lineage):
            names = probs.names
        else:
            logg.warning(
                f"Expected lineage names to be of length `{n_lineages}`, found `{len(names)}`. "
                f"Creating new names"
            )
            names = [f"Lineage {i}" for i in range(n_lineages)]

    if len(colors) != n_lineages or not all(
        isinstance(c, str) and is_color_like(c) for c in colors
    ):
        if isinstance(probs, Lineage):
            colors = probs.colors
        else:
            # report the length of the colors here, not the length of the names
            logg.warning(
                f"Expected lineage colors to be of length `{n_lineages}`, found `{len(colors)}`. "
                f"Creating new colors"
            )
            colors = _create_categorical_colors(n_lineages)

    self._set(attr, Lineage(probs, names=names, colors=colors))

    self.adata.obsm[obsm_key] = self._get(attr)
    self.adata.uns[_lin_names(self._term_key)] = names
    self.adata.uns[_colors(self._term_key)] = colors
def _check_abs_probs(mc: cr.tl.estimators.GPCCA, has_main_states: bool = True):
    """
    Assert that the estimator's final states and absorption probabilities agree with ``mc.adata``.

    Parameters
    ----------
    mc
        Estimator to check.
    has_main_states
        Whether to additionally compare the colors saved for the final states with the
        colors of the `A.FIN_ABS_PROBS` lineage.
    """
    fin_states = mc._get(P.FIN)

    if has_main_states:
        assert isinstance(fin_states, pd.Series)
        assert_array_nan_equal(mc.adata.obs[str(FinalStatesKey.FORWARD)], fin_states)
        fin_categories = list(fin_states.cat.categories)
        np.testing.assert_array_equal(
            mc.adata.uns[_colors(FinalStatesKey.FORWARD)],
            mc._get(A.FIN_ABS_PROBS)[fin_categories].colors,
        )

    diff_pot = mc._get(P.DIFF_POT)
    abs_probs = mc._get(P.ABS_PROBS)
    adata = mc.adata

    assert isinstance(diff_pot, pd.Series)
    assert isinstance(abs_probs, cr.tl.Lineage)
    np.testing.assert_array_almost_equal(abs_probs.sum(1), 1.0)

    # the lineage and its metadata are mirrored in `adata`
    np.testing.assert_array_equal(adata.obsm[str(AbsProbKey.FORWARD)], abs_probs.X)
    np.testing.assert_array_equal(
        adata.uns[_lin_names(AbsProbKey.FORWARD)], abs_probs.names
    )
    np.testing.assert_array_equal(
        adata.uns[_colors(AbsProbKey.FORWARD)], abs_probs.colors
    )
    np.testing.assert_array_equal(adata.obs[_dp(AbsProbKey.FORWARD)], diff_pot)

    # final states and their probabilities are mirrored in `adata.obs`
    assert_array_nan_equal(adata.obs[FinalStatesKey.FORWARD.s], fin_states)
    np.testing.assert_array_equal(
        adata.obs[_probs(FinalStatesKey.FORWARD)], mc._get(P.FIN_PROBS)
    )
def _write_terminal_states(self, time=None) -> None:
    """
    Save the terminal states, their probabilities, colors and names to ``adata`` and log it.

    Parameters
    ----------
    time
        Forwarded to ``logg.info`` as its ``time`` argument.

    Returns
    -------
    None
        Nothing, just writes to ``adata.obs``, ``adata.uns`` and possibly ``adata.obsm``.
    """
    term_states = self._get(P.TERM)

    self.adata.obs[self._term_key] = term_states
    self.adata.obs[_probs(self._term_key)] = self._get(P.TERM_PROBS)

    self.adata.uns[_colors(self._term_key)] = self._get(A.TERM_COLORS)
    self.adata.uns[_lin_names(self._term_key)] = np.array(term_states.cat.categories)

    # checking for None because terminal states can be set using `set_terminal_states`
    # without the probabilities in GPCCA
    write_memberships = getattr(self, A.TERM_ABS_PROBS.s, None) is not None and hasattr(
        self, "_term_abs_prob_key"
    )
    if write_memberships:
        self.adata.obsm[self._term_abs_prob_key] = self._get(A.TERM_ABS_PROBS)
        extra_msg = f" `adata.obsm[{self._term_abs_prob_key!r}]`\n"
    else:
        extra_msg = ""

    logg.info(
        f"Adding `adata.obs[{_probs(self._term_key)!r}]`\n"
        f" `adata.obs[{self._term_key!r}]`\n"
        f"{extra_msg}"
        f" `.{P.TERM_PROBS}`\n"
        f" `.{P.TERM}`\n"
        " Finish",
        time=time,
    )
def _check_abs_probs(mc: cr.tl.estimators.GPCCA, has_main_states: bool = True):
    """
    Assert that the estimator's terminal states and absorption probabilities agree with ``mc.adata``.

    Parameters
    ----------
    mc
        Estimator to check.
    has_main_states
        Whether to additionally compare the colors saved for the terminal states with the
        colors of the `A.TERM_ABS_PROBS` lineage.
    """
    term_states = mc._get(P.TERM)

    if has_main_states:
        assert isinstance(term_states, pd.Series)
        assert_array_nan_equal(mc.adata.obs[str(TermStatesKey.FORWARD)], term_states)
        term_categories = list(term_states.cat.categories)
        np.testing.assert_array_equal(
            mc.adata.uns[_colors(TermStatesKey.FORWARD)],
            mc._get(A.TERM_ABS_PROBS)[term_categories].colors,
        )

    prime_deg = mc._get(P.PRIME_DEG)
    abs_probs = mc._get(P.ABS_PROBS)
    adata = mc.adata

    assert isinstance(prime_deg, pd.Series)
    assert isinstance(abs_probs, cr.tl.Lineage)
    np.testing.assert_array_almost_equal(abs_probs.sum(1), 1.0)

    # the lineage and its metadata are mirrored in `adata`
    np.testing.assert_array_equal(adata.obsm[str(AbsProbKey.FORWARD)], abs_probs.X)
    np.testing.assert_array_equal(
        adata.uns[_lin_names(AbsProbKey.FORWARD)], abs_probs.names
    )
    np.testing.assert_array_equal(
        adata.uns[_colors(AbsProbKey.FORWARD)], abs_probs.colors
    )
    np.testing.assert_array_equal(adata.obs[_pd(AbsProbKey.FORWARD)], prime_deg)

    # terminal states and their probabilities are mirrored in `adata.obs`
    assert_array_nan_equal(adata.obs[TermStatesKey.FORWARD.s], term_states)
    np.testing.assert_array_equal(
        adata.obs[_probs(TermStatesKey.FORWARD)], mc._get(P.TERM_PROBS)
    )
def test_compute_initial_states_from_forward_normal_run(self, adata_large: AnnData):
    """Compute 1 initial state with a forward GPCCA and check what is written to `adata`."""
    velocity = VelocityKernel(adata_large, backward=False).compute_transition_matrix(
        softmax_scale=4
    )
    connectivity = ConnectivityKernel(
        adata_large, backward=False
    ).compute_transition_matrix()
    terminal_kernel = 0.8 * velocity + 0.2 * connectivity

    mc = cr.tl.estimators.GPCCA(terminal_kernel)
    mc.compute_schur(n_components=10, method="krylov")
    mc.compute_macrostates(n_states=2, n_cells=5)

    obsm_keys_before = set(mc.adata.obsm.keys())
    # the expected category is the index label at the minimum of `P.COARSE_STAT_D`
    stat_dist = mc._get(P.COARSE_STAT_D)
    expected = stat_dist.index[np.argmin(stat_dist)]

    mc._compute_initial_states(1)

    key = TermStatesKey.BACKWARD.s
    obs, uns = mc.adata.obs, mc.adata.uns

    assert key in obs
    np.testing.assert_array_equal(obs[key].cat.categories, [expected])
    assert _probs(key) in obs
    assert _colors(key) in uns
    assert _lin_names(key) in uns

    # make sure that we don't write anything there - it's useless
    assert set(mc.adata.obsm.keys()) == obsm_keys_before
def test_non_unique_names(self, adata: AnnData, path: Path, lin_key: str, _: int):
    """Reading a file whose lineage names contain a duplicate must raise a `ValueError`."""
    stored_names = adata.uns[_lin_names(lin_key)]
    # overwrite the first name with the second one to create a duplicate
    stored_names[0] = stored_names[1]

    sc.write(path, adata)

    with pytest.raises(ValueError):
        cr.read(path)
def maybe_create_lineage(
    direction: Union[str, Direction], pretty_name: Optional[str] = None
):
    """
    Convert ``adata.obsm[lin_key]`` into a :class:`Lineage`, if that key is present.

    ``adata`` is not a parameter, it is taken from the enclosing scope. Names and colors are
    loaded from ``adata.uns``; when they are missing or invalid, new ones are created and a
    warning is logged. The names and colors which were used are written back to ``adata.uns``.

    Parameters
    ----------
    direction
        Either a :class:`Direction`, which is mapped to the forward/backward `AbsProbKey`, or
        directly the key in ``adata.obsm``.
    pretty_name
        Optional name which is prepended to ```Lineage``` in the log messages.

    Returns
    -------
    None
        Nothing, just updates ``adata.obsm`` and ``adata.uns``.
    """
    if isinstance(direction, Direction):
        lin_key = str(
            AbsProbKey.FORWARD
            if direction == Direction.FORWARD
            else AbsProbKey.BACKWARD
        )
    else:
        lin_key = direction

    pretty_name = "" if pretty_name is None else (pretty_name + " ")
    names_key, colors_key = _lin_names(lin_key), _colors(lin_key)

    if lin_key not in adata.obsm.keys():
        logg.debug(
            f"Unable to load {pretty_name}`Lineage` from `adata.obsm[{lin_key!r}]`"
        )
        return

    n_cells, n_lineages = adata.obsm[lin_key].shape
    logg.info(f"Creating {pretty_name}`Lineage` from `adata.obsm[{lin_key!r}]`")

    # lineage names: load them if present and of the right length, otherwise create new ones
    names = None
    if names_key not in adata.uns.keys():
        logg.warning(
            f" Lineage names not found in `adata.uns[{names_key!r}]`, creating new names"
        )
    elif len(adata.uns[names_key]) != n_lineages:
        logg.warning(
            f" Lineage names don't have the required length ({n_lineages}), creating new names"
        )
    else:
        logg.info(" Successfully loaded names")
        names = adata.uns[names_key]
    if names is None:
        names = [f"Lineage {i}" for i in range(n_lineages)]

    # lineage colors: additionally require every entry to be color-like
    colors = None
    if colors_key not in adata.uns.keys():
        logg.warning(
            f" Lineage colors not found in `adata.uns[{colors_key!r}]`, creating new colors"
        )
    elif len(adata.uns[colors_key]) != n_lineages or not all(
        map(is_color_like, adata.uns[colors_key])
    ):
        logg.warning(
            f" Lineage colors don't have the required length ({n_lineages}) "
            f"or are not color-like, creating new colors"
        )
    else:
        logg.info(" Successfully loaded colors")
        colors = adata.uns[colors_key]
    if colors is None:
        colors = _create_categorical_colors(n_lineages)

    adata.obsm[lin_key] = Lineage(adata.obsm[lin_key], names=names, colors=colors)
    adata.uns[colors_key] = colors
    adata.uns[names_key] = names
def _check_renaming_no_write_terminal(mc: cr.tl.estimators.GPCCA) -> None:
    """Assert that neither the estimator nor ``mc.adata`` holds any forward terminal states data."""
    # nothing is set on the estimator
    for attr in (P.TERM, P.TERM_PROBS, A.TERM_ABS_PROBS):
        assert mc._get(attr) is None

    # nothing was written to `adata`
    key = TermStatesKey.FORWARD.s
    assert key not in mc.adata.obs
    assert _probs(key) not in mc.adata.obs
    for make_key in (_colors, _lin_names):
        assert make_key(key) not in mc.adata.uns
def _assert_has_all_keys(adata: AnnData, direction: Direction):
    """
    Assert that ``adata`` has the transition matrix, lineage and terminal states keys for ``direction``.

    Parameters
    ----------
    adata
        Annotated data object to inspect.
    direction
        Direction to check. Anything other than ``Direction.FORWARD`` is checked against
        the backward keys.
    """
    transition_key = _transition(direction)
    assert transition_key in adata.obsp.keys()
    # check if it's not a dummy transition matrix
    assert not np.all(np.isclose(np.diag(adata.obsp[transition_key].A), 1.0))
    assert f"{transition_key}_params" in adata.uns.keys()

    # both directions run the same checks, only the keys differ
    if direction == Direction.FORWARD:
        abs_key = AbsProbKey.FORWARD
        term_key = TermStatesKey.FORWARD
        prefix = DirPrefix.FORWARD
    else:
        abs_key = AbsProbKey.BACKWARD
        term_key = TermStatesKey.BACKWARD
        prefix = DirPrefix.BACKWARD

    assert str(abs_key) in adata.obsm
    assert isinstance(adata.obsm[str(abs_key)], cr.tl.Lineage)
    assert _colors(abs_key) in adata.uns.keys()
    assert _lin_names(abs_key) in adata.uns.keys()

    assert str(term_key) in adata.obs
    assert is_categorical_dtype(adata.obs[str(term_key)])
    assert _probs(term_key) in adata.obs

    # check the correlations with all lineages have been computed
    # NOTE(review): the result of `.all()` is discarded, so this currently verifies nothing;
    # confirm that the columns are expected in `adata.var` before turning it into an assert
    lin_probs = adata.obsm[str(abs_key)]
    np.in1d(
        [f"{str(prefix)} {key}" for key in lin_probs.names],
        adata.var.keys(),
    ).all()
def test_no_names(self, adata: AnnData, path: Path, lin_key: str, n_lins: int):
    """When the lineage names are missing in the file, reading must create default names."""
    names_key = _lin_names(lin_key)
    del adata.uns[names_key]

    sc.write(path, adata)
    loaded = cr.read(path)

    lineage = loaded.obsm[lin_key]
    default_names = [f"Lineage {i}" for i in range(n_lins)]

    assert isinstance(lineage, Lineage)
    np.testing.assert_array_equal(lineage.names, default_names)
    # the created names are also saved in `.uns`
    np.testing.assert_array_equal(lineage.names, loaded.uns[names_key])
def test_normal_run(self, adata: AnnData, path: Path, lin_key: str, n_lins: int):
    """Valid lineage names and colors must survive a write/read round trip."""
    expected_colors = _create_categorical_colors(10)[-n_lins:]
    expected_names = [f"foo {i}" for i in range(n_lins)]

    adata.uns[_colors(lin_key)] = expected_colors
    adata.uns[_lin_names(lin_key)] = expected_names

    sc.write(path, adata)
    lineage = cr.read(path).obsm[lin_key]

    np.testing.assert_array_equal(lineage.colors, expected_colors)
    np.testing.assert_array_equal(lineage.names, expected_names)
def test_wrong_names_length(self, adata: AnnData, path: Path, lin_key: str, n_lins: int):
    """When there are too many lineage names in the file, reading must create default names."""
    names_key = _lin_names(lin_key)
    # store a plain list which is 3 entries longer than the number of lineages
    adata.uns[names_key] = list(adata.uns[names_key]) + ["foo", "bar", "baz"]

    sc.write(path, adata)
    loaded = cr.read(path)

    lineage = loaded.obsm[lin_key]
    default_names = [f"Lineage {i}" for i in range(n_lins)]

    assert isinstance(lineage, Lineage)
    np.testing.assert_array_equal(lineage.names, default_names)
    # the created names are also saved in `.uns`
    np.testing.assert_array_equal(lineage.names, loaded.uns[names_key])
def _write_absorption_probabilities(self, time: datetime, extra_msg: str = "") -> None:
    """
    Save `P.ABS_PROBS` together with its names and colors to ``adata`` and log it.

    Parameters
    ----------
    time
        Forwarded to ``logg.info`` as its ``time`` argument.
    extra_msg
        Text inserted into the log message after the ``adata.obsm`` line.

    Returns
    -------
    None
        Nothing, just writes to ``adata.obsm`` and ``adata.uns``.
    """
    abs_prob = self._get(P.ABS_PROBS)

    self.adata.obsm[self._abs_prob_key] = abs_prob
    for uns_key, value in (
        (_lin_names(self._abs_prob_key), abs_prob.names),
        (_colors(self._abs_prob_key), abs_prob.colors),
    ):
        self.adata.uns[uns_key] = value

    logg.info(
        f"Adding `adata.obsm[{self._abs_prob_key!r}]`\n"
        f"{extra_msg}"
        f" `.{P.ABS_PROBS}`\n"
        " Finish",
        time=time,
    )
def _write_initial_states(
    self, membership: Lineage, probs: pd.Series, cats: pd.Series, time=None
) -> None:
    """
    Save the initial states to ``adata`` under the backward terminal states key and log it.

    Parameters
    ----------
    membership
        Lineage whose colors and names are saved in ``adata.uns``.
    probs
        Values saved in ``adata.obs`` under the probabilities key.
    cats
        Values saved in ``adata.obs`` under the backward terminal states key.
    time
        Forwarded to ``logg.info`` as its ``time`` argument.

    Returns
    -------
    None
        Nothing, just writes to ``adata.obs`` and ``adata.uns``.
    """
    key = TermStatesKey.BACKWARD.s

    for obs_key, values in ((key, cats), (_probs(key), probs)):
        self.adata.obs[obs_key] = values

    for uns_key, values in (
        (_colors(key), membership.colors),
        (_lin_names(key), membership.names),
    ):
        self.adata.uns[uns_key] = values

    logg.info(
        f"Adding `adata.obs[{_probs(key)!r}]`\n `adata.obs[{key!r}]`\n",
        time=time,
    )
def test_rename_terminal_states_dont_update_adata(self, adata_large: AnnData):
    """Renaming with `update_adata=False` must change the estimator but leave `adata` untouched."""
    velocity = VelocityKernel(adata_large).compute_transition_matrix(softmax_scale=4)
    connectivity = ConnectivityKernel(adata_large).compute_transition_matrix()
    terminal_kernel = 0.8 * velocity + 0.2 * connectivity

    mc = cr.tl.estimators.CFLARE(terminal_kernel)
    mc.compute_eigendecomposition(k=5)
    mc.compute_terminal_states(use=2)

    mc.rename_terminal_states({"0": "foo", "1": "bar"}, update_adata=False)

    # the estimator has the new names
    np.testing.assert_array_equal(mc._get(P.TERM).cat.categories, ["foo", "bar"])

    # `adata` still has the old ones
    key = TermStatesKey.FORWARD.s
    np.testing.assert_array_equal(mc.adata.obs[key].cat.categories, ["0", "1"])
    np.testing.assert_array_equal(mc.adata.uns[_lin_names(key)], ["0", "1"])
def _write_final_states(self, time=None) -> None:
    """
    Save the final states, their probabilities, colors and names to ``adata`` and log it.

    Parameters
    ----------
    time
        Forwarded to ``logg.info`` as its ``time`` argument.

    Returns
    -------
    None
        Nothing, just writes to ``adata.obs``, ``adata.uns`` and possibly ``adata.obsm``.
    """
    fin_states = self._get(P.FIN)

    self.adata.obs[self._fs_key] = fin_states
    self.adata.obs[_probs(self._fs_key)] = self._get(P.FIN_PROBS)

    self.adata.uns[_colors(self._fs_key)] = self._get(A.FIN_COLORS)
    self.adata.uns[_lin_names(self._fs_key)] = list(fin_states.cat.categories)

    # checking for None because final states can be set using `set_final_states`
    # without the probabilities in GPCCA
    write_memberships = getattr(self, A.FIN_ABS_PROBS.s, None) is not None and hasattr(
        self, "_fin_abs_prob_key"
    )
    if write_memberships:
        self.adata.obsm[self._fin_abs_prob_key] = self._get(A.FIN_ABS_PROBS)
        extra_msg = f" `adata.obsm[{self._fin_abs_prob_key!r}]`\n"
    else:
        extra_msg = ""

    logg.info(
        f"Adding `adata.obs[{_probs(self._fs_key)!r}]`\n"
        f" `adata.obs[{self._fs_key!r}]`\n"
        f"{extra_msg}"
        f" `.{P.FIN_PROBS}`\n"
        f" `.{P.FIN}`",
        time=time,
    )
def rename_terminal_states(
    self, new_names: Mapping[str, str], update_adata: bool = True
) -> None:
    """
    Rename the names of :paramref:`{ts}`.

    Parameters
    ----------
    new_names
        Mapping where keys are the old names and the values are the new names. New names must be unique.
    update_adata
        Whether to update underlying :paramref:`adata` object as well or not.

    Returns
    -------
    None
        Nothing, just updates the names of :paramref:`{ts}`.

    Raises
    ------
    RuntimeError
        If the terminal states have not been computed.
    TypeError
        If ``new_names`` is not a mapping.
    ValueError
        If some of the old names are invalid or if the names would not be unique after renaming.
    """
    term_states = self._get(P.TERM)

    if term_states is None:
        raise RuntimeError(_COMP_TERM_STATES_MSG)

    if not isinstance(new_names, Mapping):
        raise TypeError(f"Expected a `Mapping` type, found `{type(new_names)!r}`.")
    if not new_names:
        return

    new_names = {k: str(v) for k, v in new_names.items()}

    mask = np.isin(list(new_names.keys()), term_states.cat.categories)
    if not np.all(mask):
        raise ValueError(
            f"Invalid old terminal states names: `{np.array(list(new_names.keys()))[~mask]}`."
        )

    names_after_renaming = [new_names.get(n, n) for n in term_states.cat.categories]
    if len(set(names_after_renaming)) != len(term_states.cat.categories):
        raise ValueError(
            f"After renaming, the names will not be unique: `{names_after_renaming}`."
        )

    # `rename_categories(..., inplace=True)` was deprecated in pandas 1.3 and removed in
    # pandas 2.0, so the renamed series has to be stored back explicitly
    # NOTE(review): assumes `A.TERM` is the private attribute backing `P.TERM` - confirm
    self._set(A.TERM, term_states.cat.rename_categories(new_names))

    memberships = (
        self._get(A.TERM_ABS_PROBS) if hasattr(self, A.TERM_ABS_PROBS.s) else None
    )
    if memberships is not None:  # GPCCA
        memberships.names = [new_names.get(n, n) for n in memberships.names]
        self._set(A.TERM_ABS_PROBS, memberships)

    # we can be just computing it and it's not yet saved in adata
    if (
        update_adata
        and self._term_key in self.adata.obs
        and _lin_names(self._term_key) in self.adata.uns
    ):
        self.adata.obs[self._term_key] = self.adata.obs[
            self._term_key
        ].cat.rename_categories(new_names)
        self.adata.uns[_lin_names(self._term_key)] = np.array(
            self.adata.obs[self._term_key].cat.categories
        )