def _de_core(
    adata,
    model_fn,
    groupby,
    group1,
    group2,
    idx1,
    idx2,
    all_stats,
    all_stats_fn,
    col_names,
    mode,
    batchid1,
    batchid2,
    delta,
    batch_correction,
    fdr,
    silent,
    **kwargs
):
    """Internal function for DE interface.

    Runs one differential-expression comparison per entry of ``group1`` and
    stacks the per-comparison tables into a single DataFrame.

    Parameters
    ----------
    adata
        Object whose ``.obs`` table holds the grouping column. It is also
        handed to ``DifferentialComputation``, ``_prepare_obs`` and
        ``all_stats_fn``.
    model_fn
        Callable passed through to ``DifferentialComputation``.
    groupby
        Key of the ``adata.obs`` column used to build the cell masks. Replaced
        by a temporary key when ``idx1`` is given.
    group1
        A single value or an iterable of values of ``adata.obs[groupby]``.
        A string or other non-iterable is wrapped in a one-element list.
        When both ``group1`` and ``idx1`` are ``None``, every category of the
        column is used.
    group2
        Value of ``adata.obs[groupby]`` selecting the reference cells.
        ``None`` means "all cells not in the current group of ``group1``".
    idx1, idx2
        When ``idx1`` is not ``None``, both are forwarded to ``_prepare_obs``,
        whose result overrides ``group1`` and ``group2`` and supplies a
        temporary obs column.
    all_stats
        Only the literal ``True`` enables merging of ``all_stats_fn`` output
        into each result table.
    all_stats_fn
        Called as ``all_stats_fn(adata, cell_idx1, cell_idx2)``; must return a
        mapping that is merged over the Bayes-factor output.
    col_names
        Index for every per-comparison DataFrame.
    mode
        Forwarded to ``get_bayes_factors``. ``"change"`` sorts by
        ``"proba_de"`` and adds an ``is_de_fdr_<fdr>`` column; any other value
        sorts by ``"bayes_factor"``.
    batchid1, batchid2, delta
        Forwarded unchanged to ``get_bayes_factors``.
    batch_correction
        Negated and forwarded as ``use_observed_batches``.
    fdr
        Passed to ``_fdr_de_prediction`` and embedded in the name of the
        resulting column (only in ``"change"`` mode).
    silent
        Forwarded as ``disable`` to the ``track`` progress wrapper.
    **kwargs
        Extra keyword arguments forwarded to ``get_bayes_factors``.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the per-comparison tables, each sorted in
        descending order of its sort key. A ``"comparison"`` column is added
        only when ``idx1`` is ``None``.

    Raises
    ------
    ValueError
        If groups are inferred from ``adata.obs[groupby]`` and only one
        category exists.
    """
    if group1 is None and idx1 is None:
        group1 = adata.obs[groupby].astype("category").cat.categories.tolist()
        if len(group1) == 1:
            raise ValueError(
                "Only a single group in the data. Can't run DE on a single group."
            )

    if not isinstance(group1, IterableClass) or isinstance(group1, str):
        group1 = [group1]

    # make a temp obs key using indices
    temp_key = None
    if idx1 is not None:
        obs_col, group1, group2 = _prepare_obs(idx1, idx2, adata)
        temp_key = "_scvi_temp_de"
        adata.obs[temp_key] = obs_col
        groupby = temp_key

    # The sort column depends only on `mode`, so resolve it once.
    sort_key = "proba_de" if mode == "change" else "bayes_factor"

    df_results = []
    try:
        dc = DifferentialComputation(model_fn, adata)
        for g1 in track(
            group1,
            description="DE...",
            disable=silent,
        ):
            cell_idx1 = (adata.obs[groupby] == g1).to_numpy().ravel()
            if group2 is None:
                # No explicit reference group: compare against everything else.
                cell_idx2 = ~cell_idx1
            else:
                cell_idx2 = (adata.obs[groupby] == group2).to_numpy().ravel()

            all_info = dc.get_bayes_factors(
                cell_idx1,
                cell_idx2,
                mode=mode,
                delta=delta,
                batchid1=batchid1,
                batchid2=batchid2,
                use_observed_batches=not batch_correction,
                **kwargs
            )

            if all_stats is True:
                genes_properties_dict = all_stats_fn(adata, cell_idx1, cell_idx2)
                # On key clashes the extra statistics take precedence.
                all_info = {**all_info, **genes_properties_dict}

            res = pd.DataFrame(all_info, index=col_names)
            res = res.sort_values(by=sort_key, ascending=False)
            if mode == "change":
                res["is_de_fdr_{}".format(fdr)] = _fdr_de_prediction(
                    res["proba_de"], fdr=fdr
                )
            if idx1 is None:
                g2 = "Rest" if group2 is None else group2
                res["comparison"] = "{} vs {}".format(g1, g2)
            df_results.append(res)
    finally:
        # Always drop the scratch column, even when a comparison fails, so the
        # caller's `adata.obs` is never left polluted by this helper.
        if temp_key is not None:
            del adata.obs[temp_key]

    return pd.concat(df_results, axis=0)
def test_differential_computation(save_path):
    """Smoke-test ``DifferentialComputation`` and ``SCVI.differential_expression``.

    Trains an SCVI model for one epoch on synthetic data, then exercises
    ``get_bayes_factors`` in several configurations and
    ``differential_expression`` with views, integer groups and string groups.
    """
    # NOTE(review): another function with this exact name is defined later in
    # the visible source; if both live in the same module the later one
    # replaces this one at import time -- confirm this is intended.
    adata = synthetic_iid()
    model = SCVI(adata, n_latent=5)
    model.train(1)

    dc = DifferentialComputation(
        partial(model.get_normalized_expression, return_numpy=True), adata
    )
    in_group = np.asarray(adata.obs["labels"] == "label_1")
    out_group = ~in_group

    # Same three calls, in the same order, as keyword-argument variations.
    for options in (
        {"mode": "vanilla", "use_permutation": True},
        {"mode": "change", "use_permutation": False},
        {"mode": "change", "cred_interval_lvls": [0.75]},
    ):
        dc.get_bayes_factors(in_group, out_group, **options)

    # User-supplied effect-size function and its H1 domain.
    delta = 0.5

    def change_fn_test(x, y):
        return x - y

    def m1_domain_fn_test(samples):
        return np.abs(samples) >= delta

    dc.get_bayes_factors(
        in_group,
        out_group,
        mode="change",
        m1_domain_fn=m1_domain_fn_test,
        change_fn=change_fn_test,
    )

    # should fail if just one batch
    first_cells = adata[:20]
    with pytest.raises(ValueError):
        model.differential_expression(first_cells, groupby="batch")

    # test view
    label_1_view = adata[adata.obs["labels"] == "label_1"]
    model.differential_expression(label_1_view, groupby="batch")

    # test that ints as group work, then that strings which are not
    # categorical work
    for first_value, second_value in ((0, 1), ("0", "1")):
        a = synthetic_iid()
        a.obs["test"] = [first_value] * 200 + [second_value] * 200
        model = SCVI(a)
        model.differential_expression(groupby="test", group1=first_value)
def test_differential_computation(save_path):
    """Smoke-test differential computation, including query-string selection.

    Trains an SCVI model for one epoch on synthetic data, checks the
    ``delta`` / ``pseudocounts`` values reported by ``get_bayes_factors`` in
    "change" mode, exercises ``_prepare_obs`` with a query string, and runs
    ``differential_expression`` with views, queries, integer groups and
    string groups.
    """
    adata = synthetic_iid()
    model = SCVI(adata, n_latent=5)
    model.train(1)

    dc = DifferentialComputation(
        partial(model.get_normalized_expression, return_numpy=True), adata
    )
    in_group = np.asarray(adata.obs["labels"] == "label_1")
    out_group = ~in_group

    dc.get_bayes_factors(in_group, out_group, mode="vanilla", use_permutation=True)

    # With no overrides, "change" mode must report these values.
    change_res = dc.get_bayes_factors(
        in_group, out_group, mode="change", use_permutation=False
    )
    assert change_res["delta"] == 0.5
    assert change_res["pseudocounts"] == 0.0

    # Passing None for delta and/or pseudocounts must also run.
    for overrides in ({"delta": None}, {"delta": None, "pseudocounts": None}):
        dc.get_bayes_factors(
            in_group, out_group, mode="change", use_permutation=False, **overrides
        )

    dc.get_bayes_factors(in_group, out_group, mode="change", cred_interval_lvls=[0.75])

    # User-supplied effect-size function and its H1 domain.
    delta = 0.5

    def change_fn_test(x, y):
        return x - y

    def m1_domain_fn_test(samples):
        return np.abs(samples) >= delta

    dc.get_bayes_factors(
        in_group,
        out_group,
        mode="change",
        m1_domain_fn=m1_domain_fn_test,
        change_fn=change_fn_test,
    )

    # should fail if just one batch
    first_cells = adata[:20]
    with pytest.raises(ValueError):
        model.differential_expression(first_cells, groupby="batch")

    # test view
    label_1_view = adata[adata.obs["labels"] == "label_1"]
    model.differential_expression(label_1_view, groupby="batch")

    # Test query features
    prepared = _prepare_obs(
        idx1="(labels == 'label_1') & (batch == 'batch_1')", idx2=None, adata=adata
    )
    obs_col, group1 = prepared[0], prepared[1]
    obs = adata.obs
    expected_mask = (obs.labels == "label_1") & (obs.batch == "batch_1")
    assert (obs_col == group1).sum() == int(expected_mask.sum())

    label_1_query = "labels == 'label_1'"
    model.differential_expression(idx1=label_1_query)
    model.differential_expression(
        idx1=label_1_query, idx2="(labels == 'label_2') & (batch == 'batch_1')"
    )

    # test that ints as group work, then that strings which are not
    # categorical work
    for first_value, second_value in ((0, 1), ("0", "1")):
        a = synthetic_iid()
        a.obs["test"] = [first_value] * 200 + [second_value] * 200
        model = SCVI(a)
        model.differential_expression(groupby="test", group1=first_value)