def test_scvi_sparse(save_path):
    """Check that SCVI trains and runs inference when ``adata.X`` is a CSR matrix.

    The dataset is created without registration, its ``X`` is converted with
    ``csr_matrix``, and only then is ``setup_anndata`` called. After one
    training epoch the latent shape is asserted; the remaining calls are only
    checked for not raising.

    Parameters
    ----------
    save_path
        Not used by the body; presumably a pytest fixture -- confirm against
        the project's conftest.
    """
    latent_dim = 5

    # Swap X for a sparse matrix *before* registering the AnnData.
    sparse_adata = synthetic_iid(run_setup_anndata=False)
    sparse_adata.X = csr_matrix(sparse_adata.X)
    setup_anndata(sparse_adata)

    vae = SCVI(sparse_adata, n_latent=latent_dim)
    vae.train(1, train_size=0.5)
    assert vae.is_trained is True

    embedding = vae.get_latent_representation()
    expected_shape = (sparse_adata.shape[0], latent_dim)
    assert embedding.shape == expected_shape

    # Smoke checks: return values are intentionally ignored.
    vae.get_elbo()
    vae.get_marginal_ll(n_mc_samples=3)
    vae.get_reconstruction_error()
    vae.get_normalized_expression()
    vae.differential_expression(groupby="labels", group1="label_1")
def test_differential_computation(save_path):
    """Exercise ``DifferentialComputation.get_bayes_factors`` and model-level DE.

    A one-epoch SCVI model supplies the expression function. Bayes factors
    are requested in "vanilla" and "change" modes, with custom credible
    interval levels, and with user-supplied change/domain functions. The
    test then checks that grouping by batch raises ``ValueError`` on the
    first 20 cells and runs on a view restricted to ``label_1``.

    Parameters
    ----------
    save_path
        Not used by the body; presumably a pytest fixture -- confirm against
        the project's conftest.
    """
    latent_dim = 5
    dataset = synthetic_iid()
    vae = SCVI(dataset, n_latent=latent_dim)
    vae.train(1)

    expression_fn = partial(vae.get_normalized_expression, return_numpy=True)
    diff_comp = DifferentialComputation(expression_fn, dataset)

    # Boolean masks: cells labelled "label_1" versus every other cell.
    in_label_1 = np.asarray(dataset.obs.labels == "label_1")
    not_in_label_1 = ~in_label_1

    diff_comp.get_bayes_factors(
        in_label_1, not_in_label_1, mode="vanilla", use_permutation=True
    )
    diff_comp.get_bayes_factors(
        in_label_1, not_in_label_1, mode="change", use_permutation=False
    )
    diff_comp.get_bayes_factors(
        in_label_1, not_in_label_1, mode="change", cred_interval_lvls=[0.75]
    )

    threshold = 0.5

    def difference(x, y):
        return x - y

    def exceeds_threshold(samples):
        return np.abs(samples) >= threshold

    diff_comp.get_bayes_factors(
        in_label_1,
        not_in_label_1,
        mode="change",
        m1_domain_fn=exceeds_threshold,
        change_fn=difference,
    )

    # should fail if just one batch
    first_twenty = dataset[:20]
    with pytest.raises(ValueError):
        vae.differential_expression(first_twenty, groupby="batch")

    # test view
    label_1_view = dataset[dataset.obs["labels"] == "label_1"]
    vae.differential_expression(label_1_view, groupby="batch")
def test_scvi(save_path):
    """Run an end-to-end smoke test of the SCVI model API on synthetic data.

    The test trains for one epoch, then calls the inference, sampling,
    correlation, likelihood-parameter, setup-transfer, differential
    expression and training-callback entry points. Shapes are asserted
    wherever a result is bound; the other calls are only checked for not
    raising (or for raising ``ValueError`` where noted).

    Parameters
    ----------
    save_path
        Not used by the body; presumably a pytest fixture -- confirm against
        the project's conftest.
    """
    n_latent = 5
    adata = synthetic_iid()
    model = SCVI(adata, n_latent=n_latent)
    model.train(1, check_val_every_n_epoch=1, train_size=0.5)

    # Second model, same data, with a custom variance activation.
    model = SCVI(adata, n_latent=n_latent, var_activation=Softplus())
    model.train(1, check_val_every_n_epoch=1, train_size=0.5)

    # tests __repr__
    print(model)

    assert model.is_trained is True
    z = model.get_latent_representation()
    assert z.shape == (adata.shape[0], n_latent)
    assert len(model.history["elbo_train"]) == 1
    model.get_elbo()
    model.get_marginal_ll(n_mc_samples=3)
    model.get_reconstruction_error()
    model.get_normalized_expression(transform_batch="batch_1")

    # Same API on a second, independently generated AnnData.
    adata2 = synthetic_iid()
    model.get_elbo(adata2)
    model.get_marginal_ll(adata2, n_mc_samples=3)
    model.get_reconstruction_error(adata2)
    latent = model.get_latent_representation(adata2, indices=[1, 2, 3])
    assert latent.shape == (3, n_latent)
    denoised = model.get_normalized_expression(adata2)
    assert denoised.shape == adata.shape
    denoised = model.get_normalized_expression(
        adata2, indices=[1, 2, 3], transform_batch="batch_1"
    )
    # Previously this result was overwritten without being checked.
    assert denoised.shape == (3, adata2.n_vars)
    denoised = model.get_normalized_expression(
        adata2, indices=[1, 2, 3], transform_batch=["batch_0", "batch_1"]
    )
    assert denoised.shape == (3, adata2.n_vars)

    sample = model.posterior_predictive_sample(adata2)
    assert sample.shape == adata2.shape
    sample = model.posterior_predictive_sample(
        adata2, indices=[1, 2, 3], gene_list=["1", "2"]
    )
    assert sample.shape == (3, 2)
    sample = model.posterior_predictive_sample(
        adata2, indices=[1, 2, 3], gene_list=["1", "2"], n_samples=3
    )
    assert sample.shape == (3, 2, 3)

    model.get_feature_correlation_matrix(correlation_type="pearson")
    model.get_feature_correlation_matrix(
        adata2,
        indices=[1, 2, 3],
        correlation_type="spearman",
        rna_size_factor=500,
        n_samples=5,
    )
    model.get_feature_correlation_matrix(
        adata2,
        indices=[1, 2, 3],
        correlation_type="spearman",
        rna_size_factor=500,
        n_samples=5,
        transform_batch=["batch_0", "batch_1"],
    )

    params = model.get_likelihood_parameters()
    assert params["mean"].shape == adata.shape
    assert (
        params["mean"].shape
        == params["dispersions"].shape
        == params["dropout"].shape
    )
    params = model.get_likelihood_parameters(adata2, indices=[1, 2, 3])
    assert params["mean"].shape == (3, adata.n_vars)
    params = model.get_likelihood_parameters(
        adata2, indices=[1, 2, 3], n_samples=3, give_mean=True
    )
    assert params["mean"].shape == (3, adata.n_vars)

    model.get_latent_library_size()
    model.get_latent_library_size(adata2, indices=[1, 2, 3])

    # test transfer_anndata_setup
    adata2 = synthetic_iid(run_setup_anndata=False)
    transfer_anndata_setup(adata, adata2)
    model.get_elbo(adata2)

    # test automatic transfer_anndata_setup + on a view
    adata = synthetic_iid()
    model = SCVI(adata)
    adata2 = synthetic_iid(run_setup_anndata=False)
    model.get_elbo(adata2[:10])

    # test that we catch incorrect mappings
    adata = synthetic_iid()
    adata2 = synthetic_iid(run_setup_anndata=False)
    transfer_anndata_setup(adata, adata2)
    adata2.uns["_scvi"]["categorical_mappings"]["_scvi_labels"]["mapping"] = np.array(
        ["label_4", "label_0", "label_2"]
    )
    with pytest.raises(ValueError):
        model.get_elbo(adata2)

    # test that same mapping different order doesn't raise error
    adata = synthetic_iid()
    adata2 = synthetic_iid(run_setup_anndata=False)
    transfer_anndata_setup(adata, adata2)
    adata2.uns["_scvi"]["categorical_mappings"]["_scvi_labels"]["mapping"] = np.array(
        ["label_1", "label_0", "label_2"]
    )
    model.get_elbo(adata2)  # should automatically transfer setup

    # test mismatched categories raises ValueError
    adata2 = synthetic_iid(run_setup_anndata=False)
    # ``rename_categories(..., inplace=True)`` was removed in pandas 2.0 and
    # would raise TypeError before the check below; assign the renamed
    # Series back instead, which works on every pandas version.
    adata2.obs["labels"] = adata2.obs["labels"].cat.rename_categories(
        ["a", "b", "c"]
    )
    with pytest.raises(ValueError):
        model.get_elbo(adata2)

    # test differential expression
    model.differential_expression(groupby="labels", group1="label_1")
    model.differential_expression(
        groupby="labels", group1="label_1", group2="label_2", mode="change"
    )
    model.differential_expression(groupby="labels")
    model.differential_expression(idx1=[0, 1, 2], idx2=[3, 4, 5])
    model.differential_expression(idx1=[0, 1, 2])

    # transform batch works with all different types
    a = synthetic_iid(run_setup_anndata=False)
    batch = np.zeros(a.n_obs)
    batch[:64] += 1  # first 64 cells get batch 1.0, the rest stay 0.0
    a.obs["batch"] = batch
    setup_anndata(a, batch_key="batch")
    m = SCVI(a)
    m.train(1, train_size=0.5)
    m.get_normalized_expression(transform_batch=1)
    m.get_normalized_expression(transform_batch=[0, 1])

    # test get_likelihood_parameters() when dispersion=='gene-cell'
    model = SCVI(adata, dispersion="gene-cell")
    model.get_likelihood_parameters()

    # test train callbacks work
    a = synthetic_iid()
    m = scvi.model.SCVI(a)
    lr_monitor = LearningRateMonitor()
    m.train(
        callbacks=[lr_monitor],
        max_epochs=10,
        log_every_n_steps=1,
        plan_kwargs={"reduce_lr_on_plateau": True},
    )
    assert "lr-Adam" in m.history.keys()
def test_scvi():
    """Run an end-to-end smoke test of the SCVI model API on synthetic data.

    The test trains for one epoch with ``frequency=1``, checks the latent
    shape and the length of the recorded training ELBO history, then calls
    the inference, sampling, correlation, likelihood-parameter,
    setup-transfer and differential-expression entry points. Shapes are
    asserted wherever a result is bound; the other calls are only checked
    for not raising (or for raising ``ValueError`` where noted).
    """
    n_latent = 5
    adata = synthetic_iid()
    model = SCVI(adata, n_latent=n_latent)
    model.train(1, frequency=1, train_size=0.5)
    assert model.is_trained is True
    z = model.get_latent_representation()
    assert z.shape == (adata.shape[0], n_latent)
    # len of history should be 2 since metrics is also run once at the very
    # end after training
    assert len(model.history["elbo_train_set"]) == 2
    model.get_elbo()
    model.get_marginal_ll()
    model.get_reconstruction_error()
    model.get_normalized_expression(transform_batch="batch_1")

    # Same API on a second, independently generated AnnData.
    adata2 = synthetic_iid()
    model.get_elbo(adata2)
    model.get_marginal_ll(adata2)
    model.get_reconstruction_error(adata2)
    latent = model.get_latent_representation(adata2, indices=[1, 2, 3])
    assert latent.shape == (3, n_latent)
    denoised = model.get_normalized_expression(adata2)
    assert denoised.shape == adata.shape
    denoised = model.get_normalized_expression(
        adata2, indices=[1, 2, 3], transform_batch="batch_1"
    )
    # Previously this result was overwritten without being checked.
    assert denoised.shape == (3, adata2.n_vars)
    denoised = model.get_normalized_expression(
        adata2, indices=[1, 2, 3], transform_batch=["batch_0", "batch_1"]
    )
    assert denoised.shape == (3, adata2.n_vars)

    sample = model.posterior_predictive_sample(adata2)
    assert sample.shape == adata2.shape
    sample = model.posterior_predictive_sample(
        adata2, indices=[1, 2, 3], gene_list=["1", "2"]
    )
    assert sample.shape == (3, 2)
    sample = model.posterior_predictive_sample(
        adata2, indices=[1, 2, 3], gene_list=["1", "2"], n_samples=3
    )
    assert sample.shape == (3, 2, 3)

    model.get_feature_correlation_matrix(correlation_type="pearson")
    model.get_feature_correlation_matrix(
        adata2,
        indices=[1, 2, 3],
        correlation_type="spearman",
        rna_size_factor=500,
        n_samples=5,
    )
    model.get_feature_correlation_matrix(
        adata2,
        indices=[1, 2, 3],
        correlation_type="spearman",
        rna_size_factor=500,
        n_samples=5,
        transform_batch=["batch_0", "batch_1"],
    )

    params = model.get_likelihood_parameters()
    assert params["mean"].shape == adata.shape
    assert (
        params["mean"].shape
        == params["dispersions"].shape
        == params["dropout"].shape
    )
    params = model.get_likelihood_parameters(adata2, indices=[1, 2, 3])
    assert params["mean"].shape == (3, adata.n_vars)
    params = model.get_likelihood_parameters(
        adata2, indices=[1, 2, 3], n_samples=3, give_mean=True
    )
    assert params["mean"].shape == (3, adata.n_vars)

    model.get_latent_library_size()
    model.get_latent_library_size(adata2, indices=[1, 2, 3])

    # test transfer_anndata_setup
    adata2 = synthetic_iid(run_setup_anndata=False)
    transfer_anndata_setup(adata, adata2)
    model.get_elbo(adata2)

    # test automatic transfer_anndata_setup + on a view
    adata = synthetic_iid()
    model = SCVI(adata)
    adata2 = synthetic_iid(run_setup_anndata=False)
    model.get_elbo(adata2[:10])

    # test that we catch incorrect mappings
    adata = synthetic_iid()
    adata2 = synthetic_iid(run_setup_anndata=False)
    transfer_anndata_setup(adata, adata2)
    adata2.uns["_scvi"]["categorical_mappings"]["_scvi_labels"]["mapping"] = np.array(
        ["label_1", "label_0", "label_2"]
    )
    with pytest.raises(ValueError):
        model.get_elbo(adata2)

    # test mismatched categories raises ValueError
    adata2 = synthetic_iid(run_setup_anndata=False)
    # ``rename_categories(..., inplace=True)`` was removed in pandas 2.0 and
    # would raise TypeError before the check below; assign the renamed
    # Series back instead, which works on every pandas version.
    adata2.obs["labels"] = adata2.obs["labels"].cat.rename_categories(
        ["a", "b", "c"]
    )
    with pytest.raises(ValueError):
        model.get_elbo(adata2)

    # test differential expression
    model.differential_expression(groupby="labels", group1="label_1")
    model.differential_expression(
        groupby="labels", group1="label_1", group2="label_2", mode="change"
    )
    model.differential_expression(groupby="labels")
    model.differential_expression(idx1=[0, 1, 2], idx2=[3, 4, 5])
    model.differential_expression(idx1=[0, 1, 2])

    # transform batch works with all different types
    a = synthetic_iid(run_setup_anndata=False)
    batch = np.zeros(a.n_obs)
    batch[:64] += 1  # first 64 cells get batch 1.0, the rest stay 0.0
    a.obs["batch"] = batch
    setup_anndata(a, batch_key="batch")
    m = SCVI(a)
    m.train(1, train_size=0.5)
    m.get_normalized_expression(transform_batch=1)
    m.get_normalized_expression(transform_batch=[0, 1])
def test_differential_computation(save_path):
    """Exercise ``DifferentialComputation`` and the model-level DE interface.

    Covers ``get_bayes_factors`` in "vanilla" and "change" modes (including
    ``delta=None`` / ``pseudocounts=None`` and custom change/domain
    functions), batch-wise DE on views, query-string group selection via
    ``_prepare_obs`` and ``differential_expression``, and grouping on
    integer and plain-string ``obs`` columns.

    Parameters
    ----------
    save_path
        Not used by the body; presumably a pytest fixture -- confirm against
        the project's conftest.
    """
    latent_dim = 5
    dataset = synthetic_iid()
    vae = SCVI(dataset, n_latent=latent_dim)
    vae.train(1)

    expression_fn = partial(vae.get_normalized_expression, return_numpy=True)
    diff_comp = DifferentialComputation(expression_fn, dataset)

    # Boolean masks: cells labelled "label_1" versus every other cell.
    in_label_1 = np.asarray(dataset.obs.labels == "label_1")
    not_in_label_1 = ~in_label_1

    diff_comp.get_bayes_factors(
        in_label_1, not_in_label_1, mode="vanilla", use_permutation=True
    )

    # With delta/pseudocounts left unspecified, the result reports these values.
    result = diff_comp.get_bayes_factors(
        in_label_1, not_in_label_1, mode="change", use_permutation=False
    )
    assert result["delta"] == 0.5
    assert result["pseudocounts"] == 0.0

    result = diff_comp.get_bayes_factors(
        in_label_1,
        not_in_label_1,
        mode="change",
        use_permutation=False,
        delta=None,
    )
    diff_comp.get_bayes_factors(
        in_label_1,
        not_in_label_1,
        mode="change",
        use_permutation=False,
        delta=None,
        pseudocounts=None,
    )
    diff_comp.get_bayes_factors(
        in_label_1, not_in_label_1, mode="change", cred_interval_lvls=[0.75]
    )

    threshold = 0.5

    def difference(x, y):
        return x - y

    def exceeds_threshold(samples):
        return np.abs(samples) >= threshold

    diff_comp.get_bayes_factors(
        in_label_1,
        not_in_label_1,
        mode="change",
        m1_domain_fn=exceeds_threshold,
        change_fn=difference,
    )

    # should fail if just one batch
    first_twenty = dataset[:20]
    with pytest.raises(ValueError):
        vae.differential_expression(first_twenty, groupby="batch")

    # test view
    label_1_view = dataset[dataset.obs["labels"] == "label_1"]
    vae.differential_expression(label_1_view, groupby="batch")

    # Test query features
    group_column, first_group, _ = _prepare_obs(
        idx1="(labels == 'label_1') & (batch == 'batch_1')",
        idx2=None,
        adata=dataset,
    )
    obs = dataset.obs
    query_mask = (obs.labels == "label_1") & (obs.batch == "batch_1")
    n_expected = obs.loc[query_mask].shape[0]
    assert (group_column == first_group).sum() == n_expected

    vae.differential_expression(idx1="labels == 'label_1'")
    vae.differential_expression(
        idx1="labels == 'label_1'",
        idx2="(labels == 'label_2') & (batch == 'batch_1')",
    )

    # test that ints as group work
    int_grouped = synthetic_iid()
    int_grouped.obs["test"] = [0] * 200 + [1] * 200
    vae = SCVI(int_grouped)
    vae.differential_expression(groupby="test", group1=0)

    # test that string but not as categorical work
    str_grouped = synthetic_iid()
    str_grouped.obs["test"] = ["0"] * 200 + ["1"] * 200
    vae = SCVI(str_grouped)
    vae.differential_expression(groupby="test", group1="0")
def test_scvi():
    """Run an end-to-end smoke test of the SCVI model API on synthetic data.

    The test trains for one epoch, checks the latent shape, then calls the
    inference, sampling, correlation, likelihood-parameter, setup-transfer
    and differential-expression entry points. Shapes are asserted wherever
    a result is bound; the other calls are only checked for not raising (or
    for raising ``ValueError`` where noted).
    """
    n_latent = 5
    adata = synthetic_iid()
    model = SCVI(adata, n_latent=n_latent)
    model.train(1)
    assert model.is_trained is True
    z = model.get_latent_representation()
    assert z.shape == (adata.shape[0], n_latent)
    model.get_elbo()
    model.get_marginal_ll()
    model.get_reconstruction_error()
    model.get_normalized_expression()

    # Same API on a second, independently generated AnnData.
    adata2 = synthetic_iid()
    model.get_elbo(adata2)
    model.get_marginal_ll(adata2)
    model.get_reconstruction_error(adata2)
    latent = model.get_latent_representation(adata2, indices=[1, 2, 3])
    assert latent.shape == (3, n_latent)
    denoised = model.get_normalized_expression(adata2)
    assert denoised.shape == adata.shape
    denoised = model.get_normalized_expression(
        adata2, indices=[1, 2, 3], transform_batch=1
    )
    assert denoised.shape == (3, adata2.n_vars)

    sample = model.posterior_predictive_sample(adata2)
    assert sample.shape == adata2.shape
    sample = model.posterior_predictive_sample(
        adata2, indices=[1, 2, 3], gene_list=["1", "2"]
    )
    assert sample.shape == (3, 2)
    sample = model.posterior_predictive_sample(
        adata2, indices=[1, 2, 3], gene_list=["1", "2"], n_samples=3
    )
    assert sample.shape == (3, 2, 3)

    model.get_feature_correlation_matrix(correlation_type="pearson")
    model.get_feature_correlation_matrix(
        adata2,
        indices=[1, 2, 3],
        correlation_type="spearman",
        rna_size_factor=500,
        n_samples=5,
    )

    params = model.get_likelihood_parameters()
    assert params["mean"].shape == adata.shape
    assert (
        params["mean"].shape
        == params["dispersions"].shape
        == params["dropout"].shape
    )
    params = model.get_likelihood_parameters(adata2, indices=[1, 2, 3])
    assert params["mean"].shape == (3, adata.n_vars)
    params = model.get_likelihood_parameters(
        adata2, indices=[1, 2, 3], n_samples=3, give_mean=True
    )
    assert params["mean"].shape == (3, adata.n_vars)

    model.get_latent_library_size()
    model.get_latent_library_size(adata2, indices=[1, 2, 3])

    # test transfer_anndata_setup
    adata2 = synthetic_iid(run_setup_anndata=False)
    transfer_anndata_setup(adata, adata2)
    model.get_elbo(adata2)

    # test automatic transfer_anndata_setup + on a view
    adata = synthetic_iid()
    model = SCVI(adata)
    adata2 = synthetic_iid(run_setup_anndata=False)
    model.get_elbo(adata2[:10])

    # test that we catch incorrect mappings
    adata = synthetic_iid()
    adata2 = synthetic_iid(run_setup_anndata=False)
    transfer_anndata_setup(adata, adata2)
    adata2.uns["_scvi"]["categorical_mappings"]["_scvi_labels"]["mapping"] = pd.Index(
        data=["undefined_1", "undefined_0", "undefined_2"]
    )
    with pytest.raises(ValueError):
        model.get_elbo(adata2)

    # test mismatched categories raises ValueError
    adata2 = synthetic_iid(run_setup_anndata=False)
    # ``rename_categories(..., inplace=True)`` was removed in pandas 2.0 and
    # would raise TypeError before the check below; assign the renamed
    # Series back instead, which works on every pandas version.
    adata2.obs["labels"] = adata2.obs["labels"].cat.rename_categories(
        ["a", "b", "c"]
    )
    with pytest.raises(ValueError):
        model.get_elbo(adata2)

    # test differential expression
    model.differential_expression(groupby="labels", group1="undefined_1")
    model.differential_expression(
        groupby="labels",
        group1="undefined_1",
        group2="undefined_2",
        mode="change",
    )
    model.differential_expression(groupby="labels")
    model.differential_expression(idx1=[0, 1, 2], idx2=[3, 4, 5])
    model.differential_expression(idx1=[0, 1, 2])