def test_scanvi(save_path):
    """Smoke-test SCANVI: training logs, prediction, DE and ``from_scvi_model``.

    ``save_path`` is accepted but never read by this test body
    (presumably a pytest fixture -- confirm against conftest).
    """
    # Train on data that is registered explicitly through SCANVI.setup_anndata.
    train_adata = synthetic_iid(run_setup_anndata=False)
    SCANVI.setup_anndata(
        train_adata,
        batch_key="batch",
        labels_key="labels",
    )
    scanvi = SCANVI(train_adata, "label_0", n_latent=10)
    scanvi.train(1, train_size=0.5, check_val_every_n_epoch=1)

    # Every one of these metrics must show up in the training history.
    history_keys = scanvi.history.keys()
    required_metrics = (
        "elbo_validation",
        "reconstruction_loss_validation",
        "kl_local_validation",
        "elbo_train",
        "reconstruction_loss_train",
        "kl_local_train",
        "classification_loss_validation",
    )
    for metric in required_metrics:
        assert metric in history_keys

    # Exercise the inference entry points, mostly against a second dataset.
    query_adata = synthetic_iid()
    hard_calls = scanvi.predict(query_adata, indices=[1, 2, 3])
    assert len(hard_calls) == 3
    scanvi.predict()
    soft_calls = scanvi.predict(query_adata, soft=True)
    assert isinstance(soft_calls, pd.DataFrame)
    scanvi.predict(query_adata, soft=True, indices=[1, 2, 3])
    scanvi.get_normalized_expression(query_adata)
    scanvi.differential_expression(groupby="labels", group1="label_1")
    scanvi.differential_expression(
        groupby="labels", group1="label_1", group2="label_2"
    )

    scenarios = (
        # test that all data labeled runs ("asdf" is the unknown label)
        ("asdf", {}),
        # test mix of labeled and unlabeled data
        ("label_0", {"train_size": 0.9}),
    )
    for unknown_label, train_kwargs in scenarios:
        scenario_adata = scvi.data.synthetic_iid()
        scvi.model.SCANVI.setup_anndata(
            scenario_adata, batch_key="batch", labels_key="labels"
        )
        scenario_model = scvi.model.SCANVI(scenario_adata, unknown_label)
        scenario_model.train(1, **train_kwargs)

    # test from_scvi_model
    pretrain_adata = scvi.data.synthetic_iid()
    pretrained = scvi.model.SCVI(pretrain_adata, use_observed_lib_size=False)
    other_adata = scvi.data.synthetic_iid()
    # The model built on the second dataset is only constructed; its result
    # is not used further.
    scvi.model.SCANVI.from_scvi_model(pretrained, "label_0", adata=other_adata)
    ungrouped = scvi.model.SCANVI.from_scvi_model(
        pretrained, "label_0", use_labels_groups=False
    )
    ungrouped.train(1)
def test_scanvi():
    """Smoke-test SCANVI training, prediction and differential expression.

    Trains for a single epoch with ``"undefined_0"`` as the second
    constructor argument, then runs the prediction, normalized-expression
    and differential-expression entry points.
    """
    train_adata = synthetic_iid()
    scanvi = SCANVI(train_adata, "undefined_0", n_latent=10)
    scanvi.train(1)

    query_adata = synthetic_iid()

    # Hard predictions restricted to three cells must yield three results.
    subset_calls = scanvi.predict(query_adata, indices=[1, 2, 3])
    assert len(subset_calls) == 3

    # Remaining prediction variants only need to run without raising.
    scanvi.predict()
    scanvi.predict(query_adata, soft=True)
    scanvi.predict(query_adata, soft=True, indices=[1, 2, 3])

    scanvi.get_normalized_expression(query_adata)

    # One-vs-rest first, then an explicit two-group comparison.
    de_common = {"groupby": "labels", "group1": "undefined_1"}
    scanvi.differential_expression(**de_common)
    scanvi.differential_expression(group2="undefined_2", **de_common)
def test_scanvi():
    """Smoke-test SCANVI and check the lengths of its two trainer histories.

    After one epoch with ``train_size=0.5`` and ``frequency=1`` the test
    expects 2 entries under ``"unsupervised_trainer_history"`` and 7 under
    ``"semisupervised_trainer_history"``.
    """
    train_adata = synthetic_iid()
    scanvi = SCANVI(train_adata, "label_0", n_latent=10)
    scanvi.train(1, train_size=0.5, frequency=1)

    # Expected number of recorded entries per trainer history.
    expected_lengths = {
        "unsupervised_trainer_history": 2,
        "semisupervised_trainer_history": 7,
    }
    for history_name, expected in expected_lengths.items():
        assert len(scanvi.history[history_name]) == expected

    query_adata = synthetic_iid()

    subset_calls = scanvi.predict(query_adata, indices=[1, 2, 3])
    assert len(subset_calls) == 3

    # Remaining prediction variants only need to run without raising.
    scanvi.predict()
    scanvi.predict(query_adata, soft=True)
    scanvi.predict(query_adata, soft=True, indices=[1, 2, 3])

    scanvi.get_normalized_expression(query_adata)

    # One-vs-rest first, then an explicit two-group comparison.
    de_common = {"groupby": "labels", "group1": "label_1"}
    scanvi.differential_expression(**de_common)
    scanvi.differential_expression(group2="label_2", **de_common)
def test_scanvi(save_path):
    """Smoke-test SCANVI and verify its train/validation/test index splits.

    Covers three things: (1) one-epoch training logs the expected metrics and
    the prediction / expression / DE entry points run; (2) with every cell
    labeled, the default split is 90% train / 10% validation / 0% test;
    (3) with a mix of labeled and unlabeled cells and ``train_size=0.9``, the
    training set keeps roughly the same unlabeled-to-labeled ratio as the
    full dataset.

    ``save_path`` is accepted but never read by this test body
    (presumably a pytest fixture -- confirm against conftest).
    """
    adata = synthetic_iid()
    model = SCANVI(adata, "label_0", n_latent=10)
    model.train(1, train_size=0.5, check_val_every_n_epoch=1)
    logged_keys = model.history.keys()
    assert "elbo_validation" in logged_keys
    assert "reconstruction_loss_validation" in logged_keys
    assert "kl_local_validation" in logged_keys
    assert "elbo_train" in logged_keys
    assert "reconstruction_loss_train" in logged_keys
    assert "kl_local_train" in logged_keys

    adata2 = synthetic_iid()
    predictions = model.predict(adata2, indices=[1, 2, 3])
    assert len(predictions) == 3
    model.predict()
    model.predict(adata2, soft=True)
    model.predict(adata2, soft=True, indices=[1, 2, 3])
    model.get_normalized_expression(adata2)
    model.differential_expression(groupby="labels", group1="label_1")
    model.differential_expression(groupby="labels", group1="label_1", group2="label_2")

    # test that all data labeled runs
    unknown_label = "asdf"
    a = scvi.data.synthetic_iid()
    scvi.data.setup_anndata(a, batch_key="batch", labels_key="labels")
    m = scvi.model.SCANVI(a, unknown_label)
    m.train(1)

    # check the number of indices
    n_train_idx = len(m.train_indices)
    n_validation_idx = len(m.validation_indices)
    n_test_idx = len(m.test_indices)
    assert n_train_idx + n_validation_idx + n_test_idx == a.n_obs
    assert np.isclose(n_train_idx / a.n_obs, 0.9)
    assert np.isclose(n_validation_idx / a.n_obs, 0.1)
    assert np.isclose(n_test_idx / a.n_obs, 0)

    # test mix of labeled and unlabeled data
    unknown_label = "label_0"
    a = scvi.data.synthetic_iid()
    scvi.data.setup_anndata(a, batch_key="batch", labels_key="labels")
    m = scvi.model.SCANVI(a, unknown_label)
    m.train(1, train_size=0.9)

    # check the number of indices
    n_train_idx = len(m.train_indices)
    n_validation_idx = len(m.validation_indices)
    n_test_idx = len(m.test_indices)
    assert n_train_idx + n_validation_idx + n_test_idx == a.n_obs
    assert np.isclose(n_train_idx / a.n_obs, 0.9, rtol=0.05)
    assert np.isclose(n_validation_idx / a.n_obs, 0.1, rtol=0.05)
    assert np.isclose(n_test_idx / a.n_obs, 0, rtol=0.05)

    # check that training indices have proper mix of labeled and unlabeled data
    labelled_idx = np.where(a.obs["labels"] != unknown_label)[0]
    unlabelled_idx = np.where(a.obs["labels"] == unknown_label)[0]

    # Count training indices on each side with a vectorized membership test;
    # a per-element `i in ndarray` check would rescan the array every time.
    # count_nonzero returns a plain int, so the divisions below stay ordinary
    # Python divisions.
    n_labeled_train = np.count_nonzero(np.isin(m.train_indices, labelled_idx))
    n_unlabeled_train = np.count_nonzero(np.isin(m.train_indices, unlabelled_idx))

    n_labeled_idx = len(m._labeled_indices)
    n_unlabeled_idx = len(m._unlabeled_indices)

    # unlabeled-to-labeled ratio in adata
    adata_ratio = n_unlabeled_idx / n_labeled_idx
    # unlabeled-to-labeled ratio in train set
    train_ratio = n_unlabeled_train / n_labeled_train
    assert np.isclose(adata_ratio, train_ratio, atol=0.05)
def test_scanvi(save_path):
    """Smoke-test SCANVI, including initialization from a trained-style SCVI.

    Covers: one-epoch training and its logged metrics; prediction, normalized
    expression and differential expression; training with all cells labeled
    and with a labeled/unlabeled mix; and ``SCANVI.from_scvi_model`` with a
    new AnnData, without label groups, and with a registered size factor.

    ``save_path`` is accepted but never read by this test body
    (presumably a pytest fixture -- confirm against conftest).
    """
    adata = synthetic_iid()
    SCANVI.setup_anndata(
        adata,
        "label_0",
        batch_key="batch",
        labels_key="labels",
    )
    model = SCANVI(adata, n_latent=10)
    model.train(1, train_size=0.5, check_val_every_n_epoch=1)
    logged_keys = model.history.keys()
    assert "elbo_validation" in logged_keys
    assert "reconstruction_loss_validation" in logged_keys
    assert "kl_local_validation" in logged_keys
    assert "elbo_train" in logged_keys
    assert "reconstruction_loss_train" in logged_keys
    assert "kl_local_train" in logged_keys
    assert "classification_loss_validation" in logged_keys

    adata2 = synthetic_iid()
    predictions = model.predict(adata2, indices=[1, 2, 3])
    assert len(predictions) == 3
    model.predict()
    df = model.predict(adata2, soft=True)
    assert isinstance(df, pd.DataFrame)
    model.predict(adata2, soft=True, indices=[1, 2, 3])
    model.get_normalized_expression(adata2)
    model.differential_expression(groupby="labels", group1="label_1")
    model.differential_expression(groupby="labels", group1="label_1", group2="label_2")

    # test that all data labeled runs
    unknown_label = "asdf"
    a = scvi.data.synthetic_iid()
    scvi.model.SCANVI.setup_anndata(
        a, unknown_label, batch_key="batch", labels_key="labels"
    )
    m = scvi.model.SCANVI(a)
    m.train(1)

    # test mix of labeled and unlabeled data
    unknown_label = "label_0"
    a = scvi.data.synthetic_iid()
    scvi.model.SCANVI.setup_anndata(
        a, unknown_label, batch_key="batch", labels_key="labels"
    )
    m = scvi.model.SCANVI(a)
    m.train(1, train_size=0.9)

    # test from_scvi_model
    a = scvi.data.synthetic_iid()
    SCVI.setup_anndata(
        a,
        batch_key="batch",
        labels_key="labels",
    )
    m = SCVI(a, use_observed_lib_size=False)
    a2 = scvi.data.synthetic_iid()
    scanvi_model = scvi.model.SCANVI.from_scvi_model(m, "label_0", adata=a2)

    # make sure the state_dicts are different objects for the two models
    assert scanvi_model.module.state_dict() is not m.module.state_dict()
    scanvi_pxr = scanvi_model.module.state_dict().get("px_r", None)
    scvi_pxr = m.module.state_dict().get("px_r", None)
    assert scanvi_pxr is not None and scvi_pxr is not None
    assert scanvi_pxr is not scvi_pxr
    # The identity checks above cannot fail on their own: each state_dict()
    # call builds a new mapping of newly detached tensors. Comparing the
    # underlying storage addresses is what detects shared parameters.
    assert scanvi_pxr.data_ptr() != scvi_pxr.data_ptr()
    scanvi_model.train(1)

    # Test without label groups
    scanvi_model = scvi.model.SCANVI.from_scvi_model(
        m, "label_0", use_labels_groups=False
    )
    scanvi_model.train(1)

    # test from_scvi_model with size_factor
    a = scvi.data.synthetic_iid()
    a.obs["size_factor"] = np.random.randint(1, 5, size=(a.shape[0],))
    SCVI.setup_anndata(
        a, batch_key="batch", labels_key="labels", size_factor_key="size_factor"
    )
    m = SCVI(a, use_observed_lib_size=False)
    a2 = scvi.data.synthetic_iid()
    a2.obs["size_factor"] = np.random.randint(1, 5, size=(a2.shape[0],))
    scanvi_model = scvi.model.SCANVI.from_scvi_model(m, "label_0", adata=a2)
    scanvi_model.train(1)