def __init__(
    self,
    st_adata: AnnData,
    cell_type_mapping: np.ndarray,
    decoder_state_dict: OrderedDict,
    px_decoder_state_dict: OrderedDict,
    px_r: np.ndarray,
    n_hidden: int,
    n_latent: int,
    n_layers: int,
    **module_kwargs,
):
    """Initialize the DestVI model around a spatial AnnData object.

    Parameters
    ----------
    st_adata
        Spatial AnnData (spots x genes), already set up for scvi.
    cell_type_mapping
        Array of cell-type names; its length sets the number of labels
        passed to the deconvolution module.
    decoder_state_dict
        Pretrained weights forwarded to :class:`MRDeconv`.
    px_decoder_state_dict
        Pretrained weights forwarded to :class:`MRDeconv`.
    px_r
        Pretrained per-gene parameters forwarded to :class:`MRDeconv`.
    n_hidden
        Number of hidden units per layer.
    n_latent
        Dimensionality of the latent space.
    n_layers
        Number of hidden layers.
    **module_kwargs
        Extra keyword arguments forwarded verbatim to :class:`MRDeconv`.
    """
    # Give every spot a positional index; registered as "ind_x" so the
    # module can recover minibatch row positions.
    st_adata.obs["_indices"] = np.arange(st_adata.n_obs)
    register_tensor_from_anndata(st_adata, "ind_x", "obs", "_indices")
    # FIX: zero-argument super() — equivalent to super(DestVI, self) and
    # consistent with the other constructors in this file.
    super().__init__(st_adata)
    self.module = MRDeconv(
        n_spots=st_adata.n_obs,
        n_labels=cell_type_mapping.shape[0],
        decoder_state_dict=decoder_state_dict,
        px_decoder_state_dict=px_decoder_state_dict,
        px_r=px_r,
        n_genes=st_adata.n_vars,
        n_latent=n_latent,
        n_layers=n_layers,
        n_hidden=n_hidden,
        **module_kwargs,
    )
    self.cell_type_mapping = cell_type_mapping
    self._model_summary_string = "DestVI Model"
    # NOTE: locals() is captured here — do not introduce extra local
    # variables in this constructor.
    self.init_params_ = self._get_init_params(locals())
def __init__(
    self,
    adata: AnnData,
    per_cell_weight=False,
):
    """Build a Bayesian regression model over ``adata``.

    Clears pyro's global parameter store first so that parameters left
    behind by any previously constructed model with the same names
    cannot collide with this one.
    """
    clear_param_store()
    # Pyro plates need a per-cell index to subsample minibatches correctly.
    adata.obs["_indices"] = np.arange(adata.n_obs).astype("int64")
    register_tensor_from_anndata(
        adata,
        registry_key="ind_x",
        adata_attr_name="obs",
        adata_key_name="_indices",
    )
    super().__init__(adata)
    self.module = BayesianRegressionModule(
        in_features=adata.shape[1], out_features=1, per_cell_weight=per_cell_weight
    )
    self._model_summary_string = "BayesianRegressionModel"
    # locals() is captured for init_params_; no extra locals introduced above.
    self.init_params_ = self._get_init_params(locals())
def test_register_tensor_from_anndata():
    """Registering an obs column exposes it in the scvi data registry."""
    adata = synthetic_iid()
    # Attach a continuous per-cell covariate, then register it under "test".
    adata.obs["cont1"] = np.random.normal(size=(adata.shape[0],))
    register_tensor_from_anndata(
        adata,
        registry_key="test",
        adata_attr_name="obs",
        adata_key_name="cont1",
    )
    registry = adata.uns["_scvi"]["data_registry"]
    assert "test" in registry
    assert registry["test"] == {"attr_name": "obs", "attr_key": "cont1"}
def __init__(
    self,
    adata: AnnData,
    cell_type_markers: pd.DataFrame,
    size_factor_key: str,
    **model_kwargs,
):
    """Initialize the CellAssign model.

    Parameters
    ----------
    adata
        AnnData that has been set up for scvi.
    cell_type_markers
        Marker matrix indexed by gene name (genes x cell types);
        reindexed to ``adata.var_names`` here.
    size_factor_key
        Key in ``adata.obs`` holding per-cell size factors.
    **model_kwargs
        Extra keyword arguments forwarded to :class:`CellAssignModule`.

    Raises
    ------
    KeyError
        If ``cell_type_markers`` does not cover all genes in ``adata``.
    """
    try:
        # Align marker rows with the AnnData gene order.
        cell_type_markers = cell_type_markers.loc[adata.var_names]
    except KeyError as err:
        # FIX: chain the original exception so the missing-gene detail
        # reported by pandas is preserved in the traceback.
        raise KeyError(
            "Anndata and cell type markers do not contain the same genes."
        ) from err
    super().__init__(adata)
    register_tensor_from_anndata(adata, "_size_factor", "obs", size_factor_key)
    self.n_genes = self.summary_stats["n_vars"]
    self.cell_type_markers = cell_type_markers
    # Marker matrix as a float tensor; columns are cell types.
    rho = torch.Tensor(cell_type_markers.to_numpy())
    n_cats_per_cov = (
        self.scvi_setup_dict_["extra_categoricals"]["n_cats_per_key"]
        if "extra_categoricals" in self.scvi_setup_dict_
        else None
    )
    x = scvi.data.get_from_registry(adata, _CONSTANTS.X_KEY)
    col_means = np.asarray(np.mean(x, 0)).ravel()  # (g)
    # Standardize the per-gene means for the b_g_0 intercept prior.
    col_means_mu, col_means_std = np.mean(col_means), np.std(col_means)
    col_means_normalized = torch.Tensor((col_means - col_means_mu) / col_means_std)
    # Compute basis means for phi, evenly spaced over the observed
    # expression range. B is presumably a module-level constant (number of
    # basis functions) — defined outside this view.
    basis_means = np.linspace(np.min(x), np.max(x), B)  # (B)
    self.module = CellAssignModule(
        n_genes=self.n_genes,
        rho=rho,
        basis_means=basis_means,
        b_g_0=col_means_normalized,
        n_batch=self.summary_stats["n_batch"],
        n_cats_per_cov=n_cats_per_cov,
        n_continuous_cov=self.summary_stats["n_continuous_covs"],
        **model_kwargs,
    )
    self._model_summary_string = (
        "CellAssign Model with params: \nn_genes: {}, n_labels: {}"
    ).format(
        self.n_genes,
        rho.shape[1],
    )
    self.init_params_ = self._get_init_params(locals())
def test_pyro_bayesian_regression_jit():
    """Train the pyro Bayesian regression module with a JIT-compiled ELBO
    and check guide parameter shapes and Predictive sampling."""
    use_gpu = int(torch.cuda.is_available())
    adata = synthetic_iid()
    # add index for each cell (provided to pyro plate for correct minibatching)
    adata.obs["_indices"] = np.arange(adata.n_obs).astype("int64")
    register_tensor_from_anndata(
        adata,
        registry_key="ind_x",
        adata_attr_name="obs",
        adata_key_name="_indices",
    )
    train_dl = AnnDataLoader(adata, shuffle=True, batch_size=128)
    # Start from a clean global parameter store so earlier tests cannot
    # leak parameters with the same names.
    pyro.clear_param_store()
    model = BayesianRegressionModule(in_features=adata.shape[1], out_features=1)
    # JitTrace_ELBO requires the guide to be warmed up before JIT tracing;
    # the PyroJitGuideWarmup callback runs that warmup pass.
    plan = PyroTrainingPlan(model, loss_fn=pyro.infer.JitTrace_ELBO())
    plan.n_obs_training = len(train_dl.indices)
    trainer = Trainer(
        gpus=use_gpu, max_epochs=2, callbacks=[PyroJitGuideWarmup(train_dl)]
    )
    trainer.fit(plan, train_dl)

    # 100 features (synthetic_iid default gene count — weight is (out, in))
    assert list(
        model.guide.state_dict()["locs.linear.weight_unconstrained"].shape
    ) == [
        1,
        100,
    ]
    # 1 bias
    assert list(
        model.guide.state_dict()["locs.linear.bias_unconstrained"].shape
    ) == [
        1,
    ]

    if use_gpu == 1:
        model.cuda()

    # test Predictive: draw posterior-predictive samples for every batch,
    # dropping the observed-data site "obs".
    num_samples = 5
    predictive = model.create_predictive(num_samples=num_samples)
    for tensor_dict in train_dl:
        args, kwargs = model._get_fn_args_from_batch(tensor_dict)
        _ = {
            k: v.detach().cpu().numpy()
            for k, v in predictive(*args, **kwargs).items()
            if k != "obs"
        }
def setup_anndata(
    adata: AnnData,
    size_factor_key: str,
    batch_key: Optional[str] = None,
    layer: Optional[str] = None,
    categorical_covariate_keys: Optional[List[str]] = None,
    continuous_covariate_keys: Optional[List[str]] = None,
    copy: bool = False,
) -> Optional[AnnData]:
    """
    %(summary)s.

    Parameters
    ----------
    %(param_adata)s
    size_factor_key
        key in `adata.obs` with continuous valued size factors.
    %(param_batch_key)s
    %(param_layer)s
    %(param_cat_cov_keys)s
    %(param_cont_cov_keys)s
    %(param_copy)s

    Returns
    -------
    %(returns)s
    """
    # FIX: the docstring listed %(param_cat_cov_keys)s twice; the second
    # entry now documents continuous_covariate_keys.
    setup_data = _setup_anndata(
        adata,
        batch_key=batch_key,
        layer=layer,
        categorical_covariate_keys=categorical_covariate_keys,
        continuous_covariate_keys=continuous_covariate_keys,
        copy=copy,
    )
    # With copy=True _setup_anndata returns a new object; register the
    # size factor on whichever AnnData was actually set up.
    register_tensor_from_anndata(
        adata if setup_data is None else setup_data,
        "_size_factor",
        "obs",
        size_factor_key,
    )
    return setup_data
def __init__(
    self,
    st_adata: AnnData,
    params: Tuple[np.ndarray],
    use_gpu: bool = True,
    prior_weight: Literal["n_obs", "minibatch"] = "n_obs",
    **model_kwargs,
):
    """Build the spatial deconvolution model over ``st_adata``.

    Registers a per-spot positional index as "ind_x" before delegating
    data setup to the base class.
    """
    # Each spot gets its row index so minibatches can be mapped back.
    st_adata.obs["_indices"] = np.arange(st_adata.n_obs)
    register_tensor_from_anndata(
        st_adata,
        registry_key="ind_x",
        adata_attr_name="obs",
        adata_key_name="_indices",
    )
    super().__init__(st_adata, use_gpu=use_gpu)
    self.model = SpatialDeconv(
        n_spots=st_adata.n_obs,
        params=params,
        prior_weight=prior_weight,
        **model_kwargs,
    )
    self._model_summary_string = "RNADeconv Model with params: \nn_spots: {}".format(
        st_adata.n_obs
    )
    # locals() is captured for init_params_; no extra locals introduced above.
    self.init_params_ = self._get_init_params(locals())
def test_saving_and_loading(save_path):
    """Round-trip save/load for several scvi models, in both the legacy
    (separate attr/params/var_names files) and current save formats, and
    verify that custom registered tensors survive the round trip."""

    def legacy_save(
        model,
        dir_path,
        prefix=None,
        overwrite=False,
        save_anndata=False,
        **anndata_write_kwargs,
    ):
        # Re-implementation of the old on-disk layout: params .pt, pickled
        # public attributes, and a var_names csv, each under `prefix`.
        if not os.path.exists(dir_path) or overwrite:
            os.makedirs(dir_path, exist_ok=overwrite)
        else:
            raise ValueError(
                "{} already exists. Please provide an unexisting directory for saving.".format(
                    dir_path
                )
            )
        file_name_prefix = prefix or ""
        if save_anndata:
            model.adata.write(
                os.path.join(dir_path, f"{file_name_prefix}adata.h5ad"),
                **anndata_write_kwargs,
            )
        model_save_path = os.path.join(dir_path, f"{file_name_prefix}model_params.pt")
        attr_save_path = os.path.join(dir_path, f"{file_name_prefix}attr.pkl")
        varnames_save_path = os.path.join(dir_path, f"{file_name_prefix}var_names.csv")
        torch.save(model.module.state_dict(), model_save_path)
        var_names = model.adata.var_names.astype(str)
        var_names = var_names.to_numpy()
        np.savetxt(varnames_save_path, var_names, fmt="%s")
        # get all the user attributes
        user_attributes = model._get_user_attributes()
        # only save the public attributes with _ at the very end
        user_attributes = {a[0]: a[1] for a in user_attributes if a[0][-1] == "_"}
        with open(attr_save_path, "wb") as f:
            pickle.dump(user_attributes, f)

    def test_save_load_model(cls, adata, save_path, prefix=None, legacy=False):
        # Train briefly, save (legacy or new format), reload, and check the
        # latent representation and validation split are reproduced exactly.
        model = cls(adata, latent_distribution="normal")
        model.train(1, train_size=0.2)
        z1 = model.get_latent_representation(adata)
        test_idx1 = model.validation_indices
        if legacy:
            legacy_save(
                model, save_path, overwrite=True, save_anndata=True, prefix=prefix
            )
        else:
            model.save(save_path, overwrite=True, save_anndata=True, prefix=prefix)
        model = cls.load(save_path, prefix=prefix)
        model.get_latent_representation()
        # Loading against an AnnData with a different gene count must fail.
        tmp_adata = scvi.data.synthetic_iid(n_genes=200)
        with pytest.raises(ValueError):
            cls.load(save_path, adata=tmp_adata, prefix=prefix)
        model = cls.load(save_path, adata=adata, prefix=prefix)
        # The custom "test" tensor registered below must survive the reload.
        assert "test" in adata.uns["_scvi"]["data_registry"]
        assert adata.uns["_scvi"]["data_registry"]["test"] == dict(
            attr_name="obs", attr_key="cont1"
        )
        z2 = model.get_latent_representation()
        test_idx2 = model.validation_indices
        np.testing.assert_array_equal(z1, z2)
        np.testing.assert_array_equal(test_idx1, test_idx2)
        assert model.is_trained is True

    save_path = os.path.join(save_path, "tmp")
    adata = synthetic_iid()
    # Test custom tensors are loaded properly.
    adata.obs["cont1"] = np.random.normal(size=(adata.shape[0],))
    register_tensor_from_anndata(
        adata, registry_key="test", adata_attr_name="obs", adata_key_name="cont1"
    )

    for cls in [SCVI, LinearSCVI, TOTALVI, PEAKVI]:
        print(cls)
        test_save_load_model(
            cls, adata, save_path, prefix=f"{cls.__name__}_", legacy=True
        )
        test_save_load_model(cls, adata, save_path, prefix=f"{cls.__name__}_")
        # Test load prioritizes newer save paradigm and thus mismatches legacy save.
        with pytest.raises(AssertionError):
            test_save_load_model(
                cls, adata, save_path, prefix=f"{cls.__name__}_", legacy=True
            )

    # AUTOZI — separate path because its check compares alpha/beta posteriors.
    def test_save_load_autozi(legacy=False):
        prefix = "AUTOZI_"
        model = AUTOZI(adata, latent_distribution="normal")
        model.train(1, train_size=0.5)
        ab1 = model.get_alphas_betas()
        if legacy:
            legacy_save(
                model, save_path, overwrite=True, save_anndata=True, prefix=prefix
            )
        else:
            model.save(save_path, overwrite=True, save_anndata=True, prefix=prefix)
        model = AUTOZI.load(save_path, prefix=prefix)
        model.get_latent_representation()
        tmp_adata = scvi.data.synthetic_iid(n_genes=200)
        with pytest.raises(ValueError):
            AUTOZI.load(save_path, adata=tmp_adata, prefix=prefix)
        model = AUTOZI.load(save_path, adata=adata, prefix=prefix)
        assert "test" in adata.uns["_scvi"]["data_registry"]
        assert adata.uns["_scvi"]["data_registry"]["test"] == dict(
            attr_name="obs", attr_key="cont1"
        )
        ab2 = model.get_alphas_betas()
        np.testing.assert_array_equal(ab1["alpha_posterior"], ab2["alpha_posterior"])
        np.testing.assert_array_equal(ab1["beta_posterior"], ab2["beta_posterior"])
        assert model.is_trained is True

    test_save_load_autozi(legacy=True)
    test_save_load_autozi()
    # Test load prioritizes newer save paradigm and thus mismatches legacy save.
    with pytest.raises(AssertionError):
        test_save_load_autozi(legacy=True)

    # SCANVI — separate path because its check compares label predictions.
    def test_save_load_scanvi(legacy=False):
        prefix = "SCANVI_"
        model = SCANVI(adata, "label_0")
        model.train(max_epochs=1, train_size=0.5)
        p1 = model.predict()
        if legacy:
            legacy_save(
                model, save_path, overwrite=True, save_anndata=True, prefix=prefix
            )
        else:
            model.save(save_path, overwrite=True, save_anndata=True, prefix=prefix)
        model = SCANVI.load(save_path, prefix=prefix)
        model.get_latent_representation()
        tmp_adata = scvi.data.synthetic_iid(n_genes=200)
        with pytest.raises(ValueError):
            SCANVI.load(save_path, adata=tmp_adata, prefix=prefix)
        model = SCANVI.load(save_path, adata=adata, prefix=prefix)
        assert "test" in adata.uns["_scvi"]["data_registry"]
        assert adata.uns["_scvi"]["data_registry"]["test"] == dict(
            attr_name="obs", attr_key="cont1"
        )
        p2 = model.predict()
        np.testing.assert_array_equal(p1, p2)
        assert model.is_trained is True

    test_save_load_scanvi(legacy=True)
    test_save_load_scanvi()
    # Test load prioritizes newer save paradigm and thus mismatches legacy save.
    with pytest.raises(AssertionError):
        test_save_load_scanvi(legacy=True)
def test_pyro_bayesian_regression(save_path):
    """Train the pyro Bayesian regression module, exercise Predictive
    sampling, and verify a state-dict save/load round trip reproduces the
    guide's posterior medians."""
    use_gpu = int(torch.cuda.is_available())
    adata = synthetic_iid()
    # add index for each cell (provided to pyro plate for correct minibatching)
    adata.obs["_indices"] = np.arange(adata.n_obs).astype("int64")
    register_tensor_from_anndata(
        adata,
        registry_key="ind_x",
        adata_attr_name="obs",
        adata_key_name="_indices",
    )
    train_dl = AnnDataLoader(adata, shuffle=True, batch_size=128)
    # Start from a clean global parameter store so earlier tests cannot
    # leak parameters with the same names.
    pyro.clear_param_store()
    model = BayesianRegressionModule(in_features=adata.shape[1], out_features=1)
    plan = PyroTrainingPlan(model)
    plan.n_obs_training = len(train_dl.indices)
    trainer = Trainer(
        gpus=use_gpu,
        max_epochs=2,
    )
    trainer.fit(plan, train_dl)
    if use_gpu == 1:
        model.cuda()

    # test Predictive: draw posterior-predictive samples for every batch,
    # dropping the observed-data site "obs".
    num_samples = 5
    predictive = model.create_predictive(num_samples=num_samples)
    for tensor_dict in train_dl:
        args, kwargs = model._get_fn_args_from_batch(tensor_dict)
        _ = {
            k: v.detach().cpu().numpy()
            for k, v in predictive(*args, **kwargs).items()
            if k != "obs"
        }

    # test save and load
    # cpu/gpu has minor difference
    model.cpu()
    quants = model.guide.quantiles([0.5])
    sigma_median = quants["sigma"][0].detach().cpu().numpy()
    linear_median = quants["linear.weight"][0].detach().cpu().numpy()

    model_save_path = os.path.join(save_path, "model_params.pt")
    torch.save(model.state_dict(), model_save_path)

    pyro.clear_param_store()
    new_model = BayesianRegressionModule(in_features=adata.shape[1], out_features=1)
    # run model one step to get autoguide params: a fresh AutoGuide has no
    # parameters until it sees data, so load_state_dict can fail with a
    # RuntimeError on the first attempt; one training step materializes the
    # guide parameters, after which loading succeeds.
    try:
        new_model.load_state_dict(torch.load(model_save_path))
    except RuntimeError as err:
        if isinstance(new_model, PyroBaseModuleClass):
            plan = PyroTrainingPlan(new_model)
            plan.n_obs_training = len(train_dl.indices)
            trainer = Trainer(
                gpus=use_gpu,
                max_steps=1,
            )
            trainer.fit(plan, train_dl)
            new_model.load_state_dict(torch.load(model_save_path))
        else:
            raise err
    # The reloaded guide must reproduce the original posterior medians.
    quants = new_model.guide.quantiles([0.5])
    sigma_median_new = quants["sigma"][0].detach().cpu().numpy()
    linear_median_new = quants["linear.weight"][0].detach().cpu().numpy()
    np.testing.assert_array_equal(sigma_median_new, sigma_median)
    np.testing.assert_array_equal(linear_median_new, linear_median)