def train_model(
    mdl_class,
    dataset,
    mdl_params: dict,
    train_params: dict,
    train_fn_params: dict,
    filename: str = None,
):
    """Instantiate a model of ``mdl_class`` and fit it on ``dataset``.

    :param mdl_class: Model class to instantiate (e.g. a VAE variant)
    :param dataset: Gene-expression dataset exposing ``nb_genes`` and ``n_batches``
    :param mdl_params: Extra keyword arguments forwarded to ``mdl_class``
    :param train_params: Keyword arguments forwarded to ``UnsupervisedTrainer``
    :param train_fn_params: Keyword arguments forwarded to ``trainer.train``
    :param filename: Reserved for on-disk result caching; currently unused
    :return: ``(model, trainer)`` tuple
    """
    # Warn when the caller did not pin a held-out split: results will not be
    # comparable across runs without fixed test indices.
    if "test_indices" not in train_params:
        warnings.warn("No `test_indices` attribute found.")
    model = mdl_class(n_input=dataset.nb_genes, n_batch=dataset.n_batches, **mdl_params)
    trainer = UnsupervisedTrainer(model, dataset, **train_params)
    trainer.train(**train_fn_params)
    print(trainer.train_losses)
    return model, trainer
def cortex_benchmark(n_epochs=250, use_cuda=True, save_path='data/', show_plot=True):
    """Benchmark a VAE on the Cortex dataset: DE between two named cell types,
    held-out log-likelihood, imputation after corruption, and a t-SNE plot."""
    dataset = CortexDataset(save_path=save_path)
    model = VAE(dataset.nb_genes)
    trainer = UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)
    trainer.train_set.differential_expression_score('oligodendrocytes', 'pyramidal CA1',
                                                    genes=["THY1", "MBP"])
    trainer.test_set.ll()  # assert ~ 1200

    # Retrain from scratch on corrupted entries to benchmark imputation.
    model = VAE(dataset.nb_genes)
    trainer = UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.corrupt_posteriors()
    trainer.train(n_epochs=n_epochs)
    trainer.uncorrupt_posteriors()
    trainer.train_set.imputation_benchmark(verbose=(n_epochs > 1), save_path=save_path,
                                           show_plot=show_plot)

    n_samples = 10 if n_epochs == 1 else None  # n_epochs == 1 is unit tests
    trainer.train_set.show_t_sne(n_samples=n_samples)
    return trainer
def benchmark(dataset, n_epochs=250, use_cuda=True):
    """Train a batch-aware VAE on ``dataset`` and compute held-out metrics."""
    model = VAE(dataset.nb_genes, n_batch=dataset.n_batches)
    runner = UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)
    runner.train(n_epochs=n_epochs)
    runner.test_set.reconstruction_error()
    runner.test_set.marginal_ll()
    return runner
def cortex_benchmark(n_epochs=250, use_cuda=True, save_path="data/", show_plot=True):
    """Benchmark a VAE on Cortex: DE between two fixed label indices, held-out
    reconstruction error, imputation after corruption, and a t-SNE plot."""
    dataset = CortexDataset(save_path=save_path)
    model = VAE(dataset.nb_genes)
    trainer = UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)

    de_types = (4, 5)  # the couple types on which to study DE
    mask_a = dataset.labels.ravel() == de_types[0]
    mask_b = dataset.labels.ravel() == de_types[1]
    trainer.train_set.differential_expression_score(mask_a, mask_b,
                                                    genes=["THY1", "MBP"])
    trainer.test_set.reconstruction_error()  # assert ~ 1200

    # Retrain from scratch on corrupted entries to benchmark imputation.
    model = VAE(dataset.nb_genes)
    trainer = UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.corrupt_posteriors()
    trainer.train(n_epochs=n_epochs)
    trainer.uncorrupt_posteriors()
    trainer.train_set.imputation_benchmark(save_path=save_path, show_plot=show_plot)

    n_samples = 10 if n_epochs == 1 else None  # n_epochs == 1 is unit tests
    trainer.train_set.show_t_sne(n_samples=n_samples)
    return trainer
def benchmark(dataset, n_epochs=250, use_cuda=True):
    """Train a batch-aware VAE and print verbose held-out likelihoods."""
    model = VAE(dataset.nb_genes, n_batch=dataset.n_batches)
    runner = UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)
    runner.train(n_epochs=n_epochs)
    runner.test_set.ll(verbose=True)
    runner.test_set.marginal_ll(verbose=True)
    return runner
def test_gamma_de():
    """Smoke-test both sample-based and gamma-based DE scoring on Cortex."""
    dataset = CortexDataset()
    model = VAE(dataset.nb_genes, dataset.n_batches)
    trainer = UnsupervisedTrainer(model, dataset, train_size=0.5, use_cuda=use_cuda)
    trainer.train(n_epochs=2)
    full = trainer.create_posterior(trainer.model, dataset,
                                    indices=np.arange(len(dataset)))
    n_samples, M_permutation = 10, 100
    mask_a = dataset.labels.ravel() == 0
    mask_b = dataset.labels.ravel() == 1
    full.differential_expression_score(mask_a, mask_b, n_samples=n_samples,
                                       M_permutation=M_permutation)
    full.differential_expression_gamma(mask_a, mask_b, n_samples=n_samples,
                                       M_permutation=M_permutation)
def scVI_ld(csv_file, csv_path, ndims, vae_model=VAE, n_labels=0, n_cores=1,
            seed=1234, lr=1e-3, use_cuda=False):
    """Fit a linearly-decoded VAE (LDVAE) on a CSV dataset and return the
    per-cell loadings on each latent dimension.

    :param csv_file: name of the CSV file to load
    :param csv_path: directory containing ``csv_file``
    :param ndims: number of latent dimensions
    :param vae_model: unused; kept for signature parity with the sibling
        ``scVI_latent`` / ``scVI_norm`` helpers
    :param n_labels: number of cell labels passed to the LDVAE
    :param n_cores: cap on torch CPU threads
    :param seed: RNG seed
    :param lr: learning rate
    :param use_cuda: train on GPU if True
    :return: DataFrame of cell loadings, columns reordered by increasing std
    """
    set_seed(seed)
    dat = CsvDataset(csv_file, save_path=csv_path, new_n_genes=None)

    # Based on recommendations in linear_decoder.ipynb
    n_epochs = 250

    # trainer and model
    ldvae = LDVAE(dat.nb_genes, n_batch=dat.n_batches, n_latent=ndims, n_labels=n_labels)
    trainerLD = UnsupervisedTrainer(ldvae, dat, use_cuda=use_cuda)

    # limit cpu usage
    torch.set_num_threads(n_cores)
    trainerLD.train(n_epochs=n_epochs, lr=lr)

    # extract mean value for the ld
    full = trainerLD.create_posterior(trainerLD.model, dat, indices=np.arange(len(dat)))
    Z_hat = full.sequential().get_latent()[0]
    adata = anndata.AnnData(dat.X)
    for i, z in enumerate(Z_hat.T):
        adata.obs[f'Z_{i}'] = z

    # reordering for convenience and correspondance with PCA's ordering
    cellLoads = adata.obs.reindex(adata.obs.std().sort_values().index, axis=1)
    return cellLoads
def unsupervised_training_one_epoch(dataset: GeneExpressionDataset):
    """Run a single unsupervised epoch on ``dataset`` (training smoke test)."""
    model = VAE(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    runner = UnsupervisedTrainer(model, dataset, train_size=0.5, use_cuda=use_cuda)
    runner.train(n_epochs=1)
def scVI_latent(csv_file, csv_path, vae_model=VAE, train_size=1.0, n_labels=0,
                seed=1234, n_cores=1, lr=1e-3, use_cuda=False):
    """Fit a scVI model on a CSV dataset and return the per-cell latent
    coordinates as a DataFrame of loadings.

    :param csv_file: name of the CSV file to load
    :param csv_path: directory containing ``csv_file``
    :param vae_model: model class to instantiate (defaults to ``VAE``)
    :param train_size: fraction of cells used for training
    :param n_labels: number of cell labels passed to the model
    :param seed: RNG seed
    :param n_cores: cap on torch CPU threads
    :param lr: learning rate
    :param use_cuda: train on GPU if True
    :return: DataFrame of cell loadings, columns reordered by increasing std
    """
    set_seed(seed)
    dat = CsvDataset(csv_file, save_path=csv_path, new_n_genes=None)

    # Based on recommendations in basic_tutorial.ipynb
    n_epochs = 400 if (len(dat) < 10000) else 200

    # trainer and model
    vae = vae_model(dat.nb_genes, n_labels=n_labels)
    trainer = UnsupervisedTrainer(
        vae,
        dat,
        train_size=train_size,  # default to 0.8, documentation recommends 1
        use_cuda=use_cuda)

    # limit cpu usage
    torch.set_num_threads(n_cores)
    trainer.train(n_epochs=n_epochs, lr=lr)

    full = trainer.create_posterior(trainer.model, dat, indices=np.arange(len(dat)))
    # Updating the "minibatch" size after training is useful in low memory configurations
    Z_hat = full.sequential().get_latent()[0]
    adata = anndata.AnnData(dat.X)
    for i, z in enumerate(Z_hat.T):
        adata.obs[f'Z_{i}'] = z

    # reordering for convenience and correspondance with PCA's ordering
    cellLoads = adata.obs.reindex(adata.obs.std().sort_values().index, axis=1)
    return cellLoads
def scVI_norm(csv_file, csv_path, vae_model=VAE, train_size=1.0, n_labels=0,
              seed=1234, n_cores=1, lr=1e-3, use_cuda=False):
    """Fit scVI on a CSV dataset restricted to the 1000 most variable genes
    and return the normalized expression values plus matching gene names."""
    set_seed(seed)
    dat = CsvDataset(csv_file, save_path=csv_path, new_n_genes=None)
    dat.subsample_genes(1000, mode="variance")

    # Based on recommendations in basic_tutorial.ipynb
    n_epochs = 400 if (len(dat) < 10000) else 200

    # trainer and model
    model = vae_model(dat.nb_genes, n_labels=n_labels)
    trainer = UnsupervisedTrainer(
        model,
        dat,
        train_size=train_size,  # default to 0.8, documentation recommends 1
        use_cuda=use_cuda,
    )

    torch.set_num_threads(n_cores)  # limit cpu usage
    trainer.train(n_epochs=n_epochs, lr=lr)

    full = trainer.create_posterior(trainer.model, dat, indices=np.arange(len(dat)))
    # Updating the "minibatch" size after training is useful in low memory configurations
    normalized_values = full.sequential().get_sample_scale()
    return [normalized_values, dat.gene_names]
def run(self):
    """Embed ``self.data`` with scVI, store the latent space in
    ``obsm['X_emb']``, and dump the annotated result to h5ad."""
    n_epochs, n_latent, n_hidden, n_layers = 100, 10, 128, 2

    adata = self.data.copy()
    adata.X = self.data.layers['counts']
    del adata.layers['counts']
    adata.raw = None  # Ensure that the raw counts are not accidentally used

    # Define batch indices
    encoder = LabelEncoder()
    adata.obs['batch_indices'] = encoder.fit_transform(adata.obs[self.batch].values)
    scvi_data = AnnDatasetFromAnnData(adata)

    model = VAE(scvi_data.nb_genes,
                reconstruction_loss='nb',
                n_batch=scvi_data.n_batches,
                n_layers=n_layers,
                n_latent=n_latent,
                n_hidden=n_hidden)
    trainer = UnsupervisedTrainer(model, scvi_data, train_size=1, use_cuda=False)
    trainer.train(n_epochs=n_epochs, lr=1e-3)

    posterior = trainer.create_posterior(trainer.model, scvi_data,
                                         indices=np.arange(len(scvi_data)))
    latent, _, _ = posterior.sequential().get_latent()
    self.data.obsm['X_emb'] = latent
    self.dump_to_h5ad("scvi")
def trainVAE(gene_dataset, filename, rep, nlayers=2, n_hidden=128,
             reconstruction_loss: str = 'zinb'):
    """Train (or reload from a cached checkpoint) a VAE on ``gene_dataset``.

    :param gene_dataset: dataset exposing ``nb_genes``/``n_batches``/``n_labels``
    :param filename: experiment directory name (one level above cwd)
    :param rep: replicate index embedded in the checkpoint name
    :param nlayers: number of hidden layers
    :param n_hidden: hidden layer width
    :param reconstruction_loss: likelihood model, e.g. 'zinb' or 'nb'
    :return: full posterior over the entire dataset
    """
    vae = VAE(gene_dataset.nb_genes, n_batch=gene_dataset.n_batches,
              n_labels=gene_dataset.n_labels, n_hidden=n_hidden, n_latent=10,
              n_layers=nlayers, dispersion='gene',
              reconstruction_loss=reconstruction_loss)
    trainer = UnsupervisedTrainer(vae, gene_dataset, train_size=1.0)
    # Build the checkpoint path once; use a distinct name instead of
    # reassigning (shadowing) the `filename` parameter.
    ckpt_path = f'../{filename}/vae.{reconstruction_loss}.rep{rep}.pkl'
    if os.path.isfile(ckpt_path):
        # Reuse the cached weights and switch to inference mode.
        trainer.model.load_state_dict(torch.load(ckpt_path))
        trainer.model.eval()
    else:
        trainer.train(n_epochs=250)
        torch.save(trainer.model.state_dict(), ckpt_path)
    full = trainer.create_posterior(trainer.model, gene_dataset,
                                    indices=np.arange(len(gene_dataset)))
    return full
def test_iwae(save_path):
    """Compare wall-clock time of IWAE training with single vs. full backward."""
    import time
    dataset = CortexDataset(save_path=save_path)
    torch.manual_seed(42)

    model = VAE(n_input=dataset.nb_genes, n_batch=dataset.n_batches).cuda()
    t0 = time.time()
    runner = UnsupervisedTrainer(model, gene_dataset=dataset, ratio_loss=True,
                                 k_importance_weighted=5, single_backward=True)
    runner.train(n_epochs=10)
    elapsed_single = time.time() - t0

    model = VAE(n_input=dataset.nb_genes, n_batch=dataset.n_batches).cuda()
    t0 = time.time()
    runner = UnsupervisedTrainer(model, gene_dataset=dataset, ratio_loss=True,
                                 k_importance_weighted=5, single_backward=False)
    runner.train(n_epochs=10)
    elapsed_all = time.time() - t0

    print('Time single backward : ', elapsed_single)
    print('Time all elements : ', elapsed_all)
def base_benchmark(gene_dataset):
    """One-epoch unsupervised training baseline; returns the fitted trainer."""
    model = VAE(gene_dataset.nb_genes, gene_dataset.n_batches, gene_dataset.n_labels)
    runner = UnsupervisedTrainer(model, gene_dataset, train_size=0.5, use_cuda=use_cuda)
    runner.train(n_epochs=1)
    return runner
def ldvae_benchmark(dataset, n_epochs, use_cuda=True):
    """Train an LDVAE, evaluate held-out metrics, and extract gene loadings."""
    model = LDVAE(dataset.nb_genes, n_batch=dataset.n_batches)
    runner = UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)
    runner.train(n_epochs=n_epochs)
    runner.test_set.reconstruction_error()
    runner.test_set.marginal_ll()
    model.get_loadings()
    return runner
def test_iaf2(save_path):
    """Smoke-test the IAF log-normal Poisson VAE on Cortex (GPU required)."""
    dataset = CortexDataset(save_path=save_path)
    vae = IALogNormalPoissonVAE(n_input=dataset.nb_genes,
                                n_batch=dataset.n_batches,
                                do_h=True).cuda()
    trainer = UnsupervisedTrainer(vae, dataset, train_size=0.5, ratio_loss=True)
    trainer.train(n_epochs=1000)
    print(trainer.train_losses)
    # Exercise latent extraction; the returned tensors are not inspected here
    # (the original bound them to unused names `z` and ambiguous `l`).
    trainer.test_set.get_latents(n_samples=5, device='cpu')
def test_encoder_only():
    """Train only the encoder of a LogNormalPoissonVAE against a ground-truth
    decoder taken from the synthetic dataset, then estimate the marginal
    likelihood by Monte Carlo."""
    # torch.autograd.set_detect_anomaly(mode=True)
    # NOTE(review): the first two dataset assignments below are immediately
    # overwritten (dead); they are kept as-is because constructing them may
    # consume global RNG state that later code depends on — TODO confirm.
    dataset = LatentLogPoissonDataset(n_genes=5, n_latent=2, n_cells=300, n_comps=1)
    dataset = LatentLogPoissonDataset(n_genes=3, n_latent=2, n_cells=15, n_comps=2)
    dataset = LatentLogPoissonDataset(n_genes=5, n_latent=2, n_cells=150, n_comps=1,
                                      learn_prior_scale=True)
    # _, _, marginals = dataset.compute_posteriors(
    #     x_obs=torch.randint(0, 150, size=(1, 5), dtype=torch.float),
    #     mcmc_kwargs={"num_samples": 20, "warmup_steps": 20, "num_chains": 1}
    # )
    # stats = marginals.diagnostics()
    # print(stats)
    dataset.cuda()
    # full_cov=True with the dataset's own decoder as ground truth, so only
    # the encoder is learned below.
    vae_mdl = LogNormalPoissonVAE(
        dataset.nb_genes,
        dataset.n_batches,
        autoregressive=False,
        full_cov=True,
        n_latent=2,
        gt_decoder=dataset.nn_model,
    )
    # Restrict optimization to encoder parameters only.
    params = vae_mdl.encoder_params
    trainer = UnsupervisedTrainer(
        model=vae_mdl,
        gene_dataset=dataset,
        use_cuda=True,
        train_size=0.7,
        n_epochs_kl_warmup=1,
        ratio_loss=True,
    )
    trainer.train(
        n_epochs=2,
        lr=1e-3,
        params=params,
    )
    full = trainer.create_posterior(trainer.model, dataset,
                                    indices=np.arange(len(dataset)))
    # Result is currently unused; the call itself exercises the MC estimator.
    lkl_estimate = vae_mdl.marginal_ll(full, n_samples_mc=50)
def test_differential_expression(save_path):
    """Exercise posterior scale sampling and both DE scoring modes on Cortex."""
    dataset = CortexDataset(save_path=save_path)
    all_indices = np.arange(len(dataset))
    model = VAE(dataset.nb_genes, dataset.n_batches)
    trainer = UnsupervisedTrainer(model, dataset, train_size=0.5, use_cuda=use_cuda)
    trainer.train(n_epochs=2)
    posterior = trainer.create_posterior(model, dataset, shuffle=False,
                                         indices=all_indices)

    # Sample scale example
    scales = posterior.scale_sampler(n_samples_per_cell=4, n_samples=None,
                                     selection=all_indices)["scale"]
    assert (scales.shape[1] == dataset.nb_genes
            ), "posterior scales should have shape (n_samples, n_genes)"

    # Differential expression different models
    group_a = [1, 2, 3]
    group_b = [4, 5, 6, 7]
    posterior.differential_expression_score(
        idx1=group_a,
        idx2=group_b,
        n_samples=10,
        mode="vanilla",
        use_permutation=True,
        M_permutation=100,
    )
    de_df = posterior.differential_expression_score(
        idx1=group_a,
        idx2=group_b,
        n_samples=10,
        mode="change",
        use_permutation=True,
        M_permutation=100,
    )
    print(de_df.keys())
    assert (de_df["confidence_interval_0.5_min"]
            <= de_df["confidence_interval_0.5_max"]).all()
    assert (de_df["confidence_interval_0.95_min"]
            <= de_df["confidence_interval_0.95_max"]).all()

    # DE estimation example: probabilities must lie in [0, 1].
    de_probabilities = de_df.loc[:, "proba_de"]
    assert ((0.0 <= de_probabilities) & (de_probabilities <= 1.0)).all()
def test_multibatches_features():
    """Train on a 4-batch synthetic dataset and run batch-transformed imputation."""
    batches = [
        np.random.randint(1, 5, size=(20, 10)),
        np.random.randint(1, 10, size=(20, 10)),
        np.random.randint(1, 10, size=(20, 10)),
        np.random.randint(1, 10, size=(30, 10)),
    ]
    dataset = GeneExpressionDataset()
    dataset.populate_from_per_batch_list(batches)
    model = VAE(dataset.nb_genes, dataset.n_batches)
    trainer = UnsupervisedTrainer(model, dataset, train_size=0.5, use_cuda=use_cuda)
    trainer.train(n_epochs=2)
    trainer.test_set.imputation(n_samples=2, transform_batch=0)
    trainer.train_set.imputation(n_samples=2, transform_batch=[0, 1, 2])
def test_logpoisson():
    """Train log-normal Poisson VAEs (plain and autoregressive) on a synthetic
    LogPoissonDataset and verify the extracted latents contain no NaNs."""
    mu_skeleton = 'mu_{}_200genes_pbmc_diag.npy'
    sgm_skeleton = 'sigma_{}full_200genes_pbmc_diag.npy'
    dataset = LogPoissonDataset(mu0_path=mu_skeleton.format(0),
                                mu1_path=mu_skeleton.format(1),
                                sig0_path=sgm_skeleton.format(0),
                                sig1_path=sgm_skeleton.format(1),
                                pi=[0.5],
                                n_cells=50)
    early_stopping = {
        'early_stopping_metric': 'elbo',
        'save_best_state_metric': 'elbo',
        'patience': 15,
        'threshold': 3,
    }
    # Use a distinct local name: the original bound this instance to `VAE`,
    # shadowing the imported VAE class within this function.
    model = LogNormalPoissonVAE(dataset.nb_genes, dataset.n_batches)
    trainer = UnsupervisedTrainer(model=model, gene_dataset=dataset,
                                  use_cuda=True, train_size=0.7, frequency=1,
                                  n_epochs_kl_warmup=2,
                                  early_stopping_kwargs=early_stopping)
    trainer.train(n_epochs=5, lr=1e-3)
    train = trainer.train_set.sequential()
    zs, _, _ = train.get_latent()
    assert not np.isnan(zs).any()

    # Second pass: autoregressive variant with a smaller latent space.
    model = LogNormalPoissonVAE(dataset.nb_genes, dataset.n_batches,
                                autoregressive=True, n_latent=5)
    trainer = UnsupervisedTrainer(model=model, gene_dataset=dataset,
                                  use_cuda=True, train_size=0.7, frequency=1,
                                  n_epochs_kl_warmup=2,
                                  early_stopping_kwargs=early_stopping)
    torch.autograd.set_detect_anomaly(mode=True)
    trainer.train(n_epochs=5, lr=1e-3)
    train = trainer.train_set.sequential()
    trainer.train_set.show_t_sne(n_samples=1000, color_by='label')
    zs, _, _ = train.get_latent()
    print(zs)
    assert not np.isnan(zs).any()
    print(trainer.history)
def trainVAE(gene_dataset, rmCellTypes, rep):
    """Train (or reload from a cached checkpoint) a VAE and return its latents.

    :param gene_dataset: dataset exposing ``nb_genes``/``n_batches``/``n_labels``
    :param rmCellTypes: removed-cell-types tag embedded in the checkpoint name
    :param rep: replicate index embedded in the checkpoint name
    :return: ``(latent, batch_indices, labels, trainer)``
    """
    vae = VAE(gene_dataset.nb_genes, n_batch=gene_dataset.n_batches,
              n_labels=gene_dataset.n_labels, n_hidden=128, n_latent=10,
              n_layers=2, dispersion='gene')
    trainer = UnsupervisedTrainer(vae, gene_dataset, train_size=1.0)
    # Format the checkpoint path once instead of repeating it three times.
    ckpt_path = '../NoOverlap/vae.%s%s.pkl' % (rmCellTypes, rep)
    if os.path.isfile(ckpt_path):
        # Reuse the cached weights and switch to inference mode.
        trainer.model.load_state_dict(torch.load(ckpt_path))
        trainer.model.eval()
    else:
        trainer.train(n_epochs=150)
        torch.save(trainer.model.state_dict(), ckpt_path)
    full = trainer.create_posterior(trainer.model, gene_dataset,
                                    indices=np.arange(len(gene_dataset)))
    latent, batch_indices, labels = full.sequential().get_latent()
    batch_indices = batch_indices.ravel()
    return latent, batch_indices, labels, trainer
def test_sampling_zl(save_path):
    """Train a VAE, then a classifier over sampled (z, l); check accuracy runs."""
    dataset = CortexDataset(save_path=save_path)
    model = VAE(dataset.nb_genes, dataset.n_batches)
    vae_trainer = UnsupervisedTrainer(
        model, dataset, train_size=0.5, use_cuda=use_cuda
    )
    vae_trainer.train(n_epochs=2)

    classifier = Classifier((model.n_latent + 1), n_labels=dataset.n_labels)
    clf_trainer = ClassifierTrainer(
        classifier, dataset, sampling_model=model, sampling_zl=True
    )
    clf_trainer.train(n_epochs=2)
    clf_trainer.test_set.accuracy()
def training_score_scvi(train, **kwargs):
    """Poisson log-likelihood of the training matrix under scVI's imputed rates."""
    from scvi.dataset import GeneExpressionDataset
    from scvi.inference import UnsupervisedTrainer
    from scvi.models import VAE

    gene_data = GeneExpressionDataset(
        *GeneExpressionDataset.get_attributes_from_matrix(train))
    model = VAE(n_input=train.shape[1])
    trainer = UnsupervisedTrainer(model, gene_data, verbose=False)
    trainer.train(n_epochs=100)
    # Training permuted the data for minibatching. Unpermute before "imputing"
    # (estimating lambda)
    lam = np.vstack([
        trainer.train_set.sequential().imputation(),
        trainer.test_set.sequential().imputation(),
    ])
    return st.poisson(mu=lam).logpmf(train).sum()
def generalization_score_scvi(train, test, **kwargs):
    """Held-out Poisson generalization score under scVI's imputed rates."""
    from scvi.dataset import GeneExpressionDataset
    from scvi.inference import UnsupervisedTrainer
    from scvi.models import VAE

    gene_data = GeneExpressionDataset(
        *GeneExpressionDataset.get_attributes_from_matrix(train))
    model = VAE(n_input=train.shape[1])
    trainer = UnsupervisedTrainer(model, gene_data, verbose=False)
    trainer.train(n_epochs=100)
    # Training permuted the data for minibatching. Unpermute before "imputing"
    # (estimating lambda)
    with torch.autograd.set_grad_enabled(False):
        lam = np.vstack([
            trainer.train_set.sequential().imputation(),
            trainer.test_set.sequential().imputation(),
        ])
    return pois_llik(lam, train, test)
def test_autozi(save_path):
    """Smoke-test AutoZI under both dispersion/zero-inflation modes."""
    data = SyntheticDataset(n_batches=1)
    for mode in ("gene", "gene-label"):
        model = AutoZIVAE(
            n_input=data.nb_genes,
            dispersion=mode,
            zero_inflation=mode,
            n_labels=data.n_labels,
        )
        trainer = UnsupervisedTrainer(model=model, gene_dataset=data, train_size=0.5)
        trainer.train(n_epochs=2, lr=1e-2)
        trainer.test_set.elbo()
        trainer.test_set.reconstruction_error()
        trainer.test_set.marginal_ll()
def compute_scvi_latent( adata: sc.AnnData, n_latent: int = 5, n_epochs: int = 100, lr: float = 1e-3, use_batches: bool = False, use_cuda: bool = True, ) -> Tuple[scvi.inference.Posterior, np.ndarray]: """Train and return a scVI model and sample a latent space :param adata: sc.AnnData object non-normalized :param n_latent: dimension of the latent space :param n_epochs: number of training epochs :param lr: learning rate :param use_batches :param use_cuda :return: (scvi.Posterior, latent_space) """ # Convert easily to scvi dataset scviDataset = AnnDataset(adata) # Train a model vae = VAE( scviDataset.nb_genes, n_batch=scviDataset.n_batches * use_batches, n_latent=n_latent, ) trainer = UnsupervisedTrainer(vae, scviDataset, train_size=1.0, use_cuda=use_cuda) trainer.train(n_epochs=n_epochs, lr=lr) #### # Extract latent space posterior = trainer.create_posterior(trainer.model, scviDataset, indices=np.arange( len(scviDataset))).sequential() latent, _, _ = posterior.get_latent() return posterior, latent
def test_full_cov():
    """Train a full-covariance ZINB VAE on Cortex; test-set ELBO must be finite."""
    dataset = CortexDataset()
    model = VAE(n_input=dataset.nb_genes, n_batch=dataset.n_batches,
                reconstruction_loss='zinb', n_latent=2, full_cov=True)
    stopping = {
        'early_stopping_metric': 'elbo',
        'save_best_state_metric': 'elbo',
        'patience': 15,
        'threshold': 3,
    }
    runner = UnsupervisedTrainer(model=model, gene_dataset=dataset, use_cuda=True,
                                 train_size=0.7, frequency=1,
                                 early_stopping_kwargs=stopping)
    runner.train(n_epochs=20, lr=1e-3)
    assert not np.isnan(runner.history['ll_test_set']).any()
class Base_scVI(Benchmarkable):
    """Benchmark wrapper that trains separate scVI models on scRNA-seq and
    FISH data, plus a joint model whose fit time is recorded."""

    def __init__(self, data, name, n_latent=10):
        super().__init__(data, name)
        self.n_latent = n_latent
        self.USE_CUDA = False

    def train(self, n_epochs=20):
        """Train seq, FISH, and joint models; only the joint fit is timed.

        ``train_both`` is not defined on this class — presumably supplied by a
        subclass or mixin; TODO confirm.
        """
        self.train_seq(n_epochs)
        self.train_fish(n_epochs)
        t0 = time.time()
        self.train_both(n_epochs)
        self.train_time = time.time() - t0

    def train_fish(self, n_epochs=20):
        """Fit an NB VAE with per-batch dispersion on the FISH dataset."""
        ds = self.data.data_fish
        model = VAE(
            ds.nb_genes,
            n_batch=ds.n_batches,
            dispersion="gene-batch",
            n_latent=self.n_latent,
            reconstruction_loss="nb",
        )
        self.trainer_fish = UnsupervisedTrainer(model, ds, train_size=0.95,
                                                use_cuda=self.USE_CUDA)
        self.trainer_fish.train(n_epochs=n_epochs, lr=0.001)

    def train_seq(self, n_epochs=20, reconstruction_seq='nb'):
        """Fit a VAE with gene-wise dispersion on the scRNA-seq dataset."""
        ds = self.data.data_seq
        model = VAE(
            ds.nb_genes,
            dispersion="gene",
            n_latent=self.n_latent,
            reconstruction_loss=reconstruction_seq,
        )
        self.trainer_seq = UnsupervisedTrainer(model, ds, train_size=0.95,
                                               use_cuda=self.USE_CUDA)
        self.trainer_seq.train(n_epochs=n_epochs, lr=0.001)
def test_cortex(save_path):
    """End-to-end smoke test on Cortex: VAE, SCANVI (joint and alternate),
    classical baselines, and a plain neural classifier."""
    dataset = CortexDataset(save_path=save_path)

    # Unsupervised VAE: likelihood, DE stats, corruption/imputation round-trip.
    model = VAE(dataset.nb_genes, dataset.n_batches)
    trainer = UnsupervisedTrainer(model, dataset, train_size=0.5, use_cuda=use_cuda)
    trainer.train(n_epochs=1)
    trainer.train_set.ll()
    trainer.train_set.differential_expression_stats()
    trainer.corrupt_posteriors(corruption='binomial')
    trainer.corrupt_posteriors()
    trainer.train(n_epochs=1)
    trainer.uncorrupt_posteriors()
    trainer.train_set.imputation_benchmark(n_samples=1, show_plot=False,
                                           title_plot='imputation',
                                           save_path=save_path)

    # Joint semi-supervised SCANVI.
    scanvi_model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    joint_trainer = JointSemiSupervisedTrainer(scanvi_model, dataset,
                                               n_labelled_samples_per_class=3,
                                               use_cuda=use_cuda)
    joint_trainer.train(n_epochs=1)
    joint_trainer.labelled_set.accuracy()
    joint_trainer.full_dataset.ll()

    # Alternate semi-supervised SCANVI plus SVC / random-forest baselines.
    scanvi_model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    alt_trainer = AlternateSemiSupervisedTrainer(scanvi_model, dataset,
                                                 n_labelled_samples_per_class=3,
                                                 use_cuda=use_cuda)
    alt_trainer.train(n_epochs=1, lr=1e-2)
    alt_trainer.unlabelled_set.accuracy()
    data_train, labels_train = alt_trainer.labelled_set.raw_data()
    data_test, labels_test = alt_trainer.unlabelled_set.raw_data()
    compute_accuracy_svc(data_train, labels_train, data_test, labels_test,
                         param_grid=[{'C': [1], 'kernel': ['linear']}])
    compute_accuracy_rf(data_train, labels_train, data_test, labels_test,
                        param_grid=[{'max_depth': [3], 'n_estimators': [10]}])

    # Plain neural classifier on raw gene expression.
    classifier = Classifier(dataset.nb_genes, n_labels=dataset.n_labels)
    classifier_trainer = ClassifierTrainer(classifier, dataset)
    classifier_trainer.train(n_epochs=1)
    classifier_trainer.train_set.accuracy()
def test_annealing_procedures(save_path):
    """Check KL-warmup status for epoch-based and iteration-based annealing."""
    dataset = CortexDataset(save_path=save_path)
    model = VAE(dataset.nb_genes, dataset.n_batches)

    # Warmup finishing within the run: weight should be fully annealed.
    trainer = UnsupervisedTrainer(
        model,
        dataset,
        train_size=0.5,
        use_cuda=use_cuda,
        n_epochs_kl_warmup=1,
    )
    trainer.train(n_epochs=2)
    assert trainer.kl_weight >= 0.99, "Annealing should be over"

    # Warmup longer than the run: weight should still be ramping up.
    trainer = UnsupervisedTrainer(
        model,
        dataset,
        train_size=0.5,
        use_cuda=use_cuda,
        n_epochs_kl_warmup=5,
    )
    trainer.train(n_epochs=2)
    assert trainer.kl_weight <= 0.99, "Annealing should be proceeding"

    # iter
    trainer = UnsupervisedTrainer(
        model,
        dataset,
        train_size=0.5,
        use_cuda=use_cuda,
        n_iter_kl_warmup=1,
        n_epochs_kl_warmup=None,
    )
    trainer.train(n_epochs=2)
    assert trainer.kl_weight >= 0.99, "Annealing should be over"