Example #1
def train_model(
    mdl_class,
    dataset,
    mdl_params: dict,
    train_params: dict,
    train_fn_params: dict,
    filename: str = None,
):
    """

    :param mdl_class: Class of algorithm
    :param dataset: Dataset
    :param mdl_params:
    :param train_params:
    :param train_fn_params:
    :param filename
    :return:
    """
    # if os.path.exists(filename):
    #     res = load_pickle(filename)
    #     return res["vae"], res["trainer"]

    if "test_indices" not in train_params:
        warnings.warn("No `test_indices` attribute found.")
    my_vae = mdl_class(n_input=dataset.nb_genes,
                       n_batch=dataset.n_batches,
                       **mdl_params)
    my_trainer = UnsupervisedTrainer(my_vae, dataset, **train_params)
    my_trainer.train(**train_fn_params)
    print(my_trainer.train_losses)
    return my_vae, my_trainer
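
A minimal usage sketch for this helper (hedged: the import paths mirror Examples #23-24 below; the parameter values are illustrative only):

# Hedged usage sketch -- parameter values are illustrative only.
from scvi.dataset import CortexDataset
from scvi.models import VAE

dataset = CortexDataset(save_path="data/")
vae, trainer = train_model(
    VAE,
    dataset,
    mdl_params={"n_latent": 10},
    train_params={"train_size": 0.8, "use_cuda": False},
    train_fn_params={"n_epochs": 2, "lr": 1e-3},
)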
Example #2
def cortex_benchmark(n_epochs=250,
                     use_cuda=True,
                     save_path='data/',
                     show_plot=True):
    cortex_dataset = CortexDataset(save_path=save_path)
    vae = VAE(cortex_dataset.nb_genes)
    trainer_cortex_vae = UnsupervisedTrainer(vae,
                                             cortex_dataset,
                                             use_cuda=use_cuda)
    trainer_cortex_vae.train(n_epochs=n_epochs)
    trainer_cortex_vae.train_set.differential_expression_score(
        'oligodendrocytes', 'pyramidal CA1', genes=["THY1", "MBP"])

    trainer_cortex_vae.test_set.ll()  # assert ~ 1200
    vae = VAE(cortex_dataset.nb_genes)
    trainer_cortex_vae = UnsupervisedTrainer(vae,
                                             cortex_dataset,
                                             use_cuda=use_cuda)
    trainer_cortex_vae.corrupt_posteriors()
    trainer_cortex_vae.train(n_epochs=n_epochs)
    trainer_cortex_vae.uncorrupt_posteriors()
    trainer_cortex_vae.train_set.imputation_benchmark(verbose=(n_epochs > 1),
                                                      save_path=save_path,
                                                      show_plot=show_plot)

    n_samples = 10 if n_epochs == 1 else None  # n_epochs == 1 corresponds to unit tests
    trainer_cortex_vae.train_set.show_t_sne(n_samples=n_samples)
    return trainer_cortex_vae
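
A hedged smoke-test invocation (one epoch triggers the unit-test path flagged in the comment above):

# Hedged usage sketch: n_epochs=1 exercises the unit-test branch noted above.
trainer = cortex_benchmark(n_epochs=1, use_cuda=False, show_plot=False)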
Example #3
def benchmark(dataset, n_epochs=250, use_cuda=True):
    vae = VAE(dataset.nb_genes, n_batch=dataset.n_batches)
    trainer = UnsupervisedTrainer(vae, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)
    trainer.test_set.reconstruction_error()
    trainer.test_set.marginal_ll()
    return trainer
Example #4
def cortex_benchmark(n_epochs=250,
                     use_cuda=True,
                     save_path="data/",
                     show_plot=True):
    cortex_dataset = CortexDataset(save_path=save_path)
    vae = VAE(cortex_dataset.nb_genes)
    trainer_cortex_vae = UnsupervisedTrainer(vae,
                                             cortex_dataset,
                                             use_cuda=use_cuda)
    trainer_cortex_vae.train(n_epochs=n_epochs)
    couple_celltypes = (4, 5)  # the pair of cell types on which to study DE
    cell_idx1 = cortex_dataset.labels.ravel() == couple_celltypes[0]
    cell_idx2 = cortex_dataset.labels.ravel() == couple_celltypes[1]
    trainer_cortex_vae.train_set.differential_expression_score(
        cell_idx1, cell_idx2, genes=["THY1", "MBP"])

    trainer_cortex_vae.test_set.reconstruction_error()  # assert ~ 1200
    vae = VAE(cortex_dataset.nb_genes)
    trainer_cortex_vae = UnsupervisedTrainer(vae,
                                             cortex_dataset,
                                             use_cuda=use_cuda)
    trainer_cortex_vae.corrupt_posteriors()
    trainer_cortex_vae.train(n_epochs=n_epochs)
    trainer_cortex_vae.uncorrupt_posteriors()
    trainer_cortex_vae.train_set.imputation_benchmark(save_path=save_path,
                                                      show_plot=show_plot)

    n_samples = 10 if n_epochs == 1 else None  # n_epochs == 1 corresponds to unit tests
    trainer_cortex_vae.train_set.show_t_sne(n_samples=n_samples)
    return trainer_cortex_vae
Example #5
def benchmark(dataset, n_epochs=250, use_cuda=True):
    vae = VAE(dataset.nb_genes, n_batch=dataset.n_batches)
    trainer = UnsupervisedTrainer(vae, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)
    trainer.test_set.ll(verbose=True)
    trainer.test_set.marginal_ll(verbose=True)
    return trainer
Example #6
def test_gamma_de():
    cortex_dataset = CortexDataset()
    cortex_vae = VAE(cortex_dataset.nb_genes, cortex_dataset.n_batches)
    trainer_cortex_vae = UnsupervisedTrainer(cortex_vae,
                                             cortex_dataset,
                                             train_size=0.5,
                                             use_cuda=use_cuda)
    trainer_cortex_vae.train(n_epochs=2)

    full = trainer_cortex_vae.create_posterior(trainer_cortex_vae.model,
                                               cortex_dataset,
                                               indices=np.arange(
                                                   len(cortex_dataset)))

    n_samples = 10
    M_permutation = 100
    cell_idx1 = cortex_dataset.labels.ravel() == 0
    cell_idx2 = cortex_dataset.labels.ravel() == 1

    full.differential_expression_score(cell_idx1,
                                       cell_idx2,
                                       n_samples=n_samples,
                                       M_permutation=M_permutation)
    full.differential_expression_gamma(cell_idx1,
                                       cell_idx2,
                                       n_samples=n_samples,
                                       M_permutation=M_permutation)
Example #7
def scVI_ld(csv_file, csv_path, ndims, vae_model=VAE, n_labels=0, n_cores=1,
            seed=1234, lr=1e-3, use_cuda=False):
    set_seed(seed)
    dat = CsvDataset(csv_file,
                     save_path=csv_path,
                     new_n_genes=None)
    # Based on recommendations in linear_decoder.ipynb
    n_epochs = 250
    # trainer and model
    ldvae = LDVAE(dat.nb_genes,
                  n_batch=dat.n_batches,
                  n_latent=ndims,
                  n_labels=n_labels)
    trainerLD = UnsupervisedTrainer(ldvae, dat, use_cuda=use_cuda)
    # limit CPU usage
    torch.set_num_threads(n_cores)
    trainerLD.train(n_epochs=n_epochs, lr=lr)
    # extract the posterior mean of the latent dimensions
    full = trainerLD.create_posterior(trainerLD.model, dat, indices=np.arange(len(dat)))
    Z_hat = full.sequential().get_latent()[0]
    adata = anndata.AnnData(dat.X)
    for i, z in enumerate(Z_hat.T):
        adata.obs[f'Z_{i}'] = z
    # reordering for convenience and correspondence with PCA's ordering
    cellLoads = adata.obs.reindex(adata.obs.std().sort_values().index, axis=1)
    return cellLoads
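
A hedged usage sketch (the CSV file name and the number of dimensions are hypothetical):

# Hedged usage sketch -- "my_counts.csv" and ndims=10 are placeholders.
cell_loads = scVI_ld("my_counts.csv", "data/", ndims=10, n_cores=2)
print(cell_loads.head())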
Example #8
def unsupervised_training_one_epoch(dataset: GeneExpressionDataset):
    vae = VAE(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    trainer = UnsupervisedTrainer(vae,
                                  dataset,
                                  train_size=0.5,
                                  use_cuda=use_cuda)
    trainer.train(n_epochs=1)
Example #9
def scVI_latent(csv_file,
                csv_path,
                vae_model=VAE,
                train_size=1.0,
                n_labels=0,
                seed=1234,
                n_cores=1,
                lr=1e-3,
                use_cuda=False):
    set_seed(seed)
    dat = CsvDataset(csv_file, save_path=csv_path, new_n_genes=None)
    # Based on recommendations in basic_tutorial.ipynb
    n_epochs = 400 if (len(dat) < 10000) else 200
    # trainer and model
    vae = vae_model(dat.nb_genes, n_labels=n_labels)
    trainer = UnsupervisedTrainer(
        vae,
        dat,
        train_size=train_size,  # defaults to 0.8; the documentation recommends 1.0
        use_cuda=use_cuda)
    # limit cpu usage
    torch.set_num_threads(n_cores)
    trainer.train(n_epochs=n_epochs, lr=lr)
    full = trainer.create_posterior(trainer.model,
                                    dat,
                                    indices=np.arange(len(dat)))
    # Updating the posterior's "minibatch" size after training is useful in low-memory configurations
    Z_hat = full.sequential().get_latent()[0]
    adata = anndata.AnnData(dat.X)
    for i, z in enumerate(Z_hat.T):
        adata.obs[f'Z_{i}'] = z
    # reordering for convenience and correspondence with PCA's ordering
    cellLoads = adata.obs.reindex(adata.obs.std().sort_values().index, axis=1)
    return cellLoads
Example #10
def scVI_norm(csv_file,
              csv_path,
              vae_model=VAE,
              train_size=1.0,
              n_labels=0,
              seed=1234,
              n_cores=1,
              lr=1e-3,
              use_cuda=False):
    set_seed(seed)
    dat = CsvDataset(csv_file, save_path=csv_path, new_n_genes=None)
    dat.subsample_genes(1000, mode="variance")
    # Based on recommendations in basic_tutorial.ipynb
    n_epochs = 400 if (len(dat) < 10000) else 200
    # trainer and model
    vae = vae_model(dat.nb_genes, n_labels=n_labels)
    trainer = UnsupervisedTrainer(
        vae,
        dat,
        train_size=train_size,  # defaults to 0.8; the documentation recommends 1.0
        use_cuda=use_cuda)
    # limit cpu usage
    torch.set_num_threads(n_cores)
    trainer.train(n_epochs=n_epochs, lr=lr)
    full = trainer.create_posterior(trainer.model,
                                    dat,
                                    indices=np.arange(len(dat)))
    # Updating the posterior's "minibatch" size after training is useful in low-memory configurations
    normalized_values = full.sequential().get_sample_scale()
    return [normalized_values, dat.gene_names]
Example #11
    def run(self):
        n_epochs = 100
        n_latent = 10
        n_hidden = 128
        n_layers = 2
        net_data = self.data.copy()
        net_data.X = self.data.layers['counts']
        del net_data.layers['counts']
        net_data.raw = None  # Ensure that the raw counts are not accidentally used

        # Define batch indices
        le = LabelEncoder()
        net_data.obs['batch_indices'] = le.fit_transform(
            net_data.obs[self.batch].values)
        net_data = AnnDatasetFromAnnData(net_data)
        vae = VAE(net_data.nb_genes,
                  reconstruction_loss='nb',
                  n_batch=net_data.n_batches,
                  n_layers=n_layers,
                  n_latent=n_latent,
                  n_hidden=n_hidden)
        trainer = UnsupervisedTrainer(vae,
                                      net_data,
                                      train_size=1,
                                      use_cuda=False)
        trainer.train(n_epochs=n_epochs, lr=1e-3)
        full = trainer.create_posterior(trainer.model,
                                        net_data,
                                        indices=np.arange(len(net_data)))
        latent, _, _ = full.sequential().get_latent()
        self.data.obsm['X_emb'] = latent
        self.dump_to_h5ad("scvi")
Example #12
def trainVAE(gene_dataset,
             filename,
             rep,
             nlayers=2,
             n_hidden=128,
             reconstruction_loss: str = 'zinb'):
    vae = VAE(gene_dataset.nb_genes,
              n_batch=gene_dataset.n_batches,
              n_labels=gene_dataset.n_labels,
              n_hidden=n_hidden,
              n_latent=10,
              n_layers=nlayers,
              dispersion='gene',
              reconstruction_loss=reconstruction_loss)
    trainer = UnsupervisedTrainer(vae, gene_dataset, train_size=1.0)
    filename = '../{}/vae.{}.rep{}.pkl'.format(filename, reconstruction_loss, rep)
    if os.path.isfile(filename):
        trainer.model.load_state_dict(torch.load(filename))
        trainer.model.eval()
    else:
        trainer.train(n_epochs=250)
        torch.save(trainer.model.state_dict(), filename)
    full = trainer.create_posterior(trainer.model,
                                    gene_dataset,
                                    indices=np.arange(len(gene_dataset)))
    return full
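
A hedged usage sketch of the returned posterior (`my_dataset` and the directory name are placeholders; the unpacking mirrors Example #21 below):

# Hedged usage sketch -- `my_dataset` and "results" are placeholders.
full = trainVAE(my_dataset, "results", rep=0)
latent, batch_indices, labels = full.sequential().get_latent()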
Example #13
def test_iwae(save_path):
    import time
    dataset = CortexDataset(save_path=save_path)
    torch.manual_seed(42)

    vae = VAE(n_input=dataset.nb_genes, n_batch=dataset.n_batches).cuda()
    start = time.time()
    trainer = UnsupervisedTrainer(vae,
                                  gene_dataset=dataset,
                                  ratio_loss=True,
                                  k_importance_weighted=5,
                                  single_backward=True)
    trainer.train(n_epochs=10)
    stop1 = time.time() - start

    vae = VAE(n_input=dataset.nb_genes, n_batch=dataset.n_batches).cuda()
    start = time.time()
    trainer = UnsupervisedTrainer(vae,
                                  gene_dataset=dataset,
                                  ratio_loss=True,
                                  k_importance_weighted=5,
                                  single_backward=False)
    trainer.train(n_epochs=10)
    stop2 = time.time() - start

    print('Time single backward : ', stop1)
    print('Time all elements : ', stop2)
Example #14
def base_benchmark(gene_dataset):
    vae = VAE(gene_dataset.nb_genes, gene_dataset.n_batches,
              gene_dataset.n_labels)
    trainer = UnsupervisedTrainer(vae,
                                  gene_dataset,
                                  train_size=0.5,
                                  use_cuda=use_cuda)
    trainer.train(n_epochs=1)
    return trainer
Example #15
def ldvae_benchmark(dataset, n_epochs, use_cuda=True):
    ldvae = LDVAE(dataset.nb_genes, n_batch=dataset.n_batches)
    trainer = UnsupervisedTrainer(ldvae, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)
    trainer.test_set.reconstruction_error()
    trainer.test_set.marginal_ll()

    ldvae.get_loadings()

    return trainer
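
Because the LDVAE decoder is linear, its loadings can be inspected directly after training; a hedged sketch (the array's orientation depends on the scvi version):

from scvi.dataset import CortexDataset

# Hedged sketch: run a short benchmark and inspect the factor loadings.
dataset = CortexDataset(save_path="data/")
trainer = ldvae_benchmark(dataset, n_epochs=2, use_cuda=False)
print(trainer.model.get_loadings().shape)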
Example #16
def test_iaf2(save_path):
    dataset = CortexDataset(save_path=save_path)
    vae = IALogNormalPoissonVAE(n_input=dataset.nb_genes,
                                n_batch=dataset.n_batches,
                                do_h=True).cuda()
    trainer = UnsupervisedTrainer(vae,
                                  dataset,
                                  train_size=0.5,
                                  ratio_loss=True)
    trainer.train(n_epochs=1000)
    print(trainer.train_losses)
    z, l = trainer.test_set.get_latents(n_samples=5, device='cpu')
    return
Example #17
def test_encoder_only():
    # torch.autograd.set_detect_anomaly(mode=True)
    dataset = LatentLogPoissonDataset(n_genes=5,
                                      n_latent=2,
                                      n_cells=300,
                                      n_comps=1)
    dataset = LatentLogPoissonDataset(n_genes=3,
                                      n_latent=2,
                                      n_cells=15,
                                      n_comps=2)
    dataset = LatentLogPoissonDataset(n_genes=5,
                                      n_latent=2,
                                      n_cells=150,
                                      n_comps=1,
                                      learn_prior_scale=True)
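    # Note: each constructor call above overwrites `dataset`, so only the last
    # configuration (learn_prior_scale=True) is used below; the earlier calls
    # appear to only exercise the LatentLogPoissonDataset constructor.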

    # _, _, marginals = dataset.compute_posteriors(
    #     x_obs=torch.randint(0, 150, size=(1, 5), dtype=torch.float),
    #     mcmc_kwargs={"num_samples": 20, "warmup_steps": 20, "num_chains": 1}
    # )
    # stats = marginals.diagnostics()
    # print(stats)
    dataset.cuda()

    vae_mdl = LogNormalPoissonVAE(
        dataset.nb_genes,
        dataset.n_batches,
        autoregressive=False,
        full_cov=True,
        n_latent=2,
        gt_decoder=dataset.nn_model,
    )
    params = vae_mdl.encoder_params
    trainer = UnsupervisedTrainer(
        model=vae_mdl,
        gene_dataset=dataset,
        use_cuda=True,
        train_size=0.7,
        n_epochs_kl_warmup=1,
        ratio_loss=True,
    )
    trainer.train(
        n_epochs=2,
        lr=1e-3,
        params=params,
    )

    full = trainer.create_posterior(trainer.model,
                                    dataset,
                                    indices=np.arange(len(dataset)))
    lkl_estimate = vae_mdl.marginal_ll(full, n_samples_mc=50)
Example #18
def test_differential_expression(save_path):
    dataset = CortexDataset(save_path=save_path)
    n_cells = len(dataset)
    all_indices = np.arange(n_cells)
    vae = VAE(dataset.nb_genes, dataset.n_batches)
    trainer = UnsupervisedTrainer(vae,
                                  dataset,
                                  train_size=0.5,
                                  use_cuda=use_cuda)
    trainer.train(n_epochs=2)
    post = trainer.create_posterior(vae,
                                    dataset,
                                    shuffle=False,
                                    indices=all_indices)

    # Sample scale example
    px_scales = post.scale_sampler(n_samples_per_cell=4,
                                   n_samples=None,
                                   selection=all_indices)["scale"]
    assert (px_scales.shape[1] == dataset.nb_genes
            ), "posterior scales should have shape (n_samples, n_genes)"

    # Differential expression different models
    idx_1 = [1, 2, 3]
    idx_2 = [4, 5, 6, 7]
    de_dataframe = post.differential_expression_score(
        idx1=idx_1,
        idx2=idx_2,
        n_samples=10,
        mode="vanilla",
        use_permutation=True,
        M_permutation=100,
    )

    de_dataframe = post.differential_expression_score(
        idx1=idx_1,
        idx2=idx_2,
        n_samples=10,
        mode="change",
        use_permutation=True,
        M_permutation=100,
    )
    print(de_dataframe.keys())
    assert (de_dataframe["confidence_interval_0.5_min"] <=
            de_dataframe["confidence_interval_0.5_max"]).all()
    assert (de_dataframe["confidence_interval_0.95_min"] <=
            de_dataframe["confidence_interval_0.95_max"]).all()

    # DE estimation example
    de_probabilities = de_dataframe.loc[:, "proba_de"]
    assert ((0.0 <= de_probabilities) & (de_probabilities <= 1.0)).all()
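
A hedged follow-up sketch, continuing from the `mode="change"` result above (the `proba_de` column is confirmed by the assertion above; the 0.95 cutoff is illustrative):

# Hedged sketch: rank genes by their probability of being differentially
# expressed; the 0.95 cutoff is illustrative.
top_de = de_dataframe.sort_values("proba_de", ascending=False)
print(top_de.head(10))
print((top_de["proba_de"] > 0.95).sum(), "genes called DE at a 0.95 cutoff")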
Example #19
def test_multibatches_features():
    data = [
        np.random.randint(1, 5, size=(20, 10)),
        np.random.randint(1, 10, size=(20, 10)),
        np.random.randint(1, 10, size=(20, 10)),
        np.random.randint(1, 10, size=(30, 10)),
    ]
    dataset = GeneExpressionDataset()
    dataset.populate_from_per_batch_list(data)
    vae = VAE(dataset.nb_genes, dataset.n_batches)
    trainer = UnsupervisedTrainer(vae, dataset, train_size=0.5, use_cuda=use_cuda)
    trainer.train(n_epochs=2)
    trainer.test_set.imputation(n_samples=2, transform_batch=0)
    trainer.train_set.imputation(n_samples=2, transform_batch=[0, 1, 2])
Example #20
def test_logpoisson():
    mu_skeleton = 'mu_{}_200genes_pbmc_diag.npy'
    sigma_skeleton = 'sigma_{}full_200genes_pbmc_diag.npy'
    dataset = LogPoissonDataset(mu0_path=mu_skeleton.format(0),
                                mu1_path=mu_skeleton.format(1),
                                sig0_path=sigma_skeleton.format(0),
                                sig1_path=sigma_skeleton.format(1),
                                pi=[0.5],
                                n_cells=50)
    # res = dataset.compute_bayes_factors(n_sim=30)
    kwargs = {
        'early_stopping_metric': 'elbo',
        'save_best_state_metric': 'elbo',
        'patience': 15,
        'threshold': 3
    }
    vae = LogNormalPoissonVAE(dataset.nb_genes, dataset.n_batches)
    trainer = UnsupervisedTrainer(model=vae,
                                  gene_dataset=dataset,
                                  use_cuda=True,
                                  train_size=0.7,
                                  frequency=1,
                                  n_epochs_kl_warmup=2,
                                  early_stopping_kwargs=kwargs)
    trainer.train(n_epochs=5, lr=1e-3)
    train = trainer.train_set.sequential()
    zs, _, _ = train.get_latent()
    assert not np.isnan(zs).any()

    vae = LogNormalPoissonVAE(dataset.nb_genes,
                              dataset.n_batches,
                              autoregressive=True,
                              n_latent=5)
    trainer = UnsupervisedTrainer(model=vae,
                                  gene_dataset=dataset,
                                  use_cuda=True,
                                  train_size=0.7,
                                  frequency=1,
                                  n_epochs_kl_warmup=2,
                                  early_stopping_kwargs=kwargs)
    torch.autograd.set_detect_anomaly(mode=True)

    trainer.train(n_epochs=5, lr=1e-3)
    train = trainer.train_set.sequential()
    trainer.train_set.show_t_sne(n_samples=1000, color_by='label')
    zs, _, _ = train.get_latent()
    print(zs)
    assert not np.isnan(zs).any()

    print(trainer.history)
Example #21
def trainVAE(gene_dataset, rmCellTypes, rep):
    vae = VAE(gene_dataset.nb_genes, n_batch=gene_dataset.n_batches,
              n_labels=gene_dataset.n_labels, n_hidden=128, n_latent=10,
              n_layers=2, dispersion='gene')
    trainer = UnsupervisedTrainer(vae, gene_dataset, train_size=1.0)
    filename = '../NoOverlap/vae.%s%s.pkl' % (rmCellTypes, rep)
    if os.path.isfile(filename):
        trainer.model.load_state_dict(torch.load(filename))
        trainer.model.eval()
    else:
        trainer.train(n_epochs=150)
        torch.save(trainer.model.state_dict(), filename)
    full = trainer.create_posterior(trainer.model, gene_dataset,
                                    indices=np.arange(len(gene_dataset)))
    latent, batch_indices, labels = full.sequential().get_latent()
    batch_indices = batch_indices.ravel()
    return latent, batch_indices, labels, trainer
Example #22
def test_sampling_zl(save_path):
    cortex_dataset = CortexDataset(save_path=save_path)
    cortex_vae = VAE(cortex_dataset.nb_genes, cortex_dataset.n_batches)
    trainer_cortex_vae = UnsupervisedTrainer(
        cortex_vae, cortex_dataset, train_size=0.5, use_cuda=use_cuda
    )
    trainer_cortex_vae.train(n_epochs=2)

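    # Hedged note: the classifier input is n_latent + 1 because, with
    # sampling_zl=True below, the library-size latent l is concatenated to z.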
    cortex_cls = Classifier((cortex_vae.n_latent + 1), n_labels=cortex_dataset.n_labels)
    trainer_cortex_cls = ClassifierTrainer(
        cortex_cls, cortex_dataset, sampling_model=cortex_vae, sampling_zl=True
    )
    trainer_cortex_cls.train(n_epochs=2)
    trainer_cortex_cls.test_set.accuracy()
Example #23
def training_score_scvi(train, **kwargs):
    from scvi.dataset import GeneExpressionDataset
    from scvi.inference import UnsupervisedTrainer
    from scvi.models import VAE
    data = GeneExpressionDataset(
        *GeneExpressionDataset.get_attributes_from_matrix(train))
    vae = VAE(n_input=train.shape[1])
    m = UnsupervisedTrainer(vae, data, verbose=False)
    m.train(n_epochs=100)
    # Training permuted the data for minibatching. Unpermute before "imputing"
    # (estimating lambda)
    lam = np.vstack([
        m.train_set.sequential().imputation(),
        m.test_set.sequential().imputation()
    ])
    return st.poisson(mu=lam).logpmf(train).sum()
Example #24
def generalization_score_scvi(train, test, **kwargs):
    from scvi.dataset import GeneExpressionDataset
    from scvi.inference import UnsupervisedTrainer
    from scvi.models import VAE
    data = GeneExpressionDataset(
        *GeneExpressionDataset.get_attributes_from_matrix(train))
    vae = VAE(n_input=train.shape[1])
    m = UnsupervisedTrainer(vae, data, verbose=False)
    m.train(n_epochs=100)
    # Training permuted the data for minibatching. Unpermute before "imputing"
    # (estimating lambda)
    with torch.autograd.set_grad_enabled(False):
        lam = np.vstack([
            m.train_set.sequential().imputation(),
            m.test_set.sequential().imputation()
        ])
        return pois_llik(lam, train, test)
Example #25
def test_autozi(save_path):
    data = SyntheticDataset(n_batches=1)

    for disp_zi in ["gene", "gene-label"]:
        autozivae = AutoZIVAE(
            n_input=data.nb_genes,
            dispersion=disp_zi,
            zero_inflation=disp_zi,
            n_labels=data.n_labels,
        )
        trainer_autozivae = UnsupervisedTrainer(
            model=autozivae, gene_dataset=data, train_size=0.5
        )
        trainer_autozivae.train(n_epochs=2, lr=1e-2)
        trainer_autozivae.test_set.elbo()
        trainer_autozivae.test_set.reconstruction_error()
        trainer_autozivae.test_set.marginal_ll()
Example #26
def compute_scvi_latent(
    adata: sc.AnnData,
    n_latent: int = 5,
    n_epochs: int = 100,
    lr: float = 1e-3,
    use_batches: bool = False,
    use_cuda: bool = True,
) -> Tuple[scvi.inference.Posterior, np.ndarray]:
    """Train and return a scVI model and sample a latent space

    :param adata: sc.AnnData object non-normalized
    :param n_latent: dimension of the latent space
    :param n_epochs: number of training epochs
    :param lr: learning rate
    :param use_batches
    :param use_cuda
    :return: (scvi.Posterior, latent_space)
    """
    # Convert easily to scvi dataset
    scviDataset = AnnDataset(adata)

    # Train a model
    vae = VAE(
        scviDataset.nb_genes,
        n_batch=scviDataset.n_batches * use_batches,
        n_latent=n_latent,
    )
    trainer = UnsupervisedTrainer(vae,
                                  scviDataset,
                                  train_size=1.0,
                                  use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs, lr=lr)
    ####

    # Extract latent space
    posterior = trainer.create_posterior(trainer.model,
                                         scviDataset,
                                         indices=np.arange(
                                             len(scviDataset))).sequential()

    latent, _, _ = posterior.get_latent()

    return posterior, latent
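
A hedged usage sketch storing the latent space for downstream scanpy analysis (the obsm key "X_scvi" is only a convention):

import scanpy as sc

# Hedged usage sketch -- `adata` is any non-normalized AnnData object,
# and the obsm key "X_scvi" is only a convention.
posterior, latent = compute_scvi_latent(adata, n_latent=5, n_epochs=10, use_cuda=False)
adata.obsm["X_scvi"] = latent
sc.pp.neighbors(adata, use_rep="X_scvi")
sc.tl.umap(adata)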
Example #27
def test_full_cov():
    dataset = CortexDataset()
    mdl = VAE(n_input=dataset.nb_genes,
              n_batch=dataset.n_batches,
              reconstruction_loss='zinb',
              n_latent=2,
              full_cov=True)
    trainer = UnsupervisedTrainer(model=mdl,
                                  gene_dataset=dataset,
                                  use_cuda=True,
                                  train_size=0.7,
                                  frequency=1,
                                  early_stopping_kwargs={
                                      'early_stopping_metric': 'elbo',
                                      'save_best_state_metric': 'elbo',
                                      'patience': 15,
                                      'threshold': 3
                                  })
    trainer.train(n_epochs=20, lr=1e-3)
    assert not np.isnan(trainer.history['ll_test_set']).any()
Example #28
class Base_scVI(Benchmarkable):
    def __init__(self, data, name, n_latent=10):
        super().__init__(data, name)
        self.n_latent = n_latent
        self.USE_CUDA = False

    def train(self, n_epochs=20):
        self.train_seq(n_epochs)
        self.train_fish(n_epochs)
        starting_time = time.time()
        self.train_both(n_epochs)
        self.train_time = time.time() - starting_time

    def train_fish(self, n_epochs=20):
        dataset = self.data.data_fish
        vae = VAE(
            dataset.nb_genes,
            n_batch=dataset.n_batches,
            dispersion="gene-batch",
            n_latent=self.n_latent,
            reconstruction_loss="nb",
        )
        self.trainer_fish = UnsupervisedTrainer(vae,
                                                dataset,
                                                train_size=0.95,
                                                use_cuda=self.USE_CUDA)
        self.trainer_fish.train(n_epochs=n_epochs, lr=0.001)

    def train_seq(self, n_epochs=20, reconstruction_seq='nb'):
        dataset = self.data.data_seq
        vae = VAE(
            dataset.nb_genes,
            dispersion="gene",
            n_latent=self.n_latent,
            reconstruction_loss=reconstruction_seq,
        )
        self.trainer_seq = UnsupervisedTrainer(vae,
                                               dataset,
                                               train_size=0.95,
                                               use_cuda=self.USE_CUDA)
        self.trainer_seq.train(n_epochs=n_epochs, lr=0.001)
Example #29
def test_cortex(save_path):
    cortex_dataset = CortexDataset(save_path=save_path)
    vae = VAE(cortex_dataset.nb_genes, cortex_dataset.n_batches)
    trainer_cortex_vae = UnsupervisedTrainer(vae, cortex_dataset, train_size=0.5, use_cuda=use_cuda)
    trainer_cortex_vae.train(n_epochs=1)
    trainer_cortex_vae.train_set.ll()
    trainer_cortex_vae.train_set.differential_expression_stats()

    trainer_cortex_vae.corrupt_posteriors(corruption='binomial')
    trainer_cortex_vae.corrupt_posteriors()
    trainer_cortex_vae.train(n_epochs=1)
    trainer_cortex_vae.uncorrupt_posteriors()

    trainer_cortex_vae.train_set.imputation_benchmark(n_samples=1, show_plot=False,
                                                      title_plot='imputation', save_path=save_path)

    svaec = SCANVI(cortex_dataset.nb_genes, cortex_dataset.n_batches, cortex_dataset.n_labels)
    trainer_cortex_svaec = JointSemiSupervisedTrainer(svaec, cortex_dataset,
                                                      n_labelled_samples_per_class=3,
                                                      use_cuda=use_cuda)
    trainer_cortex_svaec.train(n_epochs=1)
    trainer_cortex_svaec.labelled_set.accuracy()
    trainer_cortex_svaec.full_dataset.ll()

    svaec = SCANVI(cortex_dataset.nb_genes, cortex_dataset.n_batches, cortex_dataset.n_labels)
    trainer_cortex_svaec = AlternateSemiSupervisedTrainer(svaec, cortex_dataset,
                                                          n_labelled_samples_per_class=3,
                                                          use_cuda=use_cuda)
    trainer_cortex_svaec.train(n_epochs=1, lr=1e-2)
    trainer_cortex_svaec.unlabelled_set.accuracy()
    data_train, labels_train = trainer_cortex_svaec.labelled_set.raw_data()
    data_test, labels_test = trainer_cortex_svaec.unlabelled_set.raw_data()
    compute_accuracy_svc(data_train, labels_train, data_test, labels_test,
                         param_grid=[{'C': [1], 'kernel': ['linear']}])
    compute_accuracy_rf(data_train, labels_train, data_test, labels_test,
                        param_grid=[{'max_depth': [3], 'n_estimators': [10]}])

    cls = Classifier(cortex_dataset.nb_genes, n_labels=cortex_dataset.n_labels)
    cls_trainer = ClassifierTrainer(cls, cortex_dataset)
    cls_trainer.train(n_epochs=1)
    cls_trainer.train_set.accuracy()
Example #30
def test_annealing_procedures(save_path):
    cortex_dataset = CortexDataset(save_path=save_path)
    cortex_vae = VAE(cortex_dataset.nb_genes, cortex_dataset.n_batches)

    trainer_cortex_vae = UnsupervisedTrainer(
        cortex_vae,
        cortex_dataset,
        train_size=0.5,
        use_cuda=use_cuda,
        n_epochs_kl_warmup=1,
    )
    trainer_cortex_vae.train(n_epochs=2)
    assert trainer_cortex_vae.kl_weight >= 0.99, "Annealing should be over"

    trainer_cortex_vae = UnsupervisedTrainer(
        cortex_vae,
        cortex_dataset,
        train_size=0.5,
        use_cuda=use_cuda,
        n_epochs_kl_warmup=5,
    )
    trainer_cortex_vae.train(n_epochs=2)
    assert trainer_cortex_vae.kl_weight <= 0.99, "Annealing should be proceeding"

    # iter
    trainer_cortex_vae = UnsupervisedTrainer(
        cortex_vae,
        cortex_dataset,
        train_size=0.5,
        use_cuda=use_cuda,
        n_iter_kl_warmup=1,
        n_epochs_kl_warmup=None,
    )
    trainer_cortex_vae.train(n_epochs=2)
    assert trainer_cortex_vae.kl_weight >= 0.99, "Annealing should be over"
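
These assertions rely on a linear KL warm-up; a hedged sketch of that schedule (assuming the linear ramp used by this version's UnsupervisedTrainer):

# Hedged sketch of the linear KL warm-up the assertions above rely on:
# the weight ramps from 0 to 1 over n_epochs_kl_warmup epochs, then stays at 1.
def kl_weight(epoch, n_epochs_kl_warmup):
    return min(1.0, epoch / n_epochs_kl_warmup)

assert kl_weight(2, n_epochs_kl_warmup=1) >= 0.99  # annealing over
assert kl_weight(2, n_epochs_kl_warmup=5) <= 0.99  # still annealing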