Example 1
def unsupervised_training_one_epoch(dataset: GeneExpressionDataset):
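    # Smoke test: fit a vanilla VAE for a single epoch on a 50/50 train/test split
    # (use_cuda is assumed to be defined at module scope in the original source).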
    vae = VAE(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    trainer = UnsupervisedTrainer(vae,
                                  dataset,
                                  train_size=0.5,
                                  use_cuda=use_cuda)
    trainer.train(n_epochs=1)
Example 2
def trainVAE(gene_dataset,
             filename,
             rep,
             nlayers=2,
             n_hidden=128,
             reconstruction_loss: str = 'zinb'):
    vae = VAE(gene_dataset.nb_genes,
              n_batch=gene_dataset.n_batches,
              n_labels=gene_dataset.n_labels,
              n_hidden=n_hidden,
              n_latent=10,
              n_layers=nlayers,
              dispersion='gene',
              reconstruction_loss=reconstruction_loss)
    trainer = UnsupervisedTrainer(vae, gene_dataset, train_size=1.0)
    filename = f'../{filename}/vae.{reconstruction_loss}.rep{rep}.pkl'
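    # Reuse cached weights for this replicate if present; otherwise train for 250 epochs and cache them.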
    if os.path.isfile(filename):
        trainer.model.load_state_dict(torch.load(filename))
        trainer.model.eval()
    else:
        trainer.train(n_epochs=250)
        torch.save(trainer.model.state_dict(), filename)
    full = trainer.create_posterior(trainer.model,
                                    gene_dataset,
                                    indices=np.arange(len(gene_dataset)))
    return full
Example 3
def correct_scvi(Xs, genes):
    import torch
    use_cuda = True
    torch.cuda.set_device(1)

    from scvi.dataset.dataset import GeneExpressionDataset
    from scvi.inference import UnsupervisedTrainer
    from scvi.models import SCANVI, VAE
    from scvi.dataset.anndata import AnnDataset
    from anndata import AnnData
    import numpy as np

    all_ann = [AnnDataset(AnnData(X, var=genes)) for X in Xs]

    all_dataset = GeneExpressionDataset.concat_datasets(*all_ann)

    vae = VAE(all_dataset.nb_genes,
              n_batch=all_dataset.n_batches,
              n_labels=all_dataset.n_labels,
              n_hidden=128,
              n_latent=30,
              n_layers=2,
              dispersion='gene')
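    # train_size just below 1.0 trains on essentially every cell while keeping a nominal validation split.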
    trainer = UnsupervisedTrainer(vae, all_dataset, train_size=0.99999)
    n_epochs = 100
    #trainer.train(n_epochs=n_epochs)
    #torch.save(trainer.model.state_dict(),
    #           'data/harmonization.vae.pkl')
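    # Training is skipped here; weights cached by an earlier run are loaded instead.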
    trainer.model.load_state_dict(torch.load('data/harmonization.vae.pkl'))
    trainer.model.eval()

    full = trainer.create_posterior(trainer.model,
                                    all_dataset,
                                    indices=np.arange(len(all_dataset)))
    latent, batch_indices, labels = full.sequential().get_latent()

    return latent
Example 4
def scVI_latent(csv_file,
                csv_path,
                vae_model=VAE,
                train_size=1.0,
                n_labels=0,
                seed=1234,
                n_cores=1,
                lr=1e-3,
                use_cuda=False):
    set_seed(seed)
    dat = CsvDataset(csv_file, save_path=csv_path, new_n_genes=None)
    # Based on recommendations in basic_tutorial.ipynb
    n_epochs = 400 if (len(dat) < 10000) else 200
    # trainer and model
    vae = vae_model(dat.nb_genes, n_labels=n_labels)
    trainer = UnsupervisedTrainer(
        vae,
        dat,
        train_size=train_size,  # default to 0.8, documentation recommends 1
        use_cuda=use_cuda)
    # limit cpu usage
    torch.set_num_threads(n_cores)
    trainer.train(n_epochs=n_epochs, lr=lr)
    full = trainer.create_posterior(trainer.model,
                                    dat,
                                    indices=np.arange(len(dat)))
    # (Tutorial note: the posterior's minibatch size can be reduced here in low-memory configurations.)
    Z_hat = full.sequential().get_latent()[0]
    adata = anndata.AnnData(dat.X)
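    # store each latent dimension as its own column in adata.obs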
    for i, z in enumerate(Z_hat.T):
        adata.obs[f'Z_{i}'] = z
    # reorder columns for convenience and correspondence with PCA's ordering
    cellLoads = adata.obs.reindex(adata.obs.std().sort_values().index, axis=1)
    return cellLoads
Example 5
def benchmark(dataset, n_epochs=250, use_cuda=True):
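    # Fit scVI, then report reconstruction error and marginal log-likelihood on the held-out test set.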
    vae = VAE(dataset.nb_genes, n_batch=dataset.n_batches)
    trainer = UnsupervisedTrainer(vae, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)
    trainer.test_set.reconstruction_error()
    trainer.test_set.marginal_ll()
    return trainer
Example 6
    def train(self,
              adata,
              condition_key,
              cell_type_key,
              n_epochs=300,
              patience=30,
              lr_reducer=20):
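        # Encode cell types and conditions as the integer label/batch indices scvi expects.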
        le = LabelEncoder()
        adata.obs['labels'] = le.fit_transform(adata.obs[cell_type_key].values)
        adata.obs['batch_indices'] = le.fit_transform(
            adata.obs[condition_key].values)

        net_adata = AnnDatasetFromAnnData(adata)

        early_stopping_kwargs = {
            "early_stopping_metric": "elbo",
            "save_best_state_metric": "elbo",
            "patience": patience,
            "threshold": 0,
            "reduce_lr_on_plateau": True,
            "lr_patience": lr_reducer,
            "lr_factor": 0.1,
        }
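        # Monitor the ELBO, stop early on plateau, and cut the learning rate by 10x when progress stalls.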

        self.trainer = UnsupervisedTrainer(
            self.model,
            net_adata,
            train_size=0.8,
            use_cuda=True,
            frequency=1,
            early_stopping_kwargs=early_stopping_kwargs,
        )

        self.trainer.train(n_epochs=n_epochs, lr=0.001)
Example 7
def test_gamma_de():
    cortex_dataset = CortexDataset()
    cortex_vae = VAE(cortex_dataset.nb_genes, cortex_dataset.n_batches)
    trainer_cortex_vae = UnsupervisedTrainer(cortex_vae,
                                             cortex_dataset,
                                             train_size=0.5,
                                             use_cuda=use_cuda)
    trainer_cortex_vae.train(n_epochs=2)

    full = trainer_cortex_vae.create_posterior(trainer_cortex_vae.model,
                                               cortex_dataset,
                                               indices=np.arange(
                                                   len(cortex_dataset)))

    n_samples = 10
    M_permutation = 100
    cell_idx1 = cortex_dataset.labels.ravel() == 0
    cell_idx2 = cortex_dataset.labels.ravel() == 1

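    # Score differential expression between the two label groups, then repeat with the gamma-based variant.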
    full.differential_expression_score(cell_idx1,
                                       cell_idx2,
                                       n_samples=n_samples,
                                       M_permutation=M_permutation)
    full.differential_expression_gamma(cell_idx1,
                                       cell_idx2,
                                       n_samples=n_samples,
                                       M_permutation=M_permutation)
Example 8
def scVI_ld(csv_file,
            csv_path,
            ndims,
            vae_model=VAE,
            n_labels=0,
            n_cores=1,
            seed=1234,
            lr=1e-3,
            use_cuda=False):
    set_seed(seed)
    dat = CsvDataset(csv_file, save_path=csv_path, new_n_genes=None)
    # Based on recommendations in linear_decoder.ipynb
    n_epochs = 250
    # trainer and model
    ldvae = LDVAE(dat.nb_genes,
                  n_batch=dat.n_batches,
                  n_latent=ndims,
                  n_labels=n_labels)
    trainerLD = UnsupervisedTrainer(ldvae, dat, use_cuda=use_cuda)
    # limit cpu usage
    torch.set_num_threads(n_cores)
    trainerLD.train(n_epochs=n_epochs, lr=lr)
    # extract the mean latent values from the linear-decoded model
    full = trainerLD.create_posterior(trainerLD.model,
                                      dat,
                                      indices=np.arange(len(dat)))
    Z_hat = full.sequential().get_latent()[0]
    adata = anndata.AnnData(dat.X)
    for i, z in enumerate(Z_hat.T):
        adata.obs[f'Z_{i}'] = z
    # reorder columns for convenience and correspondence with PCA's ordering
    cellLoads = adata.obs.reindex(adata.obs.std().sort_values().index, axis=1)
    return cellLoads
Example 9
def train_model(
    mdl_class,
    dataset,
    mdl_params: dict,
    train_params: dict,
    train_fn_params: dict,
    filename: str = None,
):
    """

    :param mdl_class: Class of algorithm
    :param dataset: Dataset
    :param mdl_params:
    :param train_params:
    :param train_fn_params:
    :param filename
    :return:
    """
    # if os.path.exists(filename):
    #     res = load_pickle(filename)
    #     return res["vae"], res["trainer"]

    if "test_indices" not in train_params:
        warnings.warn("No `test_indices` attribute found.")
    my_vae = mdl_class(n_input=dataset.nb_genes,
                       n_batch=dataset.n_batches,
                       **mdl_params)
    my_trainer = UnsupervisedTrainer(my_vae, dataset, **train_params)
    my_trainer.train(**train_fn_params)
    print(my_trainer.train_losses)
    return my_vae, my_trainer
Example 10
def benchmark(dataset, n_epochs=250, use_cuda=True):
    vae = VAE(dataset.nb_genes, n_batch=dataset.n_batches)
    trainer = UnsupervisedTrainer(vae, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)
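    # Report held-out log-likelihood metrics on the test split.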
    trainer.test_set.ll(verbose=True)
    trainer.test_set.marginal_ll(verbose=True)
    return trainer
Example 11
def scVI_norm(csv_file,
              csv_path,
              vae_model=VAE,
              train_size=1.0,
              n_labels=0,
              seed=1234,
              n_cores=1,
              lr=1e-3,
              use_cuda=False):
    set_seed(seed)
    dat = CsvDataset(csv_file, save_path=csv_path, new_n_genes=None)
    dat.subsample_genes(1000, mode="variance")
    # Based on recommendations in basic_tutorial.ipynb
    n_epochs = 400 if (len(dat) < 10000) else 200
    # trainer and model
    vae = vae_model(dat.nb_genes, n_labels=n_labels)
    trainer = UnsupervisedTrainer(
        vae,
        dat,
        train_size=train_size,  # default to 0.8, documentation recommends 1
        use_cuda=use_cuda)
    # limit cpu usage
    torch.set_num_threads(n_cores)
    trainer.train(n_epochs=n_epochs, lr=lr)
    full = trainer.create_posterior(trainer.model,
                                    dat,
                                    indices=np.arange(len(dat)))
    # (Tutorial note: the posterior's minibatch size can be reduced here in low-memory configurations.)
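    # get_sample_scale returns the decoded, library-size-normalized expression frequencies.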
    normalized_values = full.sequential().get_sample_scale()
    return [normalized_values, dat.gene_names]
Example 12
    def run(self):
        n_epochs = 100
        n_latent = 10
        n_hidden = 128
        n_layers = 2
        net_data = self.data.copy()
        net_data.X = self.data.layers['counts']
        del net_data.layers['counts']
        net_data.raw = None  # Ensure that the raw counts are not accidentally used

        # Define batch indices
        le = LabelEncoder()
        net_data.obs['batch_indices'] = le.fit_transform(
            net_data.obs[self.batch].values)
        net_data = AnnDatasetFromAnnData(net_data)
        vae = VAE(net_data.nb_genes,
                  reconstruction_loss='nb',
                  n_batch=net_data.n_batches,
                  n_layers=n_layers,
                  n_latent=n_latent,
                  n_hidden=n_hidden)
        trainer = UnsupervisedTrainer(vae,
                                      net_data,
                                      train_size=1,
                                      use_cuda=False)
        trainer.train(n_epochs=n_epochs, lr=1e-3)
        full = trainer.create_posterior(trainer.model,
                                        net_data,
                                        indices=np.arange(len(net_data)))
        latent, _, _ = full.sequential().get_latent()
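        # Attach the scVI latent space to the original AnnData and write it to disk.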
        self.data.obsm['X_emb'] = latent
        self.dump_to_h5ad("scvi")
Example 13
    def test_special_dataset_size(self):
        gene_dataset = GeneExpressionDataset()
        x = np.random.randint(1, 100, (17 * 2, 10))
        y = np.random.randint(1, 100, (17 * 2, 10))
        gene_dataset.populate_from_data(x)
        protein_data = CellMeasurement(
            name="protein_expression",
            data=y,
            columns_attr_name="protein_names",
            columns=np.arange(10),
        )
        gene_dataset.initialize_cell_measurement(protein_data)

        # Test UnsupervisedTrainer
        vae = VAE(
            gene_dataset.nb_genes,
            n_batch=gene_dataset.n_batches,
            n_labels=gene_dataset.n_labels,
        )
        trainer = UnsupervisedTrainer(
            vae,
            gene_dataset,
            train_size=0.5,
            use_cuda=False,
            data_loader_kwargs={"batch_size": 8},
        )
        trainer.train(n_epochs=1)

        # Test JVATrainer
        jvae = JVAE(
            [gene_dataset.nb_genes, gene_dataset.nb_genes],
            gene_dataset.nb_genes,
            [slice(None)] * 2,
            ["zinb", "zinb"],
            [True, True],
            n_batch=1,
        )
        cls = Classifier(gene_dataset.nb_genes, n_labels=2, logits=True)
        trainer = JVAETrainer(
            jvae,
            cls,
            [gene_dataset, gene_dataset],
            train_size=0.5,
            use_cuda=False,
            data_loader_kwargs={"batch_size": 8},
        )
        trainer.train(n_epochs=1)

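        # Test TotalTrainer with the paired protein measurements.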
        totalvae = TOTALVI(gene_dataset.nb_genes,
                           len(gene_dataset.protein_names))
        trainer = TotalTrainer(
            totalvae,
            gene_dataset,
            train_size=0.5,
            use_cuda=False,
            data_loader_kwargs={"batch_size": 8},
            early_stopping_kwargs=None,
        )
        trainer.train(n_epochs=1)
Example 14
def base_benchmark(gene_dataset):
    vae = VAE(gene_dataset.nb_genes, gene_dataset.n_batches,
              gene_dataset.n_labels)
    trainer = UnsupervisedTrainer(vae,
                                  gene_dataset,
                                  train_size=0.5,
                                  use_cuda=use_cuda)
    trainer.train(n_epochs=1)
    return trainer
Example 15
def ldvae_benchmark(dataset, n_epochs, use_cuda=True):
    ldvae = LDVAE(dataset.nb_genes, n_batch=dataset.n_batches)
    trainer = UnsupervisedTrainer(ldvae, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)
    trainer.test_set.reconstruction_error()
    trainer.test_set.marginal_ll()

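    # The linear decoder's per-gene loadings are directly interpretable, much like PCA components.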
    ldvae.get_loadings()

    return trainer
Example 16
    def train_seq(self, n_epochs=20, reconstruction_seq='nb'):
        dataset = self.data.data_seq
        vae = VAE(
            dataset.nb_genes,
            dispersion="gene",
            n_latent=self.n_latent,
            reconstruction_loss=reconstruction_seq,
        )
        self.trainer_seq = UnsupervisedTrainer(vae,
                                               dataset,
                                               train_size=0.95,
                                               use_cuda=self.USE_CUDA)
        self.trainer_seq.train(n_epochs=n_epochs, lr=0.001)
Example 17
def test_iaf2(save_path):
    dataset = CortexDataset(save_path=save_path)
    vae = IALogNormalPoissonVAE(n_input=dataset.nb_genes,
                                n_batch=dataset.n_batches,
                                do_h=True).cuda()
    trainer = UnsupervisedTrainer(vae,
                                  dataset,
                                  train_size=0.5,
                                  ratio_loss=True)
    trainer.train(n_epochs=1000)
    print(trainer.train_losses)
    z, l = trainer.test_set.get_latents(n_samples=5, device='cpu')
    return
Example 18
def test_encoder_only():
    # torch.autograd.set_detect_anomaly(mode=True)
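    # Only the last of the three datasets constructed below is actually used for training.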
    dataset = LatentLogPoissonDataset(n_genes=5,
                                      n_latent=2,
                                      n_cells=300,
                                      n_comps=1)
    dataset = LatentLogPoissonDataset(n_genes=3,
                                      n_latent=2,
                                      n_cells=15,
                                      n_comps=2)
    dataset = LatentLogPoissonDataset(n_genes=5,
                                      n_latent=2,
                                      n_cells=150,
                                      n_comps=1,
                                      learn_prior_scale=True)

    # _, _, marginals = dataset.compute_posteriors(
    #     x_obs=torch.randint(0, 150, size=(1, 5), dtype=torch.float),
    #     mcmc_kwargs={"num_samples": 20, "warmup_steps": 20, "num_chains": 1}
    # )
    # stats = marginals.diagnostics()
    # print(stats)
    dataset.cuda()

    vae_mdl = LogNormalPoissonVAE(
        dataset.nb_genes,
        dataset.n_batches,
        autoregressive=False,
        full_cov=True,
        n_latent=2,
        gt_decoder=dataset.nn_model,
    )
    params = vae_mdl.encoder_params
    trainer = UnsupervisedTrainer(
        model=vae_mdl,
        gene_dataset=dataset,
        use_cuda=True,
        train_size=0.7,
        n_epochs_kl_warmup=1,
        ratio_loss=True,
    )
    trainer.train(
        n_epochs=2,
        lr=1e-3,
        params=params,
    )

    full = trainer.create_posterior(trainer.model,
                                    dataset,
                                    indices=np.arange(len(dataset)))
    lkl_estimate = vae_mdl.marginal_ll(full, n_samples_mc=50)
Example 19
def test_differential_expression(save_path):
    dataset = CortexDataset(save_path=save_path)
    n_cells = len(dataset)
    all_indices = np.arange(n_cells)
    vae = VAE(dataset.nb_genes, dataset.n_batches)
    trainer = UnsupervisedTrainer(vae,
                                  dataset,
                                  train_size=0.5,
                                  use_cuda=use_cuda)
    trainer.train(n_epochs=2)
    post = trainer.create_posterior(vae,
                                    dataset,
                                    shuffle=False,
                                    indices=all_indices)

    # Sample scale example
    px_scales = post.scale_sampler(n_samples_per_cell=4,
                                   n_samples=None,
                                   selection=all_indices)["scale"]
    assert (px_scales.shape[1] == dataset.nb_genes
            ), "posterior scales should have shape (n_samples, n_genes)"

    # Differential expression different models
    idx_1 = [1, 2, 3]
    idx_2 = [4, 5, 6, 7]
    de_dataframe = post.differential_expression_score(
        idx1=idx_1,
        idx2=idx_2,
        n_samples=10,
        mode="vanilla",
        use_permutation=True,
        M_permutation=100,
    )

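    # This "change"-mode call overwrites the "vanilla" result; only the "change" output is checked below.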
    de_dataframe = post.differential_expression_score(
        idx1=idx_1,
        idx2=idx_2,
        n_samples=10,
        mode="change",
        use_permutation=True,
        M_permutation=100,
    )
    print(de_dataframe.keys())
    assert (de_dataframe["confidence_interval_0.5_min"] <=
            de_dataframe["confidence_interval_0.5_max"]).all()
    assert (de_dataframe["confidence_interval_0.95_min"] <=
            de_dataframe["confidence_interval_0.95_max"]).all()

    # DE estimation example
    de_probabilities = de_dataframe.loc[:, "proba_de"]
    assert ((0.0 <= de_probabilities) & (de_probabilities <= 1.0)).all()
Example 20
def trainVAE(gene_dataset, rmCellTypes, rep):
    vae = VAE(gene_dataset.nb_genes,
              n_batch=gene_dataset.n_batches,
              n_labels=gene_dataset.n_labels,
              n_hidden=128,
              n_latent=10,
              n_layers=2,
              dispersion='gene')
    trainer = UnsupervisedTrainer(vae, gene_dataset, train_size=1.0)
    filename = '../NoOverlap/vae.%s%s.pkl' % (rmCellTypes, rep)
    if os.path.isfile(filename):
        trainer.model.load_state_dict(torch.load(filename))
        trainer.model.eval()
    else:
        trainer.train(n_epochs=150)
        torch.save(trainer.model.state_dict(), filename)
    full = trainer.create_posterior(trainer.model,
                                    gene_dataset,
                                    indices=np.arange(len(gene_dataset)))
    latent, batch_indices, labels = full.sequential().get_latent()
    batch_indices = batch_indices.ravel()
    return latent, batch_indices, labels, trainer
Example 21
    def full_init(self):
        self.model = self.model_type(
            n_input=self.dataset.nb_genes,
            n_batch=self.dataset.n_batches,
            reconstruction_loss=self.reconstruction_loss,
            n_latent=self.n_latent,
            full_cov=self.full_cov)
        self.trainer = UnsupervisedTrainer(model=self.model,
                                           gene_dataset=self.dataset,
                                           use_cuda=True,
                                           train_size=0.7,
                                           kl=1,
                                           frequency=1)
        self.is_fully_init = True
Example 22
def test_iwae(save_path):
    import time
    dataset = CortexDataset(save_path=save_path)
    torch.manual_seed(42)
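    # Compare training time with single_backward=True vs. False for k=5 importance-weighted samples.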

    vae = VAE(n_input=dataset.nb_genes, n_batch=dataset.n_batches).cuda()
    start = time.time()
    trainer = UnsupervisedTrainer(vae,
                                  gene_dataset=dataset,
                                  ratio_loss=True,
                                  k_importance_weighted=5,
                                  single_backward=True)
    trainer.train(n_epochs=10)
    stop1 = time.time() - start

    vae = VAE(n_input=dataset.nb_genes, n_batch=dataset.n_batches).cuda()
    start = time.time()
    trainer = UnsupervisedTrainer(vae,
                                  gene_dataset=dataset,
                                  ratio_loss=True,
                                  k_importance_weighted=5,
                                  single_backward=False)
    trainer.train(n_epochs=10)
    stop2 = time.time() - start

    print('Time single backward : ', stop1)
    print('Time all elements : ', stop2)
Example 23
    def train_fish(self, n_epochs=20):
        dataset = self.data.data_fish
        vae = VAE(
            dataset.nb_genes,
            n_batch=dataset.n_batches,
            dispersion="gene-batch",
            n_latent=self.n_latent,
            reconstruction_loss="nb",
        )
        self.trainer_fish = UnsupervisedTrainer(vae,
                                                dataset,
                                                train_size=0.95,
                                                use_cuda=self.USE_CUDA)
        self.trainer_fish.train(n_epochs=n_epochs, lr=0.001)
Example 24
def test_multibatches_features():
    data = [
        np.random.randint(1, 5, size=(20, 10)),
        np.random.randint(1, 10, size=(20, 10)),
        np.random.randint(1, 10, size=(20, 10)),
        np.random.randint(1, 10, size=(30, 10)),
    ]
    dataset = GeneExpressionDataset()
    dataset.populate_from_per_batch_list(data)
    vae = VAE(dataset.nb_genes, dataset.n_batches)
    trainer = UnsupervisedTrainer(vae, dataset, train_size=0.5, use_cuda=use_cuda)
    trainer.train(n_epochs=2)
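    # transform_batch imputes expression as though cells belonged to the given batch(es).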
    trainer.test_set.imputation(n_samples=2, transform_batch=0)
    trainer.train_set.imputation(n_samples=2, transform_batch=[0, 1, 2])
Example 25
def test_sampling_zl(save_path):
    cortex_dataset = CortexDataset(save_path=save_path)
    cortex_vae = VAE(cortex_dataset.nb_genes, cortex_dataset.n_batches)
    trainer_cortex_vae = UnsupervisedTrainer(
        cortex_vae, cortex_dataset, train_size=0.5, use_cuda=use_cuda
    )
    trainer_cortex_vae.train(n_epochs=2)

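    # sampling_zl appends the library-size latent to z, hence the classifier input size n_latent + 1.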
    cortex_cls = Classifier((cortex_vae.n_latent + 1), n_labels=cortex_dataset.n_labels)
    trainer_cortex_cls = ClassifierTrainer(
        cortex_cls, cortex_dataset, sampling_model=cortex_vae, sampling_zl=True
    )
    trainer_cortex_cls.train(n_epochs=2)
    trainer_cortex_cls.test_set.accuracy()
Example 26
    def get_trainer(self, vae, train_size):
        batch_size = 128
        while self.gene_dataset.nb_cells % batch_size == 1:
            batch_size += 1  # adjust batch size so that no batch contains a single cell
        trainer = UnsupervisedTrainer(
            vae,
            self.gene_dataset,
            train_size=train_size,
            use_cuda=self.use_cuda,
            frequency=1,
            data_loader_kwargs={'batch_size': batch_size})
        if self.train_size == 1.0:
            trainer._posteriors['test_set'].to_monitor = []
            trainer.metrics_to_monitor = {}
        return trainer
Example 27
def training_score_scvi(train, **kwargs):
    from scvi.dataset import GeneExpressionDataset
    from scvi.inference import UnsupervisedTrainer
    from scvi.models import VAE
    import numpy as np
    import scipy.stats as st  # st.poisson(mu=...) below matches scipy.stats' Poisson API
    data = GeneExpressionDataset(
        *GeneExpressionDataset.get_attributes_from_matrix(train))
    vae = VAE(n_input=train.shape[1])
    m = UnsupervisedTrainer(vae, data, verbose=False)
    m.train(n_epochs=100)
    # Training permuted the data for minibatching. Unpermute before "imputing"
    # (estimating lambda)
    lam = np.vstack([
        m.train_set.sequential().imputation(),
        m.test_set.sequential().imputation()
    ])
    return st.poisson(mu=lam).logpmf(train).sum()
Example 28
    def train_both(self, n_epochs=20):
        vae_both = VAE(
            self.full_dataset.nb_genes,
            n_latent=self.n_latent,
            n_batch=self.full_dataset.n_batches,
            dispersion="gene-batch",
            reconstruction_loss=self.reconstruction_seq,
        )
        self.trainer_both = UnsupervisedTrainer(
            vae_both,
            self.full_dataset,
            train_size=0.95,
            use_cuda=self.USE_CUDA,
            frequency=1,
        )
        self.trainer_both.train(n_epochs=n_epochs, lr=0.001)
Example 29
def generalization_score_scvi(train, test, **kwargs):
    from scvi.dataset import GeneExpressionDataset
    from scvi.inference import UnsupervisedTrainer
    from scvi.models import VAE
    import numpy as np
    import torch  # pois_llik is a helper defined elsewhere in the source
    data = GeneExpressionDataset(
        *GeneExpressionDataset.get_attributes_from_matrix(train))
    vae = VAE(n_input=train.shape[1])
    m = UnsupervisedTrainer(vae, data, verbose=False)
    m.train(n_epochs=100)
    # Training permuted the data for minibatching. Unpermute before "imputing"
    # (estimating lambda)
    with torch.autograd.set_grad_enabled(False):
        lam = np.vstack([
            m.train_set.sequential().imputation(),
            m.test_set.sequential().imputation()
        ])
        return pois_llik(lam, train, test)
Example 30
def test_autozi(save_path):
    data = SyntheticDataset(n_batches=1)

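    # Exercise AutoZI with gene-wise and gene-and-label-wise dispersion and zero-inflation.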
    for disp_zi in ["gene", "gene-label"]:
        autozivae = AutoZIVAE(
            n_input=data.nb_genes,
            dispersion=disp_zi,
            zero_inflation=disp_zi,
            n_labels=data.n_labels,
        )
        trainer_autozivae = UnsupervisedTrainer(
            model=autozivae, gene_dataset=data, train_size=0.5
        )
        trainer_autozivae.train(n_epochs=2, lr=1e-2)
        trainer_autozivae.test_set.elbo()
        trainer_autozivae.test_set.reconstruction_error()
        trainer_autozivae.test_set.marginal_ll()