Example #1
0
def cortex_benchmark(n_epochs=250,
                     use_cuda=True,
                     save_path='data/',
                     show_plot=True):
    """Run two VAE trainings on the Cortex dataset and benchmark them.

    The first training is followed by a differential expression score on the
    train set and an ``ll()`` call on the test set. The second training runs
    on a fresh VAE between ``corrupt_posteriors()`` and
    ``uncorrupt_posteriors()``, and is followed by the imputation benchmark
    and a t-SNE plot of the train set.

    :param n_epochs: number of epochs for each of the two trainings
    :param use_cuda: forwarded to ``UnsupervisedTrainer``
    :param save_path: forwarded to ``CortexDataset`` and to
        ``imputation_benchmark``
    :param show_plot: forwarded to ``imputation_benchmark``
    :return: the trainer of the second (corrupted) training
    """
    dataset = CortexDataset(save_path=save_path)

    # --- First run: differential expression + test-set ll.
    model = VAE(dataset.nb_genes)
    trainer = UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)
    trainer.train_set.differential_expression_score(
        'oligodendrocytes', 'pyramidal CA1', genes=["THY1", "MBP"])
    trainer.test_set.ll()  # assert ~ 1200

    # --- Second run: fresh model trained while posteriors are corrupted.
    model = VAE(dataset.nb_genes)
    trainer = UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.corrupt_posteriors()
    trainer.train(n_epochs=n_epochs)
    trainer.uncorrupt_posteriors()
    be_verbose = n_epochs > 1
    trainer.train_set.imputation_benchmark(verbose=be_verbose,
                                           save_path=save_path,
                                           show_plot=show_plot)

    # n_epochs == 1 is unit tests
    if n_epochs == 1:
        n_samples = 10
    else:
        n_samples = None
    trainer.train_set.show_t_sne(n_samples=n_samples)
    return trainer
Example #2
0
def cortex_benchmark(n_epochs=250,
                     use_cuda=True,
                     save_path="data/",
                     show_plot=True):
    """Run two VAE trainings on the Cortex dataset and benchmark them.

    The first training is followed by a differential expression score
    between the cells labelled 4 and the cells labelled 5, and by a
    ``reconstruction_error()`` call on the test set. The second training runs
    on a fresh VAE between ``corrupt_posteriors()`` and
    ``uncorrupt_posteriors()``, and is followed by the imputation benchmark
    and a t-SNE plot of the train set.

    :param n_epochs: number of epochs for each of the two trainings
    :param use_cuda: forwarded to ``UnsupervisedTrainer``
    :param save_path: forwarded to ``CortexDataset`` and to
        ``imputation_benchmark``
    :param show_plot: forwarded to ``imputation_benchmark``
    :return: the trainer of the second (corrupted) training
    """
    dataset = CortexDataset(save_path=save_path)

    # --- First run: differential expression + reconstruction error.
    model = VAE(dataset.nb_genes)
    trainer = UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)

    # the couple types on which to study DE
    first_type, second_type = (4, 5)
    in_first = dataset.labels.ravel() == first_type
    in_second = dataset.labels.ravel() == second_type
    trainer.train_set.differential_expression_score(
        in_first, in_second, genes=["THY1", "MBP"])

    trainer.test_set.reconstruction_error()  # assert ~ 1200

    # --- Second run: fresh model trained while posteriors are corrupted.
    model = VAE(dataset.nb_genes)
    trainer = UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.corrupt_posteriors()
    trainer.train(n_epochs=n_epochs)
    trainer.uncorrupt_posteriors()
    trainer.train_set.imputation_benchmark(save_path=save_path,
                                           show_plot=show_plot)

    # n_epochs == 1 is unit tests
    if n_epochs == 1:
        n_samples = 10
    else:
        n_samples = None
    trainer.train_set.show_t_sne(n_samples=n_samples)
    return trainer
Example #3
0
def test_iwae(save_path):
    """Time ratio-loss training with ``single_backward`` on and off.

    For each setting a fresh CUDA VAE is built on the Cortex dataset and
    trained for 10 epochs with ``ratio_loss=True`` and
    ``k_importance_weighted=5``. The timer covers trainer construction and
    training, not model construction. Both wall-clock durations are printed.

    :param save_path: forwarded to ``CortexDataset``
    """
    import time

    dataset = CortexDataset(save_path=save_path)
    torch.manual_seed(42)

    def timed_training(single_backward):
        # The model is created (and moved to GPU) before the clock starts.
        model = VAE(n_input=dataset.nb_genes,
                    n_batch=dataset.n_batches).cuda()
        began = time.time()
        trainer = UnsupervisedTrainer(model,
                                      gene_dataset=dataset,
                                      ratio_loss=True,
                                      k_importance_weighted=5,
                                      single_backward=single_backward)
        trainer.train(n_epochs=10)
        return time.time() - began

    elapsed_single = timed_training(True)
    elapsed_all = timed_training(False)

    print('Time single backward : ', elapsed_single)
    print('Time all elements : ', elapsed_all)
Example #4
0
def benchmark(dataset, n_epochs=250, use_cuda=True):
    """Train a VAE on ``dataset`` and evaluate it on the test set.

    After training, ``reconstruction_error()`` and ``marginal_ll()`` are
    called on the trainer's test set; their results are not returned.

    :param dataset: dataset exposing ``nb_genes`` and ``n_batches``
    :param n_epochs: number of training epochs
    :param use_cuda: forwarded to ``UnsupervisedTrainer``
    :return: the trained ``UnsupervisedTrainer``
    """
    model = VAE(dataset.nb_genes, n_batch=dataset.n_batches)
    fitted = UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)
    fitted.train(n_epochs=n_epochs)

    held_out = fitted.test_set
    held_out.reconstruction_error()
    held_out.marginal_ll()
    return fitted
Example #5
0
def unsupervised_training_one_epoch(dataset: GeneExpressionDataset):
    """Train a VAE on ``dataset`` for a single epoch.

    Half of the data is used for training (``train_size=0.5``). ``use_cuda``
    is read from the enclosing module scope.

    :param dataset: dataset exposing ``nb_genes``, ``n_batches`` and
        ``n_labels``
    """
    model = VAE(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    one_epoch_trainer = UnsupervisedTrainer(
        model, dataset, train_size=0.5, use_cuda=use_cuda)
    one_epoch_trainer.train(n_epochs=1)
Example #6
0
def benchmark(dataset, n_epochs=250, use_cuda=True):
    """Train a VAE on ``dataset`` and evaluate it on the test set.

    After training, ``ll(verbose=True)`` and ``marginal_ll(verbose=True)``
    are called on the trainer's test set; their results are not returned.

    :param dataset: dataset exposing ``nb_genes`` and ``n_batches``
    :param n_epochs: number of training epochs
    :param use_cuda: forwarded to ``UnsupervisedTrainer``
    :return: the trained ``UnsupervisedTrainer``
    """
    model = VAE(dataset.nb_genes, n_batch=dataset.n_batches)
    fitted = UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)
    fitted.train(n_epochs=n_epochs)

    held_out = fitted.test_set
    held_out.ll(verbose=True)
    held_out.marginal_ll(verbose=True)
    return fitted
Example #7
0
def test_gamma_de():
    """Smoke-test both differential expression entry points of a posterior.

    A VAE is trained for 2 epochs on the Cortex dataset, a posterior over all
    cells is created, and ``differential_expression_score`` and
    ``differential_expression_gamma`` are called to compare the cells
    labelled 0 with the cells labelled 1. ``use_cuda`` is read from the
    enclosing module scope.
    """
    dataset = CortexDataset()
    model = VAE(dataset.nb_genes, dataset.n_batches)
    trainer = UnsupervisedTrainer(
        model, dataset, train_size=0.5, use_cuda=use_cuda)
    trainer.train(n_epochs=2)

    # Posterior covering every cell of the dataset.
    every_cell = np.arange(len(dataset))
    full = trainer.create_posterior(trainer.model,
                                    dataset,
                                    indices=every_cell)

    # Settings shared by both DE calls.
    de_kwargs = dict(n_samples=10, M_permutation=100)
    in_group_0 = dataset.labels.ravel() == 0
    in_group_1 = dataset.labels.ravel() == 1

    full.differential_expression_score(in_group_0, in_group_1, **de_kwargs)
    full.differential_expression_gamma(in_group_0, in_group_1, **de_kwargs)
Example #8
0
    def run(self):
        """Compute a VAE latent embedding of ``self.data`` and dump it.

        A copy of ``self.data`` is prepared with the ``'counts'`` layer as
        ``X``, batch labels from ``self.batch`` are integer-encoded into
        ``obs['batch_indices']``, and a VAE is trained on CPU for 100 epochs
        on the whole dataset. The latent representation of all cells is
        stored in ``self.data.obsm['X_emb']`` and ``dump_to_h5ad("scvi")`` is
        called.
        """
        # Work on a copy whose X holds the 'counts' layer.
        adata = self.data.copy()
        adata.X = self.data.layers['counts']
        del adata.layers['counts']
        adata.raw = None  # Ensure that the raw counts are not accidentally used

        # Define batch indices
        encoder = LabelEncoder()
        batch_values = adata.obs[self.batch].values
        adata.obs['batch_indices'] = encoder.fit_transform(batch_values)
        dataset = AnnDatasetFromAnnData(adata)

        # Model hyper-parameters.
        architecture = dict(n_layers=2, n_latent=10, n_hidden=128)
        model = VAE(dataset.nb_genes,
                    reconstruction_loss='nb',
                    n_batch=dataset.n_batches,
                    **architecture)

        trainer = UnsupervisedTrainer(
            model, dataset, train_size=1, use_cuda=False)
        trainer.train(n_epochs=100, lr=1e-3)

        # Posterior over every cell, read sequentially.
        every_cell = np.arange(len(dataset))
        posterior = trainer.create_posterior(trainer.model,
                                             dataset,
                                             indices=every_cell)
        embedding, _, _ = posterior.sequential().get_latent()
        self.data.obsm['X_emb'] = embedding
        self.dump_to_h5ad("scvi")
Example #9
0
def test_annealing_procedures(save_path):
    """Check ``kl_weight`` after 2 epochs under three warm-up settings.

    The same VAE instance is reused by three successive trainers, as in the
    original test. ``use_cuda`` is read from the enclosing module scope.

    :param save_path: forwarded to ``CortexDataset``
    """
    dataset = CortexDataset(save_path=save_path)
    model = VAE(dataset.nb_genes, dataset.n_batches)

    def kl_weight_after_training(**warmup):
        # Fresh trainer around the shared model; only warm-up args vary.
        trainer = UnsupervisedTrainer(
            model,
            dataset,
            train_size=0.5,
            use_cuda=use_cuda,
            **warmup
        )
        trainer.train(n_epochs=2)
        return trainer.kl_weight

    # Warm-up of 1 epoch, 2 epochs trained.
    weight = kl_weight_after_training(n_epochs_kl_warmup=1)
    assert weight >= 0.99, "Annealing should be over"

    # Warm-up of 5 epochs, 2 epochs trained.
    weight = kl_weight_after_training(n_epochs_kl_warmup=5)
    assert weight <= 0.99, "Annealing should be proceeding"

    # iter
    weight = kl_weight_after_training(n_iter_kl_warmup=1,
                                      n_epochs_kl_warmup=None)
    assert weight >= 0.99, "Annealing should be over"
Example #10
0
def correct_scvi(Xs, genes):
    """Return the VAE latent representation of the concatenated datasets.

    Each matrix in ``Xs`` is wrapped as ``AnnDataset(AnnData(X, var=genes))``
    and the results are concatenated into a single dataset. A VAE is built
    for it, but NOT trained here: its weights are loaded from
    ``data/harmonization.vae.pkl``, which must already exist.

    :param Xs: iterable of expression matrices, one per dataset
    :param genes: passed as ``var`` to every ``AnnData``
    :return: the first element returned by ``get_latent()`` for a sequential
        posterior over all cells
    """
    import torch
    # NOTE(review): hard-coded GPU index 1 -- presumably fails on hosts with
    # fewer than two CUDA devices; confirm before reuse.
    torch.cuda.set_device(1)

    from scvi.dataset.dataset import GeneExpressionDataset
    from scvi.inference import UnsupervisedTrainer
    from scvi.models import VAE
    from scvi.dataset.anndata import AnnDataset

    all_ann = [AnnDataset(AnnData(X, var=genes)) for X in Xs]

    all_dataset = GeneExpressionDataset.concat_datasets(*all_ann)

    vae = VAE(all_dataset.nb_genes,
              n_batch=all_dataset.n_batches,
              n_labels=all_dataset.n_labels,
              n_hidden=128,
              n_latent=30,
              n_layers=2,
              dispersion='gene')
    trainer = UnsupervisedTrainer(vae, all_dataset, train_size=0.99999)

    # Training is skipped in favour of a saved checkpoint. The checkpoint was
    # presumably produced by trainer.train(n_epochs=100) followed by
    # torch.save(trainer.model.state_dict(), 'data/harmonization.vae.pkl').
    trainer.model.load_state_dict(torch.load('data/harmonization.vae.pkl'))
    trainer.model.eval()

    full = trainer.create_posterior(trainer.model,
                                    all_dataset,
                                    indices=np.arange(len(all_dataset)))
    # Only the latent coordinates are needed; the other two outputs are
    # discarded.
    latent, _, _ = full.sequential().get_latent()

    return latent
Example #11
0
def benchmark(dataset, n_epochs=250, use_cuda=True):
    """Train a VAE with ``VariationalInference`` and evaluate on ``'test'``.

    After training, ``ll('test')`` and ``imputation('test', rate=0.1)`` are
    called; their results are not returned.

    :param dataset: dataset exposing ``nb_genes`` and ``n_batches``
    :param n_epochs: number of training epochs
    :param use_cuda: forwarded to ``VariationalInference``
    :return: the trained ``VariationalInference`` object
    """
    model = VAE(dataset.nb_genes, n_batch=dataset.n_batches)
    inference = VariationalInference(model, dataset, use_cuda=use_cuda)
    inference.train(n_epochs=n_epochs)

    split = 'test'
    inference.ll(split)
    inference.imputation(split, rate=0.1)  # assert ~ 2.1
    return inference
Example #12
0
    def test_special_dataset_size(self):
        """Train three trainers for one epoch on a 34-cell dataset.

        The dataset has ``17 * 2`` rows and 10 columns of random integers,
        plus a ``protein_expression`` measurement of the same shape. It is
        split 50/50 and loaded with ``batch_size=8`` by
        ``UnsupervisedTrainer``, ``JVAETrainer`` and ``TotalTrainer`` in
        turn.
        """
        dataset = GeneExpressionDataset()
        # Draw genes first, then proteins, to keep the RNG consumption order.
        gene_counts = np.random.randint(1, 100, (17 * 2, 10))
        protein_counts = np.random.randint(1, 100, (17 * 2, 10))
        dataset.populate_from_data(gene_counts)
        proteins = CellMeasurement(
            name="protein_expression",
            data=protein_counts,
            columns_attr_name="protein_names",
            columns=np.arange(10),
        )
        dataset.initialize_cell_measurement(proteins)

        # Settings common to every trainer below.
        split = dict(train_size=0.5, use_cuda=False)

        # Test UnsupervisedTrainer
        model = VAE(
            dataset.nb_genes,
            n_batch=dataset.n_batches,
            n_labels=dataset.n_labels,
        )
        trainer = UnsupervisedTrainer(
            model,
            dataset,
            data_loader_kwargs={"batch_size": 8},
            **split
        )
        trainer.train(n_epochs=1)

        # Test JVAETrainer
        n_genes = dataset.nb_genes
        joint_model = JVAE(
            [n_genes, dataset.nb_genes],
            dataset.nb_genes,
            [slice(None)] * 2,
            ["zinb", "zinb"],
            [True, True],
            n_batch=1,
        )
        discriminator = Classifier(dataset.nb_genes, n_labels=2, logits=True)
        trainer = JVAETrainer(
            joint_model,
            discriminator,
            [dataset, dataset],
            data_loader_kwargs={"batch_size": 8},
            **split
        )
        trainer.train(n_epochs=1)

        # Test TotalTrainer
        n_proteins = len(dataset.protein_names)
        total_model = TOTALVI(dataset.nb_genes, n_proteins)
        trainer = TotalTrainer(
            total_model,
            dataset,
            data_loader_kwargs={"batch_size": 8},
            early_stopping_kwargs=None,
            **split
        )
        trainer.train(n_epochs=1)
Example #13
0
 def get_vae(self):
     """Build the VAE for ``self.gene_dataset``.

     When ``self.use_batches`` is truthy the model gets
     ``dispersion='gene-batch'``; otherwise ``dispersion='gene'``. In both
     cases ``n_batch`` is ``gene_dataset.n_batches * use_batches``.

     :return: a new, untrained ``VAE``
     """
     batch_count = self.gene_dataset.n_batches * self.use_batches
     dispersion_mode = 'gene-batch' if self.use_batches else 'gene'
     return VAE(self.gene_dataset.nb_genes,
                n_batch=batch_count,
                dispersion=dispersion_mode,
                n_layers=2,
                n_hidden=128,
                n_latent=self.n_latent)
Example #14
0
def base_benchmark(gene_dataset):
    """Train a VAE on ``gene_dataset`` for one epoch and return the trainer.

    Half of the data is used for training (``train_size=0.5``). ``use_cuda``
    is read from the enclosing module scope.

    :param gene_dataset: dataset exposing ``nb_genes``, ``n_batches`` and
        ``n_labels``
    :return: the ``UnsupervisedTrainer`` after one epoch
    """
    model = VAE(gene_dataset.nb_genes,
                gene_dataset.n_batches,
                gene_dataset.n_labels)
    fitted = UnsupervisedTrainer(
        model, gene_dataset, train_size=0.5, use_cuda=use_cuda)
    fitted.train(n_epochs=1)
    return fitted
Example #15
0
def base_benchmark(gene_dataset):
    """Train a VAE on ``gene_dataset`` for one epoch and return the result.

    Half of the data is used for training (``train_size=0.5``). ``use_cuda``
    is read from the enclosing module scope.

    :param gene_dataset: dataset exposing ``nb_genes``, ``n_batches`` and
        ``n_labels``
    :return: the ``VariationalInference`` object after one epoch
    """
    model = VAE(gene_dataset.nb_genes,
                gene_dataset.n_batches,
                gene_dataset.n_labels)
    inference = VariationalInference(
        model, gene_dataset, train_size=0.5, use_cuda=use_cuda)
    inference.train(n_epochs=1)
    return inference
Example #16
0
def cortex_benchmark(n_epochs=250, use_cuda=True, unit_test=False):
    """Train a VAE on the Cortex dataset and evaluate it on ``'test'``.

    After training, ``ll``, ``differential_expression``, ``imputation`` (with
    ``rate=0.1``) and ``show_t_sne`` are called on the ``'test'`` split.

    :param n_epochs: number of training epochs
    :param use_cuda: forwarded to ``VariationalInference``
    :param unit_test: when truthy, the t-SNE plot uses 10 samples instead of
        1000
    :return: the trained ``VariationalInference`` object
    """
    dataset = CortexDataset()
    model = VAE(dataset.nb_genes)
    inference = VariationalInference(model, dataset, use_cuda=use_cuda)
    inference.train(n_epochs=n_epochs)

    split = 'test'
    inference.ll(split)  # assert ~ 1200
    inference.differential_expression(split)
    inference.imputation(split, rate=0.1)  # assert ~ 2.3

    if unit_test:
        n_samples = 10
    else:
        n_samples = 1000
    inference.show_t_sne(split, n_samples=n_samples)
    return inference
Example #17
0
 def train_seq(self, n_epochs=20, reconstruction_seq='nb'):
     """Train a VAE on ``self.data.data_seq``.

     The trainer is stored in ``self.trainer_seq`` before training starts.

     :param n_epochs: number of training epochs
     :param reconstruction_seq: passed to ``VAE`` as ``reconstruction_loss``
     """
     seq_dataset = self.data.data_seq
     model = VAE(seq_dataset.nb_genes,
                 dispersion="gene",
                 n_latent=self.n_latent,
                 reconstruction_loss=reconstruction_seq)
     self.trainer_seq = UnsupervisedTrainer(
         model, seq_dataset, train_size=0.95, use_cuda=self.USE_CUDA)
     self.trainer_seq.train(n_epochs=n_epochs, lr=0.001)
Example #18
0
def test_differential_expression(save_path):
    """Exercise scale sampling and differential expression on a posterior.

    A VAE is trained for 2 epochs on the Cortex dataset and a non-shuffled
    posterior over all cells is created. The test checks the second
    dimension of the sampled scales, runs ``differential_expression_score``
    in ``"vanilla"`` and ``"change"`` modes, and validates the confidence
    intervals and DE probabilities of the ``"change"`` result. ``use_cuda``
    is read from the enclosing module scope.

    :param save_path: forwarded to ``CortexDataset``
    """
    dataset = CortexDataset(save_path=save_path)
    every_cell = np.arange(len(dataset))
    model = VAE(dataset.nb_genes, dataset.n_batches)
    trainer = UnsupervisedTrainer(
        model, dataset, train_size=0.5, use_cuda=use_cuda)
    trainer.train(n_epochs=2)
    posterior = trainer.create_posterior(
        model, dataset, shuffle=False, indices=every_cell)

    # Sample scale example
    sampled = posterior.scale_sampler(
        n_samples_per_cell=4, n_samples=None, selection=every_cell)
    scales = sampled["scale"]
    assert (scales.shape[1] == dataset.nb_genes
            ), "posterior scales should have shape (n_samples, n_genes)"

    # Differential expression different models
    group_a = [1, 2, 3]
    group_b = [4, 5, 6, 7]
    # The "vanilla" result is computed only to exercise that mode.
    vanilla_result = posterior.differential_expression_score(
        idx1=group_a,
        idx2=group_b,
        n_samples=10,
        mode="vanilla",
        use_permutation=True,
        M_permutation=100,
    )

    change_result = posterior.differential_expression_score(
        idx1=group_a,
        idx2=group_b,
        n_samples=10,
        mode="change",
        use_permutation=True,
        M_permutation=100,
    )
    print(change_result.keys())
    # Every interval must have its lower bound at or below its upper bound.
    lower_50 = change_result["confidence_interval_0.5_min"]
    upper_50 = change_result["confidence_interval_0.5_max"]
    assert (lower_50 <= upper_50).all()
    lower_95 = change_result["confidence_interval_0.95_min"]
    upper_95 = change_result["confidence_interval_0.95_max"]
    assert (lower_95 <= upper_95).all()

    # DE estimation example
    proba = change_result.loc[:, "proba_de"]
    assert ((0.0 <= proba) & (proba <= 1.0)).all()
Example #19
0
 def train_fish(self, n_epochs=20):
     """Train a VAE on ``self.data.data_fish``.

     The model uses ``dispersion="gene-batch"`` and
     ``reconstruction_loss="nb"``. The trainer is stored in
     ``self.trainer_fish`` before training starts.

     :param n_epochs: number of training epochs
     """
     fish_dataset = self.data.data_fish
     model = VAE(fish_dataset.nb_genes,
                 n_batch=fish_dataset.n_batches,
                 dispersion="gene-batch",
                 n_latent=self.n_latent,
                 reconstruction_loss="nb")
     self.trainer_fish = UnsupervisedTrainer(
         model, fish_dataset, train_size=0.95, use_cuda=self.USE_CUDA)
     self.trainer_fish.train(n_epochs=n_epochs, lr=0.001)
Example #20
0
def test_multibatches_features():
    """Smoke-test ``imputation`` with ``transform_batch`` on four batches.

    Four random integer matrices (20, 20, 20 and 30 rows; 10 columns) are
    loaded with ``populate_from_per_batch_list``. A VAE is trained for 2
    epochs, then ``imputation`` is called with a single batch index on the
    test set and with a list of batch indices on the train set. ``use_cuda``
    is read from the enclosing module scope.
    """
    # The draws are kept in the original order so RNG consumption matches.
    per_batch = [
        np.random.randint(1, 5, size=(20, 10)),
        np.random.randint(1, 10, size=(20, 10)),
        np.random.randint(1, 10, size=(20, 10)),
        np.random.randint(1, 10, size=(30, 10)),
    ]
    dataset = GeneExpressionDataset()
    dataset.populate_from_per_batch_list(per_batch)

    model = VAE(dataset.nb_genes, dataset.n_batches)
    trainer = UnsupervisedTrainer(
        model, dataset, train_size=0.5, use_cuda=use_cuda)
    trainer.train(n_epochs=2)

    trainer.test_set.imputation(n_samples=2, transform_batch=0)
    trainer.train_set.imputation(n_samples=2, transform_batch=[0, 1, 2])
Example #21
0
def test_sampling_zl(save_path):
    """Train a classifier that uses a trained VAE as ``sampling_model``.

    A VAE is trained for 2 epochs on the Cortex dataset. A ``Classifier``
    with ``n_latent + 1`` inputs is then trained for 2 epochs through
    ``ClassifierTrainer`` with ``sampling_zl=True``, and its test-set
    accuracy is computed. ``use_cuda`` is read from the enclosing module
    scope.

    :param save_path: forwarded to ``CortexDataset``
    """
    dataset = CortexDataset(save_path=save_path)
    model = VAE(dataset.nb_genes, dataset.n_batches)
    vae_trainer = UnsupervisedTrainer(
        model, dataset, train_size=0.5, use_cuda=use_cuda
    )
    vae_trainer.train(n_epochs=2)

    # Input width is the VAE's latent dimension plus one.
    classifier_input = model.n_latent + 1
    classifier = Classifier(classifier_input, n_labels=dataset.n_labels)
    classifier_trainer = ClassifierTrainer(
        classifier, dataset, sampling_model=model, sampling_zl=True
    )
    classifier_trainer.train(n_epochs=2)
    classifier_trainer.test_set.accuracy()
Example #22
0
def training_score_scvi(train, **kwargs):
    """Return the Poisson log-likelihood of ``train`` under a fitted VAE.

    A VAE is trained for 100 epochs on ``train``. The imputed values of the
    trainer's train and test sets are stacked into ``lam``, and the summed
    ``poisson(mu=lam).logpmf(train)`` is returned.

    :param train: 2-D count matrix; ``train.shape[1]`` is the VAE input size
    :param kwargs: accepted but unused
    :return: sum of the Poisson log-pmf over all entries
    """
    from scvi.dataset import GeneExpressionDataset
    from scvi.inference import UnsupervisedTrainer
    from scvi.models import VAE

    attributes = GeneExpressionDataset.get_attributes_from_matrix(train)
    dataset = GeneExpressionDataset(*attributes)
    model = VAE(n_input=train.shape[1])
    trainer = UnsupervisedTrainer(model, dataset, verbose=False)
    trainer.train(n_epochs=100)

    # Training permuted the data for minibatching. Unpermute before "imputing"
    # (estimating lambda)
    # NOTE(review): rows of `lam` are train-set rows followed by test-set
    # rows; confirm this ordering matches the row order of `train`.
    from_train_set = trainer.train_set.sequential().imputation()
    from_test_set = trainer.test_set.sequential().imputation()
    lam = np.vstack([from_train_set, from_test_set])
    return st.poisson(mu=lam).logpmf(train).sum()
Example #23
0
 def train_both(self, n_epochs=20):
     """Train a VAE on ``self.full_dataset``.

     The model uses ``dispersion="gene-batch"`` and
     ``self.reconstruction_seq`` as reconstruction loss. The trainer
     (created with ``frequency=1``) is stored in ``self.trainer_both``
     before training starts.

     :param n_epochs: number of training epochs
     """
     combined = self.full_dataset
     model = VAE(combined.nb_genes,
                 n_latent=self.n_latent,
                 n_batch=self.full_dataset.n_batches,
                 dispersion="gene-batch",
                 reconstruction_loss=self.reconstruction_seq)
     self.trainer_both = UnsupervisedTrainer(model,
                                             self.full_dataset,
                                             train_size=0.95,
                                             use_cuda=self.USE_CUDA,
                                             frequency=1)
     self.trainer_both.train(n_epochs=n_epochs, lr=0.001)
Example #24
0
def generalization_score_scvi(train, test, **kwargs):
    """Score ``test`` with ``pois_llik`` using rates from a fitted VAE.

    A VAE is trained for 100 epochs on ``train``. With gradients disabled,
    the imputed values of the trainer's train and test sets are stacked into
    ``lam`` and ``pois_llik(lam, train, test)`` is returned.

    :param train: 2-D count matrix; ``train.shape[1]`` is the VAE input size
    :param test: forwarded to ``pois_llik``
    :param kwargs: accepted but unused
    :return: the value returned by ``pois_llik``
    """
    from scvi.dataset import GeneExpressionDataset
    from scvi.inference import UnsupervisedTrainer
    from scvi.models import VAE

    attributes = GeneExpressionDataset.get_attributes_from_matrix(train)
    dataset = GeneExpressionDataset(*attributes)
    model = VAE(n_input=train.shape[1])
    trainer = UnsupervisedTrainer(model, dataset, verbose=False)
    trainer.train(n_epochs=100)

    # Training permuted the data for minibatching. Unpermute before "imputing"
    # (estimating lambda)
    # NOTE(review): rows of `lam` are train-set rows followed by test-set
    # rows; confirm this ordering matches the row order of `train`.
    with torch.autograd.set_grad_enabled(False):
        from_train_set = trainer.train_set.sequential().imputation()
        from_test_set = trainer.test_set.sequential().imputation()
        lam = np.vstack([from_train_set, from_test_set])
        return pois_llik(lam, train, test)
Example #25
0
def test_cortex():
    """Smoke-test four inference classes on the Cortex dataset.

    Each model is trained for one epoch and then has a few evaluation
    methods called on it: ``VariationalInference`` on a VAE,
    ``JointSemiSupervisedVariationalInference`` and
    ``AlternateSemiSupervisedVariationalInference`` on SVAEC models, and
    ``ClassifierInference`` on a ``Classifier``. ``use_cuda`` is read from
    the enclosing module scope.
    """
    dataset = CortexDataset()

    # --- Unsupervised VAE
    model = VAE(dataset.nb_genes, dataset.n_batches)
    vae_inference = VariationalInference(
        model, dataset, train_size=0.1, use_cuda=use_cuda)
    vae_inference.train(n_epochs=1)
    vae_inference.ll('train')
    vae_inference.differential_expression_stats('train')
    vae_inference.differential_expression('test')
    vae_inference.imputation('train', corruption='uniform')
    vae_inference.imputation('test', n_samples=2, corruption='binomial')

    # --- Semi-supervised, joint training
    joint_model = SVAEC(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    joint_inference = JointSemiSupervisedVariationalInference(
        joint_model,
        dataset,
        n_labelled_samples_per_class=50,
        use_cuda=use_cuda)
    joint_inference.train(n_epochs=1)
    joint_inference.accuracy('labelled')
    joint_inference.ll('all')

    # --- Semi-supervised, alternate training with a logreg classifier
    alternate_model = SVAEC(dataset.nb_genes,
                            dataset.n_batches,
                            dataset.n_labels,
                            logreg_classifier=True)
    alternate_inference = AlternateSemiSupervisedVariationalInference(
        alternate_model,
        dataset,
        n_labelled_samples_per_class=50,
        use_cuda=use_cuda)
    alternate_inference.train(n_epochs=1, lr=1e-2)
    alternate_inference.accuracy('unlabelled')
    alternate_inference.svc_rf(unit_test=True)

    # --- Plain classifier
    classifier = Classifier(dataset.nb_genes, n_labels=dataset.n_labels)
    classifier_inference = ClassifierInference(classifier, dataset)
    classifier_inference.train(n_epochs=1)
    classifier_inference.accuracy('train')
Example #26
0
def correct_scvi(Xs, genes):
    """Return the VAE latent representation of the merged datasets.

    Each matrix in ``Xs`` is wrapped as
    ``AnnDatasetFromAnnData(AnnData(X, var=genes))`` and the results are
    merged with ``populate_from_datasets``. A VAE is built for the merged
    dataset, but NOT trained here: its weights are loaded from
    ``data/harmonization.vae.pkl``, which must already exist.

    :param Xs: iterable of expression matrices, one per dataset
    :param genes: passed as ``var`` to every ``AnnData``
    :return: the first element returned by ``get_latent()`` for a sequential
        posterior over all cells
    """
    import torch
    # Fix the seed and the cuDNN flags before any model is built.
    torch.manual_seed(0)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    from scvi.dataset import AnnDatasetFromAnnData
    from scvi.dataset.dataset import GeneExpressionDataset
    from scvi.inference import UnsupervisedTrainer
    from scvi.models import VAE

    all_ann = [AnnDatasetFromAnnData(AnnData(X, var=genes)) for X in Xs]

    all_dataset = GeneExpressionDataset()
    all_dataset.populate_from_datasets(all_ann)

    vae = VAE(all_dataset.nb_genes,
              n_batch=all_dataset.n_batches,
              n_labels=all_dataset.n_labels,
              n_hidden=128,
              n_latent=30,
              n_layers=2,
              dispersion='gene')
    trainer = UnsupervisedTrainer(
        vae,
        all_dataset,
        train_size=1.,
        use_cuda=True,
    )

    # Training is skipped in favour of a saved checkpoint. The checkpoint was
    # presumably produced by trainer.train(n_epochs=100) followed by
    # torch.save(trainer.model.state_dict(), 'data/harmonization.vae.pkl').
    trainer.model.load_state_dict(torch.load('data/harmonization.vae.pkl'))
    trainer.model.eval()

    full = trainer.create_posterior(trainer.model,
                                    all_dataset,
                                    indices=np.arange(len(all_dataset)))
    # Only the latent coordinates are needed; the other two outputs are
    # discarded.
    latent, _, _ = full.sequential().get_latent()

    return latent
Example #27
0
def test_full_cov():
    """Train a ``full_cov`` VAE and check the test-set history has no NaN.

    A 2-latent-dimension VAE with ``reconstruction_loss='zinb'`` and
    ``full_cov=True`` is trained on the Cortex dataset for 20 epochs on CUDA,
    with ELBO-based early stopping. The test asserts that
    ``trainer.history['ll_test_set']`` contains no NaN.
    """
    dataset = CortexDataset()
    model = VAE(n_input=dataset.nb_genes,
                n_batch=dataset.n_batches,
                reconstruction_loss='zinb',
                n_latent=2,
                full_cov=True)

    # Early stopping and best-state saving both use the 'elbo' metric.
    stopping = {
        'early_stopping_metric': 'elbo',
        'save_best_state_metric': 'elbo',
        'patience': 15,
        'threshold': 3
    }
    trainer = UnsupervisedTrainer(model=model,
                                  gene_dataset=dataset,
                                  use_cuda=True,
                                  train_size=0.7,
                                  frequency=1,
                                  early_stopping_kwargs=stopping)
    trainer.train(n_epochs=20, lr=1e-3)

    test_history = trainer.history['ll_test_set']
    assert not np.isnan(test_history).any()
Example #28
0
def test_cortex(save_path):
    """Smoke-test four trainers on the Cortex dataset.

    Each model is trained for one epoch per ``train`` call and then has a few
    evaluation methods called on it: ``UnsupervisedTrainer`` on a VAE
    (including a corrupted re-training and the imputation benchmark),
    ``JointSemiSupervisedTrainer`` and ``AlternateSemiSupervisedTrainer`` on
    SCANVI models (the latter also feeding SVC and random-forest baselines),
    and ``ClassifierTrainer`` on a ``Classifier``. ``use_cuda`` is read from
    the enclosing module scope.

    :param save_path: forwarded to ``CortexDataset`` and to
        ``imputation_benchmark``
    """
    dataset = CortexDataset(save_path=save_path)

    # --- Unsupervised VAE
    model = VAE(dataset.nb_genes, dataset.n_batches)
    vae_trainer = UnsupervisedTrainer(
        model, dataset, train_size=0.5, use_cuda=use_cuda)
    vae_trainer.train(n_epochs=1)
    vae_trainer.train_set.ll()
    vae_trainer.train_set.differential_expression_stats()

    # Corrupt twice (binomial, then default), retrain, then undo.
    vae_trainer.corrupt_posteriors(corruption='binomial')
    vae_trainer.corrupt_posteriors()
    vae_trainer.train(n_epochs=1)
    vae_trainer.uncorrupt_posteriors()

    vae_trainer.train_set.imputation_benchmark(n_samples=1,
                                               show_plot=False,
                                               title_plot='imputation',
                                               save_path=save_path)

    # --- Semi-supervised, joint training
    scanvi = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    joint_trainer = JointSemiSupervisedTrainer(
        scanvi,
        dataset,
        n_labelled_samples_per_class=3,
        use_cuda=use_cuda)
    joint_trainer.train(n_epochs=1)
    joint_trainer.labelled_set.accuracy()
    joint_trainer.full_dataset.ll()

    # --- Semi-supervised, alternate training
    scanvi = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    alternate_trainer = AlternateSemiSupervisedTrainer(
        scanvi,
        dataset,
        n_labelled_samples_per_class=3,
        use_cuda=use_cuda)
    alternate_trainer.train(n_epochs=1, lr=1e-2)
    alternate_trainer.unlabelled_set.accuracy()

    # Baselines: fit on the labelled set, score on the unlabelled set.
    data_train, labels_train = alternate_trainer.labelled_set.raw_data()
    data_test, labels_test = alternate_trainer.unlabelled_set.raw_data()
    svc_grid = [{'C': [1], 'kernel': ['linear']}]
    compute_accuracy_svc(data_train, labels_train, data_test, labels_test,
                         param_grid=svc_grid)
    rf_grid = [{'max_depth': [3], 'n_estimators': [10]}]
    compute_accuracy_rf(data_train, labels_train, data_test, labels_test,
                        param_grid=rf_grid)

    # --- Plain classifier
    classifier = Classifier(dataset.nb_genes, n_labels=dataset.n_labels)
    classifier_trainer = ClassifierTrainer(classifier, dataset)
    classifier_trainer.train(n_epochs=1)
    classifier_trainer.train_set.accuracy()
Example #29
0
    def test_special_dataset_size(self):
        """Train two trainers for one epoch on a 34-cell dataset.

        The dataset has ``17 * 2`` rows and 10 columns of random integers.
        It is split 50/50 and loaded with ``batch_size=8`` by
        ``UnsupervisedTrainer`` and ``JVAETrainer`` in turn.
        """
        dataset = GeneExpressionDataset()
        counts = np.random.randint(1, 100, (17 * 2, 10))
        dataset.populate_from_data(counts)

        # Settings common to both trainers below.
        split = dict(train_size=0.5, use_cuda=False)

        # Test UnsupervisedTrainer
        model = VAE(
            dataset.nb_genes,
            n_batch=dataset.n_batches,
            n_labels=dataset.n_labels,
        )
        trainer = UnsupervisedTrainer(
            model,
            dataset,
            data_loader_kwargs={"batch_size": 8},
            **split
        )
        trainer.train(n_epochs=1)

        # Test JVAETrainer
        n_genes = dataset.nb_genes
        joint_model = JVAE(
            [n_genes, dataset.nb_genes],
            dataset.nb_genes,
            [slice(None)] * 2,
            ["zinb", "zinb"],
            [True, True],
            n_batch=1,
        )
        discriminator = Classifier(dataset.nb_genes, n_labels=2, logits=True)
        trainer = JVAETrainer(
            joint_model,
            discriminator,
            [dataset, dataset],
            data_loader_kwargs={"batch_size": 8},
            **split
        )
        trainer.train(n_epochs=1)
Example #30
0
def test_vae_ratio_loss(save_path):
    """Train two models for 2 epochs with ``ratio_loss=True``.

    The first is a VAE on the Cortex dataset; the second is a
    ``LogNormalPoissonVAE`` with ``full_cov=True`` on a
    ``LatentLogPoissonDataset``. ``use_cuda`` is read from the enclosing
    module scope.

    :param save_path: forwarded to ``CortexDataset``
    """
    # Trainer settings common to both runs.
    shared = dict(train_size=0.5, use_cuda=use_cuda, ratio_loss=True)

    # --- VAE on Cortex
    cortex = CortexDataset(save_path=save_path)
    cortex_model = VAE(cortex.nb_genes, cortex.n_batches)
    cortex_trainer = UnsupervisedTrainer(cortex_model, cortex, **shared)
    cortex_trainer.train(n_epochs=2)

    # --- LogNormalPoissonVAE on the latent log-Poisson dataset
    synthetic = LatentLogPoissonDataset(
        n_genes=5, n_latent=2, n_cells=300, n_comps=1)
    synthetic_model = LogNormalPoissonVAE(
        synthetic.nb_genes, synthetic.n_batches, full_cov=True)
    synthetic_trainer = UnsupervisedTrainer(
        synthetic_model, synthetic, **shared)
    synthetic_trainer.train(n_epochs=2)