Example #1
def totalvi_benchmark(dataset, n_epochs, use_cuda=True):
    totalvae = TOTALVI(dataset.nb_genes,
                       len(dataset.protein_names),
                       n_batch=dataset.n_batches)
    trainer = TotalTrainer(totalvae,
                           dataset,
                           train_size=0.5,
                           use_cuda=use_cuda,
                           early_stopping_kwargs=None)
    trainer.train(n_epochs=n_epochs)
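    # held-out evaluation: reconstruction error and marginal log-likelihood on the test split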
    trainer.test_set.reconstruction_error()
    trainer.test_set.marginal_ll()

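    # exercise the posterior query methods on the held-out cells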
    trainer.test_set.get_protein_background_mean()
    trainer.test_set.get_latent()
    trainer.test_set.generate()
    trainer.test_set.get_sample_dropout()
    trainer.test_set.get_normalized_denoised_expression(transform_batch=0)
    trainer.test_set.get_normalized_denoised_expression(transform_batch=0)
    trainer.test_set.imputation()
    trainer.test_set.get_protein_mean()
    trainer.test_set.one_vs_all_degenes(n_samples=2, M_permutation=10)
    trainer.test_set.generate_feature_correlation_matrix(n_samples=2)
    trainer.test_set.generate_feature_correlation_matrix(n_samples=2,
                                                         transform_batch=0)

    return trainer
Example #2
def totalvi_benchmark(dataset, n_epochs, use_cuda=True):
    totalvae = TOTALVI(
        dataset.nb_genes, len(dataset.protein_names), n_batch=dataset.n_batches
    )
    trainer = TotalTrainer(totalvae, dataset, train_size=0.5, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)
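    # held-out evaluation on the test split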
    trainer.test_set.reconstruction_error()
    trainer.test_set.marginal_ll()

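    # latent space, generated samples, denoised expression, and imputation on the test split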
    trainer.test_set.get_protein_background_mean()
    trainer.test_set.get_latent()
    trainer.test_set.generate()
    trainer.test_set.get_sample_dropout()
    trainer.test_set.get_normalized_denoised_expression()
    trainer.test_set.imputation()

    return trainer
Example #3
def test_totalvi(save_path):
    synthetic_dataset_one_batch = SyntheticDataset(n_batches=1)
    totalvi_benchmark(synthetic_dataset_one_batch,
                      n_epochs=1,
                      use_cuda=use_cuda)
    synthetic_dataset_two_batches = SyntheticDataset(n_batches=2)
    totalvi_benchmark(synthetic_dataset_two_batches,
                      n_epochs=1,
                      use_cuda=use_cuda)

    # adversarial testing: train with the adversarial loss enabled to discourage batch information in the latent space
    dataset = synthetic_dataset_two_batches
    totalvae = TOTALVI(dataset.nb_genes,
                       len(dataset.protein_names),
                       n_batch=dataset.n_batches)
    trainer = TotalTrainer(
        totalvae,
        dataset,
        train_size=0.5,
        use_cuda=use_cuda,
        early_stopping_kwargs=None,
        use_adversarial_loss=True,
    )
    trainer.train(n_epochs=1)

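    # round-trip the posterior through save/load and check that the protein data is preserved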
    with tempfile.TemporaryDirectory() as temp_dir:
        posterior_save_path = os.path.join(temp_dir, "posterior_data")
        original_post = trainer.create_posterior(
            totalvae,
            dataset,
            indices=np.arange(len(dataset)),
            type_class=TotalPosterior,
        )
        original_post.save_posterior(posterior_save_path)
        new_totalvae = TOTALVI(dataset.nb_genes,
                               len(dataset.protein_names),
                               n_batch=dataset.n_batches)
        new_post = load_posterior(posterior_save_path,
                                  model=new_totalvae,
                                  use_cuda=False)
        assert new_post.posterior_type == "TotalPosterior"
        assert np.array_equal(new_post.gene_dataset.protein_expression,
                              dataset.protein_expression)
Example #4
def test_totalvi(save_path):
    synthetic_dataset_one_batch = SyntheticDataset(n_batches=1)
    totalvi_benchmark(synthetic_dataset_one_batch, n_epochs=1, use_cuda=use_cuda)
    synthetic_dataset_two_batches = SyntheticDataset(n_batches=2)
    totalvi_benchmark(synthetic_dataset_two_batches, n_epochs=1, use_cuda=use_cuda)

    # adversarial testing: train with the adversarial loss enabled to discourage batch information in the latent space
    dataset = synthetic_dataset_two_batches
    totalvae = TOTALVI(
        dataset.nb_genes, len(dataset.protein_names), n_batch=dataset.n_batches
    )
    trainer = TotalTrainer(
        totalvae,
        dataset,
        train_size=0.5,
        use_cuda=use_cuda,
        early_stopping_kwargs=None,
        use_adversarial_loss=True,
    )
    trainer.train(n_epochs=1)
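
    # early-stopping configuration: monitor the held-out ELBO and reduce the learning rate on plateau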
    early_stopping_kwargs = {
        "early_stopping_metric": "elbo",
        "save_best_state_metric": "elbo",
        "patience": 45,
        "threshold": 0,
        "reduce_lr_on_plateau": True,
        "lr_patience": 30,
        "lr_factor": 0.6,
        "posterior_class": TotalPosterior,
    }

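    # `model` and `lr` are assumed to be defined earlier in the originating script
    # (a TOTALVI instance and a learning rate, respectively)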
    trainer = TotalTrainer(
        model,
        dataset,
        train_size=0.9,
        test_size=0.1,
        use_cuda=use_cuda,
        frequency=1,
        data_loader_kwargs={"batch_size": 256, "pin_memory": False},
        early_stopping_kwargs=early_stopping_kwargs,
    )
    trainer.train(lr=lr, n_epochs=500)
    # create posterior on full data
    full_posterior = trainer.create_posterior(
        model, dataset, indices=np.arange(len(dataset)), type_class=TotalPosterior,
    )

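    # persist the trained weights; `n` is assumed to be a run/dataset identifier defined elsewhere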
    torch.save(
        trainer.model.state_dict(), "differential_expression/saved_models/" + n + ".pt"
    )