Example #1
0
def test_synthetic_1():
    """Train SCANVI for one epoch on a SyntheticDataset and call posterior helpers.

    This is a smoke test: none of the returned values are inspected, so it
    passes as long as no call raises.
    """
    dataset = SyntheticDataset()
    dataset.cell_types = np.array(["A", "B", "C"])

    model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    trainer = JointSemiSupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=1)

    # Mixing / purity metrics.
    trainer.labelled_set.entropy_batch_mixing()
    trainer.full_dataset.knn_purity()

    # t-SNE plots with the different colouring modes.
    trainer.labelled_set.show_t_sne(n_samples=5)
    trainer.unlabelled_set.show_t_sne(n_samples=5, color_by="labels")
    trainer.labelled_set.show_t_sne(n_samples=5, color_by="batches and labels")

    # Clustering and classification scores.
    trainer.labelled_set.clustering_scores()
    trainer.labelled_set.clustering_scores(prediction_algorithm="gmm")
    trainer.unlabelled_set.unsupervised_classification_accuracy()

    # Differential expression between the cells labelled 1 and those labelled 2,
    # followed by the one-vs-all variant; both use the same small sampling budget.
    de_options = {"n_samples": 2, "M_permutation": 10}
    flat_labels = dataset.labels.ravel()
    label_one_mask = flat_labels == 1
    label_two_mask = flat_labels == 2
    trainer.unlabelled_set.differential_expression_score(
        label_one_mask, label_two_mask, **de_options
    )
    trainer.unlabelled_set.one_vs_all_degenes(**de_options)
Example #2
0
def test_synthetic_1():
    """Train SCANVI for one epoch on a SyntheticDataset and call posterior helpers.

    Smoke test only: no return value is checked. Differential expression is
    requested by cell-type name ('B' vs 'C') on the genes named '2' and '4'.
    """
    dataset = SyntheticDataset()
    dataset.cell_types = np.array(['A', 'B', 'C'])

    model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    trainer = JointSemiSupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=1)

    # Mixing / purity metrics.
    trainer.labelled_set.entropy_batch_mixing()
    trainer.full_dataset.knn_purity(verbose=True)

    # t-SNE plots with the different colouring modes.
    trainer.labelled_set.show_t_sne(n_samples=5)
    trainer.unlabelled_set.show_t_sne(n_samples=5, color_by='labels')
    trainer.labelled_set.show_t_sne(n_samples=5, color_by='batches and labels')

    # Clustering and classification scores.
    trainer.labelled_set.clustering_scores()
    trainer.labelled_set.clustering_scores(prediction_algorithm='gmm')
    trainer.unlabelled_set.unsupervised_classification_accuracy()

    # Differential expression: pairwise score, then the full table.
    de_options = {'M_sampling': 2, 'M_permutation': 10}
    trainer.unlabelled_set.differential_expression_score(
        'B', 'C', genes=['2', '4'], **de_options
    )
    trainer.unlabelled_set.differential_expression_table(**de_options)
Example #3
0
def test_synthetic_2():
    """Train VAEC for two epochs on a SyntheticDataset with early stopping configured.

    Early stopping and best-state saving both use the 'll' metric evaluated
    on the 'labelled_set' posterior, computed every epoch (frequency=1).
    """
    dataset = SyntheticDataset()
    model = VAEC(dataset.nb_genes, dataset.n_batches, dataset.n_labels)

    stopping_config = {
        'early_stopping_metric': 'll',
        'on': 'labelled_set',
        'save_best_state_metric': 'll',
    }
    trainer = JointSemiSupervisedTrainer(
        model,
        dataset,
        use_cuda=use_cuda,
        frequency=1,
        early_stopping_kwargs=stopping_config,
    )
    trainer.train(n_epochs=2)
Example #4
0
def test_nb_not_zinb():
    """Train SCANVI for one epoch with reconstruction_loss="nb" and label groups."""
    dataset = SyntheticDataset()

    model_options = {"labels_groups": [0, 0, 1], "reconstruction_loss": "nb"}
    model = SCANVI(
        dataset.nb_genes,
        dataset.n_batches,
        dataset.n_labels,
        **model_options,
    )

    trainer = JointSemiSupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=1)
Example #5
0
def test_synthetic_1():
    """Train SCANVI on a SyntheticDataset, round-trip a posterior, call the helpers.

    After one epoch of training, the labelled posterior is saved to a
    temporary directory and reloaded against a freshly built SCANVI model;
    the reloaded indices, expression matrix and labels must equal the
    originals. The remaining calls are smoke tests with unchecked results.
    """
    dataset = SyntheticDataset()
    dataset.cell_types = np.array(["A", "B", "C"])

    model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    trainer = JointSemiSupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=1)
    trainer.labelled_set.entropy_batch_mixing()

    # Save / load round trip of the labelled posterior.
    with tempfile.TemporaryDirectory() as scratch_dir:
        save_location = os.path.join(scratch_dir, "posterior_data")
        saved_posterior = trainer.labelled_set.sequential()
        saved_posterior.save_posterior(save_location)

        fresh_model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
        loaded_posterior = load_posterior(
            save_location, model=fresh_model, use_cuda=False
        )

    # NOTE(review): these comparisons run after the temporary directory has been
    # removed; presumably the loaded posterior keeps its data in memory - confirm.
    assert np.array_equal(loaded_posterior.indices, saved_posterior.indices)
    assert np.array_equal(
        loaded_posterior.gene_dataset.X, saved_posterior.gene_dataset.X
    )
    assert np.array_equal(
        loaded_posterior.gene_dataset.labels, saved_posterior.gene_dataset.labels
    )

    # Purity metric and t-SNE plots with the different colouring modes.
    trainer.full_dataset.knn_purity()
    trainer.labelled_set.show_t_sne(n_samples=5)
    trainer.unlabelled_set.show_t_sne(n_samples=5, color_by="labels")
    trainer.labelled_set.show_t_sne(n_samples=5, color_by="batches and labels")

    # Clustering and classification scores.
    trainer.labelled_set.clustering_scores()
    trainer.labelled_set.clustering_scores(prediction_algorithm="gmm")
    trainer.unlabelled_set.unsupervised_classification_accuracy()

    # Differential expression between the cells labelled 1 and those labelled 2,
    # followed by the one-vs-all variant.
    de_options = {"n_samples": 2, "M_permutation": 10}
    flat_labels = dataset.labels.ravel()
    label_one_mask = flat_labels == 1
    label_two_mask = flat_labels == 2
    trainer.unlabelled_set.differential_expression_score(
        label_one_mask, label_two_mask, **de_options
    )
    trainer.unlabelled_set.one_vs_all_degenes(**de_options)
Example #6
0
def test_hierarchy():
    """Train SCANVI for one epoch with an ontology (use_ontology=True).

    The ontology is passed as a list of two integer matrices, of shapes
    (2, 3) and (3, 4); the model uses the "zinb" reconstruction loss and
    three layers.
    """
    dataset = SyntheticDataset()

    ontology_matrices = [
        np.array([[1, 1, 0], [0, 0, 1]]),
        np.array([[1, 0, 1, 0], [0, 0, 1, 0], [0, 0, 1, 1]]),
    ]
    model = SCANVI(
        dataset.nb_genes,
        dataset.n_batches,
        dataset.n_labels,
        ontology=ontology_matrices,
        use_ontology=True,
        reconstruction_loss="zinb",
        n_layers=3,
    )

    trainer = JointSemiSupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=1)
Example #7
0
def test_synthetic_2():
    """Train VAEC for two epochs on a SyntheticDataset with early stopping configured.

    Early stopping and best-state saving both use the "reconstruction_error"
    metric evaluated on the "labelled_set" posterior, computed every epoch
    (frequency=1).
    """
    dataset = SyntheticDataset()
    model = VAEC(dataset.nb_genes, dataset.n_batches, dataset.n_labels)

    stopping_config = {
        "early_stopping_metric": "reconstruction_error",
        "on": "labelled_set",
        "save_best_state_metric": "reconstruction_error",
    }
    trainer = JointSemiSupervisedTrainer(
        model,
        dataset,
        use_cuda=use_cuda,
        frequency=1,
        early_stopping_kwargs=stopping_config,
    )
    trainer.train(n_epochs=2)
Example #8
0
def test_cortex(save_path):
    """Exercise the VAE, SCANVI and Classifier training paths on the Cortex dataset.

    Smoke test: every model is trained for a single epoch and the results of
    the evaluation helpers are not checked.

    save_path is handed to CortexDataset and to imputation_benchmark.
    """
    dataset = CortexDataset(save_path=save_path)

    # --- Unsupervised VAE ---------------------------------------------------
    vae_model = VAE(dataset.nb_genes, dataset.n_batches)
    vae_trainer = UnsupervisedTrainer(
        vae_model, dataset, train_size=0.5, use_cuda=use_cuda
    )
    vae_trainer.train(n_epochs=1)
    vae_trainer.train_set.ll()
    vae_trainer.train_set.differential_expression_stats()

    # Corrupt (explicit 'binomial', then the default), retrain, then restore.
    vae_trainer.corrupt_posteriors(corruption='binomial')
    vae_trainer.corrupt_posteriors()
    vae_trainer.train(n_epochs=1)
    vae_trainer.uncorrupt_posteriors()

    vae_trainer.train_set.imputation_benchmark(
        n_samples=1,
        show_plot=False,
        title_plot='imputation',
        save_path=save_path,
    )

    # --- SCANVI, joint semi-supervised training -----------------------------
    scanvi_model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    scanvi_trainer = JointSemiSupervisedTrainer(
        scanvi_model,
        dataset,
        n_labelled_samples_per_class=3,
        use_cuda=use_cuda,
    )
    scanvi_trainer.train(n_epochs=1)
    scanvi_trainer.labelled_set.accuracy()
    scanvi_trainer.full_dataset.ll()

    # --- SCANVI, alternate semi-supervised training -------------------------
    scanvi_model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    scanvi_trainer = AlternateSemiSupervisedTrainer(
        scanvi_model,
        dataset,
        n_labelled_samples_per_class=3,
        use_cuda=use_cuda,
    )
    scanvi_trainer.train(n_epochs=1, lr=1e-2)
    scanvi_trainer.unlabelled_set.accuracy()

    # Baselines: labelled cells are the training split, unlabelled the test split.
    train_x, train_y = scanvi_trainer.labelled_set.raw_data()
    test_x, test_y = scanvi_trainer.unlabelled_set.raw_data()
    svc_grid = [{'C': [1], 'kernel': ['linear']}]
    compute_accuracy_svc(train_x, train_y, test_x, test_y, param_grid=svc_grid)
    rf_grid = [{'max_depth': [3], 'n_estimators': [10]}]
    compute_accuracy_rf(train_x, train_y, test_x, test_y, param_grid=rf_grid)

    # --- Stand-alone classifier ---------------------------------------------
    classifier = Classifier(dataset.nb_genes, n_labels=dataset.n_labels)
    classifier_trainer = ClassifierTrainer(classifier, dataset)
    classifier_trainer.train(n_epochs=1)
    classifier_trainer.train_set.accuracy()
Example #9
0
def test_cortex(save_path):
    """Exercise the VAE, SCANVI and Classifier training paths on the Cortex dataset.

    Every model is trained for a single epoch. Most helper calls are smoke
    tests with unchecked results; the shapes returned by generate_parameters
    and generate are asserted explicitly.

    save_path is handed to CortexDataset and to imputation_benchmark.
    """
    dataset = CortexDataset(save_path=save_path)

    # --- Unsupervised VAE ---------------------------------------------------
    vae_model = VAE(dataset.nb_genes, dataset.n_batches)
    vae_trainer = UnsupervisedTrainer(
        vae_model, dataset, train_size=0.5, use_cuda=use_cuda
    )
    vae_trainer.train(n_epochs=1)
    vae_trainer.train_set.reconstruction_error()
    vae_trainer.train_set.differential_expression_stats()
    for correlation_kind in ("pearson", "spearman"):
        vae_trainer.train_set.generate_feature_correlation_matrix(
            n_samples=2, correlation_type=correlation_kind
        )
    vae_trainer.train_set.imputation(n_samples=1)
    vae_trainer.test_set.imputation(n_samples=5)

    # Corrupt (explicit "binomial", then the default), retrain, then restore.
    vae_trainer.corrupt_posteriors(corruption="binomial")
    vae_trainer.corrupt_posteriors()
    vae_trainer.train(n_epochs=1)
    vae_trainer.uncorrupt_posteriors()

    benchmark_options = {
        "n_samples": 1,
        "show_plot": False,
        "title_plot": "imputation",
        "save_path": save_path,
    }
    vae_trainer.train_set.imputation_benchmark(**benchmark_options)
    vae_trainer.train_set.generate_parameters()

    # Shape checks on generate_parameters for the three calling modes.
    n_cells = len(vae_trainer.train_set.indices)
    n_genes = dataset.nb_genes
    n_samples = 3
    per_cell_shape = (n_cells, n_genes)
    per_sample_shape = (n_samples, n_cells, n_genes)

    dropout, means, dispersions = vae_trainer.train_set.generate_parameters()
    assert dropout.shape == per_cell_shape
    assert means.shape == per_cell_shape
    assert dispersions.shape == per_cell_shape

    dropout, means, dispersions = vae_trainer.train_set.generate_parameters(
        n_samples=n_samples
    )
    assert dropout.shape == per_sample_shape
    assert means.shape == per_sample_shape

    # With give_mean=True the leading sample axis is gone again.
    dropout, means, dispersions = vae_trainer.train_set.generate_parameters(
        n_samples=n_samples, give_mean=True
    )
    assert dropout.shape == per_cell_shape
    assert means.shape == per_cell_shape

    # Posterior over every cell: generate returns the new samples and the old data.
    every_index = np.arange(len(dataset))
    full_posterior = vae_trainer.create_posterior(
        vae_model, dataset, indices=every_index
    )
    x_new, x_old = full_posterior.generate(n_samples=10)
    assert x_new.shape == (dataset.nb_cells, dataset.nb_genes, 10)
    assert x_old.shape == (dataset.nb_cells, dataset.nb_genes)

    vae_trainer.train_set.imputation_benchmark(**benchmark_options)

    # --- SCANVI, joint semi-supervised training -----------------------------
    scanvi_model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    scanvi_trainer = JointSemiSupervisedTrainer(
        scanvi_model,
        dataset,
        n_labelled_samples_per_class=3,
        use_cuda=use_cuda,
    )
    scanvi_trainer.train(n_epochs=1)
    scanvi_trainer.labelled_set.accuracy()
    scanvi_trainer.full_dataset.reconstruction_error()

    # --- SCANVI, alternate semi-supervised training -------------------------
    scanvi_model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    scanvi_trainer = AlternateSemiSupervisedTrainer(
        scanvi_model,
        dataset,
        n_labelled_samples_per_class=3,
        use_cuda=use_cuda,
    )
    scanvi_trainer.train(n_epochs=1, lr=1e-2)
    scanvi_trainer.unlabelled_set.accuracy()

    # Baselines: labelled cells are the training split, unlabelled the test split.
    train_x, train_y = scanvi_trainer.labelled_set.raw_data()
    test_x, test_y = scanvi_trainer.unlabelled_set.raw_data()
    svc_grid = [{"C": [1], "kernel": ["linear"]}]
    compute_accuracy_svc(train_x, train_y, test_x, test_y, param_grid=svc_grid)
    rf_grid = [{"max_depth": [3], "n_estimators": [10]}]
    compute_accuracy_rf(train_x, train_y, test_x, test_y, param_grid=rf_grid)

    # --- Stand-alone classifier ---------------------------------------------
    classifier = Classifier(dataset.nb_genes, n_labels=dataset.n_labels)
    classifier_trainer = ClassifierTrainer(classifier, dataset)
    classifier_trainer.train(n_epochs=1)
    classifier_trainer.train_set.accuracy()