Example #1
0
def cortex_benchmark(n_epochs=250,
                     use_cuda=True,
                     save_path="data/",
                     show_plot=True):
    """Run the VAE benchmark sequence on the Cortex dataset.

    Two separately constructed VAEs are trained one after the other. The
    first is used for a differential expression score between the cells
    labelled 4 and the cells labelled 5 (genes "THY1" and "MBP") and for a
    reconstruction error on the test set. The second is trained between a
    ``corrupt_posteriors()`` and an ``uncorrupt_posteriors()`` call and is
    then used for the imputation benchmark and the t-SNE plot.

    :param n_epochs: number of epochs for each of the two training runs; the
        value 1 is treated as the unit-test setting and restricts the t-SNE
        call to 10 samples.
    :param use_cuda: forwarded to ``UnsupervisedTrainer``.
    :param save_path: forwarded to ``CortexDataset`` and to
        ``imputation_benchmark``.
    :param show_plot: forwarded to ``imputation_benchmark``.
    :return: the trainer used for the second (corruption) run.
    """
    dataset = CortexDataset(save_path=save_path)

    def build_trainer():
        # Each phase starts from a newly constructed VAE.
        model = VAE(dataset.nb_genes)
        return UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)

    # Phase 1: plain training, then DE score and reconstruction error.
    trainer = build_trainer()
    trainer.train(n_epochs=n_epochs)
    studied_types = (4, 5)  # the couple of cell types on which to study DE
    first_mask, second_mask = [
        dataset.labels.ravel() == cell_type for cell_type in studied_types
    ]
    trainer.train_set.differential_expression_score(
        first_mask, second_mask, genes=["THY1", "MBP"])
    trainer.test_set.reconstruction_error()  # original note: assert ~ 1200

    # Phase 2: a new model trained while the posteriors are corrupted.
    # The same local name is reused so the first trainer is released here.
    trainer = build_trainer()
    trainer.corrupt_posteriors()
    trainer.train(n_epochs=n_epochs)
    trainer.uncorrupt_posteriors()
    trainer.train_set.imputation_benchmark(save_path=save_path,
                                           show_plot=show_plot)

    # n_epochs == 1 is unit tests
    if n_epochs == 1:
        tsne_samples = 10
    else:
        tsne_samples = None
    trainer.train_set.show_t_sne(n_samples=tsne_samples)
    return trainer
Example #2
0
def cortex_benchmark(n_epochs=250,
                     use_cuda=True,
                     save_path='data/',
                     show_plot=True):
    """Run the VAE benchmark sequence on the Cortex dataset.

    Two separately constructed VAEs are trained one after the other. The
    first is used for a differential expression score between
    'oligodendrocytes' and 'pyramidal CA1' (genes "THY1" and "MBP") and for
    an ``ll()`` evaluation on the test set. The second is trained between a
    ``corrupt_posteriors()`` and an ``uncorrupt_posteriors()`` call and is
    then used for the imputation benchmark and the t-SNE plot.

    :param n_epochs: number of epochs for each of the two training runs; the
        value 1 is treated as the unit-test setting: it restricts the t-SNE
        call to 10 samples and passes ``verbose=False`` to the imputation
        benchmark.
    :param use_cuda: forwarded to ``UnsupervisedTrainer``.
    :param save_path: forwarded to ``CortexDataset`` and to
        ``imputation_benchmark``.
    :param show_plot: forwarded to ``imputation_benchmark``.
    :return: the trainer used for the second (corruption) run.
    """
    dataset = CortexDataset(save_path=save_path)
    is_unit_test = n_epochs == 1  # n_epochs == 1 is unit tests

    def build_trainer():
        # Each phase starts from a newly constructed VAE.
        model = VAE(dataset.nb_genes)
        return UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)

    # Phase 1: plain training, then DE score and ll() on the test set.
    trainer = build_trainer()
    trainer.train(n_epochs=n_epochs)
    trainer.train_set.differential_expression_score(
        'oligodendrocytes', 'pyramidal CA1', genes=["THY1", "MBP"])
    trainer.test_set.ll()  # original note: assert ~ 1200

    # Phase 2: a new model trained while the posteriors are corrupted.
    # The same local name is reused so the first trainer is released here.
    trainer = build_trainer()
    trainer.corrupt_posteriors()
    trainer.train(n_epochs=n_epochs)
    trainer.uncorrupt_posteriors()
    trainer.train_set.imputation_benchmark(verbose=(n_epochs > 1),
                                           save_path=save_path,
                                           show_plot=show_plot)

    tsne_samples = None
    if is_unit_test:
        tsne_samples = 10
    trainer.train_set.show_t_sne(n_samples=tsne_samples)
    return trainer
Example #3
0
def test_cortex(save_path):
    """Smoke-test VAE, SCANVI and classifier training on the Cortex dataset.

    Every model is trained for a single epoch and the evaluation methods are
    only invoked, not checked against expected values. ``use_cuda`` is read
    from module scope.

    :param save_path: forwarded to ``CortexDataset`` and to
        ``imputation_benchmark``.
    """
    dataset = CortexDataset(save_path=save_path)

    def new_scanvi():
        # Both semi-supervised trainers get their own SCANVI instance.
        return SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)

    # --- Unsupervised VAE -------------------------------------------------
    vae_model = VAE(dataset.nb_genes, dataset.n_batches)
    vae_trainer = UnsupervisedTrainer(
        vae_model, dataset, train_size=0.5, use_cuda=use_cuda
    )
    vae_trainer.train(n_epochs=1)
    vae_trainer.train_set.ll()
    vae_trainer.train_set.differential_expression_stats()

    # Exercise both the 'binomial' and the default corruption before
    # retraining, then undo the corruption.
    vae_trainer.corrupt_posteriors(corruption='binomial')
    vae_trainer.corrupt_posteriors()
    vae_trainer.train(n_epochs=1)
    vae_trainer.uncorrupt_posteriors()

    # train_set is looked up again here on purpose: the posterior is read
    # from the trainer after the corrupt/uncorrupt calls, never cached.
    vae_trainer.train_set.imputation_benchmark(
        n_samples=1, show_plot=False, title_plot='imputation',
        save_path=save_path,
    )

    # --- SCANVI, joint semi-supervised training ----------------------------
    scanvi_trainer = JointSemiSupervisedTrainer(
        new_scanvi(), dataset,
        n_labelled_samples_per_class=3,
        use_cuda=use_cuda,
    )
    scanvi_trainer.train(n_epochs=1)
    scanvi_trainer.labelled_set.accuracy()
    scanvi_trainer.full_dataset.ll()

    # --- SCANVI, alternate semi-supervised training ------------------------
    scanvi_trainer = AlternateSemiSupervisedTrainer(
        new_scanvi(), dataset,
        n_labelled_samples_per_class=3,
        use_cuda=use_cuda,
    )
    scanvi_trainer.train(n_epochs=1, lr=1e-2)
    scanvi_trainer.unlabelled_set.accuracy()

    # Baseline classifiers: labelled cells as training data, unlabelled
    # cells as test data.
    data_train, labels_train = scanvi_trainer.labelled_set.raw_data()
    data_test, labels_test = scanvi_trainer.unlabelled_set.raw_data()
    baseline_inputs = (data_train, labels_train, data_test, labels_test)
    compute_accuracy_svc(
        *baseline_inputs, param_grid=[{'C': [1], 'kernel': ['linear']}]
    )
    compute_accuracy_rf(
        *baseline_inputs,
        param_grid=[{'max_depth': [3], 'n_estimators': [10]}],
    )

    # --- Stand-alone classifier --------------------------------------------
    classifier = Classifier(dataset.nb_genes, n_labels=dataset.n_labels)
    classifier_trainer = ClassifierTrainer(classifier, dataset)
    classifier_trainer.train(n_epochs=1)
    classifier_trainer.train_set.accuracy()
Example #4
0
def test_cortex(save_path):
    """Smoke-test VAE, SCANVI and classifier training on the Cortex dataset.

    Every model is trained for a single epoch. Most evaluation methods are
    only invoked; the outputs of ``generate_parameters`` and ``generate``
    are additionally checked for their shapes. ``use_cuda`` is read from
    module scope.

    :param save_path: forwarded to ``CortexDataset`` and to
        ``imputation_benchmark``.
    """
    dataset = CortexDataset(save_path=save_path)

    def new_scanvi():
        # Both semi-supervised trainers get their own SCANVI instance.
        return SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)

    # --- Unsupervised VAE: train once and call the evaluation helpers ------
    vae = VAE(dataset.nb_genes, dataset.n_batches)
    trainer = UnsupervisedTrainer(vae, dataset, train_size=0.5, use_cuda=use_cuda)
    trainer.train(n_epochs=1)
    trainer.train_set.reconstruction_error()
    trainer.train_set.differential_expression_stats()
    for correlation in ("pearson", "spearman"):
        trainer.train_set.generate_feature_correlation_matrix(
            n_samples=2, correlation_type=correlation
        )
    trainer.train_set.imputation(n_samples=1)
    trainer.test_set.imputation(n_samples=5)

    # Exercise both the "binomial" and the default corruption before
    # retraining, then undo the corruption.
    trainer.corrupt_posteriors(corruption="binomial")
    trainer.corrupt_posteriors()
    trainer.train(n_epochs=1)
    trainer.uncorrupt_posteriors()

    # train_set is always read from the trainer again rather than cached,
    # so the posterior used is the one present after the calls above.
    trainer.train_set.imputation_benchmark(
        n_samples=1, show_plot=False, title_plot="imputation", save_path=save_path
    )
    trainer.train_set.generate_parameters()

    # --- Shape checks on generate_parameters -------------------------------
    n_cells = len(trainer.train_set.indices)
    n_genes = dataset.nb_genes
    n_samples = 3
    per_cell_shape = (n_cells, n_genes)
    per_sample_shape = (n_samples, n_cells, n_genes)

    # Default call: one (n_cells, n_genes) array per parameter.
    dropout, means, dispersions = trainer.train_set.generate_parameters()
    assert dropout.shape == per_cell_shape
    assert means.shape == per_cell_shape
    assert dispersions.shape == per_cell_shape

    # With n_samples: a leading axis of length n_samples.
    dropout, means, dispersions = trainer.train_set.generate_parameters(
        n_samples=n_samples
    )
    assert dropout.shape == per_sample_shape
    assert means.shape == per_sample_shape

    # With give_mean=True the shape is (n_cells, n_genes) again.
    dropout, means, dispersions = trainer.train_set.generate_parameters(
        n_samples=n_samples, give_mean=True
    )
    assert dropout.shape == per_cell_shape
    assert means.shape == per_cell_shape

    # --- Shape checks on generate, using a posterior over all indices ------
    every_index = np.arange(len(dataset))
    full = trainer.create_posterior(vae, dataset, indices=every_index)
    x_new, x_old = full.generate(n_samples=10)
    assert x_new.shape == (dataset.nb_cells, dataset.nb_genes, 10)
    assert x_old.shape == (dataset.nb_cells, dataset.nb_genes)

    trainer.train_set.imputation_benchmark(
        n_samples=1, show_plot=False, title_plot="imputation", save_path=save_path
    )

    # --- SCANVI, joint semi-supervised training ----------------------------
    scanvi_trainer = JointSemiSupervisedTrainer(
        new_scanvi(), dataset, n_labelled_samples_per_class=3, use_cuda=use_cuda
    )
    scanvi_trainer.train(n_epochs=1)
    scanvi_trainer.labelled_set.accuracy()
    scanvi_trainer.full_dataset.reconstruction_error()

    # --- SCANVI, alternate semi-supervised training ------------------------
    scanvi_trainer = AlternateSemiSupervisedTrainer(
        new_scanvi(), dataset, n_labelled_samples_per_class=3, use_cuda=use_cuda
    )
    scanvi_trainer.train(n_epochs=1, lr=1e-2)
    scanvi_trainer.unlabelled_set.accuracy()

    # Baseline classifiers: labelled cells as training data, unlabelled
    # cells as test data.
    data_train, labels_train = scanvi_trainer.labelled_set.raw_data()
    data_test, labels_test = scanvi_trainer.unlabelled_set.raw_data()
    baseline_inputs = (data_train, labels_train, data_test, labels_test)
    compute_accuracy_svc(
        *baseline_inputs, param_grid=[{"C": [1], "kernel": ["linear"]}]
    )
    compute_accuracy_rf(
        *baseline_inputs, param_grid=[{"max_depth": [3], "n_estimators": [10]}]
    )

    # --- Stand-alone classifier --------------------------------------------
    classifier = Classifier(dataset.nb_genes, n_labels=dataset.n_labels)
    classifier_trainer = ClassifierTrainer(classifier, dataset)
    classifier_trainer.train(n_epochs=1)
    classifier_trainer.train_set.accuracy()
Example #5
0
print(input_gene_bc_mat.shape)
# Loom row/column attributes: the names of the genes selected by
# `gene_filter`, and the cell IDs.
row_attrs = {"Gene": gene_name[gene_filter]}
col_attrs = {"CellID": cell_id}
# File name of the input loom, i.e. the text after the last "/". Indexing the
# split with [-1] rather than [1] gives the same result when a "/" is present
# and also handles a bare file name, where rsplit returns a single element
# and [1] would raise IndexError.
input_loom_name = FLAGS["loom"].rsplit("/", 1)[-1]
# Output name: the input name with both filter thresholds appended.
output_loom_name = input_loom_name.replace(
    ".loom",
    "_mc_{}_mce_{}.loom".format(min_expressed_cell,
                                min_expressed_cell_average_expression))
filt_gene_loom_path = "{}/{}".format(output_dir, output_loom_name)
# NOTE(review): input_gene_bc_mat is presumably already restricted to the
# genes in `gene_filter` so that it matches row_attrs - confirm upstream.
loompy.create(filt_gene_loom_path, input_gene_bc_mat, row_attrs, col_attrs)
loom_dataset = LoomDataset(filt_gene_loom_path, save_path="")
vae = VAE(loom_dataset.nb_genes, n_batch=loom_dataset.n_batches)
trainer = UnsupervisedTrainer(vae, loom_dataset)
# Train between a corrupt/uncorrupt pair of calls, then impute.
trainer.corrupt_posteriors()
trainer.train(n_epochs=250, lr=0.001)
trainer.uncorrupt_posteriors()
# Posterior built over every index of the dataset (not only a train split).
full = trainer.create_posterior(trainer.model,
                                loom_dataset,
                                indices=np.arange(len(loom_dataset)))
imputed_values = full.sequential().imputation()
# Name of the HDF5 output file, derived from the input loom name.
output_h5 = input_loom_name.replace(
    ".loom",
    "_scVI_mc_{}_mce_{}.hdf5".format(min_expressed_cell,
                                     min_expressed_cell_average_expression))
with h5py.File("{}/{}".format(output_dir, output_h5), "w") as f:
    f["cell_id"] = cell_id.astype(h5py.special_dtype(vlen=str))
    f["gene_name"] = gene_name[gene_filter].astype(
        h5py.special_dtype(vlen=str))
    if_dset_imputation = f.create_dataset("imputation",
                                          shape=(cell_id.size,
                                                 gene_filter.sum()),