def test_cortex(save_path):
    """Smoke-test VAE, SCANVI and classifier training on the Cortex dataset.

    Trains each model for a single epoch and calls the posterior utilities
    (reconstruction error, imputation, parameter generation, sampling,
    accuracy) to make sure they run. Only the shapes of the generated
    parameters and samples are asserted.

    Args:
        save_path: Directory handed to ``CortexDataset`` and to
            ``imputation_benchmark``.
    """
    cortex_dataset = CortexDataset(save_path=save_path)

    # ----- Unsupervised VAE -------------------------------------------------
    vae = VAE(cortex_dataset.nb_genes, cortex_dataset.n_batches)
    vae_trainer = UnsupervisedTrainer(
        vae, cortex_dataset, train_size=0.5, use_cuda=use_cuda
    )
    vae_trainer.train(n_epochs=1)

    # ``train_set`` / ``test_set`` are deliberately looked up on the trainer
    # at every use instead of being cached in a local.
    vae_trainer.train_set.reconstruction_error()
    vae_trainer.train_set.differential_expression_stats()
    for correlation_type in ("pearson", "spearman"):
        vae_trainer.train_set.generate_feature_correlation_matrix(
            n_samples=2, correlation_type=correlation_type
        )
    vae_trainer.train_set.imputation(n_samples=1)
    vae_trainer.test_set.imputation(n_samples=5)

    # Train once more on corrupted posteriors, then restore them.
    vae_trainer.corrupt_posteriors(corruption="binomial")
    vae_trainer.corrupt_posteriors()
    vae_trainer.train(n_epochs=1)
    vae_trainer.uncorrupt_posteriors()

    benchmark_kwargs = dict(
        n_samples=1,
        show_plot=False,
        title_plot="imputation",
        save_path=save_path,
    )
    vae_trainer.train_set.imputation_benchmark(**benchmark_kwargs)

    # ----- Shapes returned by generate_parameters ---------------------------
    vae_trainer.train_set.generate_parameters()
    n_cells = len(vae_trainer.train_set.indices)
    n_genes = cortex_dataset.nb_genes
    n_samples = 3

    dropout, means, dispersions = vae_trainer.train_set.generate_parameters()
    assert dropout.shape == (n_cells, n_genes)
    assert means.shape == (n_cells, n_genes)
    assert dispersions.shape == (n_cells, n_genes)

    dropout, means, dispersions = vae_trainer.train_set.generate_parameters(
        n_samples=n_samples
    )
    assert dropout.shape == (n_samples, n_cells, n_genes)
    assert means.shape == (n_samples, n_cells, n_genes)

    dropout, means, dispersions = vae_trainer.train_set.generate_parameters(
        n_samples=n_samples, give_mean=True
    )
    assert dropout.shape == (n_cells, n_genes)
    assert means.shape == (n_cells, n_genes)

    # ----- Sampling from a posterior built over every cell ------------------
    every_cell = np.arange(len(cortex_dataset))
    full_posterior = vae_trainer.create_posterior(
        vae, cortex_dataset, indices=every_cell
    )
    x_new, x_old = full_posterior.generate(n_samples=10)
    assert x_new.shape == (cortex_dataset.nb_cells, cortex_dataset.nb_genes, 10)
    assert x_old.shape == (cortex_dataset.nb_cells, cortex_dataset.nb_genes)

    vae_trainer.train_set.imputation_benchmark(**benchmark_kwargs)

    # ----- SCANVI, joint semi-supervised training ---------------------------
    joint_model = SCANVI(
        cortex_dataset.nb_genes, cortex_dataset.n_batches, cortex_dataset.n_labels
    )
    joint_trainer = JointSemiSupervisedTrainer(
        joint_model,
        cortex_dataset,
        n_labelled_samples_per_class=3,
        use_cuda=use_cuda,
    )
    joint_trainer.train(n_epochs=1)
    joint_trainer.labelled_set.accuracy()
    joint_trainer.full_dataset.reconstruction_error()

    # ----- SCANVI, alternate semi-supervised training -----------------------
    alternate_model = SCANVI(
        cortex_dataset.nb_genes, cortex_dataset.n_batches, cortex_dataset.n_labels
    )
    alternate_trainer = AlternateSemiSupervisedTrainer(
        alternate_model,
        cortex_dataset,
        n_labelled_samples_per_class=3,
        use_cuda=use_cuda,
    )
    alternate_trainer.train(n_epochs=1, lr=1e-2)
    alternate_trainer.unlabelled_set.accuracy()

    # ----- Baseline classifiers on the raw labelled / unlabelled split ------
    data_train, labels_train = alternate_trainer.labelled_set.raw_data()
    data_test, labels_test = alternate_trainer.unlabelled_set.raw_data()
    svc_grid = [{"C": [1], "kernel": ["linear"]}]
    compute_accuracy_svc(
        data_train, labels_train, data_test, labels_test, param_grid=svc_grid
    )
    rf_grid = [{"max_depth": [3], "n_estimators": [10]}]
    compute_accuracy_rf(
        data_train, labels_train, data_test, labels_test, param_grid=rf_grid
    )

    # ----- Plain classifier --------------------------------------------------
    classifier = Classifier(
        cortex_dataset.nb_genes, n_labels=cortex_dataset.n_labels
    )
    classifier_trainer = ClassifierTrainer(classifier, cortex_dataset)
    classifier_trainer.train(n_epochs=1)
    classifier_trainer.train_set.accuracy()
def test_populate(self):
    """Build the Cortex dataset and hand it to the one-epoch training helper.

    The dataset is created with ``save_path="tests/data"`` and passed to
    ``unsupervised_training_one_epoch``; nothing is asserted, so the test
    passes as long as neither call raises.
    """
    cortex = CortexDataset(save_path="tests/data")
    unsupervised_training_one_epoch(cortex)
def to_tensor(x): """ numpy array to pytorch tensor """ return torch.from_numpy(x.astype('float32')).to(torch_device) def to_array(x): """ pytorch tensor to numpy array """ if hasattr(x, 'todense'): return np.array(x.todense()) if hasattr(x, 'cpu'): return x.data.cpu().numpy() return x # Load dataset cortex = CortexDataset(save_path=SAVE_DATA_PATH) X = cortex.X labels = cortex.cell_types n_labels = len(labels) Y = one_hot(cortex.labels.ravel(), n_labels) # =========================================================================== # scVI # =========================================================================== scvi = VAE(n_input=cortex.nb_genes, n_batch=0, n_labels=0, n_hidden=n_hidden, n_latent=n_latent, n_layers=n_layer, dispersion=dispersion,
def test_differential_expression(save_path):
    """Exercise posterior save/load and differential expression for VAE and totalVI.

    A VAE is trained on the Cortex dataset, its posterior is written to a
    temporary directory and reloaded, and differential-expression scores are
    computed in ``"vanilla"`` and ``"change"`` modes. The same two DE calls
    are then repeated for a TOTALVI model trained on the
    ``pbmc_10k_protein_v3`` 10X dataset.

    Args:
        save_path: Directory for the Cortex data; the 10X data goes in its
            ``"10X"`` sub-directory.
    """
    # ----- scVI posterior on Cortex -----------------------------------------
    dataset = CortexDataset(save_path=save_path)
    n_cells = len(dataset)
    all_indices = np.arange(n_cells)
    vae = VAE(dataset.nb_genes, dataset.n_batches)
    trainer = UnsupervisedTrainer(
        vae, dataset, train_size=0.5, use_cuda=use_cuda
    )
    trainer.train(n_epochs=2)
    post = trainer.create_posterior(
        vae, dataset, shuffle=False, indices=all_indices
    )

    # Round-trip the posterior through disk and compare with the in-memory one.
    with tempfile.TemporaryDirectory() as temp_dir:
        posterior_save_path = os.path.join(temp_dir, "posterior_data")
        # ``post`` is rebound here: the sequential, batch_size=3 posterior is
        # the one used for the rest of the scVI section.
        post = post.sequential(batch_size=3)
        post.save_posterior(posterior_save_path)
        new_vae = VAE(dataset.nb_genes, dataset.n_batches)
        new_post = load_posterior(
            posterior_save_path, model=new_vae, use_cuda=False
        )
        assert new_post.data_loader.batch_size == 3
        assert np.array_equal(new_post.indices, post.indices)
        assert np.array_equal(new_post.gene_dataset.X, post.gene_dataset.X)

    # Sample scale example
    sampled = post.scale_sampler(
        n_samples_per_cell=4, n_samples=None, selection=all_indices
    )
    px_scales = sampled["scale"]
    assert (
        px_scales.shape[1] == dataset.nb_genes
    ), "posterior scales should have shape (n_samples, n_genes)"

    # Differential expression different models
    idx_1 = [1, 2, 3]
    idx_2 = [4, 5, 6, 7]
    shared_de_kwargs = dict(
        idx1=idx_1,
        idx2=idx_2,
        n_samples=10,
        use_permutation=True,
        M_permutation=100,
    )
    post.differential_expression_score(mode="vanilla", **shared_de_kwargs)
    de_dataframe = post.differential_expression_score(
        mode="change", cred_interval_lvls=[0.5, 0.95], **shared_de_kwargs
    )
    print(de_dataframe.keys())

    # Each credible interval must be ordered (lower bound <= upper bound).
    lower_50 = de_dataframe["lfc_confidence_interval_0.5_min"]
    upper_50 = de_dataframe["lfc_confidence_interval_0.5_max"]
    assert (lower_50 <= upper_50).all()
    lower_95 = de_dataframe["lfc_confidence_interval_0.95_min"]
    upper_95 = de_dataframe["lfc_confidence_interval_0.95_max"]
    assert (lower_95 <= upper_95).all()

    # DE estimation example
    de_probabilities = de_dataframe.loc[:, "proba_de"]
    assert ((0.0 <= de_probabilities) & (de_probabilities <= 1.0)).all()

    # ----- Test totalVI DE ---------------------------------------------------
    sp = os.path.join(save_path, "10X")
    dataset = Dataset10X(dataset_name="pbmc_10k_protein_v3", save_path=sp)
    n_cells = len(dataset)
    all_indices = np.arange(n_cells)
    vae = TOTALVI(
        dataset.nb_genes, len(dataset.protein_names), n_batch=dataset.n_batches
    )
    trainer = TotalTrainer(
        vae,
        dataset,
        train_size=0.5,
        use_cuda=use_cuda,
        early_stopping_kwargs=None,
    )
    trainer.train(n_epochs=2)
    post = trainer.create_posterior(
        vae,
        dataset,
        shuffle=False,
        indices=all_indices,
        type_class=TotalPosterior,
    )

    # Differential expression different models
    idx_1 = [1, 2, 3]
    idx_2 = [4, 5, 6, 7]
    shared_de_kwargs = dict(
        idx1=idx_1,
        idx2=idx_2,
        n_samples=10,
        use_permutation=True,
        M_permutation=100,
    )
    post.differential_expression_score(mode="vanilla", **shared_de_kwargs)
    post.differential_expression_score(mode="change", **shared_de_kwargs)
def test_cortex(save_path):
    """Smoke-test VAE, SCANVI and classifier training on the Cortex dataset.

    Each model is trained for one epoch and its evaluation helpers
    (``ll``, ``accuracy``, imputation benchmark, SVC / random-forest
    baselines) are called. Nothing is asserted; the test passes as long as
    no call raises.

    Args:
        save_path: Directory handed to ``CortexDataset`` and to
            ``imputation_benchmark``.
    """
    cortex_dataset = CortexDataset(save_path=save_path)

    # ----- Unsupervised VAE -------------------------------------------------
    vae = VAE(cortex_dataset.nb_genes, cortex_dataset.n_batches)
    vae_trainer = UnsupervisedTrainer(
        vae, cortex_dataset, train_size=0.5, use_cuda=use_cuda
    )
    vae_trainer.train(n_epochs=1)
    vae_trainer.train_set.ll()
    vae_trainer.train_set.differential_expression_stats()

    # Train once more on corrupted posteriors, then restore them before the
    # imputation benchmark.
    vae_trainer.corrupt_posteriors(corruption='binomial')
    vae_trainer.corrupt_posteriors()
    vae_trainer.train(n_epochs=1)
    vae_trainer.uncorrupt_posteriors()
    vae_trainer.train_set.imputation_benchmark(
        n_samples=1,
        show_plot=False,
        title_plot='imputation',
        save_path=save_path,
    )

    # ----- SCANVI, joint semi-supervised training ---------------------------
    joint_model = SCANVI(
        cortex_dataset.nb_genes, cortex_dataset.n_batches, cortex_dataset.n_labels
    )
    joint_trainer = JointSemiSupervisedTrainer(
        joint_model,
        cortex_dataset,
        n_labelled_samples_per_class=3,
        use_cuda=use_cuda,
    )
    joint_trainer.train(n_epochs=1)
    joint_trainer.labelled_set.accuracy()
    joint_trainer.full_dataset.ll()

    # ----- SCANVI, alternate semi-supervised training -----------------------
    alternate_model = SCANVI(
        cortex_dataset.nb_genes, cortex_dataset.n_batches, cortex_dataset.n_labels
    )
    alternate_trainer = AlternateSemiSupervisedTrainer(
        alternate_model,
        cortex_dataset,
        n_labelled_samples_per_class=3,
        use_cuda=use_cuda,
    )
    alternate_trainer.train(n_epochs=1, lr=1e-2)
    alternate_trainer.unlabelled_set.accuracy()

    # ----- Baseline classifiers on the raw labelled / unlabelled split ------
    data_train, labels_train = alternate_trainer.labelled_set.raw_data()
    data_test, labels_test = alternate_trainer.unlabelled_set.raw_data()
    svc_grid = [{'C': [1], 'kernel': ['linear']}]
    compute_accuracy_svc(
        data_train, labels_train, data_test, labels_test, param_grid=svc_grid
    )
    rf_grid = [{'max_depth': [3], 'n_estimators': [10]}]
    compute_accuracy_rf(
        data_train, labels_train, data_test, labels_test, param_grid=rf_grid
    )

    # ----- Plain classifier --------------------------------------------------
    classifier = Classifier(
        cortex_dataset.nb_genes, n_labels=cortex_dataset.n_labels
    )
    classifier_trainer = ClassifierTrainer(classifier, cortex_dataset)
    classifier_trainer.train(n_epochs=1)
    classifier_trainer.train_set.accuracy()
def test_fish_rna():
    """Run the smFISH / scRNA-seq benchmark on the smFISH and Cortex datasets.

    The Cortex dataset is built from the smFISH gene names with no extra
    genes to keep and 50 additional genes. Nothing is asserted; the test
    passes as long as the benchmark call does not raise.
    """
    fish = SmfishDataset()
    seq = CortexDataset(
        genes_fish=fish.gene_names,
        genes_to_keep=[],
        additional_genes=50,
    )
    # NOTE(review): the callee is spelled ``benchamrk_fish_scrna`` here -
    # confirm this matches the name actually exported by the library.
    benchamrk_fish_scrna(seq, fish)
def test_fish_rna(save_path):
    """Run the smFISH / scRNA-seq benchmark on the smFISH and Cortex datasets.

    Both datasets are created with ``save_path``. The Cortex dataset is built
    from the smFISH gene names with no extra genes to keep and 50 additional
    genes. Nothing is asserted; the test passes as long as the benchmark call
    does not raise.

    Args:
        save_path: Directory handed to both dataset constructors.
    """
    fish = SmfishDataset(save_path)
    seq = CortexDataset(
        save_path=save_path,
        genes_fish=fish.gene_names,
        genes_to_keep=[],
        additional_genes=50,
    )
    benchmark_fish_scrna(seq, fish)
# Script: train an scVI VAE on the Cortex dataset.
# ``save_path`` and ``n_epochs_all`` are expected to be defined before this
# point; they are not set anywhere in this excerpt.
show_plot = True

import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from scvi.dataset import CortexDataset, RetinaDataset
from scvi.models import *
from scvi.inference import UnsupervisedTrainer
import torch
import ssl

# NOTE(review): this swaps the default HTTPS context factory for the
# unverified one, so HTTPS requests that rely on the default context stop
# verifying certificates from here on. Confirm this is intentional.
ssl._create_default_https_context = ssl._create_unverified_context

gene_dataset = CortexDataset(save_path=save_path)

# Training configuration: 400 epochs unless ``n_epochs_all`` overrides it.
n_epochs = n_epochs_all if n_epochs_all is not None else 400
lr = 1e-3
use_batches = False
use_cuda = True

# Multiplying by the boolean ``use_batches`` yields 0 batches when it is False.
vae = VAE(
    gene_dataset.nb_genes,
    n_batch=gene_dataset.n_batches * use_batches,
)
trainer = UnsupervisedTrainer(
    vae,
    gene_dataset,
    train_size=0.75,
    use_cuda=use_cuda,
    frequency=5,
    verbose=True,
)
trainer.train(n_epochs=n_epochs, lr=lr)