def cortex_benchmark(n_epochs=250, use_cuda=True, save_path="data/", show_plot=True):
    """Run the VAE benchmark on the Cortex dataset and return the last trainer.

    Two VAEs are trained one after the other on the same ``CortexDataset``.
    The first is used for a differential expression score between two cell
    types (label values 4 and 5, genes ``THY1`` and ``MBP``) and for the
    test-set reconstruction error. The second is trained between
    ``corrupt_posteriors()`` and ``uncorrupt_posteriors()`` and then used for
    the imputation benchmark and a t-SNE plot.

    Args:
        n_epochs: Number of epochs for each of the two training runs. When it
            equals 1 (the unit-test setting) ``show_t_sne`` receives
            ``n_samples=10``; otherwise it receives ``n_samples=None``.
        use_cuda: Forwarded to both ``UnsupervisedTrainer`` instances.
        save_path: Forwarded to ``CortexDataset`` and ``imputation_benchmark``.
        show_plot: Forwarded to ``imputation_benchmark``.

    Returns:
        The ``UnsupervisedTrainer`` used for the second (imputation) run.
    """
    dataset = CortexDataset(save_path=save_path)

    # --- Run 1: differential expression + reconstruction error -------------
    model = VAE(dataset.nb_genes)
    trainer = UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)

    # Label values of the two cell types compared in the DE study.
    celltype_pair = (4, 5)
    mask_first, mask_second = [
        dataset.labels.ravel() == celltype for celltype in celltype_pair
    ]
    trainer.train_set.differential_expression_score(
        mask_first, mask_second, genes=["THY1", "MBP"]
    )

    # Original author's note: the value is expected to be roughly 1200.
    trainer.test_set.reconstruction_error()

    # --- Run 2: imputation benchmark on a fresh model -----------------------
    # The same local names are rebound on purpose so the first model and
    # trainer are no longer referenced while the second run trains.
    model = VAE(dataset.nb_genes)
    trainer = UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.corrupt_posteriors()
    trainer.train(n_epochs=n_epochs)
    trainer.uncorrupt_posteriors()
    trainer.train_set.imputation_benchmark(save_path=save_path, show_plot=show_plot)

    # n_epochs == 1 is how the unit tests call this function.
    if n_epochs == 1:
        tsne_samples = 10
    else:
        tsne_samples = None
    trainer.train_set.show_t_sne(n_samples=tsne_samples)

    return trainer
def cortex_benchmark(n_epochs=250, use_cuda=True, save_path='data/', show_plot=True):
    """Run the VAE benchmark on the Cortex dataset and return the last trainer.

    A first VAE is trained and used for a differential expression score
    between the ``'oligodendrocytes'`` and ``'pyramidal CA1'`` cell types
    (genes ``THY1`` and ``MBP``) and for ``test_set.ll()``. A second VAE is
    trained between ``corrupt_posteriors()`` and ``uncorrupt_posteriors()``
    and then used for the imputation benchmark and a t-SNE plot.

    Args:
        n_epochs: Number of epochs for each of the two training runs. When it
            equals 1 (the unit-test setting) the imputation benchmark is
            called with ``verbose=False`` and ``show_t_sne`` with
            ``n_samples=10``; otherwise ``verbose=True`` and
            ``n_samples=None``.
        use_cuda: Forwarded to both ``UnsupervisedTrainer`` instances.
        save_path: Forwarded to ``CortexDataset`` and ``imputation_benchmark``.
        show_plot: Forwarded to ``imputation_benchmark``.

    Returns:
        The ``UnsupervisedTrainer`` used for the second (imputation) run.
    """
    dataset = CortexDataset(save_path=save_path)

    # --- Run 1: differential expression + likelihood ------------------------
    model = VAE(dataset.nb_genes)
    trainer = UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)
    trainer.train_set.differential_expression_score(
        'oligodendrocytes',
        'pyramidal CA1',
        genes=["THY1", "MBP"],
    )

    # Original author's note: the value is expected to be roughly 1200.
    trainer.test_set.ll()

    # --- Run 2: imputation benchmark on a fresh model -----------------------
    # The same local names are rebound on purpose so the first model and
    # trainer are no longer referenced while the second run trains.
    model = VAE(dataset.nb_genes)
    trainer = UnsupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.corrupt_posteriors()
    trainer.train(n_epochs=n_epochs)
    trainer.uncorrupt_posteriors()

    # Anything beyond the single-epoch unit-test run reports verbosely.
    be_verbose = n_epochs > 1
    trainer.train_set.imputation_benchmark(
        verbose=be_verbose, save_path=save_path, show_plot=show_plot
    )

    # n_epochs == 1 is how the unit tests call this function.
    if n_epochs == 1:
        tsne_samples = 10
    else:
        tsne_samples = None
    trainer.train_set.show_t_sne(n_samples=tsne_samples)

    return trainer
def test_cortex(save_path):
    """Smoke-test the trainers and baseline classifiers on the Cortex dataset.

    Every training run uses a single epoch; the test only checks that the
    calls below complete. It relies on the module-level ``use_cuda`` flag.

    Args:
        save_path: Directory forwarded to ``CortexDataset`` and to
            ``imputation_benchmark``.
    """
    dataset = CortexDataset(save_path=save_path)

    # --- Unsupervised VAE ---------------------------------------------------
    vae = VAE(dataset.nb_genes, dataset.n_batches)
    vae_trainer = UnsupervisedTrainer(vae, dataset, train_size=0.5, use_cuda=use_cuda)
    vae_trainer.train(n_epochs=1)
    vae_trainer.train_set.ll()
    vae_trainer.train_set.differential_expression_stats()

    # Both the 'binomial' corruption and the default one are exercised,
    # in that order, before retraining.
    vae_trainer.corrupt_posteriors(corruption='binomial')
    vae_trainer.corrupt_posteriors()
    vae_trainer.train(n_epochs=1)
    vae_trainer.uncorrupt_posteriors()
    vae_trainer.train_set.imputation_benchmark(
        n_samples=1,
        show_plot=False,
        title_plot='imputation',
        save_path=save_path,
    )

    # --- SCANVI with the joint semi-supervised trainer ----------------------
    scanvi = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    semi_trainer = JointSemiSupervisedTrainer(
        scanvi, dataset, n_labelled_samples_per_class=3, use_cuda=use_cuda
    )
    semi_trainer.train(n_epochs=1)
    semi_trainer.labelled_set.accuracy()
    semi_trainer.full_dataset.ll()

    # --- SCANVI with the alternate semi-supervised trainer ------------------
    scanvi = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    semi_trainer = AlternateSemiSupervisedTrainer(
        scanvi, dataset, n_labelled_samples_per_class=3, use_cuda=use_cuda
    )
    semi_trainer.train(n_epochs=1, lr=1e-2)
    semi_trainer.unlabelled_set.accuracy()

    # --- Baseline classifiers on the raw labelled / unlabelled split --------
    data_train, labels_train = semi_trainer.labelled_set.raw_data()
    data_test, labels_test = semi_trainer.unlabelled_set.raw_data()

    # Single-point grids keep the search trivial.
    svc_grid = [{'C': [1], 'kernel': ['linear']}]
    rf_grid = [{'max_depth': [3], 'n_estimators': [10]}]
    compute_accuracy_svc(
        data_train, labels_train, data_test, labels_test, param_grid=svc_grid
    )
    compute_accuracy_rf(
        data_train, labels_train, data_test, labels_test, param_grid=rf_grid
    )

    # --- Stand-alone classifier ---------------------------------------------
    classifier = Classifier(dataset.nb_genes, n_labels=dataset.n_labels)
    classifier_trainer = ClassifierTrainer(classifier, dataset)
    classifier_trainer.train(n_epochs=1)
    classifier_trainer.train_set.accuracy()
def test_cortex(save_path):
    """Smoke-test trainers, posterior utilities and classifiers on Cortex.

    Every training run uses a single epoch. Besides checking that the calls
    complete, the test asserts the shapes returned by
    ``generate_parameters`` and by ``generate`` on a posterior that covers
    the whole dataset. It relies on the module-level ``use_cuda`` flag.

    Args:
        save_path: Directory forwarded to ``CortexDataset`` and to
            ``imputation_benchmark``.
    """
    dataset = CortexDataset(save_path=save_path)

    # --- Unsupervised VAE ---------------------------------------------------
    vae = VAE(dataset.nb_genes, dataset.n_batches)
    vae_trainer = UnsupervisedTrainer(
        vae, dataset, train_size=0.5, use_cuda=use_cuda
    )
    vae_trainer.train(n_epochs=1)
    vae_trainer.train_set.reconstruction_error()
    vae_trainer.train_set.differential_expression_stats()

    # Feature correlation matrices, Pearson first and then Spearman.
    vae_trainer.train_set.generate_feature_correlation_matrix(
        n_samples=2, correlation_type="pearson"
    )
    vae_trainer.train_set.generate_feature_correlation_matrix(
        n_samples=2, correlation_type="spearman"
    )

    vae_trainer.train_set.imputation(n_samples=1)
    vae_trainer.test_set.imputation(n_samples=5)

    # Both the "binomial" corruption and the default one are exercised,
    # in that order, before retraining.
    vae_trainer.corrupt_posteriors(corruption="binomial")
    vae_trainer.corrupt_posteriors()
    vae_trainer.train(n_epochs=1)
    vae_trainer.uncorrupt_posteriors()
    vae_trainer.train_set.imputation_benchmark(
        n_samples=1, show_plot=False, title_plot="imputation", save_path=save_path
    )

    # --- Shapes returned by generate_parameters -----------------------------
    # First call only checks that the method runs; its result is discarded.
    vae_trainer.train_set.generate_parameters()

    n_cells = len(vae_trainer.train_set.indices)
    n_genes = dataset.nb_genes
    n_samples = 3
    per_cell_shape = (n_cells, n_genes)
    per_sample_shape = (n_samples, n_cells, n_genes)

    # Default call: one value per (cell, gene).
    dropout, means, dispersions = vae_trainer.train_set.generate_parameters()
    assert dropout.shape == per_cell_shape
    assert means.shape == per_cell_shape
    assert dispersions.shape == per_cell_shape

    # With n_samples: a leading sample axis is expected.
    dropout, means, dispersions = vae_trainer.train_set.generate_parameters(
        n_samples=n_samples
    )
    assert dropout.shape == per_sample_shape
    assert means.shape == per_sample_shape

    # With give_mean=True the per-(cell, gene) shape is expected again.
    dropout, means, dispersions = vae_trainer.train_set.generate_parameters(
        n_samples=n_samples, give_mean=True
    )
    assert dropout.shape == per_cell_shape
    assert means.shape == per_cell_shape

    # --- Generation from a posterior over every cell ------------------------
    every_index = np.arange(len(dataset))
    full = vae_trainer.create_posterior(vae, dataset, indices=every_index)
    x_new, x_old = full.generate(n_samples=10)
    assert x_new.shape == (dataset.nb_cells, dataset.nb_genes, 10)
    assert x_old.shape == (dataset.nb_cells, dataset.nb_genes)

    # The imputation benchmark is run a second time, as in the original test.
    vae_trainer.train_set.imputation_benchmark(
        n_samples=1, show_plot=False, title_plot="imputation", save_path=save_path
    )

    # --- SCANVI with the joint semi-supervised trainer ----------------------
    scanvi = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    semi_trainer = JointSemiSupervisedTrainer(
        scanvi, dataset, n_labelled_samples_per_class=3, use_cuda=use_cuda
    )
    semi_trainer.train(n_epochs=1)
    semi_trainer.labelled_set.accuracy()
    semi_trainer.full_dataset.reconstruction_error()

    # --- SCANVI with the alternate semi-supervised trainer ------------------
    scanvi = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    semi_trainer = AlternateSemiSupervisedTrainer(
        scanvi, dataset, n_labelled_samples_per_class=3, use_cuda=use_cuda
    )
    semi_trainer.train(n_epochs=1, lr=1e-2)
    semi_trainer.unlabelled_set.accuracy()

    # --- Baseline classifiers on the raw labelled / unlabelled split --------
    data_train, labels_train = semi_trainer.labelled_set.raw_data()
    data_test, labels_test = semi_trainer.unlabelled_set.raw_data()

    # Single-point grids keep the search trivial.
    svc_grid = [{"C": [1], "kernel": ["linear"]}]
    rf_grid = [{"max_depth": [3], "n_estimators": [10]}]
    compute_accuracy_svc(
        data_train, labels_train, data_test, labels_test, param_grid=svc_grid
    )
    compute_accuracy_rf(
        data_train, labels_train, data_test, labels_test, param_grid=rf_grid
    )

    # --- Stand-alone classifier ---------------------------------------------
    classifier = Classifier(dataset.nb_genes, n_labels=dataset.n_labels)
    classifier_trainer = ClassifierTrainer(classifier, dataset)
    classifier_trainer.train(n_epochs=1)
    classifier_trainer.train_set.accuracy()
print(input_gene_bc_mat.shape) row_attrs = {"Gene": gene_name[gene_filter]} col_attrs = {"CellID": cell_id} input_loom_name = FLAGS["loom"].rsplit("/", 1)[1] output_loom_name = input_loom_name.replace( ".loom", "_mc_{}_mce_{}.loom".format(min_expressed_cell, min_expressed_cell_average_expression)) filt_gene_loom_path = "{}/{}".format(output_dir, output_loom_name) loompy.create(filt_gene_loom_path, input_gene_bc_mat, row_attrs, col_attrs) loom_dataset = LoomDataset(filt_gene_loom_path, save_path="") vae = VAE(loom_dataset.nb_genes, n_batch=loom_dataset.n_batches) trainer = UnsupervisedTrainer(vae, loom_dataset) trainer.corrupt_posteriors() trainer.train(n_epochs=250, lr=0.001) trainer.uncorrupt_posteriors() full = trainer.create_posterior(trainer.model, loom_dataset, indices=np.arange(len(loom_dataset))) imputed_values = full.sequential().imputation() output_h5 = input_loom_name.replace( ".loom", "_scVI_mc_{}_mce_{}.hdf5".format(min_expressed_cell, min_expressed_cell_average_expression)) with h5py.File("{}/{}".format(output_dir, output_h5), "w") as f: f["cell_id"] = cell_id.astype(h5py.special_dtype(vlen=str)) f["gene_name"] = gene_name[gene_filter].astype( h5py.special_dtype(vlen=str)) if_dset_imputation = f.create_dataset("imputation", shape=(cell_id.size, gene_filter.sum()),