def load_datasets(dataset_name, save_path='data/', url=None):
    """Instantiate and return the dataset selected by ``dataset_name``.

    Parameters
    ----------
    dataset_name : str
        Either a known dataset key ('synthetic', 'cortex', 'brain_large',
        'retina', 'cbmc', 'brain_small', 'hemato', 'pbmc') or a file name
        ending in '.loom' / '.h5ad', or containing '.csv'.
    save_path : str
        Directory where downloaded data is cached (ignored by the
        'synthetic' and 'cortex' loaders, which take no path).
    url : str, optional
        Download URL, forwarded only to the file-based loaders.

    Returns
    -------
    The constructed gene dataset object.

    Raises
    ------
    ValueError
        If ``dataset_name`` matches no known dataset or file extension.
    """
    if dataset_name == 'synthetic':
        gene_dataset = SyntheticDataset()
    elif dataset_name == 'cortex':
        gene_dataset = CortexDataset()
    elif dataset_name == 'brain_large':
        gene_dataset = BrainLargeDataset(save_path=save_path)
    elif dataset_name == 'retina':
        gene_dataset = RetinaDataset(save_path=save_path)
    elif dataset_name == 'cbmc':
        gene_dataset = CbmcDataset(save_path=save_path)
    elif dataset_name == 'brain_small':
        gene_dataset = BrainSmallDataset(save_path=save_path)
    elif dataset_name == 'hemato':
        # NOTE(review): hemato ignores `save_path` and always uses
        # 'data/HEMATO/' — kept as-is to avoid changing cached locations.
        gene_dataset = HematoDataset(save_path='data/HEMATO/')
    elif dataset_name == 'pbmc':
        gene_dataset = PbmcDataset(save_path=save_path)
    elif dataset_name.endswith(".loom"):
        gene_dataset = LoomDataset(filename=dataset_name, save_path=save_path, url=url)
    elif dataset_name.endswith(".h5ad"):
        gene_dataset = AnnDataset(dataset_name, save_path=save_path, url=url)
    elif ".csv" in dataset_name:
        gene_dataset = CsvDataset(dataset_name, save_path=save_path)
    else:
        # Raising a bare string is a TypeError in Python 3; raise a real
        # exception type instead so callers can catch it.
        raise ValueError("No such dataset available: %s" % dataset_name)
    return gene_dataset
def load_datasets(dataset_name, save_path="data/", url=None):
    """Build and return the dataset object matching ``dataset_name``.

    Known names map directly to their dataset classes; any other name is
    treated as a file and dispatched on its extension (.loom / .h5ad) or on
    a '.csv' substring.  Raises ``Exception`` for an unrecognized name.
    """
    # Lazy constructors: only the selected dataset is ever instantiated.
    named_loaders = {
        "synthetic": lambda: SyntheticDataset(),
        "cortex": lambda: CortexDataset(),
        "brain_large": lambda: BrainLargeDataset(save_path=save_path),
        "retina": lambda: RetinaDataset(save_path=save_path),
        "cbmc": lambda: CbmcDataset(save_path=save_path),
        "brain_small": lambda: BrainSmallDataset(save_path=save_path),
        "hemato": lambda: HematoDataset(save_path="data/HEMATO/"),
        "pbmc": lambda: PbmcDataset(save_path=save_path),
    }
    if dataset_name in named_loaders:
        return named_loaders[dataset_name]()
    if dataset_name[-5:] == ".loom":
        return LoomDataset(filename=dataset_name, save_path=save_path, url=url)
    if dataset_name[-5:] == ".h5ad":
        return AnnDataset(dataset_name, save_path=save_path, url=url)
    if ".csv" in dataset_name:
        return CsvDataset(dataset_name, save_path=save_path)
    raise Exception("No such dataset available")
def test_retina(save_path='tests/data/'):
    """Smoke-test the retina dataset through the benchmark pipeline.

    Parameters
    ----------
    save_path : str
        Directory where the retina data is downloaded/cached.  Defaults to
        the previously hard-coded 'tests/data/', so existing no-argument
        callers keep working.
    """
    retina_dataset = RetinaDataset(save_path=save_path)
    base_benchmark(retina_dataset)
def test_retina(save_path):
    """Run the base benchmark against the retina dataset cached at ``save_path``."""
    base_benchmark(RetinaDataset(save_path=save_path))
def test_retina_load_train_one(self):
    """Load the retina dataset and run a single unsupervised training epoch as a smoke test."""
    retina = RetinaDataset(save_path="tests/data")
    unsupervised_training_one_epoch(retina)
def _str2bool(value):
    """Parse a command-line string into a real boolean.

    argparse's ``type=bool`` is broken: any non-empty string is truthy, so
    ``--save_metrics False`` would silently yield ``True``.  Accept the
    usual spellings explicitly instead.
    """
    if isinstance(value, bool):
        return value
    lowered = value.lower()
    if lowered in ('true', '1', 'yes', 'y'):
        return True
    if lowered in ('false', '0', 'no', 'n'):
        return False
    raise argparse.ArgumentTypeError("expected a boolean, got %r" % value)


# Benchmark datasets, keyed by a short name used elsewhere in the script.
datasets = {
    'scvi_pbmc': PbmcDataset(),
    'bermuda_pbmc': CsvDataset(
        str(DIRPATH / './pbmc/expression.csv'),
        labels_file=str(DIRPATH / './pbmc/labels.csv'),
        batch_ids_file=str(DIRPATH / './pbmc/batches.csv'),
        gene_by_cell=False,
    ),
    'mouse': CsvDataset(
        str(DIRPATH / './mouse_genes/ST1 - original_expression.csv'),
        labels_file=str(DIRPATH / './mouse_genes/labels.csv'),
        batch_ids_file=str(DIRPATH / './mouse_genes/batches.csv'),
        gene_by_cell=False,
    ),
    #'pancreas': BermudaDataset('./pancreas/muraro_seurat.csv'),
    'retina': RetinaDataset(),
    'starmap': PreFrontalCortexStarmapDataset(),
}

parser = argparse.ArgumentParser(
    description="A way to define variables for training, tests and models")
# The help text previously promised defaults ('./', True) that were never
# set via default=; the defaults are now actually supplied.
parser.add_argument(
    '--metrics_dir', type=str, default='./',
    help="Path to save metrics file. Disabled if save_metrics flag is "
         "False. Default value is './'")
parser.add_argument(
    '--save_metrics', type=_str2bool, default=True,
    help='Boolean flag determines whether metrics should be saved. '
         'Default value is True')
parser.add_argument(
    '--custom_config', type=str, default=None,
    help='The path to the configuration file to be used. If equal to None, '
         'then the config located in the same folder will be used.')
args = parser.parse_args()
# Notebook-style script: train a VAE on the retina dataset to correct
# batch effects, logging held-out likelihood during training.
import os
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from scvi.dataset import CortexDataset, RetinaDataset
from scvi.models import *
from scvi.inference import UnsupervisedTrainer
import torch

## Correction for batch effects
# NOTE(review): `save_path` and `n_epochs_all` are not defined in this
# chunk — presumably injected by the surrounding notebook/test harness;
# confirm before running standalone.
gene_dataset = RetinaDataset(save_path=save_path)
n_epochs=50 if n_epochs_all is None else n_epochs_all  # harness override wins
lr=1e-3
use_batches=True   # multiplying n_batch by this flag disables batch correction when False
use_cuda=True

### Train the model and output model likelihood every 5 epochs
vae = VAE(gene_dataset.nb_genes, n_batch=gene_dataset.n_batches * use_batches)
# frequency=5 -> record train/test likelihood every 5 epochs; 90/10 split.
trainer = UnsupervisedTrainer(vae,
                              gene_dataset,
                              train_size=0.9,
                              use_cuda=use_cuda,
                              frequency=5)
trainer.train(n_epochs=n_epochs, lr=lr)
#%%
# Plotting the likelihood change across the 50 epochs of training: blue for training error and orange for testing error.