def test_sparse_no_batch_correction(self):
    """Check that gene subsampling shrinks a small BrainLargeDataset.

    For each flavor ("seurat", "cell_ranger") a fresh dataset is built,
    subsampled to roughly half of its genes, and then passed through
    ``highly_variable_genes`` with the same flavor. Finally a fresh dataset
    is subsampled with the default arguments of ``subsample_genes``.
    """
    # Both phases build the dataset with the exact same settings.
    dataset_kwargs = dict(
        save_path="tests/data",
        sample_size_gene_var=10,
        nb_genes_to_keep=128,
        max_cells_to_keep=256,
    )

    for flavor in ("seurat", "cell_ranger"):
        dataset = BrainLargeDataset(**dataset_kwargs)
        initial_gene_count = dataset.nb_genes
        requested_gene_count = initial_gene_count // 2

        dataset.subsample_genes(
            mode=flavor, new_n_genes=requested_gene_count, n_bins=3
        )
        # The resulting number of genes can differ slightly from the
        # requested one, so only a strict decrease is asserted.
        assert dataset.nb_genes < initial_gene_count

        dataset.highly_variable_genes(flavor=flavor, n_bins=3)

    # Default-argument subsampling on a freshly built dataset.
    dataset = BrainLargeDataset(**dataset_kwargs)
    initial_gene_count = dataset.nb_genes
    dataset.subsample_genes()
    assert (
        dataset.nb_genes < initial_gene_count
    ), "subsample_genes did not filter out genes"
def load_datasets(dataset_name, save_path="data/", url=None):
    """Build and return the gene dataset selected by ``dataset_name``.

    Args:
        dataset_name: Either one of the built-in names ("synthetic",
            "cortex", "brain_large", "retina", "cbmc", "brain_small",
            "hemato", "pbmc"), a name whose last five characters are
            ".loom" or ".h5ad", or a name containing ".csv".
        save_path: Passed to the dataset constructors that take a
            ``save_path``. The synthetic and cortex datasets do not receive
            it, and the hemato dataset always uses "data/HEMATO/".
        url: Passed only to the ``.loom`` and ``.h5ad`` loaders.

    Returns:
        The constructed dataset object.

    Raises:
        Exception: If ``dataset_name`` matches none of the known datasets.
    """
    # Ordered (name, factory) pairs; factories are lazy so that only the
    # selected dataset class is ever touched.
    named_factories = (
        ("synthetic", lambda: SyntheticDataset()),
        ("cortex", lambda: CortexDataset()),
        ("brain_large", lambda: BrainLargeDataset(save_path=save_path)),
        ("retina", lambda: RetinaDataset(save_path=save_path)),
        ("cbmc", lambda: CbmcDataset(save_path=save_path)),
        ("brain_small", lambda: BrainSmallDataset(save_path=save_path)),
        ("hemato", lambda: HematoDataset(save_path="data/HEMATO/")),
        ("pbmc", lambda: PbmcDataset(save_path=save_path)),
    )
    for known_name, build in named_factories:
        if dataset_name == known_name:
            return build()

    # File-based datasets are recognised by their name's tail.
    tail = dataset_name[-5:]
    if tail == ".loom":
        return LoomDataset(filename=dataset_name, save_path=save_path, url=url)
    if tail == ".h5ad":
        return AnnDataset(dataset_name, save_path=save_path, url=url)
    if ".csv" in dataset_name:
        return CsvDataset(dataset_name, save_path=save_path)

    raise Exception("No such dataset available")
def load_datasets(dataset_name, save_path='data/', url=None):
    """Build and return the gene dataset selected by ``dataset_name``.

    Args:
        dataset_name: Either one of the built-in names ('synthetic',
            'cortex', 'brain_large', 'retina', 'cbmc', 'brain_small',
            'hemato', 'pbmc'), a name whose last five characters are
            ".loom" or ".h5ad", or a name containing ".csv".
        save_path: Passed to the dataset constructors that take a
            ``save_path``. The synthetic and cortex datasets do not receive
            it.
        url: Passed only to the ``.loom`` and ``.h5ad`` loaders.

    Returns:
        The constructed dataset object.

    Raises:
        Exception: If ``dataset_name`` matches none of the known datasets.
    """
    if dataset_name == 'synthetic':
        gene_dataset = SyntheticDataset()
    elif dataset_name == 'cortex':
        gene_dataset = CortexDataset()
    elif dataset_name == 'brain_large':
        gene_dataset = BrainLargeDataset(save_path=save_path)
    elif dataset_name == 'retina':
        gene_dataset = RetinaDataset(save_path=save_path)
    elif dataset_name == 'cbmc':
        gene_dataset = CbmcDataset(save_path=save_path)
    elif dataset_name == 'brain_small':
        gene_dataset = BrainSmallDataset(save_path=save_path)
    elif dataset_name == 'hemato':
        # NOTE(review): this path is fixed and ignores ``save_path``;
        # kept as-is, confirm whether that is intentional.
        gene_dataset = HematoDataset(save_path='data/HEMATO/')
    elif dataset_name == 'pbmc':
        gene_dataset = PbmcDataset(save_path=save_path)
    elif dataset_name[-5:] == ".loom":
        gene_dataset = LoomDataset(filename=dataset_name, save_path=save_path, url=url)
    elif dataset_name[-5:] == ".h5ad":
        gene_dataset = AnnDataset(dataset_name, save_path=save_path, url=url)
    elif ".csv" in dataset_name:
        gene_dataset = CsvDataset(dataset_name, save_path=save_path)
    else:
        # A bare string cannot be raised in Python 3 (it produces a
        # TypeError instead of the intended message), so wrap the message
        # in a real exception object.
        raise Exception("No such dataset available")
    return gene_dataset
def test_populate(self):
    """Run one epoch of unsupervised training on a small BrainLargeDataset."""
    # Small settings for the dataset built under tests/data.
    dataset_kwargs = dict(
        save_path="tests/data",
        sample_size_gene_var=10,
        nb_genes_to_keep=10,
        max_cells_to_keep=128,
    )
    brain_large = BrainLargeDataset(**dataset_kwargs)
    unsupervised_training_one_epoch(brain_large)
def test_brain_large():
    """Run the base benchmark on a BrainLargeDataset built with subsample_size=128."""
    base_benchmark(
        BrainLargeDataset(subsample_size=128, save_path='tests/data/')
    )