Ejemplo n.º 1
0
    def test_sparse_no_batch_correction(self):
        """Check gene subsampling and highly-variable-gene calls on BrainLargeDataset.

        For each of the "seurat" and "cell_ranger" flavors, a fresh dataset is
        built, subsampled to roughly half of its genes, and then passed through
        ``highly_variable_genes``. A final fresh dataset checks that
        ``subsample_genes`` with default arguments also reduces the gene count.
        """
        dataset_kwargs = dict(
            save_path="tests/data",
            sample_size_gene_var=10,
            nb_genes_to_keep=128,
            max_cells_to_keep=256,
        )

        for flavor in ("seurat", "cell_ranger"):
            dataset = BrainLargeDataset(**dataset_kwargs)

            genes_before = dataset.nb_genes
            dataset.subsample_genes(
                mode=flavor, new_n_genes=genes_before // 2, n_bins=3
            )
            # The resulting gene count can differ slightly from the requested
            # target, so only a strict reduction is asserted.
            assert dataset.nb_genes < genes_before

            dataset.highly_variable_genes(flavor=flavor, n_bins=3)

        # Default-argument path: a fresh dataset must still lose some genes.
        dataset = BrainLargeDataset(**dataset_kwargs)
        genes_before = dataset.nb_genes
        dataset.subsample_genes()
        assert dataset.nb_genes < genes_before, "subsample_genes did not filter out genes"
Ejemplo n.º 2
0
def load_datasets(dataset_name, save_path="data/", url=None):
    """Instantiate the dataset identified by ``dataset_name``.

    Args:
        dataset_name: Either one of the built-in keys ("synthetic", "cortex",
            "brain_large", "retina", "cbmc", "brain_small", "hemato", "pbmc"),
            a file name ending in ".loom" or ".h5ad", or a name containing
            ".csv".
        save_path: Directory forwarded to the dataset constructors that take a
            ``save_path``. It is not used for "synthetic" and "cortex".
        url: Forwarded only to the ".loom" and ".h5ad" loaders.

    Returns:
        The constructed dataset object.

    Raises:
        ValueError: If ``dataset_name`` matches none of the supported names or
            file extensions.
    """
    if dataset_name == "synthetic":
        gene_dataset = SyntheticDataset()
    elif dataset_name == "cortex":
        gene_dataset = CortexDataset()
    elif dataset_name == "brain_large":
        gene_dataset = BrainLargeDataset(save_path=save_path)
    elif dataset_name == "retina":
        gene_dataset = RetinaDataset(save_path=save_path)
    elif dataset_name == "cbmc":
        gene_dataset = CbmcDataset(save_path=save_path)
    elif dataset_name == "brain_small":
        gene_dataset = BrainSmallDataset(save_path=save_path)
    elif dataset_name == "hemato":
        # NOTE(review): this branch uses a fixed path and ignores ``save_path``;
        # confirm whether that is intentional before changing it.
        gene_dataset = HematoDataset(save_path="data/HEMATO/")
    elif dataset_name == "pbmc":
        gene_dataset = PbmcDataset(save_path=save_path)
    elif dataset_name.endswith(".loom"):
        gene_dataset = LoomDataset(filename=dataset_name, save_path=save_path, url=url)
    elif dataset_name.endswith(".h5ad"):
        gene_dataset = AnnDataset(dataset_name, save_path=save_path, url=url)
    elif ".csv" in dataset_name:
        gene_dataset = CsvDataset(dataset_name, save_path=save_path)
    else:
        # ValueError is a subclass of Exception, so existing
        # ``except Exception`` handlers keep working.
        raise ValueError("No such dataset available")
    return gene_dataset
Ejemplo n.º 3
0
def load_datasets(dataset_name, save_path='data/', url=None):
    """Instantiate the dataset identified by ``dataset_name``.

    Args:
        dataset_name: Either one of the built-in keys ('synthetic', 'cortex',
            'brain_large', 'retina', 'cbmc', 'brain_small', 'hemato', 'pbmc'),
            a file name ending in ".loom" or ".h5ad", or a name containing
            ".csv".
        save_path: Directory forwarded to the dataset constructors that take a
            ``save_path``. It is not used for 'synthetic' and 'cortex'.
        url: Forwarded only to the ".loom" and ".h5ad" loaders.

    Returns:
        The constructed dataset object.

    Raises:
        ValueError: If ``dataset_name`` matches none of the supported names or
            file extensions.
    """
    if dataset_name == 'synthetic':
        gene_dataset = SyntheticDataset()
    elif dataset_name == 'cortex':
        gene_dataset = CortexDataset()
    elif dataset_name == 'brain_large':
        gene_dataset = BrainLargeDataset(save_path=save_path)
    elif dataset_name == 'retina':
        gene_dataset = RetinaDataset(save_path=save_path)
    elif dataset_name == 'cbmc':
        gene_dataset = CbmcDataset(save_path=save_path)
    elif dataset_name == 'brain_small':
        gene_dataset = BrainSmallDataset(save_path=save_path)
    elif dataset_name == 'hemato':
        # NOTE(review): this branch uses a fixed path and ignores ``save_path``;
        # confirm whether that is intentional before changing it.
        gene_dataset = HematoDataset(save_path='data/HEMATO/')
    elif dataset_name == 'pbmc':
        gene_dataset = PbmcDataset(save_path=save_path)
    elif dataset_name.endswith(".loom"):
        gene_dataset = LoomDataset(filename=dataset_name,
                                   save_path=save_path,
                                   url=url)
    elif dataset_name.endswith(".h5ad"):
        gene_dataset = AnnDataset(dataset_name, save_path=save_path, url=url)
    elif ".csv" in dataset_name:
        gene_dataset = CsvDataset(dataset_name, save_path=save_path)
    else:
        # Raising a plain string is a TypeError in Python 3; raise a real
        # exception so callers see the intended message.
        raise ValueError("No such dataset available")
    return gene_dataset
Ejemplo n.º 4
0
 def test_populate(self):
     """Build a small BrainLargeDataset and hand it to ``unsupervised_training_one_epoch``."""
     # Small gene/cell limits, presumably to keep the test cheap.
     loader_options = {
         "save_path": "tests/data",
         "sample_size_gene_var": 10,
         "nb_genes_to_keep": 10,
         "max_cells_to_keep": 128,
     }
     unsupervised_training_one_epoch(BrainLargeDataset(**loader_options))
Ejemplo n.º 5
0
def test_brain_large():
    """Run ``base_benchmark`` on a BrainLargeDataset built with ``subsample_size=128``."""
    base_benchmark(
        BrainLargeDataset(subsample_size=128, save_path='tests/data/')
    )