Example #1
0
def load_datasets(dataset_name, save_path='data/', url=None):
    """Instantiate the dataset object that corresponds to ``dataset_name``.

    ``dataset_name`` is first compared against the built-in dataset names
    ('synthetic', 'cortex', 'brain_large', 'retina', 'cbmc', 'brain_small',
    'hemato', 'pbmc'). If none matches, it is treated as a file name and
    dispatched on its extension ('.loom', '.h5ad') or on containing '.csv'.

    :param dataset_name: built-in dataset name, or a data file name.
    :param save_path: directory forwarded to the dataset constructors that
        accept a ``save_path`` argument.
    :param url: forwarded only to the '.loom' and '.h5ad' loaders.
    :return: the constructed dataset object.
    :raises ValueError: if ``dataset_name`` matches no known dataset.
    """
    if dataset_name == 'synthetic':
        gene_dataset = SyntheticDataset()
    elif dataset_name == 'cortex':
        gene_dataset = CortexDataset()
    elif dataset_name == 'brain_large':
        gene_dataset = BrainLargeDataset(save_path=save_path)
    elif dataset_name == 'retina':
        gene_dataset = RetinaDataset(save_path=save_path)
    elif dataset_name == 'cbmc':
        gene_dataset = CbmcDataset(save_path=save_path)
    elif dataset_name == 'brain_small':
        gene_dataset = BrainSmallDataset(save_path=save_path)
    elif dataset_name == 'hemato':
        # NOTE(review): this branch ignores ``save_path`` and always uses
        # the fixed 'data/HEMATO/' directory; kept as-is so existing
        # callers keep finding their data in the same place.
        gene_dataset = HematoDataset(save_path='data/HEMATO/')
    elif dataset_name == 'pbmc':
        gene_dataset = PbmcDataset(save_path=save_path)
    elif dataset_name.endswith(".loom"):
        gene_dataset = LoomDataset(filename=dataset_name,
                                   save_path=save_path,
                                   url=url)
    elif dataset_name.endswith(".h5ad"):
        gene_dataset = AnnDataset(dataset_name, save_path=save_path, url=url)
    elif ".csv" in dataset_name:
        gene_dataset = CsvDataset(dataset_name, save_path=save_path)
    else:
        # Raising a bare string is itself a TypeError in Python 3, which
        # hid the intended message; raise a real exception instead.
        raise ValueError("No such dataset available")
    return gene_dataset
Example #2
0
def load_datasets(dataset_name, save_path="data/", url=None):
    """Build and return the dataset object selected by ``dataset_name``.

    Built-in names are matched exactly first; anything else is treated as a
    file name and routed by its ".loom" / ".h5ad" ending or by containing
    ".csv". ``save_path`` is handed to the loaders that accept it and
    ``url`` only to the ".loom" and ".h5ad" loaders.

    Raises ``Exception`` when no branch matches.
    """
    # --- built-in datasets, matched by exact name ---
    if dataset_name == "synthetic":
        return SyntheticDataset()
    if dataset_name == "cortex":
        return CortexDataset()
    if dataset_name == "brain_large":
        return BrainLargeDataset(save_path=save_path)
    if dataset_name == "retina":
        return RetinaDataset(save_path=save_path)
    if dataset_name == "cbmc":
        return CbmcDataset(save_path=save_path)
    if dataset_name == "brain_small":
        return BrainSmallDataset(save_path=save_path)
    if dataset_name == "hemato":
        # This loader is always pointed at a fixed directory.
        return HematoDataset(save_path="data/HEMATO/")
    if dataset_name == "pbmc":
        return PbmcDataset(save_path=save_path)

    # --- file-based datasets, matched by extension ---
    tail = dataset_name[-5:]
    if tail == ".loom":
        return LoomDataset(filename=dataset_name, save_path=save_path, url=url)
    if tail == ".h5ad":
        return AnnDataset(dataset_name, save_path=save_path, url=url)
    if ".csv" in dataset_name:
        return CsvDataset(dataset_name, save_path=save_path)

    raise Exception("No such dataset available")
Example #3
0
def read_Hemato(override=False, verbose=False):
    """Return the preprocessed HEMATO dataset, building it first if needed.

    The preprocessed directory is ``HEMATO_preprocessed`` under ``DATA_DIR``.
    When it does not yet contain an ``X`` entry, the data is pulled from
    scVI's ``HematoDataset`` and written there, together with three extra
    pickles: ``labels_name``, ``labels_bin`` and ``labels_val``.

    Parameters
    ----------
    override : bool
        If true, the preprocessed directory is removed and recreated before
        checking whether it needs to be rebuilt.
    verbose : bool
        Forwarded to ``_save_data_to_path``.

    Returns
    -------
    A ``Dataset`` opened read-only on the preprocessed directory.

    Raises
    ------
    RuntimeError
        If the data must be rebuilt and ``scvi.dataset`` cannot be imported.
    """
    cache_dir = select_path(
        os.path.join(DATA_DIR, 'HEMATO_preprocessed'), create_new=True)

    # Start over from an empty directory when the caller asks for it.
    if override:
        shutil.rmtree(cache_dir)
        os.mkdir(cache_dir)

    # ====== copy the dataset from scVI ====== #
    needs_build = not os.path.exists(os.path.join(cache_dir, 'X'))
    if needs_build:
        try:
            from scvi.dataset import HematoDataset
        except ImportError:
            raise RuntimeError("Require `scVI` package for HEMATO dataset")

        hemato = HematoDataset(save_path=os.path.join(DOWNLOAD_DIR, 'HEMATO/'))

        # Expression matrix and one gene name per column.
        counts = hemato._X
        genes = np.array(hemato.gene_names)
        assert len(genes) == counts.shape[1]

        # Every meta column except the first is used as a label column.
        targets = hemato.meta.values[:, 1:]
        level_names = np.array(hemato.cell_types_levels)
        assert len(level_names) == targets.shape[1]

        n_cells = counts.shape[0]
        cells = np.array(['Cell#%d' % idx for idx in range(n_cells)])

        _save_data_to_path(cache_dir, counts, targets, genes, level_names,
                           cells, verbose)

        # create a binary classes for testing
        binary_names = np.array(["Erythroblasts", "Granulocytes"])
        raw_labels = hemato.labels
        lowest = np.min(raw_labels)
        highest = np.max(raw_labels)
        # Rescale the labels linearly onto [-1, 1].
        scaled = 2 * (raw_labels - lowest) / (highest - lowest) - 1
        er_column = hemato.meta.iloc[:, 1].values[:, None]  # Er
        gr_column = hemato.meta.iloc[:, 2].values[:, None]  # Gr
        # 0 where the Er column is at least the Gr column, 1 otherwise.
        binary = np.argmax(np.hstack((er_column, gr_column)), axis=-1)

        # Written after _save_data_to_path, in this fixed order.
        extras = (
            ('labels_name', binary_names),
            ('labels_bin', binary),
            ('labels_val', scaled),
        )
        for file_name, payload in extras:
            with open(os.path.join(cache_dir, file_name), 'wb') as f:
                pickle.dump(payload, f)

    # ====== read preprocessed data ====== #
    return Dataset(cache_dir, read_only=True)
Example #4
0
 def test_populate(self):
     # Smoke test: build the HEMATO dataset under tests/data/HEMATO and
     # pass it to unsupervised_training_one_epoch.
     hemato = HematoDataset(save_path="tests/data/HEMATO")
     unsupervised_training_one_epoch(hemato)
Example #5
0
def test_hemato():
    """Run ``base_benchmark`` on a HematoDataset stored in tests/data/HEMATO/."""
    base_benchmark(HematoDataset(save_path='tests/data/HEMATO/'))
Example #6
0
def test_hemato(save_path):
    """Run ``base_benchmark`` on a HematoDataset stored under ``save_path``.

    The dataset directory is the ``HEMATO/`` sub-directory of ``save_path``.
    """
    hemato_dir = os.path.join(save_path, 'HEMATO/')
    base_benchmark(HematoDataset(save_path=hemato_dir))