def test_filter_and_concat_datasets():
    """Check gene/cell-type filtering, dataset concatenation, subsampling and label mapping.

    Covers, in order:

    * concatenating two ``CortexDataset`` instances restricted to the gene
      index windows ``[0, 3)`` and ``[1, 4)`` and asserting the merged
      dataset reports 2 genes;
    * concatenating two ``SyntheticDataset`` instances with and without
      ``shared_labels`` and asserting the resulting batch/label counts;
    * ``filter_cell_types`` and ``subsample_cells`` on a synthetic dataset;
    * a smoke call of ``map_cell_types`` (no assertion follows it).
    """
    # First cortex view: gene indices 0..2, two cell types selected by name.
    cortex_dataset_1 = CortexDataset(save_path='tests/data/')
    cortex_dataset_1.subsample_genes(subset_genes=np.arange(0, 3))
    cortex_dataset_1.filter_cell_types(["microglia", "oligodendrocytes"])

    # Second cortex view: gene indices 1..3, filtered first by name and then
    # by integer index.
    # NOTE(review): presumably [2, 0] indexes into the cell types kept by the
    # previous call - confirm against filter_cell_types.
    cortex_dataset_2 = CortexDataset(save_path='tests/data/')
    cortex_dataset_2.subsample_genes(subset_genes=np.arange(1, 4))
    cortex_dataset_2.filter_cell_types(
        ["endothelial-mural", "interneurons", "microglia", "oligodendrocytes"]
    )
    cortex_dataset_2.filter_cell_types([2, 0])

    # The two index windows overlap on two positions; the merged dataset is
    # expected to report exactly 2 genes.
    cortex_dataset_merged = GeneExpressionDataset.concat_datasets(
        cortex_dataset_1, cortex_dataset_2
    )
    assert cortex_dataset_merged.nb_genes == 2

    synthetic_dataset_1 = SyntheticDataset(n_batches=2, n_labels=5)
    synthetic_dataset_2 = SyntheticDataset(n_batches=3, n_labels=3)

    # Default concatenation: batches add up (2 + 3), label count stays at 5.
    synthetic_merged_1 = GeneExpressionDataset.concat_datasets(
        synthetic_dataset_1, synthetic_dataset_2
    )
    assert synthetic_merged_1.n_batches == 5
    assert synthetic_merged_1.n_labels == 5

    # With shared_labels=False the label counts add up as well (5 + 3).
    synthetic_merged_2 = GeneExpressionDataset.concat_datasets(
        synthetic_dataset_1, synthetic_dataset_2, shared_labels=False
    )
    assert synthetic_merged_2.n_batches == 5
    assert synthetic_merged_2.n_labels == 8

    # Keeping 4 of the 5 labels must be reflected in n_labels.
    synthetic_dataset_1.filter_cell_types([0, 1, 2, 3])
    assert synthetic_dataset_1.n_labels == 4

    synthetic_dataset_1.subsample_cells(50)
    assert len(synthetic_dataset_1) == 50

    synthetic_dataset_3 = SyntheticDataset(n_labels=6)
    # Use the builtin ``str`` as dtype: ``np.str`` was merely an alias for it,
    # deprecated in NumPy 1.20 and removed in NumPy 1.24.
    synthetic_dataset_3.cell_types = np.arange(6).astype(str)
    # Smoke test only: verifies the call accepts both a single key and a
    # tuple key without raising; the result is not asserted.
    synthetic_dataset_3.map_cell_types({"2": "9", ("4", "3"): "8"})
def test_filter_and_concat_datasets():
    """Verify filtering, concatenation and subsampling of expression datasets.

    Two ``CortexDataset`` instances are restricted to the gene index windows
    ``[0, 300)`` and ``[100, 400)``, filtered by cell type and concatenated;
    the merged dataset must report 200 genes. Two ``SyntheticDataset``
    instances are then concatenated with and without ``shared_labels`` and
    the batch/label counts are checked, followed by cell-type filtering and
    cell subsampling on the first synthetic dataset.
    """
    # --- Cortex: overlapping gene windows, cell types selected by name ---
    first_gene_window = np.arange(0, 300)
    first_cell_types = ["microglia", "oligodendrocytes"]
    cortex_a = CortexDataset()
    cortex_a.subsample_genes(subset_genes=first_gene_window)
    cortex_a.filter_cell_types(first_cell_types)

    second_gene_window = np.arange(100, 400)
    second_cell_types = [
        "endothelial-mural",
        "interneurons",
        "microglia",
        "oligodendrocytes",
    ]
    cortex_b = CortexDataset()
    cortex_b.subsample_genes(subset_genes=second_gene_window)
    cortex_b.filter_cell_types(second_cell_types)
    # Second pass filters by integer index instead of by name.
    cortex_b.filter_cell_types([2, 0])

    merged_cortex = GeneExpressionDataset.concat_datasets(cortex_a, cortex_b)
    expected_gene_count = 200
    assert merged_cortex.nb_genes == expected_gene_count

    # --- Synthetic: batch and label bookkeeping across concatenation ---
    synth_a = SyntheticDataset(n_batches=2, n_labels=5)
    synth_b = SyntheticDataset(n_batches=3, n_labels=3)

    merged_default = GeneExpressionDataset.concat_datasets(synth_a, synth_b)
    assert merged_default.n_batches == 5
    assert merged_default.n_labels == 5

    merged_unshared = GeneExpressionDataset.concat_datasets(
        synth_a, synth_b, shared_labels=False
    )
    assert merged_unshared.n_batches == 5
    assert merged_unshared.n_labels == 8

    # --- Synthetic: in-place filtering and subsampling ---
    kept_labels = [0, 1, 2, 3]
    synth_a.filter_cell_types(kept_labels)
    assert synth_a.n_labels == 4

    synth_a.subsample_cells(50)
    assert len(synth_a) == 50