예제 #1
0
    def test_concatenate_from_memory_to_memory(self):
        """Join two in-memory datasets into memory and verify the merged metadata.

        The union is built from deep copies of the module-level fixtures,
        while every expectation is derived from the fixtures themselves.
        The test checks gene names, cell types, batch indices and labels of
        the union, then runs ``unsupervised_training_one_epoch`` on it.
        """
        dset1, dset2 = copy.deepcopy(dataset1), copy.deepcopy(dataset2)
        union = UnionDataset(save_path=save_path,
                             low_memory=True,
                             ignore_batch_annotation=False)
        union.build_genemap(data_source="memory", gene_datasets=[dset1, dset2])
        union.join_datasets(data_source="memory",
                            data_target="memory",
                            gene_datasets=[dset1, dset2])

        # np.unique already returns its result sorted, so no extra sort is
        # needed to obtain the sorted union of names.
        expected_gene_names = np.unique(
            np.concatenate([dataset1.gene_names, dataset2.gene_names]))
        expected_cell_types = np.unique(
            np.concatenate([dataset1.cell_types, dataset2.cell_types]))

        # NOTE(review): the offset 5 is presumably the number of batches in
        # dataset1 -- confirm against the fixture definition.
        expected_batch_indices = np.concatenate(
            [dataset1.batch_indices,
             dataset2.batch_indices + 5]).reshape(-1, 1)

        # Translate each cell's label into its cell type name, then into the
        # rank of that name within the sorted union of cell types.
        cell_types_1 = dataset1.cell_types[dataset1.labels]
        cell_types_2 = dataset2.cell_types[dataset2.labels]
        # expected_cell_types is sorted, unique and contains every name that
        # is looked up, so the insertion position of a name is its rank.
        # Doing the lookup this way avoids writing integers into a
        # fixed-width string array, which can truncate multi-digit ranks and
        # can collide with cell type names that look like numbers.
        expected_labels = np.searchsorted(
            expected_cell_types,
            np.concatenate([cell_types_1, cell_types_2]))

        self.assertTrue((union.gene_names == expected_gene_names).all())
        self.assertTrue((union.cell_types == expected_cell_types).all())
        self.assertTrue((union.batch_indices == expected_batch_indices).all())
        self.assertTrue((union.labels == expected_labels).all())

        unsupervised_training_one_epoch(union)
예제 #2
0
    def test_build_gene_map_from_memory(self):
        """Build the gene map from two in-memory datasets and check its content.

        The union's gene names must be exactly ``A`` to ``F``, and its gene
        map must carry those names as index with the values ``0`` to ``5``.
        """
        first, second = copy.deepcopy(dataset1), copy.deepcopy(dataset2)
        union = UnionDataset(
            save_path=save_path,
            low_memory=True,
            ignore_batch_annotation=False,
        )
        union.build_genemap(data_source="memory",
                            gene_datasets=[first, second])

        # Reference mapping: gene name -> position in the merged gene list.
        genes = list("ABCDEF")
        expected_map = pd.Series(np.arange(len(genes)), index=genes)

        self.assertEqual(union.gene_names.tolist(), genes)

        index_matches = (
            union.gene_map.index.values == expected_map.index.values)
        self.assertTrue(index_matches.all())

        values_match = union.gene_map.values == expected_map.values
        self.assertTrue(values_match.all())
예제 #3
0
    def test_concatenate_from_memory_to_loom(self):
        """Join in-memory datasets into a loom file and compare to an in-memory join.

        An in-memory union (``low_memory=False``) of the same datasets serves
        as the reference. Length, a random fifth of the rows of ``X``, gene
        names, cell types, batch indices and labels must agree, after which
        ``unsupervised_training_one_epoch`` is run on the loom-backed union.

        If anything raises an ``Exception``, ``test_concat.loom`` under
        ``save_path`` is removed before the error is re-raised.
        """
        loom_path = os.path.join(save_path, "test_concat.loom")
        try:
            dset1, dset2 = copy.deepcopy(dataset1), copy.deepcopy(dataset2)

            # Reference: the same two datasets joined purely in memory.
            union_mem = UnionDataset(save_path=save_path,
                                     low_memory=False,
                                     ignore_batch_annotation=False)
            union_mem.build_genemap(data_source="memory",
                                    gene_datasets=[dset1, dset2])
            union_mem.join_datasets(data_source="memory",
                                    data_target="memory",
                                    gene_datasets=[dset1, dset2])

            # Under test: the join is written to a loom file.
            union = UnionDataset(save_path=save_path,
                                 low_memory=True,
                                 ignore_batch_annotation=False)
            union.build_genemap(data_source="memory",
                                gene_datasets=[dset1, dset2])
            union.join_datasets(data_source='memory',
                                data_target='loom',
                                gene_datasets=[dset1, dset2],
                                out_filename="test_concat.loom")

            self.assertEqual(len(union), len(union_mem))

            # Compare a sorted random sample of one fifth of the rows.
            random_indices = np.sort(
                np.random.choice(np.arange(len(union)),
                                 size=int(len(union) / 5),
                                 replace=False))

            self.assertTrue(
                (union.X[random_indices] == union_mem.X[random_indices]).all())

            self.assertTrue((union.gene_names == union_mem.gene_names).all())
            self.assertTrue((union.cell_types == union_mem.cell_types).all())
            self.assertTrue(
                (union.batch_indices == union_mem.batch_indices).all())
            self.assertTrue((union.labels == union_mem.labels).all())

            unsupervised_training_one_epoch(union)

        except Exception:
            # Cleanup is deliberately kept failure-only, as before: a
            # successful run leaves the loom file in place.
            if os.path.exists(loom_path):
                os.remove(loom_path)
            # Bare raise re-raises the active exception unchanged.
            raise
예제 #4
0
    def test_concatenate_from_scvi_to_loom(self):
        """Join datasets given as scvi classes into a loom file and verify the result.

        Two ``SyntheticDataset`` configurations are joined three times:

        * in memory, in the order ``[dset1, dset2]`` (reference),
        * in memory, in the order ``[dset2, dset1]`` ("perturb" reference),
        * from the dataset classes and their arguments into a loom file
          (the union under test).

        Gene names and cell types must match the first reference. A random
        fifth of the rows of ``X``, the batch indices and the labels must
        match either of the two references. Finally
        ``unsupervised_training_one_epoch`` is run on the loom-backed union.

        If anything raises an ``Exception``, ``test_concat.loom`` under
        ``save_path`` is removed before the error is re-raised.
        """
        loom_path = os.path.join(save_path, "test_concat.loom")
        try:
            random_seed = 0
            dset1_args = {
                "batch_size": 10,
                "nb_genes": 4,
                "n_proteins": 4,
                "n_batches": 4,
                "n_labels": 3,
                "seed": random_seed
            }
            dset2_args = {
                "batch_size": 30,
                "nb_genes": 2,
                "n_proteins": 6,
                "n_batches": 2,
                "n_labels": 4,
                "seed": random_seed
            }
            dset1, dset2 = (SyntheticDataset(**dset1_args),
                            SyntheticDataset(**dset2_args))

            # Concatenate the datasets in memory first as reference
            union_from_mem_to_mem = UnionDataset(save_path=save_path,
                                                 low_memory=True,
                                                 ignore_batch_annotation=False)
            union_from_mem_to_mem.build_genemap(data_source="memory",
                                                gene_datasets=[dset1, dset2])
            union_from_mem_to_mem.join_datasets(data_source='memory',
                                                data_target='memory',
                                                gene_datasets=[dset1, dset2])

            # Second reference: same gene map, but the datasets are joined in
            # reverse order. The row-dependent assertions below accept a
            # match with either reference.
            union_from_mem_to_mem_perturb = UnionDataset(
                save_path=save_path,
                low_memory=True,
                ignore_batch_annotation=False)
            union_from_mem_to_mem_perturb.build_genemap(
                data_source="memory", gene_datasets=[dset1, dset2])
            union_from_mem_to_mem_perturb.join_datasets(
                data_source='memory',
                data_target='memory',
                gene_datasets=[dset2, dset1])

            # Load the datasets through their scvi classes and concatenate
            # them into a loom file. The gene map is still built from the
            # in-memory instances.
            union_from_scvi_to_loom = UnionDataset(
                save_path=save_path,
                low_memory=True,
                ignore_batch_annotation=False)
            union_from_scvi_to_loom.build_genemap(data_source="memory",
                                                  gene_datasets=[dset1, dset2])
            union_from_scvi_to_loom.join_datasets(
                data_source='scvi',
                data_target='loom',
                dataset_classes=[SyntheticDataset, SyntheticDataset],
                dataset_args=[dset1_args, dset2_args],
                out_filename="test_concat.loom")

            self.assertEqual(len(union_from_scvi_to_loom),
                             len(dset1) + len(dset2))

            # Compare a sorted random sample of one fifth of the rows.
            random_indices = np.sort(
                np.random.choice(np.arange(len(union_from_scvi_to_loom)),
                                 size=int(len(union_from_scvi_to_loom) / 5),
                                 replace=False))

            sampled_rows = union_from_scvi_to_loom.X[random_indices]
            self.assertTrue(
                (sampled_rows
                 == union_from_mem_to_mem.X[random_indices].toarray()).all()
                or (sampled_rows
                    == union_from_mem_to_mem_perturb.X[random_indices]
                    .toarray()).all())

            self.assertTrue((union_from_scvi_to_loom.gene_names ==
                             union_from_mem_to_mem.gene_names).all())
            self.assertTrue((union_from_scvi_to_loom.cell_types ==
                             union_from_mem_to_mem.cell_types).all())
            self.assertTrue(
                (union_from_scvi_to_loom.batch_indices
                 == union_from_mem_to_mem.batch_indices).all()
                or (union_from_scvi_to_loom.batch_indices
                    == union_from_mem_to_mem_perturb.batch_indices).all())
            self.assertTrue((union_from_scvi_to_loom.labels
                             == union_from_mem_to_mem.labels).all()
                            or (union_from_scvi_to_loom.labels
                                == union_from_mem_to_mem_perturb.labels).all())

            unsupervised_training_one_epoch(union_from_scvi_to_loom)

        except Exception:
            # Cleanup is deliberately kept failure-only, as before: a
            # successful run leaves the loom file in place.
            if os.path.exists(loom_path):
                os.remove(loom_path)
            # Bare raise re-raises the active exception unchanged.
            raise
예제 #5
0
    def test_concatenate_from_loom_to_hdf5(self):
        """Convert a loom-backed union into an hdf5 file and compare to an in-memory join.

        The fixtures are first joined in memory (reference) and into
        ``test_concat.loom``. That loom file is then used as the source of a
        third union that writes ``test_concat.h5``. Length, a random fifth of
        the rows of ``X``, gene names, cell types, batch indices and labels
        of the hdf5-backed union must match the reference, after which
        ``unsupervised_training_one_epoch`` is run on it.

        If anything raises an ``Exception``, both output files under
        ``save_path`` are removed before the error is re-raised.
        """
        try:
            dset1, dset2 = copy.deepcopy(dataset1), copy.deepcopy(dataset2)

            # Concatenate the datasets in memory first as reference
            union_from_mem_to_mem = UnionDataset(save_path=save_path,
                                                 low_memory=True,
                                                 ignore_batch_annotation=False)
            union_from_mem_to_mem.build_genemap(data_source="memory",
                                                gene_datasets=[dset1, dset2])
            union_from_mem_to_mem.join_datasets(data_source='memory',
                                                data_target='memory',
                                                gene_datasets=[dset1, dset2])

            # do the concatenation directly onto a loom file
            union_from_mem_to_loom = UnionDataset(
                save_path=save_path,
                low_memory=True,
                ignore_batch_annotation=False)
            union_from_mem_to_loom.build_genemap(data_source="memory",
                                                 gene_datasets=[dset1, dset2])
            union_from_mem_to_loom.join_datasets(
                data_source='memory',
                data_target='loom',
                gene_datasets=[dset1, dset2],
                out_filename="test_concat.loom")

            # convert the loom file to an hdf5 file
            union_from_loom_to_hdf5 = UnionDataset(
                save_path=save_path,
                low_memory=True,
                ignore_batch_annotation=False)
            union_from_loom_to_hdf5.build_genemap(data_source="memory",
                                                  gene_datasets=[dset1, dset2])
            union_from_loom_to_hdf5.join_datasets(
                data_source="loom",
                data_target="hdf5",
                in_filename="test_concat.loom",
                out_filename="test_concat.h5")

            self.assertEqual(len(union_from_loom_to_hdf5),
                             len(union_from_mem_to_mem))

            # Compare a sorted random sample of one fifth of the rows.
            random_indices = np.sort(
                np.random.choice(np.arange(len(union_from_mem_to_mem)),
                                 size=int(len(union_from_mem_to_mem) / 5),
                                 replace=False))

            self.assertTrue(
                (union_from_loom_to_hdf5.X[random_indices] ==
                 union_from_mem_to_mem.X[random_indices].toarray()).all())

            self.assertTrue((union_from_loom_to_hdf5.gene_names ==
                             union_from_mem_to_mem.gene_names).all())
            self.assertTrue((union_from_loom_to_hdf5.cell_types ==
                             union_from_mem_to_mem.cell_types).all())
            self.assertTrue((union_from_loom_to_hdf5.batch_indices ==
                             union_from_mem_to_mem.batch_indices).all())
            self.assertTrue(
                (union_from_loom_to_hdf5.labels == union_from_mem_to_mem.labels
                 ).all())

            unsupervised_training_one_epoch(union_from_loom_to_hdf5)

        except Exception:
            # This test requests two output files; remove whichever of them
            # exists so a failed run does not leave the hdf5 file behind.
            # Cleanup is deliberately kept failure-only, as before.
            for leftover in ("test_concat.loom", "test_concat.h5"):
                leftover_path = os.path.join(save_path, leftover)
                if os.path.exists(leftover_path):
                    os.remove(leftover_path)
            # Bare raise re-raises the active exception unchanged.
            raise