Example #1
0
    def test_heterogeneous_collections(self):
        """Check that heterogeneous collections are rejected.

        The validation runs inside ``RNATables.__init__``, so constructing
        an instance from a mixed collection is enough to trigger it.
        """
        # (process slug, source, expected error pattern) describing the
        # second Data object that is returned together with ``self.data``.
        scenarios = (
            # Different processes
            ("process-slug2", "ENSEMBL", r"Expressions of all samples.*"),
            # Different source
            ("process-slug", "GENCODE", r"Alignment of all samples.*"),
        )

        for slug, source, error_pattern in scenarios:
            other = MagicMock()
            other.id = 12345
            other.process.slug = slug
            other.output.__getitem__.side_effect = {"source": source}.__getitem__
            self.collection.data.filter = self.web_request([self.data, other])

            with self.assertRaisesRegex(ValueError, error_pattern):
                RNATables(self.collection)
Example #2
0
    def test_id_to_symbol(self, mapping_mock):
        """Exercise ``id_to_symbol`` before and after ``gene_ids`` is assigned.

        ``mapping_mock`` is presumably supplied by a patch decorator that is
        not part of this block.
        """
        mapping_mock.side_effect = self.web_request(self.gene_map)

        # No gene_ids assigned on this instance: the property must raise.
        tables = RNATables(self.collection)
        with self.assertRaises(ValueError):
            _ = tables.id_to_symbol

        tables = RNATables(self.collection)
        tables.gene_ids = ["ENSG001", "ENSG002", "ENSG003"]
        started = time()
        symbol_map = tables.id_to_symbol
        self.assertGreater(time() - started, 0.1)
        # NOTE(review): "H**o sapiens" looks like a redacted "Homo sapiens";
        # confirm against the collection fixture.
        mapping_mock.assert_called_with(
            ["ENSG001", "ENSG002", "ENSG003"], "ENSEMBL", "H**o sapiens"
        )
        self.assertIs(symbol_map, self.gene_map)

        # test if use case works
        renamed = self.expressions_df.rename(columns=tables.id_to_symbol)
        self.assertListEqual(renamed.columns.tolist(), ["GA", "GB", "GC"])

        # use cache
        started = time()
        symbol_map = tables.id_to_symbol
        self.assertLess(time() - started, 0.1)
        self.assertIs(symbol_map, self.gene_map)
Example #3
0
    def test_get_descriptors(self):
        """Compare ``_get_descriptors()`` with a one-row float ``PFS`` frame."""
        tables = RNATables(self.collection)
        actual = tables._get_descriptors()

        expected = pd.DataFrame(
            [1],
            columns=["PFS"],
            index=pd.Index([123], name="sample_id"),
            dtype=float,
        )

        assert_frame_equal(actual, expected)
Example #4
0
    def test_get_relations(self):
        """Compare ``_get_relations()`` with a one-row ``Category`` frame."""
        tables = RNATables(self.collection)
        actual = tables._get_relations()

        expected = pd.DataFrame(
            ["L1"],
            columns=["Category"],
            index=pd.Index([123], name="sample_id"),
        )

        assert_frame_equal(actual, expected)
Example #5
0
 def setUp(self):
     """Build the fixtures: temp cache dir, server connection, collection, tables."""
     self.test_server_url = "https://app.genialis.com"
     self.test_collection_slug = "resdk-test-collection-tables"

     # NOTE(review): nothing in this block removes cache_dir again; confirm
     # that a tearDown/cleanup elsewhere deletes it.
     self.cache_dir = tempfile.mkdtemp()

     credentials = {"username": "******", "password": "******"}
     self.res = resdk.Resolwe(url=self.test_server_url, **credentials)

     self.collection = self.res.collection.get(self.test_collection_slug)
     self.ct = RNATables(self.collection, cache_dir=self.cache_dir)
Example #6
0
    def test_init(self, exists_mock):
        """Check ``collection``/``cache_dir`` and the path given to ``exists_mock``.

        ``exists_mock`` is presumably supplied by a patch decorator that is
        not part of this block.
        """
        default_dir = "/tmp/resdk/"
        tables = RNATables(self.collection)

        self.assertIs(tables.collection, self.collection)
        self.assertEqual(tables.cache_dir, default_dir)
        exists_mock.assert_called_with(default_dir)

        # using different cache dir
        custom_dir = "/tmp/cache_dir/"
        tables = RNATables(self.collection, cache_dir=custom_dir)
        self.assertEqual(tables.cache_dir, custom_dir)
        exists_mock.assert_called_with(custom_dir)
Example #7
0
    def test_get_orange_data(self):
        """Feed a mocked tab-separated payload through ``_get_orange_data()``."""
        # The session's GET returns an object whose ``content`` is the raw table.
        payload = MagicMock()
        payload.content = b"mS#Sample ID\tCol1\n123\t42"
        self.collection.resolwe.session.get.return_value = payload
        self.collection.data.get = self.web_request(self.orange_data)

        tables = RNATables(self.collection)
        actual = tables._get_orange_data()

        expected = pd.DataFrame(
            [42],
            columns=["Col1"],
            index=pd.Index([123], name="sample_id"),
        )

        assert_frame_equal(actual, expected)
Example #8
0
    def test_get_data_uri(self):
        """Check ``_get_data_uri`` for exactly one, zero and two listed files."""
        self.data.files.return_value = ["exp_file.csv"]

        tables = RNATables(self.collection)
        self.assertEqual(
            tables._get_data_uri(self.data, RNATables.EXP), "12345/exp_file.csv"
        )

        # Both an empty listing and an ambiguous one must raise LookupError.
        for listing in ([], ["exp_file1.csv", "exp_file2.csv"]):
            self.data.files.return_value = listing
            with self.assertRaises(LookupError):
                tables._get_data_uri(self.data, RNATables.EXP)
Example #9
0
    def test_data_version(self):
        """Check ``_data_version`` value, repeated access and the empty case."""
        tables = RNATables(self.collection)
        self.assertEqual(tables._data_version, str(hash((12345,))))

        # use cache
        started = time()
        _ = tables._data_version
        self.assertLess(time() - started, 0.1)

        # With no Data objects returned by the filter, the property must raise.
        self.collection.data.filter = MagicMock(return_value=[])
        tables = RNATables(self.collection)
        with self.assertRaises(ValueError):
            _ = tables._data_version
Example #10
0
    def test_qc_version(self):
        """Check ``_qc_version`` value, repeated access and the empty case."""
        self.collection.data.filter = self.web_request([self.data])

        tables = RNATables(self.collection)
        self.assertEqual(tables._qc_version, str(hash((12345,))))

        # use cache
        started = time()
        _ = tables._qc_version
        self.assertLess(time() - started, 0.1)

        # With an empty result from the filter, the property must raise.
        self.collection.data.filter = self.web_request([])
        empty_tables = RNATables(self.collection)
        with self.assertRaises(ValueError):
            _ = empty_tables._qc_version
Example #11
0
    def test_caching(self):
        """Check that repeated ``rc`` access gets faster: fetch, disk, memory."""
        # First access through self.ct populates the cache.
        started = time.time()
        fetched = self.ct.rc
        fetch_seconds = time.time() - started

        # Make sure that cache file is created
        self.assertTrue(os.path.isfile(self.ct._cache_file(self.ct.RC)))

        # Make new table instance (to prevent loading from memory)
        fresh_tables = RNATables(self.collection, cache_dir=self.cache_dir)

        # First access on the new instance should load from the disk cache.
        started = time.time()
        from_disk = fresh_tables.rc
        disk_seconds = time.time() - started
        self.assertTrue((fetched == from_disk).all(axis=None))
        self.assertLess(disk_seconds, fetch_seconds)

        # Second access on the same instance tests loading from memory.
        started = time.time()
        from_memory = fresh_tables.rc
        memory_seconds = time.time() - started
        self.assertTrue((from_disk == from_memory).all(axis=None))
        self.assertLess(memory_seconds, disk_seconds)
Example #12
0
    def test_metadata_version(self):
        """Check ``_metadata_version`` value, repeated access and lookup failure."""
        getters = (
            (self.collection.samples, self.sample),
            (self.collection.relations, self.relation),
            (self.collection.data, self.orange_data),
        )
        for manager, result in getters:
            manager.get = self.web_request(result)

        tables = RNATables(self.collection)
        self.assertEqual(tables._metadata_version, "2020-11-01T12:15:00Z")

        # use cache
        started = time()
        _ = tables._metadata_version
        self.assertLess(time() - started, 0.1)

        # A LookupError from samples.get must surface as ValueError.
        self.collection.samples.get = MagicMock(side_effect=LookupError())
        failing_tables = RNATables(self.collection)
        with self.assertRaises(ValueError):
            _ = failing_tables._metadata_version
Example #13
0
    def test_download_metadata(self, orange_mock, relations_mock, descriptors_mock):
        """Check that ``_download_metadata()`` combines the three mocked frames.

        The mocks are presumably supplied by patch decorators that are not
        part of this block.
        """
        sample_ids = [123]
        descriptors_mock.return_value = self.metadata_df
        relations_mock.return_value = pd.DataFrame(
            [["A"]], index=sample_ids, columns=["Replicate"]
        )
        orange_mock.return_value = pd.DataFrame(
            [["X"]], index=sample_ids, columns=["Clinical"]
        )

        tables = RNATables(self.collection)
        actual = tables._download_metadata()

        # One column from each source, in this order.
        expected = pd.DataFrame(
            [[0, "A", "X"]],
            columns=["PFS", "Replicate", "Clinical"],
            index=pd.Index([123], name="sample_id"),
        )

        assert_frame_equal(actual, expected)
Example #14
0
    def test_download_mapping(self):
        """Check the feature query and the dict built by ``_download_mapping``."""

        def fake_feature(feature_id, symbol):
            """Return a mock with ``feature_id`` and ``name`` attributes."""
            feature = MagicMock(feature_id=feature_id)
            # ``name`` passed to the MagicMock constructor names the mock
            # itself, so the attribute has to be assigned afterwards.
            feature.name = symbol
            return feature

        feature_filter = self.resolwe.feature.filter
        feature_filter.return_value = [
            fake_feature(*item) for item in self.gene_map.items()
        ]

        tables = RNATables(self.collection)
        # NOTE(review): "H**o sapiens" looks like a redacted "Homo sapiens";
        # it is used consistently within this test.
        result = tables._download_mapping(
            ["ENSG001", "ENSG002", "ENSG003"], "ENSEMBL", "H**o sapiens"
        )

        feature_filter.assert_called_once()
        feature_filter.assert_called_once_with(
            source="ENSEMBL",
            species="H**o sapiens",
            feature_id__in=["ENSG001", "ENSG002", "ENSG003"],
        )
        self.assertDictEqual(result, self.gene_map)
Example #15
0
    def test_meta(self, load_mock):
        """Check that ``meta`` goes through ``load_mock`` once, then is cached.

        ``load_mock`` is presumably supplied by a patch decorator that is not
        part of this block.
        """
        load_mock.side_effect = self.web_request(self.metadata_df)
        tables = RNATables(self.collection)

        # First access is expected to take longer than 0.1 s.
        started = time()
        first = tables.meta
        self.assertGreater(time() - started, 0.1)
        self.assertIs(first, self.metadata_df)
        load_mock.assert_called_with(RNATables.META)

        # use cache
        started = time()
        second = tables.meta
        self.assertLess(time() - started, 0.1)
        self.assertIs(second, self.metadata_df)
Example #16
0
    def test_mapping(self, download_mock, save_mock, load_mock):
        """Check that ``_mapping`` downloads only uncached ids and saves the map.

        The three mocks are presumably supplied by patch decorators that are
        not part of this block.
        """
        gene_ids = ["ENSG001", "ENSG002", "ENSG003"]
        source = "ENSEMBL"
        # Must agree with the species embedded in the expected cache path
        # asserted below ("ENSEMBL_Homo sapiens.pickle").
        species = "Homo sapiens"

        # Nothing cached: every id has to be downloaded.
        load_mock.return_value = None
        download_mock.return_value = self.gene_map

        ct = RNATables(self.collection)
        mapping = ct._mapping(list(gene_ids), source, species)
        self.assertDictEqual(mapping, self.gene_map)
        self.assertListEqual(sorted(download_mock.call_args[0][0]), gene_ids)
        save_mock.assert_called_with(
            self.gene_map, "/tmp/resdk/ENSEMBL_Homo sapiens.pickle", override=True
        )

        # download only missing values
        download_mock.reset_mock()
        load_mock.return_value = {"ENSG002": "GB", "ENSG003": "GC"}
        mapping = ct._mapping(list(gene_ids), source, species)
        self.assertDictEqual(mapping, self.gene_map)
        self.assertListEqual(sorted(download_mock.call_args[0][0]), ["ENSG001"])
Example #17
0
    def test_rc(self, load_mock):
        """Check that ``rc`` goes through ``load_mock`` once, then is cached.

        ``load_mock`` is presumably supplied by a patch decorator that is not
        part of this block.
        """
        load_mock.side_effect = self.web_request(self.expressions_df)
        tables = RNATables(self.collection)

        # First access is expected to take longer than 0.1 s.
        started = time()
        first = tables.rc
        self.assertGreater(time() - started, 0.1)
        self.assertIs(first, self.expressions_df)
        load_mock.assert_called_with(RNATables.RC)
        # Accessing rc is also expected to populate gene_ids.
        self.assertListEqual(tables.gene_ids, ["ENSG001", "ENSG002", "ENSG003"])

        # use cache
        started = time()
        second = tables.rc
        self.assertLess(time() - started, 0.1)
        self.assertIs(second, self.expressions_df)
Example #18
0
    def test_qc(self, load_mock):
        """Check that ``qc`` goes through ``load_mock`` once, then is cached.

        This test reads the ``qc`` property (not ``meta``), so that the QC
        table is what is actually exercised. ``load_mock`` is presumably
        supplied by a patch decorator that is not part of this block.
        """
        qc_df = pd.DataFrame(
            [[None, 30], [12, 42]],
            index=["0", "1"],
            columns=["total_read_count_raw", "total_read_count_trimmed"],
        )
        load_mock.side_effect = self.web_request(qc_df)

        ct = RNATables(self.collection)
        t = time()
        qc = ct.qc
        self.assertGreater(time() - t, 0.1)
        self.assertIs(qc, qc_df)
        # NOTE(review): assumes RNATables.QC is the table-type constant for
        # the QC table, mirroring META / RC / EXP - confirm.
        load_mock.assert_called_with(RNATables.QC)

        # use cache
        t = time()
        qc = ct.qc
        self.assertLess(time() - t, 0.1)
        self.assertIs(qc, qc_df)
Example #19
0
    def test_load_fetch(self, data_mock, meta_mock, save_mock, load_mock):
        """Check ``_load_fetch`` for META, EXP and RC, then with a cache hit.

        The four mocks are presumably supplied by patch decorators that are
        not part of this block.
        """
        data_mock.return_value = self.expressions_df
        meta_mock.return_value = self.metadata_df
        # Nothing is returned by the load mock, so each table must be fetched.
        load_mock.return_value = None

        self.collection.samples.get = self.web_request(self.sample)
        self.collection.relations.get = self.web_request(self.relation)
        self.collection.data.get = self.web_request(self.orange_data)
        tables = RNATables(self.collection)

        # Version suffix expected in the EXP and RC cache file names.
        data_version = str(hash((12345,)))

        fetched = tables._load_fetch(RNATables.META)
        self.assertIs(fetched, self.metadata_df)
        save_mock.assert_called_with(
            self.metadata_df,
            "/tmp/resdk/slug_meta_None_None_2020-11-01T12:15:00Z.pickle",
        )

        save_mock.reset_mock()
        fetched = tables._load_fetch(RNATables.EXP)
        self.assertIs(fetched, self.expressions_df)
        data_mock.assert_called_with(RNATables.EXP)
        save_mock.assert_called_with(
            self.expressions_df,
            f"/tmp/resdk/slug_exp_None_None_{data_version}.pickle",
        )

        data_mock.reset_mock()
        save_mock.reset_mock()
        fetched = tables._load_fetch(RNATables.RC)
        self.assertIs(fetched, self.expressions_df)
        data_mock.assert_called_with(RNATables.RC)
        save_mock.assert_called_with(
            self.expressions_df,
            f"/tmp/resdk/slug_rc_None_None_{data_version}.pickle",
        )

        # When the load mock returns a frame, no download may happen.
        data_mock.reset_mock()
        load_mock.return_value = self.expressions_df
        fetched = tables._load_fetch(RNATables.EXP)
        self.assertIs(fetched, self.expressions_df)
        data_mock.assert_not_called()
Example #20
0
 def test_clear_cache(self, clear_mock):
     """Check that ``RNATables.clear_cache()`` triggers ``clear_mock``.

     ``clear_mock`` is presumably supplied by a patch decorator that is not
     part of this block.
     """
     # Called on the class itself; no instance is created.
     RNATables.clear_cache()

     clear_mock.assert_called()
Example #21
0
class TestTables(BaseResdkFunctionalTest):
    """Functional tests for ``RNATables`` against a collection on a live server."""

    def setUp(self):
        """Create a temp cache dir, connect to the server and build the tables."""
        self.cache_dir = tempfile.mkdtemp()
        # tearDown() is skipped when setUp() raises (e.g. the server is
        # unreachable below), so also register the removal as a cleanup.
        # ignore_errors makes it a no-op once tearDown() has removed the dir.
        self.addCleanup(shutil.rmtree, self.cache_dir, ignore_errors=True)

        self.test_server_url = "https://app.genialis.com"
        self.test_collection_slug = "resdk-test-collection-tables"
        self.res = resdk.Resolwe(
            url=self.test_server_url, username="******", password="******"
        )
        self.collection = self.res.collection.get(self.test_collection_slug)
        self.ct = RNATables(self.collection, cache_dir=self.cache_dir)

    def tearDown(self):
        """Remove the temporary cache directory."""
        shutil.rmtree(self.cache_dir)

    def test_meta(self):
        """Check shape, a known sample id and a known column of ``meta``."""
        self.assertEqual(self.ct.meta.shape, (8, 9))
        self.assertIn(39000, self.ct.meta.index)
        self.assertIn("general.species", self.ct.meta.columns)

    def test_qc(self):
        """Check shape, a known sample id, column and value of ``qc``."""
        self.assertEqual(self.ct.qc.shape, (8, 12))
        self.assertIn(39000, self.ct.qc.index)
        self.assertIn("total_read_count_raw", self.ct.qc.columns)
        self.assertEqual(int(self.ct.qc.loc[39000, "total_read_count_raw"]), 42738650)

    def test_rc(self):
        """Check shape, index, columns and the first value/type of ``rc``."""
        self.assertEqual(self.ct.rc.shape, (8, 58487))
        self.assertIn(39000, self.ct.rc.index)
        self.assertIn("ENSG00000000003", self.ct.rc.columns)
        self.assertEqual(self.ct.rc.iloc[0, 0], 792)
        self.assertIsInstance(self.ct.rc.iloc[0, 0], np.int64)

    def test_exp(self):
        """Check shape, index, columns and the first value/type of ``exp``."""
        self.assertEqual(self.ct.exp.shape, (8, 58487))
        self.assertIn(39000, self.ct.exp.index)
        self.assertIn("ENSG00000000003", self.ct.exp.columns)
        self.assertAlmostEqual(self.ct.exp.iloc[0, 0], 19.447467, places=3)
        self.assertIsInstance(self.ct.exp.iloc[0, 0], np.float64)

    def test_consistent_index(self):
        """Check that ``exp`` and ``rc`` share the index of ``meta``."""
        self.assertTrue(all(self.ct.exp.index == self.ct.meta.index))
        self.assertTrue(all(self.ct.rc.index == self.ct.meta.index))

    def test_caching(self):
        """Check that repeated ``rc`` access gets faster: fetch, disk, memory."""
        # Call rc first time with self.ct to populate the cache
        t0 = time.time()
        rc1 = self.ct.rc
        t1 = time.time() - t0

        # Make sure that cache file is created
        cache_file = self.ct._cache_file(self.ct.RC)
        self.assertTrue(os.path.isfile(cache_file))

        # Make new table instance (to prevent loading from memory)
        ct2 = RNATables(self.collection, cache_dir=self.cache_dir)
        # First access on the new instance should load from the disk cache
        t0 = time.time()
        rc2 = ct2.rc
        t2 = time.time() - t0
        self.assertTrue((rc1 == rc2).all(axis=None))
        self.assertLess(t2, t1)

        # Second access on ct2 tests loading from memory
        t0 = time.time()
        rc3 = ct2.rc
        t3 = time.time() - t0
        self.assertTrue((rc2 == rc3).all(axis=None))
        self.assertLess(t3, t2)
Example #22
0
 def test_get_orange_object(self):
     """Check that ``_get_orange_object()`` returns the mocked Orange data."""
     # Orange Data is found ad-hoc
     self.collection.data.get = self.web_request(self.orange_data)

     tables = RNATables(self.collection)
     self.assertEqual(tables._get_orange_object(), self.orange_data)