예제 #1
0
class DatasetCollectionApiTestCase(ApiTestCase):

    def setUp(self):
        super().setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_create_pair_from_history(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            instance_type="history",
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_datasets = dataset_collection["elements"]
        assert len(returned_datasets) == 2, dataset_collection

    def test_create_list_from_history(self):
        element_identifiers = self.dataset_collection_populator.list_identifiers(self.history_id)

        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )

        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_datasets = dataset_collection["elements"]
        assert len(returned_datasets) == 3, dataset_collection

    def test_create_list_of_existing_pairs(self):
        pair_payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            instance_type="history",
        )
        pair_create_response = self._post("dataset_collections", pair_payload)
        dataset_collection = self._check_create_response(pair_create_response)
        hdca_id = dataset_collection["id"]

        element_identifiers = [
            dict(name="test1", src="hdca", id=hdca_id)
        ]

        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_collections = dataset_collection["elements"]
        assert len(returned_collections) == 1, dataset_collection

    def test_create_list_of_new_pairs(self):
        identifiers = self.dataset_collection_populator.nested_collection_identifiers(self.history_id, "list:paired")
        payload = dict(
            collection_type="list:paired",
            instance_type="history",
            history_id=self.history_id,
            name="a nested collection",
            element_identifiers=json.dumps(identifiers),
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        assert dataset_collection["collection_type"] == "list:paired"
        assert dataset_collection["name"] == "a nested collection"
        returned_collections = dataset_collection["elements"]
        assert len(returned_collections) == 1, dataset_collection
        pair_1_element = returned_collections[0]
        self._assert_has_keys(pair_1_element, "element_identifier", "element_index", "object")
        assert pair_1_element["element_identifier"] == "test_level_1", pair_1_element
        assert pair_1_element["element_index"] == 0, pair_1_element
        pair_1_object = pair_1_element["object"]
        self._assert_has_keys(pair_1_object, "collection_type", "elements", "element_count")
        self.assertEqual(pair_1_object["collection_type"], "paired")
        self.assertEqual(pair_1_object["populated"], True)
        pair_elements = pair_1_object["elements"]
        assert len(pair_elements) == 2
        pair_1_element_1 = pair_elements[0]
        assert pair_1_element_1["element_index"] == 0

    def test_list_download(self):
        fetch_response = self.dataset_collection_populator.create_list_in_history(self.history_id, direct_upload=True).json()
        dataset_collection = self.dataset_collection_populator.wait_for_fetched_collection(fetch_response)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 3, dataset_collection
        create_response = self._download_dataset_collection(history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        archive = zipfile.ZipFile(BytesIO(create_response.content))
        namelist = archive.namelist()
        assert len(namelist) == 3, "Expected 3 elements in [%s]" % namelist
        collection_name = dataset_collection['name']
        for element, zip_path in zip(returned_dce, namelist):
            assert f"{collection_name}/{element['element_identifier']}.{element['object']['file_ext']}" == zip_path

    def test_pair_download(self):
        fetch_response = self.dataset_collection_populator.create_pair_in_history(self.history_id, direct_upload=True).json()
        dataset_collection = self.dataset_collection_populator.wait_for_fetched_collection(fetch_response)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 2, dataset_collection
        hdca_id = dataset_collection['id']
        create_response = self._download_dataset_collection(history_id=self.history_id, hdca_id=hdca_id)
        self._assert_status_code_is(create_response, 200)
        archive = zipfile.ZipFile(BytesIO(create_response.content))
        namelist = archive.namelist()
        assert len(namelist) == 2, "Expected 2 elements in [%s]" % namelist
        collection_name = dataset_collection['name']
        for element, zip_path in zip(returned_dce, namelist):
            assert "{}/{}.{}".format(collection_name, element['element_identifier'], element['object']['file_ext']) == zip_path

    def test_list_pair_download(self):
        fetch_response = self.dataset_collection_populator.create_list_of_pairs_in_history(self.history_id).json()
        dataset_collection = self.dataset_collection_populator.wait_for_fetched_collection(fetch_response)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        list_collection_name = dataset_collection['name']
        pair = returned_dce[0]
        create_response = self._download_dataset_collection(history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        archive = zipfile.ZipFile(BytesIO(create_response.content))
        namelist = archive.namelist()
        assert len(namelist) == 2, "Expected 2 elements in [%s]" % namelist
        pair_collection_name = pair['element_identifier']
        for element, zip_path in zip(pair['object']['elements'], namelist):
            assert "{}/{}/{}.{}".format(list_collection_name, pair_collection_name, element['element_identifier'], element['object']['file_ext']) == zip_path

    def test_list_list_download(self):
        dataset_collection = self.dataset_collection_populator.create_list_of_list_in_history(self.history_id).json()
        self.dataset_collection_populator.wait_for_dataset_collection(dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        create_response = self._download_dataset_collection(history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        archive = zipfile.ZipFile(BytesIO(create_response.content))
        namelist = archive.namelist()
        assert len(namelist) == 3, "Expected 3 elements in [%s]" % namelist

    def test_list_list_list_download(self):
        dataset_collection = self.dataset_collection_populator.create_list_of_list_in_history(self.history_id, collection_type='list:list:list').json()
        self.dataset_collection_populator.wait_for_dataset_collection(dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        create_response = self._download_dataset_collection(history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        archive = zipfile.ZipFile(BytesIO(create_response.content))
        namelist = archive.namelist()
        assert len(namelist) == 3, "Expected 3 elements in [%s]" % namelist

    def test_hda_security(self):
        element_identifiers = self.dataset_collection_populator.pair_identifiers(self.history_id)
        self.dataset_populator.make_private(self.history_id, element_identifiers[0]["id"])
        with self._different_user():
            history_id = self.dataset_populator.new_history()
            payload = dict(
                instance_type="history",
                history_id=history_id,
                element_identifiers=json.dumps(element_identifiers),
                collection_type="paired",
            )
            create_response = self._post("dataset_collections", payload)
            self._assert_status_code_is(create_response, 403)

    def test_enforces_unique_names(self):
        element_identifiers = self.dataset_collection_populator.list_identifiers(self.history_id)
        element_identifiers[2]["name"] = element_identifiers[0]["name"]
        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )

        create_response = self._post("dataset_collections", payload)
        self._assert_status_code_is(create_response, 400)

    def test_upload_collection(self):
        elements = [{"src": "files", "dbkey": "hg19", "info": "my cool bed", "tags": ["name:data1", "group:condition:treated", "machine:illumina"]}]
        targets = [{
            "destination": {"type": "hdca"},
            "elements": elements,
            "collection_type": "list",
            "name": "Test upload",
            "tags": ["name:collection1"]
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {"files_0|file_data": open(self.test_data_resolver.get_filename("4.bed"))},
        }
        self.dataset_populator.fetch(payload)
        hdca = self._assert_one_collection_created_in_history()
        self.assertEqual(hdca["name"], "Test upload")
        hdca_tags = hdca["tags"]
        assert len(hdca_tags) == 1
        assert "name:collection1" in hdca_tags
        assert len(hdca["elements"]) == 1, hdca
        element0 = hdca["elements"][0]
        assert element0["element_identifier"] == "4.bed"
        dataset0 = element0["object"]
        assert dataset0["file_size"] == 61
        dataset_tags = dataset0["tags"]
        assert len(dataset_tags) == 3, dataset0

    def test_upload_nested(self):
        elements = [{"name": "samp1", "elements": [{"src": "files", "dbkey": "hg19", "info": "my cool bed"}]}]
        targets = [{
            "destination": {"type": "hdca"},
            "elements": elements,
            "collection_type": "list:list",
            "name": "Test upload",
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {"files_0|file_data": open(self.test_data_resolver.get_filename("4.bed"))},
        }
        self.dataset_populator.fetch(payload)
        hdca = self._assert_one_collection_created_in_history()
        self.assertEqual(hdca["name"], "Test upload")
        assert len(hdca["elements"]) == 1, hdca
        element0 = hdca["elements"][0]
        assert element0["element_identifier"] == "samp1"

    @skip_if_github_down
    def test_upload_collection_from_url(self):
        elements = [{"src": "url", "url": "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed", "info": "my cool bed"}]
        targets = [{
            "destination": {"type": "hdca"},
            "elements": elements,
            "collection_type": "list",
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {"files_0|file_data": open(self.test_data_resolver.get_filename("4.bed"))},
        }
        self.dataset_populator.fetch(payload)
        hdca = self._assert_one_collection_created_in_history()
        assert len(hdca["elements"]) == 1, hdca
        element0 = hdca["elements"][0]
        assert element0["element_identifier"] == "4.bed"
        assert element0["object"]["file_size"] == 61

    @skip_if_github_down
    def test_upload_collection_failed_expansion_url(self):
        targets = [{
            "destination": {"type": "hdca"},
            "elements_from": "bagit",
            "collection_type": "list",
            "src": "url",
            "url": "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed",
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {"files_0|file_data": open(self.test_data_resolver.get_filename("4.bed"))},
        }
        self.dataset_populator.fetch(payload, assert_ok=False, wait=True)
        hdca = self._assert_one_collection_created_in_history()
        assert hdca["populated"] is False
        assert "bagit.txt" in hdca["populated_state_message"], hdca

    def _assert_one_collection_created_in_history(self):
        contents_response = self._get("histories/%s/contents/dataset_collections" % self.history_id)
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        assert len(contents) == 1
        hdca = contents[0]
        assert hdca["history_content_type"] == "dataset_collection"
        hdca_id = hdca["id"]
        collection_response = self._get(f"histories/{self.history_id}/contents/dataset_collections/{hdca_id}")
        self._assert_status_code_is(collection_response, 200)
        return collection_response.json()

    def _check_create_response(self, create_response):
        self._assert_status_code_is(create_response, 200)
        dataset_collection = create_response.json()
        self._assert_has_keys(dataset_collection, "elements", "url", "name", "collection_type", "element_count")
        return dataset_collection

    def _download_dataset_collection(self, history_id, hdca_id):
        return self._get(f"histories/{history_id}/contents/dataset_collections/{hdca_id}/download")

    def test_collection_contents_security(self):
        # request contents on an hdca that doesn't belong to user
        hdca, contents_url = self._create_collection_contents_pair()
        with self._different_user():
            contents_response = self._get(contents_url)
            self._assert_status_code_is(contents_response, 403)

    def test_collection_contents_invalid_collection(self):
        # request an invalid collection from a valid hdca, should get 404
        hdca, contents_url = self._create_collection_contents_pair()
        response = self._get(contents_url)
        self._assert_status_code_is(response, 200)
        fake_collection_id = '5d7db0757a2eb7ef'
        fake_contents_url = '/api/dataset_collections/{}/contents/{}'.format(hdca['id'], fake_collection_id)
        error_response = self._get(fake_contents_url)
        assert_object_id_error(error_response)

    def test_show_dataset_collection_contents(self):
        # Get contents_url from history contents, use it to show the first level
        # of collection contents in the created HDCA, then use it again to drill
        # down into the nested collection contents
        hdca = self.dataset_collection_populator.create_list_of_list_in_history(self.history_id).json()
        root_contents_url = self._get_contents_url_for_hdca(hdca)

        # check root contents for this collection
        root_contents = self._get(root_contents_url).json()
        assert len(root_contents) == len(hdca['elements'])
        self._compare_collection_contents_elements(root_contents, hdca['elements'])

        # drill down, retrieve nested collection contents
        assert 'object' in root_contents[0]
        assert 'contents_url' in root_contents[0]['object']
        drill_contents_url = root_contents[0]['object']['contents_url']
        drill_contents = self._get(drill_contents_url).json()
        assert len(drill_contents) == len(hdca['elements'][0]['object']['elements'])
        self._compare_collection_contents_elements(drill_contents, hdca['elements'][0]['object']['elements'])

    def test_collection_contents_limit_offset(self):
        # check limit/offset params for collection contents endpoint
        hdca, root_contents_url = self._create_collection_contents_pair()

        # check limit
        limited_contents = self._get(root_contents_url + '?limit=1').json()
        assert len(limited_contents) == 1
        assert limited_contents[0]['element_index'] == 0

        # check offset
        offset_contents = self._get(root_contents_url + '?offset=1').json()
        assert len(offset_contents) == 1
        assert offset_contents[0]['element_index'] == 1

    def _compare_collection_contents_elements(self, contents_elements, hdca_elements):
        # compare collection api results to existing hdca element contents
        fields = ['element_identifier', 'element_index', 'element_type', 'id', 'model_class']
        for (content_element, hdca_element) in zip(contents_elements, hdca_elements):
            for f in fields:
                assert content_element[f] == hdca_element[f]

    def _create_collection_contents_pair(self):
        # Create a simple collection, return hdca and contents_url
        payload = self.dataset_collection_populator.create_pair_payload(self.history_id, instance_type="history")
        create_response = self._post("dataset_collections", payload)
        hdca = self._check_create_response(create_response)
        root_contents_url = self._get_contents_url_for_hdca(hdca)
        return hdca, root_contents_url

    def _get_contents_url_for_hdca(self, hdca):
        # look up the history contents using optional serialization key
        history_contents_url = "histories/%s/contents?v=dev&view=summary&keys=contents_url" % (self.history_id)
        json = self._get(history_contents_url).json()

        # filter out the collection we just made id = hdca.id
        # make sure the contents_url appears
        def find_hdca(c):
            return c['history_content_type'] == 'dataset_collection' and c['id'] == hdca['id']

        matches = list(filter(find_hdca, json))
        assert len(matches) == 1
        assert 'contents_url' in matches[0]

        return matches[0]['contents_url']
예제 #2
0
class HistoryContentsApiTestCase(ApiTestCase):
    def setUp(self):
        super().setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(
            self.galaxy_interactor)
        self.library_populator = LibraryPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_index_hda_summary(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        contents_response = self._get("histories/%s/contents" %
                                      self.history_id)
        hda_summary = self.__check_for_hda(contents_response, hda1)
        assert "display_types" not in hda_summary  # Quick summary, not full details

    def test_make_private_and_public(self):
        hda1 = self._wait_for_new_hda()
        update_url = "histories/{}/contents/{}/permissions".format(
            self.history_id, hda1["id"])

        role_id = self.dataset_populator.user_private_role_id()
        # Give manage permission to the user.
        payload = {
            "access": [],
            "manage": [role_id],
        }
        update_response = self._update_permissions(update_url,
                                                   payload,
                                                   admin=True)
        self._assert_status_code_is(update_response, 200)
        self._assert_other_user_can_access(hda1["id"])
        # Then we restrict access.
        payload = {
            "action": "make_private",
        }
        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)
        self._assert_other_user_cannot_access(hda1["id"])

        # Then we restrict access.
        payload = {
            "action": "remove_restrictions",
        }
        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)
        self._assert_other_user_can_access(hda1["id"])

    def test_set_permissions_add_admin_history_contents(self):
        self._verify_dataset_permissions("history_contents")

    def test_set_permissions_add_admin_datasets(self):
        self._verify_dataset_permissions("dataset")

    def _verify_dataset_permissions(self, api_endpoint):
        hda1 = self._wait_for_new_hda()
        hda_id = hda1["id"]
        if api_endpoint == "history_contents":
            update_url = f"histories/{self.history_id}/contents/{hda_id}/permissions"
        else:
            update_url = "datasets/%s/permissions" % hda_id

        role_id = self.dataset_populator.user_private_role_id()

        payload = {
            "access": [role_id],
            "manage": [role_id],
        }

        # Other users cannot modify permissions.
        with self._different_user():
            update_response = self._update_permissions(update_url, payload)
            self._assert_status_code_is(update_response, 403)

        # First the details render for another user.
        self._assert_other_user_can_access(hda_id)

        # Then we restrict access.
        update_response = self._update_permissions(update_url,
                                                   payload,
                                                   admin=True)
        self._assert_status_code_is(update_response, 200)

        # Finally the details don't render.
        self._assert_other_user_cannot_access(hda_id)

        # But they do for the original user.
        contents_response = self._get(
            f"histories/{self.history_id}/contents/{hda_id}").json()
        assert "name" in contents_response

        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)

        payload = {
            "access": [role_id],
            "manage": [role_id],
        }
        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)
        self._assert_other_user_cannot_access(hda_id)

        user_id = self.dataset_populator.user_id()
        with self._different_user():
            different_user_id = self.dataset_populator.user_id()
        combined_user_role = self.dataset_populator.create_role(
            [user_id, different_user_id],
            description="role for testing permissions")

        payload = {
            "access": [combined_user_role["id"]],
            "manage": [role_id],
        }
        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)
        # Now other user can see dataset again with access permission.
        self._assert_other_user_can_access(hda_id)
        # access doesn't imply management though...
        with self._different_user():
            update_response = self._update_permissions(update_url, payload)
            self._assert_status_code_is(update_response, 403)

    def _assert_other_user_cannot_access(self, history_content_id):
        with self._different_user():
            contents_response = self.dataset_populator.get_history_dataset_details_raw(
                history_id=self.history_id, dataset_id=history_content_id)
            assert contents_response.status_code == 403

    def _assert_other_user_can_access(self, history_content_id):
        with self._different_user():
            contents_response = self.dataset_populator.get_history_dataset_details_raw(
                history_id=self.history_id, dataset_id=history_content_id)
            contents_response.raise_for_status()
            assert "name" in contents_response.json()

    def test_index_hda_all_details(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        contents_response = self._get("histories/%s/contents?details=all" %
                                      self.history_id)
        hda_details = self.__check_for_hda(contents_response, hda1)
        self.__assert_hda_has_full_details(hda_details)

    def test_index_hda_detail_by_id(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        contents_response = self._get(
            "histories/{}/contents?details={}".format(self.history_id,
                                                      hda1["id"]))
        hda_details = self.__check_for_hda(contents_response, hda1)
        self.__assert_hda_has_full_details(hda_details)

    def test_show_hda(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        show_response = self.__show(hda1)
        self._assert_status_code_is(show_response, 200)
        self.__assert_matches_hda(hda1, show_response.json())

    def test_hda_copy(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        create_data = dict(
            source='hda',
            content=hda1["id"],
        )
        second_history_id = self.dataset_populator.new_history()
        assert self.__count_contents(second_history_id) == 0
        create_response = self._post(
            "histories/%s/contents" % second_history_id, create_data)
        self._assert_status_code_is(create_response, 200)
        assert self.__count_contents(second_history_id) == 1

    def test_library_copy(self):
        ld = self.library_populator.new_library_dataset("lda_test_library")
        create_data = dict(
            source='library',
            content=ld["id"],
        )
        assert self.__count_contents(self.history_id) == 0
        create_response = self._post("histories/%s/contents" % self.history_id,
                                     create_data)
        self._assert_status_code_is(create_response, 200)
        assert self.__count_contents(self.history_id) == 1

    def test_update(self):
        hda1 = self._wait_for_new_hda()
        assert str(hda1["deleted"]).lower() == "false"
        update_response = self._raw_update(hda1["id"], dict(deleted=True))
        self._assert_status_code_is(update_response, 200)
        show_response = self.__show(hda1)
        assert str(show_response.json()["deleted"]).lower() == "true"

        update_response = self._raw_update(hda1["id"],
                                           dict(name="Updated Name"))
        assert self.__show(hda1).json()["name"] == "Updated Name"

        update_response = self._raw_update(hda1["id"],
                                           dict(name="Updated Name"))
        assert self.__show(hda1).json()["name"] == "Updated Name"

        unicode_name = 'ржевский сапоги'
        update_response = self._raw_update(hda1["id"], dict(name=unicode_name))
        updated_hda = self.__show(hda1).json()
        assert updated_hda["name"] == unicode_name, updated_hda

        quoted_name = '"Mooo"'
        update_response = self._raw_update(hda1["id"], dict(name=quoted_name))
        updated_hda = self.__show(hda1).json()
        assert updated_hda["name"] == quoted_name, quoted_name

        data = {
            "dataset_id": hda1["id"],
            "name": "moocow",
            "dbkey": "?",
            "annotation": None,
            "info": "my info is",
            "operation": "attributes"
        }
        update_response = self._set_edit_update(data)
        # No key or anything supplied, expect a permission problem.
        # A bit questionable but I think this is a 400 instead of a 403 so that
        # we don't distinguish between this is a valid ID you don't have access to
        # and this is an invalid ID.
        assert update_response.status_code == 400, update_response.content

    def test_update_batch(self):
        hda1 = self._wait_for_new_hda()
        assert str(hda1["deleted"]).lower() == "false"
        payload = dict(items=[{
            "history_content_type": "dataset",
            "id": hda1["id"]
        }],
                       deleted=True)
        update_response = self._raw_update_batch(payload)
        objects = update_response.json()
        assert objects[0]["deleted"]

    def test_update_type_failures(self):
        hda1 = self._wait_for_new_hda()
        update_response = self._raw_update(hda1["id"],
                                           dict(deleted='not valid'))
        self._assert_status_code_is(update_response, 400)

    def _wait_for_new_hda(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        self.dataset_populator.wait_for_history(self.history_id)
        return hda1

    def _set_edit_update(self, json):
        set_edit_url = "%s/dataset/set_edit" % self.url
        update_response = put(set_edit_url, json=json)
        return update_response

    def _raw_update(self, item_id, data, admin=False, history_id=None):
        history_id = history_id or self.history_id
        key_param = "use_admin_key" if admin else "use_key"
        update_url = self._api_url(
            f"histories/{history_id}/contents/{item_id}", **{key_param: True})
        update_response = put(update_url, json=data)
        return update_response

    def _update_permissions(self, url, data, admin=False):
        key_param = "use_admin_key" if admin else "use_key"
        update_url = self._api_url(url, **{key_param: True})
        update_response = put(update_url, json=data)
        return update_response

    def _raw_update_batch(self, data):
        update_url = self._api_url("histories/%s/contents" % (self.history_id),
                                   use_key=True)
        update_response = put(update_url, json=data)
        return update_response

    def test_delete(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        self.dataset_populator.wait_for_history(self.history_id)
        assert str(self.__show(hda1).json()["deleted"]).lower() == "false"
        delete_response = self._delete("histories/{}/contents/{}".format(
            self.history_id, hda1["id"]))
        assert delete_response.status_code < 300  # Something in the 200s :).
        assert str(self.__show(hda1).json()["deleted"]).lower() == "true"

    def test_purge(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        self.dataset_populator.wait_for_history(self.history_id)
        assert str(self.__show(hda1).json()["deleted"]).lower() == "false"
        assert str(self.__show(hda1).json()["purged"]).lower() == "false"
        data = {'purge': True}
        delete_response = self._delete("histories/{}/contents/{}".format(
            self.history_id, hda1["id"]),
                                       data=data)
        assert delete_response.status_code < 300  # Something in the 200s :).
        assert str(self.__show(hda1).json()["deleted"]).lower() == "true"
        assert str(self.__show(hda1).json()["purged"]).lower() == "true"

    def test_dataset_collection_creation_on_contents(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id, type="dataset_collection")
        endpoint = "histories/%s/contents" % self.history_id
        self._check_pair_creation(endpoint, payload)

    def test_dataset_collection_creation_on_typed_contents(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id, )
        endpoint = "histories/%s/contents/dataset_collections" % self.history_id
        self._check_pair_creation(endpoint, payload)

    def test_dataset_collection_create_from_exisiting_datasets_with_new_tags(
            self):
        with self.dataset_populator.test_history() as history_id:
            hda_id = self.dataset_populator.new_dataset(history_id,
                                                        content="1 2 3")['id']
            hda2_id = self.dataset_populator.new_dataset(history_id,
                                                         content="1 2 3")['id']
            update_response = self._raw_update(hda2_id,
                                               dict(tags=['existing:tag']),
                                               history_id=history_id).json()
            assert update_response['tags'] == ['existing:tag']
            creation_payload = {
                'collection_type':
                'list',
                'history_id':
                history_id,
                'element_identifiers':
                json.dumps([{
                    'id': hda_id,
                    'src': 'hda',
                    'name': 'element_id1',
                    'tags': ['my_new_tag']
                }, {
                    'id': hda2_id,
                    'src': 'hda',
                    'name': 'element_id2',
                    'tags': ['another_new_tag']
                }]),
                'type':
                'dataset_collection',
                'copy_elements':
                True
            }
            r = self._post("histories/%s/contents" % self.history_id,
                           creation_payload).json()
            assert r['elements'][0]['object'][
                'id'] != hda_id, "HDA has not been copied"
            assert len(r['elements'][0]['object']['tags']) == 1
            assert r['elements'][0]['object']['tags'][0] == 'my_new_tag'
            assert len(r['elements'][1]['object']
                       ['tags']) == 2, r['elements'][1]['object']['tags']
            original_hda = self.dataset_populator.get_history_dataset_details(
                history_id=history_id, dataset_id=hda_id)
            assert len(original_hda['tags']) == 0, original_hda['tags']

    def _check_pair_creation(self, endpoint, payload):
        pre_collection_count = self.__count_contents(type="dataset_collection")
        pre_dataset_count = self.__count_contents(type="dataset")
        pre_combined_count = self.__count_contents(
            type="dataset,dataset_collection")

        dataset_collection_response = self._post(endpoint, payload)

        dataset_collection = self.__check_create_collection_response(
            dataset_collection_response)

        post_collection_count = self.__count_contents(
            type="dataset_collection")
        post_dataset_count = self.__count_contents(type="dataset")
        post_combined_count = self.__count_contents(
            type="dataset,dataset_collection")

        # Test filtering types with index.
        assert pre_collection_count == 0
        assert post_collection_count == 1
        assert post_combined_count == pre_dataset_count + 1
        assert post_combined_count == pre_combined_count + 1
        assert pre_dataset_count == post_dataset_count

        # Test show dataset colleciton.
        collection_url = "histories/{}/contents/dataset_collections/{}".format(
            self.history_id, dataset_collection["id"])
        show_response = self._get(collection_url)
        self._assert_status_code_is(show_response, 200)
        dataset_collection = show_response.json()
        self._assert_has_keys(dataset_collection, "url", "name", "deleted")

        assert not dataset_collection["deleted"]

        delete_response = delete(self._api_url(collection_url, use_key=True))
        self._assert_status_code_is(delete_response, 200)

        show_response = self._get(collection_url)
        dataset_collection = show_response.json()
        assert dataset_collection["deleted"]

    @skip_without_tool("collection_creates_list")
    def test_jobs_summary_simple_hdca(self):
        create_response = self.dataset_collection_populator.create_list_in_history(
            self.history_id, contents=["a\nb\nc\nd", "e\nf\ng\nh"])
        hdca_id = create_response.json()["id"]
        run = self.dataset_populator.run_collection_creates_list(
            self.history_id, hdca_id)
        collections = run['output_collections']
        collection = collections[0]
        jobs_summary_url = "histories/{}/contents/dataset_collections/{}/jobs_summary".format(
            self.history_id, collection["id"])
        jobs_summary_response = self._get(jobs_summary_url)
        self._assert_status_code_is(jobs_summary_response, 200)
        jobs_summary = jobs_summary_response.json()
        self._assert_has_keys(jobs_summary, "populated_state", "states")

    @skip_without_tool("cat1")
    def test_jobs_summary_implicit_hdca(self):
        create_response = self.dataset_collection_populator.create_pair_in_history(
            self.history_id, contents=["123", "456"])
        hdca_id = create_response.json()["id"]
        inputs = {
            "input1": {
                'batch': True,
                'values': [{
                    'src': 'hdca',
                    'id': hdca_id
                }]
            },
        }
        run = self.dataset_populator.run_tool("cat1",
                                              inputs=inputs,
                                              history_id=self.history_id)
        self.dataset_populator.wait_for_history_jobs(self.history_id)
        collections = run['implicit_collections']
        collection = collections[0]
        jobs_summary_url = "histories/{}/contents/dataset_collections/{}/jobs_summary".format(
            self.history_id, collection["id"])
        jobs_summary_response = self._get(jobs_summary_url)
        self._assert_status_code_is(jobs_summary_response, 200)
        jobs_summary = jobs_summary_response.json()
        self._assert_has_keys(jobs_summary, "populated_state", "states")
        states = jobs_summary["states"]
        assert states.get("ok") == 2, states

    def test_dataset_collection_hide_originals(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id, type="dataset_collection")

        payload["hide_source_items"] = True
        dataset_collection_response = self._post(
            "histories/%s/contents" % self.history_id, payload)
        self.__check_create_collection_response(dataset_collection_response)

        contents_response = self._get("histories/%s/contents" %
                                      self.history_id)
        datasets = [
            d for d in contents_response.json()
            if d["history_content_type"] == "dataset" and d["hid"] in [1, 2]
        ]
        # Assert two datasets in source were hidden.
        assert len(datasets) == 2
        assert not datasets[0]["visible"]
        assert not datasets[1]["visible"]

    def test_update_dataset_collection(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id, type="dataset_collection")
        dataset_collection_response = self._post(
            "histories/%s/contents" % self.history_id, payload)
        self._assert_status_code_is(dataset_collection_response, 200)
        hdca = dataset_collection_response.json()
        update_url = self._api_url(
            "histories/{}/contents/dataset_collections/{}".format(
                self.history_id, hdca["id"]),
            use_key=True)
        # Awkward json.dumps required here because of https://trello.com/c/CQwmCeG6
        body = json.dumps(dict(name="newnameforpair"))
        update_response = put(update_url, data=body)
        self._assert_status_code_is(update_response, 200)
        show_response = self.__show(hdca)
        assert str(show_response.json()["name"]) == "newnameforpair"

    def test_hdca_copy(self):
        hdca = self.dataset_collection_populator.create_pair_in_history(
            self.history_id).json()
        hdca_id = hdca["id"]
        second_history_id = self.dataset_populator.new_history()
        create_data = dict(
            source='hdca',
            content=hdca_id,
        )
        assert len(
            self._get("histories/%s/contents/dataset_collections" %
                      second_history_id).json()) == 0
        create_response = self._post(
            "histories/%s/contents/dataset_collections" % second_history_id,
            create_data)
        self.__check_create_collection_response(create_response)
        contents = self._get("histories/%s/contents/dataset_collections" %
                             second_history_id).json()
        assert len(contents) == 1
        new_forward, _ = self.__get_paired_response_elements(contents[0])
        self._assert_has_keys(new_forward, "history_id")
        assert new_forward["history_id"] == self.history_id

    def test_hdca_copy_with_new_dbkey(self):
        hdca = self.dataset_collection_populator.create_pair_in_history(
            self.history_id).json()
        hdca_id = hdca["id"]
        assert hdca["elements"][0]["object"]["metadata_dbkey"] == "?"
        assert hdca["elements"][0]["object"]["genome_build"] == "?"
        create_data = {'source': 'hdca', 'content': hdca_id, 'dbkey': 'hg19'}
        create_response = self._post(
            f"histories/{self.history_id}/contents/dataset_collections",
            create_data)
        collection = self.__check_create_collection_response(create_response)
        new_forward = collection['elements'][0]['object']
        assert new_forward["metadata_dbkey"] == "hg19"
        assert new_forward["genome_build"] == "hg19"

    def test_hdca_copy_and_elements(self):
        hdca = self.dataset_collection_populator.create_pair_in_history(
            self.history_id).json()
        hdca_id = hdca["id"]
        second_history_id = self.dataset_populator.new_history()
        create_data = dict(
            source='hdca',
            content=hdca_id,
            copy_elements=True,
        )
        assert len(
            self._get("histories/%s/contents/dataset_collections" %
                      second_history_id).json()) == 0
        create_response = self._post(
            "histories/%s/contents/dataset_collections" % second_history_id,
            create_data)
        self.__check_create_collection_response(create_response)

        contents = self._get("histories/%s/contents/dataset_collections" %
                             second_history_id).json()
        assert len(contents) == 1
        new_forward, _ = self.__get_paired_response_elements(contents[0])
        self._assert_has_keys(new_forward, "history_id")
        assert new_forward["history_id"] == second_history_id

    def __get_paired_response_elements(self, contents):
        hdca = self.__show(contents).json()
        self._assert_has_keys(hdca, "name", "deleted", "visible", "elements")
        elements = hdca["elements"]
        assert len(elements) == 2
        element0 = elements[0]
        element1 = elements[1]
        self._assert_has_keys(element0, "object")
        self._assert_has_keys(element1, "object")

        return element0["object"], element1["object"]

    def test_hdca_from_library_datasets(self):
        ld = self.library_populator.new_library_dataset("el1")
        ldda_id = ld["ldda_id"]
        element_identifiers = [{"name": "el1", "src": "ldda", "id": ldda_id}]
        history_id = self.dataset_populator.new_history()
        create_data = dict(
            history_id=history_id,
            type="dataset_collection",
            name="Test From Library",
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )
        create_response = self._post(
            "histories/%s/contents/dataset_collections" % history_id,
            create_data)
        hdca = self.__check_create_collection_response(create_response)
        elements = hdca["elements"]
        assert len(elements) == 1
        hda = elements[0]["object"]
        assert hda["hda_ldda"] == "hda"
        assert hda["history_content_type"] == "dataset"
        assert hda["copied_from_ldda_id"] == ldda_id
        assert hda['history_id'] == history_id

    def test_hdca_from_inaccessible_library_datasets(self):
        library, library_dataset = self.library_populator.new_library_dataset_in_private_library(
            "HDCACreateInaccesibleLibrary")
        ldda_id = library_dataset["id"]
        element_identifiers = [{"name": "el1", "src": "ldda", "id": ldda_id}]
        create_data = dict(
            history_id=self.history_id,
            type="dataset_collection",
            name="Test From Library",
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )
        with self._different_user():
            second_history_id = self.dataset_populator.new_history()
            create_response = self._post(
                "histories/%s/contents/dataset_collections" %
                second_history_id, create_data)
            self._assert_status_code_is(create_response, 403)

    def __check_create_collection_response(self, response):
        self._assert_status_code_is(response, 200)
        dataset_collection = response.json()
        self._assert_has_keys(dataset_collection, "url", "name", "deleted",
                              "visible", "elements")
        return dataset_collection

    def __show(self, contents):
        show_response = self._get("histories/{}/contents/{}s/{}".format(
            self.history_id, contents["history_content_type"], contents["id"]))
        return show_response

    def __count_contents(self, history_id=None, **kwds):
        if history_id is None:
            history_id = self.history_id
        contents_response = self._get("histories/%s/contents" % history_id,
                                      kwds)
        return len(contents_response.json())

    def __assert_hda_has_full_details(self, hda_details):
        self._assert_has_keys(hda_details, "display_types", "display_apps")

    def __check_for_hda(self, contents_response, hda):
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        assert len(contents) == 1
        hda_summary = contents[0]
        self.__assert_matches_hda(hda, hda_summary)
        return hda_summary

    def __assert_matches_hda(self, input_hda, query_hda):
        self._assert_has_keys(query_hda, "id", "name")
        assert input_hda["name"] == query_hda["name"]
        assert input_hda["id"] == query_hda["id"]

    def test_job_state_summary_field(self):
        create_response = self.dataset_collection_populator.create_pair_in_history(
            self.history_id, contents=["123", "456"])
        self._assert_status_code_is(create_response, 200)
        contents_response = self._get(
            "histories/%s/contents?v=dev&keys=job_state_summary&view=summary" %
            self.history_id)
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        for c in filter(
                lambda c: c['history_content_type'] == 'dataset_collection',
                contents):
            assert isinstance(c, dict)
            assert 'job_state_summary' in c
            assert isinstance(c['job_state_summary'], dict)

    def _get_content(self, history_id, update_time):
        return self._get(
            f"/api/histories/{history_id}/contents/near/100/100?update_time-ge={update_time}"
        ).json()

    def test_history_contents_near_with_update_time(self):
        with self.dataset_populator.test_history() as history_id:
            first_time = datetime.utcnow().isoformat()
            assert len(self._get_content(history_id,
                                         update_time=first_time)) == 0
            self.dataset_collection_populator.create_list_in_history(
                history_id=history_id)
            assert len(self._get_content(
                history_id, update_time=first_time)) == 4  # 3 datasets
            self.dataset_populator.wait_for_history(history_id)
            all_datasets_finished = first_time = datetime.utcnow().isoformat()
            assert len(
                self._get_content(history_id,
                                  update_time=all_datasets_finished)) == 0

    @skip_without_tool('cat_data_and_sleep')
    def test_history_contents_near_with_update_time_implicit_collection(self):
        with self.dataset_populator.test_history() as history_id:
            hdca_id = self.dataset_collection_populator.create_list_in_history(
                history_id=history_id).json()['id']
            self.dataset_populator.wait_for_history(history_id)
            inputs = {
                "input1": {
                    'batch': True,
                    'values': [{
                        "src": "hdca",
                        "id": hdca_id
                    }]
                },
                "sleep_time": 2,
            }
            response = self.dataset_populator.run_tool(
                "cat_data_and_sleep",
                inputs,
                history_id,
                assert_ok=False,
            ).json()
            update_time = datetime.utcnow().isoformat()
            collection_id = response['implicit_collections'][0]['id']
            for _ in range(20):
                time.sleep(1)
                update = self._get_content(history_id, update_time=update_time)
                if any(c for c in update
                       if c['history_content_type'] == 'dataset_collection'
                       and c['job_state_summary']['ok'] == 3):
                    return
            raise Exception(
                f"History content update time query did not include final update for implicit collection {collection_id}"
            )

    @skip_without_tool('collection_creates_dynamic_nested')
    def test_history_contents_near_with_update_time_explicit_collection(self):
        with self.dataset_populator.test_history() as history_id:
            inputs = {'foo': 'bar', 'sleep_time': 2}
            response = self.dataset_populator.run_tool(
                "collection_creates_dynamic_nested",
                inputs,
                history_id,
                assert_ok=False,
            ).json()
            update_time = datetime.utcnow().isoformat()
            collection_id = response['output_collections'][0]['id']
            for _ in range(20):
                time.sleep(1)
                update = self._get_content(history_id, update_time=update_time)
                if any(c for c in update
                       if c['history_content_type'] == 'dataset_collection'
                       and c['populated_state'] == 'ok'):
                    return
            raise Exception(
                f"History content update time query did not include populated_state update for dynamic nested collection {collection_id}"
            )
예제 #3
0
class HistoryContentsApiTestCase(ApiTestCase):
    def setUp(self):
        super().setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(
            self.galaxy_interactor)
        self.library_populator = LibraryPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_index_hda_summary(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        contents_response = self._get(f"histories/{self.history_id}/contents")
        hda_summary = self.__check_for_hda(contents_response, hda1)
        assert "display_types" not in hda_summary  # Quick summary, not full details

    def test_make_private_and_public(self):
        hda1 = self._wait_for_new_hda()
        update_url = f"histories/{self.history_id}/contents/{hda1['id']}/permissions"

        role_id = self.dataset_populator.user_private_role_id()
        # Give manage permission to the user.
        payload = {
            "access": [],
            "manage": [role_id],
        }
        update_response = self._update_permissions(update_url,
                                                   payload,
                                                   admin=True)
        self._assert_status_code_is(update_response, 200)
        self._assert_other_user_can_access(hda1["id"])
        # Then we restrict access.
        payload = {
            "action": "make_private",
        }
        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)
        self._assert_other_user_cannot_access(hda1["id"])

        # Then we restrict access.
        payload = {
            "action": "remove_restrictions",
        }
        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)
        self._assert_other_user_can_access(hda1["id"])

    def test_set_permissions_add_admin_history_contents(self):
        self._verify_dataset_permissions("history_contents")

    def test_set_permissions_add_admin_datasets(self):
        self._verify_dataset_permissions("dataset")

    def _verify_dataset_permissions(self, api_endpoint):
        hda1 = self._wait_for_new_hda()
        hda_id = hda1["id"]
        if api_endpoint == "history_contents":
            update_url = f"histories/{self.history_id}/contents/{hda_id}/permissions"
        else:
            update_url = f"datasets/{hda_id}/permissions"

        role_id = self.dataset_populator.user_private_role_id()

        payload = {
            "access": [role_id],
            "manage": [role_id],
        }

        # Other users cannot modify permissions.
        with self._different_user():
            update_response = self._update_permissions(update_url, payload)
            self._assert_status_code_is(update_response, 403)

        # First the details render for another user.
        self._assert_other_user_can_access(hda_id)

        # Then we restrict access.
        update_response = self._update_permissions(update_url,
                                                   payload,
                                                   admin=True)
        self._assert_status_code_is(update_response, 200)

        # Finally the details don't render.
        self._assert_other_user_cannot_access(hda_id)

        # But they do for the original user.
        contents_response = self._get(
            f"histories/{self.history_id}/contents/{hda_id}").json()
        assert "name" in contents_response

        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)

        payload = {
            "access": [role_id],
            "manage": [role_id],
        }
        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)
        self._assert_other_user_cannot_access(hda_id)

        user_id = self.dataset_populator.user_id()
        with self._different_user():
            different_user_id = self.dataset_populator.user_id()
        combined_user_role = self.dataset_populator.create_role(
            [user_id, different_user_id],
            description="role for testing permissions")

        payload = {
            "access": [combined_user_role["id"]],
            "manage": [role_id],
        }
        update_response = self._update_permissions(update_url, payload)
        self._assert_status_code_is(update_response, 200)
        # Now other user can see dataset again with access permission.
        self._assert_other_user_can_access(hda_id)
        # access doesn't imply management though...
        with self._different_user():
            update_response = self._update_permissions(update_url, payload)
            self._assert_status_code_is(update_response, 403)

    def _assert_other_user_cannot_access(self, history_content_id):
        with self._different_user():
            contents_response = self.dataset_populator.get_history_dataset_details_raw(
                history_id=self.history_id, dataset_id=history_content_id)
            assert contents_response.status_code == 403

    def _assert_other_user_can_access(self, history_content_id):
        with self._different_user():
            contents_response = self.dataset_populator.get_history_dataset_details_raw(
                history_id=self.history_id, dataset_id=history_content_id)
            contents_response.raise_for_status()
            assert "name" in contents_response.json()

    def test_index_hda_all_details(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        contents_response = self._get(
            f"histories/{self.history_id}/contents?details=all")
        hda_details = self.__check_for_hda(contents_response, hda1)
        self.__assert_hda_has_full_details(hda_details)

    def test_index_hda_detail_by_id(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        contents_response = self._get(
            f"histories/{self.history_id}/contents?details={hda1['id']}")
        hda_details = self.__check_for_hda(contents_response, hda1)
        self.__assert_hda_has_full_details(hda_details)

    def test_show_hda(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        show_response = self.__show(hda1)
        self._assert_status_code_is(show_response, 200)
        self.__assert_matches_hda(hda1, show_response.json())

    def _create_copy(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        create_data = dict(
            source='hda',
            content=hda1["id"],
        )
        second_history_id = self.dataset_populator.new_history()
        assert self.__count_contents(second_history_id) == 0
        create_response = self._post(f"histories/{second_history_id}/contents",
                                     create_data,
                                     json=True)
        self._assert_status_code_is(create_response, 200)
        return create_response.json()

    def test_hda_copy(self):
        response = self._create_copy()
        assert self.__count_contents(response['history_id']) == 1

    def test_inheritance_chain(self):
        response = self._create_copy()
        inheritance_chain_response = self._get(
            f"datasets/{response['id']}/inheritance_chain")
        self._assert_status_code_is_ok(inheritance_chain_response)
        inheritance_chain = inheritance_chain_response.json()
        assert len(inheritance_chain) == 1

    def test_library_copy(self):
        ld = self.library_populator.new_library_dataset("lda_test_library")
        create_data = dict(
            source='library',
            content=ld["id"],
        )
        assert self.__count_contents(self.history_id) == 0
        create_response = self._post(f"histories/{self.history_id}/contents",
                                     create_data,
                                     json=True)
        self._assert_status_code_is(create_response, 200)
        assert self.__count_contents(self.history_id) == 1

    def test_update(self):
        hda1 = self._wait_for_new_hda()
        assert str(hda1["deleted"]).lower() == "false"
        update_response = self._update(hda1["id"], dict(deleted=True))
        self._assert_status_code_is(update_response, 200)
        show_response = self.__show(hda1)
        assert str(show_response.json()["deleted"]).lower() == "true"

        update_response = self._update(hda1["id"], dict(name="Updated Name"))
        assert self.__show(hda1).json()["name"] == "Updated Name"

        update_response = self._update(hda1["id"], dict(name="Updated Name"))
        assert self.__show(hda1).json()["name"] == "Updated Name"

        unicode_name = 'ржевский сапоги'
        update_response = self._update(hda1["id"], dict(name=unicode_name))
        updated_hda = self.__show(hda1).json()
        assert updated_hda["name"] == unicode_name, updated_hda

        quoted_name = '"Mooo"'
        update_response = self._update(hda1["id"], dict(name=quoted_name))
        updated_hda = self.__show(hda1).json()
        assert updated_hda["name"] == quoted_name, quoted_name

        data = {
            "dataset_id": hda1["id"],
            "name": "moocow",
            "dbkey": "?",
            "annotation": None,
            "info": "my info is",
            "operation": "attributes"
        }
        update_response = self._set_edit_update(data)
        # No key or anything supplied, expect a permission problem.
        # A bit questionable but I think this is a 400 instead of a 403 so that
        # we don't distinguish between this is a valid ID you don't have access to
        # and this is an invalid ID.
        assert update_response.status_code == 400, update_response.content

    def test_update_batch(self):
        hda1 = self._wait_for_new_hda()
        assert str(hda1["deleted"]).lower() == "false"
        assert str(hda1["visible"]).lower() == "true"

        # update deleted flag => true
        payload = dict(items=[{
            "history_content_type": "dataset",
            "id": hda1["id"]
        }],
                       deleted=True)
        update_response = self._update_batch(payload)
        objects = update_response.json()
        assert objects[0]["deleted"] is True
        assert objects[0]["visible"] is True

        # update visibility flag => false
        payload = dict(items=[{
            "history_content_type": "dataset",
            "id": hda1["id"]
        }],
                       visible=False)
        update_response = self._update_batch(payload)
        objects = update_response.json()
        assert objects[0]["deleted"] is True
        assert objects[0]["visible"] is False

        # update both flags
        payload = dict(items=[{
            "history_content_type": "dataset",
            "id": hda1["id"]
        }],
                       deleted=False,
                       visible=True)
        update_response = self._update_batch(payload)
        objects = update_response.json()
        assert objects[0]["deleted"] is False
        assert objects[0]["visible"] is True

    def test_update_batch_collections(self):
        hdca = self._create_pair_collection()
        assert hdca["deleted"] is False
        assert hdca["visible"] is True

        # update deleted flag => true
        payload = dict(items=[{
            "history_content_type": "dataset_collection",
            "id": hdca["id"]
        }],
                       deleted=True)
        update_response = self._update_batch(payload)
        objects = update_response.json()
        assert objects[0]["deleted"] is True
        assert objects[0]["visible"] is True

        # update visibility flag => false
        payload = dict(items=[{
            "history_content_type": "dataset_collection",
            "id": hdca["id"]
        }],
                       visible=False)
        update_response = self._update_batch(payload)
        objects = update_response.json()
        assert objects[0]["deleted"] is True
        assert objects[0]["visible"] is False

        # update both flags
        payload = dict(items=[{
            "history_content_type": "dataset_collection",
            "id": hdca["id"]
        }],
                       deleted=False,
                       visible=True)
        update_response = self._update_batch(payload)
        objects = update_response.json()
        assert objects[0]["deleted"] is False
        assert objects[0]["visible"] is True

    def test_update_type_failures(self):
        hda1 = self._wait_for_new_hda()
        update_response = self._update(hda1["id"], dict(deleted='not valid'))
        self._assert_status_code_is(update_response, 400)

    def _wait_for_new_hda(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        self.dataset_populator.wait_for_history(self.history_id)
        return hda1

    def _set_edit_update(self, data):
        update_response = self._put(urllib.parse.urljoin(
            self.url, "dataset/set_edit"),
                                    data=data,
                                    json=True)
        return update_response

    def _update(self, item_id, data, admin=False, history_id=None):
        history_id = history_id or self.history_id
        update_response = self._put(
            f"histories/{history_id}/contents/{item_id}",
            data=data,
            json=True,
            admin=admin)
        return update_response

    def _update_permissions(self, url, data, admin=False):
        update_response = self._put(url, data=data, json=True, admin=admin)
        return update_response

    def _update_batch(self, data):
        update_response = self._put(f"histories/{self.history_id}/contents",
                                    data=data,
                                    json=True)
        return update_response

    def test_delete(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        self.dataset_populator.wait_for_history(self.history_id)
        assert str(self.__show(hda1).json()["deleted"]).lower() == "false"
        delete_response = self._delete(
            f"histories/{self.history_id}/contents/{hda1['id']}")
        assert delete_response.status_code < 300  # Something in the 200s :).
        assert str(self.__show(hda1).json()["deleted"]).lower() == "true"

    def test_delete_anon(self):
        with self._different_user(anon=True):
            history_id = self._get(
                urllib.parse.urljoin(
                    self.url, "history/current_history_json")).json()['id']
            hda1 = self.dataset_populator.new_dataset(history_id)
            self.dataset_populator.wait_for_history(history_id)
            assert str(self.__show(hda1).json()["deleted"]).lower() == "false"
            delete_response = self._delete(
                f"histories/{history_id}/contents/{hda1['id']}")
            assert delete_response.status_code < 300  # Something in the 200s :).
            assert str(self.__show(hda1).json()["deleted"]).lower() == "true"

    def test_delete_permission_denied(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        with self._different_user(anon=True):
            delete_response = self._delete(
                f"histories/{self.history_id}/contents/{hda1['id']}")
            assert delete_response.status_code == 403
            assert delete_response.json(
            )['err_msg'] == 'HistoryDatasetAssociation is not owned by user'

    def test_purge(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        self.dataset_populator.wait_for_history(self.history_id)
        assert str(self.__show(hda1).json()["deleted"]).lower() == "false"
        assert str(self.__show(hda1).json()["purged"]).lower() == "false"
        data = {'purge': True}
        delete_response = self._delete(
            f"histories/{self.history_id}/contents/{hda1['id']}",
            data=data,
            json=True)
        assert delete_response.status_code < 300  # Something in the 200s :).
        assert str(self.__show(hda1).json()["deleted"]).lower() == "true"
        assert str(self.__show(hda1).json()["purged"]).lower() == "true"

    def test_dataset_collection_creation_on_contents(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id, type="dataset_collection")
        endpoint = f"histories/{self.history_id}/contents"
        self._check_pair_creation(endpoint, payload)

    def test_dataset_collection_creation_on_typed_contents(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id, )
        endpoint = f"histories/{self.history_id}/contents/dataset_collections"
        self._check_pair_creation(endpoint, payload)

    def test_dataset_collection_create_from_exisiting_datasets_with_new_tags(
            self):
        with self.dataset_populator.test_history() as history_id:
            hda_id = self.dataset_populator.new_dataset(history_id,
                                                        content="1 2 3")['id']
            hda2_id = self.dataset_populator.new_dataset(history_id,
                                                         content="1 2 3")['id']
            update_response = self._update(hda2_id,
                                           dict(tags=['existing:tag']),
                                           history_id=history_id).json()
            assert update_response['tags'] == ['existing:tag']
            creation_payload = {
                'collection_type':
                'list',
                'history_id':
                history_id,
                'element_identifiers': [{
                    'id': hda_id,
                    'src': 'hda',
                    'name': 'element_id1',
                    'tags': ['my_new_tag']
                }, {
                    'id': hda2_id,
                    'src': 'hda',
                    'name': 'element_id2',
                    'tags': ['another_new_tag']
                }],
                'type':
                'dataset_collection',
                'copy_elements':
                True
            }
            r = self._post(f"histories/{self.history_id}/contents",
                           creation_payload,
                           json=True).json()
            assert r['elements'][0]['object'][
                'id'] != hda_id, "HDA has not been copied"
            assert len(r['elements'][0]['object']['tags']) == 1
            assert r['elements'][0]['object']['tags'][0] == 'my_new_tag'
            assert len(r['elements'][1]['object']
                       ['tags']) == 2, r['elements'][1]['object']['tags']
            original_hda = self.dataset_populator.get_history_dataset_details(
                history_id=history_id, dataset_id=hda_id)
            assert len(original_hda['tags']) == 0, original_hda['tags']

    def _check_pair_creation(self, endpoint, payload):
        pre_collection_count = self.__count_contents(type="dataset_collection")
        pre_dataset_count = self.__count_contents(type="dataset")
        pre_combined_count = self.__count_contents(
            type="dataset,dataset_collection")

        dataset_collection_response = self._post(endpoint, payload, json=True)

        dataset_collection = self.__check_create_collection_response(
            dataset_collection_response)

        post_collection_count = self.__count_contents(
            type="dataset_collection")
        post_dataset_count = self.__count_contents(type="dataset")
        post_combined_count = self.__count_contents(
            type="dataset,dataset_collection")

        # Test filtering types with index.
        assert pre_collection_count == 0
        assert post_collection_count == 1
        assert post_combined_count == pre_dataset_count + 1
        assert post_combined_count == pre_combined_count + 1
        assert pre_dataset_count == post_dataset_count

        # Test show dataset colleciton.
        collection_url = f"histories/{self.history_id}/contents/dataset_collections/{dataset_collection['id']}"
        show_response = self._get(collection_url)
        self._assert_status_code_is(show_response, 200)
        dataset_collection = show_response.json()
        self._assert_has_keys(dataset_collection, "url", "name", "deleted")

        assert not dataset_collection["deleted"]

        delete_response = self._delete(collection_url)
        self._assert_status_code_is(delete_response, 200)

        show_response = self._get(collection_url)
        dataset_collection = show_response.json()
        assert dataset_collection["deleted"]

    @skip_without_tool("collection_creates_list")
    def test_jobs_summary_simple_hdca(self):
        create_response = self.dataset_collection_populator.create_list_in_history(
            self.history_id, contents=["a\nb\nc\nd", "e\nf\ng\nh"])
        hdca_id = create_response.json()["id"]
        run = self.dataset_populator.run_collection_creates_list(
            self.history_id, hdca_id)
        collections = run['output_collections']
        collection = collections[0]
        jobs_summary_url = f"histories/{self.history_id}/contents/dataset_collections/{collection['id']}/jobs_summary"
        jobs_summary_response = self._get(jobs_summary_url)
        self._assert_status_code_is(jobs_summary_response, 200)
        jobs_summary = jobs_summary_response.json()
        self._assert_has_keys(jobs_summary, "populated_state", "states")

    @skip_without_tool("cat1")
    def test_jobs_summary_implicit_hdca(self):
        create_response = self.dataset_collection_populator.create_pair_in_history(
            self.history_id, contents=["123", "456"])
        hdca_id = create_response.json()["id"]
        inputs = {
            "input1": {
                'batch': True,
                'values': [{
                    'src': 'hdca',
                    'id': hdca_id
                }]
            },
        }
        run = self.dataset_populator.run_tool("cat1",
                                              inputs=inputs,
                                              history_id=self.history_id)
        self.dataset_populator.wait_for_history_jobs(self.history_id)
        collections = run['implicit_collections']
        collection = collections[0]
        jobs_summary_url = f"histories/{self.history_id}/contents/dataset_collections/{collection['id']}/jobs_summary"
        jobs_summary_response = self._get(jobs_summary_url)
        self._assert_status_code_is(jobs_summary_response, 200)
        jobs_summary = jobs_summary_response.json()
        self._assert_has_keys(jobs_summary, "populated_state", "states")
        states = jobs_summary["states"]
        assert states.get("ok") == 2, states

    def test_dataset_collection_hide_originals(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id, type="dataset_collection")

        payload["hide_source_items"] = True
        dataset_collection_response = self._post(
            f"histories/{self.history_id}/contents", payload, json=True)
        self.__check_create_collection_response(dataset_collection_response)

        contents_response = self._get(f"histories/{self.history_id}/contents")
        datasets = [
            d for d in contents_response.json()
            if d["history_content_type"] == "dataset" and d["hid"] in [1, 2]
        ]
        # Assert two datasets in source were hidden.
        assert len(datasets) == 2
        assert not datasets[0]["visible"]
        assert not datasets[1]["visible"]

    def test_update_dataset_collection(self):
        hdca = self._create_pair_collection()
        body = dict(name="newnameforpair")
        update_response = self._put(
            f"histories/{self.history_id}/contents/dataset_collections/{hdca['id']}",
            data=body,
            json=True)
        self._assert_status_code_is(update_response, 200)
        show_response = self.__show(hdca)
        assert str(show_response.json()["name"]) == "newnameforpair"

    def test_update_batch_dataset_collection(self):
        hdca = self._create_pair_collection()
        body = {
            "items": [{
                "history_content_type": "dataset_collection",
                "id": hdca["id"]
            }],
            "name":
            "newnameforpair"
        }
        update_response = self._put(f"histories/{self.history_id}/contents",
                                    data=body,
                                    json=True)
        self._assert_status_code_is(update_response, 200)
        show_response = self.__show(hdca)
        assert str(show_response.json()["name"]) == "newnameforpair"

    def _create_pair_collection(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id, type="dataset_collection")
        dataset_collection_response = self._post(
            f"histories/{self.history_id}/contents", payload, json=True)
        self._assert_status_code_is(dataset_collection_response, 200)
        hdca = dataset_collection_response.json()
        return hdca

    def test_hdca_copy(self):
        hdca = self.dataset_collection_populator.create_pair_in_history(
            self.history_id).json()
        hdca_id = hdca["id"]
        second_history_id = self.dataset_populator.new_history()
        create_data = dict(
            source='hdca',
            content=hdca_id,
        )
        assert len(
            self._get(
                f"histories/{second_history_id}/contents/dataset_collections").
            json()) == 0
        create_response = self._post(
            f"histories/{second_history_id}/contents/dataset_collections",
            create_data,
            json=True)
        self.__check_create_collection_response(create_response)
        contents = self._get(
            f"histories/{second_history_id}/contents/dataset_collections"
        ).json()
        assert len(contents) == 1
        new_forward, _ = self.__get_paired_response_elements(contents[0])
        self._assert_has_keys(new_forward, "history_id")
        assert new_forward["history_id"] == self.history_id

    def test_hdca_copy_with_new_dbkey(self):
        hdca = self.dataset_collection_populator.create_pair_in_history(
            self.history_id).json()
        hdca_id = hdca["id"]
        assert hdca["elements"][0]["object"]["metadata_dbkey"] == "?"
        assert hdca["elements"][0]["object"]["genome_build"] == "?"
        create_data = {'source': 'hdca', 'content': hdca_id, 'dbkey': 'hg19'}
        create_response = self._post(
            f"histories/{self.history_id}/contents/dataset_collections",
            create_data,
            json=True)
        collection = self.__check_create_collection_response(create_response)
        new_forward = collection['elements'][0]['object']
        assert new_forward["metadata_dbkey"] == "hg19"
        assert new_forward["genome_build"] == "hg19"

    def test_hdca_copy_and_elements(self):
        hdca = self.dataset_collection_populator.create_pair_in_history(
            self.history_id).json()
        hdca_id = hdca["id"]
        second_history_id = self.dataset_populator.new_history()
        create_data = dict(
            source='hdca',
            content=hdca_id,
            copy_elements=True,
        )
        assert len(
            self._get(
                f"histories/{second_history_id}/contents/dataset_collections").
            json()) == 0
        create_response = self._post(
            f"histories/{second_history_id}/contents/dataset_collections",
            create_data,
            json=True)
        self.__check_create_collection_response(create_response)

        contents = self._get(
            f"histories/{second_history_id}/contents/dataset_collections"
        ).json()
        assert len(contents) == 1
        new_forward, _ = self.__get_paired_response_elements(contents[0])
        self._assert_has_keys(new_forward, "history_id")
        assert new_forward["history_id"] == second_history_id

    def __get_paired_response_elements(self, contents):
        hdca = self.__show(contents).json()
        self._assert_has_keys(hdca, "name", "deleted", "visible", "elements")
        elements = hdca["elements"]
        assert len(elements) == 2
        element0 = elements[0]
        element1 = elements[1]
        self._assert_has_keys(element0, "object")
        self._assert_has_keys(element1, "object")

        return element0["object"], element1["object"]

    def test_hdca_from_library_datasets(self):
        ld = self.library_populator.new_library_dataset("el1")
        ldda_id = ld["ldda_id"]
        element_identifiers = [{"name": "el1", "src": "ldda", "id": ldda_id}]
        history_id = self.dataset_populator.new_history()
        create_data = dict(
            history_id=history_id,
            type="dataset_collection",
            name="Test From Library",
            element_identifiers=element_identifiers,
            collection_type="list",
        )
        create_response = self._post(
            f"histories/{history_id}/contents/dataset_collections",
            create_data,
            json=True)
        hdca = self.__check_create_collection_response(create_response)
        elements = hdca["elements"]
        assert len(elements) == 1
        hda = elements[0]["object"]
        assert hda["hda_ldda"] == "hda"
        assert hda["history_content_type"] == "dataset"
        assert hda["copied_from_ldda_id"] == ldda_id
        assert hda['history_id'] == history_id

    def test_hdca_from_inaccessible_library_datasets(self):
        library, library_dataset = self.library_populator.new_library_dataset_in_private_library(
            "HDCACreateInaccesibleLibrary")
        ldda_id = library_dataset["id"]
        element_identifiers = [{"name": "el1", "src": "ldda", "id": ldda_id}]
        create_data = dict(
            history_id=self.history_id,
            type="dataset_collection",
            name="Test From Library",
            element_identifiers=element_identifiers,
            collection_type="list",
        )
        with self._different_user():
            second_history_id = self.dataset_populator.new_history()
            create_response = self._post(
                f"histories/{second_history_id}/contents/dataset_collections",
                create_data,
                json=True)
            self._assert_status_code_is(create_response, 403)

    def __check_create_collection_response(self, response):
        self._assert_status_code_is(response, 200)
        dataset_collection = response.json()
        self._assert_has_keys(dataset_collection, "url", "name", "deleted",
                              "visible", "elements")
        return dataset_collection

    def __show(self, contents):
        show_response = self._get(
            f"histories/{self.history_id}/contents/{contents['history_content_type']}s/{contents['id']}"
        )
        return show_response

    def __count_contents(self, history_id=None, **kwds):
        if history_id is None:
            history_id = self.history_id
        contents_response = self._get(f"histories/{history_id}/contents", kwds)
        return len(contents_response.json())

    def __assert_hda_has_full_details(self, hda_details):
        self._assert_has_keys(hda_details, "display_types", "display_apps")

    def __check_for_hda(self, contents_response, hda):
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        assert len(contents) == 1
        hda_summary = contents[0]
        self.__assert_matches_hda(hda, hda_summary)
        return hda_summary

    def __assert_matches_hda(self, input_hda, query_hda):
        self._assert_has_keys(query_hda, "id", "name")
        assert input_hda["name"] == query_hda["name"]
        assert input_hda["id"] == query_hda["id"]

    def test_job_state_summary_field(self):
        create_response = self.dataset_collection_populator.create_pair_in_history(
            self.history_id, contents=["123", "456"])
        self._assert_status_code_is(create_response, 200)
        contents_response = self._get(
            f"histories/{self.history_id}/contents?v=dev&keys=job_state_summary&view=summary"
        )
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        for c in filter(
                lambda c: c['history_content_type'] == 'dataset_collection',
                contents):
            assert isinstance(c, dict)
            assert 'job_state_summary' in c
            assert isinstance(c['job_state_summary'], dict)

    def _get_content(self, history_id, update_time):
        return self._get(
            f"/api/histories/{history_id}/contents/near/100/100?update_time-gt={update_time}"
        ).json()

    def test_history_contents_near_with_update_time(self):
        with self.dataset_populator.test_history() as history_id:
            first_time = datetime.utcnow().isoformat()
            assert len(self._get_content(history_id,
                                         update_time=first_time)) == 0
            self.dataset_collection_populator.create_list_in_history(
                history_id=history_id)
            assert len(self._get_content(
                history_id, update_time=first_time)) == 4  # 3 datasets
            self.dataset_populator.wait_for_history(history_id)
            all_datasets_finished = first_time = datetime.utcnow().isoformat()
            assert len(
                self._get_content(history_id,
                                  update_time=all_datasets_finished)) == 0

    def test_history_contents_near_with_since(self):
        with self.dataset_populator.test_history() as history_id:
            original_history = self._get(f"/api/histories/{history_id}").json()
            original_history_stamp = original_history['update_time']

            # check empty contents, with no since flag, should return an empty 200 result
            history_contents = self._get(
                f"/api/histories/{history_id}/contents/near/100/100")
            assert history_contents.status_code == 200
            assert len(history_contents.json()) == 0

            # adding a since parameter, should return a 204 if history has not changed at all
            history_contents = self._get(
                f"/api/histories/{history_id}/contents/near/100/100?since={original_history_stamp}"
            )
            assert history_contents.status_code == 204

            # add some stuff
            self.dataset_collection_populator.create_list_in_history(
                history_id=history_id)
            self.dataset_populator.wait_for_history(history_id)

            # check to make sure the added stuff is there
            changed_history_contents = self._get(
                f"/api/histories/{history_id}/contents/near/100/100")
            assert changed_history_contents.status_code == 200
            assert len(changed_history_contents.json()) == 4

            # check to make sure the history date has actually changed due to changing the contents
            changed_history = self._get(f"/api/histories/{history_id}").json()
            changed_history_stamp = changed_history['update_time']
            assert original_history_stamp != changed_history_stamp

            # a repeated contents request with since=original_history_stamp should now return data
            # because we have added datasets and the update_time should have been changed
            changed_content = self._get(
                f"/api/histories/{history_id}/contents/near/100/100?since={original_history_stamp}"
            )
            assert changed_content.status_code == 200
            assert len(changed_content.json()) == 4

    def test_history_contents_near_since_with_standard_iso8601_date(self):
        with self.dataset_populator.test_history() as history_id:
            original_history = self._get(f"/api/histories/{history_id}").json()
            original_history_stamp = original_history['update_time']

            # this is the standard date format that javascript will emit using .toISOString(), it
            # should be the expected date format for any modern api
            # https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Date/toISOString

            # checking to make sure that the same exact history.update_time returns a "not changed"
            # result after date parsing
            valid_iso8601_date = original_history_stamp + 'Z'
            encoded_valid_date = urllib.parse.quote_plus(valid_iso8601_date)
            history_contents = self._get(
                f"/api/histories/{history_id}/contents/near/100/100?since={encoded_valid_date}"
            )
            assert history_contents.status_code == 204

            # test parsing for other standard is08601 formats
            sample_formats = [
                '2021-08-26T15:53:02+00:00', '2021-08-26T15:53:02Z',
                '2002-10-10T12:00:00-05:00'
            ]
            for date_str in sample_formats:
                encoded_date = urllib.parse.quote_plus(
                    date_str)  # handles pluses, minuses
                history_contents = self._get(
                    f"/api/histories/{history_id}/contents/near/100/100?since={encoded_date}"
                )
                self._assert_status_code_is_ok(history_contents)

    @skip_without_tool('cat_data_and_sleep')
    def test_history_contents_near_with_update_time_implicit_collection(self):
        with self.dataset_populator.test_history() as history_id:
            hdca_id = self.dataset_collection_populator.create_list_in_history(
                history_id=history_id).json()['id']
            self.dataset_populator.wait_for_history(history_id)
            inputs = {
                "input1": {
                    'batch': True,
                    'values': [{
                        "src": "hdca",
                        "id": hdca_id
                    }]
                },
                "sleep_time": 2,
            }
            response = self.dataset_populator.run_tool(
                "cat_data_and_sleep",
                inputs,
                history_id,
            )
            update_time = datetime.utcnow().isoformat()
            collection_id = response['implicit_collections'][0]['id']
            for _ in range(20):
                time.sleep(1)
                update = self._get_content(history_id, update_time=update_time)
                if any(c for c in update
                       if c['history_content_type'] == 'dataset_collection'
                       and c['job_state_summary']['ok'] == 3):
                    return
            raise Exception(
                f"History content update time query did not include final update for implicit collection {collection_id}"
            )

    @skip_without_tool('collection_creates_dynamic_nested')
    def test_history_contents_near_with_update_time_explicit_collection(self):
        with self.dataset_populator.test_history() as history_id:
            inputs = {'foo': 'bar', 'sleep_time': 2}
            response = self.dataset_populator.run_tool(
                "collection_creates_dynamic_nested",
                inputs,
                history_id,
            )
            update_time = datetime.utcnow().isoformat()
            collection_id = response['output_collections'][0]['id']
            for _ in range(20):
                time.sleep(1)
                update = self._get_content(history_id, update_time=update_time)
                if any(c for c in update
                       if c['history_content_type'] == 'dataset_collection'
                       and c['populated_state'] == 'ok'):
                    return
            raise Exception(
                f"History content update time query did not include populated_state update for dynamic nested collection {collection_id}"
            )

    def test_index_filter_by_type(self):
        history_id = self.dataset_populator.new_history()
        self.dataset_populator.new_dataset(history_id)
        self.dataset_collection_populator.create_list_in_history(
            history_id=history_id)

        contents_response = self._get(
            f"histories/{history_id}/contents").json()
        num_items = len(contents_response)
        expected_num_collections = 1
        expected_num_datasets = num_items - expected_num_collections

        contents_response = self._get(
            f"histories/{history_id}/contents?types=dataset").json()
        assert len(contents_response) == expected_num_datasets
        contents_response = self._get(
            f"histories/{history_id}/contents?types=dataset_collection").json(
            )
        assert len(contents_response) == expected_num_collections

    def test_elements_datatypes_field(self):
        history_id = self.dataset_populator.new_history()
        collection_name = "homogeneous"
        expected_datatypes = ["txt"]
        elements = [  # List with all elements of txt datatype (homogeneous)
            {
                "name": "test1",
                "src": "pasted",
                "paste_content": "abc",
                "ext": "txt"
            },
            {
                "name": "test2",
                "src": "pasted",
                "paste_content": "abc",
                "ext": "txt"
            },
        ]
        self._upload_collection_list_with_elements(history_id, collection_name,
                                                   elements)
        self._assert_collection_has_expected_elements_datatypes(
            history_id, collection_name, expected_datatypes)

        collection_name = "heterogeneous"
        expected_datatypes = ["txt", "tabular"]
        elements = [  # List with txt and tabular datatype (heterogeneous)
            {
                "name": "test2",
                "src": "pasted",
                "paste_content": "abc",
                "ext": "txt"
            },
            {
                "name": "test3",
                "src": "pasted",
                "paste_content": "a,b,c\n",
                "ext": "tabular"
            },
        ]
        self._upload_collection_list_with_elements(history_id, collection_name,
                                                   elements)
        self._assert_collection_has_expected_elements_datatypes(
            history_id, collection_name, expected_datatypes)

    def _upload_collection_list_with_elements(self, history_id: str,
                                              collection_name: str,
                                              elements: List[Any]):
        create_homogeneous_response = self.dataset_collection_populator.upload_collection(
            history_id, "list", elements=elements, name=collection_name)
        self._assert_status_code_is_ok(create_homogeneous_response)

    def _assert_collection_has_expected_elements_datatypes(
            self, history_id, collection_name, expected_datatypes):
        contents_response = self._get(
            f"histories/{history_id}/contents?v=dev&view=betawebclient&q=name-eq&qv={collection_name}"
        )
        self._assert_status_code_is(contents_response, 200)
        collection = contents_response.json()[0]
        self.assertCountEqual(collection["elements_datatypes"],
                              expected_datatypes)
예제 #4
0
class DatasetCollectionApiTestCase(ApiTestCase):
    history_id: str

    def setUp(self):
        super().setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(
            self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_create_pair_from_history(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            instance_type="history",
        )
        create_response = self._post("dataset_collections", payload, json=True)
        dataset_collection = self._check_create_response(create_response)
        returned_datasets = dataset_collection["elements"]
        assert len(returned_datasets) == 2, dataset_collection

    def test_create_list_from_history(self):
        element_identifiers = self.dataset_collection_populator.list_identifiers(
            self.history_id)

        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=element_identifiers,
            collection_type="list",
        )

        create_response = self._post("dataset_collections", payload, json=True)
        dataset_collection = self._check_create_response(create_response)
        returned_datasets = dataset_collection["elements"]
        assert len(returned_datasets) == 3, dataset_collection

    def test_create_list_of_existing_pairs(self):
        pair_payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            instance_type="history",
        )
        pair_create_response = self._post("dataset_collections",
                                          pair_payload,
                                          json=True)
        dataset_collection = self._check_create_response(pair_create_response)
        hdca_id = dataset_collection["id"]

        element_identifiers = [dict(name="test1", src="hdca", id=hdca_id)]

        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=element_identifiers,
            collection_type="list",
        )
        create_response = self._post("dataset_collections", payload, json=True)
        dataset_collection = self._check_create_response(create_response)
        returned_collections = dataset_collection["elements"]
        assert len(returned_collections) == 1, dataset_collection

    def test_create_list_of_new_pairs(self):
        identifiers = self.dataset_collection_populator.nested_collection_identifiers(
            self.history_id, "list:paired")
        payload = dict(
            collection_type="list:paired",
            instance_type="history",
            history_id=self.history_id,
            name="a nested collection",
            element_identifiers=identifiers,
        )
        create_response = self._post("dataset_collections", payload, json=True)
        dataset_collection = self._check_create_response(create_response)
        assert dataset_collection["collection_type"] == "list:paired"
        assert dataset_collection["name"] == "a nested collection"
        returned_collections = dataset_collection["elements"]
        assert len(returned_collections) == 1, dataset_collection
        pair_1_element = returned_collections[0]
        self._assert_has_keys(pair_1_element, "element_identifier",
                              "element_index", "object")
        assert pair_1_element[
            "element_identifier"] == "test_level_1", pair_1_element
        assert pair_1_element["element_index"] == 0, pair_1_element
        pair_1_object = pair_1_element["object"]
        self._assert_has_keys(pair_1_object, "collection_type", "elements",
                              "element_count")
        self.assertEqual(pair_1_object["collection_type"], "paired")
        self.assertEqual(pair_1_object["populated"], True)
        pair_elements = pair_1_object["elements"]
        assert len(pair_elements) == 2
        pair_1_element_1 = pair_elements[0]
        assert pair_1_element_1["element_index"] == 0

    def test_list_download(self):
        fetch_response = self.dataset_collection_populator.create_list_in_history(
            self.history_id, direct_upload=True).json()
        dataset_collection = self.dataset_collection_populator.wait_for_fetched_collection(
            fetch_response)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 3, dataset_collection
        create_response = self._download_dataset_collection(
            history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        archive = zipfile.ZipFile(BytesIO(create_response.content))
        namelist = archive.namelist()
        assert len(namelist) == 3, f"Expected 3 elements in [{namelist}]"
        collection_name = dataset_collection['name']
        for element, zip_path in zip(returned_dce, namelist):
            assert f"{collection_name}/{element['element_identifier']}.{element['object']['file_ext']}" == zip_path

    def test_pair_download(self):
        fetch_response = self.dataset_collection_populator.create_pair_in_history(
            self.history_id, direct_upload=True).json()
        dataset_collection = self.dataset_collection_populator.wait_for_fetched_collection(
            fetch_response)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 2, dataset_collection
        hdca_id = dataset_collection['id']
        create_response = self._download_dataset_collection(
            history_id=self.history_id, hdca_id=hdca_id)
        self._assert_status_code_is(create_response, 200)
        archive = zipfile.ZipFile(BytesIO(create_response.content))
        namelist = archive.namelist()
        assert len(namelist) == 2, f"Expected 2 elements in [{namelist}]"
        collection_name = dataset_collection['name']
        for element, zip_path in zip(returned_dce, namelist):
            assert f"{collection_name}/{element['element_identifier']}.{element['object']['file_ext']}" == zip_path

    def test_list_pair_download(self):
        fetch_response = self.dataset_collection_populator.create_list_of_pairs_in_history(
            self.history_id).json()
        dataset_collection = self.dataset_collection_populator.wait_for_fetched_collection(
            fetch_response)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        list_collection_name = dataset_collection['name']
        pair = returned_dce[0]
        create_response = self._download_dataset_collection(
            history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        archive = zipfile.ZipFile(BytesIO(create_response.content))
        namelist = archive.namelist()
        assert len(namelist) == 2, f"Expected 2 elements in [{namelist}]"
        pair_collection_name = pair['element_identifier']
        for element, zip_path in zip(pair['object']['elements'], namelist):
            assert f"{list_collection_name}/{pair_collection_name}/{element['element_identifier']}.{element['object']['file_ext']}" == zip_path

    def test_list_list_download(self):
        dataset_collection = self.dataset_collection_populator.create_list_of_list_in_history(
            self.history_id).json()
        self.dataset_collection_populator.wait_for_dataset_collection(
            dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        create_response = self._download_dataset_collection(
            history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        archive = zipfile.ZipFile(BytesIO(create_response.content))
        namelist = archive.namelist()
        assert len(namelist) == 3, f"Expected 3 elements in [{namelist}]"

    def test_list_list_list_download(self):
        dataset_collection = self.dataset_collection_populator.create_list_of_list_in_history(
            self.history_id, collection_type='list:list:list').json()
        self.dataset_collection_populator.wait_for_dataset_collection(
            dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        create_response = self._download_dataset_collection(
            history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        archive = zipfile.ZipFile(BytesIO(create_response.content))
        namelist = archive.namelist()
        assert len(namelist) == 3, f"Expected 3 elements in [{namelist}]"

    def test_hda_security(self):
        element_identifiers = self.dataset_collection_populator.pair_identifiers(
            self.history_id)
        self.dataset_populator.make_private(self.history_id,
                                            element_identifiers[0]["id"])
        with self._different_user():
            history_id = self.dataset_populator.new_history()
            payload = dict(
                instance_type="history",
                history_id=history_id,
                element_identifiers=element_identifiers,
                collection_type="paired",
            )
            create_response = self._post("dataset_collections",
                                         payload,
                                         json=True)
            self._assert_status_code_is(create_response, 403)

    def test_enforces_unique_names(self):
        element_identifiers = self.dataset_collection_populator.list_identifiers(
            self.history_id)
        element_identifiers[2]["name"] = element_identifiers[0]["name"]
        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=element_identifiers,
            collection_type="list",
        )

        create_response = self._post("dataset_collections", payload, json=True)
        self._assert_status_code_is(create_response, 400)

    def test_upload_collection(self):
        elements = [{
            "src":
            "files",
            "dbkey":
            "hg19",
            "info":
            "my cool bed",
            "tags":
            ["name:data1", "group:condition:treated", "machine:illumina"]
        }]
        targets = [{
            "destination": {
                "type": "hdca"
            },
            "elements": elements,
            "collection_type": "list",
            "name": "Test upload",
            "tags": ["name:collection1"]
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {
                "files_0|file_data":
                open(self.test_data_resolver.get_filename("4.bed"))
            },
        }
        self.dataset_populator.fetch(payload)
        hdca = self._assert_one_collection_created_in_history()
        self.assertEqual(hdca["name"], "Test upload")
        hdca_tags = hdca["tags"]
        assert len(hdca_tags) == 1
        assert "name:collection1" in hdca_tags
        assert len(hdca["elements"]) == 1, hdca
        element0 = hdca["elements"][0]
        assert element0["element_identifier"] == "4.bed"
        dataset0 = element0["object"]
        assert dataset0["file_size"] == 61
        dataset_tags = dataset0["tags"]
        assert len(dataset_tags) == 3, dataset0

    def test_upload_nested(self):
        elements = [{
            "name":
            "samp1",
            "elements": [{
                "src": "files",
                "dbkey": "hg19",
                "info": "my cool bed"
            }]
        }]
        targets = [{
            "destination": {
                "type": "hdca"
            },
            "elements": elements,
            "collection_type": "list:list",
            "name": "Test upload",
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {
                "files_0|file_data":
                open(self.test_data_resolver.get_filename("4.bed"))
            },
        }
        self.dataset_populator.fetch(payload)
        hdca = self._assert_one_collection_created_in_history()
        self.assertEqual(hdca["name"], "Test upload")
        assert len(hdca["elements"]) == 1, hdca
        element0 = hdca["elements"][0]
        assert element0["element_identifier"] == "samp1"

    @skip_if_github_down
    def test_upload_collection_from_url(self):
        elements = [{
            "src": "url",
            "url":
            "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed",
            "info": "my cool bed"
        }]
        targets = [{
            "destination": {
                "type": "hdca"
            },
            "elements": elements,
            "collection_type": "list",
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {
                "files_0|file_data":
                open(self.test_data_resolver.get_filename("4.bed"))
            },
        }
        self.dataset_populator.fetch(payload)
        hdca = self._assert_one_collection_created_in_history()
        assert len(hdca["elements"]) == 1, hdca
        element0 = hdca["elements"][0]
        assert element0["element_identifier"] == "4.bed"
        assert element0["object"]["file_size"] == 61

    @skip_if_github_down
    def test_upload_collection_failed_expansion_url(self):
        targets = [{
            "destination": {
                "type": "hdca"
            },
            "elements_from":
            "bagit",
            "collection_type":
            "list",
            "src":
            "url",
            "url":
            "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed",
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {
                "files_0|file_data":
                open(self.test_data_resolver.get_filename("4.bed"))
            },
        }
        self.dataset_populator.fetch(payload, assert_ok=False, wait=True)
        hdca = self._assert_one_collection_created_in_history()
        assert hdca["populated"] is False
        assert "bagit.txt" in hdca["populated_state_message"], hdca

    def _assert_one_collection_created_in_history(self):
        contents_response = self._get(
            f"histories/{self.history_id}/contents/dataset_collections")
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        assert len(contents) == 1
        hdca = contents[0]
        assert hdca["history_content_type"] == "dataset_collection"
        hdca_id = hdca["id"]
        collection_response = self._get(
            f"histories/{self.history_id}/contents/dataset_collections/{hdca_id}"
        )
        self._assert_status_code_is(collection_response, 200)
        return collection_response.json()

    def _check_create_response(self, create_response):
        self._assert_status_code_is(create_response, 200)
        dataset_collection = create_response.json()
        self._assert_has_keys(dataset_collection, "elements", "url", "name",
                              "collection_type", "element_count")
        return dataset_collection

    def _download_dataset_collection(self, history_id, hdca_id):
        return self._get(
            f"histories/{history_id}/contents/dataset_collections/{hdca_id}/download"
        )

    def test_collection_contents_security(self):
        # request contents on an hdca that doesn't belong to user
        hdca, contents_url = self._create_collection_contents_pair()
        with self._different_user():
            contents_response = self._get(contents_url)
            self._assert_status_code_is(contents_response, 403)

    def test_collection_contents_invalid_collection(self):
        # request an invalid collection from a valid hdca, should get 404
        hdca, contents_url = self._create_collection_contents_pair()
        response = self._get(contents_url)
        self._assert_status_code_is(response, 200)
        fake_collection_id = '5d7db0757a2eb7ef'
        fake_contents_url = f"/api/dataset_collections/{hdca['id']}/contents/{fake_collection_id}"
        error_response = self._get(fake_contents_url)
        assert_object_id_error(error_response)

    def test_show_dataset_collection(self):
        fetch_response = self.dataset_collection_populator.create_list_in_history(
            self.history_id, direct_upload=True).json()
        dataset_collection = self.dataset_collection_populator.wait_for_fetched_collection(
            fetch_response)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 3, dataset_collection
        hdca_id = dataset_collection['id']
        dataset_collection_url = f"/api/dataset_collections/{hdca_id}"
        dataset_collection = self._get(dataset_collection_url).json()
        assert dataset_collection['id'] == hdca_id
        assert dataset_collection['collection_type'] == 'list'

    def test_show_dataset_collection_contents(self):
        # Get contents_url from history contents, use it to show the first level
        # of collection contents in the created HDCA, then use it again to drill
        # down into the nested collection contents
        hdca = self.dataset_collection_populator.create_list_of_list_in_history(
            self.history_id).json()
        root_contents_url = self._get_contents_url_for_hdca(hdca)

        # check root contents for this collection
        root_contents = self._get(root_contents_url).json()
        assert len(root_contents) == len(hdca['elements'])
        self._compare_collection_contents_elements(root_contents,
                                                   hdca['elements'])

        # drill down, retrieve nested collection contents
        assert 'object' in root_contents[0]
        assert 'contents_url' in root_contents[0]['object']
        drill_contents_url = root_contents[0]['object']['contents_url']
        drill_contents = self._get(drill_contents_url).json()
        assert len(drill_contents) == len(
            hdca['elements'][0]['object']['elements'])
        self._compare_collection_contents_elements(
            drill_contents, hdca['elements'][0]['object']['elements'])

    def test_collection_contents_limit_offset(self):
        # check limit/offset params for collection contents endpoint
        hdca, root_contents_url = self._create_collection_contents_pair()

        # check limit
        limited_contents = self._get(f"{root_contents_url}?limit=1").json()
        assert len(limited_contents) == 1
        assert limited_contents[0]['element_index'] == 0

        # check offset
        offset_contents = self._get(f"{root_contents_url}?offset=1").json()
        assert len(offset_contents) == 1
        assert offset_contents[0]['element_index'] == 1

    def test_get_suitable_converters_single_datatype(self):
        response = self.dataset_collection_populator.upload_collection(
            self.history_id,
            "list:paired",
            elements=[{
                "name":
                "test0",
                "elements": [
                    {
                        "src": "pasted",
                        "paste_content": "123\n",
                        "name": "forward",
                        "ext": "bed"
                    },
                    {
                        "src": "pasted",
                        "paste_content": "456\n",
                        "name": "reverse",
                        "ext": "bed"
                    },
                ]
            }, {
                "name":
                "test1",
                "elements": [
                    {
                        "src": "pasted",
                        "paste_content": "789\n",
                        "name": "forward",
                        "ext": "bed"
                    },
                    {
                        "src": "pasted",
                        "paste_content": "0ab\n",
                        "name": "reverse",
                        "ext": "bed"
                    },
                ]
            }])
        self._assert_status_code_is(response, 200)
        hdca_list_id = response.json()["outputs"][0]["id"]
        converters = self._get("dataset_collections/" + hdca_list_id +
                               "/suitable_converters")
        expected = [  # This list is subject to change, but it's unlikely we'll be removing converters
            'CONVERTER_bed_to_fli_0', 'CONVERTER_bed_gff_or_vcf_to_bigwig_0',
            'CONVERTER_bed_to_gff_0', 'CONVERTER_interval_to_bgzip_0',
            'tabular_to_csv', 'CONVERTER_interval_to_bed6_0',
            'CONVERTER_interval_to_bedstrict_0',
            'CONVERTER_interval_to_tabix_0', 'CONVERTER_interval_to_bed12_0'
        ]
        actual = []
        for converter in converters.json():
            actual.append(converter["tool_id"])
        missing_expected_converters = set(expected) - set(actual)
        assert not missing_expected_converters, f"Expected converter(s) {', '.join(missing_expected_converters)} missing from response"

    def test_get_suitable_converters_different_datatypes_matches(self):
        response = self.dataset_collection_populator.upload_collection(
            self.history_id,
            "list:paired",
            elements=[{
                "name":
                "test0",
                "elements": [
                    {
                        "src": "pasted",
                        "paste_content": "123\n",
                        "name": "forward",
                        "ext": "bed"
                    },
                    {
                        "src": "pasted",
                        "paste_content": "456\n",
                        "name": "reverse",
                        "ext": "bed"
                    },
                ]
            }, {
                "name":
                "test1",
                "elements": [
                    {
                        "src": "pasted",
                        "paste_content": "789\n",
                        "name": "forward",
                        "ext": "tabular"
                    },
                    {
                        "src": "pasted",
                        "paste_content": "0ab\n",
                        "name": "reverse",
                        "ext": "tabular"
                    },
                ]
            }])
        self._assert_status_code_is(response, 200)
        hdca_list_id = response.json()["outputs"][0]["id"]
        converters = self._get("dataset_collections/" + hdca_list_id +
                               "/suitable_converters")
        expected = 'tabular_to_csv'
        actual = []
        for converter in converters.json():
            actual.append(converter["tool_id"])
        assert expected in actual

    def test_get_suitable_converters_different_datatypes_no_matches(self):
        response = self.dataset_collection_populator.upload_collection(
            self.history_id,
            "list:paired",
            elements=[{
                "name":
                "test0",
                "elements": [
                    {
                        "src": "pasted",
                        "paste_content": "123\n",
                        "name": "forward",
                        "ext": "bed"
                    },
                    {
                        "src": "pasted",
                        "paste_content": "456\n",
                        "name": "reverse",
                        "ext": "bed"
                    },
                ]
            }, {
                "name":
                "test1",
                "elements": [
                    {
                        "src": "pasted",
                        "paste_content": "789\n",
                        "name": "forward",
                        "ext": "fasta"
                    },
                    {
                        "src": "pasted",
                        "paste_content": "0ab\n",
                        "name": "reverse",
                        "ext": "fasta"
                    },
                ]
            }])
        self._assert_status_code_is(response, 200)
        hdca_list_id = response.json()["outputs"][0]["id"]
        converters = self._get("dataset_collections/" + hdca_list_id +
                               "/suitable_converters")
        actual: List[str] = []
        for converter in converters.json():
            actual.append(converter["tool_id"])
        assert actual == []

    def test_collection_tools_tag_propagation(self):
        elements = [{"src": "files", "tags": ["name:element_tag"]}]
        targets = [{
            "destination": {
                "type": "hdca"
            },
            "elements": elements,
            "collection_type": "list",
            "name": "Test collection",
            "tags": ["name:collection_tag"]
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {
                "files_0|file_data":
                open(self.test_data_resolver.get_filename("4.bed"))
            },
        }
        hdca_id = self.dataset_populator.fetch(
            payload).json()['output_collections'][0]['id']
        inputs = {
            "input": {
                "batch": False,
                "src": "hdca",
                "id": hdca_id
            },
        }
        payload = self.dataset_populator.run_tool_payload(
            tool_id='__FILTER_FAILED_DATASETS__',
            inputs=inputs,
            history_id=self.history_id,
            input_format='legacy',
        )
        response = self._post("tools", payload).json()
        self.dataset_populator.wait_for_history(self.history_id,
                                                assert_ok=False)
        output_collection = response["output_collections"][0]
        # collection should not inherit tags from input collection elements, only parent collection
        assert output_collection['tags'] == ["name:collection_tag"]
        element = output_collection['elements'][0]
        # new element hda should have tags copied from old hda
        assert element['object']['tags'] == ['name:element_tag']

    def _compare_collection_contents_elements(self, contents_elements,
                                              hdca_elements):
        # compare collection api results to existing hdca element contents
        fields = [
            'element_identifier', 'element_index', 'element_type', 'id',
            'model_class'
        ]
        for (content_element, hdca_element) in zip(contents_elements,
                                                   hdca_elements):
            for f in fields:
                assert content_element[f] == hdca_element[f]

    def _create_collection_contents_pair(self):
        # Create a simple collection, return hdca and contents_url
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id, instance_type="history")
        create_response = self._post("dataset_collections", payload, json=True)
        hdca = self._check_create_response(create_response)
        root_contents_url = self._get_contents_url_for_hdca(hdca)
        return hdca, root_contents_url

    def _get_contents_url_for_hdca(self, hdca):
        # look up the history contents using optional serialization key
        history_contents_url = f"histories/{self.history_id}/contents?v=dev&view=summary&keys=contents_url"
        json = self._get(history_contents_url).json()

        # filter out the collection we just made id = hdca.id
        # make sure the contents_url appears
        def find_hdca(c):
            return c['history_content_type'] == 'dataset_collection' and c[
                'id'] == hdca['id']

        matches = list(filter(find_hdca, json))
        assert len(matches) == 1
        assert 'contents_url' in matches[0]

        return matches[0]['contents_url']
예제 #5
0
class DatasetCollectionApiTestCase(ApiTestCase):
    def setUp(self):
        super(DatasetCollectionApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(
            self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_create_pair_from_history(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            instance_type="history",
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_datasets = dataset_collection["elements"]
        assert len(returned_datasets) == 2, dataset_collection

    def test_create_list_from_history(self):
        element_identifiers = self.dataset_collection_populator.list_identifiers(
            self.history_id)

        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )

        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_datasets = dataset_collection["elements"]
        assert len(returned_datasets) == 3, dataset_collection

    def test_create_list_of_existing_pairs(self):
        pair_payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            instance_type="history",
        )
        pair_create_response = self._post("dataset_collections", pair_payload)
        dataset_collection = self._check_create_response(pair_create_response)
        hdca_id = dataset_collection["id"]

        element_identifiers = [dict(name="test1", src="hdca", id=hdca_id)]

        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_collections = dataset_collection["elements"]
        assert len(returned_collections) == 1, dataset_collection

    def test_create_list_of_new_pairs(self):
        identifiers = self.dataset_collection_populator.nested_collection_identifiers(
            self.history_id, "list:paired")
        payload = dict(
            collection_type="list:paired",
            instance_type="history",
            history_id=self.history_id,
            name="a nested collection",
            element_identifiers=json.dumps(identifiers),
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        assert dataset_collection["collection_type"] == "list:paired"
        assert dataset_collection["name"] == "a nested collection"
        returned_collections = dataset_collection["elements"]
        assert len(returned_collections) == 1, dataset_collection
        pair_1_element = returned_collections[0]
        self._assert_has_keys(pair_1_element, "element_identifier",
                              "element_index", "object")
        assert pair_1_element[
            "element_identifier"] == "test_level_1", pair_1_element
        assert pair_1_element["element_index"] == 0, pair_1_element
        pair_1_object = pair_1_element["object"]
        self._assert_has_keys(pair_1_object, "collection_type", "elements",
                              "element_count")
        self.assertEqual(pair_1_object["collection_type"], "paired")
        self.assertEqual(pair_1_object["populated"], True)
        pair_elements = pair_1_object["elements"]
        assert len(pair_elements) == 2
        pair_1_element_1 = pair_elements[0]
        assert pair_1_element_1["element_index"] == 0

    def test_list_download(self):
        fetch_response = self.dataset_collection_populator.create_list_in_history(
            self.history_id, direct_upload=True).json()
        dataset_collection = self.dataset_collection_populator.wait_for_fetched_collection(
            fetch_response)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 3, dataset_collection
        create_response = self._download_dataset_collection(
            history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=BytesIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 3, "Expected 3 elements in [%s]" % namelist
        collection_name = dataset_collection['name']
        for element, zip_path in zip(returned_dce, namelist):
            assert "%s/%s.%s" % (collection_name,
                                 element['element_identifier'],
                                 element['object']['file_ext']) == zip_path

    def test_pair_download(self):
        fetch_response = self.dataset_collection_populator.create_pair_in_history(
            self.history_id, direct_upload=True).json()
        dataset_collection = self.dataset_collection_populator.wait_for_fetched_collection(
            fetch_response)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 2, dataset_collection
        hdca_id = dataset_collection['id']
        create_response = self._download_dataset_collection(
            history_id=self.history_id, hdca_id=hdca_id)
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=BytesIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 2, "Expected 2 elements in [%s]" % namelist
        collection_name = dataset_collection['name']
        for element, zip_path in zip(returned_dce, namelist):
            assert "%s/%s.%s" % (collection_name,
                                 element['element_identifier'],
                                 element['object']['file_ext']) == zip_path

    def test_list_pair_download(self):
        fetch_response = self.dataset_collection_populator.create_list_of_pairs_in_history(
            self.history_id).json()
        dataset_collection = self.dataset_collection_populator.wait_for_fetched_collection(
            fetch_response)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        list_collection_name = dataset_collection['name']
        pair = returned_dce[0]
        create_response = self._download_dataset_collection(
            history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=BytesIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 2, "Expected 2 elements in [%s]" % namelist
        pair_collection_name = pair['element_identifier']
        for element, zip_path in zip(pair['object']['elements'], namelist):
            assert "%s/%s/%s.%s" % (list_collection_name, pair_collection_name,
                                    element['element_identifier'],
                                    element['object']['file_ext']) == zip_path

    def test_list_list_download(self):
        dataset_collection = self.dataset_collection_populator.create_list_of_list_in_history(
            self.history_id).json()
        self.dataset_collection_populator.wait_for_dataset_collection(
            dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        create_response = self._download_dataset_collection(
            history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=BytesIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 3, "Expected 3 elements in [%s]" % namelist

    def test_list_list_list_download(self):
        dataset_collection = self.dataset_collection_populator.create_list_of_list_in_history(
            self.history_id, collection_type='list:list:list').json()
        self.dataset_collection_populator.wait_for_dataset_collection(
            dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        create_response = self._download_dataset_collection(
            history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=BytesIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 3, "Expected 3 elements in [%s]" % namelist

    def test_hda_security(self):
        element_identifiers = self.dataset_collection_populator.pair_identifiers(
            self.history_id)
        self.dataset_populator.make_private(self.history_id,
                                            element_identifiers[0]["id"])
        with self._different_user():
            history_id = self.dataset_populator.new_history()
            payload = dict(
                instance_type="history",
                history_id=history_id,
                element_identifiers=json.dumps(element_identifiers),
                collection_type="paired",
            )
            create_response = self._post("dataset_collections", payload)
            self._assert_status_code_is(create_response, 403)

    def test_enforces_unique_names(self):
        element_identifiers = self.dataset_collection_populator.list_identifiers(
            self.history_id)
        element_identifiers[2]["name"] = element_identifiers[0]["name"]
        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )

        create_response = self._post("dataset_collections", payload)
        self._assert_status_code_is(create_response, 400)

    def test_upload_collection(self):
        elements = [{
            "src":
            "files",
            "dbkey":
            "hg19",
            "info":
            "my cool bed",
            "tags":
            ["name:data1", "group:condition:treated", "machine:illumina"]
        }]
        targets = [{
            "destination": {
                "type": "hdca"
            },
            "elements": elements,
            "collection_type": "list",
            "name": "Test upload",
            "tags": ["name:collection1"]
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {
                "files_0|file_data":
                open(self.test_data_resolver.get_filename("4.bed"))
            },
        }
        self.dataset_populator.fetch(payload)
        hdca = self._assert_one_collection_created_in_history()
        self.assertEqual(hdca["name"], "Test upload")
        hdca_tags = hdca["tags"]
        assert len(hdca_tags) == 1
        assert "name:collection1" in hdca_tags
        assert len(hdca["elements"]) == 1, hdca
        element0 = hdca["elements"][0]
        assert element0["element_identifier"] == "4.bed"
        dataset0 = element0["object"]
        assert dataset0["file_size"] == 61
        dataset_tags = dataset0["tags"]
        assert len(dataset_tags) == 3, dataset0

    def test_upload_nested(self):
        elements = [{
            "name":
            "samp1",
            "elements": [{
                "src": "files",
                "dbkey": "hg19",
                "info": "my cool bed"
            }]
        }]
        targets = [{
            "destination": {
                "type": "hdca"
            },
            "elements": elements,
            "collection_type": "list:list",
            "name": "Test upload",
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {
                "files_0|file_data":
                open(self.test_data_resolver.get_filename("4.bed"))
            },
        }
        self.dataset_populator.fetch(payload)
        hdca = self._assert_one_collection_created_in_history()
        self.assertEqual(hdca["name"], "Test upload")
        assert len(hdca["elements"]) == 1, hdca
        element0 = hdca["elements"][0]
        assert element0["element_identifier"] == "samp1"

    @skip_if_github_down
    def test_upload_collection_from_url(self):
        elements = [{
            "src": "url",
            "url":
            "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed",
            "info": "my cool bed"
        }]
        targets = [{
            "destination": {
                "type": "hdca"
            },
            "elements": elements,
            "collection_type": "list",
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {
                "files_0|file_data":
                open(self.test_data_resolver.get_filename("4.bed"))
            },
        }
        self.dataset_populator.fetch(payload)
        hdca = self._assert_one_collection_created_in_history()
        assert len(hdca["elements"]) == 1, hdca
        element0 = hdca["elements"][0]
        assert element0["element_identifier"] == "4.bed"
        assert element0["object"]["file_size"] == 61

    def _assert_one_collection_created_in_history(self):
        contents_response = self._get(
            "histories/%s/contents/dataset_collections" % self.history_id)
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        assert len(contents) == 1
        hdca = contents[0]
        assert hdca["history_content_type"] == "dataset_collection"
        hdca_id = hdca["id"]
        collection_response = self._get(
            "histories/%s/contents/dataset_collections/%s" %
            (self.history_id, hdca_id))
        self._assert_status_code_is(collection_response, 200)
        return collection_response.json()

    def _check_create_response(self, create_response):
        self._assert_status_code_is(create_response, 200)
        dataset_collection = create_response.json()
        self._assert_has_keys(dataset_collection, "elements", "url", "name",
                              "collection_type", "element_count")
        return dataset_collection

    def _download_dataset_collection(self, history_id, hdca_id):
        return self._get(
            "histories/%s/contents/dataset_collections/%s/download" %
            (history_id, hdca_id))