Beispiel #1
0
class JobRecoveryBeforeHandledIntegerationTestCase(
        integration_util.IntegrationTestCase):
    framework_tool_and_types = True

    def setUp(self):
        super(JobRecoveryBeforeHandledIntegerationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        config["job_config_file"] = SIMPLE_JOB_CONFIG_FILE
        config["server_name"] = "moo"

    def handle_reconfigure_galaxy_config_kwds(self, config):
        config["server_name"] = "main"

    def test_recovery(self):
        history_id = self.dataset_populator.new_history()
        self.dataset_populator.run_tool(
            "exit_code_oom",
            {},
            history_id,
            assert_ok=False,
        ).json()
        self.restart(
            handle_reconfig=self.handle_reconfigure_galaxy_config_kwds)
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_populator.wait_for_history(history_id, assert_ok=True)
class JobRecoveryAfterHandledIntegerationTestCase(integration_util.IntegrationTestCase):
    framework_tool_and_types = True

    def setUp(self):
        super(JobRecoveryAfterHandledIntegerationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        config["job_config_file"] = DELAY_JOB_CONFIG_FILE

    def handle_reconfigure_galaxy_config_kwds(self, config):
        config["job_config_file"] = SIMPLE_JOB_CONFIG_FILE

    def test_recovery(self):
        history_id = self.dataset_populator.new_history()
        self.dataset_populator.run_tool(
            "exit_code_oom",
            {},
            history_id,
            assert_ok=False,
        ).json()
        self.restart(handle_reconfig=self.handle_reconfigure_galaxy_config_kwds)
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_populator.wait_for_history(history_id, assert_ok=True)
class MaximumWorkflowInvocationDurationTestCase(integration_util.IntegrationTestCase):

    framework_tool_and_types = True

    def setUp(self):
        super(MaximumWorkflowInvocationDurationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.workflow_populator = WorkflowPopulator(self.galaxy_interactor)

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        config["maximum_workflow_invocation_duration"] = 20

    def do_test(self):
        workflow = self.workflow_populator.load_workflow_from_resource("test_workflow_pause")
        workflow_id = self.workflow_populator.create_workflow(workflow)
        history_id = self.dataset_populator.new_history()
        hda1 = self.dataset_populator.new_dataset(history_id, content="1 2 3")
        index_map = {
            '0': dict(src="hda", id=hda1["id"])
        }
        request = {}
        request["history"] = "hist_id=%s" % history_id
        request["inputs"] = dumps(index_map)
        request["inputs_by"] = 'step_index'
        url = "workflows/%s/invocations" % (workflow_id)
        invocation_response = self._post(url, data=request)
        invocation_url = url + "/" + invocation_response.json()["id"]
        time.sleep(5)
        state = self._get(invocation_url).json()["state"]
        assert state != "failed", state
        time.sleep(35)
        state = self._get(invocation_url).json()["state"]
        assert state == "failed", state
class MaximumWorkflowInvocationDurationTestCase(
        integration_util.IntegrationTestCase):
    """Start a Pulsar job."""

    framework_tool_and_types = True

    def setUp(self):
        super(MaximumWorkflowInvocationDurationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.workflow_populator = WorkflowPopulator(self.galaxy_interactor)

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        config["maximum_workflow_invocation_duration"] = 20

    def do_test(self):
        workflow = self.workflow_populator.load_workflow_from_resource(
            "test_workflow_pause")
        workflow_id = self.workflow_populator.create_workflow(workflow)
        history_id = self.dataset_populator.new_history()
        hda1 = self.dataset_populator.new_dataset(history_id, content="1 2 3")
        index_map = {'0': dict(src="hda", id=hda1["id"])}
        request = {}
        request["history"] = "hist_id=%s" % history_id
        request["inputs"] = dumps(index_map)
        request["inputs_by"] = 'step_index'
        url = "workflows/%s/invocations" % (workflow_id)
        invocation_response = self._post(url, data=request)
        invocation_url = url + "/" + invocation_response.json()["id"]
        time.sleep(5)
        state = self._get(invocation_url).json()["state"]
        assert state != "failed", state
        time.sleep(35)
        state = self._get(invocation_url).json()["state"]
        assert state == "failed", state
Beispiel #5
0
class PageJsonEncodingIntegrationTestCase(integration_util.IntegrationTestCase
                                          ):
    def setUp(self):
        super(PageJsonEncodingIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    def test_page_encoding(self):
        history_id = self.dataset_populator.new_history()
        request = dict(
            slug="mypage",
            title="MY PAGE",
            content=
            '''<p>Page!<div class="embedded-item" id="History-%s"></div></p>'''
            % history_id,
        )
        page_response = self._post("pages", request)
        api_asserts.assert_status_code_is_ok(page_response)
        sa_session = self._app.model.context
        page_revision = sa_session.query(model.PageRevision).all()[0]
        assert '''id="History-1"''' in page_revision.content, page_revision.content
        assert '''id="History-%s"''' % history_id not in page_revision.content, page_revision.content

        show_page_response = self._get("pages/%s" % page_response.json()["id"])
        api_asserts.assert_status_code_is_ok(show_page_response)
        content = show_page_response.json()["content"]
        assert '''id="History-1"''' not in content, content
        assert '''id="History-%s"''' % history_id in content, content
Beispiel #6
0
class BaseCheckUploadContentConfigurationTestCase(integration_util.IntegrationTestCase):

    framework_tool_and_types = True

    def setUp(self):
        super(BaseCheckUploadContentConfigurationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()
Beispiel #7
0
 def test_400_on_invalid_embedded_content(self):
     dataset_populator = DatasetPopulator(self.galaxy_interactor)
     valid_id = dataset_populator.new_history()
     page_request = self._test_page_payload(slug="invalid-id-encding")
     page_request["content"] = '''<p>Page!<div class="embedded-item" id="CoolObject-%s"></div></p>''' % valid_id
     page_response = self._post("pages", page_request)
     self._assert_status_code_is(page_response, 400)
     self._assert_error_code_is(page_response, error_codes.USER_REQUEST_INVALID_PARAMETER)
     assert "embedded HTML content" in page_response.text
class BaseUploadContentConfigurationTestCase(integration_util.IntegrationTestCase):

    framework_tool_and_types = True

    def setUp(self):
        super(BaseUploadContentConfigurationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.library_populator = LibraryPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()
Beispiel #9
0
class FailJobWhenToolUnavailableTestCase(integration_util.IntegrationTestCase):

    require_admin_user = True

    def setUp(self):
        super(FailJobWhenToolUnavailableTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.workflow_populator = WorkflowPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    @classmethod
    def handle_galaxy_config_kwds(
        cls,
        config,
    ):
        # config["jobs_directory"] = cls.jobs_directory
        # Disable tool dependency resolution.
        config["tool_dependency_dir"] = "none"

    def test_fail_job_when_tool_unavailable(self):
        self.workflow_populator.run_workflow("""
class: GalaxyWorkflow
steps:
  - label: sleep
    run:
      class: GalaxyTool
      command: sleep 20s && echo 'hello world 2' > '$output1'
      outputs:
        output1:
          format: txt
  - tool_id: cat1
    state:
      input1:
        $link: sleep#output1
      queries:
        input2:
          $link: sleep#output1
""",
                                             history_id=self.history_id,
                                             assert_ok=False,
                                             wait=False)
        # Wait until workflow is fully scheduled, otherwise can't test effect of removing tool from queued job
        time.sleep(10)
        self._app.toolbox.remove_tool_by_id('cat1')
        self.dataset_populator.wait_for_history(self.history_id,
                                                assert_ok=False)
        state_details = self.galaxy_interactor.get(
            'histories/%s' % self.history_id).json()['state_details']
        assert state_details['running'] == 0
        assert state_details['ok'] == 1
        assert state_details['error'] == 1
        failed_hda = self.dataset_populator.get_history_dataset_details(
            history_id=self.history_id, assert_ok=False, details=True)
        assert failed_hda['state'] == 'error'
        job = self.galaxy_interactor.get("jobs/%s" %
                                         failed_hda['creating_job']).json()
        assert job['state'] == 'error'
class DockerizedJobsIntegrationTestCase(integration_util.IntegrationTestCase, RunsEnvironmentJobs):

    framework_tool_and_types = True

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        cls.jobs_directory = tempfile.mkdtemp()
        config["jobs_directory"] = cls.jobs_directory
        config["job_config_file"] = DOCKERIZED_JOB_CONFIG_FILE
        # Disable tool dependency resolution.
        config["tool_dependency_dir"] = "none"
        config["enable_beta_mulled_containers"] = "true"

    def setUp(self):
        super(DockerizedJobsIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_explicit(self):
        self.dataset_populator.run_tool("mulled_example_explicit", {}, self.history_id)
        self.dataset_populator.wait_for_history(self.history_id, assert_ok=True)
        output = self.dataset_populator.get_history_dataset_content(self.history_id)
        assert "0.7.15-r1140" in output

    def test_mulled_simple(self):
        self.dataset_populator.run_tool("mulled_example_simple", {}, self.history_id)
        self.dataset_populator.wait_for_history(self.history_id, assert_ok=True)
        output = self.dataset_populator.get_history_dataset_content(self.history_id)
        assert "0.7.15-r1140" in output

    def test_docker_job_enviornment(self):
        job_env = self._run_and_get_environment_properties("job_environment_default")

        euid = os.geteuid()
        egid = os.getgid()

        assert job_env.user_id == str(euid), job_env.user_id
        assert job_env.group_id == str(egid), job_env.group_id
        assert job_env.pwd.startswith(self.jobs_directory)
        assert job_env.pwd.endswith("/working")
        assert job_env.home.startswith(self.jobs_directory)
        assert job_env.home.endswith("/home")

    def test_docker_job_environment_legacy(self):
        job_env = self._run_and_get_environment_properties("job_environment_default_legacy")

        euid = os.geteuid()
        egid = os.getgid()

        assert job_env.user_id == str(euid), job_env.user_id
        assert job_env.group_id == str(egid), job_env.group_id
        assert job_env.pwd.startswith(self.jobs_directory)
        assert job_env.pwd.endswith("/working")
        # Should we change env_pass_through to just always include TMP and HOME for docker?
        # I'm not sure, if yes this would change.
        assert job_env.home == "/", job_env.home
class TestProvenance(api.ApiTestCase):

    def setUp(self):
        super(TestProvenance, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    def test_show_prov(self):
        history_id = self.dataset_populator.new_history()
        new_dataset1 = self.dataset_populator.new_dataset(history_id, content='for prov')
        prov_response = self._get("histories/%s/contents/%s/provenance" % (history_id, new_dataset1["id"]))
        self._assert_status_code_is(prov_response, 200)
        self._assert_has_keys(prov_response.json(), "job_id", "id", "stdout", "stderr", "parameters", "tool_id")
class TestProvenance(api.ApiTestCase):
    def setUp(self):
        super(TestProvenance, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    def test_show_prov(self):
        history_id = self.dataset_populator.new_history()
        new_dataset1 = self.dataset_populator.new_dataset(history_id,
                                                          content='for prov')
        prov_response = self._get("histories/%s/contents/%s/provenance" %
                                  (history_id, new_dataset1["id"]))
        self._assert_status_code_is(prov_response, 200)
        self._assert_has_keys(prov_response.json(), "job_id", "id", "stdout",
                              "stderr", "parameters", "tool_id")
Beispiel #13
0
 def test_legacy_r_mapping(self):
     """
     """
     tool_id = "legacy_R"
     dataset_populator = DatasetPopulator(self.galaxy_interactor)
     history_id = dataset_populator.new_history()
     endpoint = "tools/%s/install_dependencies" % tool_id
     data = {'id': tool_id}
     create_response = self._post(endpoint, data=data, admin=True)
     self._assert_status_code_is(create_response, 200)
     payload = dataset_populator.run_tool_payload(
         tool_id=tool_id,
         inputs={},
         history_id=history_id,
     )
     create_response = self._post("tools", data=payload)
     self._assert_status_code_is(create_response, 200)
     dataset_populator.wait_for_history(history_id, assert_ok=True)
Beispiel #14
0
 def test_legacy_r_mapping(self):
     """
     """
     tool_id = "legacy_R"
     dataset_populator = DatasetPopulator(self.galaxy_interactor)
     history_id = dataset_populator.new_history()
     endpoint = "tools/%s/install_dependencies" % tool_id
     data = {'id': tool_id}
     create_response = self._post(endpoint, data=data, admin=True)
     self._assert_status_code_is(create_response, 200)
     payload = dataset_populator.run_tool_payload(
         tool_id=tool_id,
         inputs={},
         history_id=history_id,
     )
     create_response = self._post("tools", data=payload)
     self._assert_status_code_is(create_response, 200)
     dataset_populator.wait_for_history(history_id, assert_ok=True)
 def test_runs_on_mule(self):
     tool_id = 'config_vars'
     expect_server_name = self.expected_server_name
     dataset_populator = DatasetPopulator(self.galaxy_interactor)
     history_id = dataset_populator.new_history()
     payload = dataset_populator.run_tool(
         tool_id=tool_id,
         inputs={'var': 'server_name'},
         history_id=history_id,
     )
     dataset_id = payload['outputs'][0]['id']
     dataset_populator.wait_for_dataset(history_id, dataset_id, assert_ok=True)
     output = dataset_populator.get_history_dataset_content(history_id, dataset_id=dataset_id).strip()
     assert output.startswith(expect_server_name), (
         "Job handler's server name '{output}' does not start with expected string '{expected}'".format(
             output=output,
             expected=expect_server_name,
         )
     )
Beispiel #16
0
class BaseWorkflowHandlerConfigurationTestCase(integration_util.IntegrationTestCase):

    framework_tool_and_types = True

    def setUp(self):
        super(BaseWorkflowHandlerConfigurationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.workflow_populator = WorkflowPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        config["job_config_file"] = WORKFLOW_HANDLER_CONFIGURATION_JOB_CONF

    def _invoke_n_workflows(self, n):
        workflow_id = self.workflow_populator.upload_yaml_workflow(PAUSE_WORKFLOW)
        history_id = self.history_id
        hda1 = self.dataset_populator.new_dataset(history_id, content="1 2 3")
        index_map = {
            '0': dict(src="hda", id=hda1["id"])
        }
        request = {}
        request["history"] = "hist_id=%s" % history_id
        request["inputs"] = dumps(index_map)
        request["inputs_by"] = 'step_index'
        url = "workflows/%s/invocations" % (workflow_id)
        for i in range(n):
            self._post(url, data=request)

    def _get_workflow_invocations(self):
        # Consider exposing handler via the API to reduce breaking
        # into Galaxy's internal state.
        app = self._app
        history_id = app.security.decode_id(self.history_id)
        sa_session = app.model.context.current
        history = sa_session.query(app.model.History).get(history_id)
        workflow_invocations = history.workflow_invocations
        return workflow_invocations

    @property
    def is_app_workflow_scheduler(self):
        return self._app.workflow_scheduling_manager.request_monitor is not None
class BaseUploadContentConfigurationTestCase(
        integration_util.IntegrationTestCase):

    framework_tool_and_types = True

    def setUp(self):
        super(BaseUploadContentConfigurationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.library_populator = LibraryPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def fetch_target(self, target, assert_ok=False, attach_test_file=False):
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps([target]),
        }
        if attach_test_file:
            payload["__files"] = {
                "files_0|file_data":
                open(self.test_data_resolver.get_filename("4.bed"))
            }

        response = self.dataset_populator.fetch(payload, assert_ok=assert_ok)
        return response

    @classmethod
    def temp_config_dir(cls, name):
        # realpath here to get around problems with symlinks being blocked.
        return os.path.realpath(
            os.path.join(cls._test_driver.galaxy_test_tmp_dir, name))

    def _write_file(self, dir_path, content, filename="test"):
        """Helper for writing ftp/server dir files."""
        self._ensure_directory(dir_path)
        path = os.path.join(dir_path, filename)
        with open(path, "w") as f:
            f.write(content)
        return path

    def _ensure_directory(self, path):
        if not os.path.exists(path):
            os.makedirs(path)
class BaseEmbeddedPulsarContainerIntegrationTestCase(
        integration_util.IntegrationTestCase):
    framework_tool_and_types = True

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        cls.jobs_directory = cls._test_driver.mkdtemp()
        config["jobs_directory"] = cls.jobs_directory
        config["job_config_file"] = cls.job_config_file
        disable_dependency_resolution(config)

    def setUp(self):
        super(BaseEmbeddedPulsarContainerIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    @classmethod
    def setUpClass(cls):
        skip_if_container_type_unavailable(cls)
        super(BaseEmbeddedPulsarContainerIntegrationTestCase, cls).setUpClass()
class BaseUploadContentConfigurationTestCase(integration_util.IntegrationTestCase):

    framework_tool_and_types = True

    def setUp(self):
        super(BaseUploadContentConfigurationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.library_populator = LibraryPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def fetch_target(self, target, assert_ok=False, attach_test_file=False):
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps([target]),
        }
        if attach_test_file:
            payload["__files"] = {"files_0|file_data": open(self.test_data_resolver.get_filename("4.bed"))}

        response = self.dataset_populator.fetch(payload, assert_ok=assert_ok)
        return response

    @classmethod
    def temp_config_dir(cls, name):
        # realpath here to get around problems with symlinks being blocked.
        return os.path.realpath(os.path.join(cls._test_driver.galaxy_test_tmp_dir, name))

    def _write_file(self, dir_path, content, filename="test"):
        """Helper for writing ftp/server dir files."""
        self._ensure_directory(dir_path)
        path = os.path.join(dir_path, filename)
        with open(path, "w") as f:
            f.write(content)
        return path

    def _ensure_directory(self, path):
        if not os.path.exists(path):
            os.makedirs(path)
Beispiel #20
0
class LibrariesApiTestCase(api.ApiTestCase, TestsDatasets):

    def setUp(self):
        super(LibrariesApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(self.galaxy_interactor)
        self.library_populator = LibraryPopulator(self.galaxy_interactor)

    def test_create(self):
        data = dict(name="CreateTestLibrary")
        create_response = self._post("libraries", data=data, admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, "name")
        assert library["name"] == "CreateTestLibrary"

    def test_delete(self):
        library = self.library_populator.new_library("DeleteTestLibrary")
        create_response = self._delete("libraries/%s" % library["id"], admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, "deleted")
        assert library["deleted"] is True
        # Test undeleting
        data = dict(undelete='true')
        create_response = self._delete("libraries/%s" % library["id"], data=data, admin=True)
        library = create_response.json()
        self._assert_status_code_is(create_response, 200)
        assert library["deleted"] is False

    def test_nonadmin(self):
        # Anons can't create libs
        data = dict(name="CreateTestLibrary")
        create_response = self._post("libraries", data=data, admin=False, anon=True)
        self._assert_status_code_is(create_response, 403)
        # Anons can't delete libs
        library = self.library_populator.new_library("AnonDeleteTestLibrary")
        create_response = self._delete("libraries/%s" % library["id"], admin=False, anon=True)
        self._assert_status_code_is(create_response, 403)
        # Anons can't update libs
        data = dict(name="ChangedName", description="ChangedDescription", synopsis='ChangedSynopsis')
        create_response = self._patch("libraries/%s" % library["id"], data=data, admin=False, anon=True)
        self._assert_status_code_is(create_response, 403)

    def test_update(self):
        library = self.library_populator.new_library("UpdateTestLibrary")
        data = dict(name='ChangedName', description='ChangedDescription', synopsis='ChangedSynopsis')
        create_response = self._patch("libraries/%s" % library["id"], data=data, admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, 'name', 'description', 'synopsis')
        assert library['name'] == 'ChangedName'
        assert library['description'] == 'ChangedDescription'
        assert library['synopsis'] == 'ChangedSynopsis'

    def test_create_private_library_permissions(self):
        library = self.library_populator.new_library("PermissionTestLibrary")
        library_id = library["id"]
        role_id = self.library_populator.user_private_role_id()
        self.library_populator.set_permissions(library_id, role_id)
        create_response = self._create_folder(library)
        self._assert_status_code_is(create_response, 200)

    def test_create_dataset_denied(self):
        library = self.library_populator.new_private_library("ForCreateDatasets")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3")['id']
        with self._different_user():
            payload = {'from_hda_id': hda_id}
            create_response = self._post("folders/%s/contents" % folder_id, payload)
            self._assert_status_code_is(create_response, 403)

    def test_show_private_dataset_permissions(self):
        library, library_dataset = self.library_populator.new_library_dataset_in_private_library("ForCreateDatasets", wait=True)
        with self._different_user():
            response = self.library_populator.show_ldda(library["id"], library_dataset["id"])
            # TODO: this should really be 403 and a proper JSON exception.
            self._assert_status_code_is(response, 400)

    def test_create_dataset(self):
        library, library_dataset = self.library_populator.new_library_dataset_in_private_library("ForCreateDatasets", wait=True)
        self._assert_has_keys(library_dataset, "peek", "data_type")
        assert library_dataset["peek"].find("create_test") >= 0
        assert library_dataset["file_ext"] == "txt", library_dataset["file_ext"]

    def test_create_dataset_in_folder(self):
        library = self.library_populator.new_private_library("ForCreateDatasets")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3")['id']
        payload = {'from_hda_id': hda_id}
        create_response = self._post("folders/%s/contents" % folder_id, payload)
        self._assert_status_code_is(create_response, 200)
        self._assert_has_keys(create_response.json(), "name", "id")

    def test_update_dataset_in_folder(self):
        library = self.library_populator.new_private_library("ForUpdateDataset")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3")['id']
        payload = {'from_hda_id': hda_id, 'create_type': 'file', 'folder_id': folder_id}
        ld = self._post("libraries/%s/contents" % folder_id, payload)
        data = {'name': 'updated_name', 'file_ext': 'fastq', 'misc_info': 'updated_info', 'genome_build': 'updated_genome_build'}
        create_response = self._patch("libraries/datasets/%s" % ld.json()["id"], data=data)
        self._assert_status_code_is(create_response, 200)
        self._assert_has_keys(create_response.json(), "name", "file_ext", "misc_info", "genome_build")

    def test_invalid_update_dataset_in_folder(self):
        library = self.library_populator.new_private_library("ForInvalidUpdateDataset")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3")['id']
        payload = {'from_hda_id': hda_id, 'create_type': 'file', 'folder_id': folder_id}
        ld = self._post("libraries/%s/contents" % folder_id, payload)
        data = {'file_ext': 'nonexisting_type'}
        create_response = self._patch("libraries/datasets/%s" % ld.json()["id"], data=data)
        self._assert_status_code_is(create_response, 400)
        assert 'This Galaxy does not recognize the datatype of:' in create_response.json()['err_msg']

    def test_create_datasets_in_library_from_collection(self):
        library = self.library_populator.new_private_library("ForCreateDatasetsFromCollection")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hdca_id = self.dataset_collection_populator.create_list_in_history(history_id, contents=["xxx", "yyy"]).json()["id"]
        payload = {'from_hdca_id': hdca_id, 'create_type': 'file', 'folder_id': folder_id}
        create_response = self._post("libraries/%s/contents" % library['id'], payload)
        self._assert_status_code_is(create_response, 200)

    def test_create_datasets_in_folder_from_collection(self):
        library = self.library_populator.new_private_library("ForCreateDatasetsFromCollection")
        history_id = self.dataset_populator.new_history()
        hdca_id = self.dataset_collection_populator.create_list_in_history(history_id, contents=["xxx", "yyy"]).json()["id"]
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        payload = {'from_hdca_id': hdca_id}
        create_response = self._post("folders/%s/contents" % folder_id, payload)
        self._assert_status_code_is(create_response, 200)
        assert len(create_response.json()) == 2
        # Also test that anything different from a flat dataset collection list
        # is refused
        hdca_pair_id = self.dataset_collection_populator.create_list_of_pairs_in_history(history_id).json()['id']
        payload = {'from_hdca_id': hdca_pair_id}
        create_response = self._post("folders/%s/contents" % folder_id, payload)
        self._assert_status_code_is(create_response, 501)
        assert create_response.json()['err_msg'] == 'Cannot add nested collections to library. Please flatten your collection first.'

    def _create_folder(self, library):
        create_data = dict(
            folder_id=library["root_folder_id"],
            create_type="folder",
            name="New Folder",
        )
        return self._post("libraries/%s/contents" % library["id"], data=create_data)
class LibrariesApiTestCase(api.ApiTestCase, TestsDatasets):
    def setUp(self):
        super(LibrariesApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(
            self.galaxy_interactor)
        self.library_populator = LibraryPopulator(self.galaxy_interactor)

    def test_create(self):
        data = dict(name="CreateTestLibrary")
        create_response = self._post("libraries", data=data, admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, "name")
        assert library["name"] == "CreateTestLibrary"

    def test_delete(self):
        library = self.library_populator.new_library("DeleteTestLibrary")
        create_response = self._delete("libraries/%s" % library["id"],
                                       admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, "deleted")
        assert library["deleted"] is True
        # Test undeleting
        data = dict(undelete=True)
        create_response = self._delete("libraries/%s" % library["id"],
                                       data=data,
                                       admin=True)
        library = create_response.json()
        self._assert_status_code_is(create_response, 200)
        assert library["deleted"] is False

    def test_nonadmin(self):
        # Anons can't create libs
        data = dict(name="CreateTestLibrary")
        create_response = self._post("libraries",
                                     data=data,
                                     admin=False,
                                     anon=True)
        self._assert_status_code_is(create_response, 403)
        # Anons can't delete libs
        library = self.library_populator.new_library("AnonDeleteTestLibrary")
        create_response = self._delete("libraries/%s" % library["id"],
                                       admin=False,
                                       anon=True)
        self._assert_status_code_is(create_response, 403)
        # Anons can't update libs
        data = dict(name="ChangedName",
                    description="ChangedDescription",
                    synopsis='ChangedSynopsis')
        create_response = self._patch("libraries/%s" % library["id"],
                                      data=data,
                                      admin=False,
                                      anon=True)
        self._assert_status_code_is(create_response, 403)

    def test_update(self):
        library = self.library_populator.new_library("UpdateTestLibrary")
        data = dict(name='ChangedName',
                    description='ChangedDescription',
                    synopsis='ChangedSynopsis')
        create_response = self._patch("libraries/%s" % library["id"],
                                      data=data,
                                      admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, 'name', 'description', 'synopsis')
        assert library['name'] == 'ChangedName'
        assert library['description'] == 'ChangedDescription'
        assert library['synopsis'] == 'ChangedSynopsis'

    def test_create_private_library_permissions(self):
        library = self.library_populator.new_library("PermissionTestLibrary")
        library_id = library["id"]
        role_id = self.library_populator.user_private_role_id()
        self.library_populator.set_permissions(library_id, role_id)
        create_response = self._create_folder(library)
        self._assert_status_code_is(create_response, 200)

    def test_create_dataset_denied(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasets")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id,
                                                    content="1 2 3")['id']
        with self._different_user():
            payload = {'from_hda_id': hda_id}
            create_response = self._post("folders/%s/contents" % folder_id,
                                         payload)
            self._assert_status_code_is(create_response, 403)

    def test_show_private_dataset_permissions(self):
        library, library_dataset = self.library_populator.new_library_dataset_in_private_library(
            "ForCreateDatasets", wait=True)
        with self._different_user():
            response = self.library_populator.show_ldda(
                library["id"], library_dataset["id"])
            # TODO: this should really be 403 and a proper JSON exception.
            self._assert_status_code_is(response, 400)

    def test_create_dataset(self):
        library, library_dataset = self.library_populator.new_library_dataset_in_private_library(
            "ForCreateDatasets", wait=True)
        self._assert_has_keys(library_dataset, "peek", "data_type")
        assert library_dataset["peek"].find("create_test") >= 0
        assert library_dataset["file_ext"] == "txt", library_dataset[
            "file_ext"]

    def test_fetch_upload_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder(
            "flat_zip")
        items = [{"src": "files", "dbkey": "hg19", "info": "my cool bed"}]
        targets = [{"destination": destination, "items": items}]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
            "__files": {
                "files_0|file_data":
                open(self.test_data_resolver.get_filename("4.bed"))
            },
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/4.bed")
        assert dataset["file_size"] == 61, dataset
        assert dataset["genome_build"] == "hg19", dataset
        assert dataset["misc_info"] == "my cool bed", dataset
        assert dataset["file_ext"] == "bed", dataset

    def test_fetch_zip_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder(
            "flat_zip")
        bed_test_data_path = self.test_data_resolver.get_filename("4.bed.zip")
        targets = [{
            "destination": destination,
            "items_from": "archive",
            "src": "files",
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
            "__files": {
                "files_0|file_data": open(bed_test_data_path, 'rb')
            }
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/4.bed")
        assert dataset["file_size"] == 61, dataset

    def test_fetch_single_url_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder(
            "single_url")
        items = [{
            "src":
            "url",
            "url":
            "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed"
        }]
        targets = [{"destination": destination, "items": items}]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/4.bed")
        assert dataset["file_size"] == 61, dataset

    def test_fetch_url_archive_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder(
            "single_url")
        targets = [{
            "destination":
            destination,
            "items_from":
            "archive",
            "src":
            "url",
            "url":
            "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed.zip",
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/4.bed")
        assert dataset["file_size"] == 61, dataset

    @unittest.skip  # reference URLs changed, checksums now invalid.
    def test_fetch_bagit_archive_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder(
            "bagit_archive")
        example_bag_path = self.test_data_resolver.get_filename(
            "example-bag.zip")
        targets = [{
            "destination": destination,
            "items_from": "bagit_archive",
            "src": "files",
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
            "__files": {
                "files_0|file_data": open(example_bag_path)
            },
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/README.txt")
        assert dataset["file_size"] == 66, dataset

        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/bdbag-profile.json")
        assert dataset["file_size"] == 723, dataset

    def _setup_fetch_to_folder(self, test_name):
        return self.library_populator.setup_fetch_to_folder(test_name)

    def test_create_dataset_in_folder(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasets")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id,
                                                    content="1 2 3")['id']
        payload = {'from_hda_id': hda_id}
        create_response = self._post("folders/%s/contents" % folder_id,
                                     payload)
        self._assert_status_code_is(create_response, 200)
        self._assert_has_keys(create_response.json(), "name", "id")

    def test_update_dataset_in_folder(self):
        library = self.library_populator.new_private_library(
            "ForUpdateDataset")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id,
                                                    content="1 2 3")['id']
        payload = {
            'from_hda_id': hda_id,
            'create_type': 'file',
            'folder_id': folder_id
        }
        ld = self._post("libraries/%s/contents" % folder_id, payload)
        data = {
            'name': 'updated_name',
            'file_ext': 'fastq',
            'misc_info': 'updated_info',
            'genome_build': 'updated_genome_build'
        }
        create_response = self._patch("libraries/datasets/%s" %
                                      ld.json()["id"],
                                      data=data)
        self._assert_status_code_is(create_response, 200)
        self._assert_has_keys(create_response.json(), "name", "file_ext",
                              "misc_info", "genome_build")

    def test_invalid_update_dataset_in_folder(self):
        library = self.library_populator.new_private_library(
            "ForInvalidUpdateDataset")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id,
                                                    content="1 2 3")['id']
        payload = {
            'from_hda_id': hda_id,
            'create_type': 'file',
            'folder_id': folder_id
        }
        ld = self._post("libraries/%s/contents" % folder_id, payload)
        data = {'file_ext': 'nonexisting_type'}
        create_response = self._patch("libraries/datasets/%s" %
                                      ld.json()["id"],
                                      data=data)
        self._assert_status_code_is(create_response, 400)
        assert 'This Galaxy does not recognize the datatype of:' in create_response.json(
        )['err_msg']

    def test_create_datasets_in_library_from_collection(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasetsFromCollection")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hdca_id = self.dataset_collection_populator.create_list_in_history(
            history_id, contents=["xxx", "yyy"],
            direct_upload=True).json()["outputs"][0]["id"]
        payload = {
            'from_hdca_id': hdca_id,
            'create_type': 'file',
            'folder_id': folder_id
        }
        create_response = self._post("libraries/%s/contents" % library['id'],
                                     payload)
        self._assert_status_code_is(create_response, 200)

    def test_create_datasets_in_folder_from_collection(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasetsFromCollection")
        history_id = self.dataset_populator.new_history()
        hdca_id = self.dataset_collection_populator.create_list_in_history(
            history_id, contents=["xxx", "yyy"],
            direct_upload=True).json()["outputs"][0]["id"]
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        payload = {'from_hdca_id': hdca_id}
        create_response = self._post("folders/%s/contents" % folder_id,
                                     payload)
        self._assert_status_code_is(create_response, 200)
        assert len(create_response.json()) == 2
        # Also test that anything different from a flat dataset collection list
        # is refused
        hdca_pair_id = self.dataset_collection_populator.create_list_of_pairs_in_history(
            history_id).json()["outputs"][0]['id']
        payload = {'from_hdca_id': hdca_pair_id}
        create_response = self._post("folders/%s/contents" % folder_id,
                                     payload)
        self._assert_status_code_is(create_response, 501)
        assert create_response.json(
        )['err_msg'] == 'Cannot add nested collections to library. Please flatten your collection first.'

    def _create_folder(self, library):
        create_data = dict(
            folder_id=library["root_folder_id"],
            create_type="folder",
            name="New Folder",
        )
        return self._post("libraries/%s/contents" % library["id"],
                          data=create_data)
Beispiel #22
0
class DatasetsApiTestCase(api.ApiTestCase):

    def setUp(self):
        super(DatasetsApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_index(self):
        index_response = self._get("datasets")
        self._assert_status_code_is(index_response, 200)

    def test_search_datasets(self):
        hda_id = self.dataset_populator.new_dataset(self.history_id)['id']
        payload = {'limit': 1, 'offset': 0}
        index_response = self._get("datasets", payload).json()
        assert len(index_response) == 1
        assert index_response[0]['id'] == hda_id
        hdca_id = self.dataset_collection_populator.create_list_in_history(self.history_id,
                                                                           contents=["1\n2\n3"]).json()['id']
        payload = {'limit': 3, 'offset': 0}
        index_response = self._get("datasets", payload).json()
        assert len(index_response) == 3
        assert index_response[0]['id'] == hdca_id
        assert index_response[0]['history_content_type'] == 'dataset_collection'
        assert index_response[2]['id'] == hda_id
        assert index_response[2]['history_content_type'] == 'dataset'
        payload = {'limit': 2, 'offset': 0, 'q': ['history_content_type'], 'qv': ['dataset']}
        index_response = self._get("datasets", payload).json()
        assert index_response[1]['id'] == hda_id

    def test_search_by_tag(self):
        hda_id = self.dataset_populator.new_dataset(self.history_id)['id']
        update_payload = {
            'tags': ['cool:new_tag', 'cool:another_tag'],
        }
        updated_hda = self._put(
            "histories/{history_id}/contents/{hda_id}".format(history_id=self.history_id, hda_id=hda_id),
            update_payload).json()
        assert 'cool:new_tag' in updated_hda['tags']
        assert 'cool:another_tag' in updated_hda['tags']
        payload = {'limit': 10, 'offset': 0, 'q': ['history_content_type', 'tag'], 'qv': ['dataset', 'cool:new_tag']}
        index_response = self._get("datasets", payload).json()
        assert len(index_response) == 1
        payload = {'limit': 10, 'offset': 0, 'q': ['history_content_type', 'tag-contains'],
                   'qv': ['dataset', 'new_tag']}
        index_response = self._get("datasets", payload).json()
        assert len(index_response) == 1
        payload = {'limit': 10, 'offset': 0, 'q': ['history_content_type', 'tag-contains'], 'qv': ['dataset', 'notag']}
        index_response = self._get("datasets", payload).json()
        assert len(index_response) == 0

    def test_search_by_tool_id(self):
        self.dataset_populator.new_dataset(self.history_id)
        payload = {'limit': 1, 'offset': 0, 'q': ['history_content_type', 'tool_id'], 'qv': ['dataset', 'upload1']}
        assert len(self._get("datasets", payload).json()) == 1
        payload = {'limit': 1, 'offset': 0, 'q': ['history_content_type', 'tool_id'], 'qv': ['dataset', 'uploadX']}
        assert len(self._get("datasets", payload).json()) == 0
        payload = {'limit': 1, 'offset': 0, 'q': ['history_content_type', 'tool_id-contains'], 'qv': ['dataset', 'pload1']}
        assert len(self._get("datasets", payload).json()) == 1
        self.dataset_collection_populator.create_list_in_history(self.history_id,
                                                                 name="search by tool id",
                                                                 contents=["1\n2\n3"]).json()
        self.dataset_populator.wait_for_history(self.history_id)
        payload = {'limit': 10, 'offset': 0, 'history_id': self.history_id, 'q': ['name', 'tool_id'],
                   'qv': ['search by tool id', 'upload1']}
        result = self._get("datasets", payload).json()
        assert result[0]['name'] == 'search by tool id', result
        payload = {'limit': 1, 'offset': 0, 'q': ['history_content_type', 'tool_id'],
                   'qv': ['dataset_collection', 'uploadX']}
        result = self._get("datasets", payload).json()
        assert len(result) == 0

    def test_invalid_search(self):
        payload = {'limit': 10, 'offset': 0, 'q': ['history_content_type', 'tag-invalid_op'], 'qv': ['dataset', 'notag']}
        index_response = self._get("datasets", payload)
        self._assert_status_code_is(index_response, 400)
        assert index_response.json()['err_msg'] == 'bad op in filter'

    def test_search_returns_only_accessible(self):
        hda_id = self.dataset_populator.new_dataset(self.history_id)['id']
        with self._different_user():
            payload = {'limit': 10, 'offset': 0, 'q': ['history_content_type'], 'qv': ['dataset']}
            index_response = self._get("datasets", payload).json()
            for item in index_response:
                assert hda_id != item['id']

    def test_show(self):
        hda1 = self.dataset_populator.new_dataset(self.history_id)
        show_response = self._get("datasets/%s" % (hda1["id"]))
        self._assert_status_code_is(show_response, 200)
        self.__assert_matches_hda(hda1, show_response.json())

    def __assert_matches_hda(self, input_hda, query_hda):
        self._assert_has_keys(query_hda, "id", "name")
        assert input_hda["name"] == query_hda["name"]
        assert input_hda["id"] == query_hda["id"]

    def test_display(self):
        contents = textwrap.dedent("""\
        1   2   3   4
        A   B   C   D
        10  20  30  40
        """)
        hda1 = self.dataset_populator.new_dataset(self.history_id, content=contents)
        self.dataset_populator.wait_for_history(self.history_id)
        display_response = self._get("histories/%s/contents/%s/display" % (self.history_id, hda1["id"]), {
            'raw': 'True'
        })
        self._assert_status_code_is(display_response, 200)
        assert display_response.text == contents
Beispiel #23
0
class DatasetCollectionApiTestCase(api.ApiTestCase):

    def setUp(self):
        super(DatasetCollectionApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_create_pair_from_history(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            instance_type="history",
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_datasets = dataset_collection["elements"]
        assert len(returned_datasets) == 2, dataset_collection

    def test_create_list_from_history(self):
        element_identifiers = self.dataset_collection_populator.list_identifiers(self.history_id)

        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )

        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_datasets = dataset_collection["elements"]
        assert len(returned_datasets) == 3, dataset_collection

    def test_create_list_of_existing_pairs(self):
        pair_payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            instance_type="history",
        )
        pair_create_response = self._post("dataset_collections", pair_payload)
        dataset_collection = self._check_create_response(pair_create_response)
        hdca_id = dataset_collection["id"]

        element_identifiers = [
            dict(name="test1", src="hdca", id=hdca_id)
        ]

        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_collections = dataset_collection["elements"]
        assert len(returned_collections) == 1, dataset_collection

    def test_create_list_of_new_pairs(self):
        identifiers = self.dataset_collection_populator.nested_collection_identifiers(self.history_id, "list:paired")
        payload = dict(
            collection_type="list:paired",
            instance_type="history",
            history_id=self.history_id,
            name="a nested collection",
            element_identifiers=json.dumps(identifiers),
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        assert dataset_collection["collection_type"] == "list:paired"
        assert dataset_collection["name"] == "a nested collection"
        returned_collections = dataset_collection["elements"]
        assert len(returned_collections) == 1, dataset_collection
        pair_1_element = returned_collections[0]
        self._assert_has_keys(pair_1_element, "element_identifier", "element_index", "object")
        assert pair_1_element["element_identifier"] == "test_level_1", pair_1_element
        assert pair_1_element["element_index"] == 0, pair_1_element
        pair_1_object = pair_1_element["object"]
        self._assert_has_keys(pair_1_object, "collection_type", "elements", "element_count")
        self.assertEquals(pair_1_object["collection_type"], "paired")
        self.assertEquals(pair_1_object["populated"], True)
        pair_elements = pair_1_object["elements"]
        assert len(pair_elements) == 2
        pair_1_element_1 = pair_elements[0]
        assert pair_1_element_1["element_index"] == 0

    def test_list_download(self):
        dataset_collection = self.dataset_collection_populator.create_list_in_history(self.history_id).json()
        self.dataset_collection_populator.wait_for_dataset_collection(dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 3, dataset_collection
        create_response = self._download_dataset_collection(history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=StringIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 3, "Expected 3 elements in [%s]" % namelist
        collection_name = dataset_collection['name']
        for element, zip_path in zip(returned_dce, namelist):
            assert "%s/%s.%s" % (collection_name, element['element_identifier'], element['object']['file_ext']) == zip_path

    def test_pair_download(self):
        dataset_collection = self.dataset_collection_populator.create_pair_in_history(self.history_id).json()
        self.dataset_collection_populator.wait_for_dataset_collection(dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 2, dataset_collection
        hdca_id = dataset_collection['id']
        create_response = self._download_dataset_collection(history_id=self.history_id, hdca_id=hdca_id)
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=StringIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 2, "Expected 2 elements in [%s]" % namelist
        collection_name = dataset_collection['name']
        for element, zip_path in zip(returned_dce, namelist):
            assert "%s/%s.%s" % (collection_name, element['element_identifier'], element['object']['file_ext']) == zip_path

    def test_list_pair_download(self):
        dataset_collection = self.dataset_collection_populator.create_list_of_pairs_in_history(self.history_id).json()
        self.dataset_collection_populator.wait_for_dataset_collection(dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        list_collection_name = dataset_collection['name']
        pair = returned_dce[0]
        create_response = self._download_dataset_collection(history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=StringIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 2, "Expected 2 elements in [%s]" % namelist
        pair_collection_name = pair['element_identifier']
        for element, zip_path in zip(pair['object']['elements'], namelist):
            assert "%s/%s/%s.%s" % (list_collection_name, pair_collection_name, element['element_identifier'], element['object']['file_ext']) == zip_path

    def test_list_list_download(self):
        dataset_collection = self.dataset_collection_populator.create_list_of_list_in_history(self.history_id).json()
        self.dataset_collection_populator.wait_for_dataset_collection(dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        create_response = self._download_dataset_collection(history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=StringIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 3, "Expected 3 elements in [%s]" % namelist

    def test_list_list_list_download(self):
        dataset_collection = self.dataset_collection_populator.create_list_of_list_in_history(self.history_id, collection_type='list:list:list').json()
        self.dataset_collection_populator.wait_for_dataset_collection(dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        create_response = self._download_dataset_collection(history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=StringIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 3, "Expected 3 elements in [%s]" % namelist

    def test_hda_security(self):
        element_identifiers = self.dataset_collection_populator.pair_identifiers(self.history_id)

        with self._different_user():
            history_id = self.dataset_populator.new_history()
            payload = dict(
                instance_type="history",
                history_id=history_id,
                element_identifiers=json.dumps(element_identifiers),
                collection_type="paired",
            )

            self._post("dataset_collections", payload)
            # TODO: re-enable once there is a way to restrict access
            # to this dataset via the API.
            # self._assert_status_code_is( create_response, 403 )

    def test_enforces_unique_names(self):
        element_identifiers = self.dataset_collection_populator.list_identifiers(self.history_id)
        element_identifiers[2]["name"] = element_identifiers[0]["name"]
        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )

        create_response = self._post("dataset_collections", payload)
        self._assert_status_code_is(create_response, 400)

    def test_upload_collection(self):
        elements = [{"src": "files", "dbkey": "hg19", "info": "my cool bed"}]
        targets = [{
            "destination": {"type": "hdca"},
            "elements": elements,
            "collection_type": "list",
            "name": "Test upload",
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {"files_0|file_data": open(self.test_data_resolver.get_filename("4.bed"))},
        }
        self.dataset_populator.fetch(payload)
        hdca = self._assert_one_collection_created_in_history()
        self.assertEquals(hdca["name"], "Test upload")
        assert len(hdca["elements"]) == 1, hdca
        element0 = hdca["elements"][0]
        assert element0["element_identifier"] == "4.bed"
        assert element0["object"]["file_size"] == 61

    def test_upload_nested(self):
        elements = [{"name": "samp1", "elements": [{"src": "files", "dbkey": "hg19", "info": "my cool bed"}]}]
        targets = [{
            "destination": {"type": "hdca"},
            "elements": elements,
            "collection_type": "list:list",
            "name": "Test upload",
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {"files_0|file_data": open(self.test_data_resolver.get_filename("4.bed"))},
        }
        self.dataset_populator.fetch(payload)
        hdca = self._assert_one_collection_created_in_history()
        self.assertEquals(hdca["name"], "Test upload")
        assert len(hdca["elements"]) == 1, hdca
        element0 = hdca["elements"][0]
        assert element0["element_identifier"] == "samp1"

    def test_upload_collection_from_url(self):
        elements = [{"src": "url", "url": "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed", "info": "my cool bed"}]
        targets = [{
            "destination": {"type": "hdca"},
            "elements": elements,
            "collection_type": "list",
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {"files_0|file_data": open(self.test_data_resolver.get_filename("4.bed"))},
        }
        self.dataset_populator.fetch(payload)
        hdca = self._assert_one_collection_created_in_history()
        assert len(hdca["elements"]) == 1, hdca
        element0 = hdca["elements"][0]
        assert element0["element_identifier"] == "4.bed"
        assert element0["object"]["file_size"] == 61

    def _assert_one_collection_created_in_history(self):
        contents_response = self._get("histories/%s/contents/dataset_collections" % self.history_id)
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        assert len(contents) == 1
        hdca = contents[0]
        assert hdca["history_content_type"] == "dataset_collection"
        hdca_id = hdca["id"]
        collection_response = self._get("histories/%s/contents/dataset_collections/%s" % (self.history_id, hdca_id))
        self._assert_status_code_is(collection_response, 200)
        return collection_response.json()

    def _check_create_response(self, create_response):
        self._assert_status_code_is(create_response, 200)
        dataset_collection = create_response.json()
        self._assert_has_keys(dataset_collection, "elements", "url", "name", "collection_type", "element_count")
        return dataset_collection

    def _download_dataset_collection(self, history_id, hdca_id):
        return self._get("histories/%s/contents/dataset_collections/%s/download" % (history_id, hdca_id))
class ToolsUploadTestCase(api.ApiTestCase):
    def setUp(self):
        super(ToolsUploadTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    def test_upload1_paste(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(
                history_id, 'Hello World')
            create_response = self._post("tools", data=payload)
            self._assert_has_keys(create_response.json(), 'outputs')

    def test_upload1_paste_bad_datatype(self):
        # Check that you get a nice message if you upload an incorrect datatype
        with self.dataset_populator.test_history() as history_id:
            file_type = "johnsawesomebutfakedatatype"
            payload = self.dataset_populator.upload_payload(
                history_id, 'Hello World', file_type=file_type)
            create = self._post("tools", data=payload).json()
            self._assert_has_keys(create, 'err_msg')
            assert file_type in create['err_msg']

    def test_upload_posix_newline_fixes(self):
        windows_content = ONE_TO_SIX_ON_WINDOWS
        result_content = self._upload_and_get_content(windows_content)
        self.assertEquals(result_content, ONE_TO_SIX_WITH_TABS)

    def test_upload_disable_posix_fix(self):
        windows_content = ONE_TO_SIX_ON_WINDOWS
        result_content = self._upload_and_get_content(windows_content,
                                                      to_posix_lines=None)
        self.assertEquals(result_content, windows_content)

    def test_upload_tab_to_space(self):
        table = ONE_TO_SIX_WITH_SPACES
        result_content = self._upload_and_get_content(table,
                                                      space_to_tab="Yes")
        self.assertEquals(result_content, ONE_TO_SIX_WITH_TABS)

    def test_upload_tab_to_space_off_by_default(self):
        table = ONE_TO_SIX_WITH_SPACES
        result_content = self._upload_and_get_content(table)
        self.assertEquals(result_content, table)

    @skip_without_datatype("rdata")
    def test_rdata_not_decompressed(self):
        # Prevent regression of https://github.com/galaxyproject/galaxy/issues/753
        rdata_path = TestDataResolver().get_filename("1.RData")
        rdata_metadata = self._upload_and_get_details(open(rdata_path, "rb"),
                                                      file_type="auto")
        self.assertEquals(rdata_metadata["file_ext"], "rdata")

    @skip_without_datatype("velvet")
    def test_composite_datatype(self):
        with self.dataset_populator.test_history() as history_id:
            dataset = self._velvet_upload(history_id,
                                          extra_inputs={
                                              "files_1|url_paste":
                                              "roadmaps content",
                                              "files_1|type": "upload_dataset",
                                              "files_2|url_paste":
                                              "log content",
                                              "files_2|type": "upload_dataset",
                                          })

            roadmaps_content = self._get_roadmaps_content(history_id, dataset)
            assert roadmaps_content.strip(
            ) == "roadmaps content", roadmaps_content

    @skip_without_datatype("velvet")
    def test_composite_datatype_space_to_tab(self):
        # Like previous test but set one upload with space_to_tab to True to
        # verify that works.
        with self.dataset_populator.test_history() as history_id:
            dataset = self._velvet_upload(history_id,
                                          extra_inputs={
                                              "files_1|url_paste":
                                              "roadmaps content",
                                              "files_1|type": "upload_dataset",
                                              "files_1|space_to_tab": "Yes",
                                              "files_2|url_paste":
                                              "log content",
                                              "files_2|type": "upload_dataset",
                                          })

            roadmaps_content = self._get_roadmaps_content(history_id, dataset)
            assert roadmaps_content.strip(
            ) == "roadmaps\tcontent", roadmaps_content

    @skip_without_datatype("velvet")
    def test_composite_datatype_posix_lines(self):
        # Like previous test but set one upload with space_to_tab to True to
        # verify that works.
        with self.dataset_populator.test_history() as history_id:
            dataset = self._velvet_upload(history_id,
                                          extra_inputs={
                                              "files_1|url_paste":
                                              "roadmaps\rcontent",
                                              "files_1|type": "upload_dataset",
                                              "files_1|space_to_tab": "Yes",
                                              "files_2|url_paste":
                                              "log\rcontent",
                                              "files_2|type": "upload_dataset",
                                          })

            roadmaps_content = self._get_roadmaps_content(history_id, dataset)
            assert roadmaps_content.strip(
            ) == "roadmaps\ncontent", roadmaps_content

    def test_upload_dbkey(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id,
                                                            "Test123",
                                                            dbkey="hg19")
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]
            assert datasets[0].get("genome_build") == "hg19", datasets[0]

    def test_upload_multiple_files_1(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(
                history_id,
                "Test123",
                dbkey="hg19",
                extra_inputs={
                    "files_1|url_paste": "SecondOutputContent",
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "tabular",
                    "files_1|dbkey": "hg18",
                    "file_count": "2",
                })
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 2, datasets
            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[0])
            assert content.strip() == "Test123"
            assert datasets[0]["file_ext"] == "txt"
            assert datasets[0]["genome_build"] == "hg19", datasets

            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[1])
            assert content.strip() == "SecondOutputContent"
            assert datasets[1]["file_ext"] == "tabular"
            assert datasets[1]["genome_build"] == "hg18", datasets

    def test_upload_multiple_files_2(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(
                history_id,
                "Test123",
                file_type="tabular",
                dbkey="hg19",
                extra_inputs={
                    "files_1|url_paste": "SecondOutputContent",
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "files_1|dbkey": "hg18",
                    "file_count": "2",
                })
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 2, datasets
            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[0])
            assert content.strip() == "Test123"
            assert datasets[0]["file_ext"] == "tabular", datasets
            assert datasets[0]["genome_build"] == "hg19", datasets

            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[1])
            assert content.strip() == "SecondOutputContent"
            assert datasets[1]["file_ext"] == "txt"
            assert datasets[1]["genome_build"] == "hg18", datasets

    def test_upload_multiple_files_3(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(
                history_id,
                "Test123",
                file_type="tabular",
                dbkey="hg19",
                extra_inputs={
                    "files_0|file_type": "txt",
                    "files_0|dbkey": "hg18",
                    "files_1|url_paste": "SecondOutputContent",
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "files_1|dbkey": "hg18",
                    "file_count": "2",
                })
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 2, datasets
            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[0])
            assert content.strip() == "Test123"
            assert datasets[0]["file_ext"] == "txt", datasets
            assert datasets[0]["genome_build"] == "hg18", datasets

            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[1])
            assert content.strip() == "SecondOutputContent"
            assert datasets[1]["file_ext"] == "txt"
            assert datasets[1]["genome_build"] == "hg18", datasets

    def test_upload_multiple_files_no_dbkey(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(
                history_id,
                "Test123",
                file_type="tabular",
                dbkey=None,
                extra_inputs={
                    "files_0|file_type": "txt",
                    "files_1|url_paste": "SecondOutputContent",
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "file_count": "2",
                })
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 2, datasets
            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[0])
            assert content.strip() == "Test123"
            assert datasets[0]["file_ext"] == "txt", datasets
            assert datasets[0]["genome_build"] == "?", datasets

            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[1])
            assert content.strip() == "SecondOutputContent"
            assert datasets[1]["file_ext"] == "txt"
            assert datasets[1]["genome_build"] == "?", datasets

    def test_upload_multiple_files_space_to_tab(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(
                history_id,
                content=ONE_TO_SIX_WITH_SPACES,
                file_type="tabular",
                dbkey="hg19",
                extra_inputs={
                    "files_0|file_type": "txt",
                    "files_0|space_to_tab": "Yes",
                    "files_1|url_paste": ONE_TO_SIX_WITH_SPACES,
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "files_2|url_paste": ONE_TO_SIX_WITH_SPACES,
                    "files_2|NAME": "ThirdOutputName",
                    "files_2|file_type": "txt",
                    "files_2|space_to_tab": "Yes",
                    "file_count": "3",
                })
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 3, datasets
            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[0])
            assert content == ONE_TO_SIX_WITH_TABS

            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[1])
            assert content == ONE_TO_SIX_WITH_SPACES

            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[2])
            assert content == ONE_TO_SIX_WITH_TABS

    def test_multiple_files_posix_lines(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(
                history_id,
                content=ONE_TO_SIX_ON_WINDOWS,
                file_type="tabular",
                dbkey="hg19",
                extra_inputs={
                    "files_0|file_type": "txt",
                    "files_0|to_posix_lines": "Yes",
                    "files_1|url_paste": ONE_TO_SIX_ON_WINDOWS,
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "files_1|to_posix_lines": None,
                    "files_2|url_paste": ONE_TO_SIX_ON_WINDOWS,
                    "files_2|NAME": "ThirdOutputName",
                    "files_2|file_type": "txt",
                    "file_count": "3",
                })
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 3, datasets
            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[0])
            assert content == ONE_TO_SIX_WITH_TABS

            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[1])
            assert content == ONE_TO_SIX_ON_WINDOWS

            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[2])
            assert content == ONE_TO_SIX_WITH_TABS

    def _velvet_upload(self, history_id, extra_inputs):
        payload = self.dataset_populator.upload_payload(
            history_id,
            "sequences content",
            file_type="velvet",
            extra_inputs=extra_inputs,
        )
        run_response = self.dataset_populator.tools_post(payload)
        self.dataset_populator.wait_for_tool_run(history_id, run_response)
        datasets = run_response.json()["outputs"]

        assert len(datasets) == 1
        dataset = datasets[0]

        return dataset

    def _get_roadmaps_content(self, history_id, dataset):
        roadmaps_content = self.dataset_populator.get_history_dataset_content(
            history_id, dataset=dataset, filename="Roadmaps")
        return roadmaps_content

    def _upload_and_get_content(self, content, **upload_kwds):
        history_id, new_dataset = self._upload(content, **upload_kwds)
        return self.dataset_populator.get_history_dataset_content(
            history_id, dataset=new_dataset)

    def _upload_and_get_details(self, content, **upload_kwds):
        history_id, new_dataset = self._upload(content, **upload_kwds)
        return self.dataset_populator.get_history_dataset_details(
            history_id, dataset=new_dataset)

    def _upload(self, content, **upload_kwds):
        history_id = self.dataset_populator.new_history()
        new_dataset = self.dataset_populator.new_dataset(history_id,
                                                         content=content,
                                                         **upload_kwds)
        self.dataset_populator.wait_for_history(history_id, assert_ok=True)
        return history_id, new_dataset
Beispiel #25
0
class JobsApiTestCase(api.ApiTestCase):

    def setUp(self):
        super(JobsApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(self.galaxy_interactor)

    def test_index(self):
        # Create HDA to ensure at least one job exists...
        self.__history_with_new_dataset()
        jobs = self.__jobs_index()
        assert "upload1" in map(itemgetter("tool_id"), jobs)

    def test_system_details_admin_only(self):
        self.__history_with_new_dataset()
        jobs = self.__jobs_index(admin=False)
        job = jobs[0]
        self._assert_not_has_keys(job, "command_line", "external_id")

        jobs = self.__jobs_index(admin=True)
        job = jobs[0]
        self._assert_has_keys(job, "command_line", "external_id")

    def test_index_state_filter(self):
        # Initial number of ok jobs
        original_count = len(self.__uploads_with_state("ok"))
        # Run through dataset upload to ensure num uplaods at least greater
        # by 1.
        self.__history_with_ok_dataset()

        # Verify number of ok jobs is actually greater.
        count_increased = False
        for i in range(10):
            new_count = len(self.__uploads_with_state("ok"))
            if original_count < new_count:
                count_increased = True
                break
            time.sleep(.1)

        if not count_increased:
            template = "Jobs in ok state did not increase (was %d, now %d)"
            message = template % (original_count, new_count)
            raise AssertionError(message)

    def test_index_date_filter(self):
        self.__history_with_new_dataset()
        two_weeks_ago = (datetime.datetime.utcnow() - datetime.timedelta(7)).isoformat()
        last_week = (datetime.datetime.utcnow() - datetime.timedelta(7)).isoformat()
        next_week = (datetime.datetime.utcnow() + datetime.timedelta(7)).isoformat()
        today = datetime.datetime.utcnow().isoformat()
        tomorrow = (datetime.datetime.utcnow() + datetime.timedelta(1)).isoformat()

        jobs = self.__jobs_index(data={"date_range_min": today[0:10], "date_range_max": tomorrow[0:10]})
        assert len(jobs) > 0
        today_job_id = jobs[0]["id"]

        jobs = self.__jobs_index(data={"date_range_min": two_weeks_ago, "date_range_max": last_week})
        assert today_job_id not in map(itemgetter("id"), jobs)

        jobs = self.__jobs_index(data={"date_range_min": last_week, "date_range_max": next_week})
        assert today_job_id in map(itemgetter("id"), jobs)

    def test_index_history(self):
        history_id, _ = self.__history_with_new_dataset()
        jobs = self.__jobs_index(data={"history_id": history_id})
        assert len(jobs) > 0

        history_id = self.dataset_populator.new_history()
        jobs = self.__jobs_index(data={"history_id": history_id})
        assert len(jobs) == 0

    def test_index_multiple_states_filter(self):
        # Initial number of ok jobs
        original_count = len(self.__uploads_with_state("ok", "new"))

        # Run through dataset upload to ensure num uplaods at least greater
        # by 1.
        self.__history_with_ok_dataset()

        # Verify number of ok jobs is actually greater.
        new_count = len(self.__uploads_with_state("new", "ok"))
        assert original_count < new_count, new_count

    def test_show(self):
        # Create HDA to ensure at least one job exists...
        self.__history_with_new_dataset()

        jobs_response = self._get("jobs")
        first_job = jobs_response.json()[0]
        self._assert_has_key(first_job, 'id', 'state', 'exit_code', 'update_time', 'create_time')

        job_id = first_job["id"]
        show_jobs_response = self._get("jobs/%s" % job_id)
        self._assert_status_code_is(show_jobs_response, 200)

        job_details = show_jobs_response.json()
        self._assert_has_key(job_details, 'id', 'state', 'exit_code', 'update_time', 'create_time')

    def test_show_security(self):
        history_id, _ = self.__history_with_new_dataset()
        jobs_response = self._get("jobs", data={"history_id": history_id})
        job = jobs_response.json()[0]
        job_id = job["id"]

        show_jobs_response = self._get("jobs/%s" % job_id, admin=False)
        self._assert_not_has_keys(show_jobs_response.json(), "command_line", "external_id")

        # TODO: Re-activate test case when API accepts privacy settings
        # with self._different_user():
        #    show_jobs_response = self._get( "jobs/%s" % job_id, admin=False )
        #    self._assert_status_code_is( show_jobs_response, 200 )

        show_jobs_response = self._get("jobs/%s" % job_id, admin=True)
        self._assert_has_keys(show_jobs_response.json(), "command_line", "external_id")

    def test_deleting_output_keep_running_until_all_deleted(self):
        history_id, job_state, outputs = self._setup_running_two_output_job(120)

        self._hack_to_skip_test_if_state_ok(job_state)

        # Delete one of the two outputs and make sure the job is still running.
        self._raw_update_history_item(history_id, outputs[0]["id"], {"deleted": True})

        self._hack_to_skip_test_if_state_ok(job_state)

        time.sleep(1)

        self._hack_to_skip_test_if_state_ok(job_state)

        state = job_state().json()["state"]
        assert state == "running", state

        # Delete the second output and make sure the job is cancelled.
        self._raw_update_history_item(history_id, outputs[1]["id"], {"deleted": True})
        final_state = wait_on_state(job_state, assert_ok=False, timeout=15)
        assert final_state in ["deleted_new", "deleted"], final_state

    def test_purging_output_keep_running_until_all_purged(self):
        history_id, job_state, outputs = self._setup_running_two_output_job(120)

        # Pretty much right away after the job is running, these paths should be populated -
        # if they are grab them and make sure they are deleted at the end of the job.
        dataset_1 = self._get_history_item_as_admin(history_id, outputs[0]["id"])
        dataset_2 = self._get_history_item_as_admin(history_id, outputs[1]["id"])
        if "file_name" in dataset_1:
            output_dataset_paths = [dataset_1["file_name"], dataset_2["file_name"]]
            # This may or may not exist depending on if the test is local or not.
            output_dataset_paths_exist = os.path.exists(output_dataset_paths[0])
        else:
            output_dataset_paths = []
            output_dataset_paths_exist = False

        self._hack_to_skip_test_if_state_ok(job_state)

        current_state = job_state().json()["state"]
        assert current_state == "running", current_state

        # Purge one of the two outputs and make sure the job is still running.
        self._raw_update_history_item(history_id, outputs[0]["id"], {"purged": True})
        time.sleep(1)

        self._hack_to_skip_test_if_state_ok(job_state)

        current_state = job_state().json()["state"]
        assert current_state == "running", current_state

        # Purge the second output and make sure the job is cancelled.
        self._raw_update_history_item(history_id, outputs[1]["id"], {"purged": True})
        final_state = wait_on_state(job_state, assert_ok=False, timeout=15)
        assert final_state in ["deleted_new", "deleted"], final_state

        def paths_deleted():
            if not os.path.exists(output_dataset_paths[0]) and not os.path.exists(output_dataset_paths[1]):
                return True

        if output_dataset_paths_exist:
            wait_on(paths_deleted, "path deletion")

    def test_purging_output_cleaned_after_ok_run(self):
        history_id, job_state, outputs = self._setup_running_two_output_job(10)

        # Pretty much right away after the job is running, these paths should be populated -
        # if they are grab them and make sure they are deleted at the end of the job.
        dataset_1 = self._get_history_item_as_admin(history_id, outputs[0]["id"])
        dataset_2 = self._get_history_item_as_admin(history_id, outputs[1]["id"])
        if "file_name" in dataset_1:
            output_dataset_paths = [dataset_1["file_name"], dataset_2["file_name"]]
            # This may or may not exist depending on if the test is local or not.
            output_dataset_paths_exist = os.path.exists(output_dataset_paths[0])
        else:
            output_dataset_paths = []
            output_dataset_paths_exist = False

        if not output_dataset_paths_exist:
            # Given this Galaxy configuration - there is nothing more to be tested here.
            # Consider throwing a skip instead.
            return

        # Purge one of the two outputs and wait for the job to complete.
        self._raw_update_history_item(history_id, outputs[0]["id"], {"purged": True})
        wait_on_state(job_state, assert_ok=True)

        if output_dataset_paths_exist:
            time.sleep(.5)
            # Make sure the non-purged dataset is on disk and the purged one is not.
            assert os.path.exists(output_dataset_paths[1])
            assert not os.path.exists(output_dataset_paths[0])

    def _hack_to_skip_test_if_state_ok(self, job_state):
        from nose.plugins.skip import SkipTest
        if job_state().json()["state"] == "ok":
            message = "Job state switch from running to ok too quickly - the rest of the test requires the job to be in a running state. Skipping test."
            raise SkipTest(message)

    def _setup_running_two_output_job(self, sleep_time):
        history_id = self.dataset_populator.new_history()
        payload = self.dataset_populator.run_tool_payload(
            tool_id='create_2',
            inputs=dict(
                sleep_time=sleep_time,
            ),
            history_id=history_id,
        )
        run_response = self._post("tools", data=payload).json()
        outputs = run_response["outputs"]
        jobs = run_response["jobs"]

        assert len(outputs) == 2
        assert len(jobs) == 1

        def job_state():
            jobs_response = self._get("jobs/%s" % jobs[0]["id"])
            return jobs_response

        # Give job some time to get up and running.
        time.sleep(2)
        running_state = wait_on_state(job_state, skip_states=["queued", "new"], assert_ok=False, timeout=15)
        assert running_state == "running", running_state

        def job_state():
            jobs_response = self._get("jobs/%s" % jobs[0]["id"])
            return jobs_response

        return history_id, job_state, outputs

    def _raw_update_history_item(self, history_id, item_id, data):
        update_url = self._api_url("histories/%s/contents/%s" % (history_id, item_id), use_key=True)
        update_response = put(update_url, json=data)
        assert_status_code_is_ok(update_response)
        return update_response

    def _get_history_item_as_admin(self, history_id, item_id):
        response = self._get("histories/%s/contents/%s?view=detailed" % (history_id, item_id), admin=True)
        assert_status_code_is_ok(response)
        return response.json()

    def test_search(self):
        history_id, dataset_id = self.__history_with_ok_dataset()
        # We first copy the datasets, so that the update time is lower than the job creation time
        new_history_id = self.dataset_populator.new_history()
        copy_payload = {"content": dataset_id, "source": "hda", "type": "dataset"}
        copy_response = self._post("histories/%s/contents" % new_history_id, data=copy_payload)
        self._assert_status_code_is(copy_response, 200)
        inputs = json.dumps({
            'input1': {'src': 'hda', 'id': dataset_id}
        })
        self._job_search(tool_id='cat1', history_id=history_id, inputs=inputs)
        # We test that a job can be found even if the dataset has been copied to another history
        new_dataset_id = copy_response.json()['id']
        copied_inputs = json.dumps({
            'input1': {'src': 'hda', 'id': new_dataset_id}
        })
        search_payload = self._search_payload(history_id=history_id, tool_id='cat1', inputs=copied_inputs)
        self._search(search_payload, expected_search_count=1)
        # Now we delete the original input HDA that was used -- we should still be able to find the job
        delete_respone = self._delete("histories/%s/contents/%s" % (history_id, dataset_id))
        self._assert_status_code_is(delete_respone, 200)
        self._search(search_payload, expected_search_count=1)
        # Now we also delete the copy -- we shouldn't find a job
        delete_respone = self._delete("histories/%s/contents/%s" % (new_history_id, new_dataset_id))
        self._assert_status_code_is(delete_respone, 200)
        self._search(search_payload, expected_search_count=0)

    def test_search_handle_identifiers(self):
        # Test that input name and element identifier of a jobs' output must match for a job to be returned.
        history_id, dataset_id = self.__history_with_ok_dataset()
        inputs = json.dumps({
            'input1': {'src': 'hda', 'id': dataset_id}
        })
        self._job_search(tool_id='identifier_single', history_id=history_id, inputs=inputs)
        dataset_details = self._get("histories/%s/contents/%s" % (history_id, dataset_id)).json()
        dataset_details['name'] = 'Renamed Test Dataset'
        dataset_update_response = self._put("histories/%s/contents/%s" % (history_id, dataset_id), data=dict(name='Renamed Test Dataset'))
        self._assert_status_code_is(dataset_update_response, 200)
        assert dataset_update_response.json()['name'] == 'Renamed Test Dataset'
        search_payload = self._search_payload(history_id=history_id, tool_id='identifier_single', inputs=inputs)
        self._search(search_payload, expected_search_count=0)

    def test_search_delete_outputs(self):
        history_id, dataset_id = self.__history_with_ok_dataset()
        inputs = json.dumps({
            'input1': {'src': 'hda', 'id': dataset_id}
        })
        tool_response = self._job_search(tool_id='cat1', history_id=history_id, inputs=inputs)
        output_id = tool_response.json()['outputs'][0]['id']
        delete_respone = self._delete("histories/%s/contents/%s" % (history_id, output_id))
        self._assert_status_code_is(delete_respone, 200)
        search_payload = self._search_payload(history_id=history_id, tool_id='cat1', inputs=inputs)
        self._search(search_payload, expected_search_count=0)

    def test_search_with_hdca_list_input(self):
        history_id, list_id_a = self.__history_with_ok_collection(collection_type='list')
        history_id, list_id_b = self.__history_with_ok_collection(collection_type='list', history_id=history_id)
        inputs = json.dumps({
            'f1': {'src': 'hdca', 'id': list_id_a},
            'f2': {'src': 'hdca', 'id': list_id_b},
        })
        tool_response = self._job_search(tool_id='multi_data_param', history_id=history_id, inputs=inputs)
        # We switch the inputs, this should not return a match
        inputs_switched = json.dumps({
            'f2': {'src': 'hdca', 'id': list_id_a},
            'f1': {'src': 'hdca', 'id': list_id_b},
        })
        search_payload = self._search_payload(history_id=history_id, tool_id='multi_data_param', inputs=inputs_switched)
        self._search(search_payload, expected_search_count=0)
        # We delete the ouput (this is a HDA, as multi_data_param reduces collections)
        # and use the correct input job definition, the job should not be found
        output_id = tool_response.json()['outputs'][0]['id']
        delete_respone = self._delete("histories/%s/contents/%s" % (history_id, output_id))
        self._assert_status_code_is(delete_respone, 200)
        search_payload = self._search_payload(history_id=history_id, tool_id='multi_data_param', inputs=inputs)
        self._search(search_payload, expected_search_count=0)

    def test_search_delete_hdca_output(self):
        history_id, list_id_a = self.__history_with_ok_collection(collection_type='list')
        inputs = json.dumps({
            'input1': {'src': 'hdca', 'id': list_id_a},
        })
        tool_response = self._job_search(tool_id='collection_creates_list', history_id=history_id, inputs=inputs)
        output_id = tool_response.json()['outputs'][0]['id']
        # We delete a single tool output, no job should be returned
        delete_respone = self._delete("histories/%s/contents/%s" % (history_id, output_id))
        self._assert_status_code_is(delete_respone, 200)
        search_payload = self._search_payload(history_id=history_id, tool_id='collection_creates_list', inputs=inputs)
        self._search(search_payload, expected_search_count=0)
        tool_response = self._job_search(tool_id='collection_creates_list', history_id=history_id, inputs=inputs)
        output_collection_id = tool_response.json()['output_collections'][0]['id']
        # We delete a collection output, no job should be returned
        delete_respone = self._delete("histories/%s/contents/dataset_collections/%s" % (history_id, output_collection_id))
        self._assert_status_code_is(delete_respone, 200)
        search_payload = self._search_payload(history_id=history_id, tool_id='collection_creates_list', inputs=inputs)
        self._search(search_payload, expected_search_count=0)

    def test_search_with_hdca_pair_input(self):
        history_id, list_id_a = self.__history_with_ok_collection(collection_type='pair')
        inputs = json.dumps({
            'f1': {'src': 'hdca', 'id': list_id_a},
            'f2': {'src': 'hdca', 'id': list_id_a},
        })
        self._job_search(tool_id='multi_data_param', history_id=history_id, inputs=inputs)
        # We test that a job can be found even if the collection has been copied to another history
        new_history_id = self.dataset_populator.new_history()
        copy_payload = {"content": list_id_a, "source": "hdca", "type": "dataset_collection"}
        copy_response = self._post("histories/%s/contents" % new_history_id, data=copy_payload)
        self._assert_status_code_is(copy_response, 200)
        new_list_a = copy_response.json()['id']
        copied_inputs = json.dumps({
            'f1': {'src': 'hdca', 'id': new_list_a},
            'f2': {'src': 'hdca', 'id': new_list_a},
        })
        search_payload = self._search_payload(history_id=new_history_id, tool_id='multi_data_param', inputs=copied_inputs)
        self._search(search_payload, expected_search_count=1)
        # Now we delete the original input HDCA that was used -- we should still be able to find the job
        delete_respone = self._delete("histories/%s/contents/dataset_collections/%s" % (history_id, list_id_a))
        self._assert_status_code_is(delete_respone, 200)
        self._search(search_payload, expected_search_count=1)
        # Now we also delete the copy -- we shouldn't find a job
        delete_respone = self._delete("histories/%s/contents/dataset_collections/%s" % (history_id, new_list_a))
        self._assert_status_code_is(delete_respone, 200)
        self._search(search_payload, expected_search_count=0)

    def test_search_with_hdca_list_pair_input(self):
        history_id, list_id_a = self.__history_with_ok_collection(collection_type='list:pair')
        inputs = json.dumps({
            'f1': {'src': 'hdca', 'id': list_id_a},
            'f2': {'src': 'hdca', 'id': list_id_a},
        })
        self._job_search(tool_id='multi_data_param', history_id=history_id, inputs=inputs)

    def _job_search(self, tool_id, history_id, inputs):
        search_payload = self._search_payload(history_id=history_id, tool_id=tool_id, inputs=inputs)
        empty_search_response = self._post("jobs/search", data=search_payload)
        self._assert_status_code_is(empty_search_response, 200)
        self.assertEquals(len(empty_search_response.json()), 0)
        tool_response = self._post("tools", data=search_payload)
        self.dataset_populator.wait_for_tool_run(history_id, run_response=tool_response)
        self._search(search_payload, expected_search_count=1)
        return tool_response

    def _search_payload(self, history_id, tool_id, inputs, state='ok'):
        search_payload = dict(
            tool_id=tool_id,
            inputs=inputs,
            history_id=history_id,
            state=state
        )
        return search_payload

    def _search(self, payload, expected_search_count=1):
        # in case job and history aren't updated at exactly the same
        # time give time to wait
        for i in range(5):
            search_count = self._search_count(payload)
            if search_count == expected_search_count:
                break
            time.sleep(1)
        assert search_count == expected_search_count, "expected to find %d jobs, got %d jobs" % (expected_search_count, search_count)
        return search_count

    def _search_count(self, search_payload):
        search_response = self._post("jobs/search", data=search_payload)
        self._assert_status_code_is(search_response, 200)
        search_json = search_response.json()
        return len(search_json)

    def __uploads_with_state(self, *states):
        jobs_response = self._get("jobs", data=dict(state=states))
        self._assert_status_code_is(jobs_response, 200)
        jobs = jobs_response.json()
        assert not filter(lambda j: j["state"] not in states, jobs)
        return filter(lambda j: j["tool_id"] == "upload1", jobs)

    def __history_with_new_dataset(self):
        history_id = self.dataset_populator.new_history()
        dataset_id = self.dataset_populator.new_dataset(history_id)["id"]
        return history_id, dataset_id

    def __history_with_ok_dataset(self):
        history_id = self.dataset_populator.new_history()
        dataset_id = self.dataset_populator.new_dataset(history_id, wait=True)["id"]
        return history_id, dataset_id

    def __history_with_ok_collection(self, collection_type='list', history_id=None):
        if not history_id:
            history_id = self.dataset_populator.new_history()
        if collection_type == 'list':
            create_reposonse = self.dataset_collection_populator.create_list_in_history(history_id).json()
        elif collection_type == 'pair':
            create_reposonse = self.dataset_collection_populator.create_pair_in_history(history_id).json()
        elif collection_type == 'list:pair':
            create_reposonse = self.dataset_collection_populator.create_list_of_pairs_in_history(history_id).json()
        self.dataset_collection_populator.wait_for_dataset_collection(create_reposonse)
        return history_id, create_reposonse['id']

    def __jobs_index(self, **kwds):
        jobs_response = self._get("jobs", **kwds)
        self._assert_status_code_is(jobs_response, 200)
        jobs = jobs_response.json()
        assert isinstance(jobs, list)
        return jobs
Beispiel #26
0
class JobsApiTestCase(api.ApiTestCase):
    def setUp(self):
        super(JobsApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    def test_index(self):
        # Create HDA to ensure at least one job exists...
        self.__history_with_new_dataset()
        jobs = self.__jobs_index()
        assert "upload1" in map(itemgetter("tool_id"), jobs)

    def test_system_details_admin_only(self):
        self.__history_with_new_dataset()
        jobs = self.__jobs_index(admin=False)
        job = jobs[0]
        self._assert_not_has_keys(job, "command_line", "external_id")

        jobs = self.__jobs_index(admin=True)
        job = jobs[0]
        self._assert_has_keys(job, "command_line", "external_id")

    def test_index_state_filter(self):
        # Initial number of ok jobs
        original_count = len(self.__uploads_with_state("ok"))
        # Run through dataset upload to ensure num uplaods at least greater
        # by 1.
        self.__history_with_ok_dataset()

        # Verify number of ok jobs is actually greater.
        count_increased = False
        for i in range(10):
            new_count = len(self.__uploads_with_state("ok"))
            if original_count < new_count:
                count_increased = True
                break
            time.sleep(.1)

        if not count_increased:
            template = "Jobs in ok state did not increase (was %d, now %d)"
            message = template % (original_count, new_count)
            raise AssertionError(message)

    def test_index_date_filter(self):
        self.__history_with_new_dataset()
        two_weeks_ago = (datetime.datetime.utcnow() -
                         datetime.timedelta(7)).isoformat()
        last_week = (datetime.datetime.utcnow() -
                     datetime.timedelta(7)).isoformat()
        next_week = (datetime.datetime.utcnow() +
                     datetime.timedelta(7)).isoformat()
        today = datetime.datetime.utcnow().isoformat()
        tomorrow = (datetime.datetime.utcnow() +
                    datetime.timedelta(1)).isoformat()

        jobs = self.__jobs_index(data={
            "date_range_min": today[0:10],
            "date_range_max": tomorrow[0:10]
        })
        assert len(jobs) > 0
        today_job_id = jobs[0]["id"]

        jobs = self.__jobs_index(data={
            "date_range_min": two_weeks_ago,
            "date_range_max": last_week
        })
        assert today_job_id not in map(itemgetter("id"), jobs)

        jobs = self.__jobs_index(data={
            "date_range_min": last_week,
            "date_range_max": next_week
        })
        assert today_job_id in map(itemgetter("id"), jobs)

    def test_index_history(self):
        history_id, _ = self.__history_with_new_dataset()
        jobs = self.__jobs_index(data={"history_id": history_id})
        assert len(jobs) > 0

        history_id = self.dataset_populator.new_history()
        jobs = self.__jobs_index(data={"history_id": history_id})
        assert len(jobs) == 0

    def test_index_multiple_states_filter(self):
        # Initial number of ok jobs
        original_count = len(self.__uploads_with_state("ok", "new"))

        # Run through dataset upload to ensure num uplaods at least greater
        # by 1.
        self.__history_with_ok_dataset()

        # Verify number of ok jobs is actually greater.
        new_count = len(self.__uploads_with_state("new", "ok"))
        assert original_count < new_count, new_count

    def test_show(self):
        # Create HDA to ensure at least one job exists...
        self.__history_with_new_dataset()

        jobs_response = self._get("jobs")
        first_job = jobs_response.json()[0]
        self._assert_has_key(first_job, 'id', 'state', 'exit_code',
                             'update_time', 'create_time')

        job_id = first_job["id"]
        show_jobs_response = self._get("jobs/%s" % job_id)
        self._assert_status_code_is(show_jobs_response, 200)

        job_details = show_jobs_response.json()
        self._assert_has_key(job_details, 'id', 'state', 'exit_code',
                             'update_time', 'create_time')

    def test_show_security(self):
        history_id, _ = self.__history_with_new_dataset()
        jobs_response = self._get("jobs", data={"history_id": history_id})
        job = jobs_response.json()[0]
        job_id = job["id"]

        show_jobs_response = self._get("jobs/%s" % job_id, admin=False)
        self._assert_not_has_keys(show_jobs_response.json(), "command_line",
                                  "external_id")

        # TODO: Re-activate test case when API accepts privacy settings
        # with self._different_user():
        #    show_jobs_response = self._get( "jobs/%s" % job_id, admin=False )
        #    self._assert_status_code_is( show_jobs_response, 200 )

        show_jobs_response = self._get("jobs/%s" % job_id, admin=True)
        self._assert_has_keys(show_jobs_response.json(), "command_line",
                              "external_id")

    def test_deleting_output_keep_running_until_all_deleted(self):
        history_id, job_state, outputs = self._setup_running_two_output_job(60)

        # Delete one of the two outputs and make sure the job is still running.
        self._raw_update_history_item(history_id, outputs[0]["id"],
                                      {"deleted": True})
        time.sleep(1)
        state = job_state().json()["state"]
        assert state == "running", state

        # Delete the second output and make sure the job is cancelled.
        self._raw_update_history_item(history_id, outputs[1]["id"],
                                      {"deleted": True})
        final_state = wait_on_state(job_state, assert_ok=False, timeout=15)
        assert final_state in ["deleted_new", "deleted"], final_state

    def test_purging_output_keep_running_until_all_purged(self):
        history_id, job_state, outputs = self._setup_running_two_output_job(60)

        # Pretty much right away after the job is running, these paths should be populated -
        # if they are grab them and make sure they are deleted at the end of the job.
        dataset_1 = self._get_history_item_as_admin(history_id,
                                                    outputs[0]["id"])
        dataset_2 = self._get_history_item_as_admin(history_id,
                                                    outputs[1]["id"])
        if "file_name" in dataset_1:
            output_dataset_paths = [
                dataset_1["file_name"], dataset_2["file_name"]
            ]
            # This may or may not exist depending on if the test is local or not.
            output_dataset_paths_exist = os.path.exists(
                output_dataset_paths[0])
        else:
            output_dataset_paths = []
            output_dataset_paths_exist = False

        current_state = job_state().json()["state"]
        assert current_state == "running", current_state

        # Purge one of the two outputs and make sure the job is still running.
        self._raw_update_history_item(history_id, outputs[0]["id"],
                                      {"purged": True})
        time.sleep(1)
        current_state = job_state().json()["state"]
        assert current_state == "running", current_state

        # Purge the second output and make sure the job is cancelled.
        self._raw_update_history_item(history_id, outputs[1]["id"],
                                      {"purged": True})
        final_state = wait_on_state(job_state, assert_ok=False, timeout=15)
        assert final_state in ["deleted_new", "deleted"], final_state

        def paths_deleted():
            if not os.path.exists(
                    output_dataset_paths[0]) and not os.path.exists(
                        output_dataset_paths[1]):
                return True

        if output_dataset_paths_exist:
            wait_on(paths_deleted, "path deletion")

    def test_purging_output_cleaned_after_ok_run(self):
        history_id, job_state, outputs = self._setup_running_two_output_job(10)

        # Pretty much right away after the job is running, these paths should be populated -
        # if they are grab them and make sure they are deleted at the end of the job.
        dataset_1 = self._get_history_item_as_admin(history_id,
                                                    outputs[0]["id"])
        dataset_2 = self._get_history_item_as_admin(history_id,
                                                    outputs[1]["id"])
        if "file_name" in dataset_1:
            output_dataset_paths = [
                dataset_1["file_name"], dataset_2["file_name"]
            ]
            # This may or may not exist depending on if the test is local or not.
            output_dataset_paths_exist = os.path.exists(
                output_dataset_paths[0])
        else:
            output_dataset_paths = []
            output_dataset_paths_exist = False

        if not output_dataset_paths_exist:
            # Given this Galaxy configuration - there is nothing more to be tested here.
            # Consider throwing a skip instead.
            return

        # Purge one of the two outputs and wait for the job to complete.
        self._raw_update_history_item(history_id, outputs[0]["id"],
                                      {"purged": True})
        wait_on_state(job_state, assert_ok=True)

        if output_dataset_paths_exist:
            time.sleep(.5)
            # Make sure the non-purged dataset is on disk and the purged one is not.
            assert os.path.exists(output_dataset_paths[1])
            assert not os.path.exists(output_dataset_paths[0])

    def _setup_running_two_output_job(self, sleep_time):
        history_id = self.dataset_populator.new_history()
        payload = self.dataset_populator.run_tool_payload(
            tool_id='create_2',
            inputs=dict(sleep_time=sleep_time, ),
            history_id=history_id,
        )
        run_response = self._post("tools", data=payload).json()
        outputs = run_response["outputs"]
        jobs = run_response["jobs"]

        assert len(outputs) == 2
        assert len(jobs) == 1

        def job_state():
            jobs_response = self._get("jobs/%s" % jobs[0]["id"])
            return jobs_response

        # Give job some time to get up and running.
        time.sleep(2)
        running_state = wait_on_state(job_state,
                                      skip_states=["queued", "new"],
                                      assert_ok=False,
                                      timeout=15)
        assert running_state == "running", running_state

        def job_state():
            jobs_response = self._get("jobs/%s" % jobs[0]["id"])
            return jobs_response

        return history_id, job_state, outputs

    def _raw_update_history_item(self, history_id, item_id, data):
        update_url = self._api_url("histories/%s/contents/%s" %
                                   (history_id, item_id),
                                   use_key=True)
        update_response = put(update_url, json=data)
        assert_status_code_is_ok(update_response)
        return update_response

    def _get_history_item_as_admin(self, history_id, item_id):
        response = self._get("histories/%s/contents/%s?view=detailed" %
                             (history_id, item_id),
                             admin=True)
        assert_status_code_is_ok(response)
        return response.json()

    def test_search(self):
        history_id, dataset_id = self.__history_with_ok_dataset()

        inputs = json.dumps(dict(input1=dict(
            src='hda',
            id=dataset_id,
        )))
        search_payload = dict(
            tool_id="cat1",
            inputs=inputs,
            state="ok",
        )

        empty_search_response = self._post("jobs/search", data=search_payload)
        self._assert_status_code_is(empty_search_response, 200)
        self.assertEquals(len(empty_search_response.json()), 0)

        self.__run_cat_tool(history_id, dataset_id)
        self.dataset_populator.wait_for_history(history_id, assert_ok=True)

        search_count = -1
        # in case job and history aren't updated at exactly the same
        # time give time to wait
        for i in range(5):
            search_count = self._search_count(search_payload)
            if search_count == 1:
                break
            time.sleep(.1)

        self.assertEquals(search_count, 1)

    def _search_count(self, search_payload):
        search_response = self._post("jobs/search", data=search_payload)
        self._assert_status_code_is(search_response, 200)
        search_json = search_response.json()
        return len(search_json)

    def __run_cat_tool(self, history_id, dataset_id):
        # Code duplication with test_jobs.py, eliminate
        payload = self.dataset_populator.run_tool_payload(
            tool_id='cat1',
            inputs=dict(input1=dict(src='hda', id=dataset_id), ),
            history_id=history_id,
        )
        self._post("tools", data=payload)

    def __run_randomlines_tool(self, lines, history_id, dataset_id):
        payload = self.dataset_populator.run_tool_payload(
            tool_id="random_lines1",
            inputs=dict(
                num_lines=lines,
                input=dict(
                    src='hda',
                    id=dataset_id,
                ),
            ),
            history_id=history_id,
        )
        self._post("tools", data=payload)

    def __uploads_with_state(self, *states):
        jobs_response = self._get("jobs", data=dict(state=states))
        self._assert_status_code_is(jobs_response, 200)
        jobs = jobs_response.json()
        assert not filter(lambda j: j["state"] not in states, jobs)
        return filter(lambda j: j["tool_id"] == "upload1", jobs)

    def __history_with_new_dataset(self):
        history_id = self.dataset_populator.new_history()
        dataset_id = self.dataset_populator.new_dataset(history_id)["id"]
        return history_id, dataset_id

    def __history_with_ok_dataset(self):
        history_id = self.dataset_populator.new_history()
        dataset_id = self.dataset_populator.new_dataset(history_id,
                                                        wait=True)["id"]
        return history_id, dataset_id

    def __jobs_index(self, **kwds):
        jobs_response = self._get("jobs", **kwds)
        self._assert_status_code_is(jobs_response, 200)
        jobs = jobs_response.json()
        assert isinstance(jobs, list)
        return jobs
class LibrariesApiTestCase(api.ApiTestCase, TestsDatasets):

    def setUp(self):
        super(LibrariesApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(self.galaxy_interactor)
        self.library_populator = LibraryPopulator(self.galaxy_interactor)

    def test_create(self):
        data = dict(name="CreateTestLibrary")
        create_response = self._post("libraries", data=data, admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, "name")
        assert library["name"] == "CreateTestLibrary"

    def test_delete(self):
        library = self.library_populator.new_library("DeleteTestLibrary")
        create_response = self._delete("libraries/%s" % library["id"], admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, "deleted")
        assert library["deleted"] is True
        # Test undeleting
        data = dict(undelete='true')
        create_response = self._delete("libraries/%s" % library["id"], data=data, admin=True)
        library = create_response.json()
        self._assert_status_code_is(create_response, 200)
        assert library["deleted"] is False

    def test_nonadmin(self):
        # Anons can't create libs
        data = dict(name="CreateTestLibrary")
        create_response = self._post("libraries", data=data, admin=False, anon=True)
        self._assert_status_code_is(create_response, 403)
        # Anons can't delete libs
        library = self.library_populator.new_library("AnonDeleteTestLibrary")
        create_response = self._delete("libraries/%s" % library["id"], admin=False, anon=True)
        self._assert_status_code_is(create_response, 403)
        # Anons can't update libs
        data = dict(name="ChangedName", description="ChangedDescription", synopsis='ChangedSynopsis')
        create_response = self._patch("libraries/%s" % library["id"], data=data, admin=False, anon=True)
        self._assert_status_code_is(create_response, 403)

    def test_update(self):
        library = self.library_populator.new_library("UpdateTestLibrary")
        data = dict(name='ChangedName', description='ChangedDescription', synopsis='ChangedSynopsis')
        create_response = self._patch("libraries/%s" % library["id"], data=data, admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, 'name', 'description', 'synopsis')
        assert library['name'] == 'ChangedName'
        assert library['description'] == 'ChangedDescription'
        assert library['synopsis'] == 'ChangedSynopsis'

    def test_create_private_library_permissions(self):
        library = self.library_populator.new_library("PermissionTestLibrary")
        library_id = library["id"]
        role_id = self.library_populator.user_private_role_id()
        self.library_populator.set_permissions(library_id, role_id)
        create_response = self._create_folder(library)
        self._assert_status_code_is(create_response, 200)

    def test_create_dataset_denied(self):
        library = self.library_populator.new_private_library("ForCreateDatasets")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3")['id']
        with self._different_user():
            payload = {'from_hda_id': hda_id}
            create_response = self._post("folders/%s/contents" % folder_id, payload)
            self._assert_status_code_is(create_response, 403)

    def test_show_private_dataset_permissions(self):
        library, library_dataset = self.library_populator.new_library_dataset_in_private_library("ForCreateDatasets", wait=True)
        with self._different_user():
            response = self.library_populator.show_ldda(library["id"], library_dataset["id"])
            # TODO: this should really be 403 and a proper JSON exception.
            self._assert_status_code_is(response, 400)

    def test_create_dataset(self):
        library, library_dataset = self.library_populator.new_library_dataset_in_private_library("ForCreateDatasets", wait=True)
        self._assert_has_keys(library_dataset, "peek", "data_type")
        assert library_dataset["peek"].find("create_test") >= 0
        assert library_dataset["file_ext"] == "txt", library_dataset["file_ext"]

    def test_fetch_upload_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder("flat_zip")
        items = [{"src": "files", "dbkey": "hg19", "info": "my cool bed"}]
        targets = [{
            "destination": destination,
            "items": items
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
            "__files": {"files_0|file_data": open(self.test_data_resolver.get_filename("4.bed"))},
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(library["id"], "/4.bed")
        assert dataset["file_size"] == 61, dataset
        assert dataset["genome_build"] == "hg19", dataset
        assert dataset["misc_info"] == "my cool bed", dataset
        assert dataset["file_ext"] == "bed", dataset

    def test_fetch_zip_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder("flat_zip")
        bed_test_data_path = self.test_data_resolver.get_filename("4.bed.zip")
        targets = [{
            "destination": destination,
            "items_from": "archive", "src": "files",
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
            "__files": {"files_0|file_data": open(bed_test_data_path)}
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(library["id"], "/4.bed")
        assert dataset["file_size"] == 61, dataset

    def test_fetch_single_url_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder("single_url")
        items = [{"src": "url", "url": "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed"}]
        targets = [{
            "destination": destination,
            "items": items
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(library["id"], "/4.bed")
        assert dataset["file_size"] == 61, dataset

    def test_fetch_url_archive_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder("single_url")
        targets = [{
            "destination": destination,
            "items_from": "archive",
            "src": "url",
            "url": "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed.zip",
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(library["id"], "/4.bed")
        assert dataset["file_size"] == 61, dataset

    @unittest.skip  # reference URLs changed, checksums now invalid.
    def test_fetch_bagit_archive_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder("bagit_archive")
        example_bag_path = self.test_data_resolver.get_filename("example-bag.zip")
        targets = [{
            "destination": destination,
            "items_from": "bagit_archive", "src": "files",
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
            "__files": {"files_0|file_data": open(example_bag_path)},
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(library["id"], "/README.txt")
        assert dataset["file_size"] == 66, dataset

        dataset = self.library_populator.get_library_contents_with_path(library["id"], "/bdbag-profile.json")
        assert dataset["file_size"] == 723, dataset

    def _setup_fetch_to_folder(self, test_name):
        return self.library_populator.setup_fetch_to_folder(test_name)

    def test_create_dataset_in_folder(self):
        library = self.library_populator.new_private_library("ForCreateDatasets")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3")['id']
        payload = {'from_hda_id': hda_id}
        create_response = self._post("folders/%s/contents" % folder_id, payload)
        self._assert_status_code_is(create_response, 200)
        self._assert_has_keys(create_response.json(), "name", "id")

    def test_update_dataset_in_folder(self):
        library = self.library_populator.new_private_library("ForUpdateDataset")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3")['id']
        payload = {'from_hda_id': hda_id, 'create_type': 'file', 'folder_id': folder_id}
        ld = self._post("libraries/%s/contents" % folder_id, payload)
        data = {'name': 'updated_name', 'file_ext': 'fastq', 'misc_info': 'updated_info', 'genome_build': 'updated_genome_build'}
        create_response = self._patch("libraries/datasets/%s" % ld.json()["id"], data=data)
        self._assert_status_code_is(create_response, 200)
        self._assert_has_keys(create_response.json(), "name", "file_ext", "misc_info", "genome_build")

    def test_invalid_update_dataset_in_folder(self):
        library = self.library_populator.new_private_library("ForInvalidUpdateDataset")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id, content="1 2 3")['id']
        payload = {'from_hda_id': hda_id, 'create_type': 'file', 'folder_id': folder_id}
        ld = self._post("libraries/%s/contents" % folder_id, payload)
        data = {'file_ext': 'nonexisting_type'}
        create_response = self._patch("libraries/datasets/%s" % ld.json()["id"], data=data)
        self._assert_status_code_is(create_response, 400)
        assert 'This Galaxy does not recognize the datatype of:' in create_response.json()['err_msg']

    def test_create_datasets_in_library_from_collection(self):
        library = self.library_populator.new_private_library("ForCreateDatasetsFromCollection")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hdca_id = self.dataset_collection_populator.create_list_in_history(history_id, contents=["xxx", "yyy"]).json()["id"]
        payload = {'from_hdca_id': hdca_id, 'create_type': 'file', 'folder_id': folder_id}
        create_response = self._post("libraries/%s/contents" % library['id'], payload)
        self._assert_status_code_is(create_response, 200)

    def test_create_datasets_in_folder_from_collection(self):
        library = self.library_populator.new_private_library("ForCreateDatasetsFromCollection")
        history_id = self.dataset_populator.new_history()
        hdca_id = self.dataset_collection_populator.create_list_in_history(history_id, contents=["xxx", "yyy"]).json()["id"]
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        payload = {'from_hdca_id': hdca_id}
        create_response = self._post("folders/%s/contents" % folder_id, payload)
        self._assert_status_code_is(create_response, 200)
        assert len(create_response.json()) == 2
        # Also test that anything different from a flat dataset collection list
        # is refused
        hdca_pair_id = self.dataset_collection_populator.create_list_of_pairs_in_history(history_id).json()['id']
        payload = {'from_hdca_id': hdca_pair_id}
        create_response = self._post("folders/%s/contents" % folder_id, payload)
        self._assert_status_code_is(create_response, 501)
        assert create_response.json()['err_msg'] == 'Cannot add nested collections to library. Please flatten your collection first.'

    def _create_folder(self, library):
        create_data = dict(
            folder_id=library["root_folder_id"],
            create_type="folder",
            name="New Folder",
        )
        return self._post("libraries/%s/contents" % library["id"], data=create_data)
class DatasetCollectionApiTestCase(api.ApiTestCase):

    def setUp(self):
        super(DatasetCollectionApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_create_pair_from_history(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            instance_type="history",
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_datasets = dataset_collection["elements"]
        assert len(returned_datasets) == 2, dataset_collection

    def test_create_list_from_history(self):
        element_identifiers = self.dataset_collection_populator.list_identifiers(self.history_id)

        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )

        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_datasets = dataset_collection["elements"]
        assert len(returned_datasets) == 3, dataset_collection

    def test_create_list_of_existing_pairs(self):
        pair_payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            instance_type="history",
        )
        pair_create_response = self._post("dataset_collections", pair_payload)
        dataset_collection = self._check_create_response(pair_create_response)
        hdca_id = dataset_collection["id"]

        element_identifiers = [
            dict(name="test1", src="hdca", id=hdca_id)
        ]

        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_collections = dataset_collection["elements"]
        assert len(returned_collections) == 1, dataset_collection

    def test_create_list_of_new_pairs(self):
        identifiers = self.dataset_collection_populator.nested_collection_identifiers(self.history_id, "list:paired")
        payload = dict(
            collection_type="list:paired",
            instance_type="history",
            history_id=self.history_id,
            name="a nested collection",
            element_identifiers=json.dumps(identifiers),
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        assert dataset_collection["collection_type"] == "list:paired"
        assert dataset_collection["name"] == "a nested collection"
        returned_collections = dataset_collection["elements"]
        assert len(returned_collections) == 1, dataset_collection
        pair_1_element = returned_collections[0]
        self._assert_has_keys(pair_1_element, "element_identifier", "element_index", "object")
        assert pair_1_element["element_identifier"] == "test_level_1", pair_1_element
        assert pair_1_element["element_index"] == 0, pair_1_element
        pair_1_object = pair_1_element["object"]
        self._assert_has_keys(pair_1_object, "collection_type", "elements", "element_count")
        self.assertEqual(pair_1_object["collection_type"], "paired")
        self.assertEqual(pair_1_object["populated"], True)
        pair_elements = pair_1_object["elements"]
        assert len(pair_elements) == 2
        pair_1_element_1 = pair_elements[0]
        assert pair_1_element_1["element_index"] == 0

    def test_list_download(self):
        fetch_response = self.dataset_collection_populator.create_list_in_history(self.history_id, direct_upload=True).json()
        dataset_collection = self.dataset_collection_populator.wait_for_fetched_collection(fetch_response)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 3, dataset_collection
        create_response = self._download_dataset_collection(history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=BytesIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 3, "Expected 3 elements in [%s]" % namelist
        collection_name = dataset_collection['name']
        for element, zip_path in zip(returned_dce, namelist):
            assert "%s/%s.%s" % (collection_name, element['element_identifier'], element['object']['file_ext']) == zip_path

    def test_pair_download(self):
        fetch_response = self.dataset_collection_populator.create_pair_in_history(self.history_id, direct_upload=True).json()
        dataset_collection = self.dataset_collection_populator.wait_for_fetched_collection(fetch_response)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 2, dataset_collection
        hdca_id = dataset_collection['id']
        create_response = self._download_dataset_collection(history_id=self.history_id, hdca_id=hdca_id)
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=BytesIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 2, "Expected 2 elements in [%s]" % namelist
        collection_name = dataset_collection['name']
        for element, zip_path in zip(returned_dce, namelist):
            assert "%s/%s.%s" % (collection_name, element['element_identifier'], element['object']['file_ext']) == zip_path

    def test_list_pair_download(self):
        fetch_response = self.dataset_collection_populator.create_list_of_pairs_in_history(self.history_id).json()
        dataset_collection = self.dataset_collection_populator.wait_for_fetched_collection(fetch_response)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        list_collection_name = dataset_collection['name']
        pair = returned_dce[0]
        create_response = self._download_dataset_collection(history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=BytesIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 2, "Expected 2 elements in [%s]" % namelist
        pair_collection_name = pair['element_identifier']
        for element, zip_path in zip(pair['object']['elements'], namelist):
            assert "%s/%s/%s.%s" % (list_collection_name, pair_collection_name, element['element_identifier'], element['object']['file_ext']) == zip_path

    def test_list_list_download(self):
        dataset_collection = self.dataset_collection_populator.create_list_of_list_in_history(self.history_id).json()
        self.dataset_collection_populator.wait_for_dataset_collection(dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        create_response = self._download_dataset_collection(history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=BytesIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 3, "Expected 3 elements in [%s]" % namelist

    def test_list_list_list_download(self):
        dataset_collection = self.dataset_collection_populator.create_list_of_list_in_history(self.history_id, collection_type='list:list:list').json()
        self.dataset_collection_populator.wait_for_dataset_collection(dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        create_response = self._download_dataset_collection(history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=BytesIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 3, "Expected 3 elements in [%s]" % namelist

    def test_hda_security(self):
        element_identifiers = self.dataset_collection_populator.pair_identifiers(self.history_id)
        self.dataset_populator.make_private(self.history_id, element_identifiers[0]["id"])
        with self._different_user():
            history_id = self.dataset_populator.new_history()
            payload = dict(
                instance_type="history",
                history_id=history_id,
                element_identifiers=json.dumps(element_identifiers),
                collection_type="paired",
            )
            create_response = self._post("dataset_collections", payload)
            self._assert_status_code_is(create_response, 403)

    def test_enforces_unique_names(self):
        element_identifiers = self.dataset_collection_populator.list_identifiers(self.history_id)
        element_identifiers[2]["name"] = element_identifiers[0]["name"]
        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )

        create_response = self._post("dataset_collections", payload)
        self._assert_status_code_is(create_response, 400)

    def test_upload_collection(self):
        elements = [{"src": "files", "dbkey": "hg19", "info": "my cool bed", "tags": ["name:data1", "group:condition:treated", "machine:illumina"]}]
        targets = [{
            "destination": {"type": "hdca"},
            "elements": elements,
            "collection_type": "list",
            "name": "Test upload",
            "tags": ["name:collection1"]
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {"files_0|file_data": open(self.test_data_resolver.get_filename("4.bed"))},
        }
        self.dataset_populator.fetch(payload)
        hdca = self._assert_one_collection_created_in_history()
        self.assertEqual(hdca["name"], "Test upload")
        hdca_tags = hdca["tags"]
        assert len(hdca_tags) == 1
        assert "name:collection1" in hdca_tags
        assert len(hdca["elements"]) == 1, hdca
        element0 = hdca["elements"][0]
        assert element0["element_identifier"] == "4.bed"
        dataset0 = element0["object"]
        assert dataset0["file_size"] == 61
        dataset_tags = dataset0["tags"]
        assert len(dataset_tags) == 3, dataset0

    def test_upload_nested(self):
        elements = [{"name": "samp1", "elements": [{"src": "files", "dbkey": "hg19", "info": "my cool bed"}]}]
        targets = [{
            "destination": {"type": "hdca"},
            "elements": elements,
            "collection_type": "list:list",
            "name": "Test upload",
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {"files_0|file_data": open(self.test_data_resolver.get_filename("4.bed"))},
        }
        self.dataset_populator.fetch(payload)
        hdca = self._assert_one_collection_created_in_history()
        self.assertEqual(hdca["name"], "Test upload")
        assert len(hdca["elements"]) == 1, hdca
        element0 = hdca["elements"][0]
        assert element0["element_identifier"] == "samp1"

    def test_upload_collection_from_url(self):
        elements = [{"src": "url", "url": "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed", "info": "my cool bed"}]
        targets = [{
            "destination": {"type": "hdca"},
            "elements": elements,
            "collection_type": "list",
        }]
        payload = {
            "history_id": self.history_id,
            "targets": json.dumps(targets),
            "__files": {"files_0|file_data": open(self.test_data_resolver.get_filename("4.bed"))},
        }
        self.dataset_populator.fetch(payload)
        hdca = self._assert_one_collection_created_in_history()
        assert len(hdca["elements"]) == 1, hdca
        element0 = hdca["elements"][0]
        assert element0["element_identifier"] == "4.bed"
        assert element0["object"]["file_size"] == 61

    def _assert_one_collection_created_in_history(self):
        contents_response = self._get("histories/%s/contents/dataset_collections" % self.history_id)
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        assert len(contents) == 1
        hdca = contents[0]
        assert hdca["history_content_type"] == "dataset_collection"
        hdca_id = hdca["id"]
        collection_response = self._get("histories/%s/contents/dataset_collections/%s" % (self.history_id, hdca_id))
        self._assert_status_code_is(collection_response, 200)
        return collection_response.json()

    def _check_create_response(self, create_response):
        self._assert_status_code_is(create_response, 200)
        dataset_collection = create_response.json()
        self._assert_has_keys(dataset_collection, "elements", "url", "name", "collection_type", "element_count")
        return dataset_collection

    def _download_dataset_collection(self, history_id, hdca_id):
        return self._get("histories/%s/contents/dataset_collections/%s/download" % (history_id, hdca_id))
Beispiel #29
0
class ToolsUploadTestCase(api.ApiTestCase):

    def setUp(self):
        super(ToolsUploadTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    def test_upload1_paste(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id, 'Hello World')
            create_response = self._post("tools", data=payload)
            self._assert_has_keys(create_response.json(), 'outputs')

    def test_upload1_paste_bad_datatype(self):
        # Check that you get a nice message if you upload an incorrect datatype
        with self.dataset_populator.test_history() as history_id:
            file_type = "johnsawesomebutfakedatatype"
            payload = self.dataset_populator.upload_payload(history_id, 'Hello World', file_type=file_type)
            create = self._post("tools", data=payload).json()
            self._assert_has_keys(create, 'err_msg')
            assert file_type in create['err_msg']

    def test_upload_posix_newline_fixes(self):
        windows_content = ONE_TO_SIX_ON_WINDOWS
        result_content = self._upload_and_get_content(windows_content)
        self.assertEquals(result_content, ONE_TO_SIX_WITH_TABS)

    def test_upload_disable_posix_fix(self):
        windows_content = ONE_TO_SIX_ON_WINDOWS
        result_content = self._upload_and_get_content(windows_content, to_posix_lines=None)
        self.assertEquals(result_content, windows_content)

    def test_upload_tab_to_space(self):
        table = ONE_TO_SIX_WITH_SPACES
        result_content = self._upload_and_get_content(table, space_to_tab="Yes")
        self.assertEquals(result_content, ONE_TO_SIX_WITH_TABS)

    def test_upload_tab_to_space_off_by_default(self):
        table = ONE_TO_SIX_WITH_SPACES
        result_content = self._upload_and_get_content(table)
        self.assertEquals(result_content, table)

    @skip_without_datatype("rdata")
    def test_rdata_not_decompressed(self):
        # Prevent regression of https://github.com/galaxyproject/galaxy/issues/753
        rdata_path = TestDataResolver().get_filename("1.RData")
        rdata_metadata = self._upload_and_get_details(open(rdata_path, "rb"), file_type="auto")
        self.assertEquals(rdata_metadata["file_ext"], "rdata")

    @skip_without_datatype("velvet")
    def test_composite_datatype(self):
        with self.dataset_populator.test_history() as history_id:
            dataset = self._velvet_upload(history_id, extra_inputs={
                "files_1|url_paste": "roadmaps content",
                "files_1|type": "upload_dataset",
                "files_2|url_paste": "log content",
                "files_2|type": "upload_dataset",
            })

            roadmaps_content = self._get_roadmaps_content(history_id, dataset)
            assert roadmaps_content.strip() == "roadmaps content", roadmaps_content

    @skip_without_datatype("velvet")
    def test_composite_datatype_space_to_tab(self):
        # Like previous test but set one upload with space_to_tab to True to
        # verify that works.
        with self.dataset_populator.test_history() as history_id:
            dataset = self._velvet_upload(history_id, extra_inputs={
                "files_1|url_paste": "roadmaps content",
                "files_1|type": "upload_dataset",
                "files_1|space_to_tab": "Yes",
                "files_2|url_paste": "log content",
                "files_2|type": "upload_dataset",
            })

            roadmaps_content = self._get_roadmaps_content(history_id, dataset)
            assert roadmaps_content.strip() == "roadmaps\tcontent", roadmaps_content

    @skip_without_datatype("velvet")
    def test_composite_datatype_posix_lines(self):
        # Like previous test but set one upload with space_to_tab to True to
        # verify that works.
        with self.dataset_populator.test_history() as history_id:
            dataset = self._velvet_upload(history_id, extra_inputs={
                "files_1|url_paste": "roadmaps\rcontent",
                "files_1|type": "upload_dataset",
                "files_1|space_to_tab": "Yes",
                "files_2|url_paste": "log\rcontent",
                "files_2|type": "upload_dataset",
            })

            roadmaps_content = self._get_roadmaps_content(history_id, dataset)
            assert roadmaps_content.strip() == "roadmaps\ncontent", roadmaps_content

    def test_upload_dbkey(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id, "Test123", dbkey="hg19")
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]
            assert datasets[0].get("genome_build") == "hg19", datasets[0]

    def test_upload_multiple_files_1(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id, "Test123",
                dbkey="hg19",
                extra_inputs={
                    "files_1|url_paste": "SecondOutputContent",
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "tabular",
                    "files_1|dbkey": "hg18",
                    "file_count": "2",
                }
            )
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 2, datasets
            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
            assert content.strip() == "Test123"
            assert datasets[0]["file_ext"] == "txt"
            assert datasets[0]["genome_build"] == "hg19", datasets

            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
            assert content.strip() == "SecondOutputContent"
            assert datasets[1]["file_ext"] == "tabular"
            assert datasets[1]["genome_build"] == "hg18", datasets

    def test_upload_multiple_files_2(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id, "Test123",
                file_type="tabular",
                dbkey="hg19",
                extra_inputs={
                    "files_1|url_paste": "SecondOutputContent",
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "files_1|dbkey": "hg18",
                    "file_count": "2",
                }
            )
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 2, datasets
            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
            assert content.strip() == "Test123"
            assert datasets[0]["file_ext"] == "tabular", datasets
            assert datasets[0]["genome_build"] == "hg19", datasets

            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
            assert content.strip() == "SecondOutputContent"
            assert datasets[1]["file_ext"] == "txt"
            assert datasets[1]["genome_build"] == "hg18", datasets

    def test_upload_multiple_files_3(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id, "Test123",
                file_type="tabular",
                dbkey="hg19",
                extra_inputs={
                    "files_0|file_type": "txt",
                    "files_0|dbkey": "hg18",
                    "files_1|url_paste": "SecondOutputContent",
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "files_1|dbkey": "hg18",
                    "file_count": "2",
                }
            )
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 2, datasets
            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
            assert content.strip() == "Test123"
            assert datasets[0]["file_ext"] == "txt", datasets
            assert datasets[0]["genome_build"] == "hg18", datasets

            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
            assert content.strip() == "SecondOutputContent"
            assert datasets[1]["file_ext"] == "txt"
            assert datasets[1]["genome_build"] == "hg18", datasets

    def test_upload_multiple_files_no_dbkey(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id, "Test123",
                file_type="tabular",
                dbkey=None,
                extra_inputs={
                    "files_0|file_type": "txt",
                    "files_1|url_paste": "SecondOutputContent",
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "file_count": "2",
                }
            )
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 2, datasets
            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
            assert content.strip() == "Test123"
            assert datasets[0]["file_ext"] == "txt", datasets
            assert datasets[0]["genome_build"] == "?", datasets

            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
            assert content.strip() == "SecondOutputContent"
            assert datasets[1]["file_ext"] == "txt"
            assert datasets[1]["genome_build"] == "?", datasets

    def test_upload_multiple_files_space_to_tab(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id,
                content=ONE_TO_SIX_WITH_SPACES,
                file_type="tabular",
                dbkey="hg19",
                extra_inputs={
                    "files_0|file_type": "txt",
                    "files_0|space_to_tab": "Yes",
                    "files_1|url_paste": ONE_TO_SIX_WITH_SPACES,
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "files_2|url_paste": ONE_TO_SIX_WITH_SPACES,
                    "files_2|NAME": "ThirdOutputName",
                    "files_2|file_type": "txt",
                    "files_2|space_to_tab": "Yes",
                    "file_count": "3",
                }
            )
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 3, datasets
            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
            assert content == ONE_TO_SIX_WITH_TABS

            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
            assert content == ONE_TO_SIX_WITH_SPACES

            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[2])
            assert content == ONE_TO_SIX_WITH_TABS

    def test_multiple_files_posix_lines(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id,
                content=ONE_TO_SIX_ON_WINDOWS,
                file_type="tabular",
                dbkey="hg19",
                extra_inputs={
                    "files_0|file_type": "txt",
                    "files_0|to_posix_lines": "Yes",
                    "files_1|url_paste": ONE_TO_SIX_ON_WINDOWS,
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "files_1|to_posix_lines": None,
                    "files_2|url_paste": ONE_TO_SIX_ON_WINDOWS,
                    "files_2|NAME": "ThirdOutputName",
                    "files_2|file_type": "txt",
                    "file_count": "3",
                }
            )
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 3, datasets
            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
            assert content == ONE_TO_SIX_WITH_TABS

            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
            assert content == ONE_TO_SIX_ON_WINDOWS

            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[2])
            assert content == ONE_TO_SIX_WITH_TABS

    def test_upload_from_invalid_url(self):
        history_id, new_dataset = self._upload('https://usegalaxy.org/bla123', assert_ok=False)
        dataset_details = self.dataset_populator.get_history_dataset_details(history_id, dataset_id=new_dataset["id"], assert_ok=False)
        assert dataset_details['state'] == 'error', "expected dataset state to be 'error', but got '%s'" % dataset_details['state']

    def test_upload_from_valid_url(self):
        history_id, new_dataset = self._upload('https://usegalaxy.org/api/version')
        self.dataset_populator.get_history_dataset_details(history_id, dataset_id=new_dataset["id"], assert_ok=True)

    def _velvet_upload(self, history_id, extra_inputs):
        payload = self.dataset_populator.upload_payload(
            history_id,
            "sequences content",
            file_type="velvet",
            extra_inputs=extra_inputs,
        )
        run_response = self.dataset_populator.tools_post(payload)
        self.dataset_populator.wait_for_tool_run(history_id, run_response)
        datasets = run_response.json()["outputs"]

        assert len(datasets) == 1
        dataset = datasets[0]

        return dataset

    def _get_roadmaps_content(self, history_id, dataset):
        roadmaps_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=dataset, filename="Roadmaps")
        return roadmaps_content

    def _upload_and_get_content(self, content, **upload_kwds):
        history_id, new_dataset = self._upload(content, **upload_kwds)
        return self.dataset_populator.get_history_dataset_content(history_id, dataset=new_dataset)

    def _upload_and_get_details(self, content, **upload_kwds):
        history_id, new_dataset = self._upload(content, **upload_kwds)
        return self.dataset_populator.get_history_dataset_details(history_id, dataset=new_dataset)

    def _upload(self, content, **upload_kwds):
        history_id = self.dataset_populator.new_history()
        new_dataset = self.dataset_populator.new_dataset(history_id, content=content, **upload_kwds)
        self.dataset_populator.wait_for_history(history_id, assert_ok=upload_kwds.get("assert_ok", True))
        return history_id, new_dataset
Beispiel #30
0
class DatasetCollectionApiTestCase(api.ApiTestCase):
    def setUp(self):
        super(DatasetCollectionApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(
            self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_create_pair_from_history(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            instance_type="history",
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_datasets = dataset_collection["elements"]
        assert len(returned_datasets) == 2, dataset_collection

    def test_create_list_from_history(self):
        element_identifiers = self.dataset_collection_populator.list_identifiers(
            self.history_id)

        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )

        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_datasets = dataset_collection["elements"]
        assert len(returned_datasets) == 3, dataset_collection

    def test_create_list_of_existing_pairs(self):
        pair_payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            instance_type="history",
        )
        pair_create_response = self._post("dataset_collections", pair_payload)
        dataset_collection = self._check_create_response(pair_create_response)
        hdca_id = dataset_collection["id"]

        element_identifiers = [dict(name="test1", src="hdca", id=hdca_id)]

        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_collections = dataset_collection["elements"]
        assert len(returned_collections) == 1, dataset_collection

    def test_create_list_of_new_pairs(self):
        pair_identifiers = self.dataset_collection_populator.pair_identifiers(
            self.history_id)
        element_identifiers = [
            dict(
                src="new_collection",
                name="test_pair",
                collection_type="paired",
                element_identifiers=pair_identifiers,
            )
        ]
        payload = dict(
            collection_type="list:paired",
            instance_type="history",
            history_id=self.history_id,
            name="a nested collection",
            element_identifiers=json.dumps(element_identifiers),
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        assert dataset_collection["collection_type"] == "list:paired"
        assert dataset_collection["name"] == "a nested collection"
        returned_collections = dataset_collection["elements"]
        assert len(returned_collections) == 1, dataset_collection
        pair_1_element = returned_collections[0]
        self._assert_has_keys(pair_1_element, "element_index")
        pair_1_object = pair_1_element["object"]
        self._assert_has_keys(pair_1_object, "collection_type", "elements")
        self.assertEquals(pair_1_object["collection_type"], "paired")
        self.assertEquals(pair_1_object["populated"], True)
        pair_elements = pair_1_object["elements"]
        assert len(pair_elements) == 2
        pair_1_element_1 = pair_elements[0]
        assert pair_1_element_1["element_index"] == 0

    def test_hda_security(self):
        element_identifiers = self.dataset_collection_populator.pair_identifiers(
            self.history_id)

        with self._different_user():
            history_id = self.dataset_populator.new_history()
            payload = dict(
                instance_type="history",
                history_id=history_id,
                element_identifiers=json.dumps(element_identifiers),
                collection_type="paired",
            )

            self._post("dataset_collections", payload)
            # TODO: re-enable once there is a way to restrict access
            # to this dataset via the API.
            # self._assert_status_code_is( create_response, 403 )

    def test_enforces_unique_names(self):
        element_identifiers = self.dataset_collection_populator.list_identifiers(
            self.history_id)
        element_identifiers[2]["name"] = element_identifiers[0]["name"]
        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )

        create_response = self._post("dataset_collections", payload)
        self._assert_status_code_is(create_response, 400)

    def _check_create_response(self, create_response):
        self._assert_status_code_is(create_response, 200)
        dataset_collection = create_response.json()
        self._assert_has_keys(dataset_collection, "elements", "url", "name",
                              "collection_type")
        return dataset_collection
Beispiel #31
0
class HistoriesApiTestCase(api.ApiTestCase):
    def setUp(self):
        super(HistoriesApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(
            self.galaxy_interactor)

    def test_create_history(self):
        # Create a history.
        create_response = self._create_history("TestHistory1")
        created_id = create_response["id"]

        # Make sure new history appears in index of user's histories.
        index_response = self._get("histories").json()
        indexed_history = [h for h in index_response
                           if h["id"] == created_id][0]
        self.assertEquals(indexed_history["name"], "TestHistory1")

    def test_show_history(self):
        history_id = self._create_history("TestHistoryForShow")["id"]
        show_response = self._show(history_id)
        self._assert_has_key(show_response, 'id', 'name', 'annotation', 'size',
                             'contents_url', 'state', 'state_details',
                             'state_ids')

        state_details = show_response["state_details"]
        state_ids = show_response["state_ids"]
        states = [
            'discarded', 'empty', 'error', 'failed_metadata', 'new', 'ok',
            'paused', 'queued', 'running', 'setting_metadata', 'upload'
        ]
        assert isinstance(state_details, dict)
        assert isinstance(state_ids, dict)
        self._assert_has_keys(state_details, *states)
        self._assert_has_keys(state_ids, *states)

    def test_show_most_recently_used(self):
        history_id = self._create_history("TestHistoryRecent")["id"]
        show_response = self._get("histories/most_recently_used").json()
        assert show_response["id"] == history_id

    def test_index_order(self):
        slightly_older_history_id = self._create_history(
            "TestHistorySlightlyOlder")["id"]
        newer_history_id = self._create_history("TestHistoryNewer")["id"]
        index_response = self._get("histories").json()
        assert index_response[0]["id"] == newer_history_id
        assert index_response[1]["id"] == slightly_older_history_id

    def test_delete(self):
        # Setup a history and ensure it is in the index
        history_id = self._create_history("TestHistoryForDelete")["id"]
        index_response = self._get("histories").json()
        assert index_response[0]["id"] == history_id

        show_response = self._show(history_id)
        assert not show_response["deleted"]

        # Delete the history
        self._delete("histories/%s" % history_id)

        # Check can view it - but it is deleted
        show_response = self._show(history_id)
        assert show_response["deleted"]

        # Verify it is dropped from history index
        index_response = self._get("histories").json()
        assert len(
            index_response) == 0 or index_response[0]["id"] != history_id

        # Add deleted filter to index to view it
        index_response = self._get("histories", {"deleted": "true"}).json()
        assert index_response[0]["id"] == history_id

    def test_purge(self):
        history_id = self._create_history("TestHistoryForPurge")["id"]
        data = {'purge': True}
        self._delete("histories/%s" % history_id, data=data)
        show_response = self._show(history_id)
        assert show_response["deleted"]
        assert show_response["purged"]

    def test_undelete(self):
        history_id = self._create_history(
            "TestHistoryForDeleteAndUndelete")["id"]
        self._delete("histories/%s" % history_id)
        self._post("histories/deleted/%s/undelete" % history_id)
        show_response = self._show(history_id)
        assert not show_response["deleted"]

    def test_update(self):
        history_id = self._create_history("TestHistoryForUpdating")["id"]

        self._update(history_id, {"name": "New Name"})
        show_response = self._show(history_id)
        assert show_response["name"] == "New Name"

        unicode_name = u'桜ゲノム'
        self._update(history_id, {"name": unicode_name})
        show_response = self._show(history_id)
        assert show_response["name"] == unicode_name, show_response

        quoted_name = "'MooCow'"
        self._update(history_id, {"name": quoted_name})
        show_response = self._show(history_id)
        assert show_response["name"] == quoted_name

        self._update(history_id, {"deleted": True})
        show_response = self._show(history_id)
        assert show_response["deleted"], show_response

        self._update(history_id, {"deleted": False})
        show_response = self._show(history_id)
        assert not show_response["deleted"]

        self._update(history_id, {"published": True})
        show_response = self._show(history_id)
        assert show_response["published"]

        self._update(history_id, {"genome_build": "hg18"})
        show_response = self._show(history_id)
        assert show_response["genome_build"] == "hg18"

        self._update(history_id, {"annotation": "The annotation is cool"})
        show_response = self._show(history_id)
        assert show_response["annotation"] == "The annotation is cool"

        self._update(history_id, {"annotation": unicode_name})
        show_response = self._show(history_id)
        assert show_response["annotation"] == unicode_name, show_response

        self._update(history_id, {"annotation": quoted_name})
        show_response = self._show(history_id)
        assert show_response["annotation"] == quoted_name

    def test_update_invalid_attribute(self):
        history_id = self._create_history(
            "TestHistoryForInvalidUpdating")["id"]
        put_response = self._update(history_id, {"invalidkey": "moo"})
        assert "invalidkey" not in put_response.json()

    def test_update_invalid_types(self):
        history_id = self._create_history(
            "TestHistoryForUpdatingInvalidTypes")["id"]
        for str_key in ["name", "annotation"]:
            assert self._update(history_id, {
                str_key: False
            }).status_code == 400

        for bool_key in ['deleted', 'importable', 'published']:
            assert self._update(history_id, {
                bool_key: "a string"
            }).status_code == 400

        assert self._update(history_id, {
            "tags": "a simple string"
        }).status_code == 400
        assert self._update(history_id, {"tags": [True]}).status_code == 400

    def test_invalid_keys(self):
        invalid_history_id = "1234123412341234"

        assert self._get("histories/%s" %
                         invalid_history_id).status_code == 400
        assert self._update(invalid_history_id, {
            "name": "new name"
        }).status_code == 400
        assert self._delete("histories/%s" %
                            invalid_history_id).status_code == 400
        assert self._post("histories/deleted/%s/undelete" %
                          invalid_history_id).status_code == 400

    def test_create_anonymous_fails(self):
        post_data = dict(name="CannotCreate")
        # Using lower-level _api_url will cause key to not be injected.
        histories_url = self._api_url("histories")
        create_response = post(url=histories_url, data=post_data)
        self._assert_status_code_is(create_response, 403)

    def test_import_export(self):
        history_name = "for_export"
        history_id = self.dataset_populator.new_history(name=history_name)
        self.dataset_populator.new_dataset(history_id, content="1 2 3")
        imported_history_id = self._reimport_history(history_id, history_name)

        contents_response = self._get("histories/%s/contents" %
                                      imported_history_id)
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        assert len(contents) == 1
        imported_content = self.dataset_populator.get_history_dataset_content(
            history_id=imported_history_id, dataset_id=contents[0]["id"])
        assert imported_content == "1 2 3\n"

    def test_import_export_collection(self):
        from nose.plugins.skip import SkipTest
        raise SkipTest("Collection import/export not yet implemented")

        history_name = "for_export_with_collections"
        history_id = self.dataset_populator.new_history(name=history_name)
        self.dataset_collection_populator.create_list_in_history(
            history_id, contents=["Hello", "World"])

        imported_history_id = self._reimport_history(history_id, history_name)

        contents_response = self._get("histories/%s/contents" %
                                      imported_history_id)
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        assert len(contents) == 3

    def _reimport_history(self, history_id, history_name):
        # Ensure the history is ready to go...
        self.dataset_populator.wait_for_history(history_id, assert_ok=True)

        # Export the history.
        download_path = self._export(history_id)

        # Create download for history
        full_download_url = "%s%s?key=%s" % (self.url, download_path,
                                             self.galaxy_interactor.api_key)
        download_response = get(full_download_url)
        self._assert_status_code_is(download_response, 200)

        def history_names():
            history_index = self._get("histories")
            return dict((h["name"], h) for h in history_index.json())

        import_name = "imported from archive: %s" % history_name
        assert import_name not in history_names()

        import_data = dict(archive_source=full_download_url,
                           archive_type="url")
        import_response = self._post("histories", data=import_data)

        self._assert_status_code_is(import_response, 200)

        def has_history_with_name():
            histories = history_names()
            return histories.get(import_name, None)

        imported_history = wait_on(has_history_with_name,
                                   desc="import history")
        imported_history_id = imported_history["id"]
        self.dataset_populator.wait_for_history(imported_history_id)

        return imported_history_id

    def test_create_tag(self):
        post_data = dict(name="TestHistoryForTag")
        history_id = self._post("histories", data=post_data).json()["id"]
        tag_data = dict(value="awesometagvalue")
        tag_url = "histories/%s/tags/awesometagname" % history_id
        tag_create_response = self._post(tag_url, data=tag_data)
        self._assert_status_code_is(tag_create_response, 200)

    def _export(self, history_id):
        export_url = self._api_url("histories/%s/exports" % history_id,
                                   use_key=True)
        put_response = put(export_url)
        self._assert_status_code_is(put_response, 202)

        def export_ready_response():
            put_response = put(export_url)
            if put_response.status_code == 202:
                return None
            return put_response

        put_response = wait_on(export_ready_response, desc="export ready")
        self._assert_status_code_is(put_response, 200)
        response = put_response.json()
        self._assert_has_keys(response, "download_url")
        download_path = response["download_url"]
        return download_path

    def _show(self, history_id):
        return self._get("histories/%s" % history_id).json()

    def _update(self, history_id, data):
        update_url = self._api_url("histories/%s" % history_id, use_key=True)
        put_response = put(update_url, json=data)
        return put_response

    def _create_history(self, name):
        post_data = dict(name=name)
        create_response = self._post("histories", data=post_data).json()
        self._assert_has_keys(create_response, "name", "id")
        self.assertEquals(create_response["name"], name)
        return create_response
class DatasetCollectionApiTestCase(api.ApiTestCase):
    def setUp(self):
        super(DatasetCollectionApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(
            self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_create_pair_from_history(self):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            instance_type="history",
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_datasets = dataset_collection["elements"]
        assert len(returned_datasets) == 2, dataset_collection

    def test_create_list_from_history(self):
        element_identifiers = self.dataset_collection_populator.list_identifiers(
            self.history_id)

        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )

        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_datasets = dataset_collection["elements"]
        assert len(returned_datasets) == 3, dataset_collection

    def test_create_list_of_existing_pairs(self):
        pair_payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            instance_type="history",
        )
        pair_create_response = self._post("dataset_collections", pair_payload)
        dataset_collection = self._check_create_response(pair_create_response)
        hdca_id = dataset_collection["id"]

        element_identifiers = [dict(name="test1", src="hdca", id=hdca_id)]

        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        returned_collections = dataset_collection["elements"]
        assert len(returned_collections) == 1, dataset_collection

    def test_create_list_of_new_pairs(self):
        pair_identifiers = self.dataset_collection_populator.pair_identifiers(
            self.history_id)
        element_identifiers = [
            dict(
                src="new_collection",
                name="test_pair",
                collection_type="paired",
                element_identifiers=pair_identifiers,
            )
        ]
        payload = dict(
            collection_type="list:paired",
            instance_type="history",
            history_id=self.history_id,
            name="a nested collection",
            element_identifiers=json.dumps(element_identifiers),
        )
        create_response = self._post("dataset_collections", payload)
        dataset_collection = self._check_create_response(create_response)
        assert dataset_collection["collection_type"] == "list:paired"
        assert dataset_collection["name"] == "a nested collection"
        returned_collections = dataset_collection["elements"]
        assert len(returned_collections) == 1, dataset_collection
        pair_1_element = returned_collections[0]
        self._assert_has_keys(pair_1_element, "element_index")
        pair_1_object = pair_1_element["object"]
        self._assert_has_keys(pair_1_object, "collection_type", "elements")
        self.assertEquals(pair_1_object["collection_type"], "paired")
        self.assertEquals(pair_1_object["populated"], True)
        pair_elements = pair_1_object["elements"]
        assert len(pair_elements) == 2
        pair_1_element_1 = pair_elements[0]
        assert pair_1_element_1["element_index"] == 0

    def test_list_download(self):
        dataset_collection = self.dataset_collection_populator.create_list_in_history(
            self.history_id).json()
        self.dataset_collection_populator.wait_for_dataset_collection(
            dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 3, dataset_collection
        create_response = self._download_dataset_collection(
            history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=StringIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 3, "Expected 3 elements in [%s]" % namelist
        collection_name = dataset_collection['name']
        for element, zip_path in zip(returned_dce, namelist):
            assert "%s/%s.%s" % (collection_name,
                                 element['element_identifier'],
                                 element['object']['file_ext']) == zip_path

    def test_pair_download(self):
        dataset_collection = self.dataset_collection_populator.create_pair_in_history(
            self.history_id).json()
        self.dataset_collection_populator.wait_for_dataset_collection(
            dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 2, dataset_collection
        hdca_id = dataset_collection['id']
        create_response = self._download_dataset_collection(
            history_id=self.history_id, hdca_id=hdca_id)
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=StringIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 2, "Expected 2 elements in [%s]" % namelist
        collection_name = dataset_collection['name']
        for element, zip_path in zip(returned_dce, namelist):
            assert "%s/%s.%s" % (collection_name,
                                 element['element_identifier'],
                                 element['object']['file_ext']) == zip_path

    def test_list_pair_download(self):
        dataset_collection = self.dataset_collection_populator.create_list_of_pairs_in_history(
            self.history_id).json()
        self.dataset_collection_populator.wait_for_dataset_collection(
            dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        list_collection_name = dataset_collection['name']
        pair = returned_dce[0]
        create_response = self._download_dataset_collection(
            history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=StringIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 2, "Expected 2 elements in [%s]" % namelist
        pair_collection_name = pair['element_identifier']
        for element, zip_path in zip(pair['object']['elements'], namelist):
            assert "%s/%s/%s.%s" % (list_collection_name, pair_collection_name,
                                    element['element_identifier'],
                                    element['object']['file_ext']) == zip_path

    def test_list_list_download(self):
        dataset_collection = self.dataset_collection_populator.create_list_of_list_in_history(
            self.history_id).json()
        self.dataset_collection_populator.wait_for_dataset_collection(
            dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        create_response = self._download_dataset_collection(
            history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=StringIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 3, "Expected 3 elements in [%s]" % namelist

    def test_list_list_list_download(self):
        dataset_collection = self.dataset_collection_populator.create_list_of_list_in_history(
            self.history_id, collection_type='list:list:list').json()
        self.dataset_collection_populator.wait_for_dataset_collection(
            dataset_collection, assert_ok=True)
        returned_dce = dataset_collection["elements"]
        assert len(returned_dce) == 1, dataset_collection
        create_response = self._download_dataset_collection(
            history_id=self.history_id, hdca_id=dataset_collection['id'])
        self._assert_status_code_is(create_response, 200)
        tar_contents = tarfile.open(fileobj=StringIO(create_response.content))
        namelist = tar_contents.getnames()
        assert len(namelist) == 3, "Expected 3 elements in [%s]" % namelist

    def test_hda_security(self):
        element_identifiers = self.dataset_collection_populator.pair_identifiers(
            self.history_id)

        with self._different_user():
            history_id = self.dataset_populator.new_history()
            payload = dict(
                instance_type="history",
                history_id=history_id,
                element_identifiers=json.dumps(element_identifiers),
                collection_type="paired",
            )

            self._post("dataset_collections", payload)
            # TODO: re-enable once there is a way to restrict access
            # to this dataset via the API.
            # self._assert_status_code_is( create_response, 403 )

    def test_enforces_unique_names(self):
        element_identifiers = self.dataset_collection_populator.list_identifiers(
            self.history_id)
        element_identifiers[2]["name"] = element_identifiers[0]["name"]
        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )

        create_response = self._post("dataset_collections", payload)
        self._assert_status_code_is(create_response, 400)

    def _check_create_response(self, create_response):
        self._assert_status_code_is(create_response, 200)
        dataset_collection = create_response.json()
        self._assert_has_keys(dataset_collection, "elements", "url", "name",
                              "collection_type")
        return dataset_collection

    def _download_dataset_collection(self, history_id, hdca_id):
        return self._get(
            "histories/%s/contents/dataset_collections/%s/download" %
            (history_id, hdca_id))
Beispiel #33
0
class JobsApiTestCase(api.ApiTestCase):

    def setUp(self):
        super(JobsApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    def test_index(self):
        # Create HDA to ensure at least one job exists...
        self.__history_with_new_dataset()
        jobs = self.__jobs_index()
        assert "upload1" in map(itemgetter("tool_id"), jobs)

    def test_system_details_admin_only(self):
        self.__history_with_new_dataset()
        jobs = self.__jobs_index(admin=False)
        job = jobs[0]
        self._assert_not_has_keys(job, "command_line", "external_id")

        jobs = self.__jobs_index(admin=True)
        job = jobs[0]
        self._assert_has_keys(job, "command_line", "external_id")

    def test_index_state_filter(self):
        # Initial number of ok jobs
        original_count = len(self.__uploads_with_state("ok"))
        # Run through dataset upload to ensure num uplaods at least greater
        # by 1.
        self.__history_with_ok_dataset()

        # Verify number of ok jobs is actually greater.
        count_increased = False
        for i in range(10):
            new_count = len(self.__uploads_with_state("ok"))
            if original_count < new_count:
                count_increased = True
                break
            time.sleep(.1)

        if not count_increased:
            template = "Jobs in ok state did not increase (was %d, now %d)"
            message = template % (original_count, new_count)
            raise AssertionError(message)

    def test_index_date_filter(self):
        self.__history_with_new_dataset()
        two_weeks_ago = (datetime.datetime.utcnow() - datetime.timedelta(7)).isoformat()
        last_week = (datetime.datetime.utcnow() - datetime.timedelta(7)).isoformat()
        next_week = (datetime.datetime.utcnow() + datetime.timedelta(7)).isoformat()
        today = datetime.datetime.utcnow().isoformat()
        tomorrow = (datetime.datetime.utcnow() + datetime.timedelta(1)).isoformat()

        jobs = self.__jobs_index(data={"date_range_min": today[0:10], "date_range_max": tomorrow[0:10]})
        assert len(jobs) > 0
        today_job_id = jobs[0]["id"]

        jobs = self.__jobs_index(data={"date_range_min": two_weeks_ago, "date_range_max": last_week})
        assert today_job_id not in map(itemgetter("id"), jobs)

        jobs = self.__jobs_index(data={"date_range_min": last_week, "date_range_max": next_week})
        assert today_job_id in map(itemgetter("id"), jobs)

    def test_index_history(self):
        history_id, _ = self.__history_with_new_dataset()
        jobs = self.__jobs_index(data={"history_id": history_id})
        assert len(jobs) > 0

        history_id = self.dataset_populator.new_history()
        jobs = self.__jobs_index(data={"history_id": history_id})
        assert len(jobs) == 0

    def test_index_multiple_states_filter(self):
        # Initial number of ok jobs
        original_count = len(self.__uploads_with_state("ok", "new"))

        # Run through dataset upload to ensure num uplaods at least greater
        # by 1.
        self.__history_with_ok_dataset()

        # Verify number of ok jobs is actually greater.
        new_count = len(self.__uploads_with_state("new", "ok"))
        assert original_count < new_count, new_count

    def test_show(self):
        # Create HDA to ensure at least one job exists...
        self.__history_with_new_dataset()

        jobs_response = self._get("jobs")
        first_job = jobs_response.json()[0]
        self._assert_has_key(first_job, 'id', 'state', 'exit_code', 'update_time', 'create_time')

        job_id = first_job["id"]
        show_jobs_response = self._get("jobs/%s" % job_id)
        self._assert_status_code_is(show_jobs_response, 200)

        job_details = show_jobs_response.json()
        self._assert_has_key(job_details, 'id', 'state', 'exit_code', 'update_time', 'create_time')

    def test_show_security(self):
        history_id, _ = self.__history_with_new_dataset()
        jobs_response = self._get("jobs", data={"history_id": history_id})
        job = jobs_response.json()[0]
        job_id = job["id"]

        show_jobs_response = self._get("jobs/%s" % job_id, admin=False)
        self._assert_not_has_keys(show_jobs_response.json(), "command_line", "external_id")

        # TODO: Re-activate test case when API accepts privacy settings
        # with self._different_user():
        #    show_jobs_response = self._get( "jobs/%s" % job_id, admin=False )
        #    self._assert_status_code_is( show_jobs_response, 200 )

        show_jobs_response = self._get("jobs/%s" % job_id, admin=True)
        self._assert_has_keys(show_jobs_response.json(), "command_line", "external_id")

    def test_deleting_output_keep_running_until_all_deleted(self):
        history_id, job_state, outputs = self._setup_running_two_output_job(120)

        self._hack_to_skip_test_if_state_ok(job_state)

        # Delete one of the two outputs and make sure the job is still running.
        self._raw_update_history_item(history_id, outputs[0]["id"], {"deleted": True})

        self._hack_to_skip_test_if_state_ok(job_state)

        time.sleep(1)

        self._hack_to_skip_test_if_state_ok(job_state)

        state = job_state().json()["state"]
        assert state == "running", state

        # Delete the second output and make sure the job is cancelled.
        self._raw_update_history_item(history_id, outputs[1]["id"], {"deleted": True})
        final_state = wait_on_state(job_state, assert_ok=False, timeout=15)
        assert final_state in ["deleted_new", "deleted"], final_state

    def test_purging_output_keep_running_until_all_purged(self):
        history_id, job_state, outputs = self._setup_running_two_output_job(120)

        # Pretty much right away after the job is running, these paths should be populated -
        # if they are grab them and make sure they are deleted at the end of the job.
        dataset_1 = self._get_history_item_as_admin(history_id, outputs[0]["id"])
        dataset_2 = self._get_history_item_as_admin(history_id, outputs[1]["id"])
        if "file_name" in dataset_1:
            output_dataset_paths = [dataset_1["file_name"], dataset_2["file_name"]]
            # This may or may not exist depending on if the test is local or not.
            output_dataset_paths_exist = os.path.exists(output_dataset_paths[0])
        else:
            output_dataset_paths = []
            output_dataset_paths_exist = False

        self._hack_to_skip_test_if_state_ok(job_state)

        current_state = job_state().json()["state"]
        assert current_state == "running", current_state

        # Purge one of the two outputs and make sure the job is still running.
        self._raw_update_history_item(history_id, outputs[0]["id"], {"purged": True})
        time.sleep(1)

        self._hack_to_skip_test_if_state_ok(job_state)

        current_state = job_state().json()["state"]
        assert current_state == "running", current_state

        # Purge the second output and make sure the job is cancelled.
        self._raw_update_history_item(history_id, outputs[1]["id"], {"purged": True})
        final_state = wait_on_state(job_state, assert_ok=False, timeout=15)
        assert final_state in ["deleted_new", "deleted"], final_state

        def paths_deleted():
            if not os.path.exists(output_dataset_paths[0]) and not os.path.exists(output_dataset_paths[1]):
                return True

        if output_dataset_paths_exist:
            wait_on(paths_deleted, "path deletion")

    def test_purging_output_cleaned_after_ok_run(self):
        history_id, job_state, outputs = self._setup_running_two_output_job(10)

        # Pretty much right away after the job is running, these paths should be populated -
        # if they are grab them and make sure they are deleted at the end of the job.
        dataset_1 = self._get_history_item_as_admin(history_id, outputs[0]["id"])
        dataset_2 = self._get_history_item_as_admin(history_id, outputs[1]["id"])
        if "file_name" in dataset_1:
            output_dataset_paths = [dataset_1["file_name"], dataset_2["file_name"]]
            # This may or may not exist depending on if the test is local or not.
            output_dataset_paths_exist = os.path.exists(output_dataset_paths[0])
        else:
            output_dataset_paths = []
            output_dataset_paths_exist = False

        if not output_dataset_paths_exist:
            # Given this Galaxy configuration - there is nothing more to be tested here.
            # Consider throwing a skip instead.
            return

        # Purge one of the two outputs and wait for the job to complete.
        self._raw_update_history_item(history_id, outputs[0]["id"], {"purged": True})
        wait_on_state(job_state, assert_ok=True)

        if output_dataset_paths_exist:
            time.sleep(.5)
            # Make sure the non-purged dataset is on disk and the purged one is not.
            assert os.path.exists(output_dataset_paths[1])
            assert not os.path.exists(output_dataset_paths[0])

    def _hack_to_skip_test_if_state_ok(self, job_state):
        from nose.plugins.skip import SkipTest
        if job_state().json()["state"] == "ok":
            message = "Job state switch from running to ok too quickly - the rest of the test requires the job to be in a running state. Skipping test."
            raise SkipTest(message)

    def _setup_running_two_output_job(self, sleep_time):
        history_id = self.dataset_populator.new_history()
        payload = self.dataset_populator.run_tool_payload(
            tool_id='create_2',
            inputs=dict(
                sleep_time=sleep_time,
            ),
            history_id=history_id,
        )
        run_response = self._post("tools", data=payload).json()
        outputs = run_response["outputs"]
        jobs = run_response["jobs"]

        assert len(outputs) == 2
        assert len(jobs) == 1

        def job_state():
            jobs_response = self._get("jobs/%s" % jobs[0]["id"])
            return jobs_response

        # Give job some time to get up and running.
        time.sleep(2)
        running_state = wait_on_state(job_state, skip_states=["queued", "new"], assert_ok=False, timeout=15)
        assert running_state == "running", running_state

        def job_state():
            jobs_response = self._get("jobs/%s" % jobs[0]["id"])
            return jobs_response

        return history_id, job_state, outputs

    def _raw_update_history_item(self, history_id, item_id, data):
        update_url = self._api_url("histories/%s/contents/%s" % (history_id, item_id), use_key=True)
        update_response = put(update_url, json=data)
        assert_status_code_is_ok(update_response)
        return update_response

    def _get_history_item_as_admin(self, history_id, item_id):
        response = self._get("histories/%s/contents/%s?view=detailed" % (history_id, item_id), admin=True)
        assert_status_code_is_ok(response)
        return response.json()

    def test_search(self):
        history_id, dataset_id = self.__history_with_ok_dataset()

        inputs = json.dumps(
            dict(
                input1=dict(
                    src='hda',
                    id=dataset_id,
                )
            )
        )
        search_payload = dict(
            tool_id="cat1",
            inputs=inputs,
            state="ok",
        )

        empty_search_response = self._post("jobs/search", data=search_payload)
        self._assert_status_code_is(empty_search_response, 200)
        self.assertEquals(len(empty_search_response.json()), 0)

        self.__run_cat_tool(history_id, dataset_id)
        self.dataset_populator.wait_for_history(history_id, assert_ok=True)

        search_count = -1
        # in case job and history aren't updated at exactly the same
        # time give time to wait
        for i in range(5):
            search_count = self._search_count(search_payload)
            if search_count == 1:
                break
            time.sleep(.1)

        self.assertEquals(search_count, 1)

    def _search_count(self, search_payload):
        search_response = self._post("jobs/search", data=search_payload)
        self._assert_status_code_is(search_response, 200)
        search_json = search_response.json()
        return len(search_json)

    def __run_cat_tool(self, history_id, dataset_id):
        # Code duplication with test_jobs.py, eliminate
        payload = self.dataset_populator.run_tool_payload(
            tool_id='cat1',
            inputs=dict(
                input1=dict(
                    src='hda',
                    id=dataset_id
                ),
            ),
            history_id=history_id,
        )
        self._post("tools", data=payload)

    def __run_randomlines_tool(self, lines, history_id, dataset_id):
        payload = self.dataset_populator.run_tool_payload(
            tool_id="random_lines1",
            inputs=dict(
                num_lines=lines,
                input=dict(
                    src='hda',
                    id=dataset_id,
                ),
            ),
            history_id=history_id,
        )
        self._post("tools", data=payload)

    def __uploads_with_state(self, *states):
        jobs_response = self._get("jobs", data=dict(state=states))
        self._assert_status_code_is(jobs_response, 200)
        jobs = jobs_response.json()
        assert not filter(lambda j: j["state"] not in states, jobs)
        return filter(lambda j: j["tool_id"] == "upload1", jobs)

    def __history_with_new_dataset(self):
        history_id = self.dataset_populator.new_history()
        dataset_id = self.dataset_populator.new_dataset(history_id)["id"]
        return history_id, dataset_id

    def __history_with_ok_dataset(self):
        history_id = self.dataset_populator.new_history()
        dataset_id = self.dataset_populator.new_dataset(history_id, wait=True)["id"]
        return history_id, dataset_id

    def __jobs_index(self, **kwds):
        jobs_response = self._get("jobs", **kwds)
        self._assert_status_code_is(jobs_response, 200)
        jobs = jobs_response.json()
        assert isinstance(jobs, list)
        return jobs
class DockerizedJobsIntegrationTestCase(integration_util.IntegrationTestCase, RunsEnvironmentJobs):

    framework_tool_and_types = True
    job_config_file = DOCKERIZED_JOB_CONFIG_FILE
    build_mulled_resolver = 'build_mulled'
    container_type = 'docker'
    default_container_home_dir = '/'

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        cls.jobs_directory = cls._test_driver.mkdtemp()
        config["jobs_directory"] = cls.jobs_directory
        config["job_config_file"] = cls.job_config_file
        # Disable tool dependency resolution.
        config["tool_dependency_dir"] = "none"
        config["conda_auto_init"] = False
        config["conda_auto_install"] = False
        config["enable_beta_mulled_containers"] = "true"

    @classmethod
    def setUpClass(cls):
        if not which(cls.container_type):
            raise unittest.SkipTest("Executable '%s' not found on PATH" % cls.container_type)
        super(DockerizedJobsIntegrationTestCase, cls).setUpClass()

    def setUp(self):
        super(DockerizedJobsIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_explicit(self):
        self.dataset_populator.run_tool("mulled_example_explicit", {}, self.history_id)
        self.dataset_populator.wait_for_history(self.history_id, assert_ok=True)
        output = self.dataset_populator.get_history_dataset_content(self.history_id, timeout=EXTENDED_TIMEOUT)
        assert "0.7.15-r1140" in output

    def test_mulled_simple(self):
        self.dataset_populator.run_tool("mulled_example_simple", {}, self.history_id)
        self.dataset_populator.wait_for_history(self.history_id, assert_ok=True)
        output = self.dataset_populator.get_history_dataset_content(self.history_id, timeout=EXTENDED_TIMEOUT)
        assert "0.7.15-r1140" in output

    def test_container_job_enviornment(self):
        job_env = self._run_and_get_environment_properties("job_environment_default")

        euid = os.geteuid()
        egid = os.getgid()

        assert job_env.user_id == str(euid), job_env.user_id
        assert job_env.group_id == str(egid), job_env.group_id
        assert job_env.pwd.startswith(self.jobs_directory)
        assert job_env.pwd.endswith("/working")
        assert job_env.home.startswith(self.jobs_directory)
        assert job_env.home.endswith("/home")

    def test_container_job_environment_legacy(self):
        job_env = self._run_and_get_environment_properties("job_environment_default_legacy")

        euid = os.geteuid()
        egid = os.getgid()

        assert job_env.user_id == str(euid), job_env.user_id
        assert job_env.group_id == str(egid), job_env.group_id
        assert job_env.pwd.startswith(self.jobs_directory)
        assert job_env.pwd.endswith("/working")
        # Should we change env_pass_through to just always include TMP and HOME for docker?
        # I'm not sure, if yes this would change.
        assert job_env.home == self.default_container_home_dir, job_env.home

    def test_build_mulled(self):
        if not which('docker'):
            raise unittest.SkipTest("Docker not found on PATH, required for building images via involucro")
        resolver_type = self.build_mulled_resolver
        tool_id = 'mulled_example_multi_1'
        endpoint = "tools/%s/dependencies" % tool_id
        data = {'id': tool_id, 'resolver_type': resolver_type}
        create_response = self._post(endpoint, data=data, admin=True)
        self._assert_status_code_is(create_response, 200)
        response = create_response.json()
        assert any([True for d in response if d['dependency_type'] == self.container_type])
Beispiel #35
0
class ToolsUploadTestCase(api.ApiTestCase):
    def setUp(self):
        super(ToolsUploadTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    def test_upload1_paste(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(
                history_id, 'Hello World')
            create_response = self._post("tools", data=payload)
            self._assert_has_keys(create_response.json(), 'outputs')

    def test_upload1_paste_bad_datatype(self):
        # Check that you get a nice message if you upload an incorrect datatype
        with self.dataset_populator.test_history() as history_id:
            file_type = "johnsawesomebutfakedatatype"
            payload = self.dataset_populator.upload_payload(
                history_id, 'Hello World', file_type=file_type)
            create = self._post("tools", data=payload).json()
            self._assert_has_keys(create, 'err_msg')
            assert file_type in create['err_msg']

    # upload1 rewrites content with posix lines by default but this can be disabled by setting
    # to_posix_lines=None in the request. Newer fetch API does not do this by default prefering
    # to keep content unaltered if possible but it can be enabled with a simple JSON boolean switch
    # of the same name (to_posix_lines).
    def test_upload_posix_newline_fixes_by_default(self):
        windows_content = ONE_TO_SIX_ON_WINDOWS
        result_content = self._upload_and_get_content(windows_content)
        self.assertEquals(result_content, ONE_TO_SIX_WITH_TABS)

    def test_fetch_posix_unaltered(self):
        windows_content = ONE_TO_SIX_ON_WINDOWS
        result_content = self._upload_and_get_content(windows_content,
                                                      api="fetch")
        self.assertEquals(result_content, ONE_TO_SIX_ON_WINDOWS)

    def test_upload_disable_posix_fix(self):
        windows_content = ONE_TO_SIX_ON_WINDOWS
        result_content = self._upload_and_get_content(windows_content,
                                                      to_posix_lines=None)
        self.assertEquals(result_content, windows_content)

    def test_fetch_post_lines_option(self):
        windows_content = ONE_TO_SIX_ON_WINDOWS
        result_content = self._upload_and_get_content(windows_content,
                                                      api="fetch",
                                                      to_posix_lines=True)
        self.assertEquals(result_content, ONE_TO_SIX_WITH_TABS)

    def test_upload_tab_to_space_off_by_default(self):
        table = ONE_TO_SIX_WITH_SPACES
        result_content = self._upload_and_get_content(table)
        self.assertEquals(result_content, table)

    def test_fetch_tab_to_space_off_by_default(self):
        table = ONE_TO_SIX_WITH_SPACES
        result_content = self._upload_and_get_content(table, api='fetch')
        self.assertEquals(result_content, table)

    def test_upload_tab_to_space(self):
        table = ONE_TO_SIX_WITH_SPACES
        result_content = self._upload_and_get_content(table,
                                                      space_to_tab="Yes")
        self.assertEquals(result_content, ONE_TO_SIX_WITH_TABS)

    def test_fetch_tab_to_space(self):
        table = ONE_TO_SIX_WITH_SPACES
        result_content = self._upload_and_get_content(table,
                                                      api="fetch",
                                                      space_to_tab=True)
        self.assertEquals(result_content, ONE_TO_SIX_WITH_TABS)

    def test_fetch_compressed_with_explicit_type(self):
        fastqgz_path = TestDataResolver().get_filename("1.fastqsanger.gz")
        with open(fastqgz_path, "rb") as fh:
            details = self._upload_and_get_details(fh,
                                                   api="fetch",
                                                   ext="fastqsanger.gz")
        assert details["state"] == "ok"
        assert details["file_ext"] == "fastqsanger.gz"

    def test_fetch_compressed_default(self):
        fastqgz_path = TestDataResolver().get_filename("1.fastqsanger.gz")
        with open(fastqgz_path, "rb") as fh:
            details = self._upload_and_get_details(fh,
                                                   api="fetch",
                                                   assert_ok=False)
        assert details["state"] == "ok"
        assert details["file_ext"] == "fastqsanger.gz", details

    @uses_test_history(require_new=True)
    def test_fetch_compressed_auto_decompress_target(self, history_id):
        # TODO: this should definitely be fixed to allow auto decompression via that API.
        fastqgz_path = TestDataResolver().get_filename("1.fastqsanger.gz")
        with open(fastqgz_path, "rb") as fh:
            details = self._upload_and_get_details(fh,
                                                   api="fetch",
                                                   history_id=history_id,
                                                   assert_ok=False,
                                                   auto_decompress=True)
        assert details["state"] == "ok"
        assert details["file_ext"] == "fastqsanger.gz", details

    def test_upload_decompress_off_with_auto_by_default(self):
        # UNSTABLE_FLAG: This might default to a bed.gz datatype in the future.
        bedgz_path = TestDataResolver().get_filename("4.bed.gz")
        with open(bedgz_path, "rb") as fh:
            details = self._upload_and_get_details(fh, file_type="auto")
        assert details["state"] == "ok"
        assert details["file_ext"] == "bed", details

    def test_upload_decompresses_if_uncompressed_type_selected(self):
        fastqgz_path = TestDataResolver().get_filename("1.fastqsanger.gz")
        with open(fastqgz_path, "rb") as fh:
            details = self._upload_and_get_details(fh, file_type="fastqsanger")
        assert details["state"] == "ok"
        assert details["file_ext"] == "fastqsanger", details
        assert details["file_size"] == 178, details

    def test_upload_decompress_off_if_compressed_type_selected(self):
        fastqgz_path = TestDataResolver().get_filename("1.fastqsanger.gz")
        with open(fastqgz_path, "rb") as fh:
            details = self._upload_and_get_details(fh,
                                                   file_type="fastqsanger.gz")
        assert details["state"] == "ok"
        assert details["file_ext"] == "fastqsanger.gz", details
        assert details["file_size"] == 161, details

    def test_upload_auto_decompress_off(self):
        # UNSTABLE_FLAG: This might default to a bed.gz datatype in the future.
        bedgz_path = TestDataResolver().get_filename("4.bed.gz")
        with open(bedgz_path, "rb") as fh:
            details = self._upload_and_get_details(fh,
                                                   file_type="auto",
                                                   assert_ok=False,
                                                   auto_decompress=False)
        assert details["file_ext"] == "binary", details

    @uses_test_history(require_new=True)
    def test_fetch_compressed_with_auto(self, history_id):
        # UNSTABLE_FLAG: This might default to a bed.gz datatype in the future.
        # TODO: this should definitely be fixed to allow auto decompression via that API.
        bedgz_path = TestDataResolver().get_filename("4.bed.gz")
        with open(bedgz_path, "rb") as fh:
            details = self._upload_and_get_details(fh,
                                                   api="fetch",
                                                   history_id=history_id,
                                                   auto_decompress=True,
                                                   assert_ok=False)
        assert details["state"] == "ok"
        assert details["file_ext"] == "bed"

    @skip_without_datatype("rdata")
    def test_rdata_not_decompressed(self):
        # Prevent regression of https://github.com/galaxyproject/galaxy/issues/753
        rdata_path = TestDataResolver().get_filename("1.RData")
        with open(rdata_path, "rb") as fh:
            rdata_metadata = self._upload_and_get_details(fh, file_type="auto")
        self.assertEquals(rdata_metadata["file_ext"], "rdata")

    @skip_without_datatype("csv")
    def test_csv_upload(self):
        csv_path = TestDataResolver().get_filename("1.csv")
        with open(csv_path, "rb") as fh:
            csv_metadata = self._upload_and_get_details(fh, file_type="csv")
        self.assertEquals(csv_metadata["file_ext"], "csv")

    @skip_without_datatype("csv")
    def test_csv_upload_auto(self):
        csv_path = TestDataResolver().get_filename("1.csv")
        with open(csv_path, "rb") as fh:
            csv_metadata = self._upload_and_get_details(fh, file_type="auto")
        self.assertEquals(csv_metadata["file_ext"], "csv")

    @skip_without_datatype("csv")
    def test_csv_fetch(self):
        csv_path = TestDataResolver().get_filename("1.csv")
        with open(csv_path, "rb") as fh:
            csv_metadata = self._upload_and_get_details(fh,
                                                        api="fetch",
                                                        ext="csv",
                                                        to_posix_lines=True)
        self.assertEquals(csv_metadata["file_ext"], "csv")

    @skip_without_datatype("csv")
    def test_csv_sniff_fetch(self):
        csv_path = TestDataResolver().get_filename("1.csv")
        with open(csv_path, "rb") as fh:
            csv_metadata = self._upload_and_get_details(fh,
                                                        api="fetch",
                                                        ext="auto",
                                                        to_posix_lines=True)
        self.assertEquals(csv_metadata["file_ext"], "csv")

    @skip_without_datatype("tiff")
    def test_image_upload_auto(self):
        tiff_path = TestDataResolver().get_filename("1.tiff")
        with open(tiff_path, "rb") as fh:
            tiff_metadata = self._upload_and_get_details(fh, file_type="auto")
        self.assertEquals(tiff_metadata["file_ext"], "tiff")

    @skip_without_datatype("velvet")
    def test_composite_datatype(self):
        with self.dataset_populator.test_history() as history_id:
            dataset = self._velvet_upload(history_id,
                                          extra_inputs={
                                              "files_1|url_paste":
                                              "roadmaps content",
                                              "files_1|type": "upload_dataset",
                                              "files_2|url_paste":
                                              "log content",
                                              "files_2|type": "upload_dataset",
                                          })

            roadmaps_content = self._get_roadmaps_content(history_id, dataset)
            assert roadmaps_content.strip(
            ) == "roadmaps content", roadmaps_content

    @skip_without_datatype("velvet")
    def test_composite_datatype_space_to_tab(self):
        # Like previous test but set one upload with space_to_tab to True to
        # verify that works.
        with self.dataset_populator.test_history() as history_id:
            dataset = self._velvet_upload(history_id,
                                          extra_inputs={
                                              "files_1|url_paste":
                                              "roadmaps content",
                                              "files_1|type": "upload_dataset",
                                              "files_1|space_to_tab": "Yes",
                                              "files_2|url_paste":
                                              "log content",
                                              "files_2|type": "upload_dataset",
                                          })

            roadmaps_content = self._get_roadmaps_content(history_id, dataset)
            assert roadmaps_content.strip(
            ) == "roadmaps\tcontent", roadmaps_content

    @skip_without_datatype("velvet")
    def test_composite_datatype_posix_lines(self):
        # Like previous test but set one upload with space_to_tab to True to
        # verify that works.
        with self.dataset_populator.test_history() as history_id:
            dataset = self._velvet_upload(history_id,
                                          extra_inputs={
                                              "files_1|url_paste":
                                              "roadmaps\rcontent",
                                              "files_1|type": "upload_dataset",
                                              "files_1|space_to_tab": "Yes",
                                              "files_2|url_paste":
                                              "log\rcontent",
                                              "files_2|type": "upload_dataset",
                                          })

            roadmaps_content = self._get_roadmaps_content(history_id, dataset)
            assert roadmaps_content.strip(
            ) == "roadmaps\ncontent", roadmaps_content

    def test_upload_dbkey(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id,
                                                            "Test123",
                                                            dbkey="hg19")
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]
            assert datasets[0].get("genome_build") == "hg19", datasets[0]

    @uses_test_history(require_new=False)
    def test_fetch_bam_file(self, history_id):
        bam_path = TestDataResolver().get_filename("1.bam")
        with open(bam_path, "rb") as fh:
            details = self._upload_and_get_details(fh,
                                                   api="fetch",
                                                   history_id=history_id,
                                                   assert_ok=False)
        assert details["state"] == "ok"
        assert details["file_ext"] == "bam", details

    def test_upload_bam_file(self):
        bam_path = TestDataResolver().get_filename("1.bam")
        with open(bam_path, "rb") as fh:
            details = self._upload_and_get_details(fh, file_type="auto")
        assert details["state"] == "ok"
        assert details["file_ext"] == "bam", details

    def test_fetch_metadata(self):
        table = ONE_TO_SIX_WITH_SPACES
        details = self._upload_and_get_details(
            table,
            api='fetch',
            dbkey="hg19",
            info="cool upload",
            tags=["name:data", "group:type:paired-end"])
        assert details.get("genome_build") == "hg19"
        assert details.get("misc_info") == "cool upload", details
        tags = details.get("tags")
        assert len(tags) == 2, details
        assert "group:type:paired-end" in tags
        assert "name:data" in tags

    def test_upload_multiple_files_1(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(
                history_id,
                "Test123",
                dbkey="hg19",
                extra_inputs={
                    "files_1|url_paste": "SecondOutputContent",
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "tabular",
                    "files_1|dbkey": "hg18",
                    "file_count": "2",
                })
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 2, datasets
            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[0])
            assert content.strip() == "Test123"
            assert datasets[0]["file_ext"] == "txt"
            assert datasets[0]["genome_build"] == "hg19", datasets

            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[1])
            assert content.strip() == "SecondOutputContent"
            assert datasets[1]["file_ext"] == "tabular"
            assert datasets[1]["genome_build"] == "hg18", datasets

    def test_upload_multiple_files_2(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(
                history_id,
                "Test123",
                file_type="tabular",
                dbkey="hg19",
                extra_inputs={
                    "files_1|url_paste": "SecondOutputContent",
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "files_1|dbkey": "hg18",
                    "file_count": "2",
                })
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 2, datasets
            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[0])
            assert content.strip() == "Test123"
            assert datasets[0]["file_ext"] == "tabular", datasets
            assert datasets[0]["genome_build"] == "hg19", datasets

            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[1])
            assert content.strip() == "SecondOutputContent"
            assert datasets[1]["file_ext"] == "txt"
            assert datasets[1]["genome_build"] == "hg18", datasets

    def test_upload_multiple_files_3(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(
                history_id,
                "Test123",
                file_type="tabular",
                dbkey="hg19",
                extra_inputs={
                    "files_0|file_type": "txt",
                    "files_0|dbkey": "hg18",
                    "files_1|url_paste": "SecondOutputContent",
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "files_1|dbkey": "hg18",
                    "file_count": "2",
                })
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 2, datasets
            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[0])
            assert content.strip() == "Test123"
            assert datasets[0]["file_ext"] == "txt", datasets
            assert datasets[0]["genome_build"] == "hg18", datasets

            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[1])
            assert content.strip() == "SecondOutputContent"
            assert datasets[1]["file_ext"] == "txt"
            assert datasets[1]["genome_build"] == "hg18", datasets

    def test_upload_multiple_files_no_dbkey(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(
                history_id,
                "Test123",
                file_type="tabular",
                dbkey=None,
                extra_inputs={
                    "files_0|file_type": "txt",
                    "files_1|url_paste": "SecondOutputContent",
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "file_count": "2",
                })
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 2, datasets
            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[0])
            assert content.strip() == "Test123"
            assert datasets[0]["file_ext"] == "txt", datasets
            assert datasets[0]["genome_build"] == "?", datasets

            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[1])
            assert content.strip() == "SecondOutputContent"
            assert datasets[1]["file_ext"] == "txt"
            assert datasets[1]["genome_build"] == "?", datasets

    def test_upload_multiple_files_space_to_tab(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(
                history_id,
                content=ONE_TO_SIX_WITH_SPACES,
                file_type="tabular",
                dbkey="hg19",
                extra_inputs={
                    "files_0|file_type": "txt",
                    "files_0|space_to_tab": "Yes",
                    "files_1|url_paste": ONE_TO_SIX_WITH_SPACES,
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "files_2|url_paste": ONE_TO_SIX_WITH_SPACES,
                    "files_2|NAME": "ThirdOutputName",
                    "files_2|file_type": "txt",
                    "files_2|space_to_tab": "Yes",
                    "file_count": "3",
                })
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 3, datasets
            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[0])
            assert content == ONE_TO_SIX_WITH_TABS

            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[1])
            assert content == ONE_TO_SIX_WITH_SPACES

            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[2])
            assert content == ONE_TO_SIX_WITH_TABS

    def test_multiple_files_posix_lines(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(
                history_id,
                content=ONE_TO_SIX_ON_WINDOWS,
                file_type="tabular",
                dbkey="hg19",
                extra_inputs={
                    "files_0|file_type": "txt",
                    "files_0|to_posix_lines": "Yes",
                    "files_1|url_paste": ONE_TO_SIX_ON_WINDOWS,
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "files_1|to_posix_lines": None,
                    "files_2|url_paste": ONE_TO_SIX_ON_WINDOWS,
                    "files_2|NAME": "ThirdOutputName",
                    "files_2|file_type": "txt",
                    "file_count": "3",
                })
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 3, datasets
            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[0])
            assert content == ONE_TO_SIX_WITH_TABS

            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[1])
            assert content == ONE_TO_SIX_ON_WINDOWS

            content = self.dataset_populator.get_history_dataset_content(
                history_id, dataset=datasets[2])
            assert content == ONE_TO_SIX_WITH_TABS

    def test_upload_from_invalid_url(self):
        history_id, new_dataset = self._upload('https://usegalaxy.org/bla123',
                                               assert_ok=False)
        dataset_details = self.dataset_populator.get_history_dataset_details(
            history_id, dataset_id=new_dataset["id"], assert_ok=False)
        assert dataset_details[
            'state'] == 'error', "expected dataset state to be 'error', but got '%s'" % dataset_details[
                'state']

    def test_upload_from_valid_url(self):
        history_id, new_dataset = self._upload(
            'https://usegalaxy.org/api/version')
        self.dataset_populator.get_history_dataset_details(
            history_id, dataset_id=new_dataset["id"], assert_ok=True)

    def _velvet_upload(self, history_id, extra_inputs):
        payload = self.dataset_populator.upload_payload(
            history_id,
            "sequences content",
            file_type="velvet",
            extra_inputs=extra_inputs,
        )
        run_response = self.dataset_populator.tools_post(payload)
        self.dataset_populator.wait_for_tool_run(history_id, run_response)
        datasets = run_response.json()["outputs"]

        assert len(datasets) == 1
        dataset = datasets[0]

        return dataset

    def _get_roadmaps_content(self, history_id, dataset):
        roadmaps_content = self.dataset_populator.get_history_dataset_content(
            history_id, dataset=dataset, filename="Roadmaps")
        return roadmaps_content

    def _upload_and_get_content(self, content, **upload_kwds):
        history_id, new_dataset = self._upload(content, **upload_kwds)
        return self.dataset_populator.get_history_dataset_content(
            history_id, dataset=new_dataset)

    def _upload_and_get_details(self, content, **upload_kwds):
        history_id, new_dataset = self._upload(content, **upload_kwds)
        assert_ok = upload_kwds.get("assert_ok", True)
        return self.dataset_populator.get_history_dataset_details(
            history_id, dataset=new_dataset, assert_ok=assert_ok)

    def _upload(self, content, api="upload1", history_id=None, **upload_kwds):
        assert_ok = upload_kwds.get("assert_ok", True)
        history_id = history_id or self.dataset_populator.new_history()
        if api == "upload1":
            new_dataset = self.dataset_populator.new_dataset(history_id,
                                                             content=content,
                                                             **upload_kwds)
        else:
            assert api == "fetch"
            element = dict(src="files", **upload_kwds)
            target = {
                "destination": {
                    "type": "hdas"
                },
                "elements": [element],
            }
            targets = json.dumps([target])
            payload = {
                "history_id": history_id,
                "targets": targets,
                "__files": {
                    "files_0|file_data": content
                }
            }
            new_dataset = self.dataset_populator.fetch(
                payload, assert_ok=assert_ok).json()["outputs"][0]
        self.dataset_populator.wait_for_history(history_id,
                                                assert_ok=assert_ok)
        return history_id, new_dataset
class DatasetCollectionApiTestCase( api.ApiTestCase ):

    def setUp( self ):
        super( DatasetCollectionApiTestCase, self ).setUp()
        self.dataset_populator = DatasetPopulator( self.galaxy_interactor )
        self.dataset_collection_populator = DatasetCollectionPopulator( self.galaxy_interactor )
        self.history_id = self.dataset_populator.new_history()

    def test_create_pair_from_history( self ):
        payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            instance_type="history",
        )
        create_response = self._post( "dataset_collections", payload )
        dataset_collection = self._check_create_response( create_response )
        returned_datasets = dataset_collection[ "elements" ]
        assert len( returned_datasets ) == 2, dataset_collection

    def test_create_list_from_history( self ):
        element_identifiers = self.dataset_collection_populator.list_identifiers( self.history_id )

        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )

        create_response = self._post( "dataset_collections", payload )
        dataset_collection = self._check_create_response( create_response )
        returned_datasets = dataset_collection[ "elements" ]
        assert len( returned_datasets ) == 3, dataset_collection

    def test_create_list_of_existing_pairs( self ):
        pair_payload = self.dataset_collection_populator.create_pair_payload(
            self.history_id,
            instance_type="history",
        )
        pair_create_response = self._post( "dataset_collections", pair_payload )
        dataset_collection = self._check_create_response( pair_create_response )
        hdca_id = dataset_collection[ "id" ]

        element_identifiers = [
            dict( name="test1", src="hdca", id=hdca_id )
        ]

        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )
        create_response = self._post( "dataset_collections", payload )
        dataset_collection = self._check_create_response( create_response )
        returned_collections = dataset_collection[ "elements" ]
        assert len( returned_collections ) == 1, dataset_collection

    def test_create_list_of_new_pairs( self ):
        pair_identifiers = self.dataset_collection_populator.pair_identifiers( self.history_id )
        element_identifiers = [ dict(
            src="new_collection",
            name="test_pair",
            collection_type="paired",
            element_identifiers=pair_identifiers,
        ) ]
        payload = dict(
            collection_type="list:paired",
            instance_type="history",
            history_id=self.history_id,
            name="a nested collection",
            element_identifiers=json.dumps( element_identifiers ),
        )
        create_response = self._post( "dataset_collections", payload )
        dataset_collection = self._check_create_response( create_response )
        assert dataset_collection[ "collection_type" ] == "list:paired"
        assert dataset_collection[ "name" ] == "a nested collection"
        returned_collections = dataset_collection[ "elements" ]
        assert len( returned_collections ) == 1, dataset_collection
        pair_1_element = returned_collections[ 0 ]
        self._assert_has_keys( pair_1_element, "element_index" )
        pair_1_object = pair_1_element[ "object" ]
        self._assert_has_keys( pair_1_object, "collection_type", "elements" )
        self.assertEquals( pair_1_object[ "collection_type" ], "paired" )
        self.assertEquals( pair_1_object[ "populated" ], True )
        pair_elements = pair_1_object[ "elements" ]
        assert len( pair_elements ) == 2
        pair_1_element_1 = pair_elements[ 0 ]
        assert pair_1_element_1[ "element_index" ] == 0

    def test_hda_security( self ):
        element_identifiers = self.dataset_collection_populator.pair_identifiers( self.history_id )

        with self._different_user( ):
            history_id = self.dataset_populator.new_history()
            payload = dict(
                instance_type="history",
                history_id=history_id,
                element_identifiers=json.dumps(element_identifiers),
                collection_type="paired",
            )

            self._post( "dataset_collections", payload )
            # TODO: re-enable once there is a way to restrict access
            # to this dataset via the API.
            # self._assert_status_code_is( create_response, 403 )

    def test_enforces_unique_names( self ):
        element_identifiers = self.dataset_collection_populator.list_identifiers( self.history_id )
        element_identifiers[ 2 ][ "name" ] = element_identifiers[ 0 ][ "name" ]
        payload = dict(
            instance_type="history",
            history_id=self.history_id,
            element_identifiers=json.dumps(element_identifiers),
            collection_type="list",
        )

        create_response = self._post( "dataset_collections", payload )
        self._assert_status_code_is( create_response, 400 )

    def _check_create_response( self, create_response ):
        self._assert_status_code_is( create_response, 200 )
        dataset_collection = create_response.json()
        self._assert_has_keys( dataset_collection, "elements", "url", "name", "collection_type" )
        return dataset_collection
class DockerizedJobsIntegrationTestCase(integration_util.IntegrationTestCase,
                                        RunsEnvironmentJobs):

    framework_tool_and_types = True

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        cls.jobs_directory = tempfile.mkdtemp()
        config["jobs_directory"] = cls.jobs_directory
        config["job_config_file"] = DOCKERIZED_JOB_CONFIG_FILE
        # Disable tool dependency resolution.
        config["tool_dependency_dir"] = "none"
        config["enable_beta_mulled_containers"] = "true"

    def setUp(self):
        super(DockerizedJobsIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_explicit(self):
        self.dataset_populator.run_tool("mulled_example_explicit", {},
                                        self.history_id)
        self.dataset_populator.wait_for_history(self.history_id,
                                                assert_ok=True)
        output = self.dataset_populator.get_history_dataset_content(
            self.history_id)
        assert "0.7.15-r1140" in output

    def test_mulled_simple(self):
        self.dataset_populator.run_tool("mulled_example_simple", {},
                                        self.history_id)
        self.dataset_populator.wait_for_history(self.history_id,
                                                assert_ok=True)
        output = self.dataset_populator.get_history_dataset_content(
            self.history_id)
        assert "0.7.15-r1140" in output

    def test_docker_job_enviornment(self):
        job_env = self._run_and_get_environment_properties(
            "job_environment_default")

        euid = os.geteuid()
        egid = os.getgid()

        assert job_env.user_id == str(euid), job_env.user_id
        assert job_env.group_id == str(egid), job_env.group_id
        assert job_env.pwd.startswith(self.jobs_directory)
        assert job_env.pwd.endswith("/working")
        assert job_env.home.startswith(self.jobs_directory)
        assert job_env.home.endswith("/home")

    def test_docker_job_environment_legacy(self):
        job_env = self._run_and_get_environment_properties(
            "job_environment_default_legacy")

        euid = os.geteuid()
        egid = os.getgid()

        assert job_env.user_id == str(euid), job_env.user_id
        assert job_env.group_id == str(egid), job_env.group_id
        assert job_env.pwd.startswith(self.jobs_directory)
        assert job_env.pwd.endswith("/working")
        # Should we change env_pass_through to just always include TMP and HOME for docker?
        # I'm not sure, if yes this would change.
        assert job_env.home == "/", job_env.home
Beispiel #38
0
class HistoriesApiTestCase(api.ApiTestCase):

    def setUp(self):
        super(HistoriesApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(self.galaxy_interactor)

    def test_create_history(self):
        # Create a history.
        create_response = self._create_history("TestHistory1")
        created_id = create_response["id"]

        # Make sure new history appears in index of user's histories.
        index_response = self._get("histories").json()
        indexed_history = [h for h in index_response if h["id"] == created_id][0]
        self.assertEqual(indexed_history["name"], "TestHistory1")

    def test_show_history(self):
        history_id = self._create_history("TestHistoryForShow")["id"]
        show_response = self._show(history_id)
        self._assert_has_key(
            show_response,
            'id', 'name', 'annotation', 'size', 'contents_url',
            'state', 'state_details', 'state_ids'
        )

        state_details = show_response["state_details"]
        state_ids = show_response["state_ids"]
        states = [
            'discarded', 'empty', 'error', 'failed_metadata', 'new',
            'ok', 'paused', 'queued', 'running', 'setting_metadata', 'upload'
        ]
        assert isinstance(state_details, dict)
        assert isinstance(state_ids, dict)
        self._assert_has_keys(state_details, *states)
        self._assert_has_keys(state_ids, *states)

    def test_show_most_recently_used(self):
        history_id = self._create_history("TestHistoryRecent")["id"]
        show_response = self._get("histories/most_recently_used").json()
        assert show_response["id"] == history_id

    def test_index_order(self):
        slightly_older_history_id = self._create_history("TestHistorySlightlyOlder")["id"]
        newer_history_id = self._create_history("TestHistoryNewer")["id"]
        index_response = self._get("histories").json()
        assert index_response[0]["id"] == newer_history_id
        assert index_response[1]["id"] == slightly_older_history_id

    def test_delete(self):
        # Setup a history and ensure it is in the index
        history_id = self._create_history("TestHistoryForDelete")["id"]
        index_response = self._get("histories").json()
        assert index_response[0]["id"] == history_id

        show_response = self._show(history_id)
        assert not show_response["deleted"]

        # Delete the history
        self._delete("histories/%s" % history_id)

        # Check can view it - but it is deleted
        show_response = self._show(history_id)
        assert show_response["deleted"]

        # Verify it is dropped from history index
        index_response = self._get("histories").json()
        assert len(index_response) == 0 or index_response[0]["id"] != history_id

        # Add deleted filter to index to view it
        index_response = self._get("histories", {"deleted": "true"}).json()
        assert index_response[0]["id"] == history_id

    def test_purge(self):
        history_id = self._create_history("TestHistoryForPurge")["id"]
        data = {'purge': True}
        self._delete("histories/%s" % history_id, data=data)
        show_response = self._show(history_id)
        assert show_response["deleted"]
        assert show_response["purged"]

    def test_undelete(self):
        history_id = self._create_history("TestHistoryForDeleteAndUndelete")["id"]
        self._delete("histories/%s" % history_id)
        self._post("histories/deleted/%s/undelete" % history_id)
        show_response = self._show(history_id)
        assert not show_response["deleted"]

    def test_update(self):
        history_id = self._create_history("TestHistoryForUpdating")["id"]

        self._update(history_id, {"name": "New Name"})
        show_response = self._show(history_id)
        assert show_response["name"] == "New Name"

        unicode_name = u'桜ゲノム'
        self._update(history_id, {"name": unicode_name})
        show_response = self._show(history_id)
        assert show_response["name"] == unicode_name, show_response

        quoted_name = "'MooCow'"
        self._update(history_id, {"name": quoted_name})
        show_response = self._show(history_id)
        assert show_response["name"] == quoted_name

        self._update(history_id, {"deleted": True})
        show_response = self._show(history_id)
        assert show_response["deleted"], show_response

        self._update(history_id, {"deleted": False})
        show_response = self._show(history_id)
        assert not show_response["deleted"]

        self._update(history_id, {"published": True})
        show_response = self._show(history_id)
        assert show_response["published"]

        self._update(history_id, {"genome_build": "hg18"})
        show_response = self._show(history_id)
        assert show_response["genome_build"] == "hg18"

        self._update(history_id, {"annotation": "The annotation is cool"})
        show_response = self._show(history_id)
        assert show_response["annotation"] == "The annotation is cool"

        self._update(history_id, {"annotation": unicode_name})
        show_response = self._show(history_id)
        assert show_response["annotation"] == unicode_name, show_response

        self._update(history_id, {"annotation": quoted_name})
        show_response = self._show(history_id)
        assert show_response["annotation"] == quoted_name

    def test_update_invalid_attribute(self):
        history_id = self._create_history("TestHistoryForInvalidUpdating")["id"]
        put_response = self._update(history_id, {"invalidkey": "moo"})
        assert "invalidkey" not in put_response.json()

    def test_update_invalid_types(self):
        history_id = self._create_history("TestHistoryForUpdatingInvalidTypes")["id"]
        for str_key in ["name", "annotation"]:
            assert self._update(history_id, {str_key: False}).status_code == 400

        for bool_key in ['deleted', 'importable', 'published']:
            assert self._update(history_id, {bool_key: "a string"}).status_code == 400

        assert self._update(history_id, {"tags": "a simple string"}).status_code == 400
        assert self._update(history_id, {"tags": [True]}).status_code == 400

    def test_invalid_keys(self):
        invalid_history_id = "1234123412341234"

        assert self._get("histories/%s" % invalid_history_id).status_code == 400
        assert self._update(invalid_history_id, {"name": "new name"}).status_code == 400
        assert self._delete("histories/%s" % invalid_history_id).status_code == 400
        assert self._post("histories/deleted/%s/undelete" % invalid_history_id).status_code == 400

    def test_create_anonymous_fails(self):
        post_data = dict(name="CannotCreate")
        # Using lower-level _api_url will cause key to not be injected.
        histories_url = self._api_url("histories")
        create_response = post(url=histories_url, data=post_data)
        self._assert_status_code_is(create_response, 403)

    def test_import_export(self):
        history_name = "for_export_default"
        history_id = self.dataset_populator.new_history(name=history_name)
        self.dataset_populator.new_dataset(history_id, content="1 2 3")
        deleted_hda = self.dataset_populator.new_dataset(history_id, content="1 2 3")
        self.dataset_populator.delete_dataset(history_id, deleted_hda["id"])
        deleted_details = self.dataset_populator.get_history_dataset_details(history_id, id=deleted_hda["id"])
        assert deleted_details["deleted"]
        imported_history_id = self._reimport_history(history_id, history_name, wait_on_history_length=2)

        def upload_job_check(job):
            assert job["tool_id"] == "upload1"

        def check_discarded(hda):
            assert hda["deleted"]
            assert hda["state"] == "discarded", hda
            assert hda["purged"] is True

        self._check_imported_dataset(history_id=imported_history_id, hid=1, job_checker=upload_job_check)
        self._check_imported_dataset(history_id=imported_history_id, hid=2, has_job=False, hda_checker=check_discarded, job_checker=upload_job_check)

        imported_content = self.dataset_populator.get_history_dataset_content(
            history_id=imported_history_id,
            hid=1,
        )
        assert imported_content == "1 2 3\n"

    def test_import_1901_histories(self):
        f = open(self.test_data_resolver.get_filename("exports/1901_two_datasets.tgz"), 'rb')
        import_data = dict(archive_source='', archive_file=f)
        self._import_history_and_wait(import_data, "API Test History", wait_on_history_length=2)

    def test_import_export_include_deleted(self):
        history_name = "for_export_include_deleted"
        history_id = self.dataset_populator.new_history(name=history_name)
        self.dataset_populator.new_dataset(history_id, content="1 2 3")
        deleted_hda = self.dataset_populator.new_dataset(history_id, content="1 2 3")
        self.dataset_populator.delete_dataset(history_id, deleted_hda["id"])

        imported_history_id = self._reimport_history(history_id, history_name, wait_on_history_length=2, export_kwds={"include_deleted": "True"})
        self._assert_history_length(imported_history_id, 2)

        def upload_job_check(job):
            assert job["tool_id"] == "upload1"

        def check_ok(hda):
            assert hda["state"] == "ok", hda
            assert hda["deleted"] is True, hda

        self._check_imported_dataset(history_id=imported_history_id, hid=1, job_checker=upload_job_check)
        self._check_imported_dataset(history_id=imported_history_id, hid=2, hda_checker=check_ok, job_checker=upload_job_check)

        imported_content = self.dataset_populator.get_history_dataset_content(
            history_id=imported_history_id,
            hid=1,
        )
        assert imported_content == "1 2 3\n"

    def test_import_metadata_regeneration(self):
        history_name = "for_import_metadata_regeneration"
        history_id = self.dataset_populator.new_history(name=history_name)
        self.dataset_populator.new_dataset(history_id, content=open(self.test_data_resolver.get_filename("1.bam"), 'rb'), file_type='bam', wait=True)
        imported_history_id = self._reimport_history(history_id, history_name)
        self._assert_history_length(imported_history_id, 1)
        self._check_imported_dataset(history_id=imported_history_id, hid=1)
        import_bam_metadata = self.dataset_populator.get_history_dataset_details(
            history_id=imported_history_id,
            hid=1,
        )
        # The cleanup() method of the __IMPORT_HISTORY__ job (which is executed
        # after the job has entered its final state):
        # - creates a new dataset with 'ok' state and adds it to the history
        # - starts a __SET_METADATA__ job to regenerate the dataset metadata, if
        #   needed
        # We need to wait a bit for the creation of the __SET_METADATA__ job.
        time.sleep(1)
        self.dataset_populator.wait_for_history_jobs(imported_history_id, assert_ok=True)
        bai_metadata = import_bam_metadata["meta_files"][0]
        assert bai_metadata["file_type"] == "bam_index"
        api_url = bai_metadata["download_url"].split("api/", 1)[1]
        bai_response = self._get(api_url)
        self._assert_status_code_is(bai_response, 200)
        assert len(bai_response.content) > 4

    def test_import_export_collection(self):
        history_name = "for_export_with_collections"
        history_id = self.dataset_populator.new_history(name=history_name)
        self.dataset_collection_populator.create_list_in_history(history_id, contents=["Hello", "World"], direct_upload=True)

        imported_history_id = self._reimport_history(history_id, history_name, wait_on_history_length=3)
        self._assert_history_length(imported_history_id, 3)

        def check_elements(elements):
            assert len(elements) == 2
            element0 = elements[0]["object"]
            element1 = elements[1]["object"]
            for element in [element0, element1]:
                assert not element["visible"]
                assert not element["deleted"]
                assert element["state"] == "ok"

            assert element0["hid"] == 2
            assert element1["hid"] == 3

        self._check_imported_collection(imported_history_id, hid=1, collection_type="list", elements_checker=check_elements)

    def test_import_export_nested_collection(self):
        history_name = "for_export_with_nested_collections"
        history_id = self.dataset_populator.new_history(name=history_name)
        self.dataset_collection_populator.create_list_of_pairs_in_history(history_id)

        imported_history_id = self._reimport_history(history_id, history_name, wait_on_history_length=3)
        self._assert_history_length(imported_history_id, 3)

        def check_elements(elements):
            assert len(elements) == 1
            element0 = elements[0]["object"]
            self._assert_has_keys(element0, "elements", "collection_type")

            child_elements = element0["elements"]

            assert len(child_elements) == 2
            assert element0["collection_type"] == "paired"

        self._check_imported_collection(imported_history_id, hid=1, collection_type="list:paired", elements_checker=check_elements)

    def _reimport_history(self, history_id, history_name, wait_on_history_length=None, export_kwds={}):
        # Ensure the history is ready to go...
        self.dataset_populator.wait_for_history(history_id, assert_ok=True)

        return self.dataset_populator.reimport_history(
            history_id, history_name, wait_on_history_length=wait_on_history_length, export_kwds=export_kwds, url=self.url, api_key=self.galaxy_interactor.api_key
        )

    def _import_history_and_wait(self, import_data, history_name, wait_on_history_length=None):

        imported_history_id = self.dataset_populator.import_history_and_wait_for_name(import_data, history_name)

        if wait_on_history_length:
            self.dataset_populator.wait_on_history_length(imported_history_id, wait_on_history_length)

        return imported_history_id

    def _assert_history_length(self, history_id, n):
        contents_response = self._get("histories/%s/contents" % history_id)
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        assert len(contents) == n, contents

    def _check_imported_dataset(self, history_id, hid, has_job=True, hda_checker=None, job_checker=None):
        imported_dataset_metadata = self.dataset_populator.get_history_dataset_details(
            history_id=history_id,
            hid=hid,
        )
        assert imported_dataset_metadata["history_content_type"] == "dataset"
        assert imported_dataset_metadata["history_id"] == history_id

        if hda_checker is not None:
            hda_checker(imported_dataset_metadata)

        assert "creating_job" in imported_dataset_metadata
        job_id = imported_dataset_metadata["creating_job"]
        if has_job:
            assert job_id

            job_details = self.dataset_populator.get_job_details(job_id, full=True)
            assert job_details.status_code == 200, job_details.content
            job = job_details.json()
            assert 'history_id' in job, job
            assert job['history_id'] == history_id, job

            if job_checker is not None:
                job_checker(job)

    def _check_imported_collection(self, history_id, hid, collection_type=None, elements_checker=None):
        imported_collection_metadata = self.dataset_populator.get_history_collection_details(
            history_id=history_id,
            hid=hid,
        )
        assert imported_collection_metadata["history_content_type"] == "dataset_collection"
        assert imported_collection_metadata["history_id"] == history_id
        assert "collection_type" in imported_collection_metadata
        assert "elements" in imported_collection_metadata
        if collection_type is not None:
            assert imported_collection_metadata["collection_type"] == collection_type, imported_collection_metadata

        if elements_checker is not None:
            elements_checker(imported_collection_metadata["elements"])

    def test_create_tag(self):
        post_data = dict(name="TestHistoryForTag")
        history_id = self._post("histories", data=post_data).json()["id"]
        tag_data = dict(value="awesometagvalue")
        tag_url = "histories/%s/tags/awesometagname" % history_id
        tag_create_response = self._post(tag_url, data=tag_data)
        self._assert_status_code_is(tag_create_response, 200)

    def _show(self, history_id):
        return self._get("histories/%s" % history_id).json()

    def _update(self, history_id, data):
        update_url = self._api_url("histories/%s" % history_id, use_key=True)
        put_response = put(update_url, json=data)
        return put_response

    def _create_history(self, name):
        post_data = dict(name=name)
        create_response = self._post("histories", data=post_data).json()
        self._assert_has_keys(create_response, "name", "id")
        self.assertEqual(create_response["name"], name)
        return create_response
class ScriptsIntegrationTestCase(integration_util.IntegrationTestCase):

    def setUp(self):
        super(ScriptsIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.config_dir = tempfile.mkdtemp()

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        cls._raw_config = config

    def test_helper(self):
        script = "helper.py"
        self._scripts_check_argparse_help(script)

        history_id = self.dataset_populator.new_history()
        dataset = self.dataset_populator.new_dataset(history_id, wait=True)
        dataset_id = dataset["id"]
        config_file = self.write_config_file()
        output = self._scripts_check_output(script, ["-c", config_file, "--decode-id", dataset_id])
        assert "Decoded " in output

    def test_cleanup(self):
        script = "cleanup_datasets/cleanup_datasets.py"
        self._scripts_check_argparse_help(script)

        history_id = self.dataset_populator.new_history()
        delete_response = self.dataset_populator._delete("histories/%s" % history_id)
        assert delete_response.status_code == 200
        assert delete_response.json()["purged"] is False
        config_file = self.write_config_file()
        output = self._scripts_check_output(script, ["-c", config_file, "--days", "0", "--purge_histories"])
        print(output)
        history_response = self.dataset_populator._get("histories/%s" % history_id)
        assert history_response.status_code == 200
        assert history_response.json()["purged"] is True, history_response.json()

    def test_pgcleanup(self):
        self._skip_if_not_postgres()

        script = "cleanup_datasets/pgcleanup.py"
        self._scripts_check_argparse_help(script)

        history_id = self.dataset_populator.new_history()
        delete_response = self.dataset_populator._delete("histories/%s" % history_id)
        assert delete_response.status_code == 200
        assert delete_response.json()["purged"] is False
        config_file = self.write_config_file()
        output = self._scripts_check_output(script, ["-c", config_file, "--older-than", "0", "--sequence", "purge_deleted_histories"])
        print(output)
        history_response = self.dataset_populator._get("histories/%s" % history_id)
        assert history_response.status_code == 200
        assert history_response.json()["purged"] is True, history_response.json()

    def test_set_user_disk_usage(self):
        script = "set_user_disk_usage.py"
        self._scripts_check_argparse_help(script)

        history_id = self.dataset_populator.new_history()
        self.dataset_populator.new_dataset(history_id, wait=True)
        config_file = self.write_config_file()
        output = self._scripts_check_output(script, ["-c", config_file])
        # verify the script runs to completion without crashing
        assert "100% complete" in output, output

    def test_set_dataset_sizes(self):
        script = "set_dataset_sizes.py"
        self._scripts_check_argparse_help(script)

        # TODO: change the size of the dataset and verify this works.
        history_id = self.dataset_populator.new_history()
        self.dataset_populator.new_dataset(history_id, wait=True)
        config_file = self.write_config_file()
        output = self._scripts_check_output(script, ["-c", config_file])
        # verify the script runs to completion without crashing
        assert "Completed 100%" in output, output

    def test_populate_uuid(self):
        script = "cleanup_datasets/populate_uuid.py"
        self._scripts_check_argparse_help(script)

        history_id = self.dataset_populator.new_history()
        self.dataset_populator.new_dataset(history_id, wait=True)
        config_file = self.write_config_file()
        output = self._scripts_check_output(script, ["-c", config_file])
        assert "Complete" in output

    @integration_util.skip_if_jenkins
    def test_grt_export(self):
        script = "grt/export.py"
        self._scripts_check_argparse_help(script)

        history_id = self.dataset_populator.new_history()
        self.dataset_populator.new_dataset(history_id, wait=True)
        config_file = self.write_config_file()
        grt_config_file = os.path.join(self.config_dir, "grt.yml")
        with open(grt_config_file, "w") as f:
            yaml.dump({"grt": {"share_toolbox": True}, "sanitization": {"tools": []}, "tool_params": {}}, f)
        self._scripts_check_output(script, ["-c", config_file, "-g", grt_config_file, "-r", self.config_dir])
        report_files = os.listdir(self.config_dir)
        json_files = [j for j in report_files if j.endswith(".json")]
        assert len(json_files) == 1, "Expected one json report file in [%s]" % json_files
        json_file = os.path.join(self.config_dir, json_files[0])
        with open(json_file, "r") as f:
            export = json.load(f)
        assert export["version"] == 2

    def test_admin_cleanup_datasets(self):
        self._scripts_check_argparse_help("cleanup_datasets/admin_cleanup_datasets.py")

    @skip_unless_module("flask_socketio")
    def test_communication_server(self):
        self._scripts_check_argparse_help("communication/communication_server.py")

    def test_secret_decoder_ring(self):
        script = "secret_decoder_ring.py"
        self._scripts_check_argparse_help(script)

        config_file = self.write_config_file()
        output = self._scripts_check_output(script, ["-c", config_file, "encode", "1"])
        encoded_id = output.strip()

        output = self._scripts_check_output(script, ["-c", config_file, "decode", encoded_id])
        assert output.strip() == "1"

    def test_database_scripts(self):
        self._scripts_check_argparse_help("create_db.py")
        self._scripts_check_argparse_help("manage_db.py")
        # TODO: test creating a smaller database - e.g. tool install database based on fresh
        # config file.

    def test_galaxy_main(self):
        self._scripts_check_argparse_help("galaxy-main")

    def test_runtime_stats(self):
        self._skip_if_not_postgres()
        self._scripts_check_argparse_help("runtime_stats.py")

    def _skip_if_not_postgres(self):
        if not self._app.config.database_connection.startswith("post"):
            raise unittest.SkipTest("Test only valid for postgres")

    def _scripts_check_argparse_help(self, script):
        # Test imports and argparse response to --help with 0 exit code.
        output = self._scripts_check_output(script, ["--help"])
        # Test -h, --help in printed output message.
        assert "-h, --help" in output

    def _scripts_check_output(self, script, args):
        cwd = galaxy_directory()
        cmd = ["python", os.path.join(cwd, "scripts", script)] + args
        clean_env = {
            "PATH": os.environ.get("PATH", None),
        }  # Don't let testing environment variables interfere with config.
        return unicodify(subprocess.check_output(cmd, cwd=cwd, env=clean_env))

    def write_config_file(self):
        config_dir = self.config_dir
        path = os.path.join(config_dir, "galaxy.yml")
        self._test_driver.temp_directories.extend([config_dir])
        with open(path, "w") as f:
            yaml.dump({"galaxy": self._raw_config}, f)

        return path
class LibrariesApiTestCase(api.ApiTestCase, TestsDatasets):
    def setUp(self):
        super(LibrariesApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(
            self.galaxy_interactor)
        self.library_populator = LibraryPopulator(self)

    def test_create(self):
        data = dict(name="CreateTestLibrary")
        create_response = self._post("libraries", data=data, admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, "name")
        assert library["name"] == "CreateTestLibrary"

    def test_delete(self):
        library = self.library_populator.new_library("DeleteTestLibrary")
        create_response = self._delete("libraries/%s" % library["id"],
                                       admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, "deleted")
        assert library["deleted"] is True
        # Test undeleting
        data = dict(undelete='true')
        create_response = self._delete("libraries/%s" % library["id"],
                                       data=data,
                                       admin=True)
        library = create_response.json()
        self._assert_status_code_is(create_response, 200)
        assert library["deleted"] is False

    def test_nonadmin(self):
        # Anons can't create libs
        data = dict(name="CreateTestLibrary")
        create_response = self._post("libraries",
                                     data=data,
                                     admin=False,
                                     anon=True)
        self._assert_status_code_is(create_response, 403)
        # Anons can't delete libs
        library = self.library_populator.new_library("AnonDeleteTestLibrary")
        create_response = self._delete("libraries/%s" % library["id"],
                                       admin=False,
                                       anon=True)
        self._assert_status_code_is(create_response, 403)
        # Anons can't update libs
        data = dict(name="ChangedName",
                    description="ChangedDescription",
                    synopsis='ChangedSynopsis')
        create_response = self._patch("libraries/%s" % library["id"],
                                      data=data,
                                      admin=False,
                                      anon=True)
        self._assert_status_code_is(create_response, 403)

    def test_update(self):
        library = self.library_populator.new_library("UpdateTestLibrary")
        data = dict(name='ChangedName',
                    description='ChangedDescription',
                    synopsis='ChangedSynopsis')
        create_response = self._patch("libraries/%s" % library["id"],
                                      data=data,
                                      admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, 'name', 'description', 'synopsis')
        assert library['name'] == 'ChangedName'
        assert library['description'] == 'ChangedDescription'
        assert library['synopsis'] == 'ChangedSynopsis'

    def test_create_private_library_permissions(self):
        library = self.library_populator.new_library("PermissionTestLibrary")
        library_id = library["id"]
        role_id = self.library_populator.user_private_role_id()
        self.library_populator.set_permissions(library_id, role_id)
        create_response = self._create_folder(library)
        self._assert_status_code_is(create_response, 200)

    def test_create_dataset_denied(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasets")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id,
                                                    content="1 2 3")['id']
        with self._different_user():
            payload = {'from_hda_id': hda_id}
            create_response = self._post("folders/%s/contents" % folder_id,
                                         payload)
            self._assert_status_code_is(create_response, 403)

    def test_create_dataset(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasets")
        payload, files = self.library_populator.create_dataset_request(
            library, file_type="txt", contents="create_test")
        create_response = self._post("libraries/%s/contents" % library["id"],
                                     payload,
                                     files=files)
        self._assert_status_code_is(create_response, 200)
        library_datasets = create_response.json()
        assert len(library_datasets) == 1
        library_dataset = library_datasets[0]

        def show():
            return self._get("libraries/%s/contents/%s" %
                             (library["id"], library_dataset["id"]))

        wait_on_state(show, assert_ok=True)
        library_dataset = show().json()
        self._assert_has_keys(library_dataset, "peek", "data_type")
        assert library_dataset["peek"].find("create_test") >= 0
        assert library_dataset["file_ext"] == "txt", library_dataset[
            "file_ext"]

    def test_create_dataset_in_folder(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasets")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id,
                                                    content="1 2 3")['id']
        payload = {'from_hda_id': hda_id}
        create_response = self._post("folders/%s/contents" % folder_id,
                                     payload)
        self._assert_status_code_is(create_response, 200)
        library_datasets = create_response.json()
        assert len(library_datasets) == 1

    def test_create_datasets_in_library_from_collection(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasetsFromCollection")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hdca_id = self.dataset_collection_populator.create_list_in_history(
            history_id, contents=["xxx", "yyy"]).json()["id"]
        payload = {
            'from_hdca_id': hdca_id,
            'create_type': 'file',
            'folder_id': folder_id
        }
        create_response = self._post("libraries/%s/contents" % library['id'],
                                     payload)
        self._assert_status_code_is(create_response, 200)

    def test_create_datasets_in_folder_from_collection(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasetsFromCollection")
        history_id = self.dataset_populator.new_history()
        hdca_id = self.dataset_collection_populator.create_list_in_history(
            history_id, contents=["xxx", "yyy"]).json()["id"]
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        payload = {'from_hdca_id': hdca_id}
        create_response = self._post("folders/%s/contents" % folder_id,
                                     payload)
        self._assert_status_code_is(create_response, 200)
        assert len(create_response.json()) == 2
        # Also test that anything different from a flat dataset collection list
        # is refused
        hdca_pair_id = self.dataset_collection_populator.create_list_of_pairs_in_history(
            history_id).json()['id']
        payload = {'from_hdca_id': hdca_pair_id}
        create_response = self._post("folders/%s/contents" % folder_id,
                                     payload)
        self._assert_status_code_is(create_response, 501)
        assert create_response.json(
        )['err_msg'] == 'Cannot add nested collections to library. Please flatten your collection first.'

    def _create_folder(self, library):
        create_data = dict(
            folder_id=library["root_folder_id"],
            create_type="folder",
            name="New Folder",
        )
        return self._post("libraries/%s/contents" % library["id"],
                          data=create_data)
class ToolsUploadTestCase(api.ApiTestCase):

    def setUp(self):
        super(ToolsUploadTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

    def test_upload1_paste(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id, 'Hello World')
            create_response = self._post("tools", data=payload)
            self._assert_has_keys(create_response.json(), 'outputs')

    def test_upload1_paste_bad_datatype(self):
        # Check that you get a nice message if you upload an incorrect datatype
        with self.dataset_populator.test_history() as history_id:
            file_type = "johnsawesomebutfakedatatype"
            payload = self.dataset_populator.upload_payload(history_id, 'Hello World', file_type=file_type)
            create = self._post("tools", data=payload).json()
            self._assert_has_keys(create, 'err_msg')
            assert file_type in create['err_msg']

    # upload1 rewrites content with posix lines by default but this can be disabled by setting
    # to_posix_lines=None in the request. Newer fetch API does not do this by default prefering
    # to keep content unaltered if possible but it can be enabled with a simple JSON boolean switch
    # of the same name (to_posix_lines).
    def test_upload_posix_newline_fixes_by_default(self):
        windows_content = ONE_TO_SIX_ON_WINDOWS
        result_content = self._upload_and_get_content(windows_content)
        self.assertEquals(result_content, ONE_TO_SIX_WITH_TABS)

    def test_fetch_posix_unaltered(self):
        windows_content = ONE_TO_SIX_ON_WINDOWS
        result_content = self._upload_and_get_content(windows_content, api="fetch")
        self.assertEquals(result_content, ONE_TO_SIX_ON_WINDOWS)

    def test_upload_disable_posix_fix(self):
        windows_content = ONE_TO_SIX_ON_WINDOWS
        result_content = self._upload_and_get_content(windows_content, to_posix_lines=None)
        self.assertEquals(result_content, windows_content)

    def test_fetch_post_lines_option(self):
        windows_content = ONE_TO_SIX_ON_WINDOWS
        result_content = self._upload_and_get_content(windows_content, api="fetch", to_posix_lines=True)
        self.assertEquals(result_content, ONE_TO_SIX_WITH_TABS)

    def test_upload_tab_to_space_off_by_default(self):
        table = ONE_TO_SIX_WITH_SPACES
        result_content = self._upload_and_get_content(table)
        self.assertEquals(result_content, table)

    def test_fetch_tab_to_space_off_by_default(self):
        table = ONE_TO_SIX_WITH_SPACES
        result_content = self._upload_and_get_content(table, api='fetch')
        self.assertEquals(result_content, table)

    def test_upload_tab_to_space(self):
        table = ONE_TO_SIX_WITH_SPACES
        result_content = self._upload_and_get_content(table, space_to_tab="Yes")
        self.assertEquals(result_content, ONE_TO_SIX_WITH_TABS)

    def test_fetch_tab_to_space(self):
        table = ONE_TO_SIX_WITH_SPACES
        result_content = self._upload_and_get_content(table, api="fetch", space_to_tab=True)
        self.assertEquals(result_content, ONE_TO_SIX_WITH_TABS)

    def test_fetch_compressed_with_explicit_type(self):
        fastqgz_path = TestDataResolver().get_filename("1.fastqsanger.gz")
        with open(fastqgz_path, "rb") as fh:
            details = self._upload_and_get_details(fh, api="fetch", ext="fastqsanger.gz")
        assert details["state"] == "ok"
        assert details["file_ext"] == "fastqsanger.gz"

    def test_fetch_compressed_default(self):
        fastqgz_path = TestDataResolver().get_filename("1.fastqsanger.gz")
        with open(fastqgz_path, "rb") as fh:
            details = self._upload_and_get_details(fh, api="fetch", assert_ok=False)
        assert details["state"] == "ok"
        assert details["file_ext"] == "fastqsanger.gz", details

    @uses_test_history(require_new=True)
    def test_fetch_compressed_auto_decompress_target(self, history_id):
        # TODO: this should definitely be fixed to allow auto decompression via that API.
        fastqgz_path = TestDataResolver().get_filename("1.fastqsanger.gz")
        with open(fastqgz_path, "rb") as fh:
            details = self._upload_and_get_details(fh,
                                                   api="fetch",
                                                   history_id=history_id,
                                                   assert_ok=False,
                                                   auto_decompress=True)
        assert details["state"] == "ok"
        assert details["file_ext"] == "fastqsanger.gz", details

    def test_upload_decompress_off_with_auto_by_default(self):
        # UNSTABLE_FLAG: This might default to a bed.gz datatype in the future.
        bedgz_path = TestDataResolver().get_filename("4.bed.gz")
        with open(bedgz_path, "rb") as fh:
            details = self._upload_and_get_details(fh, file_type="auto")
        assert details["state"] == "ok"
        assert details["file_ext"] == "bed", details

    def test_upload_decompresses_if_uncompressed_type_selected(self):
        fastqgz_path = TestDataResolver().get_filename("1.fastqsanger.gz")
        with open(fastqgz_path, "rb") as fh:
            details = self._upload_and_get_details(fh, file_type="fastqsanger")
        assert details["state"] == "ok"
        assert details["file_ext"] == "fastqsanger", details
        assert details["file_size"] == 178, details

    def test_upload_decompress_off_if_compressed_type_selected(self):
        fastqgz_path = TestDataResolver().get_filename("1.fastqsanger.gz")
        with open(fastqgz_path, "rb") as fh:
            details = self._upload_and_get_details(fh, file_type="fastqsanger.gz")
        assert details["state"] == "ok"
        assert details["file_ext"] == "fastqsanger.gz", details
        assert details["file_size"] == 161, details

    def test_upload_auto_decompress_off(self):
        # UNSTABLE_FLAG: This might default to a bed.gz datatype in the future.
        bedgz_path = TestDataResolver().get_filename("4.bed.gz")
        with open(bedgz_path, "rb") as fh:
            details = self._upload_and_get_details(fh, file_type="auto", assert_ok=False, auto_decompress=False)
        assert details["file_ext"] == "binary", details

    @uses_test_history(require_new=True)
    def test_fetch_compressed_with_auto(self, history_id):
        # UNSTABLE_FLAG: This might default to a bed.gz datatype in the future.
        # TODO: this should definitely be fixed to allow auto decompression via that API.
        bedgz_path = TestDataResolver().get_filename("4.bed.gz")
        with open(bedgz_path, "rb") as fh:
            details = self._upload_and_get_details(fh,
                                                   api="fetch",
                                                   history_id=history_id,
                                                   auto_decompress=True,
                                                   assert_ok=False)
        assert details["state"] == "ok"
        assert details["file_ext"] == "bed"

    @skip_without_datatype("rdata")
    def test_rdata_not_decompressed(self):
        # Prevent regression of https://github.com/galaxyproject/galaxy/issues/753
        rdata_path = TestDataResolver().get_filename("1.RData")
        with open(rdata_path, "rb") as fh:
            rdata_metadata = self._upload_and_get_details(fh, file_type="auto")
        self.assertEquals(rdata_metadata["file_ext"], "rdata")

    @skip_without_datatype("csv")
    def test_csv_upload(self):
        csv_path = TestDataResolver().get_filename("1.csv")
        with open(csv_path, "rb") as fh:
            csv_metadata = self._upload_and_get_details(fh, file_type="csv")
        self.assertEquals(csv_metadata["file_ext"], "csv")

    @skip_without_datatype("csv")
    def test_csv_upload_auto(self):
        csv_path = TestDataResolver().get_filename("1.csv")
        with open(csv_path, "rb") as fh:
            csv_metadata = self._upload_and_get_details(fh, file_type="auto")
        self.assertEquals(csv_metadata["file_ext"], "csv")

    @skip_without_datatype("csv")
    def test_csv_fetch(self):
        csv_path = TestDataResolver().get_filename("1.csv")
        with open(csv_path, "rb") as fh:
            csv_metadata = self._upload_and_get_details(fh, api="fetch", ext="csv", to_posix_lines=True)
        self.assertEquals(csv_metadata["file_ext"], "csv")

    @skip_without_datatype("csv")
    def test_csv_sniff_fetch(self):
        csv_path = TestDataResolver().get_filename("1.csv")
        with open(csv_path, "rb") as fh:
            csv_metadata = self._upload_and_get_details(fh, api="fetch", ext="auto", to_posix_lines=True)
        self.assertEquals(csv_metadata["file_ext"], "csv")

    @skip_without_datatype("tiff")
    def test_image_upload_auto(self):
        tiff_path = TestDataResolver().get_filename("1.tiff")
        with open(tiff_path, "rb") as fh:
            tiff_metadata = self._upload_and_get_details(fh, file_type="auto")
        self.assertEquals(tiff_metadata["file_ext"], "tiff")

    @skip_without_datatype("velvet")
    def test_composite_datatype(self):
        with self.dataset_populator.test_history() as history_id:
            dataset = self._velvet_upload(history_id, extra_inputs={
                "files_1|url_paste": "roadmaps content",
                "files_1|type": "upload_dataset",
                "files_2|url_paste": "log content",
                "files_2|type": "upload_dataset",
            })

            roadmaps_content = self._get_roadmaps_content(history_id, dataset)
            assert roadmaps_content.strip() == "roadmaps content", roadmaps_content

    @skip_without_datatype("velvet")
    def test_composite_datatype_space_to_tab(self):
        # Like previous test but set one upload with space_to_tab to True to
        # verify that works.
        with self.dataset_populator.test_history() as history_id:
            dataset = self._velvet_upload(history_id, extra_inputs={
                "files_1|url_paste": "roadmaps content",
                "files_1|type": "upload_dataset",
                "files_1|space_to_tab": "Yes",
                "files_2|url_paste": "log content",
                "files_2|type": "upload_dataset",
            })

            roadmaps_content = self._get_roadmaps_content(history_id, dataset)
            assert roadmaps_content.strip() == "roadmaps\tcontent", roadmaps_content

    @skip_without_datatype("velvet")
    def test_composite_datatype_posix_lines(self):
        # Like previous test but set one upload with space_to_tab to True to
        # verify that works.
        with self.dataset_populator.test_history() as history_id:
            dataset = self._velvet_upload(history_id, extra_inputs={
                "files_1|url_paste": "roadmaps\rcontent",
                "files_1|type": "upload_dataset",
                "files_1|space_to_tab": "Yes",
                "files_2|url_paste": "log\rcontent",
                "files_2|type": "upload_dataset",
            })

            roadmaps_content = self._get_roadmaps_content(history_id, dataset)
            assert roadmaps_content.strip() == "roadmaps\ncontent", roadmaps_content

    def test_upload_dbkey(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id, "Test123", dbkey="hg19")
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]
            assert datasets[0].get("genome_build") == "hg19", datasets[0]

    @uses_test_history(require_new=False)
    def test_fetch_bam_file(self, history_id):
        bam_path = TestDataResolver().get_filename("1.bam")
        with open(bam_path, "rb") as fh:
            details = self._upload_and_get_details(fh,
                                                   api="fetch",
                                                   history_id=history_id,
                                                   assert_ok=False)
        assert details["state"] == "ok"
        assert details["file_ext"] == "bam", details

    def test_upload_bam_file(self):
        bam_path = TestDataResolver().get_filename("1.bam")
        with open(bam_path, "rb") as fh:
            details = self._upload_and_get_details(fh, file_type="auto")
        assert details["state"] == "ok"
        assert details["file_ext"] == "bam", details

    def test_fetch_metadata(self):
        table = ONE_TO_SIX_WITH_SPACES
        details = self._upload_and_get_details(table, api='fetch', dbkey="hg19", info="cool upload", tags=["name:data", "group:type:paired-end"])
        assert details.get("genome_build") == "hg19"
        assert details.get("misc_info") == "cool upload", details
        tags = details.get("tags")
        assert len(tags) == 2, details
        assert "group:type:paired-end" in tags
        assert "name:data" in tags

    def test_upload_multiple_files_1(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id, "Test123",
                dbkey="hg19",
                extra_inputs={
                    "files_1|url_paste": "SecondOutputContent",
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "tabular",
                    "files_1|dbkey": "hg18",
                    "file_count": "2",
                }
            )
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 2, datasets
            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
            assert content.strip() == "Test123"
            assert datasets[0]["file_ext"] == "txt"
            assert datasets[0]["genome_build"] == "hg19", datasets

            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
            assert content.strip() == "SecondOutputContent"
            assert datasets[1]["file_ext"] == "tabular"
            assert datasets[1]["genome_build"] == "hg18", datasets

    def test_upload_multiple_files_2(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id, "Test123",
                file_type="tabular",
                dbkey="hg19",
                extra_inputs={
                    "files_1|url_paste": "SecondOutputContent",
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "files_1|dbkey": "hg18",
                    "file_count": "2",
                }
            )
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 2, datasets
            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
            assert content.strip() == "Test123"
            assert datasets[0]["file_ext"] == "tabular", datasets
            assert datasets[0]["genome_build"] == "hg19", datasets

            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
            assert content.strip() == "SecondOutputContent"
            assert datasets[1]["file_ext"] == "txt"
            assert datasets[1]["genome_build"] == "hg18", datasets

    def test_upload_multiple_files_3(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id, "Test123",
                file_type="tabular",
                dbkey="hg19",
                extra_inputs={
                    "files_0|file_type": "txt",
                    "files_0|dbkey": "hg18",
                    "files_1|url_paste": "SecondOutputContent",
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "files_1|dbkey": "hg18",
                    "file_count": "2",
                }
            )
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 2, datasets
            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
            assert content.strip() == "Test123"
            assert datasets[0]["file_ext"] == "txt", datasets
            assert datasets[0]["genome_build"] == "hg18", datasets

            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
            assert content.strip() == "SecondOutputContent"
            assert datasets[1]["file_ext"] == "txt"
            assert datasets[1]["genome_build"] == "hg18", datasets

    def test_upload_multiple_files_no_dbkey(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id, "Test123",
                file_type="tabular",
                dbkey=None,
                extra_inputs={
                    "files_0|file_type": "txt",
                    "files_1|url_paste": "SecondOutputContent",
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "file_count": "2",
                }
            )
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 2, datasets
            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
            assert content.strip() == "Test123"
            assert datasets[0]["file_ext"] == "txt", datasets
            assert datasets[0]["genome_build"] == "?", datasets

            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
            assert content.strip() == "SecondOutputContent"
            assert datasets[1]["file_ext"] == "txt"
            assert datasets[1]["genome_build"] == "?", datasets

    def test_upload_multiple_files_space_to_tab(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id,
                content=ONE_TO_SIX_WITH_SPACES,
                file_type="tabular",
                dbkey="hg19",
                extra_inputs={
                    "files_0|file_type": "txt",
                    "files_0|space_to_tab": "Yes",
                    "files_1|url_paste": ONE_TO_SIX_WITH_SPACES,
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "files_2|url_paste": ONE_TO_SIX_WITH_SPACES,
                    "files_2|NAME": "ThirdOutputName",
                    "files_2|file_type": "txt",
                    "files_2|space_to_tab": "Yes",
                    "file_count": "3",
                }
            )
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 3, datasets
            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
            assert content == ONE_TO_SIX_WITH_TABS

            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
            assert content == ONE_TO_SIX_WITH_SPACES

            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[2])
            assert content == ONE_TO_SIX_WITH_TABS

    def test_multiple_files_posix_lines(self):
        with self.dataset_populator.test_history() as history_id:
            payload = self.dataset_populator.upload_payload(history_id,
                content=ONE_TO_SIX_ON_WINDOWS,
                file_type="tabular",
                dbkey="hg19",
                extra_inputs={
                    "files_0|file_type": "txt",
                    "files_0|to_posix_lines": "Yes",
                    "files_1|url_paste": ONE_TO_SIX_ON_WINDOWS,
                    "files_1|NAME": "SecondOutputName",
                    "files_1|file_type": "txt",
                    "files_1|to_posix_lines": None,
                    "files_2|url_paste": ONE_TO_SIX_ON_WINDOWS,
                    "files_2|NAME": "ThirdOutputName",
                    "files_2|file_type": "txt",
                    "file_count": "3",
                }
            )
            run_response = self.dataset_populator.tools_post(payload)
            self.dataset_populator.wait_for_tool_run(history_id, run_response)
            datasets = run_response.json()["outputs"]

            assert len(datasets) == 3, datasets
            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
            assert content == ONE_TO_SIX_WITH_TABS

            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
            assert content == ONE_TO_SIX_ON_WINDOWS

            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[2])
            assert content == ONE_TO_SIX_WITH_TABS

    def test_upload_from_invalid_url(self):
        history_id, new_dataset = self._upload('https://usegalaxy.org/bla123', assert_ok=False)
        dataset_details = self.dataset_populator.get_history_dataset_details(history_id, dataset_id=new_dataset["id"], assert_ok=False)
        assert dataset_details['state'] == 'error', "expected dataset state to be 'error', but got '%s'" % dataset_details['state']

    def test_upload_from_valid_url(self):
        history_id, new_dataset = self._upload('https://usegalaxy.org/api/version')
        self.dataset_populator.get_history_dataset_details(history_id, dataset_id=new_dataset["id"], assert_ok=True)

    def _velvet_upload(self, history_id, extra_inputs):
        payload = self.dataset_populator.upload_payload(
            history_id,
            "sequences content",
            file_type="velvet",
            extra_inputs=extra_inputs,
        )
        run_response = self.dataset_populator.tools_post(payload)
        self.dataset_populator.wait_for_tool_run(history_id, run_response)
        datasets = run_response.json()["outputs"]

        assert len(datasets) == 1
        dataset = datasets[0]

        return dataset

    def _get_roadmaps_content(self, history_id, dataset):
        roadmaps_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=dataset, filename="Roadmaps")
        return roadmaps_content

    def _upload_and_get_content(self, content, **upload_kwds):
        history_id, new_dataset = self._upload(content, **upload_kwds)
        return self.dataset_populator.get_history_dataset_content(history_id, dataset=new_dataset)

    def _upload_and_get_details(self, content, **upload_kwds):
        history_id, new_dataset = self._upload(content, **upload_kwds)
        assert_ok = upload_kwds.get("assert_ok", True)
        return self.dataset_populator.get_history_dataset_details(history_id, dataset=new_dataset, assert_ok=assert_ok)

    def _upload(self, content, api="upload1", history_id=None, **upload_kwds):
        assert_ok = upload_kwds.get("assert_ok", True)
        history_id = history_id or self.dataset_populator.new_history()
        if api == "upload1":
            new_dataset = self.dataset_populator.new_dataset(history_id, content=content, **upload_kwds)
        else:
            assert api == "fetch"
            element = dict(src="files", **upload_kwds)
            target = {
                "destination": {"type": "hdas"},
                "elements": [element],
            }
            targets = json.dumps([target])
            payload = {
                "history_id": history_id,
                "targets": targets,
                "__files": {"files_0|file_data": content}
            }
            new_dataset = self.dataset_populator.fetch(payload, assert_ok=assert_ok).json()["outputs"][0]
        self.dataset_populator.wait_for_history(history_id, assert_ok=assert_ok)
        return history_id, new_dataset
Beispiel #42
0
class JobsApiTestCase(api.ApiTestCase):
    def setUp(self):
        super(JobsApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(
            self.galaxy_interactor)

    def test_index(self):
        # Create HDA to ensure at least one job exists...
        self.__history_with_new_dataset()
        jobs = self.__jobs_index()
        assert "upload1" in map(itemgetter("tool_id"), jobs)

    def test_system_details_admin_only(self):
        self.__history_with_new_dataset()
        jobs = self.__jobs_index(admin=False)
        job = jobs[0]
        self._assert_not_has_keys(job, "command_line", "external_id")

        jobs = self.__jobs_index(admin=True)
        job = jobs[0]
        self._assert_has_keys(job, "command_line", "external_id")

    def test_index_state_filter(self):
        # Initial number of ok jobs
        original_count = len(self.__uploads_with_state("ok"))
        # Run through dataset upload to ensure num uplaods at least greater
        # by 1.
        self.__history_with_ok_dataset()

        # Verify number of ok jobs is actually greater.
        count_increased = False
        for i in range(10):
            new_count = len(self.__uploads_with_state("ok"))
            if original_count < new_count:
                count_increased = True
                break
            time.sleep(.1)

        if not count_increased:
            template = "Jobs in ok state did not increase (was %d, now %d)"
            message = template % (original_count, new_count)
            raise AssertionError(message)

    def test_index_date_filter(self):
        self.__history_with_new_dataset()
        two_weeks_ago = (datetime.datetime.utcnow() -
                         datetime.timedelta(7)).isoformat()
        last_week = (datetime.datetime.utcnow() -
                     datetime.timedelta(7)).isoformat()
        next_week = (datetime.datetime.utcnow() +
                     datetime.timedelta(7)).isoformat()
        today = datetime.datetime.utcnow().isoformat()
        tomorrow = (datetime.datetime.utcnow() +
                    datetime.timedelta(1)).isoformat()

        jobs = self.__jobs_index(data={
            "date_range_min": today[0:10],
            "date_range_max": tomorrow[0:10]
        })
        assert len(jobs) > 0
        today_job_id = jobs[0]["id"]

        jobs = self.__jobs_index(data={
            "date_range_min": two_weeks_ago,
            "date_range_max": last_week
        })
        assert today_job_id not in map(itemgetter("id"), jobs)

        jobs = self.__jobs_index(data={
            "date_range_min": last_week,
            "date_range_max": next_week
        })
        assert today_job_id in map(itemgetter("id"), jobs)

    def test_index_history(self):
        history_id, _ = self.__history_with_new_dataset()
        jobs = self.__jobs_index(data={"history_id": history_id})
        assert len(jobs) > 0

        history_id = self.dataset_populator.new_history()
        jobs = self.__jobs_index(data={"history_id": history_id})
        assert len(jobs) == 0

    def test_index_multiple_states_filter(self):
        # Initial number of ok jobs
        original_count = len(self.__uploads_with_state("ok", "new"))

        # Run through dataset upload to ensure num uplaods at least greater
        # by 1.
        self.__history_with_ok_dataset()

        # Verify number of ok jobs is actually greater.
        new_count = len(self.__uploads_with_state("new", "ok"))
        assert original_count < new_count, new_count

    def test_show(self):
        # Create HDA to ensure at least one job exists...
        self.__history_with_new_dataset()

        jobs_response = self._get("jobs")
        first_job = jobs_response.json()[0]
        self._assert_has_key(first_job, 'id', 'state', 'exit_code',
                             'update_time', 'create_time')

        job_id = first_job["id"]
        show_jobs_response = self._get("jobs/%s" % job_id)
        self._assert_status_code_is(show_jobs_response, 200)

        job_details = show_jobs_response.json()
        self._assert_has_key(job_details, 'id', 'state', 'exit_code',
                             'update_time', 'create_time')

    def test_show_security(self):
        history_id, _ = self.__history_with_new_dataset()
        jobs_response = self._get("jobs", data={"history_id": history_id})
        job = jobs_response.json()[0]
        job_id = job["id"]

        show_jobs_response = self._get("jobs/%s" % job_id, admin=False)
        self._assert_not_has_keys(show_jobs_response.json(), "command_line",
                                  "external_id")

        # TODO: Re-activate test case when API accepts privacy settings
        # with self._different_user():
        #    show_jobs_response = self._get( "jobs/%s" % job_id, admin=False )
        #    self._assert_status_code_is( show_jobs_response, 200 )

        show_jobs_response = self._get("jobs/%s" % job_id, admin=True)
        self._assert_has_keys(show_jobs_response.json(), "command_line",
                              "external_id")

    def test_deleting_output_keep_running_until_all_deleted(self):
        history_id, job_state, outputs = self._setup_running_two_output_job(
            120)

        self._hack_to_skip_test_if_state_ok(job_state)

        # Delete one of the two outputs and make sure the job is still running.
        self._raw_update_history_item(history_id, outputs[0]["id"],
                                      {"deleted": True})

        self._hack_to_skip_test_if_state_ok(job_state)

        time.sleep(1)

        self._hack_to_skip_test_if_state_ok(job_state)

        state = job_state().json()["state"]
        assert state == "running", state

        # Delete the second output and make sure the job is cancelled.
        self._raw_update_history_item(history_id, outputs[1]["id"],
                                      {"deleted": True})
        final_state = wait_on_state(job_state, assert_ok=False, timeout=15)
        assert final_state in ["deleted_new", "deleted"], final_state

    def test_purging_output_keep_running_until_all_purged(self):
        history_id, job_state, outputs = self._setup_running_two_output_job(
            120)

        # Pretty much right away after the job is running, these paths should be populated -
        # if they are grab them and make sure they are deleted at the end of the job.
        dataset_1 = self._get_history_item_as_admin(history_id,
                                                    outputs[0]["id"])
        dataset_2 = self._get_history_item_as_admin(history_id,
                                                    outputs[1]["id"])
        if "file_name" in dataset_1:
            output_dataset_paths = [
                dataset_1["file_name"], dataset_2["file_name"]
            ]
            # This may or may not exist depending on if the test is local or not.
            output_dataset_paths_exist = os.path.exists(
                output_dataset_paths[0])
        else:
            output_dataset_paths = []
            output_dataset_paths_exist = False

        self._hack_to_skip_test_if_state_ok(job_state)

        current_state = job_state().json()["state"]
        assert current_state == "running", current_state

        # Purge one of the two outputs and make sure the job is still running.
        self._raw_update_history_item(history_id, outputs[0]["id"],
                                      {"purged": True})
        time.sleep(1)

        self._hack_to_skip_test_if_state_ok(job_state)

        current_state = job_state().json()["state"]
        assert current_state == "running", current_state

        # Purge the second output and make sure the job is cancelled.
        self._raw_update_history_item(history_id, outputs[1]["id"],
                                      {"purged": True})
        final_state = wait_on_state(job_state, assert_ok=False, timeout=15)
        assert final_state in ["deleted_new", "deleted"], final_state

        def paths_deleted():
            if not os.path.exists(
                    output_dataset_paths[0]) and not os.path.exists(
                        output_dataset_paths[1]):
                return True

        if output_dataset_paths_exist:
            wait_on(paths_deleted, "path deletion")

    def test_purging_output_cleaned_after_ok_run(self):
        history_id, job_state, outputs = self._setup_running_two_output_job(10)

        # Pretty much right away after the job is running, these paths should be populated -
        # if they are grab them and make sure they are deleted at the end of the job.
        dataset_1 = self._get_history_item_as_admin(history_id,
                                                    outputs[0]["id"])
        dataset_2 = self._get_history_item_as_admin(history_id,
                                                    outputs[1]["id"])
        if "file_name" in dataset_1:
            output_dataset_paths = [
                dataset_1["file_name"], dataset_2["file_name"]
            ]
            # This may or may not exist depending on if the test is local or not.
            output_dataset_paths_exist = os.path.exists(
                output_dataset_paths[0])
        else:
            output_dataset_paths = []
            output_dataset_paths_exist = False

        if not output_dataset_paths_exist:
            # Given this Galaxy configuration - there is nothing more to be tested here.
            # Consider throwing a skip instead.
            return

        # Purge one of the two outputs and wait for the job to complete.
        self._raw_update_history_item(history_id, outputs[0]["id"],
                                      {"purged": True})
        wait_on_state(job_state, assert_ok=True)

        if output_dataset_paths_exist:
            time.sleep(.5)
            # Make sure the non-purged dataset is on disk and the purged one is not.
            assert os.path.exists(output_dataset_paths[1])
            assert not os.path.exists(output_dataset_paths[0])

    def _hack_to_skip_test_if_state_ok(self, job_state):
        from nose.plugins.skip import SkipTest
        if job_state().json()["state"] == "ok":
            message = "Job state switch from running to ok too quickly - the rest of the test requires the job to be in a running state. Skipping test."
            raise SkipTest(message)

    def _setup_running_two_output_job(self, sleep_time):
        history_id = self.dataset_populator.new_history()
        payload = self.dataset_populator.run_tool_payload(
            tool_id='create_2',
            inputs=dict(sleep_time=sleep_time, ),
            history_id=history_id,
        )
        run_response = self._post("tools", data=payload).json()
        outputs = run_response["outputs"]
        jobs = run_response["jobs"]

        assert len(outputs) == 2
        assert len(jobs) == 1

        def job_state():
            jobs_response = self._get("jobs/%s" % jobs[0]["id"])
            return jobs_response

        # Give job some time to get up and running.
        time.sleep(2)
        running_state = wait_on_state(job_state,
                                      skip_states=["queued", "new"],
                                      assert_ok=False,
                                      timeout=15)
        assert running_state == "running", running_state

        def job_state():
            jobs_response = self._get("jobs/%s" % jobs[0]["id"])
            return jobs_response

        return history_id, job_state, outputs

    def _raw_update_history_item(self, history_id, item_id, data):
        update_url = self._api_url("histories/%s/contents/%s" %
                                   (history_id, item_id),
                                   use_key=True)
        update_response = put(update_url, json=data)
        assert_status_code_is_ok(update_response)
        return update_response

    def _get_history_item_as_admin(self, history_id, item_id):
        response = self._get("histories/%s/contents/%s?view=detailed" %
                             (history_id, item_id),
                             admin=True)
        assert_status_code_is_ok(response)
        return response.json()

    def test_search(self):
        history_id, dataset_id = self.__history_with_ok_dataset()
        inputs = json.dumps({'input1': {'src': 'hda', 'id': dataset_id}})
        self._job_search(tool_id='cat1', history_id=history_id, inputs=inputs)
        # We test that a job can be found even if the dataset has been copied to another history
        new_history_id = self.dataset_populator.new_history()
        copy_payload = {
            "content": dataset_id,
            "source": "hda",
            "type": "dataset"
        }
        copy_response = self._post("histories/%s/contents" % new_history_id,
                                   data=copy_payload)
        self._assert_status_code_is(copy_response, 200)
        new_dataset_id = copy_response.json()['id']
        copied_inputs = json.dumps(
            {'input1': {
                'src': 'hda',
                'id': new_dataset_id
            }})
        search_payload = self._search_payload(history_id=history_id,
                                              tool_id='cat1',
                                              inputs=copied_inputs)
        self._search(search_payload, expected_search_count=1)
        # Now we delete the original input HDA that was used -- we should still be able to find the job
        delete_respone = self._delete("histories/%s/contents/%s" %
                                      (history_id, dataset_id))
        self._assert_status_code_is(delete_respone, 200)
        self._search(search_payload, expected_search_count=1)
        # Now we also delete the copy -- we shouldn't find a job
        delete_respone = self._delete("histories/%s/contents/%s" %
                                      (new_history_id, new_dataset_id))
        self._assert_status_code_is(delete_respone, 200)
        self._search(search_payload, expected_search_count=0)

    def test_search_delete_outputs(self):
        history_id, dataset_id = self.__history_with_ok_dataset()
        inputs = json.dumps({'input1': {'src': 'hda', 'id': dataset_id}})
        tool_response = self._job_search(tool_id='cat1',
                                         history_id=history_id,
                                         inputs=inputs)
        output_id = tool_response.json()['outputs'][0]['id']
        delete_respone = self._delete("histories/%s/contents/%s" %
                                      (history_id, output_id))
        self._assert_status_code_is(delete_respone, 200)
        search_payload = self._search_payload(history_id=history_id,
                                              tool_id='cat1',
                                              inputs=inputs)
        self._search(search_payload, expected_search_count=0)

    def test_search_with_hdca_list_input(self):
        history_id, list_id_a = self.__history_with_ok_collection(
            collection_type='list')
        history_id, list_id_b = self.__history_with_ok_collection(
            collection_type='list', history_id=history_id)
        inputs = json.dumps({
            'f1': {
                'src': 'hdca',
                'id': list_id_a
            },
            'f2': {
                'src': 'hdca',
                'id': list_id_b
            },
        })
        tool_response = self._job_search(tool_id='multi_data_param',
                                         history_id=history_id,
                                         inputs=inputs)
        # We switch the inputs, this should not return a match
        inputs_switched = json.dumps({
            'f2': {
                'src': 'hdca',
                'id': list_id_a
            },
            'f1': {
                'src': 'hdca',
                'id': list_id_b
            },
        })
        search_payload = self._search_payload(history_id=history_id,
                                              tool_id='multi_data_param',
                                              inputs=inputs_switched)
        self._search(search_payload, expected_search_count=0)
        # We delete the ouput (this is a HDA, as multi_data_param reduces collections)
        # and use the correct input job definition, the job should not be found
        output_id = tool_response.json()['outputs'][0]['id']
        delete_respone = self._delete("histories/%s/contents/%s" %
                                      (history_id, output_id))
        self._assert_status_code_is(delete_respone, 200)
        search_payload = self._search_payload(history_id=history_id,
                                              tool_id='multi_data_param',
                                              inputs=inputs)
        self._search(search_payload, expected_search_count=0)

    def test_search_delete_hdca_output(self):
        history_id, list_id_a = self.__history_with_ok_collection(
            collection_type='list')
        inputs = json.dumps({
            'input1': {
                'src': 'hdca',
                'id': list_id_a
            },
        })
        tool_response = self._job_search(tool_id='collection_creates_list',
                                         history_id=history_id,
                                         inputs=inputs)
        output_id = tool_response.json()['outputs'][0]['id']
        # We delete a single tool output, no job should be returned
        delete_respone = self._delete("histories/%s/contents/%s" %
                                      (history_id, output_id))
        self._assert_status_code_is(delete_respone, 200)
        search_payload = self._search_payload(
            history_id=history_id,
            tool_id='collection_creates_list',
            inputs=inputs)
        self._search(search_payload, expected_search_count=0)
        tool_response = self._job_search(tool_id='collection_creates_list',
                                         history_id=history_id,
                                         inputs=inputs)
        output_collection_id = tool_response.json(
        )['output_collections'][0]['id']
        # We delete a collection output, no job should be returned
        delete_respone = self._delete(
            "histories/%s/contents/dataset_collections/%s" %
            (history_id, output_collection_id))
        self._assert_status_code_is(delete_respone, 200)
        search_payload = self._search_payload(
            history_id=history_id,
            tool_id='collection_creates_list',
            inputs=inputs)
        self._search(search_payload, expected_search_count=0)

    def test_search_with_hdca_pair_input(self):
        history_id, list_id_a = self.__history_with_ok_collection(
            collection_type='pair')
        inputs = json.dumps({
            'f1': {
                'src': 'hdca',
                'id': list_id_a
            },
            'f2': {
                'src': 'hdca',
                'id': list_id_a
            },
        })
        self._job_search(tool_id='multi_data_param',
                         history_id=history_id,
                         inputs=inputs)
        # We test that a job can be found even if the collection has been copied to another history
        new_history_id = self.dataset_populator.new_history()
        copy_payload = {
            "content": list_id_a,
            "source": "hdca",
            "type": "dataset_collection"
        }
        copy_response = self._post("histories/%s/contents" % new_history_id,
                                   data=copy_payload)
        self._assert_status_code_is(copy_response, 200)
        new_list_a = copy_response.json()['id']
        copied_inputs = json.dumps({
            'f1': {
                'src': 'hdca',
                'id': new_list_a
            },
            'f2': {
                'src': 'hdca',
                'id': new_list_a
            },
        })
        search_payload = self._search_payload(history_id=new_history_id,
                                              tool_id='multi_data_param',
                                              inputs=copied_inputs)
        self._search(search_payload, expected_search_count=1)
        # Now we delete the original input HDCA that was used -- we should still be able to find the job
        delete_respone = self._delete(
            "histories/%s/contents/dataset_collections/%s" %
            (history_id, list_id_a))
        self._assert_status_code_is(delete_respone, 200)
        self._search(search_payload, expected_search_count=1)
        # Now we also delete the copy -- we shouldn't find a job
        delete_respone = self._delete(
            "histories/%s/contents/dataset_collections/%s" %
            (history_id, new_list_a))
        self._assert_status_code_is(delete_respone, 200)
        self._search(search_payload, expected_search_count=0)

    def test_search_with_hdca_list_pair_input(self):
        history_id, list_id_a = self.__history_with_ok_collection(
            collection_type='list:pair')
        inputs = json.dumps({
            'f1': {
                'src': 'hdca',
                'id': list_id_a
            },
            'f2': {
                'src': 'hdca',
                'id': list_id_a
            },
        })
        self._job_search(tool_id='multi_data_param',
                         history_id=history_id,
                         inputs=inputs)

    def _job_search(self, tool_id, history_id, inputs):
        search_payload = self._search_payload(history_id=history_id,
                                              tool_id=tool_id,
                                              inputs=inputs)
        empty_search_response = self._post("jobs/search", data=search_payload)
        self._assert_status_code_is(empty_search_response, 200)
        self.assertEquals(len(empty_search_response.json()), 0)
        tool_response = self._post("tools", data=search_payload)
        self.dataset_populator.wait_for_tool_run(history_id,
                                                 run_response=tool_response)
        self._search(search_payload, expected_search_count=1)
        return tool_response

    def _search_payload(self, history_id, tool_id, inputs, state='ok'):
        search_payload = dict(tool_id=tool_id,
                              inputs=inputs,
                              history_id=history_id,
                              state=state)
        return search_payload

    def _search(self, payload, expected_search_count=1):
        # in case job and history aren't updated at exactly the same
        # time give time to wait
        for i in range(15):
            search_count = self._search_count(payload)
            if search_count == expected_search_count:
                break
            time.sleep(1)
        assert search_count == expected_search_count, "expected to find %d jobs, got %d jobs" % (
            expected_search_count, search_count)
        return search_count

    def _search_count(self, search_payload):
        search_response = self._post("jobs/search", data=search_payload)
        self._assert_status_code_is(search_response, 200)
        search_json = search_response.json()
        return len(search_json)

    def __uploads_with_state(self, *states):
        jobs_response = self._get("jobs", data=dict(state=states))
        self._assert_status_code_is(jobs_response, 200)
        jobs = jobs_response.json()
        assert not filter(lambda j: j["state"] not in states, jobs)
        return filter(lambda j: j["tool_id"] == "upload1", jobs)

    def __history_with_new_dataset(self):
        history_id = self.dataset_populator.new_history()
        dataset_id = self.dataset_populator.new_dataset(history_id)["id"]
        return history_id, dataset_id

    def __history_with_ok_dataset(self):
        history_id = self.dataset_populator.new_history()
        dataset_id = self.dataset_populator.new_dataset(history_id,
                                                        wait=True)["id"]
        return history_id, dataset_id

    def __history_with_ok_collection(self,
                                     collection_type='list',
                                     history_id=None):
        if not history_id:
            history_id = self.dataset_populator.new_history()
        if collection_type == 'list':
            create_reposonse = self.dataset_collection_populator.create_list_in_history(
                history_id).json()
        elif collection_type == 'pair':
            create_reposonse = self.dataset_collection_populator.create_pair_in_history(
                history_id).json()
        elif collection_type == 'list:pair':
            create_reposonse = self.dataset_collection_populator.create_list_of_pairs_in_history(
                history_id).json()
        self.dataset_collection_populator.wait_for_dataset_collection(
            create_reposonse)
        return history_id, create_reposonse['id']

    def __jobs_index(self, **kwds):
        jobs_response = self._get("jobs", **kwds)
        self._assert_status_code_is(jobs_response, 200)
        jobs = jobs_response.json()
        assert isinstance(jobs, list)
        return jobs
Beispiel #43
0
class LibrariesApiTestCase(api.ApiTestCase, TestsDatasets):
    def setUp(self):
        super(LibrariesApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(
            self.galaxy_interactor)
        self.library_populator = LibraryPopulator(self.galaxy_interactor)

    def test_create(self):
        data = dict(name="CreateTestLibrary")
        create_response = self._post("libraries", data=data, admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, "name")
        assert library["name"] == "CreateTestLibrary"

    def test_delete(self):
        library = self.library_populator.new_library("DeleteTestLibrary")
        create_response = self._delete("libraries/%s" % library["id"],
                                       admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, "deleted")
        assert library["deleted"] is True
        # Test undeleting
        data = dict(undelete=True)
        create_response = self._delete("libraries/%s" % library["id"],
                                       data=data,
                                       admin=True)
        library = create_response.json()
        self._assert_status_code_is(create_response, 200)
        assert library["deleted"] is False

    def test_nonadmin(self):
        # Anons can't create libs
        data = dict(name="CreateTestLibrary")
        create_response = self._post("libraries",
                                     data=data,
                                     admin=False,
                                     anon=True)
        self._assert_status_code_is(create_response, 403)
        # Anons can't delete libs
        library = self.library_populator.new_library("AnonDeleteTestLibrary")
        create_response = self._delete("libraries/%s" % library["id"],
                                       admin=False,
                                       anon=True)
        self._assert_status_code_is(create_response, 403)
        # Anons can't update libs
        data = dict(name="ChangedName",
                    description="ChangedDescription",
                    synopsis='ChangedSynopsis')
        create_response = self._patch("libraries/%s" % library["id"],
                                      data=data,
                                      admin=False,
                                      anon=True)
        self._assert_status_code_is(create_response, 403)

    def test_update(self):
        library = self.library_populator.new_library("UpdateTestLibrary")
        data = dict(name='ChangedName',
                    description='ChangedDescription',
                    synopsis='ChangedSynopsis')
        create_response = self._patch("libraries/%s" % library["id"],
                                      data=data,
                                      admin=True)
        self._assert_status_code_is(create_response, 200)
        library = create_response.json()
        self._assert_has_keys(library, 'name', 'description', 'synopsis')
        assert library['name'] == 'ChangedName'
        assert library['description'] == 'ChangedDescription'
        assert library['synopsis'] == 'ChangedSynopsis'

    def test_create_private_library_permissions(self):
        library = self.library_populator.new_library("PermissionTestLibrary")
        library_id = library["id"]
        role_id = self.library_populator.user_private_role_id()
        self.library_populator.set_permissions(library_id, role_id)
        create_response = self._create_folder(library)
        self._assert_status_code_is(create_response, 200)

    def test_create_dataset_denied(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasets")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id,
                                                    content="1 2 3")['id']
        with self._different_user():
            payload = {'from_hda_id': hda_id}
            create_response = self._post("folders/%s/contents" % folder_id,
                                         payload)
            self._assert_status_code_is(create_response, 403)

    def test_show_private_dataset_permissions(self):
        library, library_dataset = self.library_populator.new_library_dataset_in_private_library(
            "ForCreateDatasets", wait=True)
        with self._different_user():
            response = self.library_populator.show_ldda(
                library["id"], library_dataset["id"])
            # TODO: this should really be 403 and a proper JSON exception.
            self._assert_status_code_is(response, 400)

    def test_create_dataset(self):
        library, library_dataset = self.library_populator.new_library_dataset_in_private_library(
            "ForCreateDatasets", wait=True)
        self._assert_has_keys(library_dataset, "peek", "data_type")
        assert library_dataset["peek"].find("create_test") >= 0
        assert library_dataset["file_ext"] == "txt", library_dataset[
            "file_ext"]

    def test_fetch_upload_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder(
            "flat_zip")
        items = [{
            "src": "files",
            "dbkey": "hg19",
            "info": "my cool bed",
            "created_from_basename": "4.bed"
        }]
        targets = [{"destination": destination, "items": items}]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
            "__files": {
                "files_0|file_data":
                open(self.test_data_resolver.get_filename("4.bed"))
            },
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/4.bed")
        assert dataset["file_size"] == 61, dataset
        assert dataset["genome_build"] == "hg19", dataset
        assert dataset["misc_info"] == "my cool bed", dataset
        assert dataset["file_ext"] == "bed", dataset
        assert dataset["created_from_basename"] == "4.bed"

    def test_fetch_zip_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder(
            "flat_zip")
        bed_test_data_path = self.test_data_resolver.get_filename("4.bed.zip")
        targets = [{
            "destination": destination,
            "items_from": "archive",
            "src": "files",
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
            "__files": {
                "files_0|file_data": open(bed_test_data_path, 'rb')
            }
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/4.bed")
        assert dataset["file_size"] == 61, dataset

    def test_fetch_single_url_to_folder(self):
        library, response = self._fetch_single_url_to_folder()
        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/4.bed")
        assert dataset["file_size"] == 61, dataset

    def test_fetch_single_url_with_invalid_datatype(self):
        _, response = self._fetch_single_url_to_folder('xxx', assert_ok=False)
        self._assert_status_code_is(response, 400)
        assert response.json(
        )['err_msg'] == "Requested extension 'xxx' unknown, cannot upload dataset."

    def _fetch_single_url_to_folder(self, file_type='auto', assert_ok=True):
        history_id, library, destination = self._setup_fetch_to_folder(
            "single_url")
        items = [{
            "src": "url",
            "url": FILE_URL,
            "MD5": FILE_MD5,
            "ext": file_type,
        }]
        targets = [{
            "destination": destination,
            "items": items,
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
            "validate_hashes": True
        }
        return library, self.dataset_populator.fetch(payload,
                                                     assert_ok=assert_ok)

    def test_legacy_upload_unknown_datatype(self):
        library = self.library_populator.new_private_library("ForLegacyUpload")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        payload = {
            'folder_id': folder_id,
            'create_type': 'file',
            'file_type': 'xxx',
            'upload_option': 'upload_file',
            'files_0|url_paste': FILE_URL,
        }
        create_response = self._post("libraries/%s/contents" % library['id'],
                                     payload)
        self._assert_status_code_is(create_response, 400)
        assert create_response.json(
        ) == "Requested extension 'xxx' unknown, cannot upload dataset."

    def test_fetch_failed_validation(self):
        # Exception handling is really rough here - we should be creating a dataset in error instead
        # of just failing the job like this.
        history_id, library, destination = self._setup_fetch_to_folder(
            "single_url")
        items = [{
            "src": "url",
            "url":
            "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed",
            "MD5": "37b59762b59fff860460522d271bc112"
        }]
        targets = [{
            "destination": destination,
            "items": items,
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
            "validate_hashes": True
        }
        tool_response = self.dataset_populator.fetch(payload, assert_ok=False)
        job = self.dataset_populator.check_run(tool_response)
        self.dataset_populator.wait_for_job(job["id"])

        job = tool_response.json()["jobs"][0]
        details = self.dataset_populator.get_job_details(job["id"]).json()
        assert details["state"] == "error", details

    def test_fetch_url_archive_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder(
            "single_url")
        targets = [{
            "destination":
            destination,
            "items_from":
            "archive",
            "src":
            "url",
            "url":
            "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed.zip",
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/4.bed")
        assert dataset["file_size"] == 61, dataset

    @unittest.skip  # reference URLs changed, checksums now invalid.
    def test_fetch_bagit_archive_to_folder(self):
        history_id, library, destination = self._setup_fetch_to_folder(
            "bagit_archive")
        example_bag_path = self.test_data_resolver.get_filename(
            "example-bag.zip")
        targets = [{
            "destination": destination,
            "items_from": "bagit_archive",
            "src": "files",
        }]
        payload = {
            "history_id": history_id,  # TODO: Shouldn't be needed :(
            "targets": json.dumps(targets),
            "__files": {
                "files_0|file_data": open(example_bag_path)
            },
        }
        self.dataset_populator.fetch(payload)
        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/README.txt")
        assert dataset["file_size"] == 66, dataset

        dataset = self.library_populator.get_library_contents_with_path(
            library["id"], "/bdbag-profile.json")
        assert dataset["file_size"] == 723, dataset

    def _setup_fetch_to_folder(self, test_name):
        return self.library_populator.setup_fetch_to_folder(test_name)

    def test_create_dataset_in_folder(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasets")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id,
                                                    content="1 2 3")['id']
        payload = {'from_hda_id': hda_id}
        create_response = self._post("folders/%s/contents" % folder_id,
                                     payload)
        self._assert_status_code_is(create_response, 200)
        self._assert_has_keys(create_response.json(), "name", "id")

    def test_create_dataset_in_subfolder(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasets")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        subfolder_response = self._create_subfolder(folder_id)
        self._assert_status_code_is(folder_response, 200)
        print(subfolder_response.json())
        subfolder_id = subfolder_response.json()['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id,
                                                    content="1 2 3 sub")['id']
        payload = {'from_hda_id': hda_id}
        create_response = self._post("folders/%s/contents" % subfolder_id,
                                     payload)
        self._assert_status_code_is(create_response, 200)
        self._assert_has_keys(create_response.json(), "name", "id")
        dataset_update_time = create_response.json()['update_time']
        container_fetch_response = self.galaxy_interactor.get(
            "folders/%s/contents" % folder_id)
        container_update_time = container_fetch_response.json(
        )['folder_contents'][0]['update_time']
        assert dataset_update_time == container_update_time, container_fetch_response

    def test_update_dataset_in_folder(self):
        ld = self._create_dataset_in_folder_in_library("ForUpdateDataset")
        data = {
            'name': 'updated_name',
            'file_ext': 'fastq',
            'misc_info': 'updated_info',
            'genome_build': 'updated_genome_build'
        }
        create_response = self._patch("libraries/datasets/%s" %
                                      ld.json()["id"],
                                      data=data)
        self._assert_status_code_is(create_response, 200)
        self._assert_has_keys(create_response.json(), "name", "file_ext",
                              "misc_info", "genome_build")

    def test_update_dataset_tags(self):
        ld = self._create_dataset_in_folder_in_library("ForTagtestDataset")
        data = {"tags": ["#Lancelot", "name:Holy Grail", "blue"]}
        create_response = self._patch("libraries/datasets/%s" %
                                      ld.json()["id"],
                                      data=data)
        self._assert_status_code_is(create_response, 200)
        self._assert_has_keys(create_response.json(), "tags")
        assert create_response.json(
        )["tags"] == "name:Lancelot, name:HolyGrail, blue"

    def test_invalid_update_dataset_in_folder(self):
        ld = self._create_dataset_in_folder_in_library(
            "ForInvalidUpdateDataset")
        data = {'file_ext': 'nonexisting_type'}
        create_response = self._patch("libraries/datasets/%s" %
                                      ld.json()["id"],
                                      data=data)
        self._assert_status_code_is(create_response, 400)
        assert 'This Galaxy does not recognize the datatype of:' in create_response.json(
        )['err_msg']

    def test_detect_datatype_of_dataset_in_folder(self):
        ld = self._create_dataset_in_folder_in_library("ForDetectDataset")
        # Wait for metadata job to finish.
        time.sleep(2)
        data = {'file_ext': 'data'}
        create_response = self._patch("libraries/datasets/%s" %
                                      ld.json()["id"],
                                      data=data)
        self._assert_status_code_is(create_response, 200)
        self._assert_has_keys(create_response.json(), "file_ext")
        assert create_response.json()["file_ext"] == "data"
        # Wait for metadata job to finish.
        time.sleep(2)
        data = {'file_ext': 'auto'}
        create_response = self._patch("libraries/datasets/%s" %
                                      ld.json()["id"],
                                      data=data)
        self._assert_status_code_is(create_response, 200)
        self._assert_has_keys(create_response.json(), "file_ext")
        assert create_response.json()["file_ext"] == "txt"

    def test_create_datasets_in_library_from_collection(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasetsFromCollection")
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hdca_id = self.dataset_collection_populator.create_list_in_history(
            history_id, contents=["xxx", "yyy"],
            direct_upload=True).json()["outputs"][0]["id"]
        payload = {
            'from_hdca_id': hdca_id,
            'create_type': 'file',
            'folder_id': folder_id
        }
        create_response = self._post("libraries/%s/contents" % library['id'],
                                     payload)
        self._assert_status_code_is(create_response, 200)

    def test_create_datasets_in_folder_from_collection(self):
        library = self.library_populator.new_private_library(
            "ForCreateDatasetsFromCollection")
        history_id = self.dataset_populator.new_history()
        hdca_id = self.dataset_collection_populator.create_list_in_history(
            history_id, contents=["xxx", "yyy"],
            direct_upload=True).json()["outputs"][0]["id"]
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        payload = {'from_hdca_id': hdca_id}
        create_response = self._post("folders/%s/contents" % folder_id,
                                     payload)
        self._assert_status_code_is(create_response, 200)
        assert len(create_response.json()) == 2
        # Also test that anything different from a flat dataset collection list
        # is refused
        hdca_pair_id = self.dataset_collection_populator.create_list_of_pairs_in_history(
            history_id).json()["outputs"][0]['id']
        payload = {'from_hdca_id': hdca_pair_id}
        create_response = self._post("folders/%s/contents" % folder_id,
                                     payload)
        self._assert_status_code_is(create_response, 501)
        assert create_response.json(
        )['err_msg'] == 'Cannot add nested collections to library. Please flatten your collection first.'

    def _create_folder(self, library):
        create_data = dict(
            folder_id=library["root_folder_id"],
            create_type="folder",
            name="New Folder",
        )
        return self._post("libraries/%s/contents" % library["id"],
                          data=create_data)

    def _create_subfolder(self, containing_folder_id):
        create_data = dict(
            description="new subfolder desc",
            name="New Subfolder",
        )
        return self._post("folders/%s" % containing_folder_id,
                          data=create_data)

    def _create_dataset_in_folder_in_library(self, library_name):
        library = self.library_populator.new_private_library(library_name)
        folder_response = self._create_folder(library)
        self._assert_status_code_is(folder_response, 200)
        folder_id = folder_response.json()[0]['id']
        history_id = self.dataset_populator.new_history()
        hda_id = self.dataset_populator.new_dataset(history_id,
                                                    content="1 2 3")['id']
        payload = {
            'from_hda_id': hda_id,
            'create_type': 'file',
            'folder_id': folder_id
        }
        ld = self._post("libraries/%s/contents" % folder_id, payload)
        return ld
class InteractiveToolsIntegrationTestCase(ContainerizedIntegrationTestCase):

    framework_tool_and_types = True
    container_type = "docker"
    require_uwsgi = True
    enable_realtime_mapping = True

    def setUp(self):
        super(InteractiveToolsIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    def test_simple_execution(self):
        response_dict = self.dataset_populator.run_tool(
            "interactivetool_simple", {}, self.history_id, assert_ok=True)
        assert "jobs" in response_dict, response_dict
        jobs = response_dict["jobs"]
        assert isinstance(jobs, list)
        assert len(jobs) == 1
        job0 = jobs[0]
        entry_points = self.wait_on_entry_points_active(job0["id"])
        assert len(entry_points) == 1
        entry_point0 = entry_points[0]
        target = self.entry_point_target(entry_point0["id"])
        content = self.wait_on_proxied_content(target)
        assert content == "moo cow\n", content

    def test_multi_server_realtime_tool(self):
        response_dict = self.dataset_populator.run_tool(
            "interactivetool_two_entry_points", {},
            self.history_id,
            assert_ok=True)
        assert "jobs" in response_dict, response_dict
        jobs = response_dict["jobs"]
        assert isinstance(jobs, list)
        assert len(jobs) == 1
        job0 = jobs[0]
        entry_points = self.wait_on_entry_points_active(job0["id"])
        assert len(entry_points) == 2
        entry_point0 = entry_points[0]
        entry_point1 = entry_points[1]
        target0 = self.entry_point_target(entry_point0["id"])
        target1 = self.entry_point_target(entry_point1["id"])
        assert target0 != target1
        content0 = self.wait_on_proxied_content(target0)
        assert content0 == "moo cow\n", content0

        content1 = self.wait_on_proxied_content(target1)
        assert content1 == "moo cow\n", content1
        assert False

    def wait_on_proxied_content(self, target):
        def get_hosted_content():
            try:
                scheme, rest = target.split("://", 1)
                prefix, host_and_port = rest.split(".realtime.")
                print(rest)
                faked_host = rest
                if "/" in rest:
                    faked_host = rest.split("/", 1)[0]
                response = requests.get("%s://%s" % (scheme, host_and_port),
                                        timeout=1,
                                        headers={"Host": faked_host})
                return response.content
            except Exception as e:
                print(e)
                return None

        content = wait_on(get_hosted_content,
                          "realtime hosted content at %s" % target)
        return content

    def entry_point_target(self, entry_point_id):
        entry_point_access_response = self._get("entry_points/%s/access" %
                                                entry_point_id)
        api_asserts.assert_status_code_is(entry_point_access_response, 200)
        access_json = entry_point_access_response.json()
        api_asserts.assert_has_key(access_json, "target")
        return access_json["target"]

    def wait_on_entry_points_active(self, job_id, expected_num=1):
        def active_entry_points():
            entry_points = self.entry_points_for_job(job_id)
            if len(entry_points) != expected_num:
                return None
            elif any([not e["active"] for e in entry_points]):
                return None
            else:
                return entry_points

        return wait_on(active_entry_points, "entry points to become active")

    def entry_points_for_job(self, job_id):
        entry_points_response = self._get("entry_points?job_id=%s" % job_id)
        api_asserts.assert_status_code_is(entry_points_response, 200)
        return entry_points_response.json()
Beispiel #45
0
class HistoriesApiTestCase(api.ApiTestCase):

    def setUp(self):
        super(HistoriesApiTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.dataset_collection_populator = DatasetCollectionPopulator(self.galaxy_interactor)

    def test_create_history(self):
        # Create a history.
        create_response = self._create_history("TestHistory1")
        created_id = create_response["id"]

        # Make sure new history appears in index of user's histories.
        index_response = self._get("histories").json()
        indexed_history = [h for h in index_response if h["id"] == created_id][0]
        self.assertEquals(indexed_history["name"], "TestHistory1")

    def test_show_history(self):
        history_id = self._create_history("TestHistoryForShow")["id"]
        show_response = self._show(history_id)
        self._assert_has_key(
            show_response,
            'id', 'name', 'annotation', 'size', 'contents_url',
            'state', 'state_details', 'state_ids'
        )

        state_details = show_response["state_details"]
        state_ids = show_response["state_ids"]
        states = [
            'discarded', 'empty', 'error', 'failed_metadata', 'new',
            'ok', 'paused', 'queued', 'running', 'setting_metadata', 'upload'
        ]
        assert isinstance(state_details, dict)
        assert isinstance(state_ids, dict)
        self._assert_has_keys(state_details, *states)
        self._assert_has_keys(state_ids, *states)

    def test_show_most_recently_used(self):
        history_id = self._create_history("TestHistoryRecent")["id"]
        show_response = self._get("histories/most_recently_used").json()
        assert show_response["id"] == history_id

    def test_index_order(self):
        slightly_older_history_id = self._create_history("TestHistorySlightlyOlder")["id"]
        newer_history_id = self._create_history("TestHistoryNewer")["id"]
        index_response = self._get("histories").json()
        assert index_response[0]["id"] == newer_history_id
        assert index_response[1]["id"] == slightly_older_history_id

    def test_delete(self):
        # Setup a history and ensure it is in the index
        history_id = self._create_history("TestHistoryForDelete")["id"]
        index_response = self._get("histories").json()
        assert index_response[0]["id"] == history_id

        show_response = self._show(history_id)
        assert not show_response["deleted"]

        # Delete the history
        self._delete("histories/%s" % history_id)

        # Check can view it - but it is deleted
        show_response = self._show(history_id)
        assert show_response["deleted"]

        # Verify it is dropped from history index
        index_response = self._get("histories").json()
        assert len(index_response) == 0 or index_response[0]["id"] != history_id

        # Add deleted filter to index to view it
        index_response = self._get("histories", {"deleted": "true"}).json()
        assert index_response[0]["id"] == history_id

    def test_purge(self):
        history_id = self._create_history("TestHistoryForPurge")["id"]
        data = {'purge': True}
        self._delete("histories/%s" % history_id, data=data)
        show_response = self._show(history_id)
        assert show_response["deleted"]
        assert show_response["purged"]

    def test_undelete(self):
        history_id = self._create_history("TestHistoryForDeleteAndUndelete")["id"]
        self._delete("histories/%s" % history_id)
        self._post("histories/deleted/%s/undelete" % history_id)
        show_response = self._show(history_id)
        assert not show_response["deleted"]

    def test_update(self):
        history_id = self._create_history("TestHistoryForUpdating")["id"]

        self._update(history_id, {"name": "New Name"})
        show_response = self._show(history_id)
        assert show_response["name"] == "New Name"

        unicode_name = u'桜ゲノム'
        self._update(history_id, {"name": unicode_name})
        show_response = self._show(history_id)
        assert show_response["name"] == unicode_name, show_response

        quoted_name = "'MooCow'"
        self._update(history_id, {"name": quoted_name})
        show_response = self._show(history_id)
        assert show_response["name"] == quoted_name

        self._update(history_id, {"deleted": True})
        show_response = self._show(history_id)
        assert show_response["deleted"], show_response

        self._update(history_id, {"deleted": False})
        show_response = self._show(history_id)
        assert not show_response["deleted"]

        self._update(history_id, {"published": True})
        show_response = self._show(history_id)
        assert show_response["published"]

        self._update(history_id, {"genome_build": "hg18"})
        show_response = self._show(history_id)
        assert show_response["genome_build"] == "hg18"

        self._update(history_id, {"annotation": "The annotation is cool"})
        show_response = self._show(history_id)
        assert show_response["annotation"] == "The annotation is cool"

        self._update(history_id, {"annotation": unicode_name})
        show_response = self._show(history_id)
        assert show_response["annotation"] == unicode_name, show_response

        self._update(history_id, {"annotation": quoted_name})
        show_response = self._show(history_id)
        assert show_response["annotation"] == quoted_name

    def test_update_invalid_attribute(self):
        history_id = self._create_history("TestHistoryForInvalidUpdating")["id"]
        put_response = self._update(history_id, {"invalidkey": "moo"})
        assert "invalidkey" not in put_response.json()

    def test_update_invalid_types(self):
        history_id = self._create_history("TestHistoryForUpdatingInvalidTypes")["id"]
        for str_key in ["name", "annotation"]:
            assert self._update(history_id, {str_key: False}).status_code == 400

        for bool_key in ['deleted', 'importable', 'published']:
            assert self._update(history_id, {bool_key: "a string"}).status_code == 400

        assert self._update(history_id, {"tags": "a simple string"}).status_code == 400
        assert self._update(history_id, {"tags": [True]}).status_code == 400

    def test_invalid_keys(self):
        invalid_history_id = "1234123412341234"

        assert self._get("histories/%s" % invalid_history_id).status_code == 400
        assert self._update(invalid_history_id, {"name": "new name"}).status_code == 400
        assert self._delete("histories/%s" % invalid_history_id).status_code == 400
        assert self._post("histories/deleted/%s/undelete" % invalid_history_id).status_code == 400

    def test_create_anonymous_fails(self):
        post_data = dict(name="CannotCreate")
        # Using lower-level _api_url will cause key to not be injected.
        histories_url = self._api_url("histories")
        create_response = post(url=histories_url, data=post_data)
        self._assert_status_code_is(create_response, 403)

    def test_import_export(self):
        history_name = "for_export"
        history_id = self.dataset_populator.new_history(name=history_name)
        self.dataset_populator.new_dataset(history_id, content="1 2 3")
        imported_history_id = self._reimport_history(history_id, history_name)

        contents_response = self._get("histories/%s/contents" % imported_history_id)
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        assert len(contents) == 1
        imported_content = self.dataset_populator.get_history_dataset_content(
            history_id=imported_history_id,
            dataset_id=contents[0]["id"]
        )
        assert imported_content == "1 2 3\n"

    def test_import_export_collection(self):
        from nose.plugins.skip import SkipTest
        raise SkipTest("Collection import/export not yet implemented")

        history_name = "for_export_with_collections"
        history_id = self.dataset_populator.new_history(name=history_name)
        self.dataset_collection_populator.create_list_in_history(history_id, contents=["Hello", "World"])

        imported_history_id = self._reimport_history(history_id, history_name)

        contents_response = self._get("histories/%s/contents" % imported_history_id)
        self._assert_status_code_is(contents_response, 200)
        contents = contents_response.json()
        assert len(contents) == 3

    def _reimport_history(self, history_id, history_name):
        # Ensure the history is ready to go...
        self.dataset_populator.wait_for_history(history_id, assert_ok=True)

        # Export the history.
        download_path = self._export(history_id)

        # Create download for history
        full_download_url = "%s%s?key=%s" % (self.url, download_path, self.galaxy_interactor.api_key)
        download_response = get(full_download_url)
        self._assert_status_code_is(download_response, 200)

        def history_names():
            history_index = self._get("histories")
            return dict((h["name"], h) for h in history_index.json())

        import_name = "imported from archive: %s" % history_name
        assert import_name not in history_names()

        import_data = dict(archive_source=full_download_url, archive_type="url")
        import_response = self._post("histories", data=import_data)

        self._assert_status_code_is(import_response, 200)

        def has_history_with_name():
            histories = history_names()
            return histories.get(import_name, None)

        imported_history = wait_on(has_history_with_name, desc="import history")
        imported_history_id = imported_history["id"]
        self.dataset_populator.wait_for_history(imported_history_id)

        return imported_history_id

    def test_create_tag(self):
        post_data = dict(name="TestHistoryForTag")
        history_id = self._post("histories", data=post_data).json()["id"]
        tag_data = dict(value="awesometagvalue")
        tag_url = "histories/%s/tags/awesometagname" % history_id
        tag_create_response = self._post(tag_url, data=tag_data)
        self._assert_status_code_is(tag_create_response, 200)

    def _export(self, history_id):
        export_url = self._api_url("histories/%s/exports" % history_id, use_key=True)
        put_response = put(export_url)
        self._assert_status_code_is(put_response, 202)

        def export_ready_response():
            put_response = put(export_url)
            if put_response.status_code == 202:
                return None
            return put_response

        put_response = wait_on(export_ready_response, desc="export ready")
        self._assert_status_code_is(put_response, 200)
        response = put_response.json()
        self._assert_has_keys(response, "download_url")
        download_path = response["download_url"]
        return download_path

    def _show(self, history_id):
        return self._get("histories/%s" % history_id).json()

    def _update(self, history_id, data):
        update_url = self._api_url("histories/%s" % history_id, use_key=True)
        put_response = put(update_url, json=data)
        return put_response

    def _create_history(self, name):
        post_data = dict(name=name)
        create_response = self._post("histories", data=post_data).json()
        self._assert_has_keys(create_response, "name", "id")
        self.assertEquals(create_response["name"], name)
        return create_response
Beispiel #46
0
class ScriptsIntegrationTestCase(integration_util.IntegrationTestCase):
    def setUp(self):
        super(ScriptsIntegrationTestCase, self).setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.config_dir = tempfile.mkdtemp()

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        cls._raw_config = config

    def test_helper(self):
        script = "helper.py"
        self._scripts_check_argparse_help(script)

        history_id = self.dataset_populator.new_history()
        dataset = self.dataset_populator.new_dataset(history_id, wait=True)
        dataset_id = dataset["id"]
        config_file = self.write_config_file()
        output = self._scripts_check_output(
            script, ["-c", config_file, "--decode-id", dataset_id])
        assert "Decoded " in output

    def test_cleanup(self):
        script = "cleanup_datasets/cleanup_datasets.py"
        self._scripts_check_argparse_help(script)

        history_id = self.dataset_populator.new_history()
        delete_response = self.dataset_populator._delete("histories/%s" %
                                                         history_id)
        assert delete_response.status_code == 200
        assert delete_response.json()["purged"] is False
        config_file = self.write_config_file()
        output = self._scripts_check_output(
            script, ["-c", config_file, "--days", "0", "--purge_histories"])
        print(output)
        history_response = self.dataset_populator._get("histories/%s" %
                                                       history_id)
        assert history_response.status_code == 200
        assert history_response.json(
        )["purged"] is True, history_response.json()

    def test_pgcleanup(self):
        self._skip_if_not_postgres()

        script = "cleanup_datasets/pgcleanup.py"
        self._scripts_check_argparse_help(script)

        history_id = self.dataset_populator.new_history()
        delete_response = self.dataset_populator._delete("histories/%s" %
                                                         history_id)
        assert delete_response.status_code == 200
        assert delete_response.json()["purged"] is False
        config_file = self.write_config_file()
        output = self._scripts_check_output(script, [
            "-c", config_file, "--older-than", "0", "--sequence",
            "purge_deleted_histories"
        ])
        print(output)
        history_response = self.dataset_populator._get("histories/%s" %
                                                       history_id)
        assert history_response.status_code == 200
        assert history_response.json(
        )["purged"] is True, history_response.json()

    def test_set_user_disk_usage(self):
        script = "set_user_disk_usage.py"
        self._scripts_check_argparse_help(script)

        history_id = self.dataset_populator.new_history()
        self.dataset_populator.new_dataset(history_id, wait=True)
        config_file = self.write_config_file()
        output = self._scripts_check_output(script, ["-c", config_file])
        # verify the script runs to completion without crashing
        assert "100% complete" in output, output

    def test_set_dataset_sizes(self):
        script = "set_dataset_sizes.py"
        self._scripts_check_argparse_help(script)

        # TODO: change the size of the dataset and verify this works.
        history_id = self.dataset_populator.new_history()
        self.dataset_populator.new_dataset(history_id, wait=True)
        config_file = self.write_config_file()
        output = self._scripts_check_output(script, ["-c", config_file])
        # verify the script runs to completion without crashing
        assert "Completed 100%" in output, output

    def test_populate_uuid(self):
        script = "cleanup_datasets/populate_uuid.py"
        self._scripts_check_argparse_help(script)

        history_id = self.dataset_populator.new_history()
        self.dataset_populator.new_dataset(history_id, wait=True)
        config_file = self.write_config_file()
        output = self._scripts_check_output(script, ["-c", config_file])
        assert "Complete" in output

    @integration_util.skip_if_jenkins
    def test_grt_export(self):
        script = "grt/export.py"
        self._scripts_check_argparse_help(script)

        history_id = self.dataset_populator.new_history()
        self.dataset_populator.new_dataset(history_id, wait=True)
        config_file = self.write_config_file()
        grt_config_file = os.path.join(self.config_dir, "grt.yml")
        with open(grt_config_file, "w") as f:
            yaml.dump(
                {
                    "grt": {
                        "share_toolbox": True
                    },
                    "sanitization": {
                        "tools": []
                    },
                    "tool_params": {}
                }, f)
        self._scripts_check_output(
            script,
            ["-c", config_file, "-g", grt_config_file, "-r", self.config_dir])
        report_files = os.listdir(self.config_dir)
        json_files = [j for j in report_files if j.endswith(".json")]
        assert len(json_files
                   ) == 1, "Expected one json report file in [%s]" % json_files
        json_file = os.path.join(self.config_dir, json_files[0])
        with open(json_file, "r") as f:
            export = json.load(f)
        assert export["version"] == 1

    def test_admin_cleanup_datasets(self):
        self._scripts_check_argparse_help(
            "cleanup_datasets/admin_cleanup_datasets.py")

    @skip_unless_module("flask_socketio")
    def test_communication_server(self):
        self._scripts_check_argparse_help(
            "communication/communication_server.py")

    def test_secret_decoder_ring(self):
        script = "secret_decoder_ring.py"
        self._scripts_check_argparse_help(script)

        config_file = self.write_config_file()
        output = self._scripts_check_output(script,
                                            ["-c", config_file, "encode", "1"])
        encoded_id = output.strip()

        output = self._scripts_check_output(
            script, ["-c", config_file, "decode", encoded_id])
        assert output.strip() == "1"

    def test_database_scripts(self):
        self._scripts_check_argparse_help("create_db.py")
        self._scripts_check_argparse_help("manage_db.py")
        # TODO: test creating a smaller database - e.g. tool install database based on fresh
        # config file.

    def test_runtime_stats(self):
        self._skip_if_not_postgres()
        self._scripts_check_argparse_help("runtime_stats.py")

    def _skip_if_not_postgres(self):
        if not self._app.config.database_connection.startswith("post"):
            raise unittest.SkipTest("Test only valid for postgres")

    def _scripts_check_argparse_help(self, script):
        # Test imports and argparse repsonse to --help with 0 exit code.
        output = self._scripts_check_output(script, ["--help"])
        # Test -h, --help in printed output message.
        assert "-h, --help" in output

    def _scripts_check_output(self, script, args):
        cwd = galaxy_directory()
        cmd = ["python", os.path.join(cwd, "scripts", script)] + args
        clean_env = {
            "PATH": os.environ.get("PATH", None),
        }  # Don't let testing environment variables interfere with config.
        return subprocess.check_output(cmd, cwd=cwd, env=clean_env)

    def write_config_file(self):
        config_dir = self.config_dir
        path = os.path.join(config_dir, "galaxy.yml")
        self._test_driver.temp_directories.extend([config_dir])
        with open(path, "w") as f:
            yaml.dump({"galaxy": self._raw_config}, f)

        return path