Ejemplo n.º 1
0
 def setUp(self):
     """Prepare a mock Pulsar client plus canned job description/config fixtures."""
     super(TestStager, self).setUp()
     from .test_utils import get_test_tool
     self.tool = get_test_tool()
     self.client = MockClient(self.temp_directory, self.tool)
     staged_inputs = self.__setup_inputs()
     # Canned Galaxy-side description of the job being staged.
     self.client_job_description = ClientJobDescription(
         tool=self.tool,
         command_line="run_test.exe",
         config_files=[],
         input_files=staged_inputs,
         client_outputs=ClientOutputs("/galaxy/database/working_directory/1", []),
         working_directory="/galaxy/database/working_directory/1",
         dependencies_description=DependenciesDescription(requirements=[TEST_REQUIREMENT_1, TEST_REQUIREMENT_2]),
         env=[TEST_ENV_1],
         rewrite_paths=False,
     )
     # Remote (Pulsar-side) directory layout the staging code rewrites against.
     self.job_config = {
         "configs_directory": "/pulsar/staging/1/configs",
         "working_directory": "/pulsar/staging/1/working",
         "outputs_directory": "/pulsar/staging/1/outputs",
         "system_properties": {
             "separator": "\\",
         },
     }
Ejemplo n.º 2
0
    def queue_job(self, job_wrapper):
        """Prepare, stage and submit ``job_wrapper`` to a remote Pulsar server.

        Builds a ``ClientJobDescription`` from the prepared command line and
        compute environment, submits it via ``pulsar_submit_job``, records
        the remote job id on the job destination, and hands the job over to
        the asynchronous monitor.  On any exception the job is failed and no
        monitor state is queued.
        """
        job_destination = job_wrapper.job_destination
        self._populate_parameter_defaults(job_destination)

        command_line, client, remote_job_config, compute_environment = self.__prepare_job(
            job_wrapper, job_destination)

        # An empty command line means preparation failed; __prepare_job has
        # already dealt with the job, so just bail out.
        if not command_line:
            return

        try:
            dependencies_description = PulsarJobRunner.__dependencies_description(
                client, job_wrapper)
            # Paths are rewritten client-side unless the destination asks the
            # remote Pulsar server to rewrite parameters itself.
            rewrite_paths = not PulsarJobRunner.__rewrite_parameters(client)
            unstructured_path_rewrites = {}
            output_names = []
            if compute_environment:
                unstructured_path_rewrites = compute_environment.unstructured_path_rewrites
                output_names = compute_environment.output_names()

            if self.app.config.metadata_strategy == "legacy":
                # Drop this branch in 19.09.
                metadata_directory = job_wrapper.working_directory
            else:
                metadata_directory = os.path.join(
                    job_wrapper.working_directory, "metadata")

            client_job_description = ClientJobDescription(
                command_line=command_line,
                input_files=self.get_input_files(job_wrapper),
                client_outputs=self.__client_outputs(client, job_wrapper),
                working_directory=job_wrapper.tool_working_directory,
                metadata_directory=metadata_directory,
                tool=job_wrapper.tool,
                config_files=job_wrapper.extra_filenames,
                dependencies_description=dependencies_description,
                env=client.env,
                rewrite_paths=rewrite_paths,
                arbitrary_files=unstructured_path_rewrites,
                touch_outputs=output_names,
            )
            job_id = pulsar_submit_job(client, client_job_description,
                                       remote_job_config)
            log.info("Pulsar job submitted with job_id %s" % job_id)
            # Persist the external id so the job can be recovered/monitored.
            job_wrapper.set_job_destination(job_destination, job_id)
            job_wrapper.change_state(model.Job.states.QUEUED)
        except Exception:
            job_wrapper.fail("failure running job", exception=True)
            log.exception("failure running job %d", job_wrapper.job_id)
            return

        # Register the submitted job with the asynchronous monitor thread.
        pulsar_job_state = AsynchronousJobState()
        pulsar_job_state.job_wrapper = job_wrapper
        pulsar_job_state.job_id = job_id
        pulsar_job_state.old_state = True
        pulsar_job_state.running = False
        pulsar_job_state.job_destination = job_destination
        self.monitor_job(pulsar_job_state)
Ejemplo n.º 3
0
def _run_client_for_job(args):
    """Submit one Pulsar job described by parsed CLI *args* and wait for it.

    Generates a job id when none was supplied, collects dynamic output
    patterns (explicit regexes plus glob patterns translated to regexes),
    submits the job, blocks until completion, optionally dumps the raw
    status response to a JSON file, and finishes/stages back the outputs.

    Returns the failure indicator reported by ``finish_job`` (falsy on
    success).  The client manager is always shut down, even on error.
    """
    if args.job_id is None:
        # No explicit id supplied - generate a unique one.
        args.job_id = str(uuid.uuid4())
    output_patterns = []
    output_patterns.extend(args.output_pattern)
    for output in args.output:
        # Glob-style outputs are converted to the regex form Pulsar expects.
        output_patterns.append(fnmatch.translate(output))

    client_options = extract_client_options(args)
    client, client_manager = client_info(args, client_options)
    try:
        working_directory = args.working_directory
        client_outputs = ClientOutputs(
            working_directory=working_directory,
            dynamic_outputs=output_patterns,
        )
        job_description = ClientJobDescription(
            command_line=args.command,
            working_directory=working_directory,
            client_outputs=client_outputs,
        )
        submit_job(client, job_description)
        waiter = Waiter(client, client_manager)
        result_status = waiter.wait()
        pulsar_outputs = PulsarOutputs.from_status_response(result_status)
        if args.result_json:
            # Use a context manager so the handle is closed deterministically
            # (previously the open file was left for the garbage collector).
            with open(args.result_json, "w") as result_file:
                result_file.write(json_dumps(result_status))
        finish_args = dict(
            client=client,
            job_completed_normally=True,
            cleanup_job=args.cleanup,
            client_outputs=client_outputs,
            pulsar_outputs=pulsar_outputs,
        )
        failed = finish_job(**finish_args)
        return failed
    finally:
        client_manager.shutdown()
Ejemplo n.º 4
0
def run(options):
    """End-to-end exercise of a Pulsar client against a scratch job layout.

    Creates a temporary directory tree (tool, working dir, index files,
    shared data), writes a test script plus inputs, submits the job through
    a client built from *options*, waits for completion, then asserts the
    expected outputs, version file, and path-rewriting behavior.  The
    temporary tree is always removed and the waiter shut down on exit.
    """
    waiter = None
    try:
        temp_directory = tempfile.mkdtemp()
        temp_index_dir = os.path.join(temp_directory, "idx", "bwa")
        temp_index_dir_sibbling = os.path.join(temp_directory, "idx", "seq")
        temp_shared_dir = os.path.join(temp_directory, "shared", "test1")
        temp_work_dir = os.path.join(temp_directory, "w")
        temp_tool_dir = os.path.join(temp_directory, "t")

        __makedirs([temp_tool_dir, temp_work_dir, temp_index_dir, temp_index_dir_sibbling, temp_shared_dir])

        temp_input_path = os.path.join(temp_directory, "dataset_0.dat")
        temp_input_extra_path = os.path.join(temp_directory, "dataset_0_files", "input_subdir", "extra")
        temp_index_path = os.path.join(temp_index_dir, "human.fa")

        temp_config_path = os.path.join(temp_work_dir, "config.txt")
        temp_tool_path = os.path.join(temp_directory, "t", "script.py")
        temp_output_path = os.path.join(temp_directory, "dataset_1.dat")
        temp_output2_path = os.path.join(temp_directory, "dataset_2.dat")
        temp_output3_path = os.path.join(temp_directory, "dataset_3.dat")
        temp_output4_path = os.path.join(temp_directory, "dataset_4.dat")
        temp_version_output_path = os.path.join(temp_directory, "GALAXY_VERSION_1234")
        temp_output_workdir_destination = os.path.join(temp_directory, "dataset_77.dat")
        temp_output_workdir = os.path.join(temp_work_dir, "env_test")

        temp_output_workdir_destination2 = os.path.join(temp_directory, "dataset_78.dat")
        temp_output_workdir2 = os.path.join(temp_work_dir, "rewrite_action_test")

        __write_to_file(temp_input_path, b"Hello world input!!@!")
        __write_to_file(temp_input_extra_path, b"INPUT_EXTRA_CONTENTS")
        __write_to_file(temp_config_path, EXPECTED_OUTPUT)
        __write_to_file(temp_tool_path, TEST_SCRIPT)
        __write_to_file(temp_index_path, b"AGTC")
        # Implicit files that should also get transferred since depth > 0
        __write_to_file("%s.fai" % temp_index_path, b"AGTC")
        __write_to_file(os.path.join(temp_index_dir_sibbling, "human_full_seqs"), b"AGTC")

        # Deliberately nonexistent input path - exercises empty-input handling.
        empty_input = u"/foo/bar/x"

        test_unicode = getattr(options, "test_unicode", False)  # TODO Switch this in integration tests
        cmd_text = EXAMPLE_UNICODE_TEXT if test_unicode else "Hello World"
        command_line_params = (
            temp_tool_path,
            temp_config_path,
            temp_input_path,
            temp_output_path,
            empty_input,
            temp_output2_path,
            cmd_text,
            temp_output3_path,
            temp_input_extra_path,
            temp_version_output_path,
            temp_index_path,
            temp_output4_path,
            temp_shared_dir,
        )
        assert os.path.exists(temp_index_path)
        command_line = u'python %s "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s"' % command_line_params
        config_files = [temp_config_path]
        input_files = [temp_input_path, empty_input]
        output_files = [
            temp_output_path,
            temp_output2_path,
            temp_output3_path,
            temp_output4_path,
            temp_output_workdir_destination,
            temp_output_workdir_destination2
        ]
        client, client_manager = __client(temp_directory, options)
        waiter = Waiter(client, client_manager)
        client_outputs = ClientOutputs(
            working_directory=temp_work_dir,
            work_dir_outputs=[
                (temp_output_workdir, temp_output_workdir_destination),
                (temp_output_workdir2, temp_output_workdir_destination2),
            ],
            output_files=output_files,
            version_file=temp_version_output_path,
        )

        job_description = ClientJobDescription(
            command_line=command_line,
            tool=MockTool(temp_tool_dir),
            config_files=config_files,
            input_files=input_files,
            client_outputs=client_outputs,
            working_directory=temp_work_dir,
            **__extra_job_description_kwargs(options)
        )
        submit_job(client, job_description)
        result_status = waiter.wait()

        __finish(options, client, client_outputs, result_status)
        __assert_contents(temp_output_path, EXPECTED_OUTPUT, result_status)
        __assert_contents(temp_output2_path, cmd_text, result_status)
        __assert_contents(os.path.join(temp_work_dir, "galaxy.json"), b"GALAXY_JSON", result_status)
        __assert_contents(os.path.join(temp_directory, "dataset_1_files", "extra"), b"EXTRA_OUTPUT_CONTENTS", result_status)
        if getattr(options, "test_rewrite_action", False):
            __assert_contents(temp_output_workdir_destination2, os.path.join(temp_directory, "shared2", "test1"), result_status)
        if job_description.env:
            __assert_contents(temp_output_workdir_destination, b"TEST_ENV_VALUE", result_status)
        __assert_contents(temp_version_output_path, b"1.0.1", result_status)
        if job_description.dependencies_description:
            __assert_contents(temp_output3_path, "moo_override", result_status)
        else:
            __assert_contents(temp_output3_path, "moo_default", result_status)
        if client.default_file_action != "none":
            # Close the handle deterministically (the original leaked it).
            with open(temp_output4_path, 'r', encoding='utf-8') as rewritten_file:
                rewritten_index_path = rewritten_file.read()
            # Path written to this file will differ between Windows and Linux.
            assert re.search(r"123456[/\\]unstructured[/\\]\w+[/\\]bwa[/\\]human.fa", rewritten_index_path) is not None
        __exercise_errors(options, client, temp_output_path, temp_directory)
        client_manager.shutdown()
    except BaseException:
        if not options.suppress_output:
            traceback.print_exc()
        raise
    finally:
        if waiter is not None:
            waiter.shutdown()
        shutil.rmtree(temp_directory)
Ejemplo n.º 5
0
def run(options):
    """End-to-end exercise of a Pulsar client including metadata staging.

    Like the simpler integration run, but additionally stages a metadata
    directory, explicit ``ClientInput`` objects (input path, extra-files
    path, input-metadata path), and asserts stdout truncation plus
    metadata outputs.  The scratch tree is always removed on exit.
    """
    waiter = None
    try:
        temp_directory = tempfile.mkdtemp()
        temp_index_dir = os.path.join(temp_directory, "idx", "bwa")
        temp_index_dir_sibbling = os.path.join(temp_directory, "idx", "seq")
        temp_shared_dir = os.path.join(temp_directory, "shared", "test1")
        temp_work_dir = os.path.join(temp_directory, "w")
        temp_metadata_dir = os.path.join(temp_directory, "m")
        temp_false_working_dir = os.path.join(temp_metadata_dir, "working")
        temp_tool_dir = os.path.join(temp_directory, "t")

        __makedirs([
            temp_tool_dir,
            temp_work_dir,
            temp_index_dir,
            temp_index_dir_sibbling,
            temp_shared_dir,
            temp_metadata_dir,
            temp_false_working_dir,
        ])

        temp_input_path = os.path.join(temp_directory, "dataset_0.dat")
        temp_input_extra_path = os.path.join(temp_directory, "dataset_0_files",
                                             "input_subdir", "extra")
        temp_input_metadata_path = os.path.join(temp_directory, "metadata",
                                                "12312231231231.dat")
        temp_index_path = os.path.join(temp_index_dir, "human.fa")

        temp_config_path = os.path.join(temp_work_dir, "config.txt")
        temp_tool_path = os.path.join(temp_directory, "t", "script.py")
        temp_output_path = os.path.join(temp_directory, "dataset_1.dat")
        temp_output2_path = os.path.join(temp_directory, "dataset_2.dat")
        temp_output3_path = os.path.join(temp_directory, "dataset_3.dat")
        temp_output4_path = os.path.join(temp_directory, "dataset_4.dat")
        temp_version_output_path = os.path.join(temp_directory,
                                                "GALAXY_VERSION_1234")
        temp_output_workdir_destination = os.path.join(temp_directory,
                                                       "dataset_77.dat")
        temp_output_workdir = os.path.join(temp_work_dir, "env_test")
        temp_metadata_path = os.path.join(temp_metadata_dir,
                                          "metadata_test123")

        temp_output_workdir_destination2 = os.path.join(
            temp_directory, "dataset_78.dat")
        temp_output_workdir2 = os.path.join(temp_work_dir,
                                            "rewrite_action_test")

        __write_to_file(temp_input_path, b"Hello world input!!@!")
        __write_to_file(temp_input_extra_path, b"INPUT_EXTRA_CONTENTS")
        __write_to_file(temp_input_metadata_path,
                        b"INPUT METADATA CONTENTS...")
        __write_to_file(temp_config_path, EXPECTED_OUTPUT)
        __write_to_file(temp_metadata_path, "meta input")
        __write_to_file(temp_tool_path, TEST_SCRIPT)
        __write_to_file(temp_index_path, b"AGTC")
        # Implicit files that should also get transferred since depth > 0
        __write_to_file("%s.fai" % temp_index_path, b"AGTC")
        __write_to_file(
            os.path.join(temp_index_dir_sibbling, "human_full_seqs"), b"AGTC")

        # Deliberately nonexistent path - not registered as a ClientInput below.
        empty_input = u"/foo/bar/x"

        test_unicode = getattr(options, "test_unicode",
                               False)  # TODO Switch this in integration tests
        cmd_text = EXAMPLE_UNICODE_TEXT if test_unicode else "Hello World"
        command_line_params = (
            temp_tool_path,
            temp_config_path,
            temp_input_path,
            temp_output_path,
            empty_input,
            temp_output2_path,
            cmd_text,
            temp_output3_path,
            temp_input_extra_path,
            temp_version_output_path,
            temp_index_path,
            temp_output4_path,
            temp_shared_dir,
            temp_metadata_path,
            temp_input_metadata_path,
        )
        assert os.path.exists(temp_index_path)
        command_line = u'python %s "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s" "%s"' % command_line_params
        config_files = [temp_config_path]
        client_inputs = []
        client_inputs.append(
            ClientInput(temp_input_path, CLIENT_INPUT_PATH_TYPES.INPUT_PATH))
        # NOTE(review): temp_input_path is appended twice with the same path
        # type - looks like a copy/paste duplicate; confirm whether duplicate
        # inputs are being exercised deliberately before removing.
        client_inputs.append(
            ClientInput(temp_input_path, CLIENT_INPUT_PATH_TYPES.INPUT_PATH))
        # Reverting empty input handling added in:
        #  https://github.com/galaxyproject/pulsar/commit/2fb36ba979cf047a595c53cdef833cae79cbb380
        # Seems like it really should cause a failure.
        # client_inputs.append(ClientInput(empty_input, CLIENT_INPUT_PATH_TYPES.INPUT_PATH))
        client_inputs.append(
            ClientInput(os.path.join(temp_directory, "dataset_0_files"),
                        CLIENT_INPUT_PATH_TYPES.INPUT_EXTRA_FILES_PATH))
        client_inputs.append(
            ClientInput(temp_input_metadata_path,
                        CLIENT_INPUT_PATH_TYPES.INPUT_METADATA_PATH))
        output_files = [
            temp_output_path, temp_output2_path, temp_output3_path,
            temp_output4_path, temp_output_workdir_destination,
            temp_output_workdir_destination2
        ]
        client, client_manager = __client(temp_directory, options)
        waiter = Waiter(client, client_manager)
        client_outputs = ClientOutputs(
            working_directory=temp_work_dir,
            metadata_directory=temp_metadata_dir,
            work_dir_outputs=[
                (temp_output_workdir, temp_output_workdir_destination),
                (temp_output_workdir2, temp_output_workdir_destination2),
            ],
            output_files=output_files,
            version_file=temp_version_output_path,
        )

        job_description = ClientJobDescription(
            command_line=command_line,
            tool=MockTool(temp_tool_dir),
            config_files=config_files,
            client_inputs=ClientInputs(client_inputs),
            client_outputs=client_outputs,
            working_directory=temp_work_dir,
            metadata_directory=temp_metadata_dir,
            **__extra_job_description_kwargs(options))
        submit_job(client, job_description)
        result_status = waiter.wait()

        # stdout may be truncated server-side; the reported text must be a
        # prefix of the full "stdout output" the script writes.
        stdout = result_status["stdout"].strip()
        assert "stdout output".startswith(stdout)
        if hasattr(options, "maximum_stream_size"):
            assert len(stdout) == options.maximum_stream_size

        __finish(options, client, client_outputs, result_status)
        __assert_contents(temp_output_path, EXPECTED_OUTPUT, result_status)
        __assert_contents(temp_output2_path, cmd_text, result_status)
        __assert_contents(os.path.join(temp_work_dir, "galaxy.json"),
                          b"GALAXY_JSON", result_status)
        __assert_contents(
            os.path.join(temp_directory, "dataset_1_files", "extra"),
            b"EXTRA_OUTPUT_CONTENTS", result_status)
        __assert_contents(os.path.join(temp_metadata_dir, "metadata_output"),
                          b"meta output", result_status)
        if getattr(options, "test_rewrite_action", False):
            __assert_contents(temp_output_workdir_destination2,
                              os.path.join(temp_directory, "shared2", "test1"),
                              result_status)
        if job_description.env:
            __assert_contents(temp_output_workdir_destination,
                              b"TEST_ENV_VALUE", result_status)
        __assert_contents(temp_version_output_path, b"1.0.1", result_status)
        if job_description.dependencies_description:
            __assert_contents(temp_output3_path, "moo_override", result_status)
        else:
            __assert_contents(temp_output3_path, "moo_default", result_status)
        __assert_has_rewritten_bwa_path(client, temp_output4_path)
        __exercise_errors(options, client, temp_output_path, temp_directory)
        client_manager.shutdown()
    except BaseException:
        if not options.suppress_output:
            traceback.print_exc()
        raise
    finally:
        if waiter is not None:
            waiter.shutdown()
        shutil.rmtree(temp_directory)
Ejemplo n.º 6
0
    def queue_job(self, job_wrapper):
        """Prepare, stage and submit ``job_wrapper`` to a remote Pulsar server.

        Newer variant that supports a remote container, structured
        ``ClientInput`` staging driven by the compute environment, remote
        Pulsar app configuration, and transfer of an existing
        ``tool_script.sh``.  On any exception the job is failed and no
        monitor state is queued.
        """
        job_destination = job_wrapper.job_destination
        self._populate_parameter_defaults(job_destination)

        command_line, client, remote_job_config, compute_environment, remote_container = self.__prepare_job(job_wrapper, job_destination)

        # An empty command line means preparation failed; __prepare_job has
        # already dealt with the job, so just bail out.
        if not command_line:
            return

        try:
            dependencies_description = PulsarJobRunner.__dependencies_description(client, job_wrapper)
            # Paths are rewritten client-side unless the destination asks the
            # remote Pulsar server to rewrite parameters itself.
            rewrite_paths = not PulsarJobRunner.__rewrite_parameters(client)
            path_rewrites_unstructured = {}
            output_names = []
            if compute_environment:
                path_rewrites_unstructured = compute_environment.path_rewrites_unstructured
                output_names = compute_environment.output_names()

                # With a compute environment, inputs are described as typed
                # ClientInput objects instead of a flat input_files list.
                client_inputs_list = []
                for input_dataset_wrapper in job_wrapper.get_input_paths():
                    # str here to resolve false_path if set on a DatasetPath object.
                    path = str(input_dataset_wrapper)
                    object_store_ref = {
                        "dataset_id": input_dataset_wrapper.dataset_id,
                        "dataset_uuid": str(input_dataset_wrapper.dataset_uuid),
                        "object_store_id": input_dataset_wrapper.object_store_id,
                    }
                    client_inputs_list.append(ClientInput(path, CLIENT_INPUT_PATH_TYPES.INPUT_PATH, object_store_ref=object_store_ref))

                for input_extra_path in compute_environment.path_rewrites_input_extra.keys():
                    # TODO: track dataset for object_store_ref...
                    client_inputs_list.append(ClientInput(input_extra_path, CLIENT_INPUT_PATH_TYPES.INPUT_EXTRA_FILES_PATH))

                for input_metadata_path in compute_environment.path_rewrites_input_metadata.keys():
                    # TODO: track dataset for object_store_ref...
                    client_inputs_list.append(ClientInput(input_metadata_path, CLIENT_INPUT_PATH_TYPES.INPUT_METADATA_PATH))

                input_files = None
                client_inputs = ClientInputs(client_inputs_list)
            else:
                input_files = self.get_input_files(job_wrapper)
                client_inputs = None

            if self.app.config.metadata_strategy == "legacy":
                # Drop this branch in 19.09.
                metadata_directory = job_wrapper.working_directory
            else:
                metadata_directory = os.path.join(job_wrapper.working_directory, "metadata")

            remote_pulsar_app_config = job_destination.params.get("pulsar_app_config", {})
            job_directory_files = []
            config_files = job_wrapper.extra_filenames
            # Ship the generated tool script alongside the config files when
            # one exists in the working directory.
            tool_script = os.path.join(job_wrapper.working_directory, "tool_script.sh")
            if os.path.exists(tool_script):
                log.debug("Registering tool_script for Pulsar transfer [%s]" % tool_script)
                job_directory_files.append(tool_script)
            client_job_description = ClientJobDescription(
                command_line=command_line,
                input_files=input_files,
                client_inputs=client_inputs,  # Only one of these input defs should be non-None
                client_outputs=self.__client_outputs(client, job_wrapper),
                working_directory=job_wrapper.tool_working_directory,
                metadata_directory=metadata_directory,
                tool=job_wrapper.tool,
                config_files=config_files,
                dependencies_description=dependencies_description,
                env=client.env,
                rewrite_paths=rewrite_paths,
                arbitrary_files=path_rewrites_unstructured,
                touch_outputs=output_names,
                remote_pulsar_app_config=remote_pulsar_app_config,
                job_directory_files=job_directory_files,
                container=None if not remote_container else remote_container.container_id,
            )
            job_id = pulsar_submit_job(client, client_job_description, remote_job_config)
            log.info("Pulsar job submitted with job_id %s" % job_id)
            # Persist the external id so the job can be recovered/monitored.
            job_wrapper.set_job_destination(job_destination, job_id)
            job_wrapper.change_state(model.Job.states.QUEUED)
        except Exception:
            job_wrapper.fail("failure running job", exception=True)
            log.exception("failure running job %d", job_wrapper.job_id)
            return

        # Register the submitted job with the asynchronous monitor thread.
        pulsar_job_state = AsynchronousJobState()
        pulsar_job_state.job_wrapper = job_wrapper
        pulsar_job_state.job_id = job_id
        pulsar_job_state.old_state = True
        pulsar_job_state.running = False
        pulsar_job_state.job_destination = job_destination
        self.monitor_job(pulsar_job_state)