Example #1
    def __command_line(self, job_wrapper):
        """
        """
        command_line = job_wrapper.runner_command_line

        # ``slots`` would be a cleaner parameter name, but we don't want
        # deployers to see these examples and assume it will work with other
        # job runners.
        slots = job_wrapper.job_destination.params.get(
            "local_slots", None) or os.environ.get("GALAXY_SLOTS", None)
        if slots:
            slots_statement = 'GALAXY_SLOTS="%d"; export GALAXY_SLOTS; GALAXY_SLOTS_CONFIGURED="1"; export GALAXY_SLOTS_CONFIGURED;' % (
                int(slots))
        else:
            slots_statement = 'GALAXY_SLOTS="1"; export GALAXY_SLOTS;'

        job_id = job_wrapper.get_id_tag()
        job_file = JobState.default_job_file(job_wrapper.working_directory,
                                             job_id)
        exit_code_path = default_exit_code_file(job_wrapper.working_directory,
                                                job_id)
        job_script_props = {
            'slots_statement': slots_statement,
            'command': command_line,
            'exit_code_path': exit_code_path,
            'working_directory': job_wrapper.working_directory,
            'shell': job_wrapper.shell,
        }
        job_file_contents = self.get_job_file(job_wrapper, **job_script_props)
        self.write_executable_script(job_file, job_file_contents)
        return job_file, exit_code_path
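
Every example below derives per-job file locations from JobState.default_job_file and default_exit_code_file. A minimal sketch of what such helpers could look like; the exact file names (galaxy_<id>.sh, galaxy_<id>.ec) are an assumption for illustration, not a guaranteed layout:

import os


def default_job_file(files_dir, id_tag):
    # Shell script the runner writes and then executes for the job.
    return os.path.join(files_dir, "galaxy_%s.sh" % id_tag)


def default_exit_code_file(files_dir, id_tag):
    # File the job script writes the tool's numeric exit code into.
    return os.path.join(files_dir, "galaxy_%s.ec" % id_tag)
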
Example #2
 def set_defaults(self, files_dir):
     if self.job_wrapper is not None:
         id_tag = self.job_wrapper.get_id_tag()
         if files_dir is not None:
             self.job_file = JobState.default_job_file(files_dir, id_tag)
             self.output_file = os.path.join(files_dir, 'galaxy_%s.o' % id_tag)
             self.error_file = os.path.join(files_dir, 'galaxy_%s.e' % id_tag)
             self.exit_code_file = default_exit_code_file(files_dir, id_tag)
         job_name = 'g%s' % id_tag
         if self.job_wrapper.tool.old_id:
             job_name += '_%s' % self.job_wrapper.tool.old_id
         if not self.redact_email_in_job_name and self.job_wrapper.user:
             job_name += '_%s' % self.job_wrapper.user
         self.job_name = ''.join(x if x in (string.ascii_letters + string.digits + '_') else '_' for x in job_name)
Example #3
 def set_defaults(self, files_dir):
     if self.job_wrapper is not None:
         id_tag = self.job_wrapper.get_id_tag()
         if files_dir is not None:
             self.job_file = JobState.default_job_file(files_dir, id_tag)
             self.output_file = os.path.join(files_dir,
                                             f'galaxy_{id_tag}.o')
             self.error_file = os.path.join(files_dir, f'galaxy_{id_tag}.e')
             self.exit_code_file = default_exit_code_file(files_dir, id_tag)
         job_name = f'g{id_tag}'
         if self.job_wrapper.tool.old_id:
             job_name += f'_{self.job_wrapper.tool.old_id}'
         if not self.redact_email_in_job_name and self.job_wrapper.user:
             job_name += f'_{self.job_wrapper.user}'
         self.job_name = ''.join(
             x if x in (string.ascii_letters + string.digits + '_') else '_'
             for x in job_name)
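
The final join in set_defaults replaces every character outside ASCII letters, digits, and underscore with an underscore, so the value is safe to use as a scheduler job name. A small illustration with a hypothetical job name:

import string

allowed = string.ascii_letters + string.digits + '_'
job_name = 'g42_cat1_user@example.org'  # hypothetical id tag, tool id, and user
safe_name = ''.join(x if x in allowed else '_' for x in job_name)
print(safe_name)  # g42_cat1_user_example_org
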
Example #4
    def queue_job(self, job_wrapper):
        if not self._prepare_job_local(job_wrapper):
            return

        stderr = stdout = ''

        # command line has been added to the wrapper by prepare_job()
        job_file, exit_code_path = self.__command_line(job_wrapper)
        job_id = job_wrapper.get_id_tag()

        try:
            stdout_file = tempfile.NamedTemporaryFile(
                mode='wb+',
                suffix='_stdout',
                dir=job_wrapper.working_directory)
            stderr_file = tempfile.NamedTemporaryFile(
                mode='wb+',
                suffix='_stderr',
                dir=job_wrapper.working_directory)
            log.debug('({}) executing job script: {}'.format(job_id, job_file))
            # The preexec_fn argument of Popen() is used to call os.setpgrp() in
            # the child process just before the child is executed. This will set
            # the PGID of the child process to its PID (i.e. ensures that it is
            # the root of its own process group instead of Galaxy's one).
            proc = subprocess.Popen(args=[job_file],
                                    cwd=job_wrapper.working_directory,
                                    stdout=stdout_file,
                                    stderr=stderr_file,
                                    env=self._environ,
                                    preexec_fn=os.setpgrp)

            proc.terminated_by_shutdown = False
            with self._proc_lock:
                self._procs.append(proc)

            try:
                job = job_wrapper.get_job()
                # Flush job with change_state.
                job_wrapper.set_external_id(proc.pid, job=job, flush=False)
                job_wrapper.change_state(model.Job.states.RUNNING, job=job)
                self._handle_container(job_wrapper, proc)

                terminated = self.__poll_if_needed(proc, job_wrapper, job_id)
                proc.wait()  # reap
                if terminated:
                    return
                elif check_pg(proc.pid):
                    kill_pg(proc.pid)
            finally:
                with self._proc_lock:
                    self._procs.remove(proc)

            if proc.terminated_by_shutdown:
                self._fail_job_local(job_wrapper,
                                     "job terminated by Galaxy shutdown")
                return

            stdout_file.seek(0)
            stderr_file.seek(0)
            stdout = self._job_io_for_db(stdout_file)
            stderr = self._job_io_for_db(stderr_file)
            stdout_file.close()
            stderr_file.close()
            log.debug('execution finished: %s' % job_file)
        except Exception:
            log.exception("failure running job %d", job_wrapper.job_id)
            self._fail_job_local(job_wrapper, "failure running job")
            return

        self._handle_metadata_if_needed(job_wrapper)

        job_destination = job_wrapper.job_destination
        job_state = JobState(job_wrapper, job_destination)
        job_state.exit_code_file = default_exit_code_file(
            job_wrapper.working_directory, job_id)
        job_state.stop_job = False
        self._finish_or_resubmit_job(job_state, stdout, stderr, job_id=job_id)
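
queue_job checks and tears down the job's whole process group via check_pg and kill_pg (the Popen call made the child a group leader through os.setpgrp). A sketch of POSIX process-group helpers in that spirit, assuming the real helpers behave similarly:

import errno
import os
import signal
import time


def check_pg(pgid):
    # Signal 0 only performs error checking: True means the process group
    # still has members we could signal.
    try:
        os.killpg(pgid, 0)
        return True
    except OSError as e:
        return e.errno != errno.ESRCH


def kill_pg(pgid):
    # Ask the whole group to terminate, then force-kill any stragglers.
    for sig in (signal.SIGTERM, signal.SIGKILL):
        try:
            os.killpg(pgid, sig)
        except OSError as e:
            if e.errno == errno.ESRCH:
                return
            raise
        time.sleep(1)
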
Example #5
def set_metadata_portable():
    import galaxy.model
    tool_job_working_directory = os.path.abspath(os.getcwd())
    metadata_tmp_files_dir = os.path.join(tool_job_working_directory,
                                          "metadata")
    galaxy.model.metadata.MetadataTempFile.tmp_dir = metadata_tmp_files_dir

    metadata_params_path = os.path.join("metadata", "params.json")
    try:
        with open(metadata_params_path, "r") as f:
            metadata_params = json.load(f)
    except IOError:
        raise Exception("Failed to find metadata/params.json from cwd [%s]" %
                        tool_job_working_directory)
    datatypes_config = metadata_params["datatypes_config"]
    job_metadata = metadata_params["job_metadata"]
    provided_metadata_style = metadata_params.get("provided_metadata_style")
    max_metadata_value_size = metadata_params.get(
        "max_metadata_value_size") or 0
    outputs = metadata_params["outputs"]

    datatypes_registry = validate_and_load_datatypes_config(datatypes_config)
    tool_provided_metadata = load_job_metadata(job_metadata,
                                               provided_metadata_style)

    def set_meta(new_dataset_instance, file_dict):
        set_meta_with_tool_provided(new_dataset_instance, file_dict,
                                    set_meta_kwds, datatypes_registry,
                                    max_metadata_value_size)

    object_store_conf_path = os.path.join("metadata", "object_store_conf.json")
    extended_metadata_collection = os.path.exists(object_store_conf_path)

    object_store = None
    job_context = None
    version_string = ""

    export_store = None
    if extended_metadata_collection:
        from galaxy.tool_util.parser.stdio import ToolStdioRegex, ToolStdioExitCode
        tool_dict = metadata_params["tool"]
        stdio_exit_code_dicts, stdio_regex_dicts = tool_dict[
            "stdio_exit_codes"], tool_dict["stdio_regexes"]
        stdio_exit_codes = list(map(ToolStdioExitCode, stdio_exit_code_dicts))
        stdio_regexes = list(map(ToolStdioRegex, stdio_regex_dicts))

        with open(object_store_conf_path, "r") as f:
            config_dict = json.load(f)
        from galaxy.objectstore import build_object_store_from_config
        assert config_dict is not None
        object_store = build_object_store_from_config(None,
                                                      config_dict=config_dict)
        galaxy.model.Dataset.object_store = object_store

        outputs_directory = os.path.join(tool_job_working_directory, "outputs")
        if not os.path.exists(outputs_directory):
            outputs_directory = tool_job_working_directory

        # TODO: constants...
        if os.path.exists(os.path.join(outputs_directory, "tool_stdout")):
            with open(os.path.join(outputs_directory, "tool_stdout"),
                      "rb") as f:
                tool_stdout = f.read()

            with open(os.path.join(outputs_directory, "tool_stderr"),
                      "rb") as f:
                tool_stderr = f.read()
        elif os.path.exists(os.path.join(outputs_directory, "stdout")):
            # Pulsar style working directory.
            with open(os.path.join(outputs_directory, "stdout"), "rb") as f:
                tool_stdout = f.read()

            with open(os.path.join(outputs_directory, "stderr"), "rb") as f:
                tool_stderr = f.read()

        job_id_tag = metadata_params["job_id_tag"]

        # TODO: this clearly needs to be refactored, nothing in runners should be imported here..
        from galaxy.job_execution.output_collect import default_exit_code_file, read_exit_code_from
        exit_code_file = default_exit_code_file(".", job_id_tag)
        tool_exit_code = read_exit_code_from(exit_code_file, job_id_tag)

        from galaxy.tool_util.output_checker import check_output, DETECTED_JOB_STATE
        check_output_detected_state, tool_stdout, tool_stderr, job_messages = check_output(
            stdio_regexes, stdio_exit_codes, tool_stdout, tool_stderr,
            tool_exit_code, job_id_tag)
        if check_output_detected_state == DETECTED_JOB_STATE.OK and not tool_provided_metadata.has_failed_outputs():
            final_job_state = galaxy.model.Job.states.OK
        else:
            final_job_state = galaxy.model.Job.states.ERROR

        from pulsar.client.staging import COMMAND_VERSION_FILENAME
        version_string = ""
        if os.path.exists(COMMAND_VERSION_FILENAME):
            version_string = open(COMMAND_VERSION_FILENAME).read()

        # TODO: handle outputs_to_working_directory?
        from galaxy.util.expressions import ExpressionContext
        job_context = ExpressionContext(
            dict(stdout=tool_stdout, stderr=tool_stderr))

        # Load outputs.
        import_model_store = store.imported_store_for_metadata(
            'metadata/outputs_new', object_store=object_store)
        export_store = store.DirectoryModelExportStore(
            'metadata/outputs_populated',
            serialize_dataset_objects=True,
            for_edit=True)

    for output_name, output_dict in outputs.items():
        if extended_metadata_collection:
            dataset_instance_id = output_dict["id"]
            dataset = import_model_store.sa_session.query(
                galaxy.model.HistoryDatasetAssociation).find(
                    dataset_instance_id)
            assert dataset is not None
        else:
            filename_in = os.path.join("metadata/metadata_in_%s" % output_name)
            dataset = cPickle.load(open(filename_in,
                                        'rb'))  # load DatasetInstance

        filename_kwds = os.path.join("metadata/metadata_kwds_%s" % output_name)
        filename_out = os.path.join("metadata/metadata_out_%s" % output_name)
        filename_results_code = os.path.join("metadata/metadata_results_%s" %
                                             output_name)
        override_metadata = os.path.join("metadata/metadata_override_%s" %
                                         output_name)
        dataset_filename_override = output_dict["filename_override"]

        # Same block as below...
        set_meta_kwds = stringify_dictionary_keys(
            json.load(open(filename_kwds))
        )  # load kwds; need to ensure our keywords are not unicode
        try:
            dataset.dataset.external_filename = dataset_filename_override
            store_by = metadata_params.get("object_store_store_by", "id")
            extra_files_dir_name = "dataset_%s_files" % getattr(
                dataset.dataset, store_by)
            files_path = os.path.abspath(
                os.path.join(tool_job_working_directory, "working",
                             extra_files_dir_name))
            dataset.dataset.external_extra_files_path = files_path
            file_dict = tool_provided_metadata.get_dataset_meta(
                output_name, dataset.dataset.id, dataset.dataset.uuid)
            if 'ext' in file_dict:
                dataset.extension = file_dict['ext']
            # Metadata FileParameter types may not be writable on a cluster node, and are therefore temporarily substituted with MetadataTempFiles
            override_metadata = json.load(open(override_metadata))
            for metadata_name, metadata_file_override in override_metadata:
                if galaxy.datatypes.metadata.MetadataTempFile.is_JSONified_value(
                        metadata_file_override):
                    metadata_file_override = galaxy.datatypes.metadata.MetadataTempFile.from_JSON(
                        metadata_file_override)
                setattr(dataset.metadata, metadata_name,
                        metadata_file_override)
            if output_dict.get("validate", False):
                set_validated_state(dataset)
            set_meta(dataset, file_dict)

            if extended_metadata_collection:
                meta = tool_provided_metadata.get_dataset_meta(
                    output_name, dataset.dataset.id, dataset.dataset.uuid)
                if meta:
                    context = ExpressionContext(meta, job_context)
                else:
                    context = job_context

                # Lazy and unattached
                # if getattr(dataset, "hidden_beneath_collection_instance", None):
                #    dataset.visible = False
                dataset.blurb = 'done'
                dataset.peek = 'no peek'
                dataset.info = (dataset.info or '')
                if context['stdout'].strip():
                    # Ensure white space between entries
                    dataset.info = dataset.info.rstrip() + "\n" + context['stdout'].strip()
                if context['stderr'].strip():
                    # Ensure white space between entries
                    dataset.info = dataset.info.rstrip() + "\n" + context['stderr'].strip()
                dataset.tool_version = version_string
                dataset.set_size()
                if 'uuid' in context:
                    dataset.dataset.uuid = context['uuid']
                object_store.update_from_file(dataset.dataset, create=True)
                from galaxy.job_execution.output_collect import collect_extra_files
                collect_extra_files(object_store, dataset, ".")
                if galaxy.model.Job.states.ERROR == final_job_state:
                    dataset.blurb = "error"
                    dataset.mark_unhidden()
                else:
                    # If the tool was expected to set the extension, attempt to retrieve it
                    if dataset.ext == 'auto':
                        dataset.extension = context.get('ext', 'data')
                        dataset.init_meta(copy_from=dataset)

                    # This has already been done:
                    # else:
                    #     self.external_output_metadata.load_metadata(dataset, output_name, self.sa_session, working_directory=self.working_directory, remote_metadata_directory=remote_metadata_directory)
                    line_count = context.get('line_count', None)
                    try:
                        # Certain datatypes' set_peek methods take a line_count argument
                        dataset.set_peek(line_count=line_count)
                    except TypeError:
                        # ... and others don't
                        dataset.set_peek()

                from galaxy.jobs import TOOL_PROVIDED_JOB_METADATA_KEYS
                for context_key in TOOL_PROVIDED_JOB_METADATA_KEYS:
                    if context_key in context:
                        context_value = context[context_key]
                        setattr(dataset, context_key, context_value)

                if extended_metadata_collection:
                    export_store.add_dataset(dataset)
                else:
                    cPickle.dump(dataset, open(filename_out, 'wb+'))
            else:
                dataset.metadata.to_JSON_dict(
                    filename_out)  # write out results of set_meta

            json.dump((True, 'Metadata has been set successfully'),
                      open(filename_results_code,
                           'wt+'))  # setting metadata has succeeded
        except Exception:
            json.dump((False, traceback.format_exc()),
                      open(filename_results_code,
                           'wt+'))  # setting metadata has failed somehow

    if extended_metadata_collection:
        # discover extra outputs...
        from galaxy.job_execution.output_collect import collect_dynamic_outputs, collect_primary_datasets, SessionlessJobContext

        job_context = SessionlessJobContext(
            metadata_params, tool_provided_metadata, object_store,
            export_store, import_model_store,
            os.path.join(tool_job_working_directory, "working"))

        output_collections = {}
        for name, output_collection in metadata_params[
                "output_collections"].items():
            output_collections[name] = import_model_store.sa_session.query(
                galaxy.model.HistoryDatasetCollectionAssociation).find(
                    output_collection["id"])
        outputs = {}
        for name, output in metadata_params["outputs"].items():
            outputs[name] = import_model_store.sa_session.query(
                galaxy.model.HistoryDatasetAssociation).find(output["id"])

        input_ext = json.loads(metadata_params["job_params"].get(
            "__input_ext", '"data"'))
        collect_primary_datasets(
            job_context,
            outputs,
            input_ext=input_ext,
        )
        collect_dynamic_outputs(job_context, output_collections)

    if export_store:
        export_store._finalize()
    write_job_metadata(tool_job_working_directory, job_metadata, set_meta,
                       tool_provided_metadata)
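
The metadata examples read the recorded exit status back with read_exit_code_from. A sketch of such a reader, assuming the exit code file holds a single integer; the real helper's fallback behaviour may differ:

import logging

log = logging.getLogger(__name__)


def read_exit_code_from(exit_code_file, id_tag):
    # The job script is expected to leave a single integer in this file;
    # treat anything missing or unparsable as a generic failure.
    try:
        with open(exit_code_file) as f:
            return int(f.read().strip())
    except Exception:
        log.warning("(%s) could not read exit code from %s", id_tag, exit_code_file)
        return 1  # assumed fallback; the real helper may behave differently
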
Example #6
def handle_outputs(job_directory=None):
    # Relocate dynamically collected files to pre-determined locations
    # registered with ToolOutput objects via from_work_dir handling.
    if job_directory is None:
        job_directory = os.path.join(os.getcwd(), os.path.pardir)
    metadata_directory = os.path.join(job_directory, "metadata")
    metadata_params_path = os.path.join(metadata_directory, "params.json")
    try:
        with open(metadata_params_path) as f:
            metadata_params = json.load(f)
    except OSError:
        raise Exception("Failed to find params.json from metadata directory [%s]" % metadata_directory)

    cwl_job_file = os.path.join(job_directory, JOB_JSON_FILE)
    if not os.path.exists(cwl_job_file):
        # Not a CWL job, just continue
        return
    # So we only need to do strict validation when the tool was loaded,
    # no reason to do it again during job execution - so this shortcut
    # allows us to not need Galaxy's full configuration on job nodes.
    job_proxy = load_job_proxy(job_directory, strict_cwl_validation=False)
    tool_working_directory = os.path.join(job_directory, "working")

    job_id_tag = metadata_params["job_id_tag"]
    from galaxy.job_execution.output_collect import default_exit_code_file, read_exit_code_from
    exit_code_file = default_exit_code_file(".", job_id_tag)
    tool_exit_code = read_exit_code_from(exit_code_file, job_id_tag)

    outputs = job_proxy.collect_outputs(tool_working_directory, tool_exit_code)

    # Build galaxy.json file.
    provided_metadata = {}

    def move_directory(output, target_path, output_name=None):
        assert output["class"] == "Directory"
        output_path = _possible_uri_to_path(output["location"])
        if output_path.startswith("_:"):
            # Not a real path; just copy the listing to the target path.
            safe_makedirs(target_path)
            for listed_file in output["listing"]:
                # TODO: handle directories
                assert listed_file["class"] == "File"
                file_description = file_dict_to_description(listed_file)
                file_description.write_to(os.path.join(target_path, listed_file["basename"]))
        else:
            shutil.move(output_path, target_path)
        return {"created_from_basename": output["basename"]}

    def move_output(output, target_path, output_name=None):
        assert output["class"] == "File"
        file_description = file_dict_to_description(output)
        file_description.write_to(target_path)

        secondary_files = output.get("secondaryFiles", [])
        if secondary_files:

            order = []
            index_contents = {
                "order": order
            }

            for secondary_file in secondary_files:
                if output_name is None:
                    raise NotImplementedError("secondaryFiles are unimplemented for dynamic list elements")

                # TODO: handle nested files...
                secondary_file_description = file_dict_to_description(secondary_file)
                # assert secondary_file_path.startswith(output_path), "[%s] does not start with [%s]" % (secondary_file_path, output_path)
                secondary_file_basename = secondary_file["basename"]

                if not STORE_SECONDARY_FILES_WITH_BASENAME:
                    output_basename = output["basename"]
                    prefix = ""
                    while True:
                        if secondary_file_basename.startswith(output_basename):
                            secondary_file_name = prefix + secondary_file_basename[len(output_basename):]
                            break
                        prefix = "^%s" % prefix
                        if "." not in output_basename:
                            secondary_file_name = prefix + secondary_file_basename
                            break
                        else:
                            output_basename = output_basename.rsplit(".", 1)[0]
                else:
                    secondary_file_name = secondary_file_basename
                # Convert to ^ format....
                secondary_files_dir = job_proxy.output_secondary_files_dir(
                    output_name, create=True
                )
                extra_target = os.path.join(secondary_files_dir, secondary_file_name)
                secondary_file_description.write_to(extra_target)
                order.append(secondary_file_name)

            with open(os.path.join(secondary_files_dir, "..", SECONDARY_FILES_INDEX_PATH), "w") as f:
                json.dump(index_contents, f)

        return {"created_from_basename": output["basename"]}

    def handle_known_output(output, output_key, output_name):
        # if output["class"] != "File":
        #    # This case doesn't seem like it would be reached - why is this here?
        #    provided_metadata[output_name] = {
        #        "ext": "expression.json",
        #    }
        # else:
        assert output_name
        if output["class"] == "File":
            target_path = job_proxy.output_path(output_name)
            file_metadata = move_output(output, target_path, output_name=output_name)
        elif output["class"] == "Directory":
            target_path = job_proxy.output_directory_contents_dir(output_name)
            file_metadata = move_directory(output, target_path, output_name=output_name)
        else:
            raise Exception("Unknown output type [%s] encountered" % output)
        provided_metadata[output_name] = file_metadata

    def handle_known_output_json(output, output_name):
        target_path = job_proxy.output_path(output_name)
        with open(target_path, "w") as f:
            f.write(json.dumps(output))
        provided_metadata[output_name] = {
            "ext": "expression.json",
        }

    handled_outputs = []
    for output_name, output in outputs.items():
        handled_outputs.append(output_name)
        if isinstance(output, dict) and "location" in output:
            handle_known_output(output, output_name, output_name)
        elif isinstance(output, dict):
            prefix = "%s|__part__|" % output_name
            for record_key, record_value in output.items():
                record_value_output_key = "{}{}".format(prefix, record_key)
                if isinstance(record_value, dict) and "class" in record_value:
                    handle_known_output(record_value, record_value_output_key, output_name)
                else:
                    # param_evaluation_noexpr
                    handle_known_output_json(output, output_name)

        elif isinstance(output, list):
            elements = []
            for index, el in enumerate(output):
                if isinstance(el, dict) and el["class"] == "File":
                    output_path = _possible_uri_to_path(el["location"])
                    elements.append({"name": str(index), "filename": output_path, "created_from_basename": el["basename"]})
                else:
                    target_path = "{}____{}".format(output_name, str(index))
                    with open(target_path, "w") as f:
                        f.write(json.dumps(el))
                    elements.append({"name": str(index), "filename": target_path, "ext": "expression.json"})
            provided_metadata[output_name] = {"elements": elements}
        else:
            handle_known_output_json(output, output_name)

    for output_instance in job_proxy._tool_proxy.output_instances():
        output_name = output_instance.name
        if output_name not in handled_outputs:
            handle_known_output_json(None, output_name)

    with open("galaxy.json", "w") as f:
        json.dump(provided_metadata, f)
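
handle_outputs assumes CWL-style output dictionaries whose location values may be file:// URIs. A minimal sketch of a helper in the spirit of _possible_uri_to_path, under that assumption:

from urllib.parse import unquote, urlparse


def _possible_uri_to_path(location):
    # CWL outputs usually report file:// URIs; plain paths pass through.
    parsed = urlparse(str(location))
    if parsed.scheme == "file":
        return unquote(parsed.path)
    return location
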
Example #7
def set_metadata_portable():
    tool_job_working_directory = os.path.abspath(os.getcwd())
    metadata_tmp_files_dir = os.path.join(tool_job_working_directory,
                                          "metadata")
    MetadataTempFile.tmp_dir = metadata_tmp_files_dir

    metadata_params_path = os.path.join("metadata", "params.json")
    try:
        with open(metadata_params_path) as f:
            metadata_params = json.load(f)
    except OSError:
        raise Exception(
            f"Failed to find metadata/params.json from cwd [{tool_job_working_directory}]"
        )
    datatypes_config = metadata_params["datatypes_config"]
    job_metadata = metadata_params["job_metadata"]
    provided_metadata_style = metadata_params.get("provided_metadata_style")
    max_metadata_value_size = metadata_params.get(
        "max_metadata_value_size") or 0
    outputs = metadata_params["outputs"]

    datatypes_registry = validate_and_load_datatypes_config(datatypes_config)
    tool_provided_metadata = load_job_metadata(job_metadata,
                                               provided_metadata_style)

    def set_meta(new_dataset_instance, file_dict):
        set_meta_with_tool_provided(new_dataset_instance, file_dict,
                                    set_meta_kwds, datatypes_registry,
                                    max_metadata_value_size)

    object_store_conf_path = os.path.join("metadata", "object_store_conf.json")
    extended_metadata_collection = os.path.exists(object_store_conf_path)

    object_store = None
    job_context = None
    version_string = ""

    export_store = None
    final_job_state = Job.states.OK
    if extended_metadata_collection:
        tool_dict = metadata_params["tool"]
        stdio_exit_code_dicts, stdio_regex_dicts = tool_dict[
            "stdio_exit_codes"], tool_dict["stdio_regexes"]
        stdio_exit_codes = list(map(ToolStdioExitCode, stdio_exit_code_dicts))
        stdio_regexes = list(map(ToolStdioRegex, stdio_regex_dicts))

        with open(object_store_conf_path) as f:
            config_dict = json.load(f)
        assert config_dict is not None
        object_store = build_object_store_from_config(None,
                                                      config_dict=config_dict)
        Dataset.object_store = object_store

        outputs_directory = os.path.join(tool_job_working_directory, "outputs")
        if not os.path.exists(outputs_directory):
            outputs_directory = tool_job_working_directory

        # TODO: constants...
        if os.path.exists(os.path.join(outputs_directory, "tool_stdout")):
            with open(os.path.join(outputs_directory, "tool_stdout"),
                      "rb") as f:
                tool_stdout = f.read()

            with open(os.path.join(outputs_directory, "tool_stderr"),
                      "rb") as f:
                tool_stderr = f.read()
        elif os.path.exists(os.path.join(tool_job_working_directory,
                                         "stdout")):
            with open(os.path.join(tool_job_working_directory, "stdout"),
                      "rb") as f:
                tool_stdout = f.read()

            with open(os.path.join(tool_job_working_directory, "stderr"),
                      "rb") as f:
                tool_stderr = f.read()
        elif os.path.exists(os.path.join(outputs_directory, "stdout")):
            # Pulsar style output directory? Was this ever used - did this ever work?
            with open(os.path.join(outputs_directory, "stdout"), "rb") as f:
                tool_stdout = f.read()

            with open(os.path.join(outputs_directory, "stderr"), "rb") as f:
                tool_stderr = f.read()
        else:
            wdc = os.listdir(tool_job_working_directory)
            odc = os.listdir(outputs_directory)
            error_desc = "Failed to find tool_stdout or tool_stderr for this job, cannot collect metadata"
            error_extra = f"Working dir contents [{wdc}], output directory contents [{odc}]"
            log.warn(f"{error_desc}. {error_extra}")
            raise Exception(error_desc)

        job_id_tag = metadata_params["job_id_tag"]

        exit_code_file = default_exit_code_file(".", job_id_tag)
        tool_exit_code = read_exit_code_from(exit_code_file, job_id_tag)

        check_output_detected_state, tool_stdout, tool_stderr, job_messages = check_output(
            stdio_regexes, stdio_exit_codes, tool_stdout, tool_stderr,
            tool_exit_code, job_id_tag)
        if check_output_detected_state == DETECTED_JOB_STATE.OK and not tool_provided_metadata.has_failed_outputs():
            final_job_state = Job.states.OK
        else:
            final_job_state = Job.states.ERROR

        version_string = ""
        if os.path.exists(COMMAND_VERSION_FILENAME):
            version_string = open(COMMAND_VERSION_FILENAME).read()

        expression_context = ExpressionContext(
            dict(stdout=tool_stdout, stderr=tool_stderr))

        # Load outputs.
        export_store = store.DirectoryModelExportStore(
            'metadata/outputs_populated',
            serialize_dataset_objects=True,
            for_edit=True,
            strip_metadata_files=False,
            serialize_jobs=False)
    try:
        import_model_store = store.imported_store_for_metadata(
            'metadata/outputs_new', object_store=object_store)
    except AssertionError:
        # Remove in 21.09, this should only happen for jobs that started on <= 20.09 and finish now
        import_model_store = None

    job_context = SessionlessJobContext(
        metadata_params,
        tool_provided_metadata,
        object_store,
        export_store,
        import_model_store,
        os.path.join(tool_job_working_directory, "working"),
        final_job_state=final_job_state,
    )

    unnamed_id_to_path = {}
    for unnamed_output_dict in job_context.tool_provided_metadata.get_unnamed_outputs():
        destination = unnamed_output_dict["destination"]
        elements = unnamed_output_dict["elements"]
        destination_type = destination["type"]
        if destination_type == 'hdas':
            for element in elements:
                filename = element.get('filename')
                if filename:
                    unnamed_id_to_path[element['object_id']] = os.path.join(
                        job_context.job_working_directory, filename)

    for output_name, output_dict in outputs.items():
        dataset_instance_id = output_dict["id"]
        klass = getattr(
            galaxy.model,
            output_dict.get('model_class', 'HistoryDatasetAssociation'))
        dataset = None
        if import_model_store:
            dataset = import_model_store.sa_session.query(klass).find(
                dataset_instance_id)
        if dataset is None:
            # legacy check for jobs that started before 21.01, remove on 21.05
            filename_in = os.path.join(f"metadata/metadata_in_{output_name}")
            import pickle
            dataset = pickle.load(open(filename_in,
                                       'rb'))  # load DatasetInstance
        assert dataset is not None

        filename_kwds = os.path.join(f"metadata/metadata_kwds_{output_name}")
        filename_out = os.path.join(f"metadata/metadata_out_{output_name}")
        filename_results_code = os.path.join(
            f"metadata/metadata_results_{output_name}")
        override_metadata = os.path.join(
            f"metadata/metadata_override_{output_name}")
        dataset_filename_override = output_dict["filename_override"]
        # pre-20.05 this was a per job parameter and not a per dataset parameter, drop in 21.XX
        legacy_object_store_store_by = metadata_params.get(
            "object_store_store_by", "id")

        # Same block as below...
        set_meta_kwds = stringify_dictionary_keys(
            json.load(open(filename_kwds))
        )  # load kwds; need to ensure our keywords are not unicode
        try:
            dataset.dataset.external_filename = unnamed_id_to_path.get(
                dataset_instance_id, dataset_filename_override)
            store_by = output_dict.get("object_store_store_by",
                                       legacy_object_store_store_by)
            extra_files_dir_name = f"dataset_{getattr(dataset.dataset, store_by)}_files"
            files_path = os.path.abspath(
                os.path.join(tool_job_working_directory, "working",
                             extra_files_dir_name))
            dataset.dataset.external_extra_files_path = files_path
            file_dict = tool_provided_metadata.get_dataset_meta(
                output_name, dataset.dataset.id, dataset.dataset.uuid)
            if 'ext' in file_dict:
                dataset.extension = file_dict['ext']
            # Metadata FileParameter types may not be writable on a cluster node, and are therefore temporarily substituted with MetadataTempFiles
            override_metadata = json.load(open(override_metadata))
            for metadata_name, metadata_file_override in override_metadata:
                if MetadataTempFile.is_JSONified_value(metadata_file_override):
                    metadata_file_override = MetadataTempFile.from_JSON(
                        metadata_file_override)
                setattr(dataset.metadata, metadata_name,
                        metadata_file_override)
            if output_dict.get("validate", False):
                set_validated_state(dataset)
            if dataset_instance_id not in unnamed_id_to_path:
                # We're going to run through set_metadata in collect_dynamic_outputs with more contextual metadata,
                # so skip set_meta here.
                set_meta(dataset, file_dict)

            if extended_metadata_collection:
                meta = tool_provided_metadata.get_dataset_meta(
                    output_name, dataset.dataset.id, dataset.dataset.uuid)
                if meta:
                    context = ExpressionContext(meta, expression_context)
                else:
                    context = expression_context

                # Lazy and unattached
                # if getattr(dataset, "hidden_beneath_collection_instance", None):
                #    dataset.visible = False
                dataset.blurb = 'done'
                dataset.peek = 'no peek'
                dataset.info = (dataset.info or '')
                if context['stdout'].strip():
                    # Ensure white space between entries
                    dataset.info = f"{dataset.info.rstrip()}\n{context['stdout'].strip()}"
                if context['stderr'].strip():
                    # Ensure white space between entries
                    dataset.info = f"{dataset.info.rstrip()}\n{context['stderr'].strip()}"
                dataset.tool_version = version_string
                dataset.set_size()
                if 'uuid' in context:
                    dataset.dataset.uuid = context['uuid']
                if dataset_filename_override and dataset_filename_override != dataset.file_name:
                    # This has to be a job with outputs_to_working_directory set.
                    # We update the object store with the created output file.
                    object_store.update_from_file(
                        dataset.dataset,
                        file_name=dataset_filename_override,
                        create=True)
                collect_extra_files(object_store, dataset, ".")
                if Job.states.ERROR == final_job_state:
                    dataset.blurb = "error"
                    dataset.mark_unhidden()
                else:
                    # If the tool was expected to set the extension, attempt to retrieve it
                    if dataset.ext == 'auto':
                        dataset.extension = context.get('ext', 'data')
                        dataset.init_meta(copy_from=dataset)

                    # This has already been done:
                    # else:
                    #     self.external_output_metadata.load_metadata(dataset, output_name, self.sa_session, working_directory=self.working_directory, remote_metadata_directory=remote_metadata_directory)
                    line_count = context.get('line_count', None)
                    try:
                        # Certain datatypes' set_peek methods take a line_count argument
                        dataset.set_peek(line_count=line_count)
                    except TypeError:
                        # ... and others don't
                        dataset.set_peek()

                for context_key in TOOL_PROVIDED_JOB_METADATA_KEYS:
                    if context_key in context:
                        context_value = context[context_key]
                        setattr(dataset, context_key, context_value)
                # We never want to persist the external_filename.
                dataset.dataset.external_filename = None
                export_store.add_dataset(dataset)
            else:
                dataset.metadata.to_JSON_dict(
                    filename_out)  # write out results of set_meta

            json.dump((True, 'Metadata has been set successfully'),
                      open(filename_results_code,
                           'wt+'))  # setting metadata has succeeded
        except Exception:
            json.dump((False, traceback.format_exc()),
                      open(filename_results_code,
                           'wt+'))  # setting metadata has failed somehow

    if extended_metadata_collection:
        # discover extra outputs...
        output_collections = {}
        for name, output_collection in metadata_params[
                "output_collections"].items():
            output_collections[name] = import_model_store.sa_session.query(
                HistoryDatasetCollectionAssociation).find(
                    output_collection["id"])
        outputs = {}
        for name, output in metadata_params["outputs"].items():
            klass = getattr(
                galaxy.model,
                output.get('model_class', 'HistoryDatasetAssociation'))
            outputs[name] = import_model_store.sa_session.query(klass).find(
                output["id"])

        input_ext = json.loads(metadata_params["job_params"].get(
            "__input_ext", '"data"'))
        collect_primary_datasets(
            job_context,
            outputs,
            input_ext=input_ext,
        )
        collect_dynamic_outputs(job_context, output_collections)

    if export_store:
        export_store._finalize()
    write_job_metadata(tool_job_working_directory, job_metadata, set_meta,
                       tool_provided_metadata)
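
Both metadata examples layer per-dataset metadata over the job-wide stdout/stderr context with ExpressionContext(meta, expression_context). Conceptually this is a chained dictionary lookup, roughly like the simplified stand-in below (not the real class):

class ChainedContext:
    # Simplified stand-in for ExpressionContext: check the local dict first,
    # then fall back to the parent context.
    def __init__(self, local, parent=None):
        self.local = local
        self.parent = parent

    def __getitem__(self, key):
        if key in self.local:
            return self.local[key]
        if self.parent is not None:
            return self.parent[key]
        raise KeyError(key)

    def get(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            return default


job_context = ChainedContext(dict(stdout="ok", stderr=""))
context = ChainedContext({"ext": "tabular"}, job_context)
assert context["stdout"] == "ok" and context["ext"] == "tabular"
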
Example #8
def set_metadata_portable():
    tool_job_working_directory = os.path.abspath(os.getcwd())
    metadata_tmp_files_dir = os.path.join(tool_job_working_directory, "metadata")
    MetadataTempFile.tmp_dir = metadata_tmp_files_dir

    metadata_params = get_metadata_params(tool_job_working_directory)
    datatypes_config = metadata_params["datatypes_config"]
    job_metadata = metadata_params["job_metadata"]
    provided_metadata_style = metadata_params.get("provided_metadata_style")
    max_metadata_value_size = metadata_params.get("max_metadata_value_size") or 0
    max_discovered_files = metadata_params.get("max_discovered_files")
    outputs = metadata_params["outputs"]

    datatypes_registry = validate_and_load_datatypes_config(datatypes_config)
    tool_provided_metadata = load_job_metadata(job_metadata, provided_metadata_style)

    def set_meta(new_dataset_instance, file_dict):
        set_meta_with_tool_provided(new_dataset_instance, file_dict, set_meta_kwds, datatypes_registry, max_metadata_value_size)

    try:
        object_store = get_object_store(tool_job_working_directory=tool_job_working_directory)
    except (FileNotFoundError, AssertionError):
        object_store = None
    extended_metadata_collection = bool(object_store)
    job_context = None
    version_string = None

    export_store = None
    final_job_state = Job.states.OK
    job_messages = []
    if extended_metadata_collection:
        tool_dict = metadata_params["tool"]
        stdio_exit_code_dicts, stdio_regex_dicts = tool_dict["stdio_exit_codes"], tool_dict["stdio_regexes"]
        stdio_exit_codes = list(map(ToolStdioExitCode, stdio_exit_code_dicts))
        stdio_regexes = list(map(ToolStdioRegex, stdio_regex_dicts))

        outputs_directory = os.path.join(tool_job_working_directory, "outputs")
        if not os.path.exists(outputs_directory):
            outputs_directory = tool_job_working_directory

        # TODO: constants...
        locations = [
            (outputs_directory, 'tool_'),
            (tool_job_working_directory, ''),
            (outputs_directory, ''),  # Pulsar style output directory? Was this ever used - did this ever work?
        ]
        for directory, prefix in locations:
            if os.path.exists(os.path.join(directory, f"{prefix}stdout")):
                with open(os.path.join(directory, f"{prefix}stdout"), 'rb') as f:
                    tool_stdout = f.read(MAX_STDIO_READ_BYTES)
                with open(os.path.join(directory, f"{prefix}stderr"), 'rb') as f:
                    tool_stderr = f.read(MAX_STDIO_READ_BYTES)
                break
        else:
            if os.path.exists(os.path.join(tool_job_working_directory, 'task_0')):
                # We have a task splitting job
                tool_stdout = b''
                tool_stderr = b''
                paths = Path(tool_job_working_directory).glob('task_*')
                for path in paths:
                    with open(path / 'outputs' / 'tool_stdout', 'rb') as f:
                        task_stdout = f.read(MAX_STDIO_READ_BYTES)
                        if task_stdout:
                            tool_stdout = b"%s[%s stdout]\n%s\n" % (tool_stdout, path.name.encode(), task_stdout)
                    with open(path / 'outputs' / 'tool_stderr', 'rb') as f:
                        task_stderr = f.read(MAX_STDIO_READ_BYTES)
                        if task_stderr:
                            tool_stderr = b"%s[%s stdout]\n%s\n" % (tool_stderr, path.name.encode(), task_stderr)
            else:
                wdc = os.listdir(tool_job_working_directory)
                odc = os.listdir(outputs_directory)
                error_desc = "Failed to find tool_stdout or tool_stderr for this job, cannot collect metadata"
                error_extra = f"Working dir contents [{wdc}], output directory contents [{odc}]"
                log.warning(f"{error_desc}. {error_extra}")
                raise Exception(error_desc)

        job_id_tag = metadata_params["job_id_tag"]

        exit_code_file = default_exit_code_file(".", job_id_tag)
        tool_exit_code = read_exit_code_from(exit_code_file, job_id_tag)

        check_output_detected_state, tool_stdout, tool_stderr, job_messages = check_output(stdio_regexes, stdio_exit_codes, tool_stdout, tool_stderr, tool_exit_code, job_id_tag)
        if check_output_detected_state == DETECTED_JOB_STATE.OK and not tool_provided_metadata.has_failed_outputs():
            final_job_state = Job.states.OK
        else:
            final_job_state = Job.states.ERROR

        version_string_path = os.path.join('outputs', COMMAND_VERSION_FILENAME)
        version_string = collect_shrinked_content_from_path(version_string_path)

        expression_context = ExpressionContext(dict(stdout=tool_stdout[:255], stderr=tool_stderr[:255]))

        # Load outputs.
        export_store = store.DirectoryModelExportStore('metadata/outputs_populated', serialize_dataset_objects=True, for_edit=True, strip_metadata_files=False, serialize_jobs=True)
    try:
        import_model_store = store.imported_store_for_metadata('metadata/outputs_new', object_store=object_store)
    except AssertionError:
        # Remove in 21.09, this should only happen for jobs that started on <= 20.09 and finish now
        import_model_store = None

    tool_script_file = os.path.join(tool_job_working_directory, 'tool_script.sh')
    job = None
    if import_model_store and export_store:
        job = next(iter(import_model_store.sa_session.objects[Job].values()))

    job_context = SessionlessJobContext(
        metadata_params,
        tool_provided_metadata,
        object_store,
        export_store,
        import_model_store,
        os.path.join(tool_job_working_directory, "working"),
        final_job_state=final_job_state,
        max_discovered_files=max_discovered_files,
    )

    if extended_metadata_collection:
        # discover extra outputs...
        output_collections = {}
        for name, output_collection in metadata_params["output_collections"].items():
            # TODO: remove HistoryDatasetCollectionAssociation fallback on 22.01, model_class used to not be serialized prior to 21.09
            model_class = output_collection.get('model_class', 'HistoryDatasetCollectionAssociation')
            collection = import_model_store.sa_session.query(getattr(galaxy.model, model_class)).find(output_collection["id"])
            output_collections[name] = collection
        output_instances = {}
        for name, output in metadata_params["outputs"].items():
            klass = getattr(galaxy.model, output.get('model_class', 'HistoryDatasetAssociation'))
            output_instances[name] = import_model_store.sa_session.query(klass).find(output["id"])

        input_ext = json.loads(metadata_params["job_params"].get("__input_ext") or '"data"')
        try:
            collect_primary_datasets(
                job_context,
                output_instances,
                input_ext=input_ext,
            )
            collect_dynamic_outputs(job_context, output_collections)
        except MaxDiscoveredFilesExceededError as e:
            final_job_state = Job.states.ERROR
            job_messages.append(str(e))
        if job:
            job.job_messages = job_messages
            job.state = final_job_state
        if os.path.exists(tool_script_file):
            with open(tool_script_file) as command_fh:
                command_line_lines = []
                for i, line in enumerate(command_fh):
                    if i == 0 and line.endswith('COMMAND_VERSION 2>&1;'):
                        # Don't record version command as part of command line
                        continue
                    command_line_lines.append(line)
                job.command_line = "".join(command_line_lines).strip()
                export_store.export_job(job, include_job_data=False)

    unnamed_id_to_path = {}
    for unnamed_output_dict in job_context.tool_provided_metadata.get_unnamed_outputs():
        destination = unnamed_output_dict["destination"]
        elements = unnamed_output_dict["elements"]
        destination_type = destination["type"]
        if destination_type == 'hdas':
            for element in elements:
                filename = element.get('filename')
                object_id = element.get('object_id')
                if filename and object_id:
                    unnamed_id_to_path[object_id] = os.path.join(job_context.job_working_directory, filename)

    for output_name, output_dict in outputs.items():
        dataset_instance_id = output_dict["id"]
        klass = getattr(galaxy.model, output_dict.get('model_class', 'HistoryDatasetAssociation'))
        dataset = None
        if import_model_store:
            dataset = import_model_store.sa_session.query(klass).find(dataset_instance_id)
        if dataset is None:
            # legacy check for jobs that started before 21.01, remove on 21.05
            filename_in = os.path.join(f"metadata/metadata_in_{output_name}")
            import pickle
            dataset = pickle.load(open(filename_in, 'rb'))  # load DatasetInstance
        assert dataset is not None

        filename_kwds = os.path.join(f"metadata/metadata_kwds_{output_name}")
        filename_out = os.path.join(f"metadata/metadata_out_{output_name}")
        filename_results_code = os.path.join(f"metadata/metadata_results_{output_name}")
        override_metadata = os.path.join(f"metadata/metadata_override_{output_name}")
        dataset_filename_override = output_dict["filename_override"]
        # pre-20.05 this was a per job parameter and not a per dataset parameter, drop in 21.XX
        legacy_object_store_store_by = metadata_params.get("object_store_store_by", "id")

        # Same block as below...
        set_meta_kwds = stringify_dictionary_keys(json.load(open(filename_kwds)))  # load kwds; need to ensure our keywords are not unicode
        try:
            external_filename = unnamed_id_to_path.get(dataset_instance_id, dataset_filename_override)
            if not os.path.exists(external_filename):
                matches = glob.glob(external_filename)
                assert len(matches) == 1, f"More than one file matched by output glob '{external_filename}'"
                external_filename = matches[0]
                assert safe_contains(tool_job_working_directory, external_filename), f"Cannot collect output '{external_filename}' from outside of working directory"
                created_from_basename = os.path.relpath(external_filename, os.path.join(tool_job_working_directory, 'working'))
                dataset.dataset.created_from_basename = created_from_basename
            # Override the filename if we're dealing with outputs to the working directory and the dataset is not linked data
            link_data_only = metadata_params.get("link_data_only")
            if not link_data_only:
                # Only set external filename if we're dealing with files in job working directory.
                # Fixes link_data_only uploads
                dataset.dataset.external_filename = external_filename
                store_by = output_dict.get("object_store_store_by", legacy_object_store_store_by)
                extra_files_dir_name = f"dataset_{getattr(dataset.dataset, store_by)}_files"
                files_path = os.path.abspath(os.path.join(tool_job_working_directory, "working", extra_files_dir_name))
                dataset.dataset.external_extra_files_path = files_path
            file_dict = tool_provided_metadata.get_dataset_meta(output_name, dataset.dataset.id, dataset.dataset.uuid)
            if 'ext' in file_dict:
                dataset.extension = file_dict['ext']
            # Metadata FileParameter types may not be writable on a cluster node, and are therefore temporarily substituted with MetadataTempFiles
            override_metadata = json.load(open(override_metadata))
            for metadata_name, metadata_file_override in override_metadata:
                if MetadataTempFile.is_JSONified_value(metadata_file_override):
                    metadata_file_override = MetadataTempFile.from_JSON(metadata_file_override)
                setattr(dataset.metadata, metadata_name, metadata_file_override)
            if output_dict.get("validate", False):
                set_validated_state(dataset)
            if dataset_instance_id not in unnamed_id_to_path:
                # We're going to run through set_metadata in collect_dynamic_outputs with more contextual metadata,
                # so skip set_meta here.
                set_meta(dataset, file_dict)
                if extended_metadata_collection:
                    collect_extra_files(object_store, dataset, ".")
                    dataset.state = dataset.dataset.state = final_job_state

            if extended_metadata_collection:
                if not link_data_only and os.path.getsize(external_filename):
                    # Here we might be updating a disk based objectstore when outputs_to_working_directory is used,
                    # or a remote object store from its cache path.
                    object_store.update_from_file(dataset.dataset, file_name=external_filename, create=True)
                # TODO: merge expression_context into tool_provided_metadata so we don't have to special case this (here and in _finish_dataset)
                meta = tool_provided_metadata.get_dataset_meta(output_name, dataset.dataset.id, dataset.dataset.uuid)
                if meta:
                    context = ExpressionContext(meta, expression_context)
                else:
                    context = expression_context
                dataset.blurb = 'done'
                dataset.peek = 'no peek'
                dataset.info = (dataset.info or '')
                if context['stdout'].strip():
                    # Ensure white space between entries
                    dataset.info = f"{dataset.info.rstrip()}\n{context['stdout'].strip()}"
                if context['stderr'].strip():
                    # Ensure white space between entries
                    dataset.info = f"{dataset.info.rstrip()}\n{context['stderr'].strip()}"
                dataset.tool_version = version_string
                if 'uuid' in context:
                    dataset.dataset.uuid = context['uuid']
                if not final_job_state == Job.states.ERROR:
                    line_count = context.get('line_count', None)
                    try:
                        # Certain datatypes' set_peek methods take a line_count argument
                        dataset.set_peek(line_count=line_count)
                    except TypeError:
                        # ... and others don't
                        dataset.set_peek()
                for context_key in TOOL_PROVIDED_JOB_METADATA_KEYS:
                    if context_key in context:
                        context_value = context[context_key]
                        setattr(dataset, context_key, context_value)
                # We only want to persist the external_filename if the dataset has been linked in.
                if not link_data_only:
                    dataset.dataset.external_filename = None
                    dataset.dataset.extra_files_path = None
                export_store.add_dataset(dataset)
            else:
                dataset.metadata.to_JSON_dict(filename_out)  # write out results of set_meta

            json.dump((True, 'Metadata has been set successfully'), open(filename_results_code, 'wt+'))  # setting metadata has succeeded
        except Exception:
            json.dump((False, traceback.format_exc()), open(filename_results_code, 'wt+'))  # setting metadata has failed somehow

    if export_store:
        export_store._finalize()
    write_job_metadata(tool_job_working_directory, job_metadata, set_meta, tool_provided_metadata)
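
The glob-resolution branch in the example above guards discovered files with safe_contains so output collection cannot escape the job directory. A minimal containment check with those assumed semantics, not necessarily Galaxy's exact implementation:

import os


def safe_contains(prefix, path):
    # True when the fully resolved path lives under prefix, which blocks
    # symlink and ../ escapes out of the job directory.
    prefix = os.path.realpath(prefix)
    path = os.path.realpath(path)
    return os.path.commonpath([prefix, path]) == prefix
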
Example #9
def build_command(
    runner,
    job_wrapper,
    container=None,
    modify_command_for_container=True,
    include_metadata=False,
    include_work_dir_outputs=True,
    create_tool_working_directory=True,
    remote_command_params=None,
    remote_job_directory=None,
):
    """
    Compose the sequence of commands necessary to execute a job. This will
    currently include:

        - environment settings corresponding to any requirement tags
        - preparing input files
        - command line taken from job wrapper
        - commands to set metadata (if include_metadata is True)
    """
    remote_command_params = remote_command_params or {}
    shell = job_wrapper.shell
    base_command_line = job_wrapper.get_command_line()
    # job_id = job_wrapper.job_id
    # log.debug( 'Tool evaluation for job (%s) produced command-line: %s' % ( job_id, base_command_line ) )

    commands_builder = CommandsBuilder(base_command_line)

    # Dependency resolution and task splitting are prepended to the
    # command - so they need to appear in the following order to ensure that
    # the underlying application used by version command is available in the
    # environment after dependency resolution, but the task splitting command
    # is still executed in Galaxy's Python environment.

    # One could imagine also allowing dependencies inside of the container but
    # that is too sophisticated for a first crack at this - build your
    # containers ready to go!
    if not container or container.resolve_dependencies:
        __handle_dependency_resolution(commands_builder, job_wrapper,
                                       remote_command_params)

    __handle_task_splitting(commands_builder, job_wrapper)

    if (container and
            modify_command_for_container) or job_wrapper.commands_in_new_shell:
        if container and modify_command_for_container:
            # Many Docker containers do not have /bin/bash.
            external_command_shell = container.shell
        else:
            external_command_shell = shell
        externalized_commands = __externalize_commands(job_wrapper,
                                                       external_command_shell,
                                                       commands_builder,
                                                       remote_command_params,
                                                       container=container)
        if container and modify_command_for_container:
            # Stop now and build the command before handling metadata and
            # copying working directory files back. These should always happen
            # outside of the Docker container - generating metadata outside has
            # no security implications, it means Galaxy does not need to be
            # available inside the container, and not copying workdir outputs
            # back means one can be more restrictive about where the container
            # can write in some circumstances.
            run_in_container_command = container.containerize_command(
                externalized_commands)
            commands_builder = CommandsBuilder(run_in_container_command)
        else:
            commands_builder = CommandsBuilder(externalized_commands)

    for_pulsar = 'script_directory' in remote_command_params
    if not for_pulsar:
        commands_builder.capture_stdout_stderr('../outputs/tool_stdout',
                                               '../outputs/tool_stderr')

    # Don't need to create a separate tool working directory for Pulsar
    # jobs - that is handled by Pulsar.
    if create_tool_working_directory:
        # usually working will already exist, but it will not for task
        # split jobs.

        # Copy working and outputs before job submission so that these can be restored on resubmission
        # xref https://github.com/galaxyproject/galaxy/issues/3289
        commands_builder.prepend_command(PREPARE_DIRS)

    __handle_remote_command_line_building(commands_builder,
                                          job_wrapper,
                                          for_pulsar=for_pulsar)

    container_monitor_command = job_wrapper.container_monitor_command(
        container)
    if container_monitor_command:
        commands_builder.prepend_command(container_monitor_command)

    working_directory = remote_job_directory or job_wrapper.working_directory
    commands_builder.capture_return_code(
        default_exit_code_file(working_directory, job_wrapper.job_id))

    if include_work_dir_outputs:
        __handle_work_dir_outputs(commands_builder, job_wrapper, runner,
                                  remote_command_params)

    if include_metadata and job_wrapper.requires_setting_metadata:
        commands_builder.append_command(f"cd '{working_directory}'")
        __handle_metadata(commands_builder, job_wrapper, runner,
                          remote_command_params)

    return commands_builder.build()
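
capture_return_code arranges for the generated script to record the tool's exit status in the file returned by default_exit_code_file, which the runner reads back later. A toy builder illustrating that effect; the exact shell text the real CommandsBuilder emits is an assumption here:

class TinyCommandsBuilder:
    def __init__(self, initial_command):
        self.commands = initial_command

    def capture_return_code(self, exit_code_path):
        # Run the wrapped command, capture $?, and persist it for the runner.
        self.commands = f"{self.commands}; return_code=$?; echo $return_code > '{exit_code_path}'"

    def build(self):
        return self.commands


builder = TinyCommandsBuilder("cat input.dat > output.dat")
builder.capture_return_code("working/galaxy_1.ec")  # hypothetical path
print(builder.build())
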