Example #1
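    # TESTask constructor (excerpt): stores the runtime context, resolves a
    # base directory with a cwd fallback, sets up StdFsAccess, and opens a
    # tes.HTTPClient for the given TES endpoint URL.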
    def __init__(
            self,
            builder,  # type: Builder
            joborder,  # type: JobOrderType
            make_path_mapper,  # type: Callable[..., PathMapper]
            requirements,  # type: List[Dict[Text, Text]]
            hints,  # type: List[Dict[Text, Text]]
            name,  # type: Text
            runtime_context,
            url,
            spec):
        super(TESTask, self).__init__(builder, joborder, make_path_mapper,
                                      requirements, hints, name)
        self.runtime_context = runtime_context
        self.spec = spec
        self.outputs = None
        self.inplace_update = False
        if runtime_context.basedir is not None:
            self.basedir = runtime_context.basedir
        else:
            self.basedir = os.getcwd()
        self.fs_access = StdFsAccess(self.basedir)

        self.id = None
        self.docker_workdir = '/var/spool/cwl'
        self.state = "UNKNOWN"
        self.poll_interval = 1
        self.poll_retries = 10
        self.client = tes.HTTPClient(url)
Example #2
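    # Variant of the constructor in Example #1 that also accepts an auth
    # token and an optional remote storage URL, passing the token through
    # to tes.HTTPClient.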
    def __init__(
            self,
            builder,  # type: Builder
            joborder,  # type: JSON
            make_path_mapper,  # type: Callable[..., PathMapper]
            requirements,  # type: List[Dict[Text, Text]]
            hints,  # type: List[Dict[Text, Text]]
            name,  # type: Text
            runtime_context,
            url,
            spec,
            remote_storage_url=None,
            token=None):
        super(TESTask, self).__init__(builder, joborder, make_path_mapper,
                                      requirements, hints, name)
        self.runtime_context = runtime_context
        self.spec = spec
        self.outputs = None
        self.inplace_update = False
        self.basedir = runtime_context.basedir or os.getcwd()
        self.fs_access = StdFsAccess(self.basedir)

        self.id = None
        self.state = "UNKNOWN"
        self.exit_code = None
        self.poll_interval = 1
        self.poll_retries = 10
        self.client = tes.HTTPClient(url, token=token)
        self.remote_storage_url = remote_storage_url
        self.token = token
Example #3
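# Test helper: builds a cwltool Builder carrying a single CUDA requirement
# and a resources mapping of {"cudaDeviceCount": 1}; the remaining
# positional arguments are placeholder defaults, including an StdFsAccess
# rooted at "".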
def _makebuilder(cudaReq: CWLObjectType) -> Builder:
    return Builder(
        {},
        [],
        [],
        {},
        schema.Names(),
        [cudaReq],
        [],
        {"cudaDeviceCount": 1},
        None,
        None,
        StdFsAccess,
        StdFsAccess(""),
        None,
        0.1,
        False,
        False,
        False,
        "",
        "",
        "",
        "",
        INTERNAL_VERSION,
        "docker",
    )
Example #4
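    # Airflow-style execute(): merges the "outputs" dicts pulled from
    # upstream tasks via XCom, relocates the workflow results into the
    # final output folder with relocateOutputs, and removes the temporary
    # folder afterwards.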
    def execute(self, context):
        collected_outputs = {}
        for task_outputs in self.xcom_pull(
                context=context,
                task_ids=[task.task_id for task in self.upstream_list]):
            collected_outputs = merge(collected_outputs,
                                      task_outputs["outputs"])
        logging.debug('Collected outputs: \n{}'.format(
            json.dumps(collected_outputs, indent=4)))
        tmp_folder = collected_outputs["tmp_folder"]
        output_folder = collected_outputs["output_folder"]
        relocated_outputs = relocateOutputs(outputObj={
            output_id: collected_outputs[output_src]
            for output_src, output_id in self.dag.get_output_list().items()
            if output_src in collected_outputs
        },
                                            outdir=output_folder,
                                            output_dirs=[output_folder],
                                            action="copy",
                                            fs_access=StdFsAccess(""))

        relocated_outputs = {
            key.split("/")[-1]: val
            for key, val in relocated_outputs.items()
        }
        shutil.rmtree(tmp_folder, ignore_errors=False)
        logging.debug(
            'Deleted temporary output directory: \n{}'.format(tmp_folder))
        logging.info("WORKFLOW RESULTS\n" +
                     json.dumps(relocated_outputs, indent=4))
Example #5
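    # Minimal workflow constructor: derives basedir from kwargs (falling
    # back to the current working directory) and creates the StdFsAccess
    # used for local file operations.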
    def __init__(self, kwargs):
        super(CommandLineWorkflow, self).__init__()

        if kwargs.get("basedir") is not None:
            self.basedir = kwargs.get("basedir")
        else:
            self.basedir = os.getcwd()
        self.fs_access = StdFsAccess(self.basedir)
Example #6
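    # Pipeline constructor: holds worker threads, the raw kwargs, a
    # tes.HTTPClient, and an StdFsAccess rooted at basedir.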
    def __init__(self, url, kwargs):
        self.threads = []
        self.kwargs = kwargs
        self.client = tes.HTTPClient(url)
        if kwargs.get("basedir") is not None:
            self.basedir = kwargs.get("basedir")
        else:
            self.basedir = os.getcwd()
        self.fs_access = StdFsAccess(self.basedir)
Example #7
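    # Same shape as Example #6, but it invokes the superclass constructor
    # and exposes the TES client as "service".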
    def __init__(self, url, kwargs):
        super(TESPipeline, self).__init__()
        self.kwargs = kwargs
        self.service = tes.HTTPClient(url)
        if kwargs.get("basedir") is not None:
            self.basedir = kwargs.get("basedir")
        else:
            self.basedir = os.getcwd()
        self.fs_access = StdFsAccess(self.basedir)
Example #8
    def cwl_gather(self, context):
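        # Gather step: pulls "promises" from upstream operators via XCom,
        # relocates the final outputs into output_folder, and removes the
        # temporary outdir.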
        upstream_task_ids = [t.task_id for t in self.dag.tasks if isinstance(t, CWLStepOperator)] + \
                            ([self.reader_task_id] if self.reader_task_id else [])
        upstream_data = self.xcom_pull(context=context,
                                       task_ids=upstream_task_ids)

        _logger.debug('{0}: xcom_pull data: \n {1}'.format(
            self.task_id, dumps(upstream_data, indent=4)))

        promises = {}
        for data in upstream_data:
            promises = merge(promises, data["promises"])
            if "outdir" in data:
                self.outdir = data["outdir"]

        if "output_folder" in promises:
            self.output_folder = os.path.abspath(promises["output_folder"])
        else:
            return

        _move_job = {out: promises[out] for out in self.outputs}
        _logger.debug(
            '{0}: Final job: \n{1}\nMoving data: \n{2}\nMoving job:{3}'.format(
                self.task_id, dumps(promises, indent=4),
                dumps(self.outputs, indent=4), dumps(_move_job, indent=4)))

        _files_moved = relocateOutputs(_move_job, self.output_folder,
                                       [self.outdir],
                                       self.dag.default_args["move_outputs"],
                                       StdFsAccess(""))
        _job_result = {
            val.split("/")[-1]: _files_moved[out]  # TODO: is split required?
            for out, val in self.outputs.items() if out in _files_moved
        }
        try:
            if self.outdir:
                shutil.rmtree(self.outdir, ignore_errors=False)
                _logger.info(
                    '{0}: Deleted temporary output directory {1}'.format(
                        self.task_id, self.outdir))
        except Exception as e:
            _logger.error(
                "{0}: Failed to remove temporary output directory: {1}".format(
                    self.task_id, e))
        _logger.info("Job done: {}".format(dumps(_job_result, indent=4)))

        return _job_result, promises
Example #9
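# Normalizes a CWL output object in place: expands locations to file://
# URIs, fills in basename/nameroot/nameext via normalizeFilesDirs, and
# attaches checksums computed through an StdFsAccess instance.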
def add_details_to_outputs(outputs):
    adjustFileObjs(outputs, expand_to_file_uri)
    adjustDirObjs(outputs, expand_to_file_uri)
    normalizeFilesDirs(outputs)
    adjustFileObjs(outputs, partial(compute_checksums, StdFsAccess("")))
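A brief usage sketch for the helper above; the mapping shape follows the CWL
File object model, "results/report.txt" is a made-up path that must exist on
disk, and expand_to_file_uri is assumed to absolutize local paths:

outputs = {"report": {"class": "File", "location": "results/report.txt"}}
add_details_to_outputs(outputs)
# outputs["report"] should now carry a file:// location, the derived
# basename/nameroot/nameext fields, and a sha1$... checksum read through
# StdFsAccess.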
Example #10
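# Complete TESTask implementation (older, Python 2-compatible cwltool
# APIs): translates a CWL job into a TES Task message, submits it through
# tes.HTTPClient, polls with a capped randomized backoff, then collects
# outputs and reports them via output_callback.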
class TESTask(JobBase):
    JobOrderType = Dict[Text, Union[Dict[Text, Any], List, Text]]

    def __init__(
            self,
            builder,  # type: Builder
            joborder,  # type: JobOrderType
            make_path_mapper,  # type: Callable[..., PathMapper]
            requirements,  # type: List[Dict[Text, Text]]
            hints,  # type: List[Dict[Text, Text]]
            name,  # type: Text
            runtime_context,
            url,
            spec):
        super(TESTask, self).__init__(builder, joborder, make_path_mapper,
                                      requirements, hints, name)
        self.runtime_context = runtime_context
        self.spec = spec
        self.outputs = None
        self.inplace_update = False
        if runtime_context.basedir is not None:
            self.basedir = runtime_context.basedir
        else:
            self.basedir = os.getcwd()
        self.fs_access = StdFsAccess(self.basedir)

        self.id = None
        self.docker_workdir = '/var/spool/cwl'
        self.state = "UNKNOWN"
        self.poll_interval = 1
        self.poll_retries = 10
        self.client = tes.HTTPClient(url)

    def get_container(self):
        default = "python:2.7"
        container = default
        if self.runtime_context.default_container:
            container = self.runtime_context.default_container

        reqs = self.spec.get("requirements", []) + self.spec.get("hints", [])
        for i in reqs:
            if i.get("class", "NA") == "DockerRequirement":
                container = i.get("dockerPull", i.get("dockerImageId",
                                                      default))
        return container

    def create_input(self, name, d):
        if "contents" in d:
            return tes.Input(name=name,
                             description="cwl_input:%s" % (name),
                             path=d["path"],
                             content=d["contents"],
                             type=d["class"].upper())
        else:
            return tes.Input(name=name,
                             description="cwl_input:%s" % (name),
                             url=d["location"],
                             path=d["path"],
                             type=d["class"].upper())

    def parse_job_order(self, k, v, inputs):
        if isinstance(v, dict):
            if all([i in v for i in ["location", "path", "class"]]):
                inputs.append(self.create_input(k, v))

                if "secondaryFiles" in v:
                    for f in v["secondaryFiles"]:
                        self.parse_job_order(f["basename"], f, inputs)

            else:
                for sk, sv in v.items():
                    if isinstance(sv, dict):
                        self.parse_job_order(sk, sv, inputs)

                    else:
                        break

        elif isinstance(v, list):
            for i in range(len(v)):
                if isinstance(v[i], dict):
                    self.parse_job_order("%s[%s]" % (k, i), v[i], inputs)

                else:
                    break

        return inputs

    def parse_listing(self, listing, inputs):
        for item in listing:

            if "writable" in item:
                raise UnsupportedRequirement(
                    "The TES spec does not allow for writable inputs")

            if "contents" in item:
                loc = self.fs_access.join(self.tmpdir, item["basename"])
                with self.fs_access.open(loc, "wb") as gen:
                    gen.write(item["contents"])
            else:
                loc = item["location"]

            parameter = tes.Input(
                name=item["basename"],
                description="InitialWorkDirRequirement:cwl_input:%s" %
                (item["basename"]),
                url=file_uri(loc),
                path=self.fs_access.join(self.docker_workdir,
                                         item["basename"]),
                type=item["class"].upper())
            inputs.append(parameter)

        return inputs

    def get_inputs(self):
        inputs = []

        # find all primary and secondary input files
        for k, v in self.joborder.items():
            self.parse_job_order(k, v, inputs)

        # manage InitialWorkDirRequirement
        self.parse_listing(self.generatefiles["listing"], inputs)

        return inputs

    def get_envvars(self):
        env = self.environment
        vars_to_preserve = self.runtime_context.preserve_environment
        if self.runtime_context.preserve_entire_environment:
            vars_to_preserve = os.environ
        if vars_to_preserve is not None:
            for key, value in os.environ.items():
                if key in vars_to_preserve and key not in env:
                    # On Windows, subprocess env can't handle unicode.
                    env[key] = str(value) if onWindows() else value
        env["HOME"] = str(self.outdir) if onWindows() else self.outdir
        env["TMPDIR"] = str(self.tmpdir) if onWindows() else self.tmpdir
        return env

    def create_task_msg(self):
        input_parameters = self.get_inputs()
        output_parameters = []

        if self.stdout is not None:
            parameter = tes.Output(name="stdout",
                                   url=self.output2url(self.stdout),
                                   path=self.output2path(self.stdout))
            output_parameters.append(parameter)

        if self.stderr is not None:
            parameter = tes.Output(name="stderr",
                                   url=self.output2url(self.stderr),
                                   path=self.output2path(self.stderr))
            output_parameters.append(parameter)

        output_parameters.append(
            tes.Output(name="workdir",
                       url=self.output2url(""),
                       path=self.docker_workdir,
                       type="DIRECTORY"))

        container = self.get_container()
        cpus = None
        ram = None
        disk = None

        for i in self.builder.requirements:
            if i.get("class", "NA") == "ResourceRequirement":
                cpus = i.get("coresMin", i.get("coresMax", None))
                ram = i.get("ramMin", i.get("ramMax", None))
                disk = i.get("outdirMin", i.get("outdirMax", None))

                if (cpus is None or isinstance(cpus, str)) or \
                   (ram is None or isinstance(ram, str)) or \
                   (disk is None or isinstance(disk, str)):
                    raise UnsupportedRequirement(
                        "cwl-tes does not support dynamic resource requests")

                ram = ram / 953.674 if ram is not None else None
                disk = disk / 953.674 if disk is not None else None
            elif i.get("class", "NA") == "DockerRequirement":
                if i.get("dockerOutputDirectory", None) is not None:
                    output_parameters.append(
                        tes.Output(name="dockerOutputDirectory",
                                   url=self.output2url(""),
                                   path=i.get("dockerOutputDirectory"),
                                   type="DIRECTORY"))

        create_body = tes.Task(name=self.name,
                               description=self.spec.get("doc", ""),
                               executors=[
                                   tes.Executor(
                                       command=self.command_line,
                                       image=container,
                                       workdir=self.docker_workdir,
                                       stdout=self.output2path(self.stdout),
                                       stderr=self.output2path(self.stderr),
                                       stdin=self.stdin,
                                       env=self.get_envvars())
                               ],
                               inputs=input_parameters,
                               outputs=output_parameters,
                               resources=tes.Resources(cpu_cores=cpus,
                                                       ram_gb=ram,
                                                       disk_gb=disk),
                               tags={"CWLDocumentId": self.spec.get("id")})

        return create_body

    def run(self, runtimeContext):
        log.debug("[job %s] self.__dict__ in run() ----------------------",
                  self.name)
        log.debug(pformat(self.__dict__))

        task = self.create_task_msg()

        log.info("[job %s] CREATED TASK MSG----------------------", self.name)
        log.info(pformat(task))

        try:
            self.id = self.client.create_task(task)
            log.info("[job %s] SUBMITTED TASK ----------------------",
                     self.name)
            log.info("[job %s] task id: %s ", self.name, self.id)
        except Exception as e:
            log.error("[job %s] Failed to submit task to TES service:\n%s",
                      self.name, e)
            raise WorkflowException(e)

        max_tries = 10
        current_try = 1
        while not self.is_done():
            delay = 1.5 * current_try**2
            time.sleep(
                random.randint(round(delay - 0.5 * delay),
                               round(delay + 0.5 * delay)))
            log.debug("[job %s] POLLING %s", self.name, pformat(self.id))
            try:
                task = self.client.get_task(self.id, "MINIMAL")
                self.state = task.state
            except Exception as e:
                log.error("[job %s] POLLING ERROR %s", self.name, e)
                if current_try <= max_tries:
                    current_try += 1
                    continue
                else:
                    log.error("[job %s] MAX POLLING RETRIES EXCEEDED",
                              self.name)
                    break

        try:
            outputs = self.collect_outputs(self.outdir)
            # Decode any bytes keys/values so downstream consumers get text.
            cleaned_outputs = {}
            for k, v in outputs.items():
                if isinstance(k, bytes):
                    k = k.decode("utf8")
                if isinstance(v, bytes):
                    v = v.decode("utf8")
                cleaned_outputs[k] = v
            self.outputs = cleaned_outputs
            self.output_callback(self.outputs, "success")
        except Exception as e:
            log.error("[job %s] job error:\n%s", self.name, e)
            self.output_callback({}, "permanentFail")
        finally:
            if self.outputs is not None:
                log.info("[job %s] OUTPUTS ------------------", self.name)
                log.info(pformat(self.outputs))
            self.cleanup(self.runtime_context.rm_tmpdir)
        return

    def is_done(self):
        terminal_states = [
            "COMPLETE", "CANCELED", "EXECUTOR_ERROR", "SYSTEM_ERROR"
        ]
        if self.state in terminal_states:
            log.info("[job %s] FINAL JOB STATE: %s ------------------",
                     self.name, self.state)
            if self.state != "COMPLETE":
                log.error("[job %s] task id: %s", self.name, self.id)
                log.error("[job %s] logs: %s", self.name,
                          self.client.get_task(self.id, "FULL").logs)
            return True
        return False

    def cleanup(self, rm_tmpdir):
        log.debug("[job %s] STARTING CLEAN UP ------------------", self.name)
        if self.stagedir and os.path.exists(self.stagedir):
            log.debug("[job %s] Removing input staging directory %s",
                      self.name, self.stagedir)
            shutil.rmtree(self.stagedir, True)

        if rm_tmpdir:
            log.debug("[job %s] Removing temporary directory %s", self.name,
                      self.tmpdir)
            shutil.rmtree(self.tmpdir, True)

    def output2url(self, path):
        if path is not None:
            return file_uri(
                self.fs_access.join(self.outdir, os.path.basename(path)))
        return None

    def output2path(self, path):
        if path is not None:
            return self.fs_access.join(self.docker_workdir, path)
        return None
Example #11
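# Later revision of the TESTask in Example #10: adds token authentication
# and optional remote storage, reads resource requests from
# builder.resources instead of re-parsing ResourceRequirement, tracks the
# executor exit code, and honors successCodes when deciding the final
# process status.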
class TESTask(JobBase):
    JobOrderType = Dict[Text, Union[Dict[Text, Any], List, Text]]

    def __init__(
            self,
            builder,  # type: Builder
            joborder,  # type: JSON
            make_path_mapper,  # type: Callable[..., PathMapper]
            requirements,  # type: List[Dict[Text, Text]]
            hints,  # type: List[Dict[Text, Text]]
            name,  # type: Text
            runtime_context,
            url,
            spec,
            remote_storage_url=None,
            token=None):
        super(TESTask, self).__init__(builder, joborder, make_path_mapper,
                                      requirements, hints, name)
        self.runtime_context = runtime_context
        self.spec = spec
        self.outputs = None
        self.inplace_update = False
        self.basedir = runtime_context.basedir or os.getcwd()
        self.fs_access = StdFsAccess(self.basedir)

        self.id = None
        self.state = "UNKNOWN"
        self.exit_code = None
        self.poll_interval = 1
        self.poll_retries = 10
        self.client = tes.HTTPClient(url, token=token)
        self.remote_storage_url = remote_storage_url
        self.token = token

    def get_container(self):
        default = self.runtime_context.default_container or "python:2.7"
        container = default

        docker_req, _ = self.get_requirement("DockerRequirement")
        if docker_req:
            container = docker_req.get(
                "dockerPull", docker_req.get("dockerImageId", default))
        return container

    def create_input(self, name, d):
        if "contents" in d:
            return tes.Input(name=name,
                             description="cwl_input:%s" % (name),
                             path=d["path"],
                             content=d["contents"],
                             type=d["class"].upper())
        return tes.Input(name=name,
                         description="cwl_input:%s" % (name),
                         url=d["location"],
                         path=d["path"],
                         type=d["class"].upper())

    def parse_job_order(self, k, v, inputs):
        if isinstance(v, MutableMapping):
            if all([i in v for i in ["location", "path", "class"]]):
                inputs.append(self.create_input(k, v))

                if "secondaryFiles" in v:
                    for f in v["secondaryFiles"]:
                        self.parse_job_order(f["basename"], f, inputs)

            else:
                for sk, sv in v.items():
                    if isinstance(sv, MutableMapping):
                        self.parse_job_order(sk, sv, inputs)

                    else:
                        break

        elif isinstance(v, MutableSequence):
            for i in range(len(v)):
                if isinstance(v[i], MutableMapping):
                    self.parse_job_order("%s[%s]" % (k, i), v[i], inputs)

                else:
                    break

        return inputs

    def parse_listing(self, listing, inputs):
        for item in listing:

            if "writable" in item:
                raise UnsupportedRequirement(
                    "The TES spec does not allow for writable inputs")

            if "contents" in item:
                loc = self.fs_access.join(self.tmpdir, item["basename"])
                with self.fs_access.open(loc, "wb") as gen:
                    gen.write(item["contents"])
            else:
                loc = item["location"]

            if urllib.parse.urlparse(loc).scheme:
                url = loc
            else:
                url = file_uri(loc)
            parameter = tes.Input(
                name=item["basename"],
                description="InitialWorkDirRequirement:cwl_input:%s" %
                (item["basename"]),
                url=url,
                path=self.fs_access.join(self.builder.outdir,
                                         item["basename"]),
                type=item["class"].upper())
            inputs.append(parameter)

        return inputs

    def get_inputs(self):
        inputs = []

        # find all primary and secondary input files
        for k, v in self.joborder.items():
            self.parse_job_order(k, v, inputs)

        # manage InitialWorkDirRequirement
        self.parse_listing(self.generatefiles["listing"], inputs)

        return inputs

    def get_envvars(self):
        env = self.environment
        vars_to_preserve = self.runtime_context.preserve_environment
        if self.runtime_context.preserve_entire_environment:
            vars_to_preserve = os.environ
        if vars_to_preserve is not None:
            for key, value in os.environ.items():
                if key in vars_to_preserve and key not in env:
                    # On Windows, subprocess env can't handle unicode.
                    env[key] = str(value) if onWindows() else value
        env["HOME"] = str(self.builder.outdir) if onWindows() \
            else self.builder.outdir
        env["TMPDIR"] = str(self.builder.tmpdir) if onWindows() \
            else self.builder.tmpdir
        return env

    def create_task_msg(self):
        input_parameters = self.get_inputs()
        output_parameters = []

        if self.stdout is not None:
            parameter = tes.Output(name="stdout",
                                   url=self.output2url(self.stdout),
                                   path=self.output2path(self.stdout))
            output_parameters.append(parameter)

        if self.stderr is not None:
            parameter = tes.Output(name="stderr",
                                   url=self.output2url(self.stderr),
                                   path=self.output2path(self.stderr))
            output_parameters.append(parameter)

        output_parameters.append(
            tes.Output(name="workdir",
                       url=self.output2url(""),
                       path=self.builder.outdir,
                       type="DIRECTORY"))

        container = self.get_container()

        res_reqs = self.builder.resources
        ram = res_reqs['ram'] / 953.674
        disk = (res_reqs['outdirSize'] + res_reqs['tmpdirSize']) / 953.674
        cpus = res_reqs['cores']

        docker_req, _ = self.get_requirement("DockerRequirement")
        # docker_req is a mapping, so check for the key rather than using
        # hasattr (which looks for an attribute and would always be False).
        if docker_req and docker_req.get("dockerOutputDirectory"):
            output_parameters.append(
                tes.Output(name="dockerOutputDirectory",
                           url=self.output2url(""),
                           path=docker_req["dockerOutputDirectory"],
                           type="DIRECTORY"))

        create_body = tes.Task(name=self.name,
                               description=self.spec.get("doc", ""),
                               executors=[
                                   tes.Executor(
                                       command=self.command_line,
                                       image=container,
                                       workdir=self.builder.outdir,
                                       stdout=self.output2path(self.stdout),
                                       stderr=self.output2path(self.stderr),
                                       stdin=self.stdin,
                                       env=self.get_envvars())
                               ],
                               inputs=input_parameters,
                               outputs=output_parameters,
                               resources=tes.Resources(cpu_cores=cpus,
                                                       ram_gb=ram,
                                                       disk_gb=disk),
                               tags={"CWLDocumentId": self.spec.get("id")})

        return create_body

    def run(
        self,
        runtimeContext,  # type: RuntimeContext
        tmpdir_lock=None  # type: Optional[threading.Lock]
    ):  # type: (...) -> None
        log.debug("[job %s] self.__dict__ in run() ----------------------",
                  self.name)
        log.debug(pformat(self.__dict__))
        if not self.successCodes:
            self.successCodes = [0]

        task = self.create_task_msg()

        log.info("[job %s] CREATED TASK MSG----------------------", self.name)
        log.info(pformat(task))

        try:
            self.id = self.client.create_task(task)
            log.info("[job %s] SUBMITTED TASK ----------------------",
                     self.name)
            log.info("[job %s] task id: %s ", self.name, self.id)
        except Exception as e:
            log.error("[job %s] Failed to submit task to TES service:\n%s",
                      self.name, e)
            raise WorkflowException(e)

        max_tries = 10
        current_try = 1
        self.exit_code = None
        while not self.is_done():
            delay = 1.5 * current_try**2
            time.sleep(
                random.randint(round(delay - 0.5 * delay),
                               round(delay + 0.5 * delay)))
            try:
                task = self.client.get_task(self.id, "MINIMAL")
                self.state = task.state
                log.debug("[job %s] POLLING %s, result: %s", self.name,
                          pformat(self.id), task.state)
            except Exception as e:
                log.error("[job %s] POLLING ERROR %s", self.name, e)
                if current_try <= max_tries:
                    current_try += 1
                    continue
                else:
                    log.error("[job %s] MAX POLLING RETRIES EXCEEDED",
                              self.name)
                    break

        try:
            process_status = None
            if self.state != "COMPLETE" \
                    and self.exit_code not in self.successCodes:
                process_status = "permanentFail"
                log.error("[job %s] job error:\n%s", self.name, self.state)
            if self.remote_storage_url:
                remote_fs_access = runtimeContext.make_fs_access(
                    self.remote_storage_url)
                remote_cwl_output_json = remote_fs_access.exists(
                    remote_fs_access.join(self.remote_storage_url,
                                          "cwl.output.json"))
                # Point the builder at remote storage unless the task wrote
                # its own cwl.output.json there, then restore the outdir.
                original_outdir = self.builder.outdir
                if not remote_cwl_output_json:
                    self.builder.outdir = self.remote_storage_url
                outputs = self.collect_outputs(self.remote_storage_url,
                                               self.exit_code)
                self.builder.outdir = original_outdir
            else:
                outputs = self.collect_outputs(self.outdir, self.exit_code)
            cleaned_outputs = {}
            for k, v in outputs.items():
                if isinstance(k, bytes):
                    k = k.decode("utf8")
                if isinstance(v, bytes):
                    v = v.decode("utf8")
                cleaned_outputs[k] = v
            self.outputs = cleaned_outputs
            if not process_status:
                process_status = "success"
        except Exception as err:
            log.error("[job %s] job error:\n%s", self.name, err)
            if log.isEnabledFor(logging.DEBUG):
                log.exception(err)
            process_status = "permanentFail"
        finally:
            if self.outputs is None:
                self.outputs = {}
            with self.runtime_context.workflow_eval_lock:
                self.output_callback(self.outputs, process_status)
            log.info("[job %s] OUTPUTS ------------------", self.name)
            log.info(pformat(self.outputs))
            self.cleanup(self.runtime_context.rm_tmpdir)
        return

    def is_done(self):
        terminal_states = [
            "COMPLETE", "CANCELED", "EXECUTOR_ERROR", "SYSTEM_ERROR"
        ]
        if self.state in terminal_states:
            log.info("[job %s] FINAL JOB STATE: %s ------------------",
                     self.name, self.state)
            if self.state != "COMPLETE":
                log.error("[job %s] task id: %s", self.name, self.id)
                logs = self.client.get_task(self.id, "FULL").logs
                log.error("[job %s] logs: %s", self.name, logs)
                if isinstance(logs, MutableSequence):
                    last_log = logs[-1]
                    if isinstance(last_log, tes.TaskLog) and last_log.logs:
                        self.exit_code = last_log.logs[-1].exit_code
            return True
        return False

    def cleanup(self, rm_tmpdir):
        log.debug("[job %s] STARTING CLEAN UP ------------------", self.name)
        if self.stagedir and os.path.exists(self.stagedir):
            log.debug("[job %s] Removing input staging directory %s",
                      self.name, self.stagedir)
            shutil.rmtree(self.stagedir, True)

        if rm_tmpdir:
            log.debug("[job %s] Removing temporary directory %s", self.name,
                      self.tmpdir)
            shutil.rmtree(self.tmpdir, True)

    def output2url(self, path):
        if path is not None:
            if self.remote_storage_url:
                return self.fs_access.join(self.remote_storage_url,
                                           os.path.basename(path))
            return file_uri(
                self.fs_access.join(self.outdir, os.path.basename(path)))
        return None

    def output2path(self, path):
        if path is not None:
            return self.fs_access.join(self.builder.outdir, path)
        return None
Example #12
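# Generator used as a test fixture (presumably @pytest.fixture-decorated in
# the original suite): yields an open ResearchObject backed by StdFsAccess
# and closes it during teardown.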
def research_object() -> Generator[ResearchObject, None, None]:
    re_ob = ResearchObject(StdFsAccess(""))
    yield re_ob
    re_ob.close()
Example #13
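# pytest test exercising DockerCommandLineJob staging helpers: each
# numbered tmp_path subdirectory checks one operation (writable file,
# writable directory, cidfile creation, literal file creation) and asserts
# that the tmpdir_prefix directive is respected.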
def test_docker_tmpdir_prefix(tmp_path: Path) -> None:
    """Test that DockerCommandLineJob respects temp directory directives."""
    (tmp_path / "3").mkdir()
    tmpdir_prefix = str(tmp_path / "3" / "ttmp")
    runtime_context = RuntimeContext({
        "tmpdir_prefix": tmpdir_prefix,
        "user_space_docker_cmd": None
    })
    builder = Builder(
        {},
        [],
        [],
        {},
        schema.Names(),
        [],
        [],
        {},
        None,
        None,
        StdFsAccess,
        StdFsAccess(""),
        None,
        0.1,
        False,
        False,
        False,
        "",
        runtime_context.get_outdir(),
        runtime_context.get_tmpdir(),
        runtime_context.get_stagedir(),
        INTERNAL_VERSION,
    )
    job = DockerCommandLineJob(builder, {}, PathMapper, [], [], "")
    runtime: List[str] = []

    volume_writable_file = MapperEnt(resolved=get_data("tests/2.fastq"),
                                     target="foo",
                                     type=None,
                                     staged=None)
    (tmp_path / "1").mkdir()
    job.add_writable_file_volume(runtime, volume_writable_file, None,
                                 str(tmp_path / "1" / "writable_file"))
    children = sorted((tmp_path / "1").glob("*"))
    assert len(children) == 1
    subdir = children[0]
    assert subdir.name.startswith("writable_file")
    assert len(sorted(subdir.glob("*"))) == 1
    assert (subdir / "2.fastq").exists()

    resolved_writable_dir = tmp_path / "data_orig"
    resolved_writable_dir.mkdir(parents=True)
    volume_dir = MapperEnt(resolved=str(resolved_writable_dir),
                           target="bar",
                           type=None,
                           staged=None)
    (tmp_path / "2").mkdir()
    job.add_writable_directory_volume(runtime, volume_dir, None,
                                      str(tmp_path / "2" / "dir"))
    children = sorted((tmp_path / "2").glob("*"))
    assert len(children) == 1
    subdir = children[0]
    assert subdir.name.startswith("dir")
    assert len(sorted(subdir.glob("*"))) == 1
    assert (subdir / "data_orig").exists()

    cidfile = job.create_runtime({}, runtime_context)[1]
    assert cidfile and cidfile.startswith(tmpdir_prefix)

    volume_file = MapperEnt(resolved="Hoopla!",
                            target="baz",
                            type=None,
                            staged=None)
    (tmp_path / "4").mkdir()
    job.create_file_and_add_volume(runtime, volume_file, None, None,
                                   str(tmp_path / "4" / "file"))
    children = sorted((tmp_path / "4").glob("*"))
    assert len(children) == 1
    subdir = children[0]
    assert subdir.name.startswith("file")
    assert len(sorted(subdir.glob("*"))) == 1
    assert (subdir / "baz").exists()
Example #14
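# Same fixture pattern as Example #12, referencing ResearchObject through
# the provenance module.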
def research_object():
    re_ob = provenance.ResearchObject(StdFsAccess(""))
    yield re_ob
    re_ob.close()