Example #1
0
    def process_potential_out(key, out):
        """Turn one CWL output value into a {tag: WorkflowOutputModel} mapping.

        Lists recurse element-wise and the per-tag models are gathered into
        lists, so sharded outputs stay grouped under the same tag.
        """
        if isinstance(out, list):
            grouped = {}
            for element in out:
                sub = CWLTool.process_potential_out(key, element)
                for tag, model in sub.items():
                    grouped.setdefault(tag, []).append(model)
            return grouped

        updates = {}

        if isinstance(out, str):
            # A bare string is a value output: nothing on disk to copy.
            updates[key] = WorkflowOutputModel(
                tag=key,
                original_path=None,
                iscopyable=False,
                timestamp=DateUtil.now(),
                value=out,
                new_path=None,
                output_folder=None,
                output_name=None,
                secondaries=None,
                extension=None,
            )

        elif "path" in out:
            # A dict carrying "path" is a file output, which is copyable.
            updates[key] = WorkflowOutputModel(
                tag=key,
                iscopyable=True,
                original_path=out["path"],
                timestamp=DateUtil.now(),
                new_path=None,
                output_folder=None,
                output_name=None,
                secondaries=None,
                extension=None,
            )
            # Register each secondary file under the tag "<key>_<extension>".
            for secondary in out.get("secondaryFiles", []):
                spath = secondary["path"]
                ext = spath.rpartition(".")[-1]
                newk = f"{key}_{ext}"
                updates[newk] = WorkflowOutputModel(
                    tag=newk,
                    original_path=spath,
                    iscopyable=True,
                    timestamp=DateUtil.now(),
                    new_path=None,
                    output_folder=None,
                    output_name=None,
                    secondaries=None,
                    extension=None,
                )

        return updates
    def set(self, key: ProgressKeys):
        """Record the progress key as reached; a no-op if already present."""
        if not self.has(key):
            with self.with_cursor() as cursor:
                cursor.execute(
                    self._insert_statement,
                    (self.wid, key.value, str(DateUtil.now())),
                )
Example #3
0
    def parse_output(key, value):
        """Convert a Cromwell output entry into ``(tag, WorkflowOutputModel)``.

        List-valued locations recurse element-wise and return a list of
        models under the same tag.
        """
        # Drop the leading workflow-name segment from the dotted output key.
        newkey = "".join(key.split(".")[1:])

        # Dict entries carry the file location under "location".
        location = value["location"] if isinstance(value, dict) else value

        if isinstance(location, list):
            return newkey, [Cromwell.parse_output(key, item)[1] for item in location]

        # It's hard to know whether the value we get is a File or just a value,
        # so we'll write it in both values and let Janis figure it out later
        model = WorkflowOutputModel(
            tag=newkey,
            original_path=location,
            timestamp=DateUtil.now(),
            new_path=None,
            output_folder=None,
            output_name=None,
            secondaries=None,
            extension=None,
            value=location,
            iscopyable=True,
        )
        return newkey, model
Example #4
0
 def update_paths(self, tag: str, original_path: str, new_path: str):
     """Persist updated original/new paths (and a fresh timestamp) for one
     output row of this workflow, identified by (wid, tag)."""
     update_sql = """\
         UPDATE outputs SET
             original_path=?,
             new_path=?,
             timestamp=?
         WHERE wid = ? AND tag = ?
         """
     params = (original_path, new_path, DateUtil.now(), self.wid, tag)
     with self.with_cursor() as cursor:
         cursor.execute(update_sql, params)
     self.commit()
Example #5
0
    def format(self, **kwargs):
        """Render a multi-line, human-readable summary of this workflow run.

        Covers identifiers, directories, status, timing, last-updated info,
        the per-job breakdown, and any outputs or error. ``kwargs`` are
        forwarded to each job's ``format``.
        """
        tb = "    "  # indent unit handed to job formatting
        nl = "\n"  # named so it can be used inside the f-string below

        # Treat a still-running workflow as finishing "now" for the duration.
        fin = self.finish if self.finish else DateUtil.now()
        duration = round(
            (fin - self.start).total_seconds()) if self.start else 0

        updated_text = "Unknown"
        if self.last_updated:
            secs_ago = int(
                (DateUtil.now() - self.last_updated).total_seconds())
            # Anything within ~2 seconds reads better as "Just now".
            if secs_ago > 2:
                updated_text = second_formatter(secs_ago) + " ago"
            else:
                updated_text = "Just now"
            updated_text += f" ({self.last_updated.replace(microsecond=0).isoformat()})"

        return f"""\
WID:        {self.wid}
EngId:      {self.engine_wid}
Name:       {self.name}
Engine:     {self.engine}

Task Dir:   {self.outdir}
Exec Dir:   {self.execution_dir}

Status:     {self.status}
Duration:   {second_formatter(duration)}
Start:      {self.start.isoformat() if self.start else 'N/A'}
Finish:     {self.finish.isoformat() if self.finish else "N/A"}
Updated:    {updated_text}

Jobs: 
{nl.join(j.format(tb, **kwargs) for j in sorted(self.jobs, key=lambda j: j.start or DateUtil.now()))}       

{("Outputs:" + "".join(nl + tb + o.format() for o in self.outputs) if self.outputs else '')}
{("Error: " + self.error) if self.error else ''}
        """.strip()
Example #6
0
    def start_from_paths(self, wid, source_path: str, input_path: str,
                         deps_path: str):
        """Launch CWLTool for a translated workflow and begin monitoring it.

        :param wid: workflow id, used for logging and progress callbacks
        :param source_path: path to the translated CWL workflow document
        :param input_path: path to the inputs (job) file
        :param deps_path: path to the dependencies archive (unused in this
            method's visible body)
        :return: wid, once the subprocess and log watcher are running
        """

        from janis_assistant.management.configuration import JanisConfiguration

        jc = JanisConfiguration.manager()

        # Seed in-memory task metadata; updated later via logger callbacks.
        self.taskmeta = {
            "start": DateUtil.now(),
            "status": TaskStatus.PROCESSING,
            "jobs": {},
        }
        config: CWLToolConfiguration = self.config

        if Logger.CONSOLE_LEVEL == LogLevel.VERBOSE:
            config.debug = True

        # Colour codes would corrupt the parsed log output.
        config.disable_color = True

        # more options
        if not config.tmpdir_prefix:
            config.outdir = self.execution_dir + "/"
            config.tmpdir_prefix = self.execution_dir + "/"
            config.leave_tmpdir = True

        if jc.call_caching_enabled:
            config.cachedir = os.path.join(self.execution_dir, "cached/")

        cmd = config.build_command_line(source_path, input_path)

        Logger.debug("Running command: '" + " ".join(cmd) + "'")

        # setsid puts cwltool in its own process group so it can be
        # signalled independently of this process.
        process = subprocess.Popen(cmd,
                                   stdout=subprocess.PIPE,
                                   preexec_fn=os.setsid,
                                   stderr=subprocess.PIPE)
        self.taskmeta["status"] = TaskStatus.RUNNING
        Logger.info("CWLTool has started with pid=" + str(process.pid))
        self.process_id = process.pid

        # The logger tails stdout/stderr and drives metadata/exit callbacks.
        self._logger = CWLToolLogger(
            wid,
            process,
            logfp=open(self.logfile, "a+"),
            metadata_callback=self.task_did_update,
            exit_function=self.task_did_exit,
        )

        return wid
Example #7
0
    def task_did_exit(self, logger: CWLToolLogger, status: TaskStatus):
        """Finalise task metadata once the CWLTool process has exited, and
        notify any registered progress callbacks."""
        Logger.debug("CWLTool fired 'did exit'")

        self.taskmeta.update(
            status=status,
            finish=DateUtil.now(),
            outputs=logger.outputs,
        )

        # Propagate a non-success terminal status onto any unfinished jobs.
        if status != TaskStatus.COMPLETED:
            jobs: Dict[str, WorkflowJobModel] = self.taskmeta.get("jobs")
            for job in jobs.values():
                if job.status != TaskStatus.COMPLETED:
                    job.status = status

        if logger.error:
            self.taskmeta["error"] = logger.error

        for cb in self.progress_callbacks.get(logger.wid, []):
            cb(self.metadata(logger.wid))
    def save_metadata(self, metadata: WorkflowModel):
        """Write job rows and selected workflow-level fields to the database.

        NOTE (mfranklin): this deliberately does NOT update the workflow
        status - the actual workflow status must be updated separately.
        """
        self.jobsDB.update_or_insert_many(self.flatten_jobs(metadata.jobs or []))

        self.workflowmetadata.last_updated = DateUtil.now()

        # Only overwrite fields that the incoming metadata actually carries.
        if metadata.error:
            self.workflowmetadata.error = metadata.error
        if metadata.execution_dir:
            self.workflowmetadata.execution_dir = metadata.execution_dir
        if metadata.finish:
            self.workflowmetadata.finish = metadata.finish
    def from_janis(
        wid: str,
        outdir: str,
        tool: Tool,
        environment: Environment,
        hints: Dict[str, str],
        validation_requirements: Optional[ValidationRequirements],
        batchrun_requirements: Optional[BatchRunRequirements],
        inputs_dict: dict = None,
        dryrun=False,
        watch=True,
        max_cores=None,
        max_memory=None,
        keep_intermediate_files=False,
        run_in_background=True,
        dbconfig=None,
        allow_empty_container=False,
        container_override: dict = None,
        check_files=True,
    ):
        """Create, register and (optionally) start a WorkflowManager for a run.

        Persists the run's configuration into the manager's database,
        translates the tool for the engine's ideal specification, records
        the submission file paths, then either starts/submits the run or
        (for ``dryrun``) only marks the status as DRY_RUN.

        :return: the initialised WorkflowManager
        """

        jc = JanisConfiguration.manager()

        # output directory has been created

        # Namespace the environment per-run so engines don't collide.
        environment.identifier += "_" + wid

        tm = WorkflowManager(wid=wid, outdir=outdir, environment=environment)

        tm.database.runs.insert(wid)

        # Snapshot the run configuration into the workflow metadata table.
        tm.database.workflowmetadata.wid = wid
        tm.database.workflowmetadata.engine = environment.engine
        tm.database.workflowmetadata.filescheme = environment.filescheme
        tm.database.workflowmetadata.environment = environment.id()
        tm.database.workflowmetadata.name = tool.id()
        tm.database.workflowmetadata.start = DateUtil.now()
        tm.database.workflowmetadata.executiondir = None
        tm.database.workflowmetadata.keepexecutiondir = keep_intermediate_files
        tm.database.workflowmetadata.configuration = jc
        tm.database.workflowmetadata.dbconfig = dbconfig

        # This is the only time we're allowed to skip the tm.set_status
        # This is a temporary stop gap until "notification on status" is implemented.
        # tm.set_status(TaskStatus.PROCESSING)
        tm.database.workflowmetadata.status = TaskStatus.PROCESSING

        tm.database.commit()

        # Translate the tool into the engine's preferred specification and
        # write the workflow/inputs/resources files to disk.
        spec = get_ideal_specification_for_engine(environment.engine)
        spec_translator = get_translator(spec)
        tool_evaluate = tm.prepare_and_output_workflow_to_evaluate_if_required(
            tool=tool,
            translator=spec_translator,
            validation=validation_requirements,
            batchrun=batchrun_requirements,
            hints=hints,
            additional_inputs=inputs_dict,
            max_cores=max_cores or jc.environment.max_cores,
            max_memory=max_memory or jc.environment.max_ram,
            allow_empty_container=allow_empty_container,
            container_override=container_override,
            check_files=check_files,
        )

        outdir_workflow = tm.get_path_for_component(
            WorkflowManager.WorkflowManagerPath.workflow
        )

        # Record where the translated submission files live.
        tm.database.workflowmetadata.submission_workflow = os.path.join(
            outdir_workflow, spec_translator.filename(tool_evaluate)
        )
        tm.database.workflowmetadata.submission_inputs = os.path.join(
            outdir_workflow, spec_translator.inputs_filename(tool_evaluate)
        )
        tm.database.workflowmetadata.submission_resources = os.path.join(
            outdir_workflow, spec_translator.dependencies_filename(tool_evaluate)
        )

        tm.database.commit()

        if not dryrun:
            # Some templates (e.g. cluster submitters) forbid foreground runs.
            if (
                not run_in_background
                and jc.template
                and jc.template.template
                and jc.template.template.can_run_in_foreground is False
            ):
                raise Exception(
                    f"Your template '{jc.template.template.__class__.__name__}' is not allowed to run "
                    f"in the foreground, try adding the '--background' argument"
                )
            tm.start_or_submit(run_in_background=run_in_background, watch=watch)
        else:
            tm.set_status(TaskStatus.DRY_RUN)

        tm.database.commit()

        return tm
Example #10
0
    def format(self, pre, monochrome=False, brief=False, **kwargs):
        """Format this job (and, recursively, its sub-jobs) for the console.

        :param pre: indentation prefix applied to this job's line
        :param monochrome: if True, suppress ANSI colour codes
        :param brief: if True, collapse completed jobs to a single line
        :return: the formatted (possibly multi-line, colourised) string
        """
        tb = "    "
        # Use "now" as the end time for jobs that are still running.
        fin = self.finish if self.finish else DateUtil.now()
        time = round(DateUtil.secs_difference(self.start,
                                              fin)) if self.start else None
        # Fall back to the most recent event's status when none is set.
        status = self.status or (sorted(self.events,
                                        key=lambda e: e.timestamp)[-1].status
                                 if self.events else TaskStatus.PROCESSING)

        name = self.name
        if self.shard is not None and self.shard >= 0:
            name += f"_shard-{self.shard}"
        if self.attempt and self.attempt > 1:
            name += f"_attempt-{self.attempt}"

        standard = pre + f"[{status.symbol()}] {name} ({second_formatter(time)})"

        col = ""
        uncol = ""

        if not monochrome:
            if status == TaskStatus.FAILED:
                col = _bcolors.FAIL
            elif status == TaskStatus.COMPLETED:
                col = _bcolors.OKGREEN
            uncol = _bcolors.ENDC

        # Completed jobs are collapsed in brief mode; everything else
        # recurses into its sub-jobs (sorted by start time).
        if status != TaskStatus.COMPLETED or not brief:
            if self.jobs:
                ppre = pre + tb
                subs: List[WorkflowJobModel] = sorted(
                    self.jobs,
                    key=lambda j: j.start if j.start else DateUtil.now(),
                    reverse=False,
                )

                return (col + standard + "".join([
                    "\n" + j.format(ppre, monochrome, brief, **kwargs)
                    for j in subs
                ]) + uncol)

        # Status-specific detail fields, rendered one per line below the job.
        fields: List[Tuple[str, str]] = []

        if status == TaskStatus.COMPLETED:
            if not self.finish:
                raise Exception(
                    f"Finish was null for completed task: {self.name}")
            if self.cached:
                fields.append(("from cache", str(self.cached)))

        elif status == TaskStatus.RUNNING:
            fields.extend([("batchid", self.batchid),
                           ("backend", self.backend)])

        elif status == TaskStatus.FAILED:
            fields.extend([("stdout", self.stdout), ("stderr", self.stderr)])

        elif status in (TaskStatus.PROCESSING, TaskStatus.QUEUED):
            pass

        else:
            return (
                standard +
                f" :: Unimplemented status: '{status}' for task: '{self.name}'"
            )

        ppre = "\n" + " " * len(pre) + 2 * tb
        # Only render fields that actually have a value.
        retval = standard + "".join(f"{ppre}{f[0]}: {f[1]}"
                                    for f in fields if f[1])

        return col + retval + uncol
    def insert(self, wid: str):
        """Insert a new row for ``wid``, stamped with the current time."""
        row = (wid, str(DateUtil.now()))
        with self.with_cursor() as cursor:
            cursor.execute(self._insert_statement, row)
Example #12
0
    def process_metadataupdate_if_match(self, line):
        """Parse a CWLTool log line for workflow/step status transitions.

        When the line matches the status-update regex, derives a job id
        from the current workflow scope plus the step name and hands a new
        WorkflowJobModel to the metadata callback. Non-matching lines,
        root-workflow updates (no step name) and unrecognised actions are
        ignored (the method returns None).
        """
        match = self.statusupdateregex.match(line)
        if not match:
            return

        name, action = match.groups()

        # Expect "<component>" or "<component> <stepname>".
        s = name.split(" ")
        if len(s) == 0 or len(s) > 2:
            return Logger.critical("Unsure how to handle metadata update: " +
                                   str(line))

        component = s[0]
        stepname = s[1] if len(s) > 1 else None

        status = None
        # Parent id is the underscore-joined chain of enclosing workflows.
        parentid = "_".join(self.workflow_scope) or None

        if component == "workflow":
            if action == "start":
                if stepname:  # stepname is empty for root workflow
                    self.workflow_scope.append(stepname)
                    stepname = None
                    status = TaskStatus.RUNNING
            elif action == "completed success":
                if len(self.workflow_scope) > 0:
                    # NOTE(review): scopes are appended at the end but popped
                    # from the front here; confirm pop(0) (vs pop()) is the
                    # intended behaviour for nested subworkflows.
                    self.workflow_scope.pop(0)
                status = TaskStatus.COMPLETED

        elif component == "step":
            if action == "start":
                status = TaskStatus.RUNNING
            elif action == "completed success":
                status = TaskStatus.COMPLETED

        if not status:
            return

        if not stepname:
            # return WorkflowModel
            return

        jid = f"{parentid}_{stepname}" if parentid else stepname

        # Stamp start on RUNNING transitions and finish on COMPLETED ones.
        start = DateUtil.now() if status == TaskStatus.RUNNING else None
        finish = DateUtil.now() if status == TaskStatus.COMPLETED else None

        job = WorkflowJobModel(
            jid=jid,
            parentjid=parentid,
            name=stepname,
            status=status,
            attempt=None,
            shard=None,
            start=start,
            finish=finish,
            backend="local",
            batchid="",
            cached=False,
            container=None,
            stderr=self.logfp.name,  # local runs log stderr to the task logfile
            stdout=None,
        )

        self.metadata_callback(self, job)