    def standard(self) -> RunModel:
        jobs = []

        s = DateUtil.parse_iso(self.meta.get("start"))
        f = DateUtil.parse_iso(self.meta.get("end"))
        # elapsed seconds between start and end (or now, if still running);
        # computed but not used further in this method
        st = 0
        if s:
            ff = f if f else DateUtil.now()
            st = int(DateUtil.secs_difference(s, ff))

        jid = self.meta.get("id")
        for stepname, call in self.meta.get("calls", {}).items():
            stepname = ".".join(stepname.split(".")[1:])
            jobs.extend(self.parse_standard_calls(None, stepname, call))

        model = RunModel(
            submission_id=None,
            id_=None,
            engine_id=jid,
            name=self.meta.get("workflowName"),
            # start=s,
            # finish=f,
            execution_dir=self.meta.get("workflowRoot"),
            status=cromwell_status_to_status(self.meta.get("status")),
            error=self.get_caused_by_text(),
        )
        model.jobs = jobs
        return model
Example #2
    def __init__(
        self,
        id_: str,
        submission_id: str,
        run_id: str,
        output_name: Optional[str],
        output_folder: Optional[str],
        secondaries: Optional[List[str]],
        extension: Optional[str],
        is_copyable: bool,
        original_path: Optional[str],
        new_path: Optional[str],
        timestamp: Optional[Union[str, datetime.datetime]],
        value: Optional[Any] = None,
    ):
        self.id_ = id_
        self.submission_id = submission_id
        self.run_id = run_id
        if isinstance(output_name, bool):
            self.output_name = output_name
        else:
            self.output_name = stringify_value_or_array(output_name)
        self.output_folder = stringify_value_or_array(output_folder)
        self.secondaries = secondaries
        self.extension = extension
        self.is_copyable = is_copyable
        self.original_path = original_path
        self.new_path = new_path

        self.value = value

        if not isinstance(timestamp, datetime.datetime):
            timestamp = DateUtil.parse_iso(timestamp)
        self.timestamp = timestamp
def do_query(args):
    status = None
    if args.status:
        status = TaskStatus(args.status.lower())

    name = args.name
    tasks = ConfigManager(db_path=None).query_tasks(status=status, name=name)

    prepared = [(
        wid,
        t.status,
        t.name,
        t.start,
        (", ".join(t.labels) if t.labels else ""),
        t.execution_dir,
    ) for wid, t in tasks.items()]
    prepared.sort(key=lambda p: p[3] if p[3] else DateUtil.max())

    print(
        tabulate.tabulate(
            prepared,
            headers=[
                "TaskID", "status", "name", "start date", "labels", "path"
            ],
        ),
        file=sys.stdout,
    )
    def parse_output(
        key, value
    ) -> Tuple[str, Union[WorkflowOutputModel, List[WorkflowOutputModel]]]:
        newkey = "".join(key.split(".")[1:])

        fileloc = value
        if isinstance(value, dict):
            fileloc = value["location"]

        if isinstance(fileloc, list):
            return newkey, [Cromwell.parse_output(key, f)[1] for f in fileloc]

        # It's hard to know whether the value we get is a File or just a value,
        # so we'll write it in both values and let Janis figure it out later
        return (
            newkey,
            WorkflowOutputModel(
                id_=newkey,
                original_path=None,
                timestamp=DateUtil.now(),
                new_path=None,
                output_folder=None,
                output_name=None,
                secondaries=None,
                extension=None,
                value=fileloc,
                is_copyable=True,
                submission_id=None,
                run_id=None,
            ),
        )
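A quick sketch of the two shapes parse_output handles (the keys and paths here are invented for illustration, and it assumes parse_output is a staticmethod on Cromwell, which the recursive Cromwell.parse_output call above implies):

# A File-style output: the dict's "location" becomes the model's value
key, out = Cromwell.parse_output("wf.csv", {"location": "/exec/out.csv"})
# -> ("csv", WorkflowOutputModel(id_="csv", value="/exec/out.csv", ...))

# A list output recurses per element and returns a list of models
key, outs = Cromwell.parse_output(
    "wf.shards",
    [{"location": "/exec/shard0.txt"}, {"location": "/exec/shard1.txt"}],
)
# -> ("shards", [WorkflowOutputModel(...), WorkflowOutputModel(...)])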
Example #5
    def __init__(
        self,
        submission_id,
        output_dir: str,
        execution_dir: str,
        timestamp: Optional[Union[str, datetime]] = None,
    ):
        self.submission_id = submission_id
        self.output_dir = output_dir
        self.execution_dir = execution_dir

        if not timestamp:
            timestamp = DateUtil.now()
        elif isinstance(timestamp, str):
            timestamp = DateUtil.parse_iso(timestamp)
        self.timestamp = timestamp
    def get_all(self) -> Dict[str, datetime]:
        with self.with_cursor() as cursor:
            cursor.execute(
                "SELECT key, timestamp FROM progress WHERE submission_id = ?",
                (self.submission_id,),
            )
            rows = cursor.fetchall()
            return {row[0]: DateUtil.parse_iso(row[1]) for row in rows}
Example #7
    def format(self, tb, **kwargs):
        if not self.jobs:
            return ""
        nl = "\n"

        return nl.join(
            j.format(tb, njobs_in_parent=len(self.jobs), **kwargs)
            for j in sorted(self.jobs, key=lambda j: j.start or DateUtil.now())
        )
    def set(self, key: ProgressKeys):
        if self.has(key):
            return

        with self.with_cursor() as cursor:
            cursor.execute(
                self._insert_statement,
                (self.submission_id, key.value, str(DateUtil.now())),
            )
Example #9
    def __init__(self, submission_id: str, run_id: str, status: TaskStatus,
                 date: datetime):
        self.submission_id = submission_id
        self.run_id = run_id
        self.status = status if isinstance(status, TaskStatus) else TaskStatus(status)
        if not isinstance(date, datetime):
            date = DateUtil.parse_iso(date)
        self.date = date
Example #10
    def update(self, run_id: str, status: TaskStatus):
        return self.insert_or_update_many([
            RunStatusUpdate(
                submission_id=self.submission_id,
                run_id=run_id,
                status=status,
                date=DateUtil.now(),
            )
        ])
    def get_poll_interval(self):
        """
        Get poll interval in SECONDS
        """
        if self.polling_interval is not None:
            return self.polling_interval

        max_poll = 60  # seconds
        min_poll = 5  # seconds
        falloff = 30 * 60  # seconds

        # https://www.desmos.com/calculator/f5lmjtctfg
        #   (max_poll - min_poll)/2 * (tanh(pi * (2x/falloff - 1)) + 1) + min_poll
        #
        # A tanh curve rescaled from (-1, 1) to (0, 1) and shifted, so the
        # interval ramps from min_poll up to max_poll over ~falloff seconds

        x = (DateUtil.now() - (self._start_time or DateUtil.now())).total_seconds()
        return (max_poll - min_poll) / 2 * (
            math.tanh(math.pi * (2 * x / falloff - 1)) + 1
        ) + min_poll
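A minimal, standalone sketch of the curve those comments describe, to sanity-check the constants (pure math, no janis imports needed):

import math

max_poll, min_poll, falloff = 60, 5, 30 * 60  # seconds

def poll_interval(x: float) -> float:
    # Same expression as get_poll_interval: tanh rescaled from (-1, 1)
    # to (0, 1), ramping from min_poll to max_poll over ~falloff seconds
    return (max_poll - min_poll) / 2 * (
        math.tanh(math.pi * (2 * x / falloff - 1)) + 1
    ) + min_poll

print(round(poll_interval(0)))            # 5  -- just started, near min_poll
print(round(poll_interval(falloff / 2)))  # 32 -- midpoint of the ramp
print(round(poll_interval(falloff)))      # 60 -- long-running, near max_poll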
    def _generate_call_times_from_calls(
        cls, calls, prefix="", include_subworkflow_total=True
    ) -> dict:
        dcalls = {}

        for call_key in calls:
            call = calls[call_key][0]

            s, f = call["start"], call.get("end")
            sd = DateUtil.parse_iso(s)
            fd = DateUtil.parse_iso(f) if f else None

            prefixed_call_key = prefix + call_key

            if prefixed_call_key in dcalls:
                Logger.warn(
                    "Don't know how to handle multiple instances of calls, "
                    f"skipping this one (with id: {call.get('id')})"
                )
                continue

            is_subworkflow_total = "subWorkflowMetadata" in call
            if is_subworkflow_total:
                dcalls.update(
                    cls._generate_call_times_from_calls(
                        call["subWorkflowMetadata"]["calls"], prefix=call_key + "."
                    )
                )

            if not is_subworkflow_total or include_subworkflow_total:
                dcalls[prefixed_call_key] = {
                    "start": s,
                    "end": f,
                    "time": (fd - sd).total_seconds() if fd else "N/A",
                }

        return dcalls
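To illustrate the flattening and the prefixing of subworkflow calls, here is a hypothetical, heavily trimmed metadata block (this assumes the method is a classmethod on CromwellMetadata, which the cls parameter and the later CromwellMetadata.unwrap_caused_by reference suggest):

calls = {
    "wf.align": [{"start": "2020-01-01T10:00:00.000Z",
                  "end": "2020-01-01T10:05:00.000Z"}],
}
CromwellMetadata._generate_call_times_from_calls(calls)
# -> {"wf.align": {"start": "...T10:00:00.000Z",
#                  "end": "...T10:05:00.000Z",
#                  "time": 300.0}}
#
# A call carrying "subWorkflowMetadata" recurses instead of being timed
# directly, so an inner call "sub.sort" under "wf.sub" lands in the result
# under the key "wf.sub.sub.sort"; a call with no "end" reports "N/A".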
    def start_from_paths(self, wid, source_path: str, input_path: str,
                         deps_path: str):

        from janis_assistant.data.models.preparedjob import PreparedJob

        jobfile = PreparedJob.instance()

        self.taskmeta = {
            "start": DateUtil.now(),
            "status": TaskStatus.PROCESSING,
            "jobs": {},
        }
        config: CWLToolConfiguration = self.config

        if Logger.CONSOLE_LEVEL == LogLevel.VERBOSE:
            config.debug = True

        config.disable_color = True

        # more options
        if not config.tmpdir_prefix:
            config.outdir = self.execution_dir + "/"
            config.tmpdir_prefix = self.execution_dir + "/"
            config.leave_tmpdir = True

        if jobfile.call_caching_enabled:
            config.cachedir = os.path.join(self.execution_dir, "cached/")

        cmd = config.build_command_line(source_path, input_path)

        Logger.debug("Running command: '" + " ".join(cmd) + "'")

        process = subprocess.Popen(cmd,
                                   stdout=subprocess.PIPE,
                                   preexec_fn=os.setsid,
                                   stderr=subprocess.PIPE)
        self.taskmeta["status"] = TaskStatus.RUNNING
        Logger.info("CWLTool has started with pid=" + str(process.pid))
        self.process_id = process.pid

        self._logger = CWLToolLogger(
            wid,
            process,
            logfp=open(self.logfile, "a+"),
            metadata_callback=self.task_did_update,
            exit_function=self.task_did_exit,
        )

        return wid
    def task_did_exit(self, logger: CWLToolLogger, status: TaskStatus):
        Logger.debug("CWLTool fired 'did exit'")
        self.taskmeta["status"] = status
        self.taskmeta["finish"] = DateUtil.now()
        self.taskmeta["outputs"] = logger.outputs

        if status != TaskStatus.COMPLETED:
            js: Dict[str, RunJobModel] = self.taskmeta.get("jobs")
            for j in js.values():
                if j.status != TaskStatus.COMPLETED:
                    j.status = status

        if logger.error:
            self.taskmeta["error"] = logger.error

        for callback in self.progress_callbacks.get(logger.sid, []):
            callback(self.metadata(logger.sid))
Example #15
    def __init__(
        self,
        id_: str,
        submission_id: str,
        engine_id: str,
        status: TaskStatus,
        execution_dir: str,
        name: str,
        error: str = None,
        labels: List[str] = None,
        tags: List[str] = None,
        last_updated: datetime = None,
        # extra fields not populated by DB
        jobs: List[RunJobModel] = None,
        inputs: List[WorkflowInputModel] = None,
        outputs: List[WorkflowOutputModel] = None,
        events: List[RunStatusUpdate] = None,
    ):
        self.id_ = id_
        self.submission_id = submission_id
        self.execution_dir = execution_dir
        self.engine_id = engine_id
        self.name = name
        self.status = status
        self.error = error
        self.labels = labels
        self.tags = tags
        self.jobs = jobs
        self.inputs = inputs
        self.outputs = outputs

        if isinstance(last_updated, str):
            last_updated = DateUtil.parse_iso(last_updated)

        self.last_updated = last_updated

        # to be calculated
        self.start = None
        self.finish = None

        # call setters
        self.events = None
        self.set_events(events)
Example #16
    def __init__(
        self,
        id_: str,
        output_dir: str,
        execution_dir: str,
        author: str,
        labels: List[str],
        tags: List[str],
        timestamp: Union[str, datetime],
        engine_type: str,
        # metadata not populated directly by DB, but might be used for formatting
        engine_url: Optional[str] = None,
        runs: List[RunModel] = None,
        name: Optional[str] = None,
        status: Optional[TaskStatus] = None,
        error: Optional[str] = None,
        # From the previous WorkflowModel, some data is now derived from runs:
        #   - start / finish times
        #   - errors
        #   - last updated
    ):
        self.id_ = id_
        self.output_dir = output_dir
        self.execution_dir = execution_dir
        self.author = author
        self.tags = tags
        self.labels = labels
        if not isinstance(timestamp, datetime):
            timestamp = DateUtil.parse_iso(timestamp)
        self.timestamp = timestamp
        self.engine_type = engine_type
        # other things
        self.runs = runs or []
        self.engine_url = engine_url
        self.name = name
        self.status = status
        self.error = error
Example #17
    def insert_or_update_many(self, els: List[RunModel]):
        for el in els:
            el.last_updated = DateUtil.now()
        return super().insert_or_update_many(els)
    def process_potential_out(run_id, key, out):

        if isinstance(out, list):
            outs = [CWLTool.process_potential_out(run_id, key, o) for o in out]
            ups = {}
            for o in outs:
                for k, v in o.items():
                    if k not in ups:
                        ups[k] = []
                    ups[k].append(v)
            return ups

        updates = {}
        if out is None:
            return {}

        if is_python_primitive(out):
            updates[key] = WorkflowOutputModel(
                submission_id=None,
                run_id=run_id,
                id_=key,
                original_path=None,
                is_copyable=False,
                timestamp=DateUtil.now(),
                value=out,
                new_path=None,
                output_folder=None,
                output_name=None,
                secondaries=None,
                extension=None,
            )

        elif "path" in out:
            updates[key] = WorkflowOutputModel(
                submission_id=None,
                run_id=run_id,
                id_=key,
                is_copyable=True,
                value=out["path"],
                original_path=None,
                timestamp=DateUtil.now(),
                new_path=None,
                output_folder=None,
                output_name=None,
                secondaries=None,
                extension=None,
            )
            for s in out.get("secondaryFiles", []):
                path = s["path"]
                ext = path.rpartition(".")[-1]
                newk = f"{key}_{ext}"
                updates[newk] = WorkflowOutputModel(
                    submission_id=None,
                    run_id=run_id,
                    id_=newk,
                    value=path,
                    original_path=None,
                    is_copyable=True,
                    timestamp=DateUtil.now(),
                    new_path=None,
                    output_folder=None,
                    output_name=None,
                    secondaries=None,
                    extension=None,
                )

        return updates
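A sketch of what process_potential_out produces for a File-style output with a secondary file (the run id, key, and paths are invented for the example):

out = {
    "path": "/exec/out.bam",
    "secondaryFiles": [{"path": "/exec/out.bam.bai"}],
}
CWLTool.process_potential_out("run_1", "bam", out)
# -> {
#   "bam":     WorkflowOutputModel(value="/exec/out.bam", ...),
#   "bam_bai": WorkflowOutputModel(value="/exec/out.bam.bai", ...),
# }
# The secondary file's key suffix is the path's last extension ("bai");
# a plain primitive (str/int/...) would yield just {"bam": ...}, and a
# list input is flattened into lists per key.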
    def process_metadataupdate_if_match(self, line):
        match = self.statusupdateregex.match(line)
        if not match:
            return

        name, action = match.groups()

        s = name.split(" ")
        if len(s) == 0 or len(s) > 2:
            return Logger.critical("Unsure how to handle metadata update: " +
                                   str(line))

        component = s[0]
        stepname = s[1] if len(s) > 1 else None

        status = None
        parentid = "_".join(self.workflow_scope) or None

        if component == "workflow":
            if action == "start":
                if stepname:  # stepname is empty for root workflow
                    self.workflow_scope.append(stepname)
                    stepname = None
                    status = TaskStatus.RUNNING
            elif action == "completed success":
                if len(self.workflow_scope) > 0:
                    self.workflow_scope.pop(0)
                status = TaskStatus.COMPLETED

        elif component == "step":
            if action == "start":
                status = TaskStatus.RUNNING
            elif action == "completed success":
                status = TaskStatus.COMPLETED

        if not status:
            return

        if not stepname:
            # return WorkflowModel
            return

        jid = f"{parentid}_{stepname}" if parentid else stepname

        start = DateUtil.now() if status == TaskStatus.RUNNING else None
        finish = DateUtil.now() if status == TaskStatus.COMPLETED else None

        job = RunJobModel(
            submission_id=None,
            run_id=self.sid,
            id_=jid,
            parent=parentid,
            name=stepname,
            status=status,
            attempt=None,
            shard=None,
            start=start,
            finish=finish,
            backend="local",
            batchid="",
            cached=False,
            container=None,
            stderr=self.logfp.name,
            stdout=None,
            analysis=None,
            script=None,
            cpu=None,
            memory=None,
        )

        self.metadata_callback(self, job)
Example #20
    def insert_or_update_many(self, els: List[T]):
        if len(els) == 0:
            return
        queries: Dict[str, List[List[Any]]] = {}
        update_separator = ",\n"
        tab = "\t"

        idkeys = set(self.get_id_keys())
        idkeys_ordered = list(idkeys)
        pkeys_ordered = self.get_primary_keys()
        existing_keys = set()  # (*pkeys_ordered)

        # get all primary keys

        dbalias_map: Dict[str, DatabaseObjectField] = {
            t.dbalias: t
            for t in self._base.keymap()
        }

        updates, inserts = self.filter_updates(els)

        def add_query(query, values):
            if query in queries:
                queries[query].append(values)
            else:
                queries[query] = [values]

        for job in updates:
            keys, values = job.prepare_insert()
            # el_pkeys = [getattr(job, dbalias_map[_k]) for _k in idkeys_ordered]

            keys_np, values_np = [], []
            for k, v in zip(keys, values):
                if k in idkeys:
                    continue

                keys_np.append(k)
                values_np.append(v)

            # problem is we want to update matching on some fields when they are NULL, our WHERE statement
            # should be something like:
            #   WHERE id1 = ? AND id2 = ? AND id3 is null AND id4 is null

            id_keyvalues = {
                pkey: prep_object_for_db(
                    getattr(job, dbalias_map[pkey].name),
                    encode=dbalias_map[pkey].encode,
                )
                for pkey in idkeys_ordered
            }
            id_withvalues_keyvalue_ordered = [
                (idkey, idvalue) for idkey, idvalue in id_keyvalues.items()
                if idvalue is not None
            ]
            id_withvalues_updater_keys = [
                f"{idkey} = ?" for idkey, _ in id_withvalues_keyvalue_ordered
            ]
            id_withvalues_updater_values = [
                idvalue for _, idvalue in id_withvalues_keyvalue_ordered
            ]
            id_novalues_updater_keys = [
                f"{idkey} is NULL" for idkey, idvalue in id_keyvalues.items()
                if idvalue is None
            ]

            prepared_statement = f"""
            UPDATE {self._tablename}
                SET {', '.join(f'{k} = ?' for k in keys_np)}
            WHERE
                {" AND ".join([*id_withvalues_updater_keys, *id_novalues_updater_keys])}
            """
            vtuple = (
                *values_np,
                *id_withvalues_updater_values,
            )

            add_query(prepared_statement, vtuple)

        for job in inserts:
            keys, values = job.prepare_insert()
            # el_pkeys = [getattr(job, dbalias_map[_k]) for _k in idkeys_ordered]
            prepared_statement = f"""
            INSERT INTO {self._tablename}
                ({', '.join(keys)})
            VALUES
                ({', '.join(f'?' for _ in keys)});
            """
            add_query(prepared_statement, values)

        Logger.log(
            f"DB {self._tablename}: Inserting {len(inserts)} and updating {len(updates)} rows"
        )
        with self.with_cursor() as cursor:
            start = DateUtil.now()
            if len(inserts) + len(updates) > 300:
                Logger.warn(
                    f"DB '{self._tablename}' is inserting {len(inserts)} and updating {len(updates)} rows, this might take a while"
                )
            for query, vvalues in queries.items():
                try:
                    Logger.log(
                        f"Running query: {query}\n\t: values: {vvalues}")
                    cursor.executemany(query, vvalues)
                except OperationalError as e:
                    Logger.log_ex(e)
            seconds = (DateUtil.now() - start).total_seconds()
            if seconds > 2:
                Logger.warn(
                    f"DB '{self._tablename}' took {second_formatter(seconds)} to insert {len(inserts)} and update {len(updates)} rows"
                )

        return True
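To make the NULL-matching comment above concrete: for a hypothetical table "jobs" keyed on (submission_id, shard), updating a row whose shard attribute is None would build a statement shaped like the one below, with vtuple carrying the SET values followed by only the non-NULL key values:

            UPDATE jobs
                SET status = ?, finish = ?
            WHERE
                submission_id = ? AND shard is NULL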
    def __setstate__(self, state):
        super().__setstate__(state)
        self._logger = None
        self._process = None
        self._timer_thread = None
        self._start_time = DateUtil.now()
    def start_engine(self, additional_cromwell_options: List[str] = None):
        from ...data.models.preparedjob import PreparedJob

        job = PreparedJob.instance()

        self._start_time = DateUtil.now()
        self.timeout = job.cromwell.timeout or 10

        if self.test_connection():
            Logger.info("Engine has already been started")
            return self

        if self.connect_to_instance:
            self.is_started = True
            Logger.info(
                "Cromwell environment discovered, skipping local instance")
            return self

        if self._process:
            self.is_started = True
            Logger.info(
                f"Discovered Cromwell instance (pid={self._process}), skipping start"
            )
            return self

        if self.config:

            with open(self.config_path, "w+") as f:
                f.writelines(self.config.output())

        Logger.log("Finding cromwell jar")
        cromwell_loc = self.resolve_jar(self.cromwelljar, job.cromwell,
                                        job.config_dir)

        Logger.info(f"Starting cromwell ({cromwell_loc})...")
        cmd = ["java", "-DLOG_MODE=standard"]

        if job.cromwell and job.cromwell.memory_mb:
            cmd.extend([
                f"-Xmx{job.cromwell.memory_mb}M",
                f"-Xms{max(job.cromwell.memory_mb//2, 1)}M",
            ])

        # if Logger.CONSOLE_LEVEL == LogLevel.VERBOSE:
        #     cmd.append("-DLOG_LEVEL=DEBUG")

        if additional_cromwell_options:
            cmd.extend(additional_cromwell_options)

        self.port = find_free_port()
        self.host = f"127.0.0.1:{self.port}"

        cmd.append(f"-Dwebservice.port={self.port}")
        cmd.append(f"-Dwebservice.interface=127.0.0.1")

        if self.config_path and os.path.exists(self.config_path):
            Logger.debug("Using configuration file for Cromwell: " +
                         self.config_path)
            cmd.append("-Dconfig.file=" + self.config_path)
        cmd.extend(["-jar", cromwell_loc, "server"])

        Logger.debug(f"Starting Cromwell with command: '{' '.join(cmd)}'")
        self._process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            # preexec_fn creates a process group https://stackoverflow.com/a/4791612/
            preexec_fn=os.setsid,
        )
        Logger.info("Cromwell is starting with pid=" + str(self._process.pid))
        Logger.debug(
            "Cromwell will start the HTTP server, reading logs to determine when this occurs"
        )

        self._logfp = open(self.logfile, "a+")
        Logger.info(f"Will log Cromwell output to the file: {self.logfile}"
                    if bool(self._logfp) else
                    "Janis is NOT logging Cromwell output to a file")

        # stdout is a bytes stream, so iter()'s sentinel must be b"", not "b"
        for c in iter(self._process.stdout.readline, b""):

            line = None
            if c:
                line = c.decode("utf-8").rstrip()

            if not line:
                rc = self._process.poll()
                if rc is not None:
                    critical_suffix = f"Last received message '{line}'. "
                    Logger.critical(
                        f"Cromwell has exited with rc={rc}. {critical_suffix}The last lines of the logfile ({self.logfile}):"
                    )
                    Logger.critical("".join(tail(self._logfp, 10)))
                    return False
                continue

            if self._logfp and not self._logfp.closed:
                self._logfp.write(line + "\n")
                self._logfp.flush()
                os.fsync(self._logfp.fileno())

            Logger.debug("Cromwell: " + line)

            # self.stdout.append(str(c))
            if "service started on" in line:
                self.process_id = self._process.pid
                Logger.info("Service successfully started with pid=" +
                            str(self._process.pid))
                break
            # elif ansi_escape.match():
            #     raise Exception(cd)

        self.is_started = True

        if self._process:
            self._logger = ProcessLogger(
                process=self._process,
                prefix="Cromwell: ",
                logfp=self._logfp,
                # exit_function=self.something_has_happened_to_cromwell,
            )

        return self
Example #23
    def format(
        self,
        pre,
        monochrome=False,
        brief=False,
        layer=0,
        njobs_in_parent=None,
        **kwargs,
    ):

        tb = "    "
        fin = self.finish if self.finish else DateUtil.now()
        time = round(DateUtil.secs_difference(self.start, fin)) if self.start else None
        # percentage = (
        #     (round(1000 * time / self.supertime) / 10)
        #     if (self.start and self.supertime)
        #     else None
        # )
        status = self.status or (
            sorted(self.events, key=lambda e: e.timestamp)[-1].status
            if self.events
            else TaskStatus.PROCESSING
        )

        name = self.name
        opts = []
        if self.shard is not None and self.shard >= 0:
            opts.append(f"shard-{self.shard}")
        if self.attempt and self.attempt > 1:
            opts.append(f"attempt-{self.attempt}")
        if len(opts) > 0:
            name += f" ({', '.join(opts)})"

        standard = pre + f"[{status.symbol()}] {name} ({second_formatter(time)})"

        col = ""
        uncol = ""

        if not monochrome:
            if status == TaskStatus.FAILED:
                col = _bcolors.FAIL
            elif status == TaskStatus.COMPLETED:
                col = _bcolors.OKGREEN
            # else:
            # col = _bcolors.UNDERLINE
            uncol = _bcolors.ENDC

        if (
            status != TaskStatus.COMPLETED
            or brief is False
            or (layer == 0 and njobs_in_parent == 1)
        ):
            if self.jobs:
                ppre = pre + tb
                subs: List[RunJobModel] = sorted(
                    self.jobs,
                    key=lambda j: (
                        j.shard or 0,
                        j.start if j.start else DateUtil.now(),
                    ),
                    reverse=False,
                )

                return (
                    col
                    + standard
                    + "".join(
                        [
                            "\n"
                            + j.format(
                                ppre, monochrome, brief, layer=layer + 1, **kwargs
                            )
                            for j in subs
                        ]
                    )
                    + uncol
                )

        fields: List[Tuple[str, str]] = []

        if status == TaskStatus.COMPLETED:
            if not self.finish:
                raise Exception(f"Finish was null for completed task: {self.name}")
            if self.cached:
                fields.append(("from cache", str(self.cached)))

        elif status == TaskStatus.RUNNING:
            fields.extend([("batchid", self.batchid), ("backend", self.backend)])

        elif status == TaskStatus.FAILED:
            if str(self.returncode) != "0":
                fields.append(["rc", str(self.returncode)])
            if self.stderr is not None:
                fields.append(("stderr", self.stderr))
            if self.error:
                fields.append(("error", self.error))
        elif status == TaskStatus.PROCESSING:
            pass
        elif status == TaskStatus.QUEUED:
            pass

        else:
            return (
                standard
                + f" :: Unimplemented status: '{status}' for task: '{self.name}'"
            )

        ppre = "\n" + " " * len(pre) + 2 * tb

        max_row_header_length = 0
        if len(fields) > 0:
            max_row_header_length = max(len(t[0]) for t in fields)

        retval = standard + "".join(
            f"{ppre}{f[0]}:{' ' * (max_row_header_length - len(f[0]))} {f[1]}"
            for f in fields
            if f[1]
        )

        return col + retval + uncol
Example #24
    def __init__(self, jid: str, status: TaskStatus,
                 timestamp: Union[str, datetime]):
        self.jid = jid
        self.status = status
        self.timestamp = timestamp
        if timestamp and isinstance(timestamp, str):
            self.timestamp = DateUtil.parse_iso(timestamp)
Example #25
    def __init__(
        self,
        id_: str,
        submission_id: str,
        run_id: str,
        parent: Optional[str],
        name: str,
        batchid: Optional[str] = None,
        shard: Optional[int] = None,
        attempt: Optional[int] = None,
        container: Optional[str] = None,
        status: TaskStatus = None,
        start: Union[str, datetime] = None,
        finish: Optional[Union[str, datetime]] = None,
        backend: Optional[str] = None,
        cached: bool = None,
        stdout: Optional[str] = None,
        stderr: Optional[str] = None,
        script: Optional[str] = None,
        error: Optional[str] = None,
        returncode: Optional[str] = None,
        memory: Optional[str] = None,
        cpu: Optional[str] = None,
        analysis: Optional[str] = None,
        # Optional
        jobs: Optional[list] = None,
        lastupdated: Union[str, datetime] = None,
        workdir: Optional[str] = None,
    ):
        self.id_ = id_
        self.submission_id = submission_id
        self.run_id = run_id
        self.parent = parent

        self.status = status if isinstance(status, TaskStatus) else TaskStatus(status)

        self.name = name
        self.batchid = batchid
        self.shard = None
        if shard is not None:
            if isinstance(shard, str) and shard.isdigit():
                shard = int(shard)
            if shard >= 0:
                self.shard = shard

        self.attempt = None
        if attempt is not None:
            if isinstance(attempt, str) and attempt.isdigit():
                attempt = int(attempt)
            if attempt > 1:
                self.attempt = attempt

        self.container = container

        self.backend = backend
        self.cached = cached

        self.stderr = stderr
        self.stdout = stdout
        self.error = error
        self.returncode = returncode
        self.workdir = workdir

        self.lastupdated = lastupdated or DateUtil.now()
        if isinstance(lastupdated, str):
            self.lastupdated = DateUtil.parse_iso(lastupdated)

        self.start = start
        self.finish = finish
        if start and isinstance(start, str):
            self.start = DateUtil.parse_iso(start)
        if finish and isinstance(finish, str):
            self.finish = DateUtil.parse_iso(finish)

        self.script = script
        self.memory = memory
        self.cpu = cpu
        self.analysis = analysis

        self.jobs: Optional[List[RunJobModel]] = jobs or None
        self.events = None
    def parse_standard_call(cls, parentid, stepname, call):
        parent = parentid
        jid = f"{parentid}_{stepname}" if parentid else stepname
        shard = call.get("shardIndex")
        attempt = call.get("attempt")

        is_shard = shard is not None and shard >= 0
        is_second_attempt = attempt is not None and attempt > 1

        # We'll rebase the parent job if there are any shards (so they all sit under one heading)
        # OR if it's a second attempt, we'll group it under the first attempt.

        if is_shard:
            parent = jid
            # We rely on the calling function to create a job with the parentId we're setting.
            jid += f"_shard-{shard}"
        if is_second_attempt:
            # There will already be a parent with this id_ (as this is the second attempt)
            parent = jid
            jid += f"_attempt-{attempt}"

        subjobs = []
        status = cromwell_status_to_status(
            call.get("status") or call.get("executionStatus")
        )
        start = call.get("start")
        finish = call.get("end")

        # if the call has "subworkflowMetadata", we have a different schema:
        if "subWorkflowMetadata" in call:
            # it's actually a workflow
            submeta = call.get("subWorkflowMetadata")
            for sn, subcalls in submeta.get("calls", {}).items():
                sn = ".".join(sn.split(".")[1:])
                subjobs.extend(cls.parse_standard_calls(jid, sn, subcalls))

            start = start or (min(s.start for s in subjobs) if subjobs else None)

        callroot = call.get("callRoot")

        return RunJobModel(
            id_=jid,
            submission_id=None,
            run_id=None,
            parent=parent,
            container=call.get(
                "dockerImageUsed", call.get("runtimeAttributes", {}).get("docker")
            ),
            name=stepname,
            status=status,
            batchid=call.get("jobId"),
            backend=None,
            stdout=call.get("stdout"),
            stderr=call.get("stderr"),
            start=DateUtil.parse_iso(start) if start is not None else DateUtil.now(),
            finish=DateUtil.parse_iso(finish) if finish is not None else None,
            jobs=subjobs or None,
            cached=call.get("callCaching", {}).get("hit", False),
            shard=shard,
            attempt=attempt,
            analysis=None,
            memory=None,
            cpu=None,
            error=CromwellMetadata.unwrap_caused_by(call.get("failures")),
            returncode=call.get("returnCode"),
            workdir=call.get("callRoot"),
            script=("file:/" + os.path.join(callroot, "execution/script"))
            if callroot
            else None,
        )
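To make the shard/attempt rebasing concrete, here is how the ids fall out for a hypothetical call (parentid=None, stepname="align", shardIndex=1, attempt=2):

# jid starts as "align"
# shard rebase:   parent = "align",         jid = "align_shard-1"
# attempt rebase: parent = "align_shard-1", jid = "align_shard-1_attempt-2"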
Example #27
    def format(self, **kwargs):
        tb = "    "
        nl = "\n"

        start, finish = None, DateUtil.now()
        last_updated = None
        if self.runs:
            start_times = [s.start for s in self.runs if s.start]
            finish_times = [s.finish for s in self.runs if s.finish]
            last_updated_times = [
                s.last_updated for s in self.runs if s.last_updated
            ]

            if start_times:
                start = min(start_times)
            if finish_times and len(finish_times) == len(self.runs):
                finish = max(finish_times)
            if last_updated_times:
                last_updated = max(last_updated_times)

        fin = finish or DateUtil.now()

        duration = round((fin - start).total_seconds()) if start else 0

        updated_text = "Unknown"
        if last_updated:
            secs_ago = int((DateUtil.now() - last_updated).total_seconds())
            if secs_ago > 2:
                updated_text = second_formatter(secs_ago) + " ago"
            else:
                updated_text = "Just now"
            updated_text += f" ({last_updated.replace(microsecond=0).isoformat()})"

        engine_ids, rstatuses, errors = "", "", []

        if self.error:
            errors.append(f"GLOBAL: {self.error}")

        if self.runs:
            engine_ids = ", ".join(r.engine_id for r in self.runs
                                   if r.engine_id)
            errors.extend(r.error for r in self.runs if r.error)

            rstatuses = ", ".join(
                str(r.status.to_string()) for r in self.runs if r.status)

        statuses = self.status.to_string() if self.status else rstatuses
        ers = "\n".join(errors)

        return f"""\
SID:        {self.id_}
EngId:      {engine_ids}
Engine:     {self.engine_type}

Task Dir:   {self.output_dir}
Exec Dir:   {self.execution_dir}

Name:       {self.get_names() or 'N/A'}
Status:     {statuses}
Duration:   {second_formatter(duration)}
Start:      {start.isoformat() if start else 'N/A'}
Finish:     {finish.isoformat() if finish else "N/A"}
Updated:    {updated_text}

Jobs:
{nl.join(j.format(tb, **kwargs) for j in sorted(self.runs, key=lambda j: j.start or DateUtil.now()))}


{("Error: " + ers) if ers else ''}
    """.strip()