Example 1
    def poll_metadata(self):

        if self._timer_thread.is_set() or self.should_stop:
            return

        for engine_id_to_poll in self.progress_callbacks:
            try:
                meta = self.metadata(engine_id_to_poll)
                if meta:
                    for callback in self.progress_callbacks[engine_id_to_poll]:
                        callback(meta)

            except Exception as e:
                Logger.critical(
                    f"Received a critical error ({repr(e)}) when getting metadata for "
                    f"Cromwell task {engine_id_to_poll}, hence terminating task with status=SUSPENDED"
                )
                meta = RunModel(
                    id_=None,
                    submission_id=None,
                    status=TaskStatus.SUSPENDED,
                    engine_id=engine_id_to_poll,
                    name=None,
                    execution_dir=None,
                )
                for callback in self.progress_callbacks[engine_id_to_poll]:
                    callback(meta)

        # schedule the next poll on a fresh timer thread
        # (renamed from `time` to avoid shadowing the stdlib module)
        interval = self.get_poll_interval()
        threading.Timer(interval, self.poll_metadata).start()
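
A minimal sketch of a progress callback this poller would invoke. Only the progress_callbacks mapping (engine id → list of callbacks) is visible above; the engine id and the registration line below are illustrative assumptions.

def print_status(meta):
    # each callback receives the RunModel built from the engine metadata
    print(meta.engine_id, meta.status)

engine.progress_callbacks.setdefault("cromwell-run-1", []).append(print_status)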
Example 2
    def __init__(
        self,
        wf,
        identifier: str,
        datatype: DataType,
        source: ConnectionSource,
        doc: OutputDocumentation = None,
        output_folder: Union[str, InputSelector,
                             List[Union[str, InputSelector]]] = None,
        output_name: Union[str, InputSelector] = None,
        skip_typecheck=False,
    ):
        super().__init__(wf, NodeType.OUTPUT, identifier)
        self.datatype = datatype

        # if source[0].node_type != NodeType.STEP:
        #     raise Exception(
        #         f"Unsupported connection type: {"Output"} → {source[0].node_type}"
        #     )

        stype = source[0].outputs()[source[1]].outtype
        snode = source[0]
        if isinstance(snode, StepNode) and snode.scatter:
            stype = Array(stype)

        if not skip_typecheck and not datatype.can_receive_from(stype):
            Logger.critical(
                f"Mismatch of types when joining '{source[0].id()}.{source[1]}' to output node '{identifier}' "
                f"({stype.id()} -/→ {datatype.id()})"
            )

        self.source = verify_or_try_get_source(source)
        self.doc = (doc if isinstance(doc, OutputDocumentation) else
                    OutputDocumentation(doc=doc))
        self.output_folder = output_folder
        self.output_name = output_name
Example 3
def second_formatter(secs):
    if secs is None:
        return "N/A"

    if not secs:
        return "0s"

    try:
        remainder = int(secs)
    except (ValueError, TypeError):
        Logger.critical(f"second_formatter received non-int type: '{secs}'")
        return str(secs)

    intervals = []
    ranges = [60, 3600, 86400]
    extensions = ["s", "m", "h", "d"]

    # walk the ranges from largest to smallest; once the first unit is
    # emitted, keep emitting so the smaller units get zero-padded below
    under_first_interval = False

    for r in ranges[::-1]:
        if under_first_interval or remainder >= r:
            under_first_interval = True
            val = remainder // r
            remainder -= val * r
            intervals.append(val)

    intervals.append(remainder)

    maxintervals = len(intervals) - 1
    outp = str(intervals[0]) + extensions[maxintervals]
    for i in range(1, len(intervals)):
        ivl = intervals[i]
        outp += ":" + str(ivl).zfill(2) + extensions[maxintervals - i]

    return outp
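
A quick usage sketch for second_formatter (outputs traced by hand from the logic above):

second_formatter(None)  # → "N/A"
second_formatter(0)     # → "0s"
second_formatter(45)    # → "45s"
second_formatter(90)    # → "1m:30s"
second_formatter(3661)  # → "1h:01m:01s"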
Example 4
    def __init__(
        self,
        wf,
        identifier: str,
        datatype: DataType,
        source: ConnectionSource,
        doc: str = None,
        output_tag: Union[str, InputSelector] = None,
        output_prefix: Union[
            str, InputSelector, List[Union[str, InputSelector]]
        ] = None,
    ):
        super().__init__(wf, NodeTypes.OUTPUT, identifier)
        self.datatype = datatype

        if source[0].node_type != NodeTypes.STEP:
            raise Exception(
                f"Unsupported connection type: {NodeTypes.OUTPUT} → {source[0].node_type}"
            )

        stype = source[0].outputs()[source[1]].output_type
        snode = source[0]
        if isinstance(snode, StepNode) and snode.scatter:
            stype = Array(stype)

        if not datatype.can_receive_from(stype):
            Logger.critical(
                f"Mismatch of types when joining '{source[0].id()}.{source[1]}' to output node '{identifier}' "
                f"({stype.id()} -/→ {datatype.id()})"
            )

        self.source = verify_or_try_get_source(source)
        self.doc = doc
        self.output_tag = output_tag
        self.output_prefix = output_prefix
Example 5
    def resolve(path, workflow_spec, workflow_name):

        if not path:
            Logger.critical("Output path was invalid, changed to working directory")
            path = "."

        if ExportPathKeywords.workflow_spec in path and workflow_spec is None:
            raise Exception(
                f"path ('{path}') contained parameter {ExportPathKeywords.workflow_spec} "
                "but caller of .resolve did not pass language"
            )

        if ExportPathKeywords.workflow_name in path and workflow_name is None:
            raise Exception(
                f"path ('{path}') contained parameter {ExportPathKeywords.workflow_name} "
                "but caller of .resolve did not pass tool name"
            )

        path = p.expanduser(path)

        if (len(path) == 1 and path == ".") or path[:2] == "./":
            path = getcwd() + (path[1:] if len(path) > 1 else "")

        elif path[0] != "/":
            path = p.join(getcwd(), path)

        return path.replace(
            ExportPathKeywords.workflow_spec,
            workflow_spec.lower() if workflow_spec else "",
        ).replace(
            ExportPathKeywords.workflow_name,
            workflow_name.lower() if workflow_name else "",
        )
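
A brief usage sketch, assuming ExportPathKeywords.workflow_spec and ExportPathKeywords.workflow_name are the "{language}" and "{name}" placeholders (an assumption; check ExportPathKeywords for the actual tokens):

resolve("~/janis/{language}/{name}", "wdl", "MyWorkflow")
# → "/home/<user>/janis/wdl/myworkflow" (expanduser applied, values lowercased)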
Example 6
    def get_workflow_metadatadb(execpath, wid, readonly=False):

        connection = None
        sqlpath = WorkflowDbManager.get_sql_path_base(execpath)

        if not wid:

            Logger.debug("Opening database connection to get wid from: " +
                         sqlpath)
            try:
                connection = sqlite3.connect(f"file:{sqlpath}?mode=ro",
                                             uri=True)
            except:
                Logger.critical("Error when opening DB connection to: " +
                                sqlpath)
                raise

            wid = RunDbProvider(db=connection).get_latest()
            if not wid:
                raise Exception("Couldn't get WID in task directory")

        retval = WorkflowMetadataDbProvider(sqlpath, wid, readonly=readonly)
        if connection:
            connection.close()
        return retval
Example 7
    def rm_dir(self, directory):
        Logger.info(f"Removing local directory '{directory}'")
        try:
            return shutil.rmtree(directory)
        except Exception as e:
            Logger.critical(f"Error removing directory '{directory}': {e}")
            return False
Example 8
    def get_recipe_for_keys(self, keys: List[str]):

        self.load_recipes()

        if not keys:
            return {}

        rec = {}
        for key in keys:
            found_key = False

            if key in self._files_by_key:
                found_key = True
                rec.update(self.get_cascaded_dict_from_yamls(self._files_by_key[key]))

            if key in self.recipes:
                found_key = True
                rec.update(self.recipes[key] or {})

            if not found_key:
                Logger.critical(f"Couldn't find '{key}' in known recipes")

        return rec
Example 9
def do_docs(args):
    try:
        import webbrowser

        webbrowser.open(DOCS_URL)
    except Exception as e:
        Logger.critical(f"Failed to open {DOCS_URL} ({e})")
Example 10
def do_rm(args):
    wids = args.wid
    for wid in wids:
        try:
            ConfigManager.manager().remove_task(wid, keep_output=args.keep)
        except Exception as e:
            Logger.critical(f"Can't remove {wid}: " + str(e))
Example 11
    def add_tool(tool: Tool) -> bool:
        v: Optional[str] = tool.version()
        if not v:
            t = f"The tool {tool.id()} did not have a version and will not be registered"
            Logger.critical(t)
            return False

        return JanisShed._toolshed.register(tool.id().lower(), v.lower(), tool)
Example 12
def do_rm(args):
    wids = args.wid
    for wid in wids:
        try:
            ConfigManager.get_from_path_or_submission_lazy(
                wid, readonly=True).remove_task(wid, keep_output=args.keep)
        except Exception as e:
            Logger.critical(f"Can't remove {wid}: " + str(e))
Example 13
    def get_cascaded_dict_from_yamls(files):
        d = {}
        for f in files:
            try:
                with open(f) as rl:
                    adr = ruamel.yaml.load(rl, Loader=ruamel.yaml.Loader)
                    d.update(adr)
            except Exception as e:
                Logger.critical(f"Couldn't parse file '{f}': {e}")
        return d
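
A small illustration of the cascading behaviour with hypothetical file contents; because of dict.update, later files win on conflicting keys:

# recipe_a.yaml → {"reference": "hg19", "threads": 4}
# recipe_b.yaml → {"reference": "hg38"}
get_cascaded_dict_from_yamls(["recipe_a.yaml", "recipe_b.yaml"])
# → {"reference": "hg38", "threads": 4}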
Example 14
    def add_tool(tool: Tool) -> bool:
        v: Optional[str] = tool.version()
        if not v:
            t = f"The tool {tool.id()} did not have a version and will not be registered"
            Logger.critical(t)
            return False
        Logger.log("Adding tool: " + tool.id())

        JanisShed._byclassname.register(tool.__class__.__name__, tool)
        return JanisShed._toolshed.register(tool.id().lower(), v.lower(), tool)
Example 15
    def check_types(self):
        from janis_core.workflow.workflow import InputNode, StepNode

        stoolin: TOutput = (
            self.start.outputs()[self.stag]
            if self.stag is not None
            else first_value(self.start.outputs())
        )
        ftoolin: TInput = (
            self.finish.inputs()[self.ftag]
            if self.ftag is not None
            else first_value(self.finish.inputs())
        )

        stype = stoolin.outtype
        ftype = ftoolin.intype

        start_is_scattered = (
            isinstance(self.start, StepNode) and self.start.scatter is not None
        )

        if start_is_scattered:
            Logger.log(
                f"This edge merges the inputs from '{full_dot(self.start, self.stag)}' for "
                f"'{full_dot(self.finish, self.ftag)}'"
            )
            stype = Array(stype)

        if self.scatter:
            if not isinstance(stype, Array):
                raise Exception(
                    f"Scatter was required for '{self.start.id()}.{self.stag}' → '{self.finish.id()}.{self.ftag}' but "
                    f"the input type was {type(stype).__name__} and not an array"
                )
            stype = stype.subtype()

        source_has_default = (
            isinstance(self.start, InputNode) and self.start.default is not None
        )

        # Scatters are handled automatically by the StepTagInput Array unwrapping
        # Merges are handled automatically by the `start_is_scattered` Array wrap

        self.compatible_types = ftype.can_receive_from(stype, source_has_default)
        if not self.compatible_types:
            if isinstance(ftype, Array) and ftype.subtype().can_receive_from(stype):
                self.compatible_types = True

        if not self.compatible_types:

            s = full_dot(self.start, self.stag)
            f = full_dot(self.finish, self.ftag)
            message = (
                f"Mismatch of types when joining '{s}' to '{f}': "
                f"{stoolin.outtype.id()} -/→ {ftoolin.intype.id()}"
            )
            if isinstance(stype, Array) and ftype.can_receive_from(stype.subtype()):
                message += " (did you forget to SCATTER?)"
            Logger.critical(message)
Example 16
    def link_copy_or_fail(source: str, dest: str, force=False):
        """
        Eventually move this to some generic util class
        :param source: Source to link from
        :param dest: Place to link to
        :param force: Overwrite destination if it exists
        :return:
        """
        try:

            to_copy = [(
                LocalFileScheme.prepare_path(source),
                LocalFileScheme.prepare_path(dest),
            )]

            while len(to_copy) > 0:
                s, d = to_copy.pop(0)

                # Check if path is Null/None
                if not s:
                    continue

                if not d:
                    continue

                if os.path.exists(d) and force:
                    Logger.debug(f"Destination exists, overwriting '{d}'")
                    if os.path.isdir(d):
                        rmtree(d)
                    else:
                        os.remove(d)
                Logger.log(f"Hard linking {s} → {d}")

                if os.path.isdir(s):
                    os.makedirs(d, exist_ok=True)
                    for f in os.listdir(s):
                        to_copy.append((os.path.join(s, f), os.path.join(d, f)))
                    continue
                try:
                    os.link(s, d)
                except FileExistsError:
                    Logger.critical(
                        f"The file '{d}' already exists. The force flag is required to overwrite."
                    )
                except Exception as e:
                    Logger.warn("Couldn't link file: " + str(e))

                    # if this fails, it should error
                    Logger.log(f"Copying file {s} → {d}")
                    copyfile(s, d)
        except Exception as e:
            Logger.critical(
                f"An unexpected error occurred when link/copying {source} -> {dest}: {e}"
            )
Example 17
    def get_latest_workflow(path) -> str:
        try:
            connection = sqlite3.connect(
                f"file:{WorkflowDbManager.get_sql_path_base(path)}?mode=ro",
                uri=True)
            runDb = RunDbProvider(db=connection)
            return runDb.get_latest()

        except Exception:
            Logger.critical("Error when opening DB connection to: " + path)
            raise
Example 18
    def terminate(self):
        self.should_terminate = True
        if self.logfp:
            try:
                self.logfp.flush()
                os.fsync(self.logfp.fileno())
            except Exception as e:
                # Not a fatal error: there's nothing we can do about it,
                # and it doesn't block the rest of Janis's shutdown.
                Logger.critical("Couldn't flush engine stderr to disk: " + str(e))
Example 19
    def _get_tool_entrypoints():
        ep = []
        eps = pkg_resources.iter_entry_points(group=EP.TOOLS)
        for entrypoint in eps:
            try:
                m = entrypoint.load()
                ep.append(m)
            except ImportError as e:
                t = f"Couldn't import janis tools extension '{entrypoint.name}': {e}"
                Logger.critical(t)
                continue
        return ep
Example 20
    def db_connection(self):
        path = self.get_sql_path()
        try:
            if self.readonly:
                Logger.debug("Opening database connection in READONLY mode: " + path)
                return sqlite3.connect(f"file:{path}?mode=ro", uri=True)

            Logger.debug("Opening database connection: " + path)
            return sqlite3.connect(path)
        except Exception:
            Logger.critical("Error when opening DB connection to: " + path)
            raise
Example 21
    def _get_datatype_entrypoints():
        import importlib_metadata

        ep = []
        eps = importlib_metadata.entry_points().get(EP.DATATYPES, [])
        for entrypoint in eps:
            try:
                m = entrypoint.load()
                ep.append(m)
            except ImportError as e:
                t = f"Couldn't import janis data_type extension '{entrypoint.name}': {e}"
                Logger.critical(t)
                continue
        return ep
Example 22
    def _get_tool_entrypoints():
        import importlib_metadata

        ep = []
        eps = importlib_metadata.entry_points().get(EP.TOOLS, [])
        for entrypoint in eps:
            try:
                m = entrypoint.load()
                ep.append(m)
            except ImportError as e:
                t = f"Couldn't import janis tools extension {EP.TOOLS} '{entrypoint.name}': {e}"
                Logger.critical(t)
                continue
        return ep
Example 23
    def get_config_from_script(self, execution_dir: str):
        try:
            import subprocess, os, json
            from janis_assistant.management.envvariables import EnvVariables
            from janis_assistant.engines.cromwell.cromwellconfiguration import (
                CromwellConfiguration,
            )

            file_path = os.getenv(EnvVariables.db_script_generator)
            if file_path is None:
                raise Exception(
                    f"Couldn't get database credentials: no value in env var '{EnvVariables.db_script_generator}'"
                )
            Logger.debug(
                f"Found script '{file_path}' (via env var '{EnvVariables.db_script_generator}') to generate database credentials"
            )
            # if not os.path.exists(file_path):
            #     raise Exception(f"Couldn't locate script '{file_path}' to execute")

            try:
                val = collect_output_from_command(
                    f"{file_path} {execution_dir}",
                    stderr=Logger.guess_log,
                    shell=True)
            except Exception as e:
                Logger.critical(
                    f"Failed to generate database credentials ({repr(e)})")
                raise
            d = json.loads(val)
            Logger.debug("Received keys from database credentials script: " +
                         ", ".join(d.keys()))

            keys = {"username", "password", "database", "host"}
            missing_keys = {k for k in keys if k not in d}
            if len(missing_keys) > 0:
                raise Exception(
                    "The script to generate database credentials was missing the keys: "
                    + ", ".join(missing_keys))

            return CromwellConfiguration.Database.mysql(
                username=d["username"],
                password=d["password"],
                database=d["database"],
                url=d["host"],
            )
        except Exception as e:
            Logger.critical(
                "Failed to get database configuration details from script: " +
                repr(e))
            raise
Example 24
    def cp_to(
        self,
        source,
        dest,
        force=False,
        report_progress: Optional[Callable[[float], None]] = None,
    ):
        if force:
            Logger.critical("SSHFileScheme does not support the 'force' flag")

        Logger.info(
            f"Secure copying (SCP) from local:{source} to {self.connectionstring}:{dest}"
        )
        args = ["scp", source, self.connectionstring + ":" + dest]
        subprocess.call(args)
Example 25
    def stop_engine(self):

        # we're going to abort!
        if self.process_id:
            try:
                import signal

                os.kill(self.process_id, signal.SIGTERM)
            except Exception as e:
                Logger.critical("Couldn't terminate CWLTool as " + str(e))

        else:
            Logger.critical(
                "Couldn't terminate CWLTool as there was no process ID")

        return self
Example 26
def translate_to_cwl_glob(glob, inputsdict, **debugkwargs):
    if not glob:
        return None

    if not isinstance(glob, Selector):
        Logger.critical(
            "String globs are being phased out from tool output selections, please use the provided "
            "Selector (InputSelector or WildcardSelector) classes. " +
            str(debugkwargs))
        return glob

    if isinstance(glob, InputSelector):

        if glob.input_to_select:
            if inputsdict is None or glob.input_to_select not in inputsdict:
                raise Exception(
                    "An internal error has occurred when generating the output glob for "
                    + glob.input_to_select)

            tinp: ToolInput = inputsdict[glob.input_to_select]
            intype = tinp.input_type
            if isinstance(intype, Filename):
                if isinstance(intype.prefix, InputSelector):
                    return intype.generated_filename(
                        inputs=prepare_filename_replacements_for(
                            intype.prefix, inputsdict=inputsdict))
                else:
                    return intype.generated_filename()
            else:
                expr = f"inputs.{glob.input_to_select}"
                if isinstance(intype, (File, Directory)):
                    expr = expr + ".basename"
                if tinp.default:
                    expr = f"(inputs.{glob.input_to_select} != null) ? {expr} : {tinp.default}"

                return f"$({expr})"

        return translate_input_selector(glob, code_environment=False)

    elif isinstance(glob, StringFormatter):
        return translate_string_formatter(glob)

    elif isinstance(glob, WildcardSelector):
        return glob.wildcard

    raise Exception("Unimplemented selector type: " + glob.__class__.__name__)
Example 27
    def start_from_paths(self, wid, source_path: str, input_path: str, deps_path: str):
        print("TMP: " + os.getenv("TMPDIR"))
        scale = ["--scale", str(self.scale)] if self.scale else []
        loglevel = ["--logLevel=" + self.loglevel] if self.loglevel else []
        cmd = ["toil-cwl-runner", "--stats", *loglevel, *scale, source_path, input_path]
        Logger.debug("Running command: '" + " ".join(cmd) + "'")
        process = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, preexec_fn=os.setsid, stderr=subprocess.PIPE
        )

        Logger.info("CWLTool has started with pid=" + str(process.pid))

        for line in read_stdout(process):
            if "Path to job store directory is" in line:
                idx = line.index("Path to job store directory is")
                Logger.critical("JOBSTORE DIR: " + line[idx + 1 :])
            Logger.debug("toil: " + line)

        print("finished")
Example 28
def parse_if_dict(T: Type, val, path: str, skip_if_empty=True):
    if val is None:
        return None
    elif isinstance(val, T):
        return val
    elif isinstance(val, dict):
        if skip_if_empty and len(val) == 0:
            return None
        try:
            return T(**val)
        except TypeError as e:
            Logger.critical(
                f"Couldn't initialise key {path} of type '{T.__name__}', {str(e)}"
            )
            exit(1)
    raise Exception(
        f"Couldn't parse for type '{T.__name__}', "
        f"expected dict but received '{val.__class__.__name__}' for {str(val)}"
    )
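
A minimal usage sketch; the EngineConfig dataclass is invented here for illustration and is not part of the library:

from dataclasses import dataclass

@dataclass
class EngineConfig:
    url: str = "localhost"
    port: int = 8000

parse_if_dict(EngineConfig, None, "engine")            # → None
parse_if_dict(EngineConfig, {}, "engine")              # → None (skip_if_empty)
parse_if_dict(EngineConfig, {"port": 8080}, "engine")  # → EngineConfig(url="localhost", port=8080)
parse_if_dict(EngineConfig, EngineConfig(), "engine")  # already parsed; returned as-is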
Example 29
def translate_to_cwl_glob(glob, **debugkwargs):
    if not glob:
        return None

    if not isinstance(glob, Selector):
        Logger.critical(
            "String globs are being phased out from tool output selections, please use the provided "
            "Selector (InputSelector or WildcardSelector) classes. " +
            str(debugkwargs))
        return glob

    if isinstance(glob, InputSelector):
        return translate_input_selector(glob, code_environment=False)

    elif isinstance(glob, StringFormatter):
        return translate_string_formatter(glob)

    elif isinstance(glob, WildcardSelector):
        return glob.wildcard

    raise Exception("Unimplemented selector type: " + glob.__class__.__name__)
Example 30
    def _get_datatype_transformations_from_entrypoints():
        import importlib_metadata

        ep = []
        eps = importlib_metadata.entry_points().get(EP.TRANSFORMATIONS, [])
        for entrypoint in eps:
            try:
                m = entrypoint.load()
                if m is not None and isinstance(m, list):
                    ep.extend(m)
                else:
                    Logger.warn(
                        f"Janis transformation entrypoint '{entrypoint.name}' was not a list (type {type(m)}). "
                        f"Only export a single list of transformations, for example: "
                        f"`janis_bioinformatics.transformations:transformations`"
                    )
            except ImportError as e:
                t = f"Couldn't import janis datatype_transformation extension '{entrypoint.name}': {e}"
                Logger.critical(t)
                continue
        return ep
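
A hedged sketch of how an extension package might declare such an entry point in its setup.py. The group string is illustrative only, since the actual value of EP.TRANSFORMATIONS isn't shown in this snippet:

from setuptools import setup

setup(
    name="my-janis-extension",
    entry_points={
        # the group must match EP.TRANSFORMATIONS; the loader above expects
        # the target to resolve to a *list* of transformations
        "janis.datatype_transformations": [
            "mytransforms = my_package.transformations:transformations"
        ],
    },
)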