Example 1
    def __init__(
        self, input_to_select, remove_file_extension=None, type_hint=File, **kwargs
    ):
        """
        :param input_to_select: The name of the input to select
        :param remove_file_extension: Call basename() and remove the file extension
        :param type_hint: Janis can't determine the type of the input to select until translation
            time, so providing a type hint can suppress false warnings. This is similar to using .as_type(dt).
        """

        if not isinstance(input_to_select, str):
            raise Exception(
                f"Expected input_to_select to be string, not {type(input_to_select)}: {str(input_to_select)}"
            )

        # maybe worth validating the input_to_select identifier
        self.input_to_select = input_to_select
        self.type_hint = get_instantiated_type(type_hint) or File()

        if "use_basename" in kwargs:
            use_basename = kwargs["use_basename"]
            if remove_file_extension is None:
                remove_file_extension = use_basename
            Logger.warn(
                f"The 'use_basename' key is deprecated, please use 'remove_file_extension' instead: "
                f'InputSelector("{self.input_to_select}", remove_file_extension={str(use_basename)})'
            )

        self.remove_file_extension = remove_file_extension
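For context, a minimal usage sketch of the selector above (the input id "bam" is hypothetical; InputSelector is exported by janis_core):

from janis_core import InputSelector

# Select the "bam" input at translation time, dropping its file extension;
# type_hint defaults to File, so no extra hint is needed here.
output_name = InputSelector("bam", remove_file_extension=True)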
Example 2
    def stop_engine(self):
        if self._logger:
            self._logger.terminate()

        self.should_stop = True
        if self._timer_thread:
            self._timer_thread.set()

        if self._logfp:
            self._logfp.flush()
            os.fsync(self._logfp.fileno())
            self._logfp.close()

        if not self.process_id:
            Logger.warn("Could not find a cromwell process to end, SKIPPING")
            return

        Logger.info("Stopping cromwell")
        try:
            # Terminate the whole process group so Cromwell's child
            # processes are stopped along with it
            process_group = os.getpgid(int(self.process_id))
            os.killpg(process_group, signal.SIGTERM)
            Logger.info("Stopped cromwell")
        except Exception as e:
            # Best effort only; the process may already have exited
            Logger.warn("Couldn't stop Cromwell process: " + str(e))

        self.is_started = False
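The process-group termination used above, reduced to a standalone sketch (the function name is illustrative):

import os
import signal

def terminate_process_group(pid: int) -> None:
    # Signal the whole process group rather than just pid, so child
    # processes spawned by the engine are terminated as well.
    os.killpg(os.getpgid(pid), signal.SIGTERM)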
Example 3
    def get_workflow_metadatadb(execpath, wid, readonly=False):

        connection = None
        sqlpath = WorkflowDbManager.get_sql_path_base(execpath)

        if not wid:

            Logger.debug("Opening database connection to get wid from: " +
                         sqlpath)
            try:
                connection = sqlite3.connect(f"file:{sqlpath}?mode=ro",
                                             uri=True)
            except Exception:
                Logger.critical("Error when opening DB connection to: " +
                                sqlpath)
                raise

            wid = RunDbProvider(db=connection).get_latest()
            if not wid:
                raise Exception("Couldn't get WID in task directory")

        retval = WorkflowMetadataDbProvider(sqlpath, wid, readonly=readonly)
        if connection:
            connection.close()
        return retval
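The read-only connection trick above, as a standalone sketch (the path is hypothetical):

import sqlite3

# mode=ro opens the database read-only; unlike a plain connect(), it raises
# sqlite3.OperationalError if the file is missing instead of creating an
# empty database.
connection = sqlite3.connect("file:/path/to/task.db?mode=ro", uri=True)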
Example 4
    def __new__(cls, *args, **kwargs):
        multiplier = None
        src, *otherargs = args

        if len(otherargs) == 1:
            f = otherargs[0].lower()
            multiplier_hierarchy = [
                ("ki" in f, 1024),
                ("k" in f, 1000),
                ("mi" in f, 1.024),
                ("gi" in f, 0.001024),
                ("g" in f, 0.001),
            ]
            if not any(m[0] for m in multiplier_hierarchy):
                Logger.warn(
                    f"Couldn't determine prefix {f} for FileSizeOperator, defaulting to MB"
                )
            else:
                multiplier = [m[1] for m in multiplier_hierarchy if m[0]][0]

        instance = super(FileSizeOperator, cls).__new__(cls)
        instance.__init__(args[0])

        if multiplier is not None and multiplier != 1:
            return instance * multiplier
        return instance
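For illustration, the prefix lookup re-implemented as a standalone function (the name is hypothetical); note that list order makes "ki" win over "k" and "gi" over "g":

def resolve_multiplier(prefix: str):
    # First matching substring wins; None means an unrecognised prefix.
    hierarchy = [
        ("ki", 1024),
        ("k", 1000),
        ("mi", 1.024),
        ("gi", 0.001024),
        ("g", 0.001),
    ]
    f = prefix.lower()
    return next((mult for sub, mult in hierarchy if sub in f), None)

assert resolve_multiplier("KiB") == 1024
assert resolve_multiplier("kB") == 1000
assert resolve_multiplier("GiB") == 0.001024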
Example 5
    def __init__(
        self,
        wf,
        identifier: str,
        datatype: DataType,
        source: ConnectionSource,
        doc: OutputDocumentation = None,
        output_folder: Union[str, InputSelector,
                             List[Union[str, InputSelector]]] = None,
        output_name: Union[str, InputSelector] = None,
        skip_typecheck=False,
    ):
        super().__init__(wf, NodeType.OUTPUT, identifier)
        self.datatype = datatype

        # if source[0].node_type != NodeType.STEP:
        #     raise Exception(
        #         f"Unsupported connection type: Output → {source[0].node_type}"
        #     )

        stype = source[0].outputs()[source[1]].outtype
        snode = source[0]
        if isinstance(snode, StepNode) and snode.scatter:
            stype = Array(stype)

        if not skip_typecheck and not datatype.can_receive_from(stype):
            Logger.critical(
                f"Mismatch of types when joining to output node '{source[0].id()}.{source[1]}' to '{identifier}' "
                f"({stype.id()} -/→ {datatype.id()})")

        self.source = verify_or_try_get_source(source)
        self.doc = (doc if isinstance(doc, OutputDocumentation) else
                    OutputDocumentation(doc=doc))
        self.output_folder = output_folder
        self.output_name = output_name
Example 6
def second_formatter(secs):
    if secs is None:
        return "N/A"

    if not secs:
        return "0s"

    try:
        remainder = int(secs)
    except (ValueError, TypeError):
        Logger.critical(f"second_formatter received non-int type: '{secs}'")
        return str(secs)

    intervals = []
    ranges = [60, 3600, 86400]
    extensions = ["s", "m", "h", "d"]

    under_first_interval = False

    for r in ranges[::-1]:
        if under_first_interval or remainder >= r:
            under_first_interval = True
            val = remainder // r
            remainder -= val * r
            intervals.append(val)

    intervals.append(remainder)

    maxintervals = len(intervals) - 1
    outp = str(intervals[0]) + extensions[maxintervals]
    for i in range(1, len(intervals)):
        ivl = intervals[i]
        outp += ":" + str(ivl).zfill(2) + extensions[maxintervals - i]

    return outp
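Sample outputs of second_formatter, assuming the definition above:

assert second_formatter(None) == "N/A"
assert second_formatter(59) == "59s"
assert second_formatter(3661) == "1h:01m:01s"
assert second_formatter(90061) == "1d:01h:01m:01s"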
Example 7
def do_docs(args):
    try:
        import webbrowser

        webbrowser.open(DOCS_URL)
    except Exception as e:
        Logger.critical(f"Failed to open {DOCS_URL} ({e})")
Example 8
    def rm_dir(self, directory):
        Logger.info(f"Removing local directory '{directory}'")
        try:
            # shutil.rmtree returns None, so report success explicitly
            shutil.rmtree(directory)
            return True
        except Exception as e:
            Logger.critical(f"Error removing directory '{directory}': {e}")
            return False
Example 9
    def __init__(self, format: str, **kwargs):
        self._format: str = format

        keywords, balance = get_keywords_between_braces(self._format)

        if balance > 0:
            Logger.warn(
                "There was an imbalance of braces in the string _format, this might cause issues with concatenation"
            )

        skwargs = set(kwargs.keys())

        if keywords != skwargs:
            # what are the differences?
            if not keywords.issubset(skwargs):
                raise IncorrectArgsException(
                    "The _format required additional arguments to be provided by "
                    "**kwargs, requires the keys: " +
                    ", ".join(keywords - skwargs))
            else:
                raise TooManyArgsException(
                    "The **kwargs contained unrecognised keys: " +
                    ", ".join(skwargs - keywords))

        self.kwargs = kwargs
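get_keywords_between_braces isn't shown in this example; a plausible stand-in, for illustration only (not the library's actual helper):

import re

def get_keywords_between_braces(fmt: str):
    # Hypothetical stand-in: the set of names inside {braces}, plus the
    # count of unmatched opening braces.
    keywords = set(re.findall(r"{(\w+)}", fmt))
    balance = fmt.count("{") - fmt.count("}")
    return keywords, balance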
Example 10
    def translate_tool(
        self,
        tool,
        to_console=True,
        to_disk=False,
        export_path=None,
        with_docker=True,
        with_resource_overrides=False,
        max_cores=None,
        max_mem=None,
    ):

        tool_out = self.stringify_translated_tool(
            self.translate_tool_internal(
                tool,
                with_docker=with_docker,
                with_resource_overrides=with_resource_overrides,
            ))

        if to_console:
            print(tool_out)

        if to_disk:
            d = ExportPathKeywords.resolve(export_path,
                                           workflow_spec=self.name,
                                           workflow_name=tool.id())
            if not os.path.exists(d):
                os.makedirs(d)
            fn_tool = self.tool_filename(tool)
            with open(os.path.join(d, fn_tool), "w+") as wf:
                Logger.log(f"Writing {fn_tool} to disk")
                wf.write(tool_out)
                Logger.log(f"Wrote {fn_tool}  to disk")

        return tool_out
Example 11
    def __init__(
        self,
        wf,
        identifier: str,
        datatype: DataType,
        source: ConnectionSource,
        doc: str = None,
        output_tag: Union[str, InputSelector] = None,
        output_prefix: Union[
            str, InputSelector, List[Union[str, InputSelector]]
        ] = None,
    ):
        super().__init__(wf, NodeTypes.OUTPUT, identifier)
        self.datatype = datatype

        if source[0].node_type != NodeTypes.STEP:
            raise Exception(
                f"Unsupported connection type: {NodeTypes.OUTPUT} → {source[0].node_type}"
            )

        stype = source[0].outputs()[source[1]].output_type
        snode = source[0]
        if isinstance(snode, StepNode) and snode.scatter:
            stype = Array(stype)

        if not datatype.can_receive_from(stype):
            Logger.critical(
                f"Mismatch of types when joining to output node '{source[0].id()}.{source[1]}' to '{identifier}' "
                f"({stype.id()} -/→ {datatype.id()})"
            )

        self.source = verify_or_try_get_source(source)
        self.doc = doc
        self.output_tag = output_tag
        self.output_prefix = output_prefix
Example 12
    def __init__(self, **connections):
        super().__init__(metadata_class=WorkflowMetadata)

        self.connections = connections

        Logger.log(f"Creating workflow with identifier: '{self.id()}'")

        if not Validators.validate_identifier(self.id()):
            raise Exception(
                f"The identifier '{self.id()}' was invalid because {Validators.reason_for_failure(self.id())}"
            )

        # The following variables allow us to quickly check data about the graph
        self.nodes: Dict[str, Node] = {}

        self.input_nodes: Dict[str, InputNode] = {}
        self.step_nodes: Dict[str, StepNode] = {}
        self.output_nodes: Dict[str, OutputNode] = {}

        # Flags for different requirements that a workflow might need
        self.has_scatter = False
        self.has_subworkflow = False
        self.has_multiple_inputs = False

        # Now that we've initialised everything, we can "construct" the workflow
        # for subclasses of this class; for a WorkflowBuilder this does nothing
        # and the workflow is built up later.
        self.constructor()
Example 13
    def get_recipe_for_keys(self, keys: List[str]):

        self.load_recipes()

        if not keys:
            return {}

        rec = {}
        for key in keys:
            found_key = False

            if key in self._files_by_key:
                found_key = True
                rec.update(
                    self.get_cascaded_dict_from_yamls(
                        self._files_by_key[key]))

            if key in self.recipes:
                found_key = True
                rec.update(self.recipes[key] or {})

            if not found_key:
                Logger.critical(f"Couldn't find '{key}' in known recipes")

        return rec
Example 14
    def map_to_wdl(t: NativeType):
        import wdlgen as wdl

        if t == NativeTypes.kBool:
            return wdl.PrimitiveType.kBoolean
        elif t == NativeTypes.kInt:
            return wdl.PrimitiveType.kInt

        elif (t == NativeTypes.kLong or t == NativeTypes.kFloat
              or t == NativeTypes.kDouble):
            return wdl.PrimitiveType.kFloat
        elif t == NativeTypes.kStr:
            return wdl.PrimitiveType.kString
        elif t == NativeTypes.kFile:
            return wdl.PrimitiveType.kFile
        elif t == NativeTypes.kStdout:
            return wdl.PrimitiveType.kFile
        elif t == NativeTypes.kStderr:
            return wdl.PrimitiveType.kFile
        elif t == NativeTypes.kDirectory:
            Logger.log(
                "Using data_type 'Directory' for wdl, this requires cromwell>=37 and language=development"
            )
            return wdl.PrimitiveType.kDirectory
        elif t == NativeTypes.kArray:
            return wdl.ArrayType.kArray
        raise Exception(
            f"Unhandled primitive type {t}, expected one of {', '.join(NativeTypes.all)}"
        )
Example 15
    def get_value_for_key(d, key, default):
        val = d.get(key)
        if not val:
            return default.get(key) if default else None

        Logger.log(f"Got value '{val}' for key '{key}'")
        return val
Example 16
    def resolve(path, workflow_spec, workflow_name):

        if not path:
            Logger.critical("Output path was invalid, changed to working directory")
            path = "."

        if ExportPathKeywords.workflow_spec in path and workflow_spec is None:
            raise Exception(
                f"path ('{path}') contained parameter {ExportPathKeywords.workflow_spec} "
                "but caller of .resolve did not pass language"
            )

        if ExportPathKeywords.workflow_name in path and workflow_name is None:
            raise Exception(
                f"path ('{path}') contained parameter {ExportPathKeywords.workflow_name} "
                "but caller of .resolve did not pass tool name"
            )

        path = p.expanduser(path)

        if path == "." or path[:2] == "./":
            path = getcwd() + (path[1:] if len(path) > 1 else "")

        elif path[0] != "/":
            path = p.join(getcwd(), path)

        return path.replace(
            ExportPathKeywords.workflow_spec,
            workflow_spec.lower() if workflow_spec else "",
        ).replace(
            ExportPathKeywords.workflow_name,
            workflow_name.lower() if workflow_name else "",
        )
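A hedged usage sketch of resolve, assuming the placeholder constants are the literal strings "{language}" and "{name}" (check ExportPathKeywords for the real tokens):

# Hypothetical call; substitutions are lower-cased by resolve():
out = ExportPathKeywords.resolve("~/outputs/{language}/{name}", "wdl", "BwaMem")
# → e.g. "/home/user/outputs/wdl/bwamem"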
Example 17
    def poll_metadata(self):

        if self._timer_thread.is_set() or self.should_stop:
            return

        for engine_id_to_poll in self.progress_callbacks:
            try:
                meta = self.metadata(engine_id_to_poll)
                if meta:
                    for callback in self.progress_callbacks[engine_id_to_poll]:
                        callback(meta)

            except Exception as e:
                Logger.critical(
                    f"Received a critical error ({repr(e)}) when getting metadata for "
                    f"Cromwell task {engine_id_to_poll}, hence terminating task with status=SUSPENDED"
                )
                meta = RunModel(
                    id_=None,
                    submission_id=None,
                    status=TaskStatus.SUSPENDED,
                    engine_id=engine_id_to_poll,
                    name=None,
                    execution_dir=None,
                )
                for callback in self.progress_callbacks[engine_id_to_poll]:
                    callback(meta)

        # call timer again
        time = self.get_poll_interval()
        threading.Timer(time, self.poll_metadata).start()
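The re-arming timer idiom above, as a self-contained sketch:

import threading

def start_poller(interval_secs: float, poll_fn, stop_event: threading.Event):
    # Each tick performs one poll and schedules the next, mirroring how
    # poll_metadata re-registers itself with threading.Timer.
    def tick():
        if stop_event.is_set():
            return
        poll_fn()
        threading.Timer(interval_secs, tick).start()

    tick()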
Example 18
def do_rm(args):
    wids = args.wid
    for wid in wids:
        try:
            ConfigManager.manager().remove_task(wid, keep_output=args.keep)
        except Exception as e:
            Logger.critical(f"Can't remove {wid}: " + str(e))
Example 19
    def rm_dir(self, directory):
        import urllib.request

        Logger.info(f"Issuing HTTP.DELETE request for directory '{directory}'")
        req = urllib.request.Request(directory)
        req.get_method = lambda: "DELETE"
        return urllib.request.urlopen(req)
Example 20
def do_version(_):
    from tabulate import tabulate
    import importlib_metadata

    from janis_assistant.__meta__ import __version__ as jr_version
    from janis_core.__meta__ import __version__ as jc_version
    import janis_core.toolbox.entrypoints as EP

    fields = [["janis-core", jc_version], ["janis-assistant", jr_version]]
    # eps = pkg_resources.iter_entry_points(group=EP.EXTENSIONS)
    eps = importlib_metadata.entry_points().get(EP.EXTENSIONS, [])
    skip_eps = {"assistant"}
    for entrypoint in eps:
        if entrypoint.name in skip_eps:
            continue
        try:
            version = entrypoint.load().__version__
            if version:
                fields.append(["janis-" + entrypoint.name, version])
            skip_eps.add(entrypoint.name)

        except Exception as e:
            Logger.log_ex(e)

    print(tabulate(fields))
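The dict-style entry_points().get(...) above only works on older importlib_metadata releases; on newer ones (and Python 3.10+'s importlib.metadata) the selectable form is equivalent:

# Equivalent lookup on newer importlib_metadata versions:
eps = importlib_metadata.entry_points(group=EP.EXTENSIONS)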
Example 21
    def add_tool(tool: Tool) -> bool:
        v: Optional[str] = tool.version()
        if not v:
            t = f"The tool {tool.id()} did not have a version and will not be registered"
            Logger.critical(t)
            return False

        return JanisShed._toolshed.register(tool.id().lower(), v.lower(), tool)
Example 22
def do_rm(args):
    wids = args.wid
    for wid in wids:
        try:
            ConfigManager.get_from_path_or_submission_lazy(
                wid, readonly=True).remove_task(wid, keep_output=args.keep)
        except Exception as e:
            Logger.critical(f"Can't remove {wid}: " + str(e))
Example 23
    def get_file_size(self, path) -> Optional[int]:
        try:
            stat = os.stat(path)
            if not stat:
                return None
            return stat.st_size
        except Exception as e:
            Logger.warn(f"Couldn't get file size of path '{path}': {repr(e)}")
            return None
Example 24
    def add_tool(tool: Tool) -> bool:
        v: Optional[str] = tool.version()
        if not v:
            t = f"The tool {tool.id()} did not have a version and will not be registered"
            Logger.critical(t)
            return False
        Logger.log("Adding tool: " + tool.id())

        JanisShed._byclassname.register(tool.__class__.__name__, tool)
        return JanisShed._toolshed.register(tool.id().lower(), v.lower(), tool)
Example 25
    def cwl_type(self, has_default=False):
        inner_types = [a.cwl_type(has_default=has_default) for a in self.subtypes]
        try:
            inner_types = list(set(inner_types))
        except Exception as e:
            Logger.debug(f"Error creating set from ({inner_types}): {e}")

        if len(inner_types) == 1:
            return inner_types[0]
        return inner_types
Example 26
    def get_cascaded_dict_from_yamls(files):
        d = {}
        for f in files:
            try:
                with open(f) as rl:
                    adr = ruamel.yaml.load(rl, Loader=ruamel.yaml.Loader)
                    d.update(adr)
            except Exception as e:
                Logger.critical(f"Couldn't parse file '{f}': {e}")
        return d
Example 27
    def convert_generic_class(t,
                              ignore_fields=None,
                              get_string_repr_func=None,
                              workflow_id: str = None):
        options = []

        get_string_repr_func2 = lambda obj: (get_string_repr_func or
                                             JanisTranslator.get_string_repr)(
                                                 obj, workflow_id=workflow_id)

        try:
            has_init_dict = not isinstance(
                t, (Tool, WorkflowBase, PythonTool, StepNode)) and hasattr(
                    t, "init_dictionary")
        except KeyError:
            has_init_dict = False

        if has_init_dict:
            options.extend(f"{k}={get_string_repr_func2(v)}"
                           for k, v in t.init_dictionary().items())
        else:
            ignore_fields = set((ignore_fields if ignore_fields else []) +
                                ["self", "args", "kwargs"])

            params = inspect.signature(type(t).__init__).parameters
            param_map = {}
            if not isinstance(t, (StepNode, WorkflowBase)) and hasattr(
                    t, "init_key_map"):
                param_map = t.init_key_map
            # fields = fields_to_check if fields_to_check \
            #     else [f for f in dict(params).keys() if f not in ignore_fields]

            for fkey in params:
                if fkey in ignore_fields:
                    continue

                opts = params[fkey]

                t_key = param_map.get(fkey, fkey)
                if t_key is None:
                    continue

                if hasattr(t, t_key):
                    v = t.__getattribute__(t_key)
                else:
                    Logger.warn(
                        f"Object '{t.__class__.__name__}' didn't have attribute {t_key}, setting to None and it might get skipped"
                    )
                    v = None
                if (v is None and opts.default is None) or v == opts.default:
                    continue

                options.append(fkey + "=" + get_string_repr_func2(v))

        return f"{t.__class__.__name__}({', '.join(options)})"
Example 28
    def check_types(self):
        from janis_core.workflow.workflow import InputNode, StepNode

        stoolin: TOutput = self.start.outputs()[
            self.stag
        ] if self.stag is not None else first_value(self.start.outputs())
        ftoolin: TInput = self.finish.inputs()[
            self.ftag
        ] if self.ftag is not None else first_value(self.finish.inputs())

        stype = stoolin.outtype
        ftype = ftoolin.intype

        start_is_scattered = (
            isinstance(self.start, StepNode) and self.start.scatter is not None
        )

        if start_is_scattered:
            Logger.log(
                f"This edge merges the inputs from '{full_dot(self.start, self.stag)}' for "
                f"'{full_dot(self.finish, self.ftag)}'"
            )
            stype = Array(stype)

        if self.scatter:
            if not isinstance(stype, Array):
                raise Exception(
                    f"Scatter was required for '{self.start.id()}.{self.stag} → '{self.finish.id()}.{self.ftag}' but "
                    f"the input type was {type(stype).__name__} and not an array"
                )
            stype = stype.subtype()

        source_has_default = (
            isinstance(self.start, InputNode) and self.start.default is not None
        )

        # Scatters are handled automatically by the StepTagInput Array unwrapping
        # Merges are handled automatically by the `start_is_scattered` Array wrap

        self.compatible_types = ftype.can_receive_from(stype, source_has_default)
        if not self.compatible_types:
            if isinstance(ftype, Array) and ftype.subtype().can_receive_from(stype):
                self.compatible_types = True

        if not self.compatible_types:

            s = full_dot(self.start, self.stag)
            f = full_dot(self.finish, self.ftag)
            message = (
                f"Mismatch of types when joining '{s}' to '{f}': "
                f"{stoolin.outtype.id()} -/→ {ftoolin.intype.id()}"
            )
            if isinstance(stype, Array) and ftype.can_receive_from(stype.subtype()):
                message += " (did you forget to SCATTER?)"
            Logger.critical(message)
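To make the scatter handling concrete, a small illustration using janis_core's Array type (assuming File as the inner type):

from janis_core import Array, File

stype = Array(File())
# A scattered edge unwraps one Array level before the type check:
assert File().can_receive_from(stype.subtype())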
Example 29
    def terminate(self):
        self.should_terminate = True
        if self.logfp:
            try:
                self.logfp.flush()
                os.fsync(self.logfp.fileno())
            except Exception as e:
                # This isn't a proper error; there's nothing we could do,
                # and it doesn't prevent the rest of Janis's shutdown.
                Logger.critical("Couldn't flush engine stderr to disk: " +
                                str(e))
Example 30
        def __init__(self, d: dict, default: dict):
            d = d if d else {}

            self.id = JanisConfiguration.get_value_for_key(
                d, self.Keys.Id, default)

            Logger.log("Got template ID: " + str(self.id))

            # remove this id from the dictionary: https://stackoverflow.com/a/15411146/
            d.pop(self.Keys.Id.value, None)
            self.template = from_template(self.id, d)