def stop_engine(self):
    """
    Stop the Cromwell process that this engine started, if there is one.

    The helper logger is terminated, ``should_stop`` is raised and the timer
    object is signalled before SIGTERM is sent to the process group of
    ``self.process_id``. A failure to signal the process is logged, never raised.
    ``self.is_started`` is False whenever the method runs past the first guard.

    :return: None (the result of ``Logger.debug`` when the engine was already stopped)
    """
    if not self.is_started:
        return Logger.debug(
            "Cromwell has already shut down, skipping shut down request")

    if self._logger:
        self._logger.terminate()

    self.should_stop = True

    if self._timer_thread:
        self._timer_thread.set()

    if not self.process_id:
        # Nothing to kill: there is no process id recorded for this engine.
        self.is_started = False
        Logger.info("Janis isn't managing Cromwell, skipping the shutdown")
        return

    Logger.info("Stopping cromwell")
    try:
        # Signal the whole process group so children of the process stop too.
        process_group = os.getpgid(int(self.process_id))
        os.killpg(process_group, signal.SIGTERM)
        Logger.info("Stopped cromwell")
    except Exception as e:
        # Best effort: the process may already be gone or not be ours to signal.
        Logger.warn("Couldn't stop Cromwell process: " + str(e))

    Logger.debug("Setting 'cromwell.is_started' to False")
    self.is_started = False
Esempio n. 2
0
    def __init__(self, format: str, **kwargs):
        """
        Store a format string together with one value per placeholder.

        :param format: the format string; its placeholder names are extracted
            with ``get_keywords_between_braces``
        :param kwargs: the values to substitute, keyed by placeholder name
        :raises IncorrectArgsException: a placeholder has no matching keyword argument
        :raises TooManyArgsException: a keyword argument matches no placeholder
        """
        self._format: str = format

        placeholders, brace_balance = get_keywords_between_braces(self._format)
        if brace_balance > 0:
            Logger.warn(
                "There was an imbalance of braces in the string _format, this might cause issues with concatenation"
            )

        provided = set(kwargs.keys())

        # Missing values take priority over unrecognised ones when both occur.
        missing = placeholders - provided
        if missing:
            raise IncorrectArgsException(
                "The _format required additional arguments to be provided by "
                "**kwargs, requires the keys:" + ", ".join(missing))

        unrecognised = provided - placeholders
        if unrecognised:
            raise TooManyArgsException(
                "The **kwargs contained unrecognised keys: " +
                ", ".join(unrecognised))

        self.kwargs = kwargs
Esempio n. 3
0
    def __init__(
        self, input_to_select, remove_file_extension=None, type_hint=File, **kwargs
    ):
        """
        :param input_to_select: The name of the input to select (must be a string)
        :param remove_file_extension: Call basename() and remove the file extension
        :param type_hint: Janis can't determine the type of the input to select until translation time,
            so providing a hint type might suppress false warnings. This is similar to using .as_type(dt)
        :param kwargs: only the deprecated ``use_basename`` key is read; it is used as the
            value of ``remove_file_extension`` when that argument was left as None
        :raises Exception: if ``input_to_select`` is not a string
        """
        if not isinstance(input_to_select, str):
            raise Exception(
                f"Expected input_to_select to be string, not {type(input_to_select)}: {str(input_to_select)}"
            )

        # maybe worth validating the input_to_select identifier
        self.input_to_select = input_to_select
        self.type_hint = get_instantiated_type(type_hint) or File()

        if "use_basename" in kwargs:
            legacy_value = kwargs["use_basename"]
            # An explicit remove_file_extension wins over the deprecated key.
            remove_file_extension = (
                legacy_value if remove_file_extension is None else remove_file_extension
            )
            Logger.warn(
                f"The 'use_basename' key is deprecated, please use 'remove_file_extension' instead: "
                f'InputSelector("{self.input_to_select}", remove_file_extension={str(legacy_value)})'
            )

        self.remove_file_extension = remove_file_extension
Esempio n. 4
0
    def run_delete_database_script(self, execution_dir: str):
        """
        Run the configured clean-up script to delete generated database credentials.

        The script is read from the environment variable named by
        ``EnvVariables.db_script_generator_cleanup`` and is invoked through the shell
        with ``execution_dir`` as its single argument. Every failure (including a
        missing environment variable) is logged as a warning and never raised.

        :param execution_dir: execution directory passed to the clean-up script
        :return: None
        """
        try:
            import os
            import shlex
            from janis_assistant.management.envvariables import EnvVariables

            file_path = os.getenv(EnvVariables.db_script_generator_cleanup)

            if file_path is None:
                raise Exception(
                    f"Couldn't delete generated database credentials as couldn't find value in env var '{EnvVariables.db_script_generator_cleanup}'"
                )
            # Log the resolved path (the variable name is already reported on failure above).
            Logger.debug(
                f"Found path '{file_path}' to delete database credentials"
            )
            # if not os.path.exists(file_path):
            #     raise Exception(f"Couldn't locate script '{file_path}' to execute")

            # The command goes through a shell, so quote the directory to keep paths
            # containing spaces or shell metacharacters as one argument.
            val = collect_output_from_command(
                f"{file_path} {shlex.quote(execution_dir)}",
                stderr=Logger.guess_log,
                shell=True)
            if val is not None and len(val) > 0:
                Logger.info(
                    f"Successfully deleted DB credentials and received message: {val}"
                )
            else:
                Logger.info("Deleted credentials with rc=0")
        except Exception as e:
            Logger.warn(
                f"Failed to delete database configuration details for execution directory '{execution_dir}': "
                + repr(e))
Esempio n. 5
0
    def __new__(cls, *args, **kwargs):
        """
        Create the operator and optionally scale it by a unit multiplier.

        The first positional argument is handed to ``__init__``. When exactly one
        more positional argument is given, it is lower-cased and searched for a
        known unit token; the first matching token (in the order listed below)
        selects the multiplier. An unmatched unit only produces a warning.

        :return: the new instance, or ``instance * multiplier`` when a multiplier
            other than 1 was selected
        """
        src, *unit_args = args
        scale = None

        if len(unit_args) == 1:
            f = unit_args[0].lower()
            # Order matters: two-letter tokens are tested before their one-letter form.
            candidates = (
                ("ki", 1024),
                ("k", 1000),
                ("mi", 1.024),
                ("gi", 0.001024),
                ("g", 0.001),
            )
            matched = [factor for token, factor in candidates if token in f]
            if matched:
                scale = matched[0]
            else:
                Logger.warn(
                    f"Couldn't determine prefix {f} for FileSizeOperator, defaulting to MB"
                )

        instance = super(FileSizeOperator, cls).__new__(cls)
        instance.__init__(src)

        if scale is None or scale == 1:
            return instance
        return instance * scale
 def get_file_size(self, path) -> Optional[int]:
     """
     Return the size in bytes of ``path`` as reported by ``os.stat``.

     :param path: path handed to ``os.stat``
     :return: ``st_size`` of the path, or None (after logging a warning) when
         ``os.stat`` raises
     """
     try:
         # os.stat either returns a populated result or raises, so no emptiness
         # check is needed on the result.
         return os.stat(path).st_size
     except Exception as e:
         Logger.warn(f"Couldn't get file size of path '{path}': {repr(e)}")
         return None
    def link_copy_or_fail(source: str, dest: str, force=False):
        """
        Hard link ``source`` to ``dest``, falling back to a copy when linking fails.

        Directories are walked breadth-first: the destination directory is created
        and every entry is queued as its own (source, dest) pair. Errors are logged
        through ``Logger.critical`` rather than raised.

        Eventually move this to some generic util class

        :param source: Source to link from
        :param dest: Place to link to
        :param force: Overwrite destination if it exists
        :return: None
        """
        try:

            to_copy = [(
                LocalFileScheme.prepare_path(source),
                LocalFileScheme.prepare_path(dest),
            )]

            while len(to_copy) > 0:
                s, d = to_copy.pop(0)

                # Skip pairs where either side is Null/None/empty
                if not s or not d:
                    continue

                if os.path.exists(d) and force:
                    Logger.debug(f"Destination exists, overwriting '{d}'")
                    if os.path.isdir(d):
                        rmtree(d)
                    else:
                        os.remove(d)
                Logger.log(f"Hard linking {s} → {d}")

                if os.path.isdir(s):
                    os.makedirs(d, exist_ok=True)
                    to_copy.extend(
                        (os.path.join(s, f), os.path.join(d, f))
                        for f in os.listdir(s))
                    continue

                try:
                    os.link(s, d)
                except FileExistsError:
                    # Report the actual destination path (previously the literal 'd').
                    Logger.critical(
                        f"The file '{d}' already exists. The force flag is required to overwrite."
                    )
                except Exception as e:
                    Logger.warn("Couldn't link file: " + str(e))

                    # if this fails, it should error
                    Logger.log(f"Copying file {s} → {d}")
                    copyfile(s, d)
        except Exception as e:
            Logger.critical(
                f"An unexpected error occurred when link/copying {source} -> {dest}: {e}"
            )
Esempio n. 8
0
    def convert_generic_class(t,
                              ignore_fields=None,
                              get_string_repr_func=None,
                              workflow_id: str = None):
        """
        Render ``t`` as a constructor call string: ``ClassName(key=value, ...)``.

        When ``t`` exposes ``init_dictionary`` (and is not a Tool, WorkflowBase,
        PythonTool or StepNode) that dictionary supplies the arguments. Otherwise the
        parameters of ``type(t).__init__`` are inspected, optionally renamed through
        ``t.init_key_map``, and read back from attributes of ``t``; values equal to
        the parameter default are left out.

        :param t: the object to render
        :param ignore_fields: parameter names to skip ("self", "args", "kwargs" are always skipped)
        :param get_string_repr_func: value renderer, defaults to ``JanisTranslator.get_string_repr``
        :param workflow_id: forwarded to the value renderer
        :return: the rendered constructor call
        """

        def render(obj):
            renderer = get_string_repr_func or JanisTranslator.get_string_repr
            return renderer(obj, workflow_id=workflow_id)

        try:
            has_init_dict = not isinstance(
                t, (Tool, WorkflowBase, PythonTool, StepNode)) and hasattr(
                    t, "init_dictionary")
        except KeyError:
            has_init_dict = False

        if has_init_dict:
            options = [
                f"{k}={render(v)}" for k, v in t.init_dictionary().items()
            ]
            return f"{t.__class__.__name__}({', '.join(options)})"

        options = []
        skipped = set((ignore_fields or []) + ["self", "args", "kwargs"])

        params = inspect.signature(type(t).__init__).parameters
        # Optional mapping of constructor parameter name -> attribute name on t
        param_map = {}
        if not isinstance(t, (StepNode, WorkflowBase)) and hasattr(
                t, "init_key_map"):
            param_map = t.init_key_map

        for fkey, opts in params.items():
            if fkey in skipped:
                continue

            # A mapping to None means "never emit this parameter"
            t_key = param_map.get(fkey, fkey)
            if t_key is None:
                continue

            if hasattr(t, t_key):
                v = t.__getattribute__(t_key)
            else:
                Logger.warn(
                    f"Object '{t.__class__.__name__}' didn't have attribute {t_key}, setting to None and it might get skipped"
                )
                v = None

            # Leave out values that match the parameter's default
            if v is None and opts.default is None:
                continue
            if v == opts.default:
                continue

            options.append(fkey + "=" + render(v))

        return f"{t.__class__.__name__}({', '.join(options)})"
    def test_connection(self):
        """
        Check whether the engine answers on its test URL.

        :return: True only when the engine is marked as started and the request to
            ``self.url_test()`` answers with status code 200; any exception is
            logged as a warning and reported as False
        """
        if not self.is_started:
            return False

        try:
            # Context manager so the HTTP response is closed instead of leaked.
            with request.urlopen(self.url_test()) as r:
                return r.code == 200

        except Exception as e:
            Logger.warn(
                f"Couldn't connect to Cromwell ({self.host}): {repr(e)}")
            return False
Esempio n. 10
0
    def test_connection(self):
        """
        Check whether the engine answers on its test URL.

        :return: True only when the engine is marked as started and the request to
            ``self.url_test()`` answers with status code 200; any exception is
            logged as a warning and reported as False
        """
        if not self.is_started:
            return False

        try:
            # urllib responses have no raise_for_status(); calling it raised
            # AttributeError and made this method always return False. Inspect the
            # status code instead, and close the response when done.
            with request.urlopen(self.url_test()) as r:
                return r.code == 200

        except Exception as e:
            Logger.warn(f"Couldn't connect to Cromwell ({self.host}): {str(e)}")
            return False
Esempio n. 11
0
    def memory(self, hints: Dict[str, Any]):
        """
        Resolve the configured memory value.

        :param hints: passed to ``self._memory`` when it is a callable
        :return: None when unset; the stored value when it is an int, float or
            Selector; the callable's result when it is callable; otherwise 4
            (after logging a warning)
        """
        configured = self._memory
        if configured is None:
            return None

        if isinstance(configured, (int, float, Selector)):
            return configured

        if not callable(configured):
            Logger.warn(
                f"Janis does not recognise {self._memory} ({type(self._memory)}) as a valid value for memory, returning 4GB"
            )
            return 4

        return configured(hints)
Esempio n. 12
0
    def cpus(self, hints: Dict[str, Any]):
        """
        Resolve the configured CPU value.

        :param hints: passed to ``self._cpus`` when it is a callable
        :return: None when unset; the stored value when it is an int, float or
            Selector; the callable's result when it is callable; otherwise 1
            (after logging a warning)
        """
        configured = self._cpus
        if configured is None:
            return None

        if isinstance(configured, (int, float, Selector)):
            return configured

        if not callable(configured):
            Logger.warn(
                f"Janis does not recognise {self._cpus} ({type(self._cpus)}) as a valid CPU value, returning 1"
            )
            return 1

        return configured(hints)
Esempio n. 13
0
    def time(self, hints: Dict[str, Any]) -> Optional[Union[int, Selector]]:
        """
        Resolve the configured time value.

        :param hints: passed to ``self._time`` when it is a callable
        :return: None when unset; the stored value when it is an int, float or
            Selector; the callable's result when it is callable; otherwise 86400
            (after logging a warning)
        """
        if self._time is None:
            return None
        if isinstance(self._time, (int, float, Selector)):
            return self._time

        if callable(self._time):
            return self._time(hints)

        # Report the offending *time* value (the message previously printed self._memory).
        Logger.warn(
            f"Janis does not recognise {self._time} ({type(self._time)}) as a valid value for time, returning 86400 seconds"
        )
        return 86400
Esempio n. 14
0
    def disk(self, hints: Dict[str, Any]) -> Optional[Union[float, Selector]]:
        """
        Resolve the configured disk value.

        :param hints: passed to ``self._disk`` when it is a callable
        :return: None when unset or unrecognised (the latter after logging a
            warning); the stored value when it is an int, float or Selector; the
            callable's result when it is callable
        """
        configured = self._disk
        if configured is None:
            return None

        if isinstance(configured, (int, float, Selector)):
            return configured

        if not callable(configured):
            Logger.warn(
                f"Janis does not recognise {type(self._disk)} as a valid value for disk, returning None"
            )
            return None

        return configured(hints)
Esempio n. 15
0
    def __init__(
        self,
        value: Any,
        prefix: Optional[str] = None,
        position: Optional[int] = 0,
        separate_value_from_prefix=None,
        doc: Optional[str] = None,
        shell_quote: bool = None,
    ):
        """
        A ``ToolArgument`` is a CLI parameter that cannot be overridden (at runtime).

        :param value: The value of the argument.
        :type value: ``str`` | ``janis.InputSelector`` | ``janis.StringFormatter``
        :param position: The position of the input to be applied. (Default = 0, after the base_command).
        :param prefix: The prefix to be appended before the element. (By default, a space will also be applied, see ``separate_value_from_prefix`` for more information)
        :param separate_value_from_prefix: (Default: True) Add a space between the prefix and value when ``True``.
        :param doc: Documentation string for the argument, this is used to generate the tool documentation.
        :param shell_quote: Stops shell quotes from being applied in all circumstances, useful when joining multiple commands together.
        """

        self.prefix: Optional[str] = prefix
        self.value = value
        self.position: Optional[int] = position

        # None for an empty value, True for selectors, a regex test for strings.
        # Any other type cannot match the expression pattern; re.match would raise
        # TypeError on it, so it is reported as "not an expression".
        if not self.value:
            self.is_expression = None
        elif isinstance(self.value, Selector):
            self.is_expression = True
        elif isinstance(self.value, str):
            self.is_expression = re.match(self.expr_pattern, self.value) is not None
        else:
            self.is_expression = False

        self.separate_value_from_prefix = separate_value_from_prefix
        self.doc = doc
        self.shell_quote = shell_quote

        if (
            self.prefix
            and self.separate_value_from_prefix is not None
            and not self.separate_value_from_prefix
            and not self.prefix.endswith("=")
        ):
            # The prefix is joined directly onto the value but has no trailing "=".
            Logger.warn(
                f"Argument ({self.prefix} {self.value}) is not separating and did not end with '='"
            )
Esempio n. 16
0
        def __init__(self, default="Local", providers=Dict[str, Provider]):
            """
            Store the providers and the key of the default provider.

            :param default: key of the provider to use by default
            :param providers: mapping of provider key to provider
            :raises Exception: if ``default`` is not a key of ``providers`` and there
                is not exactly one provider to fall back to
            """
            # NOTE(review): ``Dict[str, Provider]`` is the parameter's *default value*,
            # not an annotation, so ``providers`` must always be passed explicitly.

            self.default = default
            self.providers = providers

            if default not in providers:
                if len(providers) == 1:
                    backend_key = next(iter(providers.keys()))
                    Logger.warn(
                        "The default tag '{default}' was not found in the providers, this was automatically "
                        "corrected to be '{backend_key}'.".format(
                            default=default, backend_key=backend_key))
                    # Apply the correction announced above (previously the invalid
                    # default was reassigned, so nothing was corrected).
                    self.default = backend_key
                else:
                    raise Exception(
                        "The default tag '{default}' was not found in the providers and couldn't be "
                        "automatically corrected".format(default=default))
Esempio n. 17
0
    def stringify_translated_workflow(wf):
        """
        Format generated Janis code with ``black`` when it is installed.

        :param wf: the generated source code as a string
        :return: the formatted code, or ``wf`` unchanged when black is missing or
            rejects the input
        """
        try:
            import black

            try:
                formatting_mode = black.FileMode(line_length=82)
                return black.format_str(wf, mode=formatting_mode)
            except black.InvalidInput:
                Logger.warn(
                    "Check the generated Janis code carefully, as there might be a syntax error. You should report this error along with the workflow you're trying to generate from"
                )
        except ImportError:
            # black is optional, fall through and hand back the raw code
            Logger.debug(
                "Janis can automatically format generated Janis code if you install black: https://github.com/psf/black"
            )

        return wf
Esempio n. 18
0
    def returntype(self):
        """
        Determine the return type from the first argument.

        For a list argument the return type of its first element is used. Otherwise
        the argument's own return type is instantiated: the subtype is taken when it
        is an Array, and the type itself is used when it is not (with a warning
        unless the argument is an InputSelector).

        :return: an ``Array`` of a non-optional copy of the resolved type
        """
        first = self.args[0]

        if isinstance(first, list):
            inner = first[0].returntype()
        else:
            resolved = get_instantiated_type(first.returntype())
            if isinstance(resolved, Array):
                inner = resolved.subtype()
            else:
                # hmmm, this could be a bad input selector
                inner = resolved
                if not isinstance(first, InputSelector):
                    Logger.warn(
                        f'Expected return type of "{first}" to be an array, '
                        f'but found {resolved}, will return this as a returntype.'
                    )

        # Work on a copy so the optional flag of the source type isn't modified
        element_type = copy(get_instantiated_type(inner))
        element_type.optional = False
        return Array(element_type)
Esempio n. 19
0
    def _get_datatype_transformations_from_entrypoints():
        """
        Collect the transformations exported through the ``EP.TRANSFORMATIONS``
        entry point group.

        Every entry point is loaded; a loaded object that is a list is appended to
        the result, anything else is reported with a warning. Import failures are
        logged as critical and the entry point is skipped.

        :return: a flat list with the contents of every exported list
        """
        import importlib_metadata

        ep = []
        all_entrypoints = importlib_metadata.entry_points()
        if hasattr(all_entrypoints, "select"):
            # Newer importlib_metadata releases dropped the dict-style ``.get``.
            eps = all_entrypoints.select(group=EP.TRANSFORMATIONS)
        else:
            eps = all_entrypoints.get(EP.TRANSFORMATIONS, [])

        for entrypoint in eps:
            try:
                m = entrypoint.load()
                if isinstance(m, list):
                    ep.extend(m)
                else:
                    Logger.warn(
                        f"Janis transformation entrypoint '{entrypoint.name}' was not a list (type {type(m)}). "
                        f"Only export a single list of transformations, for example: "
                        f"`janis_bioinformatics.transformations:transformations`"
                    )
            except ImportError as e:
                t = f"Couldn't import janis datatype_transformation extension '{entrypoint.name}': {e}"
                Logger.critical(t)
        return ep
Esempio n. 20
0
    def stop_engine(self):
        """
        Stop the Cromwell process that this engine started, if there is one.

        The helper logger is terminated, ``should_stop`` is raised, the timer object
        is signalled and the log file handle is flushed, synced and closed. SIGTERM
        is then sent to the process group of ``self.process_id``; a failure to do so
        is logged, never raised.

        ``self.is_started`` is only set to False when a process id was recorded.

        :return: None
        """
        if self._logger:
            self._logger.terminate()

        self.should_stop = True
        if self._timer_thread:
            self._timer_thread.set()

        if self._logfp:
            # Push buffered log output to disk before closing the handle
            self._logfp.flush()
            os.fsync(self._logfp.fileno())
            self._logfp.close()

        if not self.process_id:
            Logger.warn("Could not find a cromwell process to end, SKIPPING")
            return

        Logger.info("Stopping cromwell")
        try:
            # Signal the whole process group so children of the process stop too.
            process_group = os.getpgid(int(self.process_id))
            os.killpg(process_group, signal.SIGTERM)
            Logger.info("Stopped cromwell")
        except Exception as e:
            # Best effort: the process may already be gone or not be ours to signal.
            Logger.warn("Couldn't stop Cromwell process: " + str(e))

        self.is_started = False
Esempio n. 21
0
    def process_cls(cls, seen_modules, seen_classes: set, current_layer: int):
        """
        Inspect one discovered object and register it with ``JanisShed`` if relevant.

        Modules are traversed recursively (up to ``JanisShed.MAX_RECURSION_DEPTH``),
        plain functions are ignored, DataType subclasses are registered as types and
        objects with a callable ``type`` of a recognised tool type are registered as
        tools. Any exception is logged as a warning instead of being raised.

        :param cls: the object to inspect (a module, function, class or instance)
        :param seen_modules: forwarded to ``JanisShed.traverse_module``
        :param seen_classes: set that every non-module, non-function object is added to
        :param current_layer: current recursion depth
        """
        try:
            if ismodule(cls):
                if current_layer > JanisShed.MAX_RECURSION_DEPTH:
                    return Logger.log(
                        f"Skip traversing module '{str(cls)}' as reached maximum depth ({JanisShed.MAX_RECURSION_DEPTH})"
                    )
                return JanisShed.traverse_module(
                    cls,
                    seen_modules,
                    seen_classes,
                    current_layer=current_layer + 1)

            if isfunction(cls):
                return

            seen_classes.add(cls)
            if isclass(cls) and issubclass(cls, DataType):
                return JanisShed.add_type(cls)
            if not (hasattr(cls, "type") and callable(cls.type)):
                return

            # The base classes themselves are never registered
            base_classes = (Tool, Workflow, CommandTool, CodeTool, PythonTool,
                            WorkflowBuilder, CommandToolBuilder)
            if any(cls == base for base in base_classes):
                return

            tool_type = cls.type()
            if not (isinstance(tool_type, ToolType)
                    and tool_type in JanisShed.recognised_types):
                return

            if not isabstract(cls):
                # Classes are instantiated, instances are registered as they are
                return JanisShed.add_tool(cls() if isclass(cls) else cls)

            if issubclass(cls, Tool):
                abstract_names = list(cls.__abstractmethods__)
                return Logger.warn(
                    f"The tool '{cls.__name__}' had abstract methods: "
                    + ", ".join(abstract_names))
            return

        except Exception as e:
            Logger.warn(f"{repr(e)} for type {str(cls)}")
    def _generate_call_times_from_calls(
        cls, calls, prefix="", include_subworkflow_total=True
    ) -> dict:
        """
        Flatten call metadata into ``{call key: {"start", "end", "time"}}``.

        Only the first entry of each call's list is read. Calls carrying a
        ``subWorkflowMetadata`` key are expanded recursively, and their own totals are
        included only when ``include_subworkflow_total`` is true.

        :param calls: mapping of call key to a list of call dictionaries
        :param prefix: string prepended to every key of this level
        :param include_subworkflow_total: also emit an entry for a subworkflow call itself
        :return: dictionary keyed by prefixed call key; ``time`` is the duration in
            seconds or "N/A" when the call has no end
        """
        dcalls = {}

        for call_key, attempts in calls.items():
            call = attempts[0]

            s, f = call["start"], call.get("end")
            sd = DateUtil.parse_iso(s)
            fd = DateUtil.parse_iso(f) if f else None

            prefixed_call_key = prefix + call_key

            if prefixed_call_key in dcalls:
                # An f-string keeps the message well-formed (balanced parenthesis)
                # and does not fail when the id is not a string.
                Logger.warn(
                    "Doesn't know how to handle multiple instances of calls, skipping this one "
                    f"(with id: {call['id']})"
                )
                continue

            is_subworkflow_total = "subWorkflowMetadata" in call
            if is_subworkflow_total:
                # NOTE(review): the nested prefix is built from call_key rather than
                # prefixed_call_key, and include_subworkflow_total is not forwarded;
                # confirm this is intended for subworkflows nested more than one deep.
                dcalls.update(
                    cls._generate_call_times_from_calls(
                        call["subWorkflowMetadata"]["calls"], prefix=call_key + "."
                    )
                )

            if not is_subworkflow_total or include_subworkflow_total:
                dcalls[prefixed_call_key] = {
                    "start": s,
                    "end": f,
                    "time": (fd - sd).total_seconds() if fd else "N/A",
                }

        return dcalls
    def outputs_task(self, identifier):
        """
        Fetch and parse the outputs of a task.

        :param identifier: identifier handed to ``self.url_outputs``
        :return: dictionary built from the ``(key, value)`` pairs returned by
            ``self.parse_output`` for every entry under ``"outputs"``; None when the
            response has no outputs or when the request or decoding fails (a warning
            is logged in that case)
        """
        url = self.url_outputs(identifier=identifier)
        try:
            # Context manager so the HTTP response is closed instead of leaked.
            with request.urlopen(url) as r:
                data = r.read()
                charset = r.info().get_content_charset("utf-8")
            res = json.loads(data.decode(charset))
            outs = res.get("outputs")
        except Exception as e:
            return Logger.warn(
                f"Couldn't get outputs with identifier='{identifier}', got error: "
                + str(e))

        if not outs:
            return None
        parsed = [self.parse_output(k, v) for k, v in outs.items()]
        return {out[0]: out[1] for out in parsed}
Esempio n. 24
0
    def raw_metadata(self,
                     identifier,
                     expand_subworkflows=True) -> Optional[CromwellMetadata]:
        """
        Request the metadata of a task from Cromwell.

        :param identifier: identifier handed to ``self.url_metadata``
        :param expand_subworkflows: forwarded to ``self.url_metadata``
        :return: the wrapped metadata, or None when Cromwell answers with an HTTP
            error or cannot be reached
        :raises URLError: once more than 50 connection errors have been counted
            since the last successful request
        """
        metadata_url = self.url_metadata(
            identifier=identifier, expand_subworkflows=expand_subworkflows)

        if not self.last_contacted:
            self.last_contacted = datetime.now()

        Logger.log(
            f"Getting Cromwell metadata for task '{identifier}' with url: {metadata_url}"
        )
        try:
            response = request.urlopen(metadata_url)

            # A successful request resets the failure tracking
            self.connectionerrorcount = 0
            self.last_contacted = datetime.now()

            raw = response.read()
            charset = response.info().get_content_charset("utf-8")
            return CromwellMetadata(json.loads(raw.decode(charset)))

        except request.HTTPError as e:

            if e.code == 404:
                # Usually means Cromwell hasn't loaded properly yet
                return None

            try:
                body = e.read().decode()
                parsed_body = json.loads(body) if body else {}
                fallback = "An unexpected error occurred: " + str(e)
                message = parsed_body.get("message", fallback)
                Logger.warn("Response when getting Cromwell metadata: " +
                            str(message))
            except Exception as ee:
                Logger.warn(str(e))
            finally:
                # Whatever happened while reporting, HTTP errors never propagate
                return None
        except request.URLError as e:
            self.connectionerrorcount += 1

            elapsed = datetime.now() - self.last_contacted
            if elapsed.total_seconds() / 60 > self.timeout:
                self.something_has_happened_to_cromwell(
                    "last_updated_threshold")  # idk, pick a number
                return None

            if self.connectionerrorcount > 50:
                raise e

            Logger.warn("Error connecting to cromwell instance: " + str(e))
            return None
    def cp_from(
        self,
        source,
        dest,
        force=False,
        report_progress: Optional[Callable[[float], None]] = None,
    ):
        """
        Copy ``source`` from the remote host to the local ``dest`` using ``scp``.

        Destinations ending in "bam" and destinations that already exist are
        skipped. The exit code of ``scp`` is not inspected.

        :param source: path on the remote host
        :param dest: local destination path
        :param force: not supported; a critical message is logged when set
        :param report_progress: accepted for interface compatibility, never called here
        :return: None (the result of the ``Logger`` call when the copy is skipped)
        """
        if force:
            Logger.critical("SSHFileScheme does not support the 'force' flag")
        args = ["scp", self.connectionstring + ":" + source, dest]

        if dest.endswith("bam"):
            return Logger.warn(
                "Manually skipped BAM file, as they're usually too big")

        if os.path.exists(dest):
            # Message now closes the parenthesis it opens.
            return Logger.log(f"Skipping as exists ({source} -> {dest})")

        Logger.info(
            f"Secure copying (SCP) from {self.connectionstring}:{source} to local:{dest}"
        )
        subprocess.call(args)
    def raw_metadata(
        self,
        identifier,
        expand_subworkflows=True,
        metadata_export_file_path: Optional[str] = None,
    ) -> Optional[CromwellMetadata]:
        """
        Request the metadata of a task from Cromwell, optionally persisting the JSON.

        :param identifier: identifier handed to ``self.url_metadata``
        :param expand_subworkflows: forwarded to ``self.url_metadata``
        :param metadata_export_file_path: when set, the decoded JSON is also written
            to this path (a failure to write is only logged)
        :return: the wrapped metadata, or None when the request failed in a way that
            is tolerated (404, a counted HTTP error, a connection problem)
        :raises HTTPError: after more than 5 consecutive non-404 HTTP errors
        :raises URLError, ConnectionResetError: after more than 15 consecutive
            connection errors (while still inside the timeout window)
        """
        url = self.url_metadata(identifier=identifier,
                                expand_subworkflows=expand_subworkflows)

        # First call: start the "time since last contact" clock now
        if not self.last_contacted:
            self.last_contacted = datetime.now()

        Logger.log(
            f"Getting Cromwell metadata for task '{identifier}' with url: {url}"
        )
        try:
            r = request.urlopen(url)
            # A successful request resets both failure counters
            self.connectionerrorcount = 0
            self.metadataerrorcount = 0

            self.last_contacted = datetime.now()

            data = r.read()
            jsonobj = json.loads(
                data.decode(r.info().get_content_charset("utf-8")))

            if metadata_export_file_path:
                try:
                    with open(metadata_export_file_path, "w+") as f:
                        json.dump(jsonobj, f)
                except Exception as e:
                    # Persisting is best effort, the metadata is still returned
                    Logger.warn(
                        f"Couldn't persist Cromwell metadata json to '{metadata_export_file_path}': {repr(e)}"
                    )

            return CromwellMetadata(jsonobj)

        except request.HTTPError as e:

            if e.code == 404:
                # Usually means Cromwell hasn't loaded properly yet
                return None

            # Default message, replaced below when the response body carries a "message"
            er_message = "An unexpected error occurred: " + str(e)
            try:
                body = e.read().decode()
                jsonobj = json.loads(body) if body else {}
                message = jsonobj.get("message")
                if message:
                    er_message = f"Response when getting Cromwell metadata: {message}"
            except Exception as ee:
                er_message = (
                    f"An additional error occurred while trying to determine the reason"
                    f" why Cromwell metadata failed (internal: {repr(ee)}): {repr(e)})"
                )
            finally:
                # Runs on every path above: log, count the failure, then either
                # re-raise the HTTP error or swallow it by returning None.
                Logger.warn("Couldn't get Cromwell metadata: " +
                            str(er_message))
                self.metadataerrorcount += 1
                if self.metadataerrorcount > 5:
                    # we could check the status of the workflow, or we could just suspend it really
                    # 5 consecutive errors
                    raise e
                return None

        except (request.URLError, ConnectionResetError) as e:
            self.connectionerrorcount += 1
            # self.timeout is compared against minutes since the last successful contact
            minutes_not_able_to_contact_cromwell = (
                datetime.now() - self.last_contacted).total_seconds() / 60
            if minutes_not_able_to_contact_cromwell > self.timeout:
                message = (
                    f"Janis is receiving a ConnectionResetError when contacting the Cromwell instance "
                    f"({self.host}) {self.connectionerrorcount} times, and has been unable to connect to "
                    f"Cromwell for {minutes_not_able_to_contact_cromwell} minutes. "
                )
                if self.db_type and self.db_type == DatabaseTypeToUse.filebased:
                    ja_config_url = "https://janis.readthedocs.io/en/latest/references/configuration.html#cromwell"
                    message += (
                        "We've seen this issue more frequently when Janis is configuring Cromwell to use the "
                        "file-based database. We recommend configuring Janis to use a MySQL database through the "
                        f"`--mysql` flag, visitng '{ja_config_url}', or raising an issue on GitHub ({GITHUB_URL}) "
                        f"for more information.")
                Logger.warn(message)
                # Timed out: hand over to the failure handler instead of raising
                self.something_has_happened_to_cromwell(
                    "last_updated_threshold")  # idk, pick a number
                return None
            if self.connectionerrorcount > 15:
                raise e
            else:
                Logger.warn("Error connecting to cromwell instance: " +
                            repr(e))
            return None
Esempio n. 27
0
    def load_recipes(self, force=False):
        """
        Load recipes from the configured and environment-provided locations.

        Recipe files (``paths``) are parsed as YAML and merged into ``self.recipes``;
        files inside recipe directories are indexed by key in ``self._files_by_key``.
        Problems with a single file or directory are logged and skipped.

        :param force: reload even when recipes were already loaded
        :return: None
        """
        from os import listdir

        dirs: List[str] = []
        paths: List[str] = []

        paths_from_env = EnvVariables.recipe_paths.resolve(True)
        dirs_from_env = EnvVariables.recipe_directory.resolve(True) or []

        # Do the env first, then ones from the config can cascade over them
        if paths_from_env:
            paths.extend(paths_from_env)
        if self.paths:
            paths.extend(self.paths)
        if dirs_from_env:
            dirs.extend(dirs_from_env)
        if self.directories:
            dirs.extend(self.directories)

        # Do if: force or (we haven't loaded recipes and we have recipes to load)
        should_load = force or (not self._loaded_recipes and (paths or dirs))
        if not should_load:
            if not self._loaded_recipes:
                # Nothing has been indexed yet, make sure the index exists and is empty.
                self._files_by_key = {}
            # When recipes are already loaded the existing index is kept. It used
            # to be wiped here, which lost every directory recipe on a second call.
            return

        self._files_by_key = {}

        import ruamel.yaml

        for recipe_location in paths:
            try:
                with open(recipe_location) as rl:
                    # NOTE(review): ruamel.yaml.Loader is not a safe loader; only
                    # point this at trusted recipe files.
                    adr = ruamel.yaml.load(rl, Loader=ruamel.yaml.Loader)
                    self.recipes.update(adr)

            except Exception as e:
                Logger.critical(
                    f"Couldn't load recipe '{recipe_location}': {e}")

        for d in dirs:
            if not os.path.exists(d):
                Logger.critical(
                    f"Couldn't find recipe directory: '{d}', skipping")
                continue
            if not os.path.isdir(d):
                Logger.critical(
                    f"The path listed as a recipe directory was not a directory: '{d}', skipping"
                )
                continue
            for f in listdir(d):
                fpath = os.path.join(d, f)
                parsed = self.parseable_yaml_filename_if_valid(fpath)
                if not parsed:
                    Logger.warn(
                        f"Skipping file within recipe directory '{fpath}' as it contained "
                        f"an unrecognised extension: '{os.path.splitext(fpath)[1]}"
                    )
                    continue

                key, value = parsed
                self._files_by_key.setdefault(key, []).append(value)

        self._loaded_recipes = True
    def __init__(
        self,
        logfile=None,
        confdir=None,
        identifier="cromwell",
        host=None,
        cromwelljar=None,
        config: CromwellConfiguration = None,
        config_path=None,
        execution_dir: str = None,
        polling_interval: Optional[int] = None,
        db_type: DatabaseTypeToUse = None,
    ):
        """
        Initialise the Cromwell engine state.

        When ``host`` is truthy the engine is flagged as connecting to an
        existing instance (and as already started). Otherwise the config path
        is set to ``<confdir>/cromwell.conf`` and ``find_or_generate_config``
        is called with the supplied ``config`` / ``config_path``.

        :param logfile: forwarded to the parent constructor
        :param confdir: directory joined with "cromwell.conf"; only used when
            no ``host`` is given
        :param identifier: forwarded to the parent constructor and to
            ``find_or_generate_config``
        :param host: address of an existing Cromwell instance, if any
        :param cromwelljar: stored as ``self.cromwelljar``
        :param config: passed to ``find_or_generate_config``
        :param config_path: passed to ``find_or_generate_config``
        :param execution_dir: forwarded to the parent constructor
        :param polling_interval: coerced with ``int``; values below 3 are
            discarded (with a warning) and stored as ``None``
        :param db_type: stored as ``self.db_type``
        """
        super().__init__(
            identifier,
            EngineType.cromwell,
            logfile=logfile,
            execution_dir=execution_dir,
        )

        # Hierarchy of configs:
        #   - Passed in config
        #   - Passed in configPath
        #   - Config available from JanisConfiguration
        #   - ConfigPath available from JanisConfiguration

        has_host = bool(host)

        self.cromwelljar = cromwelljar
        self.connect_to_instance = has_host
        # An instance we merely connect to is considered started already
        self.is_started = has_host

        # Connection details
        self.host = host
        self.port = None
        self.config_path = None

        # Process / logging bookkeeping
        self._process = None
        self._logger = None
        self.stdout = []
        self.error_message = None
        self._timer_thread: Optional[threading.Event] = None
        self.config: Optional[CromwellConfiguration] = None
        # Last contacted is used to determine
        self.last_contacted = None
        self.timeout = 10  # minutes
        self.db_type: Optional[DatabaseTypeToUse] = db_type
        self.is_managing_cromwell = host is None

        # Normalise the polling interval, dropping values that are too small
        if polling_interval is not None:
            polling_interval = int(polling_interval)
        if polling_interval is not None and polling_interval < 3:
            Logger.warn(
                f"The polling interval for Cromwell was {polling_interval} seconds, but Janis "
                f"requires > 3 seconds to ensure metadata is processed correctly"
            )
            polling_interval = None

        self.polling_interval = polling_interval
        self._start_time = None

        # Error counters
        self.connectionerrorcount = 0
        self.metadataerrorcount = 0

        self.should_stop = False

        if has_host:
            # Nothing to configure when attaching to an existing instance
            return

        # To avoid conflicts between version of Cromwell, we'll find an open
        # port, and allow Cromwell to bind there.
        self.config = None
        self.config_path = os.path.join(confdir, "cromwell.conf")
        self.find_or_generate_config(
            identifier, config=config, config_path=config_path
        )
    def run(self):
        """
        Follow the CWLTool process until it reports a terminal status.

        Lines are read from ``self.process.stderr``; each non-empty line is
        mirrored to ``self.logfp`` (when open), forwarded to the Logger at a
        level chosen from its prefix ("error" / "warn" / "info", otherwise
        debug) and, while in an error run, appended to ``self.error``. The
        loop ends when a line contains "final process status is" or when
        stderr yields nothing and the process has exited (marked ABORTED).

        If the final status is COMPLETED, stdout is then read line by line
        and accumulated until the text parses as JSON, which is stored in
        ``self.outputs``. Reading stops at end-of-stream so an unparseable
        stdout can no longer cause an endless loop.

        Finally ``self.terminate()`` is called and, if set,
        ``self.exit_function(self, finalstatus)``.

        NOTE(review): assumes ``self.process`` is a ``subprocess.Popen`` whose
        pipes are opened in binary mode (lines are ``.decode``d) - confirm.
        """
        finalstatus = None
        iserroring = False

        def write_to_logfile(text):
            # Mirror one line into the log file and force it onto disk
            if self.logfp and not self.logfp.closed:
                self.logfp.write(text + "\n")
                self.logfp.flush()
                os.fsync(self.logfp.fileno())

        try:
            # The loop is only left through the explicit break / return below
            while True:
                c = self.process.stderr.readline()
                if self.should_terminate:
                    return

                line = c.decode("utf-8").rstrip() if c else None

                if not line:
                    # Nothing useful was read: stop only once the process has
                    # exited, otherwise keep waiting for more output
                    if self.process.poll() is not None:
                        finalstatus = TaskStatus.ABORTED
                        Logger.warn(
                            f"CWLTool finished with rc={self.process.returncode} but janis "
                            f"was unable to capture the workflow status. Marking as aborted"
                        )
                        break
                    continue

                write_to_logfile(line)

                lowline = line.lower().lstrip()
                if lowline.startswith("error"):
                    Logger.critical("cwltool: " + line)
                    iserroring = True

                elif lowline.startswith("warn"):
                    iserroring = False
                    Logger.warn("cwltool: " + line)

                elif lowline.startswith("info"):
                    iserroring = False
                    Logger.info("cwltool: " + line)
                    self.process_metadataupdate_if_match(line)

                else:
                    # Unprefixed lines keep the current error state, so
                    # continuation lines of an error are still collected
                    Logger.debug("cwltool: " + line)

                if iserroring:
                    self.error = (self.error or "") + "\n" + line

                if "final process status is" in lowline:
                    if "fail" in lowline:
                        finalstatus = TaskStatus.FAILED
                    elif "success" in lowline:
                        finalstatus = TaskStatus.COMPLETED
                    else:
                        finalstatus = TaskStatus.ABORTED
                    break

            j = ""
            Logger.info("Process has completed")
            if finalstatus == TaskStatus.COMPLETED:
                # readline() returns b"" only at end-of-stream, so use it as
                # the sentinel instead of spinning forever on an exhausted pipe
                for c in iter(self.process.stdout.readline, b""):
                    line = c.decode("utf-8").rstrip()
                    Logger.debug(line)
                    write_to_logfile(line)
                    j += line
                    try:
                        # The outputs may span several lines; retry after
                        # every line until the accumulated text is valid JSON
                        self.outputs = json.loads(j)
                    except ValueError:
                        # json.JSONDecodeError is a ValueError: document is
                        # still incomplete, keep reading
                        continue
                    break
                else:
                    Logger.warn(
                        "CWLTool's stdout ended before Janis could parse the "
                        "workflow outputs as JSON"
                    )

            if self.error:
                Logger.critical("Janis detected a CWLTool error: " +
                                self.error)

            Logger.info("CWLTool detected transition to terminal status: " +
                        str(finalstatus))
            self.terminate()
            if self.exit_function:
                self.exit_function(self, finalstatus)

        except KeyboardInterrupt:
            self.should_terminate = True
            print("Detected keyboard interrupt")
            # raise
        except Exception:
            print("Detected another error")
            # Bare raise re-raises the active exception unchanged
            raise