def translate_tool(
        self,
        tool,
        to_console=True,
        to_disk=False,
        export_path=None,
        with_docker=True,
        with_resource_overrides=False,
        max_cores=None,
        max_mem=None,
    ):

        tool_out = self.stringify_translated_tool(
            self.translate_tool_internal(
                tool,
                with_docker=with_docker,
                with_resource_overrides=with_resource_overrides,
            ))

        if to_console:
            print(tool_out)

        if to_disk:
            d = ExportPathKeywords.resolve(export_path,
                                           workflow_spec=self.name,
                                           workflow_name=tool.id())
            if not os.path.exists(d):
                os.makedirs(d)
            fn_tool = self.tool_filename(tool)
            with open(os.path.join(d, fn_tool), "w+") as wf:
                Logger.log(f"Writing {fn_tool} to disk")
                wf.write(tool_out)
                Logger.log(f"Wrote {fn_tool}  to disk")

        return tool_out
    def get_value_for_key(d, key, default):
        val = d.get(key)
        if not val:
            return default.get(key) if default else None

        Logger.log(f"Got value '{val}' for key '{key}'")
        return val
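
# A standalone sketch of the lookup-with-fallback pattern above (hypothetical
# data, no Janis imports). Note that falsy values (0, "", False) also fall
# through to the default, mirroring get_value_for_key.
def get_with_fallback(d: dict, key, default: dict = None):
    val = d.get(key)
    if not val:
        return default.get(key) if default else None
    return val

assert get_with_fallback({"engine": "cromwell"}, "engine") == "cromwell"
assert get_with_fallback({}, "engine", {"engine": "cwltool"}) == "cwltool"
assert get_with_fallback({}, "engine") is None
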
Example #3
    def get(self, type_name, tag: Optional[str]) -> Optional[T]:
        if type_name not in self.registry:
            return None
        tagged_objs = self.registry[type_name]
        versions_without_default = set(tagged_objs.keys())
        if self.default_tag in versions_without_default:
            versions_without_default.remove(self.default_tag)

        Logger.debug(
            f"'{type_name}' has {len(versions_without_default)} versions ({', '.join(versions_without_default)})"
        )
        if tag is None or tag == self.default_tag:
            if self.default_tag in tagged_objs:
                Logger.info(
                    f"Using the default tag for '{type_name}' from {len(versions_without_default)} version(s): {', '.join(versions_without_default)}"
                )
                return tagged_objs.get(self.default_tag)[0]
            return None

        if tag not in tagged_objs:
            Logger.log(
                "Found collection '{tool}' in registry, but couldn't find tag '{tag}'"
                .format(tool=type_name, tag=tag))
            return None

        return tagged_objs[tag]
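
# Sketch of the registry shape the lookup above appears to assume (hypothetical
# values): a two-level dict {type_name: {tag: entry}} where the default tag
# maps to a list, so element [0] is the preferred version.
registry = {
    "samtools": {
        "default": ["samtools-1.9"],  # default tag holds a list; [0] is returned
        "1.7": "samtools-1.7",        # explicit tags map straight to the entry
    }
}
assert registry["samtools"]["default"][0] == "samtools-1.9"
assert registry["samtools"]["1.7"] == "samtools-1.7"
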
Example #4
    def map_to_wdl(t: NativeType):
        import wdlgen as wdl

        if t == NativeTypes.kBool:
            return wdl.PrimitiveType.kBoolean
        elif t == NativeTypes.kInt:
            return wdl.PrimitiveType.kInt

        elif (t == NativeTypes.kLong or t == NativeTypes.kFloat
              or t == NativeTypes.kDouble):
            return wdl.PrimitiveType.kFloat
        elif t == NativeTypes.kStr:
            return wdl.PrimitiveType.kString
        elif t == NativeTypes.kFile:
            return wdl.PrimitiveType.kFile
        elif t == NativeTypes.kStdout:
            return wdl.PrimitiveType.kFile
        elif t == NativeTypes.kStderr:
            return wdl.PrimitiveType.kFile
        elif t == NativeTypes.kDirectory:
            Logger.log(
                "Using data_type 'Directory' for wdl, this requires cromwell>=37 and language=development"
            )
            return wdl.PrimitiveType.kDirectory
        elif t == NativeTypes.kArray:
            return wdl.ArrayType.kArray
        raise Exception(
            f"Unhandled primitive type {t}, expected one of {', '.join(NativeTypes.all)}"
        )
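
# The chain above is a pure lookup, so an equivalent dict-based sketch is shown
# here with plain strings standing in for the NativeTypes/wdlgen enums (which
# the original assumes are importable).
_WDL_TYPE_MAP = {
    "bool": "Boolean",
    "int": "Int",
    "long": "Float", "float": "Float", "double": "Float",  # all widen to Float
    "str": "String",
    "file": "File", "stdout": "File", "stderr": "File",    # streams become Files
    "dir": "Directory",
    "array": "Array",
}

def map_to_wdl_sketch(t: str) -> str:
    try:
        return _WDL_TYPE_MAP[t]
    except KeyError:
        raise Exception(f"Unhandled primitive type {t}, expected one of "
                        + ", ".join(_WDL_TYPE_MAP))

assert map_to_wdl_sketch("long") == "Float"
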
    def __init__(self, **connections):
        super().__init__(metadata_class=WorkflowMetadata)

        self.connections = connections

        Logger.log(f"Creating workflow with identifier: '{self.id()}'")

        if not Validators.validate_identifier(self.id()):
            raise Exception(
                f"The identifier '{self.id()}' was invalid because {Validators.reason_for_failure(self.id())}"
            )

        # The following variables allow us to quickly check data about the graph
        self.nodes: Dict[str, Node] = {}

        self.input_nodes: Dict[str, InputNode] = {}
        self.step_nodes: Dict[str, StepNode] = {}
        self.output_nodes: Dict[str, OutputNode] = {}

        # Flags for different requirements that a workflow might need
        self.has_scatter = False
        self.has_subworkflow = False
        self.has_multiple_inputs = False

        # Now that we've initialised everything, we can "construct" the workflow
        # for subclasses of this class; for a WorkflowBuilder this does nothing,
        # and workflows are constructed later.
        self.constructor()
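
# Flavour of the identifier check above. The real rules live in janis_core's
# Validators; this standalone stand-in just requires a Python identifier:
def validate_identifier_sketch(identifier: str) -> bool:
    return identifier.isidentifier()

assert validate_identifier_sketch("alignment_wf")
assert not validate_identifier_sketch("1bad-id")
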
Example #6
    def hydrate(force=False, modules: list = None):
        # go get everything
        if JanisShed._has_been_hydrated and not force:
            return

        if not modules:
            modules = []
            modules.extend(JanisShed._get_datatype_entrypoints())
            modules.extend(JanisShed._get_tool_entrypoints())

        level = None
        cl = Logger.CONSOLE_LEVEL
        if JanisShed.should_trace:
            level = cl if cl >= LogLevel.DEBUG else LogLevel.DEBUG
        Logger.log(
            f"Setting CONSOLE_LEVEL to {LogLevel.get_str(level) or 'None'} while traversing modules"
        )
        Logger.set_console_level(level)
        seen_modules = set()
        seen_classes = set()
        for m in modules:
            JanisShed.traverse_module(m,
                                      seen_modules=seen_modules,
                                      seen_classes=seen_classes)
        Logger.set_console_level(cl)
        Logger.log(
            f"Restoring CONSOLE_LEVEL to {LogLevel.get_str(cl)} now that Janis shed has been hydrated"
        )

        JanisShed._has_been_hydrated = True
Example #7
    def add_tool(tool: Tool) -> bool:
        v: Optional[str] = tool.version()
        if not v:
            t = f"The tool {tool.id()} did not have a version and will not be registered"
            Logger.critical(t)
            return False
        Logger.log("Adding tool: " + tool.id())

        JanisShed._byclassname.register(tool.__class__.__name__, tool)
        return JanisShed._toolshed.register(tool.id().lower(), v.lower(), tool)
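
# Toy version of the registration guard above: tools without a version are
# rejected, and ids/versions are normalised to lowercase (names hypothetical).
toolshed = {}

def add_tool_sketch(tool_id: str, version) -> bool:
    if not version:
        print(f"The tool {tool_id} did not have a version and will not be registered")
        return False
    toolshed[(tool_id.lower(), str(version).lower())] = tool_id
    return True

assert add_tool_sketch("BWAMem", "0.7.17") is True
assert add_tool_sketch("NoVersion", None) is False
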
Example #8
    def check_types(self):
        from janis_core.workflow.workflow import InputNode, StepNode

        stoolin: TOutput = (
            self.start.outputs()[self.stag]
            if self.stag is not None
            else first_value(self.start.outputs())
        )
        ftoolin: TInput = (
            self.finish.inputs()[self.ftag]
            if self.ftag is not None
            else first_value(self.finish.inputs())
        )

        stype = stoolin.outtype
        ftype = ftoolin.intype

        start_is_scattered = (
            isinstance(self.start, StepNode) and self.start.scatter is not None
        )

        if start_is_scattered:
            Logger.log(
                f"This edge merges the inputs from '{full_dot(self.start, self.stag)}' for "
                f"'{full_dot(self.finish, self.ftag)}'"
            )
            stype = Array(stype)

        if self.scatter:
            if not isinstance(stype, Array):
                raise Exception(
                    f"Scatter was required for '{self.start.id()}.{self.stag} → '{self.finish.id()}.{self.ftag}' but "
                    f"the input type was {type(stype).__name__} and not an array"
                )
            stype = stype.subtype()

        source_has_default = (
            isinstance(self.start, InputNode) and self.start.default is not None
        )

        # Scatters are handled automatically by the StepTagInput Array unwrapping
        # Merges are handled automatically by the `start_is_scattered` Array wrap

        self.compatible_types = ftype.can_receive_from(stype, source_has_default)
        if not self.compatible_types:
            if isinstance(ftype, Array) and ftype.subtype().can_receive_from(stype):
                self.compatible_types = True

        if not self.compatible_types:

            s = full_dot(self.start, self.stag)
            f = full_dot(self.finish, self.ftag)
            message = (
                f"Mismatch of types when joining '{s}' to '{f}': "
                f"{stoolin.outtype.id()} -/→ {ftoolin.intype.id()}"
            )
            if isinstance(stype, Array) and ftype.can_receive_from(stype.subtype()):
                message += " (did you forget to SCATTER?)"
            Logger.critical(message)
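
# The Array wrap/unwrap rules check_types applies, in miniature (a toy tuple
# stands in for the janis Array DataType used by the real code):
def edge_source_type(stype, start_is_scattered: bool, should_scatter: bool):
    if start_is_scattered:
        stype = ("Array", stype)  # merging gathers the scattered outputs
    if should_scatter:
        if not (isinstance(stype, tuple) and stype[0] == "Array"):
            raise Exception("Scatter requires an array input")
        stype = stype[1]          # scattering consumes one Array level
    return stype

assert edge_source_type("File", True, False) == ("Array", "File")
assert edge_source_type(("Array", "File"), False, True) == "File"
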
    def link_copy_or_fail(source: str, dest: str, force=False):
        """
        Eventually move this to some generic util class
        :param source: Source to link from
        :param dest: Place to link to
        :param force: Overwrite destination if it exists
        :return:
        """
        try:

            to_copy = [(
                LocalFileScheme.prepare_path(source),
                LocalFileScheme.prepare_path(dest),
            )]

            while len(to_copy) > 0:
                s, d = to_copy.pop(0)

                # Skip if either path is null/None
                if not s or not d:
                    continue

                if os.path.exists(d) and force:
                    Logger.debug(f"Destination exists, overwriting '{d}'")
                    if os.path.isdir(d):
                        rmtree(d)
                    else:
                        os.remove(d)
                Logger.log(f"Hard linking {s} → {d}")

                if os.path.isdir(s):
                    os.makedirs(d, exist_ok=True)
                    for f in os.listdir(s):
                        to_copy.append((os.path.join(s, f), os.path.join(d,
                                                                         f)))
                    continue
                try:
                    os.link(s, d)
                except FileExistsError:
                    Logger.critical(
                        f"The file '{d}' already exists. The force flag is required to overwrite."
                    )
                except Exception as e:
                    Logger.warn("Couldn't link file: " + str(e))

                    # if this fails, it should error
                    Logger.log(f"Copying file {s} → {d}")
                    copyfile(s, d)
        except Exception as e:
            Logger.critical(
                f"An unexpected error occurred when link/copying {source} -> {dest}: {e}"
            )
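
# The core link-then-copy fallback of link_copy_or_fail, standalone and safe
# to run (temp files only):
import os
import tempfile
from shutil import copyfile

src = tempfile.NamedTemporaryFile(delete=False, suffix=".txt")
src.write(b"hello")
src.close()
dst = src.name + ".linked"

try:
    os.link(src.name, dst)   # hard link where the filesystem allows it
except OSError:
    copyfile(src.name, dst)  # otherwise fall back to a byte copy

assert open(dst).read() == "hello"
os.remove(src.name)
os.remove(dst)
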
        def __init__(self, d: dict, default: dict):
            d = d if d else {}

            self.id = JanisConfiguration.get_value_for_key(
                d, self.Keys.Id, default)

            Logger.log("Got template ID: " + str(self.id))

            # remove this id from the dictionary: https://stackoverflow.com/a/15411146/
            d.pop(self.Keys.Id.value, None)
            self.template = from_template(self.id, d)
Example #11
    def raw_metadata(self,
                     identifier,
                     expand_subworkflows=True) -> Optional[CromwellMetadata]:
        url = self.url_metadata(identifier=identifier,
                                expand_subworkflows=expand_subworkflows)

        if not self.last_contacted:
            self.last_contacted = datetime.now()

        Logger.log(
            f"Getting Cromwell metadata for task '{identifier}' with url: {url}"
        )
        try:
            r = request.urlopen(url)
            self.connectionerrorcount = 0

            self.last_contacted = datetime.now()

            data = r.read()
            jsonobj = json.loads(
                data.decode(r.info().get_content_charset("utf-8")))

            return CromwellMetadata(jsonobj)

        except request.HTTPError as e:

            if e.code == 404:
                # Usually means Cromwell hasn't loaded properly yet
                return None

            try:
                body = e.read().decode()
                jsonobj = json.loads(body) if body else {}
                message = jsonobj.get(
                    "message", "An unexpected error occurred: " + str(e))
                Logger.warn("Response when getting Cromwell metadata: " +
                            str(message))
            except Exception as ee:
                Logger.warn(str(e))
            finally:
                return None
        except request.URLError as e:
            self.connectionerrorcount += 1
            if (datetime.now() -
                    self.last_contacted).total_seconds() / 60 > self.timeout:
                self.something_has_happened_to_cromwell(
                    "last_updated_threshold")  # idk, pick a number
                return None
            if self.connectionerrorcount > 50:
                raise e
            else:
                Logger.warn("Error connecting to cromwell instance: " + str(e))
            return None
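
# The connection-failure bookkeeping above, in isolation: callers treat None
# as "no metadata yet"; the count only resets on a successful fetch.
def on_connection_error(state, max_errors=50):
    state["errors"] += 1
    if state["errors"] > max_errors:
        raise ConnectionError("giving up on Cromwell")
    return None

state = {"errors": 0}
assert on_connection_error(state) is None
assert state["errors"] == 1
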
Example #12
    def add_source(self, start: Node, stag: Optional[str], should_scatter) -> Edge:
        """
        Add a connection
        :param start:
        :param stag:
        :param should_scatter:
        :return:
        """

        from janis_core.workflow.workflow import StepNode

        stype = (
            start.outputs()[stag] if stag is not None else first_value(start.outputs())
        ).outtype
        ftype = (
            self.finish.inputs()[self.ftag]
            if self.ftag is not None
            else first_value(self.finish.inputs())
        ).intype

        start_is_scattered = isinstance(start, StepNode) and start.scatter is not None

        if start_is_scattered:
            Logger.log(
                f"This edge merges the inputs from '{full_dot(start, stag)}' for "
                f"'{full_dot(self.finish, self.ftag)}'"
            )
            stype = Array(stype)

        if should_scatter:
            if not isinstance(stype, Array):
                raise Exception(
                    f"Scatter was required for '{start.id()}.{stag} → '{self.finish.id()}.{self.ftag}' but "
                    f"the input type was {type(stype).__name__} and not an array"
                )
            stype = stype.subtype()

        if len(self.source_map) == 1 and start.id() not in self.source_map:
            self.multiple_inputs = True

            if not isinstance(ftype, Array):
                raise Exception(
                    f"Adding multiple inputs to '{self.finish.id()}' and '{ftype.id()}' is not an array"
                )

        if not isinstance(stype, Array) and isinstance(ftype, Array):
            # https://www.commonwl.org/user_guide/misc/#connect-a-solo-value-to-an-input-that-expects-an-array-of-that-type
            self.multiple_inputs = True

        e = Edge(start, stag, self.finish, self.ftag, should_scatter=should_scatter)
        self.source_map[start.id()] = e
        return e
Example #13
    def __init__(self, source: Selector, finish: Node, ftag: Optional[str],
                 should_scatter):
        Logger.log(
            f"Creating edge: ({source} → "
            f"({NodeType.to_str(finish.node_type)}) '{finish.id()}.{ftag}'")

        self.source = source
        self.finish: Node = finish
        self.ftag: Optional[str] = ftag
        self.compatible_types: Optional[bool] = None
        self.scatter = should_scatter

        self.validate_tags()
        self.check_types()
Example #14
    def __init__(self, id: str = None, **d):
        """
        :param id:
        :type id: The identifier of the template
        """
        from janis_assistant.templates import from_template

        self.id = (id or EnvVariables.default_template.resolve()
                   or "local")  # change default here
        self.templateconfig = {k: v for k, v in d.items()}

        Logger.log("Got template ID: " + str(self.id))

        self.template = from_template(self.id, self.templateconfig)
Example #15
    def hydrate_datatypes():
        if JanisShed._has_hydrated_datatypes:
            return Logger.log(
                "Skipping hydrating datatypes (as already hydrated)")

        JanisShed.hydrate_from(JanisShed._get_datatype_entrypoints())
        JanisShed._has_hydrated_datatypes = True
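
# The idempotence guard the hydrate_* methods share, in miniature: a class
# flag turns repeat calls into no-ops unless force=True.
class Shed:
    _hydrated = False

    @staticmethod
    def hydrate(force=False):
        if Shed._hydrated and not force:
            return "skipped"
        Shed._hydrated = True
        return "hydrated"

assert Shed.hydrate() == "hydrated"
assert Shed.hydrate() == "skipped"
assert Shed.hydrate(force=True) == "hydrated"
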
Example #16
    def get(self, type_name, tag: Optional[str]) -> Optional[T]:
        if type_name not in self.registry:
            return None
        tagged_objs = self.registry[type_name]

        if tag is None or tag == self.default_tag:
            if self.default_tag in tagged_objs:
                return tagged_objs.get(self.default_tag)[0]
            return None

        if tag not in tagged_objs:
            Logger.log(
                "Found collection '{tool}' in registry, but couldn't find tag '{tag}'"
                .format(tool=type_name, tag=tag))
            return None

        return tagged_objs[tag]
Example #17
def get_keywords_between_braces(
    text, argument_validator=variable_name_validator
) -> Tuple[set, int]:
    counter = 0
    highest_level = -1
    start_idx = None
    matches = set()
    rejected = set()
    skipped = set()

    for i, char in enumerate(text):
        if char == "{":
            counter += 1
            highest_level = max(highest_level, counter)
            if start_idx is None:
                start_idx = i
        elif char == "}" and counter > 0:
            counter -= 1

            if start_idx is not None and counter == 0:
                match = text[start_idx + 1 : i]
                if highest_level > 1:
                    skipped.add(match)
                elif argument_validator is not None and not argument_validator(match):
                    rejected.add(match)
                else:
                    matches.add(match)
                highest_level = -1
                start_idx = None
    extra: List[str] = []
    if len(matches) > 0:
        extra.append("matches=" + ",".join(matches))
    if len(rejected) > 0:
        extra.append("rejected=" + ",".join(rejected))
    if len(skipped) > 0:
        extra.append("skipped=" + ",".join(skipped))

    extrastr = ""
    if len(extra) > 0:
        extrastr = " (" + " | ".join(extra) + ")"

    Logger.log(f"Recognised {len(matches)} matches in '{text}'" + extrastr)

    return matches, counter
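
# Standalone check of the depth-tracking rule above (no Logger or validator):
# nested spans come back whole, and the original then routes them to 'skipped'.
def top_level_braces(text):
    depth, start, out = 0, None, []
    for i, ch in enumerate(text):
        if ch == "{":
            depth += 1
            if start is None:
                start = i
        elif ch == "}" and depth:
            depth -= 1
            if depth == 0:
                out.append(text[start + 1 : i])
                start = None
    return out

assert top_level_braces("a {x} b {y{z}} c") == ["x", "y{z}"]
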
Example #18
    def hydrate_transformations():
        if JanisShed._has_hydrated_transformations:
            return Logger.log(
                "Skipping hydrating transformations (as already hydrated")
        transformations = JanisShed._get_datatype_transformations_from_entrypoints(
        )

        JanisShed._transformationgraph.add_edges(transformations)
        JanisShed._has_hydrated_transformations = True
Example #19
    def hydrate_from(modules: list):
        level = None
        cl = Logger.CONSOLE_LEVEL
        if JanisShed.should_trace:
            level = cl if cl >= LogLevel.DEBUG else LogLevel.DEBUG
        Logger.log(
            f"Setting CONSOLE_LEVEL to {LogLevel.get_str(level) or 'None'} while traversing modules"
        )
        Logger.set_console_level(level)
        seen_modules = set()
        seen_classes = set()
        for m in modules:
            JanisShed.traverse_module(m,
                                      seen_modules=seen_modules,
                                      seen_classes=seen_classes)
        Logger.set_console_level(cl)
        Logger.log(
            f"Restoring CONSOLE_LEVEL to {LogLevel.get_str(cl)} now that Janis shed has been hydrated"
        )
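
# The save/lower/restore console-level dance above, as a context manager
# (sketch using the stdlib logging module in place of the Janis Logger):
import logging
from contextlib import contextmanager

@contextmanager
def console_level(level):
    root = logging.getLogger()
    previous = root.level
    root.setLevel(level)
    try:
        yield
    finally:
        root.setLevel(previous)  # restore even if the traversal raises

with console_level(logging.DEBUG):
    logging.debug("visible while hydrating")
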
Example #20
    def traverse_module(module,
                        seen_modules: set,
                        seen_classes: set,
                        current_layer=1):
        if module.__name__ in seen_modules:
            return
        Logger.log("Traversing module " + str(module.__name__))
        seen_modules.add(module.__name__)

        q = {
            n: cls
            for n, cls in list(module.__dict__.items())
            if not n.startswith("__") and type(cls) != type
            and not (ismodule(cls) and cls.__name__ in seen_modules) and (
                not isinstance(cls, list) and cls not in seen_classes)
        }

        for k in q:
            cls = q[k]
            JanisShed.process_cls(cls, seen_modules, seen_classes,
                                  current_layer)
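
# A minimal flavour of the traversal above: walk a module namespace, skip
# dunders, and collect classes while guarding against revisits (stdlib only;
# json is just a convenient demo module).
import json as _demo_module
from inspect import isclass

_seen_modules = set()

def collect_classes(module, out: set):
    if module.__name__ in _seen_modules:
        return
    _seen_modules.add(module.__name__)
    for name, obj in vars(module).items():
        if not name.startswith("__") and isclass(obj):
            out.add(obj)

found = set()
collect_classes(_demo_module, found)
assert _demo_module.JSONDecoder in found
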
Example #21
    def process_cls(cls, seen_modules, seen_classes: set, current_layer: int):
        try:
            if ismodule(cls):
                if current_layer <= JanisShed.MAX_RECURSION_DEPTH:
                    return JanisShed.traverse_module(
                        cls,
                        seen_modules,
                        seen_classes,
                        current_layer=current_layer + 1)
                return Logger.log(
                    f"Skip traversing module '{str(cls)}' as reached maximum depth ({JanisShed.MAX_RECURSION_DEPTH})"
                )
            elif isfunction(cls):
                return

            seen_classes.add(cls)
            if isclass(cls) and issubclass(cls, DataType):
                return JanisShed.add_type(cls)
            elif not hasattr(cls, "type") or not callable(cls.type):
                return

            if (cls == Tool or cls == Workflow or cls == CommandTool
                    or cls == CodeTool or cls == PythonTool
                    or cls == WorkflowBuilder or cls == CommandToolBuilder):
                return

            tp = cls.type()
            if isinstance(tp, str) and tp in JanisShed.recognised_types:
                if isabstract(cls):
                    if issubclass(cls, Tool):
                        abstractmethods = list(cls.__abstractmethods__)
                        return Logger.warn(
                            f"The tool '{cls.__name__}' had abstract methods: "
                            + ", ".join(abstractmethods))
                    return
                ic = cls() if isclass(cls) else cls
                return JanisShed.add_tool(ic)

        except Exception as e:
            Logger.log(f"{str(e)} for type {str(cls)}")
Example #22
    def __init__(
        self,
        start: Node,
        stag: Optional[str],
        finish: Node,
        ftag: Optional[str],
        should_scatter,
    ):
        Logger.log(
            f"Creating edge: ({NodeType.to_str(start.node_type)}) '{start.id()}.{stag}' → "
            f"({NodeType.to_str(finish.node_type)}) '{finish.id()}.{ftag}'"
        )

        self.start: Node = start
        self.stag: Optional[str] = stag
        self.finish: Node = finish
        self.ftag: Optional[str] = ftag
        self.compatible_types: Optional[bool] = None
        self.scatter = should_scatter

        self.validate_tags()
        self.check_types()
Example #23
    def process_cls(cls, seen_modules, seen_classes: set):
        try:
            if ismodule(cls):
                return JanisShed.traverse_module(cls, seen_modules, seen_classes)
            elif isfunction(cls) or isabstract(cls):
                return
            elif not isclass(cls):
                return

            seen_classes.add(cls)

            if issubclass(cls, DataType):
                return JanisShed.add_type(cls)
            elif not hasattr(cls, "type") or not callable(cls.type):
                return
            elif cls.type() == ToolTypes.Workflow:
                return JanisShed.add_tool(cls())
            elif cls.type() == ToolTypes.CommandTool:
                return JanisShed.add_tool(cls())

        except Exception as e:
            Logger.log(f"{str(e)} for type {type(cls)}")
Example #24
    def __init__(
        self,
        start_type: ParseableType,
        finish_type: ParseableType,
        tool: Tool,
        relevant_tool_input: Optional[str] = None,
        relevant_tool_output: Optional[str] = None,
    ):
        self.type1 = get_instantiated_type(start_type)
        self.type2 = get_instantiated_type(finish_type)
        self.tool = tool

        connection_type = f"`{self.type1} -> {self.type2}`"

        Logger.log(
            f"Building transformation for {connection_type} using tool '{tool.id()}"
        )

        self.relevant_tool_input = self.evaluate_tool_input(
            relevant_tool_input)
        self.relevant_tool_output = self.evaluate_tool_output(
            relevant_tool_output)
    def cp_from(
        self,
        source,
        dest,
        force=False,
        report_progress: Optional[Callable[[float], None]] = None,
    ):
        if force:
            Logger.critical("SSHFileScheme does not support the 'force' flag")
        args = ["scp", self.connectionstring + ":" + source, dest]

        if dest.endswith("bam"):
            return Logger.warn(
                "Manually skipped BAM file, as they're usually too big")

        if os.path.exists(dest):
            return Logger.log(f"Skipping as exists ({source} -> {dest}")

        Logger.info(
            f"Secure copying (SCP) from {self.connectionstring}:{source} to local:{dest}"
        )
        subprocess.call(args)
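
# The scp argv cp_from builds, assembled standalone (connection string and
# paths are hypothetical; nothing is executed here):
connectionstring = "user@cluster"
source, dest = "/remote/run/out.txt", "/tmp/out.txt"
args = ["scp", connectionstring + ":" + source, dest]
assert args == ["scp", "user@cluster:/remote/run/out.txt", "/tmp/out.txt"]
# subprocess.call(args) then blocks until the copy completes, as in cp_from.
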
    def start_engine(self):
        Logger.log("Cwltool doesn't run in a server mode, an instance will "
                   "automatically be started when a task is created")
        return self
    def create_task(self,
                    wid,
                    source,
                    inputs: list,
                    dependencies,
                    workflow_type=None):
        # curl \
        #   -X POST "http://*****:*****@whole_genome_germline.cwl;type=" \
        #   -F "workflowInputs=@whole_genome_germline-local.yml;type=" \
        #   -F "[email protected];type=application/zip

        from requests import post

        url = self.url_create()

        max_inputs = 5
        if len(inputs) > max_inputs:
            raise Exception(
                "Too many inputs (yaml files). Proposed: automatic merge into one file."
            )

        files = {
            "workflowSource": source,
            "labels": json.dumps({"taskid": wid}),
            "workflowOptions": json.dumps({
                "google_labels": {"taskid": wid},
                "monitoring_image": "quay.io/dinvlad/cromwell-monitor",
                "workflow_failure_mode": "ContinueWhilePossible",
            }),
        }

        if dependencies:
            files["workflowDependencies"] = dependencies

        for i, inp in enumerate(inputs):
            k = "workflowInputs" + ("" if i == 0 else "_" + str(i + 1))
            files[k] = inp

        Logger.log("Posting to " + url)

        r = post(url, files=files)
        try:
            res = r.json()
        except Exception as e:
            Logger.log_ex(e)
            Logger.critical(r.text)
            raise e

        if not r.ok or r.status_code > 201 or res["status"] != "Submitted":
            raise Exception(res)

        task_id = res["id"]

        return task_id
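
# Shape of the multipart payload create_task posts (illustrative values; no
# request is made here). Extra input files get numbered keys, matching the
# loop above.
import json

wid = "wid-1234"  # hypothetical task id
inputs = ["inputs.yml", "inputs-extra.yml"]
files = {
    "workflowSource": "workflow source text",
    "labels": json.dumps({"taskid": wid}),
}
for i, inp in enumerate(inputs):
    files["workflowInputs" + ("" if i == 0 else "_" + str(i + 1))] = inp

assert set(files) == {"workflowSource", "labels",
                      "workflowInputs", "workflowInputs_2"}
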
    def raw_metadata(
        self,
        identifier,
        expand_subworkflows=True,
        metadata_export_file_path: Optional[str] = None,
    ) -> Optional[CromwellMetadata]:
        url = self.url_metadata(identifier=identifier,
                                expand_subworkflows=expand_subworkflows)

        if not self.last_contacted:
            self.last_contacted = datetime.now()

        Logger.log(
            f"Getting Cromwell metadata for task '{identifier}' with url: {url}"
        )
        try:
            r = request.urlopen(url)
            self.connectionerrorcount = 0
            self.metadataerrorcount = 0

            self.last_contacted = datetime.now()

            data = r.read()
            jsonobj = json.loads(
                data.decode(r.info().get_content_charset("utf-8")))

            if metadata_export_file_path:
                try:
                    with open(metadata_export_file_path, "w+") as f:
                        json.dump(jsonobj, f)
                except Exception as e:
                    Logger.warn(
                        f"Couldn't persist Cromwell metadata json to '{metadata_export_file_path}': {repr(e)}"
                    )

            return CromwellMetadata(jsonobj)

        except request.HTTPError as e:

            if e.code == 404:
                # Usually means Cromwell hasn't loaded properly yet
                return None

            er_message = "An unexpected error occurred: " + str(e)
            try:
                body = e.read().decode()
                jsonobj = json.loads(body) if body else {}
                message = jsonobj.get("message")
                if message:
                    er_message = f"Response when getting Cromwell metadata: {message}"
            except Exception as ee:
                er_message = (
                    f"An additional error occurred while trying to determine the reason"
                    f" why Cromwell metadata failed (internal: {repr(ee)}): {repr(e)})"
                )
            finally:
                Logger.warn("Couldn't get Cromwell metadata: " +
                            str(er_message))
                self.metadataerrorcount += 1
                if self.metadataerrorcount > 5:
                    # we could check the status of the workflow, or we could just suspend it really
                    # 5 consecutive errors
                    raise e
                return None

        except (request.URLError, ConnectionResetError) as e:
            self.connectionerrorcount += 1
            minutes_not_able_to_contact_cromwell = (
                datetime.now() - self.last_contacted).total_seconds() / 60
            if minutes_not_able_to_contact_cromwell > self.timeout:
                message = (
                    f"Janis is receiving a ConnectionResetError when contacting the Cromwell instance "
                    f"({self.host}) {self.connectionerrorcount} times, and has been unable to connect to "
                    f"Cromwell for {minutes_not_able_to_contact_cromwell} minutes. "
                )
                if self.db_type and self.db_type == DatabaseTypeToUse.filebased:
                    ja_config_url = "https://janis.readthedocs.io/en/latest/references/configuration.html#cromwell"
                    message += (
                        "We've seen this issue more frequently when Janis is configuring Cromwell to use the "
                        "file-based database. We recommend configuring Janis to use a MySQL database through the "
                        f"`--mysql` flag, visitng '{ja_config_url}', or raising an issue on GitHub ({GITHUB_URL}) "
                        f"for more information.")
                Logger.warn(message)
                self.something_has_happened_to_cromwell(
                    "last_updated_threshold")  # idk, pick a number
                return None
            if self.connectionerrorcount > 15:
                raise e
            else:
                Logger.warn("Error connecting to cromwell instance: " +
                            repr(e))
            return None
Example #29
    def set_default(self, default: Any):
        Logger.log(
            f"Setting the default of '{self.finish.id()}.{self.ftag}' to be '{str(default)}'"
        )
        self.default = default
    def translate(
        self,
        workflow,
        to_console=True,
        tool_to_console=False,
        with_docker=True,
        with_resource_overrides=False,
        to_disk=False,
        write_inputs_file=True,
        export_path=ExportPathKeywords.default,
        should_validate=False,
        should_zip=True,
        merge_resources=False,
        hints=None,
        allow_null_if_not_optional=True,
        additional_inputs: Dict = None,
        max_cores=None,
        max_mem=None,
    ):

        # self.validate_inputs(workflow._inputs, allow_null_if_not_optional)

        tr_wf, tr_tools = self.translate_workflow(
            workflow,
            with_docker=with_docker,
            with_resource_overrides=with_resource_overrides,
        )
        tr_inp = self.build_inputs_file(
            workflow,
            recursive=False,
            merge_resources=merge_resources,
            hints=hints,
            additional_inputs=additional_inputs,
            max_cores=max_cores,
            max_mem=max_mem,
        )
        tr_res = self.build_resources_input(workflow, hints)

        str_wf = self.stringify_translated_workflow(tr_wf)
        str_inp = self.stringify_translated_inputs(tr_inp)
        str_tools = [(
            "tools/" + self.tool_filename(t),
            self.stringify_translated_workflow(tr_tools[t]),
        ) for t in tr_tools]
        str_resources = self.stringify_translated_inputs(tr_res)

        if to_console:
            print("=== WORKFLOW ===")
            print(str_wf)
            if tool_to_console:
                print("\n=== TOOLS ===")
                [print(f":: {t[0]} ::\n" + t[1]) for t in str_tools]
            print("\n=== INPUTS ===")
            print(str_inp)
            if not merge_resources and with_resource_overrides:
                print("\n=== RESOURCES ===")
                print(str_resources)

        d = ExportPathKeywords.resolve(export_path,
                                       workflow_spec=self.name,
                                       workflow_name=workflow.id())

        fn_workflow = self.workflow_filename(workflow)
        fn_inputs = self.inputs_filename(workflow)
        fn_resources = self.resources_filename(workflow)

        if to_disk and write_inputs_file:
            if not os.path.isdir(d):
                os.makedirs(d)

            with open(os.path.join(d, fn_inputs), "w+") as f:
                Logger.log(f"Writing {fn_inputs} to disk")
                f.write(str_inp)
                Logger.log(f"Written {fn_inputs} to disk")
        else:
            Logger.log("Skipping writing input (yaml) job file")

        if to_disk:

            toolsdir = os.path.join(d, "tools")
            if not os.path.isdir(toolsdir):
                os.makedirs(toolsdir)

            Logger.info(f"Exporting workflow files to '{d}'")

            with open(os.path.join(d, fn_workflow), "w+") as wf:
                Logger.log(f"Writing {fn_workflow} to disk")
                wf.write(str_wf)
                Logger.log(f"Wrote {fn_workflow}  to disk")

            for (fn_tool, str_tool) in str_tools:
                with open(os.path.join(d, fn_tool), "w+") as toolfp:
                    Logger.log(f"Writing {fn_tool} to disk")
                    toolfp.write(str_tool)
                    Logger.log(f"Written {fn_tool} to disk")

            if not merge_resources and with_resource_overrides:
                with open(os.path.join(d, fn_resources), "w+") as wf:
                    Logger.log(f"Writing {fn_resources} to disk")
                    wf.write(str_resources)
                    Logger.log(f"Wrote {fn_resources} to disk")

            import subprocess

            if should_zip:
                Logger.info("Zipping tools")
                with Path(d):
                    FNULL = open(os.devnull, "w")
                    zip_result = subprocess.run(
                        ["zip", "-r", "tools.zip", "tools/"], stdout=FNULL)
                    if zip_result.returncode == 0:
                        Logger.info("Zipped tools")
                    else:
                        Logger.critical(zip_result.stderr)

            if should_validate:
                with Path(d):

                    Logger.info(f"Validating outputted {self.name}")

                    enved_vcs = [
                        (os.getenv(x[1:]) if x.startswith("$") else x)
                        for x in self.validate_command_for(
                            fn_workflow, fn_inputs, "tools/", "tools.zip")
                    ]

                    cwltool_result = subprocess.run(enved_vcs)
                    if cwltool_result.returncode == 0:
                        Logger.info("Exported workflow was validated by: " +
                                    " ".join(enved_vcs))
                    else:
                        Logger.critical(cwltool_result.stderr)

        return str_wf, str_inp, str_tools
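
# Hypothetical call shape for translate (the translator instance and the
# workflow object are assumed; keyword names are those in the signature above):
#
# wf_str, inputs_str, tool_strs = translator.translate(
#     workflow,
#     to_console=False,
#     to_disk=True,
#     should_zip=True,
#     export_path=ExportPathKeywords.default,
# )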