Example #1
def resolve_tool(
    tool: Union[str, j.CommandTool, Type[j.CommandTool], j.Workflow,
                Type[j.Workflow]],
    name=None,
    from_toolshed=False,
    force=False,
    only_toolbox=False,
):
    if isinstance(tool, j.Tool):
        return tool
    elif isclass(tool) and issubclass(tool, (j.Workflow, j.Tool)):
        return tool()

    if not isinstance(tool, str):
        raise TypeError(
            f"Janis is not sure how to resolve a workflow of type: '{type(tool)}'"
        )

    if not only_toolbox:
        fileschemewherelocated = FileScheme.get_type_by_prefix(tool.lower())
        if fileschemewherelocated:
            Logger.info(
                f"Detected remote workflow to localise from '{fileschemewherelocated.__name__}'"
            )
            # Get some unique name for the workflow
            import hashlib

            fn = hashlib.md5(tool.lower().encode()).hexdigest() + ".py"
            outdir = os.path.join(JanisConfiguration.manager().configdir,
                                  "cached")
            os.makedirs(outdir, exist_ok=True)
            dest = os.path.join(outdir, fn)
            Logger.log(f"Localising '{tool}' to '{dest}'")

            fileschemewherelocated("internal").cp_from(
                source=tool.lower(),
                dest=dest,
                report_progress=lambda progress: print(
                    f"Download progress: {progress}"),
                force=force,
            )
            tool = dest

        wf = get_janis_workflow_from_searchname(tool,
                                                ".",
                                                name=name,
                                                include_commandtools=True)

        if wf:
            return wf

    if from_toolshed:
        v = None
        if ":" in tool:
            ps = tool.split(":")
            tool, v = ps[0], ps[1]

        wf = j.JanisShed.get_tool(tool, v)

        return wf
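
A minimal usage sketch (the import path and the tool id below are assumptions, not confirmed API):

# Hypothetical usage of resolve_tool; adjust the import to wherever it is defined.
from janis_assistant.main import resolve_tool

# Resolve a toolshed entry by "id:version"; the id here is illustrative.
tool = resolve_tool("samtoolsflagstat:1.9.0", from_toolshed=True)
print(tool.id())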
Example #2
    def _download_remote_files(self, test_logic: TTestExpectedOutput):
        """
        Download remote test files (only expected output files) to a cache directory

        :param test_logic: an object that holds information about an expected output
        :type test_logic: TTestExpectedOutput
        :return: None
        :rtype: None
        """

        file_attributes = ["expected_file", "file_diff_source"]
        for att in file_attributes:
            if not hasattr(test_logic, att):
                raise Exception(f"{test_logic.__class__} has no attribute {att}")

            source = getattr(test_logic, att)

            if source:

                test_helpers.verify_janis_assistant_installed()
                from janis_assistant.management.filescheme import (
                    FileScheme,
                    LocalFileScheme,
                )

                if not FileScheme.is_local_path(source):
                    fs = FileScheme.get_filescheme_for_url(source)
                    last_modified = fs.last_modified(source)

                    local_file_path = os.path.join(
                        self.cached_input_files_dir,
                        f"{test_helpers.hash_filename(source, last_modified)}_{os.path.basename(source)}",
                    )

                    # Only download if the file does not already exist
                    if not os.path.exists(local_file_path):
                        Logger.info(f"Downloading remote file to {local_file_path}")

                        os.makedirs(self.cached_input_files_dir, exist_ok=True)
                        fs.cp_from(source, local_file_path)
                    else:
                        Logger.info(
                            f"Skip downloading remote file. File {source} already exists in {local_file_path}"
                        )

                    setattr(test_logic, att, local_file_path)
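
The download-if-missing pattern above can be sketched standalone; ensure_local_copy is an illustrative helper, not part of janis_assistant:

import os

def ensure_local_copy(fs, source: str, local_file_path: str) -> str:
    # 'fs' is any object with a cp_from(source, dest) method, mirroring
    # the FileScheme usage above; download only when the cache misses.
    if not os.path.exists(local_file_path):
        os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
        fs.cp_from(source, local_file_path)
    return local_file_path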
Example #3
    @classmethod
    def generate_file_path(cls, source: str, dest_dir: str):
        fs = FileScheme.get_type_by_prefix(source)()
        date_modified = fs.last_modified(source)
        local_filename = (
            f"{cls.hash_filename(source, date_modified)}_{os.path.basename(source)}"
        )

        return os.path.join(dest_dir, local_filename)
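
A standalone mimic of the deterministic cache-path logic (the hash construction is an assumption about what hash_filename does):

import hashlib
import os

def generate_file_path_standalone(source: str, dest_dir: str, date_modified: str) -> str:
    # The same source + last-modified stamp always yields the same cached path.
    digest = hashlib.md5(f"{source}:{date_modified}".encode()).hexdigest()
    return os.path.join(dest_dir, f"{digest}_{os.path.basename(source)}")

print(generate_file_path_standalone("s3://bucket/ref.fasta", "/tmp/cache", "2020-01-01"))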
Example #4
    def insert_inputs_from_dict(
        self,
        inputs: dict,
        run_id: str = RunModel.DEFAULT_ID,
        file_input_ids: Optional[Set[str]] = None,
    ):
        if file_input_ids is None:
            file_input_ids = set()
        return self.insert_or_update_many(
            [
                WorkflowInputModel(
                    id_=k,
                    submission_id=self.submission_id,
                    run_id=run_id,
                    value=v,
                    size=(
                        FileScheme.get_type_by_prefix(v).get_file_size(v)
                        if k in file_input_ids
                        else None
                    ),
                )
                for k, v in inputs.items()
            ]
        )
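
The size-only-for-file-inputs rule can be shown standalone (input_size is an illustrative helper):

def input_size(key, value, file_input_ids, get_file_size):
    # Stand-in for FileScheme.get_type_by_prefix(v).get_file_size(v):
    # only inputs flagged as files get a size recorded.
    return get_file_size(value) if key in file_input_ids else None

print(input_size("bam", "/data/sample.bam", {"bam"}, lambda p: 1024))  # 1024
print(input_size("threads", 4, {"bam"}, lambda p: 1024))               # None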
Example #5
    @staticmethod
    def check_base_with_type(inp: TInput, intype: DataType, val, suffix=""):
        doesnt_exist = {}
        if isinstance(intype, Array):
            subtype = intype.subtype()
            if not isinstance(val, list):
                raise Exception(
                    f"Expected {inp.id()} to be a list, but {str(val)} was a {type(val)}"
                )
            for idx, innerval in enumerate(val):
                nsuffix = f"{suffix}[{idx}]"
                doesnt_exist.update(
                    InputChecker.check_base_with_type(
                        inp, subtype, innerval, suffix=nsuffix
                    )
                )
            return doesnt_exist

        inpid = inp.id() + suffix

        if isinstance(val, list):
            raise Exception(
                f"Expected singular item for {inp.id()}, received list.")

        fs = FileScheme.get_filescheme_for_url(val)

        if not fs.exists(val):
            doesnt_exist[inpid] = val

        if not isinstance(intype, File):
            return doesnt_exist

        InputChecker.check_extensions(inpid, intype, val)

        secs = intype.secondary_files() or []
        for sec in secs:
            sec_filename = apply_secondary_file_format_to_filename(val, sec)
            if not InputChecker.check_if_input_exists(fs, sec_filename):
                secsuffix = sec.replace("^", "").replace(".", "")
                doesnt_exist[inp.id() + "_" + secsuffix +
                             suffix] = ("(SECONDARY) " + sec_filename)

        return doesnt_exist
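
The recursive suffixing for arrays can be sketched on its own; suffixes is illustrative and only reproduces the index labelling, not the existence checks:

def suffixes(val, suffix=""):
    # Mirror the recursion above: each nesting level appends '[idx]', so a
    # miss inside Array(Array(File)) reports as, e.g., 'reads[0][1]'.
    if isinstance(val, list):
        out = []
        for idx, inner in enumerate(val):
            out.extend(suffixes(inner, f"{suffix}[{idx}]"))
        return out
    return [suffix]

print(suffixes([["a.fq", "b.fq"], ["c.fq"]]))  # ['[0][0]', '[0][1]', '[1][0]']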
Example #6
    def copy_output(
        self,
        fs: FileScheme,
        outputid,
        prefix,
        tag,
        secondaries,
        extension,
        iscopyable,
        engine_output: Union[WorkflowOutputModel, Any, List[Any]],
        shard=None,
    ):

        # the output_folder is an array of arrays, one entry per shard

        if isinstance(engine_output, list):
            outs = []
            nshards = len(engine_output)
            prev_shards = shard or []

            # This is a little complicated: we want to pick the set of tags whose
            # length matches the number of shards. This only works if there's an
            # element in the array with the appropriate number of shards.

            # find the index

            def find_element_where_length_is(iterable, n):
                if iterable is None:
                    return None
                for i, element in enumerate(iterable):
                    if isinstance(element, list) and len(element) == n:
                        return i
                return None

            def explode_at_index(iterable, index_to_explode, index_to_select):
                ar = iterable[:index_to_explode] + [
                    iterable[index_to_explode][index_to_select]
                ]
                if index_to_explode + 1 < len(iterable):
                    ar.extend(iterable[index_to_explode + 1 :])
                return ar

            tag_index_to_explode = find_element_where_length_is(tag, nshards)

            for i in range(nshards):
                eout = engine_output[i]

                new_shard = [*prev_shards, i]

                # choose tag
                new_prefix = prefix
                if isinstance(new_prefix, list) and len(new_prefix) > 1:
                    new_prefix = new_prefix[i]
                    new_shard = new_shard[min(len(new_shard), 1) :]

                new_tag = tag
                if tag_index_to_explode is not None:
                    new_tag = explode_at_index(tag, tag_index_to_explode, i)
                    new_shard = new_shard[min(len(new_shard), 1) :]

                outs.append(
                    self.copy_output(
                        fs,
                        outputid=outputid,
                        tag=new_tag,
                        prefix=new_prefix,
                        engine_output=eout,
                        shard=new_shard,
                        secondaries=secondaries,
                        extension=extension,
                        iscopyable=iscopyable,
                    )
                )

            return [o[0] for o in outs], [o[1] for o in outs]

        final_tags = tag

        outfn = outputid

        if final_tags and any(isinstance(t, list) for t in final_tags):
            Logger.critical(
                f"One of the final output tags {str(final_tags)} was still an array, outputs will be written directly into the output directory"
            )
            final_tags = None

        if prefix:
            if isinstance(prefix, list):
                if len(prefix) > 1:
                    Logger.critical(
                        f"Expected only one output_name for this copy, but found ({', '.join(prefix)}) [{len(prefix)}], using the first outputname"
                    )
                else:
                    outfn = prefix[0]
            else:
                outfn = prefix

        if final_tags is None:
            final_tags = []

        outdir = os.path.join(self.path, "/".join(final_tags))

        fs.mkdirs(outdir)

        if shard is not None:
            for s in shard:
                outfn += f"_shard-{s}"

        # copy output

        original_filepath = None
        newoutputfilepath = os.path.join(outdir, outfn)

        if isinstance(engine_output, WorkflowOutputModel):
            original_filepath = engine_output.originalpath
            if original_filepath and iscopyable:
                ext = extension or get_extension(engine_output.originalpath)
                if ext:
                    dot = "" if ext[0] == "." else "."
                    outfn += dot + ext
                    newoutputfilepath += dot + ext
                fs.cp_from(engine_output.originalpath, newoutputfilepath, force=True)
            elif engine_output.value:
                if isinstance(fs, LocalFileScheme):
                    # Write engine_output to outpath
                    with open(newoutputfilepath, "w+") as outfile:
                        outfile.write(str(engine_output.value))
        else:
            original_filepath = engine_output
            if isinstance(fs, LocalFileScheme):
                # Write engine_output to outpath
                with open(newoutputfilepath, "w+") as outfile:
                    outfile.write(str(engine_output))

        for sec in secondaries or []:
            frompath = apply_secondary_file_format_to_filename(original_filepath, sec)
            tofn = apply_secondary_file_format_to_filename(outfn, sec)
            topath = os.path.join(outdir, tofn)
            fs.cp_from(frompath, topath, force=True)

        return [original_filepath, newoutputfilepath]
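
The shard/tag bookkeeping is the subtle part; here is a standalone run of the two inner helpers (same logic as above, with illustrative data):

def find_element_where_length_is(iterable, n):
    if iterable is None:
        return None
    for i, element in enumerate(iterable):
        if isinstance(element, list) and len(element) == n:
            return i
    return None

def explode_at_index(iterable, index_to_explode, index_to_select):
    ar = iterable[:index_to_explode] + [iterable[index_to_explode][index_to_select]]
    return ar + iterable[index_to_explode + 1:]

# With two shards, the per-shard tag list is the element of length 2.
tags = ["variants", ["chr1", "chr2"], "vcf"]
idx = find_element_where_length_is(tags, 2)
print(explode_at_index(tags, idx, 0))  # ['variants', 'chr1', 'vcf']
print(explode_at_index(tags, idx, 1))  # ['variants', 'chr2', 'vcf']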
Example #7
    def localise_inputs(
        self,
        inpid: str,
        inptype: DataType,
        dest_dir: str,
        source: Union[str, List[str]],
        localise_secondary_files: bool = True,
    ):
        if isinstance(source, list):
            return [
                self.localise_inputs(
                    inpid, inptype, dest_dir, s, localise_secondary_files
                )
                for s in source
            ]

        fs = FileScheme.get_type_by_prefix(source)()
        if isinstance(fs, LocalFileScheme):
            return source

        out_path = self.generate_file_path(source, dest_dir)
        if os.path.exists(out_path):
            Logger.info(
                f"A file already exists when localising '{inpid}' at '{out_path}'. If this isn't the right file, "
                f"you'll need to manually remove this file before proceeding")
        else:
            try:
                Logger.info(f"Downloading file from {source} -> {out_path}")
                fs.cp_from(source, out_path)
            except Exception as e:
                Logger.critical(
                    f"Couldn't localise source from {source} -> {out_path}: {repr(e)}"
                )
                raise

        if localise_secondary_files:
            try:
                # Handle normal input type or array input type
                secondary_files = inptype.secondary_files()
                if inptype.is_array():
                    secondary_files = inptype.subtype().secondary_files()

                for sec in secondary_files or []:
                    sec_source = apply_secondary_file_format_to_filename(
                        source, sec)
                    out_sec_path = apply_secondary_file_format_to_filename(
                        out_path, sec)

                    if os.path.exists(out_sec_path):
                        Logger.info(
                            f"The secondary file for {inpid} ({sec}) already exists when localising '{inpid}' at '{out_sec_path}'. If this isn't the right file, "
                            f"you'll need to manually remove this file before proceeding"
                        )
                    elif not fs.exists(sec_source):
                        Logger.warn(
                            f"Couldn't find the secondary file for {inpid}, expected at {sec_source}, skipping for now"
                        )
                    else:
                        fs.cp_from(sec_source, out_sec_path)

            except Exception as e:
                Logger.critical(
                    f"Couldn't localise secondary file due to: {e}")

        return out_path
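
The secondary-file naming that produces the sec_source/out_sec_path pairs above can be exercised directly (the import path is an assumption about janis_core):

# Hypothetical import path; this is the helper used throughout these examples.
from janis_core.utils.secondary import apply_secondary_file_format_to_filename

# '^' strips one extension before appending, following the CWL convention.
print(apply_secondary_file_format_to_filename("sample.bam", ".bai"))   # sample.bam.bai
print(apply_secondary_file_format_to_filename("sample.bam", "^.bai"))  # sample.bai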
Example #8
def guess_datatype_by_filename(filename: str):
    """
    Try to guess the datatype of a file named 'filename'.
    Primarily, this looks at the extension, and whether the expected secondary files exist.
    :param filename:
    :return:
    """
    dts = JanisShed.get_all_datatypes()
    fs = FileScheme.get_type_by_prefix(filename)()
    if not isinstance(fs, LocalFileScheme):
        Logger.warn(
            f"The filescheme detected by Janis for '{filename}' was not LOCAL. This guess datatype process may rely on "
            f"polling the {fs.id()} file system to check if related files exist. This might have some financial cost involved."
        )

    file_exists_map = {}

    # each match has a score
    matches: List[Tuple[int, File]] = []

    for datatype in dts:
        if isclass(datatype):
            if not issubclass(datatype, File):
                continue
            datatype = get_instantiated_type(datatype)
        elif not isinstance(datatype, File):
            continue
        if not datatype.extension:
            continue
        datatype: File = datatype

        extensions = {datatype.extension, *(datatype.alternate_extensions or [])}

        matching_extension = None
        for ext in extensions:
            if filename.endswith(ext):
                matching_extension = ext
                break

        secondaries_match = True

        if datatype.secondary_files():
            for secondary in datatype.secondary_files():
                secondary_filename = apply_secondary_file_format_to_filename(
                    filename, secondary
                )
                if secondary not in file_exists_map:
                    file_exists_map[secondary] = fs.exists(secondary_filename)
                if not file_exists_map[secondary]:
                    secondaries_match = False
                    break
            if not secondaries_match:
                continue

        # if we got this far with a matching extension and secondaries, it's a candidate

        if matching_extension is not None and secondaries_match:
            extension_reward = len(matching_extension) * EXTENSION_REWARD_MULTIPLER
            secondaries_reward = (
                len(datatype.secondary_files() or []) * SECONDARIES_REWARD_MULTIPLER
            )
            score = extension_reward + secondaries_reward

            matches.append((score, datatype))

    if len(matches) == 0:
        return None
    elif len(matches) == 1:
        return matches[0][1]
    else:
        matches = sorted(matches, key=lambda a: a[0], reverse=True)
        matched_dt = matches[0][1]
        ranked = ", ".join(f"{match[1].name()} ({match[0]})" for match in matches[1:])
        Logger.debug(
            f"There were {len(matches)} for matching datatypes. Using {matched_dt.name()} ({matches[0][0]}) "
            f"as it was the best match from: {ranked}"
        )
        return matched_dt
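
The scoring rule reduces to a small function; the multiplier values here are illustrative, not the real constants:

EXTENSION_REWARD_MULTIPLER = 1     # illustrative value
SECONDARIES_REWARD_MULTIPLER = 20  # illustrative value

def match_score(matching_extension: str, n_secondaries: int) -> int:
    # Longer (more specific) extensions and more confirmed secondary files
    # both push a datatype up the ranking.
    return (len(matching_extension) * EXTENSION_REWARD_MULTIPLER
            + n_secondaries * SECONDARIES_REWARD_MULTIPLER)

print(match_score(".fasta", 1))  # 26: specific extension plus one secondary
print(match_score(".txt", 0))    # 4: weak fallback match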
Example #9
    @staticmethod
    def check_if_input_exists(fs: FileScheme, path: str):
        return fs.exists(path)
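
A duck-typed check, assuming this is a staticmethod on InputChecker as used in Example #5; the _LocalProbe class is illustrative:

import os

class _LocalProbe:
    # Minimal stand-in for a FileScheme: only exists() is needed here.
    def exists(self, path: str) -> bool:
        return os.path.exists(path)

print(InputChecker.check_if_input_exists(_LocalProbe(), "/etc/hosts"))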