Example #1
    def __init__(
            self,
            wid: str,
            container: Type[Container],
            datadirectory: str,
            forwardedport: int,
            confdir: str,
            containerdir: str,  # for singularity containers
    ):
        import os.path

        Logger.debug(
            f"Preparing {container.__name__} MySQL container with info: wid={wid}, port={forwardedport}, confdir={confdir}"
        )

        self._containertype = container

        self.container: Container = container(
            self.MYSQL_CONTAINERNAME,
            instancename="mariadb-" + wid,
            containerdir=containerdir,
        )
        self.datadirectory = datadirectory
        self.forwardedport = forwardedport
        self.confdir = confdir
        self.startupscriptsdir = os.path.join(self.confdir, "startup")
        self.sqlconfdir = os.path.join(self.confdir, "conf")
        self.mysqldoverride = os.path.join(self.confdir, "mysqld")
Example #2
    def exec_command(self, command):

        cmd = ["docker", "exec", "-i", self.dockerid]
        requiresshell = not isinstance(command, list)
        if isinstance(command, list):
            cmd.extend(command)
        else:
            cmd.append(command)
        try:
            Logger.info("Executing command: " + " ".join(cmd))
            # with shell=True a list would only run its first element, so pass a single string
            val = (subprocess.check_output(
                " ".join(cmd) if requiresshell else cmd,
                shell=requiresshell).decode("ascii").strip())

        except subprocess.CalledProcessError as e:
            Logger.critical(
                f"Docker exec_command failed '{e}': {e.output or e.stderr}")

            # check the logs
            try:
                logs_command = ["docker", "logs", self.dockerid]
                Logger.info("Checking docker logs: " + " ".join(logs_command))
                Logger.debug(subprocess.check_output(logs_command))
            except Exception:
                Logger.critical(
                    f"Failed to get logs for container {self.dockerid}")

            return (str(e), e.returncode)

        return val, 0
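A note on the shell handling above: with `shell=True`, `subprocess` on POSIX treats a list argument as `[command, *args_to_the_shell]`, so only the first element would actually run. A minimal sketch of the two safe call shapes (the container id and commands are placeholders):

import subprocess

dockerid = "my-container"  # hypothetical container id

# list + shell=False: arguments are passed to docker directly, no shell parsing
out = subprocess.check_output(
    ["docker", "exec", "-i", dockerid, "ls", "/data"]
).decode().strip()

# single string + shell=True: the local shell parses the whole command line
out = subprocess.check_output(
    f"docker exec -i {dockerid} sh -c 'ls /data | wc -l'", shell=True
).decode().strip()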
    def get_digest(self, info: ContainerInfo) -> Optional[str]:
        try:
            token = self.get_token(info)
        except Exception as e:
            Logger.critical(
                f"Couldn't get digest for container (couldn't get token): '{str(info)}': {str(e)}"
            )
            return None
        if token:
            Logger.debug(
                f"Got token for '{info}': {token[: min(5, len(token) - 1)]}..."
            )

        try:
            req = self.build_request(info, token)
            Logger.debug(f"Requesting digest from: {req.full_url}")
            with request.urlopen(req) as response:
                rheaders = response.headers
                digest = rheaders.get("etag",
                                      rheaders.get("Docker-Content-Digest"))

            if digest is not None:
                digest = digest.replace("'", "").replace('"', "")

            return digest

        except Exception as e:
            Logger.critical(
                f"Couldn't get digest for container '{str(info)}': {str(e)}")
            return None
    def __init__(self, dblocation, tablename, readonly=False):
        sqlitedict.logger.disabled = True
        ro = "r" if readonly else "c"
        Logger.debug(
            f"Opening connection to {dblocation}/{tablename} with mode {ro}")
        self.kvdb = sqlitedict.SqliteDict(dblocation,
                                          tablename=tablename,
                                          autocommit=True,
                                          flag=ro)
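A minimal usage sketch of `sqlitedict` with the same flags ("c" creates the table if needed, "r" opens it read-only); the path and table name are placeholders:

import sqlitedict

# autocommit=True writes each assignment straight through to disk
kvdb = sqlitedict.SqliteDict(
    "/tmp/janis-kv.sqlite", tablename="progress", autocommit=True, flag="c"
)
kvdb["submitWorkflow"] = True
print(kvdb["submitWorkflow"])  # -> True
kvdb.close()

# a second, read-only handle onto the same table
ro = sqlitedict.SqliteDict("/tmp/janis-kv.sqlite", tablename="progress", flag="r")
print(dict(ro))  # -> {'submitWorkflow': True}
ro.close()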
Example #5
    def start_container(self):

        command = ["singularity", "instance", "start"]

        # if self.environment_variables:
        #     command.extend(f"-e{k}={v}" for k, v in self.environment_variables.items())

        if self.bindpoints:
            command.extend(f"-B{v}:{k}" for k, v in self.bindpoints.items())

        if self.exposedports:
            command.extend([
                "--net",
                "--network=none",
                "--network-args",
                *[
                    f"portmap={v}:{k}/tcp"
                    for k, v in self.exposedports.items()
                ],
            ])

        try:
            # copy so SINGULARITYENV_* variables don't leak into this process's environment
            newenv = os.environ.copy()
            for k, v in self.environment_variables.items():
                newenv["SINGULARITYENV_" + k] = str(v)

            if not self.instancename:
                self.instancename = generate_new_id(set())

            command.extend([self.container_path(), self.instancename])
            Logger.info("Starting singularity with command: " +
                        " ".join(command))

            out = subprocess.check_output(command,
                                          env=newenv,
                                          stderr=subprocess.STDOUT)
            Logger.debug(f"Singularity output: {out}")

            singrun = ["singularity", "run", "instance://" + self.instancename]
            Logger.debug("Started container, now running " + str(singrun))
            startprocess = subprocess.Popen(singrun,
                                            stdout=subprocess.PIPE,
                                            stderr=subprocess.STDOUT)
            self.run_logger = ProcessLogger(
                startprocess,
                prefix="mysql: ",
                logfp=None,
                exit_function=self.runlogger_didexit,
            )

        except subprocess.CalledProcessError as e:
            raise Exception(
                f"An error occurred while starting a singularity container: {str(e)}.\n\tOutput: {str(e.output)}"
            )
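`ProcessLogger` is a janis-assistant helper; the general idea (stream a child process's merged output line by line with a prefix, on a background thread) could be sketched roughly as below. The names and the placeholder command are illustrative, not the real implementation:

import subprocess
import threading

def stream_with_prefix(proc: subprocess.Popen, prefix: str):
    # read merged stdout/stderr until the child closes its pipe
    for raw in iter(proc.stdout.readline, b""):
        print(prefix + raw.decode(errors="replace").rstrip())
    proc.wait()

proc = subprocess.Popen(
    ["ping", "-c", "3", "localhost"],  # placeholder for the long-running instance
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
)
threading.Thread(target=stream_with_prefix, args=(proc, "mysql: "), daemon=True).start()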
    def create_task_base(self, wf: Workflow, job: PreparedJob):

        forbiddenids = set()
        if job.store_in_central_db:
            try:
                with self.with_cursor() as cursor:
                    forbiddenids = set(
                        t[0] for t in cursor.execute("SELECT id FROM tasks").fetchall()
                    )
            except sqlite3.OperationalError as e:
                if "no such column: id" in repr(e):
                    from shutil import move

                    dt = datetime.utcnow()
                    np = f"{job.db_path}.original-{dt.strftime('%Y%m%d')}"
                    Logger.warn(f"Moving old janis-db to '{np}'")
                    move(job.db_path, np)
                    self._taskDB = None
                    return self.create_task_base(wf=wf, job=job)
                raise

        submission_id = generate_new_id(forbiddenids)

        output_dir = fully_qualify_filename(job.output_dir)

        if not job.execution_dir:
            job.execution_dir = os.path.join(output_dir, "janis")
            Logger.debug(
                f"No execution-dir was provided, constructed one from the output-dir: {job.execution_dir}"
            )
        job.execution_dir = fully_qualify_filename(job.execution_dir)

        Logger.info(
            f"Starting task with id = '{submission_id}' | output dir: {job.output_dir} | execution dir: {job.execution_dir}"
        )

        row = TaskRow(
            submission_id, execution_dir=job.execution_dir, output_dir=output_dir
        )
        WorkflowManager.create_dir_structure(job.execution_dir)

        if job.store_in_central_db:
            self.get_lazy_db_connection().insert_task(row)
        else:
            Logger.info(
                f"Not storing task '{submission_id}' in database. To watch, use: 'janis watch {output_dir}'"
            )

        if self._connection:
            self._connection.commit()
            self._connection.close()
            self._taskDB = None
            self._connection = None
        return row
def deserialize_inner(val):
    if val is None:
        return None
    try:
        return json.loads(val)
    except Exception as ex:
        # tbh, sometimes the mysql database converts '"myvalue"' -> 'myvalue' (dropping the quotes), so we'll fall back to the string representation
        Logger.debug(
            f"Couldn't deserialize value, using string representation instead (value: {repr(val)}): {repr(ex)}"
        )
        return str(val)
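Illustrative behaviour of the fallback (values are made up):

deserialize_inner('{"cores": 2}')  # -> {'cores': 2}
deserialize_inner('"myvalue"')     # -> 'myvalue' (valid JSON string)
deserialize_inner('myvalue')       # -> 'myvalue' (not JSON, falls back to str)
deserialize_inner(None)            # -> None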
    def db_connection(self):
        try:
            if self.readonly:
                Logger.debug(
                    "Opening database connection to in READONLY mode: " + self.db_path
                )
                return sqlite3.connect(f"file:{self.db_path}?mode=ro", uri=True)

            Logger.debug("Opening database connection: " + self.db_path)
            return sqlite3.connect(self.db_path)
        except:
            Logger.critical("Error when opening DB connection to: " + self.db_path)
            raise
Example #9
    def db_connection(self):
        config = JanisConfiguration.manager()
        try:
            if self.readonly:
                Logger.debug(
                    "Opening database connection to in READONLY mode: " + config.dbpath
                )
                return sqlite3.connect(f"file:{config.dbpath}?mode=ro", uri=True)

            Logger.debug("Opening database connection: " + config.dbpath)
            return sqlite3.connect(config.dbpath)
        except:
            Logger.critical("Error when opening DB connection to: " + config.dbpath)
            raise
Example #10
    def get(
        self,
        keys: Union[str, List[str]] = "*",
        where: Tuple[str, List[any]] = None,
        allow_operational_errors=True,
    ) -> Optional[List[T]]:
        jkeys = ", ".join(keys) if isinstance(keys, list) else keys
        if jkeys == "*":
            keys = [t.dbalias for t in self._base.keymap()]
            jkeys = ", ".join(keys) if isinstance(keys, list) else keys

        values = []
        whereclauses = []
        if self._scopes:
            scopes = self._scopes.items()
            whereclauses.extend(f"{k} = ?" for k, _ in scopes)
            values.extend(v for _, v in scopes)

        if where:
            whereclauses.append(where[0])
            values.extend(where[1])

        query = f"SELECT {jkeys} FROM {self._tablename}"

        if whereclauses:
            query += f" WHERE {' AND '.join(whereclauses)}"

        with self.with_cursor() as cursor:
            try:
                rows = cursor.execute(query, values).fetchall()
            except OperationalError as e:
                if not allow_operational_errors:
                    raise e
                if "readonly database" in str(e):
                    # mfranklin: idk, this sometimes happens. We're doing a select query, idk sqlite3 driver...
                    Logger.debug(
                        f"Got readonly error when running query: '{query}', skipping for now"
                    )
                    return None
                elif "locked" in str(e):
                    Logger.debug(
                        f"We hit the janis database.{self._tablename} at the same time the janis process wrote to it, we'll skip for now "
                    )
                    return None
                raise

        parsedrows = [self._base.deserialize(keys, r) for r in rows]
        return parsedrows
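A hypothetical call, assuming the table wraps submission rows with `id` and `status` columns (`table` here is a placeholder instance):

rows = table.get(
    keys=["id", "status"],
    where=("status = ?", ["running"]),
)
# builds roughly: SELECT id, status FROM <tablename> WHERE <scope clauses> AND status = ?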
Example #11
    def save_metadata_if_required(self):
        if self.database.progressDB.has(ProgressKeys.savedMetadata):
            return Logger.debug(f"Workflow '{self.wid}' has saved metadata, skipping")

        engine = self.get_engine()

        metadir = self.get_path_for_component(self.WorkflowManagerPath.metadata)
        if isinstance(engine, Cromwell):
            import json

            meta = engine.raw_metadata(self.get_engine_wid()).meta
            with open(os.path.join(metadir, "metadata.json"), "w+") as fp:
                json.dump(meta, fp)

        elif isinstance(engine, CWLTool):
            import json

            meta = engine.metadata(self.wid)
            self.set_status(meta.status)
            with open(os.path.join(metadir, "metadata.json"), "w+") as fp:
                json.dump(meta.outputs, fp)

        else:
            raise Exception(
                f"Don't know how to save metadata for engine '{engine.id()}'"
            )

        self.database.progressDB.set(ProgressKeys.savedMetadata)
Example #12
    def ensure_downloaded(self):

        pathed_container = self.container_path()

        if os.path.exists(pathed_container):
            return True

        command = self.get_build_instructions_for(pathed_container,
                                                  self.container)
        Logger.info("Couldn't find singularity container, building with: " +
                    " ".join(command))
        try:
            build_result = subprocess.check_output(command)
            Logger.debug(build_result)
            return True
        except subprocess.CalledProcessError as e:
            Logger.log_ex(e)
            return False
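The build command itself comes from `get_build_instructions_for`; for a Docker-hosted image it is presumably along the lines of a standard `singularity pull`, e.g. (paths and tag are placeholders):

command = [
    "singularity", "pull",
    "/containers/mariadb-10.3.sif",  # pathed_container
    "docker://mariadb:10.3",         # built from self.container
]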
Example #13
    def submit_workflow_if_required(self):
        if self.database.progressDB.has(ProgressKeys.submitWorkflow):
            return Logger.log(f"Workflow '{self.wid}' has submitted, skipping")

        fn_wf = self.database.workflowmetadata.submission_workflow
        fn_inp = self.database.workflowmetadata.submission_inputs
        fn_deps = self.database.workflowmetadata.submission_resources

        engine = self.get_engine()

        Logger.debug(f"Submitting task '{self.wid}' to '{engine.id()}'")
        self._engine_wid = engine.start_from_paths(self.wid, fn_wf, fn_inp, fn_deps)
        self.database.workflowmetadata.engine_wid = self._engine_wid

        Logger.info(
            f"Submitted workflow ({self.wid}), got engine id = '{self.get_engine_wid()}'"
        )
        self.database.progressDB.set(ProgressKeys.submitWorkflow)
Example #14
    def number_of_jobs(self):
        query = "SELECT COUNT(*) FROM jobs WHERE submission_id = ?"
        values = [self.submission_id]
        with self.with_cursor() as cursor:
            try:
                row = cursor.execute(query, values).fetchone()
                return row[0]
            except OperationalError as e:
                if "readonly database" in str(e):
                    # mfranklin: idk, this sometimes happens. We're doing a select query, idk sqlite3 driver...
                    Logger.debug(
                        f"Got readonly error when running query: '{query}', skipping for now"
                    )
                    return None
                elif "locked" in str(e):
                    Logger.debug(
                        f"We hit the janis database.{self._tablename} at the same time the janis process wrote to it, we'll skip for now "
                    )
                    return None
                raise
    def check_extensions(inpid: str, datatype: DataType, path: str):
        """
        This method only WARNS about incorrect extension
        """

        if not isinstance(datatype, File):
            return

        if not isinstance(path, str):
            Logger.warn(
                f"Expecting string type input '{inpid}' for type File, but received '{type(path)}'"
            )
            return

        # check extension (and in future, secondaries)
        pre_extensions = [
            datatype.extension,
            *list(datatype.alternate_extensions or []),
        ]
        extensions = {ext for ext in pre_extensions if ext is not None}

        if len(extensions) == 0:
            # skip because no extension
            return

        has_extension = False
        for ext in extensions:
            if path.endswith(ext):
                has_extension = True
                break

        if has_extension:
            # looks like we're sweet
            Logger.debug(
                f"Validated that the input for {inpid} had the expected extension for {datatype.id()}"
            )
            return

        Logger.warn(
            f"The input for '{inpid}' ({datatype.name()}) did not have the expected extension "
            f"{' OR '.join(extensions)}: {path}. ")
Example #16
    def try_get_outputs_for(self, inpid, wf, inputs, output_dir, description):

        from janis_assistant.main import WorkflowManager, run_with_outputs

        if os.path.exists(output_dir):
            try:
                wm = WorkflowManager.from_path_get_latest_manager(
                    output_dir, readonly=True
                )
                outs_raw = wm.database.outputsDB.get()
                outs = {
                    o.id_: o.value or o.new_path
                    for o in outs_raw
                    if o.value or o.new_path
                }
                if len(outs) > 0:
                    out_val = first_value(outs)
                    Logger.info(
                        f"Using cached value of transformation ({description}) for {inpid}: {out_val}"
                    )
                    return out_val
                Logger.log(
                    f"Didn't get any outputs from previous workflow manager when deriving input {inpid} ({description})"
                )
            except Exception as e:
                Logger.debug(
                    f"Couldn't get outputs from existing output_path for {inpid}, '{output_dir}' ({description}): {e}"
                )

        outs = run_with_outputs(wf, inputs=inputs, output_dir=output_dir)
        if not outs or len(outs) < 1:
            Logger.critical(
                f"Couldn't get outputs from transformation ({description}) for '{inpid}'"
            )
            return None

        return first_value(outs)
Example #17
    def copy_outputs_if_required(self):
        if self.database.progressDB.has(ProgressKeys.copiedOutputs):
            return Logger.debug(f"Workflow '{self.wid}' has copied outputs, skipping")

        if self.database.workflowmetadata.status != TaskStatus.COMPLETED:
            return Logger.warn(
                f"Skipping copying outputs as workflow "
                f"status was not completed ({self.database.workflowmetadata.status})"
            )

        wf_outputs = self.database.outputsDB.get_all()
        engine_outputs = self.get_engine().outputs_task(self.get_engine_wid())
        eoutkeys = engine_outputs.keys()
        fs = self.environment.filescheme

        for out in wf_outputs:
            eout = engine_outputs.get(out.tag)

            if eout is None:
                Logger.warn(
                    f"Couldn't find expected output with tag {out.tag}, found outputs ({', '.join(eoutkeys)}"
                )
                continue
            originalfile, newfilepath = self.copy_output(
                fs=fs,
                outputid=out.tag,
                prefix=out.prefix,
                tag=out.tags,
                secondaries=out.secondaries,
                extension=out.extension,
                engine_output=eout,
                iscopyable=out.iscopyable,
            )

            if isinstance(originalfile, list):
                originalfile = recursively_join(originalfile, "|")

            if isinstance(newfilepath, list):
                newfilepath = recursively_join(newfilepath, "|")

            self.database.outputsDB.update_paths(
                tag=out.tag, original_path=originalfile, new_path=newfilepath
            )

        self.database.progressDB.set(ProgressKeys.copiedOutputs)
        Logger.info(f"View the task outputs: file://{self.get_task_path()}")
    def send_email(subject: str, body: str):
        import tempfile, os

        nots = PreparedJob.instance().notifications

        mail_program = nots.mail_program

        if not mail_program:
            return Logger.debug(
                "Skipping email send as no mail program is configured")

        if not nots.email or nots.email.lower() == "none":
            Logger.debug("Skipping notify status change as no email")
            return

        emails: List[str] = (nots.email if isinstance(nots.email, list) else
                             nots.email.split(","))
        Logger.debug(f"Sending email with subject {subject} to {emails}")

        email_template = f"""\
Content-Type: text/html
To: {"; ".join(emails)}
From: {nots.from_email}
Subject: {subject}

{body}"""

        # 2020-08-24 mfranklin: Write to disk and cat, because some emails are just too big
        fd, path = tempfile.mkstemp()
        try:
            with os.fdopen(fd, "w") as tmp:
                # write the full message (headers + body) to the temp file
                tmp.write(email_template)

            command = f"cat '{path}' | {mail_program}"
            Logger.log("Sending email with command: " +
                       str(command.replace("\n", "\\n")))
            try:
                subprocess.call(command, shell=True)
                Logger.debug("Sent email successfully")
            except Exception as e:
                Logger.critical(
                    f"Couldn't send email '{subject}' to {emails}: {e}")
        finally:
            os.remove(path)
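An alternative to the temp-file-plus-`cat` approach is to pipe the message straight into the mail program's stdin, which avoids the cleanup step. This sketch assumes the configured `mail_program` (for example `sendmail -t`) reads a complete message from stdin:

import subprocess

def send_via_stdin(mail_program: str, message: str):
    # shell=True so mail_program can carry its own flags, e.g. "sendmail -t"
    subprocess.run(mail_program, input=message.encode(), shell=True, check=True)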
Example #19
    def show_status_screen(self, **kwargs):
        """
        This function just polls the database for metadata every so often,
        and simply displays it. It will keep doing that until the task
        moves into a TERMINAL status.

        It's presumed that there's a janis-monitor that's watching the engine.

        The kwargs argument is for passing through formatting/refresh options
        through from the command-line.
        """

        if self.database.progressDB.has(ProgressKeys.workflowMovedToFinalState):
            meta = self.database.get_metadata()
            formatted = meta.format(**kwargs)
            print(formatted)
            return Logger.debug(f"Workflow '{self.wid}' has already finished, skipping")

        bl = None

        # try:
        #     import blessed
        #
        #     bl = blessed
        #
        # except Exception as e:
        #     txt = (
        #         "Couldn't load 'blessed' for screen display, defaulting back to clear(): "
        #         + str(e)
        #     )
        #     Logger.warn(txt)

        if bl is not None:
            self.poll_stored_metadata_with_blessed(bl)
        else:
            self.poll_stored_metadata_with_clear(**kwargs)
Example #20
    def prepare_and_output_workflow_to_evaluate_if_required(
        self,
        tool: Tool,
        translator: TranslatorBase,
        validation: Optional[ValidationRequirements],
        batchrun: Optional[BatchRunRequirements],
        hints: Dict[str, str],
        additional_inputs: dict,
        max_cores=None,
        max_memory=None,
        allow_empty_container=False,
        container_override: dict = None,
        check_files=True,
    ) -> Tool:
        if self.database.progressDB.has(ProgressKeys.saveWorkflow):
            return Logger.info(f"Saved workflow from task '{self.wid}', skipping.")

        Logger.debug(f"Saving workflow with id '{tool.id()}' to {translator.name}")

        outdir_workflow = self.get_path_for_component(self.WorkflowManagerPath.workflow)
        translator.translate(
            tool,
            to_console=False,
            to_disk=True,
            hints=hints,
            # This is just the base tool, we're going to potentially transform the inputs
            # and we only really care about the inputs for the workflow we're going to run.
            # We'll store the original workflow to run for provenance, but not to easily rerun
            write_inputs_file=False,
            export_path=os.path.join(outdir_workflow, "original"),
            allow_empty_container=allow_empty_container,
            container_override=container_override,
        )

        Logger.info(f"Saved workflow with id '{tool.id()}' to '{outdir_workflow}'")

        modifiers = []
        if validation:
            modifiers.append(ValidatorPipelineModifier(validation))

        if batchrun:
            modifiers.append(BatchPipelineModifier(batchrun))

        modifiers.append(InputFileQualifierModifier)
        # THIS ONE SHOULD BE LAST

        modifiers.append(InputChecker(check_file_existence=check_files))

        tool_to_evaluate, additional_inputs = PipelineModifierBase.apply_many(
            modifiers, tool, additional_inputs, hints=hints
        )

        translator.translate(
            tool_to_evaluate,
            to_console=False,
            to_disk=True,
            with_resource_overrides=True,
            merge_resources=True,
            hints=hints,
            write_inputs_file=True,
            export_path=outdir_workflow,
            additional_inputs=additional_inputs,
            max_cores=max_cores,
            max_mem=max_memory,
            allow_empty_container=allow_empty_container,
            container_override=container_override,
        )

        self.evaluate_output_params(
            wf=tool_to_evaluate, additional_inputs=additional_inputs
        )

        self.database.progressDB.set(ProgressKeys.saveWorkflow)
        return tool_to_evaluate
    def do_bed_fasta_contig_check(tool: Tool, inputs: Dict[str, any]):
        from janis_bioinformatics.data_types import Fasta, Bed, BedTabix

        supported_bed_types = (Bed, BedTabix)

        beds_inputs = []
        refs = []

        for i in tool.tool_inputs():
            if isinstance(i.intype, supported_bed_types) or (
                    isinstance(i.intype, Array)
                    and isinstance(i.intype.subtype(), supported_bed_types)):
                beds_inputs.append(i)

            if (isinstance(i.intype, Fasta) and i.intype.secondary_files()
                    and ".fai" in i.intype.secondary_files()):
                refs.append(i)

        if len(refs) == 0:
            return
        if len(refs) > 1:
            Logger.info(
                "Skipping bioinformatics FASTA-BED file checks as there was more than one reference"
            )
            return

        for inp_ref in refs:
            value_ref = inputs[inp_ref.id()]
            if not value_ref:
                Logger.warn(
                    f"Skipping '{inp_ref.id()}' as no value was provided")
                continue

            ref_contigs = ContigChecker.get_list_of_contigs_from_fastafai(
                value_ref + ".fai")

            if not ref_contigs:
                Logger.debug(
                    f"Didn't get any contigs from ref {value_ref}.fai, skipping..."
                )
                continue

            for inp_bed in beds_inputs:
                value_bed = inputs[inp_bed.id()]
                is_array = isinstance(value_bed, list)
                beds = value_bed if is_array else [value_bed]
                for b_idx, bed in enumerate(beds):

                    bed_contigs = ContigChecker.get_list_of_contigs_from_bed(
                        bed)

                    missing_contigs = bed_contigs - ref_contigs
                    if missing_contigs:
                        inpname = (f"{inp_bed.id()}.{b_idx}"
                                   if is_array else inp_bed.id())
                        contiglist = (", ".join(missing_contigs)
                                      if len(missing_contigs) < 5 else
                                      (", ".join(list(missing_contigs)[:3]) +
                                       "..."))
                        Logger.warn(
                            f"The BED file '{inpname}' contained {len(missing_contigs)} contigs ({contiglist}) that were missing from the reference: {value_ref}"
                        )
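At its core the check is a set difference between the contig column of each BED and the first column of the FASTA `.fai` index; a standalone sketch (file paths are placeholders):

def contigs_from_fai(fai_path: str) -> set:
    # a .fai index is tab-separated with the contig name in the first column
    with open(fai_path) as f:
        return {line.split("\t", 1)[0] for line in f if line.strip()}

def contigs_from_bed(bed_path: str) -> set:
    # BED is tab-separated with the contig in the first column; skip header-ish lines
    with open(bed_path) as f:
        return {
            line.split("\t", 1)[0]
            for line in f
            if line.strip() and not line.startswith(("#", "track", "browser"))
        }

missing = contigs_from_bed("regions.bed") - contigs_from_fai("ref.fasta.fai")
if missing:
    print(f"{len(missing)} contigs in the BED are absent from the reference")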
def guess_datatype_by_filename(filename: str):
    """
    We'll try to guess which datatype a file with name 'filename' is.
    Primarily, this will look at the extension, and whether the secondary files exist
    :param filename:
    :return:
    """
    dts = JanisShed.get_all_datatypes()
    fs = FileScheme.get_type_by_prefix(filename)()
    if not isinstance(fs, LocalFileScheme):
        Logger.warn(
            f"The filescheme detected by Janis for '{filename}' was not LOCAL. This guess datatype process may rely on "
            f"polling the {fs.id()} file system to check if related files exist. This might have some financial cost involved."
        )

    file_exists_map = {}

    # each match has a score
    matches: List[Tuple[int, File]] = []

    for datatype in dts:
        if isclass(datatype):
            if not issubclass(datatype, File):
                continue
            datatype = get_instantiated_type(datatype)
        elif not isinstance(datatype, File):
            continue
        if not datatype.extension:
            continue
        datatype: File = datatype

        extensions = {datatype.extension, *(datatype.alternate_extensions or [])}

        matching_extension = None
        for ext in extensions:
            if filename.endswith(ext):
                matching_extension = ext
                break

        secondaries_match = True

        if datatype.secondary_files():
            for secondary in datatype.secondary_files():
                secondary_filename = apply_secondary_file_format_to_filename(
                    filename, secondary
                )
                if secondary not in file_exists_map:
                    file_exists_map[secondary] = fs.exists(secondary_filename)
                if not file_exists_map[secondary]:
                    secondaries_match = False
                    break
            if secondaries_match is False:
                continue

        # any required secondary files exist; score the match if the extension also matched

        if matching_extension is not None and secondaries_match:
            extension_reward = len(matching_extension) * EXTENSION_REWARD_MULTIPLER
            secondaries_reward = (
                len(datatype.secondary_files() or []) * SECONDARIES_REWARD_MULTIPLER
            )
            score = extension_reward + secondaries_reward

            matches.append((score, datatype))

    if len(matches) == 0:
        return None
    elif len(matches) == 1:
        return matches[0][1]
    else:
        matches = sorted(matches, key=lambda a: a[0], reverse=True)
        matched_dt = matches[0][1]
        ranked = ", ".join(f"{match[1].name()} ({match[0]})" for match in matches[1:])
        Logger.debug(
            f"There were {len(matches)} for matching datatypes. Using {matched_dt.name()} ({matches[0][0]}) "
            f"as it was the best match from: {ranked}"
        )
        return matched_dt
    def __init__(
        self,
        config_dir: str = None,
        db_path: str = None,
        execution_dir=None,
        engine: str = None,
        cromwell: JanisConfigurationCromwell = None,
        template: JanisConfigurationTemplate = None,
        notifications: JanisConfigurationNotifications = None,
        environment: JanisConfigurationEnvironment = None,
        run_in_background: bool = None,
        digest_cache_location: str = None,
        # job information
        inputs: Dict = None,
        output_dir: str = None,
        keep_intermediate_files: bool = None,
        recipes: List[str] = None,
        hints: Dict[str, str] = None,
        allow_empty_container: bool = None,
        container_override: Dict[str, str] = None,
        skip_digest_lookup: bool = None,
        skip_digest_cache: bool = None,
        batchrun: Union[BatchRunRequirements, Dict] = None,
        store_in_central_db: bool = None,
        skip_file_check: bool = None,
        strict_inputs: bool = False,
        validation: ValidationRequirements = None,
        should_watch_if_background: bool = False,
        call_caching_enabled: bool = None,
        container_type: str = None,
        workflow_reference: str = None,
        post_run_script: str = None,
    ):
        """

        :param config_dir: The config_dir specifies where the janis.db, cromwell and workflow cache is.
        :param output_dir: The directory to copy outputs to. By default, execution occurs in this directory (under <output>/janis) unless overridden with the '--execution-dir' argument
        :param execution_dir: The directory which Janis meta and execution data is placed. If no execution directory is specified, it uses the path '<outputdir>/janis/'. Note that some templates may override the intermediate computation directory.
        :param engine:
        :param cromwell:
        :param template:
        :param notifications:
        :param run_in_background:
        :param digest_cache_location:
        :param inputs: YAML or JSON inputs file to provide values for the workflow (can specify multiple times)
        :param keep_intermediate_files: Do not remove execution directory on successful complete
        :param recipes:
        :param hints:
        :param allow_empty_container:
        :param container_override:
        :param skip_digest_lookup:
        :param skip_digest_cache:
        :param batchrun:
        :param store_in_central_db:
        :param skip_file_check:
        :param strict_inputs:
        :param validation:
        :param should_watch_if_background:
        :param cromwell_db_type:
        :param post_run_script:
        """
        self.config_dir = config_dir
        self.db_path = db_path

        self.output_dir = output_dir

        self.environment: JanisConfigurationEnvironment = parse_if_dict(
            JanisConfigurationEnvironment,
            environment or {},
            "environment",
            skip_if_empty=False,
        )

        self.engine = EngineType(engine) if engine else None

        requires_cromwell_config = self.engine == EngineType.cromwell
        self.cromwell: JanisConfigurationCromwell = parse_if_dict(
            JanisConfigurationCromwell,
            cromwell or {},
            "cromwell",
            skip_if_empty=not requires_cromwell_config,
        )
        self.template: JanisConfigurationTemplate = parse_if_dict(
            JanisConfigurationTemplate,
            template or {},
            "template",
            skip_if_empty=False)
        self.notifications: JanisConfigurationNotifications = parse_if_dict(
            JanisConfigurationNotifications,
            notifications or {},
            "notifications",
            skip_if_empty=False,
        )
        self._workflow_reference = workflow_reference
        self.inputs = inputs
        self.hints = hints
        self.output_dir = output_dir
        self.execution_dir = execution_dir
        self.keep_intermediate_files = keep_intermediate_files
        self.recipes = recipes
        self.allow_empty_container = allow_empty_container
        self.container_override = container_override
        self.skip_digest_lookup = skip_digest_lookup
        self.skip_digest_cache = skip_digest_cache
        self.batchrun: Optional[BatchRunRequirements] = parse_if_dict(
            BatchRunRequirements, batchrun, "batchrun")
        self.validation: Optional[ValidationRequirements] = parse_if_dict(
            ValidationRequirements, validation, "validation")
        self.store_in_central_db = store_in_central_db
        self.skip_file_check = skip_file_check
        self.strict_inputs = strict_inputs
        self.should_watch_if_background = should_watch_if_background

        self.run_in_background = run_in_background
        self.digest_cache_location = digest_cache_location
        self.call_caching_enabled = call_caching_enabled

        self.post_run_script = post_run_script

        self.container_type = ContainerType(container_type)
        self._container = get_container_by_name(container_type)

        if not self._instance:
            Logger.debug("Setting prepared job")
            PreparedJob._instance = self
        else:
            Logger.debug("Setting prepared job, when already set")
            PreparedJob._instance = self
Example #24
    def check_input_for_correctness(self, inpid: str, dt: DataType, value: any):
        if isinstance(dt, Array):
            if isinstance(value, list):
                return [
                    self.check_input_for_correctness(f"{inpid}[{idx}]", dt.subtype(), v)
                    for idx, v in enumerate(value)
                ]

        if not isinstance(dt, File):
            return value

        if not isinstance(value, str):
            Logger.warn(
                f"Expecting string type input '{inpid}' for type File, but received '{type(value)}'. Janis won't transform this value, but you should confirm your inputs."
            )
            return value

        guessed_datatype = guess_datatype_by_filename(value)

        if not guessed_datatype:
            Logger.info(
                f"Couldn't guess datatype for {value}. Returning the value instead."
            )
            return value

        if dt.can_receive_from(guessed_datatype):
            Logger.debug(f"Input '{inpid}' had a compatible type")
            return value

        message_prefix = (
            f"The value for input '{inpid}' did not match the expected type {dt.name()} "
            f"through the extension and / or existence of secondary files"
        )
        if not guessed_datatype:
            Logger.warn(
                message_prefix
                + f"\nand Janis couldn't guess the datatype from the input for {inpid} and value '{value}'."
            )
            return value
        try:
            transformation = JanisShed.get_transformation_graph().find_connection(
                guessed_datatype, dt
            )
            steps = (
                "".join(t.type1.name() + " -> " for t in transformation)
                + transformation[-1].type2.name()
            )
            Logger.warn(
                message_prefix
                + f",\nJanis guessed the actual datatype for '{inpid}' from data '{value}' to be {guessed_datatype.id()}, "
                f"and Janis was able to determine a transformation in {len(transformation)} step(s): {steps}"
            )
            wf = JanisTransformation.convert_transformations_to_workflow(transformation)

            trans = wf.translate("wdl", to_console=False)[0]
            Logger.debug(
                f"Transforming {inpid} ({guessed_datatype.name()} -> {dt.name()}): {trans}"
            )
        except Exception as e:
            Logger.warn(
                message_prefix
                + f",\nbut Janis couldn't find a transformation between the guessed and expected type:"
                f" {guessed_datatype.name()} -> {dt.name()}: {str(e)}"
            )
            return value

        # maybe do some other things with respect to the path

        try:
            return self.try_get_outputs_for(
                inpid=inpid,
                wf=wf,
                inputs={wf.tool_inputs()[0].id(): value},
                output_dir=os.path.join(self.cache_dir, inpid),
                description=f"{guessed_datatype.name()} -> {dt.name()}",
            )

        except Exception as e:
            Logger.critical(
                f"An internal error occurred when performing the transformation for {inpid} "
                f"({guessed_datatype.name()} -> {dt.name()}): {str(e)}"
            )
            Logger.debug(traceback.format_exc())

            return value
    def stop(self):
        Logger.debug("Received STOP request for MySQL container")
        self.container.stop_container()
def get_workflow_from_file(file, name, include_commandtools=False):
    # How to import a module given the full path
    # https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path
    import importlib.util

    try:
        import sys

        basefilename = os.path.basename(file)

        sys.path.append(os.path.dirname(file))
        spec = importlib.util.spec_from_file_location("module.name", file)
        foo = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(foo)
        ptypes = get_janis_from_module_spec(
            foo, include_commandtools=include_commandtools, name=name)

    except Exception as e:
        raise Exception(
            f"Unrecognised python file when getting workflow / command tool: {file} :: {e}"
        )

    # Per https://github.com/PMCC-BioinformaticsCore/janis-core/issues/31, we'll use the following process:
    # 	1. If a `name` is defined:
    # 	    - Force parse every token with a case-insensitive match
    # 	    - If a single item is returned from a case-sensitive match, then use that
    # 	2. If multiple workflows are defined in the same file, use the last defined workflow
    # 	   - This covers the existing _If a single workflow is defined, use that_ case
    # 	3. If no tools were found, raise an Exception
    # 	4. If multiple tools are defined in the file, use the last one:
    # 	   - If a name was defined, `warn` the user that the case-insensitive match returned no results and use the last one
    # 	   - Otherwise, just tell the user we'll use the last defined tool

    ptypes_casesensitive = [(k, v) for (k, v) in ptypes if k == name]

    if len(ptypes_casesensitive) == 1:
        return ptypes_casesensitive[0][1]

    if name is None:
        mains = [v for (k, v) in ptypes if k == "__JANIS_ENTRYPOINT"]
        if len(mains) > 0:
            Logger.debug(
                "Using workflow defined by '__JANIS_ENTRYPOINT' as no name was used"
            )
            return mains[0]

    wftypes = [
        t for t in ptypes
        if (issubclass(t[1], WorkflowBase
                       ) if isclass(t[1]) else isinstance(t[1], WorkflowBase))
    ]
    detected_tokens = ", ".join(f"'{x[0]}' ({x[1].__class__.__name__})"
                                for x in ptypes)

    if len(wftypes) > 0:
        if len(wftypes) > 1:
            if name:
                Logger.warn(
                    f"Providing the `--name` parameter performs a case-insensitive search for the tokens in "
                    f"'{basefilename}, and a case-sensitive search returned no results. You had {len(wftypes)} "
                    f"tokens that matched this search. Janis will use the last one, defined as "
                    f"'{ptypes[-1][0]}' from: {detected_tokens}")
            else:
                Logger.info(
                    f"Multiple workflows were found in '{basefilename}', using '{wftypes[-1][0]}'"
                )
        return wftypes[-1][1]

    if len(ptypes) == 0:
        raise Exception(
            f"There were no valid tools in '{file}', try running with the `--name YourToolName` parameter "
            f"to get more information (it might have abstract / unimplemented methods)."
        )
    if len(ptypes) > 1:

        if name:
            Logger.warn(
                f"Providing the `--name` parameter performs a case-insensitive search for the tokens in "
                f"'{basefilename}, and a case-sensitive search returned no results. You had {len(ptypes)} "
                f"tokens that matched this search. Janis will use the last one, defined as "
                f"'{ptypes[-1][0]}' from: {detected_tokens}")
        else:
            Logger.info(
                f"There were multiple tools (an no workflows) detected in {basefilename}, "
                f"Janis will use '{ptypes[-1][0]}' (the last defined)")

    return ptypes[-1][1]
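`get_janis_from_module_spec` isn't shown above; the general pattern it follows (scan a freshly executed module for workflow / tool definitions) might look something like this sketch, where everything except the idea of filtering on a base class is an assumption:

from inspect import isclass

def scan_module_for(module, baseclass):
    """Return (name, value) pairs for module members that are subclasses
    or instances of baseclass."""
    found = []
    for name in dir(module):
        value = getattr(module, name)
        if isclass(value):
            if issubclass(value, baseclass) and value is not baseclass:
                found.append((name, value))
        elif isinstance(value, baseclass):
            found.append((name, value))
    return found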