Example #1
def get_janis_from_module_spec(spec, include_commandtools=False):
    """
    Get all the Janis.Workflow's that are defined in the file (__module__ == 'module.name')
    :return: List of all the subclasses of a workflow
    """

    if include_commandtools:
        Logger.log("Expanded search to commandtools in " + str(spec))

    potentials = []
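    # Keep Workflow instances directly; otherwise instantiate concrete
    # Workflow (and optionally CommandTool) subclasses defined in this module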
    for k, ptype in spec.__dict__.items():
        if isinstance(ptype, Workflow):
            potentials.append((k, ptype))
            continue
        if not callable(ptype):
            continue
        if isabstract(ptype):
            continue
        if not isclass(ptype):
            continue
        if ptype.__module__ != "module.name":
            continue
        if ptype == Workflow:
            continue
        if issubclass(ptype, Workflow):
            potentials.append((k, ptype()))
            continue
        if include_commandtools and issubclass(ptype, CommandTool):
            potentials.append((k, ptype()))

    return potentials
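
A minimal usage sketch for the function above, assuming a local file of Janis tools (the module must be loaded under the name "module.name" that the filter checks; the filename here is hypothetical):

import importlib.util

spec = importlib.util.spec_from_file_location("module.name", "my_workflows.py")
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
tools = get_janis_from_module_spec(module, include_commandtools=True)
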
def prepare_tool(
    tool: Tool,
    toolversions: List[str],
    isorphan: bool,
    is_published_pipeline: bool = False,
):
    # Stuff to list on the documentation page:
    #   - Versions of tools
    #   - Generated command
    #   - Cool if it grouped the tools by vendor
    #   -

    if not tool:
        return None
    try:
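        # Dispatch to the page builder that matches the tool's type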
        if is_published_pipeline:
            return ""
        if tool.type() == ToolType.CommandTool:
            return prepare_commandtool_page(tool, toolversions)
        elif tool.type() == ToolType.Workflow:
            return prepare_workflow_page(tool, toolversions)
        elif tool.type() == ToolType.CodeTool:
            return prepare_code_tool_page(tool, toolversions)
    except Exception as e:
        traceback.print_exc()
        Logger.critical("Couldn't generate documentation for " + tool.id() +
                        " " + str(e))
    def query_tasks(self, status, name) -> Dict[str, RunModel]:

        rows: List[TaskRow] = self.get_lazy_db_connection().get_all_tasks()

        failed = []
        relevant = {}

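        # Rows whose execution directory has vanished can't be queried, so record them as failed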
        for row in rows:
            if not os.path.exists(row.execution_dir):
                failed.append(row.submission_id)
                continue
            try:
                metadb = WorkflowManager.has(
                    row.execution_dir,
                    submission_id=row.submission_id,
                    name=name,
                    status=status,
                )
                if metadb:
                    model = metadb
                    model.outdir = row.output_dir
                    relevant[row.submission_id] = model
            except Exception as e:
                Logger.critical(f"Couldn't check workflow '{row.submission_id}': {e}")
                failed.append(row.submission_id)

        if failed:
            failedstr = ", ".join(failed)
            Logger.warn(
                f"Couldn't get information for tasks: {failedstr}, run "
                f"'janis cleanup' to clean up your tasks."
            )

        return relevant
def pickle_obj(obj):
    if obj is None:
        return None
    try:
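        # protocol=2 keeps the payload loadable by older interpreters, including Python 2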
        return pickle.dumps(obj, protocol=2)
    except Exception as ex:
        Logger.warn(f"Couldn't pickle {repr(obj)} as encountered {repr(ex)}")
        return None
def unpickle_obj(obj):
    if obj is None:
        return None
    try:
        return pickle.loads(obj)
    except Exception as ex:
        Logger.warn(f"Couldn't unpickle {repr(obj)} as encountered {repr(ex)}")
        return None
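
A quick round-trip sketch of the two helpers above (values are hypothetical):

blob = pickle_obj({"run": 1})
restored = unpickle_obj(blob)   # -> {"run": 1}
bad = pickle_obj(lambda: 0)     # lambdas aren't picklable, so this logs a warning and returns None
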
    def __init__(self, host, repository, image, tag, chash: str):
        self.host = host
        self.repository = repository
        self.image = image
        self.tag = tag
        self.chash = chash
        if image is None:
            Logger.warn(f"{str(self)} didn't have an image, so defaulting to 'ubuntu'")
            self.image = "ubuntu"
    def __init__(self, dblocation, tablename, readonly=False):
        sqlitedict.logger.disabled = True
        ro = "r" if readonly else "c"
        Logger.debug(f"Opening connection to {dblocation}/{tablename} with mode {ro}")
        self.kvdb = sqlitedict.SqliteDict(
            dblocation, tablename=tablename, autocommit=True, flag=ro
        )
    def get_token(self, info: ContainerInfo) -> Optional[str]:
        req = self.build_token_request(info)
        if req is None:
            return None
        Logger.log(f"Requesting auth token for {info}: " + req.full_url)
        response = request.urlopen(req)
        data = response.read()
        res = json.loads(data.decode(response.info().get_content_charset("utf-8")))
        return res.get("token")
Example #9
    def populate_cache(self):
        # Cache the tuple of ID-key values for every row so later membership
        # checks don't require a database round-trip
        self._id_cache = set()
        idkeys = set(self.get_id_keys())
        idkeys_ordered = list(idkeys)
        prows = f"SELECT {', '.join(idkeys_ordered)} FROM {self._tablename}"
        with self.with_cursor() as cursor:
            Logger.log("Running query: " + str(prows))
            rows = cursor.execute(prows).fetchall()
            for row in rows:
                self._id_cache.add(row)
Example #10
    def exec_command(self, command):
        cmd = ["singularity", "run", "instance://" + self.instancename]
        # Accept either a single string argument or a list of arguments
        if isinstance(command, list):
            cmd.extend(command)
        else:
            cmd.append(command)

        try:
            val = subprocess.check_output(cmd)
            return val.strip().decode() if val else val, 0
        except subprocess.CalledProcessError as e:
            Logger.critical("Singularity exec_command failed")
            return str(e), e.returncode
    def __setattr__(self, name, value):

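        # Whitelisted attributes are written through to the on-disk key-value
        # store; everything else is set on the instance as normal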
        if name in self.attributes_to_persist:
            try:
                self.kvdb[name] = value
            except Exception as e:
                Logger.critical(f"Failed to write {name}={value} due to: {e}")
            return

        super().__setattr__(name, value)
def deserialize_inner(val):
    if val is None:
        return None
    try:
        return json.loads(val)
    except Exception as ex:
        # tbh, sometimes the mysql database converts '"myvalue"' -> 'myvalue' (dropping the quotes), so we'll fall back to the string representation
        Logger.debug(
            f"Couldn't deserialize value, using string representation instead (value: {repr(val)}): {repr(ex)}"
        )
        return str(val)
Example #13
    def stop_container(self):
        try:
            Logger.info(f"Stopping mysql container '{self.instancename}'")
            if self.run_logger:
                self.run_logger.terminate()
            cmd = ["singularity", "instance", "stop", self.instancename]
            return subprocess.check_output(cmd)
        except subprocess.CalledProcessError as e:
            Logger.critical(
                f"Couldn't stop singularity instance '{self.instancename}': {e}"
            )
    def mark_paused(self):
        try:
            self.database.workflowmetadata.please_pause = True
            Logger.info(
                "Marked workflow as paused; this may take some time to properly pause"
            )
            self.database.workflowmetadata.commit()
            return True
        except Exception as e:
            Logger.critical("Couldn't mark paused: " + str(e))
            return False
    def set_status(self, status: TaskStatus, force_notification=False):
        prev = self.database.workflowmetadata.status

        if prev == status and not force_notification:
            return

        Logger.info("Status changed to: " + str(status))
        self.database.workflowmetadata.status = status
        self.database.commit()
        # send an email here

        NotificationManager.notify_status_change(status, self.database.get_metadata())
Example #16
def from_template(name, options) -> EnvironmentTemplate:
    template = get_template(name)
    if not template:
        raise Exception(
            f"Couldn't find Configuration template with name: '{name}'")

    Logger.log(f"Got template '{template.__name__}' from id = {name}")

    validate_template_params(template, options)
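    # Shallow-copy so the caller's options dict isn't mutated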
    newoptions = {**options}
    # newoptions.pop("template")

    return template(**newoptions)
Example #17
def get_tag_and_cleanup_prefix(
    prefix,
) -> Optional[Tuple[str, str, bool, Optional[DataType]]]:
    """
    :param prefix:
    :return: (raw_element, potentialID, hasSeparator, potentialType)
    """
    # cases:
    # -a ADAPTER
    # --adapter=ADAPTER
    # --quality-cutoff=[5'CUTOFF,]3'CUTOFF
    el = prefix.lstrip()
    has_equals = False
    pretag = None
    potential_type = None

    # if the prefix is split by ':' or '='
    if ":" in el or "=" in el:
        parts = None
        if ":" in el:
            parts = el.split(":")
        elif "=" in el:
            parts = el.split("=")
            has_equals = True

        if len(parts) > 2:
            Logger.warn(
                f"Unexpected number of components in the tag '{el}' to guess the type, using '{parts[0]}' and skipping type inference"
            )
        else:
            el, pt = parts[0], guess_type(parts[1])

            if not potential_type and pt:
                potential_type = pt

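    # Keep only the first whitespace-delimited token, e.g. "-a" from "-a ADAPTER"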
    if " " in el:
        el = el.split(" ")[0]

    titleComponents = [l.strip().lower() for l in el.split("-") if l]
    if len(titleComponents) == 0:
        Logger.critical(
            f"Couldn't derive any title components from tag '{prefix}'")
        return None
    tag = "_".join(titleComponents)

    if tag.lower() in common_replacements:
        tag = common_replacements[tag.lower()]

    if tag.lower() == "outputfilename":
        potential_type = Filename

    return el, tag, has_equals, potential_type
    def get_list_of_contigs_from_bed(bedfile: str) -> Set[str]:
        try:
            contigs = set()
            with open_potentially_compressed_file(bedfile) as fp:
                for l in fp:
                    contig: str = l.split("\t")[0]
                    if contig:
                        contigs.add(contig.strip())
            return contigs

        except Exception as e:
            Logger.critical(
                f"Couldn't get contigs from bedfile {bedfile}: {str(e)}")
            return set()
    def get_list_of_contigs_from_fastafai(fai_idx: str) -> Set[str]:
        # Structure contig, size, location, basesPerLine and bytesPerLine
        try:
            contigs = set()
            with open_potentially_compressed_file(fai_idx) as f:
                for l in f:
                    contigs.add(l.split("\t")[0])

            return contigs

        except Exception as e:
            Logger.critical(
                f"Couldn't get contigs from reference {fai_idx}: {str(e)}")
            return set()
    def get_digest(self, info: ContainerInfo) -> Optional[str]:
        try:
            token = self.get_token(info)
        except Exception as e:
            Logger.critical(
                f"Couldn't get digest for container (couldn't get token): '{str(info)}': {str(e)}"
            )
            return None
        if token:
            Logger.debug(
                f"Got token for '{info}': {token[: min(5, len(token) - 1)]}..."
            )

        try:
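            # Registries return the digest via the ETag or Docker-Content-Digest
            # header, sometimes wrapped in quotes that need stripping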
            req = self.build_request(info, token)
            Logger.debug(f"Requesting digest from: {req.full_url}")
            with request.urlopen(req) as response:
                rheaders = response.headers
                digest = rheaders.get("etag",
                                      rheaders.get("Docker-Content-Digest"))

            if digest is not None:
                digest = digest.replace("'", "").replace('"', "")

            return digest

        except Exception as e:
            Logger.critical(
                f"Couldn't get digest for container '{str(info)}': {str(e)}")
    def remove_exec_dir(self):
        status = self.database.workflowmetadata.status

        keep_intermediate = self.database.workflowmetadata.keepexecutiondir
        if (
            not keep_intermediate
            and status is not None
            and status == TaskStatus.COMPLETED
        ):
            execdir = self.get_path_for_component(self.WorkflowManagerPath.execution)
            if execdir and execdir != "None":
                Logger.info("Cleaning up execution directory")
                self.environment.filescheme.rm_dir(execdir)
                self.database.progressDB.set(ProgressKeys.cleanedUp)
Example #22
def guess_type(potential_type: str):
    if not potential_type:
        return None
    l = potential_type.lower()
    hopeful_type = JanisShed.get_datatype(l)

    if not hopeful_type:
        # crude heuristic: names containing "st" (e.g. "str", "string") fall back to String
        if "st" in l:
            hopeful_type = String

    if hopeful_type:
        Logger.info(
            f"Found type {hopeful_type.__name__} from tag: {potential_type}")

    return hopeful_type
Example #23
def load_templates_if_required():
    import importlib_metadata

    global additional_templates
    if additional_templates is None:
        additional_templates = {}
        eps = importlib_metadata.entry_points().get(TEMPLATE_EP, [])
        for entrypoint in eps:  # pkg_resources.iter_entry_points(group=TEMPLATE_EP):
            try:
                additional_templates[entrypoint.name] = entrypoint.load()
            except ImportError as e:
                Logger.critical(
                    f"Couldn't import janis template '{entrypoint.name}': {e}"
                )
                continue
    def get_by_id(
        self, submission_id, allow_operational_errors=True
    ) -> Optional[SubmissionModel]:
        s = self.get(
            where=("id = ?", [submission_id]),
            allow_operational_errors=allow_operational_errors,
        )
        if s is None:
            return None
        if len(s) != 1:
            Logger.warn(
                f"Couldn't get submission with id={submission_id}, query returned {len(s)} results."
            )
            return None
        return s[0]
Example #25
def run_test_case(
    tool_id: str,
    test_case: str,
    engine: EngineType,
    output: Optional[Dict] = None,
    config: Optional[str] = None,
) -> Dict[str, Any]:
    tool = test_helpers.get_one_tool(tool_id)

    if not tool:
        raise Exception(f"Tool {tool_id} not found")

    runner = ToolTestSuiteRunner(tool, config=config)
    tests_to_run = [
        tc for tc in tool.tests() if tc.name.lower() == test_case.lower()
    ]

    if not tests_to_run:
        raise Exception(f"Test case {test_case} not found")

    if len(tests_to_run) > 1:
        raise Exception(
            f"There is more than one test case with the same name {test_case}")

    if output is not None:
        Logger.info(
            "Dryrun: validating test using provided output data without running the workflow"
        )

    failed = set()
    succeeded = set()
    execution_error = ""

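    # SystemExit is not a subclass of Exception, so it needs its own handler to capture the exit code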
    try:
        failed, succeeded, output = runner.run_one_test_case(t=tests_to_run[0],
                                                             engine=engine,
                                                             output=output)
    except Exception as e:
        execution_error = str(e)
    except SystemExit as e:
        execution_error = f"Workflow execution failed (exit code: {e.code})"

    return {
        "failed": list(failed),
        "succeeded": list(succeeded),
        "output": output,
        "execution_error": execution_error,
    }
Example #26
def send_slack_notification(result: Dict, option: NotificationOption):
    Logger.info("sending notification to Slack")

    if len(result["failed"]) == 0 and not result["execution_error"]:
        failed = False
        status = "Test Succeeded"
        icon = ":white_check_mark:"
    else:
        failed = True
        status = "Test Failed"
        icon = ":x:"

    test_description = ""
    if option.test_id:
        test_description = f" *{option.test_id}*"

    summary_block = {
        "type": "section",
        "text": {
            "type": "mrkdwn",
            "text": f"{icon} {status}{test_description}: {option.tool_name} - {option.test_case}",
        },
    }

    blocks = [summary_block]

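    # On failure, list each failed expected output in its own section block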
    if failed and result["failed"]:
        failed_expected_output = []

        for f in result["failed"]:
            failed_expected_output.append(f":black_small_square: {f}")

        failed_block = {
            "type": "section",
            "text": {"type": "mrkdwn", "text": "\n".join(failed_expected_output)},
        }

        blocks.append(failed_block)

    if result["execution_error"]:
        text = result["execution_error"].replace("\n", "<br />")
        execution_error_block = {
            "type": "section",
            "text": {"type": "mrkdwn", "text": f"{result['execution_error']}"},
        }

        blocks.append(execution_error_block)

    request = {"blocks": blocks}
    resp = requests.post(url=option.url, json=request)

    if resp.status_code == requests.codes.ok:
        Logger.info("Notification sent")
    else:
        Logger.warn("Failed to send slack notification")
        Logger.warn(f"{resp.status_code}: {resp.text}")

    return resp.status_code, resp.text
Example #27
    def get(
        self,
        keys: Union[str, List[str]] = "*",
        where: Optional[Tuple[str, List[Any]]] = None,
        allow_operational_errors=True,
    ) -> Optional[List[T]]:
        jkeys = ", ".join(keys) if isinstance(keys, list) else keys
        if jkeys == "*":
            keys = [t.dbalias for t in self._base.keymap()]
            jkeys = ", ".join(keys) if isinstance(keys, list) else keys

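        # Scope constraints and the caller's WHERE clause are ANDed into a single filter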
        values = []
        whereclauses = []
        if self._scopes:
            scopes = self._scopes.items()
            whereclauses.extend(f"{k} = ?" for k, _ in scopes)
            values.extend(v for _, v in scopes)

        if where:
            whereclauses.append(where[0])
            values.extend(where[1])

        query = f"SELECT {jkeys} FROM {self._tablename}"

        if whereclauses:
            query += f" WHERE {' AND '.join(whereclauses)}"

        with self.with_cursor() as cursor:
            try:
                rows = cursor.execute(query, values).fetchall()
            except OperationalError as e:
                if not allow_operational_errors:
                    raise e
                if "readonly database" in str(e):
                    # mfranklin: idk, this sometimes happens. We're doing a select query, idk sqlite3 driver...
                    Logger.debug(
                        f"Got readonly error when running query: '{query}', skipping for now"
                    )
                    return None
                elif "locked" in str(e):
                    Logger.debug(
                        f"We hit the janis database.{self._tablename} at the same time the janis process wrote to it, we'll skip for now "
                    )
                    return None
                raise

        parsedrows = [self._base.deserialize(keys, r) for r in rows]
        return parsedrows
Example #28
    def start(self):
        """
        Start the MySQL container and block until mysqld responds to a ping.
        """

        self.prepare_mysql_dirs()

        self.container.ensure_downloaded()

        # before we start, we want to create a Database for Cromwell, we can do this by
        # binding a directory of scripts to /docker-entrypoint-initdb.d (runs *.sh, *.sql, *.sql.gz)
        # Source: https://hub.docker.com/_/mysql#initializing-a-fresh-instance

        self.container.bindpoints = {
            "/var/lib/mysql": self.datadirectory,
            "/var/run/mysqld": self.mysqldoverride,
            "/etc/mysql/": self.sqlconfdir,
            "/docker-entrypoint-initdb.d": self.startupscriptsdir,
        }

        if isinstance(self.container, Docker):
            self.container.exposedports = {self.forwardedport: None}

        self.container.environment_variables["MYSQL_ALLOW_EMPTY_PASSWORD"] = 1
        self.container.environment_variables["MYSQL_INITDB_SKIP_TZINFO"] = 1

        self.container.start_container()
        # Have to wait for it to initialise
        sleep(10)
        cmd = [
            "mysqladmin",
            "ping",
            "-h",
            f"127.0.0.1",
            "-P",
            str(self.forwardedport),
            "-u",
            "root",
            "--wait=60",
        ]
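        # Poll until mysqld reports it's alive; any other non-empty response is fatal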
        while True:
            (response, rc) = self.container.exec_command(cmd)
            if response == "mysqld is alive":
                return
            if response:
                Logger.critical("MySQL error: " + str(response))
                raise Exception(response)
            else:
                Logger.critical(rc)
def do_fromwdl(args):
    from janis_core import WdlParser, Logger

    Logger.info(f"Loading WDL file: {args.wdlfile}")
    tool = WdlParser.from_doc(args.wdlfile)

    Logger.info(f"Loaded {tool.type()}: {tool.versioned_id()}")

    translated = tool.translate(
        args.translation,
        to_console=args.output is None,
        to_disk=args.output is not None,
        export_path=args.output,
    )

    return translated
    def save_metadata_if_required(self):
        if self.database.progressDB.has(ProgressKeys.savedMetadata):
            return Logger.debug(f"Workflow '{self.wid}' has saved metadata, skipping")

        engine = self.get_engine()

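        # Each engine exposes run metadata differently, so persist it as JSON in the metadata directory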
        metadir = self.get_path_for_component(self.WorkflowManagerPath.metadata)
        if isinstance(engine, Cromwell):
            import json

            meta = engine.raw_metadata(self.get_engine_wid()).meta
            with open(os.path.join(metadir, "metadata.json"), "w+") as fp:
                json.dump(meta, fp)

        elif isinstance(engine, CWLTool):
            import json

            meta = engine.metadata(self.wid)
            self.set_status(meta.status)
            with open(os.path.join(metadir, "metadata.json"), "w+") as fp:
                json.dump(meta.outputs, fp)

        else:
            raise Exception(
                f"Don't know how to save metadata for engine '{engine.id()}'"
            )

        self.database.progressDB.set(ProgressKeys.savedMetadata)