def start_mysql_and_prepare_cromwell_config(self):
        scriptsdir = self.get_path_for_component(self.WorkflowManagerPath.mysql)

        containerdir = self.get_path_for_component(self.WorkflowManagerPath.database)
        conf = JanisConfiguration.manager()
        if (
            conf
            and conf.template
            and isinstance(conf.template.template, SingularityEnvironmentTemplate)
        ):
            containerdir = conf.template.template.singularity_container_dir

        self.dbcontainer = MySql(
            wid=self.wid,
            container=JanisConfiguration.manager().container,
            datadirectory=self.get_path_for_component(
                self.WorkflowManagerPath.database
            ),
            confdir=scriptsdir,
            forwardedport=find_free_port(),
            containerdir=containerdir,
        )
        self.dbcontainer.start()

        port = self.dbcontainer.forwardedport
        return CromwellConfiguration.Database.mysql(
            username="******", url=f"127.0.0.1:{port}"
        )
Example #2
0
def generate_inputs(
    tool: Union[str, j.CommandTool, j.Workflow],
    all=False,
    name=None,
    force=False,
    additional_inputs=None,
    with_resources=False,
    quality_type: List[InputQualityType] = None,
    recipes: List[str] = None,
    hints: dict = None,
):
    toolref = resolve_tool(tool, name, from_toolshed=True, force=force)
    inputsdict = None
    if additional_inputs:
        inputsfile = get_file_from_searchname(additional_inputs, ".")
        inputsdict = parse_dict(inputsfile)

    values_to_ignore = set()
    if recipes:
        jc = JanisConfiguration.manager()
        for k in jc.recipes.get_recipe_for_keys(recipes):
            values_to_ignore.add(k)

    if not toolref:
        raise Exception("Couldn't find workflow with name: " + str(tool))

    return toolref.generate_inputs_override(
        additional_inputs=inputsdict,
        with_resource_overrides=with_resources,
        include_defaults=all,
        values_to_ignore=values_to_ignore,
        quality_type=quality_type,
        hints=hints,
    )
Example #3
0
    def find_or_generate_config(self, identifier,
                                config: CromwellConfiguration, config_path):
        from janis_assistant.management.configuration import JanisConfiguration

        jc = JanisConfiguration.manager()

        if config:
            self.config = config

        elif config_path:
            shutil.copyfile(config_path, self.config_path)

        elif jc.cromwell.configpath:
            shutil.copyfile(jc.cromwell.configpath, self.config_path)

        else:
            self.config: CromwellConfiguration = jc.template.template.engine_config(
                EngineType.cromwell, jc) or CromwellConfiguration()
            if not self.config.system:
                self.config.system = CromwellConfiguration.System()
            self.config.system.cromwell_id = identifier
            self.config.system.cromwell_id_random_suffix = False
            self.config.system.job_shell = "/bin/sh"

        if self.config:

            if self.config.backend:
                if len(self.config.backend.providers) == 1:
                    cnf: CromwellConfiguration.Backend.Provider = first_value(
                        self.config.backend.providers)
                    if not cnf.config.root:
                        cnf.config.root = self.execution_dir
            else:
                self.config.backend = CromwellConfiguration.Backend.with_new_local_exec_dir(
                    self.execution_dir)
    def send_email(subject: str, body: str):

        nots = JanisConfiguration.manager().notifications

        mail_program = nots.mail_program

        if not mail_program:
            return Logger.log("Skipping email send as no mail program is configured")

        if not nots.email or nots.email.lower() == "none":
            Logger.log("Skipping notify status change as no email")
            return

        emails: List[str] = nots.email if isinstance(
            nots.email, list
        ) else nots.email.split(",")

        email_template = f"""\
Content-Type: text/html
To: {"; ".join(emails)}
From: [email protected]
Subject: {subject}

{body}"""

        command = f"echo '{email_template}' | {mail_program}"
        Logger.log("Sending email with command: " + str(command.replace("\n", "\\n")))
        try:
            subprocess.call(command, shell=True)
        except Exception as e:
            Logger.critical(f"Couldn't send email '{subject}' to {emails}: {e}")
Example #5
0
def resolve_tool(
    tool: Union[str, j.CommandTool, Type[j.CommandTool], j.Workflow,
                Type[j.Workflow]],
    name=None,
    from_toolshed=False,
    force=False,
    only_toolbox=False,
):
    if isinstance(tool, j.Tool):
        return tool
    elif isclass(tool) and issubclass(tool, (j.Workflow, j.Tool)):
        return tool()

    if not isinstance(tool, str):
        raise TypeError(
            f"Janis is not sure how to resolve a workflow of type: '{type(tool)}'"
        )

    if not only_toolbox:
        fileschemewherelocated = FileScheme.get_type_by_prefix(tool.lower())
        if fileschemewherelocated:
            Logger.info(
                f"Detected remote workflow to localise from '{fileschemewherelocated.__name__}'"
            )
            # Get some unique name for the workflow
            import hashlib

            fn = hashlib.md5(tool.lower().encode()).hexdigest() + ".py"
            outdir = os.path.join(JanisConfiguration.manager().configdir,
                                  "cached")
            os.makedirs(outdir, exist_ok=True)
            dest = os.path.join(outdir, fn)
            Logger.log(f"Localising '{tool}' to '{dest}'")

            fileschemewherelocated("internal").cp_from(
                source=tool.lower(),
                dest=dest,
                report_progress=lambda progress: print(
                    f"Download progress: {progress}"),
                force=force,
            )
            tool = dest

        wf = get_janis_workflow_from_searchname(tool,
                                                ".",
                                                name=name,
                                                include_commandtools=True)

        if wf:
            return wf

    if from_toolshed:
        v = None
        if ":" in tool:
            ps = tool.split(":")
            workflow, v = ps[0], ps[1]

        wf = j.JanisShed.get_tool(tool, v)

        return wf
def do_translate(args):
    jc = JanisConfiguration.initial_configuration(args.config)

    container_override = parse_container_override_format(
        args.container_override)

    hints = {
        k[5:]: v
        for k, v in vars(args).items()
        if k.startswith("hint_") and v is not None
    }

    inputs = args.inputs or []
    # the args.extra_inputs parameter are inputs that we MUST match
    # we'll need to parse them manually and then pass them to fromjanis as requiring a match
    # required_inputs = parse_additional_arguments(args.extra_inputs)

    translate(
        config=jc,
        tool=args.workflow,
        translation=args.translation,
        name=args.name,
        output_dir=args.output_dir,
        force=args.no_cache,
        allow_empty_container=args.allow_empty_container,
        container_override=container_override,
        skip_digest_lookup=args.skip_digest_lookup,
        skip_digest_cache=args.skip_digest_cache,
        inputs=inputs,
        recipes=args.recipe,
        hints=hints,
    )
Example #7
0
def do_inputs(args):

    if args.config or args.recipes:
        JanisConfiguration.initial_configuration(args.config)

    quality_type = None

    if args.user:
        quality_type = [InputQualityType.user]
    elif args.static:
        quality_type = [
            InputQualityType.static, InputQualityType.configuration
        ]

    hints = {
        k[5:]: v
        for k, v in vars(args).items()
        if k.startswith("hint_") and v is not None
    }

    outd = generate_inputs(
        args.workflow,
        all=args.all,
        name=args.name,
        force=args.no_cache,
        additional_inputs=args.inputs,
        with_resources=args.resources,
        quality_type=quality_type,
        recipes=args.recipes,
        hints=hints,
    )

    if args.json:
        outs = json.dumps(outd,
                          sort_keys=True,
                          indent=4,
                          separators=(",", ": "))
    else:
        outs = ruamel.yaml.dump(outd, default_flow_style=False)

    if args.output:
        with open(args.output, "w+") as out:
            out.write(str(outs))
    else:
        print(outs, file=sys.stdout)
Example #8
0
    def find_or_generate_config(self, config: CWLToolConfiguration):
        from janis_assistant.management.configuration import JanisConfiguration

        jc = JanisConfiguration.manager()

        if config:
            self.config = config
        else:
            self.config = (jc.template.template.engine_config(
                EngineType.cwltool, jc) or CWLToolConfiguration())
    def from_path_with_wid(path, wid, readonly=False):
        """
        :param wid: Workflow ID
        :param path: Path to workflow
        :return: TaskManager after resuming (might include a wait)
        """
        # get everything and pass to constructor
        # database path

        path = WorkflowManager.get_task_path_for(path)
        if not os.path.exists(path):
            raise FileNotFoundError(f"Couldn't find path '{path}'")

        db = WorkflowDbManager.get_workflow_metadatadb(path, wid, readonly=readonly)

        if not wid:
            wid = db.wid  # .get_meta_info(InfoKeys.taskId)

        if not wid:
            raise Exception(f"Couldn't find workflow with id '{wid}'")

        envid = db.environment  # .get_meta_info(InfoKeys.environment)
        eng = db.engine
        fs = db.filescheme
        env = Environment(envid, eng, fs)

        try:
            JanisConfiguration._managed = db.configuration
        except Exception as e:
            Logger.critical(
                "The JanisConfiguration could not be loaded from the DB, this might be due to an older version, we'll load your current config instead. Error: "
                + str(e)
            )
            JanisConfiguration.initial_configuration(None)

        db.close()

        tm = WorkflowManager(outdir=path, wid=wid, environment=env, readonly=readonly)
        return tm
Example #10
0
    def db_connection(self):
        config = JanisConfiguration.manager()
        try:
            if self.readonly:
                Logger.debug(
                    "Opening database connection to in READONLY mode: " + config.dbpath
                )
                return sqlite3.connect(f"file:{config.dbpath}?mode=ro", uri=True)

            Logger.debug("Opening database connection: " + config.dbpath)
            return sqlite3.connect(config.dbpath)
        except:
            Logger.critical("Error when opening DB connection to: " + config.dbpath)
            raise
Example #11
0
    def start_from_paths(self, wid, source_path: str, input_path: str,
                         deps_path: str):

        from janis_assistant.management.configuration import JanisConfiguration

        jc = JanisConfiguration.manager()

        self.taskmeta = {
            "start": DateUtil.now(),
            "status": TaskStatus.PROCESSING,
            "jobs": {},
        }
        config: CWLToolConfiguration = self.config

        if Logger.CONSOLE_LEVEL == LogLevel.VERBOSE:
            config.debug = True

        config.disable_color = True

        # more options
        if not config.tmpdir_prefix:
            config.outdir = self.execution_dir + "/"
            config.tmpdir_prefix = self.execution_dir + "/"
            config.leave_tmpdir = True

        if jc.call_caching_enabled:
            config.cachedir = os.path.join(self.execution_dir, "cached/")

        cmd = config.build_command_line(source_path, input_path)

        Logger.debug("Running command: '" + " ".join(cmd) + "'")

        process = subprocess.Popen(cmd,
                                   stdout=subprocess.PIPE,
                                   preexec_fn=os.setsid,
                                   stderr=subprocess.PIPE)
        self.taskmeta["status"] = TaskStatus.RUNNING
        Logger.info("CWLTool has started with pid=" + str(process.pid))
        self.process_id = process.pid

        self._logger = CWLToolLogger(
            wid,
            process,
            logfp=open(self.logfile, "a+"),
            metadata_callback=self.task_did_update,
            exit_function=self.task_did_exit,
        )

        return wid
Example #12
0
    def get_config():
        """
        This is here to lazily instantiate the config
        """
        nonlocal cached_outd
        if not cached_outd:

            outd = JanisConfiguration.default()

            if templatename:
                tmpl = janistemplates.get_template(templatename)
                schema = janistemplates.get_schema_for_template(tmpl)

                mapped_schema_to_default = {
                    s.identifier: s.default
                    for s in schema if s.default is not None
                }

                # parse extra params
                description = dedent(tmpl.__doc__) if tmpl.__doc__ else None

                parser = InitArgParser(templatename,
                                       schema,
                                       description=description)
                parsed = parser.parse_args(unparsed_init_args)

                try:
                    # "easier to ask for forgiveness than permission" https://stackoverflow.com/a/610923
                    keys_to_skip = set(tmpl.ignore_init_keys)
                except AttributeError:
                    Logger.log(
                        f"Template '{templatename}' didn't have 'ignore_init_keys'"
                    )
                    keys_to_skip = set()

                outd[JanisConfiguration.Keys.Engine] = EngineType.cromwell
                outd[JanisConfiguration.Keys.Template] = {
                    s.id(): parsed.get(s.id(),
                                       mapped_schema_to_default.get(s.id()))
                    for s in schema if (s.identifier in parsed) or (
                        s.identifier in mapped_schema_to_default
                        and s.identifier not in keys_to_skip)
                }
                outd[JanisConfiguration.Keys.Template][
                    JanisConfiguration.JanisConfigurationTemplate.Keys.
                    Id] = templatename

            cached_outd = stringify_dict_keys_or_return_value(outd)
        return cached_outd
Example #13
0
    def __init__(self, readonly=False):

        # Before the manager() is called, someone (the CLI definitely) MUST call
        # JanisConfiguration.inital_configuration(potential_config_paths), this
        # will search os.env for potential configs
        config = JanisConfiguration.manager()
        self.readonly = readonly
        self.is_new = not os.path.exists(config.dbpath)

        cp = os.path.dirname(config.dbpath)
        os.makedirs(cp, exist_ok=True)
        if config.outputdir:
            os.makedirs(config.outputdir, exist_ok=True)

        self._connection: Optional[sqlite3.Connection] = None
        self._taskDB: Optional[TasksDbProvider] = None
Example #14
0
def get_engine_from_eng(eng, wid, logfile, confdir, execdir: str, **kwargs):

    if eng == "cromwell":
        url = kwargs.get(
            "cromwell_url") or JanisConfiguration.manager().cromwell.url
        if url:
            Logger.info("Found cromwell_url: " + url)
        return Cromwell(
            identifier=f"cromwell-{wid}",
            logfile=logfile,
            confdir=confdir,
            host=url,
            cromwelljar=kwargs.get("cromwell_jar"),
            execution_dir=execdir,
        )

    return get_engine_type(eng)(logfile=logfile, execution_dir=execdir)
Example #15
0
    def config(self):
        if self._config is None:

            from janis_assistant.management.configuration import JanisConfiguration

            if not self._raw_config:
                pass
            elif isinstance(self._raw_config, JanisConfiguration):
                self._config = self._raw_config
            elif isinstance(self._raw_config, str):
                self._config = JanisConfiguration.initial_configuration(
                    path=self._raw_config
                )
            else:
                raise ValueError(
                    f"Unrecognised type for janis configuration {self._raw_config} (type: {type(self._raw_config)})"
                )
        return self._config
Example #16
0
def do_runtest(args):
    config = None
    if args.config:
        config = JanisConfiguration.initial_configuration(path=args.config)

    runner_path = test_runner.__file__

    cli_args = sys.argv[2:]
    run_test_commands = ["python", runner_path] + cli_args

    if config:
        commands = config.template.template.prepare_run_test_command(
            run_test_commands)
    else:
        commands = run_test_commands

    joined_command = "' '".join(commands)
    Logger.info(f"Deploying test with command: '{joined_command}'")
    subprocess.run(commands)
Example #17
0
def fromjanis(
    workflow: Union[str, j.Tool, Type[j.Tool]],
    name: str = None,
    engine: Union[str, Engine] = None,
    filescheme: Union[str, FileScheme] = LocalFileScheme(),
    validation_reqs=None,
    batchrun_reqs=None,
    hints: Optional[Dict[str, str]] = None,
    output_dir: Optional[str] = None,
    dryrun: bool = False,
    inputs: Union[str, dict] = None,
    required_inputs: dict = None,
    watch=True,
    max_cores=None,
    max_memory=None,
    force=False,
    keep_intermediate_files=False,
    recipes=None,
    run_in_background=True,
    run_in_foreground=None,
    dbconfig=None,
    only_toolbox=False,
    no_store=False,
    allow_empty_container=False,
    check_files=True,
    container_override: dict = None,
    **kwargs,
):
    cm = ConfigManager.manager()
    jc = JanisConfiguration.manager()

    wf: Optional[Tool] = resolve_tool(
        tool=workflow,
        name=name,
        from_toolshed=True,
        only_toolbox=only_toolbox,
        force=force,
    )
    if not wf:
        raise Exception("Couldn't find workflow with name: " + str(workflow))

    # if isinstance(tool, j.CommandTool):
    #     tool = tool.wrapped_in_wf()
    # elif isinstance(tool, j.CodeTool):
    #     tool = tool.wrapped_in_wf()

    # organise inputs
    inputsdict = {}

    if recipes:
        valuesfromrecipe = jc.recipes.get_recipe_for_keys(recipes)
        inputsdict.update(valuesfromrecipe)

    inputsdict.update(
        cascade_inputs(
            wf=wf,
            inputs=inputs,
            required_inputs=required_inputs,
            batchrun_options=batchrun_reqs,
        ))

    row = cm.create_task_base(wf,
                              outdir=output_dir,
                              store_in_centraldb=not no_store)
    print(row.wid, file=sys.stdout)

    engine = engine or jc.engine

    eng = get_engine_from_eng(
        engine,
        wid=row.wid,
        execdir=WorkflowManager.get_path_for_component_and_dir(
            row.outputdir, WorkflowManager.WorkflowManagerPath.execution),
        confdir=WorkflowManager.get_path_for_component_and_dir(
            row.outputdir, WorkflowManager.WorkflowManagerPath.configuration),
        logfile=os.path.join(
            WorkflowManager.get_path_for_component_and_dir(
                row.outputdir, WorkflowManager.WorkflowManagerPath.logs),
            "engine.log",
        ),
        watch=watch,
        **kwargs,
    )
    fs = get_filescheme_from_fs(filescheme, **kwargs)
    environment = Environment(f"custom_{wf.id()}", eng, fs)

    try:

        # Note: run_in_foreground can be None, so
        # (not (run_in_foreground is True)) != (run_in_foreground is False)

        should_run_in_background = (run_in_background is True
                                    or jc.run_in_background is True
                                    ) and not (run_in_foreground is True)

        tm = cm.start_task(
            wid=row.wid,
            tool=wf,
            environment=environment,
            validation_requirements=validation_reqs,
            batchrun_requirements=batchrun_reqs,
            task_path=row.outputdir,
            hints=hints,
            inputs_dict=inputsdict,
            dryrun=dryrun,
            watch=watch,
            max_cores=max_cores,
            max_memory=max_memory,
            keep_intermediate_files=keep_intermediate_files,
            run_in_background=should_run_in_background,
            dbconfig=dbconfig,
            allow_empty_container=allow_empty_container,
            container_override=container_override,
            check_files=check_files,
        )
        Logger.log("Finished starting task task")
        return tm

    except KeyboardInterrupt:
        Logger.info("Exiting...")

    except Exception as e:
        # Have to make sure we stop the engine if something happens when creating the task that causes
        # janis to exit early
        environment.engine.stop_engine()
        raise e
Example #18
0
    def create_task_base(self, wf: Workflow, outdir=None, store_in_centraldb=True):
        config = JanisConfiguration.manager()

        """
        If you don't spec
        
        """

        if not outdir and not config.outputdir:
            raise Exception(
                f"You must specify an output directory (or specify an '{JanisConfiguration.Keys.OutputDir.value}' "
                f"in your configuration)"
            )

        default_outdir = None

        if config.outputdir:
            default_outdir = os.path.join(config.outputdir, wf.id())

        forbiddenids = set()
        if store_in_centraldb:
            with self.with_cursor() as cursor:
                forbiddenids = set(
                    t[0] for t in cursor.execute("SELECT wid FROM tasks").fetchall()
                )
        if outdir:
            if os.path.exists(outdir):
                # this should theoretically scoop through all the ones in the taskDB and
                # add them to the forbidden ones, though this might cause more issues for now.
                forbiddenids = forbiddenids.union(set(os.listdir(outdir)))
        else:
            if os.path.exists(default_outdir):
                forbiddenids = forbiddenids.union(set(os.listdir(default_outdir)))

        wid = generate_new_id(forbiddenids)

        task_path = outdir
        if not task_path:
            od = default_outdir
            dt = datetime.now().strftime("%Y%m%d_%H%M%S")
            task_path = os.path.join(od, f"{dt}_{wid}/")

        task_path = fully_qualify_filename(task_path)

        Logger.info(f"Starting task with id = '{wid}'")

        row = TaskRow(wid, task_path)
        WorkflowManager.create_dir_structure(task_path)

        if store_in_centraldb:
            self.get_lazy_db_connection().insert_task(row)
        else:
            Logger.info(
                f"Not storing task '{wid}' in database. To watch, use: 'janis watch {task_path}'"
            )

        if self._connection:
            self._connection.commit()
            self._connection.close()
            self._taskDB = None
            self._connection = None
        return row
def run_with_outputs(
    tool: Union[j.CommandTool, j.Workflow],
    inputs: Dict[str, any],
    output_dir: str,
    config: JanisConfiguration = None,
    engine: Optional[str] = None,
    workflow_reference: Optional[str] = None,
):
    """
    Run and WAIT for a Janis workflow to complete. This helper method runs a workflow,
    and returns a dictionary of output values to their output tag. This method MAY throw,
    so ensure it's try-catch wrapped.
    :param tool: An INSTANTIATED tool definition. Seek this from the 'get_janis_workflow_from_searchname' earlier
    :param inputs: A dictionary of pure input values, not file paths.
    :param output_dir: Where to run the execution
    :param config: Optional config, else choose the default at $HOME/.janis/janis.conf
    :param workflow_reference: A reference to the workflow being run, this gets used to write a run.sh file
    :return: A dictionary of output values by the output tag
    """

    job = prepare_job(
        tool=tool,
        output_dir=output_dir,
        required_inputs=inputs,
        jc=config or JanisConfiguration.initial_configuration(None),
        # params to be automatically evaluated
        execution_dir=None,
        inputs={},
        allow_empty_container=False,
        check_files=True,
        container_override={},
        skip_digest_cache=False,
        skip_digest_lookup=False,
        batchrun_reqs=None,
        validation_reqs=None,
        engine=engine,
        hints={},
        keep_intermediate_files=False,
        max_cores=None,
        max_memory=None,
        max_duration=None,
        no_store=True,
        recipes=[],
        run_in_background=None,
        run_in_foreground=None,
        strict_inputs=False,
        watch=False,
        workflow_reference=workflow_reference,
        # don't do extra preprocessing steps
        run_prepare_processing=False,
        localise_all_files=True,
    )

    wm = run_from_jobfile(tool, jobfile=job, wait=True)
    if not wm:
        Logger.critical(f"An error occurred when running workflow {tool.id()}")
        return None

    if not wm.database:
        Logger.critical(
            f"An error occurred when getting the outputs for workflow {tool.id()}"
        )
        return None

    status = wm.database.get_uncached_status()
    if status != TaskStatus.COMPLETED:
        error = ""
        if wm.database.submission_metadata and wm.database.submission_metadata.metadata:
            error = wm.database.submission_metadata.metadata.error or ""
        Logger.critical(
            f"The workflow {tool.id()} ended with status {status}, and hence won't return outputs. Error: {error}"
        )
        return None

    outs = wm.database.outputsDB.get()
    return {
        o.id_: o.value or o.new_path
        for o in outs if o.value or o.new_path
    }
Example #20
0
    def start_engine(self, additional_cromwell_options: List[str] = None):

        from janis_assistant.management.configuration import JanisConfiguration

        jc = JanisConfiguration.manager()

        self.timeout = jc.cromwell.timeout or 10

        if self.test_connection():
            Logger.info("Engine has already been started")
            return self

        if self.connect_to_instance:
            self.is_started = True
            Logger.info(
                "Cromwell environment discovered, skipping local instance")
            return self

        if self._process:
            self.is_started = True
            Logger.info(
                f"Discovered Cromwell instance (pid={self._process}), skipping start"
            )
            return self

        if self.config:

            with open(self.config_path, "w+") as f:
                f.writelines(self.config.output())

        Logger.log("Finding cromwell jar")
        cromwell_loc = self.resolve_jar(self.cromwelljar)

        Logger.info(f"Starting cromwell ({os.path.basename(cromwell_loc)})...")
        cmd = ["java", "-DLOG_MODE=standard"]

        if jc.cromwell and jc.cromwell.memory:
            cmd.extend([
                f"-Xmx{jc.cromwell.memory}M",
                f"-Xms{max(jc.cromwell.memory//2, 1)}M"
            ])

        if Logger.CONSOLE_LEVEL == LogLevel.VERBOSE:
            cmd.append("-DLOG_LEVEL=DEBUG")

        if additional_cromwell_options:
            cmd.extend(additional_cromwell_options)

        self.port = find_free_port()
        self.host = f"127.0.0.1:{self.port}"

        cmd.append(f"-Dwebservice.port={self.port}")
        cmd.append(f"-Dwebservice.interface=127.0.0.1")

        if self.config_path and os.path.exists(self.config_path):
            Logger.debug("Using configuration file for Cromwell: " +
                         self.config_path)
            cmd.append("-Dconfig.file=" + self.config_path)
        cmd.extend(["-jar", cromwell_loc, "server"])

        Logger.debug(f"Starting Cromwell with command: '{' '.join(cmd)}'")
        self._process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            # preexec_fn creates a process group https://stackoverflow.com/a/4791612/
            preexec_fn=os.setsid,
        )
        Logger.info("Cromwell is starting with pid=" + str(self._process.pid))
        Logger.debug(
            "Cromwell will start the HTTP server, reading logs to determine when this occurs"
        )

        self._logfp = open(self.logfile, "a+")
        Logger.info("Will log to file" if bool(self._logfp
                                               ) else "Will NOT log to file")

        for c in iter(self._process.stdout.readline,
                      "b"):  # replace '' with b'' for Python 3

            rc = self._process.poll()
            if rc is not None:
                Logger.critical(
                    f"Cromwell has exited with rc={rc}. The last lines of the logfile ({self.logfile}):"
                )
                Logger.critical(tail(self._logfp, 10))
                return

            line = c.decode("utf-8").rstrip()

            if not line:
                continue

            if self._logfp and not self._logfp.closed:
                self._logfp.write(line + "\n")
                self._logfp.flush()
                os.fsync(self._logfp.fileno())

            Logger.debug("Cromwell: " + line)

            # self.stdout.append(str(c))
            if "service started on" in line:
                self.process_id = self._process.pid
                Logger.info("Service successfully started with pid=" +
                            str(self._process.pid))
                break
            # elif ansi_escape.match():
            #     raise Exception(cd)

        self.is_started = True

        if self._process:
            self._logger = ProcessLogger(
                process=self._process,
                prefix="Cromwell: ",
                logfp=self._logfp,
                # exit_function=self.something_has_happened_to_cromwell,
            )

        return self
    def from_janis(
        wid: str,
        outdir: str,
        tool: Tool,
        environment: Environment,
        hints: Dict[str, str],
        validation_requirements: Optional[ValidationRequirements],
        batchrun_requirements: Optional[BatchRunRequirements],
        inputs_dict: dict = None,
        dryrun=False,
        watch=True,
        max_cores=None,
        max_memory=None,
        keep_intermediate_files=False,
        run_in_background=True,
        dbconfig=None,
        allow_empty_container=False,
        container_override: dict = None,
        check_files=True,
    ):

        jc = JanisConfiguration.manager()

        # output directory has been created

        environment.identifier += "_" + wid

        tm = WorkflowManager(wid=wid, outdir=outdir, environment=environment)

        tm.database.runs.insert(wid)

        tm.database.workflowmetadata.wid = wid
        tm.database.workflowmetadata.engine = environment.engine
        tm.database.workflowmetadata.filescheme = environment.filescheme
        tm.database.workflowmetadata.environment = environment.id()
        tm.database.workflowmetadata.name = tool.id()
        tm.database.workflowmetadata.start = DateUtil.now()
        tm.database.workflowmetadata.executiondir = None
        tm.database.workflowmetadata.keepexecutiondir = keep_intermediate_files
        tm.database.workflowmetadata.configuration = jc
        tm.database.workflowmetadata.dbconfig = dbconfig

        # This is the only time we're allowed to skip the tm.set_status
        # This is a temporary stop gap until "notification on status" is implemented.
        # tm.set_status(TaskStatus.PROCESSING)
        tm.database.workflowmetadata.status = TaskStatus.PROCESSING

        tm.database.commit()

        spec = get_ideal_specification_for_engine(environment.engine)
        spec_translator = get_translator(spec)
        tool_evaluate = tm.prepare_and_output_workflow_to_evaluate_if_required(
            tool=tool,
            translator=spec_translator,
            validation=validation_requirements,
            batchrun=batchrun_requirements,
            hints=hints,
            additional_inputs=inputs_dict,
            max_cores=max_cores or jc.environment.max_cores,
            max_memory=max_memory or jc.environment.max_ram,
            allow_empty_container=allow_empty_container,
            container_override=container_override,
            check_files=check_files,
        )

        outdir_workflow = tm.get_path_for_component(
            WorkflowManager.WorkflowManagerPath.workflow
        )

        tm.database.workflowmetadata.submission_workflow = os.path.join(
            outdir_workflow, spec_translator.filename(tool_evaluate)
        )
        tm.database.workflowmetadata.submission_inputs = os.path.join(
            outdir_workflow, spec_translator.inputs_filename(tool_evaluate)
        )
        tm.database.workflowmetadata.submission_resources = os.path.join(
            outdir_workflow, spec_translator.dependencies_filename(tool_evaluate)
        )

        tm.database.commit()

        if not dryrun:
            if (
                not run_in_background
                and jc.template
                and jc.template.template
                and jc.template.template.can_run_in_foreground is False
            ):
                raise Exception(
                    f"Your template '{jc.template.template.__class__.__name__}' is not allowed to run "
                    f"in the foreground, try adding the '--background' argument"
                )
            tm.start_or_submit(run_in_background=run_in_background, watch=watch)
        else:
            tm.set_status(TaskStatus.DRY_RUN)

        tm.database.commit()

        return tm
Example #22
0
    def resolve_jar(cromwelljar):
        from janis_assistant.management.configuration import JanisConfiguration

        man = JanisConfiguration.manager()
        if not man:
            raise Exception(
                f"No configuration was initialised. This is "
                f"likely an error, and you should raise an issue at {ISSUE_URL}"
            )

        potentials = []

        fromenv = EnvVariables.cromwelljar.resolve(False)

        if cromwelljar:
            potentials.append(os.path.expanduser(cromwelljar))
        if man.cromwell.jarpath:
            potentials.append(os.path.expanduser(man.cromwell.jarpath))
        if fromenv:
            potentials.append(fromenv)
        potentials.extend(
            reversed(
                sorted(glob(os.path.join(man.configdir + "cromwell-*.jar")))))

        valid_paths = [p for p in potentials if os.path.exists(p)]
        if len(potentials) > 0:
            if len(valid_paths) == 0:
                raise Exception(
                    "Couldn't find cromwelljar at any of the required paths: "
                    + ", ".join(potentials))
            cromwelljar = valid_paths[0]

        if not cromwelljar:

            progress_is_loaded = False
            try:
                import progressbar

                progress_is_loaded = True
            except:
                Logger.critical("Couldn't find progressbar module")

            pbar = None

            def show_progress(block_num, block_size, total_size):
                nonlocal pbar
                if pbar is None and progress_is_loaded:
                    pbar = progressbar.ProgressBar(maxval=total_size)
                downloaded = block_num * block_size
                if downloaded < total_size:
                    if pbar:
                        pbar.update(downloaded)
                    else:
                        print(
                            f"\rProgress: {round(downloaded * 100 / total_size)}%",
                            end="",
                            file=sys.stderr,
                        )

                else:
                    if pbar:
                        pbar.finish()
                        pbar = None
                    else:
                        print("\rCompleted download of cromwell",
                              file=sys.stderr)

            cromwellurl, cromwellfilename = Cromwell.get_latest_cromwell_url()
            Logger.info(
                f"Couldn't find cromwell at any of the usual spots, downloading '{cromwellfilename}' now"
            )
            cromwelljar = os.path.join(man.configdir, cromwellfilename)
            request.urlretrieve(cromwellurl, cromwelljar, show_progress)
            Logger.info(f"Downloaded {cromwellfilename}")

        return cromwelljar
Example #23
0
 def test_get_state(self):
     jc = JanisConfiguration({})
     self.assertIsInstance(jc.__getstate__(), dict)
Example #24
0
 def test_cromwell(self):
     jc = JanisConfiguration({})
     self.assertIsNone(jc.cromwell.some_other_random_attribute)
Example #25
0
def do_run(args):
    jc = JanisConfiguration.initial_configuration(args.config)

    validation_reqs, batchrun_reqs = None, None

    if args.validation_fields:
        Logger.info("Will prepare validation")
        validation_reqs = ValidationRequirements(
            truthVCF=args.validation_truth_vcf,
            reference=args.validation_reference,
            fields=args.validation_fields,
            intervals=args.validation_intervals,
        )

    if args.batchrun:
        Logger.info("Will prepare batch run")
        batchrun_reqs = BatchRunRequirements(fields=args.batchrun_fields,
                                             groupby=args.batchrun_groupby)

    hints = {
        k[5:]: v
        for k, v in vars(args).items()
        if k.startswith("hint_") and v is not None
    }

    # the args.extra_inputs parameter are inputs that we MUST match
    # we'll need to parse them manually and then pass them to fromjanis as requiring a match
    required_inputs = parse_additional_arguments(args.extra_inputs)

    inputs = args.inputs or []
    # we'll manually suck "inputs" out of the extra parms, otherwise it's actually really
    # annoying if you forget to put the inputs before the workflow positional argument.
    # TBH, we could automatically do this for all params, but it's a little trickier

    if "inputs" in required_inputs:
        ins = required_inputs.pop("inputs")
        inputs.extend(ins if isinstance(ins, list) else [ins])
    if "i" in required_inputs:
        ins = required_inputs.pop("i")
        inputs.extend(ins if isinstance(ins, list) else [ins])

    keep_intermediate_files = args.keep_intermediate_files is True

    db_config = jc.cromwell.get_database_config_helper()

    if args.mysql:
        db_config.should_manage_mysql = True

    if args.no_database:
        db_config.skip_database = True

    if args.development:
        # no change for using mysql, as a database is the default
        keep_intermediate_files = True
        JanisConfiguration.manager().cromwell.call_caching_enabled = True

    wid = fromjanis(
        args.workflow,
        name=args.name,
        validation_reqs=validation_reqs,
        batchrun_reqs=batchrun_reqs,
        engine=args.engine,
        filescheme=args.filescheme,
        hints=hints,
        output_dir=args.output_dir,
        inputs=inputs,
        required_inputs=required_inputs,
        filescheme_ssh_binding=args.filescheme_ssh_binding,
        cromwell_url=args.cromwell_url,
        watch=args.progress,
        max_cores=args.max_cores,
        max_mem=args.max_memory,
        force=args.no_cache,
        recipes=args.recipe,
        keep_intermediate_files=keep_intermediate_files,
        run_in_background=(args.background is True),
        run_in_foreground=(args.foreground is True),
        dbconfig=db_config,
        only_toolbox=args.toolbox,
        no_store=args.no_store,
        allow_empty_container=args.allow_empty_container,
        check_files=not args.skip_file_check,
        container_override=parse_container_override_format(
            args.container_override),
    )

    Logger.info("Exiting")
    raise SystemExit
Example #26
0
 def test_1(self):
     jc = JanisConfiguration({})
     self.assertIsNone(jc.random_attribute)
def prepare_from_args(
        args, run_prepare_processing: bool) -> Tuple[PreparedJob, Tool]:
    jc = JanisConfiguration.initial_configuration(path=args.config)

    # the args.extra_inputs parameter are inputs that we MUST match
    # we'll need to parse them manually and then pass them to fromjanis as requiring a match
    required_inputs = parse_additional_arguments(args.extra_inputs)

    inputs = args.inputs or []
    # we'll manually suck "inputs" out of the extra parms, otherwise it's actually really
    # annoying if you forget to put the inputs before the workflow positional argument.
    # TBH, we could automatically do this for all params, but it's a little trickier

    if "inputs" in required_inputs:
        ins = required_inputs.pop("inputs")
        inputs.extend(ins if isinstance(ins, list) else [ins])
    if "i" in required_inputs:
        ins = required_inputs.pop("i")
        inputs.extend(ins if isinstance(ins, list) else [ins])

    validation, batchrun = None, None
    if args.validation_fields:
        Logger.info("Will prepare validation")
        validation = ValidationRequirements(
            truthVCF=args.validation_truth_vcf,
            reference=args.validation_reference,
            fields=args.validation_fields,
            intervals=args.validation_intervals,
        )

    if args.batchrun:
        Logger.info("Will prepare batch run")
        batchrun = BatchRunRequirements(fields=args.batchrun_fields,
                                        groupby=args.batchrun_groupby)

    db_type: Optional[DatabaseTypeToUse] = None
    if args.no_database:
        db_type = DatabaseTypeToUse.none
    elif args.mysql:
        db_type = DatabaseTypeToUse.managed

    try:
        source_hints = args.source_hint
    except:
        source_hints = []

    wf, wf_reference = resolve_tool(
        tool=args.workflow,
        name=args.name,
        from_toolshed=True,
        only_toolbox=args.toolbox,
        force=args.no_cache,
    )

    job = prepare_job(
        run_prepare_processing=run_prepare_processing,
        tool=wf,
        workflow_reference=wf_reference,
        jc=jc,
        engine=args.engine,
        output_dir=args.output_dir,
        execution_dir=args.execution_dir,
        max_cores=args.max_cores,
        max_memory=args.max_memory,
        max_duration=args.max_duration,
        run_in_foreground=args.foreground is True,
        run_in_background=args.background is True,
        check_files=not args.skip_file_check,
        container_override=parse_container_override_format(
            args.container_override),
        skip_digest_lookup=args.skip_digest_lookup,
        skip_digest_cache=args.skip_digest_cache,
        hints={
            k[5:]: v
            for k, v in vars(args).items()
            if k.startswith("hint_") and v is not None
        },
        validation_reqs=validation,
        batchrun_reqs=batchrun,
        # inputs
        inputs=inputs,
        required_inputs=required_inputs,
        watch=args.progress,
        recipes=args.recipe,
        keep_intermediate_files=args.keep_intermediate_files is True,
        no_store=args.no_store,
        allow_empty_container=args.allow_empty_container,
        strict_inputs=args.strict_inputs,
        db_type=db_type,
        source_hints=source_hints,
        post_run_script=args.post_run_script,
        localise_all_files=args.localise_all_files,
    )

    return job, wf