Beispiel #1
0
    def __init__(
        self,
        recipes: dict = None,
        paths: Union[str, List[str]] = None,
        directories: Union[str, List[str]] = None,
    ):
        """
        Record where recipes come from: inline values, recipe files and recipe directories.

        A single string passed as ``paths`` or ``directories`` is wrapped in a
        one-element list, so both attributes are always either ``None`` or a
        list of fully qualified filenames (matching their annotations).

        :param recipes: A dictionary of input values, keyed by the recipe name.
        :type recipes: dict
        :param paths: a list of ``*.yaml`` files, where each path contains a dictionary of input values, keyed by the recipe name, similar to the previous recipes name.
        :type paths: List[str]
        :param directories: a directory of ``*.yaml`` files, where the ``*`` is the recipe name.
        :type directories: List[str]
        """
        self.recipes = recipes or {}
        self.paths: Optional[List[str]] = None
        self.directories: Optional[List[str]] = None

        self._files_by_key = None
        self._loaded_recipes = False

        if paths:
            # Normalise a lone string to a list so the attribute is never a bare str.
            path_values = paths if isinstance(paths, list) else [paths]
            self.paths = [fully_qualify_filename(p) for p in path_values]

        if directories:
            directory_values = (
                directories if isinstance(directories, list) else [directories]
            )
            self.directories = [
                fully_qualify_filename(d) for d in directory_values
            ]
    def create_task_base(self, wf: Workflow, job: PreparedJob):
        """
        Allocate a new submission id for ``job``, prepare its directories and return its TaskRow.

        When ``job.store_in_central_db`` is set, ids already present in the ``tasks``
        table are excluded from id generation and the new row is inserted. If the
        table has no ``id`` column, the database file at ``job.db_path`` is moved
        aside (suffix ``.original-YYYYMMDD``) and the method retries.

        Side effects: fills in and fully qualifies ``job.execution_dir``, creates the
        directory structure, and commits + closes any open connection.

        :param wf: the workflow being submitted (only forwarded on retry)
        :param job: the prepared job describing output/execution directories
        :return: the TaskRow for the new submission
        """
        taken_ids = set()
        if job.store_in_central_db:
            try:
                with self.with_cursor() as cursor:
                    id_rows = cursor.execute("SELECT id FROM tasks").fetchall()
                    taken_ids = {r[0] for r in id_rows}
            except sqlite3.OperationalError as err:
                if "no such column: id" not in repr(err):
                    raise

                from shutil import move

                # The database has an incompatible schema: move it out of the way and retry.
                stamp = datetime.utcnow().strftime('%Y%m%d')
                backup_path = f"{job.db_path}.original-{stamp}"
                Logger.warn(f"Moving old janis-db to '{backup_path}'")
                move(job.db_path, backup_path)
                self._taskDB = None
                return self.create_task_base(wf=wf, job=job)

        submission_id = generate_new_id(taken_ids)
        output_dir = fully_qualify_filename(job.output_dir)

        if not job.execution_dir:
            job.execution_dir = os.path.join(output_dir, "janis")
            Logger.debug(
                f"No execution-dir was provided, constructed one from the output-dir: {job.execution_dir}"
            )
        job.execution_dir = fully_qualify_filename(job.execution_dir)

        Logger.info(
            f"Starting task with id = '{submission_id}' | output dir: {job.output_dir} | execution dir: {job.execution_dir}"
        )

        task_row = TaskRow(
            submission_id, execution_dir=job.execution_dir, output_dir=output_dir
        )
        WorkflowManager.create_dir_structure(job.execution_dir)

        if not job.store_in_central_db:
            Logger.info(
                f"Not storing task '{submission_id}' in database. To watch, use: 'janis watch {output_dir}'"
            )
        else:
            self.get_lazy_db_connection().insert_task(task_row)

        if not self._connection:
            return task_row

        # Flush and release the connection; it is re-opened on demand.
        self._connection.commit()
        self._connection.close()
        self._taskDB = None
        self._connection = None
        return task_row
    def __init__(
        self, outdir: str, wid: str, environment: Environment = None, readonly=False
    ):
        """
        Set up a manager rooted at ``outdir`` for the workflow id ``wid``.

        The output directory is fully qualified and its directory structure is
        created before the workflow database is opened. If ``wid`` is falsy, it
        is replaced by the value of ``get_engine_wid()``.

        :param outdir: output directory for this workflow
        :param wid: workflow id (may be falsy, see above)
        :param environment: optional environment, stored as-is
        :param readonly: forwarded to the WorkflowDbManager
        """
        self.wid = wid
        self.environment = environment

        # State that starts empty and is filled in later
        self._failed_engine_attempts = None
        self._engine_wid = None
        self._prev_status = None
        self._engine: Optional[Engine] = None
        self.dbcontainer: MySql = None
        self.main_queue = queue.Queue()

        # The directory structure must exist before the database is opened
        self.path = fully_qualify_filename(outdir)
        self.create_dir_structure(self.path)

        task_path = self.get_task_path_safe()
        self.database = WorkflowDbManager(wid, task_path, readonly=readonly)

        if not self.wid:
            self.wid = self.get_engine_wid()
Beispiel #4
0
    def process_container_dir(container_dir):
        """
        Resolve the directory used to cache singularity containers.

        An explicit ``container_dir`` (anything other than ``None``) takes
        precedence; otherwise the first non-empty value of the environment
        variables ``CWL_SINGULARITY_CACHE`` and ``SINGULARITY_TMPDIR`` is used.

        :param container_dir: directory supplied by the caller, or None
        :return: the fully qualified directory
        :raises Exception: if no directory was given and neither variable is set
        """
        from os import getenv

        if container_dir is not None:
            return fully_qualify_filename(container_dir)

        candidate_vars = ["CWL_SINGULARITY_CACHE", "SINGULARITY_TMPDIR"]
        # First variable with a non-empty value, in declaration order
        from_env = next(filter(None, map(getenv, candidate_vars)), None)
        if from_env is not None:
            return fully_qualify_filename(from_env)

        raise Exception(
            "Couldn't find a directory to cache singularity containers, please provide a "
            "'container_dir' to your template, or set one of the following env variables: "
            + ", ".join(candidate_vars)
        )
    def fully_qualify_filename_array_or_single(value: Union[str, List[str],
                                                            List[List[str]]]):
        """
        Fully qualify one filename, or every filename inside a (nested) list.

        :param value: a single filename, or a list whose elements are filenames or further lists
        :return: the qualified filename for a non-list value; for a list, a new list with the same nesting
        """
        if not isinstance(value, list):
            return fully_qualify_filename(value)

        # Lists are handled element-wise, recursing into nested lists.
        recurse = InputFileQualifierModifier.fully_qualify_filename_array_or_single
        return [recurse(element) for element in value]
Beispiel #6
0
    def from_wid(self, wid, readonly=False):
        """
        Build a WorkflowManager from a workflow id, falling back to treating ``wid`` as a path.

        The ``tasks`` table is queried for the output directory of ``wid``. If no
        row is found, ``wid`` is fully qualified as a filename and, when that path
        exists, the latest manager at that path is returned.

        :param wid: workflow id (or a path to an output directory)
        :param readonly: stored on ``self.readonly`` and forwarded to the WorkflowManager
        :raises Exception: if there is neither a matching row nor an existing path
        """
        self.readonly = readonly
        with self.with_cursor() as cursor:
            lookup = cursor.execute("SELECT outputdir FROM tasks where wid=?", (wid,))
            row = lookup.fetchone()

        if row:
            return WorkflowManager.from_path_with_wid(
                row[0], wid=wid, readonly=readonly
            )

        # No database entry: the value may be a directory instead of an id.
        candidate_path = fully_qualify_filename(wid)
        if not os.path.exists(candidate_path):
            raise Exception(f"Couldn't find task with id='{wid}'")

        return WorkflowManager.from_path_get_latest(candidate_path, readonly=readonly)
    def __init__(self, db_path: Optional[str], readonly=False):
        """
        Remember where the central database lives and make sure its directory exists.

        :param db_path: path of the database file; when falsy, ``janis.db`` inside
            the resolved config directory is used (fully qualified)
        :param readonly: stored on ``self.readonly``
        """
        self.readonly = readonly
        if not db_path:
            config_dir = EnvVariables.config_dir.resolve(True)
            Logger.log(
                f"db_path wasn't provided to config manager, using config_dir: '{config_dir}/janis.db'"
            )
            db_path = fully_qualify_filename(os.path.join(config_dir, "janis.db"))
        self.db_path = db_path
        # Evaluated before any directory is created, so it reflects the state on entry.
        self.is_new = not os.path.exists(db_path)

        parent_dir = os.path.dirname(db_path)
        # A bare filename (e.g. "janis.db") has an empty dirname, and
        # os.makedirs("") raises FileNotFoundError; nothing to create then.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        # Both are created lazily
        self._connection: Optional[sqlite3.Connection] = None
        self._taskDB: Optional[TasksDbProvider] = None
def generate_output_dir_from(wf_id, output_dir, jc_output_dir):
    """
    Work out the fully qualified output directory for a run.

    An explicit ``output_dir`` is used as given. Otherwise the directory is
    ``<jc_output_dir>/<wf_id>/<YYYYmmdd_HHMMSS>`` using the current local time.

    :param wf_id: workflow identifier, used as a sub-directory of ``jc_output_dir``
    :param output_dir: explicitly requested output directory (may be falsy)
    :param jc_output_dir: output directory from the configuration (may be falsy)
    :return: the fully qualified output directory
    :raises Exception: if both ``output_dir`` and ``jc_output_dir`` are falsy
    """
    if not (output_dir or jc_output_dir):
        raise Exception(
            f"You must specify an output directory (or specify an 'output_dir' "
            f"in your configuration)")

    # Built whenever a configured directory exists, even if it ends up unused.
    workflow_root = os.path.join(jc_output_dir, wf_id) if jc_output_dir else None

    if output_dir:
        chosen_dir = output_dir
    else:
        run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        chosen_dir = os.path.join(workflow_root, run_stamp)

    return fully_qualify_filename(chosen_dir)
    def get_from_path_or_submission_lazy(
        submission_id, readonly: bool, db_path: Optional[str] = None,
    ):
        """
        2020-10-01 mfranklin:
            Probably the method you want to get a WorkflowManager from submissionID:

        ``submission_id`` is first treated as a path; only when that path does not
        exist is a ConfigManager created to look the id up.

        :param submission_id: a submission id, or a path to an existing directory
        :param readonly: forwarded to the resulting WorkflowManager
        :param db_path: optional database path handed to the ConfigManager
        :return: WorkflowManager of the submission_id (or THROWS)
        """
        candidate_path = fully_qualify_filename(submission_id)
        if not os.path.exists(candidate_path):
            # The path was just checked, so the lookup below skips its own path check.
            config_manager = ConfigManager(db_path=db_path, readonly=True)
            return config_manager.get_from_path_or_submission(
                submission_id=submission_id, readonly=readonly, perform_path_check=False
            )

        return WorkflowManager.from_path_get_latest_manager(
            candidate_path, readonly=readonly
        )
    def get_row_for_submission_id_or_path(self, submission_id) -> TaskRow:
        """
        Return the TaskRow for a submission id, or build one from a directory path.

        The database is consulted first. If it has no entry, ``submission_id`` is
        treated as a path; when it exists, a TaskRow is assembled from the latest
        submission found there (with ``output_dir`` and ``timestamp`` set to None).

        :param submission_id: a submission id, or a path to an existing directory
        :raises Exception: if neither a database entry nor a directory is found
        """
        stored_row = self.get_lazy_db_connection().get_by_id(submission_id)
        if stored_row:
            return stored_row

        candidate_path = fully_qualify_filename(submission_id)
        if not os.path.exists(candidate_path):
            raise Exception(
                f"Couldn't find task with id='{submission_id}', and no directory was found."
            )

        execpath, sid = WorkflowManager.from_path_get_latest_submission_id(
            candidate_path
        )
        return TaskRow(
            submission_id=sid,
            execution_dir=execpath,
            output_dir=None,
            timestamp=None,
        )
    def get_from_path_or_submission(
        self, submission_id, readonly: bool, perform_path_check=True
    ):
        """
        Return a WorkflowManager for a directory path or a stored submission id.

        :param submission_id: a submission id, or a path to an existing directory
        :param readonly: forwarded to the resulting WorkflowManager
        :param perform_path_check: when true, an existing path named by
            ``submission_id`` takes precedence over the database lookup
        :raises Exception: if no directory (when checked) and no database entry is found
        """
        if perform_path_check:
            candidate_path = fully_qualify_filename(submission_id)
            if os.path.exists(candidate_path):
                return WorkflowManager.from_path_get_latest_manager(
                    candidate_path, readonly=readonly
                )

        stored_row = self.get_lazy_db_connection().get_by_id(submission_id)
        if not stored_row:
            raise Exception(
                f"Couldn't find task with id='{submission_id}', and no directory was found "
            )

        return WorkflowManager.from_path_with_submission_id(
            stored_row.execution_dir,
            submission_id=submission_id,
            readonly=readonly,
        )
def init_template(
    templatename,
    stream=None,
    unparsed_init_args=None,
    output_location=None,
    force=False,
):
    """
    Build a Janis configuration (optionally from a template) and write it as YAML.

    The configuration is built lazily and at most once. It is written to
    ``output_location`` (or the resolved default config path) unless a file
    already exists there and ``force`` is false.

    :param templatename: name of the template to initialise; when falsy, the
        default configuration is written without a template section
    :param stream: when truthy, the configuration is also dumped to ``sys.stdout``
    :param unparsed_init_args: extra command-line arguments for the template's
        argument parser; may be None
    :param output_location: where to write the config; defaults to the resolved config path
    :param force: overwrite an existing file at the output path
    :return: None
    """
    import ruamel.yaml

    outpath = fully_qualify_filename(output_location
                                     or EnvVariables.config_path.resolve(True))

    cached_outd = None

    def get_config():
        """
        This is here to lazily instantiate the config
        """
        nonlocal cached_outd
        if not cached_outd:

            outd = JanisConfiguration.default()

            if templatename:
                tmpl = janistemplates.get_template(templatename)
                schema = janistemplates.get_schema_for_template(tmpl)

                mapped_schema_to_default = {
                    s.identifier: s.default
                    for s in schema if s.default is not None
                }

                # parse extra params
                description = dedent(tmpl.__doc__) if tmpl.__doc__ else None

                parser = InitArgParser(templatename,
                                       schema,
                                       description=description)
                parsed = parser.parse_args(unparsed_init_args)

                try:
                    # "easier to ask for forgiveness than permission" https://stackoverflow.com/a/610923
                    keys_to_skip = set(tmpl.ignore_init_keys)
                except AttributeError:
                    Logger.log(
                        f"Template '{templatename}' didn't have 'ignore_init_keys'"
                    )
                    keys_to_skip = set()

                outd["engine"] = EngineType.cromwell
                # Keep a key if it was explicitly parsed, or if it has a default
                # and the template did not ask for it to be skipped.
                # NOTE(review): values are looked up with s.id() while the filter
                # uses s.identifier - presumably the same value; confirm.
                outd["template"] = {
                    s.id(): parsed.get(s.id(),
                                       mapped_schema_to_default.get(s.id()))
                    for s in schema if (s.identifier in parsed) or (
                        s.identifier in mapped_schema_to_default
                        and s.identifier not in keys_to_skip)
                }
                outd["template"]["id"] = templatename

            cached_outd = stringify_dict_keys_or_return_value(outd)
        return cached_outd

    # unparsed_init_args defaults to None, which does not support `in`; treat it
    # as "no flags" for this check only (the parser still receives the original value).
    help_requested = any(
        flag in (unparsed_init_args or ()) for flag in ("-h", "--help")
    )
    if help_requested:
        # Build the config eagerly so the template's argument parser sees the help flag.
        get_config()

    does_exist = os.path.exists(outpath)
    if does_exist and not force:
        Logger.info(
            f"Janis will skip writing config as file exists at: '{outpath}'")
    else:
        if does_exist:
            Logger.info(f"Overwriting template at '{outpath}'")
        else:
            Logger.info(f"Saving Janis config to '{outpath}'")

        os.makedirs(os.path.dirname(outpath), exist_ok=True)
        val = get_config()
        with open(outpath, "w+") as configpath:
            ruamel.yaml.dump(val, configpath, default_flow_style=False)

    if stream:
        ruamel.yaml.dump(get_config(), sys.stdout, default_flow_style=False)
Beispiel #13
0
    def create_task_base(self, wf: Workflow, outdir=None, store_in_centraldb=True):
        """
        Allocate a new workflow id, create its task directory and return its TaskRow.

        Without an explicit ``outdir`` the task directory is
        ``<configured outputdir>/<wf.id()>/<YYYYmmdd_HHMMSS>_<wid>/``. Ids already
        in the ``tasks`` table (when ``store_in_centraldb``) and names already
        present in the target directory are excluded from id generation.

        :param wf: workflow being submitted; its id names the default sub-directory
        :param outdir: explicit task directory (optional)
        :param store_in_centraldb: whether to record the task in the central database
        :return: the TaskRow for the new task
        :raises Exception: if neither ``outdir`` nor a configured output directory is available
        """
        config = JanisConfiguration.manager()

        if not (outdir or config.outputdir):
            raise Exception(
                f"You must specify an output directory (or specify an '{JanisConfiguration.Keys.OutputDir.value}' "
                f"in your configuration)"
            )

        default_outdir = (
            os.path.join(config.outputdir, wf.id()) if config.outputdir else None
        )

        forbiddenids = set()
        if store_in_centraldb:
            with self.with_cursor() as cursor:
                wid_rows = cursor.execute("SELECT wid FROM tasks").fetchall()
                forbiddenids = {r[0] for r in wid_rows}

        # this should theoretically scoop through all the ones in the taskDB and
        # add them to the forbidden ones, though this might cause more issues for now.
        listing_dir = outdir if outdir else default_outdir
        if os.path.exists(listing_dir):
            forbiddenids = forbiddenids | set(os.listdir(listing_dir))

        wid = generate_new_id(forbiddenids)

        if outdir:
            task_path = outdir
        else:
            run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            task_path = os.path.join(default_outdir, f"{run_stamp}_{wid}/")
        task_path = fully_qualify_filename(task_path)

        Logger.info(f"Starting task with id = '{wid}'")

        task_row = TaskRow(wid, task_path)
        WorkflowManager.create_dir_structure(task_path)

        if not store_in_centraldb:
            Logger.info(
                f"Not storing task '{wid}' in database. To watch, use: 'janis watch {task_path}'"
            )
        else:
            self.get_lazy_db_connection().insert_task(task_row)

        if not self._connection:
            return task_row

        # Flush and release the connection; it is re-opened on demand.
        self._connection.commit()
        self._connection.close()
        self._taskDB = None
        self._connection = None
        return task_row
Beispiel #14
0
 def test_ignore_gcs(self):
     # A gcs:// URI is expected to come back unchanged.
     gcs_uri = "gcs://janis/readthedocs/io.txt"
     qualified = fully_qualify_filename(gcs_uri)
     self.assertEqual(gcs_uri, qualified)
Beispiel #15
0
 def test_ignore_http(self):
     # An http:// URL is expected to come back unchanged.
     http_url = "http://janis.readthedocs.io"
     qualified = fully_qualify_filename(http_url)
     self.assertEqual(http_url, qualified)
Beispiel #16
0
 def test_nonrelative(self):
     # An absolute path is expected to come back unchanged.
     absolute_path = "/test/my.txt"
     qualified = fully_qualify_filename(absolute_path)
     self.assertEqual(absolute_path, qualified)
Beispiel #17
0
    def test_user_expand(self):
        # A leading "~" is expected to be expanded the same way os.path.expanduser does.
        from os.path import expanduser

        home_relative = "~/my.txt"
        expected = expanduser(home_relative)
        self.assertEqual(expected, fully_qualify_filename(home_relative))
Beispiel #18
0
 def test_dot_relative(self):
     # "./<name>" is expected to resolve to <cwd>/<name>.
     filename = "my.txt"
     expected = ospathjoin(self.cwd, filename)
     actual = fully_qualify_filename("./" + filename)
     self.assertEqual(expected, actual)
Beispiel #19
0
    def __init__(
        self,
        output_dir: str = EnvVariables.output_dir.resolve(False),
        execution_dir: str = EnvVariables.exec_dir.resolve(False),
        call_caching_enabled: bool = True,
        engine: str = EngineType.cromwell.value,
        cromwell: Union[JanisConfigurationCromwell, dict] = None,
        template: Union[JanisConfigurationTemplate, dict] = None,
        recipes: Union[JanisConfigurationRecipes, dict] = None,
        notifications: Union[JanisConfigurationNotifications, dict] = None,
        environment: Union[JanisConfigurationEnvironment, dict] = None,
        run_in_background: bool = None,
        digest_cache_location: str = None,
        container: Union[str, Container] = None,
        search_paths: List[str] = None,
    ):
        """
        :param engine: Default engine to use
        :type engine: "cromwell" | "cwltool"
        :param template: Specify options for a Janis template for configuring an execution environment
        :type template: JanisConfigurationTemplate
        :param cromwell: A dictionary for how to configure Cromwell for Janis
        :type cromwell: JanisConfigurationCromwell
        :param recipes: Configure recipes in Janis
        :type recipes: JanisConfigurationRecipes
        :param notifications: Configure Janis notifications
        :type notifications: JanisConfigurationNotifications
        :param environment: Additional ways to configure the execution environment for Janis
        :type environment: JanisConfigurationEnvironment
        :param output_dir: A directory that Janis will use to generate a new output directory for each janis-run
        :param execution_dir: Move all execution to a static directory outside the regular output directory.
        :param call_caching_enabled: (default: true) call-caching is enabled for subsequent runs, on the SAME output directory
        :param run_in_background: By default, run workflows as a background process. In a SLURM environment, this might submit Janis as a SLURM job.
        :type run_in_background: bool
        :param digest_cache_location: A cache of docker tags to their digests; Janis uses it to replace your docker tag with its `digest <https://docs.docker.com/engine/reference/commandline/pull/#pull-an-image-by-digest-immutable-identifier>`_.
        :type digest_cache_location: str
        :param container: Container technology to use, important for checking if container environment is available and running mysql instance.
        :type container: "docker" | "singularity"
        :param search_paths: A list of paths to check when looking for python files and input files
        :type search_paths: List[str]
        """

        def _section(section_type, value, key):
            # Every sub-section is parsed the same way; a missing value becomes {}.
            return parse_if_dict(section_type, value or {}, key, skip_if_empty=False)

        # Locations derived from the config directory
        self.config_dir = EnvVariables.config_dir.resolve(True)
        self.db_path = fully_qualify_filename(
            os.path.join(self.config_dir, "janis.db")
        )
        self.digest_cache_location = digest_cache_location or os.path.join(
            self.config_dir, "digest_cache"
        )

        # Plain values
        self.output_dir = output_dir
        self.execution_dir = execution_dir
        self.search_paths = search_paths or [os.path.expanduser("~/janis/")]

        self.engine = engine
        self.call_caching_enabled = call_caching_enabled
        self.run_in_background = run_in_background

        # Sub-sections (each may be given as a dict or an already-built object)
        self.recipes = _section(JanisConfigurationRecipes, recipes, "recipes")
        self.template = _section(JanisConfigurationTemplate, template, "template")
        self.cromwell: JanisConfigurationCromwell = _section(
            JanisConfigurationCromwell, cromwell, "cromwell"
        )
        self.notifications: JanisConfigurationNotifications = _section(
            JanisConfigurationNotifications, notifications, "notifications"
        )
        self.environment: JanisConfigurationEnvironment = _section(
            JanisConfigurationEnvironment, environment, "environment"
        )

        # Gets set by the template for now, but eventually we should be able to look it up
        if not container:
            self.container = None
        elif isinstance(container, Container):
            self.container = container
        else:
            self.container = get_container_by_name(container)

        # Give the selected template a chance to adjust this configuration.
        if self.template and self.template.template:
            self.template.template.post_configuration_hook(self)