Example 1: get_source reads a script through the SourceCache and detects its language
def get_source(
    path,
    sourcecache: sourcecache.SourceCache,
    basedir=None,
    wildcards=None,
    params=None,
):
    if wildcards is not None and params is not None:
        if isinstance(path, SourceFile):
            path = path.get_path_or_uri()
        # Format path if wildcards are given.
        path = infer_source_file(format(path, wildcards=wildcards, params=params))

    if basedir is not None:
        basedir = infer_source_file(basedir)

    source_file = infer_source_file(path, basedir)
    with sourcecache.open(source_file) as f:
        source = f.read()

    language = get_language(source_file, source)

    is_local = isinstance(source_file, LocalSourceFile)

    return source_file, source, language, is_local
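
A minimal usage sketch for get_source above; the script path, cache directory, and basedir are hypothetical placeholders, the SourceCache construction mirrors Example 10, and in recent snakemake versions the helper can be imported from snakemake.script:

from snakemake.script import get_source
from snakemake.sourcecache import SourceCache

# Resolve the (relative) script path against basedir, read the file
# through the cache, and infer its language from path and content.
source_file, source, language, is_local = get_source(
    "scripts/plot.py",                      # example script path
    SourceCache("/tmp/snakemake-sources"),  # cache dir is an assumed value
    basedir="/home/user/workflow",
)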
Example 2: a conda environment constructor that accepts either an env file or a named environment
    def __init__(
        self,
        workflow,
        env_file=None,
        env_name=None,
        env_dir=None,
        container_img=None,
        cleanup=None,
    ):
        self.file = env_file
        if env_file is not None:
            self.file = infer_source_file(env_file)
        self.name = env_name
        if env_name is not None:
            assert env_file is None, "bug: both env_file and env_name specified"

        self.frontend = workflow.conda_frontend
        self.workflow = workflow

        self._container_img = container_img
        self._env_dir = env_dir or (containerize.CONDA_ENV_PATH
                                    if self.is_containerized else
                                    workflow.persistence.conda_env_path)
        self._hash = None
        self._content_hash = None
        self._content = None
        self._content_deploy = None
        self._content_pin = None
        self._path = None
        self._archive_file = None
        self._cleanup = cleanup
        self._singularity_args = workflow.singularity_args
Example 3: pin_file locates a platform-specific pin file next to the env file
    def pin_file(self):
        pin_file = Path(self.file.get_path_or_uri()).with_suffix(
            f".{self.conda.platform}.pin.txt")

        if pin_file.exists():
            return infer_source_file(pin_file)
        else:
            return None
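
The pin file naming convention can be illustrated with pathlib alone; the env file name and platform below are assumed example values:

from pathlib import Path

# For an env file envs/mapping.yaml on platform linux-64, pin_file
# looks for the sibling file envs/mapping.linux-64.pin.txt.
print(Path("envs/mapping.yaml").with_suffix(".linux-64.pin.txt"))
# envs/mapping.linux-64.pin.txt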
Example 4: get_path prepends a wrapper prefix, with special handling for git+file prefixes
def get_path(path, prefix=None):
    if not is_url(path):
        if prefix is None:
            prefix = PREFIX
    elif prefix.startswith("git+file"):
        # Rewrite "<tag>/<subpath>" to "/<subpath>@<tag>" so the git ref
        # follows the repository path once the prefix is prepended.
        parts = path.split("/")
        path = "/" + "/".join(parts[1:]) + "@" + parts[0]
        path = prefix + path
    return infer_source_file(path)
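
A worked illustration of the git+file branch, using only string operations; the prefix and wrapper path are assumed example values:

# "<tag>/<subpath>" is rewritten to "/<subpath>@<tag>", so the git ref
# ends up after the repository path once the prefix is prepended.
prefix = "git+file:///home/user/snakemake-wrappers"
path = "v1.0.0/bio/bwa/mem"  # hypothetical tag and wrapper subpath
parts = path.split("/")
path = "/" + "/".join(parts[1:]) + "@" + parts[0]
print(prefix + path)
# git+file:///home/user/snakemake-wrappers/bio/bwa/mem@v1.0.0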
Example 5: find_extension probes the source cache for a wrapper script with a known extension
def find_extension(path,
                   sourcecache: SourceCache,
                   extensions=(".py", ".R", ".Rmd", ".jl")):
    for ext in extensions:
        if path.endswith("wrapper{}".format(ext)):
            return path

    path = infer_source_file(path)
    for ext in extensions:
        script = path.join("wrapper{}".format(ext))

        if sourcecache.exists(script):
            return script
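
A hedged usage sketch for find_extension; the wrapper URL is an example value, and the SourceCache construction mirrors Example 10 with an assumed cache path:

from snakemake.sourcecache import SourceCache

# Probes wrapper.py, wrapper.R, wrapper.Rmd, and wrapper.jl under the
# given location and returns the first candidate found in the cache
# (or None if no candidate exists).
script = find_extension(
    "https://github.com/snakemake/snakemake-wrappers/raw/v1.0.0/bio/bwa/mem",
    SourceCache("/tmp/snakemake-sources"),
)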
Example 6: an earlier variant of the constructor in Example 2 that only accepts an env file
    def __init__(self,
                 env_file,
                 workflow,
                 env_dir=None,
                 container_img=None,
                 cleanup=None):
        self.file = infer_source_file(env_file)

        self.frontend = workflow.conda_frontend
        self.workflow = workflow

        self._container_img = container_img
        self._env_dir = env_dir or (containerize.CONDA_ENV_PATH
                                    if self.is_containerized else
                                    workflow.persistence.conda_env_path)
        self._hash = None
        self._content_hash = None
        self._content = None
        self._path = None
        self._archive_file = None
        self._cleanup = cleanup
        self._singularity_args = workflow.singularity_args
Example 7: expand_conda_env resolves a rule's conda directive into a file or name spec
    def expand_conda_env(self, wildcards, params=None, input=None):
        from snakemake.common import is_local_file
        from snakemake.sourcecache import SourceFile, infer_source_file
        from snakemake.deployment.conda import (
            is_conda_env_file,
            CondaEnvFileSpec,
            CondaEnvNameSpec,
        )

        conda_env = self._conda_env
        if callable(conda_env):
            conda_env, _ = self.apply_input_function(conda_env,
                                                     wildcards=wildcards,
                                                     params=params,
                                                     input=input)

        if conda_env is None:
            return None

        if is_conda_env_file(conda_env):
            if not isinstance(conda_env, SourceFile):
                if is_local_file(conda_env) and not os.path.isabs(conda_env):
                    # Conda env file paths are considered to be relative to the directory of the Snakefile
                    # hence we adjust the path accordingly.
                    # This is not necessary in case of receiving a SourceFile.
                    conda_env = self.basedir.join(conda_env)
                else:
                    # infer source file from unmodified uri or path
                    conda_env = infer_source_file(conda_env)

            conda_env = CondaEnvFileSpec(conda_env, rule=self)
        else:
            conda_env = CondaEnvNameSpec(conda_env)

        conda_env = conda_env.apply_wildcards(wildcards, self)
        conda_env.check()

        return conda_env
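
Before the file/name dispatch, expand_conda_env resolves a callable conda directive through apply_input_function; a hypothetical callable of the expected shape takes the job's wildcards as its first argument:

# Hypothetical conda directive given as a function; expand_conda_env
# calls it with the job's wildcards (and optionally params/input)
# before deciding between CondaEnvFileSpec and CondaEnvNameSpec.
def conda_env(wildcards):
    return f"envs/{wildcards.tool}.yaml"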
Example 8: validate checks a config dict or pandas data frame against a JSON/YAML schema
def validate(data, schema, set_default=True):
    """Validate data with JSON schema at given path.

    Args:
        data (object): data to validate. Can be a config dict or a pandas data frame.
        schema (str): Path to JSON schema used for validation. The schema can also be
            in YAML format. If validating a pandas data frame, the schema has to
            describe a row record (i.e., a dict with column names as keys pointing
            to row values). See https://json-schema.org. The path is interpreted
            relative to the Snakefile when this function is called.
        set_default (bool): set default values defined in schema. See
            https://python-jsonschema.readthedocs.io/en/latest/faq/ for more
            information
    """
    frame = inspect.currentframe().f_back
    workflow = frame.f_globals.get("workflow")

    if workflow and workflow.modifier.skip_validation:
        # skip if a corresponding modifier has been defined
        return

    try:
        import jsonschema
        from jsonschema import validators, RefResolver
    except ImportError:
        raise WorkflowError(
            "The Python 3 package jsonschema must be installed "
            "in order to use the validate directive.")

    schemafile = infer_source_file(schema)

    if isinstance(schemafile,
                  LocalSourceFile) and not schemafile.isabs() and workflow:
        # if workflow object is not available this has not been started from a workflow
        schemafile = workflow.current_basedir.join(schemafile)

    source = (workflow.sourcecache.open(schemafile)
              if workflow else schemafile.get_path_or_uri())
    schema = _load_configfile(source, filetype="Schema")
    if isinstance(schemafile, LocalSourceFile):
        resolver = RefResolver(
            urljoin("file:", schemafile.get_path_or_uri()),
            schema,
            handlers={
                "file":
                lambda uri: _load_configfile(re.sub("^file://", "", uri))
            },
        )
    else:
        resolver = RefResolver(
            schemafile.get_path_or_uri(),
            schema,
        )

    # Taken from https://python-jsonschema.readthedocs.io/en/latest/faq/
    def extend_with_default(validator_class):
        validate_properties = validator_class.VALIDATORS["properties"]

        def set_defaults(validator, properties, instance, schema):
            for property, subschema in properties.items():
                if "default" in subschema:
                    instance.setdefault(property, subschema["default"])

            for error in validate_properties(validator, properties, instance,
                                             schema):
                yield error

        return validators.extend(validator_class, {"properties": set_defaults})

    Validator = validators.validator_for(schema)
    if Validator.META_SCHEMA["$schema"] != schema["$schema"]:
        logger.warning(
            "No validator found for JSON Schema version identifier '{}'".
            format(schema["$schema"]))
        logger.warning(
            "Defaulting to validator for JSON Schema version '{}'".format(
                Validator.META_SCHEMA["$schema"]))
        logger.warning("Note that schema file may not be validated correctly.")
    DefaultValidator = extend_with_default(Validator)

    if not isinstance(data, dict):
        try:
            import pandas as pd

            recordlist = []
            if isinstance(data, pd.DataFrame):
                for i, record in enumerate(data.to_dict("records")):
                    record = {
                        k: v
                        for k, v in record.items() if not pd.isnull(v)
                    }
                    try:
                        if set_default:
                            DefaultValidator(
                                schema, resolver=resolver).validate(record)
                            recordlist.append(record)
                        else:
                            jsonschema.validate(record,
                                                schema,
                                                resolver=resolver)
                    except jsonschema.exceptions.ValidationError as e:
                        raise WorkflowError(
                            "Error validating row {} of data frame.".format(i),
                            e)
                if set_default:
                    newdata = pd.DataFrame(recordlist, data.index)
                    newcol = ~newdata.columns.isin(data.columns)
                    n = len(data.columns)
                    for col in newdata.loc[:, newcol].columns:
                        data.insert(n, col, newdata.loc[:, col])
                        n = n + 1
                return
        except ImportError:
            pass
        raise WorkflowError("Unsupported data type for validation.")
    else:
        try:
            if set_default:
                DefaultValidator(schema, resolver=resolver).validate(data)
            else:
                jsonschema.validate(data, schema, resolver=resolver)
        except jsonschema.exceptions.ValidationError as e:
            raise WorkflowError("Error validating config file.", e)
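
Typical calls, following the documented snakemake.utils API; the schema and table paths are example values, and relative schema paths are resolved against the directory of the Snakefile:

from snakemake.utils import validate
import pandas as pd

# Validate the workflow config (the dict populated by the configfile
# directive) against a YAML/JSON schema.
validate(config, "schemas/config.schema.yaml")

# Validate a sample sheet row by row; the schema describes one record.
samples = pd.read_csv("samples.tsv", sep="\t")
validate(samples, "schemas/samples.schema.yaml")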
Example 9: post_deploy_file locates a post-deploy script next to the env file
    def post_deploy_file(self):
        if self.file:
            deploy_file = Path(
                self.file.get_path_or_uri()).with_suffix(".post-deploy.sh")
            if deploy_file.exists():
                return infer_source_file(deploy_file)
Example 10: notebook loads, drafts, or edits a Jupyter notebook script
def notebook(
    path,
    basedir,
    input,
    output,
    params,
    wildcards,
    threads,
    resources,
    log,
    config,
    rulename,
    conda_env,
    conda_base_path,
    container_img,
    singularity_args,
    env_modules,
    bench_record,
    jobid,
    bench_iteration,
    cleanup_scripts,
    shadow_dir,
    edit,
    runtime_sourcecache_path,
):
    """
    Load a script from the given basedir + path and execute it.
    """
    draft = False
    if edit is not None:
        if is_local_file(path):
            if not os.path.isabs(path):
                local_path = os.path.join(basedir, path)
            else:
                local_path = path
            if not os.path.exists(local_path):
                # draft the notebook, it does not exist yet
                language = None
                draft = True
                path = "file://{}".format(os.path.abspath(local_path))
                if path.endswith(".py.ipynb"):
                    language = "jupyter_python"
                elif path.endswith(".r.ipynb"):
                    language = "jupyter_r"
                else:
                    raise WorkflowError(
                        "Notebook to edit has to end on .py.ipynb or .r.ipynb in order "
                        "to decide which programming language shall be used.")
        else:
            raise WorkflowError(
                "Notebook {} is not local, but edit mode is only allowed for "
                "local notebooks.".format(path))

    if not draft:
        path, source, language, is_local = get_source(
            path, SourceCache(runtime_sourcecache_path), basedir, wildcards,
            params)
    else:
        source = None
        is_local = True
        path = infer_source_file(path)

    exec_class = get_exec_class(language)

    executor = exec_class(
        path,
        source,
        basedir,
        input,
        output,
        params,
        wildcards,
        threads,
        resources,
        log,
        config,
        rulename,
        conda_env,
        conda_base_path,
        container_img,
        singularity_args,
        env_modules,
        bench_record,
        jobid,
        bench_iteration,
        cleanup_scripts,
        shadow_dir,
        is_local,
    )

    if edit is None:
        executor.evaluate(edit=edit)
    elif edit.draft_only:
        executor.draft()
        msg = "Generated skeleton notebook:\n{} ".format(path)
        if conda_env and not container_img:
            msg += (
                "\n\nEditing with VSCode:\nOpen notebook, run command 'Select notebook kernel' (Ctrl+Shift+P or Cmd+Shift+P), and choose:"
                "\n{}\n".format(
                    str(
                        Path(conda_env) / "bin" /
                        executor.get_interpreter_exec())))
            msg += ("\nEditing with Jupyter CLI:"
                    "\nconda activate {}\njupyter notebook {}\n".format(
                        conda_env, path))
        logger.info(msg)
    elif draft:
        executor.draft_and_edit(listen=edit)
    else:
        executor.evaluate(edit=edit)
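
A small illustration of the suffix-based language inference used when drafting a notebook in edit mode; the filenames are assumed examples:

# The notebook suffix decides the kernel language when drafting;
# any other suffix raises a WorkflowError in notebook() above.
for p in ["analysis.py.ipynb", "analysis.r.ipynb", "analysis.ipynb"]:
    if p.endswith(".py.ipynb"):
        print(p, "-> jupyter_python")
    elif p.endswith(".r.ipynb"):
        print(p, "-> jupyter_r")
    else:
        print(p, "-> WorkflowError")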