def get_source(
    path,
    sourcecache: sourcecache.SourceCache,
    basedir=None,
    wildcards=None,
    params=None,
):
    if wildcards is not None and params is not None:
        if isinstance(path, SourceFile):
            path = path.get_path_or_uri()
        # Format path if wildcards are given.
        path = infer_source_file(format(path, wildcards=wildcards, params=params))
    if basedir is not None:
        basedir = infer_source_file(basedir)
    source_file = infer_source_file(path, basedir)
    with sourcecache.open(source_file) as f:
        source = f.read()
    language = get_language(source_file, source)
    is_local = isinstance(source_file, LocalSourceFile)
    return source_file, source, language, is_local
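# Usage sketch for get_source above, not part of the original source: the
# script path, cache directory, basedir, and the wildcards/params objects are
# made-up placeholders. get_source resolves a (possibly wildcard-containing)
# path against the workflow's basedir, reads it through the source cache, and
# reports the detected language plus whether the file is local.
from snakemake.sourcecache import SourceCache

source_file, source, language, is_local = get_source(
    "scripts/plot_{sample}.py",            # may contain wildcards
    SourceCache("/path/to/source-cache"),  # cache used to fetch remote sources
    basedir="/path/to/workflow",           # typically the Snakefile directory
    wildcards=wildcards,
    params=params,
)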
def __init__(
    self,
    workflow,
    env_file=None,
    env_name=None,
    env_dir=None,
    container_img=None,
    cleanup=None,
):
    self.file = env_file
    if env_file is not None:
        self.file = infer_source_file(env_file)

    self.name = env_name
    if env_name is not None:
        assert env_file is None, "bug: both env_file and env_name specified"

    self.frontend = workflow.conda_frontend
    self.workflow = workflow

    self._container_img = container_img
    self._env_dir = env_dir or (
        containerize.CONDA_ENV_PATH
        if self.is_containerized
        else workflow.persistence.conda_env_path
    )
    self._hash = None
    self._content_hash = None
    self._content = None
    self._content_deploy = None
    self._content_pin = None
    self._path = None
    self._archive_file = None
    self._cleanup = cleanup
    self._singularity_args = workflow.singularity_args
def pin_file(self):
    pin_file = Path(self.file.get_path_or_uri()).with_suffix(
        f".{self.conda.platform}.pin.txt"
    )
    if pin_file.exists():
        return infer_source_file(pin_file)
    else:
        return None
def get_path(path, prefix=None):
    if not is_url(path):
        if prefix is None:
            prefix = PREFIX
        elif prefix.startswith("git+file"):
            parts = path.split("/")
            path = "/" + "/".join(parts[1:]) + "@" + parts[0]
        path = prefix + path
    return infer_source_file(path)
def find_extension(
    path, sourcecache: SourceCache, extensions=[".py", ".R", ".Rmd", ".jl"]
):
    for ext in extensions:
        if path.endswith("wrapper{}".format(ext)):
            return path
    path = infer_source_file(path)
    for ext in extensions:
        script = path.join("wrapper{}".format(ext))
        if sourcecache.exists(script):
            return script
def __init__(
    self, env_file, workflow, env_dir=None, container_img=None, cleanup=None
):
    self.file = infer_source_file(env_file)

    self.frontend = workflow.conda_frontend
    self.workflow = workflow

    self._container_img = container_img
    self._env_dir = env_dir or (
        containerize.CONDA_ENV_PATH
        if self.is_containerized
        else workflow.persistence.conda_env_path
    )
    self._hash = None
    self._content_hash = None
    self._content = None
    self._path = None
    self._archive_file = None
    self._cleanup = cleanup
    self._singularity_args = workflow.singularity_args
def expand_conda_env(self, wildcards, params=None, input=None):
    from snakemake.common import is_local_file
    from snakemake.sourcecache import SourceFile, infer_source_file
    from snakemake.deployment.conda import (
        is_conda_env_file,
        CondaEnvFileSpec,
        CondaEnvNameSpec,
    )

    conda_env = self._conda_env
    if callable(conda_env):
        conda_env, _ = self.apply_input_function(
            conda_env, wildcards=wildcards, params=params, input=input
        )

    if conda_env is None:
        return None

    if is_conda_env_file(conda_env):
        if not isinstance(conda_env, SourceFile):
            if is_local_file(conda_env) and not os.path.isabs(conda_env):
                # Conda env file paths are considered to be relative to the
                # directory of the Snakefile, hence we adjust the path
                # accordingly. This is not necessary in case of receiving a
                # SourceFile.
                conda_env = self.basedir.join(conda_env)
            else:
                # Infer the source file from the unmodified URI or path.
                conda_env = infer_source_file(conda_env)
        conda_env = CondaEnvFileSpec(conda_env, rule=self)
    else:
        conda_env = CondaEnvNameSpec(conda_env)

    conda_env = conda_env.apply_wildcards(wildcards, self)
    conda_env.check()

    return conda_env
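# Illustration of the two conda-env spec kinds handled above (Snakefile syntax;
# rule, file, and environment names are made up). A value recognized by
# is_conda_env_file (e.g. a .yaml path, resolved relative to the Snakefile
# directory) is wrapped in a CondaEnvFileSpec; any other string, such as
# "some-existing-env", is treated as the name of an existing environment and
# becomes a CondaEnvNameSpec.
rule map_reads:
    output:
        "mapped/{sample}.bam"
    conda:
        "envs/mapping.yaml"
    shell:
        "mapper {wildcards.sample} > {output}"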
def validate(data, schema, set_default=True):
    """Validate data with JSON schema at given path.

    Args:
        data (object): data to validate. Can be a config dict or a pandas
            data frame.
        schema (str): Path to JSON schema used for validation. The schema
            can also be in YAML format. If validating a pandas data frame,
            the schema has to describe a row record (i.e., a dict with
            column names as keys pointing to row values). See
            https://json-schema.org. The path is interpreted relative to
            the Snakefile when this function is called.
        set_default (bool): set default values defined in schema. See
            https://python-jsonschema.readthedocs.io/en/latest/faq/ for
            more information
    """
    frame = inspect.currentframe().f_back
    workflow = frame.f_globals.get("workflow")

    if workflow and workflow.modifier.skip_validation:
        # skip if a corresponding modifier has been defined
        return

    try:
        import jsonschema
        from jsonschema import validators, RefResolver
    except ImportError:
        raise WorkflowError(
            "The Python 3 package jsonschema must be installed "
            "in order to use the validate directive."
        )

    schemafile = infer_source_file(schema)

    if isinstance(schemafile, LocalSourceFile) and not schemafile.isabs() and workflow:
        # if the workflow object is not available, this has not been started
        # from a workflow
        schemafile = workflow.current_basedir.join(schemafile)

    source = (
        workflow.sourcecache.open(schemafile)
        if workflow
        else schemafile.get_path_or_uri()
    )
    schema = _load_configfile(source, filetype="Schema")
    if isinstance(schemafile, LocalSourceFile):
        resolver = RefResolver(
            urljoin("file:", schemafile.get_path_or_uri()),
            schema,
            handlers={
                "file": lambda uri: _load_configfile(re.sub("^file://", "", uri))
            },
        )
    else:
        resolver = RefResolver(
            schemafile.get_path_or_uri(),
            schema,
        )

    # Taken from https://python-jsonschema.readthedocs.io/en/latest/faq/
    def extend_with_default(validator_class):
        validate_properties = validator_class.VALIDATORS["properties"]

        def set_defaults(validator, properties, instance, schema):
            for property, subschema in properties.items():
                if "default" in subschema:
                    instance.setdefault(property, subschema["default"])

            for error in validate_properties(validator, properties, instance, schema):
                yield error

        return validators.extend(validator_class, {"properties": set_defaults})

    Validator = validators.validator_for(schema)
    if Validator.META_SCHEMA["$schema"] != schema["$schema"]:
        logger.warning(
            "No validator found for JSON Schema version identifier '{}'".format(
                schema["$schema"]
            )
        )
        logger.warning(
            "Defaulting to validator for JSON Schema version '{}'".format(
                Validator.META_SCHEMA["$schema"]
            )
        )
        logger.warning("Note that schema file may not be validated correctly.")

    DefaultValidator = extend_with_default(Validator)

    if not isinstance(data, dict):
        try:
            import pandas as pd

            recordlist = []
            if isinstance(data, pd.DataFrame):
                for i, record in enumerate(data.to_dict("records")):
                    record = {k: v for k, v in record.items() if not pd.isnull(v)}
                    try:
                        if set_default:
                            DefaultValidator(schema, resolver=resolver).validate(record)
                            recordlist.append(record)
                        else:
                            jsonschema.validate(record, schema, resolver=resolver)
                    except jsonschema.exceptions.ValidationError as e:
                        raise WorkflowError(
                            "Error validating row {} of data frame.".format(i), e
                        )
                if set_default:
                    newdata = pd.DataFrame(recordlist, data.index)
                    newcol = ~newdata.columns.isin(data.columns)
                    n = len(data.columns)
                    for col in newdata.loc[:, newcol].columns:
                        data.insert(n, col, newdata.loc[:, col])
                        n = n + 1
                return
        except ImportError:
            pass
        raise WorkflowError("Unsupported data type for validation.")
    else:
        try:
            if set_default:
                DefaultValidator(schema, resolver=resolver).validate(data)
            else:
                jsonschema.validate(data, schema, resolver=resolver)
        except jsonschema.exceptions.ValidationError as e:
            raise WorkflowError("Error validating config file.", e)
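# Usage sketch for validate() above (schema and sample-sheet paths are made
# up): called from a Snakefile, where config is the global configuration dict,
# the schema path is resolved relative to the Snakefile, and a pandas data
# frame is validated row by row against a per-record schema.
import pandas as pd
from snakemake.utils import validate

validate(config, "schemas/config.schema.yaml")

samples = pd.read_csv("samples.tsv", sep="\t")
validate(samples, "schemas/samples.schema.yaml")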
def post_deploy_file(self):
    if self.file:
        deploy_file = Path(self.file.get_path_or_uri()).with_suffix(
            ".post-deploy.sh"
        )
        if deploy_file.exists():
            return infer_source_file(deploy_file)
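# Illustration of the naming convention implied by pin_file and
# post_deploy_file above (the environment file path and platform string are
# made up): both sibling files are derived from the env file via with_suffix.
from pathlib import Path

env_file = Path("envs/mapping.yaml")
pin_file = env_file.with_suffix(".linux-64.pin.txt")        # envs/mapping.linux-64.pin.txt
post_deploy_file = env_file.with_suffix(".post-deploy.sh")  # envs/mapping.post-deploy.sh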
def notebook(
    path,
    basedir,
    input,
    output,
    params,
    wildcards,
    threads,
    resources,
    log,
    config,
    rulename,
    conda_env,
    conda_base_path,
    container_img,
    singularity_args,
    env_modules,
    bench_record,
    jobid,
    bench_iteration,
    cleanup_scripts,
    shadow_dir,
    edit,
    runtime_sourcecache_path,
):
    """
    Load a script from the given basedir + path and execute it.
    """
    draft = False
    if edit is not None:
        if is_local_file(path):
            if not os.path.isabs(path):
                local_path = os.path.join(basedir, path)
            else:
                local_path = path
            if not os.path.exists(local_path):
                # draft the notebook, it does not exist yet
                language = None
                draft = True
                path = "file://{}".format(os.path.abspath(local_path))
                if path.endswith(".py.ipynb"):
                    language = "jupyter_python"
                elif path.endswith(".r.ipynb"):
                    language = "jupyter_r"
                else:
                    raise WorkflowError(
                        "Notebook to edit has to end on .py.ipynb or .r.ipynb in order "
                        "to decide which programming language shall be used."
                    )
        else:
            raise WorkflowError(
                "Notebook {} is not local, but edit mode is only allowed for "
                "local notebooks.".format(path)
            )

    if not draft:
        path, source, language, is_local = get_source(
            path, SourceCache(runtime_sourcecache_path), basedir, wildcards, params
        )
    else:
        source = None
        is_local = True
        path = infer_source_file(path)

    exec_class = get_exec_class(language)

    executor = exec_class(
        path,
        source,
        basedir,
        input,
        output,
        params,
        wildcards,
        threads,
        resources,
        log,
        config,
        rulename,
        conda_env,
        conda_base_path,
        container_img,
        singularity_args,
        env_modules,
        bench_record,
        jobid,
        bench_iteration,
        cleanup_scripts,
        shadow_dir,
        is_local,
    )

    if edit is None:
        executor.evaluate(edit=edit)
    elif edit.draft_only:
        executor.draft()
        msg = "Generated skeleton notebook:\n{} ".format(path)
        if conda_env and not container_img:
            msg += (
                "\n\nEditing with VSCode:\nOpen notebook, run command "
                "'Select notebook kernel' (Ctrl+Shift+P or Cmd+Shift+P), and choose:"
                "\n{}\n".format(
                    str(Path(conda_env) / "bin" / executor.get_interpreter_exec())
                )
            )
            msg += (
                "\nEditing with Jupyter CLI:"
                "\nconda activate {}\njupyter notebook {}\n".format(conda_env, path)
            )
        logger.info(msg)
    elif draft:
        executor.draft_and_edit(listen=edit)
    else:
        executor.evaluate(edit=edit)
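# Usage sketch for the notebook integration above (Snakefile syntax; rule,
# paths, and the conda env file are made up). The notebook path has to end in
# .py.ipynb or .r.ipynb so that the language can be determined when a missing
# notebook is drafted in edit mode, as enforced in notebook() above.
rule plot_sample:
    input:
        "data/{sample}.csv"
    output:
        "plots/{sample}.pdf"
    conda:
        "envs/plotting.yaml"
    notebook:
        "notebooks/plot.py.ipynb"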