def get_source(path, basedir=".", wildcards=None, params=None):
    """Load the content behind a path or URL and determine its language.

    Args:
        path (str): Location of the source. Values starting with ``http`` or
            ``git+file`` are kept as given; anything else has a leading
            ``file://`` or ``file:`` removed and, if not absolute, is joined
            onto ``basedir``.
        basedir (str): Base used to resolve non-absolute paths.
        wildcards: If both ``wildcards`` and ``params`` are not None, the path
            is passed through ``format`` with them before loading.
        params: See ``wildcards``.

    Returns:
        tuple: ``(path, source, language)`` where ``path`` is the normalized
        location (for ``git+file`` paths without the trailing ``@<version>``),
        ``source`` is the loaded content and ``language`` is the result of
        ``get_language(path, source)``.
    """
    source = None
    if not path.startswith(("http", "git+file")):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = smart_join(basedir, path, abspath=True)
        if is_local_file(path):
            path = "file://" + path

    if wildcards is not None and params is not None:
        # Format path if wildcards are given.
        path = format(path, wildcards=wildcards, params=params)

    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    elif path.startswith("git+file"):
        source = git_content(path).encode()
        _, _, version = split_git_path(path)
        # Remove exactly the "@<version>" suffix. str.rstrip() would treat the
        # argument as a set of characters and could eat trailing characters of
        # the file name that happen to occur in the version string.
        version_suffix = "@" + version
        if path.endswith(version_suffix):
            path = path[: -len(version_suffix)]
    else:
        sourceurl = path

    if source is None:
        # Only reached for the non-git branches, where sourceurl has been set.
        with urlopen(sourceurl) as response:
            source = response.read()

    language = get_language(path, source)
    return path, source, language
def join(self, path):
    """Return a new instance of this object's class for ``path`` joined onto it.

    ``path`` may be a plain value or a ``SourceFile``; in the latter case its
    ``get_path_or_uri()`` result is used. The join itself is delegated to
    ``smart_join``.
    """
    suffix = path.get_path_or_uri() if isinstance(path, SourceFile) else path
    joined = smart_join(self.get_path_or_uri(), suffix)
    return self.__class__(joined)
def local_path(self):
    """Return ``self.path`` without its first seven characters as a local path.

    An absolute result is returned unchanged; otherwise it is joined onto
    ``self.basedir`` via ``smart_join``.
    """
    # Presumably the seven dropped characters are a "file://" prefix -- confirm
    # against the code that sets self.path.
    stripped = self.path[7:]
    if os.path.isabs(stripped):
        return stripped
    return smart_join(self.basedir, stripped)
def validate(data, schema, set_default=True):
    """Validate data with JSON schema at given path.

    Args:
        data (object): data to validate. Can be a config dict or a pandas data
            frame. With ``set_default`` enabled, defaults are written into
            ``data`` in place (new keys for a dict, new columns for a frame).
        schema (str): Path to JSON schema used for validation. The schema can
            also be in YAML format. If validating a pandas data frame, the
            schema has to describe a row record (i.e., a dict with column
            names as keys pointing to row values). See
            https://json-schema.org. The path is interpreted relative to the
            Snakefile when this function is called.
        set_default (bool): set default values defined in schema. See
            https://python-jsonschema.readthedocs.io/en/latest/faq/ for more
            information

    Raises:
        WorkflowError: if jsonschema cannot be imported, if the config dict or
            a data frame row fails validation, or if ``data`` is neither a
            dict nor a pandas data frame.
    """
    # The "workflow" object is looked up in the globals of the calling frame.
    frame = inspect.currentframe().f_back
    workflow = frame.f_globals.get("workflow")

    if workflow and workflow.modifier.skip_validation:
        # skip if a corresponding modifier has been defined
        return

    try:
        import jsonschema
        from jsonschema import validators, RefResolver
    except ImportError as err:
        raise WorkflowError(
            "The Python 3 package jsonschema must be installed "
            "in order to use the validate directive."
        ) from err

    schemafile = schema

    # if workflow object is not available this has not been started from a workflow
    if not os.path.isabs(schemafile) and workflow:
        schemafile = smart_join(workflow.current_basedir, schemafile)

    source = workflow.sourcecache.open(schemafile) if workflow else schemafile
    schema = _load_configfile(source, filetype="Schema")
    if is_local_file(schemafile):
        resolver = RefResolver(
            urljoin("file:", schemafile),
            schema,
            handlers={
                "file": lambda uri: _load_configfile(re.sub("^file://", "", uri))
            },
        )
    else:
        resolver = RefResolver(
            schemafile,
            schema,
        )

    # Taken from https://python-jsonschema.readthedocs.io/en/latest/faq/
    def extend_with_default(validator_class):
        validate_properties = validator_class.VALIDATORS["properties"]

        def set_defaults(validator, properties, instance, subschema_owner):
            # Fill in defaults first, then run the regular "properties" check.
            for prop, subschema in properties.items():
                if "default" in subschema:
                    instance.setdefault(prop, subschema["default"])

            yield from validate_properties(
                validator, properties, instance, subschema_owner
            )

        return validators.extend(validator_class, {"properties": set_defaults})

    Validator = validators.validator_for(schema)
    # "$schema" is optional in a schema document; use .get() so that a schema
    # without it triggers the fallback warnings below instead of a KeyError.
    declared_version = schema.get("$schema")
    if Validator.META_SCHEMA["$schema"] != declared_version:
        logger.warning(
            "No validator found for JSON Schema version identifier '{}'".format(
                declared_version
            )
        )
        logger.warning(
            "Defaulting to validator for JSON Schema version '{}'".format(
                Validator.META_SCHEMA["$schema"]
            )
        )
        logger.warning("Note that schema file may not be validated correctly.")
    DefaultValidator = extend_with_default(Validator)

    if not isinstance(data, dict):
        try:
            import pandas as pd

            recordlist = []
            if isinstance(data, pd.DataFrame):
                for i, record in enumerate(data.to_dict("records")):
                    # Null cells are dropped so they count as missing keys.
                    record = {k: v for k, v in record.items() if not pd.isnull(v)}
                    try:
                        if set_default:
                            DefaultValidator(schema, resolver=resolver).validate(
                                record
                            )
                            recordlist.append(record)
                        else:
                            jsonschema.validate(record, schema, resolver=resolver)
                    except jsonschema.exceptions.ValidationError as e:
                        raise WorkflowError(
                            "Error validating row {} of data frame.".format(i), e
                        ) from e

                if set_default:
                    # Append columns that only exist because of schema defaults
                    # to the caller's frame, after the existing columns.
                    newdata = pd.DataFrame(recordlist, data.index)
                    newcol = ~newdata.columns.isin(data.columns)
                    n = len(data.columns)
                    for col in newdata.loc[:, newcol].columns:
                        data.insert(n, col, newdata.loc[:, col])
                        n += 1
                return
        except ImportError:
            # Without pandas, data cannot be a data frame; fall through.
            pass
        raise WorkflowError("Unsupported data type for validation.")
    else:
        try:
            if set_default:
                DefaultValidator(schema, resolver=resolver).validate(data)
            else:
                jsonschema.validate(data, schema, resolver=resolver)
        except jsonschema.exceptions.ValidationError as e:
            raise WorkflowError("Error validating config file.", e) from e