Example 1
import os
from urllib.request import pathname2url, urlopen

# smart_join, is_local_file, git_content, split_git_path, format and get_language
# are Snakemake helpers imported elsewhere in the original module.
def get_source(path, basedir=".", wildcards=None, params=None):
    source = None
    if not path.startswith("http") and not path.startswith("git+file"):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = smart_join(basedir, path, abspath=True)
        if is_local_file(path):
            path = "file://" + path
    if wildcards is not None and params is not None:
        # Format path if wildcards are given.
        path = format(path, wildcards=wildcards, params=params)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    elif path.startswith("git+file"):
        source = git_content(path).encode()
        (root_path, file_path, version) = split_git_path(path)
        # Strip the trailing "@<version>" suffix; str.rstrip would remove a
        # character set rather than the suffix itself.
        if path.endswith("@" + version):
            path = path[: -len("@" + version)]
    else:
        sourceurl = path

    if source is None:
        with urlopen(sourceurl) as source:
            source = source.read()

    language = get_language(path, source)

    return path, source, language
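All of these examples delegate path handling to smart_join, which joins a base directory or URL with a relative path. The following is only a rough sketch of such a helper, written for illustration under the assumption that distinguishing local paths from remote URLs is all it needs to do; it is not Snakemake's actual implementation:

import os
from urllib.parse import urljoin


def is_local_file(path_or_uri):
    # Assumption for this sketch: anything without a remote scheme is a local path.
    return not path_or_uri.startswith(("http://", "https://", "ftp://", "git+file"))


def smart_join(base, path, abspath=False):
    # Local base directories are joined with os.path; URLs are joined with urljoin.
    if is_local_file(base):
        joined = os.path.join(base, path)
        return os.path.abspath(joined) if abspath else joined
    # urljoin drops the last path component unless the base ends with a slash.
    return urljoin(base.rstrip("/") + "/", path)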
Example 2
# Method of a source-file wrapper class: joins this file's path or URI with
# another path and returns a new instance of the same class.
def join(self, path):
    if isinstance(path, SourceFile):
        path = path.get_path_or_uri()
    return self.__class__(smart_join(self.get_path_or_uri(), path))
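A hypothetical call, assuming the class wraps a local workflow directory (the class and file names below are illustrative, not taken from the example):

base = LocalSourceFile("/home/user/workflow")   # illustrative wrapper class
rule_file = base.join("rules/mapping.smk")
print(rule_file.get_path_or_uri())              # /home/user/workflow/rules/mapping.smk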
Example 3
# Helper of a source-file wrapper: strips the "file://" prefix and resolves
# relative paths against the configured base directory.
def local_path(self):
    path = self.path[7:]  # drop the leading "file://"
    if not os.path.isabs(path):
        return smart_join(self.basedir, path)
    return path
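To make the prefix handling concrete, this is what the slicing and the fallback join amount to (the values are made up for illustration):

# self.path = "file://data/samples.tsv", self.basedir = "/home/user/workflow"
path = "file://data/samples.tsv"[7:]   # -> "data/samples.tsv"
# the result is relative, so it is resolved against basedir:
# smart_join("/home/user/workflow", "data/samples.tsv") -> "/home/user/workflow/data/samples.tsv"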
Example 4
import inspect
import os
import re
from urllib.parse import urljoin

# smart_join, is_local_file, _load_configfile, logger and WorkflowError come from
# Snakemake's own modules in the original source.
def validate(data, schema, set_default=True):
    """Validate data with JSON schema at given path.

    Args:
        data (object): data to validate. Can be a config dict or a pandas data frame.
        schema (str): Path to JSON schema used for validation. The schema can also be
            in YAML format. If validating a pandas data frame, the schema has to
            describe a row record (i.e., a dict with column names as keys pointing
            to row values). See https://json-schema.org. The path is interpreted
            relative to the Snakefile when this function is called.
        set_default (bool): set default values defined in schema. See
            https://python-jsonschema.readthedocs.io/en/latest/faq/ for more
            information.
    """
    frame = inspect.currentframe().f_back
    workflow = frame.f_globals.get("workflow")

    if workflow and workflow.modifier.skip_validation:
        # skip if a corresponding modifier has been defined
        return

    try:
        import jsonschema
        from jsonschema import validators, RefResolver
    except ImportError:
        raise WorkflowError(
            "The Python 3 package jsonschema must be installed "
            "in order to use the validate directive.")

    schemafile = schema

    if not os.path.isabs(schemafile):
        # if workflow object is not available this has not been started from a workflow
        if workflow:
            schemafile = smart_join(workflow.current_basedir, schemafile)

    source = workflow.sourcecache.open(schemafile) if workflow else schemafile
    schema = _load_configfile(source, filetype="Schema")
    if is_local_file(schemafile):
        resolver = RefResolver(
            urljoin("file:", schemafile),
            schema,
            handlers={
                "file":
                lambda uri: _load_configfile(re.sub("^file://", "", uri))
            },
        )
    else:
        resolver = RefResolver(
            schemafile,
            schema,
        )

    # Taken from https://python-jsonschema.readthedocs.io/en/latest/faq/
    def extend_with_default(validator_class):
        validate_properties = validator_class.VALIDATORS["properties"]

        def set_defaults(validator, properties, instance, schema):
            for property, subschema in properties.items():
                if "default" in subschema:
                    instance.setdefault(property, subschema["default"])

            for error in validate_properties(validator, properties, instance,
                                             schema):
                yield error

        return validators.extend(validator_class, {"properties": set_defaults})

    Validator = validators.validator_for(schema)
    if Validator.META_SCHEMA["$schema"] != schema["$schema"]:
        logger.warning(
            "No validator found for JSON Schema version identifier '{}'".
            format(schema["$schema"]))
        logger.warning(
            "Defaulting to validator for JSON Schema version '{}'".format(
                Validator.META_SCHEMA["$schema"]))
        logger.warning("Note that schema file may not be validated correctly.")
    DefaultValidator = extend_with_default(Validator)

    if not isinstance(data, dict):
        try:
            import pandas as pd

            recordlist = []
            if isinstance(data, pd.DataFrame):
                for i, record in enumerate(data.to_dict("records")):
                    record = {
                        k: v
                        for k, v in record.items() if not pd.isnull(v)
                    }
                    try:
                        if set_default:
                            DefaultValidator(
                                schema, resolver=resolver).validate(record)
                            recordlist.append(record)
                        else:
                            jsonschema.validate(record,
                                                schema,
                                                resolver=resolver)
                    except jsonschema.exceptions.ValidationError as e:
                        raise WorkflowError(
                            "Error validating row {} of data frame.".format(i),
                            e)
                if set_default:
                    # Columns created by applying schema defaults are appended
                    # to the original data frame in place.
                    newdata = pd.DataFrame(recordlist, data.index)
                    newcol = ~newdata.columns.isin(data.columns)
                    n = len(data.columns)
                    for col in newdata.loc[:, newcol].columns:
                        data.insert(n, col, newdata.loc[:, col])
                        n = n + 1
                return
        except ImportError:
            pass
        raise WorkflowError("Unsupported data type for validation.")
    else:
        try:
            if set_default:
                DefaultValidator(schema, resolver=resolver).validate(data)
            else:
                jsonschema.validate(data, schema, resolver=resolver)
        except jsonschema.exceptions.ValidationError as e:
            raise WorkflowError("Error validating config file.", e)