Esempio n. 1
0
def validate(data, schema):
    """Validate data against the schema file at the given path.

    On top of the Draft 4 keywords the validator built here understands:

    * the custom ``must_exist`` keyword: when its value is truthy, the
      instance (interpreted as a path, with ``~`` expanded) has to exist;
    * the ``file`` and ``directory`` formats: a path that exists has to be a
      regular file / a directory, a path that does not exist passes.

    Arguments
    data -- data to validate. Either a dict, or an object offering
        ``to_dict('records')`` (presumably a pandas data frame), in which
        case every record is validated on its own.
    schema -- path of the schema file, loaded via snakemake's
        ``_load_configfile``. ``file:`` references inside the schema are
        resolved relative to this path.

    Returns
    A list with one ``MarsValidationError`` per violation (built from the
    offending instance, the last element of the error's relative path or
    None, and the error message). The list is empty if the data is valid.
    """
    # The function keeps all of its dependencies local, so the names that were
    # previously used without an import are pulled in here as well.
    import re
    from pathlib import Path
    from urllib.parse import urljoin

    from jsonschema import (Draft4Validator, FormatChecker, RefResolver,
                            validators)
    from jsonschema.exceptions import ValidationError
    from snakemake.io import _load_configfile

    schemafile = schema
    schema = _load_configfile(schemafile, filetype="Schema")

    resolver = RefResolver(
        urljoin('file:', schemafile),
        schema,
        handlers={
            # Strip the scheme so the referenced schema is loaded from disk.
            'file': lambda uri: _load_configfile(re.sub("^file://", "", uri))
        })

    format_checker = FormatChecker()

    def path_exists(validator, properties, instance, schema):
        # ``properties`` is the value of the ``must_exist`` keyword.
        if properties and not Path(instance).expanduser().exists():
            yield ValidationError("{} does not exist".format(instance))

    @format_checker.checks('file')
    def check_filepath(value):
        path = Path(value)
        # Only an existing path of the wrong kind fails the format check.
        return not path.exists() or path.is_file()

    @format_checker.checks('directory')
    def check_directory(value):
        path = Path(value)
        return not path.exists() or path.is_dir()

    all_validators = dict(Draft4Validator.VALIDATORS)
    all_validators['must_exist'] = path_exists

    Validator = validators.create(meta_schema=Draft4Validator.META_SCHEMA,
                                  validators=all_validators)

    validator = Validator(schema,
                          resolver=resolver,
                          format_checker=format_checker)

    # A dict is validated as a single record; anything else is expected to
    # provide its records through ``to_dict('records')``.
    records = [data] if isinstance(data, dict) else data.to_dict('records')

    errors = []
    for record in records:
        for ve in validator.iter_errors(record):
            # Read the innermost path element without mutating the error.
            key = ve.relative_path[-1] if ve.relative_path else None
            errors.append(MarsValidationError(ve.instance, key, ve.message))
    return errors
Esempio n. 2
0
def validate(data, schema):
    """Validate data with JSON schema at given path.

    Arguments
    data -- data to validate. Can be a config dict or a pandas data frame.
    schema -- Path to JSON schema used for validation. The schema can also be
        in YAML format. If validating a pandas data frame, the schema has to
        describe a row record (i.e., a dict with column names as keys pointing
        to row values). See http://json-schema.org. The path is interpreted
        relative to the Snakefile when this function is called.

    Raises
    WorkflowError -- if jsonschema is not installed, if the config dict or a
        data frame row violates the schema, or if data is neither a dict nor
        a pandas data frame (also when pandas cannot be imported).
    """
    try:
        import jsonschema
    except ImportError as e:
        raise WorkflowError(
            "The Python 3 package jsonschema must be installed "
            "in order to use the validate directive.") from e

    if not os.path.isabs(schema):
        # Must be evaluated directly in this function: f_back is the caller.
        frame = inspect.currentframe().f_back
        # if workflow object is not available this has not been started from a workflow
        if "workflow" in frame.f_globals:
            workflow = frame.f_globals["workflow"]
            schema = os.path.join(workflow.current_basedir, schema)

    schema = _load_configfile(schema, filetype="Schema")

    if isinstance(data, dict):
        try:
            jsonschema.validate(data, schema)
        except jsonschema.exceptions.ValidationError as e:
            raise WorkflowError("Error validating config file.", e) from e
        return

    # pandas is optional. Only the import itself is guarded, so an ImportError
    # raised while validating is no longer misreported as an unsupported type.
    try:
        import pandas as pd
    except ImportError:
        pd = None

    if pd is None or not isinstance(data, pd.DataFrame):
        raise WorkflowError("Unsupported data type for validation.")

    for i, record in enumerate(data.to_dict("records")):
        # Null cells are dropped so they are treated as absent keys.
        record = {k: v for k, v in record.items() if not pd.isnull(v)}
        try:
            jsonschema.validate(record, schema)
        except jsonschema.exceptions.ValidationError as e:
            raise WorkflowError(
                "Error validating row {} of data frame.".format(i),
                e) from e
Esempio n. 3
0
def validate(data, schema, set_default=True):
    """Validate data with JSON schema at given path.

    Args:
        data (object): data to validate. Can be a config dict or a pandas data frame.
        schema (str): Path to JSON schema used for validation. The schema can also be
            in YAML format. If validating a pandas data frame, the schema has to
            describe a row record (i.e., a dict with column names as keys pointing
            to row values). See https://json-schema.org. The path is interpreted
            relative to the Snakefile when this function is called.
        set_default (bool): set default values defined in schema. See
            https://python-jsonschema.readthedocs.io/en/latest/faq/ for more
            information. Defaults are written into ``data`` in place; for a
            data frame, columns that only exist through defaults are
            appended after the existing columns.

    Raises:
        WorkflowError: if jsonschema is not installed, if the config dict or
            a data frame row violates the schema, or if data is neither a
            dict nor a pandas data frame (also when pandas cannot be
            imported).
    """
    # Must be evaluated directly in this function: f_back is the caller.
    frame = inspect.currentframe().f_back
    workflow = frame.f_globals.get("workflow")

    if workflow and workflow.modifier.skip_validation:
        # skip if a corresponding modifier has been defined
        return

    try:
        import jsonschema
        from jsonschema import validators, RefResolver
    except ImportError as e:
        raise WorkflowError(
            "The Python 3 package jsonschema must be installed "
            "in order to use the validate directive.") from e

    schemafile = infer_source_file(schema)

    if isinstance(schemafile,
                  LocalSourceFile) and not schemafile.isabs() and workflow:
        # if workflow object is not available this has not been started from a workflow
        schemafile = workflow.current_basedir.join(schemafile)

    source = (workflow.sourcecache.open(schemafile)
              if workflow else schemafile.get_path_or_uri())
    schema = _load_configfile(source, filetype="Schema")
    if isinstance(schemafile, LocalSourceFile):
        resolver = RefResolver(
            urljoin("file:", schemafile.get_path_or_uri()),
            schema,
            handlers={
                # Strip the scheme so referenced schemas are loaded from disk.
                "file":
                lambda uri: _load_configfile(re.sub("^file://", "", uri))
            },
        )
    else:
        resolver = RefResolver(
            schemafile.get_path_or_uri(),
            schema,
        )

    # Taken from https://python-jsonschema.readthedocs.io/en/latest/faq/
    def extend_with_default(validator_class):
        validate_properties = validator_class.VALIDATORS["properties"]

        def set_defaults(validator, properties, instance, schema):
            # Only objects can receive defaults; other instance types are
            # left to the regular "properties" check below.
            if validator.is_type(instance, "object"):
                for prop, subschema in properties.items():
                    # Subschemas may be booleans, which carry no default.
                    if isinstance(subschema, dict) and "default" in subschema:
                        instance.setdefault(prop, subschema["default"])

            yield from validate_properties(validator, properties, instance,
                                           schema)

        return validators.extend(validator_class, {"properties": set_defaults})

    Validator = validators.validator_for(schema)
    # A schema without "$schema" is valid; validator_for then falls back to
    # its default validator, so only a declared mismatch is worth a warning.
    declared_version = schema.get("$schema")
    if (declared_version is not None
            and Validator.META_SCHEMA["$schema"] != declared_version):
        logger.warning(
            "No validator found for JSON Schema version identifier '{}'".
            format(declared_version))
        logger.warning(
            "Defaulting to validator for JSON Schema version '{}'".format(
                Validator.META_SCHEMA["$schema"]))
        logger.warning("Note that schema file may not be validated correctly.")
    DefaultValidator = extend_with_default(Validator)

    def check(instance):
        # Validate one instance, filling in schema defaults if requested.
        if set_default:
            DefaultValidator(schema, resolver=resolver).validate(instance)
        else:
            jsonschema.validate(instance, schema, resolver=resolver)

    if isinstance(data, dict):
        try:
            check(data)
        except jsonschema.exceptions.ValidationError as e:
            raise WorkflowError("Error validating config file.", e) from e
        return

    # pandas is optional. Only the import itself is guarded, so an ImportError
    # raised while validating is no longer misreported as an unsupported type.
    try:
        import pandas as pd
    except ImportError:
        pd = None

    if pd is None or not isinstance(data, pd.DataFrame):
        raise WorkflowError("Unsupported data type for validation.")

    recordlist = []
    for i, record in enumerate(data.to_dict("records")):
        # Null cells are dropped so they are treated as absent keys (and can
        # therefore be filled by schema defaults).
        record = {k: v for k, v in record.items() if not pd.isnull(v)}
        try:
            check(record)
        except jsonschema.exceptions.ValidationError as e:
            raise WorkflowError(
                "Error validating row {} of data frame.".format(i),
                e) from e
        if set_default:
            recordlist.append(record)

    if set_default:
        # Append columns that were introduced by defaults to the caller's
        # data frame, after its existing columns and in order of appearance.
        newdata = pd.DataFrame(recordlist, data.index)
        newcol = ~newdata.columns.isin(data.columns)
        first_free = len(data.columns)
        for position, col in enumerate(newdata.columns[newcol],
                                       start=first_free):
            data.insert(position, col, newdata[col])