def test_validate(data_url, schema, validates):
    """Assert that the JSON at *data_url* is (or is not) valid against *schema*."""
    data = requests.get(data_url).json()
    with requests_mock.Mocker() as mocked:
        # Serve the referenced schemas from fixtures so validation needs no live requests.
        mocked.get(ref_release_url, json=release_schema)
        mocked.get(ref_versioned_release_url, json=versioned_release_schema)
        assert validator(schema).is_valid(data) == validates
Esempio n. 2
0
def get_schema_validation_errors(json_data, schema_url, current_app):
    """
    Validate *json_data* against the JSON Schema fetched from *schema_url*.

    Returns a dict mapping each error message to the list of slash-joined
    paths ("a/b/0/c") at which that message occurred.
    """
    schema = requests.get(schema_url).json()
    validation_errors = collections.defaultdict(list)
    format_checker = FormatChecker()
    if current_app == 'cove-360':
        # NOTE(review): datetime_or_date presumably relaxes date-time checking
        # for 360Giving data — confirm against its definition.
        format_checker.checkers['date-time'] = (datetime_or_date, ValueError)
    # The enumeration index was unused, so iterate the errors directly.
    for error in validator(schema, format_checker=format_checker).iter_errors(json_data):
        validation_errors[error.message].append("/".join(str(item) for item in error.path))
    return dict(validation_errors)
def test_valid(filename, schema):
    """Warn about every schema violation in *filename* and assert there are none."""
    with open(filename) as f:
        data = json.load(f)

    found = list(validator(schema, format_checker=FormatChecker()).iter_errors(data))
    for error in found:
        warnings.warn(json.dumps(error.instance, indent=2))
        warnings.warn('{} ({})\n'.format(error.message, '/'.join(error.absolute_schema_path)))

    assert not found, '{} is invalid. See warnings below.'.format(filename)
def test_valid(filename, schema):
    """Assert that the JSON document in *filename* conforms to *schema*."""
    with open(filename) as f:
        document = json.load(f)

    checker = validator(schema, format_checker=FormatChecker())
    invalid = False
    for error in checker.iter_errors(document):
        invalid = True
        warnings.warn(json.dumps(error.instance, indent=2, separators=(',', ': ')))
        warnings.warn('{} ({})\n'.format(error.message, '/'.join(error.absolute_schema_path)))

    assert not invalid, '{} is invalid. See warnings below.'.format(filename)
Esempio n. 5
0
def test_valid(filename, schema):
    """
    Warn about every schema violation in *filename* and assert there are none.

    Versioned and compiled release files hold a single document; it is wrapped
    in a list so the loop below validates it as one datum.
    """
    errors = 0

    with open(filename) as f:
        data = json.load(f)
    # endswith accepts a tuple — one call instead of an `or` chain.
    if filename.endswith(('-versioned.json', '-compiled.json')):
        data = [data]

    for datum in data:
        for error in validator(schema, format_checker=FormatChecker()).iter_errors(datum):
            errors += 1
            warnings.warn(json.dumps(error.instance, indent=2))
            warnings.warn(f"{error.message} ({'/'.join(error.absolute_schema_path)})\n")

    # BUG FIX: the assertion message was an f-string with no placeholder
    # ("(unknown) is invalid..."); report the actual filename.
    assert errors == 0, f'{filename} is invalid. See warnings below.'
Esempio n. 6
0
    def handle(self):
        """
        Validate each line of input (one JSON document per line) against the
        JSON Schema named by ``self.args.schema``, printing any errors.

        The schema argument may be a ``file://`` path or an HTTP(S) URL. With
        ``self.args.check_urls`` set, every ``format: uri`` string in the data
        is also fetched to confirm it responds with HTTP 200.

        Raises ``CommandError`` if a line is not parseable JSON.
        """
        components = urlparse(self.args.schema)
        if components.scheme == 'file':
            # Strip the leading "file://" (7 characters) to get a local path.
            with open(self.args.schema[7:]) as f:
                schema = json_load(f)
        else:
            schema = requests.get(self.args.schema).json()

        format_checker = FormatChecker()
        if self.args.check_urls:

            def check_url(instance):
                # See https://github.com/Julian/jsonschema/blob/master/jsonschema/_format.py
                # Non-string instances pass: "uri" only constrains strings.
                if not isinstance(instance, str_types):
                    return True
                rfc3987.parse(instance, rule='URI')  # raises ValueError
                try:
                    response = requests.get(instance,
                                            timeout=self.args.timeout)
                    # Only a plain 200 counts as a resolvable URL.
                    result = response.status_code in (200, )
                    if not result:
                        print('HTTP {} on GET {}'.format(
                            response.status_code, instance))
                    return result
                except requests.exceptions.Timeout:
                    print('Timedout on GET {}'.format(instance))
                    return False

            # Register the checker for "uri"; rfc3987.parse's ValueError is
            # then reported as a format error instead of propagating.
            format_checker.checks('uri', raises=(ValueError))(check_url)

        for i, line in enumerate(self.buffer()):
            try:
                data = json_loads(line)
                errors = False
                for error in validator(
                        schema,
                        format_checker=format_checker).iter_errors(data):
                    print('item {}: {} ({})'.format(
                        i, error.message,
                        '/'.join(error.absolute_schema_path)))
                    errors = True
                if not errors and self.args.verbose:
                    print('item {}: no errors'.format(i))
            except json.decoder.JSONDecodeError as e:
                # NOTE(review): json_loads may not be the stdlib loader —
                # confirm it raises json.decoder.JSONDecodeError, otherwise
                # this handler never fires.
                raise CommandError('item {}: JSON error: {}'.format(i, e))
Esempio n. 7
0
def test_codelist():
    """
    Ensures all codelists files are valid against codelist-schema.json.
    """
    schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..",
                               "schema", "codelist-schema.json")
    with open(schema_path) as f:
        codelist_schema = json.load(f)

    any_errors = False

    for path, name, text, fieldnames, rows in walk_csv_data():
        if not is_codelist(fieldnames):
            continue

        seen = set()
        data = []
        for row_index, row in enumerate(rows, 2):
            code = row["code"]
            if code in seen:
                any_errors = True
                warnings.warn('{}: Duplicate code "{}" on row {}'.format(
                    path, code, row_index))
            seen.add(code)

            # Normalize empty cells (other than the code column) to None.
            data.append({k: v if k == "code" or v else None
                         for k, v in row.items()})

        for error in validator(
                codelist_schema, format_checker=FormatChecker()).iter_errors(data):
            any_errors = True
            warnings.warn("{}: {} ({})\n".format(
                path, error.message, "/".join(error.absolute_schema_path)))

    assert not any_errors
def test_registry():
    """
    Ensures the registry CSV files are consistent: each row validates against
    its JSON Schema, declared keys are unique within their scope, Base/Download
    URLs resolve, every version's Id appears in extensions.csv, and every
    extension has at least one version.
    """
    configuration = {
        # Id must be unique in extensions.csv.
        'extensions.csv': {'Id': None},
        # Version and Base URL must be unique, within the scope of a given Id, in extension_versions.csv.
        'extension_versions.csv': {'Version': 'Id', 'Base URL': 'Id'},
    }

    # Keep track of extension identifiers, to ensure consistency across files.
    identifiers = {}

    for csv_basename, uniqueness in configuration.items():
        schema_basename = '{}-schema.json'.format(os.path.splitext(csv_basename)[0])

        with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'schema', schema_basename)) as f:
            schema = json.load(f)

        # Count the occurrences of a key-value pair, within a given scope.
        # Scoped keys: seen[scope][scope_value][key] -> set of values seen.
        # Unscoped keys: seen[key] -> set of values seen.
        seen = {}
        for key, scope in uniqueness.items():
            if scope:
                seen[scope] = defaultdict(lambda: defaultdict(set))
            else:
                seen[key] = set()

        with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', csv_basename)) as f:
            reader = csv.DictReader(f)
            for row in reader:
                id = row['Id']

                # Remove empty cells so the row is validated with those keys
                # absent rather than as empty strings.
                for key in reader.fieldnames:
                    if not row[key]:
                        del row[key]

                # Fail fast on the first schema error for this row.
                for error in validator(schema, format_checker=FormatChecker()).iter_errors(row):
                    raise Exception('{}: {} ({})\n'.format(id, error.message, '/'.join(error.absolute_schema_path)))

                # Validate that URLs resolve.
                if row.get('Base URL'):
                    response = requests.get(row['Base URL'] + 'extension.json')
                    response.raise_for_status()
                if row.get('Download URL'):
                    response = requests.get(row['Download URL'])
                    response.raise_for_status()

                # Validate the uniqueness of a key-value pair, within a given scope.
                # NOTE(review): row[key] raises KeyError if that cell was empty
                # and deleted above — schema validation is expected to have
                # failed first in that case; confirm the schema requires it.
                for key, scope in uniqueness.items():
                    value = row[key]
                    if scope:
                        if value in seen[scope][row[scope]][key]:
                            raise Exception('{}: Duplicate {} "{}" in scope of {} "{}" on line {}'.format(
                                csv_basename, key, value, scope, row[scope], reader.line_num))
                        seen[scope][row[scope]][key].add(value)
                    else:
                        if value in seen[key]:
                            raise Exception('{}: Duplicate {} "{}" on line {}'.format(
                                csv_basename, key, value, reader.line_num))
                        seen[key].add(value)

                # Count versions per extension (extensions.csv is processed
                # first, seeding each Id with a zero count).
                if csv_basename == 'extensions.csv':
                    identifiers[id] = 0
                # Ensure every version belongs to a known extension.
                elif id in identifiers:
                    identifiers[id] += 1
                else:
                    raise Exception('extension_versions.csv: Id "{}" not in extensions.csv'.format(id))

    # Ensure every extension has at least one version.
    for id, count in identifiers.items():
        if not count:
            raise Exception('extensions.csv: Id "{}" not in extension_versions.csv'.format(id))
def validate_json_schema(path,
                         data,
                         schema,
                         full_schema=not is_extension,
                         top=cwd):
    """
    Prints and asserts errors in a JSON Schema.

    *data* (the schema file at *path*) is validated against *schema*, then run
    through a series of OCDS-specific checks. ``full_schema`` defaults to
    false for extensions, which aren't expected to repeat information from the
    core schema; ``top`` is the directory walked for codelist CSV files.
    """
    errors = 0

    # Non-OCDS schema don't:
    # * pair "enum" and "codelist"
    # * disallow "null" in "type" of "items"
    # * UpperCamelCase definitions and lowerCamelCase properties
    # * allow "null" in the "type" of optional fields
    # * include "id" fields in objects within arrays
    # * require "title", "description" and "type" properties
    json_schema_exceptions = {
        'json-schema-draft-4.json',
        'meta-schema.json',
        'meta-schema-patch.json',
    }
    ocds_schema_exceptions = {
        'codelist-schema.json',
        'extension-schema.json',
        'extensions-schema.json',
        'extension_versions-schema.json',
        'dereferenced-release-schema.json',
    }
    exceptions = json_schema_exceptions | ocds_schema_exceptions
    allow_null = repo_name != 'infrastructure'

    # Report every violation via warnings; the assert at the end fails once.
    for error in validator(schema,
                           format_checker=FormatChecker()).iter_errors(data):
        errors += 1
        warnings.warn(
            json.dumps(error.instance, indent=2, separators=(',', ': ')))
        warnings.warn('ERROR: {} ({})\n'.format(
            error.message, '/'.join(error.absolute_schema_path)))

    if errors:
        warnings.warn('ERROR: {} is not valid JSON Schema ({} errors)'.format(
            path, errors))

    # OCDS-specific checks, skipped for the meta/registry schema above.
    if all(basename not in path for basename in exceptions):
        kwargs = {}
        if 'versioned-release-validation-schema.json' in path:
            kwargs['additional_valid_types'] = ['object']
        errors += validate_items_type(path, data, **kwargs)
        errors += validate_codelist_enum(path, data)
        errors += validate_letter_case(path, data)
        errors += validate_merge_properties(path, data)

    # `full_schema` is set to not expect extensions to repeat information from core.
    if full_schema:
        exceptions_plus_versioned = exceptions | {
            'versioned-release-validation-schema.json',
        }

        exceptions_plus_versioned_and_packages = exceptions_plus_versioned | {
            'record-package-schema.json',
            'release-package-schema.json',
        }

        # Extensions aren't expected to repeat referenced `definitions`.
        errors += validate_ref(path, data)

        if all(basename not in path for basename in exceptions_plus_versioned):
            # Extensions aren't expected to repeat `title`, `description`, `type`.
            errors += validate_title_description_type(path, data)
            # Extensions aren't expected to repeat referenced `definitions`.
            errors += validate_object_id(path, JsonRef.replace_refs(data))

        if all(basename not in path
               for basename in exceptions_plus_versioned_and_packages):
            # Extensions aren't expected to repeat `required`. Packages don't have merge rules.
            errors += validate_null_type(path, data, allow_null=allow_null)

            # Extensions aren't expected to repeat referenced codelist CSV files.
            # TODO: This code assumes each schema uses all codelists. So, for now, skip package schema.
            codelist_files = set()
            for csvpath, reader in walk_csv_data(top):
                parts = csvpath.replace(top, '').split(
                    os.sep
                )  # maybe inelegant way to isolate consolidated extension
                if is_codelist(reader) and (
                        # Take all codelists in extensions.
                    (is_extension and not is_profile) or
                        # Take non-extension codelists in core, and non-core codelists in profiles.
                        not any(c in parts
                                for c in ('extensions', 'patched'))):
                    name = os.path.basename(csvpath)
                    # A "+"/"-" prefix marks a patch to an external codelist.
                    if name.startswith(('+', '-')):
                        if name[1:] not in external_codelists:
                            errors += 1
                            warnings.warn(
                                'ERROR: {} {} modifies non-existent codelist'.
                                format(path, name))
                    else:
                        codelist_files.add(name)

            codelist_values = collect_codelist_values(path, data)
            if is_extension:
                all_codelist_files = codelist_files | external_codelists
            else:
                all_codelist_files = codelist_files

            # A codelist file with no schema field referencing it is unused;
            # a referenced codelist with no file is missing.
            unused_codelists = [
                codelist for codelist in codelist_files
                if codelist not in codelist_values
            ]
            missing_codelists = [
                codelist for codelist in codelist_values
                if codelist not in all_codelist_files
            ]

            if unused_codelists:
                errors += 1
                warnings.warn('ERROR: {} has unused codelists: {}'.format(
                    path, ', '.join(unused_codelists)))
            if missing_codelists:
                errors += 1
                warnings.warn(
                    'ERROR: repository is missing codelists: {}'.format(
                        ', '.join(missing_codelists)))
    else:
        errors += validate_deep_properties(path, data)

    assert errors == 0, 'One or more JSON Schema files are invalid. See warnings below.'
Esempio n. 10
0
from jsonschema.validators import Draft4Validator as validator


def validate_against_schema(raw_data="", schema_name="release-package-schema"):
    """
    Parse *raw_data* as JSON and validate it against the named local schema.

    Returns a ``(status, error, schema)`` triple:
    - ('input-error', exception, None) if *raw_data* is not parseable JSON;
    - ('validation-error', [errors], schema) on schema violations (capped at
      about 100 errors to bound work on badly broken input);
    - ('input-valid', None, schema) otherwise.
    """
    status = 'input-valid'
    error = None

    try:
        data = json.loads(raw_data)
    # BUG FIX: `except (TypeError, ValueError), e:` is Python 2-only syntax
    # and a SyntaxError under Python 3; use `as e`.
    except (TypeError, ValueError) as e:
        status = 'input-error'
        error = e
        return status, error, None

    # Close the schema file deterministically instead of relying on GC.
    schema_path = local(__file__).dirname + '/schemas/{schema_name}.json'.format(schema_name=schema_name)
    with open(schema_path) as f:
        schema = json.load(f)

    error_list = []
    for n, e in enumerate(validator(schema).iter_errors(data)):
        error_list.append(e)
        if n >= 100:
            break

    if error_list:
        status = 'validation-error'
        return status, error_list, schema

    return status, error, schema
Esempio n. 11
0
def get_schema_validation_errors(json_data, schema_obj, schema_name, cell_src_map, heading_src_map, extra_checkers=None):
    """
    Validate *json_data* against the package schema held by *schema_obj*.

    Returns a dict mapping a JSON-encoded ``[validator_type, message,
    path_no_number]`` key to the list of location dicts ("values") at which
    that error occurred. ``cell_src_map`` and ``heading_src_map`` map JSON
    paths back to spreadsheet cells/headings; ``extra_checkers`` extends the
    format checker's registry.
    """
    if schema_name == 'record-package-schema.json':
        pkg_schema_obj = schema_obj.get_record_pkg_schema_obj()
    else:
        pkg_schema_obj = schema_obj.get_release_pkg_schema_obj()

    validation_errors = collections.defaultdict(list)
    format_checker = FormatChecker()
    if extra_checkers:
        format_checker.checkers.update(extra_checkers)

    # Resolve $refs against the extended schema file when extensions were
    # applied, otherwise against the remote schema host.
    if getattr(schema_obj, 'extended', None):
        resolver = CustomRefResolver('', pkg_schema_obj, schema_file=schema_obj.extended_schema_file)
    else:
        resolver = CustomRefResolver('', pkg_schema_obj, schema_url=schema_obj.schema_host)

    our_validator = validator(pkg_schema_obj, format_checker=format_checker, resolver=resolver)
    for n, e in enumerate(our_validator.iter_errors(json_data)):
        message = e.message
        path = "/".join(str(item) for item in e.path)
        # Path with array indices removed, used in the de-duplication key.
        path_no_number = "/".join(str(item) for item in e.path if not isinstance(item, int))

        validator_type = e.validator
        if e.validator in ('format', 'type'):
            # Use the expected format/type (first entry if a list) as the
            # reported type, and swap in a friendlier message when defined.
            validator_type = e.validator_value
            if isinstance(e.validator_value, list):
                validator_type = e.validator_value[0]

            new_message = validation_error_lookup.get(validator_type)
            if new_message:
                message = new_message

        value = {"path": path}
        cell_reference = cell_src_map.get(path)

        if cell_reference:
            # A 4-tuple carries sheet/column/row/header; a 2-tuple only sheet/row.
            first_reference = cell_reference[0]
            if len(first_reference) == 4:
                value["sheet"], value["col_alpha"], value["row_number"], value["header"] = first_reference
            if len(first_reference) == 2:
                value["sheet"], value["row_number"] = first_reference

        # Only scalar instances are worth echoing back to the user.
        if not isinstance(e.instance, (dict, list)):
            value["value"] = e.instance

        if e.validator == 'required':
            field_name = e.message
            if len(e.path) > 2:
                # NOTE(review): a later variant of this function tests
                # e.path[-1] instead of e.path[-2] here — confirm which path
                # segment is the intended parent name.
                if isinstance(e.path[-2], int):
                    parent_name = e.path[-1]
                else:
                    parent_name = e.path[-2]

                field_name = str(parent_name) + ":" + e.message
            # Prefer the original spreadsheet heading when one is known.
            heading = heading_src_map.get(path_no_number + '/' + e.message)
            if heading:
                field_name = heading[0][1]
                value['header'] = heading[0][1]
            message = "'{}' is missing but required".format(field_name)
        if e.validator == 'enum':
            # Skip enum errors for codelist fields ("isCodelist" in schema).
            if "isCodelist" in e.schema:
                continue
            header = value.get('header')
            if not header:
                header = e.path[-1]
            message = "Invalid code found in '{}'".format(header)

        unique_validator_key = [validator_type, message, path_no_number]
        validation_errors[json.dumps(unique_validator_key)].append(value)
    return dict(validation_errors)
Esempio n. 12
0
def get_schema_validation_errors(json_data,
                                 schema_obj,
                                 schema_name,
                                 cell_src_map,
                                 heading_src_map,
                                 extra_checkers=None):
    """
    Validate *json_data* against the package schema held by *schema_obj*.

    Returns a dict mapping a JSON-encoded key (message type, plain message,
    HTML-safe message, and index-free path) to the list of source-location
    dicts at which that error occurred. ``cell_src_map`` and
    ``heading_src_map`` map JSON paths back to spreadsheet cells/headings;
    ``extra_checkers`` extends the format checker's registry.
    """
    if schema_name == 'record-package-schema.json':
        pkg_schema_obj = schema_obj.get_record_pkg_schema_obj()
    else:
        pkg_schema_obj = schema_obj.get_release_pkg_schema_obj()

    validation_errors = collections.defaultdict(list)
    format_checker = FormatChecker()
    if extra_checkers:
        format_checker.checkers.update(extra_checkers)

    # Resolve $refs against the extended schema file when extensions were
    # applied, otherwise against the remote schema host.
    if getattr(schema_obj, 'extended', None):
        resolver = CustomRefResolver(
            '',
            pkg_schema_obj,
            schema_url=schema_obj.schema_host,
            schema_file=schema_obj.extended_schema_file,
            file_schema_name=schema_obj.release_schema_name)
    else:
        resolver = CustomRefResolver('',
                                     pkg_schema_obj,
                                     schema_url=schema_obj.schema_host)

    our_validator = validator(pkg_schema_obj,
                              format_checker=format_checker,
                              resolver=resolver)
    for e in our_validator.iter_errors(json_data):
        # message_safe holds the HTML-safe variant; None means "derive it by
        # escaping message" (see the fallback near the end of the loop).
        message_safe = None
        message = e.message
        path = "/".join(str(item) for item in e.path)
        # Path with array indices removed, used in the de-duplication key.
        path_no_number = "/".join(
            str(item) for item in e.path if not isinstance(item, int))

        value = {"path": path}
        cell_reference = cell_src_map.get(path)

        if cell_reference:
            # A 4-tuple carries sheet/column/row/header; a 2-tuple only sheet/row.
            first_reference = cell_reference[0]
            if len(first_reference) == 4:
                value["sheet"], value["col_alpha"], value["row_number"], value[
                    "header"] = first_reference
            if len(first_reference) == 2:
                value["sheet"], value["row_number"] = first_reference

        # Fall back to the last path segment when no spreadsheet header is known.
        header = value.get('header')
        if not header and len(e.path):
            header = e.path[-1]

        validator_type = e.validator
        if e.validator in ('format', 'type'):
            # Report the expected format/type (first entry if a list), and
            # mention nullability when 'null' is not among the allowed types.
            validator_type = e.validator_value
            null_clause = ''
            if isinstance(e.validator_value, list):
                validator_type = e.validator_value[0]
                if 'null' not in e.validator_value:
                    null_clause = 'is not null, and'
            else:
                null_clause = 'is not null, and'

            message_template = validation_error_template_lookup.get(
                validator_type, message)
            message_safe_template = validation_error_template_lookup_safe.get(
                validator_type)
            if message_template:
                message = message_template.format(header, null_clause)
            if message_safe_template:
                message_safe = format_html(message_safe_template, header,
                                           null_clause)

        if e.validator == 'oneOf' and e.validator_value[0] == {
                'format': 'date-time'
        }:
            # Give a nice date related error message for 360Giving date `oneOf`s.
            message = validation_error_template_lookup['date-time']
            message_safe = format_html(
                validation_error_template_lookup_safe['date-time'])
            validator_type = 'date-time'

        # Only scalar instances are worth echoing back to the user.
        if not isinstance(e.instance, (dict, list)):
            value["value"] = e.instance

        if e.validator == 'required':
            field_name = e.message
            parent_name = None
            if len(e.path) > 2:
                # An integer tail means the error is on an array element, so
                # the parent name sits one segment further up.
                if isinstance(e.path[-1], int):
                    parent_name = e.path[-2]
                else:
                    parent_name = e.path[-1]

            # Prefer the original spreadsheet heading when one is known.
            heading = heading_src_map.get(path_no_number + '/' + e.message)
            if heading:
                field_name = heading[0][1]
                value['header'] = heading[0][1]
            if parent_name:
                message = "'{}' is missing but required within '{}'".format(
                    field_name, parent_name)
                message_safe = format_html(
                    "<code>{}</code> is missing but required within <code>{}</code>",
                    field_name, parent_name)
            else:
                message = "'{}' is missing but required".format(field_name)
                # NOTE(review): parent_name is always None on this branch;
                # str.format ignores the surplus argument, but it looks like a
                # copy/paste leftover.
                message_safe = format_html(
                    "<code>{}</code> is missing but required", field_name,
                    parent_name)

        if e.validator == 'enum':
            # Skip enum errors for codelist fields ("isCodelist" in schema).
            if "isCodelist" in e.schema:
                continue
            message = "Invalid code found in '{}'".format(header)
            message_safe = format_html("Invalid code found in <code>{}</code>",
                                       header)

        if e.validator == 'pattern':
            message_safe = format_html(
                '<code>{}</code> does not match the regex <code>{}</code>',
                header, e.validator_value)

        if e.validator == 'minItems' and e.validator_value == 1:
            message_safe = format_html(
                '<code>{}</code> is too short. You must supply at least one value, or remove the item entirely (unless it’s required).',
                e.instance)

        if e.validator == 'minLength' and e.validator_value == 1:
            message_safe = format_html(
                '<code>"{}"</code> is too short. Strings must be at least one character. This error typically indicates a missing value.',
                e.instance)

        if message_safe is None:
            message_safe = escape(message)

        unique_validator_key = {
            'message_type': validator_type,
            'message': message,
            'message_safe': conditional_escape(message_safe),
            'path_no_number': path_no_number
        }
        validation_errors[json.dumps(unique_validator_key,
                                     sort_keys=True)].append(value)
    return dict(validation_errors)
Esempio n. 13
0
import datetime
import glob
import json
import os

from jsonschema import FormatChecker
from jsonschema.validators import Draft4Validator as validator

current_path = os.path.dirname(os.path.realpath(__file__))

# Combined registry document assembled from each extension's entry.json.
gathered_json = {
    "last_updated": str(datetime.datetime.utcnow()),
    "extensions": []
}

# Validator for individual entry.json files.
with open('entry-schema.json') as fp:
    entry_validator = validator(json.load(fp), format_checker=FormatChecker())

exit_status = 0

# Each immediate subdirectory is expected to contain an entry.json.
for directory in glob.glob(current_path + "/*"):
    if os.path.isdir(directory):
        entry_json_file = os.path.join(directory, "entry.json")

        with open(entry_json_file) as fp:
            entry_obj = json.load(fp)
            if entry_validator.is_valid(entry_obj):

                # Temporary shim so docs keep working while transitioning to the new format.
                for item in entry_obj:
                    item["documentation_url"] = item["documentationUrl"]["en"]
                    # Drop the trailing "extension.json" (14 characters),
                    # leaving the base URL with its trailing slash.
                    item["url"] = item["url"][:-14]
def test_codelist():
    """
    Ensures all codelists files are valid against codelist-schema.json.
    """
    # Known failures that are tolerated for specific codelist files.
    exceptions = {
        'currency.csv': "'Description' is a required property",
        'language.csv': "'Description' is a required property",
        'mediaType.csv': "'Description' is a required property",
        # ocds_countryCode_extension
        'country.csv': "'Description' is a required property",
        # ocds_coveredBy_extension
        'coveredBy.csv': "'Description' is a required property",
        # ocds_medicine_extension
        'administrationRoute.csv': "'Description' is a required property",
        'container.csv': "None is not of type 'string'",
        'dosageForm.csv': "None is not of type 'string'",
    }

    array_columns = ('Framework', 'Section')

    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..',
                        'schema', 'codelist-schema.json')
    if os.path.isfile(path):
        with open(path) as f:
            codelist_schema = json.load(f)
    else:
        # Fall back to the canonical schema when no local copy exists.
        url = 'https://raw.githubusercontent.com/open-contracting/standard-maintenance-scripts/main/schema/codelist-schema.json'  # noqa: E501
        codelist_schema = requests.get(url).json()

    # Minimal schema for "-" codelists, which only remove codes.
    minus_schema = {
        "$schema": "http://json-schema.org/draft-04/schema#",
        "type": "array",
        "items": {
            "type": "object",
            "required": ["Code"],
            "additionalProperties": False,
            "properties": {
                "Code": {
                    "title": "Code",
                    "description": "The value to use in OCDS data.",
                    "type": "string",
                    "pattern": "^[A-Za-z0-9-]*$"
                }
            }
        }
    }

    any_errors = False

    for path, name, text, fieldnames, rows in walk_csv_data():
        if not is_codelist(fieldnames):
            continue

        def normalize(key, cell):
            # Array-valued columns are comma-separated in the CSV.
            if key in array_columns:
                return cell.split(', ')
            if key == 'Code' or cell:
                return cell
            return None

        seen = set()
        data = []
        for row_index, row in enumerate(rows, 2):
            code = row['Code']
            if code in seen:
                any_errors = True
                warnings.warn(
                    f'{path}: Duplicate code "{code}" on row {row_index}')
            seen.add(code)
            data.append({k: normalize(k, v) for k, v in row.items()})

        basename = os.path.basename(path)
        schema = minus_schema if basename.startswith('-') else codelist_schema

        for error in validator(
                schema, format_checker=FormatChecker()).iter_errors(data):
            if error.message == exceptions.get(basename):
                continue
            any_errors = True
            warnings.warn(
                f"{path}: {error.message} ({'/'.join(error.absolute_schema_path)})\n"
            )

    assert not any_errors