def test_validate(data_url, schema, validates):
    """Check whether the document at *data_url* validates against *schema*.

    The schema's remote ``$ref`` targets are served from a mock so that
    validation itself performs no live HTTP requests; *validates* is the
    expected boolean outcome.
    """
    # Fetch the real document first, before HTTP is mocked out.
    data = requests.get(data_url).json()
    with requests_mock.Mocker() as mocker:
        mocker.get(ref_release_url, json=release_schema)
        mocker.get(ref_versioned_release_url, json=versioned_release_schema)
        assert validator(schema).is_valid(data) == validates
def get_schema_validation_errors(json_data, schema_url, current_app):
    """Validate *json_data* against the JSON Schema fetched from *schema_url*.

    Returns a dict mapping each validation error message to the list of
    slash-joined paths ("a/b/0/c") at which that error occurred. For the
    'cove-360' app, the 'date-time' format check is replaced so that plain
    dates are also accepted (via ``datetime_or_date``).
    """
    schema = requests.get(schema_url).json()
    validation_errors = collections.defaultdict(list)
    format_checker = FormatChecker()
    if current_app == 'cove-360':
        # 360Giving data may supply either a date or a date-time.
        format_checker.checkers['date-time'] = (datetime_or_date, ValueError)
    # Fix: dropped the needless enumerate() — the loop index was never used.
    for error in validator(schema, format_checker=format_checker).iter_errors(json_data):
        validation_errors[error.message].append(
            "/".join(str(item) for item in error.path))
    return dict(validation_errors)
def test_valid(filename, schema):
    """Assert that the JSON document in *filename* conforms to *schema*.

    Each validation error is surfaced as a warning (the offending instance,
    then the message and schema path) before the final assertion fires.
    """
    with open(filename) as f:
        document = json.load(f)
    found = list(validator(schema, format_checker=FormatChecker()).iter_errors(document))
    for error in found:
        warnings.warn(json.dumps(error.instance, indent=2))
        warnings.warn('{} ({})\n'.format(error.message, '/'.join(error.absolute_schema_path)))
    assert len(found) == 0, '{} is invalid. See warnings below.'.format(filename)
def test_valid(filename, schema):
    """Assert that the JSON document in *filename* conforms to *schema*.

    Each validation error is surfaced as a warning (the offending instance,
    then the message and schema path) before the final assertion fires.
    """
    errors = 0
    with open(filename) as f:
        data = json.load(f)
    for error in validator(schema, format_checker=FormatChecker()).iter_errors(data):
        errors += 1
        # Fix: dropped the redundant separators=(',', ': ') argument — that is
        # already the Python 3 default whenever indent is given, and matches
        # the sibling test_valid implementations.
        warnings.warn(json.dumps(error.instance, indent=2))
        warnings.warn('{} ({})\n'.format(error.message, '/'.join(error.absolute_schema_path)))
    assert errors == 0, '{} is invalid. See warnings below.'.format(filename)
def test_valid(filename, schema):
    """Assert that the JSON document in *filename* conforms to *schema*.

    Versioned and compiled releases are single objects, but the schema
    describes one release at a time, so they are wrapped in a list and
    validated item by item.
    """
    errors = 0
    with open(filename) as f:
        data = json.load(f)
    if filename.endswith(('-versioned.json', '-compiled.json')):
        data = [data]
    for datum in data:
        for error in validator(schema, format_checker=FormatChecker()).iter_errors(datum):
            errors += 1
            warnings.warn(json.dumps(error.instance, indent=2))
            warnings.warn(f"{error.message} ({'/'.join(error.absolute_schema_path)})\n")
    # Fix: the failure message was an f-string with no placeholder that
    # hard-coded "(unknown)"; report the actual failing file instead.
    assert errors == 0, f'{filename} is invalid. See warnings below.'
def handle(self):
    """Validate each JSON line of the input stream against a schema.

    The schema comes from ``self.args.schema``: a ``file://`` URL is read
    from disk, anything else is fetched over HTTP. With ``--check-urls``,
    'uri'-formatted strings are additionally dereferenced with a GET.
    Prints one line per validation error; raises CommandError on malformed
    JSON input.
    """
    components = urlparse(self.args.schema)
    if components.scheme == 'file':
        # Strip the 7-character "file://" prefix to get a filesystem path.
        with open(self.args.schema[7:]) as f:
            schema = json_load(f)
    else:
        schema = requests.get(self.args.schema).json()
    format_checker = FormatChecker()
    if self.args.check_urls:
        def check_url(instance):
            # See https://github.com/Julian/jsonschema/blob/master/jsonschema/_format.py
            # Non-strings are not this checker's concern; defer to the type check.
            if not isinstance(instance, str_types):
                return True
            rfc3987.parse(instance, rule='URI')  # raises ValueError
            try:
                response = requests.get(instance, timeout=self.args.timeout)
                # Only a plain 200 counts as resolvable.
                result = response.status_code in (200, )
                if not result:
                    print('HTTP {} on GET {}'.format(
                        response.status_code, instance))
                return result
            except requests.exceptions.Timeout:
                print('Timedout on GET {}'.format(instance))
                return False
        # Register the checker for the 'uri' format on this checker instance.
        format_checker.checks('uri', raises=(ValueError))(check_url)
    # NOTE(review): self.buffer() presumably yields one JSON document per
    # line — confirm against the command base class.
    for i, line in enumerate(self.buffer()):
        try:
            data = json_loads(line)
            errors = False
            for error in validator(
                    schema, format_checker=format_checker).iter_errors(data):
                print('item {}: {} ({})'.format(
                    i, error.message, '/'.join(error.absolute_schema_path)))
                errors = True
            if not errors and self.args.verbose:
                print('item {}: no errors'.format(i))
        except json.decoder.JSONDecodeError as e:
            raise CommandError('item {}: JSON error: {}'.format(i, e))
def test_codelist():
    """
    Ensures all codelists files are valid against codelist-schema.json.

    Also checks that no codelist repeats a code. Each CSV row is normalized
    so that empty non-code cells become None before schema validation.
    """
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                        "..", "schema", "codelist-schema.json")
    with open(path) as f:
        codelist_schema = json.load(f)

    any_errors = False
    for path, name, text, fieldnames, rows in walk_csv_data():
        codes_seen = set()
        if is_codelist(fieldnames):
            data = []
            # Row numbers start at 2: row 1 is the CSV header.
            for row_index, row in enumerate(rows, 2):
                code = row["code"]
                if code in codes_seen:
                    any_errors = True
                    warnings.warn('{}: Duplicate code "{}" on row {}'.format(
                        path, code, row_index))
                codes_seen.add(code)
                item = {}
                for k, v in row.items():
                    # Keep the code and any non-empty cell; blank cells become None.
                    if k == "code" or v:
                        item[k] = v
                    else:
                        item[k] = None
                data.append(item)
            # Fix: removed the pointless `schema = codelist_schema` alias.
            for error in validator(
                    codelist_schema, format_checker=FormatChecker()).iter_errors(data):
                any_errors = True
                warnings.warn("{}: {} ({})\n".format(
                    path, error.message, "/".join(error.absolute_schema_path)))

    assert not any_errors
def test_registry():
    """Validate the extension registry's CSV files.

    Checks each row of extensions.csv and extension_versions.csv against its
    JSON Schema, verifies configured per-file uniqueness constraints, checks
    that Base URL / Download URL resolve, and ensures every extension Id has
    at least one version (and every version a known extension).
    """
    configuration = {
        # Id must be unique in extensions.csv.
        'extensions.csv': {'Id': None},
        # Version and Base URL must be unique, within the scope of a given Id, in extension_versions.csv.
        'extension_versions.csv': {'Version': 'Id', 'Base URL': 'Id'},
    }

    # Keep track of extension identifiers, to ensure consistency across files.
    identifiers = {}

    for csv_basename, uniqueness in configuration.items():
        schema_basename = '{}-schema.json'.format(os.path.splitext(csv_basename)[0])
        with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'schema', schema_basename)) as f:
            schema = json.load(f)

        # Count the occurrences of a key-value pair, within a given scope.
        seen = {}
        for key, scope in uniqueness.items():
            if scope:
                # scoped: seen[scope][scope-value][key] -> set of values
                seen[scope] = defaultdict(lambda: defaultdict(set))
            else:
                # unscoped: seen[key] -> set of values
                seen[key] = set()

        with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', csv_basename)) as f:
            reader = csv.DictReader(f)
            for row in reader:
                id = row['Id']

                # Drop empty cells so "required" checks in the schema fire.
                for key in reader.fieldnames:
                    if not row[key]:
                        del row[key]

                for error in validator(schema, format_checker=FormatChecker()).iter_errors(row):
                    raise Exception('{}: {} ({})\n'.format(id, error.message, '/'.join(error.absolute_schema_path)))

                # Validate that URLs resolve.
                if row.get('Base URL'):
                    response = requests.get(row['Base URL'] + 'extension.json')
                    response.raise_for_status()
                if row.get('Download URL'):
                    response = requests.get(row['Download URL'])
                    response.raise_for_status()

                # Validate the uniqueness of a key-value pair, within a given scope.
                for key, scope in uniqueness.items():
                    value = row[key]
                    if scope:
                        if value in seen[scope][row[scope]][key]:
                            raise Exception('{}: Duplicate {} "{}" in scope of {} "{}" on line {}'.format(
                                csv_basename, key, value, scope, row[scope], reader.line_num))
                        seen[scope][row[scope]][key].add(value)
                    else:
                        if value in seen[key]:
                            raise Exception('{}: Duplicate {} "{}" on line {}'.format(
                                csv_basename, key, value, reader.line_num))
                        seen[key].add(value)

                if csv_basename == 'extensions.csv':
                    # Seed the version count; incremented by extension_versions.csv.
                    identifiers[id] = 0
                # Ensure every version belongs to a known extension.
                elif id in identifiers:
                    identifiers[id] += 1
                else:
                    raise Exception('extension_versions.csv: Id "{}" not in extensions.csv'.format(id))

    # Ensure every extension has at least one version.
    for id, count in identifiers.items():
        if not count:
            raise Exception('extensions.csv: Id "{}" not in extension_versions.csv'.format(id))
def validate_json_schema(path, data, schema, full_schema=not is_extension, top=cwd):
    """
    Prints and asserts errors in a JSON Schema.
    """
    errors = 0

    # Non-OCDS schema don't:
    # * pair "enum" and "codelist"
    # * disallow "null" in "type" of "items"
    # * UpperCamelCase definitions and lowerCamelCase properties
    # * allow "null" in the "type" of optional fields
    # * include "id" fields in objects within arrays
    # * require "title", "description" and "type" properties
    json_schema_exceptions = {
        'json-schema-draft-4.json',
        'meta-schema.json',
        'meta-schema-patch.json',
    }
    ocds_schema_exceptions = {
        'codelist-schema.json',
        'extension-schema.json',
        'extensions-schema.json',
        'extension_versions-schema.json',
        'dereferenced-release-schema.json',
    }
    exceptions = json_schema_exceptions | ocds_schema_exceptions
    # Only the infrastructure repository forbids "null" types.
    allow_null = repo_name != 'infrastructure'

    # Meta-schema validation: the file must itself be valid JSON Schema.
    for error in validator(schema, format_checker=FormatChecker()).iter_errors(data):
        errors += 1
        warnings.warn(json.dumps(error.instance, indent=2, separators=(',', ': ')))
        warnings.warn('ERROR: {} ({})\n'.format(error.message, '/'.join(error.absolute_schema_path)))
    if errors:
        warnings.warn('ERROR: {} is not valid JSON Schema ({} errors)'.format(path, errors))

    if all(basename not in path for basename in exceptions):
        kwargs = {}
        if 'versioned-release-validation-schema.json' in path:
            kwargs['additional_valid_types'] = ['object']
        errors += validate_items_type(path, data, **kwargs)
        errors += validate_codelist_enum(path, data)
        errors += validate_letter_case(path, data)
        errors += validate_merge_properties(path, data)

    # `full_schema` is set to not expect extensions to repeat information from core.
    if full_schema:
        exceptions_plus_versioned = exceptions | {
            'versioned-release-validation-schema.json',
        }
        exceptions_plus_versioned_and_packages = exceptions_plus_versioned | {
            'record-package-schema.json',
            'release-package-schema.json',
        }

        # Extensions aren't expected to repeat referenced `definitions`.
        errors += validate_ref(path, data)

        if all(basename not in path for basename in exceptions_plus_versioned):
            # Extensions aren't expected to repeat `title`, `description`, `type`.
            errors += validate_title_description_type(path, data)
            # Extensions aren't expected to repeat referenced `definitions`.
            errors += validate_object_id(path, JsonRef.replace_refs(data))

        if all(basename not in path for basename in exceptions_plus_versioned_and_packages):
            # Extensions aren't expected to repeat `required`. Packages don't have merge rules.
            errors += validate_null_type(path, data, allow_null=allow_null)

            # Extensions aren't expected to repeat referenced codelist CSV files.
            # TODO: This code assumes each schema uses all codelists. So, for now, skip package schema.
            codelist_files = set()
            for csvpath, reader in walk_csv_data(top):
                parts = csvpath.replace(top, '').split(
                    os.sep
                )  # maybe inelegant way to isolate consolidated extension
                if is_codelist(reader) and (
                        # Take all codelists in extensions.
                        (is_extension and not is_profile) or
                        # Take non-extension codelists in core, and non-core codelists in profiles.
                        not any(c in parts for c in ('extensions', 'patched'))):
                    name = os.path.basename(csvpath)
                    # A "+"/"-" prefix marks a patch to an existing codelist.
                    if name.startswith(('+', '-')):
                        if name[1:] not in external_codelists:
                            errors += 1
                            warnings.warn(
                                'ERROR: {} {} modifies non-existent codelist'.format(path, name))
                    else:
                        codelist_files.add(name)

            codelist_values = collect_codelist_values(path, data)
            if is_extension:
                all_codelist_files = codelist_files | external_codelists
            else:
                all_codelist_files = codelist_files

            # Codelists on disk but never referenced by the schema.
            unused_codelists = [
                codelist for codelist in codelist_files
                if codelist not in codelist_values
            ]
            # Codelists referenced by the schema but missing from the repository.
            missing_codelists = [
                codelist for codelist in codelist_values
                if codelist not in all_codelist_files
            ]
            if unused_codelists:
                errors += 1
                warnings.warn('ERROR: {} has unused codelists: {}'.format(
                    path, ', '.join(unused_codelists)))
            if missing_codelists:
                errors += 1
                warnings.warn(
                    'ERROR: repository is missing codelists: {}'.format(
                        ', '.join(missing_codelists)))
    else:
        errors += validate_deep_properties(path, data)

    assert errors == 0, 'One or more JSON Schema files are invalid. See warnings below.'
from jsonschema.validators import Draft4Validator as validator


def validate_against_schema(raw_data="", schema_name="release-package-schema"):
    """Validate *raw_data* (a JSON string) against a packaged schema.

    Returns a ``(status, error, schema)`` triple:
    - ('input-error', exception, None) when *raw_data* is not valid JSON;
    - ('validation-error', [errors...], schema) when validation fails
      (capped at ~100 errors);
    - ('input-valid', None, schema) when the data validates.
    """
    status = 'input-valid'
    error = None
    try:
        data = json.loads(raw_data)
    # Fix: `except (TypeError, ValueError), e:` is Python-2-only syntax and a
    # SyntaxError on Python 3.
    except (TypeError, ValueError) as e:
        status = 'input-error'
        error = e
        return status, error, None
    # Fix: open the schema file with a context manager so it is closed
    # deterministically instead of leaking the handle.
    with open(local(__file__).dirname + '/schemas/{schema_name}.json'.format(schema_name=schema_name)) as f:
        schema = json.load(f)
    error_list = []
    # Stop after ~100 errors so huge invalid payloads don't take forever.
    for n, e in enumerate(validator(schema).iter_errors(data)):
        error_list.append(e)
        if n >= 100:
            break
    if error_list:
        status = 'validation-error'
        return status, error_list, schema
    return status, error, schema
def get_schema_validation_errors(json_data, schema_obj, schema_name, cell_src_map, heading_src_map, extra_checkers=None):
    """Validate *json_data* against the package schema selected by *schema_name*.

    Returns a dict keyed by a JSON-encoded ``[validator_type, message, path]``
    triple, each mapping to a list of per-occurrence dicts (path, spreadsheet
    cell reference, offending value). The cell/heading source maps translate
    JSON paths back to spreadsheet locations.
    """
    if schema_name == 'record-package-schema.json':
        pkg_schema_obj = schema_obj.get_record_pkg_schema_obj()
    else:
        pkg_schema_obj = schema_obj.get_release_pkg_schema_obj()

    validation_errors = collections.defaultdict(list)
    format_checker = FormatChecker()
    if extra_checkers:
        format_checker.checkers.update(extra_checkers)

    # An extended schema is resolved from its local file; otherwise refs are
    # resolved against the schema host URL.
    if getattr(schema_obj, 'extended', None):
        resolver = CustomRefResolver('', pkg_schema_obj, schema_file=schema_obj.extended_schema_file)
    else:
        resolver = CustomRefResolver('', pkg_schema_obj, schema_url=schema_obj.schema_host)

    our_validator = validator(pkg_schema_obj, format_checker=format_checker, resolver=resolver)
    for n, e in enumerate(our_validator.iter_errors(json_data)):
        message = e.message
        path = "/".join(str(item) for item in e.path)
        # Path with array indices stripped, used to group errors per field.
        path_no_number = "/".join(str(item) for item in e.path if not isinstance(item, int))

        validator_type = e.validator
        # For format/type errors, key the lookup on the format name / type name
        # rather than the generic validator keyword.
        if e.validator in ('format', 'type'):
            validator_type = e.validator_value
            if isinstance(e.validator_value, list):
                validator_type = e.validator_value[0]
        new_message = validation_error_lookup.get(validator_type)
        if new_message:
            message = new_message

        value = {"path": path}
        cell_reference = cell_src_map.get(path)
        if cell_reference:
            first_reference = cell_reference[0]
            # 4-tuples come from spreadsheet cells; 2-tuples from JSON sources.
            if len(first_reference) == 4:
                value["sheet"], value["col_alpha"], value["row_number"], value["header"] = first_reference
            if len(first_reference) == 2:
                value["sheet"], value["row_number"] = first_reference

        # Only scalars are useful to echo back to the user.
        if not isinstance(e.instance, (dict, list)):
            value["value"] = e.instance

        if e.validator == 'required':
            field_name = e.message
            if len(e.path) > 2:
                # Prefix the missing field with its parent's name for context.
                if isinstance(e.path[-2], int):
                    parent_name = e.path[-1]
                else:
                    parent_name = e.path[-2]
                field_name = str(parent_name) + ":" + e.message
            heading = heading_src_map.get(path_no_number + '/' + e.message)
            if heading:
                # Prefer the spreadsheet heading over the JSON field name.
                field_name = heading[0][1]
                value['header'] = heading[0][1]
            message = "'{}' is missing but required".format(field_name)
        if e.validator == 'enum':
            # Codelist enums are reported elsewhere; skip them here.
            if "isCodelist" in e.schema:
                continue
            header = value.get('header')
            if not header:
                header = e.path[-1]
            message = "Invalid code found in '{}'".format(header)

        unique_validator_key = [validator_type, message, path_no_number]
        validation_errors[json.dumps(unique_validator_key)].append(value)
    return dict(validation_errors)
def get_schema_validation_errors(json_data, schema_obj, schema_name, cell_src_map, heading_src_map, extra_checkers=None):
    """Validate *json_data* against the package schema selected by *schema_name*.

    Returns a dict keyed by a JSON-encoded dict of (message_type, message,
    message_safe, path_no_number), each mapping to a list of per-occurrence
    dicts (path, spreadsheet cell reference, offending value). ``message``
    is plain text; ``message_safe`` is HTML-escaped for display.
    """
    if schema_name == 'record-package-schema.json':
        pkg_schema_obj = schema_obj.get_record_pkg_schema_obj()
    else:
        pkg_schema_obj = schema_obj.get_release_pkg_schema_obj()

    validation_errors = collections.defaultdict(list)
    format_checker = FormatChecker()
    if extra_checkers:
        format_checker.checkers.update(extra_checkers)

    # Extended schemas resolve refs from the local extended file; otherwise
    # refs are resolved against the schema host URL.
    if getattr(schema_obj, 'extended', None):
        resolver = CustomRefResolver(
            '', pkg_schema_obj, schema_url=schema_obj.schema_host,
            schema_file=schema_obj.extended_schema_file,
            file_schema_name=schema_obj.release_schema_name)
    else:
        resolver = CustomRefResolver('', pkg_schema_obj, schema_url=schema_obj.schema_host)

    our_validator = validator(pkg_schema_obj, format_checker=format_checker, resolver=resolver)
    for e in our_validator.iter_errors(json_data):
        message_safe = None
        message = e.message
        path = "/".join(str(item) for item in e.path)
        # Path with array indices stripped, used to group errors per field.
        path_no_number = "/".join(
            str(item) for item in e.path if not isinstance(item, int))

        value = {"path": path}
        cell_reference = cell_src_map.get(path)
        if cell_reference:
            first_reference = cell_reference[0]
            # 4-tuples come from spreadsheet cells; 2-tuples from JSON sources.
            if len(first_reference) == 4:
                value["sheet"], value["col_alpha"], value["row_number"], value[
                    "header"] = first_reference
            if len(first_reference) == 2:
                value["sheet"], value["row_number"] = first_reference

        header = value.get('header')
        if not header and len(e.path):
            header = e.path[-1]

        validator_type = e.validator
        if e.validator in ('format', 'type'):
            # Key message templates on the format/type name, and note whether
            # null is also permitted (so templates can say "is not null, and").
            validator_type = e.validator_value
            null_clause = ''
            if isinstance(e.validator_value, list):
                validator_type = e.validator_value[0]
                if 'null' not in e.validator_value:
                    null_clause = 'is not null, and'
            else:
                null_clause = 'is not null, and'

            message_template = validation_error_template_lookup.get(
                validator_type, message)
            message_safe_template = validation_error_template_lookup_safe.get(
                validator_type)
            if message_template:
                message = message_template.format(header, null_clause)
            if message_safe_template:
                message_safe = format_html(message_safe_template, header, null_clause)

        if e.validator == 'oneOf' and e.validator_value[0] == {
                'format': 'date-time'
        }:
            # Give a nice date related error message for 360Giving date `oneOf`s.
            message = validation_error_template_lookup['date-time']
            message_safe = format_html(
                validation_error_template_lookup_safe['date-time'])
            validator_type = 'date-time'

        # Only scalars are useful to echo back to the user.
        if not isinstance(e.instance, (dict, list)):
            value["value"] = e.instance

        if e.validator == 'required':
            field_name = e.message
            parent_name = None
            if len(e.path) > 2:
                if isinstance(e.path[-1], int):
                    parent_name = e.path[-2]
                else:
                    parent_name = e.path[-1]
            heading = heading_src_map.get(path_no_number + '/' + e.message)
            if heading:
                # Prefer the spreadsheet heading over the JSON field name.
                field_name = heading[0][1]
                value['header'] = heading[0][1]
            if parent_name:
                message = "'{}' is missing but required within '{}'".format(
                    field_name, parent_name)
                message_safe = format_html(
                    "<code>{}</code> is missing but required within <code>{}</code>",
                    field_name, parent_name)
            else:
                message = "'{}' is missing but required".format(field_name)
                # NOTE(review): the extra parent_name argument is ignored by
                # the single-placeholder template (str.format discards unused
                # positional args) — harmless but likely a copy-paste leftover.
                message_safe = format_html(
                    "<code>{}</code> is missing but required", field_name, parent_name)

        if e.validator == 'enum':
            # Codelist enums are reported elsewhere; skip them here.
            if "isCodelist" in e.schema:
                continue
            message = "Invalid code found in '{}'".format(header)
            message_safe = format_html("Invalid code found in <code>{}</code>",
                                       header)

        if e.validator == 'pattern':
            message_safe = format_html(
                '<code>{}</code> does not match the regex <code>{}</code>',
                header, e.validator_value)

        if e.validator == 'minItems' and e.validator_value == 1:
            message_safe = format_html(
                '<code>{}</code> is too short. You must supply at least one value, or remove the item entirely (unless it’s required).',
                e.instance)

        if e.validator == 'minLength' and e.validator_value == 1:
            message_safe = format_html(
                '<code>"{}"</code> is too short. Strings must be at least one character. This error typically indicates a missing value.',
                e.instance)

        # Fall back to the escaped plain message when no safe template applied.
        if message_safe is None:
            message_safe = escape(message)

        unique_validator_key = {
            'message_type': validator_type,
            'message': message,
            'message_safe': conditional_escape(message_safe),
            'path_no_number': path_no_number
        }
        validation_errors[json.dumps(unique_validator_key, sort_keys=True)].append(value)
    return dict(validation_errors)
import glob
import json
import os

from jsonschema import FormatChecker
from jsonschema.validators import Draft4Validator as validator

# Script: gather every extension's entry.json (one per subdirectory) into a
# single registry document, validating each entry against entry-schema.json.
# NOTE(review): `datetime` is used below but no import is visible in this
# chunk — presumably imported elsewhere in the file; confirm.
current_path = os.path.dirname(os.path.realpath(__file__))
gathered_json = {
    "last_updated": str(datetime.datetime.utcnow()),
    "extensions": []
}

with open('entry-schema.json') as fp:
    entry_validator = validator(json.load(fp), format_checker=FormatChecker())

exit_status = 0
for directory in glob.glob(current_path + "/*"):
    if os.path.isdir(directory):
        entry_json_file = os.path.join(directory, "entry.json")
        with open(entry_json_file) as fp:
            entry_obj = json.load(fp)
        if entry_validator.is_valid(entry_obj):
            # This loop is temporary only so docs work while transitioning onto new format
            for item in entry_obj:
                item["documentation_url"] = item["documentationUrl"]["en"]
                # Strip the trailing "/extension.json" (14 characters) from the URL.
                item["url"] = item["url"][:-14]
def test_codelist():
    """
    Ensures all codelists files are valid against codelist-schema.json.
    """
    # Known, accepted violations: message expected for a given codelist file
    # is ignored instead of failing the test.
    exceptions = {
        'currency.csv': "'Description' is a required property",
        'language.csv': "'Description' is a required property",
        'mediaType.csv': "'Description' is a required property",
        # ocds_countryCode_extension
        'country.csv': "'Description' is a required property",
        # ocds_coveredBy_extension
        'coveredBy.csv': "'Description' is a required property",
        # ocds_medicine_extension
        'administrationRoute.csv': "'Description' is a required property",
        'container.csv': "None is not of type 'string'",
        'dosageForm.csv': "None is not of type 'string'",
    }
    # Columns whose comma-separated cells are split into lists before validation.
    array_columns = ('Framework', 'Section')

    # Prefer the local schema; fall back to the canonical copy on GitHub.
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'schema', 'codelist-schema.json')
    if os.path.isfile(path):
        with open(path) as f:
            codelist_schema = json.load(f)
    else:
        url = 'https://raw.githubusercontent.com/open-contracting/standard-maintenance-scripts/main/schema/codelist-schema.json'  # noqa: E501
        codelist_schema = requests.get(url).json()

    # Codelists whose filename starts with "-" remove codes from another
    # codelist, so only the Code column is required.
    minus_schema = {
        "$schema": "http://json-schema.org/draft-04/schema#",
        "type": "array",
        "items": {
            "type": "object",
            "required": ["Code"],
            "additionalProperties": False,
            "properties": {
                "Code": {
                    "title": "Code",
                    "description": "The value to use in OCDS data.",
                    "type": "string",
                    "pattern": "^[A-Za-z0-9-]*$"
                }
            }
        }
    }

    any_errors = False
    for path, name, text, fieldnames, rows in walk_csv_data():
        codes_seen = set()
        if is_codelist(fieldnames):
            data = []
            # Row numbers start at 2: row 1 is the CSV header.
            for row_index, row in enumerate(rows, 2):
                code = row['Code']
                if code in codes_seen:
                    any_errors = True
                    warnings.warn(
                        f'{path}: Duplicate code "{code}" on row {row_index}')
                codes_seen.add(code)
                item = {}
                for k, v in row.items():
                    if k in array_columns:
                        item[k] = v.split(', ')
                    elif k == 'Code' or v:
                        item[k] = v
                    else:
                        # Blank non-code cells become None so "required"
                        # checks in the schema fire.
                        item[k] = None
                data.append(item)

            if os.path.basename(path).startswith('-'):
                schema = minus_schema
            else:
                schema = codelist_schema

            for error in validator(
                    schema,
                    format_checker=FormatChecker()).iter_errors(data):
                # Skip the one accepted violation per codelist, if any.
                if error.message != exceptions.get(os.path.basename(path)):
                    any_errors = True
                    warnings.warn(
                        f"{path}: {error.message} ({'/'.join(error.absolute_schema_path)})\n"
                    )

    assert not any_errors