def _parse_json(self, jsonfile):
    """Load *jsonfile* (a URI) and return the parsed document with every
    `$ref` resolved — references to local files and to remote (HTTP/S)
    files alike.
    """
    return jsonref.load_uri(jsonfile, base_uri=None, jsonschema=True)
def _get_merge_rules_from_url_or_path(schema):
    """Dereference *schema* — an http(s) URL or a local file path — and
    derive the merge rules from the resolved document.
    """
    if schema.startswith('http'):
        dereferenced = jsonref.load_uri(schema)
    else:
        with open(schema) as handle:
            dereferenced = jsonref.load(handle)
    return _get_merge_rules_from_dereferenced_schema(dereferenced)
def load(source, load_as_ref):
    """Resolve *source* against the enclosing ``base_uri`` and load it.

    When *load_as_ref* is true, return a lazy ``{'$ref': source}`` proxy
    (resolved on access); otherwise fetch and dereference eagerly.
    """
    full_uri = '{}{}'.format(base_uri, source)
    log.debug('trying to load rel: %s full uri: %s base %s', source, full_uri, base_uri)
    if load_as_ref:
        return jsonref.JsonRef.replace_refs({'$ref': source}, base_uri=base_uri, loader=yamlloader)
    return jsonref.load_uri(full_uri, base_uri=base_uri, loader=yamlloader)
def process_schema(schema):
    """Dereference *schema* (URL or local path; the latest release schema
    when falsy) and return its generated merge rules as a dict.
    """
    if not schema:
        schema = get_latest_schema_uri()
    if schema.startswith('http'):
        dereferenced = jsonref.load_uri(schema)
    else:
        with open(schema) as handle:
            dereferenced = jsonref.load(handle)
    return dict(merge_rule_generate(dereferenced['properties'], tuple()))
def _get_hcert_schema(version: str):
    """Download the HCERT combined schema for *version*.

    Unknown versions fall back to release 1.3.0. The schema filename
    changed from DGC to DCC starting with release 1.2.1.
    """
    print('Loading HCERT schema ...')
    # Per-release schema filename prefix (DGC up to 1.2.0, DCC afterwards).
    prefixes = {
        '1.0.0': 'DGC',
        '1.0.1': 'DGC',
        '1.1.0': 'DGC',
        '1.2.0': 'DGC',
        '1.2.1': 'DCC',
        '1.3.0': 'DCC',
    }
    # Default schema is version 1.3.0.
    release = version if version in prefixes else '1.3.0'
    return load_uri(
        'https://raw.githubusercontent.com/ehn-digital-green-development/ehn-dgc-schema/release/{}/'
        '{}.combined-schema.json'.format(release, prefixes[release]))
def get_jsonschema_csl_validator():
    """
    Return a jsonschema validator for the CSL Item JSON Schema
    """
    import jsonref
    import jsonschema
    url = 'https://github.com/dhimmel/schema/raw/manubot/csl-data.json'
    # Use jsonref to workaround https://github.com/Julian/jsonschema/issues/447
    schema = jsonref.load_uri(url, jsonschema=True)
    validator_cls = jsonschema.validators.validator_for(schema)
    validator_cls.check_schema(schema)
    return validator_cls(schema)
def _load_json_schema(filename):
    """
    Loads the schema file of the given name.
    The filename is relative to the root schema directory.
    JSON and YAML formats are supported.
    """
    check_schema_base_path()
    return jsonref.load_uri(
        'file:///{}'.format(filename),
        base_uri='{}{}'.format(_SCHEMA_BASE_URI, filename),
        loader=LocalJsonLoader(_SCHEMA_BASE_PATH),
        jsonschema=True,
        load_on_repr=False,
    )
def handle(self):
    """Create the database from the dereferenced schema, then upload the
    releases extracted from every input item.
    """
    schema = jsonref.load_uri(self.args.schema)
    metadata, engine = self.create_db(self.args.database_url, schema, drop=self.args.drop)
    for data in self.items():
        if is_record_package(data):
            # Gather each record's releases, plus its compiled release if any.
            releases = []
            for record in data['records']:
                releases.extend(record['releases'])
                if 'compiledRelease' in record:
                    releases.append(record['compiledRelease'])
        elif is_release_package(data) or is_record(data):
            releases = data['releases']
        else:
            # Bare release document.
            releases = [data]
        self.upload_file(metadata, engine, releases)
def flatten_dict(data, path=tuple(), schema_titles=None):
    """Recursively flatten *data* into (label, value) pairs.

    Nested dict keys are joined with ": " into a field path. A list of
    strings collapses into one comma-separated value; dicts inside lists
    are recursed into. Each field path is replaced by its human-readable
    title from the grant schema when one exists.

    :param data: the (possibly nested) dict to flatten.
    :param path: tuple of ancestor keys for the current recursion level.
    :param schema_titles: optional {field-path: title} mapping. When None
        (the normal top-level call) it is built once from the remote grant
        schema. It is threaded through recursive calls — the original code
        re-fetched and re-flattened the schema at every nesting level.
    :returns: generator of (title-or-field, value) pairs.
    """
    if schema_titles is None:
        # Fetch and dereference the schema once, at the top-level call only.
        schema = jsonref.load_uri(settings.GRANT_SCHEMA)
        schema_titles = dict(flatten_schema_titles(schema))
    for key, value in data.items():
        field = ": ".join(path + (key,))
        if isinstance(value, list):
            string_list = []
            for item in value:
                if isinstance(item, dict):
                    yield from flatten_dict(item, path + (key,), schema_titles)
                if isinstance(item, str):
                    string_list.append(item)
            if string_list:
                yield schema_titles.get(field) or field, ", ".join(string_list)
        elif isinstance(value, dict):
            yield from flatten_dict(value, path + (key,), schema_titles)
        else:
            yield schema_titles.get(field) or field, value
def flatten_dict(data, path=tuple(), schema_titles=None):
    """Recursively flatten *data* into (label, value) pairs.

    Nested dict keys are joined with ": " into a field path. A list of
    strings collapses into one comma-separated value; dicts inside lists
    are recursed into. Each field path is replaced by its human-readable
    title from the grant schema when one exists.

    :param data: the (possibly nested) dict to flatten.
    :param path: tuple of ancestor keys for the current recursion level.
    :param schema_titles: optional {field-path: title} mapping. When None
        (the normal top-level call) it is built once from the remote grant
        schema. It is threaded through recursive calls — the original code
        re-fetched and re-flattened the schema at every nesting level.
    :returns: generator of (title-or-field, value) pairs.
    """
    if schema_titles is None:
        # Fetch and dereference the schema once, at the top-level call only.
        schema = jsonref.load_uri(settings.GRANT_SCHEMA)
        schema_titles = dict(flatten_schema_titles(schema))
    for key, value in data.items():
        field = ": ".join(path + (key, ))
        if isinstance(value, list):
            string_list = []
            for item in value:
                if isinstance(item, dict):
                    yield from flatten_dict(item, path + (key, ), schema_titles)
                if isinstance(item, str):
                    string_list.append(item)
            if string_list:
                yield schema_titles.get(field) or field, ", ".join(string_list)
        elif isinstance(value, dict):
            yield from flatten_dict(value, path + (key, ), schema_titles)
        else:
            yield schema_titles.get(field) or field, value
def stats(request):
    """Render per-field statistics for the grant index.

    Runs one Elasticsearch search (size=0, aggregations only) computing,
    for every mapped field: the top 5 term buckets, how many documents
    contain the field, and the count of distinct values; also flags
    whether each field appears in the grant schema. Renders "stats.html".
    """
    text_query = request.GET.get('text_query')
    if not text_query:
        text_query = '*'
    context = {'text_query': text_query or ''}
    es = get_es()
    mapping = es.indices.get_mapping(index=settings.ES_INDEX)
    # Flatten the ES mapping into dotted field paths for the 'grant' doc type.
    all_fields = list(flatten_mapping(mapping[settings.ES_INDEX]['mappings']['grant']['properties']))
    query = {"query": {"bool": {"must": {"query_string": {"query": text_query}}, "filter": {}}}, "aggs": {}}
    schema = jsonref.load_uri(settings.GRANT_SCHEMA)
    schema_fields = set(flatten_schema(schema))
    # Three aggregations per field; the agg name encodes "<field>:<agg_type>"
    # so the results loop below can split them back apart.
    for field in all_fields:
        query["aggs"][field + ":terms"] = {"terms": {"field": field, "size": 5}}
        query["aggs"][field + ":missing"] = {"missing": {"field": field}}
        query["aggs"][field + ":cardinality"] = {"cardinality": {"field": field}}
    # '*' was only a default for the ES query string; show an empty box in the UI.
    if context['text_query'] == '*':
        context['text_query'] = ''
    field_info = collections.defaultdict(dict)
    results = es.search(body=query, index=settings.ES_INDEX, size=0)
    for field, aggregation in results['aggregations'].items():
        field_name, agg_type = field.split(':')
        field_info[field_name]["in_schema"] = field_name in schema_fields
        if agg_type == 'terms':
            field_info[field_name]["terms"] = aggregation["buckets"]
        if agg_type == 'missing':
            # found = total hits minus docs where the field is missing.
            # NOTE(review): assumes the pre-ES7 response shape where
            # hits.total is an int — confirm against the ES client version.
            field_info[field_name]["found"] = results['hits']['total'] - aggregation["doc_count"]
        if agg_type == 'cardinality':
            field_info[field_name]["distinct"] = aggregation["value"]
    # Most-populated fields first.
    context['field_info'] = sorted(field_info.items(), key=lambda val: -val[1]["found"])
    context['results'] = results
    return render(request, "stats.html", context=context)
def validate_extension(extension):
    """
    Validation of the extension domain if one is included.

    :param extension: one entry from the BCO's extension_domain; expected
        to be a dict carrying an 'extension_schema' URL.
    :returns: tuple (error_string, error_flag) where error_flag counts the
        errors encountered (0 means the extension validated).
    """
    error_flag = 0
    error_string = ''
    if isinstance(extension, dict):
        try:
            schema = jsonref.load_uri(extension['extension_schema'])
            try:
                print("Loaded Extension Schema: ", schema['title'])
                name = schema['title']
                error_string, error_flag = bco_validator(schema, extension)
            # For if the schema has no ['title']
            except KeyError:
                print("Loaded Extension Schema: ", schema['$id'])
                name = schema['$id']
        except json.decoder.JSONDecodeError:
            # BUG FIX: `schema` is unbound when load_uri itself fails to
            # decode, so the old `schema['$id']` raised NameError here.
            # Report the URL we tried to load instead.
            print('Failed to load extension schema', extension.get('extension_schema'))
            error_flag += 1
        except TypeError:
            print('Failed to load extension schema. \nInvalid format ', )
            print(extension)
            error_string += json.dumps(extension)
            error_flag += 1
    else:
        print('Invalid BCO extension format')
        error_string += json.dumps(extension)
        error_flag = 1
    if error_flag == 0:
        print(name + ' PASSED \U0001F44D')
    return error_string, error_flag
if __name__ == '__main__':
    # Command-line entry point: tabularize OCDS JSON files into a database.
    parser = argparse.ArgumentParser(description='Get some ocds data tabularized')
    parser.add_argument('database_url', help='sqlalchemy database url')
    parser.add_argument('files', nargs='+', help='json files to upload to db')
    parser.add_argument('--merge', action='store_true',
                        help='say if you want to ocds merge the files')
    parser.add_argument('--drop', action='store_true',
                        help='drop all current tables')
    parser.add_argument('--schema_url',
                        help='release-schema.json file used, defaults to 1.',
                        default='http://ocds.open-contracting.org/standard/r/1__0__2/release-schema.json')
    args = parser.parse_args()
    # Dereference the release schema once and reuse it for DB creation and upload.
    deref_schema = jsonref.load_uri(args.schema_url)
    metadata, engine = create_db(args.database_url, deref_schema, drop=args.drop)
    upload_files(metadata, engine, deref_schema, args.files, args.merge)
def validate_bco(options):
    """
    # Check for schema compliance.
    # Arguments
    # ---------
    # object_pass: the object being checked.
    # Check the object against the provided schema.
    """
    # Running totals across the main schema check and every extension check.
    error_flags = 0
    error_strings = ''
    bco_dict = load_bco(options)
    if options.schema is None:
        # No schema supplied: try the BCO's own spec_version URL, falling
        # back to the IEEE 2791 master schema on any load failure.
        try:
            schema = jsonref.load_uri(bco_dict['spec_version'])
            print("Loaded Schema: ", schema['title'], ' from ', bco_dict['spec_version'])
        except KeyError:
            print('Failed to load the provided Schema OR none was provided.' \
                + ' Using default instead')
            schema = jsonref.load_uri(str('https://opensource.ieee.org/2791-object' \
                + '/ieee-2791-schema/-/raw/master/2791object.json'))
        except json.decoder.JSONDecodeError:
            print('Failed to load the provided Schema OR none was provided.' \
                + ' Using default instead')
            schema = jsonref.load_uri(str('https://opensource.ieee.org/2791-object' \
                + '/ieee-2791-schema/-/raw/master/2791object.json'))
            print("Loaded default schema: ", schema['title'])
            print("BioCompute Object: ", bco_dict['provenance_domain']['name'])
        except ValueError:
            print('Failed to load the provided Schema OR none was provided.' \
                + ' Using default instead')
            schema = jsonref.load_uri(str('https://opensource.ieee.org/2791-object' \
                + '/ieee-2791-schema/-/raw/master/2791object.json'))
    else:
        if os.path.exists(options.schema):
            # Local schema file: build a file:// base URI so relative $refs
            # resolve next to the schema file.
            # NOTE(review): options.schema behaves like a path here but
            # options.schema.name is read below — presumably it is an open
            # file object from argparse FileType; confirm against the caller.
            base_uri = 'file://{}/'.format(os.path.dirname \
                (os.path.abspath(options.schema.name)))
            print(base_uri)
            schema = jsonref.load \
                (options.schema, base_uri=base_uri, jsonschema=True)
            try:
                print("Schema: ", schema['title'])
                print("File location: ", base_uri)
                print("BioCompute Object: ", bco_dict['provenance_domain']['name'])
            except json.decoder.JSONDecodeError:
                pass
        elif url_valid(options.schema):
            # Remote schema URL; fall back to the IEEE default on bad JSON.
            try:
                schema = jsonref.load_uri(options.schema)
                print("Loaded Schema: ", schema['title'], ' from ', options.schema)
            except json.decoder.JSONDecodeError:
                print('Failed to load the provided Schema.' \
                    + ' Using default instead')
                schema = jsonref.load_uri(str('https://opensource.ieee.org/2791-object' \
                    + '/ieee-2791-schema/-/raw/master/2791object.json'))
    # Validate the whole BCO against whichever schema was loaded above.
    error_string, error_flag = bco_validator(schema, bco_dict)
    error_flags += error_flag
    error_strings += error_string
    # Each extension-domain entry is validated against its own schema.
    if 'extension_domain' in bco_dict.keys():
        for extension in bco_dict['extension_domain']:
            error_string, error_flag = validate_extension(extension)
            error_flags += error_flag
            error_strings += error_string
    if error_flags == 0:
        print('BCO VALID \U0001F389')
        # Remove any stale error log from a previous failed run.
        try:
            os.remove("error.log")
        except OSError:
            pass
    else:
        with open('error.log', 'w') as file:
            file.write(error_strings)
        print('Encountered', error_flags, 'errors while validating. \U0001F61E' \
            + '\n See "error.log" for more detail')
def remote_registration_validator(schema_home, profiles):
    "Load up a validator based on the remote schema"
    schema_uri = f"{schema_home}/{profiles['registration']}/main.schema.json"
    # BUG FIX: the stdlib `json` module has no load_uri; this must be
    # jsonref.load_uri, which fetches the URI and dereferences its $refs.
    schema = jsonref.load_uri(schema_uri, jsonschema=True)
    return compile_schema(schema)
def load(uri):
    """Load *uri* relative to the enclosing ``base_uri``, resolving $refs
    with the YAML-aware loader."""
    full_uri = '{}/{}'.format(base_uri, uri)
    return jsonref.load_uri(full_uri, base_uri=base_uri, loader=yamlloader)
def get_mapping_sheet_from_url(url):
    """Fetch and dereference the schema at *url*, then build its mapping sheet."""
    dereferenced = jsonref.load_uri(url)
    return _get_mapping_sheet(dereferenced)
def handle(self):
    """Create the database from the dereferenced schema and upload all files."""
    schema = jsonref.load_uri(self.args.schema)
    metadata, engine = self.create_db(
        self.args.database_url, schema, drop=self.args.drop)
    self.upload_files(metadata, engine, schema)
def read_json_schema_from_url(url):
    """Fetch the JSON document at *url* and return it as a Python dict
    with all $refs resolved."""
    dereferenced = jsonref.load_uri(url)
    return dereferenced
def _get_hcert_schema():
    """Download and return the published HCERT/DGC JSON schema."""
    print('Loading HCERT schema ...')
    schema_url = 'https://id.uvci.eu/DGC.schema.json'
    return load_uri(schema_url)