def users_validate(jsonfile, verbose, debug):
    """Check users validation."""
    click.secho('Validate user file', fg='green')
    # Build the patron schema with all $refs resolved.
    path = current_jsonschemas.url_to_path(get_schema_for_resource('ptrn'))
    ptrn_schema = records_state.replace_refs(
        current_jsonschemas.get_schema(path=path))
    # TODO: get user schema path programaticly
    # path = current_jsonschemas.url_to_path(get_schema_for_resource('user'))
    path = 'users/user-v0.0.1.json'
    user_schema = records_state.replace_refs(
        current_jsonschemas.get_schema(path=path))
    # Merge user + patron schemas, appending their `required` lists.
    merger = Merger({"properties": {"required": {"mergeStrategy": "append"}}})
    schema = merger.merge(user_schema, ptrn_schema)
    # These two fields are not present in the combined fixture records.
    schema['required'] = [
        field for field in schema['required']
        if field not in ['$schema', 'user_id']
    ]
    for idx, data in enumerate(read_json_record(jsonfile)):
        if verbose:
            click.echo(f'\tTest record: {idx}')
        try:
            validate(data, schema)
        except ValidationError as err:
            click.secho(
                f'Error validate in record: {idx} pid: {data.get("pid")}',
                fg='red')
            if debug:
                click.secho(str(err))
            else:
                # Print only the informative line of the short traceback.
                trace_lines = traceback.format_exc(1).split('\n')
                click.secho(trace_lines[3].strip())
def get_remote_json(self, uri, **kwargs):
    """Get remote json.

    Adds loading of $ref locally for the application instance.
    See: github invenio-jsonschemas ext.py.
    :param uri: The URI of the JSON document to load.
    :param kwargs: Keyword arguments passed to json.loads().
    :returns: resolved json schema.
    """
    path = current_jsonschemas.url_to_path(uri)
    # Unknown local path: defer to the parent loader's remote fetch.
    if not path:
        return super(JsonLoader, self).get_remote_json(uri, **kwargs)
    # Known schema: serve it from the local application registry.
    return current_jsonschemas.get_schema(path=path)
def get_document_types_from_schema(schema='doc'):
    """Create document type definition from schema.

    :param schema: resource code of the schema to inspect (default ``'doc'``).
    :returns: dict mapping each main type title to a dict whose keys are the
        allowed subtypes (each mapped to ``True``).
    """
    path = current_jsonschemas.url_to_path(get_schema_for_resource(schema))
    # Keep the resolved schema in its own name; previously the `schema`
    # parameter (a resource code string) was reused to hold the schema dict,
    # shadowing the argument and confusing the two meanings.
    schema_data = _records_state.replace_refs(
        current_jsonschemas.get_schema(path=path))
    schema_types = schema_data.get('properties', {}).get('type', {}) \
        .get('items', {}).get('oneOf', [])
    doc_types = {}
    for schema_type in schema_types:
        sub_types = schema_type.get('properties', {}) \
            .get('subtype', {}).get('enum', [])
        doc_types[schema_type['title']] = {sub: True for sub in sub_types}
    return doc_types
def resolve_schema_by_url(url):
    """Get Schema object for given url.

    :param url: schema URL to resolve.
    :returns: the matching ``Schema`` record.
    :raises JSONSchemaNotFound: if the url maps to no known schema.
    """
    path = current_jsonschemas.url_to_path(url)
    try:
        # parse_path raises AttributeError when path is not parsable;
        # .one() raises NoResultFound when no row matches.
        _, _, name, major, minor, patch = parse_path(path)
        return Schema.query.filter_by(
            name=name, major=major, minor=minor, patch=patch).one()
    except (NoResultFound, AttributeError):
        raise JSONSchemaNotFound(schema=url)
def create_csv(record_type, json_file, output_directory, lazy, verbose,
               create_pid):
    """Create csv files from json.

    :param record_type: resource code the records belong to.
    :param json_file: path of the json file with the records to convert.
    :param output_directory: directory where the csv files are written.
    :param lazy: stream the json file instead of loading it all in memory.
    :param verbose: Verbose.
    :param create_pid: replace each record pid with its sequence number.
    """
    # Local import: only this command needs ExitStack.
    from contextlib import ExitStack

    click.secho(f"Create CSV files for: {record_type} from: {json_file}",
                fg='green')
    path = current_jsonschemas.url_to_path(
        get_schema_for_resource(record_type))
    add_schema = get_schema_for_resource(record_type)
    schema = current_jsonschemas.get_schema(path=path)
    schema = _records_state.replace_refs(schema)
    count = 0
    errors_count = 0
    with open(json_file) as infile:
        if lazy:
            # try to lazy read json file (slower, better memory management)
            records = read_json_record(infile)
        else:
            # load everything in memory (faster, bad memory management)
            records = json.load(infile)
        # ExitStack guarantees every output file is closed even when an
        # unexpected exception escapes the loop (the original bare open()
        # calls leaked all four handles on error).
        with ExitStack() as stack:
            file_name_pidstore = os.path.join(
                output_directory, f'{record_type}_pidstore.csv')
            click.secho(f'\t{file_name_pidstore}', fg='green')
            file_pidstore = stack.enter_context(
                open(file_name_pidstore, 'w'))
            file_name_metadata = os.path.join(
                output_directory, f'{record_type}_metadata.csv')
            click.secho(f'\t{file_name_metadata}', fg='green')
            file_metadata = stack.enter_context(
                open(file_name_metadata, 'w'))
            file_name_pids = os.path.join(
                output_directory, f'{record_type}_pids.csv')
            click.secho(f'\t{file_name_pids}', fg='green')
            file_pids = stack.enter_context(open(file_name_pids, 'w'))
            file_name_errors = os.path.join(
                output_directory, f'{record_type}_errors.json')
            file_errors = stack.enter_context(open(file_name_errors, 'w'))

            file_errors.write('[')
            for count, record in enumerate(records, 1):
                pid = record.get('pid')
                if create_pid:
                    pid = str(count)
                    record['pid'] = pid
                uuid = str(uuid4())
                if verbose:
                    click.secho(f'{count}\t{record_type}\t{pid}:{uuid}')
                # NOTE: naive utcnow() timestamp, kept for compatibility
                # with the existing csv consumers.
                date = str(datetime.utcnow())
                record['$schema'] = add_schema
                try:
                    validate(record, schema)
                    file_metadata.write(
                        csv_metadata_line(record, uuid, date))
                    file_pidstore.write(
                        csv_pidstore_line(record_type, pid, uuid, date))
                    file_pids.write(pid + '\n')
                except Exception as err:
                    click.secho(
                        f'{count}\t{record_type}: Error validate in record: ',
                        fg='red')
                    click.secho(str(err))
                    # Comma-separate all but the first error entry so the
                    # output stays valid JSON.
                    if errors_count > 0:
                        file_errors.write(',')
                    errors_count += 1
                    file_errors.write('\n')
                    for line in json.dumps(record, indent=2).split('\n'):
                        file_errors.write(' ' + line + '\n')
            file_errors.write('\n]')
    if errors_count == 0:
        # No failures: drop the (empty) errors file.
        os.remove(file_name_errors)
    click.secho(f'Created: {count-errors_count} Errors: {errors_count}',
                fg='yellow')
def users_validate(jsonfile, verbose, debug):
    """Check users validation.

    :param jsonfile: open json file with the user records to validate.
    :param verbose: echo every tested record.
    :param debug: print the full validation error instead of one trace line.
    """
    click.secho('Validate user file: ', fg='green', nl=False)
    click.echo(f'{jsonfile.name}')
    path = current_jsonschemas.url_to_path(get_schema_for_resource('ptrn'))
    ptrn_schema = current_jsonschemas.get_schema(path=path)
    ptrn_schema = records_state.replace_refs(ptrn_schema)
    # TODO: get user schema path programaticly
    # path = current_jsonschemas.url_to_path(get_schema_for_resource('user'))
    path = 'users/user-v0.0.1.json'
    user_schema = current_jsonschemas.get_schema(path=path)
    user_schema = records_state.replace_refs(user_schema)
    # Merge user + patron schemas, appending their `required` lists.
    merger_schema = {"properties": {"required": {"mergeStrategy": "append"}}}
    merger = Merger(merger_schema)
    schema = merger.merge(user_schema, ptrn_schema)
    schema['required'] = [
        s for s in schema['required'] if s not in ['$schema', 'user_id']
    ]
    datas = read_json_record(jsonfile)
    # usernames already holding a librarian role (was: librarien_roles_users)
    librarian_role_users = {}
    for idx, data in enumerate(datas):
        if verbose:
            click.echo(f'\tTest record: {idx} pid: {data.get("pid")}')
        try:
            validate(data, schema)
            patron = data.get('patron', {})
            if patron and patron.get('communication_channel') == 'email' \
                    and data.get('email') is None \
                    and patron.get('additional_communication_email') is None:
                raise ValidationError('At least one email should be defined '
                                      'for an email communication channel.')
            librarian_roles = [
                Patron.ROLE_SYSTEM_LIBRARIAN, Patron.ROLE_LIBRARIAN
            ]
            roles = data.get('roles', [])
            if any(role in librarian_roles for role in roles):
                if not data.get('libraries'):
                    raise ValidationError('Missing libraries')
                # test multiple librarian roles for same user
                username = data.get('username')
                if username in librarian_role_users:
                    raise ValidationError('Multiple librarian roles')
                librarian_role_users[username] = 1
            birth_date = data.get('birth_date')
            # Guard: a missing birth_date previously raised an uncaught
            # TypeError (`None[0]`) and aborted the whole validation run;
            # report it as a per-record validation error instead.
            if not birth_date:
                raise ValidationError('Missing birth date')
            if birth_date[0] == '0':
                raise ValidationError(f'Wrong birth date: {birth_date}')
        except ValidationError as err:
            click.secho(
                f'Error validate in record: {idx} pid: {data.get("pid")} '
                f'username: {data.get("username")}',
                fg='red')
            if debug:
                click.secho(str(err))
            else:
                # Print only the informative line of the short traceback.
                trace_lines = traceback.format_exc(1).split('\n')
                click.secho(trace_lines[3].strip())