Example no. 1
0
def users_validate(jsonfile, verbose, debug):
    """Check users validation.

    Builds a combined patron + user JSON schema and validates every
    record read from ``jsonfile`` against it, reporting failures.
    """
    click.secho('Validate user file', fg='green')

    # Resolve the patron schema and expand its $refs.
    ptrn_path = current_jsonschemas.url_to_path(
        get_schema_for_resource('ptrn'))
    ptrn_schema = records_state.replace_refs(
        current_jsonschemas.get_schema(path=ptrn_path))
    # TODO: get user schema path programmatically
    # path = current_jsonschemas.url_to_path(get_schema_for_resource('user'))
    user_schema = records_state.replace_refs(
        current_jsonschemas.get_schema(path='users/user-v0.0.1.json'))

    # Merge both schemas, appending their "required" lists.
    merger = Merger({"properties": {"required": {"mergeStrategy": "append"}}})
    schema = merger.merge(user_schema, ptrn_schema)
    # Neither field is expected to be present in the input records.
    excluded = ('$schema', 'user_id')
    schema['required'] = [
        field for field in schema['required'] if field not in excluded
    ]

    for idx, record in enumerate(read_json_record(jsonfile)):
        if verbose:
            click.echo(f'\tTest record: {idx}')
        try:
            validate(record, schema)
        except ValidationError as err:
            click.secho(
                f'Error validate in record: {idx} pid: {record.get("pid")}',
                fg='red')
            if debug:
                click.secho(str(err))
            else:
                # Show only the exception line of a one-frame traceback.
                trace_lines = traceback.format_exc(1).split('\n')
                click.secho(trace_lines[3].strip())
Example no. 2
0
    def get_remote_json(self, uri, **kwargs):
        """Get remote json.

        Adds loading of $ref locally for the application instance.
        See: github invenio-jsonschemas ext.py.
        :param uri: The URI of the JSON document to load.
        :param kwargs: Keyword arguments passed to json.loads().
        :returns: resolved json schema.
        """
        # Serve the schema from the local registry when the URI maps to a
        # known path; otherwise fall back to the default remote loader.
        local_path = current_jsonschemas.url_to_path(uri)
        if not local_path:
            return super(JsonLoader, self).get_remote_json(uri, **kwargs)
        return current_jsonschemas.get_schema(path=local_path)
Example no. 3
0
def get_document_types_from_schema(schema='doc'):
    """Create document type definition from schema.

    :param schema: resource type whose schema defines the document types.
    :returns: dict mapping each type title to a dict of its subtypes.
    """
    resolved_path = current_jsonschemas.url_to_path(
        get_schema_for_resource(schema))
    resolved = _records_state.replace_refs(
        current_jsonschemas.get_schema(path=resolved_path))
    # Type definitions live under properties.type.items.oneOf.
    type_defs = resolved.get('properties', {}) \
        .get('type', {}).get('items', {}).get('oneOf', [])
    return {
        type_def['title']: {
            sub_type: True
            for sub_type in type_def.get('properties', {})
            .get('subtype', {}).get('enum', [])
        }
        for type_def in type_defs
    }
Example no. 4
0
def create_csv(record_type, json_file, output_directory, lazy, verbose,
               create_pid):
    """Create csv files from json.

    Writes ``<record_type>_pidstore.csv``, ``<record_type>_metadata.csv``
    and ``<record_type>_pids.csv`` into ``output_directory``. Records that
    fail validation are collected in ``<record_type>_errors.json``, which
    is removed again when no error occurred.

    :param record_type: Resource type; used for schema lookup and as the
        output file name prefix.
    :param json_file: Path to the JSON input file.
    :param output_directory: Directory the output files are written to.
    :param lazy: If True, stream the input record by record.
    :param verbose: Verbose.
    :param create_pid: If True, replace each record's pid with its
        1-based sequence number.
    """
    click.secho(f"Create CSV files for: {record_type} from: {json_file}",
                fg='green')

    path = current_jsonschemas.url_to_path(
        get_schema_for_resource(record_type))
    add_schema = get_schema_for_resource(record_type)
    schema = current_jsonschemas.get_schema(path=path)
    schema = _records_state.replace_refs(schema)
    count = 0
    errors_count = 0
    with open(json_file) as infile:
        if lazy:
            # try to lazy read json file (slower, better memory management)
            records = read_json_record(infile)
        else:
            # load everything in memory (faster, bad memory management)
            records = json.load(infile)

        file_name_pidstore = os.path.join(output_directory,
                                          f'{record_type}_pidstore.csv')
        click.secho(f'\t{file_name_pidstore}', fg='green')
        file_name_metadata = os.path.join(output_directory,
                                          f'{record_type}_metadata.csv')
        click.secho(f'\t{file_name_metadata}', fg='green')
        file_name_pids = os.path.join(output_directory,
                                      f'{record_type}_pids.csv')
        click.secho(f'\t{file_name_pids}', fg='green')
        file_name_errors = os.path.join(output_directory,
                                        f'{record_type}_errors.json')

        # Context managers guarantee the output files are closed even when
        # an unexpected exception escapes the loop (the handles previously
        # leaked in that case).
        with open(file_name_pidstore, 'w') as file_pidstore, \
                open(file_name_metadata, 'w') as file_metadata, \
                open(file_name_pids, 'w') as file_pids, \
                open(file_name_errors, 'w') as file_errors:
            file_errors.write('[')
            for count, record in enumerate(records, 1):
                pid = record.get('pid')
                if create_pid:
                    pid = str(count)
                    record['pid'] = pid
                uuid = str(uuid4())
                if verbose:
                    click.secho(f'{count}\t{record_type}\t{pid}:{uuid}')
                date = str(datetime.utcnow())
                record['$schema'] = add_schema
                try:
                    validate(record, schema)
                    file_metadata.write(csv_metadata_line(record, uuid, date))
                    file_pidstore.write(
                        csv_pidstore_line(record_type, pid, uuid, date))
                    file_pids.write(pid + '\n')
                except Exception as err:
                    click.secho(
                        f'{count}\t{record_type}: Error validate in record: ',
                        fg='red')
                    click.secho(str(err))
                    if errors_count > 0:
                        # separate the JSON documents in the errors array
                        file_errors.write(',')
                    errors_count += 1
                    file_errors.write('\n')
                    for line in json.dumps(record, indent=2).split('\n'):
                        file_errors.write('  ' + line + '\n')

            file_errors.write('\n]')
    if errors_count == 0:
        # nothing failed: drop the (empty) errors file
        os.remove(file_name_errors)
    click.secho(f'Created: {count-errors_count} Errors: {errors_count}',
                fg='yellow')
Example no. 5
0
def users_validate(jsonfile, verbose, debug):
    """Check users validation.

    Validates every record in ``jsonfile`` against the merged
    patron + user schema and applies extra business checks (email
    communication channel, librarian roles/libraries, birth date).

    :param jsonfile: open file object with the user records.
    :param verbose: print a line for every tested record.
    :param debug: print the full validation error instead of a summary.
    """
    click.secho('Validate user file: ', fg='green', nl=False)
    click.echo(f'{jsonfile.name}')

    path = current_jsonschemas.url_to_path(get_schema_for_resource('ptrn'))
    ptrn_schema = current_jsonschemas.get_schema(path=path)
    ptrn_schema = records_state.replace_refs(ptrn_schema)
    # TODO: get user schema path programmatically
    # path = current_jsonschemas.url_to_path(get_schema_for_resource('user'))
    path = 'users/user-v0.0.1.json'
    user_schema = current_jsonschemas.get_schema(path=path)
    user_schema = records_state.replace_refs(user_schema)

    # Merge both schemas, appending their "required" lists; neither
    # '$schema' nor 'user_id' is expected in the input records.
    merger_schema = {"properties": {"required": {"mergeStrategy": "append"}}}
    merger = Merger(merger_schema)
    schema = merger.merge(user_schema, ptrn_schema)
    schema['required'] = [
        s for s in schema['required'] if s not in ['$schema', 'user_id']
    ]

    datas = read_json_record(jsonfile)
    librarien_roles_users = {}
    for idx, data in enumerate(datas):
        if verbose:
            click.echo(f'\tTest record: {idx} pid: {data.get("pid")}')
        try:
            validate(data, schema)
            patron = data.get('patron', {})
            if patron and patron.get('communication_channel') == 'email'\
               and data.get('email') is None \
               and patron.get('additional_communication_email') is None:
                raise ValidationError('At least one email should be defined '
                                      'for an email communication channel.')
            librarian_roles = [
                Patron.ROLE_SYSTEM_LIBRARIAN, Patron.ROLE_LIBRARIAN
            ]
            roles = data.get('roles', [])
            if any(role in librarian_roles for role in roles):
                if not data.get('libraries'):
                    raise ValidationError('Missing libraries')
                # test multiple librarien, roles for same user
                username = data.get('username')
                if username in librarien_roles_users:
                    raise ValidationError('Multiple librarian roles')
                else:
                    librarien_roles_users[username] = 1

            birth_date = data.get('birth_date')
            # Bug fix: a missing birth_date (None) previously raised an
            # unhandled TypeError on birth_date[0], aborting the whole run
            # instead of reporting the record.
            if birth_date and birth_date[0] == '0':
                raise ValidationError(f'Wrong birth date: {birth_date}')

        except ValidationError as err:
            click.secho(
                f'Error validate in record: {idx} pid: {data.get("pid")} '
                f'username: {data.get("username")}',
                fg='red')
            if debug:
                click.secho(str(err))
            else:
                # Show only the exception line of a one-frame traceback.
                trace_lines = traceback.format_exc(1).split('\n')
                click.secho(trace_lines[3].strip())
Example no. 6
0
def build_docs(schemas):
    """Generates API docs for included / specified data models.

    :param schemas: iterable of schema paths; when empty/None, all
        registered schemas are documented.
    """
    from json_schema_for_humans.generate import generate_from_file_object

    for schema_path in schemas or current_jsonschemas.list_schemas():
        click.secho(f'Generating docs for schema {schema_path}')
        schema = current_jsonschemas.get_schema(schema_path,
                                                with_refs=False,
                                                resolved=False)

        try:
            schema = JsonRef.replace_refs(
                schema,
                jsonschema=True,
                base_uri=current_app.config.get('JSONSCHEMAS_HOST'),
                loader=_records_state.loader_cls(),
            )

            # TODO: this is necessary to resolve JSONRefs in allOf
            schema = json.loads(
                json.dumps(schema, default=lambda x: x.__subject__))

            # Resolve definition schemas
            if 'definitions' in schema:
                definitions = list(schema['definitions'].keys())
                # Consider only a first definition as a schema for now
                schema = schema['definitions'][definitions[0]]

            # Bug fix: click.secho's ``color`` kwarg is a boolean ANSI
            # toggle, not a color name — ``fg`` sets the actual color.
            click.secho(f'Schema resolved to: {json.dumps(schema)}',
                        fg='blue')

        except JsonRefError as e:
            click.secho(f'Error resolving schema: {e}. Skipping...',
                        fg='red')
            continue

        # Generate and save html docs for the schema
        with tempfile.NamedTemporaryFile(mode="w+") as schema_source:
            schema_source.write(json.dumps(schema))
            schema_source.flush()

            # Bug fix: ``str.rstrip('.json')`` strips a *character set*
            # (e.g. 'persons.json' -> 'per'); drop the extension exactly.
            doc_name = os.path.splitext(basename(schema_path))[0]
            with open(f'docs/schemas/{doc_name}.html',
                      mode='w+') as result_file:
                click.secho(f'Writing schema docs to {result_file.name}',
                            fg='green')
                generate_from_file_object(schema_file=schema_source,
                                          result_file=result_file,
                                          minify=True,
                                          expand_buttons=True)

    # Generate and save schema index page
    index_md = r"""---
layout: default
---

# Data Models Schema Docs

"""
    for f in os.listdir('docs/schemas/'):
        if f.endswith('.html'):
            # Bug fix: strip the '.html' suffix exactly (rstrip would eat
            # any trailing 'h'/'t'/'m'/'l'/'.' characters of the name too).
            index_md += f'- [{f[:-len(".html")]}](./{f})\n'

    with open('docs/schemas/index.md', mode='w+') as index_file:
        index_file.write(index_md)
 def get_resolved_record_schema(self, obj):
     """Resolve refs in record schema.

     :param obj: object exposing ``record_path``, the schema path.
     :returns: a deep copy of the fully resolved schema.
     """
     resolved = current_jsonschemas.get_schema(obj.record_path,
                                               with_refs=True,
                                               resolved=True)
     # Deep copy so all the JSONRefs get resolved and callers can mutate
     # the result without touching the cached schema.
     return copy.deepcopy(resolved)