def test_get_vocabulary(uri, is_known):
    """Check vocabulary lookup for both known and unknown URIs.

    A known URI must yield a vocabulary whose ``uri`` equals the input;
    an unknown one must raise ``CatalogueError``.
    """
    if not is_known:
        with pytest.raises(CatalogueError):
            Catalogue.get_vocabulary(URI(uri))
        return

    found = Catalogue.get_vocabulary(URI(uri))
    assert found.uri == uri
def test_translate_iso19115_to_datacite():
    """Translate the ISO19115 example to DataCite and compare with the expected document.

    The 'patch' output, applied to an empty document, must reproduce the
    'translation' output; the translation is then compared key by key
    against the stored expected document.
    """
    iso19115_schema = catalog.get_schema(
        URI('https://odp.saeon.ac.za/schema/metadata/saeon/iso19115'))
    source_doc = catalog.load_json(
        URI('https://odp.saeon.ac.za/schema/metadata/saeon/iso19115-example.json'))
    output_json = catalog.load_json(
        URI('https://odp.saeon.ac.za/schema/metadata/saeon/datacite4-example-translated.json'))

    outcome = iso19115_schema.evaluate(JSON(source_doc))
    patch_ops = outcome.output('patch', scheme='saeon/datacite4')
    translation = outcome.output('translation', scheme='saeon/datacite4')

    patched = JSONPatch(*patch_ops).evaluate(None)
    assert patched == translation

    # work in progress
    # assert translation == output_json
    assert translation.keys() == output_json.keys()
    for key in translation:
        # todo: resolve leftover empty arrays/objects when there are
        #  no source values to fill them ('contributors' is skipped until then)
        if key != 'contributors':
            assert translation[key] == output_json[key]
def test_add_directory_invalid_dir(setup_tmpdir):
    """``add_directory`` must reject a base_dir that is a file or does not exist."""
    tmpdir_path, subdir_name, jsonfile_name = setup_tmpdir

    def bad_base_dirs():
        # base_dir is a file
        yield pathlib.Path(tmpdir_path) / subdir_name / jsonfile_name
        # base_dir does not exist
        yield pathlib.Path(tmpdir_path) / 'foo'

    paths = bad_base_dirs()
    for _ in range(2):
        with pytest.raises(CatalogueError):
            Catalogue.add_directory(URI('http://example.com/'), next(paths))
def test_add_directory_and_load_json(base_uri, setup_tmpdir):
    """Register a directory under ``base_uri`` and load JSON from it.

    Loading through the registered base URI must return ``json_example``;
    a different base URI or an unknown file name must raise ``CatalogueError``.
    """
    tmpdir_path, subdir_name, jsonfile_name = setup_tmpdir
    Catalogue.add_directory(URI(base_uri), pathlib.Path(tmpdir_path))

    loaded = Catalogue.load_json(
        URI(f'{base_uri}{subdir_name}/{jsonfile_name}'))
    assert loaded == json_example

    failing_uris = (
        # incorrect base URI
        f'http://example.net/{subdir_name}/{jsonfile_name}',
        # incorrect file name
        f'{base_uri}{subdir_name}/baz',
    )
    for bad_uri in failing_uris:
        with pytest.raises(CatalogueError):
            Catalogue.load_json(URI(bad_uri))
def test_validity():
    """Both schemas must be valid, and each example document must satisfy its schema."""
    iso19115_schema = catalog.get_schema(
        URI('https://odp.saeon.ac.za/schema/metadata/saeon/iso19115'))
    iso19115_doc = catalog.load_json(
        URI('https://odp.saeon.ac.za/schema/metadata/saeon/iso19115-example'))
    datacite4_schema = catalog.get_schema(
        URI('https://odp.saeon.ac.za/schema/metadata/saeon/datacite4'))
    datacite4_doc = catalog.load_json(
        URI('https://odp.saeon.ac.za/schema/metadata/saeon/datacite4-example-translated'))

    pairs = (
        (iso19115_schema, iso19115_doc),
        (datacite4_schema, datacite4_doc),
    )
    for schema, document in pairs:
        # the schema itself first, then the example document against it
        assert schema.validate().valid
        assert schema.evaluate(JSON(document)).valid
def test_uri_parts():
    """Each component of the parsed ``example`` URI must match its expected value."""
    parsed = URI(example)
    expected_parts = {
        'scheme': 'http',
        'authority': 'example.com',
        'path': '/foo',
        'query': 'bar',
        'fragment': 'baz',
    }
    for part_name, part_value in expected_parts.items():
        assert getattr(parsed, part_name) == part_value
def validate(schema, part, standard):
    """Validate ``part`` against ``schema`` under the given ``standard``.

    When ``standard`` is a key of ``legacy``, validation is delegated to
    ``jsonschema.validate`` with the validator class stored under that key.
    Otherwise the schema is compiled as a ``JSONSchema`` whose metaschema
    URI is ``standard`` and the part's content is evaluated against it.

    Args:
        schema: object exposing ``json`` (the schema document) and
            ``content`` (used in error messages).
        part: object exposing ``json`` and ``content`` for the instance.
        standard: key into ``legacy``, or a metaschema URI string.

    Returns:
        ``(True, '')`` when the part is valid, otherwise ``(False, message)``.

    Raises:
        ValueError: if the schema itself is invalid.
    """
    if standard in legacy:
        cls = legacy[standard]
        try:
            jsonschema.validate(part.json, schema.json, cls=cls)
        except jsonschema.ValidationError as e:
            return (False, str(e))
        except jsonschema.SchemaError as e:
            # Chain the original error so the underlying cause stays visible.
            raise ValueError("Schema is invalid:\n{0}\n\n{1}".format(
                str(e), schema.content)) from e
        return (True, '')

    # The returned catalog is not referenced below; the call is kept because
    # it presumably sets up the catalog that JSONSchema relies on - TODO confirm.
    catalogue = create_catalog('2019-09', '2020-12')
    compiled_schema = JSONSchema(schema.json, metaschema_uri=URI(standard))
    if not compiled_schema.validate().valid:
        raise ValueError("Schema is invalid:\n{0}\n\n{1}".format(
            "INVALID SCHEMA", schema.content))

    # NOTE(review): the complex value looks like a sentinel the caller uses
    # for content that could not be parsed as JSON - confirm against caller.
    if part.json == (1+1j):
        return (False, 'INVALID JSON')

    jsonValue = JSON.loads(part.content)
    validation_result = compiled_schema.evaluate(jsonValue)
    if validation_result.valid:
        return (True, '')
    return (False, 'VALIDATION ERROR')
async def list_catalogs(
        paginator: Paginator = Depends(),
):
    """Return a page of catalogs, each with its schema document resolved."""

    def to_model(row):
        # Build the API model for one result row.
        catalog = row.Catalog
        return CatalogModel(
            id=catalog.id,
            schema_id=catalog.schema_id,
            schema_uri=catalog.schema.uri,
            schema_=schema_catalog.get_schema(URI(catalog.schema.uri)).value,
        )

    return paginator.paginate(select(Catalog), to_model)
async def list_tags(
        paginator: Paginator = Depends(),
):
    """Return a page of tags, each with its schema document resolved."""

    def to_model(row):
        # Build the API model for one result row.
        tag = row.Tag
        return TagModel(
            id=tag.id,
            cardinality=tag.cardinality,
            public=tag.public,
            scope_id=tag.scope_id,
            schema_id=tag.schema_id,
            schema_uri=tag.schema.uri,
            schema_=schema_catalog.get_schema(URI(tag.schema.uri)).value,
        )

    return paginator.paginate(select(Tag), to_model)
async def get_tag(
        tag_id: str,
):
    """Return the tag identified by ``tag_id``, or respond with 404 if none is found."""
    query = select(Tag).where(Tag.id == tag_id)
    tag = Session.execute(query).scalar_one_or_none()
    if not tag:
        raise HTTPException(HTTP_404_NOT_FOUND)

    fields = dict(
        id=tag.id,
        cardinality=tag.cardinality,
        public=tag.public,
        scope_id=tag.scope_id,
        schema_id=tag.schema_id,
        schema_uri=tag.schema.uri,
        schema_=schema_catalog.get_schema(URI(tag.schema.uri)).value,
    )
    return TagModel(**fields)
async def list_schemas(
        schema_type: SchemaType = None,
        paginator: Paginator = Depends(),
):
    """Return a page of schemas, restricted to ``schema_type`` when one is given."""

    def to_model(row):
        # Build the API model for one result row.
        schema = row.Schema
        return SchemaModel(
            id=schema.id,
            type=schema.type,
            uri=schema.uri,
            schema_=schema_catalog.get_schema(URI(schema.uri)).value,
        )

    query = select(Schema)
    if schema_type:
        query = query.where(Schema.type == schema_type)

    return paginator.paginate(query, to_model)
async def get_schema(
        schema_id: str,
):
    """Return the schema identified by ``schema_id``, or respond with 404 if none is found."""
    query = select(Schema).where(Schema.id == schema_id)
    schema = Session.execute(query).scalar_one_or_none()
    if not schema:
        raise HTTPException(HTTP_404_NOT_FOUND)

    fields = dict(
        id=schema.id,
        type=schema.type,
        uri=schema.uri,
        schema_=schema_catalog.get_schema(URI(schema.uri)).value,
    )
    return SchemaModel(**fields)
def _evaluate_record(self, record_id: str, timestamp: datetime) -> bool:
    """Evaluate a record model (API) against the publication schema for
    a catalog, and commit the result to the catalog_record table.

    The catalog_record entry is stamped with the `timestamp` of the latest
    contributing change (from catalog/record/record_tag/collection_tag).

    NOTE(review): the portion of the method visible here ends inside the
    "valid" branch; it neither uses `timestamp` nor returns a value, so
    the remainder of the body is presumably outside this view - confirm.
    """
    catalog = Session.get(Catalog, self.catalog_id)
    record = Session.get(Record, record_id)
    # Reuse the existing catalog_record row if there is one; otherwise start
    # a new instance keyed on (catalog_id, record_id).
    catalog_record = (Session.get(CatalogRecord, (self.catalog_id, record_id)) or
                      CatalogRecord(catalog_id=self.catalog_id, record_id=record_id))

    # Convert the record to its output API model and wrap it as a JSON
    # instance for evaluation against the catalog's schema.
    record_model = output_record_model(record)
    record_json = JSON(record_model.dict())

    publication_schema = schema_catalog.get_schema(URI(catalog.schema.uri))

    if (result := publication_schema.evaluate(record_json)).valid:
        # Valid record: store the 'flag' output, mark it as published and
        # keep the published form of the record.
        catalog_record.validity = result.output('flag')
        catalog_record.published = True
        catalog_record.published_record = self._create_published_record(record_model).dict()
        self._save_published_doi(record_model)
def test_uri(ptr: str, uri: str, canonical: bool):
    """The subschema at ``ptr`` must be retrievable by ``uri``.

    When ``canonical`` is set and the URI is not an anchored one, the
    subschema's ``canonical_uri`` must also equal the normalized URI.
    """
    rootschema = JSONSchema(id_example, metaschema_uri=metaschema_uri_2020_12)
    pointer = JSONPointer.parse_uri_fragment(ptr[1:])
    schema: JSONSchema = pointer.evaluate(rootschema)

    uri = URI(uri)
    assert schema == Catalogue.get_schema(uri)

    if not canonical:
        return

    # 'canonical' is as per the JSON Schema spec; however, we skip testing of
    # anchored URIs since we have only one way to calculate a schema's canonical URI
    fragment = uri.fragment
    if fragment and not fragment.startswith('/'):
        return

    if fragment:
        # allow chars in the RFC3986 'sub-delims' set in the 'safe' arg,
        # since these are allowed by the 'fragment' definition; in particular,
        # this means we don't percent encode '$'
        encoded = urllib.parse.quote(fragment, safe="/!$&'()*+,;=")
        expected = uri.copy(fragment=encoded)
    else:
        # remove empty fragment
        expected = uri.copy(fragment=False)

    assert schema.canonical_uri == expected
def test_create_uri(value):
    """A URI must stringify back to its (unquoted) input and round-trip through ``repr``."""
    uri = URI(value)

    actual_text = urllib.parse.unquote(str(uri))
    expected_text = urllib.parse.unquote(value)
    assert actual_text == expected_text

    # repr() must produce an expression that rebuilds an equal URI
    rebuilt = eval(repr(uri))
    assert rebuilt == uri
def test_copy_uri(kwargs, result):
    """Copying the ``example`` URI with ``kwargs`` must equal the URI built from ``result``."""
    copied = URI(example).copy(**kwargs)
    expected = URI(result)
    assert copied == expected
import pathlib

import pytest

from jschon import Catalogue, JSON, JSONSchema, URI, JSONEvaluator
from jschon.utils import json_loadf
from tests import metaschema_uri_2019_09, metaschema_uri_2020_12

# Location of the JSON-Schema-Test-Suite directory, next to this module.
testsuite_dir = pathlib.Path(__file__).parent / 'JSON-Schema-Test-Suite'

# Map the http://localhost:1234/ base URI onto the suite's 'remotes' directory.
Catalogue.add_directory(
    base_uri=URI('http://localhost:1234/'),
    base_dir=testsuite_dir / 'remotes',
)


def pytest_generate_tests(metafunc):
    """Prepare parametrization inputs for the test suite.

    The visible portion reads the `testsuite_version`, `testsuite_optionals`
    and `testsuite_formats` config options and maps each supported version
    to its metaschema URI and test directory.

    NOTE(review): the function appears to continue beyond this view (the
    collected names are not used yet) - confirm against the full file.
    """
    # Parameter names and the (still empty) value/id lists.
    argnames = ('metaschema_uri', 'schema', 'data', 'valid')
    argvalues = []
    testids = []

    # Options taken from the pytest configuration.
    only_version = metafunc.config.getoption("testsuite_version")
    include_optionals = metafunc.config.getoption("testsuite_optionals")
    include_formats = metafunc.config.getoption("testsuite_formats")

    base_dir = testsuite_dir / 'tests'
    # version label -> (metaschema URI, directory holding that draft's tests)
    version_dirs = {
        '2019-09': (metaschema_uri_2019_09, base_dir / 'draft2019-09'),
        '2020-12': (metaschema_uri_2020_12, base_dir / 'draft2020-12'),
    }
def test_load_json_invalid_uri(uri):
    """Loading JSON from an invalid URI must raise ``CatalogueError``."""
    expect_failure = pytest.raises(CatalogueError)
    with expect_failure:
        Catalogue.load_json(URI(uri))
@router.get(
    '/{catalog_id}',
    response_model=CatalogModel,
    dependencies=[Depends(Authorize(ODPScope.CATALOG_READ))],
)
async def get_catalog(
        catalog_id: str,
):
    """Return the catalog identified by `catalog_id`, with its schema resolved.

    Raises an HTTP 404 error if no such catalog exists.
    """
    if not (catalog := Session.get(Catalog, catalog_id)):
        raise HTTPException(HTTP_404_NOT_FOUND)

    return CatalogModel(
        id=catalog.id,
        schema_id=catalog.schema_id,
        schema_uri=catalog.schema.uri,
        # the schema document itself, looked up by its URI
        schema_=schema_catalog.get_schema(URI(catalog.schema.uri)).value,
    )


@router.get(
    '/{catalog_id}/records',
    response_model=Page[PublishedRecordModel],
    dependencies=[Depends(Authorize(ODPScope.CATALOG_READ))],
)
async def list_published_records(
        catalog_id: str,
        paginator: Paginator = Depends(),
):
    """List the published records of the catalog identified by `catalog_id`.

    Raises an HTTP 404 error if no such catalog exists.

    NOTE(review): only the existence check is visible here; the query and
    pagination presumably follow outside this view - confirm.
    """
    if not Session.get(Catalog, catalog_id):
        raise HTTPException(HTTP_404_NOT_FOUND)
def test_add_directory_invalid_uri(base_uri, setup_tmpdir):
    """Registering a directory under an invalid base URI must raise ``CatalogueError``."""
    tmpdir_path, _subdir_name, _jsonfile_name = setup_tmpdir
    expect_failure = pytest.raises(CatalogueError)
    with expect_failure:
        Catalogue.add_directory(URI(base_uri), pathlib.Path(tmpdir_path))
def test_base_uri(ptr: str, base_uri: str):
    """The subschema at ``ptr`` within ``id_example`` must report the expected base URI."""
    rootschema = JSONSchema(id_example, metaschema_uri=metaschema_uri_2020_12)
    pointer = JSONPointer.parse_uri_fragment(ptr[1:])
    schema: JSONSchema = pointer.evaluate(rootschema)
    actual = schema.base_uri
    assert actual == URI(base_uri)
def schema_md5(uri: str) -> str:
    """Compute the hex MD5 digest of the schema at `uri`, using its string form."""
    schema = schema_catalog.get_schema(URI(uri))
    serialized = str(schema).encode()
    digest = hashlib.md5(serialized)
    return digest.hexdigest()
import hashlib
import re
from datetime import datetime
from pathlib import Path
from urllib.parse import urlparse

from jschon import LocalSource, URI, create_catalog
from jschon.translation import translation_filter

# Catalog created with the '2020-12' and 'translation' arguments.
schema_catalog = create_catalog('2020-12', 'translation')
# Serve URIs under https://odp.saeon.ac.za/schema/ from the local 'schema'
# directory three levels above this file, using the '.json' suffix.
schema_catalog.add_uri_source(
    URI('https://odp.saeon.ac.za/schema/'),
    LocalSource(Path(__file__).parent.parent.parent / 'schema', suffix='.json'),
)


def schema_md5(uri: str) -> str:
    """Return an MD5 hash of the (serialized) schema identified by uri."""
    schema = schema_catalog.get_schema(URI(uri))
    # str(schema) is the serialized form that gets hashed
    return hashlib.md5(str(schema).encode()).hexdigest()


@translation_filter('date-to-year')
def date_to_year(date: str) -> int:
    """Return the year of a date string given in YYYY-MM-DD format."""
    return datetime.strptime(date, '%Y-%m-%d').year


@translation_filter('base-url')
def base_url(url: str) -> str:
    # NOTE(review): only the parsing step is visible here; the rest of this
    # filter is presumably outside this view - confirm against the full file.
    u = urlparse(url)
from fastapi import HTTPException
from jschon import JSONSchema, URI
from sqlalchemy import select
from starlette.status import HTTP_422_UNPROCESSABLE_ENTITY

from odp.api.models import RecordModelIn, TagInstanceModelIn
from odp.db import Session
from odp.db.models import Schema, SchemaType, Tag
from odp.lib.schema import schema_catalog


async def get_tag_schema(tag_instance_in: TagInstanceModelIn) -> JSONSchema:
    """Resolve the JSON schema of the tag referenced by an incoming tag instance.

    Raises an HTTP 422 error if the tag id does not match any tag.
    """
    query = select(Tag).where(Tag.id == tag_instance_in.tag_id)
    tag = Session.execute(query).scalar_one_or_none()
    if not tag:
        raise HTTPException(HTTP_422_UNPROCESSABLE_ENTITY, 'Invalid tag id')

    tag_schema = Session.get(Schema, (tag.schema_id, SchemaType.tag))
    schema_uri = URI(tag_schema.uri)
    return schema_catalog.get_schema(schema_uri)


async def get_metadata_schema(record_in: RecordModelIn) -> JSONSchema:
    """Resolve the JSON schema of the metadata schema referenced by an incoming record.

    Raises an HTTP 422 error if the schema id does not match any metadata schema.
    """
    metadata_schema = Session.get(Schema, (record_in.schema_id, SchemaType.metadata))
    if not metadata_schema:
        raise HTTPException(HTTP_422_UNPROCESSABLE_ENTITY, 'Invalid schema id')

    schema_uri = URI(metadata_schema.uri)
    return schema_catalog.get_schema(schema_uri)
from jschon import URI from jschon.catalogue import jsonschema_2019_09, jsonschema_2020_12 jsonschema_2019_09.initialize() jsonschema_2020_12.initialize() metaschema_uri_2019_09 = URI("https://json-schema.org/draft/2019-09/schema") metaschema_uri_2020_12 = URI("https://json-schema.org/draft/2020-12/schema") example_schema = { "$id": "dynamicRef8_main.json", "$defs": { "inner": { "$id": "dynamicRef8_inner.json", "$dynamicAnchor": "foo", "title": "inner", "additionalProperties": { "$dynamicRef": "#foo" } } }, "if": { "propertyNames": { "pattern": "^[a-m]" } }, "then": { "title": "any type of node", "$id": "dynamicRef8_anyLeafNode.json", "$dynamicAnchor": "foo", "$ref": "dynamicRef8_main.json#/$defs/inner"