def create_credential(filename):
    """Read a credential document from *filename* and return the compact
    JSON-LD representation of the credential."""
    with open(filename, 'r') as handle:
        document = json.load(handle)
    # The document's own @context (if any) drives the compaction.
    ctx = document.pop('@context', {})
    jsonld.set_document_loader(jsonld.requests_document_loader(timeout=5))
    return jsonld.compact(document, ctx)
def flatten_metadata_graph(obj):
    """Flatten *obj* into one dict per known dataset, even when several
    metadata sets from different sources describe the same dataset."""
    from pyld import jsonld

    # Cache schema requests; previously "visited" schemas then need no
    # network access at all.
    jsonld.set_document_loader(_cached_load_document)
    # TODO cache entire graphs to prevent repeated term resolution for
    # subsequent calls
    schema_ctx = {"@context": "http://schema.org/"}
    return jsonld.flatten(obj, ctx=schema_ctx)
# Collapsed snippet: ``load_document_local`` is a PyLD document loader that
# serves a few known context URLs from local files under contexts/ —
# the IIIF Presentation 2 context from context_20.json, the two Open
# Annotation context URLs from context_oa.json, and anything else from
# context_10.json. It is installed via jsonld.set_document_loader, then the
# module loads contexts/annotation_frame.json as ``annoframe`` and sets the
# output context URI. The final ``convert`` definition is cut off here.
# NOTE(review): ``file(...)`` is the Python 2 builtin; under Python 3 this
# must become ``open(...)`` (ideally in a ``with`` block). Left byte-identical
# because the snippet is truncated and collapsed onto one line.
def load_document_local(url): doc = {"contextUrl": None, "documentUrl": None, "document": ""} if url == "http://iiif.io/api/presentation/2/context.json": fn = "contexts/context_20.json" elif url in ["http://www.w3.org/ns/oa.jsonld", "http://www.w3.org/ns/oa-context-20130208.json"]: fn = "contexts/context_oa.json" else: fn = "contexts/context_10.json" fh = file(fn) data = fh.read() fh.close() doc["document"] = data return doc jsonld.set_document_loader(load_document_local) # Load our (very simple) frame fh = file("contexts/annotation_frame.json") data = fh.read() fh.close() annoframe = json.loads(data) # Output Context URI contextURI = "http://www.w3.org/ns/oa-context-20130208.json" def convert(anno): # check we're already parsed if not type(anno) == dict:
# Collapsed snippet, truncated at the start: the interior of an ``expand``-style
# routine that walks KEYS_TO_EXPAND, recursively expanding '@id' references
# (lists map each element; scalars expand directly) and dropping falsy/None
# results, then returns newObj (or a list of expanded items) only if non-empty.
# After the function, a requests-based PyLD loader is installed (10 s timeout)
# and a ReproNim BeckAnxietyInventory schema URL is expanded and printed; the
# trailing ``# Output:`` lines are sample output, not code.
# NOTE(review): left byte-identical — the enclosing ``def`` is outside this view.
for k in KEYS_TO_EXPAND: if k in newObj.keys(): if isinstance(newObj.get(k), list): v = [expand(lv.get('@id')) for lv in newObj.get(k)] v = v if v != [None] else None else: v = expand(newObj[k]) if bool(v): newObj[k] = v return (newObj if bool(newObj) else None) else: expanded = [expand(n, keepUndefined) for n in newObj] return (expanded if bool(expanded) else None) jsonld.set_document_loader(jsonld.requests_document_loader(timeout=10)) # compact a document according to a particular context # see: http://json-ld.org/spec/latest/json-ld/#compacted-document-form expanded = expand( 'https://raw.githubusercontent.com/ReproNim/schema-standardization/master/activities/BeckAnxietyInventory/BeckAnxietyInventory_schema' ) print(json.dumps(expanded, indent=2)) # Output: # { # "@context": {...}, # "image": "http://manu.sporny.org/images/manu.png", # "homepage": "http://manu.sporny.org/", # "name": "Manu Sporny"
# Collapsed snippet, truncated at both ends. Middle part is complete:
# ``load_document_and_cache`` memoizes PyLD documents in ``docCache`` keyed by
# URL (fetching via a ``fetch`` helper defined elsewhere) and is installed as
# the document loader; then extensions/aat_labels.json is loaded into
# ``aat_labels``. ``fetch_aat_label`` (rewrites aat: IDs to vocab.getty.edu
# JSON-LD URLs and fetches them) is cut off at its ``except:``.
# NOTE(review): Python 2 idioms throughout — ``dict.has_key`` (removed in
# Python 3; use ``url in docCache``), ``file(...)`` builtin, and a bare
# ``except:`` that swallows everything. Left byte-identical because the
# snippet is truncated and collapsed onto one line.
fh.close() return data def load_document_and_cache(url): if docCache.has_key(url): return docCache[url] doc = {'contextUrl': None, 'documentUrl': None, 'document': ''} data = fetch(url) doc['document'] = data docCache[url] = doc return doc set_document_loader(load_document_and_cache) # And load up the AAT:Label mapping once fh = file('extensions/aat_labels.json') data = fh.read() fh.close() aat_labels = json.loads(data) def fetch_aat_label(what): url = what.replace("aat:", "http://vocab.getty.edu/aat/") url += ".jsonld" try: resp = requests.get(url) aatjs = json.loads(resp.text) except:
# Collapsed snippet: a module-level dict ``_CACHE`` memoizes responses of the
# requests-based PyLD loader (workaround for the library re-downloading
# contexts — see pyld issue #70 cited in the original comment);
# ``_caching_document_loader`` is installed as the document loader.
# ``options_hash`` copies doc['signature'], strips type/id/signatureValue,
# pins '@context' to https://w3id.org/identity/v1, URDNA2015-normalizes to
# N-Quads and returns the sha256 hexdigest. ``doc_hash`` is cut off after its
# signature. Left byte-identical because the snippet is truncated and
# collapsed onto one line.
from typing import Any, Dict # cache the downloaded "schemas", otherwise the library is super slow # (https://github.com/digitalbazaar/pyld/issues/70) _CACHE: Dict[str, Any] = {} LOADER = jsonld.requests_document_loader() def _caching_document_loader(url: str) -> Any: if url in _CACHE: return _CACHE[url] resp = LOADER(url) _CACHE[url] = resp return resp jsonld.set_document_loader(_caching_document_loader) def options_hash(doc): doc = dict(doc['signature']) for k in ['type', 'id', 'signatureValue']: if k in doc: del doc[k] doc['@context'] = 'https://w3id.org/identity/v1' normalized = jsonld.normalize(doc, {'algorithm': 'URDNA2015', 'format': 'application/nquads'}) h = hashlib.new('sha256') h.update(normalized.encode('utf-8')) return h.hexdigest() def doc_hash(doc):
# Collapsed snippet, truncated at both ends. The head is the tail of a
# document-loader factory: it sets contextUrl from a link header, re-raises
# JsonLdError as-is and wraps any other exception in a JsonLdError
# ("loading document failed") before returning the inner ``loader``; the
# factory's result is installed via set_document_loader (an aiohttp loader
# alternative is commented out). ``dict_merge`` — a recursive dict merge of
# merge_dct into dct — is cut off inside its loop.
# NOTE(review): ``collections.Mapping`` was removed in Python 3.10; this must
# be ``collections.abc.Mapping``. Left byte-identical because the snippet is
# truncated and collapsed onto one line.
doc["contextUrl"] = link_header["target"] return doc except JsonLdError as e: raise e except Exception as cause: raise JsonLdError( "Could not retrieve a JSON-LD document from the URL.", "jsonld.LoadDocumentError", code="loading document failed", cause=cause, ) return loader jsonld.set_document_loader(session_document_loader()) # jsonld.set_document_loader(jsonld.aiohttp_document_loader()) def dict_merge(dct, merge_dct): """ Recursive dict merge. Inspired by :meth:``dict.update()``, instead of updating only top-level keys, dict_merge recurses down into dicts nested to an arbitrary depth, updating keys. The ``merge_dct`` is merged into ``dct``. :param dct: dict onto which the merge is executed :param merge_dct: dct merged into dct :return: None """ for k, v in merge_dct.items(): if isinstance(dct.get(k), dict) and isinstance(merge_dct[k], collections.Mapping):
# Collapsed snippet, truncated at both ends. The head is the tail of a local
# context loader: it maps the IIIF Presentation 2 context URL to
# contexts/context_21.json (anything else to context_10.json), reads the file
# and returns the PyLD document dict; it is installed only if ``jsonld`` was
# importable. ``ManifestReader`` maps presentation API versions 0.9–2.1 to
# their context URLs; its ``__init__`` is cut off inside the docstring.
# NOTE(review): ``open(fn)`` / ``fh.close()`` would be better as a ``with``
# block. Left byte-identical because the snippet is truncated and collapsed
# onto one line.
'document': '' } contexts_dir = os.path.join(os.path.dirname(__file__), 'contexts') if url == "http://iiif.io/api/presentation/2/context.json": fn = os.path.join(contexts_dir, 'context_21.json') else: fn = os.path.join(contexts_dir, 'context_10.json') fh = open(fn) data = fh.read() fh.close() doc['document'] = data return doc if jsonld: jsonld.set_document_loader(load_document_local) class ManifestReader(object): """Read manifest of other presentation API resource.""" contexts = { '0.9': 'http://www.shared-canvas.org/ns/context.json', '1.0': 'http://www.shared-canvas.org/ns/context.json', '2.0': 'http://iiif.io/api/presentation/2/context.json', '2.1': 'http://iiif.io/api/presentation/2/context.json' } def __init__(self, data, version=None): """Initialize with data and optional version.
# Collapsed snippet: module setup for an aiohttp web service — imports, a
# RouteTableDef, uvloop as the asyncio event-loop policy, and an
# aiohttp-based PyLD document loader; ``context`` is the JSON-LD context used
# by the service (dc/acr/xsd/skos prefixes, acr:contains typed as @id).
# The /health handler ``healthcheck`` is cut off after its first log call.
# NOTE(review): ``timeout=1000`` on the aiohttp loader is presumably seconds,
# which is unusually long — confirm the intended unit/value. Left
# byte-identical because the snippet is truncated and collapsed onto one line.
import sys import asyncio from aiohttp import web import aiohttp_cors import uvloop import os import json from .conf.logging import LOG from .conf.config import init_db_pool from .utils.data import fetch_autocomplete, fetch_acronym, fetch_stats from pyld import jsonld routes = web.RouteTableDef() asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) jsonld.set_document_loader(jsonld.aiohttp_document_loader(timeout=1000)) context = { "dc": "http://purl.org/dc/elements/1.1/", "acr": "http://example.com/vocab#", "xsd": "http://www.w3.org/2001/XMLSchema#", "skos": "http://www.w3.org/2004/02/skos/core#", "acr:contains": { "@type": "@id" } } @routes.get('/health') async def healthcheck(request): """Test health, will always return ok.""" LOG.info('Healthcheck called')
"""Make PyLD use cached documents (contexts, frames, etc.) if available.""" import logging from pyld import jsonld from .local_cache import in_cache def cached_load_document(url): """Read local cached copy of URL if available, else fallback to network.""" filepath = in_cache(url) if (filepath is None): logging.debug("Using default loader to get %s" % (url)) return(jsonld.load_document(url)) else: logging.debug("Reading %s from %s" % (url, filepath)) data = open(filepath, 'r').read() doc = { 'contextUrl': None, 'documentUrl': None, 'document': data } return doc # On load set up PyLD code to use cached loader jsonld.set_document_loader(cached_load_document)
# Collapsed snippet: ``load_document_and_cache`` memoizes PyLD documents in
# ``docCache`` keyed by URL (fetching via a ``fetch`` helper defined
# elsewhere) and is installed as the document loader. ``Validator`` then
# begins a map from short class names to prefixed RDF classes
# (oa:/dctypes:); the dict — and the rest of the class — is cut off.
# NOTE(review): ``dict.has_key`` is Python 2 only (use ``url in docCache``),
# and there is a stray semicolon after ``doc['document'] = data``. Left
# byte-identical because the snippet is truncated and collapsed onto one line.
def load_document_and_cache(url): if docCache.has_key(url): return docCache[url] doc = { 'contextUrl': None, 'documentUrl': None, 'document': '' } data = fetch(url) doc['document'] = data; docCache[url] = doc return doc jsonld.set_document_loader(load_document_and_cache) class Validator(object): def __init__(self): self.rdflib_class_map = { "Annotation": "oa:Annotation", "Dataset": "dctypes:Dataset", "Image": "dctypes:StillImage", "Video": "dctypes:MovingImage", "Audio": "dctypes:Sound", "Text": "dctypes:Text", "TextualBody": "oa:TextualBody", "ResourceSelection": "oa:ResourceSelection", "SpecificResource": "oa:SpecificResource",
# Collapsed snippet: if a local linked-art.json exists in the working
# directory, a ``cached_context`` loader is installed that returns its
# contents for every requested URL (documentUrl pinned to
# https://linked.art/ns/v1/linked-art.json); a bare ``except`` silently skips
# the whole setup when the file is absent — NOTE(review): it also hides any
# other error (e.g. permissions), and the loader ignores its ``url``
# argument, so every context resolves to this one document — presumably
# intentional for a single-context app, but confirm. ``RdfWriter.__init__``
# pops a "format" kwarg (default "xml"); ``write_resources`` is cut off at
# its signature. Left byte-identical because the snippet is truncated and
# collapsed onto one line.
from pyld.jsonld import compact, frame, from_rdf, to_rdf, expand, set_document_loader try: # If we have a context file in our working directory, load it fh = open("linked-art.json") context_data = fh.read() fh.close() def cached_context(url): return { "contextUrl": None, "documentUrl": "https://linked.art/ns/v1/linked-art.json", "document": context_data } set_document_loader(cached_context) except: # Guess we don't... pass class RdfWriter(Writer): def __init__(self, **kwargs): self.format = kwargs.pop("format", "xml") self.logger = logging.getLogger(__name__) super(RdfWriter, self).__init__(**kwargs) def write_resources(self, graph_id=None, resourceinstanceids=None, **kwargs):
# Collapsed snippet: the first half of a Flask application factory. Inside
# the app context it builds a Cfg, enables testing mode when kwargs are
# passed, registers a RegexConverter URL converter, optionally initializes
# Firebase credentials, configures SQLAlchemy and creates tables, installs a
# requests-based PyLD loader (7 s timeout), and begins a loop over
# default_exceptions to JSON-ify error responses. The snippet is cut off
# inside a triple-quoted string (continued in the next snippet of this file).
# Left byte-identical because the snippet is truncated and collapsed onto
# one line.
def create_app(**kwargs): app = Flask(__name__) with app.app_context(): app.cfg = Cfg() startup_msg = 'starting' if kwargs: app.testing = True app.cfg.set_debug_config(**kwargs) startup_msg += ' in testing mode with kwargs:' for k, v in kwargs.items(): startup_msg += ' {}={}'.format(k, v) log(startup_msg) class RegexConverter(BaseConverter): """ Make it possible to distinguish routes by <regex("[exampl]"):>. https://stackoverflow.com/a/5872904 """ def __init__(self, url_map, *items): super(RegexConverter, self).__init__(url_map) self.regex = items[0] app.url_map.converters['regex'] = RegexConverter if app.cfg.use_frbs(): log('using Firebase') cred = firebase_admin.credentials.Certificate(app.cfg.frbs_conf()) firebase_admin.initialize_app(cred) else: log('NOT using Firebase') app.config['SQLALCHEMY_DATABASE_URI'] = app.cfg.db_uri() app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False from jsonkeeper.models import db db.init_app(app) db.create_all() jsonld.set_document_loader(jsonld.requests_document_loader(timeout=7)) for code in default_exceptions.keys(): """ Make app return exceptions in JSON form. Also add CORS headers. Based on http://flask.pocoo.org/snippets/83/ but updated to use register_error_handler method. Note: this doesn't seem to work for 405 Method Not Allowed in an Apache + gnunicorn setup. 
""" @app.errorhandler(code) def make_json_error(error): resp = jsonify(message=str(error)) resp.status_code = (error.code if isinstance( error, HTTPException) else 500) return add_CORS_headers(resp) @app.after_request def set_response_headers(response): response.headers['Cache-Control'] = ('private, no-store, no-cache,' ' must-revalidate') # response.headers['Pragma'] = 'no-cache' # response.headers['Expires'] = '0' return response from jsonkeeper.views import jk app.register_blueprint(jk) if app.cfg.garbage_collection_interval() > 0: log('initializing garbage collection') scheduler = BackgroundScheduler() scheduler.start() scheduler.add_job( func=collect_garbage, trigger=IntervalTrigger( seconds=app.cfg.garbage_collection_interval()), id='garbage_collection_job', name='collect garbage according to interval set in config', replace_existing=True) atexit.register(lambda: scheduler.shutdown()) return app
def __init__(self, logger):
    """Store *logger* and install a requests-based PyLD document loader
    (10 s timeout) as the process-wide loader."""
    self._logger = logger
    # Renamed from ``requests``: the original local shadowed the popular
    # HTTP library's module name, inviting confusion in this scope.
    loader = jsonld.requests_document_loader(timeout=10)
    jsonld.set_document_loader(loader)
import os
import sys
import json

from pyld import jsonld
from pyld_document_loader import my_requests_document_loader

if __name__ == '__main__':
    # Usage: script.py <input.json> — writes flatten_<input.json> next to it.
    fjson = sys.argv[1]
    # json.load reads straight from the file object; no intermediate string.
    with open(fjson, 'r') as f:
        doc = json.load(f)

    jsonld.set_document_loader(my_requests_document_loader())
    data = jsonld.flatten(doc)

    jpath, jfile = os.path.split(fjson)
    # os.path.join handles an empty directory part correctly; the original
    # f'{jpath}{os.path.sep}flatten_{jfile}' produced a rooted path
    # (e.g. '/flatten_x.json') when the input had no directory component.
    flatten_file = os.path.join(jpath, f'flatten_{jfile}')
    with open(flatten_file, 'w') as f:
        json.dump(data, f)