def ingest(ntriples):
    graph = Graph()
    graph.parse(data=ntriples, format='nt')
    expanded = jsonld.expand(
        json.loads(graph.serialize(format='json-ld').decode('utf-8')))
    mandatory_props = [
        'http://www.bbc.co.uk/search/schema/title',
        'http://www.bbc.co.uk/search/schema/url'
    ]
    for json_object in expanded:
        uri = json_object['@id']
        valid = True
        for prop in mandatory_props:
            if prop not in json_object:
                logging.warning(
                    "Not indexing %s due to missing property: %s", uri, prop)
                valid = False
        if valid:
            es.index(index='bbc', body=jsonld.expand(json_object)[0],
                     doc_type='item', id=uri)
def read(self):
    data = self.data
    if type(data) == dict:
        js = data
    else:
        try:
            js = json.loads(data)
        except ValueError:
            # could be badly encoded utf-8 with BOM
            data = data.decode('utf-8')
            if data[0] == u'\ufeff':
                data = data[1:].strip()
            try:
                js = json.loads(data)
            except ValueError:
                raise SerializationError("Data is not valid JSON", data)

    # Try to see if we're valid JSON-LD before further testing
    version = self.getVersion(js)
    factory = self.buildFactory(version)
    self.factory = factory
    top = self.readObject(js)
    if jsonld:
        try:
            jsonld.expand(js)
        except Exception as e:
            raise SerializationError("Data is not valid JSON-LD: %r" % e, data)
def test_context_caching(self):
    data = {
        "@context": "https://linked.art/ns/v1/linked-art.json",
        "id": "https://linked.art/example/object/3",
        "type": "HumanMadeObject",
        "_label": "Black and White Photograph of 'St. Sebastian'",
        "classified_as": [
            {
                "id": "http://vocab.getty.edu/aat/300128359",
                "type": "Type",
                "_label": "Black and White Photograph",
            }
        ],
    }

    fetch = rdffile.fetch

    def tempFetch(url):
        raise Exception("This should not happen because we cached the doc")

    # rdffile.fetch = tempFetch
    # # first we test that we can override the fetch function and confirm
    # # that it gets called
    # with self.assertRaises(Exception):
    #     jsonld_document = expand(data)

    # now set the function back and test normally
    rdffile.fetch = fetch
    jsonld_document = expand(data)
    self.assertTrue(data["@context"] in rdffile.docCache)

    # now set it to the temp fetch and confirm that tempFetch isn't called
    # on subsequent uses as it was initially
    rdffile.fetch = tempFetch
    jsonld_document = expand(data)
    rdffile.fetch = fetch

    # now invalidate the cache and make sure it refreshes the doc
    rdffile.docCache[data["@context"]]["expires"] = datetime.datetime.now()
    jsonld_document = expand(data)
    self.assertTrue(
        rdffile.docCache[data["@context"]]["expires"] > datetime.datetime.now())
    self.assertTrue(data["@context"] in rdffile.docCache)
def test_expand_with_base_from_context():
    """Expand the document trying to rely upon @base inside its @context."""
    # This does not work!
    assert jsonld.expand(JSONLD_DOCUMENT) != EXPANDED_JSONLD_DOCUMENT

    # Because the expanded version looks like this:
    assert jsonld.expand(JSONLD_DOCUMENT) == [
        {
            '@id': 'Robot',  # NOT EXPANDED
            '@type': ['http://www.w3.org/2000/01/rdf-schema#Class'],
        },
        {
            '@id': 'Rover',
            'http://www.w3.org/2000/01/rdf-schema#subClassOf': [
                {'@id': 'Robot'},  # NOT EXPANDED
            ],
        },
        {
            '@id': 'opportunity',  # NOT EXPANDED
            '@type': [
                # EXPANDED, but this is because of @vocab, not because of @base
                'https://example.com/robotics/Rover',
            ],
            'http://www.w3.org/2000/01/rdf-schema#label': [
                {'@value': 'Opportunity'},
            ],
        },
    ]
def load_file(path_or_url, started=False, http_kwargs={}):
    try:
        data = jsonld.expand(path_or_url)
        if len(data) == 1:
            if "@id" not in data[0]:
                data[0]["@id"] = path_or_url
    except jsonld.JsonLdError as e:
        if 'only "http" and "https"' in str(e):
            # pyld cannot expand a local file directly, so serve the file's
            # directory over HTTP and retry
            lgr.debug("Reloading with local server")
            root = os.path.dirname(path_or_url)
            if not started:
                stop, port = start_server(**http_kwargs)
            else:
                if "port" not in http_kwargs:
                    raise KeyError("port key missing in http_kwargs")
                port = http_kwargs["port"]
            # NOTE: the URL literal here was redacted in the source
            # ("http://*****:*****"); a localhost URL and the retry it
            # feeds are reconstructed from context.
            base_url = f"http://localhost:{port}/"
            data = jsonld.expand(base_url + os.path.basename(path_or_url))
            if len(data) == 1:
                if "@id" not in data[0]:
                    data[0]["@id"] = base_url + os.path.basename(path_or_url)
        else:
            raise
    return data
def read(self): """Read Presentation API resource.""" data = self.data if type(data) in [dict, OrderedDict]: js = data else: try: js = json.loads(data) except: # could be badly encoded utf-8 with BOM try: data = data.decode('utf-8') except: #Py3 does not have decode on str which is unicode already pass if data[0] == u'\ufeff': data = data[1:].strip() try: js = json.loads(data) except: raise SerializationError("Data is not valid JSON", data) # Try to see if we're valid JSON-LD before further testing version = self.getVersion(js) factory = self.buildFactory(version) self.factory = factory top = self.readObject(js) if jsonld: try: jsonld.expand(js) except Exception as e: raise raise SerializationError("Data is not valid JSON-LD: %r" % e, data) return top
def test_parse_compacted_jsonld(self):
    self.pool.loan_to(self.patron)

    data = dict()
    data["@type"] = "http://www.w3.org/ns/oa#Annotation"
    data["http://www.w3.org/ns/oa#motivatedBy"] = {
        "@id": Annotation.IDLING
    }
    data["http://www.w3.org/ns/oa#hasBody"] = {
        "@type": "http://www.w3.org/ns/oa#TextualBody",
        "http://www.w3.org/ns/oa#bodyValue":
        "A good description of the topic that bears further investigation",
        "http://www.w3.org/ns/oa#hasPurpose": {
            "@id": "http://www.w3.org/ns/oa#describing"
        }
    }
    data["http://www.w3.org/ns/oa#hasTarget"] = {
        "http://www.w3.org/ns/oa#hasSource": {
            "@id": self.identifier.urn
        },
        "http://www.w3.org/ns/oa#hasSelector": {
            "@type": "http://www.w3.org/ns/oa#FragmentSelector",
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#value":
            "epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/3:10)"
        }
    }

    data_json = json.dumps(data)
    expanded = jsonld.expand(data)[0]

    annotation = AnnotationParser.parse(self._db, data_json, self.patron)
    eq_(self.patron.id, annotation.patron_id)
    eq_(self.identifier.id, annotation.identifier_id)
    eq_(Annotation.IDLING, annotation.motivation)
    eq_(True, annotation.active)
    eq_(json.dumps(expanded["http://www.w3.org/ns/oa#hasTarget"][0]),
        annotation.target)
    eq_(json.dumps(expanded["http://www.w3.org/ns/oa#hasBody"][0]),
        annotation.content)
def handle_question(sentence):
    response = requests.get('https://qa.askplatyp.us/v0/ask', params={
        'q': sentence,
        'lang': 'und'
    })
    if response.status_code != 200:
        return 'Our system failed, sorry for the troubles.'
    try:
        data = jsonld.expand(response.json())
        results = []
        context_subject = None
        context_predicate = None
        term = None
        for root in data:
            for value in root.get('http://www.w3.org/ns/hydra/core#member', []):
                new_term = value.get('http://askplatyp.us/vocab#term', [''])[0]
                if term is not None and new_term != term:
                    return format_result_sentence(
                        results, context_subject, context_predicate)
                term = new_term
                for result in value.get('http://schema.org/result', []):
                    for k, v in result.get('@reverse', {}).items():
                        new_context_predicate = from_caml_case(
                            k.replace('http://schema.org/', ''))
                        new_context_subject = format_element(v[0])
                        if context_subject is not None and \
                                new_context_subject != context_subject:
                            return format_result_sentence(
                                results, context_subject, context_predicate)
                        context_subject = new_context_subject
                        context_predicate = new_context_predicate
                    results.append(format_element(result))
        return format_result_sentence(results, context_subject, context_predicate)
    except Exception as e:
        print(e)
        return 'Our system failed, sorry for the troubles.'
def jsonld(self, with_context=False, context_uri=None, prefix=None,
           base=None, expanded=False, **kwargs):
    result = self.serializable(**kwargs)
    if expanded:
        result = jsonld.expand(
            result,
            options={
                'expandContext': [
                    self._context,
                    {
                        'prefix': prefix,
                        'endpoint': prefix
                    }
                ]
            }
        )[0]
    if not with_context:
        try:
            del result['@context']
        except KeyError:
            pass
    elif context_uri:
        result['@context'] = context_uri
    else:
        result['@context'] = self._context
    return result
def as_rdf_graph(element: YAMLRoot, contexts: CONTEXTS_PARAM_TYPE) -> Graph:
    """
    Convert element into an RDF graph guided by the context(s) in contexts

    :param element: element to represent in RDF
    :param contexts: JSON-LD context(s) in the form of:
        * file name
        * URL
        * JSON String
        * dict
        * JSON Object
        * A list containing elements of any type named above
    :return: rdflib Graph containing element
    """
    # TODO: figure out what to do with multi-contexts and other params here
    if isinstance(contexts, str):
        if '://' not in contexts:
            contexts = f"file://{os.path.abspath(contexts)}"
    rdf_jsonld = expand(json_dumper.dumps(element),
                        options=dict(expandContext=contexts))
    g = rdflib_graph_from_pyld_jsonld(rdf_jsonld)

    # TODO: find the official prefix loader module. For the moment we pull
    # this from the namespaces module
    with open(os.path.join(LD_11_DIR, 'termci_namespaces.context.jsonld')) as cf:
        prefixes = json.load(cf)
    for pfx, ns in prefixes['@context'].items():
        if isinstance(ns, dict):
            if '@id' in ns and ns.get('@prefix', True):
                ns = ns['@id']
            else:
                continue
        if not ns.startswith('@'):
            g.bind(pfx, ns)
    return g
def test_get_collection_item_json_ld(config, api_):
    req = mock_request({'f': 'jsonld'})
    rsp_headers, code, response = api_.get_collection_item(req, 'obs', '371')
    assert rsp_headers['Content-Type'] == FORMAT_TYPES[F_JSONLD]
    assert rsp_headers['Content-Language'] == 'en-US'
    feature = json.loads(response)

    assert '@context' in feature
    assert feature['@context'][0] == \
        'https://geojson.org/geojson-ld/geojson-context.jsonld'
    assert len(feature['@context']) > 1
    assert 'schema' in feature['@context'][1]
    assert feature['@context'][1]['schema'] == 'https://schema.org/'
    assert feature['properties']['stn_id'] == '35'
    assert feature['id'].startswith('http://')
    assert feature['id'].endswith('/collections/obs/items/371')

    expanded = jsonld.expand(feature)[0]
    assert expanded['@id'].startswith('http://')
    assert expanded['@id'].endswith('/collections/obs/items/371')
    assert expanded['https://purl.org/geojson/vocab#properties'][0][
        'https://schema.org/identifier'][0][
        '@type'] == 'https://schema.org/Text'
    assert expanded['https://purl.org/geojson/vocab#properties'][0][
        'https://schema.org/identifier'][0]['@value'] == '35'

    req = mock_request({'f': 'jsonld', 'lang': 'fr'})
    rsp_headers, code, response = api_.get_collection_item(req, 'obs', '371')
    assert rsp_headers['Content-Type'] == FORMAT_TYPES[F_JSONLD]
    assert rsp_headers['Content-Language'] == 'fr-CA'
def test_get_collection_items_json_ld(config, api_):
    req = mock_request({'f': 'jsonld', 'limit': 2})
    rsp_headers, code, response = api_.get_collection_items(req, 'obs')
    assert rsp_headers['Content-Type'] == FORMAT_TYPES[F_JSONLD]
    # No language requested: return default from YAML
    assert rsp_headers['Content-Language'] == 'en-US'
    collection = json.loads(response)

    assert '@context' in collection
    assert collection['@context'][0] == \
        'https://geojson.org/geojson-ld/geojson-context.jsonld'
    assert len(collection['@context']) > 1
    assert 'schema' in collection['@context'][1]
    assert collection['@context'][1]['schema'] == 'https://schema.org/'

    expanded = jsonld.expand(collection)[0]
    featuresUri = 'https://purl.org/geojson/vocab#features'
    assert len(expanded[featuresUri]) == 2
    geometryUri = 'https://purl.org/geojson/vocab#geometry'
    assert all((geometryUri in f) for f in expanded[featuresUri])
    assert all((f[geometryUri][0]['@type'][0] ==
                'https://purl.org/geojson/vocab#Point')
               for f in expanded[featuresUri])
    propertiesUri = 'https://purl.org/geojson/vocab#properties'
    assert all(propertiesUri in f for f in expanded[featuresUri])
    assert all(
        len(f[propertiesUri][0].keys()) > 0 for f in expanded[featuresUri])
    assert all(('https://schema.org/observationDate' in f[propertiesUri][0])
               for f in expanded[featuresUri])
    assert all((f[propertiesUri][0]['https://schema.org/observationDate'][0]
                ['@type'] == 'https://schema.org/DateTime')
               for f in expanded[featuresUri])
    assert any((f[propertiesUri][0]['https://schema.org/observationDate'][0]
                ['@value'] == '2001-10-30T14:24:55Z')
               for f in expanded[featuresUri])
def _context(ex):
    return jsonld.expand({
        "@context": {
            "dts": "https://w3id.org/dts/api#"
        },
        "dts:citeStructure": ex
    })[0][str(_dts.term("citeStructure"))]
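# The helper above uses a common pyld trick: wrap a value under a prefixed
# key in a throwaway document, expand it, and index the result by the
# absolute IRI. A standalone sketch of the same trick (example data
# invented for illustration, not from the source):
from pyld import jsonld

doc = {
    "@context": {"dts": "https://w3id.org/dts/api#"},
    "dts:citeStructure": {"dts:citeType": "book"},
}
expanded = jsonld.expand(doc)
# Expansion rewrites the prefixed key to its full IRI:
assert "https://w3id.org/dts/api#citeStructure" in expanded[0]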
def read_resource(self, data, use_ids=False, resourceid=None):
    self.use_ids = use_ids
    if not isinstance(data, list):
        data = [data]

    # NB: the loop variable shadows the pyld `jsonld` module name in
    # this scope; `expand` is imported directly.
    for jsonld in data:
        self.errors = {}
        jsonld = expand(jsonld)[0]
        graphid = self.get_graph_id(jsonld["@type"][0])
        if graphid:
            graph = GraphProxy.objects.get(graphid=graphid)
            graphtree = graph.get_tree()
            if use_ids == True:
                resourceinstanceid = self.get_resource_id(jsonld["@id"])
                if resourceinstanceid is None:
                    raise Exception(
                        'The @id of the resource was not supplied, or was '
                        'null, or the URI was not correctly formatted')
                resource = Resource.objects.get(pk=resourceinstanceid)
            else:
                resource = Resource()
                resource.graph_id = graphid
                resource.pk = resourceid
            self.resolve_node_ids(jsonld, graph=graphtree, resource=resource)
            self.resources.append(resource)
    return data
def test_get_collection_items_json_ld(config, api_):
    req_headers = make_req_headers()
    rsp_headers, code, response = api_.get_collection_items(
        req_headers, {'f': 'jsonld', 'limit': 2}, 'obs')
    assert rsp_headers['Content-Type'] == 'application/ld+json'
    collection = json.loads(response)

    assert '@context' in collection
    assert collection['@context'][0] == \
        'https://geojson.org/geojson-ld/geojson-context.jsonld'
    assert len(collection['@context']) > 1
    assert 'schema' in collection['@context'][1]
    assert collection['@context'][1]['schema'] == 'https://schema.org/'

    expanded = jsonld.expand(collection)[0]
    featuresUri = 'https://purl.org/geojson/vocab#features'
    assert len(expanded[featuresUri]) == 2
    geometryUri = 'https://purl.org/geojson/vocab#geometry'
    assert all((geometryUri in f) for f in expanded[featuresUri])
    assert all((f[geometryUri][0]['@type'][0] ==
                'https://purl.org/geojson/vocab#Point')
               for f in expanded[featuresUri])
    propertiesUri = 'https://purl.org/geojson/vocab#properties'
    assert all(propertiesUri in f for f in expanded[featuresUri])
    assert all(
        len(f[propertiesUri][0].keys()) > 0 for f in expanded[featuresUri])
    assert all(('https://schema.org/observationDate' in f[propertiesUri][0])
               for f in expanded[featuresUri])
    assert all((f[propertiesUri][0]['https://schema.org/observationDate'][0]
                ['@type'] == 'https://schema.org/DateTime')
               for f in expanded[featuresUri])
    assert any((f[propertiesUri][0]['https://schema.org/observationDate'][0]
                ['@value'] == '2001-10-30T14:24:55Z')
               for f in expanded[featuresUri])
def jsonld(self, with_context=False, context_uri=None, prefix=None,
           expanded=False):
    result = self.serializable()
    if expanded:
        result = jsonld.expand(result, options={
            'base': prefix,
            'expandContext': self._context
        })[0]
    if not with_context:
        try:
            del result['@context']
        except KeyError:
            pass
    elif context_uri:
        result['@context'] = context_uri
    else:
        result['@context'] = self._context
    return result
def test_root_structured_data(config, api_):
    req_headers = make_req_headers()
    rsp_headers, code, response = api_.landing_page(req_headers, {"f": "jsonld"})
    root = json.loads(response)

    assert rsp_headers['Content-Type'] == 'application/ld+json'
    assert rsp_headers['X-Powered-By'].startswith('pygeoapi')

    assert isinstance(root, dict)
    assert 'description' in root
    assert root['description'] == 'pygeoapi provides an API to geospatial data'

    assert '@context' in root
    assert root['@context'] == 'https://schema.org/docs/jsonldcontext.jsonld'
    expanded = jsonld.expand(root)[0]
    assert '@type' in expanded
    assert 'http://schema.org/DataCatalog' in expanded['@type']
    assert 'http://schema.org/description' in expanded
    assert root['description'] == \
        expanded['http://schema.org/description'][0]['@value']
    assert 'http://schema.org/keywords' in expanded
    assert len(expanded['http://schema.org/keywords']) == 3
    assert '@value' in expanded['http://schema.org/keywords'][0].keys()
    assert 'http://schema.org/provider' in expanded
    assert expanded['http://schema.org/provider'][0]['@type'][0] == \
        'http://schema.org/Organization'
    assert expanded['http://schema.org/name'][0]['@value'] == root['name']
def test_describe_collections_json_ld(config, api_):
    req_headers = make_req_headers()
    rsp_headers, code, response = api_.describe_collections(
        req_headers, {'f': 'jsonld'}, 'obs')
    collection = json.loads(response)
    assert '@context' in collection
    expanded = jsonld.expand(collection)[0]
    # Metadata is about a schema:DataCollection that contains a schema:Dataset
    assert not expanded['@id'].endswith('obs')
    assert 'http://schema.org/dataset' in expanded
    assert len(expanded['http://schema.org/dataset']) == 1
    dataset = expanded['http://schema.org/dataset'][0]
    assert dataset['@type'][0] == 'http://schema.org/Dataset'
    assert len(dataset['http://schema.org/distribution']) == 10
    assert all(dist['@type'][0] == 'http://schema.org/DataDownload'
               for dist in dataset['http://schema.org/distribution'])

    assert 'http://schema.org/Organization' in expanded[
        'http://schema.org/provider'][0]['@type']

    assert 'http://schema.org/Place' in dataset[
        'http://schema.org/spatial'][0]['@type']
    assert 'http://schema.org/GeoShape' in dataset[
        'http://schema.org/spatial'][0]['http://schema.org/geo'][0]['@type']
    assert dataset['http://schema.org/spatial'][0]['http://schema.org/geo'][
        0]['http://schema.org/box'][0]['@value'] == '-180,-90 180,90'

    assert 'http://schema.org/temporalCoverage' in dataset
    assert dataset['http://schema.org/temporalCoverage'][0][
        '@value'] == '2000-10-30T18:24:39+00:00/2007-10-30T08:57:29+00:00'
def jsonld(self, with_context=True, context_uri=None, prefix=None,
           expanded=False):
    ser = self.serializable()

    result = jsonld.compact(ser, self._context, options={
        'base': prefix,
        'expandContext': self._context,
        'senpy': prefix
    })
    if context_uri:
        result['@context'] = context_uri
    if expanded:
        result = jsonld.expand(result, options={
            'base': prefix,
            'expandContext': self._context
        })
    if not with_context:
        del result['@context']
    return result
def usePyld():
    # Grab the vitals used in frame 1
    query = {"fmql": "DESCRIBE 120_5 FILTER(.02=2-9&.01>2008-04-01)",
             "format": "JSON-LD"}
    queryURL = FMQLEP + "?" + urllib.urlencode(query)
    jreply = json.loads(urllib2.urlopen(queryURL).read())
    json.dump(jreply, open("fmql_FMQL_F1.json", "w"), indent=2)

    # Grab the vitals used in frame 2
    query = {"fmql": "DESCRIBE 120_5 FILTER(.02=2-9&.01>2008-04-01)",
             "format": "JSON-LD2"}
    queryURL = FMQLEP + "?" + urllib.urlencode(query)
    jreply = json.loads(urllib2.urlopen(queryURL).read())
    json.dump(jreply, open("fmql_FMQL_F2.json", "w"), indent=2)

    # Let's produce different forms of JSON-LD (and RDF) from this

    # 1. Expanded form
    print "pyld expand ..."
    expanded = jsonld.expand(jreply)
    json.dump(expanded, open("pyld_EXP_FMQLEX.json", "w"), indent=2)

    # 2. Compact it - using the basic context of framing 1
    print "pyld compact ..."
    compact = jsonld.compact(jreply, json.load(open("vsfmcontextBase.json")))
    json.dump(compact, open("pyld_COMP_FMQLEX.json", "w"), indent=2)

    # 3. Dump RDF -- only nquads are supported ... others return errors
    print "pyld tordf ..."
    open("pyld_RDF_FMQLEX.rdf", "w").write(
        jsonld.to_rdf(jreply, {"format": "application/nquads"}))
def json_expand(self):
    '''Expand the JSON-LD using the context.'''
    if self.source_data is not None:
        self.expanded = unlistify(
            jsonld.expand(json.loads(self.source_data)))
def do_expansion(doc):
    # expand a document, removing its context
    # see: https://json-ld.org/spec/latest/json-ld/#expanded-document-form
    expanded = jsonld.expand(doc)
    print("EXPANDED")
    print(json.dumps(expanded, indent=2))
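# For reference, a minimal self-contained illustration of what expansion
# does (document invented for illustration): context terms are replaced by
# absolute IRIs and values are normalized into arrays of value objects.
from pyld import jsonld

doc = {
    "@context": {"name": "http://schema.org/name"},
    "name": "Jane Doe",
}
assert jsonld.expand(doc) == [
    {"http://schema.org/name": [{"@value": "Jane Doe"}]}
]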
def get_volumes(data):
    """
    Takes a data structure in the canonical HathiTrust JSON-LD format
    and expands the dataset. Traverses the edm:gathers relation to find
    all HT volume IDs.

    Returns a list of volume IDs for use with the `htrc.metadata` and
    `htrc.volume` modules.
    """
    # Remove all namespaces to ensure proper referencing
    data = jsonld.expand(data)

    # Build up the list of volumes. Because the JSON-LD `@graph` may
    # contain multiple worksets, this code uses a set representation
    # to ensure that duplicates are removed
    volumes = set()
    for obj in data:
        # retrieve list of entities gathered
        gathers = obj.get('http://www.europeana.eu/schemas/edm/gathers', [])
        gathers = [vol['@id'].replace('http://hdl.handle.net/2027/', '')
                   for vol in gathers]

        # Check if `gathers` has any elements to ensure we don't add []
        # to the list of volumes.
        if gathers:
            volumes.update(gathers)

    # return the list representation, maintains a more consistent interface
    return list(volumes)
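# For orientation, after expansion each workset object carries its gathered
# volumes under the absolute edm:gathers IRI, roughly as below (handle IDs
# invented for illustration):
#
#     {
#         "http://www.europeana.eu/schemas/edm/gathers": [
#             {"@id": "http://hdl.handle.net/2027/mdp.39015012345678"},
#             {"@id": "http://hdl.handle.net/2027/uc1.b000123456"},
#         ]
#     }
#
# get_volumes strips the handle prefix, yielding
# ['mdp.39015012345678', 'uc1.b000123456'].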
def query(self, irc, msg, args, channel, optlist, request):
    """[--locale <language>] <request>

    Sends a request to the PPP and returns answers."""
    locale = self.registryValue('language', channel)
    bold = self.registryValue('formats.bold', channel)
    for (key, value) in optlist:
        if key in ('locale', 'lang', 'language'):
            locale = value
    response = self.request(channel, request, locale)
    response = jsonld.expand(response)
    seen = set()
    replies = []

    def add_reply(r):
        normalized = r.strip()
        if normalized in seen:
            return
        replies.append(r)
        seen.add(normalized)

    for collection in response:
        for member in collection['http://www.w3.org/ns/hydra/core#member']:
            for result in member['http://schema.org/result']:
                add_reply(format_result(result, locale, bold))
    irc.replies(replies)
def expand_compact_for_context(wa, context):
    '''assumes anno has @context.'''
    context = wa['@context']
    try:
        compacted = jsonld.compact(wa, context, compactArrays=False)
    except Exception as e:
        msg = 'compaction for context({}) of anno({}) failed: {}'.format(
            context, wa['id'], str(e))
        raise e

    try:
        expanded = jsonld.expand(compacted)
    except Exception as e:
        msg = 'expansion for context({}) of anno({}) failed: {}'.format(
            context, wa['id'], str(e))
        raise e

    try:
        translated = jsonld.compact(expanded, context, compactArrays=False)
    except Exception as e:
        msg = 'translation for context({}) of anno({}) failed: {}'.format(
            CATCH_CONTEXT_IRI, wa['id'], str(e))
        raise e

    return translated
def usePyld():
    try:
        jsonld
    except NameError:
        print "=== can't do pyld demos as package pyld isn't installed - " \
              "Download and install from https://github.com/digitalbazaar/pyld"
        return

    # Grab the vitals
    query = {"fmql": "DESCRIBE 120_5 FILTER(.02=2-9&.01>2008-04-01)",
             "format": "JSON-LD"}
    queryURL = FMQLEP + "?" + urllib.urlencode(query)
    jreply = json.loads(urllib2.urlopen(queryURL).read())
    json.dump(jreply, open("fmql_FMQL.json", "w"), indent=2)

    # nix all but @graph and @context
    jreply = {"@context": jreply["@context"], "@graph": jreply["@graph"]}

    # Let's produce different forms of JSON-LD (and RDF) from this

    # 1. Expanded form
    print "pyld expand ..."
    expanded = jsonld.expand(jreply)
    json.dump(expanded, open("pyld_EXP_FMQLEX.json", "w"), indent=2)

    # 2. Compact it - using the basic context
    print "pyld compact ..."
    compact = jsonld.compact(jreply,
                             {"livevista": "http://livevista.caregraf.info/"})
    json.dump(compact, open("pyld_COMP_FMQLEX.json", "w"), indent=2)

    # 3. Dump RDF -- only nquads are supported ... others return errors
    print "pyld tordf ..."
    open("pyld_RDF_FMQLEX.rdf", "w").write(
        jsonld.to_rdf(jreply, {"format": "application/nquads"}))
    print
def expand_entity_params(data, context, app):
    """Expand entity params"""
    context_list = []
    status = 0
    error = ''
    try:
        if context:
            if context in app.context_dict.keys():
                context_list.append(app.context_dict[context])
            else:
                context_list.append(context)
        context_list.append(app.context_dict[default_context])
        if data['attrs']:
            for count in range(0, len(data['attrs'])):
                if not validators.url(data['attrs'][count]):
                    # wrap the attribute name in a one-key document so that
                    # jsonld.expand rewrites it to its absolute IRI
                    com = {
                        "@context": context_list,
                        data['attrs'][count]: data['attrs'][count]
                    }
                    expanded = jsonld.expand(com)
                    data['attrs'][count] = list(expanded[0].keys())[0]
        status = 1
    except Exception as e:
        app.logger.error("Error: expand_entity_params")
        app.logger.error(traceback.format_exc())
        error = 'Error in expand entity params.'
    return data, status, error
def csv_generate(csv_filename, capturemodel_uri, delimiter='|'):
    """
    Generate a CSV from a JSON uri.

    Example URIs:

    NLW WW1 (production):
        https://crowd.library.wales/s/war-tribunal-records/annotation-studio/open/resource
    NLW GLE (dev):
        http://nlw-omeka.digtest.co.uk/s/site-one/annotation-studio/open/resource
    IDA:
        https://omeka.dlcs-ida.org/s/ida/annotation-studio/open/tagging

    :param csv_filename: output filename
    :param capturemodel_uri: uri for capturemodel json
    :param delimiter: delimiter for CSV
    """
    all_fields = initialise()
    with open(csv_filename, 'wb') as csv_out:
        # open CSV and write header row
        dw = csv.DictWriter(csv_out, delimiter=delimiter, fieldnames=all_fields)
        dw.writeheader()
        # get the capture model
        capture_model = get_model(capturemodel_uri)
        # change to context with additional namespaces
        capture_model['@context'] = master_context
        expanded = jsonld.expand(capture_model)  # expand the JSON-LD
        # recursively parse the expanded JSON-LD, returning a sorted list
        # of dictionaries
        dw_list = sorted(parse_expanded(expanded, row_list=[]),
                         key=itemgetter('dcterms:identifier'))
        for d in dw_list:
            dw.writerow(d)
def test_expand_with_explicit_base():
    """Expand the document using the explicitly specified base."""
    assert jsonld.expand(
        JSONLD_DOCUMENT,
        options={
            'base': 'https://example.com/robotics/',
        },
    ) == EXPANDED_JSONLD_DOCUMENT
def read_resource(self, data, use_ids=False, resourceid=None, graphid=None):
    if graphid is None and self.graphtree is None:
        raise Exception("No graphid supplied to read_resource")
    elif self.graphtree is None:
        self.graphtree = self.process_graph(graphid)

    # Ensure we've reset from any previous call
    self.errors = {}
    self.resources = []
    self.resource = None
    self.use_ids = use_ids

    if not isinstance(data, list):
        data = [data]

    # Force use_ids if there is more than one record being passed in
    if len(data) > 1:
        self.use_ids = True

    for jsonld_document in data:
        jsonld_document = expand(jsonld_document)[0]

        # Possibly bail very early
        if jsonld_document["@type"][0] != self.graphtree["class"]:
            raise ValueError(
                "Instance does not have same top level class as model")

        if self.use_ids:
            resourceinstanceid = self.get_resource_id(jsonld_document["@id"])
            if resourceinstanceid is None:
                self.logger.error(
                    "The @id of the resource was not supplied, was null "
                    "or URI was not correctly formatted")
                raise Exception(
                    "The @id of the resource was not supplied, was null "
                    "or URI was not correctly formatted")
            self.logger.debug("Using resource instance ID found: {0}".format(
                resourceinstanceid))
        else:
            self.logger.debug(
                "`use_ids` setting is set to False, ignoring @id "
                "from the data if any")

        self.resource = Resource()
        if resourceid is not None:
            self.resource.pk = resourceid
        self.resource.graph_id = graphid
        self.resources.append(self.resource)

        # --- Process Instance ---
        # now walk the instance and align to the tree
        result = {"data": [jsonld_document["@id"]]}
        self.data_walk(jsonld_document, self.graphtree, result)
def create_vcrecord(self, cred_dict: dict) -> VCRecord:
    """Return VCRecord from a credential dict."""
    proofs = cred_dict.get("proof") or []
    proof_types = None
    if type(proofs) is dict:
        proofs = [proofs]
    if proofs:
        proof_types = [proof.get("type") for proof in proofs]
    contexts = [ctx for ctx in cred_dict.get("@context") if type(ctx) is str]
    if "@graph" in cred_dict:
        for enclosed_data in cred_dict.get("@graph"):
            if (
                enclosed_data["id"].startswith("urn:")
                and "credentialSubject" in enclosed_data
            ):
                cred_dict.update(enclosed_data)
                del cred_dict["@graph"]
                break
    given_id = cred_dict.get("id")
    if given_id and self.check_if_cred_id_derived(given_id):
        given_id = str(uuid4())
    # issuer
    issuer = cred_dict.get("issuer")
    if type(issuer) is dict:
        issuer = issuer.get("id")
    # subjects
    subject_ids = None
    subjects = cred_dict.get("credentialSubject")
    if subjects:
        if type(subjects) is dict:
            subjects = [subjects]
        subject_ids = [
            subject.get("id") for subject in subjects if ("id" in subject)
        ]
    else:
        cred_dict["credentialSubject"] = {}
    # Schemas
    schemas = cred_dict.get("credentialSchema", [])
    if type(schemas) is dict:
        schemas = [schemas]
    schema_ids = [schema.get("id") for schema in schemas]
    expanded = jsonld.expand(cred_dict)
    types = JsonLdProcessor.get_values(
        expanded[0],
        "@type",
    )
    return VCRecord(
        contexts=contexts,
        expanded_types=types,
        issuer_id=issuer,
        subject_ids=subject_ids,
        proof_types=proof_types,
        given_id=given_id,
        cred_value=cred_dict,
        schema_ids=schema_ids,
    )
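# JsonLdProcessor.get_values, used above, is pyld's helper for pulling all
# values of a property from an expanded node; on a credential it yields the
# expanded type IRIs. A minimal sketch (context and type invented for
# illustration):
from pyld import jsonld
from pyld.jsonld import JsonLdProcessor

doc = {"@context": {"ex": "https://example.org/"}, "@type": "ex:Credential"}
expanded = jsonld.expand(doc)
assert JsonLdProcessor.get_values(expanded[0], "@type") == [
    "https://example.org/Credential"
]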
def transform_jsonld(self, obj):
    """Compact JSON according to context."""
    rec = copy.deepcopy(obj)
    rec.update(self.context)
    compacted = jsonld.compact(rec, self.context)
    if not self.expanded:
        return compacted
    else:
        return jsonld.expand(compacted)[0]
def dump(self, obj):
    """Compact JSON according to context."""
    rec = copy.deepcopy(super(JSONLDSerializer, self).dump(obj))
    rec.update(self.context)
    compacted = jsonld.compact(rec, self.context)
    if not self.expanded:
        return compacted
    else:
        return jsonld.expand(compacted)[0]
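# Both serializers above rely on the same round trip: merge the context
# into the record, compact, then optionally expand. A compressed sketch of
# the effect (record and context invented for illustration):
from pyld import jsonld

context = {"@context": {"title": "http://purl.org/dc/terms/title"}}
record = {"title": "On the Origin of Species"}

rec = dict(record, **context)             # attach the context
compacted = jsonld.compact(rec, context)  # keys stay in compact form
expanded = jsonld.expand(compacted)[0]    # keys become absolute IRIs
assert expanded == {
    "http://purl.org/dc/terms/title": [{"@value": "On the Origin of Species"}]
}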
def parse(cls, _db, data, patron):
    if patron.synchronize_annotations != True:
        return PATRON_NOT_OPTED_IN_TO_ANNOTATION_SYNC
    try:
        data = json.loads(data)
        data = jsonld.expand(data)
    except ValueError as e:
        return INVALID_ANNOTATION_FORMAT
def record2jsonld(record, context):
    from pyld import jsonld
    import json
    import rdflib_jsonld
    from rdflib import Graph
    import copy

    rec = copy.deepcopy(record)
    rec.update(context)
    compacted = jsonld.compact(rec, context)
    return jsonld.expand(compacted)
def _parse_json_ld(filename):
    # just some experiments
    with open(filename) as data_f:
        data = json.load(data_f)
        compacted = compact_with_json_ld_context(data)
        expanded = jsonld.expand(compacted)
        normalized = jsonld.normalize(
            data, {'algorithm': 'URDNA2015', 'format': 'application/nquads'})
        print(json.dumps(expanded, indent=2))
def __expanded(self):
    if self.env.document_loader:
        document_loader = self.env.document_loader
    else:
        document_loader = default_loader
    options = {"expandContext": self.env.implied_context}
    if document_loader:
        options["documentLoader"] = document_loader
    return jsonld.expand(self.__jsobj, options)
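# pyld resolves remote @context URLs through a pluggable document loader,
# which is what __expanded threads into the expansion options. The same
# wiring outside a class might look like the sketch below; the caching
# wrapper is illustrative, and the loader signature matches pyld 2.x,
# where loaders take (url, options).
from pyld import jsonld

_cache = {}
_base_loader = jsonld.requests_document_loader()

def caching_loader(url, options=None):
    # fetch each remote context once and replay it from memory afterwards
    if url not in _cache:
        _cache[url] = _base_loader(url, options or {})
    return _cache[url]

doc = {"@context": "http://www.w3.org/ns/anno.jsonld", "type": "Annotation"}
expanded = jsonld.expand(doc, options={"documentLoader": caching_loader})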
def parse(cls, _db, data, patron):
    if patron.synchronize_annotations != True:
        return PATRON_NOT_OPTED_IN_TO_ANNOTATION_SYNC
    try:
        data = json.loads(data)
        if 'id' in data and data['id'] is None:
            del data['id']
        data = jsonld.expand(data)
    except ValueError as e:
        return INVALID_ANNOTATION_FORMAT
def add_BD_fields(jsonld_str, esdoc):
    expanded = jsonld.expand(json.loads(jsonld_str))
    logger.info("EXPANDED: " + json.dumps(expanded, indent=2))
    framed = jsonld.frame(expanded, dts_jsonld_frame)
    logger.info("FRAMED: " + json.dumps(framed, indent=2))
    for field, obj in dts_fields.items():
        # logger.info("obj: " + obj)
        esdoc[field] = []
        # append all the matching values to the ES field
        for val in [match.value for match in obj['expr'].find(framed)]:
            esdoc[field].append(val)
def index():
    request_body = request.get_data().decode('utf-8')
    doc = json.loads(request_body)
    body = {'jsonld': jsonld.expand(doc)}
    g = Graph()
    g.parse(data=request_body, format='json-ld')
    for uri in g.subjects(
            predicate=RDF.type,
            object=URIRef('http://www.bbc.co.uk/search/schema/ContentItem')):
        es.index(index='bbc', body=body, doc_type='item', id=str(uri))
    return 'Accepted!', 202
def test_json_ld(self, book_context, book_schema, simple_book_record):
    from pyld import jsonld
    import json
    import rdflib_jsonld
    from rdflib import Graph
    import copy

    rec = copy.deepcopy(simple_book_record)
    rec.update(book_context)
    compacted = jsonld.compact(rec, book_context)
    expanded = jsonld.expand(compacted)
    graph = Graph().parse(data=json.dumps(expanded, indent=2),
                          format="json-ld")
    print(graph.serialize(format="json-ld"))
def test_parse_jsonld_with_context(self):
    self.pool.loan_to(self.patron)

    data = self._sample_jsonld()
    data_json = json.dumps(data)
    expanded = jsonld.expand(data)[0]

    annotation = AnnotationParser.parse(self._db, data_json, self.patron)
    eq_(self.patron.id, annotation.patron_id)
    eq_(self.identifier.id, annotation.identifier_id)
    eq_(Annotation.IDLING, annotation.motivation)
    eq_(True, annotation.active)
    eq_(json.dumps(expanded["http://www.w3.org/ns/oa#hasTarget"][0]),
        annotation.target)
    eq_(json.dumps(expanded["http://www.w3.org/ns/oa#hasBody"][0]),
        annotation.content)
def expand_json(metadata, context=DEFAULT_CONTEXT):
    """
    Expand json, but be sure to use our documentLoader.

    By default this expands with DEFAULT_CONTEXT, but if you do not need
    this, you can safely set this to None.

    # @@: Is the above a good idea? Maybe it should be set to None by
    #   default.
    """
    options = {"documentLoader": load_context}
    if context is not None:
        options["expandContext"] = context
    return jsonld.expand(metadata, options=options)
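# The expandContext option used above merges an extra context into the
# document before expansion, which is what lets expand_json apply
# DEFAULT_CONTEXT to plain JSON. A minimal sketch (context invented for
# illustration):
from pyld import jsonld

metadata = {"name": "Saturn V"}
expanded = jsonld.expand(
    metadata,
    options={"expandContext": {"name": "http://schema.org/name"}},
)
assert expanded == [{"http://schema.org/name": [{"@value": "Saturn V"}]}]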
def ontology_context(self):
    """Construct a context file for the ontology"""
    expanded = jsonld.expand(self.compacted_jsonld)
    body = {"a": "@type", "uri": "@id"}
    ids_with_duplicate_shortcuts = []
    for x in expanded:
        entity = Entity(x)
        (name, entry) = entity.context
        if name is None:
            continue
        if name in body:
            ids_with_duplicate_shortcuts.append(body[name]['@id'])
            ids_with_duplicate_shortcuts.append(entry['@id'])
        body[name] = entry
    return {'@context': body}, ids_with_duplicate_shortcuts
def get_jsonld(self, context, new_context={}, format="full"):
    """Return the JSON-LD serialization.

    :param: context the context to use for raw publishing; each
        SmartJsonLD instance is expected to have a default context
        associated.
    :param: new_context the context to use for formatted publishing,
        usually supplied by the client; used by the 'compacted',
        'framed', and 'normalized' formats.
    :param: format the publishing format; can be 'full', 'inline',
        'compacted', 'expanded', 'flattened', 'framed' or 'normalized'.

        Note that 'full' and 'inline' are synonyms, referring to the
        document form which includes the context; for more information
        see: [http://www.w3.org/TR/json-ld/]
    """
    from pyld import jsonld

    if isinstance(context, six.string_types):
        ctx = self.get_context(context)
    elif isinstance(context, dict):
        ctx = context
    else:
        raise TypeError('JSON-LD context must be a string or dictionary')

    try:
        doc = self.translate(context, ctx)
    except NotImplementedError:
        # model does not require translation
        doc = self.dumps(clean=True)

    doc["@context"] = ctx

    if format in ["full", "inline"]:
        return doc
    if format == "compacted":
        return jsonld.compact(doc, new_context)
    elif format == "expanded":
        return jsonld.expand(doc)
    elif format == "flattened":
        return jsonld.flatten(doc)
    elif format == "framed":
        return jsonld.frame(doc, new_context)
    elif format == "normalized":
        return jsonld.normalize(doc, new_context)
    raise ValueError('Invalid JSON-LD serialization format')
def main(self):
    print('PyLD Unit Tests')
    print('Use -h or --help to view options.')

    # add program options
    self.parser.add_option('-f', '--file', dest='file',
                           help='The single test file to run', metavar='FILE')
    self.parser.add_option('-d', '--directory', dest='directory',
                           help='The directory full of test files',
                           metavar='DIR')
    self.parser.add_option('-e', '--earl', dest='earl',
                           help='The filename to write the EARL report to',
                           metavar='EARL')
    self.parser.add_option('-v', '--verbose', dest='verbose',
                           action='store_true', default=False,
                           help='Prints verbose test data')

    # parse options
    (self.options, args) = self.parser.parse_args()

    # check if file or directory were specified
    if self.options.file == None and self.options.directory == None:
        raise Exception('No test file or directory specified.')

    # check if file was specified, exists, and is file
    if self.options.file is not None:
        if (os.path.exists(self.options.file) and
                os.path.isfile(self.options.file)):
            # add manifest file to the file list
            self.manifest_files.append(os.path.abspath(self.options.file))
        else:
            raise Exception('Invalid test file: "%s"' % self.options.file)

    # check if directory was specified, exists and is dir
    if self.options.directory is not None:
        if (os.path.exists(self.options.directory) and
                os.path.isdir(self.options.directory)):
            # load manifest files from test directory
            for test_dir, dirs, files in os.walk(self.options.directory):
                for manifest in files:
                    # add all .jsonld manifest files to the file list
                    if (manifest.find('manifest') != -1 and
                            manifest.endswith('.jsonld')):
                        self.manifest_files.append(join(test_dir, manifest))
        else:
            raise Exception(
                'Invalid test directory: "%s"' % self.options.directory)

    # see if any manifests have been specified
    if len(self.manifest_files) == 0:
        raise Exception('No manifest files found.')

    passed = 0
    failed = 0
    total = 0

    # run the tests from each manifest file
    for manifest_file in self.manifest_files:
        test_dir = os.path.dirname(manifest_file)
        manifest = json.load(open(manifest_file, 'r'))
        count = 1

        for test in manifest['sequence']:
            # skip unsupported types
            skip = True
            test_type = test['@type']
            for tt in test_type:
                if tt in SKIP_TEST_TYPES:
                    skip = True
                    break
                if tt in TEST_TYPES:
                    skip = False
            if skip:
                # print('Skipping test: "%s" ...' % test['name'])
                continue

            print('JSON-LD/%s %04d/%s...' % (
                manifest['name'], count, test['name']), end=' ')

            total += 1
            count += 1

            # read input file
            with open(join(test_dir, test['input'])) as f:
                if test['input'].endswith('.jsonld'):
                    input = json.load(f)
                else:
                    input = f.read().decode('utf8')
            # read expect file
            with open(join(test_dir, test['expect'])) as f:
                if test['expect'].endswith('.jsonld'):
                    expect = json.load(f)
                else:
                    expect = f.read().decode('utf8')
            result = None

            # JSON-LD options
            options = {
                'base': 'http://json-ld.org/test-suite/tests/' + test['input'],
                'useNativeTypes': True
            }

            success = False
            try:
                if 'jld:ExpandTest' in test_type:
                    result = jsonld.expand(input, options)
                elif 'jld:CompactTest' in test_type:
                    ctx = json.load(open(join(test_dir, test['context'])))
                    result = jsonld.compact(input, ctx, options)
                elif 'jld:FlattenTest' in test_type:
                    result = jsonld.flatten(input, None, options)
                elif 'jld:FrameTest' in test_type:
                    frame = json.load(open(join(test_dir, test['frame'])))
                    result = jsonld.frame(input, frame, options)
                elif 'jld:FromRDFTest' in test_type:
                    result = jsonld.from_rdf(input, options)
                elif 'jld:ToRDFTest' in test_type:
                    options['format'] = 'application/nquads'
                    result = jsonld.to_rdf(input, options)
                elif 'jld:NormalizeTest' in test_type:
                    options['format'] = 'application/nquads'
                    result = jsonld.normalize(input, options)

                # check the expected value against the test result
                success = deep_compare(expect, result)

                if success:
                    passed += 1
                    print('PASS')
                else:
                    failed += 1
                    print('FAIL')

                if not success or self.options.verbose:
                    print('Expect:', json.dumps(expect, indent=2))
                    print('Result:', json.dumps(result, indent=2))
            except jsonld.JsonLdError as e:
                print('\nError: ', e)
                failed += 1
                print('FAIL')

            # add EARL report assertion
            EARL['subjectOf'].append({
                '@type': 'earl:Assertion',
                'earl:assertedBy': EARL['doap:developer']['@id'],
                'earl:mode': 'earl:automatic',
                'earl:test': ('http://json-ld.org/test-suite/tests/' +
                              os.path.basename(manifest_file) +
                              test.get('@id', '')),
                'earl:result': {
                    '@type': 'earl:TestResult',
                    'dc:date': datetime.datetime.utcnow().isoformat(),
                    'earl:outcome': 'earl:' + ('passed' if success else 'failed')
                }
            })

    if self.options.earl:
        f = open(self.options.earl, 'w')
        f.write(json.dumps(EARL, indent=2))
        f.close()

    print('Done. Total:%d Passed:%d Failed:%d' % (total, passed, failed))
def main(self):
    print 'PyLD Unit Tests'
    print 'Use -h or --help to view options.'

    # add program options
    self.parser.add_option('-f', '--file', dest='file',
                           help='The single test file to run', metavar='FILE')
    self.parser.add_option('-d', '--directory', dest='directory',
                           help='The directory full of test files',
                           metavar='DIR')
    self.parser.add_option('-v', '--verbose', dest='verbose',
                           action='store_true', default=False,
                           help='Prints verbose test data')

    # parse options
    (self.options, args) = self.parser.parse_args()

    # check if file or directory were specified
    if self.options.file == None and self.options.directory == None:
        raise Exception('No test file or directory specified.')

    # check if file was specified, exists, and is file
    if self.options.file is not None:
        if (os.path.exists(self.options.file) and
                os.path.isfile(self.options.file)):
            # add manifest file to the file list
            self.manifest_files.append(os.path.abspath(self.options.file))
        else:
            raise Exception('Invalid test file: "%s"' % self.options.file)

    # check if directory was specified, exists and is dir
    if self.options.directory is not None:
        if (os.path.exists(self.options.directory) and
                os.path.isdir(self.options.directory)):
            # load manifest files from test directory
            for test_dir, dirs, files in os.walk(self.options.directory):
                for manifest in files:
                    # add all .jsonld manifest files to the file list
                    if (manifest.find('manifest') != -1 and
                            manifest.endswith('.jsonld')):
                        self.manifest_files.append(join(test_dir, manifest))
        else:
            raise Exception(
                'Invalid test directory: "%s"' % self.options.directory)

    # see if any manifests have been specified
    if len(self.manifest_files) == 0:
        raise Exception('No manifest files found.')

    passed = 0
    failed = 0
    total = 0

    # run the tests from each manifest file
    for manifest_file in self.manifest_files:
        test_dir = os.path.dirname(manifest_file)
        manifest = json.load(open(manifest_file, 'r'))
        count = 1

        for test in manifest['sequence']:
            # skip unsupported types
            skip = True
            test_type = test['@type']
            for tt in TEST_TYPES:
                if tt in test_type:
                    skip = False
                    break
            if skip:
                print 'Skipping test: "%s" ...' % test['name']
                continue

            print 'JSON-LD/%s %04d/%s...' % (
                manifest['name'], count, test['name']),

            total += 1
            count += 1

            # read input file
            with open(join(test_dir, test['input'])) as f:
                if test['input'].endswith('.jsonld'):
                    input = json.load(f)
                else:
                    input = f.read().decode('utf8')
            # read expect file
            with open(join(test_dir, test['expect'])) as f:
                if test['expect'].endswith('.jsonld'):
                    expect = json.load(f)
                else:
                    expect = f.read().decode('utf8')
            result = None

            # JSON-LD options
            options = {
                'base': 'http://json-ld.org/test-suite/tests/' + test['input']}

            try:
                if 'jld:NormalizeTest' in test_type:
                    options['format'] = 'application/nquads'
                    result = jsonld.normalize(input, options)
                elif 'jld:ExpandTest' in test_type:
                    result = jsonld.expand(input, options)
                elif 'jld:CompactTest' in test_type:
                    ctx = json.load(open(join(test_dir, test['context'])))
                    result = jsonld.compact(input, ctx, options)
                elif 'jld:FrameTest' in test_type:
                    frame = json.load(open(join(test_dir, test['frame'])))
                    result = jsonld.frame(input, frame, options)
                elif 'jld:FromRDFTest' in test_type:
                    result = jsonld.from_rdf(input, options)
                elif 'jld:ToRDFTest' in test_type:
                    options['format'] = 'application/nquads'
                    result = jsonld.to_rdf(input, options)

                # check the expected value against the test result
                success = deep_compare(expect, result)

                if success:
                    passed += 1
                    print 'PASS'
                else:
                    failed += 1
                    print 'FAIL'

                if not success or self.options.verbose:
                    print 'Expect:', json.dumps(expect, indent=2)
                    print 'Result:', json.dumps(result, indent=2)
            except jsonld.JsonLdError as e:
                print '\nError: ', e
                failed += 1
                print 'FAIL'

    print 'Done. Total:%d Passed:%d Failed:%d' % (total, passed, failed)
def expanded_jsonld(self):
    """Return the ontology in expanded JSON-LD format as an object."""
    return jsonld.expand(self.compacted_jsonld)
def main():
    doc = json.load(sys.stdin)
    expanded = jsonld.expand(doc)
    print(json.dumps(expanded, indent=2))
def main(self): print "PyLD TestRunner" print "Use -h or --help to view options." # add program options self.parser.add_option("-f", "--file", dest="file", help="The single test file to run", metavar="FILE") self.parser.add_option("-d", "--directory", dest="directory", help="The directory full of test files", metavar="DIR") self.parser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=False, help="Prints verbose test data") # parse options (self.options, args) = self.parser.parse_args() # check if file or directory were specified if self.options.file == None and self.options.directory == None: print "No test file or directory specified." return # check if file was specified, exists and is file if self.options.file != None: if (os.path.exists(self.options.file) and os.path.isfile(self.options.file)): # add test file to the file list self.testfiles.append(os.path.abspath(self.options.file)) self.testdir = os.path.dirname(self.options.file) else: print "Invalid test file." return # check if directory was specified, exists and is dir if self.options.directory != None: if (os.path.exists(self.options.directory) and os.path.isdir(self.options.directory)): # load test files from test directory for self.testdir, dirs, files in os.walk(self.options.directory): for testfile in files: # add all .test files to the file list if testfile.endswith(".test"): self.testfiles.append(join(self.testdir, testfile)) else: print "Invalid test directory." return # see if any tests have been specified if len(self.testfiles) == 0: print "No tests found." return # FIXME: #self.testFiles.sort() run = 0 passed = 0 failed = 0 # run the tests from each test file for testfile in self.testfiles: # test group in test file testgroup = json.load(open(testfile, 'r')) count = 1 for test in testgroup['tests']: print 'Test: %s %04d/%s...' % ( testgroup['group'], count, test['name']), run += 1 count += 1 # open the input and expected result json files inputFd = open(join(self.testdir, test['input'])) expectFd = open(join(self.testdir, test['expect'])) inputJson = json.load(inputFd) expectJson = json.load(expectFd) resultJson = None testType = test['type'] if testType == 'normalize': resultJson = jsonld.normalize(inputJson) elif testType == 'expand': resultJson = jsonld.expand(inputJson) elif testType == 'compact': contextFd = open(join(self.testdir, test['context'])) contextJson = json.load(contextFd) resultJson = jsonld.compact(contextJson, inputJson) elif testType == 'frame': frameFd = open(join(self.testdir, test['frame'])) frameJson = json.load(frameFd) resultJson = jsonld.frame(inputJson, frameJson) else: print "Unknown test type." # check the expected value against the test result if expectJson == resultJson: passed += 1 print 'PASS' if self.options.verbose: print 'Expect:', json.dumps(expectJson, indent=4) print 'Result:', json.dumps(resultJson, indent=4) else: failed += 1 print 'FAIL' print 'Expect:', json.dumps(expectJson, indent=4) print 'Result:', json.dumps(resultJson, indent=4) print "Tests run: %d, Tests passed: %d, Tests Failed: %d" % (run, passed, failed)
(options, args) = parser.parse_args()

if len(args) != 1:
    parser.error("Error: incorrect number of arguments, try --help")

doc = get_demo_record(file_name=args[0], verbose=options.verbose)

validate(doc)

if options.verbose:
    from rerodoc.dojson.book import book2marc
    print(json.dumps(book2marc.do(doc), indent=2))

from rerodoc.dojson.utils import get_context
context = get_context("book")
doc.update(context)

if options.verbose:
    print("Input record in json format:")
    print(json.dumps(doc, indent=2))

compacted = jsonld.compact(doc, context)
#print compacted
expanded = jsonld.expand(compacted)
#import pprint
#pprint.pprint(expanded)
#flattened = jsonld.flatten(doc)
#framed = jsonld.frame(doc, context)
#normalized = jsonld.normalize(doc, {'format': 'application/nquads'})

graph = Graph().parse(data=json.dumps(compacted, indent=2), format="json-ld")
print(graph.serialize(format=options.format))
        }
    }

while True:
    os.system("clear")
    gene_id = raw_input("input gene id\n")
    url = url_mod % int(gene_id)
    try:
        r = requests.get(url)
        r.raise_for_status()
    except HTTPError:
        print "no id"

    data = r.json()
    print "\nraw json data"
    print json.dumps(data)

    data["@context"] = context["@context"]
    print "\ncompacted data"
    print json.dumps(data)

    doc = jsonld.expand(data)
    print "\nexpanded data"
    print json.dumps(doc)

    data_nor = jsonld.normalize(doc, {'format': 'application/nquads'})
    print "\n N-Quads data"
    print data_nor

    raw_input("input any key to continue")
context = "http://www.w3.org/ns/anno.jsonld" frameURI = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/jsonld/annotation_frame.jsonld" # ontology = "https://www.w3.org/ns/oa.ttl" ontology = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/vocab/wd/ontology/oa.ttl" data = fetch(context) context_js = json.loads(data) data = fetch(example) example_js = json.loads(data) data = fetch(frameURI) frame_js = json.loads(data) # Test1: JSON-LD context document can be parsed without errors by JSON-LD validators # Context document is parsable if it can be loaded and used to expand the example try: expanded = expand(example_js, context_js) except: print "Context is invalid, failed Test 1" # Test2: JSON-LD context document can be used to convert JSON-LD serialized Annotations into RDF triples. try: jsonld_nq = to_rdf(example_js, {"base": "http://example.org/", "format": "application/nquads"}) except: print "Cannot use context to convert JSON-LD to NQuads" # Test3: Graphs produced are isomorphic try: rl_g = validator.json_to_rdf(example_js) g = ConjunctiveGraph()