def merge(files):
    """Merge a set of Felis FILES.

    This will expand out the Felis FILES so that it is easy to override
    values (using @id), then normalize to a single output.
    """
    graph = []
    for file in files:
        schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
        if "@graph" not in schema_obj:
            schema_obj["@type"] = "felis:Schema"
            schema_obj["@context"] = DEFAULT_CONTEXT
        graph.extend(jsonld.flatten(schema_obj))

    updated_map = {}
    for item in graph:
        _id = item["@id"]
        item_to_update = updated_map.get(_id, item)
        if item_to_update and item_to_update != item:
            logger.debug(f"Overwriting {_id}")
            item_to_update.update(item)
        updated_map[_id] = item_to_update

    merged = {"@context": DEFAULT_CONTEXT, "@graph": list(updated_map.values())}
    normalized = _normalize(merged)
    _dump(normalized)
def do_flatten(doc):
    # flatten a document
    # see: https://json-ld.org/spec/latest/json-ld/#flattened-document-form
    flattened = jsonld.flatten(doc)  # all deep-level trees flattened to the top-level
    print("FLATTENED")
    print(json.dumps(flattened, indent=2))
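# A minimal usage sketch for do_flatten above; the nested sample document and
# its vocabulary terms are illustrative assumptions, not taken from the original.
sample_doc = {
    "@context": {
        "name": "http://schema.org/name",
        "knows": {"@id": "http://schema.org/knows", "@type": "@id"},
    },
    "@id": "http://example.org/alice",
    "name": "Alice",
    "knows": {"@id": "http://example.org/bob", "name": "Bob"},
}
# do_flatten prints a top-level list of node objects; the nested "knows"
# node is pulled up to the same level as its parent node.
do_flatten(sample_doc)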
async def retrieve(request):
    """Retrieve data specific to an acronym, URL should be persistent."""
    db_pool = request.app['pool']
    term = request.match_info['acronymid']
    term_type = request.match_info['acronymtype']
    LOG.info('retrieve')
    data = await fetch_acronym(db_pool, term, term_type)
    doc = {
        "@id": f'acr:{data[0]["index"]}',
        "dc:title": data[0]["title"],
        "skos:label": data[0]["title"],
        "dc:type": {
            "@id": f'acr:{data[0]["acronymtype"]}',
            "@type": 'skos:Concept',
            "skos:label": data[0]["acronymtype"]
        },
        "dc:language": data[0]["language"],
        "dc:description": data[0]["description"]
    }
    if request.content_type == 'application/ld+json':
        response = jsonld.flatten(doc, context)
        return web.json_response(response, content_type='application/ld+json', dumps=json.dumps)
    elif request.content_type == 'application/n-quads':
        response = jsonld.normalize(doc, {
            'algorithm': 'URDNA2015',
            'format': 'application/n-quads'
        })
        return web.Response(text=response, content_type='application/n-quads')
    else:
        response = {"data": data}
        return web.json_response(response, content_type='application/json', dumps=json.dumps)
def __init__(self, name: str, document: Any, url: Optional[str] = None, soup: Optional[BeautifulSoup] = None):
    if '@context' not in document or 'schema.org' in document['@context']:
        # schema.org, as it is commonly used, has a bug.
        # Very often 'http://schema.org' is used as the '@context' value, but the
        # website does not ever return a valid JSON-LD schema.
        # It appears it used to, with the help of content negotiation:
        # https://webmasters.stackexchange.com/questions/123409
        # But this has stopped working, so we hard-code this default context to fix it.
        document['@context'] = JSONLD.DEFAULT_CONTEXT
    else:
        # TODO: Support other schemas
        logging.warning(
            f"You're using a schema which is not schema.org ({document['@context']}). "
            "Support is limited at the moment (particularly for introspection)."
        )
    super().__init__(name)
    self.context = document['@context']
    self.graph = jsonld.flatten(document, self.context)['@graph']
    self._introspect()
    if url:
        self.url = url
    if soup:
        self.soup = soup
def _introspect(self):
    schema = None
    if self.context == JSONLD.DEFAULT_CONTEXT:
        schema = get_schemaorg_schema()
    else:
        # TODO: Support other schemas
        schema = None
    if schema:
        self.introspection_graph = jsonld.flatten(schema, self.context)['@graph']
def process_jsonld(self, data, original, **kwargs):
    if isinstance(original, (list, tuple)):
        return data
    method = request.args.get("process", "compact")
    context = {"@context": {"@vocab": "http://neurostore.org/nimads/"}}
    if method == "flatten":
        return jsonld.flatten(data, context)
    elif method == "expand":
        return jsonld.expand(data)
    else:
        return jsonld.compact(data, context)
def _serialize(self, obj: typing.Union[_T, typing.Iterable[_T]], *, many: bool = False):
    """Serialize ``obj`` to jsonld."""
    if many and obj is not None:
        return [
            self._serialize(d, many=False)
            for d in typing.cast(typing.Iterable[_T], obj)
        ]

    if isinstance(obj, Proxy):
        proxy_schema = obj.__proxy_schema__
        if (not obj.__proxy_initialized__
                and isinstance(proxy_schema, type(self))
                and proxy_schema.flattened == self.flattened):
            # if proxy was not accessed and we use the same schema, return original data
            return obj.__proxy_original_data__
        # resolve Proxy object
        obj = obj.__wrapped__

    ret = self.dict_class()
    for attr_name, field_obj in self.dump_fields.items():
        value = field_obj.serialize(attr_name, obj, accessor=self.get_attribute)
        if value is missing:
            continue
        key = field_obj.data_key if field_obj.data_key is not None else attr_name
        reverse = getattr(field_obj, "reverse", False)
        if reverse:
            if "@reverse" not in ret:
                ret["@reverse"] = self.dict_class()
            ret["@reverse"][key] = value
        else:
            ret[key] = value

    if "@id" not in ret or not ret["@id"]:
        ret["@id"] = self.opts.id_generation_strategy(ret, obj)

    # add type
    rdf_type = self.opts.rdf_type
    if not rdf_type:
        raise ValueError("No class type specified for schema")
    ret["@type"] = normalize_type(rdf_type)

    if self.flattened and self._top_level:
        ret = jsonld.flatten(ret)

    return ret
def flatten_metadata_graph(obj):
    from pyld import jsonld

    # simplify the graph into a sequence of one dict per known dataset, even
    # if multiple metadata sets from different sources exist for the same
    # dataset

    # cache schema requests; this also avoids the need for network access
    # for previously "visited" schemas
    jsonld.set_document_loader(_cached_load_document)
    # TODO: cache entire graphs to prevent repeated term resolution for
    # subsequent calls
    return jsonld.flatten(obj, ctx={"@context": "http://schema.org/"})
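# _cached_load_document is referenced above but not shown here. A minimal
# sketch of what such a caching document loader could look like, assuming
# pyld's requests-based loader; the cache dict and function name are
# illustrative assumptions, not the project's actual implementation.
from pyld import jsonld

_document_cache = {}
_base_loader = jsonld.requests_document_loader()

def _cached_load_document(url, options=None):
    # return a previously fetched context/schema instead of hitting the network
    if url not in _document_cache:
        _document_cache[url] = _base_loader(url, options or {})
    return _document_cache[url]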
def as_quad_stream(
    self,
    raw_data: TextIO,
    iri: Optional[URIRef],
    context: LDContext,
    root_loader: Loader,
) -> Iterable[Quad]:
    """Read JSON-LD data into a quad stream."""
    document = self.as_jsonld_document(raw_data)
    document = assign_key_if_not_present(
        document=document,
        key='@id',
        default_value=str(iri),
    )
    document = assign_key_if_not_present(
        document=document,
        key=str(OCTA.subjectOf),
        default_value={
            '@id': str(iri),
        },
    )

    try:
        document = jsonld.expand(
            document,
            options={
                'expandContext': context,
                'documentLoader': root_loader,
                # Explanation:
                # https://github.com/digitalbazaar/pyld/issues/143
                'base': str(LOCAL),
            },
        )
    except JsonLdError as err:
        raise ExpandError(
            message=str(err),
            document=document,
            context=context,
            iri=iri,
        ) from err

    document = jsonld.flatten(document)

    return list(
        parse_quads(
            quads_document=jsonld.to_rdf(document),
            graph=iri,
            blank_node_prefix=str(iri),
        ),
    )
def test_import_jsonld_into_named_graph(flatten_before_import: bool):
    """Test named graphs we use."""
    graph = ConjunctiveGraph()

    jsonld_document = JSONLD_DOCUMENT
    if flatten_before_import:
        jsonld_document = jsonld.flatten(jsonld_document)

    serialized_document = json.dumps(jsonld_document, indent=4)

    graph.parse(
        data=serialized_document,
        format='json-ld',
        publicID=PUBLIC_ID,
    )

    print(graph.serialize(format='n3').decode('utf-8'))

    # Make sure only one NAMED GRAPH is created on import.
    assert list(map(
        operator.itemgetter(Variable('g')),
        graph.query(
            'SELECT DISTINCT ?g WHERE { GRAPH ?g { ?s ?p ?o . } } ORDER BY ?g',
        ).bindings,
    )) == [
        URIRef('https://myblog.net/rdf/'),
    ]

    # The information in the @included section was properly parsed.
    assert graph.query('''
        SELECT * WHERE {
            GRAPH ?g {
                ex:Rover rdfs:subClassOf ex:Robot .
            }
        }
    ''', initNs=NAMESPACES).bindings == [{
        Variable('g'): PUBLIC_ID,
    }]

    # The information in the root was properly parsed.
    assert graph.query('''
        SELECT * WHERE {
            GRAPH ?g {
                blog:JSONLD-and-named-graphs a schema:blogPost .
            }
        }
    ''', initNs=NAMESPACES).bindings == [{
        Variable('g'): PUBLIC_ID,
    }]
def parse(self):
    try:
        jsonld_flat = jsonld.flatten(
            self.feed, Feed.jsonld_ctx, {"base": self.base}
        )
    except Exception as e:
        raise Exception(
            "Could not flatten JSON-LD for feed: '{0}' [{1}]".format(
                self.base, str(e)
            )
        )

    for node in jsonld_flat["@graph"]:
        if self.hasType(node, "simplevod:SimpleVoD"):
            self.title = node["title"]
            if (
                "hasMemberGroup" in node
                and "@list" in node["hasMemberGroup"]
            ):
                for group in node["hasMemberGroup"]["@list"]:
                    self.categories.append(self.get(group["@id"]))
        elif self.hasType(node, "simplevod:SimpleVoDCategory"):
            category = self.get(node["@id"])
            category.update({
                "isCategory": True,
                "title": node["title"],
                "items": []
            })
            if "hasMember" in node and "@list" in node["hasMember"]:
                for item in node["hasMember"]["@list"]:
                    category["items"].append(self.get(item["@id"]))
        elif self.hasType(node, "TVProgramme"):
            item = self.get(node["@id"])
            item.update({
                "isItem": True,
                "title": node["title"],
                "video": self.get(node["isInstantiatedBy"]["@id"]),
                "productcode": node["videodb:productcode"]
            })
        elif self.hasType(node, "MediaResource"):
            video = self.get(node["@id"])
            video.update({
                "videoref": node["videodb:videoref"]
            })
def get_jsonld(self, context, new_context={}, format="full"):
    """Return the JSON-LD serialization.

    :param: context the context to use for raw publishing; each
        SmartJsonLD instance is expected to have a default context
        associated.
    :param: new_context the context to use for formatted publishing,
        usually supplied by the client; used by the 'compacted', 'framed',
        and 'normalized' formats.
    :param: format the publishing format; can be 'full', 'inline',
        'compacted', 'expanded', 'flattened', 'framed' or 'normalized'.
        Note that 'full' and 'inline' are synonyms, referring to the
        document form which includes the context; for more information
        see: [http://www.w3.org/TR/json-ld/]
    """
    from pyld import jsonld

    if isinstance(context, six.string_types):
        ctx = self.get_context(context)
    elif isinstance(context, dict):
        ctx = context
    else:
        raise TypeError('JSON-LD context must be a string or dictionary')

    try:
        doc = self.translate(context, ctx)
    except NotImplementedError:
        # model does not require translation
        doc = self.dumps(clean=True)

    doc["@context"] = ctx

    if format in ["full", "inline"]:
        return doc
    if format == "compacted":
        return jsonld.compact(doc, new_context)
    elif format == "expanded":
        return jsonld.expand(doc)
    elif format == "flattened":
        return jsonld.flatten(doc)
    elif format == "framed":
        return jsonld.frame(doc, new_context)
    elif format == "normalized":
        return jsonld.normalize(doc, new_context)
    raise ValueError('Invalid JSON-LD serialization format')
def flat_map(response):
    """
    Transform a response using JSON-LD's "flatten" operation, and return a
    dictionary mapping resources (as fully-qualified URLs) to their values
    (also containing fully-qualified URLs).
    """
    # The URL in '@context' may not be available yet, because we probably
    # haven't deployed. So replace the response's "@context" with the
    # contents of that file.
    response['@context'] = CONTEXT['@context']

    # jsonld.flatten gives us a list of objects, which all have @id values
    # (unless they're awkward "blank nodes", like definitions of features).
    # The @id values are unique after flattening, so we can make a dictionary
    # keyed by them.
    result = {}
    flat_objects = jsonld.flatten(response)
    for obj in flat_objects:
        if '@id' in obj:
            result[obj['@id']] = obj
    return result
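# A minimal usage sketch for flat_map above; the sample response and the
# CONTEXT placeholder are assumptions made up for illustration (the real
# module loads CONTEXT from a file).
CONTEXT = {"@context": {"label": "http://www.w3.org/2000/01/rdf-schema#label"}}

sample_response = {
    "@context": "http://api.example.org/ld/context",  # replaced inside flat_map
    "@id": "http://api.example.org/c/en/example",
    "label": "example",
}
by_id = flat_map(sample_response)
# by_id maps each fully-qualified @id to its flattened node object
print(by_id["http://api.example.org/c/en/example"])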
def modify_tap(start_schema_at, files):
    """Modify TAP information in Felis schema FILES.

    This command has some utilities to aid in rewriting Felis FILES in
    specific ways. It will write out a merged version of these files.
    """
    count = 0
    graph = []
    for file in files:
        schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
        if "@graph" not in schema_obj:
            schema_obj["@type"] = "felis:Schema"
            schema_obj["@context"] = DEFAULT_CONTEXT
        schema_index = schema_obj.get("tap:schema_index")
        if not schema_index or (schema_index and schema_index > start_schema_at):
            schema_index = start_schema_at + count
            count += 1
        schema_obj["tap:schema_index"] = schema_index
        graph.extend(jsonld.flatten(schema_obj))

    merged = {"@context": DEFAULT_CONTEXT, "@graph": graph}
    normalized = _normalize(merged)
    _dump(normalized)
def _serialize(self, obj: typing.Union[_T, typing.Iterable[_T]], *, many: bool = False):
    """Serialize ``obj`` to jsonld."""
    if many and obj is not None:
        return [
            self._serialize(d, many=False)
            for d in typing.cast(typing.Iterable[_T], obj)
        ]

    ret = self.dict_class()
    for attr_name, field_obj in self.dump_fields.items():
        value = field_obj.serialize(attr_name, obj, accessor=self.get_attribute)
        if value is missing:
            continue
        key = field_obj.data_key if field_obj.data_key is not None else attr_name
        reverse = getattr(field_obj, "reverse", False)
        if reverse:
            if "@reverse" not in ret:
                ret["@reverse"] = self.dict_class()
            ret["@reverse"][key] = value
        else:
            ret[key] = value

    # add type
    rdf_type = self.opts.rdf_type
    if not rdf_type:
        raise ValueError("No class type specified for schema")
    ret["@type"] = normalize_type(rdf_type)

    if self.flattened:
        ret = jsonld.flatten(ret)

    return ret
def main():
    itemFile = sys.argv[1]
    with open(itemFile, "r") as f:
        item = json.load(f)
    numItems = len(item.keys()) - 2  # -2 for context and id

    context = {}
    ctxtResolver(item["@context"], context)

    doc = {k: v for k, v in item.items() if k != "@context"}
    item = {}
    item["@context"] = context
    item.update(doc)

    expanded = jsonld.expand(item)
    print("Expanded")
    print(json.dumps(expanded, indent=2))
    print("\n\n")

    compacted = jsonld.compact(expanded, context)
    print("Compacted")
    print(json.dumps(compacted, indent=2))
    print("\n\n")

    flattened = jsonld.flatten(compacted, context)
    print("Flattened")
    print(json.dumps(flattened, indent=2))
    print("\n\n")

    normalized = jsonld.normalize(item, {
        'algorithm': 'URDNA2015',
        'format': 'application/n-quads'
    })
    print("Normalized")
    print(json.dumps(normalized, indent=2))
    print("\n\n")

    inpPropNames = set(item.keys()) - set(["@context", "latestResourceData"])
    expPropNames = set([k.split("/")[-1] for k in expanded[0].keys()])
    compPropNames = set([k.split(":")[-1] for k in compacted.keys()])
    print("Missing from expanded: " + str(inpPropNames - expPropNames))
    print("Missing from compacted: " + str(inpPropNames - compPropNames))
def main(self):
    print('PyLD Unit Tests')
    print('Use -h or --help to view options.')

    # add program options
    self.parser.add_option('-f', '--file', dest='file',
        help='The single test file to run', metavar='FILE')
    self.parser.add_option('-d', '--directory', dest='directory',
        help='The directory full of test files', metavar='DIR')
    self.parser.add_option('-e', '--earl', dest='earl',
        help='The filename to write the EARL report to', metavar='EARL')
    self.parser.add_option('-v', '--verbose', dest='verbose',
        action='store_true', default=False,
        help='Prints verbose test data')

    # parse options
    (self.options, args) = self.parser.parse_args()

    # check if file or directory were specified
    if self.options.file == None and self.options.directory == None:
        raise Exception('No test file or directory specified.')

    # check if file was specified, exists, and is file
    if self.options.file is not None:
        if (os.path.exists(self.options.file) and
                os.path.isfile(self.options.file)):
            # add manifest file to the file list
            self.manifest_files.append(os.path.abspath(self.options.file))
        else:
            raise Exception('Invalid test file: "%s"' % self.options.file)

    # check if directory was specified, exists and is dir
    if self.options.directory is not None:
        if (os.path.exists(self.options.directory) and
                os.path.isdir(self.options.directory)):
            # load manifest files from test directory
            for test_dir, dirs, files in os.walk(self.options.directory):
                for manifest in files:
                    # add all .jsonld manifest files to the file list
                    if (manifest.find('manifest') != -1 and
                            manifest.endswith('.jsonld')):
                        self.manifest_files.append(join(test_dir, manifest))
        else:
            raise Exception('Invalid test directory: "%s"' % self.options.directory)

    # see if any manifests have been specified
    if len(self.manifest_files) == 0:
        raise Exception('No manifest files found.')

    passed = 0
    failed = 0
    total = 0

    # run the tests from each manifest file
    for manifest_file in self.manifest_files:
        test_dir = os.path.dirname(manifest_file)
        manifest = json.load(open(manifest_file, 'r'))
        count = 1

        for test in manifest['sequence']:
            # skip unsupported types
            skip = True
            test_type = test['@type']
            for tt in test_type:
                if tt in SKIP_TEST_TYPES:
                    skip = True
                    break
                if tt in TEST_TYPES:
                    skip = False
            if skip:
                # print 'Skipping test: "%s" ...' % test['name']
                continue

            print('JSON-LD/%s %04d/%s...' %
                  (manifest['name'], count, test['name']), end=' ')

            total += 1
            count += 1

            # read input file
            with open(join(test_dir, test['input'])) as f:
                if test['input'].endswith('.jsonld'):
                    input = json.load(f)
                else:
                    input = f.read().decode('utf8')
            # read expect file
            with open(join(test_dir, test['expect'])) as f:
                if test['expect'].endswith('.jsonld'):
                    expect = json.load(f)
                else:
                    expect = f.read().decode('utf8')
            result = None

            # JSON-LD options
            options = {
                'base': 'http://json-ld.org/test-suite/tests/' + test['input'],
                'useNativeTypes': True
            }

            success = False
            try:
                if 'jld:ExpandTest' in test_type:
                    result = jsonld.expand(input, options)
                elif 'jld:CompactTest' in test_type:
                    ctx = json.load(open(join(test_dir, test['context'])))
                    result = jsonld.compact(input, ctx, options)
                elif 'jld:FlattenTest' in test_type:
                    result = jsonld.flatten(input, None, options)
                elif 'jld:FrameTest' in test_type:
                    frame = json.load(open(join(test_dir, test['frame'])))
                    result = jsonld.frame(input, frame, options)
                elif 'jld:FromRDFTest' in test_type:
                    result = jsonld.from_rdf(input, options)
                elif 'jld:ToRDFTest' in test_type:
                    options['format'] = 'application/nquads'
                    result = jsonld.to_rdf(input, options)
                elif 'jld:NormalizeTest' in test_type:
                    options['format'] = 'application/nquads'
                    result = jsonld.normalize(input, options)

                # check the expected value against the test result
                success = deep_compare(expect, result)

                if success:
                    passed += 1
                    print('PASS')
                else:
                    failed += 1
                    print('FAIL')

                if not success or self.options.verbose:
                    print('Expect:', json.dumps(expect, indent=2))
                    print('Result:', json.dumps(result, indent=2))
            except jsonld.JsonLdError as e:
                print('\nError: ', e)
                failed += 1
                print('FAIL')

            # add EARL report assertion
            EARL['subjectOf'].append({
                '@type': 'earl:Assertion',
                'earl:assertedBy': EARL['doap:developer']['@id'],
                'earl:mode': 'earl:automatic',
                'earl:test': ('http://json-ld.org/test-suite/tests/' +
                              os.path.basename(manifest_file) + test.get('@id', '')),
                'earl:result': {
                    '@type': 'earl:TestResult',
                    'dc:date': datetime.datetime.utcnow().isoformat(),
                    'earl:outcome': ('earl:' + 'passed' if success else 'failed')
                }
            })

    if self.options.earl:
        f = open(self.options.earl, 'w')
        f.write(json.dumps(EARL, indent=2))
        f.close()

    print('Done. Total:%d Passed:%d Failed:%d' % (total, passed, failed))
# see: http://json-ld.org/spec/latest/json-ld/#expanded-document-form
expanded = jsonld.expand(compacted)
print(json.dumps(expanded, indent=2))
# Output:
# [{
#   "http://schema.org/image": [{"@id": "http://manu.sporny.org/images/manu.png"}],
#   "http://schema.org/name": [{"@value": "Manu Sporny"}],
#   "http://schema.org/url": [{"@id": "http://manu.sporny.org/"}]
# }]

# expand using URLs
jsonld.expand('http://example.org/doc')

# flatten a document
# see: http://json-ld.org/spec/latest/json-ld/#flattened-document-form
flattened = jsonld.flatten(doc)
# all deep-level trees flattened to the top-level

# frame a document
# see: http://json-ld.org/spec/latest/json-ld-framing/#introduction
framed = jsonld.frame(doc, frame)
# document transformed into a particular tree structure per the given frame

# normalize a document using the RDF Dataset Normalization Algorithm
# (URDNA2015), see: http://json-ld.github.io/normalization/spec/
normalized = jsonld.normalize(
    doc, {'algorithm': 'URDNA2015', 'format': 'application/n-quads'})
# normalized is a string that is a canonical representation of the document
# that can be used for hashing, comparison, etc.
import json

import extruct
import requests
from pyld import jsonld
from w3lib.html import get_base_url

from pyld_document_loader import my_requests_document_loader


def get_schema_context():
    r = requests.get('https://schema.org/docs/jsonldcontext.json')
    return r.json()


def get_jsons(url):
    r = requests.get(url)
    base_url = get_base_url(r.text, r.url)
    data = extruct.extract(r.text, base_url=base_url)
    return data['json-ld']


if __name__ == '__main__':
    jsonld.set_document_loader(my_requests_document_loader())
    ctx = get_schema_context()
    url = 'https://www.imdb.com/title/tt7126948/'
    doc = get_jsons(url)
    # compacted = jsonld.compact(doc, ctx)
    data = jsonld.flatten(doc, ctx=ctx)
    print(json.dumps(data, indent=2))
    with open('imdb_pyld.json', 'w') as f:
        f.write(json.dumps(data))
def test_page(ark):
    global elements
    elements = []

    if request.cookies.get("fairscapeAuth") is None:
        token = request.headers.get("Authorization")
    else:
        token = request.cookies.get("fairscapeAuth")

    data_jsonld = requests.get(EG_URL + ark, headers={"Authorization": token}).json()
    if 'error' in data_jsonld.keys():
        return data_jsonld['error']

    # try:
    try:
        data_jsonld_flat = jsonld.flatten(data_jsonld)
    except Exception as cause:
        raise JsonLdError('Error flattening JSON-LD content ', cause)
    # print("\nflattened JSON-LD content\n", json.dumps(data_jsonld_flat, indent=2))

    elements = []  # contains nodes and edges
    nodes = []  # vertices
    edges = []  # links between vertices
    id_position = {}  # mapping of each @id to a number
    counter = 0

    # TODO: check if http://schema.org/name is missing
    for level in data_jsonld_flat:
        if level.get('@id') is None or '_:b' in level['@id']:
            # flattening generates a blank node _:b when @id is missing
            print('Error: found blank node for missing @id at: ', level)
            sys.exit()
        if level.get('@type') is None:
            print('Error: missing @type at: ', level)
            sys.exit()

        nodes_data = {}
        nodes_element = {}
        nodes_element['id'] = counter
        nodes_element['@id'] = level['@id']
        if os.environ.get("LOCAL", False):
            nodes_element['href'] = 'http://*****:*****@id']  # href in cytoscape to open as a URI
        else:
            nodes_element['href'] = MDS_URL + level['@id']  # href in cytoscape to open as a URI
        nodes_element['@type'] = level['@type'][0]
        nodes_element['type'] = level['@type'][0]  # @type cannot be retrieved as node(@type)
        nodes_element['name'] = level['https://schema.org/name'][0]['@value']
        # all attributes together
        nodes_element['info'] = 'Name: ' + level['https://schema.org/name'][0]['@value'] \
            + '\nType: ' + level['@type'][0] \
            + '\nPID: ' + level['@id']
        nodes_data['data'] = nodes_element
        nodes.append(nodes_data)
        id_position[level['@id']] = counter
        counter += 1
    # print('\nNodes\n', json.dumps(nodes, indent=2))

    # populate edges
    for item in data_jsonld_flat:
        source_id = item['@id']  # chooses @id as source at each level for an edge
        for key, value in item.items():  # iterates through each flattened level
            if isinstance(value, list):
                for i in value:
                    if isinstance(i, dict):
                        if '@id' in i.keys():
                            edges_data = {}
                            edges_element = {}
                            edges_element['source'] = id_position[source_id]
                            edges_element['target'] = id_position[i['@id']]
                            edges_element['label'] = key
                            edges_data['data'] = edges_element
                            edges.append(edges_data)
    # print('\nEdges\n', json.dumps(edges, indent=2))

    # copies all nodes and edges inside elements
    elements = nodes.copy()
    for element in edges:
        elements.append(element)

    # Convert multiple edges such that e1(v1, v2), e2(v1, v2), e3(v1, v2) =>
    # multiple edges between v1, v2 such as http://schema.org/founder,
    # http://schema.org/member become [founder, member]
    source = []
    target = []
    label = []
    for edge_data in edges:
        for edge in edge_data.values():
            for key, value in edge.items():
                if key == 'source':
                    source.append(value)
                if key == 'target':
                    target.append(value)
                if key == 'label':
                    label.append(value)

    d = {'source': source, 'target': target, 'label': label}
    df = pd.DataFrame(data=d)
    # print('\nAll Edges\n', df)

    df_edge_has_common_nodes = df[df.duplicated(subset=['source', 'target'], keep=False)]
    # print('\nEdges with common nodes\n', df_edge_has_common_nodes)

    df_unique = df.drop_duplicates(subset=['source', 'target'], keep=False)
    # print('\nUnique Edges\n', df_unique)

    df_merged_edge_has_common_nodes = df_edge_has_common_nodes.groupby(['source', 'target'], as_index=False) \
        .agg({'label': ','.join})
    # print('\nMerged unique & non-unique edges\n', df_merged_edge_has_common_nodes)

    # Maps uri prefix to its suffix e.g. {http://schema.org/ : member}
    uri_prefix_suffix_dict_list = []

    # populate common edge labels within [...] e.g. [founder, member, ...]
    def get_property_labels(labels):
        property_list = str(labels).split(',')
        if len(property_list) == 1:
            uri_prefix_suffix_dict = {}
            suffix = property_list[0].split('/')[-1]
            prefix = property_list[0].replace(suffix, '')
            uri_prefix_suffix_dict[prefix] = suffix
            uri_prefix_suffix_dict_list.append(uri_prefix_suffix_dict)
            return suffix
        elif len(property_list) > 1:
            property_list_size = len(property_list)
            # prop_list = []  # sending as a list does not add [] around the labels
            props_list = '['  # this string adds the anticipated []
            for prop in property_list:
                uri_prefix_suffix_dict = {}
                suffix = prop.split('/')[-1]
                prefix = prop.replace(suffix, '')
                uri_prefix_suffix_dict[prefix] = suffix
                uri_prefix_suffix_dict_list.append(uri_prefix_suffix_dict)
                # prop_list.append(suffix)
                property_list_size -= 1
                if property_list_size > 0:
                    props_list += suffix + ', '
                else:
                    props_list += suffix
            # return prop_list
            return props_list + ']'

    elements = []  # reinitialize empty nodes and edges

    # Populate only unique edges which are not shared between two nodes
    for index, row in df_unique.iterrows():
        edge_data = {}
        edges_element = {}
        edges_element['source'] = row['source']
        edges_element['target'] = row['target']
        property_label = get_property_labels(row['label'])
        if property_label is None:
            print('ERROR: Could not find property label!')
            sys.exit()
        edges_element['label'] = property_label
        edge_data['data'] = edges_element
        elements.append(edge_data)

    # Populate only those edges which are shared between two vertices
    for index, row in df_merged_edge_has_common_nodes.iterrows():
        edge_data = {}
        edges_element = {}
        edges_element['source'] = row['source']
        edges_element['target'] = row['target']
        property_labels = get_property_labels(row['label'])
        if property_labels is None:
            print('ERROR: Could not find property labels!')
            sys.exit()
        edges_element['label'] = property_labels
        edge_data['data'] = edges_element
        elements.append(edge_data)
    # print('\nRefined Edges\n', elements)

    # Adding the nodes
    def is_node_in_edges(node, edges):
        edge_nodes = set()
        for edge_data_value in edges:
            edge_nodes.add(edge_data_value['data']['source'])
            edge_nodes.add(edge_data_value['data']['target'])
        if node['data']['id'] in edge_nodes:
            return True
        else:
            return False

    for node in nodes:
        if is_node_in_edges(node, edges):
            elements.append(node)

    # except:
    #     return "Visual Failed. Probably missing type, name, or ID"

    print('Made it to render.')
    if os.environ.get("LOCAL", False):
        return render_template('local_index.html')
    return render_template('index.html')
from pyld import jsonld
import json
from os.path import join

doc = json.load(open(join("../json-instances/", "PDB-5AEM.jsonld")))
print("loaded jsonld", doc)
context = json.load(open(join("../json-schemas/contexts/", "dataset_sdo_context.jsonld")))
print("loaded context")

compacted = jsonld.compact(doc, context)
print("-------------COMPACTED")
print(json.dumps(compacted, indent=2))

expanded = jsonld.expand(compacted)
print("-------------EXPANDED")
print(json.dumps(expanded, indent=2))

flattened = jsonld.flatten(compacted)
def flatten(document):
    """Flatten OA JSON-LD."""
    # See http://www.w3.org/TR/json-ld-api/#flattening
    return jsonld.flatten(document)
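# A short usage sketch for the OA flatten() wrapper above; the annotation
# document and its inline context are illustrative assumptions, not taken
# from the original project.
import json

annotation = {
    "@context": {
        "oa": "http://www.w3.org/ns/oa#",
        "body": {"@id": "oa:hasBody", "@type": "@id"},
        "target": {"@id": "oa:hasTarget", "@type": "@id"},
    },
    "@id": "http://example.org/anno1",
    "@type": "oa:Annotation",
    "body": "http://example.org/comment1",
    "target": "http://example.org/page1",
}
# prints the flattened node objects of the annotation graph
print(json.dumps(flatten(annotation), indent=2))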
    uri_generator=UriPatternGenerator('http://id.trees.org/types/%s'),
    concept_scheme=ConceptScheme('http://id.trees.org'))

# Generate a doc for a cs
doc = jsonld_dumper(provider, CONTEXT)
msg = 'Conceptscheme'
print(msg)
print(len(msg) * '=')
print(json.dumps(doc, indent=2))

# Print an expanded doc
expanded = jsonld.expand(doc, CONTEXT)
msg = 'Conceptscheme expanded'
print(msg)
print(len(msg) * '=')
print(json.dumps(expanded, indent=2))

# Compact the doc again
compacted = jsonld.compact(expanded, CONTEXT)
msg = 'Conceptscheme compacted again'
print(msg)
print(len(msg) * '=')
print(json.dumps(compacted, indent=2))

# And now flatten it
flattened = jsonld.flatten(compacted, CONTEXT)
msg = 'Conceptscheme flattened'
print(msg)
print(len(msg) * '=')
print(json.dumps(flattened, indent=2))
def the_function(self, source, context, expand_context):
    """ actually flatten """
    return jsonld.flatten(source, context, dict(expandContext=expand_context))
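# A minimal sketch of exercising the_function above with inline contexts; the
# sample document, both contexts, and calling it as a plain function with
# self=None are assumptions made up for illustration.
from pyld import jsonld

expand_ctx = {"@context": {"name": "http://schema.org/name"}}
compact_ctx = {"@context": {"schema": "http://schema.org/"}}
source = {"@id": "http://example.org/thing", "name": "A thing"}

# keys in `source` are expanded using expand_ctx, then the flattened graph is
# re-compacted against compact_ctx (self is unused, so None is fine here)
flat = the_function(None, source, compact_ctx, expand_ctx)
print(flat)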
def _norm(self, ld):
    j = json.loads(ld)
    flat = jsonld.flatten(j)
    return json.dumps(flat)
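# A small usage sketch for _norm above: it takes a JSON-LD string and returns
# the flattened form as a JSON string. The sample document and the bare call
# with self=None are illustrative assumptions.
import json

ld_string = json.dumps({
    "@context": {"name": "http://schema.org/name"},
    "@id": "http://example.org/alice",
    "name": "Alice",
})
print(_norm(None, ld_string))  # a JSON string holding the flattened node objects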
# expand a document, removing its context
# see: http://json-ld.org/spec/latest/json-ld/#expanded-document-form
expanded = jsonld.expand(compacted)
print(json.dumps(expanded, indent=2))
# Output:
# {
#   "http://schema.org/image": [{"@id": "http://manu.sporny.org/images/manu.png"}],
#   "http://schema.org/name": [{"@value": "Manu Sporny"}],
#   "http://schema.org/url": [{"@id": "http://manu.sporny.org/"}]
# }

# expand using URLs
jsonld.expand('http://example.org/doc')

# flatten a document
# see: http://json-ld.org/spec/latest/json-ld/#flattened-document-form
flattened = jsonld.flatten(doc)
# all deep-level trees flattened to the top-level

# frame a document
# see: http://json-ld.org/spec/latest/json-ld-framing/#introduction
framed = jsonld.frame(doc, frame)
# document transformed into a particular tree structure per the given frame

# normalize a document
normalized = jsonld.normalize(doc, {'format': 'application/nquads'})
# normalized is a string that is a canonical representation of the document
# that can be used for hashing
def test_import_jsonld_into_named_graph():
    """Test named graphs we use."""
    graph = ConjunctiveGraph()

    serialized_document = json.dumps(
        jsonld.expand(
            jsonld.flatten(JSONLD_DOCUMENT),
        ),
        indent=4,
    )

    graph.parse(
        data=serialized_document,
        format='json-ld',
        # All the semantic data about my blog is stored in a particular
        # named graph.
        publicID=PUBLIC_ID,
    )

    assert list(
        map(
            operator.itemgetter(Variable('g')),
            graph.query(
                'SELECT DISTINCT ?g WHERE { GRAPH ?g { ?s ?p ?o . } } ORDER BY ?g',
            ).bindings,
        )) == [
        PUBLIC_ID,
    ]

    assert graph.query('''
        SELECT * WHERE {
            ?subclass_of_robot rdfs:subClassOf ex:Robot .
        }
    ''', initNs=NAMESPACES).bindings == [{
        Variable('subclass_of_robot'): URIRef('https://example.org/Rover'),
    }]

    # THIS FAILS!
    # The information about ex:Rover cannot be found if I specify the GRAPH.
    # Meaning, this information is not in one of the named graphs.
    assert graph.query('''
        SELECT * WHERE {
            GRAPH ?g {
                ex:Rover rdfs:subClassOf ex:Robot .
            }
        }
    ''', initNs=NAMESPACES).bindings == [{
        Variable('g'): PUBLIC_ID,
    }]

    # `publicID` was used for the part of data which was on the top level
    # of the document.
    assert graph.query('''
        SELECT * WHERE {
            GRAPH ?g {
                blog:JSONLD-and-named-graphs a schema:blogPost .
            }
        }
    ''', initNs=NAMESPACES).bindings == [{
        Variable('g'): PUBLIC_ID,
    }]
from pyld import jsonld
import json
import os

doc = json.load(open(os.path.join(".." + os.sep + "json-instances" + os.sep + "", "PDB-5AEM.jsonld")))
print("loaded jsonld", doc)
context = json.load(open(os.path.join(".." + os.sep + "json-schemas" + os.sep + "contexts" + os.sep, "dataset_sdo_context.jsonld")))
print("loaded context")

compacted = jsonld.compact(doc, context)
print("-------------COMPACTED")
print(json.dumps(compacted, indent=2))

expanded = jsonld.expand(compacted)
print("-------------EXPANDED")
print(json.dumps(expanded, indent=2))

flattened = jsonld.flatten(compacted)
import os
import sys
import json

from pyld import jsonld
from pyld_document_loader import my_requests_document_loader

if __name__ == '__main__':
    fjson = sys.argv[1]
    with open(fjson, 'r') as f:
        doc = json.loads(f.read())

    jsonld.set_document_loader(my_requests_document_loader())
    data = jsonld.flatten(doc)

    jpath, jfile = os.path.split(fjson)
    flatten_file = f'{jpath}{os.path.sep}flatten_{jfile}'
    with open(flatten_file, 'w') as f:
        f.write(json.dumps(data))