def _add_ld_keys(rsc: Union[Resource, Dict], context: Optional[Union[Dict, List, str]],
                 base: Optional[str]) -> Dict:
    """Convert a Resource (or plain dict) into a JSON-LD-keyed dictionary.

    Renames known attributes to their JSON-LD keywords via ``LD_KEYS`` and
    recurses into nested resources, dicts and lists.

    :param rsc: the Resource or dict to convert
    :param context: the model context the resource is serialized against
    :param base: base IRI used to shorten absolute string values
    :return: a dict using JSON-LD keywords ("@id", "@type", ...)
    :raises ValueError: if the resource still contains unresolved LazyActions

    Fix: the original annotated ``rsc`` as ``[Resource, Dict]`` (a list
    literal, not a valid type) — it must be ``Union[Resource, Dict]``.
    """
    local_attrs = dict()
    local_context = None
    items = rsc.__dict__.items() if isinstance(rsc, Resource) else rsc.items()
    for k, v in items:
        if k not in Resource._RESERVED:
            if k == "context":
                # A resource-local context overrides the model context and
                # supplies its own base for @id resolution.
                if v != context:
                    local_context = Context(v)
                    base = local_context.base
            else:
                key = LD_KEYS.get(k, k)
                if key == "@id" and local_context is not None:
                    local_attrs[key] = local_context.resolve(v)
                else:
                    if isinstance(v, (Resource, dict)):
                        local_attrs[key] = _add_ld_keys(v, context, base)
                    elif isinstance(v, list):
                        local_attrs[key] = [
                            _add_ld_keys(item, context, base)
                            if isinstance(item, (Resource, dict)) else item
                            for item in v]
                    else:
                        if isinstance(v, LazyAction):
                            raise ValueError(
                                "can't convert, resource contains LazyActions")
                        # Shorten absolute IRIs back to base-relative form.
                        local_attrs[key] = v.replace(
                            base, "") if base and isinstance(v, str) else v
    return local_attrs
def test_load_context_from_url():
    """A context loaded from a remote URL is valid and keeps its IRI."""
    url = "https://json-ld.org/contexts/person.jsonld"
    loaded = Context(url, url)
    assert is_valid_document(loaded.document)
    assert loaded.expand("affiliation") == "http://schema.org/affiliation"
    assert loaded.iri == url
    assert loaded.is_http_iri() is True
def __init__(self, endpoint: str, org: str, prj: str, token: str,
             model_context: Context, max_connections: int):
    """Set up the Nexus store service: auth, contexts and endpoint URLs.

    :param endpoint: base URL of the Nexus deployment
    :param org: organisation label
    :param prj: project label
    :param token: bearer token added to every request header
    :param model_context: JSON-LD context of the model
    :param max_connections: cap for concurrent (async) connections
    """
    nexus.config.set_environment(endpoint)
    nexus.config.set_token(token)
    self.organisation = org
    self.project = prj
    self.model_context = model_context
    # Context of the Nexus project itself (fetched from the store).
    self.context = Context(self.get_project_context())
    self.context_cache: Dict = dict()
    # Context describing the store-managed metadata fields.
    self.metadata_context = Context(self.resolve_context(NEXUS_CONTEXT),
                                    NEXUS_CONTEXT)
    self.max_connections = max_connections
    # JSON-LD headers for resource CRUD operations.
    self.headers = {
        "Authorization": "Bearer " + token,
        "Content-Type": "application/ld+json",
        "Accept": "application/ld+json"
    }
    # SPARQL queries are posted as raw query text.
    self.headers_sparql = {
        "Authorization": "Bearer " + token,
        "Content-Type": "application/sparql-query",
        "Accept": "application/ld+json"
    }
    self.url_resources = "/".join(
        (endpoint, "resources", quote_plus(org), quote_plus(prj)))
    self.url_files = "/".join(
        (endpoint, "files", quote_plus(org), quote_plus(prj)))
    self.sparql_endpoint = "/".join(
        (endpoint, "views", quote_plus(org), quote_plus(prj),
         "nxv:defaultSparqlIndex", "sparql"))
    # This async to work on jupyter notebooks
    nest_asyncio.apply()
def _remove_ld_keys(dictionary: dict, context: Context,
                    to_resource: bool = True) -> Union[Dict, Resource]:
    """Inverse of ``_add_ld_keys``: strip JSON-LD keywords from a document.

    :param dictionary: a (compacted) JSON-LD document
    :param context: Context used to resolve @id values and shrink IRIs
    :param to_resource: when True (default) wrap the result in a Resource
    :return: a Resource, or a plain dict when ``to_resource`` is False

    Fix: ``to_resource`` was annotated ``Optional[bool]`` although its
    default is ``True`` and None is never a meaningful value — plain ``bool``.
    """
    local_attrs = dict()
    for k, v in dictionary.items():
        if k == "@context":
            # Keep a context that differs from the model's on the resource.
            if v != context:
                local_attrs["context"] = v
        elif k == "@id":
            local_attrs["id"] = context.resolve(v)
        elif k.startswith("@") and k in LD_KEYS.values():
            # Other JSON-LD keywords ("@type", ...) lose the leading "@".
            local_attrs[k[1:]] = v
        elif isinstance(v, dict):
            local_attrs[k] = _remove_ld_keys(v, context, to_resource)
        elif isinstance(v, list):
            local_attrs[k] = [
                _remove_ld_keys(item, context, to_resource)
                if isinstance(item, dict) else item
                for item in v]
        else:
            # Terms typed "@id" get their IRI values shrunk back to short form.
            if k in context.terms:
                if context.terms[k].type == "@id":
                    v = context.shrink_iri(v)
            local_attrs[k] = v
    if to_resource:
        return Resource(**local_attrs)
    return local_attrs
def test_load_context_from_file(context_file_path, context_iri_file):
    """A context loaded from a local file matches the raw JSON document.

    Fix: the original asserted ``context.iri is context_iri_file`` — string
    identity is an interpreter implementation detail; value equality (``==``)
    is the correct comparison.
    """
    with open(context_file_path) as f:
        context_json = json.load(f)
    context = Context(context_iri_file, context_iri_file)
    assert is_valid_document(context.document)
    assert context.document == context_json
    assert context.iri == context_iri_file
    assert context.is_http_iri() is False
def __init__(self, graph: Graph, context_iri: Optional[str] = None) -> None:
    """Initialise the RDF service from a SHACL graph and a context IRI.

    :param graph: the rdflib Graph holding the SHACL shapes
    :param context_iri: IRI of the JSON-LD context to resolve
    :raises ConfigurationError: when no context IRI is provided

    Fix: the error message was an f-string with no placeholders (F541) —
    a plain string literal with the identical value.
    """
    if context_iri is None:
        raise ConfigurationError("RdfModel requires a context")
    self._graph = graph
    self._context_cache = dict()
    # Map of owl:Class -> sh:NodeShape, built from the graph.
    self.classes_to_shapes = self._build_shapes_map()
    resolved_context = self.resolve_context(context_iri)
    self.context = Context(resolved_context, context_iri)
    # Map of context term name -> shape, derived from classes_to_shapes.
    self.types_to_shapes: Dict = self._build_types_to_shapes()
def test_load_context_from_list(custom_context, context_iri_file, model_prefixes):
    """A merged (list) context resolves base, vocab, prefixes and terms."""
    merged = _merge_jsonld(custom_context, context_iri_file)
    loaded = Context(merged)
    assert is_valid_document(loaded.document)
    assert loaded.iri is None
    assert loaded.is_http_iri() is False
    assert loaded.base == "http://example.org/"
    assert loaded.vocab == "http://example.org/vocab/"
    assert loaded.expand("Person") == "http://schema.org/Person"
    assert loaded.prefixes == model_prefixes
def __init__(self, endpoint: str, org: str, prj: str, token: str,
             model_context: Context, max_connections: int, searchendpoints: Dict):
    """Set up the Nexus store service: auth, headers, contexts, endpoints.

    :param endpoint: base URL of the Nexus deployment
    :param org: organisation label
    :param prj: project label
    :param token: bearer token added to every request header
    :param model_context: JSON-LD context of the model
    :param max_connections: cap for concurrent (async) connections
    :param searchendpoints: optional mapping; may carry a custom ``sparql``
        view endpoint under ``searchendpoints["sparql"]["endpoint"]``
    """
    nexus.config.set_environment(endpoint)
    nexus.config.set_token(token)
    self.endpoint = endpoint
    self.organisation = org
    self.project = prj
    self.model_context = model_context
    # Context of the Nexus project itself (fetched from the store).
    self.context = Context(self.get_project_context())
    self.context_cache: Dict = dict()
    self.max_connections = max_connections
    # JSON-LD headers for resource CRUD operations.
    self.headers = {
        "Authorization": "Bearer " + token,
        "Content-Type": "application/ld+json",
        "Accept": "application/ld+json"
    }
    # SPARQL queries are posted as raw query text.
    self.headers_sparql = {
        "Authorization": "Bearer " + token,
        "Content-Type": "application/sparql-query",
        "Accept": "application/ld+json"
    }
    self.headers_upload = {
        "Authorization": "Bearer " + token,
        "Accept": "application/ld+json",
    }
    # Downloads accept any content type.
    self.headers_download = {
        "Authorization": "Bearer " + token,
        "Accept": "*/*"
    }
    self.url_files = "/".join(
        (self.endpoint, "files", quote_plus(org), quote_plus(prj)))
    self.url_resources = "/".join(
        (self.endpoint, "resources", quote_plus(org), quote_plus(prj)))
    # Context describing the store-managed metadata fields.
    self.metadata_context = Context(self.resolve_context(NEXUS_CONTEXT),
                                    NEXUS_CONTEXT)
    # Use the configured SPARQL view when provided, else the default index.
    sparql_view = searchendpoints['sparql'][
        'endpoint'] if searchendpoints and "sparql" in searchendpoints else "nxv:defaultSparqlIndex"
    self.sparql_endpoint = "/".join(
        (self.endpoint, "views", quote_plus(org), quote_plus(prj),
         quote_plus(sparql_view), "sparql"))
    # The following code is for async to work on jupyter notebooks
    try:
        asyncio.get_event_loop()
        nest_asyncio.apply()
    except RuntimeError:
        pass
def _make_jsonld_expanded(resource, store_metadata, context):
    """Build the expanded JSON-LD form of *resource* (test helper)."""
    data = dict()
    if hasattr(resource, "id"):
        data["@id"] = resource.id
    # Prefer the resource's own context over the supplied one.
    ctx = Context(resource.context) if hasattr(resource, "context") else Context(context)
    latitude_term = ctx.terms.get("latitude")
    # A typed term expands to a {"@type", "@value"} node, otherwise the bare value.
    latitude_node = (
        {"@type": latitude_term.type, "@value": resource.geo["latitude"]}
        if latitude_term.type
        else resource.geo["latitude"]
    )
    geo_expanded = {latitude_term.id: latitude_node}
    data["@type"] = ctx.expand(resource.type)
    data[ctx.expand("description")] = resource.description
    data[ctx.expand("geo")] = geo_expanded
    data[ctx.expand("image")] = {"@id": resource.image}
    data[ctx.expand("name")] = resource.name
    if store_metadata and resource._store_metadata is not None:
        data.update(metadata_data_expanded)
    return data
def context(self):
    """Fixture: a Context built from an inline test document."""
    terms = {
        "contribution": {
            "@id": "https://neuroshapes.org/contribution",
            "@type": "@id"
        },
        "agent": {
            "@id": "http://www.w3.org/ns/prov#agent",
            "@type": "@id"
        },
        "Person": "http://schema.org/Person",
        "address": "http://schema.org/address",
        "name": "http://schema.org/name",
        "postalCode": "http://schema.org/postalCode",
        "streetAddress": "http://schema.org/streetAddress",
        "deprecated": "https://bluebrain.github.io/nexus/vocabulary/deprecated"
    }
    return Context({"@context": terms})
def resolve_context(self, iri: str, local_only: Optional[bool] = False) -> Dict:
    """Resolve a context IRI via the store's resolver, with caching.

    Falls back to loading the context directly (unless ``local_only``)
    when the resolver request fails.

    :param iri: the context IRI to resolve
    :param local_only: when True, never fall back to remote loading
    :return: the resolved "@context" document
    :raises ValueError: when the context cannot be resolved

    Fix: the broad ``except Exception as e`` bound ``e`` without using it.
    """
    if iri in self.context_cache:
        return self.context_cache[iri]
    try:
        # The store's own context is published under a local alias.
        context_to_resolve = self.store_local_context if iri == self.store_context else iri
        url = "/".join((self.url_resolver, "_", quote_plus(context_to_resolve)))
        response = requests.get(url, headers=self.headers)
        response.raise_for_status()
        resource = response.json()
    except Exception:
        if local_only is False:
            try:
                context = Context(context_to_resolve)
            except URLError:
                raise ValueError(f"{context_to_resolve} is not resolvable")
            else:
                document = context.document["@context"]
        else:
            raise ValueError(f"{context_to_resolve} is not resolvable")
    else:
        # Deep-copy the document via a JSON round trip.
        document = json.loads(json.dumps(resource["@context"]))
        if isinstance(document, list):
            # Strip the store's own contexts from composite documents.
            if self.store_context in document:
                document.remove(self.store_context)
            if self.store_local_context in document:
                document.remove(self.store_local_context)
    self.context_cache.update({context_to_resolve: document})
    return document
def resolve_context(self, iri: str, local_only: Optional[bool] = False) -> Dict:
    """Resolve a context IRI through the resources endpoint, with caching.

    Falls back to loading the context directly (unless ``local_only``)
    when the store request fails.

    :param iri: the context IRI to resolve
    :param local_only: when True, never fall back to remote loading
    :return: the resolved "@context" document
    :raises ValueError: when the context cannot be resolved

    Fix: the broad ``except Exception as e`` bound ``e`` without using it.
    """
    if iri in self.context_cache:
        return self.context_cache[iri]
    try:
        # The well-known Nexus context has a dedicated source id.
        context_id = NEXUS_CONTEXT_SOURCE if iri == NEXUS_CONTEXT else quote_plus(
            iri)
        url = "/".join((self.url_resources, "_", context_id))
        response = requests.get(url, headers=self.headers)
        response.raise_for_status()
        resource = response.json()
    except Exception:
        if local_only is False:
            try:
                context = Context(iri)
            except URLError:
                raise ValueError(f"{iri} is not resolvable")
            else:
                document = context.document["@context"]
        else:
            raise ValueError(f"{iri} is not resolvable")
    else:
        # Deep-copy the document via a JSON round trip.
        document = json.loads(json.dumps(resource["@context"]))
    self.context_cache.update({iri: document})
    return document
def _from_jsonld_one(data: Dict) -> Resource:
    """Convert a single compacted JSON-LD document into a Resource."""
    if "@context" not in data:
        # Expanded JSON-LD (no @context) is not supported yet.
        raise NotImplementedError("not implemented yet (expanded json-ld)")
    try:
        resolved_context = Context(data["@context"])
    except URLError:
        raise ValueError("context not resolvable")
    return _remove_ld_keys(data, resolved_context)
def _make_jsonld_compacted(r, store_metadata):
    """Build the compacted JSON-LD form of *r* (test helper)."""
    nested_context = Context(custom_context)
    data = dict()
    # The @context is the IRI when remote, else the inline document.
    if model_context.is_http_iri():
        data["@context"] = model_context.iri
    else:
        data["@context"] = model_context.document["@context"]
    if hasattr(r, "id"):
        data["@id"] = r.id
    founder_node = {
        "@type": r.founder.type,
        "@id": nested_context.resolve(r.founder.id),
        "name": r.founder.name
    }
    data.update({
        "@type": r.type,
        "name": r.name,
        "founder": founder_node
    })
    if hasattr(r, "id") and store_metadata:
        data.update(metadata_data_compacted)
    return data
def resolve_context(self, iri: str) -> Dict:
    """Load and cache the JSON-LD context document for *iri*.

    :param iri: IRI or path of the context to load
    :return: the context's document
    :raises ValueError: when the context file cannot be found

    Fix: chain the original FileNotFoundError (``from e``) so the cause
    and its traceback are preserved in the raised ValueError.
    """
    if iri in self._context_cache:
        return self._context_cache[iri]
    try:
        context = Context(iri)
    except FileNotFoundError as e:
        raise ValueError(e) from e
    self._context_cache[iri] = context.document
    return context.document
def _as_graphs(resource: Resource, store_metadata: bool, context: Context,
               metadata_context: Context) -> Tuple[Graph, Graph, Dict]:
    """Returns a data and a metadata graph"""
    # The output @context is the resource's own context when present,
    # otherwise the model context's IRI (remote) or inline document.
    output_context = (
        resource.context if hasattr(resource, "context")
        else (context.iri if context.is_http_iri()
              else context.document["@context"])
    )
    converted = _add_ld_keys(resource, output_context, context.base)
    converted["@context"] = context.document["@context"]
    graphs = _dicts_to_graph(converted, resource._store_metadata,
                             store_metadata, metadata_context)
    return graphs + (converted, )
def _resource_context(resource: Resource, model_context: Context,
                      context_resolver: Callable) -> Context:
    """Select the Context to serialize *resource* with.

    Prefers the resource's own context (resolving it recursively, then
    directly as a fallback); otherwise uses the model context.
    """
    if not hasattr(resource, "context"):
        # No resource-level context: the model context is mandatory.
        if model_context is None:
            raise NotSupportedError("no available context")
        return model_context
    if model_context and resource.context == model_context.iri:
        # The resource points at the model context itself: reuse it.
        return model_context
    iri = resource.context if isinstance(resource.context, str) else None
    try:
        document = recursive_resolve(resource.context, context_resolver)
        return Context(document, iri)
    except (HTTPError, URLError, NotSupportedError):
        try:
            return Context(resource.context, iri)
        except URLError:
            raise ValueError(f"{resource.context} is not resolvable")
def rewrite_sparql(query: str, context: Context) -> str:
    """Rewrite local property and type names from Model.template() as IRIs.

    Local names are mapped to IRIs by using a JSON-LD context, i.e. { "@context": { ... }}
    from a kgforge.core.commons.Context.
    In the case of contexts using prefixed names, prefixes are added to the SPARQL
    query prologue. In the case of non available contexts and vocab then the query
    is returned unchanged.

    Fix: ``"rdf" in prefixes`` raised TypeError when ``context.prefixes``
    was None — the None case the ``has_prefixes`` flag already anticipates.
    """
    ctx = {
        k: v["@id"] if isinstance(v, dict) else v
        for k, v in context.document["@context"].items()
    }
    prefixes = context.prefixes
    has_prefixes = prefixes is not None and len(prefixes.keys()) > 0
    if ctx.get("type") == "@type":
        # Map the JSON-LD "type" keyword to rdf:type (prefixed if possible).
        if has_prefixes and "rdf" in prefixes:
            ctx["type"] = "rdf:type"
        else:
            ctx["type"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"

    def replace(match: Match) -> str:
        # Group 4 captures a candidate local name; anything else is left as-is.
        m4 = match.group(4)
        if m4 is None:
            return match.group(0)
        else:
            # Skip SPARQL keywords and absolute IRIs; default unknown terms
            # to the vocab prefix ":" when a vocab is available.
            v = ctx.get(
                m4, ":" + m4) if str(m4).lower() not in SPARQL_CLAUSES and not str(
                m4).startswith("https") and context.has_vocab() else m4
            m5 = match.group(5)
            if "//" in v:
                return f"<{v}>{m5}"
            else:
                return f"{v}{m5}"

    g4 = r"([a-zA-Z_]+)"
    g5 = r"([.;]?)"
    g0 = rf"((?<=[\s,[(/|!^])((a|true|false)|{g4}){g5}(?=[\s,\])/|?*+]))"
    g6 = r"(('[^']+')|('''[^\n\r]+''')|(\"[^\"]+\")|(\"\"\"[^\n\r]+\"\"\"))"
    rx = rf"{g0}|{g6}|(?<=< )(.*)(?= >)"
    qr = re.sub(rx, replace, query, flags=re.VERBOSE)

    if not has_prefixes or "prefix" in str(qr).lower():
        return qr
    else:
        pfx = "\n".join(f"PREFIX {k}: <{v}>" for k, v in prefixes.items())
        if context.has_vocab():
            pfx = "\n".join([pfx, f"PREFIX : <{context.vocab}>"])
        return f"{pfx}\n{qr}"
def resolve_context(self, iri: str, local_only: Optional[bool] = False) -> Dict:
    """Resolve a context IRI by fetching it from the store, with caching.

    When the fetch fails and ``local_only`` is False, falls back to
    loading the context directly from its IRI.
    """
    if iri in self.context_cache:
        return self.context_cache[iri]
    try:
        resource = nexus.resources.fetch(self.organisation, self.project, iri)
    except nexus.HTTPError:
        # Note: only an explicit False enables the fallback (original behavior).
        if local_only is not False:
            raise ValueError(f"{iri} is not resolvable")
        try:
            context = Context(iri)
        except URLError:
            raise ValueError(f"{iri} is not resolvable")
        document = context.document["@context"]
    else:
        # Deep-copy the document via a JSON round trip.
        document = json.loads(json.dumps(resource["@context"]))
    self.context_cache.update({iri: document})
    return document
def test_rewrite_sparql_unknownterm_missing_vocab(custom_context):
    """A term absent from a vocab-less context makes rewriting fail."""
    ctx = Context(document=custom_context)
    assert not ctx.has_vocab()
    query = "SELECT ?x WHERE { Graph ?g { ?id propertyNotInContext/name/anotherPropertyNotInContext ?x }}"
    with pytest.raises(QueryingError):
        rewrite_sparql(query, ctx)
class RdfService:
    """Abstract service backing RdfModel: maps SHACL shapes to context terms.

    Subclasses supply shape materialization, validation and context
    resolution; this base class builds the term/shape lookup tables.
    """

    def __init__(self, graph: Graph, context_iri: Optional[str] = None) -> None:
        """Initialise from a SHACL graph and a context IRI.

        :raises ConfigurationError: when no context IRI is provided
        """
        if context_iri is None:
            # Fix: was an f-string with no placeholders (F541).
            raise ConfigurationError("RdfModel requires a context")
        self._graph = graph
        self._context_cache = dict()
        # Map of owl:Class -> sh:NodeShape, built by the subclass.
        self.classes_to_shapes = self._build_shapes_map()
        resolved_context = self.resolve_context(context_iri)
        self.context = Context(resolved_context, context_iri)
        self.types_to_shapes: Dict = self._build_types_to_shapes()

    def schema_source_id(self, schema_iri: str) -> str:
        # POLICY Should return the id of the resource containing the schema
        raise NotImplementedError()

    @abstractmethod
    def materialize(self, iri: URIRef) -> NodeProperties:
        """Triggers the collection of properties of a given Shape node

        Args:
            iri: the URI of the node to start collection

        Returns:
            A NodeProperty object with the collected properties
        """
        raise NotImplementedError()

    def validate(self, resource: Resource):
        """Validate *resource* against the shape registered for its type.

        :raises TypeError: when the resource has no ``type`` attribute
        """
        try:
            shape_iri = self.types_to_shapes[resource.type]
        except AttributeError:
            raise TypeError("resource requires a type attribute")
        else:
            data_graph = as_graph(resource, False, self.context, None, None)
            return self._validate(shape_iri, data_graph)

    @abstractmethod
    def _validate(self, iri: str, data_graph: Graph) -> Tuple[bool, Graph, str]:
        raise NotImplementedError()

    @abstractmethod
    def resolve_context(self, iri: str) -> Dict:
        """For a given IRI return its resolved context recursively"""
        raise NotImplementedError()

    @abstractmethod
    def generate_context(self) -> Dict:
        """Generates a JSON-LD context with the classes and terms present in the SHACL graph."""
        raise NotImplementedError()

    @abstractmethod
    def _build_shapes_map(self) -> Dict:
        """Queries the source and returns a map of owl:Class to sh:NodeShape"""
        raise NotImplementedError()

    def _build_types_to_shapes(self):
        """Iterates the classes_to_shapes dictionary to create a term to shape
        dictionary filtering the terms available in the context"""
        types_to_shapes: Dict = dict()
        for k, v in self.classes_to_shapes.items():
            term = self.context.find_term(str(k))
            if term:
                key = term.name
                if term.name not in types_to_shapes:
                    types_to_shapes[term.name] = v
                else:
                    print("WARN: duplicated term", key, k, [key], v)
        return types_to_shapes

    def _generate_context(self) -> Optional[Dict]:
        """Materializes all Types into templates and parses the templates
        to generate a context.

        Returns None when no terms were collected (fix: the annotation was
        ``Dict`` although that path existed).
        """
        # FIXME: the status of this function is experimental
        # TODO: check if there are conflicting terms, and throw error
        context = dict()
        prefixes = dict()
        types_ = dict()
        terms = dict()

        def traverse_properties(properties) -> Tuple[Dict, Dict]:
            # Collect prefix and term entries from a property tree.
            l_prefixes = dict()
            l_terms = dict()
            for property_ in properties:
                if hasattr(property_, "path"):
                    if property_.path != RDF.type and str(property_.path) != "id":
                        v_prefix, v_namespace, v_name = self._graph.compute_qname(property_.path)
                        l_prefixes.update({v_prefix: str(v_namespace)})
                        term_obj = {"@id": ":".join((v_prefix, v_name))}
                        if hasattr(property_, "id"):
                            term_obj.update({"@type": "@id"})
                        if hasattr(property_, "values"):
                            if isinstance(property_.values, str) or len(property_.values) == 1:
                                if isinstance(property_.values, list):
                                    obj_type = property_.values[0]
                                else:
                                    obj_type = property_.values
                                if obj_type in target_classes:
                                    term_obj.update({"@type": "@id"})
                                else:
                                    try:
                                        # Fix: was ``self.graph`` (no such
                                        # attribute); the AttributeError was
                                        # silently swallowed below, so XSD
                                        # typings were never emitted.
                                        px, ns, n = self._graph.compute_qname(obj_type)
                                        l_prefixes.update({px: str(ns)})
                                        if str(ns) == str(XSD):
                                            term_obj.update({"@type": ":".join((px, n))})
                                    except Exception:
                                        pass
                        l_terms.update({v_name: term_obj})
                if hasattr(property_, "properties"):
                    l_p, l_t = traverse_properties(property_.properties)
                    l_prefixes.update(l_p)
                    l_terms.update(l_t)
            return l_prefixes, l_terms

        target_classes = list()
        for k in self.classes_to_shapes.keys():
            key = as_term(k)
            if key not in target_classes:
                target_classes.append(key)
            else:
                # TODO: should this raise an error?
                print("duplicated term", key, k)

        for type_, shape in self.classes_to_shapes.items():
            t_prefix, t_namespace, t_name = self._graph.compute_qname(type_)
            prefixes.update({t_prefix: str(t_namespace)})
            types_.update({t_name: {"@id": ":".join((t_prefix, t_name))}})
            node = self.materialize(shape)
            if hasattr(node, "properties"):
                p, t = traverse_properties(node.properties)
                prefixes.update(p)
                terms.update(t)

        # Emit prefixes first, then types, then terms, each sorted by key.
        context.update({key: prefixes[key] for key in sorted(prefixes)})
        context.update({key: types_[key] for key in sorted(types_)})
        context.update({key: terms[key] for key in sorted(terms)})

        return {"@context": context} if len(context) > 0 else None
def test_rewrite_sparql(query, expected):
    """rewrite_sparql maps local names in *query* to IRIs via the context."""
    ctx = Context(document=context)
    assert rewrite_sparql(query, ctx) == expected
def _generate_context(self) -> Context:
    """Wrap the service-generated JSON-LD document in a Context.

    NOTE(review): returns None when the service yields no document — the
    annotation arguably should be Optional[Context]; confirm before changing.
    """
    document = self.service.generate_context()
    return Context(document) if document else None
def test_load_context_fail():
    """Loading an unresolvable URL must raise URLError."""
    with pytest.raises(URLError):
        Context("https://unresolvable.context.org")
def __init__(self, endpoint: str, org: str, prj: str, token: str, model_context: Context,
             max_connection: int, searchendpoints: Dict, store_context: str,
             store_local_context: str, namespace: str, project_property: str,
             deprecated_property: bool, content_type: str, accept: str,
             files_upload_config: Dict, files_download_config: Dict, **params):
    """Set up the Nexus store service: config, headers, contexts, endpoints.

    :param endpoint: base URL of the Nexus deployment
    :param org: organisation label
    :param prj: project label
    :param token: bearer token; None enables anonymous access
    :param model_context: JSON-LD context of the model
    :param max_connection: cap for concurrent connections
    :param searchendpoints: optional mapping with ``sparql``/``elastic``
        view configuration (endpoint and headers)
    :param store_context: IRI of the store's metadata context
    :param store_local_context: local alias of the store context
    :param namespace: vocabulary namespace used to build default view ids
    :param project_property: predicate holding the project
    :param deprecated_property: predicate/flag for deprecation
    :param content_type: default Content-Type header for resource calls
    :param accept: default Accept header for resource calls
    :param files_upload_config: must contain an "Accept" entry (consumed here)
    :param files_download_config: must contain an "Accept" entry (consumed here)
    :param params: extra store parameters, deep-copied
    """
    nexus.config.set_environment(endpoint)
    self.endpoint = endpoint
    self.organisation = org
    self.project = prj
    self.model_context = model_context
    self.context_cache: Dict = dict()
    self.max_connection = max_connection
    # Deep copy: callers must not observe later mutations of params.
    self.params = copy.deepcopy(params)
    self.store_context = store_context
    self.store_local_context = store_local_context
    self.namespace = namespace
    self.project_property = project_property
    self.deprecated_property = deprecated_property
    # Default view ids, derived from the configured namespace.
    self.default_sparql_index = f"{self.namespace}defaultSparqlIndex"
    self.default_es_index = f"{self.namespace}defaultElasticSearchIndex"
    self.headers = {
        "Content-Type": content_type,
        "Accept": accept
    }
    sparql_config = searchendpoints['sparql'] if searchendpoints and "sparql" in searchendpoints else None
    elastic_config = searchendpoints['elastic'] if searchendpoints and "elastic" in searchendpoints else None
    # Per-endpoint headers, overridable via the search endpoint config.
    self.headers_sparql = {
        "Content-Type": sparql_config['Content-Type'] if sparql_config and 'Content-Type' in sparql_config else "text/plain",
        "Accept": sparql_config['Accept'] if sparql_config and 'Accept' in sparql_config else "application/sparql-results+json"
    }
    self.headers_elastic = {
        "Content-Type": elastic_config['Content-Type'] if elastic_config and 'Content-Type' in elastic_config else "application/json",
        "Accept": elastic_config['Accept'] if elastic_config and 'Accept' in elastic_config else "application/json"
    }
    # NOTE: pop() mutates the caller-supplied config dicts.
    self.headers_upload = {
        "Accept": files_upload_config.pop("Accept"),
    }
    self.headers_download = {
        "Accept": files_download_config.pop("Accept")
    }
    # Anonymous access is supported: Authorization is only set with a token.
    if token is not None:
        nexus.config.set_token(token)
        self.headers["Authorization"] = "Bearer " + token
        self.headers_sparql["Authorization"] = "Bearer " + token
        self.headers_elastic["Authorization"] = "Bearer " + token
        self.headers_upload["Authorization"] = "Bearer " + token
        self.headers_download["Authorization"] = "Bearer " + token
    # Context of the Nexus project itself (fetched from the store).
    self.context = Context(self.get_project_context())
    self.url_base_files = "/".join((self.endpoint, "files"))
    self.url_files = "/".join((self.url_base_files, quote_plus(org), quote_plus(prj)))
    self.url_resources = "/".join((self.endpoint, "resources", quote_plus(org), quote_plus(prj)))
    self.url_resolver = "/".join((self.endpoint, "resolvers", quote_plus(org), quote_plus(prj)))
    # Fully-resolved metadata context for store-managed fields.
    self.metadata_context = Context(recursive_resolve(self.store_context, self.resolve_context), store_context)
    # Use configured views when provided, else the default indices.
    sparql_view = sparql_config['endpoint'] if sparql_config and "endpoint" in sparql_config else self.default_sparql_index
    elastic_view = elastic_config['endpoint'] if elastic_config and "endpoint" in elastic_config else self.default_es_index
    self.sparql_endpoint = dict()
    self.elastic_endpoint = dict()
    self.sparql_endpoint["endpoint"] = "/".join((self.endpoint, "views", quote_plus(org), quote_plus(prj), quote_plus(sparql_view), "sparql"))
    self.sparql_endpoint["type"] = "sparql"
    self.elastic_endpoint["endpoint"] = "/".join((self.endpoint, "views", quote_plus(org), quote_plus(prj), quote_plus(elastic_view), "_search"))
    self.elastic_endpoint["type"] = "elastic"
    # The following code is for async to work on jupyter notebooks
    try:
        asyncio.get_event_loop()
        nest_asyncio.apply()
    except RuntimeError:
        pass
def model_context(context_iri_file) -> Context:
    """Fixture: a Context whose document and IRI both come from the file path."""
    iri = context_iri_file
    return Context(iri, iri)
def metadata_context() -> Context:
    """Fixture: a metadata Context carrying deprecation/version terms."""
    return Context(
        {
            "deprecated": "https://store.net/vocabulary/deprecated",
            "version": "https://store.net/vocabulary/version"
        },
        "http://store.org/metadata.json",
    )
def test_rewrite_sparql(query, expected):
    """The rewritten query is the prefix prologue (including vocab) + expected.

    Fix: the vocab PREFIX line was an f-string with no placeholders (F541) —
    a plain string literal with the identical value.
    """
    prefixes_string_vocab = "\n".join(
        [prefixes_string, "PREFIX : <http://example.org/vocab/>"])
    context_object = Context(document=context)
    result = rewrite_sparql(query, context_object)
    assert result == prefixes_string_vocab + expected
def test_load_context_as_dict(custom_context):
    """A context supplied as a dict is stored verbatim and has no IRI."""
    ctx = Context(custom_context)
    assert is_valid_document(ctx.document)
    assert ctx.document == custom_context
    assert ctx.iri is None
    assert ctx.is_http_iri() is False
def test_rewrite_sparql_missingvocab(custom_context):
    """Without a vocab, known terms are still expanded to prefixed names."""
    query = "SELECT ?name WHERE { <http://exaplpe.org/1234> name ?name }"
    expected = (
        "PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n"
        "SELECT ?name WHERE { <http://exaplpe.org/1234> foaf:name ?name }"
    )
    assert rewrite_sparql(query, Context(document=custom_context)) == expected