def _populate_graph(self):
    """Build the RDF graph describing this document.

    The graph is identified by ``self._generate_namedgraph_uri()`` and every
    triple added here has ``self._get_document_uri()`` as its subject.  On
    top of the fixed ``rdfs:isDefinedBy`` / ``rdf:type`` / optional
    ``owl:sameAs`` triples and whatever ``self._add_about_triples`` adds,
    one triple is created per non-empty entry of ``self.get_graph_mapping()``.

    Mapping keys may be:
      * a ``URIRef`` (used as is),
      * a full ``http://`` or ``https://`` URI string,
      * a ``prefix:label`` string whose prefix is looked up in
        ``self.ns_dict``.

    Mapping values may be plain ``str`` / ``float`` / ``int`` (strings that
    start with ``http`` or ``urn`` become a ``URIRef``, everything else a
    ``Literal``) or ready-made ``Literal`` / ``URIRef`` terms.  Empty values
    are skipped; values of any other type are logged and skipped.

    Returns:
        ConjunctiveGraph: the populated graph.

    Raises:
        ValueError: if a mapping key cannot be resolved to a predicate.
    """
    graph = ConjunctiveGraph(identifier=self._generate_namedgraph_uri())
    graph.namespace_manager = namespace_manager
    subject = URIRef(self._get_document_uri())

    graph.add((subject, RDFS.isDefinedBy, URIRef(self._generate_about_uri())))
    self._add_about_triples(graph)
    graph.add((subject, RDF.type, self.ns[self.get_rdf_type()]))
    if self.source_uri and self.source_uri != self.document_uri:
        graph.add((subject, OWL.sameAs, URIRef(self.source_uri)))

    for key, value in self.get_graph_mapping().items():
        # --- resolve the predicate -------------------------------------
        if isinstance(key, URIRef):
            predicate = key
        elif isinstance(key, str) and key.startswith(("http://", "https://")):
            predicate = URIRef(key)
        elif isinstance(key, str) and ":" in key:
            prefix, _, label = key.partition(":")
            ns = self.ns_dict.get(prefix)
            if ns is None:
                # Without this guard the predicate would silently become
                # the bogus URI "None/<label>".
                raise ValueError(
                    "unknown namespace prefix in mapping dict: {} => {}".format(key, value))
            predicate = URIRef("{}/{}".format(str(ns).rstrip('/'), label))
        else:
            raise ValueError(
                "unknown predicate key in mapping dict: {} => {}".format(key, value))

        # --- resolve the object ----------------------------------------
        if not value:
            # Empty strings, zeroes and None never produce a triple.
            continue
        # Exact type checks are deliberate: URIRef and Literal subclass str
        # and bool subclasses int, so isinstance() would misclassify them.
        if type(value) in (Literal, URIRef):
            term = value
        elif type(value) in (str, float, int):
            if isinstance(value, str) and value.startswith(("http", "urn")):
                term = URIRef(value)
            else:
                term = Literal(value)
        else:
            logger.warning("Unsupported datatype %s for value %s", type(value), value)
            # Only rdflib terms may be stored; do not add the raw value.
            continue
        graph.add((subject, predicate, term))

    # Re-assert the shared namespace manager before handing the graph out
    # (kept from the original; presumably guards against a helper replacing it).
    graph.namespace_manager = namespace_manager
    return graph
def parse(self, source, graph, encoding="utf-8"):
    """Parse a TriG document from ``source`` into the store behind ``graph``.

    A ``ConjunctiveGraph`` is created on ``graph.store`` with ``graph`` as
    its default context and ``graph``'s namespace manager, and is used as
    the sink for the TriG parser.  Prefix bindings collected by the parser
    are bound on that conjunctive graph afterwards.  Nothing is returned.

    Args:
        source: input source; must provide ``getPublicId()``,
            ``getSystemId()`` and ``getByteStream()``.
        graph: target graph; its store must be context aware.
        encoding: must be ``None`` or ``"utf-8"``.

    Raises:
        Exception: if ``encoding`` is anything other than ``None``/``"utf-8"``.
        AssertionError: if ``graph.store`` is not context aware.
    """
    if encoding not in [None, "utf-8"]:
        # The message used to be a tuple, so "%" raised TypeError instead of
        # the intended exception; adjacent literals form a single string.
        raise Exception(
            "TriG files are always utf-8 encoded, "
            "I was passed: %s" % (encoding,))

    # we're currently being handed a Graph, not a ConjunctiveGraph
    assert graph.store.context_aware, "TriG Parser needs a context-aware store!"

    conj_graph = ConjunctiveGraph(store=graph.store)
    # TODO: CG __init__ should have a default_context arg
    conj_graph.default_context = graph
    # TODO: update N3Processor so that it can use conj_graph as the sink
    conj_graph.namespace_manager = graph.namespace_manager

    sink = RDFSink(conj_graph)
    baseURI = conj_graph.absolutize(
        source.getPublicId() or source.getSystemId() or "")
    p = TrigSinkParser(sink, baseURI=baseURI, turtle=True)
    p.loadStream(source.getByteStream())

    for prefix, namespace in p._bindings.items():
        conj_graph.bind(prefix, namespace)
def get_graph_from_sparql_results(sparql_json, named_graph=None):
    """Turn a SPARQL JSON result set into a ``ConjunctiveGraph``.

    Each binding row is read as one or more (subject, predicate, object)
    variable triples, as laid out by ``RDFModel.get_context_triples``; only
    the first ``RDFModel.get_context_levels(len(binding))`` of them are used
    for a given row.  Afterwards every ``ore:isAggregatedBy`` triple is
    replaced by the inverse ``ore:aggregates`` triple.

    Args:
        sparql_json (dict): parsed SPARQL JSON results (``head.vars`` and
            ``results.bindings``).  The dict is not modified.
        named_graph (str, optional): identifier for the resulting graph.
            When omitted and the results carry a ``g`` variable, the ``g``
            value of the first binding is used.

    Returns:
        tuple: ``(graph, nr_levels)`` where ``nr_levels`` is the number of
        variable triples; ``(ConjunctiveGraph(), 0)`` for an empty result.
    """
    bindings = sparql_json['results']['bindings']
    if not bindings:
        return ConjunctiveGraph(), 0

    head_vars = sparql_json['head']['vars']
    if 'g' in head_vars and not named_graph:
        # The graph variable may be unbound in the first row.
        g_binding = bindings[0].get('g')
        if g_binding:
            named_graph = g_binding['value']
    # Work on a filtered copy: removing 'g' from the caller's list in place
    # made a second call on the same result lose the graph name.
    sparql_vars = [var for var in head_vars if var != 'g']

    triple_levels = RDFModel.get_context_triples(sparql_vars)
    nr_levels = len(triple_levels)

    if named_graph:
        named_graph = URIRef(named_graph)
    graph = ConjunctiveGraph(identifier=named_graph)
    graph.namespace_manager = namespace_manager

    for binding in bindings:
        binding_levels = RDFModel.get_context_levels(len(binding))
        for s, p, o in triple_levels[:binding_levels]:
            if binding[s]['type'] == 'bnode':
                subject = BNode(binding[s]['value'])
            else:
                subject = URIRef(binding[s]['value'])
            predicate = URIRef(binding[p]['value'])
            obj = RDFModel.get_object_from_sparql_result(binding[o])
            graph.add((subject, predicate, obj))

    # materialize inferences
    is_aggregated_by = URIRef("http://www.openarchives.org/ore/terms/isAggregatedBy")
    aggregates = URIRef("http://www.openarchives.org/ore/terms/aggregates")
    # Snapshot the matches first: the loop adds to and removes from the
    # graph that the generator would otherwise still be reading.
    for subject, obj in list(graph.subject_objects(predicate=is_aggregated_by)):
        graph.add((obj, aggregates, subject))
        graph.remove((subject, is_aggregated_by, obj))
    return graph, nr_levels
def parse(self, source, graph, encoding="utf-8"):
    """Parse a TriG document from ``source`` into the store behind ``graph``.

    A ``ConjunctiveGraph`` sharing ``graph``'s store and identifier is
    created, given ``graph`` as its default context and ``graph``'s
    namespace manager, and used as the sink for the TriG parser.  Prefix
    bindings collected by the parser are bound on that conjunctive graph
    afterwards.  Nothing is returned.

    Args:
        source: input source; must provide ``getPublicId()``,
            ``getSystemId()`` and ``getByteStream()``.
        graph: target graph; its store must be context aware.
        encoding: must be ``None`` or ``"utf-8"``.

    Raises:
        Exception: if ``encoding`` is anything other than ``None``/``"utf-8"``.
        AssertionError: if ``graph.store`` is not context aware.
    """
    if encoding not in [None, "utf-8"]:
        # The message used to be a tuple, so "%" raised TypeError instead of
        # the intended exception; adjacent literals form a single string.
        raise Exception(
            "TriG files are always utf-8 encoded, "
            "I was passed: %s" % (encoding,))

    # we're currently being handed a Graph, not a ConjunctiveGraph
    assert graph.store.context_aware, "TriG Parser needs a context-aware store!"

    conj_graph = ConjunctiveGraph(store=graph.store, identifier=graph.identifier)
    # TODO: CG __init__ should have a default_context arg
    conj_graph.default_context = graph
    # TODO: update N3Processor so that it can use conj_graph as the sink
    conj_graph.namespace_manager = graph.namespace_manager

    sink = RDFSink(conj_graph)
    baseURI = conj_graph.absolutize(
        source.getPublicId() or source.getSystemId() or "")
    p = TrigSinkParser(sink, baseURI=baseURI, turtle=True)
    p.loadStream(source.getByteStream())

    for prefix, namespace in p._bindings.items():
        conj_graph.bind(prefix, namespace)
def getSubgraph(g, subject, max_depth=100):
    """
    Extract the subgraph of ``g`` rooted at ``subject``.

    The result starts with every triple of ``g`` that has ``subject`` as its
    subject and is then expanded by ``inflateSubgraph``, which is given
    ``max_depth`` as its depth limit.

    Args:
        g (Graph): Source graph
        subject (URIRef): Subject of the root of the subgraph to retrieve
        max_depth (integer): Maximum recursion depth

    Returns:
        (Graph) The subgraph of g with subject.

    Example:

    .. jupyter-execute:: examples/code/eg_getsubgraph_01.py

    """
    subgraph = ConjunctiveGraph()
    # The namespace manager is built on the source graph.
    subgraph.namespace_manager = NamespaceManager(g)

    # Seed with the triples directly attached to the root subject ...
    root_triples = g.triples((subject, None, None))
    subgraph += root_triples
    # ... then let inflateSubgraph expand the result in place.
    inflateSubgraph(g, subgraph, subgraph, max_depth=max_depth)
    return subgraph
def get_graph(self, with_mappings=False, include_mapping_target=False, acceptance=False, target_uri=None):
    """Get Graph instance of this EDMRecord.

    The graph is parsed (N-Triples) from ``self.source_rdf``, or from
    ``self.acceptance_rdf`` when ``acceptance`` is set and acceptance data
    is present.

    :param with_mappings: Boolean integrate the ProxyMapping into the graph
    :param include_mapping_target: Boolean also include the mapping target
        triples in graph
    :param acceptance: if the acceptance data should be listed
    :param target_uri: target_uri if you want a sub-selection of the whole
        graph; ignored when it ends with ``/about`` or equals
        ``self.document_uri``
    :return: the full graph, or a ``Graph`` holding only the triples whose
        subject is ``target_uri``
    """
    rdf_string = self.source_rdf
    if acceptance and self.acceptance_rdf:
        rdf_string = self.acceptance_rdf

    graph = ConjunctiveGraph(identifier=self.named_graph)
    graph.namespace_manager = namespace_manager
    graph.parse(data=rdf_string, format='nt')

    if with_mappings:
        proxy_resources, graph = ProxyResource.update_proxy_resource_uris(self.dataset, graph)
        self.proxy_resources.add(*proxy_resources)
        for proxy_resource in proxy_resources:
            # Merge in place: ``graph + other`` copies every triple into a
            # brand-new graph on each iteration and the copy does not carry
            # over this graph's identifier.
            graph += proxy_resource.to_graph(include_mapping_target=include_mapping_target)

    if target_uri and not target_uri.endswith("/about") and target_uri != self.document_uri:
        selection = Graph(identifier=URIRef(self.named_graph))
        # Keep prefixes consistent with the full graph returned otherwise.
        selection.namespace_manager = namespace_manager
        subject = URIRef(target_uri)
        for p, o in graph.predicate_objects(subject=subject):
            selection.add((subject, p, o))
        graph = selection
    return graph
def __init__(self, endpoint):
    """Open a ``SPARQLStore``-backed conjunctive graph on ``endpoint``.

    The opened graph is given the shared ``ns_mgr`` namespace manager and
    stored on ``self.graph``; ``self.default_graph`` is set to the fixed
    vitro-kb-2 graph URI.
    """
    sparql_graph = ConjunctiveGraph('SPARQLStore')
    sparql_graph.open(endpoint)
    sparql_graph.namespace_manager = ns_mgr

    self.graph = sparql_graph
    self.default_graph = 'http://vitro.mannlib.cornell.edu/default/vitro-kb-2'
def catalyst_graph_for(file):
    """Load a JSON-LD file and return it merged with its inferred triples.

    ``file`` is parsed as JSON-LD into a ``ConjunctiveGraph``, the FuXi
    inference store computes an inference graph for it, and the triples of
    both are copied into a fresh ``ConjunctiveGraph`` that is returned.

    Args:
        file (str): location of the JSON-LD document.  A value starting
            with ``/`` is turned into a ``file://`` URL.

    Returns:
        ConjunctiveGraph: union of the loaded and the inferred triples.
    """
    if file.startswith('/'):
        file = 'file://' + file
    logging.info("InferenceStore catalyst_graph_for started")
    # NOTE: log line kept for log compatibility; it dates from a removed
    # pre-conversion step and nothing has been loaded at this point.
    logging.info("InferenceStore JSON-LD loaded")

    g = ConjunctiveGraph()
    g.namespace_manager = namespace_manager
    # Graph.load() was only a deprecated alias for parse() and is gone in
    # rdflib 6; parse() behaves the same on every rdflib version.
    g.parse(file, format="json-ld")
    logging.info("InferenceStore base graph loaded")

    # get the inference engine
    f = FuXiInferenceStore.get_instance()
    cl = f.get_inference(g)
    logging.info("InferenceStore inference graph loaded")

    union_g = rdflib.ConjunctiveGraph()
    # Base triples first, inferred triples second.
    for source_graph in (g, cl):
        for triple in source_graph.triples((None, None, None)):
            union_g.add(triple)
    logging.info("InferenceStore union graph prepared")
    return union_g
def catalyst_graph_for(file):
    """Load a JSON-LD file and return it merged with its inferred triples.

    ``file`` is parsed as JSON-LD into a ``ConjunctiveGraph``, the FuXi
    inference store computes an inference graph for it, and the triples of
    both are copied into a fresh ``ConjunctiveGraph`` that is returned.

    Args:
        file (str): location of the JSON-LD document.  A value starting
            with ``/`` is turned into a ``file://`` URL.

    Returns:
        ConjunctiveGraph: union of the loaded and the inferred triples.
    """
    if file.startswith("/"):
        file = "file://" + file
    logging.info("InferenceStore catalyst_graph_for started")
    # NOTE: log line kept for log compatibility; it dates from a removed
    # pre-conversion step and nothing has been loaded at this point.
    logging.info("InferenceStore JSON-LD loaded")

    g = ConjunctiveGraph()
    g.namespace_manager = namespace_manager
    # Graph.load() was only a deprecated alias for parse() and is gone in
    # rdflib 6; parse() behaves the same on every rdflib version.
    g.parse(file, format="json-ld")
    logging.info("InferenceStore base graph loaded")

    # get the inference engine
    f = FuXiInferenceStore.get_instance()
    cl = f.get_inference(g)
    logging.info("InferenceStore inference graph loaded")

    union_g = rdflib.ConjunctiveGraph()
    # Base triples first, inferred triples second.
    for source_graph in (g, cl):
        for triple in source_graph.triples((None, None, None)):
            union_g.add(triple)
    logging.info("InferenceStore union graph prepared")
    return union_g
def loadSOGraph(
    filename=None,
    data=None,
    publicID=None,
    normalize=True,
    deslop=True,
    format="json-ld",
):
    """
    Load an RDF string or file into an RDFLib ConjunctiveGraph.

    The graph is parsed from ``data`` when it is given, otherwise from
    ``filename``; if neither is given the graph starts out empty.

    NOTE: When ``normalize`` is True, every term is passed through
    ``_normalizeTerm`` so that ``<http://schema.org>``, ``<https://schema.org>``
    and ``<http://schema.org/>`` are all written as ``<https://schema.org/>``.

    NOTE: When ``deslop`` is True, every term is passed through
    ``_desloppifyTerm`` so that the case of ``SO:`` properties in `SO_TERMS`
    is made consistent.

    Args:
        filename (string):  path to RDF file on disk
        data (string): RDF text
        publicID (string): (from rdflib) The logical URI to use as the document base. If None
                  specified the document location is used.
        normalize (boolean): Normalize the use of schema.org namespace
        deslop (boolean): Adjust schema.org terms for case consistency
        format (string): The serialization format of the RDF to load

    Returns:
        ConjunctiveGraph: The loaded graph

    Example:

    .. jupyter-execute:: examples/code/eg_loadsograph_01.py

    """
    loaded = ConjunctiveGraph()
    if data is not None:
        loaded.parse(data=data, format=format, publicID=publicID)
    elif filename is not None:
        loaded.parse(filename, format=format, publicID=publicID)

    # Nothing to rewrite: hand back the graph exactly as parsed.
    if not normalize and not deslop:
        return loaded

    # The cleaned graph uses a namespace manager built on the loaded graph,
    # with the schema.org prefix forced to https://schema.org/
    manager = NamespaceManager(loaded)
    manager.bind(SO_PREFIX, SCHEMA_ORG, override=True, replace=True)
    cleaned = ConjunctiveGraph()
    cleaned.namespace_manager = manager

    for subj, pred, obj in loaded:
        terms = [subj, pred, obj]
        if normalize:
            terms = [_normalizeTerm(term) for term in terms]
        if deslop:
            terms = [_desloppifyTerm(loaded, term) for term in terms]
        cleaned.add(terms)
    return cleaned
def init_graph(graph_type=None):
    """
    Create an empty graph that uses the shared ``ns_mgr`` namespace manager.

    A ``ConjunctiveGraph`` is created when ``graph_type`` is
    ``'conjunctive'``; any other value yields a plain ``Graph``.
    """
    graph_cls = ConjunctiveGraph if graph_type == 'conjunctive' else Graph
    new_graph = graph_cls()
    new_graph.namespace_manager = ns_mgr
    return new_graph
def catalyst_graph_for(file):
    """Load a JSON-LD file into a graph and run the inference store on it.

    ``file`` is parsed as JSON-LD into a ``ConjunctiveGraph`` which is then
    handed to ``get_inference_store().get_inference``; the result of that
    call is not used and the loaded graph itself is returned.

    Args:
        file (str): location of the JSON-LD document.  A value starting
            with ``/`` is turned into a ``file://`` URL.

    Returns:
        ConjunctiveGraph: the loaded graph (presumably enriched in place by
        the inference store — confirm against ``get_inference``).
    """
    if file.startswith('/'):
        file = 'file://' + file
    logging.info("InferenceStore catalyst_graph_for started")
    # NOTE: log line kept for log compatibility; it dates from a removed
    # pre-conversion step and nothing has been loaded at this point.
    logging.info("InferenceStore JSON-LD loaded")

    g = ConjunctiveGraph()
    g.namespace_manager = namespace_manager
    # Graph.load() was only a deprecated alias for parse() and is gone in
    # rdflib 6; parse() behaves the same on every rdflib version.
    g.parse(file, format="json-ld")
    logging.info("InferenceStore base graph loaded")

    # get the inference engine
    get_inference_store().get_inference(g)
    logging.info("InferenceStore inference graph loaded")
    return g
if domain and not in_range(s, domain): print("Not in domain: ", s, p, o) if range_ and not in_range(o, range_): print("Not in range: ", s, p, o) if __name__ == '__main__': parser = ArgumentParser() parser.add_argument('--check_properties', '-p', action='store_true', help="check property domain and range against the ontology. Slow.") parser.add_argument('input_fname', help="the input file") args = parser.parse_args() json = load(open(args.input_fname)) context = load(open(join(dirname(__file__), 'context.jsonld'))) suspicious = list(check_keys(json, context['@context'])) suspicious.sort() if suspicious: print("Suspicious keys:") for key in suspicious: print(key) if args.check_properties: from pyld import jsonld quads = jsonld.to_rdf('file:'+args.input_fname, {'format': 'application/nquads'}) ontology = load_ontology() g = ConjunctiveGraph() g.namespace_manager = ontology.namespace_manager g.parse(data=quads, format='nquads') for c in g.contexts(): c.namespace_manager = ontology.namespace_manager check_props(g, ontology)