def get_graph(self):
    """Processes every Resource and Property related to 'self'

    The namespace bindings of the already processed ``self._graph`` are
    copied to a fresh graph. Afterwards every public attribute of every
    resource returned by ``self.get_resources()`` is added to that graph,
    one triple per value.

    Returns:
        A ``rdflib.graph.ConjunctiveGraph`` identified by ``self.uri``. The
        graph is returned empty when ``self`` has no ``_graph`` attribute
        but has a non-empty ``errors`` attribute.

    Raises:
        AssertionError: if ``self._graph`` is missing and no errors were
            recorded, if the namespaces bound to the new graph differ from
            the ones read from ``self._graph``, or if a predicate is not a
            ``rdflib.URIRef``.
    """
    # rdflib.ConjunctiveGraph because rdflib.Graph does not allow
    # usage of parsing plugins
    graph = rdflib.graph.ConjunctiveGraph(identifier=self.uri)

    if not hasattr(self, '_graph'):
        if hasattr(self, 'errors') and self.errors:
            logging.error("Origin %s has Errors --> can't process "
                          ".get_graph()", self.uri)
            return graph

    assert hasattr(self, "_graph"), (
        "graph has to be processed before executing get_graph()")

    # Problems with namespacemapping here:
    #  1) namespace bindings are not really necessary to validate
    #     isomorphic graphs but the resulting graph is different
    #     if they miss
    #  2) doesn't detect duplicate definitions of namespaces
    namespace_dict = safe_dict(
        dict(self._graph.namespace_manager.namespaces()))

    for prefix, namespace in safe_dict(namespace_dict).items():
        graph.bind(prefix=prefix, namespace=namespace)

    new_ns = dict(graph.namespace_manager.namespaces())

    # The message (list of bindings missing in the new graph) is only
    # evaluated when the assertion fails.
    assert namespace_dict == new_ns, [
        (k, v) for k, v in safe_dict(namespace_dict).items()
        if k not in safe_dict(new_ns)
    ]

    for resource in self.get_resources():
        # __dict__ converts rdflib.urirefs to strings for keys -->
        # convert the dict's items back to uriref
        # {'foaf': 'http:/....', ...}
        for attr_name, values in resource.__dict__.items():

            # skip internals
            if str(attr_name).startswith("_") or attr_name == "pk":
                continue

            # A name containing a URL scheme is a full predicate URI and
            # can never be a "prefix_name" shortcut, so https is handled
            # the same way as http.
            if attr_name.startswith(("http://", "https://")):
                predicate = rdflib.URIRef(attr_name)
            else:
                predicate = pyattr2predicate(attr_name, namespace_dict)

            assert isinstance(predicate, rdflib.URIRef), \
                "property %s is not a URIRef object" % predicate

            # A set holds several values for one predicate; anything else
            # is treated as a single value.
            if not isinstance(values, set):
                values = (values,)

            for value in values:
                graph.add(triple_yield(resource, predicate, value))

    return graph
def check_shortcut_consistency():
    """Checks every known Origin for inconsistent namespacemappings

    Walks over all origins returned by ``Origin.objects.all()`` that have a
    ``_graph`` attribute and collects their prefix --> namespace bindings in
    one dictionary.

    Raises:
        AssertionError: if a prefix is bound to different namespaces in two
            origins.
    """
    global_namespace_dict = {}

    for origin in Origin.objects.all():
        if not hasattr(origin, "_graph"):
            continue

        # Build a real dict first and iterate its items, the same way the
        # other users of safe_dict() in this file do, instead of unpacking
        # pairs straight out of whatever safe_dict() returns for a generator.
        namespaces = safe_dict(
            dict(origin._graph.namespace_manager.namespaces()))

        for prefix, namespace in namespaces.items():
            # setdefault() stores the first binding seen for a prefix and
            # returns the stored one for every later occurrence.
            known = global_namespace_dict.setdefault(prefix, namespace)
            assert known == namespace, (
                "prefix %r is bound to both %r and %r"
                % (prefix, known, namespace))
def populate_resources(self, graph):
    """Creates Resources and their properties for every triple in 'graph'

    For each ``(subject, predicate, object)`` triple:

    * if ``self.handle_owl_imports`` is set and the triple is an
      ``owl:imports`` statement pointing to a URIRef, the imported Origin
      is fetched or created and ``GET()`` is called on it first,
    * if the predicate may be followed (``self.only_follow_uris`` is
      ``None`` or contains the predicate) and the object is a URIRef that
      passes ``is_valid_url``, an Origin is fetched or created for it,
    * the Resource for the subject is fetched or created within
      ``self.origin`` and the property is added to it.

    Afterwards the elapsed time is stored in
    ``self.origin.graph_handler_time`` and ``_has_changes`` is reset to
    ``False`` on every resource of ``self.origin``.

    Args:
        graph: iterable of triples that also provides
            ``namespace_manager.namespaces()``.

    Raises:
        AssertionError: if a subject has no ``n3`` attribute or a predicate
            encodes to an empty value.
    """
    namespaces = safe_dict(dict(graph.namespace_manager.namespaces()))
    namespace_short_notation_reverse_dict = {
        unicode(rdflib_url): prefix
        for rdflib_url, prefix in reverse_dict(namespaces).items()
    }

    reference_time = datetime.datetime.now()

    for subject, predicate, obj_ect in graph:
        assert hasattr(subject, "n3")

        # workaround for rdflib's unicode problems
        assert predicate.encode('utf8')

        object_is_uriref = isinstance(obj_ect, rdflib.URIRef)

        if self.handle_owl_imports:
            if predicate == rdflib.OWL.imports and object_is_uriref:
                uri = get_slash_url(obj_ect)
                origin, _created = Origin.objects.get_or_create(uri=uri)

                logger.info("Interrupting to process owl:imports %s first",
                            origin.uri)
                origin.GET()

        # Without a whitelist every predicate is followed.
        may_follow = (self.only_follow_uris is None
                      or predicate in self.only_follow_uris)

        if may_follow and object_is_uriref:
            # wrong scheme mailto, tel, callto --> should be Literal?
            if is_valid_url(obj_ect):
                obj_uriref = get_slash_url(obj_ect)
                Origin.objects.get_or_create(uri=obj_uriref)

        resource, _created = Resource.objects.get_or_create(
            uri=subject, origin=self.origin)
        resource._add_property(predicate, obj_ect,
                               namespace_short_notation_reverse_dict)

    now = datetime.datetime.now()
    self.origin.graph_handler_time = now - reference_time

    # Everything was just populated from the graph, so no resource carries
    # pending changes.
    for resource in self.origin.get_resources():
        resource._has_changes = False