Beispiel #1
0
    def get_graph(self):
        """Build an rdflib graph from every resource and property of 'self'.

        A fresh ``rdflib.graph.ConjunctiveGraph`` identified by ``self.uri``
        is created, the namespace bindings of ``self._graph`` are copied onto
        it, and one triple is added per attribute value of every resource
        returned by ``self.get_resources()``.  Attributes whose name starts
        with ``_`` and the ``pk`` attribute are skipped.

        Returns:
            The populated graph.  If ``self._graph`` is missing and
            ``self.errors`` is non-empty, an error is logged and the still
            empty graph is returned.

        Raises:
            AssertionError: if ``self._graph`` is missing although no errors
                are recorded.  The namespace comparison and the URIRef check
                further down are plain ``assert`` statements and raise the
                same exception type when they fail.
        """
        # rdflib.ConjunctiveGraph because rdflib.Graph does not allow
        # usage of parsing plugins
        graph = rdflib.graph.ConjunctiveGraph(identifier=self.uri)

        if not hasattr(self, '_graph'):
            if hasattr(self, 'errors') and len(self.errors) != 0:
                logging.error("Origin %s has Errors --> can't process "
                              ".get_graph()", self.uri)
                return graph
            # Raised explicitly instead of via ``assert`` so that the check
            # also fires under ``python -O``; otherwise the access to
            # self._graph below would fail with an opaque AttributeError.
            raise AssertionError(
                "graph has to be processed before executing get_graph()")

        # Problems with namespace mapping here:
        #  1) namespace bindings are not really necessary to validate
        #     isomorphic graphs, but the resulting graph is different
        #     if they are missing
        #  2) doesn't detect duplicate definitions of namespaces
        namespace_dict = safe_dict(dict(self._graph.namespace_manager.namespaces()))

        # NOTE(review): safe_dict is re-applied to its own result exactly as
        # before; presumably idempotent -- confirm before removing the calls.
        for prefix, namespace in safe_dict(namespace_dict).items():
            graph.bind(prefix=prefix, namespace=namespace)
        new_ns = dict(graph.namespace_manager.namespaces())

        # On failure the message lists the bindings missing from the new graph.
        assert namespace_dict == new_ns, [
            (k, v) for k, v in safe_dict(namespace_dict).items()
            if k not in safe_dict(new_ns).keys()
        ]

        for resource in self.get_resources():
            # __dict__ holds the predicates as plain string keys, so every
            # key is converted back into a URIRef before building triples.
            for attr_name, values in resource.__dict__.items():

                # skip internals
                if str(attr_name).startswith("_") or attr_name == "pk":
                    continue

                # NOTE(review): only "http://" keys are treated as full URIs;
                # "https://" keys go through pyattr2predicate -- confirm that
                # this is intended.
                if attr_name.startswith("http://"):
                    predicate = rdflib.URIRef(attr_name)
                else:
                    predicate = pyattr2predicate(attr_name, namespace_dict)

                assert isinstance(predicate, rdflib.URIRef), "property %s is not a URIRef object" % predicate

                # A set contributes one triple per member; any other value
                # is treated as a single object.
                if isinstance(values, set):
                    for v in values:
                        graph.add(triple_yield(resource, predicate, v))
                else:
                    graph.add(triple_yield(resource, predicate, values))

        return graph
Beispiel #2
0
def check_shortcut_consistency():
    """Check every known Origin for inconsistent namespace mappings.

    Walks all ``Origin`` objects that carry a ``_graph`` attribute and
    verifies that each namespace prefix is bound to the same namespace in
    all of them.

    Raises:
        AssertionError: if a prefix is bound to two different namespaces.
            This is a plain ``assert``, so the check is skipped when Python
            runs with ``-O``.
    """
    global_namespace_dict = {}
    for origin in Origin.objects.all():
        if not hasattr(origin, "_graph"):
            continue

        # Build the mapping the same way the other call sites do
        # (safe_dict(dict(...))) and iterate over .items(): iterating the
        # mapping itself would yield bare prefixes instead of
        # (prefix, namespace) pairs.
        namespaces = safe_dict(dict(origin._graph.namespace_manager.namespaces()))
        for prefix, namespace in namespaces.items():
            # First sighting registers the binding; later ones must match it.
            known = global_namespace_dict.setdefault(prefix, namespace)
            assert known == namespace, (
                "prefix %r is bound to both %r and %r"
                % (prefix, known, namespace)
            )
Beispiel #3
0
    def populate_resources(self, graph):
        """Create a Resource entry for every triple contained in ``graph``.

        For each ``(subject, predicate, object)`` triple:

        * if ``self.handle_owl_imports`` is set and the triple is an
          ``owl:imports`` statement with a URIRef object, the imported
          Origin is fetched (``origin.GET()``) before continuing;
        * if the object is a URIRef accepted by ``is_valid_url`` and the
          predicate is allowed by ``self.only_follow_uris`` (``None`` allows
          every predicate), an Origin is registered for the object;
        * the Resource for ``subject`` within ``self.origin`` is fetched or
          created and the property is added to it.

        Afterwards the elapsed time is stored in
        ``self.origin.graph_handler_time`` and ``_has_changes`` is reset to
        ``False`` on every resource of ``self.origin``.

        Args:
            graph: iterable of triples that exposes
                ``namespace_manager.namespaces()``.
        """
        # Maps the unicode form of each namespace URL back to its prefix.
        namespace_short_notation_reverse_dict = {
            unicode(rdflib_url): prefix
            for rdflib_url, prefix in reverse_dict(
                safe_dict(dict(graph.namespace_manager.namespaces()))
            ).items()
        }
        reference_time = datetime.datetime.now()

        for subject, predicate, obj_ect in graph:
            assert hasattr(subject, "n3")

            # workaround for rdflib's unicode problems
            assert predicate.encode('utf8')

            obj_is_uriref = isinstance(obj_ect, rdflib.URIRef)

            if (self.handle_owl_imports
                    and predicate == rdflib.OWL.imports
                    and obj_is_uriref):
                uri = get_slash_url(obj_ect)
                origin, _created = Origin.objects.get_or_create(uri=uri)

                logger.info("Interrupting to process owl:imports %s first",
                            origin.uri)
                origin.GET()

            # only_follow_uris is None  --> follow every predicate
            # otherwise                 --> follow only the listed predicates
            follow_predicate = (self.only_follow_uris is None
                                or predicate in self.only_follow_uris)

            # wrong scheme mailto, tel, callto --> should be Literal?
            if follow_predicate and obj_is_uriref and is_valid_url(obj_ect):
                obj_uriref = get_slash_url(obj_ect)
                Origin.objects.get_or_create(uri=obj_uriref)

            resource, _created = Resource.objects.get_or_create(uri=subject, origin=self.origin)
            resource._add_property(predicate, obj_ect, namespace_short_notation_reverse_dict)

        now = datetime.datetime.now()
        self.origin.graph_handler_time = now - reference_time

        # Everything above was freshly derived from the graph, so no
        # resource is flagged as changed afterwards.
        for resource in self.origin.get_resources():
            resource._has_changes = False