def get(self, uri, origin=None):
    """Return the Resource identified by ``uri``.

    If the authoritative Origin of the Resource does not exist and no
    origin is given, DoesNotExist is raised. The assumption is to only
    trust validated sources.

    Alternative: this could point to the source with the most parameters
    given, because a user may just want the Resource with the most
    content / know everything about a uri.

    --> If DoesNotExist occurs, Resources with this uri might still exist
    but no validated Resources exist.

    :param uri: URI of the wanted Resource; normalised with
        ``get_rdflib_uriref`` before any lookup.
    :param origin: optional Origin providing the Resource. When given,
        the Resource is looked up by the primary key built from
        ``origin.uri`` and ``uri``.
    :raises self.model.DoesNotExist: when no origin is given and either
        no Resource or more than one Resource matches ``uri``.
    """
    uri = get_rdflib_uriref(uri)

    if origin:
        # Exact lookup: the primary key is derived from origin and uri.
        pk = self.get_pk(origin_uri=origin.uri, uri=uri)
        return super(ResourceManager, self).get(pk=pk)

    matches = list(self.filter(_uri=uri))
    if not matches:
        raise self.model.DoesNotExist
    if len(matches) > 1:
        # Ambiguous: tell the caller which Origins provide the Resource.
        raise self.model.DoesNotExist(
            "Please pass the exact "
            "Origin. The Resource you are looking for is "
            "provided by the Origins: %s" % ", ".join(
                unicode(r._origin.uri) for r in matches))
    # Exactly one match.
    return matches[0]
def test_get_rdflib_uriref_result(self):
    """get_rdflib_uriref turns each sample string into the expected URIRef."""
    expectations = [
        ("http://web.de/test?query=bla",
         rdflib.URIRef("http://web.de/test?query=bla")),
    ]
    for raw_uri, expected in expectations:
        converted = get_rdflib_uriref(raw_uri)
        # The raw input doubles as the failure message.
        self.assertEqual(converted, expected, msg=raw_uri)
def create(self, uri, origin, **kwargs):
    """Create a Resource for ``uri`` that is provided by ``origin``.

    :param uri: URI of the new Resource; normalised with
        ``get_rdflib_uriref``.
    :param origin: the providing Origin; must not be None and must
        already be processed.
    :param kwargs: forwarded unchanged to the parent manager's
        ``create``.
    :return: whatever the parent manager's ``create`` returns.
    """
    # An isinstance(origin, Origin) check is not done here: importing
    # ldtools.origin.Origin at this point would be a circular import.
    assert origin is not None
    assert origin.processed, (
        "Origin has to be processed before creating more "
        "Resource objects: origin.GET()")

    resource_uri = get_rdflib_uriref(uri)
    return super(ResourceManager, self).create(
        pk=self.get_pk(origin_uri=origin.uri, uri=resource_uri),
        _uri=resource_uri,
        _origin=origin,
        **kwargs)
def create(self, uri, BACKEND=None):
    """Create an Origin for ``uri`` and pass it through post_create_hook.

    :param uri: URI of the Origin. If it differs from its slash URL
        (``get_slash_url``), the slash URL is used instead and a debug
        message is logged.
    :param BACKEND: backend object for the Origin; a new ``RestBackend``
        is used when this is falsy.
    :return: the result of ``self.post_create_hook`` for the new Origin.
    """
    uri = get_rdflib_uriref(uri)

    is_slash_uri = uri == get_slash_url(uri)
    if not is_slash_uri:
        logger.debug(
            "URI passed to Origin Manager was not a slash URI: %s. "
            "Fixed now." % uri)
        uri = get_slash_url(uri)

    new_origin = super(OriginManager, self).create(
        pk=uri,
        uri=uri,
        backend=BACKEND or RestBackend())
    return self.post_create_hook(new_origin)
def get_or_create(self, uri, **kwargs):
    """Return ``(origin, created)`` for ``uri``.

    The uri is replaced by its slash URL (``get_slash_url``) when the
    two differ, and a warning is logged in that case. An existing Origin
    is returned with ``False``; otherwise a new one is created with
    ``kwargs`` and returned with ``True``.

    :param uri: URI of the Origin.
    :param kwargs: only used when a new Origin is created; ignored (with
        a warning) for the lookup.
    """
    uri = get_rdflib_uriref(uri)

    is_slash_uri = uri == get_slash_url(uri)
    if not is_slash_uri:
        logger.warning(
            "URI passed to Origin Manager was not a slash URI: %s. "
            "Fixed now." % uri)
        uri = get_slash_url(uri)

    if kwargs:
        logger.warning("kwargs are ignored for get.")

    try:
        existing = self.get(uri)
    except self.model.DoesNotExist:
        return self.create(uri, **kwargs), True
    return existing, False
def get_authoritative_resource(uri, create_nonexistent_origin=True):
    """Try to return the Resource for ``uri`` from its authoritative Origin.

    The authoritative Origin is the one stored under the slash URL of
    ``uri`` (``get_slash_url``).

    :param uri: URI of the wanted Resource.
    :param create_nonexistent_origin: when exactly one matching Origin
        is not found, obtain one via ``Origin.objects.get_or_create``
        if True, otherwise raise.
    :raises Resource.DoesNotExist: when no single matching Origin is
        stored and ``create_nonexistent_origin`` is False.
    """
    uri = get_rdflib_uriref(uri)
    origin_uri = get_slash_url(uri)

    candidates = list(Origin.objects.filter(uri=origin_uri))
    if len(candidates) == 1:
        origin = candidates[0]
    elif create_nonexistent_origin:
        origin, _created = Origin.objects.get_or_create(uri=origin_uri)
    else:
        raise Resource.DoesNotExist(
            "No authoritative Resource found for %s" % uri)

    # Only fetch when the origin reports no unsaved changes.
    if not origin.has_unsaved_changes():
        origin.GET(only_follow_uris=[], raise_errors=False)

    return Resource.objects.get(uri=uri, origin=origin)
def get(self, uri, **kwargs):
    """Retrieve the Origin object for ``uri`` from the store.

    The uri is normalised with ``get_rdflib_uriref`` and used as the
    primary key. Extra keyword arguments are accepted but not used.
    """
    pk = get_rdflib_uriref(uri)
    return super(OriginManager, self).get(pk=pk)
def _pprint_resource(resource):
    """Pretty-print a resource's ``__dict__`` without ``_has_changes``/``pk``.

    The two attributes are deleted from the object itself (when present)
    before printing.
    """
    for attr_name in ("_has_changes", "pk"):
        if hasattr(resource, attr_name):
            delattr(resource, attr_name)
    pprint.pprint(resource.__dict__)


def execute_ldtools(
    verbosity,
    origin_urls,
    depth,
    follow_all,
    follow_uris,
    socket_timeout,
    GRAPH_SIZE_LIMIT,
    print_all_resources,
    only_print_uris,
    only_print_uri_content,
    only_negotiate
):
    """Fetch the given origin URLs and print the resources found.

    :param verbosity: passed to ``set_colored_logger``.
    :param origin_urls: sized iterable of URLs; each is fetched via the
        Origin stored under its slash URL.
    :param depth: when truthy, number of extra rounds in which every
        known Origin is fetched again.
    :param follow_all: when truthy, ``only_follow_uris=None`` is passed
        to ``origin.GET``.
    :param follow_uris: values passed as ``only_follow_uris`` when
        ``follow_all`` is falsy; an empty list is passed when both are
        falsy.
    :param socket_timeout: when truthy, set as the default socket
        timeout.
    :param GRAPH_SIZE_LIMIT: when truthy, forwarded to ``origin.GET``.
    :param print_all_resources: when truthy, print every Resource at
        the end.
    :param only_print_uris: with ``print_all_resources``, print the
        Resource objects themselves instead of their attributes.
    :param only_print_uri_content: only fetch through the backend and
        print the raw data, then exit.
    :param only_negotiate: only fetch through the backend, then exit.

    In the ``only_negotiate`` / ``only_print_uri_content`` modes the
    process ends with ``sys.exit(0)`` after all URLs were handled.
    """
    set_colored_logger(verbosity)

    # customize Origin.objects.post_create_hook for performance reasons
    def custom_post_create_hook(origin):
        origin.timedelta = datetime.timedelta(minutes=5)
        return origin
    Origin.objects.post_create_hook = custom_post_create_hook

    url_count = len(origin_urls)
    if url_count > 1:
        logger.info("Retrieving content of %s URLs", url_count)

    # Use the module logger here as everywhere else: the module-level
    # logging.info() goes to the root logger and implicitly calls
    # basicConfig() when the root logger has no handlers.
    if follow_all:
        only_follow_uris = None
        logger.info("Following all URIs")
    elif follow_uris:
        only_follow_uris = follow_uris
        logger.info("Following values matching: %s",
                    ", ".join(only_follow_uris))
    else:
        only_follow_uris = []

    if socket_timeout:
        import socket
        logger.info("Setting socket timeout to %s", socket_timeout)
        socket.setdefaulttimeout(socket_timeout)

    kw = dict(raise_errors=False)
    if GRAPH_SIZE_LIMIT:
        kw["GRAPH_SIZE_LIMIT"] = GRAPH_SIZE_LIMIT

    backend_only = only_negotiate or only_print_uri_content

    for url in origin_urls:
        url = get_slash_url(url)
        origin, _created = Origin.objects.get_or_create(url)
        logger.info("Retrieving content of %s", origin.uri)

        if not backend_only:
            origin.GET(only_follow_uris=only_follow_uris, **kw)
            continue

        try:
            data = origin.backend.GET(
                uri=origin.uri,
                httphandler=urllib2.HTTPHandler(debuglevel=1))
        except Exception as exc:
            # Best effort: report the failure and go on with the next URL.
            print(exc)
            continue
        if only_print_uri_content:
            print('\n', data, '\n')

    if backend_only:
        sys.exit(0)

    if depth:
        for _ in range(depth):
            for origin in Origin.objects.all():
                origin.GET(only_follow_uris=only_follow_uris, **kw)

    for orig_url in origin_urls:
        origin = Origin.objects.get(get_slash_url(orig_url))
        # Loop-invariant: convert the requested URL once per origin.
        wanted_uri = get_rdflib_uriref(orig_url)
        for r in origin.get_resources():
            if r._uri == wanted_uri:
                logger.info(u"Printing all available information "
                            "about %s", r._uri)
                _pprint_resource(r)

    if print_all_resources:
        all_resources = Resource.objects.all()
        if only_print_uris:
            for resource in all_resources:
                print(resource)
        else:
            for resource in all_resources:
                _pprint_resource(resource)
def get_or_create(self, uri, origin=None):
    """Return ``(resource, created)`` for ``uri`` and ``origin``.

    An existing Resource found by ``self.get`` is returned with
    ``False``. When ``self.get`` raises ``self.model.DoesNotExist``, a
    new Resource is created via ``self.create`` and returned with
    ``True``.

    :param uri: URI of the Resource; normalised with
        ``get_rdflib_uriref``.
    :param origin: optional Origin, passed on to both ``get`` and
        ``create``.
    """
    resource_uri = get_rdflib_uriref(uri)
    try:
        found = self.get(uri=resource_uri, origin=origin)
    except self.model.DoesNotExist:
        return self.create(uri=resource_uri, origin=origin), True
    return found, False