def load_locally(url):
    """
    Resolves a test-suite URL to a local file and returns it as a remote
    document, simulating the HTTP behaviour requested by the test options.

    ``test`` is read from the enclosing scope; its 'option' entry may carry
    'contentType', 'redirectTo', 'httpStatus' and 'httpLink'.

    :param url: the URL requested by the processor.

    :return: a dict with the keys 'contentType', 'contextUrl',
        'documentUrl' and 'document'.

    :raises Exception: 'multiple context link headers' when the simulated
        Link header holds more than one JSON-LD context link, or
        'loading document failed' when the local file cannot be read.
    """
    options = test.data.get('option', {})
    content_type = options.get('contentType')
    url_no_frag = strip_fragment(url)

    # no explicit content type: infer it from the file extension
    if not content_type:
        content_type = 'application/octet-stream'
        for suffix, inferred in (
                ('.jsonld', 'application/ld+json'),
                ('.json', 'application/json'),
                ('.html', 'text/html')):
            if url_no_frag.endswith(suffix):
                content_type = inferred
                break

    doc = {
        'contentType': content_type,
        'contextUrl': None,
        'documentUrl': url,
        'document': None
    }

    # HTTP simulation only applies to the test's own base document
    if options and url == test.base:
        # a missing 'httpStatus' counts as 0 instead of comparing None
        # against an int (TypeError on Python 3)
        http_status = options.get('httpStatus') or 0
        if 'redirectTo' in options and http_status >= 300:
            doc['documentUrl'] = (
                test.manifest.data['baseIri'] + options['redirectTo'])
        elif 'httpLink' in options:
            link_header = options.get('httpLink', '')
            if isinstance(link_header, list):
                link_header = ','.join(link_header)
            # parse once; both lookups below read the same result
            links = jsonld.parse_link_header(link_header)

            linked_context = links.get(
                'http://www.w3.org/ns/json-ld#context')
            if linked_context and content_type != 'application/ld+json':
                if isinstance(linked_context, list):
                    raise Exception('multiple context link headers')
                doc['contextUrl'] = linked_context['target']

            linked_alternate = links.get('alternate')
            # if not JSON-LD, alternate may point there
            if (linked_alternate and
                    linked_alternate.get('type') == 'application/ld+json' and
                    not re.match(
                        r'^application\/(\w*\+)?json$', content_type)):
                doc['contentType'] = 'application/ld+json'
                doc['documentUrl'] = jsonld.prepend_base(
                    url, linked_alternate['target'])

    global ROOT_MANIFEST_DIR
    if ':' not in doc['documentUrl']:
        # no scheme separator: treat as a path under the manifest root
        filename = os.path.join(ROOT_MANIFEST_DIR, doc['documentUrl'])
        doc['documentUrl'] = 'file://' + filename
    else:
        filename = test.dirname + strip_fragment(
            strip_base(doc['documentUrl']))

    try:
        doc['document'] = read_file(filename)
    except Exception as cause:
        # keep the original failure attached; do not trap KeyboardInterrupt
        raise Exception('loading document failed') from cause
    return doc
def load_locally(url):
    """
    Resolves a test-suite URL to a local JSON file and returns it as a
    remote document, simulating the HTTP behaviour requested by the test
    options.

    ``test`` is read from the enclosing scope; its 'option' entry may carry
    'contentType', 'redirectTo', 'httpStatus' and 'httpLink'.

    :param url: the URL requested by the processor.

    :return: a dict with the keys 'contextUrl', 'documentUrl' and
        'document'.

    :raises Exception: 'multiple context link headers' when the simulated
        Link header holds more than one JSON-LD context link, or
        'loading document failed' when the local file cannot be read.
    """
    doc = {'contextUrl': None, 'documentUrl': url, 'document': None}
    options = test.data.get('option')

    # HTTP simulation only applies to the test's own base document
    if options and url == test.base:
        # a missing 'httpStatus' counts as 0 instead of comparing None
        # against an int (TypeError on Python 3)
        http_status = options.get('httpStatus') or 0
        if 'redirectTo' in options and http_status >= 300:
            doc['documentUrl'] = (
                test.manifest.data['baseIri'] + options['redirectTo'])
        elif 'httpLink' in options:
            content_type = options.get('contentType')
            if not content_type and url.endswith('.jsonld'):
                content_type = 'application/ld+json'

            link_header = options.get('httpLink', '')
            if isinstance(link_header, list):
                link_header = ','.join(link_header)
            linked_context = jsonld.parse_link_header(link_header).get(
                'http://www.w3.org/ns/json-ld#context')
            if linked_context and content_type != 'application/ld+json':
                if isinstance(linked_context, list):
                    raise Exception('multiple context link headers')
                doc['contextUrl'] = linked_context['target']

    global ROOT_MANIFEST_DIR
    if ':' not in doc['documentUrl']:
        # no scheme separator: treat as a path under the manifest root
        filename = os.path.join(ROOT_MANIFEST_DIR, doc['documentUrl'])
        doc['documentUrl'] = 'file://' + filename
    else:
        filename = ROOT_MANIFEST_DIR + strip_base(doc['documentUrl'])

    try:
        doc['document'] = read_json(filename)
    except Exception as cause:
        # keep the original failure attached; do not trap KeyboardInterrupt
        raise Exception('loading document failed') from cause
    return doc
def load_locally(url):
    """
    Resolves a test-suite URL to a local JSON file and returns it as a
    remote document, simulating the HTTP behaviour requested by the test
    options.

    ``test`` and ``base`` are read from the enclosing scope; the test's
    'option' entry may carry 'contentType', 'redirectTo', 'httpStatus' and
    'httpLink'.

    :param url: the URL requested by the processor.

    :return: a dict with the keys 'contextUrl', 'documentUrl' and
        'document'.

    :raises Exception: 'multiple context link headers' when the simulated
        Link header holds more than one JSON-LD context link, or
        'loading document failed' when the local file cannot be read.
    """
    doc = {"contextUrl": None, "documentUrl": url, "document": None}
    options = test.data.get("option")

    # HTTP simulation only applies to the test's own base document
    if options and url == test.base:
        # a missing 'httpStatus' counts as 0 instead of comparing None
        # against an int (TypeError on Python 3)
        http_status = options.get("httpStatus") or 0
        if "redirectTo" in options and http_status >= 300:
            doc["documentUrl"] = (
                test.manifest.data["baseIri"] + options["redirectTo"])
        elif "httpLink" in options:
            content_type = options.get("contentType")
            if not content_type and url.endswith(".jsonld"):
                content_type = "application/ld+json"

            link_header = options.get("httpLink", "")
            if isinstance(link_header, list):
                link_header = ",".join(link_header)
            linked_context = jsonld.parse_link_header(link_header).get(
                "http://www.w3.org/ns/json-ld#context")
            if linked_context and content_type != "application/ld+json":
                if isinstance(linked_context, list):
                    raise Exception("multiple context link headers")
                doc["contextUrl"] = linked_context["target"]

    global ROOT_MANIFEST_DIR
    if ":" not in doc["documentUrl"]:
        # no scheme separator: treat as a path under the manifest root
        filename = os.path.join(ROOT_MANIFEST_DIR, doc["documentUrl"])
        doc["documentUrl"] = "file://" + filename
    else:
        # drop the leading len(base) characters and map the rest onto the
        # manifest root
        filename = ROOT_MANIFEST_DIR + doc["documentUrl"][len(base):]

    try:
        doc["document"] = read_json(filename)
    except Exception as cause:
        # keep the original failure attached; do not trap KeyboardInterrupt
        raise Exception("loading document failed") from cause
    return doc
def loader(url):
    """
    Retrieves JSON-LD at the given URL.

    ``secure`` and ``kwargs`` are read from the enclosing scope; ``kwargs``
    is forwarded to ``requests.get``.

    :param url: the URL to retrieve.

    :return: the RemoteDocument, a dict with the keys 'contextUrl',
        'documentUrl' and 'document'.

    :raises JsonLdError: with code 'loading document failed' for an
        invalid URL or any retrieval/parsing failure, or with code
        'multiple context link headers' when more than one JSON-LD context
        Link header is present.
    """
    try:
        # validate URL
        pieces = urllib_parse.urlparse(url)
        allowed_netloc_chars = set(
            string.ascii_letters + string.digits + '-.:')
        # reject any netloc character outside the allowed set; a
        # proper-superset test (">") would practically never trigger
        if (not all([pieces.scheme, pieces.netloc]) or
                pieces.scheme not in ['http', 'https'] or
                not set(pieces.netloc) <= allowed_netloc_chars):
            raise JsonLdError(
                'URL could not be dereferenced; only "http" and "https" '
                'URLs are supported.',
                'jsonld.InvalidUrl', {'url': url},
                code='loading document failed')
        if secure and pieces.scheme != 'https':
            raise JsonLdError(
                'URL could not be dereferenced; secure mode enabled and '
                'the URL\'s scheme is not "https".',
                'jsonld.InvalidUrl', {'url': url},
                code='loading document failed')

        headers = {
            'Accept': 'application/ld+json, application/json'
        }
        response = requests.get(url, headers=headers, **kwargs)
        doc = {
            'contextUrl': None,
            'documentUrl': response.url,
            'document': response.json()
        }

        content_type = response.headers.get('content-type')
        # compare on the bare media type: the header value may carry
        # parameters such as "; charset=utf-8"
        media_type = (content_type or '').split(';', 1)[0].strip().lower()
        link_header = response.headers.get('link')
        if link_header and media_type != 'application/ld+json':
            linked_context = parse_link_header(link_header).get(
                LINK_HEADER_REL)
            # only 1 related link header permitted
            if isinstance(linked_context, list):
                raise JsonLdError(
                    'URL could not be dereferenced, it has more than one '
                    'associated HTTP Link Header.',
                    'jsonld.LoadDocumentError',
                    {'url': url},
                    code='multiple context link headers')
            if linked_context:
                doc['contextUrl'] = linked_context['target']
        return doc
    except JsonLdError:
        # already in the expected shape; propagate untouched
        raise
    except Exception as cause:
        raise JsonLdError(
            'Could not retrieve a JSON-LD document from the URL.',
            'jsonld.LoadDocumentError', code='loading document failed',
            cause=cause)
def loader(url):
    """
    Retrieves JSON-LD at the given URL.

    ``secure`` and ``kwargs`` are read from the enclosing scope; ``kwargs``
    is forwarded to ``requests.get``.

    :param url: the URL to retrieve.

    :return: the RemoteDocument, a dict with the keys 'contextUrl',
        'documentUrl' and 'document'.

    :raises JsonLdError: with code 'loading document failed' for an
        invalid URL or any retrieval/parsing failure, or with code
        'multiple context link headers' when more than one JSON-LD context
        Link header is present.
    """
    try:
        # validate URL
        pieces = urllib_parse.urlparse(url)
        allowed_netloc_chars = set(
            string.ascii_letters + string.digits + '-.:')
        has_required_parts = all([pieces.scheme, pieces.netloc])
        # subset test: any character outside the allowed set invalidates
        # the URL (a proper-superset test would practically never trigger)
        netloc_is_clean = set(pieces.netloc) <= allowed_netloc_chars
        if (not has_required_parts or
                pieces.scheme not in ['http', 'https'] or
                not netloc_is_clean):
            raise JsonLdError(
                'URL could not be dereferenced; only "http" and "https" '
                'URLs are supported.',
                'jsonld.InvalidUrl', {'url': url},
                code='loading document failed')
        if secure and pieces.scheme != 'https':
            raise JsonLdError(
                'URL could not be dereferenced; secure mode enabled and '
                'the URL\'s scheme is not "https".',
                'jsonld.InvalidUrl', {'url': url},
                code='loading document failed')

        headers = {'Accept': 'application/ld+json, application/json'}
        response = requests.get(url, headers=headers, **kwargs)
        doc = {
            'contextUrl': None,
            'documentUrl': response.url,
            'document': response.json()
        }

        content_type = response.headers.get('content-type')
        # strip parameters (e.g. "; charset=utf-8") before comparing
        media_type = (content_type or '').split(';', 1)[0].strip().lower()
        link_header = response.headers.get('link')
        if link_header and media_type != 'application/ld+json':
            linked_context = parse_link_header(link_header).get(
                LINK_HEADER_REL)
            # only 1 related link header permitted
            if isinstance(linked_context, list):
                raise JsonLdError(
                    'URL could not be dereferenced, it has more than one '
                    'associated HTTP Link Header.',
                    'jsonld.LoadDocumentError',
                    {'url': url},
                    code='multiple context link headers')
            if linked_context:
                doc['contextUrl'] = linked_context['target']
        return doc
    except JsonLdError:
        # already in the expected shape; propagate untouched
        raise
    except Exception as cause:
        raise JsonLdError(
            'Could not retrieve a JSON-LD document from the URL.',
            'jsonld.LoadDocumentError', code='loading document failed',
            cause=cause)
def loader(url):
    """
    Retrieves JSON-LD at the given URL.

    ``secure``, ``session`` and ``kwargs`` are read from the enclosing
    scope; the request is issued through ``session.get`` with ``kwargs``
    forwarded.

    :param url: the URL to retrieve.

    :return: the RemoteDocument, a dict with the keys 'contextUrl',
        'documentUrl' and 'document'.

    :raises JsonLdError: with code 'loading document failed' for an
        invalid URL or any retrieval/parsing failure, or with code
        'multiple context link headers' when more than one JSON-LD context
        Link header is present.
    """
    try:
        # validate URL
        pieces = urllib_parse.urlparse(url)
        allowed_netloc_chars = set(
            string.ascii_letters + string.digits + "-.:")
        # reject any netloc character outside the allowed set; a
        # proper-superset test (">") would practically never trigger
        if (not all([pieces.scheme, pieces.netloc]) or
                pieces.scheme not in ["http", "https"] or
                not set(pieces.netloc) <= allowed_netloc_chars):
            raise JsonLdError(
                'URL could not be dereferenced; only "http" and "https" '
                "URLs are supported.",
                "jsonld.InvalidUrl",
                {"url": url},
                code="loading document failed",
            )
        if secure and pieces.scheme != "https":
            raise JsonLdError(
                "URL could not be dereferenced; secure mode enabled and "
                'the URL\'s scheme is not "https".',
                "jsonld.InvalidUrl",
                {"url": url},
                code="loading document failed",
            )

        headers = {"Accept": "application/ld+json, application/json"}
        response = session.get(url, headers=headers, **kwargs)
        doc = {
            "contextUrl": None,
            "documentUrl": response.url,
            "document": response.json(),
        }

        content_type = response.headers.get("content-type")
        # compare on the bare media type: the header value may carry
        # parameters such as "; charset=utf-8"
        media_type = (content_type or "").split(";", 1)[0].strip().lower()
        link_header = response.headers.get("link")
        if link_header and media_type != "application/ld+json":
            linked_context = parse_link_header(link_header).get(
                LINK_HEADER_REL)
            # only 1 related link header permitted
            if isinstance(linked_context, list):
                raise JsonLdError(
                    "URL could not be dereferenced, it has more than one "
                    "associated HTTP Link Header.",
                    "jsonld.LoadDocumentError",
                    {"url": url},
                    code="multiple context link headers",
                )
            if linked_context:
                doc["contextUrl"] = linked_context["target"]
        return doc
    except JsonLdError:
        # already in the expected shape; propagate untouched
        raise
    except Exception as cause:
        raise JsonLdError(
            "Could not retrieve a JSON-LD document from the URL.",
            "jsonld.LoadDocumentError",
            code="loading document failed",
            cause=cause,
        )
def loader(url, options=None):
    """
    Retrieves JSON-LD at the given URL.

    ``secure`` and ``kwargs`` are read from the enclosing scope; ``kwargs``
    is forwarded to ``requests.get``.

    :param url: the URL to retrieve.
    :param options: optional dict; its 'headers' entry, when present,
        replaces the default Accept header sent with the request.

    :return: the RemoteDocument, a dict with the keys 'contentType',
        'contextUrl', 'documentUrl' and 'document'.

    :raises JsonLdError: with code 'loading document failed' for an
        invalid URL or any retrieval/parsing failure, or with code
        'multiple context link headers' when more than one JSON-LD context
        Link header is present.
    """
    # avoid a shared mutable default argument
    if options is None:
        options = {}
    try:
        # validate URL
        pieces = urllib_parse.urlparse(url)
        allowed_netloc_chars = set(
            string.ascii_letters + string.digits + '-.:')
        # reject any netloc character outside the allowed set; a
        # proper-superset test (">") would practically never trigger
        if (not all([pieces.scheme, pieces.netloc]) or
                pieces.scheme not in ['http', 'https'] or
                not set(pieces.netloc) <= allowed_netloc_chars):
            raise JsonLdError(
                'URL could not be dereferenced; only "http" and "https" '
                'URLs are supported.',
                'jsonld.InvalidUrl', {'url': url},
                code='loading document failed')
        if secure and pieces.scheme != 'https':
            raise JsonLdError(
                'URL could not be dereferenced; secure mode enabled and '
                'the URL\'s scheme is not "https".',
                'jsonld.InvalidUrl', {'url': url},
                code='loading document failed')

        headers = options.get('headers')
        if headers is None:
            headers = {'Accept': 'application/ld+json, application/json'}
        response = requests.get(url, headers=headers, **kwargs)

        content_type = response.headers.get('content-type')
        if not content_type:
            content_type = 'application/octet-stream'
        # bare media type for comparisons: the header value may carry
        # parameters such as "; charset=utf-8"
        media_type = content_type.split(';', 1)[0].strip().lower()
        doc = {
            'contentType': content_type,
            'contextUrl': None,
            'documentUrl': response.url,
        }

        link_header = response.headers.get('link')
        if link_header:
            # parse once; both lookups below read the same result
            links = parse_link_header(link_header)

            linked_context = links.get(LINK_HEADER_REL)
            # only 1 related link header permitted
            if linked_context and media_type != 'application/ld+json':
                if isinstance(linked_context, list):
                    raise JsonLdError(
                        "URL could not be dereferenced, "
                        "it has more than one "
                        "associated HTTP Link Header.",
                        "jsonld.LoadDocumentError",
                        {"url": url},
                        code="multiple context link headers")
                doc["contextUrl"] = linked_context["target"]

            linked_alternate = links.get('alternate')
            # if not JSON-LD, alternate may point there
            if (linked_alternate and
                    linked_alternate.get('type') == 'application/ld+json' and
                    not re.match(
                        r'^application\/(\w*\+)?json$', media_type)):
                doc['contentType'] = 'application/ld+json'
                doc['documentUrl'] = prepend_base(
                    url, linked_alternate['target'])
                # fetch the alternate representation instead
                return loader(doc['documentUrl'], options=options)

        doc["document"] = response.json()
        return doc
    except JsonLdError:
        # already in the expected shape; propagate untouched
        raise
    except Exception as cause:
        raise JsonLdError(
            'Could not retrieve a JSON-LD document from the URL.',
            'jsonld.LoadDocumentError', code='loading document failed',
            cause=cause)
async def async_loader(url, headers):
    """
    Retrieves JSON-LD at the given URL asynchronously.

    ``secure``, ``loop`` and ``kwargs`` are read from the enclosing scope;
    ``kwargs`` is forwarded to the aiohttp ``session.get`` call.

    :param url: the URL to retrieve.
    :param headers: the HTTP headers to send with the request.

    :return: the RemoteDocument, a dict with the keys 'contentType',
        'contextUrl', 'documentUrl' and 'document'.

    :raises JsonLdError: with code 'loading document failed' for an
        invalid URL or any retrieval/parsing failure, or with code
        'multiple context link headers' when more than one JSON-LD context
        Link header is present.
    """
    try:
        # validate URL
        pieces = urllib_parse.urlparse(url)
        allowed_netloc_chars = set(
            string.ascii_letters + string.digits + '-.:')
        # reject any netloc character outside the allowed set; a
        # proper-superset test (">") would practically never trigger
        if (not all([pieces.scheme, pieces.netloc]) or
                pieces.scheme not in ['http', 'https'] or
                not set(pieces.netloc) <= allowed_netloc_chars):
            raise JsonLdError(
                'URL could not be dereferenced; '
                'only "http" and "https" URLs are supported.',
                'jsonld.InvalidUrl', {'url': url},
                code='loading document failed')
        if secure and pieces.scheme != 'https':
            raise JsonLdError(
                'URL could not be dereferenced; '
                'secure mode enabled and '
                'the URL\'s scheme is not "https".',
                'jsonld.InvalidUrl', {'url': url},
                code='loading document failed')

        async with aiohttp.ClientSession(loop=loop) as session:
            async with session.get(
                    url, headers=headers, **kwargs) as response:
                # Allow any content_type in trying to parse json
                # similar to requests library
                json_body = await response.json(content_type=None)

                content_type = response.headers.get('content-type')
                if not content_type:
                    content_type = 'application/octet-stream'
                # bare media type for comparisons: the header value may
                # carry parameters such as "; charset=utf-8"
                media_type = content_type.split(';', 1)[0].strip().lower()
                doc = {
                    'contentType': content_type,
                    'contextUrl': None,
                    'documentUrl': response.url.human_repr(),
                    'document': json_body
                }

                link_header = response.headers.get('link')
                if link_header:
                    # parse once; both lookups below read the same result
                    links = parse_link_header(link_header)

                    linked_context = links.get(LINK_HEADER_REL)
                    # only 1 related link header permitted
                    if (linked_context and
                            media_type != 'application/ld+json'):
                        if isinstance(linked_context, list):
                            raise JsonLdError(
                                'URL could not be dereferenced, '
                                'it has more than one '
                                'associated HTTP Link Header.',
                                'jsonld.LoadDocumentError',
                                {'url': url},
                                code='multiple context link headers')
                        doc['contextUrl'] = linked_context['target']

                    linked_alternate = links.get('alternate')
                    # if not JSON-LD, alternate may point there
                    if (linked_alternate and
                            linked_alternate.get('type') ==
                            'application/ld+json' and
                            not re.match(
                                r'^application\/(\w*\+)?json$',
                                media_type)):
                        # NOTE(review): only the metadata is rewritten
                        # here; 'document' still holds the body of the
                        # original response - confirm whether the
                        # alternate URL should be fetched instead.
                        doc['contentType'] = 'application/ld+json'
                        doc['documentUrl'] = jsonld.prepend_base(
                            url, linked_alternate['target'])
                return doc
    except JsonLdError:
        # already in the expected shape; propagate untouched
        raise
    except Exception as cause:
        raise JsonLdError(
            'Could not retrieve a JSON-LD document from the URL.',
            'jsonld.LoadDocumentError', code='loading document failed',
            cause=cause)