def render_get(self, request):
    query = query_split(request)

    eps = self.common_rd.get_endpoints()

    if 'd' in query:
        eps = (e for e in eps if e.d == query['d'])
    if 'ep' in query:
        eps = (e for e in eps if e.ep == query['ep'])
    if 'gp' in query:
        pass  # FIXME
    if 'et' in query:
        eps = (e for e in eps if e.et == query['et'])

    candidates = itertools.chain(*(e.get_conned_links().links for e in eps))

    for other_query in query:
        if other_query in ('d', 'ep', 'gp', 'et', 'page', 'count'):
            continue
        # evaluated eagerly so each filter binds the current other_query
        # rather than the last value of the loop variable
        candidates = [l for l in candidates
                      if getattr(l, other_query) == query[other_query]]

    try:
        candidates = list(candidates)
        if 'page' in query:
            # skip the preceding pages (requires 'count' as well)
            candidates = candidates[int(query['page']) * int(query['count']):]
        if 'count' in query:
            candidates = candidates[:int(query['count'])]
    except (KeyError, ValueError):
        raise BadRequest("page requires count, and both must be ints")

    return aiocoap.Message(
        payload=str(LinkHeader(candidates)).encode('utf8'),
        content_format=40)

def render_get(self, request):
    query = query_split(request)

    candidates = self.common_rd.get_endpoints()

    if 'd' in query:
        candidates = (c for c in candidates if c.d == query['d'])
    if 'ep' in query:
        candidates = (c for c in candidates if c.ep == query['ep'])
    if 'gp' in query:
        pass  # FIXME
    if 'rt' in query:
        pass  # FIXME
    if 'et' in query:
        candidates = (c for c in candidates if c.et == query['et'])

    try:
        candidates = list(candidates)
        if 'page' in query:
            # skip the preceding pages (requires 'count' as well)
            candidates = candidates[int(query['page']) * int(query['count']):]
        if 'count' in query:
            candidates = candidates[:int(query['count'])]
    except (KeyError, ValueError):
        raise BadRequest("page requires count, and both must be ints")

    result = [c.get_host_link() for c in candidates]

    return aiocoap.Message(
        payload=str(LinkHeader(result)).encode('utf8'),
        content_format=40)

def render_get(self, request):
    query = query_split(request)

    candidates = self.common_rd.get_endpoints()

    for search_key, search_value in query.items():
        if search_key in ('page', 'count'):
            continue  # filtered last

        if search_value.endswith('*'):
            matches = lambda x, start=search_value[:-1]: x.startswith(start)
        else:
            matches = lambda x, value=search_value: x == value

        if search_key in ('if', 'rt'):
            # list comprehensions are evaluated eagerly so that every filter
            # layer keeps the search_key/matches of its own loop iteration
            candidates = [c for c in candidates if any(
                any(matches(x) for x in getattr(r, search_key, '').split())
                for r in c.get_conned_links().links)]
            continue

        if search_key == 'href':
            candidates = [c for c in candidates
                          if matches(c.href)
                          or any(matches(r.href)
                                 for r in c.get_conned_links().links)]
            continue

        candidates = [c for c in candidates
                      if (search_key in c.registration_parameters
                          and matches(c.registration_parameters[search_key]))
                      or any(_link_matches(r, search_key, matches)
                             for r in c.get_conned_links().links)]

    candidates = _paginate(candidates, query)

    result = [c.get_host_link() for c in candidates]

    return aiocoap.Message(
        payload=str(LinkHeader(result)).encode('utf8'),
        content_format=40)

def timegate(request, url):
    # impose an arbitrary length-limit on the submitted URL, so that the headers don't become illegally large
    url = url_with_qs_and_hash(url, request.META['QUERY_STRING'])[:500]

    data = memento_data_for_url(request, url)
    if not data:
        return HttpResponseNotFound('404 page not found\n')

    accept_datetime = request.META.get('HTTP_ACCEPT_DATETIME')
    if accept_datetime:
        accept_datetime = parse_date(accept_datetime)
        if not accept_datetime:
            return HttpResponseBadRequest('Invalid value for Accept-Datetime.')
    else:
        accept_datetime = timezone.now()
    accept_datetime = accept_datetime.replace(tzinfo=tzutc())

    target, target_datetime = closest(
        map(lambda m: m.values(), data['mementos']['list']), accept_datetime)

    response = redirect(target)
    response['Vary'] = 'accept-datetime'
    response['Link'] = str(
        LinkHeader([
            Rel(data['original_uri'], rel='original'),
            Rel(data['timegate_uri'], rel='timegate'),
            Rel(data['timemap_uri']['link_format'], rel='timemap', type='application/link-format'),
            Rel(data['timemap_uri']['json_format'], rel='timemap', type='application/json'),
            Rel(data['timemap_uri']['html_format'], rel='timemap', type='text/html'),
            Rel(data['mementos']['first']['uri'], rel='first memento',
                datetime=datetime_to_http_date(data['mementos']['first']['datetime'])),
            Rel(data['mementos']['last']['uri'], rel='last memento',
                datetime=datetime_to_http_date(data['mementos']['last']['datetime'])),
            Rel(target, rel='memento', datetime=datetime_to_http_date(target_datetime)),
        ]))
    return response

def render_get(self, request):
    query = query_split(request)

    candidates = self.common_rd.get_endpoints()

    # FIXME which of the below can be done on the generated host links with
    # generic filtering rules, which would for example do =...* right?
    if 'href' in query:
        candidates = (c for c in candidates if c.href == query['href'])
    if 'd' in query:
        candidates = (c for c in candidates if c.d == query['d'])
    if 'ep' in query:
        candidates = (c for c in candidates if c.ep == query['ep'])
    if 'gp' in query:
        pass  # FIXME
    if 'rt' in query:
        pass  # FIXME
    if 'et' in query:
        candidates = (c for c in candidates if c.et == query['et'])

    try:
        candidates = list(candidates)
        if 'page' in query:
            candidates = candidates[int(query['page']) * int(query['count']):]
        if 'count' in query:
            candidates = candidates[:int(query['count'])]
    except (KeyError, ValueError):
        raise BadRequest("page requires count, and both must be ints")

    result = [c.get_host_link() for c in candidates]

    return aiocoap.Message(
        payload=str(LinkHeader(result)).encode('utf8'),
        content_format=40)

def memento_datetime(response):
    """Add Memento-Datetime header."""
    response.headers['Memento-Datetime'] = http_date(memento.updated)
    response.headers['Vary'] = 'accept-datetime, accept'
    response.headers['Link'] = LinkHeader(
        [Link(request.url, rel='original timegate')])
    return response

def __init__(self, path, network_con, delete_cb, update_cb,
             registration_parameters):
    # note that this can not modify d and ep any more, since they were
    # already used in keying to a path
    self.path = path

    self.links = LinkHeader([])

    self._delete_cb = delete_cb
    self._update_cb = update_cb

    self.update_params(network_con, registration_parameters, is_initial=True)

def __init__(self, con, timeout_cb, ep, d=None, lt=None, et=None):
    self.links = LinkHeader([])

    self.ep = ep
    self.d = d
    self.lt = lt or 86400
    self.et = et
    self.con = con

    self.timeout_cb = timeout_cb
    self._set_timeout()

def get_all_links(self):
    result = []
    for l in self.links.links:
        href = self.con + l.href if l.href.startswith('/') else l.href
        data = [[k, self.con + v if (k == 'anchor' and v.startswith('/')) else v]
                for (k, v) in l.attr_pairs]
        result.append(Link(href, data))
    return LinkHeader(result)

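# Standalone illustration (not part of the class above) of the same rebasing
# logic, using the link_header library directly; the registered links and the
# con value are made-up examples.
from link_header import Link, LinkHeader

con = 'coap://[2001:db8::1]'
registered = LinkHeader([
    Link('/sensors/temp', [('rt', 'temperature-c')]),
    Link('/actuators/led', [('anchor', '/actuators')]),
])

rebased = []
for l in registered.links:
    href = con + l.href if l.href.startswith('/') else l.href
    data = [[k, con + v if (k == 'anchor' and v.startswith('/')) else v]
            for (k, v) in l.attr_pairs]
    rebased.append(Link(href, data))

# Relative hrefs and anchors now carry the registration's con as a prefix.
print(LinkHeader(rebased))
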
def __init__(self, path, con, delete_cb, ep, d=None, lt=None, et=None):
    self.path = path
    self.links = LinkHeader([])

    self.ep = ep
    self.d = d
    self.lt = lt or 86400
    self.et = et
    self.con = con

    self._delete_cb = delete_cb
    self._set_timeout()

def headers(self):
    data, original = self._data
    links = self.envelope_links(self.representation, data, original).headers
    headers = {
        'Link': str(LinkHeader([Link(v, rel=k) for k, v in links.items()]))
    }
    if self.size:
        headers.update({
            'X-Page': self.page,
            'X-PageSize': self.size,
            'X-Total': self.count,
        })
    return headers, links

def generate_link_headers(metadata):
    """
    Generate Link: HTTP headers for API navigation.

    :param metadata: Dictionary with none, some or all of the keys 'next',
        'prev' and 'last' defined. The values are the corresponding
        pre-generated links.
    """
    link_metadata = {k: v for k, v in metadata.iteritems()
                     if k in LINK_HEADERS}

    links = []
    for k, v in link_metadata.iteritems():
        links.append(Link(v, rel=k))  # e.g. Link("http://example.com/foo", rel="next")

    return str(LinkHeader(links))  # RFC compliant headers e.g.

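# Hypothetical usage sketch (not from the original module); it assumes
# LINK_HEADERS contains 'next' and 'prev', so the 'self' entry is dropped.
# The iteritems() calls above imply a Python 2 code base.
pagination = {
    'next': 'http://example.com/api/items?page=3',
    'prev': 'http://example.com/api/items?page=1',
    'self': 'http://example.com/api/items?page=2',
}
print(generate_link_headers(pagination))
# prints something like (order may vary):
# <http://example.com/api/items?page=3>; rel=next, <http://example.com/api/items?page=1>; rel=prev
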
def assert_link_match(
    links: link_header.LinkHeader,
    rel: str,
    path: str,
    query: Optional[Dict[str, List[str]]] = None,
):
    if query is None:
        query = {}

    matching_links = links.links_by_attr_pairs([("rel", rel)])
    assert len(matching_links) == 1

    link = matching_links[0]
    href = urlparse(link.href)
    assert href.path == path
    # noinspection PyDeepBugsBinOperator
    assert query.items() <= parse_qs(href.query).items()

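# Hypothetical test usage (not from the original suite): parse a Link header
# returned by an API under test and check its "next" relation.
import link_header

links = link_header.parse(
    '<https://api.example.com/items?page=2&per_page=10>; rel="next"')
assert_link_match(links, "next", "/items", {"page": ["2"], "per_page": ["10"]})
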
def feed_rss(id):
    feed = Feed.query.get(id)
    rss = feed.rss()

    with open('rss.xml', 'w') as f:
        f.write(rss.rss())

    headers = {}
    headers['Link'] = str(
        LinkHeader([
            Link(feed.hub, rel="hub"),
            Link(feed.get_rss_url(), rel="self"),
        ]))
    return make_response(rss.rss(), 200, headers)

def get_conned_links(self):
    """Produce a LinkHeader object that represents all statements in the
    registration, resolved to the registration's con (and thus suitable for
    serving from the lookup interface).

    If protocol negotiation is implemented and con becomes a list, this
    function will probably grow parameters that hint at which con to use.
    """
    result = []
    for l in self.links.links:
        if 'anchor' in l:
            data = [(k, v) for (k, v) in l.attr_pairs if k != 'anchor'] + \
                [['anchor', urljoin(self.con, l.anchor)]]
        else:
            data = l.attr_pairs + [['anchor', self.con]]
        result.append(Link(l.href, data))
    return LinkHeader(result)

def render_get(self, request):
    query = query_split(request)

    eps = self.common_rd.get_endpoints()
    candidates = sum(
        ([(e, l) for l in e.get_all_links().links] for e in eps), [])

    if 'd' in query:
        candidates = ([e, l] for [e, l] in candidates if e.d == query['d'])
    if 'ep' in query:
        candidates = ([e, l] for [e, l] in candidates if e.ep == query['ep'])
    if 'gp' in query:
        pass  # FIXME
    if 'rt' in query:
        candidates = ([e, l] for [e, l] in candidates if query['rt'] in l.rt)
    if 'et' in query:
        candidates = ([e, l] for [e, l] in candidates if e.et == query['et'])

    for other_query in query:
        if other_query in ('d', 'ep', 'gp', 'rt', 'et', 'page', 'count'):
            continue
        # evaluated eagerly so each filter binds the current other_query
        # rather than the last value of the loop variable
        candidates = [[e, l] for [e, l] in candidates
                      if getattr(e, other_query) == query[other_query]]

    try:
        candidates = list(candidates)
        if 'page' in query:
            # skip the preceding pages (requires 'count' as well)
            candidates = candidates[int(query['page']) * int(query['count']):]
        if 'count' in query:
            candidates = candidates[:int(query['count'])]
    except (KeyError, ValueError):
        raise BadRequest("page requires count, and both must be ints")

    result = [l for (e, l) in candidates]

    return aiocoap.Message(
        payload=str(LinkHeader(result)).encode('utf8'),
        content_format=40)

async def render_get(self, request):
    query = query_split(request)

    eps = self.common_rd.get_endpoints()
    candidates = ((e, c) for e in eps for c in e.get_conned_links().links)

    for search_key, search_value in query.items():
        if search_key in ('page', 'count'):
            continue  # filtered last

        # FIXME: maybe we need query_split to turn ?rt=foo&obs into {'rt':
        # 'foo', 'obs': True} to match on obs, and then this needs more
        # type checking
        if search_value.endswith('*'):
            matches = lambda x, start=search_value[:-1]: x.startswith(start)
        else:
            matches = lambda x, value=search_value: x == value

        if search_key in ('if', 'rt'):
            # list comprehensions are evaluated eagerly so that every filter
            # layer keeps the search_key/matches of its own loop iteration
            candidates = [(e, c) for (e, c) in candidates if any(
                matches(x) for x in getattr(c, search_key, '').split())]
            continue

        if search_key == 'href':
            candidates = [(e, c) for (e, c) in candidates
                          if matches(c.href) or matches(e.href)]
            continue

        candidates = [(e, c) for (e, c) in candidates
                      if _link_matches(c, search_key, matches)
                      or (search_key in e.registration_parameters
                          and matches(e.registration_parameters[search_key]))]

    # strip endpoint
    candidates = (c for (e, c) in candidates)

    candidates = _paginate(candidates, query)

    return aiocoap.Message(
        payload=str(LinkHeader(candidates)).encode('utf8'),
        content_format=40)

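# Illustration of the matching rule above with made-up values: a trailing '*'
# in the query value requests prefix matching, anything else must match
# exactly.
search_value = 'temperature*'
if search_value.endswith('*'):
    matches = lambda x, start=search_value[:-1]: x.startswith(start)
else:
    matches = lambda x, value=search_value: x == value

print(matches('temperature-c'))  # True: prefix match
print(matches('humidity'))       # False
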
def paginate(self, item_list: list):
    if not self.do_paginate:
        return item_list

    count = len(item_list)
    links = [
        Link(self.request.url, rel="self"),
        Link(self.first_url(), rel="first"),
        Link(self.last_url(count), rel="last"),
    ]
    next_link = self.next_url(count)
    if next_link is not None:
        links.append(Link(next_link, rel="next"))
    prev_link = self.prev_url()
    if prev_link is not None:
        links.append(Link(prev_link, rel="prev"))
    self.response.headers["Link"] = str(LinkHeader(links))

    return item_list[self.offset():self.offset() + self.per_page]

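# Standalone sketch of the kind of header the method above sets; the URLs are
# made up here, while the real values come from self.request.url and the
# first_url()/last_url()/next_url()/prev_url() helpers.
from link_header import Link, LinkHeader

links = [
    Link("https://api.example.com/items?page=2", rel="self"),
    Link("https://api.example.com/items?page=1", rel="first"),
    Link("https://api.example.com/items?page=9", rel="last"),
    Link("https://api.example.com/items?page=3", rel="next"),
    Link("https://api.example.com/items?page=1", rel="prev"),
]
print(str(LinkHeader(links)))  # one comma-separated Link header value
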
def assert_no_link(links: link_header.LinkHeader, rel: str):
    assert len(links.links_by_attr_pairs([("rel", rel)])) == 0

def get_taxonomy_term(code=None, slug=None, prefer=None, page=None, size=None,
                      status_code=200, q=None):
    try:
        taxonomy = current_flask_taxonomies.get_taxonomy(code)
        prefer = taxonomy.merge_select(prefer)

        current_flask_taxonomies.permissions.taxonomy_term_read.enforce(
            request=request, taxonomy=taxonomy, slug=slug)

        if INCLUDE_DELETED in prefer:
            status_cond = sqlalchemy.sql.true()
        else:
            status_cond = TaxonomyTerm.status == TermStatusEnum.alive

        return_descendants = INCLUDE_DESCENDANTS in prefer

        if return_descendants:
            query = current_flask_taxonomies.descendants_or_self(
                TermIdentification(taxonomy=code, slug=slug),
                levels=prefer.options.get('levels', None),
                status_cond=status_cond,
                return_descendants_count=INCLUDE_DESCENDANTS_COUNT in prefer,
                return_descendants_busy_count=INCLUDE_STATUS in prefer)
        else:
            query = current_flask_taxonomies.filter_term(
                TermIdentification(taxonomy=code, slug=slug),
                status_cond=status_cond,
                return_descendants_count=INCLUDE_DESCENDANTS_COUNT in prefer,
                return_descendants_busy_count=INCLUDE_STATUS in prefer)

        if q:
            query = current_flask_taxonomies.apply_term_query(query, q, code)

        paginator = Paginator(
            prefer, query,
            page if return_descendants else None,
            size if return_descendants else None,
            json_converter=lambda data: build_descendants(
                data, prefer, root_slug=None),
            allow_empty=INCLUDE_SELF not in prefer,
            single_result=INCLUDE_SELF in prefer,
            has_query=q is not None)
        return paginator.jsonify(status_code=status_code)

    except NoResultFound:
        term = current_flask_taxonomies.filter_term(
            TermIdentification(taxonomy=code, slug=slug),
            status_cond=sqlalchemy.sql.true()).one_or_none()
        if not term:
            json_abort(404, {
                "message": "%s was not found on the server" % request.url,
                "reason": "does-not-exist"
            })
        elif term.obsoleted_by_id:
            obsoleted_by = term.obsoleted_by
            obsoleted_by_links = obsoleted_by.links()
            return Response(
                json.dumps({
                    'links': term.links(representation=prefer).envelope,
                    'status': 'moved'
                }),
                status=301,
                headers={
                    'Location': obsoleted_by_links.headers['self'],
                    'Link': str(LinkHeader([
                        Link(v, rel=k)
                        for k, v in term.links(
                            representation=prefer).envelope.items()
                    ]))
                },
                content_type='application/json')
        else:
            json_abort(410, {
                "message": "%s was not found on the server" % request.url,
                "reason": "deleted"
            })
    except:
        traceback.print_exc()
        raise

def memento_response(memento, uri_r, first=None, last=None, has_timemap=False):
    """Return a 302 redirection to the best Memento for a resource.

    It includes the necessary headers, including the datetime requested by
    the user.

    :param memento: (The URI string, dt obj) of the best memento.
    :param uri_r: The original resource's complete URI.
    :param first: (Optional) (URI string, dt obj) of the first memento.
    :param last: (Optional) (URI string, dt obj) of the last memento.
    :param has_timemap: Flag indicating that the handler accepts TimeMap
        requests too. Default: False.
    :return: The ``Response`` object.
    """
    # Gather links containing original and, if available: TimeMap, first, last.
    # TimeGate link not allowed here.
    links = [Link(uri_r, rel='original')]
    if has_timemap:
        for response_type, mime in (
            ('link', 'application/link-format'),
            ('json', 'application/json'),
        ):
            links.append(
                Link(url_for('timemap',
                             dict(response_type=response_type, uri_r=uri_r),
                             force_external=True),
                     rel='timemap', type=mime))

    (uri_m, dt_m) = memento
    (uri_last, dt_last) = (uri_first, dt_first) = (None, None)
    if last:
        (uri_last, dt_last) = last
    if first:
        (uri_first, dt_first) = first

    if first and last and uri_first == uri_last:
        # There's only one memento (first = best = last)
        assert (uri_last == uri_m)
        links.append(
            Link(uri_m, rel='first last memento', datetime=http_date(dt_m)))
    else:
        if first:
            links.append(
                Link(uri_first, rel='first memento',
                     datetime=http_date(dt_first)))
        if (uri_first != uri_m and uri_last != uri_m):
            # The best memento is neither the first nor the last
            links.append(Link(uri_m, rel='memento', datetime=http_date(dt_m)))
        if last:
            links.append(
                Link(uri_last, rel='last memento',
                     datetime=http_date(dt_last)))

    # Build the response headers
    headers = [
        ('Date', http_date(datetime.utcnow())),
        ('Vary', 'accept-datetime'),
        ('Content-Length', '0'),
        ('Content-Type', 'text/plain; charset=UTF-8'),
        ('Connection', 'close'),
        ('Location', uri_m),
        ('Link', str(LinkHeader(links))),
    ]
    return Response(None, headers=headers, status=302)

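# Standalone sketch of the Link relations this helper emits in the common
# multi-memento case; all URIs and dates are made up, and http_date is
# assumed here to be werkzeug.http.http_date.
from datetime import datetime
from link_header import Link, LinkHeader
from werkzeug.http import http_date

links = [
    Link('http://example.com/page', rel='original'),
    Link('http://archive.example.com/19990101/page', rel='first memento',
         datetime=http_date(datetime(1999, 1, 1))),
    Link('http://archive.example.com/20090601/page', rel='memento',
         datetime=http_date(datetime(2009, 6, 1))),
    Link('http://archive.example.com/20200301/page', rel='last memento',
         datetime=http_date(datetime(2020, 3, 1))),
]
print(str(LinkHeader(links)))
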
def single_permalink(request, guid):
    """
    Given a Perma ID, serve it up.
    """
    raw_user_agent = request.META.get('HTTP_USER_AGENT', '')

    # Create a canonical version of guid (non-alphanumerics removed, hyphens every 4 characters, uppercase),
    # and forward to that if it's different from current guid.
    canonical_guid = Link.get_canonical_guid(guid)

    # We only do the redirect if the correctly-formatted GUID actually exists --
    # this prevents actual 404s from redirecting with weird formatting.
    link = get_object_or_404(Link.objects.all_with_deleted(), guid=canonical_guid)

    if canonical_guid != guid:
        return HttpResponsePermanentRedirect(
            reverse('single_permalink', args=[canonical_guid]))

    # Forward to replacement link if replacement_link is set.
    if link.replacement_link_id:
        return HttpResponseRedirect(
            reverse('single_permalink', args=[link.replacement_link_id]))

    # If we get an unrecognized archive type (which could be an old type like 'live' or 'pdf'), forward to default version
    serve_type = request.GET.get('type')
    if serve_type is None:
        serve_type = 'source'
    elif serve_type not in valid_serve_types:
        return HttpResponsePermanentRedirect(
            reverse('single_permalink', args=[canonical_guid]))

    # serve raw WARC
    if serve_type == 'warc_download':
        return stream_warc_if_permissible(link, request.user)

    # handle requested capture type
    if serve_type == 'image':
        capture = link.screenshot_capture

        # not all Perma Links have screenshots; if no screenshot is present,
        # forward to primary capture for playback or for appropriate error message
        if (not capture or capture.status != 'success') and link.primary_capture:
            return HttpResponseRedirect(
                reverse('single_permalink', args=[guid]))
    else:
        capture = link.primary_capture

        # if primary capture did not work, but screenshot did work, forward to screenshot
        if (not capture or capture.status != 'success') \
                and link.screenshot_capture and link.screenshot_capture.status == 'success':
            return HttpResponseRedirect(
                reverse('single_permalink', args=[guid]) + "?type=image")

    try:
        capture_mime_type = capture.mime_type()
    except AttributeError:
        # If capture is deleted, then mime type does not exist. Catch error.
        capture_mime_type = None

    # Special handling for mobile pdf viewing because it can be buggy
    # Redirecting to a download page if on mobile
    redirect_to_download_view = redirect_to_download(capture_mime_type, raw_user_agent)

    # If this record was just created by the current user, show them a new record message
    new_record = request.user.is_authenticated and link.created_by_id == request.user.id and not link.user_deleted \
        and link.creation_timestamp > timezone.now() - timedelta(seconds=300)

    # Provide the max upload size, in case the upload form is used
    max_size = settings.MAX_ARCHIVE_FILE_SIZE / 1024 / 1024

    if not link.submitted_description:
        link.submitted_description = "This is an archive of %s from %s" % (
            link.submitted_url,
            link.creation_timestamp.strftime("%A %d, %B %Y"))

    logger.info(f"Preparing context for {link.guid}")
    context = {
        'link': link,
        'redirect_to_download_view': redirect_to_download_view,
        'mime_type': capture_mime_type,
        'can_view': request.user.can_view(link),
        'can_edit': request.user.can_edit(link),
        'can_delete': request.user.can_delete(link),
        'can_toggle_private': request.user.can_toggle_private(link),
        'capture': capture,
        'serve_type': serve_type,
        'new_record': new_record,
        'this_page': 'single_link',
        'max_size': max_size,
        'link_url': settings.HOST + '/' + link.guid,
        'protocol': protocol(),
    }

    if context['can_view'] and link.can_play_back():
        try:
            logger.info(f"Initializing play back of {link.guid}")
            wr_username = link.init_replay_for_user(request)
        except Exception:  # noqa
            # We are experiencing many varieties of transient flakiness in playback:
            # second attempts, triggered by refreshing the page, almost always seem to work.
            # While we debug... let's give playback a second try here, and see if this
            # noticeably improves user experience.
            logger.exception(
                f"First attempt to init replay of {link.guid} failed. (Retrying: observe whether this error recurs.)")
            time.sleep(settings.WR_PLAYBACK_RETRY_AFTER)
            logger.info(f"Initializing play back of {link.guid} (2nd try)")
            wr_username = link.init_replay_for_user(request)

        logger.info(f"Updating context with WR playback information for {link.guid}")
        context.update({
            'wr_host': settings.PLAYBACK_HOST,
            'wr_prefix': link.wr_iframe_prefix(wr_username),
            'wr_url': capture.url,
            'wr_timestamp': link.creation_timestamp.strftime('%Y%m%d%H%M%S'),
        })

    logger.info(f"Rendering template for {link.guid}")
    response = render(request, 'archive/single-link.html', context)

    # Adjust status code
    if link.user_deleted:
        response.status_code = 410
    elif not context['can_view'] and link.is_private:
        response.status_code = 403

    # Add memento headers, when appropriate
    logger.info(f"Deciding whether to include memento headers for {link.guid}")
    if link.is_visible_to_memento():
        logger.info(f"Including memento headers for {link.guid}")
        response['Memento-Datetime'] = datetime_to_http_date(link.creation_timestamp)
        # impose an arbitrary length-limit on the submitted URL, so that this header doesn't become illegally large
        url = link.submitted_url[:500]
        response['Link'] = str(
            LinkHeader([
                Rel(url, rel='original'),
                Rel(timegate_url(request, url), rel='timegate'),
                Rel(timemap_url(request, url, 'link'), rel='timemap', type='application/link-format'),
                Rel(timemap_url(request, url, 'json'), rel='timemap', type='application/json'),
                Rel(timemap_url(request, url, 'html'), rel='timemap', type='text/html'),
                Rel(memento_url(request, link), rel='memento',
                    datetime=datetime_to_http_date(link.creation_timestamp)),
            ]))
    logger.info(f"Returning response for {link.guid}")
    return response

def make_links(root="http://foobar.com/api/dts", **kwargs):
    header = LinkHeader(
        [Link(root + "/" + val, rel=key) for key, val in kwargs.items()])
    return {"Link": str(header)}

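# Hypothetical usage sketch; the relation names and paths are made up.
headers = make_links(collections="collections", navigation="navigation")
print(headers["Link"])
# prints something like:
# <http://foobar.com/api/dts/collections>; rel=collections, <http://foobar.com/api/dts/navigation>; rel=navigation
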