Example #1
0
    def render_get(self, request):
        """Answer a resource lookup with the links of all matching endpoints.

        Endpoints are narrowed by the ``d``, ``ep`` and ``et`` query
        parameters (``gp`` is accepted but not implemented yet).  Every other
        parameter except ``page`` and ``count`` is compared for equality
        against the attribute of the same name on each link.  The remaining
        links are paginated and serialized as a link-format payload
        (content format 40).

        Raises BadRequest when ``page`` is given without ``count`` or when
        either of them is not an integer.
        """
        query = query_split(request)

        eps = self.common_rd.get_endpoints()
        if 'd' in query:
            eps = (e for e in eps if e.d == query['d'])
        if 'ep' in query:
            eps = (e for e in eps if e.ep == query['ep'])
        if 'gp' in query:
            pass  # FIXME
        if 'et' in query:
            eps = (e for e in eps if e.et == query['et'])

        candidates = itertools.chain.from_iterable(
            e.get_conned_links().links for e in eps)
        for other_query in query:
            if other_query in ('d', 'ep', 'gp', 'et', 'page', 'count'):
                continue
            wanted = query[other_query]
            # Filter eagerly: a lazy generator would look up the loop
            # variable only when consumed, i.e. after the loop has moved on,
            # and every filter would then test the last key.  A link that
            # lacks the attribute simply does not match.
            candidates = [l for l in candidates
                          if getattr(l, other_query, None) == wanted]

        candidates = list(candidates)
        # Only the parameter parsing is guarded, so unrelated errors are not
        # misreported as a bad request.
        try:
            if 'page' in query:
                start = int(query['page']) * int(query['count'])
                # slice (not index) so that a list of links remains
                candidates = candidates[start:]
            if 'count' in query:
                candidates = candidates[:int(query['count'])]
        except (KeyError, ValueError):
            raise BadRequest("page requires count, and both must be ints")

        return aiocoap.Message(payload=str(
            LinkHeader(candidates)).encode('utf8'),
                               content_format=40)
Example #2
0
        def render_get(self, request):
            """Answer an endpoint lookup with the host links of all matches.

            Endpoints are narrowed by the ``d``, ``ep`` and ``et`` query
            parameters (``gp`` and ``rt`` are accepted but not implemented
            yet), paginated via ``page``/``count`` and serialized as a
            link-format payload (content format 40).

            Raises BadRequest when ``page`` is given without ``count`` or
            when either of them is not an integer.
            """
            query = query_split(request)

            candidates = self.common_rd.get_endpoints()
            if 'd' in query:
                candidates = (c for c in candidates if c.d == query['d'])
            if 'ep' in query:
                candidates = (c for c in candidates if c.ep == query['ep'])
            if 'gp' in query:
                pass  # FIXME
            if 'rt' in query:
                pass  # FIXME
            if 'et' in query:
                candidates = (c for c in candidates if c.et == query['et'])

            candidates = list(candidates)
            # Only the parameter parsing is guarded, so unrelated errors are
            # not misreported as a bad request.
            try:
                if 'page' in query:
                    start = int(query['page']) * int(query['count'])
                    # slice (not index): indexing would yield one endpoint
                    # (or IndexError) instead of the rest of the list
                    candidates = candidates[start:]
                if 'count' in query:
                    candidates = candidates[:int(query['count'])]
            except (KeyError, ValueError):
                raise BadRequest("page requires count, and both must be ints")

            result = [c.get_host_link() for c in candidates]

            return aiocoap.Message(payload=str(
                LinkHeader(result)).encode('utf8'),
                                   content_format=40)
Example #3
0
    def render_get(self, request):
        """Answer an endpoint lookup with the host links of all matches.

        Every query parameter except ``page`` and ``count`` is a filter; a
        value ending in ``*`` is a prefix match, anything else must be equal.
        ``if`` and ``rt`` are matched against the space separated values of
        the endpoint's links, ``href`` against the endpoint's own href or
        any of its links' hrefs, and all other keys against the registration
        parameters or (through ``_link_matches``) the links.  The result is
        paginated by ``_paginate`` and serialized as a link-format payload
        (content format 40).
        """
        query = query_split(request)

        candidates = self.common_rd.get_endpoints()

        for search_key, search_value in query.items():
            if search_key in ('page', 'count'):
                continue  # filtered last

            if search_value.endswith('*'):
                matches = lambda x, start=search_value[:-1]: x.startswith(start)
            else:
                # bound as a default so the predicate keeps this value
                matches = lambda x, expected=search_value: x == expected

            # Every filter below is applied eagerly (list, not generator).
            # A lazy generator would only look up ``matches`` and
            # ``search_key`` when consumed after the loop, so all filters
            # would silently use the values of the last iteration.
            if search_key in ('if', 'rt'):
                candidates = [
                    c for c in candidates
                    if any(matches(x)
                           for r in c.get_conned_links().links
                           for x in getattr(r, search_key, '').split())
                ]
                continue

            if search_key == 'href':
                candidates = [
                    c for c in candidates
                    if matches(c.href)
                    or any(matches(r.href)
                           for r in c.get_conned_links().links)
                ]
                continue

            candidates = [
                c for c in candidates
                if (search_key in c.registration_parameters
                    and matches(c.registration_parameters[search_key]))
                or any(_link_matches(r, search_key, matches)
                       for r in c.get_conned_links().links)
            ]

        candidates = _paginate(candidates, query)

        result = [c.get_host_link() for c in candidates]

        return aiocoap.Message(payload=str(LinkHeader(result)).encode('utf8'), content_format=40)
Example #4
0
def timegate(request, url):
    """Redirect to the memento of ``url`` closest to the requested datetime.

    The datetime is taken from the ``Accept-Datetime`` request header and
    defaults to the current time.  Responds with 404 when no memento data
    exists for the URL and with 400 when the header cannot be parsed.  The
    redirect carries ``Vary`` and ``Link`` headers describing the original
    resource, the timegate, the timemaps and the first, last and selected
    mementos.
    """
    # Cap the length of the submitted URL so that the headers built from it
    # don't become illegally large.
    capped_url = url_with_qs_and_hash(url, request.META['QUERY_STRING'])[:500]
    data = memento_data_for_url(request, capped_url)
    if not data:
        return HttpResponseNotFound('404 page not found\n')

    requested = request.META.get('HTTP_ACCEPT_DATETIME')
    if not requested:
        when = timezone.now()
    else:
        when = parse_date(requested)
        if not when:
            return HttpResponseBadRequest('Invalid value for Accept-Datetime.')
    when = when.replace(tzinfo=tzutc())

    memento_values = map(lambda m: m.values(), data['mementos']['list'])
    target, target_datetime = closest(memento_values, when)

    response = redirect(target)
    response['Vary'] = 'accept-datetime'

    timemaps = data['timemap_uri']
    first = data['mementos']['first']
    last = data['mementos']['last']
    links = [
        Rel(data['original_uri'], rel='original'),
        Rel(data['timegate_uri'], rel='timegate'),
        Rel(timemaps['link_format'], rel='timemap', type='application/link-format'),
        Rel(timemaps['json_format'], rel='timemap', type='application/json'),
        Rel(timemaps['html_format'], rel='timemap', type='text/html'),
        Rel(first['uri'], rel='first memento', datetime=datetime_to_http_date(first['datetime'])),
        Rel(last['uri'], rel='last memento', datetime=datetime_to_http_date(last['datetime'])),
        Rel(target, rel='memento', datetime=datetime_to_http_date(target_datetime)),
    ]
    response['Link'] = str(LinkHeader(links))
    return response
Example #5
0
    def render_get(self, request):
        """Answer an endpoint lookup with the host links of all matches.

        Endpoints are kept when their ``href``, ``d``, ``ep`` and ``et``
        attributes equal the query parameters of the same name (``gp`` and
        ``rt`` are accepted but not implemented yet).  The result is
        paginated via ``page``/``count`` and serialized as a link-format
        payload (content format 40).

        Raises BadRequest when ``page`` is given without ``count`` or when
        either of them is not an integer.
        """
        params = query_split(request)

        def _attr_equals(items, name, wanted):
            # name/wanted arrive as arguments, so each lazy filter keeps
            # its own values
            return (item for item in items if getattr(item, name) == wanted)

        # FIXME which of the below can be done on the generated host links with
        # generic filtering rules, which would for example do =...* right?
        # FIXME: 'gp' and 'rt' are recognized but not filtered on yet
        endpoints = self.common_rd.get_endpoints()
        for name in ('href', 'd', 'ep', 'et'):
            if name in params:
                endpoints = _attr_equals(endpoints, name, params[name])

        try:
            selected = list(endpoints)
            if 'page' in params:
                offset = int(params['page']) * int(params['count'])
                selected = selected[offset:]
            if 'count' in params:
                limit = int(params['count'])
                selected = selected[:limit]
        except (KeyError, ValueError):
            raise BadRequest("page requires count, and both must be ints")

        host_links = [endpoint.get_host_link() for endpoint in selected]
        payload = str(LinkHeader(host_links)).encode('utf8')

        return aiocoap.Message(payload=payload, content_format=40)
Example #6
0
 def memento_datetime(response):
     """Set the Memento-Datetime, Vary and Link headers on ``response``.

     ``memento`` and ``request`` come from the enclosing scope; the same
     response object is returned.
     """
     headers = response.headers
     headers['Memento-Datetime'] = http_date(memento.updated)
     headers['Vary'] = 'accept-datetime, accept'
     original_link = Link(request.url, rel='original timegate')
     headers['Link'] = LinkHeader([original_link])
     return response
Example #7
0
        def __init__(self, path, network_con, delete_cb, update_cb, registration_parameters):
            """Store path and callbacks, start with an empty link set and
            apply the initial registration parameters."""
            # d and ep can not be modified here any more: they were already
            # used in keying to a path
            self._delete_cb, self._update_cb = delete_cb, update_cb
            self.links = LinkHeader([])
            self.path = path

            self.update_params(
                network_con,
                registration_parameters,
                is_initial=True,
            )
Example #8
0
        def __init__(self, con, timeout_cb, ep, d=None, lt=None, et=None):
            """Store the registration data, start with an empty link set and
            arm the timeout."""
            self.con = con
            self.ep = ep
            self.d = d
            self.et = et
            # a missing (or otherwise falsy) lifetime falls back to 86400
            self.lt = lt if lt else 86400
            self.links = LinkHeader([])

            self.timeout_cb = timeout_cb
            self._set_timeout()
Example #9
0
 def get_all_links(self):
     """Return a LinkHeader of this registration's links in which every
     href and ``anchor`` value that starts with '/' is prefixed with
     ``self.con``."""
     def _absolute(ref):
         # only references starting with '/' get the con prepended
         if ref.startswith('/'):
             return self.con + ref
         return ref

     resolved = []
     for link in self.links.links:
         target = _absolute(link.href)
         pairs = []
         for key, value in link.attr_pairs:
             if key == 'anchor':
                 value = _absolute(value)
             pairs.append([key, value])
         resolved.append(Link(target, pairs))
     return LinkHeader(resolved)
Example #10
0
        def __init__(self, path, con, delete_cb, ep, d=None, lt=None, et=None):
            """Store the registration data and delete callback, start with an
            empty link set and arm the timeout."""
            self.path = path
            self.con = con
            self.ep = ep
            self.d = d
            self.et = et
            # a missing (or otherwise falsy) lifetime falls back to 86400
            self.lt = lt if lt else 86400
            self.links = LinkHeader([])

            self._delete_cb = delete_cb
            self._set_timeout()
Example #11
0
    def headers(self):
        """Return ``(headers, links)`` for this result.

        ``links`` is the ``headers`` mapping of the envelope links;
        ``headers`` always contains a ``Link`` entry built from it and, when
        ``self.size`` is truthy, the ``X-Page``, ``X-PageSize`` and
        ``X-Total`` entries as well.
        """
        data, original = self._data
        envelope = self.envelope_links(self.representation, data, original)
        links = envelope.headers

        link_values = [Link(target, rel=name) for name, target in links.items()]
        result = {'Link': str(LinkHeader(link_values))}
        if self.size:
            result['X-Page'] = self.page
            result['X-PageSize'] = self.size
            result['X-Total'] = self.count

        return result, links
Example #12
0
def generate_link_headers(metadata):
    """
    Generate Link: HTTP headers for API navigation.

    :param metadata: Dictionary with none, some or all of the
    keys 'next', 'prev' and 'last' defined. The values are the
    corresponding pre-generated links. Keys that are not listed in
    LINK_HEADERS are ignored.
    :return: The ``Link`` header value as a string.
    """
    # .items() instead of .iteritems(): the latter does not exist on Python 3
    links = [
        Link(v, rel=k)  # e.g. Link("http://example.com/foo", rel="next")
        for k, v in metadata.items()
        if k in LINK_HEADERS
    ]

    return str(LinkHeader(links))
def assert_link_match(
    links: link_header.LinkHeader,
    rel: str,
    path: str,
    query: Dict[str, List[str]] = None,
):
    """Assert that exactly one link has the given ``rel``, that its href has
    the given ``path`` and that its parsed query string contains every item
    of ``query`` (no query items are required when ``query`` is None)."""
    expected_items = ({} if query is None else query).items()

    found = links.links_by_attr_pairs([("rel", rel)])
    assert len(found) == 1

    parsed = urlparse(found[0].href)
    assert parsed.path == path

    actual_items = parse_qs(parsed.query).items()
    # noinspection PyDeepBugsBinOperator
    assert expected_items <= actual_items
Example #14
0
def feed_rss(id):
    """Serve the RSS document of the feed with the given id.

    The rendered document is also written to ``rss.xml`` in the current
    working directory.  The response carries a ``Link`` header with the
    feed's ``hub`` and ``self`` URLs.
    """
    feed = Feed.query.get(id)

    rss = feed.rss()
    # Render once so the file and the response body are the same document.
    body = rss.rss()

    # Context manager: the file is closed even when the write fails.
    with open('rss.xml', 'w') as f:
        f.write(body)

    headers = {
        'Link': str(
            LinkHeader(
                [Link(feed.hub, rel="hub"),
                 Link(feed.get_rss_url(), rel="self")])),
    }

    return make_response(body, 200, headers)
Example #15
0
        def get_conned_links(self):
            """Build a LinkHeader with all links of the registration, each
            carrying an ``anchor`` resolved against the registration's con
            (which makes them suitable for serving from the lookup
            interface).

            Links that already have an anchor get it replaced by its
            urljoin with the con; all others get the con itself as anchor.

            Should protocol negotiation be implemented and con become a
            list, this function will probably grow parameters hinting at
            which con to use.
            """
            resolved = []
            for link in self.links.links:
                if 'anchor' not in link:
                    pairs = link.attr_pairs + [['anchor', self.con]]
                else:
                    pairs = [(key, value)
                             for (key, value) in link.attr_pairs
                             if key != 'anchor']
                    pairs.append(['anchor', urljoin(self.con, link.anchor)])
                resolved.append(Link(link.href, pairs))
            return LinkHeader(resolved)
Example #16
0
        def render_get(self, request):
            """Answer a resource lookup with the links of all matching
            endpoints.

            Candidates are (endpoint, link) pairs.  ``d``, ``ep`` and ``et``
            must equal the endpoint's attributes, ``rt`` must be contained
            in the link's ``rt`` (``gp`` is accepted but not implemented
            yet), and every other parameter except ``page`` and ``count``
            must equal the endpoint attribute of the same name.  The
            remaining links are paginated and serialized as a link-format
            payload (content format 40).

            Raises BadRequest when ``page`` is given without ``count`` or
            when either of them is not an integer.
            """
            query = query_split(request)

            eps = self.common_rd.get_endpoints()
            # A flat comprehension instead of sum(..., []), which copies
            # the accumulated list once per endpoint.
            candidates = [(e, l) for e in eps
                          for l in e.get_all_links().links]
            if 'd' in query:
                candidates = [(e, l) for (e, l) in candidates
                              if e.d == query['d']]
            if 'ep' in query:
                candidates = [(e, l) for (e, l) in candidates
                              if e.ep == query['ep']]
            if 'gp' in query:
                pass  # FIXME
            if 'rt' in query:
                # links without an rt attribute simply do not match
                candidates = [(e, l) for (e, l) in candidates
                              if query['rt'] in getattr(l, 'rt', '')]
            if 'et' in query:
                candidates = [(e, l) for (e, l) in candidates
                              if e.et == query['et']]
            for other_query in query:
                if other_query in ('d', 'ep', 'gp', 'rt', 'et', 'page',
                                   'count'):
                    continue
                wanted = query[other_query]
                # Filtered eagerly: a lazy generator would look up the loop
                # variable only when consumed, after the loop has moved on,
                # and all filters would test the last key.  Endpoints
                # without the attribute do not match.
                candidates = [(e, l) for (e, l) in candidates
                              if getattr(e, other_query, None) == wanted]

            # Only the parameter parsing is guarded, so unrelated errors are
            # not misreported as a bad request.
            try:
                if 'page' in query:
                    start = int(query['page']) * int(query['count'])
                    # slice (not index) so that a list of pairs remains
                    candidates = candidates[start:]
                if 'count' in query:
                    candidates = candidates[:int(query['count'])]
            except (KeyError, ValueError):
                raise BadRequest("page requires count, and both must be ints")

            result = [l for (e, l) in candidates]

            return aiocoap.Message(payload=str(
                LinkHeader(result)).encode('utf8'),
                                   content_format=40)
Example #17
0
    async def render_get(self, request):
        """Answer a resource lookup with the links of all matching
        endpoints.

        Candidates are (endpoint, link) pairs.  Every query parameter except
        ``page`` and ``count`` is a filter; a value ending in ``*`` is a
        prefix match, anything else must be equal.  ``if`` and ``rt`` are
        matched against the space separated values of the link attribute,
        ``href`` against the link's or the endpoint's href, and all other
        keys against the link (through ``_link_matches``) or the endpoint's
        registration parameters.  The remaining links are paginated by
        ``_paginate`` and serialized as a link-format payload (content
        format 40).
        """
        query = query_split(request)

        eps = self.common_rd.get_endpoints()
        candidates = ((e, c) for e in eps for c in e.get_conned_links().links)

        for search_key, search_value in query.items():
            if search_key in ('page', 'count'):
                continue  # filtered last

            # FIXME: maybe we need query_split to turn ?rt=foo&obs into {'rt':
            # 'foo', 'obs': True} to match on obs, and then this needs more
            # type checking
            if search_value.endswith('*'):
                matches = lambda x, start=search_value[:-1]: x.startswith(start)
            else:
                # bound as a default so the predicate keeps this value
                matches = lambda x, expected=search_value: x == expected

            # Every filter below is applied eagerly (list, not generator).
            # A lazy generator would only look up ``matches`` and
            # ``search_key`` when consumed after the loop, so all filters
            # would silently use the values of the last iteration.
            if search_key in ('if', 'rt'):
                candidates = [
                    (e, c) for (e, c) in candidates
                    if any(matches(x)
                           for x in getattr(c, search_key, '').split())
                ]
                continue

            if search_key == 'href':
                candidates = [(e, c) for (e, c) in candidates
                              if matches(c.href) or matches(e.href)]
                continue

            candidates = [
                (e, c) for (e, c) in candidates
                if _link_matches(c, search_key, matches)
                or (search_key in e.registration_parameters
                    and matches(e.registration_parameters[search_key]))
            ]

        # strip endpoint
        candidates = (c for (e, c) in candidates)

        candidates = _paginate(candidates, query)

        return aiocoap.Message(payload=str(
            LinkHeader(candidates)).encode('utf8'),
                               content_format=40)
Example #18
0
    def paginate(self, item_list: list):
        """Return the current page of ``item_list`` and set the ``Link``
        response header with the self, first, last and (when available)
        next and prev page URLs.

        When pagination is switched off the list is returned untouched and
        no header is set.
        """
        if not self.do_paginate:
            return item_list

        total = len(item_list)
        page_links = [
            Link(self.request.url, rel="self"),
            Link(self.first_url(), rel="first"),
            Link(self.last_url(total), rel="last"),
        ]

        following = self.next_url(total)
        if following is not None:
            page_links.append(Link(following, rel="next"))

        previous = self.prev_url()
        if previous is not None:
            page_links.append(Link(previous, rel="prev"))

        self.response.headers["Link"] = str(LinkHeader(page_links))

        window = slice(self.offset(), self.offset() + self.per_page)
        return item_list[window]
def assert_no_link(links: link_header.LinkHeader, rel: str):
    """Assert that ``links`` holds no link with the given ``rel``."""
    matching = links.links_by_attr_pairs([("rel", rel)])
    assert len(matching) == 0
Example #20
0
def get_taxonomy_term(code=None,
                      slug=None,
                      prefer=None,
                      page=None,
                      size=None,
                      status_code=200,
                      q=None):
    """Return the JSON response for one taxonomy term.

    The term is identified by taxonomy ``code`` and ``slug``; ``prefer`` is
    merged with the taxonomy's selection and decides whether deleted terms,
    descendants and the various counts are included.  ``page`` and ``size``
    are only handed to the paginator when descendants are requested.  ``q``,
    when truthy, is applied to the query as an additional term query.

    When the lookup raises NoResultFound the term is looked up again
    regardless of its status: a missing term aborts with 404, a term with
    ``obsoleted_by_id`` yields a 301 response pointing at the obsoleting
    term, and any other term aborts with 410.

    Any other exception has its traceback printed and is re-raised.
    """
    try:
        taxonomy = current_flask_taxonomies.get_taxonomy(code)
        prefer = taxonomy.merge_select(prefer)

        current_flask_taxonomies.permissions.taxonomy_term_read.enforce(
            request=request, taxonomy=taxonomy, slug=slug)

        # deleted terms are only visible when explicitly requested
        if INCLUDE_DELETED in prefer:
            status_cond = sqlalchemy.sql.true()
        else:
            status_cond = TaxonomyTerm.status == TermStatusEnum.alive

        return_descendants = INCLUDE_DESCENDANTS in prefer

        if return_descendants:
            query = current_flask_taxonomies.descendants_or_self(
                TermIdentification(taxonomy=code, slug=slug),
                levels=prefer.options.get('levels', None),
                status_cond=status_cond,
                return_descendants_count=INCLUDE_DESCENDANTS_COUNT in prefer,
                return_descendants_busy_count=INCLUDE_STATUS in prefer)
        else:
            query = current_flask_taxonomies.filter_term(
                TermIdentification(taxonomy=code, slug=slug),
                status_cond=status_cond,
                return_descendants_count=INCLUDE_DESCENDANTS_COUNT in prefer,
                return_descendants_busy_count=INCLUDE_STATUS in prefer)
        if q:
            query = current_flask_taxonomies.apply_term_query(query, q, code)
        # page and size are only passed on when descendants are returned
        paginator = Paginator(prefer,
                              query,
                              page if return_descendants else None,
                              size if return_descendants else None,
                              json_converter=lambda data: build_descendants(
                                  data, prefer, root_slug=None),
                              allow_empty=INCLUDE_SELF not in prefer,
                              single_result=INCLUDE_SELF in prefer,
                              has_query=q is not None)

        return paginator.jsonify(status_code=status_code)

    except NoResultFound:
        # look the term up again with an always-true status condition to
        # tell "never existed" apart from "obsoleted" and "deleted"
        term = current_flask_taxonomies.filter_term(
            TermIdentification(taxonomy=code, slug=slug),
            status_cond=sqlalchemy.sql.true()).one_or_none()
        if not term:
            json_abort(
                404, {
                    "message": "%s was not found on the server" % request.url,
                    "reason": "does-not-exist"
                })
        elif term.obsoleted_by_id:
            # the term was replaced: answer with 301 and a Location header
            # taken from the obsoleting term's 'self' link
            obsoleted_by = term.obsoleted_by
            obsoleted_by_links = obsoleted_by.links()
            return Response(
                json.dumps({
                    'links': term.links(representation=prefer).envelope,
                    'status': 'moved'
                }),
                status=301,
                headers={
                    'Location':
                    obsoleted_by_links.headers['self'],
                    'Link':
                    str(
                        LinkHeader([
                            Link(v, rel=k) for k, v in term.links(
                                representation=prefer).envelope.items()
                        ]))
                },
                content_type='application/json')
        else:
            json_abort(
                410, {
                    "message": "%s was not found on the server" % request.url,
                    "reason": "deleted"
                })
    except:
        # NOTE(review): bare except also catches KeyboardInterrupt and
        # SystemExit; it only prints the traceback and re-raises
        traceback.print_exc()
        raise
Example #21
0
def memento_response(memento, uri_r, first=None, last=None, has_timemap=False):
    """Return a 302 redirection to the best Memento for a resource.

    It includes necessary headers including datetime requested by the user.

    :param memento: (The URI string, dt obj) of the best memento.
    :param uri_r: The original resource's complete URI.
    :param first: (Optional) (URI string, dt obj) of the first memento.
    :param last: (Optional) (URI string, dt obj) of the last memento.
    :param has_timemap: Flag indicating that the handler accepts
        TimeMap requests too. Default False.
    :return: The ``Response`` object.
    """
    # Gather links containing original and if available: TimeMap, first, last
    # TimeGate link not allowed here
    links = [Link(uri_r, rel='original')]
    if has_timemap:
        for response_type, mime in (
            ('link', 'application/link-format'),
            ('json', 'application/json'),
        ):
            links.append(
                Link(url_for('timemap',
                             dict(response_type=response_type, uri_r=uri_r),
                             force_external=True),
                     rel='timemap',
                     type=mime))

    (uri_m, dt_m) = memento
    (uri_last, dt_last) = (uri_first, dt_first) = (None, None)
    if last:
        (uri_last, dt_last) = last
    if first:
        (uri_first, dt_first) = first
    if first and last and uri_first == uri_last:
        # There's only one memento (first = best = last)
        assert (uri_last == uri_m)
        links.append(
            Link(uri_m, rel='first last memento', datetime=http_date(dt_m)))
    else:
        if first:
            # Point at the first memento itself; linking uri_m here would
            # pair the best memento's URI with the first memento's datetime.
            links.append(
                Link(uri_first,
                     rel='first memento',
                     datetime=http_date(dt_first)))
        if (uri_first != uri_m and uri_last != uri_m):
            # The best memento is neither the first nor the last
            links.append(Link(uri_m, rel='memento', datetime=http_date(dt_m)))
        if last:
            # Likewise, the last memento gets its own URI.
            links.append(
                Link(uri_last,
                     rel='last memento',
                     datetime=http_date(dt_last)))

    # Builds the response headers
    headers = [
        ('Date', http_date(datetime.utcnow())),
        ('Vary', 'accept-datetime'),
        ('Content-Length', '0'),
        ('Content-Type', 'text/plain; charset=UTF-8'),
        ('Connection', 'close'),
        ('Location', uri_m),
        ('Link', str(LinkHeader(links))),
    ]
    return Response(None, headers=headers, status=302)
Example #22
0
def single_permalink(request, guid):
    """
    Given a Perma ID, serve it up.

    Responds with 404 when no link (deleted ones included) exists for the
    canonical form of ``guid``.  Redirects to the canonical GUID, to a
    replacement link, or to the default view when the ``type`` GET parameter
    is not a valid serve type.  ``type=warc_download`` streams the WARC
    instead of rendering a page.

    Otherwise renders 'archive/single-link.html'.  The status code becomes
    410 for user-deleted links and 403 for private links the user cannot
    view.  Memento headers are added when the link is visible to memento.
    """
    raw_user_agent = request.META.get('HTTP_USER_AGENT', '')

    # Create a canonical version of guid (non-alphanumerics removed, hyphens every 4 characters, uppercase),
    # and forward to that if it's different from current guid.
    canonical_guid = Link.get_canonical_guid(guid)

    # We only do the redirect if the correctly-formatted GUID actually exists --
    # this prevents actual 404s from redirecting with weird formatting.
    link = get_object_or_404(Link.objects.all_with_deleted(),
                             guid=canonical_guid)

    if canonical_guid != guid:
        return HttpResponsePermanentRedirect(
            reverse('single_permalink', args=[canonical_guid]))

    # Forward to replacement link if replacement_link is set.
    if link.replacement_link_id:
        return HttpResponseRedirect(
            reverse('single_permalink', args=[link.replacement_link_id]))

    # If we get an unrecognized archive type (which could be an old type like 'live' or 'pdf'), forward to default version
    serve_type = request.GET.get('type')
    if serve_type is None:
        serve_type = 'source'
    elif serve_type not in valid_serve_types:
        return HttpResponsePermanentRedirect(
            reverse('single_permalink', args=[canonical_guid]))

    # serve raw WARC
    if serve_type == 'warc_download':
        return stream_warc_if_permissible(link, request.user)

    # handle requested capture type
    if serve_type == 'image':
        capture = link.screenshot_capture

        # not all Perma Links have screenshots; if no screenshot is present,
        # forward to primary capture for playback or for appropriate error message
        if (not capture
                or capture.status != 'success') and link.primary_capture:
            return HttpResponseRedirect(
                reverse('single_permalink', args=[guid]))
    else:
        capture = link.primary_capture

        # if primary capture did not work, but screenshot did work, forward to screenshot
        if (
                not capture or capture.status != 'success'
        ) and link.screenshot_capture and link.screenshot_capture.status == 'success':
            return HttpResponseRedirect(
                reverse('single_permalink', args=[guid]) + "?type=image")

    try:
        capture_mime_type = capture.mime_type()
    except AttributeError:
        # If capture is deleted, then mime type does not exist. Catch error.
        capture_mime_type = None

    # Special handling for mobile pdf viewing because it can be buggy
    # Redirecting to a download page if on mobile
    redirect_to_download_view = redirect_to_download(capture_mime_type,
                                                     raw_user_agent)

    # If this record was just created by the current user, show them a new record message
    # ("just created" means within the last 300 seconds)
    new_record = request.user.is_authenticated and link.created_by_id == request.user.id and not link.user_deleted \
                 and link.creation_timestamp > timezone.now() - timedelta(seconds=300)

    # Provide the max upload size, in case the upload form is used
    max_size = settings.MAX_ARCHIVE_FILE_SIZE / 1024 / 1024

    # Fall back to a generated description; it is only set on the in-memory
    # object here, this function does not save the link.
    if not link.submitted_description:
        link.submitted_description = "This is an archive of %s from %s" % (
            link.submitted_url,
            link.creation_timestamp.strftime("%A %d, %B %Y"))

    logger.info(f"Preparing context for {link.guid}")
    context = {
        'link': link,
        'redirect_to_download_view': redirect_to_download_view,
        'mime_type': capture_mime_type,
        'can_view': request.user.can_view(link),
        'can_edit': request.user.can_edit(link),
        'can_delete': request.user.can_delete(link),
        'can_toggle_private': request.user.can_toggle_private(link),
        'capture': capture,
        'serve_type': serve_type,
        'new_record': new_record,
        'this_page': 'single_link',
        'max_size': max_size,
        'link_url': settings.HOST + '/' + link.guid,
        'protocol': protocol(),
    }

    if context['can_view'] and link.can_play_back():
        try:
            logger.info(f"Initializing play back of {link.guid}")
            wr_username = link.init_replay_for_user(request)
        except Exception:  # noqa
            # We are experiencing many varieties of transient flakiness in playback:
            # second attempts, triggered by refreshing the page, almost always seem to work.
            # While we debug... let's give playback a second try here, and see if this
            # noticeably improves user experience.
            logger.exception(
                f"First attempt to init replay of {link.guid} failed. (Retrying: observe whether this error recurs.)"
            )
            time.sleep(settings.WR_PLAYBACK_RETRY_AFTER)
            logger.info(f"Initializing play back of {link.guid} (2nd try)")
            # a failure of this second attempt is not caught and propagates
            wr_username = link.init_replay_for_user(request)

        logger.info(
            f"Updating context with WR playback information for {link.guid}")
        context.update({
            'wr_host':
            settings.PLAYBACK_HOST,
            'wr_prefix':
            link.wr_iframe_prefix(wr_username),
            'wr_url':
            capture.url,
            'wr_timestamp':
            link.creation_timestamp.strftime('%Y%m%d%H%M%S'),
        })

    logger.info(f"Rendering template for {link.guid}")
    response = render(request, 'archive/single-link.html', context)

    # Adjust status code
    if link.user_deleted:
        response.status_code = 410
    elif not context['can_view'] and link.is_private:
        response.status_code = 403

    # Add memento headers, when appropriate
    logger.info(f"Deciding whether to include memento headers for {link.guid}")
    if link.is_visible_to_memento():
        logger.info(f"Including memento headers for {link.guid}")
        response['Memento-Datetime'] = datetime_to_http_date(
            link.creation_timestamp)
        # impose an arbitrary length-limit on the submitted URL, so that this header doesn't become illegally large
        url = link.submitted_url[:500]
        response['Link'] = str(
            LinkHeader([
                Rel(url, rel='original'),
                Rel(timegate_url(request, url), rel='timegate'),
                Rel(timemap_url(request, url, 'link'),
                    rel='timemap',
                    type='application/link-format'),
                Rel(timemap_url(request, url, 'json'),
                    rel='timemap',
                    type='application/json'),
                Rel(timemap_url(request, url, 'html'),
                    rel='timemap',
                    type='text/html'),
                Rel(memento_url(request, link),
                    rel='memento',
                    datetime=datetime_to_http_date(link.creation_timestamp)),
            ]))
    logger.info(f"Returning response for {link.guid}")
    return response
Example #23
0
def make_links(root="http://foobar.com/api/dts", **kwargs):
    """Return a ``{"Link": ...}`` header dict with one link per keyword
    argument: the name is the ``rel`` and the value is appended to ``root``
    after a slash."""
    links = []
    for rel, suffix in kwargs.items():
        links.append(Link(root + "/" + suffix, rel=rel))
    return {"Link": str(LinkHeader(links))}