Example #1
0
    def _add_memento_links(self, url, full_prefix, memento_dt, memento_ts,
                           status_headers, is_timegate, is_proxy, coll=None):
        """Append the Memento response headers to ``status_headers``.

        Adds ``Memento-Datetime`` (when a datetime is known), always adds a
        ``Link`` header, and adds ``Vary: accept-datetime`` for timegates.

        :param url: the original url the generated links refer to
        :param full_prefix: prefix joined with timestamp and replay modifier
            to build the memento url when not in proxy mode
        :param memento_dt: http date of the memento; derived from
            ``memento_ts`` when empty
        :param memento_ts: timestamp of the memento
        :param status_headers: object whose ``headers`` list is appended to
        :param is_timegate: when true, the ``Vary`` header is added
        :param is_proxy: when true, the memento url is ``url`` itself and the
            original/timegate/timemap links are left out
        :param coll: passed through to ``MementoUtils.make_memento_link``
        """
        # fall back to the timestamp when no http date was supplied
        if memento_ts and not memento_dt:
            memento_dt = timestamp_to_http_date(memento_ts)

        memento_url = None
        if memento_dt:
            status_headers.headers.append(('Memento-Datetime', memento_dt))
            memento_url = (
                url if is_proxy
                else full_prefix + memento_ts + self.replay_mod + '/' + url)

        timegate_url, timemap_url = self._get_timegate_timemap(url, full_prefix)

        link = []
        if not is_proxy:
            rel_targets = (
                (url, 'original'),
                (timegate_url, 'timegate'),
                (timemap_url, 'timemap'),
            )
            link.extend(MementoUtils.make_link(target, rel)
                        for target, rel in rel_targets)

        if memento_dt:
            link.append(MementoUtils.make_memento_link(
                memento_url, 'memento', memento_dt, coll))

        status_headers.headers.append(('Link', ', '.join(link)))

        if is_timegate:
            status_headers.headers.append(('Vary', 'accept-datetime'))
Example #2
0
    def make_timemap_memento_link(cls,
                                  cdx,
                                  datetime=None,
                                  rel='memento',
                                  end=',\n',
                                  memento_format=None):
        """Build a single memento link entry for a timemap from a cdx record.

        :param dict cdx: The cdx record; ``url``, ``source-coll`` and, when
            needed, ``filename``/``offset``/``length``/``timestamp`` are read
        :param str|None datetime: The http date to use; computed from the
            cdx ``timestamp`` when empty
        :param str rel: The rel type of the link
        :param str end: Suffix appended after the generated link string
        :param str|None memento_format: Passed through to
            ``make_memento_link``
        :return: The memento link string followed by ``end``
        :rtype: str
        """
        # records without a url are addressed by their file location instead
        target = cdx.get('url') or 'file://{0}:{1}:{2}'.format(
            cdx.get('filename'), cdx.get('offset'), cdx.get('length'))

        http_date = datetime or timestamp_to_http_date(cdx['timestamp'])

        entry = cls.make_memento_link(
            target, rel, http_date, cdx.get('source-coll'), memento_format)
        return entry + end
Example #3
0
    def _assert_memento(self, resp, url, ts, fmod, dt=''):
        """Assert that ``resp`` carries the expected memento headers and links.

        :param resp: response whose ``headers`` are inspected
        :param url: original url the memento was requested for
        :param ts: memento timestamp; also used to derive ``dt`` when empty
        :param fmod: value placed right after the timestamp in the expected
            ``Content-Location`` path and passed to ``make_memento_link``
        :param dt: expected ``Memento-Datetime`` value (optional)
        """
        if not dt:
            dt = timestamp_to_http_date(ts)

        links = self.get_links(resp)
        headers = resp.headers

        # memento datetime header is present and matches
        assert MEMENTO_DATETIME in headers
        assert headers[MEMENTO_DATETIME] == dt

        # memento link
        expected_memento = self.make_memento_link(url, ts, dt, fmod)
        assert expected_memento in links

        # content location
        location_path = '/pywb/{1}{0}/{2}'.format(fmod, ts, url)
        assert location_path in headers['Content-Location']

        # content location part of memento link
        assert headers['Content-Location'] in expected_memento

        # timegate link (always built with an empty modifier here)
        expected_timegate = self.make_timegate_link(url, '')
        assert expected_timegate in links

        # timemap link
        expected_timemap = self.make_timemap_link(url)
        assert expected_timemap in links

        # original
        expected_original = self.make_original_link(url)
        assert expected_original in links
Example #4
0
    def _assert_memento(self, resp, url, ts, fmod, dt=''):
        """Assert that ``resp`` carries the expected memento headers and links.

        :param resp: response whose ``headers`` are inspected
        :param url: original url the memento was requested for
        :param ts: memento timestamp; also used to derive ``dt`` when empty
        :param fmod: value placed right after the timestamp in the expected
            ``Content-Location`` path; also passed to ``make_memento_link``
            and ``make_timegate_link``
        :param dt: expected ``Memento-Datetime`` value (optional)
        """
        if not dt:
            dt = timestamp_to_http_date(ts)

        links = self.get_links(resp)
        headers = resp.headers

        # memento datetime header is present and matches
        assert MEMENTO_DATETIME in headers
        assert headers[MEMENTO_DATETIME] == dt

        # memento link
        expected_memento = self.make_memento_link(url, ts, dt, fmod)
        assert expected_memento in links

        # content location
        location_path = '/pywb/{1}{0}/{2}'.format(fmod, ts, url)
        assert location_path in headers['Content-Location']

        # content location part of memento link
        assert headers['Content-Location'] in expected_memento

        # timegate link (built with the same modifier as the memento)
        expected_timegate = self.make_timegate_link(url, fmod)
        assert expected_timegate in links

        # timemap link
        expected_timemap = self.make_timemap_link(url)
        assert expected_timemap in links

        # original
        expected_original = self.make_original_link(url)
        assert expected_original in links
Example #5
0
    def make_timemap_memento_link(cls, cdx, datetime=None, rel='memento', end=',\n'):
        """Build a single memento link entry for a timemap from a cdx record.

        The link target is the cdx ``url``; when that is missing, a
        ``file://filename:offset:length`` locator is used instead. When no
        ``datetime`` is given it is computed from the cdx ``timestamp``.
        Returns the result of ``cls.make_memento_link`` followed by ``end``.
        """
        target = cdx.get('url') or 'file://{0}:{1}:{2}'.format(
            cdx.get('filename'), cdx.get('offset'), cdx.get('length'))

        http_date = datetime or timestamp_to_http_date(cdx['timestamp'])

        entry = cls.make_memento_link(target, rel, http_date, cdx.get('source-coll'))
        return entry + end
Example #6
0
    def make_timemap_memento_link(cls,
                                  cdx,
                                  datetime=None,
                                  rel='memento',
                                  end=',\n'):
        """Create one memento link line of a timemap for the given cdx record.

        Uses the cdx ``url`` as the link target, or a
        ``file://filename:offset:length`` locator when the record has no url.
        The datetime defaults to the http date of the cdx ``timestamp``.
        The string from ``cls.make_memento_link`` is returned with ``end``
        appended.
        """
        location = cdx.get('url')
        if not location:
            # no url on the record: point at the stored file span instead
            parts = (cdx.get('filename'), cdx.get('offset'), cdx.get('length'))
            location = 'file://{0}:{1}:{2}'.format(*parts)

        http_date = datetime if datetime else timestamp_to_http_date(
            cdx['timestamp'])

        entry = cls.make_memento_link(location, rel, http_date,
                                      cdx.get('source-coll'))
        return entry + end
Example #7
0
    def get_timegate_links(self, params, timestamp):
        """Return the ``Link`` header of a HEAD request to the timegate.

        The timegate url is produced by ``res_template`` from
        ``self.timegate_url`` and ``params``; the request carries an
        ``Accept-Datetime`` header derived from ``timestamp``.

        :raises NotFoundException: if building the headers or the request
            fails for any reason, or if the response has no ``Link`` header
        """
        timegate = res_template(self.timegate_url, params)
        http_date = timestamp_to_http_date(timestamp)

        try:
            request_headers = self._get_headers(params)
            request_headers['Accept-Datetime'] = http_date
            response = self.sesh.head(timegate, headers=request_headers)
            response.raise_for_status()
        except Exception as err:
            # any failure is logged and reported to the caller as not-found
            self.logger.debug('FAILED: ' + str(err))
            raise NotFoundException(timegate)

        link_header = response.headers.get('Link')
        if link_header:
            return link_header

        raise NotFoundException(timegate)
Example #8
0
    def _add_memento_links(self,
                           url,
                           full_prefix,
                           memento_dt,
                           memento_ts,
                           status_headers,
                           is_timegate,
                           is_proxy,
                           coll=None,
                           pref_applied=None,
                           mod=None,
                           is_memento=True):
        """Append the memento related headers to ``status_headers``.

        Emits ``Memento-Datetime`` (only when ``is_memento`` and a datetime
        is known), a ``Link`` header, ``Preference-Applied`` when
        ``pref_applied`` is set, and a ``Vary`` header listing
        ``accept-datetime`` and/or ``Prefer`` as applicable.

        :param str url: The URI-R the links refer to
        :param str full_prefix: The replay prefix used to build the memento url
        :param str|None memento_dt: The memento datetime; derived from
            ``memento_ts`` when empty
        :param str memento_ts: The memento timestamp
        :param warcio.StatusAndHeaders status_headers: The headers to extend
        :param bool is_timegate: If true, ``accept-datetime`` is added to Vary
        :param bool is_proxy: If true, the memento url is ``url`` itself and
            the original/timegate/timemap links are omitted
        :param str|None coll: Passed to ``MementoUtils.make_memento_link``
        :param str|None pref_applied: Value of the ``Preference-Applied``
            header; when set, ``Prefer`` is also added to Vary
        :param str|None mod: The rewrite modifier; ``self.replay_mod`` is
            used when empty
        :param bool is_memento: If false, ``Memento-Datetime`` is not emitted
        :rtype: None
        """
        modifier = mod if mod else self.replay_mod

        # fall back to the timestamp when no http date was supplied
        if memento_ts and not memento_dt:
            memento_dt = timestamp_to_http_date(memento_ts)

        memento_url = None
        if memento_dt:
            if is_memento:
                status_headers.headers.append(('Memento-Datetime', memento_dt))

            memento_url = (
                url if is_proxy
                else full_prefix + memento_ts + modifier + '/' + url)

        timegate_url, timemap_url = self._get_timegate_timemap(
            url, full_prefix, mod)

        link = []
        if not is_proxy:
            rel_targets = (
                (url, 'original'),
                (timegate_url, 'timegate'),
                (timemap_url, 'timemap'),
            )
            link.extend(MementoUtils.make_link(target, rel)
                        for target, rel in rel_targets)

        if memento_dt:
            link.append(MementoUtils.make_memento_link(
                memento_url, 'memento', memento_dt, coll))

        status_headers.headers.append(('Link', ', '.join(link)))

        # collect the Vary tokens in the order they apply
        vary_tokens = []
        if is_timegate:
            vary_tokens.append('accept-datetime')

        if pref_applied:
            vary_tokens.append('Prefer')
            status_headers.headers.append(('Preference-Applied', pref_applied))

        if vary_tokens:
            status_headers.headers.append(('Vary', ', '.join(vary_tokens)))