Esempio n. 1
0
    def _add_cache_headers(self, new_headers, http_cache):
        try:
            age = int(http_cache)
        except:
            age = 0

        if age <= 0:
            new_headers.append(('Cache-Control', 'no-cache; no-store'))
        else:
            dt = datetime.utcnow()
            dt = dt + timedelta(seconds=age)
            new_headers.append(('Cache-Control', 'max-age=' + str(age)))
            new_headers.append(('Expires', datetime_to_http_date(dt)))
Esempio n. 2
0
    def _add_cache_headers(self, new_headers, http_cache):
        try:
            age = int(http_cache)
        except:
            age = 0

        if age <= 0:
            new_headers.append(('Cache-Control', 'no-cache; no-store'))
        else:
            dt = datetime.utcnow()
            dt = dt + timedelta(seconds=age)
            new_headers.append(('Cache-Control', 'max-age=' + str(age)))
            new_headers.append(('Expires', datetime_to_http_date(dt)))
Esempio n. 3
0
    def __call__(self, cdx, params):
        entry = self.load_resource(cdx, params)
        if not entry:
            return None, None

        warc_headers, other_headers, stream = entry

        out_headers = {}
        out_headers['WebAgg-Type'] = 'warc'
        out_headers['WebAgg-Source-Coll'] = quote(cdx.get('source', ''), safe=':/')
        out_headers['Content-Type'] = 'application/warc-record'

        if not warc_headers:
            if other_headers:
                out_headers['Link'] = other_headers.get('Link')
                out_headers['Memento-Datetime'] = other_headers.get('Memento-Datetime')
                out_headers['Content-Length'] = other_headers.get('Content-Length')

            return out_headers, StreamIter(stream)

        out_headers['Link'] = MementoUtils.make_link(
                                warc_headers.get_header('WARC-Target-URI'),
                                'original')

        memento_dt = iso_date_to_datetime(warc_headers.get_header('WARC-Date'))
        out_headers['Memento-Datetime'] = datetime_to_http_date(memento_dt)

        warc_headers_buff = warc_headers.to_bytes()

        lenset = self._set_content_len(warc_headers.get_header('Content-Length'),
                                     out_headers,
                                     len(warc_headers_buff))

        streamiter = StreamIter(stream,
                                header1=warc_headers_buff,
                                header2=other_headers)

        if not lenset:
            out_headers['Transfer-Encoding'] = 'chunked'
            streamiter = chunk_encode_iter(streamiter)

        return out_headers, streamiter
Esempio n. 4
0
def _make_cache_headers():
    """Return a fixed list of sample cache-related response headers.

    The list holds ``Content-Length``, ``Cache-Control``, ``Expires`` and
    ``ETag`` as ``(name, value)`` tuples, in that order. Only ``Expires``
    varies: it is ``datetime.now()`` formatted by ``datetime_to_http_date``.
    """
    # NOTE(review): datetime.now() is local time; confirm that
    # datetime_to_http_date does not expect a UTC value here.
    expires_value = datetime_to_http_date(datetime.now())

    return [
        ('Content-Length', '123'),
        ('Cache-Control', 'max-age=10'),
        ('Expires', expires_value),
        ('ETag', '123456'),
    ]
Esempio n. 5
0
    def load_resource(self, cdx, params):
        """Fetch ``cdx['load_url']`` upstream and wrap the reply as a WARC response.

        :param cdx: mapping describing the capture. ``load_url``,
            ``timestamp``, ``url`` and ``memento_url`` are read;
            ``timestamp`` and ``source`` may be updated in place.
        :param params: request parameters. ``_input_req`` supplies the
            method, headers and body sent upstream; ``_timeout`` and
            ``content_type`` are read if present.
        :returns: ``None`` when there is no ``load_url``, when
            ``content_type`` equals ``VideoLoader.CONTENT_TYPE``, or when
            ``memento_url`` is set but the reply has no ``Memento-Datetime``
            header. ``(None, upstream headers, upstream response)`` when the
            reply is marked ``WebAgg-Type: warc``. Otherwise
            ``(warc_headers, http_headers_bytes, upstream response)``.
        :raises LiveResourceException: if the upstream request itself fails.
        """
        load_url = cdx.get('load_url')
        if not load_url:
            return None

        if params.get('content_type') == VideoLoader.CONTENT_TYPE:
            return None

        input_req = params['_input_req']

        req_headers = input_req.get_req_headers()

        dt = timestamp_to_datetime(cdx['timestamp'])

        if cdx.get('memento_url'):
            req_headers['Accept-Datetime'] = datetime_to_http_date(dt)

        method = input_req.get_req_method()
        data = input_req.get_req_body()

        # PreparedRequest is used only to derive the final URL and an
        # Authorization header (presumably from credentials embedded in the
        # URL -- confirm against the requests documentation).
        p = PreparedRequest()
        p.prepare_url(load_url, None)
        p.prepare_headers(None)
        p.prepare_auth(None, load_url)

        auth = p.headers.get('Authorization')
        if auth:
            req_headers['Authorization'] = auth

        load_url = p.url

        try:
            upstream_res = self.pool.urlopen(method=method,
                                             url=load_url,
                                             body=data,
                                             headers=req_headers,
                                             redirect=False,
                                             assert_same_host=False,
                                             preload_content=False,
                                             decode_content=False,
                                             retries=self.num_retries,
                                             timeout=params.get('_timeout'))

        except Exception:
            # Any failure to reach the upstream is reported uniformly.
            raise LiveResourceException(load_url)

        memento_dt = upstream_res.headers.get('Memento-Datetime')
        if memento_dt:
            dt = http_date_to_datetime(memento_dt)
            cdx['timestamp'] = datetime_to_timestamp(dt)
        elif cdx.get('memento_url'):
            # 'memento_url' is set but no Memento-Datetime header is present:
            # that is an error. The body was never read
            # (preload_content=False), so close the response rather than
            # leaking its open connection.
            try:
                upstream_res.close()
            except Exception:
                # Best-effort cleanup; the error result is returned anyway.
                pass
            return None

        agg_type = upstream_res.headers.get('WebAgg-Type')
        if agg_type == 'warc':
            # Default to '' so a missing header does not make unquote() fail.
            source_coll = upstream_res.headers.get('WebAgg-Source-Coll', '')
            cdx['source'] = unquote(source_coll)
            return None, upstream_res.headers, upstream_res

        self.raise_on_self_redirect(params, cdx,
                                    str(upstream_res.status),
                                    upstream_res.headers.get('Location'))

        if upstream_res.version == 11:
            version = '1.1'
        else:
            version = '1.0'

        status = 'HTTP/{version} {status} {reason}\r\n'
        status = status.format(version=version,
                               status=upstream_res.status,
                               reason=upstream_res.reason)

        orig_resp = upstream_res._original_response

        # Header lines are collected in a list and joined once, so a failure
        # part-way through the first attempt cannot leave partial output in
        # the buffer before the fallback runs.
        try:  #pragma: no cover
            # PY 3
            resp_headers = orig_resp.headers._headers
            header_lines = [n + ': ' + v + '\r\n'
                            for n, v in resp_headers
                            if n.lower() not in self.SKIP_HEADERS]
        except Exception:  #pragma: no cover
            # PY 2
            # NOTE(review): if getheaders() yields (name, value) pairs here,
            # ``n`` can never match a string in SKIP_HEADERS -- confirm.
            resp_headers = orig_resp.msg.headers
            header_lines = [v
                            for n, v in zip(orig_resp.getheaders(), resp_headers)
                            if n not in self.SKIP_HEADERS]

        http_headers_buff = status + ''.join(header_lines) + '\r\n'
        http_headers_buff = http_headers_buff.encode('latin-1')

        # The peer address is optional metadata; any failure to obtain it
        # simply omits WARC-IP-Address.
        try:
            fp = upstream_res._fp.fp
            if hasattr(fp, 'raw'):  #pragma: no cover
                fp = fp.raw
            remote_ip = fp._sock.getpeername()[0]
        except Exception:  #pragma: no cover
            remote_ip = None

        warc_headers = {}

        warc_headers['WARC-Type'] = 'response'
        warc_headers['WARC-Record-ID'] = self._make_warc_id()
        warc_headers['WARC-Target-URI'] = cdx['url']
        warc_headers['WARC-Date'] = datetime_to_iso_date(dt)
        if remote_ip:
            warc_headers['WARC-IP-Address'] = remote_ip

        warc_headers['Content-Type'] = 'application/http; msgtype=response'

        self._set_content_len(upstream_res.headers.get('Content-Length', -1),
                              warc_headers,
                              len(http_headers_buff))

        warc_headers = StatusAndHeaders('WARC/1.0', warc_headers.items())
        return (warc_headers, http_headers_buff, upstream_res)
Esempio n. 6
0
def _make_cache_headers():
    """Build a sample list of cache-related ``(name, value)`` headers.

    Returns ``Content-Length``, ``Cache-Control``, ``Expires`` and ``ETag``
    in that order. The ``Expires`` value is ``datetime.now()`` passed
    through ``datetime_to_http_date``; the other values are constants.
    """
    headers = [('Content-Length', '123'), ('Cache-Control', 'max-age=10')]
    # NOTE(review): datetime.now() yields local time; verify that
    # datetime_to_http_date is not expecting UTC.
    headers.append(('Expires', datetime_to_http_date(datetime.now())))
    headers.append(('ETag', '123456'))
    return headers