def _add_cache_headers(self, new_headers, http_cache):
    """Append response caching headers to ``new_headers`` in place.

    :param new_headers: list of ``(name, value)`` tuples to extend.
    :param http_cache: desired cache lifetime in seconds; anything that
        cannot be converted to an int is treated as ``0``.

    A lifetime of zero or less disables caching. A positive lifetime adds a
    matching ``Cache-Control: max-age`` and an ``Expires`` header that many
    seconds from the current UTC time.
    """
    try:
        age = int(http_cache)
    except (TypeError, ValueError, OverflowError):
        # Missing or malformed setting: fall back to "do not cache".
        age = 0

    if age <= 0:
        # Cache-Control directives are comma-separated; with a semicolon
        # the whole value would be read as one unknown directive.
        new_headers.append(('Cache-Control', 'no-cache, no-store'))
        return

    expires = datetime.utcnow() + timedelta(seconds=age)
    new_headers.append(('Cache-Control', 'max-age=' + str(age)))
    new_headers.append(('Expires', datetime_to_http_date(expires)))
def __call__(self, cdx, params):
    """Load the resource for ``cdx`` and wrap it as a WARC-record response.

    Returns ``(None, None)`` when ``self.load_resource`` yields nothing,
    otherwise ``(out_headers, iterator)`` where ``out_headers`` is a dict of
    response headers and ``iterator`` yields the response body.
    """
    loaded = self.load_resource(cdx, params)
    if not loaded:
        return None, None

    warc_headers, other_headers, stream = loaded

    out_headers = {
        'WebAgg-Type': 'warc',
        'WebAgg-Source-Coll': quote(cdx.get('source', ''), safe=':/'),
        'Content-Type': 'application/warc-record',
    }

    if not warc_headers:
        # No WARC headers were produced: relay the stream unchanged and
        # copy over the headers the loader handed back, if any.
        if other_headers:
            for name in ('Link', 'Memento-Datetime', 'Content-Length'):
                out_headers[name] = other_headers.get(name)

        return out_headers, StreamIter(stream)

    target_uri = warc_headers.get_header('WARC-Target-URI')
    out_headers['Link'] = MementoUtils.make_link(target_uri, 'original')

    record_dt = iso_date_to_datetime(warc_headers.get_header('WARC-Date'))
    out_headers['Memento-Datetime'] = datetime_to_http_date(record_dt)

    header_bytes = warc_headers.to_bytes()

    # presumably returns a truthy value only when a Content-Length could be
    # written into out_headers -- confirm against _set_content_len
    has_length = self._set_content_len(
        warc_headers.get_header('Content-Length'),
        out_headers,
        len(header_bytes))

    body_iter = StreamIter(stream,
                           header1=header_bytes,
                           header2=other_headers)

    if has_length:
        return out_headers, body_iter

    # Length not set: send the body with chunked transfer encoding instead.
    out_headers['Transfer-Encoding'] = 'chunked'
    return out_headers, chunk_encode_iter(body_iter)
def _make_cache_headers():
    """Return a fixed sample of caching-related ``(name, value)`` headers.

    The list holds ``Content-Length``, ``Cache-Control``, ``Expires`` and
    ``ETag`` entries; only ``Expires`` varies, being derived from the
    current time.
    """
    return [
        ('Content-Length', '123'),
        ('Cache-Control', 'max-age=10'),
        # HTTP dates are expressed in GMT, so start from UTC rather than
        # local time (assumes datetime_to_http_date treats a naive datetime
        # as UTC -- TODO confirm).
        ('Expires', datetime_to_http_date(datetime.utcnow())),
        ('ETag', '123456'),
    ]
def load_resource(self, cdx, params):
    """Fetch ``cdx['load_url']`` from upstream and describe it as a WARC response.

    :param cdx: index entry (dict-like); reads ``load_url``, ``timestamp``,
        ``memento_url`` and ``url``; may overwrite ``timestamp`` and
        ``source``.
    :param params: request parameters; reads ``content_type``,
        ``_input_req`` and ``_timeout``.
    :returns: one of
        * ``None`` -- no ``load_url``, the content type is the video loader's,
          or a memento was requested but upstream sent no
          ``Memento-Datetime`` header;
        * ``(None, upstream_headers, upstream_res)`` -- upstream declared
          ``WebAgg-Type: warc``, so its body is passed on as-is;
        * ``(warc_headers, http_headers_buff, upstream_res)`` -- synthesized
          WARC headers (``StatusAndHeaders``), the latin-1 encoded HTTP
          status line plus headers, and the still-unread upstream response.
    :raises LiveResourceException: if the upstream request raises anything.
    """
    load_url = cdx.get('load_url')
    if not load_url:
        return None

    if params.get('content_type') == VideoLoader.CONTENT_TYPE:
        return None

    input_req = params['_input_req']

    req_headers = input_req.get_req_headers()

    dt = timestamp_to_datetime(cdx['timestamp'])

    # Only ask for a specific datetime when the source is a memento endpoint.
    if cdx.get('memento_url'):
        req_headers['Accept-Datetime'] = datetime_to_http_date(dt)

    method = input_req.get_req_method()
    data = input_req.get_req_body()

    # Run the URL through a PreparedRequest to obtain the normalized URL and
    # any Authorization header it derives (presumably from credentials
    # embedded in the URL -- confirm against the requests library).
    p = PreparedRequest()
    p.prepare_url(load_url, None)
    p.prepare_headers(None)
    p.prepare_auth(None, load_url)

    auth = p.headers.get('Authorization')
    if auth:
        req_headers['Authorization'] = auth

    load_url = p.url

    try:
        # Redirects are not followed and the body is neither preloaded nor
        # decoded, so the caller receives the raw upstream stream.
        upstream_res = self.pool.urlopen(method=method,
                                         url=load_url,
                                         body=data,
                                         headers=req_headers,
                                         redirect=False,
                                         assert_same_host=False,
                                         preload_content=False,
                                         decode_content=False,
                                         retries=self.num_retries,
                                         timeout=params.get('_timeout'))

    except Exception as e:
        # NOTE(review): the original exception ``e`` is discarded here.
        raise LiveResourceException(load_url)

    memento_dt = upstream_res.headers.get('Memento-Datetime')
    if memento_dt:
        # Upstream reported the actual capture time: record it on the cdx.
        dt = http_date_to_datetime(memento_dt)
        cdx['timestamp'] = datetime_to_timestamp(dt)
    elif cdx.get('memento_url'):
        # If 'memento_url' is set and no Memento-Datetime header is present,
        # then it is an error.
        return None

    agg_type = upstream_res.headers.get('WebAgg-Type')
    if agg_type == 'warc':
        # NOTE(review): if the 'WebAgg-Source-Coll' header is absent this
        # passes None to unquote() -- confirm upstream always sends it.
        cdx['source'] = unquote(upstream_res.headers.get('WebAgg-Source-Coll'))
        return None, upstream_res.headers, upstream_res

    self.raise_on_self_redirect(params, cdx,
                                str(upstream_res.status),
                                upstream_res.headers.get('Location'))

    if upstream_res.version == 11:
        version = '1.1'
    else:
        version = '1.0'

    # Rebuild the HTTP status line, then append the upstream headers.
    status = 'HTTP/{version} {status} {reason}\r\n'
    status = status.format(version=version,
                           status=upstream_res.status,
                           reason=upstream_res.reason)

    http_headers_buff = status

    orig_resp = upstream_res._original_response

    try:  #pragma: no cover
        #PY 3
        # Header (name, value) pairs come from the private ``_headers`` list.
        resp_headers = orig_resp.headers._headers
        for n, v in resp_headers:
            if n.lower() in self.SKIP_HEADERS:
                continue

            http_headers_buff += n + ': ' + v + '\r\n'

    except:  #pragma: no cover
        #PY 2
        # Fallback when the PY 3 attribute access above fails: ``msg.headers``
        # entries are appended as-is, with no separator or CRLF added.
        # NOTE(review): if getheaders() yields (name, value) pairs, ``n`` is
        # a tuple here and the SKIP_HEADERS test can never match -- confirm.
        resp_headers = orig_resp.msg.headers
        for n, v in zip(orig_resp.getheaders(), resp_headers):
            if n in self.SKIP_HEADERS:
                continue

            http_headers_buff += v

    # Blank line terminating the HTTP header block.
    http_headers_buff += '\r\n'
    http_headers_buff = http_headers_buff.encode('latin-1')

    try:
        # Best effort: dig the peer address out of the underlying socket.
        fp = upstream_res._fp.fp
        if hasattr(fp, 'raw'):  #pragma: no cover
            fp = fp.raw
        remote_ip = fp._sock.getpeername()[0]
    except:  #pragma: no cover
        remote_ip = None

    warc_headers = {}

    warc_headers['WARC-Type'] = 'response'
    warc_headers['WARC-Record-ID'] = self._make_warc_id()
    warc_headers['WARC-Target-URI'] = cdx['url']
    warc_headers['WARC-Date'] = datetime_to_iso_date(dt)
    if remote_ip:
        warc_headers['WARC-IP-Address'] = remote_ip

    warc_headers['Content-Type'] = 'application/http; msgtype=response'

    # -1 is passed when upstream sent no Content-Length; the length of the
    # rebuilt HTTP header block is supplied alongside it.
    self._set_content_len(upstream_res.headers.get('Content-Length', -1),
                          warc_headers,
                          len(http_headers_buff))

    warc_headers = StatusAndHeaders('WARC/1.0', warc_headers.items())
    return (warc_headers, http_headers_buff, upstream_res)