def _check_accept_dt(self, wb_url, environ): """Returns T/F indicating if the supplied WbUrl instance is for a timegate request :param WbUrl wb_url: The URL to be checked :param dict environ: The wsgi environment object for the request :return: T/F indicating if the WbUrl is for timegate request :rtype: bool """ is_timegate = False if wb_url.is_latest_replay(): accept_dt = environ.get('HTTP_ACCEPT_DATETIME') is_timegate = True if accept_dt: try: wb_url.timestamp = http_date_to_timestamp(accept_dt) except Exception: raise UpstreamException(400, url=wb_url.url, details='Invalid Accept-Datetime') # return WbResponse.text_response('Invalid Accept-Datetime', status='400 Bad Request') wb_url.type = wb_url.REPLAY elif 'pywb_proxy_default_timestamp' in environ: wb_url.timestamp = environ['pywb_proxy_default_timestamp'] wb_url.type = wb_url.REPLAY return is_timegate
def _check_accept_dt(self, wb_url, environ): is_timegate = False if wb_url.is_latest_replay(): accept_dt = environ.get('HTTP_ACCEPT_DATETIME') is_timegate = True if accept_dt: try: wb_url.timestamp = http_date_to_timestamp(accept_dt) except: raise UpstreamException(400, url=wb_url.url, details='Invalid Accept-Datetime') #return WbResponse.text_response('Invalid Accept-Datetime', status='400 Bad Request') wb_url.type = wb_url.REPLAY return is_timegate
def links_to_cdxobject(self, link_header, def_name):
    """Yield one CDXObject per memento listed in a Link header.

    The header is parsed with ``MementoUtils.parse_links``; the parsed
    result is expected to expose the original URL under
    ``['original']['url']`` and the memento entries under ``['mementos']``.
    Each memento entry must provide ``'datetime'`` and ``'url'``; ``'rel'``
    is optional and defaults to an empty string.

    This is a generator: nothing is parsed until iteration starts.

    :param str link_header: The Link header value to parse
    :param str def_name: Passed through to ``MementoUtils.parse_links``
    :return: A generator of CDXObject instances, one per memento
    """
    parsed = MementoUtils.parse_links(link_header, def_name)

    # The original URL and its canonical key are shared by every memento.
    original_url = parsed['original']['url']
    urlkey = canonicalize(original_url)

    for memento in parsed['mementos']:
        timestamp = http_date_to_timestamp(memento['datetime'])

        cdx = CDXObject()
        cdx['urlkey'] = urlkey
        cdx['timestamp'] = timestamp
        cdx['url'] = original_url
        cdx['mem_rel'] = memento.get('rel', '')
        cdx['memento_url'] = memento['url']
        cdx['load_url'] = self._get_replay_url(cdx['timestamp'],
                                               original_url)

        yield cdx
def make_memento_link(cls, url, type, dt, coll=None, memento_format=None):
    """Build a single memento ``Link`` header entry.

    The result has the form ``<target>; rel="type"; datetime="dt"`` with
    an optional trailing ``; collection="coll"`` attribute.

    :param str url: A URL
    :param str type: The rel type
    :param str dt: The datetime of the URL
    :param str|None coll: Optional name of a collection
    :param str|None memento_format: Optional format string; when given
        (and non-empty) the link target is produced by formatting it with
        ``url`` and ``timestamp`` keyword fields instead of using ``url``
        directly
    :return: A memento link string
    :rtype: str
    """
    if memento_format:
        target = memento_format.format(
            url=url, timestamp=http_date_to_timestamp(dt))
    else:
        target = url

    pieces = ['<{0}>; rel="{1}"; datetime="{2}"'.format(target, type, dt)]

    if coll:
        pieces.append('; collection="{0}"'.format(coll))

    return ''.join(pieces)
def process_record(self, record, flow):
    """Rewrite a recorded response for delivery through the proxy.

    When no content rewriter is configured the record is passed through
    unchanged.  Otherwise the record's headers and body are run through
    ``self.content_rewriter.rewrite_content`` and the response framing
    (content-length vs. chunked vs. buffered) is adjusted.

    :param record: The record to serve; its ``http_headers`` and
        ``raw_stream`` attributes are read
    :param flow: The proxy flow; its ``request``, ``response`` and
        ``extra_data`` attributes are read
    :return: A ``(status_headers, body)`` tuple
    :rtype: tuple
    """
    headers = flow.response.headers
    url = flow.request.req_url
    scheme = flow.request.req_scheme

    # No rewriter configured: hand back the record untouched.
    if not self.content_rewriter:
        return record.http_headers, StreamIO(record.raw_stream)

    cookie_rewriter = None

    template_params = flow.extra_data

    environ = {'pywb_proxy_magic': self.proxy_magic,
               'webrec.template_params': template_params}

    wb_url = WbUrl(url)
    wb_prefix = ''
    host_prefix = scheme + '://' + self.proxy_magic
    urlrewriter = SchemeOnlyUrlRewriter(wb_url, '')

    if flow.request.headers.get('X-Requested-With', '').lower() == 'xmlhttprequest':
        urlrewriter.rewrite_opts['is_ajax'] = True

    head_insert_func = self.head_insert_view.create_insert_func(
        wb_url, wb_prefix, host_prefix, url, environ, False)

    urlkey = canonicalize(wb_url.url)

    cdx = CDXObject()
    cdx['urlkey'] = urlkey
    # NOTE(review): assumes the response always carries Memento-Datetime;
    # a missing header passes None to http_date_to_timestamp -- confirm.
    cdx['timestamp'] = http_date_to_timestamp(headers.get('Memento-Datetime'))
    cdx['url'] = wb_url.url
    if headers.get('Webagg-Source-Coll') == 'live':
        cdx['is_live'] = 'true'

    result = self.content_rewriter.rewrite_content(
        urlrewriter, record.http_headers, record.raw_stream,
        head_insert_func, urlkey, cdx, cookie_rewriter, environ)

    status_headers, gen, _is_rw = result

    status_headers.remove_header('Content-Security-Policy')

    # If the rewritten response still has a usable content-length, it can
    # be sent as-is.  A missing header (None) or a non-numeric value is
    # treated as "no content-length"; only those two failures are expected
    # from int(), so nothing broader is swallowed.
    res = status_headers.get_header('content-length')
    try:
        content_length = int(res)
    except (TypeError, ValueError):
        content_length = 0

    if content_length > 0:
        return status_headers, IterIdent(gen)

    # Otherwise either chunk or buffer so the client can find the body end.
    if flow.request.http_version == 'HTTP/1.1':
        status_headers.remove_header('content-length')
        status_headers.headers.append(('Transfer-Encoding', 'chunked'))
        # NOTE(review): the body is not chunk-encoded here (a
        # chunk_encode_iter call was previously disabled); presumably the
        # encoding is applied downstream -- confirm.
    else:
        gen = buffer_iter(status_headers, gen)

    return status_headers, IterIdent(gen)