def __call__(self, env):
    """Route a proxy-mode request and return its response.

    ``env`` is the request environ dict. It is read and also updated in
    place: ``pywb.proxy_scheme``, ``pywb.proxy_host``, ``pywb.proxy_port``,
    ``pywb.proxy_req_uri`` and ``pywb.proxy_query`` are filled in for
    non-CONNECT requests, ``pywb_proxy_magic`` is set when the resolver
    supports switching, and ``REL_REQUEST_URI`` is rewritten for requests
    aimed at the magic host.

    Returns ``None`` when:
      * a non-CONNECT request URI does not start with ``http://`` or
        ``https://``;
      * the request targets the magic host and ``handle_cert_install``
        yields no response;
      * the route handler returns a falsy response.

    Otherwise returns the first response produced by the resolver,
    ``handle_connect``, ``handle_cert_install`` or the route handler.
    A handler response lacking a positive ``content-length`` is
    chunk-encoded for HTTP/1.1 clients and buffered for all others.
    """
    is_https = (env['REQUEST_METHOD'] == 'CONNECT')

    ArchivalRouter.ensure_rel_uri_set(env)

    # for non-https requests, check non-proxy urls
    if not is_https:
        url = env['REL_REQUEST_URI']

        if not url.startswith(('http://', 'https://')):
            return None

        env['pywb.proxy_scheme'] = 'http'

    route = None
    coll = None
    matcher = None
    response = None
    ts = None

    # check resolver, for pre connect resolve
    if self.resolver.pre_connect:
        route, coll, matcher, ts, response = self.resolver.resolve(env)
        if response:
            return response

    # do connect, then get updated url
    if is_https:
        # NOTE(review): for CONNECT requests the 'pywb.proxy_*' keys read
        # below are presumably populated by handle_connect -- confirm.
        response = self.handle_connect(env)
        if response:
            return response

        url = env['REL_REQUEST_URI']
    else:
        parts = urlsplit(env['REL_REQUEST_URI'])

        # NOTE(review): splitting netloc on the first ':' mis-parses
        # bracketed IPv6 literals and keeps any userinfo in the host part.
        hostport = parts.netloc.split(':', 1)
        env['pywb.proxy_host'] = hostport[0]
        env['pywb.proxy_port'] = hostport[1] if len(hostport) == 2 else ''

        env['pywb.proxy_req_uri'] = parts.path
        if parts.query:
            env['pywb.proxy_req_uri'] += '?' + parts.query
            env['pywb.proxy_query'] = parts.query

    if self.resolver.supports_switching:
        env['pywb_proxy_magic'] = self.magic_name

    # route (static) and other resources to archival replay
    if env['pywb.proxy_host'] == self.magic_name:
        env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']

        # special case for proxy install
        response = self.handle_cert_install(env)
        if response:
            return response

        return None

    # check resolver, post connect
    if not self.resolver.pre_connect:
        route, coll, matcher, ts, response = self.resolver.resolve(env)
        if response:
            return response

    rel_prefix = ''

    custom_prefix = env.get('HTTP_PYWB_REWRITE_PREFIX', '')
    if custom_prefix:
        host_prefix = custom_prefix
        urlrewriter_class = UrlRewriter
        abs_prefix = True
        # always rewrite to absolute here
        rewrite_opts = dict(no_match_rel=True)
    else:
        host_prefix = env['pywb.proxy_scheme'] + '://' + self.magic_name
        urlrewriter_class = SchemeOnlyUrlRewriter
        abs_prefix = False
        rewrite_opts = {}

    # special case for proxy calendar
    if env['pywb.proxy_host'] == 'query.' + self.magic_name:
        # drop the leading '/' of the request uri
        url = env['pywb.proxy_req_uri'][1:]
        rel_prefix = '/'

    if ts is not None:
        url = ts + '/' + url

    wbrequest = route.request_class(env,
                                    request_uri=url,
                                    wb_url_str=url,
                                    coll=coll,
                                    host_prefix=host_prefix,
                                    rel_prefix=rel_prefix,
                                    wburl_class=route.handler.get_wburl_type(),
                                    urlrewriter_class=urlrewriter_class,
                                    use_abs_prefix=abs_prefix,
                                    rewrite_opts=rewrite_opts,
                                    is_proxy=True)

    if matcher:
        route.apply_filters(wbrequest, matcher)

    if self.use_wombat and self.use_banner:
        # full rewrite and banner
        wbrequest.wb_url.mod = ''
    elif self.use_banner:
        # banner only, no rewrite
        wbrequest.wb_url.mod = 'bn_'
    else:
        # unaltered, no rewrite or banner
        wbrequest.wb_url.mod = 'uo_'

    response = route.handler(wbrequest)
    if not response:
        return None

    # add extra headers for replay responses
    if wbrequest.wb_url and wbrequest.wb_url.is_replay():
        response.status_headers.replace_headers(self.extra_headers)

    # check for content-length
    res = response.status_headers.get_header('content-length')
    try:
        if int(res) > 0:
            return response
    except (TypeError, ValueError):
        # Non-numeric header value (ValueError) or a non-string such as
        # None (TypeError): treat as "no usable content-length" and fall
        # through. Anything else (e.g. KeyboardInterrupt) must propagate.
        pass

    # need to either chunk or buffer to get content-length
    if env.get('SERVER_PROTOCOL') == 'HTTP/1.1':
        response.status_headers.remove_header('content-length')
        response.status_headers.headers.append(('Transfer-Encoding',
                                                'chunked'))
        response.body = self._chunk_encode(response.body)
    else:
        response.body = self._buffer_response(response.status_headers,
                                              response.body)

    return response
def __call__(self, env):
    """Route a proxy-mode request and return its response.

    ``env`` is the request environ dict. It is read and also updated in
    place: ``pywb.proxy_scheme``, ``pywb.proxy_host``, ``pywb.proxy_port``,
    ``pywb.proxy_req_uri`` and ``pywb.proxy_query`` are filled in for
    non-CONNECT requests, ``pywb_proxy_magic`` is always set, and
    ``REL_REQUEST_URI`` is rewritten for requests aimed at the magic host.

    Returns ``None`` when:
      * a non-CONNECT request URI does not start with ``http://`` or
        ``https://``;
      * the request targets the magic host and ``handle_cert_install``
        yields no response;
      * the route handler returns a falsy response.

    Otherwise returns the first response produced by the resolver,
    ``handle_connect``, ``handle_cert_install`` or the route handler.
    """
    is_https = (env['REQUEST_METHOD'] == 'CONNECT')

    ArchivalRouter.ensure_rel_uri_set(env)

    # for non-https requests, check non-proxy urls
    if not is_https:
        url = env['REL_REQUEST_URI']

        if not url.startswith(('http://', 'https://')):
            return None

        env['pywb.proxy_scheme'] = 'http'

    route = None
    coll = None
    matcher = None
    response = None
    ts = None

    # check resolver, for pre connect resolve
    if self.resolver.pre_connect:
        route, coll, matcher, ts, response = self.resolver.resolve(env)
        if response:
            return response

    # do connect, then get updated url
    if is_https:
        # NOTE(review): for CONNECT requests the 'pywb.proxy_*' keys read
        # below are presumably populated by handle_connect -- confirm.
        response = self.handle_connect(env)
        if response:
            return response

        url = env['REL_REQUEST_URI']
    else:
        parts = urlparse.urlsplit(env['REL_REQUEST_URI'])

        # NOTE(review): splitting netloc on the first ':' mis-parses
        # bracketed IPv6 literals and keeps any userinfo in the host part.
        hostport = parts.netloc.split(':', 1)
        env['pywb.proxy_host'] = hostport[0]
        env['pywb.proxy_port'] = hostport[1] if len(hostport) == 2 else ''

        env['pywb.proxy_req_uri'] = parts.path
        if parts.query:
            env['pywb.proxy_req_uri'] += '?' + parts.query
            env['pywb.proxy_query'] = parts.query

    env['pywb_proxy_magic'] = self.magic_name

    # route (static) and other resources to archival replay
    if env['pywb.proxy_host'] == self.magic_name:
        env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']

        # special case for proxy install
        response = self.handle_cert_install(env)
        if response:
            return response

        return None

    # check resolver, post connect
    if not self.resolver.pre_connect:
        route, coll, matcher, ts, response = self.resolver.resolve(env)
        if response:
            return response

    host_prefix = env['pywb.proxy_scheme'] + '://' + self.magic_name
    rel_prefix = ''

    # special case for proxy calendar
    if env['pywb.proxy_host'] == 'query.' + self.magic_name:
        # drop the leading '/' of the request uri
        url = env['pywb.proxy_req_uri'][1:]
        rel_prefix = '/'

    if ts is not None:
        url = ts + '/' + url

    wbrequest = route.request_class(env,
                                    request_uri=url,
                                    wb_url_str=url,
                                    coll=coll,
                                    host_prefix=host_prefix,
                                    rel_prefix=rel_prefix,
                                    wburl_class=route.handler.get_wburl_type(),
                                    urlrewriter_class=HttpsUrlRewriter,
                                    use_abs_prefix=False,
                                    is_proxy=True)

    if matcher:
        route.apply_filters(wbrequest, matcher)

    if self.use_wombat and self.use_banner:
        # full rewrite and banner
        wbrequest.wb_url.mod = ''
    elif self.use_banner:
        # banner only, no rewrite
        wbrequest.wb_url.mod = 'bn_'
    else:
        # unaltered, no rewrite or banner
        wbrequest.wb_url.mod = 'id_'

    response = route.handler(wbrequest)
    if not response:
        # The handler produced nothing; without this guard the header
        # update below would raise AttributeError on a None response.
        return None

    # add extra headers for replay responses
    if wbrequest.wb_url and wbrequest.wb_url.is_replay():
        response.status_headers.replace_headers(self.extra_headers)

    return response
def __call__(self, env):
    """Route a proxy-mode request and return its response.

    ``env`` is the request environ dict. It is read and also updated in
    place: ``pywb.proxy_scheme``, ``pywb.proxy_host``, ``pywb.proxy_port``,
    ``pywb.proxy_req_uri`` and ``pywb.proxy_query`` are filled in for
    non-CONNECT requests, ``pywb_proxy_magic`` is set when the resolver
    supports switching, and ``REL_REQUEST_URI`` is rewritten for requests
    aimed at the magic host.

    Returns ``None`` when:
      * a non-CONNECT request URI does not start with ``http://`` or
        ``https://``;
      * the request targets the magic host and ``handle_cert_install``
        yields no response;
      * the route handler returns a falsy response.

    Otherwise returns the first response produced by the resolver,
    ``handle_connect``, ``handle_cert_install`` or the route handler.
    A handler response lacking a positive ``content-length`` is
    chunk-encoded for HTTP/1.1 clients and buffered for all others.
    """
    is_https = (env['REQUEST_METHOD'] == 'CONNECT')

    ArchivalRouter.ensure_rel_uri_set(env)

    # for non-https requests, check non-proxy urls
    if not is_https:
        url = env['REL_REQUEST_URI']

        if not url.startswith(('http://', 'https://')):
            return None

        env['pywb.proxy_scheme'] = 'http'

    route = None
    coll = None
    matcher = None
    response = None
    ts = None

    # check resolver, for pre connect resolve
    if self.resolver.pre_connect:
        route, coll, matcher, ts, response = self.resolver.resolve(env)
        if response:
            return response

    # do connect, then get updated url
    if is_https:
        # NOTE(review): for CONNECT requests the 'pywb.proxy_*' keys read
        # below are presumably populated by handle_connect -- confirm.
        response = self.handle_connect(env)
        if response:
            return response

        url = env['REL_REQUEST_URI']
    else:
        parts = urlsplit(env['REL_REQUEST_URI'])

        # NOTE(review): splitting netloc on the first ':' mis-parses
        # bracketed IPv6 literals and keeps any userinfo in the host part.
        hostport = parts.netloc.split(':', 1)
        env['pywb.proxy_host'] = hostport[0]
        env['pywb.proxy_port'] = hostport[1] if len(hostport) == 2 else ''

        env['pywb.proxy_req_uri'] = parts.path
        if parts.query:
            env['pywb.proxy_req_uri'] += '?' + parts.query
            env['pywb.proxy_query'] = parts.query

    if self.resolver.supports_switching:
        env['pywb_proxy_magic'] = self.magic_name

    # route (static) and other resources to archival replay
    if env['pywb.proxy_host'] == self.magic_name:
        env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']

        # special case for proxy install
        response = self.handle_cert_install(env)
        if response:
            return response

        return None

    # check resolver, post connect
    if not self.resolver.pre_connect:
        route, coll, matcher, ts, response = self.resolver.resolve(env)
        if response:
            return response

    rel_prefix = ''

    custom_prefix = env.get('HTTP_PYWB_REWRITE_PREFIX', '')
    if custom_prefix:
        host_prefix = custom_prefix
        urlrewriter_class = UrlRewriter
        abs_prefix = True
        # always rewrite to absolute here
        rewrite_opts = dict(no_match_rel=True)
    else:
        host_prefix = env['pywb.proxy_scheme'] + '://' + self.magic_name
        urlrewriter_class = SchemeOnlyUrlRewriter
        abs_prefix = False
        rewrite_opts = {}

    # special case for proxy calendar
    if env['pywb.proxy_host'] == 'query.' + self.magic_name:
        # drop the leading '/' of the request uri
        url = env['pywb.proxy_req_uri'][1:]
        rel_prefix = '/'

    if ts is not None:
        url = ts + '/' + url

    wbrequest = route.request_class(
        env,
        request_uri=url,
        wb_url_str=url,
        coll=coll,
        host_prefix=host_prefix,
        rel_prefix=rel_prefix,
        wburl_class=route.handler.get_wburl_type(),
        urlrewriter_class=urlrewriter_class,
        use_abs_prefix=abs_prefix,
        rewrite_opts=rewrite_opts,
        is_proxy=True)

    if matcher:
        route.apply_filters(wbrequest, matcher)

    if self.use_wombat and self.use_banner:
        # full rewrite and banner
        wbrequest.wb_url.mod = ''
    elif self.use_banner:
        # banner only, no rewrite
        wbrequest.wb_url.mod = 'bn_'
    else:
        # unaltered, no rewrite or banner
        wbrequest.wb_url.mod = 'uo_'

    response = route.handler(wbrequest)
    if not response:
        return None

    # add extra headers for replay responses
    if wbrequest.wb_url and wbrequest.wb_url.is_replay():
        response.status_headers.replace_headers(self.extra_headers)

    # check for content-length
    res = response.status_headers.get_header('content-length')
    try:
        if int(res) > 0:
            return response
    except (TypeError, ValueError):
        # Non-numeric header value (ValueError) or a non-string such as
        # None (TypeError): treat as "no usable content-length" and fall
        # through. Anything else (e.g. KeyboardInterrupt) must propagate.
        pass

    # need to either chunk or buffer to get content-length
    if env.get('SERVER_PROTOCOL') == 'HTTP/1.1':
        response.status_headers.remove_header('content-length')
        response.status_headers.headers.append(
            ('Transfer-Encoding', 'chunked'))
        response.body = self._chunk_encode(response.body)
    else:
        response.body = self._buffer_response(response.status_headers,
                                              response.body)

    return response