def __call__(self, environ, start_response):
    """WSGI entry point: forward the request to the chosen remote URI.

    The remote URI comes from ``self.pick_remote_uri(environ)``.  When
    ``self.is_opencore`` is set, a virtual-host-monster path segment
    (built from ``vhm_template``), ``self.site_root`` and the literal
    ``'VirtualHostRoot'`` are appended to it.  If ``self.rewrite_links``
    is set, the response is passed through ``rewrite_links`` before it is
    returned to the WSGI server.
    """
    target_uri = self.pick_remote_uri(environ)

    if self.is_opencore:
        # With varnish on port 80 proxying to the opencore stack
        # entrypoint, HTTP_HOST doesn't include the :80 bit.  (Other
        # frontends are untested.)  To be safe, HTTP_HOST is decomposed
        # into its parts, and port 80 is assumed when none is given.
        #
        # The virtual host monster needs this information.  If it's
        # missing, opencore will generate links with the port that Zope
        # is served on.
        host_pieces = environ['HTTP_HOST'].split(':')
        vhm_values = environ.copy()
        vhm_values['HTTP_HOST'] = host_pieces[0]
        vhm_values['frontend_port'] = (
            host_pieces[1] if len(host_pieces) > 1 else '80')
        vhm_segment = vhm_template % vhm_values
        target_uri = ''.join(
            [target_uri, vhm_segment, self.site_root, 'VirtualHostRoot'])

    # Expose the bare host name (without any port) to the proxied app.
    bare_host = environ['HTTP_HOST'].split(':')[0]
    environ['HTTP_X_OPENPLANS_DOMAIN'] = bare_host

    app = proxyapp.ForcedProxy(remote=target_uri, force_host=True)

    # work around bug in WSGIFilter
    from webob import Request
    request = Request(environ).copy()
    resp = request.get_response(app)

    if self.rewrite_links:
        outer_request = Request(environ)
        proxied_url = '%s://%s%s' % (
            request.scheme, request.host, request.path_qs)
        resp = rewrite_links(
            outer_request,
            resp,
            url_normalize(target_uri),
            url_normalize(outer_request.application_url),
            url_normalize(proxied_url),
        )

    return resp(environ, start_response)
def proxy_to_dest(self, request, dest):
    """Do the actual proxying, without applying any transformations.

    Returns a 4-tuple ``(response, orig_base, dest, proxied_url)`` where
    ``orig_base`` is the normalized application URL of the incoming
    request, ``dest`` is the normalized destination and ``proxied_url``
    is the normalized URL that was actually requested.

    If ``construct_proxy_request`` raises ``TypeError`` the request is
    handed to ``self.proxy_to_file`` instead and its result is returned.
    """
    # We need to remove caching headers, since the upstream parts of
    # Deliverance can't handle Not-Modified responses.
    # Not using request.copy because I don't want to copy wsgi.input
    request = Request(request.environ.copy())
    request.remove_conditional_headers()

    try:
        outgoing = self.construct_proxy_request(request, dest)
    except TypeError:
        return self.proxy_to_file(request, dest)

    outgoing.path_info += request.path_info

    # Merge the destination's query string with the incoming one.
    incoming_query = request.query_string
    if incoming_query:
        dest_query = outgoing.query_string
        if dest_query:
            outgoing.query_string = '%s&%s' % (dest_query, incoming_query)
        else:
            outgoing.query_string = incoming_query

    outgoing.accept_encoding = None

    try:
        resp = outgoing.get_response(proxy_exact_request)
        if resp.status_int == 500:
            # Dump the exchange to stdout for debugging.
            print('Request:')
            print(outgoing)
            print('Response:')
            print(resp)
    except socket.error as e:
        ## FIXME: really wsgiproxy should handle this
        ## FIXME: which error?
        ## 502 HTTPBadGateway, 503 HTTPServiceUnavailable, 504 HTTPGatewayTimeout?
        has_detail = isinstance(e.args, tuple) and len(e.args) > 1
        error = e.args[1] if has_detail else str(e)
        resp = exc.HTTPServiceUnavailable(
            'Could not proxy the request to %s:%s : %s'
            % (outgoing.server_name, outgoing.server_port, error))

    dest = url_normalize(dest)
    orig_base = url_normalize(request.application_url)
    raw_proxied_url = '%s://%s%s' % (
        outgoing.scheme, outgoing.host, outgoing.path_qs)
    proxied_url = url_normalize(raw_proxied_url)
    return resp, orig_base, dest, proxied_url
def __call__(self, environ, start_response):
    """Proxy a WSGI request to the remote URI selected for ``environ``.

    For opencore backends (``self.is_opencore``) the remote URI is
    extended with the ``vhm_template`` expansion, ``self.site_root`` and
    ``'VirtualHostRoot'``.  Links in the response are rewritten via
    ``rewrite_links`` when ``self.rewrite_links`` is true.
    """
    remote_uri = self.pick_remote_uri(environ)

    if self.is_opencore:
        # With varnish on port 80 proxying to the opencore stack
        # entrypoint, HTTP_HOST doesn't include the :80 bit.  (Other
        # frontends are untested.)  To be safe, HTTP_HOST is decomposed
        # into its parts, and port 80 is assumed when none is given.
        #
        # The virtual host monster needs this information.  If it's
        # missing, opencore will generate links with the port that Zope
        # is served on.
        template_vars = environ.copy()
        pieces = environ['HTTP_HOST'].split(':')
        if len(pieces) > 1:
            port = pieces[1]
        else:
            port = '80'
        template_vars.update({
            'HTTP_HOST': pieces[0],
            'frontend_port': port,
        })
        remote_uri = (remote_uri
                      + (vhm_template % template_vars)
                      + self.site_root
                      + 'VirtualHostRoot')

    # Host name only; any ":port" suffix is dropped.
    environ['HTTP_X_OPENPLANS_DOMAIN'] = environ['HTTP_HOST'].split(':')[0]

    app = proxyapp.ForcedProxy(remote=remote_uri, force_host=True)

    # work around bug in WSGIFilter
    from webob import Request
    request = Request(environ).copy()
    resp = request.get_response(app)

    if not self.rewrite_links:
        return resp(environ, start_response)

    original = Request(environ)
    resp = rewrite_links(
        original,
        resp,
        url_normalize(remote_uri),
        url_normalize(original.application_url),
        url_normalize(
            '%s://%s%s' % (request.scheme, request.host, request.path_qs)),
    )
    return resp(environ, start_response)
def proxy_to_wsgi(self, request, wsgi_app):
    """Forward a request to an inner wsgi app.

    Returns ``(response, orig_base, None, None)``; the last two slots
    exist only so the tuple has the same shape as the one returned by
    ``proxy_to_dest``.
    """
    ## FIXME: should this be request.copy()?
    inner_request = Request(request.environ.copy())
    base_url = url_normalize(request.application_url)
    response = inner_request.get_response(wsgi_app)
    return response, base_url, None, None
def construct_proxy_request(self, request, dest):
    """Build the outgoing request for ``dest`` from ``request``.

    Returns a new Request object constructed by copying the environ of
    `request` and replacing its url with the url passed in as `dest`.
    ``X-Forwarded-*`` headers describing the original request are added.

    @raises TypeError if `dest` is a file:// url (with no ``:`` in its
    network location); this can be caught by the caller and handled
    accordingly
    """
    dest = url_normalize(dest)
    pieces = urlparse.urlsplit(dest)
    scheme, netloc, query, fragment = (
        pieces[0], pieces[1], pieces[3], pieces[4])
    path = urllib.unquote(pieces[2])
    assert not fragment, (
        "Unexpected fragment: %r" % fragment)

    outgoing = Request(request.environ.copy())
    outgoing.path_info = path

    # An explicit port in the netloc wins; otherwise fall back to the
    # scheme's well-known port.
    well_known_ports = {'http': '80', 'https': '443'}
    host_and_port = netloc.split(':', 1)
    outgoing.server_name = host_and_port[0]
    if len(host_and_port) > 1:
        outgoing.server_port = host_and_port[1]
    elif scheme in well_known_ports:
        outgoing.server_port = well_known_ports[scheme]
    elif scheme == 'file':
        raise TypeError  ## FIXME: is TypeError too general?
    else:
        assert 0, "bad scheme: %r (from %r)" % (scheme, dest)

    if not self.keep_host:
        outgoing.host = netloc
    outgoing.query_string = query
    outgoing.scheme = scheme

    forwarded = outgoing.headers
    forwarded['X-Forwarded-For'] = request.remote_addr
    forwarded['X-Forwarded-Scheme'] = request.scheme
    forwarded['X-Forwarded-Server'] = request.host
    ## FIXME: something with path? proxy_req.headers['X-Forwarded-Path']
    ## (now we are only doing it with strip_script_name)
    if self.strip_script_name:
        forwarded['X-Forwarded-Path'] = outgoing.script_name
        outgoing.script_name = ''
    return outgoing
class Proxy(object):
    """Represents one ``<proxy>`` element.

    This both matches requests, applies transformations, then sends
    off the request.  It also does local file serving when proxying to
    ``file:`` URLs.
    """

    def __init__(self, match, dest,
                 request_modifications, response_modifications,
                 strip_script_name=True, keep_host=False,
                 source_location=None, classes=None, editable=False,
                 wsgi=None):
        """Store the proxy configuration and register it on ``match``.

        ``match.proxy`` is set to this instance.  ``dest`` and ``wsgi``
        are the two possible endpoints (see ``get_endpoint``).
        """
        self.match = match
        self.match.proxy = self
        self.dest = dest
        self.strip_script_name = strip_script_name
        self.keep_host = keep_host
        self.request_modifications = request_modifications
        self.response_modifications = response_modifications
        self.source_location = source_location
        self.classes = classes
        self.editable = editable
        self.wsgi = wsgi

    def get_endpoint(self):
        """Return ``self.dest`` if it is truthy, otherwise ``self.wsgi``."""
        ## FIXME: should we assert that one of these is not None?  I think so
        return self.dest or self.wsgi

    def log_description(self, log=None):
        """The debugging description for use in log display"""
        parts = []
        if log is None:
            parts.append('<proxy')
        else:
            parts.append('<<a href="%s" target="_blank">proxy</a>'
                         % log.link_to(self.source_location, source=True))
        ## FIXME: defaulting to true is bad
        if not self.strip_script_name:
            parts.append('strip-script-name="0"')
        if self.keep_host:
            parts.append('keep-host="1"')
        if self.editable:
            parts.append('editable="1"')
        parts.append('><br>\n')
        parts.append(' ' + self.get_endpoint().log_description(log))
        parts.append('<br>\n')
        if self.request_modifications:
            if len(self.request_modifications) > 1:
                parts.append(' %i request modifications<br>\n'
                             % len(self.request_modifications))
            else:
                parts.append(' 1 request modification<br>\n')
        if self.response_modifications:
            if len(self.response_modifications) > 1:
                parts.append(' %i response modifications<br>\n'
                             % len(self.response_modifications))
            else:
                parts.append(' 1 response modification<br>\n')
        parts.append('</proxy>')
        return ' '.join(parts)

    @classmethod
    def parse_xml(cls, el, source_location):
        """Parse this document from an XML/etree element

        Returns a new instance built from the children of the ``<proxy>``
        element ``el``.  Raises ``DeliveranceSyntaxError`` for duplicate
        or conflicting ``<dest>``/``<wsgi>`` tags, for link rewriting
        combined with ``<wsgi>``, for unknown child tags, and for an
        ``editable`` proxy whose ``<dest>`` is missing or does not
        resolve to a ``file:`` URL.
        """
        assert el.tag == 'proxy'
        match = ProxyMatch.parse_xml(el, source_location)
        dest = None
        wsgi = None
        request_modifications = []
        response_modifications = []
        strip_script_name = True
        keep_host = False
        editable = asbool(el.get('editable'))
        # The <response> modification that enabled link rewriting, if any.
        rewriting_links = None

        ## FIXME: this inline validation is a bit brittle because it is
        ##        order-dependent, but validation errors generally aren't
        for child in el:
            if child.tag == 'dest':
                if dest is not None:
                    raise DeliveranceSyntaxError(
                        "You cannot have more than one <dest> tag (second tag: %s)"
                        % xml_tostring(child),
                        element=child, source_location=source_location)
                if wsgi is not None:
                    raise DeliveranceSyntaxError(
                        "You cannot have both a <dest> tag and a <wsgi> tag (second tag: %s)"
                        % xml_tostring(child),
                        element=child, source_location=source_location)
                dest = ProxyDest.parse_xml(child, source_location)
            elif child.tag == 'wsgi':
                if wsgi is not None:
                    raise DeliveranceSyntaxError(
                        "You cannot have more than one <wsgi> tag (second tag: %s)"
                        % xml_tostring(child),
                        element=child, source_location=source_location)
                if dest is not None:
                    raise DeliveranceSyntaxError(
                        "You cannot have both a <dest> tag and a <wsgi> tag (second tag: %s)"
                        % xml_tostring(child),
                        element=child, source_location=source_location)
                if rewriting_links is not None:
                    raise DeliveranceSyntaxError(
                        "You cannot use ``<response rewrite-links='1'>`` in a proxy with a ``<wsgi>`` tag",
                        element=child, source_location=source_location)
                wsgi = ProxyWsgi.parse_xml(child, source_location)
            elif child.tag == 'transform':
                if child.get('strip-script-name'):
                    strip_script_name = asbool(child.get('strip-script-name'))
                if child.get('keep-host'):
                    keep_host = asbool(child.get('keep-host'))
                ## FIXME: error on other attrs
            elif child.tag == 'request':
                request_modifications.append(
                    ProxyRequestModification.parse_xml(child, source_location))
            elif child.tag == 'response':
                mod = ProxyResponseModification.parse_xml(child, source_location)
                if mod.rewrite_links == True:
                    rewriting_links = mod
                    if wsgi is not None:
                        raise DeliveranceSyntaxError(
                            "You cannot use ``<response rewrite-links='1'>`` in a proxy with a ``<wsgi>`` tag",
                            element=child, source_location=source_location)
                response_modifications.append(mod)
            elif child.tag is Comment:
                continue
            else:
                raise DeliveranceSyntaxError(
                    "Unknown tag in <proxy>: %s" % xml_tostring(child),
                    element=child, source_location=source_location)
        if editable:
            if not dest:
                ## FIXME: should this always be a test?
                raise DeliveranceSyntaxError(
                    "You must have a <dest> tag",
                    element=el, source_location=source_location)
            try:
                href = uri_template_substitute(
                    dest.href, dict(here=posixpath.dirname(source_location)))
            except KeyError:
                raise DeliveranceSyntaxError(
                    'You can only use <proxy editable="1"> if you have a <dest href="..."> that only contains {here} (you have %s)'
                    % (dest.href))
            if not href.startswith('file:'):
                raise DeliveranceSyntaxError(
                    'You can only use <proxy editable="1"> if you have a <dest href="file:///..."> (you have %s)'
                    % (dest))
        classes = el.get('class', '').split() or None
        inst = cls(match, dest, request_modifications, response_modifications,
                   strip_script_name=strip_script_name, keep_host=keep_host,
                   source_location=source_location, classes=classes,
                   editable=editable, wsgi=wsgi)
        match.proxy = inst
        return inst

    def forward_request(self, environ, start_response):
        """Forward this request to the remote server, or serve locally.

        This also applies all the request and response transformations.
        Raises ``AbortProxy`` when ``self.dest`` is set and has a truthy
        ``next`` attribute.
        """
        request = Request(environ)
        prefix = self.match.strip_prefix()
        if prefix:
            if prefix.endswith('/'):
                prefix = prefix[:-1]
            path_info = request.path_info
            if not path_info.startswith(prefix + '/') and not path_info == prefix:
                log = environ['deliverance.log']
                log.warn(
                    self, "The match would strip the prefix %r from the request "
                    "path (%r), but they do not match"
                    % (prefix + '/', path_info))
            else:
                # Move the matched prefix from PATH_INFO to SCRIPT_NAME.
                request.script_name = request.script_name + prefix
                request.path_info = path_info[len(prefix):]
        log = request.environ['deliverance.log']
        for modifier in self.request_modifications:
            request = modifier.modify_request(request, log)
        if self.dest and self.dest.next:
            raise AbortProxy
        # Both names are initialised so that neither can be unbound below
        # (the original initialised a misspelt ``wsgiapp`` instead).
        dest, wsgi_app = None, None
        if self.dest:
            dest = self.dest(request, log)
            log.debug(self, '<proxy> matched; forwarding request to %s' % dest)
        else:
            wsgi_app = self.wsgi(request, log)
            log.debug(self, '<proxy> matched; forwarding request to %s' % wsgi_app)
        if self.classes:
            log.debug(self, 'Adding class="%s" to page' % ' '.join(self.classes))
            existing_classes = request.environ.setdefault('deliverance.page_classes', [])
            existing_classes.extend(self.classes)
        if dest is not None:
            response, orig_base, proxied_base, proxied_url = \
                self.proxy_to_dest(request, dest)
        else:
            ## FIXME: proxied_base and proxied_url don't really have a meaning here,
            ##        but the modifier signature expects them
            response, orig_base, proxied_base, proxied_url = \
                self.proxy_to_wsgi(request, wsgi_app)
        for modifier in self.response_modifications:
            response = modifier.modify_response(request, response, orig_base,
                                                proxied_base, proxied_url, log)
        return response(environ, start_response)

    def construct_proxy_request(self, request, dest):
        """
        returns a new Request object constructed by copying `request`
        and replacing its url with the url passed in as `dest`

        @raises TypeError if `dest` is a file:// url; this can be
        caught by the caller and handled accordingly
        """
        dest = url_normalize(dest)
        scheme, netloc, path, query, fragment = urlparse.urlsplit(dest)
        path = urllib.unquote(path)

        assert not fragment, (
            "Unexpected fragment: %r" % fragment)

        proxy_req = Request(request.environ.copy())
        proxy_req.path_info = path

        proxy_req.server_name = netloc.split(':', 1)[0]
        # An explicit port in the netloc wins over the scheme default.
        if ':' in netloc:
            proxy_req.server_port = netloc.split(':', 1)[1]
        elif scheme == 'http':
            proxy_req.server_port = '80'
        elif scheme == 'https':
            proxy_req.server_port = '443'
        elif scheme == 'file':
            raise TypeError  ## FIXME: is TypeError too general?
        else:
            assert 0, "bad scheme: %r (from %r)" % (scheme, dest)
        if not self.keep_host:
            proxy_req.host = netloc

        proxy_req.query_string = query
        proxy_req.scheme = scheme

        proxy_req.headers['X-Forwarded-For'] = request.remote_addr
        proxy_req.headers['X-Forwarded-Scheme'] = request.scheme
        proxy_req.headers['X-Forwarded-Server'] = request.host

        ## FIXME: something with path? proxy_req.headers['X-Forwarded-Path']
        ## (now we are only doing it with strip_script_name)
        if self.strip_script_name:
            proxy_req.headers['X-Forwarded-Path'] = proxy_req.script_name
            proxy_req.script_name = ''

        return proxy_req

    def proxy_to_wsgi(self, request, wsgi_app):
        """Forward a request to an inner wsgi app

        Returns ``(response, orig_base, None, None)``.
        """
        orig_base = url_normalize(request.application_url)
        ## FIXME: should this be request.copy()?
        proxy_req = Request(request.environ.copy())
        resp = proxy_req.get_response(wsgi_app)
        return resp, orig_base, None, None

    def proxy_to_dest(self, request, dest):
        """Do the actual proxying, without applying any transformations

        Returns ``(response, orig_base, dest, proxied_url)``.  If
        ``construct_proxy_request`` raises ``TypeError`` the request is
        delegated to ``self.proxy_to_file``.  A ``socket.error`` while
        fetching is converted into an ``HTTPServiceUnavailable`` response.
        """
        # Not using request.copy because I don't want to copy wsgi.input:
        try:
            proxy_req = self.construct_proxy_request(request, dest)
        except TypeError:
            return self.proxy_to_file(request, dest)

        proxy_req.path_info += request.path_info

        if proxy_req.query_string and request.query_string:
            proxy_req.query_string = '%s&%s' % \
                (proxy_req.query_string, request.query_string)
        elif request.query_string:
            proxy_req.query_string = request.query_string

        proxy_req.accept_encoding = None
        try:
            resp = proxy_req.get_response(proxy_exact_request)
            if resp.status_int == 500:
                # Single-argument print(...) behaves the same as a
                # statement on Python 2 and as a function on Python 3.
                print('Request:')
                print(proxy_req)
                print('Response:')
                print(resp)
        except socket.error as e:
            ## FIXME: really wsgiproxy should handle this
            ## FIXME: which error?
            ## 502 HTTPBadGateway, 503 HTTPServiceUnavailable, 504 HTTPGatewayTimeout?
            if isinstance(e.args, tuple) and len(e.args) > 1:
                error = e.args[1]
            else:
                error = str(e)
            resp = exc.HTTPServiceUnavailable(
                'Could not proxy the request to %s:%s : %s'
                % (proxy_req.server_name, proxy_req.server_port, error))

        dest = url_normalize(dest)
        orig_base = url_normalize(request.application_url)
        proxied_url = url_normalize(
            '%s://%s%s' % (proxy_req.scheme, proxy_req.host, proxy_req.path_qs))

        return resp, orig_base, dest, proxied_url