Exemple #1
0
    def __call__(self, environ, start_response):
        """WSGI entry point: proxy the request to the picked remote URI.

        The base target comes from ``self.pick_remote_uri(environ)``.  When
        ``self.is_opencore`` is set, a path built from ``vhm_template``,
        ``self.site_root`` and ``'VirtualHostRoot'`` is appended to it.  The
        host name (without port) is stored in ``HTTP_X_OPENPLANS_DOMAIN`` on
        ``environ`` before proxying, and the response is passed through
        ``rewrite_links`` when ``self.rewrite_links`` is set.
        """
        remote_uri = self.pick_remote_uri(environ)

        if self.is_opencore:
            # With varnish on port 80 proxying to the opencore stack
            # entrypoint, HTTP_HOST doesn't include the :80 bit. (I don't
            # know about other frontends.) Just to be safe, HTTP_HOST is
            # decomposed into its parts, and port 80 is assumed when the
            # port information is missing.
            #
            # The virtual host monster needs this information. If it's
            # missing, opencore will generate links with the port that Zope
            # is served on.
            vhm_vars = environ.copy()
            host_parts = environ['HTTP_HOST'].split(':')
            vhm_vars['HTTP_HOST'] = host_parts[0]
            vhm_vars['frontend_port'] = (
                host_parts[1] if len(host_parts) > 1 else '80')
            vhm_path = vhm_template % vhm_vars
            remote_uri = ''.join(
                (remote_uri, vhm_path, self.site_root, 'VirtualHostRoot'))

        domain = environ['HTTP_HOST'].split(':')[0]
        environ['HTTP_X_OPENPLANS_DOMAIN'] = domain

        app = proxyapp.ForcedProxy(remote=remote_uri, force_host=True)

        # work around bug in WSGIFilter
        from webob import Request
        request = Request(environ).copy()
        resp = request.get_response(app)

        if not self.rewrite_links:
            return resp(environ, start_response)

        # Arguments are computed in the order they are passed on.
        outer_request = Request(environ)
        remote_base = url_normalize(remote_uri)
        app_base = url_normalize(Request(environ).application_url)
        request_url = url_normalize(
            '%s://%s%s' % (request.scheme, request.host, request.path_qs))
        rewritten = rewrite_links(
            outer_request, resp, remote_base, app_base, request_url)
        return rewritten(environ, start_response)
Exemple #2
0
    def proxy_to_dest(self, request, dest):
        """Send ``request`` to ``dest`` without applying any transformations.

        Returns a 4-tuple ``(response, orig_base, dest, proxied_url)`` in
        which the last three items have been passed through
        ``url_normalize``.  When ``construct_proxy_request`` raises
        ``TypeError`` the work is handed to ``self.proxy_to_file`` instead.
        A ``socket.error`` while proxying is turned into an
        ``HTTPServiceUnavailable`` response rather than propagated.
        """
        # Caching headers are removed, since the upstream parts of
        # Deliverance can't handle Not-Modified responses.
        # request.copy is not used, to avoid copying wsgi.input.
        request = Request(request.environ.copy())
        request.remove_conditional_headers()

        try:
            proxy_req = self.construct_proxy_request(request, dest)
        except TypeError:
            return self.proxy_to_file(request, dest)

        # The destination path is a prefix for the incoming path.
        proxy_req.path_info = proxy_req.path_info + request.path_info

        # Merge the query string of the destination with the incoming one.
        dest_query = proxy_req.query_string
        incoming_query = request.query_string
        if incoming_query:
            if dest_query:
                proxy_req.query_string = '%s&%s' % (dest_query, incoming_query)
            else:
                proxy_req.query_string = incoming_query

        proxy_req.accept_encoding = None
        try:
            resp = proxy_req.get_response(proxy_exact_request)
            if resp.status_int == 500:
                # Dump both sides of the exchange for debugging.
                for item in ('Request:', proxy_req, 'Response:', resp):
                    print(item)
        except socket.error as e:
            ## FIXME: really wsgiproxy should handle this
            ## FIXME: which error?
            ## 502 HTTPBadGateway, 503 HTTPServiceUnavailable, 504 HTTPGatewayTimeout?
            err_args = e.args
            error = (err_args[1]
                     if isinstance(err_args, tuple) and len(err_args) > 1
                     else str(e))
            resp = exc.HTTPServiceUnavailable(
                'Could not proxy the request to %s:%s : %s' %
                (proxy_req.server_name, proxy_req.server_port, error))

        dest = url_normalize(dest)
        orig_base = url_normalize(request.application_url)
        raw_proxied_url = '%s://%s%s' % (
            proxy_req.scheme, proxy_req.host, proxy_req.path_qs)
        proxied_url = url_normalize(raw_proxied_url)

        return resp, orig_base, dest, proxied_url
    def __call__(self, environ, start_response):
        """Handle one WSGI request by proxying it to a remote server.

        ``self.pick_remote_uri(environ)`` supplies the base target; with
        ``self.is_opencore`` set, ``vhm_template`` (filled in from the
        environ), ``self.site_root`` and ``'VirtualHostRoot'`` are appended.
        ``HTTP_X_OPENPLANS_DOMAIN`` is set on ``environ`` to the host name
        without its port.  If ``self.rewrite_links`` is set, the proxied
        response goes through ``rewrite_links`` before being returned.
        """
        remote_uri = self.pick_remote_uri(environ)

        if self.is_opencore:
            # With varnish on port 80 proxying to the opencore stack
            # entrypoint, HTTP_HOST doesn't include the :80 bit. (I don't
            # know about other frontends.) Just to be safe, HTTP_HOST is
            # split into its parts and port 80 is used when no port is
            # given.
            #
            # The virtual host monster needs this information. If it's
            # missing, opencore will generate links with the port that Zope
            # is served on.
            template_vars = environ.copy()
            pieces = environ['HTTP_HOST'].split(':')
            if len(pieces) > 1:
                port = pieces[1]
            else:
                port = '80'
            template_vars['HTTP_HOST'] = pieces[0]
            template_vars['frontend_port'] = port
            vhm_segment = vhm_template % template_vars
            remote_uri = ''.join(
                (remote_uri, vhm_segment, self.site_root, 'VirtualHostRoot'))

        host_only = environ['HTTP_HOST'].split(':')[0]
        environ['HTTP_X_OPENPLANS_DOMAIN'] = host_only

        app = proxyapp.ForcedProxy(
            remote=remote_uri,
            force_host=True,
        )

        # work around bug in WSGIFilter
        from webob import Request
        request = Request(environ).copy()

        resp = request.get_response(app)

        if self.rewrite_links:
            # Arguments are computed in the order they are passed on.
            original_request = Request(environ)
            remote_base = url_normalize(remote_uri)
            local_base = url_normalize(Request(environ).application_url)
            full_url = '%s://%s%s' % (
                request.scheme, request.host, request.path_qs)
            resp = rewrite_links(
                original_request,
                resp,
                remote_base,
                local_base,
                url_normalize(full_url),
            )

        return resp(environ, start_response)
Exemple #4
0
    def proxy_to_wsgi(self, request, wsgi_app):
        """Run ``request`` through the in-process WSGI application ``wsgi_app``.

        Returns a 4-tuple ``(response, orig_base, None, None)``, where
        ``orig_base`` is the normalized application URL of ``request``.
        The last two slots are always ``None`` here.
        """
        base_url = url_normalize(request.application_url)

        ## FIXME: should this be request.copy()?
        # The inner app works on a shallow copy of the environ.
        inner_request = Request(request.environ.copy())
        response = inner_request.get_response(wsgi_app)

        return response, base_url, None, None
Exemple #5
0
    def proxy_to_wsgi(self, request, wsgi_app):
        """Forward ``request`` to an inner WSGI app and collect its response.

        The result is the tuple ``(response, orig_base, None, None)``:
        ``orig_base`` is ``request.application_url`` after
        ``url_normalize``, and the two trailing items are always ``None``.
        """
        normalized_base = url_normalize(request.application_url)

        ## FIXME: should this be request.copy()?
        # A new Request over a copied environ is what the inner app sees.
        environ_copy = request.environ.copy()
        inner_response = Request(environ_copy).get_response(wsgi_app)

        return inner_response, normalized_base, None, None
Exemple #6
0
    def construct_proxy_request(self, request, dest):
        """
        returns a new Request object constructed by copying `request`
        and replacing its url with the url passed in as `dest`

        The path, server name/port, query string and scheme are taken
        from `dest`.  The Host is replaced as well unless
        `self.keep_host` is set.  X-Forwarded-For/-Scheme/-Server headers
        describing the original request are added, and when
        `self.strip_script_name` is set the script name is moved into an
        X-Forwarded-Path header.

        @raises TypeError if `dest` is a file:// url; this can be
        caught by the caller and handled accordingly

        @raises AssertionError if `dest` contains a fragment, or if it
        has no explicit port and its scheme is not http, https or file
        """

        dest = url_normalize(dest)
        scheme, netloc, path, query, fragment = urlparse.urlsplit(dest)
        path = urllib.unquote(path)

        # Raised explicitly instead of using ``assert`` so that the check
        # is still performed when Python runs with -O.
        if fragment:
            raise AssertionError("Unexpected fragment: %r" % fragment)

        proxy_req = Request(request.environ.copy())

        proxy_req.path_info = path

        server_name, has_port, port = netloc.partition(':')
        proxy_req.server_name = server_name
        if has_port:
            proxy_req.server_port = port
        elif scheme == 'http':
            proxy_req.server_port = '80'
        elif scheme == 'https':
            proxy_req.server_port = '443'
        elif scheme == 'file':
            ## FIXME: is TypeError too general?
            raise TypeError("file: destination cannot be proxied: %r" % dest)
        else:
            # Explicit raise for the same -O reason as above; without it
            # an unsupported scheme would pass through with no port set.
            raise AssertionError(
                "bad scheme: %r (from %r)" % (scheme, dest))
        if not self.keep_host:
            proxy_req.host = netloc

        proxy_req.query_string = query
        proxy_req.scheme = scheme

        proxy_req.headers['X-Forwarded-For'] = request.remote_addr
        proxy_req.headers['X-Forwarded-Scheme'] = request.scheme
        proxy_req.headers['X-Forwarded-Server'] = request.host

        ## FIXME: something with path? proxy_req.headers['X-Forwarded-Path']
        ## (now we are only doing it with strip_script_name)
        if self.strip_script_name:
            proxy_req.headers['X-Forwarded-Path'] = proxy_req.script_name
            proxy_req.script_name = ''

        return proxy_req
Exemple #7
0
    def construct_proxy_request(self, request, dest):
        """
        returns a new Request object constructed by copying `request`
        and replacing its url with the url passed in as `dest`

        The path, server name/port, query string and scheme are taken
        from `dest`.  The Host is replaced as well unless
        `self.keep_host` is set.  X-Forwarded-For/-Scheme/-Server headers
        describing the original request are added, and when
        `self.strip_script_name` is set the script name is moved into an
        X-Forwarded-Path header.

        @raises TypeError if `dest` is a file:// url; this can be
        caught by the caller and handled accordingly

        @raises AssertionError if `dest` contains a fragment, or if it
        has no explicit port and its scheme is not http, https or file
        """

        dest = url_normalize(dest)
        scheme, netloc, path, query, fragment = urlparse.urlsplit(dest)
        path = urllib.unquote(path)

        # Raised explicitly instead of using ``assert`` so that the check
        # is still performed when Python runs with -O.
        if fragment:
            raise AssertionError("Unexpected fragment: %r" % fragment)

        proxy_req = Request(request.environ.copy())

        proxy_req.path_info = path

        server_name, has_port, port = netloc.partition(':')
        proxy_req.server_name = server_name
        if has_port:
            proxy_req.server_port = port
        elif scheme == 'http':
            proxy_req.server_port = '80'
        elif scheme == 'https':
            proxy_req.server_port = '443'
        elif scheme == 'file':
            ## FIXME: is TypeError too general?
            raise TypeError("file: destination cannot be proxied: %r" % dest)
        else:
            # Explicit raise for the same -O reason as above; without it
            # an unsupported scheme would pass through with no port set.
            raise AssertionError(
                "bad scheme: %r (from %r)" % (scheme, dest))
        if not self.keep_host:
            proxy_req.host = netloc

        proxy_req.query_string = query
        proxy_req.scheme = scheme

        proxy_req.headers['X-Forwarded-For'] = request.remote_addr
        proxy_req.headers['X-Forwarded-Scheme'] = request.scheme
        proxy_req.headers['X-Forwarded-Server'] = request.host

        ## FIXME: something with path? proxy_req.headers['X-Forwarded-Path']
        ## (now we are only doing it with strip_script_name)
        if self.strip_script_name:
            proxy_req.headers['X-Forwarded-Path'] = proxy_req.script_name
            proxy_req.script_name = ''

        return proxy_req
Exemple #8
0
class Proxy(object):
    """Represents one ``<proxy>`` element.

    This both matches requests, applies transformations, then sends
    off the request.  It also does local file serving when proxying to
    ``file:`` URLs.
    """

    def __init__(self, match, dest,
                 request_modifications, response_modifications,
                 strip_script_name=True, keep_host=False,
                 source_location=None, classes=None, editable=False,
                 wsgi=None):
        """Store the parsed settings and point ``match.proxy`` back at this
        instance.  Either ``dest`` or ``wsgi`` is the endpoint (see
        ``get_endpoint``)."""
        self.match = match
        self.match.proxy = self
        self.dest = dest
        self.strip_script_name = strip_script_name
        self.keep_host = keep_host
        self.request_modifications = request_modifications
        self.response_modifications = response_modifications
        self.source_location = source_location
        self.classes = classes
        self.editable = editable
        self.wsgi = wsgi

    def get_endpoint(self):
        """Return ``self.dest`` if it is set (truthy), else ``self.wsgi``."""
        ## FIXME: should we assert that one of these is not None?  I think so
        return self.dest or self.wsgi

    def log_description(self, log=None):
        """The debugging description for use in log display

        Returns an HTML fragment.  When ``log`` is given, the word
        "proxy" is linked to ``log.link_to(self.source_location, ...)``.
        """
        parts = []
        if log is None:
            parts.append('&lt;proxy')
        else:
            parts.append('&lt;<a href="%s" target="_blank">proxy</a>'
                         % log.link_to(self.source_location, source=True))
        ## FIXME: defaulting to true is bad
        # Only non-default settings are shown as attributes.
        if not self.strip_script_name:
            parts.append('strip-script-name="0"')
        if self.keep_host:
            parts.append('keep-host="1"')
        if self.editable:
            parts.append('editable="1"')
        parts.append('&gt;<br>\n')
        parts.append('&nbsp;' + self.get_endpoint().log_description(log))
        parts.append('<br>\n')
        if self.request_modifications:
            if len(self.request_modifications) > 1:
                parts.append('&nbsp;%i request modifications<br>\n'
                             % len(self.request_modifications))
            else:
                parts.append('&nbsp;1 request modification<br>\n')
        if self.response_modifications:
            if len(self.response_modifications) > 1:
                parts.append('&nbsp;%i response modifications<br>\n'
                             % len(self.response_modifications))
            else:
                parts.append('&nbsp;1 response modification<br>\n')
        parts.append('&lt;/proxy&gt;')
        return ' '.join(parts)

    @classmethod
    def parse_xml(cls, el, source_location):
        """Parse this document from an XML/etree element

        ``el`` must be a ``<proxy>`` element.  Its children may be
        ``<dest>``, ``<wsgi>``, ``<transform>``, ``<request>``,
        ``<response>`` or comments; anything else, or an invalid
        combination, raises ``DeliveranceSyntaxError``.
        """
        assert el.tag == 'proxy'
        match = ProxyMatch.parse_xml(el, source_location)
        dest = None
        wsgi = None
        request_modifications = []
        response_modifications = []
        strip_script_name = True
        keep_host = False
        editable = asbool(el.get('editable'))
        # The first <response> seen with rewrite_links set, if any.
        rewriting_links = None

        ## FIXME: this inline validation is a bit brittle because it is
        ##        order-dependent, but validation errors generally aren't
        for child in el:
            if child.tag == 'dest':
                if dest is not None:
                    raise DeliveranceSyntaxError(
                        "You cannot have more than one <dest> tag (second tag: %s)"
                        % xml_tostring(child),
                        element=child, source_location=source_location)
                if wsgi is not None:
                    raise DeliveranceSyntaxError(
                        "You cannot have both a <dest> tag and a <wsgi> tag (second tag: %s)"
                        % xml_tostring(child),
                        element=child, source_location=source_location)
                dest = ProxyDest.parse_xml(child, source_location)
            elif child.tag == 'wsgi':
                if wsgi is not None:
                    raise DeliveranceSyntaxError(
                        "You cannot have more than one <wsgi> tag (second tag: %s)"
                        % xml_tostring(child),
                        element=child, source_location=source_location)
                if dest is not None:
                    raise DeliveranceSyntaxError(
                        "You cannot have both a <dest> tag and a <wsgi> tag (second tag: %s)"
                        % xml_tostring(child),
                        element=child, source_location=source_location)
                if rewriting_links is not None:
                    raise DeliveranceSyntaxError(
                        "You cannot use ``<response rewrite-links='1'>`` in a proxy with a ``<wsgi>`` tag",
                        element=child, source_location=source_location)

                wsgi = ProxyWsgi.parse_xml(child, source_location)

            elif child.tag == 'transform':
                if child.get('strip-script-name'):
                    strip_script_name = asbool(child.get('strip-script-name'))
                if child.get('keep-host'):
                    keep_host = asbool(child.get('keep-host'))
                ## FIXME: error on other attrs
            elif child.tag == 'request':
                request_modifications.append(
                    ProxyRequestModification.parse_xml(child, source_location))
            elif child.tag == 'response':
                mod = ProxyResponseModification.parse_xml(child, source_location)
                if mod.rewrite_links == True:
                    # Only link rewriting conflicts with <wsgi>.  The check
                    # is nested here so that it mirrors the one in the
                    # <wsgi> branch and the result does not depend on
                    # whether <wsgi> comes before or after a plain
                    # <response>.
                    if wsgi is not None:
                        raise DeliveranceSyntaxError(
                            "You cannot use ``<response rewrite-links='1'>`` in a proxy with a ``<wsgi>`` tag",
                            element=child, source_location=source_location)
                    rewriting_links = mod

                response_modifications.append(mod)

            elif child.tag is Comment:
                continue
            else:
                raise DeliveranceSyntaxError(
                    "Unknown tag in <proxy>: %s" % xml_tostring(child),
                    element=child, source_location=source_location)
        if editable:
            if not dest:
                ## FIXME: should this always be a test?
                raise DeliveranceSyntaxError("You must have a <dest> tag",
                                             element=el, source_location=source_location)
            try:
                href = uri_template_substitute(
                    dest.href, dict(here=posixpath.dirname(source_location)))
            except KeyError:
                raise DeliveranceSyntaxError(
                    'You can only use <proxy editable="1"> if you have a <dest href="..."> that only contains {here} (you have %s)'
                    % (dest.href),
                    element=el, source_location=source_location)
            if not href.startswith('file:'):
                raise DeliveranceSyntaxError(
                    'You can only use <proxy editable="1"> if you have a <dest href="file:///..."> (you have %s)'
                    % (dest),
                    element=el, source_location=source_location)
        classes = el.get('class', '').split() or None
        inst = cls(match, dest, request_modifications, response_modifications,
                   strip_script_name=strip_script_name, keep_host=keep_host,
                   source_location=source_location, classes=classes,
                   editable=editable, wsgi=wsgi)
        match.proxy = inst
        return inst

    def forward_request(self, environ, start_response):
        """Forward this request to the remote server, or serve locally.

        This also applies all the request and response transformations.

        Raises ``AbortProxy`` when the destination has ``next`` set.
        """
        request = Request(environ)
        prefix = self.match.strip_prefix()
        if prefix:
            if prefix.endswith('/'):
                prefix = prefix[:-1]
            path_info = request.path_info
            if not path_info.startswith(prefix + '/') and not path_info == prefix:
                log = environ['deliverance.log']
                log.warn(
                    self, "The match would strip the prefix %r from the request "
                    "path (%r), but they do not match"
                    % (prefix + '/', path_info))
            else:
                # Move the matched prefix from PATH_INFO to SCRIPT_NAME.
                request.script_name = request.script_name + prefix
                request.path_info = path_info[len(prefix):]
        log = request.environ['deliverance.log']
        for modifier in self.request_modifications:
            request = modifier.modify_request(request, log)
        if self.dest and self.dest.next:
            raise AbortProxy

        # Both names are bound up front so that neither branch below can
        # leave one of them undefined.
        dest, wsgi_app = None, None
        if self.dest:
            dest = self.dest(request, log)
            log.debug(self, '<proxy> matched; forwarding request to %s' % dest)
        else:
            wsgi_app = self.wsgi(request, log)
            log.debug(self, '<proxy> matched; forwarding request to %s' % wsgi_app)

        if self.classes:
            log.debug(self, 'Adding class="%s" to page' % ' '.join(self.classes))
            existing_classes = request.environ.setdefault('deliverance.page_classes', [])
            existing_classes.extend(self.classes)

        if dest is not None:
            response, orig_base, proxied_base, proxied_url = self.proxy_to_dest(request, dest)
        else:
            ## FIXME: proxied_base and proxied_url don't really have a meaning here,
            ##        but the modifier signature expects them
            response, orig_base, proxied_base, proxied_url = self.proxy_to_wsgi(request, wsgi_app)

        for modifier in self.response_modifications:
            response = modifier.modify_response(request, response, orig_base,
                                                proxied_base, proxied_url, log)
        return response(environ, start_response)

    def construct_proxy_request(self, request, dest):
        """
        returns a new Request object constructed by copying `request`
        and replacing its url with the url passed in as `dest`

        @raises TypeError if `dest` is a file:// url; this can be
        caught by the caller and handled accordingly

        @raises AssertionError if `dest` contains a fragment, or if it
        has no explicit port and its scheme is not http, https or file
        """

        dest = url_normalize(dest)
        scheme, netloc, path, query, fragment = urlparse.urlsplit(dest)
        path = urllib.unquote(path)

        # Raised explicitly instead of using ``assert`` so that the check
        # is still performed when Python runs with -O.
        if fragment:
            raise AssertionError("Unexpected fragment: %r" % fragment)

        proxy_req = Request(request.environ.copy())

        proxy_req.path_info = path

        proxy_req.server_name = netloc.split(':', 1)[0]
        if ':' in netloc:
            proxy_req.server_port = netloc.split(':', 1)[1]
        elif scheme == 'http':
            proxy_req.server_port = '80'
        elif scheme == 'https':
            proxy_req.server_port = '443'
        elif scheme == 'file':
            ## FIXME: is TypeError too general?
            raise TypeError("file: destination cannot be proxied: %r" % dest)
        else:
            # Explicit raise for the same -O reason as above; without it
            # an unsupported scheme would pass through with no port set.
            raise AssertionError(
                "bad scheme: %r (from %r)" % (scheme, dest))
        if not self.keep_host:
            proxy_req.host = netloc

        proxy_req.query_string = query
        proxy_req.scheme = scheme

        proxy_req.headers['X-Forwarded-For'] = request.remote_addr
        proxy_req.headers['X-Forwarded-Scheme'] = request.scheme
        proxy_req.headers['X-Forwarded-Server'] = request.host

        ## FIXME: something with path? proxy_req.headers['X-Forwarded-Path']
        ## (now we are only doing it with strip_script_name)
        if self.strip_script_name:
            proxy_req.headers['X-Forwarded-Path'] = proxy_req.script_name
            proxy_req.script_name = ''

        return proxy_req

    def proxy_to_wsgi(self, request, wsgi_app):
        """ Forward a request to an inner wsgi app

        Returns ``(response, orig_base, None, None)``.
        """
        orig_base = url_normalize(request.application_url)

        ## FIXME: should this be request.copy()?
        proxy_req = Request(request.environ.copy())
        resp = proxy_req.get_response(wsgi_app)

        return resp, orig_base, None, None

    def proxy_to_dest(self, request, dest):
        """Do the actual proxying, without applying any transformations

        Returns ``(response, orig_base, dest, proxied_url)``.  A
        ``TypeError`` from ``construct_proxy_request`` hands the request
        to ``self.proxy_to_file``; a ``socket.error`` while proxying is
        turned into an ``HTTPServiceUnavailable`` response.
        """
        # construct_proxy_request copies the environ itself; request.copy
        # is not used because that would also copy wsgi.input.

        try:
            proxy_req = self.construct_proxy_request(request, dest)
        except TypeError:
            return self.proxy_to_file(request, dest)

        proxy_req.path_info += request.path_info

        if proxy_req.query_string and request.query_string:
            proxy_req.query_string = '%s&%s' % \
                (proxy_req.query_string, request.query_string)
        elif request.query_string:
            proxy_req.query_string = request.query_string

        proxy_req.accept_encoding = None
        try:
            resp = proxy_req.get_response(proxy_exact_request)
            if resp.status_int == 500:
                # Single-argument print(...) gives the same output whether
                # print is a statement or a function.
                print('Request:')
                print(proxy_req)
                print('Response:')
                print(resp)
        except socket.error as e:
            ## FIXME: really wsgiproxy should handle this
            ## FIXME: which error?
            ## 502 HTTPBadGateway, 503 HTTPServiceUnavailable, 504 HTTPGatewayTimeout?
            if isinstance(e.args, tuple) and len(e.args) > 1:
                error = e.args[1]
            else:
                error = str(e)
            resp = exc.HTTPServiceUnavailable(
                'Could not proxy the request to %s:%s : %s'
                % (proxy_req.server_name, proxy_req.server_port, error))

        dest = url_normalize(dest)
        orig_base = url_normalize(request.application_url)
        proxied_url = url_normalize('%s://%s%s' % (proxy_req.scheme,
                                                   proxy_req.host,
                                                   proxy_req.path_qs))

        return resp, orig_base, dest, proxied_url