コード例 #1
0
ファイル: proxy.py プロジェクト: chdorner/pywb
    def __call__(self, env):
        is_https = (env['REQUEST_METHOD'] == 'CONNECT')
        ArchivalRouter.ensure_rel_uri_set(env)

        # for non-https requests, check non-proxy urls
        if not is_https:
            url = env['REL_REQUEST_URI']

            if not url.startswith(('http://', 'https://')):
                return None

            env['pywb.proxy_scheme'] = 'http'

        route = None
        coll = None
        matcher = None
        response = None
        ts = None

        # check resolver, for pre connect resolve
        if self.resolver.pre_connect:
            route, coll, matcher, ts, response = self.resolver.resolve(env)
            if response:
                return response

        # do connect, then get updated url
        if is_https:
            response = self.handle_connect(env)
            if response:
                return response

            url = env['REL_REQUEST_URI']
        else:
            parts = urlsplit(env['REL_REQUEST_URI'])
            hostport = parts.netloc.split(':', 1)
            env['pywb.proxy_host'] = hostport[0]
            env['pywb.proxy_port'] = hostport[1] if len(hostport) == 2 else ''
            env['pywb.proxy_req_uri'] = parts.path
            if parts.query:
                env['pywb.proxy_req_uri'] += '?' + parts.query
                env['pywb.proxy_query'] = parts.query

        if self.resolver.supports_switching:
            env['pywb_proxy_magic'] = self.magic_name

        # route (static) and other resources to archival replay
        if env['pywb.proxy_host'] == self.magic_name:
            env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']

            # special case for proxy install
            response = self.handle_cert_install(env)
            if response:
                return response

            return None

        # check resolver, post connect
        if not self.resolver.pre_connect:
            route, coll, matcher, ts, response = self.resolver.resolve(env)
            if response:
                return response

        rel_prefix = ''

        custom_prefix = env.get('HTTP_PYWB_REWRITE_PREFIX', '')
        if custom_prefix:
            host_prefix = custom_prefix
            urlrewriter_class = UrlRewriter
            abs_prefix = True
            # always rewrite to absolute here
            rewrite_opts = dict(no_match_rel=True)
        else:
            host_prefix = env['pywb.proxy_scheme'] + '://' + self.magic_name
            urlrewriter_class = SchemeOnlyUrlRewriter
            abs_prefix = False
            rewrite_opts = {}

        # special case for proxy calendar
        if (env['pywb.proxy_host'] == 'query.' + self.magic_name):
            url = env['pywb.proxy_req_uri'][1:]
            rel_prefix = '/'

        if ts is not None:
            url = ts + '/' + url

        wbrequest = route.request_class(env,
                              request_uri=url,
                              wb_url_str=url,
                              coll=coll,
                              host_prefix=host_prefix,
                              rel_prefix=rel_prefix,
                              wburl_class=route.handler.get_wburl_type(),
                              urlrewriter_class=urlrewriter_class,
                              use_abs_prefix=abs_prefix,
                              rewrite_opts=rewrite_opts,
                              is_proxy=True)

        if matcher:
            route.apply_filters(wbrequest, matcher)

        # full rewrite and banner
        if self.use_wombat and self.use_banner:
            wbrequest.wb_url.mod = ''
        elif self.use_banner:
        # banner only, no rewrite
            wbrequest.wb_url.mod = 'bn_'
        else:
        # unaltered, no rewrite or banner
            wbrequest.wb_url.mod = 'uo_'

        response = route.handler(wbrequest)
        if not response:
            return None

        # add extra headers for replay responses
        if wbrequest.wb_url and wbrequest.wb_url.is_replay():
            response.status_headers.replace_headers(self.extra_headers)

        # check for content-length
        res = response.status_headers.get_header('content-length')
        try:
            if int(res) > 0:
                return response
        except:
            pass

        # need to either chunk or buffer to get content-length
        if env.get('SERVER_PROTOCOL') == 'HTTP/1.1':
            response.status_headers.remove_header('content-length')
            response.status_headers.headers.append(('Transfer-Encoding', 'chunked'))
            response.body = self._chunk_encode(response.body)
        else:
            response.body = self._buffer_response(response.status_headers,
                                                  response.body)

        return response
コード例 #2
0
ファイル: proxy.py プロジェクト: robertknight/pywb
    def __call__(self, env):
        is_https = (env['REQUEST_METHOD'] == 'CONNECT')
        ArchivalRouter.ensure_rel_uri_set(env)

        # for non-https requests, check non-proxy urls
        if not is_https:
            url = env['REL_REQUEST_URI']

            if not url.startswith(('http://', 'https://')):
                return None

            env['pywb.proxy_scheme'] = 'http'

        route = None
        coll = None
        matcher = None
        response = None
        ts = None

        # check resolver, for pre connect resolve
        if self.resolver.pre_connect:
            route, coll, matcher, ts, response = self.resolver.resolve(env)
            if response:
                return response

        # do connect, then get updated url
        if is_https:
            response = self.handle_connect(env)
            if response:
                return response

            url = env['REL_REQUEST_URI']
        else:
            parts = urlparse.urlsplit(env['REL_REQUEST_URI'])
            hostport = parts.netloc.split(':', 1)
            env['pywb.proxy_host'] = hostport[0]
            env['pywb.proxy_port'] = hostport[1] if len(hostport) == 2 else ''
            env['pywb.proxy_req_uri'] = parts.path
            if parts.query:
                env['pywb.proxy_req_uri'] += '?' + parts.query
                env['pywb.proxy_query'] = parts.query

        env['pywb_proxy_magic'] = self.magic_name

        # route (static) and other resources to archival replay
        if env['pywb.proxy_host'] == self.magic_name:
            env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']

            # special case for proxy install
            response = self.handle_cert_install(env)
            if response:
                return response

            return None

        # check resolver, post connect
        if not self.resolver.pre_connect:
            route, coll, matcher, ts, response = self.resolver.resolve(env)
            if response:
                return response

        host_prefix = env['pywb.proxy_scheme'] + '://' + self.magic_name
        rel_prefix = ''

        # special case for proxy calendar
        if (env['pywb.proxy_host'] == 'query.' + self.magic_name):
            url = env['pywb.proxy_req_uri'][1:]
            rel_prefix = '/'

        if ts is not None:
            url = ts + '/' + url

        wbrequest = route.request_class(env,
                              request_uri=url,
                              wb_url_str=url,
                              coll=coll,
                              host_prefix=host_prefix,
                              rel_prefix=rel_prefix,
                              wburl_class=route.handler.get_wburl_type(),
                              urlrewriter_class=HttpsUrlRewriter,
                              use_abs_prefix=False,
                              is_proxy=True)

        if matcher:
            route.apply_filters(wbrequest, matcher)

        # full rewrite and banner
        if self.use_wombat and self.use_banner:
            wbrequest.wb_url.mod = ''
        elif self.use_banner:
        # banner only, no rewrite
            wbrequest.wb_url.mod = 'bn_'
        else:
        # unaltered, no rewrite or banner
            wbrequest.wb_url.mod = 'id_'

        response = route.handler(wbrequest)

        if wbrequest.wb_url and wbrequest.wb_url.is_replay():
            response.status_headers.replace_headers(self.extra_headers)

        return response
コード例 #3
0
    def __call__(self, env):
        is_https = (env['REQUEST_METHOD'] == 'CONNECT')
        ArchivalRouter.ensure_rel_uri_set(env)

        # for non-https requests, check non-proxy urls
        if not is_https:
            url = env['REL_REQUEST_URI']

            if not url.startswith(('http://', 'https://')):
                return None

            env['pywb.proxy_scheme'] = 'http'

        route = None
        coll = None
        matcher = None
        response = None
        ts = None

        # check resolver, for pre connect resolve
        if self.resolver.pre_connect:
            route, coll, matcher, ts, response = self.resolver.resolve(env)
            if response:
                return response

        # do connect, then get updated url
        if is_https:
            response = self.handle_connect(env)
            if response:
                return response

            url = env['REL_REQUEST_URI']
        else:
            parts = urlsplit(env['REL_REQUEST_URI'])
            hostport = parts.netloc.split(':', 1)
            env['pywb.proxy_host'] = hostport[0]
            env['pywb.proxy_port'] = hostport[1] if len(hostport) == 2 else ''
            env['pywb.proxy_req_uri'] = parts.path
            if parts.query:
                env['pywb.proxy_req_uri'] += '?' + parts.query
                env['pywb.proxy_query'] = parts.query

        if self.resolver.supports_switching:
            env['pywb_proxy_magic'] = self.magic_name

        # route (static) and other resources to archival replay
        if env['pywb.proxy_host'] == self.magic_name:
            env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']

            # special case for proxy install
            response = self.handle_cert_install(env)
            if response:
                return response

            return None

        # check resolver, post connect
        if not self.resolver.pre_connect:
            route, coll, matcher, ts, response = self.resolver.resolve(env)
            if response:
                return response

        rel_prefix = ''

        custom_prefix = env.get('HTTP_PYWB_REWRITE_PREFIX', '')
        if custom_prefix:
            host_prefix = custom_prefix
            urlrewriter_class = UrlRewriter
            abs_prefix = True
            # always rewrite to absolute here
            rewrite_opts = dict(no_match_rel=True)
        else:
            host_prefix = env['pywb.proxy_scheme'] + '://' + self.magic_name
            urlrewriter_class = SchemeOnlyUrlRewriter
            abs_prefix = False
            rewrite_opts = {}

        # special case for proxy calendar
        if (env['pywb.proxy_host'] == 'query.' + self.magic_name):
            url = env['pywb.proxy_req_uri'][1:]
            rel_prefix = '/'

        if ts is not None:
            url = ts + '/' + url

        wbrequest = route.request_class(
            env,
            request_uri=url,
            wb_url_str=url,
            coll=coll,
            host_prefix=host_prefix,
            rel_prefix=rel_prefix,
            wburl_class=route.handler.get_wburl_type(),
            urlrewriter_class=urlrewriter_class,
            use_abs_prefix=abs_prefix,
            rewrite_opts=rewrite_opts,
            is_proxy=True)

        if matcher:
            route.apply_filters(wbrequest, matcher)

        # full rewrite and banner
        if self.use_wombat and self.use_banner:
            wbrequest.wb_url.mod = ''
        elif self.use_banner:
            # banner only, no rewrite
            wbrequest.wb_url.mod = 'bn_'
        else:
            # unaltered, no rewrite or banner
            wbrequest.wb_url.mod = 'uo_'

        response = route.handler(wbrequest)
        if not response:
            return None

        # add extra headers for replay responses
        if wbrequest.wb_url and wbrequest.wb_url.is_replay():
            response.status_headers.replace_headers(self.extra_headers)

        # check for content-length
        res = response.status_headers.get_header('content-length')
        try:
            if int(res) > 0:
                return response
        except:
            pass

        # need to either chunk or buffer to get content-length
        if env.get('SERVER_PROTOCOL') == 'HTTP/1.1':
            response.status_headers.remove_header('content-length')
            response.status_headers.headers.append(
                ('Transfer-Encoding', 'chunked'))
            response.body = self._chunk_encode(response.body)
        else:
            response.body = self._buffer_response(response.status_headers,
                                                  response.body)

        return response