Example #1
0
    def handle_cert_install(self, env):
        """Serve the certificate-install page or one of the CA downloads.

        Dispatches on ``env['pywb.proxy_req_uri']``:

        * ``/`` or ``/index.html`` -- render ``self.proxy_cert_dl_view`` (when
          one is set) with the two download paths and a flag telling whether
          ``self.ca`` is present.
        * ``self.CERT_DL_PEM`` -- respond with the raw bytes of
          ``self.ca.ca_file``.
        * ``self.CERT_DL_P12`` -- respond with ``self.ca.get_root_PKCS12()``.

        Returns ``None`` when the path matches nothing, when no view is
        configured for the landing page, or when a download is requested but
        ``self.ca`` is falsy.
        """
        req_uri = env['pywb.proxy_req_uri']

        # Landing page listing the available downloads.
        if req_uri in ('/', '/index.html'):
            ca_available = self.ca is not None
            view = self.proxy_cert_dl_view
            if not view:
                return None

            return view.render_response(available=ca_available,
                                        pem_path=self.CERT_DL_PEM,
                                        p12_path=self.CERT_DL_P12)

        # PEM download: stream the CA file contents back as-is.
        if req_uri == self.CERT_DL_PEM:
            if not self.ca:
                return None

            with open(self.ca.ca_file, 'rb') as cert_file:
                pem_data = cert_file.read()

            return WbResponse.text_response(
                pem_data,
                content_type='application/x-x509-ca-cert')

        # PKCS12 download: the CA object produces the bundle itself.
        if req_uri == self.CERT_DL_P12:
            if not self.ca:
                return None

            return WbResponse.text_response(
                self.ca.get_root_PKCS12(),
                content_type='application/x-pkcs12')

        return None
Example #2
0
    def _get_video_info(self, wbrequest, info_url=None, video_url=None):
        """Look up video metadata and return it as a JSON text response.

        ``video_url`` and ``info_url`` each default to the request's own URL.
        The metadata comes from ``self.live_fetcher.get_video_info``; a
        ``None`` result yields a 404 response explaining that youtube-dl is
        missing.  When ``self.recording`` is set, the serialized metadata is
        also handed to ``self.live_fetcher.add_metadata`` (with an ``https``
        ``info_url`` rewritten to ``http``) and the cache key for the video
        is marked in ``self._cache``.
        """
        video_url = video_url or wbrequest.wb_url.url
        info_url = info_url or wbrequest.wb_url.url

        cache_key = (self._get_cache_key('v:', video_url)
                     if self.recording else None)

        video_info = self.live_fetcher.get_video_info(video_url)
        if video_info is None:  #pragma: no cover
            return WbResponse.text_response(
                text=('youtube-dl is not installed, pip install youtube-dl to ' +
                      'enable improved video proxy'),
                status='404 Not Found')

        content_type = self.YT_DL_TYPE
        metadata = json.dumps(video_info)

        if self.recording and cache_key:
            meta_headers = self._live_request_headers(wbrequest)
            meta_headers['Content-Type'] = content_type

            # Store the metadata record under the plain-http form of the url.
            if info_url.startswith('https://'):
                info_url = 'http' + info_url[len('https'):]

            self.live_fetcher.add_metadata(info_url, meta_headers, metadata)
            self._cache[cache_key] = '1'

        return WbResponse.text_response(metadata, content_type=content_type)
Example #3
0
    def _get_video_info(self, wbrequest, info_url=None, video_url=None):
        """Return the live fetcher's video info for a url as JSON.

        Both url arguments fall back to ``wbrequest.wb_url.url`` when not
        given.  If the fetcher returns ``None`` a 404 text response with an
        install hint is returned instead.  While ``self.recording`` is true
        the JSON is additionally submitted through
        ``self.live_fetcher.add_metadata`` and the video's cache key is set
        to ``'1'`` in ``self._cache``.
        """
        if not video_url:
            video_url = wbrequest.wb_url.url
        if not info_url:
            info_url = wbrequest.wb_url.url

        cache_key = None
        if self.recording:
            cache_key = self._get_cache_key('v:', video_url)

        fetched = self.live_fetcher.get_video_info(video_url)
        if fetched is None:  #pragma: no cover
            not_installed = ('youtube-dl is not installed, pip install youtube-dl to ' +
                             'enable improved video proxy')
            return WbResponse.text_response(text=not_installed,
                                            status='404 Not Found')

        metadata = json.dumps(fetched)
        content_type = self.YT_DL_TYPE

        if not (self.recording and cache_key):
            return WbResponse.text_response(metadata,
                                            content_type=content_type)

        record_headers = self._live_request_headers(wbrequest)
        record_headers['Content-Type'] = content_type

        # An https info url is rewritten to http before the metadata is added.
        if info_url.startswith('https://'):
            info_url = info_url.replace('https', 'http', 1)

        self.live_fetcher.add_metadata(info_url, record_headers, metadata)
        self._cache[cache_key] = '1'

        return WbResponse.text_response(metadata, content_type=content_type)
Example #4
0
 def render_search_page(self, wbrequest, **kwargs):
     """Render the search page, or a plain-text notice if no view is set.

     Extra keyword arguments are forwarded to the view's
     ``render_response`` together with the request and its ``wb_prefix``.
     """
     view = self.search_view
     if not view:
         return WbResponse.text_response('No Lookup Url Specified')

     return view.render_response(wbrequest=wbrequest,
                                 prefix=wbrequest.wb_prefix,
                                 **kwargs)
Example #5
0
def test_resp_1():
    """A default text response has a 200 status, text/plain headers and a bytes body."""
    expected_headers = StatusAndHeaders(
        protocol='',
        statusline='200 OK',
        headers=[('Content-Type', 'text/plain; charset=utf-8'),
                 ('Content-Length', '4')])

    actual = vars(WbResponse.text_response('Test'))

    assert actual == {'body': [b'Test'], 'status_headers': expected_headers}
Example #6
0
 def render_response(self, **kwargs):
     """Render the template with ``kwargs`` and wrap it in a text response.

     ``status`` and ``content_type`` are read from ``kwargs`` and default
     to ``'200 OK'`` and ``'text/html; charset=utf-8'``.
     """
     rendered = self.render_to_string(**kwargs)
     return WbResponse.text_response(
         rendered,
         status=kwargs.get('status', '200 OK'),
         content_type=kwargs.get('content_type', 'text/html; charset=utf-8'))
Example #7
0
 def render_response(self, status='200 OK', content_type='text/html; charset=utf-8', **template_kwargs):
     """Render ``self.filename`` through ``loader`` and return a text response.

     The template context is the extra keyword arguments plus ``status``
     and ``content_type``; the same two values are used for the response.
     """
     context = dict(template_kwargs)
     context['status'] = status
     context['content_type'] = content_type

     rendered = loader.render_to_string(self.filename,
                                        context,
                                        request=self.fake_request)

     return WbResponse.text_response(unicode(rendered),
                                     status=status,
                                     content_type=content_type)
    def __call__(self, wbrequest):
        """Return every embed ``redis_client`` has for the request url, as JSON."""
        embeds = redis_client.get_all_embeds(wbrequest.wb_url)
        body = json.dumps(embeds)

        return WbResponse.text_response(body, content_type='application/json')
Example #9
0
 def render_response(self, **kwargs):
     """Render the template and return it as a UTF-8 encoded text response.

     ``status`` and ``content_type`` come from ``kwargs`` when present,
     otherwise ``'200 OK'`` and ``'text/html; charset=utf-8'`` are used.
     """
     encoded = self.render_to_string(**kwargs).encode('utf-8')
     response_args = {
         'status': kwargs.get('status', '200 OK'),
         'content_type': kwargs.get('content_type', 'text/html; charset=utf-8'),
     }
     return WbResponse.text_response(encoded, **response_args)
Example #10
0
 def render_response(self, **kwargs):
     """Render the template with settings values added and return the response.

     ``STATIC_URL`` and ``DEBUG`` from ``settings`` are merged over the
     caller's keyword arguments before rendering.  The rendered text is
     encoded as UTF-8; ``status`` and ``content_type`` are taken from
     ``kwargs`` with ``'200 OK'`` / ``'text/html; charset=utf-8'`` defaults.
     """
     context = dict(kwargs)
     context.update(STATIC_URL=settings.STATIC_URL, DEBUG=settings.DEBUG)

     rendered = self.render_to_string(**context)

     return WbResponse.text_response(
         rendered.encode('utf-8'),
         status=kwargs.get('status', '200 OK'),
         content_type=kwargs.get('content_type', 'text/html; charset=utf-8'))
Example #11
0
 def render_search_page(self, wbrequest, **kwargs):
     """Delegate to ``self.search_view`` or fall back to a text message.

     With a search view set, its ``render_response`` receives the request,
     the request's ``wb_prefix`` and any extra keyword arguments.
     """
     if not self.search_view:
         return WbResponse.text_response('No Lookup Url Specified')

     view_args = dict(wbrequest=wbrequest, prefix=wbrequest.wb_prefix)
     return self.search_view.render_response(**view_args, **kwargs)
Example #12
0
    def handle_exception(self, env, exc, print_trace):
        """Build an error response for an exception raised during a request.

        :param env: WSGI environ; passed through to the error view.
        :param exc: the exception being handled.  Its optional ``status``
            (attribute or callable) and ``url`` attributes and its first
            positional argument are used.
        :param print_trace: when true, print the current traceback and pass
            it on as ``err_details``; otherwise only log the message.
        :return: ``error_view.render_response(...)`` when ``self.wb_router``
            has an ``error_view``, else a plain-text ``WbResponse``.
        """
        error_view = None

        if hasattr(self.wb_router, 'error_view'):
            error_view = self.wb_router.error_view

        status = '500 Internal Server Error'
        if hasattr(exc, 'status'):
            exc_status = exc.status() if callable(exc.status) else exc.status
            # wsgi requires status
            #  - to have at least 4 characters and
            #  - to start with a number / integer
            # Exact type checks are kept on purpose: as before, subclasses
            # (e.g. bool) are not accepted and fall back to the 500 default.
            if type(exc_status) is int:
                status = '{} Exception {}'.format(exc_status,
                                                  type(exc).__name__)
            elif type(exc_status) is str and exc_status[:1].isdigit():
                # slicing (not indexing) so an empty string cannot raise
                # IndexError while we are already handling an error
                status = exc_status

        err_url = getattr(exc, 'url', None)

        # Always bind err_msg: an exception raised without arguments used to
        # leave it undefined and crash this handler with UnboundLocalError.
        err_msg = str(exc.args[0]) if exc.args else None

        if print_trace:
            import traceback
            err_details = traceback.format_exc()
            print(err_details)
        else:
            logging.info(err_msg)
            err_details = None

        if error_view:
            if err_url and isinstance(err_url, str):
                err_url = to_native_str(err_url, 'utf-8')
            if err_msg and isinstance(err_msg, str):
                err_msg = to_native_str(err_msg, 'utf-8')

            return error_view.render_response(exc_type=type(exc).__name__,
                                              err_msg=err_msg,
                                              err_details=err_details,
                                              status=status,
                                              env=env,
                                              err_url=err_url)
        else:
            msg = status + ' Error: '
            if err_msg:
                msg += err_msg

            #msg = msg.encode('utf-8', 'ignore')
            return WbResponse.text_response(msg, status=status)
Example #13
0
def test_resp_4():
    """``add_range(10, 4, 100)`` yields a 206 response with range headers."""
    ranged = WbResponse.text_response('Test').add_range(10, 4, 100)

    expected_headers = [
        ('Content-Type', 'text/plain; charset=utf-8'),
        ('Content-Length', '4'),
        ('Content-Range', 'bytes 10-13/100'),
        ('Accept-Ranges', 'bytes'),
    ]
    expected = dict(
        body=[b'Test'],
        status_headers=StatusAndHeaders(protocol='',
                                        statusline='206 Partial Content',
                                        headers=expected_headers))

    assert vars(ranged) == expected
Example #14
0
    def _get_video_info(self, wbrequest, info_url=None, video_url=None):
        """Extract video info with youtube-dl and return it as JSON.

        A ``YoutubeDLWrapper`` is created on first use and kept on
        ``self.youtubedl``.  ``video_url`` and ``info_url`` default to the
        request url.  A ``None`` extraction result produces a 404 text
        response with an install hint.  When ``self.proxies`` is set, the
        JSON is also sent with a ``PUTMETA`` request (certificate
        verification disabled) to the https-stripped ``info_url`` through
        those proxies, and the video's cache key is set in ``self._cache``.
        """
        if not self.youtubedl:
            self.youtubedl = YoutubeDLWrapper()

        video_url = video_url or wbrequest.wb_url.url
        info_url = info_url or wbrequest.wb_url.url

        cache_key = (self._get_cache_key('v:', video_url)
                     if self.proxies else None)

        extracted = self.youtubedl.extract_info(video_url)
        if extracted is None:  #pragma: no cover
            return WbResponse.text_response(
                text=('youtube-dl is not installed, pip install youtube-dl to ' +
                      'enable improved video proxy'),
                status='404 Not Found')

        content_type = self.YT_DL_TYPE
        metadata = json.dumps(extracted)

        if self.proxies and cache_key:
            put_headers = self._live_request_headers(wbrequest)
            put_headers['Content-Type'] = content_type

            info_url = HttpsUrlRewriter.remove_https(info_url)

            # The reply to the PUTMETA request is not inspected.
            requests.request(method='PUTMETA',
                             url=info_url,
                             data=metadata,
                             headers=put_headers,
                             proxies=self.proxies,
                             verify=False)

            self._cache[cache_key] = '1'

        return WbResponse.text_response(metadata, content_type=content_type)
Example #15
0
    def check_single_url(self, wbrequest, perms_checker):
        """Report whether ``perms_checker`` allows looking up the request url.

        The url is canonicalized with ``self.url_canon`` first; the response
        body is ``ALLOW`` or ``BLOCK``, sent with ``RESPONSE_TYPE``.
        """
        urlkey = self.url_canon(wbrequest.wb_url.url)
        allowed = perms_checker.allow_url_lookup(urlkey)

        #TODO: other types of checking
        return WbResponse.text_response(ALLOW if allowed else BLOCK,
                                        content_type=RESPONSE_TYPE)
Example #16
0
    def check_single_url(self, wbrequest, perms_checker):
        """Answer ``ALLOW`` or ``BLOCK`` for the request url.

        The url is canonicalized via ``self.url_canon`` and UTF-8 encoded
        before being passed to ``perms_checker.allow_url_lookup``.
        """
        urlkey = self.url_canon(wbrequest.wb_url.url).encode('utf-8')

        if perms_checker.allow_url_lookup(urlkey):
            verdict = ALLOW
        else:
            verdict = BLOCK

        #TODO: other types of checking
        return WbResponse.text_response(verdict, content_type=RESPONSE_TYPE)
Example #17
0
    def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
        """Render a template for matching content types, else defer to the parent.

        For requests that are neither embeds nor identity requests, the
        response's ``Content-Type`` header is looked up in ``self.templates``.
        A hit renders that template with the request url and returns it as
        ``text/html``; otherwise the parent implementation is used.
        """
        wb_url = wbrequest.wb_url

        # only redirect for non-identity and non-embeds
        template_name = None
        if not (wb_url.is_embed or wb_url.is_identity):
            template_name = self.templates.get(
                status_headers.get_header("Content-Type"))

        if template_name is None:
            return super(TemplateRewriteHandler, self)._make_response(
                wbrequest, status_headers, gen, is_rewritten)

        rendered = env.get_template(template_name).render(url=wbrequest.wb_url.url)
        return WbResponse.text_response(rendered.encode("utf-8-sig"),
                                        content_type="text/html")
Example #18
0
 def render_response(self,
                     status='200 OK',
                     content_type='text/html; charset=utf-8',
                     **template_kwargs):
     """Render ``self.filename`` and return it as a text response.

     ``status`` and ``content_type`` are added to the template context
     alongside the extra keyword arguments and are also applied to the
     response itself.
     """
     context = dict(template_kwargs)
     context.update(status=status, content_type=content_type)

     rendered = loader.render_to_string(self.filename,
                                        context,
                                        request=self.fake_request)

     response_args = dict(status=status, content_type=content_type)
     return WbResponse.text_response(unicode(rendered), **response_args)
Example #19
0
def test_resp_1():
    """``text_response('Test')`` produces a 200 text/plain response of length 4."""
    headers = [('Content-Type', 'text/plain; charset=utf-8'),
               ('Content-Length', '4')]
    status_headers = StatusAndHeaders(protocol='',
                                      statusline='200 OK',
                                      headers=headers)

    resp = vars(WbResponse.text_response('Test'))

    assert resp == dict(body=[b'Test'], status_headers=status_headers)
Example #20
0
 def render_response(self,
                     status='200 OK',
                     content_type='text/html; charset=utf-8',
                     **template_kwargs):
     """Render ``self.filename`` and return the result as a text response.

     The template context consists of the extra keyword arguments plus
     ``status`` and ``content_type``, which are also used for the response.
     """
     context = dict(template_kwargs)
     context['status'] = status
     context['content_type'] = content_type

     rendered = loader.render_to_string(self.filename,
                                        context,
                                        request=self.fake_request)

     # We have to cast the Django SafeText class to str because wsgiref can't handle subclasses
     body = str(rendered)

     return WbResponse.text_response(body,
                                     status=status,
                                     content_type=content_type)
Example #21
0
    def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
        """Serve a template for known content types; otherwise use the parent.

        Only requests that are neither embeds nor identity requests are
        considered.  The ``Content-Type`` header selects a template name from
        ``self.templates``; when one is found it is rendered with the request
        url and returned as ``text/html``.
        """
        # only redirect for non-identity and non-embeds
        skip_template = wbrequest.wb_url.is_embed or wbrequest.wb_url.is_identity

        if not skip_template:
            header_value = status_headers.get_header('Content-Type')
            tpl_name = self.templates.get(header_value)

            if tpl_name is not None:
                page = env.get_template(tpl_name).render(url=wbrequest.wb_url.url)
                return WbResponse.text_response(page.encode('utf-8-sig'),
                                                content_type='text/html')

        parent = super(TemplateRewriteHandler, self)
        return parent._make_response(wbrequest, status_headers, gen,
                                     is_rewritten)
Example #22
0
def test_resp_4():
    """Adding a range to a text response gives a 206 with Content-Range set."""
    response = WbResponse.text_response('Test')
    resp = vars(response.add_range(10, 4, 100))

    status_headers = StatusAndHeaders(
        protocol='',
        statusline='206 Partial Content',
        headers=[
            ('Content-Type', 'text/plain; charset=utf-8'),
            ('Content-Length', '4'),
            ('Content-Range', 'bytes 10-13/100'),
            ('Accept-Ranges', 'bytes'),
        ])

    assert resp == {'body': [b'Test'], 'status_headers': status_headers}
Example #23
0
    def handle_exception(self, env, exc, print_trace):
        """Build an error response for an exception raised during a request.

        :param env: WSGI environ; passed through to the error view.
        :param exc: the exception being handled.  ``exc.status()`` (when the
            attribute exists), ``exc.url`` and the first positional argument
            are used.
        :param print_trace: when true, print the current traceback and pass
            it on as ``err_details``; otherwise only log the message.
        :return: ``error_view.render_response(...)`` when ``self.wb_router``
            has an ``error_view``, else a plain-text ``WbResponse``.
        """
        error_view = None

        if hasattr(self.wb_router, 'error_view'):
            error_view = self.wb_router.error_view

        if hasattr(exc, 'status'):
            status = exc.status()
        else:
            status = '500 Internal Server Error'

        if hasattr(exc, 'url'):
            err_url = exc.url
        else:
            err_url = None

        # Always bind err_msg: an exception raised without arguments used to
        # leave it undefined and crash this handler with UnboundLocalError.
        if len(exc.args):
            err_msg = exc.args[0]
        else:
            err_msg = None

        if print_trace:
            import traceback
            err_details = traceback.format_exc()
            print(err_details)
        else:
            logging.info(err_msg)
            err_details = None

        if error_view:
            if err_url and isinstance(err_url, str):
                err_url = to_native_str(err_url, 'utf-8')
            if err_msg and isinstance(err_msg, str):
                err_msg = to_native_str(err_msg, 'utf-8')

            return error_view.render_response(exc_type=type(exc).__name__,
                                              err_msg=err_msg,
                                              err_details=err_details,
                                              status=status,
                                              env=env,
                                              err_url=err_url)
        else:
            msg = status + ' Error: '
            if err_msg:
                msg += err_msg

            #msg = msg.encode('utf-8', 'ignore')
            return WbResponse.text_response(msg,
                                           status=status)
Example #24
0
    def handle_exception(self, env, exc, print_trace):
        """Convert an exception into an error response.

        :param env: WSGI environ; forwarded to the error view.
        :param exc: the exception being handled.  When present,
            ``exc.status()`` supplies the status line and ``exc.url`` the
            failing url; the first positional argument is the message.
        :param print_trace: when true, the formatted traceback is printed and
            passed on as ``err_details``; otherwise the message is logged at
            info level.
        :return: the rendered ``error_view`` of ``self.wb_router`` when it
            has one, else a plain-text ``WbResponse``.
        """
        error_view = getattr(self.wb_router, 'error_view', None)

        if hasattr(exc, 'status'):
            status = exc.status()
        else:
            status = '500 Internal Server Error'

        err_url = getattr(exc, 'url', None)

        # Default to None so exceptions raised without arguments no longer
        # leave err_msg unbound (which made this handler itself fail).
        err_msg = exc.args[0] if exc.args else None

        if print_trace:
            import traceback
            err_details = traceback.format_exc()
            print(err_details)
        else:
            logging.info(err_msg)
            err_details = None

        if error_view:
            if err_url and isinstance(err_url, str):
                err_url = to_native_str(err_url, 'utf-8')
            if err_msg and isinstance(err_msg, str):
                err_msg = to_native_str(err_msg, 'utf-8')

            return error_view.render_response(exc_type=type(exc).__name__,
                                              err_msg=err_msg,
                                              err_details=err_details,
                                              status=status,
                                              env=env,
                                              err_url=err_url)

        msg = status + ' Error: '
        if err_msg:
            msg += err_msg

        #msg = msg.encode('utf-8', 'ignore')
        return WbResponse.text_response(msg,
                                       status=status)
Example #25
0
    def __call__(self, wbrequest):
        """Stream the index lookup result, or return a 404 if nothing matched.

        Query parameters are extracted from the WSGI environ and passed to
        ``self.index_handler.load_cdx``.  On ``NotFoundException`` the error
        message is returned as JSON when ``output=json`` was requested and as
        plain text otherwise.
        """
        params = self.extract_params_from_wsgi_env(wbrequest.env)

        try:
            cdx_iter = self.index_handler.load_cdx(wbrequest, params)
        except NotFoundException:
            error_text = 'No Captures found for: ' + params.get('url')
            wants_json = params.get('output') == 'json'

            body = json.dumps(dict(error=error_text)) if wants_json else error_text
            error_type = 'application/json' if wants_json else 'text/plain'

            return WbResponse.text_response(body,
                                            content_type=error_type,
                                            status='404 Not Found')
        else:
            return WbResponse.text_stream(cdx_iter, content_type='text/plain')
Example #26
0
    def __call__(self, wbrequest):
        """Run the index lookup for the request and stream the result.

        When ``load_cdx`` raises ``NotFoundException`` a ``404 Not Found``
        response is returned instead; its body is a JSON ``{"error": ...}``
        object if the ``output`` parameter is ``json``, else plain text.
        """
        params = self.extract_params_from_wsgi_env(wbrequest.env)

        try:
            cdx_iter = self.index_handler.load_cdx(wbrequest, params)
        except NotFoundException:
            not_found = 'No Captures found for: ' + params.get('url')

            if params.get('output') != 'json':
                return WbResponse.text_response(not_found,
                                                content_type='text/plain',
                                                status='404 Not Found')

            return WbResponse.text_response(json.dumps(dict(error=not_found)),
                                            content_type='application/json',
                                            status='404 Not Found')

        return WbResponse.text_stream(cdx_iter,
                                      content_type='text/plain')
Example #27
0
    def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
        """Render a per-content-type template, or fall back to the parent.

        For requests that are neither embeds nor identity requests, the
        ``Content-Type`` header is looked up in ``self.templates``.  A match
        is rendered with the request url plus any
        ``pywb.template_params`` from the request environ and returned as
        HTML; everything else goes to the parent implementation.
        """
        wb_url = wbrequest.wb_url

        # only redirect for non-identity and non-embeds
        tpl_name = None
        if not (wb_url.is_embed or wb_url.is_identity):
            tpl_name = self.templates.get(
                status_headers.get_header('Content-Type'))

        if tpl_name is None:
            return super(TemplateRewriteHandler,
                         self)._make_response(wbrequest, status_headers, gen,
                                              is_rewritten)

        tpl = env.get_template(tpl_name)

        # Parameters from the environ may override the default 'url'.
        render_args = {'url': wbrequest.wb_url.url}
        render_args.update(wbrequest.env.get('pywb.template_params', {}))

        rendered = tpl.render(**render_args)
        return WbResponse.text_response(
            rendered.encode('utf-8-sig'),
            content_type=b'text/html; charset=utf-8')
Example #28
0
    def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
        """Render a template chosen by content type, else defer to the parent.

        Applies only to requests that are neither embeds nor identity
        requests.  The ``Content-Type`` header is passed through
        ``_lookup_key`` and the result is used as the key into
        ``self.templates``.  A matching template is rendered with the request
        url and any ``pywb.template_params`` from the environ.
        """
        wb_url = wbrequest.wb_url

        # only redirect for non-identity and non-embeds
        tpl_name = None
        if not (wb_url.is_embed or wb_url.is_identity):
            lookup_key = _lookup_key(status_headers.get_header("Content-Type"))
            tpl_name = self.templates.get(lookup_key)

        if tpl_name is None:
            return super(TemplateRewriteHandler, self)._make_response(
                wbrequest, status_headers, gen, is_rewritten
            )

        tpl = env.get_template(tpl_name)

        render_args = {"url": wbrequest.wb_url.url}
        render_args.update(wbrequest.env.get("pywb.template_params", {}))

        html = tpl.render(**render_args)
        return WbResponse.text_response(
            html.encode("utf-8-sig"), content_type=b"text/html; charset=utf-8"
        )
Example #29
0
    def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
        """Return a rendered template for configured content types.

        Embeds and identity requests always go to the parent implementation.
        Otherwise ``self.templates`` is consulted with the ``Content-Type``
        header; a configured template is rendered with ``url`` plus the
        environ's ``pywb.template_params`` and returned as an HTML response.
        """
        # only redirect for non-identity and non-embeds
        use_parent = wbrequest.wb_url.is_embed or wbrequest.wb_url.is_identity

        if not use_parent:
            header_value = status_headers.get_header('Content-Type')
            tpl_name = self.templates.get(header_value)

            if tpl_name is not None:
                template = env.get_template(tpl_name)

                context = {'url': wbrequest.wb_url.url}
                extra = wbrequest.env.get('pywb.template_params', {})
                context.update(extra)

                body = template.render(**context).encode('utf-8-sig')
                return WbResponse.text_response(
                    body,
                    content_type=b'text/html; charset=utf-8')

        parent = super(TemplateRewriteHandler, self)
        return parent._make_response(wbrequest,
                                     status_headers,
                                     gen,
                                     is_rewritten)
Example #30
0
    def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
        """Serve a configured template for the response's content type.

        Skipped for embeds and identity requests.  The ``Content-Type``
        header is passed through ``_lookup_key`` before the lookup in
        ``self.templates``; on a hit the template is rendered with the
        request url and the environ's ``pywb.template_params``.  Without a
        hit the parent implementation builds the response.
        """
        # only redirect for non-identity and non-embeds
        use_parent = wbrequest.wb_url.is_embed or wbrequest.wb_url.is_identity

        if not use_parent:
            raw_type = status_headers.get_header("Content-Type")
            tpl_name = self.templates.get(_lookup_key(raw_type))

            if tpl_name is not None:
                template = env.get_template(tpl_name)

                context = {"url": wbrequest.wb_url.url}
                context.update(
                    wbrequest.env.get("pywb.template_params", {}))

                body = template.render(**context).encode("utf-8-sig")
                return WbResponse.text_response(
                    body,
                    content_type=b"text/html; charset=utf-8")

        parent = super(TemplateRewriteHandler, self)
        return parent._make_response(wbrequest, status_headers, gen,
                                     is_rewritten)
Example #31
0
    def _get_video_info(self, wbrequest, info_url=None, video_url=None):
        """Extract video info with youtube-dl and return it as JSON.

        A ``YoutubeDLWrapper`` is created on first use.  Both url arguments
        default to the request url.  With ``self.proxies`` set, the JSON is
        also sent in a ``PUTMETA`` request (certificate verification
        disabled) to the https-stripped ``info_url`` via those proxies, and
        the video's cache key is recorded in ``self._cache``.
        """
        if not self.youtubedl:
            self.youtubedl = YoutubeDLWrapper()

        video_url = video_url or wbrequest.wb_url.url
        info_url = info_url or wbrequest.wb_url.url

        cache_key = (self._get_cache_key('v:', video_url)
                     if self.proxies else None)

        extracted = self.youtubedl.extract_info(video_url)

        content_type = self.YT_DL_TYPE
        metadata = json.dumps(extracted)

        if not (self.proxies and cache_key):
            return WbResponse.text_response(metadata,
                                            content_type=content_type)

        put_headers = self._live_request_headers(wbrequest)
        put_headers['Content-Type'] = content_type

        info_url = HttpsUrlRewriter.remove_https(info_url)

        # The reply to the PUTMETA request is not inspected.
        requests.request(method='PUTMETA',
                         url=info_url,
                         data=metadata,
                         headers=put_headers,
                         proxies=self.proxies,
                         verify=False)

        self._cache[cache_key] = '1'

        return WbResponse.text_response(metadata, content_type=content_type)
Example #32
0
    def handle_connect(self, env):
        """Handle a proxy ``CONNECT`` by terminating SSL on the client socket.

        Acknowledges the tunnel on the raw request socket, wraps that socket
        in a server-side SSL connection using a certificate obtained from
        ``self.ca`` for the requested host, then reads the tunnelled request
        line and headers and rewrites ``env`` to describe that request.

        :param env: WSGI environ of the CONNECT request; updated in place.
        :return: an 'HTTPS Proxy Not Supported' text response when no request
            socket is available; otherwise nothing (``env`` is the output).
        :raises BadRequestException: if wrapping the socket or reading the
            request line fails, or the request line has fewer than 3 parts.
        """
        sock = self.get_request_socket(env)
        if not sock:
            return WbResponse.text_response('HTTPS Proxy Not Supported',
                                            '405 HTTPS Proxy Not Supported')

        # Tell the client the tunnel is open before switching to SSL.
        sock.send(b'HTTP/1.0 200 Connection Established\r\n')
        sock.send(b'Proxy-Connection: close\r\n')
        sock.send(b'Server: pywb proxy\r\n')
        sock.send(b'\r\n')

        hostname, port = env['REL_REQUEST_URI'].split(':')

        if not self.use_wildcard:
            certfile = self.ca.cert_for_host(hostname)
        else:
            certfile = self.ca.get_wildcard_cert(hostname)

        try:
            ssl_sock = ssl.wrap_socket(
                sock,
                server_side=True,
                certfile=certfile,
                #ciphers="ALL",
                suppress_ragged_eofs=False,
                ssl_version=ssl.PROTOCOL_SSLv23)
            env['pywb.proxy_ssl_sock'] = ssl_sock

            buffreader = BufferedReader(ssl_sock, block_size=self.BLOCK_SIZE)

            statusline = to_native_str(buffreader.readline().rstrip())

        except Exception as se:
            # Exceptions have no ``.message`` attribute on Python 3; using it
            # raised AttributeError here and hid the real failure.
            raise BadRequestException(str(se))

        statusparts = statusline.split(' ')

        if len(statusparts) < 3:
            raise BadRequestException('Invalid Proxy Request: ' + statusline)

        # Rebuild the environ from the request line read inside the tunnel.
        env['REQUEST_METHOD'] = statusparts[0]
        env['REL_REQUEST_URI'] = ('https://' +
                                  env['REL_REQUEST_URI'].replace(':443', '') +
                                  statusparts[1])

        env['SERVER_PROTOCOL'] = statusparts[2].strip()

        env['pywb.proxy_scheme'] = 'https'

        env['pywb.proxy_host'] = hostname
        env['pywb.proxy_port'] = port
        env['pywb.proxy_req_uri'] = statusparts[1]

        queryparts = env['REL_REQUEST_URI'].split('?', 1)
        env['PATH_INFO'] = queryparts[0]
        env['QUERY_STRING'] = queryparts[1] if len(queryparts) > 1 else ''
        env['pywb.proxy_query'] = env['QUERY_STRING']

        # Read header lines until a blank line (or end of input) and store
        # them in the environ under upper-cased, underscore-separated names.
        while True:
            line = to_native_str(buffreader.readline())
            if line:
                line = line.rstrip()

            if not line:
                break

            parts = line.split(':', 1)
            if len(parts) < 2:
                continue

            name = parts[0].strip()
            value = parts[1].strip()

            name = name.replace('-', '_').upper()

            # Content-Length / Content-Type are stored without HTTP_ prefix.
            if name not in ('CONTENT_LENGTH', 'CONTENT_TYPE'):
                name = 'HTTP_' + name

            env[name] = value

        # The request body, if any, is read from the same buffered reader.
        env['wsgi.input'] = buffreader
Example #33
0
File: views.py Project: peval/pywb
 def render_response(self, **kwargs):
     """Render the template and return it as a UTF-8 encoded text response.

     ``status`` and ``content_type`` are taken from ``kwargs`` and default
     to ``"200 OK"`` and ``"text/html; charset=utf-8"``.
     """
     body = self.render_to_string(**kwargs).encode("utf-8")
     return WbResponse.text_response(
         body,
         status=kwargs.get("status", "200 OK"),
         content_type=kwargs.get("content_type", "text/html; charset=utf-8"),
     )
Example #34
0
 def handle_magic_page(self, env):
     """Return the client's ip and the proxy collection/timestamp as JSON."""
     coll, ts = self.get_proxy_coll_ts(env)
     payload = {"ip": self._get_ip(env), "coll": coll, "ts": ts}
     return WbResponse.text_response(json.dumps(payload),
                                     content_type="application/json")
Example #35
0
    def handle_connect(self, env):
        """Handle a proxy ``CONNECT`` by terminating SSL on the client socket.

        Acknowledges the tunnel on the raw request socket, wraps that socket
        in a server-side SSL connection using a certificate obtained from
        ``self.ca`` for the requested host, then reads the tunnelled request
        line and headers and rewrites ``env`` to describe that request.

        :param env: WSGI environ of the CONNECT request; updated in place.
        :return: an 'HTTPS Proxy Not Supported' text response when no request
            socket is available; otherwise nothing (``env`` is the output).
        :raises BadRequestException: if wrapping the socket or reading the
            request line fails, or the request line has fewer than 3 parts.
        """
        sock = self.get_request_socket(env)
        if not sock:
            return WbResponse.text_response('HTTPS Proxy Not Supported',
                                            '405 HTTPS Proxy Not Supported')

        # Tell the client the tunnel is open before switching to SSL.
        sock.send('HTTP/1.0 200 Connection Established\r\n')
        sock.send('Server: pywb proxy\r\n')
        sock.send('\r\n')

        hostname, port = env['REL_REQUEST_URI'].split(':')

        if not self.use_wildcard:
            certfile = self.ca.cert_for_host(hostname)
        else:
            certfile = self.ca.get_wildcard_cert(hostname)

        try:
            ssl_sock = ssl.wrap_socket(sock,
                                       server_side=True,
                                       certfile=certfile,
                                       #ciphers="ALL",
                                       suppress_ragged_eofs=False,
                                       ssl_version=ssl.PROTOCOL_SSLv23
                                       )
            env['pywb.proxy_ssl_sock'] = ssl_sock

            buffreader = BufferedReader(ssl_sock, block_size=self.BLOCK_SIZE)

            statusline = buffreader.readline().rstrip()

        except Exception as se:
            # ``.message`` is not available on every exception (and is gone
            # entirely on Python 3); str() always yields the description.
            raise BadRequestException(str(se))

        statusparts = statusline.split(' ')

        if len(statusparts) < 3:
            raise BadRequestException('Invalid Proxy Request: ' + statusline)

        # Rebuild the environ from the request line read inside the tunnel.
        env['REQUEST_METHOD'] = statusparts[0]
        env['REL_REQUEST_URI'] = ('https://' +
                                  env['REL_REQUEST_URI'].replace(':443', '') +
                                  statusparts[1])

        env['SERVER_PROTOCOL'] = statusparts[2].strip()

        env['pywb.proxy_scheme'] = 'https'

        env['pywb.proxy_host'] = hostname
        env['pywb.proxy_port'] = port
        env['pywb.proxy_req_uri'] = statusparts[1]

        queryparts = env['REL_REQUEST_URI'].split('?', 1)
        env['PATH_INFO'] = queryparts[0]
        env['QUERY_STRING'] = queryparts[1] if len(queryparts) > 1 else ''
        env['pywb.proxy_query'] = env['QUERY_STRING']

        # Read header lines until a blank line (or end of input) and store
        # them in the environ under upper-cased, underscore-separated names.
        while True:
            line = buffreader.readline()
            if line:
                line = line.rstrip()

            if not line:
                break

            parts = line.split(':', 1)
            if len(parts) < 2:
                continue

            name = parts[0].strip()
            value = parts[1].strip()

            name = name.replace('-', '_').upper()

            # Content-Length / Content-Type are stored without HTTP_ prefix.
            if name not in ('CONTENT_LENGTH', 'CONTENT_TYPE'):
                name = 'HTTP_' + name

            env[name] = value

        # If the reader still holds buffered data after the headers, expose
        # it (followed by the socket) as the request's input stream.
        remain = buffreader.rem_length()
        if remain > 0:
            remainder = buffreader.read(self.BLOCK_SIZE)
            env['wsgi.input'] = BufferedReader(ssl_sock,
                                               block_size=self.BLOCK_SIZE,
                                               starting_data=remainder)
Example #36
0
 def handle_magic_page(self, env):
     """Answer a proxy "magic page" request with a small JSON document.

     The JSON object carries the ``ip`` returned by ``self._get_ip(env)``
     together with the ``coll`` / ``ts`` pair returned by
     ``self.get_proxy_coll_ts(env)``, and is sent as ``application/json``.
     """
     coll, ts = self.get_proxy_coll_ts(env)
     payload = {'ip': self._get_ip(env), 'coll': coll, 'ts': ts}
     body = json.dumps(payload)
     return WbResponse.text_response(body, content_type='application/json')
Example #37
0
    def render_content(self, wb_url, kwargs, environ):
        """Fetch the upstream record for ``wb_url``, rewrite it and return it.

        Processing order:

        1. If ``handle_custom_response`` yields a result, return it directly
           as ``text/html`` (with ``charset=utf-8`` unless this is the framed
           replay outer frame).
        2. For ``kwargs['type']`` of ``'record'`` or ``'patch'``, inspect the
           Range request: an open-ended ``bytes=0-`` range is stripped and
           re-added to the response later; any other range triggers an
           additional asynchronous request for ``mod_url``.
        3. Issue the upstream request via ``self._do_req``; a status of 400
           or above is converted into ``UpstreamException``.
        4. Parse the record stream, build a ``CDXObject`` and pass everything
           through ``self.content_rewriter.rewrite_content``.

        :param wb_url: url string, parsed here into a ``WbUrl``
        :param kwargs: per-request options (``kwargs.get('type')`` is read
            here; the dict is also forwarded to several helpers)
        :param environ: WSGI environ; ``HTTP_RANGE`` may be removed from it
        :returns: ``WbResponse`` wrapping the rewritten headers and body
        :raises UpstreamException: if the upstream status code is >= 400
        """
        wb_url = WbUrl(wb_url)

        host_prefix = self.get_host_prefix(environ)
        rel_prefix = self.get_rel_prefix(environ)
        full_prefix = host_prefix + rel_prefix

        resp = self.handle_custom_response(environ, wb_url,
                                           full_prefix, host_prefix, kwargs)
        if resp is not None:
            content_type = 'text/html'

            # if not replay outer frame, specify utf-8 charset
            if not self.is_framed_replay(wb_url):
                content_type += '; charset=utf-8'

            return WbResponse.text_response(resp, content_type=content_type)

        urlrewriter = UrlRewriter(wb_url,
                                  prefix=full_prefix,
                                  full_prefix=full_prefix,
                                  rel_prefix=rel_prefix)

        self.unrewrite_referrer(environ)

        urlkey = canonicalize(wb_url.url)

        inputreq = RewriteInputRequest(environ, urlkey, wb_url.url,
                                       self.content_rewriter)

        inputreq.include_post_query(wb_url.url)

        mod_url = None
        use_206 = False
        rangeres = None

        readd_range = False
        async_record_url = None

        if kwargs.get('type') in ('record', 'patch'):
            rangeres = inputreq.extract_range()

            if rangeres:
                mod_url, start, end, use_206 = rangeres

                # if bytes=0- Range request,
                # simply remove the range and still proxy
                if start == 0 and not end and use_206:
                    wb_url.url = mod_url
                    inputreq.url = mod_url

                    del environ['HTTP_RANGE']
                    readd_range = True
                else:
                    # any other range: request mod_url separately, in the
                    # background, once the main request has succeeded
                    async_record_url = mod_url

        # forwarded to _do_req as its last argument; true only when an
        # async request for the range-less url is going to be issued
        skip = async_record_url is not None

        setcookie_headers = None
        if self.cookie_tracker:
            cookie_key = self.get_cookie_key(kwargs)
            res = self.cookie_tracker.get_cookie_headers(wb_url.url, cookie_key)
            inputreq.extra_cookie, setcookie_headers = res

        r = self._do_req(inputreq, wb_url, kwargs, skip)

        if r.status_code >= 400:
            error = None
            try:
                error = r.raw.read()
                r.raw.close()
            except Exception:
                # Reading the error body is best-effort. Only ordinary
                # errors are ignored, so KeyboardInterrupt / SystemExit /
                # greenlet termination are no longer swallowed here.
                pass

            if error:
                # 'replace' prevents a non-UTF-8 error body from raising
                # UnicodeDecodeError and masking the upstream failure
                error = error.decode('utf-8', 'replace')
            else:
                error = ''

            details = dict(args=kwargs, error=error)
            raise UpstreamException(r.status_code, url=wb_url.url, details=details)

        if async_record_url:
            # drop the Range header (if still present) before spawning the
            # background request
            environ.pop('HTTP_RANGE', '')
            gevent.spawn(self._do_async_req,
                         inputreq,
                         async_record_url,
                         wb_url,
                         kwargs,
                         False)

        record = self.loader.parse_record_stream(r.raw)

        cdx = CDXObject()
        cdx['urlkey'] = urlkey
        cdx['timestamp'] = http_date_to_timestamp(r.headers.get('Memento-Datetime'))
        cdx['url'] = wb_url.url

        self._add_custom_params(cdx, r.headers, kwargs)

        if readd_range:
            # the bytes=0- range was stripped above; add a range covering
            # the full Content-Length back onto the response headers
            content_length = (record.status_headers.
                              get_header('Content-Length'))
            try:
                content_length = int(content_length)
                record.status_headers.add_range(0, content_length,
                                                   content_length)
            except (ValueError, TypeError):
                # missing or non-numeric Content-Length: leave headers as-is
                pass

        if self.is_ajax(environ):
            # ajax requests get no head insert
            head_insert_func = None
            urlrewriter.rewrite_opts['is_ajax'] = True
        else:
            top_url = self.get_top_url(full_prefix, wb_url, cdx, kwargs)
            head_insert_func = (self.head_insert_view.
                                    create_insert_func(wb_url,
                                                       full_prefix,
                                                       host_prefix,
                                                       top_url,
                                                       environ,
                                                       self.framed_replay))

        cookie_rewriter = None
        if self.cookie_tracker:
            # cookie_key was assigned in the earlier cookie_tracker branch
            cookie_rewriter = self.cookie_tracker.get_rewriter(urlrewriter,
                                                               cookie_key)

        result = self.content_rewriter.rewrite_content(urlrewriter,
                                                       record.status_headers,
                                                       record.stream,
                                                       head_insert_func,
                                                       urlkey,
                                                       cdx,
                                                       cookie_rewriter,
                                                       environ)

        # third element of the result is not needed here
        status_headers, gen, _ = result

        if setcookie_headers:
            status_headers.headers.extend(setcookie_headers)

        return WbResponse(status_headers, gen)
Example #38
0
    def handle_error(self, environ, ue):
        """Render the error view for ``ue`` and return it as an HTML response.

        ``ue.url`` is handed to the view as ``err_msg`` and ``ue.msg`` as
        ``err_details``.
        """
        render = self.error_view.render_to_string
        page = render(environ, err_msg=ue.url, err_details=ue.msg)
        return WbResponse.text_response(page, content_type='text/html')
Example #39
0
 def __call__(self, wbrequest):
     """Return the string form of ``wbrequest`` as a text response."""
     request_text = str(wbrequest)
     return WbResponse.text_response(request_text)
Example #40
0
 def __call__(self, wbrequest):
     """Echo ``str(wbrequest)`` back to the client as a text response."""
     echoed = str(wbrequest)
     return WbResponse.text_response(echoed)