Example #1
0
    def proxy_fetch(self, env, url):
        """Proxy mode only endpoint that handles OPTIONS requests and CORS fetches for Preservation Worker.

        Due to normal cross-origin browser restrictions in proxy mode, auto fetch worker cannot access the CSS rules
        of cross-origin style sheets and must re-fetch them in a manner that is CORS safe. This endpoint facilitates
        that by fetching the stylesheets for the auto fetch worker and then responds with its contents

        :param dict env: The WSGI environment dictionary (``REQUEST_URI`` and ``PATH_INFO`` are rewritten in place)
        :param str url:  The URL of the resource to be fetched (the value passed in is not used; it is
            recomputed from ``REQUEST_URI``)
        :return: WbResponse that is either response to an Options request or the results of fetching url
        :rtype: WbResponse
        """
        if not self.is_proxy_enabled(env):
            # we are not in proxy mode so just respond with forbidden
            return WbResponse.text_response(
                'proxy mode must be enabled to use this endpoint',
                status='403 Forbidden')

        if env.get('REQUEST_METHOD') == 'OPTIONS':
            return WbResponse.options_response(env)

        # ensure full URL
        request_url = env['REQUEST_URI']
        # replace with /id_ so we do not get rewritten; only the first
        # occurrence (the endpoint prefix) is replaced so that a target URL
        # which itself contains '/proxy-fetch' is left intact
        url = request_url.replace('/proxy-fetch', '/id_', 1)
        # update WSGI environment object
        env['REQUEST_URI'] = self.proxy_coll + url
        env['PATH_INFO'] = env['PATH_INFO'].replace('/proxy-fetch',
                                                    self.proxy_coll + '/id_',
                                                    1)
        # make request using normal serve_content
        response = self.serve_content(env, self.proxy_coll, url)
        # for WR
        if isinstance(response, WbResponse):
            response.add_access_control_headers(env=env)
        return response
Example #2
0
    def serve_cdx(self, environ, coll='$root'):
        """Run the upstream CDX query for a collection and stream back the result.

        The upstream URL is the 'cdx-server' path template formatted with the
        collection name, followed by the request query string (if any) and a
        ``limit=`` parameter when ``self.query_limit`` is set.

        :param dict environ: The WSGI environment dictionary for the request
        :param str coll: The name of the collection this CDX query is for
        :return: The WbResponse containing the results of the CDX query, or a
            '400 Bad Request' text response if the query raised
        :rtype: WbResponse
        """
        cdx_url = self.rewriterapp.paths['cdx-server'].format(coll=coll)

        # collect the pieces to append, in order: client query, then limit
        extra_params = []
        if environ.get('QUERY_STRING'):
            extra_params.append(environ.get('QUERY_STRING'))
        if self.query_limit:
            extra_params.append('limit=' + str(self.query_limit))

        for param in extra_params:
            # first appended piece starts the query string, later ones extend it
            separator = '&' if '?' in cdx_url else '?'
            cdx_url = cdx_url + separator + param

        try:
            upstream = requests.get(cdx_url, stream=True)
            upstream_type = upstream.headers.get('Content-Type')
            return WbResponse.bin_stream(StreamIter(upstream.raw),
                                         content_type=upstream_type)
        except Exception as err:
            return WbResponse.text_response('Error: ' + str(err),
                                            status='400 Bad Request')
Example #3
0
    def put_custom_record(self, environ, coll):
        """Forward the request body as a custom 'resource' record for a collection.

        The body is read from ``wsgi.input`` and PUT to the URL produced by
        ``self.custom_record_path``; the JSON reply of that request is returned.

        :param dict environ: The WSGI environment dictionary for the request.
            The query string must contain ``url`` and may contain ``timestamp``
        :param str coll: The name of the collection the record is written to
        :return: JSON response from the upstream PUT, or a '400 Bad Request'
            JSON error when no ``url`` query parameter was supplied
        :rtype: WbResponse
        """
        chunks = []
        while True:
            buff = environ["wsgi.input"].read()
            if not buff:
                break

            chunks.append(buff)

        data = b"".join(chunks)

        # default to an empty query string so a missing key parses to no params
        params = dict(parse_qsl(environ.get("QUERY_STRING", "")))

        rec_type = "resource"

        headers = {"Content-Type": environ.get("CONTENT_TYPE", "text/plain")}

        target_uri = params.get("url")

        if not target_uri:
            # a missing url is a client error, so do not answer with 200
            return WbResponse.json_response({"error": "no url"},
                                            status="400 Bad Request")

        timestamp = params.get("timestamp")
        if timestamp:
            headers["WARC-Date"] = timestamp_to_iso_date(timestamp)

        put_url = self.custom_record_path.format(url=target_uri,
                                                 coll=coll,
                                                 rec_type=rec_type)
        res = requests.put(put_url, headers=headers, data=data)

        res = res.json()

        return WbResponse.json_response(res)
Example #4
0
    def proxy_fetch(self, env, url):
        """Proxy mode only endpoint that handles OPTIONS requests and CORS fetches for Preservation Worker.

        Due to normal cross-origin browser restrictions in proxy mode, auto fetch worker cannot access the CSS rules
        of cross-origin style sheets and must re-fetch them in a manner that is CORS safe. This endpoint facilitates
        that by fetching the stylesheets for the auto fetch worker and then responds with its contents

        :param dict env: The WSGI environment dictionary (``REQUEST_URI`` and ``PATH_INFO`` are rewritten in place)
        :param str url:  The URL of the resource to be fetched (the value passed in is not used; it is
            recomputed from ``REQUEST_URI``)
        :return: WbResponse that is either response to an Options request or the results of fetching url
        :rtype: WbResponse
        """
        if not self.is_proxy_enabled(env):
            # we are not in proxy mode so just respond with forbidden
            return WbResponse.text_response('proxy mode must be enabled to use this endpoint',
                                            status='403 Forbidden')

        if env.get('REQUEST_METHOD') == 'OPTIONS':
            return WbResponse.options_response(env)

        # ensure full URL
        request_url = env['REQUEST_URI']
        # replace with /id_ so we do not get rewritten; limit the replacement
        # to the first occurrence (the endpoint prefix) so a target URL that
        # itself contains '/proxy-fetch' is not corrupted
        url = request_url.replace('/proxy-fetch', '/id_', 1)
        # update WSGI environment object
        env['REQUEST_URI'] = self.proxy_coll + url
        env['PATH_INFO'] = env['PATH_INFO'].replace('/proxy-fetch', self.proxy_coll + '/id_', 1)
        # make request using normal serve_content
        response = self.serve_content(env, self.proxy_coll, url)
        # for WR
        if isinstance(response, WbResponse):
            response.add_access_control_headers(env=env)
        return response
Example #5
0
    def serve_cdx(self, environ, coll='$root'):
        """Run the upstream CDX query for a collection and stream back the result.

        :param dict environ: The WSGI environment dictionary for the request
        :param str coll: The name of the collection this CDX query is for
        :return: The WbResponse containing the results of the CDX query, or a
            '400 Bad Request' text response if the query raised
        :rtype: WbResponse
        """
        cdx_url = self.rewriterapp.paths['cdx-server'].format(coll=coll)

        if environ.get('QUERY_STRING'):
            # extend an existing query string, otherwise start one
            joiner = '&' if '?' in cdx_url else '?'
            cdx_url = cdx_url + joiner + environ.get('QUERY_STRING')

        try:
            upstream = requests.get(cdx_url, stream=True)
            upstream_type = upstream.headers.get('Content-Type')
            return WbResponse.bin_stream(StreamIter(upstream.raw),
                                         content_type=upstream_type)
        except Exception as err:
            return WbResponse.text_response('Error: ' + str(err), status='400 Bad Request')
Example #6
0
def test_wbresponse_options_response():
    """Check the Access-Control-Allow-Origin header chosen by options_response.

    An origin or referer value is echoed back; a missing or None value
    results in the wildcard '*'.
    """
    cases = [
        (dict(HTTP_ORIGIN='http://example.com'), 'http://example.com'),
        (dict(HTTP_REFERER='http://example.com'), 'http://example.com'),
        (dict(), '*'),
        (dict(HTTP_ORIGIN=None), '*'),
        (dict(HTTP_REFERER=None), '*'),
    ]
    for env, allowed_origin in cases:
        res = WbResponse.options_response(env)
        assert ('Access-Control-Allow-Origin',
                allowed_origin) in res.status_headers.headers
Example #7
0
    def serve_static(self, environ, coll='', filepath=''):
        """Serve a static file for a specific collection or one of the default static assets.

        In proxy mode an OPTIONS request is answered directly, and access
        control headers are added to the static response.

        :param dict environ: The WSGI environment dictionary for the request
        :param str coll: The collection the static file is associated with
        :param str filepath: The file path (relative to the collection) for the static asset
        :return: The WbResponse for the static asset
        :rtype: WbResponse
        """
        proxy_enabled = self.is_proxy_enabled(environ)
        if proxy_enabled:
            if environ.get('REQUEST_METHOD') == 'OPTIONS':
                return WbResponse.options_response(environ)

        # collection-specific static dir when a collection is given,
        # otherwise the handler-wide one
        static_root = self.static_dir
        if coll:
            static_root = os.path.join(self.warcserver.root_dir, coll,
                                       self.static_dir)
        environ['pywb.static_dir'] = static_root

        try:
            response = self.static_handler(environ, filepath)
            if proxy_enabled:
                response.add_access_control_headers(env=environ)
        except Exception:
            self.raise_not_found(environ, 'static_file_not_found', filepath)
        else:
            return response
Example #8
0
    def _check_refer_redirect(self, environ):
        """Returns a WbResponse for a HTTP 307 redirection if the HTTP referer header is the same as the HTTP host header

        :param dict environ: The WSGI environment dictionary for the request
        :return: WbResponse HTTP 307 redirection
        :rtype: WbResponse
        """
        referer = environ.get('HTTP_REFERER')
        if not referer:
            return

        host = environ.get('HTTP_HOST')
        if host not in referer:
            return

        inx = referer[1:].find('http')
        if not inx:
            inx = referer[1:].find('///')
            if inx > 0:
                inx + 1

        if inx < 0:
            return

        url = referer[inx + 1:]
        host = referer[:inx + 1]

        orig_url = environ['PATH_INFO']
        if environ.get('QUERY_STRING'):
            orig_url += '?' + environ['QUERY_STRING']

        full_url = host + urljoin(url, orig_url)
        return WbResponse.redir_response(full_url, '307 Redirect')
Example #9
0
    def serve_coll_page(self, environ, coll='$root'):
        """Render and serve the search page (search.html) of a collection.

        :param dict environ: The WSGI environment dictionary for the request
        :param str coll: The name of the collection to serve the collections search page for
        :return: The WbResponse containing the collections search page
        :rtype: WbResponse
        """
        if not self.is_valid_coll(coll):
            self.raise_not_found(environ, 'coll_not_found', coll)

        self.setup_paths(environ, coll)

        config = self.get_coll_config(coll)
        coll_metadata = config.get('metadata')

        search_view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html')

        # a non-empty script name gets a trailing slash
        prefix = environ.get('SCRIPT_NAME', '')
        if prefix:
            prefix = prefix + '/'

        template_args = {
            'wb_prefix': prefix,
            'coll': coll,
            'coll_config': config,
            'metadata': coll_metadata,
        }
        html = search_view.render_to_string(environ, **template_args)

        return WbResponse.text_response(
            html, content_type='text/html; charset="utf-8"')
Example #10
0
    def serve_listing(self, environ):
        """Return a JSON response listing the warcserver's fixed and dynamic routes.

        :param dict environ: The WSGI environment dictionary for the request (unused)
        :return: JSON response with the keys 'fixed' and 'dynamic'
        :rtype: WbResponse
        """
        fixed_routes = self.warcserver.list_fixed_routes()
        dynamic_routes = self.warcserver.list_dynamic_routes()

        return WbResponse.json_response({'fixed': fixed_routes,
                                         'dynamic': dynamic_routes})
Example #11
0
    def _check_refer_redirect(self, environ):
        referer = environ.get('HTTP_REFERER')
        if not referer:
            return

        host = environ.get('HTTP_HOST')
        if host not in referer:
            return

        inx = referer[1:].find('http')
        if not inx:
            inx = referer[1:].find('///')
            if inx > 0:
                inx + 1

        if inx < 0:
            return

        url = referer[inx + 1:]
        host = referer[:inx + 1]

        orig_url = environ['PATH_INFO']
        if environ.get('QUERY_STRING'):
            orig_url += '?' + environ['QUERY_STRING']

        full_url = host + urljoin(url, orig_url)
        return WbResponse.redir_response(full_url, '307 Redirect')
Example #12
0
    def format_response(self,
                        response,
                        wb_url,
                        full_prefix,
                        is_timegate,
                        is_proxy,
                        timegate_closest_ts=None):
        """Wrap a rendered body in a WbResponse and add memento links when enabled.

        A ``response`` that is not already a WbResponse is turned into a
        'text/html' text response. Memento links are added only when
        ``self.enable_memento`` is set and the status line starts with '200'.

        :return: The (possibly newly created) WbResponse
        :rtype: WbResponse
        """
        memento_ts = None

        if not isinstance(response, WbResponse):
            if self.is_framed_replay(wb_url):
                # outer replay frame: no charset, but remember the timestamp
                content_type = 'text/html'
                memento_ts = timegate_closest_ts or wb_url.timestamp
            else:
                # if not replay outer frame, specify utf-8 charset
                content_type = 'text/html; charset=utf-8'

            response = WbResponse.text_response(response,
                                                content_type=content_type)

        if not self.enable_memento:
            return response

        status_headers = response.status_headers
        if status_headers.statusline.startswith('200'):
            self._add_memento_links(wb_url.url,
                                    full_prefix,
                                    None,
                                    memento_ts,
                                    status_headers,
                                    is_timegate,
                                    is_proxy,
                                    is_memento=not is_timegate)

        return response
Example #13
0
    def serve_record(self, environ, coll='$root', url=''):
        """Serve content with recording enabled, unless the collection is a fixed route.

        :param dict environ: The WSGI environment dictionary for the request
        :param str coll: The name of the collection to record into
        :param str url: The URL to be served
        :return: The result of serve_content, or a text error response when
            ``coll`` is one of the warcserver's fixed routes
        """
        if coll not in self.warcserver.list_fixed_routes():
            return self.serve_content(environ, coll, url, record=True)

        message = 'Error: Can Not Record Into Custom Collection "{0}"'.format(coll)
        return WbResponse.text_response(message)
Example #14
0
    def _check_refer_redirect(self, environ):
        """Returns a WbResponse for a HTTP 307 redirection if the HTTP referer header is the same as the HTTP host header

        :param dict environ: The WSGI environment dictionary for the request
        :return: WbResponse HTTP 307 redirection
        :rtype: WbResponse
        """
        referer = environ.get('HTTP_REFERER')
        if not referer:
            return

        host = environ.get('HTTP_HOST')
        if host not in referer:
            return

        inx = referer[1:].find('http')
        if not inx:
            inx = referer[1:].find('///')

        if inx < 0:
            return

        url = referer[inx + 1:]
        host = referer[:inx + 1]

        orig_url = environ['PATH_INFO']
        if environ.get('QUERY_STRING'):
            orig_url += '?' + environ['QUERY_STRING']

        full_url = host + urljoin(url, orig_url)
        return WbResponse.redir_response(full_url, '307 Redirect')
Example #15
0
    def exit(self, environ=None):
        """Send SIGTERM to the uWSGI master process and return an empty JSON response.

        :param dict environ: The WSGI environment dictionary (unused)
        :return: An empty JSON response, built before the signal is sent
        :rtype: WbResponse
        """
        # imported lazily: uwsgi is only needed when this method is called
        import uwsgi
        import signal

        empty_response = WbResponse.json_response({})

        master_pid = uwsgi.masterpid()
        os.kill(master_pid, signal.SIGTERM)

        return empty_response
Example #16
0
    def get_wacz(self, environ, coll):
        """Package all files in a collection's archive directory into a WACZ file.

        The argument list handed to ``wacz_main`` is: every file found in
        ``collections/<coll>/archive``, then ``-o /tmp/out/archive.wacz`` and,
        when the query string has a ``url`` parameter, ``--url <url>``.

        :param dict environ: The WSGI environment dictionary for the request
        :param str coll: The name of the collection to package
        :return: JSON response ``{"done": <output path>}`` on success, or a
            '500 Internal Server Error' JSON response with the error message
            when ``wacz_main`` raised
        :rtype: WbResponse
        """
        # if self.pending_count != 0 or self.pending_size != 0:
        #    return WbResponse.json_response(
        #        {"error": "not_ready"}, status="404 Not Found"
        #    )

        # default to an empty query string so a missing key parses to no params
        params = dict(parse_qsl(environ.get("QUERY_STRING", "")))

        output_path = "/tmp/out/archive.wacz"

        archive_dir = os.path.join("collections", coll, "archive")
        wacz_args = [
            os.path.join(archive_dir, name) for name in os.listdir(archive_dir)
        ]
        wacz_args += ["-o", output_path]

        url = params.get("url")
        if url:
            wacz_args += ["--url", url]

        try:
            wacz_main(wacz_args)
        except Exception as e:
            print(e)
            # do not report success when packaging failed
            return WbResponse.json_response(
                {"error": str(e)}, status="500 Internal Server Error"
            )

        return WbResponse.json_response({"done": output_path})
Example #17
0
    def serve_coll_page(self, environ, coll='$root'):
        """Render and serve a collections search page (search.html).

        :param dict environ: The WSGI environment dictionary for the request
        :param str coll: The name of the collection to serve the collections search page for
        :return: The WbResponse containing the collections search page
        :rtype: WbResponse
        """
        if not self.is_valid_coll(coll):
            self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll))

        self.setup_paths(environ, coll)

        metadata = self.get_metadata(coll)

        view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html')

        # SCRIPT_NAME may be absent from the environ; default to '' so the
        # template is handed a string prefix rather than None
        wb_prefix = environ.get('SCRIPT_NAME', '')
        if wb_prefix:
            wb_prefix += '/'

        content = view.render_to_string(environ,
                                        wb_prefix=wb_prefix,
                                        metadata=metadata,
                                        coll=coll)

        return WbResponse.text_response(content, content_type='text/html; charset="utf-8"')
Example #18
0
    def serve_static(self, environ, coll='', filepath=''):
        """Serve a static file associated with a specific collection or one of pywb's own static assets

        In proxy mode an OPTIONS request is answered directly, and access
        control headers are added to the static response.

        :param dict environ: The WSGI environment dictionary for the request
        :param str coll: The collection the static file is associated with
        :param str filepath: The file path (relative to the collection) for the static asset
        :return: The WbResponse for the static asset
        :rtype: WbResponse
        """
        proxy_enabled = self.is_proxy_enabled(environ)
        if proxy_enabled and environ.get('REQUEST_METHOD') == 'OPTIONS':
            return WbResponse.options_response(environ)
        if coll:
            path = os.path.join(self.warcserver.root_dir, coll, self.static_dir)
        else:
            path = self.static_dir

        environ['pywb.static_dir'] = path
        try:
            response = self.static_handler(environ, filepath)
            if proxy_enabled:
                response.add_access_control_headers(env=environ)
            return response
        except Exception:
            # only ordinary errors become "not found"; KeyboardInterrupt and
            # SystemExit are no longer swallowed as they were by a bare except
            self.raise_not_found(environ, 'Static File Not Found: {0}'.format(filepath))
Example #19
0
def test_wbresponse_encode_stream():
    """encode_stream returns a generator yielding the encoded text chunks."""
    # Unicode Character 'LATIN CAPITAL LETTER A WITH TILDE' (U+00C3)
    text_chunks = [u'\u00c3']

    encoded = WbResponse.encode_stream(text_chunks)

    assert inspect.isgenerator(encoded)
    assert list(encoded) == [b'\xc3\x83']
Example #20
0
def test_wbresponse_json_response():
    """json_response yields a 200 JSON response whose body decodes to the input."""
    payload = {'pywb': 1, 'wr': 2}

    response = WbResponse.json_response(payload)
    headers = response.status_headers

    assert headers.statusline == '200 OK'
    expected_type = ('Content-Type', 'application/json; charset=utf-8')
    assert expected_type in headers.headers
    assert json.loads(response.body[0]) == payload
Example #21
0
    def _not_found_response(self, environ, url):
        """Render the not-found view for ``url`` as a '404 Not Found' HTML response.

        :param dict environ: The WSGI environment dictionary for the request
        :param str url: The URL passed to the not-found template
        :rtype: WbResponse
        """
        body = self.not_found_view.render_to_string(
            environ, url=url, err_msg="Not Found")

        return WbResponse.text_response(
            body, status='404 Not Found', content_type='text/html')
Example #22
0
    def _error_response(self, environ, wbe):
        """Render the error view for an exception as an HTML response.

        :param dict environ: The WSGI environment dictionary for the request
        :param wbe: Exception object providing ``status()``, ``url``, ``msg``
            and ``status_code``
        :return: HTML response using the status returned by ``wbe.status()``
        :rtype: WbResponse
        """
        status = wbe.status()

        body = self.error_view.render_to_string(
            environ,
            err_msg=wbe.url,
            err_details=wbe.msg,
            err_status=wbe.status_code,
        )

        return WbResponse.text_response(
            body, status=status, content_type='text/html')
Example #23
0
    def lock_clear_all(self, environ):
        """Delete all session-list keys and all 'lock:*' keys, then redirect to /_locks.

        :param dict environ: The WSGI environment dictionary; the redis client
            is taken from the session object stored under SESSION_KEY
        :return: Redirect response to '/_locks'
        :rtype: WbResponse
        """
        store = environ[SESSION_KEY].redis

        # session lists first, then the locks themselves
        for pattern in (SESH_LIST.format('*'), 'lock:*'):
            for key in store.scan_iter(pattern):
                store.delete(key)

        return WbResponse.redir_response('/_locks')
Example #24
0
def test_wbresponse_text_stream():
    """text_stream gives a utf-8 text/plain generator response, with or without an explicit content type."""
    # Unicode Character 'LATIN CAPITAL LETTER A WITH TILDE' (U+00C3)
    stream = [u'\u00c3']
    expected = [b'\xc3\x83']
    expected_type = ('Content-Type', 'text/plain; charset=utf-8')

    # first with an explicit content type, then relying on the default
    for kwargs in ({'content_type': 'text/plain'}, {}):
        res = WbResponse.text_stream(stream, **kwargs)
        status_headers = res.status_headers
        assert status_headers.statusline == '200 OK'
        assert expected_type in status_headers.headers
        assert inspect.isgenerator(res.body)
        assert list(res.body) == expected
Example #25
0
    def send_redirect(self, new_path, url_parts, urlrewriter):
        """Redirect (307) to the same URL with its path replaced by ``new_path``.

        :param str new_path: Path component to substitute into the URL
        :param url_parts: 5-item split URL (scheme, netloc, path, query, fragment)
        :param urlrewriter: Object whose ``rewrite`` produces the redirect target
        :return: '307 Temporary Redirect' response; when memento is enabled it
            also carries a 'Link' header for the un-rewritten URL
        :rtype: WbResponse
        """
        scheme, netloc, _old_path, query, frag = url_parts
        url = urlunsplit((scheme, netloc, new_path, query, frag))

        target = urlrewriter.rewrite(url)
        resp = WbResponse.redir_response(target, '307 Temporary Redirect')

        if self.enable_memento:
            resp.status_headers['Link'] = MementoUtils.make_link(url, 'original')

        return resp
Example #26
0
    def put_custom_record(self, environ, coll="$root"):
        """ When recording, PUT a custom WARC record to the specified collection
        (Available only when recording)

        The request body is read from ``wsgi.input`` and forwarded to the URL
        built from ``self.put_custom_record_path``.

        :param dict environ: The WSGI environment dictionary for the request.
            The query string must contain ``url`` and may contain ``timestamp``
        :param str coll: The name of the collection the record is written to
        :return: JSON response from the upstream PUT, or a '400 Bad Request'
            JSON error when no ``url`` query parameter was supplied
        """
        body_stream = environ["wsgi.input"]

        # keep reading until the stream reports no more data
        received = []
        buff = body_stream.read()
        while buff:
            received.append(buff)
            buff = body_stream.read()
        data = b"".join(received)

        params = dict(parse_qsl(environ.get("QUERY_STRING")))
        headers = {"Content-Type": environ.get("CONTENT_TYPE", "text/plain")}

        target_uri = params.get("url")
        if not target_uri:
            return WbResponse.json_response({"error": "no url"},
                                            status="400 Bad Request")

        timestamp = params.get("timestamp")
        if timestamp:
            headers["WARC-Date"] = timestamp_to_iso_date(timestamp)

        put_url = self.put_custom_record_path.format(url=target_uri,
                                                     coll=coll,
                                                     rec_type="resource")
        upstream = requests.put(put_url, headers=headers, data=data)

        return WbResponse.json_response(upstream.json())
Example #27
0
    def _error_response(self,
                        environ,
                        msg='',
                        details='',
                        status='404 Not Found'):
        """Render the error view as an HTML response with the given status.

        :param dict environ: The WSGI environment dictionary for the request
        :param str msg: Error message passed to the template
        :param str details: Error details passed to the template
        :param str status: HTTP status line for the response
        :rtype: WbResponse
        """
        body = self.error_view.render_to_string(
            environ, err_msg=msg, err_details=details)

        return WbResponse.text_response(
            body, status=status, content_type='text/html')
Example #28
0
    def serve_listing(self, environ):
        """Return a JSON listing of the warcserver's fixed and dynamic routes.

        :param dict environ: The WSGI environment dictionary for the request (unused)
        :return: WbResponse containing the JSON object with keys 'fixed' and 'dynamic'
        :rtype: WbResponse
        """
        fixed_routes = self.warcserver.list_fixed_routes()
        dynamic_routes = self.warcserver.list_dynamic_routes()

        listing = dict(fixed=fixed_routes, dynamic=dynamic_routes)
        return WbResponse.json_response(listing)
Example #29
0
    def put_screenshot(self, environ, coll):
        """Forward the request body to the screenshot recorder for ``target_uri``.

        :param dict environ: The WSGI environment dictionary for the request.
            The query string must contain ``target_uri``
        :param str coll: The name of the collection to record into
        :return: JSON response from the upstream PUT, or a JSON error object
            when ``target_uri`` is missing
        :rtype: WbResponse
        """
        self.ensure_coll_exists(coll)

        headers = {'Content-Type': environ.get('CONTENT_TYPE', 'text/plain')}

        # first target_uri value if one was given, otherwise stays empty
        candidates = parse_qs(environ.get('QUERY_STRING')).get('target_uri', [])
        target = candidates[0] if candidates else candidates

        if not target:
            return WbResponse.json_response({'error': 'no target_uri'})

        put_url = self.screenshot_recorder_path.format(url=target, coll=coll)

        # the input stream is handed over as-is rather than read here
        upstream = requests.put(put_url,
                                headers=headers,
                                data=environ['wsgi.input'])

        return WbResponse.json_response(upstream.json())
Example #30
0
    def serve_listing(self, environ):
        """Return a JSON listing of the warcserver's fixed and dynamic routes.

        :param dict environ: The WSGI environment dictionary for the request (unused)
        :return: WbResponse containing the JSON object with keys 'fixed' and 'dynamic'
        :rtype: WbResponse
        """
        warcserver = self.warcserver
        fixed_routes = warcserver.list_fixed_routes()
        dynamic_routes = warcserver.list_dynamic_routes()

        return WbResponse.json_response({'fixed': fixed_routes,
                                         'dynamic': dynamic_routes})
Example #31
0
    def serve_record(self, environ, coll='$root', url=''):
        """Serve a URL through serve_content with recording enabled.

        Recording is refused with a text error response when ``coll`` is one
        of the warcserver's fixed routes.

        :param dict environ: The WSGI environment dictionary for the request
        :param str coll: The name of the collection the record is to be served from
        :param str url: The URL for the corresponding record to be served if it exists
        :return: WbResponse containing the contents of the record/URL
        :rtype: WbResponse
        """
        if coll not in self.warcserver.list_fixed_routes():
            return self.serve_content(environ, coll, url, record=True)

        message = 'Error: Can Not Record Into Custom Collection "{0}"'.format(coll)
        return WbResponse.text_response(message)
Example #32
0
def test_resp_1():
    """A plain text_response carries the encoded body plus type and length headers."""
    actual = vars(WbResponse.text_response('Test'))

    expected_headers = StatusAndHeaders(
        protocol='',
        statusline='200 OK',
        headers=[
            ('Content-Type', 'text/plain; charset=utf-8'),
            ('Content-Length', '4'),
        ])

    assert actual == {'body': [b'Test'], 'status_headers': expected_headers}
Example #33
0
    def serve_cdx(self, environ, coll='$root'):
        """Run the upstream CDX query for a collection and stream back the result.

        :param dict environ: The WSGI environment dictionary for the request
        :param str coll: The name of the collection this CDX query is for
        :return: The WbResponse containing the results of the CDX query, or a
            '400 Bad Request' text response if the query raised
        :rtype: WbResponse
        """
        cdx_url = self.rewriterapp.paths['cdx-server'].format(coll=coll)

        if environ.get('QUERY_STRING'):
            # extend an existing query string, otherwise start one
            joiner = '&' if '?' in cdx_url else '?'
            cdx_url = cdx_url + joiner + environ.get('QUERY_STRING')

        try:
            upstream = requests.get(cdx_url, stream=True)
            upstream_type = upstream.headers.get('Content-Type')
            return WbResponse.bin_stream(StreamIter(upstream.raw),
                                         content_type=upstream_type)
        except Exception as err:
            return WbResponse.text_response('Error: ' + str(err),
                                            status='400 Bad Request')
Example #34
0
def test_resp_3():
    """A default redir_response is an empty-bodied '302 Redirect' with a Location header."""
    actual = vars(WbResponse.redir_response('http://example.com/otherfile'))

    expected_headers = StatusAndHeaders(
        protocol='',
        statusline='302 Redirect',
        headers=[
            ('Location', 'http://example.com/otherfile'),
            ('Content-Length', '0'),
        ])

    assert actual == {'body': [], 'status_headers': expected_headers}
Example #35
0
    def put_record(self, environ, coll, target_uri_format, rec_type, params, data):
        """PUT ``data`` as a custom record of type ``rec_type`` into a collection.

        :param dict environ: The WSGI environment dictionary for the request
        :param str coll: The name of the collection to record into
        :param str target_uri_format: Format string, filled with ``url``, that
            yields the record's target URI
        :param str rec_type: Record type inserted into the upstream PUT URL
        :param dict params: Request parameters; ``url`` is required and
            ``timestamp`` is optional
        :param data: Request body forwarded to the upstream PUT
        :return: JSON response from the upstream PUT, or a JSON error object
            when ``url`` is missing
        :rtype: WbResponse
        """
        self.ensure_coll_exists(coll)

        headers = {'Content-Type': environ.get('CONTENT_TYPE', 'text/plain')}

        url = params.get('url')
        if not url:
            return WbResponse.json_response({'error': 'no url'})

        # an explicit timestamp becomes the record's WARC-Date header
        timestamp = params.get('timestamp')
        if timestamp:
            headers['WARC-Date'] = timestamp_to_iso_date(timestamp)

        put_url = self.custom_record_path.format(
            url=target_uri_format.format(url=url),
            coll=coll,
            rec_type=rec_type,
        )
        upstream = requests.put(put_url, headers=headers, data=data)

        return WbResponse.json_response(upstream.json())
Example #36
0
    def handle_request(self, environ, start_response):
        """Retrieves the route handler for the request, calls it, and returns the result
        of invoking the handler's response as a WSGI callable.

        Errors raised while routing or handling are converted into responses:
        a routing redirect or a 404 WbException first tries a referer based
        redirect, any other WbException is passed to the rewriter app's error
        handler, and every other exception becomes an 'Internal Error' response.

        :param dict environ: The WSGI environment dictionary for the request
        :param start_response: The WSGI start_response callable, passed on to the response
        :return: The result of calling the selected response with (environ, start_response)
        """
        urls = self.url_map.bind_to_environ(environ)
        try:
            endpoint, args = urls.match()

            self.rewriterapp.prepare_env(environ)

            # store original script_name (original prefix) before modifications are made
            environ['ORIG_SCRIPT_NAME'] = environ.get('SCRIPT_NAME')

            # 'lang' is removed from args so it is not passed to the endpoint;
            # when present it is recorded in the environ instead
            lang = args.pop('lang', '')
            if lang:
                pop_path_info(environ)
                environ['pywb_lang'] = lang

            response = endpoint(environ, **args)

        except RequestRedirect as rr:
            # if werkzeug throws this, likely a missing slash redirect
            # also check referrer here to avoid another redirect later
            redir = self._check_refer_redirect(environ)
            if redir:
                return redir(environ, start_response)

            response = WbResponse.redir_response(rr.new_url, '307 Redirect')

        except WbException as wbe:
            # for a 404, prefer redirecting based on the referer (if possible)
            # over rendering the error page
            if wbe.status_code == 404:
                redir = self._check_refer_redirect(environ)
                if redir:
                    return redir(environ, start_response)

            response = self.rewriterapp.handle_error(environ, wbe)

        except Exception as e:
            # unexpected failure: print the traceback only in debug mode
            if self.debug:
                traceback.print_exc()

            response = self.rewriterapp._error_response(
                environ, WbException('Internal Error: ' + str(e)))

        return response(environ, start_response)
Example #37
0
    def serve_home(self, environ):
        """Render index.html with all routes and the metadata of the dynamic routes.

        :param dict environ: The WSGI environment dictionary for the request
        :return: HTML response containing the rendered home page
        :rtype: WbResponse
        """
        view = BaseInsertView(self.rewriterapp.jinja_env, 'index.html')

        fixed = self.warcserver.list_fixed_routes()
        dynamic = self.warcserver.list_dynamic_routes()
        every_route = fixed + dynamic

        # metadata is looked up for the dynamic routes only
        metadata_by_route = self.metadata_cache.get_all(dynamic)

        html = view.render_to_string(environ,
                                     routes=every_route,
                                     all_metadata=metadata_by_route)

        return WbResponse.text_response(
            html, content_type='text/html; charset="utf-8"')
Example #38
0
    def __call__(self, environ, url_str):
        """Load a static file and return it as a binary streaming response.

        The file is looked up under ``environ['pywb.static_dir']`` first (when
        set and the file exists there) and under ``self.static_path`` otherwise.

        :param dict environ: The WSGI environment dictionary for the request
        :param str url_str: Requested file path, optionally followed by a
            '?query' part which is ignored
        :return: Streaming response with a guessed Content-Type and the
            file's Content-Length
        :rtype: WbResponse
        :raises NotFoundException: if the path is absolute, contains a '..'
            segment, or loading the file raised IOError
        """
        url = url_str.split('?')[0]

        # The path comes from the request: refuse absolute paths and parent
        # directory segments so the joins below cannot leave the static dirs.
        if os.path.isabs(url) or '..' in url.replace('\\', '/').split('/'):
            raise NotFoundException('Static File Not Found: ' +
                                    url_str)

        full_path = environ.get('pywb.static_dir')
        if full_path:
            full_path = os.path.join(full_path, url)
            if not os.path.isfile(full_path):
                full_path = None

        if not full_path:
            full_path = os.path.join(self.static_path, url)

        try:
            data = self.block_loader.load(full_path)

            # determine the size by seeking to the end, then rewind
            data.seek(0, 2)
            size = data.tell()
            data.seek(0)
            headers = [('Content-Length', str(size))]

            reader = None

            if 'wsgi.file_wrapper' in environ:
                try:
                    reader = environ['wsgi.file_wrapper'](data)
                except Exception:
                    # best effort: fall back to the plain iterator below
                    reader = None

            if not reader:
                reader = iter(lambda: data.read(), b'')

            content_type = 'application/octet-stream'

            guessed = mimetypes.guess_type(full_path)
            if guessed[0]:
                content_type = guessed[0]

            return WbResponse.bin_stream(reader,
                                         content_type=content_type,
                                         headers=headers)

        except IOError:
            raise NotFoundException('Static File Not Found: ' +
                                    url_str)
Example #39
0
    def serve_home(self, environ):
        """Serves the home (/) view (index.html), which is not tied to a single collection.

        The template receives all fixed and dynamic routes, plus the cached
        metadata of the dynamic routes.

        :param dict environ: The WSGI environment dictionary for the request
        :return: The WbResponse for serving the home (/) path
        :rtype: WbResponse
        """
        view = BaseInsertView(self.rewriterapp.jinja_env, 'index.html')

        fixed = self.warcserver.list_fixed_routes()
        dynamic = self.warcserver.list_dynamic_routes()
        every_route = fixed + dynamic

        # metadata is looked up for the dynamic routes only
        metadata_by_route = self.metadata_cache.get_all(dynamic)

        html = view.render_to_string(environ,
                                     routes=every_route,
                                     all_metadata=metadata_by_route)

        return WbResponse.text_response(html, content_type='text/html; charset="utf-8"')