コード例 #1
0
ファイル: test_rewrite_live.py プロジェクト: chdorner/pywb
def test_forwarded_scheme():
    """Check the X-Forwarded-Proto header produced for an http target url.

    The environ advertises ``https`` and carries an unrelated ``Other``
    key; the translated headers are expected to contain only
    ``X-Forwarded-Proto: http``.
    """
    wsgi_env = {'HTTP_X_FORWARDED_PROTO': 'https', 'Other': 'Value'}
    expected = {'X-Forwarded-Proto': 'http'}

    actual = LiveRewriter().translate_headers('http://example.com/',
                                              'com,example)/',
                                              wsgi_env)

    assert actual == expected
コード例 #2
0
ファイル: test_rewrite_live.py プロジェクト: Cloudxtreme/pywb
def test_forwarded_scheme():
    """translate_headers should emit ``X-Forwarded-Proto: http`` only.

    Input environ says ``https`` and includes a non-HTTP ``Other`` key,
    while the url being translated uses the ``http`` scheme.
    """
    target_url = 'http://example.com/'
    target_key = 'com,example)/'
    incoming = {'HTTP_X_FORWARDED_PROTO': 'https', 'Other': 'Value'}

    live_rewriter = LiveRewriter()
    translated = live_rewriter.translate_headers(target_url,
                                                 target_key,
                                                 incoming)

    assert translated == {'X-Forwarded-Proto': 'http'}
コード例 #3
0
ファイル: test_rewrite_live.py プロジェクト: Cloudxtreme/pywb
def test_csrf_token_headers():
    """Check that X-CSRFToken is taken from the ``csrftoken`` cookie.

    The environ supplies a mismatching ``HTTP_X_CSRFTOKEN`` value; the
    translated headers are expected to carry the cookie's token instead,
    alongside the unchanged Cookie header.
    """
    wsgi_env = {'HTTP_X_CSRFTOKEN': 'wrong', 'HTTP_COOKIE': 'csrftoken=foobar'}
    expected = {'X-CSRFToken': 'foobar', 'Cookie': 'csrftoken=foobar'}

    actual = LiveRewriter().translate_headers('http://example.com/',
                                              'com,example)/',
                                              wsgi_env)

    assert actual == expected
コード例 #4
0
ファイル: test_rewrite_live.py プロジェクト: chdorner/pywb
def test_csrf_token_headers():
    """translate_headers should replace a wrong X-CSRFToken header.

    Expected result: ``X-CSRFToken`` equals the ``csrftoken`` cookie value
    and the Cookie header is passed through as given.
    """
    target_url = 'http://example.com/'
    target_key = 'com,example)/'
    incoming = {'HTTP_X_CSRFTOKEN': 'wrong', 'HTTP_COOKIE': 'csrftoken=foobar'}

    live_rewriter = LiveRewriter()
    translated = live_rewriter.translate_headers(target_url,
                                                 target_key,
                                                 incoming)

    assert translated == {'X-CSRFToken': 'foobar', 'Cookie': 'csrftoken=foobar'}
コード例 #5
0
def test_req_cookie_rewrite_3():
    """Check the Cookie header produced for the test.example.example url.

    The environ is empty, so the expected ``Cookie`` value is produced by
    the rewriter alone.
    # NOTE(review): presumably via a rule keyed on this urlkey -- confirm.
    """
    expected = {'Cookie': '; FOO=&bar=1'}

    actual = LiveRewriter().translate_headers('test.example.example/',
                                              'example,example,test)/',
                                              {})

    assert actual == expected
コード例 #6
0
ファイル: test_rewrite_live.py プロジェクト: chdorner/pywb
def test_req_cookie_rewrite_3():
    """translate_headers with an empty environ should still yield a Cookie.

    Only the url and urlkey are supplied; the result is expected to be
    exactly ``{'Cookie': '; FOO=&bar=1'}``.
    """
    target_key = 'example,example,test)/'
    target_url = 'test.example.example/'
    incoming = {}

    live_rewriter = LiveRewriter()
    translated = live_rewriter.translate_headers(target_url,
                                                 target_key,
                                                 incoming)

    assert translated == {'Cookie': '; FOO=&bar=1'}
コード例 #7
0
class RewriteHandler(SearchPageWbUrlHandler):
    """Handler that loads a url from the live web and rewrites the response."""

    # Default Set-Cookie value; {0} is filled with the cdx timestamp
    LIVE_COOKIE = 'pywb.timestamp={0}; max-age=60'

    def __init__(self, config):
        """Create the live rewriter and head-insert view from ``config``.

        :param config: mapping-like object read via ``config.get``; the keys
            used here are ``proxyhostport`` and ``live-cookie``.
        """
        super(RewriteHandler, self).__init__(config)

        default_proxy = config.get('proxyhostport')
        self.rewriter = LiveRewriter(is_framed_replay=self.is_frame_mode,
                                     default_proxy=default_proxy)

        self.head_insert_view = HeadInsertView.init_from_config(config)

        self.live_cookie = config.get('live-cookie', self.LIVE_COOKIE)

    def handle_request(self, wbrequest):
        """Render the live content for ``wbrequest``.

        :raises LiveResourceException: when rendering fails for any reason;
            the original traceback is printed before re-raising.
        """
        try:
            return self.render_content(wbrequest)

        except Exception:
            import traceback
            # format_exc() formats the exception currently being handled;
            # its first positional parameter is a frame *limit*, so the
            # exception object must not be passed to it.
            err_details = traceback.format_exc()
            print(err_details)

            url = wbrequest.wb_url.url
            msg = 'Could not load the url from the live web: ' + url
            raise LiveResourceException(msg=msg, url=url)

    def _live_request_headers(self, wbrequest):
        """Return extra request headers for the live fetch (none here)."""
        return {}

    def render_content(self, wbrequest):
        """Fetch and rewrite the requested url, returning the response."""
        head_insert_func = self.head_insert_view.create_insert_func(wbrequest)
        req_headers = self._live_request_headers(wbrequest)

        ref_wburl_str = wbrequest.extract_referrer_wburl_str()
        if ref_wburl_str:
            wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url

        result = self.rewriter.fetch_request(wbrequest.wb_url.url,
                                             wbrequest.urlrewriter,
                                             head_insert_func=head_insert_func,
                                             req_headers=req_headers,
                                             env=wbrequest.env)

        return self._make_response(wbrequest, *result)

    def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
        """Wrap the fetch result in a ``WbResponse``.

        ``is_rewritten`` is accepted so the fetch result can be unpacked
        directly into this call; it is not used.
        """
        # if cookie set, pass recorded timestamp info via cookie
        # so that client side may be able to access it
        # used by framed mode to update frame banner
        if self.live_cookie:
            # the cdx entry may be absent from the environ; skip the
            # cookie in that case instead of failing the whole response
            cdx = wbrequest.env.get('pywb.cdx')
            if cdx:
                value = self.live_cookie.format(cdx['timestamp'])
                status_headers.headers.append(('Set-Cookie', value))

        return WbResponse(status_headers, gen)

    def __str__(self):
        return 'Live Web Rewrite Handler'
コード例 #8
0
    def __init__(self, config):
        """Set up the live rewriter, head-insert view and handler options.

        :param config: mapping-like object read via ``config.get``; the keys
            used here are ``proxyhostport``, ``live-cookie`` and
            ``verify_ssl``.
        """
        super(RewriteHandler, self).__init__(config)

        self.rewriter = LiveRewriter(
            proxies=config.get('proxyhostport'),
            is_framed_replay=self.is_frame_mode,
        )
        # mirror the rewriter's proxies attribute on the handler
        self.proxies = self.rewriter.proxies

        self.head_insert_view = HeadInsertView.init_from_config(config)

        # options with their fallbacks when not present in config
        self.live_cookie = config.get('live-cookie', self.LIVE_COOKIE)
        self.verify = config.get('verify_ssl', True)

        # both start out unset
        self.ydl = self._cache = None
コード例 #9
0
    def __init__(self, config):
        """Set up the live rewriter, head-insert view and live cookie.

        :param config: mapping-like object read via ``config.get``; the keys
            used here are ``proxyhostport`` and ``live-cookie``.
        """
        super(RewriteHandler, self).__init__(config)

        self.rewriter = LiveRewriter(
            default_proxy=config.get('proxyhostport'),
            is_framed_replay=self.is_frame_mode,
        )

        self.head_insert_view = HeadInsertView.init_from_config(config)

        # fall back to the class-level template when not configured
        self.live_cookie = config.get('live-cookie', self.LIVE_COOKIE)
コード例 #10
0
    def __init__(self, config):
        """Initialize rewriter, proxies, head-insert view and options.

        :param config: mapping-like object read via ``config.get``; the keys
            used here are ``proxyhostport``, ``live-cookie`` and
            ``verify_ssl``.
        """
        super(RewriteHandler, self).__init__(config)

        proxy_setting = config.get('proxyhostport')
        live_rewriter_args = dict(is_framed_replay=self.is_frame_mode,
                                  proxies=proxy_setting)
        self.rewriter = LiveRewriter(**live_rewriter_args)

        # the handler reuses whatever proxies the rewriter exposes
        self.proxies = self.rewriter.proxies

        self.head_insert_view = HeadInsertView.init_from_config(config)

        self.live_cookie = config.get('live-cookie', self.LIVE_COOKIE)
        self.verify = config.get('verify_ssl', True)

        # both start out unset
        self.ydl = None
        self._cache = None
コード例 #11
0
class RewriteHandler(SearchPageWbUrlHandler):
    """Handler that loads a url from the live web and rewrites the response.

    When proxies are configured on the rewriter, Range requests and video
    info lookups additionally trigger follow-up requests through those
    proxies (``_add_proxy_ping`` and ``_get_video_info``).
    """

    # Default Set-Cookie value; {0} is filled with the cdx timestamp
    LIVE_COOKIE = 'pywb.timestamp={0}; max-age=60'

    # Content type used for the serialized video info
    YT_DL_TYPE = 'application/vnd.youtube-dl_formats+json'

    # YoutubeDLWrapper, created on first use in _get_video_info
    youtubedl = None

    def __init__(self, config):
        """Set up the live rewriter, head-insert view and handler options.

        :param config: mapping-like object read via ``config.get``; the keys
            used here are ``proxyhostport``, ``live-cookie`` and
            ``verify_ssl``.
        """
        super(RewriteHandler, self).__init__(config)

        proxyhostport = config.get('proxyhostport')
        self.rewriter = LiveRewriter(is_framed_replay=self.is_frame_mode,
                                     proxies=proxyhostport)

        self.proxies = self.rewriter.proxies

        self.head_insert_view = HeadInsertView.init_from_config(config)

        self.live_cookie = config.get('live-cookie', self.LIVE_COOKIE)

        self.verify = config.get('verify_ssl', True)

        self.ydl = None

        # created lazily by _get_cache_key
        self._cache = None

    def handle_request(self, wbrequest):
        """Render the live content for ``wbrequest``.

        :raises LiveResourceException: when rendering fails for any reason;
            the original traceback is printed before re-raising.
        """
        try:
            return self.render_content(wbrequest)

        except Exception:
            import traceback
            # format_exc() formats the exception currently being handled;
            # its first positional parameter is a frame *limit*, so the
            # exception object must not be passed to it.
            err_details = traceback.format_exc()
            print(err_details)

            url = wbrequest.wb_url.url
            msg = 'Could not load the url from the live web: ' + url
            raise LiveResourceException(msg=msg, url=url)

    def _live_request_headers(self, wbrequest):
        """Return extra request headers for live requests (none here)."""
        return {}

    def render_content(self, wbrequest):
        """Fetch and rewrite the requested url, returning the response.

        Requests with the ``vi_`` modifier return video info instead.
        With proxies configured, Range requests get special handling:
        a ``bytes=0-`` style request is fetched without the Range header
        and the range is re-added to the response; any other range is
        fetched with proxies ignored and, if not already cached, followed
        by a proxy ping of the url.
        """
        if wbrequest.wb_url.mod == 'vi_':
            return self._get_video_info(wbrequest)

        head_insert_func = self.head_insert_view.create_insert_func(wbrequest)
        req_headers = self._live_request_headers(wbrequest)

        ref_wburl_str = wbrequest.extract_referrer_wburl_str()
        if ref_wburl_str:
            wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url

        ignore_proxies = False
        use_206 = False
        url = None
        rangeres = None

        readd_range = False
        cache_key = None

        if self.proxies:
            rangeres = wbrequest.extract_range()

            if rangeres:
                url, start, end, use_206 = rangeres

                # if bytes=0- Range request,
                # simply remove the range and still proxy
                if start == 0 and not end and use_206:
                    wbrequest.wb_url.url = url
                    del wbrequest.env['HTTP_RANGE']
                    readd_range = True
                else:
                    # disables proxy
                    ignore_proxies = True

                    # sets cache_key only if not already cached
                    cache_key = self._get_cache_key('r:', url)

        result = self.rewriter.fetch_request(wbrequest.wb_url.url,
                                             wbrequest.urlrewriter,
                                             head_insert_func=head_insert_func,
                                             req_headers=req_headers,
                                             env=wbrequest.env,
                                             ignore_proxies=ignore_proxies,
                                             verify=self.verify)

        wbresponse = self._make_response(wbrequest, *result)

        if readd_range:
            content_length = (
                wbresponse.status_headers.get_header('Content-Length'))
            try:
                content_length = int(content_length)
                wbresponse.status_headers.add_range(0, content_length,
                                                    content_length)
            except (ValueError, TypeError):
                # missing or non-numeric Content-Length: leave response as-is
                pass

        if cache_key:
            self._add_proxy_ping(cache_key, url, wbrequest, wbresponse)

        if rangeres:
            referrer = wbrequest.env.get('REL_REFERER')

            # also ping video info
            if referrer:
                try:
                    self._get_video_info(wbrequest,
                                         info_url=referrer,
                                         video_url=url)
                except Exception:
                    # best-effort: a failed info lookup must not break the
                    # response, but KeyboardInterrupt/SystemExit propagate
                    print('Error getting video info')

        return wbresponse

    def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
        """Wrap the fetch result in a ``WbResponse``.

        ``is_rewritten`` is accepted so the fetch result can be unpacked
        directly into this call; it is not used.
        """
        # if cookie set, pass recorded timestamp info via cookie
        # so that client side may be able to access it
        # used by framed mode to update frame banner
        if self.live_cookie:
            cdx = wbrequest.env.get('pywb.cdx')
            if cdx:
                value = self.live_cookie.format(cdx['timestamp'])
                status_headers.headers.append(('Set-Cookie', value))

        return WbResponse(status_headers, gen)

    def _get_cache_key(self, prefix, url):
        """Return the cache key for ``url``, or None if already cached.

        The cache itself is created on first use.
        """
        # compare against None: an empty cache may be falsy and must not
        # be re-created on every call
        if self._cache is None:
            self._cache = create_cache()

        key = self.create_cache_key(prefix, url)

        if key in self._cache:
            return None

        return key

    @staticmethod
    def create_cache_key(prefix, url):
        """Return ``prefix`` followed by the hex md5 digest of ``url``.

        Text urls are encoded as UTF-8 before hashing, since the hash
        only accepts bytes; byte strings are hashed unchanged.
        """
        if not isinstance(url, bytes):
            url = url.encode('utf-8')

        return prefix + hashlib.md5(url).hexdigest()

    def _add_proxy_ping(self, key, url, wbrequest, wbresponse):
        """Wrap the response body so ``url`` is requested via the proxies
        once the body has been fully iterated.

        :returns: the same ``wbresponse`` with its body replaced.
        """
        def do_ping():
            headers = self._live_request_headers(wbrequest)
            headers['Connection'] = 'close'

            try:
                # mark as pinged
                self._cache[key] = '1'

                # NOTE(review): certificate verification is disabled here
                # regardless of self.verify -- confirm this is intended
                resp = requests.get(url=url,
                                    headers=headers,
                                    proxies=self.proxies,
                                    verify=False,
                                    stream=True)

                # don't actually read whole response,
                # proxy response for writing it
                resp.close()
            except:
                # undo the "pinged" mark, then let the error propagate
                del self._cache[key]
                raise

        def wrap_buff_gen(gen):
            for x in gen:
                yield x

            # ping only after the whole body was sent; failures are
            # deliberately ignored as the response is already complete
            try:
                do_ping()
            except Exception:
                pass

        wbresponse.body = wrap_buff_gen(wbresponse.body)
        return wbresponse

    def _get_video_info(self, wbrequest, info_url=None, video_url=None):
        """Extract video info for ``video_url`` and return it as a response.

        Both urls default to the url of ``wbrequest``. With proxies
        configured and the video not yet cached, the serialized info is
        also sent to ``info_url`` through the proxies using the
        ``PUTMETA`` method.
        """
        if not self.youtubedl:
            self.youtubedl = YoutubeDLWrapper()

        if not video_url:
            video_url = wbrequest.wb_url.url

        if not info_url:
            info_url = wbrequest.wb_url.url

        cache_key = None
        if self.proxies:
            cache_key = self._get_cache_key('v:', video_url)

        info = self.youtubedl.extract_info(video_url)

        content_type = self.YT_DL_TYPE
        metadata = json.dumps(info)

        if (self.proxies and cache_key):
            headers = self._live_request_headers(wbrequest)
            headers['Content-Type'] = content_type

            info_url = HttpsUrlRewriter.remove_https(info_url)

            # NOTE(review): certificate verification is disabled here
            # regardless of self.verify -- confirm this is intended
            requests.request(method='PUTMETA',
                             url=info_url,
                             data=metadata,
                             headers=headers,
                             proxies=self.proxies,
                             verify=False)

            self._cache[cache_key] = '1'

        return WbResponse.text_response(metadata, content_type=content_type)
コード例 #12
0
class RewriteHandler(SearchPageWbUrlHandler):
    """Handler that loads a url from the live web and rewrites the response.

    When proxies are configured on the rewriter, Range requests and video
    info lookups additionally trigger follow-up requests through those
    proxies (``_add_proxy_ping`` and ``_get_video_info``).
    """

    # Default Set-Cookie value; {0} is filled with the cdx timestamp
    LIVE_COOKIE = 'pywb.timestamp={0}; max-age=60'

    # Content type used for the serialized video info
    YT_DL_TYPE = 'application/vnd.youtube-dl_formats+json'

    # YoutubeDLWrapper, created on first use in _get_video_info
    youtubedl = None

    def __init__(self, config):
        """Set up the live rewriter, head-insert view and handler options.

        :param config: mapping-like object read via ``config.get``; the keys
            used here are ``proxyhostport``, ``live-cookie`` and
            ``verify_ssl``.
        """
        super(RewriteHandler, self).__init__(config)

        proxyhostport = config.get('proxyhostport')
        self.rewriter = LiveRewriter(is_framed_replay=self.is_frame_mode,
                                     proxies=proxyhostport)

        self.proxies = self.rewriter.proxies

        self.head_insert_view = HeadInsertView.init_from_config(config)

        self.live_cookie = config.get('live-cookie', self.LIVE_COOKIE)

        self.verify = config.get('verify_ssl', True)

        self.ydl = None

        # created lazily by _get_cache_key
        self._cache = None

    def handle_request(self, wbrequest):
        """Render the live content for ``wbrequest``.

        :raises LiveResourceException: when rendering fails for any reason;
            the original traceback is printed before re-raising.
        """
        try:
            return self.render_content(wbrequest)

        except Exception:
            import traceback
            # format_exc() formats the exception currently being handled;
            # its first positional parameter is a frame *limit*, so the
            # exception object must not be passed to it.
            err_details = traceback.format_exc()
            print(err_details)

            url = wbrequest.wb_url.url
            msg = 'Could not load the url from the live web: ' + url
            raise LiveResourceException(msg=msg, url=url)

    def _live_request_headers(self, wbrequest):
        """Return extra request headers for live requests (none here)."""
        return {}

    def render_content(self, wbrequest):
        """Fetch and rewrite the requested url, returning the response.

        Requests with the ``vi_`` modifier return video info instead.
        With proxies configured, Range requests get special handling:
        a ``bytes=0-`` style request is fetched without the Range header
        and the range is re-added to the response; any other range is
        fetched with proxies ignored and, if not already cached, followed
        by a proxy ping of the url.
        """
        if wbrequest.wb_url.mod == 'vi_':
            return self._get_video_info(wbrequest)

        head_insert_func = self.head_insert_view.create_insert_func(wbrequest)
        req_headers = self._live_request_headers(wbrequest)

        ref_wburl_str = wbrequest.extract_referrer_wburl_str()
        if ref_wburl_str:
            wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url

        ignore_proxies = False
        use_206 = False
        url = None
        rangeres = None

        readd_range = False
        cache_key = None

        if self.proxies:
            rangeres = wbrequest.extract_range()

            if rangeres:
                url, start, end, use_206 = rangeres

                # if bytes=0- Range request,
                # simply remove the range and still proxy
                if start == 0 and not end and use_206:
                    wbrequest.wb_url.url = url
                    del wbrequest.env['HTTP_RANGE']
                    readd_range = True
                else:
                    # disables proxy
                    ignore_proxies = True

                    # sets cache_key only if not already cached
                    cache_key = self._get_cache_key('r:', url)

        result = self.rewriter.fetch_request(wbrequest.wb_url.url,
                                             wbrequest.urlrewriter,
                                             head_insert_func=head_insert_func,
                                             req_headers=req_headers,
                                             env=wbrequest.env,
                                             ignore_proxies=ignore_proxies,
                                             verify=self.verify)

        wbresponse = self._make_response(wbrequest, *result)

        if readd_range:
            content_length = (wbresponse.status_headers.
                              get_header('Content-Length'))
            try:
                content_length = int(content_length)
                wbresponse.status_headers.add_range(0, content_length,
                                                    content_length)
            except (ValueError, TypeError):
                # missing or non-numeric Content-Length: leave response as-is
                pass

        if cache_key:
            self._add_proxy_ping(cache_key, url, wbrequest, wbresponse)

        if rangeres:
            referrer = wbrequest.env.get('REL_REFERER')

            # also ping video info
            if referrer:
                try:
                    self._get_video_info(wbrequest,
                                         info_url=referrer,
                                         video_url=url)
                except Exception:
                    # best-effort: a failed info lookup must not break the
                    # response, but KeyboardInterrupt/SystemExit propagate
                    print('Error getting video info')

        return wbresponse

    def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
        """Wrap the fetch result in a ``WbResponse``.

        ``is_rewritten`` is accepted so the fetch result can be unpacked
        directly into this call; it is not used.
        """
        # if cookie set, pass recorded timestamp info via cookie
        # so that client side may be able to access it
        # used by framed mode to update frame banner
        if self.live_cookie:
            cdx = wbrequest.env.get('pywb.cdx')
            if cdx:
                value = self.live_cookie.format(cdx['timestamp'])
                status_headers.headers.append(('Set-Cookie', value))

        return WbResponse(status_headers, gen)

    def _get_cache_key(self, prefix, url):
        """Return the cache key for ``url``, or None if already cached.

        The cache itself is created on first use.
        """
        # compare against None: an empty cache may be falsy and must not
        # be re-created on every call
        if self._cache is None:
            self._cache = create_cache()

        key = self.create_cache_key(prefix, url)

        if key in self._cache:
            return None

        return key

    @staticmethod
    def create_cache_key(prefix, url):
        """Return ``prefix`` followed by the hex md5 digest of ``url``.

        Text urls are encoded as UTF-8 before hashing, since the hash
        only accepts bytes; byte strings are hashed unchanged.
        """
        if not isinstance(url, bytes):
            url = url.encode('utf-8')

        return prefix + hashlib.md5(url).hexdigest()

    def _add_proxy_ping(self, key, url, wbrequest, wbresponse):
        """Wrap the response body so ``url`` is requested via the proxies
        once the body has been fully iterated.

        :returns: the same ``wbresponse`` with its body replaced.
        """
        def do_ping():
            headers = self._live_request_headers(wbrequest)
            headers['Connection'] = 'close'

            try:
                # mark as pinged
                self._cache[key] = '1'

                # NOTE(review): certificate verification is disabled here
                # regardless of self.verify -- confirm this is intended
                resp = requests.get(url=url,
                                    headers=headers,
                                    proxies=self.proxies,
                                    verify=False,
                                    stream=True)

                # don't actually read whole response,
                # proxy response for writing it
                resp.close()
            except:
                # undo the "pinged" mark, then let the error propagate
                del self._cache[key]
                raise

        def wrap_buff_gen(gen):
            for x in gen:
                yield x

            # ping only after the whole body was sent; failures are
            # deliberately ignored as the response is already complete
            try:
                do_ping()
            except Exception:
                pass

        wbresponse.body = wrap_buff_gen(wbresponse.body)
        return wbresponse

    def _get_video_info(self, wbrequest, info_url=None, video_url=None):
        """Extract video info for ``video_url`` and return it as a response.

        Both urls default to the url of ``wbrequest``. With proxies
        configured and the video not yet cached, the serialized info is
        also sent to ``info_url`` through the proxies using the
        ``PUTMETA`` method.
        """
        if not self.youtubedl:
            self.youtubedl = YoutubeDLWrapper()

        if not video_url:
            video_url = wbrequest.wb_url.url

        if not info_url:
            info_url = wbrequest.wb_url.url

        cache_key = None
        if self.proxies:
            cache_key = self._get_cache_key('v:', video_url)

        info = self.youtubedl.extract_info(video_url)

        content_type = self.YT_DL_TYPE
        metadata = json.dumps(info)

        if (self.proxies and cache_key):
            headers = self._live_request_headers(wbrequest)
            headers['Content-Type'] = content_type

            info_url = HttpsUrlRewriter.remove_https(info_url)

            # NOTE(review): certificate verification is disabled here
            # regardless of self.verify -- confirm this is intended
            requests.request(method='PUTMETA',
                             url=info_url,
                             data=metadata,
                             headers=headers,
                             proxies=self.proxies,
                             verify=False)

            self._cache[cache_key] = '1'

        return WbResponse.text_response(metadata, content_type=content_type)
コード例 #13
0
def get_rewritten(*args, **kwargs):
    """Call ``LiveRewriter().get_rewritten`` with ``remote_only=False``.

    All other arguments are passed through unchanged. Returns the status
    headers together with the buffer converted by ``to_native_str``.
    """
    rewriter = LiveRewriter()
    result = rewriter.get_rewritten(*args, remote_only=False, **kwargs)

    status_headers, buff = result
    return status_headers, to_native_str(buff)
コード例 #14
0
ファイル: test_rewrite_live.py プロジェクト: Cloudxtreme/pywb
def get_rewritten(*args, **kwargs):
    """Call ``LiveRewriter().get_rewritten`` with ``remote_only=False``.

    All other arguments are passed through unchanged and the result is
    returned as-is.
    """
    rewriter = LiveRewriter()
    return rewriter.get_rewritten(*args, remote_only=False, **kwargs)
コード例 #15
0
def get_rewritten(*args, **kwargs):
    """Forward all arguments to ``get_rewritten`` of a fresh LiveRewriter."""
    rewriter = LiveRewriter()
    rewritten = rewriter.get_rewritten(*args, **kwargs)
    return rewritten