Exemple #1
0
    def setup(self):
        self.requestlog = []
        self.cache = {}

        def make_httpd(app):
            proxyserv = ProxyServer(('', 0), ProxyRequest)
            proxyserv.requestlog = self.requestlog
            proxyserv.force_err = False
            self.proxyserv = proxyserv
            return proxyserv

        self.server = ServerThreadRunner(make_httpd)

        config = dict(collections=dict(rewrite='$liveweb'),
                      framed_replay=True,
                      proxyhostport=self.server.proxy_dict)

        self.app = init_app(create_wb_router, load_yaml=False, config=config)

        def create_cache():
            return self.cache

        pywb.webapp.live_rewrite_handler.create_cache = create_cache

        self.testapp = webtest.TestApp(self.app)
def setup_module():
    openssl_support = pytest.importorskip("OpenSSL")

    def make_httpd(app):
        return make_server('', 0, app)

    global server
    server = ServerThreadRunner(make_httpd, TEST_CONFIG)
Exemple #3
0
def setup_module():
    certauth = pytest.importorskip("certauth")

    def make_httpd(app):
        return make_server('', 0, app)

    global server
    server = ServerThreadRunner(make_httpd, TEST_CONFIG)
Exemple #4
0
    def setup(self):
        self.requestlog = []
        self.cache = {}

        def make_httpd(app):
            proxyserv = ProxyServer(("", 0), ProxyRequest)
            proxyserv.requestlog = self.requestlog
            proxyserv.force_err = False
            self.proxyserv = proxyserv
            return proxyserv

        self.server = ServerThreadRunner(make_httpd)

        config = dict(collections=dict(rewrite="$liveweb"), framed_replay=True, proxyhostport=self.server.proxy_dict)

        self.app = init_app(create_wb_router, load_yaml=False, config=config)

        def create_cache():
            return self.cache

        pywb.webapp.live_rewrite_handler.create_cache = create_cache

        self.testapp = webtest.TestApp(self.app)
Exemple #5
0
class TestProxyLiveRewriter:
    def setup(self):
        self.requestlog = []
        self.cache = {}

        def make_httpd(app):
            proxyserv = ProxyServer(('', 0), ProxyRequest)
            proxyserv.requestlog = self.requestlog
            proxyserv.force_err = False
            self.proxyserv = proxyserv
            return proxyserv

        self.server = ServerThreadRunner(make_httpd)

        config = dict(collections=dict(rewrite='$liveweb'),
                      framed_replay=True,
                      proxyhostport=self.server.proxy_dict)

        self.app = init_app(create_wb_router, load_yaml=False, config=config)

        def create_cache():
            return self.cache

        pywb.webapp.live_rewrite_handler.create_cache = create_cache

        self.testapp = webtest.TestApp(self.app)

    def teardown(self):
        self.server.stop_thread()

    def test_echo_proxy_referrer(self):
        headers = [('User-Agent', 'python'),
                   ('Referer', 'http://localhost:80/rewrite/other.example.com')
                   ]
        resp = self.testapp.get('/rewrite/http://example.com/',
                                headers=headers)

        # ensure just one request
        assert len(self.requestlog) == 1

        # equal to returned response (echo)
        assert self.requestlog[0] == resp.body
        assert resp.headers['x-archive-orig-x-proxy'] == 'test'

        assert resp.body.startswith('GET http://example.com/ HTTP/1.1')
        assert 'referer: http://other.example.com' in resp.body

        assert len(self.cache) == 0

    def test_echo_proxy_start_unbounded_remove_range(self):
        headers = [('Range', 'bytes=0-')]
        resp = self.testapp.get('/rewrite/http://example.com/',
                                headers=headers)

        # actual response is with range
        assert resp.status_int == 206
        assert 'Content-Range' in resp.headers
        assert resp.headers['Accept-Ranges'] == 'bytes'

        assert len(self.requestlog) == 1

        # proxied, but without range
        assert self.requestlog[0] == resp.body
        assert resp.headers['x-archive-orig-x-proxy'] == 'test'

        assert self.requestlog[0].startswith(
            'GET http://example.com/ HTTP/1.1')
        assert 'range: ' not in self.requestlog[0]

        assert len(self.cache) == 0

    def test_echo_proxy_bounded_noproxy_range(self):
        headers = [('Range', 'bytes=10-1000')]
        resp = self.testapp.get('/rewrite/http://example.com/foobar',
                                headers=headers)

        # actual response is with range
        assert resp.status_int == 206
        assert 'Content-Range' in resp.headers
        assert resp.headers['Accept-Ranges'] == 'bytes'

        # not from proxy
        assert 'x-archive-orig-x-proxy' not in resp.headers

        # proxy receives a request also, but w/o range
        assert len(self.requestlog) == 1

        # proxy receives different request than our response
        assert self.requestlog[0] != resp.body

        assert self.requestlog[0].startswith(
            'GET http://example.com/foobar HTTP/1.1')

        # no range request
        assert 'range: ' not in self.requestlog[0]

        # r: key cached
        assert len(self.cache) == 1
        assert RewriteHandler.create_cache_key(
            'r:', 'http://example.com/foobar') in self.cache

        # Second Request
        # clear log
        self.requestlog.pop()
        headers = [('Range', 'bytes=101-150')]
        resp = self.testapp.get('/rewrite/http://example.com/foobar',
                                headers=headers)

        # actual response is with range
        assert resp.status_int == 206
        assert 'Content-Range' in resp.headers
        assert resp.headers['Accept-Ranges'] == 'bytes'

        # not from proxy
        assert 'x-archive-orig-x-proxy' not in resp.headers

        # already pinged proxy, no additional requests set to proxy
        assert len(self.requestlog) == 0
        assert len(self.cache) == 1

    def test_echo_proxy_video_info(self):
        resp = self.testapp.get(
            '/rewrite/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M')
        assert resp.status_int == 200
        assert resp.content_type == RewriteHandler.YT_DL_TYPE, resp.content_type

        assert len(self.requestlog) == 1
        assert self.requestlog[0].startswith(
            'PUTMETA http://www.youtube.com/watch?v=DjFZyFWSt1M HTTP/1.1')

        # second request, not sent to proxy
        resp = self.testapp.get(
            '/rewrite/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M')
        assert len(self.requestlog) == 1

        # v: video info cache
        assert len(self.cache) == 1
        assert RewriteHandler.create_cache_key(
            'v:', 'https://www.youtube.com/watch?v=DjFZyFWSt1M') in self.cache

    def test_echo_proxy_video_with_referrer(self):
        headers = [('Range', 'bytes=1000-2000'),
                   ('Referer',
                    'http://localhost:80/rewrite/https://example.com/')]
        resp = self.testapp.get(
            '/rewrite/http://www.youtube.com/watch?v=DjFZyFWSt1M',
            headers=headers)

        # not from proxy
        assert 'x-archive-orig-x-proxy' not in resp.headers

        # proxy receives two requests
        assert len(self.requestlog) == 2

        # first, a video info request recording the page
        assert self.requestlog[0].startswith(
            'PUTMETA http://example.com/ HTTP/1.1')

        # second, non-ranged request for page
        assert self.requestlog[1].startswith(
            'GET http://www.youtube.com/watch?v=DjFZyFWSt1M HTTP/1.1')
        assert 'range' not in self.requestlog[1]

        # both video info and range cached
        assert len(self.cache) == 2
        assert RewriteHandler.create_cache_key(
            'v:', 'http://www.youtube.com/watch?v=DjFZyFWSt1M') in self.cache
        assert RewriteHandler.create_cache_key(
            'r:', 'http://www.youtube.com/watch?v=DjFZyFWSt1M') in self.cache

    def test_echo_proxy_error(self):
        headers = [('Range', 'bytes=1000-2000'),
                   ('Referer',
                    'http://localhost:80/rewrite/https://example.com/')]

        self.proxyserv.force_err = True
        resp = self.testapp.get(
            '/rewrite/http://www.youtube.com/watch?v=DjFZyFWSt1M',
            headers=headers)

        # not from proxy
        assert 'x-archive-orig-x-proxy' not in resp.headers

        # no proxy requests as we're forcing exception
        assert len(self.requestlog) == 0

        assert len(self.cache) == 0
Exemple #6
0
class TestProxyLiveRewriter:
    def setup(self):
        self.requestlog = []
        self.cache = {}

        def make_httpd(app):
            proxyserv = ProxyServer(("", 0), ProxyRequest)
            proxyserv.requestlog = self.requestlog
            proxyserv.force_err = False
            self.proxyserv = proxyserv
            return proxyserv

        self.server = ServerThreadRunner(make_httpd)

        config = dict(collections=dict(rewrite="$liveweb"), framed_replay=True, proxyhostport=self.server.proxy_dict)

        self.app = init_app(create_wb_router, load_yaml=False, config=config)

        def create_cache():
            return self.cache

        pywb.webapp.live_rewrite_handler.create_cache = create_cache

        self.testapp = webtest.TestApp(self.app)

    def teardown(self):
        self.server.stop_thread()

    def test_echo_proxy_referrer(self):
        headers = [("User-Agent", "python"), ("Referer", "http://localhost:80/rewrite/other.example.com")]
        resp = self.testapp.get("/rewrite/http://example.com/", headers=headers)

        # ensure just one request
        assert len(self.requestlog) == 1

        # equal to returned response (echo)
        assert self.requestlog[0] == resp.body
        assert resp.headers["x-archive-orig-x-proxy"] == "test"

        assert resp.body.startswith("GET http://example.com/ HTTP/1.1")
        assert "referer: http://other.example.com" in resp.body

        assert len(self.cache) == 0

    def test_echo_proxy_start_unbounded_remove_range(self):
        headers = [("Range", "bytes=0-")]
        resp = self.testapp.get("/rewrite/http://example.com/", headers=headers)

        # actual response is with range
        assert resp.status_int == 206
        assert "Content-Range" in resp.headers
        assert resp.headers["Accept-Ranges"] == "bytes"

        assert len(self.requestlog) == 1

        # proxied, but without range
        assert self.requestlog[0] == resp.body
        assert resp.headers["x-archive-orig-x-proxy"] == "test"

        assert self.requestlog[0].startswith("GET http://example.com/ HTTP/1.1")
        assert "range: " not in self.requestlog[0]

        assert len(self.cache) == 0

    def test_echo_proxy_bounded_noproxy_range(self):
        headers = [("Range", "bytes=10-1000")]
        resp = self.testapp.get("/rewrite/http://example.com/foobar", headers=headers)

        # actual response is with range
        assert resp.status_int == 206
        assert "Content-Range" in resp.headers
        assert resp.headers["Accept-Ranges"] == "bytes"

        # not from proxy
        assert "x-archive-orig-x-proxy" not in resp.headers

        # proxy receives a request also, but w/o range
        assert len(self.requestlog) == 1

        # proxy receives different request than our response
        assert self.requestlog[0] != resp.body

        assert self.requestlog[0].startswith("GET http://example.com/foobar HTTP/1.1")

        # no range request
        assert "range: " not in self.requestlog[0]

        # r: key cached
        assert len(self.cache) == 1
        assert RewriteHandler.create_cache_key("r:", "http://example.com/foobar") in self.cache

        # Second Request
        # clear log
        self.requestlog.pop()
        headers = [("Range", "bytes=101-150")]
        resp = self.testapp.get("/rewrite/http://example.com/foobar", headers=headers)

        # actual response is with range
        assert resp.status_int == 206
        assert "Content-Range" in resp.headers
        assert resp.headers["Accept-Ranges"] == "bytes"

        # not from proxy
        assert "x-archive-orig-x-proxy" not in resp.headers

        # already pinged proxy, no additional requests set to proxy
        assert len(self.requestlog) == 0
        assert len(self.cache) == 1

    def test_echo_proxy_video_info(self):
        resp = self.testapp.get("/rewrite/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M")
        assert resp.status_int == 200
        assert resp.content_type == RewriteHandler.YT_DL_TYPE, resp.content_type

        assert len(self.requestlog) == 1
        assert self.requestlog[0].startswith("PUTMETA http://www.youtube.com/watch?v=DjFZyFWSt1M HTTP/1.1")

        # second request, not sent to proxy
        resp = self.testapp.get("/rewrite/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M")
        assert len(self.requestlog) == 1

        # v: video info cache
        assert len(self.cache) == 1
        assert RewriteHandler.create_cache_key("v:", "https://www.youtube.com/watch?v=DjFZyFWSt1M") in self.cache

    def test_echo_proxy_video_with_referrer(self):
        headers = [("Range", "bytes=1000-2000"), ("Referer", "http://localhost:80/rewrite/https://example.com/")]
        resp = self.testapp.get("/rewrite/http://www.youtube.com/watch?v=DjFZyFWSt1M", headers=headers)

        # not from proxy
        assert "x-archive-orig-x-proxy" not in resp.headers

        # proxy receives two requests
        assert len(self.requestlog) == 2

        # first, a video info request recording the page
        assert self.requestlog[0].startswith("PUTMETA http://example.com/ HTTP/1.1")

        # second, non-ranged request for page
        assert self.requestlog[1].startswith("GET http://www.youtube.com/watch?v=DjFZyFWSt1M HTTP/1.1")
        assert "range" not in self.requestlog[1]

        # both video info and range cached
        assert len(self.cache) == 2
        assert RewriteHandler.create_cache_key("v:", "http://www.youtube.com/watch?v=DjFZyFWSt1M") in self.cache
        assert RewriteHandler.create_cache_key("r:", "http://www.youtube.com/watch?v=DjFZyFWSt1M") in self.cache

    def test_echo_proxy_error(self):
        headers = [("Range", "bytes=1000-2000"), ("Referer", "http://localhost:80/rewrite/https://example.com/")]

        self.proxyserv.force_err = True
        resp = self.testapp.get("/rewrite/http://www.youtube.com/watch?v=DjFZyFWSt1M", headers=headers)

        # not from proxy
        assert "x-archive-orig-x-proxy" not in resp.headers

        # no proxy requests as we're forcing exception
        assert len(self.requestlog) == 0

        assert len(self.cache) == 0
def setup_module():
    def make_httpd(app):
        return make_server('', 0, app)

    global server
    server = ServerThreadRunner(make_httpd, TEST_CONFIG)
Exemple #8
0
class TestProxyLiveRewriter:
    def setup(self):
        self.requestlog = []
        self.cache = {}

        def make_httpd(app):
            proxyserv = ProxyServer(('', 0), ProxyRequest)
            proxyserv.requestlog = self.requestlog
            proxyserv.force_err = False
            self.proxyserv = proxyserv
            return proxyserv

        self.server = ServerThreadRunner(make_httpd)

        self.app = init_app(create_live_rewriter_app, load_yaml=False,
                           config=dict(framed_replay=True,
                           proxyhostport=self.server.proxy_dict))

        def create_cache():
            return self.cache

        pywb.webapp.live_rewrite_handler.create_cache = create_cache

        self.testapp = webtest.TestApp(self.app)

    def teardown(self):
        self.server.stop_thread()

    def test_echo_proxy_referrer(self):
        headers = [('User-Agent', 'python'), ('Referer', 'http://localhost:80/rewrite/other.example.com')]
        resp = self.testapp.get('/rewrite/http://example.com/', headers=headers)

        # ensure just one request
        assert len(self.requestlog) == 1

        # equal to returned response (echo)
        assert self.requestlog[0] == resp.body
        assert resp.headers['x-archive-orig-x-proxy'] == 'test'

        assert resp.body.startswith('GET http://example.com/ HTTP/1.1')
        assert 'referer: http://other.example.com' in resp.body

        assert len(self.cache) == 0

    def test_echo_proxy_start_unbounded_remove_range(self):
        headers = [('Range', 'bytes=0-')]
        resp = self.testapp.get('/rewrite/http://example.com/', headers=headers)

        # actual response is with range
        assert resp.status_int == 206
        assert 'Content-Range' in resp.headers
        assert resp.headers['Accept-Ranges'] == 'bytes'

        assert len(self.requestlog) == 1

        # proxied, but without range
        assert self.requestlog[0] == resp.body
        assert resp.headers['x-archive-orig-x-proxy'] == 'test'

        assert self.requestlog[0].startswith('GET http://example.com/ HTTP/1.1')
        assert 'range: ' not in self.requestlog[0]

        assert len(self.cache) == 0

    def test_echo_proxy_bounded_noproxy_range(self):
        headers = [('Range', 'bytes=10-1000')]
        resp = self.testapp.get('/rewrite/http://example.com/foobar', headers=headers)

        # actual response is with range
        assert resp.status_int == 206
        assert 'Content-Range' in resp.headers
        assert resp.headers['Accept-Ranges'] == 'bytes'

        # not from proxy
        assert 'x-archive-orig-x-proxy' not in resp.headers

        # proxy receives a request also, but w/o range
        assert len(self.requestlog) == 1

        # proxy receives different request than our response
        assert self.requestlog[0] != resp.body

        assert self.requestlog[0].startswith('GET http://example.com/foobar HTTP/1.1')

        # no range request
        assert 'range: ' not in self.requestlog[0]

        # r: key cached
        assert len(self.cache) == 1
        assert RewriteHandler.create_cache_key('r:', 'http://example.com/foobar') in self.cache

        # Second Request
        # clear log
        self.requestlog.pop()
        headers = [('Range', 'bytes=101-150')]
        resp = self.testapp.get('/rewrite/http://example.com/foobar', headers=headers)

        # actual response is with range
        assert resp.status_int == 206
        assert 'Content-Range' in resp.headers
        assert resp.headers['Accept-Ranges'] == 'bytes'

        # not from proxy
        assert 'x-archive-orig-x-proxy' not in resp.headers

        # already pinged proxy, no additional requests set to proxy
        assert len(self.requestlog) == 0
        assert len(self.cache) == 1

    def test_echo_proxy_video_info(self):
        resp = self.testapp.get('/rewrite/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M')
        assert resp.status_int == 200
        assert resp.content_type == RewriteHandler.YT_DL_TYPE, resp.content_type

        assert len(self.requestlog) == 1
        assert self.requestlog[0].startswith('PUTMETA http://www.youtube.com/watch?v=DjFZyFWSt1M HTTP/1.1')

        # second request, not sent to proxy
        resp = self.testapp.get('/rewrite/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M')
        assert len(self.requestlog) == 1

        # v: video info cache
        assert len(self.cache) == 1
        assert RewriteHandler.create_cache_key('v:', 'https://www.youtube.com/watch?v=DjFZyFWSt1M') in self.cache

    def test_echo_proxy_video_with_referrer(self):
        headers = [('Range', 'bytes=1000-2000'), ('Referer', 'http://localhost:80/rewrite/https://example.com/')]
        resp = self.testapp.get('/rewrite/http://www.youtube.com/watch?v=DjFZyFWSt1M', headers=headers)

        # not from proxy
        assert 'x-archive-orig-x-proxy' not in resp.headers

        # proxy receives two requests
        assert len(self.requestlog) == 2

        # first, a video info request recording the page
        assert self.requestlog[0].startswith('PUTMETA http://example.com/ HTTP/1.1')

        # second, non-ranged request for page
        assert self.requestlog[1].startswith('GET http://www.youtube.com/watch?v=DjFZyFWSt1M HTTP/1.1')
        assert 'range' not in self.requestlog[1]

        # both video info and range cached
        assert len(self.cache) == 2
        assert RewriteHandler.create_cache_key('v:', 'http://www.youtube.com/watch?v=DjFZyFWSt1M') in self.cache
        assert RewriteHandler.create_cache_key('r:', 'http://www.youtube.com/watch?v=DjFZyFWSt1M') in self.cache


    def test_echo_proxy_error(self):
        headers = [('Range', 'bytes=1000-2000'), ('Referer', 'http://localhost:80/rewrite/https://example.com/')]

        self.proxyserv.force_err = True
        resp = self.testapp.get('/rewrite/http://www.youtube.com/watch?v=DjFZyFWSt1M', headers=headers)

        # not from proxy
        assert 'x-archive-orig-x-proxy' not in resp.headers

        # no proxy requests as we're forcing exception
        assert len(self.requestlog) == 0

        assert len(self.cache) == 0