def test_proxy_precedence_meta(self):
     """A proxy given in the request meta is kept as-is even though
     ``http_proxy`` is set in the environment."""
     os.environ["http_proxy"] = "https://proxy.com"
     middleware = HttpProxyMiddleware()
     request = Request(
         "http://scrapytest.org",
         meta={"proxy": "https://new.proxy:3128"},
     )
     outcome = middleware.process_request(request, spider)
     assert outcome is None
     expected_meta = {"proxy": "https://new.proxy:3128"}
     self.assertEqual(request.meta, expected_meta)
Ejemplo n.º 2
0
 def test_proxy_auth_empty_passwd(self):
     """Proxy credentials with an empty password are accepted.

     With ``http_proxy`` set to a URL holding ``user:`` credentials, the
     request meta must end up with the credential-free proxy URL and the
     ``Proxy-Authorization`` header must carry the Basic-encoded ``user:``.
     """
     os.environ['http_proxy'] = 'https://user:@proxy:3128'
     mw = HttpProxyMiddleware()
     req = Request('http://scrapytest.org')
     assert mw.process_request(req, spider) is None
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(req.meta, {'proxy': 'https://proxy:3128'})
     self.assertEqual(req.headers.get('Proxy-Authorization'), b'Basic dXNlcjo=')
 def test_proxy_precedence_meta(self):
     """A proxy given in the request meta is kept as-is even though
     ``http_proxy`` is set in the environment."""
     os.environ['http_proxy'] = 'https://proxy.com'
     mw = HttpProxyMiddleware()
     req = Request('http://scrapytest.org',
                   meta={'proxy': 'https://new.proxy:3128'})
     assert mw.process_request(req, spider) is None
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(req.meta, {'proxy': 'https://new.proxy:3128'})
Ejemplo n.º 4
0
 def test_add_proxy_without_credentials(self):
     """Setting a credential-free proxy in meta after a first pass keeps that
     proxy URL and adds no ``Proxy-Authorization`` header."""
     mw = HttpProxyMiddleware()
     req = Request('https://example.com')
     first_pass = mw.process_request(req, spider)
     assert first_pass is None
     req.meta['proxy'] = 'https://example.com'
     second_pass = mw.process_request(req, spider)
     assert second_pass is None
     self.assertEqual(req.meta['proxy'], 'https://example.com')
     self.assertNotIn(b'Proxy-Authorization', req.headers)
    def test_no_enviroment_proxies(self):
        """Without usable proxy variables in the environment, requests pass
        through with their URL and (empty) meta untouched."""
        # NOTE(review): this rebinds os.environ to a plain dict; presumably
        # the enclosing test case restores it afterwards -- confirm.
        os.environ = {'dummy_proxy': 'reset_env_and_do_not_raise'}
        mw = HttpProxyMiddleware()

        for url in ('http://e.com', 'https://e.com', 'file:///tmp/a'):
            req = Request(url)
            assert mw.process_request(req, spider) is None
            # assertEqual replaces the deprecated assertEquals alias, which
            # was removed from unittest in Python 3.12.
            self.assertEqual(req.url, url)
            self.assertEqual(req.meta, {})
Ejemplo n.º 6
0
 def test_proxy_authentication_header_undefined_proxy(self):
     """A ``Proxy-Authorization`` header on a request with no proxy in meta
     is gone after processing, and no proxy is added to meta."""
     mw = HttpProxyMiddleware()
     preset_headers = {'Proxy-Authorization': 'Basic foo'}
     req = Request('https://example.com', headers=preset_headers)
     outcome = mw.process_request(req, spider)
     assert outcome is None
     self.assertNotIn('proxy', req.meta)
     self.assertNotIn(b'Proxy-Authorization', req.headers)
Ejemplo n.º 7
0
    def test_no_environment_proxies(self):
        """Without usable proxy variables in the environment, requests pass
        through with their URL and (empty) meta untouched."""
        # NOTE(review): this rebinds os.environ to a plain dict; presumably
        # the enclosing test case restores it afterwards -- confirm.
        os.environ = {'dummy_proxy': 'reset_env_and_do_not_raise'}
        middleware = HttpProxyMiddleware()

        targets = ('http://e.com', 'https://e.com', 'file:///tmp/a')
        for target in targets:
            request = Request(target)
            outcome = middleware.process_request(request, spider)
            assert outcome is None
            self.assertEqual(request.url, target)
            self.assertEqual(request.meta, {})
Ejemplo n.º 8
0
 def test_proxy_authentication_header_proxy_without_credentials(self):
     """A preset ``Proxy-Authorization`` header is gone after processing when
     the proxy URL in meta carries no credentials; the proxy URL is kept."""
     mw = HttpProxyMiddleware()
     req = Request(
         'https://example.com',
         headers={'Proxy-Authorization': 'Basic foo'},
         meta={'proxy': 'https://example.com'},
     )
     outcome = mw.process_request(req, spider)
     assert outcome is None
     self.assertEqual(req.meta['proxy'], 'https://example.com')
     self.assertNotIn(b'Proxy-Authorization', req.headers)
Ejemplo n.º 9
0
 def test_remove_proxy_with_credentials(self):
     """Resetting ``meta['proxy']`` to ``None`` after a pass with a
     credentialed proxy leaves it ``None`` and no ``Proxy-Authorization``
     header on the request."""
     # NOTE(review): the credentials in the proxy URL look redacted
     # ('*****:*****') -- confirm the intended literal.
     mw = HttpProxyMiddleware()
     req = Request(
         'https://example.com',
         meta={'proxy': 'https://*****:*****@example.com'},
     )
     first_pass = mw.process_request(req, spider)
     assert first_pass is None
     req.meta['proxy'] = None
     second_pass = mw.process_request(req, spider)
     assert second_pass is None
     self.assertIsNone(req.meta['proxy'])
     self.assertNotIn(b'Proxy-Authorization', req.headers)
Ejemplo n.º 10
0
    def test_enviroment_proxies(self):
        """Each request gets the environment proxy matching its URL scheme,
        and none when no ``<scheme>_proxy`` variable is set."""
        os.environ['http_proxy'] = http_proxy = 'https://proxy.for.http:3128'
        os.environ['https_proxy'] = https_proxy = 'http://proxy.for.https:8080'
        os.environ.pop('file_proxy', None)
        mw = HttpProxyMiddleware()

        for url, proxy in [
            ('http://e.com', http_proxy),
            ('https://e.com', https_proxy),
            ('file://tmp/a', None),
        ]:
            req = Request(url)
            assert mw.process_request(req, spider) is None
            # assertEqual replaces the deprecated assertEquals alias, which
            # was removed from unittest in Python 3.12.
            self.assertEqual(req.url, url)
            self.assertEqual(req.meta.get('proxy'), proxy)
 def test_proxy_auth(self):
     """Checks meta and the ``Proxy-Authorization`` header over two
     consecutive passes of a request through the middleware when
     ``http_proxy`` carries credentials."""
     # NOTE(review): the credentials in the URLs look redacted
     # ('*****:*****'), while the expected header is the Basic encoding of
     # 'username:password' -- confirm the intended literals.
     os.environ['http_proxy'] = 'https://*****:*****@proxy:3128'
     middleware = HttpProxyMiddleware()
     request = Request('http://scrapytest.org')
     first_pass = middleware.process_request(request, spider)
     assert first_pass is None
     self.assertEqual(request.meta, {'proxy': 'https://*****:*****@proxy:3128'})
     second_pass = middleware.process_request(request, spider)
     assert second_pass is None
     self.assertEqual(request.meta, {'proxy': 'https://proxy:3128'})
     auth_header = request.headers.get('Proxy-Authorization')
     self.assertEqual(auth_header, b'Basic dXNlcm5hbWU6cGFzc3dvcmQ=')
Ejemplo n.º 12
0
 def test_proxy_auth(self):
     """Checks ``meta['proxy']`` and the ``Proxy-Authorization`` header over
     two consecutive passes of a request through the middleware when
     ``http_proxy`` carries credentials."""
     # NOTE(review): the credentials in the URLs look redacted
     # ('*****:*****'), while the expected header is the Basic encoding of
     # 'username:password' -- confirm the intended literals.
     os.environ['http_proxy'] = 'https://*****:*****@proxy:3128'
     mw = HttpProxyMiddleware()
     req = Request('http://scrapytest.org')
     assert mw.process_request(req, spider) is None
     # A stray '}' after the string literal made this line a SyntaxError.
     self.assertEqual(req.meta['proxy'], 'https://*****:*****@proxy:3128')
     assert mw.process_request(req, spider) is None
     self.assertEqual(req.meta['proxy'], 'https://proxy:3128')
     self.assertEqual(req.headers.get('Proxy-Authorization'), b'Basic dXNlcm5hbWU6cGFzc3dvcmQ=')
Ejemplo n.º 13
0
 def test_change_proxy_remove_credentials(self):
     """Switching the meta proxy to a different, credential-free proxy URL
     must leave the request without any proxy credentials."""
     # NOTE(review): the credentials in the first proxy URL look redacted
     # ('*****:*****') -- confirm the intended literal.
     mw = HttpProxyMiddleware()
     req = Request(
         'https://example.com',
         meta={'proxy': 'https://*****:*****@example.com'},
     )
     first_pass = mw.process_request(req, spider)
     assert first_pass is None
     req.meta['proxy'] = 'https://example.org'
     second_pass = mw.process_request(req, spider)
     assert second_pass is None
     expected_meta = {'proxy': 'https://example.org'}
     self.assertEqual(req.meta, expected_meta)
     self.assertNotIn(b'Proxy-Authorization', req.headers)
Ejemplo n.º 14
0
    def test_proxy_auth_encoding(self):
        """Checks ``meta['proxy']`` and the ``Proxy-Authorization`` header
        over three consecutive passes with ``auth_encoding='utf-8'`` and
        non-ASCII credentials in ``http_proxy``."""
        # NOTE(review): the credentials in the expected URLs look redacted
        # ('*****:*****') and the expected header may not match the
        # environment credentials -- confirm the intended literals.
        # utf-8 encoding
        os.environ['http_proxy'] = 'https://m\u00E1n:pass@proxy:3128'
        mw = HttpProxyMiddleware(auth_encoding='utf-8')
        req = Request('http://scrapytest.org')
        assert mw.process_request(req, spider) is None
        # A stray '}' after the string literal made the next two
        # assertEqual lines SyntaxErrors.
        self.assertEqual(req.meta['proxy'], 'https://*****:*****@proxy:3128')
        assert mw.process_request(req, spider) is None
        self.assertEqual(req.meta['proxy'], 'https://*****:*****@proxy:3128')
        assert mw.process_request(req, spider) is None
        self.assertEqual(req.meta['proxy'], 'https://proxy:3128')
        self.assertEqual(req.headers.get('Proxy-Authorization'), b'Basic /HNlcjpwYXNz')
    def test_proxy_auth_encoding(self):
        """Checks meta and the ``Proxy-Authorization`` header over three
        consecutive passes with ``auth_encoding="utf-8"`` and non-ASCII
        credentials in ``http_proxy``."""
        # NOTE(review): the credentials in the expected URLs look redacted
        # ('*****:*****') -- confirm the intended literals.
        # utf-8 encoding
        os.environ["http_proxy"] = "https://m\u00E1n:pass@proxy:3128"
        middleware = HttpProxyMiddleware(auth_encoding="utf-8")
        request = Request("http://scrapytest.org")

        first_pass = middleware.process_request(request, spider)
        assert first_pass is None
        self.assertEqual(request.meta, {"proxy": "https://*****:*****@proxy:3128"})

        second_pass = middleware.process_request(request, spider)
        assert second_pass is None
        self.assertEqual(request.meta, {"proxy": "https://*****:*****@proxy:3128"})

        third_pass = middleware.process_request(request, spider)
        assert third_pass is None
        self.assertEqual(request.meta, {"proxy": "https://proxy:3128"})
        auth_header = request.headers.get("Proxy-Authorization")
        self.assertEqual(auth_header, b"Basic /HNlcjpwYXNz")
    def test_environment_proxies(self):
        """Each request gets the environment proxy matching its URL scheme,
        and none when no ``<scheme>_proxy`` variable is set."""
        http_proxy = "https://proxy.for.http:3128"
        https_proxy = "http://proxy.for.https:8080"
        os.environ["http_proxy"] = http_proxy
        os.environ["https_proxy"] = https_proxy
        os.environ.pop("file_proxy", None)
        middleware = HttpProxyMiddleware()

        # (request URL, proxy expected in the request meta)
        expectations = (
            ("http://e.com", http_proxy),
            ("https://e.com", https_proxy),
            ("file://tmp/a", None),
        )
        for target, expected_proxy in expectations:
            request = Request(target)
            outcome = middleware.process_request(request, spider)
            assert outcome is None
            self.assertEqual(request.url, target)
            self.assertEqual(request.meta.get("proxy"), expected_proxy)
Ejemplo n.º 17
0
 def test_add_proxy_with_credentials(self):
     """Setting a credentialed proxy in meta after a first pass yields a
     credential-free proxy URL in meta plus a Basic ``Proxy-Authorization``
     header."""
     # NOTE(review): the credentials in the proxy URL look redacted
     # ('*****:*****'), while the header is compared against
     # 'user1'/'password1' -- confirm the intended literal.
     mw = HttpProxyMiddleware()
     req = Request('https://example.com')
     first_pass = mw.process_request(req, spider)
     assert first_pass is None
     req.meta['proxy'] = 'https://*****:*****@example.com'
     second_pass = mw.process_request(req, spider)
     assert second_pass is None
     self.assertEqual(req.meta['proxy'], 'https://example.com')
     token = mw._basic_auth_header('user1', 'password1')
     expected_header = b'Basic ' + token
     self.assertEqual(req.headers['Proxy-Authorization'], expected_header)
 def test_proxy_auth_empty_passwd(self):
     """Checks meta and the ``Proxy-Authorization`` header over two
     consecutive passes when ``http_proxy`` carries credentials with an
     empty password."""
     # NOTE(review): the expected URL looks redacted ('*****:*****') and the
     # expected header is the Basic encoding of 'username:' rather than
     # 'user:' -- confirm the intended literals.
     os.environ["http_proxy"] = "https://user:@proxy:3128"
     middleware = HttpProxyMiddleware()
     request = Request("http://scrapytest.org")
     first_pass = middleware.process_request(request, spider)
     assert first_pass is None
     self.assertEqual(request.meta, {"proxy": "https://*****:*****@proxy:3128"})
     second_pass = middleware.process_request(request, spider)
     assert second_pass is None
     self.assertEqual(request.meta, {"proxy": "https://proxy:3128"})
     auth_header = request.headers.get("Proxy-Authorization")
     self.assertEqual(auth_header, b"Basic dXNlcm5hbWU6")
Ejemplo n.º 19
0
 def test_proxy_authentication_header_proxy_with_same_credentials(self):
     """A preset ``Proxy-Authorization`` header is still present with the
     same value after processing a request whose proxy URL carries
     credentials; the proxy URL in meta ends up credential-free."""
     # NOTE(review): the credentials in the proxy URL look redacted
     # ('*****:*****'), while the header is built from 'user1'/'password1'
     # -- confirm the intended literal.
     mw = HttpProxyMiddleware()
     token = mw._basic_auth_header('user1', 'password1')
     req = Request(
         'https://example.com',
         headers={'Proxy-Authorization': b'Basic ' + token},
         meta={'proxy': 'https://*****:*****@example.com'},
     )
     outcome = mw.process_request(req, spider)
     assert outcome is None
     self.assertEqual(req.meta['proxy'], 'https://example.com')
     expected_header = b'Basic ' + token
     self.assertEqual(req.headers['Proxy-Authorization'], expected_header)
Ejemplo n.º 20
0
    def test_change_proxy_remove_credentials_preremoved_header(self):
        """Proxy switch that drops credentials, with the header already gone.

        The ``Proxy-Authorization`` header is deleted by hand before the
        second pass, so the middleware must not rely on that header being
        present when it removes credentials.
        """
        # NOTE(review): the credentials in the first proxy URL look redacted
        # ('*****:*****') -- confirm the intended literal.
        mw = HttpProxyMiddleware()
        req = Request(
            'https://example.com',
            meta={'proxy': 'https://*****:*****@example.com'},
        )
        first_pass = mw.process_request(req, spider)
        assert first_pass is None

        req.meta['proxy'] = 'https://example.org'
        del req.headers[b'Proxy-Authorization']
        second_pass = mw.process_request(req, spider)
        assert second_pass is None

        expected_meta = {'proxy': 'https://example.org'}
        self.assertEqual(req.meta, expected_meta)
        self.assertNotIn(b'Proxy-Authorization', req.headers)
Ejemplo n.º 21
0
    def test_no_proxy(self):
        """``no_proxy`` controls whether the environment proxy is applied.

        The proxy is skipped for ``*`` and when the request host is listed,
        and applied when only another host is listed.
        """
        # The unused local alias ``http_proxy`` from the original chained
        # assignment has been dropped.
        os.environ['http_proxy'] = 'https://proxy.for.http:3128'
        mw = HttpProxyMiddleware()

        os.environ['no_proxy'] = '*'
        req = Request('http://noproxy.com')
        assert mw.process_request(req, spider) is None
        assert 'proxy' not in req.meta

        os.environ['no_proxy'] = 'other.com'
        req = Request('http://noproxy.com')
        assert mw.process_request(req, spider) is None
        assert 'proxy' in req.meta

        os.environ['no_proxy'] = 'other.com,noproxy.com'
        req = Request('http://noproxy.com')
        assert mw.process_request(req, spider) is None
        assert 'proxy' not in req.meta
Ejemplo n.º 22
0
 def test_change_credentials(self):
     """When the meta proxy URL is replaced by one carrying different
     credentials, the header must reflect the new credentials."""
     # NOTE(review): the credentials in both proxy URLs look redacted
     # ('*****:*****'), while the header is compared against
     # 'user2'/'password2' -- confirm the intended literals.
     mw = HttpProxyMiddleware()
     req = Request(
         'https://example.com',
         meta={'proxy': 'https://*****:*****@example.com'},
     )
     first_pass = mw.process_request(req, spider)
     assert first_pass is None
     req.meta['proxy'] = 'https://*****:*****@example.com'
     second_pass = mw.process_request(req, spider)
     assert second_pass is None
     self.assertEqual(req.meta['proxy'], 'https://example.com')
     token = mw._basic_auth_header('user2', 'password2')
     expected_header = b'Basic ' + token
     self.assertEqual(req.headers['Proxy-Authorization'], expected_header)
Ejemplo n.º 23
0
    def test_change_proxy_keep_credentials(self):
        """Switching to another credentialed proxy keeps a Basic header, and a
        later switch to a credential-free proxy URL removes it."""
        # NOTE(review): the credentials in the proxy URLs look redacted
        # ('*****:*****'), while the header is compared against
        # 'user1'/'password1' -- confirm the intended literals.
        mw = HttpProxyMiddleware()
        req = Request(
            'https://example.com',
            meta={'proxy': 'https://*****:*****@example.com'},
        )
        first_pass = mw.process_request(req, spider)
        assert first_pass is None

        req.meta['proxy'] = 'https://*****:*****@example.org'
        second_pass = mw.process_request(req, spider)
        assert second_pass is None
        self.assertEqual(req.meta['proxy'], 'https://example.org')
        token = mw._basic_auth_header('user1', 'password1')
        expected_header = b'Basic ' + token
        self.assertEqual(req.headers['Proxy-Authorization'], expected_header)

        # Make sure, indirectly, that _auth_proxy is updated.
        req.meta['proxy'] = 'https://example.com'
        third_pass = mw.process_request(req, spider)
        assert third_pass is None
        self.assertEqual(req.meta['proxy'], 'https://example.com')
        self.assertNotIn(b'Proxy-Authorization', req.headers)
Ejemplo n.º 24
0
    def test_remove_credentials(self):
        """Dropping credentials from the URL of the same proxy keeps them.

        Re-setting ``meta['proxy']`` to the same proxy without credentials
        must leave the earlier ``Proxy-Authorization`` header in place; only
        deleting that header as well actually removes the credentials.
        """
        # NOTE(review): the credentials in the first proxy URL look redacted
        # ('*****:*****'), while the header is compared against
        # 'user1'/'password1' -- confirm the intended literal.
        mw = HttpProxyMiddleware()
        req = Request(
            'https://example.com',
            meta={'proxy': 'https://*****:*****@example.com'},
        )
        first_pass = mw.process_request(req, spider)
        assert first_pass is None

        req.meta['proxy'] = 'https://example.com'
        second_pass = mw.process_request(req, spider)
        assert second_pass is None
        self.assertEqual(req.meta['proxy'], 'https://example.com')
        token = mw._basic_auth_header('user1', 'password1')
        expected_header = b'Basic ' + token
        self.assertEqual(req.headers['Proxy-Authorization'], expected_header)

        req.meta['proxy'] = 'https://example.com'
        del req.headers[b'Proxy-Authorization']
        third_pass = mw.process_request(req, spider)
        assert third_pass is None
        self.assertEqual(req.meta['proxy'], 'https://example.com')
        self.assertNotIn(b'Proxy-Authorization', req.headers)
    def test_no_proxy(self):
        """``no_proxy`` controls whether the environment proxy is applied,
        while an explicit ``meta['proxy']`` is left untouched regardless."""
        os.environ['http_proxy'] = 'https://proxy.for.http:3128'
        middleware = HttpProxyMiddleware()

        # (no_proxy value, whether a proxy is expected in the request meta)
        scenarios = (
            ('*', False),
            ('other.com', True),
            ('other.com,noproxy.com', False),
        )
        for no_proxy_value, proxy_expected in scenarios:
            os.environ['no_proxy'] = no_proxy_value
            request = Request('http://noproxy.com')
            assert middleware.process_request(request, spider) is None
            assert ('proxy' in request.meta) is proxy_expected

        # proxy from meta['proxy'] takes precedence
        os.environ['no_proxy'] = '*'
        request = Request('http://noproxy.com', meta={'proxy': 'http://proxy.com'})
        assert middleware.process_request(request, spider) is None
        self.assertEqual(request.meta, {'proxy': 'http://proxy.com'})
Ejemplo n.º 26
0
    def test_proxy_auth_encoding(self):
        """Non-ASCII proxy credentials are encoded with ``auth_encoding``.

        The same ``http_proxy`` value is processed once with a utf-8
        middleware and once with a latin-1 middleware; the resulting
        ``Proxy-Authorization`` headers differ accordingly.
        """
        # utf-8 encoding
        os.environ['http_proxy'] = u'https://m\u00E1n:pass@proxy:3128'
        mw = HttpProxyMiddleware(auth_encoding='utf-8')
        req = Request('http://scrapytest.org')
        assert mw.process_request(req, spider) is None
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(req.meta, {'proxy': 'https://proxy:3128'})
        self.assertEqual(req.headers.get('Proxy-Authorization'), b'Basic bcOhbjpwYXNz')

        # default latin-1 encoding
        mw = HttpProxyMiddleware(auth_encoding='latin-1')
        req = Request('http://scrapytest.org')
        assert mw.process_request(req, spider) is None
        self.assertEqual(req.meta, {'proxy': 'https://proxy:3128'})
        self.assertEqual(req.headers.get('Proxy-Authorization'), b'Basic beFuOnBhc3M=')
    def test_no_proxy(self):
        """``no_proxy`` controls whether the environment proxy is applied,
        while an explicit ``meta["proxy"]`` is left untouched regardless."""
        os.environ["http_proxy"] = "https://proxy.for.http:3128"
        middleware = HttpProxyMiddleware()

        # Wildcard: no proxy for any host.
        os.environ["no_proxy"] = "*"
        request = Request("http://noproxy.com")
        outcome = middleware.process_request(request, spider)
        assert outcome is None
        assert "proxy" not in request.meta

        # Only another host is excluded: the proxy applies.
        os.environ["no_proxy"] = "other.com"
        request = Request("http://noproxy.com")
        outcome = middleware.process_request(request, spider)
        assert outcome is None
        assert "proxy" in request.meta

        # The request host is in the exclusion list.
        os.environ["no_proxy"] = "other.com,noproxy.com"
        request = Request("http://noproxy.com")
        outcome = middleware.process_request(request, spider)
        assert outcome is None
        assert "proxy" not in request.meta

        # proxy from meta['proxy'] takes precedence
        os.environ["no_proxy"] = "*"
        request = Request(
            "http://noproxy.com",
            meta={"proxy": "http://proxy.com"},
        )
        outcome = middleware.process_request(request, spider)
        assert outcome is None
        self.assertEqual(request.meta, {"proxy": "http://proxy.com"})
Ejemplo n.º 28
0
    def test_no_proxy(self):
        """``no_proxy`` controls whether the environment proxy is applied,
        while an explicit ``meta['proxy']`` is left untouched regardless."""
        os.environ['http_proxy'] = 'https://proxy.for.http:3128'
        middleware = HttpProxyMiddleware()
        target = 'http://noproxy.com'

        os.environ['no_proxy'] = '*'
        wildcard_request = Request(target)
        assert middleware.process_request(wildcard_request, spider) is None
        assert 'proxy' not in wildcard_request.meta

        os.environ['no_proxy'] = 'other.com'
        unlisted_request = Request(target)
        assert middleware.process_request(unlisted_request, spider) is None
        assert 'proxy' in unlisted_request.meta

        os.environ['no_proxy'] = 'other.com,noproxy.com'
        listed_request = Request(target)
        assert middleware.process_request(listed_request, spider) is None
        assert 'proxy' not in listed_request.meta

        # proxy from meta['proxy'] takes precedence
        os.environ['no_proxy'] = '*'
        explicit_request = Request(target, meta={'proxy': 'http://proxy.com'})
        assert middleware.process_request(explicit_request, spider) is None
        self.assertEqual(explicit_request.meta, {'proxy': 'http://proxy.com'})
 def test_proxy_already_seted(self):
     """An explicit ``meta["proxy"] = None`` stays ``None`` even though
     ``http_proxy`` is set in the environment."""
     os.environ["http_proxy"] = "https://proxy.for.http:3128"
     middleware = HttpProxyMiddleware()
     request = Request("http://noproxy.com", meta={"proxy": None})
     outcome = middleware.process_request(request, spider)
     assert outcome is None
     assert "proxy" in request.meta and request.meta["proxy"] is None
 def test_proxy_precedence_meta(self):
     """A proxy given in the request meta is kept as-is even though
     ``http_proxy`` is set in the environment."""
     os.environ['http_proxy'] = 'https://proxy.com'
     middleware = HttpProxyMiddleware()
     request = Request(
         'http://scrapytest.org',
         meta={'proxy': 'https://new.proxy:3128'},
     )
     outcome = middleware.process_request(request, spider)
     assert outcome is None
     expected_meta = {'proxy': 'https://new.proxy:3128'}
     self.assertEqual(request.meta, expected_meta)
 def test_proxy_already_seted(self):
     """An explicit ``meta['proxy'] = None`` stays ``None`` even though
     ``http_proxy`` is set in the environment."""
     # The unused local alias ``http_proxy`` from the original chained
     # assignment has been dropped.
     os.environ['http_proxy'] = 'https://proxy.for.http:3128'
     mw = HttpProxyMiddleware()
     req = Request('http://noproxy.com', meta={'proxy': None})
     assert mw.process_request(req, spider) is None
     assert 'proxy' in req.meta and req.meta['proxy'] is None
Ejemplo n.º 32
0
 def test_proxy_already_seted(self):
     """An explicit ``meta['proxy'] = None`` stays ``None`` even though
     ``http_proxy`` is set in the environment."""
     os.environ['http_proxy'] = 'https://proxy.for.http:3128'
     middleware = HttpProxyMiddleware()
     request = Request('http://noproxy.com', meta={'proxy': None})
     outcome = middleware.process_request(request, spider)
     assert outcome is None
     assert 'proxy' in request.meta and request.meta['proxy'] is None
Ejemplo n.º 33
0
 def test_no_proxy_invalid_values(self):
     """A ``no_proxy`` value that is not a parseable proxy setting must not
     produce a ``'no'`` entry in the middleware's ``proxies`` mapping."""
     # Users may set no_proxy to something like '/var/run/docker.sock';
     # it is not parseable and should be skipped.
     os.environ['no_proxy'] = '/var/run/docker.sock'
     middleware = HttpProxyMiddleware()
     configured_proxies = middleware.proxies
     assert 'no' not in configured_proxies
Ejemplo n.º 34
0
 def test_not_enabled(self):
     """``from_crawler`` raises ``NotConfigured`` when the crawler settings
     have ``HTTPPROXY_ENABLED`` set to ``False``."""
     disabled_settings = {'HTTPPROXY_ENABLED': False}
     crawler = get_crawler(Spider, disabled_settings)
     with pytest.raises(NotConfigured):
         HttpProxyMiddleware.from_crawler(crawler)