def test_proxy_auth_encoding(self):
    """Process one request three times with utf-8 auth encoding.

    After each pass the whole ``meta`` dict is compared with the value
    expected for that pass; the ``Proxy-Authorization`` header is checked
    once at the end.
    """
    # utf-8 encoding
    os.environ["http_proxy"] = "https://m\u00E1n:pass@proxy:3128"
    middleware = HttpProxyMiddleware(auth_encoding="utf-8")
    request = Request("http://scrapytest.org")

    # NOTE(review): the first two expected URLs carry masked credentials
    # ("*****"); confirm the intended values against the upstream test.
    expected_metas = (
        {"proxy": "https://*****:*****@proxy:3128"},
        {"proxy": "https://*****:*****@proxy:3128"},
        {"proxy": "https://proxy:3128"},
    )
    for expected_meta in expected_metas:
        assert middleware.process_request(request, spider) is None
        self.assertEqual(request.meta, expected_meta)

    auth_header = request.headers.get("Proxy-Authorization")
    self.assertEqual(auth_header, b"Basic /HNlcjpwYXNz")
def test_proxy_auth_encoding(self):
    """Check ``meta`` after each of three passes, then the auth header."""
    # utf-8 encoding
    os.environ['http_proxy'] = 'https://m\u00E1n:pass@proxy:3128'
    proxy_mw = HttpProxyMiddleware(auth_encoding='utf-8')
    outgoing = Request('http://scrapytest.org')

    # The same expectation is asserted after the first and second pass.
    early_meta = {'proxy': 'https://*****:*****@proxy:3128'}
    final_meta = {'proxy': 'https://proxy:3128'}

    assert proxy_mw.process_request(outgoing, spider) is None
    self.assertEqual(outgoing.meta, early_meta)

    assert proxy_mw.process_request(outgoing, spider) is None
    self.assertEqual(outgoing.meta, early_meta)

    assert proxy_mw.process_request(outgoing, spider) is None
    self.assertEqual(outgoing.meta, final_meta)

    self.assertEqual(
        outgoing.headers.get('Proxy-Authorization'),
        b'Basic /HNlcjpwYXNz',
    )
def test_remove_credentials(self):
    """Credentials persist until the header itself is deleted.

    If the proxy request meta switches to a proxy URL with the same proxy
    but no credentials, the original credentials must be still used.

    To remove credentials while keeping the same proxy URL, users must
    delete the Proxy-Authorization header.
    """
    mw = HttpProxyMiddleware()
    req = Request(
        'https://example.com',
        meta={'proxy': 'https://*****:*****@example.com'},
    )
    assert mw.process_request(req, spider) is None

    # Phase 1: same proxy URL, credentials omitted from the meta value.
    bare_proxy = 'https://example.com'
    req.meta['proxy'] = bare_proxy
    assert mw.process_request(req, spider) is None
    self.assertEqual(req.meta['proxy'], bare_proxy)
    expected_header = b'Basic ' + mw._basic_auth_header('user1', 'password1')
    self.assertEqual(req.headers['Proxy-Authorization'], expected_header)

    # Phase 2: the header is deleted explicitly before processing again.
    req.meta['proxy'] = bare_proxy
    del req.headers[b'Proxy-Authorization']
    assert mw.process_request(req, spider) is None
    self.assertEqual(req.meta['proxy'], bare_proxy)
    self.assertNotIn(b'Proxy-Authorization', req.headers)
def test_change_proxy_keep_credentials(self):
    """Switch to a different proxy URL that still carries credentials.

    Afterwards the proxy is reset to a credential-free URL and the test
    asserts that no Proxy-Authorization header is left on the request.
    """
    mw = HttpProxyMiddleware()
    req = Request(
        'https://example.com',
        meta={'proxy': 'https://*****:*****@example.com'},
    )
    assert mw.process_request(req, spider) is None

    req.meta['proxy'] = 'https://*****:*****@example.org'
    assert mw.process_request(req, spider) is None
    self.assertEqual(req.meta['proxy'], 'https://example.org')
    expected_header = b'Basic ' + mw._basic_auth_header('user1', 'password1')
    self.assertEqual(req.headers['Proxy-Authorization'], expected_header)

    # Make sure, indirectly, that _auth_proxy is updated.
    plain_proxy = 'https://example.com'
    req.meta['proxy'] = plain_proxy
    assert mw.process_request(req, spider) is None
    self.assertEqual(req.meta['proxy'], plain_proxy)
    self.assertNotIn(b'Proxy-Authorization', req.headers)
def test_proxy_auth_encoding(self):
    """Check ``meta['proxy']`` after each of three passes, then the header.

    The proxy URL comes from the ``http_proxy`` environment variable and
    the middleware is built with ``auth_encoding='utf-8'``.
    """
    # utf-8 encoding
    os.environ['http_proxy'] = 'https://m\u00E1n:pass@proxy:3128'
    mw = HttpProxyMiddleware(auth_encoding='utf-8')
    req = Request('http://scrapytest.org')

    # NOTE(review): the expected URLs below contain masked credentials
    # ('*****'); confirm the intended values against the upstream test.
    assert mw.process_request(req, spider) is None
    # A stray '}' after the expected string made this call a syntax error.
    self.assertEqual(req.meta['proxy'], 'https://*****:*****@proxy:3128')

    assert mw.process_request(req, spider) is None
    self.assertEqual(req.meta['proxy'], 'https://*****:*****@proxy:3128')

    assert mw.process_request(req, spider) is None
    self.assertEqual(req.meta['proxy'], 'https://proxy:3128')
    self.assertEqual(
        req.headers.get('Proxy-Authorization'),
        b'Basic /HNlcjpwYXNz',
    )
def test_add_proxy_without_credentials(self):
    """Add a credential-free proxy to an already processed request.

    The proxy meta value must stay as set and no Proxy-Authorization
    header may appear.
    """
    mw = HttpProxyMiddleware()
    req = Request('https://example.com')
    assert mw.process_request(req, spider) is None

    proxy_url = 'https://example.com'
    req.meta['proxy'] = proxy_url
    assert mw.process_request(req, spider) is None

    self.assertEqual(req.meta['proxy'], proxy_url)
    self.assertNotIn(b'Proxy-Authorization', req.headers)
def test_remove_proxy_with_credentials(self):
    """Set the proxy meta to ``None`` after a pass with credentials.

    The meta value must remain ``None`` and the Proxy-Authorization
    header must be absent after the second pass.
    """
    mw = HttpProxyMiddleware()
    initial_meta = {'proxy': 'https://*****:*****@example.com'}
    req = Request('https://example.com', meta=initial_meta)
    assert mw.process_request(req, spider) is None

    req.meta['proxy'] = None
    assert mw.process_request(req, spider) is None

    self.assertIsNone(req.meta['proxy'])
    self.assertNotIn(b'Proxy-Authorization', req.headers)
def test_proxy_auth_empty_passwd(self):
    """Check proxy meta and auth header for an env proxy with empty password.

    The ``http_proxy`` URL has a user name followed by ``:`` and no
    password. ``meta['proxy']`` is checked after each of two passes and
    the Proxy-Authorization header after the second.
    """
    os.environ['http_proxy'] = 'https://user:@proxy:3128'
    mw = HttpProxyMiddleware()
    req = Request('http://scrapytest.org')

    assert mw.process_request(req, spider) is None
    # A stray '}' after the expected string made this call a syntax error.
    # NOTE(review): the expected URL contains masked credentials
    # ('*****'); confirm the intended value against the upstream test.
    self.assertEqual(req.meta['proxy'], 'https://*****:*****@proxy:3128')

    assert mw.process_request(req, spider) is None
    self.assertEqual(req.meta['proxy'], 'https://proxy:3128')
    self.assertEqual(
        req.headers.get('Proxy-Authorization'),
        b'Basic dXNlcm5hbWU6',
    )
def test_proxy_auth_empty_passwd(self):
    """Two passes over one request with an empty-password env proxy.

    The full ``meta`` dict is compared after each pass and the
    Proxy-Authorization header after the last one.
    """
    os.environ['http_proxy'] = 'https://user:@proxy:3128'
    middleware = HttpProxyMiddleware()
    request = Request('http://scrapytest.org')

    expected_metas = (
        {'proxy': 'https://*****:*****@proxy:3128'},
        {'proxy': 'https://proxy:3128'},
    )
    for expected_meta in expected_metas:
        assert middleware.process_request(request, spider) is None
        self.assertEqual(request.meta, expected_meta)

    auth_header = request.headers.get('Proxy-Authorization')
    self.assertEqual(auth_header, b'Basic dXNlcm5hbWU6')
def test_proxy_auth(self):
    """Two passes over one request with a credentialed env proxy.

    The full ``meta`` dict is compared after each pass and the
    Proxy-Authorization header after the last one.
    """
    env_proxy = 'https://*****:*****@proxy:3128'
    os.environ['http_proxy'] = env_proxy
    middleware = HttpProxyMiddleware()
    request = Request('http://scrapytest.org')

    assert middleware.process_request(request, spider) is None
    self.assertEqual(request.meta, {'proxy': 'https://*****:*****@proxy:3128'})

    assert middleware.process_request(request, spider) is None
    self.assertEqual(request.meta, {'proxy': 'https://proxy:3128'})

    auth_header = request.headers.get('Proxy-Authorization')
    self.assertEqual(auth_header, b'Basic dXNlcm5hbWU6cGFzc3dvcmQ=')
def test_change_proxy_remove_credentials(self):
    """Switching to another proxy without credentials drops the credentials.

    If the proxy request meta switches to a proxy URL with a different
    proxy and no credentials, no credentials must be used.
    """
    mw = HttpProxyMiddleware()
    req = Request(
        'https://example.com',
        meta={'proxy': 'https://*****:*****@example.com'},
    )
    assert mw.process_request(req, spider) is None

    req.meta['proxy'] = 'https://example.org'
    assert mw.process_request(req, spider) is None

    expected_meta = {'proxy': 'https://example.org'}
    self.assertEqual(req.meta, expected_meta)
    self.assertNotIn(b'Proxy-Authorization', req.headers)
def test_add_proxy_with_credentials(self):
    """Add a credentialed proxy to an already processed request.

    The test expects the meta value without credentials afterwards and a
    Proxy-Authorization header built from ``_basic_auth_header``.
    """
    mw = HttpProxyMiddleware()
    req = Request('https://example.com')
    assert mw.process_request(req, spider) is None

    req.meta['proxy'] = 'https://*****:*****@example.com'
    assert mw.process_request(req, spider) is None

    self.assertEqual(req.meta['proxy'], 'https://example.com')
    expected_header = b'Basic ' + mw._basic_auth_header('user1', 'password1')
    self.assertEqual(req.headers['Proxy-Authorization'], expected_header)
def test_proxy_auth_encoding(self):
    """Check the auth header produced for each ``auth_encoding`` value.

    The same non-ASCII ``http_proxy`` URL is processed once with a
    utf-8 middleware and once with a latin-1 middleware; each run must
    give the credential-free proxy meta and the header expected for that
    encoding.
    """
    # No ``u`` prefix: it is redundant on Python 3 string literals.
    os.environ['http_proxy'] = 'https://m\u00E1n:pass@proxy:3128'

    cases = (
        # utf-8 encoding
        ('utf-8', b'Basic bcOhbjpwYXNz'),
        # default latin-1 encoding
        ('latin-1', b'Basic beFuOnBhc3M='),
    )
    for encoding, expected_header in cases:
        mw = HttpProxyMiddleware(auth_encoding=encoding)
        req = Request('http://scrapytest.org')
        assert mw.process_request(req, spider) is None
        # assertEqual replaces the assertEquals alias removed in Python 3.12.
        self.assertEqual(req.meta, {'proxy': 'https://proxy:3128'})
        self.assertEqual(
            req.headers.get('Proxy-Authorization'),
            expected_header,
        )
def test_proxy_auth_empty_passwd(self):
    """Check ``meta`` after two passes with an empty-password env proxy."""
    os.environ["http_proxy"] = "https://user:@proxy:3128"
    proxy_mw = HttpProxyMiddleware()
    outgoing = Request("http://scrapytest.org")

    first_meta = {"proxy": "https://*****:*****@proxy:3128"}
    second_meta = {"proxy": "https://proxy:3128"}

    assert proxy_mw.process_request(outgoing, spider) is None
    self.assertEqual(outgoing.meta, first_meta)

    assert proxy_mw.process_request(outgoing, spider) is None
    self.assertEqual(outgoing.meta, second_meta)

    self.assertEqual(
        outgoing.headers.get("Proxy-Authorization"),
        b"Basic dXNlcm5hbWU6",
    )
def test_proxy_precedence_meta(self):
    """A proxy given in request meta must survive when ``http_proxy`` is set.

    After processing, ``meta`` must still hold exactly the proxy the
    request was created with.
    """
    os.environ['http_proxy'] = 'https://proxy.com'
    mw = HttpProxyMiddleware()
    req = Request('http://scrapytest.org', meta={'proxy': 'https://new.proxy:3128'})
    assert mw.process_request(req, spider) is None
    # assertEqual replaces the assertEquals alias removed in Python 3.12.
    self.assertEqual(req.meta, {'proxy': 'https://new.proxy:3128'})
def test_proxy_precedence_meta(self):
    """The meta proxy must be unchanged although ``http_proxy`` is set."""
    os.environ["http_proxy"] = "https://proxy.com"
    middleware = HttpProxyMiddleware()
    request = Request(
        "http://scrapytest.org",
        meta={"proxy": "https://new.proxy:3128"},
    )

    assert middleware.process_request(request, spider) is None

    expected_meta = {"proxy": "https://new.proxy:3128"}
    self.assertEqual(request.meta, expected_meta)
def test_proxy_auth(self):
    """Check proxy meta and auth header for a credentialed env proxy.

    After one pass the test expects the proxy meta without credentials
    and a Basic Proxy-Authorization header.
    """
    os.environ['http_proxy'] = 'https://*****:*****@proxy:3128'
    mw = HttpProxyMiddleware()
    req = Request('http://scrapytest.org')
    assert mw.process_request(req, spider) is None
    # assertEqual replaces the assertEquals alias removed in Python 3.12.
    self.assertEqual(req.meta, {'proxy': 'https://proxy:3128'})
    self.assertEqual(
        req.headers.get('Proxy-Authorization'),
        b'Basic dXNlcjpwYXNz',
    )
def test_proxy_auth(self):
    """Check proxy meta and auth header for a credentialed env proxy.

    After one pass the test expects the proxy meta without credentials
    and a Basic Proxy-Authorization header.
    """
    os.environ['http_proxy'] = 'https://*****:*****@proxy:3128'
    mw = HttpProxyMiddleware()
    req = Request('http://scrapytest.org')
    assert mw.process_request(req, spider) is None
    # assertEqual replaces the assertEquals alias removed in Python 3.12.
    self.assertEqual(req.meta, {'proxy': 'https://proxy:3128'})
    # Compare against bytes, as the other header assertions in this file
    # do; a str literal never equals a bytes value on Python 3.
    self.assertEqual(
        req.headers.get('Proxy-Authorization'),
        b'Basic dXNlcjpwYXNz',
    )
def test_no_enviroment_proxies(self):
    """Requests stay untouched when the environment defines no usable proxy.

    ``os.environ`` is replaced with a dict holding only a dummy entry;
    for each URL the request URL must be unchanged and ``meta`` empty
    after processing.
    """
    os.environ = {'dummy_proxy': 'reset_env_and_do_not_raise'}
    mw = HttpProxyMiddleware()

    for url in ('http://e.com', 'https://e.com', 'file:///tmp/a'):
        req = Request(url)
        assert mw.process_request(req, spider) is None
        # assertEqual replaces the assertEquals alias removed in Python 3.12.
        self.assertEqual(req.url, url)
        self.assertEqual(req.meta, {})
def test_no_proxy(self):
    """Check whether a proxy is set for several ``no_proxy`` values.

    ``no_proxy`` is changed after the middleware is created. A proxy
    must be absent from ``meta`` for ``*`` and for a list containing the
    request host, and present when only another host is listed.
    """
    # The original also bound this URL to an unused local; that is dropped.
    os.environ['http_proxy'] = 'https://proxy.for.http:3128'
    mw = HttpProxyMiddleware()

    os.environ['no_proxy'] = '*'
    req = Request('http://noproxy.com')
    assert mw.process_request(req, spider) is None
    assert 'proxy' not in req.meta

    os.environ['no_proxy'] = 'other.com'
    req = Request('http://noproxy.com')
    assert mw.process_request(req, spider) is None
    assert 'proxy' in req.meta

    os.environ['no_proxy'] = 'other.com,noproxy.com'
    req = Request('http://noproxy.com')
    assert mw.process_request(req, spider) is None
    assert 'proxy' not in req.meta
def test_change_proxy_remove_credentials_preremoved_header(self):
    """Proxy switch without credentials when the header is already gone.

    Corner case of proxy switch with credentials removal where the
    credentials have been removed beforehand.

    It ensures that our implementation does not assume that the
    credentials header exists when trying to remove it.
    """
    mw = HttpProxyMiddleware()
    req = Request(
        'https://example.com',
        meta={'proxy': 'https://*****:*****@example.com'},
    )
    assert mw.process_request(req, spider) is None

    # Switch the proxy and delete the header before the second pass.
    req.meta['proxy'] = 'https://example.org'
    del req.headers[b'Proxy-Authorization']
    assert mw.process_request(req, spider) is None

    expected_meta = {'proxy': 'https://example.org'}
    self.assertEqual(req.meta, expected_meta)
    self.assertNotIn(b'Proxy-Authorization', req.headers)
def test_proxy_authentication_header_undefined_proxy(self):
    """A Proxy-Authorization header without any proxy must be gone.

    The request has the header but no proxy in meta; after processing
    the test expects no proxy meta key and no such header.
    """
    mw = HttpProxyMiddleware()
    initial_headers = {'Proxy-Authorization': 'Basic foo'}
    req = Request('https://example.com', headers=initial_headers)

    assert mw.process_request(req, spider) is None

    self.assertNotIn('proxy', req.meta)
    self.assertNotIn(b'Proxy-Authorization', req.headers)
def test_no_environment_proxies(self):
    """With only a dummy env entry, URL and meta must stay untouched."""
    os.environ = {'dummy_proxy': 'reset_env_and_do_not_raise'}
    middleware = HttpProxyMiddleware()

    target_urls = ('http://e.com', 'https://e.com', 'file:///tmp/a')
    for target in target_urls:
        request = Request(target)
        assert middleware.process_request(request, spider) is None
        self.assertEqual(request.url, target)
        self.assertEqual(request.meta, {})
def test_proxy_authentication_header_proxy_without_credentials(self):
    """A preset header with a credential-free meta proxy must be gone.

    After processing, the proxy meta value is unchanged and the
    Proxy-Authorization header is absent.
    """
    mw = HttpProxyMiddleware()
    proxy_url = 'https://example.com'
    req = Request(
        'https://example.com',
        headers={'Proxy-Authorization': 'Basic foo'},
        meta={'proxy': proxy_url},
    )

    assert mw.process_request(req, spider) is None

    self.assertEqual(req.meta['proxy'], proxy_url)
    self.assertNotIn(b'Proxy-Authorization', req.headers)
def test_change_credentials(self):
    """New credentials in the proxy URL must replace the old ones.

    If the proxy request meta switches to a proxy URL with different
    credentials, those new credentials must be used.
    """
    mw = HttpProxyMiddleware()
    req = Request(
        'https://example.com',
        meta={'proxy': 'https://*****:*****@example.com'},
    )
    assert mw.process_request(req, spider) is None

    req.meta['proxy'] = 'https://*****:*****@example.com'
    assert mw.process_request(req, spider) is None

    self.assertEqual(req.meta['proxy'], 'https://example.com')
    expected_header = b'Basic ' + mw._basic_auth_header('user2', 'password2')
    self.assertEqual(req.headers['Proxy-Authorization'], expected_header)
def test_enviroment_proxies(self):
    """Each URL scheme must get the proxy from its environment variable.

    ``http_proxy`` and ``https_proxy`` are set and ``file_proxy`` is
    removed; the ``file`` URL is therefore expected to get no proxy.
    """
    os.environ['http_proxy'] = http_proxy = 'https://proxy.for.http:3128'
    os.environ['https_proxy'] = https_proxy = 'http://proxy.for.https:8080'
    os.environ.pop('file_proxy', None)
    mw = HttpProxyMiddleware()

    cases = [
        ('http://e.com', http_proxy),
        ('https://e.com', https_proxy),
        ('file://tmp/a', None),
    ]
    for url, proxy in cases:
        req = Request(url)
        assert mw.process_request(req, spider) is None
        # assertEqual replaces the assertEquals alias removed in Python 3.12.
        self.assertEqual(req.url, url)
        self.assertEqual(req.meta.get('proxy'), proxy)
def test_environment_proxies(self):
    """The proxy meta must match the env variable for each URL scheme."""
    http_proxy = "https://proxy.for.http:3128"
    https_proxy = "http://proxy.for.https:8080"
    os.environ["http_proxy"] = http_proxy
    os.environ["https_proxy"] = https_proxy
    os.environ.pop("file_proxy", None)
    middleware = HttpProxyMiddleware()

    # (request URL, proxy expected in meta) pairs, checked in this order.
    expectations = (
        ("http://e.com", http_proxy),
        ("https://e.com", https_proxy),
        ("file://tmp/a", None),
    )
    for target, expected_proxy in expectations:
        request = Request(target)
        assert middleware.process_request(request, spider) is None
        self.assertEqual(request.url, target)
        self.assertEqual(request.meta.get("proxy"), expected_proxy)
def test_no_proxy(self):
    """Check proxy presence for several ``no_proxy`` values.

    The last step checks that a proxy passed through request meta is
    kept even when ``no_proxy`` is ``*``.
    """
    os.environ['http_proxy'] = 'https://proxy.for.http:3128'
    middleware = HttpProxyMiddleware()

    # (no_proxy value, whether 'proxy' should be in meta afterwards)
    scenarios = (
        ('*', False),
        ('other.com', True),
        ('other.com,noproxy.com', False),
    )
    for no_proxy_value, proxy_expected in scenarios:
        os.environ['no_proxy'] = no_proxy_value
        request = Request('http://noproxy.com')
        assert middleware.process_request(request, spider) is None
        assert ('proxy' in request.meta) == proxy_expected

    # proxy from meta['proxy'] takes precedence
    os.environ['no_proxy'] = '*'
    request = Request('http://noproxy.com', meta={'proxy': 'http://proxy.com'})
    assert middleware.process_request(request, spider) is None
    self.assertEqual(request.meta, {'proxy': 'http://proxy.com'})
def test_no_proxy(self):
    """Walk through four ``no_proxy`` situations on one middleware."""
    os.environ["http_proxy"] = "https://proxy.for.http:3128"
    proxy_mw = HttpProxyMiddleware()
    target = "http://noproxy.com"

    # Wildcard: no proxy is expected.
    os.environ["no_proxy"] = "*"
    outgoing = Request(target)
    assert proxy_mw.process_request(outgoing, spider) is None
    assert "proxy" not in outgoing.meta

    # Only another host is listed: a proxy is expected.
    os.environ["no_proxy"] = "other.com"
    outgoing = Request(target)
    assert proxy_mw.process_request(outgoing, spider) is None
    assert "proxy" in outgoing.meta

    # The request host is listed: no proxy is expected.
    os.environ["no_proxy"] = "other.com,noproxy.com"
    outgoing = Request(target)
    assert proxy_mw.process_request(outgoing, spider) is None
    assert "proxy" not in outgoing.meta

    # proxy from meta['proxy'] takes precedence
    os.environ["no_proxy"] = "*"
    explicit_meta = {"proxy": "http://proxy.com"}
    outgoing = Request(target, meta=explicit_meta)
    assert proxy_mw.process_request(outgoing, spider) is None
    self.assertEqual(outgoing.meta, {"proxy": "http://proxy.com"})
def test_proxy_authentication_header_proxy_with_same_credentials(self):
    """A preset header must be kept when the meta proxy has credentials.

    The header is built with ``_basic_auth_header`` before the request is
    created and is expected to have the same value after processing.
    """
    mw = HttpProxyMiddleware()
    auth_header = b'Basic ' + mw._basic_auth_header('user1', 'password1')
    req = Request(
        'https://example.com',
        headers={'Proxy-Authorization': auth_header},
        meta={'proxy': 'https://*****:*****@example.com'},
    )

    assert mw.process_request(req, spider) is None

    self.assertEqual(req.meta['proxy'], 'https://example.com')
    self.assertEqual(req.headers['Proxy-Authorization'], auth_header)
def test_proxy_already_seted(self):
    """A ``None`` proxy in meta must survive when ``http_proxy`` is set."""
    os.environ["http_proxy"] = "https://proxy.for.http:3128"
    middleware = HttpProxyMiddleware()
    request = Request("http://noproxy.com", meta={"proxy": None})

    assert middleware.process_request(request, spider) is None

    # The key must still exist, and its value must still be None.
    assert "proxy" in request.meta
    assert request.meta["proxy"] is None
def test_proxy_precedence_meta(self):
    """The request's meta proxy is expected to win over ``http_proxy``."""
    os.environ['http_proxy'] = 'https://proxy.com'
    proxy_mw = HttpProxyMiddleware()
    initial_meta = {'proxy': 'https://new.proxy:3128'}
    outgoing = Request('http://scrapytest.org', meta=initial_meta)

    assert proxy_mw.process_request(outgoing, spider) is None

    self.assertEqual(outgoing.meta, {'proxy': 'https://new.proxy:3128'})
def test_proxy_already_seted(self):
    """A ``None`` proxy in meta must survive when ``http_proxy`` is set.

    After processing, the ``proxy`` key must still be present with the
    value ``None``.
    """
    # The original also bound this URL to an unused local; that is dropped.
    os.environ['http_proxy'] = 'https://proxy.for.http:3128'
    mw = HttpProxyMiddleware()
    req = Request('http://noproxy.com', meta={'proxy': None})
    assert mw.process_request(req, spider) is None
    assert 'proxy' in req.meta and req.meta['proxy'] is None
def test_proxy_already_seted(self):
    """Meta ``proxy=None`` must be left in place despite ``http_proxy``."""
    os.environ['http_proxy'] = 'https://proxy.for.http:3128'
    proxy_mw = HttpProxyMiddleware()
    preset_meta = {'proxy': None}
    outgoing = Request('http://noproxy.com', meta=preset_meta)

    assert proxy_mw.process_request(outgoing, spider) is None

    meta_after = outgoing.meta
    assert 'proxy' in meta_after and meta_after['proxy'] is None