def test_get_proxies_local_no_user(self, mock_in_gcp, mock_secret):
    """get_proxies() must raise when in GCP but no proxy user secret exists."""
    mock_in_gcp.return_value = True
    # Deliberately omit 'proxy_user' so credential lookup fails.
    fake_secrets = {
        "proxy_url": "proxy.net/",
        "proxy_password": "******",
    }
    mock_secret.side_effect = fake_secrets.get
    with pytest.raises(Exception) as excinfo:
        scraper_utils.get_proxies()
    assert str(excinfo.value) == "No proxy user/pass"
def test_get_proxies_local_no_user(self, mock_in_gae, mock_secret):
    """get_proxies() must raise when in GAE but no proxy user secret exists."""
    mock_in_gae.return_value = True
    # Deliberately omit 'proxy_user' so credential lookup fails.
    fake_secrets = {
        'proxy_url': 'proxy.net/',
        'proxy_password': '******',
    }
    mock_secret.side_effect = fake_secrets.get
    with pytest.raises(Exception) as excinfo:
        scraper_utils.get_proxies()
    assert str(excinfo.value) == 'No proxy user/pass'
def test_get_proxies_local(self, mock_in_gcp, mock_secret):
    """Outside GCP, get_proxies() should return None (no proxying locally)."""
    mock_in_gcp.return_value = False
    fake_secrets = {
        "proxy_url": "proxy.biz/",
        "test_proxy_user": "******",
        "test_proxy_password": "******",
    }
    mock_secret.side_effect = fake_secrets.get
    assert scraper_utils.get_proxies() is None
def test_get_proxies_local(self, mock_in_gae, mock_secret):
    """Outside GAE, get_proxies() should return None (no proxying locally)."""
    mock_in_gae.return_value = False
    fake_secrets = {
        'proxy_url': 'proxy.biz/',
        'test_proxy_user': '******',
        'test_proxy_password': '******',
    }
    mock_secret.side_effect = fake_secrets.get
    assert scraper_utils.get_proxies() is None
def fetch_page(url, headers=None, cookies=None, params=None,
               post_data=None, json_data=None, should_proxy=True):
    """Fetch content from a URL.

    By default a GET request is issued (optionally with `params`). If
    `post_data` or `json_data` is provided instead, a POST request is
    issued. Supplying `params` together with `post_data`/`json_data` is
    an error.

    Args:
        url: (string) URL to fetch content from.
        headers: (dict) any headers to send in addition to the default.
        cookies: (dict) any cookies to send in the request.
        params: (dict) parameters to pass in the url of a GET request.
        post_data: (dict) parameters to pass as form data in a POST request.
        json_data: (dict) parameters to pass as a JSON body in a POST request.
        should_proxy: (bool) whether or not to use a proxy.

    Returns:
        The requests.Response for the fetched page.

    Raises:
        ValueError: if both GET params and POST data/json were supplied.
        FetchPageError: if the request fails or returns an error status.
    """
    proxies = scraper_utils.get_proxies() if should_proxy else None

    # Copy so the caller's dict is never mutated; only fill in default
    # headers when the caller did not supply their own User-Agent.
    headers = headers.copy() if headers else {}
    if 'User-Agent' not in headers:
        headers.update(scraper_utils.get_headers())

    try:
        if post_data is None and json_data is None:
            # NOTE(review): verify=False disables TLS certificate checks;
            # presumably required by the proxy setup — confirm before relying
            # on this for sensitive traffic.
            page = requests.get(
                url, proxies=proxies, headers=headers, cookies=cookies,
                params=params, verify=False)
        elif params is None:
            page = requests.post(
                url, proxies=proxies, headers=headers, cookies=cookies,
                data=post_data, json=json_data, verify=False)
        else:
            raise ValueError(
                "Both params ({}) for a GET request and either post_data "
                "({}) or json_data ({}) for a POST request were set."
                .format(params, post_data, json_data))
        page.raise_for_status()
    except requests.exceptions.RequestException as ce:
        # Chain the original exception so the root cause stays visible.
        raise FetchPageError(ce.request, ce.response) from ce
    return page
def test_get_proxies_prod(self, mock_in_gcp, mock_rand, mock_secret):
    """In GCP with full credentials, get_proxies() builds the proxy map."""
    mock_in_gcp.return_value = True
    mock_rand.return_value = 10
    fake_secrets = {
        "proxy_url": "proxy.net/",
        "proxy_user": "******",
        "proxy_password": "******",
    }
    mock_secret.side_effect = fake_secrets.get
    expected = {
        "http": "http://*****:*****@proxy.net/",
        "https": "http://*****:*****@proxy.net/",
    }
    assert scraper_utils.get_proxies() == expected
def test_get_proxies_prod(self, mock_in_gae, mock_rand, mock_secret):
    """In GAE with full credentials, get_proxies() builds the proxy map."""
    mock_in_gae.return_value = True
    mock_rand.return_value = 10
    fake_secrets = {
        'proxy_url': 'proxy.net/',
        'proxy_user': '******',
        'proxy_password': '******',
    }
    mock_secret.side_effect = fake_secrets.get
    expected = {
        'http': 'http://*****:*****@proxy.net/',
        'https': 'http://*****:*****@proxy.net/'
    }
    assert scraper_utils.get_proxies() == expected