Example #1
0
    def test_get_proxies_local_no_user(self, mock_in_gcp, mock_secret):
        """get_proxies() raises when the proxy user secret is missing in GCP."""
        mock_in_gcp.return_value = True
        # Only URL and password are present; the user secret lookup misses.
        mock_secret.side_effect = {
            "proxy_url": "proxy.net/",
            "proxy_password": "******",
        }.get

        with pytest.raises(Exception) as exc_info:
            scraper_utils.get_proxies()
        assert str(exc_info.value) == "No proxy user/pass"
    def test_get_proxies_local_no_user(self, mock_in_gae, mock_secret):
        """In GAE, get_proxies() fails when no proxy user secret exists."""
        mock_in_gae.return_value = True
        # Deliberately omit the proxy user so the lookup returns None.
        fake_secrets = {'proxy_url': 'proxy.net/', 'proxy_password': '******'}
        mock_secret.side_effect = fake_secrets.get

        with pytest.raises(Exception) as raised:
            scraper_utils.get_proxies()
        assert str(raised.value) == 'No proxy user/pass'
Example #3
0
    def test_get_proxies_local(self, mock_in_gcp, mock_secret):
        """Outside GCP, get_proxies() returns None (no proxying locally)."""
        mock_in_gcp.return_value = False
        stub_secrets = {
            "proxy_url": "proxy.biz/",
            "test_proxy_user": "******",
            "test_proxy_password": "******",
        }
        mock_secret.side_effect = stub_secrets.get

        assert scraper_utils.get_proxies() is None
    def test_get_proxies_local(self, mock_in_gae, mock_secret):
        """When not running in GAE, get_proxies() yields no proxy config."""
        mock_in_gae.return_value = False
        mock_secret.side_effect = {
            'proxy_url': 'proxy.biz/',
            'test_proxy_user': '******',
            'test_proxy_password': '******',
        }.get

        result = scraper_utils.get_proxies()
        assert result is None
Example #5
0
    def fetch_page(url, headers=None, cookies=None, params=None,
                   post_data=None, json_data=None, should_proxy=True):
        """Fetch a page from a URL.

        Performs a GET when both post_data and json_data are None; otherwise
        performs a POST carrying the form and/or JSON body. Supplying GET
        params together with POST data is an error.

        Args:
            url: (string) URL to fetch content from
            headers: (dict) any headers to send in addition to the default
            cookies: (dict) any cookies to send in the request.
            params: dict of parameters to pass in the url of a GET request
            post_data: dict of parameters to pass into the html POST request
            json_data: dict of parameters in JSON format to pass into the html
                       POST request
            should_proxy: (bool) whether or not to use a proxy.

        Returns:
            The requests.Response for the fetched page.

        Raises:
            ValueError: if params is combined with post_data/json_data.
            FetchPageError: if the request fails or returns an error status.
        """
        proxies = scraper_utils.get_proxies() if should_proxy else None
        # Copy so the caller's headers dict is never mutated.
        headers = headers.copy() if headers else {}
        if 'User-Agent' not in headers:
            headers.update(scraper_utils.get_headers())

        try:
            if post_data is None and json_data is None:
                # NOTE(review): verify=False disables TLS certificate
                # verification — presumably required by the proxy; confirm.
                page = requests.get(
                    url, proxies=proxies, headers=headers, cookies=cookies,
                    params=params, verify=False)
            elif params is None:
                page = requests.post(
                    url, proxies=proxies, headers=headers, cookies=cookies,
                    data=post_data, json=json_data, verify=False)
            else:
                raise ValueError(
                    "Both params ({}) for a GET request and either post_data "
                    "({}) or json_data ({}) for a POST request were set." \
                        .format(params, post_data, json_data))
            page.raise_for_status()
        except requests.exceptions.RequestException as ce:
            # Chain the original exception so its traceback is preserved.
            raise FetchPageError(ce.request, ce.response) from ce

        return page
Example #6
0
    def test_get_proxies_prod(self, mock_in_gcp, mock_rand, mock_secret):
        """In GCP, get_proxies() assembles user:pass proxy URLs from secrets."""
        mock_in_gcp.return_value = True
        mock_rand.return_value = 10
        mock_secret.side_effect = {
            "proxy_url": "proxy.net/",
            "proxy_user": "******",
            "proxy_password": "******",
        }.get

        expected = "http://*****:*****@proxy.net/"
        assert scraper_utils.get_proxies() == {
            "http": expected,
            "https": expected,
        }
    def test_get_proxies_prod(self, mock_in_gae, mock_rand, mock_secret):
        """Running in GAE, get_proxies() returns http and https proxy entries."""
        mock_in_gae.return_value = True
        mock_rand.return_value = 10
        secret_values = {
            'proxy_url': 'proxy.net/',
            'proxy_user': '******',
            'proxy_password': '******',
        }
        mock_secret.side_effect = secret_values.get

        result = scraper_utils.get_proxies()
        assert result == {
            'http': 'http://*****:*****@proxy.net/',
            'https': 'http://*****:*****@proxy.net/'
        }