def test_request_retries(self):
    # make_request should keep retrying while the server responds with
    # 503, giving up after max_retries attempts.
    retries = 5
    e = error.HTTPError('some url', 503, 'some msg', '', BytesIO())
    self.mock_open = MagicMock(side_effect=e)
    request.make_request('http://fauxurl.com', max_retries=retries,
                         open_func=self.mock_open)
    self.assertEqual(self.mock_open.call_count, retries)
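# For reference, a minimal sketch of a retrying make_request, assuming
# only the signature the test above exercises (max_retries and an
# injectable open_func); this is an illustration, not the project's
# actual implementation. It retries server errors (5xx) and returns
# None once retries are exhausted, which is consistent with the test
# asserting call_count == retries without expecting a raise.
from time import sleep
from urllib import error, request as urllib_request


def make_request_sketch(url, max_retries=3, open_func=None, headers=None):
    # Hypothetical stand-in for the project's make_request.
    open_func = open_func or urllib_request.urlopen
    req = urllib_request.Request(url, headers=headers or {})
    for _ in range(max_retries):
        try:
            return open_func(req)
        except error.HTTPError as e:
            if e.code < 500:
                raise  # client errors (e.g. 404) are not retried
            sleep(1)   # back off before the next attempt
    return None  # all retries exhausted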
def save_from_url(url, filename):
    """
    Saves a remote file to S3 and returns its S3 URL.
    """
    try:
        res = make_request(url)
    except error.HTTPError as e:
        # Wikimedia 404 errors are very common, since images may go
        # out of date. So common that for now these exceptions are
        # just ignored.
        if e.code == 404 and 'wikimedia' in url:
            logger.warning('Error requesting {0} : {1}'.format(url, e))
        # Other exceptions are more remarkable and should be brought up.
        else:
            logger.exception('Error requesting {0} : {1}'.format(url, e))
        return None
    except (ConnectionResetError, BadStatusLine, ValueError) as e:
        logger.exception('Error requesting {0} : {1}'.format(url, e))
        return None
    data = BytesIO(res.read())
    return save_from_file(data, filename)
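# Usage sketch, with a hypothetical image URL; the S3 bucket
# configuration and the save_from_file helper are assumed to be set up
# elsewhere in the project.
s3_url = save_from_url('http://upload.wikimedia.org/some_image.jpg',
                       'some_image.jpg')
if s3_url is None:
    pass  # the download failed (404, reset connection, etc.) and was logged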
def _request(endpoint, url, format='json'):
    # Requests `url` relative to `endpoint` and parses the response
    # body as JSON or XML, depending on `format`.
    complete_url = '{0}{1}'.format(endpoint, url)
    res = make_request(complete_url)
    content = res.read()
    if format == 'json':
        return json.loads(content.decode('utf-8'))
    elif format == 'xml':
        return xmltodict.parse(content)
    return None
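# Usage sketch against a hypothetical API; the endpoint and paths below
# are illustrative only.
articles = _request('http://api.example.com', '/articles')            # dict/list from JSON
feed = _request('http://api.example.com', '/feed.xml', format='xml')  # OrderedDict from xmltodict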
def _get_html(url):
    # Some sites, such as NYTimes, track which
    # articles have been viewed with cookies.
    # Without cookies, you get thrown into an infinite loop.
    cookies = CookieJar()
    opener = request.build_opener(request.HTTPCookieProcessor(cookies))

    # Get the raw html.
    # Spoof a user agent.
    # This can help get around 403 (forbidden) errors.
    try:
        html = make_request(url, open_func=opener.open,
                            headers={'User-Agent': 'Chrome'}).read()
    except IncompleteRead as e:
        html = e.partial
    return html
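# Usage sketch: _get_html returns the raw response bytes (or the
# partial body on an IncompleteRead), so decode before parsing; the
# URL below is illustrative.
raw = _get_html('http://www.nytimes.com/some-article.html')
text = raw.decode('utf-8', errors='replace')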