def _create_request(self, verb, url, query_params=None, data=None, send_as_file=False):
    """Build a single `grequests` request for the given HTTP verb.

    Args:
        verb - MultiRequest._VERB_POST or MultiRequest._VERB_GET
        url - A string URL
        query_params - None or a dict of query-string parameters
        data - None or a string or a dict used as the request payload
        send_as_file - A boolean; for a POST, when true `data` is passed as
            `files={'file': data}` instead of as `data`.
    Returns:
        The request object returned by `grequests.post` or `grequests.get`.
    Raises:
        InvalidRequestError - if an invalid verb is passed in.
    """
    # Options shared by every request regardless of the verb; collecting them
    # once keeps the individual calls from forgetting one.
    common_options = dict(
        headers=self._default_headers,
        params=query_params,
        timeout=self._req_timeout,
        session=self._session,
    )

    if MultiRequest._VERB_POST == verb:
        if send_as_file:
            return grequests.post(url, files={'file': data}, **common_options)
        return grequests.post(url, data=data, **common_options)

    if MultiRequest._VERB_GET == verb:
        return grequests.get(url, data=data, **common_options)

    raise InvalidRequestError('Invalid verb {0}'.format(verb))
def _multi_request(self, verb, urls, query_params, data, to_json=True, send_as_file=False, file_download=False): """Issues multiple batches of simultaneous HTTP requests and waits for responses. Args: verb - MultiRequest._VERB_POST or MultiRequest._VERB_GET urls - A string URL or list of string URLs query_params - None, a dict, or a list of dicts representing the query params data - None, a dict or string, or a list of dicts and strings representing the data body. to_json - A boolean, should the responses be returned as JSON blobs Returns: If multiple requests are made - a list of dicts if to_json, a list of requests responses otherwise If a single request is made, the return is not a list Raises: InvalidRequestError - if no URL is supplied or if any of the requests returns 403 Access Forbidden response """ if not urls: raise InvalidRequestError('No URL supplied') # Break the params into batches of request_params request_params = self._zip_request_params(urls, query_params, data) batch_of_params = [ request_params[pos:pos + self._max_requests] for pos in range(0, len(request_params), self._max_requests) ] # Iteratively issue each batch, applying the rate limiter if necessary all_responses = [] for param_batch in batch_of_params: if self._rate_limiter: self._rate_limiter.make_calls(num_calls=len(param_batch)) prepared_requests = [ self._create_request( verb, url, query_params=query_param, data=datum, send_as_file=send_as_file, ) for url, query_param, datum in param_batch ] responses = self._wait_for_response(prepared_requests) for response in responses: if response and not file_download: all_responses.append( self._convert_to_json(response ) if to_json else response) elif file_download: all_responses.append(self._handle_file_download(response)) else: all_responses.append(None) return all_responses
def map_with_retries(self, requests, responses_for_requests):
    """Resolve every request and retry the ones that raised, within a shared budget.

    `callable()` is invoked on every request before any result is awaited;
    each returned object's `result()` is then collected in order.

    :param requests: A collection of Request objects exposing `callable()` and `url`.
    :param responses_for_requests: Dictionary mapping of requests to responses;
        every non-None response is stored into it under its request.
    :raises InvalidRequestError: when a response has status code 403.

    A request whose `result()` raises `RequestException` is retried only while
    `self.total_retries` is positive (each retry decrements it); once the
    budget is spent the failure is just logged.
    """
    to_retry = []
    futures = [pending.callable() for pending in requests]

    for request, future in zip(requests, futures):
        try:
            response = future.result()
            if response is None:
                continue
            if response.status_code == 403:
                logging.warning(
                    'Request to {} caused a 403 response status code.'.format(request.url))
                raise InvalidRequestError('Access forbidden')
            responses_for_requests[request] = response
        except RequestException as error:
            logging.error('An exception was raised for {}: {}'.format(request.url, error))
            if self.total_retries > 0:
                self.total_retries -= 1
                to_retry.append(request)

    # Go around again for the failures; the budget on self has already shrunk.
    if to_retry:
        self.map_with_retries(to_retry, responses_for_requests)
def test_specific_exception(self):
    """Check the text captured in `self._stringio` after `write_exception`
    is given an argument-less InvalidRequestError: it must begin with the
    `[ERROR]` tag followed by the exception class name.
    """
    try:
        raise InvalidRequestError()
    except Exception as error:
        write_exception(error)

    captured = self._stringio.getvalue()
    # find() yields 0 only when the captured text starts with the prefix.
    prefix_position = captured.find('[ERROR] InvalidRequestError')
    T.assert_equal(0, prefix_position)
def _wait_for_response(self, requests): """Issues a batch of requests and waits for the responses. If some of the requests fail it will retry the failed ones up to `_max_retry` times. Args: requests - A list of requests Returns: A list of `requests.models.Response` objects Raises: InvalidRequestError - if any of the requests returns "403 Forbidden" response """ responses_for_requests = OrderedDict.fromkeys(requests) for retry in range(self._max_retry): try: logging.debug('Try #{0}'.format(retry + 1)) responses = grequests.map(requests, self._handle_exception) if any(response is not None and response.status_code == 403 for response in responses): raise InvalidRequestError('Access forbidden') failed_requests = [] for request, response in zip(requests, responses): if response: responses_for_requests[request] = response else: failed_requests.append(request) if not failed_requests: break logging.warning('Try #{0}. Expected {1} successful response(s) but only got {2}.'.format( retry + 1, len(requests), len(requests) - len(failed_requests))) # retry only for the failed requests requests = failed_requests except InvalidRequestError: raise except Exception as e: # log the exception for the informative purposes and pass to the next iteration logging.exception('Try #{0}. Exception occured: {1}. Retrying.'.format(retry + 1, e)) pass if failed_requests: logging.warning('Still {0} failed request(s) after {1} retries:'.format(len(failed_requests), self._max_retry)) for failed_request in failed_requests: failed_response = failed_request.response if failed_response is not None: # in case response text does contain some non-ascii characters failed_response_text = failed_response.text.encode('ascii', 'xmlcharrefreplace') logging.warning('Request to {0} failed with status code {1}. 
Response text: {2}'.format( failed_request.url, failed_response.status_code, failed_response_text)) else: logging.warning('Request to {0} failed with None response.'.format(failed_request.url)) return list(responses_for_requests.values())
def _handle_exception(self, request, exception):
    """Turn an exception reported for a request into an InvalidRequestError.

    Args:
        request - The request that caused the exception; its `url` goes into the message
        exception - The exception caused by the request
    Raises:
        InvalidRequestError - always; the message names the request URL and the exception
    """
    message = 'Request to {0} caused an exception: {1}'.format(request.url, exception)
    raise InvalidRequestError(message)
def test_exception_message(self):
    """Check the text captured in `self._stringio` after `write_exception`
    is given an InvalidRequestError carrying a message: it must begin with
    the `[ERROR]` tag, the exception class name and then that message.
    """
    try:
        raise InvalidRequestError('Look for me in validation')
    except Exception as error:
        write_exception(error)

    captured = self._stringio.getvalue()
    # find() yields 0 only when the captured text starts with the prefix.
    prefix_position = captured.find('[ERROR] InvalidRequestError Look for me in validation')
    T.assert_equal(0, prefix_position)
def _zip_request_params(self, urls, query_params, data): """Massages inputs and returns a list of 3-tuples zipping them up. This is all the smarts behind deciding how many requests to issue. It's fine for an input to have 0, 1, or a list of values. If there are two inputs each with a list of values, the cardinality of those lists much match. Args: urls - 1 string URL or a list of URLs query_params - None, 1 dict, or a list of dicts data - None, 1 dict or string, or a list of dicts or strings Returns: A list of 3-tuples (url, query_param, data) Raises: InvalidRequestError - if cardinality of lists does not match """ # Everybody gets to be a list if not isinstance(urls, list): urls = [urls] if not isinstance(query_params, list): query_params = [query_params] if not isinstance(data, list): data = [data] # Counts must not mismatch url_count = len(urls) query_param_count = len(query_params) data_count = len(data) max_count = max(url_count, query_param_count, data_count) if (max_count > url_count > 1 or max_count > query_param_count > 1 or max_count > data_count > 1): raise InvalidRequestError( 'Mismatched parameter count url_count:{0} query_param_count:{1} data_count:{2} max_count:{3}', url_count, query_param_count, data_count, max_count, ) # Pad out lists if url_count < max_count: urls = urls * max_count if query_param_count < max_count: query_params = query_params * max_count if data_count < max_count: data = data * max_count return list(zip(urls, query_params, data))
def map_with_retries(self, requests, responses_for_requests, *args, **kwargs):
    """Run `grequests.map` over the requests, re-issuing failed ones within a shared budget.

    :param requests: A collection of Request objects.
    :param responses_for_requests: Dictionary mapping of requests to responses;
        truthy responses are stored into it under their request.
    :param exception_handler: Optional keyword argument. Callback invoked with
        (Request, Exception) each time the internal handler is triggered.
    :param args: Additional positional arguments forwarded to `grequests.map`
        (and to the recursive retry call).
    :param kwargs: Remaining keyword arguments forwarded to `grequests.map`.
    :raises InvalidRequestError: when any response has status code 403.

    A failed request is queued for another round only while
    `self.total_retries` is positive; each queued retry decrements it.
    """
    user_handler = kwargs.pop('exception_handler', None)
    to_retry = []

    def retrying_handler(request, exception, *handler_args, **handler_kwargs):
        # Spend one unit of the shared budget per request queued for retry.
        if self.total_retries > 0:
            self.total_retries -= 1
            to_retry.append(request)
        if user_handler:
            return user_handler(request, exception, *handler_args, **handler_kwargs)

    responses = grequests.map(requests, *args, exception_handler=retrying_handler, **kwargs)

    for request, response in zip(requests, responses):
        if response is None:
            continue
        if response.status_code == 403:
            logging.debug(
                'Request was received with a 403 response status code.')
            raise InvalidRequestError('Access forbidden')
        if response:
            responses_for_requests[request] = response

    # Go around again for the failures; the budget on self has already shrunk.
    if to_retry:
        self.map_with_retries(
            to_retry, responses_for_requests, *args, exception_handler=user_handler, **kwargs)
def _create_request(self, verb, url, query_params=None, data=None, send_as_file=False):
    """Build a single `grequests` request for the given HTTP verb.

    Args:
        verb - MultiRequest._VERB_POST or MultiRequest._VERB_GET
        url - A string URL
        query_params - None or a dict of query-string parameters
        data - None or a string or a dict used as the request payload
        send_as_file - A boolean; for a POST, when true `data` is passed as
            `files={'file': data}` instead of as `data`.
    Returns:
        The request object returned by `grequests.post` or `grequests.get`.
    Raises:
        InvalidRequestError - if an invalid verb is passed in.
    """
    is_post = MultiRequest._VERB_POST == verb
    if not is_post and MultiRequest._VERB_GET != verb:
        raise InvalidRequestError('Invalid verb {0}'.format(verb))

    # Options shared by every request regardless of the verb.
    shared_options = dict(
        headers=self._default_headers,
        params=query_params,
        timeout=self._req_timeout,
    )

    if not is_post:
        return grequests.get(url, data=data, **shared_options)
    if send_as_file:
        return grequests.post(url, files={'file': data}, **shared_options)
    return grequests.post(url, data=data, **shared_options)