Example #1
0
    def _create_request(self, verb, url, query_params=None, data=None, send_as_file=False):
        """Build one unsent request through `grequests.post` or `grequests.get`.

        Args:
            verb - MultiRequest._VERB_POST or MultiRequest._VERB_GET
            url - A string URL
            query_params - None or a dict, forwarded as the `params` option
            data - None or a string or a dict
            send_as_file - A boolean; for POST, wrap `data` as `files={'file': data}`
                instead of sending it as the request body. Ignored for GET.
        Returns:
            Whatever `grequests.post` / `grequests.get` returns for the given arguments.
        Raises:
            InvalidRequestError - if an invalid verb is passed in.
        """

        # Options every request shares: default headers, query string, timeout and session.
        shared_options = dict(
            headers=self._default_headers,
            params=query_params,
            timeout=self._req_timeout,
            session=self._session,
        )

        if MultiRequest._VERB_POST == verb:
            if send_as_file:
                return grequests.post(url, files={'file': data}, **shared_options)
            return grequests.post(url, data=data, **shared_options)

        if MultiRequest._VERB_GET == verb:
            return grequests.get(url, data=data, **shared_options)

        raise InvalidRequestError('Invalid verb {0}'.format(verb))
Example #2
0
    def _multi_request(self,
                       verb,
                       urls,
                       query_params,
                       data,
                       to_json=True,
                       send_as_file=False,
                       file_download=False):
        """Issues multiple batches of simultaneous HTTP requests and waits for responses.

        Args:
            verb - MultiRequest._VERB_POST or MultiRequest._VERB_GET
            urls - A string URL or list of string URLs
            query_params - None, a dict, or a list of dicts representing the query params
            data - None, a dict or string, or a list of dicts and strings representing the data body.
            to_json - A boolean, should the responses be returned as JSON blobs
        Returns:
            If multiple requests are made - a list of dicts if to_json, a list of requests responses otherwise
            If a single request is made, the return is not a list
        Raises:
            InvalidRequestError - if no URL is supplied or if any of the requests returns 403 Access Forbidden response
        """
        if not urls:
            raise InvalidRequestError('No URL supplied')

        # Break the params into batches of request_params
        request_params = self._zip_request_params(urls, query_params, data)
        batch_of_params = [
            request_params[pos:pos + self._max_requests]
            for pos in range(0, len(request_params), self._max_requests)
        ]

        # Iteratively issue each batch, applying the rate limiter if necessary
        all_responses = []
        for param_batch in batch_of_params:
            if self._rate_limiter:
                self._rate_limiter.make_calls(num_calls=len(param_batch))

            prepared_requests = [
                self._create_request(
                    verb,
                    url,
                    query_params=query_param,
                    data=datum,
                    send_as_file=send_as_file,
                ) for url, query_param, datum in param_batch
            ]

            responses = self._wait_for_response(prepared_requests)
            for response in responses:
                if response and not file_download:
                    all_responses.append(
                        self._convert_to_json(response
                                              ) if to_json else response)
                elif file_download:
                    all_responses.append(self._handle_file_download(response))
                else:
                    all_responses.append(None)

        return all_responses
Example #3
0
    def map_with_retries(self, requests, responses_for_requests):
        """Resolve each request's future, retrying the ones that raise.

        Every request is started via its ``callable()`` before any result is
        awaited. Non-None responses are stored in ``responses_for_requests``;
        requests whose future raises ``RequestException`` are retried while the
        shared ``self.total_retries`` budget lasts.

        :param requests: A collection of Request objects.
        :param responses_for_requests: Dictionary mapping of requests to responses;
            updated in place.
        :raises InvalidRequestError: when a response has status code 403
            (assuming InvalidRequestError is not itself a RequestException,
            which would be caught below - TODO confirm)

        """
        futures = [pending_request.callable() for pending_request in requests]
        to_retry = []

        for request, future in zip(requests, futures):
            try:
                response = future.result()
                if response is None:
                    # Nothing to record for this request
                    continue
                if response.status_code == 403:
                    logging.warning(
                        'Request to {} caused a 403 response status code.'.
                        format(request.url))
                    raise InvalidRequestError('Access forbidden')
                responses_for_requests[request] = response
            except RequestException as error:
                logging.error('An exception was raised for {}: {}'.format(
                    request.url, error))
                # Only queue a retry while the shared budget is not exhausted
                if self.total_retries > 0:
                    self.total_retries -= 1
                    to_retry.append(request)

        # Recursively retry failed requests with the modified total retry count
        if to_retry:
            self.map_with_retries(to_retry, responses_for_requests)
Example #4
0
    def test_specific_exception(self):
        """The written output starts with the error tag and the exception's class name."""
        try:
            raise InvalidRequestError()
        except Exception as error:
            write_exception(error)

        logged = self._stringio.getvalue()
        # find() returning 0 means the expected text is at the very start of the output
        position = logged.find('[ERROR] InvalidRequestError')
        T.assert_equal(0, position)
Example #5
0
    def _wait_for_response(self, requests):
        """Issues a batch of requests and waits for the responses.
        If some of the requests fail it will retry the failed ones up to `_max_retry` times.

        Args:
            requests - A list of requests
        Returns:
            A list of `requests.models.Response` objects
        Raises:
            InvalidRequestError - if any of the requests returns "403 Forbidden" response
        """
        responses_for_requests = OrderedDict.fromkeys(requests)

        for retry in range(self._max_retry):
            try:
                logging.debug('Try #{0}'.format(retry + 1))
                responses = grequests.map(requests, self._handle_exception)

                if any(response is not None and response.status_code == 403 for response in responses):
                    raise InvalidRequestError('Access forbidden')

                failed_requests = []

                for request, response in zip(requests, responses):
                    if response:
                        responses_for_requests[request] = response
                    else:
                        failed_requests.append(request)

                if not failed_requests:
                    break

                logging.warning('Try #{0}. Expected {1} successful response(s) but only got {2}.'.format(
                    retry + 1, len(requests), len(requests) - len(failed_requests)))

                # retry only for the failed requests
                requests = failed_requests
            except InvalidRequestError:
                raise
            except Exception as e:
                # log the exception for the informative purposes and pass to the next iteration
                logging.exception('Try #{0}. Exception occured: {1}. Retrying.'.format(retry + 1, e))
                pass

        if failed_requests:
            logging.warning('Still {0} failed request(s) after {1} retries:'.format(len(failed_requests), self._max_retry))
            for failed_request in failed_requests:
                failed_response = failed_request.response
                if failed_response is not None:
                    # in case response text does contain some non-ascii characters
                    failed_response_text = failed_response.text.encode('ascii', 'xmlcharrefreplace')
                    logging.warning('Request to {0} failed with status code {1}. Response text: {2}'.format(
                        failed_request.url, failed_response.status_code, failed_response_text))
                else:
                    logging.warning('Request to {0} failed with None response.'.format(failed_request.url))

        return list(responses_for_requests.values())
Example #6
0
    def _handle_exception(self, request, exception):
        """Turns a failure reported for a request into an InvalidRequestError.

        Args:
            request - A request that caused the exception; its `url` goes into the message
            exception - An exception caused by the request
        Raises:
            InvalidRequestError - always, with a message naming the URL and the exception
        """
        message = 'Request to {0} caused an exception: {1}'.format(request.url, exception)
        raise InvalidRequestError(message)
Example #7
0
    def test_exception_message(self):
        """The written output starts with the error tag, the class name and the message."""
        try:
            raise InvalidRequestError('Look for me in validation')
        except Exception as error:
            write_exception(error)

        logged = self._stringio.getvalue()
        # find() returning 0 means the expected text is at the very start of the output
        position = logged.find(
            '[ERROR] InvalidRequestError Look for me in validation')
        T.assert_equal(0, position)
Example #8
0
    def _zip_request_params(self, urls, query_params, data):
        """Massages inputs and returns a list of 3-tuples zipping them up.

        This is all the smarts behind deciding how many requests to issue.
        It's fine for an input to have 0, 1, or a list of values.
        If there are two inputs each with a list of values, the cardinality of those lists much match.

        Args:
            urls - 1 string URL or a list of URLs
            query_params - None, 1 dict, or a list of dicts
            data - None, 1 dict or string, or a list of dicts or strings
        Returns:
            A list of 3-tuples (url, query_param, data)
        Raises:
            InvalidRequestError - if cardinality of lists does not match
        """

        # Everybody gets to be a list
        if not isinstance(urls, list):
            urls = [urls]
        if not isinstance(query_params, list):
            query_params = [query_params]
        if not isinstance(data, list):
            data = [data]

        # Counts must not mismatch
        url_count = len(urls)
        query_param_count = len(query_params)
        data_count = len(data)

        max_count = max(url_count, query_param_count, data_count)

        if (max_count > url_count > 1 or max_count > query_param_count > 1
                or max_count > data_count > 1):
            raise InvalidRequestError(
                'Mismatched parameter count url_count:{0} query_param_count:{1} data_count:{2} max_count:{3}',
                url_count,
                query_param_count,
                data_count,
                max_count,
            )

        # Pad out lists
        if url_count < max_count:
            urls = urls * max_count
        if query_param_count < max_count:
            query_params = query_params * max_count
        if data_count < max_count:
            data = data * max_count

        return list(zip(urls, query_params, data))
Example #9
0
    def map_with_retries(self, requests, responses_for_requests, *args,
                         **kwargs):
        """Wraps around grequests.map to provide session-based retry functionality

        Requests for which grequests reports an exception are retried while the
        shared ``self.total_retries`` budget lasts; truthy responses are stored
        in ``responses_for_requests``.

        :param requests: A collection of Request objects.
        :param responses_for_requests: Dictionary mapping of requests to responses;
            updated in place.
        :param args: Additional positional arguments forwarded to grequests.map,
            including on retries
        :param kwargs: Keyword arguments forwarded to grequests.map. The optional
            ``exception_handler`` (params: Request, Exception) is taken out and
            invoked from the internal handler after the retry bookkeeping.
        :raises InvalidRequestError: when a response has status code 403


        """
        user_handler = kwargs.pop('exception_handler', None)
        to_retry = []

        def _on_exception(request, exception, *handler_args, **handler_kwargs):
            # Queue the request for another attempt while the budget allows it
            if self.total_retries > 0:
                self.total_retries -= 1
                to_retry.append(request)
            if not user_handler:
                return None
            return user_handler(request, exception, *handler_args,
                                **handler_kwargs)

        responses = grequests.map(requests,
                                  *args,
                                  exception_handler=_on_exception,
                                  **kwargs)

        for request, response in zip(requests, responses):
            if response is None:
                continue
            if response.status_code == 403:
                logging.debug(
                    'Request was received with a 403 response status code.')
                raise InvalidRequestError('Access forbidden')
            # Only truthy responses are recorded
            if response:
                responses_for_requests[request] = response

        # Recursively retry failed requests with the modified total retry count
        if to_retry:
            self.map_with_retries(to_retry,
                                  responses_for_requests,
                                  *args,
                                  exception_handler=user_handler,
                                  **kwargs)
Example #10
0
    def _create_request(self,
                        verb,
                        url,
                        query_params=None,
                        data=None,
                        send_as_file=False):
        """Build one unsent request through `grequests.post` or `grequests.get`.

        Args:
            verb - MultiRequest._VERB_POST or MultiRequest._VERB_GET
            url - A string URL
            query_params - None or a dict, forwarded as the `params` option
            data - None or a string or a dict
            send_as_file - A boolean; for POST, wrap `data` as `files={'file': data}`
                instead of sending it as the request body. Ignored for GET.
        Returns:
            Whatever `grequests.post` / `grequests.get` returns for the given arguments.
        Raises:
            InvalidRequestError - if an invalid verb is passed in.
        """
        # Reject unknown verbs before reading any per-instance settings
        is_post = MultiRequest._VERB_POST == verb
        if not is_post and MultiRequest._VERB_GET != verb:
            raise InvalidRequestError('Invalid verb {0}'.format(verb))

        # Options every request shares: default headers, query string and timeout
        options = {
            'headers': self._default_headers,
            'params': query_params,
            'timeout': self._req_timeout,
        }

        if not is_post:
            options['data'] = data
            return grequests.get(url, **options)

        if send_as_file:
            options['files'] = {'file': data}
        else:
            options['data'] = data
        return grequests.post(url, **options)