コード例 #1
0
ファイル: fetch_link.py プロジェクト: shafiahmed/mediacloud
def get_seeded_content(db: DatabaseHandler,
                       topic_fetch_url: dict) -> typing.Optional[Response]:
    """Return a dummy response holding the seeded content for this url and topic, if any.

    Queries topic_seed_urls for a row matching the topic and url of topic_fetch_url whose content is not null.

    Arguments:
    db - db handle
    topic_fetch_url - topic_fetch_url dict from db; its 'topics_id' and 'url' keys are read

    Returns:
    dummy 200 response object wrapping the first matching content (with a GET request for the url attached),
    or None if no seeded content was found

    """
    r = db.query(
        "select content from topic_seed_urls where topics_id = %(a)s and url = %(b)s and content is not null",
        {
            'a': topic_fetch_url['topics_id'],
            'b': topic_fetch_url['url']
        }).flat()

    # No seeded content for this topic / url pair
    if not r:
        return None

    response = Response(code=200, message='OK', headers={}, data=r[0])
    response.set_request(Request('GET', topic_fetch_url['url']))

    return response
コード例 #2
0
ファイル: __init__.py プロジェクト: berkmancenter/mediacloud
    def __get_follow_http_html_redirects_redirects_exhausted(self, response_: Response) -> Union[Response, None]:
        """Choose a response to fall back to from the chain of previous responses.

        Walks back through response_'s chain of previous() responses, at most max_redirect() + 1 steps. If the
        URL-encoded form of a URL found earlier in the chain is contained (case-insensitively) in a URL that was
        redirected to later, the earlier response is returned.

        Arguments:
        response_ - response whose previous() chain is inspected

        Returns:
        the previous response whose encoded URL is embedded in a later URL, or None if there is no such response

        Raises:
        McGetFollowHTTPHTMLRedirectsException - if response_ is None
        """

        if response_ is None:
            raise McGetFollowHTTPHTMLRedirectsException("Response is None.")

        # If one of the URLs that we've been redirected to contains another encoded URL, assume
        # that we're hitting a paywall and the URLencoded URL is the right one
        urls_redirected_to = []

        current = response_
        for _ in range(self.max_redirect() + 1):
            previous = current.previous()
            if previous is None:
                break

            url_redirected_to = previous.request().url()
            encoded_url_redirected_to = quote(url_redirected_to)

            # The pattern does not depend on redir_url, so escape it only once per response
            encoded_url_pattern = re.escape(encoded_url_redirected_to)

            for redir_url in urls_redirected_to:
                if re.search(pattern=encoded_url_pattern,
                             string=redir_url,
                             flags=re.IGNORECASE | re.UNICODE):
                    log.debug("""
                        Encoded URL %(encoded_url_redirected_to)s is a substring of another URL %(matched_url)s, so
                        I'll assume that %(url_redirected_to)s is the correct one.
                    """ % {
                        'encoded_url_redirected_to': encoded_url_redirected_to,
                        'matched_url': redir_url,
                        'url_redirected_to': url_redirected_to,
                    })
                    return previous

            urls_redirected_to.append(url_redirected_to)

            # Step one response back in the chain; without this every iteration would look at the same
            # previous response and the rest of the chain would never be examined
            current = previous

        # No URL was found to be a substring of another URL (see above), so there's nothing to fall back to
        return None
コード例 #3
0
    def __get_follow_http_html_redirects_redirects_exhausted(self, response_: Response) -> Union[Response, None]:
        """Choose a response to fall back to from the chain of previous responses.

        Walks back through response_'s chain of previous() responses, at most max_redirect() + 1 steps. If the
        URL-encoded form of a URL found earlier in the chain is contained (case-insensitively) in a URL that was
        redirected to later, the earlier response is returned.

        Arguments:
        response_ - response whose previous() chain is inspected

        Returns:
        the previous response whose encoded URL is embedded in a later URL, or None if there is no such response

        Raises:
        McGetFollowHTTPHTMLRedirectsException - if response_ is None
        """

        if response_ is None:
            raise McGetFollowHTTPHTMLRedirectsException("Response is None.")

        # If one of the URLs that we've been redirected to contains another encoded URL, assume
        # that we're hitting a paywall and the URLencoded URL is the right one
        urls_redirected_to = []

        current = response_
        for _ in range(self.max_redirect() + 1):
            previous = current.previous()
            if previous is None:
                break

            url_redirected_to = previous.request().url()
            encoded_url_redirected_to = quote(url_redirected_to)

            # The pattern does not depend on redir_url, so escape it only once per response
            encoded_url_pattern = re.escape(encoded_url_redirected_to)

            for redir_url in urls_redirected_to:
                if re.search(pattern=encoded_url_pattern,
                             string=redir_url,
                             flags=re.IGNORECASE | re.UNICODE):
                    log.debug("""
                        Encoded URL %(encoded_url_redirected_to)s is a substring of another URL %(matched_url)s, so
                        I'll assume that %(url_redirected_to)s is the correct one.
                    """ % {
                        'encoded_url_redirected_to': encoded_url_redirected_to,
                        'matched_url': redir_url,
                        'url_redirected_to': url_redirected_to,
                    })
                    return previous

            urls_redirected_to.append(url_redirected_to)

            # Step one response back in the chain; without this every iteration would look at the same
            # previous response and the rest of the chain would never be examined
            current = previous

        # No URL was found to be a substring of another URL (see above), so there's nothing to fall back to
        return None
コード例 #4
0
ファイル: fetch_link.py プロジェクト: shafiahmed/mediacloud
def fetch_url(
        db: DatabaseHandler,
        url: str,
        network_down_host: str = DEFAULT_NETWORK_DOWN_HOST,
        network_down_port: str = DEFAULT_NETWORK_DOWN_PORT,
        network_down_timeout: int = DEFAULT_NETWORK_DOWN_TIMEOUT,
        domain_timeout: typing.Optional[int] = None
) -> Response:
    """Fetch a url and return the response.

    If fetching the url results in a 400 error and the network_down_host check reports the network as down,
    wait network_down_timeout seconds and try again.  In every other case the response is returned as is,
    whether it succeeded or failed.

    This function catches McGetFollowHTTPHTMLRedirectsException and substitutes a dummy 400 Response object,
    which then goes through the same network check as any other 400 response.

    Arguments:
    db - db handle
    url - url to fetch
    network_down_host - host to check if network is down on error
    network_down_port - port to check if network is down on error
    network_down_timeout - seconds to wait if the network is down
    domain_timeout - value to pass to ThrottledUserAgent()

    Returns:
    Response object (never None)
    """
    while True:
        ua = ThrottledUserAgent(db, domain_timeout=domain_timeout)

        try:
            response = ua.get_follow_http_html_redirects(url)
        except mediawords.util.web.user_agent.McGetFollowHTTPHTMLRedirectsException:
            response = Response(400, 'bad url', {}, 'not a http url')

        if response.is_success():
            return response

        # Only a 400 combined with a network that looks down is worth retrying; anything else is final
        if response.code() == 400 and _network_is_down(network_down_host,
                                                       network_down_port):
            log.warning(
                "Response failed with %s and network is down.  Waiting to retry ..."
                % (url, ))
            time.sleep(network_down_timeout)
        else:
            return response
0
 def from_useragent_response(cls, url: str, response: Response):
     """Build an instance of cls from a url and the user agent response fetched for it.

     last_requested_url is the url of the response's request when response.request() is truthy, and None
     otherwise.
     """
     fields = {
         'url': url,
         'is_success': response.is_success(),
         'code': response.code(),
         'message': response.message(),
         'content': response.decoded_content(),
     }

     if response.request():
         fields['last_requested_url'] = response.request().url()
     else:
         fields['last_requested_url'] = None

     return cls(**fields)
コード例 #6
0
 def from_useragent_response(cls, url: str, response: Response):
     """Build an instance of cls from a url and the user agent response fetched for it.

     last_requested_url is the url of the response's request when response.request() is truthy, and None
     otherwise.
     """
     fields = {
         'url': url,
         'is_success': response.is_success(),
         'code': response.code(),
         'message': response.message(),
         'content': response.decoded_content(),
     }

     if response.request():
         fields['last_requested_url'] = response.request().url()
     else:
         fields['last_requested_url'] = None

     return cls(**fields)
コード例 #7
0
ファイル: fetch_link.py プロジェクト: tidehc/mediacloud
def _make_dummy_bypassed_response(url: str) -> Response:
    """Return a 200 'OK' response with empty content whose request is a GET for the given url."""
    dummy_request = Request('GET', url)

    dummy_response = Response(code=200, message='OK', headers={}, data='')
    dummy_response.set_request(dummy_request)

    return dummy_response
コード例 #8
0
ファイル: __init__.py プロジェクト: berkmancenter/mediacloud
    def request(self, request: Request) -> Response:
        """Send a request and wrap the result into a Response.

        All other helpers are supposed to go through request(), as this is where max. size, callbacks,
        blacklisted URLs etc. are implemented.

        Raises McRequestException if the request is None, if it can't be prepared or executed, or if the
        executed request carries no 'requests' response.
        """

        if request is None:
            raise McRequestException("Request is None.")

        request = self.__blacklist_request_if_needed(request=request)
        self.__log_request(request=request)

        try:
            prepared = self.__prepare_request(request)
        except Exception as ex:
            prepare_error = "Unable to prepare request %s: %s" % (str(request), str(ex),)
            raise McRequestException(prepare_error)

        try:
            user_agent_response = self.__execute_request(prepared)
        except Exception as ex:
            execute_error = "Unable to execute request %s: %s" % (str(prepared), str(ex),)
            raise McRequestException(execute_error)

        if user_agent_response.requests_response is None:
            raise McRequestException("Response from 'requests' is None.")

        response = Response(
            requests_response=user_agent_response.requests_response,
            max_size=self.max_size(),
            error_is_client_side=user_agent_response.error_is_client_side,
        )

        # Link every redirect hop (newest first) as the previous() of the response that followed it
        newer_response = response
        for hop_rq_response in reversed(user_agent_response.requests_response.history):
            hop_request = Request.from_requests_prepared_request(
                requests_prepared_request=hop_rq_response.request,
            )

            # Reading the (chunked?) data of an earlier response sometimes fails with:
            #
            #      AttributeError: 'NoneType' object has no attribute 'readline'
            #
            # That data is not that important, so only warn about it and carry on.
            try:
                _ = hop_rq_response.text
            except Exception as ex:
                log.warning("Reading previous response's data failed: %s" % str(ex))
                hop_rq_response.raw_data = io.StringIO('')

            hop_response = Response(requests_response=hop_rq_response, max_size=self.max_size())
            hop_response.set_request(request=hop_request)

            newer_response.set_previous(previous=hop_response)
            newer_response = hop_response

        # After redirects the page that was actually fetched may differ from the one asked for, so the request
        # object is rebuilt from the request attached to the final 'requests' response
        final_request = Request.from_requests_prepared_request(
            requests_prepared_request=user_agent_response.requests_response.request,
        )
        response.set_request(final_request)

        return response
コード例 #9
0
ファイル: __init__.py プロジェクト: berkmancenter/mediacloud
    def __get_follow_http_html_redirects_follow_redirects(self,
                                                          response_: Response,
                                                          meta_redirects_left: int) -> Union[Response, None]:
        """Follow an HTML-level redirect found in a successful response, if there is one.

        Each HTML redirect detector is tried in turn on the response's decoded content. For the first detector
        that yields a target URL different from the response's own URL, the target is fetched, response_ is
        attached as the previous() of the oldest response in the fetched chain, and processing continues with
        one meta redirect less.

        Returns response_ itself if no detector yields a different URL, None if response_ was unsuccessful, and
        otherwise whatever the continued processing returns.

        Raises McGetFollowHTTPHTMLRedirectsException if response_ is None or if the oldest response of the
        fetched chain can't be reached within max_redirect() + 1 steps.
        """

        from mediawords.util.web.user_agent.html_redirects import (
            target_request_from_meta_refresh_url,
            target_request_from_archive_org_url,
            target_request_from_archive_is_url,
            target_request_from_linkis_com_url,
            target_request_from_alarabiya_url,
        )

        if response_ is None:
            raise McGetFollowHTTPHTMLRedirectsException("Response is None.")

        if not response_.is_success():
            log.debug("Request to %s was unsuccessful: %s" % (response_.request().url(), response_.status_line(),))

            # Return the original URL and give up
            return None

        base_url = get_base_url(response_.request().url())

        redirect_detectors = (
            target_request_from_meta_refresh_url,
            target_request_from_archive_org_url,
            target_request_from_archive_is_url,
            target_request_from_linkis_com_url,
            target_request_from_alarabiya_url,
        )

        for detect_redirect in redirect_detectors:
            target_request = detect_redirect(
                content=response_.decoded_content(),
                archive_site_url=base_url,
            )
            if target_request is None:
                continue

            log.warning("meta redirect from %s: %s" % (detect_redirect, target_request.url()))

            # A "redirect" to the very same URL is not worth following; try the next detector
            if urls_are_equal(url1=response_.request().url(), url2=target_request.url()):
                continue

            log.debug("URL after HTML redirects: %s" % target_request.url())

            fetched_response = self.request(request=target_request)

            # The fetched response might have its previous() already set due to HTTP redirects,
            # so walk back to the oldest response of that chain first
            oldest_response = fetched_response
            previous = None
            for _ in range(self.max_redirect() + 1):
                previous = oldest_response.previous()
                if previous is None:
                    break
                oldest_response = previous

            if previous is not None:
                raise McGetFollowHTTPHTMLRedirectsException(
                    "Can't find the initial redirected response; URL: %s" %
                    target_request.url()
                )

            log.debug("Setting previous of URL %(url)s to %(previous_url)s" % {
                'url': oldest_response.request().url(),
                'previous_url': response_.request().url(),
            })
            oldest_response.set_previous(response_)

            return self.__get_follow_http_html_redirects(
                response_=fetched_response,
                meta_redirects_left=meta_redirects_left - 1,
            )

        # No <meta /> refresh, the current URL is the final one
        return response_
コード例 #10
0
    def request(self, request: Request) -> Response:
        """Execute a request, return a response.

        All other helpers are supposed to use request() internally as it implements max. size, callbacks, blacklisted
        URLs etc.

        The request is validated, turned into a prepared 'requests' request and sent with streaming enabled.
        Failures while sending are not raised; they are converted into a response instead:

        * too many redirects - the response attached to the exception is used, with the exception text as data
        * timeout - a synthetic "request timeout" response, flagged as a client-side error
        * any other exception - a synthetic "bad request" response with a 'Client-Warning' header, flagged as a
          client-side error

        If sending succeeded, the body is read chunk by chunk (unless Content-Length already exceeds max_size())
        and reading stops once more than max_size() has been read. A 'requests' error while reading produces
        a synthetic "request timeout" response flagged as a client-side error.

        Finally the chain of previous responses is built from the redirect history and the response's request is
        set from the request of the final 'requests' response.

        Raises McRequestException if the request, its method, URL or headers is None, if only one of the HTTP
        authentication credentials is set (or only one of them is empty), if the request can't be prepared, or
        if the 'requests' response or the response data ends up being None.
        """

        if request is None:
            raise McRequestException("Request is None.")

        request = self.__blacklist_request_if_needed(request=request)

        self.__log_request(request=request)

        method = request.method()
        if method is None:
            raise McRequestException("Request's method is None.")

        url = request.url()
        if url is None:
            raise McRequestException("Request's URL is None.")

        headers = request.headers()
        if headers is None:
            raise McRequestException("Request's headers is None.")

        # Username and password must be either both None or both set
        auth_username = request.auth_username()
        auth_password = request.auth_password()
        if ((auth_username is None and auth_password is not None)
                or (auth_username is not None and auth_password is None)):
            raise McRequestException(
                "Either both or none of HTTP authentication credentials must be not None."
            )

        auth = None
        if auth_username is not None and auth_password is not None:
            # Likewise, they must be either both empty or both non-empty
            if ((len(auth_username) == 0 and len(auth_password) > 0)
                    or (len(auth_username) > 0 and len(auth_password) == 0)):
                raise McRequestException(
                    "Either both or none of HTTP authentication credentials must be not Empty."
                )

            # NOTE(review): two empty strings pass the check above and still produce an HTTPBasicAuth object
            auth = HTTPBasicAuth(auth_username, auth_password)

        data = request.content()

        try:
            requests_request = requests.Request(
                method=method,
                url=url,
                data=data,
                headers=headers,
                auth=auth,
            )
            requests_prepared_request = self.__session.prepare_request(
                requests_request)

        except Exception as ex:
            raise McRequestException("Unable to prepare request %s: %s" % (
                str(request),
                str(ex),
            ))

        error_is_client_side = False

        try:
            requests_response = self.__session.send(
                request=requests_prepared_request,
                timeout=self.timeout(),

                # To be able to enforce max_size
                stream=True,
            )

        except requests.TooManyRedirects as ex:

            # On too many redirects, return the last fetched page (just like LWP::UserAgent does)
            log.warning("Exceeded max. redirects for URL %s" % request.url())
            # If the exception carries no response, the None check after this try statement raises
            requests_response = ex.response
            response_data = str(ex)

        except requests.Timeout as ex:

            log.warning("Timeout for URL %s" % request.url())

            # We treat timeouts as client-side errors too because we can retry on them
            error_is_client_side = True

            # Synthesize a "request timeout" response as there is no real one to return
            requests_response = requests.Response()
            requests_response.status_code = HTTPStatus.REQUEST_TIMEOUT.value
            requests_response.reason = HTTPStatus.REQUEST_TIMEOUT.phrase
            requests_response.request = requests_prepared_request

            requests_response.history = []

            response_data = str(ex)

        except Exception as ex:

            # Client-side error
            log.warning("Client-side error while processing request %s: %s" % (
                str(request),
                str(ex),
            ))

            error_is_client_side = True

            # Synthesize a "bad request" response as there is no real one to return
            requests_response = requests.Response()
            requests_response.status_code = HTTPStatus.BAD_REQUEST.value
            requests_response.reason = "Client-side error"
            requests_response.request = requests_prepared_request

            # Previous request / response chain is not built for client-side errored requests
            requests_response.history = []

            requests_response.headers = {
                # LWP::UserAgent compatibility
                'Client-Warning': 'Client-side error',
            }

            response_data = str(ex)

        else:

            # Sending succeeded, so read the (streamed) response body while enforcing max_size
            try:

                max_size = self.max_size()

                response_data = ""
                read_response_data = True

                if max_size is not None:
                    content_length = requests_response.headers.get(
                        'Content-Length', None)

                    if content_length is not None:
                        # NOTE(review): a non-numeric Content-Length raises ValueError here, which the
                        # "except requests.RequestException" below presumably does not catch - confirm
                        content_length = int(content_length)
                        if content_length > max_size:
                            log.warning(
                                "Content-Length exceeds %d for URL %s" % (
                                    max_size,
                                    url,
                                ))

                            # Release the response to return connection back to the pool
                            # (http://docs.python-requests.org/en/master/user/advanced/#body-content-workflow)
                            requests_response.close()

                            # The body is skipped altogether, so response_data stays empty
                            read_response_data = False

                if read_response_data:

                    if requests_response.encoding is None:

                        if requests_response.apparent_encoding is None:
                            # If encoding is not in HTTP headers nor can be determined from content itself, assume that
                            # it's UTF-8
                            requests_response.encoding = 'UTF-8'

                        else:
                            # Test the encoding guesser's opinion, just like browsers do
                            requests_response.encoding = requests_response.apparent_encoding

                    else:

                        # If "Content-Type" HTTP header contains a string "text" and doesn't have "charset" property,
                        # "requests" falls back to setting the encoding to ISO-8859-1, which is probably not right
                        # (encoding might have been defined in the HTML content itself via <meta> tag), so we use the
                        # "apparent encoding" instead
                        if requests_response.encoding.lower() == 'iso-8859-1':
                            if requests_response.apparent_encoding is not None:
                                requests_response.encoding = requests_response.apparent_encoding

                    # Some pages report some funky encoding; in that case, fallback to UTF-8
                    try:
                        codecs.lookup(requests_response.encoding)
                    except LookupError:
                        log.warning("Invalid encoding %s for URL %s" %
                                    (requests_response.encoding,
                                     requests_response.url))
                        requests_response.encoding = 'UTF-8'

                    # Size is counted with len() of each chunk as yielded with decode_unicode=True
                    response_data_size = 0
                    for chunk in requests_response.iter_content(
                            chunk_size=None, decode_unicode=True):
                        response_data += chunk
                        response_data_size += len(chunk)

                        # Content-Length might be missing / lying, so we measure size while fetching the data too
                        if max_size is not None:
                            if response_data_size > max_size:
                                log.warning("Data size exceeds %d for URL %s" %
                                            (
                                                max_size,
                                                url,
                                            ))

                                # Release the response to return connection back to the pool
                                # (http://docs.python-requests.org/en/master/user/advanced/#body-content-workflow)
                                requests_response.close()

                                # The chunk that crossed the limit has already been appended and is kept
                                break

            except requests.RequestException as ex:

                log.warning("Error reading data for URL %s" % request.url())

                # We treat timeouts as client-side errors too because we can retry on them
                error_is_client_side = True

                # Any 'requests' error while reading the body is reported as a "request timeout" response;
                # the real response (and its redirect history) is discarded
                requests_response = requests.Response()
                requests_response.status_code = HTTPStatus.REQUEST_TIMEOUT.value
                requests_response.reason = HTTPStatus.REQUEST_TIMEOUT.phrase
                requests_response.request = requests_prepared_request

                requests_response.history = []

                response_data = str(ex)

        if requests_response is None:
            raise McRequestException("Response from 'requests' is None.")

        if response_data is None:
            # Probably a programming error
            raise McRequestException("Response data is None.")

        response = Response.from_requests_response(
            requests_response=requests_response,
            data=response_data,
        )

        if error_is_client_side:
            response.set_error_is_client_side(
                error_is_client_side=error_is_client_side)

        # Build the previous request / response chain from the redirects
        current_response = response
        for previous_rq_response in reversed(requests_response.history):
            previous_rq_request = previous_rq_response.request
            previous_response_request = Request.from_requests_prepared_request(
                requests_prepared_request=previous_rq_request)

            # No data is passed for the responses of the intermediate redirects
            previous_response = Response.from_requests_response(
                requests_response=previous_rq_response)
            previous_response.set_request(request=previous_response_request)

            current_response.set_previous(previous=previous_response)
            current_response = previous_response

        # Redirects might have happened, so we have to recreate the request object from the latest page that was
        # redirected to
        response_request = Request.from_requests_prepared_request(
            requests_prepared_request=requests_response.request)
        response.set_request(response_request)

        return response
コード例 #11
0
        def __inner_follow_redirects(
                response_: Response,
                meta_redirects_left: int) -> Union[Response, None]:
            """Follow an HTML-level redirect found in a successful response, if there is one.

            Each HTML redirect detector is tried in turn on the response's decoded content. For the first
            detector that yields a target URL different from the response's own URL, the target is fetched,
            response_ is attached as the previous() of the oldest response in the fetched chain, and processing
            continues with one meta redirect less.

            Returns response_ itself if no detector yields a different URL, None if response_ was unsuccessful,
            and otherwise whatever the continued processing returns.

            Raises McGetFollowHTTPHTMLRedirectsException if response_ is None or if the oldest response of the
            fetched chain can't be reached within max_redirect() + 1 steps.
            """

            from mediawords.util.web.user_agent.html_redirects import (
                target_request_from_meta_refresh_url,
                target_request_from_archive_org_url,
                target_request_from_archive_is_url,
                target_request_from_linkis_com_url,
                target_request_from_alarabiya_url,
            )

            if response_ is None:
                raise McGetFollowHTTPHTMLRedirectsException(
                    "Response is None.")

            if not response_.is_success():
                log.debug("Request to %s was unsuccessful: %s" % (
                    response_.request().url(),
                    response_.status_line(),
                ))

                # Return the original URL and give up
                return None

            base_url = get_base_url(response_.request().url())

            redirect_detectors = (
                target_request_from_meta_refresh_url,
                target_request_from_archive_org_url,
                target_request_from_archive_is_url,
                target_request_from_linkis_com_url,
                target_request_from_alarabiya_url,
            )

            for detect_redirect in redirect_detectors:
                target_request = detect_redirect(
                    content=response_.decoded_content(),
                    archive_site_url=base_url,
                )
                if target_request is None:
                    continue

                # A "redirect" to the very same URL is not worth following; try the next detector
                if urls_are_equal(
                        url1=response_.request().url(),
                        url2=target_request.url()):
                    continue

                log.debug("URL after HTML redirects: %s" %
                          target_request.url())

                fetched_response = self.request(request=target_request)

                # The fetched response might have its previous() already set due to HTTP redirects,
                # so walk back to the oldest response of that chain first
                oldest_response = fetched_response
                previous = None
                for _ in range(self.max_redirect() + 1):
                    previous = oldest_response.previous()
                    if previous is None:
                        break
                    oldest_response = previous

                if previous is not None:
                    raise McGetFollowHTTPHTMLRedirectsException(
                        "Can't find the initial redirected response; URL: %s"
                        % target_request.url())

                log.debug(
                    "Setting previous of URL %(url)s to %(previous_url)s"
                    % {
                        'url': oldest_response.request().url(),
                        'previous_url': response_.request().url(),
                    })
                oldest_response.set_previous(response_)

                return __inner(
                    response_=fetched_response,
                    meta_redirects_left=meta_redirects_left - 1,
                )

            # No <meta /> refresh, the current URL is the final one
            return response_
コード例 #12
0
ファイル: __init__.py プロジェクト: robpotter89/backend
    def request(self, request: Request) -> Response:
        """Execute a request, return a response.

        All other helpers are supposed to use request() internally as it implements max. size, callbacks, blacklisted
        URLs etc.

        The chain of previous responses is built from the redirect history of the 'requests' response, and the
        returned response's request is set from the request of the final 'requests' response.

        Raises McRequestException if the request is None, if it can't be prepared or executed (the original
        exception is attached as the cause), or if the executed request carries no 'requests' response.
        """

        if request is None:
            raise McRequestException("Request is None.")

        request = self.__blacklist_request_if_needed(request=request)

        self.__log_request(request=request)

        try:
            requests_prepared_request = self.__prepare_request(request)
        except Exception as ex:
            raise McRequestException("Unable to prepare request %s: %s" % (
                str(request),
                str(ex),
            )) from ex

        try:
            user_agent_response = self.__execute_request(
                requests_prepared_request)
        except Exception as ex:
            # Always wrap the failure so that callers only have to handle McRequestException
            raise McRequestException("Unable to execute request %s: %s" % (
                str(requests_prepared_request),
                str(ex),
            )) from ex

        if user_agent_response.requests_response is None:
            raise McRequestException("Response from 'requests' is None.")

        response = Response(
            requests_response=user_agent_response.requests_response,
            max_size=self.max_size(),
            error_is_client_side=user_agent_response.error_is_client_side,
        )

        # Build the previous request / response chain from the redirects
        current_response = response
        for previous_rq_response in reversed(
                user_agent_response.requests_response.history):
            previous_rq_request = previous_rq_response.request
            previous_response_request = Request.from_requests_prepared_request(
                requests_prepared_request=previous_rq_request)

            # Sometimes reading the (chunked?) previous response's data fails with:
            #
            #      AttributeError: 'NoneType' object has no attribute 'readline'
            #
            # Previous response's data is not that important, so fail rather silently.
            try:
                previous_rq_response.text
            except Exception as ex:
                log.warning("Reading previous response's data failed: %s" %
                            str(ex))
                previous_rq_response.raw_data = io.StringIO('')

            previous_response = Response(
                requests_response=previous_rq_response,
                max_size=self.max_size())
            previous_response.set_request(request=previous_response_request)

            current_response.set_previous(previous=previous_response)
            current_response = previous_response

        # Redirects might have happened, so we have to recreate the request object from the latest page that was
        # redirected to
        response_request = Request.from_requests_prepared_request(
            requests_prepared_request=user_agent_response.requests_response.
            request)
        response.set_request(response_request)

        return response
コード例 #13
0
    def request(self, request: Request) -> Response:
        """Send a request and wrap the result into a Response.

        All other helpers are supposed to go through request(), as this is where max. size, callbacks,
        blacklisted URLs etc. are implemented.

        If reading the response data fails, the 'requests' response is replaced with a synthetic "request
        timeout" response flagged as a client-side error, and the exception text becomes the response data.

        Raises McRequestException if the request is None, if it can't be prepared or executed, or if the
        'requests' response or the response data ends up being None.
        """

        if request is None:
            raise McRequestException("Request is None.")

        request = self.__blacklist_request_if_needed(request=request)
        self.__log_request(request=request)

        try:
            prepared = self.__prepare_request(request)
        except Exception as ex:
            prepare_error = "Unable to prepare request %s: %s" % (str(request), str(ex),)
            raise McRequestException(prepare_error)

        try:
            user_agent_response = self.__execute_request(prepared)
        except Exception as ex:
            execute_error = "Unable to execute request %s: %s" % (str(prepared), str(ex),)
            raise McRequestException(execute_error)

        try:
            response_data = self.__read_response_data(user_agent_response.requests_response)
        except Exception as ex:
            log.warning("Error reading data for URL %s" % request.url())

            # Swap in a synthetic "request timeout" response with an empty redirect history
            user_agent_response.requests_response = requests.Response()
            synthetic = user_agent_response.requests_response
            synthetic.status_code = HTTPStatus.REQUEST_TIMEOUT.value
            synthetic.reason = HTTPStatus.REQUEST_TIMEOUT.phrase
            synthetic.request = prepared
            synthetic.history = []

            # Timeouts count as client-side errors too, because they can be retried
            user_agent_response.error_is_client_side = True

            response_data = str(ex)

        if user_agent_response.requests_response is None:
            raise McRequestException("Response from 'requests' is None.")

        if response_data is None:
            # Probably a programming error
            raise McRequestException("Response data is None.")

        response = Response.from_requests_response(
            requests_response=user_agent_response.requests_response,
            data=response_data,
        )

        if user_agent_response.error_is_client_side is True:
            response.set_error_is_client_side(error_is_client_side=user_agent_response.error_is_client_side)

        # Link every redirect hop (newest first) as the previous() of the response that followed it
        newer_response = response
        for hop_rq_response in reversed(user_agent_response.requests_response.history):
            hop_request = Request.from_requests_prepared_request(
                requests_prepared_request=hop_rq_response.request,
            )

            # Reading the (chunked?) data of an earlier response sometimes fails with:
            #
            #      AttributeError: 'NoneType' object has no attribute 'readline'
            #
            # That data is not that important, so only warn about it and use empty data instead.
            try:
                hop_data = hop_rq_response.text
            except Exception as ex:
                log.warning("Reading previous response's data failed: %s" % str(ex))
                hop_data = ''

            hop_response = Response.from_requests_response(
                requests_response=hop_rq_response,
                data=hop_data,
            )
            hop_response.set_request(request=hop_request)

            newer_response.set_previous(previous=hop_response)
            newer_response = hop_response

        # After redirects the page that was actually fetched may differ from the one asked for, so the request
        # object is rebuilt from the request attached to the final 'requests' response
        final_request = Request.from_requests_prepared_request(
            requests_prepared_request=user_agent_response.requests_response.request,
        )
        response.set_request(final_request)

        return response