def process_response(self, response):
        '''Passes response (Response or Failure object) received from
        Downloader through pipeline middlewares.

        Every entry of ``self._process_response`` is tried in order. An entry
        is skipped when the request's ``meta`` holds a falsy value under the
        entry's ``enabled_setting`` key. Response objects are handed to the
        entry's ``process_response`` handler, anything else (i.e. Failure
        objects) to its ``process_failure`` handler.

        Return value is either Request, Response or Failure object:

        * a Request returned by a handler is returned immediately, skipping
          the remaining middlewares;
        * ``None`` returned by a handler drops the request: a Failure wrapping
          DropRequest is returned, with its ``request`` attribute set;
        * otherwise the Response/Failure left after the last middleware, with
          its ``request`` attribute set to the original request.

        An AssertionError is raised when a handler returns any other type.
        '''
        # we can be sure that response.request is set from the downloader
        request = response.request

        for name, enabled_setting, pr, pf in self._process_response:
            # skip disabled mw through meta
            if not request.meta.get(enabled_setting, True):
                continue

            # pick the handler matching the current value and remember its
            # name so that both error messages below report the right method
            if isinstance(response, Response):
                method, method_name = pr, 'process_response()'
            else:
                method, method_name = pf, 'process_failure()'

            try:
                response = method(response)
            except:
                # Deliberately catch-all: whatever the middleware raises is
                # turned into a Failure and handed to the next middleware.
                # NOTE(review): relies on Failure() without arguments
                # capturing the exception being handled (twisted semantics)
                # - confirm Failure is twisted's.
                response = Failure()

            # The message uses `name` rather than `method.im_self`, which only
            # exists on Python 2 bound methods and would raise AttributeError
            # here for plain callables, hiding the real problem.
            assert response is None or isinstance(response, (Request, Response, Failure)), \
                'Middleware %s.%s must return None, Response, Request or Failure, got %s' % \
                (name, method_name, type(response))

            if response is None:
                failure = Failure(DropRequest(
                    '`%s` pipeline middleware dropped the request in `%s` method' %
                    (name, method_name)))
                failure.request = request
                return failure

            # a Request short-circuits the rest of the pipeline
            if not isinstance(response, (Response, Failure)):
                return response

            # make sure, request attribute is always set
            response.request = request
        return response
Exemple #2
0
    def process_response(self, response):
        '''Passes response (Response or Failure object) received from
        Downloader through pipeline middlewares.

        Every entry of ``self._process_response`` is tried in order. An entry
        is skipped when the request's ``meta`` holds a falsy value under the
        entry's ``enabled_setting`` key. Response objects are handed to the
        entry's ``process_response`` handler, anything else (i.e. Failure
        objects) to its ``process_failure`` handler.

        Return value is either Request, Response or Failure object:

        * a Request returned by a handler is returned immediately, skipping
          the remaining middlewares;
        * ``None`` returned by a handler drops the request: a Failure wrapping
          DropRequest is returned, with its ``request`` attribute set;
        * otherwise the Response/Failure left after the last middleware, with
          its ``request`` attribute set to the original request.

        An AssertionError is raised when a handler returns any other type.
        '''
        # we can be sure that response.request is set from the downloader
        request = response.request

        for name, enabled_setting, pr, pf in self._process_response:
            # skip disabled mw through meta
            if not request.meta.get(enabled_setting, True):
                continue

            # pick the handler matching the current value and remember its
            # name so that both error messages below report the right method
            if isinstance(response, Response):
                method, method_name = pr, 'process_response()'
            else:
                method, method_name = pf, 'process_failure()'

            try:
                response = method(response)
            except:
                # Deliberately catch-all: whatever the middleware raises is
                # turned into a Failure and handed to the next middleware.
                # NOTE(review): relies on Failure() without arguments
                # capturing the exception being handled (twisted semantics)
                # - confirm Failure is twisted's.
                response = Failure()

            # The message uses `name` rather than `method.im_self`, which only
            # exists on Python 2 bound methods and would raise AttributeError
            # here for plain callables, hiding the real problem.
            assert response is None or isinstance(response, (Request, Response, Failure)), \
                'Middleware %s.%s must return None, Response, Request or Failure, got %s' % \
                (name, method_name, type(response))

            if response is None:
                failure = Failure(
                    DropRequest(
                        '`%s` pipeline middleware dropped the request in `%s` method'
                        % (name, method_name)))
                failure.request = request
                return failure

            # a Request short-circuits the rest of the pipeline
            if not isinstance(response, (Response, Failure)):
                return response

            # make sure, request attribute is always set
            response.request = request
        return response
    def _test_retry_exception(self, req, exception):
        '''Check that a failure wrapping `exception` is retried twice and
        then handed back unchanged.

        `req` is the request attached to the failure on the first pass; each
        following pass attaches whatever the previous pass returned.
        '''
        failure = Failure(exception)

        # first and second retry: a Request must come back each time, with
        # its 'retry_times' meta value matching the attempt number
        for expected_retries in (1, 2):
            failure.request = req
            req = self.mw.process_failure(failure)
            self.assertIsInstance(req, Request)
            self.assertEqual(req.meta['retry_times'], expected_retries)

        # third pass: no more retries, the very same failure is returned
        failure.request = req
        req = self.mw.process_failure(failure)
        self.assertIs(req, failure)
Exemple #4
0
    def _test_retry_exception(self, req, exception):
        '''Check that a failure wrapping `exception` is retried twice and
        then handed back unchanged.

        `req` is the request attached to the failure on the first pass; each
        following pass attaches whatever the previous pass returned.
        '''
        failure = Failure(exception)

        # first and second retry: a Request must come back each time, with
        # its 'retry_times' meta value matching the attempt number
        for expected_retries in (1, 2):
            failure.request = req
            req = self.mw.process_failure(failure)
            self.assertIsInstance(req, Request)
            self.assertEqual(req.meta['retry_times'], expected_retries)

        # third pass: no more retries, the very same failure is returned
        failure.request = req
        req = self.mw.process_failure(failure)
        self.assertIs(req, failure)
Exemple #5
0
    def test_processing(self):
        '''Feed responses and failures into the engine's response queue and
        check which signals are reported for each scenario.
        '''
        def feed(item, idle_ticks):
            # queue `item`, then advance the clock by one queue-check
            # interval followed by `idle_ticks` zero-length steps
            self.engine.response_queue.push(item)
            self.clock.pump(
                [self.engine.QUEUE_CHECK_FREQUENCY] + [0] * idle_ticks)

        self.engine.start()
        del self.sp.received[:]

        request = Request('http://github.com/')
        response = Response('', request=request)

        # normal behavior
        feed(response, 1)
        self.check_signals([
            signals.response_downloaded,
            signals.response_received,
        ])

        # download error
        download_error = Failure(Exception())
        download_error.request = request
        feed(download_error, 3)
        self.check_signals([
            signals.failure_received,
            signals.spider_error,
        ])

        # pipeline None
        self.pipeline.resp = lambda req: None
        feed(response, 2)
        self.check_signals([
            signals.response_downloaded,
            signals.failure_received,
            signals.spider_error,
        ])

        # pipeline request
        self.pipeline.resp = lambda req: Request('http://github.com/')
        feed(response, 2)
        self.check_signals([
            signals.response_downloaded,
            signals.request_received,
        ])
        # the returned request must have been queued; remove it again so the
        # request queue is empty for the next scenario
        self.assertEqual(len(self.engine.request_queue), 1)
        self.engine.request_queue.pop()

        # pipeline failure
        self.pipeline.resp = lambda req: Failure(Exception())
        feed(response, 2)
        self.check_signals([
            signals.response_downloaded,
            signals.failure_received,
            signals.spider_error,
        ])
Exemple #6
0
    def test_processing(self):
        """Feed responses and failures into the engine's response queue and
        check which signals are reported for each scenario.
        """
        def feed(item, idle_ticks):
            # queue `item`, then advance the clock by one queue-check
            # interval followed by `idle_ticks` zero-length steps
            self.engine.response_queue.push(item)
            self.clock.pump(
                [self.engine.QUEUE_CHECK_FREQUENCY] + [0] * idle_ticks)

        self.engine.start()
        del self.sp.received[:]

        request = Request("http://github.com/")
        response = Response("", request=request)

        # normal behavior
        feed(response, 1)
        self.check_signals([
            signals.response_downloaded,
            signals.response_received,
        ])

        # download error
        download_error = Failure(Exception())
        download_error.request = request
        feed(download_error, 3)
        self.check_signals([
            signals.failure_received,
            signals.spider_error,
        ])

        # pipeline None
        self.pipeline.resp = lambda req: None
        feed(response, 2)
        self.check_signals([
            signals.response_downloaded,
            signals.failure_received,
            signals.spider_error,
        ])

        # pipeline request
        self.pipeline.resp = lambda req: Request("http://github.com/")
        feed(response, 2)
        self.check_signals([
            signals.response_downloaded,
            signals.request_received,
        ])
        # the returned request must have been queued; remove it again so the
        # request queue is empty for the next scenario
        self.assertEqual(len(self.engine.request_queue), 1)
        self.engine.request_queue.pop()

        # pipeline failure
        self.pipeline.resp = lambda req: Failure(Exception())
        feed(response, 2)
        self.check_signals([
            signals.response_downloaded,
            signals.failure_received,
            signals.spider_error,
        ])