def process_response(self, response):
    '''Passes response (Response or Failure object) received from
    Downloader through pipeline middlewares.

    Every entry of `self._process_response` is a
    `(name, enabled_setting, pr, pf)` tuple. For each enabled entry the
    current result is handed to `pr` when it is a Response and to `pf`
    otherwise. An exception raised by the middleware is converted to a
    Failure and processing continues with the next middleware.

    Return value is either Request, Response or Failure object:

    * a Request, returned immediately when a middleware produces one;
    * a Failure wrapping DropRequest, returned immediately when a
      middleware returns None;
    * otherwise the Response or Failure left after the last middleware.

    Returned Response and Failure objects carry the original request in
    their `request` attribute.
    '''
    # we can be sure that response.request is set from the downloader
    request = response.request

    for name, enabled_setting, pr, pf in self._process_response:
        # skip disabled mw through meta
        if not request.meta.get(enabled_setting, True):
            continue

        method = pr if isinstance(response, Response) else pf
        # resolved up front so that both error paths below report the
        # method that was actually called
        method_name = 'process_response()' if method is pr else 'process_failure()'
        try:
            response = method(response)
        except Exception:
            # Failure() without arguments presumably captures the exception
            # being handled (twisted semantics) - confirm against the
            # Failure class imported by this module.
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            response = Failure()

        # `__self__` is the portable spelling of the Python 2 only `im_self`
        assert response is None or isinstance(response, (Request, Response, Failure)), \
            'Middleware %s.%s must return None, Response, Request or Failure, got %s' % \
            (method.__self__.__class__.__name__, method_name, type(response))

        if response is None:
            # None means that the middleware dropped the request
            failure = Failure(DropRequest(
                '`%s` pipeline middleware dropped the request in `%s` method' %
                (name, method_name)))
            failure.request = request
            return failure
        if not isinstance(response, (Response, Failure)):
            # a Request - hand it back without running remaining middlewares
            return response
        # make sure, request attribute is always set
        response.request = request
    return response
def process_response(self, response):
    '''Passes response (Response or Failure object) received from
    Downloader through pipeline middlewares.

    `self._process_response` holds `(name, enabled_setting, pr, pf)`
    tuples. A middleware is skipped when `request.meta` maps its
    `enabled_setting` to a false value. Otherwise `pr` is called for a
    Response and `pf` for anything else; an exception raised by the call
    is turned into a Failure which is passed on to the next middleware.

    Return value is either Request, Response or Failure object:

    * a Request, as soon as some middleware returns one;
    * a Failure wrapping DropRequest, as soon as some middleware
      returns None;
    * otherwise the Response or Failure produced by the last middleware.

    The `request` attribute of a returned Response or Failure is the
    request of the object originally passed in.
    '''
    # we can be sure that response.request is set from the downloader
    request = response.request

    for name, enabled_setting, pr, pf in self._process_response:
        # skip disabled mw through meta
        if not request.meta.get(enabled_setting, True):
            continue

        method = pr if isinstance(response, Response) else pf
        # computed before the call, so the assertion message can name the
        # method that really ran instead of a hard-coded one
        method_name = 'process_response()' if method is pr else 'process_failure()'
        try:
            response = method(response)
        except Exception:
            # only ordinary errors become a Failure; interpreter-exit
            # exceptions (KeyboardInterrupt, SystemExit) propagate.
            # NOTE(review): Failure() is assumed to pick up the active
            # exception, as twisted's Failure does - confirm.
            response = Failure()

        # bound methods expose their instance as `__self__` on both
        # Python 2.6+ and Python 3 (`im_self` exists on Python 2 only)
        assert response is None or isinstance(response, (Request, Response, Failure)), \
            'Middleware %s.%s must return None, Response, Request or Failure, got %s' % \
            (method.__self__.__class__.__name__, method_name, type(response))

        if response is None:
            # the middleware dropped the request
            failure = Failure(
                DropRequest(
                    '`%s` pipeline middleware dropped the request in `%s` method' %
                    (name, method_name)))
            failure.request = request
            return failure
        if not isinstance(response, (Response, Failure)):
            # a Request short-circuits the rest of the pipeline
            return response
        # make sure, request attribute is always set
        response.request = request
    return response
def _test_retry_exception(self, req, exception):
    '''Check how `self.mw.process_failure` reacts to repeated failures.

    `exception` is wrapped in a single Failure which is attached to the
    most recent request and handed to the middleware three times: the
    first two calls must return a Request whose `retry_times` meta value
    is 1 and then 2, the third call must return the failure itself.
    '''
    failure = Failure(exception)

    # first and second retry
    for expected_retries in (1, 2):
        failure.request = req
        req = self.mw.process_failure(failure)
        self.assertIsInstance(req, Request)
        self.assertEqual(req.meta['retry_times'], expected_retries)

    # discard it: the failure comes back instead of another Request
    failure.request = req
    req = self.mw.process_failure(failure)
    self.assertIs(req, failure)
def test_processing(self):
    '''Push responses and failures into the started engine and check the
    signals reported for each outcome of the response pipeline.
    '''
    def feed(item, idle_ticks, expected_signals):
        # queue one item, advance the clock by the queue check interval
        # followed by `idle_ticks` zero-length steps, then verify signals
        self.engine.response_queue.push(item)
        self.clock.pump([self.engine.QUEUE_CHECK_FREQUENCY] + [0] * idle_ticks)
        self.check_signals(expected_signals)

    self.engine.start()
    # empty the list in place before the scenarios start
    del self.sp.received[:]

    req = Request('http://github.com/')
    resp = Response('', request=req)

    # normal behavior
    feed(resp, 1, [signals.response_downloaded, signals.response_received])

    # download error
    fail = Failure(Exception())
    fail.request = req
    feed(fail, 3, [signals.failure_received, signals.spider_error])

    # pipeline None
    self.pipeline.resp = lambda req: None
    feed(resp, 2, [signals.response_downloaded,
                   signals.failure_received,
                   signals.spider_error])

    # pipeline request: the new request has to land in the request queue
    self.pipeline.resp = lambda req: Request('http://github.com/')
    feed(resp, 2, [signals.response_downloaded, signals.request_received])
    self.assertEqual(len(self.engine.request_queue), 1)
    self.engine.request_queue.pop()

    # pipeline failure
    self.pipeline.resp = lambda req: Failure(Exception())
    feed(resp, 2, [signals.response_downloaded,
                   signals.failure_received,
                   signals.spider_error])
def test_processing(self):
    """Drive the engine's response queue through five scenarios (plain
    response, download failure, pipeline returning None, a Request or a
    Failure) and check the signals expected for each of them.
    """
    def cycle(item, zero_steps):
        # enqueue `item`, then pump the clock: one queue check interval
        # followed by the given zero-length steps
        self.engine.response_queue.push(item)
        self.clock.pump([self.engine.QUEUE_CHECK_FREQUENCY] + zero_steps)

    self.engine.start()
    # clear the list in place before the first scenario
    del self.sp.received[:]

    # normal behavior
    req = Request("http://github.com/")
    resp = Response("", request=req)
    cycle(resp, [0])
    expected = [signals.response_downloaded, signals.response_received]
    self.check_signals(expected)

    # download error
    fail = Failure(Exception())
    fail.request = req
    cycle(fail, [0, 0, 0])
    expected = [signals.failure_received, signals.spider_error]
    self.check_signals(expected)

    # pipeline None
    self.pipeline.resp = lambda req: None
    cycle(resp, [0, 0])
    expected = [signals.response_downloaded, signals.failure_received, signals.spider_error]
    self.check_signals(expected)

    # pipeline request
    self.pipeline.resp = lambda req: Request("http://github.com/")
    cycle(resp, [0, 0])
    expected = [signals.response_downloaded, signals.request_received]
    self.check_signals(expected)
    # exactly one request must have been queued; remove it again
    self.assertEqual(len(self.engine.request_queue), 1)
    self.engine.request_queue.pop()

    # pipeline failure
    self.pipeline.resp = lambda req: Failure(Exception())
    cycle(resp, [0, 0])
    expected = [signals.response_downloaded, signals.failure_received, signals.spider_error]
    self.check_signals(expected)