def _itemproc_finished(self, output: Any, item: Any, response: Response, spider: Spider) -> None:
    """Log and signal the outcome of running ``item`` through the item processor.

    ``output`` is either the processed item or a ``Failure`` wrapping the
    exception raised during processing. The slot's ``itemproc_size`` counter
    is decremented first, then exactly one of the ``item_scraped``,
    ``item_dropped`` or ``item_error`` signals is sent.

    The value returned by ``send_catch_log_deferred`` is passed back to the
    caller.
    NOTE(review): the ``-> None`` annotation does not match that returned
    value -- confirm the intended return type.
    """
    assert self.slot is not None  # typing
    self.slot.itemproc_size -= 1

    # Success path: ``output`` is the processed item.
    if not isinstance(output, Failure):
        scraped_kws = self.logformatter.scraped(output, response, spider)
        if scraped_kws is not None:
            logger.log(*logformatter_adapter(scraped_kws), extra={'spider': spider})
        return self.signals.send_catch_log_deferred(
            signal=signals.item_scraped,
            item=output,
            response=response,
            spider=spider,
        )

    error = output.value

    # The item was dropped on purpose: log it (unless the formatter returned
    # None) and report it through ``item_dropped``.
    if isinstance(error, DropItem):
        dropped_kws = self.logformatter.dropped(item, error, response, spider)
        if dropped_kws is not None:
            logger.log(*logformatter_adapter(dropped_kws), extra={'spider': spider})
        return self.signals.send_catch_log_deferred(
            signal=signals.item_dropped,
            item=item,
            response=response,
            spider=spider,
            exception=output.value,
        )

    # Any other exception: log it with traceback info and send ``item_error``.
    error_kws = self.logformatter.item_error(item, error, response, spider)
    logger.log(
        *logformatter_adapter(error_kws),
        extra={'spider': spider},
        exc_info=failure_to_exc_info(output),
    )
    return self.signals.send_catch_log_deferred(
        signal=signals.item_error,
        item=item,
        response=response,
        spider=spider,
        failure=output,
    )
def _itemproc_finished(self, output, item, response, spider):
    """ItemProcessor finished for the given ``item`` and returned ``output``.

    Decrements the slot's ``itemproc_size`` counter, logs the outcome and
    sends the matching signal:

    * ``output`` is a ``Failure`` wrapping ``DropItem`` -> ``item_dropped``
    * ``output`` is any other ``Failure`` -> ``item_error``
    * otherwise -> ``item_scraped`` (``output`` is the processed item)

    Returns whatever ``send_catch_log_deferred`` returns for that signal.
    """
    self.slot.itemproc_size -= 1
    if isinstance(output, Failure):
        ex = output.value
        if isinstance(ex, DropItem):
            logkws = self.logformatter.dropped(item, ex, response, spider)
            # Skip the log call when the formatter returns None instead of
            # handing None to logformatter_adapter.
            if logkws is not None:
                logger.log(*logformatter_adapter(logkws), extra={'spider': spider})
            return self.signals.send_catch_log_deferred(
                signal=signals.item_dropped, item=item, response=response,
                spider=spider, exception=output.value)
        logger.error('Error processing %(item)s', {'item': item},
                     exc_info=failure_to_exc_info(output),
                     extra={'spider': spider})
        return self.signals.send_catch_log_deferred(
            signal=signals.item_error, item=item, response=response,
            spider=spider, failure=output)
    logkws = self.logformatter.scraped(output, response, spider)
    # Same guard as above: a None result means "do not log this event".
    if logkws is not None:
        logger.log(*logformatter_adapter(logkws), extra={'spider': spider})
    return self.signals.send_catch_log_deferred(
        signal=signals.item_scraped, item=output, response=response,
        spider=spider)
def _log_download_errors(self, spider_failure: Failure, download_failure: Failure,
                         request: Request, spider: Spider) -> Union[Failure, None]:
    """Log a download failure and decide whether the spider failure propagates.

    spider_failure: the value passed into the errback of self.call_spider()
    download_failure: the value passed into _scrape2() from
    ExecutionEngine._handle_downloader_output() as "result"

    A ``download_failure`` that matches ``IgnoreRequest`` is not logged. Any
    other one is logged with traceback info when it has frames, or with its
    error message when it has none (and the message is non-empty).

    Returns ``spider_failure`` when it is a different object from
    ``download_failure``; otherwise ``None``, which silences the error.
    """
    # Identity comparison only; independent of the logging below.
    propagated = spider_failure if spider_failure is not download_failure else None

    if download_failure.check(IgnoreRequest):
        return propagated

    if download_failure.frames:
        error_kws = self.logformatter.download_error(download_failure, request, spider)
        logger.log(
            *logformatter_adapter(error_kws),
            extra={'spider': spider},
            exc_info=failure_to_exc_info(download_failure),
        )
        return propagated

    # No traceback frames available: fall back to the plain error message.
    message = download_failure.getErrorMessage()
    if message:
        error_kws = self.logformatter.download_error(
            download_failure, request, spider, message)
        logger.log(*logformatter_adapter(error_kws), extra={'spider': spider})
    return propagated
def _log_download_errors(self, spider_failure, download_failure, request, spider):
    """Log a download failure and decide whether the spider failure propagates.

    Nothing is logged when ``download_failure`` is not a ``Failure`` or when
    it matches ``IgnoreRequest``. Otherwise it is logged with traceback info
    if it has frames, or with its error message if that message is non-empty.

    Returns ``spider_failure`` when it is a different object from
    ``download_failure``; otherwise ``None``.
    """
    # Identity comparison only; independent of the logging below.
    propagated = spider_failure if spider_failure is not download_failure else None

    # ``check`` is only reached for real Failure objects (short-circuit).
    if not isinstance(download_failure, Failure) or download_failure.check(IgnoreRequest):
        return propagated

    if download_failure.frames:
        error_kws = self.logformatter.download_error(download_failure, request, spider)
        logger.log(
            *logformatter_adapter(error_kws),
            extra={'spider': spider},
            exc_info=failure_to_exc_info(download_failure),
        )
        return propagated

    # No traceback frames available: fall back to the plain error message.
    message = download_failure.getErrorMessage()
    if message:
        error_kws = self.logformatter.download_error(
            download_failure, request, spider, message)
        logger.log(*logformatter_adapter(error_kws), extra={'spider': spider})
    return propagated
async def _handle_downloader_output(self, result, request, spider):
    """Dispatch the downloader ``result`` obtained for ``request``.

    ``result`` must be a ``Request``, a ``Response`` or an exception
    instance; anything else raises ``TypeError``. A ``Request`` is passed to
    ``self.crawl`` and the coroutine returns. A ``Response`` is tied to
    ``request``, logged through the log formatter (unless it returns None)
    and announced with the ``response_received`` signal.

    NOTE(review): the nesting of the last three statements was reconstructed
    from a flattened source line -- confirm that ``enqueue_scrape``,
    ``remove_request`` and the ``_next_request`` task are meant to run for
    both ``Response`` and exception results.
    """
    # NOTE(review): ``Exception`` is already covered by ``BaseException``, and
    # the message still says "Failure" although exceptions are accepted.
    if not isinstance(result, (Request, Response, Exception, BaseException)):
        raise TypeError(
            "Incorrect type: expected Request, Response or Failure, got %s: %r"
            % (type(result), result))
    if isinstance(result, Request):
        await self.crawl(result, spider)
        return
    if isinstance(result, Response):
        result.request = request
        logkws = self.logformatter.crawled(request, result, spider)
        if logkws is not None:
            logger.log(*logformatter_adapter(logkws), extra={'spider': spider})
        self.signals.send_catch_log(signals.response_received, response=result, request=request, spider=spider)
    await self.scraper.enqueue_scrape(result, request, spider)
    self.slot.remove_request(request)
    # NOTE(review): the task object is not stored anywhere, and this uses
    # ``self.spider`` rather than the ``spider`` argument -- confirm both.
    asyncio.create_task(self._next_request(self.spider))
def _on_success(response):
    """Handle a successful download result and pass it through unchanged.

    ``request``, ``spider`` and ``self`` come from the enclosing scope.

    A ``Response`` is tied to ``request``, logged through the log formatter
    and announced with the ``response_received`` signal. A ``Request`` is
    returned untouched.

    Raises:
        TypeError: if ``response`` is neither a ``Response`` nor a ``Request``.
    """
    # Explicit check instead of ``assert``: assertions are removed when
    # Python runs with -O, which would let bad values through silently.
    if not isinstance(response, (Response, Request)):
        raise TypeError(
            "Incorrect type: expected Response or Request, got %s: %r"
            % (type(response), response)
        )
    if isinstance(response, Response):
        response.request = request  # tie request to response received
        logkws = self.logformatter.crawled(request, response, spider)
        # Skip the log call when the formatter returns None instead of
        # handing None to logformatter_adapter.
        if logkws is not None:
            logger.log(*logformatter_adapter(logkws), extra={'spider': spider})
        self.signals.send_catch_log(
            signal=signals.response_received,
            response=response,
            request=request,
            spider=spider,
        )
    return response
def _itemproc_finished(self, output, item, response, spider):
    """ItemProcessor finished for the given ``item`` and returned ``output``.

    Decrements the slot's ``itemproc_size`` counter, then:

    * ``output`` is a ``Failure`` wrapping ``DropItem``: logs the drop and
      returns the result of sending ``item_dropped``.
    * ``output`` is any other ``Failure``: logs an error only; no signal is
      sent and ``None`` is returned.
    * otherwise: logs the scraped item and returns the result of sending
      ``item_scraped``.
    """
    self.slot.itemproc_size -= 1
    if isinstance(output, Failure):
        ex = output.value
        if isinstance(ex, DropItem):
            logkws = self.logformatter.dropped(item, ex, response, spider)
            # Skip the log call when the formatter returns None instead of
            # handing None to logformatter_adapter.
            if logkws is not None:
                logger.log(*logformatter_adapter(logkws), extra={'spider': spider})
            return self.signals.send_catch_log_deferred(
                signal=signals.item_dropped, item=item, response=response,
                spider=spider, exception=output.value)
        # The failure travels in ``extra``; presumably a logging filter or
        # handler elsewhere renders it -- confirm.
        logger.error('Error processing %(item)s', {'item': item},
                     extra={'spider': spider, 'failure': output})
        return None
    logkws = self.logformatter.scraped(output, response, spider)
    # Same guard as above: a None result means "do not log this event".
    if logkws is not None:
        logger.log(*logformatter_adapter(logkws), extra={'spider': spider})
    return self.signals.send_catch_log_deferred(
        signal=signals.item_scraped, item=output, response=response,
        spider=spider)
async def _itemproc_finished(self, output, item, response, spider):
    """Log and signal the outcome of running ``item`` through the item processor.

    ``output`` is either the processed item or the exception instance raised
    during processing. The slot's ``itemproc_size`` counter is decremented,
    then exactly one of ``item_scraped``, ``item_dropped`` or ``item_error``
    is sent and the awaited result of ``send_catch_log_deferred`` returned.
    """
    self.slot.itemproc_size -= 1

    # Success path: ``output`` is the processed item.
    if not isinstance(output, BaseException):
        scraped_kws = self.logformatter.scraped(output, response, spider)
        if scraped_kws is not None:
            logger.log(*logformatter_adapter(scraped_kws), extra={'spider': spider})
        return await self.signals.send_catch_log_deferred(
            signal=signals.item_scraped,
            item=output,
            response=response,
            spider=spider,
        )

    # The item was dropped on purpose.
    if isinstance(output, DropItem):
        dropped_kws = self.logformatter.dropped(item, output, response, spider)
        if dropped_kws is not None:
            logger.log(*logformatter_adapter(dropped_kws), extra={'spider': spider})
        return await self.signals.send_catch_log_deferred(
            signal=signals.item_dropped,
            item=item,
            response=response,
            spider=spider,
            exception=output,
        )

    # Any other exception: log it with the exception as ``exc_info`` and
    # report it through ``item_error``.
    error_kws = self.logformatter.item_error(item, output, response, spider)
    logger.log(*logformatter_adapter(error_kws), extra={'spider': spider}, exc_info=output)
    return await self.signals.send_catch_log_deferred(
        signal=signals.item_error,
        item=item,
        response=response,
        spider=spider,
        failure=output,
    )
def _on_success(response):
    """Handle a successful download result and pass it through unchanged.

    ``request``, ``spider`` and ``self`` come from the enclosing scope.

    A ``Response`` is tied to ``request``, logged through the log formatter
    (unless it returns None) and announced with ``response_received``. A
    ``Request`` is returned untouched.

    Raises:
        TypeError: if ``response`` is neither a ``Response`` nor a ``Request``.
    """
    if isinstance(response, Response):
        response.request = request  # tie request to response received
        crawled_kws = self.logformatter.crawled(request, response, spider)
        if crawled_kws is not None:
            logger.log(*logformatter_adapter(crawled_kws), extra={'spider': spider})
        self.signals.send_catch_log(
            signal=signals.response_received,
            response=response,
            request=request,
            spider=spider,
        )
        return response

    if isinstance(response, Request):
        return response

    raise TypeError(
        "Incorrect type: expected Response or Request, got %s: %r"
        % (type(response), response)
    )
async def handle_spider_error(self, exc, request, response, spider):
    """React to the exception ``exc`` reported for ``request``/``response``.

    A ``CloseSpider`` exception closes the spider through the engine, using
    ``exc.reason`` or ``'cancelled'`` as the reason, and nothing else is done.
    Any other exception is logged through the log formatter, announced with
    the ``spider_error`` signal and counted in the crawler stats under
    ``spider_exceptions/<exception class name>``.
    """
    if isinstance(exc, CloseSpider):
        await self.crawler.engine.close_spider(spider, exc.reason or 'cancelled')
    else:
        error_kws = self.logformatter.spider_error(exc, request, response, spider)
        logger.log(
            *logformatter_adapter(error_kws),
            exc_info=exc,
            extra={'spider': spider},
        )
        self.signals.send_catch_log(
            signal=signals.spider_error,
            failure=exc,
            response=response,
            spider=spider,
        )
        stat_key = "spider_exceptions/%s" % exc.__class__.__name__
        self.crawler.stats.inc_value(stat_key, spider=spider)
def _on_success(result: Union[Response, Request]) -> Union[Response, Request]:
    """Handle a successful download result and pass it through unchanged.

    ``request``, ``spider`` and ``self`` come from the enclosing scope.

    For a ``Response``, ``result.request`` is set to ``request`` only when it
    is still ``None``; the response is then logged through the log formatter
    (unless it returns None) and announced with ``response_received``, both
    using ``result.request``. A ``Request`` is returned untouched.

    Raises:
        TypeError: if ``result`` is neither a ``Response`` nor a ``Request``.
    """
    is_response = isinstance(result, Response)
    if not is_response and not isinstance(result, Request):
        raise TypeError(f"Incorrect type: expected Response or Request, got {type(result)}: {result!r}")

    if is_response:
        # Keep a request that is already attached to the response.
        if result.request is None:
            result.request = request
        crawled_kws = self.logformatter.crawled(result.request, result, spider)
        if crawled_kws is not None:
            logger.log(*logformatter_adapter(crawled_kws), extra={"spider": spider})
        self.signals.send_catch_log(
            signal=signals.response_received,
            response=result,
            request=result.request,
            spider=spider,
        )
    return result
def _log_download_errors(self, spider_exception, download_exception, request, spider): """Log and silence errors that come from the engine (typically download errors that got propagated thru here) """ if isinstance(download_exception, (Exception, BaseException)) \ and not isinstance(download_exception, IgnoreRequest): logkws = self.logformatter.download_error(download_exception, request, spider) logger.log( *logformatter_adapter(logkws), extra={'spider': spider}, exc_info=download_exception, ) if spider_exception is not download_exception: return spider_exception
def handle_spider_error(self, _failure, request, response, spider):
    """React to ``_failure`` reported for ``request``/``response``.

    When the failure wraps a ``CloseSpider`` exception, the spider is closed
    through the engine, using the exception's ``reason`` or ``'cancelled'``,
    and nothing else is done. Any other failure is logged through the log
    formatter with traceback info, announced with the ``spider_error`` signal
    and counted in the crawler stats under
    ``spider_exceptions/<exception class name>``.
    """
    error = _failure.value
    if isinstance(error, CloseSpider):
        self.crawler.engine.close_spider(spider, error.reason or 'cancelled')
    else:
        error_kws = self.logformatter.spider_error(_failure, request, response, spider)
        logger.log(
            *logformatter_adapter(error_kws),
            exc_info=failure_to_exc_info(_failure),
            extra={'spider': spider},
        )
        self.signals.send_catch_log(
            signal=signals.spider_error,
            failure=_failure,
            response=response,
            spider=spider,
        )
        stat_key = f"spider_exceptions/{_failure.value.__class__.__name__}"
        self.crawler.stats.inc_value(stat_key, spider=spider)
"""