Esempio n. 1
0
 def _itemproc_finished(self, output: Any, item: Any, response: Response, spider: Spider) -> None:
     """Report the item pipeline outcome for ``item`` and fire the matching signal.

     ``output`` is either the processed item or a ``Failure`` whose ``value``
     is the exception raised while processing.  A ``DropItem`` failure is
     announced through ``signals.item_dropped``, any other failure through
     ``signals.item_error`` and a successful result through
     ``signals.item_scraped``.  In every case the value produced by
     ``self.signals.send_catch_log_deferred`` is returned.
     """
     assert self.slot is not None  # typing
     self.slot.itemproc_size -= 1

     if not isinstance(output, Failure):
         # Success: ``output`` is what came out of the item processor.
         scraped_kws = self.logformatter.scraped(output, response, spider)
         if scraped_kws is not None:  # ``None`` means "do not log"
             logger.log(*logformatter_adapter(scraped_kws), extra={'spider': spider})
         return self.signals.send_catch_log_deferred(
             signal=signals.item_scraped,
             item=output,
             response=response,
             spider=spider,
         )

     error = output.value
     if isinstance(error, DropItem):
         dropped_kws = self.logformatter.dropped(item, error, response, spider)
         if dropped_kws is not None:  # ``None`` means "do not log"
             logger.log(*logformatter_adapter(dropped_kws), extra={'spider': spider})
         return self.signals.send_catch_log_deferred(
             signal=signals.item_dropped,
             item=item,
             response=response,
             spider=spider,
             exception=error,
         )

     # Any other exception is logged unconditionally, with exc_info built
     # from the failure.
     error_kws = self.logformatter.item_error(item, error, response, spider)
     logger.log(
         *logformatter_adapter(error_kws),
         extra={'spider': spider},
         exc_info=failure_to_exc_info(output),
     )
     return self.signals.send_catch_log_deferred(
         signal=signals.item_error,
         item=item,
         response=response,
         spider=spider,
         failure=output,
     )
Esempio n. 2
0
 def _itemproc_finished(self, output, item, response, spider):
     """Log the result of processing ``item`` and send the corresponding signal.

     ``output`` is the processed item on success, or a ``Failure`` wrapping
     the exception on error.  Depending on the outcome one of
     ``item_scraped``, ``item_dropped`` or ``item_error`` is sent, and the
     value returned by ``send_catch_log_deferred`` is passed back.
     """
     self.slot.itemproc_size -= 1

     if not isinstance(output, Failure):
         # Success path: log the scraped item and announce it.
         scraped_kws = self.logformatter.scraped(output, response, spider)
         logger.log(*logformatter_adapter(scraped_kws), extra={'spider': spider})
         return self.signals.send_catch_log_deferred(
             signal=signals.item_scraped, item=output, response=response,
             spider=spider)

     error = output.value
     if isinstance(error, DropItem):
         dropped_kws = self.logformatter.dropped(item, error, response, spider)
         logger.log(*logformatter_adapter(dropped_kws), extra={'spider': spider})
         return self.signals.send_catch_log_deferred(
             signal=signals.item_dropped, item=item, response=response,
             spider=spider, exception=error)

     # Unexpected pipeline error: log with exc_info built from the failure.
     logger.error(
         'Error processing %(item)s',
         {'item': item},
         exc_info=failure_to_exc_info(output),
         extra={'spider': spider},
     )
     return self.signals.send_catch_log_deferred(
         signal=signals.item_error, item=item, response=response,
         spider=spider, failure=output)
Esempio n. 3
0
    def _log_download_errors(self, spider_failure: Failure, download_failure: Failure, request: Request,
                             spider: Spider) -> Union[Failure, None]:
        """Log download errors coming from the engine and silence them.

        ``spider_failure`` is the value passed into the errback of
        ``self.call_spider()``; ``download_failure`` is the value passed into
        ``_scrape2()`` from ``ExecutionEngine._handle_downloader_output()``
        as "result".

        Nothing is logged when ``download_failure`` is an ``IgnoreRequest``.
        Returns ``spider_failure`` when it is a different object from
        ``download_failure``, otherwise ``None``.
        """
        if not download_failure.check(IgnoreRequest):
            if download_failure.frames:
                # Frames are available: log with exc_info built from the failure.
                error_kws = self.logformatter.download_error(download_failure, request, spider)
                logger.log(
                    *logformatter_adapter(error_kws),
                    extra={'spider': spider},
                    exc_info=failure_to_exc_info(download_failure),
                )
            else:
                # No frames: log only when there is an error message to show.
                message = download_failure.getErrorMessage()
                if message:
                    error_kws = self.logformatter.download_error(
                        download_failure, request, spider, message)
                    logger.log(*logformatter_adapter(error_kws), extra={'spider': spider})

        # Same object means the download error merely propagated through the
        # spider errback, so it has been dealt with here.
        return None if spider_failure is download_failure else spider_failure
Esempio n. 4
0
    def _log_download_errors(self, spider_failure, download_failure, request,
                             spider):
        """Log download errors coming from the engine and silence them.

        Logging happens only when ``download_failure`` is a ``Failure`` that
        is not an ``IgnoreRequest``.  Returns ``spider_failure`` when it is a
        different object from ``download_failure``, otherwise ``None``.
        """
        should_log = (
            isinstance(download_failure, Failure)
            and not download_failure.check(IgnoreRequest)
        )
        if should_log:
            if download_failure.frames:
                # Frames are available: log with exc_info built from the failure.
                error_kws = self.logformatter.download_error(
                    download_failure, request, spider)
                logger.log(
                    *logformatter_adapter(error_kws),
                    extra={'spider': spider},
                    exc_info=failure_to_exc_info(download_failure),
                )
            else:
                # No frames: log only when there is an error message to show.
                message = download_failure.getErrorMessage()
                if message:
                    error_kws = self.logformatter.download_error(
                        download_failure, request, spider, message)
                    logger.log(*logformatter_adapter(error_kws),
                               extra={'spider': spider})

        if spider_failure is download_failure:
            return None
        return spider_failure
Esempio n. 5
0
    async def _handle_downloader_output(self, result, request, spider):
        """Dispatch a downloader ``result`` obtained for ``request``.

        * A ``Request`` is handed to ``self.crawl`` and nothing else happens.
        * A ``Response`` is tied to ``request``, logged (unless the log
          formatter returns ``None``) and announced via ``response_received``.
        * An exception instance is passed on without logging.
        * Anything else raises ``TypeError``.

        Responses and exceptions are then enqueued on the scraper, the
        request is removed from the slot and ``_next_request`` is scheduled
        as an asyncio task.
        """
        if isinstance(result, Request):
            await self.crawl(result, spider)
            return

        if isinstance(result, Response):
            result.request = request
            crawled_kws = self.logformatter.crawled(request, result, spider)
            if crawled_kws is not None:
                logger.log(*logformatter_adapter(crawled_kws),
                           extra={'spider': spider})
            self.signals.send_catch_log(
                signals.response_received,
                response=result,
                request=request,
                spider=spider,
            )
        elif not isinstance(result, BaseException):
            # ``Exception`` derives from ``BaseException``, so this single
            # check accepts every exception instance.
            raise TypeError(
                "Incorrect type: expected Request, Response or Failure, got %s: %r"
                % (type(result), result))

        await self.scraper.enqueue_scrape(result, request, spider)

        self.slot.remove_request(request)
        asyncio.create_task(self._next_request(self.spider))
Esempio n. 6
0
File: engine.py Progetto: 01-/scrapy
 def _on_success(response):
     """Validate the downloader result and announce a received response.

     ``self``, ``request`` and ``spider`` come from the enclosing scope,
     which is not visible here.  A ``Response`` is tied to ``request``,
     logged through the log formatter and announced with the
     ``response_received`` signal; a ``Request`` is passed through untouched.

     Returns:
         The ``response`` argument itself.

     Raises:
         TypeError: if ``response`` is neither a ``Response`` nor a ``Request``.
     """
     # Raise explicitly instead of using ``assert``: assertions are removed
     # under ``python -O``, which would let an invalid result slip through.
     if not isinstance(response, (Response, Request)):
         raise TypeError(
             "Incorrect type: expected Response or Request, got %s: %r"
             % (type(response), response)
         )
     if isinstance(response, Response):
         response.request = request  # tie request to response received
         logkws = self.logformatter.crawled(request, response, spider)
         logger.log(*logformatter_adapter(logkws), extra={'spider': spider})
         self.signals.send_catch_log(
             signal=signals.response_received,
             response=response, request=request, spider=spider)
     return response
Esempio n. 7
0
 def _on_success(response):
     """Validate the downloader result and announce a received response.

     ``self``, ``request`` and ``spider`` come from the enclosing scope,
     which is not visible here.  A ``Response`` is tied to ``request``,
     logged through the log formatter and announced with the
     ``response_received`` signal; a ``Request`` is passed through untouched.

     Returns:
         The ``response`` argument itself.

     Raises:
         TypeError: if ``response`` is neither a ``Response`` nor a ``Request``.
     """
     # Raise explicitly instead of using ``assert``: assertions are removed
     # under ``python -O``, which would let an invalid result slip through.
     if not isinstance(response, (Response, Request)):
         raise TypeError(
             "Incorrect type: expected Response or Request, got %s: %r"
             % (type(response), response)
         )
     if isinstance(response, Response):
         response.request = request  # tie request to response received
         logkws = self.logformatter.crawled(request, response, spider)
         logger.log(*logformatter_adapter(logkws), extra={'spider': spider})
         self.signals.send_catch_log(
             signal=signals.response_received,
             response=response, request=request, spider=spider)
     return response
Esempio n. 8
0
 def _itemproc_finished(self, output, item, response, spider):
     """Log the result of processing ``item`` and send the matching signal.

     ``output`` is the processed item on success, or a ``Failure`` wrapping
     the exception on error.  A success sends ``item_scraped`` and a
     ``DropItem`` failure sends ``item_dropped``; both return the value of
     ``send_catch_log_deferred``.  Any other failure is only logged, and
     ``None`` is returned.
     """
     self.slot.itemproc_size -= 1

     if not isinstance(output, Failure):
         scraped_kws = self.logformatter.scraped(output, response, spider)
         logger.log(*logformatter_adapter(scraped_kws), extra={'spider': spider})
         return self.signals.send_catch_log_deferred(
             signal=signals.item_scraped,
             item=output,
             response=response,
             spider=spider,
         )

     error = output.value
     if not isinstance(error, DropItem):
         # Unexpected error: the failure travels in ``extra``; no signal is sent.
         logger.error(
             'Error processing %(item)s',
             {'item': item},
             extra={'spider': spider, 'failure': output},
         )
         return None

     dropped_kws = self.logformatter.dropped(item, error, response, spider)
     logger.log(*logformatter_adapter(dropped_kws), extra={'spider': spider})
     return self.signals.send_catch_log_deferred(
         signal=signals.item_dropped,
         item=item,
         response=response,
         spider=spider,
         exception=error,
     )
Esempio n. 9
0
 async def _itemproc_finished(self, output, item, response, spider):
     """Log the result of processing ``item`` and await the matching signal.

     ``output`` is the processed item on success, or the exception instance
     raised while processing.  ``DropItem`` leads to ``item_dropped``, any
     other exception to ``item_error`` and a success to ``item_scraped``.
     The awaited result of ``send_catch_log_deferred`` is returned.
     """
     self.slot.itemproc_size -= 1

     # ``Exception`` derives from ``BaseException``, so this one check
     # recognises every exception instance.
     if not isinstance(output, BaseException):
         scraped_kws = self.logformatter.scraped(output, response, spider)
         if scraped_kws is not None:  # ``None`` means "do not log"
             logger.log(*logformatter_adapter(scraped_kws),
                        extra={'spider': spider})
         return await self.signals.send_catch_log_deferred(
             signal=signals.item_scraped, item=output, response=response,
             spider=spider)

     if isinstance(output, DropItem):
         dropped_kws = self.logformatter.dropped(item, output, response, spider)
         if dropped_kws is not None:  # ``None`` means "do not log"
             logger.log(*logformatter_adapter(dropped_kws),
                        extra={'spider': spider})
         return await self.signals.send_catch_log_deferred(
             signal=signals.item_dropped, item=item, response=response,
             spider=spider, exception=output)

     # Any other exception is logged unconditionally, with the exception
     # itself passed as exc_info.
     error_kws = self.logformatter.item_error(item, output, response, spider)
     logger.log(
         *logformatter_adapter(error_kws),
         extra={'spider': spider},
         exc_info=output,
     )
     return await self.signals.send_catch_log_deferred(
         signal=signals.item_error, item=item, response=response,
         spider=spider, failure=output)
Esempio n. 10
0
 def _on_success(response):
     """Check the downloader result and announce a received response.

     ``self``, ``request`` and ``spider`` come from the enclosing scope,
     which is not visible here.  A ``Response`` is tied to ``request``,
     logged (unless the log formatter returns ``None``) and announced via
     ``response_received``.  A ``Request`` passes through untouched, and any
     other type raises ``TypeError``.  The argument itself is returned.
     """
     if isinstance(response, Response):
         response.request = request  # tie request to response received
         crawled_kws = self.logformatter.crawled(request, response, spider)
         if crawled_kws is not None:
             logger.log(*logformatter_adapter(crawled_kws), extra={'spider': spider})
         self.signals.send_catch_log(
             signal=signals.response_received,
             response=response,
             request=request,
             spider=spider,
         )
     elif not isinstance(response, Request):
         raise TypeError(
             "Incorrect type: expected Response or Request, got %s: %r"
             % (type(response), response)
         )
     return response
Esempio n. 11
0
 async def handle_spider_error(self, exc, request, response, spider):
     """React to an exception ``exc`` raised while handling ``response``.

     A ``CloseSpider`` exception closes the spider through the engine, using
     ``exc.reason`` or ``'cancelled'`` when that is falsy, and nothing else
     happens.  Any other exception is logged, announced via the
     ``spider_error`` signal and counted in the crawler stats under
     ``spider_exceptions/<exception class name>``.
     """
     if isinstance(exc, CloseSpider):
         close_reason = exc.reason or 'cancelled'
         await self.crawler.engine.close_spider(spider, close_reason)
         return

     error_kws = self.logformatter.spider_error(exc, request, response, spider)
     logger.log(
         *logformatter_adapter(error_kws),
         exc_info=exc,
         extra={'spider': spider},
     )
     self.signals.send_catch_log(
         signal=signals.spider_error,
         failure=exc,
         response=response,
         spider=spider,
     )
     stat_key = "spider_exceptions/%s" % exc.__class__.__name__
     self.crawler.stats.inc_value(stat_key, spider=spider)
Esempio n. 12
0
 def _on_success(result: Union[Response, Request]) -> Union[Response, Request]:
     """Check the downloader result and announce a received response.

     ``self``, ``request`` and ``spider`` come from the enclosing scope,
     which is not visible here.  A ``Response`` with no request attached is
     tied to ``request``; it is then logged (unless the log formatter
     returns ``None``) and announced via ``response_received``.  A
     ``Request`` passes through untouched, and any other type raises
     ``TypeError``.  The argument itself is returned.
     """
     if isinstance(result, Response):
         if result.request is None:
             # Only fill in the request when the response has none of its own.
             result.request = request
         crawled_kws = self.logformatter.crawled(result.request, result, spider)
         if crawled_kws is not None:
             logger.log(*logformatter_adapter(crawled_kws), extra={"spider": spider})
         self.signals.send_catch_log(
             signal=signals.response_received,
             response=result,
             request=result.request,
             spider=spider,
         )
     elif not isinstance(result, Request):
         raise TypeError(f"Incorrect type: expected Response or Request, got {type(result)}: {result!r}")
     return result
Esempio n. 13
0
    def _log_download_errors(self, spider_exception, download_exception,
                             request, spider):
        """Log download errors coming from the engine and silence them.

        Logging happens only when ``download_exception`` is an exception
        instance other than ``IgnoreRequest``.  Returns ``spider_exception``
        when it is a different object from ``download_exception``, otherwise
        ``None``.
        """
        # ``Exception`` derives from ``BaseException``, so a single check
        # recognises every exception instance.
        loggable = (
            isinstance(download_exception, BaseException)
            and not isinstance(download_exception, IgnoreRequest)
        )
        if loggable:
            error_kws = self.logformatter.download_error(
                download_exception, request, spider)
            logger.log(
                *logformatter_adapter(error_kws),
                extra={'spider': spider},
                exc_info=download_exception,
            )

        if spider_exception is download_exception:
            return None
        return spider_exception
Esempio n. 14
0
 def handle_spider_error(self, _failure, request, response, spider):
     """React to a ``_failure`` raised while handling ``response``.

     When the wrapped exception is ``CloseSpider`` the spider is closed
     through the engine, using ``exc.reason`` or ``'cancelled'`` when that is
     falsy, and nothing else happens.  Any other failure is logged,
     announced via the ``spider_error`` signal and counted in the crawler
     stats under ``spider_exceptions/<exception class name>``.
     """
     error = _failure.value
     if isinstance(error, CloseSpider):
         close_reason = error.reason or 'cancelled'
         self.crawler.engine.close_spider(spider, close_reason)
         return

     error_kws = self.logformatter.spider_error(
         _failure, request, response, spider)
     logger.log(
         *logformatter_adapter(error_kws),
         exc_info=failure_to_exc_info(_failure),
         extra={'spider': spider},
     )
     self.signals.send_catch_log(
         signal=signals.spider_error,
         failure=_failure,
         response=response,
         spider=spider,
     )
     stat_key = f"spider_exceptions/{_failure.value.__class__.__name__}"
     self.crawler.stats.inc_value(stat_key, spider=spider)
Esempio n. 15
0
"""