コード例 #1
    def spider_closed(self, spider, reason):
        """Collect crawl statistics when the spider closes and submit them.

        Connected to Scrapy's ``spider_closed`` signal.

        Args:
            spider: the Spider instance that just finished.
            reason: close-reason string (also present in stats as
                ``finish_reason``).
        """
        stats = spider.crawler.stats.get_stats()

        # Timing data recorded by Scrapy's core stats collector.
        start_time = stats.get("start_time")
        finish_time = stats.get("finish_time")
        # BUG FIX: ``timedelta.seconds`` only holds the seconds-within-a-day
        # component and silently drops full days for crawls longer than 24h.
        # ``total_seconds()`` gives the real elapsed wall time.
        duration = int((finish_time - start_time).total_seconds())

        # Pull job metadata (job_id / project) out of the scrapyd log file.
        result = ScrapydUtil.parse_log_file(self.log_file)

        item = {
            "job_id": result.get('job_id', ''),
            "project": result.get('project', ''),
            "spider": spider.name,
            "item_scraped_count": stats.get("item_scraped_count", 0),
            "item_dropped_count": stats.get("item_dropped_count", 0),
            "start_time": start_time.strftime(self.DATETIME_FORMAT),
            "finish_time": finish_time.strftime(self.DATETIME_FORMAT),
            "duration": duration,
            "finish_reason": stats.get("finish_reason"),
            "log_error_count": stats.get("log_count/ERROR", 0),
        }

        logger.info(item)

        # Ship the collected stats to the external collection endpoint.
        self.collection_item(item)
コード例 #2
 def insert_data(self, table, data):
     """Insert *data* into the given MongoDB collection *table*.

     The item is always dropped afterwards (``DropItem`` raised from
     ``finally``): this pipeline only records stats, so the item must not
     travel further down the pipeline.

     NOTE(review): raising inside ``finally`` also replaces any unexpected
     exception raised in the ``try`` body — accepted here so a single bad
     item cannot abort the whole pipeline, but worth confirming.
     """
     try:
         result = table.insert(data)
         logger.info("success: {}".format(result))
     except (pymongo.errors.DuplicateKeyError,
             pymongo.errors.ServerSelectionTimeoutError) as e:
         # BUG FIX: failures were logged at INFO level; use ERROR so they
         # are counted in log_count/ERROR and visible to log filters.
         logger.error("error: {}".format(e))
     finally:
         raise DropItem
コード例 #3
    def spider_closed(self, spider, reason):
        """Log a short summary (item count, duration) when the spider closes.

        Connected to Scrapy's ``spider_closed`` signal.

        Args:
            spider: the Spider instance that just finished.
            reason: close-reason string (unused here; also in stats as
                ``finish_reason``).
        """
        stats = spider.crawler.stats.get_stats()

        # Item counters maintained by Scrapy (default to 0 when absent).
        item_scraped_count = stats.get("item_scraped_count", 0)
        item_dropped_count = stats.get("item_dropped_count", 0)

        start_time = stats.get("start_time")
        finish_time = stats.get("finish_time")

        # Total items seen = scraped + dropped.
        item_count = item_scraped_count + item_dropped_count
        # BUG FIX: ``timedelta.seconds`` drops whole days for crawls longer
        # than 24h; ``total_seconds()`` is the real elapsed time.
        duration = int((finish_time - start_time).total_seconds())

        # Emit a banner-framed summary to the log.
        logger.info("*" * 30)
        logger.info("* {}".format(spider.name))
        logger.info("* item_count : {}".format(item_count))
        logger.info("* duration : {}".format(duration))
        logger.info("*" * 30)
コード例 #4
    def process_request(self, request, spider):
        """Short-circuit the download: log this middleware's class name and
        return an empty ``HtmlResponse`` for the request instead of fetching
        it over the network."""
        banner = '*' * 32
        logger.info(banner)
        logger.info(type(self).__name__)
        logger.info(banner)

        return HtmlResponse(url=request.url, request=request)
コード例 #5
 def collection_item(self, item, timeout=30):
     """POST the collected stats *item* as JSON to the stats endpoint.

     Args:
         item: JSON-serializable dict of crawl statistics.
         timeout: seconds to wait for the HTTP request (new parameter,
             defaults to 30 so existing callers are unaffected).

     FIX: ``requests.post`` without ``timeout`` can block indefinitely,
     hanging the spider-close handler if the stats service is down.
     """
     res = requests.post(self.stats_collection_url, json=item, timeout=timeout)
     logger.info(res.text)