#coding=utf-8
__author__ = 'xiyuanbupt'

import argparse

from pymongo import MongoClient

from statistics.fromLog import getScrapyStatusFromScrapyLog
from conf_util import ConfUtil

tClient = MongoClient(ConfUtil.getMongoIP(), ConfUtil.getMongoPort())
tDb = tClient[ConfUtil.getStatisticsDBName()]

'''
After the spiders finish crawling, this script gathers statistics from the
Scrapy log. The stats take the following form:
{'downloader/request_bytes': 227847,
 'downloader/request_count': 427,
 'downloader/request_method_count/GET': 427,
 'downloader/response_bytes': 799168,
 'downloader/response_count': 427,
 'downloader/response_status_count/200': 427,
 'finish_reason': 'finished',
 'finish_time': datetime.datetime(2016, 5, 3, 9, 7, 24, 34782),
 'item_scraped_count': 6882,
 'log_count/DEBUG': 7310,
 'log_count/INFO': 16,
 'request_depth_max': 3,
 'response_received_count': 427,
 'scheduler/dequeued': 427,
 'scheduler/dequeued/memory': 427,
 'scheduler/enqueued': 427,
 'scheduler/enqueued/memory': 427,
 ...}
'''
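# A minimal sketch of a CLI entry point for this script, assuming
# getScrapyStatusFromScrapyLog(path) parses one Scrapy log file and returns
# the stats dict shown above. The --log-file flag and the 'scrapy_log_stats'
# collection name are illustrative assumptions, not from the original source.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Collect crawl statistics from a Scrapy log')
    parser.add_argument('--log-file', required=True,
                        help='path to the Scrapy log file to parse')
    args = parser.parse_args()

    # Parse the finish-time stats dump out of the log and store one
    # document per crawl run in the statistics database.
    stats = getScrapyStatusFromScrapyLog(args.log_file)
    tDb['scrapy_log_stats'].insert_one(stats)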
#coding=utf-8
__author__ = 'xiyuanbupt'

from collections import defaultdict, Counter

from pymongo import MongoClient

from conf_util import ConfUtil
from statistics.fromDB import KaoLaStatistics, XmlyStatistics, QtStatistics
from statistics.fromSys import getDirSize

client = MongoClient(ConfUtil.getMongoIP(), ConfUtil.getMongoPort())
db = client[ConfUtil.getStatisticsDBName()]


class Main:

    coll = db[ConfUtil.getStatisticCronPerHourCollection()]

    def __init__(self):
        self.klSt = KaoLaStatistics()
        self.qtSt = QtStatistics()
        self.xmlySt = XmlyStatistics()

    def runOnce(self):
        '''
        Run the statistics job once.
        :return:
        '''
        kl = self.runForKL()
        qt = self.runForQt()
        xmly = self.runForXMLY()
        forInsert = dict(
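        # The source is truncated at the dict( above. A hypothetical
        # completion, for illustration only: combine the per-source stats
        # into one document, stamp it, and insert it into the per-hour
        # collection (its name suggests this job runs hourly under cron,
        # e.g. "0 * * * * python this_script.py" with Main().runOnce()).
        # The key names, the getDirSize argument, and the datetime import
        # are assumptions, not the original code.
        #
        #     forInsert = dict(
        #         kl=kl,
        #         qt=qt,
        #         xmly=xmly,
        #         dirSize=getDirSize('/path/to/download/dir'),
        #         time=datetime.datetime.now(),
        #     )
        #     self.coll.insert_one(forInsert)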