Example #1
#coding=utf-8
__author__ = 'xiyuanbupt'
import argparse
from pymongo import MongoClient

from statistics.fromLog import getScrapyStatusFromScrapyLog
from conf_util import ConfUtil

# Connect to the MongoDB instance that stores the statistics.
tClient = MongoClient(ConfUtil.getMongoIP(), ConfUtil.getMongoPort())
tDb = tClient[ConfUtil.getStatisticsDBName()]

'''
This script gathers statistics from the Scrapy log after the spider has
finished crawling. The stats dump in the log has the following form:
    {'downloader/request_bytes': 227847,
     'downloader/request_count': 427,
     'downloader/request_method_count/GET': 427,
     'downloader/response_bytes': 799168,
     'downloader/response_count': 427,
     'downloader/response_status_count/200': 427,
     'finish_reason': 'finished',
     'finish_time': datetime.datetime(2016, 5, 3, 9, 7, 24, 34782),
     'item_scraped_count': 6882,
     'log_count/DEBUG': 7310,
     'log_count/INFO': 16,
     'request_depth_max': 3,
     'response_received_count': 427,
     'scheduler/dequeued': 427,
     'scheduler/dequeued/memory': 427,
     'scheduler/enqueued': 427,
     'scheduler/enqueued/memory': 427,
     ...}
'''
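
The helper getScrapyStatusFromScrapyLog imported above is not shown in this
example. A minimal sketch of what such a parser could look like, assuming the
stats dict is read back from the "Dumping Scrapy stats:" marker that Scrapy's
default logging emits; the function name and details below are illustrative,
not the project's actual implementation:

#coding=utf-8
# Illustrative sketch only -- not the project's real implementation.
import datetime

def get_scrapy_stats_from_log(log_path):
    '''Return the last stats dict dumped into a Scrapy log file.'''
    lines = open(log_path).read().splitlines()
    stats = None
    for i, line in enumerate(lines):
        if 'Dumping Scrapy stats:' not in line:
            continue
        # The pretty-printed dict spans the following lines, ending with
        # the line that closes the outer brace.
        buf = []
        for statLine in lines[i + 1:]:
            buf.append(statLine)
            if statLine.rstrip().endswith('}'):
                break
        # The dump contains datetime.datetime(...) values, so
        # ast.literal_eval() cannot parse it; evaluate it with only the
        # datetime module in scope instead.
        stats = eval('\n'.join(buf), {'datetime': datetime})
    return stats

eval() is tolerable here only because the log is produced by the crawler
itself; for untrusted input a stricter parser would be needed.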
Example #2
#coding=utf-8
__author__ = 'xiyuanbupt'

from collections import defaultdict,Counter

from pymongo import MongoClient

from conf_util import ConfUtil
from statistics.fromDB import KaoLaStatistics,XmlyStatistics,QtStatistics
from statistics.fromSys import getDirSize

# Connect to the statistics database, as in the previous example.
client = MongoClient(ConfUtil.getMongoIP(), ConfUtil.getMongoPort())
db = client[ConfUtil.getStatisticsDBName()]

class Main:
    # Target collection for the per-hour statistics cron run.
    coll = db[ConfUtil.getStatisticCronPerHourCollection()]

    def __init__(self):
        # Per-source statistics helpers: KaoLa, Qingting, Ximalaya.
        self.klSt = KaoLaStatistics()
        self.qtSt = QtStatistics()
        self.xmlySt = XmlyStatistics()

    def runOnce(self):
        '''
        Run the statistics task once.
        :return:
        '''
        kl = self.runForKL()
        qt = self.runForQt()
        xmly = self.runForXMLY()
        forInsert = dict(
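            # Hypothetical completion: the original snippet is truncated
            # at the dict( call above. The key names below are
            # illustrative assumptions, not the project's actual fields.
            kaola=kl,
            qingting=qt,
            ximalaya=xmly,
        )
        # pymongo 2.x style write (insert_one() in pymongo 3+); the
        # collection name suggests one document per hourly cron run.
        self.coll.insert(forInsert)

If run from an hourly cron job, as the collection name
getStatisticCronPerHourCollection suggests, the entry point would simply be:

if __name__ == '__main__':
    Main().runOnce()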