def tostring2(self):
    """Render this report as one REPORT_FORMAT line of percentages.

    Every column is a counter from ``self.values`` divided by the
    URL_UPLOAD counter and passed through ``Common.float2percent``.
    When the URL_UPLOAD counter is not positive, all six columns are
    rendered from 0.0 instead, so no division takes place.

    Returns:
        The string produced by ``SpiderReport.REPORT_FORMAT.format``.
    """
    counters = self.values
    uploaded = counters[SpiderReport.URL_UPLOAD]

    if uploaded > 0:
        # Force float division regardless of the counters' numeric type.
        total = float(uploaded)
        # The last column combines URL_FAILED with the gap between the
        # upload and download counters (URL_UPLOAD - URL_DOWNLOAD).
        failed = (counters[SpiderReport.URL_FAILED]
                  + uploaded
                  - counters[SpiderReport.URL_DOWNLOAD])
        ratios = (
            uploaded / total,
            counters[SpiderReport.URL_DOWNLOAD] / total,
            counters[SpiderReport.URL_NO_TEMPLATE] / total,
            counters[SpiderReport.URL_NO_SITE] / total,
            counters[SpiderReport.URL_WITH_CMT] / total,
            failed / total,
        )
    else:
        ratios = (0.0,) * 6

    upload, download, no_template, no_site, with_cmt, failure = [
        Common.float2percent(ratio) for ratio in ratios
    ]
    return SpiderReport.REPORT_FORMAT.format(
        ch=self.channel,
        query=self.query,
        type=self.type,
        v1=upload,
        v2=download,
        v3=no_template,
        v4=no_site,
        v5=with_cmt,
        v6=failure,
    )
def flush():
    """Log outstanding failures, write the report file and raise an alert.

    Steps, in order:

    1. Switch ``SpiderConfigure`` to the S1 channel with an empty query and
       log every URL in ``s1urls`` with ``ERRORCODE_FAIL_LOAD_DOWN``.
    2. For every (query, site) pair in ``querysitesmap`` call
       ``s2queryurl(query, site, None, True)``; then, once per query, call
       ``s2queryurl`` with ``s2sitenum`` and ``s2sitenum`` minus the number
       of sites counted for that query.
    3. Remove and rewrite the report file: a header row, one row per entry
       of ``reportlist`` and ``s2sitereportlist``, then the two renderings
       of ``totalreport``.
    4. Compare ``totalreport.getsuccess()`` with the configured threshold
       and send a ``SpiderNotify`` notification when it is lower.

    NOTE(review): takes no ``self``; presumably declared as a static method
    of SpiderReport -- the decorator is not visible here, confirm.
    """
    report = SpiderReport.getinstance()

    # Dump the S1 URLs that failed to download.
    configure = SpiderConfigure.getinstance()
    configure.setchannel(constant.SPIDER_CHANNEL_S1)
    configure.setquery('')
    for failed_url in report.s1urls:
        Logger.log(failed_url, constant.ERRORCODE_FAIL_LOAD_DOWN)

    # Dump the sites from which no URL was obtained, counting them per query.
    nosite_counts = {}
    for query in report.querysitesmap.keys():
        missing = 0
        for site in report.querysitesmap[query]:
            SpiderReport.s2queryurl(query, site, None, True)
            missing += 1
        nosite_counts[query] = missing

    # Per-query summary: total site number versus the sites not counted above.
    for query in report.querysitesmap.keys():
        sitenum = report.s2sitenum
        SpiderReport.s2queryurl(
            query, sitenum, sitenum - nosite_counts.get(query, 0), True)

    # Write the report file from scratch.
    name_template = SpiderConfigure.getconfig(
        const.SPIDER_STORAGE_DOMAIN, const.SPIDER_INFO_REPORT_FILE)
    filename = name_template.format(date=TimeUtility.getcurrentdate())
    FileUtility.remove(filename)
    header = SpiderReport.REPORT_FORMAT.format(
        ch='CHANNEL',
        query='QUERY',
        type='TYPE',
        v1='UPLOAD',
        v2='DOWNLOAD',
        v3='NO_TEMPLATE',
        v4='NO_SITE',
        v5='WITH_CMT',
        v6='FAILED',
    )
    FileUtility.writeline(filename, header)
    for table in (report.reportlist, report.s2sitereportlist):
        for per_type in table.values():
            for entry in per_type.values():
                FileUtility.writeline(filename, entry.tostring())
    summary = report.totalreport
    FileUtility.writeline(filename, summary.tostring())
    FileUtility.writeline(filename, summary.tostring2())
    FileUtility.flush()

    # Notify when the overall success rate drops below the threshold.
    threshold = float(SpiderConfigure.getconfig(
        const.SPIDER_EXCEPTION_DOMAIN, const.SPIDER_FAILED_THRESHOLD))
    rate = report.totalreport.getsuccess()
    if not rate < threshold:
        return
    Logger.getlogging().warning('success rate is lower than threshold')
    param = NotifyParam()
    param.code = NotifyParam.SPIDER_NOTIFY_OVER_FAILED
    param.message = 'success rate {rate} is lower than threshold {th}'.format(
        rate=Common.float2percent(rate),
        th=Common.float2percent(threshold))
    SpiderNotify.notify(param)