def flush():
        """Emit the end-of-run spider report and raise a failure alert if needed.

        All state is read from the SpiderReport / SpiderConfigure singletons:
        1. Log every S1 url whose download failed.
        2. For each S2 query, report sites that produced no url, counting them
           in querynositemap.
        3. Write the tabular report (header, per-channel/type rows, per-site/type
           rows, totals) to the file configured under SPIDER_INFO_REPORT_FILE.
        4. Compare the total success rate against SPIDER_FAILED_THRESHOLD and
           send a SpiderNotify alert when the rate falls below it.
        """
        # dump s1 download failed url
        SpiderConfigure.getinstance().setchannel(constant.SPIDER_CHANNEL_S1)
        SpiderConfigure.getinstance().setquery('')
        for url in SpiderReport.getinstance().s1urls:
            Logger.log(url, constant.ERRORCODE_FAIL_LOAD_DOWN)
        # dump none url got from website for query
        querynositemap = {}
        for query in SpiderReport.getinstance().querysitesmap.keys():
            querynositemap[query] = 0
            for site in SpiderReport.getinstance().querysitesmap[query]:
                SpiderReport.s2queryurl(query, site, None, True)
                querynositemap[query] += 1
#
        # NOTE(review): the loop above inserts every key of querysitesmap into
        # querynositemap, so the else branch below looks unreachable — unless
        # s2queryurl mutates querysitesmap; verify before relying on it.
        for query in SpiderReport.getinstance().querysitesmap.keys():
            if query in querynositemap:
                SpiderReport.s2queryurl(query, SpiderReport.getinstance().s2sitenum,
                                        SpiderReport.getinstance().s2sitenum - querynositemap[query], True)
            else:
                SpiderReport.s2queryurl(query, SpiderReport.getinstance().s2sitenum,
                                        SpiderReport.getinstance().s2sitenum, True)
#
        # report
        # Build today's report file path and start from a clean file.
        filename = SpiderConfigure.getconfig(const.SPIDER_STORAGE_DOMAIN,
                                             const.SPIDER_INFO_REPORT_FILE).format(
            date=TimeUtility.getcurrentdate())
        FileUtility.remove(filename)
        # Header row for the tabular report.
        FileUtility.writeline(filename, SpiderReport.REPORT_FORMAT.format(
            ch='CHANNEL',
            query='QUERY',
            type='TYPE',
            v1='UPLOAD',
            v2='DOWNLOAD',
            v3='NO_TEMPLATE',
            v4='NO_SITE',
            v5='WITH_CMT',
            v6='FAILED'
        ))
        # One report line per (key, type) entry, first the general report list,
        # then the S2 per-site list, then the two total-summary lines.
        for key in SpiderReport.getinstance().reportlist.keys():
            for type in SpiderReport.getinstance().reportlist[key].keys():
                r = SpiderReport.getinstance().reportlist[key][type]
                FileUtility.writeline(filename, r.tostring())
        for key in SpiderReport.getinstance().s2sitereportlist.keys():
            for type in SpiderReport.getinstance().s2sitereportlist[key].keys():
                r = SpiderReport.getinstance().s2sitereportlist[key][type]
                FileUtility.writeline(filename, r.tostring())
        FileUtility.writeline(filename, SpiderReport.getinstance().totalreport.tostring())
        FileUtility.writeline(filename, SpiderReport.getinstance().totalreport.tostring2())
        FileUtility.flush()
        # Alert when the overall success rate drops below the configured threshold.
        threshold = float(SpiderConfigure.getconfig(const.SPIDER_EXCEPTION_DOMAIN,
                                                    const.SPIDER_FAILED_THRESHOLD))
        rate = SpiderReport.getinstance().totalreport.getsuccess()
        if rate < threshold:
            Logger.getlogging().warning('success rate is lower than threshold')
            param = NotifyParam()
            param.code = NotifyParam.SPIDER_NOTIFY_OVER_FAILED
            param.message = 'success rate {rate} is lower than threshold {th}'.format(rate=Common.float2percent(rate),
                                                                                      th=Common.float2percent(
                                                                                          threshold))
            SpiderNotify.notify(param)
 def writetofile(filename, cond=None):
     """Dump every stored comment matching *cond* to *filename*.

     Fixes a mutable-default-argument pitfall: the original signature used
     ``cond={}``, which shares a single dict object across all calls. The
     signature stays backward-compatible — omitting *cond* still queries
     with an empty filter.

     :param filename: target file, written via FileUtility.writeline.
     :param cond: query filter forwarded to SQLDAO.find; ``None`` (default)
         means no filter, i.e. all rows of the comments table.
     """
     if cond is None:
         cond = {}
     Logger.getlogging().debug(
         'Now {t}, Starting Output Comments To {f}'.format(
             t=int(time.time()), f=filename))
     for doc in SQLDAO.getinstance().find(SQLDAO.SPIDER_TABLE_COMMENTS,
                                          cond):
         url = doc[SQLDAO.SPIDER_TABLE_COMMENTS_URL]
         # One formatted line per comment row; channel/count/title are
         # looked up per url from the project storage helpers.
         fstring = CMTStorage.COMMENTS_FORMAT.format(
             channel=ChannelDao.getchannel(url),
             content=doc[SQLDAO.SPIDER_TABLE_COMMENTS_CONTENT],
             cmtnum=CMTStorage.getcount(url),
             publishdate=doc[SQLDAO.SPIDER_TABLE_COMMENTS_PUBLISH_DATE],
             user=doc[SQLDAO.SPIDER_TABLE_COMMENTS_USER],
             url=doc[SQLDAO.SPIDER_TABLE_COMMENTS_URL],
             title=NewsStorage.gettitle(url))
         FileUtility.writeline(filename, fstring.encode(CHARSET_UTF8))
     FileUtility.flush()
     Logger.getlogging().debug(
         '{t} Comments Finish'.format(t=int(time.time())))
# Example #3
# 0
 def upload(self):
     """Flush buffered output, then hand all pending url files to the downloader.

     Flushes FileUtility's write buffers so every pending line reaches disk,
     collects the files currently under the SPIDER_URLS_TEMP_PATH storage
     location, and returns whatever ``self.downloader.upload`` reports for
     that batch.
     """
     FileUtility.flush()
     temp_location = Storage.getstoragelocation(const.SPIDER_URLS_TEMP_PATH)
     pending_files = FileUtility.getfilelist(temp_location, [])
     return self.downloader.upload(pending_files)