Beispiel #1
0
    def getLogger(cfg, forceNew=False):
        """
        Return the shared logger, configuring it on first use.

        The logger is cached in ``gTop`` under ``GD_LOGGER``. It is (re)built
        when nothing is cached yet or when ``forceNew`` is true.

        :param cfg: mapping of config key -> value. Keys listed in
            ``IN_PARAMS_KEY`` are copied into ``BASIC_SETTINGS`` at the
            three-level path stored there; ``CFG_LOG_FILE_NAME`` is a path
            relative to ``<PROJECT_ROOT>/log`` used as the file handler target.
        :param forceNew: rebuild the logger even if one is already cached.
        :return: the object stored in ``gTop`` under ``GD_LOGGER``.
        """

        from superbase.globalData import gTop
        if not gTop.get(
                GD_LOGGER) or forceNew:  # singleton or force a new logger

            from superbase.globalData import gConfig
            from superbase.globalData import PROJECT_ROOT
            from superbase.utility.ioUtil import getPrintDict, mkdir

            logDir = os.path.join(PROJECT_ROOT, "log")
            mkdir(logDir)

            # Stays None when cfg carries no CFG_LOG_FILE_NAME; in that case
            # the filename already present in BASIC_SETTINGS is left alone
            # instead of failing on an unbound local.
            logFileName = None
            for key, value in cfg.items():
                if key in IN_PARAMS_KEY:
                    section, name, option = IN_PARAMS_KEY[key]
                    BASIC_SETTINGS[section][name][option] = value
                elif key == CFG_LOG_FILE_NAME:
                    logFileName = os.path.join(logDir, value)
                    # value may contain sub-directories; create them up front
                    mkdir(os.path.dirname(logFileName))
            if logFileName is not None:
                BASIC_SETTINGS["handlers"]["file"]["filename"] = logFileName

            logging.config.dictConfig(BASIC_SETTINGS)
            logger = logging.getLogger(SMILE_LOGGER)
            logger2 = logAdaper(logger)
            gTop.set(GD_LOGGER, logger2)  # logger#
            hint = "current code root %s\n--config is--\n%s" % (
                PROJECT_ROOT, getPrintDict(gConfig.cfg))
            # Config dump goes to INFO when a positive job id is configured,
            # otherwise only to DEBUG.
            if gConfig.get(CFG_JOB_ID, 0) > 0:
                logger2.info(hint)
            else:
                logger2.debug(hint)

        return gTop.get(GD_LOGGER)
Beispiel #2
0
    def __init__(self, params=None, subConfigDict=None):
        """
        Merge the supplied configuration into the global config and create
        the logger.

        :param params: highest-priority input config; overrides everything
            else. Usually passed in from the command line.
        :param subConfigDict: second-highest-priority config; can override the
            parent class. Usually the fixed settings of a subclass.
            NOTE: when non-empty, this dict is updated in place with the
            parsed ``params``.
        :return:
        """

        if params or subConfigDict:
            # Single entry point for configuration: global, class-level and
            # input settings all end up reachable through gConfig.
            # parseParams converts the incoming params into a dict.
            configIn = self.parseParams(params)  # input first
            if not subConfigDict:
                subConfigDict = configIn
            else:
                # layer the parsed input on top of the subclass settings
                subConfigDict.update(configIn)  # subClass second
            # remember the raw input config in the global data store
            gTop.get(GD_CFG_IN).update(configIn)
            # push the merged settings into the global config
            gConfig.update(subConfigDict)
            # the working environment name is always kept upper-case
            envName = gConfig.get("env")
            gConfig.set("env", envName.upper())

        # create the logger
        createLogger(gConfig)
Beispiel #3
0
 def changeNode(self):
     """
     Set the change-node flag in the job-error record and call
     ``setExit()`` on the parent.

     :return: None
     """
     logInfo("AntiBlock:change node")
     jobErrors = gTop.get(GD_JOB_ERROR)
     jobErrors[CFG_JOB_CHANGENODE] = 1.0
     self.parent.setExit()
Beispiel #4
0
    def logDB(self, jobName, batch, msg, id):
        """
        Store a log entry in the ``exceptions`` table of the monitor
        database, at most once per ``id``.

        :param jobName: name of the project/job
        :param batch: batch value stored with the entry
        :param msg: log message stored in the ``info`` column
        :param id: value stored in (and looked up by) the ``eid`` column
        :return: ``LOG_NOTEXIST`` when a new row was inserted, ``LOG_EXIST``
            when a row with this ``eid`` was already there, ``0`` when
            nothing was done (falsy id, no db, or no connection)
        """

        from superbase.globalData import gTop
        from superbase.utility.timeUtil import getTimestamp

        tableName = "exceptions"
        # timestamp recorded as the entry's inTime
        now = getTimestamp()
        monitorDb = gTop.get(CFG_DB_MONITOR)
        if not (id and monitorDb and monitorDb.conn):
            return 0

        # NOTE(review): id is interpolated straight into the SQL text;
        # confirm it never carries untrusted input.
        alreadyLogged = monitorDb.getOne(
            "select eid from exceptions where eid='%s'" % id)
        if alreadyLogged:
            return LOG_EXIST

        row = {
            'eid': id,
            'jobName': jobName,
            'batch': batch,
            'info': msg,
            'inTime': now
        }
        monitorDb.insert(tableName, row)
        return LOG_NOTEXIST
Beispiel #5
0
 def alarmPageError(self, url, content, downInfo):
     """
     Report a page whose elements could not be parsed.

     The cause may be blocking or a changed page structure, so the page is
     saved, a warning e-mail is sent for manual inspection and an error is
     logged.

     :param url: address of the offending page
     :param content: page content, handed to ``AntiBlock.saveWrongPage``
     :param downInfo: download details (downNum, downTime, downInterval
         etc.); serialised with ``json.dumps`` into the report
     :return: None
     """
     fname, filePath = AntiBlock.saveWrongPage(content)
     report = {
         'jobName': gConfig.get(CFG_JOB_NAME),
         'batch': gConfig.get(CFG_JOB_BATCH),
         'url': url,
         'filePath': filePath,
         'type': self.blocked,
         'detail': json.dumps(downInfo),
         'inTime': getTimestamp(),
     }
     subject = "block-%s" % self.blocked
     body = getPrintDict(report)
     attachments = [(fname, filePath)]
     # extra recipient only when a job e-mail address is configured
     if gConfig.get(CFG_JOB_EMAIL, None):
         recipients = [gConfig.get(CFG_JOB_EMAIL)]
     else:
         recipients = []

     if not gConfig.get(CFG_JOB_ENABLE, 0):
         Mail.sendEmail(
             title=subject,
             content=body,
             t_address=recipients,
             attaches=attachments
         )
     else:
         # with jobs enabled the report is also stored in the "block" table
         # and the mail goes out through Job
         gTop.get('db').insert("block", report)
         from jobManager.job import Job
         Job().sendEmail(
             title=subject,
             content=body,
             attach=attachments,
             emails2=recipients
         )
     logError("blocked?check the content\n%s" % getPrintDict(report))
Beispiel #6
0
    def testJob(self):
        """
        Log the pid, command line and creation time of the current process.

        Example invocation: env=ONLINE,job.id=18230 testJob

        :return: None
        """
        proc = psutil.Process(os.getpid())
        cmdline = " ".join(proc.cmdline())
        startedAt = getTimestampBySec(proc.create_time())
        logInfo("\nkill Job %s--%s-%s" % (proc.pid, cmdline, startedAt))
Beispiel #7
0
 def getMissedBatch(self):
     """
     Re-open ``tycdetail`` batches that have a status-100 fetcher job but no
     open (``closed=0``) batch row.

     For every such batch, jobs whose status is not 100 are set to status 2
     and the batch row is set to ``closed=0``.
     NOTE(review): the meaning of status 100 / 2 is not visible here.

     :return: None
     """
     db = gTop.get(CFG_DB_MONITOR)

     batches = []
     for row in db.query(
             "select batch from job where name='tycdetail_fetcher' and status=100"
     ):
         batches.append(row[0])

     batches2 = []
     for row in db.query(
             "select batch from batch where batch like 'tycdetail_%' and closed=0"
     ):
         batches2.append(row[0])

     missed = list(set(batches) - set(batches2))
     if not missed:
         return

     sql = "select id, name,status,batch,beginTime from job where batch in ('%s')" % (
         "','".join(missed))
     logDebug(sql)
     for jobId, _name, status, _batch, _beginTime in db.query(sql):
         if status != 100:
             db.update("job", {"status": 2}, "where id=%s" % jobId)
     for missedBatch in missed:
         db.update("batch", {"closed": 0}, "where batch='%s'" % missedBatch)
Beispiel #8
0
    def backup(self, beginId, endId=286, maxRetries=3):
        r"""
        Dump the ``name`` and ``url`` columns of the tables
        ``company_58_<beginId>`` .. ``company_58_<endId>`` into one UTF-8 text
        file per table (``<name>##<url>`` per line).

        Usage:
        python spiderx\testx.py env=ONLINE backup 105 199
        python spiderx\testx.py env=ONLINE backup 21 99

        :param beginId: first table index (int or numeric string)
        :param endId: last table index, inclusive (int or numeric string)
        :param maxRetries: how many times one table is attempted before it is
            skipped; at least one attempt is always made. Without this bound a
            table that fails permanently would be retried forever.
        :return: None
        """
        dest = "e:/company"
        gConfig.set(CFG_DB_BUSINESS, "TYC")

        beginId = int(beginId)
        endId = int(endId)
        maxRetries = int(maxRetries)
        limit1 = 1000  # rows fetched per query
        attempts = 0  # failed attempts for the current table
        while beginId <= endId:
            company = "company_58_%s" % beginId
            try:
                db = gTop.get(CFG_DB_BUSINESS)

                fname = os.path.join(dest, "%s.txt" % company)
                offset1 = 0
                # "w" mode truncates, so a retry starts the file from scratch
                with codecs.open(fname, "w", encoding="utf-8") as f:
                    while True:
                        rows = db.query(
                            "select name,url from %s limit %s offset %s" %
                            (company, limit1, offset1))
                        if not rows:
                            break
                        offset1 += limit1
                        for name, url in rows:
                            f.write("%s##%s\n" % (name, url))

            except Exception:
                logException()
                attempts += 1
                if attempts < maxRetries:
                    continue  # try the same table again
                logInfo("backup-giveup-%s" % company)
            else:
                logInfo("backup-%s" % company)
            # move on to the next table, whether it succeeded or was given up
            attempts = 0
            beginId += 1
Beispiel #9
0
 def testDb(self):
     """
     Manual check: switch to the ONLINE environment, log one job name read
     from the monitor database, then upload ``log/spiderx.log`` through
     ``AliYun``.

     :return: None
     """
     gConfig.set("env", "ONLINE")
     monitorDb = gTop.get(CFG_DB_MONITOR)
     name = monitorDb.getOne("select name from job limit 1")
     logInfo("testing-name-%s" % name)
     from superbase.utility.aliyun import AliYun
     uploader = AliYun()
     uploader.upFile(os.path.join(PROJECT_ROOT, "log/spiderx.log"))