def getLogger(cfg, forceNew=False):
    from superbase.globalData import gTop
    if not gTop.get(GD_LOGGER) or forceNew:  # singleton, unless a new logger is forced
        from superbase.globalData import gConfig
        from superbase.globalData import PROJECT_ROOT
        from superbase.utility.ioUtil import getPrintDict, mkdir
        logDir = os.path.join(PROJECT_ROOT, "log")
        mkdir(logDir)
        for key, value in cfg.items():
            if key in IN_PARAMS_KEY:
                # map the flat config key onto its nested slot in BASIC_SETTINGS
                k1, k2, k3 = IN_PARAMS_KEY[key]
                BASIC_SETTINGS[k1][k2][k3] = value
            elif key == CFG_LOG_FILE_NAME:
                logFileName = os.path.join(logDir, value)
                logFileDir = os.path.split(logFileName)[0]  # renamed from `dir` to avoid shadowing the builtin
                mkdir(logFileDir)
                BASIC_SETTINGS["handlers"]["file"]["filename"] = logFileName
        logging.config.dictConfig(BASIC_SETTINGS)
        logger = logging.getLogger(SMILE_LOGGER)
        logger2 = logAdaper(logger)
        gTop.set(GD_LOGGER, logger2)
        hint = "current code root %s\n--config is--\n%s" % (
            PROJECT_ROOT, getPrintDict(gConfig.cfg))
        if gConfig.get(CFG_JOB_ID, 0) > 0:
            logger2.info(hint)
        else:
            logger2.debug(hint)
    return gTop.get(GD_LOGGER)
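
# Usage sketch (not part of the original module; the log file name below is
# illustrative). getLogger caches the adapted logger in gTop, so repeated
# calls return the same instance unless forceNew=True.
def _demoGetLogger():
    logger = getLogger({CFG_LOG_FILE_NAME: "demo/run.log"})
    logger.info("logger ready")
    # the second call hits the gTop cache and returns the same object
    assert getLogger({}) is logger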
def __init__(self, params=None, subConfigDict=None):
    """
    :param params: highest-priority config; overrides everything, usually
                   passed in from the command line
    :param subConfigDict: second-priority config; overrides the parent class,
                   usually a subclass's fixed settings
    :return:
    """
    newCfg = params or subConfigDict
    if newCfg:
        # Single entry point for configuration: merge global, class, and input
        # configs so everything can be read uniformly through gConfig.
        # parseParams converts the str-typed settings passed in into a dict.
        configIn = self.parseParams(params)  # input first
        if subConfigDict:
            # merge the parsed input params into subConfigDict (input wins)
            subConfigDict.update(configIn)  # subclass second
        else:
            subConfigDict = configIn
        # record the raw input config in the global single-point store
        gTop.get(GD_CFG_IN).update(configIn)
        # merge the combined sub-config into the global config
        gConfig.update(subConfigDict)
        # normalize the working environment name
        gConfig.set("env", gConfig.get("env").upper())  # make sure it is capital
        # create the logger
        createLogger(gConfig)
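
# Usage sketch (hypothetical subclass; the class name BaseJob and the keys
# shown are illustrative, assuming parseParams accepts "key=value" strings
# as the comment above suggests):
#
#   class MyJob(BaseJob):
#       def __init__(self, params=None):
#           # subclass defaults are applied first, then input params win
#           BaseJob.__init__(self, params, subConfigDict={"env": "dev"})
#
#   MyJob(params="env=online")   # gConfig.get("env") -> "ONLINE"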
def changeNode(self):
    """
    Flag that the crawl node should be changed and ask the parent to exit.
    :return:
    """
    logInfo("AntiBlock:change node")
    gTop.get(GD_JOB_ERROR)[CFG_JOB_CHANGENODE] = 1
    self.parent.setExit()
def logDB(self, jobName, batch, msg, id):
    """
    Write a checkpoint/incremental log record to the monitor database.
    :param jobName: name of the job
    :param batch: batch identifier
    :param msg: the DB log message
    :param id: unique exception id used for de-duplication
    :return: LOG_NOTEXIST if a new record was inserted, LOG_EXIST if the
             record already existed, 0 if there is no usable DB connection
    """
    TABLE = "exceptions"
    from superbase.globalData import gTop
    from superbase.utility.timeUtil import getTimestamp
    # take the current timestamp
    curTime = getTimestamp()
    db = gTop.get(CFG_DB_MONITOR)
    if id and db and db.conn:
        # only insert if no record with this eid exists yet
        if not db.getOne("select eid from exceptions where eid='%s'" % id):
            params = {
                'eid': id,
                'jobName': jobName,
                'batch': batch,
                'info': msg,
                'inTime': curTime
            }
            db.insert(TABLE, params)
            return LOG_NOTEXIST
        else:
            return LOG_EXIST
    return 0
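
# Usage sketch (illustrative values; LOG_EXIST / LOG_NOTEXIST are this
# module's constants). The eid acts as the de-duplication key:
#
#   eid = "%s-%s" % (jobName, url)        # any stable unique key
#   ret = self.logDB(jobName, batch, "parse failed", eid)
#   if ret == LOG_EXIST:
#       pass  # this eid was already recorded; skip the duplicate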
def alarmPageError(self, url, content, downInfo):
    """
    A parse error occurred: the page may be blocked, or its structure may
    have changed. Send an alert email for manual inspection.
    :param url:
    :param content:
    :param downInfo: downNum, downTime, downInterval etc.
    :return:
    """
    fname, filePath = AntiBlock.saveWrongPage(content)
    info = {
        'jobName': gConfig.get(CFG_JOB_NAME),
        'batch': gConfig.get(CFG_JOB_BATCH),
        'url': url,
        'filePath': filePath,
        'type': self.blocked,
        'detail': json.dumps(downInfo),
        'inTime': getTimestamp(),
    }
    title = "block-%s" % self.blocked
    content = getPrintDict(info)
    attach = [(fname, filePath)]
    emails2 = [gConfig.get(CFG_JOB_EMAIL)] if gConfig.get(CFG_JOB_EMAIL, None) else []
    if gConfig.get(CFG_JOB_ENABLE, 0):
        # job-manager mode: persist the block record and mail via the Job API
        gTop.get('db').insert("block", info)
        from jobManager.job import Job
        Job().sendEmail(
            title=title,
            content=content,
            attach=attach,
            emails2=emails2
        )
    else:
        # standalone mode: mail directly
        Mail.sendEmail(
            title=title,
            content=content,
            t_address=emails2,
            attaches=attach
        )
    logError("blocked?check the content\n%s" % getPrintDict(info))
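
# Usage sketch (hypothetical caller; the selector and the downInfo keys are
# illustrative):
#
#   if not page.select(".company-name"):   # expected element missing
#       # could be a block or a layout change; alert for manual inspection
#       self.alarmPageError(url, page.text,
#                           {"downNum": 3, "downTime": 1.2, "downInterval": 5})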
def testJob(self):
    """
    env=ONLINE,job.id=18230 testJob
    :return:
    """
    proc = psutil.Process(os.getpid())
    info = "\nkill Job %s--%s-%s" % (proc.pid, " ".join(
        proc.cmdline()), getTimestampBySec(proc.create_time()))
    logInfo(info)
    return  # NOTE: early return; the DB exercise below is currently disabled
    db = gTop.get(CFG_DB_MONITOR)
    gConfig.set("debug.sql", 1)
    db.insert("block", {"jobName": "ttest"})
    db.update("block", {"jobName": "ttest2"}, "where id=1")
    JobUtil().jobStatusInfo({"account": 100, "ip": 300, "num": 3450})
def getMissedBatch(self):
    # Find batches whose jobs report finished (status=100) but which no longer
    # have an open batch row, then reset the related jobs and reopen the
    # batches for a re-run.
    db = gTop.get(CFG_DB_MONITOR)
    batches = [
        batch[0] for batch in db.query(
            "select batch from job where name='tycdetail_fetcher' and status=100"
        )
    ]
    batches2 = [
        batch[0] for batch in db.query(
            "select batch from batch where batch like 'tycdetail_%' and closed=0"
        )
    ]
    result = list(set(batches) - set(batches2))
    if result:
        sql = "select id,name,status,batch,beginTime from job where batch in ('%s')" % (
            "','".join(result))
        logDebug(sql)
        for id, name, status, batch, beginTime in db.query(sql):
            if status != 100:
                db.update("job", {"status": 2}, "where id=%s" % id)
        for batch2 in result:
            db.update("batch", {"closed": 0}, "where batch='%s'" % batch2)
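
# Worked example of the set difference above (illustrative batch names):
#
#   batches  = {"tycdetail_0101", "tycdetail_0102"}  # batches of finished jobs
#   batches2 = {"tycdetail_0102"}                    # batches still open (closed=0)
#   set(batches) - set(batches2)  ->  {"tycdetail_0101"}
#   # tycdetail_0101 has finished jobs but no open batch row, so its
#   # unfinished jobs are reset to status=2 and the batch is reopened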
def backup(self, beginId, endId=286):
    """
    Export each company_58_<id> table to a text file, one line per row:
        python spiderx\testx.py env=ONLINE backup 105 199
        python spiderx\testx.py env=ONLINE backup 21 99
    :param beginId:
    :param endId:
    :return:
    """
    dest = "e:/company"
    gConfig.set(CFG_DB_BUSINESS, "TYC")
    beginId = int(beginId)
    endId = int(endId)
    while beginId <= endId:
        try:
            db = gTop.get(CFG_DB_BUSINESS)
            company = "company_58_%s" % beginId
            fname = os.path.join(dest, "%s.txt" % company)
            offset1 = 0
            limit1 = 1000
            with codecs.open(fname, "w", encoding="utf-8") as f:
                # page through the table until no rows remain
                while True:
                    rows = db.query(
                        "select name,url from %s limit %s offset %s"
                        % (company, limit1, offset1))
                    if not rows:
                        break
                    offset1 += limit1
                    for name, url in rows:
                        f.write("%s##%s\n" % (name, url))
        except Exception:
            logException()
        else:
            # only advance to the next table on success; a failed table is retried
            beginId += 1
            logInfo("backup-%s" % company)
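
# Design note: LIMIT/OFFSET pagination rescans all skipped rows on every
# page, which degrades on large tables. A minimal keyset variant, assuming
# each table has an auto-increment `id` column (an assumption, not confirmed
# by the original schema):
#
#   lastId = 0
#   while True:
#       rows = db.query("select id,name,url from %s where id > %s "
#                       "order by id limit 1000" % (company, lastId))
#       if not rows:
#           break
#       lastId = rows[-1][0]
#       for _id, name, url in rows:
#           f.write("%s##%s\n" % (name, url))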
def testDb(self):
    gConfig.set("env", "ONLINE")
    name = gTop.get(CFG_DB_MONITOR).getOne("select name from job limit 1")
    logInfo("testing-name-%s" % name)
    from superbase.utility.aliyun import AliYun
    AliYun().upFile(os.path.join(PROJECT_ROOT, "log/spiderx.log"))