def deleteFailtJobId(self):
    """Drain the error queue and cascade each failure to dependent jobs.

    Every ``(jobId, crontabTime)`` entry currently in ``self.errorQueue`` is
    taken off the queue.  The ids listed under that job in
    ``bigdog.JOBRELATIONDICT`` (its downstream dependents) are queued as
    failures themselves; any of them still present in
    ``self.jobRelationDict`` is removed from it and logged with status '3'
    and error code '-99999'.  The method re-invokes itself while the queue is
    non-empty so the cascade reaches the whole downstream chain.

    Returns:
        None normally; "-99999" after logging an unexpected error.
    """
    try:
        pendingFailures = self.errorQueue.qsize()
        for _ in range(pendingFailures):
            failedJobId, crontabTime = self.errorQueue.get()

            if failedJobId not in bigdog.JOBRELATIONDICT.keys():
                # Nothing depends on this job.
                print('fffxxxx------->' + str(failedJobId))
                continue

            for dependentId in bigdog.JOBRELATIONDICT[failedJobId]:
                # Re-queue the dependent so its own dependents are handled too.
                self.errorQueue.put((dependentId, crontabTime))
                if dependentId not in self.jobRelationDict.keys():
                    continue
                self.jobRelationDict.pop(dependentId)
                Producer.setJobLogStatus(self, dependentId, crontabTime, '3',
                                         '-99999')

        # Dependents queued above still need processing.
        if self.errorQueue.qsize() != 0:
            Producer.deleteFailtJobId(self)
    except:
        trace = traceback.format_exc()
        print('Producer[deleteFailtJobId] ->' + trace)
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), trace, 'a')
        return "-99999"
def deleteJobId(self):
    """Drain the success queue and release the dependencies of finished jobs.

    For every job id currently in ``self.successQueue`` the job is marked as
    successful (``self.taskStatusDist[jobId] = 2``) and its id is removed from
    the pending-dependency list of each job that depends on it (looked up via
    ``bigdog.JOBRELATIONDICT``) and is still present in
    ``self.jobRelationDict``.

    A dependent whose list no longer contains the finished id is skipped
    instead of raising ``ValueError``; previously that aborted the whole
    drain after the item had already been taken off the queue.

    Returns:
        None normally; "-99999" after logging an unexpected error.
    """
    try:
        for _ in range(self.successQueue.qsize()):
            finishedJobId = self.successQueue.get()
            self.taskStatusDist[finishedJobId] = 2  # 2 = job succeeded

            if finishedJobId not in bigdog.JOBRELATIONDICT.keys():
                continue  # no downstream jobs

            for dependentId in bigdog.JOBRELATIONDICT[finishedJobId]:
                if dependentId not in self.jobRelationDict.keys():
                    continue
                remaining = self.jobRelationDict[dependentId]
                if finishedJobId in remaining:
                    remaining.remove(finishedJobId)
                # Write the list back explicitly: presumably jobRelationDict
                # is a shared proxy dict where mutating the fetched list alone
                # would not be visible -- TODO confirm.
                self.jobRelationDict[dependentId] = remaining
    except Exception:
        errors = traceback.format_exc()
        print('Producer[deleteJobId] ->' + errors)
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), errors, 'a')
        return "-99999"
def compareTimes(self, executeTime, ruleName, executeDay):
    """Decide whether a job's scheduled time has been reached.

    Args:
        executeTime: configured start time as 'HH:MM:SS'.
        ruleName: schedule rule; the day-based rules are delegated to
            ``Producer.compareTimeRule``.  For 'HOUR' only the minute/second
            part of the clock is compared.
        executeDay: rule argument forwarded to ``Producer.compareTimeRule``.

    Returns:
        "0"      -- configured time reached and the day rule allows a run.
        "-1"     -- configured time reached but the day rule says skip today.
        "-99999" -- not yet time, any other rule status, or an error.
    """
    try:
        cfgHour, cfgMinute, cfgSecond = executeTime.split(':')
        clock = time.strftime('%H:%M:%S', time.localtime())
        nowHour, nowMinute, nowSecond = clock.split(':')

        dayRules = ('DAY', 'WEEK', 'MONTH', 'ODDDAY', 'EVENDAY', 'LASTDAY',
                    'FIRSTDAY', 'WORKDAY')
        dayRuleStatus = "0"
        if ruleName in dayRules:
            dayRuleStatus = Producer.compareTimeRule(self, executeDay,
                                                     ruleName)

        if ruleName == 'HOUR':
            # Hourly jobs ignore the hour component on both sides.
            cfgHour = nowHour = "00"

        configuredSeconds = (int(cfgHour) * 3600 + int(cfgMinute) * 60 +
                             int(cfgSecond))
        currentSeconds = (int(nowHour) * 3600 + int(nowMinute) * 60 +
                          int(nowSecond))

        if configuredSeconds > currentSeconds:
            return "-99999"
        if dayRuleStatus == "0":
            return "0"
        if dayRuleStatus == "-1":
            return "-1"  # not scheduled to run today
        return "-99999"
    except:
        trace = traceback.format_exc()
        print('Producer[compareTimes] ->' + trace)
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), trace, 'a')
        return "-99999"
def sendMail(subject, sendMail, textContent=None, htmlContent=None,
             attFilePath=None):
    """Send a mail through the SMTP account configured in ``metaInfo``.

    Args:
        subject: value for the Subject header.
        sendMail: comma-separated recipient addresses.
        textContent: optional plain-text body part.
        htmlContent: optional HTML body part.
        attFilePath: optional path of a file to attach (built by
            ``getAttFileContent``).

    Returns:
        "0" on success; "-99999" after logging any error.

    The SMTP connection is now always closed, including when connecting,
    logging in or sending fails (previously it was left open on failure).
    """
    try:
        msg = MIMEMultipart('alternative')
        msg['Subject'] = subject
        if textContent is not None:
            msg.attach(MIMEText(textContent, 'plain'))
        if htmlContent is not None:
            msg.attach(MIMEText(htmlContent, 'html'))
        msg['From'] = formataddr(
            [metaInfo.getmailUserHead(), metaInfo.getmailUser()])
        # NOTE(review): no 'To' header is set; recipients are only passed in
        # the SMTP envelope below -- confirm this is intended.
        if attFilePath is not None:
            msg.attach(getAttFileContent(attFilePath))

        smtp = smtplib.SMTP()
        try:
            smtp.connect(metaInfo.getmailHost(), metaInfo.getmailPort())
            smtp.login(metaInfo.getmailUser(), metaInfo.getmailUserPassWord())
            smtp.sendmail(metaInfo.getmailUser(), sendMail.split(','),
                          msg.as_string())
            smtp.quit()
        finally:
            # close() is a no-op after a successful quit() and releases the
            # socket when any step above raised.
            smtp.close()
        return "0"
    except Exception:
        errors = traceback.format_exc()
        print('baseMail[sendMail] ->' + errors)
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), errors, 'a')
        return "-99999"
def deleteJobId(self, jobId):
    """Report a successfully executed job by putting its id on the success queue.

    Returns:
        None normally; "-99999" after logging an unexpected error.
    """
    try:
        finishedQueue = self.successQueue
        finishedQueue.put(jobId)
    except:
        trace = traceback.format_exc()
        print('Consumer[deleteJobId] ->' + trace)
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), trace, 'a')
        return "-99999"
def deleteFailtJobId(self, jobId, crontabTime):
    """Report a failed job by putting ``(jobId, crontabTime)`` on the error queue.

    Returns:
        None normally; "-99999" after logging an unexpected error.
    """
    try:
        failureRecord = (jobId, crontabTime)
        self.errorQueue.put(failureRecord)
    except:
        trace = traceback.format_exc()
        print('Consumer[deleteFailtJobId] ->' + trace)
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), trace, 'a')
        return "-99999"
def setJobLogStatus(self, key, crontabTime, jobStatus, jobErrorStatus):
    """Rewrite the step-0 log row of a job with the given status.

    The existing step-0 row for ``key`` is deleted, a fresh one is inserted
    with ``crontabTime`` and '-' as the log path, and it is then updated with
    ``jobStatus`` / ``jobErrorStatus``.

    Returns:
        None normally; "-99999" after logging an unexpected error.
    """
    try:
        firstStep = 0
        jobLog.deleteJobLog(key, firstStep)
        jobLog.insertJobLog(key, firstStep, crontabTime, '-')
        jobLog.updateJobLog(key, firstStep, jobStatus, jobErrorStatus)
    except:
        trace = traceback.format_exc()
        print('Producer[setJobLogStatus] ->' + trace)
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), trace, 'a')
        return "-99999"
def getAttFileContent(attFilePath):
    """Build a base64-encoded MIME attachment part from a file.

    Args:
        attFilePath: path of the file to attach; the text after the last '/'
            is used as the attachment file name.

    Returns:
        The ``MIMEText`` attachment part, or "-99999" after logging an error.

    The file is now read inside a ``with`` block so the handle is closed
    deterministically (it was previously left for the garbage collector).
    """
    try:
        with open(attFilePath, 'rb') as attFile:
            payload = attFile.read()
        att = MIMEText(payload, 'base64', 'utf-8')
        # NOTE(review): item assignment appends a header, so the part ends up
        # with this Content-Type in addition to the one MIMEText already set.
        att["Content-Type"] = 'application/octet-stream'
        att["Content-Disposition"] = ('attachment; filename="' +
                                      attFilePath.split('/')[-1] + '"')
        return att
    except Exception:
        errors = traceback.format_exc()
        print('baseMail[getAttFileContent] ->' + errors)
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), errors, 'a')
        return "-99999"
def run():
    """Run one job group: start producer and consumer, then record the result.

    When ``bigdog.STARTJOBRELATIONDICT`` is non-empty, a manager-backed copy
    of it is shared between a ``Producer`` and a ``Consumer``.  Both are
    started and joined, after which the group's run statistics are read; if
    they come back as a tuple, the group log is updated (status 2 when there
    are no errors, otherwise 3) and a summary message is printed and sent.

    Returns:
        None normally; "-99999" after logging an unexpected error.
    """
    try:
        if not len(bigdog.STARTJOBRELATIONDICT) > 0:
            print('配置信息有问题')
            return None

        manager = multiprocessing.Manager()
        sharedRelations = manager.dict(bigdog.STARTJOBRELATIONDICT)
        jobLog.deleteGroupLog()
        jobLog.insertGroupLog()

        # Producer side.
        producer = Producer.Producer(
            sharedRelations, workQueue, metaInfo.getvGroupId(),
            metaInfo.getvDate(), metaInfo.getvSnapshot(), successQueue,
            errorQueue, {}, producerOver)
        # Consumer side.
        consumer = Consumer.Consumer(
            sharedRelations, workQueue, metaInfo.getvGroupId(),
            metaInfo.getvDate(), metaInfo.getvSnapshot(), successQueue,
            errorQueue, int(bigdog.GROUPINFO[1]), producerOver)
        producer.start()
        consumer.start()
        consumer.join()
        producer.join()

        # Run statistics for the group.
        groupRunInfo = jobLog.getGroupRunInfo()
        if not isinstance(groupRunInfo, tuple):
            return None

        confPv, relaPv, succPv, errPv = groupRunInfo
        groupStatus = 2 if errPv == 0 else 3
        jobLog.updateGroupLog(groupStatus, confPv, relaPv, succPv, errPv)

        template = ('bigdog[' + metaInfo.getvGroupId() +
                    ':%s:%s]{confPv:%d,relaPv:%d,succPv:%d,errPv:%d}')
        msgContents = template % (metaInfo.getvDate(),
                                  metaInfo.getvSnapshot(), confPv, relaPv,
                                  succPv, errPv)
        print(msgContents)
        baseModel.sendMsgInfo(
            'bigdog group[' + metaInfo.getvGroupId() + '] run status',
            bigdog.GROUPINFO[-1], msgContents)
        return None
    except:
        trace = traceback.format_exc()
        print('mainInfo[run] ->' + trace)
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), trace, 'a')
        return "-99999"
def compareMonth(self, executeDay):
    """Check whether the day of month of ``self.vDate`` is a scheduled day.

    Args:
        executeDay: comma-separated day numbers without leading zeros
            (the day taken from characters 6-8 of ``self.vDate`` is
            normalised through ``int`` before comparison).

    Returns:
        "0" when the job should run, "-1" when it should not, "-99999" after
        logging an error.
    """
    try:
        dayOfMonth = str(int(self.vDate[6:8]))
        scheduledDays = executeDay.split(',')
        return "0" if dayOfMonth in scheduledDays else "-1"
    except:
        trace = traceback.format_exc()
        print('Producer[compareMonth] ->' + trace)
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), trace, 'a')
        return "-99999"
def compareWeek(self, executeDay):
    """Check whether the weekday of ``self.vDate`` is a scheduled weekday.

    Args:
        executeDay: comma-separated weekday values, compared against what
            ``baseModel.getWeekDay(self.vDate, '%Y%m%d')`` returns.

    Returns:
        "0" when the job should run, "-1" when it should not, "-99999" after
        logging an error.
    """
    try:
        todayWeekDay = baseModel.getWeekDay(self.vDate, '%Y%m%d')
        scheduledWeekDays = executeDay.split(',')
        return "0" if todayWeekDay in scheduledWeekDays else "-1"
    except:
        trace = traceback.format_exc()
        print('Producer[compareWeek] ->' + trace)
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), trace, 'a')
        return "-99999"
def runJob(self, jobId):
    """Run one job's shell script, retrying on failure, and report the outcome.

    The job definition is read from ``bigdog.JOBINFODICT[jobId]``; a missing
    mail list or retry count falls back to ``bigdog.GROUPINFO``.  Each attempt
    gets its own log file and job-log row.  On the first attempt that exits
    with status 0 the job is reported through ``Consumer.deleteJobId``; if
    every attempt fails, an alarm message is sent and the job is reported
    through ``Consumer.deleteFailtJobId`` so that its dependents are taken
    offline.

    Success is tracked with an explicit flag rather than inferred from the
    index of the last failed attempt, so a first-attempt success can never be
    mistaken for "all attempts failed" (e.g. when the retry count is 1).

    Returns:
        None normally; "-99999" after logging an unexpected error.
    """
    try:
        # statusId: 1 started, 2 ok, 3 error, 4 not executed
        (jobName, jobPath, executeTime, executeDay, retryCount, ruleName,
         mailList, statusId) = bigdog.JOBINFODICT[jobId]
        if mailList is None:
            mailList = bigdog.GROUPINFO[-1]
        crontabTime = baseModel.getTimeFormat(time.time(),
                                              forMat='%Y%m%d%H%M%S')
        if retryCount is None:
            retryCount = bigdog.GROUPINFO[2]

        succeeded = False
        lastLogPath = None  # stays None when no attempt was made at all
        for attempt in range(retryCount):
            # Log file name: script name minus its last two characters,
            # then 'log.<snapshot>.<attempt>'.
            tmpLogName = jobPath.split('/')[-1][0:-2] + 'log.' + self.vSnapshot
            tmpLogPathName = (metaInfo.LOGDIRPATH + '/' + metaInfo.getvDate() +
                              '/' + metaInfo.getvGroupId() + '/' + tmpLogName +
                              '.' + str(attempt))
            lastLogPath = tmpLogPathName
            baseModel.rmFile(tmpLogPathName)
            jobLog.deleteJobLog(jobId, attempt)
            jobLog.insertJobLog(jobId, attempt, crontabTime, tmpLogPathName)

            baseCmd = ('bash ' + jobPath + ' ' + self.vDate + ' ' + mailList +
                       ' ' + self.vSnapshot + ' ' + tmpLogPathName + ' >>' +
                       tmpLogPathName + ' 1>>' + tmpLogPathName + ' 2>>' +
                       tmpLogPathName)
            print(baseCmd)
            status = os.system(baseCmd)

            if status == 0:
                jobLog.updateJobLog(jobId, attempt, 2, status)
                # Hand the finished job to the success queue.
                Consumer.deleteJobId(self, jobId)
                succeeded = True
                break
            jobLog.updateJobLog(jobId, attempt, 3, status)

        if not succeeded and lastLogPath is not None:
            # Every attempt failed: raise the alarm and take dependents offline.
            msgContent = ('bigdog[' + self.vDate + ':' + self.vSnapshot +
                          ']{' + bigdog.GROUPINFO[0] + '---' + jobPath +
                          ' is execution error}-->' + lastLogPath)
            baseModel.sendMsgInfo(metaInfo.taskSubject, mailList, msgContent)
            Consumer.deleteFailtJobId(self, jobId, crontabTime)
    except Exception:
        errors = traceback.format_exc()
        print('Consumer[runJob] ->' + errors)
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), errors, 'a')
        return "-99999"
def run(self):
    """Producer loop: release finished jobs and enqueue jobs that are ready.

    Each pass first drains the success and error queues, then returns "0"
    when ``self.jobRelationDict`` is empty and ``self.producerOver`` holds an
    item.  Otherwise every job whose pending-dependency list is empty is
    checked with ``Producer.compareTimes``:

    * status "0" and ``statusId`` in (1, 3, 4): put on the work queue;
    * status "0" with any other ``statusId``: treated as already successful
      in this batch and forwarded to the success queue;
    * status "-1": logged as successful-but-not-run ('2', '-99999') and
      forwarded to the success queue;
    * anything else: left in place for a later pass.

    The loop sleeps 5 seconds between passes.

    Returns:
        "0" when the batch is finished; "-99999" after logging an error.
    """
    try:
        while True:
            Producer.deleteJobId(self)  # consume the success queue
            Producer.deleteFailtJobId(self)  # consume the error queue

            batchDrained = len(dict(self.jobRelationDict)) == 0
            if batchDrained and self.producerOver.qsize() > 0:
                return "0"

            for key, value in (self.jobRelationDict).items():
                if len(value) != 0:
                    continue  # still waiting on upstream jobs
                print(key)
                # statusId: 1 started, 2 ok, 3 error, 4 not executed
                (jobName, jobPath, executeTime, executeDay, retryCount,
                 ruleName, mailList, statusId) = bigdog.JOBINFODICT[key]
                status = Producer.compareTimes(self, executeTime, ruleName,
                                               executeDay)
                if status not in ("0", "-1"):
                    continue  # not due yet (or error): retry next pass

                if status == "0" and statusId in (1, 3, 4):
                    # Runnable: enqueue first, then drop from the pending dict.
                    self.queue.put(key)
                    self.jobRelationDict.pop(key)
                    continue

                if status == "-1":
                    # Not scheduled today: log as success, not executed.
                    Producer.setJobLogStatus(
                        self, key,
                        baseModel.getTimeFormat(time.time(),
                                                forMat='%Y%m%d%H%M%S'), '2',
                        '-99999')
                # Either skipped today or already successful in this batch.
                self.jobRelationDict.pop(key)
                self.successQueue.put(key)

            time.sleep(5)  # scan every 5 seconds
            print('Producer----> runing|' +
                  str(len(dict(self.jobRelationDict))))
        print('Producer----> over')
    except:
        trace = traceback.format_exc()
        print('Producer[run] ->' + trace)
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), trace, 'a')
        return "-99999"
def getGroupRunInfo():
    """Fetch the run statistics row of the current group.

    Formats the 'groupRunInfo' SQL from ``dbConfig.LOGSQLDICT`` with the
    current snapshot, group id and date, runs it through ``dbConnet.select``
    and returns the first row of the result.

    Returns:
        The first result row, or "-99999" after logging an error (including
        an empty result).

    The error log tag now names this function; it previously read
    'jobLog[updateGroupLog]', which pointed at the wrong function.
    """
    try:
        (dbType, dbUserName, dbUserPassWord, dbName, dbPort, dbHost,
         charset) = dbConfig.DBINFO[0:]
        sqlName = dbConfig.LOGSQLDICT['groupRunInfo'].format(
            metaInfo.getvSnapshot(), metaInfo.getvGroupId(),
            metaInfo.getvDate())
        result = dbConnet.select(dbType, dbUserName, dbUserPassWord, dbName,
                                 dbPort, dbHost, charset, sqlName)
        return result[0]
    except Exception:
        errors = traceback.format_exc()
        print('jobLog[getGroupRunInfo] ->' + errors)
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), errors, 'a')
        return "-99999"
def insertGroupLog():
    """Insert the group log row for the current group, date and snapshot.

    Formats the 'INSERTGROUPLOG' SQL from ``dbConfig.LOGSQLDICT`` and executes
    it through ``dbConnet.insertDeteleUpdate``.

    Returns:
        "0" when the statement reports "0"; "-99999" otherwise or after
        logging an error.
    """
    try:
        (dbType, dbUserName, dbUserPassWord, dbName, dbPort, dbHost,
         charset) = dbConfig.DBINFO[0:]
        sqlTemplate = dbConfig.LOGSQLDICT['INSERTGROUPLOG']
        sqlName = sqlTemplate.format(metaInfo.getvGroupId(),
                                     metaInfo.getvDate(),
                                     metaInfo.getvSnapshot())
        outcome = dbConnet.insertDeteleUpdate(dbType, dbUserName,
                                              dbUserPassWord, dbName, dbPort,
                                              dbHost, charset, sqlName)
        return "0" if outcome == "0" else "-99999"
    except:
        trace = traceback.format_exc()
        print('jobLog[insertGroupLog] ->' + trace)
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), trace, 'a')
        return "-99999"
def run(self):
    """Consumer loop: start worker threads for queued jobs until all are done.

    Every 5 seconds the loop looks at the work queue and the shared
    ``self.jobRelationDict``:

    * when both are empty and no thread named 'bigDogJobList-*' is alive, it
      puts 'ok' on ``self.producerOver`` and returns "0";
    * otherwise, while the live thread count is below
      ``self.parallelNums + 2``, it takes as many job ids off the work queue
      as there are free slots and runs each in a new thread via
      ``Consumer.runJob``.

    Uses ``threading.active_count()`` instead of the deprecated
    ``threading.activeCount()`` alias.

    Returns:
        "0" when all work is finished; "-99999" after logging an error.
    """
    try:
        while True:
            print('C---->' + str(self.jobRelationDict))
            time.sleep(5)  # poll every 5 seconds
            threadNums = threading.active_count()
            workQueueSize = self.queue.qsize()
            jobSize = len(self.jobRelationDict)

            if jobSize == 0 and workQueueSize == 0:
                jobStillRunning = any(
                    worker.name.startswith('bigDogJobList-')
                    for worker in threading.enumerate())
                if not jobStillRunning:
                    self.producerOver.put('ok')
                    return "0"
                continue

            # The "+ 2" presumably allows for the non-worker threads of this
            # process -- TODO confirm.
            threadLimit = self.parallelNums + 2
            if threadNums >= threadLimit:
                continue  # already at the parallelism limit

            executeSize = min(workQueueSize, threadLimit - threadNums)
            for _ in range(executeSize):
                jobId = self.queue.get()
                tmpThread = threading.Thread(
                    target=Consumer.runJob,
                    args=(self, jobId),
                    name='bigDogJobList-' +
                    str(random.randint(1, 1000000000000)))
                tmpThread.start()
        # NOTE(review): unreachable as written -- the loop only exits via
        # return; kept from the original.
        print('Consumer----> over')
    except Exception:
        errors = traceback.format_exc()
        print('Consumer[run] ->' + errors)
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), errors, 'a')
        return "-99999"
def updateJobLog(jobId, executeStep, vStatus, vErrorCode):
    """Update the status and error code of one job-log row.

    Formats the 'UPDATELOG' SQL from ``dbConfig.LOGSQLDICT`` with the status,
    error code, current snapshot, group id, job id, date and step, then
    executes it through ``dbConnet.insertDeteleUpdate``.

    Returns:
        "0" when the statement reports "0"; "-99999" otherwise or after an
        error (errors are written to the log file but not printed).
    """
    try:
        (dbType, dbUserName, dbUserPassWord, dbName, dbPort, dbHost,
         charset) = dbConfig.DBINFO[0:]
        sqlTemplate = dbConfig.LOGSQLDICT['UPDATELOG']
        sqlName = sqlTemplate.format(vStatus, vErrorCode,
                                     metaInfo.getvSnapshot(),
                                     metaInfo.getvGroupId(), jobId,
                                     metaInfo.getvDate(), executeStep)
        outcome = dbConnet.insertDeteleUpdate(dbType, dbUserName,
                                              dbUserPassWord, dbName, dbPort,
                                              dbHost, charset, sqlName)
        return "0" if outcome == "0" else "-99999"
    except:
        trace = traceback.format_exc()
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), trace, 'a')
        return "-99999"
def compareTimeRule(self, executeDay, ruleName):
    """Evaluate a day-based schedule rule against ``self.vDate``.

    Args:
        executeDay: rule argument, forwarded for 'WEEK' and 'MONTH'.
        ruleName: one of 'DAY', 'WEEK', 'MONTH', 'ODDDAY', 'EVENDAY',
            'LASTDAY', 'FIRSTDAY', 'WORKDAY'.

    Returns:
        "0" when the job should run on ``self.vDate``, "-1" when it should
        not, None for an unrecognised rule name, "-99999" after logging an
        error.
    """
    try:
        if ruleName == 'DAY':  # every day
            return "0"
        if ruleName == 'WEEK':  # selected weekdays
            return Producer.compareWeek(self, executeDay)
        if ruleName == 'MONTH':  # selected days of the month
            return Producer.compareMonth(self, executeDay)
        if ruleName == 'ODDDAY':  # odd-numbered days
            return "0" if int(self.vDate[6:8]) % 2 == 1 else "-1"
        if ruleName == 'EVENDAY':  # even-numbered days
            return "0" if int(self.vDate[6:8]) % 2 == 0 else "-1"
        if ruleName == 'LASTDAY':  # last day of the month
            # Presumably the day-of-month one day after vDate: "01" means
            # vDate is a month end -- TODO confirm getDateForMat semantics.
            followingDay = baseModel.getDateForMat(self.vDate, '%Y%m%d', '%d',
                                                   'days', 1)
            return "0" if followingDay == "01" else "-1"
        if ruleName == 'FIRSTDAY':  # first day of the month
            return "0" if self.vDate[6:8] == "01" else "-1"
        if ruleName == 'WORKDAY':  # skip Saturday and Sunday
            isWeekend = baseModel.getWeekDay(self.vDate,
                                             '%Y%m%d') in ('6', '7')
            return "-1" if isWeekend else "0"
        return None
    except:
        trace = traceback.format_exc()
        print('Producer[compareTimeRule] ->' + trace)
        baseModel.setWriteContentList(metaInfo.getvLogFilePath(), trace, 'a')
        return "-99999"