def singalCheck(taskResultId): # 根据任务编号查询所有任务是否已经完成 executeCount = TaskInfo.select().where( (TaskInfo.taskResultId == taskResultId) & (TaskInfo.state != 1) & (TaskInfo.state != 6)).count() subTaskResult = TaskInfo.select().where(TaskInfo.taskResultId == taskResultId).count() print 'subTaskResult', subTaskResult print 'executeCount', executeCount if executeCount == subTaskResult and executeCount != 0: #将本次任务检查结果明细生成到result目录 checkTaskResult = TaskResult.getOne(TaskResult.taskResultId == taskResultId) taskId = checkTaskResult.taskId.taskId result_taskresultid = checkTaskResult.taskResultId taskCount = TaskInfo.select().where( (TaskInfo.taskResultId == taskResultId)).count() onceCount = config.getTaskResultCount packCount = int(math.ceil(float(taskCount)/float(onceCount))) #生成检查结果明细文件 print '开始生成检查结果明细文件' print 'packCount:',packCount genTaskResultFile(taskId,result_taskresultid,packCount) print '单次任务检查完毕结束当前任务,开始发送邮件通知' # 说明该任务结果已经发送完毕,从apscheduler任务调度中删除该任务 scheduler.remove_job(str(taskResultId)) bigTaskId = checkTaskResult.taskId.taskId taskResultId = checkTaskResult.taskResultId # 更新上一次子任务的状态 oTime = format(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S') q = TaskResult.update(state="2", overTime=oTime).where( TaskResult.taskResultId == taskResultId) q.execute() #修改任务结果对应的大任务状态为已完成 q = Task.update(state="2").where(Task.taskId==taskId) q.execute() # 任务执行完毕后发送邮件通知 mutil = MailUtil() # 获取当前任务绑定的邮箱账号 toEmail = TaskResult.getOne(TaskResult.taskResultId == taskResultId).taskId.userId.email if toEmail is not None: from_addr = config.SEND_EMAIL password = config.SEND_EMAIL_PASSWORD to_addr = toEmail smtp_server = config.SMTP_SERVER msg = str.format(config.MAIL_NOTICE, bigTaskId, bigTaskId, str(taskResultId)) subject = config.MAIL_SUBJECT mutil.sendMail.delay(from_addr, password, to_addr, smtp_server, msg, subject) else: print "该任务未绑定接收邮箱,任务结果编号:",taskResultId
def checkAllLz(filePath, taskResultId): taskResult = TaskResult.getOne(TaskResult.taskResultId == taskResultId) delayDay = taskResult.taskId.intervalDay #根据当前任务编号目录获取该目录下的所有xml文件 print '目录路径:',filePath fnames=os.listdir(filePath) for name in fnames: xmlName = filePath+'/'+name print 'xmlName:',xmlName data = parseXMLFile(xmlName, 'CheckItem') print 'data:',data # 将需要检查的信息入库 for d in data: cname = d.get('cname') url = d.get('url') if url != '': if url[-1] == '/': url = url.replace('http://', '')[0:-1].replace(' ','') if cname!='': cname = cname.replace(' ','') webArea = d.get('area') webtype = d.get('WebType') # 检查更新company if Company.getOne(Company.coname == cname) is None: c = Company() c.coname = cname c.save(force_insert=True) # 检查更新website if Website.getOne(Website.domain == url) is not None: q = Website.update(domain=url, type=webtype, area=webArea).where(Website.domain == url) q.execute() else: com = Company.getOne(Company.coname == cname) w = Website() w.regID = com w.domain = url w.area = webArea w.type = webtype w.save(force_insert=True) updateWeb = Website.getOne(Website.domain == url) subTask = TaskInfo() subTask.taskResultId = taskResult subTask.webId = updateWeb subTask.state = '1' subTask.save(force_insert=True) taskResultId = str(taskResultId) if delayDay > 0: # 需要周期执行的任务 executeMultiTaskInfo(taskResultId) else: #logger.debug("开始调用单次任务") # 单次执行的任务 executeSingleTaskInfo(taskResultId)
def fetchCycle(subtaskId, taskResultId, delayTag): if delayTag: checkTaskResult = TaskResult.getOne(TaskResult.taskResultId == taskResultId) bigTaskId = checkTaskResult.taskId.taskId # 根据任务结果编号获取本次需要检查的任务记录 subTask = TaskInfo.getOne(TaskInfo.id == subtaskId) if subTask is not None: companyName = subTask.cname url = subTask.url # 抓取检测 fetchWebsite(companyName, bigTaskId, subTask.id, url) else: print 'taskinfo记录为空:', subTask
def intervalDelayTask(taskResultId): # 获取celery中当前已经正在进行的任务数 nowCount = TaskInfo.select().order_by(TaskInfo.id).where((TaskInfo.state == '6')) if nowCount >= 0: # 当celery中正在做的任务数量少于指定的数量时,向celery添加需要执行的任务 if nowCount <= config.celeryMaxCount: taskCount = TaskInfo.select().where( (TaskInfo.state == '1') & (TaskInfo.taskResultId == taskResultId)).count() print 'taskCount:', taskCount if taskCount == 0: singalCheck(taskResultId) # 查询该任务设置的延迟时间开启下一次需要检查的任务 interval = Task.getOne(Task.taskId == ( TaskResult.select(TaskResult.taskId).where(TaskResult.taskResultId == taskResultId))).intervalDay print 'interval:', interval if interval != "": # 生成需要轮巡的新主任务结果记录 taskResult = TaskResult() lastTaskResult = TaskResult.getOne(TaskResult.taskResultId == taskResultId) taskResult.taskId = lastTaskResult.taskId taskResult.state = '1' taskResult.save() #将上一次的任务结果编号所对应的webId指定给新的任务结果 psql_db.transaction() try: query = (TaskInfo .insert_from( fields=[TaskInfo.webId], query=TaskInfo.select(TaskInfo.webId).where(TaskInfo.taskResultId == lastTaskResult))) query.execute() q = TaskInfo.update(taskResultId=taskResult).where(TaskInfo.taskResultId.is_null()) q.execute() except Exception, e: print e psql_db.rollback() # 获取当前时间 ctime = datetime.datetime.now() delay_time = int(interval) stime = ctime + datetime.timedelta(seconds=delay_time) scheduler.add_job(intervalDelayTask, "date", next_run_time=stime, args=[taskResult.taskResultId], jobstore="default", id=taskResult.taskResultId) else: tasks = TaskInfo.select().order_by(TaskInfo.id).paginate(0, config.sendCeleryCount).where( (TaskInfo.taskResultId == taskResultId) & (TaskInfo.state == '1')) for subTask in tasks: subtaskId = subTask.id fetchCycle.apply_async((subtaskId,), queue="celery") # 更新taskinfo状态为已发送 q = TaskInfo.update(state='6').where(TaskInfo.id == subtaskId) q.execute()