def checkAllLz(filePath, taskResultId): taskResult = TaskResult.getOne(TaskResult.taskResultId == taskResultId) delayDay = taskResult.taskId.intervalDay #根据当前任务编号目录获取该目录下的所有xml文件 print '目录路径:',filePath fnames=os.listdir(filePath) for name in fnames: xmlName = filePath+'/'+name print 'xmlName:',xmlName data = parseXMLFile(xmlName, 'CheckItem') print 'data:',data # 将需要检查的信息入库 for d in data: cname = d.get('cname') url = d.get('url') if url != '': if url[-1] == '/': url = url.replace('http://', '')[0:-1].replace(' ','') if cname!='': cname = cname.replace(' ','') webArea = d.get('area') webtype = d.get('WebType') # 检查更新company if Company.getOne(Company.coname == cname) is None: c = Company() c.coname = cname c.save(force_insert=True) # 检查更新website if Website.getOne(Website.domain == url) is not None: q = Website.update(domain=url, type=webtype, area=webArea).where(Website.domain == url) q.execute() else: com = Company.getOne(Company.coname == cname) w = Website() w.regID = com w.domain = url w.area = webArea w.type = webtype w.save(force_insert=True) updateWeb = Website.getOne(Website.domain == url) subTask = TaskInfo() subTask.taskResultId = taskResult subTask.webId = updateWeb subTask.state = '1' subTask.save(force_insert=True) taskResultId = str(taskResultId) if delayDay > 0: # 需要周期执行的任务 executeMultiTaskInfo(taskResultId) else: #logger.debug("开始调用单次任务") # 单次执行的任务 executeSingleTaskInfo(taskResultId)
def intervalFetch(taskResultId, lastTaskResultId, delayTag):
    """Clone every TaskInfo row of the previous run onto the new run and
    schedule an asynchronous fetch for each clone.

    :param taskResultId: result id the new TaskInfo rows are attached to
    :param lastTaskResultId: result id of the previous run to copy from
    :param delayTag: forwarded unchanged to the celery fetch task
    """
    previousTasks = TaskInfo.select().where(TaskInfo.taskResultId == lastTaskResultId)
    for old in previousTasks:
        clone = TaskInfo()
        clone.taskResultId = taskResultId
        clone.state = ''
        clone.cname = old.cname
        clone.url = old.url
        clone.save(force_insert=True)
        # Hand the freshly-inserted row to the celery worker for fetching
        fetchCycle.apply_async((clone.id, taskResultId, delayTag), queue="celery")