Beispiel #1
0
def singalCheck(taskResultId):
    # 根据任务编号查询所有任务是否已经完成
    executeCount = TaskInfo.select().where(
            (TaskInfo.taskResultId == taskResultId) & (TaskInfo.state != 1) & (TaskInfo.state != 6)).count()
    subTaskResult = TaskInfo.select().where(TaskInfo.taskResultId == taskResultId).count()
    print 'subTaskResult', subTaskResult
    print 'executeCount', executeCount
    if executeCount == subTaskResult and executeCount != 0:
        #将本次任务检查结果明细生成到result目录
        checkTaskResult = TaskResult.getOne(TaskResult.taskResultId == taskResultId)
        taskId = checkTaskResult.taskId.taskId
        result_taskresultid = checkTaskResult.taskResultId
        taskCount = TaskInfo.select().where(
                (TaskInfo.taskResultId == taskResultId)).count()
        onceCount = config.getTaskResultCount
        packCount = int(math.ceil(float(taskCount)/float(onceCount)))
        #生成检查结果明细文件
        print '开始生成检查结果明细文件'
        print 'packCount:',packCount
        genTaskResultFile(taskId,result_taskresultid,packCount)


        print '单次任务检查完毕结束当前任务,开始发送邮件通知'
        # 说明该任务结果已经发送完毕,从apscheduler任务调度中删除该任务
        scheduler.remove_job(str(taskResultId))

        bigTaskId = checkTaskResult.taskId.taskId
        taskResultId = checkTaskResult.taskResultId
        # 更新上一次子任务的状态
        oTime = format(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
        q = TaskResult.update(state="2", overTime=oTime).where(
                TaskResult.taskResultId == taskResultId)
        q.execute()
        #修改任务结果对应的大任务状态为已完成
        q = Task.update(state="2").where(Task.taskId==taskId)
        q.execute()
        # 任务执行完毕后发送邮件通知
        mutil = MailUtil()
        # 获取当前任务绑定的邮箱账号
        toEmail = TaskResult.getOne(TaskResult.taskResultId == taskResultId).taskId.userId.email
        if toEmail is not None:
            from_addr = config.SEND_EMAIL
            password = config.SEND_EMAIL_PASSWORD
            to_addr = toEmail
            smtp_server = config.SMTP_SERVER
            msg = str.format(config.MAIL_NOTICE, bigTaskId, bigTaskId, str(taskResultId))
            subject = config.MAIL_SUBJECT
            mutil.sendMail.delay(from_addr, password, to_addr, smtp_server, msg, subject)
        else:
            print "该任务未绑定接收邮箱,任务结果编号:",taskResultId
Beispiel #2
0
def checkAllLz(filePath, taskResultId):

    taskResult = TaskResult.getOne(TaskResult.taskResultId == taskResultId)
    delayDay = taskResult.taskId.intervalDay
    #根据当前任务编号目录获取该目录下的所有xml文件
    print '目录路径:',filePath
    fnames=os.listdir(filePath)
    for name in fnames:
        xmlName = filePath+'/'+name
        print 'xmlName:',xmlName
        data = parseXMLFile(xmlName, 'CheckItem')
        print 'data:',data
        # 将需要检查的信息入库
        for d in data:
            cname = d.get('cname')
            url = d.get('url')
            if url != '':
                if url[-1] == '/':
                    url = url.replace('http://', '')[0:-1].replace(' ','')
            if cname!='':
                cname = cname.replace(' ','')
            webArea = d.get('area')
            webtype = d.get('WebType')
            # 检查更新company
            if Company.getOne(Company.coname == cname) is None:
                c = Company()
                c.coname = cname
                c.save(force_insert=True)
            # 检查更新website
            if Website.getOne(Website.domain == url) is not None:
                q = Website.update(domain=url, type=webtype, area=webArea).where(Website.domain == url)
                q.execute()
            else:
                com = Company.getOne(Company.coname == cname)
                w = Website()
                w.regID = com
                w.domain = url
                w.area = webArea
                w.type = webtype
                w.save(force_insert=True)
            updateWeb = Website.getOne(Website.domain == url)
            subTask = TaskInfo()
            subTask.taskResultId = taskResult
            subTask.webId = updateWeb
            subTask.state = '1'
            subTask.save(force_insert=True)

    taskResultId = str(taskResultId)
    if delayDay > 0:
        # 需要周期执行的任务
        executeMultiTaskInfo(taskResultId)
    else:
        #logger.debug("开始调用单次任务")
        # 单次执行的任务
        executeSingleTaskInfo(taskResultId)
Beispiel #3
0
def fetchCycle(subtaskId, taskResultId, delayTag):
    if delayTag:
        checkTaskResult = TaskResult.getOne(TaskResult.taskResultId == taskResultId)
        bigTaskId = checkTaskResult.taskId.taskId
        # 根据任务结果编号获取本次需要检查的任务记录
        subTask = TaskInfo.getOne(TaskInfo.id == subtaskId)
        if subTask is not None:
            companyName = subTask.cname
            url = subTask.url
            # 抓取检测
            fetchWebsite(companyName, bigTaskId, subTask.id, url)
        else:
            print 'taskinfo记录为空:', subTask
Beispiel #4
0
def intervalDelayTask(taskResultId):
    # 获取celery中当前已经正在进行的任务数
    nowCount = TaskInfo.select().order_by(TaskInfo.id).where((TaskInfo.state == '6'))
    if nowCount >= 0:
        # 当celery中正在做的任务数量少于指定的数量时,向celery添加需要执行的任务
        if nowCount <= config.celeryMaxCount:
            taskCount = TaskInfo.select().where(
                    (TaskInfo.state == '1') & (TaskInfo.taskResultId == taskResultId)).count()
            print 'taskCount:', taskCount
            if taskCount == 0:
                singalCheck(taskResultId)
                # 查询该任务设置的延迟时间开启下一次需要检查的任务
                interval = Task.getOne(Task.taskId == (
                    TaskResult.select(TaskResult.taskId).where(TaskResult.taskResultId == taskResultId))).intervalDay
                print 'interval:', interval
                if interval != "":
                    # 生成需要轮巡的新主任务结果记录
                    taskResult = TaskResult()
                    lastTaskResult = TaskResult.getOne(TaskResult.taskResultId == taskResultId)
                    taskResult.taskId = lastTaskResult.taskId
                    taskResult.state = '1'
                    taskResult.save()

                    #将上一次的任务结果编号所对应的webId指定给新的任务结果
                    psql_db.transaction()
                    try:
                        query = (TaskInfo
                            .insert_from(
                                fields=[TaskInfo.webId],
                                query=TaskInfo.select(TaskInfo.webId).where(TaskInfo.taskResultId == lastTaskResult)))
                        query.execute()
                        q = TaskInfo.update(taskResultId=taskResult).where(TaskInfo.taskResultId.is_null())
                        q.execute()
                    except Exception, e:
                        print e
                        psql_db.rollback()
                    # 获取当前时间
                    ctime = datetime.datetime.now()
                    delay_time = int(interval)
                    stime = ctime + datetime.timedelta(seconds=delay_time)
                    scheduler.add_job(intervalDelayTask, "date", next_run_time=stime, args=[taskResult.taskResultId],
                                      jobstore="default", id=taskResult.taskResultId)
            else:
                tasks = TaskInfo.select().order_by(TaskInfo.id).paginate(0, config.sendCeleryCount).where(
                        (TaskInfo.taskResultId == taskResultId) & (TaskInfo.state == '1'))
                for subTask in tasks:
                    subtaskId = subTask.id
                    fetchCycle.apply_async((subtaskId,), queue="celery")
                    # 更新taskinfo状态为已发送
                    q = TaskInfo.update(state='6').where(TaskInfo.id == subtaskId)
                    q.execute()