예제 #1
0
def checkRunning(Job, tx_dt):
    """Tell whether ``Job`` may be started now for date ``tx_dt``.

    Counts the entries of ``Config.ETL_Ctrl_Running`` whose names look like
    running-flag files (e.g. ``S04_S04_COMP_20171024.ctl``) and compares that
    count with ``Config.PARALLEL_JOB_NUM``; when there is spare capacity the
    decision is delegated to ``checkDependency``.

    Args:
        Job: job description passed through to ``checkDependency``. The
            notes in this file list the keys 'Name', 'dt', 'model' and
            'system'.
        tx_dt: date string such as '20171025'.

    Returns:
        True when the running count is below the parallel limit and
        ``checkDependency(Job, tx_dt)`` is truthy, otherwise False. If an
        exception occurs it is logged and ``ExceptionExit()`` is called; the
        function returns None should that call return.
    """
    try:
        entries = os.listdir(Config.ETL_Ctrl_Running)
        # Running flag file names look like: S04_S04_COMP_20171024.ctl
        flag_pattern = re.compile(r'^\w+?_\w+_\d{8}\.ctl$')
        LogUtil.Etl_Log(r"正在检查执行中的作业...")
        active = [name for name in entries if flag_pattern.match(name)]
        LogUtil.Etl_Log(r"正在执行作业数量:[%s]" % len(active))

        # Guard 1: every parallel slot is already taken.
        if len(active) >= Config.PARALLEL_JOB_NUM:
            LogUtil.Etl_Log(r"检查完毕,并行已满")
            return False

        # Guard 2: a job this one depends on has not finished.
        if not checkDependency(Job, tx_dt):
            LogUtil.Etl_Log(r"检查完毕,依赖作业未完成")
            return False

        LogUtil.Etl_Log(r"检查完毕,可以继续执行作业")
        return True
    except Exception as e:
        print("执行checkRunning失败")
        LogUtil.Etl_Log_Exception(e, 'StartJob.checkRunning')
        ExceptionExit()
예제 #2
0
def RunJob(Job, CtlName):
    """Execute one job and record its outcome.

    The job body is currently a loop of three 10-second sleeps that prints
    the job name and the iteration index. Afterwards the control file is
    moved from ``Config.ETL_Ctrl_Running`` to ``Config.ETL_Job_Done``, the
    job info is updated to 'DONE' and ``touchStream`` is called.

    Args:
        Job: job description; ``Job['Name']`` is read here and the whole
            object is passed to ``updateJob``. The notes in this file list
            the keys 'Name', 'dt', 'model' and 'system'.
        CtlName: control file name such as ``S04_S04_COMP_20171023.ctl``.

    Raises:
        IndexError: if ``CtlName`` does not match the expected pattern
            (raised before any work is started, as in the original code).
    """
    reg = re.compile(r'^(\w+?)_(\w+)_(\d{8})\.ctl$')
    # The third group is the 8-digit date that precedes ".ctl"; reuse it
    # instead of re-slicing CtlName at every call site.
    _, job_name, dt = reg.findall(CtlName)[0]
    msg = r"开始执行作业:%s" % (job_name + '_' + dt)
    print(msg)
    LogUtil.Etl_Log(msg)
    try:
        for i in range(3):
            time.sleep(10)
            print(Job['Name'] + '  ' + str(i))
        FileUtil.move(Config.ETL_Ctrl_Running + os.sep + CtlName,
                      Config.ETL_Job_Done + os.sep + CtlName)
    except Exception as e:
        updateJob(Job, dt, 'FAILED')
        LogUtil.Etl_Log_Exception(e, 'StartJob.RunJob:' + CtlName)
        ExceptionExit()
        # ExceptionExit is defined elsewhere; if it returns instead of
        # terminating, stop here so the 'FAILED' status written above is
        # not overwritten with 'DONE' by the success path below.
        return
    msg = "作业执行完毕:%s %s" % (Job['Name'], dt)
    print(msg)
    LogUtil.Etl_Log(msg)
    updateJob(Job, dt, 'DONE')
    # TODO: trigger the downstream (stream) job
    touchStream(CtlName)
예제 #3
0
def StartJob():
    """Dispatch every control file found in the Receive directory.

    For each file name in ``Config.ETL_Ctrl_Receive``:

    * if the same name is already in the Running or Queue directory, the
      received copy is deleted and nothing else is done with it;
    * if the name does not look like ``<system>_<job>_<yyyymmdd>.ctl`` or
      the job is not listed for that system in ``GetJobStats()``, the file
      is left where it is;
    * otherwise ``checkRunning`` decides whether the file is moved to the
      Running directory and ``RunJob`` is submitted to the process pool, or
      the file is moved to the Queue directory.
    """
    global _pool
    received = os.listdir(Config.ETL_Ctrl_Receive)
    running = set(os.listdir(Config.ETL_Ctrl_Running))
    queued = set(os.listdir(Config.ETL_Ctrl_Queue))
    # Example name: S04_S04_COMP_20171024.ctl
    reg = re.compile(r'^(\w+?)_(\w+)_(\d{8})\.ctl$')
    JobStats = GetJobStats()
    for ctl_name in received:
        receive_path = Config.ETL_Ctrl_Receive + os.sep + ctl_name

        # Duplicates are dropped; `continue` is essential because the file
        # no longer exists after os.remove and must not be processed or
        # removed a second time.
        if ctl_name in running:
            LogUtil.Etl_Log(r"作业已经在执行中:%s" % ctl_name)
            os.remove(receive_path)
            continue
        if ctl_name in queued:
            LogUtil.Etl_Log(r"作业已经在队列中:%s" % ctl_name)
            os.remove(receive_path)
            continue

        # Skip anything that is not a well-formed control file instead of
        # failing with IndexError on an empty findall() result.
        matched = reg.match(ctl_name)
        if matched is None:
            continue
        etl_sys, job_name, dt = matched.groups()

        jobs = JobStats[etl_sys]['Jobs']
        if job_name not in jobs:
            continue
        job = jobs[job_name]

        if checkRunning(job, dt):
            FileUtil.move(receive_path,
                          Config.ETL_Ctrl_Running + os.sep + ctl_name)
            _pool.apply_async(RunJob, (job, ctl_name))
        else:
            FileUtil.move(receive_path,
                          Config.ETL_Ctrl_Queue + os.sep + ctl_name)
예제 #4
0
def init():
    """Initialise the scheduler: clear the control log and create the pool.

    Calls ``Clear_Ctrl_Log`` on a fresh ``LogUtil`` instance, writes the
    start-up log lines, and assigns the module-level ``_pool`` from
    ``ProcessUtil.CreateProcessPool()`` if it has not been created yet.
    """
    global _pool
    log = LogUtil()
    log.Clear_Ctrl_Log()
    log.Etl_Log(
        '---------------------------------开始初始化---------------------------------'
    )
    log.Etl_Log('开始清理调度')
    # TODO: clean up all state files under ETL_HOME
    # Create the process pool only once; `is None` is the identity test
    # PEP 8 recommends for singletons (it cannot be fooled by a custom __eq__).
    if _pool is None:
        _pool = ProcessUtil.CreateProcessPool()
예제 #5
0
def updateJob(Job, dt, Last_Stats):
    """Rewrite a job's info file with a new date and status.

    The info file lives at
    ``Config.ETL_Ctrl_JobInfo/<Job['system']>/<Job['Name']>`` and holds
    space-separated fields. The first field is replaced by ``dt``, the
    second and third are kept, and the fourth is replaced by ``Last_Stats``.

    Args:
        Job: job description; the keys 'system' and 'Name' are read here.
        dt: date string such as '20171025'.
        Last_Stats: status text to store; callers in this file pass
            'DONE' or 'FAILED'.

    Any exception is printed, logged through ``LogUtil.Etl_Log_Exception``
    and followed by ``ExceptionExit()``.
    """
    try:
        InfoPath = (Config.ETL_Ctrl_JobInfo + os.sep + Job['system'] +
                    os.sep + Job['Name'])
        if not os.path.exists(InfoPath):
            # Only logged here; the open() below then raises and the
            # failure is handled by the except clause.
            LogUtil.Etl_Log(r'作业信息不存在,请检查:' + InfoPath)
        # `with` closes the handle on every path, replacing the manual
        # close() calls and the `'f' in vars()` check in `finally`.
        with open(InfoPath, 'r') as f:
            fields = f.read().split(' ')
        # Build the new content before reopening for writing so that a
        # malformed file (too few fields) is not truncated.
        new_content = '%s %s %s %s' % (dt, fields[1], fields[2], Last_Stats)
        with open(InfoPath, 'w') as f:
            f.write(new_content)
    except Exception as e:
        print("执行updateJob失败")
        LogUtil.Etl_Log_Exception(e, 'StartJob.updateJob')
        ExceptionExit()
예제 #6
0
def CheckQueue():
    """Move queued control files back to Receive once dependencies are met.

    Every name in ``Config.ETL_Ctrl_Queue`` that looks like
    ``<system>_<job>_<yyyymmdd>.ctl`` is looked up with ``GetJobInfo`` and
    tested with ``checkDependency``; files that pass are moved to
    ``Config.ETL_Ctrl_Receive``, the others are logged as still waiting.
    Exceptions are printed and logged, and the function then returns.
    """
    # With a database, priorities could be used to sort the queue and only
    # the first n jobs would need checking before being copied to Receive.
    # This first version keeps its state in files. Priorities would still be
    # possible but probably not efficient, so efficiency and priority are
    # deliberately ignored: every job that passes the check goes to Receive.
    try:
        queued_names = os.listdir(Config.ETL_Ctrl_Queue)
        pattern = re.compile(r'^(\w+?)_(\w+)_(\d{8})\.ctl$')
        for ctl_name in queued_names:
            hit = pattern.match(ctl_name)
            if not hit:
                continue
            etl_sys, job, dt = hit.groups()
            Job = GetJobInfo(etl_sys, job)
            if checkDependency(Job, dt):
                FileUtil.move(Config.ETL_Ctrl_Queue + os.sep + ctl_name,
                              Config.ETL_Ctrl_Receive + os.sep + ctl_name)
                continue
            LogUtil.Etl_Log(r'依赖未完成,继续等待:' + etl_sys + '_' + job)
    except Exception as e:
        print("执行CheckQueue失败")
        LogUtil.Etl_Log_Exception(e, 'StartJob.CheckQueue')