def post(self):
    """Render the scheduled-job list, filtered by the submitted query fields.

    Requests that lack a ``session_id`` or ``login_user`` argument are shown
    ``to_login.html`` instead.
    """
    # An Azkaban login is required first.
    session_id = self.get_argument('session_id', '')
    login_user = self.get_argument('login_user', '')
    if '' in (session_id, login_user):
        self.render('to_login.html')
        return

    # List filters.
    name_filter = self.get_argument('query_name', '')
    project_filter = self.get_argument('query_project_name', '')
    host_filter = self.get_argument('query_server_host', '')
    user_filter = self.get_argument('query_user', '')

    # Matching jobs.
    matching_jobs = Job.get_alljobs(name_filter, project_filter,
                                    host_filter, user_filter, login_user)

    # Handed to the template together with the job list.
    query_dict = {
        'query_name': name_filter,
        'query_project_name': project_filter,
        'query_server_host': host_filter,
        'query_user': user_filter,
        'session_id': session_id,
        'login_user': login_user
    }

    logging.info('query job list [%s]' % query_dict)
    self.render('list.html',
                title='调度任务列表',
                jobs=matching_jobs,
                query_dict=query_dict)
 def get_alljobs(self):
     """Write the name and project name of every job to the response as JSON."""
     summaries = [
         {'name': job.name, 'project_name': job.project_name}
         for job in Job.get_alljobs()
     ]
     self.write(json.dumps(summaries))
    def post(self):
        """Show a stored job (``op=show``) or render the job edit/copy form.

        With ``op=show`` the job named ``name`` is written inside ``<pre>``
        and nothing else happens. Otherwise a login is required; the form is
        rendered for a new job, for the job named ``name``, or (when
        ``is_copy`` is ``'true'``) for that job with ``name`` and ``loc``
        cleared.
        """
        if self.get_argument('op', '') == 'show':
            shown = Job.get_job_fromdb(self.get_argument('name', ''))
            self.write('<pre>%s</pre>' % str(shown))
            return

        # An Azkaban login is required first.
        session_id = self.get_argument('session_id', '')
        login_user = self.get_argument('login_user', '')
        if '' in (session_id, login_user):
            self.render('to_login.html')
            return

        # List filters plus the login, passed on to the template.
        query_dict = {
            'query_name': self.get_argument('query_name', ''),
            'query_project_name': self.get_argument('query_project_name', ''),
            'query_server_host': self.get_argument('query_server_host', ''),
            'query_user': self.get_argument('query_user', ''),
            'session_id': session_id,
            'login_user': login_user
        }

        # The job being edited: blank, loaded, or loaded-and-reset for a copy.
        name = self.get_argument('name', '')
        copying = self.get_argument('is_copy', 'false') == 'true'
        job = Job()
        if name:
            job = Job.get_job_fromdb(name)
        if copying:
            job.name = ''
            job.loc = ''

        projects = Job.get_projects(login_user)
        user_jobs = Job.get_alljobs(login_user=login_user)
        every_job = Job.get_alljobs()

        logging.info('to update job [%s]' % name)
        self.render('to_update.html',
                    title='任务配置',
                    job=job,
                    jobs=user_jobs,
                    all_jobs=every_job,
                    projects=projects,
                    query_dict=query_dict)
    def post(self):
        """Open the DAG editor for a project (``op=edit``) or save a DAG (``op=save``).

        A non-empty ``login_user`` argument is required; otherwise
        ``to_login.html`` is rendered.

        ``op=edit``: loads the project's jobs, marks each with
        ``has_dependce`` (it lists dependencies) and ``is_dependced`` (another
        job lists it as a dependency), and renders ``dag_edit.html``.

        ``op=save``: parses the ``nodes`` and ``links`` arguments as JSON and
        passes them to ``Job.update_dag``. Any failure is reported in the
        response body instead of being raised.
        """
        # An Azkaban login is required first.
        op = self.get_argument('op', '')
        login_user = self.get_argument('login_user', '')
        project_name = self.get_argument('project_name', '')
        if login_user == '':
            self.render('to_login.html')
            return

        if op == 'edit':
            # Jobs of the project.
            jobs = Job.get_alljobs(project_name=project_name,
                                   login_user=login_user)
            has_dependce_jobs = set()
            is_dependced_jobs = set()

            # First pass: collect both name sets; the second pass needs them
            # complete before any job can be flagged.
            for job in jobs:
                if job.dependencies != '':
                    has_dependce_jobs.add(job.name)
                    is_dependced_jobs.update(job.dependencies.split(','))

            for job in jobs:
                job.has_dependce = job.name in has_dependce_jobs
                job.is_dependced = job.name in is_dependced_jobs

            logging.info('to edit dag [%s][%s]' % (project_name, login_user))
            self.render('dag_edit.html',
                        project_name=project_name,
                        jobs=jobs,
                        login_user=login_user)
        elif op == 'save':
            try:
                nodes = self.get_argument('nodes', '')
                links = self.get_argument('links', '')
                ns = json.loads(nodes)
                ls = json.loads(links)

                Job.update_dag(login_user, ns, ls)
                logging.info('edit dag [%s][%s]' % (project_name, login_user))
                self.write("保存成功")
            except Exception as e:
                # Boundary handler: the client gets the message, the log gets
                # the full traceback.
                logging.exception('save dag failed [%s][%s]' %
                                  (project_name, login_user))
                self.write('保存失败[%s]' % str(e))
Example #5
0
    def post(self):
        """Report whether a job named ``name`` exists.

        Writes ``'exist'`` when ``Job.get_job_fromdb`` returns a job, and
        ``'non_exist'`` when it raises or returns ``None``.
        """
        # The job to look up.
        name = self.get_argument('name', '')
        logging.info('----name:[%s]' % name)
        try:
            job = Job.get_job_fromdb(name)
        except Exception:
            # A failed lookup is deliberately treated as "no such job".
            # Only Exception is caught so SystemExit / KeyboardInterrupt
            # still propagate.
            job = None

        self.write('non_exist' if job is None else 'exist')
Example #6
0
def exe_job(name,execid,start_time,param_dict_str=''):
    """Run job ``name`` through ``process`` and raise an alarm if it fails.

    A run counts as failed when ``process`` raises (the traceback is printed)
    or returns a truthy error flag. On failure ``exe_alarm`` is called, a
    marker line is written with ``sysout`` and ``MyScheduleException`` is
    raised; on success the function simply returns.
    """
    job = Job.get_job_fromdb(name)
    try:
        failed = process(name,execid,start_time,param_dict_str)
    except:
        # Intentionally catches everything: any error raised by process() is
        # reported the same way as a run that returned the error flag.
        traceback.print_exc()
        failed = True

    if not failed:
        return

    exe_alarm(job,execid,start_time)
    sysout('-------------以下为系统主动报错信息,忽略-------------------')
    raise MyScheduleException('脚本执行过程出错')
    def post(self):
        """Delete the job named ``name`` and render the refreshed job list.

        Requires ``session_id`` and ``login_user``; otherwise ``to_login.html``
        is rendered. The job is unscheduled, its dependencies are deleted and
        then the job itself is deleted, with ``login_user`` recorded as updater.
        """
        # An Azkaban login is required first.
        session_id = self.get_argument('session_id','')
        login_user = self.get_argument('login_user','')
        if '' in (session_id, login_user):
            self.render('to_login.html')
            return

        # List filters plus the login, passed on to the template.
        query_dict = {
                'query_name':self.get_argument('query_name',''),
                'query_project_name':self.get_argument('query_project_name',''),
                'query_server_host':self.get_argument('query_server_host',''),
                'query_user':self.get_argument('query_user',''),
                'session_id':session_id,
                'login_user':login_user
        }

        # Remove the job: schedule first, then dependencies, then the job.
        name = self.get_argument('name','')
        doomed = Job.get_job_fromdb(name)
        doomed.updater = login_user
        doomed.unschedule_flow()
        doomed.delete_dependencies()
        doomed.delete_job()

        # Re-query the list with the same filters.
        remaining = Job.get_alljobs(query_dict['query_name'],
                                    query_dict['query_project_name'],
                                    query_dict['query_server_host'],
                                    query_dict['query_user'],
                                    login_user)

        logging.info('[%s] delete job [%s]' % (login_user,name))
        self.render('list.html',title='任务配置',jobs=remaining,query_dict=query_dict)
Example #8
0
def generate_files(username='',session_id=''):
    """Regenerate the ``.job`` files of ``username``, zip each project and upload the zips.

    The ``files`` directory next to ``CURRENTPATH`` is emptied, one
    ``<project>/<job>.job`` file is written per job returned by
    ``Job.get_alljobs(login_user=username)``, each project directory is packed
    into ``<project>.zip`` and every zip is passed to ``upload_zip``.

    Args:
        username: login whose jobs are exported.
        session_id: forwarded to ``upload_zip``.

    Returns:
        List with the ``upload_zip`` result of every uploaded archive.
    """
    # Function-scope import: only this function needs shutil.
    import shutil

    # 1. Start from an empty output directory. Filesystem calls are used
    #    instead of shell commands so that paths and project names containing
    #    spaces or shell metacharacters cannot alter the command.
    dir_path = '%s/../files/' % CURRENTPATH
    shutil.rmtree(dir_path, ignore_errors=True)
    if not os.path.isdir(dir_path):
        os.makedirs(dir_path)

    # 2. Write one .job file per job, creating project directories on demand.
    project_names = []
    job_list = Job.get_alljobs(login_user=username)
    for job in job_list:
        project_path = dir_path+job.project_name+'/'
        if not os.path.exists(project_path):
            os.mkdir(project_path)
        if job.project_name not in project_names:
            project_names.append(job.project_name)

        file_path = project_path+job.name+'.job'
        with open(file_path,'wb') as f:
            f.write(job.get_file_str())

        print('任务%s.job文件已生成' % job.name)

    # 3. Pack each project once, after all of its files exist (previously the
    #    archive was rebuilt from scratch after every single job).
    for project_name in project_names:
        project_path = dir_path+project_name+'/'
        zip_path = dir_path+project_name+'.zip'
        zf = zipfile.ZipFile(zip_path,'w',zipfile.ZIP_DEFLATED)
        try:
            for dirpath, dirnames, filenames in os.walk(project_path):
                for filename in filenames:
                    zf.write(os.path.join(dirpath,filename))
        finally:
            # Always release the archive handle, even if a write fails.
            zf.close()

    # 4. Upload every archive found in the output directory.
    result_list = []
    suffix = '.zip'
    for filename in os.listdir(dir_path):
        if filename.endswith(suffix):
            zip_path = os.path.join(dir_path, filename)
            # Strip only the trailing extension; a project name may itself
            # contain ".zip".
            project_name = filename[:-len(suffix)]
            result = upload_zip(session_id,project_name,zip_path)
            result_list.append(result)

    print(result_list)
    return result_list
    def delete_job(self):
        """Delete the job named by the ``name`` argument on behalf of ``self.username``.

        The job is loaded, ``self.username`` is recorded as its updater, the
        permission check is run, and then the job is unscheduled (using
        ``self.session_id``), its dependencies are deleted and the job itself
        is deleted.

        Raises:
            Exception: the job could not be loaded, or
                ``has_job_permission`` returned a false flag (its message is
                used as the exception text).
        """
        login_user = self.username
        name = self.get_argument('name')

        try:
            job = Job.get_job_fromdb(name)
        except Exception:
            # Only ordinary errors are converted; SystemExit and
            # KeyboardInterrupt still propagate unchanged.
            raise Exception('not found job[%s]' % name)
        job.updater = login_user

        flag, mes = job.has_job_permission()
        logging.info('check job permission [%s] [%s]' % (flag, mes))
        if not flag:
            raise Exception(mes)

        job.unschedule_flow(self.session_id)
        job.delete_dependencies()
        job.delete_job()
        logging.info('[%s]delete job [%s]' % (login_user, name))
def schedule_flow(session_id,project_name,flow_name,schedule_time,period):
    """Schedule a recurring flow by POSTing ``ajax=scheduleFlow`` with curl.

    Args:
        session_id: value sent as the ``azkaban.browser.session.id`` cookie.
        project_name: project name; its id comes from
            ``Job.get_projectid_byname``.
        flow_name: name of the flow to schedule.
        schedule_time: first run, formatted ``%Y-%m-%d %H:%M``.
        period: recurrence period, sent as-is.

    Returns:
        The JSON object decoded from the curl output.

    Raises:
        ValueError: the curl output contains no JSON object.
        Exception: the response has an ``error`` key, or ``status`` is
            ``'error'``.
    """
    # Function-scope import so the file's import block is untouched:
    # shlex.quote on Python 3, pipes.quote on Python 2.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    url = 'https://localhost:8443/schedule'

    project_id = Job.get_projectid_byname(project_name)
    sstime = datetime.datetime.strptime(schedule_time, "%Y-%m-%d %H:%M")
    sdate = sstime.strftime('%m/%d/%Y')
    stime = sstime.strftime('%I,%M,%p,')

    params = 'is_recurring=on&period=%s&projectName=%s&flow=%s&projectId=%s&scheduleTime=%s&scheduleDate=%s' % (period,project_name,flow_name,project_id,stime,sdate)

    # The POST body and cookie contain caller-supplied values, so they are
    # shell-quoted; characters such as `"`, `$` or backticks can no longer
    # break out of the argument.
    command = 'curl -k %s -d %s -b %s' % (
        url,
        quote('ajax=scheduleFlow&%s' % params),
        quote('azkaban.browser.session.id=%s' % session_id))

    logging.info(command)
    status,result = commands.getstatusoutput(command)

    # curl may print progress text before the JSON body; parse from the first
    # brace, and fail with a readable message if there is none.
    json_start = result.find('{')
    if json_start < 0:
        raise ValueError('no JSON in schedule response (exit status %s): %s'
                         % (status, result))
    resp = json.loads(result[json_start:])
    if "error" in resp:
        raise Exception(resp['error'])
    if resp.get('status') == 'error':
        raise Exception(resp['message'])

    return resp
Example #11
0
def get_extdep_jobs(base_time,ext_deps):
    """Build the list of ``JobStatus`` entries to check for external dependencies.

    ``ext_deps`` is a JSON string with a ``jobs`` list; every entry supplies
    ``name``, ``time_type`` and ``hour_diff``. Each entry yields one
    ``JobStatus`` at the time returned by ``get_exetime``. The exception is an
    entry with ``time_type == 'day'`` whose job script contains
    ``&[last_hour]`` or ``&[cur_hour]``: that one yields 24 entries, one per
    two-digit hour appended to the time.
    """
    hour_markers = ('&[last_hour]', '&[cur_hour]')
    query_jobs = []
    for ext_job in json.loads(ext_deps)['jobs']:
        execute_time = get_exetime(base_time,ext_job['time_type'],ext_job['hour_diff'])

        # Detect a day-level dependency on an hour-level job.
        scripts = Job.get_job_fromdb(ext_job['name']).server_script
        if ext_job['time_type']=='day' and any(scripts.find(marker)>=0 for marker in hour_markers):
            sysout('>>>>>>>>>>>>%s为小时级任务天级依赖,生成所有任务依赖' % ext_job['name'])
            check_times = ['%s%02d' % (execute_time,hour) for hour in range(24)]
        else:
            check_times = [execute_time]

        for check_time in check_times:
            status_item = JobStatus()
            status_item.job_name = ext_job['name']
            status_item.execute_time = check_time
            sysout('>>>>>>>>>>>>添加外部依赖任务检查项[%s][%s]' % (status_item.job_name,status_item.execute_time))
            query_jobs.append(status_item)

    return query_jobs
Example #12
0
def exe_job(name,execid,start_time,param_dict_str=''):
    """Run job ``name``'s script on its remote host over SSH and record the result.

    The job is loaded, an SSH connection is opened and the shell command is
    built (optional ``cd``, the script, then a status echo). If external
    dependencies are configured they are polled every two minutes, and
    progress is appended to the execution log file. The command is then run
    and its stdout/stderr are printed.

    Args:
        name: job name passed to ``Job.get_job_fromdb``.
        execid: execution id, used in the log path and in ``update_jobstatus``.
        start_time: passed to ``get_param_dict`` and ``exe_alarm``.
        param_dict_str: extra parameters forwarded to ``get_param_dict``.

    Raises:
        MyScheduleException: remote settings are incomplete, or the output
            never contained ``script execute status [0]``.
    """
    job = Job.get_job_fromdb(name)
    if job.server_host == '' or job.server_user=='' or job.server_script=='':
        raise MyScheduleException('远程配置信息不全')

    # 1. Set up the SSH client. Everything that uses it runs inside
    #    try/finally so the connection is closed even when a later step raises.
    ssh = paramiko.SSHClient()
    try:
        ssh.load_system_host_keys()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect(job.server_host,22,job.server_user, "")

        # 2. Resolve default parameters and build the command:
        #    cd to the work dir, run the script, echo its exit status.
        param_dict = get_param_dict(start_time,param_dict_str)
        command = ' source /etc/bashrc;'
        if job.server_dir !='':
            work_dir,tmp_time = replace_param(job.server_dir,param_dict)
            print('work_dir:%s' % work_dir)
            # NOTE(review): this replaces the `source /etc/bashrc;` prefix
            # instead of extending it - confirm that is intended.
            command = 'cd %s;' % work_dir
        command = command + job.server_script
        if not job.server_script.endswith(';'):
            command = command + ';'
        command = command+'echo "script execute status ["$?"]";'
        command,execute_time = replace_param(command,param_dict)
        print('>>>>>>>>>>>>%s' % (command))
        # Mark the job execution as started.
        update_jobstatus(name,execute_time,execid,0)

        # 3. Wait for cross-project / time-based dependencies, checking every
        #    two minutes and appending the status to the execution log.
        file_path = '%s/../executor/executions/%s/home/hadoop/service/azkaban/files/%s/_job.%s.%s.log' % (CURRENTPATH,execid,job.project_name,execid,job.name)
        if job.ext_dependencies != '':
            query_jobs = get_extdep_jobs(execute_time,job.ext_dependencies)
            with open(file_path,'a') as f:
                f.write('>>>>>>>>>>>>当前任务的基准时间:%s\n' % execute_time)
                f.write('>>>>>>>>>>>>依赖的外部任务配置:%s\n' % job.ext_dependencies)
            while True:
                flag,mes = JobStatus.is_ready(query_jobs)
                with open(file_path,'a') as f:
                    f.write('>>>>>>>>>>>>依赖的外部任务准备情况:%s\n' % mes)

                if flag:
                    break
                time.sleep(120)

        # 4. Run the script and print its output. Success is detected by the
        #    status echo appended in step 2.
        stdin, stdout, stderr = ssh.exec_command(command)
        error_flag = True
        print('---------------out----------------')
        for line in stdout.readlines():
            print(line.replace('\n',''))
            if line.find('script execute status [0]')>=0:
                error_flag=False
        print('---------------err-----------------')
        for line in stderr.readlines():
            print(line.replace('\n',''))
        print('---------------err end-----------------')
    finally:
        ssh.close()

    # 5. On failure: record it, send the alarm and raise.
    if error_flag:
        update_jobstatus(name,execute_time,execid,-1)
        exe_alarm(job,execid,start_time)
        raise MyScheduleException('脚本执行过程出错')

    # Record success.
    update_jobstatus(name,execute_time,execid,1)
Example #13
0
    def update_job(self):
        """Build a ``Job`` from the request arguments and save it for ``self.username``.

        Every attribute named by ``Job.get_attr_list()`` is copied from the
        request when its stripped value is non-empty. Dots in the job name are
        replaced by dashes, updater/update time are stamped, and creator/create
        time are filled in when the creator is empty.

        Raises:
            Exception: ``has_job_permission`` returned a false flag (its
                message is used as the exception text).
        """
        login_user = self.username

        # Required parameters are fetched without a default - presumably so
        # that get_argument rejects a request missing one of them (confirm).
        for required in ('name', 'project_name', 'server_host', 'server_user',
                         'server_script', 'server_dir'):
            self.get_argument(required)

        # Populate the job dynamically; every field is treated as a string.
        attr_list = Job.get_attr_list()
        job = Job()
        for attr in attr_list:
            value = str(self.get_argument(attr, '')).strip()
            if value == '':
                continue
            setattr(job, attr, value)
            logging.info(attr + ':' + value)

        # Defaults.
        job.name = job.name.replace('.', '-')
        job.updater = login_user
        job.update_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        if job.creator == '':
            job.creator = job.updater
            job.create_time = job.update_time

        # Permission check, then save.
        allowed, reason = job.has_job_permission()
        logging.info('check job permission [%s] [%s]' % (allowed, reason))
        if not allowed:
            raise Exception(reason)

        job.update_job()

        logging.info('[%s] update job [%s]' % (login_user, job.name))
Example #14
0
def process(name,execid,start_time,param_dict_str=''):
    """Run job ``name``'s script on its remote host over SSH and record the result.

    The job is loaded, an SSH connection is opened and the shell command is
    built: an optional ``cd``, every script statement prefixed with
    ``stdbuf -o0``, then a status echo. If external dependencies are
    configured they are polled every two minutes. The command is then run and
    its output is streamed through ``sysout``.

    Args:
        name: job name passed to ``Job.get_job_fromdb``.
        execid: execution id; passed to ``get_param_dict`` and
            ``update_jobstatus``.
        start_time: passed to ``exe_success`` on success.
        param_dict_str: extra parameters forwarded to ``get_param_dict``.

    Returns:
        True when the remote output never contained
        ``script execute status [0]`` (failure), False on success.

    Raises:
        MyScheduleException: remote settings are incomplete, or the
            dependency polling exceeded one day.
    """
    sysout('+++++++++++++任务初始化+++++++++++++++')
    sysout('开始加载远程任务 %s' % name)
    job = Job.get_job_fromdb(name)
    if job.server_host == '' or job.server_user=='' or job.server_script=='':
        raise MyScheduleException('远程配置信息不全')

    sysout('任务加载成功 \n=====================\n%s\n=====================' % job)

    # 1. Set up the SSH client. Everything that uses it runs inside
    #    try/finally so the connection is closed even when a later step
    #    raises (for example the dependency timeout below).
    ssh = paramiko.SSHClient()
    try:
        ssh.load_system_host_keys()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect(job.server_host,22,job.server_user, "")

        # 2. Resolve default parameters and build the command:
        #    cd to the work dir, run the script, echo its exit status.
        param_dict = get_param_dict(execid,param_dict_str)
        command = ' source /etc/bashrc;'
        if job.server_dir !='':
            work_dir,tmp_time = replace_param(job.server_dir,param_dict)
            sysout('配置脚本执行目录  work_dir:%s' % work_dir)
            # NOTE(review): this replaces the `source /etc/bashrc;` prefix
            # instead of extending it - confirm that is intended.
            command = 'cd %s;' % work_dir

        # Prefix every non-blank statement with `stdbuf -o0` so the remote
        # stdout is not buffered.
        scripts = ';'.join(['' if part.strip()=='' else 'stdbuf -o0 %s' % part
                            for part in job.server_script.split(';')])

        command = command + scripts
        if not job.server_script.endswith(';'):
            command = command + ';'
        command = command+'echo "script execute status ["$?"]";'
        command,execute_time = replace_param(command,param_dict)
        sysout('生成完整执行脚本:%s' % (command))
        sysout('+++++++++++++任务初始化完成+++++++++++++++')
        # Mark the job execution as started.
        update_jobstatus(name,execute_time,execid,0)

        # 3. Wait for cross-project / time-based dependencies, checking every
        #    two minutes and logging each check.
        if job.ext_dependencies != '':
            sysout('>>>>>>>>>>>>检查外部依赖完成情况')
            sysout('>>>>>>>>>>>>当前任务的基准时间:%s' % execute_time)
            sysout('>>>>>>>>>>>>依赖的外部任务配置:%s' % job.ext_dependencies)

            query_jobs = get_extdep_jobs(execute_time,job.ext_dependencies)
            wait_count = 1
            while True:
                flag,mes = JobStatus.is_ready(query_jobs)
                sysout('>>>>>>>>>>>>第%s次检查,依赖的外部任务准备情况:%s\n' % (wait_count,mes))

                # Give up once the accumulated waiting exceeds one day.
                if wait_count*120 > 86400:
                    update_jobstatus(name,execute_time,execid,-1)
                    raise MyScheduleException('依赖任务等待时间超过1天,请检查')
                if flag:
                    update_jobstatus(name,execute_time,execid,None,True)
                    break
                wait_count += 1
                time.sleep(120)

        # 4. Run the script and stream its output line by line. Success is
        #    detected by the status echo appended in step 2.
        sysout('开始执行远程脚本:\n%s' % scripts)
        stdin, stdout, stderr = ssh.exec_command(command)
        error_flag = True
        sysout('---------------out 远程脚本日志输出----------------')
        while True:
            line = stdout.readline()
            if len(line) == 0:
                break
            sysout(line.strip())
            if line.find('script execute status [0]')>=0:
                error_flag=False
        sysout('---------------err 远程脚本错误输出-----------------')
        for line in stderr.readlines():
            sysout(line.replace('\n',''))
        sysout('---------------err end-----------------')
    finally:
        ssh.close()

    # 5. On failure: record it and tell the caller through the return value.
    if error_flag:
        update_jobstatus(name,execute_time,execid,-1)
        return True

    # Record success.
    exe_success(job,execid,start_time)
    update_jobstatus(name,execute_time,execid,1)

    return False
    def post(self):
        """Save a job built from the request arguments, then render the job list.

        Requires ``session_id`` and ``login_user``; otherwise ``to_login.html``
        is rendered. Every attribute named by ``Job.get_attr_list()`` is copied
        from the request when its stripped value is non-empty. Dots in the job
        name become dashes, updater/update time are stamped, and creator/create
        time are filled in when the creator is empty.
        """
        # An Azkaban login is required first.
        session_id = self.get_argument('session_id','')
        login_user = self.get_argument('login_user','')
        if '' in (session_id, login_user):
            self.render('to_login.html')
            return

        # List filters plus the login, passed on to the template.
        query_dict = {
                'query_name':self.get_argument('query_name',''),
                'query_project_name':self.get_argument('query_project_name',''),
                'query_server_host':self.get_argument('query_server_host',''),
                'query_user':self.get_argument('query_user',''),
                'session_id':session_id,
                'login_user':login_user
        }

        # Build the job; dependencies_box is only logged here.
        attr_list = Job.get_attr_list()
        dependencies_box = self.get_argument('dependencies_box','')
        logging.info('>>>>>>>>>>>'+str(type(dependencies_box)))
        logging.info('>>>>>>>>>>>'+str(dependencies_box))
        job = Job()
        # Fields are loaded dynamically; every one is treated as a string.
        for attr in attr_list:
            value = str(self.get_argument(attr,'')).strip()
            if value == '':
                continue
            setattr(job,attr,value)
            logging.info(attr+':'+value)

        # Defaults.
        job.name = job.name.replace('.','-')
        job.updater = login_user
        job.update_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        if job.creator == '':
            job.creator = job.updater
            job.create_time = job.update_time

        # Save.
        job.update_job()

        # After saving, go back to the list page with the same filters.
        listed = Job.get_alljobs(query_dict['query_name'],
                                 query_dict['query_project_name'],
                                 query_dict['query_server_host'],
                                 query_dict['query_user'],
                                 login_user)

        logging.info('[%s] update job [%s]' % (login_user,job.name))
        self.render('list.html',title='调度任务列表',jobs=listed,query_dict=query_dict)