Exemple #1
0
    def schedule(self, cron=None):
        """
        Register a cron schedule for this flow on the Azkaban server.

        A value found through ``get_project_info`` under the key
        ``"cron.<flowId>"`` replaces the ``cron`` argument when it is truthy.

        :param cron: cron expression used when no flow-specific one is found
        :return: True when the server accepted the schedule; False when the
            expression fails ``check_cron`` or the reply is not a clean 200
        """
        override = get_project_info(self.prj_name, "cron." + str(self.flowId).strip())
        cron = override or cron
        if not check_cron(cron):
            return False

        payload = {
            'session.id': self.cookies_fetcher.get_session_id(),
            'ajax': u'scheduleCronFlow',
            'projectName': self.prj_name,
            'flow': self.flowId,
            # other values listed by the original author: finishCurrent, cancelImmediately
            'failureAction': 'finishPossible',
            'cronExpression': cron
        }
        reply = requests.post(azkaban_url + '/schedule', data=payload)
        body = str(reply.content, 'utf-8')

        # Success requires both a 200 status and no "error" text in the body.
        if reply.status_code == 200 and 'error' not in body:
            logger.info("{0} flow:{1}设置执行计划成功".format(self.prj_name, self.flowId))
            return True

        logger.info("秒 分 时 日 月 周 data: \n%s", payload)
        logger.error(body)
        logger.error("{0}设置执行计划失败".format(self.prj_name))
        return False
Exemple #2
0
 def fetch_schedule(self):
     """
     Look up the schedule id registered for this flow.

     :return: the ``scheduleId`` found in the server reply, or None when the
         reply cannot be read as a schedule
     :raises Exception: when the server answers with a non-200 status
     """
     prj_id = get_projects().get(self.prj_name)
     query = {
         'session.id': self.cookies_fetcher.get_session_id(),
         'ajax': 'fetchSchedule',
         'projectId': prj_id,
         'flowId': self.flowId
     }
     response = requests.get(azkaban_url + '/schedule', params=query)
     if response.status_code != 200:
         logger.info(str(response.content, 'utf-8'))
         raise Exception("{0}获取执行计划列表失败".format(self.prj_name))

     # Any failure while reading the reply is treated as "no schedule set".
     try:
         schd_id = response.json()['schedule']['scheduleId']
         logger.debug("{0} flow:{1}获取执行计划ID是{2}".format(self.prj_name, self.flowId, schd_id))
         return schd_id
     except Exception as err:
         logger.debug(str(err))
         logger.info("{0} flow:{1} 没有设置执行计划".format(self.prj_name, self.flowId))
         return None
Exemple #3
0
 def handle_timeout(self):
     """
     Monitor the execution in a loop, restarting it when it times out.

     Each round fetches the execution info and refreshes the per-job status
     sets. The loop ends once the status is KILLED, SUCCEEDED or FAILED.
     Otherwise, when the execution has started, has no end time (-1) and has
     been running longer than ``60 * self.flow_timeout`` seconds, it is
     cancelled, and after a 60 second pause resumed. The loop sleeps
     ``check_interval`` seconds between rounds.

     :return: None
     """
     final_messages = {
         'KILLED': "{execid} has been killed.",
         'SUCCEEDED': "{execid} has been SUCCEEDED.",
         'FAILED': "{execid} has been FAILED.",
     }
     while True:
         logger.info('checking to execute flow {flow}, {exec_id}'.format(flow=self.flowId, exec_id=self.exec_id))
         info = self.get_flow_exec_info()
         self.refresh_flow_execution()
         # startTime is divided by 1000 before being compared with time.time()
         # (presumably epoch milliseconds -- confirm against the API).
         started_at = info['startTime'] / 1000
         status = info['status']
         if status in ('KILLED', 'SUCCEEDED', 'FAILED'):
             logger.info(final_messages[status].format(execid=self.exec_id))
             break

         timed_out = (
             started_at > 0
             and int(time.time()) - started_at > 60 * self.flow_timeout
             and info['endTime'] == -1
         )
         if timed_out:
             logger.info('reached timeout threshold \n')
             self.cancel()
             time.sleep(60)
             self.resume_flow()
         time.sleep(check_interval)
Exemple #4
0
 def cancel(self):
     """Request cancellation of this execution; log the reply body on a non-200 status."""
     url = '{0}/executor?ajax=cancelFlow&execid={1}'.format(azkaban_url, self.exec_id)
     reply = requests.get(url, cookies=self.cookies_fetcher.get_cookies())
     if reply.status_code == 200:
         return
     logger.info(str(reply.content, 'utf-8'))
Exemple #5
0
 def execute(self):
     """
     Start an execution of this flow.

     The base URL is formatted first and ``self.disabled`` /
     ``self.flow_override`` are appended verbatim afterwards, so braces inside
     those suffixes are never interpreted as ``str.format`` placeholders.

     :return: a ``FlowExecution`` built from the new execution id
     :raises Exception: when the reply is not a 200 or its body contains
         the text ``error``
     """
     logger.info('开始执行flow {flow}'.format(flow=self.flowId))
     base_url = '{azkaban_url}/executor?ajax=executeFlow&project={project}&flow={flow}'.format(
         azkaban_url=azkaban_url,
         project=self.prj_name,
         flow=self.flowId)
     # Append the caller-provided query fragments only after formatting.
     url = base_url + self.disabled + self.flow_override
     flows_resp = requests.get(
         url,
         cookies=self.cookies_fetcher.get_cookies()
     )
     rs = str(flows_resp.content, 'utf-8')
     if flows_resp.status_code != 200 or 'error' in rs:
         logger.error(rs)
         raise Exception('执行{flow} 报错'.format(flow=self.flowId))

     exec_id = json.loads(rs)['execid']
     logger.info(('开始执行{flow},execid是{exec_id}'.format(flow=self.flowId, exec_id=exec_id)))
     return FlowExecution(exec_id, self.cookies_fetcher)
Exemple #6
0
 def upload_zip(self, zip_file):
     """
     Upload a zip archive to this project after backing up the current one.

     The archive is opened in a ``with`` block so the file handle is closed
     as soon as the request has been sent, whether or not it succeeds.

     :param zip_file: path of the zip archive to upload
     :raises Exception: when ``download_zip`` reports failure or the server
         answers the upload with a non-200 status
     """
     if not self.download_zip():
         raise Exception('项目文件{name}:文件备份失败'.format(name=self.name))

     # Backup succeeded.
     logger.info("备份{0}项目文件成功".format(self.name))
     upload_data = {
         'project': self.name,
         'ajax': 'upload',
     }
     with open(zip_file, 'rb') as archive:
         files = {
             'file': (os.path.basename(zip_file), archive, 'application/zip')
         }
         resp = requests.post(
             "{azkaban_url}/manager".format(azkaban_url=azkaban_url),
             data=upload_data,
             cookies=self.cookies_fetcher.get_cookies(),
             files=files)
     if resp.status_code != 200:
         logger.error(str(resp.content, 'utf-8'))
         raise Exception('上传ZIP文件失败:{name} '.format(name=zip_file))
     logger.info("上传ZIP文件完成:" + self.name)
Exemple #7
0
 def download_zip(self):
     """
     Download the project's current archive into the local backup directory.

     The file is written to
     ``<backup_path>/<first 8 chars of the time key>/<name>_<time key>.zip``.
     The HTTP status is checked before anything is written, so a failed
     request no longer leaves an error page stored as a backup archive.

     :return: True after a successful download, and also True when
         ``self.crt_flag`` is falsy (logged as "project does not exist")
     :raises Exception: when the server answers with a non-200 status
     """
     if not self.crt_flag:
         logger.info("项目不存在不能下载")
         return True

     url = "{azkaban_url}/manager?session.id={id}&project={project}&download=True".format(
         id=self.cookies_fetcher.get_session_id(),
         azkaban_url=azkaban_url,
         project=self.name)
     resp = requests.get(url, stream=True)
     try:
         if resp.status_code != 200:
             raise Exception('下载{project}项目文件失败'.format(project=self.name))
         now_time = get_current_timekey()
         # presumably the leading 8 characters are the date part -- confirm
         backup_dt_dir = os.path.join(backup_path, now_time[0:8])
         os.makedirs(backup_dt_dir, exist_ok=True)
         file_path = os.path.join(backup_dt_dir, self.name + "_" + now_time + '.zip')
         with open(file_path, "wb") as archive:
             archive.write(resp.content)
     finally:
         # The request was opened with stream=True; release the connection.
         resp.close()
     logger.info("下载ZIP文件完成" + self.name + ": " + file_path)
     return True
Exemple #8
0
    def set_sla(self, schedule_id, email, settings):
        """
        Set SLA rules on a schedule (noted by the author as rarely used).

        :param schedule_id: Schedule ID.
        :param email: Array of emails to receive notifications.
        :param settings: Array of comma delimited strings of SLA settings
          consisting of:
          + job name - blank for full workflow
          + rule - SUCCESS or FINISH
          + duration - specified in hh:mm
          + email action - bool
          + kill action - bool
        :return: the value produced by ``extract_json`` for the server reply
        """
        logger.debug('Setting SLA for schedule Id %s.', schedule_id)
        payload = {
            'ajax': 'setSla',
            'scheduleId': schedule_id,
            'slaEmails': ','.join(email),
        }
        # One indexed form field per rule: settings[0], settings[1], ...
        payload.update(
            ('settings[%s]' % (index, ), rule)
            for index, rule in enumerate(settings)
        )
        response = self._request(method='POST', endpoint='schedule', data=payload)
        res = extract_json(response)
        logger.info('Set SLAs for schedule Id %s.', schedule_id)
        return res
Exemple #9
0
 def resume_flow(self):
     """
     Re-submit the flow and adopt the execution id of the new run.

     The jobs recorded under ``'SUCCEEDED'`` in ``self.job_status_dict`` are
     passed (through ``get_str_set``) as the ``disabled`` query parameter.
     """
     # NOTE(review): this reads ``self.prj_name.name``, so prj_name is
     # expected to be an object with a ``name`` attribute here -- confirm.
     project = self.prj_name.name
     flow = self.flowId
     disabled = get_str_set(self.job_status_dict.get('SUCCEEDED'))
     target = '{0}/executor?ajax=executeFlow&project={1}&flow={2}&disabled={3}'.format(
         azkaban_url, project, flow, disabled)
     resp = requests.get(target, cookies=self.cookies_fetcher.get_cookies())
     new_exec_id = json.loads(resp.content)['execid']
     logger.info('old exec_id {old} to new one {new}'.format(old=self.exec_id, new=new_exec_id))
     self.exec_id = new_exec_id
Exemple #10
0
 def del_prj(self):
     """
     Delete the project from the server after backing it up.

     Deletion is refused while ``fetch_flow_schedule`` reports any schedule:
     the refusal is logged and nothing is deleted. Otherwise the project is
     deleted only when ``self.crt_flag`` is truthy and ``download_zip``
     succeeds.

     :return: self when the project was deleted, otherwise None
     :raises Exception: when the delete request gets a non-200 status
     """
     if self.fetch_flow_schedule():
         logger.info("该项目有执行计划,不能删除")
         # Stop here; falling through would delete a scheduled project.
         return None
     if not (self.crt_flag and self.download_zip()):
         return None
     resp = requests.get(
         "{azkaban_url}/manager?delete=true&project={name}".format(azkaban_url=azkaban_url, name=self.name),
         cookies=self.cookies_fetcher.get_cookies())
     if resp.status_code != 200:
         raise Exception('Error happened when delete project {project} to azkaban'.format(project=self.name))
     logger.info('删除Project:' + self.name)
     return self
Exemple #11
0
 def fetch_job(self):
     """
     Fetch the jobs of this flow.

     :return: the ``nodes`` entry of the flow-graph reply
     :raises Exception: when the server answers with a non-200 status
     """
     url = '{azkaban_url}/manager?ajax=fetchflowgraph&project={project}&flow={flow}'.format(
         azkaban_url=azkaban_url,
         project=self.prj_name,
         flow=self.flowId)
     reply = requests.get(url, cookies=self.cookies_fetcher.get_cookies())
     if reply.status_code != 200:
         raise Exception('Error happened when fetch job from {0} in {1}'.format(self.flowId, self.prj_name))
     graph = json.loads(str(reply.content, 'utf-8'))
     jobs = graph['nodes']
     logger.info(jobs)
     return jobs
Exemple #12
0
 def refresh_flow_execution(self):
     """
     Refresh the per-status job sets from the current execution info.

     Every node's id is added to the set stored under its status in
     ``self.job_status_dict``; existing entries are kept. One log line is
     then written per status with its count over the number of nodes.

     :return: None
     """
     nodes = self.get_flow_exec_info()['nodes']
     for node in nodes:
         status = node['status']
         job_id = node['id']
         self.job_status_dict.setdefault(status, set()).add(job_id)

     total = len(nodes)
     for status, job_ids in self.job_status_dict.items():
         logger.info('%s  status: %s : %d/%d \n' % (get_current_timekey(), status, len(job_ids), total))
Exemple #13
0
 def schedule_flows(self, cron=None, flows=None):
     """
     Schedule the project's flows.

     :param cron: cron expression handed to each flow; when None it is read
         through ``get_project_info`` under ``"cron.<project name>"``. If
         that is None and the ``"cron"`` key is None as well, an error is
         logged and nothing is scheduled.
     :param flows: flows to schedule; None means every flow returned by
         ``fetch_flow``. Only flows that ``fetch_flow`` returns are scheduled.
     :return: None
     """
     available = self.fetch_flow()
     if cron is None:
         cron = get_project_info(prj_nm=self.name, key_nm="cron." + self.name)
         if cron is None and get_project_info(prj_nm=self.name, key_nm="cron") is None:
             logger.error(self.name + "的 system.properties中没有配置定时器cron请配置")
             return

     wanted = available if flows is None else flows
     for flow_id in (candidate for candidate in available if candidate in wanted):
         logger.info("设置项目{0}的工作流{1}执行计划".format(self.name, flow_id))
         Flow(self.name, flow_id, self.cookies_fetcher).schedule(cron)
Exemple #14
0
 def create_prj(self):
     """
     Create the project on the server unless ``self.crt_flag`` is already set.

     :return: self, both when the project is created and when creation is
         skipped because it already exists
     :raises Exception: when the create request gets a non-200 status
     """
     if self.crt_flag:
         logger.info("项目已经存在不能创建")
         return self

     form = {
         'name': self.name,
         'description': self.description
     }
     endpoint = "{azkaban_url}/manager?action=create".format(azkaban_url=azkaban_url)
     resp = requests.post(endpoint, data=form, cookies=self.cookies_fetcher.get_cookies())
     if resp.status_code != 200:
         raise Exception('项目 {project} 创建失败'.format(project=self.name))

     status = json.loads(str(resp.content, 'utf-8'))['status']
     logger.info('项目 {project} 创建状态 : {status}'.format(project=self.name, status=status))
     return self
Exemple #15
0
 def get_properties(self):
     """
     Parse the properties file at ``self.fileName`` into ``self.properties``.

     Each line is stripped and everything from the first ``#`` onwards is
     dropped. A remaining line that contains ``=`` after at least one
     character is split at the first ``=`` and stored through
     ``self.__get_dict(key, self.properties, value)``; the value keeps any
     later ``=`` characters.

     The file is opened in plain text mode (the former ``'U'`` flag is
     rejected by Python 3.11+) and is closed on every path.

     :return: ``self.properties``
     :raises Exception: wrapping any error raised while opening or parsing
     """
     try:
         with open(self.fileName, 'r') as pro_file:
             for line in pro_file:
                 # Drop surrounding whitespace, then any trailing comment.
                 line = line.strip().partition('#')[0]
                 if line.find('=') > 0:
                     key, _, value = line.partition('=')
                     self.__get_dict(key.strip(), self.properties, value.strip())
     except Exception as e:
         logger.info("本地的本质文件不规范,注意出现了com key值 就不能出现 com.hy的key值")
         raise Exception(e) from e
     return self.properties
Exemple #16
0
def check_cron(cron):
    """
    Check whether a cron expression is acceptable.

    The expression must be truthy, contain a space, and be accepted by
    ``Cron``; the next run time is logged on success.

    :param cron: cron expression to validate
    :return: True when valid, False otherwise
    """
    if not cron or " " not in cron:
        logger.warning("cron不能为空或者格式不对,你输入的是{0}".format(cron))
        return False
    try:
        parsed = Cron(cron)
        logger.info("cron格式校验通过,下次执行时间: " + str(parsed.get_next()))
    except Exception as err:
        logger.error(str(err))
        logger.error("cron不能为空或者格式不对,你输入的是{0}".format(cron))
        return False
    return True
Exemple #17
0
 def unscheduled(self):
     """
     Remove the schedule of this flow, if it has one.

     :return: True when a schedule was removed, None when
         ``fetch_schedule`` found none
     :raises Exception: when the removal request gets a non-200 status
     """
     schd_id = self.fetch_schedule()
     if not schd_id:
         return None

     form = {
         u'session.id': self.cookies_fetcher.get_session_id(),
         u'action': u'removeSched',
         u'scheduleId': schd_id
     }
     response = requests.post(azkaban_url + '/schedule', data=form)
     if response.status_code == 200:
         logger.info("{0} flow:{1} 执行计划id={2}已经被取消".format(
             self.prj_name, self.flowId, schd_id))
         return True

     logger.info("Request data: \n%s", form)
     logger.error(str(response.content, 'utf-8'))
     raise Exception("{0} 取消执行计划失败".format(self.prj_name))
Exemple #18
0
    def get_sla(self, schedule_id):
        """
        Retrieve the SLA settings of a schedule (noted by the author as rarely used).

        :param schedule_id: Schedule Id - obtainable from get_schedule
        :return: the value produced by ``extract_json`` for the server reply
        :raises Exception: when the reply has no ``settings`` entry
        """
        logger.debug('Retrieving SLA for schedule ID %s.', schedule_id)
        query = {
            'ajax': 'slaInfo',
            'scheduleId': schedule_id
        }
        response = self._request(method='GET', endpoint='schedule', params=query)
        res = extract_json(response)
        logger.info('Retrieved SLA for schedule ID %s.', schedule_id)
        if 'settings' in res:
            return res
        raise Exception('Failed to get SLA; check that an SLA exists.')
Exemple #19
0
    def cancel_execution(self, exec_id):
        """
        Cancel an execution.

        :param exec_id: Execution ID.
        :return: None
        :raises Exception: when the reply contains an ``error`` entry; the
            message names the execution id
        """
        logger.debug('Cancelling execution %s.', exec_id)
        res = extract_json(
            self._request(
                method='GET',
                endpoint='executor',
                params={
                    'execid': exec_id,
                    'ajax': 'cancelFlow',
                },
            ))
        if 'error' in res:
            # Exceptions do not interpolate logging-style arguments, so the
            # message is formatted explicitly.
            raise Exception('Execution %s is not running.' % (exec_id, ))
        logger.info('Execution %s cancelled.', exec_id)
        return
Exemple #20
0
def exec_shell(shell, logs_print=True):
    """
    Run a shell command and follow its combined stdout/stderr.

    Lines longer than two characters are remembered (the last one, and the
    last one containing "error" in any letter case) and, when ``logs_print``
    is true, logged. Output is decoded as UTF-8 with undecodable bytes
    replaced, so stray bytes in the output no longer make a working command
    look like a failure.

    :param shell: command line executed through the shell (``shell=True``);
        callers must only pass trusted strings
    :param logs_print: whether to log each output line
    :return: True when the command exits with status 0
    :raises Exception: when the command exits non-zero or cannot be run; the
        message carries the last error line and the last output line
    """
    logger.info("执行shell:" + shell)
    try:
        proc = subprocess.Popen(shell,
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        pre_line = ''
        error_line = ''
        # NOTE(review): output still buffered when the process exits is not
        # drained. Reading to EOF instead could block if the command leaves a
        # background child holding the pipe -- confirm before changing.
        while proc.poll() is None:
            line = proc.stdout.readline().strip().decode('utf-8', errors='replace')
            if len(line) > 2:
                pre_line = line
                if 'error' in line.lower():
                    error_line = line
                if logs_print:
                    logger.info(line)
        if proc.returncode == 0:
            logger.info('shell 执行成功')
            return True
        raise Exception(error_line + '\n' + pre_line)
    except Exception as e:
        logger.error('shell执行失败:' + str(e))
        raise Exception("shell执行失败:" + str(e)) from e
Exemple #21
0
def restart_azkaban(azkaban_path="/opt/softs/azkaban"):
    """
    Start Azkaban, or restart it when it is already running.

    The web and executor server directories are located under
    ``azkaban_path`` by name ("web-"/"-web" and "exec-"/"-exec", ignoring
    case) and stored in the module-level globals ``web_server`` and
    ``exec_server``; a global is only assigned when a matching entry exists.
    Running servers are detected from the output of ``jps``. The executor is
    (re)started first and activated, then the web server is (re)started.

    :param azkaban_path: Azkaban install directory (contains the web-server
        and exec-server directories)
    :return: None
    """
    global exec_server, web_server
    os.chdir(azkaban_path)
    for entry in os.listdir(azkaban_path):
        lowered = entry.lower()
        if 'web-' in lowered or '-web' in lowered:
            web_server = os.path.join(azkaban_path, entry)
        elif 'exec-' in lowered or '-exec' in lowered:
            exec_server = os.path.join(azkaban_path, entry)

    # The bin/*.sh scripts below are relative, so run them from each server dir.
    os.chdir(exec_server)
    with os.popen("jps") as pipe:  # find out what is already running
        jps_lines = pipe.readlines()

    running = set()
    for jps_line in jps_lines:
        fields = jps_line.split()
        # Skip lines without a process name instead of failing with
        # IndexError on them.
        if len(fields) > 1:
            running.add(fields[1].upper())
    azkaban_exec = 'AzkabanExecutorServer'.upper() in running
    azkaban_web = 'AzkabanWebServer'.upper() in running

    if azkaban_exec:
        exec_shell("bin/shutdown-exec.sh")
    exec_shell("bin/start-exec.sh")
    time.sleep(10)
    active_executor()
    os.chdir(web_server)
    time.sleep(10)
    logger.info(os.getcwd())
    if azkaban_web:
        exec_shell("bin/shutdown-web.sh")
    exec_shell("bin/start-web.sh")
0
def active_executor(hosts=None, port=12321):
    """
    Activate the executors that ``get_executor(active=0)`` reports.

    Without ``hosts``, the function calls itself once for every host/port
    pair returned by ``get_executor(active=0, rstype="port")``. With
    ``hosts``, that single host is activated through its
    ``/executor?action=activate`` endpoint and the outcome is logged.

    :param hosts: host name of one executor, or None for all inactive ones
    :param port: executor port used when ``hosts`` is given
    :return: None
    """
    inactive = get_executor(active=0)
    if len(inactive) == 0:
        logger.info("所有节点都已经激活")
        return

    if not hosts:
        port_by_host = get_executor(active=0, rstype="port")
        for host, host_port in port_by_host.items():
            active_executor(host, host_port)
        return

    if hosts not in inactive:
        logger.info("{0}节点没有部署成功,请确认是否部署成功和hostname".format(hosts))
        return

    url = "http://{0}:{1}/executor?action=activate".format(hosts, port)
    try:
        reply = requests.get(url)
        activated = str(reply.content, 'utf-8') == '{"status":"success"}'
        if activated:
            logger.info("Executor : {0} 激活成功".format(hosts))
        else:
            logger.error("激活Executor失败,请确认Executor正确启动")
    except Exception as err:
        logger.error("激活Executor失败,请确认正确的ip和port" + str(err))