예제 #1
0
 def execute(self):
     """
     Trigger an execution of this flow on Azkaban.

     Builds the ``executeFlow`` ajax URL from ``azkaban_url``,
     ``self.prj_name`` and ``self.flowId``, appends the ``self.disabled``
     and ``self.flow_override`` strings to it, and issues a GET request
     using the cookies supplied by ``self.cookies_fetcher``.

     :return: a ``FlowExecution`` built from the returned ``execid`` and
         ``self.cookies_fetcher``
     :raises Exception: if the HTTP status is not 200 or the response body
         contains the substring ``error``
     """
     logger.info('开始执行flow {flow}'.format(flow=self.flowId))
     # Format only the fixed template. The extra query fragments are
     # appended afterwards so that a literal "{" or "}" inside them is never
     # interpreted as a str.format placeholder.
     url = '{azkaban_url}/executor?ajax=executeFlow&project={project}&flow={flow}'.format(
         azkaban_url=azkaban_url,
         project=self.prj_name,
         flow=self.flowId)
     url += self.disabled + self.flow_override
     flows_resp = requests.get(
         url,
         cookies=self.cookies_fetcher.get_cookies()
     )
     rs = str(flows_resp.content, 'utf-8')
     if flows_resp.status_code != 200 or 'error' in rs:
         logger.error(rs)
         raise Exception('执行{flow} 报错'.format(flow=self.flowId))
     exec_id = json.loads(rs)['execid']
     logger.info(('开始执行{flow},execid是{exec_id}'.format(flow=self.flowId, exec_id=exec_id)))
     return FlowExecution(exec_id, self.cookies_fetcher)
예제 #2
0
파일: azkabans.py 프로젝트: Terr123123/dps
 def upload_zip(self, zip_file):
     """
     Upload a project zip archive to Azkaban.

     ``self.download_zip()`` is called first (the log message treats its
     success as a backup of the existing project files); the upload is only
     attempted when it returns a truthy value.

     :param zip_file: path of the zip file to upload
     :raises Exception: if ``download_zip()`` returns a falsy value, or if
         the upload request does not return HTTP 200
     """
     if not self.download_zip():
         raise Exception('项目文件{name}:文件备份失败'.format(name=self.name))

     # Backup succeeded.
     logger.info("备份{0}项目文件成功".format(self.name))
     upload_data = {
         'project': self.name,
         'ajax': 'upload',
     }
     # Keep the archive open only for the duration of the request so the
     # file handle is always released, even if the POST raises.
     with open(zip_file, 'rb') as zip_handle:
         files = {
             'file': (os.path.basename(zip_file), zip_handle, 'application/zip')
         }
         resp = requests.post(
             "{azkaban_url}/manager".format(azkaban_url=azkaban_url),
             data=upload_data,
             cookies=self.cookies_fetcher.get_cookies(),
             files=files)
     if resp.status_code != 200:
         logger.error(str(resp.content, 'utf-8'))
         raise Exception('上传ZIP文件失败:{name} '.format(name=zip_file))
     logger.info("上传ZIP文件完成:" + self.name)
예제 #3
0
 def replace_property(file_name, from_regex, to_str, append_on_not_exists=True):
     """
     Rewrite matching lines of a text file in place.

     Every line that matches ``from_regex`` and does not start with ``#``
     (after stripping whitespace) has the matched text replaced by
     ``to_str``. If no such line exists and ``append_on_not_exists`` is
     true, ``'\\n' + to_str`` is appended to the end of the file.

     :param file_name: path of the file to modify
     :param from_regex: regular expression to search for
     :param to_str: replacement text (``re.sub`` replacement syntax)
     :param append_on_not_exists: append ``to_str`` when nothing was replaced
     :return: None; a missing file is only logged as an error
     """
     if not os.path.exists(file_name):
         logger.error("文件 %s not found" % file_name)
         return

     pattern = re.compile(from_regex)
     found = False
     new_lines = []
     with open(file_name, 'r') as reader:
         for line in reader:
             # Commented-out lines are copied through untouched.
             if pattern.search(line) and not line.strip().startswith('#'):
                 found = True
                 line = pattern.sub(to_str, line)
             new_lines.append(line)
     if not found and append_on_not_exists:
         new_lines.append('\n' + to_str)

     # The file is removed and recreated rather than truncated in place,
     # as the previous implementation did.
     if os.path.exists(file_name):
         os.remove(file_name)
     with open(file_name, 'w') as writer:
         writer.writelines(new_lines)
예제 #4
0
    def schedule(self, cron=None):
        """
        Register a cron schedule for this flow on Azkaban.

        A flow-specific value read via ``get_project_info`` under the key
        ``cron.<flowId>`` takes precedence over the ``cron`` argument.

        :param cron: cron expression used when no flow-specific one is found
        :return: True when the schedule request succeeded; False when the
            cron expression fails ``check_cron`` or the request is rejected
        """
        configured_cron = get_project_info(self.prj_name, "cron." + str(self.flowId).strip())
        if configured_cron:
            cron = configured_cron
        if not check_cron(cron):
            return False

        payload = {
            'session.id': self.cookies_fetcher.get_session_id(),
            'ajax': u'scheduleCronFlow',
            'projectName': self.prj_name,
            'flow': self.flowId,
            'failureAction': 'finishPossible',
            'cronExpression': cron
        }
        response = requests.post(azkaban_url + '/schedule', data=payload)
        body = str(response.content, 'utf-8')

        # Success means HTTP 200 and no "error" substring in the body.
        if response.status_code == 200 and 'error' not in body:
            logger.info("{0} flow:{1}设置执行计划成功".format(self.prj_name, self.flowId))
            return True

        logger.info("秒 分 时 日 月 周 data: \n%s", payload)
        logger.error(body)
        logger.error("{0}设置执行计划失败".format(self.prj_name))
        return False
예제 #5
0
파일: utils.py 프로젝트: Terr123123/dps
def rplc_cmd_with_prop(cmd, prj_nm, global_prop=None):
    """
    Substitute ``${...}`` parameters in a command line, for validation.

    ``$`` signs are dropped, dots inside ``{...}`` placeholders become
    underscores, and the result is expanded with ``str.format`` using
    ``global_prop``. When ``check_file_exists`` is truthy, the first two
    space-separated tokens that end in ``.sh``/``.py``/``.jar`` are checked
    for existence on disk.

    :param cmd: raw command text
    :param prj_nm: project name, used to load properties and resolve paths
    :param global_prop: mapping of values; loaded via ``get_global_prop``
        when omitted
    :return: the expanded command; ``None`` when a referenced file is
        missing and ``is_prod`` is truthy; the partially processed command
        when any exception occurs
    """
    if global_prop is None:
        global_prop = get_global_prop(prj_nm)
    cmd = cmd.replace("$", "")
    for placeholder in re.findall(r"{.*?}", cmd):
        if "." in placeholder:
            cmd = cmd.replace(placeholder, placeholder.replace(".", "_"))
    try:
        cmd = cmd.format(**global_prop)
        if check_file_exists:
            # Only the first two tokens are inspected.
            for token in cmd.split(" ")[0:2]:
                if "." not in token:
                    continue
                extension = os.path.splitext(token)[1]
                if extension not in ['.sh', '.py', '.jar']:
                    continue
                if token.startswith("/") or token.startswith("\\"):
                    script_path = token
                else:
                    script_path = os.path.join(
                        get_prj_path(prj_nm, file_type="dir", search_path="conf"), token)
                if os.path.exists(script_path):
                    continue
                if is_prod:
                    logger.error("项目文件不存在:" + script_path)
                    return None
                logger.warning("测试环境,项目文件不存在:" + script_path)
        return cmd
    except Exception as e:
        detail = str(e)
        if 'azkaban_' in detail:
            # Show the key with dots again, undoing the underscore rewrite.
            detail = detail.replace("_", ".")
        logger.warning("使用参数在全局变量中不存在:" + detail)
        return cmd
예제 #6
0
파일: azssh.py 프로젝트: Terr123123/dps
def exec_shell(shell, logs_print=True):
    """
    Run a shell command, streaming its combined stdout/stderr to the log.

    :param shell: command line, executed through the shell
    :param logs_print: when true, each output line longer than two
        characters is logged at INFO level
    :return: True when the command exits with status 0
    :raises Exception: when the command exits non-zero or cannot be run; the
        message contains the last output line mentioning "error" and the
        last output line seen
    """
    # NOTE(review): the command is run with shell=True; callers must not
    # pass untrusted text here.
    logger.info("执行shell:" + shell)
    try:
        pre_line = ''
        error_line = ''
        with subprocess.Popen(shell,
                              shell=True,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT) as proc:
            # Read until EOF instead of polling the process: output that is
            # still buffered when the process exits (typically the final
            # error lines) must not be dropped.
            for raw_line in proc.stdout:
                # Undecodable bytes are replaced so that odd output cannot
                # turn a successful command into a failure.
                line = raw_line.strip().decode('utf-8', errors='replace')
                if len(line) > 2:
                    pre_line = line
                    if 'error' in line.lower():
                        error_line = line
                    if logs_print:
                        logger.info(line)
        # Leaving the "with" block waits for the process, so returncode is set.
        if proc.returncode == 0:
            logger.info('shell 执行成功')
            return True
        raise Exception(error_line + '\n' + pre_line)
    except Exception as e:
        logger.error('shell执行失败:' + str(e))
        raise Exception("shell执行失败:" + str(e))
예제 #7
0
파일: utils.py 프로젝트: Terr123123/dps
def eval_str(strs):
    """
    Evaluate a command-line argument written as a Python expression.

    :param strs: expression text such as ``"['a','b']"`` or ``"{'s':1}"``
    :return: the evaluated value, or None when ``strs`` is empty/None
    :raises SystemExit: with status 1 when the text cannot be evaluated
    """
    if not strs:
        return None
    try:
        # NOTE(security): eval() executes arbitrary Python. If only literal
        # lists/dicts are ever expected here, ast.literal_eval would be the
        # safe replacement; confirm with callers before switching.
        return eval(strs)
    except Exception as e:
        logger.error("参数格式不对 -f/-i/-d \"['a','b']\" -p \"{'s':1}\"  :" + str(e))
        # Exit non-zero so the calling shell/scheduler sees the failure.
        sys.exit(1)
예제 #8
0
 def fetch_flow(self):
     """
     Fetch the ids of all flows in this project.

     :return: list of the ``flowId`` values reported by the
         ``fetchprojectflows`` ajax call
     :raises Exception: if the request does not return HTTP 200
     """
     url = '{azkaban_url}/manager?ajax=fetchprojectflows&project={project}'.format(
         azkaban_url=azkaban_url, project=self.name)
     flows_resp = requests.get(url, cookies=self.cookies_fetcher.get_cookies())
     body = str(flows_resp.content, 'utf-8')
     if flows_resp.status_code != 200:
         logger.error(body)
         raise Exception('Error happened when fetch flow from {project} in azkaban'.format(project=self.name))
     return [entry['flowId'] for entry in json.loads(body)['flows']]
예제 #9
0
파일: cookies.py 프로젝트: Terr123123/dps
 def __init__(self, user=login_user, pwd=login_pwd):
     """
     Log in to Azkaban and remember the session.

     On success ``self.cookies`` holds the response cookies and
     ``self.session_id`` the ``session.id`` value from the JSON reply. On
     any failure the error is logged and both attributes stay ``None``.

     :param user: login user name
     :param pwd: login password
     """
     self.login_data = {
         'action': 'login',
         'username': user,
         'password': pwd
     }
     # Define both attributes up front so that a failed login leaves a
     # fully initialised object instead of one with missing attributes.
     self.cookies = None
     self.session_id = None
     try:
         resp = requests.post(
             "{azkaban_url}".format(azkaban_url=azkaban_url),
             data=self.login_data)
         self.cookies = resp.cookies
         self.session_id = resp.json()['session.id']
         logger.debug("azkaban登录成功")
     except Exception as e:
         logger.error("登录错误:" + str(e))
예제 #10
0
 def schedule_flows(self, cron=None, flows=None):
     """
     Schedule flows of this project.

     :param cron: cron expression; when None it is read via
         ``get_project_info`` under ``cron.<project name>``. If that is None
         and the plain ``cron`` key is None too, an error is logged and
         nothing is scheduled.
     :param flows: flow ids to schedule; None means every fetched flow
     """
     all_flows = self.fetch_flow()
     if cron is None:
         cron = get_project_info(prj_nm=self.name, key_nm="cron." + self.name)
         no_fallback = get_project_info(prj_nm=self.name, key_nm="cron") is None if cron is None else False
         if no_fallback:
             logger.error(self.name + "的 system.properties中没有配置定时器cron请配置")
             return
     selected = all_flows if flows is None else flows
     for flow_id in all_flows:
         if flow_id not in selected:
             continue
         logger.info("设置项目{0}的工作流{1}执行计划".format(self.name, flow_id))
         Flow(self.name, flow_id, self.cookies_fetcher).schedule(cron)
예제 #11
0
def extract_json(response):
    """
    Parse the JSON body of a response and raise if it reports an error.

    :param response: Request response object exposing ``json()`` and ``text``.
    :return: the decoded JSON payload
    :raises ValueError: if the body is not valid JSON (re-raised after
        logging the raw text)
    :raises Exception: if the payload contains an ``error`` key, or its
        ``status`` is ``'error'``; the exception message is the text taken
        from the payload
    """
    try:
        payload = response.json()
    except ValueError:  # this should never happen
        # response.text is already a str, so it is logged as-is.
        logger.error('没有json格式体 :\n%s', response.text)
        raise
    # Wrap the server-supplied text in a real exception: raising a bare
    # string is itself a TypeError and would hide the actual message.
    if 'error' in payload:
        raise Exception(payload['error'])
    if payload.get('status') == 'error':
        raise Exception(payload.get('message', str(payload)))
    return payload
예제 #12
0
def check_cron(cron):
    """
    Check whether a cron expression is acceptable.

    :param cron: cron expression text
    :return: True when ``cron`` is non-empty, contains a space, and
        ``Cron(cron)`` can be built and asked for its next run time;
        False otherwise (the reason is logged)
    """
    if not cron or " " not in cron:
        logger.warning("cron不能为空或者格式不对,你输入的是{0}".format(cron))
        return False
    try:
        next_run = Cron(cron).get_next()
        logger.info("cron格式校验通过,下次执行时间: " + str(next_run))
    except Exception as e:
        logger.error(str(e))
        logger.error("cron不能为空或者格式不对,你输入的是{0}".format(cron))
        return False
    return True
예제 #13
0
파일: azkabans.py 프로젝트: Terr123123/dps
 def unscheduled(self):
     """
     Remove this flow's schedule from Azkaban.

     :return: True when the schedule was removed; None when
         ``fetch_schedule()`` returns no schedule id
     :raises Exception: if the removal request does not return HTTP 200
     """
     schedule_id = self.fetch_schedule()
     if not schedule_id:
         return None

     payload = {
         u'session.id': self.cookies_fetcher.get_session_id(),
         u'action': u'removeSched',
         u'scheduleId': schedule_id
     }
     response = requests.post(azkaban_url + '/schedule', data=payload)
     if response.status_code == 200:
         logger.info("{0} flow:{1} 执行计划id={2}已经被取消".format(
             self.prj_name, self.flowId, schedule_id))
         return True

     logger.info("Request data: \n%s", payload)
     logger.error(str(response.content, 'utf-8'))
     raise Exception("{0} 取消执行计划失败".format(self.prj_name))
예제 #14
0
파일: utils.py 프로젝트: Terr123123/dps
def get_project_info(prj_nm, key_nm):
    """
    Look up one value in a project's properties file.

    The file path comes from ``get_prj_path(prj_nm, file_type="properties",
    search_path="conf")``.

    :param prj_nm: project name
    :param key_nm: property key, e.g. ``a.b.c``
    :return: the value returned by ``Properties.get_value_by_key``, or None
        (after logging an error) when the properties file does not exist
    """
    properties_path = get_prj_path(prj_nm, file_type="properties", search_path="conf")
    if not os.path.exists(properties_path):
        logger.error(properties_path + "配置文件不存在,请加以配置")
        return None
    props = Properties(properties_path)
    props.get_properties()  # load the values before looking one up
    return props.get_value_by_key(key_nm)
예제 #15
0
def copy_local_param(prj_nm):
    """
    Append a project's own properties to its generated properties file.

    Lines are read from the ``conf`` properties file and appended to the
    ``temp`` one. Lines without ``=`` are skipped, as are lines whose key is
    listed in ``job_config_keys`` or contains ``cron``. Nothing is written
    (an error is logged) when either file is missing.

    :param prj_nm: project name
    """
    source_path = get_prj_path(prj_nm, file_type="properties", search_path="conf")
    target_path = get_prj_path(prj_nm, file_type="properties", search_path="temp")
    if not os.path.exists(source_path):
        logger.error(source_path + "文件不存在,请创建")
        return
    if not os.path.exists(target_path):
        logger.error(target_path + "参数文件没有生成")
        return
    with open(target_path, 'a') as writer, open(source_path, 'r') as reader:
        for raw_line in reader:
            stripped = raw_line.strip()
            if "=" not in stripped:
                continue
            key = stripped.split("=")[0].strip()
            # Special parameters and job-level settings are not copied.
            if key in job_config_keys or 'cron' in key:
                continue
            writer.write(raw_line)
예제 #16
0
def active_executor(hosts=None, port=12321):
    """
    Activate Azkaban executors that are currently inactive.

    :param hosts: a single host to activate; when falsy, every host returned
        by ``get_executor(active=0, rstype="port")`` is activated with the
        port reported for it
    :param port: executor port used together with ``hosts``
    """
    inactive_hosts = get_executor(active=0)
    if len(inactive_hosts) < 1:
        logger.info("所有节点都已经激活")
        return

    if not hosts:
        # No host given: recurse once per inactive host with its own port.
        ports_by_host = get_executor(active=0, rstype="port")
        for host in ports_by_host.keys():
            active_executor(host, ports_by_host[host])
        return

    if hosts not in inactive_hosts:
        logger.info("{0}节点没有部署成功,请确认是否部署成功和hostname".format(hosts))
        return

    url = "http://{0}:{1}/executor?action=activate".format(hosts, port)
    try:
        reply = requests.get(url)
        activated = str(reply.content, 'utf-8') == "{\"status\":\"success\"}"
        if activated:
            logger.info("Executor : {0} 激活成功".format(hosts))
        else:
            logger.error("激活Executor失败,请确认Executor正确启动")
    except Exception as e:
        logger.error("激活Executor失败,请确认正确的ip和port" + str(e))
예제 #17
0
def crt_job_file(prj_nm):
    """
    Generate the Azkaban job files for a project and zip them.

    Reads ``<conf_path>/<prj_nm>/<prj_nm>.csv`` (columns used here:
    ``job_nm``, ``command``, ``dependencies``), recreates the directory
    ``<temp_path>/<prj_nm>``, writes one ``<job_nm>.job`` file per CSV row
    plus a closing ``<prj_nm>_end_flow.job``, then calls ``crt_sys_prop``,
    ``copy_local_param``, ``copy_dir`` and ``zip_dir`` in that order.

    :param prj_nm: project name
    :return: path of the generated zip file (``<temp_path>/<prj_nm>.zip``),
        or None when the CSV file does not exist
    :raises Exception: when ``rplc_cmd_with_prop`` returns a falsy value for
        a job's command
    """
    prj_conf_path = os.path.join(conf_path, prj_nm)
    filepath = os.path.join(prj_conf_path, prj_nm + '.csv')
    if os.path.exists(filepath):
        df = pd.read_csv(filepath)
        prj_path = os.path.join(temp_path, prj_nm)
        # Rows without a command get a default echo command.
        df['command'] = df['command'].fillna(
            "echo 'Exec ${azkaban.job.id} of ${azkaban.flow.flowid} with batch_id:${batch_id} etl_dt:${etl_dt} batch_dt:${batch_dt}'"
        )
        if os.path.exists(prj_path):
            import shutil
            shutil.rmtree(prj_path)  # wipe the project's previous output directory (under temp_path)
        os.makedirs(prj_path)
        df = df.fillna('')
        df['job_nm'] = df['job_nm'].apply(lambda x: x.strip())
        jobs = set(df['job_nm'])  # set of all job names to configure
        # NOTE(review): replace(",", ",") is a no-op as written; presumably
        # the first argument was meant to be a different (e.g. full-width)
        # comma - confirm against the original file.
        df['dependencies'] = df['dependencies'].apply(
            lambda x: x.strip().replace(",", ",").replace(" ", ""))
        df['dependencies'].apply(
            lambda x: check_depend_if_in_flow(x, jobs))  # check each row's dependencies against the job set
        for i in df.to_dict(orient='records'):
            job_path = os.path.join(prj_path, i['job_nm'] + '.job')
            with open(job_path, 'w') as job:
                job.write("type = command\n")
                # A dependencies line is only written for values longer than one character.
                if len(i['dependencies']) > 1:
                    job.write("dependencies = " +
                              i['dependencies'].strip().replace(
                                  ",", ",").replace(" ", "") + "\n")
                # Retry count: project setting, defaulting to 3.
                job.write("retries = {0}\n".format(
                    get_project_info(prj_nm, "retries") or 3))
                job.write("retry.backoff = {0}\n".format(
                    get_project_info(prj_nm, "retry.backoff")
                    or 60000))  # interval between retries (milliseconds)
                if get_project_info(prj_nm, "failure.emails"):
                    # Failure notification e-mails, if configured for the project
                    job.write("failure.emails = {0}\n".format(
                        get_project_info(prj_nm, "failure.emails")))
                if get_project_info(prj_nm, "success.emails"):
                    # Success notification e-mails, if configured for the project
                    job.write("success.emails = {0}\n".format(
                        get_project_info(prj_nm, "success.emails")))
                if get_project_info(prj_nm, "working.dir"):
                    # Working directory, if configured for the project
                    job.write("working.dir = {0}\n".format(
                        get_project_info(prj_nm, "working.dir")))
                # command = "command = {0} {1} {2} {3}".format(i['command'].strip(), i['arg1'], i['arg2'], i['arg3'])
                command = "command = {0}".format(i['command'].strip())
                # The command is validated via rplc_cmd_with_prop; the
                # original (unexpanded) command text is what gets written.
                check_cmd = rplc_cmd_with_prop(i['command'].strip(), prj_nm)
                if check_cmd:
                    job.write(command.strip())
                else:
                    logger.error("{0}项目job文件生中断,命令解析不通过".format(prj_nm))
                    raise Exception("{0}项目job文件生中断,命令解析不通过".format(prj_nm))
        # Generate the end_flow job
        depend_job_set = set()  # set of jobs that some other job depends on

        def add_depend_job_set(strs):
            """Collect the jobs that are depended upon."""
            if strs:
                tp = strs.split(",")
                depend_job_set.update(tp)  # add them to the set

        df[df['dependencies'].notna()]['dependencies'].apply(
            add_depend_job_set)
        not_depended_jobs = ','.join(jobs - depend_job_set)  # jobs that nothing depends on
        job_path = os.path.join(prj_path, prj_nm + "_end_flow.job")
        with open(job_path, 'w') as job:  # write <prj_nm>_end_flow.job
            job.write("type = command\n")
            job.write("dependencies = " + not_depended_jobs + "\n")
            job.write("command = echo 'all flow end' \n")
        # end_flow job written
        crt_sys_prop(prj_nm)  # generate the global system parameter file (system.properties)
        copy_local_param(prj_nm)  # append the project's own parameters (e.g. dw.properties) to that file
        # copy_dir(prj_conf_path, os.path.join(prj_path, 'scripts'), ignore_file_type=['job', 'flow', 'project'])
        # copy_dir(prj_conf_path, prj_path, only_file_type=['job', 'flow', 'project'])
        # presumably copies the conf directory content except *.properties files - verify against copy_dir
        copy_dir(prj_conf_path, prj_path, ignore_file_type=['properties'])
        zip_path = prj_path + '.zip'
        zip_dir(prj_path, zip_path)  # compress the output directory into a zip file
        # logger.info("{0}项目压缩完成,文件路径是:{1}".format(prj_nm, zip_path))
        return zip_path  # path of the zip file
    else:
        logger.error("文件不存在:" + filepath)
        return None