def connectors_status_check():
    """Check every Kafka Connect worker in ALI_SERVER_LIST and self-heal.

    For each (ip, port) worker the connector list is fetched over the
    Connect REST API; every task whose state is not RUNNING triggers a
    DingTalk alert and a POST to the task-restart endpoint.

    Fixes: the DingTalk client is now built once (it was re-constructed for
    every failing task) and the misspelled local `dingding_alet` is renamed.
    """
    # One alert client for the whole sweep — construction is loop-invariant.
    dingding_alert = DingdingAlert(
        'https://oapi.dingtalk.com/robot/send?access_token=928e66bef8d88edc89fe0f0ddd52bfa4dd28bd4b1d24ab4626c804df8878bb48'
    )
    for server_ip, server_port in ALI_SERVER_LIST:
        # List all connectors hosted on this worker.
        url = CONNECTORS_URL % (server_ip, server_port)
        connectors = requests.get(url).json()
        for connector in connectors:
            # Fetch connector status, which includes per-task state.
            c_url = STATUS_URL % (server_ip, server_port, connector)
            logging.info('connector url %s', c_url)
            content = requests.get(c_url).json()
            tasks = content['tasks']
            for task in tasks:
                if task['state'] == 'RUNNING':
                    continue
                # DingTalk alert with the failing task's details.
                msg = """
                DW kafka connectors状态异常,请检查。
                {connector},
                task_len:{task_len},
                error_task_id:{task_id},
                error_msg:{error_msg}
                """.format(connector=connector,
                           task_len=len(tasks),
                           task_id=task['id'],
                           error_msg=task['trace'])
                dingding_alert.send(msg)
                # Ask the worker to restart just this task.
                restart_url = TASK_RESTART_URL % (server_ip, server_port,
                                                  connector, task['id'])
                requests.post(restart_url)
                logging.info(
                    'ip %s, port %s, connector %s, task id %d restart. url %s',
                    server_ip, server_port, connector, task['id'], restart_url)
def __init__(self, v_is_open, v_ds, v_db_name, v_table_name, v_data_hdfs_path, v_country_partition="true", v_file_type="true", v_hour=None):
    """Record the job parameters and resolve the country-code list.

    The v_is_open / v_country_partition / v_file_type flags are the
    "true"/"false" strings the callers pass in.
    """
    # Alert channel (the retired WeChat client is kept for reference).
    #self.comwx = ComwxApi('wwd26d45f97ea74ad2', 'BLE_v25zCmnZaFUgum93j3zVBDK-DjtRkLisI_Wns4g', '1000011')
    webhook = 'https://oapi.dingtalk.com/robot/send?access_token=928e66bef8d88edc89fe0f0ddd52bfa4dd28bd4b1d24ab4626c804df8878bb48'
    self.dingding_alert = DingdingAlert(webhook)

    # Target table / location.
    self.db_name = v_db_name
    self.table_name = v_table_name
    self.data_hdfs_path = v_data_hdfs_path
    self.hdfs_data_dir_str = ""

    # Partition parameters.
    self.ds = v_ds
    self.hour = v_hour
    self.country_partition = v_country_partition
    self.file_type = v_file_type
    self.is_open = v_is_open

    # Country-code bookkeeping, filled by get_country_code().
    self.v_del_flag = 0
    self.v_country_code_map = None
    self.country_code_list = ""
    self.get_country_code()
def __init__(self):
    """Initialise the alert client and blank per-run table/partition state."""
    # NOTE(review): attribute name `dingding_alet` is misspelled but kept,
    # since other methods reference it by this name.
    alert_webhook = 'https://oapi.dingtalk.com/robot/send?access_token=928e66bef8d88edc89fe0f0ddd52bfa4dd28bd4b1d24ab4626c804df8878bb48'
    self.dingding_alet = DingdingAlert(alert_webhook)
    # Filled in per task before the HDFS helpers run.
    self.db_name = ""
    self.table_name = ""
    self.ds = ""
    self.hdfs_data_dir_str = ""
def __init__(self):
    """Open a Hive cursor and set up the production DingTalk alert channel."""
    self.hive_cursor = get_hive_cursor()
    # Production webhook; the dev webhook is kept below for quick switching.
    webhook = 'https://oapi.dingtalk.com/robot/send?access_token=928e66bef8d88edc89fe0f0ddd52bfa4dd28bd4b1d24ab4626c804df8878bb48'
    self.dingding_alert = DingdingAlert(webhook)
    #self.dingding_alert = DingdingAlert_dev('https://oapi.dingtalk.com/robot/send?access_token=c08440c8e569bb38ec358833f9d577b7638af5aaefbd55e3fd748b798fecc4d4')
    # Base URL of the Airflow tree view linked from alerts.
    self.alert_url = "http://8.208.14.165:8080/admin/airflow/tree?dag_id="
    # Populated later, before an alert is sent.
    self.owner_name = None
    self.hdfs_dir_name = None
def __init__(self, args):
    """Store the raw arg list, then derive DAG metadata and country codes."""
    # Production webhook (disabled):
    #self.dingding_alert = DingdingAlert('https://oapi.dingtalk.com/robot/send?access_token=928e66bef8d88edc89fe0f0ddd52bfa4dd28bd4b1d24ab4626c804df8878bb48')
    # Test-environment webhook currently in use.
    self.dingding_alert = DingdingAlert('https://oapi.dingtalk.com/robot/send?access_token=c08440c8e569bb38ec358833f9d577b7638af5aaefbd55e3fd748b798fecc4d4')
    self.alert_url = "http://8.208.14.165:8080/admin/airflow/tree?dag_id="
    self.items = args

    # Fields parsed out of `args` later by get_mian_argument().
    for attr in ('dag', 'v_dag_id', 'v_owner_name', 'v_table_name',
                 'hdfs_data_dir_str', 'v_data_oss_path', 'v_db_name',
                 'v_is_country_partition', 'v_is_result_force_exist',
                 'v_utc_ds', 'v_utc_hour', 'v_is_countries_online',
                 'v_frame_type', 'v_country_code_map', 'country_code_list',
                 'v_local_date', 'v_local_hour', 'v_business_key'):
        setattr(self, attr, None)

    # Numeric defaults.
    self.v_del_flag = 0
    self.v_time_offset = 0
    self.v_execute_time_offset = 0

    # Parse arguments first, then resolve the country-code list.
    self.get_mian_argument()
    self.get_country_code()
def __init__(self, ds, v_info):
    """Keep the dependency spec and reset all accumulators.

    `ds` is accepted for interface compatibility but not stored here.
    """
    self.dingding_alert = DingdingAlert(
        'https://oapi.dingtalk.com/robot/send?access_token=928e66bef8d88edc89fe0f0ddd52bfa4dd28bd4b1d24ab4626c804df8878bb48'
    )
    # Dependency description list supplied by the caller.
    self.v_info = v_info
    # Per-run scratch state.
    self.v_data_dir = ""
    self.start_timeThour = ""
    self.end_dateThour = ""
    self.partition = ""
    self.start_time = ""
    self.end_time = ""
    # Accumulators for hours below/above the threshold plus a log map.
    self.less_res = []
    self.greater_res = []
    self.log_unite_dist = {}
def on_success_callback(context):
    """Send a DingTalk alert when a (backfilled) run finishes more than
    12 hours after its scheduled next execution time."""
    # Maximum tolerated delay: 12 hours, in seconds.
    max_delayed_time = 43200

    # Scheduled next-execution time vs. wall clock, as epoch seconds.
    next_execution_dt = pendulum.parse(str(context['next_execution_date']))
    now_dt = pendulum.parse('now')
    if now_dt.int_timestamp - next_execution_dt.int_timestamp < max_delayed_time:
        return

    # Overdue: report "<dag>.<task>" with both timestamps to the alert group.
    dingding_alert = DingdingAlert(
        'https://oapi.dingtalk.com/robot/send?access_token=928e66bef8d88edc89fe0f0ddd52bfa4dd28bd4b1d24ab4626c804df8878bb48'
    )
    ti = context['task_instance']
    task = "{dag}.{task}".format(dag=ti.dag_id, task=ti.task_id)
    msg = "任务回溯操作{task},计划执行时间:{ne},当前执行时间:{nt}".format(
        task=task, ne=next_execution_dt, nt=now_dt)
    dingding_alert.send('DW {msg} 产出超时'.format(msg=msg))
class TaskTouchzSuccess(object):
    """Manages HDFS `_SUCCESS` marker files for finished partitions.

    All HDFS interaction shells out via `os.popen`; any failure ends the
    process with `sys.exit(1)` so the scheduler marks the task as failed.
    A DingTalk robot is notified when an expected partition is empty.
    """

    def __init__(self):
        # DingTalk robot used for "no data produced" alerts.
        # NOTE(review): `dingding_alet` is misspelled but kept — other
        # methods in this class reference it by this name.
        self.dingding_alet = DingdingAlert(
            'https://oapi.dingtalk.com/robot/send?access_token=928e66bef8d88edc89fe0f0ddd52bfa4dd28bd4b1d24ab4626c804df8878bb48'
        )
        # Per-call state, filled by set_touchz_success / del_path /
        # countries_touchz_success before the helpers run.
        self.table_name = ""
        self.hdfs_data_dir_str = ""
        self.db_name = ""
        self.ds = ""

    def set_touchz_success(self, tables):
        """For each item {'table': ..., 'hdfs_path': ...} touch `_SUCCESS`
        in the HDFS path when it holds data; alert and exit(1) when empty.
        """
        try:
            for item in tables:
                self.table_name = item.get('table', None)
                self.hdfs_data_dir_str = item.get('hdfs_path', None)

                # Total byte size of the directory (du -s, first column).
                line_str = "$HADOOP_HOME/bin/hadoop fs -du -s {hdfs_data_dir} | tail -1 | awk \'{{print $1}}\'".format(
                    hdfs_data_dir=self.hdfs_data_dir_str)
                logging.info(line_str)
                with os.popen(line_str) as p:
                    line_num = p.read()

                # Size 0 -> no data produced: alert the group and fail.
                # NOTE(review): only the first character is checked, so any
                # size string starting with '0' counts as empty (du output
                # has no leading zeros in practice).
                if line_num[0] == str(0):
                    self.dingding_alet.send('DW调度系统任务 {jobname} 数据产出异常'.format(
                        jobname=self.table_name))
                    logging.info("Error : {hdfs_data_dir} is empty".format(
                        hdfs_data_dir=self.hdfs_data_dir_str))
                    sys.exit(1)
                else:
                    # Non-empty: drop the _SUCCESS marker.
                    succ_str = "$HADOOP_HOME/bin/hadoop fs -touchz {hdfs_data_dir}/_SUCCESS".format(
                        hdfs_data_dir=self.hdfs_data_dir_str)
                    logging.info(succ_str)
                    os.popen(succ_str)
                    time.sleep(10)
                    logging.info("DATA EXPORT Successed ......")
        except Exception as e:
            logging.info(e)
            sys.exit(1)

    def get_country_code(self):
        """Return comma-separated two-letter country codes found in the
        dt=self.ds partition of self.db_name.self.table_name, or "nal"
        when the partition yields none.
        """
        cursor = get_hive_cursor()
        # Collect the distinct country codes of the target partition.
        get_sql = '''
            select concat_ws(',',collect_set(country_code)) as country_code from {db}.{table}
            WHERE dt='{pt}'
        '''.format(pt=self.ds, table=self.table_name, db=self.db_name)
        cursor.execute(get_sql)
        res = cursor.fetchone()
        if len(res[0]) > 1:
            country_code_list = res[0]
            logging.info('Executing 二位国家码: %s', country_code_list)
        else:
            # No codes -> fall back to the default pseudo country "nal".
            country_code_list = "nal"
            logging.info('Executing 二位国家码为空,赋予默认值 %s', country_code_list)
        return country_code_list

    def check_success_exist(self):
        """Verify that the `_SUCCESS` marker actually exists; exit(1) if not."""
        time.sleep(15)
        print("debug-> check_success_exist")
        # `echo 1` when the marker is listed, `echo 0` otherwise.
        command = "hadoop dfs -ls {hdfs_data_dir}/_SUCCESS>/dev/null 2>/dev/null && echo 1 || echo 0".format(
            hdfs_data_dir=self.hdfs_data_dir_str)
        logging.info(command)
        out = os.popen(command, 'r')
        res = out.readlines()
        res = 0 if res is None else res[0].lower().strip()
        out.close()
        # Anything other than a leading '1' means the marker is missing.
        if res == '' or res == 'None' or res[0] == '0':
            logging.info("_SUCCESS 验证失败")
            sys.exit(1)
        else:
            logging.info("_SUCCESS 验证成功")

    def delete_exist_partition(self):
        """Delete the current partition directory so re-runs stay unique;
        exit(1) when the directory still exists afterwards."""
        time.sleep(10)
        print("debug-> delete_exist_partition")
        # Remove the partition directory.
        del_command = "hadoop dfs -rm -r {hdfs_data_dir}".format(
            hdfs_data_dir=self.hdfs_data_dir_str)
        logging.info(del_command)
        os.popen(del_command, 'r')
        time.sleep(10)
        # Verify the directory is gone (echo 1 = still there, 0 = gone).
        check_command = "hadoop dfs -ls {hdfs_data_dir}>/dev/null 2>/dev/null && echo 1 || echo 0".format(
            hdfs_data_dir=self.hdfs_data_dir_str)
        out = os.popen(check_command, 'r')
        res = out.readlines()
        res = 0 if res is None else res[0].lower().strip()
        out.close()
        print(res)
        if res == '' or res == 'None' or res[0] == '0':
            logging.info("目录删除成功")
        else:
            # Directory survived the delete -> fail the task.
            logging.info("目录删除失败:" + " " + "{hdfs_data_dir}".format(
                hdfs_data_dir=self.hdfs_data_dir_str))
            sys.exit(1)

    def data_not_file_type_touchz(self):
        """Touch `_SUCCESS` unconditionally (directory may be empty);
        the directory is created first if needed."""
        try:
            print("debug-> data_not_file_type_touchz")
            mkdir_str = "$HADOOP_HOME/bin/hadoop fs -mkdir -p {hdfs_data_dir}".format(
                hdfs_data_dir=self.hdfs_data_dir_str)
            logging.info(mkdir_str)
            os.popen(mkdir_str)
            time.sleep(10)
            succ_str = "$HADOOP_HOME/bin/hadoop fs -touchz {hdfs_data_dir}/_SUCCESS".format(
                hdfs_data_dir=self.hdfs_data_dir_str)
            logging.info(succ_str)
            os.popen(succ_str)
            logging.info("DATA EXPORT Successed ......")
            self.check_success_exist()
        except Exception as e:
            logging.info(e)
            sys.exit(1)

    def data_file_type_touchz(self):
        """Touch `_SUCCESS` only when the directory holds data; alert via
        DingTalk and exit(1) when it is empty.

        NOTE(review): the original Chinese docstrings of this method and
        data_not_file_type_touchz were swapped relative to their behavior.
        """
        try:
            print("debug-> data_file_type_touchz")
            # Total byte size of the directory (du -s, first column).
            line_str = "$HADOOP_HOME/bin/hadoop fs -du -s {hdfs_data_dir} | tail -1 | awk \'{{print $1}}\'".format(
                hdfs_data_dir=self.hdfs_data_dir_str)
            logging.info(line_str)
            with os.popen(line_str) as p:
                line_num = p.read()
            # Size 0 -> alert and fail the task.
            if line_num[0] == str(0):
                self.dingding_alet.send('DW调度系统任务 {jobname} 数据产出异常'.format(
                    jobname=self.table_name))
                logging.info("Error : {hdfs_data_dir} is empty".format(
                    hdfs_data_dir=self.hdfs_data_dir_str))
                sys.exit(1)
            else:
                time.sleep(5)
                succ_str = "hadoop fs -touchz {hdfs_data_dir}/_SUCCESS".format(
                    hdfs_data_dir=self.hdfs_data_dir_str)
                logging.info(succ_str)
                os.popen(succ_str)
                logging.info("DATA EXPORT Successed ......")
                self.check_success_exist()
        except Exception as e:
            logging.info(e)
            sys.exit(1)

    def del_path(self, ds, db_name, table_name, data_hdfs_path, country_partition="true", file_type="true", hour=None):
        """Delete the partition directories for `ds` (and optional `hour`).

        country_partition: "true" if the table has a country_code level.
        file_type: "true" if data must exist before _SUCCESS is written
        later; both values delete the same paths here.
        """
        try:
            self.db_name = db_name
            self.ds = ds
            self.table_name = table_name

            # No country partition, data required for _SUCCESS.
            if country_partition.lower() == "false" and file_type.lower() == "true":
                if hour is None:
                    # Day-level partition path.
                    self.hdfs_data_dir_str = data_hdfs_path + "/dt=" + self.ds
                else:
                    # Hour-level partition path.
                    self.hdfs_data_dir_str = data_hdfs_path + "/dt=" + self.ds + "/hour=" + hour
                self.delete_exist_partition()
                return

            # No country partition, _SUCCESS allowed on empty data.
            if country_partition.lower() == "false" and file_type.lower() == "false":
                if hour is None:
                    self.hdfs_data_dir_str = data_hdfs_path + "/dt=" + self.ds
                else:
                    self.hdfs_data_dir_str = data_hdfs_path + "/dt=" + self.ds + "/hour=" + hour
                self.delete_exist_partition()
                return

            # Country-partitioned: delete one directory per country code.
            country_code_list = self.get_country_code()
            for country_code_word in country_code_list.split(","):
                # Country partition, data required for _SUCCESS.
                if country_partition.lower() == "true" and file_type.lower() == "true":
                    if hour is None:
                        self.hdfs_data_dir_str = data_hdfs_path + "/country_code=" + country_code_word + "/dt=" + self.ds
                    else:
                        self.hdfs_data_dir_str = data_hdfs_path + "/country_code=" + country_code_word + "/dt=" + self.ds + "/hour=" + hour
                    self.delete_exist_partition()
                # Country partition, _SUCCESS allowed on empty data.
                if country_partition.lower() == "true" and file_type.lower() == "false":
                    if hour is None:
                        self.hdfs_data_dir_str = data_hdfs_path + "/country_code=" + country_code_word + "/dt=" + self.ds
                    else:
                        self.hdfs_data_dir_str = data_hdfs_path + "/country_code=" + country_code_word + "/dt=" + self.ds + "/hour=" + hour
                    self.delete_exist_partition()
        except Exception as e:
            logging.info(e)
            sys.exit(1)

    def countries_touchz_success(self, ds, db_name, table_name, data_hdfs_path, country_partition="true", file_type="true", hour=None):
        """Write `_SUCCESS` markers for the partitions of `ds`/`hour`.

        country_partition: "true" if the table has a country_code level.
        file_type: "true" -> data must exist (data_file_type_touchz);
                   "false" -> empty partitions also get _SUCCESS
                   (data_not_file_type_touchz).
        """
        try:
            self.db_name = db_name
            self.ds = ds
            self.table_name = table_name

            # No country partition, data required.
            if country_partition.lower() == "false" and file_type.lower() == "true":
                if hour is None:
                    self.hdfs_data_dir_str = data_hdfs_path + "/dt=" + self.ds
                else:
                    self.hdfs_data_dir_str = data_hdfs_path + "/dt=" + self.ds + "/hour=" + hour
                self.data_file_type_touchz()
                return

            # No country partition, empty data also succeeds.
            if country_partition.lower() == "false" and file_type.lower() == "false":
                if hour is None:
                    self.hdfs_data_dir_str = data_hdfs_path + "/dt=" + self.ds
                else:
                    self.hdfs_data_dir_str = data_hdfs_path + "/dt=" + self.ds + "/hour=" + hour
                self.data_not_file_type_touchz()
                return

            # Country-partitioned: one marker per country code.
            country_code_list = self.get_country_code()
            for country_code_word in country_code_list.split(","):
                # Country partition, data required.
                if country_partition.lower() == "true" and file_type.lower() == "true":
                    if hour is None:
                        self.hdfs_data_dir_str = data_hdfs_path + "/country_code=" + country_code_word + "/dt=" + self.ds
                    else:
                        self.hdfs_data_dir_str = data_hdfs_path + "/country_code=" + country_code_word + "/dt=" + self.ds + "/hour=" + hour
                    self.data_file_type_touchz()
                # Country partition, empty data also succeeds.
                if country_partition.lower() == "true" and file_type.lower() == "false":
                    if hour is None:
                        self.hdfs_data_dir_str = data_hdfs_path + "/country_code=" + country_code_word + "/dt=" + self.ds
                    else:
                        self.hdfs_data_dir_str = data_hdfs_path + "/country_code=" + country_code_word + "/dt=" + self.ds + "/hour=" + hour
                    self.data_not_file_type_touchz()
        except Exception as e:
            logging.info(e)
            sys.exit(1)
class CountriesAppFrame(object):
    """Multi-country Airflow task framework.

    Builds per-country HDFS/OSS partition paths (UTC or local time),
    deletes stale partitions, writes `_SUCCESS` markers, generates Hive
    ALTER PARTITION statements, and raises DingTalk timeliness alerts.
    """

    def __init__(self, args):
        # Production webhook (disabled):
        #self.dingding_alert = DingdingAlert('https://oapi.dingtalk.com/robot/send?access_token=928e66bef8d88edc89fe0f0ddd52bfa4dd28bd4b1d24ab4626c804df8878bb48')
        # Test-environment webhook currently in use.
        self.dingding_alert = DingdingAlert('https://oapi.dingtalk.com/robot/send?access_token=c08440c8e569bb38ec358833f9d577b7638af5aaefbd55e3fd748b798fecc4d4')
        # Base URL of the Airflow tree view linked from alerts.
        self.alert_url = "http://8.208.14.165:8080/admin/airflow/tree?dag_id="
        self.items = args
        # Fields parsed from `args` by get_mian_argument().
        self.dag = None
        self.v_dag_id = None
        self.v_owner_name = None
        self.v_table_name = None
        self.hdfs_data_dir_str = None
        self.v_data_oss_path = None
        self.v_db_name = None
        self.v_is_country_partition = None
        self.v_is_result_force_exist = None
        self.v_utc_ds = None
        self.v_utc_hour = None
        self.v_is_countries_online = None
        self.v_del_flag = 0
        self.v_frame_type = None
        self.v_country_code_map = None
        self.country_code_list = None
        self.v_local_date = None
        self.v_local_hour = None
        self.v_time_offset = 0
        self.v_business_key = None
        self.v_execute_time_offset = 0
        self.get_mian_argument()
        self.get_country_code()

    def get_mian_argument(self):
        """Parse the main-frame arguments out of self.items.

        NOTE(review): method name `get_mian_argument` is misspelled
        ("mian" for "main") but kept — callers use this name.
        """
        for item in self.items:
            # Airflow DAG object (used for dag_id / owner below).
            self.dag = item.get('dag', None)
            # Whether the multi-country business is enabled (default "true").
            self.v_is_countries_online = item.get('is_countries_online', "true")
            # Database name.
            self.v_db_name = item.get('db_name', None)
            # Table name.
            self.v_table_name = item.get('table_name', None)
            # OSS data path.
            self.v_data_oss_path = item.get('data_oss_path', None)
            # Whether the table has a country partition (default "true").
            self.v_is_country_partition = item.get('is_country_partition', "true")
            # Whether the data file must exist (default "true").
            self.v_is_result_force_exist = item.get('is_result_force_exist', "true")
            # Script execution time (%Y-%m-%d %H:%M:%S).
            self.v_execute_time = item.get('execute_time', None)
            # UTC date of the execution time.
            self.v_utc_ds = (datetime.strptime(self.v_execute_time, '%Y-%m-%d %H:%M:%S')).strftime('%Y-%m-%d').strip()
            # UTC hour of the execution time.
            self.v_utc_hour = (datetime.strptime(self.v_execute_time, '%Y-%m-%d %H:%M:%S')).strftime('%H').strip()
            # Whether this is an hourly task (default "false").
            self.v_is_hour_task = item.get('is_hour_task', "false")
            # Frame type: "utc" (default) or "local" (produce in local time).
            self.v_frame_type = item.get('frame_type', "utc")
            # Whether hour offsets are enabled (affects _SUCCESS files).
            self.v_is_offset = item.get('is_offset', "false")
            # Execution-time offset (-1/0/1) for producing adjacent hours.
            self.v_execute_time_offset = int(item.get('execute_time_offset', 0))
            # Business-line name.
            self.v_business_key = item.get('business_key', None)
        # Derive dag id / owner from the DAG when present.
        if self.dag:
            self.v_dag_id = self.dag.dag_id
            self.v_owner_name = self.dag.default_args.get("owner")
        else:
            self.v_owner_name = "Null"
            self.v_dag_id = self.v_table_name

    def get_local_date_time(self, country_code):
        """Resolve the country's local date and hour via GetLocalTime,
        writing them into self.v_local_date / self.v_local_hour."""
        v_utc_time = '{v_sys_utc}'.format(v_sys_utc=self.v_utc_ds + " " + self.v_utc_hour)
        # Local date for this country.
        self.v_local_date = GetLocalTime(self.v_business_key, '{v_utc_time}'.format(v_utc_time=v_utc_time), country_code, self.v_time_offset)["date"]
        # Local hour for this country.
        self.v_local_hour = GetLocalTime(self.v_business_key, '{v_utc_time}'.format(v_utc_time=v_utc_time), country_code, self.v_time_offset)["hour"]

    def get_country_code(self):
        """Load the two-letter country codes for the business line from the
        Airflow Variable "<business_key>_country_code_tag"; "nal" when the
        multi-country business is disabled. Exits when business_key is unset.
        """
        if self.v_business_key is None:
            logging.info("Error: Business Key Is None (Quit ... ...)")
            sys.exit(1)
        else:
            self.v_business_key = self.v_business_key.lower().strip()
        business_line_config_file = self.v_business_key + "_country_code_tag"
        print(business_line_config_file)
        if self.v_is_countries_online.lower() == "false":
            self.country_code_list = "nal"
        if self.v_is_countries_online.lower() == "true":
            # NOTE(review): eval() on Variable content — trusted config only.
            self.v_country_code_map = eval(Variable.get(business_line_config_file))
            s = list(self.v_country_code_map.keys())
            self.country_code_list = ",".join(s)

    def check_success_exist(self):
        """Verify the `_SUCCESS` marker exists; exit(1) when missing."""
        time.sleep(15)
        logging.info("Check_Success_Exist")
        # echo 1 when the marker is listed, echo 0 otherwise.
        command = "hadoop fs -ls {hdfs_data_dir}/_SUCCESS>/dev/null 2>/dev/null && echo 1 || echo 0".format(hdfs_data_dir=self.hdfs_data_dir_str)
        #logging.info(command)
        out = os.popen(command, 'r')
        res = out.readlines()
        res = 0 if res is None else res[0].lower().strip()
        out.close()
        # Anything other than a leading '1' means the marker is missing.
        if res == '' or res == 'None' or res[0] == '0':
            logging.info("_SUCCESS 验证失败")
            sys.exit(1)
        else:
            logging.info("_SUCCESS 验证成功")

    def delete_exist_partition(self):
        """Delete the current partition directory so re-runs stay unique.

        NOTE(review): unlike TaskTouchzSuccess, a failed delete only logs —
        it does not exit — confirm this difference is intentional.
        """
        time.sleep(10)
        logging.info("Delete_Exist_Partition")
        # Remove the partition directory.
        del_command = "hadoop fs -rm -r {hdfs_data_dir}".format(hdfs_data_dir=self.hdfs_data_dir_str)
        logging.info(del_command)
        os.popen(del_command, 'r')
        time.sleep(10)
        # Verify the directory is gone (echo 1 = still there, 0 = gone).
        check_command = "hadoop fs -ls {hdfs_data_dir}>/dev/null 2>/dev/null && echo 1 || echo 0".format(hdfs_data_dir=self.hdfs_data_dir_str)
        out = os.popen(check_command, 'r')
        res = out.readlines()
        res = 0 if res is None else res[0].lower().strip()
        out.close()
        print(res)
        if res == '' or res == 'None' or res[0] == '0':
            logging.info("目录删除成功")
        else:
            # Directory still exists.
            logging.info("目录删除失败:" + " " + "{hdfs_data_dir}".format(hdfs_data_dir=self.hdfs_data_dir_str))

    def data_not_file_type_touchz(self):
        """Touch `_SUCCESS` unconditionally (directory may be empty)."""
        try:
            logging.info("Data_Not_File_Type_Touchz")
            mkdir_str = "$HADOOP_HOME/bin/hadoop fs -mkdir -p {hdfs_data_dir}".format(hdfs_data_dir=self.hdfs_data_dir_str)
            logging.info(mkdir_str)
            os.popen(mkdir_str)
            time.sleep(10)
            succ_str = "$HADOOP_HOME/bin/hadoop fs -touchz {hdfs_data_dir}/_SUCCESS".format(hdfs_data_dir=self.hdfs_data_dir_str)
            logging.info(succ_str)
            os.popen(succ_str)
            logging.info("DATA EXPORT Successed ......")
            self.check_success_exist()
        except Exception as e:
            #self.dingding_alert.send('DW调度系统任务 {jobname} 数据产出异常'.format(jobname=self.v_table_name))
            logging.info(e)
            sys.exit(1)

    def data_file_type_touchz(self):
        """Touch `_SUCCESS` only when the directory holds data; alert via
        dingding_monitor() and exit(1) when it is empty."""
        try:
            logging.info("Data_File_Type_Touchz")
            # Total byte size of the directory (du -s, first column).
            line_str = "$HADOOP_HOME/bin/hadoop fs -du -s {hdfs_data_dir} | tail -1 | awk \'{{print $1}}\'".format(hdfs_data_dir=self.hdfs_data_dir_str)
            logging.info(line_str)
            with os.popen(line_str) as p:
                line_num = p.read()
            # Size 0 -> raise the timeliness alert and fail.
            if line_num[0] == str(0):
                self.dingding_monitor()
                logging.info("Error : {hdfs_data_dir} is empty".format(hdfs_data_dir=self.hdfs_data_dir_str))
                sys.exit(1)
            else:
                time.sleep(5)
                succ_str = "hadoop fs -touchz {hdfs_data_dir}/_SUCCESS".format(hdfs_data_dir=self.hdfs_data_dir_str)
                logging.info(succ_str)
                os.popen(succ_str)
                logging.info("DATA EXPORT Successed ......")
                self.check_success_exist()
        except Exception as e:
            #self.dingding_alert.send('DW调度系统任务 {jobname} 数据产出异常'.format(jobname=self.v_table_name))
            logging.info(e)
            sys.exit(1)

    def delete_partition(self):
        """Entry point for partition deletion (single or multi country).
        v_del_flag=1 tells the directory walkers to run the delete task."""
        self.v_del_flag = 1
        # No country partition.
        if self.v_is_country_partition.lower() == "false":
            self.not_exist_country_code_data_dir(self.delete_exist_partition)
        # With country partition.
        if self.v_is_country_partition.lower() == "true":
            self.exist_country_code_data_dir(self.delete_exist_partition)
        self.v_del_flag = 0

    def touchz_success(self):
        """Entry point for writing `_SUCCESS` markers, optionally repeated
        for each hour offset when is_offset is enabled."""
        # v_flag=0 -> negative offsets, v_flag=1 -> non-negative.
        if self.v_execute_time_offset < 0:
            v_flag = 0
        else:
            v_flag = 1
        # Offsets apply only to hourly tasks with a non-zero offset.
        if self.v_is_offset == "true" and self.v_is_hour_task == "true" and self.v_execute_time_offset != 0:
            # Number of runs: 1 + |offset|.
            exe_num = 1 + abs(self.v_execute_time_offset)
            for i in range(0, exe_num):
                if v_flag == 0 and i > 0:
                    self.v_time_offset = int(-i)
                else:
                    self.v_time_offset = i
                self.assign_touchz_success_class()
        else:
            self.assign_touchz_success_class()

    def assign_touchz_success_class(self):
        """Dispatch to the right walker/touchz combination based on the
        country-partition and force-exist flags."""
        # No country partition, data required.
        if self.v_is_country_partition.lower() == "false" and self.v_is_result_force_exist.lower() == "true":
            self.not_exist_country_code_data_dir(self.data_file_type_touchz)
        # No country partition, empty data also succeeds.
        if self.v_is_country_partition.lower() == "false" and self.v_is_result_force_exist.lower() == "false":
            self.not_exist_country_code_data_dir(self.data_not_file_type_touchz)
        # Country partition, data required.
        if self.v_is_country_partition.lower() == "true" and self.v_is_result_force_exist.lower() == "true":
            self.exist_country_code_data_dir(self.data_file_type_touchz)
        # Country partition, empty data also succeeds.
        if self.v_is_country_partition.lower() == "true" and self.v_is_result_force_exist.lower() == "false":
            self.exist_country_code_data_dir(self.data_not_file_type_touchz)

    def not_exist_country_code_data_dir(self, object_task):
        """Build the non-country partition path and run `object_task` on it
        (object_task is a bound method such as data_file_type_touchz)."""
        try:
            if self.v_is_hour_task.lower() == "false":
                # Day-level partition path.
                self.hdfs_data_dir_str = self.v_data_oss_path + "/dt=" + self.v_utc_ds
            else:
                # Hour-level partition path.
                self.hdfs_data_dir_str = self.v_data_oss_path + "/dt=" + self.v_utc_ds + "/hour=" + self.v_utc_hour
            # Data required for _SUCCESS.
            if self.v_is_country_partition.lower() == "false" and self.v_is_result_force_exist.lower() == "true":
                object_task()
                return
            # Empty data also succeeds.
            if self.v_is_country_partition.lower() == "false" and self.v_is_result_force_exist.lower() == "false":
                object_task()
                return
        except Exception as e:
            #self.dingding_alert.send('DW调度系统任务 {jobname} 数据产出异常'.format(jobname=table_name))
            logging.info(e)
            sys.exit(1)

    def exist_country_code_data_dir(self, object_task):
        """Walk every country code, build its partition path (UTC or local
        time) and run `object_task` per the online/force-exist flags."""
        try:
            for country_code_word in self.country_code_list.split(","):
                # "nal" stays lowercase; real country codes are uppercased.
                if country_code_word.lower() == 'nal':
                    country_code_word = country_code_word.lower()
                else:
                    country_code_word = country_code_word.upper()
                # Day-level partition path.
                if self.v_is_hour_task.lower() == "false":
                    self.hdfs_data_dir_str = self.v_data_oss_path + "/country_code=" + country_code_word + "/dt=" + self.v_utc_ds
                # Hourly path in UTC time.
                if self.v_is_hour_task.lower() == "true" and self.v_frame_type.lower() == "utc":
                    self.hdfs_data_dir_str = self.v_data_oss_path + "/country_code=" + country_code_word + "/dt=" + self.v_utc_ds + "/hour=" + self.v_utc_hour
                # Hourly path in the country's local time.
                if self.v_is_hour_task.lower() == "true" and self.v_frame_type.lower() == "local":
                    # Resolve self.v_local_date / self.v_local_hour first.
                    self.get_local_date_time(country_code_word)
                    self.hdfs_data_dir_str = self.v_data_oss_path + "/country_code=" + country_code_word + "/dt=" + self.v_local_date + "/hour=" + self.v_local_hour
                # Multi-country business disabled (single "nal" code).
                if self.v_is_country_partition.lower() == "true" and self.v_is_countries_online.lower() == "false":
                    # Data required for _SUCCESS.
                    if self.v_is_result_force_exist.lower() == "true":
                        object_task()
                    # Empty data also succeeds.
                    if self.v_is_result_force_exist.lower() == "false":
                        object_task()
                # Multi-country business enabled.
                if self.v_is_country_partition.lower() == "true" and self.v_is_countries_online.lower() == "true":
                    # Newly launched country: treated as "may be empty".
                    if self.v_country_code_map[country_code_word].lower() == "new":
                        # Delete-mode runs the passed task directly.
                        if self.v_del_flag == 1:
                            object_task()
                            continue
                        else:
                            self.data_not_file_type_touchz()
                            continue
                    # Established country: data must exist for _SUCCESS.
                    if self.v_is_result_force_exist.lower() == "true":
                        # Delete-mode runs the passed task directly.
                        if self.v_del_flag == 1:
                            object_task()
                            continue
                        # "nal" may be empty even in force-exist mode.
                        if country_code_word == "nal":
                            self.data_not_file_type_touchz()
                        else:
                            object_task()
                    # Empty data also succeeds.
                    if self.v_is_result_force_exist.lower() == "false":
                        object_task()
        except Exception as e:
            #self.dingding_alert.send('DW调度系统任务 {jobname} 数据产出异常'.format(jobname=table_name))
            logging.info(e)
            sys.exit(1)

    def alter_partition(self):
        """Return the Hive ALTER TABLE drop/add partition statements for the
        current run (single- or multi-country, day- or hour-level)."""
        alter_str = ""
        # No country partition && day-level.
        if self.v_is_country_partition.lower() == "false" and self.v_is_hour_task.lower() == "false":
            v_par_str = "dt='{ds}'".format(ds=self.v_utc_ds)
            alter_str = "alter table {db}.{table_name} drop partition({v_par});\n alter table {db}.{table_name} add partition({v_par});".format(v_par=v_par_str, table_name=self.v_table_name, db=self.v_db_name)
            return alter_str
        # No country partition && hour-level.
        if self.v_is_country_partition.lower() == "false" and self.v_is_hour_task.lower() == "true":
            v_par_str = "dt='{ds}',hour='{hour}'".format(ds=self.v_utc_ds, hour=self.v_utc_hour)
            alter_str = "alter table {db}.{table_name} drop partition({v_par});\n alter table {db}.{table_name} add partition({v_par});".format(v_par=v_par_str, table_name=self.v_table_name, db=self.v_db_name)
            return alter_str
        # Country-partitioned: one statement pair per country.
        for country_code_word in self.country_code_list.split(","):
            # Country partition && day-level.
            if self.v_is_country_partition.lower() == "true" and self.v_is_hour_task.lower() == "false":
                v_par_str = "country_code='{country_code}',dt='{ds}'".format(ds=self.v_utc_ds, country_code=country_code_word)
                alter_str = alter_str + "\n" + "alter table {db}.{table_name} drop partition({v_par});\n alter table {db}.{table_name} add partition({v_par});".format(v_par=v_par_str, table_name=self.v_table_name, db=self.v_db_name)
            # Country partition && hour-level && UTC frame.
            if self.v_is_country_partition.lower() == "true" and self.v_is_hour_task.lower() == "true" and self.v_frame_type.lower() == "utc":
                v_par_str = "country_code='{country_code}',dt='{ds}',hour='{hour}'".format(ds=self.v_utc_ds, hour=self.v_utc_hour, country_code=country_code_word)
                alter_str = alter_str + "\n" + "alter table {db}.{table_name} drop partition({v_par});\n alter table {db}.{table_name} add partition({v_par});".format(v_par=v_par_str, table_name=self.v_table_name, db=self.v_db_name)
            # Country partition && hour-level && local-time frame.
            if self.v_is_country_partition.lower() == "true" and self.v_is_hour_task.lower() == "true" and self.v_frame_type.lower() == "local":
                # Resolve self.v_local_date / self.v_local_hour first.
                self.get_local_date_time(country_code_word)
                # Partition values use the country's local time.
                v_par_str = "country_code='{country_code}',dt='{ds}',hour='{hour}'".format(ds=self.v_local_date, hour=self.v_local_hour, country_code=country_code_word)
                alter_str = alter_str + "\n" + "alter table {db}.{table_name} drop partition({v_par});\n alter table {db}.{table_name} add partition({v_par});".format(v_par=v_par_str, table_name=self.v_table_name, db=self.v_db_name)
        return alter_str

    def dingding_monitor(self):
        """Send the timeliness-warning markdown message to the alert group,
        linking the DAG tree view and naming the owner and waiting path."""
        url = """
        {alter_url}{dag_id}
        """.format(alter_url=self.alert_url, dag_id=self.v_dag_id)
        self.dingding_alert.markdown_send(
            "【及时性预警】",
            "Test <font color=#000000 size=3 face=\"微软雅黑\">【监控】</font><font color=#FF0000 size=3 face=\"微软雅黑\">及时性预警 </font>\n\n" +
            "**超时任务:** \n\n [{dag_id}]({url}) \n\n".format(
                dag_id=self.v_dag_id, url=url) +
            "**负 责 人 :** {owner_name} \n\n".format(
                owner_name=self.v_owner_name) +
            "**等待路径:** {hdfs_dir_name} \n\n".format(
                hdfs_dir_name=self.hdfs_data_dir_str)
        )
        logging.info("异常任务已发送报警群... ...")
# pushgateway外网地址 # push_gateway_address = "152.32.140.147:9091" # pushgateway内网地址 push_gateway_address = "10.52.61.177:9091" # pushgateway删除job restful http 接口 push_gateway_delete_restful_address = "/metrics/job" # 格式化时间格式 time_format = "%Y-%m-%dT%H:%M:%S" # 订订报警地址 dingding_address = "https://oapi.dingtalk.com/robot/send?access_token=3845cd5ba5cc5f9505133b5d2847d525d78026b0735e4e3c4a8707b51f718f74" dingding_alert = DingdingAlert(dingding_address) # ResourceManager http api接口地址 RM_HTTP_ADDRESS = "http://node5.datalake.opay.com:8088/ws/v1/cluster/apps" RM_HTTP_PARAMS = {'state': 'RUNNING', 'applicationTypes': 'Apache Flink'} # 任务配置情况 # 任务名称 并行度 container数量 slot数量 checkpoint statebackend路径 mainClass taskManager内存大小 , 是否从checkpoint中恢复 task_map = { 'opay-user-order-etl': ('opay-metrics', 8, 4, 2, 's3a://opay-bi/flink/workflow/checkpoint', 'com.opay.bd.opay.main.OpayUserOrderMergeMain', 3072, False), } args = { 'owner': 'linan',
class TaskHourSuccessCountMonitor(object):
    """Checks that an upstream table has all 24 hourly `_SUCCESS` markers
    inside a start/end hour window; alerts via DingTalk and exits otherwise.
    """

    def __init__(self, ds, v_info):
        # DingTalk robot for missing-partition alerts.
        # NOTE(review): `ds` is accepted but never stored — confirm callers.
        self.dingding_alert = DingdingAlert(
            'https://oapi.dingtalk.com/robot/send?access_token=928e66bef8d88edc89fe0f0ddd52bfa4dd28bd4b1d24ab4626c804df8878bb48'
        )
        # Dependency description list ({start_timeThour, end_dateThour,
        # depend_dir, table} dicts).
        self.v_info = v_info
        # Per-run scratch state.
        self.v_data_dir = ""
        self.start_timeThour = ""
        self.end_dateThour = ""
        self.partition = ""
        # Hours found below / above the threshold hour, plus a log map.
        self.less_res = []
        self.greater_res = []
        self.log_unite_dist = {}
        self.start_time = ""
        self.end_time = ""

    def get_partition_list(self):
        """Return the comma-joined "HH/_SUCCESS" entries found under
        self.v_data_dir, or exit(1) when none are listed."""
        # List hour=*/_SUCCESS, strip up to "hour=", join lines with commas.
        command = "hadoop dfs -ls {data_dir}/hour=*/_SUCCESS|awk -F\"hour=\" \'{{print $2}}\'|tr \"\\n\" \",\"|sed -e 's/,$/\\n/'".format(
            data_dir=self.v_data_dir)
        logging.info(command)
        out = os.popen(command, 'r')
        res = out.readlines()
        res[0] = 0 if res[0] is None else res[0].lower().strip()
        out.close()
        # No output means no _SUCCESS files were found.
        if res[0] == '' or res[0] == 'None' or res[0] == '0':
            logging.info("_SUCCESS list 获取失败")
            sys.exit(1)
        else:
            return res[0]

    def number_rebuild(self, s):
        """Zero-pad an hour value to two characters (1 -> "01")."""
        n = str(s)
        if len(n) < 2:
            s_nm = "0" + n
        else:
            s_nm = n
        return s_nm

    def nm_less_diff(self, s):
        """Collect hour `s` when it is <= the threshold hour.

        Both sides carry a leading '1' (01 -> 101) so the string compare
        done via int() is not confused by leading zeros.
        """
        sylstr = str(s) + "/_SUCCESS"
        v_in_number = "1" + self.syl
        if int(s) <= int(v_in_number):
            if sylstr not in self.less_res:
                self.less_res.append(sylstr)

    def nm_greater_diff(self, s):
        """Collect hour `s` when it is >= the threshold hour (same leading
        '1' trick as nm_less_diff)."""
        sylstr = str(s) + "/_SUCCESS"
        v_in_number = "1" + self.syl
        if int(s) >= int(v_in_number):
            if sylstr not in self.greater_res:
                self.greater_res.append(sylstr)

    def summary_results(self, depend_data_dir, symbol, start_hour):
        """Count the hourly _SUCCESS partitions of `depend_data_dir` on one
        side (`symbol` "<" or ">") of `start_hour` and return the count."""
        # Comparison side ("<" or ">").
        symbol = symbol.strip()
        # Partition directory to scan.
        self.v_data_dir = depend_data_dir.strip()
        # Threshold hour, zero-padded; read by nm_less_diff/nm_greater_diff.
        self.syl = self.number_rebuild(start_hour)
        res_list = []
        # Hours that actually have a _SUCCESS file.
        partition_list = self.get_partition_list()
        for i in partition_list.split(","):
            # Prefix '1' before comparing (01 -> 101), see nm_*_diff.
            source_nm = int("1" + i.split("/")[0])
            if symbol == "<":
                self.nm_less_diff(source_nm)
            if symbol == ">":
                self.nm_greater_diff(source_nm)
        if symbol == "<":
            res_list = self.less_res
            # Record the matched hours for the end date.
            self.log_unite_dist[self.end_time] = res_list
        if symbol == ">":
            res_list = self.greater_res
            # Record the matched hours for the start date.
            self.log_unite_dist[self.start_time] = res_list
        return len(res_list)

    def HourSuccessCountMonitor(self):
        """Main entry: for each dependency, count hourly _SUCCESS files in
        [start hour .. end hour]; alert and exit(1) unless exactly 24."""
        for item in self.v_info:
            # Window and location from the spec dict.
            start_timeThour = item.get('start_timeThour', None)
            end_dateThour = item.get('end_dateThour', None)
            depend_dir = item.get('depend_dir', None)
            table_name = item.get('table', None)
            # Start date and hour ("YYYY-mm-ddTHH").
            self.start_time = start_timeThour.split("T")[0]
            start_time_hour = start_timeThour.split("T")[1]
            # Day partition the window starts in.
            depend_start_dir = depend_dir + "/" + table_name + "/dt=" + self.start_time
            # End date and hour.
            self.end_time = end_dateThour.split("T")[0]
            end_time_hour = end_dateThour.split("T")[1]
            # Day partition the window ends in.
            depend_end_dir = depend_dir + "/" + table_name + "/dt=" + self.end_time
            # Window spans two dates: count the tail of day 1 + head of day 2.
            if self.start_time != self.end_time:
                hour_res_nm = self.summary_results(
                    depend_start_dir, ">",
                    start_time_hour) + self.summary_results(
                        depend_end_dir, "<", end_time_hour)
            # Window within one date: count from the start hour only.
            if self.start_time == self.end_time:
                hour_res_nm = self.summary_results(depend_start_dir, ">",
                                                   start_time_hour)
            logging.info(self.log_unite_dist)
            # Anything other than 24 hours means the dependency is not met.
            if hour_res_nm != 24:
                logging.info(
                    "小时级分区文件SUCCESS 个数 {hour_res_nm} 不完整,异常退出.....".format(
                        hour_res_nm=hour_res_nm))
                self.dingding_alert.send(
                    "DW 依赖数据源 {table_name} 小时级分区文件SUCCESS 个数 {hour_res_nm} 缺失,异常退出....."
                    .format(hour_res_nm=hour_res_nm, table_name=table_name))
                sys.exit(1)
            else:
                logging.info("小时级分区文件SUCCESS 个数 {hour_res_nm} 完整,依赖成功".format(
                    hour_res_nm=hour_res_nm))
            # Reset the log map for the next dependency.
            self.log_unite_dist = {}
class CountriesPublicFrame(object):
    """Manage per-country HDFS partition directories for one Hive table:
    delete stale partitions, create ``_SUCCESS`` flags, and build the
    matching ALTER TABLE add/drop partition statements.

    NOTE(review): ``v_is_open``, ``v_country_partition`` and ``v_file_type``
    are the strings "true"/"false" rather than booleans — presumably they
    come from Airflow templating; confirm with callers.
    """

    def __init__(self,
                 v_is_open,
                 v_ds,
                 v_db_name,
                 v_table_name,
                 v_data_hdfs_path,
                 v_country_partition="true",
                 v_file_type="true",
                 v_hour=None):

        #self.comwx = ComwxApi('wwd26d45f97ea74ad2', 'BLE_v25zCmnZaFUgum93j3zVBDK-DjtRkLisI_Wns4g', '1000011')

        self.dingding_alert = DingdingAlert(
            'https://oapi.dingtalk.com/robot/send?access_token=928e66bef8d88edc89fe0f0ddd52bfa4dd28bd4b1d24ab4626c804df8878bb48'
        )

        self.table_name = v_table_name
        # Current partition directory being processed; set by the
        # *_country_code_data_dir drivers before each callback runs.
        self.hdfs_data_dir_str = ""
        self.data_hdfs_path = v_data_hdfs_path
        self.db_name = v_db_name
        self.ds = v_ds
        self.country_partition = v_country_partition
        self.file_type = v_file_type
        self.hour = v_hour
        self.is_open = v_is_open
        # Set to 1 while delete_partition() runs so the drivers know they
        # are deleting rather than creating _SUCCESS flags.
        self.v_del_flag = 0
        self.v_country_code_map = None
        self.country_code_list = ""
        self.get_country_code()

    def get_country_code(self):
        """Fetch all two-letter country codes for the current table.

        When multi-country business is disabled the list is just "nal";
        otherwise it comes from the Airflow Variable "country_code_dim".
        """
        if self.is_open.lower() == "false":
            self.country_code_list = "nal"

        if self.is_open.lower() == "true":
            # HACK: eval() on the Airflow Variable value — admin-controlled
            # input, but ast.literal_eval would be safer here.
            self.v_country_code_map = eval(Variable.get("country_code_dim"))
            s = list(self.v_country_code_map.keys())
            self.country_code_list = ",".join(s)

    def check_success_exist(self):
        """Verify the ``_SUCCESS`` flag really exists; exit(1) otherwise."""
        time.sleep(15)
        print("debug-> check_success_exist")
        # Shell echoes "1" when the flag exists, "0" when it does not.
        command = "hadoop fs -ls {hdfs_data_dir}/_SUCCESS>/dev/null 2>/dev/null && echo 1 || echo 0".format(
            hdfs_data_dir=self.hdfs_data_dir_str)
        #logging.info(command)
        out = os.popen(command, 'r')
        res = out.readlines()
        res = 0 if res is None else res[0].lower().strip()
        out.close()
        # Decide whether the _SUCCESS file was created.
        if res == '' or res == 'None' or res[0] == '0':
            logging.info("_SUCCESS 验证失败")
            sys.exit(1)
        else:
            logging.info("_SUCCESS 验证成功")

    def delete_exist_partition(self):
        """Delete an existing partition directory so reruns stay unique."""
        time.sleep(10)
        print("debug-> delete_exist_partition")
        # Delete command.
        del_command = "hadoop fs -rm -r {hdfs_data_dir}".format(
            hdfs_data_dir=self.hdfs_data_dir_str)
        logging.info(del_command)
        os.popen(del_command, 'r')
        time.sleep(10)
        # Verify whether the deleted partition directory still exists.
        check_command = "hadoop fs -ls {hdfs_data_dir}>/dev/null 2>/dev/null && echo 1 || echo 0".format(
            hdfs_data_dir=self.hdfs_data_dir_str)
        out = os.popen(check_command, 'r')
        res = out.readlines()
        res = 0 if res is None else res[0].lower().strip()
        out.close()
        print(res)
        # "0" (or no output) means the directory is gone.
        if res == '' or res == 'None' or res[0] == '0':
            logging.info("目录删除成功")
        else:
            # Directory still exists.
            logging.info("目录删除失败:" + " " + "{hdfs_data_dir}".format(
                hdfs_data_dir=self.hdfs_data_dir_str))

    def data_not_file_type_touchz(self):
        """Touch ``_SUCCESS`` without requiring the directory to hold data
        (creates the directory first when missing)."""
        try:
            print("debug-> data_not_file_type_touchz")
            mkdir_str = "$HADOOP_HOME/bin/hadoop fs -mkdir -p {hdfs_data_dir}".format(
                hdfs_data_dir=self.hdfs_data_dir_str)
            logging.info(mkdir_str)
            os.popen(mkdir_str)
            time.sleep(10)
            succ_str = "$HADOOP_HOME/bin/hadoop fs -touchz {hdfs_data_dir}/_SUCCESS".format(
                hdfs_data_dir=self.hdfs_data_dir_str)
            logging.info(succ_str)
            os.popen(succ_str)
            logging.info("DATA EXPORT Successed ......")
            self.check_success_exist()
        except Exception as e:
            #self.dingding_alert.send('DW调度系统任务 {jobname} 数据产出异常'.format(jobname=self.table_name))
            logging.info(e)
            sys.exit(1)

    def data_file_type_touchz(self):
        """Create ``_SUCCESS`` only when the partition holds non-empty data;
        alert to DingTalk and exit(1) when the directory size is 0."""
        try:
            print("debug-> data_file_type_touchz")
            # Check whether the data size under the directory is 0.
            line_str = "$HADOOP_HOME/bin/hadoop fs -du -s {hdfs_data_dir} | tail -1 | awk \'{{print $1}}\'".format(
                hdfs_data_dir=self.hdfs_data_dir_str)
            logging.info(line_str)
            with os.popen(line_str) as p:
                line_num = p.read()
            # Size is 0 -> send an alert notification.
            # NOTE(review): only the first character is compared, so this
            # relies on `du -s` printing a bare size — confirm.
            if line_num[0] == str(0):
                self.dingding_alert.send('DW调度系统任务 {jobname} 数据产出异常'.format(
                    jobname=self.table_name))
                logging.info("Error : {hdfs_data_dir} is empty".format(
                    hdfs_data_dir=self.hdfs_data_dir_str))
                sys.exit(1)
            else:
                time.sleep(5)
                succ_str = "hadoop fs -touchz {hdfs_data_dir}/_SUCCESS".format(
                    hdfs_data_dir=self.hdfs_data_dir_str)
                logging.info(succ_str)
                os.popen(succ_str)
                logging.info("DATA EXPORT Successed ......")
                self.check_success_exist()
        except Exception as e:
            #self.dingding_alert.send('DW调度系统任务 {jobname} 数据产出异常'.format(jobname=self.table_name))
            logging.info(e)
            sys.exit(1)

    def delete_partition(self):
        """Entry point for deleting partitions (sets v_del_flag while active)."""
        self.v_del_flag = 1
        if self.country_partition.lower() == "false":
            self.not_exist_country_code_data_dir(self.delete_exist_partition)
        # Table has a country partition.
        if self.country_partition.lower() == "true":
            self.exist_country_code_data_dir(self.delete_exist_partition)
        self.v_del_flag = 0

    def touchz_success(self):
        """Entry point for generating ``_SUCCESS`` flags."""
        # No country partition, and every directory must hold data to get a Success.
        if self.country_partition.lower() == "false" and self.file_type.lower() == "true":
            self.not_exist_country_code_data_dir(self.data_file_type_touchz)

        # No country partition, and Success is created even for empty data.
        if self.country_partition.lower() == "false" and self.file_type.lower() == "false":
            self.not_exist_country_code_data_dir(self.data_not_file_type_touchz)

        # Country partition, and every directory must hold data to get a Success.
        if self.country_partition.lower() == "true" and self.file_type.lower() == "true":
            self.exist_country_code_data_dir(self.data_file_type_touchz)

        # Country partition, and Success is created even for empty data.
        if self.country_partition.lower() == "true" and self.file_type.lower() == "false":
            self.exist_country_code_data_dir(self.data_not_file_type_touchz)

    # No country-code partition level.
    def not_exist_country_code_data_dir(self, object_task):
        """Run ``object_task`` once on the dt= (and optional hour=) directory.

        country_partition: whether the table has a country partition
        file_type: whether to create a Success even for empty data
        """
        try:
            if self.hour is None:
                # Data path without an hourly partition.
                self.hdfs_data_dir_str = self.data_hdfs_path + "/dt=" + self.ds
            else:
                # Data path with an hourly partition.
                self.hdfs_data_dir_str = self.data_hdfs_path + "/dt=" + self.ds + "/hour=" + self.hour

            # No country partition, and directories must hold data for a Success.
            if self.country_partition.lower() == "false" and self.file_type.lower() == "true":
                object_task()
                return

            # No country partition, and Success is created even for empty data.
            if self.country_partition.lower() == "false" and self.file_type.lower() == "false":
                object_task()
                return
        except Exception as e:
            #self.dingding_alert.send('DW调度系统任务 {jobname} 数据产出异常'.format(jobname=table_name))
            logging.info(e)
            sys.exit(1)

    # With a country-code partition level.
    def exist_country_code_data_dir_dev(self, object_task):
        """Run ``object_task`` for every country-code directory (dev variant
        without the "new country" special case).

        country_partition: whether the table has a country partition
        file_type: whether to create a Success even for empty data
        """
        try:
            # Iterate over the country list.
            for country_code_word in self.country_code_list.split(","):
                if country_code_word.lower() == 'nal':
                    country_code_word = country_code_word.lower()
                else:
                    country_code_word = country_code_word.upper()
                if self.hour is None:
                    # Per-country data path without an hourly partition.
                    self.hdfs_data_dir_str = self.data_hdfs_path + "/country_code=" + country_code_word + "/dt=" + self.ds
                else:
                    # Per-country data path with an hourly partition.
                    self.hdfs_data_dir_str = self.data_hdfs_path + "/country_code=" + country_code_word + "/dt=" + self.ds + "/hour=" + self.hour

                # Multi-country business disabled (country code defaults to nal).
                if self.country_partition.lower() == "true" and self.is_open.lower() == "false":
                    # Success only when the directory holds data.
                    if self.file_type.lower() == "true":
                        object_task()
                    # Success even when the data is empty.
                    if self.file_type.lower() == "false":
                        object_task()

                # Multi-country business enabled.
                if self.country_partition.lower() == "true" and self.is_open.lower() == "true":
                    # Success only when the directory holds data.
                    if self.file_type.lower() == "true":
                        # Used when deleting multi-country partitions.
                        if self.v_del_flag == 1:
                            object_task()
                            continue
                        # Under the must-have-data rule, "nal" may still be empty.
                        if country_code_word == "nal":
                            self.data_not_file_type_touchz()
                        else:
                            object_task()
                    # Success even when the data is empty.
                    if self.file_type.lower() == "false":
                        object_task()
        except Exception as e:
            #self.dingding_alert.send('DW调度系统任务 {jobname} 数据产出异常'.format(jobname=table_name))
            logging.info(e)
            sys.exit(1)

    def exist_country_code_data_dir(self, object_task):
        """Run ``object_task`` for every country-code directory.

        country_partition: whether the table has a country partition
        file_type: whether to create a Success even for empty data
        """
        try:
            # Iterate over the country list.
            for country_code_word in self.country_code_list.split(","):
                if country_code_word.lower() == 'nal':
                    country_code_word = country_code_word.lower()
                else:
                    country_code_word = country_code_word.upper()
                if self.hour is None:
                    # Per-country data path without an hourly partition.
                    self.hdfs_data_dir_str = self.data_hdfs_path + "/country_code=" + country_code_word + "/dt=" + self.ds
                else:
                    # Per-country data path with an hourly partition.
                    self.hdfs_data_dir_str = self.data_hdfs_path + "/country_code=" + country_code_word + "/dt=" + self.ds + "/hour=" + self.hour

                # Multi-country business disabled (country code defaults to nal).
                if self.country_partition.lower() == "true" and self.is_open.lower() == "false":
                    # Success only when the directory holds data.
                    if self.file_type.lower() == "true":
                        object_task()
                    # Success even when the data is empty.
                    if self.file_type.lower() == "false":
                        object_task()

                # Multi-country business enabled.
                if self.country_partition.lower() == "true" and self.is_open.lower() == "true":
                    # Newly opened country (treated like the "false" rule:
                    # empty data still gets a Success).
                    if self.v_country_code_map[country_code_word].lower() == "new":
                        # Used when deleting multi-country partitions.
                        if self.v_del_flag == 1:
                            object_task()
                            continue
                        else:
                            self.data_not_file_type_touchz()
                            continue
                    # Success only when the directory holds data.
                    if self.file_type.lower() == "true":
                        # Used when deleting multi-country partitions.
                        if self.v_del_flag == 1:
                            object_task()
                            continue
                        # Under the must-have-data rule, "nal" may still be empty.
                        if country_code_word == "nal":
                            self.data_not_file_type_touchz()
                        else:
                            object_task()
                    # Success even when the data is empty.
                    if self.file_type.lower() == "false":
                        object_task()
        except Exception as e:
            #self.dingding_alert.send('DW调度系统任务 {jobname} 数据产出异常'.format(jobname=table_name))
            logging.info(e)
            sys.exit(1)

    # Build ALTER TABLE statements for the partitions handled above.
    def alter_partition(self):
        """Return drop+add ALTER TABLE partition statements for this table."""
        alter_str = ""

        # No country partition && hour is None.
        if self.country_partition.lower() == "false" and self.hour is None:
            v_par_str = "dt='{ds}'".format(ds=self.ds)
            alter_str = "alter table {db}.{table_name} drop partition({v_par});\n alter table {db}.{table_name} add partition({v_par});".format(
                v_par=v_par_str, table_name=self.table_name, db=self.db_name)
            return alter_str

        # No country partition && hour given.
        if self.country_partition.lower() == "false" and self.hour is not None:
            v_par_str = "dt='{ds}',hour='{hour}'".format(ds=self.ds, hour=self.hour)
            alter_str = "alter table {db}.{table_name} drop partition({v_par});\n alter table {db}.{table_name} add partition({v_par});".format(
                v_par=v_par_str, table_name=self.table_name, db=self.db_name)
            return alter_str

        for country_code_word in self.country_code_list.split(","):
            # Country partition && hour is None.
            if self.country_partition.lower() == "true" and self.hour is None:
                v_par_str = "country_code='{country_code}',dt='{ds}'".format(
                    ds=self.ds, country_code=country_code_word)
                alter_str = alter_str + "\n" + "alter table {db}.{table_name} drop partition({v_par});\n alter table {db}.{table_name} add partition({v_par});".format(
                    v_par=v_par_str, table_name=self.table_name, db=self.db_name)

            # Country partition && hour given.
            if self.country_partition.lower() == "true" and self.hour is not None:
                v_par_str = "country_code='{country_code}',dt='{ds}',hour='{hour}'".format(
                    ds=self.ds, hour=self.hour, country_code=country_code_word)
                alter_str = alter_str + "\n" + "alter table {db}.{table_name} drop partition({v_par});\n alter table {db}.{table_name} add partition({v_par});".format(
                    v_par=v_par_str, table_name=self.table_name, db=self.db_name)
        return alter_str
from airflow.operators.bash_operator import BashOperator from airflow.operators.impala_plugin import ImpalaOperator from airflow.sensors import UFileSensor from airflow.sensors import WebHdfsSensor from airflow.sensors.hive_partition_sensor import HivePartitionSensor from utils.connection_helper import get_hive_cursor from plugins.comwx import ComwxApi from datetime import datetime, timedelta from plugins.DingdingAlert import DingdingAlert import re import logging from airflow.sensors.s3_key_sensor import S3KeySensor #comwx = ComwxApi('wwd26d45f97ea74ad2', 'BLE_v25zCmnZaFUgum93j3zVBDK-DjtRkLisI_Wns4g', '1000011') dingding_alert = DingdingAlert( 'https://oapi.dingtalk.com/robot/send?access_token=928e66bef8d88edc89fe0f0ddd52bfa4dd28bd4b1d24ab4626c804df8878bb48' ) args = { 'owner': 'yangmingze', 'start_date': datetime(2019, 12, 19), 'depends_on_past': False, 'retries': 3, 'retry_delay': timedelta(minutes=5), 'email': ['*****@*****.**'], 'email_on_failure': True, 'email_on_retry': False, } dag = airflow.DAG('impala_syc_app_oride_order_tags_d', schedule_interval="30 03 * * *",
class TaskTimeoutMonitor(object):
    """Poll HDFS ``_SUCCESS`` flags for a set of Hive table partitions and
    push a DingTalk markdown alert whenever a flag is still missing after
    the configured timeout."""

    hive_cursor = None
    dingding_alert = None

    def __init__(self):
        self.hive_cursor = get_hive_cursor()
        self.dingding_alert = DingdingAlert(
            'https://oapi.dingtalk.com/robot/send?access_token=928e66bef8d88edc89fe0f0ddd52bfa4dd28bd4b1d24ab4626c804df8878bb48'
        )
        #self.dingding_alert = DingdingAlert_dev('https://oapi.dingtalk.com/robot/send?access_token=c08440c8e569bb38ec358833f9d577b7638af5aaefbd55e3fd748b798fecc4d4')
        # Base URL of the Airflow tree view used in alert links.
        self.alert_url = "http://8.208.14.165:8080/admin/airflow/tree?dag_id="
        self.owner_name = None
        self.hdfs_dir_name = None

    def __del__(self):
        # BUG FIX: guard against a missing/None cursor — __del__ also runs
        # when __init__ never completed, which previously raised here.
        if self.hive_cursor is not None:
            self.hive_cursor.close()
            self.hive_cursor = None

    # Check the flag file; multiple calls were meant to run concurrently
    # as coroutines (the asyncio wiring is currently disabled).
    #@asyncio.coroutine
    def task_trigger(self, command, dag_id_name, timeout):
        """Poll ``command`` (echoes 1/0 for _SUCCESS presence) every 2 minutes.

        timeout -- grace period in seconds: normal task runtime plus the
        maximum allowed delay (e.g. 300 + 120 = 420).  Each time a full
        window elapses with the flag still missing, a DingTalk markdown
        alert is sent and the window restarts; polling stops as soon as
        the flag appears.  Any unexpected error exits the process.
        """
        try:
            sum_timeout = 0
            timeout_step = 120  # monitoring interval (seconds)
            command = command.strip()
            num = 0  # number of alerts already sent
            while sum_timeout <= int(timeout):
                logging.info("sum_timeout:" + str(sum_timeout))
                logging.info("timeout:" + str(timeout))
                logging.info(command)
                time.sleep(timeout_step)
                #yield from asyncio.sleep(int(timeout_step))
                sum_timeout += timeout_step
                out = os.popen(command, 'r')
                res = out.readlines()
                # res holds "1" when _SUCCESS exists, "0"/empty otherwise.
                res = 0 if res is None else res[0].lower().strip()
                out.close()
                logging.info("数据标识的返回值:" + str(res))
                # Is the data flag file there yet?
                if res == '' or res == 'None' or res == '0':
                    # BUG FIX: build the link target as a clean single-line
                    # string — the original triple-quoted value embedded
                    # newlines and spaces, which break the markdown
                    # [text](url) link in the alert.
                    url = "{alter_url}{dag_id}".format(
                        alter_url=self.alert_url, dag_id=dag_id_name)
                    if sum_timeout >= int(timeout):
                        # Convert the window to minutes for the message.
                        format_date = int(int(timeout) / 60)
                        num = num + 1
                        self.dingding_alert.markdown_send(
                            "【及时性预警】",
                            "DW <font color=#000000 size=3 face=\"微软雅黑\">【监控】</font><font color=#FF0000 size=3 face=\"微软雅黑\">及时性预警 </font>\n\n"
                            + "**超时任务:** \n\n [{dag_id}]({url}) \n\n".format(
                                dag_id=dag_id_name, url=url)
                            + "**负 责 人 :** {owner_name} \n\n".format(
                                owner_name=self.owner_name)
                            + "**等待路径:** {hdfs_dir_name} \n\n".format(
                                hdfs_dir_name=self.hdfs_dir_name)
                            + "**预留时间:** {timeout} 分钟 \n\n".format(
                                timeout=str(format_date))
                            + "**预警次数:** {num}".format(num=num))
                        logging.info("任务超时... ...")
                        # Restart the timeout window and keep watching.
                        sum_timeout = 0
                else:
                    num = 0
                    break
        except Exception as e:
            logging.info(e)
            sys.exit(1)

    def set_task_monitor(self, tables):
        """Resolve each table's HDFS location via Hive metadata and watch its
        partition ``_SUCCESS`` flag.

        tables -- list of dicts like
            [{"db": "", "table": "table" (or "dag": <DAG>),
              "partition": "partition", "timeout": "timeout"}, ...]
        Returns None early when a required key or the table location is
        missing; otherwise triggers one poll loop per resolved command.
        """
        commands = []
        for item in tables:
            # BUG FIX: this assignment was commented out, so the `db is None`
            # check below raised NameError for any fully-configured item.
            db = item.get('db', None)
            partition = item.get('partition', None)
            timeout = item.get('timeout', None)
            dag = item.get('dag', None)
            if dag:
                table = dag.dag_id
                self.owner_name = dag.default_args.get("owner")
            else:
                self.owner_name = "Null"
                table = item.get('table', None)
            if table is None or db is None or partition is None or timeout is None:
                return None

            location = None
            hql = '''
                DESCRIBE FORMATTED {db}.{table}
            '''.format(table=table, db=db)
            logging.info(hql)
            self.hive_cursor.execute(hql)
            res = self.hive_cursor.fetchall()
            # Scan DESCRIBE FORMATTED output for the "Location:" row.
            for (col_name, col_type, col_comment) in res:
                col_name = col_name.lower().strip()
                if col_name == 'location:':
                    location = col_type
                    break
            if location is None:
                return None

            self.hdfs_dir_name = location + "/" + partition + "/_SUCCESS"
            commands.append({
                'cmd': '''
                    hadoop fs -ls {path}/{partition}/_SUCCESS >/dev/null 2>/dev/null && echo 1 || echo 0
                '''.format(path=location, partition=partition),
                'partition': partition,
                'timeout': timeout,
                'table': table
            })

        for items in commands:
            self.task_trigger(items['cmd'], items['table'], items['timeout'])