from datetime import datetime, timedelta
import airflow
import logging
from utils.connection_helper import get_hive_cursor
from airflow.utils.email import send_email
from plugins.comwx import ComwxApi

comwx = ComwxApi('wwd26d45f97ea74ad2',
                 'BLE_v25zCmnZaFUgum93j3zVBDK-DjtRkLisI_Wns4g', '1000011')

now = datetime.today()

# alert threshold
ods_data_alert_limit = 0.1
# stop threshold
ods_data_stop_limit = 0.45

"""
Validate the overall data volume of a table
"""


def validata_data(db, table_name, table_format, table_core_list,
                  table_not_core_list, ds, **kwargs):
    cursor = get_hive_cursor()
    day = ds
    day_before_1 = airflow.macros.ds_add(ds, -1)
    day_before_7 = airflow.macros.ds_add(ds, -7)
    flag = True
import logging
import os
import sys
import time

from utils.connection_helper import get_hive_cursor
from plugins.comwx import ComwxApi
# HivePartitionSensor and UFileSensor below come from the project's own Airflow plugins.


class ModelPublicFrame(object):
    hive_cursor = None
    comwx = None

    def __init__(self):
        self.hive_cursor = get_hive_cursor()
        self.comwx = ComwxApi('wwd26d45f97ea74ad2',
                              'BLE_v25zCmnZaFUgum93j3zVBDK-DjtRkLisI_Wns4g',
                              '1000011')
        # self.ds_date = execution_date.strftime("%Y-%m-%d")                  # date (%Y-%m-%d)
        # self.ds_date_hour = execution_date.strftime("%Y-%m-%d %H")          # date (%Y-%m-%d %H)
        # self.ds_date_minute = execution_date.strftime("%Y-%m-%d %H:%M")     # date (%Y-%m-%d %H:%M)
        # self.ds_date_second = execution_date.strftime("%Y-%m-%d %H:%M:%S")  # date (%Y-%m-%d %H:%M:%S)
        # self.dag = dag

    def __del__(self):
        self.hive_cursor.close()
        self.hive_cursor = None

    # Read the hive table location (storage path)
    def get_hive_location(self, db, table):
        location = None
        try:
            hql = '''
                DESCRIBE FORMATTED {db}.{table}
            '''.format(table=table, db=db)
            # logging.info(hql)
            self.hive_cursor.execute(hql)
            res = self.hive_cursor.fetchall()
            for (col_name, col_type, col_comment) in res:
                col_name = col_name.lower().strip()
                if col_name == 'location:':
                    location = col_type
                    # break
            if location is None:
                return None
            else:
                return location
        except Exception as e:
            self.comwx.postAppMessage(
                'Error: {db}.{table} data development template -- exception while reading the hive location'.format(
                    db=db, table=table), '271')
            logging.info(e)
            sys.exit(1)

    """
    Check for the data file; multiple calls can run concurrently as coroutines
    """

    # @asyncio.coroutine
    def task_trigger(self, command, dag_id_name, timeout):
        # timeout -- time offset (seconds)
        # time offset = normal task finish time (seconds) + maximum allowed delay (seconds)
        # e.g. normal finish time 300s + maximum allowed delay 120s = 420s offset
        try:
            sum_timeout = 0
            timeout_step = 120  # monitoring interval (seconds)
            command = command.strip()

            while sum_timeout <= int(timeout):
                logging.info("timeout:" + str(timeout))
                logging.info(command)

                # yield from asyncio.sleep(int(timeout_step))
                time.sleep(timeout_step)

                sum_timeout += timeout_step
                out = os.popen(command, 'r')
                res = out.readlines()

                # res: whether the _SUCCESS flag exists (1 = exists)
                res = 0 if res is None else res[0].lower().strip()
                out.close()

                logging.info("data flag return value: " + str(res))

                # check whether the data file has been produced
                if res == '' or res == 'None' or res == '0':
                    if sum_timeout >= int(timeout):
                        self.comwx.postAppMessage(
                            'DW scheduled task {dag_id} output timed out'.format(
                                dag_id=dag_id_name), '271')
                        logging.info("task timed out ... ...")
                        sum_timeout = 0
                else:
                    break
        except Exception as e:
            self.comwx.postAppMessage(
                'DW scheduled task {dag_id} code exception'.format(
                    dag_id=dag_id_name), '271')
            logging.info(e)
            sys.exit(1)

    """
    Set up task timeout monitoring
    @:param list [{"db":"", "table":"table", "partitions":"country_code=nal", "timeout":"timeout"},]
    """

    def task_timeout_monitor(self, tables, ds):
        commands = []
        for item in tables:
            table = item.get('table', None)
            db = item.get('db', None)
            partition = item.get('partitions', None)  # partition path
            timeout = item.get('timeout', None)
            if table is None or db is None or partition is None or timeout is None:
                return None

            # read the hive table location
            location = self.get_hive_location(db, table)
            commands.append({
                'cmd': '''
                    hadoop fs -ls {path}/{partition}/dt={pt}/_SUCCESS >/dev/null 2>/dev/null && echo 1 || echo 0
                '''.format(pt=ds, path=location, partition=partition),
                'partition': partition,
                'timeout': timeout,
                'table': table
            })

        for items in commands:
            command = items['cmd'].strip()
            out = os.popen(command, 'r')
            res = out.readlines()

            # res: whether the _SUCCESS flag exists (1 = exists)
            res = 0 if res is None else res[0].lower().strip()
            out.close()

            if res == '1':
                logging.info("task succeeded ... ...")
                break
            else:
                self.task_trigger(items['cmd'], items['table'], items['timeout'])

    """
    Create the _SUCCESS task-completion flag
    @:param list [{"db":"", "table":"table", "partitions":"country_code=nal"}]
    """

    def task_touchz_success(self, tables, ds):
        # table location
        location = None
        try:
            for item in tables:
                table = item.get('table', None)
                db = item.get('db', None)
                partition = item.get('partitions', None)  # partition path

                # read the hive table location
                location = self.get_hive_location(db, table)
                hdfs_data_dir_str = location + '/' + partition + '/dt=' + ds

                # check whether the data file size is 0
                line_str = "$HADOOP_HOME/bin/hadoop fs -du -s {hdfs_data_dir} | tail -1 | awk '{{print $1}}'".format(
                    hdfs_data_dir=hdfs_data_dir_str)
                logging.info(line_str)

                with os.popen(line_str) as p:
                    line_num = p.read()

                # data size is 0: send a WeChat alert
                if line_num[0] == str(0):
                    self.comwx.postAppMessage(
                        'DW scheduler task {jobname} produced 0 bytes of data, _SUCCESS flag not created, execution date: {pt}'.format(
                            jobname=table, pt=ds), '271')
                    logging.info("Error : {hdfs_data_dir} is empty".format(
                        hdfs_data_dir=hdfs_data_dir_str))
                    sys.exit(1)
                else:
                    succ_str = "$HADOOP_HOME/bin/hadoop fs -touchz {hdfs_data_dir}/_SUCCESS".format(
                        hdfs_data_dir=hdfs_data_dir_str)
                    logging.info(succ_str)
                    os.popen(succ_str)
                    time.sleep(10)
                    logging.info("DATA EXPORT Succeeded ......")
        except Exception as e:
            self.comwx.postAppMessage(
                'DW scheduler task {jobname} failed to produce _SUCCESS, execution date: {pt}'.format(
                    jobname=table, pt=ds), '271')
            logging.info(e)
            sys.exit(1)

    """
    Build the list of upstream dependency sensors
    @:param list [{"db":"db_name", "table":"table_name", "partitions":"country_code=nal"}]
    """

    def tesk_dependence(self, tables, dag):
        dependence = []
        try:
            for item in tables:
                # read db, table, partition
                table = item.get('table', None)
                db = item.get('db', None)
                partition = item.get('partitions', None)
                if table is None or db is None or partition is None:
                    return None

                # read the hive table location
                location = self.get_hive_location(db, table)

                # strip the original bucket prefix
                location = location.replace('ufile://opay-datalake/', '')

                # task_id name
                task_id_flag = table + "_task"

                # ods (and bi) tables use a different dependency path
                if db[-3:].lower() == 'ods' or db[-2:].lower() == 'bi':
                    # configure the dependency (previous day's partition)
                    dependence_task_flag = HivePartitionSensor(
                        task_id='dependence_{task_id_name}'.format(
                            task_id_name=task_id_flag),
                        table=table,
                        partition="dt='{{ds}}'",
                        schema=db,
                        poke_interval=60,  # while the dependency is unmet, check its state once a minute
                        dag=dag)
                else:
                    # configure the dependency (previous day's partition)
                    dependence_task_flag = UFileSensor(
                        task_id='dependence_{task_id_name}'.format(
                            task_id_name=task_id_flag),
                        filepath='{hdfs_path_name}/{partition_name}/dt={{{{ds}}}}/_SUCCESS'.format(
                            hdfs_path_name=location,
                            partition_name=partition),
                        bucket_name='opay-datalake',
                        poke_interval=60,  # while the dependency is unmet, check its state once a minute
                        dag=dag)
                dependence.append(dependence_task_flag)
            return dependence
        except Exception as e:
            # self.comwx.postAppMessage('DW scheduler task {jobname} failed to build the dependency list'.format(jobname=dag.dag_id), '271')
            logging.info(e)
            sys.exit(1)
def init_mysql_table(**op_kwargs):
    hive_cursor = get_hive_cursor('hiveserver2_default')
    hive_db = op_kwargs.get('db')
    hive_table = op_kwargs.get('table')
    mysql_cursor = op_kwargs.get('mysql_conn')
    dt = op_kwargs.get('ds')
    overwrite = op_kwargs.get('overwrite')

    hive_columns = get_hive_table_columns(hive_cursor, hive_db, hive_table)
    cols = []
    mcols = []
    for v in hive_columns:
        if "int" in v['type']:
            cols.append("if(`{}` is NULL, 0, `{}`)".format(
                v['name'].lower(), v['name'].lower()))
        elif v['type'] == 'float' or v['type'] == 'double' or v['type'] == 'decimal':
            cols.append("if(`{}` is NULL, '0.00', `{}`)".format(
                v['name'].lower(), v['name'].lower()))
        elif v['type'] == 'array' or v['type'] == 'map' or v['type'] == 'struct':
            cols.append("''")
        else:
            cols.append("if(`{}` is NULL, '', `{}`)".format(
                v['name'].lower(), v['name'].lower()))
        mcols.append(v['name'].lower())

    new_table = create_bi_mysql_table(mysql_cursor, hive_db, hive_table, hive_columns)
    if new_table:
        # new table: full load
        hql = '''
            SELECT {cols}
            FROM {db}.{table}
        '''.format(db=hive_db, table=hive_table, cols=",".join(cols))
    else:
        # incremental load
        hql = '''
            SELECT {cols}
            FROM {db}.{table}
            WHERE dt = '{dt}'
        '''.format(db=hive_db, table=hive_table, cols=",".join(cols), dt=dt)
    logging.info(hql)

    wxapi = ComwxApi('wwd26d45f97ea74ad2',
                     'BLE_v25zCmnZaFUgum93j3zVBDK-DjtRkLisI_Wns4g', '1000011')
    try:
        mconn = get_db_conn(mysql_cursor)
        mcursor = mconn.cursor()  # mysql_connectors[mysql_cursor]
        if overwrite:
            mcursor.execute("TRUNCATE TABLE {db}.{table}".format(
                db=hive_db, table=hive_table))
        else:
            mcursor.execute(
                "DELETE FROM {db}.{table} WHERE dt = '{dt}'".format(
                    db=hive_db, table=hive_table, dt=dt))

        isql = 'replace into {db}.{table} (`{cols}`) values '.format(
            db=hive_db, table=hive_table, cols='`,`'.join(mcols))

        hive_cursor.execute(hql)
        rows = []
        cnt = 0
        while True:
            try:
                record = hive_cursor.next()
            except:
                record = None
            # logging.info(record)
            if not record:
                break
            rows.append("('{}')".format("','".join([
                str(MySQLdb.escape_string(str(x)), encoding="utf-8")
                for x in record
            ])))
            # logging.info(rows)
            cnt += 1
            if cnt >= 1000:
                logging.info(cnt)
                mcursor.execute("{h} {v}".format(h=isql, v=",".join(rows)))
                cnt = 0
                rows = []
        # logging.info(rows)
        if cnt > 0:
            logging.info("last: {}".format(cnt))
            mcursor.execute("{h} {v}".format(h=isql, v=",".join(rows)))

        mcursor.close()
        hive_cursor.close()
    except BaseException as e:
        logging.info(e)
        try:
            mcursor.close()
        except Exception:
            # mcursor may not exist if the mysql connection itself failed
            pass
        hive_cursor.close()
        wxapi.postAppMessage(
            'IMPORTANT: writing {}.{} data to mysql failed [{}]'.format(
                hive_db, hive_table, dt), '271')
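# --------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original code): init_mysql_table is
# meant to be driven from a PythonOperator with provide_context=True, which
# supplies `ds`; the remaining keys mirror the op_kwargs.get() calls above.
# The task_id and table name are hypothetical placeholders, and `dag` is assumed
# to be the surrounding DAG object.
from airflow.operators.python_operator import PythonOperator

init_example_table_to_mysql = PythonOperator(
    task_id='init_example_table_to_mysql',
    python_callable=init_mysql_table,
    provide_context=True,
    op_kwargs={
        'db': 'oride_dw',                 # hive database, reused as the mysql database name
        'table': 'app_example_report_d',  # hive table, reused as the mysql table name (hypothetical)
        'mysql_conn': 'mysql_bi',         # airflow connection id of the target mysql
        'overwrite': False,               # True: TRUNCATE + full load; False: reload only dt = ds
    },
    dag=dag)
# --------------------------------------------------------------------------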
def data_monitor(**op_kwargs):
    time.sleep(300)
    prev_timepoint = math.floor(int(time.time()) / 600) * 600 - 600
    prev_timestr = time.strftime('%Y-%m-%d %H:%M:00',
                                 time.localtime(prev_timepoint))
    bidbconn = get_db_conn('mysql_bi')
    oride_db_conn = get_db_conn('sqoop_db')

    # query the number of cities
    city_sql = '''
        select count(distinct id) from data_city_conf where id < 999000
    '''
    oridedb = oride_db_conn.cursor()
    oridedb.execute(city_sql)
    results = oridedb.fetchone()
    (city_cnt, ) = results
    total_count = (int(city_cnt) + 1) * 5

    comwx = ComwxApi('wwd26d45f97ea74ad2',
                     'BLE_v25zCmnZaFUgum93j3zVBDK-DjtRkLisI_Wns4g', '1000011')

    # total number of metric records for the current time point
    metrics_sql = '''
        select city_id, city_name, serv_type, order_time,
            (orders + orders_user + orders_pick + drivers_serv + drivers_orderable + orders_finish +
             avg_pick + avg_take + not_sys_cancel_orders + picked_orders + orders_accept + agg_orders_finish) as total
        from oride_orders_status_10min
        where order_time = '{}'
    '''.format(prev_timestr)
    bidb = bidbconn.cursor()
    logging.info(metrics_sql)
    bidb.execute(metrics_sql)
    results = bidb.fetchall()
    metrics_cnt = 0
    for (city_id, city_name, serv_type, order_time, total) in results:
        if city_id >= 999000:
            continue
        metrics_cnt += 1
        if city_id == 0 and serv_type == -1 and total <= 0:
            comwx.postAppMessage(
                '{0}[{1}] 10-minute data at {2}: all metrics are 0, please investigate, thanks'.format(
                    city_name, serv_type, order_time), '271')
            return

    if metrics_cnt < total_count:
        comwx.postAppMessage(
            '10-minute data at {0}: records missing ({1}<{2}), please investigate, thanks'.format(
                prev_timestr, metrics_cnt, total_count), '271')
        return

    # compare the last 2 time points against the same time points one week earlier
    weekly_diff = '''
        select
            t1.city_id, t1.city_name, t1.serv_type, t1.order_time,
            t1.orders as t1orders,
            if(isnull(t2.orders) or t2.orders<=0, 0, t2.orders) as t2orders,
            t1.orders_user as t1ousers,
            if(isnull(t2.orders_user) or t2.orders_user<=0, 0, t2.orders_user) as t2ousers,
            t1.orders_pick as t1opicks,
            if(isnull(t2.orders_pick) or t2.orders_pick<=0, 0, t2.orders_pick) as t2opicks,
            t1.drivers_serv as t1dservs,
            if(isnull(t2.drivers_serv) or t2.drivers_serv<=0, 0, t2.drivers_serv) as t2dservs,
            t1.drivers_orderable as t1doables,
            if(isnull(t2.drivers_orderable) or t2.drivers_orderable<=0, 0, t2.drivers_orderable) as t2doables,
            t1.orders_finish as t1ofs,
            if(isnull(t2.orders_finish) or t2.orders_finish<=0, 0, t2.orders_finish) as t2ofs,
            t1.avg_pick as t1apicks,
            if(isnull(t2.avg_pick) or t2.avg_pick<=0, 0, t2.avg_pick) as t2apicks,
            t1.avg_take as t1atakes,
            if(isnull(t2.avg_take) or t2.avg_take<=0, 0, t2.avg_take) as t2atakes,
            t1.not_sys_cancel_orders as t1norders,
            if(isnull(t2.not_sys_cancel_orders) or t2.not_sys_cancel_orders<=0, 0, t2.not_sys_cancel_orders) as t2norders,
            t1.picked_orders as t1pos,
            if(isnull(t2.picked_orders) or t2.picked_orders<=0, 0, t2.picked_orders) as t2pos,
            t1.agg_orders_finish as t1aofs,
            if(isnull(t2.agg_orders_finish) or t2.agg_orders_finish<=0, 0, t2.agg_orders_finish) as t2aofs
        from
            (select * from oride_orders_status_10min where order_time >= from_unixtime({dsb2})) t1
            left join
            (select * from oride_orders_status_10min
             where order_time >= from_unixtime({dsb7}) and order_time <= from_unixtime({dsb7a3})) t2
            on t1.city_id = t2.city_id and
               t1.serv_type = t2.serv_type and
               t1.order_time = date_format(from_unixtime(unix_timestamp(t2.order_time)+86400*7), '%Y-%m-%d %H:%i:00')
    '''.format(dsb2=prev_timepoint - 1200,
               dsb7=prev_timepoint - 1200 - 86400 * 7,
               dsb7a3=prev_timepoint - 86400 * 7)
    logging.info(weekly_diff)
    bidb.execute(weekly_diff)
    results = bidb.fetchall()
    for (city_id, city_name, serv_type, order_time, t1orders, t2orders,
         t1ousers, t2ousers, t1opicks, t2opicks, t1dservs, t2dservs,
         t1doables, t2doables, t1ofs, t2ofs, t1apicks, t2apicks,
         t1atakes, t2atakes, t1norders, t2norders, t1pos, t2pos,
         t1aofs, t2aofs) in results:
        if serv_type == -1 and ((t2orders >= 100 and t2orders > t1orders and (t2orders - t1orders) / t2orders > 0.8) or
                                (t2orders > 0 and t2orders < 100 and (t2orders - t1orders) > 40)):
            comwx.postAppMessage(
                '{0}[{1}] 10-minute data at {2}: order count abnormal vs. the same time last week, please investigate, thanks'.format(
                    city_name, serv_type, order_time), '271')
            return
        if serv_type == -1 and ((t2dservs >= 200 and t2dservs > t1dservs and (t2dservs - t1dservs) / t2dservs > 0.8) or
                                (t2dservs > 0 and t2dservs < 100 and (t2dservs - t1dservs) > 80)):
            comwx.postAppMessage(
                '{0}[{1}] 10-minute data at {2}: online-driver count abnormal vs. the same time last week, please investigate, thanks'.format(
                    city_name, serv_type, order_time), '271')
            return
        if serv_type == -1 and ((t2doables >= 200 and t2doables > t1doables and (t2doables - t1doables) / t2doables > 0.8) or
                                (t2doables > 0 and t2doables < 100 and (t2doables - t1doables) > 80)):
            comwx.postAppMessage(
                '{0}[{1}] 10-minute data at {2}: orderable-driver count abnormal vs. the same time last week, please investigate, thanks'.format(
                    city_name, serv_type, order_time), '271')
            return
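# --------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original code): data_monitor is built
# for a 10-minute schedule -- it sleeps 5 minutes, then validates the previous
# completed 10-minute window of oride_orders_status_10min. The dag_id and owner
# below are hypothetical placeholders.
from datetime import datetime
from airflow import DAG
from airflow.operators.python_operator import PythonOperator

monitor_args = {'owner': 'bi', 'start_date': datetime(2019, 11, 1)}
monitor_dag = DAG('oride_orders_status_10min_monitor',
                  schedule_interval='*/10 * * * *',
                  default_args=monitor_args)

orders_status_monitor = PythonOperator(task_id='data_monitor',
                                       python_callable=data_monitor,
                                       provide_context=True,
                                       dag=monitor_dag)
# --------------------------------------------------------------------------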
def sync_table_data_to_bi(**op_kwargs):
    odb = op_kwargs.get('odb', '')
    otable = op_kwargs.get('otable', '')
    oconn = op_kwargs.get('oconn', '')
    ddb = op_kwargs.get('ddb', '')
    ds = op_kwargs.get('ds')
    if odb == "" or otable == "" or oconn == "" or ddb == "":
        return

    dconn = get_db_conn('mysql_bi_utf8mb4')
    dcursor = dconn.cursor()
    isql = '''replace into {db}.{table}
        (id, username, password, name, phone, avatar, opay_email, leader_id,
         opay_user, job_id, department_id, remember_token, create_user,
         created_at, updated_at, is_old_admin)
        values '''.format(db=ddb, table=otable)

    oconn = get_db_conn(oconn)
    ocursor = oconn.cursor()
    logging.info(ds)
    if ds == '2019-11-01':
        where = ""
    else:
        where = '''WHERE (created_at >= '{ds} 00:00:00' AND created_at <= '{ds} 23:59:59') OR
            (updated_at >= '{ds} 00:00:00' AND updated_at <= '{ds} 23:59:59')
        '''.format(ds=ds)

    try:
        osql = '''
            SELECT
                id, username, password, name, phone, avatar, opay_email, leader_id,
                opay_user, job_id, department_id, remember_token, create_user,
                created_at, updated_at, is_old_admin
            FROM {db}.{table}
            {where}
        '''.format(db=odb, table=otable, where=where)
        logging.info(osql)
        ocursor.execute(osql)
        row = []
        res = ocursor.fetchall()
        for record in res:
            # while True:
            #     try:
            #         record = ocursor.next()
            #     except BaseException as e:
            #         logging.info(e)
            #         record = None
            # logging.info(record)
            if not record:
                break
            row.append("('{}')".format("','".join(
                [str(x).replace("'", "\\'") for x in record])))
            if len(row) >= 2000:
                logging.info(len(row))
                dcursor.execute("{h} {v}".format(h=isql, v=",".join(row)))
                row = []

        if len(row) > 0:
            logging.info("last: {}".format(len(row)))
            dcursor.execute("{h} {v}".format(h=isql, v=",".join(row)))
    except BaseException as e:
        logging.info(e)
        wxapi = ComwxApi('wwd26d45f97ea74ad2',
                         'BLE_v25zCmnZaFUgum93j3zVBDK-DjtRkLisI_Wns4g', '1000011')
        wxapi.postAppMessage(
            'IMPORTANT: writing {}.{} data to mysql failed [{}]'.format(
                odb, otable, ds), '271')
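# --------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original code): a daily sync of one
# source table into the BI mysql. provide_context=True supplies `ds` (on
# '2019-11-01' the function does a full load, afterwards only rows created or
# updated on ds). The task_id, source connection id and db/table names are
# hypothetical placeholders; `dag` is assumed to be the surrounding DAG object.
from airflow.operators.python_operator import PythonOperator

sync_admin_users_to_bi = PythonOperator(
    task_id='sync_admin_users_to_bi',
    python_callable=sync_table_data_to_bi,
    provide_context=True,
    op_kwargs={
        'odb': 'opay_admin',          # source mysql database (hypothetical)
        'otable': 'admin_users',      # source table with the columns listed in the SELECT above (hypothetical)
        'oconn': 'opay_admin_mysql',  # airflow connection id of the source mysql (hypothetical)
        'ddb': 'opay_dw',             # target database on the mysql_bi_utf8mb4 connection (hypothetical)
    },
    dag=dag)
# --------------------------------------------------------------------------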