Code example #1
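This function builds a data dictionary from the Hive metastore database (MySQL): the first query joins COLUMNS_V2, SDS, TBLS and DBS to get one row per column with its type and comment, and the second pivots the key/value pairs in TABLE_PARAMS into table-level stats (comment, row count, raw/total size, last DDL and modification times). It returns two DataFrames, cols at column level and tbs at table level, joined back to database and table names. The later examples reuse the same pandas import and the project-specific cons helper shown here.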
import pandas as pd


def getHiveTb():
    # cons is a project-specific helper; cons.meta('hive') is assumed to
    # return a SQLAlchemy engine for the Hive metastore database (MySQL)
    engine = cons.meta('hive')
    sql_txt = """
            select  
                t.TBL_ID tb_id,
                d.name db, 
                t.TBL_NAME tb,
                v.COLUMN_NAME col, 
                v.TYPE_NAME ctype,
                v.`COMMENT` col_com
            from columns_v2 v
            inner join sds s on v.CD_ID = s.CD_ID
            inner join tbls t on s.SD_ID = t.SD_ID
            inner join dbs d on d.DB_ID = t.DB_ID
            where d.`NAME` in ('cdi', 'app')
            order by t.TBL_ID, v.INTEGER_IDX;
            """
    cols = pd.read_sql(sql_txt, engine)
    sql_txt = """
           select s.tbl_id tb_id,
                   max(if(PARAM_KEY='comment',PARAM_VALUE,null)) tb_com,
                   max(if(PARAM_KEY='numRows',PARAM_VALUE,'')) row_num,
                   max(if(PARAM_KEY='rawDataSize',PARAM_VALUE,'')) raw_data_size,
                   max(if(PARAM_KEY='totalSize',PARAM_VALUE,'')) total_size,
                   FROM_UNIXTIME(max(if(PARAM_KEY='transient_lastDdlTime',PARAM_VALUE,''))) last_ddl_time,
                   FROM_UNIXTIME(max(if(PARAM_KEY='last_modified_time',PARAM_VALUE,''))) last_modified_time,
                   max(if(PARAM_KEY='last_modified_by',PARAM_VALUE,null)) last_modified_by
            from TABLE_PARAMS s GROUP BY s.TBL_ID
            """
    tbs = pd.read_sql(sql_txt, engine)
    # one row per table: map tb_id back to its database and table names
    tp = cols[['tb_id', 'tb', 'db']].drop_duplicates()
    tbs = tbs.merge(tp, how='inner', on='tb_id')
    return cols, tbs
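
For reference, a minimal usage sketch (the workbook name is illustrative, and an Excel engine such as openpyxl must be installed): export both DataFrames to a two-sheet Excel workbook as a browsable data dictionary.

cols, tbs = getHiveTb()
# one sheet per granularity: table-level stats and column-level detail
with pd.ExcelWriter('hive_data_dictionary.xlsx') as writer:
    tbs.to_excel(writer, sheet_name='tables', index=False)
    cols.to_excel(writer, sheet_name='columns', index=False)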
Code example #2
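This helper pulls today's failed ETL jobs from the `etl错误任务视图` view ("ETL failed-task view"), restricted to jobs that started before 08:00 of the current day, and returns them as a DataFrame.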
def re_run():
    engine = cons.meta('hive')
    sql_txt = """
           SELECT * FROM `etl错误任务视图`
           where start_time<CONCAT(CURRENT_DATE(),' 08:00:00');
            """
    err_df = pd.read_sql(sql_txt, engine)
    if err_df.empty:
        print('No failed jobs')  # original message: '没有错误作业'
    return err_df
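
If the 08:00 cutoff needs to vary, the condition can be bound as a query parameter rather than hard-coded. A sketch assuming SQLAlchemy is available (the function name and default value are illustrative):

from sqlalchemy import text

def re_run_before(cutoff='08:00:00'):
    # same query as re_run(), but with the cutoff bound as a parameter
    engine = cons.meta('hive')
    sql = text(
        "SELECT * FROM `etl错误任务视图` "
        "WHERE start_time < CONCAT(CURRENT_DATE(), ' ', :cutoff)"
    )
    return pd.read_sql(sql, engine, params={'cutoff': cutoff})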
Code example #3
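This query assembles table-level statistics for the sdd/cdi/app databases. Stats for unpartitioned tables come straight from TABLE_PARAMS (subquery p); for partitioned tables the same keys live per partition in PARTITION_PARAMS, so subquery pt rolls them up to the table through PARTITIONS. Row counts and sizes from the two sides are summed with ifnull() so either side may be absent, and the later of the two last-DDL times wins.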
def getHive():
    engine = cons.meta('hive')
    sql_txt = """
            SELECT 
                d.`NAME` db_name,
                TBL_NAME tb_name,
                FROM_UNIXTIME(CREATE_TIME) create_time,
                ifnull(p.row_num,0)+ifnull(pt.row_num,0) row_num,
                ifnull(p.total_size,0)+ifnull(pt.total_size,0) total_size,
                p.comments,
                case when pt.last_ddl_time>p.last_ddl_time then pt.last_ddl_time else p.last_ddl_time end last_ddl_time,
                -- case when pt.last_modified_time>p.last_modified_time then pt.last_modified_time else p.last_modified_time end last_modified_time,
                pt.part_name
            FROM tbls t
            INNER JOIN dbs d on t.DB_ID=d.DB_ID and d.`NAME` in('sdd','cdi','app')
            LEFT JOIN(select tbl_id,
                            max(if(PARAM_KEY='comment',PARAM_VALUE,null)) comments,
                            max(if(PARAM_KEY='numRows',PARAM_VALUE,'')) row_num,
                            max(if(PARAM_KEY='rawDataSize',PARAM_VALUE,'')) raw_data_size,
                            max(if(PARAM_KEY='totalSize',PARAM_VALUE,'')) total_size,
                            FROM_UNIXTIME(max(if(PARAM_KEY='transient_lastDdlTime',PARAM_VALUE,''))) last_ddl_time,
                            FROM_UNIXTIME(max(if(PARAM_KEY='last_modified_time',PARAM_VALUE,''))) last_modified_time,
                            max(if(PARAM_KEY='last_modified_by',PARAM_VALUE,null)) last_modified_by
                    from TABLE_PARAMS GROUP BY tbl_id) p on t.TBL_ID=p.tbl_id
            left JOIN(
                    SELECT 
                    p.TBL_ID,
                    sum(k.raw_data_size) raw_data_size,
                    sum(k.row_num) row_num,
                    sum(k.total_size) total_size,
                    max(p.PART_NAME) part_name,
                    max(k.last_ddl_time) last_ddl_time,
                    max(k.last_modified_time) last_modified_time
            from partitions p
            LEFT JOIN(
                    select PART_ID,
                    max(if(PARAM_KEY='numRows',PARAM_VALUE,'')) row_num,
                    max(if(PARAM_KEY='rawDataSize',PARAM_VALUE,'')) raw_data_size,
                    max(if(PARAM_KEY='totalSize',PARAM_VALUE,'')) total_size,
                    FROM_UNIXTIME(max(if(PARAM_KEY='transient_lastDdlTime',PARAM_VALUE,''))) last_ddl_time,
                    FROM_UNIXTIME(max(if(PARAM_KEY='last_modified_time',PARAM_VALUE,''))) last_modified_time
                     from partition_params GROUP BY PART_ID) k on p.PART_ID=k.PART_ID
            GROUP BY p.TBL_ID) pt on t.TBL_ID=pt.tbl_id
            """
    hive_df = pd.read_sql(sql_txt, engine)
    return hive_df
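
Note that numRows/totalSize in the metastore are only as fresh as the last statistics computation (ANALYZE TABLE, or automatic gathering via hive.stats.autogather); they are not updated on every write, so the figures here can lag the actual data.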
Code example #4
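This function switches to the Oozie metadata database: for every RUNNING coordinator owned by the hue user it joins the coordinator's latest action (matched on last_action_number) to the corresponding workflow run, returning the job name, last/next materialization times, the last run's duration, and the schedule frequency.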
def getOozie():
    oozie = cons.meta('oozie')
    sql_txt = """
            SELECT
            	ifnull(w.app_name, c.app_name) job_name,
            	c.last_modified_time job_last_time,
            	c.next_matd_time job_next_time,
            	w.end_time - w.start_time job_used_times,c.frequency
            FROM
            	coord_jobs c
            LEFT JOIN coord_actions j ON c.id = j.job_id
            AND c.last_action_number = j.action_number
            LEFT JOIN wf_jobs w ON w.id = j.external_id
            WHERE
            	c.user_name = 'hue'
            AND c.`status` = 'RUNNING'
            """
    oz_df = pd.read_sql(sql_txt, oozie)
    return oz_df
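
One caveat on job_used_times: subtracting two datetime columns in MySQL converts them to numbers rather than producing a true interval, so durations that cross a minute or hour boundary come out wrong; TIMESTAMPDIFF(SECOND, w.start_time, w.end_time) would give the duration in seconds unambiguously.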
Code example #5
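This final excerpt is the tail of the assembly step (the loop that opens it, and the etl_log_sum write that follows, are truncated in the source): shell files are matched to Oozie job names by key, enriched with the schedule data in oz_df and the table stats from getHive(), placeholder labels fill the gaps, and the combined rows for today are rewritten into the etl_job_set table in MySQL.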
                if keys in oz_df.loc[j, 'job_name']:
                    sh_oz_map.loc[i, 'job_name'] = oz_df.loc[j, 'job_name']

    sh_oz_map = sh_oz_map.merge(oz_df, how='left', on='job_name')
    del sh_oz_map['sh_key'], i, j, keys, sh_list, sh_list_key  # drop the helper column and loop variables
    rs = rs.merge(sh_oz_map, how='left', on='sh_files')
    hive_df = getHive()
    sch_rs = rs[pd.notnull(rs['job_name'])]  # only rows that are on the execution schedule
    last_rs = hive_df.merge(sch_rs,
                            how='left',
                            left_on='tb_name',
                            right_on='cfg_target_tb')
    last_rs['sh_files'] = last_rs['sh_files'].fillna('无配置shell')  # placeholder: "no shell script configured"
    last_rs['job_name'] = last_rs['job_name'].fillna('暂无定时配置')  # placeholder: "no schedule configured yet"
    last_rs['comments'] = last_rs['comments'].fillna(last_rs['sql_tb_cn'])
    etl_data = cons.meta('etl_data')
    table_to_jobs = last_rs.copy()
    last_rs['oper_date'] = today
    # write results to MySQL: delete today's rows, then append the fresh ones
    etl_data.execute(
        "delete from etl_job_set where oper_date='{0}'".format(today))
    insert_rs = last_rs[insert_cols].copy()
    insert_rs = insert_rs.astype('str')
    insert_rs.to_sql(name='etl_job_set',
                     con=etl_data,
                     if_exists='append',
                     index=False)
    etl_data.execute(
        "delete from etl_log_sum where oper_date='{0}'".format(today))
    log_rs['oper_date'] = today
    log_rs[[