def merge_pre_hi_data_task(hive_db, hive_all_hi_table_name, hive_hi_table_name, is_must_have_data, pt, now_hour, pre_hour_day, pre_hour, **kwargs):
    """Merge the previous hour's hi-table data into the all-hi table.

    Renders ADD_HI_SQL with the current Hive column list, executes it via
    the Hive CLI, then writes the _SUCCESS marker for the partition.
    """
    schema_updater = SqoopSchemaUpdate()
    column_names = schema_updater.get_hive_column_name(hive_db, hive_all_hi_table_name)

    merge_hql = ADD_HI_SQL.format(
        db_name=hive_db,
        hive_all_hi_table_name=hive_all_hi_table_name,
        hive_hi_table_name=hive_hi_table_name,
        pt=pt,
        now_hour=now_hour,
        pre_hour_day=pre_hour_day,
        pre_hour=pre_hour,
        columns=',\n'.join(column_names))

    # Run the merge statement through the Hive CLI.
    logging.info('Executing: %s', merge_hql)
    hive_cli = HiveCliHook()
    hive_cli.run_cli(merge_hql)

    # Write the _SUCCESS marker.
    # First flag "false": the data directory has no country_code partition.
    # Second flag (is_must_have_data) "true": only create _SUCCESS when data
    # exists; "false": create _SUCCESS even when there is no data.
    TaskTouchzSuccess().countries_touchz_success(
        pt, hive_db, hive_all_hi_table_name,
        ALL_HI_OSS_PATH % hive_all_hi_table_name,
        "false", is_must_have_data, now_hour)
def merge_pre_hi_with_full_data_task(hive_db, hive_h_his_table_name, hive_hi_table_name, mysql_db_name, mysql_table_name, mysql_conn, sqoop_temp_db_name, sqoop_table_name, pt, now_hour, pre_day, pre_hour_day, pre_hour, is_must_have_data, **kwargs):
    """Merge the previous hour's hi data with the full snapshot into h_his.

    Fetches the Hive and MySQL column lists, renders MERGE_HI_WITH_FULL_SQL,
    executes it via the Hive CLI, then writes the _SUCCESS marker.
    """
    schema_updater = SqoopSchemaUpdate()
    hive_column_names = schema_updater.get_hive_column_name(hive_db, hive_h_his_table_name)
    mysql_column_names = schema_updater.get_mysql_column_name(mysql_db_name, mysql_table_name, mysql_conn)

    # Midnight of pre_day as epoch milliseconds (local time).
    pre_day_ms = int(time.mktime(time.strptime(pre_day, "%Y-%m-%d"))) * 1000

    merge_hql = MERGE_HI_WITH_FULL_SQL.format(
        columns=',\n'.join(hive_column_names),
        pt=pt,
        now_hour=now_hour,
        db_name=hive_db,
        mysql_db_name=mysql_db_name,
        hive_h_his_table_name=hive_h_his_table_name,
        hive_hi_table_name=hive_hi_table_name,
        mysql_table_name=mysql_table_name,
        pre_day_ms=pre_day_ms,
        mysql_columns=',\n'.join(mysql_column_names),
        sqoop_temp_db_name=sqoop_temp_db_name,
        sqoop_table_name=sqoop_table_name)

    # Run the merge statement through the Hive CLI.
    logging.info('Executing: %s', merge_hql)
    hive_cli = HiveCliHook()
    hive_cli.run_cli(merge_hql)

    # Write the _SUCCESS marker.
    # First flag "false": the data directory has no country_code partition.
    # Second flag (is_must_have_data) "true": only create _SUCCESS when data
    # exists; "false": create _SUCCESS even when there is no data.
    TaskTouchzSuccess().countries_touchz_success(
        pt, hive_db, hive_h_his_table_name,
        H_HIS_OSS_PATH % hive_h_his_table_name,
        "false", is_must_have_data, now_hour)
def create_hive_external_table(db, table, conn, **op_kwargs):
    """Create the Hive external table mirroring MySQL ``db.table``.

    Reads the column definitions from information_schema, maps MySQL types
    to Hive types, and runs the rendered CREATE TABLE HQL via the Hive CLI.

    NOTE(review): relies on module-level globals not visible in this chunk:
    ``hive_db``, ``hive_table`` (template with a ``{bs}`` placeholder),
    ``hdfs_path``, ``ods_create_table_hql``, ``mysql_type_to_hive`` and
    ``get_db_conn`` — confirm they are defined at file scope.
    """
    # Keep the Hive schema in sync with MySQL first. The return value is
    # deliberately ignored (a previously commented-out early return was
    # removed) so the CREATE TABLE below always runs.
    sqoop_schema = SqoopSchemaUpdate()
    sqoop_schema.update_hive_schema(
        hive_db=hive_db,
        hive_table=hive_table.format(bs=table),
        mysql_db=db,
        mysql_table=table,
        mysql_conn=conn
    )

    # Fetch column metadata; finally-block guarantees the connection is
    # closed even if the query raises (the original leaked it on error).
    mysql_conn = get_db_conn(conn)
    try:
        mcursor = mysql_conn.cursor()
        sql = '''
            select COLUMN_NAME, DATA_TYPE, COLUMN_COMMENT, COLUMN_TYPE
            from information_schema.COLUMNS
            where TABLE_SCHEMA='{db}' and
                TABLE_NAME='{table}'
            order by ORDINAL_POSITION
        '''.format(db=db, table=table)
        mcursor.execute(sql)
        res = mcursor.fetchall()
    finally:
        mysql_conn.close()

    columns = []
    # 'data_type' renamed from 'type' to avoid shadowing the builtin.
    for (name, data_type, comment, column_type) in res:
        if data_type.upper() == 'DECIMAL':
            # DECIMAL keeps precision/scale from COLUMN_TYPE; Hive has no
            # unsigned/signed modifiers, so strip them.
            hive_type = column_type.replace('unsigned', '').replace('signed', '')
        else:
            # Unknown MySQL types degrade to Hive 'string'.
            hive_type = mysql_type_to_hive.get(data_type.upper(), 'string')
        columns.append("`%s` %s comment '%s'" % (name, hive_type, comment))

    # Render and execute the CREATE TABLE statement.
    hql = ods_create_table_hql.format(
        db_name=hive_db,
        table_name=hive_table.format(bs=table),
        columns=",\n".join(columns),
        hdfs_path=hdfs_path.format(bs=table)
    )
    logging.info(hql)
    hive_hook = HiveCliHook()
    logging.info('Executing: %s', hql)
    hive_hook.run_cli(hql)
def run_check_table(schema_table_db_name, schema_table_name, target_table_db_name, target_table_name, conn_id, hive_table_name, server_name, **kwargs):
    """Create the Hive ODS table if it does not exist, else append new columns.

    Checks HIVE_DB for ``hive_table_name`` via HiveServer2. When missing,
    builds the table from the MySQL information_schema column list; when
    present, delegates to SqoopSchemaUpdate.append_hive_schema.

    Returns True when append_hive_schema reports a change, otherwise None
    (original return contract preserved).
    """
    # e.g. SHOW TABLES in oride_db LIKE 'data_aa'
    check_sql = 'SHOW TABLES in %s LIKE \'%s\'' % (HIVE_DB, hive_table_name)
    hive2_conn = HiveServer2Hook().get_conn()
    cursor = hive2_conn.cursor()
    cursor.execute(check_sql)
    if not cursor.fetchall():
        logging.info('Create Hive Table: %s.%s', HIVE_DB, hive_table_name)
        # Fetch the source table's column metadata from MySQL.
        column_sql = '''
            SELECT
                COLUMN_NAME,
                DATA_TYPE,
                NUMERIC_PRECISION,
                NUMERIC_SCALE,
                COLUMN_COMMENT
            FROM
                information_schema.columns
            WHERE
                table_schema='{db_name}' and table_name='{table_name}'
        '''.format(db_name=schema_table_db_name, table_name=schema_table_name)
        mysql_hook = MySqlHook(conn_id)
        mysql_conn = mysql_hook.get_conn()
        mysql_cursor = mysql_conn.cursor()
        mysql_cursor.execute(column_sql)
        results = mysql_cursor.fetchall()

        # MySQL types that map to a plain Hive 'string' (set membership
        # replaces the original long '==' or-chain).
        string_types = frozenset((
            'timestamp', 'varchar', 'char', 'text', 'longtext',
            'mediumtext', 'json', 'datetime'))
        rows = []
        for result in results:
            # 'dt' collides with the Hive partition column, so rename it.
            col_name = '_dt' if result[0] == 'dt' else result[0]
            if result[1] in string_types:
                data_type = 'string'
            elif result[1] == 'decimal':
                # Preserve precision/scale, e.g. decimal(10,2).
                data_type = "%s(%s,%s)" % (result[1], result[2], result[3])
            else:
                data_type = result[1]
            # Strip newlines so the comment stays on one HQL line.
            comment = str(result[4]).replace('\n', '').replace('\r', '')
            rows.append("`%s` %s comment '%s'" % (col_name, data_type, comment))
        mysql_conn.close()

        # Render and run the CREATE TABLE statement.
        hive_hook = HiveCliHook()
        sql = ODS_CREATE_TABLE_SQL.format(
            db_name=HIVE_DB,
            table_name=hive_table_name,
            columns=",\n".join(rows),
            oss_path=OSS_PATH % ("{server_name}.{db_name}.{table_name}".format(
                server_name=server_name,
                db_name=target_table_db_name,
                table_name=target_table_name)))
        logging.info('Executing: %s', sql)
        hive_hook.run_cli(sql)
    else:
        # Table exists: append any columns that are new in MySQL.
        sqoopSchema = SqoopSchemaUpdate()
        response = sqoopSchema.append_hive_schema(
            hive_db=HIVE_DB,
            hive_table=hive_table_name,
            mysql_db=schema_table_db_name,
            mysql_table=schema_table_name,
            mysql_conn=conn_id,
            oss_path=OSS_PATH % ("{server_name}.{db_name}.{table_name}".format(
                server_name=server_name,
                db_name=target_table_db_name,
                table_name=target_table_name)))
        if response:
            return True
    return