def search_empty_tables(table_list, not_validate_list) -> list: with UseSqlserverDB(TARGET_DB) as cursor: generate_empty_validation_sql = "SELECT NAME, 'SELECT TOP 2 * FROM ' + NAME + ' WITH(NOLOCK) ORDER BY 1 ASC;' AS SQL_TEXT FROM sysobjects WHERE xtype = 'U' AND uid = 1 AND ((name LIKE 'D[_]%' OR name LIKE 'B[_]%' OR name LIKE 'R[_]%' OR name LIKE 'F[_]%' OR name LIKE 'RPT[_]%') and name <> 'D_AUDIT_LOG') ORDER BY name" rs_table_list = query(cursor, generate_empty_validation_sql) not_empty_list = [] empty_table_counter = 0 for item in rs_table_list: table_name = item[0] sql_text = item[1] if not_validate_list: if table_name in not_validate_list: continue if table_list: if table_name not in table_list: continue else: if identify_backup_tables(table_name.lower()): not_validate_list.append(table_name) continue else: if identify_backup_tables(table_name.lower()): not_validate_list.append(table_name) continue rs_table_data = query(cursor, sql_text) if rs_table_data: if len(rs_table_data) == 1 and ( str(table_name).startswith("D_") or str(table_name).startswith("R_") ) and rs_table_data[0][0] == -1: msg = "\033[32m" + table_name + "\033[0m only has -1 key row, please check." add_msg('1 empty_table', table_name, '0', msg) empty_table_counter += 1 elif (str(table_name).startswith("B_") or str(table_name).startswith("F_") ) and rs_table_data[0][0] == -1: msg = "\033[32m" + table_name + "\033[0m HAS -1 key row, which should NOT. please check." add_msg('1 empty_table', table_name, '0', msg) else: not_empty_list.append(table_name) else: empty_table_counter += 1 msg = "\033[32m" + table_name + " \033[0mis empty, please check." add_msg('1 empty_table', table_name, '0', msg) search_empty_result = [ not_empty_list, empty_table_counter, not_validate_list ] return search_empty_result
def create_base(meta): global ddl_pd with UseSqlserverDB(TARGET_DB) as cursor: for index, row in meta.iterrows(): tb_name = str(row['table_name']) sql = str(row['sql']) create_sql = "CREATE VIEW V_CDC_TEMP_" + tb_name + " AS " + sql execute(cursor, create_sql) check_sql = "SELECT lower(a.referenced_entity_name) as ref_table ,lower(a.referenced_minor_name) as ref_column ,c.name as typename ,CONVERT(VARCHAR(50),b.precision) precision ,CONVERT(VARCHAR(50),b.scale) scale ,CONVERT(VARCHAR(50),b.max_length) max_length ,b.is_nullable nullable, '," + tb_name + "' AS impact_table FROM sys.dm_sql_referenced_entities ( 'DBO.V_CDC_TEMP_" + tb_name + "', 'OBJECT') a inner join sys.all_columns b on a.referenced_minor_name = b.name and a.referenced_id= b.object_id inner join sys.systypes c on b.system_type_id = c.xtype where a.referenced_minor_name is not null order by 1,2" signle = pd.DataFrame(query(cursor, check_sql)) signle.columns = [ 'ref_table', 'ref_column', 'typename', 'precision', 'scale', 'max_length', 'nullable', 'impact_table' ] ddl_pd = ddl_pd.append(signle) drop_sql = "DROP VIEW V_CDC_TEMP_" + tb_name execute(cursor, drop_sql) ddl_pd = ddl_pd.groupby([ 'ref_table', 'ref_column', 'typename', 'precision', 'scale', 'max_length', 'nullable' ]).agg(impact_list=pd.NamedAgg(column='impact_table', aggfunc='sum')) ddl_pd = ddl_pd.apply(remove_first_comma) ddl_pd.to_excel(BASE_FILE, sheet_name="DDL")
def query_meta_data(table_list, target_db): if table_list[0:1] == ',': table_list = table_list[1:] sql = "SELECT lower(a.name) as ref_table, lower(b.name) as ref_column ,c.name as typename ,CONVERT(VARCHAR(50),b.precision) precision ,CONVERT(VARCHAR(50),b.scale) scale ,CONVERT(VARCHAR(50),b.max_length) max_length ,b.is_nullable nullable FROM sys.all_objects a inner join sys.all_columns b on a.object_id= b.object_id inner join sys.systypes c on b.system_type_id = c.xtype WHERE a.name in (" + table_list + ") ORDER BY 1,2" with UseSqlserverDB(target_db) as cursor: return query(cursor, sql)
def check_translation(cursor, table_nm): list_sql = "SELECT table_name,column_name FROM information_schema.columns WHERE table_schema = 'dbo' AND table_name ='" + table_nm + "' AND DATA_TYPE = 'varchar' AND CHARACTER_MAXIMUM_LENGTH > 14 ORDER BY table_name, ordinal_position;" check_list = query(cursor, list_sql) for item in check_list: column_name_str = str(item[1]) check_sql = "SELECT TOP 1 " + column_name_str + " FROM " + table_nm + " WITH(NOLOCK) WHERE " + column_name_str + " LIKE '%<<translatable%'" if has_data(cursor, check_sql): msg = "\n\033[32m" + table_nm + "." + column_name_str + "\033[0m has un-translate string, please verify. SELECT TOP 100 " + column_name_str + " FROM " + table_nm + " WITH(NOLOCK) WHERE " + column_name_str + " LIKE '%<<translatable%'\n" add_msg('3 translation', table_nm, column_name_str, msg)
def search_empty_tables(cursor, table_list) -> list: generate_empty_validation_sql = "SELECT NAME, 'SELECT TOP 2 * FROM ' + NAME + ' WITH(NOLOCK) ORDER BY 1 ASC;' AS SQL_TEXT FROM sysobjects WHERE xtype = 'U' AND uid = 1 AND ((name LIKE 'D[_]%' OR name LIKE 'B[_]%' OR name LIKE 'R[_]%' OR name LIKE 'F[_]%' OR name LIKE 'RPT[_]%') and name <> 'D_AUDIT_LOG') ORDER BY name" rs_table_list = query(cursor, generate_empty_validation_sql) not_empty_list = [] empty_table_counter = 0 not_validate_list = [] for item in rs_table_list: table_name = item[0] sql_text = item[1] if identify_backup_tables(table_name.lower()): not_validate_list.append(table_name) continue if table_list: if table_name not in table_list: continue rs_table_data = query(cursor, sql_text) if rs_table_data: if len(rs_table_data) == 1 and (str(table_name).startswith("D_") or str(table_name).startswith("R_") ) and rs_table_data[0][0] == -1: print("\033[32m" + table_name + "\033[0m only has -1 key row, please check.") else: not_empty_list.append(table_name) else: empty_table_counter += 1 print("\033[32m" + table_name + " \033[0mis empty, please check.") search_empty_result = [ not_empty_list, empty_table_counter, not_validate_list ] return search_empty_result
def check_columns(cursor, table_nm, business_key_conf): generate_raw_list = "SELECT table_name,column_name,ordinal_position, data_type FROM information_schema.columns WHERE table_schema = 'dbo' AND table_name = '" + table_nm + "' ORDER BY ordinal_position" rs_list = query(cursor, generate_raw_list) has_mart_source_id = False has_awo_id = False has_cur_rec_ind = False has_current_record_ind = False for item in rs_list: column_name = str(item[1]) position = item[2] #data_type = str(item[3]) # Log # ''' print("checking: \033[32m" + table_nm + "\033[0m.\033[34m" + column_name+"\033[0m") if column_name == 'CUSTOMER_NB': print('test') pass ''' if position == 1: pk_column = column_name # checking if column values are all NULL except the -1 one null_check_sql = "SELECT TOP 1 " + column_name + " FROM " + table_nm + " WITH(NOLOCK) WHERE " + pk_column + " > 0 AND " + column_name + " IS NOT NULL" not_empty_ind = True not_empty_ind = has_data(cursor, null_check_sql) if not not_empty_ind: msg = "\033[32m" + table_nm + "." + column_name + "\033[0m is empty." add_msg('5 column_check', table_nm, column_name, msg) elif not_empty_ind: null_check_sql = "SELECT TOP 1 " + column_name + " FROM " + table_nm + " WITH(NOLOCK) WHERE " + pk_column + " > 0 AND convert(varchar," + column_name + ") <> ''" if not has_data(cursor, null_check_sql): msg = "\033[32m" + table_nm + "." + column_name + "\033[0m are all empty string." add_msg('5 column_check', table_nm, column_name, msg) elif column_name == "MART_SOURCE_ID": has_mart_source_id = True elif column_name == "AWO_ID": has_awo_id = True elif column_name == "CUR_REC_IND": has_cur_rec_ind = True elif column_name == "CURRENT_RECORD_IND": has_current_record_ind = True elif column_name.endswith( "_KEY" ) and column_name != "LABEL_KEY" and table_nm != "D_TRANSLATION": key_check_sql = "SELECT TOP 1 " + column_name + " FROM " + table_nm + " WITH(NOLOCK) WHERE " + column_name + " > -1" if not has_data(cursor, key_check_sql): msg = "\033[32m" + table_nm + "." + column_name + "\033[0m \033[33mis all -1, please verify.\033[0m" add_msg('5 column_check', table_nm, column_name, msg) # for KEYs, check if there is NULL value, which should NOT null_value_check_sql = "SELECT TOP 1 " + column_name + " FROM " + table_nm + " WITH(NOLOCK) WHERE " + column_name + " IS NULL" if has_data(cursor, null_value_check_sql): msg = "\033[32m" + table_nm + "." + column_name + "\033[0m has \033[22mNULL\033[0m value, please verify." add_msg('5 column_check', table_nm, column_name, msg) if column_name.endswith("_DATE_KEY") or column_name.endswith( "_TIME_KEY") or column_name.endswith( "ITEM_KEY") or column_name.endswith("ORDER_KEY"): check_sql = "SELECT TOP 1 " + column_name + " FROM " + table_nm + " WITH(NOLOCK) WHERE " + column_name + " = -1 AND " + pk_column + " > 0;" if has_data(cursor, check_sql): msg = "\033[32m" + table_nm + "." + column_name + "\033[0m has \033[22m-1\033[0m value, please verify." add_msg('5 column_check', table_nm, column_name, msg) check_duplicate(cursor, has_mart_source_id, has_awo_id, has_cur_rec_ind, has_current_record_ind, table_nm, business_key_conf)
def check_column(cursor, tb_list, business_key_conf): generate_raw_list = "SELECT table_name,column_name FROM information_schema.columns WHERE table_schema = 'dbo' AND table_name IN (" + tb_list + ")ORDER BY table_name, ordinal_position" rs_list = query(cursor, generate_raw_list) has_mart_source_id = False has_awo_id = False has_cur_rec_ind = False has_current_record_ind = False old_table_name = "" table_count = 0 row_count = 0 pk_column = "" for item in rs_list: table_name = item[0] column_name = item[1] # testing code # print("checking: \033[32m" + table_name + "\033[0m.\033[34m" + column_name + "\033[0m") if column_name == 'LATITUDE_VAL': print('test') pass if row_count == 0: pk_column = column_name if old_table_name != table_name: pk_column = column_name if table_count != 0: check_duplicate(cursor, has_mart_source_id, has_awo_id, has_cur_rec_ind, has_current_record_ind, old_table_name, business_key_conf) table_count += 1 has_mart_source_id = False has_awo_id = False has_cur_rec_ind = False has_current_record_ind = False old_table_name = table_name # checking if column values are all NULL except the -1 one null_check_sql = "SELECT TOP 1 " + column_name + " FROM " + table_name + " WITH(NOLOCK) WHERE " + pk_column + " > 0 AND " + column_name + " IS NOT NULL" not_empty_ind = True not_empty_ind = has_data(cursor, null_check_sql) if not not_empty_ind: print("\033[32m" + table_name + "." + column_name + "\033[0m is empty.") elif not_empty_ind: null_check_sql = "SELECT TOP 1 " + column_name + " FROM " + table_name + " WITH(NOLOCK) WHERE " + pk_column + " > 0 AND convert(varchar," + column_name + ") <> ''" if not has_data(cursor, null_check_sql): print("\033[32m" + table_name + "." + column_name + "\033[0m are all empty string.") elif str(column_name) == "MART_SOURCE_ID": has_mart_source_id = True elif str(column_name) == "AWO_ID": has_awo_id = True elif str(column_name) == "CUR_REC_IND": has_cur_rec_ind = True elif str(column_name) == "CURRENT_RECORD_IND": has_current_record_ind = True elif str(column_name).endswith("_KEY") and ( column_name != "LABEL_KEY" and table_name != "D_TRANSLATION"): key_check_sql = "SELECT TOP 1 " + column_name + " FROM " + table_name + " WITH(NOLOCK) WHERE " + column_name + " > -1" if not has_data(cursor, key_check_sql): print("\033[32m" + table_name + "." + column_name + "\033[0m \033[33mis all -1, please verify.\033[0m") # for KEYs, check if there is NULL value, which should NOT null_value_check_sql = "SELECT TOP 1 " + column_name + " FROM " + table_name + " WITH(NOLOCK) WHERE " + column_name + " IS NULL" if has_data(cursor, null_value_check_sql): print("\033[32m" + table_name + "." + column_name + "\033[0m has \033[22mNULL\033[0m value, please verify.") if str(column_name).endswith("_DATE_KEY") or str( column_name).endswith("_TIME_KEY") or str( column_name).endswith("ITEM_KEY") or str( column_name).endswith("ORDER_KEY"): check_sql = "SELECT TOP 1 " + column_name + " FROM " + table_name + " WITH(NOLOCK) WHERE " + column_name + " = -1 AND " + pk_column + " > 0;" if has_data(cursor, check_sql): print( "\033[32m" + table_name + "." + column_name + "\033[0m has \033[22m-1\033[0m value, please verify.") row_count += 1 check_duplicate(cursor, has_mart_source_id, has_awo_id, has_cur_rec_ind, has_current_record_ind, old_table_name, business_key_conf) return table_count