def create_base(meta): global ddl_pd with UseSqlserverDB(TARGET_DB) as cursor: for index, row in meta.iterrows(): tb_name = str(row['table_name']) sql = str(row['sql']) create_sql = "CREATE VIEW V_CDC_TEMP_" + tb_name + " AS " + sql execute(cursor, create_sql) check_sql = "SELECT lower(a.referenced_entity_name) as ref_table ,lower(a.referenced_minor_name) as ref_column ,c.name as typename ,CONVERT(VARCHAR(50),b.precision) precision ,CONVERT(VARCHAR(50),b.scale) scale ,CONVERT(VARCHAR(50),b.max_length) max_length ,b.is_nullable nullable, '," + tb_name + "' AS impact_table FROM sys.dm_sql_referenced_entities ( 'DBO.V_CDC_TEMP_" + tb_name + "', 'OBJECT') a inner join sys.all_columns b on a.referenced_minor_name = b.name and a.referenced_id= b.object_id inner join sys.systypes c on b.system_type_id = c.xtype where a.referenced_minor_name is not null order by 1,2" signle = pd.DataFrame(query(cursor, check_sql)) signle.columns = [ 'ref_table', 'ref_column', 'typename', 'precision', 'scale', 'max_length', 'nullable', 'impact_table' ] ddl_pd = ddl_pd.append(signle) drop_sql = "DROP VIEW V_CDC_TEMP_" + tb_name execute(cursor, drop_sql) ddl_pd = ddl_pd.groupby([ 'ref_table', 'ref_column', 'typename', 'precision', 'scale', 'max_length', 'nullable' ]).agg(impact_list=pd.NamedAgg(column='impact_table', aggfunc='sum')) ddl_pd = ddl_pd.apply(remove_first_comma) ddl_pd.to_excel(BASE_FILE, sheet_name="DDL")
def query_meta_data(table_list, target_db): if table_list[0:1] == ',': table_list = table_list[1:] sql = "SELECT lower(a.name) as ref_table, lower(b.name) as ref_column ,c.name as typename ,CONVERT(VARCHAR(50),b.precision) precision ,CONVERT(VARCHAR(50),b.scale) scale ,CONVERT(VARCHAR(50),b.max_length) max_length ,b.is_nullable nullable FROM sys.all_objects a inner join sys.all_columns b on a.object_id= b.object_id inner join sys.systypes c on b.system_type_id = c.xtype WHERE a.name in (" + table_list + ") ORDER BY 1,2" with UseSqlserverDB(target_db) as cursor: return query(cursor, sql)
def search_empty_tables(table_list, not_validate_list) -> list: with UseSqlserverDB(TARGET_DB) as cursor: generate_empty_validation_sql = "SELECT NAME, 'SELECT TOP 2 * FROM ' + NAME + ' WITH(NOLOCK) ORDER BY 1 ASC;' AS SQL_TEXT FROM sysobjects WHERE xtype = 'U' AND uid = 1 AND ((name LIKE 'D[_]%' OR name LIKE 'B[_]%' OR name LIKE 'R[_]%' OR name LIKE 'F[_]%' OR name LIKE 'RPT[_]%') and name <> 'D_AUDIT_LOG') ORDER BY name" rs_table_list = query(cursor, generate_empty_validation_sql) not_empty_list = [] empty_table_counter = 0 for item in rs_table_list: table_name = item[0] sql_text = item[1] if not_validate_list: if table_name in not_validate_list: continue if table_list: if table_name not in table_list: continue else: if identify_backup_tables(table_name.lower()): not_validate_list.append(table_name) continue else: if identify_backup_tables(table_name.lower()): not_validate_list.append(table_name) continue rs_table_data = query(cursor, sql_text) if rs_table_data: if len(rs_table_data) == 1 and ( str(table_name).startswith("D_") or str(table_name).startswith("R_") ) and rs_table_data[0][0] == -1: msg = "\033[32m" + table_name + "\033[0m only has -1 key row, please check." add_msg('1 empty_table', table_name, '0', msg) empty_table_counter += 1 elif (str(table_name).startswith("B_") or str(table_name).startswith("F_") ) and rs_table_data[0][0] == -1: msg = "\033[32m" + table_name + "\033[0m HAS -1 key row, which should NOT. please check." add_msg('1 empty_table', table_name, '0', msg) else: not_empty_list.append(table_name) else: empty_table_counter += 1 msg = "\033[32m" + table_name + " \033[0mis empty, please check." add_msg('1 empty_table', table_name, '0', msg) search_empty_result = [ not_empty_list, empty_table_counter, not_validate_list ] return search_empty_result
def check_data(table_nm, business_key_conf): """ 1. If columns is key, validate if it's all -1. 2. Validate if column is null. 3. Validate if there are duplicates on awo_id/mart_source_id or business keys. """ with UseSqlserverDB(TARGET_DB) as cursor: check_default_row(cursor, table_nm) #check_translation(cursor, table_nm) check_columns(cursor, table_nm, business_key_conf)
def update_db(sql: str, acct: dict): try: with UseSqlserverDB(acct) as cursor: cursor.execute(sql) except DBConnectionError as err: print('Is your database swithed on? Error:', str(err)) except CredentialsError as err: print('User-id/Password issues. Error:', str(err)) except SQLError as err: print('Is your query correct? Error:', str(err)) except Exception as err: print('Something went wrong:', str(err)) return "Error"
def getMartTime(mart_server, mart_list, model_type, matrix): with UseSqlserverDB(mart_server) as cursor: for schema in mart_list: if model_type == 'HF': query = "SELECT MAX(SRC_SNPSHT_DT) FROM " + schema + "_HF_MART.DBO.F_ORDER_ITEM_TRANSACTION WITH(NOLOCK)" elif model_type == 'Camping': query = "SELECT MAX(ORDER_DTM) FROM " + schema + "_CAMPING_MART.DBO.D_ORDER WITH(NOLOCK)" result = inquery_single_row(query, cursor) for item in matrix: if item[0] == schema: item[2] = result item[4] = query return matrix
### from openpyxl import Workbook from openpyxl import load_workbook from db_connect.sqlserver_db import UseSqlserverDB import conf.acct as acct import tool.TSQL as TSQL_function import tool.tool as tool CURRENT_DB = acct.UAT_UT_CAMPING_MART SEED_FILE = r'.\seed\DataDictionary_Template.xlsx' excelName = tool.file_name('DataDictionary', 'xlsx') workbook = load_workbook(SEED_FILE) with UseSqlserverDB(CURRENT_DB) as cursor: sheet = workbook.get_sheet_by_name('DataDictionary') rows = sheet.rows columns = sheet.columns for row in range(2, sheet.max_row + 1): tableName = str(sheet.cell(row=row, column=1).value) columnName = str(sheet.cell(row=row, column=4).value) #print (tableName + ":" + columnName) query = "SELECT TOP 1 [" + columnName + "] FROM " + tableName + " WITH(NOLOCK) WHERE [" + columnName + "] IS NOT NULL" result = str(TSQL_function.inquery_single_row(query, cursor)) sheet.cell(row=row, column=12).value = result workbook.save(excelName)
tb_list = "'" for table in table_list: tb_list += str(table) + "','" tb_list = tb_list[:len(tb_list) - 2] check_minus_one(cursor, table_list) #check_translation(cursor, tb_list) table_counter = check_column(cursor, tb_list, business_key_conf) return table_counter if __name__ == '__main__': with UseSqlserverDB(TARGET_DB) as cursor: tables_result = search_empty_tables(cursor, table_list) if tables_result[2]: print( "These following table(s) will not be been validated this time:\n" ) for table_nm in tables_result[2]: print(table_nm) if len(tables_result[0]) > 0: table_counter = check_data(cursor, tables_result[0], business_key_conf) print("\n\nThere are " + str(tables_result[1]) + " empty table(s).") if table_counter: print(str(table_counter) + " non-empty table(s) verified.")
ALTER TABLE DBO.table_nm ALTER COLUMN column_nm new_type(new_len) null PRINT '[INFO] ALTER COLUMN [DBO].[table_nm].[column_nm] TO new_type(new_len)' END '''.replace('table_nm',table_nm).replace('column_nm',column_nm).replace('type_nm',type_nm).replace('[length]',length).replace('new_type',new_type).replace('new_len',new_len) return sql if __name__ == '__main__': for server in SERVER_LIST: server_nm = server[0] name = server[1] with UseSqlserverDB(server_nm) as cursor: db_list = search_db(cursor) gen_sql = '' rollback_sql = '' count = 1 new_sheet = workbook.copy_worksheet(tempate_sheet) new_sheet.title = name for db in db_list: address1_list = search_column(cursor, db_name = db[0], column_name = "a.name LIKE 'STREET_ADR_LN_1' OR a.name LIKE 'ADDRESS_LINE_1%' ", column_type = 'varchar',column_len = '512', table_name = 'D_ADDRESS') city_list = search_column(cursor, db_name = db[0], column_name = "a.name = 'CITY_NM' OR a.name = 'CITY_NAME' ", column_type = 'varchar',column_len = '255', table_name = 'D_ADDRESS') if len(address1_list) > 0 or len(city_list) > 0: gen_sql += 'USE ' + db[0] + '\nGO \n'