def crTable_Constraints(cursor):
    """Populate the constraint table from every database listed in tb_db.

    Reads the distinct database names from ``crossrep.tb_db`` and, for each
    one, copies the distinct (TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME,
    CONSTRAINT_TYPE) rows of its INFORMATION_SCHEMA.TABLE_CONSTRAINTS view
    into ``crossrep.tb_constr``.  The first database (re)creates the table,
    every later one appends to it.  Nothing is created when tb_db is empty.

    Args:
        cursor: cursor object exposing ``execute()`` and ``fetchall()``.
    """
    # Kept as an equality test so the exact original on/off rule is preserved.
    chatty = crossrep.verbose == True
    if chatty:
        print('Start creating constraint table ' + crossrep.tb_constr + ' ...')

    target = crossrep.tb_constr
    list_dbs = ("select distinct database_name from " + crossrep.tb_db +
                " order by database_name")
    if chatty:
        print(list_dbs)
    cursor.execute(list_dbs)

    for position, db_row in enumerate(cursor.fetchall()):
        dbname = crossrep.quoteID(db_row[0])
        if chatty:
            print('dbname:' + dbname)

        # Shared SELECT over this database's constraint view.
        constraint_select = (
            " select distinct TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME, CONSTRAINT_TYPE from "
            + crossrep.acctpref_qualifier + dbname +
            ".INFORMATION_SCHEMA.TABLE_CONSTRAINTS")

        if position == 0:
            # SNOW-84847
            statement = "create or replace table " + target + " as" + constraint_select
        else:
            statement = "insert into " + target + constraint_select
        cursor.execute(statement)

    cursor.execute("commit")
    if chatty:
        print('Finish creating constraint table ...')
def _rewriteDatabaseDDL(ddl):
    """Rewrite one get_ddl() script so its CREATE statements are re-runnable.

    Args:
        ddl: DDL text as returned by ``get_ddl('database', ...)``.

    Returns:
        The rewritten DDL text.
    """
    flags = re.MULTILINE | re.IGNORECASE

    # "create [or replace] <object> " -> "create <object> if not exists "
    ddl = re.sub(
        r'create\s+(or\s+replace\s+)?(table|view|materialized view|file format|function|sequence|procedure|external table|stage|pipe|stream|task)\s+',
        r'create \2 if not exists ',
        ddl,
        flags=flags)

    # Databases and schemas additionally get a "use" statement so that the
    # object definitions that follow are created inside them.
    ddl = re.sub(
        r'create\s+(or\s+replace\s+)?(database|schema)\s+(\S+)(\s+)?;',
        r'create \2 if not exists \3;\n use \2 \3;',
        ddl,
        flags=flags)

    # create VIEW if not exists S2.VIEW1 COPY GRANTS AS SELECT * FROM S1.TAB1;
    # Remove COPY GRANTS on create view - not needed for newly created
    # objects/grants.  The name must be captured as (\S+): with (\S)+ the
    # group only keeps the LAST character of the view name, which truncated
    # the name in the rewritten statement.
    ddl = re.sub(
        r'create\s+(view if not exists)\s+(\S+)\s+(COPY GRANTS)\s+',
        r'create view if not exists \2 ',
        ddl,
        flags=flags)

    # FIELD_OPTIONALLY_ENCLOSED_BY = ''' ==> FIELD_OPTIONALLY_ENCLOSED_BY = ''''
    ddl = re.sub(
        r"FIELD_OPTIONALLY_ENCLOSED_BY = '''",
        r"FIELD_OPTIONALLY_ENCLOSED_BY = ''''",
        ddl,
        flags=flags)
    return ddl


def genDatabaseDDL(dbname, of, cursor):
    """Write the rewritten DDL of one database to an open script file.

    Runs ``select get_ddl('database', ...)`` for the database (prefixed with
    ``crossrep.acctpref_qualifier``), rewrites every returned script with
    ``_rewriteDatabaseDDL`` and writes it to ``of``.  Any exception is
    reported on stdout and the database is skipped; nothing is re-raised.

    Args:
        dbname: database name; passed through ``crossrep.quoteID`` before use.
        of: writable file-like object receiving the DDL text (not closed here).
        cursor: cursor object exposing ``execute()`` and ``fetchall()``.
    """
    # Kept as an equality test so the exact original on/off rule is preserved.
    verbose = crossrep.verbose == True
    if verbose:
        print('Start generating database object DDLs for ' + dbname + '...')

    dbname = crossrep.quoteID(dbname)
    dquery = "select get_ddl('database', '" + crossrep.acctpref_qualifier + dbname + "')"
    try:
        cursor.execute(dquery)
        for record in cursor.fetchall():
            of.write(_rewriteDatabaseDDL(record[0]))
    except Exception as err:
        # Best effort: report and skip this database.
        print('An error occured in generating DDLs, skipped database "' +
              crossrep.acctpref_qualifier + dbname + '" :' + str(err))
        print('Query:\t' + dquery)

    if verbose:
        print('Finish creating database DDL script file ...')
def ELTByDatabase(ufile_pref, lfile_pref, stgname, ffname, dbname, cursor):
    """Generate batched COPY scripts to unload and reload a database's tables.

    Lists the non-empty base tables of ``dbname`` from its information_schema
    together with their size in GB, then appends one unload statement
    (``copy into <stage folder> from <table>``) and one load statement
    (``copy into <table> from <stage folder>``) per table to script files.
    Tables are spread over files by size class:

        small   : <  2 GB          16 statements per file
        large   : [2 GB, 15 GB)     8 statements per file
        xlarge  : [15 GB, 30 GB)    4 statements per file
        2xlarge : >= 30 GB          4 statements per file

    File names are ``<prefix><database>_<class>_<n>.sql`` with n starting
    at 1; files are opened in append mode.

    Args:
        ufile_pref: path prefix of the unload script files.
        lfile_pref: path prefix of the load script files.
        stgname: stage name, resolved in crossrep.default_db/default_sc.
        ffname: file format name, resolved in crossrep.default_db/default_sc.
        dbname: database to process; passed through ``crossrep.quoteID``.
        cursor: cursor object exposing ``execute()`` and ``fetchall()``.
    """
    # Kept as an equality test so the exact original on/off rule is preserved.
    verbose = crossrep.verbose == True
    if verbose:
        print('Start generating unloading data copy statement ...')
        print(ufile_pref + '; ' + lfile_pref + '; ' + stgname + '; ' + ffname + '; ' + dbname)

    dbname = crossrep.quoteID(dbname)
    query = (
        " select distinct TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME,FLOOR(BYTES/1000000000,1) as GB from "
        + crossrep.acctpref_qualifier + dbname +
        ".information_schema.tables where table_owner is not null and row_count > 0 and row_count is not null and TABLE_TYPE = 'BASE TABLE' order by TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME "
    )
    if verbose:
        print(query)
    cursor.execute(query)
    rows = cursor.fetchall()

    # (label, exclusive upper bound in GB or None for "no limit", rollover).
    # A file is rolled over when its counter REACHES the rollover value, so
    # each file holds rollover - 1 statements.
    size_classes = (
        ("small", 2, 17),
        ("large", 15, 9),
        ("xlarge", 30, 5),
        ("2xlarge", None, 5),
    )
    # Per class: [statements in the current file, current file number].
    progress = {label: [0, 1] for label, _, _ in size_classes}

    qualified_prefix = crossrep.default_db + '.' + crossrep.default_sc + '.'
    format_clause = " FILE_FORMAT = (FORMAT_NAME='" + qualified_prefix + ffname + "')"

    for row in rows:
        row_db = crossrep.quoteID(row[0])
        scname = crossrep.quoteID(row[1])
        tbname = crossrep.quoteID(row[2])
        tbsize_gb = row[3]
        table_ref = row_db + '.' + scname + '.' + tbname
        stage_folder = ('@' + qualified_prefix + stgname + '/' + row_db + '/' +
                        scname + '/' + tbname + '/')

        # First class whose upper bound exceeds the table size.
        for label, upper_gb, rollover in size_classes:
            if upper_gb is None or tbsize_gb < upper_gb:
                break

        counters = progress[label]
        counters[0] += 1
        if counters[0] >= rollover:
            counters[0] = 1
            counters[1] += 1

        suffix = "_" + label + "_" + str(counters[1]) + ".sql"
        ufile = ufile_pref + row_db + suffix
        lfile = lfile_pref + row_db + suffix
        if verbose:
            print("unloading file:" + ufile)
            print("loading file:" + lfile)

        unload_query = ('copy into ' + stage_folder + ' from ' + table_ref +
                        format_clause + ' MAX_FILE_SIZE=160000000 HEADER=TRUE;\n')
        load_query = ('copy into ' + table_ref + ' from ' + stage_folder +
                      format_clause + ' ;\n')

        # Context managers guarantee the handles are closed even when a
        # write or the second open fails.  The files are written one after
        # the other so the unload statement is flushed first, as before.
        with open(ufile, "a+") as uf:
            uf.write(unload_query)
        with open(lfile, "a+") as lf:
            lf.write(load_query)

    if verbose:
        print('Finish generating unloading data copy statement ... ')
def repExTable(drop_file, ddl_file, cursor):
    """Report external tables: write drop statements and DDL per schema.

    For every schema (taken from snowflake.account_usage.SCHEMATA in
    'CUSTOMER' mode, from ``crossrep.tb_sc`` in 'SNOWFLAKE' mode) the
    external tables are listed with SHOW EXTERNAL TABLES and staged in the
    temp table ``temp_exttbs``.  A ``drop external table if exists``
    statement is written to ``drop_file`` for each of them, and the
    ``get_ddl`` output of each table whose "invalid" flag is 'false' is
    written to ``ddl_file``.  A failure inside one schema is printed and
    that schema is skipped.

    Args:
        drop_file: writable file-like object receiving the drop statements.
        ddl_file: writable file-like object receiving the table DDL.
        cursor: cursor object exposing ``execute()`` and ``fetchall()``.

    Raises:
        ValueError: if ``crossrep.mode`` is neither 'CUSTOMER' nor 'SNOWFLAKE'.
    """
    # Kept as an equality test so the exact original on/off rule is preserved.
    verbose = crossrep.verbose == True
    if verbose:
        print('Start reporting external tables ...')

    if crossrep.mode == 'CUSTOMER':
        query = (
            " select CATALOG_NAME, SCHEMA_NAME from snowflake.account_usage.SCHEMATA where SCHEMA_OWNER is not null and DELETED is null order by CATALOG_NAME, SCHEMA_NAME "
        )
    elif crossrep.mode == 'SNOWFLAKE':
        query = (
            " select CATALOG_NAME, SCHEMA_NAME from " + crossrep.tb_sc +
            " where SCHEMA_OWNER is not null order by CATALOG_NAME, SCHEMA_NAME "
        )
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `query`.
        raise ValueError('Unsupported crossrep.mode: ' + repr(crossrep.mode))

    if verbose:
        print(query)
        print('account prefix:' + crossrep.acctpref_qualifier)
    cursor.execute(query)
    schemas = cursor.fetchall()

    for schema_row in schemas:
        dbname = crossrep.quoteID(schema_row[0])
        scname = crossrep.quoteID(schema_row[1])
        if verbose:
            print("dbname: " + dbname + "; scname: " + scname)
        try:
            cursor.execute("begin")
            cursor.execute("show external tables in schema " +
                           crossrep.acctpref_qualifier + dbname + "." + scname)
            # Must directly follow the SHOW: last_query_id() refers to it.
            cursor.execute(
                'create or replace temp table temp_exttbs as select "name" name,"owner" owner,"invalid" invalid from table(result_scan(last_query_id())) '
            )
            cursor.execute("commit")

            # Every external table of the schema gets a drop statement.
            # NOTE(review): dbname/scname already went through quoteID and
            # are wrapped in double quotes again here - confirm quoteID's
            # output is meant to be re-quoted.
            cursor.execute("select name from temp_exttbs ")
            for table_row in cursor.fetchall():
                tname = table_row[0]
                if verbose:
                    print("tbname:" + tname)
                drop_file.write('drop external table if exists "' + dbname +
                                '"."' + scname + '"."' + tname + '" ;\n')

            # Only tables not flagged invalid have their DDL exported.
            cursor.execute(
                "select name, owner from temp_exttbs where invalid = 'false'")
            for table_row in cursor.fetchall():
                tname = table_row[0]
                if verbose:
                    print("dbname: " + dbname + "; scname: " + scname +
                          "; tbname: " + tname)
                ddl_file.write('--"' + dbname + '"."' + scname + '"."' +
                               tname + '"\n')
                # The table name is quoted the same way as database and
                # schema; passing the raw name made get_ddl resolve
                # case-sensitive names incorrectly.
                dquery = ("select get_ddl('table','" + dbname + "." + scname +
                          "." + crossrep.quoteID(tname) + "')")
                if verbose:
                    print(dquery)
                cursor.execute(dquery)
                for ddl_row in cursor.fetchall():
                    ddl_file.write(ddl_row[0] + '\n\n')

            cursor.execute('drop table if exists temp_exttbs ')
            cursor.execute("commit")
        except Exception as err:
            # Best effort: report and continue with the next schema.
            print('An error occurred in reporting external tables:' + str(err))
            print("Skipped external tables in schema " +
                  crossrep.acctpref_qualifier + dbname + "." + scname)

    if verbose:
        print('Finish reporting external tables ... ')
def crSchema(cursor):
    """Create or refresh the schema inventory table ``crossrep.tb_sc``.

    For every database listed in ``crossrep.tb_db`` the schemas are read
    with SHOW SCHEMAS and staged in a temp table (CATALOG_NAME, SCHEMA_NAME,
    SCHEMA_OWNER).  If the inventory table does not exist yet it is created
    from the first staged result; otherwise the staged rows are merged in
    (new schemas inserted, changed owners updated) and schemas of that
    database that are no longer listed are deleted.

    Args:
        cursor: cursor object exposing ``execute()`` and ``fetchall()``.
    """
    # Kept as an equality test so the exact original on/off rule is preserved.
    verbose = crossrep.verbose == True
    tbname = crossrep.tb_sc
    if verbose:
        print('Start creating or updating schema table ...')

    # Check whether the inventory table already exists.
    checkquery = (
        "select count(*) from information_schema.tables where table_catalog = '"
        + crossrep.default_db + "' and table_schema = '" +
        crossrep.default_sc + "' and table_name = '" + tbname +
        "' and table_owner is not null")
    cursor.execute(checkquery)
    isCreated = False
    for count_row in cursor.fetchall():
        if count_row[0] == 0:
            if verbose:
                print('create new schema table')
        else:
            if verbose:
                print('update schema table')
            isCreated = True

    tb_temp = "TEMP_" + tbname
    cursor.execute("select distinct DATABASE_NAME from " + crossrep.tb_db +
                   " order by DATABASE_NAME")
    for db_row in cursor.fetchall():
        dbname = crossrep.quoteID(db_row[0])
        squery = "SHOW SCHEMAS IN DATABASE " + crossrep.acctpref_qualifier + dbname
        cquery = (
            "create or replace temp table " + tb_temp + " as select '" +
            dbname +
            "'::string CATALOG_NAME, \"name\" SCHEMA_NAME, \"owner\" SCHEMA_OWNER from table(result_scan(last_query_id())) "
            + " where SCHEMA_OWNER != '' and SCHEMA_NAME != '' ")

        cursor.execute("begin")
        if verbose:
            print(squery)
            print(cquery)
        # cquery must directly follow squery: last_query_id() refers to it.
        cursor.execute(squery)
        cursor.execute(cquery)
        cursor.execute("commit")

        if isCreated == False:
            if verbose:
                print('create schema table')
            cursor.execute(
                "create or replace table " + tbname +
                " as select CATALOG_NAME, SCHEMA_NAME, SCHEMA_OWNER from " +
                tb_temp)
            isCreated = True
        else:
            if verbose:
                print('update schema table')
            # Insert/update the delta.  The source is every staged row that
            # is not present identically in the inventory, so a "matched"
            # row is one whose SCHEMA_OWNER changed; without the update
            # clause such owner changes were silently dropped.
            mquery = (
                "merge into " + tbname + " tgt using (" +
                " select CATALOG_NAME, SCHEMA_NAME, SCHEMA_OWNER from " +
                tb_temp + " minus " +
                " select CATALOG_NAME, SCHEMA_NAME, SCHEMA_OWNER from " +
                tbname +
                " ) as src on tgt.SCHEMA_NAME = src.SCHEMA_NAME AND tgt.CATALOG_NAME = src.CATALOG_NAME "
                + " when matched then update set tgt.SCHEMA_OWNER = src.SCHEMA_OWNER"
                + " when not matched then insert ( CATALOG_NAME, SCHEMA_NAME, SCHEMA_OWNER)"
                + " values ( src.CATALOG_NAME, src.SCHEMA_NAME, src.SCHEMA_OWNER )"
            )
            if verbose:
                print(mquery)
            cursor.execute(mquery)

            # Delete the schemas of this database that are no longer listed.
            dquery = (
                "delete from " + tbname + " tgt using (" +
                " select CATALOG_NAME , SCHEMA_NAME from " + tbname +
                " where CATALOG_NAME = '" + dbname + "'"
                " minus " + " select CATALOG_NAME, SCHEMA_NAME from " +
                tb_temp +
                " ) as src where tgt.SCHEMA_NAME = src.SCHEMA_NAME AND tgt.CATALOG_NAME = src.CATALOG_NAME"
            )
            if verbose:
                print(dquery)
            cursor.execute(dquery)
            cursor.execute("commit")

    cursor.execute("drop table if exists " + tb_temp)
    cursor.execute("commit")
    if verbose:
        print('Finish creating schema table ...')
def repMVs(drop_file, ddl_file, cursor):
    """Report invalid and cross-database materialized views per schema.

    For every schema (taken from snowflake.account_usage.SCHEMATA in
    'CUSTOMER' mode, from ``crossrep.tb_sc`` in 'SNOWFLAKE' mode) the
    materialized views are listed with SHOW MATERIALIZED VIEWS and staged in
    the temp table ``temp_mvs``.  Views flagged invalid get a drop statement
    in ``drop_file``.  Valid views whose source database differs from their
    own database get a drop statement in ``drop_file`` and their DDL text in
    ``ddl_file``.  A failure inside one schema is printed and that schema is
    skipped.

    Args:
        drop_file: writable file-like object receiving the drop statements.
        ddl_file: writable file-like object receiving the view DDL.
        cursor: cursor object exposing ``execute()`` and ``fetchall()``.

    Raises:
        ValueError: if ``crossrep.mode`` is neither 'CUSTOMER' nor 'SNOWFLAKE'.
    """
    # Kept as an equality test so the exact original on/off rule is preserved.
    verbose = crossrep.verbose == True
    if verbose:
        print('Start reporting cross-db referenced materized views ... ')

    if crossrep.mode == 'CUSTOMER':
        query = (
            " select CATALOG_NAME, SCHEMA_NAME from snowflake.account_usage.SCHEMATA where SCHEMA_OWNER is not null and DELETED is null order by CATALOG_NAME, SCHEMA_NAME "
        )
    elif crossrep.mode == 'SNOWFLAKE':
        query = (
            " select CATALOG_NAME, SCHEMA_NAME from " + crossrep.tb_sc +
            " where SCHEMA_OWNER is not null order by CATALOG_NAME, SCHEMA_NAME "
        )
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `query`.
        raise ValueError('Unsupported crossrep.mode: ' + repr(crossrep.mode))

    if verbose:
        print(query)
    cursor.execute(query)

    for schema_row in cursor.fetchall():
        # Resolved outside the try block so the error report below always
        # names the schema that is actually being processed.
        dbname = crossrep.quoteID(schema_row[0])
        scname = crossrep.quoteID(schema_row[1])
        try:
            if verbose:
                print("dbname:" + dbname + "; scname:" + scname)
            cursor.execute("begin")
            cursor.execute("show materialized views in schema " +
                           crossrep.acctpref_qualifier + dbname + "." + scname)
            # Must directly follow the SHOW: last_query_id() refers to it.
            cursor.execute(
                'create or replace temp table temp_mvs as select "database_name" dbname, "schema_name" scname, "name" name,"source_database_name" sdbname,"owner" owner,"invalid" invalid, "text" ddl from table(result_scan(last_query_id())) '
            )
            cursor.execute("commit")

            # Invalid views: drop statement only.  Identifiers are wrapped
            # in double quotes like the cross-db branch below; the former
            # single quotes turned them into string literals and made the
            # statement invalid SQL.
            # NOTE(review): the names already went through quoteID before
            # being wrapped in quotes here - confirm quoteID's output is
            # meant to be re-quoted.
            cursor.execute("select name from temp_mvs where invalid = 'true' ")
            for mv_row in cursor.fetchall():
                vname = crossrep.quoteID(mv_row[0])
                drop_query = ('drop materialized view "' + dbname + '"."' +
                              scname + '"."' + vname + '" ;\n ')
                drop_file.write(drop_query)
                if verbose:
                    print(drop_query)

            # Valid views built on a different database: drop + DDL.
            cursor.execute(
                "select name, ddl, owner from temp_mvs where invalid = 'false' and sdbname != dbname"
            )
            for mv_row in cursor.fetchall():
                vname = crossrep.quoteID(mv_row[0])
                ddl = mv_row[1]
                drop_file.write('drop materialized view "' + dbname + '"."' +
                                scname + '"."' + vname + '" ; \n')
                ddl_file.write('--"' + dbname + '"."' + scname + '"."' +
                               vname + '"\n')
                ddl_file.write(ddl + '\n\n')

            cursor.execute('drop table if exists temp_mvs ')
            cursor.execute("commit")
        except Exception as err:
            # Best effort: report and continue with the next schema.
            print('An error occurred in reporting cross-db referenced MVs :' +
                  str(err))
            print("Skipped MVs in schema " + crossrep.acctpref_qualifier +
                  dbname + "." + scname)

    if verbose:
        print('Finish reporting cross-db referenced materized views ... ')