def getLastUpdateDates(db_conf_file, sc_schema, resources):
    """Read the last processing datetime recorded for each resource.

    For every source table name in ``resources`` the staging table name is
    derived, its SCAI process short description is looked up, and the
    ``last_processing_datetime`` of the active integration/process relation
    is queried from ``crm_integration_anlt``.

    Args:
        db_conf_file: Database configuration handed to
            ``getDatabaseConnection`` and ``scai.getProcessShortDescription``.
        sc_schema: Not used by this function; kept so existing callers keep
            working.
        resources: Iterable of source table names. The first four characters
            of each name are dropped and replaced by ``'stg_' + COUNTRY + '_'``
            to build the target table name.

    Returns:
        dict mapping each resource name to the ISO-formatted last processing
        datetime (the query falls back to ``1900-01-01`` when the stored
        value is NULL).

    Raises:
        LookupError: if no active process row exists for a resource.
    """
    print('Getting last update dates...')
    conn = getDatabaseConnection(db_conf_file)
    cur = conn.cursor()
    last_updates_dict = dict()

    try:
        for resource in resources:
            # Target table name has the country in the middle of the source
            # table name (for example, stg_d_base_contacts -> stg_pt_d_base_contacts)
            target_table_name = 'stg_' + COUNTRY + '_' + resource[4:]
            scai_process_name = scai.getProcessShortDescription(db_conf_file, target_table_name)

            cur.execute(
                "SELECT isnull(rel_integr_proc.last_processing_datetime,'1900-01-01 00:00:00.000000') "
                "FROM crm_integration_anlt.t_lkp_scai_process proc, crm_integration_anlt.t_rel_scai_integration_process rel_integr_proc "
                "WHERE rel_integr_proc.cod_process = proc.cod_process "
                "AND rel_integr_proc.cod_country = %(COD_COUNTRY)d "
                "AND rel_integr_proc.cod_integration = %(COD_INTEGRATION)d "
                "AND rel_integr_proc.ind_active = 1 "
                "AND proc.dsc_process_short = '%(scai_process_name)s' "
                % {
                    'COD_COUNTRY': COD_COUNTRY,
                    'COD_INTEGRATION': COD_INTEGRATION,
                    'scai_process_name': scai_process_name,
                }
            )

            row = cur.fetchone()
            if row is None:
                # Without this guard the failure is an opaque
                # "'NoneType' object is not subscriptable".
                raise LookupError(
                    "No active SCAI process row found for process '%s' (table %s)"
                    % (scai_process_name, target_table_name)
                )

            last_updates_dict[resource] = row[0].isoformat()
            print('\t' + target_table_name + ': ' + last_updates_dict[resource])
    finally:
        # Release the cursor and connection even when a lookup fails.
        cur.close()
        conn.close()

    return last_updates_dict
def copyFromDatabaseToS3(source_conf, target_conf, resources, schema, last_updates_dict, aux_path, scai_last_execution_status=1):
    """UNLOAD the incremental rows of each resource from the source database to S3.

    For every resource, rows with ``meta_event_time`` at or after the
    resource's last update date and matching ``BASE_ACCOUNT_COUNTRY`` are
    unloaded to ``s3://<aux_path>/<schema>_<resource>/data_`` with a manifest.
    The SCAI process is started here for each resource that runs; it is not
    ended here on success.

    Args:
        source_conf: Configuration for the source database connection and
            for ``getS3Keys``.
        target_conf: Configuration passed to the ``scai`` bookkeeping calls.
        resources: Iterable of source table names.
        schema: Source schema holding the resources.
        last_updates_dict: Maps each resource name to its last update date
            string.
        aux_path: S3 bucket/prefix used as the unload destination.
        scai_last_execution_status: 1 for a normal execution, 3 when
            re-executing after an error (resources are then skipped until
            ``scai.processCheck`` reports status 3 for one of them).

    Raises:
        SystemExit: if an UNLOAD fails; the SCAI process and integration are
            flagged with status 3 before exiting.
    """
    print('Connecting to Chandra...')
    conn = getDatabaseConnection(source_conf)
    cur = conn.cursor()

    try:
        credentials = getS3Keys(source_conf)

        # UNLOAD resources data
        print('Unloading from Chandra...')
        for resource in resources:
            print('\t' + resource + ": " + last_updates_dict[resource])

            # Target table name has the country in the middle of the source
            # table name (for example, stg_d_base_contacts -> stg_pt_d_base_contacts)
            tg_table = 'stg_' + COUNTRY + '_' + resource[4:]
            scai_process_name = scai.getProcessShortDescription(target_conf, tg_table)  # SCAI

            if scai_last_execution_status == 3:
                scai_process_status = scai.processCheck(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY, scai_last_execution_status)  # SCAI

            # Is normal execution or re-execution starting from the step that was in error.
            # scai_process_status is only read when it was assigned just above.
            if scai_last_execution_status == 1 or (scai_last_execution_status == 3 and scai_process_status == 3):
                scai.processStart(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY)  # SCAI
                try:
                    cur.execute(
                        "UNLOAD ('SELECT * from %(schema)s.%(resource)s "
                        " WHERE meta_event_time >= \\\'%(last_update_date)s\\\' "
                        " AND base_account_country = \\\'%(BASE_ACCOUNT_COUNTRY)s\\\'') "
                        "TO 's3://%(aux_path)s/%(schema)s_%(resource)s/data_' "
                        "CREDENTIALS '%(credentials)s' "
                        "ESCAPE "
                        "manifest;"
                        % {
                            'schema': schema,
                            'resource': resource,
                            'last_update_date': last_updates_dict[resource],
                            'credentials': credentials,
                            'aux_path': aux_path,
                            'BASE_ACCOUNT_COUNTRY': BASE_ACCOUNT_COUNTRY,
                        }
                    )
                except Exception as e:
                    conn.rollback()
                    scai.processEnd(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'meta_event_time', 3)  # SCAI
                    scai.integrationEnd(target_conf, COD_INTEGRATION, COD_COUNTRY, 3)  # SCAI
                    print(e)
                    # Not every exception carries pgerror; avoid masking the
                    # real failure with an AttributeError.
                    print(getattr(e, 'pgerror', None))
                    sys.exit("The process aborted with error.")
                else:
                    conn.commit()

                    # Enable execution of following processes
                    scai_last_execution_status = 1
    finally:
        # Close connection (also on the abort path)
        cur.close()
        conn.close()
def copyFromS3ToDatabase(target_conf, resources, sc_schema, tg_schema, aux_path, scai_last_execution_status=1, region='us-west-2'):
    """COPY previously unloaded S3 data into the target staging tables.

    For every resource the staging table is truncated, loaded from the
    manifest at ``s3://<aux_path>/<sc_schema>_<resource>/data_manifest`` and
    analyzed. On success the SCAI process is ended with status 1.

    Args:
        target_conf: Configuration for the target database connection, for
            ``getS3Keys`` and for the ``scai`` bookkeeping calls.
        resources: Iterable of source table names.
        sc_schema: Source schema name (part of the S3 key).
        tg_schema: Target schema holding the staging tables.
        aux_path: S3 bucket/prefix the data was unloaded to.
        scai_last_execution_status: 1 for a normal execution, 3 when
            re-executing after an error (resources are then skipped until
            ``scai.processCheck`` reports status 3 for one of them).
        region: AWS region passed to the COPY ``REGION`` clause. Defaults to
            the previously hard-coded ``'us-west-2'``.

    Raises:
        SystemExit: if a load fails; the SCAI process and integration are
            flagged with status 3 before exiting.
    """
    # LOAD to target redshift
    print('Connecting to Yamato...')
    conn_target = getDatabaseConnection(target_conf)
    cur_target = conn_target.cursor()

    try:
        credentials = getS3Keys(target_conf)

        print('Loading to Yamato...')
        for resource in resources:
            # Target table name has the country in the middle of the source
            # table name (for example, stg_d_base_contacts -> stg_pt_d_base_contacts)
            tg_table = 'stg_' + COUNTRY + '_' + resource[4:]
            print('Loading %(tg_schema)s.%(tg_table)s...' % {'tg_schema': tg_schema, 'tg_table': tg_table})
            scai_process_name = scai.getProcessShortDescription(target_conf, tg_table)  # SCAI

            if scai_last_execution_status == 3:
                scai_process_status = scai.processCheck(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY, scai_last_execution_status)  # SCAI

            # Is normal execution or re-execution starting from the step that was in error.
            # scai_process_status is only read when it was assigned just above.
            if scai_last_execution_status == 1 or (scai_last_execution_status == 3 and scai_process_status == 3):
                try:
                    cur_target.execute(
                        "TRUNCATE TABLE %(tg_schema)s.%(tg_table)s; "
                        "COPY %(tg_schema)s.%(tg_table)s "
                        "FROM 's3://%(aux_path)s/%(sc_schema)s_%(resource)s/data_manifest' "
                        "CREDENTIALS '%(credentials)s' "
                        "REGION '%(region)s' "
                        "ESCAPE "
                        "manifest; "
                        "ANALYZE %(tg_schema)s.%(tg_table)s;"
                        % {
                            'tg_schema': tg_schema,
                            'tg_table': tg_table,
                            'resource': resource,
                            'credentials': credentials,
                            'aux_path': aux_path,
                            'sc_schema': sc_schema,
                            'region': region,
                        }
                    )
                except Exception as e:
                    conn_target.rollback()
                    scai.processEnd(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'meta_event_time', 3)  # SCAI
                    scai.integrationEnd(target_conf, COD_INTEGRATION, COD_COUNTRY, 3)  # SCAI
                    print(e)
                    # Not every exception carries pgerror; avoid masking the
                    # real failure with an AttributeError.
                    print(getattr(e, 'pgerror', None))
                    sys.exit("The process aborted with error.")
                else:
                    conn_target.commit()
                    scai.processEnd(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'meta_event_time', 1)  # SCAI

                    # Enable execution of following processes
                    scai_last_execution_status = 1
    finally:
        # Release the cursor and connection on both success and abort paths.
        cur_target.close()
        conn_target.close()
def copyBaseTables(db_conf_file, sc_schema, tg_schema, resources, last_updates_dict, verticals_names='', scai_last_execution_status=1):
    """Reload staging tables from source tables living in the same database.

    For every resource the staging table is truncated, filled with the source
    rows whose ``operation_timestamp`` is at or after the resource's last
    update date, and analyzed.

    Args:
        db_conf_file: Configuration for the database connection and for the
            ``scai`` lookups.
        sc_schema: Schema holding the source tables.
        tg_schema: Schema holding the staging tables.
        resources: Iterable of source table names.
        last_updates_dict: Maps each resource name to its last update date
            string.
        verticals_names: Not used by this function; kept so existing callers
            keep working.
        scai_last_execution_status: 1 for a normal execution, 3 when
            re-executing after an error (resources are then skipped until
            ``scai.processCheck`` reports status 3 for one of them).

    Returns:
        The execution status to use in subsequent processes: reset to 1 once
        any resource has been loaded here, otherwise the value passed in.

    Raises:
        SystemExit: if a load fails.
    """
    print('Connecting to Yamato...')
    conn_target = getDatabaseConnection(db_conf_file)
    cur_target = conn_target.cursor()

    try:
        for resource in resources:
            # Target table name has the country in the middle of the source
            # table name (for example, stg_d_base_contacts -> stg_pt_d_base_contacts)
            tg_table = 'stg_' + COUNTRY + '_' + resource[4:]
            scai_process_name = scai.getProcessShortDescription(db_conf_file, tg_table)  # SCAI

            if scai_last_execution_status == 3:
                scai_process_status = scai.processCheck(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY, scai_last_execution_status)  # SCAI

            # Is normal execution or re-execution starting from the step that was in error.
            # scai_process_status is only read when it was assigned just above.
            if scai_last_execution_status == 1 or (scai_last_execution_status == 3 and scai_process_status == 3):
                # NOTE(review): the SCAI start/end calls are commented out in
                # this function only; confirm whether that is deliberate.
                #scai.processStart(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY) # SCAI
                print('Loading %(tg_schema)s.%(tg_table)s from %(last_update)s...' % {'tg_schema': tg_schema, 'tg_table': tg_table, 'last_update': last_updates_dict[resource]})
                try:
                    cur_target.execute(
                        "TRUNCATE TABLE %(tg_schema)s.%(tg_table)s; "
                        "INSERT INTO %(tg_schema)s.%(tg_table)s "
                        "SELECT * FROM %(sc_schema)s.%(resource)s "
                        "WHERE operation_timestamp >= '%(last_update_date)s'; "
                        "ANALYZE %(tg_schema)s.%(tg_table)s;"
                        % {
                            'tg_table': tg_table,
                            'tg_schema': tg_schema,
                            'sc_schema': sc_schema,
                            'resource': resource,
                            'last_update_date': last_updates_dict[resource],
                        }
                    )
                except Exception as e:
                    conn_target.rollback()
                    #scai.processEnd(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'operation_timestamp',3) # SCAI
                    #scai.integrationEnd(db_conf_file, COD_INTEGRATION, COD_COUNTRY, 3) # SCAI
                    print(e)
                    # Not every exception carries pgerror; avoid masking the
                    # real failure with an AttributeError.
                    print(getattr(e, 'pgerror', None))
                    sys.exit("The process aborted with error.")
                else:
                    conn_target.commit()
                    #scai.processEnd(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'operation_timestamp',1) # SCAI

                    # Enable execution of following processes
                    scai_last_execution_status = 1
    finally:
        # Release the cursor and connection on both success and abort paths.
        cur_target.close()
        conn_target.close()

    # If error was solved here, return new status to use in subsequent processes
    return scai_last_execution_status
def copyHydraTable(db_conf_file, sc_schema, tg_schema, resource, last_update_date, horizontal_name, scai_last_execution_status=1):
    """Rebuild the Hydra staging table with aggregated ``hydra.web`` events.

    The staging table ``stg_<COUNTRY>_<sc_schema>_<resource>`` is truncated
    and refilled with per-day, per-ad, per-action counts from ``hydra.web``
    for ``HYDRA_COUNTRY_CODE`` starting at ``last_update_date``, then analyzed.

    Args:
        db_conf_file: Configuration for the database connection and for the
            ``scai`` bookkeeping calls.
        sc_schema: Source schema name; only used to build the staging table
            name.
        tg_schema: Schema holding the staging table.
        resource: Resource name; used to build the staging table name.
        last_update_date: Lower bound (inclusive) applied to
            ``server_date_day``.
        horizontal_name: SQL expression inserted verbatim as the ``source``
            column (it is not quoted here, so a literal must arrive already
            quoted).
        scai_last_execution_status: 1 for a normal execution, 3 when
            re-executing after an error (the load is then skipped unless
            ``scai.processCheck`` reports status 3 for this process).

    Returns:
        The execution status to use in subsequent processes: 1 if the load
        ran here, otherwise the value passed in.

    Raises:
        SystemExit: if the load fails; the SCAI process and integration are
            flagged with status 3 before exiting.
    """
    print('Connecting to Yamato...')
    conn = getDatabaseConnection(db_conf_file)
    cur = conn.cursor()

    try:
        tg_table = 'stg_%(COUNTRY)s_%(sc_schema)s_%(resource)s' % {
            'resource': resource,
            'sc_schema': sc_schema,
            'COUNTRY': COUNTRY,
        }
        scai_process_name = scai.getProcessShortDescription(db_conf_file, tg_table)  # SCAI

        if scai_last_execution_status == 3:
            scai_process_status = scai.processCheck(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY, scai_last_execution_status)  # SCAI

        # Is normal execution or re-execution starting from the step that was in error.
        # scai_process_status is only read when it was assigned just above.
        if scai_last_execution_status == 1 or (scai_last_execution_status == 3 and scai_process_status == 3):
            scai.processStart(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY)  # SCAI
            print('Loading %(tg_schema)s.%(tg_table)s from %(last_update)s...' % {
                'tg_schema': tg_schema,
                'tg_table': tg_table,
                'last_update': last_update_date,
            })
            try:
                cur.execute(
                    "TRUNCATE TABLE %(tg_schema)s.%(tg_table)s; "
                    "INSERT INTO %(tg_schema)s.%(tg_table)s "
                    "SELECT "
                    " server_date_day, "
                    " ad_id, "
                    " action_type, "
                    " %(horizontal_name)s source, "
                    " count(*) occurrences, "
                    " count(distinct session_long) distinct_occurrences "
                    "FROM hydra.web "
                    "WHERE upper(country_code) = '%(HYDRA_COUNTRY_CODE)s' "
                    "AND ad_id is not null "
                    "AND server_date_day >= '%(last_update_date)s' "
                    "GROUP BY server_date_day, ad_id, action_type; "
                    "ANALYZE %(tg_schema)s.%(tg_table)s;"
                    % {
                        'tg_table': tg_table,
                        'tg_schema': tg_schema,
                        'horizontal_name': horizontal_name,
                        'HYDRA_COUNTRY_CODE': HYDRA_COUNTRY_CODE,
                        'last_update_date': last_update_date,
                    }
                )
            except Exception as e:
                conn.rollback()
                scai.processEnd(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'server_date_day', 3)  # SCAI
                scai.integrationEnd(db_conf_file, COD_INTEGRATION, COD_COUNTRY, 3)  # SCAI
                print(e)
                # Not every exception carries pgerror; avoid masking the
                # real failure with an AttributeError.
                print(getattr(e, 'pgerror', None))
                sys.exit("The process aborted with error.")
            else:
                conn.commit()
                scai.processEnd(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'server_date_day', 1)  # SCAI

                # Enable execution of following processes
                scai_last_execution_status = 1
    finally:
        # The original closed the cursor twice and never closed the
        # connection; close both, on success and abort paths alike.
        cur.close()
        conn.close()

    # If error was solved here, return new status to use in subsequent processes
    return scai_last_execution_status
def copyHydraVerticalsTable(db_conf_file, sc_schema, tg_schema, resource, last_update_date, hydra_verticals_names, anlt_verticals_names, scai_last_execution_status=1):
    """Rebuild the Hydra verticals staging table with aggregated web events.

    The staging table ``stg_<COUNTRY>_<sc_schema>_<resource>`` is truncated
    and refilled with per-day, per-ad, per-trackname counts from
    ``hydra_verticals.web`` for ``HYDRA_COUNTRY_CODE`` starting at
    ``last_update_date``, then analyzed. The ``source`` column is derived
    from ``host`` through a CASE expression built from the two name lists.

    Args:
        db_conf_file: Configuration for the database connection and for the
            ``scai`` bookkeeping calls.
        sc_schema: Source schema name; only used to build the staging table
            name.
        tg_schema: Schema holding the staging table.
        resource: Resource name; used to build the staging table name.
        last_update_date: Lower bound (inclusive) applied to
            ``server_date_day``.
        hydra_verticals_names: Substrings matched against ``lower(host)``;
            indexed in parallel with ``anlt_verticals_names``.
        anlt_verticals_names: SQL expressions used as the THEN value for the
            matching host pattern (inserted verbatim, so literals must
            arrive already quoted).
        scai_last_execution_status: 1 for a normal execution, 3 when
            re-executing after an error (the load is then skipped unless
            ``scai.processCheck`` reports status 3 for this process).

    Returns:
        The execution status to use in subsequent processes: 1 if the load
        ran here, otherwise the value passed in.

    Raises:
        SystemExit: if the load fails; the SCAI process and integration are
            flagged with status 3 before exiting.
        IndexError: if ``hydra_verticals_names`` is shorter than
            ``anlt_verticals_names``.
    """
    print('Connecting to Yamato...')
    conn = getDatabaseConnection(db_conf_file)
    cur = conn.cursor()

    try:
        tg_table = 'stg_%(COUNTRY)s_%(sc_schema)s_%(resource)s' % {
            'resource': resource,
            'sc_schema': sc_schema,
            'COUNTRY': COUNTRY,
        }
        scai_process_name = scai.getProcessShortDescription(db_conf_file, tg_table)  # SCAI

        if scai_last_execution_status == 3:
            scai_process_status = scai.processCheck(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY, scai_last_execution_status)  # SCAI

        # Is normal execution or re-execution starting from the step that was in error.
        # scai_process_status is only read when it was assigned just above.
        if scai_last_execution_status == 1 or (scai_last_execution_status == 3 and scai_process_status == 3):
            scai.processStart(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY)  # SCAI
            print('Loading %(tg_schema)s.%(tg_table)s from %(last_update)s...' % {
                'tg_schema': tg_schema,
                'tg_table': tg_table,
                'last_update': last_update_date,
            })

            # Dynamically build CASE statement according to number of verticals.
            # The doubled '%' reaches the SQL unchanged (this text is a
            # substituted value, not a format string); LIKE treats '%%' the
            # same as a single wildcard.
            when_clauses = [
                " WHEN lower(host) LIKE '%%" + hydra_verticals_names[i] + "%%' THEN " + anlt_name
                for i, anlt_name in enumerate(anlt_verticals_names)
            ]
            case_statement = "CASE" + "".join(when_clauses) + " ELSE 'other' END"

            try:
                cur.execute(
                    "TRUNCATE TABLE %(tg_schema)s.%(tg_table)s; "
                    "INSERT INTO %(tg_schema)s.%(tg_table)s "
                    "SELECT "
                    " server_date_day, "
                    " ad_id, "
                    " trackname, "
                    " %(case_statement)s source, "
                    " count(*) occurrences, "
                    " count(distinct session_long) distinct_occurrences "
                    "FROM hydra_verticals.web "
                    "WHERE upper(country_code) = '%(HYDRA_COUNTRY_CODE)s' "
                    "AND ad_id is not null "
                    "AND server_date_day >= '%(last_update_date)s' "
                    "GROUP BY server_date_day, ad_id, trackname, "
                    " %(case_statement)s; "
                    "ANALYZE %(tg_schema)s.%(tg_table)s;"
                    % {
                        'tg_table': tg_table,
                        'tg_schema': tg_schema,
                        'HYDRA_COUNTRY_CODE': HYDRA_COUNTRY_CODE,
                        'last_update_date': last_update_date,
                        'case_statement': case_statement,
                    }
                )
            except Exception as e:
                conn.rollback()
                scai.processEnd(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'server_date_day', 3)  # SCAI
                scai.integrationEnd(db_conf_file, COD_INTEGRATION, COD_COUNTRY, 3)  # SCAI
                print(e)
                # Not every exception carries pgerror; avoid masking the
                # real failure with an AttributeError.
                print(getattr(e, 'pgerror', None))
                sys.exit("The process aborted with error.")
            else:
                conn.commit()
                scai.processEnd(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'server_date_day', 1)  # SCAI

                # Enable execution of following processes
                scai_last_execution_status = 1
    finally:
        # The original closed the cursor twice and never closed the
        # connection; close both, on success and abort paths alike.
        cur.close()
        conn.close()

    # If error was solved here, return new status to use in subsequent processes
    return scai_last_execution_status