def copyFromDatabaseToS3(source_conf, target_conf, resources, schema, last_updates_dict, aux_path, scai_last_execution_status=1):
    """UNLOAD each staging resource from the source Redshift ('Chandra') to S3.

    For every resource, rows with meta_event_time on/after the resource's
    entry in last_updates_dict (and matching BASE_ACCOUNT_COUNTRY) are
    unloaded to s3://<aux_path>/<schema>_<resource>/data_* as a
    manifest-based extract.

    SCAI bookkeeping: processStart is issued here; the matching success
    processEnd(1) is issued by copyFromS3ToDatabase, which loads the same
    target table under the same SCAI process name.

    Parameters:
        source_conf (str): path to the source-database JSON config file.
        target_conf (str): path to the target-database JSON config (SCAI).
        resources (iterable[str]): source table names (e.g. 'stg_d_base_contacts').
        schema (str): source schema holding the resources.
        last_updates_dict (dict): resource -> last-update timestamp string.
        aux_path (str): S3 bucket/prefix for the intermediate files.
        scai_last_execution_status (int): 1 = normal run, 3 = re-run after error.

    Returns:
        int: the (possibly reset) scai_last_execution_status, so callers can
        chain it into subsequent steps like the sibling copy functions do.

    Exits the interpreter with an error message if any UNLOAD fails.
    """
    print('Connecting to Chandra...')
    conn = getDatabaseConnection(source_conf)
    cur = conn.cursor()
    credentials = getS3Keys(source_conf)
    # NOTE: the original code also read source_conf into an unused local
    # (json.load(open(...))), leaking the file handle; that dead read was removed.

    # UNLOAD resources data
    print('Unloading from Chandra...')
    for resource in resources:
        print('\t' + resource + ": " + last_updates_dict[resource])
        # Target table name has the country in the middle of the source table
        # name (for example, stg_d_base_contacts -> stg_pt_d_base_contacts)
        tg_table = 'stg_' + COUNTRY + '_' + resource[4:]
        scai_process_name = scai.getProcessShortDescription(target_conf, tg_table) # SCAI
        if(scai_last_execution_status==3):
            scai_process_status = scai.processCheck(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY,scai_last_execution_status) # SCAI
        # Is normal execution or re-execution starting from the step that was in error
        if (scai_last_execution_status == 1 or (scai_last_execution_status == 3 and scai_process_status == 3)):
            scai.processStart(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY) # SCAI
            try:
                cur.execute(
                    "UNLOAD ('SELECT * from %(schema)s.%(resource)s "\
                    " WHERE meta_event_time >= \\\'%(last_update_date)s\\\' "\
                    " AND base_account_country = \\\'%(BASE_ACCOUNT_COUNTRY)s\\\'') "\
                    "TO 's3://%(aux_path)s/%(schema)s_%(resource)s/data_' "\
                    "CREDENTIALS '%(credentials)s' "\
                    "ESCAPE "\
                    "manifest;" % {
                        'schema':schema,
                        'resource':resource,
                        'last_update_date':last_updates_dict[resource],
                        'credentials':credentials,
                        'aux_path':aux_path,
                        'BASE_ACCOUNT_COUNTRY':BASE_ACCOUNT_COUNTRY
                    }
                )
            except Exception as e:
                conn.rollback()
                scai.processEnd(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'meta_event_time',3) # SCAI
                scai.integrationEnd(target_conf, COD_INTEGRATION, COD_COUNTRY, 3) # SCAI
                print(e)
                # Only psycopg2 errors carry .pgerror; a plain Exception would
                # have made this handler raise AttributeError before exiting.
                print(getattr(e, 'pgerror', None))
                sys.exit("The process aborted with error.")
            else:
                conn.commit()
                # Enable execution of following processes
                scai_last_execution_status = 1
    # Close connection
    cur.close()
    conn.close()
    # If error was solved here, return new status to use in subsequent
    # processes (consistent with copyBaseTables / copyHydraTable).
    return scai_last_execution_status
def copyFromS3ToDatabase(target_conf, resources, sc_schema, tg_schema, aux_path, scai_last_execution_status=1):
    """COPY each resource extract from S3 into the target Redshift ('Yamato').

    For every resource the target staging table is truncated, loaded from the
    manifest written by copyFromDatabaseToS3, and ANALYZEd. On success the
    SCAI process (started by copyFromDatabaseToS3 under the same name) is
    closed with status 1.

    Parameters:
        target_conf (str): path to the target-database JSON config file.
        resources (iterable[str]): source table names (e.g. 'stg_d_base_contacts').
        sc_schema (str): source schema name (used in the S3 path).
        tg_schema (str): target schema to load into.
        aux_path (str): S3 bucket/prefix holding the intermediate files.
        scai_last_execution_status (int): 1 = normal run, 3 = re-run after error.

    Returns:
        int: the (possibly reset) scai_last_execution_status, consistent with
        the sibling copy functions.

    Exits the interpreter with an error message if any COPY fails.
    """
    #LOAD to target redshift
    print('Connecting to Yamato...')
    conn_target = getDatabaseConnection(target_conf)
    cur_target = conn_target.cursor()
    credentials = getS3Keys(target_conf)
    print('Loading to Yamato...')
    for resource in resources:
        # Target table name has the country in the middle of the source table
        # name (for example, stg_d_base_contacts -> stg_pt_d_base_contacts)
        tg_table = 'stg_' + COUNTRY + '_' + resource[4:]
        print('Loading %(tg_schema)s.%(tg_table)s...' % {'tg_schema':tg_schema, 'tg_table':tg_table })
        scai_process_name = scai.getProcessShortDescription(target_conf, tg_table) # SCAI
        if(scai_last_execution_status==3):
            scai_process_status = scai.processCheck(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY,scai_last_execution_status) # SCAI
        # Is normal execution or re-execution starting from the step that was in error
        if (scai_last_execution_status == 1 or (scai_last_execution_status == 3 and scai_process_status == 3)):
            try:
                cur_target.execute(
                    "TRUNCATE TABLE %(tg_schema)s.%(tg_table)s; "\
                    "COPY %(tg_schema)s.%(tg_table)s "\
                    "FROM 's3://%(aux_path)s/%(sc_schema)s_%(resource)s/data_manifest' "\
                    "CREDENTIALS '%(credentials)s' "\
                    "REGION 'us-west-2' "\
                    "ESCAPE "\
                    "manifest; "\
                    "ANALYZE %(tg_schema)s.%(tg_table)s;" % {
                        'tg_schema':tg_schema,
                        'tg_table':tg_table,
                        'resource':resource,
                        'credentials':credentials,
                        'aux_path':aux_path,
                        'sc_schema':sc_schema
                    }
                )
            except Exception as e:
                conn_target.rollback()
                scai.processEnd(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'meta_event_time',3) # SCAI
                scai.integrationEnd(target_conf, COD_INTEGRATION, COD_COUNTRY, 3) # SCAI
                print(e)
                # Only psycopg2 errors carry .pgerror; guard so the handler
                # itself cannot raise AttributeError.
                print(getattr(e, 'pgerror', None))
                sys.exit("The process aborted with error.")
            else:
                conn_target.commit()
                scai.processEnd(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'meta_event_time',1) # SCAI
                #Enable execution of following processes
                scai_last_execution_status = 1
    cur_target.close()
    conn_target.close()
    # If error was solved here, return new status to use in subsequent
    # processes (consistent with the sibling copy functions).
    return scai_last_execution_status
def copyBaseTables(db_conf_file, sc_schema, tg_schema, resources, last_updates_dict, verticals_names='', scai_last_execution_status=1):
    """Refresh country staging copies of Base tables inside one Redshift ('Yamato').

    Each resource is truncated in tg_schema and re-filled from sc_schema with
    rows whose operation_timestamp is on/after the resource's entry in
    last_updates_dict, then ANALYZEd. The SCAI process start/end calls are
    deliberately commented out in this variant; only processCheck is used.

    Parameters:
        db_conf_file (str): path to the database JSON config file.
        sc_schema (str): source schema name.
        tg_schema (str): target schema name.
        resources (iterable[str]): source table names.
        last_updates_dict (dict): resource -> last-update timestamp string.
        verticals_names: unused; kept only for interface compatibility.
        scai_last_execution_status (int): 1 = normal run, 3 = re-run after error.

    Returns:
        int: the (possibly reset) scai_last_execution_status.

    Exits the interpreter with an error message if any statement fails.
    """
    print('Connecting to Yamato...')
    conn_target = getDatabaseConnection(db_conf_file)
    cur_target = conn_target.cursor()
    for resource in resources:
        # Target table name has the country in the middle of the source table
        # name (for example, stg_d_base_contacts -> stg_pt_d_base_contacts)
        tg_table = 'stg_' + COUNTRY + '_' + resource[4:]
        scai_process_name = scai.getProcessShortDescription(db_conf_file, tg_table) # SCAI
        if(scai_last_execution_status==3):
            scai_process_status = scai.processCheck(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY,scai_last_execution_status) # SCAI
        # Is normal execution or re-execution starting from the step that was in error
        if (scai_last_execution_status == 1 or (scai_last_execution_status == 3 and scai_process_status == 3)):
            #scai.processStart(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY) # SCAI
            print('Loading %(tg_schema)s.%(tg_table)s from %(last_update)s...'
                  % {'tg_schema':tg_schema, 'tg_table':tg_table, 'last_update':last_updates_dict[resource]})
            try:
                cur_target.execute(
                    "TRUNCATE TABLE %(tg_schema)s.%(tg_table)s; "\
                    "INSERT INTO %(tg_schema)s.%(tg_table)s "\
                    "SELECT * FROM %(sc_schema)s.%(resource)s "\
                    "WHERE operation_timestamp >= '%(last_update_date)s'; "\
                    "ANALYZE %(tg_schema)s.%(tg_table)s;" % {
                        'tg_table':tg_table,
                        'tg_schema':tg_schema,
                        'sc_schema':sc_schema,
                        'resource':resource,
                        'last_update_date':last_updates_dict[resource]
                    }
                )
            except Exception as e:
                conn_target.rollback()
                #scai.processEnd(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'operation_timestamp',3) # SCAI
                #scai.integrationEnd(db_conf_file, COD_INTEGRATION, COD_COUNTRY, 3) # SCAI
                print(e)
                # Only psycopg2 errors carry .pgerror; guard so the handler
                # itself cannot raise AttributeError.
                print(getattr(e, 'pgerror', None))
                sys.exit("The process aborted with error.")
            else:
                conn_target.commit()
                #scai.processEnd(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'operation_timestamp',1) # SCAI
                #Enable execution of following processes
                scai_last_execution_status = 1
    cur_target.close()
    conn_target.close()
    # If error was solved here, return new status to use in subsequent processes
    return scai_last_execution_status
scai_last_execution_status = scai.getLastExecutionStatus( DB_CONF_FILE, COD_INTEGRATION, COD_COUNTRY) # SCAI if (country_execution_status != 1 and scai_last_execution_status == 1): print( 'The integration executed successfuly on last execution. The problem is further ahead.' ) sys.exit(0) if (scai_last_execution_status == 2): sys.exit("The integration is already running...") if (scai_last_execution_status == 3): scai_process_status = scai.processCheck(DB_CONF_FILE, DSC_PROCESS, COD_INTEGRATION, COD_COUNTRY, scai_last_execution_status) # SCAI # Is normal execution or re-execution starting from the step that was in error if (scai_last_execution_status == 1 or (scai_last_execution_status == 3 and scai_process_status == 3)): #check in which block to begin cur.execute("select "\ " nvl(block_nbr,1) as block_nbr "\ " from crm_integration_anlt.t_rel_scai_country_integration country_integration "\ " where "\ " country_integration.cod_integration = %(COD_INTEGRATION)d "\ " and country_integration.cod_country = %(COD_COUNTRY)d "\ " and ind_active = 1 "\ % { 'COD_COUNTRY':COD_COUNTRY ,
def getDatabaseConnection(conf_file):
    """Open a psycopg2 connection using credentials from a JSON config file.

    The file must contain the keys: dbname, host, port, user, pass.

    Parameters:
        conf_file (str): path to the JSON configuration file.

    Returns:
        a psycopg2 connection object.
    """
    # Context manager closes the config file promptly (the original
    # json.load(open(...)) left the handle to the garbage collector).
    with open(conf_file) as f:
        data = json.load(f)
    return psycopg2.connect(dbname=data['dbname'],
                            host=data['host'],
                            port=data['port'],
                            user=data['user'],
                            password=data['pass'])


# --- script body: otodompl Base -> Redshift contact staging ------------------
conf_file = sys.argv[1]  # File with source database
COD_COUNTRY = int(sys.argv[2])  # Country code

with open(conf_file) as f:
    base_api_token = json.load(f)['base_api_token_otodompl']
client = basecrm.Client(access_token=base_api_token)

conn = getDatabaseConnection(conf_file)
cur = conn.cursor()

# NOTE(review): scai_process_name is not defined anywhere in this chunk — it
# must be assigned elsewhere in the original file; confirm before running.
scai_process_status = scai.processCheck(conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY, 1) # SCAI
#First time ever execution
if (not scai_process_status):
    scai_process_status = 1
if (scai_process_status != 1):
    sys.exit("The integration is already running or there was an error with the last execution that has to be fixed manually.")
scai.integrationStart(conf_file, COD_INTEGRATION, COD_COUNTRY) # SCAI
scai.processStart(conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY) # SCAI

print('Start Truncate aut_otodompl_base_to_bd_contact: ' + time.strftime("%H:%M:%S"))
cur.execute("truncate table crm_integration_anlt.aut_otodompl_base_to_bd_contact; ")
print('End Truncate aut_otodompl_base_to_bd_contact: ' + time.strftime("%H:%M:%S"))
def main(conf_file):
    """Create Base CRM deals from olxpt paid-ads payments.

    Flow: query paid-ads payment rows joined with contact/user lookups from
    Redshift, advance the 'pthorizontal' watermark in
    aut_deals_insert_to_base_date, then fan the result rows out over
    MAX_ACTIVE_THREADS threads that each call createDealsInBase.

    NOTE(review): scai_process_name, getBaseConnection, createDealsInBase,
    MAX_ACTIVE_THREADS, COD_INTEGRATION and COD_COUNTRY are not defined in
    this chunk — they must come from elsewhere in the file; verify.

    Parameters:
        conf_file (str): path to the JSON config file (holds the Base API
        token under 'base_api_token_olxpt' and the database credentials).
    """
    print('Starting Process... ' + time.strftime("%H:%M:%S"))
    global BASE_API_TOKEN
    BASE_API_TOKEN = json.load(open(conf_file))['base_api_token_olxpt']
    client = getBaseConnection()
    # Create Redshift Connection
    conn = getDatabaseConnection(conf_file)
    cur = conn.cursor()
    scai_process_status = scai.processCheck(conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY, 1) # SCAI
    #First time ever execution
    if (not scai_process_status):
        scai_process_status = 1
    # NOTE(review): the already-running guard is deliberately disabled here.
    #if (scai_process_status != 1):
    #    sys.exit("The integration is already running or there was an error with the last execution that has to be fixed manually.")
    scai.integrationStart(conf_file, COD_INTEGRATION, COD_COUNTRY) # SCAI
    scai.processStart(conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY) # SCAI
    print('Starting Data Query... ' + time.strftime("%H:%M:%S"))
    # Payments newer than the stored 'pthorizontal' watermark, joined to
    # contact/user/category lookups; opr_stage is 'organico' for expired VAS
    # packages with no recent call, otherwise 'sales pipeline'.
    cur.execute(
        "select "\
        "fac.name, "\
        "cast(lkp_contact.opr_contact as integer), "\
        "lkp_base_user.opr_base_user, "\
        "4616871 as opr_base_source, "\
        "case "\
        "when "\
        "v_lkp_pi.cod_index_type = 1 /* VAS */ "\
        "and to_date(fac.paidads_valid_to,'yyyy-mm-dd') /* active package */ < to_date(sysdate,'yyyy-mm-dd') "\
        "and last_call < to_date(sysdate,'yyyy-mm-dd') -15 "\
        "then 7344246 /* organico */ "\
        "else 2950782 /* sales pipeline */ "\
        "end as opr_stage, "\
        "(fac.price * (-1))/1.23 as val_price, "\
        "lkp_category.name_pt sub_category, "\
        "lkp_category_parent.name_pt main_category, "\
        "to_char(to_date(fac.date,'yyyy/mm/dd'),'dd/mm/yyyy') as dat_payment, "\
        "fac.id_transaction, "\
        "'Automation' as deal_created_by "\
        "from "\
        "db_atlas.olxpt_paidads_user_payments fac, "\
        "db_atlas.olxpt_users lkp_atlas_user, "\
        "db_atlas.olxpt_ads lkp_ad, "\
        "db_atlas.olxpt_categories lkp_category, "\
        "( "\
        "select id opr_category, name_pt, parent_level1, parent_level2, parent_id from db_atlas.olxpt_categories lkp_category "\
        "where "\
        "parent_level2 is null "\
        "and parent_level1 is null "\
        ") lkp_category_parent, "\
        "crm_integration_anlt.t_lkp_contact lkp_contact, "\
        "crm_integration_anlt.t_lkp_base_user lkp_base_user, "\
        "crm_integration_anlt.t_lkp_paidad_index lkp_pi, "\
        "crm_integration_anlt.t_lkp_paidad_index_type lkp_pit, "\
        "crm_integration_anlt.v_lkp_paidad_index v_lkp_pi, "\
        "(select cod_contact, to_date(max(updated_at),'yyyy-mm-dd') last_call from crm_integration_anlt.t_fac_call where cod_source_system = 16 group by cod_contact) fac_call "\
        "where "\
        "fac.id_user = lkp_atlas_user.id "\
        "and lkp_contact.cod_source_system = 16 "\
        "and lower(lkp_atlas_user.email) = lower(lkp_contact.email) "\
        "and fac.id_index = lkp_pi.opr_paidad_index "\
        "and lkp_pi.cod_paidad_index_type = lkp_pit.cod_paidad_index_type "\
        "and lkp_pit.valid_to = 20991231 "\
        "and lkp_pit.cod_source_system = 8 "\
        "and lkp_pit.opr_paidad_index_type in ('ad_homepage','highlight','bundle','nnl','pushup','logo','topads','topupaccount','paid_subscription','paid_limits_single','paid_for_post') "\
        "and lkp_pi.cod_source_system = 8 "\
        "and fac.id_ad = lkp_ad.id "\
        "and lkp_ad.category_id = lkp_category.id "\
        "and lkp_contact.cod_base_user_owner = lkp_base_user.cod_base_user "\
        "and lkp_base_user.cod_source_system = 16 "\
        "and lkp_base_user.valid_to = 20991231 "\
        "and v_lkp_pi.cod_paidad_index = lkp_pi.cod_paidad_index "\
        "and isnull(lkp_category.parent_level1,-2) = lkp_category_parent.opr_category "\
        "and fac.price < 0 "\
        "and trunc(fac.date) > (select last_processing_datetime from crm_integration_anlt.aut_deals_insert_to_base_date where source_system = 'pthorizontal') "\
        "and lkp_contact.valid_to = 20991231 "\
        "and lkp_pi.valid_to = 20991231 "\
        "and lkp_contact.cod_contact = fac_call.cod_contact (+); ")
    result_list = cur.fetchall()
    print('Ending Data Query... ' + time.strftime("%H:%M:%S"))
    print('Starting Delete Dates Query... ' + time.strftime("%H:%M:%S"))
    # Replace (delete + re-insert) the 'pthorizontal' watermark row.
    cur.execute( "delete from crm_integration_anlt.aut_deals_insert_to_base_date where source_system = 'pthorizontal'; " )
    print('Ending Delete Dates Query... ' + time.strftime("%H:%M:%S"))
    print('Starting Dates Query... ' + time.strftime("%H:%M:%S"))
    # Re-insert the watermark as max(fac.date) over the same join/filters as
    # the data query (minus the watermark filter itself).
    cur.execute(
        "insert into crm_integration_anlt.aut_deals_insert_to_base_date "\
        "select "\
        "'pthorizontal' as source_system, "\
        "max(fac.date) "\
        "from "\
        "db_atlas.olxpt_paidads_user_payments fac, "\
        "db_atlas.olxpt_users lkp_atlas_user, "\
        "db_atlas.olxpt_ads lkp_ad, "\
        "db_atlas.olxpt_categories lkp_category, "\
        "( "\
        "select id opr_category, name_pt, parent_level1, parent_level2, parent_id from db_atlas.olxpt_categories lkp_category "\
        "where "\
        "parent_level2 is null "\
        "and parent_level1 is null "\
        ") lkp_category_parent, "\
        "crm_integration_anlt.t_lkp_contact lkp_contact, "\
        "crm_integration_anlt.t_lkp_base_user lkp_base_user, "\
        "crm_integration_anlt.t_lkp_paidad_index lkp_pi, "\
        "crm_integration_anlt.t_lkp_paidad_index_type lkp_pit, "\
        "crm_integration_anlt.v_lkp_paidad_index v_lkp_pi "\
        "where "\
        "fac.id_user = lkp_atlas_user.id "\
        "and lkp_contact.cod_source_system = 16 "\
        "and lower(lkp_atlas_user.email) = lower(lkp_contact.email) "\
        "and fac.id_index = lkp_pi.opr_paidad_index "\
        "and lkp_pi.cod_paidad_index_type = lkp_pit.cod_paidad_index_type "\
        "and lkp_pit.valid_to = 20991231 "\
        "and lkp_pit.cod_source_system = 8 "\
        "and lkp_pit.opr_paidad_index_type in ('ad_homepage','highlight','bundle','nnl','pushup','logo','topads','topupaccount','paid_subscription','paid_limits_single','paid_for_post') "\
        "and lkp_pi.cod_source_system = 8 "\
        "and fac.id_ad = lkp_ad.id "\
        "and lkp_ad.category_id = lkp_category.id "\
        "and lkp_contact.cod_base_user_owner = lkp_base_user.cod_base_user "\
        "and lkp_base_user.cod_source_system = 16 "\
        "and lkp_base_user.valid_to = 20991231 "\
        "and v_lkp_pi.cod_paidad_index = lkp_pi.cod_paidad_index "\
        "and isnull(lkp_category.parent_level1,-2) = lkp_category_parent.opr_category "\
        "and fac.price < 0 "\
        "and lkp_contact.valid_to = 20991231 "\
        "and lkp_pi.valid_to = 20991231; ")
    conn.commit()
    print('Ending Dates Query... ' + time.strftime("%H:%M:%S"))
    print('Starting Deals Creations in Base... ' + time.strftime("%H:%M:%S"))
    # Threading implementation: partition result_list into MAX_ACTIVE_THREADS
    # contiguous slices and process each slice in its own thread.
    number_active_threads = 0  # NOTE(review): never used after assignment
    number_deals = len(result_list)
    deals_per_thread = -(-number_deals // MAX_ACTIVE_THREADS ) # Ceiling of integer division
    thread_list = []
    i = 0
    j = deals_per_thread
    for n in range(0, MAX_ACTIVE_THREADS):
        t = threading.Thread(target=createDealsInBase, args=(client, result_list[i:j], conf_file))
        thread_list.append(t)
        t.start()
        print('Spawned thread #' + str(n + 1))
        i = i + deals_per_thread
        j = j + deals_per_thread
        if j > number_deals:
            # Clamp the last slice to the end of the list.
            j = number_deals
    for t in thread_list:
        t.join()
    print('Ending Deals Creations in Base... ' + time.strftime("%H:%M:%S"))
    scai.processEnd(conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY, '', '', 1) # SCAI
    scai.integrationEnd(conf_file, COD_INTEGRATION, COD_COUNTRY, 1) # SCAI
    cur.close()
    conn.close()
    print('Ending Process... ' + time.strftime("%H:%M:%S"))
def copyHydraTable(db_conf_file, sc_schema, tg_schema, resource, last_update_date, horizontal_name, scai_last_execution_status=1):
    """Aggregate hydra.web tracking events into a country staging table on 'Yamato'.

    The target table is truncated and re-filled with per-day, per-ad,
    per-action event counts (total and distinct sessions) for
    HYDRA_COUNTRY_CODE since last_update_date, then ANALYZEd.

    Parameters:
        db_conf_file (str): path to the database JSON config file.
        sc_schema (str): source schema label used to build the target table name.
        tg_schema (str): target schema name.
        resource (str): logical resource name (part of the target table name).
        last_update_date (str): lower bound for server_date_day.
        horizontal_name (str): SQL expression inserted verbatim as the
            'source' column — assumed to already be a valid SQL literal;
            TODO confirm with callers.
        scai_last_execution_status (int): 1 = normal run, 3 = re-run after error.

    Returns:
        int: the (possibly reset) scai_last_execution_status.

    Exits the interpreter with an error message if the load fails.
    """
    print('Connecting to Yamato...')
    conn = getDatabaseConnection(db_conf_file)
    cur = conn.cursor()
    tg_table = 'stg_%(COUNTRY)s_%(sc_schema)s_%(resource)s' % {
        'resource': resource,
        'sc_schema': sc_schema,
        'COUNTRY': COUNTRY
    }
    scai_process_name = scai.getProcessShortDescription(
        db_conf_file, tg_table) # SCAI
    if (scai_last_execution_status == 3):
        scai_process_status = scai.processCheck(
            db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY,
            scai_last_execution_status) # SCAI
    # Is normal execution or re-execution starting from the step that was in error
    if (scai_last_execution_status == 1 or
            (scai_last_execution_status == 3 and scai_process_status == 3)):
        scai.processStart(db_conf_file, scai_process_name, COD_INTEGRATION,
                          COD_COUNTRY) # SCAI
        print(
            'Loading %(tg_schema)s.%(tg_table)s from %(last_update)s...' % {
                'tg_schema': tg_schema,
                'tg_table': tg_table,
                'last_update': last_update_date
            })
        try:
            cur.execute(
                "TRUNCATE TABLE %(tg_schema)s.%(tg_table)s; "\
                "INSERT INTO %(tg_schema)s.%(tg_table)s "\
                "SELECT "\
                " server_date_day, "\
                " ad_id, "\
                " action_type, "\
                " %(horizontal_name)s source, "\
                " count(*) occurrences, "\
                " count(distinct session_long) distinct_occurrences "\
                "FROM hydra.web "\
                "WHERE upper(country_code) = '%(HYDRA_COUNTRY_CODE)s' "\
                "AND ad_id is not null "\
                "AND server_date_day >= '%(last_update_date)s' "\
                "GROUP BY server_date_day, ad_id, action_type; "\
                "ANALYZE %(tg_schema)s.%(tg_table)s;" % {
                    'tg_table':tg_table,
                    'tg_schema':tg_schema,
                    'horizontal_name':horizontal_name,
                    'HYDRA_COUNTRY_CODE':HYDRA_COUNTRY_CODE,
                    'last_update_date':last_update_date
                }
            )
        except Exception as e:
            conn.rollback()
            scai.processEnd(db_conf_file, scai_process_name, COD_INTEGRATION,
                            COD_COUNTRY, tg_table, 'server_date_day', 3) # SCAI
            scai.integrationEnd(db_conf_file, COD_INTEGRATION, COD_COUNTRY, 3) # SCAI
            print(e)
            # Only psycopg2 errors carry .pgerror; guard so the handler
            # itself cannot raise AttributeError.
            print(getattr(e, 'pgerror', None))
            sys.exit("The process aborted with error.")
        else:
            conn.commit()
            scai.processEnd(db_conf_file, scai_process_name, COD_INTEGRATION,
                            COD_COUNTRY, tg_table, 'server_date_day', 1) # SCAI
            #Enable execution of following processes
            scai_last_execution_status = 1
    cur.close()
    # BUGFIX: the original called cur.close() twice and never closed the
    # connection, leaking it.
    conn.close()
    # If error was solved here, return new status to use in subsequent processes
    return scai_last_execution_status
def copyHydraVerticalsTable(db_conf_file, sc_schema, tg_schema, resource, last_update_date, hydra_verticals_names, anlt_verticals_names, scai_last_execution_status=1):
    """Aggregate hydra_verticals.web events into a country staging table on 'Yamato'.

    Like copyHydraTable, but the 'source' column is derived from the request
    host via a dynamically built CASE expression mapping hydra host substrings
    to analytics vertical names.

    Parameters:
        db_conf_file (str): path to the database JSON config file.
        sc_schema (str): source schema label used to build the target table name.
        tg_schema (str): target schema name.
        resource (str): logical resource name (part of the target table name).
        last_update_date (str): lower bound for server_date_day.
        hydra_verticals_names (sequence[str]): host substrings to match.
        anlt_verticals_names (sequence[str]): SQL expressions emitted verbatim
            in the CASE THEN branches — assumed to be valid SQL literals and
            parallel to hydra_verticals_names; TODO confirm with callers.
        scai_last_execution_status (int): 1 = normal run, 3 = re-run after error.

    Returns:
        int: the (possibly reset) scai_last_execution_status.

    Exits the interpreter with an error message if the load fails.
    """
    print('Connecting to Yamato...')
    conn = getDatabaseConnection(db_conf_file)
    cur = conn.cursor()
    tg_table = 'stg_%(COUNTRY)s_%(sc_schema)s_%(resource)s' % {
        'resource': resource,
        'sc_schema': sc_schema,
        'COUNTRY': COUNTRY
    }
    scai_process_name = scai.getProcessShortDescription(
        db_conf_file, tg_table) # SCAI
    if (scai_last_execution_status == 3):
        scai_process_status = scai.processCheck(
            db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY,
            scai_last_execution_status) # SCAI
    # Is normal execution or re-execution starting from the step that was in error
    if (scai_last_execution_status == 1 or
            (scai_last_execution_status == 3 and scai_process_status == 3)):
        scai.processStart(db_conf_file, scai_process_name, COD_INTEGRATION,
                          COD_COUNTRY) # SCAI
        print(
            'Loading %(tg_schema)s.%(tg_table)s from %(last_update)s...' % {
                'tg_schema': tg_schema,
                'tg_table': tg_table,
                'last_update': last_update_date
            })
        # Dynamically build CASE statement according to number of verticals
        # ('%%' survives the later %-formatting as a literal '%' for LIKE).
        case_statement = "CASE"
        for i in range(len(anlt_verticals_names)):
            case_statement += " WHEN lower(host) LIKE '%%" + hydra_verticals_names[
                i] + "%%' THEN " + anlt_verticals_names[i]
        case_statement += " ELSE 'other' END"
        try:
            cur.execute(
                "TRUNCATE TABLE %(tg_schema)s.%(tg_table)s; "\
                "INSERT INTO %(tg_schema)s.%(tg_table)s "\
                "SELECT "\
                " server_date_day, "\
                " ad_id, "\
                " trackname, "\
                " %(case_statement)s source, "\
                " count(*) occurrences, "\
                " count(distinct session_long) distinct_occurrences "\
                "FROM hydra_verticals.web "\
                "WHERE upper(country_code) = '%(HYDRA_COUNTRY_CODE)s' "\
                "AND ad_id is not null "\
                "AND server_date_day >= '%(last_update_date)s' "\
                "GROUP BY server_date_day, ad_id, trackname, "\
                " %(case_statement)s; "\
                "ANALYZE %(tg_schema)s.%(tg_table)s;" % {
                    'tg_table':tg_table,
                    'tg_schema':tg_schema,
                    'HYDRA_COUNTRY_CODE':HYDRA_COUNTRY_CODE,
                    'last_update_date':last_update_date,
                    'case_statement':case_statement
                }
            )
        except Exception as e:
            conn.rollback()
            scai.processEnd(db_conf_file, scai_process_name, COD_INTEGRATION,
                            COD_COUNTRY, tg_table, 'server_date_day', 3) # SCAI
            scai.integrationEnd(db_conf_file, COD_INTEGRATION, COD_COUNTRY, 3) # SCAI
            print(e)
            # Only psycopg2 errors carry .pgerror; guard so the handler
            # itself cannot raise AttributeError.
            print(getattr(e, 'pgerror', None))
            sys.exit("The process aborted with error.")
        else:
            conn.commit()
            scai.processEnd(db_conf_file, scai_process_name, COD_INTEGRATION,
                            COD_COUNTRY, tg_table, 'server_date_day', 1) # SCAI
            #Enable execution of following processes
            scai_last_execution_status = 1
    cur.close()
    # BUGFIX: the original called cur.close() twice and never closed the
    # connection, leaking it.
    conn.close()
    # If error was solved here, return new status to use in subsequent processes
    return scai_last_execution_status