def process_file(filename):
    ddbb = Redshift()
    ddbb.open_connection()
    ddbb.truncate_table('bi_development_stg.adwords_stg')
    ddbb.copy_file_into_redshift(s3_file_name=filename,
                                 table_name='bi_development_stg.adwords_stg',
                                 ignore_header=3,
                                 delimiter=',')
    ddbb.close_connection()
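# A minimal sketch of how process_file() could be driven from an S3 listing,
# mirroring the loop used by the HubSpot events loader in this repo. The
# 'adwords' folder name and the 'processed_files' prefix are assumptions, not
# confirmed for this module; adjust them to the real bucket layout.
def process_pending_files():
    s3_bucket = S3Bucket()
    for s3_file in s3_bucket.list_folders(folder_url='adwords'):
        if not s3_file.startswith('processed_files'):
            process_file(s3_file)
            s3_bucket.move_to_backup(s3_file)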
def truncate_stg_table():
    ddbb = Redshift()
    ddbb.open_connection()
    ddbb.truncate_table('bi_development_stg.google_analytics_stg')
    ddbb.close_connection()
def process_stg_table():
    ddbb = Redshift()
    ddbb.open_connection()
    ddbb.update_table_from_table_current_month(
        origin_table_name='bi_development_stg.v_facebook_stg_day',
        destiny_table_name='bi_development.facebook_cost',
        date_field='campaign_date')
    ddbb.close_connection()
def process_stg_table():
    ddbb = Redshift()
    ddbb.open_connection()
    ddbb.update_table_from_table_current_month(
        origin_table_name='bi_development_stg.v_google_analytics_stg',
        destiny_table_name='bi_development.google_analytics',
        date_field='event_date')
    ddbb.close_connection()
def process_file(filename):
    ddbb = Redshift()
    ddbb.open_connection()
    ddbb.copy_file_into_redshift(
        s3_file_name=filename,
        table_name='bi_development_stg.google_analytics_stg',
        ignore_header=7,
        delimiter=',')
    ddbb.close_connection()
def process_file(filename):
    ddbb = Redshift()
    ddbb.open_connection()
    if 'session_start' in filename:
        ddbb.truncate_table('bi_development_stg.firebase_sessions_stg')
        ddbb.copy_file_into_redshift(
            s3_file_name=filename,
            table_name='bi_development_stg.firebase_sessions_stg',
            ignore_header=1,
            delimiter=',')
    elif 'ecomerce_purchase' in filename:
        ddbb.truncate_table('bi_development_stg.firebase_purchase_stg')
        ddbb.copy_file_into_redshift(
            s3_file_name=filename,
            table_name='bi_development_stg.firebase_purchase_stg',
            ignore_header=1,
            delimiter=',')
    else:
        ddbb.truncate_table('bi_development_stg.firebase_stg')
        ddbb.copy_file_into_redshift(
            s3_file_name=filename,
            table_name='bi_development_stg.firebase_stg',
            ignore_header=1,
            delimiter=',')
    ddbb.close_connection()
def process_stg_table():
    ddbb = Redshift()
    ddbb.open_connection()
    ddbb.update_table_from_table_current_month(
        origin_table_name='bi_development_stg.v_firebase_stg',
        destiny_table_name='bi_development.firebase',
        date_field='event_date')
    ddbb.update_table_from_table_current_month(
        origin_table_name='bi_development_stg.v_firebase_sessions_stg',
        destiny_table_name='bi_development.firebase_sessions',
        date_field='event_date')
    ddbb.update_table_from_table_current_month(
        origin_table_name='bi_development_stg.v_firebase_purchase_stg',
        destiny_table_name='bi_development.firebase_purchase',
        date_field='event_date')
    ddbb.close_connection()
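# A minimal sketch of the intended call order for the Firebase loader: stage
# each exported CSV with process_file(), then promote the current month into
# the final tables with process_stg_table(). The run_firebase_load() name and
# the export_files argument are illustrative only; the real file names come
# from the S3 bucket.
def run_firebase_load(export_files):
    for export_file in export_files:
        process_file(export_file)
    process_stg_table()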
def fountain_load_data():
    folder = 'fountain'
    filename = "descarga_fountain.csv"
    table_stg = 'bi_development_stg.fountain_stg'
    destiny_table = 'bi_development.fountain'
    origin_table = 'bi_development_stg.v_fountain_stg'

    print_message('FOUNTAIN', 'Starting to load the data')

    json = Json()
    dataset = json.getData()

    s3_bucket = S3Bucket()
    s3_bucket.save_csv(dataset, filename, folder)

    ddbb = Redshift()
    ddbb.open_connection()
    ddbb.truncate_table(table_stg)
    ddbb.copy_file_into_redshift(
        s3_file_name='{folder}/{filename}'.format(folder=folder, filename=filename),
        table_name=table_stg,
        ignore_header=1,
        delimiter=cred.S3_DELIMITER)
    ddbb.copy_table_from_table(origin_table, destiny_table)
    ddbb.close_connection()

    s3_bucket.move_to_backup('{folder}/{filename}'.format(folder=folder, filename=filename))
    print_message('FOUNTAIN', 'Data added to database')
def helpscout_load_data():
    folder = 'helpscout'
    processed_files = 'processed_files'

    # Mailboxes
    mailbox_filename = 'helpscout_mailbox.csv'
    mailbox_table_stg = 'bi_development_stg.helpscout_mailbox_stg'
    mailbox_destiny_table = 'bi_development.helpscout_mailbox'
    mailbox_origin_table = 'bi_development_stg.v_helpscout_mailbox_stg'

    # Conversations
    conversations_filename = 'helpscout_conversations.csv'
    conversations_table_stg = 'bi_development_stg.helpscout_conversations_stg'
    conversations_destiny_table = 'bi_development.helpscout_conversations'
    conversations_origin_table = 'bi_development_stg.v_helpscout_conversations_stg'

    s3_bucket = S3Bucket()
    ddbb = Redshift()
    hs = HelpScout_api()

    # Get the mailboxes
    mailboxes = hs.getMailboxes()
    s3_bucket.save_csv(mailboxes, mailbox_filename, folder)

    ddbb.open_connection()
    ddbb.truncate_table(mailbox_table_stg)
    ddbb.copy_file_into_redshift(
        s3_file_name='{folder}/{filename}'.format(folder=folder, filename=mailbox_filename),
        table_name=mailbox_table_stg,
        ignore_header=1,
        delimiter=cred.S3_DELIMITER)
    ddbb.copy_table_from_table(mailbox_origin_table, mailbox_destiny_table)
    ddbb.close_connection()

    s3_bucket.move_to_backup('{folder}/{filename}'.format(folder=folder, filename=mailbox_filename))
    print_message('HELPSCOUT', 'Mailboxes data added to database')

    # Get the conversations from the last few days
    days = 4
    conversations = hs.getConversationsLast(hours=24 * days)
    if len(conversations) > 0:
        s3_bucket.save_csv(conversations, conversations_filename, folder)
        ddbb.open_connection()
        ddbb.truncate_table(conversations_table_stg)
        ddbb.copy_file_into_redshift(
            s3_file_name='{folder}/{filename}'.format(folder=folder, filename=conversations_filename),
            table_name=conversations_table_stg,
            ignore_header=1,
            delimiter=cred.S3_DELIMITER)
        s3_bucket.move_to_backup('{folder}/{filename}'.format(folder=folder, filename=conversations_filename))
        ddbb.execute_query(sql.HELPSCOUT_DELETE)
        ddbb.execute_query(sql.HELPSCOUT_CLOSE.format(days=days))
        ddbb.execute_query(sql.HELPSCOUT_INSERT)
        ddbb.execute_query(sql.HELPSCOUT_ACTIVE)
        ddbb.close_connection()
        print_message('HELPSCOUT', 'Conversations data added to database')
def getDelighted(api_key):
    folder = 'delighted'

    # Surveys
    surveys_filename = 'delighted_surveys.csv'
    surveys_table_stg = 'bi_development_stg.delighted_surveys_stg'

    # People
    people_filename = 'delighted_people.csv'
    people_table_stg = 'bi_development_stg.delighted_people_stg'

    s3_bucket = S3Bucket()
    delighted = Delighted(api_key)

    surveyDataset = delighted.getSurveys()
    s3_bucket.save_csv(surveyDataset, surveys_filename, folder)

    peopleDataset = delighted.getPeople()
    s3_bucket.save_csv(peopleDataset, people_filename, folder)

    ddbb = Redshift()
    ddbb.open_connection()

    ddbb.truncate_table(surveys_table_stg)
    ddbb.copy_file_into_redshift(
        s3_file_name='{folder}/{filename}'.format(folder=folder, filename=surveys_filename),
        table_name=surveys_table_stg,
        ignore_header=1,
        delimiter=cred.S3_DELIMITER)
    ddbb.execute_query(sql.DELIGHTED_SURVEYS_CHANGES)
    ddbb.execute_query(sql.DELIGHTED_SURVEYS_DELETE)
    ddbb.execute_query(sql.DELIGHTED_SURVEYS)
    s3_bucket.move_to_backup('{folder}/{filename}'.format(folder=folder, filename=surveys_filename))
    print_message('DELIGHTED', 'Surveys data added to database')

    ddbb.truncate_table(people_table_stg)
    ddbb.copy_file_into_redshift(
        s3_file_name='{folder}/{filename}'.format(folder=folder, filename=people_filename),
        table_name=people_table_stg,
        ignore_header=1,
        delimiter=cred.S3_DELIMITER)
    ddbb.execute_query(sql.DELIGHTED_PEOPLE)
    print_message('DELIGHTED', 'People data added to database')
    s3_bucket.move_to_backup('{folder}/{filename}'.format(folder=folder, filename=people_filename))

    ddbb.close_connection()
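# A minimal usage sketch for getDelighted(), which takes the API key as a
# parameter. Both the wrapper name and cred.DELIGHTED_API_KEYS (a list of
# account keys) are hypothetical; use whatever credential names the project
# actually stores.
def delighted_load_data():
    for api_key in cred.DELIGHTED_API_KEYS:
        getDelighted(api_key)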
def generate_last_period():
    json = JsonAircall()
    s3_bucket = S3Bucket()
    folder = 'aircall'
    filename = 'aircall.csv'
    ddbb = Redshift()
    aircall_calls_stg_table = 'bi_development_stg.aircall_calls_stg'
    delete_loaded_records = sqls.AIRCALL_DELETE_LOADED_RECORDS
    load_new_records = sqls.AIRCALL_LOAD_NEW_RECORDS
    from_date = dates.return_seconds(1)

    rawCalls = json.getCalls(from_date=from_date)
    if len(rawCalls) > 0:
        rawCallsF = JsonAircall.formatDataFrame(rawCalls)
        s3_bucket.save_csv(df=rawCallsF, s3_file_name=filename, s3_folder=folder)

        ddbb.open_connection()
        ddbb.truncate_table(aircall_calls_stg_table)
        ddbb.copy_file_into_redshift(
            s3_file_name='{folder}/{file}'.format(folder=folder, file=filename),
            table_name=aircall_calls_stg_table,
            ignore_header=1)
        s3_bucket.move_to_backup('{folder}/{file}'.format(folder=folder, file=filename))
        ddbb.execute_query(delete_loaded_records)
        ddbb.execute_query(load_new_records)
        ddbb.close_connection()
def hubspot_send_date():
    parameters = sys.argv
    directory = 'assortment'
    file_name_users = 'users_data.csv'
    file_name_carts = 'carts_data.csv'
    file_name_abandoned = 'abandoned_carts_data.csv'
    hubspot_delimiter = ','

    redshift = Redshift()
    redshift.open_connection()

    sftp = Sftp(
        host=cred.FOXTER_FTP_IP,
        username=cred.FOXTER_FTP_USER,
        password=cred.FOXTER_FTP_PASSWORD
    )

    # Default to the incremental export unless a mode is passed on the command line.
    date_filter = 'incremental'
    if len(parameters) > 1:
        if parameters[1] == 'all':
            date_filter = 'all'
        elif parameters[1] == 'day':
            date_filter = '1 day'
        elif parameters[1] == 'month':
            date_filter = '30 day'
    # else:
    #     if datetime.today().hour == 7:
    #         date_filter = '1 day'

    if date_filter == 'incremental':
        users_sql = sqls.HUBSPOT_USERS_DATA + sqls.HUBSPOT_USERS_DATA_INCREMENTAL
        carts_sql = sqls.HUBSPOT_CARTS_DATA + sqls.HUBSPOT_CARTS_DATA_INCREMENTAL
        carts_abandoned_sql = sqls.HUBSPOT_ABANDONED_DATA + sqls.HUBSPOT_ABANDONED_DATA_INCREMENTAL
    elif date_filter == 'all':
        users_sql = sqls.HUBSPOT_USERS_DATA
        carts_sql = sqls.HUBSPOT_CARTS_DATA
        carts_abandoned_sql = sqls.HUBSPOT_ABANDONED_DATA
    elif date_filter == '1 day':
        users_sql = sqls.HUBSPOT_USERS_DATA + sqls.HUBSPOT_USERS_DATA_INCREMENTAL_DAY
        carts_sql = sqls.HUBSPOT_CARTS_DATA + sqls.HUBSPOT_CARTS_DATA_INCREMENTAL_DAY
        carts_abandoned_sql = sqls.HUBSPOT_ABANDONED_DATA
    elif date_filter == '30 day':
        users_sql = sqls.HUBSPOT_USERS_DATA + sqls.HUBSPOT_USERS_DATA_30_DAY
        carts_sql = sqls.HUBSPOT_CARTS_DATA + sqls.HUBSPOT_CARTS_DATA_30_DAY
        carts_abandoned_sql = sqls.HUBSPOT_ABANDONED_DATA
    # users_sql = sqls.HUBSPOT_USERS_DATA

    print_message('HUBSPOT', 'The export of the data starts ({date_filter})'.format(date_filter=date_filter))

    carts_data = redshift.fetch_data(carts_sql)
    sftp.send_file_df(df=carts_data, file_name=file_name_carts,
                      directory=directory, delimiter=hubspot_delimiter)
    print_message('HUBSPOT', 'Carts data exported')

    carts_abandoned_data = redshift.fetch_data(carts_abandoned_sql)
    sftp.send_file_df(df=carts_abandoned_data, file_name=file_name_abandoned,
                      directory=directory, delimiter=hubspot_delimiter)
    print_message('HUBSPOT', 'Abandoned carts data exported')

    users_data = redshift.fetch_data(users_sql)
    sftp.send_file_df(df=users_data, file_name=file_name_users,
                      directory=directory, delimiter=hubspot_delimiter)
    print_message('HUBSPOT', 'Users data exported')
    # slack_message('HUBSPOT', 'Files loaded into the sftp')

    sftp.close()
    redshift.close_connection()
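# A minimal sketch of how hubspot_send_date() is expected to be invoked: it
# reads its mode from sys.argv, so it would typically be run from cron or the
# shell. The script name below is hypothetical.
#
#   python hubspot_export.py            # incremental export (default)
#   python hubspot_export.py all        # full export
#   python hubspot_export.py day        # last day only
#   python hubspot_export.py month      # last 30 days
if __name__ == '__main__':
    hubspot_send_date()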
def load_hubspot_data(): filename = "hubspot_campaign.csv" filename_all_events = "hubspot_events_all.csv" folder = 'hubspot' campaign_stg_table = 'bi_development_stg.hubspot_campaigns_stg' events_stg_table = 'bi_development_stg.hubspot_events_stg' processed_files = 'processed_files' sql_load_campaigns = sqls.HUBSPOT_LOAD_CAMPAIGNS sql_load_events = sqls.HUBSPOT_LOAD_EVENTS s3 = S3Bucket() ddbb = Redshift() ddbb.open_connection() json = JsonHubspot() emailMarketing = json.getMarketingEmails() dfObj = json.campaignsToDict(emailMarketing) s3.save_csv(df=dfObj.transpose(), s3_file_name=filename, s3_folder=folder) ddbb.truncate_table(campaign_stg_table) ddbb.copy_file_into_redshift(s3_file_name='{folder}/{filename}'.format(folder=folder, filename=filename), table_name=campaign_stg_table, ignore_header=1, delimiter=cred.S3_DELIMITER) ddbb.execute_query(sql_load_campaigns) s3.move_to_backup('{folder}/{filename}'.format(folder=folder, filename=filename)) print_message('HUBSPOT', 'Campaigns loaded into database') all_events = pd.DataFrame() campaigns = ddbb.fetch_data('select email_campaign_id from bi_development.hubspot_campaigns order by created desc') last_event = ddbb.fetch_data("select pgdate_part('epoch', max(dateadd('hour', -4, created)))*1000 as created_num from bi_development.hubspot_events") ddbb.close_connection() last_event_num = 0 for row in last_event['created_num']: last_event_num = row for row in campaigns['email_campaign_id']: events = json.getCampaignEvents(str(row), str(last_event_num).replace('.0', '')) # events = json.getCampaignEvents(str(row), dates.return_miliseconds(1)) # print(events) all_events = all_events.append(events) if len(all_events) > 0: tmp_file_name = filename_all_events S3Bucket().save_csv(all_events, tmp_file_name, 'hubspot') # slack_message('HUBSPOT', 'Events extracted from hubspot') file_list = s3.list_folders(folder_url=folder) files = 0 ddbb.open_connection() ddbb.truncate_table(events_stg_table) for file in file_list: if not file.startswith(processed_files) \ and str(file) != 'hubspot/hubspot_campaign.csv': ddbb.copy_file_into_redshift(s3_file_name=file, table_name=events_stg_table, ignore_header=1, delimiter=cred.S3_DELIMITER) s3.move_to_backup(file) files = files + 1 ddbb.execute_query(sql_load_events) print_message('HUBSPOT', 'Events loaded into database') ddbb.close_connection()