def generate_period():
    json = JsonAircall()
    s3_bucket = S3Bucket()
    folder = 'aircall'
    filename = 'aircall_{date}.csv'
    date_start = datetime.strptime('2021-02-01', '%Y-%m-%d')
    # end_date = datetime.strptime('2021-02-15', '%Y-%m-%d')
    end_date = datetime.now()
    # Process the date range in 7-day windows
    while date_start < end_date:
        from_date = dates.return_seconds_from_date(
            date_start.strftime('%Y-%m-%d'))
        to_date = dates.return_seconds_from_date(
            (date_start + timedelta(days=7)).strftime('%Y-%m-%d'))
        print('date_start: {date_start} - from: {from_date} - to: {to_date}'.format(
            date_start=date_start, from_date=from_date, to_date=to_date))
        rawCalls = json.getCalls(from_date=from_date, to_date=to_date)
        if len(rawCalls) > 0:
            rawCallsF = JsonAircall.formatDataFrame(rawCalls)
            s3_bucket.save_csv(df=rawCallsF,
                               s3_file_name=filename.format(
                                   date=date_start.strftime('%Y%m%d')),
                               s3_folder=folder)
        date_start = date_start + timedelta(days=7)
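
# Hedged illustration (assumption): dates.return_seconds_from_date is defined
# elsewhere in this project and is not shown here. The Aircall API filters calls
# by Unix timestamps, so a helper along these lines would be consistent with how
# it is used above; the name _epoch_seconds_sketch and the UTC choice are
# hypothetical, not the project's actual implementation.
def _epoch_seconds_sketch(date_str):
    """Convert a 'YYYY-MM-DD' string to epoch seconds (UTC assumed)."""
    from datetime import datetime, timezone
    dt = datetime.strptime(date_str, '%Y-%m-%d').replace(tzinfo=timezone.utc)
    return int(dt.timestamp())
    # Example: _epoch_seconds_sketch('2021-02-01') -> 1612137600
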
def fountain_load_data():
    folder = 'fountain'
    filename = "descarga_fountain.csv"
    table_stg = 'bi_development_stg.fountain_stg'
    destiny_table = 'bi_development.fountain'
    origin_table = 'bi_development_stg.v_fountain_stg'
    print_message('FOUNTAIN', 'Starting to load the data')
    json = Json()
    dataset = json.getData()
    s3_bucket = S3Bucket()
    s3_bucket.save_csv(dataset, filename, folder)
    ddbb = Redshift()
    ddbb.open_connection()
    ddbb.truncate_table(table_stg)
    ddbb.copy_file_into_redshift(
        s3_file_name='{folder}/{filename}'.format(folder=folder, filename=filename),
        table_name=table_stg,
        ignore_header=1,
        delimiter=cred.S3_DELIMITER)
    ddbb.copy_table_from_table(origin_table, destiny_table)
    ddbb.close_connection()
    s3_bucket.move_to_backup('{folder}/{filename}'.format(folder=folder, filename=filename))
    print_message('FOUNTAIN', 'Data added to database')
def generate_last_period():
    json = JsonAircall()
    s3_bucket = S3Bucket()
    folder = 'aircall'
    filename = 'aircall.csv'
    ddbb = Redshift()
    aircall_calls_stg_table = 'bi_development_stg.aircall_calls_stg'
    delete_loaded_records = sqls.AIRCALL_DELETE_LOADED_RECORDS
    load_new_records = sqls.AIRCALL_LOAD_NEW_RECORDS
    from_date = dates.return_seconds(1)
    rawCalls = json.getCalls(from_date=from_date)
    if len(rawCalls) > 0:
        rawCallsF = JsonAircall.formatDataFrame(rawCalls)
        s3_bucket.save_csv(df=rawCallsF, s3_file_name=filename, s3_folder=folder)
        ddbb.open_connection()
        ddbb.truncate_table(aircall_calls_stg_table)
        ddbb.copy_file_into_redshift(
            s3_file_name='{folder}/{file}'.format(folder=folder, file=filename),
            table_name=aircall_calls_stg_table,
            ignore_header=1)
        s3_bucket.move_to_backup('{folder}/{file}'.format(folder=folder, file=filename))
        ddbb.execute_query(delete_loaded_records)
        ddbb.execute_query(load_new_records)
        ddbb.close_connection()
def helpscout_load_data():
    folder = 'helpscout'
    processed_files = 'processed_files'
    # Mailbox
    mailbox_filename = 'helpscout_mailbox.csv'
    mailbox_table_stg = 'bi_development_stg.helpscout_mailbox_stg'
    mailbox_destiny_table = 'bi_development.helpscout_mailbox'
    mailbox_origin_table = 'bi_development_stg.v_helpscout_mailbox_stg'
    # Conversations
    conversations_filename = 'helpscout_conversations.csv'
    conversations_table_stg = 'bi_development_stg.helpscout_conversations_stg'
    conversations_destiny_table = 'bi_development.helpscout_conversations'
    conversations_origin_table = 'bi_development_stg.v_helpscout_conversations_stg'
    s3_bucket = S3Bucket()
    ddbb = Redshift()
    hs = HelpScout_api()

    # Get the mailboxes
    mailboxes = hs.getMailboxes()
    s3_bucket.save_csv(mailboxes, mailbox_filename, folder)
    ddbb.open_connection()
    ddbb.truncate_table(mailbox_table_stg)
    ddbb.copy_file_into_redshift(
        s3_file_name='{folder}/{filename}'.format(folder=folder, filename=mailbox_filename),
        table_name=mailbox_table_stg,
        ignore_header=1,
        delimiter=cred.S3_DELIMITER)
    ddbb.copy_table_from_table(mailbox_origin_table, mailbox_destiny_table)
    ddbb.close_connection()
    s3_bucket.move_to_backup('{folder}/{filename}'.format(
        folder=folder, filename=mailbox_filename))
    print_message('HELPSCOUT', 'Mailboxes data added to database')

    # Get the conversations from the last few days
    days = 4
    conversations = hs.getConversationsLast(hours=24 * days)
    if len(conversations) > 0:
        s3_bucket.save_csv(conversations, conversations_filename, folder)
        ddbb.open_connection()
        ddbb.truncate_table(conversations_table_stg)
        ddbb.copy_file_into_redshift(
            s3_file_name='{folder}/{filename}'.format(
                folder=folder, filename=conversations_filename),
            table_name=conversations_table_stg,
            ignore_header=1,
            delimiter=cred.S3_DELIMITER)
        s3_bucket.move_to_backup('{folder}/{filename}'.format(
            folder=folder, filename=conversations_filename))
        ddbb.execute_query(sql.HELPSCOUT_DELETE)
        ddbb.execute_query(sql.HELPSCOUT_CLOSE.format(days=days))
        ddbb.execute_query(sql.HELPSCOUT_INSERT)
        ddbb.execute_query(sql.HELPSCOUT_ACTIVE)
        ddbb.close_connection()
        print_message('HELPSCOUT', 'Conversations data added to database')
def getDelighted(api_key):
    folder = 'delighted'
    # Surveys
    surveys_filename = 'delighted_surveys.csv'
    surveys_table_stg = 'bi_development_stg.delighted_surveys_stg'
    # People
    people_filename = 'delighted_people.csv'
    people_table_stg = 'bi_development_stg.delighted_people_stg'
    s3_bucket = S3Bucket()
    delighted = Delighted(api_key)

    surveyDataset = delighted.getSurveys()
    s3_bucket.save_csv(surveyDataset, surveys_filename, folder)
    peopleDataset = delighted.getPeople()
    s3_bucket.save_csv(peopleDataset, people_filename, folder)

    ddbb = Redshift()
    ddbb.open_connection()
    ddbb.truncate_table(surveys_table_stg)
    ddbb.copy_file_into_redshift(
        s3_file_name='{folder}/{filename}'.format(folder=folder, filename=surveys_filename),
        table_name=surveys_table_stg,
        ignore_header=1,
        delimiter=cred.S3_DELIMITER)
    ddbb.execute_query(sql.DELIGHTED_SURVEYS_CHANGES)
    ddbb.execute_query(sql.DELIGHTED_SURVEYS_DELETE)
    ddbb.execute_query(sql.DELIGHTED_SURVEYS)
    s3_bucket.move_to_backup('{folder}/{filename}'.format(
        folder=folder, filename=surveys_filename))
    print_message('DELIGHTED', 'Surveys data added to database')

    ddbb.truncate_table(people_table_stg)
    ddbb.copy_file_into_redshift(
        s3_file_name='{folder}/{filename}'.format(folder=folder, filename=people_filename),
        table_name=people_table_stg,
        ignore_header=1,
        delimiter=cred.S3_DELIMITER)
    ddbb.execute_query(sql.DELIGHTED_PEOPLE)
    print_message('DELIGHTED', 'People data added to database')
    s3_bucket.move_to_backup('{folder}/{filename}'.format(
        folder=folder, filename=people_filename))
    ddbb.close_connection()
def change_files():
    s3 = S3Bucket()
    firebase_files = 'firebase'
    tmp_filename = 'tmp_file.csv'
    file_list = s3.list_folders(firebase_files)
    for file in file_list:
        local_filename = file.split('/')[1]
        s3.download_file(s3_file_name=file, target_filename=tmp_filename)
        tmp_file = open(tmp_filename, 'r')
        start_file = 0
        inicio = 0
        inicios = 0
        cabeceras = 0
        first_date = ''
        # First pass: count the '# Fecha de inicio' ("start date") section headers
        # and keep the first start date, reformatted as YYYY-MM-DD.
        for line in tmp_file.readlines():
            if line.rstrip('\n').startswith('# Fecha de inicio'):
                inicios = inicios + 1
                if not first_date:
                    first_date = line.rstrip('\n').replace('# Fecha de inicio: ', '')
                    first_date = first_date[0:4] + '-' + first_date[4:6] + '-' + first_date[6:8]
        tmp_file.close()
        if inicios > 0:
            # Second pass: keep only the last section, skip its first two lines,
            # and append the start date as an extra column on each data row.
            tmp_file = open(tmp_filename, 'r')
            local_file = open(local_filename, 'w')
            for line in tmp_file.readlines():
                if line.rstrip('\n').startswith('# Fecha de inicio'):
                    inicio = inicio + 1
                if inicios == inicio:
                    cabeceras = cabeceras + 1
                    if cabeceras >= 3:
                        local_file.writelines(line.rstrip('\n') + ',' + first_date + '\n')
            local_file.close()
            tmp_file.close()
            s3.upload_file(s3_file_name=local_filename, s3_folder='firebase',
                           local_file_name=local_filename)
            os.remove(tmp_filename)
            os.remove(local_filename)
def digital_load_data():
    s3 = S3Bucket()
    processed_files = 'processed_files'
    file_types = ['adwords', 'facebook', 'firebase', 'GoogleAnalytics']
    digital_google_analytics.truntcate_stg_table()
    digital_firebase.change_files()
    for file_type in file_types:
        file_list = s3.list_folders(file_type)
        files = 0
        for file in file_list:
            if not file.startswith(processed_files):
                load_file(file)
                s3.move_to_backup(file)
                files = files + 1
        if files > 0:
            process_file(file_type)
            slack_message('DIGITAL', 'Files from {file_type} have been processed'.format(
                file_type=file_type))
def load_hubspot_data():
    filename = "hubspot_campaign.csv"
    filename_all_events = "hubspot_events_all.csv"
    folder = 'hubspot'
    campaign_stg_table = 'bi_development_stg.hubspot_campaigns_stg'
    events_stg_table = 'bi_development_stg.hubspot_events_stg'
    processed_files = 'processed_files'
    sql_load_campaigns = sqls.HUBSPOT_LOAD_CAMPAIGNS
    sql_load_events = sqls.HUBSPOT_LOAD_EVENTS
    s3 = S3Bucket()
    ddbb = Redshift()
    ddbb.open_connection()

    # Campaigns
    json = JsonHubspot()
    emailMarketing = json.getMarketingEmails()
    dfObj = json.campaignsToDict(emailMarketing)
    s3.save_csv(df=dfObj.transpose(), s3_file_name=filename, s3_folder=folder)
    ddbb.truncate_table(campaign_stg_table)
    ddbb.copy_file_into_redshift(
        s3_file_name='{folder}/{filename}'.format(folder=folder, filename=filename),
        table_name=campaign_stg_table,
        ignore_header=1,
        delimiter=cred.S3_DELIMITER)
    ddbb.execute_query(sql_load_campaigns)
    s3.move_to_backup('{folder}/{filename}'.format(folder=folder, filename=filename))
    print_message('HUBSPOT', 'Campaigns loaded into database')

    # Events
    all_events = pd.DataFrame()
    campaigns = ddbb.fetch_data('select email_campaign_id from bi_development.hubspot_campaigns order by created desc')
    last_event = ddbb.fetch_data("select pgdate_part('epoch', max(dateadd('hour', -4, created)))*1000 as created_num from bi_development.hubspot_events")
    ddbb.close_connection()
    last_event_num = 0
    for row in last_event['created_num']:
        last_event_num = row
    for row in campaigns['email_campaign_id']:
        events = json.getCampaignEvents(str(row), str(last_event_num).replace('.0', ''))
        # events = json.getCampaignEvents(str(row), dates.return_miliseconds(1))
        # print(events)
        all_events = all_events.append(events)
    if len(all_events) > 0:
        tmp_file_name = filename_all_events
        S3Bucket().save_csv(all_events, tmp_file_name, 'hubspot')
    # slack_message('HUBSPOT', 'Events extracted from hubspot')
    file_list = s3.list_folders(folder_url=folder)
    files = 0
    ddbb.open_connection()
    ddbb.truncate_table(events_stg_table)
    for file in file_list:
        if not file.startswith(processed_files) \
                and str(file) != 'hubspot/hubspot_campaign.csv':
            ddbb.copy_file_into_redshift(s3_file_name=file,
                                         table_name=events_stg_table,
                                         ignore_header=1,
                                         delimiter=cred.S3_DELIMITER)
            s3.move_to_backup(file)
            files = files + 1
    ddbb.execute_query(sql_load_events)
    print_message('HUBSPOT', 'Events loaded into database')
    ddbb.close_connection()
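
# Usage sketch (assumption, not part of the original module): a minimal example of
# how the parameterless loaders above might be orchestrated as a single daily run.
# The __main__ block and the try/except wrapper are hypothetical; only the function
# names called below come from this module. getDelighted is omitted because it
# needs an api_key. print_message is the module's own logger; the error text is
# illustrative.
if __name__ == '__main__':
    daily_jobs = [
        generate_last_period,   # incremental Aircall load
        fountain_load_data,
        helpscout_load_data,
        digital_load_data,
        load_hubspot_data,
    ]
    for job in daily_jobs:
        try:
            job()
        except Exception as exc:
            print_message(job.__name__.upper(), 'Failed: {exc}'.format(exc=exc))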