def upload_to_redshift(self, file_name):
    """Copy a CSV file from the ``amaro-bi`` S3 bucket into Redshift.

    The target table name is ``SCHEMA`` and ``ADSOURCE`` joined by a dot,
    with dashes in ``ADSOURCE`` replaced by underscores. The table is
    created if it does not exist, then filled by a Redshift ``COPY`` of
    ``s3://amaro-bi/<file_name>``.

    Args:
        file_name: Path of the CSV file inside the ``amaro-bi`` bucket. It
            is interpolated verbatim into the ``COPY`` statement.
    """
    target_table = ".".join((SCHEMA, ADSOURCE.replace("-", "_")))

    # Adjacent literals concatenate into exactly the original SQL text.
    create_sql = (
        " CREATE TABLE IF NOT EXISTS {fn} (id INT IDENTITY(1,1),"
        " date DATE NOT NULL,"
        " campaign VARCHAR(256) NOT NULL,"
        " adclicks VARCHAR(256) NULL,"
        " impressions VARCHAR(256) NULL,"
        " adcost FLOAT NULL,"
        " updated_ts TIMESTAMP NOT NULL );"
    )
    copy_sql = (
        " COPY {fn} FROM 's3://amaro-bi/{filepath}'"
        " ACCEPTINVCHARS delimiter ','"
        " ignoreheader 1 csv quote as '\"'"
        " dateformat 'auto' timeformat 'auto'"
        " region 'sa-east-1'"
        " access_key_id '{acess_key}'"
        " secret_access_key '{secret_key}'; "
    )

    pr.exec_commit(create_sql.format(fn=target_table))

    # The AWS credentials are module-level names defined outside this block.
    copy_statement = copy_sql.format(
        fn=target_table,
        filepath=file_name,
        acess_key=s3_credentials_AWS_ACCESS_KEY,
        secret_key=s3_credentials_AWS_SECRET_KEY,
    )
    pr.exec_commit(copy_statement)

    print('Finished processing')
def delete_from_date(date):
    """Delete rows whose ``date_sent`` is on or after *date*.

    The rows are removed from the table named by the module-level
    ``REDSHIFT_TABLE_NAME``. The statement is printed before it is run.

    Args:
        date: Lower bound for ``date_sent``; interpolated verbatim into the
            SQL text between single quotes.
    """
    template = "DELETE FROM {table} WHERE date_sent >= '{datef}'"
    statement = template.format(table=REDSHIFT_TABLE_NAME, datef=date)
    print("PRINT SQL STATEMENT: ", statement)
    pr.exec_commit(statement)
def delete_from_date(date):
    """Delete survey rows whose ``date_sent`` is on or after *date*.

    Operates on ``manual_data_sources.limesurvey_pedido_entregue_survey``.
    The statement is printed before it is run.

    Args:
        date: Lower bound for ``date_sent``; interpolated verbatim into the
            SQL text between single quotes.
    """
    statement = (
        "DELETE FROM manual_data_sources.limesurvey_pedido_entregue_survey "
        "WHERE date_sent >= '{datef}'"
    ).format(datef=date)
    print("PRINT SQL STATEMENT: ", statement)
    pr.exec_commit(statement)
def stops_to_durations():
    """Fill ``trip_durations`` for departure stops not yet processed.

    First selects every ``(data_frame_ref, stop_id)`` pair that exists in
    ``stop_events``, has no matching ``(data_frame_ref, departure_stop_id)``
    in ``trip_durations``, and whose ``data_frame_ref`` is earlier than the
    current date in US/Pacific. For each pair it inserts one row per later
    stop event of the same trip (``stop_time_unix`` greater than the
    departure's), with ``trip_duration`` being the difference of the two
    ``stop_time_unix`` values.

    The Redshift connection is closed even if the query or an insert
    raises; the exception still propagates to the caller.
    """
    # Adjacent literals concatenate into exactly the original SQL text.
    pending_sql = (
        "select a.* from"
        " (select data_frame_ref, stop_id from stop_events"
        " group by data_frame_ref, stop_id) a"
        " left join"
        " (select data_frame_ref, departure_stop_id from trip_durations"
        " group by data_frame_ref, departure_stop_id) b"
        " on a.data_frame_ref = b.data_frame_ref"
        " and a.stop_id = b.departure_stop_id"
        " where b.data_frame_ref is null and b.departure_stop_id is null"
        " and a.data_frame_ref <"
        " trunc(convert_timezone('US/Pacific', GETDATE()))"
        " order by a.data_frame_ref, a.stop_id;"
    )
    # Loop-invariant template; the two placeholders are data_frame_ref
    # (quoted) and stop_id (unquoted), in that order.
    insert_sql = (
        "insert into trip_durations"
        " select a.data_frame_ref, a.trip_id,"
        " a.stop_id as departure_stop_id,"
        " a.stop_time as departure_time,"
        " a.stop_time_unix as departure_time_unix,"
        " s.stop_id as arrival_stop_id,"
        " s.stop_time as arrival_time,"
        " s.stop_time_unix as arrival_time_unix,"
        " s.stop_time_unix - a.stop_time_unix as trip_duration,"
        " date_trunc('hour', a.stop_time) as departure_time_hour"
        " from (select * from stop_events"
        " where data_frame_ref = '{}' and stop_id = {}) a"
        " join stop_events s"
        " on a.data_frame_ref = s.data_frame_ref"
        " and a.trip_id = s.trip_id"
        " and s.stop_time_unix > a.stop_time_unix"
    )

    connect_to_redshift()
    try:
        pending = pr.redshift_to_pandas(pending_sql)
        n_days_dep_stops = pending.shape[0]

        # Count positions with enumerate so the "x of y" progress message
        # does not depend on the DataFrame's index labels.
        for position, (_, row) in enumerate(pending.iterrows(), start=1):
            data_frame_ref = row['data_frame_ref']
            dep_stop_id = row['stop_id']
            print("Processing data_frame_ref {}, departure_stop_id {} ({} of {})".
                  format(data_frame_ref, dep_stop_id, position,
                         n_days_dep_stops))
            pr.exec_commit(insert_sql.format(data_frame_ref, dep_stop_id))
    finally:
        # Always release the connection, including on failure part-way
        # through the batch.
        pr.close_up_shop()
def delete_from_date(self, app_path):
    """Delete ad-cost rows dated on or after the start date.

    The cutoff comes from ``self.get_start_date(app_path)`` and is compared
    against the ``date`` column of ``manual_data_sources.rtb_ad_cost``. The
    statement is printed before it is run.

    Args:
        app_path: Passed through unchanged to ``self.get_start_date``.
    """
    start_date = self.get_start_date(app_path)
    statement = (
        "DELETE FROM manual_data_sources.rtb_ad_cost "
        "WHERE date >= '{datef}'"
    ).format(datef=start_date)
    print("PRINT SQL STATEMENT: ", statement)
    pr.exec_commit(statement)
table=REDSHIFT_TABLE_NAME, datef=date) print("PRINT SQL STATEMENT: ", query) pr.exec_commit(query) return None delete_from_date(date='2018-01-01') print('5. Create table') pr.exec_commit(""" CREATE TABLE IF NOT EXISTS {table} (id_answer varchar(256), date_sent varchar(256), grade varchar(256), email varchar(256), order_number varchar(256), return_order_number varchar(256), language varchar(256), updated_ts varchar(256), return_channel varchar(256) );""".format(table=REDSHIFT_TABLE_NAME)) print('6. Upload to Redshift') pr.exec_commit(""" COPY {table} FROM 's3://amaro-bi/{filepath}' ACCEPTINVCHARS delimiter ',' ignoreheader 1 csv quote as '"'