def get_hh_traces_stress_test(run_id, region):
    mapping_info = read_pickle_from_s3(
        project_bucket,
        results_stress_test_summary_mapping_s3_pickle_path.format(run_id))
    df_hh_traces = pd.DataFrame()
    for elem in mapping_info:
        print(elem)
        if (elem[0] == region) & (elem[1] <= datetime(2022, 1, 1).date()):
            week_ending = elem[1]
            week_starting = week_ending - timedelta(weeks=1)
            p = elem[2]
            sim_index = int(elem[3])
            df_sim = read_pickle_from_s3(
                project_bucket,
                results_stress_test_by_sim_s3_pickle_path.format(run_id, sim_index, sim_index))
            df_sim['Date'] = df_sim['SettlementDateTime'].apply(
                lambda row: (row.to_pydatetime() - timedelta(minutes=30)).date())
            df_tmp = df_sim[(df_sim['Date'] > week_starting)
                            & (df_sim['Date'] <= week_ending)][[
                                'TradingRegion', 'SettlementDateTime', 'Spot Price',
                                'adjusted price', 'Adjusted EAR Cost'
                            ]]
            df_tmp['Percentile'] = p
            df_tmp['PercentileBase'] = elem[0]
            df_hh_traces = df_hh_traces.append(df_tmp)
        else:
            continue
    write_pickle_to_s3(
        df_hh_traces, project_bucket,
        results_stress_test_hh_traces_s3_pickle_path.format(run_id, region))
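# The functions in this file call read_pickle_from_s3 / write_pickle_to_s3, which are
# imported from a shared helper module that is not shown here. The sketch below is only
# an assumption of what those helpers might look like, inferred from the call sites
# (bucket, key) and the boto3 usage elsewhere in this file; it is not the project's
# actual implementation.
import pickle

import boto3


def read_pickle_from_s3(bucket, key):
    """Assumed implementation: download an S3 object and unpickle it."""
    obj = boto3.client('s3').get_object(Bucket=bucket, Key=key)
    return pickle.loads(obj['Body'].read())


def write_pickle_to_s3(data, bucket, key):
    """Assumed implementation: pickle an object and upload it to S3."""
    boto3.client('s3').put_object(Bucket=bucket, Key=key, Body=pickle.dumps(data))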
def simulate_demand_profile(key_name):
    region = key_name.split('/')[1]
    distributor = key_name.split('/')[2].split('.')[0]

    # get reference day parameters from Spot Simulations
    # object_key = spot_simulation_input_extra_info.format(run_id)
    # s3 = boto3.client('s3')
    # obj = s3.get_object(Bucket=bucket_nbe, Key=object_key)
    # data = obj['Body'].read()
    # extra_sim_mapping = pd.read_excel(io.BytesIO(data))
    # # extra_sim_mapping = pd.read_excel('../input/mapping.xlsx')
    # extra_sim_mapping['original_sim'] = (extra_sim_mapping[1] / 9).astype('int')
    # extra_sims = extra_sim_mapping[['original_sim', 3]].to_dict(orient='split')['data']
    params = read_pickle_from_s3(
        bucket_spot_simulation, parameters_for_batch_v2.format(run_id))
    meter_data_dic = read_pickle_from_s3(
        bucket_nbe, meter_data_pickle_path.format(region, distributor))

    for sim_i in params.keys():
        sim_meter_data = dict()
        for week in params[sim_i][:209]:  # first 209 weeks in simulation: to 2024-1-2
            for day in week:
                sim_day = day['sim_date']
                ref_day = day['ref_date']
                sim_meter_data[sim_day] = meter_data_dic[ref_day]
        write_pickle_to_s3(
            sim_meter_data, bucket_nbe,
            meter_data_simulation_s3_pickle_path.format(run_id, sim_i, distributor))
        print(sim_i)
def get_hh_traces(run_id, job_id):
    mapping_info = read_pickle_from_s3(
        bucket_nbe,
        results_EAR_summary_mapping_s3_pickle_path.format(run_id, job_id))
    df_hh_traces = pd.DataFrame()
    for elem in mapping_info:
        print(elem)
        if (elem[0] == 'GrandTotal') & (elem[1] <= datetime(2022, 1, 1).date()):
            week_ending = elem[1]
            week_starting = week_ending - timedelta(weeks=1)
            p = elem[2]
            sim_index = int(elem[3])
            df_sim = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_simulation_s3_pickle_path.format(run_id, job_id, sim_index))
            df_sim['Date'] = df_sim['SettlementDateTime'].apply(
                lambda row: (row.to_pydatetime() - timedelta(minutes=30)).date())
            df_tmp = df_sim[(df_sim['Date'] > week_starting)
                            & (df_sim['Date'] <= week_ending)][[
                                'TradingRegion', 'SettlementDateTime',
                                'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
                                'Customer Net MWh', 'Spot Price', 'Total Cost ($)'
                            ]]
            # build a GrandTotal trace summed across all regions for the same half hours
            df_tmp_grandtotal = df_tmp.groupby(['SettlementDateTime']).sum()
            df_tmp_grandtotal.reset_index(inplace=True)
            df_tmp_grandtotal.insert(0, 'TradingRegion', 'GrandTotal')
            df_tmp.reset_index(inplace=True, drop=True)
            df_tmp = df_tmp.append(df_tmp_grandtotal).reset_index(drop=True)
            # convert half-hourly energy (MWh) to average power (MW)
            df_tmp['Swap Hedged Qty (MW)'] = df_tmp['Swap Hedged Qty (MWh)'] * 2
            df_tmp['Cap Hedged Qty (MW)'] = df_tmp['Cap Hedged Qty (MWh)'] * 2
            df_tmp['Customer Net MW'] = df_tmp['Customer Net MWh'] * 2
            df_tmp = df_tmp.drop(columns=[
                'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)', 'Customer Net MWh'
            ])
            df_tmp['Percentile'] = p
            df_hh_traces = df_hh_traces.append(df_tmp)
        else:
            continue
    df_hh_traces['Spot Run No.'] = run_id
    df_hh_traces['Job No.'] = job_id
    # df_hh_traces.to_csv('HH_Simulation_Traces_{}.csv'.format(run_id))
    write_pickle_to_s3(
        df_hh_traces, bucket_nbe,
        results_EAR_hh_traces_s3_pickle_path.format(run_id, job_id))
def main_process(key_name):
    # input_path = '../input/'
    # filename = 'demand_profile_NBE.xlsx'
    # input_df = pd.read_excel(os.path.join(input_path, filename))

    # read raw meter data from the S3 target folder
    filename = key_name.split('/')[1]
    object_key = meter_data_file_path.format(filename)
    s3 = boto3.client('s3')
    obj = s3.get_object(Bucket=bucket_nbe, Key=object_key)
    data = obj['Body'].read()
    input_df = pd.read_excel(io.BytesIO(data))

    demand_entity_list = list(set(input_df['Distributor']))

    # process data for each distributor
    for distributor in demand_entity_list:
        distributor_df = input_df[input_df['Distributor'] == distributor].reset_index(
            drop=True).sort_values(by=['INTERVAL_DATE', 'INTERVAL_NUM'])
        kwargs = {
            'hist_start_date': distributor_df['INTERVAL_DATE'].iloc[0].date(),
            'hist_end_date': distributor_df['INTERVAL_DATE'].iloc[-1].date(),
            'ref_start_date': datetime.datetime.strptime(ref_start_date_str, "%Y-%m-%d").date(),
            'ref_end_date': datetime.datetime.strptime(ref_end_date_str, "%Y-%m-%d").date(),
            'name': distributor,
            # the naming convention of regions, i.e. VIC1
            'region': distributor_df['STATE'].iloc[0]
            if distributor_df['STATE'].iloc[0].endswith('1')
            else distributor_df['STATE'].iloc[0] + '1'
        }
        demand_profile = DemandProfile(kwargs)
        demand_profile.region = 'NSW1' if demand_profile.region == 'ACT1' else demand_profile.region
        demand_profile.weather_station = weather_stations[demand_profile.region]
        distributor_df = distributor_df.drop(columns=['STATE', 'Distributor'])
        cleaned_data = demand_profile.check_missing_30_min(distributor_df)
        populated_ref = demand_profile.populate_history()
        populated_data = demand_profile.get_df_map(cleaned_data, populated_ref)
        write_pickle_to_s3(
            populated_data, bucket_nbe,
            meter_data_pickle_path.format(demand_profile.region, demand_profile.name))
        print(demand_profile.name)
def get_output_stress_test(run_id, sim_num):
    # output by simulation
    df_all_sim = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_stress_test_summary_by_sim_s3_pickle_path.format(run_id, i))
        else:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_stress_test_summary_by_sim_s3_pickle_path.format(run_id, 900 + (i - 900) * 9))
        df_all_sim = df_all_sim.append(df_tmp)
        print(i)

    # output by percentile (normal percentile format) and save the mapping of percentile & sim_index
    percentile_list = [0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]
    df_percentile = df_all_sim[['TradingRegion', 'FourWeekBlocks', 'Adjusted EAR Cost']].groupby(
        ['TradingRegion', 'FourWeekBlocks']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile_stress_test(df_all_sim, df_percentile)
    df_percentile_update = pd.DataFrame.from_records(
        df_percentile_lst,
        columns=['TradingRegion', 'FourWeekBlocks', 'Percentile', 'Adjusted EAR Cost',
                 'SimNo. (based on Adjusted EAR Cost)'])
    df_percentile_update = df_percentile_update.rename(
        columns={'Adjusted EAR Cost': 'Adjusted Wholesale Margin',
                 'SimNo. (based on Adjusted EAR Cost)': 'SimNo. (based on Adjusted Wholesale Margin)'})
    mapping_info = df_percentile_update[
        ['TradingRegion', 'FourWeekBlocks', 'Percentile',
         'SimNo. (based on Adjusted Wholesale Margin)']].to_dict(orient='split')['data']
    write_pickle_to_s3(
        mapping_info, bucket_nbe,
        results_stress_test_summary_mapping_s3_pickle_path.format(run_id))
    print('mapping information saved.')

    # output by percentile for PBI
    new_output = duplicate_percentile_for_pbi_stress_test(df_percentile_lst, p_position=2)
    df_percentile_pbi = pd.DataFrame.from_records(
        new_output,
        columns=['TradingRegion', 'FourWeeksBlocks_WeekEnding', 'Percentile',
                 'Adjusted Wholesale Margin', 'SimNo. (based on Adjusted Wholesale Margin)'])
    # job_id is not a parameter here; it is assumed to be available at module level
    df_percentile_pbi['Spot Run No.'] = run_id
    df_percentile_pbi['Job No.'] = job_id
    # df_percentile_pbi.to_excel('NBE_StressTest_Output_by_PBI_percentiles_{}.xlsx'.format(run_id))
    df_percentile_pbi.reset_index(inplace=True)
    df_percentile_pbi = df_percentile_pbi.rename(columns={'index': 'Case'})
    df_percentile_pbi.to_csv(
        'EAR_StressTest_Output_by_PBI_percentiles_{}_{}.csv'.format(job_id, run_id), index=False)
def stress_test_summary(run_id, sim_index):
    df_raw = read_pickle_from_s3(
        bucket_nbe,
        results_stress_test_by_sim_s3_pickle_path.format(run_id, sim_index, sim_index))
    df = df_raw[['TradingRegion', 'SettlementDateTime', 'Adjusted EAR Cost']]
    df['WeekEnding'] = df['SettlementDateTime'].apply(get_week_ending)
    week_ending_init = df['WeekEnding'].iloc[0]
    df['FourWeekBlocks'] = df['WeekEnding'].apply(get_four_week_blocks, args=(week_ending_init,))

    # sum by region by four-week block
    df_summarized = df[['TradingRegion', 'Adjusted EAR Cost', 'FourWeekBlocks']].groupby(
        ['TradingRegion', 'FourWeekBlocks']).sum()
    # all regions' sum by four-week block
    df_grandtotal = df[['FourWeekBlocks', 'Adjusted EAR Cost']].groupby(['FourWeekBlocks']).sum()
    df_grandtotal.reset_index(inplace=True)
    df_grandtotal.insert(0, 'TradingRegion', 'GrandTotal')
    df_summarized.reset_index(inplace=True)
    df_summarized = df_summarized.append(df_grandtotal).reset_index(drop=True)
    df_summarized['SimNo'] = sim_index
    write_pickle_to_s3(
        df_summarized, bucket_nbe,
        results_stress_test_summary_by_sim_s3_pickle_path.format(run_id, sim_index))
    return df_summarized
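# stress_test_summary() above and the weekly EAR summary later in this file depend on
# get_week_ending and get_four_week_blocks, which are defined in another module. The
# sketch below is a hypothetical reading of those helpers: the week-ending weekday
# (Sunday here) and the block labelling convention are assumptions for illustration
# only, not the project's actual code.
from datetime import timedelta


def get_week_ending(settlement_dt):
    # Attribute the 00:00 interval to the previous day, then roll forward to the
    # assumed week-ending weekday (Sunday).
    d = (settlement_dt - timedelta(minutes=30)).date()
    return d + timedelta(days=(6 - d.weekday()) % 7)


def get_four_week_blocks(week_ending, week_ending_init):
    # Label each WeekEnding with the WeekEnding that closes its four-week block,
    # counting blocks from the first week of the simulation horizon (assumed convention).
    block_index = ((week_ending - week_ending_init).days // 7) // 4
    return week_ending_init + timedelta(weeks=4 * block_index + 3)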
def main(params):
    # parse parameters
    run_id = params['run_id']
    job_id = params['job_id']
    sim_index = params['sim_index']
    start_date = datetime.date(*params["start_date"])
    end_date = datetime.date(*params["end_date"])
    # period_days = 7

    df_pd = pd.read_parquet('s3://{}/{}'.format(bucket_nbe, period_definition_path))
    df_profile_mapping = pd.read_parquet('s3://{}/{}'.format(bucket_nbe, profile_mapping_path))
    profile_id = {row['name']: row['id'] for _, row in df_profile_mapping.iterrows()}
    profile_region_id = {row['name']: row['region_id'] for _, row in df_profile_mapping.iterrows()}

    # read spot price data
    df_sp = pd.read_parquet('s3://{}/{}'.format(
        bucket_nbe, spot_price_by_sim_parquet_path.format(run_id, sim_index)))
    df_sp['Half Hour Starting'] = df_sp['Half Hour Starting'].apply(lambda x: x.to_pydatetime())
    df_sp['Date'] = df_sp['Half Hour Starting'].apply(lambda x: x.date())

    for period_resolution in period_resolutions:
        period_starting = start_date
        period_ending = find_next_period_ending(start_date, by=period_resolution)  # incl.
        result = avg_price_calcs(period_resolution, period_starting, period_ending,
                                 end_date, df_sp, df_pd, profile_id, profile_region_id,
                                 sim_index)
        write_pickle_to_s3(
            result, bucket_nbe,
            results_avg_price_by_profile_by_sim_path.format(
                period_resolution, job_id, run_id, sim_index))
def get_output(run_id, job_id, sim_num):
    # output by simulation - weekly/monthly/quarterly average price
    for p in ['week', 'month', 'quarter']:
        print("loading data of all simulations by {}.".format(p))
        all_sim_lst = []
        for i in range(sim_num):
            if i < 900:
                tmp_lst = read_pickle_from_s3(
                    bucket_nbe,
                    results_avg_price_by_profile_by_sim_path.format(p, job_id, run_id, i))
            else:
                tmp_lst = read_pickle_from_s3(
                    bucket_nbe,
                    results_avg_price_by_profile_by_sim_path.format(p, job_id, run_id, 900 + (i - 900) * 9))
            all_sim_lst += tmp_lst
            print(i)
        col_period = 'WeekEnding' if p == 'week' else 'PeriodEnding'
        df_output = pd.DataFrame(
            all_sim_lst,
            columns=[col_period, 'TradingRegion', 'SimNo', 'Profile', 'Average Spot Price',
                     'Average Cap Payouts', 'NoOfHour', 'ProfileID', 'RegionID'])
        df_output['Profile'] = df_output['Profile'].apply(remove_profile_region_suffix)
        df_output['Spot Run No.'] = df_output.apply(lambda _: run_id, axis=1)
        df_output['Job No.'] = df_output.apply(lambda _: job_id, axis=1)
        csv_buffer = StringIO()
        df_output.to_csv(csv_buffer, index=False)
        s3_resource = boto3.resource('s3')
        s3_resource.Object(
            bucket_nbe,
            results_avg_price_by_profile_by_sim_csv_path.format(job_id, run_id, p, job_id, run_id)
        ).put(Body=csv_buffer.getvalue())

    # output by simulation - weekly
    print("loading data of all simulations by week.")
    df_all_sim = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_week_summary_by_simulation_s3_pickle_path.format(job_id, run_id, i))
        else:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_week_summary_by_simulation_s3_pickle_path.format(job_id, run_id, 900 + (i - 900) * 9))
        df_all_sim = df_all_sim.append(df_tmp)
        print(i)
    df_all_sim['Spot Run No.'] = run_id
    df_all_sim['Job No.'] = job_id

    # EAR_Output_by_simulations_by_week.csv
    df_all_sim_week_complete = df_all_sim
    df_all_sim_week_complete.reset_index(inplace=True)
    df_all_sim_week_complete = df_all_sim_week_complete.rename(columns={'index': 'Case'})
    csv_buffer = StringIO()
    df_all_sim_week_complete.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_by_sim_by_week_complete.format(job_id, run_id, job_id, run_id)
    ).put(Body=csv_buffer.getvalue())

    # EAR_Output_by_sim_by_week_for_CFD_impact.csv
    df_all_sim_week = df_all_sim[
        ['TradingRegion', 'WeekEnding', 'Total Cost (Incl Cap)', 'Transfer Cost',
         'Wholesale Margin', 'SimNo', 'Spot Run No.', 'Job No.']]
    df_all_sim_week.reset_index(inplace=True)
    df_all_sim_week = df_all_sim_week.rename(columns={'index': 'Case'})
    csv_buffer = StringIO()
    df_all_sim_week.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_by_sim_by_week.format(job_id, run_id, job_id, run_id)
    ).put(Body=csv_buffer.getvalue())

    # output by simulation - monthly
    print("loading data of all simulations by month.")
    df_all_sim_month = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_mth_summary_by_simulation_s3_pickle_path.format(job_id, run_id, i))
        else:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_mth_summary_by_simulation_s3_pickle_path.format(job_id, run_id, 900 + (i - 900) * 9))
        df_all_sim_month = df_all_sim_month.append(df_tmp)
        print(i)
    df_all_sim_month['Spot Run No.'] = run_id
    df_all_sim_month['Job No.'] = job_id

    # EAR_Output_by_simulations_by_month.csv
    df_all_sim_month_complete = df_all_sim_month
    df_all_sim_month_complete.reset_index(inplace=True)
    df_all_sim_month_complete = df_all_sim_month_complete.rename(columns={'index': 'Case'})
    csv_buffer = StringIO()
    df_all_sim_month_complete.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_by_sim_by_month_complete.format(job_id, run_id, job_id, run_id)
    ).put(Body=csv_buffer.getvalue())

    # EAR_Output_by_sim_by_month_for_CFD_impact.csv
    df_all_sim_month.reset_index(inplace=True)
    df_all_sim_month = df_all_sim_month.rename(columns={'index': 'Case'})
    df_all_sim_month = df_all_sim_month[
        ['Case', 'TradingRegion', 'MonthEnding', 'Total Cost (Incl Cap)', 'Transfer Cost',
         'Wholesale Margin', 'SimNo', 'Spot Run No.', 'Job No.']]
    csv_buffer = StringIO()
    df_all_sim_month.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_by_sim_by_month.format(job_id, run_id, job_id, run_id)
    ).put(Body=csv_buffer.getvalue())

    # output by simulation - quarterly
    print("loading data of all simulations by quarter.")
    df_all_sim_qtr = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_qtr_summary_by_simulation_s3_pickle_path.format(job_id, run_id, i))
        else:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_qtr_summary_by_simulation_s3_pickle_path.format(job_id, run_id, 900 + (i - 900) * 9))
        df_all_sim_qtr = df_all_sim_qtr.append(df_tmp)
        print(i)
    df_all_sim_qtr['Spot Run No.'] = run_id
    df_all_sim_qtr['Job No.'] = job_id

    # EAR_Output_by_simulations_by_quarter.csv
    df_all_sim_quarter_complete = df_all_sim_qtr
    df_all_sim_quarter_complete.reset_index(inplace=True)
    df_all_sim_quarter_complete = df_all_sim_quarter_complete.rename(columns={'index': 'Case'})
    csv_buffer = StringIO()
    df_all_sim_quarter_complete.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_by_sim_by_quarter_complete.format(job_id, run_id, job_id, run_id)
    ).put(Body=csv_buffer.getvalue())

    # EAR_Output_by_sim_by_quarter_for_CFD_impact.csv
    df_all_sim_qtr.reset_index(inplace=True)
    df_all_sim_qtr = df_all_sim_qtr.rename(columns={'index': 'Case'})
    df_all_sim_qtr = df_all_sim_qtr[
        ['Case', 'TradingRegion', 'QuarterEnding', 'Total Cost (Incl Cap)', 'Transfer Cost',
         'Wholesale Margin', 'SimNo', 'Spot Run No.', 'Job No.']]
    csv_buffer = StringIO()
    df_all_sim_qtr.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_by_sim_by_quarter.format(job_id, run_id, job_id, run_id)
    ).put(Body=csv_buffer.getvalue())

    # save the mapping of percentile & sim_index of all percentiles
    percentile_list = [0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.25, 0.5, 0.75, 0.95, 1]
    df_percentile = df_all_sim[
        ['TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
         'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'Total Cost (excl GST)',
         'Cap Premium Cost', 'Total Cost (Incl Cap)', 'Transfer Cost',
         'Wholesale Margin']].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile(df_all_sim, df_percentile)
    df_percentile_update = pd.DataFrame.from_records(
        df_percentile_lst,
        columns=['TradingRegion', 'WeekEnding', 'Percentile', 'Swap Hedged Qty (MWh)',
                 'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd',
                 'Cap Cfd', 'Total Cost (excl GST)', 'Cap Premium Cost',
                 'Total Cost (Incl Cap)', 'Transfer Cost', 'Wholesale Margin',
                 'SimNo. (based on Wholesale Margin)'])
    mapping_info = df_percentile_update[
        ['TradingRegion', 'WeekEnding', 'Percentile',
         'SimNo. (based on Wholesale Margin)']].to_dict(orient='split')['data']
    write_pickle_to_s3(
        mapping_info, bucket_nbe,
        results_EAR_summary_mapping_s3_pickle_path.format(job_id, run_id))
    print('mapping information saved.')

    # EAR_Output_by_normal_percentiles.csv
    percentile_list = [0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]
    df_percentile = df_all_sim[
        ['TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
         'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'Total Cost (excl GST)',
         'Cap Premium Cost', 'Total Cost (Incl Cap)', 'Transfer Cost',
         'Wholesale Margin']].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile(df_all_sim, df_percentile)
    df_percentile_update = pd.DataFrame.from_records(
        df_percentile_lst,
        columns=['TradingRegion', 'WeekEnding', 'Percentile', 'Swap Hedged Qty (MWh)',
                 'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd',
                 'Cap Cfd', 'Total Cost (excl GST)', 'Cap Premium Cost',
                 'Total Cost (Incl Cap)', 'Transfer Cost', 'Wholesale Margin',
                 'SimNo. (based on Wholesale Margin)'])
    df_percentile_update['Spot Run No.'] = run_id
    df_percentile_update['Job No.'] = job_id
    df_percentile_update.reset_index(inplace=True)
    df_percentile_update = df_percentile_update.rename(columns={'index': 'Case'})
    # df_percentile_update['Year'] = df_percentile_update['WeekEnding'].apply(lambda x: x.year)
    # df_percentile_update['Month'] = df_percentile_update['WeekEnding'].apply(lambda x: x.month)
    # df_percentile_update.to_excel('NBE_EAR_Output_by_normal_percentiles_{}_{}.xlsx'.format(run_id, job_id))
    # df_percentile_update.to_csv(results_EAR_normal_percentiles.format(run_id, job_id))
    csv_buffer = StringIO()
    df_percentile_update.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_EAR_normal_percentiles.format(job_id, run_id, job_id, run_id)
    ).put(Body=csv_buffer.getvalue())

    # output by risk percentiles for PBI
    '''
def get_output(run_id, job_id, sim_num):
    print("loading data of all simulations.")
    df_all_sim = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_summary_by_simulation_s3_pickle_path.format(run_id, job_id, i))
        else:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_summary_by_simulation_s3_pickle_path.format(run_id, job_id, 900 + (i - 900) * 9))
        df_all_sim = df_all_sim.append(df_tmp)
        print(i)

    # # output by simulation
    # df_all_sim['Spot Run No.'] = run_id
    # df_all_sim.to_excel('NBE_EAR_Output_by_simulations_{}.xlsx'.format(run_id))

    # save the mapping of percentile & sim_index of all percentiles
    percentile_list = [0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.25, 0.5, 0.75, 0.95, 1]
    df_percentile = df_all_sim[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
        'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost',
        'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile(df_all_sim, df_percentile)
    df_percentile_update = pd.DataFrame.from_records(
        df_percentile_lst,
        columns=[
            'TradingRegion', 'WeekEnding', 'Percentile', 'Swap Hedged Qty (MWh)',
            'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd',
            'EAR Cost', 'Cap Premium Cost', 'Total Cost ($)', 'SimNo. (based on Total Cost)'
        ])
    mapping_info = df_percentile_update[[
        'TradingRegion', 'WeekEnding', 'Percentile', 'SimNo. (based on Total Cost)'
    ]].to_dict(orient='split')['data']
    write_pickle_to_s3(
        mapping_info, bucket_nbe,
        results_EAR_summary_mapping_s3_pickle_path.format(run_id, job_id))
    print('mapping information saved.')

    # output by normal percentiles
    percentile_list = [0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]
    df_percentile = df_all_sim[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
        'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost',
        'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile(df_all_sim, df_percentile)
    df_percentile_update = pd.DataFrame.from_records(
        df_percentile_lst,
        columns=[
            'TradingRegion', 'WeekEnding', 'Percentile', 'Swap Hedged Qty (MWh)',
            'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd',
            'EAR Cost', 'Cap Premium Cost', 'Total Cost ($)', 'SimNo. (based on Total Cost)'
        ])
    df_percentile_update['Spot Run No.'] = run_id
    df_percentile_update['Job No.'] = job_id
    df_percentile_update['Year'] = df_percentile_update['WeekEnding'].apply(lambda x: x.year)
    df_percentile_update['Month'] = df_percentile_update['WeekEnding'].apply(lambda x: x.month)
    # df_percentile_update.to_excel('NBE_EAR_Output_by_normal_percentiles_{}_{}.xlsx'.format(run_id, job_id))
    # df_percentile_update.to_csv(results_EAR_normal_percentiles.format(run_id, job_id))
    csv_buffer = StringIO()
    df_percentile_update.to_csv(csv_buffer)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_EAR_normal_percentiles.format(run_id, job_id, run_id, job_id)
    ).put(Body=csv_buffer.getvalue())

    # output by percentile for PBI
    percentile_list_risk = [0, 0.01, 0.02, 0.03, 0.05]
    df_percentile_risk = df_all_sim[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
        'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost',
        'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list_risk)
    df_percentile_risk.reset_index(inplace=True)
    df_percentile_risk = df_percentile_risk.rename(columns={'level_2': 'Percentile'})
    df_percentile_risk_lst = capture_sim_no_for_percentile(df_all_sim, df_percentile_risk)
    new_output = duplicate_percentile_for_pbi(df_percentile_risk_lst, p_position=2)
    df_percentile_pbi = pd.DataFrame.from_records(
        new_output,
        columns=[
            'TradingRegion', 'WeekEnding', 'Percentile', 'Swap Hedged Qty (MWh)',
            'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd',
            'EAR Cost', 'Cap Premium Cost', 'Total Cost ($)', 'SimNo. (based on Total Cost)'
        ])
    df_percentile_pbi['Spot Run No.'] = run_id
    df_percentile_pbi['Job No.'] = job_id
    df_percentile_pbi['Year'] = df_percentile_pbi['WeekEnding'].apply(lambda x: x.year)
    df_percentile_pbi['Month'] = df_percentile_pbi['WeekEnding'].apply(lambda x: x.month)
    # df_percentile_pbi.to_excel('NBE_EAR_Output_by_PBI_percentiles_{}_{}.xlsx'.format(run_id, job_id))
    # df_percentile_pbi.to_csv(results_EAR_PBI_percentiles.format(run_id, job_id))
    csv_buffer = StringIO()
    df_percentile_pbi.to_csv(csv_buffer)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_EAR_PBI_percentiles.format(run_id, job_id, run_id, job_id)
    ).put(Body=csv_buffer.getvalue())
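# capture_sim_no_for_percentile() and duplicate_percentile_for_pbi() are defined
# elsewhere. Judging by how the output is consumed above (each percentile row gains a
# 'SimNo. (based on ...)' field), capture_sim_no_for_percentile appears to attach, to
# every (TradingRegion, WeekEnding, Percentile) row, the simulation whose target metric
# is closest to that percentile value. The sketch below is an assumed implementation
# for illustration only; the metric name and matching rule are assumptions.
def capture_sim_no_for_percentile_sketch(df_all_sim, df_percentile, metric='Total Cost ($)'):
    records = []
    for _, row in df_percentile.iterrows():
        # candidate simulations for the same region and week
        candidates = df_all_sim[
            (df_all_sim['TradingRegion'] == row['TradingRegion'])
            & (df_all_sim['WeekEnding'] == row['WeekEnding'])]
        # simulation whose metric sits closest to the percentile value of that metric
        closest = candidates.iloc[(candidates[metric] - row[metric]).abs().argmin()]
        records.append(list(row.values) + [closest['SimNo']])
    return records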
def load_calculate_summarize(run_id, job_id, date_input, sim_index, start_year,
                             start_month, start_day, end_year, end_month, end_day):
    """
    Load deal positions, simulated spot prices and simulated customer load for one
    simulation, calculate the half-hourly EAR results and upload a weekly summary to S3.

    :param run_id: spot simulation run identifier
    :param job_id: EAR job identifier
    :param date_input: date tag of the converted deal capture data
    :param sim_index: simulation number
    :param start_year: analysis start date (year)
    :param start_month: analysis start date (month)
    :param start_day: analysis start date (day)
    :param end_year: analysis end date (year, exclusive)
    :param end_month: analysis end date (month, exclusive)
    :param end_day: analysis end date (day, exclusive)
    :return: None
    """
    start_time = time.time()
    print('Job {} using Spot Run {} starting... SimNo: {}'.format(job_id, run_id, sim_index))
    start_date = date(start_year, start_month, start_day)
    end_date = date(end_year, end_month, end_day)  # excl.

    # read deal position data
    df_all = read_pickle_from_s3(
        bucket_nbe, deal_capture_converted_path.format(job_id, date_input))
    states = sorted(list(set(df_all['TradingRegion'])))  # states = ['NSW1', 'QLD1', 'SA1', 'VIC1']

    # read simulated spot price data
    df_sp = pd.read_parquet(
        f"s3://{bucket_nbe}/{spot_price_by_sim_parquet_path.format(run_id, sim_index)}")
    print('Spot price from {} SimNo. {} loaded.'.format(run_id, sim_index))
    df_sp['Date'] = df_sp['Half Hour Starting'].apply(lambda x: x.date())
    df_sp = df_sp[(start_date <= df_sp['Date']) & (df_sp['Date'] < end_date)]
    if len(pd.concat([df_sp[state] for state in states])) != len(df_all):
        raise ValueError(
            f"spot price data size: {len(pd.concat([df_sp[state] for state in states]))}, "
            f"expected: {len(df_all)}")

    # sort by region to make sure spot price data and customer data are consistent
    df_all = df_all.sort_values(by=['TradingRegion', 'SettlementDateTime']).reset_index(drop=True)
    df_all['Spot Price'] = pd.concat([df_sp[state] for state in states], ignore_index=True)

    # read simulated customer load data
    load_data = {}
    for state in states:
        state_load_all = read_pickle_from_s3(
            bucket_nbe,
            meter_data_simulation_s3_pickle_path.format(
                run_id, sim_index // 9, 'NBE_{}'.format(state[:-1])))
        load_data[state] = pd.concat([
            state_load_all[key]['GRID_USAGE']
            for key in state_load_all if start_date <= key < end_date
        ], ignore_index=True)
    print('Customer data from {} SimNo. {} loaded.'.format(run_id, sim_index))
    if len(pd.concat([load_data[state] for state in states])) != len(df_all):
        raise ValueError(
            f"load data size: {len(pd.concat([load_data[state] for state in states]))}, "
            f"expected: {len(df_all)}")
    df_all['Customer Net MWh'] = pd.concat([load_data[state] for state in states],
                                           ignore_index=True)
    df_all['Customer Net MWh'] = df_all['Customer Net MWh'].apply(lambda x: -x)

    # calculate earning at risk and output
    df = calculate_earning_at_risk(df_all)
    df_output = df[[
        'TradingRegion', 'SettlementDateTime', 'Swap Premium', 'Swap Hedged Qty (MWh)',
        'Swap Weighted Strike Price', 'Cap Premium', 'Cap Hedged Qty (MWh)',
        'Cap Weighted Strike Price', 'Spot Price', 'Customer Net MWh', 'Pool Cost',
        'Swap Cfd', 'Cap Cfd', 'EAR Cost', 'Cap Premium Cost', 'Total Cost ($)'
    ]]
    print('Calculation finished. Uploading... {} SimNo. {}'.format(run_id, sim_index))
    # keep a copy of the half-hour resolution raw data
    write_pickle_to_s3(
        df_output, bucket_nbe,
        results_EAR_simulation_s3_pickle_path.format(run_id, job_id, sim_index))

    # weekly summary
    print('Summarising the data into weekly resolution... {} SimNo. {}'.format(run_id, sim_index))
    df_output['WeekEnding'] = df_output['SettlementDateTime'].apply(
        get_week_ending)  # get the week ending date

    # sum by region by week ending
    df_summarized = df_output[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
        'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost',
        'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['TradingRegion', 'WeekEnding']).sum()

    # all regions' sum by week ending
    df_grandtotal = df_output[[
        'WeekEnding', 'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)', 'Customer Net MWh',
        'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost', 'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['WeekEnding']).sum()
    df_grandtotal.reset_index(inplace=True)
    df_grandtotal.insert(0, 'TradingRegion', 'GrandTotal')
    df_summarized.reset_index(inplace=True)
    df_summarized = df_summarized.append(df_grandtotal).reset_index(drop=True)
    df_summarized['SimNo'] = sim_index

    print('Uploading weekly summary... {} SimNo. {}'.format(run_id, sim_index))
    write_pickle_to_s3(
        df_summarized, bucket_nbe,
        results_EAR_summary_by_simulation_s3_pickle_path.format(run_id, job_id, sim_index))

    end_time = time.time()
    print("Processing time {} SimNo. {} : {} seconds.".format(
        run_id, sim_index, end_time - start_time))
def transform_format(job_id, date_input, filename, sheet_name, start_year, start_month,
                     start_day, end_year, end_month, end_day):
    """
    Read the raw deal capture file from S3, pivot swap/cap deals into one row per
    half-hour settlement interval per region, and upload the converted table to S3.

    :param job_id: EAR job identifier
    :param date_input: date tag used in the output S3 key
    :param filename: name of the deal capture input file in S3
    :param sheet_name: worksheet name (not used; the input is read as CSV)
    :param start_year: analysis start date (year)
    :param start_month: analysis start date (month)
    :param start_day: analysis start date (day)
    :param end_year: analysis end date (year, inclusive)
    :param end_month: analysis end date (month, inclusive)
    :param end_day: analysis end date (day, inclusive)
    :return: None
    """
    start = time.time()

    # These are the user defined start datetime and end datetime of the analysis
    start_datetime = datetime.datetime(start_year, start_month, start_day, 0, 30)
    end_datetime = datetime.datetime(end_year, end_month, end_day, 0, 0)  # incl.
    start_date = (start_datetime - datetime.timedelta(minutes=30)).date()
    end_date = (end_datetime - datetime.timedelta(minutes=30)).date()

    # read deal position data (pre-sorted) from S3 target folder
    object_key = deal_capture_input_path.format(filename)
    s3 = boto3.client('s3')
    obj = s3.get_object(Bucket=bucket_nbe, Key=object_key)
    data = obj['Body'].read()
    df_all = pd.read_csv(io.BytesIO(data))
    df_all['SettlementDate'] = df_all['SettlementDate'].apply(
        lambda x: datetime.datetime.strptime(x, "%Y-%m-%d").date())
    df_all['SettlementDateTime'] = df_all['SettlementDateTime'].apply(
        lambda x: datetime.datetime.strptime(x, "%Y-%m-%d %H:%M"))

    df_output = pd.DataFrame()
    regions = list(set(df_all['TradingRegion']))
    for region in regions:
        print(region)
        df_1_region = df_all[df_all['TradingRegion'] == region]
        # select the period of interest for the analysis
        df_1_region = df_1_region[
            (df_1_region['SettlementDate'] >= start_date)
            & (df_1_region['SettlementDate'] <= end_date)].reset_index(drop=True)
        # pivot swap/cap deals into one row per settlement interval
        df_transformed = df_1_region.pivot_table(
            ['Premium', 'Hedged Qty (MWh)', 'Weighted Strike Price',
             'Notional Quantity MW', 'Weighted Multiplier'],
            ['TradingRegion', 'SettlementDate', 'SettlementDateTime'],
            'Type').reset_index()
        df_transformed.columns = [
            'TradingRegion', 'SettlementDate', 'SettlementDateTime',
            'Cap Hedged Qty (MWh)', 'Swap Hedged Qty (MWh)',
            'Cap Notional Quantity MW', 'Swap Notional Quantity MW',
            'Cap Premium', 'Swap Premium',
            'Cap Weighted Multiplier', 'Swap Weighted Multiplier',
            'Cap Weighted Strike Price', 'Swap Weighted Strike Price'
        ]
        # reindex to a complete 30-minute grid and fill the gaps
        df_transformed = df_transformed.set_index('SettlementDateTime')
        df_transformed = df_transformed.reindex(
            pd.date_range(start_datetime, end_datetime, freq='30T'))
        df_transformed = df_transformed.reset_index().rename(
            columns={'index': 'SettlementDateTime'})
        df_transformed['TradingRegion'] = df_transformed['TradingRegion'].fillna(method='ffill')
        df_transformed['SettlementDate'] = df_transformed['SettlementDateTime'].apply(
            lambda x: (x - datetime.timedelta(minutes=30)).date())
        df_transformed = df_transformed.fillna(0)
        df_transformed = df_transformed[[
            'TradingRegion', 'SettlementDate', 'SettlementDateTime',
            'Swap Premium', 'Swap Hedged Qty (MWh)', 'Swap Weighted Strike Price',
            'Swap Notional Quantity MW', 'Swap Weighted Multiplier',
            'Cap Premium', 'Cap Hedged Qty (MWh)', 'Cap Weighted Strike Price',
            'Cap Notional Quantity MW', 'Cap Weighted Multiplier'
        ]]
        df_output = df_output.append(df_transformed)

    df_output = df_output.reset_index(drop=True)
    end = time.time()
    print('{} seconds.'.format(end - start))
    # df_output.to_excel('output/Deal Capture test3.xlsx', index=False)
    write_pickle_to_s3(df_output, bucket_nbe, deal_capture_converted_path.format(job_id, date_input))