def get_hh_traces_stress_test(run_id, region):
    mapping_info = read_pickle_from_s3(project_bucket,
                                       results_stress_test_summary_mapping_s3_pickle_path.format(run_id))
    df_hh_traces = pd.DataFrame()
    for elem in mapping_info:
        print(elem)
        if (elem[0] == region) & (elem[1] <= datetime(2022, 1, 1).date()):
            week_ending = elem[1]
            week_starting = week_ending - timedelta(weeks=1)
            p = elem[2]
            sim_index = int(elem[3])
            df_sim = read_pickle_from_s3(project_bucket,
                                         results_stress_test_by_sim_s3_pickle_path.format(run_id,
                                                                                          sim_index,
                                                                                          sim_index))
            df_sim['Date'] = \
                df_sim['SettlementDateTime'].apply(lambda row: (row.to_pydatetime() - timedelta(minutes=30)).date())
            df_tmp = df_sim[(df_sim['Date'] > week_starting)
                            & (df_sim['Date'] <= week_ending)][['TradingRegion', 'SettlementDateTime',
                                                                'Spot Price', 'adjusted price', 'Adjusted EAR Cost']]
            df_tmp['Percentile'] = p
            df_tmp['PercentileBase'] = elem[0]
            df_hh_traces = df_hh_traces.append(df_tmp)
        else:
            continue

    write_pickle_to_s3(df_hh_traces, project_bucket,
                       results_stress_test_hh_traces_s3_pickle_path.format(run_id, region))
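# --- Hedged sketch: the S3 pickle helpers used throughout these examples ---
# read_pickle_from_s3 / write_pickle_to_s3 are not shown in the snippets here.
# A minimal sketch, assuming they are thin wrappers around boto3 and pickle;
# the actual implementations may differ.
import pickle

import boto3


def read_pickle_from_s3(bucket, key):
    # Download the object and unpickle it into a Python object.
    obj = boto3.client('s3').get_object(Bucket=bucket, Key=key)
    return pickle.loads(obj['Body'].read())


def write_pickle_to_s3(data, bucket, key):
    # Pickle the object and upload the bytes to S3.
    boto3.client('s3').put_object(Bucket=bucket, Key=key, Body=pickle.dumps(data))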
def simulate_demand_profile(key_name):
    region = key_name.split('/')[1]
    distributor = key_name.split('/')[2].split('.')[0]

    # get reference day parameters from Spot Simulations
    # object_key = spot_simulation_input_extra_info.format(run_id)
    # s3 = boto3.client('s3')
    # obj = s3.get_object(Bucket=bucket_nbe, Key=object_key)
    # data = obj['Body'].read()
    # extra_sim_mapping = pd.read_excel(io.BytesIO(data))
    # # extra_sim_mapping = pd.read_excel('../input/mapping.xlsx')
    # extra_sim_mapping['original_sim'] = (extra_sim_mapping[1] / 9).astype('int')
    # extra_sims = extra_sim_mapping[['original_sim', 3]].to_dict(orient='split')['data']
    params = read_pickle_from_s3(bucket_spot_simulation,
                                 parameters_for_batch_v2.format(run_id))

    meter_data_dic = read_pickle_from_s3(
        bucket_nbe, meter_data_pickle_path.format(region, distributor))
    for sim_i in params.keys():
        sim_meter_data = dict()
        for week in params[
                sim_i][:209]:  # first 209 weeks in simulation: to 2024-1-2
            for day in week:
                sim_day = day['sim_date']
                ref_day = day['ref_date']
                sim_meter_data[sim_day] = meter_data_dic[ref_day]
        write_pickle_to_s3(
            sim_meter_data, bucket_nbe,
            meter_data_simulation_s3_pickle_path.format(
                run_id, sim_i, distributor))
        print(sim_i)
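# --- Hedged illustration: the structure simulate_demand_profile expects in `params` ---
# Assumption (not confirmed by the source): each simulation index maps to a list of
# weeks, and each week is a list of day dicts pairing a simulated date with the
# historical reference date whose demand profile it reuses.
from datetime import date

params_example = {
    0: [
        [  # one week of day mappings
            {'sim_date': date(2020, 1, 1), 'ref_date': date(2017, 1, 4)},
            {'sim_date': date(2020, 1, 2), 'ref_date': date(2017, 1, 5)},
            # ... remaining days of the week
        ],
        # ... remaining weeks; the loop above uses the first 209
    ],
}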
def get_hh_traces(run_id, job_id):
    mapping_info = read_pickle_from_s3(
        bucket_nbe,
        results_EAR_summary_mapping_s3_pickle_path.format(run_id, job_id))
    df_hh_traces = pd.DataFrame()
    for elem in mapping_info:
        print(elem)
        if (elem[0]
                == 'GrandTotal') & (elem[1] <= datetime(2022, 1, 1).date()):
            week_ending = elem[1]
            week_starting = week_ending - timedelta(weeks=1)
            p = elem[2]
            sim_index = int(elem[3])
            df_sim = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_simulation_s3_pickle_path.format(
                    run_id, job_id, sim_index))
            df_sim['Date'] = \
                df_sim['SettlementDateTime'].apply(lambda row: (row.to_pydatetime() - timedelta(minutes=30)).date())
            df_tmp = df_sim[(df_sim['Date'] > week_starting)
                            & (df_sim['Date'] <= week_ending)][[
                                'TradingRegion', 'SettlementDateTime',
                                'Swap Hedged Qty (MWh)',
                                'Cap Hedged Qty (MWh)', 'Customer Net MWh',
                                'Spot Price', 'Total Cost ($)'
                            ]]
            df_tmp_grandtotal = df_tmp.groupby(['SettlementDateTime']).sum()
            df_tmp_grandtotal.reset_index(inplace=True)
            df_tmp_grandtotal.insert(0, 'TradingRegion', 'GrandTotal')
            df_tmp.reset_index(inplace=True, drop=True)
            df_tmp = df_tmp.append(df_tmp_grandtotal).reset_index(drop=True)
            df_tmp[
                'Swap Hedged Qty (MW)'] = df_tmp['Swap Hedged Qty (MWh)'] * 2
            df_tmp['Cap Hedged Qty (MW)'] = df_tmp['Cap Hedged Qty (MWh)'] * 2
            df_tmp['Customer Net MW'] = df_tmp['Customer Net MWh'] * 2
            df_tmp = df_tmp.drop(columns=[
                'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
                'Customer Net MWh'
            ])
            df_tmp['Percentile'] = p
            df_hh_traces = df_hh_traces.append(df_tmp)
        else:
            continue
    df_hh_traces['Spot Run No.'] = run_id
    df_hh_traces['Job No.'] = job_id
    # df_hh_traces.to_csv('HH_Simulation_Traces_{}.csv'.format(run_id))
    write_pickle_to_s3(
        df_hh_traces, bucket_nbe,
        results_EAR_hh_traces_s3_pickle_path.format(run_id, job_id))
def main_process(key_name):
    # input_path = '../input/'
    # filename = 'demand_profile_NBE.xlsx'
    # input_df = pd.read_excel(os.path.join(input_path, filename))
    # read deal position data (pre-sorted) from S3 target folder
    filename = key_name.split('/')[1]
    object_key = meter_data_file_path.format(filename)
    s3 = boto3.client('s3')
    obj = s3.get_object(Bucket=bucket_nbe, Key=object_key)
    data = obj['Body'].read()
    input_df = pd.read_excel(io.BytesIO(data))
    demand_entity_list = list(set(input_df['Distributor']))
    # process data for each distributor
    for distributor in demand_entity_list:
        distributor_df = input_df[
            input_df['Distributor'] == distributor].reset_index(
                drop=True).sort_values(by=['INTERVAL_DATE', 'INTERVAL_NUM'])
        kwargs = {
            'hist_start_date':
            distributor_df['INTERVAL_DATE'].iloc[0].date(),
            'hist_end_date':
            distributor_df['INTERVAL_DATE'].iloc[-1].date(),
            'ref_start_date':
            datetime.datetime.strptime(ref_start_date_str, "%Y-%m-%d").date(),
            'ref_end_date':
            datetime.datetime.strptime(ref_end_date_str, "%Y-%m-%d").date(),
            'name':
            distributor,
            # the naming convention of regions, i.e. VIC1
            'region':
            distributor_df['STATE'].iloc[0]
            if distributor_df['STATE'].iloc[0].endswith('1') else
            distributor_df['STATE'].iloc[0] + '1'
        }
        demand_profile = DemandProfile(kwargs)
        demand_profile.region = 'NSW1' if demand_profile.region == 'ACT1' else demand_profile.region
        demand_profile.weather_station = weather_stations[
            demand_profile.region]
        distributor_df = distributor_df.drop(columns=['STATE', 'Distributor'])

        cleaned_data = demand_profile.check_missing_30_min(distributor_df)
        populated_ref = demand_profile.populate_history()
        populated_data = demand_profile.get_df_map(cleaned_data, populated_ref)
        write_pickle_to_s3(
            populated_data, bucket_nbe,
            meter_data_pickle_path.format(demand_profile.region,
                                          demand_profile.name))
        print(demand_profile.name)
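# --- Hedged illustration: the S3 key layouts implied by the split() calls above ---
# These example keys are hypothetical; only the path positions the code reads are known.
key_for_main_process = 'incoming/demand_profile_NBE.xlsx'        # assumed layout
filename = key_for_main_process.split('/')[1]                    # 'demand_profile_NBE.xlsx'

key_for_simulation = 'meter-data/VIC1/NBE_VIC.pkl'               # assumed layout
region = key_for_simulation.split('/')[1]                        # 'VIC1'
distributor = key_for_simulation.split('/')[2].split('.')[0]     # 'NBE_VIC'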
def get_output_stress_test(run_id, sim_num):
    # output by simulation
    df_all_sim = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = read_pickle_from_s3(bucket_nbe,
                                         results_stress_test_summary_by_sim_s3_pickle_path.format(run_id, i))
        else:
            df_tmp = read_pickle_from_s3(bucket_nbe,
                                         results_stress_test_summary_by_sim_s3_pickle_path.format(run_id,
                                                                                                  900 + (i - 900) * 9))
        df_all_sim = df_all_sim.append(df_tmp)
        print(i)

    # output by percentile, normal percentile format, and save the mapping of percentile & sim_index
    percentile_list = [0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]
    df_percentile = df_all_sim[['TradingRegion', 'FourWeekBlocks',
                                'Adjusted EAR Cost']].groupby(['TradingRegion',
                                                               'FourWeekBlocks']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile_stress_test(df_all_sim, df_percentile)
    df_percentile_update = pd.DataFrame.from_records(df_percentile_lst,
                                                     columns=['TradingRegion', 'FourWeekBlocks', 'Percentile',
                                                              'Adjusted EAR Cost',
                                                              'SimNo. (based on Adjusted EAR Cost)'])
    df_percentile_update = df_percentile_update.rename(columns={'Adjusted EAR Cost': 'Adjusted Wholesale Margin',
                                                                'SimNo. (based on Adjusted EAR Cost)':
                                                                    'SimNo. (based on Adjusted Wholesale Margin)'})
    mapping_info = df_percentile_update[['TradingRegion', 'FourWeekBlocks', 'Percentile',
                                         'SimNo. (based on Adjusted Wholesale Margin)']].to_dict(orient='split')['data']
    write_pickle_to_s3(mapping_info, bucket_nbe, results_stress_test_summary_mapping_s3_pickle_path.format(run_id))
    print('mapping information saved.')

    new_output = duplicate_percentile_for_pbi_stress_test(df_percentile_lst, p_position=2)
    df_percentile_pbi = pd.DataFrame.from_records(new_output,
                                                  columns=['TradingRegion', 'FourWeeksBlocks_WeekEnding', 'Percentile',
                                                           'Adjusted Wholesale Margin',
                                                           'SimNo. (based on Adjusted Wholesale Margin)'])
    df_percentile_pbi['Spot Run No.'] = run_id
    df_percentile_pbi['Job No.'] = job_id
    # df_percentile_pbi.to_excel('NBE_StressTest_Output_by_PBI_percentiles_{}.xlsx'.format(run_id))
    df_percentile_pbi.reset_index(inplace=True)
    df_percentile_pbi = df_percentile_pbi.rename(columns={'index': 'Case'})
    df_percentile_pbi.to_csv('EAR_StressTest_Output_by_PBI_percentiles_{}_{}.csv'.format(job_id, run_id), index=False)
def stress_test_summary(run_id, sim_index):
    df_raw = read_pickle_from_s3(bucket_nbe,
                                 results_stress_test_by_sim_s3_pickle_path.format(run_id, sim_index, sim_index))
    df = df_raw[['TradingRegion', 'SettlementDateTime', 'Adjusted EAR Cost']]
    df['WeekEnding'] = df['SettlementDateTime'].apply(get_week_ending)
    week_ending_init = df['WeekEnding'].iloc[0]
    df['FourWeekBlocks'] = df['WeekEnding'].apply(get_four_week_blocks, args=(week_ending_init, ))
    df_summarized = df[['TradingRegion', 'Adjusted EAR Cost',
                        'FourWeekBlocks']].groupby(['TradingRegion', 'FourWeekBlocks']).sum()
    df_grandtotal = df[['FourWeekBlocks', 'Adjusted EAR Cost']].groupby(['FourWeekBlocks']).sum()
    df_grandtotal.reset_index(inplace=True)
    df_grandtotal.insert(0, 'TradingRegion', 'GrandTotal')
    df_summarized.reset_index(inplace=True)
    df_summarized = df_summarized.append(df_grandtotal).reset_index(drop=True)
    df_summarized['SimNo'] = sim_index
    write_pickle_to_s3(df_summarized,
                       bucket_nbe,
                       results_stress_test_summary_by_sim_s3_pickle_path.format(run_id, sim_index))
    return df_summarized
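# --- Hedged sketch: the date helpers used by stress_test_summary ---
# get_week_ending and get_four_week_blocks are not shown. One plausible reading,
# under assumptions: weeks end on Sunday, a half hour ending at midnight belongs
# to the previous trading day, and each four-week block is labelled by the week
# ending that closes it. The real helpers may differ.
from datetime import timedelta

import pandas as pd


def get_week_ending(settlement_datetime):
    # Shift back 30 minutes so 00:00 belongs to the prior day, then roll
    # forward to the next Sunday (assumed week-ending weekday).
    d = (pd.Timestamp(settlement_datetime) - pd.Timedelta(minutes=30)).date()
    return d + timedelta(days=6 - d.weekday())


def get_four_week_blocks(week_ending, week_ending_init):
    # Label each consecutive four-week window by its final week ending,
    # counted from the first week ending in the data.
    block_index = ((week_ending - week_ending_init).days // 7) // 4
    return week_ending_init + timedelta(weeks=(block_index + 1) * 4 - 1)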
def main(params):
    # parse parameters
    run_id = params['run_id']
    job_id = params['job_id']
    sim_index = params['sim_index']
    start_date = datetime.date(*params["start_date"])
    end_date = datetime.date(*params["end_date"])

    # period_days = 7
    df_pd = pd.read_parquet('s3://{}/{}'.format(bucket_nbe,
                                                period_definition_path))
    df_profile_mapping = pd.read_parquet('s3://{}/{}'.format(
        bucket_nbe, profile_mapping_path))
    profile_id = {
        row['name']: row['id']
        for _, row in df_profile_mapping.iterrows()
    }
    profile_region_id = {
        row['name']: row['region_id']
        for _, row in df_profile_mapping.iterrows()
    }

    # read spot price data
    df_sp = pd.read_parquet('s3://{}/{}'.format(
        bucket_nbe, spot_price_by_sim_parquet_path.format(run_id, sim_index)))
    df_sp['Half Hour Starting'] = df_sp['Half Hour Starting'].apply(
        lambda x: x.to_pydatetime())
    df_sp['Date'] = df_sp['Half Hour Starting'].apply(lambda x: x.date())
    for period_resolution in period_resolutions:
        period_starting = start_date
        period_ending = find_next_period_ending(start_date,
                                                by=period_resolution)  # incl.
        result = avg_price_calcs(period_resolution, period_starting,
                                 period_ending, end_date, df_sp, df_pd,
                                 profile_id, profile_region_id, sim_index)
        write_pickle_to_s3(
            result, bucket_nbe,
            results_avg_price_by_profile_by_sim_path.format(
                period_resolution, job_id, run_id, sim_index))
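# --- Hedged usage example for main(params) ---
# Illustrative only: identifiers and dates are assumptions, not real run data.
# start_date / end_date are (year, month, day) sequences unpacked via datetime.date(*...).
example_params = {
    'run_id': 'SPOT_RUN_001',   # spot simulation run identifier (assumed format)
    'job_id': 'JOB_001',        # job identifier (assumed format)
    'sim_index': 0,
    'start_date': (2021, 7, 1),
    'end_date': (2024, 1, 2),
}
main(example_params)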
def get_output(run_id, job_id, sim_num):
    # output by simulation - weekly/monthly/quarterly average price
    for p in ['week', 'month', 'quarter']:
        print("loading data of all simulations by {}.".format(p))
        all_sim_lst = []
        for i in range(sim_num):
            if i < 900:
                tmp_lst = \
                    read_pickle_from_s3(bucket_nbe,
                                        results_avg_price_by_profile_by_sim_path.format(p, job_id, run_id, i))
            else:
                tmp_lst = \
                    read_pickle_from_s3(bucket_nbe,
                                        results_avg_price_by_profile_by_sim_path.format(p, job_id, run_id,
                                                                                        900 + (i - 900) * 9))
            all_sim_lst += tmp_lst
            print(i)
        col_period = 'WeekEnding' if p == 'week' else 'PeriodEnding'
        df_output = pd.DataFrame(all_sim_lst, columns=[col_period, 'TradingRegion', 'SimNo', 'Profile',
                                                       'Average Spot Price', 'Average Cap Payouts', 'NoOfHour',
                                                       'ProfileID', 'RegionID'])
        df_output['Profile'] = df_output['Profile'].apply(remove_profile_region_suffix)
        df_output['Spot Run No.'] = run_id
        df_output['Job No.'] = job_id
        csv_buffer = StringIO()
        df_output.to_csv(csv_buffer, index=False)
        s3_resource = boto3.resource('s3')
        s3_resource.Object(bucket_nbe,
                           results_avg_price_by_profile_by_sim_csv_path.format(job_id,
                                                                               run_id,
                                                                               p,
                                                                               job_id,
                                                                               run_id)).put(Body=csv_buffer.getvalue())

    # output by simulation - weekly
    print("loading data of all simulations by week.")
    df_all_sim = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = \
                read_pickle_from_s3(bucket_nbe,
                                    results_EAR_week_summary_by_simulation_s3_pickle_path.format(job_id, run_id, i))
        else:
            df_tmp = \
                read_pickle_from_s3(bucket_nbe,
                                    results_EAR_week_summary_by_simulation_s3_pickle_path.format(job_id,
                                                                                                 run_id,
                                                                                                 900 + (i - 900) * 9))
        df_all_sim = df_all_sim.append(df_tmp)
        print(i)
    df_all_sim['Spot Run No.'] = run_id
    df_all_sim['Job No.'] = job_id
    # EAR_Output_by_simulations_by_week.csv
    df_all_sim_week_complete = df_all_sim
    df_all_sim_week_complete.reset_index(inplace=True)
    df_all_sim_week_complete = df_all_sim_week_complete.rename(columns={'index': 'Case'})
    csv_buffer = StringIO()
    df_all_sim_week_complete.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(bucket_nbe,
                       results_by_sim_by_week_complete.format(job_id,
                                                              run_id,
                                                              job_id,
                                                              run_id)).put(Body=csv_buffer.getvalue())
    # EAR_Output_by_sim_by_week_for_CFD_impact.csv
    df_all_sim_week = df_all_sim[
        ['TradingRegion', 'WeekEnding', 'Total Cost (Incl Cap)', 'Transfer Cost', 'Wholesale Margin', 'SimNo',
         'Spot Run No.', 'Job No.']]
    df_all_sim_week.reset_index(inplace=True)
    df_all_sim_week = df_all_sim_week.rename(columns={'index': 'Case'})
    csv_buffer = StringIO()
    df_all_sim_week.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(bucket_nbe,
                       results_by_sim_by_week.format(job_id, run_id, job_id, run_id)).put(Body=csv_buffer.getvalue())

    # output by simulation - monthly
    print("loading data of all simulations by month.")
    df_all_sim_month = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = read_pickle_from_s3(bucket_nbe,
                                         results_EAR_mth_summary_by_simulation_s3_pickle_path.format(job_id, run_id, i))
        else:
            df_tmp = read_pickle_from_s3(bucket_nbe,
                                         results_EAR_mth_summary_by_simulation_s3_pickle_path.format(job_id,
                                                                                                     run_id,
                                                                                                     900+(i - 900) * 9))
        df_all_sim_month = df_all_sim_month.append(df_tmp)
        print(i)
    df_all_sim_month['Spot Run No.'] = run_id
    df_all_sim_month['Job No.'] = job_id
    # EAR_Output_by_simulations_by_month.csv
    df_all_sim_month_complete = df_all_sim_month
    df_all_sim_month_complete.reset_index(inplace=True)
    df_all_sim_month_complete = df_all_sim_month_complete.rename(columns={'index': 'Case'})
    csv_buffer = StringIO()
    df_all_sim_month_complete.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(bucket_nbe,
                       results_by_sim_by_month_complete.format(job_id,
                                                               run_id,
                                                               job_id,
                                                               run_id)).put(Body=csv_buffer.getvalue())
    # EAR_Output_by_sim_by_month_for_CFD_impact.csv
    df_all_sim_month.reset_index(inplace=True)
    df_all_sim_month = df_all_sim_month.rename(columns={'index': 'Case'})
    df_all_sim_month = df_all_sim_month[
        ['Case', 'TradingRegion', 'MonthEnding', 'Total Cost (Incl Cap)', 'Transfer Cost', 'Wholesale Margin',
         'SimNo', 'Spot Run No.', 'Job No.']]
    csv_buffer = StringIO()
    df_all_sim_month.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(bucket_nbe,
                       results_by_sim_by_month.format(job_id, run_id, job_id, run_id)).put(Body=csv_buffer.getvalue())

    # output by simulation - quarterly
    print("loading data of all simulations by quarter.")
    df_all_sim_qtr = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = read_pickle_from_s3(bucket_nbe,
                                         results_EAR_qtr_summary_by_simulation_s3_pickle_path.format(job_id, run_id, i))
        else:
            df_tmp = read_pickle_from_s3(bucket_nbe,
                                         results_EAR_qtr_summary_by_simulation_s3_pickle_path.format(job_id,
                                                                                                     run_id,
                                                                                                     900+(i - 900) * 9))
        df_all_sim_qtr = df_all_sim_qtr.append(df_tmp)
        print(i)
    df_all_sim_qtr['Spot Run No.'] = run_id
    df_all_sim_qtr['Job No.'] = job_id
    # EAR_Output_by_simulations_by_quarter.csv
    df_all_sim_quarter_complete = df_all_sim_qtr
    df_all_sim_quarter_complete.reset_index(inplace=True)
    df_all_sim_quarter_complete = df_all_sim_quarter_complete.rename(columns={'index': 'Case'})
    csv_buffer = StringIO()
    df_all_sim_quarter_complete.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(bucket_nbe,
                       results_by_sim_by_quarter_complete.format(job_id,
                                                                 run_id,
                                                                 job_id,
                                                                 run_id)).put(Body=csv_buffer.getvalue())
    # EAR_Output_by_sim_by_quarter_for_CFD_impact.csv
    df_all_sim_qtr.reset_index(inplace=True)
    df_all_sim_qtr = df_all_sim_qtr.rename(columns={'index': 'Case'})
    df_all_sim_qtr = df_all_sim_qtr[
        ['Case', 'TradingRegion', 'QuarterEnding', 'Total Cost (Incl Cap)', 'Transfer Cost', 'Wholesale Margin',
         'SimNo', 'Spot Run No.', 'Job No.']]
    csv_buffer = StringIO()
    df_all_sim_qtr.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(bucket_nbe,
                       results_by_sim_by_quarter.format(job_id, run_id, job_id, run_id)).put(Body=csv_buffer.getvalue())

    # save the mapping of percentile & sim_index of all percentiles
    percentile_list = [0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.25, 0.5, 0.75, 0.95, 1]
    df_percentile = df_all_sim[['TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
                                'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'Total Cost (excl GST)',
                                'Cap Premium Cost', 'Total Cost (Incl Cap)', 'Transfer Cost',
                                'Wholesale Margin']].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile(df_all_sim, df_percentile)
    df_percentile_update = pd.DataFrame.from_records(df_percentile_lst,
                                                     columns=['TradingRegion', 'WeekEnding', 'Percentile',
                                                              'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
                                                              'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd',
                                                              'Total Cost (excl GST)',
                                                              'Cap Premium Cost', 'Total Cost (Incl Cap)',
                                                              'Transfer Cost', 'Wholesale Margin',
                                                              'SimNo. (based on Wholesale Margin)'])
    mapping_info = df_percentile_update[['TradingRegion', 'WeekEnding',
                                         'Percentile', 'SimNo. (based on Wholesale Margin)']].to_dict(orient='split')[
        'data']
    write_pickle_to_s3(mapping_info, bucket_nbe, results_EAR_summary_mapping_s3_pickle_path.format(job_id, run_id))
    print('mapping information saved.')

    # EAR_Output_by_normal_percentiles.csv
    percentile_list = [0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]
    df_percentile = df_all_sim[['TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
                                'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'Total Cost (excl GST)',
                                'Cap Premium Cost', 'Total Cost (Incl Cap)',
                                'Transfer Cost', 'Wholesale Margin']].groupby(['TradingRegion', 'WeekEnding']).quantile(
        percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile(df_all_sim, df_percentile)
    df_percentile_update = pd.DataFrame.from_records(df_percentile_lst,
                                                     columns=['TradingRegion', 'WeekEnding', 'Percentile',
                                                              'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
                                                              'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd',
                                                              'Total Cost (excl GST)',
                                                              'Cap Premium Cost', 'Total Cost (Incl Cap)',
                                                              'Transfer Cost', 'Wholesale Margin',
                                                              'SimNo. (based on Wholesale Margin)'])
    df_percentile_update['Spot Run No.'] = run_id
    df_percentile_update['Job No.'] = job_id
    df_percentile_update.reset_index(inplace=True)
    df_percentile_update = df_percentile_update.rename(columns={'index': 'Case'})
    # df_percentile_update['Year'] = df_percentile_update['WeekEnding'].apply(lambda x: x.year)
    # df_percentile_update['Month'] = df_percentile_update['WeekEnding'].apply(lambda x: x.month)
    # df_percentile_update.to_excel('NBE_EAR_Output_by_normal_percentiles_{}_{}.xlsx'.format(run_id, job_id))
    # df_percentile_update.to_csv(results_EAR_normal_percentiles.format(run_id, job_id))
    csv_buffer = StringIO()
    df_percentile_update.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(bucket_nbe,
                       results_EAR_normal_percentiles.format(job_id, run_id, job_id,
                                                             run_id)).put(Body=csv_buffer.getvalue())

    # output by risk percentiles for PBI
def get_output(run_id, job_id, sim_num):
    print("loading data of all simulations.")
    df_all_sim = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_summary_by_simulation_s3_pickle_path.format(
                    run_id, job_id, i))
        else:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_summary_by_simulation_s3_pickle_path.format(
                    run_id, job_id, 900 + (i - 900) * 9))
        df_all_sim = df_all_sim.append(df_tmp)
        print(i)
    # # output by simulation
    # df_all_sim['Spot Run No.'] = run_id
    # df_all_sim.to_excel('NBE_EAR_Output_by_simulations_{}.xlsx'.format(run_id))

    # save the mapping of percentile & sim_index of all percentiles
    percentile_list = [
        0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.25, 0.5, 0.75, 0.95, 1
    ]
    df_percentile = df_all_sim[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)',
        'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd',
        'Cap Cfd', 'EAR Cost', 'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile(df_all_sim,
                                                      df_percentile)
    df_percentile_update = pd.DataFrame.from_records(
        df_percentile_lst,
        columns=[
            'TradingRegion', 'WeekEnding', 'Percentile',
            'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
            'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost',
            'Cap Premium Cost', 'Total Cost ($)',
            'SimNo. (based on Total Cost)'
        ])
    mapping_info = df_percentile_update[[
        'TradingRegion', 'WeekEnding', 'Percentile',
        'SimNo. (based on Total Cost)'
    ]].to_dict(orient='split')['data']
    write_pickle_to_s3(
        mapping_info, bucket_nbe,
        results_EAR_summary_mapping_s3_pickle_path.format(run_id, job_id))
    print('mapping information saved.')

    # output by normal percentiles
    percentile_list = [0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]
    df_percentile = df_all_sim[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)',
        'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd',
        'Cap Cfd', 'EAR Cost', 'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile(df_all_sim,
                                                      df_percentile)
    df_percentile_update = pd.DataFrame.from_records(
        df_percentile_lst,
        columns=[
            'TradingRegion', 'WeekEnding', 'Percentile',
            'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
            'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost',
            'Cap Premium Cost', 'Total Cost ($)',
            'SimNo. (based on Total Cost)'
        ])
    df_percentile_update['Spot Run No.'] = run_id
    df_percentile_update['Job No.'] = job_id
    df_percentile_update['Year'] = df_percentile_update['WeekEnding'].apply(
        lambda x: x.year)
    df_percentile_update['Month'] = df_percentile_update['WeekEnding'].apply(
        lambda x: x.month)
    # df_percentile_update.to_excel('NBE_EAR_Output_by_normal_percentiles_{}_{}.xlsx'.format(run_id, job_id))
    # df_percentile_update.to_csv(results_EAR_normal_percentiles.format(run_id, job_id))
    csv_buffer = StringIO()
    df_percentile_update.to_csv(csv_buffer)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_EAR_normal_percentiles.format(
            run_id, job_id, run_id, job_id)).put(Body=csv_buffer.getvalue())

    # output by percentile for PBI
    percentile_list_risk = [0, 0.01, 0.02, 0.03, 0.05]
    df_percentile_risk = df_all_sim[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)',
        'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd',
        'Cap Cfd', 'EAR Cost', 'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list_risk)
    df_percentile_risk.reset_index(inplace=True)
    df_percentile_risk = df_percentile_risk.rename(
        columns={'level_2': 'Percentile'})
    df_percentile_risk_lst = capture_sim_no_for_percentile(
        df_all_sim, df_percentile_risk)
    new_output = duplicate_percentile_for_pbi(df_percentile_risk_lst,
                                              p_position=2)
    df_percentile_pbi = pd.DataFrame.from_records(
        new_output,
        columns=[
            'TradingRegion', 'WeekEnding', 'Percentile',
            'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
            'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost',
            'Cap Premium Cost', 'Total Cost ($)',
            'SimNo. (based on Total Cost)'
        ])
    df_percentile_pbi['Spot Run No.'] = run_id
    df_percentile_pbi['Job No.'] = job_id
    df_percentile_pbi['Year'] = df_percentile_pbi['WeekEnding'].apply(
        lambda x: x.year)
    df_percentile_pbi['Month'] = df_percentile_pbi['WeekEnding'].apply(
        lambda x: x.month)
    # df_percentile_pbi.to_excel('NBE_EAR_Output_by_PBI_percentiles_{}_{}.xlsx'.format(run_id, job_id))
    # df_percentile_pbi.to_csv(results_EAR_PBI_percentiles.format(run_id, job_id))
    csv_buffer = StringIO()
    df_percentile_pbi.to_csv(csv_buffer)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_EAR_PBI_percentiles.format(
            run_id, job_id, run_id, job_id)).put(Body=csv_buffer.getvalue())
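# --- Hedged sketch: capture_sim_no_for_percentile ---
# Both versions of get_output rely on this helper, which is not shown. A sketch of
# one way it could work, assuming each percentile row is traced back to the
# simulation whose value in the last metric column (e.g. 'Total Cost ($)' or
# 'Wholesale Margin') is nearest to the percentile value. The real helper may differ.
def capture_sim_no_for_percentile(df_all_sim, df_percentile):
    metric = df_percentile.columns[-1]
    records = []
    for _, row in df_percentile.iterrows():
        group = df_all_sim[
            (df_all_sim['TradingRegion'] == row['TradingRegion'])
            & (df_all_sim['WeekEnding'] == row['WeekEnding'])]
        # Index of the simulation closest to this percentile on the chosen metric.
        nearest_idx = (group[metric] - row[metric]).abs().idxmin()
        records.append(list(row) + [group.loc[nearest_idx, 'SimNo']])
    return records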
def load_calculate_summarize(run_id, job_id, date_input, sim_index, start_year,
                             start_month, start_day, end_year, end_month,
                             end_day):
    """

    :param run_id:
    :param job_id:
    :param date_input:
    :param sim_index:
    :param start_year:
    :param start_month:
    :param start_day:
    :param end_year:
    :param end_month:
    :param end_day:
    :return:
    """
    start_time = time.time()
    print('Job {} using Spot Run {} starting... SimNo: {}'.format(
        job_id, run_id, sim_index))
    start_date = date(start_year, start_month, start_day)
    end_date = date(end_year, end_month, end_day)  # excl.

    # read deal position data
    df_all = read_pickle_from_s3(
        bucket_nbe, deal_capture_converted_path.format(job_id, date_input))
    states = sorted(list(set(
        df_all['TradingRegion'])))  # states = ['NSW1', 'QLD1', 'SA1', 'VIC1']

    # read simulated spot price data
    df_sp = pd.read_parquet(
        f"s3://{bucket_nbe}/{spot_price_by_sim_parquet_path.format(run_id, sim_index)}"
    )
    print('Spot price from {} SimNo. {} loaded.'.format(run_id, sim_index))
    df_sp['Date'] = df_sp['Half Hour Starting'].apply(lambda x: x.date())
    df_sp = df_sp[(start_date <= df_sp['Date']) & (df_sp['Date'] < end_date)]
    if len(pd.concat([df_sp[state] for state in states])) != len(df_all):
        raise ValueError(
            f"spot price data size: {len(pd.concat([df_sp[state] for state in states]))}, expected: {len(df_all)}"
        )

    # sort by region to make sure spot price data and customer data are consistent
    df_all = df_all.sort_values(
        by=['TradingRegion', 'SettlementDateTime']).reset_index(drop=True)
    df_all['Spot Price'] = pd.concat([df_sp[state] for state in states],
                                     ignore_index=True)

    # read simulated customer load data
    load_data = {}
    for state in states:
        state_load_all = read_pickle_from_s3(
            bucket_nbe,
            meter_data_simulation_s3_pickle_path.format(
                run_id, sim_index // 9, 'NBE_{}'.format(state[:-1])))
        load_data[state] = pd.concat([
            state_load_all[key]['GRID_USAGE']
            for key in state_load_all if start_date <= key < end_date
        ],
                                     ignore_index=True)
    print('Customer data from {} SimNo. {} loaded.'.format(run_id, sim_index))
    if len(pd.concat([load_data[state] for state in states])) != len(df_all):
        raise ValueError(
            f"load data size: {len(pd.concat([load_data[state] for state in states]))}, expected:{len(df_all)}"
        )
    df_all['Customer Net MWh'] = pd.concat(
        [load_data[state] for state in states], ignore_index=True)
    df_all['Customer Net MWh'] = df_all['Customer Net MWh'].apply(lambda x: -x)

    # calculate earning at risk and output
    df = calculate_earning_at_risk(df_all)
    df_output = df[[
        'TradingRegion', 'SettlementDateTime', 'Swap Premium',
        'Swap Hedged Qty (MWh)', 'Swap Weighted Strike Price', 'Cap Premium',
        'Cap Hedged Qty (MWh)', 'Cap Weighted Strike Price', 'Spot Price',
        'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost',
        'Cap Premium Cost', 'Total Cost ($)'
    ]]
    print('Calculation finished. Uploading... {} SimNo. {}'.format(
        run_id, sim_index))
    # to keep a copy of half hour resolution raw data.
    write_pickle_to_s3(
        df_output, bucket_nbe,
        results_EAR_simulation_s3_pickle_path.format(run_id, job_id,
                                                     sim_index))

    # weekly summary
    print('Summarising the data into weekly resolution... {} SimNo. {}'.format(
        run_id, sim_index))
    df_output['WeekEnding'] = df_output['SettlementDateTime'].apply(
        get_week_ending)  # get the week ending date
    # sum by region by week ending
    df_summarized = df_output[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)',
        'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd',
        'Cap Cfd', 'EAR Cost', 'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['TradingRegion', 'WeekEnding']).sum()
    # all regions' sum by week ending
    df_grandtotal = df_output[[
        'WeekEnding', 'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
        'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost',
        'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['WeekEnding']).sum()
    df_grandtotal.reset_index(inplace=True)
    df_grandtotal.insert(0, 'TradingRegion', 'GrandTotal')
    df_summarized.reset_index(inplace=True)
    df_summarized = df_summarized.append(df_grandtotal).reset_index(drop=True)
    df_summarized['SimNo'] = sim_index
    print('Uploading weekly summary... {} SimNo. {}'.format(run_id, sim_index))
    write_pickle_to_s3(
        df_summarized, bucket_nbe,
        results_EAR_summary_by_simulation_s3_pickle_path.format(
            run_id, job_id, sim_index))
    end_time = time.time()
    print("Processing time {} SimNo. {} : {} seconds.".format(
        run_id, sim_index, end_time - start_time))
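# --- Hedged usage example for load_calculate_summarize ---
# Illustrative call only; the identifiers, date tag, and dates are assumptions.
load_calculate_summarize(
    run_id='SPOT_RUN_001',
    job_id='JOB_001',
    date_input='20210630',      # assumed tag embedded in the deal-capture key
    sim_index=0,
    start_year=2021, start_month=7, start_day=1,
    end_year=2024, end_month=1, end_day=2)   # end date is exclusive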
def transform_format(job_id, date_input, filename, sheet_name, start_year,
                     start_month, start_day, end_year, end_month, end_day):
    """

    :param job_id:
    :param date_input:
    :param filename:
    :param sheet_name:
    :param start_year:
    :param start_month:
    :param start_day:
    :param end_year:
    :param end_month:
    :param end_day:
    :return:
    """
    start = time.time()
    # These are the user defined start datetime and end datetime of the analysis
    start_datetime = datetime.datetime(start_year, start_month, start_day, 0,
                                       30)
    end_datetime = datetime.datetime(end_year, end_month, end_day, 0,
                                     0)  # incl.
    start_date = (start_datetime - datetime.timedelta(minutes=30)).date()
    end_date = (end_datetime - datetime.timedelta(minutes=30)).date()

    # read deal position data (pre-sorted) from S3 target folder
    object_key = deal_capture_input_path.format(filename)
    s3 = boto3.client('s3')
    obj = s3.get_object(Bucket=bucket_nbe, Key=object_key)
    data = obj['Body'].read()
    df_all = pd.read_csv(io.BytesIO(data))
    df_all['SettlementDate'] = \
        df_all['SettlementDate'].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%d").date())
    df_all['SettlementDateTime'] = \
        df_all['SettlementDateTime'].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%d %H:%M"))
    df_output = pd.DataFrame()
    regions = list(set(df_all['TradingRegion']))
    for region in regions:
        print(region)
        df_1_region = df_all[df_all['TradingRegion'] == region]
        # select the interested period for analysis
        df_1_region = df_1_region[
            (df_1_region['SettlementDate'] >= start_date)
            & (df_1_region['SettlementDate'] <= end_date)].reset_index(
                drop=True)
        df_transformed = df_1_region.pivot_table([
            'Premium', 'Hedged Qty (MWh)', 'Weighted Strike Price',
            'Notional Quantity MW', 'Weighted Multiplier'
        ], ['TradingRegion', 'SettlementDate', 'SettlementDateTime'],
                                                 'Type').reset_index()
        df_transformed.columns = [
            'TradingRegion', 'SettlementDate', 'SettlementDateTime',
            'Cap Hedged Qty (MWh)', 'Swap Hedged Qty (MWh)',
            'Cap Notional Quantity MW', 'Swap Notional Quantity MW',
            'Cap Premium', 'Swap Premium', 'Cap Weighted Multiplier',
            'Swap Weighted Multiplier', 'Cap Weighted Strike Price',
            'Swap Weighted Strike Price'
        ]
        df_transformed = df_transformed.set_index('SettlementDateTime')
        df_transformed = df_transformed.reindex(
            pd.date_range(start_datetime, end_datetime, freq='30T'))
        df_transformed = df_transformed.reset_index().rename(
            columns={'index': 'SettlementDateTime'})
        df_transformed['TradingRegion'] = df_transformed[
            'TradingRegion'].fillna(method='ffill')
        df_transformed['SettlementDate'] = df_transformed[
            'SettlementDateTime'].apply(
                lambda x: (x - datetime.timedelta(minutes=30)).date())
        df_transformed = df_transformed.fillna(0)
        df_transformed = df_transformed[[
            'TradingRegion', 'SettlementDate', 'SettlementDateTime',
            'Swap Premium', 'Swap Hedged Qty (MWh)',
            'Swap Weighted Strike Price', 'Swap Notional Quantity MW',
            'Swap Weighted Multiplier', 'Cap Premium', 'Cap Hedged Qty (MWh)',
            'Cap Weighted Strike Price', 'Cap Notional Quantity MW',
            'Cap Weighted Multiplier'
        ]]
        df_output = df_output.append(df_transformed)
    df_output = df_output.reset_index(drop=True)
    end = time.time()
    print('{} seconds.'.format(end - start))
    # df_output.to_excel('output/Deal Capture test3.xlsx', index=False)
    write_pickle_to_s3(df_output, bucket_nbe,
                       deal_capture_converted_path.format(job_id, date_input))
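# --- Hedged usage example for transform_format ---
# Illustrative call only; file name, sheet name, and dates are assumptions. Note
# that sheet_name is accepted but unused above, since the deal file is read as CSV.
transform_format(
    job_id='JOB_001',
    date_input='20210630',
    filename='deal_capture_20210630.csv',
    sheet_name=None,
    start_year=2021, start_month=7, start_day=1,
    end_year=2024, end_month=1, end_day=2)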