예제 #1
0
def olcaschema_genmix(database, subregion):
    """Build openLCA-schema generation-mix processes, one per subregion.

    Parameters
    ----------
    database : pandas.DataFrame
        Generation-mix data containing at least 'Subregion' and
        'FuelCategory' columns.
    subregion : str
        Region selector (unused here; retained for interface parity).

    Returns
    -------
    dict
        Mapping of subregion name -> generation-mix process table.
    """
    genmix_processes = {}

    subregions = list(pd.unique(database['Subregion']))
    for subreg in subregions:
        db_region = database[database['Subregion'] == subreg]
        exchanges = []

        # Reference (output) exchange for this region's generation mix
        exchange(exchange_table_creation_ref(db_region), exchanges)

        for _, fuel_row in fuel_name.iterrows():
            # Complete fuel name for the fuel category being considered
            full_fuel = fuel_row['Fuelname']
            # Restrict the regional data to the current fuel
            db_fuel = db_region[db_region['FuelCategory'] ==
                                fuel_row['FuelList']]
            if not db_fuel.empty:
                input_exchange = exchange_table_creation_input_genmix(
                    db_fuel, full_fuel)
                exchange(input_exchange, exchanges)

        # Assemble the final process record for this region
        genmix_processes[subreg] = process_table_creation_genmix(
            subreg, exchanges)
    return genmix_processes
예제 #2
0
def create_generation_mix_process_df_from_model_generation_data(
        generation_data, subregion):
    """Create a generation-mix dataframe from model generation data.

    Merges facility generation with eGRID facility/fuel/region data, then
    computes, per region and fuel category, the electricity total and its
    share of the regional total ('Generation_Ratio').

    Parameters
    ----------
    generation_data : pandas.DataFrame
        Facility generation keyed by 'FacilityID'.
    subregion : str
        'all', 'NERC', 'BA', or a single eGRID subregion name.

    Returns
    -------
    pandas.DataFrame
        Rows of Subregion / FuelCategory / Electricity / Generation_Ratio.
    """
    # Normalize the merge key to string for a stable merge
    generation_data['FacilityID'] = generation_data['FacilityID'].astype(str)

    database_for_genmix_final = pd.merge(generation_data,
                                         egrid_facilities_w_fuel_region,
                                         on='FacilityID')

    if subregion == 'all':
        regions = egrid_subregions
    elif subregion == 'NERC':
        regions = list(pd.unique(database_for_genmix_final['NERC']))
    elif subregion == 'BA':
        regions = list(
            pd.unique(database_for_genmix_final['Balancing Authority Name']))
    else:
        regions = [subregion]

    result_database = pd.DataFrame()

    for reg in regions:
        # Crop to the current region.  BUG FIX: the original left `database`
        # undefined (NameError) when a single subregion name was passed;
        # that case is now treated as an eGRID-subregion lookup.
        # .copy() so the Subregion column below is written on an
        # independent frame, not a view.
        if subregion == 'NERC':
            database = database_for_genmix_final[
                database_for_genmix_final['NERC'] == reg].copy()
        elif subregion == 'BA':
            database = database_for_genmix_final[
                database_for_genmix_final['Balancing Authority Name'] ==
                reg].copy()
        else:
            # 'all' or an explicit eGRID subregion name
            database = database_for_genmix_final[
                database_for_genmix_final['Subregion'] == reg].copy()

        # The downstream dictionary writer only reads the 'Subregion'
        # column, so mirror the active region column into it.
        if subregion == 'NERC':
            database['Subregion'] = database['NERC']
        elif subregion == 'BA':
            database['Subregion'] = database['Balancing Authority Name']

        total_gen_reg = np.sum(database['Electricity'])
        for index, row in fuel_name.iterrows():
            # Reading complete fuel name and heat content information
            fuelname = row['FuelList']
            fuelheat = float(row['Heatcontent'])
            # Crop to the current fuel; fall back to the primary fuel column
            database_f1 = database[database['FuelCategory'] == row['FuelList']]
            if database_f1.empty:
                database_f1 = database[database['PrimaryFuel'] ==
                                       row['FuelList']]
            if not database_f1.empty:
                database_f1 = database_f1.copy()
                if use_primaryfuel_for_coal:
                    # Replace the generic COAL category with each plant's
                    # primary fuel.  Single .loc write instead of the
                    # original chained assignment, which is unreliable.
                    coal_mask = database_f1['FuelCategory'] == 'COAL'
                    database_f1.loc[coal_mask, 'FuelCategory'] = \
                        database_f1.loc[coal_mask, 'PrimaryFuel']
                database_f2 = database_f1.groupby(
                    by=['Subregion', 'FuelCategory'])['Electricity'].sum()
                database_f2 = database_f2.reset_index()
                generation = np.sum(database_f2['Electricity'])
                database_f2['Generation_Ratio'] = generation / total_gen_reg
                result_database = pd.concat([result_database, database_f2])

    return result_database
예제 #3
0
def create_generation_process_df(generation_data,
                                 emissions_data,
                                 subregion=None):
    """Create the generation process dataframe with emission factors.

    Combines generation and emissions data, then computes per
    region/fuel/flow/compartment emission factors along with data-quality
    scores and uncertainty information.

    Parameters
    ----------
    generation_data : pandas.DataFrame
        Facility-level generation data.
    emissions_data : pandas.DataFrame
        Facility-level emissions data.
    subregion : str, optional
        Region column to aggregate by ('eGRID', 'NERC', 'BA', 'US', a
        column name of the combined data, or a single region name).
        Defaults to the configured `regional_aggregation`.

    Returns
    -------
    pandas.DataFrame
        One row per region/fuel/flow/compartment with 'Emission_factor',
        data-quality scores and uncertainty columns.
    """
    if subregion is None:
        subregion = regional_aggregation

    final_database = combine_gen_emissions_data(generation_data,
                                                emissions_data, subregion)

    if subregion:
        try:
            regions = final_database[subregion].unique()
        except KeyError:
            print(
                f"Configuration file specifes region column as {subregion}, but it does not exist"
            )
            # BUG FIX: these fallbacks referenced the undefined name
            # `final_data`; they now use `final_database`.
            if subregion == 'eGRID':
                regions = egrid_subregions
            elif subregion == 'NERC':
                regions = list(pd.unique(final_database['NERC']))
            elif subregion == 'BA':
                regions = list(
                    pd.unique(final_database['Balancing Authority Name']))
            else:
                regions = [subregion]
    else:
        # The original elif chain here ('eGRID'/'NERC'/'BA') was
        # unreachable: any non-empty subregion string takes the branch
        # above, and a falsy one can never equal those strings.
        regions = [subregion]

    # Accumulator for per-exchange generation totals (kept for parity;
    # not part of the returned result)
    total_gen_database = pd.DataFrame()

    # Columns to keep in database_f2
    database_f2_cols = [
        'Subregion', 'FuelCategory', 'PrimaryFuel', 'eGRID_ID', 'Electricity',
        'FlowName', 'FlowAmount', 'FlowUUID', 'Compartment', 'Year', 'Source',
        'ReliabilityScore', 'Unit', 'NERC',
        'PercentGenerationfromDesignatedFuelCategory',
        'Balancing Authority Name', 'ElementaryFlowPrimeContext',
        'Balancing Authority Code', 'Ref_Electricity_Subregion_FuelCategory'
    ]
    df_list = []
    for reg in regions:

        print("Creating generation process database for " + reg + " ...")
        # Cropping out based on regions.  .copy() so the score columns
        # below are written on an independent frame, not a view of
        # final_database.
        if subregion == 'eGRID':
            database = final_database[
                final_database['Subregion'] == reg].copy()
        elif subregion == 'NERC':
            database = final_database[final_database['NERC'] == reg].copy()
        elif subregion == 'BA':
            database = final_database[
                final_database['Balancing Authority Name'] == reg].copy()
        elif subregion == 'US':
            # For entire US use full database
            database = final_database.copy()
        else:
            # This should be an eGRID subregion
            database = final_database[
                final_database['Subregion'] == reg].copy()

        # Initialize default data-quality scores
        database['TechnologicalCorrelation'] = 5
        database['TemporalCorrelation'] = 5
        database['DataCollection'] = 5
        for index, row in fuel_name.iterrows():
            # Reading complete fuel name and heat content information
            fuelname = row['FuelList']
            fuelheat = float(row['Heatcontent'])
            # Crop to the current fuel; fall back to the primary fuel column
            database_f1 = database[database['FuelCategory'] == fuelname]

            if database_f1.empty:
                database_f1 = database[database['PrimaryFuel'] == fuelname]
            if not database_f1.empty:

                database_f1 = database_f1.sort_values(by='Source',
                                                      ascending=False)
                exchange_list = list(pd.unique(database_f1['FlowName']))
                if use_primaryfuel_for_coal:
                    # Replace the generic COAL category with each plant's
                    # primary fuel.  Single .loc write instead of the
                    # original chained assignment, which is unreliable.
                    database_f1 = database_f1.copy()
                    coal_mask = database_f1['FuelCategory'] == 'COAL'
                    database_f1.loc[coal_mask, 'FuelCategory'] = \
                        database_f1.loc[coal_mask, 'PrimaryFuel']
                for exchange in exchange_list:
                    database_f2 = database_f1[database_f1['FlowName'] ==
                                              exchange]
                    database_f2 = database_f2[database_f2_cols]

                    compartment_list = list(
                        pd.unique(database_f2['Compartment']))
                    for compartment in compartment_list:
                        database_f3 = database_f2[database_f2['Compartment'] ==
                                                  compartment]

                        database_f3 = database_f3.drop_duplicates(subset=[
                            'Subregion', 'FuelCategory', 'PrimaryFuel',
                            'eGRID_ID', 'Electricity', 'FlowName',
                            'Compartment', 'Year', 'Unit'
                        ])
                        sources = list(pd.unique(database_f3['Source']))

                        # Electricity relevant to this exchange: the
                        # denominator of the emission-factor calculation
                        electricity_source_by_facility_for_region_fuel = database_f1[
                            ['eGRID_ID', 'Electricity',
                             'Source']].drop_duplicates()
                        total_gen, mean, total_facility_considered = total_generation_calculator(
                            sources,
                            electricity_source_by_facility_for_region_fuel)

                        # Add flow-representativeness data quality scores
                        database_f3 = add_flow_representativeness_data_quality_scores(
                            database_f3, total_gen)

                        # Record the generation total behind this exchange
                        sources_str = join_with_underscore(sources)
                        exchange_total_gen = pd.DataFrame(
                            [[
                                reg, fuelname, exchange, compartment,
                                sources_str, total_gen
                            ]],
                            columns=[
                                'Subregion', 'FuelCategory', 'FlowName',
                                'Compartment', 'Source', 'Total Generation'
                            ])
                        # pd.concat instead of the deprecated
                        # DataFrame.append (removed in pandas 2.0)
                        total_gen_database = pd.concat(
                            [total_gen_database, exchange_total_gen],
                            ignore_index=True)

                        if exchange == 'Heat' and str(fuelheat) != 'nan':
                            # Heat input factors are converted per kg of
                            # fuel via the heat content
                            database_f3['Emission_factor'] = compilation(
                                database_f3[['Electricity', 'FlowAmount']],
                                total_gen) / fuelheat
                            database_f3['Unit'] = 'kg'

                        else:
                            database_f3['Emission_factor'] = compilation(
                                database_f3[['Electricity', 'FlowAmount']],
                                total_gen)

                        # Data Quality Scores
                        database_f3['GeographicalCorrelation'] = 1
                        # If flow amount sum = 0, weighted averaging would
                        # divide by zero, so skip it
                        if sum(database_f3['FlowAmount']) != 0:

                            database_f3['Reliability_Score'] = np.average(
                                database_f3['ReliabilityScore'],
                                weights=database_f3['FlowAmount'])
                            database_f3['TemporalCorrelation'] = np.average(
                                database_f3['TemporalCorrelation'],
                                weights=database_f3['FlowAmount'])

                            database_f3[
                                'TechnologicalCorrelation'] = np.average(
                                    database_f3['TechnologicalCorrelation'],
                                    weights=database_f3['FlowAmount'])
                            database_f3['DataCollection'] = np.average(
                                database_f3['DataCollection'],
                                weights=database_f3['FlowAmount'])

                        # Uncertainty Calcs
                        uncertainty_info = uncertainty_creation(
                            database_f3[['Electricity',
                                         'FlowAmount']], exchange, fuelheat,
                            mean, total_gen, total_facility_considered)

                        database_f3['GeomMean'] = uncertainty_info['geomMean']
                        database_f3['GeomSD'] = uncertainty_info['geomSd']
                        database_f3['Maximum'] = uncertainty_info['maximum']
                        database_f3['Minimum'] = uncertainty_info['minimum']

                        database_f3['Source'] = sources_str

                        df_list.append(database_f3)

    result_database = pd.concat(df_list)

    keep_cols = [
        'FuelCategory', 'FlowName', 'FlowUUID', 'Compartment', 'Year',
        'Source', 'Unit', 'Subregion', 'ElementaryFlowPrimeContext',
        'TechnologicalCorrelation', 'TemporalCorrelation', 'DataCollection',
        'Emission_factor', 'ReliabilityScore', 'GeographicalCorrelation',
        'GeomMean', 'GeomSD', 'Maximum', 'Minimum'
    ]
    result_database = result_database[keep_cols]
    result_database = result_database.drop_duplicates()

    print("Generation process database for " + subregion + " complete.")
    return result_database
예제 #4
0
def olcaschema_genprocess(database, subregion):
    """Build openLCA-schema generation process dictionaries.

    One process is produced per (subregion, fuel) pair that has data,
    with a reference/input exchange for the fuel heat input and one
    output exchange per emission flow and compartment.

    Parameters
    ----------
    database : pandas.DataFrame
        Emission-factor data with 'Subregion' and 'FuelCategory' columns.
    subregion : str
        Region selector (unused here; retained for interface parity).

    Returns
    -------
    dict
        Mapping of "<region>_<fuel name>" -> process table.
    """
    processes = {}

    # Flow mapping order matters: renewable-heat mapping must follow
    # emission mapping, and flow direction must be set before the
    # heat-input-to-fuel-name mapping.
    database = map_renewable_heat_flows_to_fedelemflows(database)
    database = map_compartment_to_flow_type(database)
    database = add_flow_direction(database)
    database = map_heat_inputs_to_fuel_names(database)

    for region in database["Subregion"].unique():
        print("Writing generation process dictionary for " + region + " ...")
        regional = database[database['Subregion'] == region]

        for _, fuel_row in fuel_name.iterrows():
            # Complete fuel name and heat content information
            full_name = fuel_row['Fuelname']
            heat_content = float(fuel_row['Heatcontent'])
            fuel_db = regional[regional['FuelCategory'] ==
                               fuel_row['FuelList']]
            if fuel_db.empty:
                continue

            exchanges = []

            # Input fuel-flow information (reference output + fuel input)
            inputs = fuel_db[fuel_db['FlowDirection'] == 'input']
            if not inputs.empty:
                exchanges = exchange(exchange_table_creation_ref(inputs),
                                     exchanges)
                exchanges = exchange(exchange_table_creation_input(inputs),
                                     exchanges)

            # Output (emission) exchanges, one per flow/compartment pair
            outputs = fuel_db[fuel_db['FlowDirection'] == 'output']
            for flow in list(pd.unique(outputs['FlowName'])):
                by_flow = outputs[outputs['FlowName'] == flow]
                for compartment in list(pd.unique(by_flow['Compartment'])):
                    by_compartment = by_flow[by_flow['Compartment'] ==
                                             compartment]
                    exchanges = exchange(
                        exchange_table_creation_output(by_compartment),
                        exchanges)

            final = process_table_creation_gen(full_name, exchanges, region)
            final["name"] = ("Electricity - " +
                             fuel_db["FuelCategory"].tolist()[0] + " - " +
                             region)
            processes[region + "_" + full_name] = final

    print("Generation process dictionaries complete.")
    return processes
예제 #5
0
def create_generation_process_df(generation_data, emissions_data, subregion):
    """Create the generation process dataframe with emission factors.

    Merges facility generation with emissions, patches odd-year (non-eGRID
    year) generation with EIA-923 data, then computes per
    region/fuel/flow/compartment emission factors with data-quality and
    uncertainty information.

    Parameters
    ----------
    generation_data : pandas.DataFrame
        Facility generation keyed by 'FacilityID'.
    emissions_data : pandas.DataFrame
        Facility emissions keyed by 'eGRID_ID'.
    subregion : str
        'all', 'NERC', 'BA', 'US', or a single eGRID subregion name.

    Returns
    -------
    pandas.DataFrame
        One row per region/fuel/flow/compartment with 'Emission_factor',
        data-quality scores and uncertainty columns.
    """
    emissions_data = emissions_data.drop(columns=['FacilityID'])
    combined_data = generation_data.merge(emissions_data,
                                          left_on=['FacilityID'],
                                          right_on=['eGRID_ID'],
                                          how='right')

    # Find a year differing from the eGRID year; records from that year
    # need their generation replaced with EIA data
    odd_year = None
    for year in years_in_emissions_and_wastes_by_facility:
        if year != egrid_year:
            odd_year = year

    cols_to_drop_for_final = ['FacilityID']

    # Downloading the required EIA923 data
    if odd_year is not None:
        EIA_923_gen_data = eia_download_extract(odd_year)

        # Merging database with EIA 923 data
        combined_data = combined_data.merge(EIA_923_gen_data,
                                            left_on=['eGRID_ID'],
                                            right_on=['Plant Id'],
                                            how='left')
        combined_data['Year'] = combined_data['Year'].astype(str)
        combined_data = combined_data.sort_values(by=['Year'])
        # Replacing the odd year Net generations with the EIA net
        # generations.  BUG FIX: 'Year' was just cast to str, so the
        # comparison must use str(odd_year); the original compared
        # against int(odd_year), which never matched a string column.
        combined_data['Electricity'] = np.where(
            combined_data['Year'] == str(odd_year),
            combined_data['Net Generation (Megawatthours)'],
            combined_data['Electricity'])
        cols_to_drop_for_final = cols_to_drop_for_final + [
            'Plant Id', 'Plant Name', 'State', 'YEAR',
            'Net Generation (Megawatthours)', 'Total Fuel Consumption MMBtu'
        ]

    # Dropping unnecessary columns
    emissions_gen_data = combined_data.drop(columns=cols_to_drop_for_final)

    # Merge with the egrid_facilities file to attach subregion information
    final_data = pd.merge(egrid_facilities_w_fuel_region,
                          emissions_gen_data,
                          left_on=['FacilityID'],
                          right_on=['eGRID_ID'],
                          how='right')

    # Add in reference electricity for subregion and fuel category
    final_data = pd.merge(final_data,
                          ref_egrid_subregion_generation_by_fuelcategory,
                          on=['Subregion', 'FuelCategory'],
                          how='left')

    if subregion == 'all':
        regions = egrid_subregions
    elif subregion == 'NERC':
        regions = list(pd.unique(final_data['NERC']))
    elif subregion == 'BA':
        regions = list(pd.unique(final_data['Balancing Authority Name']))
    else:
        regions = [subregion]

    final_data = final_data.drop(columns=['FacilityID'])

    # Guard against duplicated flow values still present in the source data
    final_data = final_data.drop_duplicates(subset=[
        'Subregion', 'PrimaryFuel', 'FuelCategory', 'FlowName', 'FlowAmount',
        'Compartment'
    ])

    final_data = final_data[final_data['FlowName'] != 'Electricity']

    # Map emission flows to fed elem flows
    final_database = map_emissions_to_fedelemflows(final_data)
    # Create dfs for storing the output
    result_database = pd.DataFrame()
    total_gen_database = pd.DataFrame()
    # Looping through different subregions to create the files

    for reg in regions:

        print("Creating generation process database for " + reg + " ...")
        # Cropping out based on regions
        if subregion == 'all':
            database = final_database[final_database['Subregion'] == reg]
        elif subregion == 'NERC':
            database = final_database[final_database['NERC'] == reg]
        elif subregion == 'BA':
            database = final_database[
                final_database['Balancing Authority Name'] == reg]
        elif subregion == 'US':
            # For entire US use full database
            database = final_database
        else:
            # This should be an eGRID subregion
            database = final_database[final_database['Subregion'] == reg]

        for index, row in fuel_name.iterrows():
            # Reading complete fuel name and heat content information
            fuelname = row['FuelList']
            fuelheat = float(row['Heatcontent'])
            # Crop to the current fuel; fall back to the primary fuel column
            database_f1 = database[database['FuelCategory'] == fuelname]

            if database_f1.empty:
                database_f1 = database[database['PrimaryFuel'] == fuelname]
            if not database_f1.empty:

                database_f1 = database_f1.sort_values(by='Source',
                                                      ascending=False)
                exchange_list = list(pd.unique(database_f1['FlowName']))
                if use_primaryfuel_for_coal:
                    # Replace the generic COAL category with each plant's
                    # primary fuel.  Single .loc write instead of the
                    # original chained assignment, which is unreliable.
                    database_f1 = database_f1.copy()
                    coal_mask = database_f1['FuelCategory'] == 'COAL'
                    database_f1.loc[coal_mask, 'FuelCategory'] = \
                        database_f1.loc[coal_mask, 'PrimaryFuel']

                for exchange in exchange_list:
                    database_f2 = database_f1[database_f1['FlowName'] ==
                                              exchange]
                    database_f2 = database_f2[[
                        'Subregion', 'FuelCategory', 'PrimaryFuel', 'eGRID_ID',
                        'Electricity', 'FlowName', 'FlowAmount', 'FlowUUID',
                        'Compartment', 'Year', 'Source', 'ReliabilityScore',
                        'Unit', 'NERC',
                        'PercentGenerationfromDesignatedFuelCategory',
                        'Balancing Authority Name',
                        'ElementaryFlowPrimeContext',
                        'Balancing Authority Code',
                        'Ref_Electricity_Subregion_FuelCategory'
                    ]]

                    compartment_list = list(
                        pd.unique(database_f2['Compartment']))
                    for compartment in compartment_list:
                        database_f3 = database_f2[database_f2['Compartment'] ==
                                                  compartment]

                        database_f3 = database_f3.drop_duplicates(subset=[
                            'Subregion', 'FuelCategory', 'PrimaryFuel',
                            'eGRID_ID', 'Electricity', 'FlowName',
                            'Compartment', 'Year', 'Unit'
                        ])
                        sources = list(pd.unique(database_f3['Source']))

                        # Electricity relevant to this exchange: the
                        # denominator of the emission-factor calculation
                        electricity_source_by_facility_for_region_fuel = database_f1[
                            ['eGRID_ID', 'Electricity',
                             'Source']].drop_duplicates()
                        total_gen, mean, total_facility_considered = total_generation_calculator(
                            sources,
                            electricity_source_by_facility_for_region_fuel)

                        # Add flow-representativeness data quality scores
                        database_f3 = add_flow_representativeness_data_quality_scores(
                            database_f3, total_gen)
                        # Can now drop this
                        database_f3 = database_f3.drop(
                            columns='Ref_Electricity_Subregion_FuelCategory')

                        # Record the generation total behind this exchange
                        sources_str = join_with_underscore(sources)
                        exchange_total_gen = pd.DataFrame(
                            [[
                                reg, fuelname, exchange, compartment,
                                sources_str, total_gen
                            ]],
                            columns=[
                                'Subregion', 'FuelCategory', 'FlowName',
                                'Compartment', 'Source', 'Total Generation'
                            ])
                        # pd.concat instead of the deprecated
                        # DataFrame.append (removed in pandas 2.0)
                        total_gen_database = pd.concat(
                            [total_gen_database, exchange_total_gen],
                            ignore_index=True)

                        if exchange == 'Heat' and str(fuelheat) != 'nan':
                            # Heat input factors are converted per kg of
                            # fuel via the heat content
                            database_f3['Emission_factor'] = compilation(
                                database_f3[['Electricity', 'FlowAmount']],
                                total_gen) / fuelheat
                            database_f3['Unit'] = 'kg'

                        else:
                            database_f3['Emission_factor'] = compilation(
                                database_f3[['Electricity', 'FlowAmount']],
                                total_gen)

                        # Data Quality Scores
                        database_f3['GeographicalCorrelation'] = 1
                        # If flow amount sum = 0, weighted averaging would
                        # divide by zero, so skip it
                        if sum(database_f3['FlowAmount']) != 0:

                            database_f3['Reliability_Score'] = np.average(
                                database_f3['ReliabilityScore'],
                                weights=database_f3['FlowAmount'])
                            database_f3['TemporalCorrelation'] = np.average(
                                database_f3['TemporalCorrelation'],
                                weights=database_f3['FlowAmount'])

                            database_f3[
                                'TechnologicalCorrelation'] = np.average(
                                    database_f3['TechnologicalCorrelation'],
                                    weights=database_f3['FlowAmount'])
                            database_f3['DataCollection'] = np.average(
                                database_f3['DataCollection'],
                                weights=database_f3['FlowAmount'])

                        # Uncertainty Calcs
                        uncertainty_info = uncertainty_creation(
                            database_f3[['Electricity',
                                         'FlowAmount']], exchange, fuelheat,
                            mean, total_gen, total_facility_considered)

                        database_f3['GeomMean'] = uncertainty_info['geomMean']
                        database_f3['GeomSD'] = uncertainty_info['geomSd']
                        database_f3['Maximum'] = uncertainty_info['maximum']
                        database_f3['Minimum'] = uncertainty_info['minimum']

                        database_f3['Source'] = sources_str

                        result_database = pd.concat(
                            [result_database, database_f3])

    # Drop region/bookkeeping columns that do not belong in the output
    if subregion == 'all':
        result_database = result_database.drop(columns=[
            'eGRID_ID', 'FlowAmount', 'Electricity', 'ReliabilityScore',
            'PrimaryFuel', 'NERC', 'Balancing Authority Name',
            'Balancing Authority Code'
        ])
    elif subregion == 'NERC':
        result_database = result_database.drop(columns=[
            'eGRID_ID', 'FlowAmount', 'Electricity', 'ReliabilityScore',
            'PrimaryFuel', 'Balancing Authority Name',
            'Balancing Authority Code', 'Subregion'
        ])
    elif subregion == 'BA':
        result_database = result_database.drop(columns=[
            'eGRID_ID', 'FlowAmount', 'Electricity', 'ReliabilityScore',
            'PrimaryFuel', 'NERC', 'Balancing Authority Code', 'Subregion'
        ])
    elif subregion == 'US':
        result_database = result_database.drop(columns=[
            'eGRID_ID', 'FlowAmount', 'Electricity', 'ReliabilityScore',
            'PrimaryFuel', 'NERC', 'Balancing Authority Name',
            'Balancing Authority Code', 'Subregion'
        ])

    result_database = result_database.drop_duplicates()

    print("Generation process database for " + subregion + " complete.")
    return result_database