Esempio n. 1
0
def combine_gen_emissions_data(generation_data,
                               emissions_data,
                               subregion=None):
    """
    Merge generation and emissions data and label every row with region and
    fuel information.

    Facility generation is joined to facility emissions on plant ID and year.
    Region and fuel labels come from ``eia_facility_fuel_region`` when the
    module-level ``replace_egrid`` setting is truthy and from
    ``egrid_facilities_w_fuel_region`` otherwise. A reference generation
    total per subregion and fuel category is merged in as
    ``Ref_Electricity_Subregion_FuelCategory``, rows without generation are
    dropped, duplicated flows and the 'Electricity' flow are removed, and the
    result is passed through ``map_emissions_to_fedelemflows``.

    Parameters
    ----------
    generation_data : dataframe
        Annual generation for each power plant. Must contain 'FacilityID' and
        'Year' (presumably also 'Electricity' - confirm against the caller).
        The caller's frame is not modified.
    emissions_data : dataframe
        Annual emissions of all flows from each facility. Must contain
        'FacilityID' (dropped here), 'eGRID_ID' and 'Year'.
    subregion : str, optional
        Name of the column holding the region designation. When None (the
        default) the module-level ``regional_aggregation`` value is used.

    Returns
    -------
    dataframe
        Combined emissions and generation data for each facility, as returned
        by ``map_emissions_to_fedelemflows``.
    """
    if subregion is None:
        subregion = regional_aggregation

    # Evaluate the configuration flag once so every step below agrees on the
    # data source, even if the flag is truthy without being the literal True.
    use_eia = bool(replace_egrid)

    emissions_data = emissions_data.drop(columns=['FacilityID'])
    # assign() returns a new frame, so the caller's generation_data keeps its
    # original FacilityID dtype.
    generation_data = generation_data.assign(
        FacilityID=generation_data["FacilityID"].astype(int))
    emissions_data["eGRID_ID"] = emissions_data["eGRID_ID"].astype(int)

    # Joining on Year as well as plant ID pairs each emissions record with
    # generation reported for the same year. The right join keeps emissions
    # rows that have no matching generation; they are dropped further down.
    combined_data = generation_data.merge(emissions_data,
                                          left_on=['FacilityID', 'Year'],
                                          right_on=['eGRID_ID', 'Year'],
                                          how='right')

    # eGRID_ID carries the plant ID from here on.
    emissions_gen_data = combined_data.drop(columns=['FacilityID'])
    emissions_gen_data["eGRID_ID"] = emissions_gen_data["eGRID_ID"].astype(int)

    if use_eia:
        # This will only add BA labels, not eGRID subregions
        fuel_region = eia_facility_fuel_region(eia_gen_year)
        fuel_region["FacilityID"] = fuel_region["FacilityID"].astype(int)
        final_data = pd.merge(fuel_region,
                              emissions_gen_data,
                              left_on=['FacilityID'],
                              right_on=['eGRID_ID'],
                              how='right')

        # 'Subregion' is used by the steps below. Without eGRID subregions,
        # fill it from the configured region column, or from the balancing
        # authority name when no region column is configured.
        if subregion:
            assert subregion in final_data.columns, (
                f"region column {subregion} not found in facility data")
            final_data['Subregion'] = final_data[subregion]
        else:
            final_data['Subregion'] = final_data['Balancing Authority Name']

        # Reference electricity: total generation per subregion, fuel
        # category and year, computed from the merged data itself.
        subregion_fuel_year_gen = (final_data.groupby(
            ['Subregion', 'FuelCategory', 'Year'],
            as_index=False)['Electricity'].sum())
        subregion_fuel_year_gen = subregion_fuel_year_gen.rename(
            columns={'Electricity': 'Ref_Electricity_Subregion_FuelCategory'})
        final_data = pd.merge(final_data,
                              subregion_fuel_year_gen,
                              on=['Subregion', 'FuelCategory', 'Year'])
    else:
        # Merge with the eGRID facility table to get subregion information.
        egrid_facilities_w_fuel_region[
            "FacilityID"] = egrid_facilities_w_fuel_region[
                "FacilityID"].astype(int)
        final_data = pd.merge(egrid_facilities_w_fuel_region,
                              emissions_gen_data,
                              left_on=['FacilityID'],
                              right_on=['eGRID_ID'],
                              how='right')

        # Add in reference electricity for subregion and fuel category
        final_data = pd.merge(final_data,
                              ref_egrid_subregion_generation_by_fuelcategory,
                              on=['Subregion', 'FuelCategory'],
                              how='left')

    # Need to drop rows with NaN electricity generation.
    # They currently exist when generation from a facility has been omitted
    # because of some filter (e.g. generation from primary fuel < 90%)
    # but we still have emissions data.
    final_data = final_data.dropna(subset=['Electricity'])

    # Diagnostic only: this function does not return a list of regions, so
    # nothing else is derived from the region column here.
    if subregion and subregion not in final_data.columns:
        print(
            f"Configuration file specifes region column as {subregion}, but it does not exist"
        )

    final_data = final_data.drop(columns=['FacilityID'])

    # Identical flow values still show up more than once in the data, so
    # repeated rows are removed on these columns.
    dup_cols_check = [
        'Subregion',
        'PrimaryFuel',
        'FuelCategory',
        'FlowName',
        'FlowAmount',
        'Compartment',
    ]
    final_data = final_data.drop_duplicates(subset=dup_cols_check)

    # Electricity is the reference product, not an emission flow.
    final_data = final_data[final_data['FlowName'] != 'Electricity']

    # Map emission flows to fed elem flows
    return map_emissions_to_fedelemflows(final_data)
Esempio n. 2
0
def create_generation_process_df():
    """
    Reads emissions and generation data from different sources to provide
    facility-level emissions. Most important inputs to this process come
    from the model configuration (``model_specs``).

    When ``model_specs.replace_egrid`` is set, generation comes from
    ``build_generation_data`` and the eGRID emissions are combined with
    plant emissions from ``ampd.generate_plant_emissions`` through
    ``em_other.integrate_replace_emissions``. Otherwise the eGRID generation
    for the selected facilities is used and stamped with
    ``model_specs.egrid_year``.

    Parameters
    ----------
    None

    Returns
    ----------
    dataframe
        Dataframe includes all facility-level emissions, with data quality
        columns, compartment labels and balancing-authority region columns
        added, after any manual edits from ``edits.check_for_edits``.
    """
    from electricitylci.eia923_generation import (build_generation_data,
                                                  eia923_primary_fuel)
    from electricitylci.egrid_filter import (
        emissions_and_waste_for_selected_egrid_facilities, )
    from electricitylci.generation import (
        egrid_facilities_w_fuel_region,
        add_technological_correlation_score,
        add_temporal_correlation_score,
    )
    import electricitylci.emissions_other_sources as em_other
    import electricitylci.ampd_plant_emissions as ampd
    from electricitylci.combinator import ba_codes
    import electricitylci.manual_edits as edits

    # Collapses compartment paths to the short compartment name; short names
    # map to themselves.
    COMPARTMENT_DICT = {
        "emission/air": "air",
        "emission/water": "water",
        "emission/ground": "ground",
        "input": "input",
        "output": "output",
        "waste": "waste",
        "air": "air",
        "water": "water",
        "ground": "ground",
    }
    if model_specs.replace_egrid:
        generation_data = build_generation_data().drop_duplicates()
        cems_df = ampd.generate_plant_emissions(model_specs.eia_gen_year)
        cems_df.drop(columns=["FlowUUID"], inplace=True)
        emissions_and_waste_for_selected_egrid_facilities = em_other.integrate_replace_emissions(
            cems_df, emissions_and_waste_for_selected_egrid_facilities)
    else:
        from electricitylci.egrid_filter import electricity_for_selected_egrid_facilities
        generation_data = electricity_for_selected_egrid_facilities
        generation_data["Year"] = model_specs.egrid_year
        generation_data["FacilityID"] = generation_data["FacilityID"].astype(
            int)

    # NOTE: 'FacilityID' is intentionally left on the emissions frame.
    # Assuming both frames carry that column, the merge below yields
    # 'FacilityID_x' / 'FacilityID_y' (dropped further down), and the
    # 'FacilityID' brought in by egrid_facilities_w_fuel_region stays
    # unsuffixed for the duplicate check.
    emissions_and_waste_for_selected_egrid_facilities[
        "eGRID_ID"] = emissions_and_waste_for_selected_egrid_facilities[
            "eGRID_ID"].astype(int)
    final_database = pd.merge(
        left=emissions_and_waste_for_selected_egrid_facilities,
        right=generation_data,
        right_on=["FacilityID", "Year"],
        left_on=["eGRID_ID", "Year"],
        how="left",
    )
    egrid_facilities_w_fuel_region[
        "FacilityID"] = egrid_facilities_w_fuel_region["FacilityID"].astype(
            int)
    final_database = pd.merge(
        left=final_database,
        right=egrid_facilities_w_fuel_region,
        left_on="eGRID_ID",
        right_on="FacilityID",
        how="left",
        suffixes=("", "_right"),
    )
    if model_specs.replace_egrid:
        # Take the fuel category of every plant from the EIA 923 primary
        # fuel table, overwriting whatever the eGRID merge supplied.
        primary_fuel_df = eia923_primary_fuel(year=model_specs.eia_gen_year)
        primary_fuel_df.rename(columns={'Plant Id': "eGRID_ID"}, inplace=True)
        primary_fuel_df["eGRID_ID"] = primary_fuel_df["eGRID_ID"].astype(int)
        key_df = (primary_fuel_df[[
            "eGRID_ID", "FuelCategory"
        ]].dropna().drop_duplicates(subset="eGRID_ID").set_index("eGRID_ID"))
        final_database["FuelCategory"] = final_database["eGRID_ID"].map(
            key_df["FuelCategory"])
    else:
        # Fill only the missing fuel categories, using the category found on
        # other rows of the same plant.
        key_df = (final_database[[
            "eGRID_ID", "FuelCategory"
        ]].dropna().drop_duplicates(subset="eGRID_ID").set_index("eGRID_ID"))
        missing_fuel = final_database["FuelCategory"].isnull()
        final_database.loc[missing_fuel, "FuelCategory"] = final_database.loc[
            missing_fuel, "eGRID_ID"].map(key_df["FuelCategory"])

    final_database["Final_fuel_agg"] = final_database["FuelCategory"]

    # If both year columns exist, keep only rows where they agree. When
    # either column is absent this step is skipped.
    if ("Year_x" in final_database.columns
            and "Year_y" in final_database.columns):
        year_filter = final_database["Year_x"] == final_database["Year_y"]
        final_database = final_database.loc[year_filter, :].drop(
            columns="Year_y")
    final_database.rename(columns={"Year_x": "Year"}, inplace=True)

    final_database = map_emissions_to_fedelemflows(final_database)
    dup_cols_check = [
        "FacilityID",
        "FuelCategory",
        "FlowName",
        "FlowAmount",
        "Compartment",
    ]
    # Keep the first occurrence of any repeated column label before removing
    # repeated rows.
    final_database = final_database.loc[:,
                                        ~final_database.columns.duplicated()]
    final_database = final_database.drop_duplicates(subset=dup_cols_check)
    # 'Final_fuel_agg' takes over as the fuel category from here on.
    final_database.drop(
        columns=["FuelCategory", "FacilityID_x", "FacilityID_y"], inplace=True)
    final_database.rename(
        columns={
            "Final_fuel_agg": "FuelCategory",
            "TargetFlowUUID": "FlowUUID",
        },
        inplace=True,
    )
    final_database = add_temporal_correlation_score(
        final_database, model_specs.electricity_lci_target_year)
    final_database = add_technological_correlation_score(final_database)
    final_database["DataCollection"] = 5
    final_database["GeographicalCorrelation"] = 1

    final_database["eGRID_ID"] = final_database["eGRID_ID"].astype(int)

    final_database.sort_values(by=["eGRID_ID", "Compartment", "FlowName"],
                               inplace=True)
    final_database["stage_code"] = "Power plant"
    # Preserve the full compartment path before collapsing it to the short
    # compartment name.
    final_database["Compartment_path"] = final_database["Compartment"]
    final_database["Compartment"] = final_database["Compartment_path"].map(
        COMPARTMENT_DICT)
    # Region labels are looked up from the balancing authority code.
    final_database["Balancing Authority Name"] = final_database[
        "Balancing Authority Code"].map(ba_codes["BA_Name"])
    final_database["EIA_Region"] = final_database[
        "Balancing Authority Code"].map(ba_codes["EIA_Region"])
    final_database["FERC_Region"] = final_database[
        "Balancing Authority Code"].map(ba_codes["FERC_Region"])
    final_database = edits.check_for_edits(final_database, "generation.py",
                                           "create_generation_process_df")
    return final_database
Esempio n. 3
0
def create_generation_process_df(generation_data, emissions_data, subregion):
    """
    Build the generation process database of emission factors per region,
    fuel, flow and compartment.

    Generation and emissions are merged per facility, labelled with the
    eGRID facility/region table and the reference generation per subregion
    and fuel category, and mapped with ``map_emissions_to_fedelemflows``.
    For every region, fuel (from the module-level ``fuel_name`` table), flow
    and compartment an emission factor, data quality scores and uncertainty
    parameters are computed and collected into one dataframe.

    Parameters
    ----------
    generation_data : dataframe
        Facility generation; merged on its 'FacilityID' column.
    emissions_data : dataframe
        Facility emissions; its 'FacilityID' column is dropped and
        'eGRID_ID' is used as the merge key.
    subregion : str
        'all' (every eGRID subregion), 'NERC', 'BA', 'US', or the name of a
        single eGRID subregion.

    Returns
    -------
    dataframe
        One row per retained facility flow with 'Emission_factor', data
        quality scores and 'GeomMean' / 'GeomSD' / 'Maximum' / 'Minimum'.
        Facility-level columns are dropped for the 'all', 'NERC', 'BA' and
        'US' cases.
    """
    emissions_data = emissions_data.drop(columns=['FacilityID'])
    combined_data = generation_data.merge(emissions_data,
                                          left_on=['FacilityID'],
                                          right_on=['eGRID_ID'],
                                          how='right')

    # Look for an emissions year that differs from the eGRID year; if
    # several differ, the last one in the list is used.
    odd_year = None
    for year in years_in_emissions_and_wastes_by_facility:
        if year != egrid_year:
            odd_year = year

    cols_to_drop_for_final = ['FacilityID']

    # Download the EIA 923 data needed for the odd year
    if odd_year is not None:
        EIA_923_gen_data = eia_download_extract(odd_year)

        # Merging database with EIA 923 data
        combined_data = combined_data.merge(EIA_923_gen_data,
                                            left_on=['eGRID_ID'],
                                            right_on=['Plant Id'],
                                            how='left')
        combined_data['Year'] = combined_data['Year'].astype(str)
        combined_data = combined_data.sort_values(by=['Year'])
        # Replace the odd-year net generation with the EIA net generation.
        # Year is text at this point, so it is compared numerically; testing
        # the strings against an int would never match.
        is_odd_year = (pd.to_numeric(combined_data['Year'], errors='coerce')
                       == int(odd_year))
        combined_data['Electricity'] = np.where(
            is_odd_year,
            combined_data['Net Generation (Megawatthours)'],
            combined_data['Electricity'])
        cols_to_drop_for_final = cols_to_drop_for_final + [
            'Plant Id', 'Plant Name', 'State', 'YEAR',
            'Net Generation (Megawatthours)', 'Total Fuel Consumption MMBtu'
        ]

    # Dropping unnecessary columns
    emissions_gen_data = combined_data.drop(columns=cols_to_drop_for_final)

    # Merge with the eGRID facility table to get subregion information
    final_data = pd.merge(egrid_facilities_w_fuel_region,
                          emissions_gen_data,
                          left_on=['FacilityID'],
                          right_on=['eGRID_ID'],
                          how='right')

    # Add in reference electricity for subregion and fuel category
    final_data = pd.merge(final_data,
                          ref_egrid_subregion_generation_by_fuelcategory,
                          on=['Subregion', 'FuelCategory'],
                          how='left')

    if subregion == 'all':
        regions = egrid_subregions
    elif subregion == 'NERC':
        regions = list(pd.unique(final_data['NERC']))
    elif subregion == 'BA':
        regions = list(pd.unique(final_data['Balancing Authority Name']))
    else:
        regions = [subregion]

    final_data = final_data.drop(columns=['FacilityID'])

    # Identical flow values still show up more than once in the data, so
    # repeated rows are removed on these columns.
    final_data = final_data.drop_duplicates(subset=[
        'Subregion', 'PrimaryFuel', 'FuelCategory', 'FlowName', 'FlowAmount',
        'Compartment'
    ])

    final_data = final_data[final_data['FlowName'] != 'Electricity']

    # Map emission flows to fed elem flows
    final_database = map_emissions_to_fedelemflows(final_data)

    # Per-compartment results are collected here and concatenated once at
    # the end instead of growing a dataframe inside the loops.
    result_frames = []

    # Looping through different subregions to create the files
    for reg in regions:

        print(f"Creating generation process database for {reg} ...")
        # Cropping out based on regions
        if subregion == 'all':
            database = final_database[final_database['Subregion'] == reg]
        elif subregion == 'NERC':
            database = final_database[final_database['NERC'] == reg]
        elif subregion == 'BA':
            database = final_database[
                final_database['Balancing Authority Name'] == reg]
        elif subregion == 'US':
            # For entire US use full database
            database = final_database
        else:
            # This should be a egrid subregion
            database = final_database[final_database['Subregion'] == reg]

        for _, row in fuel_name.iterrows():
            # Reading complete fuel name and heat content information
            fuelname = row['FuelList']
            fuelheat = float(row['Heatcontent'])
            # Crop the database to the fuel being considered, falling back
            # to the primary fuel when no fuel category matches.
            database_f1 = database[database['FuelCategory'] == fuelname]
            if database_f1.empty:
                database_f1 = database[database['PrimaryFuel'] == fuelname]
            if database_f1.empty:
                continue

            database_f1 = database_f1.sort_values(by='Source',
                                                  ascending=False)
            exchange_list = list(pd.unique(database_f1['FlowName']))
            if use_primaryfuel_for_coal:
                # Single .loc assignment; the chained form
                # df[col].loc[mask] = ... may write to a temporary.
                is_coal = database_f1['FuelCategory'] == 'COAL'
                database_f1.loc[is_coal, 'FuelCategory'] = database_f1.loc[
                    is_coal, 'PrimaryFuel']

            for exchange in exchange_list:
                database_f2 = database_f1[database_f1['FlowName'] == exchange]
                database_f2 = database_f2[[
                    'Subregion', 'FuelCategory', 'PrimaryFuel', 'eGRID_ID',
                    'Electricity', 'FlowName', 'FlowAmount', 'FlowUUID',
                    'Compartment', 'Year', 'Source', 'ReliabilityScore',
                    'Unit', 'NERC',
                    'PercentGenerationfromDesignatedFuelCategory',
                    'Balancing Authority Name',
                    'ElementaryFlowPrimeContext',
                    'Balancing Authority Code',
                    'Ref_Electricity_Subregion_FuelCategory'
                ]]

                compartment_list = list(pd.unique(database_f2['Compartment']))
                for compartment in compartment_list:
                    database_f3 = database_f2[database_f2['Compartment'] ==
                                              compartment]

                    database_f3 = database_f3.drop_duplicates(subset=[
                        'Subregion', 'FuelCategory', 'PrimaryFuel',
                        'eGRID_ID', 'Electricity', 'FlowName',
                        'Compartment', 'Year', 'Unit'
                    ])
                    sources = list(pd.unique(database_f3['Source']))

                    # Get electricity relevant for this exchange for the
                    # denominator in the emissions factors calcs
                    electricity_source_by_facility_for_region_fuel = database_f1[
                        ['eGRID_ID', 'Electricity',
                         'Source']].drop_duplicates()
                    total_gen, mean, total_facility_considered = total_generation_calculator(
                        sources,
                        electricity_source_by_facility_for_region_fuel)

                    # Add data quality scores
                    database_f3 = add_flow_representativeness_data_quality_scores(
                        database_f3, total_gen)
                    # Can now drop this
                    database_f3 = database_f3.drop(
                        columns='Ref_Electricity_Subregion_FuelCategory')

                    sources_str = join_with_underscore(sources)

                    emission_factor = compilation(
                        database_f3[['Electricity', 'FlowAmount']], total_gen)
                    if exchange == 'Heat' and not np.isnan(fuelheat):
                        # Heat is divided by the fuel heat content and then
                        # reported in kg.
                        database_f3['Emission_factor'] = (emission_factor /
                                                          fuelheat)
                        database_f3['Unit'] = 'kg'
                    else:
                        database_f3['Emission_factor'] = emission_factor

                    # Data Quality Scores
                    database_f3['GeographicalCorrelation'] = 1
                    # If flow amount sum = 0, then do not average
                    # NOTE(review): TemporalCorrelation,
                    # TechnologicalCorrelation and DataCollection are not in
                    # the column subset selected above; presumably
                    # add_flow_representativeness_data_quality_scores adds
                    # them - confirm.
                    if sum(database_f3['FlowAmount']) != 0:
                        database_f3['Reliability_Score'] = np.average(
                            database_f3['ReliabilityScore'],
                            weights=database_f3['FlowAmount'])
                        database_f3['TemporalCorrelation'] = np.average(
                            database_f3['TemporalCorrelation'],
                            weights=database_f3['FlowAmount'])
                        database_f3['TechnologicalCorrelation'] = np.average(
                            database_f3['TechnologicalCorrelation'],
                            weights=database_f3['FlowAmount'])
                        database_f3['DataCollection'] = np.average(
                            database_f3['DataCollection'],
                            weights=database_f3['FlowAmount'])

                    # Uncertainty Calcs
                    uncertainty_info = uncertainty_creation(
                        database_f3[['Electricity', 'FlowAmount']], exchange,
                        fuelheat, mean, total_gen, total_facility_considered)

                    database_f3['GeomMean'] = uncertainty_info['geomMean']
                    database_f3['GeomSD'] = uncertainty_info['geomSd']
                    database_f3['Maximum'] = uncertainty_info['maximum']
                    database_f3['Minimum'] = uncertainty_info['minimum']

                    database_f3['Source'] = sources_str

                    result_frames.append(database_f3)

    # pd.concat rejects an empty list, so fall back to an empty frame.
    if result_frames:
        result_database = pd.concat(result_frames)
    else:
        result_database = pd.DataFrame()

    # Drop facility-level columns; which region column survives depends on
    # the requested aggregation.
    if subregion == 'all':
        result_database = result_database.drop(columns=[
            'eGRID_ID', 'FlowAmount', 'Electricity', 'ReliabilityScore',
            'PrimaryFuel', 'NERC', 'Balancing Authority Name',
            'Balancing Authority Code'
        ])
    elif subregion == 'NERC':
        result_database = result_database.drop(columns=[
            'eGRID_ID', 'FlowAmount', 'Electricity', 'ReliabilityScore',
            'PrimaryFuel', 'Balancing Authority Name',
            'Balancing Authority Code', 'Subregion'
        ])
    elif subregion == 'BA':
        result_database = result_database.drop(columns=[
            'eGRID_ID', 'FlowAmount', 'Electricity', 'ReliabilityScore',
            'PrimaryFuel', 'NERC', 'Balancing Authority Code', 'Subregion'
        ])
    elif subregion == 'US':
        result_database = result_database.drop(columns=[
            'eGRID_ID', 'FlowAmount', 'Electricity', 'ReliabilityScore',
            'PrimaryFuel', 'NERC', 'Balancing Authority Name',
            'Balancing Authority Code', 'Subregion'
        ])

    result_database = result_database.drop_duplicates()

    print(f"Generation process database for {subregion} complete.")
    return result_database