Ejemplo n.º 1
0
def eia860_boiler_info_design(year):
    expected_860_folder = join(data_dir, "eia860_{}".format(year))

    if not os.path.exists(expected_860_folder):
        print("Downloading EIA-860 files")
        eia860_download(year=year, save_path=expected_860_folder)

        eia860_path, eia860_name = find_file_in_folder(
            folder_path=expected_860_folder,
            file_pattern_match=["6_2_EnviroEquip", "xlsx"],
            return_name=True,
        )

        eia = load_eia860_excel(eia860_path, "Boiler Info & Design Parameters",
                                1)

        # Save as csv for easier access in future
        csv_fn = eia860_name.split(".")[0] + "_boiler_info.csv"
        csv_path = join(expected_860_folder, csv_fn)
        eia.to_csv(csv_path, index=False)

    else:
        all_files = os.listdir(expected_860_folder)

        # Check for both csv and year<_Final> in case multiple years
        # or other csv files exist
        csv_file = [
            f for f in all_files if "_boiler_info.csv" in f
            and "6_2_EnviroEquip_Y{}".format(year) in f
        ]

        # Read and return the existing csv file if it exists
        if csv_file:
            print("Loading {} EIA-860 plant data from csv file".format(year))
            fn = csv_file[0]
            csv_path = join(expected_860_folder, fn)
            eia = pd.read_csv(csv_path, dtype={"Plant Id": str})

        else:
            print("Loading data from previously downloaded excel file")
            eia860_path, eia860_name = find_file_in_folder(
                folder_path=expected_860_folder,
                file_pattern_match=["6_2_EnviroEquip", "xlsx"],
                return_name=True,
            )
            # # would be more elegent with glob but this works to identify the
            # # Schedule_2_3_4_5 file
            # for f in all_files:
            #     if '2___Plant' in f:
            #         plant_file = f
            # eia860_path = join(expected_860_folder, plant_file)
            eia = load_eia860_excel(eia860_path,
                                    "Boiler Info & Design Parameters", 1)

            csv_fn = eia860_name.split(".")[0] + "_boiler_info.csv"
            csv_path = join(expected_860_folder, csv_fn)
            eia.to_csv(csv_path, index=False)
    eia = _clean_columns(eia)
    return eia
Ejemplo n.º 2
0
def eia923_sched8_aec(year):
    expected_923_folder = join(data_dir, "f923_{}".format(year))

    if not os.path.exists(expected_923_folder):
        print("Downloading EIA-923 files")
        eia923_download(year=year, save_path=expected_923_folder)

        eia923_path, eia923_name = find_file_in_folder(
            folder_path=expected_923_folder,
            file_pattern_match=["Schedule_8", "xlsx"],
            return_name=True,
        )
        # Save as csv for easier access in future
        csv_fn = eia923_name.split(".")[0] + "page_8c.csv"
        csv_path = join(expected_923_folder, csv_fn)
        eia = load_eia923_excel(expected_923_folder, page="8c")
        eia.to_csv(csv_path, index=False)
    else:
        all_files = os.listdir(expected_923_folder)
        # Check for both csv and year<_Final> in case multiple years
        # or other csv files exist
        csv_file = [
            f
            for f in all_files
            if ".csv" in f and "{}_Final".format(year) in f and "page_8c" in f
        ]

        # Read and return the existing csv file if it exists
        if csv_file:
            print("Loading {} EIA-923 data from csv file".format(year))
            fn = csv_file[0]
            csv_path = join(expected_923_folder, fn)
            eia = pd.read_csv(
                csv_path,
                dtype={"Plant Id": str, "YEAR": str, "NAICS Code": str},
                low_memory=False,
            )
        else:

            eia923_path, eia923_name = find_file_in_folder(
                folder_path=expected_923_folder,
                file_pattern_match=["Schedule_8", "xlsx"],
                return_name=True,
            )

            # # would be more elegent with glob but this works to identify the
            # # Schedule_2_3_4_5 file
            # for f in all_files:
            #     if '2_3_4_5' in f:
            #         gen_file = f
            # eia923_path = join(expected_923_folder, gen_file)
            eia = load_eia923_excel(eia923_path, page="8c")
            csv_fn = eia923_name.split(".")[0] + "_page_8c.csv"
            csv_path = join(expected_923_folder, csv_fn)
            eia.to_csv(csv_path, index=False)
    eia = _clean_columns(eia)
    return eia
Ejemplo n.º 3
0
def read_eia923_fuel_receipts(year):
    expected_923_folder = join(data_dir, 'f923_{}'.format(year))
    if not os.path.exists(expected_923_folder):
        print('Downloading EIA-923 files')
        eia923_download(year=year, save_path=expected_923_folder)

        eia923_path, eia923_name = find_file_in_folder(
            folder_path=expected_923_folder,
            file_pattern_match=['2_3_4_5'],
            return_name=True)
        eia_fuel_receipts_df = pd.read_excel(
            eia923_path,
            sheet_name='Page 5 Fuel Receipts and Costs',
            skiprows=4,
            usecols="A:E,H:M,P:Q")
        csv_fn = eia923_name.split('.')[0] + '_page_5_reduced.csv'
        csv_path = join(expected_923_folder, csv_fn)
        eia_fuel_receipts_df.to_csv(csv_path, index=False)
    else:
        # Check for both csv and year<_Final> in case multiple years
        # or other csv files exist
        print('Loading data from previously downloaded excel file')
        all_files = os.listdir(expected_923_folder)
        # Check for both csv and year<_Final> in case multiple years
        # or other csv files exist
        csv_file = [
            f for f in all_files if '.csv' in f and '_page_5_reduced.csv' in f
        ]
        if csv_file:
            csv_path = os.path.join(expected_923_folder, csv_file[0])
            eia_fuel_receipts_df = pd.read_csv(csv_path)
        else:
            eia923_path, eia923_name = find_file_in_folder(
                folder_path=expected_923_folder,
                file_pattern_match=['2_3_4_5'],
                return_name=True)
            eia_fuel_receipts_df = pd.read_excel(
                eia923_path,
                sheet_name='Page 5 Fuel Receipts and Costs',
                skiprows=4,
                usecols="A:E,H:M,P:Q")
            csv_fn = eia923_name.split('.')[0] + '_page_5_reduced.csv'
            csv_path = join(expected_923_folder, csv_fn)
            eia_fuel_receipts_df.to_csv(csv_path, index=False)
    _clean_columns(eia_fuel_receipts_df)
    return eia_fuel_receipts_df
Ejemplo n.º 4
0
def eia923_download_extract(
    year,
    group_cols=[
        "Plant Id",
        "Plant Name",
        "State",
        "NAICS Code",
        "Reported Prime Mover",
        "Reported Fuel Type Code",
        "YEAR",
    ],
):
    """
    Download (if necessary) and extract a single year of generation/fuel
    consumption data from EIA-923.

    Data are grouped by plant level

    Parameters
    ----------
    year : int or str
        Year of data to download/extract
    group_cols : list, optional
        The columns from EIA923 generation and fuel sheet to use when grouping
        generation and fuel consumption data.

    """
    expected_923_folder = join(data_dir, "f923_{}".format(year))

    if not os.path.exists(expected_923_folder):
        print("Downloading EIA-923 files")
        eia923_download(year=year, save_path=expected_923_folder)

        eia923_path, eia923_name = find_file_in_folder(
            folder_path=expected_923_folder,
            file_pattern_match=["2_3_4_5"],
            return_name=True,
        )
        # eia923_files = os.listdir(expected_923_folder)

        # # would be more elegent with glob but this works to identify the
        # # Schedule_2_3_4_5 file
        # for f in eia923_files:
        #     if '2_3_4_5' in f:
        #         gen_file = f

        # eia923_path = join(expected_923_folder, gen_file)

        # colstokeep = group_cols + sum_cols
        eia = load_eia923_excel(eia923_path)

        # Save as csv for easier access in future
        csv_fn = eia923_name.split(".")[0] + "page_1.csv"
        csv_path = join(expected_923_folder, csv_fn)
        eia.to_csv(csv_path, index=False)

    else:
        all_files = os.listdir(expected_923_folder)

        # Check for both csv and year<_Final> in case multiple years
        # or other csv files exist
        csv_file = [
            f for f in all_files
            if ".csv" in f and "{}_Final".format(year) in f and "page_1" in f
        ]

        # Read and return the existing csv file if it exists
        if csv_file:
            print("Loading {} EIA-923 data from csv file".format(year))
            fn = csv_file[0]
            csv_path = join(expected_923_folder, fn)
            eia = pd.read_csv(
                csv_path,
                dtype={
                    "Plant Id": str,
                    "YEAR": str,
                    "NAICS Code": str
                },
            )

        else:
            print(
                "Loading data from previously downloaded excel file,",
                " how did the csv file get deleted?",
            )
            eia923_path, eia923_name = find_file_in_folder(
                folder_path=expected_923_folder,
                file_pattern_match=["2_3_4_5", "xlsx"],
                return_name=True,
            )

            # # would be more elegent with glob but this works to identify the
            # # Schedule_2_3_4_5 file
            # for f in all_files:
            #     if '2_3_4_5' in f:
            #         gen_file = f
            # eia923_path = join(expected_923_folder, gen_file)
            eia = load_eia923_excel(eia923_path)

            csv_fn = eia923_name.split(".")[0] + "_page_1.csv"
            csv_path = join(expected_923_folder, csv_fn)
            eia.to_csv(csv_path, index=False)

    # EIA_923 = eia
    # Grouping similar facilities together.
    # group_cols = ['Plant Id', 'Plant Name', 'State', 'YEAR']
    sum_cols = [
        "Total Fuel Consumption MMBtu",
        "Net Generation (Megawatthours)",
    ]
    EIA_923_generation_data = eia.groupby(group_cols,
                                          as_index=False)[sum_cols].sum()

    return EIA_923_generation_data
def eia860_balancing_authority(year, regional_aggregation=None):

    expected_860_folder = join(data_dir, "eia860_{}".format(year))

    if not os.path.exists(expected_860_folder):
        print("Downloading EIA-860 files")
        eia860_download(year=year, save_path=expected_860_folder)

        eia860_path, eia860_name = find_file_in_folder(
            folder_path=expected_860_folder,
            file_pattern_match=["2___Plant"],
            return_name=True,
        )
        # eia860_files = os.listdir(expected_860_folder)

        # # would be more elegent with glob but this works to identify the
        # # Schedule_2_3_4_5 file
        # for f in eia860_files:
        #     if '2___Plant' in f:
        #         plant_file = f

        # eia860_path = join(expected_860_folder, plant_file)

        # colstokeep = group_cols + sum_cols
        eia = load_eia860_excel(eia860_path)

        # Save as csv for easier access in future
        csv_fn = eia860_name.split(".")[0] + ".csv"
        csv_path = join(expected_860_folder, csv_fn)
        eia.to_csv(csv_path, index=False)

    else:
        all_files = os.listdir(expected_860_folder)

        # Check for both csv and year<_Final> in case multiple years
        # or other csv files exist
        csv_file = [
            f for f in all_files
            if ".csv" in f and "Plant_Y{}".format(year) in f
        ]

        # Read and return the existing csv file if it exists
        if csv_file:
            print("Loading {} EIA-860 plant data from csv file".format(year))
            fn = csv_file[0]
            csv_path = join(expected_860_folder, fn)
            eia = pd.read_csv(csv_path,
                              dtype={"Plant Id": str},
                              low_memory=False)

        else:
            print("Loading data from previously downloaded excel file")
            eia860_path, eia860_name = find_file_in_folder(
                folder_path=expected_860_folder,
                file_pattern_match=["2___Plant"],
                return_name=True,
            )
            # # would be more elegent with glob but this works to identify the
            # # Schedule_2_3_4_5 file
            # for f in all_files:
            #     if '2___Plant' in f:
            #         plant_file = f
            # eia860_path = join(expected_860_folder, plant_file)
            eia = load_eia860_excel(eia860_path)

            csv_fn = eia860_name.split(".")[0] + ".csv"
            csv_path = join(expected_860_folder, csv_fn)
            eia.to_csv(csv_path, index=False)

    ba_cols = [
        "Plant Id",
        "State",
        "NERC Region",
        "Balancing Authority Code",
        "Balancing Authority Name",
    ]
    eia_plant_ba_match = eia.loc[:, ba_cols].drop_duplicates()

    # Map the balancing authority to a larger region (e.g. FERC or EIA)
    if regional_aggregation:
        region_map = create_ba_region_map(region_col=regional_aggregation)
        eia_plant_ba_match[regional_aggregation] = eia_plant_ba_match[
            "Balancing Authority Code"].map(region_map)

    return eia_plant_ba_match
Ejemplo n.º 6
0
def generate_upstream_coal_map(year):
    eia_fuel_receipts_df = read_eia923_fuel_receipts(year)
    expected_7a_folder = join(data_dir, 'f7a_{}'.format(year))
    if not os.path.exists(expected_7a_folder):
        eia_7a_download(year, expected_7a_folder)
        eia7a_path, eia7a_name = find_file_in_folder(
            folder_path=expected_7a_folder,
            file_pattern_match=['coalpublic'],
            return_name=True)
    else:
        eia7a_path, eia7a_name = find_file_in_folder(
            folder_path=expected_7a_folder,
            file_pattern_match=['coalpublic'],
            return_name=True)
    eia7a_df = pd.read_excel(eia7a_path,
                             sheet_name='Hist_Coal_Prod',
                             skiprows=3)
    _clean_columns(eia7a_df)
    coal_criteria = eia_fuel_receipts_df['fuel_group'] == 'Coal'
    eia_fuel_receipts_df = eia_fuel_receipts_df.loc[coal_criteria, :]
    eia_fuel_receipts_df = eia_fuel_receipts_df.merge(
        eia7a_df[['msha_id', 'coal_supply_region']],
        how='left',
        left_on='coalmine_msha_id',
        right_on='msha_id',
    )
    eia_fuel_receipts_df.drop(columns=['msha_id'], inplace=True)
    eia_fuel_receipts_df.rename(
        columns={'coal_supply_region': 'eia_coal_supply_region'}, inplace=True)
    state_region_map = pd.read_csv(data_dir + '/coal_state_to_basin.csv')
    eia_fuel_receipts_df = eia_fuel_receipts_df.merge(
        state_region_map[['state', 'basin1', 'basin2']],
        left_on='coalmine_state',
        right_on='state',
        how='left')
    eia_fuel_receipts_df.drop(columns=['state'], inplace=True)
    eia_netl_basin = pd.read_csv(data_dir + '/eia_to_netl_basin.csv')
    eia_fuel_receipts_df = eia_fuel_receipts_df.merge(
        eia_netl_basin,
        how='left',
        left_on='eia_coal_supply_region',
        right_on='eia_basin')
    eia_fuel_receipts_df.drop(columns=['eia_basin'], inplace=True)
    gulf_lignite = (
        (eia_fuel_receipts_df['energy_source'] == 'LIG') &
        (eia_fuel_receipts_df['eia_coal_supply_region'] == 'Interior'))
    eia_fuel_receipts_df.loc[gulf_lignite, ['netl_basin']] = 'Gulf Lignite'
    lignite = ((eia_fuel_receipts_df['energy_source'] == 'LIG') &
               (eia_fuel_receipts_df['eia_coal_supply_region'] == 'Western'))
    eia_fuel_receipts_df.loc[lignite, ['netl_basin']] = 'Lignite'
    netl_na = (eia_fuel_receipts_df['netl_basin'].isna())
    minimerge = pd.merge(left=eia_fuel_receipts_df,
                         right=eia_netl_basin,
                         left_on='basin1',
                         right_on='eia_basin',
                         how='left')
    eia_fuel_receipts_df.loc[netl_na,
                             'netl_basin'] = (minimerge.loc[netl_na,
                                                            'netl_basin_y'])
    eia_fuel_receipts_df[['netl_basin', 'energy_source', 'coalmine_type']]
    eia_fuel_receipts_df.dropna(
        subset=['netl_basin', 'energy_source', 'coalmine_type'], inplace=True)
    eia_fuel_receipts_df['coal_source_code'] = eia_fuel_receipts_df.apply(
        _coal_code, axis=1)
    eia_fuel_receipts_df['heat_input'] = eia_fuel_receipts_df[
        'quantity'] * eia_fuel_receipts_df['average_heat_content']
    eia_fuel_receipts_df.drop_duplicates(inplace=True)
    final_df = eia_fuel_receipts_df.groupby(
        ['plant_id', 'coal_source_code'], as_index=False)['quantity',
                                                          'heat_input'].sum()
    return final_df
def generate_upstream_coal_map(year):
    from electricitylci.globals import STATE_ABBREV
    from electricitylci.eia923_generation import eia923_generation_and_fuel
    eia_fuel_receipts_df = read_eia923_fuel_receipts(year)
    expected_7a_folder = join(data_dir, 'f7a_{}'.format(year))
    if not os.path.exists(expected_7a_folder):
        eia_7a_download(year, expected_7a_folder)
        eia7a_path, eia7a_name = find_file_in_folder(
            folder_path=expected_7a_folder,
            file_pattern_match=['coalpublic'],
            return_name=True)
    else:
        eia7a_path, eia7a_name = find_file_in_folder(
            folder_path=expected_7a_folder,
            file_pattern_match=['coalpublic'],
            return_name=True)
    eia7a_df = pd.read_excel(eia7a_path,
                             sheet_name='Hist_Coal_Prod',
                             skiprows=3)
    _clean_columns(eia7a_df)
    coal_criteria = eia_fuel_receipts_df['fuel_group'] == 'Coal'
    eia_fuel_receipts_df = eia_fuel_receipts_df.loc[coal_criteria, :]
    eia_fuel_receipts_df = eia_fuel_receipts_df.merge(
        eia7a_df[['msha_id', 'coal_supply_region']],
        how='left',
        left_on='coalmine_msha_id',
        right_on='msha_id',
    )
    eia_fuel_receipts_df.drop(columns=['msha_id'], inplace=True)
    eia_fuel_receipts_df.rename(
        columns={'coal_supply_region': 'eia_coal_supply_region'}, inplace=True)
    eia_fuel_receipts_na = eia_fuel_receipts_df.loc[
        eia_fuel_receipts_df["eia_coal_supply_region"].isnull(), :]
    eia_fuel_receipts_good = eia_fuel_receipts_df.loc[
        ~eia_fuel_receipts_df["eia_coal_supply_region"].isnull(), :]
    county_basin = eia7a_df.groupby(
        by=["mine_state", "mine_county", "coal_supply_region"],
        as_index=False)["production_short_tons"].count()
    county_basin["mine_state"] = county_basin["mine_state"].str.replace(
        r" \(.*\)", "")
    county_basin["mine_state_abv"] = county_basin["mine_state"].str.lower(
    ).map(STATE_ABBREV).str.upper()
    county_basin["mine_county"] = county_basin["mine_county"].str.lower()
    fips_codes = pd.read_csv(f"{data_dir}/fips_codes.csv", )
    _clean_columns(fips_codes)
    fips_codes["gu_name"] = fips_codes["gu_name"].str.lower()
    fips_codes["county_fips_code"] = fips_codes["county_fips_code"].astype(
        str).str.replace(".0", "")
    county_basin = county_basin.merge(
        right=fips_codes[["state_abbreviation", "county_fips_code",
                          "gu_name"]],
        left_on=["mine_state_abv", "mine_county"],
        right_on=["state_abbreviation", "gu_name"],
        how="left")
    county_basin.drop_duplicates(subset=["mine_state_abv", "county_fips_code"],
                                 inplace=True)
    eia_fuel_receipts_na = eia_fuel_receipts_na.merge(
        right=county_basin[[
            "mine_state_abv", "county_fips_code", "coal_supply_region"
        ]],
        left_on=["coalmine_state", "coalmine_county"],
        right_on=["mine_state_abv", "county_fips_code"],
        how="left")
    eia_fuel_receipts_na["eia_coal_supply_region"] = eia_fuel_receipts_na[
        "coal_supply_region"]
    eia_fuel_receipts_made_good = eia_fuel_receipts_na.loc[
        ~eia_fuel_receipts_na["eia_coal_supply_region"].isnull(
        ), :].reset_index(drop=True)
    eia_fuel_receipts_na = eia_fuel_receipts_na.loc[
        eia_fuel_receipts_na["eia_coal_supply_region"].isnull(
        ), :].reset_index(drop=True)
    eia_fuel_receipts_made_good.drop(
        columns=["mine_state_abv", "county_fips_code", "coal_supply_region"],
        inplace=True)
    eia_fuel_receipts_good = pd.concat(
        [eia_fuel_receipts_good, eia_fuel_receipts_made_good],
        ignore_index=True,
        sort=False)
    eia_netl_basin = pd.read_csv(data_dir + '/eia_to_netl_basin.csv')
    eia_fuel_receipts_good = eia_fuel_receipts_good.merge(
        eia_netl_basin,
        left_on='eia_coal_supply_region',
        right_on="eia_basin",
        how="left").reset_index(drop=True)
    state_region_map = pd.read_csv(data_dir + '/coal_state_to_basin.csv')
    eia_fuel_receipts_na = eia_fuel_receipts_na.merge(
        state_region_map[['state', 'basin1', 'basin2']],
        left_on='coalmine_state',
        right_on='state',
        how='left')
    eia_fuel_receipts_na.drop(columns=['state'], inplace=True)
    eia_fuel_receipts_na = eia_fuel_receipts_na.merge(
        eia_netl_basin,
        how='left',
        left_on='eia_coal_supply_region',
        right_on='eia_basin')
    eia_fuel_receipts_na.drop(columns=['eia_basin'], inplace=True)
    netl_na = (eia_fuel_receipts_na['netl_basin'].isna())
    minimerge = pd.merge(left=eia_fuel_receipts_na,
                         right=eia_netl_basin,
                         left_on='basin1',
                         right_on='eia_basin',
                         how='left')
    minimerge.drop(columns=[
        "mine_state_abv", "county_fips_code", "coal_supply_region", "basin1",
        "basin2", "netl_basin_x", "eia_basin"
    ],
                   inplace=True)
    minimerge.rename(columns={"netl_basin_y": "netl_basin"}, inplace=True)
    eia_fuel_receipts_good = pd.concat([eia_fuel_receipts_good, minimerge],
                                       ignore_index=True,
                                       sort=False)

    gulf_lignite = (
        (eia_fuel_receipts_good['energy_source'] == 'LIG') &
        (eia_fuel_receipts_good['eia_coal_supply_region'] == 'Interior'))
    eia_fuel_receipts_good.loc[gulf_lignite, ['netl_basin']] = 'Gulf Lignite'
    lignite = ((eia_fuel_receipts_good['energy_source'] == 'LIG') &
               (eia_fuel_receipts_good['eia_coal_supply_region'] == 'Western'))
    eia_fuel_receipts_good.loc[lignite, ['netl_basin']] = 'Lignite'

    #    eia_fuel_receipts_df.loc[netl_na,'netl_basin']=(
    #            minimerge.loc[netl_na,'netl_basin_y'])
    #    eia_fuel_receipts_df[['netl_basin','energy_source','coalmine_type']]
    eia_fuel_receipts_good.dropna(
        subset=['netl_basin', 'energy_source', 'coalmine_type'], inplace=True)
    eia_fuel_receipts_good['coal_source_code'] = eia_fuel_receipts_good.apply(
        _coal_code, axis=1)
    eia_fuel_receipts_good['heat_input'] = eia_fuel_receipts_good[
        'quantity'] * eia_fuel_receipts_good['average_heat_content']
    eia_fuel_receipts_good.drop_duplicates(inplace=True)
    eia_fuel_receipts_good["coal_type"] = eia_fuel_receipts_good[
        "energy_source"].map(coal_type_codes)
    final_df = eia_fuel_receipts_good.groupby(
        ['plant_id', 'coal_type', 'coal_source_code'], )[[
            'quantity', 'heat_input'
        ]].sum()
    final_df["mass_fraction"] = final_df[["quantity"]].div(
        final_df[["quantity"]].groupby(level=[0, 1]).transform('sum'))
    final_df["energy_fraction"] = final_df[["heat_input"]].div(
        final_df[["heat_input"]].groupby(level=[0, 1]).transform('sum'))
    final_df["mass_fraction_total"] = final_df[["quantity"]].div(
        final_df[["quantity"]].groupby(level=[0]).transform('sum'))
    final_df["energy_fraction_total"] = final_df[["heat_input"]].div(
        final_df[["heat_input"]].groupby(level=[0]).transform('sum'))
    eia_923_gen_fuel = eia923_generation_and_fuel(year)
    eia_923_gen_fuel = eia_923_gen_fuel[[
        "plant_id", "reported_fuel_type_code",
        "electric_fuel_consumption_quantity", "elec_fuel_consumption_mmbtu"
    ]]
    eia_923_gen_fuel["coal_type"] = eia_923_gen_fuel[
        "reported_fuel_type_code"].map(coal_type_codes)
    eia_923_gen_fuel["plant_id"] = eia_923_gen_fuel["plant_id"].astype(int)
    eia_923_grouped = eia_923_gen_fuel.groupby(by=["plant_id", "coal_type"])[[
        "electric_fuel_consumption_quantity", "elec_fuel_consumption_mmbtu"
    ]].sum()
    final_df.reset_index(inplace=True)
    eia_923_grouped.reset_index(inplace=True)
    final_df = final_df.merge(eia_923_grouped,
                              on=["plant_id", "coal_type"],
                              how="left")
    rc_coal = eia_923_grouped.loc[eia_923_grouped["coal_type"] ==
                                  "RC", :].set_index("plant_id")
    final_df["rc_coal_quantity"] = final_df["plant_id"].map(
        rc_coal["electric_fuel_consumption_quantity"])
    final_df["rc_coal_mmbtu"] = final_df["plant_id"].map(
        rc_coal["elec_fuel_consumption_mmbtu"])
    final_df["new_quantity"] = final_df["mass_fraction"] * final_df[
        "electric_fuel_consumption_quantity"]
    final_df["new_heat_input"] = final_df["energy_fraction"] * final_df[
        "elec_fuel_consumption_mmbtu"]
    final_df.loc[~final_df["rc_coal_quantity"].isna(),
                 "new_quantity"] = final_df["new_quantity"] + final_df[
                     "mass_fraction_total"] * final_df["rc_coal_quantity"]
    final_df.loc[~final_df["rc_coal_mmbtu"].isna(),
                 "new_heat_input"] = final_df["new_heat_input"] + final_df[
                     "energy_fraction"] * final_df["rc_coal_mmbtu"]
    final_df = final_df[[
        "plant_id", "coal_source_code", "new_quantity", "new_heat_input"
    ]]
    final_df = final_df.rename(columns={
        "new_quantity": "quantity",
        "new_heat_input": "heat_input"
    })
    return final_df