def eia860_boiler_info_design(year): expected_860_folder = join(data_dir, "eia860_{}".format(year)) if not os.path.exists(expected_860_folder): print("Downloading EIA-860 files") eia860_download(year=year, save_path=expected_860_folder) eia860_path, eia860_name = find_file_in_folder( folder_path=expected_860_folder, file_pattern_match=["6_2_EnviroEquip", "xlsx"], return_name=True, ) eia = load_eia860_excel(eia860_path, "Boiler Info & Design Parameters", 1) # Save as csv for easier access in future csv_fn = eia860_name.split(".")[0] + "_boiler_info.csv" csv_path = join(expected_860_folder, csv_fn) eia.to_csv(csv_path, index=False) else: all_files = os.listdir(expected_860_folder) # Check for both csv and year<_Final> in case multiple years # or other csv files exist csv_file = [ f for f in all_files if "_boiler_info.csv" in f and "6_2_EnviroEquip_Y{}".format(year) in f ] # Read and return the existing csv file if it exists if csv_file: print("Loading {} EIA-860 plant data from csv file".format(year)) fn = csv_file[0] csv_path = join(expected_860_folder, fn) eia = pd.read_csv(csv_path, dtype={"Plant Id": str}) else: print("Loading data from previously downloaded excel file") eia860_path, eia860_name = find_file_in_folder( folder_path=expected_860_folder, file_pattern_match=["6_2_EnviroEquip", "xlsx"], return_name=True, ) # # would be more elegent with glob but this works to identify the # # Schedule_2_3_4_5 file # for f in all_files: # if '2___Plant' in f: # plant_file = f # eia860_path = join(expected_860_folder, plant_file) eia = load_eia860_excel(eia860_path, "Boiler Info & Design Parameters", 1) csv_fn = eia860_name.split(".")[0] + "_boiler_info.csv" csv_path = join(expected_860_folder, csv_fn) eia.to_csv(csv_path, index=False) eia = _clean_columns(eia) return eia
def eia923_sched8_aec(year): expected_923_folder = join(data_dir, "f923_{}".format(year)) if not os.path.exists(expected_923_folder): print("Downloading EIA-923 files") eia923_download(year=year, save_path=expected_923_folder) eia923_path, eia923_name = find_file_in_folder( folder_path=expected_923_folder, file_pattern_match=["Schedule_8", "xlsx"], return_name=True, ) # Save as csv for easier access in future csv_fn = eia923_name.split(".")[0] + "page_8c.csv" csv_path = join(expected_923_folder, csv_fn) eia = load_eia923_excel(expected_923_folder, page="8c") eia.to_csv(csv_path, index=False) else: all_files = os.listdir(expected_923_folder) # Check for both csv and year<_Final> in case multiple years # or other csv files exist csv_file = [ f for f in all_files if ".csv" in f and "{}_Final".format(year) in f and "page_8c" in f ] # Read and return the existing csv file if it exists if csv_file: print("Loading {} EIA-923 data from csv file".format(year)) fn = csv_file[0] csv_path = join(expected_923_folder, fn) eia = pd.read_csv( csv_path, dtype={"Plant Id": str, "YEAR": str, "NAICS Code": str}, low_memory=False, ) else: eia923_path, eia923_name = find_file_in_folder( folder_path=expected_923_folder, file_pattern_match=["Schedule_8", "xlsx"], return_name=True, ) # # would be more elegent with glob but this works to identify the # # Schedule_2_3_4_5 file # for f in all_files: # if '2_3_4_5' in f: # gen_file = f # eia923_path = join(expected_923_folder, gen_file) eia = load_eia923_excel(eia923_path, page="8c") csv_fn = eia923_name.split(".")[0] + "_page_8c.csv" csv_path = join(expected_923_folder, csv_fn) eia.to_csv(csv_path, index=False) eia = _clean_columns(eia) return eia
def read_eia923_fuel_receipts(year): expected_923_folder = join(data_dir, 'f923_{}'.format(year)) if not os.path.exists(expected_923_folder): print('Downloading EIA-923 files') eia923_download(year=year, save_path=expected_923_folder) eia923_path, eia923_name = find_file_in_folder( folder_path=expected_923_folder, file_pattern_match=['2_3_4_5'], return_name=True) eia_fuel_receipts_df = pd.read_excel( eia923_path, sheet_name='Page 5 Fuel Receipts and Costs', skiprows=4, usecols="A:E,H:M,P:Q") csv_fn = eia923_name.split('.')[0] + '_page_5_reduced.csv' csv_path = join(expected_923_folder, csv_fn) eia_fuel_receipts_df.to_csv(csv_path, index=False) else: # Check for both csv and year<_Final> in case multiple years # or other csv files exist print('Loading data from previously downloaded excel file') all_files = os.listdir(expected_923_folder) # Check for both csv and year<_Final> in case multiple years # or other csv files exist csv_file = [ f for f in all_files if '.csv' in f and '_page_5_reduced.csv' in f ] if csv_file: csv_path = os.path.join(expected_923_folder, csv_file[0]) eia_fuel_receipts_df = pd.read_csv(csv_path) else: eia923_path, eia923_name = find_file_in_folder( folder_path=expected_923_folder, file_pattern_match=['2_3_4_5'], return_name=True) eia_fuel_receipts_df = pd.read_excel( eia923_path, sheet_name='Page 5 Fuel Receipts and Costs', skiprows=4, usecols="A:E,H:M,P:Q") csv_fn = eia923_name.split('.')[0] + '_page_5_reduced.csv' csv_path = join(expected_923_folder, csv_fn) eia_fuel_receipts_df.to_csv(csv_path, index=False) _clean_columns(eia_fuel_receipts_df) return eia_fuel_receipts_df
def eia923_download_extract( year, group_cols=[ "Plant Id", "Plant Name", "State", "NAICS Code", "Reported Prime Mover", "Reported Fuel Type Code", "YEAR", ], ): """ Download (if necessary) and extract a single year of generation/fuel consumption data from EIA-923. Data are grouped by plant level Parameters ---------- year : int or str Year of data to download/extract group_cols : list, optional The columns from EIA923 generation and fuel sheet to use when grouping generation and fuel consumption data. """ expected_923_folder = join(data_dir, "f923_{}".format(year)) if not os.path.exists(expected_923_folder): print("Downloading EIA-923 files") eia923_download(year=year, save_path=expected_923_folder) eia923_path, eia923_name = find_file_in_folder( folder_path=expected_923_folder, file_pattern_match=["2_3_4_5"], return_name=True, ) # eia923_files = os.listdir(expected_923_folder) # # would be more elegent with glob but this works to identify the # # Schedule_2_3_4_5 file # for f in eia923_files: # if '2_3_4_5' in f: # gen_file = f # eia923_path = join(expected_923_folder, gen_file) # colstokeep = group_cols + sum_cols eia = load_eia923_excel(eia923_path) # Save as csv for easier access in future csv_fn = eia923_name.split(".")[0] + "page_1.csv" csv_path = join(expected_923_folder, csv_fn) eia.to_csv(csv_path, index=False) else: all_files = os.listdir(expected_923_folder) # Check for both csv and year<_Final> in case multiple years # or other csv files exist csv_file = [ f for f in all_files if ".csv" in f and "{}_Final".format(year) in f and "page_1" in f ] # Read and return the existing csv file if it exists if csv_file: print("Loading {} EIA-923 data from csv file".format(year)) fn = csv_file[0] csv_path = join(expected_923_folder, fn) eia = pd.read_csv( csv_path, dtype={ "Plant Id": str, "YEAR": str, "NAICS Code": str }, ) else: print( "Loading data from previously downloaded excel file,", " how did the csv file get deleted?", ) eia923_path, eia923_name = find_file_in_folder( folder_path=expected_923_folder, file_pattern_match=["2_3_4_5", "xlsx"], return_name=True, ) # # would be more elegent with glob but this works to identify the # # Schedule_2_3_4_5 file # for f in all_files: # if '2_3_4_5' in f: # gen_file = f # eia923_path = join(expected_923_folder, gen_file) eia = load_eia923_excel(eia923_path) csv_fn = eia923_name.split(".")[0] + "_page_1.csv" csv_path = join(expected_923_folder, csv_fn) eia.to_csv(csv_path, index=False) # EIA_923 = eia # Grouping similar facilities together. # group_cols = ['Plant Id', 'Plant Name', 'State', 'YEAR'] sum_cols = [ "Total Fuel Consumption MMBtu", "Net Generation (Megawatthours)", ] EIA_923_generation_data = eia.groupby(group_cols, as_index=False)[sum_cols].sum() return EIA_923_generation_data
def eia860_balancing_authority(year, regional_aggregation=None): expected_860_folder = join(data_dir, "eia860_{}".format(year)) if not os.path.exists(expected_860_folder): print("Downloading EIA-860 files") eia860_download(year=year, save_path=expected_860_folder) eia860_path, eia860_name = find_file_in_folder( folder_path=expected_860_folder, file_pattern_match=["2___Plant"], return_name=True, ) # eia860_files = os.listdir(expected_860_folder) # # would be more elegent with glob but this works to identify the # # Schedule_2_3_4_5 file # for f in eia860_files: # if '2___Plant' in f: # plant_file = f # eia860_path = join(expected_860_folder, plant_file) # colstokeep = group_cols + sum_cols eia = load_eia860_excel(eia860_path) # Save as csv for easier access in future csv_fn = eia860_name.split(".")[0] + ".csv" csv_path = join(expected_860_folder, csv_fn) eia.to_csv(csv_path, index=False) else: all_files = os.listdir(expected_860_folder) # Check for both csv and year<_Final> in case multiple years # or other csv files exist csv_file = [ f for f in all_files if ".csv" in f and "Plant_Y{}".format(year) in f ] # Read and return the existing csv file if it exists if csv_file: print("Loading {} EIA-860 plant data from csv file".format(year)) fn = csv_file[0] csv_path = join(expected_860_folder, fn) eia = pd.read_csv(csv_path, dtype={"Plant Id": str}, low_memory=False) else: print("Loading data from previously downloaded excel file") eia860_path, eia860_name = find_file_in_folder( folder_path=expected_860_folder, file_pattern_match=["2___Plant"], return_name=True, ) # # would be more elegent with glob but this works to identify the # # Schedule_2_3_4_5 file # for f in all_files: # if '2___Plant' in f: # plant_file = f # eia860_path = join(expected_860_folder, plant_file) eia = load_eia860_excel(eia860_path) csv_fn = eia860_name.split(".")[0] + ".csv" csv_path = join(expected_860_folder, csv_fn) eia.to_csv(csv_path, index=False) ba_cols = [ "Plant Id", "State", "NERC Region", "Balancing Authority Code", "Balancing Authority Name", ] eia_plant_ba_match = eia.loc[:, ba_cols].drop_duplicates() # Map the balancing authority to a larger region (e.g. FERC or EIA) if regional_aggregation: region_map = create_ba_region_map(region_col=regional_aggregation) eia_plant_ba_match[regional_aggregation] = eia_plant_ba_match[ "Balancing Authority Code"].map(region_map) return eia_plant_ba_match
def generate_upstream_coal_map(year): eia_fuel_receipts_df = read_eia923_fuel_receipts(year) expected_7a_folder = join(data_dir, 'f7a_{}'.format(year)) if not os.path.exists(expected_7a_folder): eia_7a_download(year, expected_7a_folder) eia7a_path, eia7a_name = find_file_in_folder( folder_path=expected_7a_folder, file_pattern_match=['coalpublic'], return_name=True) else: eia7a_path, eia7a_name = find_file_in_folder( folder_path=expected_7a_folder, file_pattern_match=['coalpublic'], return_name=True) eia7a_df = pd.read_excel(eia7a_path, sheet_name='Hist_Coal_Prod', skiprows=3) _clean_columns(eia7a_df) coal_criteria = eia_fuel_receipts_df['fuel_group'] == 'Coal' eia_fuel_receipts_df = eia_fuel_receipts_df.loc[coal_criteria, :] eia_fuel_receipts_df = eia_fuel_receipts_df.merge( eia7a_df[['msha_id', 'coal_supply_region']], how='left', left_on='coalmine_msha_id', right_on='msha_id', ) eia_fuel_receipts_df.drop(columns=['msha_id'], inplace=True) eia_fuel_receipts_df.rename( columns={'coal_supply_region': 'eia_coal_supply_region'}, inplace=True) state_region_map = pd.read_csv(data_dir + '/coal_state_to_basin.csv') eia_fuel_receipts_df = eia_fuel_receipts_df.merge( state_region_map[['state', 'basin1', 'basin2']], left_on='coalmine_state', right_on='state', how='left') eia_fuel_receipts_df.drop(columns=['state'], inplace=True) eia_netl_basin = pd.read_csv(data_dir + '/eia_to_netl_basin.csv') eia_fuel_receipts_df = eia_fuel_receipts_df.merge( eia_netl_basin, how='left', left_on='eia_coal_supply_region', right_on='eia_basin') eia_fuel_receipts_df.drop(columns=['eia_basin'], inplace=True) gulf_lignite = ( (eia_fuel_receipts_df['energy_source'] == 'LIG') & (eia_fuel_receipts_df['eia_coal_supply_region'] == 'Interior')) eia_fuel_receipts_df.loc[gulf_lignite, ['netl_basin']] = 'Gulf Lignite' lignite = ((eia_fuel_receipts_df['energy_source'] == 'LIG') & (eia_fuel_receipts_df['eia_coal_supply_region'] == 'Western')) eia_fuel_receipts_df.loc[lignite, ['netl_basin']] = 'Lignite' netl_na = (eia_fuel_receipts_df['netl_basin'].isna()) minimerge = pd.merge(left=eia_fuel_receipts_df, right=eia_netl_basin, left_on='basin1', right_on='eia_basin', how='left') eia_fuel_receipts_df.loc[netl_na, 'netl_basin'] = (minimerge.loc[netl_na, 'netl_basin_y']) eia_fuel_receipts_df[['netl_basin', 'energy_source', 'coalmine_type']] eia_fuel_receipts_df.dropna( subset=['netl_basin', 'energy_source', 'coalmine_type'], inplace=True) eia_fuel_receipts_df['coal_source_code'] = eia_fuel_receipts_df.apply( _coal_code, axis=1) eia_fuel_receipts_df['heat_input'] = eia_fuel_receipts_df[ 'quantity'] * eia_fuel_receipts_df['average_heat_content'] eia_fuel_receipts_df.drop_duplicates(inplace=True) final_df = eia_fuel_receipts_df.groupby( ['plant_id', 'coal_source_code'], as_index=False)['quantity', 'heat_input'].sum() return final_df
def generate_upstream_coal_map(year): from electricitylci.globals import STATE_ABBREV from electricitylci.eia923_generation import eia923_generation_and_fuel eia_fuel_receipts_df = read_eia923_fuel_receipts(year) expected_7a_folder = join(data_dir, 'f7a_{}'.format(year)) if not os.path.exists(expected_7a_folder): eia_7a_download(year, expected_7a_folder) eia7a_path, eia7a_name = find_file_in_folder( folder_path=expected_7a_folder, file_pattern_match=['coalpublic'], return_name=True) else: eia7a_path, eia7a_name = find_file_in_folder( folder_path=expected_7a_folder, file_pattern_match=['coalpublic'], return_name=True) eia7a_df = pd.read_excel(eia7a_path, sheet_name='Hist_Coal_Prod', skiprows=3) _clean_columns(eia7a_df) coal_criteria = eia_fuel_receipts_df['fuel_group'] == 'Coal' eia_fuel_receipts_df = eia_fuel_receipts_df.loc[coal_criteria, :] eia_fuel_receipts_df = eia_fuel_receipts_df.merge( eia7a_df[['msha_id', 'coal_supply_region']], how='left', left_on='coalmine_msha_id', right_on='msha_id', ) eia_fuel_receipts_df.drop(columns=['msha_id'], inplace=True) eia_fuel_receipts_df.rename( columns={'coal_supply_region': 'eia_coal_supply_region'}, inplace=True) eia_fuel_receipts_na = eia_fuel_receipts_df.loc[ eia_fuel_receipts_df["eia_coal_supply_region"].isnull(), :] eia_fuel_receipts_good = eia_fuel_receipts_df.loc[ ~eia_fuel_receipts_df["eia_coal_supply_region"].isnull(), :] county_basin = eia7a_df.groupby( by=["mine_state", "mine_county", "coal_supply_region"], as_index=False)["production_short_tons"].count() county_basin["mine_state"] = county_basin["mine_state"].str.replace( r" \(.*\)", "") county_basin["mine_state_abv"] = county_basin["mine_state"].str.lower( ).map(STATE_ABBREV).str.upper() county_basin["mine_county"] = county_basin["mine_county"].str.lower() fips_codes = pd.read_csv(f"{data_dir}/fips_codes.csv", ) _clean_columns(fips_codes) fips_codes["gu_name"] = fips_codes["gu_name"].str.lower() fips_codes["county_fips_code"] = fips_codes["county_fips_code"].astype( str).str.replace(".0", "") county_basin = county_basin.merge( right=fips_codes[["state_abbreviation", "county_fips_code", "gu_name"]], left_on=["mine_state_abv", "mine_county"], right_on=["state_abbreviation", "gu_name"], how="left") county_basin.drop_duplicates(subset=["mine_state_abv", "county_fips_code"], inplace=True) eia_fuel_receipts_na = eia_fuel_receipts_na.merge( right=county_basin[[ "mine_state_abv", "county_fips_code", "coal_supply_region" ]], left_on=["coalmine_state", "coalmine_county"], right_on=["mine_state_abv", "county_fips_code"], how="left") eia_fuel_receipts_na["eia_coal_supply_region"] = eia_fuel_receipts_na[ "coal_supply_region"] eia_fuel_receipts_made_good = eia_fuel_receipts_na.loc[ ~eia_fuel_receipts_na["eia_coal_supply_region"].isnull( ), :].reset_index(drop=True) eia_fuel_receipts_na = eia_fuel_receipts_na.loc[ eia_fuel_receipts_na["eia_coal_supply_region"].isnull( ), :].reset_index(drop=True) eia_fuel_receipts_made_good.drop( columns=["mine_state_abv", "county_fips_code", "coal_supply_region"], inplace=True) eia_fuel_receipts_good = pd.concat( [eia_fuel_receipts_good, eia_fuel_receipts_made_good], ignore_index=True, sort=False) eia_netl_basin = pd.read_csv(data_dir + '/eia_to_netl_basin.csv') eia_fuel_receipts_good = eia_fuel_receipts_good.merge( eia_netl_basin, left_on='eia_coal_supply_region', right_on="eia_basin", how="left").reset_index(drop=True) state_region_map = pd.read_csv(data_dir + '/coal_state_to_basin.csv') eia_fuel_receipts_na = eia_fuel_receipts_na.merge( state_region_map[['state', 'basin1', 'basin2']], left_on='coalmine_state', right_on='state', how='left') eia_fuel_receipts_na.drop(columns=['state'], inplace=True) eia_fuel_receipts_na = eia_fuel_receipts_na.merge( eia_netl_basin, how='left', left_on='eia_coal_supply_region', right_on='eia_basin') eia_fuel_receipts_na.drop(columns=['eia_basin'], inplace=True) netl_na = (eia_fuel_receipts_na['netl_basin'].isna()) minimerge = pd.merge(left=eia_fuel_receipts_na, right=eia_netl_basin, left_on='basin1', right_on='eia_basin', how='left') minimerge.drop(columns=[ "mine_state_abv", "county_fips_code", "coal_supply_region", "basin1", "basin2", "netl_basin_x", "eia_basin" ], inplace=True) minimerge.rename(columns={"netl_basin_y": "netl_basin"}, inplace=True) eia_fuel_receipts_good = pd.concat([eia_fuel_receipts_good, minimerge], ignore_index=True, sort=False) gulf_lignite = ( (eia_fuel_receipts_good['energy_source'] == 'LIG') & (eia_fuel_receipts_good['eia_coal_supply_region'] == 'Interior')) eia_fuel_receipts_good.loc[gulf_lignite, ['netl_basin']] = 'Gulf Lignite' lignite = ((eia_fuel_receipts_good['energy_source'] == 'LIG') & (eia_fuel_receipts_good['eia_coal_supply_region'] == 'Western')) eia_fuel_receipts_good.loc[lignite, ['netl_basin']] = 'Lignite' # eia_fuel_receipts_df.loc[netl_na,'netl_basin']=( # minimerge.loc[netl_na,'netl_basin_y']) # eia_fuel_receipts_df[['netl_basin','energy_source','coalmine_type']] eia_fuel_receipts_good.dropna( subset=['netl_basin', 'energy_source', 'coalmine_type'], inplace=True) eia_fuel_receipts_good['coal_source_code'] = eia_fuel_receipts_good.apply( _coal_code, axis=1) eia_fuel_receipts_good['heat_input'] = eia_fuel_receipts_good[ 'quantity'] * eia_fuel_receipts_good['average_heat_content'] eia_fuel_receipts_good.drop_duplicates(inplace=True) eia_fuel_receipts_good["coal_type"] = eia_fuel_receipts_good[ "energy_source"].map(coal_type_codes) final_df = eia_fuel_receipts_good.groupby( ['plant_id', 'coal_type', 'coal_source_code'], )[[ 'quantity', 'heat_input' ]].sum() final_df["mass_fraction"] = final_df[["quantity"]].div( final_df[["quantity"]].groupby(level=[0, 1]).transform('sum')) final_df["energy_fraction"] = final_df[["heat_input"]].div( final_df[["heat_input"]].groupby(level=[0, 1]).transform('sum')) final_df["mass_fraction_total"] = final_df[["quantity"]].div( final_df[["quantity"]].groupby(level=[0]).transform('sum')) final_df["energy_fraction_total"] = final_df[["heat_input"]].div( final_df[["heat_input"]].groupby(level=[0]).transform('sum')) eia_923_gen_fuel = eia923_generation_and_fuel(year) eia_923_gen_fuel = eia_923_gen_fuel[[ "plant_id", "reported_fuel_type_code", "electric_fuel_consumption_quantity", "elec_fuel_consumption_mmbtu" ]] eia_923_gen_fuel["coal_type"] = eia_923_gen_fuel[ "reported_fuel_type_code"].map(coal_type_codes) eia_923_gen_fuel["plant_id"] = eia_923_gen_fuel["plant_id"].astype(int) eia_923_grouped = eia_923_gen_fuel.groupby(by=["plant_id", "coal_type"])[[ "electric_fuel_consumption_quantity", "elec_fuel_consumption_mmbtu" ]].sum() final_df.reset_index(inplace=True) eia_923_grouped.reset_index(inplace=True) final_df = final_df.merge(eia_923_grouped, on=["plant_id", "coal_type"], how="left") rc_coal = eia_923_grouped.loc[eia_923_grouped["coal_type"] == "RC", :].set_index("plant_id") final_df["rc_coal_quantity"] = final_df["plant_id"].map( rc_coal["electric_fuel_consumption_quantity"]) final_df["rc_coal_mmbtu"] = final_df["plant_id"].map( rc_coal["elec_fuel_consumption_mmbtu"]) final_df["new_quantity"] = final_df["mass_fraction"] * final_df[ "electric_fuel_consumption_quantity"] final_df["new_heat_input"] = final_df["energy_fraction"] * final_df[ "elec_fuel_consumption_mmbtu"] final_df.loc[~final_df["rc_coal_quantity"].isna(), "new_quantity"] = final_df["new_quantity"] + final_df[ "mass_fraction_total"] * final_df["rc_coal_quantity"] final_df.loc[~final_df["rc_coal_mmbtu"].isna(), "new_heat_input"] = final_df["new_heat_input"] + final_df[ "energy_fraction"] * final_df["rc_coal_mmbtu"] final_df = final_df[[ "plant_id", "coal_source_code", "new_quantity", "new_heat_input" ]] final_df = final_df.rename(columns={ "new_quantity": "quantity", "new_heat_input": "heat_input" }) return final_df