Example #1
def eia_facility_fuel_region(year):
    primary_fuel = eia923_primary_fuel(year=year)
    ba_match = eia860_balancing_authority(year)
    primary_fuel["Plant Id"] = primary_fuel["Plant Id"].astype(int)
    ba_match["Plant Id"] = ba_match["Plant Id"].astype(int)
    combined = primary_fuel.merge(ba_match, on='Plant Id')
    combined['primary fuel percent gen'] = (
        combined['primary fuel percent gen'] / 100)

    combined.rename(
        columns={
            'primary fuel percent gen':
                'PercentGenerationfromDesignatedFuelCategory',
            'Plant Id': 'FacilityID',
            'fuel category': 'FuelCategory',
            'NERC Region': 'NERC',
        },
        inplace=True,
    )

    return combined
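
A hypothetical call, shown as a sketch (it assumes the EIA-923 and EIA-860
helper functions can retrieve data for the requested year):

fuel_region = eia_facility_fuel_region(2016)
print(fuel_region[["FacilityID", "FuelCategory", "NERC"]].head())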
Example #2
def build_generation_data(egrid_facilities_to_include=None,
                          generation_years=None):
    """
    Build a dataset of facility-level generation using EIA923. This
    function will apply filters for positive generation, generation
    efficiency within a given range, and a minimum percent of generation
    from the primary fuel (if set in the config file). The returned
    dataframe also includes the balancing authority for every power
    plant.

    Parameters
    ----------
    egrid_facilities_to_include : list, optional
        List of plant codes to include (default is None, which builds a list)
    generation_years : list, optional
        Years of generation data to include in the output (default is None,
        which builds a list from the inventories of interest and eia_gen_year
        parameters)

    Returns
    -------
    DataFrame
        Dataframe columns include:
        ['FacilityID', 'Electricity', 'Year']
    """

    if not generation_years:
        # Use the years from inventories of interest
        generation_years = set(
            list(inventories_of_interest.values()) + [eia_gen_year])

    df_list = []
    for year in generation_years:
        gen_fuel_data = eia923_download_extract(year)
        primary_fuel = eia923_primary_fuel(gen_fuel_data)
        gen_efficiency = calculate_plant_efficiency(gen_fuel_data)

        final_gen_df = gen_efficiency.merge(primary_fuel, on="Plant Id")
        if not egrid_facilities_to_include:
            if include_only_egrid_facilities_with_positive_generation:
                final_gen_df = final_gen_df.loc[
                    final_gen_df["Net Generation (Megawatthours)"] >= 0, :]
            if filter_on_efficiency:
                final_gen_df = efficiency_filter(final_gen_df)
            if (filter_on_min_plant_percent_generation_from_primary_fuel
                    and not keep_mixed_plant_category):
                final_gen_df = final_gen_df.loc[
                    final_gen_df["primary fuel percent gen"]
                    >= min_plant_percent_generation_from_primary_fuel_category, :]
            # A NAICS-based filter (e.g., keeping only NAICS code '22',
            # Utilities) may belong here; the intended criteria were never
            # confirmed, so it remains unimplemented.
        else:
            final_gen_df = final_gen_df.loc[
                final_gen_df["Plant Id"].isin(egrid_facilities_to_include), :]

        ba_match = eia860_balancing_authority(year)
        ba_match["Plant Id"] = ba_match["Plant Id"].astype(int)
        final_gen_df["Plant Id"] = final_gen_df["Plant Id"].astype(int)
        final_gen_df = final_gen_df.merge(ba_match, on="Plant Id", how="left")
        final_gen_df["Year"] = int(year)
        df_list.append(final_gen_df)

    all_years_gen = pd.concat(df_list)

    all_years_gen = all_years_gen.rename(
        columns={
            "Plant Id": "FacilityID",
            "Net Generation (Megawatthours)": "Electricity",
        })

    all_years_gen = all_years_gen.loc[:, ["FacilityID", "Electricity", "Year"]]
    all_years_gen.reset_index(drop=True, inplace=True)
    all_years_gen["Year"] = all_years_gen["Year"].astype("int32")
    return all_years_gen
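
A minimal, self-contained sketch of the three filters described in the
docstring, on toy data; the column name "efficiency" and the thresholds here
are illustrative stand-ins for the values supplied by the package
configuration:

import pandas as pd

toy = pd.DataFrame({
    "Plant Id": [1, 2, 3],
    "Net Generation (Megawatthours)": [500.0, -20.0, 800.0],
    "efficiency": [45.0, 38.0, 3.0],
    "primary fuel percent gen": [0.95, 0.85, 0.99],
})
toy = toy[toy["Net Generation (Megawatthours)"] >= 0]  # non-negative generation
toy = toy[toy["efficiency"].between(10, 100)]          # plausible efficiency
toy = toy[toy["primary fuel percent gen"] >= 0.9]      # primary-fuel share
print(toy)  # only plant 1 passes all three filters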
Example #3
def generate_hydro_emissions():
    """
    This generates a dataframe of hydro power plant emissions using data
    from an analysis performed at NETL that estimates biogenic reservoir
    emissions. The reservoir emissions are allocated among all uses for
    the reservoir (e.g., boating, fishing, etc.) using market-based
    allocation. The year for the inventory is fixed to 2016.

    Parameters
    ----------
        None
    Returns
    -------
    dataframe:
        Dataframe populated with CO2 and CH4 emissions as well as consumptive
        water use induced by the hydroelectric generator.
    """
    DATA_FILE="hydropower_plant.csv"
    hydro_df = pd.read_csv(
            f"{data_dir}/{DATA_FILE}", index_col=0, low_memory=False
        )

    hydro_df.rename(columns={"Plant Id":"FacilityID","Annual Net Generation (MWh)":"Electricity"},inplace=True)
    FLOW_DICTIONARY={
            "co2 (kg)":{"FlowName":"Carbon dioxide","FlowUUID":"b6f010fb-a764-3063-af2d-bcb8309a97b7","Compartment_path":"emission/air","Compartment":"air"},
            "ch4 (kg)":{"FlowName":"Methane","FlowUUID":"aab83476-ec6c-3742-af85-15d320b7ce80","Compartment_path":"emission/air","Compartment":"air"},
            "water use (m3)":{"FlowName":"Water, fresh","FlowUUID":"8ba7dd57-b502-397b-944a-f63c6615f754","Compartment_path":"resource/water","Compartment":"input"}
            }
    hydro_df.drop(columns=["co2e (kg)"],inplace=True)
    hydro_df=hydro_df.melt(
            id_vars=["FacilityID","Plant Name","NERC Region","Electricity"],
            var_name="flow"
            )
    hydro_df.rename(columns={"value":"FlowAmount","NERC Region":"NERC"},inplace=True)
    hydro_df["FlowDict"]=hydro_df["flow"].map(FLOW_DICTIONARY)
    hydro_df = pd.concat(
            [hydro_df, hydro_df["FlowDict"].apply(pd.Series)], axis=1
        )

    hydro_df["Year"]=2016
    hydro_df["Source"]="netl"
    eia860_df=eia860_balancing_authority(2016)
    eia860_df["Plant Id"]=eia860_df["Plant Id"].astype(int)
    hydro_df=hydro_df.merge(eia860_df,
                            left_on="FacilityID",
                            right_on="Plant Id",
                            suffixes=["","_eia"])
    hydro_df.drop(columns=["FlowDict","flow","NERC Region","Plant Id"],inplace=True)
    hydro_df.loc[hydro_df["FlowName"]=="Water, fresh","FlowAmount"]=hydro_df.loc[hydro_df["FlowName"]=="Waster, fresh","FlowAmount"]*1000
    hydro_df["Unit"]="kg"
    hydro_df["stage_code"] = "Power plant"
    hydro_df["TechnologicalCorrelation"] = 1
    hydro_df["GeographicalCorrelation"] = 1
    hydro_df["TemporalCorrelation"] = 1
    hydro_df["DataCollection"] = 5
    hydro_df["ReliabilityScore"] = 1
    hydro_df["eGRID_ID"]=hydro_df["FacilityID"]
    hydro_df["FuelCategory"]="HYDRO"
    hydro_df["PrimaryFuel"]="WAT"
    hydro_df["quantity"]=hydro_df["Electricity"]
    hydro_df["ElementaryFlowPrimeContext"]="emission"
    hydro_df["input"]=False
    hydro_df.loc[hydro_df["FlowName"]=="Water, fresh","ElementaryFlowPrimeContext"]="resource"
    hydro_df.loc[hydro_df["FlowName"]=="Water, fresh","input"]=True
    hydro_df["plant_id"]=hydro_df["FacilityID"]
    hydro_df["Compartment"]=hydro_df["Compartment_path"]
    return hydro_df
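
The melt-then-map pattern above, in miniature: each flow column becomes a
row, and a dictionary supplies per-flow metadata (toy values here; the real
code maps flow UUIDs and compartments):

import pandas as pd

toy = pd.DataFrame({"FacilityID": [1], "co2 (kg)": [100.0], "ch4 (kg)": [2.0]})
long = toy.melt(id_vars=["FacilityID"], var_name="flow", value_name="FlowAmount")
meta = {
    "co2 (kg)": {"FlowName": "Carbon dioxide", "Compartment": "air"},
    "ch4 (kg)": {"FlowName": "Methane", "Compartment": "air"},
}
long = pd.concat([long, long["flow"].map(meta).apply(pd.Series)], axis=1)
print(long[["flow", "FlowAmount", "FlowName", "Compartment"]])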
Example #4
def fill_nans(df, key_column="FacilityID", target_columns=None, dropna=True):
    """Fills nan values for the specified target columns by using the data from
    other rows, using the key_column for matches. There is an extra step
    to fill remaining nans for the state column because the module to calculate
    transmission and distribution losses needs values in the state column to
    work.

    Parameters
    ----------
    df : dataframe
        Dataframe containing nans and at a minimum the columns key_column and
        target_columns
    key_column : str, optional
        The column to match for the data to fill target_columns, by default "FacilityID"
    target_columns : list, optional
        A list of columns with nans to fill, by default None. If None or
        empty, the function will use a pre-defined set of columns.
    dropna : bool, optional
        After nans are filled, drop rows that still contain nans in the
        target columns, by default True

    Returns
    -------
    dataframe
        The input dataframe with nans in the target columns filled where
        matching data were available (rows with remaining nans are dropped
        when dropna is True).
    """
    from electricitylci.eia860_facilities import eia860_balancing_authority

    if not target_columns:
        target_columns = [
            "Balancing Authority Code",
            "Balancing Authority Name",
            "FuelCategory",
            "NERC",
            "PercentGenerationfromDesignatedFuelCategory",
            "eGRID_ID",
            "Subregion",
            "FERC_Region",
            "EIA_Region",
            "State",
            "Electricity"
        ]
    confirmed_target = []
    for x in target_columns:
        if x in df.columns:
            confirmed_target.append(x)
        else:
            module_logger.warning(f"Column {x} is not in the dataframe")
    if key_column not in df.columns:
        raise KeyError(f"Key column '{key_column}' is not in the dataframe")
    for col in confirmed_target:
        key_df = (
            df[[key_column, col]]
            .dropna()
            .drop_duplicates(subset=key_column)
            .set_index(key_column)
        )
        df.loc[df[col].isnull(), col] = df.loc[
            df[col].isnull(), key_column
        ].map(key_df[col])
    plant_ba = eia860_balancing_authority(eia_gen_year).set_index("Plant Id")
    plant_ba.index = plant_ba.index.astype(int)
    if "State" not in df.columns:
        df["State"]=float("nan")
        confirmed_target.append("State")
    df.loc[df["State"].isna(), "State"] = df.loc[
        df["State"].isna(), "eGRID_ID"
    ].map(plant_ba["State"])
    if dropna:
        df.dropna(subset=confirmed_target, inplace=True)
    return df
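
The key-based fill in miniature, as a sketch on toy data: a value known for
one row of a facility propagates to that facility's other rows, and rows
with no donor value keep their nan:

import numpy as np
import pandas as pd

df = pd.DataFrame({"FacilityID": [1, 1, 2], "State": ["TX", np.nan, np.nan]})
key_df = (
    df[["FacilityID", "State"]]
    .dropna()
    .drop_duplicates(subset="FacilityID")
    .set_index("FacilityID")
)
df.loc[df["State"].isnull(), "State"] = df.loc[
    df["State"].isnull(), "FacilityID"
].map(key_df["State"])
# FacilityID 1 is filled with "TX"; FacilityID 2 has no donor row, stays NaN.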
def ba_io_trading_model(year=None, subregion=None, regions_to_keep=None):
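    """
    Use EIA bulk (EBA) hourly net generation, interchange, and demand data
    to build a balancing authority trading matrix following the
    input-output approach of Qu et al. (2018), then convert it to
    consumption-mix fractions at the BA, FERC market region, and US levels.

    Returns
    -------
    dict
        Keys 'BA', 'FERC', and 'US', each mapping to a dataframe of
        consumption-mix fractions at that aggregation level.
    """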
    REGION_NAMES = [
        'California', 'Carolinas', 'Central',
        'Electric Reliability Council of Texas, Inc.', 'Florida',
        'Mid-Atlantic', 'Midwest', 'New England ISO',
        'New York Independent System Operator', 'Northwest', 'Southeast',
        'Southwest', 'Tennessee Valley Authority'
    ]

    REGION_ACRONYMS = [
        'TVA', 'MIDA', 'CAL', 'CAR', 'CENT', 'ERCO', 'FLA',
        'MIDW', 'ISNE', 'NYIS', 'NW', 'SE', 'SW',
    ]
    if year is None:
        year = model_specs.NETL_IO_trading_year
    if subregion is None:
        subregion = model_specs.regional_aggregation
    if subregion not in ['BA', 'FERC', 'US']:
        raise ValueError(
            f'subregion or regional_aggregation must be "BA", "FERC", or '
            f'"US" when calculating trading with input-output, '
            f'not {subregion}'
        )

    # Read in BAA file which contains the names and abbreviations
    df_BA = pd.read_excel(data_dir + '/BA_Codes_930.xlsx', sheet_name = 'US', header = 4)
    df_BA.rename(columns={'etag ID': 'BA_Acronym', 'Entity Name': 'BA_Name','NCR_ID#': 'NRC_ID', 'Region': 'Region'}, inplace=True)
    # pd.np was removed in pandas 1.0; use numpy directly.
    BA = np.array(df_BA['BA_Acronym'])
    US_BA_acronyms = df_BA['BA_Acronym'].tolist()

    # Read in BAA file which contains the names and abbreviations
    # Original df_BAA does not include the Canadian balancing authorities
    # Import them here, then concatenate to make a single df_BAA_NA (North America)

    df_BA_CA = pd.read_excel(data_dir + '/BA_Codes_930.xlsx', sheet_name = 'Canada', header = 4)
    df_BA_CA.rename(columns={'etag ID': 'BA_Acronym', 'Entity Name': 'BA_Name','NCR_ID#': 'NRC_ID', 'Region': 'Region'}, inplace=True)
    df_BA_NA = pd.concat([df_BA, df_BA_CA])
    ferc_list = df_BA_NA['FERC_Region_Abbr'].unique().tolist()

    # Read in the bulk data

    path = join(data_dir, 'bulk_data', 'EBA.zip')
    NET_GEN_ROWS = []
    BA_TO_BA_ROWS = []
    DEMAND_ROWS = []
    try:
        z = zipfile.ZipFile(path, 'r')
        logging.info("Using existing bulk data download")
    except FileNotFoundError:
        logging.info("Downloading new bulk data")
        download_EBA()
        z = zipfile.ZipFile(path, 'r')
    logging.info("Loading bulk data to json")
    with z.open('EBA.txt') as f:
        for line in f:
            # All but one BA currently reports net generation in both UTC
            # and local time; for that one BA (GRMA) only UTC is reported,
            # so only the UTC series are pulled for now.
            if b'EBA.NG.H' in line and b'EBA.NG.HL' not in line:
                NET_GEN_ROWS.append(json.loads(line))
            # Similarly, there are 5 interchanges that report interchange
            # in UTC but not in local time.
            elif b'EBA.ID.H' in line and b'EBA.ID.HL' not in line:
                exchange_line = json.loads(line)
                ba = exchange_line['series_id'].split('-')[0][4:]
                if ba not in REGION_ACRONYMS:
                    BA_TO_BA_ROWS.append(exchange_line)
            elif b'EBA.D.H' in line and b'EBA.D.HL' not in line:
                DEMAND_ROWS.append(json.loads(line))
    logging.info(f"Net gen rows: {len(NET_GEN_ROWS)}; BA to BA rows:{len(BA_TO_BA_ROWS)}; Demand rows:{len(DEMAND_ROWS)}")
    eia923_gen=eia923.build_generation_data(generation_years=[year])
    eia860_df=eia860.eia860_balancing_authority(year)
    eia860_df["Plant Id"]=eia860_df["Plant Id"].astype(int)

    eia_combined_df=eia923_gen.merge(eia860_df,
                                     left_on=["FacilityID"],
                                     right_on=["Plant Id"],
                                     how="left")
    eia_gen_ba=eia_combined_df.groupby(by=["Balancing Authority Code"],as_index=False)["Electricity"].sum()

    # Subset for specified eia_gen_year
    start_datetime = '{}-01-01 00:00:00+00:00'.format(year)
    end_datetime = '{}-12-31 23:00:00+00:00'.format(year)

    start_datetime = datetime.strptime(start_datetime, '%Y-%m-%d %H:%M:%S%z')
    end_datetime = datetime.strptime(end_datetime, '%Y-%m-%d %H:%M:%S%z')

    # Net Generation Data Import
    logging.info("Generating df with datetime")
    df_net_gen = row_to_df(NET_GEN_ROWS, 'net_gen')
    del(NET_GEN_ROWS)
    logging.info("Pivoting")
    df_net_gen = df_net_gen.pivot(index = 'datetime', columns = 'region', values = 'net_gen')
    ba_cols = US_BA_acronyms

    gen_cols = list(df_net_gen.columns.values)

    gen_cols_set = set(gen_cols)
    ba_ref_set = set(ba_cols)

    col_diff = list(ba_ref_set - gen_cols_set)
    col_diff.sort(key = str.upper)
    logging.info("Cleaning net_gen dataframe")

    # Add in missing columns, then sort in alphabetical order
    for i in col_diff:
        df_net_gen[i] = 0

    # Keep only the columns that match the balancing authority names, there are several other columns included in the dataset
    # that represent states (e.g., TEX, NY, FL) and other areas (US48)

    df_net_gen = df_net_gen[ba_cols]

    # Resort columns so the headers are in alpha order
    df_net_gen = df_net_gen.sort_index(axis=1)
    df_net_gen = df_net_gen.fillna(value = 0)

    df_net_gen = df_net_gen.loc[start_datetime:end_datetime]

    # Sum values in each column
    df_net_gen_sum = df_net_gen.sum(axis = 0).to_frame()
    logging.info("Reading canadian import data")
    # Add Canadian import data to the net generation dataset, concatenate and put in alpha order
    df_CA_Imports_Gen = pd.read_csv(data_dir + '/CA_Imports_Gen.csv', index_col = 0)
    df_CA_Imports_Gen = df_CA_Imports_Gen[str(year)]

    logging.info("Combining US and Canadian net gen data")
    df_net_gen_sum = pd.concat([df_net_gen_sum,df_CA_Imports_Gen]).sum(axis=1)
    df_net_gen_sum = df_net_gen_sum.to_frame()
    df_net_gen_sum = df_net_gen_sum.sort_index(axis=0)

    # Check the net generation of each balancing authority against EIA-923
    # data. If a BA's relative difference exceeds the mean absolute
    # deviation of all the differences, treat the value as an error and
    # replace it with the EIA-923 value.
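    # For example, if a BA's EBA-derived total differs from its EIA-923
    # total by 40% while the mean absolute deviation of all BAs'
    # differences is 10%, the EBA value is discarded in favor of EIA-923.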
    logging.info("Checking against EIA 923 generation data")
    net_gen_check=df_net_gen_sum.merge(
            right=eia_gen_ba,
            left_index=True,
            right_on=["Balancing Authority Code"],
            how="left"
            ).reset_index()
    net_gen_check["diff"]=abs(net_gen_check["Electricity"]-net_gen_check[0])/net_gen_check[0]
    diff_mad=net_gen_check["diff"].mad()
    net_gen_swap=net_gen_check.loc[net_gen_check["diff"]>diff_mad,["Balancing Authority Code","Electricity"]].set_index("Balancing Authority Code")
    df_net_gen_sum.loc[net_gen_swap.index,[0]]=np.nan
    net_gen_swap.rename(columns={"Electricity":0},inplace=True)
    df_net_gen_sum=df_net_gen_sum.combine_first(net_gen_swap)
    # First work on the trading data from the 'df_trade_all_stack_2016' frame
    # This cell does the following:
    # 1. reformats the data to an annual basis
    # 2. formats the BA names in the corresponding columns
    # 3. evaluates the trade values from both BA perspectives
    #    (e.g., BA1 as exporter and importer in a transaction with BA2)
    # 4. evaluates the trading data for any results that don't make sense
    #       a. both BAs designate as importers (negative value)
    #       b. both BAs designate as exporters (positive value)
    #       c. one of the BAs in the transaction reports a zero value and
    #          the other is nonzero
    # 5. calculates the percent difference in the transaction values
    #    reported by the BAs
    # 6. sets the final exchange value based on the following logic:
    #       a. if the percent difference is less than 20%, take the mean
    #       b. if not, use the value as reported by the exporting BAA
    #       c. designate each BA in the transaction as either the importer
    #          or the exporter
    # Output is a pivot with index (rows) representing exporting BAs,
    #   columns representing importing BAs, and values for the traded amount
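    # Illustrative numbers for step 6: if BA1 reports exporting 120 MWh to
    # BA2 while BA2 reports importing 100 MWh, the percent difference is
    # (|120/100 - 1| + |100/120 - 1|) / 2 ~= 0.18 < 0.2, so the final
    # exchange is the mean (110 MWh) and BA1 is designated the exporter.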

    # Group and resample trading data so that it is on an annual basis

    logging.info("Creating trading dataframe")
    df_ba_trade = ba_exchange_to_df(BA_TO_BA_ROWS, data_type='ba_to_ba')
    del(BA_TO_BA_ROWS)
    df_ba_trade = df_ba_trade.set_index('datetime')
    df_ba_trade['transacting regions'] = df_ba_trade['from_region'] + '-' + df_ba_trade['to_region']

    logging.info("Filtering trading dataframe")
    # Keep only the columns that match the balancing authority names, there are several other columns included in the dataset
    # that represent states (e.g., TEX, NY, FL) and other areas (US48)
    filt1 = df_ba_trade['from_region'].isin(ba_cols)
    filt2 = df_ba_trade['to_region'].isin(ba_cols)
    filt = filt1 & filt2
    df_ba_trade = df_ba_trade[filt]

    # Subset for eia_gen_year, need to pivot first because of non-unique datetime index
    df_ba_trade_pivot = df_ba_trade.pivot(columns = 'transacting regions', values = 'ba_to_ba')

    df_ba_trade_pivot = df_ba_trade_pivot.loc[start_datetime:end_datetime]

    # Sum columns - represents the net transacted amount between the two BAs
    df_ba_trade_sum = df_ba_trade_pivot.sum(axis = 0).to_frame()
    df_ba_trade_sum = df_ba_trade_sum.reset_index()
    df_ba_trade_sum.columns = ['BAAs','Exchange']

    # Split the BAA string into exporting and importing BAA columns
    # (the Series.str.split(...).str unpacking idiom was deprecated)
    df_ba_trade_sum[['BAA1', 'BAA2']] = df_ba_trade_sum['BAAs'].str.split(
        '-', n=1, expand=True)
    df_ba_trade_sum = df_ba_trade_sum.rename(columns={'BAAs': 'Transacting BAAs'})

    # Create two perspectives - import and export to use for comparison in selection of the final exchange value between the BAAs
    df_trade_sum_1_2 = df_ba_trade_sum.groupby(['BAA1', 'BAA2','Transacting BAAs'], as_index=False)[['Exchange']].sum()
    df_trade_sum_2_1 = df_ba_trade_sum.groupby(['BAA2', 'BAA1', 'Transacting BAAs'], as_index=False)[['Exchange']].sum()
    df_trade_sum_1_2.columns = ['BAA1_1_2', 'BAA2_1_2','Transacting BAAs_1_2', 'Exchange_1_2']
    df_trade_sum_2_1.columns = ['BAA2_2_1', 'BAA1_2_1','Transacting BAAs_2_1', 'Exchange_2_1']

    # Combine two grouped tables for comparison for exchange values
    df_concat_trade = pd.concat([df_trade_sum_1_2,df_trade_sum_2_1], axis = 1)
    df_concat_trade['Exchange_1_2_abs'] = df_concat_trade['Exchange_1_2'].abs()
    df_concat_trade['Exchange_2_1_abs'] = df_concat_trade['Exchange_2_1'].abs()

    # Create new column to check if BAAs designate as either both exporters or both importers
    # or if one of the entities in the transaction reports a zero value
    # Drop combinations where any of these conditions are true, keep everything else
    df_concat_trade['Status_Check'] = np.where(((df_concat_trade['Exchange_1_2'] > 0) & (df_concat_trade['Exchange_2_1'] > 0)) \
                   |((df_concat_trade['Exchange_1_2'] < 0) & (df_concat_trade['Exchange_2_1'] < 0)) \
                   | ((df_concat_trade['Exchange_1_2'] == 0) | (df_concat_trade['Exchange_2_1'] == 0)), 'drop', 'keep')

    # Calculate the difference in exchange values
    df_concat_trade['Delta'] = df_concat_trade['Exchange_1_2_abs'] - df_concat_trade['Exchange_2_1_abs']

    # Calculate the percent diff of exchange_abs values - this can be done
    # two ways: relative to the 1_2 exchange or relative to the 2_1
    # exchange - perform the calc both ways and take the average
    df_concat_trade['Percent_Diff_Avg']= ((abs((df_concat_trade['Exchange_1_2_abs']/df_concat_trade['Exchange_2_1_abs'])-1)) \
        + (abs((df_concat_trade['Exchange_2_1_abs']/df_concat_trade['Exchange_1_2_abs'])-1)))/2

    # Mean exchange value
    df_concat_trade['Exchange_mean'] = df_concat_trade[['Exchange_1_2_abs', 'Exchange_2_1_abs']].mean(axis=1)

    # The percent diff equation creates NaN where both values are 0; fill with 0
    df_concat_trade['Percent_Diff_Avg'] = (
        df_concat_trade['Percent_Diff_Avg'].fillna(0))

    # Final exchange value based on the logic above: if the percent diff is
    # less than 20%, take the mean; if not, use the value as reported by
    # the exporting BAA. First figure out which BAA is the exporter by
    # checking the value of Exchange_1_2. If that value is positive, BAA1
    # exported to BAA2; if negative, use the value from Exchange_2_1.
    df_concat_trade['Final_Exchange'] = np.where((df_concat_trade['Percent_Diff_Avg'].abs() < 0.2),
                   df_concat_trade['Exchange_mean'],np.where((df_concat_trade['Exchange_1_2'] > 0),
                                  df_concat_trade['Exchange_1_2'],df_concat_trade['Exchange_2_1']))


    # Assign final designation of BAA as exporter or importer based on logical assignment
    df_concat_trade['Export_BAA'] = np.where((df_concat_trade['Exchange_1_2'] > 0), df_concat_trade['BAA1_1_2'],
                   np.where((df_concat_trade['Exchange_1_2'] < 0), df_concat_trade['BAA2_1_2'],''))

    df_concat_trade['Import_BAA'] = np.where((df_concat_trade['Exchange_1_2'] < 0), df_concat_trade['BAA1_1_2'],
                   np.where((df_concat_trade['Exchange_1_2'] > 0), df_concat_trade['BAA2_1_2'],''))

    df_concat_trade = df_concat_trade[df_concat_trade['Status_Check'] == 'keep']


    # Create the final trading matrix; first grab the necessary columns, rename the columns and then pivot
    df_concat_trade_subset = df_concat_trade[['Export_BAA', 'Import_BAA', 'Final_Exchange']]

    df_concat_trade_subset.columns = ['Exporting_BAA', 'Importing_BAA', 'Amount']

    df_trade_pivot = df_concat_trade_subset.pivot_table(index = 'Exporting_BAA', columns = 'Importing_BAA', values = 'Amount').fillna(0)


    # This cell continues formatting the df_trade
    # Find missing BAs - need to add them in so that we have a square matrix
    # Not all BAs are involved in transactions

    trade_cols = list(df_trade_pivot.columns.values)
    trade_rows = list(df_trade_pivot.index.values)

    trade_cols_set = set(trade_cols)
    trade_rows_set = set(trade_rows)
    trade_ba_ref_set = set(ba_cols)

    trade_col_diff = list(trade_ba_ref_set - trade_cols_set)
    trade_col_diff.sort(key = str.upper)

    trade_row_diff = list(trade_ba_ref_set - trade_rows_set)
    trade_row_diff.sort(key=str.upper)

    # Add in missing columns, then sort in alphabetical order
    for i in trade_col_diff:
        df_trade_pivot[i] = 0

    df_trade_pivot = df_trade_pivot.sort_index(axis=1)

    # Add in missing rows, then sort in alphabetical order
    for i in trade_row_diff:
        df_trade_pivot.loc[i,:] = 0

    df_trade_pivot = df_trade_pivot.sort_index(axis=0)

    # Add Canadian Imports to the trading matrix
    # CA imports are specified in an external file
    df_CA_Imports_Cols = pd.read_csv(data_dir + '/CA_Imports_Cols.csv', index_col = 0)

    df_CA_Imports_Rows = pd.read_csv(data_dir + '/CA_Imports_Rows.csv', index_col = 0)
    df_CA_Imports_Rows = df_CA_Imports_Rows[['us_ba', str(year)]]
    df_CA_Imports_Rows = df_CA_Imports_Rows.pivot(columns = 'us_ba', values = str(year))

    df_concat_trade_CA = pd.concat([df_trade_pivot, df_CA_Imports_Rows])
    df_concat_trade_CA = pd.concat([df_concat_trade_CA, df_CA_Imports_Cols], axis = 1)
    df_concat_trade_CA.fillna(0, inplace = True)
    df_trade_pivot = df_concat_trade_CA
    df_trade_pivot = df_trade_pivot.sort_index(axis=0)
    df_trade_pivot = df_trade_pivot.sort_index(axis=1)

    # Perform trading calculations as provided in Qu et al (2018) to
    # determine the composition of a BA consumption mix
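    # In the notation of Qu et al. (2018), as read from the code below
    # (not quoted from the paper): with total inflow x = generation +
    # imports and consumption c = x - exports,
    #     B = T x_hat^-1,   G = (I - B)^-1,   H = G c_hat x_hat^-1,
    # where x_hat and c_hat are diagonal matrices built from x and c, and
    # T is the BA-to-BA trade matrix. Each column of H traces one BA's
    # consumption back to the originating BAs.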

    # Create total inflow vector x and then convert to a diagonal matrix x-hat
    logging.info("Inflow vector")
    x = []
    for i in range(len(df_net_gen_sum)):
        x.append(df_net_gen_sum.iloc[i] + df_trade_pivot.sum(axis=0).iloc[i])

    x_np = np.array(x)

    # If values are zero, x_hat matrix will be singular, set BAAs with 0 to small value (1)
    df_x = pd.DataFrame(data = x_np, index = df_trade_pivot.index)
    df_x = df_x.rename(columns = {0:'inflow'})
    df_x.loc[df_x['inflow'] == 0] = 1

    x_np = df_x.values

    x_hat = np.diagflat(x_np)


    # Create consumption vector c and then convert to a diagonal matrix c-hat
    # Calculate c based on x and T
    logging.info("consumption vector")
    c = []

    for i in range(len(df_net_gen_sum)):
        c.append(x[i] - df_trade_pivot.sum(axis = 1).iloc[i])

    c_np = np.array(c)
    c_hat = np.diagflat(c_np)

    # Convert df_trade_pivot to matrix
    T = df_trade_pivot.values

    # Create matrix to split T into distinct interconnections - i.e., prevent trading between eastern and western interconnects
    # Connections between the western and eastern interconnects are through SWPP and WAUE
    logging.info("Matrix operations")
    interconnect = df_trade_pivot.copy()
    interconnect[:] = 1
    interconnect.loc['SWPP',['EPE', 'PNM', 'PSCO', 'WACM']] = 0
    interconnect.loc['WAUE',['WAUW', 'WACM']] = 0
    interconnect_mat = interconnect.values
    T_split = np.multiply(T, interconnect_mat)

    # Matrix trading math (see Qu et al. 2018 ES&T paper)
    x_hat_inv = np.linalg.inv(x_hat)

    B = np.matmul(T_split, x_hat_inv)

    I = np.identity(len(df_net_gen_sum))

    diff_I_B = I - B

    G = np.linalg.inv(diff_I_B)

    # np.matmul takes only two operands (a third positional argument is
    # treated as `out`), so the three-factor product H = G @ c_hat @
    # x_hat_inv must be chained explicitly.
    G_c = np.matmul(G, c_hat)
    H = np.matmul(G_c, x_hat_inv)

    df_G = pd.DataFrame(G)
    df_B = pd.DataFrame(B)
    df_H = pd.DataFrame(H)

    # Convert H to pandas dataframe, populate index and columns

    df_final_trade_out = df_H
    df_final_trade_out.columns = df_net_gen_sum.index
    df_final_trade_out.index = df_net_gen_sum.index

    # Develop trading input for the eLCI code. Melt the dataframe to end up
    # with a three-column dataframe; repeat for each aggregation level
    # (BA, FERC market region, and US).

    # Establish a threshold of 0.00001 for inclusion in the final trading
    # matrix; the matrix calculation leaves many very small values
    # (e.g., 2.0e-15)

    df_final_trade_out_filt = df_final_trade_out.copy()
    col_list = df_final_trade_out.columns.tolist()
    # Adding in a filter for balancing authorities that are not associated
    # with any specific plants in EIA-860 - there won't be any data for
    # them in the emissions dataframes - so that the consumption mixes are
    # made up of the rest of the incoming balancing authority areas.
    eia860_bas = sorted(
        list(eia860_df["Balancing Authority Code"].dropna().unique())
        + list(df_CA_Imports_Cols.columns)
    )
    keep_rows = [x for x in df_final_trade_out_filt.index if x in eia860_bas]
    keep_cols = [x for x in df_final_trade_out_filt.columns if x in eia860_bas]
    df_final_trade_out_filt=df_final_trade_out_filt.loc[keep_rows,keep_cols]
    col_list = df_final_trade_out_filt.columns.tolist()
    for i in col_list:
        df_final_trade_out_filt[i] = np.where(
            df_final_trade_out_filt[i].abs()
            / df_final_trade_out_filt[i].sum() < 0.00001,
            0,
            df_final_trade_out_filt[i].abs(),
        )

    df_final_trade_out_filt = df_final_trade_out_filt.reset_index()
    df_final_trade_out_filt = df_final_trade_out_filt.rename(columns = {'index':'Source BAA'})

    df_final_trade_out_filt_melted = df_final_trade_out_filt.melt(id_vars = 'Source BAA' , value_vars=col_list)
    df_final_trade_out_filt_melted = df_final_trade_out_filt_melted.rename(columns = {'Source BAA':'export BAA', 'variable':'import BAA'})


    # Merge to bring in import region name matched with BAA
    df_final_trade_out_filt_melted_merge = df_final_trade_out_filt_melted.merge(df_BA_NA, left_on = 'import BAA', right_on = 'BA_Acronym')
    df_final_trade_out_filt_melted_merge.rename(columns={'FERC_Region': 'import ferc region', 'FERC_Region_Abbr':'import ferc region abbr'}, inplace=True)
    df_final_trade_out_filt_melted_merge.drop(columns = ['BA_Acronym', 'BA_Name', 'NCR ID#', 'EIA_Region', 'EIA_Region_Abbr'], inplace = True)

    # Merge to bring in export region name matched with BAA
    df_final_trade_out_filt_melted_merge = df_final_trade_out_filt_melted_merge.merge(df_BA_NA, left_on = 'export BAA', right_on = 'BA_Acronym')
    if regions_to_keep is not None:
        df_final_trade_out_filt_melted_merge = (
            df_final_trade_out_filt_melted_merge.loc[
                df_final_trade_out_filt_melted_merge["BA_Name"].isin(
                    regions_to_keep), :])
    df_final_trade_out_filt_melted_merge.rename(columns={'FERC_Region': 'export ferc region', 'FERC_Region_Abbr':'export ferc region abbr'}, inplace=True)
    df_final_trade_out_filt_melted_merge.drop(columns = ['BA_Acronym', 'BA_Name', 'NCR ID#', 'EIA_Region', 'EIA_Region_Abbr'], inplace = True)
    # Develop final df for BAA
    BAA_import_grouped_tot = df_final_trade_out_filt_melted_merge.groupby(['import BAA'])['value'].sum().reset_index()
    BAA_final_trade = df_final_trade_out_filt_melted_merge.copy()
    BAA_final_trade = BAA_final_trade.drop(columns = ['import ferc region', 'export ferc region', 'import ferc region abbr', 'export ferc region abbr'])
    BAA_final_trade = BAA_final_trade.merge(BAA_import_grouped_tot, left_on = 'import BAA', right_on = 'import BAA')
    BAA_final_trade = BAA_final_trade.rename(columns = {'value_x':'value','value_y':'total'})
    BAA_final_trade['fraction'] = BAA_final_trade['value']/BAA_final_trade['total']
    BAA_final_trade = BAA_final_trade.fillna(value = 0)
    BAA_final_trade = BAA_final_trade.drop(columns = ['value', 'total'])
    # Remove Canadian BAs in import list
    BAA_filt = BAA_final_trade['import BAA'].isin(eia860_bas)
    BAA_final_trade = BAA_final_trade[BAA_filt]
    # There are some BAs that will have 0 trade. Some of these are
    # legitimate: e.g., Alcoa Yadkin has no demand (all of its generation
    # is exported); others appear to be errors. For BAs with actual demand,
    # the consumption mix is set to 100% from that BA. For those without
    # demand, the fraction is set to near 0 just to make sure systems can
    # be built in openLCA.
    BAA_zero_trade = [
        x for x in list(BAA_final_trade["import BAA"].unique())
        if BAA_final_trade.loc[
            BAA_final_trade["import BAA"] == x, "fraction"].sum() == 0
    ]
    BAAs_from_zero_trade_with_demand = []
    for d_row in DEMAND_ROWS:
        if d_row["series_id"].split('.')[1].split('-')[0] in BAA_zero_trade:
            BAAs_from_zero_trade_with_demand.append(d_row["series_id"].split('.')[1].split('-')[0])
    BAAs_from_zero_trade_with_demand = list(set(BAAs_from_zero_trade_with_demand))
    del(DEMAND_ROWS)
    for baa in BAAs_from_zero_trade_with_demand:
        # Boolean-mask assignment requires .loc (.at only accepts scalar
        # labels).
        BAA_final_trade.loc[
            (BAA_final_trade["import BAA"] == baa)
            & (BAA_final_trade["export BAA"] == baa), "fraction"] = 1
    for baa in list(set(BAA_zero_trade) - set(BAAs_from_zero_trade_with_demand)):
        BAA_final_trade.loc[
            (BAA_final_trade["import BAA"] == baa)
            & (BAA_final_trade["export BAA"] == baa), "fraction"] = 1E-15
        # It was later decided not to create consumption mixes for BAs that
        # have no imports, so these rows are dropped.
        BAA_final_trade.drop(
            BAA_final_trade[BAA_final_trade["import BAA"] == baa].index,
            inplace=True)
    BAA_final_trade.to_csv(output_dir + '/BAA_final_trade_{}.csv'.format(year))
    BAA_final_trade["export_name"]=BAA_final_trade["export BAA"].map(df_BA_NA[["BA_Acronym","BA_Name"]].set_index("BA_Acronym")["BA_Name"])
    BAA_final_trade["import_name"]=BAA_final_trade["import BAA"].map(df_BA_NA[["BA_Acronym","BA_Name"]].set_index("BA_Acronym")["BA_Name"])
    # Develop final df for FERC market region
    ferc_import_grouped_tot = df_final_trade_out_filt_melted_merge.groupby(
        ['import ferc region'])['value'].sum().reset_index()
    ferc_final_trade = df_final_trade_out_filt_melted_merge.copy()
    ferc_final_trade = ferc_final_trade.groupby(
        ['import ferc region abbr', 'import ferc region', 'export BAA']
    )['value'].sum().reset_index()
    ferc_final_trade = ferc_final_trade.merge(ferc_import_grouped_tot, left_on = 'import ferc region', right_on = 'import ferc region')
    ferc_final_trade = ferc_final_trade.rename(columns = {'value_x':'value','value_y':'total'})
    ferc_final_trade['fraction'] = ferc_final_trade['value']/ferc_final_trade['total']
    ferc_final_trade = ferc_final_trade.fillna(value = 0)
    ferc_final_trade = ferc_final_trade.drop(columns = ['value', 'total'])
    # Remove Canadian entry in import list
    ferc_list.remove('CAN')
    ferc_filt = ferc_final_trade['import ferc region abbr'].isin(ferc_list)
    ferc_final_trade = ferc_final_trade[ferc_filt]
    ferc_final_trade.to_csv(output_dir + '/ferc_final_trade_{}.csv'.format(year))
    ferc_final_trade["export_name"]=ferc_final_trade["export BAA"].map(df_BA_NA[["BA_Acronym","BA_Name"]].set_index("BA_Acronym")["BA_Name"])
    # Develop final df for the US as a whole
    us_import_grouped_tot = df_final_trade_out_filt_melted_merge['value'].sum()
    us_final_trade = df_final_trade_out_filt_melted_merge.copy()
    us_final_trade = us_final_trade.groupby(
        ['export BAA'])['value'].sum().reset_index()
    us_final_trade["fraction"] = (
        us_final_trade["value"] / us_import_grouped_tot)
    us_final_trade = us_final_trade.fillna(value=0)
    us_final_trade = us_final_trade.drop(columns=["value"])
    us_final_trade["export_name"] = us_final_trade["export BAA"].map(
        df_BA_NA[["BA_Acronym", "BA_Name"]].set_index("BA_Acronym")["BA_Name"])
    return {'BA': BAA_final_trade,
            'FERC': ferc_final_trade,
            'US': us_final_trade}
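
A hypothetical invocation, assuming the EIA bulk data and the package
configuration (model_specs, data_dir, output_dir) are in place:

trade = ba_io_trading_model(year=2016, subregion='BA')
ba_trade = trade['BA']      # consumption-mix fractions between BA pairs
ferc_trade = trade['FERC']  # the same, aggregated to FERC market regions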