コード例 #1
0
def Construction(calculation_years=range(2010, 2017)):
    """
    Estimate county-level construction energy use and save it as parquet.

    State-level diesel, natural gas, electricity, and LPG use are calculated
    from Economic Census data for NAICS 23, 236, 237, and 238, then allocated
    to counties with 2012 County Business Patterns establishment fractions
    and a BEA-based multiplier. The sector total (NAICS 23) is removed before
    the result is written to
    ``../results/cons_county_energy_<YYYYmmdd_HHMM>.parquet.gzip``.

    Parameters
    ----------
    calculation_years : iterable of int
        Years forwarded to ``cons.calc_county_energy``. Defaults to
        2010 through 2016.

    Returns
    -------
    None
        The result is only written to disk.
    """

    def _numeric_where_possible(column):
        # Equivalent to pd.to_numeric(column, errors='ignore'), which is
        # deprecated: a column that cannot be parsed is returned unchanged.
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    # Import 2012 Economic Census data.
    census_data = pd.concat(
        [cons.census(naics) for naics in [23, 236, 237, 238]],
        ignore_index=True)

    # Fill in missing values for these states. The filled frames are built
    # from the complete census data, before the original rows are removed.
    filled_states = ['DE', 'DC', 'WV']
    filled_frames = [cons.fill_in_missing_data(census_data, state)
                     for state in filled_states]

    census_data = census_data[~census_data.state_abbr.isin(filled_states)]
    census_data = pd.concat([census_data] + filled_frames)

    # Order rows by state; 'state' ends up as the first column.
    census_data = census_data.set_index('state').sort_index().reset_index()

    census_data = census_data.apply(_numeric_where_possible)

    # Calculate state-level energy use (all in MMBtu): diesel, natural gas,
    # electricity, and liquid petroleum gas, in that order.
    energy_state = pd.concat(
        [
            cons.calc_diesel_state(census_data),
            cons.calc_ng_state(census_data),
            cons.calc_elec_state(census_data),
            cons.calc_lpg_state(census_data),
        ],
        axis=0,
        ignore_index=True)

    energy_state = cons.format_state_energy(energy_state)

    # Calculate GDP multiplier
    multiplier = cons.calc_bea_multiplier()

    cbp_2012 = get_cbp.CBP(2012).cbp

    # Calculate county fraction of state construction establishments by
    # NAICS code.
    county_frac = cons.calc_county_fraction(cbp_2012)
    county_frac.rename(columns={'naics': 'NAICS'}, inplace=True)

    # Calculate county energy for the requested years. Previously a
    # hard-coded range(2010, 2017) was passed here, so the
    # calculation_years argument had no effect.
    cons_energy = cons.calc_county_energy(
        energy_state,
        county_frac,
        multiplier,
        calculation_years=calculation_years)

    # Remove sector total (NAICS == 23) and reset index
    cons_energy = cons_energy[cons_energy.NAICS != 23].reset_index()

    # One dask partition per distinct fipstate value.
    n_states = len(cons_energy.fipstate.unique())
    cons_energy = dd.from_pandas(cons_energy.set_index('fipstate'),
                                 npartitions=n_states)

    filename = 'cons_county_energy_' + \
        dt.datetime.now().strftime('%Y%m%d_%H%M') + '.parquet.gzip'

    cons_energy.to_parquet('../results/' + filename,
                           compression='gzip',
                           engine='pyarrow')
import Match_GHGRP_County_IPH as county_matching
import get_cbp
import pandas as pd
import datetime as dt

# Module-level setup: every statement below runs at import time, reads
# files using paths relative to the working directory, and uses a
# hard-coded year of 2014.

# Timestamp string (YYYYmmdd-HHMM) taken when the module is loaded.
today = dt.datetime.now().strftime('%Y%m%d-%H%M')
# Import GHGRP energy data
energy_ghgrp = pd.read_parquet('../results/ghgrp_energy_20200826-1725.parquet',
                               engine='pyarrow')
cbp = get_cbp.CBP(2014)  # Import county business patterns data
tcm = county_matching.County_matching(2014)  # Instantiate matching methods
# Match GHGRP facilities to their county
ghgrp_matching = tcm.format_ghgrp(energy_ghgrp, cbp.cbp_matching)
# Overwrites cbp.cbp_matching in place with the result of ghgrp_counts;
# presumably this adds GHGRP facility counts -- TODO confirm.
cbp.cbp_matching = tcm.ghgrp_counts(cbp.cbp_matching, ghgrp_matching)

# Adjust the CBP establishment counts based on GHGRP facilities
cbp_corrected = tcm.correct_cbp(cbp.cbp_matching)

# Import results of IPF algorithm applied to 2014 MECS.
# The first CSV column is used as the index.
ipf_results_formatted = pd.read_csv(
    './calculation_data/mecs_2014_ipf_results_naics_employment.csv',
    index_col=0
    )


def calculate_net_electricity(cbp_matching, ipf_results_formatted):
    """
    Estimates net electricity by county, industry, and employment size class
    for all establishments. Applies net electricity intensities calculated
    from MECS to all establishments, including GHGRP facilities, unlike
    combustion emissions estimates.
コード例 #3
0
import pandas as pd
import Match_GHGRP_County_IPH as county_matching
import get_cbp
import Calculate_MfgEnergy_IPH

# Code from run_all_IPH.py. Used to calculate MECS intensities based on
# CBP establishment counts.
# Everything below runs at import time with a hard-coded year of 2014 and
# paths relative to the working directory.
energy_ghgrp = pd.read_parquet('../results/ghgrp_energy_20190801-2337.parquet',
                               engine='pyarrow')
cbp = get_cbp.CBP(2014)
tcm = county_matching.County_matching(2014)
ghgrp_matching = tcm.format_ghgrp(energy_ghgrp, cbp.cbp_matching)
# Overwrites cbp.cbp_matching in place; the updated value is what
# calc_intensities receives below.
cbp.cbp_matching = tcm.ghgrp_counts(cbp.cbp_matching, ghgrp_matching)
cbp_corrected = tcm.correct_cbp(cbp.cbp_matching)
tcmfg = Calculate_MfgEnergy_IPH.Manufacturing_energy(2014, energy_ghgrp)
tcmfg.update_naics(ghgrp_matching)
mecs_intensities = tcmfg.calc_intensities(cbp.cbp_matching)
# Keep only the rows whose MECS fuel type is net electricity.
mecs_intensities = \
    mecs_intensities[mecs_intensities.MECS_FT == 'Net_electricity']

# mecs_intensities contains 'dummy' NAICS codes. Need to convert.
# The left merge keeps every intensity row and adds the MECS_NAICS column
# matched on MECS_NAICS_dummies.
naics_mappings = pd.read_csv('./calculation_data/mecs_naics_2012.csv',
                             usecols=['MECS_NAICS_dummies', 'MECS_NAICS'])
mecs_intensities = pd.merge(mecs_intensities,
                            naics_mappings,
                            on='MECS_NAICS_dummies',
                            how='left')


# Calculate establishment counts from county-level energy data, as
# this represents processed CBP and GHGRP data.
コード例 #4
0
    def calc_ghgrp_intensities(self):
        """
        Calculate GHGRP CO2e intensities and fuel disaggregation fractions.

        NAICS codes of reported GHGRP data may be corrected based on Census
        County Business Patterns data. Final GHGRP GHG intensity and
        fuel disaggregation are based on these corrected NAICS codes.

        GHG intensity calculated by county, NAICS, and MECS_FT_byp.
        Fuel disaggregation and intensity includes end use.

        Reads ``self.year``, ``self.std_efs``, ``self.county_data_file``, and
        ``self.data_dir``. Both results are also written to ``self.data_dir``
        as ``ghgrp_fuel_disagg_<year>.csv`` and
        ``ghgrp_CO2e_intensity_<year>.csv``.

        NOTE(review): the CBP data, county matching, and Other_fuels helpers
        are instantiated for 2014 regardless of ``self.year`` -- confirm that
        this is intended.

        Returns
        -------
        final_ghgrp_CO2e_intensity : pandas.DataFrame
            Columns REPORTING_YEAR, COUNTY_FIPS, naics, MECS_FT, MECS_FT_byp,
            and MTCO2e_per_MMBtu (weighted by MMBtu, after QA/QC against
            standard emission factors; zero for Biomass).
        final_ghgrp_fuel_disagg : pandas.DataFrame
            Single column MMBtu_fraction, indexed by the aligned grouping
            levels of the end-use and byproduct fuel fractions.
        """
        if self.year > 2012:
            naics_column = 'PRIMARY_NAICS_CODE_12'
        else:
            naics_column = 'PRIMARY_NAICS_CODE'

        # This is an updated ghgrp energy file. Bug was fixed on 5/5/2020 that
        # didn't capture MTCO2e_TOTAL values. Energy values are the same
        # as the original calculations.
        ghgrp_energy = pd.read_parquet(
            '../results/ghgrp_energy_20200826-1725.parquet',
            engine='pyarrow',
            columns=[
                'FACILITY_ID', 'REPORTING_YEAR', 'FUEL_TYPE',
                'FUEL_TYPE_OTHER', 'FUEL_TYPE_BLEND', 'COUNTY_FIPS',
                'MECS_Region', 'MTCO2e_TOTAL', 'PRIMARY_NAICS_CODE',
                'SECONDARY_NAICS_CODE', 'MMBtu_TOTAL'
            ])

        # Drop entries with zero calculated MMBtu and keep only the requested
        # year. A boolean mask is used rather than .loc[<filtered>.index],
        # which re-selects dropped rows when index labels repeat.
        ghgrp_energy = ghgrp_energy[
            (ghgrp_energy.MMBtu_TOTAL != 0)
            & (ghgrp_energy.REPORTING_YEAR == self.year)].copy()

        of = breakout_other_fuels.Other_fuels(2014)

        # Map aggregated fuel types to GHGRP fuel types
        ghgrp_energy = of.map_GHGRP_fueltypes(ghgrp_energy, 'MECS_FT_IPF.csv')

        # Map disaggregated fuel types to GHGRP fuel type
        ghgrp_energy = of.map_GHGRP_fueltypes(ghgrp_energy, 'MECS_FT_byp.csv')

        # Replace Biomass emissions with zero value
        ghgrp_energy.loc[ghgrp_energy.MECS_FT_byp == 'Biomass',
                         'MTCO2e_TOTAL'] = 0

        facility_keys = ['FACILITY_ID', 'REPORTING_YEAR', 'MECS_FT',
                         'MECS_FT_byp']

        by_facility = ghgrp_energy.groupby(facility_keys)

        # Sum emissions (MTCO2e) for specified year(s)
        ghgrp_ffc_emissions = by_facility.MTCO2e_TOTAL.sum().dropna()

        # Calculate CO2e intensity (MTCO2e/MMBtu)
        ghgrp_CO2e_intensity = ghgrp_ffc_emissions.divide(
            by_facility.MMBtu_TOTAL.sum().dropna())

        ghgrp_CO2e_intensity.name = 'MTCO2e_per_MMBtu'

        ghgrp_CO2e_intensity = pd.DataFrame(ghgrp_CO2e_intensity)

        cbp = get_cbp.CBP(2014)

        tcm = county_matching.County_matching(2014)

        ghgrp_matching = tcm.format_ghgrp(ghgrp_energy, cbp.cbp_matching)

        # Update NAICS codes based on Census Business Patterns Data
        energy_ghgrp_matched = pd.merge(
            ghgrp_energy,
            ghgrp_matching[['FACILITY_ID', naics_column]],
            on='FACILITY_ID', how='left')

        energy_ghgrp_matched[naics_column] = \
            energy_ghgrp_matched[naics_column].astype('int')

        # One row per distinct NAICS code, to be matched to MECS NAICS.
        naics6d = pd.DataFrame(
            {naics_column: energy_ghgrp_matched[naics_column].unique()})

        naics6d = Match_MECS_NAICS.Match(naics6d,
                                         naics_column,
                                         naics_vintage=2012)

        energy_ghgrp_matched = pd.merge(energy_ghgrp_matched,
                                        naics6d,
                                        on=naics_column,
                                        how='left')

        # Filter out facilities that use PRIMARY_NAICS_CODE == 486210 and
        # NAICS_USED == 0
        energy_ghgrp_matched = energy_ghgrp_matched[
            (energy_ghgrp_matched[naics_column] != 486210)
            & (energy_ghgrp_matched.MECS_NAICS != 0)]

        if naics_column == 'PRIMARY_NAICS_CODE_12':
            # Replace the reported NAICS column with the corrected one.
            # Reassigned instead of modified in place because the frame
            # above is a filtered slice.
            energy_ghgrp_matched = energy_ghgrp_matched.drop(
                'PRIMARY_NAICS_CODE', axis=1)

            energy_ghgrp_matched = energy_ghgrp_matched.rename(
                columns={'PRIMARY_NAICS_CODE_12': 'PRIMARY_NAICS_CODE'})

        energy_ghgrp_y = energy_ghgrp_matched.groupby(
            [
                'REPORTING_YEAR', 'FACILITY_ID', 'MECS_Region', 'COUNTY_FIPS',
                'PRIMARY_NAICS_CODE', 'MECS_NAICS', 'MECS_FT', 'MECS_FT_byp'
            ],
            as_index=False).MMBtu_TOTAL.sum()

        energy_ghgrp_y['COUNTY_FIPS'] = energy_ghgrp_y.COUNTY_FIPS.astype(int)

        energy_ghgrp_y = energy_ghgrp_y.rename(
            columns={'PRIMARY_NAICS_CODE': 'naics'})

        # Share of each byproduct fuel type within its county, NAICS, and
        # MECS_FT total. Series.sum(level=...) was removed in pandas 2.0;
        # groupby(level=...).sum() is the documented replacement.
        ghgrp_byp = energy_ghgrp_y.groupby(
            ['COUNTY_FIPS', 'naics', 'MECS_FT',
             'MECS_FT_byp']).MMBtu_TOTAL.sum()

        ghgrp_byp = pd.DataFrame(
            ghgrp_byp.divide(ghgrp_byp.groupby(level=[0, 1, 2]).sum()))

        county_data = pd.read_parquet(self.county_data_file)

        county_data = county_data.groupby(
            ['data_source', 'COUNTY_FIPS', 'naics', 'Emp_Size', 'End_use',
             'MECS_FT'],
            as_index=False).MMBtu.sum()

        # Share of each end use within its county, NAICS, and MECS_FT total,
        # for GHGRP-sourced rows only.
        final_ghgrp_fuel_disagg = \
            county_data[county_data.data_source == 'ghgrp'].groupby(
                ['COUNTY_FIPS', 'naics', 'MECS_FT', 'End_use']
                ).MMBtu.sum()

        final_ghgrp_fuel_disagg = final_ghgrp_fuel_disagg.divide(
            final_ghgrp_fuel_disagg.groupby(level=[0, 1, 2]).sum())

        # Product of the end-use and byproduct-fuel shares. The two operands
        # have different names, so the resulting column is labelled 0.
        final_ghgrp_fuel_disagg = pd.DataFrame(
            final_ghgrp_fuel_disagg.multiply(ghgrp_byp.MMBtu_TOTAL))

        final_ghgrp_fuel_disagg.rename(columns={0: 'MMBtu_fraction'},
                                       inplace=True)

        final_ghgrp_fuel_disagg.dropna(inplace=True)

        final_ghgrp_fuel_disagg.to_csv(
            os.path.join(self.data_dir,
                         'ghgrp_fuel_disagg_' + str(self.year) + '.csv'),
            index=True)

        final_ghgrp_CO2e_intensity = pd.merge(
            energy_ghgrp_y.set_index(facility_keys),
            ghgrp_CO2e_intensity,
            left_index=True,
            right_index=True,
            how='left')

        # Remove MMBtu_TOTAL values of zero. The facility-level index can
        # repeat here, so a boolean mask is required.
        final_ghgrp_CO2e_intensity = final_ghgrp_CO2e_intensity[
            final_ghgrp_CO2e_intensity.MMBtu_TOTAL != 0]

        # Create weighted average CO2e intensity by county and naics
        final_ghgrp_CO2e_intensity = pd.DataFrame(
            final_ghgrp_CO2e_intensity.groupby([
                'REPORTING_YEAR', 'COUNTY_FIPS', 'naics', 'MECS_FT',
                'MECS_FT_byp'
            ]).apply(lambda x: np.average(x.MTCO2e_per_MMBtu,
                                          weights=x.MMBtu_TOTAL)))

        final_ghgrp_CO2e_intensity.rename(columns={0: 'MTCO2e_per_MMBtu'},
                                          inplace=True)

        # Do a quick QA/QC on average emission factors of standard fuel types.
        # If the weighted average is outside +/- 20% of the standard value,
        # use the standard value instead.
        def calc_ef_range(x, plusminus=0.2):
            # 'Other' fuels have no standard factor; keep the calculated one.
            if x['MECS_FT_byp'] == 'Other':
                return x['MTCO2e_per_MMBtu']

            # The standard factor is looked up by MECS_FT only. The original
            # code had a further branch that also matched on MECS_FT_byp, but
            # it could never be reached and has been removed.
            std_ef = self.std_efs[
                self.std_efs.MECS_FT == x['MECS_FT']
                ].MTCO2e_per_MMBtu.values[0]

            lower = std_ef * (1 - plusminus)
            upper = std_ef * (1 + plusminus)

            if lower <= x['MTCO2e_per_MMBtu'] <= upper:
                return x['MTCO2e_per_MMBtu']

            return std_ef

        final_ghgrp_CO2e_intensity.reset_index(inplace=True)

        qaqc_ef = final_ghgrp_CO2e_intensity.apply(calc_ef_range, axis=1)

        # Assign the column directly: df.col.update(...) mutates a temporary
        # under copy-on-write. fillna keeps the previous value where the
        # QA/QC result is missing, matching what update() did.
        final_ghgrp_CO2e_intensity['MTCO2e_per_MMBtu'] = qaqc_ef.fillna(
            final_ghgrp_CO2e_intensity['MTCO2e_per_MMBtu'])

        # Fill biomass emission factor = 0. A masked assignment touches only
        # this column, so the dtypes of the other columns are unchanged.
        final_ghgrp_CO2e_intensity.loc[
            final_ghgrp_CO2e_intensity.MECS_FT_byp == 'Biomass',
            'MTCO2e_per_MMBtu'] = 0

        # Save results
        final_ghgrp_CO2e_intensity.to_csv(
            os.path.join(self.data_dir,
                         'ghgrp_CO2e_intensity_' + str(self.year) + '.csv'),
            index=False)

        return final_ghgrp_CO2e_intensity, final_ghgrp_fuel_disagg
コード例 #5
0
def Manufacturing(calculation_years=range(2010, 2017)):
    """
    Calculate manufacturing energy use for each requested year.

    For every year, GHGRP facilities are matched against County Business
    Patterns (CBP) data, combustion and electricity energy are calculated,
    and the yearly results are concatenated into one dask dataframe.

    CBP data were only available through 2016 when this was written
    (2017 was scheduled for Nov 2019), hence the default range.

    Side effects: prints progress messages; writes
    ``../calculation_data/cbp_corrected_mining.csv`` when 2012 is processed;
    writes ``mecs_intensities.pkl`` for MECS years (2010 and 2014) and reads
    it back for all other years. A non-MECS year therefore uses whatever
    intensities were pickled most recently.

    Parameters
    ----------
    calculation_years : iterable of int
        Years to process, in order. Defaults to 2010 through 2016.

    Returns
    -------
    dask dataframe
        Combustion energy, estimated electricity, and GHGRP electricity for
        all requested years.

    Raises
    ------
    ValueError
        If ``calculation_years`` is empty.
    """
    # Materialize once so any iterable is accepted, and fail before the
    # expensive data load when there is nothing to calculate.
    years = list(calculation_years)

    if not years:
        raise ValueError('calculation_years must contain at least one year')

    energy_ghgrp = pd.read_parquet(
        '../results/ghgrp_energy_20191108-1416.parquet', engine='pyarrow')

    # Per-year result frames, concatenated once after the loop.
    yearly_frames = []

    for y in years:

        print(y)

        print("getting cbp")
        cbp = get_cbp.CBP(y)

        cm = county_matching.County_matching(y)

        print("matching ghgrp")
        ghgrp_matching = cm.format_ghgrp(energy_ghgrp, cbp.cbp_matching)

        # Instantiate class for a single year
        cmfg = Calculate_MfgEnergy_IEDB.Manufacturing_energy(y, energy_ghgrp)

        # update NAICS codes for energy_ghgrp based on ghgrp_matching
        print("updating naics")
        cmfg.update_naics(ghgrp_matching)

        # Separate process for combustion fuels. The counts are kept in their
        # own variable so cbp.cbp_matching can be reused for the electricity
        # matching further down.
        print("counting ghgrps")
        cbp_matching_counts = cm.ghgrp_counts(cbp.cbp_matching, ghgrp_matching)

        print("correcting naics")
        cbp_corrected = cm.correct_cbp(cbp_matching_counts)

        print("cbp_matching cols, ", cbp.cbp_matching.columns)

        # Export cbp_corrected for 2012 for mining
        if y == 2012:

            is_mining = ((cbp_corrected.naics > 210000)
                         & (cbp_corrected.naics < 220000))

            cbp_corrected[is_mining][
                ['fipstate', 'fipscty', 'naics', 'est', 'COUNTY_FIPS']
                ].to_csv('../calculation_data/cbp_corrected_mining.csv')

        # Run IPF only for MECS years, 2010 and 2014
        if y in (2010, 2014):

            seed_methods = ipf_seed.IPF_seed(year=y)

            seed_df = seed_methods.create_seed(cbp_matching_counts)

            ipf_methods = ipf.IPF(y,
                                  table3_2=seed_methods.table3_2,
                                  table3_3=seed_methods.table3_3)

            # Run IPF. Saves resulting energy values as csv
            ipf_methods.mecs_ipf(seed_df)

            mecs_intensities = cmfg.calc_intensities(cbp_matching_counts)

            mecs_intensities.to_pickle('mecs_intensities.pkl')

        else:

            # NOTE: only safe because this pickle is written by this function.
            mecs_intensities = pd.read_pickle('mecs_intensities.pkl')

        # Calculates non-ghgrp combustion energy use and combines with
        # ghgrp energy use. Distinguishes between data sources with
        # 'data_source' column.
        # This is a dask dataframe partitioned by STATE
        print("calculating combustion")
        mfg_comb_energy = cmfg.combfuel_calc(cbp_corrected, mecs_intensities)

        mfg_comb_energy['year'] = y

        # EIA electricity data; dask dataframe, partitioned by STATE
        print("calculating electricity")
        ghgrp_electricity, elect_fac_ids = cmfg.GHGRP_electricity_calc()

        # GHGRP matching for EIA electricity data
        ghgrp_matching_923 = pd.DataFrame(
            ghgrp_matching[ghgrp_matching.FACILITY_ID.isin(elect_fac_ids)])

        print('cbp_matching: ', cbp.cbp_matching.columns)

        cbp_matching_923 = cm.ghgrp_counts(cbp.cbp_matching,
                                           ghgrp_matching_923)

        cbp_corrected_923 = cm.correct_cbp(cbp_matching_923)

        # Estimate non-ghgrp electricity use. Dask dataframe partitioned by
        # STATE
        mfg_elect_energy = cmfg.electricity_calc(cbp_corrected_923,
                                                 mecs_intensities)

        mfg_elect_energy['year'] = y

        yearly_frames.extend(
            [mfg_comb_energy, mfg_elect_energy, ghgrp_electricity])

    # dd.concat is the public name for the function the original reached
    # through dd.multi.concat. A single concatenation gives the same frame
    # order as the previous year-by-year accumulation.
    mfg_energy = dd.concat(yearly_frames,
                           axis=0,
                           join='outer',
                           interleave_partitions=True)

    return mfg_energy