def interpolate_residuals(self, price_df, coeff):
    """Compute price residuals and interpolate them.

    The residual is the 'MECS_price' column minus the 'predicted'
    column. The input frame is copied, so price_df is not modified.

    Args:
        price_df (pd.DataFrame): Must contain 'MECS_price' and
                                 'predicted' columns; its index must
                                 be castable to int
        coeff: Accepted for interface compatibility; not used here

    Returns:
        pd.DataFrame: Output of standard_interpolation on the
                      'residual' column, plus an 'interp_resid'
                      column holding 'residual' forward-filled and
                      then back-filled
    """
    working = price_df.copy()
    working['residual'] = working['MECS_price'] - working['predicted']
    working.index = working.index.astype(int)

    result = standard_interpolation(working,
                                    name_to_interp='residual',
                                    axis=1)

    # Forward-fill first, then back-fill whatever is still missing
    forward_filled = result['residual'].ffill()
    result['interp_resid'] = forward_filled.bfill()
    return result
    def manufacturing_energy_data(self):
        """Collect Manufacturing energy consumption
        by fuel type

        Loads the 3-digit NAICS MECS data, keeps only the
        'Total United States' rows and, for each NAICS code, coerces
        the values to numeric and interpolates every column over the
        integer year index.

        Returns:
            all_manufacturing (pd.DataFrame): Energy Consumption
                                              by fuel type and NAICS
                                              code, indexed by year
        """
        __, industrial_btu = \
            Manufacturing(naics_digits=3).mecs_data_by_year()
        national = \
            industrial_btu[
                industrial_btu['region'] == 'Total United States']
        # Drop both bookkeeping columns from the same frame. Dropping
        # them in two steps that each start from the undropped frame
        # would let 'region' survive into the numeric coercion and
        # interpolation below.
        manufacturing = \
            national.drop(['region', 'Total'], axis=1, errors='ignore')
        manufacturing = manufacturing.dropna(how='all', axis=1)
        manufacturing = manufacturing.fillna(np.nan)
        # Keep only real NAICS rows (no blanks, totals or RSE footers)
        manufacturing = manufacturing[(manufacturing['NAICS'].notnull())
                                      & (manufacturing['NAICS'] != 'Total')
                                      & (manufacturing['NAICS'] !=
                                         'RSE Column Factors:')]

        all_manufacturing = []
        for n in manufacturing['NAICS'].unique():
            manufacturing_naics = manufacturing[manufacturing['NAICS'] == n]
            manufacturing_naics = manufacturing_naics.drop('NAICS', axis=1)
            manufacturing_naics = manufacturing_naics.set_index('Year')

            # axis=1 hands each row to to_numeric; values that cannot
            # be parsed become NaN
            manufacturing_naics = \
                manufacturing_naics.apply(
                    lambda row: pd.to_numeric(row, errors='coerce'), axis=1)
            manufacturing_naics.index = manufacturing_naics.index.astype(int)
            manufacturing_naics = manufacturing_naics.sort_index()
            for c in manufacturing_naics.columns:
                manufacturing_naics = \
                    standard_interpolation(manufacturing_naics,
                                           name_to_interp=c,
                                           axis=1)

            # Label and collect once per NAICS code, after every
            # column has been interpolated
            manufacturing_naics['NAICS'] = n
            all_manufacturing.append(manufacturing_naics)
        all_manufacturing = pd.concat(all_manufacturing, axis=0)

        return all_manufacturing
Example #3
0
    def build_mining_output(factor, gross_output, value_added, elec, fuels,
                            sector_estimates_elec, sector_estimates_fuels,
                            col_name):
        """Build data dictionary for the mining subsector

        Energy intensity (energy / (gross output * factor)) is computed
        where energy data exist, interpolated and forward-filled, then
        multiplied back by the scaled output to give energy series.

        Args:
            factor: Multiplier applied to gross_output before computing
                    intensities
            gross_output (pd.DataFrame): Gross output; must be
                                         single-column, since the scaled
                                         frame is stored as one column
            value_added (pd.DataFrame): Passed through unchanged
            elec (pd.DataFrame): Electricity data with column col_name
            fuels (pd.DataFrame): Fuels data with column col_name
            sector_estimates_elec (pd.DataFrame): Additional electricity
                                                  rows, appended to elec
            sector_estimates_fuels (pd.DataFrame): Additional fuels
                                                   rows, appended to fuels
            col_name (str): Name of the data column in the energy
                            inputs and in the returned energy frames

        Returns:
            data_dict (dict): {'energy': {'elec', 'fuels'},
                               'activity': {'gross_output',
                                            'value_added'}}

        Note:
            The indexes of gross_output, elec, fuels and both
            sector_estimates frames are cast to int in place, so callers
            see the change. No columns are added to the caller's frames.
        """
        gross_output.index = gross_output.index.astype(int)
        elec.index = elec.index.astype(int)
        fuels.index = fuels.index.astype(int)
        sector_estimates_elec.index = sector_estimates_elec.index.astype(int)
        sector_estimates_fuels.index = sector_estimates_fuels.index.astype(int)

        elec = elec.rename(columns={col_name: 'elec'})
        fuels = fuels.rename(columns={col_name: 'fuels'})

        sector_estimates_elec = \
            sector_estimates_elec.rename(
                columns={col_name: 'elec'})
        sector_estimates_fuels = \
            sector_estimates_fuels.rename(
                columns={col_name: 'fuels'})
        elec = pd.concat([elec, sector_estimates_elec], axis=0)
        fuels = pd.concat([fuels, sector_estimates_fuels], axis=0)

        # Work on a copy so the temporary 'output_by_factor' column is
        # never left behind on the caller's gross_output frame.
        scaled_output = gross_output.copy()
        scaled_output['output_by_factor'] = gross_output.multiply(factor)

        elec_df = scaled_output.merge(elec,
                                      how='outer',
                                      left_index=True,
                                      right_index=True)

        fuels_df = scaled_output.merge(fuels,
                                       how='outer',
                                       left_index=True,
                                       right_index=True)

        # .values makes the division positional rather than
        # index-aligned
        elec_df['elec_intensity'] = \
            elec_df['elec'].divide(elec_df['output_by_factor'].values)

        elec_df = \
            standard_interpolation(elec_df, name_to_interp='elec_intensity',
                                   axis=1).ffill()

        fuels_df['fuels_intensity'] = \
            fuels_df['fuels'].divide(fuels_df['output_by_factor'].values)

        fuels_df = \
            standard_interpolation(fuels_df, name_to_interp='fuels_intensity',
                                   axis=1).ffill()

        # Convert the filled intensities back into energy quantities
        electricity_final = \
            elec_df[['elec_intensity']].multiply(
                elec_df['output_by_factor'], axis='index')
        electricity_final = \
            electricity_final.rename(columns={'elec_intensity': col_name})

        fuels_final = \
            fuels_df[['fuels_intensity']].multiply(
                fuels_df['output_by_factor'], axis='index')

        fuels_final = fuels_final.rename(columns={'fuels_intensity': col_name})

        gross_output = scaled_output.drop('output_by_factor', axis=1)
        data_dict = {
            'energy': {
                'elec': electricity_final,
                'fuels': fuels_final
            },
            'activity': {
                'gross_output': gross_output,
                'value_added': value_added
            }
        }
        return data_dict
Example #4
0
    def construction(self):
        """Build data dictionary for the construction sector

        Electricity and fuel intensities (energy / (gross output *
        0.0001)) are interpolated and back-filled, then multiplied by
        the scaled output again to give energy series.

        https://www.census.gov/data/tables/2017/econ/economic-census/naics-sector-23.html
        https://www.census.gov/data/tables/2012/econ/census/construction.html
        http://factfinder2.census.gov/faces/tableservices/jsf/pages/productview.xhtml?pid=ECN_2007_US_23I1&prodType=table
        http://factfinder2.census.gov/faces/tableservices/jsf/pages/productview.xhtml?pid=ECN_2002_US_23I04A&prodType=table
        http://www.census.gov/epcd/www/97EC23.HTM
        http://www.census.gov/prod/www/abs/cciview.html

        Returns:
            data_dict (dict): {'energy': {'elec', 'fuels'},
                               'activity': {'gross_output',
                                            'value_added'}}, each a
                              dataframe with a 'Construction' column
        """
        # NonMan_output_data / M, Y
        value_added, gross_output = self.indicators_nonman_2018_bea()
        value_added = value_added[['Construction']]

        gross_output = \
            gross_output[['Construction']].rename(
                columns={'Construction': 'Gross Output'})
        gross_output['Output*0.0001'] = \
            gross_output['Gross Output'].multiply(0.0001)

        electricity, fuels = self.construction_raw_data()

        elec_intensity = electricity.merge(gross_output,
                                           how='outer',
                                           left_index=True,
                                           right_index=True)

        # .values makes the division positional rather than
        # index-aligned
        elec_intensity['elec_intensity'] = \
            elec_intensity['Electricity'].divide(
                elec_intensity['Output*0.0001'].values)
        # .bfill() is the non-deprecated spelling of
        # fillna(method='bfill')
        elec_intensity = \
            standard_interpolation(elec_intensity,
                                   name_to_interp='elec_intensity',
                                   axis=1).bfill()
        fuels_intensity = \
            fuels.merge(gross_output, how='outer',
                        left_index=True, right_index=True)
        fuels_intensity['fuels_intensity'] = \
            fuels_intensity['Total Fuel'].divide(
                fuels_intensity['Output*0.0001'].values)

        # The 1982 and 2002 fuel intensities are blanked so that they
        # are re-derived by interpolation.
        # NOTE(review): the reason for excluding these two years is not
        # visible here -- confirm.
        fuels_intensity.loc[1982, 'fuels_intensity'] = np.nan
        fuels_intensity.loc[2002, 'fuels_intensity'] = np.nan
        fuels_intensity = \
            standard_interpolation(fuels_intensity,
                                   name_to_interp='fuels_intensity',
                                   axis=1).bfill()

        # Convert the filled intensities back into energy quantities
        final_electricity = elec_intensity[['elec_intensity']].multiply(
            elec_intensity['Output*0.0001'], axis='index')
        final_electricity = final_electricity.rename(
            columns={'elec_intensity': 'Construction'})

        final_fuels = fuels_intensity[['fuels_intensity']].multiply(
            fuels_intensity['Output*0.0001'], axis='index')

        final_fuels = final_fuels.rename(
            columns={'fuels_intensity': 'Construction'})

        gross_output = gross_output.drop('Output*0.0001', axis=1)

        gross_output = gross_output.rename(
            columns={'Gross Output': 'Construction'})

        data_dict = {
            'energy': {
                'elec': final_electricity,
                'fuels': final_fuels
            },
            'activity': {
                'gross_output': gross_output,
                'value_added': value_added
            }
        }
        return data_dict
    def energy_data(self):
        """Collect energy consumption by fuel type data for
        the Industrial Sector (organized by subcategory)

        Manufacturing data come from manufacturing_energy_data();
        construction, agriculture and mining data are read from files
        under ./EnergyIntensityIndicators/Industry/Data/. Mining data
        are interpolated per NAICS code and summed to 4-digit NAICS
        groups; groups 2121, 2122 and 2123 are further combined into
        'Other Mining'.

        Returns:
            data (dict): Nested dictionary containing energy
                         consumption by fuel type dataframes, keyed
                         'Manufacturing' and 'NonManufacturing'
                         ('Mining', 'Construction',
                         'Agriculture, Forestry & Fishing')
        """
        all_manufacturing = self.manufacturing_energy_data()

        data_dir = './EnergyIntensityIndicators/Industry/Data/'
        construction_elec_fuels = \
            pd.read_csv(
                f'{data_dir}construction_elec_fuels.csv').set_index('Year')
        agriculture = \
            pd.read_excel(
                f'{data_dir}miranowski_data.xlsx',
                sheet_name='Ag Cons by Use', skiprows=4, skipfooter=50,
                usecols='A:F', index_col=0,
                names=['Year', 'Gasoline', 'Diesel', 'LP Gas',
                       'Natural Gas', 'Electricity'])

        # Mining
        mining = \
            pd.read_csv(
                f'{data_dir}mining_energy.csv')
        mining = mining.fillna(np.nan)
        mining = mining.dropna(how='all', axis=1)
        mining = mining[mining['NAICS'].notnull()]
        mining = mining.astype({'Year': int, 'NAICS': int})

        all_mining = []
        for n in mining['NAICS'].unique():
            mining_naics = mining[mining['NAICS'] == n]
            mining_naics = mining_naics.drop('NAICS', axis=1)
            mining_naics = mining_naics.set_index('Year')
            # axis=1 hands each row to to_numeric; values that cannot
            # be parsed become NaN
            mining_naics = \
                mining_naics.apply(
                    lambda row: pd.to_numeric(row, errors='coerce'), axis=1)

            for c in mining_naics.columns:
                mining_naics = \
                    standard_interpolation(mining_naics,
                                           name_to_interp=c,
                                           axis=1)

            # Label and collect each NAICS frame exactly once, after
            # all of its columns have been interpolated. Appending
            # inside the column loop would add one frame per column and
            # inflate the grouped sums below.
            mining_naics['NAICS 4 Digit'] = int(str(n)[:4])
            all_mining.append(mining_naics)

        all_mining = pd.concat(all_mining, axis=0)
        all_mining = all_mining.reset_index()
        # Aggregate detailed NAICS codes up to their 4-digit group
        all_mining = all_mining.groupby(['Year', 'NAICS 4 Digit']).sum()

        all_mining = all_mining.reset_index()

        industry_names = \
            {2111: 'Petroleum and Natural Gas',
             2121: 'Coal Mining',
             2122: 'Metal Ore Mining',
             2123: 'Nonmetallic Mineral Mining and Quarrying',
             2131: 'Support Activities'}

        all_mining_data = dict()
        other_mining_data = []
        for number, name in industry_names.items():
            mining_df = all_mining[all_mining['NAICS 4 Digit'] == number]
            mining_df = mining_df.drop(['Total Fuel', 'NAICS 4 Digit'],
                                       axis=1,
                                       errors='ignore')
            if number in [2121, 2122, 2123]:
                # Coal, metal ore and nonmetallic mineral mining are
                # reported together as 'Other Mining'
                other_mining_data.append(mining_df)
            else:
                mining_df = mining_df.set_index('Year')
                all_mining_data[name] = mining_df

        other_mining_data = pd.concat(other_mining_data, axis=0)
        other_mining_data = other_mining_data.groupby('Year').sum()
        all_mining_data['Other Mining'] = other_mining_data

        data = {
            'Manufacturing': all_manufacturing,
            'NonManufacturing': {
                'Mining': all_mining_data,
                'Construction': construction_elec_fuels,
                'Agriculture, Forestry & Fishing': agriculture
            }
        }
        return data