# Example 1 (the original bare "Esempio n. 1" / "0" header lines were not valid Python)
# NOTE(review): this module references pd, np, linear_model, reduce, GetEIAData,
# ResidentialFloorspace and CalculateLMDI, but no import block is visible in this
# chunk — confirm the imports exist upstream of this code.
class WeatherFactors: 
    def __init__(self, sector, directory, activity_data=None,
                 residential_floorspace=None, nominal_energy_intensity=None,
                 end_year=2018):
        """Store sector-level inputs and load shared lookup data.

        Args:
            sector (str): Sector name ('residential' or 'commercial').
            directory (str): Folder containing the LMDI price workbook.
            activity_data: Sector activity data (e.g. commercial floorspace).
            residential_floorspace: Residential floorspace data.
            nominal_energy_intensity: Pre-computed nominal intensity data
                (used by the residential sector).
            end_year (int): Last year of the analysis period.
        """
        self.sector = sector
        self.directory = directory
        self.end_year = end_year
        self.activity_data = activity_data
        self.residential_floorspace = residential_floorspace
        self.nominal_energy_intensity = nominal_energy_intensity
        # EIA API helper scoped to this sector.
        self.eia_data = GetEIAData(self.sector)
        # LMDI price series; header/usecols follow the source workbook layout.
        self.lmdi_prices = pd.read_excel(f'{self.directory}/EnergyPrices_by_Sector_010820_DBB.xlsx',
                                         sheet_name='LMDI-Prices', header=14, usecols='A:B, EY')
        # Regions and their subregions, in the order used by the weights data.
        self.regions_subregions = ['northeast', 'new_england', 'middle_atlantic', 'midwest',
                                   'east_north_central', 'west_north_central', 'south',
                                   'south_atlantic', 'east_south_central', 'west_south_central',
                                   'west', 'mountain', 'pacific']
        self.sub_regions_dict = {'northeast': ['New England', 'Middle Atlantic'],
                                 'midwest': ['East North Central', 'West North Central'],
                                 'south': ['South Atlantic', 'East South Central', 'West South Central'],
                                 'west': ['Mountain', 'Pacific']}
      
    @staticmethod
    def adjust_data(subregions, hdd_by_division, hdd_activity_weights, cooling=True, cdd_by_division=None, \
                    cdd_activity_weights=None, use_weights_1961_90=True):
        """Calculate weights for adjusted weather factors prediction
        """        

        years_1961_90 = list(range(1961, 1990 + 1))
        years_1981_2010 = list(range(1981, 1990 + 1))

        if cooling:
            cdd_by_division = cdd_by_division.set_index('Year')
            cdd_by_division.index = cdd_by_division.index.astype(int)

            averages_1961_90_cooling = cdd_by_division.loc[years_1961_90, :].mean(axis=0)
            averages_1981_2010_cooling = cdd_by_division.loc[years_1981_2010, :].mean(axis=0)


        hdd_by_division = hdd_by_division.set_index('Year')
        hdd_by_division.index = hdd_by_division.index.astype(int)

        averages_1961_90_heating = hdd_by_division.loc[years_1961_90, :].mean(axis=0)
        averages_1981_2010_heating = hdd_by_division.loc[years_1981_2010, :].mean(axis=0)
        
        all_s_weights_heating = []
        all_s_weights_cooling = []

        for s in subregions:
            if use_weights_1961_90:
                subregion_weights_heating = averages_1961_90_heating.loc[s] * hdd_activity_weights[s]

                if cooling:
                    subregion_weights_cooling = averages_1961_90_cooling.loc[s] * cdd_activity_weights[s]
                    all_s_weights_cooling.append(subregion_weights_cooling)

            else:
                subregion_weights_heating = averages_1981_2010_heating.loc[s] * hdd_activity_weights[s]

                if cooling:
                    subregion_weights_cooling = averages_1981_2010_cooling.loc[s] * cdd_activity_weights[s]
                    all_s_weights_cooling.append(subregion_weights_cooling)

            
            all_s_weights_heating.append(subregion_weights_heating)

        weights_dict = dict()
        if cooling:
            weights_cooling = sum(all_s_weights_cooling)
            weights_dict['cooling'] = weights_cooling

        weights_heating = sum(all_s_weights_heating)
        weights_dict['heating'] = weights_heating
        return weights_dict

    def process_prices(self, weather_factors_df):
        """TODO: Are distributed lag and time cubed ever the desired variable? 
        Does this method need to exist?
        """        
        lmdi_prices = self.lmdi_prices
        # distributed_lag = 
        # time_cubed = 
        selected_variable = [1] * len(weather_factors_df)
        return selected_variable
    
    @staticmethod
    def cbecs_1995_shares():
        """Calculate fuels and elec shares for the commercial sector from CBECS 1995 data
        """        
        electricty_consumption_tbtu = {'Northeast': 436, 'Midwest': 558, 'South': 1027, 'West': 587}
        electricty_consumption_tbtu['Total'] = sum(electricty_consumption_tbtu.values())
        electricity_df = pd.DataFrame.from_dict(electricty_consumption_tbtu, orient='index', \
                                                columns=['electricity_consumption_tbtu'])

        energy_tbtu = {'Northeast': 1035, 'Midwest': 1497, 'South': 1684, 'West': 1106}
        energy_tbtu['Total'] = sum(energy_tbtu.values())
        energy_df = pd.DataFrame.from_dict(energy_tbtu, orient='index', columns=['energy'])

        shares_df = energy_df.merge(electricity_df, left_index=True, right_index=True, how='outer')


        shares_df['elec_share'] = shares_df.electricity_consumption_tbtu.divide(shares_df.loc['Total', \
                                                                                'electricity_consumption_tbtu'])
        shares_df['fuel_consumption'] = shares_df.energy.subtract(shares_df.electricity_consumption_tbtu)
        shares_df['fuels_share'] = shares_df.fuel_consumption.divide(shares_df.loc['Total', 'fuel_consumption'])
        return shares_df

    @staticmethod 
    def recs_1993_shares():
        """Calculate fuels and elec shares for the residential sector from RECS 1993 data
        """        
        electricty_consumption_tbtu = {'Northeast': 470, 'Midwest': 740, 'South': 1510, 'West': 560}
        electricty_consumption_tbtu['Total'] = sum(electricty_consumption_tbtu.values())
        electricity_df = pd.DataFrame.from_dict(electricty_consumption_tbtu, orient='index', \
                                                columns=['electricity_consumption_tbtu'])

        energy_tbtu = {'Northeast': 2380, 'Midwest': 3130, 'South': 2950, 'West': 1550}
        energy_tbtu['Total'] = sum(energy_tbtu.values())
        energy_df = pd.DataFrame.from_dict(energy_tbtu, orient='index', columns=['energy'])

        shares_df = energy_df.merge(electricity_df, left_index=True, right_index=True, how='outer')


        shares_df['elec_share'] = shares_df.electricity_consumption_tbtu.divide(shares_df.loc['Total', \
                                                                         'electricity_consumption_tbtu'])
        shares_df['fuel_consumption'] = shares_df.energy.subtract(shares_df.electricity_consumption_tbtu)
        shares_df['fuels_share'] = shares_df.fuel_consumption.divide(shares_df.loc['Total', 'fuel_consumption'])
        return shares_df

    def regional_shares(self, dataframe, cols):
        """Calulate shares of regional totals by subregion
        """        
        dataframe = dataframe.set_index('regions_subregions')
        weights_data = dict()
        for col in cols: 
            shares_dict = dict()
            for r_, subregions in self.sub_regions_dict.items():
                subregions = [s.lower().replace(' ', '_') for s in subregions]
                regions_ = subregions + [r_]
                region_total = dataframe.loc[r_, col]
                for r in regions_:
                    share_value = dataframe.loc[r, col] / region_total
                    shares_dict[r] = share_value
            weights_data[col] = shares_dict
        return weights_data

    def gather_weights_data(self):
        """Calculate weights to aggregate subregions into four regions
        """        
        if self.sector == 'residential':
            electricity_data = {'total_elec_tbtu': {'northeast': 470, 'midwest': 740, 
                                                    'south': 1510, 'west': 560}, 
                                'heating_tbtu': {'northeast': 12 * 3.412, 'midwest': 22 * 3.412, 
                                                 'south': 61 * 3.412, 'west': 25 * 3.412}, 
                                'cooling_tbtu': {'northeast': 40, 'midwest': 80, 
                                                 'south': 310, 'west': 30}}
            fuels_data = {'all_energy_tbtu': {'northeast': 2380, 'midwest': 3130, 
                                              'south': 2950, 'west': 1550}, 
                          'electricity_tbtu': {'northeast': 470, 'midwest': 740, 
                                               'south': 1510, 'west': 560}, 
                          'heating_all_energy_tbtu': {'northeast': 1490, 'midwest': 1920, 
                                                      'south': 1210, 'west': 700}}
            # Residential Heating Households Millions
            heating_activity = [4.1, 1, 3.1, 5.8, 3.5, 2.4, 18.8, 10.7, 3.4, 4.8, 8.3, 2, 6.3]
            # Residential Cooling Households Millions
            cooling_activity = [10.9, 2.1, 8.8, 16.4, 10.8, 5.6, 29.4, 15, 5.3, 9.2, 7.1, 2.1, 5.1]             
            all_energy = [19.1, 4.9, 14.2, 23.2, 16.3, 6.9, 32.8, 16.8, 5.9, 10.1, 19.4, 5.3, 14.1]
            electricity = [1.9, 0.5, 1.4, 2.9, 1.6, 1.3, 14.6, 8.7, 2.5, 3.4, 5.6, 1.4, 4.2]

        elif self.sector == 'commercial':
            electricity_data = {'total_elec_tbtu': {'northeast': 436, 'midwest': 558, 
                                                    'south': 1027, 'west': 587}, 
                                 'heating_tbtu': {'northeast': 18, 'midwest': 23, 
                                                  'south': 43, 'west': 28}, 
                                 'cooling_tbtu': {'northeast': 44, 'midwest': 60, 
                                                  'south': 172, 'west': 64}}
            fuels_data = {'all_energy_tbtu': {'northeast': 1035, 'midwest': 1497, 
                                              'south': 1684, 'west': 1106}, 
                          'electricity_tbtu': {'northeast': 436, 'midwest': 558, 
                                               'south': 1027, 'west': 587}, 
                          'heating_all_energy_tbtu': {'northeast': 385, 'midwest': 668, 'south': 376,
                                    'west': 275}}
            # Commercial Heating Floorspace Million SF
            heating_activity = [657, 137, 520, 779, 345, 434, 3189, 1648, 1140, 401, 1219, 469, 750]
            # Commercial Cooling Floorspace Million SF
            cooling_activity = [5919, 1472, 4447, 10860, 7301, 3559, 13666, 6512, 3265, 3889, 7058, 2812, 4246]
            all_energy = [7661, 2031, 5630, 10860, 7301, 3559, 13666, 6512, 3265, 3889, 7065, 2819, 4246]
            electricity = [657, 137, 520, 779, 345, 434, 3189, 1648, 1140, 401, 1219, 469, 750]
        else:
            return None
        
        weights_data_ = {'regions_subregions': self.regions_subregions, 'heating_activity': heating_activity, 
                        'cooling_activity': cooling_activity, 'all_energy': all_energy, 'electricity': electricity}
        
        weights_df = pd.DataFrame(data=weights_data_)
        
        weights_df['fuels'] = weights_df['all_energy'].subtract(weights_df['electricity'])
        return weights_df
    
    def heating_cooling_data(self):
        """Assemble annual heating/cooling degree-day data by census division.

        Historical CSV data is extended with annual series from the EIA API
        (category ids 1566347 for HDD, 1566348 for CDD); monthly API columns
        are dropped and the ', Annual, Number' suffix is stripped from the
        remaining column names.

        Returns:
            tuple: (hdd_by_division, cdd_by_division) DataFrames.
        """
        hdd_by_division_historical = pd.read_csv('./Data/historical_hdd_census_division.csv').set_index('Year')
        cdd_by_division_historical = pd.read_csv('./Data/historical_cdd_census_division.csv').set_index('Year')

        # Annual HDD series from the EIA API; monthly columns are not used.
        hdd_by_division = self.eia_data.eia_api(id_='1566347', id_type='category')
        hdd_to_drop = [c for c in list(hdd_by_division.columns) if 'Monthly' in c]
        hdd_by_division = hdd_by_division.drop(hdd_to_drop, axis=1)
        hdd_rename_dict = {c: c.replace(', Annual, Number', '') for c in list(hdd_by_division.columns)}
        hdd_by_division = hdd_by_division.rename(columns=hdd_rename_dict)

        # Extend the historical record with the API years.
        hdd_by_division = pd.concat([hdd_by_division_historical, hdd_by_division], sort=True)

        # Same treatment for cooling degree days.
        cdd_by_division = self.eia_data.eia_api(id_='1566348', id_type='category')
        cdd_to_drop = [c for c in list(cdd_by_division.columns) if 'Monthly' in c]
        cdd_by_division = cdd_by_division.drop(cdd_to_drop, axis=1)
        cdd_rename_dict = {c: c.replace(', Annual, Number', '') for c in list(cdd_by_division.columns)}

        cdd_by_division = cdd_by_division.rename(columns=cdd_rename_dict)

        cdd_by_division = pd.concat([cdd_by_division_historical, cdd_by_division], sort=True)

        # Map e.g. 'Heating Degree-Days, New England' -> 'New England'.
        # NOTE(review): the rename only takes effect if the columns carry
        # exactly these prefixed names after the suffix strip above — confirm
        # against the actual API column labels.
        title_case_regions = [s.replace('_', ' ').title() for s in self.regions_subregions]
        hdd_names = [f'Heating Degree-Days, {r}' for r in title_case_regions]
        cdd_names = [f'Cooling Degree-Days, {r}' for r in title_case_regions]

        hdd_new_names_dict = {name: name_title for name, name_title in zip(hdd_names, title_case_regions)}
        cdd_new_names_dict = {name: name_title for name, name_title in zip(cdd_names, title_case_regions)}

        hdd_by_division = hdd_by_division.rename(columns=hdd_new_names_dict)
        cdd_by_division = cdd_by_division.rename(columns=cdd_new_names_dict)

        return hdd_by_division, cdd_by_division

    def estimate_regional_shares(self):
        """Spreadsheet equivalent: Commercial --> 'Regional Shares'
        assumed commercial floorspace in each region follows same trends as population or housing units

        Returns:
            DataFrame of predicted commercial regional shares, normalized to
            sum to one across regions for each year.
        """
        regions = ['Northeast', 'Midwest', 'South', 'West']

        cbecs_data = pd.read_csv('./Data/cbecs_data_millionsf.csv').set_index('Year')
        cbecs_data.index = cbecs_data.index.astype(str)
        cbecs_years = list(cbecs_data.index)
        # The source CSV has stray spaces in some column headers.
        cbecs_data = cbecs_data.rename(columns={'Midwest ': 'Midwest', ' South': 'South', ' West': 'West'})

        # Back-cast 1979 floorspace from 1983 using hard-coded regional
        # decrements — presumably taken from the spreadsheet; confirm.
        cbecs_data.loc['1979', regions] = cbecs_data.loc['1983', regions].subtract([826, 972, 2665, 1212])
        cbecs_data.loc['1979', ['U.S.']] = sum(cbecs_data.loc['1979', regions].values)

        # NOTE(review): this row sum includes the existing 'U.S.' column, so
        # 'U.S. (calc)' is roughly double the regional total and the raw
        # shares below are deflated; the final renormalization may absorb
        # this — verify against the spreadsheet.
        cbecs_data['U.S. (calc)'] = cbecs_data.sum(axis=1)

        comm_regional_shares = cbecs_data.drop(['U.S.', 'U.S. (calc)'], axis=1).divide(cbecs_data['U.S. (calc)'].values.reshape(len(cbecs_data), 1))
        comm_regional_shares_ln = np.log(comm_regional_shares)

        residential_data = ResidentialFloorspace(end_year=self.end_year)  # change to pull from residential().activity()
        final_results_total_floorspace_regions, regional_estimates_all, avg_size_all_regions = residential_data.final_floorspace_estimates()

        # Regional residential housing-unit totals merged into one frame.
        regional_dfs = [regional_estimates_all[r][['Total']].rename(columns={'Total': r}) for r in regions]
        residential_housing_units = reduce(lambda x, y: pd.merge(x, y, left_index=True, right_index=True, how='outer'), regional_dfs)
        residential_housing_units['U.S.'] = residential_housing_units.sum(axis=1)
        residential_housing_units.index = residential_housing_units.index.astype(str)
        regional_shares_residential_housing_units = residential_housing_units.drop('U.S.', axis=1).divide(residential_housing_units['U.S.'].values.reshape(len(residential_housing_units), 1))
        regional_shares_residential_housing_units_ln = np.log(regional_shares_residential_housing_units)

        # Restrict the residential shares to CBECS survey years for the fit.
        regional_shares_residential_housing_units_cbecs_years = regional_shares_residential_housing_units.loc[cbecs_years, :]
        regional_shares_residential_housing_units_cbecs_years_ln = np.log(regional_shares_residential_housing_units_cbecs_years)

        # Fit ln(residential share) against ln(commercial share) over CBECS
        # years, then apply the fitted line to the full residential series.
        # NOTE(review): the fit is residential-on-commercial but the
        # prediction feeds residential shares through the same coefficients —
        # verify the regression direction against the spreadsheet.
        predictions_df = pd.DataFrame(columns=comm_regional_shares.columns, index=residential_housing_units.index)
        for region in comm_regional_shares.columns:
            x_values = comm_regional_shares_ln[region].values
            X = x_values.transpose()
            y = regional_shares_residential_housing_units_cbecs_years_ln[region].values

            p = np.polyfit(X, y, 1)
            predictions_df[region] = np.exp(regional_shares_residential_housing_units_ln[region].multiply(p[0]).add(p[1]))

        # Renormalize so predicted regional shares sum to one each year.
        predictions_df['Predicted Sum'] = predictions_df.sum(axis=1)
        normalized_shares = predictions_df.drop('Predicted Sum', axis=1).divide(predictions_df['Predicted Sum'].values.reshape(len(predictions_df), 1))
        return normalized_shares
    
    def commercial_estimate_regional_floorspace(self):
        regional_shares = self.estimate_regional_shares()
        commercial_floorspace = self.activity_data 

        regional_shares_index = regional_shares.index.astype(str)
        commercial_floorspace_reshape = commercial_floorspace.loc[regional_shares_index, :]

        regional_floorspace = regional_shares.multiply(commercial_floorspace_reshape.values)
        return regional_floorspace

    def commercial_regional_intensity_aggregate(self):
        """Calculate Energy Intensities (kBtu/sq. ft.) by region and fuel type (i.e. Fuels and Electricity) for use
        in calculating weather factors
        Returns:
            dictionary with keys: 'electricity' and 'fuels', values: dataframes of intensity data for the commercial sector
            with Year index and Region columns
        """        
        regional_floorspace = self.commercial_estimate_regional_floorspace()
        total_fuels_to_indicators, elec_to_indicators = self.eia_data.get_seds()
        
        regional_floorspace_index = regional_floorspace.index
        elec_to_indicators =  elec_to_indicators.loc[regional_floorspace_index, :]
        total_fuels_to_indicators =  total_fuels_to_indicators.loc[regional_floorspace_index, :]

        fuels_regional = regional_floorspace.multiply(total_fuels_to_indicators.drop('National', axis=1).values)
        elec_regional = regional_floorspace.multiply(elec_to_indicators.drop('National', axis=1).values)

        return {'fuels': fuels_regional, 'electricity': elec_regional}
    
    def residential_regional_intensity_aggregate(self):
        """This function does not need to exist if nominal_energy_intensity is properly formated, change formatting here if not
        Returns:
            dictionary with keys: 'electricity' and 'fuels', values: dataframes of intensity data for the residential sector
            with Year index and Region columns
            i.e. {'fuels': fuels_regional, 'electricity': elec_regional}
        """        

        nominal_energy_intensity = self.nominal_energy_intensity # nominal_energy_intensity should already be formated in this way 

        return nominal_energy_intensity

    def weather_factors(self, region, energy_type, actual_intensity, weights_df, regional_weights):
        """Estimate a simple regression model to fit the regional intensity to a linear function of time (included squared and cubed values of time) and degree days.
        -electricity model: constant term, heating degree day (HDD), cooling degree day (CDD), time, time-squared, and time-cubed
        -fuels model: constant term?, HDD, HDD*Time, Time, Time-squared and composite fuel price index (the composite fuel price index was developed as a weighted average of the national distillate
            fuel oil price index and a national average price for natural gas)
        Weather factors are applied at the regional level to generate the weather-normalized intensity indexes for each of the four Census regions

        -The weather factors for delivered energy and source energy are computed implicitly. For delivered energy, they are calculated
        as the sum of reported electricity and fuels divided by the sum of the weather-adjusted electricity and weather-adjusted fuels.
        A similar procedure is followed for source energy. As such, the implied weather factors are a result of the process, not an independent
        variable that influences the values of intensity indexes for delivered energy and source energy. All of these computation occur within Commercial_Total worksheet.

        Returns:
            tuple: (weather_factor_df, weather_normalized_intensity).

        TODO: Input data 
        """
        # Internal short name used by the regression branches below.
        if energy_type == 'electricity':
            energy_type = 'elec'
        subregions = self.sub_regions_dict[region]
        subregions_lower = [s.lower().replace(' ', '_') for s in subregions]
        # Activity/fuels weights come from regional_shares(), keyed by the
        # lower-case subregion names.
        hdd_activity_weights = [regional_weights['heating_activity'][r_] for r_ in subregions_lower]
        hdd_activity_weights_dict = {r : regional_weights['heating_activity'][r_] for r, r_ in zip(subregions, subregions_lower)}
        cdd_activity_weights = [regional_weights['cooling_activity'][r_] for r_ in subregions_lower]
        cdd_activity_weights_dict = {r : regional_weights['cooling_activity'][r_] for r, r_ in zip(subregions, subregions_lower)}
        fuels_weights = [regional_weights['fuels'][r_] for r_ in subregions_lower]
        
        hdd_by_division, cdd_by_division = self.heating_cooling_data()

        heating_degree_days = hdd_by_division[subregions]

        heating_degree_days = heating_degree_days.reset_index('Year')

        # Aggregate subregion HDD into a single regional series.
        heating_degree_days[region] = heating_degree_days[subregions].dot(hdd_activity_weights)

        # NOTE(review): this binds an alias, not a copy — the next line
        # overwrites heating_degree_days[region] with the fuels-weighted
        # aggregate, so the 'HDD' column built below is fuels-weighted for
        # BOTH energy types. If the electricity model should use the
        # activity-weighted HDD, a .copy() is needed here — confirm against
        # the spreadsheet.
        fuels_heating_degree_days = heating_degree_days
        fuels_heating_degree_days[region] = fuels_heating_degree_days[subregions].dot(fuels_weights)

        weather_factors_df = heating_degree_days[['Year', region]].rename(columns={region: 'HDD'})
        weather_factors_df['Year'] = weather_factors_df['Year'].astype(int)

        # Time trend measured from 1969.
        weather_factors_df['Time'] = weather_factors_df['Year'].values - 1969
        weather_factors_df['Time^2'] = weather_factors_df[['Time']].pow(2).values

        if energy_type == 'elec': 
            cooling_degree_days = cdd_by_division[subregions]
            cooling_degree_days[region] = cooling_degree_days[subregions].dot(cdd_activity_weights)
            cooling_degree_days = cooling_degree_days.reset_index('Year')
            cooling_degree_days['Year'] = cooling_degree_days['Year'].astype(int)

            weather_factors_df_cooling = cooling_degree_days[['Year', region]].rename(columns={region: 'CDD'})
            weather_factors_df = weather_factors_df.merge(weather_factors_df_cooling, on='Year', how='outer')

            weather_factors_df['Time^3'] = weather_factors_df[['Time']].pow(3).values
            weather_factors_df = weather_factors_df.set_index('Year')
            weather_factors_df.index = weather_factors_df.index.astype(int)
            
            # Electricity regressors: HDD, CDD, Time, Time^2, Time^3.
            X_data = weather_factors_df[['HDD', 'CDD', 'Time', 'Time^2', 'Time^3']]

        elif energy_type == 'fuels': 
            weather_factors_df['HDD*Time'] = heating_degree_days[region].multiply(weather_factors_df['Time'])
            weather_factors_df['Price'] = self.process_prices(weather_factors_df)
            weather_factors_df = weather_factors_df.set_index('Year')
            weather_factors_df.index = weather_factors_df.index.astype(int)
            # Fuels regressors: HDD, HDD*Time, Time, Time^2, Price.
            X_data = weather_factors_df[['HDD', 'HDD*Time', 'Time', 'Time^2', 'Price']]

        # elif self.energy_type == 'delivered':
        #     weather_factor = (reported_electricity + fuels) / (weather_adjusted_electrity + weather_adjusted_fuels)
        #     return weather_factor
        else:
            raise KeyError(f'Missing valid energy type. Type given: {energy_type}')

        # Align regressors with the observed intensity; the intensity column
        # is expected to be named after the capitalized region.
        actual_intensity.index = actual_intensity.index.astype(int)  
        data = X_data.merge(actual_intensity, left_index=True, right_index=True, how='inner').dropna()
        X = data.drop(region.capitalize(), axis=1)
        Y = data[[region.capitalize()]]

        reg = linear_model.LinearRegression()
        reg.fit(X, Y)
        coefficients = reg.coef_
        # Y is a one-column frame, so coef_ is 2-D; take the single row.
        coefficients = coefficients[0]
        intercept = reg.intercept_
        predicted_value_intensity_actualdd = reg.predict(X)  # Predicted value of the intensity based on actual degree days

        if  energy_type == 'elec': 
            prediction2_weights = self.adjust_data(subregions=subregions, hdd_by_division=heating_degree_days, cdd_by_division=cooling_degree_days, 
                                                   cdd_activity_weights=cdd_activity_weights_dict, hdd_activity_weights=hdd_activity_weights_dict,
                                                   use_weights_1961_90=True)
            predicted_value_intensity_ltaveragesdd = intercept + coefficients[0] * prediction2_weights['heating'] + coefficients[1] * prediction2_weights['cooling'] + \
                                                    coefficients[2] * data['Time'] + coefficients[3] * data['Time^2'] + coefficients[4] * data['Time^3']  # Predicted value of the intensity based on the long-term averages of the degree days
        
        elif energy_type == 'fuels': 
            prediction2_weights = self.adjust_data(subregions=subregions, hdd_by_division=heating_degree_days, 
                                                   hdd_activity_weights=hdd_activity_weights_dict, cooling=False,
                                                   use_weights_1961_90=True)
            # NOTE(review): the fuels X matrix is ['HDD', 'HDD*Time', 'Time',
            # 'Time^2', 'Price'], but coefficients[1] (the HDD*Time term) is
            # multiplied by Time alone here and coefficients[2] (Time) is
            # applied to Time again — verify the intended long-term-average
            # formula against the spreadsheet.
            predicted_value_intensity_ltaveragesdd = intercept + coefficients[0] * prediction2_weights['heating'] + coefficients[1] * data['Time'] + \
                                                     coefficients[2] * data['Time'] + coefficients[3] * data['Time^2'] + coefficients[4] * data['Price'] # Predicted value of the intensity based on the long-term averages of the degree days

        # Weather factor = predicted intensity with actual degree days over
        # predicted intensity with long-term-average degree days.
        weather_factor = predicted_value_intensity_actualdd.flatten() / predicted_value_intensity_ltaveragesdd.values.flatten()

        try:
            weather_normalized_intensity = actual_intensity.loc[data.index].divide(weather_factor.reshape(len(weather_factor), 1))
        except Exception:
            # Fall back to unshaped division (e.g. when actual_intensity is a
            # Series rather than a one-column frame).
            try:
                weather_normalized_intensity = actual_intensity.loc[data.index].divide(weather_factor)
            except Exception as e:
                raise ValueError(f'Failure to divide: {actual_intensity.shape} by {weather_factor.shape}, failed with error {e}')


        weather_factor_df = pd.DataFrame(data={'Year': data.index, f'{region}_weather_factor': weather_factor}).set_index('Year')
        return weather_factor_df, weather_normalized_intensity
    
    def national_method1_fixed_end_use_share_weights(self, energy_type_):
        """Used fixed weights to develop from regional factors, weighted by regional energy share from 1995 CBECS
        (commercial) or 1993 RECS (residential).

        Args:
            energy_type_ (str): energy type key, e.g. 'elec'/'electricity' or
                'fuels' — presumably matching the intensity-data keys; confirm
                against callers.

        Returns:
            DataFrame: regional weather factors plus a
            '<energy_type_>_weather_factor' national column.
        """
        # NOTE(review): there is no else-branch here — any sector other than
        # 'commercial'/'residential' leaves `shares` and `intensity_df`
        # unbound and raises NameError below.
        if self.sector == 'commercial':
            shares = self.cbecs_1995_shares()
            intensity_df = self.commercial_regional_intensity_aggregate()
        elif self.sector == 'residential':
            intensity_df = self.residential_regional_intensity_aggregate()
            shares = self.recs_1993_shares()
        
        # The share tables use the long name 'electricity' for their columns.
        if energy_type_ == 'elec':
            energy_type = 'electricity'
        else:
            energy_type = energy_type_
        
        regional_weather_factors = []
        weights_df = self.gather_weights_data()
        regional_weights = self.regional_shares(dataframe=weights_df, cols=['heating_activity', 'cooling_activity', 'fuels'])

        for region in self.sub_regions_dict.keys():
            region_cap = region.capitalize()
            # Residential intensity data is keyed region-then-type; commercial
            # is keyed type-then-region (dict of DataFrames).
            if self.sector == 'residential':
                regional_intensity = intensity_df[region_cap][energy_type_]
            elif self.sector == 'commercial':
                regional_intensity = intensity_df[energy_type_][region_cap]

            weather_factors, weather_normalized_intensity = self.weather_factors(region, energy_type_, actual_intensity=regional_intensity, weights_df=weights_df, regional_weights=regional_weights)
            regional_weather_factors.append(weather_factors)
        
        # Combine regional factors and add an (initially empty) national column.
        weather_factors_all = pd.concat(regional_weather_factors, axis=1)
        weather_factors_all = weather_factors_all.reindex(columns=list(weather_factors_all.columns) + [f'{energy_type_}_weather_factor'])
        for y in weather_factors_all.index:
            # Normalize back to the share-table key ('elec'); this assignment
            # is idempotent after the first loop iteration.
            if energy_type == 'electricity': 
                energy_type = 'elec'

            share_name = f'{energy_type}_share'

            # National factor = share-weighted average of the regional factors.
            year_weather = weather_factors_all.drop(f'{energy_type_}_weather_factor', axis=1).loc[y, :]
            weights = shares[share_name].drop('Total')
            year_factor = year_weather.dot(weights.to_numpy())

            weather_factors_all.loc[y, [f'{energy_type_}_weather_factor']] = year_factor
        return weather_factors_all


    def national_method2_regression_models(self, seds_data, weather_factors):
        """Compute the implicit national weather factor from regional SEDS data.

        Multiplies regional SEDS consumption by the regional weather factors,
        sums to a weather-adjusted national total, and returns reported
        national consumption divided by that total.
        """
        seds_data, weather_factors = CalculateLMDI.ensure_same_indices(seds_data, weather_factors)

        # Weather-adjust each region, then aggregate to a national total.
        adjusted = seds_data.drop('National', axis=1).multiply(weather_factors.values)
        adjusted['National'] = adjusted.sum(axis=1)

        national_adjusted = adjusted['National'].values.reshape(len(adjusted), 1)
        return seds_data[['National']].divide(national_adjusted)

    
    def adjust_for_weather(self, data, energy_type):
        """Adjust data by weather factors
            Parameters
            ----------
            data: dataframe
                dataset to adjust by weather
            weather_factors: array?
            Returns
            -------
            weather_adjusted_data: dataframe ?
        """
        weather_factors = self.national_method1_fixed_end_use_share_weights(energy_type)
        weather_adjusted_data = data / weather_factors[energy_type]

        return weather_adjusted_data

    def get_weather(self, energy_dict=None, energy_type=None, energy_df=None, weather_adjust=False, seds_data=None):
        if self.sector == 'residential':
            if weather_adjust: 
                for type, energy_dataframe in energy_dict.items():
                    weather_adj_energy = self.adjust_for_weather(energy_dataframe, type)
                    energy_dict[f'{type}_weather_adj'] = weather_adj_energy
                    return energy_dict
            else: 
                weather_factors = dict()
                for type in energy_dict.keys():
                    weather_factors_t = self.national_method1_fixed_end_use_share_weights(energy_type_=type)
                    if type == 'electricity': 
                        type = 'elec'
                    weather_factors[type] = weather_factors_t
                return weather_factors

        elif self.sector == 'commercial':
            weather_factors = dict()
            for type in ['electricity', 'fuels']:
                weather_factors_method1 = self.national_method1_fixed_end_use_share_weights(type)
                                

                early_years = range(min(weather_factors_method1.index), 1969 + 1)

                weather_factors_early = weather_factors_method1.loc[early_years, [f'{type}_weather_factor']]
                weather = weather_factors_method1.drop(f'{type}_weather_factor', axis=1)

                if type == 'electricity': 
                    type = 'elec'

                type_seds = seds_data[type]
                weather_factors_method2 = self.national_method2_regression_models(seds_data=type_seds, weather_factors=weather)
                weather_factors_method2 = weather_factors_method2.rename(columns={'National': f'{type}_weather_factor'})
                late_years = range(1970, max(weather_factors_method2.index) + 1)
                weather_factors_late = weather_factors_method2.loc[late_years]

                weather_factors_t = pd.concat([weather_factors_early, weather_factors_late], sort=True)

                weather_factors[type] = weather_factors_t
            return weather_factors
class CommercialIndicators(CalculateLMDI):
    """
    Data Sources: 
    - New construction is based on data from Dodge Data and Analytics. Dodge data on new floor space additions is available 
    from the published versions of the Statistical Abstract of the United States (SAUS). The Most recent data is from the 2020 
    SAUS, Table 995 "Construction Contracts Started- Value of the Construction and Floor Space of Buildings by Class of Construction:
    2014 to 2018".
    """
    def __init__(self,
                 directory,
                 output_directory,
                 level_of_aggregation,
                 lmdi_model=None,
                 end_year=2018,
                 base_year=1985):
        """Set up commercial-sector LMDI decomposition.

        Args:
            directory (str): Input data directory.
            output_directory (str): Directory for output results.
            level_of_aggregation (str): LMDI level of aggregation.
            lmdi_model (list, optional): LMDI model types.
                Defaults to ['multiplicative'].
            end_year (int, optional): Last year of analysis. Defaults to 2018.
            base_year (int, optional): LMDI base year. Defaults to 1985.
        """
        # Avoid a mutable default argument: a list literal in the signature
        # would be one shared object reused across every call/instance.
        if lmdi_model is None:
            lmdi_model = ['multiplicative']
        self.end_year = end_year
        self.sub_categories_list = {
            'Commercial_Total': None
        }  #, 'Total_Commercial_LMDI_UtilAdj': None}
        self.eia_comm = GetEIAData('commercial')
        self.energy_types = ['elec', 'fuels', 'deliv', 'source', 'source_adj']
        super().__init__(sector='commercial',
                         level_of_aggregation=level_of_aggregation,
                         lmdi_models=lmdi_model,
                         directory=directory,
                         output_directory=output_directory,
                         categories_dict=self.sub_categories_list,
                         energy_types=self.energy_types,
                         base_year=base_year,
                         end_year=end_year)
        # self.cbecs =
        # self.residential_housing_units = [0] # Use regional estimates of residential housing units as interpolator, extrapolator via regression model

        # self.mer_data23_May_2016 = GetEIAData.eia_api(id_='711251')  # 'http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=711251'
        # self.mer_data23_Jan_2017 = GetEIAData.eia_api(id_='711251')   # 'http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=711251'
        # self.mer_data23_Dec_2019 =  GetEIAData.eia_api(id_='711251')  # 'http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=711251'
        # self.AER11_Table21C_Update = GetEIAData.eia_api(id_='711251')  # Estimates?

    def collect_input_data(self, dataset_name):
        """Fetch a single EIA input dataset by name.

        Args:
            dataset_name (str): One of 'national_calibration',
                'SEDS_CensusRgn' or 'mer_data_23'.

        Returns:
            The dataset produced by the corresponding GetEIAData call.

        Raises:
            KeyError: If dataset_name is not one of the recognized names.
        """
        # Map names to zero-argument callables so that only the requested
        # dataset triggers a (potentially expensive) data/API call; the
        # previous version eagerly fetched all three datasets on every call.
        datasets = \
            {'national_calibration':
                self.eia_comm.national_calibration,
             'SEDS_CensusRgn':
                self.eia_comm.get_seds,
             'mer_data_23':
                lambda: self.eia_comm.eia_api(id_='711251',
                                              id_type='category')}
        return datasets[dataset_name]()

    def adjusted_supplier_data(self):
        """
        This worksheet adjusts some of commercial energy consumption data
        as reported in the Annual Energy Review.  These adjustments are
        based upon state-by-state analysis of energy consumption in the
        industrial and commercial sectors.  For electricity, there have been
        a number of reclassifications by utilities since 1990 that has moved
        sales from the industrial sector to the commercial sector.

        The adjustment for electricity consumption is based upon a
        state-by-state examination of commercial and electricity
        sales from 1990 through 2011.  This data is collected
        by EIA via Survey EIA-861.  Significant discontinuities
        in the sales data from one year to the next were removed.
        In most cases, these adjustments caused industrial consumption
        to increase and commercial consumption to decrease.  The
        spreadsheet with these adjustments is
        Sectoral_reclassification5.xls  (10/25/2012).

        In 2009, there was a significant decline in commercial
        electricity sales in MA and a corresponding increase in industrial
        sales
        Assuming that industrial consumption would have
        fallen by 2% between 2008 and 2009, the adjustment
        to both the commercial (+) and industrial sectors (-) was
        estimated to be 7.61 TWh.  .
        The 7.61 TWh converted to Tbtu is 26.0.  This value is then added
        to the negative 164.0 Tbtu in 2009 and subsequent years.

        State Energy Data System (Jan. 2017) via National Calibration worksheet

        Returns:
            adjusted_supplier_data (pd.DataFrame): Single column
                'adjusted_consumption_trillion_btu' (float), indexed by
                Year, sorted ascending.
        """

        # 1949-1969
        published_consumption_trillion_btu = \
            self.eia_comm.eia_api(id_='TOTAL.ESCCBUS.A', id_type='series')  # Column W (electricity retail sales to the commercial sector) # for years 1949-69
        published_consumption_trillion_btu = \
            published_consumption_trillion_btu.rename(
                columns={'Electricity Retail Sales to the Commercial Sector, Annual, Trillion Btu':
                         'published_consumption_trillion_btu'})
        # 1970-2018
        # Overwrite 1970 onward with the national-calibration final
        # estimate; note the slice labels are year *strings*.
        national_calibration = self.collect_input_data('national_calibration')
        published_consumption_trillion_btu.loc['1970':, [
            'published_consumption_trillion_btu'
        ]] = national_calibration.loc['1970':, [
            'Final Est. (Trillion Btu)_elec'
        ]].values  # Column G (electricity final est) # for years 1970-2018
        # 1977-1989

        years = list(
            range(
                1977,
                max(published_consumption_trillion_btu.index.astype(int)) + 1))
        years = [str(y) for y in years]
        # adjustment_to_commercial_trillion_btu_early = number_for_1990
        # NOTE(review): this hard-coded list has 42 entries (1977-2018).
        # If the consumption series ever extends past 2018, `years` and
        # this list will have different lengths — confirm before updating.
        adjustment_to_commercial_trillion_btu = \
            [9.21340312799975, 9.21340312799975, 9.21340312799975, 9.21340312799975, 9.21340312799975,
            9.21340312799975, 9.21340312799975, 9.21340312799975, 9.21340312799975, 9.21340312799975,
            9.21340312799975, 9.21340312799975, 9.21340312799975, 9.21340312799975, 9.21340654000005,
            29.77918535999970, 10.21012680399960, 1.70263235599987, -40.63866012000020, -40.63865670799990,
            -117.72073870000000, -117.72073528800000, -117.72073187600000, -117.72072846400000, -162.61452790400100,
            -136.25241618800100, -108.91594645600000, -125.97594304400000, -125.97593963200100, -163.95020989600000,
            -163.95020648400000, -163.95020307200000, -137.98708428968000, -137.98487966000100, -137.98487966000100,
            -137.98487966000100, -137.98487966000100, -137.98487966000100, -137.98487966000100, -137.98487966000100,
            -137.98487966000100, -137.98487966000100] # First value is for 1977 - 2018
        adjustment_df = \
            pd.DataFrame([years, adjustment_to_commercial_trillion_btu]).transpose()
        adjustment_df.columns = \
            ['Year', 'adjustment_to_commercial_trillion_btu']

        # NOTE(review): merging on 'Year' requires both frames to expose
        # 'Year' as a column; the published data was sliced by index label
        # above — verify the eia_api output shape supports this merge.
        adjusted_supplier_data = \
            adjustment_df.merge(published_consumption_trillion_btu, how='outer', on='Year')
        # Years outside 1977+ get a zero adjustment.
        adjusted_supplier_data['adjustment_to_commercial_trillion_btu'] = \
            adjusted_supplier_data['adjustment_to_commercial_trillion_btu'].fillna(0)

        adjusted_supplier_data = adjusted_supplier_data.set_index('Year')
        adjusted_supplier_data['adjusted_consumption_trillion_btu'] = \
            adjusted_supplier_data['adjustment_to_commercial_trillion_btu'].add(adjusted_supplier_data['published_consumption_trillion_btu'])
        adjusted_supplier_data['adjusted_consumption_trillion_btu'] = \
            adjusted_supplier_data['adjusted_consumption_trillion_btu'].astype(float)
        adjusted_supplier_data = \
            adjusted_supplier_data.sort_index(ascending=True)

        return adjusted_supplier_data[['adjusted_consumption_trillion_btu']]

    @staticmethod
    def get_saus():
        """Get Data from the Statistical Abstract of the United States (SAUS)

        Returns:
            tuple: (saus_2002, saus_merged_df) where saus_2002 is SAUS 2002
                Table 995 indexed by Year, and saus_merged_df is the 2001
                SAUS series with missing years filled from the 1994 SAUS,
                as a single-column 'Value' DataFrame indexed by year.
        """

        print('os.getcwd():', os.getcwd())
        try:
            saus_2002 = \
                pd.read_csv('./EnergyIntensityIndicators/Data/SAUS2002_table995.csv').set_index('Year')
        except FileNotFoundError:
            # The data path is relative to the repository root; retry from
            # the parent directory when invoked from a subdirectory.
            os.chdir('..')
            saus_2002 = \
                pd.read_csv('./EnergyIntensityIndicators/Data/SAUS2002_table995.csv').set_index('Year')

        saus_1994 = {
            1980: 738,
            1981: 787,
            1982: 631,
            1983: 716,
            1984: 901,
            1985: 1039,
            1986: 960,
            1987: 933,
            1988: 883,
            1989: 867,
            1990: 694,
            1991: 477,
            1992: 462,
            1993: 479
        }
        saus_2001 = {
            1980: 738,
            1981: None,
            1982: None,
            1983: None,
            1984: None,
            1985: 1039,
            1986: None,
            1987: None,
            1988: None,
            1989: 867,
            1990: 694,
            1991: 476,
            1992: 462,
            1993: 481,
            1994: 600,
            1995: 700,
            1996: 723,
            1997: 855,
            1998: 1106,
            1999: 1117,
            2000: 1176
        }
        # Fill the 2001 series' missing (None) years from the 1994 series.
        # `is None` replaces the unidiomatic `== None` comparison (PEP 8).
        saus_merged = {
            year: saus_1994[year] if value is None else value
            for year, value in saus_2001.items()
        }

        saus_merged_df = \
            pd.DataFrame.from_dict(
                saus_merged, orient='index', columns=['Value'])
        return saus_2002, saus_merged_df

    @staticmethod
    def dod_compare_old():
        """Load the old DOD floorspace series and derive subsets.

        DODCompareOld Note from PNNL (David B. Belzer):
        "These series are of unknown origin--need to check Jackson and Johnson 197 (sic)?

        Returns:
            tuple: (dod_old, dod_old_subset, dod_old_hotel) — the full
                series with a derived 'Commercial' total, the 1960-81
                building-category subset, and the 1980-89 commercial total.
        """

        data_path = './EnergyIntensityIndicators/Data/DODCompareOld.csv'
        dod_old = pd.read_csv(data_path).set_index('Year')

        building_cols = ['Retail', 'Auto R', 'Office', 'Warehouse']
        # 'Commercial' is the row-wise sum of the four building categories.
        dod_old['Commercial'] = dod_old[building_cols].sum(axis=1)

        subset_years = list(range(1960, 1982))
        hotel_years = list(range(1980, 1990))
        dod_old_subset = dod_old.loc[subset_years, building_cols]
        dod_old_hotel = dod_old.loc[hotel_years, ['Commercial']]
        return dod_old, dod_old_subset, dod_old_hotel

    def dodge_adjustment_ratios(self, dodge_dataframe, start_year, stop_year,
                                adjust_years, late):
        """Scale each building category over adjust_years by its share of
        the commercial total in the start_year-stop_year reference window.

        Typical reference windows: (1985, 1990) or (1960, 1970).

        Args:
            dodge_dataframe (pd.DataFrame): Dodge floorspace data
                (modified in place and also returned).
            start_year (int): First year of the reference window.
            stop_year (int): Last year of the reference window.
            adjust_years (list): Year labels (str) whose values are rescaled.
            late: Truthy selects 'Commercial' as the base column;
                falsy selects 'Commercial, Excl Hotel'.

        Returns:
            pd.DataFrame: The adjusted dodge_dataframe.
        """

        ref_years = self.years_to_str(start_year, stop_year)
        commercial_total = dodge_dataframe.loc[
            ref_years, ['Commercial']].sum(axis=0).values
        base_col = 'Commercial' if late else 'Commercial, Excl Hotel'
        for category in ['Retail', 'Auto R', 'Office', 'Warehouse', 'Hotel']:
            # Category's share of the commercial total in the window.
            category_share = dodge_dataframe.loc[
                ref_years, [category]].sum(axis=0).values / commercial_total
            dodge_dataframe.loc[adjust_years, [category]] = \
                dodge_dataframe.loc[adjust_years,
                                    [base_col]].values * category_share[0]

        return dodge_dataframe

    def west_inflation(self):
        """Jackson and Johnson Estimate of West Census Region Inflation Factor, West Region Shares Based on CBECS

        Linearly interpolates the ORNL-78 West-region inflation factors
        between the published benchmark years; 1919-1924 are held at 1.12.

        Note from PNNL: "Staff: Based upon CBECS, the percentage of construction in west census region was slightly greater
         in the 1900-1919 period than in 1920-1945.  Thus, factor is set to 1.12, approximately same as 1925 value published
         by Jackson and Johnson"

        Returns:
            final_factors_df (pd.DataFrame): Single column 'Final Factors'
                indexed by year (int).
        """

        # hist_stat = self.hist_stat()['Commercial  (million SF)'] # hist_stat column E
        # # west inflation column Q
        ornl_78 = {
            1925: 1.127,
            1930: 1.144,
            1935: 1.12,
            1940: 1.182,
            1945: 1.393,
            1950: 1.216,
            1951: 1.237,
            1952: 1.224,
            1953: 1.209,
            1954: 1.213,
            1955: 1.229
        }
        # (Removed dead code: an unused `all_years` list was built here.)
        increment_years = list(ornl_78.keys())

        # Pre-1925 factor fixed at 1.12 (see PNNL note in the docstring).
        final_factors = {year: 1.12 for year in list(range(1919, 1925))}
        # Linear interpolation between consecutive benchmark years: each
        # pair writes years [year_before, y_) with weights shifting from
        # the earlier to the later published factor.
        # NOTE(review): the last benchmark year (1955) is never written to
        # final_factors — confirm whether its published value (1.229)
        # should be included.
        for index, y_ in enumerate(increment_years):
            if index > 0:
                year_before = increment_years[index - 1]
                num_years = y_ - year_before
                infl_factor_year_before = ornl_78[year_before]
                infl_factor_y_ = ornl_78[y_]
                increment = 1 / num_years
                for delta in range(num_years):
                    value = infl_factor_year_before * (1 - increment * delta) + \
                        infl_factor_y_ * (increment * delta)
                    year = year_before + delta
                    final_factors[year] = value
        final_factors_df = pd.DataFrame.from_dict(final_factors,
                                                  columns=['Final Factors'],
                                                  orient='index')
        return final_factors_df

    @staticmethod
    def years_to_str(start_year, end_year):
        """Return the inclusive year range [start_year, end_year] as a
        list of year strings.
        """

        return [str(year) for year in range(start_year, end_year + 1)]

    def hist_stat(self):
        """Historical Dodge Data through 1970

        Data Source: Series N 90-100 Historical Statistics of the U.S., Colonial Times to 1970

        Spreads the 1919-1924 'Pub&Institutional' totals across the
        individual building categories in proportion to each category's
        share of the 1925-26 totals, then restores the original
        'Pub&Institutional' column.

        Returns:
            historical_dodge (pd.DataFrame): Historical Dodge floorspace
                data indexed by Year.
        """

        historical_dodge = pd.read_csv(
            './EnergyIntensityIndicators/Data/historical_dodge_data.csv'
        ).set_index('Year')
        # Save the published Pub&Institutional totals for 1919-1924 before
        # the category columns are overwritten below.
        pub_inst_values = historical_dodge.loc[list(range(1919, 1925)),
                                               ['Pub&Institutional']].values
        # Per-category totals over 1925-26, excluding the commercial column.
        total_1925_6 = pd.DataFrame.sum(
            historical_dodge.loc[list(range(1925, 1927)), ],
            axis=0).drop(index='Commercial  (million SF)')
        inst_pub_total_1925 = pd.DataFrame.sum(
            historical_dodge.loc[1925, ].drop('Commercial  (million SF)'),
            axis=0)
        inst_pub_total_1926 = pd.DataFrame.sum(
            historical_dodge.loc[1926, ].drop('Commercial  (million SF)'),
            axis=0)
        inst_pub_total_1925_6 = inst_pub_total_1925 + inst_pub_total_1926

        # Each category's share of the combined 1925-26 total.
        shares = total_1925_6.divide(inst_pub_total_1925_6)
        for col in list(total_1925_6.index):
            # Allocate the 1919-1924 Pub&Institutional totals to this
            # category in proportion to its 1925-26 share.
            values = historical_dodge.loc[list(range(1919, 1925)),
                                          ['Pub&Institutional']].multiply(
                                              shares[col]).values
            # NOTE(review): `.at` with a list of labels is unusual; `.loc`
            # is the conventional accessor here — confirm intended behavior.
            historical_dodge.at[list(range(1919, 1925)), col] = values

        # Restore the original Pub&Institutional values saved above.
        historical_dodge.at[list(range(1919, 1925)),
                            ['Pub&Institutional']] = pub_inst_values
        return historical_dodge

    def hist_stat_adj(self):
        """Adjust historical Dodge data to account for
        omission of data for the West Census Region prior to 1956

        Multiplies the historical category columns (except
        'Pub&Institutional') by the West-region inflation factors, using a
        factor of 1 for years with no published factor.

        Returns:
            pd.DataFrame: Adjusted floorspace for 1919-1959.
        """

        dodge_history = self.hist_stat()
        inflation_factors = self.west_inflation()
        combined = dodge_history.merge(inflation_factors,
                                       how='outer',
                                       left_index=True,
                                       right_index=True)
        # Years without a published factor are left unadjusted.
        combined['Final Factors'] = combined['Final Factors'].fillna(1)
        floorspace_cols = combined.drop(
            columns=['Final Factors', 'Pub&Institutional'])
        adjusted_for_west = floorspace_cols.multiply(
            combined['Final Factors'].values, axis=0)
        return adjusted_for_west.loc[list(range(1919, 1960)), :]

    def dodge_revised(self):
        """Dodge Additions, adjusted for omission of
        West Census Region prior to 1956

        Splices several floorspace sources onto the revised Dodge series:
        the old DOD series (1960-81), West-inflated historical data
        (1919-59), SAUS tables (1990s-2000s), then derives the Hotel
        series and Commercial totals and rescales the building categories.

        Returns:
            dodge_revised (pd.DataFrame): Revised Dodge floorspace
                additions by building category, indexed by year strings.
        """

        saus_2002, saus_merged = self.get_saus()
        dod_old, dod_old_subset, dod_old_hotel = self.dod_compare_old()
        # Despite the name, this is the West-adjusted historical series
        # from hist_stat_adj (1919-1959).
        west_inflation = self.hist_stat_adj()

        dodge_revised = pd.read_csv(
            './EnergyIntensityIndicators/Data/Dodge_Data.csv').set_index(
                'Year')
        # Year labels are handled as strings throughout this class.
        dodge_revised.index = dodge_revised.index.astype(str)

        # Add the two derived columns, initialized to NaN.
        dodge_revised = dodge_revised.reindex(
            dodge_revised.columns.tolist() +
            ['Commercial, Excl Hotel', 'Hotel'],
            axis=1).fillna(np.nan)

        # NOTE(review): despite its name this range extends through 1990.
        years_1919_1989 = self.years_to_str(1919, 1990)
        years_1990_1997 = self.years_to_str(1990, 1997)

        # 1960-81 building categories come from the old DOD series.
        dodge_revised.loc[self.years_to_str(1960, 1981),
                          ['Retail', 'Auto R', 'Office', 'Warehouse'
                           ]] = dod_old_subset.values

        # 1919-59 commercial total from the West-adjusted historical data
        # (41 years, hence the (41, 1) reshape).
        dodge_revised.loc[self.years_to_str(1919, 1959),
                          ['Commercial, Excl Hotel']] = west_inflation[
                              'Commercial  (million SF)'].values.reshape(
                                  41, 1)  # hist_stat_adj column Q
        hist_adj_cols = [
            'Education', 'Hospital', 'Public', 'Religious', 'Soc/Amuse', 'Misc'
        ]
        dodge_revised.loc[self.years_to_str(1919, 1959),
                          hist_adj_cols] = west_inflation.drop(
                              'Commercial  (million SF)', axis=1).values
        # 1990s categories come from SAUS 2002 (different column names).
        dodge_revised.loc[self.years_to_str(1990, 1998),
                          hist_adj_cols] = saus_2002.loc[
                              self.years_to_str(1990, 1998), [
                                  'Educational', 'Health', 'Pub. Bldg',
                                  'Religious', 'Soc/Rec', 'Misc.'
                              ]].values
        # NOTE(review): target span is 1990-2003 (14 years) but the source
        # spans 1990-2001 (12 years); the RHS is a DataFrame so pandas
        # alignment rules apply — confirm the intended result.
        dodge_revised.loc[self.years_to_str(1990, 2003),
                          ['Soc/Misc']] = saus_2002.loc[
                              self.years_to_str(1990, 2001), ['Soc/Rec']].add(
                                  saus_2002.loc[self.years_to_str(1990, 2001),
                                                ['Misc.']].values)
        dodge_revised.loc[self.years_to_str(1999, 2001),
                          'Misc'] = saus_2002.loc[
                              self.years_to_str(1999, 2001),
                              ['Misc.']].values.reshape(3, )
        # Back out 'Misc' from the combined Soc/Misc series for 1961-89.
        dodge_revised.loc[self.years_to_str(1961, 1989),
                          'Misc'] = dodge_revised.loc[
                              self.years_to_str(1961, 1989),
                              'Soc/Misc'].subtract(dodge_revised.loc[
                                  self.years_to_str(1961, 1989),
                                  'Soc/Amuse'].values)
        dodge_revised.loc[str(2000), 'Hospital'] = saus_2002.loc[str(2000),
                                                                 'Health']

        # Commercial (excl. hotel) for 1960-89 is the sum of the four
        # building categories.
        dodge_revised.loc[self.years_to_str(1960, 1989),
                          ['Commercial, Excl Hotel']] = dodge_revised.loc[
                              self.years_to_str(1960, 1989),
                              ['Retail', 'Auto R', 'Office', 'Warehouse']].sum(
                                  axis=1)

        # Hotel (1980-89): merged SAUS commercial minus the old DOD series.
        hotel_80_89 = saus_merged.loc[list(range(1980, 1989 + 1)),
                                      ['Value']].subtract(dod_old_hotel.values)

        dodge_revised.loc[self.years_to_str(1980, 1989),
                          ['Hotel']] = hotel_80_89

        # Extend Hotel back to 1919 using the 1980-89 hotel-to-commercial
        # ratio.
        hotel_80_89_ratio = hotel_80_89.sum(axis=0).values / dodge_revised.loc[
            self.years_to_str(1980, 1989),
            ['Commercial, Excl Hotel']].sum(axis=0).values

        dodge_revised.loc[
            self.years_to_str(1919, 1979), ['Hotel']] = dodge_revised.loc[
                self.years_to_str(1919, 1979),
                ['Commercial, Excl Hotel']].values * hotel_80_89_ratio

        dodge_revised.loc[years_1990_1997,
                          ['Commercial, Incl Hotel']] = saus_2002.loc[
                              years_1990_1997, ['Commercial']].values

        dodge_revised.loc[self.years_to_str(1985, 1989),
                          ['Commercial']] = saus_merged.loc[
                              list(range(1985, 1989 + 1)), ['Value']].values
        dodge_revised.loc[self.years_to_str(1990, 2018),
                          ['Commercial']] = dodge_revised.loc[
                              self.years_to_str(1990, 2018),
                              ['Commercial, Incl Hotel']].values

        # Rescale categories for the early years from 1960-69 shares and
        # for the late years from 1985-89 shares.
        dodge_revised = self.dodge_adjustment_ratios(
            dodge_revised,
            1960,
            1969,
            adjust_years=self.years_to_str(1919, 1959),
            late=False)
        dodge_revised = self.dodge_adjustment_ratios(
            dodge_revised,
            1985,
            1989,
            adjust_years=self.years_to_str(1990, 2018),
            late=True)

        dodge_revised.loc[years_1919_1989,
                          ['Commercial, Incl Hotel']] = dodge_revised.loc[
                              years_1919_1989, ['Commercial, Excl Hotel']].add(
                                  dodge_revised.loc[years_1919_1989,
                                                    ['Hotel']].values)
        # Grand total excludes the two commercial aggregates to avoid
        # double counting.
        dodge_revised['Total'] = dodge_revised.drop(
            ['Commercial, Incl Hotel', 'Commercial, Excl Hotel'],
            axis=1).sum(axis=1).values
        return dodge_revised

    def dodge_to_cbecs(self):
        """Redefine the Dodge building categories more along the lines of CBECS categories. Constant fractions of floor space are moved among categories. 

        Returns:
            dodge_to_cbecs (dataframe): redefined data
        """

        # Key Assumptions (constant fractions of floorspace reallocated):
        education_floor_space_office = .10
        auto_repair_retail = .80
        retail_merc_service = .80  # remainder to food service and sales
        retail_merc_service_food_sales = .11
        retail_merc_service_food_service = .90
        education_assembly = .05
        education_misc = .05  # (laboratories)
        health_transfered_to_cbecs_health = .75  # 25% to lodging (nursing homes)
        misc_public_assembly = .10  # (passenger terminals)

        dodge_revised = self.dodge_revised()  # dataframe

        # Retail plus the retail share of auto repair feeds three CBECS
        # categories below; compute it once.
        retail_plus_auto = (
            dodge_revised['Retail'] +
            auto_repair_retail * dodge_revised['Auto R'])

        dodge_to_cbecs = pd.DataFrame(dodge_revised[[
            'Total', 'Religious', 'Warehouse'
        ]]).rename(columns={'Total': 'Dodge_Totals'})
        dodge_to_cbecs['Office'] = (
            dodge_revised['Office'] +
            education_floor_space_office * dodge_revised['Education'])
        dodge_to_cbecs['Merc/Serv'] = retail_merc_service * retail_plus_auto
        dodge_to_cbecs['Food_Sales'] = (
            retail_merc_service_food_sales * retail_plus_auto)
        dodge_to_cbecs['Food_Serv'] = (
            retail_merc_service_food_service * retail_plus_auto)
        dodge_to_cbecs['Education'] = (
            1 - education_floor_space_office - education_assembly -
            education_misc) * dodge_revised['Education']
        dodge_to_cbecs['Health'] = (
            health_transfered_to_cbecs_health * dodge_revised['Hospital'])
        dodge_to_cbecs['Lodging'] = (
            dodge_revised['Hotel'] +
            (1 - health_transfered_to_cbecs_health) * dodge_revised['Hospital'])
        dodge_to_cbecs['Assembly'] = (
            dodge_revised['Soc/Amuse'] +
            misc_public_assembly * dodge_revised['Misc'] +
            education_assembly * dodge_revised['Education'])
        dodge_to_cbecs['Other'] = (
            dodge_revised['Public'] +
            (1 - misc_public_assembly) * dodge_revised['Misc'] +
            (1 - auto_repair_retail) * dodge_revised['Auto R'] +
            education_misc * dodge_revised['Education'])
        # Redefined total excludes the original Dodge total column.
        dodge_to_cbecs['Redefined_Totals'] = dodge_to_cbecs.drop(
            'Dodge_Totals', axis=1).sum(axis=1).values
        # dodge_to_cbecs = dodge_to_cbecs.drop()  # don't need totals?
        return dodge_to_cbecs

    def nems_logistic(self, dataframe, params):
        """Estimate surviving commercial floorspace stock with a
        NEMS-style logistic survival function.

        Args:
            dataframe (pd.DataFrame): Floorspace additions indexed by year
                strings; must provide a 'Redefined_Totals' column
                (see dodge_to_cbecs).
            params (list): Logistic parameters; per solve_logistic these
                are [gamma, lifetime].

        Returns:
            pd.DataFrame: Single column 'floorspace_bsf' of estimated
                surviving floorspace for 1960 onward, NaN rows dropped.

        PNNL errors found: 
            - Column S in spreadsheet has CO-StatePop2.xls are incorrectly aligned with years
            - Column AL does not actually scale by 1.28 as suggested in the column header

        """

        current_year = dt.datetime.now().year

        # State population series used as a pre-1920 proxy for additions.
        link_factors = pd.read_excel(
            f'{self.directory}/CO-EST_statepop2.xls',
            sheet_name='Stock',
            usecols='D:E',
            header=1,
            skiprows=158).rename(columns={1789: 'Year'})
        state_pop = link_factors.set_index('Year').rename(
            columns={' New': 'state_pop'})
        state_pop = state_pop[state_pop.index.notnull()]
        # Index as year strings to match the rest of the class.
        state_pop.index = state_pop.index.astype(int)
        state_pop.index = state_pop.index.astype(str)

        dataframe = dataframe.merge(state_pop,
                                    how='outer',
                                    left_index=True,
                                    right_index=True)
        dataframe = dataframe[dataframe.index.notnull()]
        # Pre-create the working columns that are filled below.
        dataframe = dataframe.reindex(columns=dataframe.columns.tolist() + [
            'adjusted_state_pop', 'adjusted_state_pop_scaled_b',
            'adjusted_state_pop_scaled_c', 'scaled_additions_estimate_a',
            'scaled_additions_estimate_b', 'scaled_additions_estimate_c',
            'removal', 'adjusted_removals', 'old_stk_retain', 'floorspace_bsf'
        ])
        dataframe['Year_Int'] = dataframe.index.astype(int)
        # Age of each vintage relative to the current year.
        dataframe['age'] = dataframe['Year_Int'].subtract(
            current_year).multiply(-1)
        # Logistic survival fraction: 1 / (1 + (age / lifetime) ** gamma).
        dataframe['remaining'] = ((dataframe['age'].divide(params[1])).pow(
            params[0]).add(1)).pow(-1)
        dataframe['inflate_fac'] = dataframe['remaining'].pow(-1)

        link_factor = 0.1
        # NOTE(review): adjusted_state_pop_1 is unused — confirm whether it
        # was meant to seed the 1838 value below.
        adjusted_state_pop_1 = 40

        # Weights for the two-year smoothing of additions.
        timing_wgts_current_yr = 0.4
        timing_wgts_lag_yr = 0.6
        benchmark_factor = 1

        # Seed the earliest year; pre-1920 uses scaled state population,
        # 1920 onward uses Dodge 'Redefined_Totals'.
        dataframe.loc[str(1838), ['state_pop']] = 400
        dataframe.loc[self.years_to_str(1838, 1919),
                      ['adjusted_state_pop']] = dataframe.loc[
                          self.years_to_str(1838, 1919),
                          ['state_pop']].values * link_factor
        dataframe.loc[self.years_to_str(1920, current_year),
                      ['adjusted_state_pop']] = dataframe.loc[
                          self.years_to_str(1920, current_year),
                          ['Redefined_Totals']].values

        # Two-year weighted average of additions ('VPIP' estimate); the
        # average restarts at the 1838 seed and the 1920 splice point.
        for year in self.years_to_str(1838, current_year):
            adjusted_state_pop_value = dataframe.loc[
                year, ['adjusted_state_pop']].values
            if year == '1838':
                vpip_estimate = adjusted_state_pop_value
            elif year == '1920':
                vpip_estimate = adjusted_state_pop_value
            else:
                adjusted_state_pop_year_before = dataframe.loc[
                    str(int(year) - 1), ['adjusted_state_pop']].values
                vpip_estimate = (
                    timing_wgts_current_yr * adjusted_state_pop_value +
                    timing_wgts_lag_yr *
                    adjusted_state_pop_year_before) * benchmark_factor
            dataframe.loc[year, 'VPIP-Estimate'] = vpip_estimate
        _variable = 1.2569  # This should be solved for
        # NOTE(review): x_column_value, db_estimates2 and without_lags
        # (below) are computed but never used — presumably alternative
        # scaling choices carried over from the spreadsheet; confirm
        # before removing.
        x_column_value = _variable
        db_estimates = 1.2
        db_estimates2 = [1.25 - 0.01 * d for d in list(range(1990, 2021))]

        post_1989_scaling_factor = db_estimates  # Should choose this
        variable_2 = 1.533  # This should be solved for

        without_lags = dataframe.loc[self.years_to_str(1990, current_year),
                                     ['adjusted_state_pop']].multiply(
                                         post_1989_scaling_factor)

        # Estimate A: pre-1990 scaled by variable_2, 1990+ by the
        # post-1989 factor.
        dataframe.loc[:str(1989),
                      ['scaled_additions_estimate_a']] = dataframe.loc[:str(
                          1989), ['VPIP-Estimate']].values * variable_2
        dataframe.loc[self.years_to_str(1990, current_year),
                      ['scaled_additions_estimate_a']] = dataframe.loc[
                          self.years_to_str(1990, current_year),
                          ['VPIP-Estimate']].values * post_1989_scaling_factor
        # Estimate B: A with 1990-2001 inflated by 1.15.
        dataframe.loc[:str(1989),
                      ['adjusted_state_pop_scaled_b']] = dataframe.loc[
                          self.years_to_str(1790, 1989),
                          ['scaled_additions_estimate_a']].values
        dataframe.loc[self.years_to_str(1990, 2001),
                      ['adjusted_state_pop_scaled_b']] = dataframe.loc[
                          self.years_to_str(1990, 2001),
                          ['scaled_additions_estimate_a']].values * 1.15
        dataframe.loc[self.years_to_str(2002, current_year),
                      ['adjusted_state_pop_scaled_b']] = dataframe.loc[
                          self.years_to_str(2002, current_year),
                          ['scaled_additions_estimate_a']].values

        # Estimate C: A with 1990-2001 inflated by 1.28 (but see the PNNL
        # error note in the docstring) and the 1790 value pinned to 1.
        dataframe.loc[str(1790), ['adjusted_state_pop_scaled_c']] = 1
        dataframe.loc[self.years_to_str(1791, 1989),
                      ['adjusted_state_pop_scaled_c']] = dataframe.loc[
                          self.years_to_str(1791, 1989),
                          ['scaled_additions_estimate_a']].values
        dataframe.loc[self.years_to_str(1990, 2001),
                      ['adjusted_state_pop_scaled_c']] = dataframe.loc[
                          self.years_to_str(1990, 2001),
                          ['scaled_additions_estimate_a']].values * 1.28
        dataframe.loc[self.years_to_str(2002, current_year),
                      ['adjusted_state_pop_scaled_c']] = dataframe.loc[
                          self.years_to_str(2002, current_year),
                          ['scaled_additions_estimate_a']].values

        # Surviving stock per year: dot product of a trailing window of
        # additions with the survival fractions ('remaining'); both
        # vectors span current_year - 1870 + 1 years.
        for y in self.years_to_str(1839, current_year):
            years_diff = current_year - 1870
            start_year = int(y) - years_diff
            # first_index_year = int(dataframe.index[0])
            # if start_year < first_index_year:
            #     start_year = first_index_year
            year_index = self.years_to_str(start_year, int(y))
            remaining = dataframe.loc[self.years_to_str(1870, current_year),
                                      ['remaining']].values.flatten()
            adjusted_state_pop_scaled_b = dataframe.loc[
                year_index,
                ['adjusted_state_pop_scaled_b']].fillna(0).values.flatten()
            adjusted_state_pop_scaled_c = dataframe.loc[
                year_index,
                ['adjusted_state_pop_scaled_c']].fillna(0).values.flatten()

            b_value = np.dot(adjusted_state_pop_scaled_b, remaining)
            c_value = np.dot(adjusted_state_pop_scaled_c, remaining)

            dataframe.loc[y, ['scaled_additions_estimate_b']] = b_value
            dataframe.loc[y, ['scaled_additions_estimate_c']] = c_value

        removal_chg = 1  # Not sure what this is about
        # Fractions of removals retained for 2009 onward.
        fractions = [0.3, 0.4, 0.4, 0.35, 0.35, 0.35, 0.35, 0.3, 0.3, 0.3]
        fraction_retained = [f * removal_chg for f in fractions]

        # Implied removals: year-over-year change in estimated stock minus
        # the year's additions.
        # NOTE(review): relies on lexicographic comparison of year strings
        # (i >= '1870'); valid only while all index labels are 4-digit
        # years — confirm.
        for i in dataframe.index:
            if i >= '1870':
                removal = dataframe.loc[
                    i, ['scaled_additions_estimate_c']].values - dataframe.loc[
                        str(int(i) - 1),
                        ['scaled_additions_estimate_c']].values - dataframe.loc[
                            i, ['scaled_additions_estimate_a']].values
                dataframe.loc[i, ['removal']] = removal

        dataframe.loc[self.years_to_str(2009, 2009 + len(fractions) - 1),
                      ['adjusted_removals']] = dataframe.loc[
                          self.years_to_str(2009, 2009 + len(fractions) - 1),
                          ['removal']].values.flatten() * fraction_retained

        # Cumulative retained old stock over the 2009+ window.
        for y_ in list(range(2009, 2009 + len(fractions))):
            if y_ == 2009:
                dataframe.loc[str(y_), ['old_stk_retain']] = dataframe.loc[
                    str(y_), ['adjusted_removals']]
            else:
                dataframe.loc[str(y_), ['old_stk_retain']] = dataframe.loc[
                    str(y_ - 1), ['old_stk_retain']].values + dataframe.loc[
                        str(y_), ['adjusted_removals']].values

        dataframe['adjusted_removals'] = dataframe['adjusted_removals'].fillna(
            0)
        # Final floorspace: stock estimate C net of adjusted removals.
        dataframe.loc[
            self.years_to_str(1960, current_year),
            ['floorspace_bsf']] = dataframe.loc[
                self.years_to_str(1960, current_year),
                ['scaled_additions_estimate_c']].values - dataframe.loc[
                    self.years_to_str(1960, current_year),
                    ['adjusted_removals']].values

        return dataframe[['floorspace_bsf']].dropna()

    def solve_logistic(self, dataframe):
        """Return NES logistic parameters ``[gamma, lifetime]``.

        Curve fitting is currently disabled; the PNNL-derived constants
        are returned regardless of the input data.

        Args:
            dataframe (pd.DataFrame): Floorspace data (currently unused).

        Returns:
            list: ``[gamma, lifetime]`` logistic parameters.
        """
        gamma = 3.92276415015621
        lifetime = 73.2238120168849
        # NOTE(review): re-enable scipy.optimize.curve_fit here once the
        # x/y columns of `dataframe` are chosen, seeding p0 with these values.
        return [gamma, lifetime]

    def activity(self):
        """Predict historical Commercial-sector floorspace.

        Combines fixed pre-1960 estimates with logistic-model predictions
        for later years.

        Returns:
            historical_floorspace_billion_sq_feet (pd.DataFrame):
                Historical floorspace in the Commercial Sector.
                Units: Billion Square Feet.
        """
        # Dodge construction data mapped to CBECS
        # (columns c-m starting with year 1920, row 17).
        dodge_data = self.dodge_to_cbecs()
        logistic_params = self.solve_logistic(dodge_data)
        floorspace_late = self.nems_logistic(
            dodge_data, logistic_params)  # properly formatted?

        # Pre-1960 floorspace is a fixed historical series
        # (values in thousand square feet).
        early_years = [1949, 1950, 1951, 1952, 1953, 1954,
                       1955, 1956, 1957, 1958, 1959]
        early_values = [27235.1487296062, 27788.6370796569, 28246.642791733,
                        28701.4989706012, 29253.2282427217, 29913.8330998026,
                        30679.7157232176, 31512.6191323126, 32345.382764321,
                        33206.8483392728, 34088.6640247816]
        floorspace_early = pd.DataFrame(
            early_values,
            index=[str(y) for y in early_years],
            columns=['floorspace_bsf'])

        combined = pd.concat([floorspace_early, floorspace_late])
        # Convert thousand sq ft -> billion sq ft.
        return combined.multiply(0.001)

    def fuel_electricity_consumption(self):
        """Build Commercial energy consumption data, Trillion Btu.

        Fuels: EIA API series (AER table 2.1c update, column U) before 1970,
        National Calibration values from 1970 onward.
        Electricity: adjusted supplier data.

        Returns:
            energy_data (dict): Dictionary of dataframes
                                with keys 'elec' and 'fuels'
        """
        national_calibration = self.collect_input_data('national_calibration')

        # Pre-1970 fuels: AER table 2.1c update, column U.
        fuels_dataframe = self.eia_comm.eia_api(
            id_='TOTAL.TXCCBUS.A', id_type='series').rename(
                columns={'Total Primary Energy Consumed by the Commercial Sector, Annual, Trillion Btu':
                         'adjusted_consumption_trillion_btu'})

        # From 1970 on, overwrite with National Calibration column O.
        fuels_dataframe.loc['1970':, ['adjusted_consumption_trillion_btu']] = \
            national_calibration.loc[
                '1970':, ['Final Est. (Trillion Btu)_fuels']].values
        fuels_dataframe['adjusted_consumption_trillion_btu'] = \
            fuels_dataframe['adjusted_consumption_trillion_btu'].astype(float)

        elec_dataframe = self.adjusted_supplier_data()

        return {'elec': elec_dataframe, 'fuels': fuels_dataframe}

    def get_seds(self):
        """Collect SEDS data with Census-region-name columns.

        Returns:
            data (dict): Dictionary of dataframes
                         with keys 'elec' and 'fuels'
        """
        # SEDS arrives with integer Census-region codes as column labels.
        region_names = {1: 'Northeast', 2: 'Midwest', 3: 'South', 4: 'West'}
        seds = self.collect_input_data('SEDS_CensusRgn')
        return {'elec': seds[1].rename(columns=region_names),
                'fuels': seds[0].rename(columns=region_names)}

    def collect_weather(self, comm_activity):
        """Gather weather-factor data for the Commercial sector.

        Args:
            comm_activity: Commercial activity (floorspace) data passed
                through to WeatherFactors.

        Returns:
            weather_data (dict): weather factors keyed by energy type, with
                the 'electricity_weather_factor' column removed where present.
        """
        seds = self.get_seds()
        # NOTE(review): `res` is constructed but only its side effects (data
        # collection in __init__) are used; floorspace wiring is disabled.
        res = ResidentialIndicators(directory=self.directory,
                                    output_directory=self.output_directory,
                                    base_year=self.base_year)
        # residential_activity_data = res.get_floorspace()
        # residential_floorspace = residential_activity_data['floorspace_square_feet']
        weather = WeatherFactors(sector='commercial',
                                 directory=self.directory,
                                 activity_data=comm_activity)
        #  residential_floorspace=residential_floorspace)
        weather_factors = weather.get_weather(seds_data=seds)

        return {key: df.drop('electricity_weather_factor',
                             axis=1,
                             errors='ignore')
                for key, df in weather_factors.items()}

    def collect_data(self):
        """Gather decomposition input data for the Commercial sector.

        Returns:
            data_dict (dict): Commercial Sector data input to the LMDI model
        """
        # Activity: Floorspace_Estimates column U, B
        # Energy: Elec --> Adjusted Supplier Data Column D
        #         Fuels --> AER11 Table 2.1C_Update column U, National Calibration Column O
        activity_data = self.activity()
        print('Activity data collected without issue')

        energy_data = self.fuel_electricity_consumption()
        print('Energy data collected without issue')

        weather_factors = self.collect_weather(comm_activity=activity_data)

        return {'Commercial_Total': {'energy': energy_data,
                                     'activity': activity_data,
                                     'weather_factors': weather_factors}}

    def main(self, breakout, calculate_lmdi):
        """Run the LMDI decomposition for the Commercial sector.

        Args:
            breakout: passed through to get_nested_lmdi.
            calculate_lmdi: passed through to get_nested_lmdi.

        Returns:
            results_dict (dict): nested LMDI decomposition results.
        """
        raw_data = self.collect_data()

        # Formatted results are discarded; only the nested dict is returned.
        results_dict, _ = self.get_nested_lmdi(
            level_of_aggregation=self.level_of_aggregation,
            breakout=breakout,
            calculate_lmdi=calculate_lmdi,
            raw_data=raw_data,
            lmdi_type='LMDI-I')
        return results_dict
class ResidentialIndicators(CalculateLMDI):
    """Class to decompose changes in Energy Consumption
    from the Residential Sector of the US Economy
    """
    def __init__(self,
                 directory,
                 output_directory,
                 level_of_aggregation=None,
                 lmdi_model='multiplicative',
                 base_year=1985,
                 end_year=2018):
        """Collect Residential data sources and configure the LMDI base class.

        Args:
            directory (str): Input data directory.
            output_directory (str): Directory for LMDI output.
            level_of_aggregation: Aggregation level passed to CalculateLMDI.
            lmdi_model (str): LMDI model form, 'multiplicative' by default.
            base_year (int): Base year for the decomposition.
            end_year (int): Final year of data.
        """
        self.eia_res = GetEIAData('residential')
        # The same housing-type breakdown applies to all four Census regions
        # (the regions intentionally share one dict object).
        housing_types = \
            {'Single-Family': None,
             'Multi-Family': None,
             'Manufactured-Homes': None}
        self.sub_categories_list = \
            {'National':
                {'Northeast': housing_types,
                 'Midwest': housing_types,
                 'South': housing_types,
                 'West': housing_types}}

        self.national_calibration = \
            self.eia_res.national_calibration()
        self.seds_census_region = \
            self.eia_res.get_seds()  # regional energy consumption data
        RF = ResidentialFloorspace()
        # American Housing Survey data, refreshed at construction time.
        self.ahs_Data = RF.update_ahs_data()
        self.regions = ['Northeast', 'South', 'West', 'Midwest', 'National']
        self.base_year = base_year
        self.directory = directory
        self.end_year = end_year
        self.energy_types = ['elec', 'fuels', 'deliv', 'source']

        super().__init__(sector='residential',
                         level_of_aggregation=level_of_aggregation,
                         lmdi_models=lmdi_model,
                         categories_dict=self.sub_categories_list,
                         energy_types=self.energy_types,
                         directory=directory,
                         output_directory=output_directory,
                         primary_activity='occupied_housing_units',
                         base_year=base_year,
                         end_year=end_year,
                         weather_activity='floorspace_square_feet')

        # Debug output listing all attributes after base-class init.
        print("self.dir()):", dir(self))

        # self.AER11_table2_1b_update = GetEIAData.eia_api(id_='711250') # 'http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=711250'
        # self.AnnualData_MER_22_Dec2019 = GetEIAData.eia_api(id_='711250') # 'http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=711250' ?
        # self.RECS_intensity_data =   # '711250' for Residential Sector Energy Consumption

    def get_seds(self):
        """Collect SEDS data

        Renames the integer Census-region column codes to region names.

        Returns:
            total_fuels (pd.DataFrame): Fuels data
            elec (pd.DataFrame): Elec Data
        """
        census_regions = {4: 'West', 3: 'South', 2: 'Midwest', 1: 'Northeast'}
        total_fuels = self.seds_census_region[0].rename(columns=census_regions)
        elec = self.seds_census_region[1].rename(columns=census_regions)
        return total_fuels, elec

    def fuel_electricity_consumption(self, total_fuels, elec, region):
        """Combine Energy datasets into one Energy Consumption
        dataframe in Trillion Btu

        Data Source: EIA's State Energy Data System (SEDS)

        Args:
            total_fuels (pd.DataFrame): fuels data with region-name columns
            elec (pd.DataFrame): electricity data with region-name columns
            region (str): Census-region column to select

        Returns:
           energy_data (dict): single-region dataframes keyed by
                               'elec' and 'fuels'
        """

        fuels_dataframe = total_fuels[[region]]
        elec_dataframe = elec[[region]]

        energy_data = {'elec': elec_dataframe, 'fuels': fuels_dataframe}
        return energy_data

    def get_floorspace(self):
        """Collect floorspace data for the Residential sector

        Returns:
            final_floorspace_results (dict): keys 'occupied_housing_units',
                'floorspace_square_feet' and
                'household_size_square_feet_per_hu', each mapping to
                regional estimates from ResidentialFloorspace
        """
        residential_data = ResidentialFloorspace(end_year=self.end_year)
        floorspace_square_feet, \
            occupied_housing_units, \
                household_size_square_feet_per_hu = \
                    residential_data.final_floorspace_estimates()

        final_floorspace_results = \
            {'occupied_housing_units':
                occupied_housing_units,
             'floorspace_square_feet':
                floorspace_square_feet,
             'household_size_square_feet_per_hu':
                household_size_square_feet_per_hu}

        return final_floorspace_results

    def activity(self, floorspace):
        """Combine Energy datasets into one Energy
        Consumption Occupied Housing Units

        Renames short column codes to the housing-type labels used in
        self.sub_categories_list.

        Args:
            floorspace (dict): variable name -> {region: pd.DataFrame}

        Returns:
            all_activity (dict): region -> {variable: relabeled pd.DataFrame}
        """
        all_activity = dict()
        for region in self.sub_categories_list['National'].keys():
            region_activity = dict()
            for variable, data in floorspace.items():
                df = data[region]
                # Average-size frames use different source column names
                # than the occupied-unit frames.
                if variable == 'household_size_square_feet_per_hu':
                    df = df.rename(
                        columns={
                            'avg_size_sqft_mf': 'Multi-Family',
                            'avg_size_sqft_mh': 'Manufactured-Homes',
                            'avg_size_sqft_sf': 'Single-Family'
                        })
                else:
                    df = df.rename(
                        columns={
                            'occupied_units_mf': 'Multi-Family',
                            'occupied_units_mh': 'Manufactured-Homes',
                            'occupied_units_sf': 'Single-Family'
                        })

                region_activity[variable] = df
            all_activity[region] = region_activity

        return all_activity

    def collect_weather(self, energy_dict, nominal_energy_intensity):
        """Collect weather data for the Residential Sector

        Args:
            energy_dict (dict): energy dataframes keyed by energy type
            nominal_energy_intensity (dict): nominal intensities by region

        Returns:
            weather_factors (dict): output of WeatherFactors.get_weather
        """
        weather = \
            WeatherFactors(sector='residential',
                           directory=self.directory,
                           nominal_energy_intensity=nominal_energy_intensity)

        # What should this return?? (e.g. weather factors or weather adjusted data, both?)
        weather_factors = weather.get_weather(energy_dict,
                                              weather_adjust=False)
        return weather_factors

    def collect_data(self):
        """Gather all input data for you in decomposition of
        energy use for the Residential sector

        Returns:
            all_data (dict): All input data for the
                             Residential Sector Energy
                             Decomposition
        """
        total_fuels, elec = self.get_seds()

        floorspace = self.get_floorspace()
        activity = self.activity(floorspace)
        all_data = dict()
        nominal_energy_intensity_by_r = dict()
        for r in self.sub_categories_list['National'].keys():
            region_activity = activity[r]

            energy_data = \
                self.fuel_electricity_consumption(total_fuels,
                                                  elec, region=r)

            nominal_energy_intensity_by_e = dict()

            for e, e_df in energy_data.items():
                e_df = e_df.rename_axis(columns=None)
                # NOTE(review): this rebinds `floorspace`, shadowing the
                # dict returned by get_floorspace above; the outer value is
                # no longer needed at this point, but consider renaming.
                floorspace = region_activity['floorspace_square_feet']
                total_floorspace = floorspace.sum(axis=1)

                nominal_energy_intensity = \
                    self.nominal_energy_intensity(
                        energy_input_data=e_df,
                        activity_data_=total_floorspace)
                nominal_energy_intensity_by_e[e] = \
                    nominal_energy_intensity

            region_data = {'energy': energy_data, 'activity': region_activity}

            nominal_energy_intensity_by_r[r] = nominal_energy_intensity_by_e
            all_data[r] = region_data

        # NOTE(review): uses `energy_data` from the final loop iteration only.
        weather_factors = self.collect_weather(
            energy_dict=energy_data,
            nominal_energy_intensity=nominal_energy_intensity_by_r
        )  # need to integrate this into the data passed to LMDI

        national_weather_dict = dict()
        for region, r_dict_ in all_data.items():
            weather_factors_by_e_type = dict()

            for e_ in r_dict_['energy'].keys():
                national_weather_dict[e_] = \
                    weather_factors[e_][[f'{e_}_weather_factor']]

                # Per-region weather factor column for this energy type.
                e_r_weather =\
                     weather_factors[e_][[f'{region.lower()}_weather_factor']]
                weather_factors_by_e_type[e_] = e_r_weather

            r_dict_['weather_factors'] = weather_factors_by_e_type
            all_data[region] = r_dict_

        all_data = {'National': all_data}
        return all_data

    def main(self, breakout, calculate_lmdi):
        """Calculate decomposition for the Residential sector

        Args:
            breakout: passed through to get_nested_lmdi
            calculate_lmdi: passed through to get_nested_lmdi

        Returns:
            results_dict (dict): nested LMDI decomposition results
        """
        # NOTE(review): unit_conversion_factor is currently unused.
        unit_conversion_factor = 1
        data_dict = self.collect_data()

        results_dict, formatted_results = \
            self.get_nested_lmdi(
                level_of_aggregation=self.level_of_aggregation,
                breakout=breakout, calculate_lmdi=calculate_lmdi,
                raw_data=data_dict, lmdi_type='LMDI-I')

        return results_dict
# Esempio n. 4 (scraped example separator; original vote count: 0)
class ResidentialIndicators(CalculateLMDI):
    def __init__(self,
                 directory,
                 output_directory,
                 level_of_aggregation=None,
                 lmdi_model='multiplicative',
                 base_year=1985,
                 end_year=2018):
        """Collect Residential data sources and configure the LMDI base.

        Args:
            directory (str): Input data directory.
            output_directory (str): Directory for LMDI output.
            level_of_aggregation: Aggregation level passed to CalculateLMDI.
            lmdi_model (str): LMDI model form, 'multiplicative' by default.
            base_year (int): Base year for the decomposition.
            end_year (int): Final year of data.
        """
        self.eia_res = GetEIAData('residential')

        # Identical housing-type breakdown for every Census region
        # (each region gets its own copy, as in the original literal).
        housing_types = {'Single-Family': None,
                         'Multi-Family': None,
                         'Manufactured-Homes': None}
        self.sub_categories_list = {
            'National': {region: dict(housing_types)
                         for region in ('Northeast', 'Midwest',
                                        'South', 'West')}}

        self.national_calibration = self.eia_res.national_calibration()
        # Regional energy consumption data from SEDS.
        self.seds_census_region = self.eia_res.get_seds()
        self.ahs_Data = ResidentialFloorspace.update_ahs_data()
        self.conversion_factors = self.eia_res.conversion_factors()
        self.regions = ['Northeast', 'South', 'West', 'Midwest', 'National']
        self.base_year = base_year
        self.directory = directory
        self.end_year = end_year
        self.energy_types = ['elec', 'fuels', 'deliv', 'source']

        super().__init__(sector='residential',
                         level_of_aggregation=level_of_aggregation,
                         lmdi_models=lmdi_model,
                         categories_dict=self.sub_categories_list,
                         energy_types=self.energy_types,
                         directory=directory,
                         output_directory=output_directory,
                         base_year=base_year)

        # self.AER11_table2_1b_update = GetEIAData.eia_api(id_='711250') # 'http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=711250'
        # self.AnnualData_MER_22_Dec2019 = GetEIAData.eia_api(id_='711250') # 'http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=711250' ?
        # self.RECS_intensity_data =   # '711250' for Residential Sector Energy Consumption

    def get_seds(self):
        """Rename SEDS Census-region column codes to region names.

        Returns:
            tuple: (total_fuels, elec) dataframes with region-name columns.
        """
        region_names = {1: 'Northeast', 2: 'Midwest', 3: 'South', 4: 'West'}
        total_fuels = self.seds_census_region[0].rename(columns=region_names)
        elec = self.seds_census_region[1].rename(columns=region_names)
        return total_fuels, elec

    def fuel_electricity_consumption(self, total_fuels, elec, region):
        """Select one region's energy consumption in Trillion Btu.

        Data Source: EIA's State Energy Data System (SEDS).

        Args:
            total_fuels (pd.DataFrame): fuels data with region-name columns.
            elec (pd.DataFrame): electricity data with region-name columns.
            region (str): region-name column to select.

        Returns:
            energy_data (dict): single-column dataframes keyed by
                                'elec' and 'fuels'.
        """
        return {'elec': elec[[region]], 'fuels': total_fuels[[region]]}

    def get_floorspace(self):
        """Collect final Residential floorspace estimates.

        Returns:
            final_floorspace_results (dict): keys 'occupied_housing_units',
                'floorspace_square_feet' and
                'household_size_square_feet_per_hu'.
        """
        floorspace_sqft, occupied_units, hh_size_sqft_per_hu = \
            ResidentialFloorspace(
                end_year=self.end_year).final_floorspace_estimates()

        return {'occupied_housing_units': occupied_units,
                'floorspace_square_feet': floorspace_sqft,
                'household_size_square_feet_per_hu': hh_size_sqft_per_hu}

    def activity(self, floorspace):
        """Relabel regional floorspace columns to housing-type names.

        Args:
            floorspace (dict): variable name -> {region: pd.DataFrame}.

        Returns:
            all_activity (dict): region -> {variable: relabeled pd.DataFrame}.
        """
        # Average-size frames use different source column codes than
        # the occupied-unit frames.
        size_columns = {'avg_size_sqft_mf': 'Multi-Family',
                        'avg_size_sqft_mh': 'Manufactured-Homes',
                        'avg_size_sqft_sf': 'Single-Family'}
        unit_columns = {'occupied_units_mf': 'Multi-Family',
                        'occupied_units_mh': 'Manufactured-Homes',
                        'occupied_units_sf': 'Single-Family'}

        all_activity = dict()
        for region in self.sub_categories_list['National'].keys():
            region_activity = dict()
            for variable, data in floorspace.items():
                renames = (size_columns
                           if variable == 'household_size_square_feet_per_hu'
                           else unit_columns)
                df = data[region].rename(columns=renames)

                print(variable, df.columns)
                region_activity[variable] = df
            all_activity[region] = region_activity

        return all_activity

    def collect_weather(self, energy_dict, nominal_energy_intensity):
        """Collect Residential-sector weather factors (not adjusted data).

        Args:
            energy_dict (dict): energy dataframes keyed by energy type.
            nominal_energy_intensity (dict): nominal intensities by region.

        Returns:
            weather_factors: output of WeatherFactors.get_weather.
        """
        factors_source = WeatherFactors(
            sector='residential',
            directory=self.directory,
            nominal_energy_intensity=nominal_energy_intensity)
        # NOTE(review): unclear whether this should return factors,
        # weather-adjusted data, or both — currently factors only.
        return factors_source.get_weather(energy_dict, weather_adjust=False)

    def collect_data(self):
        """Gather all Residential-sector input data for the decomposition.

        Returns:
            all_data (dict): region -> {'energy', 'activity',
                             'weather_factors'} input data for LMDI.
        """
        total_fuels, elec = self.get_seds()
        floorspace = self.get_floorspace()
        activity = self.activity(floorspace)
        all_data = dict()
        nominal_energy_intensity_by_r = dict()
        for r in self.sub_categories_list['National'].keys():
            region_activity = activity[r]

            energy_data = self.fuel_electricity_consumption(total_fuels,
                                                            elec,
                                                            region=r)

            nominal_energy_intensity_by_e = dict()

            for e, e_df in energy_data.items():
                e_df = e_df.rename_axis(columns=None)
                # Distinct name so the `floorspace` dict above is not
                # clobbered by the loop (was previously shadowed).
                region_floorspace = region_activity['floorspace_square_feet']
                total_floorspace = region_floorspace.sum(axis=1)
                nominal_energy_intensity = self.lmdi(
                    model=None,
                    activity_input_data=total_floorspace,
                    energy_input_data=e_df,
                    unit_conversion_factor=1,
                    return_nominal_energy_intensity=True
                )  # shouldn't rely on multiplicative?
                nominal_energy_intensity_by_e[e] = nominal_energy_intensity

            region_data = {'energy': energy_data, 'activity': region_activity}

            nominal_energy_intensity_by_r[r] = nominal_energy_intensity_by_e
            all_data[r] = region_data

        # NOTE(review): uses `energy_data` from the final loop iteration only.
        weather_factors = self.collect_weather(
            energy_dict=energy_data,
            nominal_energy_intensity=nominal_energy_intensity_by_r
        )  # need to integrate this into the data passed to LMDI
        for region, r_dict_ in all_data.items():
            weather_factors_by_e_type = dict()

            for e_ in r_dict_['energy'].keys():
                # BUG FIX: previously keyed on the stale loop variable `e`
                # (always the last energy type), collapsing every entry
                # onto a single key; key on the current `e_` instead.
                weather_factors_by_e_type[e_] = weather_factors

            r_dict_['weather_factors'] = weather_factors_by_e_type
            all_data[region] = r_dict_

        return all_data

    def main(self, breakout, save_breakout, calculate_lmdi):
        """Run the Residential-sector LMDI decomposition.

        Args:
            breakout: passed through to get_nested_lmdi.
            save_breakout: passed through to get_nested_lmdi.
            calculate_lmdi: passed through to get_nested_lmdi.

        Returns:
            results_dict (dict): nested LMDI decomposition results.
        """
        data_dict = self.collect_data()

        results_dict, formatted_results = self.get_nested_lmdi(
            level_of_aggregation=self.level_of_aggregation,
            breakout=breakout,
            save_breakout=save_breakout,
            calculate_lmdi=calculate_lmdi,
            raw_data=data_dict,
            account_for_weather=True)

        # BUG FIX: previously returned the undefined name `results`
        # (NameError at runtime); return the computed results_dict.
        return results_dict