Python df_utils 예제들, EnergyIntensityIndicators.utilities.dataframe_utilities.df_utils Python 예제들

예제 #1

0

파일 보기

    def get_fuel_mix(region_data):
        """Compute each fuel's share of a region's total fuel use.

        Args:
            region_data (DataFrame): Fuel use data by fuel for a region.
                A 'Census Region' column, when present, is dropped before
                any totals are computed.

        Returns:
            fuel_mix (DataFrame): Output of df_utils.calculate_shares for
                the data with an added 'total' column (i.e. share of
                total by fuel).
        """
        # errors='ignore' keeps this a no-op when the label column is absent.
        fuels_only = region_data.drop(columns='Census Region',
                                      errors='ignore')
        with_total = df_utils().create_total_column(fuels_only,
                                                    total_label='total')
        return df_utils().calculate_shares(with_total, total_label='total')

예제 #2

0

파일 보기

파일: weather_factors.py 프로젝트: NREL/EnergyIntensityIndicators

    def national_method2_regression_models(self, seds_data, weather_factors):
        """Second regression model: implicit national weather factor.

        Regional consumption (every column except 'National') is scaled
        by the weather factors, summed into a weather-adjusted national
        total, and the reported 'National' column is divided by that total.

        Args:
            seds_data (DataFrame): Consumption data with a 'National' column.
            weather_factors (DataFrame): Factors multiplied positionally
                (via ``.values``) against the non-national columns.

        Returns:
            DataFrame: Single 'National' column holding the ratio of
                reported to weather-adjusted national consumption.
        """
        seds_data, weather_factors = \
            df_utils().ensure_same_indices(seds_data, weather_factors)

        regional = seds_data.drop('National', axis=1)
        adjusted = regional.multiply(weather_factors.values)
        adjusted['National'] = adjusted.sum(axis=1)

        # Column vector so the division broadcasts row by row.
        adjusted_national = \
            adjusted['National'].values.reshape(len(adjusted), 1)
        return seds_data[['National']].divide(adjusted_national)

예제 #3

0

파일 보기

파일: weather_factors.py 프로젝트: NREL/EnergyIntensityIndicators

 def heating_cooling_degree_days(type_day):
     """Collect degree-day series from the EIA API for every region.

     One series id is built per region code and fetched with
     ``self.eia_data.eia_api``; the resulting frames are merged with
     ``df_utils().merge_df_list``.

     NOTE(review): ``self`` and ``t`` are neither parameters nor locals
     here; presumably this is a nested function closing over them.
     Confirm that ``t`` is defined in the enclosing scope, otherwise the
     residential branch raises NameError.

     Args:
         type_day (str): Suffix inserted into the series id (presumably
             a heating/cooling degree-day code; verify against caller).

     Raises:
         ValueError: ``self.sector`` is neither 'residential' nor
             'commercial'.

     Returns:
         data_df (DataFrame): Merged per-region series.
     """
     regions = ['ENC', 'ESC', 'MATL', 'MTN', 'NENGL', 'PCF', 'SATL', 'WNC', 'WSC', 'USA']

     # Fail fast with a clear message; previously an unsupported sector
     # surfaced as an UnboundLocalError on `standard_id`.
     sector = self.sector
     if sector not in ('residential', 'commercial'):
         raise ValueError(
             f"Unsupported sector {sector!r}; expected 'residential' "
             "or 'commercial'")

     dd_data = []
     for region in regions:
         if sector == 'residential':
             standard_id = f'AEO.2020.AEO2019REF.KEI_{t}_RESD_NA_NA_NA_{region}_{type_day}.A'
         else:
             standard_id = f'AEO.2020.AEO2019REF.KEI_NA_COMM_NA_NA_NA_{region}_{type_day}.A'
         r_df = self.eia_data.eia_api(id_=standard_id, id_type='series')
         dd_data.append(r_df)
     data_df = df_utils().merge_df_list(dd_data)
     return data_df

예제 #4

0

파일 보기

    def decomposition(self, ASI):
        """Aggregate additive LMDI results from the component data.

        The component dataframes in ``ASI`` are merged, passed through
        ``self.calculate_effect``, given a 'Year' column, and aggregated
        with ``self.aggregate_additive`` relative to ``self.base_year``.

        Args:
            ASI (dict): Mapping whose values are the component dataframes.

        Returns:
            DataFrame: Aggregated results with an added
                "@filter|Measure|BaseYear" column set to the base year.
        """
        components = list(ASI.values())
        merged = df_utils().merge_df_list(components)

        effect = self.calculate_effect(merged).reset_index()
        # reset_index names an unnamed index 'index'; expose it as 'Year'.
        if 'Year' not in effect.columns:
            effect = effect.rename(columns={'index': 'Year'})

        result = self.aggregate_additive(effect, self.base_year)
        result["@filter|Measure|BaseYear"] = self.base_year
        return result

예제 #5

0

파일 보기

파일: testing_utilties.py 프로젝트: NREL/EnergyIntensityIndicators

    def pct_diff(self, pnnl_data, eii_data):
        """Check whether two dataframes agree within the accepted tolerance.

        The relative difference is ``|pnnl - eii| / pnnl`` (absolute
        value), with NaN results treated as zero difference. Every cell
        must be at or below ``self.acceptable_pct_difference``.

        Args:
            pnnl_data (DataFrame): Reference data (the denominator).
            eii_data (DataFrame): Data being compared to the reference.

        Returns:
            bool: True when both inputs are empty or every relative
                difference is within tolerance; False when exactly one
                input is empty or any cell exceeds the tolerance.
        """
        # The both-empty case must be tested first: the original order
        # checked `or` before `and`, which made this branch unreachable.
        if pnnl_data.empty and eii_data.empty:
            return True
        if pnnl_data.empty or eii_data.empty:
            return False

        pnnl_data, eii_data = df_utils()\
            .ensure_same_indices(pnnl_data, eii_data)

        diff_df_abs = np.absolute(pnnl_data.subtract(eii_data))
        # Outer absolute value guards against negative reference values.
        pct_diff = np.absolute(diff_df_abs.divide(pnnl_data))
        within_tolerance = \
            pct_diff.fillna(0) <= self.acceptable_pct_difference
        return within_tolerance.all(axis=None)

예제 #6

0

파일 보기

파일: weather_factors.py 프로젝트: NREL/EnergyIntensityIndicators

    def estimate_regional_shares(self):
        """Estimate normalized commercial floorspace shares by Census region.

        Spreadsheet equivalent: Commercial --> 'Regional Shares'.
        Assumes commercial floorspace in each region follows the same
        trends as population or housing units.

        Steps, as implemented below:
            1. Read CBECS floorspace from CSV and compute regional shares.
            2. Build residential housing-unit shares from
               ResidentialFloorspace.final_floorspace_estimates().
            3. For each region, fit a degree-1 polynomial between the
               log CBECS shares (x) and the log residential shares in the
               CBECS years (y).
            4. Apply the fit to the log residential shares for all years,
               exponentiate, and normalize each row to sum to one.

        Returns:
            normalized_shares (DataFrame): Predicted shares per region,
                indexed like the residential housing-unit data, each row
                divided by its row sum.
        """
        regions = ['Northeast', 'Midwest', 'South', 'West']
        # Two relative locations are tried because the working directory
        # may be either the repo root or the package directory.
        try:
            cbecs_data = pd.read_csv('./EnergyIntensityIndicators/Data/cbecs_data_millionsf.csv').set_index('Year')
        except FileNotFoundError:
            cbecs_data = pd.read_csv('./Data/cbecs_data_millionsf.csv').set_index('Year')

        # Years are handled as strings from here on (see the .loc lookups).
        cbecs_data.index = cbecs_data.index.astype(str)
        # NOTE(review): captured before the '1979' assignment below, so
        # this presumably relies on a 1979 row already existing in the
        # CSV; otherwise x and y in the fit would differ in length.
        cbecs_years = list(cbecs_data.index)
        # Strip stray whitespace from the CSV header names.
        cbecs_data = cbecs_data.rename(columns={'Midwest ': 'Midwest', ' South': 'South', ' West': 'West'})

        # 1979 values are derived from 1983 minus fixed per-region offsets
        # (order matches `regions`); source of the constants is not shown
        # here -- TODO confirm against the spreadsheet.
        cbecs_data.loc['1979', regions] = cbecs_data.loc['1983', regions].subtract([826, 972, 2665, 1212])
        cbecs_data.loc['1979', ['U.S.']] = sum(cbecs_data.loc['1979', regions].values)

        # NOTE(review): sum(axis=1) runs over every column present at this
        # point. If 'U.S.' is a column (it is assigned to above), it is
        # included in 'U.S. (calc)' alongside the regions -- confirm this
        # is intended.
        cbecs_data['U.S. (calc)'] = cbecs_data.sum(axis=1)

        # Divide each regional column by the calculated total, row by row.
        comm_regional_shares = cbecs_data.drop(['U.S.', 'U.S. (calc)'], axis=1).divide(cbecs_data['U.S. (calc)'].values.reshape(len(cbecs_data), 1))
        comm_regional_shares_ln = np.log(comm_regional_shares)

        residential_data = ResidentialFloorspace(end_year=self.end_year)  # change to pull from residential().activity()
        # Only regional_estimates_all is used below; the other two returned
        # values are not referenced in this method.
        final_results_total_floorspace_regions, regional_estimates_all, avg_size_all_regions = residential_data.final_floorspace_estimates()
        
        # One single-column frame per region, renamed from 'Total' to the
        # region name so the merged frame has one column per region.
        regional_dfs = [regional_estimates_all[r][['Total']].rename(columns={'Total': r}) for r in regions]
        residential_housing_units = df_utils().merge_df_list(regional_dfs)
        residential_housing_units['U.S.'] = residential_housing_units.sum(axis=1)
        residential_housing_units.index = residential_housing_units.index.astype(str)
        regional_shares_residential_housing_units = residential_housing_units.drop('U.S.', axis=1).divide(residential_housing_units['U.S.'].values.reshape(len(residential_housing_units), 1))
        regional_shares_residential_housing_units_ln = np.log(regional_shares_residential_housing_units)

        # Restrict the residential shares to the years present in CBECS so
        # they can be paired with the commercial shares in the fit.
        regional_shares_residential_housing_units_cbecs_years = regional_shares_residential_housing_units.loc[cbecs_years, :]
        regional_shares_residential_housing_units_cbecs_years_ln = np.log(regional_shares_residential_housing_units_cbecs_years)
        
        predictions_df = pd.DataFrame(columns=comm_regional_shares.columns, index=residential_housing_units.index)
        for region in comm_regional_shares.columns:
            x_values = comm_regional_shares_ln[region].values
            # transpose() of the 1-D array from a single column leaves it
            # unchanged.
            X = x_values.transpose()
            y = regional_shares_residential_housing_units_cbecs_years_ln[region].values

            # Degree-1 fit: p[0] is the slope, p[1] the intercept.
            p = np.polyfit(X, y, 1)
            # The fitted line is applied to the log residential shares for
            # all years, then exponentiated back to share space.
            predictions_df[region] = np.exp(regional_shares_residential_housing_units_ln[region].multiply(p[0]).add(p[1]))

        # Rescale each row so the predicted regional shares sum to one.
        predictions_df['Predicted Sum'] = predictions_df.sum(axis=1)
        normalized_shares = predictions_df.drop('Predicted Sum', axis=1).divide(predictions_df['Predicted Sum'].values.reshape(len(predictions_df), 1))
        return normalized_shares

예제 #7

0

파일 보기

파일: emissions_factors_exploration.py 프로젝트: NREL/EnergyIntensityIndicators

 def get_emissions_factors_plots(self):
     """Plot CO2 emissions factors (CO2 / energy) for each sector.

     Emissions come from ``self.all_fuels_data()`` and energy from
     ``self.economy_wide()``. For every sector the two frames are
     index-aligned, emissions are divided by the energy values, and the
     'CO2 Emissions' column is relabelled before the collected factors
     are handed to ``self.lineplot``.
     """
     emissions = self.all_fuels_data()
     energy = self.economy_wide()

     column_labels = {
         'CO2 Emissions': 'Million Metric Tons per Trillion Btu'
     }
     emissions_factors = {}
     for sector in ('commercial', 'industrial', 'residential',
                    'transportation', 'electric_power'):
         co2 = emissions[f'{sector}_co2']
         btu = energy[f'{sector}_energy']
         co2, btu = df_utils().ensure_same_indices(co2, btu)
         ratio = co2.divide(btu.values, axis=1)
         emissions_factors[sector] = ratio.rename(columns=column_labels)

     self.lineplot(emissions_factors,
                   y_label='Million Metric Tons CO2 per Trillion Btu')

예제 #8

0

파일 보기

파일: asm_price_fit.py 프로젝트: NREL/EnergyIntensityIndicators

    def main(self, latest_year, fuel_type, naics, asm_col_map):
        """Build calibrated price predictions for each NAICS code.

        For every code: load MECS and ASM historical prices, outer-join
        them on the index, fit coefficients, compute predicted prices,
        interpolate the residuals and derive the calibrated prediction.

        Args:
            latest_year: Passed to ``check_recent_mecs`` together with the
                last year found in the MECS data.
            fuel_type: Fuel identifier forwarded to the import helpers.
            naics (iterable): NAICS codes to process; each becomes a
                column name in the result.
            asm_col_map: Column mapping forwarded to
                ``import_asm_historical``.

        Returns:
            DataFrame: Per-code 'calibrated_prediction' columns merged
                with ``df_utils().merge_df_list``.
        """
        per_code = []

        for code in naics:
            mecs = self.import_mecs_historical(fuel_type, code)
            self.check_recent_mecs(latest_year=latest_year,
                                   last_historical_year=max(mecs.index))
            asm = self.import_asm_historical(fuel_type, code, asm_col_map)

            prices = asm.merge(mecs, how='outer',
                               left_index=True, right_index=True)

            # Initial guess for the coefficient fit.
            initial_guess = [0.646744966, 0.411641841]
            coeffs = self.calc_predicted_coeffs(prices[['asm_price']],
                                                prices[['MECS_price']],
                                                initial_guess)
            fitted = self.calc_predicted_prices(coeffs,
                                                prices[['MECS_price']],
                                                prices[['asm_price']])
            # Stored as a column vector, one row per price observation.
            prices['predicted'] = fitted.reshape((len(fitted), 1))

            residuals = self.interpolate_residuals(prices, coeffs)
            calibrated = self.calc_calibrated_predicted_price(residuals)
            calibrated = calibrated[['calibrated_prediction']].rename(
                columns={'calibrated_prediction': code})
            per_code.append(calibrated)

        return df_utils().merge_df_list(per_code)

예제 #9

0

파일 보기

    def collect_weather_data(self,
                             energy_data,
                             activity_input_data,
                             weather_data,
                             total_label,
                             weather_activity,
                             sector='Residential'):
        """Derive 'deliv' weather factors from 'elec' and 'fuels' factors.

        Side effects: adds a 'deliv' entry to ``energy_data``, replaces
        each value of ``activity_input_data`` with a version carrying a
        total column, and temporarily sets ``self.energy_types`` to
        ['elec', 'fuels', 'deliv'] before resetting it to ['all'].

        Args:
            energy_data (dict): Dataframes of energy data from the energy
                                decomposition (keys are 'elec' and
                                'fuels'); mutated in place.
            activity_input_data (dict): Activity data for the sector;
                                        mutated in place.
            weather_data (dict): Weather factors for 'elec' and 'fuels'.
            total_label (str): Level total name.
            weather_activity (str): Key of the activity data used for the
                                    Residential nominal intensity.
            sector (str, optional): 'Residential' or 'Commercial'.
                                    Defaults to 'Residential'. Not read by
                                    this method; branching uses
                                    ``self.sector``.

        Returns:
            weather_data (pd.DataFrame): Weather factors for 'deliv'. When
                ``self.sector`` is neither 'Commercial' nor 'Residential'
                the ``weather_data`` argument is returned unchanged.
        """
        energy_type = 'deliv'
        deliv = self.calculate_energy_data(energy_type, energy_data)
        deliv = deliv.drop('Energy_Type', axis=1)
        energy_data['deliv'] = deliv
        if total_label not in deliv.columns:
            deliv = df_utils().create_total_column(deliv, total_label)

        for name, activity in activity_input_data.items():
            if isinstance(activity, pd.Series):
                activity = activity.to_frame()
            activity_input_data[name] = \
                df_utils().create_total_column(activity, total_label)

        self.energy_types = ['elec', 'fuels', 'deliv']
        base_weather = weather_data

        if self.sector == 'Commercial':
            weather_data = self.weather_adjustment(
                energy_data, base_weather, energy_type)

        elif self.sector == 'Residential':
            # Residential adjusts nominal intensities rather than raw energy.
            input_data = {
                e: self.nominal_energy_intensity(
                    energy_data[e], activity_input_data[weather_activity])
                for e in self.energy_types
            }
            weather_data = self.weather_adjustment(
                input_data, base_weather, energy_type)

        self.energy_types = ['all']
        return weather_data

예제 #10

0

파일 보기

    def collect_seds(self, sector, states):
        """SEDS energy consumption data (in physical units unless
        unavailable, in which case in Btu-- indicated by P or
        B in endpoint)

        For each fuel of the sector, the series for every state is
        fetched, stacked, and summed by 'Year'. States whose endpoint
        raises KeyError are reported and skipped; a fuel for which no
        state returned data is reported and left out of the result.

        Args:
            sector (str): abbreviation for the commercial
                          or residential sector ('CC' or 'RC'
                          respectively)
            states (list): States in region-- used to collect
                           SEDS API data

        Raises:
            KeyError: ``sector`` is not 'CC' or 'RC'.

        Returns:
            fuels_data (pd.DataFrame): Energy consumption by fuel
                                       for region (by state) and sector
        """

        fuels = {
            'CC': [
                'All Petroleum Products', 'Coal', 'Distillate Fuel Oil',
                'Electrical System Energy Losses', 'Electricity Sales',
                'Fuel Ethanol including Denaturant',
                'Fuel Ethanol excluding Denaturant', 'Geothermal',
                'Hydrocarbon gas liquids', 'Hydroelectricity', 'Kerosene',
                'Motor Gasoline',
                'Natural Gas including Supplemental Gaseous Fuels',
                'Petroleum Coke', 'Propane', 'Residual Fuel Oil',
                'Solar Energy', 'Total (per Capita)',
                'Total Energy excluding Electrical System Energy Losses',
                'Waste', 'Wind Energy', 'Wood', 'Wood and Waste'
            ],
            'RC': [
                'All Petroleum Products',
                'Coal',
                'Distillate Fuel Oil',
                'Electrical System Energy Losses',  # in BTU
                'Electricity Sales',
                'Geothermal',
                'Hydrocarbon gas liquids',
                'Kerosene',
                'Natural Gas including Supplemental Gaseous Fuels',
                'Propane',
                'Solar Energy',
                'Total (per Capita)',
                'Total Energy excluding Electrical System Energy Losses',
                'Wood'
            ]
        }
        fuels_data = []
        for f in fuels[sector]:
            state_data = []
            for s in states:
                try:
                    df = self.eia.eia_api(
                        id_=self.seds_endpoints(sector, s, f),
                        id_type='series',
                        new_name=f,
                        units_col=True)
                except KeyError:
                    # Best effort: a missing endpoint for one state should
                    # not abort the whole region.
                    print(f'Endpoint failed for state {s}, sector '
                          f'{sector} and fuel type {f}')
                else:
                    state_data.append(df)

            # pd.concat raises ValueError on an empty list, so a fuel with
            # no successful state lookups is skipped instead.
            if not state_data:
                print(f'No SEDS data collected for sector {sector} '
                      f'and fuel type {f}; skipping')
                continue

            region_data = pd.concat(state_data, axis=0)
            region_data = region_data.reset_index()
            region_data = region_data.groupby('Year').sum()
            fuels_data.append(region_data)

        fuels_data = df_utils().merge_df_list(fuels_data)
        return fuels_data

예제 #11

0

파일 보기

파일: industrial_emissions.py 프로젝트: NREL/EnergyIntensityIndicators

    def handle_noncombustion(s_data, noncombustion_data, sub_category):
        """Merge noncombustion data into the sub_category level.

        When ``s_data`` is truthy, every key of ``s_data['noncombustion']``
        is looked up in ``noncombustion_data``; its emissions and activity
        data are each reduced to a single total column named after the
        key, and the per-key columns are merged. Otherwise the data for
        ``sub_category`` is returned as stored.

        Args:
            s_data (dict): categories below subcategory
            noncombustion_data (dict): Nested dictionary.
                                       Keys are subcategories,
                                       inner dictionary keys are
                                       'activity' and 'emissions'
                                       with respective dataframes
                                       (or lists of dataframes)
                                       as values.
            sub_category (str): Subcategory to collect

        Raises:
            KeyError: ``s_data`` is falsy and ``sub_category`` is not a
                      key of ``noncombustion_data``

        Returns:
            noncombustion_activity (pd.DataFrame): sub-subcategory data
                                                   merged into one
                                                   subcategory activity df
            noncombustion_emissions (pd.DataFrame): sub-subcategory data
                                                   merged into one
                                                   subcategory emissions df
        """
        def _total_column(data, label):
            # Collapse one category's data to a single column `label`.
            if isinstance(data, list):
                data = df_utils().merge_df_list(data)
            # Drop any pre-existing total so it is not counted twice.
            data = data.drop('Total', axis=1, errors='ignore')
            data = df_utils().create_total_column(data, label)
            return data[[label]]

        if not s_data:
            if sub_category not in noncombustion_data:
                raise KeyError(
                    'noncombustion_cat_data missing emissions or activity '
                    f'for subcategory {sub_category}')
            category = noncombustion_data[sub_category]
            emissions = category['emissions']
            activity = category['activity']
            return activity, emissions

        activity_parts = []
        emissions_parts = []
        for label in s_data['noncombustion'].keys():
            category = noncombustion_data[label]
            emissions_parts.append(
                _total_column(category['emissions'], label))
            activity_parts.append(
                _total_column(category['activity'], label))

        merged_activity = df_utils().merge_df_list(activity_parts)
        merged_emissions = df_utils().merge_df_list(emissions_parts)
        return merged_activity, merged_emissions