# Example #1 (score: 0)
    def clean_timeseries(self,
                         attr='values',
                         inplace=True,
                         time_index_name='year',
                         time_index=None,
                         lower=0,
                         upper=None,
                         interpolation_method='missing',
                         extrapolation_method='missing'):
        """Interpolate/extrapolate the DataFrame stored at ``self.<attr>`` onto
        ``time_index`` and clip the result to [lower, upper].

        Args:
            attr: name of the attribute on ``self`` holding the DataFrame.
            inplace: if True, write the cleaned frame back to ``self.<attr>``
                and return None; otherwise return the cleaned frame.
            time_index_name: name of the time level in the frame's index.
            time_index: target index; defaults to ``cfg.years`` when None.
            lower, upper: bounds passed to ``DataFrame.clip``.
            interpolation_method, extrapolation_method: 'missing' is a sentinel
                meaning "use the method configured on this object".
        """
        if time_index is None:
            time_index = cfg.years
        # BUG FIX: the original compared with `is 'missing'`, which relies on
        # CPython string interning — an implementation accident. Use equality.
        if interpolation_method == 'missing':
            interpolation_method = self.interpolation_method
        if extrapolation_method == 'missing':
            extrapolation_method = self.extrapolation_method
        # not every subclass defines extrapolation_growth; default to None
        exp_growth_rate = getattr(self, 'extrapolation_growth', None)

        data = getattr(self, attr)
        clean_data = TimeSeries.clean(
            data=data,
            newindex=time_index,
            time_index_name=time_index_name,
            interpolation_method=interpolation_method,
            extrapolation_method=extrapolation_method,
            exp_growth_rate=exp_growth_rate).clip(lower=lower, upper=upper)
        if inplace:
            setattr(self, attr, clean_data)
        else:
            return clean_data
 def run_all_cleaning_methods(self, x, y, newindex):
     """Exercise TimeSeries.clean on the (x, y) series with every configured method."""
     for cleaning_method in self.methods:
         frame = pd.DataFrame(y, index=x)
         # decay_towards_linear_regression is not supported for interpolation
         interp = None if cleaning_method == 'decay_towards_linear_regression' else cleaning_method
         newdata = TimeSeries.clean(frame,
                                    newindex=newindex,
                                    interpolation_method=interp,
                                    extrapolation_method=cleaning_method)
    def run_all_cleaning_methods(self, x, y, newindex):
        for method in self.methods:
            print method
            print x, y
            data = pd.DataFrame(y, index=x)
            newdata = TimeSeries.clean(data, newindex=newindex, interpolation_method=method)

            plt.plot(newdata.index, newdata[0])
            plt.plot(x, y, '.')
 def run_all_cleaning_methods(self, x, y, newindex):
     """For each configured method, clean the (x, y) series onto newindex."""
     for method in self.methods:
         if method == 'decay_towards_linear_regression':
             # this method cannot be used for interpolation
             interpolation = None
         else:
             interpolation = method
         data = pd.DataFrame(y, index=x)
         newdata = TimeSeries.clean(data,
                                    newindex=newindex,
                                    interpolation_method=interpolation,
                                    extrapolation_method=method)
    def run_all_cleaning_methods(self, x, y, newindex):
        for method in self.methods:
            print method
            print x, y
            data = pd.DataFrame(y, index=x)
            newdata = TimeSeries.clean(data,
                                       newindex=newindex,
                                       interpolation_method=method)

            plt.plot(newdata.index, newdata[0])
            plt.plot(x, y, '.')
# Example #6 (score: 0)
    def _update_dataframe_totals_after_foreign_gau(self, df, current_geography, foreign_geography, impacted_gaus, foreign_gau, map_key, zero_out_negatives):
        """Overlay the foreign gau's data onto ``df`` and net it out of the
        impacted native gaus so totals remain consistent.

        Args:
            df: multi-index DataFrame holding both native and foreign gau rows.
            current_geography: name of the geography level currently in ``df``.
            foreign_geography: geography level the foreign gau natively uses.
            impacted_gaus: native gaus that must absorb the subtraction.
            foreign_gau: the gau whose data is being overlaid.
            map_key: map key passed to ``self.map_df`` for the allocation.
            zero_out_negatives: if True, clip negative subtraction results to
                zero instead of raising.

        Returns:
            ``df`` with foreign-gau rows overwritten over the allocated years
            and impacted native gau rows reduced by the allocated amounts.

        Raises:
            ValueError: if the subtraction goes negative and
                ``zero_out_negatives`` is False, or if no years/vintages
                overlap between foreign and impacted gaus.
        """
        y_or_v = GeoMapper._get_df_time_index_name(df)
        # we first need to do a clean time series
        # then we need to allocate out and subtract
        indexer = util.level_specific_indexer(df, current_geography, [impacted_gaus])
        impacted_gaus_slice = df.loc[indexer, :].reset_index().set_index(df.index.names)

        # slice out the foreign gau and relabel its geography level so it can
        # be combined with frames keyed on foreign_geography
        foreign_gau_slice = util.df_slice(df, foreign_gau, current_geography, drop_level=False, reset_index=True)
        foreign_gau_slice.index = foreign_gau_slice.index.rename(foreign_geography, level=current_geography)

        # do the allocation, take the ratio of foreign to native, do a clean timeseries, then reconstitute the foreign gau data over all years
        allocation = self.map_df(foreign_geography, current_geography, map_key=map_key, primary_subset_id=[foreign_gau])
        allocated_foreign_gau_slice = util.DfOper.mult((foreign_gau_slice, allocation), fill_value=np.nan)
        allocated_foreign_gau_slice = allocated_foreign_gau_slice.reorder_levels([-1]+range(df.index.nlevels))
        ratio_allocated_to_impacted = util.DfOper.divi((allocated_foreign_gau_slice, impacted_gaus_slice), fill_value=np.nan, non_expandable_levels=[])
        # avoid inf/nan ratios where the impacted slice is exactly zero
        ratio_allocated_to_impacted.iloc[np.nonzero(impacted_gaus_slice.values==0)] = 0
        clean_ratio = TimeSeries.clean(data=ratio_allocated_to_impacted, time_index_name=y_or_v, interpolation_method='linear_interpolation', extrapolation_method='nearest')
        allocated_foreign_gau_slice_all_years = util.DfOper.mult((clean_ratio, impacted_gaus_slice), fill_value=np.nan, non_expandable_levels=[])
        allocated_foreign_gau_slice_new_geo = util.remove_df_levels(allocated_foreign_gau_slice_all_years, foreign_geography)
        allocated_foreign_gau_slice_foreign_geo = util.remove_df_levels(allocated_foreign_gau_slice_all_years, current_geography)
        allocated_foreign_gau_slice_foreign_geo.index = allocated_foreign_gau_slice_foreign_geo.index.rename(current_geography, level=foreign_geography)

        # update foreign GAUs after clean timeseries
        allocated_gau_years = list(allocated_foreign_gau_slice_foreign_geo.index.get_level_values(y_or_v).values)
        allocated_foreign_gau_slice_foreign_geo = allocated_foreign_gau_slice_foreign_geo.reorder_levels(df.index.names).sort()
        indexer = util.level_specific_indexer(allocated_foreign_gau_slice_foreign_geo, [current_geography, y_or_v], [foreign_gau, allocated_gau_years])

        df.loc[indexer, :] = allocated_foreign_gau_slice_foreign_geo.loc[indexer, :]

        new_impacted_gaus = util.DfOper.subt((impacted_gaus_slice, allocated_foreign_gau_slice_new_geo), fill_value=np.nan, non_expandable_levels=[])
        new_impacted_gaus = new_impacted_gaus.reorder_levels(df.index.names).sort()
        if new_impacted_gaus.min().min() < 0:
            if not zero_out_negatives:
                raise ValueError(
                    'Negative values resulted from subtracting the foreign gau from the base gaus. This is the resulting dataframe: {}'.format(new_impacted_gaus))
            else:
                new_impacted_gaus[new_impacted_gaus < 0] = 0
        # NOTE(review): `.all().value` looks suspicious — pandas exposes
        # `.values`, not `.value`; confirm this branch is ever reached.
        # The pdb.set_trace() below is a debugging leftover.
        if new_impacted_gaus.isnull().all().value:
            pdb.set_trace()
            raise ValueError('Year or vitages did not overlap between the foreign gaus and impacted gaus')

        # update native GAUs after netting out foreign gaus
        impacted_gau_years = list(impacted_gaus_slice.index.get_level_values(y_or_v).values)
        indexer = util.level_specific_indexer(df, [current_geography, y_or_v], [impacted_gaus, impacted_gau_years])
        df.loc[indexer, :] = new_impacted_gaus.loc[indexer, :]

        return df
            # NOTE(review): orphaned fragment — this is the tail of a
            # run_all_cleaning_methods body (the enclosing `def`/`for` were
            # lost in a bad paste); `data`, `method`, `x`, `y` are undefined
            # at this point in the file as shown.
            newdata = TimeSeries.clean(data,
                                       newindex=newindex,
                                       interpolation_method=method)

            plt.plot(newdata.index, newdata[0])
            plt.plot(x, y, '.')


# newindex = np.arange(2015, 2025)

# example: three observed years (2015/2018/2020) cleaned onto 2012-2016 —
# linear interpolation inside the range, nearest value outside it
newindex = np.arange(2012, 2017)
x = np.array([2015, 2018, 2020])
y = np.array([0.8, 0.7, 0.4])
data = pd.DataFrame(y, index=x)
newdata = TimeSeries.clean(
    data,
    newindex=newindex,
    interpolation_method='linear_interpolation',
    extrapolation_method='nearest')
#
#
# newindex = np.arange(2020, 2025)
# multi_data = pd.concat([data]*3, keys=['a', 'b', 'c'], names=['dummy', 'year'])
# newdata2 = TimeSeries.clean(multi_data, time_index_name='year', newindex=newindex, interpolation_method='linear_interpolation', extrapolation_method='nearest')

newindex = np.arange(2015, 2050)
multi_data = pd.concat([data] * 3,
                       keys=['a', 'b', 'c'],
                       names=['dummy', 'year'])
# FIX(review): the original text spliced this call together with arguments
# (`target=rio_emissions_fe, earliest_year=2040`) belonging to an unrelated
# groupby().apply(scale, ...) call; reconstructed from the intact duplicate of
# this example later in the file, which ends extrapolation_method='exponential'.
newdata2 = TimeSeries.clean(multi_data,
                            time_index_name='year',
                            newindex=newindex,
                            interpolation_method='nearest',
                            extrapolation_method='exponential')
    # NOTE(review): orphaned fragment — the statements that defined
    # ep_emissions_fe / ep_fe (and the enclosing scope whose indentation this
    # line carries) were lost in a bad paste; presumably a prior
    # groupby().apply(scale, ...) produced ep_emissions_fe — verify upstream.
    ep_emissions = pd.concat([
        ep_emissions_fe.reset_index(),
        ep_emissions[ep_emissions.index.get_level_values('FINAL_ENERGY') !=
                     ep_fe].reset_index()
    ]).set_index(ep_emissions.index.names).sort_index()

# scale every (timestamp, scenario, year) group toward the RIO emissions total
print '\n scaling for all emissions'
ep_emissions = ep_emissions.groupby(
    level=['TIMESTAMP', 'SCENARIO', 'YEAR']).apply(scale,
                                                   target=remove_df_levels(
                                                       rio_emissions,
                                                       'product fuel'),
                                                   earliest_year=2020)
# interpolate between all years
print 'interpolating emissions'
# make sure every RIO year exists in the index first, filling gaps with 0
ep_emissions = reindex_df_level_with_new_elements(
    ep_emissions, 'YEAR', rio_emissions_years).fillna(0)
ep_emissions = ep_emissions.reset_index().set_index(
    ep_emissions.index.names).sort_index()
ep_emissions = TimeSeries.clean(ep_emissions,
                                newindex=years,
                                time_index_name='YEAR',
                                interpolation_method='linear_interpolation')

print 'saving scaled emissions'
ep_emissions.to_csv(
    r"D:\Dropbox (EER)\Evolved Energy Research\Projects & Marketing\Princeton University\Ryan's output template\combined_outputs\c_emissions_scaled.csv"
)
    def run_all_cleaning_methods(self, x, y, newindex):
        """Run TimeSeries.clean on (x, y) once per configured method."""
        for method in self.methods:
            # the linear-regression decay method is not valid for interpolation
            if method == 'decay_towards_linear_regression':
                interp = None
            else:
                interp = method
            newdata = TimeSeries.clean(pd.DataFrame(y, index=x),
                                       newindex=newindex,
                                       interpolation_method=interp,
                                       extrapolation_method=method)


# newindex = np.arange(2015, 2025)

# interpolate inside the observed range, carry the nearest value outside it
newindex = np.arange(2012, 2017)
x = np.array([2015, 2018, 2020])
y = np.array([0.8, 0.7, 0.4])
data = pd.DataFrame(y, index=x)
newdata = TimeSeries.clean(data,
                           newindex=newindex,
                           interpolation_method='linear_interpolation',
                           extrapolation_method='nearest')
#
#
# newindex = np.arange(2020, 2025)
# multi_data = pd.concat([data]*3, keys=['a', 'b', 'c'], names=['dummy', 'year'])
# newdata2 = TimeSeries.clean(multi_data, time_index_name='year', newindex=newindex, interpolation_method='linear_interpolation', extrapolation_method='nearest')


# repeat the series under three dummy keys and extrapolate exponentially
newindex = np.arange(2015, 2050)
multi_data = pd.concat([data] * 3,
                       keys=['a', 'b', 'c'],
                       names=['dummy', 'year'])
newdata2 = TimeSeries.clean(multi_data,
                            time_index_name='year',
                            newindex=newindex,
                            interpolation_method='nearest',
                            extrapolation_method='exponential')


# raw_values = pd.read_csv('raw_values_example_for_clean_timeseries.csv')
# raw_values.set_index(['us', 'efficiency_type', 'supply_node', 'year'], inplace=True)
# raw_values.sort_index(inplace=True)
# Example #10 (score: 0)
    def _update_dataframe_totals_after_foreign_gau(self, df, current_geography,
                                                   foreign_geography,
                                                   impacted_gaus, foreign_gau,
                                                   map_key,
                                                   zero_out_negatives):
        """Overlay the foreign gau's data onto ``df`` and net it out of the
        impacted native gaus so geography totals remain consistent.

        Args:
            df: multi-index DataFrame holding both native and foreign gau rows.
            current_geography: name of the geography level currently in ``df``.
            foreign_geography: geography level the foreign gau natively uses.
            impacted_gaus: native gaus that must absorb the subtraction.
            foreign_gau: the gau whose data is being overlaid.
            map_key: map key passed to ``self.map_df`` for the allocation.
            zero_out_negatives: if True, clip negative subtraction results to
                zero instead of raising.

        Returns:
            ``df`` with foreign-gau rows overwritten over the allocated years
            and impacted native gau rows reduced by the allocated amounts.

        Raises:
            ValueError: if the subtraction goes negative and
                ``zero_out_negatives`` is False, or if no years/vintages
                overlap between foreign and impacted gaus.
        """
        y_or_v = GeoMapper._get_df_time_index_name(df)
        # we first need to do a clean time series
        # then we need to allocate out and subtract
        indexer = util.level_specific_indexer(df, current_geography,
                                              [impacted_gaus])
        impacted_gaus_slice = df.loc[indexer, :].reset_index().set_index(
            df.index.names)

        # slice out the foreign gau and relabel its geography level so it can
        # be combined with frames keyed on foreign_geography
        foreign_gau_slice = util.df_slice(df,
                                          foreign_gau,
                                          current_geography,
                                          drop_level=False,
                                          reset_index=True)
        foreign_gau_slice.index = foreign_gau_slice.index.rename(
            foreign_geography, level=current_geography)

        # do the allocation, take the ratio of foreign to native, do a clean timeseries, then reconstitute the foreign gau data over all years
        allocation = self.map_df(foreign_geography,
                                 current_geography,
                                 map_key=map_key,
                                 primary_subset_id=[foreign_gau])
        allocated_foreign_gau_slice = util.DfOper.mult(
            (foreign_gau_slice, allocation), fill_value=np.nan)
        allocated_foreign_gau_slice = allocated_foreign_gau_slice.reorder_levels(
            [-1] + range(df.index.nlevels))
        ratio_allocated_to_impacted = util.DfOper.divi(
            (allocated_foreign_gau_slice, impacted_gaus_slice),
            fill_value=np.nan,
            non_expandable_levels=[])
        # fill the ratio across all years so the foreign data can be
        # reconstituted over the full time index
        clean_ratio = TimeSeries.clean(
            data=ratio_allocated_to_impacted,
            time_index_name=y_or_v,
            interpolation_method='linear_interpolation',
            extrapolation_method='nearest')
        allocated_foreign_gau_slice_all_years = util.DfOper.mult(
            (clean_ratio, impacted_gaus_slice),
            fill_value=np.nan,
            non_expandable_levels=[])

        allocated_foreign_gau_slice_new_geo = util.remove_df_levels(
            allocated_foreign_gau_slice_all_years, foreign_geography)
        allocated_foreign_gau_slice_foreign_geo = util.remove_df_levels(
            allocated_foreign_gau_slice_all_years, current_geography)
        allocated_foreign_gau_slice_foreign_geo.index = allocated_foreign_gau_slice_foreign_geo.index.rename(
            current_geography, level=foreign_geography)

        # update foreign GAUs after clean timeseries
        allocated_gau_years = list(
            allocated_foreign_gau_slice_foreign_geo.index.get_level_values(
                y_or_v).values)
        indexer = util.level_specific_indexer(
            allocated_foreign_gau_slice_foreign_geo,
            [current_geography, y_or_v], [foreign_gau, allocated_gau_years])
        # NOTE(review): bare except dropping into pdb is a debugging leftover —
        # it swallows any error at this assignment; consider letting it raise.
        try:
            df.loc[indexer, :] = allocated_foreign_gau_slice_foreign_geo.loc[
                indexer, :]
        except:
            pdb.set_trace()

        new_impacted_gaus = util.DfOper.subt(
            (impacted_gaus_slice, allocated_foreign_gau_slice_new_geo),
            fill_value=np.nan,
            non_expandable_levels=[])
        new_impacted_gaus = new_impacted_gaus.reorder_levels(
            df.index.names).sort()
        if new_impacted_gaus.min().min() < 0:
            if not zero_out_negatives:
                raise ValueError(
                    'Negative values resulted from subtracting the foreign gau from the base gaus. This is the resulting dataframe: {}'
                    .format(new_impacted_gaus))
            else:
                new_impacted_gaus[new_impacted_gaus < 0] = 0
        # NOTE(review): `.all().value` looks suspicious — pandas exposes
        # `.values`, not `.value`; confirm this branch is ever reached.
        # The pdb.set_trace() below is another debugging leftover.
        if new_impacted_gaus.isnull().all().value:
            pdb.set_trace()
            raise ValueError(
                'Year or vitages did not overlap between the foreign gaus and impacted gaus'
            )

        # update native GAUs after netting out foreign gaus
        impacted_gau_years = list(
            impacted_gaus_slice.index.get_level_values(y_or_v).values)
        indexer = util.level_specific_indexer(
            df, [current_geography, y_or_v],
            [impacted_gaus, impacted_gau_years])
        df.loc[indexer, :] = new_impacted_gaus.loc[indexer, :]

        return df