Beispiel #1
0
    def process_shape(self, active_dates_index=None, time_slice_elements=None):
        """Populate and normalize self.values for this shape over the active dates.

        Args:
            active_dates_index: datetime index of the hours the shape must cover;
                required despite the None default.
            time_slice_elements: optional pre-built time-slice elements; derived
                from active_dates_index when omitted.

        Raises:
            ValueError: if active_dates_index is None, or the raw data does not
                give full coverage of the active dates.
        """
        # Validate before use: previously num_active_years was computed first,
        # so a missing index raised TypeError (len(None)) instead of the
        # intended ValueError below.
        if active_dates_index is None:
            raise ValueError('processing a shape requires an active date index')
        self.active_dates_index = active_dates_index
        # 8766 = average hours per year including leap years (365.25 * 24)
        self.num_active_years = len(active_dates_index) / 8766.

        self.time_slice_elements = Shapes.create_time_slice_elements(active_dates_index) if time_slice_elements is None else time_slice_elements

        if self.shape_type == 'weather date':
            self.values = util.reindex_df_level_with_new_elements(self.raw_values, 'weather_datetime', active_dates_index)  # this step is slow, consider replacing
            if self.values.isnull().values.any():
                raise ValueError('Weather data did not give full coverage of the active dates')

        elif self.shape_type == 'time slice':
            self.values = self.create_empty_shape_data()

            non_time_elements_in_levels = [list(util.get_elements_from_level(self.values, e)) for e in self._non_time_keys]
            time_elements_in_levels = [list(util.get_elements_from_level(self.values, e)) for e in self._active_time_keys]

            for ind, value in self.raw_values.iterrows():
                non_time_portion = [ind[self._non_time_dict[e]] for e in self._non_time_keys]
                time_portion = [ind[self._active_time_dict[e]] for e in self._active_time_keys]
                # skip raw rows referencing elements absent from the empty shape data
                if not np.all([s in l for s, l in zip(non_time_portion + time_portion, non_time_elements_in_levels + time_elements_in_levels)]):
                    continue

                indexer = tuple(non_time_portion + time_portion + [slice(None)])

                if self.shape_unit_type == 'energy':
                    # spread energy evenly across the slice, scaled to active years
                    len_slice = len(self.values.loc[indexer])
                    self.values.loc[indexer] = value[0] / float(len_slice) * self.num_active_years
                elif self.shape_unit_type == 'power':
                    self.values.loc[indexer] = value[0]

            if self.values.isnull().values.any():
                raise ValueError('Shape time slice data did not give full coverage of the active dates')
            # reindex to remove the helper columns
            self.values.index = self.values.index.droplevel(self._active_time_keys)

        self.values = self.values.swaplevel('weather_datetime', -1).sort_index()
        self.geomap_to_time_zone()
        self.localize_shapes()
        self.standardize_time_across_timezones()
        self.geomap_to_primary_geography()
        self.sum_over_time_zone()
        self.normalize()
        self.add_timeshift_type()
Beispiel #2
0
 def _add_missing_geographies(self, df, current_geography, current_data_type):
     """Reindex a 'total' dataframe so it covers every unfiltered geography."""
     all_geos = cfg.geo.geographies_unfiltered[current_geography]
     present_count = len(util.get_elements_from_level(df, current_geography))
     # we only want to do it when we have a total, otherwise we can't just fill with zero
     if current_data_type == 'total' and present_count != len(all_geos):
         df = util.reindex_df_level_with_new_elements(df, current_geography, all_geos, fill_value=np.nan)
     return df
Beispiel #3
0
    def produce_flexible_load(shape_df, percent_flexible=None, hr_delay=None, hr_advance=None):
        """Return delayed/native/advanced load blended by the flexible fraction,
        stacked under a timeshift_type level with keys 1, 2, 3."""
        if percent_flexible is None:
            percent_flexible = 0
        if hr_delay is None:
            hr_delay = 0
        if hr_advance is None:
            hr_advance = 0

        # nothing flexible, or no shift requested -> just the native profile
        if percent_flexible == 0 or (hr_delay == 0 and hr_advance == 0):
            return util.df_slice(shape_df, elements=2, levels='timeshift_type')

        timeshift_levels = sorted(util.get_elements_from_level(shape_df, 'timeshift_type'))
        if timeshift_levels == [1, 2, 3]:
            # user supplied explicit delay/native/advance profiles
            delay, native, advance = (util.df_slice(shape_df, elements=k, levels='timeshift_type')
                                      for k in (1, 2, 3))
        elif timeshift_levels == [2]:
            def _shift(df, hr):
                """Positive hours shift forward, negative hours shift back."""
                return df.shift(hr).bfill().ffill()

            grouping = [name for name in shape_df.index.names if name != 'weather_datetime']
            delay = shape_df.groupby(level=grouping).apply(_shift, hr=hr_delay)
            native = shape_df
            advance = shape_df.groupby(level=grouping).apply(_shift, hr=-hr_advance)
        else:
            raise ValueError("elements in the level timeshift_type are not recognized")

        blended_delay = delay * percent_flexible + native * (1 - percent_flexible)
        blended_advance = advance * percent_flexible + native * (1 - percent_flexible)
        return pd.concat([blended_delay, native, blended_advance],
                         keys=[1, 2, 3], names=['timeshift_type'])
Beispiel #4
0
    def process_shape(self):
        """Populate and normalize self.values from self.raw_values over the
        instance's active_dates_index, then free the raw data.

        Raises:
            ValueError: if the raw data does not give full coverage of the
                active dates (weather-date NaN check, time-slice NaN check).
        """
        logging.info('    shape: ' + self.name)
        self.num_active_years = num_active_years(self.active_dates_index)

        if self.shape_type == 'weather date':
            self.values = util.reindex_df_level_with_new_elements(self.raw_values, 'weather_datetime', self.active_dates_index)
            self.values = self.values.replace(np.nan, 0)  # this step is slow, consider replacing
            # NOTE(review): after the replace above there can be no NaN left, so
            # this coverage check can never fire — confirm whether the zero-fill
            # or the check reflects the intended behavior.
            if self.values.isnull().values.any():
                raise ValueError('Weather data for shape {} did not give full coverage of the active dates'.format(self.name))

        elif self.shape_type == 'time slice':
            self.values = self.create_empty_shape_data()

            non_time_elements_in_levels = [list(util.get_elements_from_level(self.values, e)) for e in self._non_time_keys]
            time_elements_in_levels = [list(util.get_elements_from_level(self.values, e)) for e in self._active_time_keys]

            for ind, value in self.raw_values.iterrows():
                non_time_portion = [ind[self._non_time_dict[e]] for e in self._non_time_keys]
                time_portion = [ind[self._active_time_dict[e]] for e in self._active_time_keys]
                # skip raw rows referencing elements absent from the empty shape data
                if not np.all([s in l for s, l in zip(non_time_portion + time_portion, non_time_elements_in_levels + time_elements_in_levels)]):
                    continue

                indexer = tuple(non_time_portion + time_portion + [slice(None)])

                if self.shape_unit_type == 'energy':
                    # spread energy evenly across the slice, scaled to active years
                    len_slice = len(self.values.loc[indexer])
                    self.values.loc[indexer] = value[0] / float(len_slice) * self.num_active_years
                elif self.shape_unit_type == 'power':
                    self.values.loc[indexer] = value[0]

            if self.values.isnull().values.any():
                raise ValueError('Shape time slice data did not give full coverage of the active dates')
            # reindex to remove the helper columns
            self.values.index = self.values.index.droplevel(self._active_time_keys)

        # .sort() was removed from pandas objects; .sort_index() is the
        # equivalent used by the other process_shape implementations in this file
        self.values = cfg.geo.filter_extra_geos_from_df(self.values.swaplevel('weather_datetime', -1).sort_index())
        self.geomap_to_time_zone()
        self.localize_shapes()
        self.standardize_time_across_timezones()
        self.geomap_to_primary_geography()
        self.sum_over_time_zone()
        self.normalize()
        self.add_timeshift_type()
        # raw values can be very large, so we delete it in this one case
        del self.raw_values
Beispiel #5
0
    def produce_flexible_load(shape_df,
                              percent_flexible=None,
                              hr_delay=None,
                              hr_advance=None):
        """Blend delayed/native/advanced load by the flexible fraction and stack
        the three profiles under timeshift_type keys 1, 2, 3."""
        percent_flexible = percent_flexible if percent_flexible is not None else 0
        hr_delay = hr_delay if hr_delay is not None else 0
        hr_advance = hr_advance if hr_advance is not None else 0

        # with no flexibility or no shift, the native profile is the answer
        no_shift = hr_delay == 0 and hr_advance == 0
        if percent_flexible == 0 or no_shift:
            return util.df_slice(shape_df, elements=2, levels='timeshift_type')

        levels_present = list(util.get_elements_from_level(shape_df, 'timeshift_type'))
        levels_present.sort()

        if levels_present == [1, 2, 3]:
            # explicit delay/native/advance profiles already provided
            delay = util.df_slice(shape_df, elements=1, levels='timeshift_type')
            native = util.df_slice(shape_df, elements=2, levels='timeshift_type')
            advance = util.df_slice(shape_df, elements=3, levels='timeshift_type')
        elif levels_present == [2]:
            # TODO this could be a lambda function
            def shifted(df, hr):
                """ positive hours is a shift forward, negative hours a shift back"""
                return df.shift(hr).bfill().ffill()

            group_levels = [n for n in shape_df.index.names if n != 'weather_datetime']
            grouped = shape_df.groupby(level=group_levels)
            delay = grouped.apply(shifted, hr=hr_delay)
            native = shape_df
            advance = grouped.apply(shifted, hr=-hr_advance)
        else:
            raise ValueError(
                "elements in the level timeshift_type are not recognized")

        inflexible_share = 1 - percent_flexible
        return pd.concat(
            [delay * percent_flexible + native * inflexible_share,
             native,
             advance * percent_flexible + native * inflexible_share],
            keys=[1, 2, 3],
            names=['timeshift_type'])
Beispiel #6
0
    def produce_flexible_load(shape_df, percent_flexible=None, hr_delay=None, hr_advance=None):
        """Blend native load with delayed/advanced variants weighted by a flexible fraction.

        Returns full_load * pflex + native * (1 - pflex), stacked under a
        timeshift_type level with keys 1 (delay), 2 (native), 3 (advance).

        NOTE(review): unlike hr_delay/hr_advance, percent_flexible gets no
        None-to-0 substitution and is passed straight to pd.concat below, so it
        is presumably a pandas object (per-index flexible fraction) — confirm
        against callers; passing the None default would fail.
        """
        hr_delay = 0 if hr_delay is None else hr_delay
        hr_advance = 0 if hr_advance is None else hr_advance
        
        # native profile (timeshift_type == 2), replicated under keys 1/2/3 so it
        # aligns with the stacked flexible-fraction frame built next
        native_slice = shape_df.xs(2, level='timeshift_type')
        native_slice_stacked = pd.concat([native_slice]*3, keys=[1,2,3], names=['timeshift_type'])

        pflex_stacked = pd.concat([percent_flexible]*3, keys=[1,2,3], names=['timeshift_type'])

        timeshift_levels = sorted(list(util.get_elements_from_level(shape_df, 'timeshift_type')))
        if timeshift_levels==[1, 2, 3]:
            # here, we have flexible load profiles already specified by the user
            names = shape_df.index.names
            full_load = shape_df.squeeze().unstack('timeshift_type')
            group_by_names = [n for n in full_load.index.names if n != 'weather_datetime']
            # enforce feasibility constraints on each group's (delay, native, advance) columns
            full_load = full_load.groupby(level=group_by_names).apply(Shape.ensure_feasible_flexible_load)
            full_load = full_load.stack('timeshift_type').reorder_levels(names).sort_index().to_frame()
            full_load.columns = ['value']
        elif timeshift_levels==[2]:
            non_weather = [n for n in native_slice.index.names if n!='weather_datetime']

            # positive hours is a shift forward, negative hours a shift back
            shift = lambda df, hr: df.shift(hr).ffill().fillna(value=0)
            delay_load = native_slice.groupby(level=non_weather).apply(shift, hr=hr_delay)

            def advance_load_function(df, hr):
                # shift back by hr hours; energy from the dropped leading rows is
                # lumped into the first row so the total is conserved
                df_adv = df.shift(-hr).ffill().fillna(value=0)
                df_adv.iloc[0] += df.iloc[:hr].sum().sum()
                return df_adv
            advance_load = native_slice.groupby(level=non_weather).apply(advance_load_function, hr=hr_advance)
            
            full_load = pd.concat([delay_load, native_slice, advance_load], keys=[1,2,3], names=['timeshift_type'])
        else:
            raise ValueError("elements in the level timeshift_type are not recognized")
        
        # weighted blend: flexible share follows full_load, the rest stays native
        return util.DfOper.add((util.DfOper.mult((full_load, pflex_stacked), collapsible=False),
                                util.DfOper.mult((native_slice_stacked, 1-pflex_stacked), collapsible=False)))
Beispiel #7
0
    def process_shape(self, active_dates_index=None, time_slice_elements=None):
        """Populate and normalize self.values for this shape over the active dates.

        Args:
            active_dates_index: datetime index of the hours the shape must cover;
                required despite the None default.
            time_slice_elements: optional pre-built time-slice elements; derived
                from active_dates_index when omitted.

        Raises:
            ValueError: if active_dates_index is None, or the raw data does not
                give full coverage of the active dates.
        """
        # Validate before use: previously num_active_years was computed first,
        # so a missing index raised TypeError (len(None)) instead of the
        # intended ValueError below.
        if active_dates_index is None:
            raise ValueError('processing a shape requires an active date index')
        self.active_dates_index = active_dates_index
        # 8766 = average hours per year including leap years (365.25 * 24)
        self.num_active_years = len(active_dates_index) / 8766.

        self.time_slice_elements = Shapes.create_time_slice_elements(
            active_dates_index
        ) if time_slice_elements is None else time_slice_elements

        if self.shape_type == 'weather date':
            self.values = util.reindex_df_level_with_new_elements(
                self.raw_values, 'weather_datetime',
                active_dates_index)  # this step is slow, consider replacing
            if self.values.isnull().values.any():
                raise ValueError(
                    'Weather data did not give full coverage of the active dates'
                )

        elif self.shape_type == 'time slice':
            self.values = self.create_empty_shape_data()

            non_time_elements_in_levels = [
                list(util.get_elements_from_level(self.values, e))
                for e in self._non_time_keys
            ]
            time_elements_in_levels = [
                list(util.get_elements_from_level(self.values, e))
                for e in self._active_time_keys
            ]

            for ind, value in self.raw_values.iterrows():
                non_time_portion = [
                    ind[self._non_time_dict[e]] for e in self._non_time_keys
                ]
                time_portion = [
                    ind[self._active_time_dict[e]]
                    for e in self._active_time_keys
                ]
                # skip raw rows referencing elements absent from the empty shape data
                if not np.all([
                        s in l for s, l in zip(
                            non_time_portion +
                            time_portion, non_time_elements_in_levels +
                            time_elements_in_levels)
                ]):
                    continue

                indexer = tuple(non_time_portion + time_portion +
                                [slice(None)])

                if self.shape_unit_type == 'energy':
                    # spread energy evenly across the slice, scaled to active years
                    len_slice = len(self.values.loc[indexer])
                    self.values.loc[indexer] = value[0] / float(
                        len_slice) * self.num_active_years
                elif self.shape_unit_type == 'power':
                    self.values.loc[indexer] = value[0]

            if self.values.isnull().values.any():
                raise ValueError(
                    'Shape time slice data did not give full coverage of the active dates'
                )
            # reindex to remove the helper columns
            self.values.index = self.values.index.droplevel(
                self._active_time_keys)

        self.values = self.values.swaplevel('weather_datetime',
                                            -1).sort_index()
        self.geomap_to_time_zone()
        self.localize_shapes()
        self.standardize_time_across_timezones()
        self.geomap_to_primary_geography()
        self.sum_over_time_zone()
        self.normalize()
        self.add_timeshift_type()