Example 1
    def _clean_multindex(data, time_index_name, interpolation_method=None, extrapolation_method=None, newindex=None, **kwargs):
        if time_index_name not in data.index.names:
            raise ValueError('time_index_name must match one of the index level names when cleaning a multi-index dataframe')
        
        # TODO: duplicate values should raise an error when doing data validation
        # remove duplicates
        data = data.groupby(level=data.index.names).first()

        if newindex is None:
            exist_index = data.index.get_level_values(time_index_name)
            newindex = np.arange(min(exist_index), max(exist_index) + 1, dtype=int)
        elif not isinstance(newindex, np.ndarray):
            # We use newindex to calculate extrap_index using a method that takes an array
            newindex = np.array(newindex, dtype=int)
        
        # this is done so that we can make use of data that falls outside of the newindex
        wholeindex = np.array(sorted(list(set(newindex) | set(data.index.get_level_values(time_index_name)))), dtype=int)

        # Add new levels to data for missing time indices
        # full_levels = [list(newindex) if name==time_index_name else list(level) for name, level in zip(data.index.names, data.index.levels)]
        # data = data.join(pd.DataFrame(index=pd.MultiIndex.from_product(full_levels, names=data.index.names)), how='outer').sort_index()
        data = util.reindex_df_level_with_new_elements(data, time_index_name, wholeindex)
        

        group_levels = tuple([n for n in data.index.names if n != time_index_name])
        data = data.groupby(level=group_levels).apply(TimeSeries._clean_multindex_helper,
                                                      time_index_name=time_index_name,
                                                      newindex=wholeindex,
                                                      interpolation_method=interpolation_method,
                                                      extrapolation_method=extrapolation_method,
                                                      **kwargs)
        
        data = util.reindex_df_level_with_new_elements(data, time_index_name, newindex)

        return data
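
All of the examples on this page call util.reindex_df_level_with_new_elements without ever showing it. Below is a minimal sketch of what it plausibly does, modeled on the commented-out from_product alternative in the example above; the body is an assumption, not the project's util source, and the real helper may union the new elements with the existing level values rather than replacing them outright (Example 6 appears to rely on that).

    import numpy as np
    import pandas as pd

    def reindex_df_level_with_new_elements(df, level_name, new_elements, fill_value=np.nan):
        """Swap one MultiIndex level for new_elements and reindex to the cross product."""
        if level_name not in df.index.names:
            return df
        index = df.index.remove_unused_levels()  # assumes df has a pandas MultiIndex
        full_levels = [list(new_elements) if name == level_name else list(level)
                       for name, level in zip(index.names, index.levels)]
        full_index = pd.MultiIndex.from_product(full_levels, names=index.names)
        # combinations absent from the original frame come back as fill_value
        return df.reindex(full_index, fill_value=fill_value).sort_index()
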
Example 2
    def _clean_multindex(data, time_index_name, interpolation_method=None, extrapolation_method=None, newindex=None, **kwargs):
        if time_index_name not in data.index.names:
            raise ValueError('time_index_name must match one of the index level names when cleaning a multi-index dataframe')

        if newindex is None:
            exist_index = data.index.get_level_values(time_index_name)
            newindex = np.array(sorted(set(exist_index)), dtype=int)
            # newindex = np.arange(min(exist_index), max(exist_index) + 1, dtype=int)
        elif not isinstance(newindex, np.ndarray):
            # We use newindex to calculate extrap_index using a method that takes an array
            newindex = np.array(newindex, dtype=int)
        
        # this is done so that we can make use of data that falls outside of the newindex
        wholeindex = np.array(sorted(list(set(newindex) | set(data.index.get_level_values(time_index_name)))), dtype=int)

        # Add new levels to data for missing time indices
        data = util.reindex_df_level_with_new_elements(data, time_index_name, wholeindex)
        

        group_levels = tuple([n for n in data.index.names if n != time_index_name])
        data = data.groupby(level=group_levels).apply(TimeSeries._clean_multindex_helper,
                                                      time_index_name=time_index_name,
                                                      newindex=wholeindex,
                                                      interpolation_method=interpolation_method,
                                                      extrapolation_method=extrapolation_method,
                                                      **kwargs)
        
        data = util.reindex_df_level_with_new_elements(data, time_index_name, newindex)

        return data
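
The only substantive difference from Example 1 is how the default newindex is built: Example 1 covers every year in the observed range, while this variant keeps only the years that actually occur. With hypothetical years:

    import numpy as np

    exist_index = [2015, 2020, 2025]  # hypothetical observed years
    np.arange(min(exist_index), max(exist_index) + 1, dtype=int)  # 2015..2025, every year (Example 1)
    np.array(sorted(set(exist_index)), dtype=int)                 # array([2015, 2020, 2025]) (this example)
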
Example 3
    def _clean_multindex(data,
                         time_index_name,
                         interpolation_method=None,
                         extrapolation_method=None,
                         newindex=None,
                         **kwargs):
        if time_index_name not in data.index.names:
            raise ValueError(
                'time_index_name must match one of the index level names when cleaning a multi-index dataframe'
            )

        # TODO: duplicate values should raise an error when doing data validation
        # remove duplicates
        data = data.groupby(level=data.index.names).first()

        if newindex is None:
            exist_index = data.index.get_level_values(time_index_name)
            newindex = np.arange(min(exist_index),
                                 max(exist_index) + 1,
                                 dtype=int)
        elif not isinstance(newindex, np.ndarray):
            # We use newindex to calculate extrap_index using a method that takes an array
            newindex = np.array(newindex, dtype=int)

        # this is done so that we can make use of data that falls outside of the newindex
        wholeindex = np.array(sorted(
            list(
                set(newindex)
                | set(data.index.get_level_values(time_index_name)))),
                              dtype=int)

        # Add new levels to data for missing time indices
        # full_levels = [list(newindex) if name==time_index_name else list(level) for name, level in zip(data.index.names, data.index.levels)]
        # data = data.join(pd.DataFrame(index=pd.MultiIndex.from_product(full_levels, names=data.index.names)), how='outer').sort_index()
        data = util.reindex_df_level_with_new_elements(data, time_index_name,
                                                       wholeindex)

        group_levels = tuple(
            [n for n in data.index.names if n != time_index_name])
        data = data.groupby(level=group_levels).apply(
            TimeSeries._clean_multindex_helper,
            time_index_name=time_index_name,
            newindex=wholeindex,
            interpolation_method=interpolation_method,
            extrapolation_method=extrapolation_method,
            **kwargs)

        data = util.reindex_df_level_with_new_elements(data, time_index_name,
                                                       newindex)

        return data
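
TimeSeries._clean_multindex_helper, the per-group worker used by every variant above, is also not shown. The sketch below is only a guess at its shape: the signature comes from the calls above, while the linear-interpolate/edge-fill body is a hypothetical stand-in for the project's real interpolation and extrapolation machinery.

    import pandas as pd

    def _clean_multindex_helper(group, time_index_name, newindex,
                                interpolation_method=None,
                                extrapolation_method=None, **kwargs):
        # groupby supplies one group per combination of the non-time levels,
        # so collapse the group down to its time level alone
        other_levels = [n for n in group.index.names if n != time_index_name]
        ts = group.reset_index(other_levels, drop=True).reindex(newindex)
        if interpolation_method is not None:
            ts = ts.interpolate(method=interpolation_method)  # interior gaps
        if extrapolation_method is not None:
            ts = ts.ffill().bfill()  # hypothetical stand-in: hold edge values flat
        return ts
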
Example 4
    def _setup_and_validate(self):
        Abstract.__init__(self, self.id, primary_key='id', data_id_key='parent_id')
        if self.raw_values is None:
            self._setup_zero_constraints()
            return

        self._validate_gaus()
        self.values = self.clean_timeseries(attr='raw_values', inplace=False, time_index=cfg.supply_years, time_index_name='year', interpolation_method=self.interpolation_method, extrapolation_method=self.extrapolation_method)

        # fill in any missing combinations of geographies
        self.values = util.reindex_df_level_with_new_elements(self.values, 'geography_from', cfg.dispatch_geographies)
        self.values = util.reindex_df_level_with_new_elements(self.values, 'geography_to', cfg.dispatch_geographies)
        self.values = self.values.fillna(0)
        self.values = self.values.sort_index()  # DataFrame.sort() no longer exists in pandas
Example 5
 def _add_missing_geographies(self, df, current_geography, current_data_type):
     current_number_of_geographies = len(util.get_elements_from_level(df, current_geography))
     proper_number_of_geographies = len(cfg.geo.geographies_unfiltered[current_geography])
     if current_data_type == 'total' and current_number_of_geographies != proper_number_of_geographies:
         # we only want to do it when we have a total, otherwise we can't just fill with zero
         df = util.reindex_df_level_with_new_elements(df, current_geography, cfg.geo.geographies_unfiltered[current_geography], fill_value=np.nan)
     return df
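
A tiny usage demo of this zero-fill pattern, with hypothetical data and geography names, reusing the reindex_df_level_with_new_elements sketch from after Example 1:

    import numpy as np
    import pandas as pd

    idx = pd.MultiIndex.from_product([['CA'], [2020, 2021]], names=['state', 'year'])
    df = pd.DataFrame({'value': [1.0, 2.0]}, index=idx)
    # add the missing geographies as NaN rows, then zero-fill; this is
    # safe for a total, but an intensity cannot simply be assumed zero
    df = reindex_df_level_with_new_elements(df, 'state', ['CA', 'NY', 'TX'], fill_value=np.nan)
    df = df.fillna(0)
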
Example 6
    def _clean_multindex(data, time_index_name, interpolation_method=None, extrapolation_method=None, newindex=None,
                         **kwargs):
        if time_index_name not in data.index.names:
            raise ValueError(
                'time_index_name must match one of the index level names when cleaning a multi-index dataframe')

        # remove duplicates
        data = data.groupby(level=data.index.names).first()

        if newindex is None:
            time_index_level = data.index.names.index(time_index_name)
            exist_index = np.array(data.index.levels[time_index_level], dtype=int)
            newindex = np.arange(min(exist_index), max(exist_index) + 1, dtype=int)
        elif not isinstance(newindex, np.ndarray):
            # We use newindex to calculate extrap_index using a method that takes an array
            newindex = np.array(newindex, dtype=int)

        index2drop = list(np.setdiff1d(data.index.levels[data.index.names.index(time_index_name)], newindex))

        # Add new levels to data for missing time indices
        # full_levels = [list(newindex) if name==time_index_name else list(level) for name, level in zip(data.index.names, data.index.levels)]
        # data = data.join(pd.DataFrame(index=pd.MultiIndex.from_product(full_levels, names=data.index.names)), how='outer').sort_index()
        data = util.reindex_df_level_with_new_elements(data, time_index_name, newindex)

        group_levels = tuple([n for n in data.index.names if n != time_index_name])
        data = data.groupby(level=group_levels).apply(TimeSeries._clean_multindex_helper,
                                                      time_index_name=time_index_name,
                                                      newindex=newindex,
                                                      interpolation_method=interpolation_method,
                                                      extrapolation_method=extrapolation_method,
                                                      **kwargs)

        data.drop(index2drop, level=time_index_name, inplace=True)

        return data
Example 7
 def calculate(self, vintages, years):
     self.vintages = vintages
     self.years = years
     self.remap(converted_geography=GeoMapper.supply_primary_geography)
     self.values['supply_node'] = self.supply_node
     self.values.set_index('supply_node', append=True, inplace=True)
     primary_geography = GeoMapper.supply_primary_geography
     self.values = util.reindex_df_level_with_new_elements(self.values, primary_geography, GeoMapper.geography_to_gau[primary_geography], fill_value=0.0)
Example 8
 def _add_missing_level_elements_to_foreign_gaus(df, current_geography):
     y_or_v = GeoMapper._get_df_time_index_name(df)
     for index_name in df.index.names:
         if index_name == current_geography or index_name == y_or_v:
             continue
         needed_elements = list(set(df.index.get_level_values(index_name)))
         df = util.reindex_df_level_with_new_elements(df, index_name, needed_elements)
     df = df.fillna(0).sort_index()
     return df
Example 9
 def _add_missing_level_elements_to_foreign_gaus(df, current_geography):
     y_or_v = GeoMapper._get_df_time_index_name(df)
     for index_name in df.index.names:
         if index_name == current_geography or index_name == y_or_v:
             continue
         needed_elements = list(set(df.index.get_level_values(index_name)))
         df = util.reindex_df_level_with_new_elements(
             df, index_name, needed_elements)
     df = df.fillna(0).sort_index()
     return df
Example 10
 def standardize_time_across_timezones(self, attr='values', inplace=True):
     self.final_dates_index = pd.date_range(self.active_dates_index[0], periods=len(self.active_dates_index), freq='H', tz=self.dispatch_outputs_timezone)
     df = util.reindex_df_level_with_new_elements(getattr(self, attr).copy(), 'weather_datetime', self.final_dates_index)
     levels = [n for n in self.values.index.names if n!='weather_datetime']
     df = df.groupby(level=levels).fillna(method='bfill').fillna(method='ffill')
     
     if inplace:
         setattr(self, attr, df)
     else:
         return df
Example 11
    def _setup_and_validate(self):
        if self.raw_values is None:
            self._setup_zero_constraints()
            return ""

        # self._validate_gaus()
        self.values = self.clean_timeseries(
            attr='raw_values',
            inplace=False,
            time_index=cfg.supply_years,
            time_index_name='year',
            interpolation_method=self.interpolation_method,
            extrapolation_method=self.extrapolation_method)
        # fill in any missing combinations of geographies
        self.values = util.reindex_df_level_with_new_elements(
            self.values, 'gau_from', GeoMapper.dispatch_geographies)
        self.values = util.reindex_df_level_with_new_elements(
            self.values, 'gau_to', GeoMapper.dispatch_geographies)
        self.values = self.values.fillna(0)
        self.values = self.values.sort_index()
Example 12
 def calculate(self, vintages, years):
     self.vintages = vintages
     self.years = years
     self.input_type = 'intensity'
     self.remap()
     self.values['supply_node'] = self.supply_node_id
     self.values.set_index('supply_node', append=True, inplace=True)
     primary_geography = cfg.primary_geography
     self.values = util.reindex_df_level_with_new_elements(
         self.values,
         primary_geography,
         cfg.geo.geographies[primary_geography],
         fill_value=0.0)
Example 13
    def incorporate_foreign_gaus(self, df, current_geography, data_type, map_key, keep_oth_index_over_oth_gau=False, zero_out_negatives=True):
        native_gaus, current_gaus, foreign_gaus = self.get_native_current_foreign_gaus(df, current_geography)
        # we don't have any foreign gaus
        if not foreign_gaus or not cfg.include_foreign_gaus:
            return df, current_geography

        y_or_v = GeoMapper._get_df_time_index_name(df)
            
        index_with_nans = [df.index.names[i] for i in set(np.nonzero([np.isnan(row) for row in df.index.get_values()])[1])]
        # if we have an index with nan, that typically indicates that one of the foreign gaus didn't have all the index levels
        # if this is the case, we have two options (1) ignore the foreign gau (2) get rid of the other index
        if index_with_nans and (keep_oth_index_over_oth_gau or data_type=='intensity'):
            return self.filter_foreign_gaus(df, current_geography), current_geography
        else:
            assert (y_or_v not in index_with_nans) and (current_geography not in index_with_nans)
            # we need to eliminate levels with nan before moving on
            df = util.remove_df_levels(df, index_with_nans)

        # add missing level indices for the foreign gaus; this must be done before we fill in years because we use a fill value of zero
        df = self._add_missing_level_elements_to_foreign_gaus(df, current_geography)

        # we need all the index level combinations to have all years for this to work correctly
        df_no_foreign_gaus = self.filter_foreign_gaus(df, current_geography)
        df_years = sorted(list(set(df_no_foreign_gaus.index.get_level_values(y_or_v).values)))
        df = util.reindex_df_level_with_new_elements(df, y_or_v, df_years)

        base_gaus = np.array(self.values.index.get_level_values(current_geography), dtype=int)
        for foreign_gau in foreign_gaus:
            foreign_geography = self.gau_to_geography[foreign_gau]
            index = np.nonzero(self.values.index.get_level_values(self.gau_to_geography[foreign_gau])==foreign_gau)[0]
            impacted_gaus = list(set(base_gaus[index]))
            base_gaus[index] = foreign_gau
            if any(impacted in foreign_gaus for impacted in impacted_gaus):
                raise ValueError('foreign gaus in the database cannot overlap geographically')
            
            # if the data_type is a total, we need to net out the total
            if data_type=='total':
                df = self._update_dataframe_totals_after_foreign_gau(df, current_geography, foreign_geography, impacted_gaus, foreign_gau, map_key, zero_out_negatives)
            elif data_type == 'intensity':
                logging.debug('Foreign GAUs with intensities is not yet implemented, totals will not be conserved')
        
        assert not any([any(np.isnan(row)) for row in df.index.get_values()])
        new_geography_name = self.make_new_geography_name(current_geography, list(foreign_gaus))
        df.index = df.index.rename(new_geography_name, level=current_geography)
        if new_geography_name not in self.geographies:
            self.add_new_geography(new_geography_name, base_gaus)
        # df = GeoMapper.reorder_level_names_after_incorporating_foreign_gaus(df, new_geography_name, y_or_v)
        return df, new_geography_name
Example 14
    def process_shape(self, active_dates_index=None, time_slice_elements=None):
        if active_dates_index is None:
            raise ValueError('processing a shape requires an active date index')
        self.active_dates_index = active_dates_index
        self.num_active_years = len(active_dates_index)/8766.

        self.time_slice_elements = Shapes.create_time_slice_elements(active_dates_index) if time_slice_elements is None else time_slice_elements
        
        if self.shape_type=='weather date':
            self.values = util.reindex_df_level_with_new_elements(self.raw_values, 'weather_datetime', active_dates_index) # this step is slow, consider replacing
            if self.values.isnull().values.any():
                raise ValueError('Weather data did not give full coverage of the active dates')

        elif self.shape_type=='time slice':
            self.values = self.create_empty_shape_data()
            
            non_time_elements_in_levels = [list(util.get_elements_from_level(self.values, e)) for e in self._non_time_keys]
            time_elements_in_levels = [list(util.get_elements_from_level(self.values, e)) for e in self._active_time_keys]
            
            for ind, value in self.raw_values.iterrows():
                non_time_portion = [ind[self._non_time_dict[e]] for e in self._non_time_keys]
                time_portion = [ind[self._active_time_dict[e]] for e in self._active_time_keys]
                if not np.all([s in l for s, l in zip(non_time_portion+time_portion, non_time_elements_in_levels+time_elements_in_levels)]):
                    continue
                
                indexer = tuple(non_time_portion + time_portion + [slice(None)])
                                
                if self.shape_unit_type=='energy':
                    len_slice = len(self.values.loc[indexer])
                    self.values.loc[indexer] = value[0]/float(len_slice)*self.num_active_years
                elif self.shape_unit_type=='power':
                    self.values.loc[indexer] = value[0]
            
            if self.values.isnull().values.any():
                raise ValueError('Shape time slice data did not give full coverage of the active dates')
            # reindex to remove the helper columns
            self.values.index = self.values.index.droplevel(self._active_time_keys)
        
        self.values = self.values.swaplevel('weather_datetime', -1).sort_index()
        self.geomap_to_time_zone()
        self.localize_shapes()
        self.standardize_time_across_timezones()
        self.geomap_to_primary_geography()
        self.sum_over_time_zone()
        self.normalize()
        self.add_timeshift_type()
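
The two shape_unit_type branches differ in how one raw time-slice value becomes hourly values: 'power' writes the value into every hour of the slice, while 'energy' spreads the slice total over its hours and scales by the number of active years. With hypothetical numbers:

    # an 'energy' slice total of 744.0 over a 744-hour slice in one active
    # year becomes 1.0 per hour; 'power' would put 744.0 in every hour
    value, len_slice, num_active_years = 744.0, 744, 1.0
    per_hour_energy = value / float(len_slice) * num_active_years  # 1.0
    per_hour_power = value                                         # 744.0
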
Example 15
 def standardize_shape_type(self, raw_values):
     if self.shape_type == 'weather date':
         final_data = util.reindex_df_level_with_new_elements(
             raw_values, 'weather_datetime', self.active_dates_index
         )  # this step is slow, consider replacing
         if final_data.isnull().values.any():
              # do some interpolation to fill missing values; if that still doesn't remove the NaNs, we raise an error
             final_data = final_data.groupby(level=[
                 name for name in final_data.index.names
                 if name != 'weather_datetime'
             ]).apply(pd.DataFrame.interpolate).ffill().bfill()
             if final_data.isnull().values.any():
                 raise ValueError(
                     'Weather data for shape {} did not give full coverage of the active dates:\n {}'
                     .format(self.name,
                             final_data[final_data.isnull().values]))
     elif self.shape_type == 'time slice':
         final_data = self.create_empty_shape_data(raw_values)
         final_data = pd.merge(final_data,
                               raw_values.reset_index(),
                               how='left')
         final_data = final_data.set_index(
             [c for c in final_data.columns if c != 'value']).sort_index()
         if self.shape_unit_type == 'energy':
             if 'week' in self._active_time_keys:
                 raise ValueError(
                     'Shape unit type energy with week timeslice is not recommended due to edge effects'
                 )
             final_data = self.convert_energy_to_power(final_data)
         if final_data.isnull().values.any():
              print(final_data[final_data.isnull().values])
             raise ValueError(
                 'Shape {} time slice data did not give full coverage of the active dates.'
                 .format(self.name))
         # reindex to remove the helper columns
         active_time_keys_keep_hydro_year = list(
             set(self._active_time_keys) - set(['hydro_year']))
         final_data.index = final_data.index.droplevel(
             active_time_keys_keep_hydro_year)
         # drop any duplicates
         final_data = final_data.groupby(
             level=final_data.index.names).first()
     else:
         raise ValueError(
             '{} shape_type must be "weather date" or "time slice", not {}'.
             format(self.name, self.shape_type))
     final_data = final_data.swaplevel('weather_datetime', -1).sort_index()
     return final_data
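
A small demo of the gap-filling fallback in the weather-date branch above, on a hypothetical two-level frame: interpolate interior NaNs within each group, then forward- and back-fill whatever is left at the edges:

    import numpy as np
    import pandas as pd

    idx = pd.MultiIndex.from_product([['a'], range(4)],
                                     names=['zone', 'weather_datetime'])
    df = pd.DataFrame({'value': [np.nan, 1.0, np.nan, 3.0]}, index=idx)
    # group_keys=False keeps newer pandas from prepending the group key
    filled = (df.groupby(level='zone', group_keys=False)
                .apply(pd.DataFrame.interpolate)
                .ffill().bfill())
    # the interior NaN becomes 2.0 by interpolation; the leading NaN
    # becomes 1.0 via the final bfill
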
Example 16
    def standardize_time_across_timezones(self, df):
        tz = pytz.timezone(cfg.getParam('dispatch_outputs_timezone'))
        offset = (tz.utcoffset(DT.datetime(2015, 1, 1)) +
                  tz.dst(DT.datetime(2015, 1, 1))).total_seconds() / 60.
        new_index = pd.DatetimeIndex(self.active_dates_index_unique,
                                     tz=pytz.FixedOffset(offset))
        # if we have hydro year, when this does a reindex, it can introduce NaNs, so we want to remove them after
        assert not df.isnull().any().any()
        standardize_df = util.reindex_df_level_with_new_elements(
            df.copy(), 'weather_datetime', new_index)

        levels = [n for n in df.index.names if n != 'weather_datetime']
        standardize_df = standardize_df.groupby(level=levels).fillna(
            method='bfill').fillna(method='ffill')
        standardize_df = standardize_df[~standardize_df.isnull().values]

        return standardize_df
Example 17
    def process_shape(self):
        logging.info('    shape: ' + self.name)
        self.num_active_years = num_active_years(self.active_dates_index)
        
        if self.shape_type=='weather date':
            self.values = util.reindex_df_level_with_new_elements(self.raw_values, 'weather_datetime', self.active_dates_index)  # this step is slow, consider replacing
            self.values = self.values.replace(np.nan, 0)
            if self.values.isnull().values.any():
                raise ValueError('Weather data for shape {} did not give full coverage of the active dates'.format(self.name))

        elif self.shape_type=='time slice':
            self.values = self.create_empty_shape_data()
            
            non_time_elements_in_levels = [list(util.get_elements_from_level(self.values, e)) for e in self._non_time_keys]
            time_elements_in_levels = [list(util.get_elements_from_level(self.values, e)) for e in self._active_time_keys]
            
            for ind, value in self.raw_values.iterrows():
                non_time_portion = [ind[self._non_time_dict[e]] for e in self._non_time_keys]
                time_portion = [ind[self._active_time_dict[e]] for e in self._active_time_keys]
                if not np.all([s in l for s, l in zip(non_time_portion+time_portion, non_time_elements_in_levels+time_elements_in_levels)]):
                    continue
                
                indexer = tuple(non_time_portion + time_portion + [slice(None)])
                                
                if self.shape_unit_type=='energy':
                    len_slice = len(self.values.loc[indexer])
                    self.values.loc[indexer] = value[0]/float(len_slice)*self.num_active_years
                elif self.shape_unit_type=='power':
                    self.values.loc[indexer] = value[0]
            
            if self.values.isnull().values.any():
                raise ValueError('Shape time slice data did not give full coverage of the active dates')
            # reindex to remove the helper columns
            self.values.index = self.values.index.droplevel(self._active_time_keys)

        self.values = cfg.geo.filter_extra_geos_from_df(self.values.swaplevel('weather_datetime', -1).sort_index())
        self.geomap_to_time_zone()
        self.localize_shapes()
        self.standardize_time_across_timezones()
        self.geomap_to_primary_geography()
        self.sum_over_time_zone()
        self.normalize()
        self.add_timeshift_type()
        # raw values can be very large, so we delete it in this one case
        del self.raw_values
Example 18
    def process_shape(self, active_dates_index=None, time_slice_elements=None):
        if active_dates_index is None:
            raise ValueError('processing a shape requires an active date index')
        self.active_dates_index = active_dates_index
        self.num_active_years = len(active_dates_index)/8766.

        self.time_slice_elements = Shapes.create_time_slice_elements(active_dates_index) if time_slice_elements is None else time_slice_elements
        
        if self.shape_type=='weather date':
            self.convert_index_to_datetime('raw_values', 'weather_datetime')
            # Reindex with a day on either side so that data is preserved when it is shifted for time zones
            self.values = util.reindex_df_level_with_new_elements(self.raw_values, 'weather_datetime', active_dates_index) # this step is slow, consider replacing
#            self.values = pd.merge(self.raw_values.reset_index(), 
#                                   pd.DataFrame(self.active_dates_index, columns=['weather_datetime']), 
#                                   how='right').set_index(self.raw_values.index.names)
            if self.values.isnull().values.any():
                raise ValueError('Weather data did not give full coverage of the active dates')

        elif self.shape_type=='time slice':
            self.values = self.create_empty_shape_data()
            
            for ind, value in self.raw_values.iterrows():
                indexer = tuple([ind[self._non_time_dict[e]] for e in self._non_time_keys] +
                                [ind[self._active_time_dict[e]] for e in self._active_time_keys] +
                                [slice(None)])
                if self.shape_unit_type=='energy':
                    len_slice = len(self.values.loc[indexer])
                    self.values.loc[indexer] = value[0]/float(len_slice)*self.num_active_years
                elif self.shape_unit_type=='power':
                    self.values.loc[indexer] = value[0]
            
            if self.values.isnull().values.any():
                raise ValueError('Shape time slice data did not give full coverage of the active dates')
            # reindex to remove the helper columns
            self.values.index = self.values.index.droplevel(self._active_time_keys)
            self.values = self.values.sort_index()  # the unassigned .sort() call was a no-op

        self.geomap_to_time_zone()
        self.localize_shapes()
        self.geomap_to_primary_geography()
        self.sum_over_time_zone()
        self.normalize()
Example 19
    def process_shape(self, active_dates_index=None, time_slice_elements=None):
        if active_dates_index is None:
            raise ValueError(
                'processing a shape requires an active date index')
        self.active_dates_index = active_dates_index
        self.num_active_years = len(active_dates_index) / 8766.

        self.time_slice_elements = Shapes.create_time_slice_elements(
            active_dates_index
        ) if time_slice_elements is None else time_slice_elements

        if self.shape_type == 'weather date':
            self.values = util.reindex_df_level_with_new_elements(
                self.raw_values, 'weather_datetime',
                active_dates_index)  # this step is slow, consider replacing
            if self.values.isnull().values.any():
                raise ValueError(
                    'Weather data did not give full coverage of the active dates'
                )

        elif self.shape_type == 'time slice':
            self.values = self.create_empty_shape_data()

            non_time_elements_in_levels = [
                list(util.get_elements_from_level(self.values, e))
                for e in self._non_time_keys
            ]
            time_elements_in_levels = [
                list(util.get_elements_from_level(self.values, e))
                for e in self._active_time_keys
            ]

            for ind, value in self.raw_values.iterrows():
                non_time_portion = [
                    ind[self._non_time_dict[e]] for e in self._non_time_keys
                ]
                time_portion = [
                    ind[self._active_time_dict[e]]
                    for e in self._active_time_keys
                ]
                if not np.all([
                        s in l for s, l in zip(
                            non_time_portion +
                            time_portion, non_time_elements_in_levels +
                            time_elements_in_levels)
                ]):
                    continue

                indexer = tuple(non_time_portion + time_portion +
                                [slice(None)])

                if self.shape_unit_type == 'energy':
                    len_slice = len(self.values.loc[indexer])
                    self.values.loc[indexer] = value[0] / float(
                        len_slice) * self.num_active_years
                elif self.shape_unit_type == 'power':
                    self.values.loc[indexer] = value[0]

            if self.values.isnull().values.any():
                raise ValueError(
                    'Shape time slice data did not give full coverage of the active dates'
                )
            # reindex to remove the helper columns
            self.values.index = self.values.index.droplevel(
                self._active_time_keys)

        self.values = self.values.swaplevel('weather_datetime',
                                            -1).sort_index()
        self.geomap_to_time_zone()
        self.localize_shapes()
        self.standardize_time_across_timezones()
        self.geomap_to_primary_geography()
        self.sum_over_time_zone()
        self.normalize()
        self.add_timeshift_type()
Example 20
    def incorporate_foreign_gaus(self,
                                 df,
                                 current_geography,
                                 data_type,
                                 map_key,
                                 keep_oth_index_over_oth_gau=False,
                                 zero_out_negatives=True):
        native_gaus, current_gaus, foreign_gaus = self.get_native_current_foreign_gaus(
            df, current_geography)
        # we don't have any foreign gaus
        if not foreign_gaus or not cfg.include_foreign_gaus:
            return df, current_geography

        y_or_v = GeoMapper._get_df_time_index_name(df)

        index_with_nans = [
            df.index.names[i] for i in set(
                np.nonzero([np.isnan(row)
                            for row in df.index.get_values()])[1])
        ]
        # if we have an index with nan, that typically indicates that one of the foreign gaus didn't have all the index levels
        # if this is the case, we have two options (1) ignore the foreign gau (2) get rid of the other index
        if index_with_nans and (keep_oth_index_over_oth_gau
                                or data_type == 'intensity'):
            return self.filter_foreign_gaus(
                df, current_geography), current_geography
        else:
            assert (y_or_v not in index_with_nans) and (current_geography
                                                        not in index_with_nans)
            # we need to eliminate levels with nan before moving on
            df = util.remove_df_levels(df, index_with_nans)

        # add missing level indices for the foreign gaus; this must be done before we fill in years because we use a fill value of zero
        df = self._add_missing_level_elements_to_foreign_gaus(
            df, current_geography)

        # we need all the index level combinations to have all years for this to work correctly
        df_no_foreign_gaus = self.filter_foreign_gaus(df, current_geography)
        df_years = sorted(
            list(set(
                df_no_foreign_gaus.index.get_level_values(y_or_v).values)))
        df = util.reindex_df_level_with_new_elements(df, y_or_v, df_years)

        base_gaus = np.array(
            self.values.index.get_level_values(current_geography), dtype=int)
        for foreign_gau in foreign_gaus:
            foreign_geography = self.gau_to_geography[foreign_gau]
            index = np.nonzero(
                self.values.index.get_level_values(
                    self.gau_to_geography[foreign_gau]) == foreign_gau)[0]
            impacted_gaus = list(set(base_gaus[index]))
            base_gaus[index] = foreign_gau
            if any(impacted in foreign_gaus for impacted in impacted_gaus):
                raise ValueError(
                    'foreign gaus in the database cannot overlap geographically'
                )

            # if the data_type is a total, we need to net out the total
            if data_type == 'total':
                df = self._update_dataframe_totals_after_foreign_gau(
                    df, current_geography, foreign_geography, impacted_gaus,
                    foreign_gau, map_key, zero_out_negatives)
            elif data_type == 'intensity':
                logging.warning(
                    'Foreign GAUs with intensities is not yet implemented, totals will not be conserved'
                )

        assert not any([any(np.isnan(row)) for row in df.index.get_values()])
        new_geography_name = self.make_new_geography_name(
            current_geography, list(foreign_gaus))
        df.index = df.index.rename(new_geography_name, level=current_geography)
        if new_geography_name not in self.geographies:
            self.add_new_geography(new_geography_name, base_gaus)
        # df = GeoMapper.reorder_level_names_after_incorporating_foreign_gaus(df, new_geography_name, y_or_v)
        return df, new_geography_name