def produce_flexible_load(shape_df, percent_flexible=None, hr_delay=None, hr_advance=None):
    percent_flexible = 0 if percent_flexible is None else percent_flexible
    hr_delay = 0 if hr_delay is None else hr_delay
    hr_advance = 0 if hr_advance is None else hr_advance

    if percent_flexible == 0 or (hr_delay == 0 and hr_advance == 0):
        return util.df_slice(shape_df, elements=2, levels='timeshift_type')

    timeshift_levels = sorted(util.get_elements_from_level(shape_df, 'timeshift_type'))
    if timeshift_levels == [1, 2, 3]:
        delay = util.df_slice(shape_df, elements=1, levels='timeshift_type')
        native = util.df_slice(shape_df, elements=2, levels='timeshift_type')
        advance = util.df_slice(shape_df, elements=3, levels='timeshift_type')
    elif timeshift_levels == [2]:
        # TODO this could be a lambda function
        def shift(df, hr):
            """Positive hours is a shift forward, negative hours a shift back."""
            return df.shift(hr).bfill().ffill()

        non_weather = [n for n in shape_df.index.names if n != 'weather_datetime']
        delay = shape_df.groupby(level=non_weather).apply(shift, hr=hr_delay)
        native = shape_df
        advance = shape_df.groupby(level=non_weather).apply(shift, hr=-hr_advance)
    else:
        raise ValueError("elements in the level timeshift_type are not recognized")

    return pd.concat([delay * percent_flexible + native * (1 - percent_flexible),
                      native,
                      advance * percent_flexible + native * (1 - percent_flexible)],
                     keys=[1, 2, 3], names=['timeshift_type'])
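# A toy illustration (hypothetical series) of the shift helper above: a
# positive shift delays load, a negative shift advances it, and bfill/ffill
# patch the NaNs the shift leaves at the edges of the timeseries.
import pandas as pd

native = pd.Series([5.0, 6.0, 7.0, 8.0])
delayed = native.shift(2).bfill().ffill()    # -> [5, 5, 5, 6]
advanced = native.shift(-2).bfill().ffill()  # -> [7, 8, 8, 8]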
def solve_and_plot(self):
    self.solve_optimization()

    # flexible load by geography and feeder
    fig, axes = self.get_empty_plot(num_rows=len(self.dispatch_geographies), num_columns=len(self.dispatch_feeders))
    flex_load = util.df_slice(self.flex_load_df, self.dispatch_feeders, 'dispatch_feeder')
    flex_load = Output.clean_df(flex_load.squeeze().unstack(self.dispatch_geography).unstack('dispatch_feeder'))
    flex_load.plot(subplots=True, ax=axes, title='FLEXIBLE LOAD')

    fig, axes = self.get_empty_plot(num_rows=len(self.dispatch_geographies), num_columns=len(self.dispatch_feeders))
    datetime = flex_load.index.get_level_values('weather_datetime')
    hour = flex_load.groupby(datetime.hour + 1).mean()
    hour.plot(subplots=True, ax=axes, title='AVERAGE FLEXIBLE LOAD BY HOUR')

    # storage charge (negated so it plots below zero) and discharge
    fig, axes = self.get_empty_plot(num_rows=len(self.dispatch_geographies), num_columns=len(self.dispatch_feeders) + 1)
    charge = -self.storage_df.xs('charge', level='charge_discharge')
    discharge = self.storage_df.xs('discharge', level='charge_discharge')
    charge = Output.clean_df(charge.squeeze().unstack(self.dispatch_geography).unstack('dispatch_feeder'))
    discharge = Output.clean_df(discharge.squeeze().unstack(self.dispatch_geography).unstack('dispatch_feeder'))
    charge.plot(subplots=True, ax=axes)
    discharge.plot(subplots=True, ax=axes, title='STORAGE CHARGE (-) AND DISCHARGE (+)')

    fig, axes = self.get_empty_plot(num_rows=len(self.dispatch_geographies), num_columns=len(self.dispatch_feeders) + 1)
    datetime = charge.index.get_level_values('weather_datetime')
    hour_charge = charge.groupby(datetime.hour + 1).mean()
    hour_discharge = discharge.groupby(datetime.hour + 1).mean()
    hour_charge.plot(subplots=True, ax=axes)
    hour_discharge.plot(subplots=True, ax=axes, title='AVERAGE STORAGE CHARGE (-) AND DISCHARGE (+) BY HOUR')
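# A minimal, self-contained sketch of the hour-of-day averaging used in
# solve_and_plot: group an hourly series by (hour + 1) and take the mean.
# The index and values here are hypothetical.
import numpy as np
import pandas as pd

idx = pd.date_range('2030-01-01', periods=48, freq='H', name='weather_datetime')
load = pd.Series(np.sin(np.arange(48) * np.pi / 12) + 1, index=idx)
avg_by_hour = load.groupby(load.index.hour + 1).mean()  # index runs 1..24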
def set_losses(self, transmission_losses, distribution_losses):
    self.t_and_d_losses = dict()
    for geography in self.dispatch_geographies:
        for feeder in self.feeders:
            if feeder == 0:
                self.t_and_d_losses[(geography, feeder)] = transmission_losses.loc[geography, :].values[0]
            else:
                self.t_and_d_losses[(geography, feeder)] = util.df_slice(distribution_losses, [geography, feeder], [self.dispatch_geography, 'dispatch_feeder']).values[0][0] * transmission_losses.loc[geography, :].values[0]
def set_losses(self, transmission_losses, distribution_losses):
    self.t_and_d_losses = dict()
    for geography in self.dispatch_geographies:
        for feeder in self.feeders:
            if feeder == 'bulk':
                self.t_and_d_losses[(geography, feeder)] = transmission_losses.loc[geography, :].values[0]
            else:
                self.t_and_d_losses[(geography, feeder)] = util.df_slice(distribution_losses, [geography, feeder], [self.dispatch_geography, 'dispatch_feeder']).values[0][0] * transmission_losses.loc[geography, :].values[0]
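# A tiny numeric sketch (hypothetical values) of the arithmetic in both
# set_losses variants above: the bulk feeder entry is the transmission value
# alone, while a distribution feeder entry is the product of the distribution
# and transmission values.
transmission_value = 1.05   # hypothetical
distribution_value = 1.08   # hypothetical
bulk_entry = transmission_value                      # feeder 0 / 'bulk'
dist_entry = distribution_value * transmission_value # other feeders, ~1.134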
def set_gen_technologies(self, geography, thermal_dispatch_df):
    pmax = np.array(util.df_slice(thermal_dispatch_df, ['capacity', geography], ['IO', self.dispatch_geography]).values).T[0]
    marginal_cost = np.array(util.df_slice(thermal_dispatch_df, ['cost', geography], ['IO', self.dispatch_geography]).values).T[0]
    MORs = np.array(util.df_slice(thermal_dispatch_df, ['maintenance_outage_rate', geography], ['IO', self.dispatch_geography]).values).T[0]
    FORs = np.array(util.df_slice(thermal_dispatch_df, ['forced_outage_rate', geography], ['IO', self.dispatch_geography]).values).T[0]
    must_run = np.array(util.df_slice(thermal_dispatch_df, ['must_run', geography], ['IO', self.dispatch_geography]).values).T[0]
    clustered_dict = dispatch_generators.cluster_generators(n_clusters=int(cfg.cfgfile.get('opt', 'generator_steps')),
                                                            pmax=pmax, marginal_cost=marginal_cost, FORs=FORs, MORs=MORs,
                                                            must_run=must_run, pad_stack=False, zero_mc_4_must_run=True)
    generator_numbers = range(len(clustered_dict['derated_pmax']))
    for number in generator_numbers:
        generator = str((max(generator_numbers) + 1) * self.dispatch_geographies.index(geography) + number + 1)
        if generator not in self.generation_technologies:
            self.generation_technologies.append(generator)
        self.geography[generator] = geography
        self.feeder[generator] = 0
        self.min_capacity[generator] = 0
        self.capacity[generator] = clustered_dict['derated_pmax'][number]
        self.variable_costs[generator] = clustered_dict['marginal_cost'][number]
def set_gen_technologies(self, geography, thermal_dispatch_df):
    pmax = np.array(util.df_slice(thermal_dispatch_df, ['capacity', geography], ['IO', self.dispatch_geography]).values).T[0]
    marginal_cost = np.array(util.df_slice(thermal_dispatch_df, ['cost', geography], ['IO', self.dispatch_geography]).values).T[0]
    MORs = np.array(util.df_slice(thermal_dispatch_df, ['maintenance_outage_rate', geography], ['IO', self.dispatch_geography]).values).T[0]
    FORs = np.array(util.df_slice(thermal_dispatch_df, ['forced_outage_rate', geography], ['IO', self.dispatch_geography]).values).T[0]
    must_run = np.array(util.df_slice(thermal_dispatch_df, ['must_run', geography], ['IO', self.dispatch_geography]).values).T[0]
    clustered_dict = dispatch_generators.cluster_generators(n_clusters=cfg.getParamAsInt('generator_steps', 'opt'),
                                                            pmax=pmax, marginal_cost=marginal_cost, FORs=FORs, MORs=MORs,
                                                            must_run=must_run, pad_stack=False, zero_mc_4_must_run=True)
    generator_numbers = range(len(clustered_dict['derated_pmax']))
    for number in generator_numbers:
        generator = str((max(generator_numbers) + 1) * self.dispatch_geographies.index(geography) + number + 1)
        if generator not in self.generation_technologies:
            self.generation_technologies.append(generator)
        self.geography[generator] = geography
        self.feeder[generator] = 'bulk'
        self.min_capacity[generator] = 0
        self.capacity[generator] = clustered_dict['derated_pmax'][number]
        self.variable_costs[generator] = clustered_dict['marginal_cost'][number]
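# A small illustration (hypothetical values) of the generator-naming scheme in
# both set_gen_technologies variants: each geography gets a contiguous block
# of integer names so names stay unique across geographies.
dispatch_geographies = ['west', 'east']  # hypothetical
n_clusters = 3                           # e.g. len(clustered_dict['derated_pmax'])
names = []
for geography in dispatch_geographies:
    for number in range(n_clusters):
        names.append(str(n_clusters * dispatch_geographies.index(geography) + number + 1))
# names -> ['1', '2', '3', '4', '5', '6']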
def set_rio_duration(self, rio_inputs):
    if self.name in set(rio_inputs.duration.index.get_level_values('technology')) and self.name not in cfg.rio_excluded_technologies:
        self._has_data = True
        self.raw_values = util.df_slice(rio_inputs.duration, self.name, 'technology')
        self.geography = cfg.rio_geography
        self.capacity_or_energy_unit = cfg.rio_energy_unit
        self.time_unit = cfg.rio_time_unit
        self.input_timestep = cfg.rio_timestep_multiplier
        self.interpolation_method = 'linear_interpolation'
        self.extrapolation_method = 'nearest'
def set_thresholds(self, distribution_stock, transmission_stock):
    self.bulk_net_load_thresholds = dict()
    self.dist_net_load_thresholds = dict()
    for geography in self.dispatch_geographies:
        self.bulk_net_load_thresholds[geography] = transmission_stock.loc[geography].values[0]
        for feeder in self.feeders:
            if feeder == 0:
                self.dist_net_load_thresholds[(geography, feeder)] = 0
            else:
                self.dist_net_load_thresholds[(geography, feeder)] = util.df_slice(distribution_stock, [geography, feeder], [self.dispatch_geography, 'dispatch_feeder']).values[0][0]
def set_thresholds(self, distribution_stock, transmission_stock):
    self.bulk_net_load_thresholds = dict()
    self.dist_net_load_thresholds = dict()
    for geography in self.dispatch_geographies:
        self.bulk_net_load_thresholds[geography] = transmission_stock.loc[geography].values[0]
        for feeder in self.feeders:
            if feeder == 'bulk':
                self.dist_net_load_thresholds[(geography, feeder)] = 0
            else:
                self.dist_net_load_thresholds[(geography, feeder)] = util.df_slice(distribution_stock, [geography, feeder], [self.dispatch_geography, 'dispatch_feeder']).values[0][0]
def get_values_as_dict(self, year):
    capacity = util.df_slice(self.values, year, 'year').squeeze().to_dict()
    # iterate over a copy of the keys so entries can be deleted safely
    for key in list(capacity.keys()):
        if key[0] == key[1]:
            del capacity[key]
    # tuple needs to be a string in the optimization
    capacity = dict(zip([str((key[0], key[1])) for key in capacity.keys()], capacity.values()))
    return capacity
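# A toy illustration (hypothetical data) of what get_values_as_dict produces:
# self-pairs such as ('west', 'west') are dropped, and the remaining tuple
# keys are stringified for the optimization layer.
capacity = {('west', 'west'): 0.0, ('west', 'east'): 5.0, ('east', 'west'): 3.0}
for key in list(capacity.keys()):
    if key[0] == key[1]:
        del capacity[key]
capacity = dict(zip([str((k[0], k[1])) for k in capacity.keys()], capacity.values()))
# -> {"('west', 'east')": 5.0, "('east', 'west')": 3.0}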
def add_rio_stock_measures(self, rio_inputs):
    self.specified_stocks = {}
    df = rio_inputs.stock
    if self.name in set(df.index.get_level_values('technology')):
        df = util.df_slice(df, [self.name], ['technology'])
        if np.any([isinstance(x, int) for x in df.index.get_level_values('resource_bin').values]):
            df = df[df.index.get_level_values('resource_bin') != 'n/a']
            df = df.groupby(level=df.index.names).sum()
            self.specified_stocks[1] = RioSpecifiedStock(df)
        else:
            self.specified_stocks[1] = RioSpecifiedStock(util.remove_df_levels(df, 'resource_bin'))
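# A minimal sketch (hypothetical frame) of the resource_bin handling above:
# rows whose bin is the placeholder 'n/a' are dropped, then duplicate index
# rows are collapsed with a groupby-sum.
import pandas as pd

idx = pd.MultiIndex.from_tuples([('solar', 1), ('solar', 1), ('solar', 'n/a')],
                                names=['technology', 'resource_bin'])
df = pd.DataFrame({'value': [2.0, 3.0, 9.0]}, index=idx)
df = df[df.index.get_level_values('resource_bin') != 'n/a']
df = df.groupby(level=df.index.names).sum()  # ('solar', 1) -> 5.0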
def run_thermal_dispatch(params):
    dispatch_geography = params[0]
    thermal_dispatch_df = params[1]
    columns = params[1].columns  # save for later since we are doing a squeeze
    thermal_dispatch_df = thermal_dispatch_df.squeeze().unstack('IO')
    dispatch_geography_index = params[2]
    load = util.df_slice(params[3], dispatch_geography, dispatch_geography_index)
    return_dispatch_by_category = params[4]
    reserves = params[5]
    schedule_maintenance = params[6]

    months = load.index.get_level_values('weather_datetime').month
    weeks = load.index.get_level_values('weather_datetime').week
    load = load.values.flatten()

    pmaxs = thermal_dispatch_df['capacity'].values
    marginal_costs = thermal_dispatch_df['cost'].values
    MOR = thermal_dispatch_df['maintenance_outage_rate'].values
    FOR = thermal_dispatch_df['forced_outage_rate'].values
    must_runs = thermal_dispatch_df['must_run'].values
    capacity_weights = thermal_dispatch_df['capacity_weights'].values
    thermal_capacity_multiplier = thermal_dispatch_df['thermal_capacity_multiplier'].values
    # TODO we are setting these to 1 because sometimes it is incorrectly not 1 upstream, and if it is not 1 it can cause issues
    # thermal_capacity_multiplier[:] = 1

    # grabs the technology from the label
    gen_categories = [int(s.split(', ')[1].rstrip('L')) for s in thermal_dispatch_df.index.get_level_values('thermal_generators')]

    if schedule_maintenance:
        # the capacity weights often come in with some really small numbers, which we shouldn't keep here
        capacity_weights = np.round(capacity_weights, 2)
        # TODO: if we have multiple years, we should schedule maintenance for each year one at a time
        scheduling_order = np.argsort(marginal_costs)
        maintenance_rates = dispatch_maintenance.schedule_generator_maintenance_loop(load=load, pmaxs=pmaxs,
                                                                                     annual_maintenance_rates=MOR,
                                                                                     dispatch_periods=weeks,
                                                                                     scheduling_order=scheduling_order)
        # if we have capacity weights on a generator, we don't schedule maintenance for it to prevent errors in solving for generator stack changes
        maintenance_rates[:, np.nonzero(capacity_weights)] = MOR[np.nonzero(capacity_weights)]
    else:
        maintenance_rates = MOR

    dispatch_results = generator_stack_dispatch(load=load, pmaxs=pmaxs, marginal_costs=marginal_costs,
                                                MOR=maintenance_rates, FOR=FOR, must_runs=must_runs,
                                                dispatch_periods=weeks, capacity_weights=capacity_weights,
                                                gen_categories=gen_categories,
                                                return_dispatch_by_category=return_dispatch_by_category,
                                                reserves=reserves,
                                                thermal_capacity_multiplier=thermal_capacity_multiplier)

    if sum(dispatch_results['stock_changes']) > np.max(load) * 1.15 and np.max(load) > 0:
        logging.error("we've built too much capacity")
        pdb.set_trace()

    for output in ['gen_cf', 'generation', 'stock_changes']:
        thermal_dispatch_df[output] = dispatch_results[output]

    thermal_dispatch_df = thermal_dispatch_df.stack('IO').to_frame()
    thermal_dispatch_df.columns = columns
    return (thermal_dispatch_df, dispatch_results)
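# A self-contained sketch of the label parsing in run_thermal_dispatch. The
# 'thermal_generators' level appears to hold stringified (geography,
# technology) pairs where the trailing 'L' is a Python 2 long-integer suffix;
# the label format below is an assumption for illustration only.
labels = ['1, 34L', '1, 35L', '2, 34L']  # hypothetical label format
gen_categories = [int(s.split(', ')[1].rstrip('L')) for s in labels]
# -> [34, 35, 34]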
def set_opt_bulk_net_loads(self, bulk_load, bulk_gen, dispatched_bulk_load, bulk_net_load, active_thermal_dispatch_df):
    bulk_load = self._convert_weather_datetime_to_hour(bulk_load)
    bulk_gen = self._convert_weather_datetime_to_hour(bulk_gen)
    dispatched_bulk_load = self._convert_weather_datetime_to_hour(dispatched_bulk_load)
    bulk_net_load = util.remove_df_levels(util.df_slice(bulk_net_load, 2, 'timeshift_type', drop_level=True), 'year')
    bulk_net_load = self._convert_ld_weather_datetime_to_hour(bulk_net_load)
    self.bulk_load = self._timeseries_to_dict(bulk_load)
    self.dispatched_bulk_load = self._timeseries_to_dict(dispatched_bulk_load)
    self.bulk_gen = self._timeseries_to_dict(bulk_gen)
    thermal_unstacked = active_thermal_dispatch_df.squeeze().unstack('IO')
    must_run_sum = thermal_unstacked[thermal_unstacked['must_run'] == 1]['capacity'].groupby(level=cfg.dispatch_geography).sum().to_frame()
    # this includes must run generation
    self.ld_bulk_net_load_df = util.DfOper.subt((util.remove_df_levels(bulk_net_load, 'period').to_frame(), must_run_sum))
    self.ld_bulk_net_load = self.ld_bulk_net_load_df.squeeze().to_dict()
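# A minimal sketch (hypothetical frame) of the must-run aggregation above:
# filter generators flagged must_run == 1, then sum capacity by geography.
import pandas as pd

idx = pd.MultiIndex.from_tuples([('west', 'g1'), ('west', 'g2'), ('east', 'g3')],
                                names=['geography', 'generator'])
thermal = pd.DataFrame({'must_run': [1, 0, 1], 'capacity': [10.0, 50.0, 7.0]}, index=idx)
must_run_sum = thermal[thermal['must_run'] == 1]['capacity'].groupby(level='geography').sum().to_frame()
# east -> 7.0, west -> 10.0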
def _update_dataframe_totals_after_foreign_gau(self, df, current_geography, foreign_geography, impacted_gaus, foreign_gau, map_key, zero_out_negatives):
    y_or_v = GeoMapper._get_df_time_index_name(df)
    # we first need to do a clean time series
    # then we need to allocate out and subtract
    indexer = util.level_specific_indexer(df, current_geography, [impacted_gaus])
    impacted_gaus_slice = df.loc[indexer, :].reset_index().set_index(df.index.names)
    foreign_gau_slice = util.df_slice(df, foreign_gau, current_geography, drop_level=False, reset_index=True)
    foreign_gau_slice.index = foreign_gau_slice.index.rename(foreign_geography, level=current_geography)

    # do the allocation, take the ratio of foreign to native, do a clean timeseries, then reconstitute the foreign gau data over all years
    allocation = self.map_df(foreign_geography, current_geography, map_key=map_key, primary_subset_id=[foreign_gau])
    allocated_foreign_gau_slice = util.DfOper.mult((foreign_gau_slice, allocation), fill_value=np.nan)
    allocated_foreign_gau_slice = allocated_foreign_gau_slice.reorder_levels([-1] + list(range(df.index.nlevels)))
    ratio_allocated_to_impacted = util.DfOper.divi((allocated_foreign_gau_slice, impacted_gaus_slice), fill_value=np.nan, non_expandable_levels=[])
    # guard against divide-by-zero: where the impacted slice is zero, force the ratio to zero
    ratio_allocated_to_impacted.iloc[np.nonzero(impacted_gaus_slice.values == 0)] = 0
    clean_ratio = TimeSeries.clean(data=ratio_allocated_to_impacted, time_index_name=y_or_v, interpolation_method='linear_interpolation', extrapolation_method='nearest')
    allocated_foreign_gau_slice_all_years = util.DfOper.mult((clean_ratio, impacted_gaus_slice), fill_value=np.nan, non_expandable_levels=[])
    allocated_foreign_gau_slice_new_geo = util.remove_df_levels(allocated_foreign_gau_slice_all_years, foreign_geography)
    allocated_foreign_gau_slice_foreign_geo = util.remove_df_levels(allocated_foreign_gau_slice_all_years, current_geography)
    allocated_foreign_gau_slice_foreign_geo.index = allocated_foreign_gau_slice_foreign_geo.index.rename(current_geography, level=foreign_geography)

    # update foreign GAUs after clean timeseries
    allocated_gau_years = list(allocated_foreign_gau_slice_foreign_geo.index.get_level_values(y_or_v).values)
    allocated_foreign_gau_slice_foreign_geo = allocated_foreign_gau_slice_foreign_geo.reorder_levels(df.index.names).sort_index()
    indexer = util.level_specific_indexer(allocated_foreign_gau_slice_foreign_geo, [current_geography, y_or_v], [foreign_gau, allocated_gau_years])
    df.loc[indexer, :] = allocated_foreign_gau_slice_foreign_geo.loc[indexer, :]

    new_impacted_gaus = util.DfOper.subt((impacted_gaus_slice, allocated_foreign_gau_slice_new_geo), fill_value=np.nan, non_expandable_levels=[])
    new_impacted_gaus = new_impacted_gaus.reorder_levels(df.index.names).sort_index()
    if new_impacted_gaus.min().min() < 0:
        if not zero_out_negatives:
            raise ValueError('Negative values resulted from subtracting the foreign gau from the base gaus. This is the resulting dataframe: {}'.format(new_impacted_gaus))
        else:
            new_impacted_gaus[new_impacted_gaus < 0] = 0
    if new_impacted_gaus.isnull().all().value:
        raise ValueError('Year or vintages did not overlap between the foreign gaus and impacted gaus')

    # update native GAUs after netting out foreign gaus
    impacted_gau_years = list(impacted_gaus_slice.index.get_level_values(y_or_v).values)
    indexer = util.level_specific_indexer(df, [current_geography, y_or_v], [impacted_gaus, impacted_gau_years])
    df.loc[indexer, :] = new_impacted_gaus.loc[indexer, :]
    return df
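# A small sketch (hypothetical arrays) of the divide-by-zero guard above:
# after an elementwise ratio, positions where the denominator was zero are
# forced to zero instead of being left as inf/NaN.
import numpy as np
import pandas as pd

numer = pd.DataFrame({'value': [2.0, 4.0, 1.0]})
denom = pd.DataFrame({'value': [4.0, 0.0, 2.0]})
ratio = numer / denom                          # -> [0.5, inf, 0.5]
ratio.iloc[np.nonzero(denom.values == 0)] = 0  # -> [0.5, 0.0, 0.5]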
def set_rio_capacity_factor(self, rio_inputs):
    df = rio_inputs.capacity_factor
    if self.name in set(df.index.get_level_values('technology')) and self.name not in cfg.rio_excluded_technologies:
        df = util.df_slice(df, self.name, 'technology')
        if not np.any([isinstance(x, int) for x in df.index.get_level_values('resource_bin').values]):
            df = util.remove_df_levels(df, 'resource_bin')
        else:
            df = df[df.index.get_level_values('resource_bin') != 'n/a']
            df = df.groupby(level=df.index.names).sum()
        self.raw_values = df
        self._has_data = True
        self.geography = cfg.rio_geography
        self.capacity_or_energy_unit = cfg.rio_energy_unit
        self.time_unit = cfg.rio_time_unit
        self.input_timestep = cfg.rio_timestep_multiplier
        self.interpolation_method = 'linear_interpolation'
        self.extrapolation_method = 'nearest'
def solve_ld_optimization(self):
    if len(self.ld_technologies):
        model = dispatch_long_duration.ld_energy_formulation(self)
        results = self.run_pyomo(model, None)
        ld_opt_df = self.parse_ld_opt_result(ld_result_to_list(results.Provide_Power))
        temp_df = pd.DataFrame([[r[0], r[-2], r[-1]] for r in storage_result_to_list(results.Provide_Power)],
                               columns=[cfg.dispatch_geography, 'hour', self.year])
        temp_df = temp_df.set_index([cfg.dispatch_geography, 'hour']).groupby(level=[cfg.dispatch_geography, 'hour']).sum()
        # this doesn't have transmission losses, so it is an approximation
        transmit_power = pd.DataFrame([[key[0], key[1], value.value] for key, value in results.Net_Transmit_Power_by_Geo.iteritems()],
                                      columns=[cfg.dispatch_geography, 'hour', self.year])
        self.ld_bulk_net_load_df_updated = self.ld_bulk_net_load_df - temp_df - transmit_power.set_index([cfg.dispatch_geography, 'hour'])
        ld_energy_budgets = util.recursivedict()

        def split_and_apply(array, dispatch_periods, fun):
            energy_by_block = np.array_split(array, np.where(np.diff(dispatch_periods) != 0)[0] + 1)
            return [fun(block) for block in energy_by_block]

        for tech in self.ld_technologies:
            energy_budgets = split_and_apply(util.df_slice(ld_opt_df, tech, 'ld_technology').values, self.period_repeated, sum)
            for period in self.periods:
                ld_energy_budgets[period][tech] = energy_budgets[period][0]
        return ld_energy_budgets
    else:
        self.ld_bulk_net_load_df_updated = self.ld_bulk_net_load_df
        return util.recursivedict()
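# A self-contained sketch (toy arrays) of the split_and_apply helper in
# solve_ld_optimization: np.diff finds period boundaries, np.array_split cuts
# the hourly values there, and a reducer runs per block.
import numpy as np

values = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
dispatch_periods = np.array([1, 1, 1, 2, 2, 3])
blocks = np.array_split(values, np.where(np.diff(dispatch_periods) != 0)[0] + 1)
budgets = [block.sum() for block in blocks]  # -> [6.0, 9.0, 6.0]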
def return_stock_slice(self, elements, levels, stock_name='technology'):
    group = util.df_slice(getattr(self, stock_name), elements, levels)
    return group
def format_specified_stock(self, elements, levels, stock_name='specified'):
    group = util.df_slice(getattr(self, stock_name), elements, levels)
    return group
def incorporate_foreign_gaus(self, df, current_geography, data_type, map_key, keep_oth_index_over_oth_gau=False, zero_out_negatives=True):
    native_gaus, current_gaus, foreign_gaus = self.get_native_current_foreign_gaus(df, current_geography)

    # we don't have any foreign gaus
    if not foreign_gaus or not cfg.include_foreign_gaus:
        return df, current_geography

    if 'year' in df.index.names:
        y_or_v = 'year'
    elif 'vintage' in df.index.names:
        y_or_v = 'vintage'
    else:
        raise ValueError('df must either have year or vintage to incorporate foreign gaus')

    index_with_nans = [df.index.names[i] for i in set(np.nonzero([np.isnan(row) for row in df.index.get_values()])[1])]
    # if we have an index with nan, that typically indicates that one of the foreign gaus didn't have all the index levels
    # if this is the case, we have two options (1) get rid of the other index (2) ignore the foreign gau
    if index_with_nans and (keep_oth_index_over_oth_gau or data_type == 'intensity'):
        return self.filter_foreign_gaus(df, current_geography), current_geography
    else:
        assert (y_or_v not in index_with_nans) and (current_geography not in index_with_nans)
        # we need to eliminate levels with nan before moving on
        df = util.remove_df_levels(df, index_with_nans)

    base_gaus = np.array(self.values.index.get_level_values(current_geography), dtype=int)
    for id in foreign_gaus:
        foreign_geography = self.gau_to_geography[id]
        index = np.nonzero(self.values.index.get_level_values(self.gau_to_geography[id]) == id)[0]
        impacted_gaus = list(set(base_gaus[index]))
        base_gaus[index] = id
        if any(impacted in foreign_gaus for impacted in impacted_gaus):
            raise ValueError('foreign gaus in the database cannot overlap geographically')

        # if the data_type is a total, we need to net out the total from the neighboring gaus
        if data_type == 'total':
            # we first need to do a clean time series
            # then we need to allocate out and subtract
            allocation = self.map_df(foreign_geography, current_geography, map_key=map_key, primary_subset_id=[id])
            foreign_gau_slice = util.df_slice(df, id, current_geography, drop_level=False, reset_index=True)
            foreign_gau_slice.index = foreign_gau_slice.index.rename(foreign_geography, level=current_geography)
            allocated_foreign_gau_slice = util.DfOper.mult((foreign_gau_slice, allocation))
            allocated_foreign_gau_slice = util.remove_df_levels(allocated_foreign_gau_slice, foreign_geography)
            indexer = util.level_specific_indexer(df, current_geography, [impacted_gaus])
            impacted_gaus_slice = df.loc[indexer, :].reset_index().set_index(df.index.names)
            impacted_gau_years = list(impacted_gaus_slice.index.get_level_values(y_or_v).values)
            new_impacted_gaus = util.DfOper.subt((impacted_gaus_slice, allocated_foreign_gau_slice), fill_value=np.nan, non_expandable_levels=[])
            new_impacted_gaus = new_impacted_gaus.reorder_levels(df.index.names).sort_index()
            if new_impacted_gaus.min().min() < 0:
                if not zero_out_negatives:
                    raise ValueError('Negative values resulted from subtracting the foreign gau from the base gaus. This is the resulting dataframe: {}'.format(new_impacted_gaus))
                else:
                    new_impacted_gaus[new_impacted_gaus < 0] = 0
            if new_impacted_gaus.isnull().all().value:
                raise ValueError('Year or vintages did not overlap between the foreign gaus and impacted gaus')
            indexer = util.level_specific_indexer(df, [current_geography, y_or_v], [impacted_gaus, impacted_gau_years])
            df.loc[indexer, :] = new_impacted_gaus.loc[indexer, :]

    assert not any([any(np.isnan(row)) for row in df.index.get_values()])
    new_geography_name = self.make_new_geography_name(current_geography, list(foreign_gaus))
    df.index = df.index.rename(new_geography_name, level=current_geography)
    if new_geography_name not in self.geographies:
        self.add_new_geography(new_geography_name, base_gaus)
    return df, new_geography_name
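# A toy sketch (hypothetical index) of the NaN-level detection at the top of
# incorporate_foreign_gaus: find which index levels contain NaN entries.
# to_numpy() stands in here for the older get_values() accessor.
import numpy as np
import pandas as pd

idx = pd.MultiIndex.from_tuples([(1.0, 2010.0), (2.0, np.nan)], names=['gau', 'other_index'])
df = pd.DataFrame({'value': [1.0, 2.0]}, index=idx)
nan_mask = [np.isnan(row) for row in df.index.to_numpy()]
index_with_nans = [df.index.names[i] for i in set(np.nonzero(nan_mask)[1])]
# -> ['other_index']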
def produce_flexible_load(shape_df, percent_flexible=None, hr_delay=None, hr_advance=None):
    hr_delay = 0 if hr_delay is None else hr_delay
    hr_advance = 0 if hr_advance is None else hr_advance

    native_slice = util.df_slice(shape_df, elements=2, levels='timeshift_type')
    native_slice_stacked = pd.concat([native_slice] * 3, keys=[1, 2, 3], names=['timeshift_type'])
    pflex_stacked = pd.concat([percent_flexible] * 3, keys=[1, 2, 3], names=['timeshift_type'])

    timeshift_levels = sorted(util.get_elements_from_level(shape_df, 'timeshift_type'))
    if timeshift_levels == [1, 2, 3]:
        # here, we have flexible load profiles already specified by the user
        names = shape_df.index.names
        full_load = shape_df.squeeze().unstack('timeshift_type')
        group_by_names = [n for n in full_load.index.names if n != 'weather_datetime']
        full_load = full_load.groupby(level=group_by_names).apply(Shape.ensure_feasible_flexible_load)
        full_load = full_load.stack('timeshift_type').reorder_levels(names).sort_index().to_frame()
        full_load.columns = ['value']
    elif timeshift_levels == [2]:
        # positive hours is a shift forward, negative hours a shift back
        shift = lambda df, hr: df.shift(hr).ffill().fillna(value=0)

        def fix_first_point(df, hr):
            df.iloc[0] += native_slice.iloc[:hr].sum().sum()
            return df

        non_weather = [n for n in native_slice.index.names if n != 'weather_datetime']
        delay_load = native_slice.groupby(level=non_weather).apply(shift, hr=hr_delay)
        advance_load = native_slice.groupby(level=non_weather).apply(shift, hr=-hr_advance)
        advance_load = advance_load.groupby(level=non_weather).transform(fix_first_point, hr=hr_advance)
        full_load = pd.concat([delay_load, native_slice, advance_load], keys=[1, 2, 3], names=['timeshift_type'])
    else:
        raise ValueError("elements in the level timeshift_type are not recognized")

    return util.DfOper.add((util.DfOper.mult((full_load, pflex_stacked), collapsible=False),
                            util.DfOper.mult((native_slice_stacked, 1 - pflex_stacked), collapsible=False)))
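# A simplified toy illustration (hypothetical series) of the fix_first_point
# bookkeeping in the newer produce_flexible_load: advancing load pushes the
# first hr_advance hours off the front, and that energy is added back to the
# first point. The real shift helper also forward-fills the trailing values;
# this sketch uses a plain zero fill to keep the arithmetic visible.
import pandas as pd

native = pd.Series([5.0, 6.0, 7.0, 8.0])
hr_advance = 2
advanced = native.shift(-hr_advance).fillna(value=0)  # -> [7, 8, 0, 0]
advanced.iloc[0] += native.iloc[:hr_advance].sum()    # -> [18, 8, 0, 0]
assert advanced.sum() == native.sum()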
def _update_dataframe_totals_after_foreign_gau(self, df, current_geography, foreign_geography, impacted_gaus, foreign_gau, map_key, zero_out_negatives):
    y_or_v = GeoMapper._get_df_time_index_name(df)
    # we first need to do a clean time series
    # then we need to allocate out and subtract
    indexer = util.level_specific_indexer(df, current_geography, [impacted_gaus])
    impacted_gaus_slice = df.loc[indexer, :].reset_index().set_index(df.index.names)
    foreign_gau_slice = util.df_slice(df, foreign_gau, current_geography, drop_level=False, reset_index=True)
    foreign_gau_slice.index = foreign_gau_slice.index.rename(foreign_geography, level=current_geography)

    # do the allocation, take the ratio of foreign to native, do a clean timeseries, then reconstitute the foreign gau data over all years
    allocation = self.map_df(foreign_geography, current_geography, map_key=map_key, primary_subset_id=[foreign_gau])
    allocated_foreign_gau_slice = util.DfOper.mult((foreign_gau_slice, allocation), fill_value=np.nan)
    allocated_foreign_gau_slice = allocated_foreign_gau_slice.reorder_levels([-1] + list(range(df.index.nlevels)))
    ratio_allocated_to_impacted = util.DfOper.divi((allocated_foreign_gau_slice, impacted_gaus_slice), fill_value=np.nan, non_expandable_levels=[])
    clean_ratio = TimeSeries.clean(data=ratio_allocated_to_impacted, time_index_name=y_or_v, interpolation_method='linear_interpolation', extrapolation_method='nearest')
    allocated_foreign_gau_slice_all_years = util.DfOper.mult((clean_ratio, impacted_gaus_slice), fill_value=np.nan, non_expandable_levels=[])
    allocated_foreign_gau_slice_new_geo = util.remove_df_levels(allocated_foreign_gau_slice_all_years, foreign_geography)
    allocated_foreign_gau_slice_foreign_geo = util.remove_df_levels(allocated_foreign_gau_slice_all_years, current_geography)
    allocated_foreign_gau_slice_foreign_geo.index = allocated_foreign_gau_slice_foreign_geo.index.rename(current_geography, level=foreign_geography)

    # update foreign GAUs after clean timeseries
    allocated_gau_years = list(allocated_foreign_gau_slice_foreign_geo.index.get_level_values(y_or_v).values)
    indexer = util.level_specific_indexer(allocated_foreign_gau_slice_foreign_geo, [current_geography, y_or_v], [foreign_gau, allocated_gau_years])
    df.loc[indexer, :] = allocated_foreign_gau_slice_foreign_geo.loc[indexer, :]

    new_impacted_gaus = util.DfOper.subt((impacted_gaus_slice, allocated_foreign_gau_slice_new_geo), fill_value=np.nan, non_expandable_levels=[])
    new_impacted_gaus = new_impacted_gaus.reorder_levels(df.index.names).sort_index()
    if new_impacted_gaus.min().min() < 0:
        if not zero_out_negatives:
            raise ValueError('Negative values resulted from subtracting the foreign gau from the base gaus. This is the resulting dataframe: {}'.format(new_impacted_gaus))
        else:
            new_impacted_gaus[new_impacted_gaus < 0] = 0
    if new_impacted_gaus.isnull().all().value:
        raise ValueError('Year or vintages did not overlap between the foreign gaus and impacted gaus')

    # update native GAUs after netting out foreign gaus
    impacted_gau_years = list(impacted_gaus_slice.index.get_level_values(y_or_v).values)
    indexer = util.level_specific_indexer(df, [current_geography, y_or_v], [impacted_gaus, impacted_gau_years])
    df.loc[indexer, :] = new_impacted_gaus.loc[indexer, :]
    return df
def get_values(self, year):
    return util.df_slice(self.values, year, 'year')