def export_io(self):
    """Write the supply-side input-output tables to ``s_io.csv``.

    Builds one IO table per sampled year (the first supply year plus every
    ``io_table_write_step``-th year counting back from the last), stacks them
    into a single multi-indexed frame, zeroes the cross-sector quadrants, and
    writes the cleaned result under ``<workingdir>/supply_outputs`` tagged
    with the scenario name and run timestamp.
    """
    io_table_write_step = int(cfg.cfgfile.get('output_detail','io_table_write_step'))
    # NOTE(review): relies on Python 2 `range` returning a list so it can be
    # concatenated with [min(...)] — would raise TypeError on Python 3; confirm interpreter.
    io_table_years = sorted([min(cfg.supply_years)] + range(max(cfg.supply_years), min(cfg.supply_years), -io_table_write_step))
    df_list = []
    for year in io_table_years:
        sector_df_list = []
        keys = self.supply.demand_sectors
        name = ['sector']
        for sector in self.supply.demand_sectors:
            sector_df_list.append(self.supply.io_dict[year][sector])
        # stack per-sector IO frames on the rows, then replicate across the
        # columns so rows and columns carry the same 'sector' level
        year_df = pd.concat(sector_df_list, keys=keys,names=name)
        year_df = pd.concat([year_df]*len(keys),keys=keys,names=name,axis=1)
        df_list.append(year_df)
    keys = io_table_years
    name = ['year']
    df = pd.concat(df_list,keys=keys,names=name)
    # zero out every off-diagonal (row sector != column sector) block
    for row_sector in self.supply.demand_sectors:
        for col_sector in self.supply.demand_sectors:
            if row_sector != col_sector:
                df.loc[util.level_specific_indexer(df,'sector',row_sector),util.level_specific_indexer(df,'sector',col_sector,axis=1)] = 0
    self.supply.outputs.io = df
    result_df = self.supply.outputs.return_cleaned_output('io')
    # prepend SCENARIO and TIMESTAMP as outer index levels on the output
    keys = [self.scenario.name.upper(), cfg.timestamp]
    names = ['SCENARIO','TIMESTAMP']
    for key, name in zip(keys,names):
        result_df = pd.concat([result_df], keys=[key],names=[name])
    Output.write(result_df, 's_io.csv', os.path.join(cfg.workingdir, 'supply_outputs'))
def calculate_d_payback_energy(self):
    """Compute cumulative per-unit energy payback by vintage and lifetime year.

    Divides demand-side energy payback totals by sales, attaches each
    subsector's stock unit, re-indexes years to 'lifetime_year' (years since
    vintage), and cumulatively sums over lifetime years. The cleaned result
    lands in ``self.demand.outputs.d_payback_energy``.
    """
    initial_vintage = min(cfg.supply_years)
    demand_side_df = self.demand.d_all_energy_demand_payback
    demand_side_df.columns = ['value']
    # restrict both the vintage and year dimensions to the supply-year window
    demand_side_df = demand_side_df[demand_side_df.index.get_level_values('vintage')>=initial_vintage]
    demand_side_df = demand_side_df[demand_side_df.index.get_level_values('year')>=initial_vintage]
    # deep copy so renaming/filtering doesn't mutate the shared outputs frame
    sales_df = copy.deepcopy(self.demand.outputs.d_sales)
    util.replace_index_name(sales_df,'vintage','year')
    sales_df = sales_df[sales_df.index.get_level_values('vintage')>=initial_vintage]
    sales_df = util.add_and_set_index(sales_df,'year',cfg.supply_years)
    # sales_df.index = sales_df.index.reorder_levels(demand_side_df.index.names)
    # sales_df = sales_df.reindex(demand_side_df.index).sort_index()
    # per-unit payback = energy totals / sales; drop non-finite rows then
    # zero any remaining inf/nan
    self.demand.outputs.d_payback_energy = util.DfOper.divi([demand_side_df, sales_df])
    self.demand.outputs.d_payback_energy = self.demand.outputs.d_payback_energy[np.isfinite(self.demand.outputs.d_payback_energy.values)]
    self.demand.outputs.d_payback_energy = self.demand.outputs.d_payback_energy.replace([np.inf,np.nan],0)
    # label each subsector's rows with its stock unit (skip 'link' subsectors)
    for sector in self.demand.sectors.values():
        for subsector in sector.subsectors.values():
            if hasattr(subsector,'stock') and subsector.sub_type!='link':
                indexer = util.level_specific_indexer(self.demand.outputs.d_payback_energy,'subsector',subsector.id)
                self.demand.outputs.d_payback_energy.loc[indexer,'unit'] = subsector.stock.unit.upper()
    self.demand.outputs.d_payback_energy = self.demand.outputs.d_payback_energy.set_index('unit', append=True)
    self.demand.outputs.d_payback_energy.columns = [cfg.calculation_energy_unit.upper()]
    # lifetime_year counts years since the vintage, starting at 1
    self.demand.outputs.d_payback_energy['lifetime_year'] = self.demand.outputs.d_payback_energy.index.get_level_values('year')-self.demand.outputs.d_payback_energy.index.get_level_values('vintage')+1
    self.demand.outputs.d_payback_energy = self.demand.outputs.d_payback_energy.set_index('lifetime_year',append=True)
    self.demand.outputs.d_payback_energy = util.remove_df_levels(self.demand.outputs.d_payback_energy,'year')
    # cumulative sum along lifetime_year within every other index grouping
    self.demand.outputs.d_payback_energy = self.demand.outputs.d_payback_energy.groupby(level = [x for x in self.demand.outputs.d_payback_energy.index.names if x !='lifetime_year']).transform(lambda x: x.cumsum())
    self.demand.outputs.d_payback_energy = self.demand.outputs.d_payback_energy[self.demand.outputs.d_payback_energy[cfg.calculation_energy_unit.upper()]!=0]
    self.demand.outputs.d_payback_energy = self.demand.outputs.return_cleaned_output('d_payback_energy')
def calculate_tco(self):
    """Compute total cost of ownership per unit of service demand.

    Stacks supply-side and demand-side levelized costs (each divided by
    service demand) under a 'COST TYPE' level, zeroes non-finite and negative
    values, attaches each subsector's service-demand unit, and stores the
    cleaned result in ``self.outputs.c_tco``.
    """
    # NOTE(review): a commented-out block computing embodied/direct demand
    # emissions was removed here for clarity; see version control if needed.
    cost_unit = cfg.cfgfile.get('case','currency_year_id') + " " + cfg.cfgfile.get('case','currency_name')
    initial_vintage = min(cfg.supply_years)
    supply_side_df = self.demand.outputs.demand_embodied_energy_costs_tco
    supply_side_df = supply_side_df[supply_side_df.index.get_level_values('vintage')>=initial_vintage]
    demand_side_df = self.demand.d_levelized_costs_tco
    demand_side_df.columns = ['value']
    demand_side_df = demand_side_df[demand_side_df.index.get_level_values('vintage')>=initial_vintage]
    service_demand_df = self.demand.d_service_demand_tco
    service_demand_df = service_demand_df[service_demand_df.index.get_level_values('vintage')>=initial_vintage]
    keys = ['SUPPLY-SIDE', 'DEMAND-SIDE']
    names = ['COST TYPE']
    # cost per unit of service demand, stacked by cost type
    self.outputs.c_tco = pd.concat([util.DfOper.divi([supply_side_df,util.remove_df_levels(service_demand_df,'unit')]),
                                    util.DfOper.divi([demand_side_df,util.remove_df_levels(service_demand_df,'unit')])],
                                   keys=keys,names=names)
    self.outputs.c_tco = self.outputs.c_tco.replace([np.inf,np.nan],0)
    # negative TCO values are clamped to zero
    self.outputs.c_tco[self.outputs.c_tco<0]=0
    # label rows with the subsector's service-demand unit where one exists
    for sector in self.demand.sectors.values():
        for subsector in sector.subsectors.values():
            if hasattr(subsector,'service_demand') and hasattr(subsector,'stock'):
                indexer = util.level_specific_indexer(self.outputs.c_tco,'subsector',subsector.id)
                self.outputs.c_tco.loc[indexer,'unit'] = subsector.service_demand.unit.upper()
    self.outputs.c_tco = self.outputs.c_tco.set_index('unit',append=True)
    self.outputs.c_tco.columns = [cost_unit.upper()]
    self.outputs.c_tco= self.outputs.c_tco[self.outputs.c_tco[cost_unit.upper()]!=0]
    self.outputs.c_tco = self.outputs.return_cleaned_output('c_tco')
def normalize(self):
    """Normalize ``self.values`` over all index levels except 'weather_datetime'.

    Each group is scaled so it sums to ``self.num_active_years``. When a
    'dispatch_constraint' level is present, the values are first divided by
    geography normalization factors, and the rows for constraints 2 and 3
    (p_min / p_max) are restored un-normalized afterwards, since those are
    levels rather than shapes.
    """
    levels = [name for name in self.values.index.names if name != 'weather_datetime']

    def _norm(frame):
        # scale each group to sum to the number of active years
        return frame.groupby(level=levels).transform(lambda x: x / x.sum()) * self.num_active_years

    if 'dispatch_constraint' not in levels:
        self.values = _norm(self.values)
        return
    # special case for hydro p_min/p_max: pre-divide by the combined
    # timezone/primary geography normalization factors
    factors = util.DfOper.mult((self.map_df_tz, self.map_df_primary)).groupby(
        level=cfg.cfgfile.get('case', 'primary_geography')).sum()
    self.values = util.DfOper.divi((self.values, factors))
    normalized = _norm(self.values)
    # TODO: 2 and 3 should not be hard coded here; they represent p_min and p_max
    pmin_pmax_rows = util.level_specific_indexer(normalized, 'dispatch_constraint', [[2, 3]])
    normalized.loc[pmin_pmax_rows, :] = self.values.loc[pmin_pmax_rows, :]
    self.values = normalized
def calculate_tco(self):
    """Compute total cost of ownership per unit of service demand.

    Same pipeline as the cfgfile-based variant but reading currency settings
    via ``cfg.getParam``: supply-side and demand-side levelized costs are each
    divided by service demand, stacked under a 'COST TYPE' level, cleaned of
    non-finite/negative values, unit-labeled, and stored in
    ``self.outputs.c_tco``.
    """
    cost_unit = cfg.getParam('currency_year') + " " + cfg.getParam('currency_name')
    initial_vintage = min(cfg.supply_years)
    supply_side_df = self.demand.outputs.demand_embodied_energy_costs_tco
    supply_side_df = supply_side_df[supply_side_df.index.get_level_values('vintage')>=initial_vintage]
    demand_side_df = self.demand.d_levelized_costs_tco
    demand_side_df.columns = ['value']
    demand_side_df = demand_side_df[demand_side_df.index.get_level_values('vintage')>=initial_vintage]
    service_demand_df = self.demand.d_service_demand_tco
    service_demand_df = service_demand_df[service_demand_df.index.get_level_values('vintage')>=initial_vintage]
    keys = ['SUPPLY-SIDE', 'DEMAND-SIDE']
    names = ['COST TYPE']
    # cost per unit of service demand, stacked by cost type
    self.outputs.c_tco = pd.concat([util.DfOper.divi([supply_side_df,util.remove_df_levels(service_demand_df,'unit')]),
                                    util.DfOper.divi([demand_side_df,util.remove_df_levels(service_demand_df,'unit')])],
                                   keys=keys,names=names)
    self.outputs.c_tco = self.outputs.c_tco.replace([np.inf,np.nan],0)
    # negative TCO values are clamped to zero
    self.outputs.c_tco[self.outputs.c_tco<0]=0
    # label rows with the subsector's service-demand unit where one exists
    for sector in self.demand.sectors.values():
        for subsector in sector.subsectors.values():
            if hasattr(subsector,'service_demand') and hasattr(subsector,'stock'):
                indexer = util.level_specific_indexer(self.outputs.c_tco,'subsector',subsector.id)
                self.outputs.c_tco.loc[indexer,'unit'] = subsector.service_demand.unit.upper()
    self.outputs.c_tco = self.outputs.c_tco.set_index('unit',append=True)
    self.outputs.c_tco.columns = [cost_unit.upper()]
    self.outputs.c_tco= self.outputs.c_tco[self.outputs.c_tco[cost_unit.upper()]!=0]
    self.outputs.c_tco = self.outputs.return_cleaned_output('c_tco')
def normalize(self, df):
    """Return ``df`` normalized over every index level except 'weather_datetime'.

    Each group is scaled to sum to ``self.num_active_years``. When a
    'dispatch_constraint' level exists, values are first divided by the
    combined geography normalization factors and the 'p_min'/'p_max' rows are
    restored un-normalized afterwards (they are levels, not shapes).
    """
    levels = [name for name in df.index.names if name != 'weather_datetime']

    def _norm(frame):
        # scale each group to sum to the number of active years
        return frame.groupby(level=levels).transform(lambda x: x / x.sum()) * self.num_active_years

    if 'dispatch_constraint' not in levels:
        return _norm(df)
    # hydro p_min/p_max special case: pre-divide by geography factors
    factors = util.DfOper.mult((self.map_df_tz, self.map_df_primary)).groupby(
        level=self.primary_geography).sum()
    scaled = util.DfOper.divi((df, factors))
    result = _norm(scaled)
    constraint_rows = util.level_specific_indexer(result, 'dispatch_constraint', [['p_min', 'p_max']])
    result.loc[constraint_rows, :] = scaled.loc[constraint_rows, :]
    return result
def filter_foreign_gaus(self, df, current_geography, foreign_gaus=None):
    """Remove foreign gaus from the dataframe.

    Args:
        df: multi-indexed dataframe with a ``current_geography`` index level.
        current_geography: name of the geography level to filter on.
        foreign_gaus: optional subset of foreign gaus to drop; defaults to
            all foreign gaus detected in ``df``.

    Returns:
        ``df`` restricted to non-foreign gaus, with integer dtypes restored
        on index levels whose NaNs disappeared along with the dropped rows.
    """
    ncf = self.get_native_current_foreign_gaus(df, current_geography)
    foreign_gaus = ncf[2] if foreign_gaus is None else foreign_gaus
    current_gaus = ncf[1]
    # every requested foreign gau must actually appear in the dataframe
    assert len(foreign_gaus - current_gaus) == 0
    if not foreign_gaus:
        return df
    # if the index has nans, we need to be careful about data types:
    # record which levels contain NaN before filtering
    index_with_nans_before = [df.index.names[i] for i in set(np.nonzero([np.isnan(row) for row in df.index.get_values()])[1])]
    indexer = util.level_specific_indexer(df, current_geography, [list(current_gaus-foreign_gaus)])
    index_names = df.index.names
    df = df.loc[indexer]
    index_with_nans_after = [df.index.names[i] for i in set(np.nonzero([np.isnan(row) for row in df.index.get_values()])[1])]
    df = df.reset_index()
    # levels whose NaNs vanished with the dropped rows had been upcast to
    # float; cast them back to int before rebuilding the index
    index_without_nans_anymore = list(set(index_with_nans_before) - set(index_with_nans_after))
    df[index_without_nans_anymore] = df[index_without_nans_anymore].values.astype(int)
    df = df.set_index(index_names)
    # we shouldn't have any nans (or anything but integers in the index)
    # once ALL foreign gaus were removed
    if tuple(sorted(foreign_gaus)) == tuple(sorted(ncf[2])):
        assert not any([any(np.isnan(row)) for row in df.index.get_values()])
    return df
def _update_dataframe_totals_after_foreign_gau(self, df, current_geography, foreign_geography, impacted_gaus, foreign_gau, map_key, zero_out_negatives):
    """Net a foreign gau's totals out of the native gaus it overlaps.

    Allocates the foreign gau's data over the impacted native gaus, cleans
    the allocation ratio as a time series so the foreign data covers all
    years, writes the reconstituted foreign rows back into ``df``, and
    subtracts the allocated amounts from the impacted native gaus.

    Args:
        df: multi-indexed totals dataframe being updated in place.
        current_geography / foreign_geography: index level names.
        impacted_gaus: native gaus geographically overlapped by the foreign gau.
        foreign_gau: the foreign gau id being incorporated.
        map_key: geography map key used for the allocation.
        zero_out_negatives: clamp negative residuals to zero instead of raising.

    Returns:
        The updated ``df``.

    Raises:
        ValueError: if subtraction produces negatives (and zero_out_negatives
            is False) or if years/vintages do not overlap at all.
    """
    y_or_v = GeoMapper._get_df_time_index_name(df)
    # we first need to do a clean time series
    # then we need to allocate out and subtract
    indexer = util.level_specific_indexer(df, current_geography, [impacted_gaus])
    impacted_gaus_slice = df.loc[indexer, :].reset_index().set_index(df.index.names)
    foreign_gau_slice = util.df_slice(df, foreign_gau, current_geography, drop_level=False, reset_index=True)
    foreign_gau_slice.index = foreign_gau_slice.index.rename(foreign_geography, level=current_geography)
    # do the allocation, take the ratio of foreign to native, do a clean timeseries, then reconstitute the foreign gau data over all years
    allocation = self.map_df(foreign_geography, current_geography, map_key=map_key, primary_subset_id=[foreign_gau])
    allocated_foreign_gau_slice = util.DfOper.mult((foreign_gau_slice, allocation), fill_value=np.nan)
    # FIX: wrap range in list() so the level reorder works on Python 3 as
    # well as Python 2 (where range already returned a list)
    allocated_foreign_gau_slice = allocated_foreign_gau_slice.reorder_levels([-1] + list(range(df.index.nlevels)))
    ratio_allocated_to_impacted = util.DfOper.divi((allocated_foreign_gau_slice, impacted_gaus_slice), fill_value=np.nan, non_expandable_levels=[])
    # zero the ratio where the impacted gaus are zero to avoid inf/nan blowups
    ratio_allocated_to_impacted.iloc[np.nonzero(impacted_gaus_slice.values==0)] = 0
    clean_ratio = TimeSeries.clean(data=ratio_allocated_to_impacted, time_index_name=y_or_v, interpolation_method='linear_interpolation', extrapolation_method='nearest')
    allocated_foreign_gau_slice_all_years = util.DfOper.mult((clean_ratio, impacted_gaus_slice), fill_value=np.nan, non_expandable_levels=[])
    allocated_foreign_gau_slice_new_geo = util.remove_df_levels(allocated_foreign_gau_slice_all_years, foreign_geography)
    allocated_foreign_gau_slice_foreign_geo = util.remove_df_levels(allocated_foreign_gau_slice_all_years, current_geography)
    allocated_foreign_gau_slice_foreign_geo.index = allocated_foreign_gau_slice_foreign_geo.index.rename(current_geography, level=foreign_geography)
    # update foreign GAUs after clean timeseries
    allocated_gau_years = list(allocated_foreign_gau_slice_foreign_geo.index.get_level_values(y_or_v).values)
    # NOTE(review): DataFrame.sort() is the pre-0.20 pandas API — confirm the
    # pinned pandas version before porting to sort_index()
    allocated_foreign_gau_slice_foreign_geo = allocated_foreign_gau_slice_foreign_geo.reorder_levels(df.index.names).sort()
    indexer = util.level_specific_indexer(allocated_foreign_gau_slice_foreign_geo, [current_geography, y_or_v], [foreign_gau, allocated_gau_years])
    df.loc[indexer, :] = allocated_foreign_gau_slice_foreign_geo.loc[indexer, :]
    new_impacted_gaus = util.DfOper.subt((impacted_gaus_slice, allocated_foreign_gau_slice_new_geo), fill_value=np.nan, non_expandable_levels=[])
    new_impacted_gaus = new_impacted_gaus.reorder_levels(df.index.names).sort()
    if new_impacted_gaus.min().min() < 0:
        if not zero_out_negatives:
            raise ValueError('Negative values resulted from subtracting the foreign gau from the base gaus. This is the resulting dataframe: {}'.format(new_impacted_gaus))
        else:
            new_impacted_gaus[new_impacted_gaus < 0] = 0
    # NOTE(review): .all().value assumes the frame's single column is named
    # 'value' (Series attribute access by label) — confirm upstream naming
    if new_impacted_gaus.isnull().all().value:
        # FIX: removed pdb.set_trace() left over from debugging; just raise
        raise ValueError('Year or vintages did not overlap between the foreign gaus and impacted gaus')
    # update native GAUs after netting out foreign gaus
    impacted_gau_years = list(impacted_gaus_slice.index.get_level_values(y_or_v).values)
    indexer = util.level_specific_indexer(df, [current_geography, y_or_v], [impacted_gaus, impacted_gau_years])
    df.loc[indexer, :] = new_impacted_gaus.loc[indexer, :]
    return df
def filter_extra_geos_from_df(self, df):
    """Drop rows of ``df`` that fall entirely outside the geography breakout.

    When ``cfg.primary_subset_id`` is set, only the gau elements listed in
    ``self.geographies`` are kept for every geography index level; otherwise
    ``df`` is returned untouched.
    """
    if not cfg.primary_subset_id:
        return df
    # index levels that are geographies, and the allowed elements for each
    geo_levels = [name for name in df.index.names if name in self.geographies]
    geo_elements = [self.geographies[name] for name in geo_levels]
    row_indexer = util.level_specific_indexer(df, levels=geo_levels, elements=geo_elements)
    subset = df.sort_index().loc[row_indexer, :]
    # rebuild the index to discard the now-unused categories, then sort
    return subset.reset_index().set_index(subset.index.names).sort()
def convert_energy_to_power(self, df):
    """Convert energy quantities to average power by dividing by group length.

    Each row is divided by the number of observations in its active-time-key
    group. Rows under the 'p_min'/'p_max' dispatch constraints are restored
    unchanged, since those represent levels rather than energy totals.
    """
    # observations per active-time group, as a one-column frame for DfOper
    group_sizes = df.groupby(
        level=self._active_time_keys).apply(len).to_frame().rename(
            columns={0: 'len'})
    if 'dispatch_constraint' in df.index.names:
        # keep an untouched copy so p_min/p_max rows can be restored
        original = df.copy()
        averaged = util.DfOper.divi((df, group_sizes))
        restore_rows = util.level_specific_indexer(original, 'dispatch_constraint', [['p_min', 'p_max']])
        averaged.loc[restore_rows, :] = original.loc[restore_rows, :]
    else:
        averaged = util.DfOper.divi((df, group_sizes))
    return averaged
def normalize(self):
    """Normalize ``self.values`` across all index levels except 'weather_datetime'.

    Every group is scaled to sum to ``self.num_active_years``. When a
    'dispatch_constraint' level is present, values are first divided by the
    combined timezone/primary-geography factors and the rows for constraints
    2 and 3 (p_min / p_max) are restored un-normalized afterwards.
    """
    non_dt_levels = [lvl for lvl in self.values.index.names if lvl != 'weather_datetime']
    if 'dispatch_constraint' in non_dt_levels:
        # hydro p_min/p_max special case: pre-divide by geography factors
        norm_factors = util.DfOper.mult((self.map_df_tz, self.map_df_primary)).groupby(level=cfg.primary_geography).sum()
        self.values = util.DfOper.divi((self.values, norm_factors))
        scaled = self.values.groupby(level=non_dt_levels).transform(lambda v: v / v.sum()) * self.num_active_years
        # TODO: 2 and 3 should not be hard coded here; they represent p_min and p_max
        constraint_rows = util.level_specific_indexer(scaled, 'dispatch_constraint', [[2, 3]])
        scaled.loc[constraint_rows, :] = self.values.loc[constraint_rows, :]
        self.values = scaled
    else:
        self.values = self.values.groupby(level=non_dt_levels).transform(lambda v: v / v.sum()) * self.num_active_years
def calculate_payback(self):
    """Compute cumulative per-unit cost payback by vintage and lifetime year.

    Divides supply-side and demand-side annual costs by sales, stacks them
    under a 'COST TYPE' level, attaches each subsector's stock unit,
    re-indexes years to 'lifetime_year' (years since vintage), and
    cumulatively sums over lifetime years into ``self.outputs.c_payback``.
    """
    # NOTE(review): a commented-out block computing embodied/direct demand
    # emissions was removed here for clarity; see version control if needed.
    cost_unit = cfg.cfgfile.get('case','currency_year_id') + " " + cfg.cfgfile.get('case','currency_name')
    initial_vintage = min(cfg.supply_years)
    supply_side_df = self.demand.outputs.demand_embodied_energy_costs_payback
    # restrict both vintage and year to the supply-year window
    supply_side_df = supply_side_df[supply_side_df.index.get_level_values('vintage')>=initial_vintage]
    supply_side_df = supply_side_df[supply_side_df.index.get_level_values('year')>=initial_vintage]
    supply_side_df = supply_side_df.sort_index()
    demand_side_df = self.demand.d_annual_costs_payback
    demand_side_df.columns = ['value']
    demand_side_df = demand_side_df[demand_side_df.index.get_level_values('vintage')>=initial_vintage]
    demand_side_df = demand_side_df[demand_side_df.index.get_level_values('year')>=initial_vintage]
    # align the demand side to the supply side's index
    demand_side_df = demand_side_df.reindex(supply_side_df.index).sort_index()
    # deep copy so the renames/filters don't mutate the shared outputs frame
    sales_df = copy.deepcopy(self.demand.outputs.d_sales)
    util.replace_index_name(sales_df,'vintage','year')
    sales_df = sales_df[sales_df.index.get_level_values('vintage')>=initial_vintage]
    sales_df = util.add_and_set_index(sales_df,'year',cfg.supply_years)
    sales_df.index = sales_df.index.reorder_levels(supply_side_df.index.names)
    sales_df = sales_df.reindex(supply_side_df.index).sort_index()
    keys = ['SUPPLY-SIDE', 'DEMAND-SIDE']
    names = ['COST TYPE']
    # per-unit cost = costs / sales, stacked by cost type
    self.outputs.c_payback = pd.concat([util.DfOper.divi([supply_side_df, sales_df]), util.DfOper.divi([demand_side_df, sales_df])],keys=keys,names=names)
    # drop non-finite rows, then zero any remaining inf/nan
    self.outputs.c_payback = self.outputs.c_payback[np.isfinite(self.outputs.c_payback.values)]
    self.outputs.c_payback = self.outputs.c_payback.replace([np.inf,np.nan],0)
    # label each subsector's rows with its stock unit (skip 'link' subsectors)
    for sector in self.demand.sectors.values():
        for subsector in sector.subsectors.values():
            if hasattr(subsector,'stock') and subsector.sub_type!='link':
                indexer = util.level_specific_indexer(self.outputs.c_payback,'subsector',subsector.id)
                self.outputs.c_payback.loc[indexer,'unit'] = subsector.stock.unit.upper()
    self.outputs.c_payback = self.outputs.c_payback.set_index('unit', append=True)
    self.outputs.c_payback.columns = [cost_unit.upper()]
    # lifetime_year counts years since the vintage, starting at 1
    self.outputs.c_payback['lifetime_year'] = self.outputs.c_payback.index.get_level_values('year')-self.outputs.c_payback.index.get_level_values('vintage')+1
    self.outputs.c_payback = self.outputs.c_payback.set_index('lifetime_year',append=True)
    self.outputs.c_payback = util.remove_df_levels(self.outputs.c_payback,'year')
    # cumulative sum along lifetime_year within every other index grouping
    self.outputs.c_payback = self.outputs.c_payback.groupby(level = [x for x in self.outputs.c_payback.index.names if x !='lifetime_year']).transform(lambda x: x.cumsum())
    self.outputs.c_payback = self.outputs.c_payback[self.outputs.c_payback[cost_unit.upper()]!=0]
    self.outputs.c_payback = self.outputs.return_cleaned_output('c_payback')
def _update_dataframe_totals_after_foreign_gau(self, df, current_geography, foreign_geography, impacted_gaus, foreign_gau, map_key, zero_out_negatives):
    """Net a foreign gau's totals out of the native gaus it overlaps.

    Allocates the foreign gau's data over the impacted native gaus, cleans
    the allocation ratio as a time series so the foreign data covers all
    years, writes the reconstituted foreign rows back into ``df``, and
    subtracts the allocated amounts from the impacted native gaus.

    Args:
        df: multi-indexed totals dataframe being updated in place.
        current_geography / foreign_geography: index level names.
        impacted_gaus: native gaus geographically overlapped by the foreign gau.
        foreign_gau: the foreign gau id being incorporated.
        map_key: geography map key used for the allocation.
        zero_out_negatives: clamp negative residuals to zero instead of raising.

    Returns:
        The updated ``df``.

    Raises:
        ValueError: if subtraction produces negatives (and zero_out_negatives
            is False) or if years/vintages do not overlap at all.
    """
    y_or_v = GeoMapper._get_df_time_index_name(df)
    # we first need to do a clean time series
    # then we need to allocate out and subtract
    indexer = util.level_specific_indexer(df, current_geography, [impacted_gaus])
    impacted_gaus_slice = df.loc[indexer, :].reset_index().set_index(df.index.names)
    foreign_gau_slice = util.df_slice(df, foreign_gau, current_geography, drop_level=False, reset_index=True)
    foreign_gau_slice.index = foreign_gau_slice.index.rename(foreign_geography, level=current_geography)
    # do the allocation, take the ratio of foreign to native, do a clean timeseries, then reconstitute the foreign gau data over all years
    allocation = self.map_df(foreign_geography, current_geography, map_key=map_key, primary_subset_id=[foreign_gau])
    allocated_foreign_gau_slice = util.DfOper.mult((foreign_gau_slice, allocation), fill_value=np.nan)
    # FIX: wrap range in list() so the level reorder works on Python 3 as
    # well as Python 2 (where range already returned a list)
    allocated_foreign_gau_slice = allocated_foreign_gau_slice.reorder_levels([-1] + list(range(df.index.nlevels)))
    ratio_allocated_to_impacted = util.DfOper.divi((allocated_foreign_gau_slice, impacted_gaus_slice), fill_value=np.nan, non_expandable_levels=[])
    clean_ratio = TimeSeries.clean(data=ratio_allocated_to_impacted, time_index_name=y_or_v, interpolation_method='linear_interpolation', extrapolation_method='nearest')
    allocated_foreign_gau_slice_all_years = util.DfOper.mult((clean_ratio, impacted_gaus_slice), fill_value=np.nan, non_expandable_levels=[])
    allocated_foreign_gau_slice_new_geo = util.remove_df_levels(allocated_foreign_gau_slice_all_years, foreign_geography)
    allocated_foreign_gau_slice_foreign_geo = util.remove_df_levels(allocated_foreign_gau_slice_all_years, current_geography)
    allocated_foreign_gau_slice_foreign_geo.index = allocated_foreign_gau_slice_foreign_geo.index.rename(current_geography, level=foreign_geography)
    # update foreign GAUs after clean timeseries
    allocated_gau_years = list(allocated_foreign_gau_slice_foreign_geo.index.get_level_values(y_or_v).values)
    indexer = util.level_specific_indexer(allocated_foreign_gau_slice_foreign_geo, [current_geography, y_or_v], [foreign_gau, allocated_gau_years])
    # FIX: removed `try/except: pdb.set_trace()` that silently swallowed every
    # exception here and dropped into a debugger; failures now propagate
    df.loc[indexer, :] = allocated_foreign_gau_slice_foreign_geo.loc[indexer, :]
    new_impacted_gaus = util.DfOper.subt((impacted_gaus_slice, allocated_foreign_gau_slice_new_geo), fill_value=np.nan, non_expandable_levels=[])
    # NOTE(review): DataFrame.sort() is the pre-0.20 pandas API — confirm the
    # pinned pandas version before porting to sort_index()
    new_impacted_gaus = new_impacted_gaus.reorder_levels(df.index.names).sort()
    if new_impacted_gaus.min().min() < 0:
        if not zero_out_negatives:
            raise ValueError('Negative values resulted from subtracting the foreign gau from the base gaus. This is the resulting dataframe: {}'.format(new_impacted_gaus))
        else:
            new_impacted_gaus[new_impacted_gaus < 0] = 0
    # NOTE(review): .all().value assumes the frame's single column is named
    # 'value' (Series attribute access by label) — confirm upstream naming
    if new_impacted_gaus.isnull().all().value:
        # FIX: removed pdb.set_trace() left over from debugging; just raise
        raise ValueError('Year or vintages did not overlap between the foreign gaus and impacted gaus')
    # update native GAUs after netting out foreign gaus
    impacted_gau_years = list(impacted_gaus_slice.index.get_level_values(y_or_v).values)
    indexer = util.level_specific_indexer(df, [current_geography, y_or_v], [impacted_gaus, impacted_gau_years])
    df.loc[indexer, :] = new_impacted_gaus.loc[indexer, :]
    return df
def incorporate_foreign_gaus(self, df, current_geography, data_type, map_key, keep_oth_index_over_oth_gau=False, zero_out_negatives=True):
    """Fold foreign gaus into ``df``'s geography, creating a blended geography.

    For intensity data (or when ``keep_oth_index_over_oth_gau`` is set and the
    index carries NaNs) the foreign gaus are simply filtered out. For totals,
    each foreign gau's values are allocated over the native gaus it overlaps
    and netted out, then the geography level is renamed to a new blended
    geography that is registered if not already known.

    Args:
        df: multi-indexed dataframe with a ``current_geography`` level and a
            'year' or 'vintage' level.
        current_geography: name of the geography index level.
        data_type: 'total' or 'intensity'.
        map_key: geography map key used for allocation.
        keep_oth_index_over_oth_gau: prefer keeping other index levels over
            keeping foreign gaus when NaNs appear in the index.
        zero_out_negatives: clamp negative netted totals to zero.

    Returns:
        Tuple of (updated df, geography name to use going forward).

    Raises:
        ValueError: if no time level exists, foreign gaus overlap each other,
            or years/vintages do not overlap when netting totals.
    """
    native_gaus, current_gaus, foreign_gaus = self.get_native_current_foreign_gaus(df, current_geography)
    # we don't have any foreign gaus
    if not foreign_gaus or not cfg.include_foreign_gaus:
        return df, current_geography
    if 'year' in df.index.names:
        y_or_v = 'year'
    elif 'vintage' in df.index.names:
        y_or_v = 'vintage'
    else:
        raise ValueError('df must either have year or vintage to incorporate foreign gaus')
    index_with_nans = [df.index.names[i] for i in set(np.nonzero([np.isnan(row) for row in df.index.get_values()])[1])]
    # if we have an index with nan, that typically indicates that one of the foreign gaus didn't have all the index levels
    # if this is the case, we have two options (1) get rid of the other index (2) ignore the foreign gau
    if index_with_nans and (keep_oth_index_over_oth_gau or data_type=='intensity'):
        return self.filter_foreign_gaus(df, current_geography), current_geography
    else:
        assert (y_or_v not in index_with_nans) and (current_geography not in index_with_nans)
        # we need to eliminate levels with nan before moving on
        df = util.remove_df_levels(df, index_with_nans)
    base_gaus = np.array(self.values.index.get_level_values(current_geography), dtype=int)
    # FIX: loop variable renamed from `id`, which shadowed the builtin
    for gau_id in foreign_gaus:
        foreign_geography = self.gau_to_geography[gau_id]
        index = np.nonzero(self.values.index.get_level_values(self.gau_to_geography[gau_id])==gau_id)[0]
        impacted_gaus = list(set(base_gaus[index]))
        base_gaus[index] = gau_id
        if any(impacted in foreign_gaus for impacted in impacted_gaus):
            raise ValueError('foreign gaus in the database cannot overlap geographically')
        # if the data_type is a total, we need to net out the total from the neighboring gaus
        if data_type=='total':
            # we first need to do a clean time series
            # then we need to allocate out and subtract
            allocation = self.map_df(foreign_geography, current_geography, map_key=map_key, primary_subset_id=[gau_id])
            foreign_gau_slice = util.df_slice(df, gau_id, current_geography, drop_level=False, reset_index=True)
            foreign_gau_slice.index = foreign_gau_slice.index.rename(foreign_geography, level=current_geography)
            allocated_foreign_gau_slice = util.DfOper.mult((foreign_gau_slice, allocation))
            allocated_foreign_gau_slice = util.remove_df_levels(allocated_foreign_gau_slice, foreign_geography)
            indexer = util.level_specific_indexer(df, current_geography, [impacted_gaus])
            impacted_gaus_slice = df.loc[indexer,:].reset_index().set_index(df.index.names)
            impacted_gau_years = list(impacted_gaus_slice.index.get_level_values(y_or_v).values)
            new_impacted_gaus = util.DfOper.subt((impacted_gaus_slice, allocated_foreign_gau_slice), fill_value=np.nan, non_expandable_levels=[])
            # NOTE(review): DataFrame.sort() is the pre-0.20 pandas API —
            # confirm the pinned pandas version before porting to sort_index()
            new_impacted_gaus = new_impacted_gaus.reorder_levels(df.index.names).sort()
            if new_impacted_gaus.min().min() < 0:
                if not zero_out_negatives:
                    raise ValueError('Negative values resulted from subtracting the foreign gau from the base gaus. This is the resulting dataframe: {}'.format(new_impacted_gaus))
                else:
                    new_impacted_gaus[new_impacted_gaus<0] = 0
            # NOTE(review): .all().value assumes the single column is named
            # 'value' (Series attribute access by label) — confirm upstream
            if new_impacted_gaus.isnull().all().value:
                # FIX: removed pdb.set_trace() left over from debugging; just raise
                raise ValueError('Year or vintages did not overlap between the foreign gaus and impacted gaus')
            indexer = util.level_specific_indexer(df, [current_geography, y_or_v], [impacted_gaus, impacted_gau_years])
            df.loc[indexer, :] = new_impacted_gaus.loc[indexer, :]
    # after netting everything out, the index must be NaN-free
    assert not any([any(np.isnan(row)) for row in df.index.get_values()])
    new_geography_name = self.make_new_geography_name(current_geography, list(foreign_gaus))
    df.index = df.index.rename(new_geography_name, level=current_geography)
    if new_geography_name not in self.geographies:
        self.add_new_geography(new_geography_name, base_gaus)
    return df, new_geography_name