def remap(self, map_from='raw_values', map_to='values', drivers=None, time_index_name='year', time_index=None,
          fill_timeseries=True, interpolation_method='missing', extrapolation_method='missing',
          converted_geography=None, current_geography=None, current_data_type=None, fill_value=0., lower=0,
          upper=None):
    """
    Copy ``map_from`` into ``map_to``, then map it onto drivers and the target geography.

    Args:
        map_from (string): name of the attribute holding the starting dataframe (defaults to 'raw_values')
        map_to (string): name of the attribute that receives the result (defaults to 'values')
        drivers (list of or single dataframe): drivers for the remap; None or empty means no drivers
        time_index_name (string): name of the time level; the attribute ``time_index_name + 's'`` is used
            as the time index when ``time_index`` is not given
        time_index: explicit time index handed to clean_timeseries
        fill_timeseries (bool): whether clean_timeseries is run on the data
        interpolation_method, extrapolation_method (string): forwarded to clean_timeseries
        converted_geography (string): target geography (defaults to the configured primary geography)
        current_geography (string): geography of the data (defaults to self.geography)
        current_data_type (string): either 'total' or 'intensity' (defaults to self.input_type)
        fill_value: forwarded to geo_map and to the driver multiplication/division
        lower, upper: forwarded to clean_timeseries when there are no drivers

    Raises:
        ValueError: if ``current_geography`` is not an index level of the ``map_from`` dataframe
    """
    if converted_geography is None:
        converted_geography = cfg.cfgfile.get('case', 'primary_geography')
    if current_data_type is None:
        current_data_type = self.input_type
    if current_geography is None:
        current_geography = self.geography

    # TODO fix pluralization
    if time_index is None:
        plural_name = time_index_name + "s"
        if hasattr(self, plural_name):
            time_index = getattr(self, plural_name)
        else:
            time_index = cfg.cfgfile.get('case', 'years')

    setattr(self, map_to, getattr(self, map_from).copy())

    source_index = getattr(self, map_from).index
    if source_index.nlevels > 1:
        level_names = source_index.names
    else:
        level_names = [source_index.name]
    if current_geography not in level_names:
        raise ValueError('current geography does not match the geography of the dataframe in remap')

    # Both branches test this before current_geography is reassigned.
    geography_differs = current_geography != converted_geography

    if (drivers is None) or (not len(drivers)):
        # No drivers: fill the time series first, then move to the target geography.
        if fill_timeseries:
            self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                  time_index_name=time_index_name,
                                  interpolation_method=interpolation_method,
                                  extrapolation_method=extrapolation_method, lower=lower, upper=upper)
        if geography_differs:
            self.geo_map(converted_geography, attr=map_to, inplace=True,
                         current_geography=current_geography, current_data_type=current_data_type,
                         fill_value=fill_value)
            current_geography = converted_geography
    else:
        total_driver = DfOper.mult(util.put_in_list(drivers))
        if geography_differs:
            # While not on primary geography, geography does have some information we would like to preserve
            self.geo_map(converted_geography, attr=map_to, inplace=True,
                         current_geography=current_geography, current_data_type=current_data_type,
                         fill_value=fill_value)
            current_geography = converted_geography

        data_is_total = current_data_type == 'total'
        if data_is_total:
            # Divide by drivers to turn a total to intensity. multindex_operation will aggregate to common levels.
            quotient = DfOper.divi((getattr(self, map_to), total_driver), expandable=(False, True),
                                   collapsible=(False, True), fill_value=fill_value)
            setattr(self, map_to, quotient.replace([np.inf, np.nan, -np.nan], 0))

        # Clean the timeseries as an intensity
        # NOTE(review): time_index_name, lower and upper are not forwarded here, unlike the
        # no-driver branch above -- confirm whether that is intentional.
        if fill_timeseries:
            self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                  interpolation_method=interpolation_method,
                                  extrapolation_method=extrapolation_method)

        operands = (getattr(self, map_to), total_driver)
        if data_is_total:
            scaled = DfOper.mult(operands, fill_value=fill_value)
        else:
            scaled = DfOper.mult(operands, expandable=(True, False), collapsible=(False, True),
                                 fill_value=fill_value)
        setattr(self, map_to, scaled)

    self.ensure_correct_geography(map_to, converted_geography, current_geography, current_data_type)
def remap(self, map_from='raw_values', map_to='values', drivers=None, time_index_name='year', time_index=None,
          fill_timeseries=True, interpolation_method='missing', extrapolation_method='missing',
          converted_geography=None, current_geography=None, current_data_type=None, fill_value=0., lower=0,
          upper=None):
    """
    Copy ``map_from`` into ``map_to``, then map it onto drivers and the target geography.

    Args:
        map_from (string): name of the attribute holding the starting dataframe (defaults to 'raw_values')
        map_to (string): name of the attribute that receives the result (defaults to 'values')
        drivers (list of or single dataframe): drivers for the remap; None or empty means no drivers
        time_index_name (string): name of the time level; the attribute ``time_index_name + 's'`` is used
            as the time index when ``time_index`` is not given
        time_index: explicit time index handed to clean_timeseries
        fill_timeseries (bool): whether clean_timeseries is run on the data
        interpolation_method, extrapolation_method (string): forwarded to clean_timeseries
        converted_geography (string): target geography (defaults to the configured primary geography)
        current_geography (string): geography of the data (defaults to self.geography)
        current_data_type (string): either 'total' or 'intensity' (defaults to self.input_type)
        fill_value: forwarded to geo_map
        lower, upper: forwarded to clean_timeseries when there are no drivers

    Raises:
        ValueError: if ``current_geography`` is not an index level of the ``map_from`` dataframe
    """
    if converted_geography is None:
        converted_geography = cfg.cfgfile.get('case', 'primary_geography')
    if current_data_type is None:
        current_data_type = self.input_type
    if current_geography is None:
        current_geography = self.geography

    # TODO fix pluralization
    if time_index is None:
        attr_name = time_index_name + "s"
        time_index = getattr(self, attr_name) if hasattr(self, attr_name) else cfg.cfgfile.get('case', 'years')

    setattr(self, map_to, getattr(self, map_from).copy())

    source = getattr(self, map_from)
    multi_level = source.index.nlevels > 1
    if current_geography not in (source.index.names if multi_level else [source.index.name]):
        raise ValueError('current geography does not match the geography of the dataframe in remap')

    # Elements present on the geography level of the source data.
    if multi_level:
        geography_elements = source.index.levels[util.position_in_index(source, current_geography)]
    else:
        geography_elements = source.index.tolist()

    # Arguments shared by the clean_timeseries / geo_map calls of both branches.
    clean_kwargs = dict(attr=map_to, inplace=True, time_index=time_index,
                        interpolation_method=interpolation_method,
                        extrapolation_method=extrapolation_method)
    geo_kwargs = dict(attr=map_to, inplace=True, current_geography=current_geography,
                      current_data_type=current_data_type, fill_value=fill_value)

    if (drivers is None) or (not len(drivers)):
        # No drivers: fill the time series first, then move to the target geography.
        if fill_timeseries:
            self.clean_timeseries(time_index_name=time_index_name, lower=lower, upper=upper, **clean_kwargs)
        if current_geography != converted_geography:
            self.geo_map(converted_geography, **geo_kwargs)
            current_geography = converted_geography
    else:
        total_driver = DfOper.mult(util.put_in_list(drivers))
        if len(geography_elements) > 1 and current_geography != converted_geography:
            # While not on primary geography, geography does have some information we would like to preserve
            self.geo_map(converted_geography, **geo_kwargs)
            current_geography = converted_geography

        if current_data_type == 'total':
            # Divide by drivers to turn a total to intensity. multindex_operation will aggregate to common levels.
            setattr(self, map_to, DfOper.divi((getattr(self, map_to), total_driver),
                                              expandable=(False, True), collapsible=(False, True)))

        # Clean the timeseries as an intensity
        if fill_timeseries:
            self.clean_timeseries(**clean_kwargs)

        operands = (getattr(self, map_to), total_driver)
        if current_data_type == 'total':
            setattr(self, map_to, DfOper.mult(operands))
        else:
            setattr(self, map_to, DfOper.mult(operands, expandable=(True, False), collapsible=(False, True)))

    self.ensure_correct_geography(map_to, converted_geography, current_geography, current_data_type)
def project(self, map_from='raw_values', map_to='values', additional_drivers=None, time_index_name='year',
            fill_timeseries=True, converted_geography=None, current_geography=None, current_data_type=None):
    """
    Copy ``map_from`` into ``map_to``, apply denominator drivers, then remap with the regular drivers.

    If the object has denominator drivers (attributes named in cfg.dnmtr_col_names that are not None),
    the data must be an intensity; it is multiplied by the product of those drivers and treated as a
    total from then on. The result is passed to remap together with the drivers named in
    cfg.drivr_col_names plus ``additional_drivers``. Afterwards ``self.projected_input_type`` is set to
    'total'; when that attribute already exists on entry it overrides ``current_data_type`` and the
    denominator drivers are skipped.

    Args:
        map_from (string): name of the attribute holding the starting dataframe
        map_to (string): name of the attribute that receives the result
        additional_drivers (list of or single dataframe): extra drivers appended to the object's own
        time_index_name (string): forwarded to remap
        fill_timeseries (bool): forwarded to remap
        converted_geography (string): target geography (defaults to the configured primary geography)
        current_geography (string): geography of the data (defaults to self.geography)
        current_data_type (string): 'total' or 'intensity' (defaults to self.input_type)

    Raises:
        ValueError: if denominator drivers exist but the data type is not 'intensity'
    """
    converted_geography = cfg.cfgfile.get('case', 'primary_geography') if converted_geography is None else converted_geography
    current_data_type = self.input_type if current_data_type is None else current_data_type
    if hasattr(self, 'projected_input_type'):
        current_data_type = self.projected_input_type
        denominator_driver_ids = []
    else:
        denominator_driver_ids = [getattr(self, col) for col in cfg.dnmtr_col_names if getattr(self, col) is not None]
    current_geography = self.geography if current_geography is None else current_geography

    setattr(self, map_to, getattr(self, map_from).copy())

    if len(denominator_driver_ids):
        if current_data_type != 'intensity':
            raise ValueError(str(self.__class__) + ' id ' + str(self.id) + ': type must be intensity if variable has denominator drivers')

        if len(self.index_levels['geography_id']) > 1 and (current_geography != converted_geography):
            # While not on primary geography, geography does have some information we would like to preserve
            self.geo_map(converted_geography, attr=map_to, inplace=True)
            current_geography = converted_geography

        total_driver = DfOper.mult([self.drivers[driver_id].values for driver_id in denominator_driver_ids])
        try:
            setattr(self, map_to, DfOper.mult((getattr(self, map_to), total_driver)))
        except Exception:
            # Show both operands for debugging, then re-raise: carrying on would label
            # un-multiplied intensity data as a total.
            print(getattr(self, map_to))
            print(total_driver)
            raise
        # the datatype is now total
        current_data_type = 'total'

    driver_ids = [getattr(self, col) for col in cfg.drivr_col_names if getattr(self, col) is not None]
    drivers = [self.drivers[driver_id].values for driver_id in driver_ids]
    if additional_drivers is not None:
        drivers += util.put_in_list(additional_drivers)

    # both map_from and map_to are the same
    self.remap(map_from=map_to, map_to=map_to, drivers=drivers, time_index_name=time_index_name,
               fill_timeseries=fill_timeseries, converted_geography=converted_geography,
               current_geography=current_geography, current_data_type=current_data_type)
    self.projected_input_type = 'total'
def geo_map(self, converted_geography, attr='values', inplace=True, current_geography=None,
            current_data_type=None, fill_value=0.):
    """
    Map the dataframe stored in ``attr`` to another geography using the geography map from cfg.geo.

    For a 'total', the subsection is the geography converted to and the supersection is the
    initial geography.
        Example: state --> census division.
        How much of state maine is in census division new england?
        new england = subsection, maine = supersection
    For an 'intensity' the two roles are reversed.
        Example: state --> census division.
        How much of census division new england does the state of maine represent?
        maine = subsection, new england = supersection

    Args:
        converted_geography (string): geography to map to
        attr (string): name of the attribute holding the dataframe
        inplace (bool): store the result on ``attr`` (returns None) instead of returning it
        current_geography (string): geography of the data (defaults to self.geography)
        current_data_type (string): 'total' or 'intensity' (defaults to self.input_type)
        fill_value: forwarded to the multiplication with the geography map

    Raises:
        ValueError: if the data type is neither 'total' nor 'intensity'
    """
    # Unless specified, input_type used is attribute of the object
    if current_data_type is None:
        current_data_type = self.input_type
    if current_geography is None:
        current_geography = self.geography
    if hasattr(self, 'geography_map_key'):
        geography_map_key = self.geography_map_key
    else:
        geography_map_key = cfg.cfgfile.get('case', 'default_geography_map_key')

    # Nothing to map when the data is already on the requested geography.
    if current_geography == converted_geography:
        return None if inplace else getattr(self, attr)

    if current_data_type == 'total':
        subsection = converted_geography
        supersection = current_geography
    elif current_data_type == 'intensity':
        subsection = current_geography
        supersection = converted_geography
    else:
        raise ValueError('Input_type must be either "total" or "intensity"')

    # create dataframe with map from one geography to another
    geography_weights = cfg.geo.map_df(subsection, supersection, column=geography_map_key)
    result = DfOper.mult([getattr(self, attr), geography_weights], fill_value=fill_value)
    result = util.remove_df_levels(result, current_geography)
    # Move the new geography to the outermost level when the index supports swapping.
    if hasattr(result.index, 'swaplevel'):
        result = result.swaplevel(converted_geography, 0)
    result.sort(inplace=True)

    if not inplace:
        return result
    setattr(self, attr, result)
def geo_map(self, converted_geography, attr='values', inplace=True, current_geography=None,
            current_data_type=None, fill_value=0.):
    """
    Map the dataframe stored in ``attr`` to another geography using the geography map from cfg.geo.

    If input type is a total, then the subsection is the geography to convert to and the
    supersection is the initial geography.
        Example: input_type = 'total', state --> census division.
        How much of state maine is in census division new england?
        new england = subsection, maine = supersection
    Otherwise the subsection and supersection values are reversed.
        Example: input_type = 'intensity', state --> census division.
        How much of census division new england does the state of maine represent?
        maine = subsection, new england = supersection

    Args:
        converted_geography (string): geography to map to
        attr (string): name of the attribute holding the dataframe
        inplace (bool): store the result on ``attr`` (returns None) instead of returning it
        current_geography (string): geography of the data (defaults to self.geography)
        current_data_type (string): 'total' or 'intensity' (defaults to self.input_type)
        fill_value: forwarded to the multiplication with the geography map

    Raises:
        ValueError: if the data type is neither 'total' nor 'intensity'
    """
    # Unless specified, input_type used is attribute of the object
    current_data_type = self.input_type if current_data_type is None else current_data_type
    current_geography = self.geography if current_geography is None else current_geography
    geography_map_key = cfg.cfgfile.get('case', 'default_geography_map_key') if not hasattr(self, 'geography_map_key') else self.geography_map_key

    # Already on the requested geography: nothing to do.
    if current_geography == converted_geography:
        if inplace:
            return
        else:
            return getattr(self, attr)

    if current_data_type == 'total':
        subsection, supersection = converted_geography, current_geography
    elif current_data_type == 'intensity':
        subsection, supersection = current_geography, converted_geography
    else:
        raise ValueError('Input_type must be either "total" or "intensity"')

    # create dataframe with map from one geography to another
    map_df = cfg.geo.map_df(subsection, supersection, column=geography_map_key)

    mapped_data = DfOper.mult([getattr(self, attr), map_df], fill_value=fill_value)
    mapped_data = util.remove_df_levels(mapped_data, current_geography)
    # swaplevel only works on a hierarchical index; when geography was the only level the
    # result has a flat index and the new geography is already outermost.
    if mapped_data.index.nlevels > 1:
        mapped_data = mapped_data.swaplevel(converted_geography, 0)
    mapped_data.sort(inplace=True)

    if inplace:
        setattr(self, attr, mapped_data)
    else:
        return mapped_data
def geo_map(self, converted_geography, attr='values', inplace=True, current_geography=None,
            current_data_type=None, fill_value=0., filter_geo=True):
    """
    Map the dataframe stored in ``attr`` to another geography using the geography map from cfg.geo.

    The data type is handed to cfg.geo.map_df as ``normalize_as``. The original description of the
    two cases:
        'total', state --> census division: how much of state maine is in census division
        new england? (new england = subsection, maine = supersection)
        'intensity', state --> census division: how much of census division new england does the
        state of maine represent? (maine = subsection, new england = supersection)

    Args:
        converted_geography (string): geography to map to
        attr (string): name of the attribute holding the dataframe
        inplace (bool): store the sorted result on ``attr`` instead of returning it
        current_geography (string): geography of the data (defaults to self.geography)
        current_data_type (string): defaults to self.input_type
        fill_value: forwarded to the multiplication with the geography map
        filter_geo (bool): forwarded to cfg.geo.map_df
    """
    # Unless specified, input_type used is attribute of the object
    if current_data_type is None:
        current_data_type = self.input_type
    if current_geography is None:
        current_geography = self.geography
    if hasattr(self, 'geography_map_key'):
        geography_map_key = self.geography_map_key
    else:
        geography_map_key = cfg.cfgfile.get('case', 'default_geography_map_key')

    # create dataframe with map from one geography to another
    geography_weights = cfg.geo.map_df(current_geography, converted_geography,
                                       normalize_as=current_data_type, map_key=geography_map_key,
                                       filter_geo=filter_geo)
    result = DfOper.mult([getattr(self, attr), geography_weights], fill_value=fill_value)

    # The source geography level is only dropped when the geography actually changes.
    if current_geography != converted_geography:
        result = util.remove_df_levels(result, current_geography)
    if hasattr(result.index, 'swaplevel'):
        result = DataMapFunctions.reorder_df_geo_left_year_right(result, converted_geography)

    result = result.sort()
    if not inplace:
        return result
    setattr(self, attr, result)
def energy_replace(self):
    """Store the product of ``energy_intensity`` and ``impact`` on ``replace_impact``."""
    factors = [self.energy_intensity, self.impact]
    self.replace_impact = DfOper.mult(factors)
def remap(self, map_from='raw_values', map_to='values', drivers=None, time_index_name='year', time_index=None,
          fill_timeseries=True, interpolation_method='missing', extrapolation_method='missing',
          converted_geography=None, current_geography=None, current_data_type=None, fill_value=0., lower=0,
          upper=None, filter_geo=True, driver_geography=None):
    """
    Map data to drivers and geography, writing the result to the ``map_to`` attribute.

    Args:
        map_from (string): starting variable name (defaults to 'raw_values')
        map_to (string): ending variable name (defaults to 'values')
        drivers (list of or single dataframe): drivers for the remap; None or empty means no drivers
        time_index_name (string): name of the time level
        time_index: explicit time index; resolved through _get_active_time_index
        fill_timeseries (bool): whether clean_timeseries is run on the data
        interpolation_method, extrapolation_method (string): forwarded to clean_timeseries
        converted_geography (string): target geography (defaults to cfg.primary_geography)
        current_geography (string): geography of the data (defaults to self.geography)
        current_data_type (string): either 'total' or 'intensity' (defaults to self.input_type)
        fill_value: forwarded to geo_map and to the driver multiplication/division
        lower, upper: forwarded to clean_timeseries when there are no drivers
        filter_geo (bool): forwarded to geo_map for the data (the driver is mapped unfiltered)
        driver_geography (string): geography the drivers are on (defaults to cfg.disagg_geography)

    Raises:
        ValueError: if ``current_geography`` is not an index level of the ``map_from`` dataframe
    """
    driver_geography = cfg.disagg_geography if driver_geography is None else driver_geography
    converted_geography = cfg.primary_geography if converted_geography is None else converted_geography
    current_data_type = self.input_type if current_data_type is None else current_data_type
    current_geography = self.geography if current_geography is None else current_geography
    time_index = self._get_active_time_index(time_index, time_index_name)

    if current_geography not in self._get_df_index_names_in_a_list(getattr(self, map_from)):
        raise ValueError('Current geography does not match the geography of the dataframe in remap')

    # deals with foreign gaus and updates the geography
    df, current_geography = self.account_for_foreign_gaus(map_from, current_data_type, current_geography)
    setattr(self, map_to, df)

    # This happens when we are on a geography level and some of the elements are missing. Such as no PR when we have all the other U.S. States.
    setattr(self, map_to, self._add_missing_geographies(df, current_geography, current_data_type))

    if (drivers is None) or (not len(drivers)):
        # we have no drivers, just need to do a clean timeseries and a geomap
        if fill_timeseries:
            self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                  time_index_name=time_index_name,
                                  interpolation_method=interpolation_method,
                                  extrapolation_method=extrapolation_method, lower=lower, upper=upper)
        if current_geography != converted_geography:
            self.geo_map(converted_geography, attr=map_to, inplace=True,
                         current_geography=current_geography, current_data_type=current_data_type,
                         fill_value=fill_value, filter_geo=filter_geo)
            current_geography = converted_geography
        return

    # becomes an attribute of self just because we may do a geomap on it
    self.total_driver = DfOper.mult(util.put_in_list(drivers))
    try:
        # turns out we don't always have a year or vintage column for drivers. For instance when linked_demand_technology gets remapped
        if time_index_name in self.total_driver.index.names:
            # sometimes when we have a linked service demand driver in a demand subsector it will come in on a fewer number of years than self.years, making this clean timeseries necessary
            self.clean_timeseries(attr='total_driver', inplace=True, time_index_name=time_index_name,
                                  time_index=time_index, lower=None, upper=None,
                                  interpolation_method='missing', extrapolation_method='missing')

        # The driver is mapped as an intensity only when every driver of the object was passed in,
        # all of them are intensities and none has a base driver; otherwise it is mapped as a total.
        if hasattr(self, 'drivers') and len(drivers) == len(self.drivers) and set([x.input_type for x in self.drivers.values()]) == set(['intensity']) and set([x.base_driver_id for x in self.drivers.values()]) == set([None]):
            driver_mapping_data_type = 'intensity'
        else:
            driver_mapping_data_type = 'total'

        # Bring the driver onto the geography of the data rather than the other way round.
        total_driver_current_geo = self.geo_map(current_geography, attr='total_driver', inplace=False,
                                                current_geography=driver_geography,
                                                current_data_type=driver_mapping_data_type,
                                                fill_value=fill_value, filter_geo=False)

        if current_data_type == 'total':
            # Divide by the driver to turn the total into an intensity. NaN results are kept
            # when the caller asked for NaN as fill value, otherwise they are zeroed.
            replace_values = [np.inf] if fill_value is np.nan else [np.inf, np.nan, -np.nan]
            df_intensity = DfOper.divi((getattr(self, map_to), total_driver_current_geo),
                                       expandable=(False, True), collapsible=(False, True),
                                       fill_value=fill_value).replace(replace_values, 0)
            setattr(self, map_to, df_intensity)

        # Clean the timeseries as an intensity
        if fill_timeseries:
            self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                  interpolation_method=interpolation_method,
                                  extrapolation_method=extrapolation_method)

        # Multiply back by the driver; any failure propagates to the caller.
        if current_data_type == 'total':
            setattr(self, map_to, DfOper.mult((getattr(self, map_to), total_driver_current_geo),
                                              fill_value=fill_value))
        else:
            setattr(self, map_to, DfOper.mult((getattr(self, map_to), total_driver_current_geo),
                                              expandable=(True, False), collapsible=(False, True),
                                              fill_value=fill_value))

        # After the multiplication the data is mapped to the target geography as a total.
        self.geo_map(converted_geography, attr=map_to, inplace=True, current_geography=current_geography,
                     current_data_type='total', fill_value=fill_value, filter_geo=filter_geo)
    finally:
        # we don't want to keep this around, even when a step above failed
        del self.total_driver
def project(self, map_from='raw_values', map_to='values', additional_drivers=None,
            interpolation_method='missing', extrapolation_method='missing', time_index_name='year',
            fill_timeseries=True, converted_geography=None, current_geography=None, current_data_type=None,
            fill_value=0., projected=False, filter_geo=True):
    """
    Copy ``map_from`` into ``map_to``, apply denominator drivers, then remap with the regular drivers.

    Denominator drivers (attributes named in cfg.dnmtr_col_names that are not None) are skipped when
    ``map_from`` is not 'raw_values' and the data is already a total. Otherwise the data must be an
    intensity; it is multiplied by the product of the denominator drivers and treated as a total
    from then on. The result is passed to remap together with the drivers named in
    cfg.drivr_col_names plus ``additional_drivers``.

    Args:
        map_from (string): name of the attribute holding the starting dataframe
        map_to (string): name of the attribute that receives the result
        additional_drivers (list of or single dataframe): extra drivers appended to the object's own
        interpolation_method, extrapolation_method, time_index_name, fill_timeseries, fill_value,
            filter_geo: forwarded to remap
        converted_geography (string): target geography (defaults to cfg.primary_geography)
        current_geography (string): geography of the data (defaults to self.geography)
        current_data_type (string): 'total' or 'intensity' (defaults to self.input_type)
        projected: not used by this method

    Raises:
        ValueError: if denominator drivers exist but the data type is not 'intensity'
    """
    if converted_geography is None:
        converted_geography = cfg.primary_geography
    if current_data_type is None:
        current_data_type = self.input_type

    denominator_driver_ids = []
    if map_from == 'raw_values' or current_data_type != 'total':
        for column in cfg.dnmtr_col_names:
            value = getattr(self, column)
            if value is not None:
                denominator_driver_ids.append(value)

    if current_geography is None:
        current_geography = self.geography

    setattr(self, map_to, getattr(self, map_from).copy())

    if len(denominator_driver_ids):
        if current_data_type != 'intensity':
            raise ValueError(str(self.__class__) + ' id ' + str(self.id) + ': type must be intensity if variable has denominator drivers')

        if current_geography != converted_geography:
            # While not on primary geography, geography does have some information we would like to preserve
            self.geo_map(converted_geography, attr=map_to, inplace=True)
            current_geography = converted_geography

        denominator_frames = [self.drivers[driver_id].values for driver_id in denominator_driver_ids]
        total_driver = DfOper.mult(denominator_frames)
        product = DfOper.mult((getattr(self, map_to), total_driver))
        setattr(self, map_to, product)
        # the datatype is now total
        current_data_type = 'total'

    drivers = []
    for column in cfg.drivr_col_names:
        driver_id = getattr(self, column)
        if driver_id is not None:
            drivers.append(self.drivers[driver_id].values)
    if additional_drivers is not None:
        drivers.extend(util.put_in_list(additional_drivers))

    # both map_from and map_to are the same
    self.remap(map_from=map_to, map_to=map_to, drivers=drivers, time_index_name=time_index_name,
               fill_timeseries=fill_timeseries, interpolation_method=interpolation_method,
               extrapolation_method=extrapolation_method, converted_geography=converted_geography,
               current_geography=current_geography, current_data_type=current_data_type,
               fill_value=fill_value, filter_geo=filter_geo)
def remap(self, map_from='raw_values', map_to='values', drivers=None, time_index_name='year', time_index=None,
          fill_timeseries=True, interpolation_method='missing', extrapolation_method='missing',
          converted_geography=None, current_geography=None, current_data_type=None, fill_value=0., lower=0,
          upper=None, filter_geo=True):
    """
    Map data to drivers and geography, writing the result to the ``map_to`` attribute.

    Args:
        map_from (string): starting variable name (defaults to 'raw_values')
        map_to (string): ending variable name (defaults to 'values')
        drivers (list of or single dataframe): drivers for the remap; None or empty means no drivers
        time_index_name (string): name of the time level
        time_index: explicit time index; resolved through _get_active_time_index
        fill_timeseries (bool): whether clean_timeseries is run on the data
        interpolation_method, extrapolation_method (string): forwarded to clean_timeseries
        converted_geography (string): target geography (defaults to cfg.primary_geography)
        current_geography (string): geography of the data (defaults to self.geography)
        current_data_type (string): either 'total' or 'intensity' (defaults to self.input_type)
        fill_value: forwarded to geo_map and to the driver multiplication/division
        lower, upper: forwarded to clean_timeseries when there are no drivers
        filter_geo (bool): forwarded to geo_map and ensure_correct_geography

    Raises:
        ValueError: if ``current_geography`` is not an index level of the ``map_from`` dataframe
    """
    if converted_geography is None:
        converted_geography = cfg.primary_geography
    if current_data_type is None:
        current_data_type = self.input_type
    if current_geography is None:
        current_geography = self.geography
    time_index = self._get_active_time_index(time_index, time_index_name)

    index_names = self._get_df_index_names_in_a_list(getattr(self, map_from))
    if current_geography not in index_names:
        raise ValueError('Current geography does not match the geography of the dataframe in remap')

    # deals with foreign gaus and updates the geography
    df, current_geography = self.account_for_foreign_gaus(map_from, current_data_type, current_geography)
    setattr(self, map_to, df)

    # This happens when we are on a geography level and some of the elements are missing. Such as no PR when we have all the other U.S. States.
    completed = self._add_missing_geographies(df, current_geography, current_data_type)
    setattr(self, map_to, completed)

    if (drivers is None) or (not len(drivers)):
        # we have no drivers, just need to do a clean timeseries and a geomap
        if fill_timeseries:
            self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                  time_index_name=time_index_name,
                                  interpolation_method=interpolation_method,
                                  extrapolation_method=extrapolation_method, lower=lower, upper=upper)
        if current_geography != converted_geography:
            self.geo_map(converted_geography, attr=map_to, inplace=True,
                         current_geography=current_geography, current_data_type=current_data_type,
                         fill_value=fill_value, filter_geo=filter_geo)
            current_geography = converted_geography
    else:
        self.total_driver = DfOper.mult(util.put_in_list(drivers))

        if current_geography != converted_geography:
            if len(util.put_in_list(drivers)) <= 1:
                # While not on primary geography, geography does have some information we would like to preserve
                # we put the driver on the same geography as our data
                self.geomapped_total_driver = self.geo_map(current_geography, attr='total_driver',
                                                           inplace=False,
                                                           current_geography=converted_geography,
                                                           current_data_type='total',
                                                           fill_value=fill_value, filter_geo=False)
            else:
                # More than one driver: move the data to the target geography instead.
                self.geo_map(converted_geography, attr=map_to, inplace=True,
                             current_geography=current_geography, current_data_type=current_data_type,
                             fill_value=fill_value)
                current_geography = converted_geography

        data_is_total = current_data_type == 'total'

        # Divide by drivers to turn a total to intensity. multindex_operation will aggregate to common levels.
        if data_is_total:
            if hasattr(self, 'geomapped_total_driver'):
                divisor = self.geomapped_total_driver
            else:
                divisor = self.total_driver
            quotient = DfOper.divi((getattr(self, map_to), divisor), expandable=(False, True),
                                   collapsible=(False, True), fill_value=fill_value)
            setattr(self, map_to, quotient.replace([np.inf, np.nan, -np.nan], 0))

        # Clean the timeseries as an intensity
        if fill_timeseries:
            self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                  interpolation_method=interpolation_method,
                                  extrapolation_method=extrapolation_method)

        # The data is an intensity at this point and is mapped to the target geography as such.
        self.geo_map(converted_geography, attr=map_to, inplace=True, current_geography=current_geography,
                     current_data_type='intensity', fill_value=fill_value, filter_geo=filter_geo)
        current_geography = converted_geography

        # The geomapped driver is only needed for the division above.
        if hasattr(self, 'geomapped_total_driver'):
            delattr(self, 'geomapped_total_driver')

        operands = (getattr(self, map_to), self.total_driver)
        if data_is_total:
            scaled = DfOper.mult(operands, fill_value=fill_value)
        else:
            scaled = DfOper.mult(operands, expandable=(True, False), collapsible=(False, True),
                                 fill_value=fill_value)
        setattr(self, map_to, scaled)

    self.ensure_correct_geography(map_to, converted_geography, current_geography, current_data_type,
                                  filter_geo=filter_geo)