def read_timeseries_data(self, data_column_names='value', hide_exceptions=False, **filters):  # This function needs to be sped up
    """Reads timeseries data into a dataframe from the database. Stored in self.raw_values."""
    # rowmap is used in ordering the data when read from the sql table
    headers = util.sql_read_headers(self.sql_data_table)
    rowmap = [headers.index(self.column_names[level]) for level in self.index_levels]
    data_col_ind = []
    for data_col in util.put_in_list(data_column_names):
        data_col_ind.append(headers.index(data_col))
    # read each line of the data_table matching an id and assign the value to self.raw_values
    data = []
    if len(filters):
        merged_dict = dict({self.data_id_key: self.id}, **filters)
        read_data = util.sql_read_table(self.sql_data_table, return_iterable=True, **merged_dict)
    else:
        read_data = util.sql_read_table(self.sql_data_table, return_iterable=True, **{self.data_id_key: self.id})
    if read_data:
        for row in read_data:
            try:
                data.append([row[i] for i in rowmap] +
                            [row[i] * (self.unit_prefix if hasattr(self, 'unit_prefix') else 1) for i in data_col_ind])
            except Exception:
                if not hide_exceptions:
                    print(self.id, row)
        column_names = self.df_index_names + util.put_in_list(data_column_names)
        self.raw_values = pd.DataFrame(data, columns=column_names).set_index(keys=self.df_index_names).sort_index()
    else:
        self.raw_values = None
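# A minimal, self-contained sketch of the header-index mapping technique used by
# read_timeseries_data above: rowmap and data_col_ind translate the SQL table's column
# order into the dataframe's expected order. All names and data here are illustrative.
def _demo_rowmap():
    headers = ['id', 'gau_id', 'year', 'value']  # hypothetical table header order
    index_levels = ['gau_id', 'year']            # desired dataframe index order
    rowmap = [headers.index(level) for level in index_levels]
    data_col_ind = headers.index('value')
    row = (7, 'california', 2020, 42.0)          # one row as returned by the cursor
    assert [row[i] for i in rowmap] == ['california', 2020]
    assert row[data_col_ind] == 42.0

_demo_rowmap()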
def populate_measures(self, scenario_id):
    self.scenario_id = scenario_id
    self.scenario = self.scenario_dict[self.scenario_id]
    self.demand_case_id = util.sql_read_table('Scenarios', 'demand_case', id=self.scenario_id)
    self.populate_demand_measures()
    self.supply_case_id = util.sql_read_table('Scenarios', 'supply_case', id=self.scenario_id)
    self.populate_supply_measures()
def __init__(self, dispatch_feeders, dispatch_geography, dispatch_geographies, scenario):
    # TODO replace 1 with a config parameter
    for col, att in util.object_att_from_table('DispatchConfig', 1):
        setattr(self, col, att)
    self.node_config_dict = dict()
    for supply_node in util.sql_read_table('DispatchNodeConfig', 'supply_node_id', return_iterable=True):
        self.node_config_dict[supply_node] = DispatchNodeConfig(supply_node)
    self.set_dispatch_orders()
    self.dispatch_window_dict = dict(util.sql_read_table('DispatchWindows'))
    self.curtailment_cost = util.unit_convert(0, unit_from_den='megawatt_hour', unit_to_den=cfg.calculation_energy_unit)
    self.unserved_capacity_cost = util.unit_convert(10000.0, unit_from_den='megawatt_hour', unit_to_den=cfg.calculation_energy_unit)
    self.dist_net_load_penalty = util.unit_convert(15000.0, unit_from_den='megawatt_hour', unit_to_den=cfg.calculation_energy_unit)
    # this bulk penalty is mostly for transmission
    self.bulk_net_load_penalty = util.unit_convert(5000.0, unit_from_den='megawatt_hour', unit_to_den=cfg.calculation_energy_unit)
    self.ld_upward_imbalance_penalty = util.unit_convert(150.0, unit_from_den='megawatt_hour', unit_to_den=cfg.calculation_energy_unit)
    self.ld_downward_imbalance_penalty = util.unit_convert(50.0, unit_from_den='megawatt_hour', unit_to_den=cfg.calculation_energy_unit)
    self.dispatch_feeders = dispatch_feeders
    self.feeders = [0] + dispatch_feeders
    self.dispatch_geography = dispatch_geography
    self.dispatch_geographies = dispatch_geographies
    self.stdout_detail = cfg.cfgfile.get('opt', 'stdout_detail') != 'False'
    self.transmission = dispatch_transmission.DispatchTransmission(cfg.transmission_constraint_id, scenario)
    self.solve_kwargs = {"keepfiles": False, "tee": False}
def read_timeseries_data(self, **filters):  # This function needs to be sped up
    """Reads timeseries data into a dataframe from the database. Stored in self.raw_values."""
    # rowmap is used in ordering the data when read from the sql table
    headers = util.sql_read_headers(self.sql_data_table)
    rowmap = [headers.index(self.column_names[level]) for level in self.index_levels]
    data_col_ind = headers.index('value')
    # read each line of the data_table matching an id and assign the value to self.raw_values
    data = []
    if len(filters):
        merged_dict = dict({self.primary_key: self.id}, **filters)
        read_data = util.sql_read_table(self.sql_data_table, return_iterable=True, **merged_dict)
    else:
        read_data = util.sql_read_table(self.sql_data_table, return_iterable=True, **{self.primary_key: self.id})
    if read_data:
        for row in read_data:
            try:
                data.append([row[i] for i in rowmap] +
                            [row[data_col_ind] * (self.unit_prefix if hasattr(self, 'unit_prefix') else 1)])
            except Exception:
                print(self.id, row)
        column_names = self.df_index_names + ['value']
        self.raw_values = pd.DataFrame(data, columns=column_names).set_index(keys=self.df_index_names).sort_index()
        self.data = True
    else:
        self.raw_values = None
        self.data = False
def __init__(self, scenario_id, api_run=False):
    self.scenario_id = scenario_id
    self.api_run = api_run
    self.scenario = cfg.scenario_dict[self.scenario_id]
    self.demand_case_id = util.sql_read_table('Scenarios', 'demand_case', id=self.scenario_id)
    self.supply_case_id = util.sql_read_table('Scenarios', 'supply_case', id=self.scenario_id)
    self.outputs = Output()
    self.demand = Demand()
    self.supply = None
    self.demand_solved, self.supply_solved = False, False
def __init__(self, cfgfile_path, custom_pint_definitions_path=None, name=None, author=None):
    self.cfgfile_path = cfgfile_path
    self.custom_pint_definitions_path = custom_pint_definitions_path
    self.model_config(cfgfile_path, custom_pint_definitions_path)
    self.name = cfg.cfgfile.get('case', 'scenario') if name is None else name
    self.author = cfg.cfgfile.get('case', 'author') if author is None else author
    self.scenario_dict = dict(zip(util.sql_read_table('Scenarios', 'id', return_iterable=True, is_active=True),
                                  util.sql_read_table('Scenarios', 'name', return_iterable=True, is_active=True)))
    self.outputs = Output()
    self.geography = cfg.cfgfile.get('case', 'primary_geography')
def add_specified_stock_measures(self, package_id):
    self.specified_stocks = {}
    measure_ids = util.sql_read_table('SupplyStockMeasurePackagesData', 'measure_id', package_id=package_id, return_iterable=True)
    for measure_id in measure_ids:
        # filter on id=measure_id so each package measure is read individually
        # (the original query ignored measure_id, making the outer loop redundant)
        specified_stocks = util.sql_read_table('SupplyStockMeasures', 'id', supply_technology_id=self.id,
                                               id=measure_id, return_iterable=True)
        for specified_stock in specified_stocks:
            self.specified_stocks[specified_stock] = SupplySpecifiedStock(id=specified_stock,
                                                                          sql_id_table='SupplyStockMeasures',
                                                                          sql_data_table='SupplyStockMeasuresData')
def add_sales_share_measures(self, package_id):
    self.sales_shares = {}
    measure_ids = util.sql_read_table('DemandSalesMeasurePackagesData', 'measure_id', package_id=package_id, return_iterable=True)
    for measure_id in measure_ids:
        sales_share_ids = util.sql_read_table('DemandSalesMeasures', 'id', demand_tech_id=self.id,
                                              id=measure_id, return_iterable=True)
        for sales_share_id in sales_share_ids:
            self.sales_shares[sales_share_id] = SalesShare(id=sales_share_id, subsector_id=self.subsector_id, reference=False,
                                                           sql_id_table='DemandSalesMeasures',
                                                           sql_data_table='DemandSalesMeasuresData')
def init_db(self, db_path):
    if not os.path.isfile(db_path):
        raise OSError('database file not found: ' + str(db_path))
    # Open pathways database
    self.con = sqlite.connect(db_path)
    self.cur = self.con.cursor()
    # common data inputs
    self.dnmtr_col_names = util.sql_read_table('DemandUnitDenominators', 'datatable_column_name')
    self.drivr_col_names = util.sql_read_table('DemandDriverColumns', 'datatable_column_name')
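# Hedged usage sketch for init_db: 'sqlite' above is assumed to be an alias for the
# standard sqlite3 module (import sqlite3 as sqlite). This standalone helper shows the
# same guard-then-connect pattern; the function name is illustrative, not the project's API.
import os
import sqlite3 as sqlite

def open_pathways_db(db_path):
    if not os.path.isfile(db_path):
        raise OSError('database file not found: ' + str(db_path))
    con = sqlite.connect(db_path)
    return con, con.cursor()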
def add_sales_measures(self, package_id):
    self.sales = {}
    measure_ids = util.sql_read_table('SupplySalesMeasurePackagesData', 'measure_id', package_id=package_id, return_iterable=True)
    for measure_id in measure_ids:
        sales_ids = util.sql_read_table('SupplySalesMeasures', 'id', demand_tech_id=self.id,
                                        id=measure_id, return_iterable=True)
        for sales_id in sales_ids:
            self.sales[sales_id] = SupplySales(id=sales_id, node_id=self.node_id, reference=False,
                                               sql_id_table='SupplySalesMeasures',
                                               sql_data_table='SupplySalesMeasuresData')
def add_service_links(self):
    """Adds all technology service links."""
    self.service_links = {}
    service_links = util.sql_read_table('DemandTechsServiceLink', 'service_link_id', return_unique=True, demand_tech_id=self.id)
    if service_links is not None:
        service_links = util.ensure_iterable_and_not_string(service_links)
        for service_link in service_links:
            id = util.sql_read_table('DemandTechsServiceLink', 'id', return_unique=True,
                                     demand_tech_id=self.id, service_link_id=service_link)
            self.service_links[service_link] = DemandTechServiceLink(self, id, 'DemandTechsServiceLink',
                                                                     'DemandTechsServiceLinkData')
def add_specified_stock_measures(self, package_id):
    self.specified_stocks = {}
    measure_ids = util.sql_read_table('DemandStockMeasurePackagesData', 'measure_id', package_id=package_id, return_iterable=True)
    for measure_id in measure_ids:
        # filter on id=measure_id so each package measure is read individually
        # (the original query ignored measure_id, making the outer loop redundant)
        specified_stocks = util.sql_read_table('DemandStockMeasures', 'id', demand_tech_id=self.id,
                                               subsector_id=self.subsector_id, id=measure_id, return_iterable=True)
        for specified_stock in specified_stocks:
            self.specified_stocks[specified_stock] = SpecifiedStock(id=specified_stock,
                                                                    sql_id_table='DemandStockMeasures',
                                                                    sql_data_table='DemandStockMeasuresData')
def __init__(self):
    self.geographies = OrderedDict()
    self.geography_names = dict(util.sql_read_table('GeographiesData', ['id', 'name'],
                                                    return_unique=True, return_iterable=True))  # this is used for outputs
    self.timezone_names = {}
    self.map_keys = []
    self.read_geography_indicies()
    self.gau_to_geography = dict(util.flatten_list([(v, k) for v in vs] for k, vs in self.geographies.iteritems()))
    self.id_to_geography = dict((k, v) for k, v in util.sql_read_table('Geographies'))
    self.read_geography_data()
    self._create_composite_geography_levels()
    self.geographies_unfiltered = copy.copy(self.geographies)  # keep a record
    self._update_geographies_after_subset()
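# Sketch of the one-to-many inversion used for gau_to_geography above, with
# itertools.chain standing in for util.flatten_list. Data values are illustrative.
from collections import OrderedDict
from itertools import chain

_geographies = OrderedDict([('state', [1, 2]), ('census_division', [3])])
_gau_to_geography = dict(chain.from_iterable([(v, k) for v in vs] for k, vs in _geographies.items()))
assert _gau_to_geography == {1: 'state', 2: 'state', 3: 'census_division'}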
def add_sales_measures(self, package_id):
    self.sales = {}
    measure_ids = util.sql_read_table('SupplySalesMeasurePackagesData', 'measure_id', package_id=package_id, return_iterable=True)
    for measure_id in measure_ids:
        sales_ids = util.sql_read_table('SupplySalesMeasures', 'id', supply_technology_id=self.id,
                                        id=measure_id, return_iterable=True)
        for sales_id in sales_ids:
            self.sales[sales_id] = SupplySales(id=sales_id, supply_node_id=self.supply_node_id, reference=False,
                                               sql_id_table='SupplySalesMeasures',
                                               sql_data_table='SupplySalesMeasuresData',
                                               primary_key='id', data_id_key='parent_id')
def add_specified_stock_measures(self, package_id):
    self.specified_stocks = {}
    measure_ids = util.sql_read_table('DemandStockMeasurePackagesData', 'measure_id', package_id=package_id, return_iterable=True)
    for measure_id in measure_ids:
        specified_stocks = util.sql_read_table('DemandStockMeasures', 'id', demand_tech_id=self.id,
                                               subsector_id=self.subsector_id, package_id=package_id, return_iterable=True)
        for specified_stock in specified_stocks:
            self.specified_stocks[specified_stock] = SpecifiedStock(id=specified_stock, subsector_id=self.subsector_id,
                                                                    sql_id_table='DemandStockMeasures',
                                                                    sql_data_table='DemandStockMeasuresData')
def read_index_levels(self):
    """Creates a dictionary to store level headings (for database lookup) and level elements.
    Stored as attr 'index_levels'."""
    # note: 'x not in self.primary_key' is a substring test, so it also drops
    # columns like 'id' that are contained in the primary key name
    data_table_columns = [x for x in util.sql_read_headers(self.sql_data_table) if x not in self.primary_key]
    for id, index_level, column_name in util.sql_read_table('IndexLevels'):
        if column_name not in data_table_columns:
            continue
        elements = util.sql_read_table(self.sql_data_table, column_names=column_name,
                                       return_iterable=True, return_unique=True,
                                       **{self.primary_key: self.id})
        if len(elements):
            self.index_levels[index_level] = elements
            self.column_names[index_level] = column_name
def read_index_levels(self):
    """Creates a dictionary to store level headings (for database lookup) and level elements.
    Stored as attr 'index_levels'."""
    data_table_columns = [x for x in util.sql_read_headers(self.sql_data_table) if x not in self.data_id_key]
    for id, index_level, column_name in util.sql_read_table('IndexLevels'):
        if column_name not in data_table_columns:
            continue
        elements = util.sql_read_table(self.sql_data_table, column_names=column_name,
                                       return_iterable=True, return_unique=True,
                                       **{self.data_id_key: self.id})
        if len(elements):
            self.index_levels[index_level] = elements
            self.column_names[index_level] = column_name
def __init__(self, id, subsector_id, service_demand_unit, stock_time_unit, cost_of_capital, scenario=None, **kwargs):
    self.id = id
    self.subsector_id = subsector_id
    self.scenario = scenario
    StockItem.__init__(self)
    self.service_demand_unit = service_demand_unit
    self.stock_time_unit = stock_time_unit
    for col, att in util.object_att_from_table('DemandTechs', self.id):
        setattr(self, col, att)
    # if cost_of_capital at the technology level is None, it uses subsector defaults
    if self.cost_of_capital is None:
        self.cost_of_capital = cost_of_capital
    # we can have multiple sales shares because a sales share may be specific
    # to the transition between two technologies
    self.reference_sales_shares = {}
    if self.id in util.sql_read_table('DemandSalesData', 'demand_technology_id', return_unique=True, return_iterable=True):
        self.reference_sales_shares[1] = SalesShare(id=self.id, subsector_id=self.subsector_id, reference=True,
                                                    sql_id_table='DemandSales', sql_data_table='DemandSalesData',
                                                    primary_key='subsector_id', data_id_key='demand_technology_id',
                                                    scenario=scenario)
    self.book_life()
    self.add_class()
    self.min_year()
    if self.shape_id is not None:
        self.shape = shape.shapes.data[self.shape_id]
def read_geography_indicies(self):
    geo_key = util.sql_read_table('Geographies', column_names='name')
    for key in geo_key:
        self.geographies[key] = []
    for geography_id, name, id in util.sql_read_table('GeographiesData', column_names=['geography_id', 'name', 'id']):
        geography_name = util.id_to_name('geography_id', geography_id)
        self.geographies[geography_name].append(id)
        self.geography_names[id] = name
    for id, name in util.sql_read_table('TimeZones', column_names=['id', 'name']):
        self.timezone_names[id] = name
    for map_key in util.sql_read_table('GeographyMapKeys', 'name'):
        self.map_keys.append(map_key)
def __init__(self, id, cost_of_capital, **kwargs):
    self.id = id
    for col, att in util.object_att_from_table('SupplyTechs', id):
        setattr(self, col, att)
    if self.cost_of_capital is None:
        self.cost_of_capital = cost_of_capital
    self.add_costs()
    self.efficiency = SupplyTechEfficiency(id)
    self.capacity_factor = SupplyTechCapacityFactor(id)
    self.reference_sales_shares = {}
    if self.id in util.sql_read_table('SupplySalesShareData', 'supply_technology', return_unique=True, return_iterable=True):
        self.reference_sales_shares[1] = SupplySalesShare(id=self.id, supply_node_id=self.supply_node_id, reference=True,
                                                          sql_id_table='SupplySalesShare',
                                                          sql_data_table='SupplySalesShareData')
    self.reference_sales = {}
    if self.id in util.sql_read_table('SupplySalesData', 'supply_technology', return_unique=True, return_iterable=True):
        self.reference_sales[1] = SupplySales(id=self.id, supply_node_id=self.supply_node_id, reference=True,
                                              sql_id_table='SupplySales', sql_data_table='SupplySalesData')
    StockItem.__init__(self)
def initialize_config(_path, _cfgfile_name, _log_name):
    global weibul_coeff_of_var, available_cpus, workingdir, cfgfile_name, log_name, log_initialized, index_levels, solver_name, timestamp
    workingdir = os.getcwd() if _path is None else _path
    cfgfile_name = _cfgfile_name
    init_cfgfile(os.path.join(workingdir, cfgfile_name))
    log_name = '{} energyPATHWAYS log.log'.format(str(datetime.datetime.now())[:-4].replace(':', '.')) if _log_name is None else _log_name
    setuplogging()
    init_db()
    init_units()
    init_geo()
    init_shapes()
    init_date_lookup()
    init_output_parameters()
    # used when reading in raw_values from data tables
    index_levels = util.sql_read_table('IndexLevels', column_names=['index_level', 'data_column_name'])
    solver_name = find_solver()
    available_cpus = int(cfgfile.get('case', 'num_cores'))
    weibul_coeff_of_var = util.create_weibul_coefficient_of_variation()
    timestamp = str(datetime.datetime.now().replace(second=0, microsecond=0))
def init_date_lookup():
    global date_lookup, time_slice_col, electricity_energy_type_id, electricity_energy_type_shape_id, opt_period_length, transmission_constraint_id, filter_dispatch_less_than_x

    class DateTimeLookup:
        def __init__(self):
            self.dates = {}

        def lookup(self, series):
            """
            This is a faster approach to datetime parsing.
            For large data, the same dates are often repeated. Rather than re-parse these,
            we store all unique dates, parse them, and use a lookup to convert all dates.
            """
            self.dates.update({date: pd.to_datetime(date) for date in series.unique() if date not in self.dates})
            return series.apply(lambda v: self.dates[v])

    date_lookup = DateTimeLookup()
    time_slice_col = ['year', 'month', 'week', 'hour', 'day_type']
    electricity_energy_type_id, electricity_energy_type_shape_id = util.sql_read_table('FinalEnergy', column_names=['id', 'shape_id'], name='electricity')
    opt_period_length = int(cfgfile.get('opt', 'period_length'))
    transmission_constraint_id = cfgfile.get('opt', 'transmission_constraint_id')
    transmission_constraint_id = int(transmission_constraint_id) if transmission_constraint_id != "" else None
    filter_dispatch_less_than_x = cfgfile.get('output_detail', 'filter_dispatch_less_than_x')
    filter_dispatch_less_than_x = float(filter_dispatch_less_than_x) if filter_dispatch_less_than_x != "" else None
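# Usage sketch of the memoized parsing idea behind DateTimeLookup: parse each distinct
# date string once, then map the whole series through the cache. Assumes pandas as pd.
import pandas as pd

_date_cache = {}

def parse_dates(series):
    _date_cache.update({d: pd.to_datetime(d) for d in series.unique() if d not in _date_cache})
    return series.apply(lambda v: _date_cache[v])

_parsed = parse_dates(pd.Series(['2020-01-01', '2020-01-01', '2020-06-15']))
assert _parsed.iloc[0] == pd.Timestamp('2020-01-01')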
def create_empty_shapes(self): """ This should be called first as it creates a record of all of the shapes that are in the database.""" for id in util.sql_read_table(self.sql_id_table, column_names='id', return_unique=True, return_iterable=True): self.data[id] = Shape(id) self.active_shape_ids.append(id)
def read_geography_data(self):
    cfg.cur.execute('SELECT COUNT(*) FROM "GeographyIntersection"')
    expected_rows = cfg.cur.fetchone()[0]

    # This query pulls together the geography map from its constituent tables. Its rows look like:
    # intersection_id, [list of geographical units that define intersection],
    # [list of values for map keys for this intersection]
    # Note that those internal lists are specifically being drawn out in the order of their Geographies and
    # GeographyMapKeys, respectively, so that they are in the same order as the expected dataframe indexes
    # and column headers
    cfg.cur.execute(textwrap.dedent("""\
        SELECT intersections.id,
               intersections.intersection,
               ARRAY_AGG("GeographyMap".value ORDER BY "GeographyMap".geography_map_key_id) AS values
        FROM (
            SELECT "GeographyIntersection".id,
                   ARRAY_AGG("GeographyIntersectionData".gau_id ORDER BY "GeographiesData".geography_id) AS intersection
            FROM "GeographyIntersection"
            JOIN "GeographyIntersectionData" ON "GeographyIntersectionData".intersection_id = "GeographyIntersection".id
            JOIN "GeographiesData" ON "GeographyIntersectionData".gau_id = "GeographiesData".id
            GROUP BY "GeographyIntersection".id
            ORDER BY "GeographyIntersection".id
        ) AS intersections
        JOIN "GeographyMap" ON "GeographyMap".intersection_id = intersections.id
        GROUP BY intersections.id, intersections.intersection;
    """))
    rows = cfg.cur.fetchall()  # renamed from 'map' to avoid shadowing the builtin
    assert len(rows) == expected_rows, "Expected %i rows in the geography map but found %i" % (expected_rows, len(rows))

    # convert the query results into a list of indexes and a list of data (column values) that can be used
    # to construct a data frame
    expected_layers = len(self.geographies)
    expected_values = len(self.map_keys)
    index = []
    data = []
    for row in rows:
        id_, intersection, values = row
        assert len(intersection) == expected_layers, "Expected each geography map row to have %i geographic layers but row id %i has %i" % (expected_layers, id_, len(intersection))
        assert len(values) == expected_values, "Expected each geography map row to have %i data values but row id %i has %i" % (expected_values, id_, len(values))
        index.append(tuple(intersection))
        data.append(values)

    # construct the data frame; values is the actual container for the data
    names = self.geographies.keys()
    self.values = pd.DataFrame(data, index=pd.MultiIndex.from_tuples(index, names=names), columns=self.map_keys)
    self.values['intersection_id'] = sorted(util.sql_read_table('GeographyIntersection'))
    self.values = self.values.set_index('intersection_id', append=True)
    # sort the index so that we can slice the dataframe
    self.values = self.values.sort_index()
    self.values.replace(0, 1e-10, inplace=True)
    self.values = self.values.groupby(level=[x for x in self.values.index.names if x not in ['intersection_id']]).first()
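# Minimal illustration of the frame construction above: pairs of index tuples and value
# rows become a MultiIndex dataframe whose levels are the geography names and whose
# columns are the map keys. Toy data only; the real frame comes from the SQL query.
import pandas as pd

_index = [('kentucky', 'east north central'), ('oregon', 'western interconnection')]
_data = [[4.5e6, 1.7e6], [4.2e6, 1.6e6]]
_values = pd.DataFrame(_data,
                       index=pd.MultiIndex.from_tuples(_index, names=['state', 'region']),
                       columns=['population', 'households']).sort_index()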
def _other_indexes_dict():
    this_method = DataMapFunctions._other_indexes_dict
    if not hasattr(this_method, 'memoized_result'):
        other_indexes_data = util.sql_read_table('OtherIndexesData', ('id', 'other_index_id'))
        this_method.memoized_result = {row[0]: row[1] for row in other_indexes_data}
    return this_method.memoized_result
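# _other_indexes_dict caches its query result as an attribute on the function object
# itself. A generic sketch of the same memoization pattern, with no database involved:
def expensive_lookup():
    this_method = expensive_lookup
    if not hasattr(this_method, 'memoized_result'):
        this_method.memoized_result = {i: i * i for i in range(5)}  # stand-in for a SQL read
    return this_method.memoized_result

assert expensive_lookup() is expensive_lookup()  # the second call returns the cached dict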
def __init__(self, id, cost_of_capital, scenario, **kwargs):
    self.id = id
    for col, att in util.object_att_from_table('SupplyTechs', id):
        setattr(self, col, att)
    if self.cost_of_capital is None:
        self.cost_of_capital = cost_of_capital
    self.scenario = scenario
    self.add_costs()
    self.efficiency = SupplyTechEfficiency(id, self.scenario)
    self.capacity_factor = SupplyTechCapacityFactor(id, self.scenario)
    self.co2_capture = SupplyTechCO2Capture(id, self.scenario)
    self.reference_sales_shares = {}
    if self.id in util.sql_read_table('SupplySalesShareData', 'supply_technology_id', return_unique=True, return_iterable=True):
        self.reference_sales_shares[1] = SupplySalesShare(id=self.id, supply_node_id=self.supply_node_id, reference=True,
                                                          sql_id_table='SupplySalesShare',
                                                          sql_data_table='SupplySalesShareData',
                                                          primary_key='supply_node_id', data_id_key='supply_technology_id',
                                                          scenario=self.scenario)
    self.reference_sales = {}
    if self.id in util.sql_read_table('SupplySalesData', 'supply_technology_id', return_unique=True, return_iterable=True):
        self.reference_sales[1] = SupplySales(id=self.id, supply_node_id=self.supply_node_id, reference=True,
                                              sql_id_table='SupplySales', sql_data_table='SupplySalesData',
                                              primary_key='supply_node_id', data_id_key='supply_technology_id',
                                              scenario=self.scenario)
    StockItem.__init__(self)
    if self.shape_id is not None:
        self.shape = shape.shapes.data[self.shape_id]
def create_time_slice_elements(active_dates_index):
    business_days = pd.bdate_range(active_dates_index[0].date(), active_dates_index[-1].date())
    biz_map = {v: k for k, v in util.sql_read_table('DayType', column_names='*', return_iterable=False)}
    time_slice_elements = {}
    for ti in cfg.time_slice_col:
        if ti == 'day_type_id':
            time_slice_elements['day_type_id'] = [biz_map['workday'] if s.date() in business_days else biz_map['non-workday']
                                                  for s in active_dates_index]
        else:
            time_slice_elements[ti] = getattr(active_dates_index, ti)
    time_slice_elements['hour24'] = time_slice_elements['hour'] + 1
    return time_slice_elements
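# For the non-day_type columns, create_time_slice_elements relies on DatetimeIndex
# attributes ('year', 'month', 'week', 'hour'). A quick demonstration with toy dates:
import pandas as pd

_idx = pd.date_range('2020-01-01', periods=3, freq='H')
assert list(_idx.hour) == [0, 1, 2]
assert list(_idx.year) == [2020, 2020, 2020]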
def read_geography_data(self):
    # example lookup: df.loc[('kentucky', 'total', 'east north central', 'western interconnection'), 'households']
    headers = util.sql_read_headers('GeographyMap')
    # colmap and rowmap are used in ordering the data when read from the sql table
    colmap = [headers.index(col) for col in self.map_keys]
    rowmap = [headers.index(row) for row in self.geographies.keys()]
    for row in util.sql_read_table('GeographyMap'):
        self.values.loc[tuple([row[i] for i in rowmap]), tuple(self.map_keys)] = [row[i] for i in colmap]
def read_geography_indicies(self):
    cfg.cur.execute(textwrap.dedent("""\
        SELECT "Geographies".name, ARRAY_AGG("GeographiesData".id) AS geography_data_ids
        FROM "Geographies"
        JOIN "GeographiesData" ON "Geographies".id = "GeographiesData".geography_id
        GROUP BY "Geographies".id
        ORDER BY "Geographies".id;
    """))
    for row in cfg.cur.fetchall():
        self.geographies[row[0]] = row[1]
    for value in self.geographies.values():
        value.sort()
    for id, name in util.sql_read_table('TimeZones', column_names=['id', 'name']):
        self.timezone_names[id] = name
    cfg.cur.execute('SELECT name FROM "GeographyMapKeys" ORDER BY id')
    self.map_keys = [name for (name,) in cfg.cur.fetchall()]
def init_date_lookup(self):
    class DateTimeLookup:
        def __init__(self):
            self.dates = {}

        def lookup(self, series):
            """
            This is a faster approach to datetime parsing.
            For large data, the same dates are often repeated. Rather than re-parse these,
            we store all unique dates, parse them, and use a lookup to convert all dates.
            """
            self.dates.update({date: pd.to_datetime(date) for date in series.unique() if date not in self.dates})
            return series.apply(lambda v: self.dates[v])

    self.date_lookup = DateTimeLookup()
    self.time_slice_col = ['year', 'month', 'week', 'hour', 'day_type_id']
    self.electricity_energy_type_id, self.electricity_energy_type_shape_id = util.sql_read_table('FinalEnergy', column_names=['id', 'shape_id'], name='electricity')
def initialize_config(_path, _cfgfile_name, _log_name):
    global weibul_coeff_of_var, scenario_dict, available_cpus, workingdir, cfgfile_name, log_name, log_initialized
    workingdir = os.getcwd() if _path is None else _path
    cfgfile_name = _cfgfile_name
    init_cfgfile(os.path.join(workingdir, cfgfile_name))
    log_name = '{} energyPATHWAYS log.log'.format(str(datetime.datetime.now())[:-4].replace(':', '.')) if _log_name is None else _log_name
    setuplogging()
    init_db()
    init_units()
    init_geo()
    init_date_lookup()
    init_output_parameters()
    scenario_dict = dict(util.sql_read_table('Scenarios', ['id', 'name'], return_iterable=True))
    available_cpus = int(cfgfile.get('case', 'num_cores'))
    weibul_coeff_of_var = util.create_weibul_coefficient_of_variation()
def init_outputs_id_map():
    global outputs_id_map
    primary_geography_name = geo.get_primary_geography_name()
    dispatch_geography_name = geo.get_dispatch_geography_name()
    outputs_id_map[primary_geography_name] = util.upper_dict(geo.geography_names.items())
    outputs_id_map[primary_geography_name + "_supply"] = outputs_id_map[primary_geography_name]
    outputs_id_map[primary_geography_name + "_input"] = outputs_id_map[primary_geography_name]
    outputs_id_map[primary_geography_name + "_output"] = outputs_id_map[primary_geography_name]
    outputs_id_map[dispatch_geography_name] = outputs_id_map[primary_geography_name]
    outputs_id_map['demand_technology'] = util.upper_dict(util.sql_read_table('DemandTechs', ['id', 'name']))
    outputs_id_map['supply_technology'] = util.upper_dict(util.sql_read_table('SupplyTechs', ['id', 'name']))
    outputs_id_map['final_energy'] = util.upper_dict(util.sql_read_table('FinalEnergy', ['id', 'name']))
    outputs_id_map['supply_node'] = util.upper_dict(util.sql_read_table('SupplyNodes', ['id', 'name']))
    outputs_id_map['blend_node'] = util.upper_dict(util.sql_read_table('SupplyNodes', ['id', 'name']))
    outputs_id_map['input_node'] = util.upper_dict(util.sql_read_table('SupplyNodes', ['id', 'name']))
    outputs_id_map['supply_node_output'] = outputs_id_map['supply_node']
    outputs_id_map['supply_node_input'] = outputs_id_map['supply_node']
    outputs_id_map['supply_node_export'] = util.upper_dict(util.sql_read_table('SupplyNodes', ['id', 'name']), " EXPORT")
    outputs_id_map['subsector'] = util.upper_dict(util.sql_read_table('DemandSubsectors', ['id', 'name']))
    outputs_id_map['demand_sector'] = util.upper_dict(util.sql_read_table('DemandSectors', ['id', 'name']))
    outputs_id_map['sector'] = outputs_id_map['demand_sector']
    outputs_id_map['ghg'] = util.upper_dict(util.sql_read_table('GreenhouseGases', ['id', 'name']))
    outputs_id_map['driver'] = util.upper_dict(util.sql_read_table('DemandDrivers', ['id', 'name']))
    outputs_id_map['dispatch_feeder'] = util.upper_dict(util.sql_read_table('DispatchFeeders', ['id', 'name']))
    outputs_id_map['dispatch_feeder'][0] = 'BULK'
    outputs_id_map['other_index_1'] = util.upper_dict(util.sql_read_table('OtherIndexesData', ['id', 'name']))
    outputs_id_map['other_index_2'] = util.upper_dict(util.sql_read_table('OtherIndexesData', ['id', 'name']))
    outputs_id_map['timeshift_type'] = util.upper_dict(util.sql_read_table('FlexibleLoadShiftTypes', ['id', 'name']))
    for id, name in util.sql_read_table('OtherIndexes', ('id', 'name'), return_iterable=True):
        if name in ('demand_technology', 'final_energy'):
            continue
        outputs_id_map[name] = util.upper_dict(util.sql_read_table('OtherIndexesData', ['id', 'name'],
                                                                   other_index_id=id, return_unique=True))
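# util.upper_dict is not defined in this section. Judging from its call sites above
# (id/name pairs in, optional suffix like " EXPORT"), a plausible sketch is the
# following; this is an assumption about the helper, not its actual implementation.
def upper_dict_sketch(pairs, append_str=''):
    return {id_: (name + append_str).upper() if name else name for id_, name in pairs}

assert upper_dict_sketch([(1, 'gasoline')], ' export') == {1: 'GASOLINE EXPORT'}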
def init_outputs_id_map(self):
    self.currency_name = util.sql_read_table('Currencies', 'name', id=int(self.cfgfile.get('case', 'currency_id')))
    self.output_levels = self.cfgfile.get('case', 'output_levels').split(', ')
    self.outputs_id_map = defaultdict(dict)
    if 'primary_geography' in self.output_levels:
        self.output_levels[self.output_levels.index('primary_geography')] = self.primary_geography
    primary_geography_id = util.sql_read_table('Geographies', 'id', name=self.primary_geography)
    self.outputs_id_map[self.primary_geography] = util.upper_dict(util.sql_read_table('GeographiesData', ['id', 'name'],
                                                                                      geography_id=primary_geography_id,
                                                                                      return_unique=True, return_iterable=True))
    self.outputs_id_map[self.primary_geography + "_supply"] = self.outputs_id_map[self.primary_geography]
    self.outputs_id_map['technology'] = util.upper_dict(util.sql_read_table('DemandTechs', ['id', 'name']))
    self.outputs_id_map['final_energy'] = util.upper_dict(util.sql_read_table('FinalEnergy', ['id', 'name']))
    self.outputs_id_map['supply_node'] = util.upper_dict(util.sql_read_table('SupplyNodes', ['id', 'name']))
    self.outputs_id_map['subsector'] = util.upper_dict(util.sql_read_table('DemandSubsectors', ['id', 'name']))
    self.outputs_id_map['sector'] = util.upper_dict(util.sql_read_table('DemandSectors', ['id', 'name']))
    self.outputs_id_map['ghg'] = util.upper_dict(util.sql_read_table('GreenhouseGases', ['id', 'name']))
    for id, name in util.sql_read_table('OtherIndexes', ('id', 'name'), return_iterable=True):
        if name in ('technology', 'final_energy'):
            continue
        self.outputs_id_map[name] = util.upper_dict(util.sql_read_table('OtherIndexesData', ['id', 'name'],
                                                                        other_index_id=id, return_unique=True))
def init_outputs_id_map(self):
    self.currency_name = self.cfgfile.get('case', 'currency_name')
    self.output_demand_levels = ['year', 'vintage', 'technology', self.cfgfile.get('case', 'primary_geography'),
                                 'sector', 'subsector', 'final_energy']
    self.output_supply_levels = ['year', 'vintage', 'supply_technology', self.cfgfile.get('case', 'primary_geography'),
                                 self.cfgfile.get('case', 'primary_geography') + "_supply",
                                 'demand_sector', 'final_energy', 'supply_node', 'ghg']
    self.output_combined_levels = list(set(self.output_supply_levels + self.output_demand_levels))
    if self.cfgfile.get('output_detail', 'vintage') != 'True':
        self.output_combined_levels.remove('vintage')
    if self.cfgfile.get('output_detail', 'technology') != 'True':
        self.output_combined_levels.remove('technology')
    if self.cfgfile.get('output_detail', 'supply_geography') != 'True':
        self.output_combined_levels.remove(self.cfgfile.get('case', 'primary_geography') + "_supply")
    self.output_currency = self.cfgfile.get('case', 'currency_year_id') + ' ' + self.currency_name
    self.outputs_id_map = defaultdict(dict)
    primary_geography_id = util.sql_read_table('Geographies', 'id', name=self.primary_geography)
    self.outputs_id_map[self.primary_geography] = util.upper_dict(util.sql_read_table('GeographiesData', ['id', 'name'],
                                                                                      geography_id=primary_geography_id,
                                                                                      return_unique=True, return_iterable=True))
    self.outputs_id_map[self.primary_geography + "_supply"] = self.outputs_id_map[self.primary_geography]
    self.outputs_id_map['technology'] = util.upper_dict(util.sql_read_table('DemandTechs', ['id', 'name']))
    self.outputs_id_map['supply_technology'] = util.upper_dict(util.sql_read_table('SupplyTechs', ['id', 'name']))
    self.outputs_id_map['final_energy'] = util.upper_dict(util.sql_read_table('FinalEnergy', ['id', 'name']))
    self.outputs_id_map['supply_node'] = util.upper_dict(util.sql_read_table('SupplyNodes', ['id', 'name']))
    self.outputs_id_map['supply_node_export'] = util.upper_dict(util.sql_read_table('SupplyNodes', ['id', 'name']), " EXPORT")
    self.outputs_id_map['subsector'] = util.upper_dict(util.sql_read_table('DemandSubsectors', ['id', 'name']))
    self.outputs_id_map['sector'] = util.upper_dict(util.sql_read_table('DemandSectors', ['id', 'name']))
    self.outputs_id_map['ghg'] = util.upper_dict(util.sql_read_table('GreenhouseGases', ['id', 'name']))
    for id, name in util.sql_read_table('OtherIndexes', ('id', 'name'), return_iterable=True):
        if name in ('technology', 'final_energy'):
            continue
        self.outputs_id_map[name] = util.upper_dict(util.sql_read_table('OtherIndexesData', ['id', 'name'],
                                                                        other_index_id=id, return_unique=True))
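# The string comparisons against 'True' above are fragile: 'true' or '1' in the config
# file would silently read as False. ConfigParser.getboolean is the stock alternative;
# a small sketch (the section and option names mirror the ones used above):
try:
    from configparser import ConfigParser                       # Python 3
except ImportError:
    from ConfigParser import SafeConfigParser as ConfigParser   # Python 2

_parser = ConfigParser()
_parser.add_section('output_detail')
_parser.set('output_detail', 'vintage', 'true')
assert _parser.getboolean('output_detail', 'vintage') is True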
def create_empty_shapes(self):
    for id in util.sql_read_table(self.sql_id_table, column_names='id', return_unique=True, return_iterable=True):
        self.data[id] = Shape(id)
def read_timeseries_data(self, data_column_names='value', **filters):  # This function needs to be sped up
    """Reads timeseries data into a dataframe from the database. Stored in self.raw_values."""
    headers = util.sql_read_headers(self.sql_data_table)
    filters[self.data_id_key] = self.id

    # Check for a sensitivity specification for this table and id. If there is no relevant sensitivity specified
    # but the data table has a sensitivity column, we set the sensitivity filter to None, which will filter
    # the data table rows down to those where sensitivity is NULL, which is the default, no-sensitivity condition.
    if 'sensitivity' in headers:
        filters['sensitivity'] = None
        if hasattr(self, 'scenario'):
            # Note that this will return None if the scenario doesn't specify a sensitivity for this table and id
            filters['sensitivity'] = self.scenario.get_sensitivity(self.sql_data_table, self.id)

    # read each line of the data_table matching an id and assign the value to self.raw_values
    read_data = util.sql_read_table(self.sql_data_table, return_iterable=True, **filters)
    self.inspect_index_levels(headers, read_data)
    self._validate_other_indexes(headers, read_data)
    # rowmap is used in ordering the data when read from the sql table
    rowmap = [headers.index(self.column_names[level]) for level in self.index_levels]
    data_col_ind = [headers.index(data_col) for data_col in util.put_in_list(data_column_names)]
    unit_prefix = self.unit_prefix if hasattr(self, 'unit_prefix') else 1

    if read_data:
        data = []
        for row in read_data:
            try:
                data.append([row[i] for i in rowmap] + [row[i] * unit_prefix for i in data_col_ind])
            except Exception:
                logging.warning('error reading table: {}, row: {}'.format(self.sql_data_table, row))
                raise
        column_names = self.df_index_names + util.put_in_list(data_column_names)
        self.raw_values = pd.DataFrame(data, columns=column_names).set_index(keys=self.df_index_names).sort_index()
        # report any duplicate values
        duplicate_index = self.raw_values.index.duplicated(keep=False)  # keep=False flags all of the duplicate indices
        if any(duplicate_index):
            logging.warning('Duplicate indices in table: {}, parent id: {}; by default the first index will be kept.'.format(self.sql_data_table, self.id))
            logging.warning(self.raw_values[duplicate_index])
            self.raw_values = self.raw_values.groupby(level=self.raw_values.index.names).first()
    else:
        self.raw_values = None
        # We didn't find any timeseries data for this object, so now we want to let the user know if that
        # might be a problem. We only expect to find timeseries data if self actually existed in the database
        # (as opposed to being a placeholder). The existence of self in the database is flagged by self.data.
        if self.data:
            if getattr(self, 'reference_tech_id', None):
                logging.debug('No {} found for {} with id {}; using reference technology values instead.'.format(
                    self.sql_data_table, self.sql_id_table, self.id))
            else:
                msg = 'No {} or reference technology found for {} with id {}.'.format(
                    self.sql_data_table, self.sql_id_table, self.id)
                if re.search("Cost(New|Replacement)?Data$", self.sql_data_table):
                    # The model can run fine without cost data and this is sometimes useful during model
                    # development, so we just gently note if cost data is missing.
                    logging.debug(msg)
                else:
                    # Any other missing data is likely to be a real problem, so we complain
                    logging.critical(msg)
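# Standalone illustration of the duplicate-index handling above: duplicated(keep=False)
# flags every row of a duplicated label, and groupby(level=...).first() keeps the first.
import pandas as pd

_df = pd.DataFrame({'value': [1.0, 2.0, 3.0]}, index=pd.Index(['a', 'a', 'b'], name='key'))
assert list(_df.index.duplicated(keep=False)) == [True, True, False]
_deduped = _df.groupby(level='key').first()
assert _deduped.loc['a', 'value'] == 1.0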