def cache_database_table(self, table_name, base_year, database, in_storage, config):
    """Copy one table from the input database into the attribute cache.

    Parameters:
        table_name -- name of the table to cache
        base_year -- year subdirectory of the cache the table is written under
        database -- input database; used only to read the table's primary key
        in_storage -- storage the Dataset reads the table's data from
        config -- resource dict; must contain 'cache_directory' and
            'creating_baseyear_cache_configuration'.  NOTE: mutated in place
            ('storage_location' is set) -- see TODO below.

    Does nothing (beyond logging) if the target cache directory already exists.
    """
    logger.start_block('Caching table %s' % table_name)
    try:
        #TODO: why is the config being modified...seems like its kind of useless here...
        config['storage_location'] = os.path.join(
            config['cache_directory'], str(base_year), table_name)
        if not os.path.exists(config['storage_location']):
            flt_storage = StorageFactory().get_storage(
                type='flt_storage',
                subdir='store',
                storage_location=config['storage_location'])
            table = database.get_table(table_name)
            # The dataset id is the table's primary key column(s), lowercased.
            id_name = [primary_key.name.lower()
                       for primary_key in table.primary_key]
            dataset = Dataset(resources=config,
                              in_storage=in_storage,
                              out_storage=flt_storage,
                              in_table_name=table_name,
                              id_name=id_name)
            nchunks = config['creating_baseyear_cache_configuration'] \
                .tables_to_cache_nchunks.get(table_name, 1)
            # Temporarily switch the simulation clock to the base year so the
            # data lands in the right year of the cache, then restore it.
            current_time = SimulationState().get_current_time()
            SimulationState().set_current_time(base_year)
            dataset.load_dataset(nchunks=nchunks, flush_after_each_chunk=True)
            SimulationState().set_current_time(current_time)
        else:
            # Bug fix: log message typo 'already exits' -> 'already exists'.
            logger.log_status(config['storage_location'] +
                              " already exists; skip caching " + table_name)
    finally:
        logger.end_block()
def __init__(self, resources=None, in_storage=None, out_storage=None,
             in_table_name=None, attributes=None, out_table_name=None,
             id_name=None, nchunks=None, other_in_table_names=None,
             debuglevel=0):
    """Assemble the resource set for a development-group dataset and
    initialize the base Dataset with it.

    Each table named in other_in_table_names is loaded into its own
    Dataset instance and connected to this one.
    """
    printer = DebugPrinter(debuglevel)
    printer.print_debug("Creating DevelopmentGroupDataset object.", 2)
    full_resources = ResourceCreatorDevelopmentGroups().get_resources_for_dataset(
        resources=resources,
        in_storage=in_storage,
        out_storage=out_storage,
        in_table_name=in_table_name,
        out_table_name=out_table_name,
        attributes=attributes,
        id_name=id_name,
        id_name_default=self.id_name_default,
        nchunks=nchunks,
        debug=printer,
    )
    Dataset.__init__(self, resources=full_resources)
    if isinstance(other_in_table_names, list):
        # Attach every auxiliary table as its own connected dataset.
        for extra_table in other_in_table_names:
            companion = Dataset(resources=full_resources)
            companion.load_dataset(in_table_name=extra_table)
            self.connect_datasets(companion)
def get_households_for_estimation(agent_set, in_storage, agents_for_estimation_table_name, exclude_condition=None, join_datasets=True):
    """Load the estimation households and return (agent_set, index).

    Parameters:
        agent_set -- household dataset to take/attach estimation agents to
        in_storage -- storage holding the estimation table
        agents_for_estimation_table_name -- table of agents used for estimation
        exclude_condition -- optional variable expression; agents for which it
            evaluates truthy are removed from the returned index
        join_datasets -- if True, estimation agents are appended to agent_set
            and the index covers the appended rows; otherwise the index points
            at the existing rows matching the estimation ids

    Returns a tuple (agent_set, index) where index selects the estimation
    agents within agent_set.
    """
    estimation_set = Dataset(in_storage=in_storage,
                             in_table_name=agents_for_estimation_table_name,
                             id_name=agent_set.get_id_name(),
                             dataset_name=agent_set.get_dataset_name())
    agent_set.unload_primary_attributes()
    agent_set.load_dataset(attributes='*')
    estimation_set.load_dataset(attributes=agent_set.get_primary_attribute_names())
    if join_datasets:
        agent_set.join_by_rows(estimation_set,
                               require_all_attributes=False,
                               change_ids_if_not_unique=True)
        # Appended rows sit at the end of agent_set.
        index = arange(agent_set.size() - estimation_set.size(), agent_set.size())
    else:
        index = agent_set.get_id_index(estimation_set.get_id_attribute())
    exclude_ids = []
    if exclude_condition is not None:
        exclude_ids = agent_set.get_id_attribute()[where(agent_set.compute_variables(exclude_condition))]
    # Fix: loop variable renamed from 'id' (shadowed the builtin) and
    # 'minus' renamed to describe what it holds.
    for agent_id in exclude_ids:
        excluded_index = agent_set.get_id_index(agent_id)
        if excluded_index in index:
            index = index[index != excluded_index]
    return (agent_set, index)
def __init__(self, resources=None, in_storage=None, out_storage=None,
             in_table_name=None, out_table_name=None, attributes=None,
             id_name=None, nchunks=None, other_in_table_names=None,
             debuglevel=0):
    """Build the resources for an employment-sector-group dataset and
    initialize the base Dataset from them.

    Tables listed in other_in_table_names are each loaded into a separate
    Dataset and connected to this one.
    """
    dbg = DebugPrinter(debuglevel)
    dbg.print_debug("Creating EmploymentSectorGroupDataset object.", 2)
    dataset_resources = ResourceCreatorEmploymentSectorGroups().get_resources_for_dataset(
        resources=resources,
        in_storage=in_storage,
        out_storage=out_storage,
        in_table_name=in_table_name,
        out_table_name=out_table_name,
        attributes=attributes,
        id_name=id_name,
        id_name_default=self.id_name_default,
        nchunks=nchunks,
        debug=dbg,
    )
    Dataset.__init__(self, resources=dataset_resources)
    if isinstance(other_in_table_names, list):
        # Hook up each auxiliary table as a connected dataset.
        for table in other_in_table_names:
            side_dataset = Dataset(resources=dataset_resources)
            side_dataset.load_dataset(in_table_name=table)
            self.connect_datasets(side_dataset)
def flush_dataset(self):
    """Flush this dataset to storage, then flush the group-membership table
    that links members to groups."""
    Dataset.flush_dataset(self)
    membership_id_names = [self.get_id_name()[0], self.group_id_name]
    membership = Dataset(in_storage=self.resources["in_storage"],
                         in_table_name=self.resources["in_table_name_groups"],
                         id_name=membership_id_names)
    membership.load_dataset()
    membership.flush_dataset()
def get_households_for_estimation(agent_set, in_storage, agents_for_estimation_table_name, exclude_condition=None, join_datasets=True):
    """Load the agents-for-estimation table and return (agent_set, index),
    where index selects the estimation agents inside agent_set.

    When join_datasets is True the estimation agents are appended to
    agent_set; otherwise the index refers to the already-present rows whose
    ids match. Agents matching exclude_condition are dropped from the index.
    """
    estimation_set = Dataset(
        in_storage=in_storage,
        in_table_name=agents_for_estimation_table_name,
        id_name=agent_set.get_id_name(),
        dataset_name=agent_set.get_dataset_name(),
    )
    agent_set.unload_primary_attributes()
    agent_set.load_dataset(attributes='*')
    estimation_set.load_dataset(attributes=agent_set.get_primary_attribute_names())

    if not join_datasets:
        index = agent_set.get_id_index(estimation_set.get_id_attribute())
    else:
        agent_set.join_by_rows(estimation_set,
                               require_all_attributes=False,
                               change_ids_if_not_unique=True)
        appended_count = estimation_set.size()
        # The joined rows occupy the tail of agent_set.
        index = arange(agent_set.size() - appended_count, agent_set.size())

    if exclude_condition is None:
        excluded_ids = []
    else:
        excluded_ids = agent_set.get_id_attribute()[where(agent_set.compute_variables(exclude_condition))]
    for excluded in excluded_ids:
        position = agent_set.get_id_index(excluded)
        if position in index:
            index = index[index != position]
    return (agent_set, index)
def flush_dataset(self):
    """Write this dataset out, then do the same for the groups table that
    maps members to their groups."""
    Dataset.flush_dataset(self)
    groups_table = Dataset(
        in_storage=self.resources["in_storage"],
        in_table_name=self.resources["in_table_name_groups"],
        id_name=[self.get_id_name()[0], self.group_id_name],
    )
    groups_table.load_dataset()
    groups_table.flush_dataset()
def _write_data_to_year(self, data, cache_dir, year):
    """Write `data` (a table-data dict) into the given year of the cache.

    The data goes through an in-memory dict storage and a temporary
    Dataset before being cached.
    """
    # Import here rather than at module level so unit tests avoid a
    # circular dependency.
    from opus_core.datasets.dataset import Dataset
    SimulationState().set_cache_directory(cache_dir)
    in_memory = dict_storage()
    in_memory.write_table(table_name=self._table_name, table_data=data)
    temp_dataset = Dataset(id_name=self._id_name,
                           in_storage=in_memory,
                           in_table_name=self._table_name)
    temp_dataset.load_dataset()
    self._write_dataset_to_cache(temp_dataset, cache_dir, year)
def __init__(self, resources=None, other_in_table_names=None, use_groups=True):
    """Initialize from a pre-built resource set; optionally connect extra
    tables and compute the group structure.
    """
    Dataset.__init__(self, resources=resources)
    if isinstance(other_in_table_names, list):
        # Load and connect each auxiliary table.
        for table in other_in_table_names:
            extra = Dataset(resources=resources)
            extra.load_dataset(in_table_name=table)
            self.connect_datasets(extra)
    if use_groups:
        self.groups = self._get_groups()
def __init__(self, resources=None, other_in_table_names=None, use_groups=True):
    """Construct the dataset from resources.

    Any tables named in other_in_table_names are loaded into separate
    Dataset objects and connected; when use_groups is True the group
    structure is derived and stored on self.groups.
    """
    Dataset.__init__(self, resources=resources)
    if isinstance(other_in_table_names, list):
        for aux_name in other_in_table_names:
            aux_dataset = Dataset(resources=resources)
            aux_dataset.load_dataset(in_table_name=aux_name)
            self.connect_datasets(aux_dataset)
    if use_groups:
        self.groups = self._get_groups()
def _write_data_to_year(self, data, cache_dir, year):
    """Cache `data` under the given year directory of cache_dir.

    Routes the table-data dict through a dict storage and a throwaway
    Dataset, then hands it to _write_dataset_to_cache.
    """
    # Local import: avoids a circular dependency when run from unit tests.
    from opus_core.datasets.dataset import Dataset
    SimulationState().set_cache_directory(cache_dir)
    scratch_storage = dict_storage()
    scratch_storage.write_table(table_name=self._table_name,
                                table_data=data)
    scratch = Dataset(id_name=self._id_name,
                      in_storage=scratch_storage,
                      in_table_name=self._table_name)
    scratch.load_dataset()
    self._write_dataset_to_cache(scratch, cache_dir, year)
def cache_database_table(self, table_name, base_year, database, in_storage, config):
    """Copy this table from the input database into the attribute cache.

    Parameters:
        table_name -- table to cache
        base_year -- cache-year subdirectory to write into
        database -- input database, used to read the table's primary key
        in_storage -- storage the Dataset loads the table from
        config -- resource dict with 'cache_directory' and
            'creating_baseyear_cache_configuration'; mutated in place
            ('storage_location' is set) -- see TODO below.

    Skips (with a status message) when the cache location already exists.
    """
    logger.start_block('Caching table %s' % table_name)
    try:
        #TODO: why is the config being modified...seems like its kind of useless here...
        config['storage_location'] = os.path.join(
            config['cache_directory'], str(base_year), table_name)
        if not os.path.exists(config['storage_location']):
            flt_storage = StorageFactory().get_storage(
                type='flt_storage',
                subdir='store',
                storage_location=config['storage_location'])
            table = database.get_table(table_name)
            # Dataset id = lowercased primary-key column name(s).
            id_name = [
                primary_key.name.lower()
                for primary_key in table.primary_key
            ]
            dataset = Dataset(resources=config,
                              in_storage=in_storage,
                              out_storage=flt_storage,
                              in_table_name=table_name,
                              id_name=id_name)
            nchunks = config[
                'creating_baseyear_cache_configuration'].tables_to_cache_nchunks.get(
                    table_name, 1)
            # Point the simulation clock at the base year while loading so
            # the cache entry is filed under the right year, then restore.
            current_time = SimulationState().get_current_time()
            SimulationState().set_current_time(base_year)
            dataset.load_dataset(nchunks=nchunks, flush_after_each_chunk=True)
            SimulationState().set_current_time(current_time)
        else:
            # Bug fix: corrected typo 'exits' -> 'exists' in status message.
            logger.log_status(config['storage_location'] +
                              " already exists; skip caching " + table_name)
    finally:
        logger.end_block()
# Tutorial walk-through: load and explore the 'households' table from
# tab-delimited files shipped with urbansim.
us_path = urbansim.__path__[0]
from opus_core.storage_factory import StorageFactory
# Storage backed by tab-delimited files in urbansim's data/tutorial directory.
storage = StorageFactory().get_storage('tab_storage',
    storage_location = os.path.join(us_path, "data/tutorial"))
from opus_core.datasets.dataset import Dataset
households = Dataset(in_storage = storage,
                     in_table_name = 'households',
                     id_name='household_id',
                     dataset_name='household')
# Inspect the dataset: attribute names, ids, and size.
households.get_attribute_names()
households.get_id_attribute()
households.size()
# Reading one attribute loads it lazily.
households.get_attribute("income")
households.get_attribute_names()
# Load all remaining attributes at once.
households.load_dataset()
households.get_attribute_names()
#households.plot_histogram("income", bins = 10)
#households.r_histogram("income")
#households.r_scatter("persons", "income")
# Simple descriptive statistics.
households.correlation_coefficient("persons", "income")
households.correlation_matrix(["persons", "income"])
households.summary()
# Add a new primary attribute, then modify two of its entries in place.
households.add_primary_attribute(data=[4,6,9,2,4,8,2,1,3,2], name="location")
households.get_attribute_names()
households.modify_attribute(name="location", data=[0,0], index=[0,1])
households.get_attribute("location")
# Access one data element by its id.
households.get_data_element_by_id(5).location
#households.write_dataset(out_storage=storage, out_table_name="households_output")
def _proxy_for_get_attribute(name):
    """Resolve `name` as a native attribute; when that raises NameError,
    fall back to the dataset's exogenous tables, filtered to the current
    simulation year.

    Returns a list aligned with the dataset's base_table_ids.  Raises
    AttributeError for duplicate or missing exogenous rows; re-raises
    NameError when `name` is not an exogenous attribute either.

    NOTE(review): this is a closure — `native_get_attribute`, `dataset`,
    `self`, `VariableName` and `Dataset` presumably come from the enclosing
    scope; confirm against the surrounding definition.
    """
    try:
        return native_get_attribute(name)
    except NameError:
        # Not a native attribute: look it up in the exogenous tables.
        if not isinstance(name, VariableName):
            name = VariableName(name)
        short_name = name.get_alias()
        current_year = SimulationState().get_current_time()
        if short_name in dataset.exogenous_attribute_names.keys():
            exogenous_table_name = dataset.exogenous_attribute_names[short_name]
            temporary_dataset = Dataset(in_storage=dataset.resources['in_storage'],
                                        in_table_name=exogenous_table_name,
                                        id_name='id')
            if ('year' not in dataset.get_attribute_names()
                    or not self.attribute_boxes['year'].is_in_memory()):
                # Load the data into a temporary dataset because we
                # don't want dataset to save the values we retrieve,
                # since then we can't filter them by year.
                temporary_dataset.load_dataset(
                    nchunks = 1,
                    attributes = [short_name, 'year', 'base_table_id'],
                    in_table_name = exogenous_table_name
                )
        else:
            raise # re-raise NameError
        exogenous_data = temporary_dataset.attribute_boxes[short_name].get_data()
        year_data = temporary_dataset.attribute_boxes['year'].get_data()
        base_table_id_data = temporary_dataset.attribute_boxes['base_table_id'].get_data()
        # NOTE(review): zip() is consumed twice below; fine on Python 2
        # (returns a list) but would yield an empty second comprehension on
        # Python 3 — confirm the target interpreter.
        exogenous_table_data = zip(exogenous_data, year_data, base_table_id_data)
        # Keep only the rows belonging to the current simulation year.
        exogenous_attribute_values = [_attribute
                                      for _attribute, _year, _base_table_id in exogenous_table_data
                                      if _year == current_year]
        exogenous_base_table_ids = [_base_table_id
                                    for _attribute, _year, _base_table_id in exogenous_table_data
                                    if _year == current_year]
        base_table_ids = native_get_attribute(dataset.resources['id_name'])
        # Map base_table_id -> value, rejecting duplicates for this year.
        exogenous_attributes_by_base_table_id = {}
        for base_table_id, value in zip(exogenous_base_table_ids, exogenous_attribute_values):
            try:
                exogenous_attributes_by_base_table_id[base_table_id]
            except:
                # Key absent: first (and only allowed) value for this id.
                exogenous_attributes_by_base_table_id[base_table_id] = value
            else:
                raise AttributeError("Duplicate data for base_table_id "
                                     "'%s', year %s."
                                     % (base_table_id, current_year))
        # Emit values in base_table_ids order; every id must be present.
        result = [None]*len(base_table_ids)
        for index in range(len(base_table_ids)):
            try:
                result[index] = exogenous_attributes_by_base_table_id[base_table_ids[index]]
            except KeyError:
                raise AttributeError("Missing exogenous data for "
                                     "base_table_id '%s', year %s."
                                     % (base_table_ids[index], current_year))
        return result
def _proxy_for_get_attribute(name):
    """Resolve `name` natively; on NameError fall back to exogenous data
    filtered by the current simulation year.

    Returns a list aligned with the dataset's base_table_ids; raises
    AttributeError on duplicate or missing per-year rows, and re-raises
    NameError when the name is not an exogenous attribute.

    NOTE(review): closure — `native_get_attribute`, `dataset`, `self`,
    `VariableName`, `Dataset` presumably bound in the enclosing scope;
    confirm against the surrounding definition.
    """
    try:
        return native_get_attribute(name)
    except NameError:
        # Fall back to the exogenous tables for this attribute.
        if not isinstance(name, VariableName):
            name = VariableName(name)
        short_name = name.get_alias()
        current_year = SimulationState().get_current_time()
        if short_name in dataset.exogenous_attribute_names.keys():
            exogenous_table_name = dataset.exogenous_attribute_names[
                short_name]
            temporary_dataset = Dataset(
                in_storage=dataset.resources['in_storage'],
                in_table_name=exogenous_table_name,
                id_name='id')
            if ('year' not in dataset.get_attribute_names()
                    or not self.attribute_boxes['year'].is_in_memory()):
                # Load the data into a temporary dataset because we
                # don't want dataset to save the values we retrieve,
                # since then we can't filter them by year.
                temporary_dataset.load_dataset(
                    nchunks=1,
                    attributes=[short_name, 'year', 'base_table_id'],
                    in_table_name=exogenous_table_name)
        else:
            raise  # re-raise NameError
        exogenous_data = temporary_dataset.attribute_boxes[
            short_name].get_data()
        year_data = temporary_dataset.attribute_boxes['year'].get_data()
        base_table_id_data = temporary_dataset.attribute_boxes[
            'base_table_id'].get_data()
        # NOTE(review): zip() is iterated twice below; this works on
        # Python 2 (list) but not Python 3 (one-shot iterator) — confirm
        # the target interpreter.
        exogenous_table_data = zip(exogenous_data, year_data,
                                   base_table_id_data)
        # Restrict to rows for the current simulation year.
        exogenous_attribute_values = [
            _attribute for _attribute, _year, _base_table_id in
            exogenous_table_data if _year == current_year
        ]
        exogenous_base_table_ids = [
            _base_table_id for _attribute, _year, _base_table_id in
            exogenous_table_data if _year == current_year
        ]
        base_table_ids = native_get_attribute(dataset.resources['id_name'])
        # Build base_table_id -> value, rejecting duplicates for the year.
        exogenous_attributes_by_base_table_id = {}
        for base_table_id, value in zip(exogenous_base_table_ids,
                                        exogenous_attribute_values):
            try:
                exogenous_attributes_by_base_table_id[base_table_id]
            except:
                # Key missing: record the (single) value for this id.
                exogenous_attributes_by_base_table_id[base_table_id] = value
            else:
                raise AttributeError(
                    "Duplicate data for base_table_id "
                    "'%s', year %s."
                    % (base_table_id, current_year))
        # Values in base_table_ids order; every id must have a value.
        result = [None] * len(base_table_ids)
        for index in range(len(base_table_ids)):
            try:
                result[index] = exogenous_attributes_by_base_table_id[
                    base_table_ids[index]]
            except KeyError:
                raise AttributeError(
                    "Missing exogenous data for "
                    "base_table_id '%s', year %s." %
                    (base_table_ids[index], current_year))
        return result
# Tutorial walk-through (duplicate copy): load and explore the 'households'
# table stored as tab-delimited files under urbansim's data/tutorial.
us_path = urbansim.__path__[0]
from opus_core.storage_factory import StorageFactory
# Tab-delimited-file storage rooted at the tutorial data directory.
storage = StorageFactory().get_storage('tab_storage',
    storage_location = os.path.join(us_path, "data/tutorial"))
from opus_core.datasets.dataset import Dataset
households = Dataset(in_storage = storage,
                     in_table_name = 'households',
                     id_name='household_id',
                     dataset_name='household')
# Basic inspection: attributes, ids, number of rows.
households.get_attribute_names()
households.get_id_attribute()
households.size()
# Attributes load lazily on first access.
households.get_attribute("income")
households.get_attribute_names()
# Force-load everything.
households.load_dataset()
households.get_attribute_names()
#households.plot_histogram("income", bins = 10)
#households.r_histogram("income")
#households.r_scatter("persons", "income")
# Descriptive statistics.
households.correlation_coefficient("persons", "income")
households.correlation_matrix(["persons", "income"])
households.summary()
# Add a primary attribute and modify two entries by index.
households.add_primary_attribute(data=[4,6,9,2,4,8,2,1,3,2], name="location")
households.get_attribute_names()
households.modify_attribute(name="location", data=[0,0], index=[0,1])
households.get_attribute("location")
# Look up a single data element by its id.
households.get_data_element_by_id(5).location
#households.write_dataset(out_storage=storage, out_table_name="households_output")