def run(self, config, year, *args, **kwargs):
    """This is the main entry point. It gets the appropriate configuration info
    from the travel_model_configuration part of this config, and then copies the
    specified UrbanSim data into files for the travel model to read.
    """
    cache_directory = config['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(cache_directory)
    simulation_state.set_current_time(year)
    attribute_cache = AttributeCache()
    dataset_pool = SessionConfiguration(new_instance=True,
                                        package_order=config['dataset_pool_configuration'].package_order,
                                        in_storage=attribute_cache).get_dataset_pool()

    zone_set = dataset_pool.get_dataset('travel_zone')
    self.prepare_for_run(config['travel_model_configuration'], year)
    self.create_travel_model_input_file(config=config,
                                        year=year,
                                        zone_set=zone_set,
                                        datasets=dataset_pool,
                                        *args, **kwargs)
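# Hedged usage sketch (not from the source). Only the config keys read by run() above
# are shown; the cache path, package order, and `writer` instance are hypothetical,
# and 'dataset_pool_configuration' just needs to expose a `package_order` attribute.
class _ExamplePoolConfig(object):
    package_order = ['urbansim', 'opus_core']   # assumed package order

example_config = {
    'cache_directory': '/tmp/urbansim_cache',         # hypothetical cache path
    'dataset_pool_configuration': _ExamplePoolConfig(),
    'travel_model_configuration': {},                 # travel-model specific settings
}
# writer.run(example_config, 2005)   # `writer`: instance of the (unshown) class defining run()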
def __init__(self, config):
    ss = SimulationState(new_instance=True)
    ss.set_current_time(config['base_year'])
    ss.set_cache_directory(config['cache_directory'])
    SessionConfiguration(new_instance=True,
                         package_order=config['dataset_pool_configuration'].package_order,
                         in_storage=AttributeCache())
    ## if the cache already exists, CreateBaseyearCache will automatically skip it
    cacher = CreateBaseyearCache()
    cache_dir = cacher.run(config)

    if 'estimation_database_configuration' in config:
        db_server = DatabaseServer(config['estimation_database_configuration'])
        db = db_server.get_database(config['estimation_database_configuration'].database_name)
        out_storage = StorageFactory().get_storage('sql_storage', storage_location=db)
    else:
        output_cache = os.path.join(config['cache_directory'], str(config['base_year'] + 1))
        out_storage = StorageFactory().get_storage('flt_storage', storage_location=output_cache)

    dataset_pool = SessionConfiguration().get_dataset_pool()
    households = dataset_pool.get_dataset("household")
    buildings = dataset_pool.get_dataset("building")
    zones = dataset_pool.get_dataset("zone")
    zone_ids = zones.get_id_attribute()
    capacity_attribute_name = "residential_units"
    capacity_variable_name = "%s=sanfrancisco.zone.aggregate_%s_from_building" % \
        (capacity_attribute_name, capacity_attribute_name)
    buildings.compute_variables("sanfrancisco.building.zone_id", dataset_pool=dataset_pool)
    zones.compute_variables(capacity_variable_name, dataset_pool=dataset_pool)
    building_zone_id = buildings.get_attribute('zone_id')
    is_household_unplaced = 1  # all households are treated as unplaced
    household_building_id = zeros(households.size(), dtype='int32') - 1

    for zone_id in zone_ids:
        capacity = zones.get_attribute_by_id(capacity_attribute_name, zone_id)
        is_household_in_this_zone = (households.get_attribute('zone_id') == zone_id)
        is_unplaced_household_in_this_zone = is_household_in_this_zone * is_household_unplaced
        is_building_in_this_zone = (building_zone_id == zone_id)
        if capacity == 0 or is_household_in_this_zone.sum() == 0:
            print "WARNING: skipping zone %s (%s households, %s residential units)" % \
                (zone_id, is_household_in_this_zone.sum(), capacity)
            continue

        prob = buildings.get_attribute(capacity_attribute_name) * is_building_in_this_zone / array(capacity, dtype=float64)
        r = random(sum(is_unplaced_household_in_this_zone))
        prob_cumsum = ncumsum(prob)
        index_to_bldg = searchsorted(prob_cumsum, r)
        household_building_id[where(is_unplaced_household_in_this_zone)] = buildings.get_attribute_by_index('building_id', index_to_bldg)

    households.set_values_of_one_attribute('building_id', household_building_id)
    households.write_dataset(out_table_name='households', out_storage=out_storage)
def __init__(self, config): if 'estimation_database_configuration' in config: db_server = DatabaseServer(config['estimation_database_configuration']) db = db_server.get_database(config['estimation_database_configuration'].database_name) out_storage = StorageFactory().build_storage_for_dataset( type='sql_storage', storage_location=db) else: out_storage = StorageFactory().get_storage(type='flt_storage', storage_location=os.path.join(config['cache_directory'], str(config['base_year']+1))) simulation_state = SimulationState() simulation_state.set_cache_directory(config['cache_directory']) simulation_state.set_current_time(config['base_year']) attribute_cache = AttributeCache() SessionConfiguration(new_instance=True, package_order=config['dataset_pool_configuration'].package_order, in_storage=attribute_cache) if not os.path.exists(os.path.join(config['cache_directory'], str(config['base_year']))): #raise RuntimeError, "datasets uncached; run prepare_estimation_data.py first" CacheScenarioDatabase().run(config, unroll_gridcells=False) for dataset_name in config['datasets_to_preload']: SessionConfiguration().get_dataset_from_pool(dataset_name) households = SessionConfiguration().get_dataset_from_pool("household") household_ids = households.get_id_attribute() workers = households.get_attribute("workers") hh_ids = [] member_ids = [] is_worker = [] job_ids = [] for i in range(households.size()): if workers[i] > 0: hh_ids += [household_ids[i]] * workers[i] member_ids += range(1, workers[i]+1) is_worker += [1] * workers[i] job_ids += [-1] * workers[i] in_storage = StorageFactory().get_storage('dict_storage') persons_table_name = 'persons' in_storage.write_table( table_name=persons_table_name, table_data={ 'person_id':arange(len(hh_ids))+1, 'household_id':array(hh_ids), 'member_id':array(member_ids), 'is_worker':array(is_worker), 'job_id':array(job_ids), }, ) persons = PersonDataset(in_storage=in_storage, in_table_name=persons_table_name) persons.write_dataset(out_storage=out_storage, out_table_name=persons_table_name)
class RunSimulationFromMysql:
    def prepare_for_simulation(self, run_configuration, cache_directory=None):
        self.config = Resources(run_configuration)
        self.simulation_state = SimulationState(
            new_instance=True, base_cache_dir=cache_directory, start_time=self.config.get("base_year", 0)
        )

        ### TODO: Get rid of this! There is no good reason to be changing the
        ### Configuration.
        if self.config["cache_directory"] is None:
            self.config["cache_directory"] = self.simulation_state.get_cache_directory()

        SessionConfiguration(
            new_instance=True,
            package_order=self.config["dataset_pool_configuration"].package_order,
            in_storage=AttributeCache(),
        )

        ForkProcess().fork_new_process(
            self.config["creating_baseyear_cache_configuration"].cache_scenario_database, self.config
        )

        # Create output database (normally done by run manager)
        if "estimation_database_configuration" in self.config:
            db_server = DatabaseServer(self.config["estimation_database_configuration"])
            if not db_server.has_database(self.config["estimation_database_configuration"].database_name):
                db_server.create_database(self.config["estimation_database_configuration"].database_name)

    def run_simulation(self, simulation_instance=None):
        logger.start_block("Simulation on database %s" % self.config["scenario_database_configuration"].database_name)
        try:
            if simulation_instance is None:
                simulation_instance = ModelSystem()
            simulation_instance.run(self.config)
        finally:
            logger.end_block()
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())

    def cleanup(self, remove_cache, remove_output_database=False):
        """Remove all outputs of this simulation."""
        self.simulation_state.remove_singleton(delete_cache=remove_cache)
        # Remove SessionConfiguration singleton, if it exists
        Singleton().remove_singleton_for_class(SessionConfiguration)

        cache_dir = self.config["cache_directory"]
        if os.path.exists(cache_dir):
            rmtree(cache_dir)
        if remove_output_database and ("estimation_database_configuration" in self.config):
            db_server = DatabaseServer(self.config["estimation_database_configuration"])
            db_server.drop_database(self.config["estimation_database_configuration"].database_name)

    def prepare_and_run(self, run_configuration, simulation_instance=None, remove_cache=True):
        self.prepare_for_simulation(run_configuration)
        self.run_simulation(simulation_instance)
        # remove_output_database now defaults to False so this call no longer raises a TypeError
        self.cleanup(remove_cache)
def run(self, year, cache_directory=None): """The class is initialized with the appropriate configuration info from the travel_model_configuration part of this config, and then copies the specified UrbanSim data into files for daysim to read. The variables/expressions to export are defined in the node travel_model_configuration/urbansim_to_tm_variable_mapping of the configuration file. """ if cache_directory is None: cache_directory = self.config['cache_directory'] simulation_state = SimulationState() simulation_state.set_cache_directory(cache_directory) simulation_state.set_current_time(year) attribute_cache = AttributeCache() sc = SessionConfiguration(new_instance=True, package_order=self.config['dataset_pool_configuration'].package_order, in_storage=attribute_cache) dataset_pool = sc.get_dataset_pool() tm_config = self.config['travel_model_configuration'] data_to_export = tm_config['urbansim_to_tm_variable_mapping'] table_names = data_to_export.keys() variable_names = {} datasets = {} filenames = {} in_table_names = {} for table_name in table_names: filter = data_to_export[table_name].get('__filter__', None) if filter is not None: del data_to_export[table_name]['__filter__'] out_table_name = data_to_export[table_name].get('__out_table_name__', None) if out_table_name is not None: del data_to_export[table_name]['__out_table_name__'] else: out_table_name = table_name variables_to_export = map(lambda alias: "%s = %s" % (alias, data_to_export[table_name][alias]), data_to_export[table_name].keys()) dataset_name = None for var in variables_to_export: var_name = VariableName(var) if dataset_name is None: dataset_name = var_name.get_dataset_name() ds = dataset_pool.get_dataset(dataset_name) datasets[dataset_name] = ds filenames[dataset_name] = out_table_name in_table_names[dataset_name] = table_name if dataset_name not in variable_names.keys(): variable_names[dataset_name] = [] variable_names[dataset_name].append(var_name.get_alias()) ds.compute_variables([var_name], dataset_pool=dataset_pool) if filter is not None: filter_idx = where(ds.compute_variables(["__filter__ = %s" % filter], dataset_pool=dataset_pool)>0)[0] ds = DatasetSubset(ds, index = filter_idx) datasets[dataset_name] = ds return self._call_input_file_writer(year, datasets, in_table_names, filenames, variable_names, dataset_pool)
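# Hedged sketch (not from the source) of the configuration node read by run() above,
# i.e. travel_model_configuration['urbansim_to_tm_variable_mapping']. Table and
# variable names are hypothetical; '__filter__' and '__out_table_name__' are the two
# reserved keys the loop pops before building the "alias = expression" export list.
example_urbansim_to_tm_variable_mapping = {
    'households': {                                # key: table name in the cache
        '__filter__': 'household.persons > 0',     # optional filter expression
        '__out_table_name__': 'hhs_for_daysim',    # optional output table/file name
        'hhno': 'household.household_id',          # alias -> urbansim expression
        'hhsize': 'household.persons',
    },
    'parcels': {
        'parcelid': 'parcel.parcel_id',
    },
}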
def test_doing_refinements_from_specified_refinement_dataset(self):
    self.prepare_cache()
    os.system("%(python)s %(script)s -c %(cache_directory)s -s %(start_year)s -e %(end_year)s --refinements-directory=%(refinement_directory)s" % {
        'python': sys.executable,
        'script': self.script,
        'cache_directory': self.cache_dir,
        'start_year': 2021,
        'end_year': 2022,
        'refinement_directory': os.path.join(self.cache_dir, '2000')
    })

    simulation_state = SimulationState()

    ## test refinement for 2021
    simulation_state.set_current_time(2021)
    jobs = self.dataset_pool.get_dataset('job')
    buildings = self.dataset_pool.get_dataset('building')
    jobs13_raz3 = jobs.compute_variables('numpy.logical_and(job.sector_id==13, job.disaggregate(parcel.raz_id==3, intermediates=[building]))',
                                         dataset_pool=self.dataset_pool)
    jobs13_raz4 = jobs.compute_variables('numpy.logical_and(job.sector_id==13, job.disaggregate(parcel.raz_id, intermediates=[building])==4)',
                                         dataset_pool=self.dataset_pool)
    jobs13_raz5 = jobs.compute_variables('numpy.logical_and(job.sector_id==13, job.disaggregate(parcel.raz_id, intermediates=[building])==5)',
                                         dataset_pool=self.dataset_pool)
    jobs_raz5 = jobs.compute_variables('job.disaggregate(parcel.raz_id, intermediates=[building])==5',
                                       dataset_pool=self.dataset_pool)

    # check results
    self.assertEqual(jobs13_raz3.sum(), 0)
    self.assertEqual(jobs13_raz4.sum(), 0)
    self.assertEqual(jobs13_raz5.sum() >= 5, True)
    self.assertEqual(jobs_raz5.sum(), 7)
    expected_nr_sqft = array([6, 0, 3, 6, 1, 6, 5, 0])  ## was array([6, 2, 3, 6, 1, 2, 5, 0])
    self.assert_(allclose(buildings.get_attribute('non_residential_sqft'), expected_nr_sqft))

    self.dataset_pool.remove_all_datasets()

    ## test refinement for 2022
    simulation_state.set_current_time(2022)
    hhs = self.dataset_pool.get_dataset('household')
    buildings = self.dataset_pool.get_dataset('building')
    hhs_raz6 = hhs.compute_variables('household.disaggregate(building.disaggregate(parcel.raz_id)==6)',
                                     dataset_pool=self.dataset_pool)
    hhs_bldg = buildings.compute_variables('building.number_of_agents(household)',
                                           dataset_pool=self.dataset_pool)

    # check results; assertEqual is used because assert_ ignores its second argument
    self.assertEqual(hhs_raz6.sum(), 7)
    self.assertEqual(hhs_bldg.sum(), 7)
    self.assertEqual((hhs_bldg != 0).sum(), 2)
    self.assertEqual(buildings.get_attribute('residential_units').sum(), 7)

    self.dataset_pool.remove_all_datasets()
def run(self, year, skim_directory=None):
    """It gets the appropriate values from the travel_model_configuration part of
    this config, and then copies the specified data into the specified travel_data
    variable names. Results in a new travel_data cache for year+1.
    """
    cache_directory = self.config['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_current_time(year)
    simulation_state.set_cache_directory(cache_directory)

    year_config = self.config['travel_model_configuration'][year]
    self.write_travel_data(year, cache_directory)
def _initialize_previous_years(self, attribute_cache, base_year, creating_baseyear_cache_configuration): simulation_state = SimulationState() cache_directory = simulation_state.get_cache_directory() baseyear_cache_path = os.path.join(cache_directory, str(base_year)) for table_name, year in creating_baseyear_cache_configuration.tables_to_copy_to_previous_years.iteritems(): year_cache_path = os.path.join(cache_directory, str(year)) dest_file_path = os.path.join(year_cache_path, table_name) if os.path.exists(dest_file_path): rmtree(dest_file_path) copytree(os.path.join(baseyear_cache_path, table_name), dest_file_path)
def setup_environment(cache_directory, year, package_order, additional_datasets={}): gc.collect() ss = SimulationState(new_instance=True) ss.set_cache_directory(cache_directory) ss.set_current_time(year) ac = AttributeCache() storage = ac.get_flt_storage_for_year(year) sc = SessionConfiguration(new_instance=True, package_order=package_order, in_storage=ac) logger.log_status("Setup environment for year %s. Use cache directory %s." % (year, storage.get_storage_location())) dp = sc.get_dataset_pool() for name, ds in additional_datasets.iteritems(): dp.replace_dataset(name, ds) return dp
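# Hedged usage sketch (not from the source): the cache path and package order are
# hypothetical placeholders for a call to setup_environment() above.
# dataset_pool = setup_environment('/tmp/urbansim_cache', 2005,
#                                  package_order=['urbansim', 'opus_core'])
# households = dataset_pool.get_dataset('household')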
def setUp(self): self.start_year = 2001 self.expected_sic_data = array([6,4,7,808,6]) self.job_id = array([1,2,3,4,5]) self.base_cache_dir = tempfile.mkdtemp(prefix='opus_tmp_test_dataset') self.simulation_state = SimulationState(low_memory_run=True, new_instance=True, base_cache_dir=self.base_cache_dir) self.dir = self.simulation_state.get_cache_directory() self.simulation_state.set_current_time(self.start_year) if not os.path.exists(self.dir): os.makedirs(self.dir) self.in_storage = StorageFactory().get_storage('dict_storage') self.in_storage.write_table( table_name='jobs', table_data={ 'grid_id':array([10,20,30,40,50]), 'job_id':self.job_id, }, ) self.out_storage = StorageFactory().get_storage('dict_storage') self.job_set_resources = ResourceFactory().get_resources_for_dataset( 'job', in_storage = self.in_storage, out_storage = self.out_storage, in_table_name_pair = ('jobs',None), out_table_name_pair = ('jobs_exported',None), attributes_pair = (None,AttributeType.PRIMARY), id_name_pair = ('job_id','job_id'), nchunks_pair = (1,1), debug_pair = (1,None) )
def setUp(self): self.config = TestCacheConfiguration() self.simulation_state = SimulationState(new_instance=True) SessionConfiguration(self.config, new_instance=True, package_order=['urbansim', 'opus_core'], in_storage=AttributeCache()) self.base_year = self.config['base_year'] creating_baseyear_cache_configuration = self.config['creating_baseyear_cache_configuration'] self.simulation_state.set_current_time(self.base_year) cache_directory = self.simulation_state.get_cache_directory() copytree(os.path.join(creating_baseyear_cache_configuration.baseyear_cache.existing_cache_to_copy, str(self.base_year)), os.path.join(cache_directory, str(self.base_year))) cacher = CacheScenarioDatabase() cacher.prepare_data_before_baseyear(cache_directory, self.base_year, creating_baseyear_cache_configuration) self.config['cache_directory'] = cache_directory cache_storage = AttributeCache().get_flt_storage_for_year(self.base_year) cache_directory = self.simulation_state.get_cache_directory() flt_directory = os.path.join(cache_directory, str(self.base_year)) self.gridcell = DatasetFactory().get_dataset('gridcell', package='urbansim', subdir='datasets', arguments={'in_storage':StorageFactory().get_storage('flt_storage', storage_location=flt_directory)} )
def run(self, year): """Like its parent, but report files have different format and there are no banks. Zones are assumed to have no gaps. """ cache_directory = self.config['cache_directory'] simulation_state = SimulationState() simulation_state.set_current_time(year) simulation_state.set_cache_directory(cache_directory) year_config = self.config['travel_model_configuration'][year] bank_path = os.path.sep.join([self.get_emme2_base_dir()] + self.config['travel_model_configuration'][year]['bank']) for path, variable_dict in year_config['matrix_variable_map'].iteritems(): path_name = os.path.sep.join([bank_path] + path.split('.')) self.get_needed_matrices_from_emme4(year, year_config['cache_directory'], path_name, variable_dict)
def run(self, config, show_output = False): logger.log_status("Caching large SQL tables to: " + config['cache_directory']) self.show_output = show_output #import pydevd;pydevd.settrace() server_configuration = config['scenario_database_configuration'] scenario_database_manager = ScenarioDatabaseManager( server_configuration = server_configuration, base_scenario_database_name = server_configuration.database_name ) self.database_server = DatabaseServer(server_configuration) database_to_table_mapping = scenario_database_manager.get_database_to_table_mapping() self.tables_to_cache = config['creating_baseyear_cache_configuration'].tables_to_cache simulation_state = SimulationState() if 'low_memory_run' in config: simulation_state.set_low_memory_run(config['low_memory_run']) simulation_state.set_cache_directory(config['cache_directory']) simulation_state.set_current_time(config['base_year']) self.tables_cached = set() for database_name, tables in database_to_table_mapping.items(): self.cache_database_tables(config, database_name, tables) un_cached_tables = set(self.tables_to_cache) - self.tables_cached if un_cached_tables: logger.log_warning('The following requested tables were NOT cached:') for table_name in un_cached_tables: logger.log_warning('\t%s' % table_name)
def setUp(self): self.start_year = 2001 self.expected_sic_data = array([6,4,7,808,6]) self.job_id = array([1,2,3,4,5]) self.base_cache_dir = tempfile.mkdtemp(prefix='opus_tmp') self.simulation_state = SimulationState(low_memory_run=True, new_instance=True, base_cache_dir=self.base_cache_dir) self.dir = self.simulation_state.get_cache_directory() self.simulation_state.set_current_time(self.start_year) if not os.path.exists(self.dir): os.makedirs(self.dir)
def __init__(self, package_name, attribute_name, lag_offset, dataset_name, index_name): self.package_name = package_name self.attribute_name = attribute_name self.simulation_state = SimulationState() self.attribute_cache = AttributeCache() self.lag_offset = lag_offset self.dataset_name = dataset_name self.index_name = index_name self.lag_index_name = "%s_lag%d" % (index_name, lag_offset) Variable.__init__(self)
def run(self, year): """This is the main entry point. The class is initialized with the appropriate configuration info from the travel_model_configuration part of this config, and then copies the specified UrbanSim data into files for emme/2 to read. If households and jobs do not have a primary attribute zone_id, the entry 'locations_to_disaggregate' in the travel_model_configuration should be a list of dataset names over which the zone_id will be dissaggregated, ordered from higher to lower aggregation level, e.g. ['parcel', 'building'] """ cache_directory = self.config['cache_directory'] simulation_state = SimulationState() simulation_state.set_cache_directory(cache_directory) simulation_state.set_current_time(year) attribute_cache = AttributeCache() sc = SessionConfiguration(new_instance=True, package_order=self.config['dataset_pool_configuration'].package_order, in_storage=attribute_cache) dataset_pool = sc.get_dataset_pool() hh_set = dataset_pool.get_dataset('household') zone_set = dataset_pool.get_dataset('zone') job_set = dataset_pool.get_dataset('job') locations_to_disaggregate = self.config['travel_model_configuration']['locations_to_disaggregate'] len_locations_to_disaggregate = len(locations_to_disaggregate) if len_locations_to_disaggregate > 0: primary_location = locations_to_disaggregate[0] if len_locations_to_disaggregate > 1: intermediates_string = ", intermediates=[" for i in range(1, len_locations_to_disaggregate): intermediates_string = "%s%s, " % (intermediates_string, locations_to_disaggregate[i]) intermediates_string = "%s]" % intermediates_string else: intermediates_string = "" hh_set.compute_variables(['%s = household.disaggregate(%s.%s %s)' % (zone_set.get_id_name()[0], primary_location, zone_set.get_id_name()[0], intermediates_string)], dataset_pool=dataset_pool) job_set.compute_variables(['%s = job.disaggregate(%s.%s %s)' % (zone_set.get_id_name()[0], primary_location, zone_set.get_id_name()[0], intermediates_string)], dataset_pool=dataset_pool) return self._call_input_file_writer(year, dataset_pool)
class RunSimulation(object): def prepare_for_simulation(self, config, cache_directory=None): self.config = Resources(config) base_cache_dir = self.config['creating_baseyear_cache_configuration'].cache_directory_root self.simulation_state = SimulationState(new_instance=True, base_cache_dir=base_cache_dir, start_time=self.config.get('base_year', 0)) ### TODO: Get rid of this! There is no good reason to be changing the ### Configuration. if self.config['cache_directory'] is None: self.config['cache_directory'] = self.simulation_state.get_cache_directory() SessionConfiguration(new_instance=True, package_order=self.config['dataset_pool_configuration'].package_order, in_storage=AttributeCache()) if config['creating_baseyear_cache_configuration'].cache_from_database: ForkProcess().fork_new_process(self.config['creating_baseyear_cache_configuration'].cache_scenario_database, self.config) else: CacheFltData().run(self.config) def run_simulation(self, simulation_instance=None): if simulation_instance is None: simulation_instance = ModelSystem() simulation_instance.run(self.config) #simulation_instance.run_multiprocess(self.config, is_run_subset=True) logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory()) def cleanup(self, remove_cache=True): """Remove all outputs of this simulation.""" self.simulation_state.remove_singleton(delete_cache=remove_cache) SessionConfiguration().remove_singleton() if remove_cache: cache_dir = self.config['cache_directory'] if os.path.exists(cache_dir): rmtree(cache_dir) def prepare_and_run(self, run_configuration, simulation_instance=None, remove_cache=True): self.prepare_for_simulation(run_configuration) self.run_simulation(simulation_instance) self.cleanup(remove_cache)
def run(self, year): """Like its parent, but skims are stored locally in matrix_directory in hdf5 format. It is one file per year, called xxxx-travelmodel.h5, where xxxx is the year. Each file has one group per bank, e.g. Bank1, which contains the matrices. Zones are assumed to have no gaps. """ cache_directory = self.config['cache_directory'] simulation_state = SimulationState() simulation_state.set_current_time(year) simulation_state.set_cache_directory(cache_directory) year_config = self.config['travel_model_configuration'][year] bank_path = os.path.sep.join([self.get_emme2_base_dir()] + self.config['travel_model_configuration'][year]['bank']) #bank_file = os.path.join(matrix_directory, "%s-travelmodel.h5" % bank_year) for path, variable_dict in year_config['matrix_variable_map'].iteritems(): path_name = os.path.sep.join([bank_path] + path.split('.')) self.get_needed_matrices_from_emme4(year, year_config['cache_directory'], path_name, variable_dict)
def target_func(self, est_v, func=lambda x, y: np.sum(np.abs(x - y)), **kwargs): """ Target function.""" simulation_state = SimulationState() simulation_state.set_current_time(self.base_year) simulation_state.set_cache_directory(self.cache_directory) attribute_cache = AttributeCache() dataset_pool = SessionConfiguration( new_instance=True, package_order=self.package_order, in_storage=attribute_cache ).get_dataset_pool() calib_datasets = {} for dataset_name, calib_attr in calib_datasets.iteritems(): dataset = dataset_pool.get_dataset(dataset_name, dataset_arguments={"id_name": []}) assert ( subset is None or subset.get(dataset_name, None) is None or subset_patterns is None or subset_patterns.get(dataset_name, None) is None ) if subset is not None and subset.get(dataset_name, None) is not None: subset_attr, subset_cond = subset.get(dataset_name) index = np.in1d(dataset[subset_attr], subset_cond) elif subset_patterns is not None and subset_patterns.get(dataset_name, None) is not None: subset_attr, subset_pattern = subset_patterns.get(dataset_name) index = array([True if re.search(subset_pattern, attr_v) else False for attr_v in dataset[subset_attr]]) else: index = arange(dataset.size(), dtype="i") calib_datasets[dataset_name] = [dataset, calib_attr, index] prediction = self.update_prediction(est_v, simulation_state, dataset_pool, calib_datasets, **kwargs) ## allow keys in target not appearing in prediction ## assuming their values to be 0 ### every key in target should appear in prediction # assert np.all( np.in1d(self.target.keys(), prediction.keys()) ) target = np.array(self.target.values()) predct = np.array([prediction[k] if prediction.has_key(k) else 0 for k in self.target.keys()]) results = func(predct, target) return results
def import_travel_model_data(config, year): cache_directory = config['cache_directory'] simulation_state = SimulationState() simulation_state.set_current_time(year) simulation_state.set_cache_directory(cache_directory) out_store = AttributeCache().get_flt_storage_for_year(year+1) out_store_loc = out_store.get_storage_location() tm_config = config['travel_model_configuration'] data_to_import = tm_config['tm_to_urbansim_variable_mapping'] base_dir = mtc_common.tm_get_base_dir(config) data_dir = tm_config[year]['data_dir'] for dataset_name, skim_file in data_to_import.iteritems(): skim_file = os.path.join(base_dir, data_dir, skim_file) data = read_csv(skim_file, header=0) with block("Caching {} to {}".format(dataset_name, out_store_loc)): logger.log_status("Source file {}".format(skim_file)) opus_ds = to_opus_dataset(data, out_store, dataset_name)
def __init__( self, model, year, scenario_name=None, model_group=None, configuration=None, xml_configuration=None, cache_directory=None, ): self.model_group = model_group self.explored_model = model if configuration is None: if xml_configuration is None: raise StandardError, "Either dictionary based or XML based configuration must be given." config = xml_configuration.get_run_configuration(scenario_name) else: config = Configuration(configuration) self.scenario_models = config["models"] if config.get("models_in_year", None) is not None and config["models_in_year"].get(year, None) is not None: del config["models_in_year"][year] if model is not None: dependent_models = config["models_configuration"][model]["controller"].get("dependencies", []) config["models"] = dependent_models if model_group is None: config["models"] = config["models"] + [{model: ["run"]}] else: config["models"] = config["models"] + [{model: {"group_members": [{model_group: ["run"]}]}}] else: config["models"] = [] config["years"] = [year, year] config["datasets_to_cache_after_each_model"] = [] config["flush_variables"] = False self.config = Resources(config) self.xml_configuration = xml_configuration if cache_directory is None: cache_directory = config["creating_baseyear_cache_configuration"].baseyear_cache.existing_cache_to_copy self.simulation_state = SimulationState( new_instance=True, base_cache_dir=cache_directory, start_time=config.get("base_year", 0) ) self.config["cache_directory"] = cache_directory SessionConfiguration( new_instance=True, package_order=self.config["dataset_pool_configuration"].package_order, in_storage=AttributeCache(), )
def run(self, year): """ Copy skims stored in hdf5 format into the UrbanSim cache. Should run after psrc_parcel.emme.models.run_export_skims which creates the skims hdf5 file. It creates a travel_model dataset with each skim being an attribute of it. Zones are assumed to have no gaps. Arguments: year -- year of the urbansim run. Used to extract the TM year from the bank configuration. Configuration entries (in travel_model_configuration) used: matrix_variable_map -- dictionary of bank names and corresponding skim names. Bank names are the path where (back-)slashes are replaced by dots, e.g. skims.auto.am. A value for each of such bank name is a dictionary with keys being skim names and values being the desired urbansim attribute name. E.g. {'skims.nonmotorized.am': {'abketm': 'am_bike_to_work_travel_time', 'awlktm': 'am_walk_time_in_minutes' } } matrix_h5_directory -- path to the hdf5 file called xxxx-travelmodel.h5 where xxxx is replaced by the TM year (default is the Emme base directory), which contains the skims as n x n matrices. """ cache_directory = self.config['cache_directory'] simulation_state = SimulationState() simulation_state.set_current_time(year) simulation_state.set_cache_directory(cache_directory) tmconfig = self.config['travel_model_configuration'] year_config = tmconfig[year] matrix_directory = tmconfig.get('matrix_h5_directory', self.get_emme2_base_dir()) bank_year = tmconfig[year]['bank'][0] bank_file = os.path.join(matrix_directory, "%s-travelmodel.h5" % bank_year) for path, variable_dict in year_config['matrix_variable_map'].iteritems(): self.get_needed_matrices_from_emme4(year, year_config['cache_directory'], path, variable_dict, bank_file=bank_file)
def test_doing_other_refinements(self):
    self.prepare_cache()
    os.system("%(python)s %(script)s -c %(cache_directory)s -s %(start_year)s -e %(end_year)s" % {
        'python': sys.executable,
        'script': self.script,
        'cache_directory': self.cache_dir,
        'start_year': 2023,
        'end_year': 2027
    })

    simulation_state = SimulationState()

    ## test refinement for 2023
    simulation_state.set_current_time(2023)
    hhs = self.dataset_pool.get_dataset('household')
    hhs_p5 = hhs.compute_variables('household.persons>5')

    # check results; assertEqual is used because assert_ ignores its second argument
    self.assertEqual(hhs.size(), 2)
    self.assertEqual(hhs_p5.sum(), 0)

    self.dataset_pool.remove_all_datasets()

    ## test refinement for 2027
    simulation_state.set_current_time(2027)
    hhs = self.dataset_pool.get_dataset('household')
    buildings = self.dataset_pool.get_dataset('building')
    persons = self.dataset_pool.get_dataset('person')
    hhs_raz6 = hhs.compute_variables('household.disaggregate(building.disaggregate(parcel.raz_id)==6)',
                                     dataset_pool=self.dataset_pool)
    hhs_bldg = buildings.compute_variables('building.number_of_agents(household)',
                                           dataset_pool=self.dataset_pool)

    # check results
    self.assertEqual(hhs_raz6.sum(), 3)
    self.assertEqual(hhs_bldg.sum(), 3)
    self.assertEqual((hhs_bldg != 0).sum(), 2)
    self.assert_(allclose(persons.get_attribute('job_id'), array([-1, -1, -1, -1, 3, 4, 7])))
def run(self, base_directory, urbansim_cache_directory, years): """ run the simulation base_directory: directory contains all years folder of lccm. urbansim_cache_directory: directory contains all years folder of urbansim cache. years: lists of year to run.""" model = LandCoverChangeModel(self.possible_lcts, submodel_string=self.lct_attribute, choice_attribute_name= self.lct_attribute, debuglevel=4) coefficients = Coefficients() storage = StorageFactory().get_storage('tab_storage', storage_location=os.path.join(self.package_path, 'data')) coefficients.load(in_storage=storage, in_table_name="land_cover_change_model_coefficients") specification = EquationSpecification(in_storage=storage) specification.load(in_table_name="land_cover_change_model_specification") specification.set_variable_prefix("biocomplexity.land_cover.") constants = Constants() simulation_state = SimulationState() simulation_state.set_cache_directory(urbansim_cache_directory) attribute_cache = AttributeCache() index = arange(100000) for year in years: simulation_state.set_current_time(year) #land_cover_path = os.path.join(base_directory, str(year)) land_cover_path = base_directory land_covers = LandCoverDataset(in_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path), out_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path), debuglevel=4) land_covers.subset_by_index(index) #land_covers.load_dataset() gridcells = GridcellDataset(in_storage=attribute_cache, debuglevel=4) agents_index = None model.run(specification, coefficients, land_covers, data_objects={"gridcell":gridcells, "constants":constants, "flush_variables":True}, chunk_specification = {'nchunks':1} ) land_covers.flush_dataset() del gridcells del land_covers
def run(self, table_names, out_storage=None, table_name_pattern=None, cache_directory=None, year=None, **kwargs): """ export specified tables to database table_name_pattern: For example '{table_name}_{scenario_name}_{year}' """ if not hasattr(self, "out_storage"): if out_storage is None: raise ValueError, "Either out_storage argument needs to be specified or " + "prepare_for_run called before run method to create a valid out_storage." else: self.out_storage = out_storage sim_state = SimulationState() if sim_state.get_current_time() == 0: sim_state.set_current_time(9999) if cache_directory is None: cache_directory = sim_state.get_cache_directory() attr_cache = AttributeCache(cache_directory=cache_directory) if year is None: years = attr_cache._get_sorted_list_of_years() else: assert isinstance(year, int) years = [year] for table_name in table_names: kwargs["table_name"] = table_name for year in years: kwargs["year"] = year out_table_name = table_name_pattern.format(**kwargs) in_storage = attr_cache.get_flt_storage_for_year(year) # cache_path = os.path.join(cache_directory, str(year)) # in_storage = flt_storage(storage_location=cache_path) # TODO drop_table(table_name) if table_name exists ExportStorage().export_dataset( table_name, in_storage=in_storage, out_storage=self.out_storage, out_dataset_name=out_table_name ) self.post_run(kwargs["scenario_name"], years)
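# Hedged illustration (not from the source): how `table_name_pattern` is expanded.
# The values below are hypothetical; run() above injects 'table_name' and 'year' into
# kwargs and expects callers to supply extras such as 'scenario_name'.
pattern = '{table_name}_{scenario_name}_{year}'
print pattern.format(table_name='households', scenario_name='baseline', year=2010)
# -> households_baseline_2010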
def run(self, year, matrix_directory=None): """This is the main entry point. It gets the appropriate values from the travel_model_configuration part of this config, and then copies the specified emme/2 matrices into the specified travel_data variable names. Results in a new travel_data cache for year+1. If matrix_directory is not None, it is assumed the matrices files are already created in the given directory. """ cache_directory = self.config['cache_directory'] simulation_state = SimulationState() simulation_state.set_current_time(year) simulation_state.set_cache_directory(cache_directory) year_config = self.config['travel_model_configuration'][year] matrices_created = False if matrix_directory is not None: matrices_created = True reports = self.config['travel_model_configuration'].get('reports_to_copy', []) for x in 1,2,3: if matrix_directory is None: bank_dir = self.get_emme2_dir(year, "bank%i" % x) else: bank_dir = os.path.join(matrix_directory, "bank%i" % x) if "bank%i" % x in year_config['matrix_variable_map']: self.get_needed_matrices_from_emme2(year, year_config['cache_directory'], bank_dir, year_config['matrix_variable_map']["bank%i" % x], matrices_created) for report in reports: self.copy_report_to_cache(report, year, year_config['cache_directory'], bank_dir) if "bank%i" % x in year_config.get('node_matrix_variable_map', {}): node_variable_map = year_config['node_matrix_variable_map']["bank%i" % x] if len(node_variable_map.keys()) > 0: self.get_needed_node_matrices_from_emme2(year, year_config['cache_directory'], bank_dir, node_variable_map)
def __init__(self, config=None, save_estimation_results=False): if 'cache_directory' not in config or config['cache_directory'] is None: raise KeyError("The cache directory must be specified in the " "given configuration, giving the filesystem path to the cache " "directory containing the data with which to estimate. Please " "check that your configuration contains the 'cache_directory' " "entry and that it is not None.") self.simulation_state = SimulationState(new_instance=True) self.simulation_state.set_cache_directory(config['cache_directory']) SessionConfiguration(new_instance=True, package_order=config['dataset_pool_configuration'].package_order, in_storage=AttributeCache()) self.config = Resources(config) self.save_estimation_results = save_estimation_results self.debuglevel = self.config.get("debuglevel", 4) self.model_system = ModelSystem() self.agents_index_for_prediction = None models = self.config.get('models',[]) self.model_name = None if "model_name" in config.keys(): self.model_name = config["model_name"] else: for model in models: if isinstance(model, dict): model_name = model.keys()[0] if (model[model_name] == "estimate") or (isinstance(model[model_name], list) and ("estimate" in model[model_name])): self.model_name = model_name break estimate_config_changes = self.config.get('config_changes_for_estimation', {}).get('estimate_config', {}) if len(estimate_config_changes) > 0: change = Resources({'models_configuration': {self.model_name: {'controller': {'init': {'arguments': {}}}}}}) estimate_config_str = self.config['models_configuration'].get(self.model_name, {}).get('controller', {}).get('init', {}).get('arguments', {}).get('estimate_config', '{}') estimate_config = Resources({}) try: estimate_config = eval(estimate_config_str) except: pass estimate_config.merge(estimate_config_changes) self.config.merge(change) self.config['models_configuration'][self.model_name]['controller']['init']['arguments']['estimate_config'] = 'Resources(%s)' % estimate_config
def test_my_inputs(self): tester = VariableTester(__file__, package_order=['urbansim_parcel', 'urbansim'], test_data={ 'building': { 'building_id': array([1, 2, 3, 4]), 'year_built': array([1995, 1800, 2006, 200]) }, 'urbansim_constant': { 'absolute_min_year': array([1800]) } }) SimulationState().set_current_time(2005) should_be = array([True, True, True, False]) tester.test_is_equal_for_variable_defined_by_this_module( self, should_be)
def test_err_duplicate_data(self): weather_exogenous_override = { 'id': array([1, 4]), 'year': array([1980, 1980]), self.base_id: array([1, 1]), # Both 1980, both base_table_id 1. self.exogenous_attribute1: array([40, 70]), self.exogenous_attribute2: array([700, 1000]), } self.storage.write_table(table_name=self.weather_exogenous_table_name, table_data=weather_exogenous_override) base_dataset = Dataset(in_storage=self.storage, in_table_name=self.base_table_name, id_name=self.base_id) ExogenousAspectForDataset().apply(base_dataset) SimulationState().set_current_time(1980) self.assertRaises(AttributeError, base_dataset.get_attribute, self.exogenous_attribute1)
def test_delete_computed_tables_if_nothing_to_delete(self): year = 1980 tables_keep = ['test_table1', 'test_table2'] tables_delete = [] for table in tables_keep + tables_delete: os.makedirs(os.path.join(self.temp_dir, str(year), table)) open(os.path.join(self.temp_dir, str(year), table, 'attr1.i16'), 'w').close() open(os.path.join(self.temp_dir, str(year), table, 'attr2.i32'), 'w').close() SimulationState().set_current_time(year) #check if all tables exist before the test for table in tables_keep + tables_delete: self.assert_(os.path.exists(self.get_table_path(year, table))) self.storage.delete_computed_tables() #check if the right tables exist for table in tables_keep: self.assert_(os.path.exists(self.get_table_path(year, table)))
def _write_data_to_year(self, data, cache_dir, year): """Writes this data to this year of the cache. Returns dataset. """ # Import in unit test, so that circular dependency is avoided. from opus_core.datasets.dataset import Dataset SimulationState().set_cache_directory(cache_dir) storage = dict_storage() storage.write_table( table_name=self._table_name, table_data=data, ) ds = Dataset(id_name=self._id_name, in_storage=storage, in_table_name=self._table_name) ds.load_dataset() self._write_dataset_to_cache(ds, cache_dir, year)
def prepare_for_simulation(self, config, cache_directory=None): self.config = Resources(config) base_cache_dir = self.config['creating_baseyear_cache_configuration'].cache_directory_root self.simulation_state = SimulationState(new_instance=True, base_cache_dir=base_cache_dir) ### TODO: Get rid of this! There is no good reason to be changing the ### Configuration. if self.config['cache_directory'] is None: self.config['cache_directory'] = self.simulation_state.get_cache_directory() SessionConfiguration(new_instance=True, package_order=self.config['dataset_pool_configuration'].package_order, in_storage=AttributeCache()) if config['creating_baseyear_cache_configuration'].cache_from_database: ForkProcess().fork_new_process(self.config['creating_baseyear_cache_configuration'].cache_scenario_database, self.config) else: CacheFltData().run(self.config)
def _get_sorted_list_of_years(self): """Returns a sorted list (descending order) of the current and prior years having directories in the cache directory. """ from os import listdir current_year = SimulationState().get_current_time() dirs = flt_storage( self.get_storage_location()).listdir_in_base_directory() years = [] for dir_name in dirs: try: year = int(dir_name) if (year <= current_year): years.append(year) except: pass years.sort() years.reverse() return years
def setUp(self): self.base_table_name = 'base_table' self.base_id = 'base_table_id' self.exogenous_attribute1 = 'some_attribute' self.exogenous_attribute2 = 'some_other_attribute' base_table = { self.base_id: array([2, 1]), } self.weather_exogenous_table_name = 'weather_exogenous' weather_exogenous = { 'id': array([1, 2, 3, 4, 5, 6]), 'year': array([1980, 1981, 1982, 1980, 1981, 1982]), self.base_id: array([1, 1, 1, 2, 2, 2]), self.exogenous_attribute1: array([40, 50, 600, 70, 80, 900]), self.exogenous_attribute2: array([700, 800, 90, 1000, 1100, 120]), } SimulationState().set_current_time(1980) self.expected_exogenous_attribute_1980_1 = array([70, 40]) self.expected_exogenous_attribute_1980_2 = array([1000, 700]) self.expected_exogenous_attribute_1981_1 = array([80, 50]) self.expected_exogenous_attribute_1981_2 = array([1100, 800]) self.expected_exogenous_attribute_1982_1 = array([900, 600]) self.expected_exogenous_attribute_1982_2 = array([120, 90]) self.exogenous_relationships_table_name = 'exogenous_relationships' exogenous_relationships = { 'exogenous_id': array([1]), 'base_table': array([self.base_table_name]), 'exogenous_table': array([self.weather_exogenous_table_name]), } self.storage = dict_storage() for table_name, table_values in [ (self.base_table_name, base_table), (self.weather_exogenous_table_name, weather_exogenous), (self.exogenous_relationships_table_name, exogenous_relationships) ]: self.storage.write_table(table_name=table_name, table_data=table_values)
def run(self, year): """This is the main entry point. The class is initialized with the appropriate configuration info from the travel_model_configuration part of this config, and then copies the specified UrbanSim data into files for emme/2 to read. If households and jobs do not have a primary attribute zone_id, the entry 'locations_to_disaggregate' in the travel_model_configuration should be a list of dataset names over which the zone_id will be dissaggregated, ordered from higher to lower aggregation level, e.g. ['parcel', 'building'] """ cache_directory = self.config['cache_directory'] simulation_state = SimulationState() simulation_state.set_cache_directory(cache_directory) simulation_state.set_current_time(year) attribute_cache = AttributeCache() sc = SessionConfiguration( new_instance=True, package_order=self.config['dataset_pool_configuration']. package_order, in_storage=attribute_cache) dataset_pool = sc.get_dataset_pool() hh_set = dataset_pool.get_dataset('household') zone_set = dataset_pool.get_dataset('zone') job_set = dataset_pool.get_dataset('job') locations_to_disaggregate = self.config['travel_model_configuration'][ 'locations_to_disaggregate'] len_locations_to_disaggregate = len(locations_to_disaggregate) if len_locations_to_disaggregate > 0: primary_location = locations_to_disaggregate[0] if len_locations_to_disaggregate > 1: intermediates_string = ", intermediates=[" for i in range(1, len_locations_to_disaggregate): intermediates_string = "%s%s, " % ( intermediates_string, locations_to_disaggregate[i]) intermediates_string = "%s]" % intermediates_string else: intermediates_string = "" hh_set.compute_variables([ '%s = household.disaggregate(%s.%s %s)' % (zone_set.get_id_name()[0], primary_location, zone_set.get_id_name()[0], intermediates_string) ], dataset_pool=dataset_pool) job_set.compute_variables([ '%s = job.disaggregate(%s.%s %s)' % (zone_set.get_id_name()[0], primary_location, zone_set.get_id_name()[0], intermediates_string) ], dataset_pool=dataset_pool) return self._call_input_file_writer(year, dataset_pool)
def run_in_one_process( self, resources, run_in_background=False, class_path='opus_core.model_coordinators.model_system'): resources = Resources(resources) if resources['cache_directory'] is not None: cache_directory = resources['cache_directory'] else: cache_directory = SimulationState().get_cache_directory() ### TODO: Get rid of this! There is no good reason to be changing the ### Configuration. resources['cache_directory'] = cache_directory self._fork_new_process('%s' % class_path, resources, delete_temp_dir=False, run_in_background=run_in_background) self._notify_stopped()
def opusRun(progressCB, logCB, params): params_dict = {} for key, val in params.iteritems(): params_dict[str(key)] = str(val) opus_data_directory = params_dict['opus_data_directory'] opus_data_year = params_dict['opus_data_year'] database_name = params_dict['database_name'] table_name = params_dict['table_name'] database_server_connection = params_dict['database_server_connection'] dbs_config = DatabaseServerConfiguration( database_configuration=database_server_connection) server = DatabaseServer(database_server_configuration=dbs_config) opusdb = server.get_database(database_name=database_name) input_storage = sql_storage(storage_location=opusdb) attribute_cache = AttributeCache(cache_directory=opus_data_directory) output_storage = attribute_cache.get_flt_storage_for_year(opus_data_year) SimulationState().set_current_time(opus_data_year) SessionConfiguration(new_instance=True, package_order=[], in_storage=AttributeCache()) if table_name == 'ALL': logCB('caching all tables...\n') lst = input_storage.get_table_names() for i in lst: ExportStorage().export_dataset( dataset_name=i, in_storage=input_storage, out_storage=output_storage, ) else: logCB("Exporting table '%s' to year %s of cache located at %s...\n" % (table_name, opus_data_year, opus_data_directory)) ExportStorage().export_dataset(dataset_name=table_name, in_storage=input_storage, out_storage=output_storage)
def compute(self, dataset_pool): ds = self.get_dataset() current_year = SimulationState().get_current_time() velocity = ds.get_attribute("_velocity") index = current_year - ds.get_attribute("_start_year") def get_one_velocity(velocity_string, idx): a = array(eval(velocity_string)) if idx >= a.size: return 0 if idx == 0: return a[idx] else: return a[idx] - a[idx - 1] percent_of_development_this_year = array( map(lambda vel, i: get_one_velocity(vel, i), velocity, index)) ds.touch_attribute( "_start_year" ) # in order to always recompute (because the simulation year can change) return percent_of_development_this_year
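# Hedged illustration (not from the source): a velocity string is a cumulative
# completion schedule such as "[25, 50, 75, 100]"; get_one_velocity() above turns it
# into the share completed in a given year since the start year.
def _example_velocity(velocity_string, idx):
    a = eval(velocity_string)          # e.g. [25, 50, 75, 100]
    if idx >= len(a):
        return 0                       # past the end of the schedule: nothing left to build
    return a[idx] if idx == 0 else a[idx] - a[idx - 1]

# year 0 -> 25%, years 1-3 -> 25% each, year 4 -> 0%
assert [_example_velocity("[25, 50, 75, 100]", i) for i in range(5)] == [25, 25, 25, 25, 0]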
def test_simple_lag_variable2(self): test_data = { 1000: { 'tests': { 'id': array([1, 2, 3, 4]), 'attr1': array([10, 20, 30, 40]), }, }, 1001: { 'tests': { 'id': array([1, 2, 3, 5]), 'attr1': array([111, 222, 333, 555]), }, }, } cache_creator = CreateTestAttributeCache() cache_creator.create_attribute_cache_with_data(self._temp_dir, test_data) SimulationState().set_current_time(1001) attribute_cache = AttributeCache() SessionConfiguration(new_instance=True, package_order=['opus_core'], in_storage=attribute_cache) ds = Dataset(in_storage=attribute_cache, in_table_name='tests', id_name=['id'], dataset_name='tests') ds.compute_variables(['opus_core.tests.attr1']) self.assert_( ma.allequal(ds.get_attribute('attr1'), array([111, 222, 333, 555]))) ds.compute_variables(['opus_core.tests.attr1_lag1']) self.assert_( ma.allequal(ds.get_attribute('attr1_lag1'), array([10, 20, 30, 555])))
def test_my_inputs(self): SimulationState().set_current_time(2005) tester = VariableTester( __file__, package_order=['urbansim'], test_data={ "gridcell": { "grid_id": array([1,2,3]) }, "building": { "building_id": array([1,2,3,4,5,6,7]), "grid_id": array([1, 1, 2, 3, 1, 2, 1]), "year_built": array([1995, 2000, 2005, 0, 10, 0, 2005]) }, 'urbansim_constant':{ "absolute_min_year": array([1800]), } } ) should_be = array([True, True, False]) tester.test_is_equal_for_variable_defined_by_this_module(self, should_be)
def run_multiprocess(self, resources): resources = Resources(resources) profiler_name = resources.get("profile_filename", None) if resources['cache_directory'] is not None: cache_directory = resources['cache_directory'] else: cache_directory = SimulationState().get_cache_directory() ### TODO: Get rid of this! There is absolutely no good reason to be ### changing the Configuration! resources['cache_directory'] = cache_directory log_file = os.path.join(cache_directory, 'run_multiprocess.log') logger.enable_file_logging(log_file) start_year = resources["years"][0] end_year = resources["years"][-1] nyears = end_year - start_year + 1 root_seed = resources.get("seed", NO_SEED) if resources.get('_seed_dictionary_', None) is not None: # This is added by the RunManager to ensure reproducibility including restarted runs seed_dict = resources.get('_seed_dictionary_') seed_array = array( map(lambda year: seed_dict[year], range(start_year, end_year + 1))) else: seed(root_seed) seed_array = randint(1, 2**30, nyears) logger.log_status("Running simulation for years %d thru %d" % (start_year, end_year)) logger.log_status("Simulation root seed: %s" % root_seed) self._run_each_year_as_separate_process(start_year, end_year, seed_array, resources) if profiler_name is not None: # insert original value resources["profile_filename"] = profiler_name logger.log_status("Done running simulation for years %d thru %d" % (start_year, end_year))
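# Hedged illustration (not from the source): for a run spanning 2001-2003, the
# RunManager-provided '_seed_dictionary_' is consumed as one seed per simulated year;
# the values below are hypothetical and `array` is numpy's, as imported in the module above.
seed_dict_example = {2001: 101, 2002: 202, 2003: 303}
seed_array_example = array(map(lambda year: seed_dict_example[year], range(2001, 2003 + 1)))
# -> array([101, 202, 303]); without the dictionary, randint(1, 2**30, nyears) draws seeds from root_seed.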
def test_correct_land_value(self): #TODO: need to remove this when fixed LP correction only working when year >= 2002 from opus_core.simulation_state import SimulationState SimulationState().set_current_time(2002) storage = StorageFactory().get_storage('dict_storage') gridcell_set_table_name = 'gridcell_set' storage.write_table( table_name=gridcell_set_table_name, table_data={ "percent_residential_within_walking_distance":array([30, 0, 90, 100]), "gridcell_year_built":array([2002, 1968, 1880, 1921]), "fraction_residential_land":array([0.5, 0.1, 0.3, 0.9]), "residential_land_value":array([0, 0, 0, 0]), "residential_land_value_lag1":array([15000, 0, 7500, 0]), "nonresidential_land_value":array([0, 0, 0, 0]), "nonresidential_land_value_lag1":array([15000, 0, 17500, 0]), "development_type_id":array( [2, 1, 1, 1]), "development_type_id_lag2":array( [1, 1, 1, 1]), "grid_id": array([1,2,3,4]) } ) gridcell_set = GridcellDataset(in_storage=storage, in_table_name=gridcell_set_table_name) specification = EquationSpecification(variables=( "percent_residential_within_walking_distance", "gridcell_year_built", "constant"), coefficients=("PRWWD", "YB", "constant")) coefficients = Coefficients(names=("constant", "PRWWD", "YB"), values=(10.0, -0.0025, 0.0001)) lp = LandPriceModel(filter=None, debuglevel=3) lp.run(specification, coefficients, gridcell_set) correctmodel = CorrectLandValue() correctmodel.run(gridcell_set) result1 = gridcell_set.get_attribute("residential_land_value") result2 = gridcell_set.get_attribute("nonresidential_land_value") self.assertEqual(ma.allclose(result1, array([15000.0, 2681.723, 6367.914, 18708.617]), rtol=1e-3), True) self.assertEqual(ma.allclose(result2, array([15000.0, 24135.510, 14858.466, 2078.735]), rtol=1e-3), True)
def opusRun(progressCB, logCB, params): params_dict = {} for key, val in params.iteritems(): params_dict[str(key)] = str(val) esri_data_path = params_dict['esri_data_path'] esri_table_name = params_dict['esri_table_name'] opus_data_directory = params_dict['opus_data_directory'] opus_data_year = params_dict['opus_data_year'] input_storage = esri_storage(storage_location=esri_data_path) attribute_cache = AttributeCache(cache_directory=opus_data_directory) output_storage = attribute_cache.get_flt_storage_for_year(opus_data_year) SimulationState().set_current_time(opus_data_year) SessionConfiguration(new_instance=True, package_order=[], in_storage=AttributeCache()) if esri_table_name == 'ALL': logCB("Sending all tables to OPUS storage...\n") lst = input_storage.get_table_names() for i in lst: ExportStorage().export_dataset( dataset_name=i, in_storage=input_storage, out_storage=output_storage, ) else: logCB("Exporting table '%s' to OPUS storage located at %s...\n" % (esri_table_name, opus_data_directory)) ExportStorage().export_dataset( dataset_name=esri_table_name, in_storage=input_storage, out_storage=output_storage, ) logCB("Finished exporting table '%s'\n" % (esri_table_name))
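# Hypothetical invocation sketch for opusRun() above; the parameter keys match what the
# function reads from params, but the paths and the callback implementations are made up
# for illustration (a GUI would normally supply the progress and log callbacks).
import sys

def _log(msg):
    sys.stdout.write(msg)

def _progress(pct):
    pass  # a GUI would update a progress bar here

example_params = {
    'esri_data_path': 'C:/data/parcels.gdb',                      # hypothetical geodatabase
    'esri_table_name': 'ALL',                                      # or a single table name
    'opus_data_directory': 'C:/opus/data/my_project/base_year_data',
    'opus_data_year': '2005',
}
# opusRun(_progress, _log, example_params)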
def test_my_inputs2(self): tester = VariableTester(__file__, package_order=['urbansim_parcel', 'urbansim'], test_data={ 'development_project_proposal': { "proposal_id": array([1, 2, 3, 4, 5]), "start_year": array([2005, 2007, 2005, 2006, 2006]) }, 'development_project_proposal_component': { "proposal_component_id": arange(8) + 1, "proposal_id": array([3, 3, 5, 2, 5, 1, 3, 1]), "component_id": array([3, 1, 4, 2, 4, 1, 3, 4]), "annual_construction_schedule": array([ "[0, 50, 100]", "[100]", "[25, 50, 75, 100]", "[0, 50, 100]", "[0, 50, 100]", "[25, 50, 75, 100]", "[25, 50, 75, 100]", "[100]" ]) }, 'development_template_component': { "component_id": array([1, 2, 3, 4]), "velocity_function_id": array([2, 1, 1, 3]) }, }) SimulationState().set_current_time(2007) should_be = array([50, 0, 25, 0, 50, 25, 25, 0]) tester.test_is_equal_for_variable_defined_by_this_module( self, should_be)
def test_gridcell_unrolling(self): """Checks that the unrolling of the gridcells by CacheScenarioDatabase worked correctly. """ cache_directory = SimulationState().get_cache_directory() gridcells = SessionConfiguration().get_dataset_from_pool('gridcell') development_event_history = SessionConfiguration( ).get_dataset_from_pool('development_event_history') unroller = UnrollGridcells() unroller.unroll_gridcells_to_cache(gridcells, development_event_history, cache_directory, self.base_year) self.assertEqual(self.temp_dir, os.path.split(cache_directory)[0]) gridcell = {} for year in [1976, 1977, 1979, 1980]: #current_year = SimulationState().get_current_time() #SimulationState().set_current_time(year) #gridcell[year] = SessionConfiguration().get_dataset_from_pool('gridcell') #SimulationState().set_current_time(current_year) flt_directory = os.path.join(cache_directory, str(year)) gridcell[year] = DatasetFactory().get_dataset( 'gridcell', package='urbansim', subdir='datasets', arguments={ 'in_storage': StorageFactory().get_storage( 'flt_storage', storage_location=flt_directory) }) diff = gridcell[1980].get_attribute('residential_units') - gridcell[ 1979].get_attribute('residential_units') self.assertEqual(1, sum(diff)) diff = gridcell[1977].get_attribute('commercial_sqft') - gridcell[ 1976].get_attribute('commercial_sqft') self.assertEqual(2255 + 199 + 332 + 2785, sum(diff))
def run(self, prefix='', suffix='', dataset_pool=None, alt_location=None, target_location=None):
    """
    prefix, suffix - 'YEAR' is a magic word that is replaced with the current run year;
                     for example, suffix='_YEAR' renames 'zoning_2020' to 'zoning' in
                     simulation year 2020 and, if a 'zoning' dataset already exists,
                     renames it to 'zoning_ORIGINAL'.  By default only the cache
                     directory for the current year is searched for matching datasets.
    alt_location - an alternative location to search for datasets of the specified
                   pattern, for example the base_year_cache.
    target_location - where to put the datasets to be renamed.
    """
    current_year = SimulationState().get_current_time()
    self.prefix = re.sub('YEAR', str(current_year), prefix)
    self.suffix = re.sub('YEAR', str(current_year), suffix)
    if target_location is not None:
        target_location = target_location.get_storage_location()

    locations = []
    if dataset_pool is not None:
        storage = dataset_pool.get_storage().get_flt_storage_for_year(current_year)
        location = storage.get_storage_location()
        locations.append(location)
        if target_location is None:
            target_location = location
    if alt_location is not None:
        location = alt_location.get_storage_location()
        locations.append(location)
        if target_location is None:
            target_location = location

    # build the search pattern from the year-substituted prefix/suffix; a literal
    # 'YEAR' in the glob would never match any cached dataset name
    for pattern in [os.path.join(location, '%s*%s' % (self.prefix, self.suffix))
                    for location in locations]:
        self._rename_datasets(pattern, target_location)
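# A small sketch of the 'YEAR' substitution described in the docstring above, using
# hypothetical dataset names: in simulation year 2020 a suffix of '_YEAR' becomes
# '_2020', so the glob matches 'zoning_2020' and that table can be swapped in for
# 'zoning' (the actual renaming is done by self._rename_datasets, not shown here).
import re

current_year = 2020
suffix = re.sub('YEAR', str(current_year), '_YEAR')    # -> '_2020'
pattern = '%s*%s' % ('', suffix)                       # -> '*_2020'
new_name = re.sub(suffix + '$', '', 'zoning_2020')     # -> 'zoning'
assert (suffix, pattern, new_name) == ('_2020', '*_2020', 'zoning')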
def test_my_inputs(self): SimulationState().set_current_time(2005) tester = VariableTester( __file__, package_order=['urbansim'], test_data={ "gridcell": { "grid_id": array([1, 2, 3]) }, "building": { "building_id": array([1, 2, 3, 4, 5, 6, 7]), "grid_id": array([1, 1, 2, 3, 1, 2, 1]), "year_built": array([1995, 2000, 2005, 0, 10, 0, 2005]), "is_building_type_commercial": array([0, 1, 0, 0, 0, 1, 1]) }, 'urbansim_constant': { "absolute_min_year": array([1800]), } }) should_be = array([True, False, False]) instance_name = "urbansim.gridcell.has_commercial_buildings_with_year_built" tester.test_is_equal_for_family_variable(self, should_be, instance_name)
def test_compute_unloads_from_memory(self): storage = StorageFactory().get_storage('dict_storage') storage.write_table(table_name='tests', table_data={ 'a_dependent_variable': array([1, 5, 10]), 'id': array([1, 3, 4]) }) dataset = Dataset(in_storage=storage, in_table_name='tests', id_name='id', dataset_name='tests') SessionConfiguration(in_storage=storage)["flush_variables"] = True dataset.get_attribute("a_dependent_variable") self.assert_( "a_dependent_variable" in dataset.get_attributes_in_memory()) dataset.compute_variables("opus_core.tests.a_test_variable") self.assert_( "a_dependent_variable" not in dataset.get_attributes_in_memory()) self.assert_("a_test_variable" in dataset.get_attributes_in_memory()) SimulationState().remove_singleton(delete_cache=True)
def setup_environment(cache_directory, year, package_order, additional_datasets={}): gc.collect() ss = SimulationState(new_instance=True) ss.set_cache_directory(cache_directory) ss.set_current_time(year) ac = AttributeCache() storage = ac.get_flt_storage_for_year(year) sc = SessionConfiguration(new_instance=True, package_order=package_order, in_storage=ac) logger.log_status( "Setup environment for year %s. Use cache directory %s." % (year, storage.get_storage_location())) dp = sc.get_dataset_pool() for name, ds in additional_datasets.iteritems(): dp.replace_dataset(name, ds) return dp
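# Hypothetical usage sketch for setup_environment() above: the cache directory, year and
# package order are made-up values; additional_datasets would let an on-the-fly dataset
# shadow the cached one of the same name.
cache_directory = '/urbansim_cache/psrc_run_42'        # hypothetical path
dataset_pool = setup_environment(cache_directory, 2005,
                                 package_order=['urbansim', 'opus_core'])
households = dataset_pool.get_dataset('household')
# setup_environment(cache_directory, 2005, ['urbansim', 'opus_core'],
#                   additional_datasets={'household': my_households})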
def run(self, year, matrix_directory=None): """This is the main entry point. It gets the appropriate values from the travel_model_configuration part of this config, and then copies the specified emme/2 matrices into the specified travel_data variable names. Results in a new travel_data cache for year+1. If matrix_directory is not None, it is assumed the matrices files are already created in the given directory. """ cache_directory = self.config['cache_directory'] simulation_state = SimulationState() simulation_state.set_current_time(year) simulation_state.set_cache_directory(cache_directory) year_config = self.config['travel_model_configuration'][year] matrices_created = False if matrix_directory is not None: matrices_created = True reports = self.config['travel_model_configuration'].get( 'reports_to_copy', []) for x in 1, 2, 3: if matrix_directory is None: bank_dir = self.get_emme2_dir(year, "bank%i" % x) else: bank_dir = os.path.join(matrix_directory, "bank%i" % x) if "bank%i" % x in year_config['matrix_variable_map']: self.get_needed_matrices_from_emme2( year, year_config['cache_directory'], bank_dir, year_config['matrix_variable_map']["bank%i" % x], matrices_created) for report in reports: self.copy_report_to_cache(report, year, year_config['cache_directory'], bank_dir) if "bank%i" % x in year_config.get('node_matrix_variable_map', {}): node_variable_map = year_config['node_matrix_variable_map'][ "bank%i" % x] if len(node_variable_map.keys()) > 0: self.get_needed_node_matrices_from_emme2( year, year_config['cache_directory'], bank_dir, node_variable_map)
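# An illustrative, not project-specific, fragment of the travel_model_configuration read
# above: per-year entries map emme/2 matrix names in each bank to travel_data attribute
# names.  The matrix and attribute names below are hypothetical.
example_travel_model_configuration = {
    'reports_to_copy': [],
    2005: {
        'cache_directory': '/urbansim_cache/my_run',   # hypothetical path
        'matrix_variable_map': {
            'bank1': {'au1tim': 'am_single_vehicle_to_work_travel_time'},
            'bank2': {'atrtwa': 'am_total_transit_time_walk'},
        },
        'node_matrix_variable_map': {},
    },
}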
def _compute_variable_for_prior_year(self, dataset, full_name, time, resources=None):
    """Create a new dataset for this variable, compute the variable, and then return
    the values for this variable."""
    calling_dataset_pool = SessionConfiguration().get_dataset_pool()
    calling_time = SimulationState().get_current_time()
    SimulationState().set_current_time(time)
    # Do not flush any variables when computing dependencies for a lag variable.
    prior_flush_state = SimulationState().get_flush_datasets()
    SimulationState().set_flush_datasets(False)
    try:
        # Get an empty dataset pool with same search paths.
        my_dataset_pool = DatasetPool(
            package_order=calling_dataset_pool.get_package_order(),
            storage=AttributeCache())
        try:
            ds = dataset.empty_dataset_like_me(in_storage=AttributeCache())
        except FileNotFoundError:
            ## necessary when a dataset is not cached, but created on-the-fly, e.g. submarket
            ds = my_dataset_pool.get_dataset(dataset.dataset_name)

        # Don't pass any datasets via resources, since they may be from a different time.
        my_resources = Resources(resources)
        for key in my_resources.keys():  # iterate over a copy of the keys so entries can be deleted
            if isinstance(key, Dataset):
                del my_resources[key]

        ds.compute_variables(full_name, my_dataset_pool, resources=my_resources)
        values = ds.get_attribute(full_name)
        return values
    finally:
        SimulationState().set_current_time(calling_time)
        SimulationState().set_flush_datasets(prior_flush_state)
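# A stripped-down sketch of the save/restore pattern used above, with a hypothetical
# two-year lag: the simulation clock is moved back, the work is done against that
# year's cache, and the clock is always restored even if the computation fails.
def compute_in_prior_year(work, lag=2):
    calling_time = SimulationState().get_current_time()
    SimulationState().set_current_time(calling_time - lag)
    try:
        return work()   # e.g. compute 'population' against the older year's cache
    finally:
        SimulationState().set_current_time(calling_time)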
def run(self, config, year, *args, **kwargs): """This is the main entry point. It gets the appropriate values from the travel_model_configuration part of this config, and then copies the specified data into the specified travel_data variable names. Results in a new travel_data cache for year+1. """ cache_directory = config['cache_directory'] simulation_state = SimulationState() simulation_state.set_current_time(year) simulation_state.set_cache_directory(cache_directory) logger.start_block('Getting data from travel model') next_year = year + 1 flt_dir_for_next_year = os.path.join(cache_directory, str(next_year)) if not os.path.exists(flt_dir_for_next_year): os.mkdir(flt_dir_for_next_year) attribute_cache = AttributeCache() dataset_pool = SessionConfiguration(new_instance=True, package_order=config['dataset_pool_configuration'].package_order, in_storage=attribute_cache).get_dataset_pool() zone_set = dataset_pool.get_dataset('zone') # zone_set = ZoneDataset(in_storage_location=flt_dir_for_this_year, # in_storage_type='flt_storage', # in_table_name='zones') zone_set.load_dataset() self.prepare_for_run(config['travel_model_configuration'], year) travel_data_set = self.get_travel_data_from_travel_model(config, year, zone_set, *args, **kwargs) logger.end_block() logger.start_block('Writing travel data to cache') out_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_dir_for_next_year) #out_storage = flt_storage(Resources(data={"storage_location":flt_dir_for_next_year})) travel_data_set.write_dataset(attributes=travel_data_set.get_known_attribute_names(), out_storage=out_storage, out_table_name='travel_data') logger.end_block()
def run(self, config, year, *args, **kwargs):
    """This is the main entry point.  It gets the appropriate configuration info from the
    travel_model_configuration part of this config, and then copies the specified UrbanSim
    data into files for the travel model to read.
    """
    cache_directory = config['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(cache_directory)
    simulation_state.set_current_time(year)
    attribute_cache = AttributeCache()
    dataset_pool = SessionConfiguration(
        new_instance=True,
        package_order=config['dataset_pool_configuration'].package_order,
        in_storage=attribute_cache).get_dataset_pool()

    #cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
    #datasets = DatasetFactory().create_datasets_from_flt(config.get('datasets_to_preload',{}),
    #    "urbansim",
    #    additional_arguments={'in_storage': attribute_cache})

    zone_set = dataset_pool.get_dataset('zone')
    self.prepare_for_run(config['travel_model_configuration'], year)
    self.create_travel_model_input_file(config, year, zone_set, dataset_pool, *args, **kwargs)
tab_directory = options.tab_directory attribute_cache_directory = options.attribute_cache_directory table_name = options.table_name cache_year = options.cache_year if (tab_directory is None or attribute_cache_directory is None or table_name is None or cache_year is None): parser.print_help() sys.exit(1) input_storage = tab_storage(storage_location=tab_directory) attribute_cache = AttributeCache(cache_directory=attribute_cache_directory) output_storage = attribute_cache.get_flt_storage_for_year(cache_year) SimulationState().set_current_time(cache_year) SessionConfiguration(new_instance=True, package_order=[], in_storage=AttributeCache()) logger.start_block( "Exporting table '%s' to year %s of cache located at %s..." % (table_name, cache_year, attribute_cache_directory)) try: ExportStorage().export_dataset( dataset_name=table_name, in_storage=input_storage, out_storage=output_storage, ) finally: logger.end_block()
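# A hypothetical stand-alone equivalent of the command-line export above, with made-up
# paths and table name; it uses the same storage objects the script builds from its options.
tab_dir = '/projects/my_project/tab_files'             # hypothetical
cache_dir = '/urbansim_cache/my_project/base_year'     # hypothetical
input_storage = tab_storage(storage_location=tab_dir)
attribute_cache = AttributeCache(cache_directory=cache_dir)
output_storage = attribute_cache.get_flt_storage_for_year(2005)
SimulationState().set_current_time(2005)
SessionConfiguration(new_instance=True, package_order=[], in_storage=AttributeCache())
ExportStorage().export_dataset(dataset_name='households',
                               in_storage=input_storage,
                               out_storage=output_storage)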
def run(self, specification, coefficients, dataset, index=None, chunk_specification=None,
        data_objects=None, run_config=None, debuglevel=0):
    """ For info on the arguments see RegressionModel.
    """
    outcome_attribute_short = self.outcome_attribute.get_alias()
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    if self.filter_attribute is not None:
        res = Resources({"debug": debuglevel})
        index = dataset.get_filtered_index(self.filter_attribute, threshold=0, index=index,
                                           dataset_pool=self.dataset_pool, resources=res)

    current_year = SimulationState().get_current_time()
    current_month = int(re.search(r'\d+$', outcome_attribute_short).group())
    # date in YYYYMM format, matching the id_name field of the weather dataset
    date = int("%d%02d" % (current_year, current_month))
    date = array([date] * dataset.size())
    if "date" in dataset.get_known_attribute_names():
        dataset.set_values_of_one_attribute("date", date)
    else:
        dataset.add_primary_attribute(date, "date")

    water_demand = RegressionModel.run(self, specification, coefficients, dataset, index,
                                       chunk_specification, run_config=run_config,
                                       debuglevel=debuglevel)
    if (water_demand is None) or (water_demand.size <= 0):
        return water_demand

    if index is None:
        index = arange(dataset.size())
    if re.search("^ln_", outcome_attribute_short):
        # if the outcome attribute name starts with 'ln_', the results are exponentiated
        outcome_attribute_name = outcome_attribute_short[3:]
        water_demand = exp(water_demand)
    else:
        outcome_attribute_name = outcome_attribute_short

    if outcome_attribute_name in dataset.get_known_attribute_names():
        dataset.set_values_of_one_attribute(outcome_attribute_name, water_demand, index)
    else:
        results = zeros(dataset.size(), dtype=water_demand.dtype)
        results[index] = water_demand
        dataset.add_primary_attribute(results, outcome_attribute_name)

    return water_demand
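# A small illustration of the two conventions used above, with a hypothetical outcome
# attribute alias: the month is parsed off the end of the alias and combined with the
# current year into a YYYYMM key, and an alias starting with 'ln_' means the predicted
# values are exponentiated before being stored without the prefix.
import re

outcome_alias = 'ln_water_demand_07'   # hypothetical alias ending in a month number
current_year = 2005
current_month = int(re.search(r'\d+$', outcome_alias).group())          # -> 7
assert int("%d%02d" % (current_year, current_month)) == 200507

if re.search("^ln_", outcome_alias):
    stored_name = outcome_alias[3:]    # results would be exp()'d and stored under this name
assert stored_name == 'water_demand_07'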
def write_table(self, table_name, table_data, mode=Storage.OVERWRITE): year = SimulationState().get_current_time() storage = flt_storage( os.path.join(self.get_storage_location(), '%s' % year)) return storage.write_table(table_name, table_data, mode)
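# Illustrative effect of write_table() above, with hypothetical values: with the
# simulation clock at 2005, the table is written into the '2005' subdirectory of the
# cache, mirroring the one-flt-directory-per-year layout used by AttributeCache.
import os

year = 2005
cache_root = '/urbansim_cache/my_run'                  # hypothetical storage location
target_dir = os.path.join(cache_root, '%s' % year)     # -> '/urbansim_cache/my_run/2005'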