def __init__(self, config):
    """Assign every household to a building in its zone and write the result.

    Sets up the simulation state and session configuration from ``config``,
    runs ``CreateBaseyearCache``, and then, zone by zone, draws a building
    for each household of the zone with probability proportional to the
    building's share of the zone's ``residential_units``.

    The households dataset (with the new ``building_id`` attribute) is
    written to table ``households`` in the estimation database when
    ``estimation_database_configuration`` is present in ``config``;
    otherwise to an flt storage located in the ``base_year + 1``
    sub-directory of ``config['cache_directory']``.

    Households living in a skipped zone (no capacity, or no households)
    keep the initial building_id of -1.
    """
    ss = SimulationState(new_instance=True)
    ss.set_current_time(config['base_year'])
    ss.set_cache_directory(config['cache_directory'])

    SessionConfiguration(new_instance=True,
                         package_order=config['dataset_pool_configuration'].package_order,
                         in_storage=AttributeCache())

    # The return value of the cacher was never used; only the call matters.
    CreateBaseyearCache().run(config)

    if 'estimation_database_configuration' in config:
        db_server = DatabaseServer(config['estimation_database_configuration'])
        db = db_server.get_database(config['estimation_database_configuration'].database_name)
        out_storage = StorageFactory().get_storage('sql_storage', storage_location=db)
    else:
        output_cache = os.path.join(config['cache_directory'], str(config['base_year'] + 1))
        out_storage = StorageFactory().get_storage('flt_storage', storage_location=output_cache)

    dataset_pool = SessionConfiguration().get_dataset_pool()
    households = dataset_pool.get_dataset("household")
    buildings = dataset_pool.get_dataset("building")
    zones = dataset_pool.get_dataset("zone")
    zone_ids = zones.get_id_attribute()

    capacity_attribute_name = "residential_units"
    capacity_variable_name = "%s=sanfrancisco.zone.aggregate_%s_from_building" % \
                             (capacity_attribute_name, capacity_attribute_name)
    buildings.compute_variables("sanfrancisco.building.zone_id", dataset_pool=dataset_pool)
    zones.compute_variables(capacity_variable_name, dataset_pool=dataset_pool)

    # These attribute arrays do not change inside the loop, so fetch them once.
    building_zone_id = buildings.get_attribute('zone_id')
    building_capacity = buildings.get_attribute(capacity_attribute_name)
    household_zone_id = households.get_attribute('zone_id')

    is_household_unplaced = 1  # all households are treated as unplaced
    household_building_id = zeros(households.size(), dtype='int32') - 1
    for zone_id in zone_ids:
        capacity = zones.get_attribute_by_id(capacity_attribute_name, zone_id)
        is_household_in_this_zone = (household_zone_id == zone_id)
        is_unplaced_household_in_this_zone = is_household_in_this_zone * is_household_unplaced
        is_building_in_this_zone = (building_zone_id == zone_id)
        if capacity == 0 or is_household_in_this_zone.sum() == 0:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("WARNING: zone %s has %s households but only %s units" %
                  (zone_id, is_household_in_this_zone.sum(), capacity))
            continue

        # Buildings outside the zone get probability 0; in-zone buildings get
        # their share of the zone's capacity.
        prob = building_capacity * is_building_in_this_zone / array(capacity, dtype=float64)
        r = random(sum(is_unplaced_household_in_this_zone))
        prob_cumsum = ncumsum(prob)
        # Inverse-CDF sampling: position of each random draw in the cumulative sum.
        index_to_bldg = searchsorted(prob_cumsum, r)

        household_building_id[where(is_unplaced_household_in_this_zone)] = \
            buildings.get_attribute_by_index('building_id', index_to_bldg)

    households.set_values_of_one_attribute('building_id', household_building_id)
    households.write_dataset(out_table_name='households', out_storage=out_storage)
def run(self, config, year, *args, **kwargs):
    """Main entry point: export UrbanSim data for the travel model.

    Points the simulation state at ``config['cache_directory']`` and
    ``year``, builds a new session configuration backed by an attribute
    cache, fetches the ``travel_zone`` dataset, calls ``prepare_for_run``
    with the ``travel_model_configuration`` part of ``config`` and finally
    delegates to ``create_travel_model_input_file``.  Extra positional and
    keyword arguments are forwarded to that last call.
    """
    cache_dir = config['cache_directory']
    state = SimulationState()
    state.set_cache_directory(cache_dir)
    state.set_current_time(year)

    cache_storage = AttributeCache()
    session = SessionConfiguration(
        new_instance=True,
        package_order=config['dataset_pool_configuration'].package_order,
        in_storage=cache_storage)
    pool = session.get_dataset_pool()

    travel_zones = pool.get_dataset('travel_zone')
    self.prepare_for_run(config['travel_model_configuration'], year)
    self.create_travel_model_input_file(config=config,
                                        year=year,
                                        zone_set=travel_zones,
                                        datasets=pool,
                                        *args, **kwargs)
def __init__(self, config):
    """Assign every household to a building in its zone and write the result.

    Sets up the simulation state and session configuration from ``config``,
    runs ``CreateBaseyearCache``, and then, zone by zone, draws a building
    for each household of the zone with probability proportional to the
    building's share of the zone's ``residential_units``.

    The households dataset (with the new ``building_id`` attribute) is
    written to table ``households`` in the estimation database when
    ``estimation_database_configuration`` is present in ``config``;
    otherwise to an flt storage located in the ``base_year + 1``
    sub-directory of ``config['cache_directory']``.

    Households living in a skipped zone (no capacity, or no households)
    keep the initial building_id of -1.
    """
    ss = SimulationState(new_instance=True)
    ss.set_current_time(config['base_year'])
    ss.set_cache_directory(config['cache_directory'])

    SessionConfiguration(new_instance=True,
                         package_order=config['dataset_pool_configuration'].package_order,
                         in_storage=AttributeCache())

    # The return value of the cacher was never used; only the call matters.
    CreateBaseyearCache().run(config)

    if 'estimation_database_configuration' in config:
        db_server = DatabaseServer(config['estimation_database_configuration'])
        db = db_server.get_database(config['estimation_database_configuration'].database_name)
        out_storage = StorageFactory().get_storage('sql_storage', storage_location=db)
    else:
        output_cache = os.path.join(config['cache_directory'], str(config['base_year'] + 1))
        out_storage = StorageFactory().get_storage('flt_storage', storage_location=output_cache)

    dataset_pool = SessionConfiguration().get_dataset_pool()
    households = dataset_pool.get_dataset("household")
    buildings = dataset_pool.get_dataset("building")
    zones = dataset_pool.get_dataset("zone")
    zone_ids = zones.get_id_attribute()

    capacity_attribute_name = "residential_units"
    capacity_variable_name = "%s=sanfrancisco.zone.aggregate_%s_from_building" % \
                             (capacity_attribute_name, capacity_attribute_name)
    buildings.compute_variables("sanfrancisco.building.zone_id", dataset_pool=dataset_pool)
    zones.compute_variables(capacity_variable_name, dataset_pool=dataset_pool)

    # These attribute arrays do not change inside the loop, so fetch them once.
    building_zone_id = buildings.get_attribute('zone_id')
    building_capacity = buildings.get_attribute(capacity_attribute_name)
    household_zone_id = households.get_attribute('zone_id')

    is_household_unplaced = 1  # all households are treated as unplaced
    household_building_id = zeros(households.size(), dtype='int32') - 1
    for zone_id in zone_ids:
        capacity = zones.get_attribute_by_id(capacity_attribute_name, zone_id)
        is_household_in_this_zone = (household_zone_id == zone_id)
        is_unplaced_household_in_this_zone = is_household_in_this_zone * is_household_unplaced
        is_building_in_this_zone = (building_zone_id == zone_id)
        if capacity == 0 or is_household_in_this_zone.sum() == 0:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("WARNING: zone %s has %s households but only %s units" %
                  (zone_id, is_household_in_this_zone.sum(), capacity))
            continue

        # Buildings outside the zone get probability 0; in-zone buildings get
        # their share of the zone's capacity.
        prob = building_capacity * is_building_in_this_zone / array(capacity, dtype=float64)
        r = random(sum(is_unplaced_household_in_this_zone))
        prob_cumsum = ncumsum(prob)
        # Inverse-CDF sampling: position of each random draw in the cumulative sum.
        index_to_bldg = searchsorted(prob_cumsum, r)

        household_building_id[where(is_unplaced_household_in_this_zone)] = \
            buildings.get_attribute_by_index('building_id', index_to_bldg)

    households.set_values_of_one_attribute('building_id', household_building_id)
    households.write_dataset(out_table_name='households', out_storage=out_storage)
def run(self, config, show_output = False):
    """Cache the configured scenario-database tables.

    Records ``show_output`` on the instance, opens the database server
    described by ``config['scenario_database_configuration']``, configures
    the simulation state (cache directory, base year and, when present in
    ``config``, the low-memory flag) and calls ``cache_database_tables``
    for every database/tables pair reported by the scenario database
    manager.  Tables that were requested but never cached are reported as
    warnings.
    """
    logger.log_status("Caching large SQL tables to: " + config['cache_directory'])

    self.show_output = show_output

    db_config = config['scenario_database_configuration']
    db_manager = ScenarioDatabaseManager(
        server_configuration=db_config,
        base_scenario_database_name=db_config.database_name)
    self.database_server = DatabaseServer(db_config)

    tables_by_database = db_manager.get_database_to_table_mapping()

    self.tables_to_cache = config['creating_baseyear_cache_configuration'].tables_to_cache

    state = SimulationState()
    if 'low_memory_run' in config:
        state.set_low_memory_run(config['low_memory_run'])
    state.set_cache_directory(config['cache_directory'])
    state.set_current_time(config['base_year'])

    self.tables_cached = set()
    for db_name, db_tables in tables_by_database.items():
        self.cache_database_tables(config, db_name, db_tables)

    missing_tables = set(self.tables_to_cache) - self.tables_cached
    if not missing_tables:
        return
    logger.log_warning('The following requested tables were NOT cached:')
    for missing in missing_tables:
        logger.log_warning('\t%s' % missing)
def __init__(self, config):
    """Build a ``persons`` table from household worker counts and write it.

    For every household with ``workers > 0`` one person row is produced per
    worker: ``household_id`` is the household's id, ``member_id`` runs from
    1 to the number of workers, ``is_worker`` is 1 and ``job_id`` is -1.
    ``person_id`` numbers all rows consecutively starting at 1.

    Output goes to the estimation database when
    ``estimation_database_configuration`` is in ``config``, otherwise to an
    flt storage in the ``base_year + 1`` sub-directory of the cache.  If
    the base-year cache directory does not exist yet,
    ``CacheScenarioDatabase`` is run first.
    """
    if 'estimation_database_configuration' in config:
        server = DatabaseServer(config['estimation_database_configuration'])
        database = server.get_database(
            config['estimation_database_configuration'].database_name)
        out_storage = StorageFactory().build_storage_for_dataset(
            type='sql_storage', storage_location=database)
    else:
        out_storage = StorageFactory().get_storage(
            type='flt_storage',
            storage_location=os.path.join(config['cache_directory'],
                                          str(config['base_year']+1)))

    state = SimulationState()
    state.set_cache_directory(config['cache_directory'])
    state.set_current_time(config['base_year'])
    cache_storage = AttributeCache()

    SessionConfiguration(new_instance=True,
                         package_order=config['dataset_pool_configuration'].package_order,
                         in_storage=cache_storage)

    base_year_dir = os.path.join(config['cache_directory'], str(config['base_year']))
    if not os.path.exists(base_year_dir):
        CacheScenarioDatabase().run(config, unroll_gridcells=False)

    for preload_name in config['datasets_to_preload']:
        SessionConfiguration().get_dataset_from_pool(preload_name)

    households = SessionConfiguration().get_dataset_from_pool("household")
    household_ids = households.get_id_attribute()
    workers = households.get_attribute("workers")

    # One list entry per worker, built household by household.
    hh_ids, member_ids, is_worker, job_ids = [], [], [], []
    for row in range(households.size()):
        n_workers = workers[row]
        if not n_workers > 0:
            continue
        hh_ids.extend([household_ids[row]] * n_workers)
        member_ids.extend(range(1, n_workers+1))
        is_worker.extend([1] * n_workers)
        job_ids.extend([-1] * n_workers)

    persons_table_name = 'persons'
    in_storage = StorageFactory().get_storage('dict_storage')
    person_columns = {
        'person_id':arange(len(hh_ids))+1,
        'household_id':array(hh_ids),
        'member_id':array(member_ids),
        'is_worker':array(is_worker),
        'job_id':array(job_ids),
        }
    in_storage.write_table(table_name=persons_table_name,
                           table_data=person_columns)

    persons = PersonDataset(in_storage=in_storage, in_table_name=persons_table_name)
    persons.write_dataset(out_storage=out_storage, out_table_name=persons_table_name)
def run(self, year, cache_directory=None):
    """Compute and export the configured UrbanSim variables for ``year``.

    The variables/expressions to export are read from
    ``travel_model_configuration/urbansim_to_tm_variable_mapping`` of
    ``self.config``.  Each entry maps a table name to a dictionary of
    ``alias -> expression``; two optional special keys are honoured:

    ``__filter__``          expression; only rows where it is > 0 are exported
    ``__out_table_name__``  output name (defaults to the table name)

    Arguments:
    year -- simulation year the state is set to.
    cache_directory -- cache to read from; defaults to
        ``self.config['cache_directory']``.

    Returns whatever ``self._call_input_file_writer`` returns.

    Unlike the earlier implementation, the configuration is only read, not
    modified: the special keys used to be deleted from ``self.config``, so
    a second call (e.g. for the next year) silently lost the filter and the
    output table name.
    """
    if cache_directory is None:
        cache_directory = self.config['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(cache_directory)
    simulation_state.set_current_time(year)
    attribute_cache = AttributeCache()
    sc = SessionConfiguration(new_instance=True,
                              package_order=self.config['dataset_pool_configuration'].package_order,
                              in_storage=attribute_cache)
    dataset_pool = sc.get_dataset_pool()

    tm_config = self.config['travel_model_configuration']
    data_to_export = tm_config['urbansim_to_tm_variable_mapping']

    special_keys = ('__filter__', '__out_table_name__')
    variable_names = {}
    datasets = {}
    filenames = {}
    in_table_names = {}
    for table_name in data_to_export.keys():
        table_spec = data_to_export[table_name]
        filter_expression = table_spec.get('__filter__', None)
        out_table_name = table_spec.get('__out_table_name__', None)
        if out_table_name is None:
            out_table_name = table_name

        variables_to_export = ["%s = %s" % (alias, table_spec[alias])
                               for alias in table_spec.keys()
                               if alias not in special_keys]

        # The dataset is determined by the first variable of the table.
        dataset_name = None
        ds = None
        for var in variables_to_export:
            var_name = VariableName(var)
            if dataset_name is None:
                dataset_name = var_name.get_dataset_name()
                ds = dataset_pool.get_dataset(dataset_name)
                datasets[dataset_name] = ds
                filenames[dataset_name] = out_table_name
                in_table_names[dataset_name] = table_name
                if dataset_name not in variable_names:
                    variable_names[dataset_name] = []
            variable_names[dataset_name].append(var_name.get_alias())
            ds.compute_variables([var_name], dataset_pool=dataset_pool)

        # A table without variables has no dataset to filter; skip it rather
        # than reuse the dataset of a previously processed table.
        if filter_expression is not None and ds is not None:
            filter_idx = where(ds.compute_variables(["__filter__ = %s" % filter_expression],
                                                    dataset_pool=dataset_pool) > 0)[0]
            ds = DatasetSubset(ds, index=filter_idx)
            datasets[dataset_name] = ds

    return self._call_input_file_writer(year, datasets, in_table_names, filenames,
                                        variable_names, dataset_pool)
def run(self, base_directory, urbansim_cache_directory, years):
    """Run the land cover change model for each of the given years.

    base_directory: location of the land cover data; used as both input
        and output flt storage for every year.
    urbansim_cache_directory: directory containing all year folders of
        the urbansim cache (gridcells are read from it).
    years: list of years to run.

    Coefficients and specification are loaded from the tab storage in the
    ``data`` directory under ``self.package_path``.  Each year the land
    cover dataset is subset to the first 100000 indices before the model
    is run, and flushed afterwards.
    """
    lccm = LandCoverChangeModel(self.possible_lcts,
                                submodel_string=self.lct_attribute,
                                choice_attribute_name=self.lct_attribute,
                                debuglevel=4)
    coefficients = Coefficients()
    data_dir = os.path.join(self.package_path, 'data')
    tab_storage = StorageFactory().get_storage('tab_storage', storage_location=data_dir)
    coefficients.load(in_storage=tab_storage,
                      in_table_name="land_cover_change_model_coefficients")
    specification = EquationSpecification(in_storage=tab_storage)
    specification.load(in_table_name="land_cover_change_model_specification")
    specification.set_variable_prefix("biocomplexity.land_cover.")
    constants = Constants()

    state = SimulationState()
    state.set_cache_directory(urbansim_cache_directory)
    cache_storage = AttributeCache()

    subset_index = arange(100000)
    for current_year in years:
        state.set_current_time(current_year)
        land_cover_path = base_directory
        lc_in_storage = StorageFactory().get_storage('flt_storage',
                                                     storage_location=land_cover_path)
        lc_out_storage = StorageFactory().get_storage('flt_storage',
                                                      storage_location=land_cover_path)
        land_covers = LandCoverDataset(in_storage=lc_in_storage,
                                       out_storage=lc_out_storage,
                                       debuglevel=4)
        land_covers.subset_by_index(subset_index)
        gridcells = GridcellDataset(in_storage=cache_storage, debuglevel=4)
        agents_index = None
        model_data = {"gridcell":gridcells,
                      "constants":constants,
                      "flush_variables":True}
        lccm.run(specification, coefficients, land_covers,
                 data_objects=model_data,
                 chunk_specification={'nchunks':1})
        land_covers.flush_dataset()
        # Drop the per-year datasets before the next iteration.
        del gridcells
        del land_covers
def test_doing_refinements_from_specified_refinement_dataset(self):
    """Run the refinement script for 2021-2022 with an explicit refinements
    directory (the ``2000`` folder of the test cache) and check the
    resulting jobs, households and buildings.

    NOTE(review): three checks for 2022 used ``self.assert_(value, expected)``,
    which treats ``expected`` as the failure message and therefore only
    tested that ``value`` was truthy.  They are now real equality checks;
    if one of them fails, the expected number was never actually verified.
    """
    self.prepare_cache()
    os.system("%(python)s %(script)s -c %(cache_directory)s -s %(start_year)s -e %(end_year)s --refinements-directory=%(refinement_directory)s" %
              {'python': sys.executable,
               'script': self.script,
               'cache_directory': self.cache_dir,
               'start_year': 2021,
               'end_year': 2022,
               'refinement_directory': os.path.join(self.cache_dir, '2000')}
              )

    simulation_state = SimulationState()

    ## test refinement for 2021
    simulation_state.set_current_time(2021)
    jobs = self.dataset_pool.get_dataset('job')
    buildings = self.dataset_pool.get_dataset('building')
    jobs13_raz3 = jobs.compute_variables('numpy.logical_and(job.sector_id==13, job.disaggregate(parcel.raz_id==3, intermediates=[building]))',
                                         dataset_pool=self.dataset_pool)
    jobs13_raz4 = jobs.compute_variables('numpy.logical_and(job.sector_id==13, job.disaggregate(parcel.raz_id, intermediates=[building])==4)',
                                         dataset_pool=self.dataset_pool)
    jobs13_raz5 = jobs.compute_variables('numpy.logical_and(job.sector_id==13, job.disaggregate(parcel.raz_id, intermediates=[building])==5 )',
                                         dataset_pool=self.dataset_pool)
    jobs_raz5 = jobs.compute_variables('job.disaggregate(parcel.raz_id, intermediates=[building])==5',
                                       dataset_pool=self.dataset_pool)

    # check results
    self.assertEqual(jobs13_raz3.sum(), 0)
    self.assertEqual(jobs13_raz4.sum(), 0)
    self.assertTrue(jobs13_raz5.sum() >= 5)
    self.assertEqual(jobs_raz5.sum(), 7)
    expected_nr_sqft = array([6, 0, 3, 6, 1, 6, 5, 0])  ## was array([6, 2, 3, 6, 1, 2, 5, 0]),
    self.assertTrue(allclose(buildings.get_attribute('non_residential_sqft'), expected_nr_sqft))

    self.dataset_pool.remove_all_datasets()

    ## test refinement for 2022
    simulation_state.set_current_time(2022)
    hhs = self.dataset_pool.get_dataset('household')
    buildings = self.dataset_pool.get_dataset('building')

    hhs_raz6 = hhs.compute_variables('household.disaggregate(building.disaggregate(parcel.raz_id)==6)',
                                     dataset_pool=self.dataset_pool)
    hhs_bldg = buildings.compute_variables('building.number_of_agents(household)',
                                           dataset_pool=self.dataset_pool)

    # check results
    self.assertEqual(hhs_raz6.sum(), 7)
    self.assertEqual(hhs_bldg.sum(), 7)
    self.assertEqual((hhs_bldg != 0).sum(), 2)
    self.assertEqual(buildings.get_attribute('residential_units').sum(), 7)

    self.dataset_pool.remove_all_datasets()
def run(self, year, skim_directory=None):
    """Write travel data for ``year`` into the cache.

    Sets the simulation state to ``year`` and the configured cache
    directory, then delegates to ``self.write_travel_data``.  The
    ``skim_directory`` argument is accepted but not used here.
    """
    cache_dir = self.config['cache_directory']

    state = SimulationState()
    state.set_current_time(year)
    state.set_cache_directory(cache_dir)

    # Looked up as in the original although the value is not used below.
    year_config = self.config['travel_model_configuration'][year]

    self.write_travel_data(year, cache_dir)
def run(self, year):
    """Main entry point: prepare datasets and write travel model input.

    Sets the simulation state to ``year``, builds a fresh dataset pool on
    top of the attribute cache and loads the household, zone and job
    datasets.

    If the entry ``locations_to_disaggregate`` of the
    travel_model_configuration is non-empty, the zone id is computed for
    households and jobs by disaggregating it over the listed datasets.
    The list is ordered from higher to lower aggregation level, e.g.
    ['parcel', 'building']: the first name is the dataset carrying the
    zone id, the remaining ones are passed as intermediates.

    Returns the result of ``self._call_input_file_writer``.
    """
    cache_dir = self.config['cache_directory']
    state = SimulationState()
    state.set_cache_directory(cache_dir)
    state.set_current_time(year)
    cache_storage = AttributeCache()

    session = SessionConfiguration(
        new_instance=True,
        package_order=self.config['dataset_pool_configuration'].package_order,
        in_storage=cache_storage)
    dataset_pool = session.get_dataset_pool()

    hh_set = dataset_pool.get_dataset('household')
    zone_set = dataset_pool.get_dataset('zone')
    job_set = dataset_pool.get_dataset('job')

    locations = self.config['travel_model_configuration']['locations_to_disaggregate']
    n_locations = len(locations)
    if n_locations > 0:
        primary_location = locations[0]
        if n_locations > 1:
            # Every intermediate is followed by ", " (including the last one).
            listed = "".join(["%s, " % (locations[i],) for i in range(1, n_locations)])
            intermediates_string = ", intermediates=[" + listed + "]"
        else:
            intermediates_string = ""

        for agent_set, agent_name in ((hh_set, 'household'), (job_set, 'job')):
            expression = ('%s = ' + agent_name + '.disaggregate(%s.%s %s)') % (
                zone_set.get_id_name()[0],
                primary_location,
                zone_set.get_id_name()[0],
                intermediates_string)
            agent_set.compute_variables([expression], dataset_pool=dataset_pool)

    return self._call_input_file_writer(year, dataset_pool)
def target_func(self, est_v, func=lambda x, y: np.sum(np.abs(x - y)), **kwargs):
    """Evaluate the calibration objective for the estimate ``est_v``.

    Sets the simulation state to ``self.base_year`` /
    ``self.cache_directory``, builds a fresh dataset pool, obtains a
    prediction from ``self.update_prediction`` and returns
    ``func(prediction, target)``.

    est_v -- estimate passed on to ``update_prediction``.
    func -- callable taking (predicted array, target array); defaults to
        the sum of absolute differences.
    kwargs -- forwarded to ``update_prediction``.

    Keys of ``self.target`` missing from the prediction count as 0.
    """
    simulation_state = SimulationState()
    simulation_state.set_current_time(self.base_year)
    simulation_state.set_cache_directory(self.cache_directory)
    attribute_cache = AttributeCache()
    dataset_pool = SessionConfiguration(
        new_instance=True,
        package_order=self.package_order,
        in_storage=attribute_cache).get_dataset_pool()

    # NOTE(review): calib_datasets starts out empty, so the loop below never
    # executes and update_prediction always receives an empty dict.  The loop
    # body also refers to `subset` and `subset_patterns`, which are not
    # defined in this method.  Presumably these were meant to come from the
    # calibration setup -- TODO confirm and wire them in.
    calib_datasets = {}
    # Iterate over a snapshot because entries are reassigned inside the loop.
    for dataset_name, calib_attr in list(calib_datasets.items()):
        dataset = dataset_pool.get_dataset(
            dataset_name, dataset_arguments={'id_name': []})
        # At most one of subset / subset_patterns may be given per dataset.
        assert subset is None or subset.get(dataset_name, None) is None or \
               subset_patterns is None or subset_patterns.get(dataset_name, None) is None
        if subset is not None and subset.get(dataset_name, None) is not None:
            subset_attr, subset_cond = subset.get(dataset_name)
            index = np.in1d(dataset[subset_attr], subset_cond)
        elif subset_patterns is not None and subset_patterns.get(dataset_name, None) is not None:
            subset_attr, subset_pattern = subset_patterns.get(dataset_name)
            index = array([True if re.search(subset_pattern, attr_v) else False
                           for attr_v in dataset[subset_attr]])
        else:
            index = arange(dataset.size(), dtype='i')

        calib_datasets[dataset_name] = [dataset, calib_attr, index]

    prediction = self.update_prediction(est_v, simulation_state,
                                        dataset_pool, calib_datasets,
                                        **kwargs)

    # Keys in target that do not appear in prediction are allowed; their
    # predicted value is taken to be 0.
    target_keys = list(self.target.keys())
    target = np.array([self.target[k] for k in target_keys])
    predct = np.array([prediction[k] if k in prediction else 0
                       for k in target_keys])
    results = func(predct, target)

    return results
def setup_environment(cache_directory, year, package_order, additional_datasets=None):
    """Create a fresh simulation state and session; return the dataset pool.

    cache_directory -- cache directory for the new simulation state.
    year -- current time for the new simulation state.
    package_order -- package order of the new session configuration.
    additional_datasets -- optional mapping ``name -> dataset``; each entry
        replaces the dataset of that name in the pool.  ``None`` (the
        default) means no replacements.

    Returns the dataset pool of the new session configuration.
    """
    # None instead of a shared mutable default dict.
    if additional_datasets is None:
        additional_datasets = {}

    gc.collect()
    ss = SimulationState(new_instance=True)
    ss.set_cache_directory(cache_directory)
    ss.set_current_time(year)
    ac = AttributeCache()
    storage = ac.get_flt_storage_for_year(year)
    sc = SessionConfiguration(new_instance=True,
                              package_order=package_order,
                              in_storage=ac)
    logger.log_status("Setup environment for year %s. Use cache directory %s." %
                      (year, storage.get_storage_location()))
    dp = sc.get_dataset_pool()
    for name, ds in additional_datasets.items():
        dp.replace_dataset(name, ds)
    return dp
def run(self, year):
    """Import the configured matrices for ``year``.

    Sets the simulation state to ``year`` and the configured cache
    directory.  The bank path is the Emme base directory joined with the
    ``bank`` entry of the year's travel model configuration.  For every
    entry of ``matrix_variable_map`` the dotted key is turned into a path
    below the bank path and passed to ``get_needed_matrices_from_emme4``.
    """
    cache_dir = self.config['cache_directory']
    state = SimulationState()
    state.set_current_time(year)
    state.set_cache_directory(cache_dir)

    settings_for_year = self.config['travel_model_configuration'][year]
    bank_parts = [self.get_emme2_base_dir()] + self.config['travel_model_configuration'][year]['bank']
    bank_path = os.path.sep.join(bank_parts)

    for dotted_path, variable_dict in settings_for_year['matrix_variable_map'].iteritems():
        matrix_path = os.path.sep.join([bank_path] + dotted_path.split('.'))
        self.get_needed_matrices_from_emme4(year,
                                            settings_for_year['cache_directory'],
                                            matrix_path,
                                            variable_dict)
def run(self, year):
    """Main entry point: prepare datasets and write travel model input.

    Sets the simulation state to ``year``, builds a fresh dataset pool on
    top of the attribute cache and loads the household, zone and job
    datasets.

    If the entry ``locations_to_disaggregate`` of the
    travel_model_configuration is non-empty, the zone id is computed for
    households and jobs by disaggregating it over the listed datasets.
    The list is ordered from higher to lower aggregation level, e.g.
    ['parcel', 'building']: the first name is the dataset carrying the
    zone id, the remaining ones are passed as intermediates.

    Returns the result of ``self._call_input_file_writer``.
    """
    cache_dir = self.config['cache_directory']
    state = SimulationState()
    state.set_cache_directory(cache_dir)
    state.set_current_time(year)
    cache_storage = AttributeCache()

    session = SessionConfiguration(
        new_instance=True,
        package_order=self.config['dataset_pool_configuration'].package_order,
        in_storage=cache_storage)
    dataset_pool = session.get_dataset_pool()

    hh_set = dataset_pool.get_dataset('household')
    zone_set = dataset_pool.get_dataset('zone')
    job_set = dataset_pool.get_dataset('job')

    locations = self.config['travel_model_configuration']['locations_to_disaggregate']
    n_locations = len(locations)
    if n_locations > 0:
        primary_location = locations[0]
        if n_locations > 1:
            # Every intermediate is followed by ", " (including the last one).
            listed = "".join(["%s, " % (locations[i],) for i in range(1, n_locations)])
            intermediates_string = ", intermediates=[" + listed + "]"
        else:
            intermediates_string = ""

        for agent_set, agent_name in ((hh_set, 'household'), (job_set, 'job')):
            expression = ('%s = ' + agent_name + '.disaggregate(%s.%s %s)') % (
                zone_set.get_id_name()[0],
                primary_location,
                zone_set.get_id_name()[0],
                intermediates_string)
            agent_set.compute_variables([expression], dataset_pool=dataset_pool)

    return self._call_input_file_writer(year, dataset_pool)
def run(self, year):
    """Import the configured matrices for ``year``.

    Sets the simulation state to ``year`` and the configured cache
    directory.  The bank path is the Emme base directory joined with the
    ``bank`` entry of the year's travel model configuration.  For every
    entry of ``matrix_variable_map`` the dotted key is turned into a path
    below the bank path and passed to ``get_needed_matrices_from_emme4``.

    NOTE(review): the previous docstring described per-year hdf5 files in
    a matrix directory; nothing in this method reads such a file -- confirm
    which behaviour is intended.
    """
    cache_dir = self.config['cache_directory']
    state = SimulationState()
    state.set_current_time(year)
    state.set_cache_directory(cache_dir)

    settings_for_year = self.config['travel_model_configuration'][year]
    bank_parts = [self.get_emme2_base_dir()] + self.config['travel_model_configuration'][year]['bank']
    bank_path = os.path.sep.join(bank_parts)

    for dotted_path, variable_dict in settings_for_year['matrix_variable_map'].iteritems():
        matrix_path = os.path.sep.join([bank_path] + dotted_path.split('.'))
        self.get_needed_matrices_from_emme4(year,
                                            settings_for_year['cache_directory'],
                                            matrix_path,
                                            variable_dict)
def setup_environment(cache_directory, year, package_order, additional_datasets=None):
    """Create a fresh simulation state and session; return the dataset pool.

    cache_directory -- cache directory for the new simulation state.
    year -- current time for the new simulation state.
    package_order -- package order of the new session configuration.
    additional_datasets -- optional mapping ``name -> dataset``; each entry
        replaces the dataset of that name in the pool.  ``None`` (the
        default) means no replacements.

    Returns the dataset pool of the new session configuration.
    """
    # None instead of a shared mutable default dict.
    if additional_datasets is None:
        additional_datasets = {}

    gc.collect()
    ss = SimulationState(new_instance=True)
    ss.set_cache_directory(cache_directory)
    ss.set_current_time(year)
    ac = AttributeCache()
    storage = ac.get_flt_storage_for_year(year)
    sc = SessionConfiguration(new_instance=True,
                              package_order=package_order,
                              in_storage=ac)
    logger.log_status(
        "Setup environment for year %s. Use cache directory %s." %
        (year, storage.get_storage_location()))
    dp = sc.get_dataset_pool()
    for name, ds in additional_datasets.items():
        dp.replace_dataset(name, ds)
    return dp
def target_func(self, est_v, func=lambda x, y: np.sum(np.abs(x - y)), **kwargs):
    """Evaluate the calibration objective for the estimate ``est_v``.

    Sets the simulation state to ``self.base_year`` /
    ``self.cache_directory``, builds a fresh dataset pool, obtains a
    prediction from ``self.update_prediction`` and returns
    ``func(prediction, target)``.

    est_v -- estimate passed on to ``update_prediction``.
    func -- callable taking (predicted array, target array); defaults to
        the sum of absolute differences.
    kwargs -- forwarded to ``update_prediction``.

    Keys of ``self.target`` missing from the prediction count as 0.
    """
    simulation_state = SimulationState()
    simulation_state.set_current_time(self.base_year)
    simulation_state.set_cache_directory(self.cache_directory)
    attribute_cache = AttributeCache()
    dataset_pool = SessionConfiguration(
        new_instance=True, package_order=self.package_order, in_storage=attribute_cache
    ).get_dataset_pool()

    # NOTE(review): calib_datasets starts out empty, so the loop below never
    # executes and update_prediction always receives an empty dict.  The loop
    # body also refers to `subset` and `subset_patterns`, which are not
    # defined in this method.  Presumably these were meant to come from the
    # calibration setup -- TODO confirm and wire them in.
    calib_datasets = {}
    # Iterate over a snapshot because entries are reassigned inside the loop.
    for dataset_name, calib_attr in list(calib_datasets.items()):
        dataset = dataset_pool.get_dataset(dataset_name, dataset_arguments={"id_name": []})
        # At most one of subset / subset_patterns may be given per dataset.
        assert (
            subset is None
            or subset.get(dataset_name, None) is None
            or subset_patterns is None
            or subset_patterns.get(dataset_name, None) is None
        )
        if subset is not None and subset.get(dataset_name, None) is not None:
            subset_attr, subset_cond = subset.get(dataset_name)
            index = np.in1d(dataset[subset_attr], subset_cond)
        elif subset_patterns is not None and subset_patterns.get(dataset_name, None) is not None:
            subset_attr, subset_pattern = subset_patterns.get(dataset_name)
            index = array([True if re.search(subset_pattern, attr_v) else False
                           for attr_v in dataset[subset_attr]])
        else:
            index = arange(dataset.size(), dtype="i")

        calib_datasets[dataset_name] = [dataset, calib_attr, index]

    prediction = self.update_prediction(est_v, simulation_state, dataset_pool,
                                        calib_datasets, **kwargs)

    # Keys in target that do not appear in prediction are allowed; their
    # predicted value is taken to be 0.
    target_keys = list(self.target.keys())
    target = np.array([self.target[k] for k in target_keys])
    predct = np.array([prediction[k] if k in prediction else 0 for k in target_keys])
    results = func(predct, target)

    return results
def run(self, year, matrix_directory=None):
    """Main entry point: copy emme/2 matrices for ``year`` into the cache.

    Uses the ``travel_model_configuration`` of ``self.config``.  Banks
    ``bank1`` to ``bank3`` are processed in turn.  For a bank listed in
    the year's ``matrix_variable_map`` the matrices are imported via
    ``get_needed_matrices_from_emme2`` and every report named in
    ``reports_to_copy`` is copied to the cache; for a bank with a
    non-empty entry in ``node_matrix_variable_map`` the node matrices are
    imported as well.

    If ``matrix_directory`` is not None the bank directories are taken
    from there and the matrix files are assumed to exist already;
    otherwise ``get_emme2_dir`` provides the bank directory.
    """
    cache_dir = self.config['cache_directory']
    state = SimulationState()
    state.set_current_time(year)
    state.set_cache_directory(cache_dir)

    settings_for_year = self.config['travel_model_configuration'][year]
    matrices_created = matrix_directory is not None
    reports = self.config['travel_model_configuration'].get('reports_to_copy', [])

    for bank_number in (1, 2, 3):
        bank_name = "bank%i" % bank_number
        if matrix_directory is not None:
            bank_dir = os.path.join(matrix_directory, bank_name)
        else:
            bank_dir = self.get_emme2_dir(year, bank_name)

        if bank_name in settings_for_year['matrix_variable_map']:
            self.get_needed_matrices_from_emme2(
                year,
                settings_for_year['cache_directory'],
                bank_dir,
                settings_for_year['matrix_variable_map'][bank_name],
                matrices_created)
            for report in reports:
                self.copy_report_to_cache(report, year,
                                          settings_for_year['cache_directory'],
                                          bank_dir)

        if bank_name in settings_for_year.get('node_matrix_variable_map', {}):
            node_variable_map = settings_for_year['node_matrix_variable_map'][bank_name]
            if len(node_variable_map.keys()) > 0:
                self.get_needed_node_matrices_from_emme2(
                    year,
                    settings_for_year['cache_directory'],
                    bank_dir,
                    node_variable_map)
def import_travel_model_data(config, year):
    """Read travel model CSV output and cache it as datasets for year + 1.

    The mapping ``tm_to_urbansim_variable_mapping`` of the travel model
    configuration gives, per dataset name, a file name; each file is
    looked up below the travel model base directory and the year's
    ``data_dir``, read with ``read_csv`` and handed to
    ``to_opus_dataset`` together with the flt storage for ``year + 1``.
    """
    cache_dir = config['cache_directory']
    state = SimulationState()
    state.set_current_time(year)
    state.set_cache_directory(cache_dir)

    out_store = AttributeCache().get_flt_storage_for_year(year+1)
    out_store_loc = out_store.get_storage_location()

    tm_config = config['travel_model_configuration']
    files_by_dataset = tm_config['tm_to_urbansim_variable_mapping']
    base_dir = mtc_common.tm_get_base_dir(config)
    data_dir = tm_config[year]['data_dir']

    for dataset_name, file_name in files_by_dataset.iteritems():
        source_path = os.path.join(base_dir, data_dir, file_name)
        data = read_csv(source_path, header=0)
        with block("Caching {} to {}".format(dataset_name, out_store_loc)):
            logger.log_status("Source file {}".format(source_path))
            opus_ds = to_opus_dataset(data, out_store, dataset_name)
def test_doing_other_refinements(self):
    """Run the refinement script for 2023-2027 and check households,
    buildings and persons for 2023 and 2027.

    NOTE(review): three checks used ``self.assert_(value, expected)``, which
    treats ``expected`` as the failure message and therefore only tested
    that ``value`` was truthy.  They are now real equality checks; if one
    of them fails, the expected number was never actually verified.
    """
    self.prepare_cache()
    os.system("%(python)s %(script)s -c %(cache_directory)s -s %(start_year)s -e %(end_year)s" %
              {'python': sys.executable,
               'script': self.script,
               'cache_directory':self.cache_dir,
               'start_year': 2023,
               'end_year': 2027
               }
              )

    simulation_state = SimulationState()

    ## test refinement for 2023
    simulation_state.set_current_time(2023)
    hhs = self.dataset_pool.get_dataset('household')
    hhs_p5 = hhs.compute_variables('household.persons>5')

    # check results
    self.assertEqual(hhs.size(), 2)
    self.assertEqual(hhs_p5.sum(), 0)

    self.dataset_pool.remove_all_datasets()

    ## test refinement for 2027
    simulation_state.set_current_time(2027)
    hhs = self.dataset_pool.get_dataset('household')
    buildings = self.dataset_pool.get_dataset('building')
    persons = self.dataset_pool.get_dataset('person')

    hhs_raz6 = hhs.compute_variables('household.disaggregate(building.disaggregate(parcel.raz_id)==6)',
                                     dataset_pool=self.dataset_pool)
    hhs_bldg = buildings.compute_variables('building.number_of_agents(household)',
                                           dataset_pool=self.dataset_pool)

    # check results
    self.assertEqual(hhs_raz6.sum(), 3)
    self.assertEqual(hhs_bldg.sum(), 3)
    self.assertEqual((hhs_bldg != 0).sum(), 2)
    self.assertTrue(allclose(persons.get_attribute('job_id'), array([-1, -1, -1, -1, 3, 4, 7])))
def run(self, year):
    """
    Copy skims stored in hdf5 format into the UrbanSim cache.

    Should run after psrc_parcel.emme.models.run_export_skims which creates
    the skims hdf5 file.  It creates a travel_model dataset with each skim
    being an attribute of it.  Zones are assumed to have no gaps.

    Arguments:
    year -- year of the urbansim run. Used to extract the TM year from the
            bank configuration.

    Configuration entries (in travel_model_configuration) used:
    matrix_variable_map -- dictionary of bank names and corresponding skim
            names.  Bank names are the path where (back-)slashes are
            replaced by dots, e.g. skims.auto.am.  The value for each bank
            name is a dictionary with skim names as keys and the desired
            urbansim attribute names as values, e.g.
            {'skims.nonmotorized.am':
                {'abketm': 'am_bike_to_work_travel_time',
                 'awlktm': 'am_walk_time_in_minutes'}}
    matrix_h5_directory -- path to the hdf5 file called
            xxxx-travelmodel.h5 where xxxx is replaced by the TM year
            (default is the Emme base directory), which contains the skims
            as n x n matrices.
    """
    cache_dir = self.config['cache_directory']
    state = SimulationState()
    state.set_current_time(year)
    state.set_cache_directory(cache_dir)

    tm_settings = self.config['travel_model_configuration']
    settings_for_year = tm_settings[year]
    h5_directory = tm_settings.get('matrix_h5_directory', self.get_emme2_base_dir())
    # The first element of the year's bank entry is the travel model year.
    tm_year = tm_settings[year]['bank'][0]
    h5_file = os.path.join(h5_directory, "%s-travelmodel.h5" % tm_year)

    for bank_name, skim_map in settings_for_year['matrix_variable_map'].iteritems():
        self.get_needed_matrices_from_emme4(year,
                                            settings_for_year['cache_directory'],
                                            bank_name,
                                            skim_map,
                                            bank_file=h5_file)
def run(self, config, year, *args, **kwargs):
    """Main entry point: fetch travel data and write it to next year's cache.

    The zone dataset of the current year is loaded from the attribute cache,
    the travel data is obtained via get_travel_data_from_travel_model(), and
    the result is written as table 'travel_data' into the flt cache
    directory of year+1.

    Arguments:
    config -- run configuration; 'cache_directory',
              'dataset_pool_configuration' and 'travel_model_configuration'
              are read here.
    year -- current simulation year.
    *args, **kwargs -- passed through to get_travel_data_from_travel_model().
    """
    cache_root = config['cache_directory']
    state = SimulationState()
    state.set_current_time(year)
    state.set_cache_directory(cache_root)

    logger.start_block('Getting data from travel model')
    target_dir = os.path.join(cache_root, str(year + 1))
    if not os.path.exists(target_dir):
        os.mkdir(target_dir)

    cache_storage = AttributeCache()
    session = SessionConfiguration(
        new_instance=True,
        package_order=config['dataset_pool_configuration'].package_order,
        in_storage=cache_storage)
    zones = session.get_dataset_pool().get_dataset('zone')
    zones.load_dataset()

    self.prepare_for_run(config['travel_model_configuration'], year)
    travel_data = self.get_travel_data_from_travel_model(
        config, year, zones, *args, **kwargs)
    logger.end_block()

    logger.start_block('Writing travel data to cache')
    next_year_storage = StorageFactory().get_storage(
        'flt_storage', storage_location=target_dir)
    travel_data.write_dataset(
        attributes=travel_data.get_known_attribute_names(),
        out_storage=next_year_storage,
        out_table_name='travel_data')
    logger.end_block()
def run(self, base_directory, urbansim_cache_directory, years):
    """Run the land cover change model for each of the given years.

    Arguments:
    base_directory -- directory with the land cover flt data. The same
        directory is used as both input and output storage for every year
        (no per-year sub-folder is appended).
    urbansim_cache_directory -- UrbanSim cache directory registered with the
        SimulationState; gridcells are read through the attribute cache.
    years -- iterable of years to run.
    """
    model = LandCoverChangeModel(self.possible_lcts,
                                 submodel_string=self.lct_attribute,
                                 choice_attribute_name=self.lct_attribute,
                                 debuglevel=4)

    # Coefficients and specification ship with the package as tab files.
    storage = StorageFactory().get_storage(
        'tab_storage',
        storage_location=os.path.join(self.package_path, 'data'))
    coefficients = Coefficients()
    coefficients.load(in_storage=storage,
                      in_table_name="land_cover_change_model_coefficients")
    specification = EquationSpecification(in_storage=storage)
    specification.load(in_table_name="land_cover_change_model_specification")
    specification.set_variable_prefix("biocomplexity.land_cover.")
    constants = Constants()

    simulation_state = SimulationState()
    simulation_state.set_cache_directory(urbansim_cache_directory)
    attribute_cache = AttributeCache()

    # NOTE(review): only the first 100000 land cover records are simulated;
    # this looks like a debugging limit -- confirm it is intended.
    index = arange(100000)
    for year in years:
        simulation_state.set_current_time(year)
        land_cover_path = base_directory
        land_covers = LandCoverDataset(
            in_storage=StorageFactory().get_storage(
                'flt_storage', storage_location=land_cover_path),
            out_storage=StorageFactory().get_storage(
                'flt_storage', storage_location=land_cover_path),
            debuglevel=4)
        land_covers.subset_by_index(index)
        gridcells = GridcellDataset(in_storage=attribute_cache, debuglevel=4)

        model.run(specification, coefficients, land_covers,
                  data_objects={"gridcell": gridcells,
                                "constants": constants,
                                "flush_variables": True},
                  chunk_specification={'nchunks': 1})
        land_covers.flush_dataset()
        # Release the per-year datasets before the next iteration loads new ones.
        del gridcells
        del land_covers
def run(self, config, year, *args, **kwargs):
    """Main entry point: export UrbanSim data as travel model input.

    Sets up the simulation state and a fresh session for the given year,
    fetches the zone dataset and delegates the actual file creation to
    create_travel_model_input_file().

    Arguments:
    config -- run configuration; 'cache_directory',
              'dataset_pool_configuration' and 'travel_model_configuration'
              are read here.
    year -- simulation year whose data is exported.
    *args, **kwargs -- passed through to create_travel_model_input_file().
    """
    state = SimulationState()
    state.set_cache_directory(config['cache_directory'])
    state.set_current_time(year)

    cache_storage = AttributeCache()
    session = SessionConfiguration(
        new_instance=True,
        package_order=config['dataset_pool_configuration'].package_order,
        in_storage=cache_storage)
    pool = session.get_dataset_pool()
    zones = pool.get_dataset('zone')

    self.prepare_for_run(config['travel_model_configuration'], year)
    self.create_travel_model_input_file(config, year, zones, pool,
                                        *args, **kwargs)
def run(self, config, show_output=False):
    """Cache the configured scenario database tables into the base year cache.

    Arguments:
    config -- run configuration; reads 'cache_directory', 'base_year',
              'scenario_database_configuration',
              'creating_baseyear_cache_configuration' and, if present,
              'low_memory_run'.
    show_output -- stored on the instance as self.show_output.

    Requested tables that were not cached by cache_database_tables() are
    reported with a warning.
    """
    logger.log_status("Caching large SQL tables to: " + config['cache_directory'])
    self.show_output = show_output

    db_config = config['scenario_database_configuration']
    db_manager = ScenarioDatabaseManager(
        server_configuration=db_config,
        base_scenario_database_name=db_config.database_name)
    self.database_server = DatabaseServer(db_config)
    tables_by_database = db_manager.get_database_to_table_mapping()

    self.tables_to_cache = config['creating_baseyear_cache_configuration'].tables_to_cache

    state = SimulationState()
    if 'low_memory_run' in config:
        state.set_low_memory_run(config['low_memory_run'])
    state.set_cache_directory(config['cache_directory'])
    state.set_current_time(config['base_year'])

    self.tables_cached = set()
    for database_name, tables in tables_by_database.items():
        self.cache_database_tables(config, database_name, tables)

    missing_tables = set(self.tables_to_cache) - self.tables_cached
    if not missing_tables:
        return
    logger.log_warning('The following requested tables were NOT cached:')
    for table_name in missing_tables:
        logger.log_warning('\t%s' % table_name)
def run(self, table_names, out_storage=None, table_name_pattern=None,
        cache_directory=None, year=None, **kwargs):
    """Export the specified cache tables to the output storage.

    Arguments:
    table_names -- names of the cached tables to export.
    out_storage -- storage to export to; only used when the instance has no
        out_storage attribute yet (e.g. prepare_for_run was not called).
    table_name_pattern -- format pattern for the exported table name, for
        example '{table_name}_{scenario_name}_{year}'. The fields
        'table_name' and 'year' are filled in here; any other field must be
        passed via **kwargs. When None, tables keep their original name.
    cache_directory -- cache to read from; defaults to the cache directory
        of the SimulationState.
    year -- single year (int) to export; when None every year found in the
        cache is exported.
    **kwargs -- additional pattern fields; 'scenario_name' is required
        because it is passed on to post_run().

    Raises:
    ValueError -- if no output storage is available.
    KeyError -- if 'scenario_name' (or a field of the pattern) is missing.
    """
    if not hasattr(self, "out_storage"):
        if out_storage is None:
            raise ValueError("Either out_storage argument needs to be specified or " +
                             "prepare_for_run called before run method to create a valid out_storage.")
        self.out_storage = out_storage

    sim_state = SimulationState()
    # A current time of 0 is replaced by a dummy year before the cache is read.
    if sim_state.get_current_time() == 0:
        sim_state.set_current_time(9999)
    if cache_directory is None:
        cache_directory = sim_state.get_cache_directory()
    attr_cache = AttributeCache(cache_directory=cache_directory)

    if year is None:
        years = attr_cache._get_sorted_list_of_years()
    else:
        assert isinstance(year, int)
        years = [year]

    # Previously a missing pattern crashed with AttributeError on None.format;
    # fall back to exporting each table under its own name.
    if table_name_pattern is None:
        table_name_pattern = '{table_name}'

    for table_name in table_names:
        kwargs["table_name"] = table_name
        for export_year in years:
            kwargs["year"] = export_year
            out_table_name = table_name_pattern.format(**kwargs)
            in_storage = attr_cache.get_flt_storage_for_year(export_year)
            # TODO drop_table(table_name) if table_name exists
            ExportStorage().export_dataset(table_name,
                                           in_storage=in_storage,
                                           out_storage=self.out_storage,
                                           out_dataset_name=out_table_name)
    self.post_run(kwargs["scenario_name"], years)
def run(self, year, matrix_directory=None):
    """Main entry point: copy emme/2 matrices of bank1..bank3 into the cache.

    The travel_model_configuration entry for the given year determines
    which matrices of each bank are mapped to which variable names; the
    copying itself is done by get_needed_matrices_from_emme2(),
    copy_report_to_cache() and get_needed_node_matrices_from_emme2().

    Arguments:
    year -- simulation year used to look up the travel model configuration.
    matrix_directory -- if not None, the matrix files are assumed to be
        already created in <matrix_directory>/bank<N>; otherwise the bank
        directories come from get_emme2_dir().
    """
    cache_directory = self.config['cache_directory']
    state = SimulationState()
    state.set_current_time(year)
    state.set_cache_directory(cache_directory)

    tm_config = self.config['travel_model_configuration']
    year_config = tm_config[year]
    matrices_created = matrix_directory is not None
    reports = tm_config.get('reports_to_copy', [])

    for bank_number in (1, 2, 3):
        bank_name = "bank%i" % bank_number
        if matrix_directory is not None:
            bank_dir = os.path.join(matrix_directory, bank_name)
        else:
            bank_dir = self.get_emme2_dir(year, bank_name)

        if bank_name in year_config['matrix_variable_map']:
            self.get_needed_matrices_from_emme2(
                year,
                year_config['cache_directory'],
                bank_dir,
                year_config['matrix_variable_map'][bank_name],
                matrices_created)
            for report in reports:
                self.copy_report_to_cache(report, year,
                                          year_config['cache_directory'],
                                          bank_dir)

        if bank_name in year_config.get('node_matrix_variable_map', {}):
            node_variable_map = year_config['node_matrix_variable_map'][bank_name]
            # Skip banks whose node map is configured but empty.
            if len(node_variable_map.keys()) > 0:
                self.get_needed_node_matrices_from_emme2(
                    year,
                    year_config['cache_directory'],
                    bank_dir,
                    node_variable_map)
class TestLagVariables(opus_unittest.OpusTestCase):
    """Tests of lag variables computed on a gridcell dataset read from a copied base year cache."""

    def setUp(self):
        """Copy the existing base year cache and load the gridcell dataset from it."""
        self.config = TestCacheConfiguration()
        self.simulation_state = SimulationState(new_instance=True)
        SessionConfiguration(self.config,
                             new_instance=True,
                             package_order=['urbansim', 'opus_core'],
                             in_storage=AttributeCache())

        self.base_year = self.config['base_year']
        baseyear_cache_config = self.config['creating_baseyear_cache_configuration']
        self.simulation_state.set_current_time(self.base_year)

        cache_directory = self.simulation_state.get_cache_directory()
        year_name = str(self.base_year)
        source_dir = os.path.join(
            baseyear_cache_config.baseyear_cache.existing_cache_to_copy,
            year_name)
        copytree(source_dir, os.path.join(cache_directory, str(self.base_year)))

        CacheScenarioDatabase().prepare_data_before_baseyear(
            cache_directory, self.base_year, baseyear_cache_config)
        self.config['cache_directory'] = cache_directory

        # The returned storage is not used further; the call is kept as in
        # the original set-up sequence.
        AttributeCache().get_flt_storage_for_year(self.base_year)

        cache_directory = self.simulation_state.get_cache_directory()
        flt_directory = os.path.join(cache_directory, str(self.base_year))
        gridcell_storage = StorageFactory().get_storage(
            'flt_storage', storage_location=flt_directory)
        self.gridcell = DatasetFactory().get_dataset(
            'gridcell',
            package='urbansim',
            subdir='datasets',
            arguments={'in_storage': gridcell_storage})

    def tearDown(self):
        """Remove the simulation state singleton together with its cache."""
        self.simulation_state.remove_singleton(delete_cache=True)

    def test_lag_variables(self):
        """Test lag variables"""
        gridcell = self.gridcell
        resources = self.config

        # A weak test that computing a lag variable on a realistic dataset
        # does not crash.
        gridcell.compute_variables(
            'urbansim.gridcell.n_recent_transitions_to_developed',
            resources=resources)

        # The following tests are fragile, since they need to know exactly
        # what values are being subtracted, and ignore any negative amount
        # that is truncated at zero. If you change the "subset" dataset to a
        # different region, you will have to update the expected value.
        sqft_names = ('commercial_sqft',
                      'commercial_sqft_lag1',
                      'commercial_sqft_lag2')
        for name in sqft_names:
            gridcell.compute_variables('urbansim.gridcell.' + name,
                                       resources=resources)
        sqft, sqft_lag1, sqft_lag2 = [gridcell.get_attribute(name).sum()
                                      for name in sqft_names]

        logger.log_status('sqft = %s' % sqft)
        logger.log_status('sqft_lag1 = %s' % sqft_lag1)
        logger.log_status('sqft_lag2 = %s' % sqft_lag2)
        logger.log_status('base_year = %s' % self.base_year)

        self.assertEqual(self.base_year, SimulationState().get_current_time())
        self.assertEqual(sqft, sqft_lag1)
        self.assertEqual(578 + 2083 + 1103 + 87, sqft_lag1 - sqft_lag2)

        # Do lag variables produce different results for derived attributes?
        project_names = ('n_recent_development_projects',
                         'n_recent_development_projects_lag1')
        for name in project_names:
            gridcell.compute_variables('urbansim.gridcell.' + name,
                                       resources=resources)
        n_recent_projects, n_recent_projects_lag1 = [
            gridcell.get_attribute(name).sum() for name in project_names]
        self.assertEqual(n_recent_projects, 11)
        self.assertEqual(n_recent_projects_lag1, 15)

        # Do lag_variables produce different results for derived attributes
        # without lags?
        ln_names = ('ln_commercial_sqft', 'ln_commercial_sqft_lag4')
        for name in ln_names:
            gridcell.compute_variables('urbansim.gridcell.' + name,
                                       resources=resources)
        sqft, sqft_lag4 = [gridcell.get_attribute(name).sum()
                           for name in ln_names]
        self.assertNotEqual(sqft, sqft_lag4)
def run(self, optimizer='lbfgsb', results_pickle_prefix="calib", optimizer_kwargs={}):
    '''
    Call the specified optimizer to calibrate.

    Arguments:
        - optimizer: optimization method; one of 'bfgs', 'lbfgsb',
                     'anneal' or 'panneal'
        - results_pickle_prefix: prefix of the pickle file name that will be
                     saved after the simulation; if None, results are not saved
        - optimizer_kwargs: keyword arguments overriding the defaults of the
                     chosen optimizer (only read here, never modified)

    Returns:
        - the results from the optimizer

    Side effects:
        - a pickle dump of the results named <prefix>_<optimizer>.pickle in
          self.log_directory, if results_pickle_prefix is specified

    Raises:
        - ValueError for an unrecognized optimizer
        - TypeError for a calibration attribute that is neither a string
          nor a list/tuple
    '''
    simulation_state = SimulationState()
    simulation_state.set_current_time(self.base_year)
    simulation_state.set_cache_directory(self.cache_directory)
    attribute_cache = AttributeCache()
    dataset_pool = SessionConfiguration(
        new_instance=True,
        package_order=self.package_order,
        in_storage=attribute_cache).get_dataset_pool()

    # NOTE(review): calib_datasets starts out empty, so neither loop below
    # executes and init_v stays empty; `subset`, `subset_patterns` and
    # `is_parallelizable` are not defined in this method and are presumably
    # module-level names -- confirm against the rest of the file.
    calib_datasets = {}
    for dataset_name, calib_attr in calib_datasets.iteritems():
        dataset = dataset_pool.get_dataset(
            dataset_name, dataset_arguments={'id_name': []})
        # A dataset may be restricted by `subset` or by `subset_patterns`,
        # but not by both.
        assert subset is None or subset.get(dataset_name, None) is None or \
               subset_patterns is None or subset_patterns.get(dataset_name, None) is None
        if subset is not None and subset.get(dataset_name, None) is not None:
            subset_attr, subset_cond = subset.get(dataset_name)
            index = np.in1d(dataset[subset_attr], subset_cond)
        elif subset_patterns is not None and subset_patterns.get(dataset_name, None) is not None:
            subset_attr, subset_pattern = subset_patterns.get(dataset_name)
            index = array([True if re.search(subset_pattern, attr_v) else False
                           for attr_v in dataset[subset_attr]])
        else:
            index = arange(dataset.size(), dtype='i')
        calib_datasets[dataset_name] = [dataset, calib_attr, index]

    # Concatenate the selected values of all calibration attributes into the
    # initial parameter vector.
    init_v = array([], dtype='f8')
    for dataset_name, calib in calib_datasets.iteritems():
        dataset, calib_attr, index = calib
        if type(calib_attr) == str:
            init_v = np.concatenate((init_v, dataset[calib_attr][index]))
        elif type(calib_attr) in (list, tuple):
            for attr in calib_attr:
                init_v = np.concatenate((init_v, dataset[attr][index]))
        else:
            raise TypeError("Unrecongized data type in calib_datasets")

    t0 = time.time()
    if is_parallelizable == True:
        set_parallel(True)

    print(OKBLUE + "\noptimizer = {} (is_parallel = {})".format(
        optimizer, is_parallelizable) + ENDC)
    print(OKBLUE + "-------------------------------------------------------\n" + ENDC)

    if optimizer == 'bfgs':
        default_kwargs = {
            'fprime': None,
            'epsilon': 1e-08,
            'maxiter': None,
            'full_output': 1,
            'disp': 1,
            'retall': 0,
            'callback': None
        }
        optimizer_func = fmin_bfgs
    elif optimizer == 'lbfgsb':
        default_kwargs = {
            'fprime': None,
            'approx_grad': True,
            'bounds': None,
            'factr': 1e12,
            'iprint': 1
        }
        optimizer_func = fmin_l_bfgs_b
    elif optimizer == 'anneal':
        default_kwargs = {
            'schedule': 'fast',
            'full_output': 1,
            'T0': None,
            'Tf': 1e-12,
            'maxeval': None,
            'maxaccept': None,
            'maxiter': 400,
            'boltzmann': 1.0,
            'learn_rate': 0.5,
            'feps': 1e-06,
            'quench': 1.0,
            'm': 1.0,
            'n': 1.0,
            'lower': -1,
            'upper': 1,
            'dwell': 50,
            'disp': True
        }
        optimizer_func = anneal
    elif optimizer == 'panneal':
        default_kwargs = {
            'schedule': 'fast',
            'full_output': 1,
            'T0': None,
            'Tf': 1e-12,
            'maxeval': None,
            'maxaccept': None,
            'maxiter': 400,
            'boltzmann': 1.0,
            'learn_rate': 0.5,
            'feps': 1e-06,
            'quench': 1.0,
            'm': 1.0,
            'n': 1.0,
            'lower': -1,
            'upper': 1,
            'dwell': 50,
            'disp': True,
            'cores': 24,
            'interv': 20
        }
        optimizer_func = panneal
    else:
        raise ValueError("Unrecognized optimizer {}".format(optimizer))

    # Caller-supplied options win over the per-optimizer defaults.
    default_kwargs.update(optimizer_kwargs)
    results = optimizer_func(self.target_func, copy(init_v), **default_kwargs)

    duration = time.time() - t0
    if results_pickle_prefix is not None:
        pickle_file = "{}_{}.pickle".format(results_pickle_prefix, optimizer)
        pickle_file = os.path.join(self.log_directory, pickle_file)
        # Use a context manager so the file is flushed and closed even if
        # pickling fails.
        with open(pickle_file, "wb") as pickle_handle:
            pickle.dump(results, pickle_handle)

    if is_parallelizable == True:
        set_parallel(False)

    logger.log_status('init target_func: {}'.format(self.target_func(init_v)))
    logger.log_status('end target_func: {}'.format(results[:]))  # which one?
    logger.log_status('outputs from optimizer: {}'.format(results))
    logger.log_status('Execution time: {}'.format(duration))
    return results
("neighborhood.ln_price","ln_price"), ("paris.household_x_neighborhood.age_lnprice","age_lnprice"), ("paris.household_x_neighborhood.lninc_lnprice","lninc_lnprice"), ("paris.neighborhood.delta_pop","delta_pop"), ("neighborhood.rail9","rail"), ("neighborhood.subway","subway"), ("neighborhood.disthwy","disthwy"), ("neighborhood.tc","tc"), ("neighborhood.vp","vp"), ("paris.household_x_neighborhood.hhfem_nbtc","hhfem_nbtc") ) } from my_estimation_config import my_configuration ss = SimulationState() ss.set_current_time(2000) ss.set_cache_directory(my_configuration['cache_directory']) attribute_cache = AttributeCache() sc = SessionConfiguration(new_instance=True, package_order=my_configuration['dataset_pool_configuration'].package_order, in_storage=attribute_cache) #settings = ParisSettings() #settings.prepare_session_configuration() estimator = HLCMEstimator(config=my_configuration, save_estimation_results=False) #estimator = HLCMEstimator(settings=my_configuration, # run_land_price_model_before_estimation=False,
"hhfrench_nbforeign"), ("neighborhood.ln_price", "ln_price"), ("paris.household_x_neighborhood.age_lnprice", "age_lnprice"), ("paris.household_x_neighborhood.lninc_lnprice", "lninc_lnprice"), ("paris.neighborhood.delta_pop", "delta_pop"), ("neighborhood.rail9", "rail"), ("neighborhood.subway", "subway"), ("neighborhood.disthwy", "disthwy"), ("neighborhood.tc", "tc"), ("neighborhood.vp", "vp"), ("paris.household_x_neighborhood.hhfem_nbtc", "hhfem_nbtc")) } from my_estimation_config import my_configuration ss = SimulationState() ss.set_current_time(2000) ss.set_cache_directory(my_configuration['cache_directory']) attribute_cache = AttributeCache() sc = SessionConfiguration( new_instance=True, package_order=my_configuration['dataset_pool_configuration']. package_order, in_storage=attribute_cache) #settings = ParisSettings() #settings.prepare_session_configuration() estimator = HLCMEstimator(config=my_configuration, save_estimation_results=False) #estimator = HLCMEstimator(settings=my_configuration,
def __init__(self, config):
    """Create a persons table with one record per household worker and write it out.

    For every household with a positive 'workers' count, that many person
    records are generated (member_id 1..workers, is_worker 1, job_id -1).
    The table 'persons' is written to the estimation database when
    'estimation_database_configuration' is in config, otherwise to the flt
    cache directory of base_year+1.

    Arguments:
    config -- configuration; reads 'cache_directory', 'base_year',
              'dataset_pool_configuration', 'datasets_to_preload' and,
              optionally, 'estimation_database_configuration'.
    """
    if 'estimation_database_configuration' in config:
        db_server = DatabaseServer(config['estimation_database_configuration'])
        db = db_server.get_database(
            config['estimation_database_configuration'].database_name)
        out_storage = StorageFactory().build_storage_for_dataset(
            type='sql_storage', storage_location=db)
    else:
        next_year_dir = os.path.join(config['cache_directory'],
                                     str(config['base_year'] + 1))
        out_storage = StorageFactory().get_storage(
            type='flt_storage', storage_location=next_year_dir)

    state = SimulationState()
    state.set_cache_directory(config['cache_directory'])
    state.set_current_time(config['base_year'])
    cache_storage = AttributeCache()

    SessionConfiguration(
        new_instance=True,
        package_order=config['dataset_pool_configuration'].package_order,
        in_storage=cache_storage)

    # Build the base year cache first when it does not exist yet.
    base_year_dir = os.path.join(config['cache_directory'],
                                 str(config['base_year']))
    if not os.path.exists(base_year_dir):
        CacheScenarioDatabase().run(config, unroll_gridcells=False)

    for dataset_name in config['datasets_to_preload']:
        SessionConfiguration().get_dataset_from_pool(dataset_name)

    households = SessionConfiguration().get_dataset_from_pool("household")
    household_ids = households.get_id_attribute()
    workers = households.get_attribute("workers")

    hh_ids, member_ids, is_worker, job_ids = [], [], [], []
    for household_id, n_workers in zip(household_ids, workers):
        if n_workers > 0:
            hh_ids.extend([household_id] * n_workers)
            member_ids.extend(range(1, n_workers + 1))
            is_worker.extend([1] * n_workers)
            job_ids.extend([-1] * n_workers)

    persons_table_name = 'persons'
    person_columns = {
        'person_id': arange(len(hh_ids)) + 1,
        'household_id': array(hh_ids),
        'member_id': array(member_ids),
        'is_worker': array(is_worker),
        'job_id': array(job_ids),
    }
    in_storage = StorageFactory().get_storage('dict_storage')
    in_storage.write_table(table_name=persons_table_name,
                           table_data=person_columns)

    persons = PersonDataset(in_storage=in_storage,
                            in_table_name=persons_table_name)
    persons.write_dataset(out_storage=out_storage,
                          out_table_name=persons_table_name)
class MoreDatasetTests(opus_unittest.OpusTestCase):
    """Tests of Dataset attribute loading, flushing to cache and error handling."""

    def setUp(self):
        """Create a temporary cache and a 'jobs' table in dict storage."""
        self.start_year = 2001
        self.expected_sic_data = array([6, 4, 7, 808, 6])
        self.job_id = array([1, 2, 3, 4, 5])
        self.base_cache_dir = tempfile.mkdtemp(prefix='opus_tmp_test_dataset')
        self.simulation_state = SimulationState(low_memory_run=True,
                                                new_instance=True,
                                                base_cache_dir=self.base_cache_dir)
        self.dir = self.simulation_state.get_cache_directory()
        self.simulation_state.set_current_time(self.start_year)
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)

        self.in_storage = StorageFactory().get_storage('dict_storage')
        self.in_storage.write_table(
            table_name='jobs',
            table_data={
                'grid_id': array([10, 20, 30, 40, 50]),
                'job_id': self.job_id,
            },
        )
        self.out_storage = StorageFactory().get_storage('dict_storage')

        self.job_set_resources = ResourceFactory().get_resources_for_dataset(
            'job',
            in_storage=self.in_storage,
            out_storage=self.out_storage,
            in_table_name_pair=('jobs', None),
            out_table_name_pair=('jobs_exported', None),
            attributes_pair=(None, AttributeType.PRIMARY),
            id_name_pair=('job_id', 'job_id'),
            nchunks_pair=(1, 1),
            debug_pair=(1, None)
        )

    def tearDown(self):
        """Remove the temporary cache directory."""
        if os.path.exists(self.base_cache_dir):
            rmtree(self.base_cache_dir)

    def test_err_when_asking_for_attribute_that_is_not_in_cache(self):
        """A flushed attribute can be read back; an unknown one raises NameError."""
        job_set = Dataset(self.job_set_resources, dataset_name="jobs")
        job_set.add_attribute(self.job_id, "job_id", metadata=AttributeType.PRIMARY)
        job_set.flush_dataset()
        job_set.get_attribute('job_id')
        self.assertRaises(NameError, job_set.get_attribute,
                          'attribute_that_does_not_exist')

    def test_compute_one_variable_when_asking_for_attribute_that_is_not_in_cache(self):
        """Computing an unknown variable on a flushed dataset raises StandardError."""
        job_set = Dataset(self.job_set_resources, dataset_name="jobs")
        job_set.add_attribute(self.job_id, "job_id", metadata=AttributeType.PRIMARY)
        job_set.flush_dataset()
        job_id_variable_name = VariableName('opus_core.jobs.attribute_that_does_not_exist')

        # Hide the error words only for the duration of the expected failure;
        # the finally clause must restore the logger (it used to call
        # enable again, leaving the words hidden for all later tests).
        logger.enable_hidden_error_and_warning_words()
        try:
            self.assertRaises(StandardError, job_set._compute_one_variable,
                              job_id_variable_name)
        finally:
            logger.disable_hidden_error_and_warning_words()

    def test_flush_dataset_correct_flags(self):
        """The in-memory and cached flags follow load, flush and reload."""
        job_set = Dataset(self.job_set_resources, dataset_name="jobs")
        self.assert_(not 'job_id' in job_set.attribute_boxes)

        # First access loads the attribute from storage: in memory, not cached.
        job_set.get_attribute("job_id")
        self.assert_(job_set.attribute_boxes["job_id"].is_in_memory())
        self.assert_(not job_set.attribute_boxes["job_id"].is_cached())

        # Flushing moves it to the cache and out of memory.
        job_set.flush_dataset()
        self.assert_(not job_set.attribute_boxes["job_id"].is_in_memory())
        self.assert_(job_set.attribute_boxes["job_id"].is_cached())

        # Reading it again brings it back into memory while it stays cached.
        job_set.get_attribute("job_id")
        self.assert_(job_set.attribute_boxes["job_id"].is_in_memory())
        self.assert_(job_set.attribute_boxes["job_id"].is_cached())

    def test_flush_dataset_correct_data(self):
        """Attribute values survive a flush to the cache unchanged."""
        job_set = Dataset(self.job_set_resources, dataset_name="jobs")
        job_set.add_attribute(self.job_id, "job_id", metadata=AttributeType.PRIMARY)
        job_set.add_attribute(self.expected_sic_data, "sic", metadata=AttributeType.COMPUTED)
        job_set.flush_dataset()
        returned_sic_data = job_set.get_attribute("sic")
        returned_id_data = job_set.get_attribute("job_id")
        self.assert_(ma.allequal(returned_id_data, self.job_id))
        self.assert_(ma.allequal(returned_sic_data, self.expected_sic_data))
refinements = None refinements_storage = None if options.refinements_directory is not None: refinements_storage = StorageFactory().get_storage( 'flt_storage', storage_location=options.refinements_directory) refinements = DatasetFactory().search_for_dataset( 'refinement', package_order, arguments={'in_storage': refinements_storage}) years = refinements.get_attribute('year') if start_year is None: start_year = years.min() if end_year is None: end_year = years.max() simulation_state = SimulationState() simulation_state.set_cache_directory(options.cache_directory) simulation_state.set_current_time(start_year) attribute_cache = AttributeCache() dataset_pool = SessionConfiguration( new_instance=True, package_order=package_order, in_storage=attribute_cache).get_dataset_pool() if refinements is None: refinements = dataset_pool.get_dataset('refinement') years = refinements.get_attribute('year') if start_year is None: start_year = years.min() if end_year is None: end_year = years.max() for year in range(start_year, end_year + 1): logger.start_block("Doing refinement for %s" % year) simulation_state.set_current_time(year)
def run(my, cache_dir=None, year=None): global parcel_set, z, node_set, submarket, esubmarket, isr, parcelfees, costdiscount ''' if 0: z = Zoning() p = Parcels() cPickle.dump((z,p),open('databaseinfo.jar','w')) else: print "Reading db info from jar..." z,p = cPickle.load(open(os.path.join(os.environ['OPUS_DATA'],'bay_area_parcel/databaseinfo.jar'))) ''' ## when developer_model is invoked alone from command line if cache_dir is not None and year is not None: #data_path = paths.get_opus_data_path_path() cache_dir = os.path.join(data_path, 'bay_area_parcel/runs/run_105.2012_05_03_09') #year = 2011 simulation_state = SimulationState() simulation_state.set_current_time(year) SimulationState().set_cache_directory(cache_dir) attribute_cache = AttributeCache() dataset_pool = SessionConfiguration(new_instance=True, package_order=['bayarea', 'urbansim_parcel', 'urbansim', 'opus_core'], in_storage=attribute_cache ).get_dataset_pool() dataset_pool = SessionConfiguration().get_dataset_pool() current_year = SimulationState().get_current_time() cache_dir = SimulationState().get_cache_directory() parcel_set = dataset_pool.get_dataset('parcel') building_set = dataset_pool.get_dataset('building') household_set = dataset_pool.get_dataset('household') node_set = dataset_pool.get_dataset('node') unit_set = dataset_pool.get_dataset('residential_unit') submarket = dataset_pool.get_dataset('submarket') esubmarket = dataset_pool.get_dataset('employment_submarket') #print numpy.array(unit_set['rent'] > 0).size #for i in range(unit_set.size()): # print unit_set['unit_price'][i], unit_set['unit_sqft'][i] #transit_set = dataset_pool.get_dataset('transit_station') #print dataset_pool.datasets_in_pool() ''' from bayarea.node import transit_type_DDD_within_DDD_meters for i in range(7): print i v = transit_type_DDD_within_DDD_meters.transit_type_DDD_within_DDD_meters(i,500) d = v.compute(dataset_pool) print d.size found = d[numpy.nonzero(d)] print found.size sys.exit() ''' 
compute_devmdl_accvars(node_set) ###################### ### CAREFUL - THIS IS WHERE SCNERARIO SPECIFIC INFO GOES ###################### current_year = SimulationState().get_current_time() z = Zoning(my.scenario,current_year) isr = None if my.scenario.startswith('Transit'): isr = ISR() parcelfees = None if my.scenario.startswith('Preferred'): parcelfees = ParcelFees(dataset_pool.get_dataset('parcelfees_preferred')) #elif my.scenario.startswith('Transit'): # parcelfees = ParcelFees(dataset_pool.get_dataset('parcelfees_transit')) elif my.scenario.startswith('Equity'): parcelfees = ParcelFees(dataset_pool.get_dataset('parcelfees_equity')) elif my.scenario.startswith('Infill'): parcelfees = ParcelFees(dataset_pool.get_dataset('parcelfees_infill')) costdiscount = 0.0 if not my.scenario.startswith('No Project') and not my.scenario.startswith('Equity'): costdiscount = .01 ################################# ################################# from numpy import logical_not empty_parcels = parcel_set.compute_variables("(parcel.number_of_agents(building)==0)*(parcel.node_id>0)*(parcel.shape_area>80)") res_parcels = parcel_set.compute_variables("(parcel.number_of_agents(building)>0)*(parcel.node_id>0)*(parcel.shape_area>80)") bart_parcels = parcel_set.compute_variables("(parcel.disaggregate(bayarea.node.transit_type_1_within_800_meters))") caltrain_parcels = parcel_set.compute_variables("(parcel.disaggregate(bayarea.node.transit_type_2_within_800_meters))") #pda_parcels = parcel_set.compute_variables("(parcel.pda_id > -1)*(numpy.logical_not(parcel.county_id==38))") pda_parcels = parcel_set.compute_variables("(parcel.pda_id > -1)") SAMPLE_RATE = 0.01 from opus_core.sampling_toolbox import sample_noreplace from numpy import concatenate, where sampled_res_parcels_index = sample_noreplace(where(res_parcels)[0], int(SAMPLE_RATE * parcel_set.size())) test_parcels = concatenate((where(empty_parcels==1)[0], 
sampled_res_parcels_index,where(bart_parcels==1)[0],where(caltrain_parcels==1)[0],where(pda_parcels==1)[0])) test_parcels = sample_noreplace(test_parcels, int(.08 * 154877)) numpy.random.shuffle(test_parcels) """ sample = [] for i in range(parcel_set.size()): if empty_parcels[i] == 1: sample.append(i+1) elif res_parcels[i] == 1 and numpy.random.ranf() < SAMPLE_RATE: sample.append(i+1) test_parcels = array(sample) """ #empty_parcels = parcel_set.compute_variables("(parcel.node_id>0)*(parcel.shape_area>80)") #test_parcels = numpy.where(empty_parcels==1)[0] global building_sqft, building_price building_sqft = parcel_set.compute_variables('parcel.aggregate(building.building_sqft)') building_price_owner_residential=parcel_set.compute_variables('building_price_owner_res=parcel.aggregate((residential_unit.sale_price)*(residential_unit.sale_price>0),intermediates=[building])') building_price_rental_residential=parcel_set.compute_variables('building_price_rental_res=parcel.aggregate((residential_unit.rent*12*17.9)*(residential_unit.rent>0),intermediates=[building])') building_price_nonresidential = parcel_set.compute_variables('building_price_nonres = parcel.aggregate((building.non_residential_rent*7*building.non_residential_sqft))') sum_building_p = parcel_set.compute_variables('sum_building_price = parcel.building_price_owner_res + parcel.building_price_rental_res + building_price_nonres') ##sum_building_price = building_price_owner_residential + building_price_rental_residential + building_price_nonresidential vacant_parcel = parcel_set.compute_variables('parcel.sum_building_price == 0') price_per_sqft_land = (parcel_set.compute_variables('parcel.disaggregate(safe_array_divide(zone.aggregate(parcel.sum_building_price),zone.aggregate(building.building_sqft)))'))/4 parcel_land_area = parcel_set.compute_variables('parcel.shape_area') vacant_land_price = vacant_parcel*price_per_sqft_land*parcel_land_area building_price = sum_building_p + vacant_land_price 
##sum_building_price = building_price_owner_residential + building_price_rental_residential + building_price_nonresidential #land_price = (sum_building_p==0) * (parcel_set.compute_variables('parcel.disaggregate(safe_array_divide(zone.aggregate(parcel.sum_building_price),zone.aggregate(building.building_sqft)))'))* parcel_set.compute_variables('parcel.land_area') #info used to match from proposal_component to submarket parcel_set.compute_variables(["bayarea.parcel.within_half_mile_transit", "bayarea.parcel.schooldistrict", "bayarea.parcel.jurisdiction_id", ]) #test_parcels = array([i+1 for i in range(parcel_set.size())]) #test_parcels = test_parcels[:10000] #test_parcels = test_parcels[:150] #test_parcels = numpy.where(parcel_set['parcel_id'] == 1608920)[0] #print test_parcels logger.log_status("%s parcels to test" % (test_parcels.size)) print "Num of parcels:", test_parcels.size import time HOTSHOT = 0 if MP: from multiprocessing import Pool, Queue pool = Pool(processes=4) import hotshot, hotshot.stats#, test.pystone if HOTSHOT: prof = hotshot.Profile('devmdl.prof') prof.start() outf = open(os.path.join(cache_dir,'buildings-%d.csv' % current_year),'w') outf.write('pid,county,dev_btype,stories,sqft,res_sqft,nonres_sqft,tenure,year_built,res_units,npv,actualfee,btype\n') debugf = open(os.path.join(cache_dir,'proforma-debug-%d.csv' % current_year),'w') bformdbg = 'county_id,far,height,max_dua,bform.sf_builtarea(),bform.sfunitsizes,bform.mf_builtarea(),bform.mfunitsizes,bform.num_units,bform.nonres_sqft,bform.buildable_area' otherdbg = 'isr,parcelfees,existing_sqft,existing_price,lotsize,unitsize,unitsize2,bform.sales_absorption,bform.rent_absorption,bform.leases_absorption,bform.sales_vacancy_rates,bform.vacancy_rates' debugf.write('pid,btype,npv,actualfee,pricesf,pricemf,rentsf,rentmf,rentof,rentret,rentind,%s,%s\n' % (bformdbg,otherdbg)) t1 = time.time() aggd = {} def chunks(l, n): for i in xrange(0, len(l), n): yield l[i:i+n] for test_chunk in 
chunks(test_parcels,1000): print "Executing CHUNK" sales_absorption = submarket.compute_variables('bayarea.submarket.sales_absorption') rent_absorption = submarket.compute_variables('bayarea.submarket.rent_absorption') vacancy_rates = submarket.compute_variables('bayarea.submarket.vacancy_rates') leases_absorption = esubmarket.compute_variables('bayarea.employment_submarket.leases_absorption') nr_vacancy_rates = esubmarket.compute_variables('bayarea.employment_submarket.vacancy_rates') if HOTSHOT: results = [] for p in test_chunk: r = process_parcel(p) if r <> None and r <> -1: results.append(list(r)) else: if MP: results = pool.map(process_parcel,test_chunk) else: results = [process_parcel(p) for p in test_chunk] results_bldg = [list(x[0]) for x in results if x <> None and x[0] <> -1] #each row of units represents number of units of [1, 2, 3, 4] bedrooms units = array([x[1][0] for x in results if x <> None and x[0] <> -1]) sqft_per_unit = array([x[1][1] for x in results if x <> None and x[0] <> -1]) for x in results: if x <> None: debugf.write(x[2]) results = results_bldg for result in results: #print result out_btype = devmdltypes[int(result[2])-1] outf.write(string.join([str(x) for x in result]+[str(out_btype)],sep=',')+'\n') ##TODO: id of buildings to be demolished buildings_to_demolish = [] idx_buildings_to_demolish = building_set.get_id_index(buildings_to_demolish) JAMM = JoinAttributeModificationModel() JAMM.run(household_set, building_set, index=idx_buildings_to_demolish, value=-1) building_set.remove_elements(idx_buildings_to_demolish) column_names = ["parcel_id","county","building_type_id","stories", "building_sqft","residential_sqft","non_residential_sqft", "tenure","year_built","residential_units"] buildings_data = copy.deepcopy(results) for i in range(len(buildings_data)): buildings_data[i][2] = devmdltypes[int(buildings_data[i][2])-1] buildings_data = array(buildings_data) new_buildings = {} available_bldg_id = building_set['building_id'].max() + 1 
new_bldg_ids = arange(available_bldg_id, available_bldg_id+buildings_data.shape[0], dtype=building_set['building_id'].dtype) if buildings_data.size > 0: for icol, col_name in enumerate(column_names): if col_name in building_set.get_known_attribute_names(): ddtype = building_set[col_name].dtype new_buildings[col_name] = (buildings_data[:, icol]).astype(ddtype) else: #if the col_name is not in dataset, it will be discarded anyway pass new_buildings['building_id'] = new_bldg_ids # recode tenure: 1 - rent, 2 - own from 0 - own, 1 - rent new_buildings['tenure'][new_buildings['tenure']==0] = 2 ## pid is the index to parcel_set; convert them to actual parcel_id #new_buildings['parcel_id'] = parcel_set['parcel_id'][new_buildings['parcel_id']] building_set.add_elements(new_buildings, require_all_attributes=False, change_ids_if_not_unique=True) building_set.flush_dataset() assert new_bldg_ids.size == units.shape[0] == sqft_per_unit.shape[0] units_bldg_ids = repeat(new_bldg_ids, 4) bedrooms = array([1, 2, 3, 4] * units.size) units = round(units.ravel()) sqft_per_unit = sqft_per_unit.ravel() new_units = {'building_id': array([], dtype='i4'), 'bedrooms': array([], dtype='i4'), 'sqft_per_unit': array([], dtype='i4') } for i_unit, unit in enumerate(units): if unit <= 0: continue new_units['building_id'] = concatenate((new_units['building_id'], repeat(units_bldg_ids[i_unit], unit)) ) new_units['bedrooms'] = concatenate((new_units['bedrooms'], repeat(bedrooms[i_unit], unit)) ) new_units['sqft_per_unit'] = concatenate((new_units['sqft_per_unit'], repeat(sqft_per_unit[i_unit], unit)) ) ##force dtype conversion to the same dtype as unit_set for col_name in ['building_id', 'bedrooms', 'sqft_per_unit']: if col_name in unit_set.get_known_attribute_names(): new_units[col_name] = new_units[col_name].astype(unit_set[col_name].dtype) unit_set.add_elements(new_units, require_all_attributes=False, change_ids_if_not_unique=True) unit_set.flush_dataset() for result in results: units = result[-1] 
nonres_sqft = 1 #result[6]/1000.0 county = result[1] btype = result[2] key = (county,btype) aggd.setdefault(key,0) if btype < 7: aggd[key] += units else: aggd[key] += nonres_sqft aggd.setdefault(county,0) aggd[county] += units aggf = open('county_aggregations-%d.csv' % current_year,'w') county_names = {49:'son',41:'smt',1:'ala',43:'scl',28:'nap',38:'sfr',7:'cnc',48:'sol',21:'mar',0:'n/a'} btype_names = {1:'SF',2:'SFBUILD',3:'MF',4:'MXMF',5:'CONDO',6:'MXC',7:'OF',8:'MXO',9:'CHOOD',10:'CAUTO',11:'CBOX',12:'MANU',13:'WHE'} aggf.write('county,total,'+string.join(btype_names.values(),sep=',')+'\n') for county in [38,41,43,1,7,48,28,49,21]: aggf.write(county_names[county]+','+str(aggd.get(county,0))) for btype in btype_names.keys(): key = (county,btype) val = aggd.get(key,0) aggf.write(','+str(val)) aggf.write('\n') t2 = time.time() print "Finished in %f seconds" % (t2-t1) print "Ran optimization %d times" % devmdl_optimize.OBJCNT global NOZONINGCNT, NOBUILDTYPES print "Did not find zoning for parcel %d times" % NOZONINGCNT print "Did not find building types for parcel %d times" % NOBUILDTYPES print "DONE" my.post_run() #remove price_shifter & cost_shifter to avoid them being cached if HOTSHOT: prof.stop() prof.close() stats = hotshot.stats.load('devmdl.prof') stats.strip_dirs() stats.sort_stats('cumulative') stats.print_stats(20)
def run(self, config, year, storage_type='sql'):
    """Compute derived household, person and zone attributes and export them.

    The log message identifies openAMOS as the consumer of the exported data.

    Steps, in order:
      1. Point SimulationState at config['cache_directory'] and `year`, and
         create a fresh SessionConfiguration backed by an AttributeCache.
      2. Build the output storage: an SQL database (created if it does not
         exist yet) or a CSV directory ``<cache_directory>/csv/<year>``.
      3. For households, persons and zones: hand the expression list to
         self.prepare_attributes(), write the result with
         self.write_dataset(), and drop the dataset from the pool.

    Arguments:
      config -- configuration mapping; the keys read here are
                'travel_model_configuration' (optional sub-keys
                'database_server_configuration' and 'database_name'),
                'cache_directory' and 'dataset_pool_configuration'.
      year -- simulation year to export.
      storage_type -- 'sql' (default) or 'csv'.

    Raises ValueError for any other storage_type.
    """
    tm_config = config['travel_model_configuration']
    database_server_config = tm_config.get("database_server_configuration", 'simtravel_database_server')
    database_name = tm_config.get("database_name", 'mag_zone_baseyear')

    cache_directory = config['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(cache_directory)
    simulation_state.set_current_time(year)
    attribute_cache = AttributeCache()
    dataset_pool = SessionConfiguration(new_instance=True,
                                        package_order=config['dataset_pool_configuration'].package_order,
                                        in_storage=attribute_cache).get_dataset_pool()

    if storage_type == 'sql':
        db_server = DatabaseServer(DatabaseConfiguration(
                                                         database_name = database_name,
                                                         database_configuration = database_server_config
                                                         )
                                   )
        if not db_server.has_database(database_name):
            # Parenthesised form prints the same text on Python 2 and 3.
            print("Db doesn't exist creating one")
            db_server.create_database(database_name)
        db = db_server.get_database(database_name)
        output_storage = sql_storage(storage_location = db)
    elif storage_type == 'csv':
        csv_directory = os.path.join(cache_directory, 'csv', str(year))
        output_storage = csv_storage(storage_location=csv_directory)
    else:
        # Call syntax is valid on both Python 2 and Python 3.
        raise ValueError("Unsupported output storage type {}".format(storage_type))

    logger.start_block('Compute and export data to openAMOS...')
    # try/finally guarantees the logger block is closed even when one of the
    # export steps below raises.
    try:
        # ---------------------------------------------------------------
        # Households
        # ---------------------------------------------------------------
        hh = dataset_pool.get_dataset('household')
        # 'households_recs' supplies the list of attribute names to export.
        hh_recs = dataset_pool.get_dataset('households_recs')
        #hh_recs.add_attribute(0,"htaz1")
        #hh_recs.flush_dataset()
        #syn_hh = dataset_pool.get_dataset('synthetic_household')

        hh_variables = ['houseid=household.household_id',
                        "hhsize=household.number_of_agents(person)",
                        "one=(household.household_id>0).astype('i')",
                        "inclt35k=(household.income<35000).astype('i')",
                        "incge35k=(household.income>=35000).astype('i')",
                        "incge50k=(household.income>=50000).astype('i')",
                        "incge75k=(household.income>=75000).astype('i')",
                        "incge100k=(household.income>=100000).astype('i')",
                        "inc35t50=((household.income>=35000) & (household.income<50000)).astype('i')",
                        "inc50t75=((household.income>=50000) & (household.income<75000)).astype('i')",
                        "inc75t100=((household.income>=75000) & (household.income<100000)).astype('i')",
                        'htaz = ((houseid>0)*(household.disaggregate(building.zone_id)-100) + (houseid<=0)*0)',
                        #'htaz = ((houseid>0) & (htaz1>100))*(htaz1-100)+((houseid>0) & (htaz1==-1))*1122',
                        "withchild = (household.aggregate(person.age<18)>0).astype('i')",
                        "noc = household.aggregate(person.age<18)",
                        "numadlt = household.aggregate(person.age>=18)",
                        "hinc=household.income",
                        "wif=household.workers",
                        #"wif=household.aggregate(mag_zone.person.is_employed)",
                        'numwrkr=household.workers',
                        #'numwrkr=household.aggregate(mag_zone.person.is_employed)',
                        'nwrkcnt=household.number_of_agents(person) - household.workers',
                        #'nwrkcnt=household.number_of_agents(person) - household.aggregate(mag_zone.person.is_employed)',
                        'yrbuilt=mag_zone.household.yrbuilt',
                        'mag_zone.household.sparent',
                        'mag_zone.household.rur',
                        'mag_zone.household.urb',
                        'zonetid = household.disaggregate(building.zone_id)',
                        ]

        self.prepare_attributes(hh, hh_variables)
        attrs_to_export = hh_recs.get_known_attribute_names()

        self.write_dataset(hh, attrs_to_export, output_storage)
        dataset_pool._remove_dataset(hh.dataset_name)

        # ---------------------------------------------------------------
        # Persons
        # ---------------------------------------------------------------
        persons = dataset_pool.get_dataset('person')
        persons.out_table_name_default = 'persons'

        # Recoding invalid work and school locations to some random valid values
        persons_recs = dataset_pool.get_dataset('persons_recs')
        persons_recs.add_attribute(persons['person_id'],"personuniqueid")
        persons_recs.add_attribute(persons['marriage_status'],"marstat")
        persons_recs.add_attribute(persons['student_status'],"schstat")

        # Disabled (was kept inside a bare string literal, i.e. never executed):
        #persons_recs.add_attribute(persons['wtaz0'],"htaz_act")
        #persons_recs.add_attribute(0,"wtaz_rec")
        #persons_recs.add_attribute(0,"wtaz_rec1")
        #persons_recs.add_attribute(0,"wtaz_rec2")
        #persons_recs.add_attribute(0,"wtaz1_1")
        #persons_recs.add_attribute(0,"wtaz1_2")
        #persons_recs.add_attribute(0,"wtaz1_3")
        #persons_recs.add_attribute(persons['student_status'],"schstat")

        # Placeholder columns; they extend the attribute-name list that
        # persons_recs reports below.
        persons_recs.add_attribute(0,"wtaz1")
        persons_recs.add_attribute(0,"htaz")
        persons_recs.add_attribute(0,"schtaz1")

        persons_recs.flush_dataset()

        #syn_persons = dataset_pool.get_dataset('synthetic_person')
        persons_variables = ['personid=mag_zone.person.member_id',
                             'personuniqueid=person.person_id',
                             'houseid=person.household_id',
                             "one=(person.person_id>0).astype('i')",
                             'trvtime=mag_zone.person.travel_time_from_home_to_work',
                             'timetowk=mag_zone.person.travel_time_from_home_to_work',
                             #'mag_zone.person.tmtowrk',
                             #'tmtowrk=person.disaggregate(synthetic_person.tmtowrk)',
                             "ag5t10=((person.age>=5) & (person.age<=10)).astype('i')",
                             "ag11t14=((person.age>=11) & (person.age<=14)).astype('i')",
                             "ag15t17=((person.age>=15) & (person.age<=17)).astype('i')",
                             "ag18t24=((person.age>=18) & (person.age<=24)).astype('i')",
                             "ag25t34=((person.age>=25) & (person.age<=34)).astype('i')",
                             "ag35t44=((person.age>=35) & (person.age<=44)).astype('i')",
                             "ag45t54=((person.age>=45) & (person.age<=54)).astype('i')",
                             "ag55t64=((person.age>=55) & (person.age<=64)).astype('i')",
                             "agge65=(person.age>=65).astype('i')",

                             "ag12t17=((person.age>=12) & (person.age<=17)).astype('i')",
                             "ag5t14=((person.age>=5) & (person.age<=14)).astype('i')",
                             "agge15=(person.age>=15).astype('i')",

                             "wrkr=(person.employment_status==1).astype('i')",
                             "isemploy=(person.employment_status==1).astype('i')",
                             "fulltim=(mag_zone.person.full_time==1).astype('i')",
                             'parttim=mag_zone.person.part_time',

                             'htaz = ((houseid>0)*(person.disaggregate(building.zone_id, intermediates=[household])-100) + (houseid<=0)*0)',

                             'wtaz1=(person.wtaz <= 0)*0 + (person.wtaz > 0)*(person.wtaz-100)',

                             "presch = ((person.age < 5)&(houseid>0)).astype('i')",
                             "mag_zone.person.schstat",

                             'schtaz1 = (person.schtaz <= 0)*0 + (person.schtaz > 0)*(person.schtaz-100)',
                             'marstat = person.marriage_status',

                             'enroll = person.student_status',
                             'grade = person.student_status & person.education',
                             'educ = person.education',
                             "male = (person.sex==1).astype('i')",
                             "female = (person.sex==2).astype('i')",

                             "coled = (person.education >= 10).astype('i')",

                             'race1 = person.race',
                             "white = (person.race == 1).astype('i')",
                             'person.hispanic'
                             ]

        self.prepare_attributes(persons, persons_variables)

        attrs_to_export = persons_recs.get_known_attribute_names()

        self.write_dataset(persons, attrs_to_export, output_storage)
        dataset_pool._remove_dataset(persons.dataset_name)

        # ---------------------------------------------------------------
        # Zones / locations
        # ---------------------------------------------------------------
        zones = dataset_pool.get_dataset('zone')
        # NOTE(review): the two 'industrial_*' expressions filter on
        # sector_group=='individual' -- possibly a typo for 'industrial'.
        # Left unchanged; confirm against the actual sector group names.
        zones_variables = [
                           "retail_employment=zone.aggregate(mag_zone.job.sector_group=='retail')",
                           "public_employment=zone.aggregate(mag_zone.job.sector_group=='public')",
                           "office_employment=zone.aggregate(mag_zone.job.sector_group=='office')",
                           "industrial_employment=zone.aggregate(mag_zone.job.sector_group=='individual')",
                           "other_employment=zone.aggregate(mag_zone.job.sector_group=='other')",

                           "retail_employment_density=zone.aggregate(mag_zone.job.sector_group=='retail')/zone.acres",
                           "public_employment_density=zone.aggregate(mag_zone.job.sector_group=='public')/zone.acres",
                           "office_employment_density=zone.aggregate(mag_zone.job.sector_group=='office')/zone.acres",
                           "industrial_employment_density=zone.aggregate(mag_zone.job.sector_group=='individual')/zone.acres",
                           "other_employment_density=zone.aggregate(mag_zone.job.sector_group=='other')/zone.acres",

                           "total_area=zone.acres",

                           "lowest_income=zone.aggregate(household.income < scoreatpercentile(household.income, 20))",
                           "low_income=zone.aggregate(household.income < scoreatpercentile(household.income, 40))",
                           "high_income=zone.aggregate(household.income > scoreatpercentile(household.income, 80))",

                           #"institutional_population=zone.disaggregate(locations.institutional_population)",
                           #"groupquarter_households=zone.disaggregate(locations.groupquarter_households)",

                           "residential_households=zone.number_of_agents(household)",

                           "locationid=zone.zone_id",
                           ]

        locations = dataset_pool['locations']
        # The zone expressions are prepared against `zones`, but the dataset
        # that gets written is `locations` (passed along as dataset2).
        self.prepare_attributes(zones, zones_variables, dataset2=locations)
        attrs_to_export = locations.get_known_attribute_names()

        self.write_dataset(locations, attrs_to_export, output_storage)
        dataset_pool._remove_dataset(locations.dataset_name)
        #raw_input("check location block")
    finally:
        logger.end_block()
class ModelSystem(object): """ Uses the information in configuration to run/estimate a set of models for given set of years. """ def __init__(self): self.running = False self.forked_processes = [] self.running_conditional = threading.Condition() def run( self, resources, write_datasets_to_cache_at_end_of_year=True, log_file_name="run_model_system.log", cleanup_datasets=True, ): """Entries in resources: (entries with no defaults are required) models - a list containing names of models to be run. Each name must correspond to the name of the module/class of that model. Default(object): None years - a tuple (start year, end year) debuglevel - an integer. The higher the more output will be printed. Default: 0 expression_library - a dictionary. The keys in the dictionary are pairs (dataset_name, variable_name) and the values are the corresponding expressions. The model system needs to set the expression library (if it isn't None) in DatasetFactory for DatasetFactory to know about variables defined as expressions in the xml expression library. Default: None This method is called both to start up the simulation for all years, and also for each year when running with one process per year. In the latter case, 'years' consists of just (current_year, current_year) rather than the real start and end years for the simulation. """ if not isinstance(resources, Resources): raise TypeError, "Argument 'resources' must be of type 'Resources'." 
logger_settings = resources.get("log", {"tags": [], "verbosity_level": 3}) logger.set_tags(logger_settings.get("tags", [])) logger.set_verbosity_level(logger_settings.get("verbosity_level", 3)) self.simulation_state = SimulationState() self.simulation_state.set_low_memory_run(resources.get("low_memory_mode", False)) self.simulation_state.set_start_time(resources.get("base_year", 0)) self.run_year_namespace = {} if resources.get("cache_directory", None) is not None: self.simulation_state.set_cache_directory(resources["cache_directory"]) if "expression_library" in resources: VariableFactory().set_expression_library(resources["expression_library"]) if resources.get("sample_input", False): self.update_config_for_multiple_runs(resources) cache_directory = self.simulation_state.get_cache_directory() log_file = os.path.join(cache_directory, log_file_name) logger.enable_file_logging(log_file, verbose=False) try: logger.log_status("Cache Directory set to: " + cache_directory) with logger.block("Start simulation run"): models = resources.get("models", []) models_in_years = resources.get("models_in_year", {}) resources.check_obligatory_keys(["years"]) years = resources["years"] if (not isinstance(years, tuple)) and (not isinstance(years, list)): raise TypeError, "Entry 'years' in resources must be a tuple." if len(years) < 2: print years raise StandardError, "Entry 'years' in resources must be of length at least 2." 
start_year = years[0] end_year = years[-1] debuglevel = resources.get("debuglevel", 0) seed_values = resources.get("seed", NO_SEED) logger.log_status("random seed = %s" % str(seed_values)) seed(seed_values) for year in range(start_year, end_year + 1): with logger.block("Starting simulation for year " + str(year)): self.simulation_state.set_current_time(year) SessionConfiguration().get_dataset_pool().remove_all_datasets() logger.disable_file_logging(log_file) try: if models_in_years.get(year, None) is not None: models_to_run = models_in_years[year] else: models_to_run = models self._run_year( year=year, models=models_to_run, simulation_state=self.simulation_state, debuglevel=debuglevel, resources=resources, write_datasets_to_cache_at_end_of_year=write_datasets_to_cache_at_end_of_year, cleanup_datasets=cleanup_datasets, ) finally: logger.enable_file_logging(log_file, verbose=False) collect() finally: logger.disable_file_logging(log_file) def flush_datasets(self, dataset_names, after_model=False): dataset_pool = SessionConfiguration().get_dataset_pool() for dataset_name in dataset_names: if dataset_pool.has_dataset(dataset_name): self.flush_dataset(dataset_pool.get_dataset(dataset_name), after_model=after_model) def flush_dataset(self, dataset, after_model=False): """Write the PRIMARY attributes of this dataset to the cache.""" if dataset and isinstance(dataset, Dataset): # Do not flush after model if not necessary if after_model: if len(dataset.get_attribute_names()) <= len(dataset.get_id_name()): return if (len(dataset.get_attribute_names()) == len(dataset.get_known_attribute_names())) and ( len(dataset.get_attributes_in_memory()) <= len(dataset.get_id_name()) ): dataset.delete_computed_attributes() return dataset.delete_computed_attributes() dataset.load_and_flush_dataset() def flush_datasets_after_model(self, resources): if resources.get("flush_variables", False): AttributeCache().delete_computed_tables() # this will also delete computed attributes 
datasets_to_cache = SessionConfiguration().get_dataset_pool().datasets_in_pool().keys() else: datasets_to_cache = resources.get("datasets_to_cache_after_each_model", []) self.flush_datasets(datasets_to_cache, after_model=True) def _run_year( self, year, models, simulation_state, debuglevel, resources, write_datasets_to_cache_at_end_of_year, cleanup_datasets=True, ): """ Assumes that all datasets resides in the cache directory in binary format. """ try: import wingdbstub except: pass self.vardict = {} log_file_name = os.path.join(simulation_state.get_cache_directory(), "year_%s_log.txt" % year) logger.enable_file_logging(log_file_name, "w") try: logger.start_block("Simulate year %s" % year) try: base_year = resources["base_year"] if year == base_year: year_for_base_year_cache = year # case of estimation else: year_for_base_year_cache = year - 1 cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache) self.vardict["cache_storage"] = cache_storage base_cache_storage = AttributeCache().get_flt_storage_for_year(base_year) self.vardict["base_cache_storage"] = base_cache_storage simulation_state.set_flush_datasets(resources.get("flush_variables", False)) SessionConfiguration()["simulation_year"] = year SessionConfiguration()["debuglevel"] = debuglevel datasets_to_preload_in_year = resources.get("datasets_to_preload_in_year", {}) if datasets_to_preload_in_year.get(year, None) is not None: datasets_to_preload = datasets_to_preload_in_year[year] else: datasets_to_preload = resources.get("datasets_to_preload", {}) for dataset_name in datasets_to_preload: SessionConfiguration().get_dataset_from_pool(dataset_name) models_configuration = resources.get("models_configuration", {}) dataset_pool = SessionConfiguration().get_dataset_pool() datasets = {} for dataset_name, its_dataset in dataset_pool.datasets_in_pool().iteritems(): self.vardict[dataset_name] = its_dataset datasets[dataset_name] = its_dataset exec "%s=its_dataset" % dataset_name # This is 
needed. It resides in locals() # and is passed on to models as they run. ### TODO: There has got to be a better way! model_resources = Resources(datasets) n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run( models, models_configuration ) self.run_year_namespace = locals() # ========== # Run the models. # ========== model_number = -1 for model_entry in models: # list 'models' can be in the form: # [{'model_name_1': {'group_members': ['residential', 'commercial']}}, # {'model_name_2': {'group_members': [{'residential': ['estimate','run']}, # 'commercial']}}, # {'model_name_3': ['estimate', 'run']}, # 'model_name_4', # {'model_name_5': {'group_members': 'all'}} # ] # get list of methods to be processed evtl. for each group member if isinstance(model_entry, dict): model_name, value = model_entry.items()[0] if not isinstance(value, dict): # is a model group processes = value if not isinstance(processes, list): processes = [processes] else: # in the form 'model_name_4' in the comment above model_name = model_entry processes = ["run"] group_member = None model_group = model_group_members_to_run[model_name][1] last_member = max(1, len(model_group_members_to_run[model_name][0].keys())) for imember in range(last_member): controller_config = models_configuration[model_name]["controller"] model_configuration = models_configuration[model_name] if model_group_members_to_run[model_name][0].keys(): group_member_name = model_group_members_to_run[model_name][0].keys()[imember] group_member = ModelGroupMember(model_group, group_member_name) processes = model_group_members_to_run[model_name][0][group_member_name] member_model_name = "%s_%s" % (group_member_name, model_name) if member_model_name in models_configuration.keys(): model_configuration = models_configuration[member_model_name] if "controller" in model_configuration.keys(): controller_config = model_configuration["controller"] datasets_to_preload_for_this_model = 
controller_config.get( "_model_structure_dependencies_", {} ).get("dataset", []) for dataset_name in datasets_to_preload_for_this_model: try: if not dataset_pool.has_dataset(dataset_name) or (dataset_name not in datasets.keys()): ds = dataset_pool.get_dataset(dataset_name) self.vardict[dataset_name] = ds datasets[dataset_name] = ds exec "%s=ds" % dataset_name except: logger.log_warning("Failed to load dataset %s." % dataset_name) # import part if "import" in controller_config.keys(): import_config = controller_config["import"] for import_module in import_config.keys(): exec ("from %s import %s" % (import_module, import_config[import_module])) # gui_import_replacements part # This is a temporary hack -- replicates the functionality of the "import" section # for use with the GUI. The contents of this part of the config is a dictionary. # Keys are names of models (not used here). Values are 2 element pairs. # The first element is a name and the second is a value. Bind the name to the value. 
if "gui_import_replacements" in controller_config.keys(): import_replacement_config = controller_config["gui_import_replacements"] for model_name in import_replacement_config.keys(): pair = import_replacement_config[model_name] temp = pair[1] exec ("%s = temp") % pair[0] # init part model = self.do_init(locals()) # estimate and/or run part for process in processes: model_number = model_number + 1 # write status file model.set_model_system_status_parameters( year, n_models, model_number, resources.get("status_file_for_gui", None) ) model.write_status_for_gui() # prepare part exec (self.do_prepare(locals())) processmodel_config = controller_config[process] if "output" in processmodel_config.keys(): outputvar = processmodel_config["output"] else: outputvar = "process_output" self.vardict[outputvar] = self.do_process(locals()) exec outputvar + "=self.vardict[outputvar]" # check command file from gui, if the simulation should be stopped or paused self.do_commands_from_gui(resources.get("command_file_for_gui", None)) # capture namespace for interactive estimation self.run_year_namespace = locals() self.flush_datasets_after_model(resources) del model collect() # Write all datasets to cache. if write_datasets_to_cache_at_end_of_year: logger.start_block("Writing datasets to cache for year %s" % year) try: for dataset_name, its_dataset in ( SessionConfiguration().get_dataset_pool().datasets_in_pool().iteritems() ): self.flush_dataset(its_dataset) finally: logger.end_block() finally: logger.end_block() finally: logger.disable_file_logging(log_file_name) if cleanup_datasets: SessionConfiguration().delete_datasets() def do_init(self, parent_state): """Run the 'init' part of this model's configuration. Returns model object. """ # give this method the same local variables as its calling method has. 
for key in parent_state.keys(): if key <> "self": exec ('%s = parent_state["%s"]' % (key, key)) init_config = parent_state["controller_config"]["init"] group_member = parent_state["group_member"] if group_member is None: # No model group cmd = "%s(%s)" % (init_config["name"], self.construct_arguments_from_config(init_config)) model = eval(cmd) else: # Model belongs to a group model = eval( "%s(group_member, %s)" % (init_config["name"], self.construct_arguments_from_config(init_config)) ) return model def do_prepare(self, parent_state): """Prepares for the current model in the parent state's context. What to do is determined by the contents of the current model's controller configuration. controller_config is the 'controller' part of the model configuration. vardict is a dictionary into which the output of the model's 'prepare_output' method will be put. """ # give this method the same local variables as its calling method has. for key in parent_state.keys(): if key <> "self": exec ('%s = parent_state["%s"]' % (key, key)) key_name = "prepare_for_%s" % process if key_name in controller_config.keys(): prepare_config = controller_config[key_name] if "output" in prepare_config.keys(): outputvar = prepare_config["output"] else: outputvar = "prepare_output" self.vardict[outputvar] = eval( "model.%s(%s)" % (prepare_config["name"], self.construct_arguments_from_config(prepare_config)) ) return '%s=self.vardict["%s"]' % (outputvar, outputvar) else: # do nothing when return value is exec'ed return "" def do_process(self, parent_state): for key in parent_state.keys(): if key <> "self": exec ('%s = parent_state["%s"]' % (key, key)) ev = "model.%s(%s)" % (process, self.construct_arguments_from_config(processmodel_config)) return eval(ev) def get_number_of_models_and_model_group_members_to_run(self, models, models_configuration): """Count number_of models in the list 'models' that can include group members (each member and each process is one model).""" # list models can be in 
the form: # [{'model_name_1': {'group_members': ['residential', 'commercial']}}, # {'model_name_2': {'group_members': [{'residential': ['estimate','run']}, # 'commercial']}}, # {'model_name_3': ['estimate', 'run']}, # 'model_name_4', # {'model_name_5': {'group_members': 'all'}} # ] number_of_models = 1 model_group_members_to_run = {} for model_entry in models: if isinstance(model_entry, dict): model_name, value = model_entry.items()[0] if isinstance(value, dict): # is a model group if not value.keys()[0] == "group_members": raise KeyError, "Key for model " + model_name + " must be 'group_members'." group_members = value["group_members"] model_group = None if "group_by_attribute" in models_configuration[model_name]["controller"].keys(): group_dataset_name, group_attribute = models_configuration[model_name]["controller"][ "group_by_attribute" ] model_group = ModelGroup( SessionConfiguration().get_dataset_from_pool(group_dataset_name), group_attribute ) if not isinstance(group_members, list): group_members = [group_members] if group_members[0] == "_all_": # see 'model_name_5' example above if model_group is None: raise KeyError, "Entry 'group_by_attribute' is missing for model %s" % model_name group_members = model_group.get_member_names() model_group_members_to_run[model_name] = [{}, model_group] for member in group_members: if isinstance(member, dict): # see 'model_name_2' ('residential') in the comment above member_name = member.keys()[0] model_group_members_to_run[model_name][0][member_name] = member[member_name] if not isinstance(model_group_members_to_run[model_name][0][member_name], list): model_group_members_to_run[model_name][0][member_name] = [ model_group_members_to_run[model_name][0][member_name] ] number_of_models += len(model_group_members_to_run[model_name][0][member_name]) else: # see 'model_name_1' model_group_members_to_run[model_name][0][member] = ["run"] number_of_models += len(model_group_members_to_run[model_name][0][member]) else: # in the form 
'model_name_3' in the comment above model_group_members_to_run[model_name] = [{}, None] if not isinstance(value, list): number_of_models += 1 else: number_of_models += len(value) else: # in the form 'model_name_4' in the comment above model_group_members_to_run[model_entry] = [{}, None] number_of_models += 1 return (number_of_models, model_group_members_to_run) def do_commands_from_gui(self, filename=None): if (filename is None) or not os.path.exists(filename): return while True: f = file(filename) line = f.read().strip() f.close() if line == "stop": logger.log_warning("Simulation stopped.") sys.exit() elif line == "resume": break elif line <> "pause": logger.log_warning("Unknown command '%s'. Allowed commands: 'stop', 'pause', 'resume'." % line) time.sleep(10) def run_multiprocess(self, resources): resources = Resources(resources) profiler_name = resources.get("profile_filename", None) if resources["cache_directory"] is not None: cache_directory = resources["cache_directory"] else: cache_directory = SimulationState().get_cache_directory() ### TODO: Get rid of this! There is absolutely no good reason to be ### changing the Configuration! 
resources["cache_directory"] = cache_directory log_file = os.path.join(cache_directory, "run_multiprocess.log") logger.enable_file_logging(log_file) start_year = resources["years"][0] end_year = resources["years"][-1] nyears = end_year - start_year + 1 root_seed = resources.get("seed", NO_SEED) if resources.get("_seed_dictionary_", None) is not None: # This is added by the RunManager to ensure reproducibility including restarted runs seed_dict = resources.get("_seed_dictionary_") seed_array = array(map(lambda year: seed_dict[year], range(start_year, end_year + 1))) else: seed(root_seed) seed_array = randint(1, 2 ** 30, nyears) logger.log_status("Running simulation for years %d thru %d" % (start_year, end_year)) logger.log_status("Simulation root seed: %s" % root_seed) for iyear, year in enumerate(range(start_year, end_year + 1)): success = self._run_each_year_as_separate_process( iyear, year, seed=seed_array[iyear], resources=resources, profiler_name=profiler_name, log_file=log_file ) if not success: break self._notify_stopped() if profiler_name is not None: # insert original value resources["profile_filename"] = profiler_name logger.log_status("Done running simulation for years %d thru %d" % (start_year, end_year)) # TODO: changing of configuration def _run_each_year_as_separate_process( self, iyear, year, seed=None, resources=None, profiler_name=None, log_file=None ): logger.start_block("Running simulation for year %d in new process" % year) resources["years"] = (year, year) resources["seed"] = (seed,) if profiler_name is not None: # add year to the profile name resources["profile_filename"] = "%s_%s" % (profiler_name, year) optional_args = [] if log_file: optional_args += ["--log-file-name", os.path.split(log_file)[-1]] success = False try: logger.disable_file_logging(log_file) success = self._fork_new_process( "opus_core.model_coordinators.model_system", resources, optional_args=optional_args ) logger.enable_file_logging(log_file, verbose=False) finally: 
logger.end_block() return success def run_in_one_process( self, resources, run_in_background=False, class_path="opus_core.model_coordinators.model_system" ): resources = Resources(resources) if resources["cache_directory"] is not None: cache_directory = resources["cache_directory"] else: cache_directory = SimulationState().get_cache_directory() ### TODO: Get rid of this! There is no good reason to be changing the ### Configuration. resources["cache_directory"] = cache_directory self._fork_new_process("%s" % class_path, resources, delete_temp_dir=False, run_in_background=run_in_background) self._notify_stopped() def run_in_same_process(self, resources, **kwargs): resources = Resources(resources) if resources["cache_directory"] is not None: cache_directory = resources["cache_directory"] else: cache_directory = SimulationState().get_cache_directory() ### TODO: Get rid of this! There is no good reason to be changing the ### Configuration. resources["cache_directory"] = cache_directory self._notify_started() RunModelSystem(model_system=self, resources=resources, **kwargs) self._notify_stopped() def construct_arguments_from_config(self, config): key = "arguments" if (key not in config.keys()) or (len(config[key].keys()) <= 0): return "" arg_dict = config[key] result = "" for arg_key in arg_dict.keys(): result += "%s=%s, " % (arg_key, arg_dict[arg_key]) return result def wait_for_start(self): self.running_conditional.acquire() while not self.running: self.running_conditional.wait() self.running_conditional.release() def wait_for_finish(self): self.running_conditional.acquire() while self.running: self.running_conditional.wait() self.running_conditional.release() def wait_for_process_or_finish(self, process_index): self.running_conditional.acquire() while process_index >= len(self.forked_processes) and self.running: self.running_conditional.wait() self.running_conditional.release() if not self.running: process_index = len(self.forked_processes) - 1 return process_index def 
_fork_new_process(self, module_name, resources, run_in_background=False, **key_args): self.running_conditional.acquire() self.running = True self.forked_processes.append(ForkProcess()) key_args["run_in_background"] = run_in_background success = self.forked_processes[-1].fork_new_process(module_name, resources, **key_args) self.running_conditional.notifyAll() self.running_conditional.release() if not run_in_background: self.forked_processes[-1].wait() self.forked_processes[-1].cleanup() return success def _notify_started(self): self.running_conditional.acquire() self.running = True self.running_conditional.notifyAll() self.running_conditional.release() def _notify_stopped(self): self.running_conditional.acquire() self.running = False self.running_conditional.notifyAll() self.running_conditional.release() def update_config_for_multiple_runs(self, config): models_to_update = config.get("models_with_sampled_coefficients", []) if "models_in_year" not in config.keys(): config["models_in_year"] = {} if config["models_in_year"].get(config["base_year"] + 1, None) is None: config["models_in_year"][config["base_year"] + 1] = config.get("models") for umodel in models_to_update: try: i = config["models_in_year"][config["base_year"] + 1].index(umodel) new_model_name = "%s_sampled_coef" % umodel config["models_in_year"][config["base_year"] + 1][i] = new_model_name except: pass config["models_configuration"][new_model_name] = Configuration(config["models_configuration"][umodel]) config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"][ "sample_coefficients" ] = True config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"][ "distribution" ] = "'normal'" config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"][ "cache_storage" ] = "base_cache_storage"
from opus_core.storage_factory import StorageFactory
from opus_core.paths import get_opus_data_path_path
from opus_core.logger import block
import cPickle
import pickle

# Scratch script: load two pickled inputs, open the base-year cache, compute
# two household variables, then drop into the debugger for inspection.

# NOTE: pickle.load executes arbitrary code from the file; only point these
# paths at files you produced yourself.
pickle_filename = '/workspace/price_equilibration/dump_cached.pickle'
with open(pickle_filename, 'rb') as pickle_file:  # closed even if load fails
    pickle_data = pickle.load(pickle_file)

pickle_filename = '/workspace/price_equilibration/submarket_id.pickle'
with open(pickle_filename, 'rb') as pickle_file:
    submkt_data = pickle.load(pickle_file)

data_path = get_opus_data_path_path()
cache_dir = os.path.join(data_path, 'bay_area_zone/base_year_data.original')

year = 2000
simulation_state = SimulationState()
simulation_state.set_current_time(year)
SimulationState().set_cache_directory(cache_dir)
attribute_cache = AttributeCache()
dataset_pool = SessionConfiguration(new_instance=True,
                                    package_order=['bayarea', 'urbansim_parcel',
                                                   'urbansim', 'opus_core'],
                                    in_storage=attribute_cache
                                    ).get_dataset_pool()

hh = dataset_pool.get_dataset('household')
# ratio of pickled 'Xpagents' entries to households in the cache
proportion = float(pickle_data['Xpagents'].size) / hh.size()
hh_cnty = hh.compute_variables('county=household.disaggregate(zone.county, intermediates=[building])')
hh_bldg_type = hh.compute_variables('building_type_id=household.disaggregate(building.building_type_id)')
hh_df = hh.to_dataframe()

# Deliberate stop: everything above is left in scope for interactive work.
import pdb; pdb.set_trace()
class TestDataset(opus_unittest.OpusTestCase):
    """Tests for loading Dataset objects from storage, joining them and versioning attributes."""

    def setUp(self):
        """Create a temporary base cache directory and a fresh SimulationState that uses it."""
        self.start_year = 2001
        self.expected_sic_data = array([6,4,7,808,6])
        self.job_id = array([1,2,3,4,5])
        self.base_cache_dir = tempfile.mkdtemp(prefix='opus_tmp')
        state = SimulationState(low_memory_run=True,
                                new_instance=True,
                                base_cache_dir=self.base_cache_dir)
        self.simulation_state = state
        self.dir = state.get_cache_directory()
        state.set_current_time(self.start_year)
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)

    def tearDown(self):
        """Remove everything below the cache directory, then both temporary directories."""
        # File logging keeps a file open inside the cache directory, so it
        # has to be switched off before the directory can be emptied.
        if logger._file_stream:
            logger.disable_file_logging()
        # Bottom-up walk: each directory is empty by the time it is removed.
        for parent, subdir_names, file_names in os.walk(self.dir, topdown=False):
            for name in file_names:
                os.remove(os.path.join(parent, name))
            for name in subdir_names:
                os.rmdir(os.path.join(parent, name))
        os.rmdir(self.dir)
        os.rmdir(self.base_cache_dir)

    def _make_storage(self, tables):
        """Return a new dict_storage holding each (table_name, table_data) pair, written in order."""
        storage = StorageFactory().get_storage('dict_storage')
        for table_name, table_data in tables:
            storage.write_table(table_name=table_name, table_data=table_data)
        return storage

    def test_dict_dataset(self):
        store = self._make_storage([
            ('dataset', {"id":array([1,2,3,4]), "attr":array([4,7,2,1])}),
        ])
        dataset = Dataset(in_storage=store, in_table_name='dataset', id_name="id")

        attr_total = dataset.get_attribute("attr").sum()
        self.assert_(attr_total==14, "Something is wrong with the dataset.")
        self.assert_(dataset.size()==4, "Wrong size of dataset.")

    def test_flt_dataset(self):
        import opus_core
        from opus_core.store.flt_storage import flt_storage

        attribute = 'little_endian'
        flt_dir = os.path.join(opus_core.__path__[0], 'data', 'flt')
        dataset = Dataset(in_storage=flt_storage(storage_location=flt_dir),
                          id_name=attribute,
                          in_table_name='endians')

        self.assertAlmostEqual(11.0, dataset.get_attribute_by_index(attribute, 0))
        self.assertEqual(None, dataset.get_attribute_header(attribute))

    def test_join_by_rows(self):
        store = self._make_storage([
            ('dataset1', {'id':array([2,4,6,8]), 'attr':array([4,7,2,1])}),
            ('dataset2', {'id':array([1,5,9]), 'attr':array([55,66,100])}),
        ])
        first = Dataset(in_storage=store, in_table_name='dataset1', id_name='id')
        second = Dataset(in_storage=store, in_table_name='dataset2', id_name='id')

        first.join_by_rows(second)

        # the rows of the second dataset are appended; the second is untouched
        self.assert_(ma.allclose(first.get_attribute('attr'), array([4,7,2,1,55,66,100])))
        self.assert_(ma.allclose(second.get_attribute('attr'), array([55,66,100])))

    def test_join_by_rows_for_unique_ids(self):
        store = self._make_storage([
            ('dataset1', {"id":array([2,4]), "attr":array([4,7])}),
            ('dataset2', {"id":array([1,2]), "attr":array([55,66])}),
        ])
        first = Dataset(in_storage=store, in_table_name='dataset1', id_name='id')
        second = Dataset(in_storage=store, in_table_name='dataset2', id_name='id')

        # id 2 occurs in both tables, so the join is expected to be refused
        self.assertRaises(StandardError, first.join_by_rows, second)

    def test_join_by_rows_for_char_arrays(self):
        from numpy import alltrue

        store = self._make_storage([
            ('dataset1', {'id':array([2,4,6,8]), 'attr':array(['4','7','2','1'])}),
            ('dataset2', {'id':array([1,5,9]), 'attr':array(['55','66','100'])}),
        ])
        first = Dataset(in_storage=store, in_table_name='dataset1', id_name='id')
        second = Dataset(in_storage=store, in_table_name='dataset2', id_name='id')

        first.join_by_rows(second)

        expected_joined = array(['4','7','2','1','55','66','100'])
        expected_second = array(['55','66','100'])
        self.assert_(alltrue(first.get_attribute('attr') == expected_joined))
        self.assert_(alltrue(second.get_attribute('attr') == expected_second))

    def test_variable_dependencies_tree_with_versioning(self):
        store = self._make_storage([
            ('tests', {
                'id':array([2,4]),
                'a_dependent_variable':array([4,7]),
                'a_dependent_variable2':array([10,1])
                }),
        ])
        dataset = Dataset(in_storage=store, in_table_name='tests', id_name='id', dataset_name='tests')
        two_dependencies = ["opus_core.tests.a_test_variable_with_two_dependencies"]

        dataset.compute_variables(two_dependencies)
        # every version starts at 0
        self.assert_(dataset.get_version("a_test_variable_with_two_dependencies")==0)
        self.assert_(dataset.get_version("a_dependent_variable")==0)
        self.assert_(dataset.get_version("a_dependent_variable2")==0)

        # each modification bumps the version of the modified attribute
        dataset.modify_attribute("a_dependent_variable", array([0,0]))
        self.assert_(dataset.get_version("a_dependent_variable")==1)
        dataset.modify_attribute("a_dependent_variable", array([1,1]))
        self.assert_(dataset.get_version("a_dependent_variable")==2)

        # recomputing after a dependency changed bumps the variable once ...
        dataset.compute_variables(two_dependencies)
        self.assert_(dataset.get_version("a_test_variable_with_two_dependencies")==1)
        # ... and a second recomputation without changes leaves it alone
        dataset.compute_variables(two_dependencies)
        self.assert_(dataset.get_version("a_test_variable_with_two_dependencies")==1)

        autogen_variable = "my_var = 3 * opus_core.tests.a_dependent_variable"
        for _ in (1, 2):
            dataset.compute_variables([autogen_variable])
            self.assert_(dataset.get_version("my_var")==0)

    def test_compute_variable_with_unknown_package(self):
        store = self._make_storage([
            ('tests', {
                'id':array([2,4]),
                'attr1':array([4,7]),
                }),
        ])
        dataset = Dataset(in_storage=store, in_table_name='tests', id_name='id', dataset_name='test')

        # only has to complete without raising
        dataset.compute_one_variable_with_unknown_package("attr1_times_2", package_order=["opus_core"])

    def test_join_datasets_with_2_ids(self):
        from numpy import ma

        store = self._make_storage([
            ('data1', {
                'id1':array([2,4,2]),
                'id2':array([1,2,3]),
                'attr1':array([4,7,1]),
                'attr2':array([100,0,1000]),
                }),
            ('data2', {
                'id1':array([4,2,2]),
                'id2':array([2,3,1]),
                'attr1':array([50,60,70])
                }),
        ])
        id_columns = ['id1', 'id2']
        first = Dataset(in_storage=store, in_table_name='data1', id_name=id_columns, dataset_name='data1')
        second = Dataset(in_storage=store, in_table_name='data2', id_name=id_columns, dataset_name='data2')

        first.join(second, 'attr1')

        # attr1 is taken from the rows of data2 with the matching (id1, id2)
        # pair; attr2 is left as it was
        self.assertEqual(ma.allequal(first.get_attribute('attr1'), array([70,50,60])), True)
        self.assertEqual(ma.allequal(first.get_attribute('attr2'), array([100,0,1000])), True)
if __name__ == "__main__":
    # Command-line options, registered from a table of (flags, settings).
    option_table = [
        (("-a", "--augment-variables"),
         dict(dest="augment_variables", action="store_true", help="model name")),
        (("-c", "--indicator-config"),
         dict(dest="indicator_config", action="store", type="string", help="model name")),
        (("-y", "--year"),
         dict(dest="year", action="store", type="int", help="end year")),
    ]
    parser = OptionParser()
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)
    (options, args) = parser.parse_args()

    runs = Runs.runs
    comparison_variables = Runs.comparison_variables
    baseline = Runs.baseline

    simulation_state = SimulationState()
    simulation_state.set_current_time(options.year)
    SessionConfiguration(new_instance=True,
                         package_order=['psrc','urbansim','opus_core'],
                         in_storage=AttributeCache())

    # With -a, compute the comparison variables of every listed dataset
    # against the baseline cache for the requested year.
    if options.augment_variables:
        for dataset_name in comparison_variables.keys():
            cache_directory = baseline
            simulation_state.set_cache_directory(cache_directory)
            dataset = DatasetFactory().get_dataset(
                dataset_name,
                package='urbansim',
                arguments={'in_storage': AttributeCache()})
            variables = comparison_variables[dataset_name]
            dataset.compute_variables(variables, resources=Resources())
def run(self, base_directory, urbansim_cache_directory, years, output_directory, temp_folder,
        coefficients_name, specification_name, convert_flt=True, convert_input=False):
    """Run the land cover change model for every year in 'years'.

    base_directory: directory that contains all years folders of lccm.
    urbansim_cache_directory: directory that contains all years folders of
        the urbansim cache.
    years: list of years to run.
    output_directory, convert_flt, convert_input: passed through to the
        input/output generation helpers.
    temp_folder: working directory for temporary land cover data; a new
        temporary directory is created when this is None.
    coefficients_name, specification_name: names of the tables (in the
        package's 'data' directory, tab storage) holding the coefficients
        and the equation specification.

    The land cover cells of each year are processed in overlapping windows
    of at most LccmConfiguration.offset indices. The working directory
    (self.temp_land_cover_dir) is removed when the run ends, whether it
    finished or raised.
    """
    model = LandCoverChangeModel(self.possible_lcts, submodel_string=self.lct_attribute,
                                 choice_attribute_name=self.lct_attribute, debuglevel=4)
    coefficients = Coefficients()
    storage = StorageFactory().get_storage('tab_storage',
        storage_location=os.path.join(self.package_path, 'data'))
    coefficients.load(in_storage=storage, in_table_name=coefficients_name)
    specification = EquationSpecification(in_storage=storage)
    specification.load(in_table_name=specification_name)
    specification.set_variable_prefix("biocomplexity.land_cover.")
    constants = Constants()
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(urbansim_cache_directory)
    attribute_cache = AttributeCache()
    SessionConfiguration(new_instance=True,
                         package_order=['biocomplexity', 'urbansim', 'opus_core'],
                         in_storage=AttributeCache())

    ncols = LccmConfiguration.ncols

    if temp_folder is None:
        self.temp_land_cover_dir = tempfile.mkdtemp()
    else:
        self.temp_land_cover_dir = temp_folder

    try:
        for year in years:
            land_cover_path = self._generate_input_land_cover(year, base_directory, urbansim_cache_directory,
                                                              years, output_directory, convert_flt,
                                                              convert_input)
            #max_size = 174338406 (orig) - act. int: 19019944 (37632028 incl NoData)
            max_size = self._get_max_index(land_cover_path) # 1st instance of lc_dataset - but looks like a 'lite' version
            offset = min(LccmConfiguration.offset, max_size)
            # the same for every window of this year
            land_cover_cache_path = os.path.join(urbansim_cache_directory, str(year), 'land_covers')
            s = 0
            t = offset
            while (s < t and t <= max_size):
                logger.log_status("Offset: ", s, t)
                index = arange(s,t)

                self._clean_up_land_cover_cache(land_cover_cache_path)

                simulation_state.set_current_time(year)

                # 2nd instance of lc_dataset
                land_covers = LandCoverDataset(
                    in_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path),
                    out_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path),
                    debuglevel=4)
                land_covers.subset_by_index(index)
                gridcells = GridcellDataset(in_storage=attribute_cache, debuglevel=4)

                model.run(specification, coefficients, land_covers,
                          data_objects={"gridcell":gridcells,
                                        "constants":constants,
                                        "flush_variables":True},
                          chunk_specification = {'nchunks':5}) ## chunk size set here
                land_covers.flush_dataset()
                del gridcells
                del land_covers

                self._generate_output_flt2(year, urbansim_cache_directory, output_directory, convert_flt)

                if t >= max_size: break
                # the next window starts 10 rows (10*ncols cells) before the
                # end of this one, so consecutive windows overlap
                s = max(t-10*ncols,s)
                t = min(t+offset-10*ncols,max_size)
    finally:
        # clean up temp storage once the simulation is done, or has failed
        shutil.rmtree(self.temp_land_cover_dir)
class ModelSystem(object):
    """
    Uses the information in configuration to run/estimate a set of models for given set of years.
    """

    def __init__(self):
        """Start in the 'not running' state with no forked processes."""
        self.running = False
        self.forked_processes = []
        # guards 'running' and 'forked_processes'; used by the wait_for_* methods
        self.running_conditional = threading.Condition()

    def run(self, resources, write_datasets_to_cache_at_end_of_year=True,
            log_file_name='run_model_system.log', cleanup_datasets=True):
        """Run the configured models for every year from years[0] to years[-1].

        Entries read from resources ('years' is required):
            models - a list of the models to be run. Default: []
            models_in_year - a dictionary year -> list of models that replaces
                      'models' for that year. Default: {}
            years - a tuple or list of length at least 2; the first and the
                      last element are used as start and end year.
            debuglevel - an integer, passed on to each year's run. Default: 0
            seed - value handed to seed(). Default: NO_SEED
            base_year - used as the start time of the simulation state. Default: 0
            low_memory_mode - Default: False
            cache_directory - if not None, set as the cache directory.
            expression_library - if present, handed to
                      VariableFactory().set_expression_library().
            sample_input - if true, update_config_for_multiple_runs(resources)
                      is called first. Default: False
            log - a dictionary with 'tags' and 'verbosity_level' for the logger.

        This method is called both to start up the simulation for all years, and also for each year
        when running with one process per year.  In the latter case, 'years' consists of just
        (current_year, current_year) rather than the real start and end years for the simulation.

        Raises TypeError if 'resources' is not a Resources object or 'years'
        is neither a tuple nor a list; raises StandardError if 'years' has
        fewer than 2 elements.
        """
        if not isinstance(resources, Resources):
            raise TypeError, "Argument 'resources' must be of type 'Resources'."
        logger_settings = resources.get("log", { "tags": [], "verbosity_level": 3 })
        logger.set_tags(logger_settings.get("tags", []))
        logger.set_verbosity_level(logger_settings.get("verbosity_level", 3))
        self.simulation_state = SimulationState()
        self.simulation_state.set_low_memory_run(resources.get("low_memory_mode", False))
        self.simulation_state.set_start_time(resources.get("base_year", 0))
        self.run_year_namespace = {}

        if resources.get('cache_directory', None) is not None:
            self.simulation_state.set_cache_directory(resources['cache_directory'])

        if 'expression_library' in resources:
            VariableFactory().set_expression_library(resources['expression_library'])

        if resources.get('sample_input', False):
            self.update_config_for_multiple_runs(resources)

        cache_directory = self.simulation_state.get_cache_directory()
        log_file = os.path.join(cache_directory, log_file_name)
        logger.enable_file_logging(log_file, verbose=False)
        try:
            logger.log_status("Cache Directory set to: " + cache_directory)

            with logger.block('Start simulation run'):
                models = resources.get("models", [])
                models_in_years = resources.get("models_in_year", {})

                resources.check_obligatory_keys(["years"])

                years = resources["years"]
                if (not isinstance(years, tuple)) and (not isinstance(years, list)):
                    raise TypeError, "Entry 'years' in resources must be a tuple."

                if len(years) < 2:
                    print years
                    raise StandardError, "Entry 'years' in resources must be of length at least 2."

                start_year = years[0]
                end_year = years[-1]

                debuglevel = resources.get("debuglevel", 0)
                seed_values = resources.get('seed', NO_SEED)

                logger.log_status("random seed = %s" % str(seed_values))
                seed(seed_values)

                for year in range(start_year, end_year + 1):
                    with logger.block("Starting simulation for year " + str(year)):
                        self.simulation_state.set_current_time(year)
                        SessionConfiguration().get_dataset_pool().remove_all_datasets()
                        # _run_year logs to its own per-year file, so the
                        # run-level log file is switched off meanwhile
                        logger.disable_file_logging(log_file)
                        try:
                            # a year-specific model list overrides 'models'
                            if models_in_years.get(year, None) is not None:
                                models_to_run = models_in_years[year]
                            else:
                                models_to_run = models
                            self._run_year(
                                year=year,
                                models=models_to_run,
                                simulation_state=self.simulation_state,
                                debuglevel=debuglevel,
                                resources=resources,
                                write_datasets_to_cache_at_end_of_year=write_datasets_to_cache_at_end_of_year,
                                cleanup_datasets=cleanup_datasets)
                        finally:
                            logger.enable_file_logging(log_file, verbose=False)
                        collect()
        finally:
            logger.disable_file_logging(log_file)

    def flush_datasets(self, dataset_names, after_model=False):
        """Call flush_dataset for each named dataset that is in the session's dataset pool."""
        dataset_pool = SessionConfiguration().get_dataset_pool()
        for dataset_name in dataset_names:
            if dataset_pool.has_dataset(dataset_name):
                self.flush_dataset(dataset_pool.get_dataset(dataset_name), after_model=after_model)

    def flush_dataset(self, dataset, after_model=False):
        """Write the PRIMARY attributes of this dataset to the cache.

        Computed attributes are deleted, then load_and_flush_dataset() is
        called. Nothing happens if 'dataset' is false or not a Dataset.
        With after_model=True the load-and-flush step is skipped in the two
        cases tested below.
        """
        if dataset and isinstance(dataset, Dataset):
            # Do not flush after model if not necessary
            if after_model:
                # no more attribute names than id names: nothing is done
                if len(dataset.get_attribute_names()) <= len(dataset.get_id_name()):
                    return
                # as many attribute names as known attribute names, and no
                # more attributes in memory than id names: only the computed
                # attributes are deleted
                if (len(dataset.get_attribute_names()) == len(dataset.get_known_attribute_names())) and \
                   (len(dataset.get_attributes_in_memory()) <= len(dataset.get_id_name())):
                    dataset.delete_computed_attributes()
                    return
            dataset.delete_computed_attributes()
            dataset.load_and_flush_dataset()

    def flush_datasets_after_model(self, resources):
        """Flush datasets once a model has run.

        With resources['flush_variables'] true, the computed tables of the
        attribute cache are deleted and every dataset in the pool is
        flushed; otherwise only the datasets listed in
        resources['datasets_to_cache_after_each_model'] (default []).
        """
        if resources.get('flush_variables', False):
            AttributeCache().delete_computed_tables()
            # this will also delete computed attributes
            datasets_to_cache = SessionConfiguration().get_dataset_pool().datasets_in_pool().keys()
        else:
            datasets_to_cache = resources.get("datasets_to_cache_after_each_model", [])
        self.flush_datasets(datasets_to_cache, after_model=True)

    def _run_year(self, year, models, simulation_state, debuglevel,
                  resources, write_datasets_to_cache_at_end_of_year,
                  cleanup_datasets=True):
        """Run all entries of 'models' for one year.

        Assumes that all datasets resides in the cache directory in binary format.

        The local variables of this method are part of its contract: the
        datasets are bound to local names with exec, and locals() is handed
        to do_init, do_prepare and do_process, which evaluate strings built
        from the model configuration against those names. Do not rename or
        remove locals here.
        """
        # optional debugger hook; silently ignored when it cannot be imported
        try:
            import wingdbstub
        except:
            pass
        self.vardict = {}
        log_file_name = os.path.join(simulation_state.get_cache_directory(), "year_%s_log.txt" % year)
        logger.enable_file_logging(log_file_name, 'w')
        try:
            logger.start_block('Simulate year %s' % year)
            try:
                base_year = resources['base_year']
                if year == base_year:
                    year_for_base_year_cache = year # case of estimation
                else:
                    year_for_base_year_cache = year - 1
                cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
                self.vardict['cache_storage'] = cache_storage
                base_cache_storage = AttributeCache().get_flt_storage_for_year(base_year)
                self.vardict['base_cache_storage'] = base_cache_storage
                simulation_state.set_flush_datasets(resources.get("flush_variables", False))
                SessionConfiguration()["simulation_year"] = year
                SessionConfiguration()["debuglevel"] = debuglevel
                # a year-specific preload list overrides 'datasets_to_preload'
                datasets_to_preload_in_year = resources.get('datasets_to_preload_in_year', {})
                if datasets_to_preload_in_year.get(year, None) is not None:
                    datasets_to_preload = datasets_to_preload_in_year[year]
                else:
                    datasets_to_preload = resources.get('datasets_to_preload', {})
                for dataset_name in datasets_to_preload:
                    SessionConfiguration().get_dataset_from_pool(dataset_name)
                models_configuration = resources.get('models_configuration', {})
                dataset_pool = SessionConfiguration().get_dataset_pool()
                datasets = {}
                for dataset_name, its_dataset in dataset_pool.datasets_in_pool().iteritems():
                    self.vardict[dataset_name] = its_dataset
                    datasets[dataset_name] = its_dataset
                    # bind the dataset to a local variable named after it
                    exec '%s=its_dataset' % dataset_name

                # This is needed. It resides in locals()
                # and is passed on to models as they run.
                ### TODO: There has got to be a better way!
                model_resources = Resources(datasets)
                n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run(
                    models, models_configuration)
                self.run_year_namespace = locals()
                #==========
                # Run the models.
                #==========
                model_number = -1
                for model_entry in models:
                    # list 'models' can be in the form:
                    # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
                    #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                    #                                      'commercial']}},
                    #  {'model_name_3': ['estimate', 'run']},
                    #  'model_name_4',
                    #  {'model_name_5': {'group_members': '_all_'}}
                    # ]
                    # get list of methods to be processed evtl. for each group member
                    if isinstance(model_entry, dict):
                        model_name, value = model_entry.items()[0]
                        if not isinstance(value, dict): # not a model group: value is the process or list of processes
                            processes = value
                            if not isinstance(processes, list):
                                processes = [processes]
                    else: # in the form 'model_name_4' in the comment above
                        model_name = model_entry
                        processes = ["run"]
                    group_member = None
                    model_group = model_group_members_to_run[model_name][1]
                    # a model without group members is still run once
                    last_member = max(1, len(model_group_members_to_run[model_name][0].keys()))
                    for imember in range(last_member):
                        controller_config = models_configuration[model_name]["controller"]
                        model_configuration = models_configuration[model_name]
                        if model_group_members_to_run[model_name][0].keys():
                            group_member_name = model_group_members_to_run[model_name][0].keys()[imember]
                            group_member = ModelGroupMember(model_group, group_member_name)
                            processes = model_group_members_to_run[model_name][0][group_member_name]
                            # a configuration named '<member>_<model>' overrides
                            # the one of the model itself
                            member_model_name = "%s_%s" % (group_member_name, model_name)
                            if member_model_name in models_configuration.keys():
                                model_configuration = models_configuration[member_model_name]
                                if "controller" in model_configuration.keys():
                                    controller_config = model_configuration["controller"]
                        datasets_to_preload_for_this_model = controller_config.get(
                            '_model_structure_dependencies_', {}).get('dataset', [])
                        for dataset_name in datasets_to_preload_for_this_model:
                            try:
                                if not dataset_pool.has_dataset(dataset_name) or (dataset_name not in datasets.keys()):
                                    ds = dataset_pool.get_dataset(dataset_name)
                                    self.vardict[dataset_name] = ds
                                    datasets[dataset_name] = ds
                                    exec '%s=ds' % dataset_name
                            except:
                                logger.log_warning('Failed to load dataset %s.' % dataset_name)
                        # import part
                        if "import" in controller_config.keys():
                            import_config = controller_config["import"]
                            for import_module in import_config.keys():
                                exec("from %s import %s" % (import_module, import_config[import_module]))

                        # gui_import_replacements part
                        # This is a temporary hack -- replicates the functionality of the "import" section
                        # for use with the GUI.  The contents of this part of the config is a dictionary.
                        # Keys are names of models (not used here).  Values are 2 element pairs.
                        # The first element is a name and the second is a value.  Bind the name to the value.
                        if "gui_import_replacements" in controller_config.keys():
                            import_replacement_config = controller_config["gui_import_replacements"]
                            # NOTE(review): this loop rebinds 'model_name',
                            # which the enclosing loops also use -- confirm
                            # that this is harmless.
                            for model_name in import_replacement_config.keys():
                                pair = import_replacement_config[model_name]
                                temp = pair[1]
                                exec("%s = temp") % pair[0]

                        # init part
                        model = self.do_init(locals())

                        # estimate and/or run part
                        for process in processes:
                            model_number = model_number + 1
                            # write status file
                            model.set_model_system_status_parameters(
                                year, n_models, model_number,
                                resources.get('status_file_for_gui', None))
                            model.write_status_for_gui()
                            # prepare part
                            exec(self.do_prepare(locals()))
                            processmodel_config = controller_config[process]
                            if "output" in processmodel_config.keys():
                                outputvar = processmodel_config["output"]
                            else:
                                outputvar = "process_output"
                            self.vardict[outputvar] = self.do_process(locals())
                            # make the result available as a local of that name
                            exec outputvar + '=self.vardict[outputvar]'

                            # check command file from gui, if the simulation should be stopped or paused
                            self.do_commands_from_gui(resources.get('command_file_for_gui', None))

                        # capture namespace for interactive estimation
                        self.run_year_namespace = locals()
                        self.flush_datasets_after_model(resources)
                        del model
                        collect()

                # Write all datasets to cache.
                if write_datasets_to_cache_at_end_of_year:
                    logger.start_block('Writing datasets to cache for year %s' % year)
                    try:
                        for dataset_name, its_dataset in SessionConfiguration(
                                ).get_dataset_pool().datasets_in_pool().iteritems():
                            self.flush_dataset(its_dataset)
                    finally:
                        logger.end_block()
            finally:
                logger.end_block()
        finally:
            logger.disable_file_logging(log_file_name)

        if cleanup_datasets:
            SessionConfiguration().delete_datasets()

    def do_init(self, parent_state):
        """Run the 'init' part of this model's configuration.
        Returns model object.

        parent_state is the locals() dictionary of the caller. The
        constructor call is built as a string from the 'init' configuration
        ("name" plus its arguments) and evaluated here; for a model that
        belongs to a group, the group member is passed as first argument.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> 'self':
                exec('%s = parent_state["%s"]' % (key, key))
        init_config = parent_state['controller_config']["init"]
        group_member = parent_state['group_member']
        if group_member is None: # No model group
            cmd = "%s(%s)" % (init_config["name"],
                              self.construct_arguments_from_config(init_config))
            model = eval(cmd)
        else: # Model belongs to a group
            model = eval("%s(group_member, %s)" % (init_config["name"],
                                                   self.construct_arguments_from_config(init_config)))
        return model

    def do_prepare(self, parent_state):
        """Prepares for the current model in the parent state's context.
        What to do is determined by the contents of the current model's controller configuration.

        If the controller configuration has an entry 'prepare_for_<process>',
        the model method named there is called and its result is stored in
        self.vardict under the entry's "output" name (default
        'prepare_output'). Returns a statement, as a string, that the caller
        exec's to bind that result to a local variable; returns '' when
        there is nothing to prepare.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> 'self':
                exec('%s = parent_state["%s"]' % (key, key))
        key_name = "prepare_for_%s" % process
        if key_name in controller_config.keys():
            prepare_config = controller_config[key_name]
            if "output" in prepare_config.keys():
                outputvar = prepare_config["output"]
            else:
                outputvar = "prepare_output"
            self.vardict[outputvar] = eval("model.%s(%s)" % (prepare_config["name"],
                                                             self.construct_arguments_from_config(prepare_config)))
            return '%s=self.vardict["%s"]' % (outputvar, outputvar)
        else:
            # do nothing when return value is exec'ed
            return ''

    def do_process(self, parent_state):
        """Call the model method named by 'process' and return its result.

        parent_state is the locals() dictionary of the caller; the call is
        built as a string, with the arguments of 'processmodel_config', and
        evaluated against those names.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> 'self':
                exec('%s = parent_state["%s"]' % (key, key))
        ev = "model.%s(%s)" % (process,
                               self.construct_arguments_from_config(processmodel_config))
        return eval(ev)

    def get_number_of_models_and_model_group_members_to_run(self, models, models_configuration):
        """Count number_of models in the list 'models' that can include group members
        (each member and each process is one model).

        Returns a tuple (number_of_models, model_group_members_to_run).
        The second element maps each model name to a 2-element list: a
        dictionary member name -> list of processes (empty for models
        without group members), and the ModelGroup or None.

        Raises KeyError if a model group entry is not keyed by
        'group_members', or if '_all_' members are requested for a model
        whose controller has no 'group_by_attribute' entry.
        """
        # list models can be in the form:
        # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
        #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
        #                                      'commercial']}},
        #  {'model_name_3': ['estimate', 'run']},
        #  'model_name_4',
        #  {'model_name_5': {'group_members': '_all_'}}
        # ]
        # NOTE(review): the counter starts at 1, not 0 -- confirm that the
        # extra one is intended.
        number_of_models = 1
        model_group_members_to_run = {}
        for model_entry in models:
            if isinstance(model_entry, dict):
                model_name, value = model_entry.items()[0]
                if isinstance(value, dict): # is a model group
                    if not value.keys()[0] == "group_members":
                        raise KeyError, "Key for model " + model_name + " must be 'group_members'."
                    group_members = value["group_members"]
                    model_group = None
                    if 'group_by_attribute' in models_configuration[model_name]["controller"].keys():
                        group_dataset_name, group_attribute = models_configuration[
                            model_name]["controller"]['group_by_attribute']
                        model_group = ModelGroup(
                            SessionConfiguration().get_dataset_from_pool(group_dataset_name),
                            group_attribute)
                    if not isinstance(group_members, list):
                        group_members = [group_members]
                    if group_members[0] == "_all_": # see 'model_name_5' example above
                        if model_group is None:
                            raise KeyError, "Entry 'group_by_attribute' is missing for model %s" % model_name
                        group_members = model_group.get_member_names()
                    model_group_members_to_run[model_name] = [{}, model_group]
                    for member in group_members:
                        if isinstance(member, dict):
                            # see 'model_name_2' ('residential') in the comment above
                            member_name = member.keys()[0]
                            model_group_members_to_run[model_name][0][member_name] = member[member_name]
                            # a single process is wrapped into a list
                            if not isinstance(model_group_members_to_run[model_name][0][member_name], list):
                                model_group_members_to_run[model_name][0][member_name] = [
                                    model_group_members_to_run[model_name][0][member_name]
                                ]
                            number_of_models += len(model_group_members_to_run[model_name][0][member_name])
                        else: # see 'model_name_1'
                            model_group_members_to_run[model_name][0][member] = ["run"]
                            number_of_models += len(model_group_members_to_run[model_name][0][member])
                else: # in the form 'model_name_3' in the comment above
                    model_group_members_to_run[model_name] = [{}, None]
                    if not isinstance(value, list):
                        number_of_models += 1
                    else:
                        number_of_models += len(value)
            else: # in the form 'model_name_4' in the comment above
                model_group_members_to_run[model_entry] = [{}, None]
                number_of_models += 1
        return (number_of_models, model_group_members_to_run)

    def do_commands_from_gui(self, filename=None):
        """Poll a command file and stop, pause or resume accordingly.

        Returns immediately when 'filename' is None or does not exist.
        Otherwise the file is re-read every 10 seconds:
            'stop'   - log a warning and call sys.exit()
            'resume' - return to the caller
            'pause'  - keep polling
            other    - log a warning and keep polling
        """
        if (filename is None) or not os.path.exists(filename):
            return
        while True:
            f = file(filename)
            line = f.read().strip()
            f.close()
            if line == 'stop':
                logger.log_warning('Simulation stopped.')
                sys.exit()
            elif line == 'resume':
                break
            elif line <> 'pause':
                logger.log_warning(
                    "Unknown command '%s'. Allowed commands: 'stop', 'pause', 'resume'."
                    % line)
            time.sleep(10)

    def run_multiprocess(self, resources):
        """Run every year of resources["years"] in its own forked process.

        One seed per year is taken from resources['_seed_dictionary_'] when
        that entry is present; otherwise the generator is seeded with the
        root seed (resources["seed"], default NO_SEED) and one seed per
        year is drawn with randint. Stops at the first year whose process
        reports failure.
        """
        resources = Resources(resources)
        profiler_name = resources.get("profile_filename", None)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is absolutely no good reason to be
        ### changing the Configuration!
        resources['cache_directory'] = cache_directory

        log_file = os.path.join(cache_directory, 'run_multiprocess.log')
        logger.enable_file_logging(log_file)

        start_year = resources["years"][0]
        end_year = resources["years"][-1]
        nyears = end_year - start_year + 1
        root_seed = resources.get("seed", NO_SEED)
        if resources.get('_seed_dictionary_', None) is not None:
            # This is added by the RunManager to ensure reproducibility including restarted runs
            seed_dict = resources.get('_seed_dictionary_')
            seed_array = array(
                map(lambda year: seed_dict[year],
                    range(start_year, end_year + 1)))
        else:
            seed(root_seed)
            seed_array = randint(1, 2**30, nyears)
        logger.log_status("Running simulation for years %d thru %d" %
                          (start_year, end_year))
        logger.log_status("Simulation root seed: %s" % root_seed)

        for iyear, year in enumerate(range(start_year, end_year + 1)):
            success = self._run_each_year_as_separate_process(
                iyear,
                year,
                seed=seed_array[iyear],
                resources=resources,
                profiler_name=profiler_name,
                log_file=log_file)
            if not success:
                break

        self._notify_stopped()
        if profiler_name is not None: # insert original value
            resources["profile_filename"] = profiler_name
        logger.log_status("Done running simulation for years %d thru %d" %
                          (start_year, end_year))

    #TODO: changing of configuration
    def _run_each_year_as_separate_process(self, iyear, year, seed=None,
                                           resources=None, profiler_name=None,
                                           log_file=None):
        """Fork one process that simulates the single year 'year'.

        Overwrites resources['years'] with (year, year) and
        resources['seed'] with the 1-tuple (seed,), and appends the year to
        the profile file name when profiling. File logging to 'log_file' is
        switched off while the child runs. Returns the value returned by
        _fork_new_process (False if it raised before returning).
        """
        logger.start_block('Running simulation for year %d in new process' %
                           year)
        resources['years'] = (year, year)
        # trailing comma: a 1-element tuple
        resources['seed'] = seed,

        if profiler_name is not None:
            # add year to the profile name
            resources["profile_filename"] = "%s_%s" % (profiler_name, year)

        optional_args = []
        if log_file:
            optional_args += ['--log-file-name', os.path.split(log_file)[-1]]

        success = False
        try:
            logger.disable_file_logging(log_file)
            success = self._fork_new_process(
                'opus_core.model_coordinators.model_system',
                resources,
                optional_args=optional_args)
            logger.enable_file_logging(log_file, verbose=False)
        finally:
            logger.end_block()

        return success

    def run_in_one_process(
            self,
            resources,
            run_in_background=False,
            class_path='opus_core.model_coordinators.model_system'):
        """Fork a single process that runs module 'class_path' with 'resources'."""
        resources = Resources(resources)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources['cache_directory'] = cache_directory

        self._fork_new_process('%s' % class_path,
                               resources,
                               delete_temp_dir=False,
                               run_in_background=run_in_background)
        self._notify_stopped()

    def run_in_same_process(self, resources, **kwargs):
        """Run the model system in the current process via RunModelSystem."""
        resources = Resources(resources)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources['cache_directory'] = cache_directory

        self._notify_started()
        RunModelSystem(model_system=self, resources=resources, **kwargs)
        self._notify_stopped()

    def construct_arguments_from_config(self, config):
        """Return config["arguments"] rendered as "key=value, key=value, ".

        Each pair is followed by ", " (including the last one). Returns ""
        when there is no "arguments" entry or it is empty.
        """
        key = "arguments"
        if (key not in config.keys()) or (len(config[key].keys()) <= 0):
            return ""
        arg_dict = config[key]
        result = ""
        for arg_key in arg_dict.keys():
            result += "%s=%s, " % (arg_key, arg_dict[arg_key])
        return result

    def wait_for_start(self):
        """Block until self.running becomes true."""
        self.running_conditional.acquire()
        while not self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_finish(self):
        """Block until self.running becomes false."""
        self.running_conditional.acquire()
        while self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_process_or_finish(self, process_index):
        """Block until forked process number 'process_index' exists or the run stops.

        Returns 'process_index', or the index of the last forked process
        when the model system is no longer running.
        """
        self.running_conditional.acquire()
        while process_index >= len(self.forked_processes) and self.running:
            self.running_conditional.wait()
        self.running_conditional.release()
        if not self.running:
            process_index = len(self.forked_processes) - 1
        return process_index

    def
_fork_new_process(self, module_name, resources, run_in_background=False, **key_args): self.running_conditional.acquire() self.running = True self.forked_processes.append(ForkProcess()) key_args["run_in_background"] = run_in_background success = self.forked_processes[-1].fork_new_process( module_name, resources, **key_args) self.running_conditional.notifyAll() self.running_conditional.release() if not run_in_background: self.forked_processes[-1].wait() self.forked_processes[-1].cleanup() return success def _notify_started(self): self.running_conditional.acquire() self.running = True self.running_conditional.notifyAll() self.running_conditional.release() def _notify_stopped(self): self.running_conditional.acquire() self.running = False self.running_conditional.notifyAll() self.running_conditional.release() def update_config_for_multiple_runs(self, config): models_to_update = config.get('models_with_sampled_coefficients', []) if 'models_in_year' not in config.keys(): config['models_in_year'] = {} if config['models_in_year'].get(config['base_year'] + 1, None) is None: config['models_in_year'][config['base_year'] + 1] = config.get('models') for umodel in models_to_update: try: i = config['models_in_year'][config['base_year'] + 1].index(umodel) new_model_name = '%s_sampled_coef' % umodel config['models_in_year'][config['base_year'] + 1][i] = new_model_name except: pass config["models_configuration"][new_model_name] = Configuration( config["models_configuration"][umodel]) config["models_configuration"][new_model_name]["controller"][ "prepare_for_run"]["arguments"]["sample_coefficients"] = True config["models_configuration"][new_model_name]["controller"][ "prepare_for_run"]["arguments"]["distribution"] = "'normal'" config["models_configuration"][new_model_name]["controller"][ "prepare_for_run"]["arguments"][ "cache_storage"] = "base_cache_storage"
if __name__ == "__main__":
    # Command-line entry point: set up the simulation state for the requested
    # year and, if -a is given, compute the comparison variables of every
    # dataset on the baseline cache.
    parser = OptionParser()
    # The help texts of -a and -c used to be a copy-pasted "model name".
    parser.add_option("-a", "--augment-variables", dest="augment_variables",
                      action="store_true",
                      help="compute the comparison variables on the baseline cache")
    parser.add_option("-c", "--indicator-config", dest="indicator_config",
                      action="store", type="string",
                      help="indicator configuration")
    parser.add_option("-y", "--year", dest="year", action="store", type="int",
                      help="end year")
    (options, args) = parser.parse_args()

    runs = Runs.runs
    comparison_variables = Runs.comparison_variables
    baseline = Runs.baseline

    simulation_state = SimulationState()
    # NOTE(review): options.year is None when -y is omitted - confirm that
    # set_current_time accepts that.
    simulation_state.set_current_time(options.year)
    SessionConfiguration(new_instance=True,
                         package_order=['psrc','urbansim','opus_core'],
                         in_storage=AttributeCache())

    if options.augment_variables:
        for dataset_name in comparison_variables.keys():
            # the datasets are read from the baseline run's cache
            cache_directory = baseline
            simulation_state.set_cache_directory(cache_directory)
            dataset = DatasetFactory().get_dataset(
                dataset_name,
                package='urbansim',
                arguments={'in_storage': AttributeCache()})
            variables = comparison_variables[dataset_name]
            dataset.compute_variables(variables, resources=Resources())
else: cache_directory = _cache_directory try: year = int(options.year) except IndexError: parser.error("year must be provided.") parser.print_help() sys.exit(1) if package_order is None: package_order = eval(options.package_order) st = SimulationState() st.set_current_time(year) st.set_cache_directory(cache_directory) attribute_cache = AttributeCache() dp = SessionConfiguration(new_instance=True, package_order=package_order, in_storage=attribute_cache ).get_dataset_pool() ## example usage: # python -i explore_run_cache.py -p bay_area_parcel -r 105 2025 # >>> h2025 = dp.get_dataset('household') # >>> children_5yr = h2025.compute_variables('household.aggregate(person.age <= 5)') # python -d /workspace/opus/data/bay_area_parcel/base_year_data 2010 # >>> h2010 = dp.get_dataset('household')
def run(self, resources, year):
    """Generate indicators for a hard-coded set of cached simulation runs.

    Points the simulation state to resources['cache_directory'] and 'year'
    and loads the 'gridcell' dataset. Then, for the fixed year 2025, calls
    generate_indicators once per entry of the 'runs' dictionary below
    (cache directory -> run description). If 'variable_augment' is switched
    on, the comparison variables computed on the baseline run are first
    stored in each run's cache as 'baseline_<variable>' attributes.
    """
    # NOTE(review): the original read config['cache_directory'] here, but
    # 'config' is not an argument and is only imported further down;
    # 'resources' (otherwise unused) is presumably what was meant - confirm.
    cache_directory = resources['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(cache_directory)
    simulation_state.set_current_time(year)
    attribute_cache = AttributeCache()
    SessionConfiguration(new_instance=True, in_storage=AttributeCache())
    arguments = {'in_storage':attribute_cache}
    gc_set = DatasetFactory().get_dataset('gridcell', package='urbansim',
                                          arguments=arguments)

    # cache directory of a run -> description of the run
    runs = {
        #r'X:\urbansim_cache\run_1713.2007_01_03_11_16':r'(run 1713 - baseline)',
        #r'X:\urbansim_cache\run_1714.2007_01_03_11_20':r'(run 1714 - no ugb)',
        #r'X:\urbansim_cache\run_1731.2007_01_03_11_16':r'(run 1731 - no build)',
        r'X:\urbansim_cache\run_1847.2007_01_15_15_23':r'(run 1847 - no UGB 1/17/2007)',
        r'X:\urbansim_cache\run_1848.2007_01_15_15_40':r'(run 1848 - no UGB+1.5xhighway 1/17/2007)',
        # r'X:\urbansim_cache\run_1849.2007_01_15_16_09':r'(run 1849 - baseline 1/17/2007)',
        r'V:\psrc\run_1850.2007_01_15_17_03':r'(run 1850 - baseline 1/17/2007)',
        r'V:\psrc\run_1851.2007_01_15_17_07':r'(run 1851 - no build 1/17/2007)'
    }

    #baseline = r'X:\urbansim_cache\run_1713.2007_01_03_11_16'
    baseline = r'V:\psrc\run_1850.2007_01_15_17_03'

    # dataset name -> variables to be copied from the baseline run
    comparison_variables = {'gridcell': ['urbansim.gridcell.population',
                                         'urbansim.gridcell.number_of_jobs'],
                            'faz':['urbansim.faz.population',
                                   'urbansim.faz.number_of_jobs'],
                            }

    #datasets_to_preload = {
    #    'gridcell':{ 'nchunks':2},
    #    'household':{},
    #    'job':{},
    #    'zone':{},
    #    'faz':{},
    #    'development_type':{},
    #    'development_event_history':{},
    #    'development_constraint':{},
    #    'job_building_type':{},
    #    'urbansim_constant':{},
    #    }

    # NOTE(review): from here on the hard-coded year replaces the 'year'
    # argument - confirm that this is intended.
    year = 2025

    simulation_state = SimulationState()
    simulation_state.set_current_time(year)
    SessionConfiguration(new_instance=True,
                         package_order=['psrc','urbansim','opus_core'],
                         in_storage=AttributeCache())

    #cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
    #datasets = DatasetFactory().create_datasets_from_flt(datasets_to_preload,
    #                                                     "urbansim",
    #                                                     additional_arguments={'in_storage': AttributeCache()})

    variable_augment = False   # switch for the baseline augmentation below

    if variable_augment:
        for dataset_name in comparison_variables:
            # compute the variables on the baseline run
            cache_directory = baseline
            simulation_state.set_cache_directory(cache_directory)
            dataset = DatasetFactory().get_dataset(dataset_name,
                                                   package='urbansim',
                                                   arguments={'in_storage': AttributeCache()})
            variables = comparison_variables[dataset_name]
            dataset.compute_variables(variables, resources=Resources())
            ids = dataset.get_id_attribute()

            for run in runs:
                # store the baseline values in the cache of each run,
                # matched by the dataset ids
                cache_directory = run
                simulation_state.set_cache_directory(cache_directory)
                run_dataset = DatasetFactory().get_dataset(dataset_name,
                                                           package='urbansim',
                                                           arguments={'in_storage': AttributeCache()})
                match_index = run_dataset.get_id_index(ids)
                for variable in variables:
                    short_name = VariableName(variable).alias()
                    attribute = dataset.get_attribute(short_name)
                    run_dataset.add_attribute(attribute[match_index],
                                              'baseline_'+short_name, metadata=1)
                    run_dataset.flush_attribute('baseline_'+short_name)
                SessionConfiguration().get_dataset_pool().remove_all_datasets()

    #indicators_module = args[0]
    #eval("from %s import config" % indicators_module)
    from make_indicators_openev import config
    from urbansim.indicators.indicator_configuration_handler_batch_mode import generate_indicators
    #from make_indicators_openev import config
    #from inprocess.travis.urbansim.indicators.indicator_configuration_handler_batch_mode import generate_indicators

    for run, description in runs.items():
        config.request_years = [year]
        config.cache_directory = run
        config.run_description = description
        generate_indicators(config)
def run(self, resources, year):
    """Produce indicators for a fixed list of cached simulation runs.

    The simulation state is set to resources['cache_directory'] and 'year'
    and the 'gridcell' dataset is loaded. Afterwards generate_indicators is
    called for every entry of 'runs' (cache directory -> description) for
    the hard-coded year 2025. With 'variable_augment' enabled, values of the
    comparison variables from the baseline run are written into every run's
    cache as attributes called 'baseline_<variable>' beforehand.
    """
    # NOTE(review): this used to be config['cache_directory']. No 'config'
    # is passed in and the name is only imported at the end of this
    # function; the unused 'resources' argument is presumably the intended
    # source - confirm.
    cache_directory = resources['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(cache_directory)
    simulation_state.set_current_time(year)
    attribute_cache = AttributeCache()
    SessionConfiguration(new_instance=True, in_storage=AttributeCache())
    arguments = {'in_storage': attribute_cache}
    gc_set = DatasetFactory().get_dataset('gridcell',
                                          package='urbansim',
                                          arguments=arguments)

    # maps the cache directory of each run to its description
    runs = {
        #r'X:\urbansim_cache\run_1713.2007_01_03_11_16':r'(run 1713 - baseline)',
        #r'X:\urbansim_cache\run_1714.2007_01_03_11_20':r'(run 1714 - no ugb)',
        #r'X:\urbansim_cache\run_1731.2007_01_03_11_16':r'(run 1731 - no build)',
        r'X:\urbansim_cache\run_1847.2007_01_15_15_23':
        r'(run 1847 - no UGB 1/17/2007)',
        r'X:\urbansim_cache\run_1848.2007_01_15_15_40':
        r'(run 1848 - no UGB+1.5xhighway 1/17/2007)',
        # r'X:\urbansim_cache\run_1849.2007_01_15_16_09':r'(run 1849 - baseline 1/17/2007)',
        r'V:\psrc\run_1850.2007_01_15_17_03':
        r'(run 1850 - baseline 1/17/2007)',
        r'V:\psrc\run_1851.2007_01_15_17_07':
        r'(run 1851 - no build 1/17/2007)'
    }

    #baseline = r'X:\urbansim_cache\run_1713.2007_01_03_11_16'
    baseline = r'V:\psrc\run_1850.2007_01_15_17_03'

    # variables per dataset whose baseline values are compared
    comparison_variables = {
        'gridcell': [
            'urbansim.gridcell.population',
            'urbansim.gridcell.number_of_jobs'
        ],
        'faz': ['urbansim.faz.population', 'urbansim.faz.number_of_jobs'],
    }

    #datasets_to_preload = {
    #    'gridcell':{ 'nchunks':2},
    #    'household':{},
    #    'job':{},
    #    'zone':{},
    #    'faz':{},
    #    'development_type':{},
    #    'development_event_history':{},
    #    'development_constraint':{},
    #    'job_building_type':{},
    #    'urbansim_constant':{},
    #    }

    # NOTE(review): the 'year' argument is overwritten by a constant here -
    # confirm that this is wanted.
    year = 2025

    simulation_state = SimulationState()
    simulation_state.set_current_time(year)
    SessionConfiguration(new_instance=True,
                         package_order=['psrc', 'urbansim', 'opus_core'],
                         in_storage=AttributeCache())

    #cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
    #datasets = DatasetFactory().create_datasets_from_flt(datasets_to_preload,
    #                                                     "urbansim",
    #                                                     additional_arguments={'in_storage': AttributeCache()})

    # set to True to copy baseline values into the caches of the runs
    variable_augment = False

    if variable_augment:
        for dataset_name, variables in comparison_variables.items():
            # baseline values of the variables
            cache_directory = baseline
            simulation_state.set_cache_directory(cache_directory)
            dataset = DatasetFactory().get_dataset(
                dataset_name,
                package='urbansim',
                arguments={'in_storage': AttributeCache()})
            dataset.compute_variables(variables, resources=Resources())
            ids = dataset.get_id_attribute()

            for run in runs:
                # write them to each run's cache, aligned by dataset ids
                cache_directory = run
                simulation_state.set_cache_directory(cache_directory)
                run_dataset = DatasetFactory().get_dataset(
                    dataset_name,
                    package='urbansim',
                    arguments={'in_storage': AttributeCache()})
                match_index = run_dataset.get_id_index(ids)
                for variable in variables:
                    short_name = VariableName(variable).alias()
                    baseline_name = 'baseline_' + short_name
                    attribute = dataset.get_attribute(short_name)
                    run_dataset.add_attribute(attribute[match_index],
                                              baseline_name,
                                              metadata=1)
                    run_dataset.flush_attribute(baseline_name)
                SessionConfiguration().get_dataset_pool(
                ).remove_all_datasets()

    #indicators_module = args[0]
    #eval("from %s import config" % indicators_module)
    from make_indicators_openev import config
    from urbansim.indicators.indicator_configuration_handler_batch_mode import generate_indicators
    #from make_indicators_openev import config
    #from inprocess.travis.urbansim.indicators.indicator_configuration_handler_batch_mode import generate_indicators

    for run, description in runs.items():
        config.request_years = [year]
        config.cache_directory = run
        config.run_description = description
        generate_indicators(config)