def init_run(self, create_baseyear_cache=True):
    '''init run, get run_id & cache_directory.'''
    ## avoid invoking start_run from the command line
    option_group = StartRunOptionGroup()
    option_group.parser.set_defaults(xml_configuration=self.xml_config,
                                     scenario_name=self.scenario)
    #run_id, cache_directory = start_run(option_group)
    options, args = option_group.parse()
    self.run_manager = RunManager(option_group.get_services_database_configuration(options))
    resources = XMLConfiguration(self.xml_config).get_run_configuration(self.scenario)
    insert_auto_generated_cache_directory_if_needed(resources)
    cache_directory = resources['cache_directory']
    self.run_manager.setup_new_run(cache_directory, resources)
    run_id, cache_directory = self.run_manager.run_id, self.run_manager.get_current_cache_directory()
    self.run_manager.add_row_to_history(run_id, resources, "done")
    if create_baseyear_cache:
        self.run_manager.create_baseyear_cache(resources)
    ## good for testing
    #run_id = 275
    #cache_directory = '/home/lmwang/opus/data/paris_zone/runs/run_275.2012_05_26_00_20'
    assert run_id is not None
    assert cache_directory is not None
    return run_id, cache_directory
def main(option_group=None, args=None):
    if option_group is None:
        option_group = RestartRunOptionGroup()
    parser = option_group.parser
    if args is None:
        options, args = option_group.parse()
    else:
        options, _args = option_group.parse()
    run_manager = RunManager(option_group.get_services_database_configuration(options))
    run_as_multiprocess = not options.run_as_single_process

    if len(args) < 2:
        parser.print_help()
    else:
        run_id, year = int(args[0]), int(args[1])
        end_year = int(options.end_year) if options.end_year is not None else None
        run_manager.restart_run(run_id, year,
                                options.project_name,
                                end_year=end_year,
                                skip_urbansim=options.skip_urbansim,
                                create_baseyear_cache_if_not_exists=options.create_baseyear_cache_if_not_exists,
                                skip_cache_cleanup=options.skip_cache_cleanup,
                                run_as_multiprocess=run_as_multiprocess)
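# A hedged usage sketch: main() above can also be driven programmatically by
# handing it the positional arguments it would otherwise read from the command
# line (a run_id and the year to restart from; both values are placeholders).
main(args=['42', '2005'])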
def test_run(self):
    # opus_matsim.__path__ is the path of the opus_matsim python module, so we
    # can use it as an anchor for the test configuration files.
    config_location = os.path.join(opus_matsim.__path__[0], 'tests')
    print "location: ", config_location
    run_config = XMLConfiguration(os.path.join(config_location, "test_config.xml")).get_run_configuration("Test")
    run_config['creating_baseyear_cache_configuration'].cache_directory_root = self.temp_dir
    run_config['creating_baseyear_cache_configuration'].baseyear_cache.existing_cache_to_copy = \
        os.path.join(opus_matsim.__path__[0], 'tests', 'testdata', 'base_year_data')
    # Reset the cache directory before calling
    # insert_auto_generated_cache_directory_if_needed() to obtain consistent
    # behavior independent of the file root.
    run_config['cache_directory'] = None
    insert_auto_generated_cache_directory_if_needed(run_config)
    run_manager = RunManager(ServicesDatabaseConfiguration())
    run_manager.setup_new_run(cache_directory=run_config['cache_directory'],
                              configuration=run_config)
    run_manager.run_run(run_config, run_as_multiprocess=True)
    self.assert_(True)
    self.cleanup_test_run()
def test_run(self):
    print "Entering test run"
    run_manager = RunManager(ServicesDatabaseConfiguration())
    run_manager.setup_new_run(cache_directory=self.config['cache_directory'],
                              configuration=self.config)
    run_manager.run_run(self.config, run_as_multiprocess=True)
    print "Leaving test run"
def prepare_run_manager(option_group=None):
    if option_group is None:
        option_group = StartRunOptionGroup()
    parser = option_group.parser
    options, args = option_group.parse()
    run_manager = RunManager(option_group.get_services_database_configuration(options))

    if options.pickled_resource_file is not None:
        f = file(options.pickled_resource_file, 'r')
        try:
            config = pickle.load(f)
        finally:
            f.close()
    elif options.configuration_path is not None:
        opus_path = options.configuration_path
        try:
            config = get_config_from_opus_path(opus_path)
        except ImportError:
            # TODO: Once all fully-specified configurations are stored as
            # classes, get rid of this use.
            import_stmt = 'from %s import run_configuration as config' % opus_path
            exec(import_stmt)
        insert_auto_generated_cache_directory_if_needed(config)
    elif options.xml_configuration is not None:
        if options.scenario_name is None:
            parser.print_help()
            sys.exit(1)
        config = XMLConfiguration(options.xml_configuration).get_run_configuration(options.scenario_name)
        insert_auto_generated_cache_directory_if_needed(config)
    else:
        parser.print_help()
        sys.exit(1)

    if options.existing_cache_to_copy is not None:
        config['creating_baseyear_cache_configuration'].cache_from_database = False
        config['creating_baseyear_cache_configuration'].baseyear_cache = BaseyearCacheConfiguration(
            existing_cache_to_copy=options.existing_cache_to_copy,
        )
        if options.years_to_cache is not None:
            config['creating_baseyear_cache_configuration'].baseyear_cache.years_to_cache = eval(options.years_to_cache)

    if options.profile_filename is not None:
        config["profile_filename"] = options.profile_filename

    run_manager.setup_new_run(cache_directory=config['cache_directory'],
                              configuration=config)
    return options, config, run_manager
def run(self):
    logger.start_block()
    insert_auto_generated_cache_directory_if_needed(self.config)
    run_manager = RunManager(ServicesDatabaseConfiguration())
    run_manager.setup_new_run(cache_directory=self.config['cache_directory'],
                              configuration=self.config)
    run_manager.run_run(self.config, run_as_multiprocess=True)
    logger.end_block()
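# The run() method above shows the sequence shared by most of these snippets:
# build a run configuration, let Opus pick a cache directory, register the run
# with the services database, then execute it. A minimal standalone sketch of
# that sequence follows; the XML path and scenario name are placeholders, and
# the import paths are the ones used by the Opus source these snippets come
# from (verify them against your checkout).
from opus_core.configurations.xml_configuration import XMLConfiguration
from opus_core.database_management.configurations.services_database_configuration import ServicesDatabaseConfiguration
from opus_core.services.run_server.run_manager import RunManager, insert_auto_generated_cache_directory_if_needed

config = XMLConfiguration('project_configs/my_project.xml').get_run_configuration('My_scenario')  # placeholder paths
insert_auto_generated_cache_directory_if_needed(config)  # fills in config['cache_directory']
run_manager = RunManager(ServicesDatabaseConfiguration())
run_manager.setup_new_run(cache_directory=config['cache_directory'], configuration=config)
run_manager.run_run(config, run_as_multiprocess=True)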
def on_buttonBox_accepted(self):
    path = str(self.lePath.text())
    if not os.path.exists(path):
        msg = 'Cannot import, %s does not exist' % path
        logger.log_warning(msg)
        MessageBox.warning(mainwindow=self, text=msg, detailed_text='')
    else:
        cache_directory = path
        years = []
        for dir in os.listdir(cache_directory):
            if len(dir) == 4 and dir.isdigit():
                years.append(int(dir))
        if years == []:
            msg = 'Cannot import, %s has no run data' % path
            logger.log_warning(msg)
            MessageBox.warning(mainwindow=self, text=msg, detailed_text='')
        else:
            start_year = min(years)
            end_year = max(years)
            project_name = os.environ['OPUSPROJECTNAME']
            run_name = os.path.basename(path)
            server_config = ServicesDatabaseConfiguration()
            run_manager = RunManager(server_config)
            run_id = run_manager._get_new_run_id()
            resources = {
                'cache_directory': cache_directory,
                'description': '',
                'years': (start_year, end_year),
                'project_name': project_name
            }
            try:
                run_manager.add_row_to_history(run_id=run_id,
                                               resources=resources,
                                               status='done',
                                               run_name=run_name)
                update_available_runs(self.project)
                logger.log_status('Added run %s of project %s to run_activity table'
                                  % (run_name, project_name))
            except:
                errorInfo = formatExceptionInfo()
                logger.log_error(errorInfo)
                MessageBox.error(mainwindow=self,
                                 text='Could not add run %s of project %s to run_activity table'
                                      % (run_name, project_name),
                                 detailed_text=errorInfo)
    self.close()
def test_simulation(self):
    services_db = ServicesDatabaseConfiguration(database_name='services',
                                                database_configuration='services_database_server')
    run_manager = RunManager(services_db)
    run_as_multiprocess = True
    for scenario_name in ['san_antonio_baseline_test']:
        config = self.xml_config.get_run_configuration(scenario_name)
        insert_auto_generated_cache_directory_if_needed(config)
        run_manager.setup_new_run(cache_directory=config['cache_directory'],
                                  configuration=config)
        run_manager.run_run(config, run_as_multiprocess=run_as_multiprocess)
def test_simulation(self):
    eugene_dir = __import__('eugene').__path__[0]
    xml_config = XMLConfiguration(os.path.join(eugene_dir, 'configs', 'eugene_gridcell.xml'))
    option_group = StartRunOptionGroup()
    parser = option_group.parser
    # simulate 0 command line arguments by passing in []
    (options, _) = parser.parse_args([])
    run_manager = RunManager(option_group.get_services_database_configuration(options))
    run_section = xml_config.get_run_configuration('Eugene_baseline')
    insert_auto_generated_cache_directory_if_needed(run_section)
    run_manager.setup_new_run(cache_directory=run_section['cache_directory'],
                              configuration=run_section)
    run_manager.run_run(run_section)
def test_simulation(self):
    # check that the simulation proceeds without crashing
    # open the configuration for seattle_parcel.xml
    seattle_parcel_dir = __import__('seattle_parcel').__path__[0]
    xml_config = XMLConfiguration(os.path.join(seattle_parcel_dir, 'configs', 'seattle_parcel.xml'))
    option_group = StartRunOptionGroup()
    parser = option_group.parser
    # simulate 0 command line arguments by passing in []
    (options, _) = parser.parse_args([])
    run_manager = RunManager(option_group.get_services_database_configuration(options))
    run_section = xml_config.get_run_configuration('Seattle_baseline')
    insert_auto_generated_cache_directory_if_needed(run_section)
    run_manager.setup_new_run(cache_directory=run_section['cache_directory'],
                              configuration=run_section)
    run_manager.run_run(run_section)
def get_run_manager():
    '''Get an instance of a valid run manager.
    @return: a run manager instance (RunManager)
    '''
    config = ServicesDatabaseConfiguration()
    run_manager = RunManager(config)
    return run_manager
def get_run_manager(self):
    """In case the connection to services timed out, reconnect."""
    try:
        self._run_manager.services_db.table_exists('run_activity')
    except:
        # connection has gone away, re-create run_manager
        self._run_manager = RunManager(self.services_db_config)
    return self._run_manager
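# A hedged usage note for the reconnect pattern above: because the probe
# re-creates the manager when the services-database connection has timed out,
# long-lived callers should go through this accessor before each database
# operation rather than caching the instance themselves, e.g. (using
# add_row_to_history as it appears elsewhere in these snippets):
#
#     self.get_run_manager().add_row_to_history(run_id, resources, "taken")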
def update_prediction(self, est_v, simulation_state, dataset_pool, calib_datasets, *args, **kwargs):
    option_group = RestartRunOptionGroup()
    option_group.parser.set_defaults(project_name=self.project_name,
                                     skip_cache_cleanup=self.skip_cache_cleanup)
    options, args = option_group.parse()
    if self.run_manager is None:
        self.run_manager = RunManager(option_group.get_services_database_configuration(options))

    if lock is not None:
        lock.acquire()
    ## query runs available for re-use
    runs_done = self.run_manager.get_run_info(run_ids=self.run_ids, status='done')
    create_baseyear_cache = False
    # (a leftover "import pdb; pdb.set_trace()" debugging stop was removed here)
    if len(runs_done) == 0:
        ## there is no re-usable run directory; init a new run
        run_id, cache_directory = self.init_run(create_baseyear_cache=False)
        self.run_ids.append(run_id)
        create_baseyear_cache = True
        logger.log_status('Initializing new run with id ' + str(run_id))
    else:
        run_id = runs_done[0].run_id  ## take the first 'done' run_id
        cache_directory = self.run_manager.get_cache_directory(run_id)
        logger.log_status('Using old run with id ' + str(run_id))
    resources = self.run_manager.get_resources_for_run_id_from_history(run_id, filter_by_status=False)
    self.run_manager.add_row_to_history(run_id, resources, "taken")
    if lock is not None:
        lock.release()

    if create_baseyear_cache:
        self.run_manager.create_baseyear_cache(resources)

    self.update_parameters(est_v, cache_directory, simulation_state,
                           dataset_pool, calib_datasets, *args, **kwargs)
    restart_run(option_group=option_group, args=[run_id, self.start_year])

    prediction = self.summarize_prediction(cache_directory, simulation_state,
                                           dataset_pool, calib_datasets)
    return prediction
def test_restart_simple_run(self):
    _do_run_simple_test_run(self, self.temp_dir, self.config, end_year=1983)
    runs_manager = RunManager(self.config)
    config = _get_run_config(temp_dir=self.temp_dir)
    runs_manager.update_environment_variables(run_resources=config)

    run_activity = runs_manager.services_db.get_table("run_activity")
    s = select([func.max(run_activity.c.run_id)])
    run_id = runs_manager.services_db.execute(s).fetchone()[0]
    s = select([run_activity.c.status],
               whereclause=run_activity.c.run_id == run_id)
    status = runs_manager.services_db.execute(s).fetchone()[0]
    expected = "done"
    self.assertEqual(status, expected)

    runs_manager.restart_run(run_id, restart_year=1981,
                             project_name="eugene_gridcell",
                             skip_urbansim=False)
    s = select([run_activity.c.status],
               whereclause=run_activity.c.run_id == run_id)
    status = runs_manager.services_db.execute(s).fetchone()[0]
    expected = "done"
    self.assertEqual(status, expected)

    # Restarting without running urbansim should not re-run that year.
    # TODO: test that no models are run this time.
    runs_manager.restart_run(run_id, restart_year=1982,
                             project_name="eugene_gridcell",
                             skip_urbansim=True)
    s = select([run_activity.c.status],
               whereclause=run_activity.c.run_id == run_id)
    status = runs_manager.services_db.execute(s).fetchone()[0]
    expected = "done"
    self.assertEqual(status, expected)

    self.cleanup_test_run()
def main(option_group=None, args=None):
    if option_group is None:
        option_group = restart_run.RestartRunOptionGroup()
    parser = option_group.parser
    if args is None:
        options, args = option_group.parse()
    else:
        options, _args = option_group.parse()
    run_manager = RunManager(option_group.get_services_database_configuration(options))
    run_as_multiprocess = not options.run_as_single_process

    if len(args) < 1:
        parser.print_help()
        sys.exit(1)
    run_id = int(args[0])
    year = None
    if len(args) > 1:
        year = int(args[1])

    # Get the configuration from the DB and ensure the run directory is
    # mounted. Note that we pass a dummy value for restart_year because we
    # might not know it yet. When we actually restart the run, we will pass
    # the correct year.
    run_resources = run_manager.create_run_resources_from_history(run_id=run_id, restart_year=2010)
    hudson_common.mount_cache_dir(run_resources)
    cache_dir = run_resources['cache_directory']
    if not year:
        # guess the year based on how the cache dir is populated
        years = map(lambda y: int(os.path.basename(y)),
                    glob.glob(os.path.join(cache_dir, "2*")))
        year = max(years)

    end_year = int(options.end_year) if options.end_year is not None else None
    run_manager.restart_run(run_id, year,
                            options.project_name,
                            end_year=end_year,
                            skip_urbansim=options.skip_urbansim,
                            create_baseyear_cache_if_not_exists=options.create_baseyear_cache_if_not_exists,
                            skip_cache_cleanup=options.skip_cache_cleanup,
                            run_as_multiprocess=run_as_multiprocess)
def _do_run_simple_test_run(caller, temp_dir, config, end_year=None):
    """Run the model system with a single model (for speed).
    Sets the .resources property of the caller before starting the run.
    """
    runs_manager = RunManager(config)

    run_configuration = SubsetConfiguration()
    run_configuration['creating_baseyear_cache_configuration'].cache_directory_root = temp_dir
    run_configuration['models'] = ['land_price_model']
    if end_year is not None:
        run_configuration['years'] = (run_configuration['years'][0], end_year)

    SessionConfiguration(new_instance=True,
                         package_order=run_configuration['dataset_pool_configuration'].package_order,
                         in_storage=AttributeCache())
    insert_auto_generated_cache_directory_if_needed(run_configuration)
    caller.resources = run_configuration
    runs_manager.setup_new_run(cache_directory=run_configuration['cache_directory'],
                               configuration=run_configuration)
    runs_manager.run_run(run_configuration)
def testName(self):
    print "entering test_run"
    logger.log_status('Preparing MATSim test run ...')
    # unzip MATSim files
    matsim_zip = ExtractZipFile(self.matsim_source, self.destination)
    matsim_zip.extract()
    matsim_extracted_files = os.path.join(self.destination, 'MATSimTestClasses')  # location of unzipped MATSim files
    # unzip base_year_cache
    base_year_data_zip = ExtractZipFile(self.base_year_data_source, self.destination)
    base_year_data_zip.extract()
    base_year_data_extracted_files = os.path.join(self.destination, 'base_year_data')  # location of unzipped base_year_cache

    # updating location of base_year_data
    self.run_config['creating_baseyear_cache_configuration'].cache_directory_root = self.destination
    self.run_config['creating_baseyear_cache_configuration'].baseyear_cache.existing_cache_to_copy = base_year_data_extracted_files
    self.run_config['cache_directory'] = base_year_data_extracted_files

    self.run_config.add('matsim_files', matsim_extracted_files)
    self.run_config.add('matsim_config', self.matsim_config_full)
    self.run_config.add('root', self.destination)
    insert_auto_generated_cache_directory_if_needed(self.run_config)

    run_manager = RunManager(ServicesDatabaseConfiguration())
    run_manager.setup_new_run(cache_directory=self.run_config['cache_directory'],
                              configuration=self.run_config)
    logger.log_status('Starting UrbanSim run ...')
    run_manager.run_run(self.run_config, run_as_multiprocess=True)
    # after the UrbanSim run the travel data sets should be equal
    #self.assertTrue( self.compare_travel_data_sets() )
    logger.log_status('... UrbanSim run finished.')
    print "leaving test_run"
def _do_run_simple_test_run(caller, temp_dir, config, end_year=None):
    """Run the model system with a single model (for speed).
    Sets the .resources property of the caller before starting the run.
    """
    runs_manager = RunManager(config)

    run_configuration = _get_run_config(temp_dir=temp_dir)
    run_configuration['creating_baseyear_cache_configuration'].cache_directory_root = temp_dir
    run_configuration['models'] = ['land_price_model']
    if end_year is not None:
        run_configuration['years'] = (run_configuration['years'][0], end_year)

    SessionConfiguration(new_instance=True,
                         package_order=run_configuration['dataset_pool_configuration'].package_order,
                         in_storage=AttributeCache())
    # the original called insert_auto_generated_cache_directory_if_needed()
    # twice; once is enough
    insert_auto_generated_cache_directory_if_needed(run_configuration)
    caller.resources = run_configuration
    runs_manager.setup_new_run(cache_directory=run_configuration['cache_directory'],
                               configuration=run_configuration)
    runs_manager.run_run(run_configuration)
class StartRunOptionGroup(object):
    """Helper class to start a model from an xml config file.

    Note: the body below executes at class-definition time; it is effectively
    a script.
    """
    logger.start_block("Starting UrbanSim")
    # get program arguments from the command line
    program_arguments = sys.argv[1:]
    # default parameters are:
    # --config=opus_matsim/sustain_city/configs/seattle_parcel_prescheduled_events.xml
    # --executable=Seattle_baseline
    parser = optparse.OptionParser()
    parser.add_option("-c", "--config", dest="config_file_name",
                      action="store", type="string",
                      help="Name of file containing urbansim config")
    parser.add_option("-e", "--executable", dest="scenario_executable",
                      action="store", type="string",
                      help="Model to execute")
    (options, args) = parser.parse_args()

    if options.config_file_name is None:
        logger.log_error("Missing path to the urbansim config file")
    if options.scenario_executable is None:
        logger.log_error("Missing name of executable scenario")

    config = XMLConfiguration(options.config_file_name).get_run_configuration(options.scenario_executable)
    insert_auto_generated_cache_directory_if_needed(config)

    run_manager = RunManager(ServicesDatabaseConfiguration())
    run_manager.setup_new_run(cache_directory=config['cache_directory'],
                              configuration=config)
    #try:  # tnicolai
    #    import pydevd
    #    pydevd.settrace()
    #except: pass
    run_manager.run_run(config, run_as_multiprocess=True)
def test_simulation(self):
    services_db = ServicesDatabaseConfiguration(database_name='services',
                                                database_configuration='services_database_server')
    run_manager = RunManager(services_db)
    run_as_multiprocess = True
    for scenario_name in ['psrc_baseline_test']:
        config = self.xml_config.get_run_configuration(scenario_name)
        insert_auto_generated_cache_directory_if_needed(config)
        run_manager.setup_new_run(cache_directory=config['cache_directory'],
                                  configuration=config)
        run_manager.run_run(config, run_as_multiprocess=run_as_multiprocess)
def run(self, config, executable):
    # --config=opus_matsim/sustain_city/configs/seattle_parcel.xml --executable=Seattle_baseline
    config = XMLConfiguration(config).get_run_configuration(executable)
    insert_auto_generated_cache_directory_if_needed(config)
    run_manager = RunManager(ServicesDatabaseConfiguration())
    run_manager.setup_new_run(cache_directory=config['cache_directory'],
                              configuration=config)
    run_manager.run_run(config, run_as_multiprocess=True)
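# A hedged usage sketch for the wrapper above, mirroring the command-line
# defaults in its comment. Starter is a hypothetical name for the enclosing
# class, which the snippet does not show.
starter = Starter()
starter.run('opus_matsim/sustain_city/configs/seattle_parcel.xml', 'Seattle_baseline')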
class StartRunOptionGroup(object):
    """Helper class to start a model from an xml config file."""
    config = XMLConfiguration("opus_matsim/configs/seattle_parcel.xml").get_run_configuration("Seattle_baseline")
    #config = XMLConfiguration("opus_matsim/configs/psrc_parcel.xml").get_run_configuration("PSRC_baseline")
    insert_auto_generated_cache_directory_if_needed(config)

    run_manager = RunManager(ServicesDatabaseConfiguration())
    run_manager.setup_new_run(cache_directory=config['cache_directory'],
                              configuration=config)
    #run_manager.create_baseyear_cache(config)
    run_manager.run_run(config, run_as_multiprocess=True)
def test_restart_simple_run(self):
    _do_run_simple_test_run(self, self.temp_dir, self.config)
    runs_manager = RunManager(self.config)
    runs_manager.update_environment_variables(run_resources=SubsetConfiguration())

    run_activity = runs_manager.services_db.get_table('run_activity')
    s = select([func.max(run_activity.c.run_id)])
    run_id = runs_manager.services_db.execute(s).fetchone()[0]
    s = select([run_activity.c.status],
               whereclause=run_activity.c.run_id == run_id)
    status = runs_manager.services_db.execute(s).fetchone()[0]
    expected = 'done'
    self.assertEqual(status, expected)

    runs_manager.restart_run(run_id, restart_year=2001,
                             project_name=SubsetConfiguration()['project_name'],
                             skip_urbansim=False)
    s = select([run_activity.c.status],
               whereclause=run_activity.c.run_id == run_id)
    status = runs_manager.services_db.execute(s).fetchone()[0]
    expected = 'done'
    self.assertEqual(status, expected)

    # Restarting without running urbansim should not re-run that year.
    # TODO: test that no models are run this time.
    runs_manager.restart_run(run_id, restart_year=2002,
                             project_name=SubsetConfiguration()['project_name'],
                             skip_urbansim=True)
    s = select([run_activity.c.status],
               whereclause=run_activity.c.run_id == run_id)
    status = runs_manager.services_db.execute(s).fetchone()[0]
    expected = 'done'
    self.assertEqual(status, expected)

    self.cleanup_test_run()
def test_simulation(self):
    base_year_data_path = os.path.join(self.data_path, 'base_year_data')
    if not os.path.exists(base_year_data_path):
        os.makedirs(base_year_data_path)

    ftp_url = os.environ["FTP_URL"]
    file_name = os.path.split(ftp_url)[1]
    ftp_user = os.environ["FTP_USERNAME"]
    ftp_password = os.environ["FTP_PASSWORD"]
    #stdout, stderr = Popen("ls -la %s" % base_year_data_path, shell=True).communicate()
    #stdout, stderr = Popen("echo '%s'" % (base_year_data_path), stdout=PIPE).communicate()
    #print stdout
    try:
        Popen("""
              cd %s;
              pwd;
              ls -la;
              echo wget --timestamping %s --ftp-user=%s --ftp-password=%s > /dev/null 2>&1;
              rm -rf 2008;
              unzip -o %s
              """ % (base_year_data_path, ftp_url, ftp_user, ftp_password, file_name),
              shell=True).communicate()
    except:
        print "Error when downloading and unzipping file from %s." % ftp_url
        raise

    services_db = ServicesDatabaseConfiguration(database_name='services',
                                                database_configuration='services_database_server')
    run_manager = RunManager(services_db)
    run_as_multiprocess = True
    xml_config = XMLConfiguration(os.path.join(self.opus_home, 'project_configs', 'washtenaw_parcel.xml'))
    for scenario_name in ['washtenaw_baseline_test']:
        config = xml_config.get_run_configuration(scenario_name)
        insert_auto_generated_cache_directory_if_needed(config)
        #base_year = config['base_year']
        #config['years_to_run'] = (base_year+1, base_year+2)
        run_manager.setup_new_run(cache_directory=config['cache_directory'],
                                  configuration=config)
        run_manager.run_run(config, run_as_multiprocess=run_as_multiprocess)
        except:
            # connection has gone away, re-create run_manager
            self._run_manager = RunManager(self.services_db_config)
        return self._run_manager

if __name__ == "__main__":
    try:
        import wingdbstub
    except:
        pass
    option_group = OptionGroup()
    parser = option_group.parser
    (options, args) = parser.parse_args()
    services_db = option_group.get_services_database_configuration(options)
    run_manager = RunManager(services_db)
    if not run_manager.services_db:
        raise RuntimeError, "services database must exist; use --hostname argument to specify the database server containing services database."

    urbansim_server = options.urbansim_server or os.environ.get('URBANSIMHOSTNAME', 'localhost')
    urbansim_user = options.runserver_username or os.environ.get('URBANSIMUSERNAME', None)
    urbansim_password = os.environ.get('URBANSIMPASSWORD', None)
    travelmodel_server = options.travelmodel_server or os.environ.get('TRAVELMODELHOSTNAME', 'localhost')
    travelmodel_user = options.runserver_username or os.environ.get('TRAVELMODELUSERNAME', None)
    travelmodel_password = os.environ.get('TRAVELMODELPASSWORD', None)

    if not (options.configuration_path or options.run_id):
def on_pbnStartModel_released(self):
    duplicate = False
    self.diagnostic_go_button.setEnabled(True)

    if self.running and not self.paused:
        # Take care of pausing a run
        success = self.runThread.pause()
        if success:
            self.paused = True
            self.timer.stop()
            self.pbnStartModel.setText(QString("Resume simulation run..."))
    elif self.running and self.paused:
        # Need to resume a paused run
        success = self.runThread.resume()
        if success:
            self.paused = False
            self.timer.start(1000)
            self.pbnStartModel.setText(QString("Pause simulation run..."))
    elif not self.running:
        run_name = str(self.leRunName.text())
        if run_name == '':
            run_name = None
        else:
            run_id = None
            run_nodes = get_available_run_nodes(self.project)
            for run_node in run_nodes:
                existing_run_name = run_node.tag
                if run_name == existing_run_name:
                    duplicate = True
                    r = run_node.get('run_id')
                    if r is not None:
                        run_id = int(r)
                    break
            if duplicate:
                dlg_dup = OverwriteRunDialog(self)
                if dlg_dup.exec_() == QDialog.Rejected:
                    return
                delete_simulation_run(self.project, run_node.tag)  # todo change to run_node.get('name')

        # Update the XML
        self.project.update_xml_config()
        self.updateConfigAndGuiForRun()

        # Fire up a new thread and run the model
        self.pbnStartModel.setText(QString("Pause simulation run..."))

        # References to the GUI elements for status for this run...
        self.progressBarTotal = self.runProgressBarTotal
        self.progressBarYear = self.runProgressBarYear
        self.progressBarModel = self.runProgressBarModel
        #self.pbnRemoveModel.setEnabled(False)
        #self.pbnStartModel.setEnabled(False)

        # Initializing values
        self.progressBarTotal.setValue(0)
        self.progressBarYear.setValue(0)
        self.progressBarModel.setValue(0)
        self.progressBarTotal.setRange(0, 0)
        self.progressBarYear.setRange(0, 0)
        self.progressBarModel.setRange(0, 0)

        batch_name = str(self.cboOptionalIndicatorBatch.currentText())
        if batch_name == '(None)':
            batch_name = None

        self.runThread = RunModelThread(get_mainwindow_instance(), self, batch_name, run_name)

        if duplicate and run_id is not None:
            from opus_core.services.run_server.run_manager import RunManager as ServicesRunManager
            run_manager = ServicesRunManager(ServicesDatabaseConfiguration())
            run_manager.delete_everything_for_this_run(run_id=run_id)
            run_manager.close()

        # Use this signal from the thread if it is capable of producing its own status signal
        QObject.connect(self.runThread, SIGNAL("runFinished(PyQt_PyObject)"),
                        self.runFinishedFromThread)
        QObject.connect(self.runThread, SIGNAL("runError(PyQt_PyObject)"),
                        self.runErrorFromThread)
        # Use this timer to call a function in the thread to check status if
        # the thread is unable to produce its own signal above
        self.timer = QTimer()
        QObject.connect(self.timer, SIGNAL("timeout()"), self.runStatusFromThread)
        self.timer.start(1000)
        self.running = True
        self.paused = False
        self.runThread.start()
    else:
        print "Unexpected state in the model run..."
class Calibration(object):
    """Class to calibrate UrbanSim model coefficients."""

    def __init__(self, xml_config, scenario, calib_datasets, target_expression, target_file,
                 subset=None, subset_patterns=None, skip_cache_cleanup=False, log_directory=None):
        """
        - xml_config: xml configuration file, e.g. '/home/atschirhar/opus/project_configs/paris_zone.xml'
        - scenario: name of the scenario to run for calibration, where models_to_run and simulation years are specified
        - calib_datasets: dictionary specifying dataset names and attributes to be calibrated, e.g.
              {'establishment_location_choice_model_coefficients': 'estimate'}
        - target_expression: opus expression computing values from the prediction to be compared with targets
        - target_file: name of a csv file providing targets
        - subset: dictionary specifying the subset of the dataset to be calibrated, e.g.
              {'establishment_location_choice_model_coefficients': ['coefficient_name', ['paris_celcm', 'biotech_celcm']]}
          subset and subset_patterns cannot both be specified for the same dataset
        - subset_patterns: dictionary specifying the subset of the dataset to be calibrated
          through a regular expression (re) pattern, e.g.
              {'establishment_location_choice_model_coefficients': ['coefficient_name', '*_celcm']}
          subset and subset_patterns cannot both be specified for the same dataset
        """
        self.target_expression = target_expression
        self.target = self.read_target(target_file)

        # Keep the calibration spec on the instance; run() and target_func()
        # below iterate over it. (The flattened original created a fresh empty
        # dict and iterated over that, so the loop could never execute, and it
        # referenced the __init__-only names subset/subset_patterns.)
        self.calib_datasets = calib_datasets
        self.subset = subset
        self.subset_patterns = subset_patterns

        self.run_manager = None
        self.xml_config = xml_config
        self.scenario = scenario
        self.skip_cache_cleanup = skip_cache_cleanup
        self.run_id, self.cache_directory = self.init_run()
        self.run_ids = [self.run_id]  # allow starting multiple runs for parallel optimization
        self.log_directory = log_directory
        if self.log_directory is None:
            self.log_directory = self.cache_directory  # legacy

        log_file = os.path.join(self.log_directory, "calibration.log")
        logger.enable_file_logging(log_file)

        dict_config = XMLConfiguration(self.xml_config).get_run_configuration(self.scenario)
        ## get parameters from config
        self.base_year = dict_config['base_year']
        self.start_year, self.end_year = dict_config['years']
        self.project_name = dict_config['project_name']
        self.package_order = dict_config['dataset_pool_configuration'].package_order

    @log_block("Start Calibration")
    def run(self, optimizer='lbfgsb', results_pickle_prefix='calib', optimizer_kwargs={}):
        """Call the specified optimizer to calibrate.

        Arguments:
            - optimizer: optimization method chosen (fmin_bfgs, simulated anneal, etc.)
            - results_pickle_prefix: prefix of the pickle file name that will be saved
              after the simulation; if None, results are not saved

        Returns:
            - the results from the optimizer
            - a pickle dump of the results in the cache_directory, if
              results_pickle_prefix is specified
        """
        simulation_state = SimulationState()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(self.cache_directory)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(new_instance=True,
                                            package_order=self.package_order,
                                            in_storage=attribute_cache).get_dataset_pool()

        calib_datasets = {}
        for dataset_name, calib_attr in self.calib_datasets.iteritems():
            dataset = dataset_pool.get_dataset(dataset_name, dataset_arguments={'id_name': []})
            assert (self.subset is None or self.subset.get(dataset_name, None) is None or
                    self.subset_patterns is None or self.subset_patterns.get(dataset_name, None) is None)
            if self.subset is not None and self.subset.get(dataset_name, None) is not None:
                subset_attr, subset_cond = self.subset.get(dataset_name)
                index = np.in1d(dataset[subset_attr], subset_cond)
            elif self.subset_patterns is not None and self.subset_patterns.get(dataset_name, None) is not None:
                subset_attr, subset_pattern = self.subset_patterns.get(dataset_name)
                index = array([True if re.search(subset_pattern, attr_v) else False
                               for attr_v in dataset[subset_attr]])
            else:
                index = arange(dataset.size(), dtype='i')
            calib_datasets[dataset_name] = [dataset, calib_attr, index]

        init_v = array([], dtype='f8')
        for dataset_name, calib in calib_datasets.iteritems():
            dataset, calib_attr, index = calib
            if type(calib_attr) == str:
                init_v = np.concatenate((init_v, dataset[calib_attr][index]))
            elif type(calib_attr) in (list, tuple):
                for attr in calib_attr:
                    init_v = np.concatenate((init_v, dataset[attr][index]))
            else:
                raise TypeError("Unrecognized data type in calib_datasets")

        t0 = time.time()
        if is_parallelizable:
            set_parallel(True)

        print OKBLUE + "\noptimizer = {} (is_parallel = {})".format(optimizer, is_parallelizable) + ENDC
        print OKBLUE + "-------------------------------------------------------\n" + ENDC
        if optimizer == 'bfgs':
            default_kwargs = {'fprime': None, 'epsilon': 1e-08, 'maxiter': None,
                              'full_output': 1, 'disp': 1, 'retall': 0, 'callback': None}
            optimizer_func = fmin_bfgs
        elif optimizer == 'lbfgsb':
            default_kwargs = {'fprime': None, 'approx_grad': True, 'bounds': None,
                              'factr': 1e12, 'iprint': 1}
            optimizer_func = fmin_l_bfgs_b
        elif optimizer == 'anneal':
            default_kwargs = {'schedule': 'fast', 'full_output': 1, 'T0': None, 'Tf': 1e-12,
                              'maxeval': None, 'maxaccept': None, 'maxiter': 400,
                              'boltzmann': 1.0, 'learn_rate': 0.5, 'feps': 1e-06,
                              'quench': 1.0, 'm': 1.0, 'n': 1.0,
                              'lower': -1, 'upper': 1, 'dwell': 50, 'disp': True}
            optimizer_func = anneal
        elif optimizer == 'panneal':
            default_kwargs = {'schedule': 'fast', 'full_output': 1, 'T0': None, 'Tf': 1e-12,
                              'maxeval': None, 'maxaccept': None, 'maxiter': 400,
                              'boltzmann': 1.0, 'learn_rate': 0.5, 'feps': 1e-06,
                              'quench': 1.0, 'm': 1.0, 'n': 1.0,
                              'lower': -1, 'upper': 1, 'dwell': 50, 'disp': True,
                              'cores': 24, 'interv': 20}
            optimizer_func = panneal
        else:
            raise ValueError("Unrecognized optimizer {}".format(optimizer))
        default_kwargs.update(optimizer_kwargs)
        results = optimizer_func(self.target_func, copy(init_v), **default_kwargs)
        duration = time.time() - t0

        if results_pickle_prefix is not None:
            pickle_file = "{}_{}.pickle".format(results_pickle_prefix, optimizer)
            pickle_file = os.path.join(self.log_directory, pickle_file)
            pickle.dump(results, open(pickle_file, 'wb'))

        if is_parallelizable:
            set_parallel(False)

        logger.log_status("init target_func: {}".format(self.target_func(init_v)))
        logger.log_status("end target_func: {}".format(results[:]))  # which one?
        logger.log_status("outputs from optimizer: {}".format(results))
        logger.log_status("Execution time: {}".format(duration))

    def init_run(self, create_baseyear_cache=True):
        """init run, get run_id & cache_directory."""
        ## avoid invoking start_run from the command line
        option_group = StartRunOptionGroup()
        option_group.parser.set_defaults(xml_configuration=self.xml_config,
                                         scenario_name=self.scenario)
        #run_id, cache_directory = start_run(option_group)
        options, args = option_group.parse()
        self.run_manager = RunManager(option_group.get_services_database_configuration(options))
        resources = XMLConfiguration(self.xml_config).get_run_configuration(self.scenario)
        insert_auto_generated_cache_directory_if_needed(resources)
        cache_directory = resources["cache_directory"]
        self.run_manager.setup_new_run(cache_directory, resources)
        run_id, cache_directory = self.run_manager.run_id, self.run_manager.get_current_cache_directory()
        self.run_manager.add_row_to_history(run_id, resources, "done")
        if create_baseyear_cache:
            self.run_manager.create_baseyear_cache(resources)
        ## good for testing
        #run_id = 275
        #cache_directory = '/home/lmwang/opus/data/paris_zone/runs/run_275.2012_05_26_00_20'
        assert run_id is not None
        assert cache_directory is not None
        return run_id, cache_directory

    def update_parameters(self, est_v, cache_directory, simulation_state,
                          dataset_pool, calib_datasets, *args, **kwargs):
        i_est_v = 0
        current_year = simulation_state.get_current_time()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(cache_directory)
        for dataset_name, calib in calib_datasets.iteritems():
            dataset, calib_attr, index = calib
            if type(calib_attr) == str:
                dtype = dataset[calib_attr].dtype
                dataset[calib_attr][index] = (est_v[i_est_v:i_est_v + index.size]).astype(dtype)
                i_est_v += index.size
            elif type(calib_attr) in (list, tuple):
                for attr in calib_attr:
                    dtype = dataset[attr].dtype
                    dataset[attr][index] = (est_v[i_est_v:i_est_v + index.size]).astype(dtype)
                    i_est_v += index.size
            else:
                raise TypeError("Unrecognized data type in calib_datasets")
            # flush dataset
            dataset.flush_dataset()
        simulation_state.set_current_time(current_year)

    def update_prediction(self, est_v, simulation_state, dataset_pool, calib_datasets, *args, **kwargs):
        option_group = RestartRunOptionGroup()
        option_group.parser.set_defaults(project_name=self.project_name,
                                         skip_cache_cleanup=self.skip_cache_cleanup)
        options, args = option_group.parse()
        if self.run_manager is None:
            self.run_manager = RunManager(option_group.get_services_database_configuration(options))

        if lock is not None:
            lock.acquire()
        ## query runs available for re-use
        runs_done = self.run_manager.get_run_info(run_ids=self.run_ids, status="done")
        create_baseyear_cache = False
        # (a leftover "import pdb; pdb.set_trace()" debugging stop was removed here)
        if len(runs_done) == 0:
            ## there is no re-usable run directory; init a new run
            run_id, cache_directory = self.init_run(create_baseyear_cache=False)
            self.run_ids.append(run_id)
            create_baseyear_cache = True
            logger.log_status("Initializing new run with id " + str(run_id))
        else:
            run_id = runs_done[0].run_id  ## take the first 'done' run_id
            cache_directory = self.run_manager.get_cache_directory(run_id)
            logger.log_status("Using old run with id " + str(run_id))
        resources = self.run_manager.get_resources_for_run_id_from_history(run_id, filter_by_status=False)
        self.run_manager.add_row_to_history(run_id, resources, "taken")
        if lock is not None:
            lock.release()

        if create_baseyear_cache:
            self.run_manager.create_baseyear_cache(resources)

        self.update_parameters(est_v, cache_directory, simulation_state,
                               dataset_pool, calib_datasets, *args, **kwargs)
        restart_run(option_group=option_group, args=[run_id, self.start_year])

        prediction = self.summarize_prediction(cache_directory, simulation_state,
                                               dataset_pool, calib_datasets)
        return prediction

    def summarize_prediction(self, cache_directory, simulation_state, dataset_pool, calib_datasets):
        dataset_name = VariableName(self.target_expression).get_dataset_name()
        current_year = simulation_state.get_current_time()
        simulation_state.set_current_time(self.end_year)
        simulation_state.set_cache_directory(cache_directory)
        # force reload
        dataset_pool.remove_all_datasets()
        dataset = dataset_pool[dataset_name]
        ids = dataset.get_id_attribute()
        results = dataset.compute_variables(self.target_expression, dataset_pool=dataset_pool)
        simulation_state.set_current_time(current_year)
        return dict(zip(ids, results))

    def read_target(self, target_file):
        ## read (& process) target numbers into a dictionary: {id: value}
        ## csv file with header: id, target
        header = file(target_file, "r").readline().strip().split(",")
        contents = np.genfromtxt(target_file, delimiter=",", comments="#", skip_header=1)
        target = dict(zip(contents[:, 0], contents[:, 1]))
        return target

    def target_func(self, est_v, func=lambda x, y: np.sum(np.abs(x - y)), **kwargs):
        """Target function."""
        simulation_state = SimulationState()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(self.cache_directory)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(new_instance=True,
                                            package_order=self.package_order,
                                            in_storage=attribute_cache).get_dataset_pool()

        calib_datasets = {}
        for dataset_name, calib_attr in self.calib_datasets.iteritems():
            dataset = dataset_pool.get_dataset(dataset_name, dataset_arguments={'id_name': []})
            assert (self.subset is None or self.subset.get(dataset_name, None) is None or
                    self.subset_patterns is None or self.subset_patterns.get(dataset_name, None) is None)
            if self.subset is not None and self.subset.get(dataset_name, None) is not None:
                subset_attr, subset_cond = self.subset.get(dataset_name)
                index = np.in1d(dataset[subset_attr], subset_cond)
            elif self.subset_patterns is not None and self.subset_patterns.get(dataset_name, None) is not None:
                subset_attr, subset_pattern = self.subset_patterns.get(dataset_name)
                index = array([True if re.search(subset_pattern, attr_v) else False
                               for attr_v in dataset[subset_attr]])
            else:
                index = arange(dataset.size(), dtype='i')
            calib_datasets[dataset_name] = [dataset, calib_attr, index]

        prediction = self.update_prediction(est_v, simulation_state, dataset_pool,
                                            calib_datasets, **kwargs)
        ## allow keys in target not appearing in prediction, assuming their values to be 0
        ### every key in target should appear in prediction
        #assert np.all(np.in1d(self.target.keys(), prediction.keys()))
        target = np.array(self.target.values())
        predct = np.array([prediction[k] if prediction.has_key(k) else 0
                           for k in self.target.keys()])
        results = func(predct, target)
        return results
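# A hedged usage sketch for the Calibration class above. All paths, the
# scenario name, the dataset/attribute names, and the opus expression are
# placeholders; target_file must be a csv with an "id,target" header, as
# read_target() expects.
calib = Calibration(
    xml_config='project_configs/paris_zone.xml',
    scenario='paris_zone_baseline',
    calib_datasets={'establishment_location_choice_model_coefficients': 'estimate'},
    target_expression='zone.aggregate(establishment.employment)',  # hypothetical expression
    target_file='targets.csv',
)
results = calib.run(optimizer='lbfgsb', results_pickle_prefix='calib')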
def add_runs_to_services_db_from_disk(projects=None):
    server_config = ServicesDatabaseConfiguration()
    if server_config.protocol == 'sqlite':
        datapath = paths.OPUS_DATA_PATH
        for project_name in os.listdir(datapath):
            if projects is not None and project_name not in projects:
                continue
            if not os.path.isdir(os.path.join(datapath, project_name)):
                continue
            os.environ['OPUSPROJECTNAME'] = project_name
            server = DatabaseServer(server_config)
            server.drop_database(database_name='run_activity')
            server.close()
            run_manager = RunManager(server_config)

            baseyear_directory = os.path.join(datapath, project_name, 'base_year_data')
            if os.path.exists(baseyear_directory):
                years = []
                for dir in os.listdir(baseyear_directory):
                    if len(dir) == 4 and dir.isdigit():
                        years.append(int(dir))
                start_year = min(years)
                end_year = max(years)
                run_name = 'base_year_data'
                run_id = run_manager._get_new_run_id()
                resources = {
                    'cache_directory': baseyear_directory,
                    'description': 'base year data',
                    'years': (start_year, end_year)
                }
                logger.log_status('Adding run %s of project %s to run_activity table'
                                  % (run_name, project_name))
                run_manager.add_row_to_history(run_id=run_id,
                                               resources=resources,
                                               status='done',
                                               run_name=run_name)

            data_directory = os.path.join(datapath, project_name, 'runs')
            if not os.path.exists(data_directory):
                continue
            for run_name in os.listdir(data_directory):
                try:
                    cache_directory = os.path.join(data_directory, run_name)
                    years = []
                    if not os.path.isdir(cache_directory):
                        continue
                    for dir in os.listdir(cache_directory):
                        if len(dir) == 4 and dir.isdigit():
                            years.append(int(dir))
                    start_year = min(years)
                    end_year = max(years)
                    run_id = run_manager._get_new_run_id()
                    resources = {
                        'cache_directory': cache_directory,
                        'description': '',
                        'years': (start_year, end_year)
                    }
                    logger.log_status('Adding run %s of project %s to run_activity table'
                                      % (run_name, project_name))
                    run_manager.add_row_to_history(run_id=run_id,
                                                   resources=resources,
                                                   status='done',
                                                   run_name=run_name)
                except:
                    pass
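# A hedged usage sketch: scan the opus data path for one project's runs and
# rebuild the services run_activity table from what is found on disk. The
# project name is a placeholder.
add_runs_to_services_db_from_disk(projects=['eugene_gridcell'])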
def __init__(self):
    GenericOptionGroup.__init__(self, usage="python %prog [options] configuration",
                                description="Delete results of a simulation run.")
    self.parser.add_option("--run-id", dest="run_id", default=None,
                           action="store",
                           help="The simulation run to delete.")
    self.parser.add_option("--years-to-delete", dest="years_to_delete", default=None,
                           action="store",
                           help="Python expression specifying list of years to delete from the simulation's cache.")

if __name__ == "__main__":
    try:
        import wingdbstub
    except:
        pass
    option_group = StartRunOptionGroup()
    parser = option_group.parser
    (options, args) = parser.parse_args()
    run_manager = RunManager(option_group.get_services_database_configuration(options))

    if options.run_id is None:
        parser.print_help()
    elif options.years_to_delete:
        years_to_delete = eval(options.years_to_delete)
        if not isinstance(years_to_delete, list):
            years_to_delete = [years_to_delete]
        run_manager.delete_year_dirs_in_cache(options.run_id,
                                              years_to_delete=years_to_delete)
    else:
        run_manager.delete_everything_for_this_run(options.run_id)
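# A hedged sketch of the deletion calls the script above drives from its
# command-line options; the run_id and the year list are placeholders.
run_manager = RunManager(ServicesDatabaseConfiguration())
run_manager.delete_year_dirs_in_cache(42, years_to_delete=[2001, 2002])  # selected years only
run_manager.delete_everything_for_this_run(42)                           # or the whole run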
help="name of the scenario to run") self.parser.add_option("--directory-to-cache", dest="existing_cache_to_copy", default=None, action="store", help="Directory containing data to put in new cache.") self.parser.add_option("--years-to-cache", dest="years_to_cache", default=None, action="store", help="List of years of data to take from the directory-to-cache (default is all years).") if __name__ == "__main__": try: import wingdbstub except: pass option_group = StartRunSetOptionGroup() parser = option_group.parser (options, args) = parser.parse_args() run_manager = RunManager(option_group.get_services_database_configuration(options)) if options.pickled_resource_file is not None: f = file(options.pickled_resource_file, 'r') try: config = pickle.load(f) finally: f.close() elif options.configuration_path is not None: opus_path = options.configuration_path try: config = get_config_from_opus_path(opus_path) except ImportError: # TODO: Once all fully-specified configurations are stored as classes, # get rid of this use.
self.parser.add_option("--years-to-cache", dest="years_to_cache", default=None, help="List of years of data to take from the directory-to-cache (default is all years).") self.parser.add_option("--run-as-single-process", dest="run_as_single_process", default=False, help="Determines if multiple processes may be used.") self.parser.add_option("-p", "--profile", dest="profile_filename", default=None, help="Turn on code profiling. Output data are in python hotshot format.") if __name__ == "__main__": #try: import wingdbstub #except: pass option_group = StartRunOptionGroup() parser = option_group.parser (options, args) = parser.parse_args() run_manager = RunManager(option_group.get_services_database_configuration(options)) run_as_multiprocess = not options.run_as_single_process if options.pickled_resource_file is not None: f = file(options.pickled_resource_file, 'r') try: config = pickle.load(f) finally: f.close() elif options.configuration_path is not None: opus_path = options.configuration_path try: config = get_config_from_opus_path(opus_path) except ImportError: # TODO: Once all fully-specified configurations are stored as classes, # get rid of this use.
password = None
if options.server_file is None:
    if options.server is None:
        hostname = raw_input('Hostname [%s]: ' % RemoteRunSet.default_hostname)
        if len(hostname) == 0:
            hostname = RemoteRun.default_hostname
    else:
        hostname = options.server
    if hostname != 'localhost':
        if options.user is None:
            username = raw_input('Username [%s]: ' % RemoteRunSet.default_username)
            if len(username) == 0:
                username = RemoteRun.default_username
        else:
            username = options.user
        password = getpass.getpass('Password for %s@%s: ' % (username, hostname))

#try: import wingdbstub
#except: pass

run_manager = RunManager(option_group.get_services_database_configuration(options))
run = RemoteRunSet(options.server_file, hostname, username, password,
                   options.server, options.database_name,
                   options.skip_travel_model, options.skip_urbansim,
                   run_manager)
run.run(options.start_year, options.end_year,
        options.configuration_path, options.run_id_file)
dest="force", default=False, action="store_true", help="force to overwrite pre-existing run_id") if __name__ == "__main__": try: import wingdbstub except: pass option_group = OptionGroup() parser = option_group.parser (options, args) = parser.parse_args() run_manager = RunManager( option_group.get_services_database_configuration(options)) if options.configuration_path is not None: opus_path = options.configuration_path try: config = get_config_from_opus_path(opus_path) except ImportError: import_stmt = 'from %s import run_configuration as config' % opus_path exec(import_stmt) config['cache_directory'] = options.cache_directory results = run_manager.storage.GetResultsFromQuery( "SELECT * from run_activity WHERE run_id = %s " % options.run_id) if len(results) > 1 and not options.force: print "WARNING: run_id %s exists in run_activity. Use --force to override." % options.run_id
def main():
    option_group = ReportOptionGroup()
    parser = option_group.parser
    (options, args) = parser.parse_args()

    if not options.xml_configuration or \
       not options.years or \
       not options.output:
        print "ERROR: -x, -y, and -o are required options"
        sys.exit(1)

    # determine the run_id from the command line args
    if options.run_id:
        run_id = options.run_id
    elif options.cache_directory:
        run_id = hudson_common.run_id_from_cache_dir(options.cache_directory)
        if not run_id:
            print "Failed to parse run ID from cache directory name"
            sys.exit(1)
    else:
        # get most recent successful run from DB
        conn = MySQLdb.connect('paris.urbansim.org', 'hudson',
                               os.getenv('HUDSON_DBPASS'), 'services')
        c = conn.cursor()
        c.execute("SELECT max(date_time), run_id FROM run_activity where status='done'")
        results = c.fetchone()
        run_id = str(results[1])

    # Note the dummy restart_year. Because we're not actually going to
    # restart the run, this will be ignored.
    run_manager = RunManager(option_group.get_services_database_configuration(options))
    run_resources = run_manager.create_run_resources_from_history(run_id=run_id,
                                                                  restart_year=2010)
    cache_directory = run_resources['cache_directory']
    scenario = run_resources['scenario_name']

    # The cache directory may not exist if this script is being run on a hudson
    # slave that did not perform the original run. Ensure that we mount it if
    # necessary.
    hudson_common.mount_cache_dir(run_resources)

    # Hudson appends "_hudson" to the scenario names. Peel this off
    # if it's present.
    scenario = scenario.replace("_hudson", "")

    # prepare the report staging directory
    output_dir = os.path.join(options.output, scenario, "run_" + run_id)
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    f = open(os.path.join(output_dir, "README.txt"), 'w')
    years = eval(options.years)
    text = ""
    if os.getenv("HUDSON_DRAFT_RESULTS") == "true":
        text += "\n".join(textwrap.wrap(
            """NOTICE: This report contains unofficial draft model results that
have not been reviewed or approved by any agency.""")) + "\n\n"
""")) + "\n\n" text += "\n".join(textwrap.wrap( """This report has been auto-generated by urbansim for the following model run: """ )) + """ project: bay_area_parcel scenario: %s years: %d-%d cached data: %s """ % (scenario, years[0], years[-1], os.sep.join(cache_directory.split(os.sep)[-3:])) comment = os.getenv("HUDSON_COMMENT") if comment: text += "\n".join(textwrap.wrap("comment: " + comment)) + "\n" f.write(text.replace("\n", "\r\n")) f.close() # prepare the indicators shutil.rmtree(os.path.join(output_dir, "indicators"), True) for indicator_batch in ["county_indicators", "zone_data", "superdistrict_indicators", "regional_indicators_short","pda_indicators","abag_eir_area_permutation"]: urbansim.tools.make_indicators.run(options.xml_configuration, indicator_batch, None, cache_directory, options.years) my_location = os.path.split(__file__)[0] shp_path = os.path.join(os.getenv("OPUS_HOME"), "data", "bay_area_parcel", "shapefiles") ##county-level summary report pdf_county_script = os.path.join(my_location, "summarize_county_indicators_map.R") cmd = "Rscript %s %s %d %d %s %s %s %s" % (pdf_county_script, os.path.join(cache_directory, "indicators"), years[0], years[-1],shp_path,"bayarea_counties.shp", scenario, "county") print "Summarizing county indicators: " + cmd if os.system(cmd) != 0: print "WARNING: Failed to generate county indicators" ##superdistrict-level summary report --never mind the name of the script; is en route to being generalized pdf_supdist_script = os.path.join(my_location, "summarize_superdistrict_indicators_map.R") cmd = "Rscript %s %s %d %d %s %s %s %s" % (pdf_supdist_script, os.path.join(cache_directory, "indicators"), years[0], years[-1],shp_path,"superdistricts.shp", scenario, "superdistrict") print "Summarizing superdistrict indicators: " + cmd if os.system(cmd) != 0: print "WARNING: Failed to generate superdistrict indicators" shutil.copytree(os.path.join(cache_directory, "indicators"), os.path.join(output_dir, "indicators"), ignore=shutil.ignore_patterns("*_stored_data*", "*.log")) # add the travel model EMFAC output to the web report config = XMLConfiguration(options.xml_configuration).get_run_configuration(scenario) travel_model = os.getenv("HUDSON_TRAVEL_MODEL") travel_model_for_R ="FALSE" if travel_model and travel_model.lower() == "full": print "Copying over travel model output" travel_model_for_R = "TRUE" tm_base_dir = mtc_common.tm_get_base_dir(config) tm_dir = os.path.join(tm_base_dir, "runs", cache_directory.split(os.sep)[-1]) for f in glob.glob(os.path.join(tm_dir, '*', 'emfac', 'output', 'EMFAC2011-SG Summary*Group 1.*')): shutil.copy(f, output_dir) p = os.path.join(cache_directory, "mtc_data") if os.path.exists(p): shutil.copytree(p, os.path.join(output_dir, "mtc_data")) #Generate PDA comparison chart pda_script = os.path.join(my_location, "pda_compare.R") cmd = "Rscript %s %s %d %d %s %s" % (pda_script, os.path.join(cache_directory, "indicators"), years[0], years[-1], run_id, scenario) print "Creating pda comparison chart: " + cmd #Generate county x PDA facet chart pda_county_script = os.path.join(my_location, "growth_by_county_by_pda.R") cmd = "Rscript %s %s %d %d %s %s" % (pda_county_script, os.path.join(cache_directory, "indicators"), years[0], years[-1], run_id, scenario) print "Creating county by pda chart: " + cmd # topsheet--need the EMFAC outputs to be generated first, so this goes last #travel_model_for_R = "TRUE" if travel_model else "FALSE" topsheet_script = os.path.join(my_location, "regional_indicators.R") cmd = "Rscript %s %s %d %d %s 
%s %s" % (topsheet_script, os.path.join(cache_directory, "indicators"), years[0], years[-1], run_id, travel_model_for_R, scenario) print "Creating topsheet: " + cmd if os.system(cmd) != 0: print "WARNING: Failed to generate indicators"
    def on_pbnStartModel_released(self):
        duplicate = False
        self.diagnostic_go_button.setEnabled(True)

        if self.running and not self.paused:
            # Take care of pausing a run
            success = self.runThread.pause()
            if success:
                self.paused = True
                self.timer.stop()
                self.pbnStartModel.setText(QString("Resume simulation run..."))
        elif self.running and self.paused:
            # Need to resume a paused run
            success = self.runThread.resume()
            if success:
                self.paused = False
                self.timer.start(1000)
                self.pbnStartModel.setText(QString("Pause simulation run..."))
        elif not self.running:
            run_name = str(self.leRunName.text())
            if run_name == '':
                run_name = None
            else:
                run_id = None
                run_nodes = get_available_run_nodes(self.project)
                for run_node in run_nodes:
                    existing_run_name = run_node.tag
                    if run_name == existing_run_name:
                        duplicate = True
                        r = run_node.get('run_id')
                        if r is not None:
                            run_id = int(r)
                        break
                if duplicate:
                    dlg_dup = OverwriteRunDialog(self)
                    if dlg_dup.exec_() == QDialog.Rejected:
                        return
                    delete_simulation_run(self.project,
                                          run_node.tag)  # todo change to run_node.get('name')
                    # Update the XML
                    self.project.update_xml_config()
                    self.updateConfigAndGuiForRun()

            # Fire up a new thread and run the model
            self.pbnStartModel.setText(QString("Pause simulation run..."))

            # References to the GUI elements for status for this run...
            self.progressBarTotal = self.runProgressBarTotal
            self.progressBarYear = self.runProgressBarYear
            self.progressBarModel = self.runProgressBarModel
            #self.pbnRemoveModel.setEnabled(False)
            #self.pbnStartModel.setEnabled(False)

            # Initializing values
            self.progressBarTotal.setValue(0)
            self.progressBarYear.setValue(0)
            self.progressBarModel.setValue(0)
            self.progressBarTotal.setRange(0, 0)
            self.progressBarYear.setRange(0, 0)
            self.progressBarModel.setRange(0, 0)

            batch_name = str(self.cboOptionalIndicatorBatch.currentText())
            if batch_name == '(None)':
                batch_name = None

            self.runThread = RunModelThread(get_mainwindow_instance(),
                                            self, batch_name, run_name)

            if duplicate and run_id is not None:
                from opus_core.services.run_server.run_manager import RunManager as ServicesRunManager
                run_manager = ServicesRunManager(ServicesDatabaseConfiguration())
                run_manager.delete_everything_for_this_run(run_id=run_id)
                run_manager.close()

            # Use this signal from the thread if it is capable of producing its own status signal
            QObject.connect(self.runThread, SIGNAL("runFinished(PyQt_PyObject)"),
                            self.runFinishedFromThread)
            QObject.connect(self.runThread, SIGNAL("runError(PyQt_PyObject)"),
                            self.runErrorFromThread)
            # Use this timer to call a function in the thread to check status if the
            # thread is unable to produce its own signal above
            self.timer = QTimer()
            QObject.connect(self.timer, SIGNAL("timeout()"), self.runStatusFromThread)
            self.timer.start(1000)
            self.running = True
            self.paused = False
            self.runThread.start()
        else:
            print "Unexpected state in the model run..."
"--profile", dest="profile_filename", default=None, help= "Turn on code profiling. Output data are in python hotshot format." ) if __name__ == "__main__": #try: import wingdbstub #except: pass option_group = StartRunOptionGroup() parser = option_group.parser (options, args) = parser.parse_args() run_manager = RunManager( option_group.get_services_database_configuration(options)) run_as_multiprocess = not options.run_as_single_process if options.pickled_resource_file is not None: f = file(options.pickled_resource_file, 'r') try: config = pickle.load(f) finally: f.close() elif options.configuration_path is not None: opus_path = options.configuration_path try: config = get_config_from_opus_path(opus_path) except ImportError: # TODO: Once all fully-specified configurations are stored as classes, # get rid of this use.
dest="project_name", default='', help="The project name") if __name__ == "__main__": option_group = OptionGroup() parser = option_group.parser (options, args) = parser.parse_args() try: run_id = int(args[0]) except IndexError: parser.error("run_id must be provided.") parser.print_help() sys.exit(1) if len(args) == 2: pickle_file = args[1] else: pickle_file = "resources.pickle" run_manager = RunManager( option_group.get_services_database_configuration(options)) if options.project_name: run_manager.update_environment_variables( run_resources={'project_name': options.project_name}) resources = run_manager.get_resources_for_run_id_from_history( run_id=run_id) write_resources_to_file(pickle_file, resources)
action="store", help= "List of years of data to take from the directory-to-cache (default is all years)." ) if __name__ == "__main__": try: import wingdbstub except: pass option_group = StartRunSetOptionGroup() parser = option_group.parser (options, args) = parser.parse_args() run_manager = RunManager( option_group.get_services_database_configuration(options)) if options.pickled_resource_file is not None: f = file(options.pickled_resource_file, 'r') try: config = pickle.load(f) finally: f.close() elif options.configuration_path is not None: opus_path = options.configuration_path try: config = get_config_from_opus_path(opus_path) except ImportError: # TODO: Once all fully-specified configurations are stored as classes, # get rid of this use.
class OptionGroup(GenericOptionGroup):
    def __init__(self):
        GenericOptionGroup.__init__(self, usage="python %prog [options] run_id [pickle_file]",
                                    description="dump resources.pickle from services db for the given run_id")
        self.parser.add_option("-p", "--project-name", dest="project_name",
                               default='', help="The project name")

if __name__ == "__main__":
    option_group = OptionGroup()
    parser = option_group.parser
    (options, args) = parser.parse_args()

    try:
        run_id = int(args[0])
    except IndexError:
        parser.error("run_id must be provided.")
        parser.print_help()
        sys.exit(1)

    if len(args) == 2:
        pickle_file = args[1]
    else:
        pickle_file = "resources.pickle"

    run_manager = RunManager(option_group.get_services_database_configuration(options))
    if options.project_name:
        run_manager.update_environment_variables(run_resources={'project_name': options.project_name})
    resources = run_manager.get_resources_for_run_id_from_history(run_id=run_id)
    write_resources_to_file(pickle_file, resources)
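# To consume the dump, the file can be loaded back the same way the launchers
# above read pickled resources. A minimal sketch, assuming
# write_resources_to_file produced a plain pickle (the filename mirrors the
# default used above):
#
#   import pickle
#
#   f = file("resources.pickle", 'r')
#   try:
#       resources = pickle.load(f)
#   finally:
#       f.close()
#   print resources.get('cache_directory')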
self.parser.add_option("-p", "--project-name", dest="project_name", default='',help="The name project name") self.parser.add_option("--skip-urbansim", dest="skip_urbansim", default=False, action="store_true", help="Skip running UrbanSim for the restart year.") self.parser.add_option("--create-baseyear-cache-if-not-exists", dest="create_baseyear_cache_if_not_exists", default=False, action="store_true", help="Create baseyear cache if not already exists") self.parser.add_option("--skip-cache-cleanup", dest="skip_cache_cleanup", default=False, action="store_true", help="Skip removing year caches for this and future years.") if __name__ == "__main__": option_group = RestartRunOptionGroup() parser = option_group.parser (options, args) = parser.parse_args() run_manager = RunManager(option_group.get_services_database_configuration(options)) if len(args) < 2: parser.print_help() else: run_id, year = (int(args[0]), int(args[1])) run_manager.restart_run(run_id, year, options.project_name, skip_urbansim=options.skip_urbansim, create_baseyear_cache_if_not_exists=options.create_baseyear_cache_if_not_exists, skip_cache_cleanup=options.skip_cache_cleanup)
def main():
    option_group = UrbanvisionOptionGroup()
    parser = option_group.parser
    (options, args) = parser.parse_args()

    if not options.run_id and not options.cache_directory:
        print "ERROR: either -r or -c is required"
        sys.exit(1)

    if options.run_id:
        run_id = options.run_id
    elif options.cache_directory:
        run_id = run_id_from_cache_dir(options.cache_directory)
        if not run_id:
            print "Failed to parse run ID from cache directory name"
            sys.exit(1)

    # Note the dummy restart_year. Because we're not actually going to
    # restart the run, this will be ignored.
    run_manager = RunManager(option_group.get_services_database_configuration(options))
    run_resources = run_manager.create_run_resources_from_history(run_id=run_id,
                                                                  restart_year=2010)
    cache_directory = run_resources['cache_directory']
    mount_cache_dir(run_resources)
    scenario = run_resources['scenario_name']
    scenario = scenario.replace("_hudson", "")

    # Ensure that there's a suitable scenario abiding by the urbanvision convention
    scenario_name = ("%s run %s" % (scenario, run_id)).replace("_", " ")
    passwd = os.environ['OPUS_DBPASS']
    # The username is redacted in this listing; the password placeholder is
    # restored to a %s substitution, as implied by the trailing "% passwd".
    conn_string = "host='paris.urbansim.org' dbname='bayarea' user='******' password='%s'" % passwd
    conn = psycopg2.connect(conn_string)
    cursor = conn.cursor()
    s = "select id from scenario where name='{}'".format(scenario_name)
    cursor.execute(s)
    records = cursor.fetchall()
    if len(records) == 0:
        print "Creating new scenario '" + scenario_name + "'"
        s = "insert into scenario (name, parent) values ('{}', 1)".format(scenario_name)
        cursor.execute(s)
        conn.commit()
        s = "select id from scenario where name='{}'".format(scenario_name)
        cursor.execute(s)
        records = cursor.fetchall()
        assert(len(records) == 1)
        id = records[0][0]
    elif len(records) == 1:
        id = records[0][0]
    else:
        print "ERROR: Found more than one scenario named %s!" % scenario_name
        cursor.close()
        conn.close()
        sys.exit(1)
    cursor.close()
    conn.close()

    # Now that we have the scenario id, we can run the script that inserts the
    # records into the db
    psql = os.path.join(os.path.split(__file__)[0], "process_building_to_sql.py")
    cmd = "%s %s %s %d" % (sys.executable, psql, cache_directory, id)
    cmd += " | psql -h paris.urbansim.org -q -U urbanvision bayarea > /dev/null"
    print "Exporting buildings to db: " + cmd
    if os.system("export PGPASSWORD=$OPUS_DBPASS; " + cmd) != 0:
        print "ERROR: Failed to export buildings to urbanvision DB"
        sys.exit(1)
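# Design note: the queries above interpolate scenario_name directly into SQL,
# which breaks if a name ever contains a quote. psycopg2 can bind parameters
# itself; a minimal sketch of the same lookup, reusing the cursor, conn, and
# scenario_name from main() above (table and column names unchanged):
#
#   cursor.execute("select id from scenario where name = %s", (scenario_name,))
#   records = cursor.fetchall()
#   if len(records) == 0:
#       cursor.execute("insert into scenario (name, parent) values (%s, 1)",
#                      (scenario_name,))
#       conn.commit()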
help="cache directory") self.parser.add_option("--run-id", dest="run_id", default=None, help="which run_id to update") self.parser.add_option("--force", dest="force", default=False, action="store_true", help="force to overwrite pre-existing run_id") if __name__ == "__main__": try: import wingdbstub except: pass option_group = OptionGroup() parser = option_group.parser (options, args) = parser.parse_args() run_manager = RunManager(option_group.get_services_database_configuration(options)) if options.configuration_path is not None: opus_path = options.configuration_path try: config = get_config_from_opus_path(opus_path) except ImportError: import_stmt = 'from %s import run_configuration as config' % opus_path exec(import_stmt) config['cache_directory'] = options.cache_directory results = run_manager.storage.GetResultsFromQuery("SELECT * from run_activity WHERE run_id = %s " % options.run_id) if len(results) > 1 and not options.force: print "WARNING: run_id %s exists in run_activity. Use --force to override." % options.run_id sys.exit()
class Calibration(object):
    ''' Class to calibrate UrbanSim model coefficients. '''

    def __init__(self, xml_config, scenario, calib_datasets, target_expression,
                 target_file, subset=None, subset_patterns=None,
                 skip_cache_cleanup=False, log_directory=None):
        """
        - xml_config: xml configuration file, for ex '/home/atschirhar/opus/project_configs/paris_zone.xml'
        - scenario: name of scenario to run for calibration, where models_to_run and simulation years are specified
        - calib_datasets: dictionary specifying dataset names and attributes to be calibrated,
              e.g. {'establishment_location_choice_model_coefficients': 'estimate'}
        - target_expression: opus expression computing values from prediction to be compared with targets
        - target_file: name of csv file providing targets
        - subset: dictionary specifying the subset of each dataset to be calibrated, e.g.
              {'establishment_location_choice_model_coefficients': ['coefficient_name', ['paris_celcm', 'biotech_celcm']]}
              subset and subset_patterns cannot both be specified for the same dataset
        - subset_patterns: dictionary specifying the subset of each dataset to be calibrated
              through a regular expression (re) pattern, e.g.
              {'establishment_location_choice_model_coefficients': ['coefficient_name', '*_celcm']}
              subset and subset_patterns cannot both be specified for the same dataset
        """
        self.target_expression = target_expression
        self.target = self.read_target(target_file)
        # Keep the calibration spec and subset filters on self; run() and
        # target_func() below read them back (the original never stored them,
        # which left those methods iterating an empty local dict).
        self.calib_datasets = calib_datasets
        self.subset = subset
        self.subset_patterns = subset_patterns

        self.run_manager = None
        self.xml_config = xml_config
        self.scenario = scenario
        self.skip_cache_cleanup = skip_cache_cleanup
        self.run_id, self.cache_directory = self.init_run()
        self.run_ids = [self.run_id]  # allow starting of multiple runs for parallel optimization
        self.log_directory = log_directory
        if self.log_directory is None:
            self.log_directory = self.cache_directory  # legacy

        log_file = os.path.join(self.log_directory, "calibration.log")
        logger.enable_file_logging(log_file)

        dict_config = XMLConfiguration(self.xml_config).get_run_configuration(self.scenario)
        ## get parameters from config
        self.base_year = dict_config['base_year']
        self.start_year, self.end_year = dict_config['years']
        self.project_name = dict_config['project_name']
        self.package_order = dict_config['dataset_pool_configuration'].package_order

    @log_block("Start Calibration")
    def run(self, optimizer='lbfgsb', results_pickle_prefix="calib", optimizer_kwargs={}):
        ''' Call the specified optimizer to calibrate

        Arguments:
        - optimizer: optimization method chosen (fmin_bfgs, simulated anneal etc.)
        - results_pickle_prefix: prefix of the pickle file name that will be saved
          after the simulation; if None, results are not saved

        Returns:
        - the results from the optimizer
        - a pickle dump of the results in the cache_directory, if results_pickle_prefix is specified
        '''
        simulation_state = SimulationState()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(self.cache_directory)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(new_instance=True,
                                            package_order=self.package_order,
                                            in_storage=attribute_cache).get_dataset_pool()

        # resolve the calibration spec passed to __init__ into
        # (dataset, attribute(s), index) triples
        calib_datasets = {}
        for dataset_name, calib_attr in self.calib_datasets.iteritems():
            dataset = dataset_pool.get_dataset(dataset_name, dataset_arguments={'id_name': []})
            assert self.subset is None or self.subset.get(dataset_name, None) is None or \
                   self.subset_patterns is None or self.subset_patterns.get(dataset_name, None) is None
            if self.subset is not None and self.subset.get(dataset_name, None) is not None:
                subset_attr, subset_cond = self.subset.get(dataset_name)
                index = np.in1d(dataset[subset_attr], subset_cond)
            elif self.subset_patterns is not None and self.subset_patterns.get(dataset_name, None) is not None:
                subset_attr, subset_pattern = self.subset_patterns.get(dataset_name)
                index = array([True if re.search(subset_pattern, attr_v) else False
                               for attr_v in dataset[subset_attr]])
            else:
                index = arange(dataset.size(), dtype='i')
            calib_datasets[dataset_name] = [dataset, calib_attr, index]

        init_v = array([], dtype='f8')
        for dataset_name, calib in calib_datasets.iteritems():
            dataset, calib_attr, index = calib
            if type(calib_attr) == str:
                init_v = np.concatenate((init_v, dataset[calib_attr][index]))
            elif type(calib_attr) in (list, tuple):
                for attr in calib_attr:
                    init_v = np.concatenate((init_v, dataset[attr][index]))
            else:
                raise TypeError, "Unrecognized data type in calib_datasets"

        t0 = time.time()
        if is_parallelizable:
            set_parallel(True)

        print OKBLUE + "\noptimizer = {} (is_parallel = {})".format(optimizer, is_parallelizable) + ENDC
        print OKBLUE + "-------------------------------------------------------\n" + ENDC
        if optimizer == 'bfgs':
            default_kwargs = {'fprime': None, 'epsilon': 1e-08,
                              'maxiter': None, 'full_output': 1,
                              'disp': 1, 'retall': 0, 'callback': None}
            optimizer_func = fmin_bfgs
        elif optimizer == 'lbfgsb':
            default_kwargs = {'fprime': None, 'approx_grad': True,
                              'bounds': None, 'factr': 1e12, 'iprint': 1}
            optimizer_func = fmin_l_bfgs_b
        elif optimizer == 'anneal':
            default_kwargs = {'schedule': 'fast', 'full_output': 1,
                              'T0': None, 'Tf': 1e-12, 'maxeval': None,
                              'maxaccept': None, 'maxiter': 400,
                              'boltzmann': 1.0, 'learn_rate': 0.5,
                              'feps': 1e-06, 'quench': 1.0, 'm': 1.0, 'n': 1.0,
                              'lower': -1, 'upper': 1, 'dwell': 50, 'disp': True}
            optimizer_func = anneal
        elif optimizer == 'panneal':
            default_kwargs = {'schedule': 'fast', 'full_output': 1,
                              'T0': None, 'Tf': 1e-12, 'maxeval': None,
                              'maxaccept': None, 'maxiter': 400,
                              'boltzmann': 1.0, 'learn_rate': 0.5,
                              'feps': 1e-06, 'quench': 1.0, 'm': 1.0, 'n': 1.0,
                              'lower': -1, 'upper': 1, 'dwell': 50, 'disp': True,
                              'cores': 24, 'interv': 20}
            optimizer_func = panneal
        else:
            raise ValueError, "Unrecognized optimizer {}".format(optimizer)
        default_kwargs.update(optimizer_kwargs)
        results = optimizer_func(self.target_func, copy(init_v), **default_kwargs)
        duration = time.time() - t0

        if results_pickle_prefix is not None:
            pickle_file = "{}_{}.pickle".format(results_pickle_prefix, optimizer)
            pickle_file = os.path.join(self.log_directory, pickle_file)
            pickle.dump(results, open(pickle_file, "wb"))

        if is_parallelizable:
            set_parallel(False)

        logger.log_status('init target_func: {}'.format(self.target_func(init_v)))
        logger.log_status('end target_func: {}'.format(results[:]))  # which one?
        logger.log_status('outputs from optimizer: {}'.format(results))
        logger.log_status('Execution time: {}'.format(duration))

    def init_run(self, create_baseyear_cache=True):
        ''' init run, get run_id & cache_directory. '''
        ## avoid invoking start_run from cmd line
        option_group = StartRunOptionGroup()
        option_group.parser.set_defaults(xml_configuration=self.xml_config,
                                         scenario_name=self.scenario)
        #run_id, cache_directory = start_run(option_group)
        options, args = option_group.parse()
        self.run_manager = RunManager(option_group.get_services_database_configuration(options))
        resources = XMLConfiguration(self.xml_config).get_run_configuration(self.scenario)
        insert_auto_generated_cache_directory_if_needed(resources)
        cache_directory = resources['cache_directory']
        self.run_manager.setup_new_run(cache_directory, resources)
        run_id, cache_directory = (self.run_manager.run_id,
                                   self.run_manager.get_current_cache_directory())
        self.run_manager.add_row_to_history(run_id, resources, "done")

        if create_baseyear_cache:
            self.run_manager.create_baseyear_cache(resources)

        ## good for testing
        #run_id = 275
        #cache_directory = '/home/lmwang/opus/data/paris_zone/runs/run_275.2012_05_26_00_20'
        assert run_id is not None
        assert cache_directory is not None
        return run_id, cache_directory

    def update_parameters(self, est_v, cache_directory, simulation_state,
                          dataset_pool, calib_datasets, *args, **kwargs):
        i_est_v = 0
        current_year = simulation_state.get_current_time()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(cache_directory)
        for dataset_name, calib in calib_datasets.iteritems():
            dataset, calib_attr, index = calib
            if type(calib_attr) == str:
                dtype = dataset[calib_attr].dtype
                dataset[calib_attr][index] = (est_v[i_est_v:i_est_v + index.size]).astype(dtype)
                i_est_v += index.size
            elif type(calib_attr) in (list, tuple):
                for attr in calib_attr:
                    dtype = dataset[attr].dtype
                    dataset[attr][index] = (est_v[i_est_v:i_est_v + index.size]).astype(dtype)
                    i_est_v += index.size
            else:
                raise TypeError, "Unrecognized data type in calib_datasets"
            # flush the updated attributes back to the cache
            dataset.flush_dataset()
        simulation_state.set_current_time(current_year)

    def update_prediction(self, est_v, simulation_state, dataset_pool,
                          calib_datasets, *args, **kwargs):
        option_group = RestartRunOptionGroup()
        option_group.parser.set_defaults(project_name=self.project_name,
                                         skip_cache_cleanup=self.skip_cache_cleanup)
        options, args = option_group.parse()
        if self.run_manager is None:
            self.run_manager = RunManager(option_group.get_services_database_configuration(options))

        # 'lock' is assumed to be a module-level lock guarding the run
        # registry during parallel calibration
        if lock is not None:
            lock.acquire()
        ## query runs available for re-use
        runs_done = self.run_manager.get_run_info(run_ids=self.run_ids, status='done')
        create_baseyear_cache = False
        if len(runs_done) == 0:  ## there is no re-usable run directory, init a new run
            run_id, cache_directory = self.init_run(create_baseyear_cache=False)
            self.run_ids.append(run_id)
            create_baseyear_cache = True
            logger.log_status('Initializing new run with id ' + str(run_id))
        else:
            run_id = runs_done[0].run_id  ## take the first 'done' run_id
            cache_directory = self.run_manager.get_cache_directory(run_id)
            logger.log_status('Using old run with id ' + str(run_id))
        resources = self.run_manager.get_resources_for_run_id_from_history(run_id,
                                                                           filter_by_status=False)
        self.run_manager.add_row_to_history(run_id, resources, "taken")
        if lock is not None:
            lock.release()

        if create_baseyear_cache:
            self.run_manager.create_baseyear_cache(resources)

        self.update_parameters(est_v, cache_directory, simulation_state,
                               dataset_pool, calib_datasets, *args, **kwargs)
        restart_run(option_group=option_group, args=[run_id, self.start_year])

        prediction = self.summarize_prediction(cache_directory, simulation_state,
                                               dataset_pool, calib_datasets)
        return prediction

    def summarize_prediction(self, cache_directory, simulation_state,
                             dataset_pool, calib_datasets):
        dataset_name = VariableName(self.target_expression).get_dataset_name()
        current_year = simulation_state.get_current_time()
        simulation_state.set_current_time(self.end_year)
        simulation_state.set_cache_directory(cache_directory)
        # force reload
        dataset_pool.remove_all_datasets()
        dataset = dataset_pool[dataset_name]
        ids = dataset.get_id_attribute()
        results = dataset.compute_variables(self.target_expression,
                                            dataset_pool=dataset_pool)
        simulation_state.set_current_time(current_year)
        return dict(zip(ids, results))

    def read_target(self, target_file):
        ## read (& process) target numbers into a dictionary: {id: value}
        ## expects a csv file with header: id, target
        header = file(target_file, 'r').readline().strip().split(',')
        contents = np.genfromtxt(target_file, delimiter=",", comments='#', skip_header=1)
        target = dict(zip(contents[:, 0], contents[:, 1]))
        return target

    def target_func(self, est_v, func=lambda x, y: np.sum(np.abs(x - y)), **kwargs):
        ''' Target function.'''
        simulation_state = SimulationState()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(self.cache_directory)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(new_instance=True,
                                            package_order=self.package_order,
                                            in_storage=attribute_cache).get_dataset_pool()

        # same spec resolution as in run(); reads the subset filters stored on self
        calib_datasets = {}
        for dataset_name, calib_attr in self.calib_datasets.iteritems():
            dataset = dataset_pool.get_dataset(dataset_name, dataset_arguments={'id_name': []})
            assert self.subset is None or self.subset.get(dataset_name, None) is None or \
                   self.subset_patterns is None or self.subset_patterns.get(dataset_name, None) is None
            if self.subset is not None and self.subset.get(dataset_name, None) is not None:
                subset_attr, subset_cond = self.subset.get(dataset_name)
                index = np.in1d(dataset[subset_attr], subset_cond)
            elif self.subset_patterns is not None and self.subset_patterns.get(dataset_name, None) is not None:
                subset_attr, subset_pattern = self.subset_patterns.get(dataset_name)
                index = array([True if re.search(subset_pattern, attr_v) else False
                               for attr_v in dataset[subset_attr]])
            else:
                index = arange(dataset.size(), dtype='i')
            calib_datasets[dataset_name] = [dataset, calib_attr, index]

        prediction = self.update_prediction(est_v, simulation_state, dataset_pool,
                                            calib_datasets, **kwargs)
        ## allow keys in target not appearing in prediction, assuming their values to be 0
        ### every key in target should appear in prediction
        #assert np.all( np.in1d(self.target.keys(), prediction.keys()) )
        target = np.array(self.target.values())
        predct = np.array([prediction[k] if prediction.has_key(k) else 0
                           for k in self.target.keys()])
        results = func(predct, target)
        return results
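# A minimal usage sketch for the Calibration class above. All paths, dataset
# and attribute names, the target expression, and the optimizer kwarg below
# are illustrative placeholders, not values from a real project; the target
# csv is assumed to follow the "id, target" header documented in read_target.
#
#   calib = Calibration(
#       xml_config='/home/someuser/opus/project_configs/paris_zone.xml',  # placeholder path
#       scenario='paris_zone_calibration',                                # placeholder scenario
#       calib_datasets={'establishment_location_choice_model_coefficients': 'estimate'},
#       target_expression='establishment.number_of_agents(zone)',         # placeholder expression
#       target_file='targets.csv',
#       subset_patterns={'establishment_location_choice_model_coefficients':
#                        ['coefficient_name', '.*_celcm']},
#   )
#   # fmin_l_bfgs_b accepts maxfun to cap the number of function evaluations
#   results = calib.run(optimizer='lbfgsb', optimizer_kwargs={'maxfun': 100})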
dest="years_to_delete", default=None, action="store", help= "Python expression specifying list of years to delete from the simulation's cache." ) if __name__ == "__main__": try: import wingdbstub except: pass option_group = StartRunOptionGroup() parser = option_group.parser (options, args) = parser.parse_args() run_manager = RunManager( option_group.get_services_database_configuration(options)) if options.run_id is None: parser.print_help() elif options.years_to_delete: years_to_delete = eval(options.years_to_delete) if not isinstance(years_to_delete, list): years_to_delete = [years_to_delete] run_manager.delete_year_dirs_in_cache(options.run_id, years_to_delete=years_to_delete) else: run_manager.delete_everything_for_this_run(options.run_id)