def create_indicators(self, indicators, file_name_for_indicator_results='indicator_results.html', display_error_box=False, show_results=False):
    '''Handles the computation of a list of indicators.

    Turns on file logging under the first indicator's cache directory,
    creates every indicator in turn, writes a combined results page and
    optionally opens it.  Returns the path of the results page, or None
    when *indicators* is empty.
    '''
    if not indicators:
        return
    source_data = indicators[0].source_data
    # Everything produced while creating indicators is appended to
    # <cache_directory>/indicators.log.
    log_path = os.path.join(source_data.cache_directory, 'indicators.log')
    logger.enable_file_logging(log_path, 'a')
    logger.log_status('\n%s BEGIN %s %s' % ('=' * 29, strftime('%Y_%m_%d_%H_%M', localtime()), '=' * 29))
    for one_indicator in indicators:
        one_indicator.create(display_error_box=display_error_box)
    logger.log_status('%s END %s %s\n' % ('=' * 30, strftime('%Y_%m_%d_%H_%M', localtime()), '=' * 30))
    logger.disable_file_logging(log_path)
    results_page_path = self._write_results(indicators, source_data, file_name_for_indicator_results, display_error_box)
    if show_results:
        self.show_results(results_page_path)
    return results_page_path
def _run_each_year_as_separate_process(self, start_year, end_year, seed_array, resources, log_file_name='run_multiprocess.log'):
    """Run each year of an UrbanSim simulation in its own child process.

    For every year in [start_year, end_year] this forks a new process
    running urbansim's model system (skipped for the first year when
    resources['skip_urbansim'] is set) and afterwards runs any travel
    models configured for that year.  *seed_array* supplies one random
    seed per year.  Mutates *resources* in place ('years', 'seed',
    'profile_filename').

    Fix: replaced the obsolete '<>' inequality operator (deprecated in
    Python 2, removed in Python 3) with '!='.
    """
    skip_first_year_of_urbansim = resources.get('skip_urbansim', False)
    log_file = os.path.join(resources['cache_directory'], log_file_name)
    profiler_name = resources.get("profile_filename", None)
    iyear = 0
    for year in range(start_year, end_year + 1):
        if (year != start_year) or ((year == start_year) and (not skip_first_year_of_urbansim)):
            logger.start_block('Running UrbanSim for year %d in new process' % year)
            try:
                resources['years'] = (year, year)
                resources['seed'] = (seed_array[iyear],)
                logger.disable_file_logging(log_file)
                if profiler_name is not None:
                    # add year to the profile name
                    resources["profile_filename"] = "%s_%s" % (profiler_name, year)
                self._fork_new_process(
                    'urbansim.model_coordinators.model_system', resources,
                    optional_args=['--log-file-name', log_file_name])
                logger.enable_file_logging(log_file, verbose=False)
            finally:
                logger.end_block()
        # Travel models run every year, even when urbansim itself was skipped.
        if ('travel_model_configuration' in resources) and (not resources.get('skip_travel_model', False)):
            # tnicolai add start year to travel model config
            tmc = resources['travel_model_configuration']
            tmc['start_year'] = start_year
            # end tnicolai
            self._run_travel_models_in_separate_processes(resources['travel_model_configuration'], year, resources)
        if 'post_year_configuration' in resources:
            self._run_travel_models_in_separate_processes(resources['post_year_configuration'], year, resources)
        iyear += 1
    self._notify_stopped()
def _run_each_year_as_separate_process(self, iyear, year, seed=None, resources=None, profiler_name=None, log_file=None):
    """Fork one child process that simulates a single *year*.

    Mutates *resources* in place ('years', 'seed' and, when profiling,
    'profile_filename') before handing it to the child process.  Returns
    the success flag reported by the forked process.
    """
    logger.start_block("Running simulation for year %d in new process" % year)
    resources["years"] = (year, year)
    resources["seed"] = (seed,)
    if profiler_name is not None:
        # keep one profile file per simulated year
        resources["profile_filename"] = "%s_%s" % (profiler_name, year)
    extra_args = []
    if log_file:
        extra_args += ["--log-file-name", os.path.split(log_file)[-1]]
    forked_ok = False
    try:
        # Suspend our own file logging while the child writes to the log.
        logger.disable_file_logging(log_file)
        forked_ok = self._fork_new_process(
            "opus_core.model_coordinators.model_system",
            resources,
            optional_args=extra_args)
        logger.enable_file_logging(log_file, verbose=False)
    finally:
        logger.end_block()
    return forked_ok
def create_indicators(self, indicators, file_name_for_indicator_results="indicator_results.html", display_error_box=False, show_results=False):
    """Handles the computation of a list of indicators.

    Logs the whole batch to an 'indicators.log' file in the cache
    directory of the first indicator's source data, then writes one
    combined HTML results page.  Returns that page's path (None for an
    empty batch); shows the page when *show_results* is true.
    """
    if len(indicators) == 0:
        return
    source_data = indicators[0].source_data
    indicator_log = os.path.join(source_data.cache_directory, "indicators.log")
    logger.enable_file_logging(indicator_log, "a")
    logger.log_status("\n%s BEGIN %s %s" % ("=" * 29, strftime("%Y_%m_%d_%H_%M", localtime()), "=" * 29))
    for indicator in indicators:
        indicator.create(display_error_box=display_error_box)
    logger.log_status("%s END %s %s\n" % ("=" * 30, strftime("%Y_%m_%d_%H_%M", localtime()), "=" * 30))
    logger.disable_file_logging(indicator_log)
    page = self._write_results(indicators, source_data, file_name_for_indicator_results, display_error_box)
    if show_results:
        self.show_results(page)
    return page
def _run_each_year_as_separate_process(self, start_year, end_year, seed_array, resources, log_file_name='run_multiprocess.log'):
    """Simulate every year in [start_year, end_year], each in a new process.

    *seed_array* supplies one random seed per year.  Mutates *resources*
    in place before every fork; notifies observers when all years are done.
    """
    log_file = os.path.join(resources['cache_directory'], log_file_name)
    profiler_name = resources.get("profile_filename", None)
    for iyear, year in enumerate(range(start_year, end_year + 1)):
        logger.start_block('Running simulation for year %d in new process' % year)
        try:
            resources['years'] = (year, year)
            resources['seed'] = (seed_array[iyear],)
            # The child owns the log file while it runs.
            logger.disable_file_logging(log_file)
            if profiler_name is not None:
                # add year to the profile name
                resources["profile_filename"] = "%s_%s" % (profiler_name, year)
            self._fork_new_process(
                'opus_core.model_coordinators.model_system', resources,
                optional_args=['--log-file-name', log_file_name])
            logger.enable_file_logging(log_file, verbose=False)
        finally:
            logger.end_block()
    self._notify_stopped()
def log_results(self):
    """Append this model's estimation results to 'estimate_models.log'.

    Pulls the estimation output out of the model system's variable
    dictionary and asks the estimation procedure to print it, one
    submodel at a time.

    Fixes: the bare ``except:`` (which also swallowed KeyboardInterrupt)
    is narrowed to ``except Exception``, and ``end_block`` /
    ``disable_file_logging`` now run in a ``finally`` so an unexpected
    error can no longer leave the log block open and file logging stuck on.
    """
    procedure = self.model_system.run_year_namespace["model"].procedure
    if not hasattr(procedure, 'print_results'):
        logger.log_warning("Estimation procedure %s doesn't have a print_results() method, " % procedure + \
                           "which is needed to log estimation results.")
        return
    tmp_config = Resources(self.config)
    outputvar = tmp_config['models_configuration'][self.model_name]['controller']['estimate']['arguments']['output']
    results = self.model_system.vardict.get(outputvar, "process_output")[1]
    storage_location = AttributeCache().get_storage_location()
    log_file_name = "estimate_models.log"  ## one file for all estimation results
    logger.enable_file_logging(os.path.join(storage_location, log_file_name),
                               mode='a')  ##appending instead of overwriting
    logger.start_block("%s Estimation Results" % self.model_name)
    try:
        for submodel, submodel_results in results.items():
            logger.log_status("Submodel %s" % submodel)
            if submodel_results == {}:
                logger.log_warning("No estimation results for submodel %s" % submodel)
            else:
                try:
                    procedure.print_results(submodel_results)
                except Exception:
                    # best effort: keep logging the remaining submodels
                    logger.log_warning("Problems in printing results for submodel %s" % submodel)
    finally:
        logger.end_block()
        logger.disable_file_logging()
def _run_each_year_as_separate_process(self, iyear, year, seed=None, resources=None, profiler_name=None, log_file=None):
    '''Run the model system for *year* in a freshly forked child process.

    Updates *resources* in place ('years', 'seed', and the per-year
    'profile_filename' when profiling) and returns whether the child
    process reported success.
    '''
    logger.start_block('Running simulation for year %d in new process' % year)
    resources['years'] = (year, year)
    resources['seed'] = (seed,)
    if profiler_name is not None:
        # add year to the profile name
        resources["profile_filename"] = "%s_%s" % (profiler_name, year)
    cmd_args = []
    if log_file:
        cmd_args = ['--log-file-name', os.path.basename(log_file)]
    success = False
    try:
        logger.disable_file_logging(log_file)
        success = self._fork_new_process(
            'opus_core.model_coordinators.model_system',
            resources, optional_args=cmd_args)
        logger.enable_file_logging(log_file, verbose=False)
    finally:
        # always close the log block, even if the fork raised
        logger.end_block()
    return success
def xtest_simulation(self):
    # temporarily switched off (renamed away from test_*), since the
    # database data does not match the code for now
    """Checks that the simulation proceeds without caching. """
    self.simulation.run_simulation()
    # release the log file the simulation left open
    logger.disable_file_logging()
    self.completed_without_error = True
def tearDown(self):
    """Close any open log file and delete the temporary cache tree."""
    # the logger may still hold a file open inside the cache directory;
    # close it first so the files below can actually be removed
    if logger._file_stream:
        logger.disable_file_logging()
    # bottom-up walk: delete files first, then the emptied directories
    for parent, subdirs, filenames in os.walk(self.dir, topdown=False):
        for name in filenames:
            os.remove(os.path.join(parent, name))
        for name in subdirs:
            os.rmdir(os.path.join(parent, name))
    os.rmdir(self.dir)
    os.rmdir(self.base_cache_dir)
def create_batch(self, indicators, source_data):
    '''Compute a batch of indicators for *source_data*.

    Creates the indicator and '_stored_data' directories when missing,
    logs the whole generation to
    <cache_directory>/indicators/indicators.log, checks integrity,
    computes all indicators and — outside of test mode — writes them to
    the database.  Returns the computed indicator group.
    '''
    self.source_data = source_data
    cache_dir = self.source_data.cache_directory
    self.storage_location = os.path.join(self.source_data.get_indicator_directory(), '_stored_data')
    if not os.path.exists(self.source_data.get_indicator_directory()):
        os.mkdir(self.source_data.get_indicator_directory())
    if not os.path.exists(self.storage_location):
        os.mkdir(self.storage_location)
    log_path = os.path.join(cache_dir, 'indicators', 'indicators.log')
    logger.enable_file_logging(log_path, 'a')
    logger.log_status('\n%s Indicator Generation BEGIN %s %s' % ('=' * 10, strftime('%Y_%m_%d_%H_%M', localtime()), '=' * 10))
    # reset per-batch dataset bookkeeping
    self.dataset = None
    self.dataset_state = {
        'current_cache_directory': None,
        'year': None,
        'dataset_name': None,
    }
    self.package_order = self.source_data.get_package_order()
    SimulationState().set_cache_directory(cache_dir)
    self._check_integrity(indicators=indicators, source_data=source_data)
    computed = self._make_all_indicators(indicators=indicators, source_data=source_data)
    if not self.test:
        self.write_computed_indicators_to_db(computed_indicator_group=computed, project_name=self.project_name)
    logger.log_status('%s Indicator Generation END %s %s\n' % ('=' * 11, strftime('%Y_%m_%d_%H_%M', localtime()), '=' * 11))
    logger.disable_file_logging(log_path)
    return computed
def create_batch(self, indicators, source_data):
    '''Generate every indicator in *indicators* against *source_data*.

    Ensures the indicator output directories exist, wraps the whole run
    in BEGIN/END banners written to an 'indicators.log' file, and
    persists the results to the database unless running in test mode.
    Returns the group of computed indicators.
    '''
    self.source_data = source_data
    cache_directory = self.source_data.cache_directory
    self.storage_location = os.path.join(
        self.source_data.get_indicator_directory(), '_stored_data')
    if not os.path.exists(self.source_data.get_indicator_directory()):
        os.mkdir(self.source_data.get_indicator_directory())
    if not os.path.exists(self.storage_location):
        os.mkdir(self.storage_location)
    batch_log = os.path.join(cache_directory, 'indicators', 'indicators.log')
    logger.enable_file_logging(batch_log, 'a')
    logger.log_status('\n%s Indicator Generation BEGIN %s %s' %
                      ('=' * 10, strftime('%Y_%m_%d_%H_%M', localtime()), '=' * 10))
    # clear any dataset state left over from a previous batch
    self.dataset = None
    self.dataset_state = {'current_cache_directory': None,
                          'year': None,
                          'dataset_name': None}
    self.package_order = self.source_data.get_package_order()
    SimulationState().set_cache_directory(cache_directory)
    self._check_integrity(indicators=indicators, source_data=source_data)
    results = self._make_all_indicators(indicators=indicators,
                                        source_data=source_data)
    if not self.test:
        self.write_computed_indicators_to_db(computed_indicator_group=results,
                                             project_name=self.project_name)
    logger.log_status('%s Indicator Generation END %s %s\n' %
                      ('=' * 11, strftime('%Y_%m_%d_%H_%M', localtime()), '=' * 11))
    logger.disable_file_logging(batch_log)
    return results
def _run_each_year_as_separate_process(self, start_year, end_year, seed_array, resources, log_file_name='run_multiprocess.log'):
    """Fork one UrbanSim child process per simulated year, then travel models.

    The first year's urbansim run can be skipped via
    resources['skip_urbansim']; travel models (and any
    'post_year_configuration' models) still run for every year.
    *seed_array* supplies one random seed per year; *resources* is
    mutated in place.

    Fix: replaced the obsolete '<>' inequality operator (deprecated in
    Python 2, removed in Python 3) with '!='.
    """
    skip_first_year_of_urbansim = resources.get('skip_urbansim', False)
    log_file = os.path.join(resources['cache_directory'], log_file_name)
    profiler_name = resources.get("profile_filename", None)
    iyear = 0
    for year in range(start_year, end_year + 1):
        if (year != start_year) or ((year == start_year) and (not skip_first_year_of_urbansim)):
            logger.start_block(
                'Running UrbanSim for year %d in new process' % year)
            try:
                resources['years'] = (year, year)
                resources['seed'] = (seed_array[iyear],)
                logger.disable_file_logging(log_file)
                if profiler_name is not None:
                    # add year to the profile name
                    resources["profile_filename"] = "%s_%s" % (profiler_name, year)
                self._fork_new_process(
                    'urbansim.model_coordinators.model_system',
                    resources,
                    optional_args=['--log-file-name', log_file_name])
                logger.enable_file_logging(log_file, verbose=False)
            finally:
                logger.end_block()
        if ('travel_model_configuration' in resources) and (
                not resources.get('skip_travel_model', False)):
            # tnicolai add start year to travel model config
            tmc = resources['travel_model_configuration']
            tmc['start_year'] = start_year
            # end tnicolai
            self._run_travel_models_in_separate_processes(
                resources['travel_model_configuration'], year, resources)
        if 'post_year_configuration' in resources:
            self._run_travel_models_in_separate_processes(
                resources['post_year_configuration'], year, resources)
        iyear += 1
    self._notify_stopped()
def create_batch(self, indicators, source_data):
    """Run one indicator-generation batch and return the computed group.

    Side effects: creates output directories on demand, appends
    BEGIN/END banners and all status output to
    <cache_directory>/indicators/indicators.log, resets per-batch
    dataset state on self, and (unless self.test) stores the results in
    the project database.
    """
    self.source_data = source_data
    cache_directory = self.source_data.cache_directory
    self.storage_location = os.path.join(self.source_data.get_indicator_directory(), "_stored_data")
    # create output directories lazily
    if not os.path.exists(self.source_data.get_indicator_directory()):
        os.mkdir(self.source_data.get_indicator_directory())
    if not os.path.exists(self.storage_location):
        os.mkdir(self.storage_location)
    log_file_path = os.path.join(cache_directory, "indicators", "indicators.log")
    logger.enable_file_logging(log_file_path, "a")
    logger.log_status("\n%s Indicator Generation BEGIN %s %s"
                      % ("=" * 10, strftime("%Y_%m_%d_%H_%M", localtime()), "=" * 10))
    self.dataset = None
    self.dataset_state = {"current_cache_directory": None,
                          "year": None,
                          "dataset_name": None}
    self.package_order = self.source_data.get_package_order()
    SimulationState().set_cache_directory(cache_directory)
    self._check_integrity(indicators=indicators, source_data=source_data)
    computed_indicators = self._make_all_indicators(indicators=indicators,
                                                    source_data=source_data)
    if not self.test:
        self.write_computed_indicators_to_db(
            computed_indicator_group=computed_indicators,
            project_name=self.project_name)
    logger.log_status("%s Indicator Generation END %s %s\n"
                      % ("=" * 11, strftime("%Y_%m_%d_%H_%M", localtime()), "=" * 11))
    logger.disable_file_logging(log_file_path)
    return computed_indicators
def log_results(self):
    """Append this model's estimation results to 'estimate_models.log'.

    Fetches the estimation output from the model system's variable
    dictionary and asks the estimation procedure to print it submodel by
    submodel.

    Fixes: ``procedure.print_results`` was unprotected, so one failing
    submodel aborted logging of the remaining submodels and skipped
    ``end_block``/``disable_file_logging`` entirely.  Printing is now
    wrapped per submodel, and the block close / log shutdown run in a
    ``finally``.
    """
    procedure = self.model_system.run_year_namespace["model"].procedure
    if not hasattr(procedure, 'print_results'):
        logger.log_warning("Estimation procedure %s doesn't have a print_results() method, " % procedure + \
                           "which is needed to log estimation results.")
        return
    tmp_config = Resources(self.config)
    outputvar = tmp_config['models_configuration'][self.model_name]['controller']['estimate']['arguments']['output']
    results = self.model_system.vardict.get(outputvar, "process_output")[1]
    storage_location = AttributeCache().get_storage_location()
    log_file_name = "estimate_models.log"  ## one file for all estimation results
    logger.enable_file_logging(os.path.join(storage_location, log_file_name),
                               mode='a')  ##appending instead of overwriting
    logger.start_block("%s Estimation Results" % self.model_name)
    try:
        for submodel, submodel_results in results.items():
            logger.log_status("Submodel %s" % submodel)
            if submodel_results == {}:
                logger.log_warning("No estimation results for submodel %s" % submodel)
            else:
                try:
                    procedure.print_results(submodel_results)
                except Exception:
                    # keep going: one bad submodel must not stop the rest
                    logger.log_warning("Problems in printing results for submodel %s" % submodel)
    finally:
        logger.end_block()
        logger.disable_file_logging()
# Seed the master random number generator; root_seed may be None, in
# which case numpy seeds itself.
root_seed = config.get("seed", None)
seed(root_seed)
# generate different seed for each run (each seed contains 1 number)
seed_array = randint(1,2**30, number_of_runs)
list_of_cache_directories = []
for irun in range(number_of_runs):
    # each replicate gets its own single-element seed tuple and a
    # private copy of the configuration
    config['seed']= (seed_array[irun],)
    this_config = config.copy()
    # every number_of_runs_in_parallel-th run executes in the foreground,
    # throttling how many background runs are in flight at once
    if ((irun + 1) % number_of_runs_in_parallel) == 0:
        run_in_background = False
    else:
        run_in_background = True
    run_manager.setup_new_run(cache_directory = this_config['cache_directory'],
                              configuration = this_config)
    run_manager.run_run(this_config, run_as_multiprocess=False,
                        run_in_background=run_in_background)
    if irun == 0:
        # log file for the multiple runs will be located in the first cache
        first_cache_directory = this_config['cache_directory']
        log_file = os.path.join(first_cache_directory, 'multiple_runs.log')
        logger.enable_file_logging(log_file)
        logger.log_status("Multiple runs: %s replications" % number_of_runs)
        logger.log_status("root random seed = %s" % str(root_seed))
    else:
        # log_file was bound on the first iteration; keep appending to it
        logger.enable_file_logging(log_file, verbose=False)
    logger.log_status("Run %s: %s" % (irun+1, this_config['cache_directory']))
    logger.disable_file_logging(log_file)
    list_of_cache_directories.append(this_config['cache_directory'])
    # NOTE(review): rewritten after every run so the on-disk list stays
    # current even if a later run crashes — TODO confirm intended
    write_to_text_file(os.path.join(first_cache_directory,"cache_directories"),
                       list_of_cache_directories)
def create_prediction_success_table(self, summarize_by=None,
                                    predicted_choice_id_name=None,
                                    predicted_choice_id_prefix="predicted_",
                                    log_to_file=None,
                                    force_predict=True):
    """Log a table comparing observed choices with the model's predictions.

    Builds a square matrix (observed summary id on rows, predicted on
    columns) plus per-row success rates, and writes it via the logger,
    optionally also to *log_to_file*.

    summarize_by: expression used to aggregate choices before tabulating;
        defaults to the agent set's choice id attribute.  May belong to
        either the choice set or the agent set.
    predicted_choice_id_name: agent attribute holding predictions;
        defaults to predicted_choice_id_prefix + choice id name.
    force_predict: when True (default) the prediction is always re-run.

    Returns None normally, False when *summarize_by* names a dataset
    that is neither the choice set nor the agent set.
    """
    agents = self.get_agent_set()
    choices = self.get_choice_set()
    choice_id_name = choices.get_id_name()[0]
    # prefer the agent index remembered from a previous prediction run
    if self.agents_index_for_prediction is not None:
        agents_index = self.agents_index_for_prediction
    else:
        agents_index = self.get_agent_set_index()
    if predicted_choice_id_name is None or len(predicted_choice_id_name) == 0:
        predicted_choice_id_name = predicted_choice_id_prefix + choice_id_name
    if force_predict or (predicted_choice_id_name not in agents.get_known_attribute_names()):
        if not self.predict(predicted_choice_id_name=predicted_choice_id_name,
                            agents_index=agents_index):
            logger.log_error("Failed to run simulation for prediction; unable to create prediction success table.")
            return
    if log_to_file is not None and len(log_to_file) > 0:
        logger.enable_file_logging(log_to_file)
    ## by default, compare predicted choice with observed choice
    ## this is not feasible for location choice model, where the
    ## alternative set is too large to be useful
    if summarize_by is None:
        summarize_by = "%s.%s" % (agents.dataset_name, choice_id_name)
    summarize_dataset_name = VariableName(summarize_by).get_dataset_name()
    if summarize_dataset_name == choices.dataset_name:
        # summary variable lives on the choice set: map observed and
        # predicted choice ids through the choice set's summary values
        summary_id = choices.compute_variables(summarize_by)
        chosen_choice_id = agents.get_attribute_by_index(choices.get_id_name()[0], agents_index)
        predicted_choice_id = agents.get_attribute_by_index(predicted_choice_id_name, agents_index)
        chosen_choice_index = choices.get_id_index(chosen_choice_id)
        predicted_choice_index = choices.get_id_index(predicted_choice_id)
        chosen_summary_id = summary_id[chosen_choice_index]
        predicted_summary_id = summary_id[predicted_choice_index]
        unique_summary_id = unique(summary_id)
    elif summarize_dataset_name == agents.dataset_name:
        # summary variable lives on the agent set: compute once with the
        # observed choices, then temporarily swap in the predictions,
        # recompute, and restore the original attribute
        chosen_summary_id = agents.compute_variables(summarize_by)[agents_index]
        chosen_choice_id = agents.get_attribute(choice_id_name).copy()
        predicted_choice_id = agents.get_attribute(predicted_choice_id_name)
        agents.modify_attribute(name=choice_id_name, data=predicted_choice_id)
        predicted_summary_id = agents.compute_variables(summarize_by)[agents_index]
        agents.modify_attribute(name=choice_id_name, data=chosen_choice_id)
        unique_summary_id = unique(concatenate((chosen_summary_id, predicted_summary_id)))
    else:
        logger.log_error("summarize_by expression '%s' is specified for dataset %s, which is neither the choice_set '%s' nor the agent_set '%s'."
                         % (summarize_by, summarize_dataset_name, choices.dataset_name, agents.dataset_name))
        return False
    # observed on row, predicted on column
    prediction_matrix = zeros((unique_summary_id.size, unique_summary_id.size), dtype="int32")

    def _convert_array_to_tab_delimited_string(an_array):
        # floats get 4 decimals, everything else plain str()
        from numpy import dtype
        if an_array.dtype == dtype('f'):
            return "\t".join(["%5.4f" % item for item in an_array])
        return "\t".join([str(item) for item in an_array])

    logger.log_status("Observed_id\tSuccess_rate\t%s" % \
                      _convert_array_to_tab_delimited_string(unique_summary_id))
    i = 0
    success_rate = zeros(unique_summary_id.size, dtype="float32")
    for observed_id in unique_summary_id:
        # row i = histogram of predicted summary ids among agents whose
        # observed summary id equals observed_id
        predicted_id = predicted_summary_id[chosen_summary_id == observed_id]
        prediction_matrix[i] = ndimage.sum(ones(predicted_id.size),
                                           labels=predicted_id,
                                           index=unique_summary_id)
        if prediction_matrix[i].sum() > 0:
            # NOTE(review): this inner test duplicates the enclosing
            # condition, so the inner else branch is dead code; the
            # success rate stays at its zero initialization otherwise.
            if prediction_matrix[i].sum() > 0:
                success_rate[i] = float(prediction_matrix[i, i]) / prediction_matrix[i].sum()
            else:
                success_rate[i] = 0
        logger.log_status("%s\t\t%5.4f\t\t%s"
                          % (observed_id, success_rate[i],
                             _convert_array_to_tab_delimited_string(prediction_matrix[i])))
        i += 1
    success_rate2 = zeros(i, dtype="float32")
    for j in range(i):
        # NOTE(review): numerator is a column sum, denominator a row sum;
        # presumably intentional (predictions in column j relative to
        # observations in row j) — confirm against project docs
        if prediction_matrix[j, :].sum() > 0:
            success_rate2[j] = float(prediction_matrix[:, j].sum()) / prediction_matrix[j, :].sum()
        else:
            success_rate2[j] = 0
    logger.log_status("%s\t\t%s\t\t%s"
                      % (' ', ' ',
                         _convert_array_to_tab_delimited_string(success_rate2)))
    # NOTE(review): called even when file logging was never enabled
    # (log_to_file is None); relies on the logger tolerating that
    logger.disable_file_logging(filename=log_to_file)
def test_psrc_opus_simulation(self):
    """Checks that the simulation proceeds without crashing. """
    # run the full simulation, then verify it actually changed data
    self.simulation.run_simulation()
    self._check_simulation_produces_changes()
    # release the log file the simulation left open
    logger.disable_file_logging()
def create_prediction_success_table(self, summarize_by=None,
                                    predicted_choice_id_name=None,
                                    predicted_choice_id_prefix="predicted_",
                                    log_to_file=None,
                                    force_predict=True):
    """Log a prediction-success (confusion) table for this choice model.

    Re-runs the prediction (when *force_predict* is true or no predicted
    attribute exists yet), then cross-tabulates observed vs. predicted
    choices, optionally aggregated by the expression *summarize_by*
    (defaults to the agent set's choice id attribute).  The table is
    written through the logger and, when *log_to_file* is given, also to
    that file.  Returns None normally, False when *summarize_by* names a
    dataset that is neither the choice set nor the agent set.

    Fix: removed a duplicated nested test of
    ``prediction_matrix[i].sum() > 0`` whose ``else`` branch was dead
    code.  Behavior is unchanged: ``success_rate`` is zero-initialized,
    so empty rows still report 0.
    """
    agents = self.get_agent_set()
    choices = self.get_choice_set()
    choice_id_name = choices.get_id_name()[0]
    # prefer the agent index remembered from a previous prediction run
    if self.agents_index_for_prediction is not None:
        agents_index = self.agents_index_for_prediction
    else:
        agents_index = self.get_agent_set_index()
    if predicted_choice_id_name is None or len(predicted_choice_id_name) == 0:
        predicted_choice_id_name = predicted_choice_id_prefix + choice_id_name
    if force_predict or (predicted_choice_id_name not in agents.get_known_attribute_names()):
        if not self.predict(predicted_choice_id_name=predicted_choice_id_name,
                            agents_index=agents_index):
            logger.log_error("Failed to run simulation for prediction; unable to create prediction success table.")
            return
    if log_to_file is not None and len(log_to_file) > 0:
        logger.enable_file_logging(log_to_file)
    ## by default, compare predicted choice with observed choice
    ## this is not feasible for location choice model, where the
    ## alternative set is too large to be useful
    if summarize_by is None:
        summarize_by = "%s.%s" % (agents.dataset_name, choice_id_name)
    summarize_dataset_name = VariableName(summarize_by).get_dataset_name()
    if summarize_dataset_name == choices.dataset_name:
        # summary variable on the choice set: translate both observed and
        # predicted choice ids through the choice set's summary values
        summary_id = choices.compute_variables(summarize_by)
        chosen_choice_id = agents.get_attribute_by_index(choices.get_id_name()[0], agents_index)
        predicted_choice_id = agents.get_attribute_by_index(predicted_choice_id_name, agents_index)
        chosen_choice_index = choices.get_id_index(chosen_choice_id)
        predicted_choice_index = choices.get_id_index(predicted_choice_id)
        chosen_summary_id = summary_id[chosen_choice_index]
        predicted_summary_id = summary_id[predicted_choice_index]
        unique_summary_id = unique(summary_id)
    elif summarize_dataset_name == agents.dataset_name:
        # summary variable on the agent set: compute with observed
        # choices, swap in predictions, recompute, then restore
        chosen_summary_id = agents.compute_variables(summarize_by)[agents_index]
        chosen_choice_id = agents.get_attribute(choice_id_name).copy()
        predicted_choice_id = agents.get_attribute(predicted_choice_id_name)
        agents.modify_attribute(name=choice_id_name, data=predicted_choice_id)
        predicted_summary_id = agents.compute_variables(summarize_by)[agents_index]
        agents.modify_attribute(name=choice_id_name, data=chosen_choice_id)
        unique_summary_id = unique(concatenate((chosen_summary_id, predicted_summary_id)))
    else:
        logger.log_error("summarize_by expression '%s' is specified for dataset %s, which is neither the choice_set '%s' nor the agent_set '%s'."
                         % (summarize_by, summarize_dataset_name, choices.dataset_name, agents.dataset_name))
        return False
    # observed on row, predicted on column
    prediction_matrix = zeros((unique_summary_id.size, unique_summary_id.size), dtype="int32")

    def _convert_array_to_tab_delimited_string(an_array):
        # floats get 4 decimals, everything else plain str()
        from numpy import dtype
        if an_array.dtype == dtype('f'):
            return "\t".join(["%5.4f" % item for item in an_array])
        return "\t".join([str(item) for item in an_array])

    logger.log_status("Observed_id\tSuccess_rate\t%s" % \
                      _convert_array_to_tab_delimited_string(unique_summary_id))
    i = 0
    success_rate = zeros(unique_summary_id.size, dtype="float32")
    for observed_id in unique_summary_id:
        # row i = histogram of predicted summary ids among agents whose
        # observed summary id equals observed_id
        predicted_id = predicted_summary_id[chosen_summary_id == observed_id]
        prediction_matrix[i] = ndimage.sum(ones(predicted_id.size),
                                           labels=predicted_id,
                                           index=unique_summary_id)
        if prediction_matrix[i].sum() > 0:
            # diagonal / row total = share of correctly predicted agents
            success_rate[i] = float(prediction_matrix[i, i]) / prediction_matrix[i].sum()
        else:
            success_rate[i] = 0
        logger.log_status("%s\t\t%5.4f\t\t%s"
                          % (observed_id, success_rate[i],
                             _convert_array_to_tab_delimited_string(prediction_matrix[i])))
        i += 1
    success_rate2 = zeros(i, dtype="float32")
    for j in range(i):
        # column sum over row sum: predictions landing in column j
        # relative to observations in row j
        if prediction_matrix[j, :].sum() > 0:
            success_rate2[j] = float(prediction_matrix[:, j].sum()) / prediction_matrix[j, :].sum()
        else:
            success_rate2[j] = 0
    logger.log_status("%s\t\t%s\t\t%s"
                      % (' ', ' ',
                         _convert_array_to_tab_delimited_string(success_rate2)))
    logger.disable_file_logging(filename=log_to_file)
# Launch number_of_runs replicate simulations, at most
# number_of_runs_in_parallel of them in flight at once.
for irun in range(number_of_runs):
    # each replicate gets its own single-element seed tuple and a
    # private copy of the configuration
    config['seed'] = (seed_array[irun], )
    this_config = config.copy()
    # every number_of_runs_in_parallel-th run executes in the foreground,
    # throttling how many background runs accumulate
    if ((irun + 1) % number_of_runs_in_parallel) == 0:
        run_in_background = False
    else:
        run_in_background = True
    run_manager.setup_new_run(
        cache_directory=this_config['cache_directory'],
        configuration=this_config)
    run_manager.run_run(this_config,
                        run_as_multiprocess=False,
                        run_in_background=run_in_background)
    if irun == 0:
        # log file for the multiple runs will be located in the first cache
        first_cache_directory = this_config['cache_directory']
        log_file = os.path.join(first_cache_directory, 'multiple_runs.log')
        logger.enable_file_logging(log_file)
        logger.log_status("Multiple runs: %s replications" % number_of_runs)
        logger.log_status("root random seed = %s" % str(root_seed))
    else:
        # log_file was bound on the first iteration; keep appending to it
        logger.enable_file_logging(log_file, verbose=False)
    logger.log_status("Run %s: %s" % (irun + 1, this_config['cache_directory']))
    logger.disable_file_logging(log_file)
    list_of_cache_directories.append(this_config['cache_directory'])
    # NOTE(review): rewritten after every run so the on-disk list stays
    # current even if a later run crashes — TODO confirm intended
    write_to_text_file(
        os.path.join(first_cache_directory, "cache_directories"),
        list_of_cache_directories)
def xtest_simulation(self):
    # temporarily switched off, since the database data does not match
    # the code for now (hence the 'x' prefix)
    """Checks that the simulation proceeds without caching. """
    self.simulation.run_simulation()
    # close the log file left open by the simulation
    logger.disable_file_logging()
    self.completed_without_error = True
def _run_year(self, year, models, simulation_state, debuglevel, resources,
              write_datasets_to_cache_at_end_of_year, cleanup_datasets=True):
    """Run every configured model for one simulated *year*.

    Assumes that all datasets resides in the cache directory in binary
    format.  All output for the year goes to 'year_<year>_log.txt' in the
    cache directory.  Heavily relies on ``exec`` and ``locals()``: each
    loaded dataset is bound to a local variable named after it, and the
    whole local namespace is captured into ``self.run_year_namespace``
    for the models (and interactive estimation) to use.
    """
    try:
        # optional Wing IDE debugging hook; absence is fine
        import wingdbstub
    except:
        pass
    self.vardict = {}
    log_file_name = os.path.join(simulation_state.get_cache_directory(),
                                 "year_%s_log.txt" % year)
    logger.enable_file_logging(log_file_name, 'w')
    try:
        logger.start_block('Simulate year %s' % year)
        try:
            base_year = resources['base_year']
            if year == base_year:
                year_for_base_year_cache = year  # case of estimation
            else:
                # simulation reads the previous year's cache
                year_for_base_year_cache = year - 1
            cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
            self.vardict['cache_storage'] = cache_storage
            base_cache_storage = AttributeCache().get_flt_storage_for_year(base_year)
            self.vardict['base_cache_storage'] = base_cache_storage
            simulation_state.set_flush_datasets(resources.get("flush_variables", False))
            SessionConfiguration()["simulation_year"] = year
            SessionConfiguration()["debuglevel"] = debuglevel
            # per-year preload list overrides the global one when present
            datasets_to_preload_in_year = resources.get('datasets_to_preload_in_year', {})
            if datasets_to_preload_in_year.get(year, None) is not None:
                datasets_to_preload = datasets_to_preload_in_year[year]
            else:
                datasets_to_preload = resources.get('datasets_to_preload', {})
            for dataset_name in datasets_to_preload:
                SessionConfiguration().get_dataset_from_pool(dataset_name)
            models_configuration = resources.get('models_configuration', {})
            dataset_pool = SessionConfiguration().get_dataset_pool()
            datasets = {}
            for dataset_name, its_dataset in dataset_pool.datasets_in_pool().iteritems():
                self.vardict[dataset_name] = its_dataset
                datasets[dataset_name] = its_dataset
                exec '%s=its_dataset' % dataset_name
                # This is needed. It resides in locals()
                # and is passed on to models as they run.
                ### TODO: There has got to be a better way!
            model_resources = Resources(datasets)
            n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run(models, models_configuration)
            self.run_year_namespace = locals()
            #==========
            # Run the models.
            #==========
            model_number = -1
            for model_entry in models:
                # list 'models' can be in the form:
                # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
                #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                #                                      'commercial']}},
                #  {'model_name_3': ['estimate', 'run']},
                #  'model_name_4',
                #  {'model_name_5': {'group_members': 'all'}}
                # ]
                # get list of methods to be processed evtl. for each group member
                if isinstance(model_entry, dict):
                    model_name, value = model_entry.items()[0]
                    if not isinstance(value, dict):  # is a model group
                        processes = value
                        if not isinstance(processes, list):
                            processes = [processes]
                else:
                    # in the form 'model_name_4' in the comment above
                    model_name = model_entry
                    processes = ["run"]
                group_member = None
                model_group = model_group_members_to_run[model_name][1]
                # run at least once, even for models with no group members
                last_member = max(1, len(model_group_members_to_run[model_name][0].keys()))
                for imember in range(last_member):
                    controller_config = models_configuration[model_name]["controller"]
                    model_configuration = models_configuration[model_name]
                    if model_group_members_to_run[model_name][0].keys():
                        # group-member-specific configuration overrides
                        # the plain model configuration when present
                        group_member_name = model_group_members_to_run[model_name][0].keys()[imember]
                        group_member = ModelGroupMember(model_group, group_member_name)
                        processes = model_group_members_to_run[model_name][0][group_member_name]
                        member_model_name = "%s_%s" % (group_member_name, model_name)
                        if member_model_name in models_configuration.keys():
                            model_configuration = models_configuration[member_model_name]
                            if "controller" in model_configuration.keys():
                                controller_config = model_configuration["controller"]
                    # load datasets this model's structure depends on
                    datasets_to_preload_for_this_model = controller_config.get('_model_structure_dependencies_', {}).get('dataset', [])
                    for dataset_name in datasets_to_preload_for_this_model:
                        try:
                            if not dataset_pool.has_dataset(dataset_name) or (dataset_name not in datasets.keys()):
                                ds = dataset_pool.get_dataset(dataset_name)
                                self.vardict[dataset_name] = ds
                                datasets[dataset_name] = ds
                                exec '%s=ds' % dataset_name
                        except:
                            # best effort: a missing dataset only warns
                            logger.log_warning('Failed to load dataset %s.' % dataset_name)
                    # import part
                    if "import" in controller_config.keys():
                        import_config = controller_config["import"]
                        for import_module in import_config.keys():
                            exec("from %s import %s" % (import_module, import_config[import_module]))
                    # gui_import_replacements part
                    # This is a temporary hack -- replicates the functionality of the "import" section
                    # for use with the GUI. The contents of this part of the config is a dictionary.
                    # Keys are names of models (not used here). Values are 2 element pairs.
                    # The first element is a name and the second is a value. Bind the name to the value.
                    if "gui_import_replacements" in controller_config.keys():
                        import_replacement_config = controller_config["gui_import_replacements"]
                        for model_name in import_replacement_config.keys():
                            pair = import_replacement_config[model_name]
                            temp = pair[1]
                            exec("%s = temp") % pair[0]
                    # init part
                    model = self.do_init(locals())
                    # estimate and/or run part
                    for process in processes:
                        model_number = model_number + 1
                        # write status file
                        model.set_model_system_status_parameters(year, n_models, model_number,
                                                                 resources.get('status_file_for_gui', None))
                        model.write_status_for_gui()
                        # prepare part
                        exec(self.do_prepare(locals()))
                        processmodel_config = controller_config[process]
                        if "output" in processmodel_config.keys():
                            outputvar = processmodel_config["output"]
                        else:
                            outputvar = "process_output"
                        self.vardict[outputvar] = self.do_process(locals())
                        # bind the output under its configured name so
                        # later models can find it in locals()
                        exec outputvar + '=self.vardict[outputvar]'
                        # check command file from gui, if the simulation should be stopped or paused
                        self.do_commands_from_gui(resources.get('command_file_for_gui', None))
                        # capture namespace for interactive estimation
                        self.run_year_namespace = locals()
                    self.flush_datasets_after_model(resources)
                    del model
                    collect()
            # Write all datasets to cache.
            if write_datasets_to_cache_at_end_of_year:
                logger.start_block('Writing datasets to cache for year %s' % year)
                try:
                    for dataset_name, its_dataset in SessionConfiguration().get_dataset_pool().datasets_in_pool().iteritems():
                        self.flush_dataset(its_dataset)
                finally:
                    logger.end_block()
        finally:
            logger.end_block()
    finally:
        logger.disable_file_logging(log_file_name)
    if cleanup_datasets:
        SessionConfiguration().delete_datasets()
def run( self, resources, write_datasets_to_cache_at_end_of_year=True, log_file_name="run_model_system.log", cleanup_datasets=True, ): """Entries in resources: (entries with no defaults are required) models - a list containing names of models to be run. Each name must correspond to the name of the module/class of that model. Default(object): None years - a tuple (start year, end year) debuglevel - an integer. The higher the more output will be printed. Default: 0 expression_library - a dictionary. The keys in the dictionary are pairs (dataset_name, variable_name) and the values are the corresponding expressions. The model system needs to set the expression library (if it isn't None) in DatasetFactory for DatasetFactory to know about variables defined as expressions in the xml expression library. Default: None This method is called both to start up the simulation for all years, and also for each year when running with one process per year. In the latter case, 'years' consists of just (current_year, current_year) rather than the real start and end years for the simulation. """ if not isinstance(resources, Resources): raise TypeError, "Argument 'resources' must be of type 'Resources'." 
logger_settings = resources.get("log", {"tags": [], "verbosity_level": 3}) logger.set_tags(logger_settings.get("tags", [])) logger.set_verbosity_level(logger_settings.get("verbosity_level", 3)) self.simulation_state = SimulationState() self.simulation_state.set_low_memory_run(resources.get("low_memory_mode", False)) self.simulation_state.set_start_time(resources.get("base_year", 0)) self.run_year_namespace = {} if resources.get("cache_directory", None) is not None: self.simulation_state.set_cache_directory(resources["cache_directory"]) if "expression_library" in resources: VariableFactory().set_expression_library(resources["expression_library"]) if resources.get("sample_input", False): self.update_config_for_multiple_runs(resources) cache_directory = self.simulation_state.get_cache_directory() log_file = os.path.join(cache_directory, log_file_name) logger.enable_file_logging(log_file, verbose=False) try: logger.log_status("Cache Directory set to: " + cache_directory) with logger.block("Start simulation run"): models = resources.get("models", []) models_in_years = resources.get("models_in_year", {}) resources.check_obligatory_keys(["years"]) years = resources["years"] if (not isinstance(years, tuple)) and (not isinstance(years, list)): raise TypeError, "Entry 'years' in resources must be a tuple." if len(years) < 2: print years raise StandardError, "Entry 'years' in resources must be of length at least 2." 
start_year = years[0] end_year = years[-1] debuglevel = resources.get("debuglevel", 0) seed_values = resources.get("seed", NO_SEED) logger.log_status("random seed = %s" % str(seed_values)) seed(seed_values) for year in range(start_year, end_year + 1): with logger.block("Starting simulation for year " + str(year)): self.simulation_state.set_current_time(year) SessionConfiguration().get_dataset_pool().remove_all_datasets() logger.disable_file_logging(log_file) try: if models_in_years.get(year, None) is not None: models_to_run = models_in_years[year] else: models_to_run = models self._run_year( year=year, models=models_to_run, simulation_state=self.simulation_state, debuglevel=debuglevel, resources=resources, write_datasets_to_cache_at_end_of_year=write_datasets_to_cache_at_end_of_year, cleanup_datasets=cleanup_datasets, ) finally: logger.enable_file_logging(log_file, verbose=False) collect() finally: logger.disable_file_logging(log_file)
def _run_year(
    self,
    year,
    models,
    simulation_state,
    debuglevel,
    resources,
    write_datasets_to_cache_at_end_of_year,
    cleanup_datasets=True,
):
    """
    Simulate a single year: bind cached datasets into the local namespace,
    then init/prepare/run (or estimate) each configured model, optionally
    once per model-group member.

    Assumes that all datasets resides in the cache directory in binary format.

    Parameters:
        year -- the simulation year to run.
        models -- list of model entries; see the in-line comment below for
            the accepted shapes.
        simulation_state -- the SimulationState providing the cache directory.
        debuglevel -- integer verbosity, stored into SessionConfiguration.
        resources -- the run Resources; keys read here include 'base_year',
            'flush_variables', 'datasets_to_preload(_in_year)',
            'models_configuration', 'status_file_for_gui',
            'command_file_for_gui'.
        write_datasets_to_cache_at_end_of_year -- if true, flush every
            pooled dataset to cache after all models have run.
        cleanup_datasets -- if true, delete pooled datasets at the end.
    """
    # Optional Wing IDE debugging hook; its absence is not an error.
    try:
        import wingdbstub
    except:
        pass
    self.vardict = {}
    # All log output for this year goes to a per-year file in the cache.
    log_file_name = os.path.join(simulation_state.get_cache_directory(), "year_%s_log.txt" % year)
    logger.enable_file_logging(log_file_name, "w")
    try:
        logger.start_block("Simulate year %s" % year)
        try:
            base_year = resources["base_year"]
            if year == base_year:
                year_for_base_year_cache = year  # case of estimation
            else:
                year_for_base_year_cache = year - 1
            # Storage for the previous year's data (model inputs) and for
            # the base year are both exposed through self.vardict.
            cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
            self.vardict["cache_storage"] = cache_storage
            base_cache_storage = AttributeCache().get_flt_storage_for_year(base_year)
            self.vardict["base_cache_storage"] = base_cache_storage
            simulation_state.set_flush_datasets(resources.get("flush_variables", False))
            SessionConfiguration()["simulation_year"] = year
            SessionConfiguration()["debuglevel"] = debuglevel
            # A year-specific preload list overrides the general one.
            datasets_to_preload_in_year = resources.get("datasets_to_preload_in_year", {})
            if datasets_to_preload_in_year.get(year, None) is not None:
                datasets_to_preload = datasets_to_preload_in_year[year]
            else:
                datasets_to_preload = resources.get("datasets_to_preload", {})
            for dataset_name in datasets_to_preload:
                SessionConfiguration().get_dataset_from_pool(dataset_name)
            models_configuration = resources.get("models_configuration", {})
            dataset_pool = SessionConfiguration().get_dataset_pool()
            datasets = {}
            for dataset_name, its_dataset in dataset_pool.datasets_in_pool().iteritems():
                self.vardict[dataset_name] = its_dataset
                datasets[dataset_name] = its_dataset
                # Bind each dataset to a local variable named after it.
                # This is needed. It resides in locals()
                # and is passed on to models as they run.
                ### TODO: There has got to be a better way!
                exec "%s=its_dataset" % dataset_name
            model_resources = Resources(datasets)
            n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run(
                models, models_configuration
            )
            # Capture the namespace so interactive estimation can inspect it.
            self.run_year_namespace = locals()
            # ==========
            # Run the models.
            # ==========
            model_number = -1
            for model_entry in models:
                # list 'models' can be in the form:
                # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
                #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                #                                      'commercial']}},
                #  {'model_name_3': ['estimate', 'run']},
                #  'model_name_4',
                #  {'model_name_5': {'group_members': 'all'}}
                # ]
                # get list of methods to be processed evtl. for each group member
                if isinstance(model_entry, dict):
                    model_name, value = model_entry.items()[0]
                    if not isinstance(value, dict):  # is a model group
                        processes = value
                        if not isinstance(processes, list):
                            processes = [processes]
                else:  # in the form 'model_name_4' in the comment above
                    model_name = model_entry
                    processes = ["run"]
                group_member = None
                model_group = model_group_members_to_run[model_name][1]
                # Iterate at least once even when the model has no group members.
                last_member = max(1, len(model_group_members_to_run[model_name][0].keys()))
                for imember in range(last_member):
                    controller_config = models_configuration[model_name]["controller"]
                    model_configuration = models_configuration[model_name]
                    if model_group_members_to_run[model_name][0].keys():
                        group_member_name = model_group_members_to_run[model_name][0].keys()[imember]
                        group_member = ModelGroupMember(model_group, group_member_name)
                        processes = model_group_members_to_run[model_name][0][group_member_name]
                        # A member-specific configuration (e.g. '<member>_<model>')
                        # overrides the generic model configuration when present.
                        member_model_name = "%s_%s" % (group_member_name, model_name)
                        if member_model_name in models_configuration.keys():
                            model_configuration = models_configuration[member_model_name]
                            if "controller" in model_configuration.keys():
                                controller_config = model_configuration["controller"]
                    # Load any datasets this model's structure declares as dependencies.
                    datasets_to_preload_for_this_model = controller_config.get(
                        "_model_structure_dependencies_", {}
                    ).get("dataset", [])
                    for dataset_name in datasets_to_preload_for_this_model:
                        try:
                            if not dataset_pool.has_dataset(dataset_name) or (dataset_name not in datasets.keys()):
                                ds = dataset_pool.get_dataset(dataset_name)
                                self.vardict[dataset_name] = ds
                                datasets[dataset_name] = ds
                                exec "%s=ds" % dataset_name
                        except:
                            # Best-effort: a missing dependency is only warned about here.
                            logger.log_warning("Failed to load dataset %s." % dataset_name)
                    # import part
                    if "import" in controller_config.keys():
                        import_config = controller_config["import"]
                        for import_module in import_config.keys():
                            exec ("from %s import %s" % (import_module, import_config[import_module]))
                    # gui_import_replacements part
                    # This is a temporary hack -- replicates the functionality of the "import" section
                    # for use with the GUI. The contents of this part of the config is a dictionary.
                    # Keys are names of models (not used here). Values are 2 element pairs.
                    # The first element is a name and the second is a value. Bind the name to the value.
                    # NOTE(review): this loop reuses the name 'model_name', clobbering the
                    # outer loop's model name for any later group-member iterations --
                    # verify this is intended.
                    if "gui_import_replacements" in controller_config.keys():
                        import_replacement_config = controller_config["gui_import_replacements"]
                        for model_name in import_replacement_config.keys():
                            pair = import_replacement_config[model_name]
                            temp = pair[1]
                            exec ("%s = temp") % pair[0]
                    # init part
                    model = self.do_init(locals())
                    # estimate and/or run part
                    for process in processes:
                        model_number = model_number + 1
                        # write status file
                        model.set_model_system_status_parameters(
                            year, n_models, model_number, resources.get("status_file_for_gui", None)
                        )
                        model.write_status_for_gui()
                        # prepare part
                        exec (self.do_prepare(locals()))
                        processmodel_config = controller_config[process]
                        if "output" in processmodel_config.keys():
                            outputvar = processmodel_config["output"]
                        else:
                            outputvar = "process_output"
                        self.vardict[outputvar] = self.do_process(locals())
                        # Re-bind the output into locals() so later models can see it.
                        exec outputvar + "=self.vardict[outputvar]"
                        # check command file from gui, if the simulation should be stopped or paused
                        self.do_commands_from_gui(resources.get("command_file_for_gui", None))
                        # capture namespace for interactive estimation
                        self.run_year_namespace = locals()
                        self.flush_datasets_after_model(resources)
                    del model
                    collect()
            # Write all datasets to cache.
            if write_datasets_to_cache_at_end_of_year:
                logger.start_block("Writing datasets to cache for year %s" % year)
                try:
                    for dataset_name, its_dataset in (
                        SessionConfiguration().get_dataset_pool().datasets_in_pool().iteritems()
                    ):
                        self.flush_dataset(its_dataset)
                finally:
                    logger.end_block()
        finally:
            logger.end_block()
    finally:
        logger.disable_file_logging(log_file_name)
    if cleanup_datasets:
        SessionConfiguration().delete_datasets()
def run(self,
        resources,
        write_datasets_to_cache_at_end_of_year=True,
        log_file_name='run_model_system.log',
        cleanup_datasets=True):
    """Run the simulation for all years given in resources.

    Entries in resources: (entries with no defaults are required)
        models - a list containing names of models to be run. Each name
            must correspond to the name of the module/class of that model.
            Default(object): None
        years - a tuple (start year, end year)
        debuglevel - an integer. The higher the more output will be
            printed. Default: 0
        expression_library - a dictionary. The keys in the dictionary are
            pairs (dataset_name, variable_name) and the values are the
            corresponding expressions. The model system needs to set the
            expression library (if it isn't None) in DatasetFactory for
            DatasetFactory to know about variables defined as expressions
            in the xml expression library. Default: None

    This method is called both to start up the simulation for all years,
    and also for each year when running with one process per year. In the
    latter case, 'years' consists of just (current_year, current_year)
    rather than the real start and end years for the simulation.
    """
    if not isinstance(resources, Resources):
        raise TypeError, "Argument 'resources' must be of type 'Resources'."
    # Configure logger tags and verbosity from the optional 'log' entry.
    logger_settings = resources.get("log", {
        "tags": [],
        "verbosity_level": 3
    })
    logger.set_tags(logger_settings.get("tags", []))
    logger.set_verbosity_level(logger_settings.get("verbosity_level", 3))
    # Fresh simulation state; the clock starts at the base year.
    self.simulation_state = SimulationState()
    self.simulation_state.set_low_memory_run(
        resources.get("low_memory_mode", False))
    self.simulation_state.set_start_time(resources.get("base_year", 0))
    self.run_year_namespace = {}
    if resources.get('cache_directory', None) is not None:
        self.simulation_state.set_cache_directory(
            resources['cache_directory'])
    if 'expression_library' in resources:
        VariableFactory().set_expression_library(
            resources['expression_library'])
    if resources.get('sample_input', False):
        self.update_config_for_multiple_runs(resources)
    cache_directory = self.simulation_state.get_cache_directory()
    log_file = os.path.join(cache_directory, log_file_name)
    logger.enable_file_logging(log_file, verbose=False)
    try:
        logger.log_status("Cache Directory set to: " + cache_directory)
        with logger.block('Start simulation run'):
            models = resources.get("models", [])
            models_in_years = resources.get("models_in_year", {})
            resources.check_obligatory_keys(["years"])
            years = resources["years"]
            if (not isinstance(years, tuple)) and (not isinstance(
                    years, list)):
                raise TypeError, "Entry 'years' in resources must be a tuple."
            if len(years) < 2:
                print years
                raise StandardError, "Entry 'years' in resources must be of length at least 2."
            start_year = years[0]
            end_year = years[-1]
            debuglevel = resources.get("debuglevel", 0)
            seed_values = resources.get('seed', NO_SEED)
            logger.log_status("random seed = %s" % str(seed_values))
            seed(seed_values)
            for year in range(start_year, end_year + 1):
                with logger.block("Starting simulation for year " + str(year)):
                    self.simulation_state.set_current_time(year)
                    SessionConfiguration().get_dataset_pool(
                    ).remove_all_datasets()
                    # _run_year writes its own per-year log file; suspend
                    # the run-level log while the year executes.
                    logger.disable_file_logging(log_file)
                    try:
                        # A year-specific model list overrides the default list.
                        if models_in_years.get(year, None) is not None:
                            models_to_run = models_in_years[year]
                        else:
                            models_to_run = models
                        self._run_year(
                            year=year,
                            models=models_to_run,
                            simulation_state=self.simulation_state,
                            debuglevel=debuglevel,
                            resources=resources,
                            write_datasets_to_cache_at_end_of_year=
                            write_datasets_to_cache_at_end_of_year,
                            cleanup_datasets=cleanup_datasets)
                    finally:
                        logger.enable_file_logging(log_file, verbose=False)
                        collect()
    finally:
        logger.disable_file_logging(log_file)