class RunSimulationFromMysql:
    """Run a simulation whose base-year cache is created from a scenario
    (MySQL) database, optionally creating and dropping the output database.

    Typical use is either the step-by-step sequence
    prepare_for_simulation() / run_simulation() / cleanup(), or the
    convenience wrapper prepare_and_run().
    """

    def prepare_for_simulation(self, run_configuration, cache_directory=None):
        """Initialize simulation state and session configuration, fill the
        base-year cache from the scenario database (in a forked process) and,
        if configured, create the output (estimation) database.

        run_configuration -- dict-like configuration; wrapped in Resources.
        cache_directory   -- base directory for the simulation cache; if None,
                             SimulationState picks its default.
        """
        self.config = Resources(run_configuration)
        self.simulation_state = SimulationState(
            new_instance=True,
            base_cache_dir=cache_directory,
            start_time=self.config.get("base_year", 0))

        ### TODO: Get rid of this! There is no good reason to be changing the
        ### Configuration.
        if self.config["cache_directory"] is None:
            self.config["cache_directory"] = self.simulation_state.get_cache_directory()

        SessionConfiguration(
            new_instance=True,
            package_order=self.config["dataset_pool_configuration"].package_order,
            in_storage=AttributeCache(),
        )

        # Cache the scenario database in a separate process so that memory
        # used during caching is returned to the OS before the simulation runs.
        ForkProcess().fork_new_process(
            self.config["creating_baseyear_cache_configuration"].cache_scenario_database,
            self.config)

        # Create output database (normally done by run manager)
        if "estimation_database_configuration" in self.config:
            db_server = DatabaseServer(self.config["estimation_database_configuration"])
            if not db_server.has_database(self.config["estimation_database_configuration"].database_name):
                db_server.create_database(self.config["estimation_database_configuration"].database_name)

    def run_simulation(self, simulation_instance=None):
        """Run the model system inside a logger block; the block is closed
        even if the run raises.  A fresh ModelSystem is created unless one is
        supplied."""
        logger.start_block("Simulation on database %s" %
                           self.config["scenario_database_configuration"].database_name)
        try:
            if simulation_instance is None:
                simulation_instance = ModelSystem()
            simulation_instance.run(self.config)
            # simulation_instance.run_multiprocess(self.config, is_run_subset=True)
        finally:
            logger.end_block()
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())

    def cleanup(self, remove_cache, remove_output_database=False):
        """Remove all outputs of this simulation.

        Fixes (review):
        - `remove_output_database` now defaults to False, so that
          `prepare_and_run`, which calls `cleanup(remove_cache)` with a single
          argument, no longer raises TypeError.  Existing two-argument callers
          are unaffected.
        - The cache directory is deleted only when `remove_cache` is true,
          consistent with `remove_singleton(delete_cache=remove_cache)` above
          and with the sibling class `RunSimulation`.
        """
        self.simulation_state.remove_singleton(delete_cache=remove_cache)
        # Remove SessionConfiguration singleton, if it exists
        Singleton().remove_singleton_for_class(SessionConfiguration)

        if remove_cache:
            cache_dir = self.config["cache_directory"]
            if os.path.exists(cache_dir):
                rmtree(cache_dir)

        if remove_output_database and ("estimation_database_configuration" in self.config):
            db_server = DatabaseServer(self.config["estimation_database_configuration"])
            db_server.drop_database(self.config["estimation_database_configuration"].database_name)

    def prepare_and_run(self, run_configuration, simulation_instance=None, remove_cache=True):
        """Convenience wrapper: prepare, run, then clean up (cache only; the
        output database is kept)."""
        self.prepare_for_simulation(run_configuration)
        self.run_simulation(simulation_instance)
        self.cleanup(remove_cache)
class RunSimulation(object):
    """Drive a complete simulation: build the base-year cache (either from the
    scenario database or from flt files), run the model system and clean up."""

    def prepare_for_simulation(self, config, cache_directory=None):
        """Create simulation state and session configuration and populate the
        base-year cache.  Note: the cache root comes from the configuration's
        creating_baseyear_cache_configuration, not from `cache_directory`."""
        self.config = Resources(config)
        baseyear_cfg = self.config['creating_baseyear_cache_configuration']
        self.simulation_state = SimulationState(
            new_instance=True,
            base_cache_dir=baseyear_cfg.cache_directory_root,
            start_time=self.config.get('base_year', 0))

        ### TODO: Get rid of this! There is no good reason to be changing the
        ### Configuration.
        if self.config['cache_directory'] is None:
            self.config['cache_directory'] = self.simulation_state.get_cache_directory()

        SessionConfiguration(
            new_instance=True,
            package_order=self.config['dataset_pool_configuration'].package_order,
            in_storage=AttributeCache())

        # Fill the base-year cache: from the scenario database (in a forked
        # process) when so configured, otherwise straight from flt files.
        if config['creating_baseyear_cache_configuration'].cache_from_database:
            ForkProcess().fork_new_process(baseyear_cfg.cache_scenario_database,
                                           self.config)
        else:
            CacheFltData().run(self.config)

    def run_simulation(self, simulation_instance=None):
        """Run the model system; a fresh ModelSystem is created when none is
        supplied."""
        if simulation_instance is None:
            simulation_instance = ModelSystem()
        simulation_instance.run(self.config)
        #simulation_instance.run_multiprocess(self.config, is_run_subset=True)
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())

    def cleanup(self, remove_cache=True):
        """Remove all outputs of this simulation."""
        self.simulation_state.remove_singleton(delete_cache=remove_cache)
        SessionConfiguration().remove_singleton()
        if not remove_cache:
            return
        cache_dir = self.config['cache_directory']
        if os.path.exists(cache_dir):
            rmtree(cache_dir)

    def prepare_and_run(self, run_configuration, simulation_instance=None, remove_cache=True):
        """Convenience wrapper: prepare, run, clean up."""
        self.prepare_for_simulation(run_configuration)
        self.run_simulation(simulation_instance)
        self.cleanup(remove_cache)
class RunSimulationFromMysql:
    """Run a simulation whose base-year cache is filled from a scenario
    (MySQL) database; can also create and later drop the output database."""

    def prepare_for_simulation(self, run_configuration, cache_directory=None):
        """Set up SimulationState and SessionConfiguration, cache the scenario
        database in a forked process and, when an estimation database is
        configured, make sure the output database exists."""
        self.config = Resources(run_configuration)
        self.simulation_state = SimulationState(
            new_instance=True,
            base_cache_dir=cache_directory,
            start_time=self.config.get('base_year', 0))

        ### TODO: Get rid of this! There is no good reason to be changing the
        ### Configuration.
        if self.config['cache_directory'] is None:
            self.config['cache_directory'] = self.simulation_state.get_cache_directory()

        SessionConfiguration(
            new_instance=True,
            package_order=self.config['dataset_pool_configuration'].package_order,
            in_storage=AttributeCache())

        # Fork so the caching step's memory is released before the run proper.
        ForkProcess().fork_new_process(
            self.config['creating_baseyear_cache_configuration'].cache_scenario_database,
            self.config)

        # Create output database (normally done by run manager)
        if 'estimation_database_configuration' in self.config:
            db_server = DatabaseServer(self.config['estimation_database_configuration'])
            if not db_server.has_database(self.config['estimation_database_configuration'].database_name):
                db_server.create_database(self.config['estimation_database_configuration'].database_name)

    def run_simulation(self, simulation_instance=None):
        """Run the model system within a logger block (closed in `finally`
        even on error), then report the cache location."""
        logger.start_block('Simulation on database %s' %
                           self.config['scenario_database_configuration'].database_name)
        try:
            if simulation_instance is None:
                simulation_instance = ModelSystem()
            simulation_instance.run(self.config)
            #simulation_instance.run_multiprocess(self.config, is_run_subset=True)
        finally:
            logger.end_block()
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())

    def cleanup(self, remove_cache, remove_output_database=False):
        """Remove all outputs of this simulation.

        Review fixes:
        - `remove_output_database` gained a False default: `prepare_and_run`
          invokes `cleanup(remove_cache)` with one argument, which previously
          raised TypeError.  Callers passing both arguments are unchanged.
        - The cache directory is now removed only when `remove_cache` is true,
          matching `delete_cache=remove_cache` above and the behavior of the
          sibling class `RunSimulation`.
        """
        self.simulation_state.remove_singleton(delete_cache=remove_cache)
        # Remove SessionConfiguration singleton, if it exists
        Singleton().remove_singleton_for_class(SessionConfiguration)

        if remove_cache:
            cache_dir = self.config['cache_directory']
            if os.path.exists(cache_dir):
                rmtree(cache_dir)

        if remove_output_database and ('estimation_database_configuration' in self.config):
            db_server = DatabaseServer(self.config['estimation_database_configuration'])
            db_server.drop_database(self.config['estimation_database_configuration'].database_name)

    def prepare_and_run(self, run_configuration, simulation_instance=None, remove_cache=True):
        """Prepare, run and clean up in one call (output database is kept)."""
        self.prepare_for_simulation(run_configuration)
        self.run_simulation(simulation_instance)
        self.cleanup(remove_cache)
class RunSimulation(object):
    """End-to-end simulation driver: prepare caches, execute the model system,
    remove outputs afterwards."""

    def prepare_for_simulation(self, config, cache_directory=None):
        """Build SimulationState / SessionConfiguration and populate the
        base-year cache from database or flt files, as configured."""
        self.config = Resources(config)
        creating_cfg = self.config['creating_baseyear_cache_configuration']
        self.simulation_state = SimulationState(
            new_instance=True,
            base_cache_dir=creating_cfg.cache_directory_root,
            start_time=self.config.get('base_year', 0))

        ### TODO: Get rid of this! There is no good reason to be changing the
        ### Configuration.
        if self.config['cache_directory'] is None:
            self.config['cache_directory'] = self.simulation_state.get_cache_directory()

        SessionConfiguration(
            new_instance=True,
            package_order=self.config['dataset_pool_configuration'].package_order,
            in_storage=AttributeCache())

        # Two cache sources: a scenario database (forked process) or flt data.
        if config['creating_baseyear_cache_configuration'].cache_from_database:
            ForkProcess().fork_new_process(creating_cfg.cache_scenario_database,
                                           self.config)
        else:
            CacheFltData().run(self.config)

    def run_simulation(self, simulation_instance=None):
        """Execute the model system (constructing one when not given)."""
        simulation_instance = simulation_instance or ModelSystem()
        simulation_instance.run(self.config)
        #simulation_instance.run_multiprocess(self.config, is_run_subset=True)
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())

    def cleanup(self, remove_cache=True):
        """Remove all outputs of this simulation."""
        self.simulation_state.remove_singleton(delete_cache=remove_cache)
        SessionConfiguration().remove_singleton()
        if remove_cache:
            cache_dir = self.config['cache_directory']
            if os.path.exists(cache_dir):
                rmtree(cache_dir)

    def prepare_and_run(self, run_configuration, simulation_instance=None, remove_cache=True):
        """Run the whole pipeline: prepare, simulate, clean up."""
        self.prepare_for_simulation(run_configuration)
        self.run_simulation(simulation_instance)
        self.cleanup(remove_cache)
def _initialize_previous_years(self, attribute_cache, base_year,
                               creating_baseyear_cache_configuration):
    """Seed earlier-year cache directories with copies of selected base-year
    tables, as listed in `tables_to_copy_to_previous_years` (table -> year).
    `attribute_cache` is accepted for interface compatibility but unused here.
    """
    cache_root = SimulationState().get_cache_directory()
    baseyear_dir = os.path.join(cache_root, str(base_year))
    table_to_year = creating_baseyear_cache_configuration.tables_to_copy_to_previous_years
    for table_name, target_year in table_to_year.iteritems():
        destination = os.path.join(cache_root, str(target_year), table_name)
        # Replace any stale copy before copying afresh.
        if os.path.exists(destination):
            rmtree(destination)
        copytree(os.path.join(baseyear_dir, table_name), destination)
def _initialize_previous_years(self, attribute_cache, base_year,
                               creating_baseyear_cache_configuration):
    """Copy configured base-year tables into the cache directories of earlier
    years.  The mapping `tables_to_copy_to_previous_years` gives, for each
    table name, the (earlier) year whose cache should receive the copy."""
    sim_state = SimulationState()
    cache_directory = sim_state.get_cache_directory()
    source_dir = os.path.join(cache_directory, str(base_year))
    copy_plan = creating_baseyear_cache_configuration.tables_to_copy_to_previous_years
    for table, year in copy_plan.iteritems():
        dest = os.path.join(os.path.join(cache_directory, str(year)), table)
        if os.path.exists(dest):
            rmtree(dest)  # remove an existing copy first; copytree requires a fresh target
        copytree(os.path.join(source_dir, table), dest)
def run(self, table_names, out_storage=None, table_name_pattern=None, cache_directory=None, year=None, **kwargs): """ export specified tables to database table_name_pattern: For example '{table_name}_{scenario_name}_{year}' """ if not hasattr(self, "out_storage"): if out_storage is None: raise ValueError, "Either out_storage argument needs to be specified or " + "prepare_for_run called before run method to create a valid out_storage." else: self.out_storage = out_storage sim_state = SimulationState() if sim_state.get_current_time() == 0: sim_state.set_current_time(9999) if cache_directory is None: cache_directory = sim_state.get_cache_directory() attr_cache = AttributeCache(cache_directory=cache_directory) if year is None: years = attr_cache._get_sorted_list_of_years() else: assert isinstance(year, int) years = [year] for table_name in table_names: kwargs["table_name"] = table_name for year in years: kwargs["year"] = year out_table_name = table_name_pattern.format(**kwargs) in_storage = attr_cache.get_flt_storage_for_year(year) # cache_path = os.path.join(cache_directory, str(year)) # in_storage = flt_storage(storage_location=cache_path) # TODO drop_table(table_name) if table_name exists ExportStorage().export_dataset( table_name, in_storage=in_storage, out_storage=self.out_storage, out_dataset_name=out_table_name ) self.post_run(kwargs["scenario_name"], years)
class ModelExplorer(object):
    """Interactive exploration of a single model from a simulation run.

    Builds a one-year configuration containing only the explored model and its
    declared dependencies, runs it against an existing base-year cache, and
    exposes accessors for the model's data, coefficients, agents, choice sets
    and plots.  Most choice-set-related methods work only when the explored
    model is a ChoiceModel (see individual docstrings).
    """

    def __init__(self, model, year, scenario_name=None, model_group=None,
                 configuration=None, xml_configuration=None, cache_directory=None):
        """Prepare a configuration restricted to `model` (plus dependencies)
        for the single year `year`.

        Exactly one of `configuration` (a dict) or `xml_configuration` must be
        given; with an XML configuration, `scenario_name` selects the run
        configuration.  `model_group` selects a group member for grouped
        models.  If `cache_directory` is None, the base-year cache referenced
        by the configuration is used.
        """
        self.model_group = model_group
        self.explored_model = model
        if configuration is None:
            if xml_configuration is None:
                raise StandardError, "Either dictionary based or XML based configuration must be given."
            config = xml_configuration.get_run_configuration(scenario_name)
        else:
            config = Configuration(configuration)
        self.scenario_models = config['models']
        # Drop any year-specific model list for the explored year so the
        # restricted 'models' list below takes effect.
        if config.get('models_in_year', None) is not None and config['models_in_year'].get(year, None) is not None:
            del config['models_in_year'][year]
        if model is not None:
            # Run the model's declared dependencies first, then the model itself
            # (possibly only one group member).
            dependent_models = config['models_configuration'][model]['controller'].get('dependencies', [])
            config['models'] = dependent_models
            if model_group is None:
                config['models'] = config['models'] + [{model: ["run"]}]
            else:
                config['models'] = config['models'] + [{model: {"group_members": [{model_group: ["run"]}]}}]
        else:
            config['models'] = []
        config['years'] = [year, year]
        config["datasets_to_cache_after_each_model"] = []
        config['flush_variables'] = False
        self.config = Resources(config)
        self.xml_configuration = xml_configuration
        if cache_directory is None:
            cache_directory = config['creating_baseyear_cache_configuration'].baseyear_cache.existing_cache_to_copy
        self.simulation_state = SimulationState(new_instance=True,
                                                base_cache_dir=cache_directory,
                                                start_time=config.get('base_year', 0))
        self.config['cache_directory'] = cache_directory
        SessionConfiguration(new_instance=True,
                             package_order=self.config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())

    def run(self):
        """Run the restricted model system; datasets are kept in memory
        (not written back to the cache, not cleaned up) for exploration."""
        self.model_system = ModelSystem()
        self.model_system.run(self.config,
                              write_datasets_to_cache_at_end_of_year=False,
                              cleanup_datasets=False)
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())

    def get_agents_for_simulation(self):
        """Return the active agent set (see get_active_agent_set)."""
        return self.get_active_agent_set()

    def get_model_name(self):
        """Return the explored model's name and group as a tuple."""
        return (self.explored_model, self.model_group)

    def get_specification(self):
        """Return the specification of the explored model."""
        return self.get_model().get_specified_coefficients().specification

    def get_probabilities(self, submodel=-2):
        """Return a tuple of probabilities and choices, see ChoiceModel.get_probabilities_and_choices.
        Works only for the ChoiceModel class.
        """
        model = self.get_model()
        #if isinstance(model, ChoiceModel):
        return model.get_probabilities_and_choices(submodel)
        #print '\nMethod is implemented only for ChoiceModels.\n'

    def export_probabilities(self, submodel=-2, filename='./choice_model.txt'):
        """Export probabilities and choices into a file. Works only for the ChoiceModel class"""
        model = self.get_model()
        #if isinstance(model, ChoiceModel):
        model.export_probabilities(submodel, file_name=filename)
        #else:
        #    print '\nMethod is implemented only for ChoiceModels.\n'

    def get_model(self):
        """Return a model object."""
        return self.model_system.run_year_namespace["model"]

    def get_dataset(self, dataset_name):
        """Return a Dataset object of the given name."""
        # Look in the run-year namespace first, then in its 'datasets' dict,
        # falling back to the dataset pool.
        ds = self.model_system.run_year_namespace.get(dataset_name, None)
        if ds is None:
            if dataset_name not in self.model_system.run_year_namespace["datasets"].keys():
                ds = self.get_dataset_pool().get_dataset(dataset_name)
            else:
                ds = self.model_system.run_year_namespace["datasets"][dataset_name]
        return ds

    def get_data(self, coefficient, submodel=-2):
        """Calls method get_data of the Model object. Should return a data array for the
        given coefficient and submodel. Can be used only on in models that are estimable."""
        return self.get_model().get_data(coefficient, submodel)

    def get_coefficient_names(self, submodel=-2):
        """Calls method get_coefficient_names of the Model object which should return
        coefficient names for the given submodel. Can be used only on in models that are estimable."""
        return self.get_model().get_coefficient_names(submodel)

    def get_coefficients(self, submodel=-2):
        """Return an object of class SpecifiedCoefficientsFor1Submodel giving the model coefficients.
        Can be used only on in models that are estimable."""
        return SpecifiedCoefficientsFor1Submodel(self.get_model().get_specified_coefficients(), submodel)

    def get_data_as_dataset(self, submodel=-2, **kwargs):
        """Calls method get_data_as_dataset of the Model object which should return
        an object of class Dataset containing model data.
        Works only for ChoiceModel (returns InteractionDataset), and for RegressionModel
        (returns Dataset).
        """
        return self.get_model().get_data_as_dataset(submodel, **kwargs)

    def get_choice_set(self):
        """Return a Dataset of choices. Works only for the ChoiceModel class.
        """
        return self.get_model().model_interaction.interaction_dataset.get_dataset(2)

    def get_choice_set_index(self):
        """Return an array of indices of choices. Works only for the ChoiceModel class.
        """
        return self.get_model().model_interaction.interaction_dataset.get_index(2)

    def get_choice_set_index_for_submodel(self, submodel):
        """Return an array of indices of choices for the given submodel.
        Works only for the ChoiceModel class.
        """
        index = self.get_choice_set_index()
        return take(index, indices=self.get_model().observations_mapping[submodel], axis=0)

    def get_active_choice_set(self, submodel=None):
        """Return choice set as seen by agents in the model.
        Works only for the ChoiceModel class.
        """
        if submodel is None:
            choices = self.get_choice_set_index()
        else:
            choices = self.get_choice_set_index_for_submodel(submodel)
        choices = unique(choices.flatten())
        ds = self.get_choice_set()
        return DatasetSubset(ds, choices)

    def get_agent_set(self):
        """Return a Dataset of all agents.
        """
        return self.get_model().get_agent_set()

    def get_agent_set_index(self):
        """Return an array of indices of agents active in the model.
        """
        return self.get_model().get_agent_set_index()

    def get_agent_set_index_for_submodel(self, submodel):
        """Return an array of indices of agents for the given submodel.
        """
        return self.get_model().get_agent_set_index_for_submodel(submodel)

    def get_active_agent_set(self, submodel=None):
        """Return agent set that make choices in the model.
        Works only for the ChoiceModel class.
        """
        agents = self.get_agent_set()
        if submodel is None:
            index = self.get_agent_set_index()
        else:
            index = self.get_agent_set_index_for_submodel(submodel)
        return DatasetSubset(agents, index)

    def agent_summary(self, submodel=None):
        """Print a summary of the active agent set (optionally per submodel)."""
        ds = self.get_active_agent_set(submodel=submodel)
        ds.summary()

    def choice_summary(self, submodel=None):
        """Print a summary of the active choice set (optionally per submodel)."""
        ds = self.get_active_choice_set(submodel=submodel)
        ds.summary()

    def data_summary(self, **kwargs):
        """Print a summary of the model data (see get_data_as_dataset)."""
        ds = self.get_data_as_dataset(**kwargs)
        ds.summary()

    def _get_before_after_dataset_from_attribute(self, var_name, storage, **kwargs):
        """Compute `var_name` on its dataset and additionally reload the
        pre-run values from `storage` under the alias '<alias>_reload__'."""
        dataset_name = var_name.get_dataset_name()
        ds = self.get_dataset(dataset_name)
        ds.compute_variables([var_name], dataset_pool=self.get_dataset_pool())
        ds.copy_attribute_by_reload(var_name, storage=storage, **kwargs)
        return ds

    def get_before_after_attribute(self, attribute_name):
        """Return a dictionary with elements 'before' (contains an array of the given
        attribute that is reloaded from the cache) and 'after' (contains an array of the
        given attribute with the current values).
        """
        from opus_core.store.attribute_cache import AttributeCache
        var_name = VariableName(attribute_name)
        storage = AttributeCache(self.simulation_state.get_cache_directory())
        ds = self._get_before_after_dataset_from_attribute(var_name, storage=storage,
                 package_order=self.get_dataset_pool().get_package_order())
        return {'after': ds[var_name.get_alias()],
                'before': ds.get_attribute('%s_reload__' % var_name.get_alias())}

    def summary_before_after(self, attribute_name):
        """Print summary of the given attribute 'before' (values
        reloaded from the cache) and 'after' (current values).
        """
        from opus_core.store.attribute_cache import AttributeCache
        var_name = VariableName(attribute_name)
        storage = AttributeCache(self.simulation_state.get_cache_directory())
        ds = self._get_before_after_dataset_from_attribute(var_name, storage=storage,
                 package_order=self.get_dataset_pool().get_package_order())
        print ''
        print 'Before model run:'
        print '================='
        ds.summary(names=['%s_reload__' % var_name.get_alias()])
        print ''
        print 'After model run:'
        print '================='
        #ds.summary(names=[var_name.get_alias()])
        ds.summary(names=[var_name.get_alias()])

    def model_dependencies(self, model=None, group=None):
        """Prints out all dependencies for the model."""
        from opus_core.variables.dependency_query import DependencyChart
        if model is None: # current model
            model, group = self.get_model_name()
            spec = self.get_specification()
        else:
            spec = None
        if model == 'all': # print dependencies for all models
            for thismodel in self.scenario_models:
                thisgroups = None
                if isinstance(thismodel, dict):
                    # grouped model: {name: {'group_members': [...]}}
                    thisgroups = thismodel[thismodel.keys()[0]].get('group_members', None)
                    thismodel = thismodel.keys()[0]
                if not isinstance(thisgroups, list):
                    thisgroups = [thisgroups]
                for group in thisgroups:
                    chart = DependencyChart(self.xml_configuration, model=thismodel, model_group=group)
                    chart.print_model_dependencies()
        else:
            chart = DependencyChart(self.xml_configuration, model=model, model_group=group,
                                    specification=spec)
            chart.print_model_dependencies()

    def variable_dependencies(self, name):
        """Prints out dependencies of this variable. 'name' can be either an alias from the
        model specification or an expression."""
        from opus_core.variables.dependency_query import DependencyChart
        varname = None
        # Try to resolve `name` as an alias in the model specification first.
        allvars = self.get_specification().get_variable_names()
        for ivar in range(len(allvars)):
            thisvar = allvars[ivar]
            if not isinstance(thisvar, VariableName):
                thisvar = VariableName(thisvar)
            if name == thisvar.get_alias():
                varname = thisvar
                break
        if varname is None: # not an alias: treat `name` as an expression
            varname = VariableName(name)
        chart = DependencyChart(self.xml_configuration)
        chart.print_dependencies(varname.get_expression())

    def compute_expression(self, attribute_name):
        """Compute any expression and return its values."""
        var_name = VariableName(attribute_name)
        dataset_name = var_name.get_dataset_name()
        ds = self.get_dataset(dataset_name)
        return ds.compute_variables([var_name], dataset_pool=self.get_dataset_pool())

    def get_dataset_pool(self):
        """Return the dataset pool used by the current run."""
        return self.model_system.run_year_namespace["dataset_pool"]

    def plot_histogram_before_after(self, attribute_name, bins=None):
        """Plot histograms of values returned by the method get_before_after_attribute."""
        from opus_core.plot_functions import create_histogram, show_plots
        from matplotlib.pylab import figure
        values = self.get_before_after_attribute(attribute_name)
        alias = VariableName(attribute_name).get_alias()
        fig = figure()
        fig.add_subplot(121)
        create_histogram(values['before'], main='%s (before)' % alias, bins=bins)
        fig.add_subplot(122)
        create_histogram(values['after'], main='%s (after)' % alias, bins=bins)
        show_plots()

    def get_correlation(self, submodel=-2):
        """Return an array of correlations between all variables of the model data
        (for given submodel). Works only for ChoiceModel and RegressionModel"""
        ds = self.get_data_as_dataset(submodel)
        attrs = [attr for attr in ds.get_known_attribute_names() if attr not in ds.get_id_name()]
        return ds.correlation_matrix(attrs)

    def plot_correlation(self, submodel=-2, useR=False, **kwargs):
        """Plot correlations between all variables of the model data (for given submodel).
        Works only for ChoiceModel and RegressionModel"""
        ds = self.get_data_as_dataset(submodel)
        attrs = [attr for attr in ds.get_known_attribute_names() if attr not in ds.get_id_name()]
        ds.correlation_image(attrs, useR=useR, **kwargs)

    def plot_choice_set(self, agents_index=None, aggregate_to=None, matplotlib=True, **kwargs):
        """Plot map of the sampled choice set. agents_index can be given to restrict the
        set of agents to which the choice set belongs to. aggregate_to is a name of a dataset
        which the choice set should be aggregated to. If matplotlib is False, mapnik is used
        (and required). Additional arguments are passed to plot_map or plot_map_matplotlib.
        E.g. (choice set are buildings, aggregated to zones, for the first agent)
        er.plot_choice_set(aggregate_to='zone', matplotlib=False, project_name='psrc_parcel',
                           file='choice_set0.png', agents_index=0)
        """
        choice_set = self.get_choice_set()
        if agents_index is None:
            flatten_choice_index = self.get_choice_set_index().ravel()
        else:
            flatten_choice_index = self.get_choice_set_index()[agents_index,:].ravel()
        if aggregate_to is not None:
            # Count sampled choices per aggregated-dataset member.
            ds_aggr = self.get_dataset(aggregate_to)
            result = ds_aggr.sum_over_ids(choice_set[ds_aggr.get_id_name()[0]][flatten_choice_index],
                                          ones(flatten_choice_index.size))
            ds = ds_aggr
        else:
            result = choice_set.sum_over_ids(choice_set.get_id_attribute()[flatten_choice_index],
                                             ones(flatten_choice_index.size))
            ds = choice_set
        dummy_attribute_name = '__sampled_choice_set__'
        ds.add_attribute(name=dummy_attribute_name, data=result)
        if matplotlib:
            coord_syst = None
            if ds.get_coordinate_system() is None and hasattr(ds, 'compute_coordinate_system'):
                coord_syst = ds.compute_coordinate_system(dataset_pool=self.get_dataset_pool())
            ds.plot_map_matplotlib(dummy_attribute_name, background=-1, coordinate_system=coord_syst, **kwargs)
        else:
            ds.plot_map(dummy_attribute_name, background=-1, **kwargs)
        # Remove the temporary attribute again.
        ds.delete_one_attribute(dummy_attribute_name)

    def plot_choice_set_attribute(self, name, agents_index=None, aggregate_to=None,
                                  function='sum', matplotlib=True, **kwargs):
        """Plot map of the given attribute for the sampled choice set. agents_index can be
        given to restrict the set of agents to which the choice set belongs to. aggregate_to
        is a name of a dataset which the choice set should be aggregated to. function defines
        the aggregating function (e.g. sum, mean, median, etc.) If matplotlib is False,
        mapnik is used (and required). Additional arguments are passed to plot_map or
        plot_map_matplotlib. E.g.
        er.plot_choice_set_attribute('residential_units', aggregate_to='zone',
                                     matplotlib=False, project_name='psrc_parcel',
                                     file='choice_resunits.png')
        """
        choice_set = self.get_choice_set()
        if agents_index is None:
            flatten_choice_index = self.get_choice_set_index().ravel()
        else:
            flatten_choice_index = self.get_choice_set_index()[agents_index,:].ravel()
        # filter_var marks members NOT in the sampled choice set.
        filter_var = ones(choice_set.size(), dtype='int16')
        filter_var[unique(flatten_choice_index)] = 0
        filter_idx = where(filter_var)[0]
        if aggregate_to is not None:
            ds_aggr = self.get_dataset(aggregate_to)
            result = ds_aggr.aggregate_over_ids(choice_set[ds_aggr.get_id_name()[0]][flatten_choice_index],
                                                what=choice_set[name][flatten_choice_index], function=function)
            filter = ds_aggr.sum_over_ids(choice_set[ds_aggr.get_id_name()[0]][filter_idx],
                                          ones(filter_idx.size))
            filter = filter > 0
            ds = ds_aggr
        else:
            result = choice_set.aggregate_over_ids(choice_set.get_id_attribute()[flatten_choice_index],
                                                   what=choice_set[name][flatten_choice_index], function=function)
            filter = filter_var
            ds = choice_set
        dummy_attribute_name = '__sampled_choice_set_attribute__'
        ds.add_attribute(name=dummy_attribute_name, data=result)
        dummy_filter_name = '__sampled_choice_set_filter__'
        ds.add_attribute(name=dummy_filter_name, data=filter)
        if matplotlib:
            coord_syst = None
            if ds.get_coordinate_system() is None and hasattr(ds, 'compute_coordinate_system'):
                coord_syst = ds.compute_coordinate_system(dataset_pool=self.get_dataset_pool())
            ds.plot_map_matplotlib(dummy_attribute_name, filter=dummy_filter_name,
                                   coordinate_system=coord_syst, **kwargs)
        else:
            ds.plot_map(dummy_attribute_name, filter=dummy_filter_name, **kwargs)
        # Remove both temporary attributes again.
        ds.delete_one_attribute(dummy_attribute_name)
        ds.delete_one_attribute(dummy_filter_name)

    def plot_coefficients(self, submodel=-2, exclude_constant=True, eqidx=0, plot=True,
                          store_values_to_file=None):
        """ Plot a barchart of coefficient values. This can be used in a regression model,
        when coefficients are standardized
        (i.e. using the estimation module opus_core.estimate_linear_regression_standardized).
        store_values_to_file can be a file name where the values are stored.
        """
        coef = self.get_coefficients(submodel)
        values = coef.get_coefficient_values()
        names = coef.get_coefficient_names()
        sd = coef.get_standard_errors()
        # idx selects which coefficients to show (optionally dropping constants).
        idx = ones(names.shape[1], dtype="bool")
        if exclude_constant:
            pos = coef.get_constants_positions()
            if pos.size > 0:
                idx[pos] = 0
        if store_values_to_file is not None:
            # Write a tab-delimited table: name, estimate, standard error.
            n = idx.sum()
            result = concatenate((reshape(names[eqidx, idx], (n,1)),
                                  reshape(values[eqidx, idx], (n,1)),
                                  reshape(sd[eqidx, idx], (n,1))), axis=1)
            write_to_text_file(store_values_to_file,
                               array(['coefficient_name', 'estimate', 'standard_error']),
                               delimiter='\t')
            write_table_to_text_file(store_values_to_file, result, delimiter='\t', mode='a')
        if plot:
            plot_barchart(values[eqidx, idx], labels = names[eqidx, idx], errors=sd[eqidx, idx])
        else:
            return {'names': names[eqidx, idx], 'values': values[eqidx, idx], 'errors': sd[eqidx, idx]}

    def create_latex_tables(self, directory, other_info_keys=None):
        """Write LaTeX tables of the explored model's coefficients and
        specification into `directory`."""
        from opus_core.latex_table_creator import LatexTableCreator
        LTC = LatexTableCreator()
        LTC.create_latex_table_for_coefficients_for_model(
            self.get_model().get_specified_coefficients().coefficients, self.explored_model,
            directory, other_info_keys=other_info_keys)
        LTC.create_latex_table_for_specifications_for_model(
            self.get_model().get_specified_coefficients().specification, self.explored_model, directory)
class MoreDatasetTests(opus_unittest.OpusTestCase):
    """Tests of Dataset attribute flushing/caching against a temporary
    on-disk cache directory."""

    def setUp(self):
        # Fresh temporary cache for every test; removed again in tearDown.
        self.start_year = 2001
        self.expected_sic_data = array([6,4,7,808,6])
        self.job_id = array([1,2,3,4,5])
        self.base_cache_dir = tempfile.mkdtemp(prefix='opus_tmp_test_dataset')
        self.simulation_state = SimulationState(low_memory_run=True,
                                                new_instance=True,
                                                base_cache_dir=self.base_cache_dir)
        self.dir = self.simulation_state.get_cache_directory()
        self.simulation_state.set_current_time(self.start_year)
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)
        self.in_storage = StorageFactory().get_storage('dict_storage')
        self.in_storage.write_table(
            table_name='jobs',
            table_data={
                'grid_id':array([10,20,30,40,50]),
                'job_id':self.job_id,
                },
            )
        self.out_storage = StorageFactory().get_storage('dict_storage')
        self.job_set_resources = ResourceFactory().get_resources_for_dataset(
            'job',
            in_storage = self.in_storage,
            out_storage = self.out_storage,
            in_table_name_pair = ('jobs',None),
            out_table_name_pair = ('jobs_exported',None),
            attributes_pair = (None,AttributeType.PRIMARY),
            id_name_pair = ('job_id','job_id'),
            nchunks_pair = (1,1),
            debug_pair = (1,None),
            )

    def tearDown(self):
        if os.path.exists(self.base_cache_dir):
            rmtree(self.base_cache_dir)

    def test_err_when_asking_for_attribute_that_is_not_in_cache(self):
        # After flushing, a known attribute can be re-read, but an unknown
        # one must raise NameError.
        job_set = Dataset(self.job_set_resources, dataset_name="jobs")
        job_set.add_attribute(self.job_id, "job_id", metadata=AttributeType.PRIMARY)
        job_set.flush_dataset()
        job_set.get_attribute('job_id')
        self.assertRaises(NameError, job_set.get_attribute,
                          'attribute_that_does_not_exist')

    def test_compute_one_variable_when_asking_for_attribute_that_is_not_in_cache(self):
        job_set = Dataset(self.job_set_resources, dataset_name="jobs")
        job_set.add_attribute(self.job_id, "job_id", metadata=AttributeType.PRIMARY)
        job_set.flush_dataset()
        job_id_variable_name = VariableName('opus_core.jobs.attribute_that_does_not_exist')
        logger.enable_hidden_error_and_warning_words()
        try:
            self.assertRaises(StandardError,
                              job_set._compute_one_variable,
                              job_id_variable_name)
        finally:
            # BUG FIX: previously called enable_hidden_error_and_warning_words()
            # again, which never restored normal logging and left error/warning
            # words hidden for every subsequent test; disable to restore.
            logger.disable_hidden_error_and_warning_words()

    def test_flush_dataset_correct_flags(self):
        job_set = Dataset(self.job_set_resources, dataset_name="jobs")
        # Not yet loaded at all.
        self.assert_(not 'job_id' in job_set.attribute_boxes)
        # First access: in memory, not yet cached.
        job_set.get_attribute("job_id")
        self.assert_(job_set.attribute_boxes["job_id"].is_in_memory())
        self.assert_(not job_set.attribute_boxes["job_id"].is_cached())
        # Flushing moves it to the cache and out of memory.
        job_set.flush_dataset()
        self.assert_(not job_set.attribute_boxes["job_id"].is_in_memory())
        self.assert_(job_set.attribute_boxes["job_id"].is_cached())
        # Re-reading brings it back into memory while keeping the cached copy.
        job_set.get_attribute("job_id")
        self.assert_(job_set.attribute_boxes["job_id"].is_in_memory())
        self.assert_(job_set.attribute_boxes["job_id"].is_cached())

    def test_flush_dataset_correct_data(self):
        job_set = Dataset(self.job_set_resources, dataset_name="jobs")
        job_set.add_attribute(self.job_id, "job_id", metadata=AttributeType.PRIMARY)
        job_set.add_attribute(self.expected_sic_data, "sic", metadata=AttributeType.COMPUTED)
        job_set.flush_dataset()
        # Values must survive the round-trip through the flt cache.
        returned_sic_data = job_set.get_attribute("sic")
        returned_id_data = job_set.get_attribute("job_id")
        self.assert_(ma.allequal(returned_id_data, self.job_id))
        self.assert_(ma.allequal(returned_sic_data, self.expected_sic_data))
class TestDataset(opus_unittest.OpusTestCase):
    """Core Dataset tests: construction from dict/flt storage, row joins,
    attribute joins on composite ids, and variable-version bookkeeping.
    Uses a temporary simulation cache directory per test."""

    def setUp(self):
        self.start_year = 2001
        self.expected_sic_data = array([6,4,7,808,6])
        self.job_id = array([1,2,3,4,5])
        # Fresh temporary cache directory; SimulationState points at it.
        self.base_cache_dir = tempfile.mkdtemp(prefix='opus_tmp')
        self.simulation_state = SimulationState(low_memory_run=True,
                                                new_instance=True,
                                                base_cache_dir=self.base_cache_dir)
        self.dir = self.simulation_state.get_cache_directory()
        self.simulation_state.set_current_time(self.start_year)
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)

    def tearDown(self):
        # the logger has a file open in the cache directory (by default, disable that file logging)
        # NOTE(review): reaches into the private logger._file_stream attribute
        # to decide whether file logging is active.
        if logger._file_stream:
            logger.disable_file_logging()
        # Remove cache contents bottom-up, then the (now empty) directories.
        for root, dirs, files in os.walk(self.dir, topdown=False):
            for filename in files:
                os.remove(os.path.join(root, filename))
            for directory in dirs:
                os.rmdir(os.path.join(root, directory))
        os.rmdir(self.dir)
        os.rmdir(self.base_cache_dir)

    def test_dict_dataset(self):
        # A Dataset backed by dict_storage exposes size and attribute values.
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(
            table_name='dataset',
            table_data={
                "id":array([1,2,3,4]),
                "attr":array([4,7,2,1])
                }
            )
        ds = Dataset(in_storage=storage, in_table_name='dataset', id_name="id")
        self.assert_(ds.get_attribute("attr").sum()==14, "Something is wrong with the dataset.")
        self.assert_(ds.size()==4, "Wrong size of dataset.")

    def test_flt_dataset(self):
        # Reads the 'endians' table shipped with opus_core from flt storage.
        import opus_core
        from opus_core.store.flt_storage import flt_storage
        attribute = 'little_endian'
        location = os.path.join(opus_core.__path__[0], 'data', 'flt')
        storage = flt_storage(storage_location=location)
        ds = Dataset(in_storage=storage, id_name=attribute, in_table_name='endians')
        self.assertAlmostEqual(11.0, ds.get_attribute_by_index(attribute, 0))
        self.assertEqual(None, ds.get_attribute_header(attribute))

    def test_join_by_rows(self):
        # join_by_rows appends ds2's rows to ds1; ds2 itself is unchanged.
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(
            table_name='dataset1',
            table_data={
                'id':array([2,4,6,8]),
                'attr':array([4,7,2,1])
                }
            )
        storage.write_table(
            table_name='dataset2',
            table_data={
                'id':array([1,5,9]),
                'attr':array([55,66,100])
                }
            )
        ds1 = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
        ds2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
        ds1.join_by_rows(ds2)
        self.assert_(ma.allclose(ds1.get_attribute('attr'), array([4,7,2,1,55,66,100])))
        self.assert_(ma.allclose(ds2.get_attribute('attr'), array([55,66,100])))

    def test_join_by_rows_for_unique_ids(self):
        # Overlapping id values (2 appears in both) must make join_by_rows raise.
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(
            table_name='dataset1',
            table_data={
                "id":array([2,4]),
                "attr":array([4,7])
                }
            )
        storage.write_table(
            table_name='dataset2',
            table_data={
                "id":array([1,2]),
                "attr":array([55,66])
                }
            )
        ds1 = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
        ds2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
        threw_exception = False
        try:
            ds1.join_by_rows(ds2)
        except StandardError:
            threw_exception = True
        self.assert_(threw_exception)

    def test_join_by_rows_for_char_arrays(self):
        # Same as test_join_by_rows but with string-typed attribute arrays.
        from numpy import alltrue
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(
            table_name='dataset1',
            table_data={
                'id':array([2,4,6,8]),
                'attr':array(['4','7','2','1'])
                }
            )
        storage.write_table(
            table_name='dataset2',
            table_data={
                'id':array([1,5,9]),
                'attr':array(['55','66','100'])
                }
            )
        ds1 = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
        ds2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
        ds1.join_by_rows(ds2)
        self.assert_(alltrue(ds1.get_attribute('attr') == array(['4','7','2','1','55','66','100'])))
        self.assert_(alltrue(ds2.get_attribute('attr') == array(['55','66','100'])))

    def test_variable_dependencies_tree_with_versioning(self):
        # Versions: attributes start at 0, each modify_attribute bumps the
        # dependent's version, and a derived variable is recomputed (version
        # bumped) only when a dependency changed since the last computation.
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(
            table_name='tests',
            table_data={
                'id':array([2,4]),
                'a_dependent_variable':array([4,7]),
                'a_dependent_variable2':array([10,1])
                }
            )
        ds = Dataset(in_storage=storage, in_table_name='tests', id_name='id',
                     dataset_name='tests')
        ds.compute_variables(["opus_core.tests.a_test_variable_with_two_dependencies"])
        self.assert_(ds.get_version("a_test_variable_with_two_dependencies")==0) #initially version=0
        self.assert_(ds.get_version("a_dependent_variable")==0)
        self.assert_(ds.get_version("a_dependent_variable2")==0)
        ds.modify_attribute("a_dependent_variable", array([0,0]))
        self.assert_(ds.get_version("a_dependent_variable")==1) # version=1
        ds.modify_attribute("a_dependent_variable", array([1,1]))
        self.assert_(ds.get_version("a_dependent_variable")==2) # version=2
        ds.compute_variables(["opus_core.tests.a_test_variable_with_two_dependencies"])
        self.assert_(ds.get_version("a_test_variable_with_two_dependencies")==1)
        ds.compute_variables(["opus_core.tests.a_test_variable_with_two_dependencies"])
        self.assert_(ds.get_version("a_test_variable_with_two_dependencies")==1) # version does not change
        # Autogenerated (expression) variables follow the same versioning rules.
        autogen_variable = "my_var = 3 * opus_core.tests.a_dependent_variable"
        ds.compute_variables([autogen_variable])
        self.assert_(ds.get_version("my_var")==0)
        ds.compute_variables([autogen_variable])
        self.assert_(ds.get_version("my_var")==0)

    def test_compute_variable_with_unknown_package(self):
        # Resolves the variable by searching the given package order.
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(
            table_name='tests',
            table_data={
                'id':array([2,4]),
                'attr1':array([4,7]),
                }
            )
        ds = Dataset(in_storage=storage, in_table_name='tests', id_name='id',
                     dataset_name='test')
        ds.compute_one_variable_with_unknown_package("attr1_times_2", package_order=["opus_core"])

    def test_join_datasets_with_2_ids(self):
        # join on a composite (id1, id2) key: ds1 gains ds2's attr1 values
        # matched by id pair; ds1's own attr2 stays untouched.
        from numpy import ma
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(
            table_name='data1',
            table_data={
                'id1':array([2,4,2]),
                'id2':array([1,2,3]),
                'attr1':array([4,7,1]),
                'attr2':array([100,0,1000]),
                }
            )
        storage.write_table(
            table_name='data2',
            table_data={
                'id1':array([4,2,2]),
                'id2':array([2,3,1]),
                'attr1':array([50,60,70])
                }
            )
        ds1 = Dataset(in_storage=storage, in_table_name='data1', id_name=['id1', 'id2'],
                      dataset_name='data1')
        ds2 = Dataset(in_storage=storage, in_table_name='data2', id_name=['id1', 'id2'],
                      dataset_name='data2')
        ds1.join(ds2, 'attr1')
        self.assertEqual(ma.allequal(ds1.get_attribute('attr1'), array([70,50,60])), True)
        self.assertEqual(ma.allequal(ds1.get_attribute('attr2'), array([100,0,1000])), True)
class ModelSystem(object):
    """
    Uses the information in configuration to run/estimate a set of models
    for given set of years.

    NOTE(review): this class deliberately passes namespaces around via
    exec/eval on locals() (see _run_year, do_init, do_prepare, do_process);
    statement order and the exact set of local names are load-bearing.
    """

    def __init__(self):
        # True while a (possibly forked) simulation is in progress; guarded
        # by self.running_conditional.
        self.running = False
        # ForkProcess instances created by _fork_new_process, in start order.
        self.forked_processes = []
        self.running_conditional = threading.Condition()

    def run(
        self,
        resources,
        write_datasets_to_cache_at_end_of_year=True,
        log_file_name="run_model_system.log",
        cleanup_datasets=True,
    ):
        """Entries in resources: (entries with no defaults are required)
        models - a list containing names of models to be run. Each name must
                correspond to the name of the module/class of that model.
                Default(object): None
        years - a tuple (start year, end year)
        debuglevel - an integer. The higher the more output will be printed.
                Default: 0
        expression_library - a dictionary. The keys in the dictionary are
                pairs (dataset_name, variable_name) and the values are the
                corresponding expressions. The model system needs to set the
                expression library (if it isn't None) in DatasetFactory for
                DatasetFactory to know about variables defined as expressions
                in the xml expression library. Default: None

        This method is called both to start up the simulation for all years,
        and also for each year when running with one process per year. In the
        latter case, 'years' consists of just (current_year, current_year)
        rather than the real start and end years for the simulation.
        """
        if not isinstance(resources, Resources):
            raise TypeError, "Argument 'resources' must be of type 'Resources'."
        # Configure logging verbosity/tags from the 'log' entry, if any.
        logger_settings = resources.get("log", {"tags": [], "verbosity_level": 3})
        logger.set_tags(logger_settings.get("tags", []))
        logger.set_verbosity_level(logger_settings.get("verbosity_level", 3))
        self.simulation_state = SimulationState()
        self.simulation_state.set_low_memory_run(resources.get("low_memory_mode", False))
        self.simulation_state.set_start_time(resources.get("base_year", 0))
        self.run_year_namespace = {}
        if resources.get("cache_directory", None) is not None:
            self.simulation_state.set_cache_directory(resources["cache_directory"])
        if "expression_library" in resources:
            VariableFactory().set_expression_library(resources["expression_library"])
        if resources.get("sample_input", False):
            # Sampled-coefficient variant; see update_config_for_multiple_runs.
            self.update_config_for_multiple_runs(resources)
        cache_directory = self.simulation_state.get_cache_directory()
        log_file = os.path.join(cache_directory, log_file_name)
        logger.enable_file_logging(log_file, verbose=False)
        try:
            logger.log_status("Cache Directory set to: " + cache_directory)
            with logger.block("Start simulation run"):
                models = resources.get("models", [])
                models_in_years = resources.get("models_in_year", {})
                resources.check_obligatory_keys(["years"])
                years = resources["years"]
                if (not isinstance(years, tuple)) and (not isinstance(years, list)):
                    raise TypeError, "Entry 'years' in resources must be a tuple."
                if len(years) < 2:
                    print years
                    raise StandardError, "Entry 'years' in resources must be of length at least 2."
                start_year = years[0]
                end_year = years[-1]
                debuglevel = resources.get("debuglevel", 0)
                seed_values = resources.get("seed", NO_SEED)
                logger.log_status("random seed = %s" % str(seed_values))
                seed(seed_values)
                # Run each year in turn; per-year model list overrides the
                # global one when present in 'models_in_year'.
                for year in range(start_year, end_year + 1):
                    with logger.block("Starting simulation for year " + str(year)):
                        self.simulation_state.set_current_time(year)
                        SessionConfiguration().get_dataset_pool().remove_all_datasets()
                        # _run_year opens its own per-year log file.
                        logger.disable_file_logging(log_file)
                        try:
                            if models_in_years.get(year, None) is not None:
                                models_to_run = models_in_years[year]
                            else:
                                models_to_run = models
                            self._run_year(
                                year=year,
                                models=models_to_run,
                                simulation_state=self.simulation_state,
                                debuglevel=debuglevel,
                                resources=resources,
                                write_datasets_to_cache_at_end_of_year=write_datasets_to_cache_at_end_of_year,
                                cleanup_datasets=cleanup_datasets,
                            )
                        finally:
                            logger.enable_file_logging(log_file, verbose=False)
                        collect()
        finally:
            logger.disable_file_logging(log_file)

    def flush_datasets(self, dataset_names, after_model=False):
        # Flush each named dataset currently present in the session pool.
        dataset_pool = SessionConfiguration().get_dataset_pool()
        for dataset_name in dataset_names:
            if dataset_pool.has_dataset(dataset_name):
                self.flush_dataset(dataset_pool.get_dataset(dataset_name), after_model=after_model)

    def flush_dataset(self, dataset, after_model=False):
        """Write the PRIMARY attributes of this dataset to the cache."""
        if dataset and isinstance(dataset, Dataset):
            # Do not flush after model if not necessary
            if after_model:
                # Nothing but id attributes: nothing worth flushing.
                if len(dataset.get_attribute_names()) <= len(dataset.get_id_name()):
                    return
                # All attributes already known and (almost) nothing in memory:
                # just drop computed attributes, skip the write.
                if (len(dataset.get_attribute_names()) == len(dataset.get_known_attribute_names())) and (
                    len(dataset.get_attributes_in_memory()) <= len(dataset.get_id_name())
                ):
                    dataset.delete_computed_attributes()
                    return
            dataset.delete_computed_attributes()
            dataset.load_and_flush_dataset()

    def flush_datasets_after_model(self, resources):
        # Choose which datasets to flush: everything in the pool when
        # 'flush_variables' is set, otherwise the configured subset.
        if resources.get("flush_variables", False):
            AttributeCache().delete_computed_tables()
            # this will also delete computed attributes
            datasets_to_cache = SessionConfiguration().get_dataset_pool().datasets_in_pool().keys()
        else:
            datasets_to_cache = resources.get("datasets_to_cache_after_each_model", [])
        self.flush_datasets(datasets_to_cache, after_model=True)

    def _run_year(
        self,
        year,
        models,
        simulation_state,
        debuglevel,
        resources,
        write_datasets_to_cache_at_end_of_year,
        cleanup_datasets=True,
    ):
        """
        Assumes that all datasets resides in the cache directory in binary format.
        """
        # Optional Wing IDE debugging hook; absence is fine.
        try:
            import wingdbstub
        except:
            pass
        self.vardict = {}
        log_file_name = os.path.join(simulation_state.get_cache_directory(), "year_%s_log.txt" % year)
        logger.enable_file_logging(log_file_name, "w")
        try:
            logger.start_block("Simulate year %s" % year)
            try:
                base_year = resources["base_year"]
                if year == base_year:
                    year_for_base_year_cache = year  # case of estimation
                else:
                    year_for_base_year_cache = year - 1
                cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
                self.vardict["cache_storage"] = cache_storage
                base_cache_storage = AttributeCache().get_flt_storage_for_year(base_year)
                self.vardict["base_cache_storage"] = base_cache_storage
                simulation_state.set_flush_datasets(resources.get("flush_variables", False))
                SessionConfiguration()["simulation_year"] = year
                SessionConfiguration()["debuglevel"] = debuglevel
                # Preload datasets: per-year list wins over the global list.
                datasets_to_preload_in_year = resources.get("datasets_to_preload_in_year", {})
                if datasets_to_preload_in_year.get(year, None) is not None:
                    datasets_to_preload = datasets_to_preload_in_year[year]
                else:
                    datasets_to_preload = resources.get("datasets_to_preload", {})
                for dataset_name in datasets_to_preload:
                    SessionConfiguration().get_dataset_from_pool(dataset_name)
                models_configuration = resources.get("models_configuration", {})
                dataset_pool = SessionConfiguration().get_dataset_pool()
                datasets = {}
                for dataset_name, its_dataset in dataset_pool.datasets_in_pool().iteritems():
                    self.vardict[dataset_name] = its_dataset
                    datasets[dataset_name] = its_dataset
                    # Bind each dataset under its own name in this frame's
                    # locals(), which is later handed to the models.
                    exec "%s=its_dataset" % dataset_name  # This is needed. It resides in locals()
                    # and is passed on to models as they run.
                    ### TODO: There has got to be a better way!
                model_resources = Resources(datasets)
                n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run(
                    models, models_configuration
                )
                self.run_year_namespace = locals()
                # ==========
                # Run the models.
                # ==========
                model_number = -1
                for model_entry in models:
                    # list 'models' can be in the form:
                    # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
                    #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                    #                                      'commercial']}},
                    #  {'model_name_3': ['estimate', 'run']},
                    #  'model_name_4',
                    #  {'model_name_5': {'group_members': 'all'}}
                    # ]
                    # get list of methods to be processed evtl. for each group member
                    if isinstance(model_entry, dict):
                        model_name, value = model_entry.items()[0]
                        if not isinstance(value, dict):  # is a model group
                            processes = value
                            if not isinstance(processes, list):
                                processes = [processes]
                    else:  # in the form 'model_name_4' in the comment above
                        model_name = model_entry
                        processes = ["run"]
                    group_member = None
                    model_group = model_group_members_to_run[model_name][1]
                    # At least one iteration even for ungrouped models.
                    last_member = max(1, len(model_group_members_to_run[model_name][0].keys()))
                    for imember in range(last_member):
                        controller_config = models_configuration[model_name]["controller"]
                        model_configuration = models_configuration[model_name]
                        if model_group_members_to_run[model_name][0].keys():
                            # Grouped model: pick this member's name, processes
                            # and (optionally) a member-specific configuration.
                            group_member_name = model_group_members_to_run[model_name][0].keys()[imember]
                            group_member = ModelGroupMember(model_group, group_member_name)
                            processes = model_group_members_to_run[model_name][0][group_member_name]
                            member_model_name = "%s_%s" % (group_member_name, model_name)
                            if member_model_name in models_configuration.keys():
                                model_configuration = models_configuration[member_model_name]
                                if "controller" in model_configuration.keys():
                                    controller_config = model_configuration["controller"]
                        # Load any datasets this model's structure depends on.
                        datasets_to_preload_for_this_model = controller_config.get(
                            "_model_structure_dependencies_", {}
                        ).get("dataset", [])
                        for dataset_name in datasets_to_preload_for_this_model:
                            try:
                                if not dataset_pool.has_dataset(dataset_name) or (dataset_name not in datasets.keys()):
                                    ds = dataset_pool.get_dataset(dataset_name)
                                    self.vardict[dataset_name] = ds
                                    datasets[dataset_name] = ds
                                    exec "%s=ds" % dataset_name
                            except:
                                logger.log_warning("Failed to load dataset %s." % dataset_name)
                        # import part
                        if "import" in controller_config.keys():
                            import_config = controller_config["import"]
                            for import_module in import_config.keys():
                                exec ("from %s import %s" % (import_module, import_config[import_module]))
                        # gui_import_replacements part
                        # This is a temporary hack -- replicates the functionality of the "import" section
                        # for use with the GUI. The contents of this part of the config is a dictionary.
                        # Keys are names of models (not used here). Values are 2 element pairs.
                        # The first element is a name and the second is a value. Bind the name to the value.
                        if "gui_import_replacements" in controller_config.keys():
                            import_replacement_config = controller_config["gui_import_replacements"]
                            for model_name in import_replacement_config.keys():
                                pair = import_replacement_config[model_name]
                                temp = pair[1]
                                exec ("%s = temp") % pair[0]
                        # init part
                        model = self.do_init(locals())
                        # estimate and/or run part
                        for process in processes:
                            model_number = model_number + 1
                            # write status file
                            model.set_model_system_status_parameters(
                                year, n_models, model_number, resources.get("status_file_for_gui", None)
                            )
                            model.write_status_for_gui()
                            # prepare part
                            exec (self.do_prepare(locals()))
                            processmodel_config = controller_config[process]
                            if "output" in processmodel_config.keys():
                                outputvar = processmodel_config["output"]
                            else:
                                outputvar = "process_output"
                            self.vardict[outputvar] = self.do_process(locals())
                            # Re-bind the output under its configured name in
                            # this frame's locals for later models to use.
                            exec outputvar + "=self.vardict[outputvar]"
                            # check command file from gui, if the simulation should be stopped or paused
                            self.do_commands_from_gui(resources.get("command_file_for_gui", None))
                            # capture namespace for interactive estimation
                            self.run_year_namespace = locals()
                            self.flush_datasets_after_model(resources)
                        del model
                        collect()
                # Write all datasets to cache.
                if write_datasets_to_cache_at_end_of_year:
                    logger.start_block("Writing datasets to cache for year %s" % year)
                    try:
                        for dataset_name, its_dataset in (
                            SessionConfiguration().get_dataset_pool().datasets_in_pool().iteritems()
                        ):
                            self.flush_dataset(its_dataset)
                    finally:
                        logger.end_block()
            finally:
                logger.end_block()
        finally:
            logger.disable_file_logging(log_file_name)
        if cleanup_datasets:
            SessionConfiguration().delete_datasets()

    def do_init(self, parent_state):
        """Run the 'init' part of this model's configuration. Returns model object.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> "self":
                exec ('%s = parent_state["%s"]' % (key, key))
        init_config = parent_state["controller_config"]["init"]
        group_member = parent_state["group_member"]
        if group_member is None:  # No model group
            cmd = "%s(%s)" % (init_config["name"], self.construct_arguments_from_config(init_config))
            model = eval(cmd)
        else:  # Model belongs to a group
            model = eval(
                "%s(group_member, %s)" % (init_config["name"], self.construct_arguments_from_config(init_config))
            )
        return model

    def do_prepare(self, parent_state):
        """Prepares for the current model in the parent state's context.
        What to do is determined by the contents of the current model's controller configuration.

        controller_config is the 'controller' part of the model configuration.
        vardict is a dictionary into which the output of the model's 'prepare_output'
        method will be put.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> "self":
                exec ('%s = parent_state["%s"]' % (key, key))
        key_name = "prepare_for_%s" % process
        if key_name in controller_config.keys():
            prepare_config = controller_config[key_name]
            if "output" in prepare_config.keys():
                outputvar = prepare_config["output"]
            else:
                outputvar = "prepare_output"
            self.vardict[outputvar] = eval(
                "model.%s(%s)" % (prepare_config["name"], self.construct_arguments_from_config(prepare_config))
            )
            # Caller exec's the returned string, binding the prepare output
            # in its own frame.
            return '%s=self.vardict["%s"]' % (outputvar, outputvar)
        else:
            # do nothing when return value is exec'ed
            return ""

    def do_process(self, parent_state):
        # Invoke the model's 'run'/'estimate' method with arguments built
        # from the controller configuration, in the caller's namespace.
        for key in parent_state.keys():
            if key <> "self":
                exec ('%s = parent_state["%s"]' % (key, key))
        ev = "model.%s(%s)" % (process, self.construct_arguments_from_config(processmodel_config))
        return eval(ev)

    def get_number_of_models_and_model_group_members_to_run(self, models, models_configuration):
        """Count number_of models in the list 'models' that can include group members
        (each member and each process is one model)."""
        # list models can be in the form:
        # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
        #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
        #                                      'commercial']}},
        #  {'model_name_3': ['estimate', 'run']},
        #  'model_name_4',
        #  {'model_name_5': {'group_members': 'all'}}
        # ]
        number_of_models = 1
        model_group_members_to_run = {}
        for model_entry in models:
            if isinstance(model_entry, dict):
                model_name, value = model_entry.items()[0]
                if isinstance(value, dict):  # is a model group
                    if not value.keys()[0] == "group_members":
                        raise KeyError, "Key for model " + model_name + " must be 'group_members'."
                    group_members = value["group_members"]
                    model_group = None
                    if "group_by_attribute" in models_configuration[model_name]["controller"].keys():
                        group_dataset_name, group_attribute = models_configuration[model_name]["controller"][
                            "group_by_attribute"
                        ]
                        model_group = ModelGroup(
                            SessionConfiguration().get_dataset_from_pool(group_dataset_name), group_attribute
                        )
                    if not isinstance(group_members, list):
                        group_members = [group_members]
                    if group_members[0] == "_all_":  # see 'model_name_5' example above
                        if model_group is None:
                            raise KeyError, "Entry 'group_by_attribute' is missing for model %s" % model_name
                        group_members = model_group.get_member_names()
                    model_group_members_to_run[model_name] = [{}, model_group]
                    for member in group_members:
                        if isinstance(member, dict):
                            # see 'model_name_2' ('residential') in the comment above
                            member_name = member.keys()[0]
                            model_group_members_to_run[model_name][0][member_name] = member[member_name]
                            if not isinstance(model_group_members_to_run[model_name][0][member_name], list):
                                model_group_members_to_run[model_name][0][member_name] = [
                                    model_group_members_to_run[model_name][0][member_name]
                                ]
                            number_of_models += len(model_group_members_to_run[model_name][0][member_name])
                        else:  # see 'model_name_1'
                            model_group_members_to_run[model_name][0][member] = ["run"]
                            number_of_models += len(model_group_members_to_run[model_name][0][member])
                else:  # in the form 'model_name_3' in the comment above
                    model_group_members_to_run[model_name] = [{}, None]
                    if not isinstance(value, list):
                        number_of_models += 1
                    else:
                        number_of_models += len(value)
            else:  # in the form 'model_name_4' in the comment above
                model_group_members_to_run[model_entry] = [{}, None]
                number_of_models += 1
        return (number_of_models, model_group_members_to_run)

    def do_commands_from_gui(self, filename=None):
        # Poll the GUI command file every 10s: 'stop' exits the process,
        # 'resume' continues, anything else but 'pause' logs a warning.
        if (filename is None) or not os.path.exists(filename):
            return
        while True:
            f = file(filename)
            line = f.read().strip()
            f.close()
            if line == "stop":
                logger.log_warning("Simulation stopped.")
                sys.exit()
            elif line == "resume":
                break
            elif line <> "pause":
                logger.log_warning("Unknown command '%s'. Allowed commands: 'stop', 'pause', 'resume'." % line)
            time.sleep(10)

    def run_multiprocess(self, resources):
        # Run each simulated year in its own forked process, each with its
        # own deterministic seed derived from the root seed.
        resources = Resources(resources)
        profiler_name = resources.get("profile_filename", None)
        if resources["cache_directory"] is not None:
            cache_directory = resources["cache_directory"]
        else:
            cache_directory = SimulationState().get_cache_directory()
        ### TODO: Get rid of this! There is absolutely no good reason to be
        ### changing the Configuration!
        resources["cache_directory"] = cache_directory
        log_file = os.path.join(cache_directory, "run_multiprocess.log")
        logger.enable_file_logging(log_file)
        start_year = resources["years"][0]
        end_year = resources["years"][-1]
        nyears = end_year - start_year + 1
        root_seed = resources.get("seed", NO_SEED)
        if resources.get("_seed_dictionary_", None) is not None:
            # This is added by the RunManager to ensure reproducibility including restarted runs
            seed_dict = resources.get("_seed_dictionary_")
            seed_array = array(map(lambda year: seed_dict[year], range(start_year, end_year + 1)))
        else:
            seed(root_seed)
            seed_array = randint(1, 2 ** 30, nyears)
        logger.log_status("Running simulation for years %d thru %d" % (start_year, end_year))
        logger.log_status("Simulation root seed: %s" % root_seed)
        for iyear, year in enumerate(range(start_year, end_year + 1)):
            success = self._run_each_year_as_separate_process(
                iyear, year, seed=seed_array[iyear], resources=resources, profiler_name=profiler_name, log_file=log_file
            )
            if not success:
                break
        self._notify_stopped()
        if profiler_name is not None:  # insert original value
            resources["profile_filename"] = profiler_name
        logger.log_status("Done running simulation for years %d thru %d" % (start_year, end_year))

    # TODO: changing of configuration
    def _run_each_year_as_separate_process(
        self, iyear, year, seed=None, resources=None, profiler_name=None, log_file=None
    ):
        # Narrow 'years' and 'seed' to this single year before forking;
        # NOTE(review): these mutations persist in the shared resources object.
        logger.start_block("Running simulation for year %d in new process" % year)
        resources["years"] = (year, year)
        resources["seed"] = (seed,)
        if profiler_name is not None:
            # add year to the profile name
            resources["profile_filename"] = "%s_%s" % (profiler_name, year)
        optional_args = []
        if log_file:
            optional_args += ["--log-file-name", os.path.split(log_file)[-1]]
        success = False
        try:
            logger.disable_file_logging(log_file)
            success = self._fork_new_process(
                "opus_core.model_coordinators.model_system", resources, optional_args=optional_args
            )
            logger.enable_file_logging(log_file, verbose=False)
        finally:
            logger.end_block()
        return success

    def run_in_one_process(
        self, resources, run_in_background=False, class_path="opus_core.model_coordinators.model_system"
    ):
        # Run the whole simulation in a single forked child process.
        resources = Resources(resources)
        if resources["cache_directory"] is not None:
            cache_directory = resources["cache_directory"]
        else:
            cache_directory = SimulationState().get_cache_directory()
        ### TODO: Get rid of this! There is no good reason to be changing the
        ### Configuration.
        resources["cache_directory"] = cache_directory
        self._fork_new_process("%s" % class_path, resources, delete_temp_dir=False, run_in_background=run_in_background)
        self._notify_stopped()

    def run_in_same_process(self, resources, **kwargs):
        # Run the simulation in-process via RunModelSystem.
        resources = Resources(resources)
        if resources["cache_directory"] is not None:
            cache_directory = resources["cache_directory"]
        else:
            cache_directory = SimulationState().get_cache_directory()
        ### TODO: Get rid of this! There is no good reason to be changing the
        ### Configuration.
        resources["cache_directory"] = cache_directory
        self._notify_started()
        RunModelSystem(model_system=self, resources=resources, **kwargs)
        self._notify_stopped()

    def construct_arguments_from_config(self, config):
        # Render the 'arguments' section as "key=value, ..." for eval'ed
        # model calls; values are inserted verbatim (they are code snippets).
        key = "arguments"
        if (key not in config.keys()) or (len(config[key].keys()) <= 0):
            return ""
        arg_dict = config[key]
        result = ""
        for arg_key in arg_dict.keys():
            result += "%s=%s, " % (arg_key, arg_dict[arg_key])
        return result

    def wait_for_start(self):
        # Block until self.running becomes True.
        self.running_conditional.acquire()
        while not self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_finish(self):
        # Block until self.running becomes False.
        self.running_conditional.acquire()
        while self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_process_or_finish(self, process_index):
        # Block until the forked process at process_index exists, or the run
        # has finished; returns the (possibly clamped) index.
        self.running_conditional.acquire()
        while process_index >= len(self.forked_processes) and self.running:
            self.running_conditional.wait()
        self.running_conditional.release()
        if not self.running:
            process_index = len(self.forked_processes) - 1
        return process_index

    def _fork_new_process(self, module_name, resources, run_in_background=False, **key_args):
        # Start a child process for module_name; when not backgrounded, wait
        # for it and clean up before returning its success flag.
        self.running_conditional.acquire()
        self.running = True
        self.forked_processes.append(ForkProcess())
        key_args["run_in_background"] = run_in_background
        success = self.forked_processes[-1].fork_new_process(module_name, resources, **key_args)
        self.running_conditional.notifyAll()
        self.running_conditional.release()
        if not run_in_background:
            self.forked_processes[-1].wait()
            self.forked_processes[-1].cleanup()
        return success

    def _notify_started(self):
        # Flip self.running to True and wake all waiters.
        self.running_conditional.acquire()
        self.running = True
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def _notify_stopped(self):
        # Flip self.running to False and wake all waiters.
        self.running_conditional.acquire()
        self.running = False
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def update_config_for_multiple_runs(self, config):
        # Replace selected models (for base_year+1) with their
        # '<name>_sampled_coef' variants and configure those variants to
        # sample coefficients from a normal distribution.
        models_to_update = config.get("models_with_sampled_coefficients", [])
        if "models_in_year" not in config.keys():
            config["models_in_year"] = {}
        if config["models_in_year"].get(config["base_year"] + 1, None) is None:
            config["models_in_year"][config["base_year"] + 1] = config.get("models")
        for umodel in models_to_update:
            try:
                i = config["models_in_year"][config["base_year"] + 1].index(umodel)
                new_model_name = "%s_sampled_coef" % umodel
                config["models_in_year"][config["base_year"] + 1][i] = new_model_name
            except:
                pass
            # NOTE(review): if index() raised above, new_model_name here is
            # either undefined (NameError) or stale from a previous
            # iteration — TODO confirm intended behavior.
            config["models_configuration"][new_model_name] = Configuration(config["models_configuration"][umodel])
            config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"][
                "sample_coefficients"
            ] = True
            config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"][
                "distribution"
            ] = "'normal'"
            config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"][
                "cache_storage"
            ] = "base_cache_storage"
class ModelSystem(object):
    """Uses the information in configuration to run/estimate a set of models
    for a given set of years.

    Models are driven entirely by the 'resources' configuration: each model
    entry names a module/class, and this class instantiates and invokes it
    dynamically via exec/eval over a captured local namespace (see _run_year,
    do_init, do_prepare, do_process).  Python 2 code.
    """

    def __init__(self):
        # Process-coordination state used by run_multiprocess and the
        # wait_for_* methods; guarded by running_conditional.
        self.running = False
        self.forked_processes = []
        self.running_conditional = threading.Condition()

    def run(self, resources, write_datasets_to_cache_at_end_of_year=True, log_file_name='run_model_system.log', cleanup_datasets=True):
        """Run the simulation for every year in resources['years'].

        Entries in resources: (entries with no defaults are required)
            models - a list containing names of models to be run. Each name
                must correspond to the name of the module/class of that model.
                Default(object): None
            years - a tuple (start year, end year)
            debuglevel - an integer. The higher the more output will be
                printed. Default: 0
            expression_library - a dictionary. The keys in the dictionary are
                pairs (dataset_name, variable_name) and the values are the
                corresponding expressions. The model system needs to set the
                expression library (if it isn't None) in DatasetFactory for
                DatasetFactory to know about variables defined as expressions
                in the xml expression library. Default: None

        This method is called both to start up the simulation for all years,
        and also for each year when running with one process per year. In the
        latter case, 'years' consists of just (current_year, current_year)
        rather than the real start and end years for the simulation.

        Raises TypeError if resources is not a Resources instance or 'years'
        is not a tuple/list; StandardError if 'years' has fewer than 2 entries.
        """
        if not isinstance(resources, Resources):
            raise TypeError, "Argument 'resources' must be of type 'Resources'."
        logger_settings = resources.get("log", {"tags": [], "verbosity_level": 3})
        logger.set_tags(logger_settings.get("tags", []))
        logger.set_verbosity_level(logger_settings.get("verbosity_level", 3))
        self.simulation_state = SimulationState()
        self.simulation_state.set_low_memory_run(resources.get("low_memory_mode", False))
        self.simulation_state.set_start_time(resources.get("base_year", 0))
        # Namespace captured per year so interactive tools can inspect it later.
        self.run_year_namespace = {}
        if resources.get('cache_directory', None) is not None:
            self.simulation_state.set_cache_directory(resources['cache_directory'])
        if 'expression_library' in resources:
            VariableFactory().set_expression_library(resources['expression_library'])
        if resources.get('sample_input', False):
            self.update_config_for_multiple_runs(resources)
        cache_directory = self.simulation_state.get_cache_directory()
        log_file = os.path.join(cache_directory, log_file_name)
        logger.enable_file_logging(log_file, verbose=False)
        try:
            logger.log_status("Cache Directory set to: " + cache_directory)
            with logger.block('Start simulation run'):
                models = resources.get("models", [])
                models_in_years = resources.get("models_in_year", {})
                resources.check_obligatory_keys(["years"])
                years = resources["years"]
                if (not isinstance(years, tuple)) and (not isinstance(years, list)):
                    raise TypeError, "Entry 'years' in resources must be a tuple."
                if len(years) < 2:
                    print years
                    raise StandardError, "Entry 'years' in resources must be of length at least 2."
                start_year = years[0]
                end_year = years[-1]
                debuglevel = resources.get("debuglevel", 0)
                seed_values = resources.get('seed', NO_SEED)
                logger.log_status("random seed = %s" % str(seed_values))
                seed(seed_values)
                for year in range(start_year, end_year + 1):
                    with logger.block("Starting simulation for year " + str(year)):
                        self.simulation_state.set_current_time(year)
                        # Start each year with an empty dataset pool.
                        SessionConfiguration().get_dataset_pool().remove_all_datasets()
                        # _run_year does its own per-year file logging; the
                        # run-level log is suspended while it executes.
                        logger.disable_file_logging(log_file)
                        try:
                            if models_in_years.get(year, None) is not None:
                                models_to_run = models_in_years[year]
                            else:
                                models_to_run = models
                            self._run_year(
                                year=year,
                                models=models_to_run,
                                simulation_state=self.simulation_state,
                                debuglevel=debuglevel,
                                resources=resources,
                                write_datasets_to_cache_at_end_of_year=write_datasets_to_cache_at_end_of_year,
                                cleanup_datasets=cleanup_datasets)
                        finally:
                            logger.enable_file_logging(log_file, verbose=False)
                        # Force garbage collection between years.
                        collect()
        finally:
            logger.disable_file_logging(log_file)

    def flush_datasets(self, dataset_names, after_model=False):
        """Flush each named dataset currently held in the session's pool."""
        dataset_pool = SessionConfiguration().get_dataset_pool()
        for dataset_name in dataset_names:
            if dataset_pool.has_dataset(dataset_name):
                self.flush_dataset(dataset_pool.get_dataset(dataset_name), after_model=after_model)

    def flush_dataset(self, dataset, after_model=False):
        """Write the PRIMARY attributes of this dataset to the cache."""
        if dataset and isinstance(dataset, Dataset):
            # Do not flush after model if not necessary
            if after_model:
                # Nothing but id attributes present: nothing worth flushing.
                if len(dataset.get_attribute_names()) <= len(dataset.get_id_name()):
                    return
                # Nothing new beyond what is already known, and (at most) only
                # ids in memory: drop computed attributes and skip the write.
                if (len(dataset.get_attribute_names()) == len(dataset.get_known_attribute_names())) and \
                   (len(dataset.get_attributes_in_memory()) <= len(dataset.get_id_name())):
                    dataset.delete_computed_attributes()
                    return
            dataset.delete_computed_attributes()
            dataset.load_and_flush_dataset()

    def flush_datasets_after_model(self, resources):
        """Flush datasets after a model ran, per the resources configuration."""
        if resources.get('flush_variables', False):
            AttributeCache().delete_computed_tables()
            # this will also delete computed attributes
            datasets_to_cache = SessionConfiguration().get_dataset_pool().datasets_in_pool().keys()
        else:
            datasets_to_cache = resources.get("datasets_to_cache_after_each_model", [])
        self.flush_datasets(datasets_to_cache, after_model=True)

    def _run_year(self, year, models, simulation_state, debuglevel, resources, write_datasets_to_cache_at_end_of_year, cleanup_datasets=True):
        """Run all configured models for a single year.

        Assumes that all datasets resides in the cache directory in binary
        format.  Builds a local namespace (datasets bound by name via exec)
        that do_init/do_prepare/do_process evaluate model code against.
        """
        # Optional debugger hook; absence of wingdbstub is not an error.
        try:
            import wingdbstub
        except:
            pass
        self.vardict = {}
        log_file_name = os.path.join(simulation_state.get_cache_directory(), "year_%s_log.txt" % year)
        logger.enable_file_logging(log_file_name, 'w')
        try:
            logger.start_block('Simulate year %s' % year)
            try:
                base_year = resources['base_year']
                if year == base_year:
                    year_for_base_year_cache = year  # case of estimation
                else:
                    year_for_base_year_cache = year - 1
                cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
                self.vardict['cache_storage'] = cache_storage
                base_cache_storage = AttributeCache().get_flt_storage_for_year(base_year)
                self.vardict['base_cache_storage'] = base_cache_storage
                simulation_state.set_flush_datasets(resources.get("flush_variables", False))
                SessionConfiguration()["simulation_year"] = year
                SessionConfiguration()["debuglevel"] = debuglevel
                datasets_to_preload_in_year = resources.get('datasets_to_preload_in_year', {})
                if datasets_to_preload_in_year.get(year, None) is not None:
                    datasets_to_preload = datasets_to_preload_in_year[year]
                else:
                    datasets_to_preload = resources.get('datasets_to_preload', {})
                for dataset_name in datasets_to_preload:
                    SessionConfiguration().get_dataset_from_pool(dataset_name)
                models_configuration = resources.get('models_configuration', {})
                dataset_pool = SessionConfiguration().get_dataset_pool()
                datasets = {}
                for dataset_name, its_dataset in dataset_pool.datasets_in_pool().iteritems():
                    self.vardict[dataset_name] = its_dataset
                    datasets[dataset_name] = its_dataset
                    exec '%s=its_dataset' % dataset_name
                    # This is needed. It resides in locals()
                    # and is passed on to models as they run.
                    ### TODO: There has got to be a better way!
                model_resources = Resources(datasets)
                n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run(models, models_configuration)
                self.run_year_namespace = locals()
                #==========
                # Run the models.
                #==========
                model_number = -1
                for model_entry in models:
                    # list 'models' can be in the form:
                    # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
                    #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                    #                                      'commercial']}},
                    #  {'model_name_3': ['estimate', 'run']},
                    #  'model_name_4',
                    #  {'model_name_5': {'group_members': 'all'}}
                    # ]
                    # get list of methods to be processed evtl. for each group member
                    if isinstance(model_entry, dict):
                        model_name, value = model_entry.items()[0]
                        if not isinstance(value, dict):  # is a model group
                            processes = value
                            if not isinstance(processes, list):
                                processes = [processes]
                    else:  # in the form 'model_name_4' in the comment above
                        model_name = model_entry
                        processes = ["run"]
                    group_member = None
                    model_group = model_group_members_to_run[model_name][1]
                    # Run at least once even when the model has no group members.
                    last_member = max(1, len(model_group_members_to_run[model_name][0].keys()))
                    for imember in range(last_member):
                        controller_config = models_configuration[model_name]["controller"]
                        model_configuration = models_configuration[model_name]
                        if model_group_members_to_run[model_name][0].keys():
                            group_member_name = model_group_members_to_run[model_name][0].keys()[imember]
                            group_member = ModelGroupMember(model_group, group_member_name)
                            processes = model_group_members_to_run[model_name][0][group_member_name]
                            # A member-specific configuration ('<member>_<model>')
                            # overrides the model-level one when present.
                            member_model_name = "%s_%s" % (group_member_name, model_name)
                            if member_model_name in models_configuration.keys():
                                model_configuration = models_configuration[member_model_name]
                                if "controller" in model_configuration.keys():
                                    controller_config = model_configuration["controller"]
                        datasets_to_preload_for_this_model = controller_config.get('_model_structure_dependencies_', {}).get('dataset', [])
                        for dataset_name in datasets_to_preload_for_this_model:
                            # Best effort: a dataset that fails to load is
                            # logged and skipped, not fatal.
                            try:
                                if not dataset_pool.has_dataset(dataset_name) or (dataset_name not in datasets.keys()):
                                    ds = dataset_pool.get_dataset(dataset_name)
                                    self.vardict[dataset_name] = ds
                                    datasets[dataset_name] = ds
                                    exec '%s=ds' % dataset_name
                            except:
                                logger.log_warning('Failed to load dataset %s.' % dataset_name)
                        # import part
                        if "import" in controller_config.keys():
                            import_config = controller_config["import"]
                            for import_module in import_config.keys():
                                exec("from %s import %s" % (import_module, import_config[import_module]))
                        # gui_import_replacements part
                        # This is a temporary hack -- replicates the functionality of the "import" section
                        # for use with the GUI. The contents of this part of the config is a dictionary.
                        # Keys are names of models (not used here). Values are 2 element pairs.
                        # The first element is a name and the second is a value. Bind the name to the value.
                        if "gui_import_replacements" in controller_config.keys():
                            import_replacement_config = controller_config["gui_import_replacements"]
                            for model_name in import_replacement_config.keys():
                                pair = import_replacement_config[model_name]
                                temp = pair[1]
                                # Python-2 exec statement: the % binds to the
                                # string, so this executes '<name> = temp'.
                                # NOTE(review): this syntax breaks under Python 3.
                                exec("%s = temp") % pair[0]
                        # init part
                        model = self.do_init(locals())
                        # estimate and/or run part
                        for process in processes:
                            model_number = model_number + 1
                            # write status file
                            model.set_model_system_status_parameters(year, n_models, model_number, resources.get('status_file_for_gui', None))
                            model.write_status_for_gui()
                            # prepare part
                            exec(self.do_prepare(locals()))
                            processmodel_config = controller_config[process]
                            if "output" in processmodel_config.keys():
                                outputvar = processmodel_config["output"]
                            else:
                                outputvar = "process_output"
                            self.vardict[outputvar] = self.do_process(locals())
                            # Bind the output into this namespace so later
                            # models can reference it by name.
                            exec outputvar + '=self.vardict[outputvar]'
                            # check command file from gui, if the simulation should be stopped or paused
                            self.do_commands_from_gui(resources.get('command_file_for_gui', None))
                            # capture namespace for interactive estimation
                            self.run_year_namespace = locals()
                            self.flush_datasets_after_model(resources)
                        del model
                        collect()
                # Write all datasets to cache.
                if write_datasets_to_cache_at_end_of_year:
                    logger.start_block('Writing datasets to cache for year %s' % year)
                    try:
                        for dataset_name, its_dataset in SessionConfiguration().get_dataset_pool().datasets_in_pool().iteritems():
                            self.flush_dataset(its_dataset)
                    finally:
                        logger.end_block()
            finally:
                logger.end_block()
        finally:
            logger.disable_file_logging(log_file_name)
        if cleanup_datasets:
            SessionConfiguration().delete_datasets()

    def do_init(self, parent_state):
        """Run the 'init' part of this model's configuration. Returns model object.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> 'self':
                exec('%s = parent_state["%s"]' % (key, key))
        init_config = parent_state['controller_config']["init"]
        group_member = parent_state['group_member']
        if group_member is None:  # No model group
            cmd = "%s(%s)" % (init_config["name"], self.construct_arguments_from_config(init_config))
            model = eval(cmd)
        else:  # Model belongs to a group
            model = eval("%s(group_member, %s)" % (init_config["name"], self.construct_arguments_from_config(init_config)))
        return model

    def do_prepare(self, parent_state):
        """Prepares for the current model in the parent state's context.
        What to do is determined by the contents of the current model's
        controller configuration.

        controller_config is the 'controller' part of the model configuration.
        vardict is a dictionary into which the output of the model's
        'prepare_output' method will be put.

        Returns a statement string for the caller to exec (binding the prepare
        output into the caller's namespace), or '' when there is no
        'prepare_for_<process>' entry.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> 'self':
                exec('%s = parent_state["%s"]' % (key, key))
        key_name = "prepare_for_%s" % process
        if key_name in controller_config.keys():
            prepare_config = controller_config[key_name]
            if "output" in prepare_config.keys():
                outputvar = prepare_config["output"]
            else:
                outputvar = "prepare_output"
            self.vardict[outputvar] = eval("model.%s(%s)" % (prepare_config["name"], self.construct_arguments_from_config(prepare_config)))
            return '%s=self.vardict["%s"]' % (outputvar, outputvar)
        else:
            # do nothing when return value is exec'ed
            return ''

    def do_process(self, parent_state):
        """Invoke the current process method ('run'/'estimate'/...) on the
        model in the parent state's context and return its result."""
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> 'self':
                exec('%s = parent_state["%s"]' % (key, key))
        ev = "model.%s(%s)" % (process, self.construct_arguments_from_config(processmodel_config))
        return eval(ev)

    def get_number_of_models_and_model_group_members_to_run(self, models, models_configuration):
        """Count number_of models in the list 'models' that can include group
        members (each member and each process is one model).

        Returns (number_of_models, model_group_members_to_run) where the
        second element maps model_name -> [{member_name: [processes]}, ModelGroup-or-None].
        """
        # list models can be in the form:
        # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
        #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
        #                                      'commercial']}},
        #  {'model_name_3': ['estimate', 'run']},
        #  'model_name_4',
        #  {'model_name_5': {'group_members': 'all'}}
        # ]
        number_of_models = 1
        model_group_members_to_run = {}
        for model_entry in models:
            if isinstance(model_entry, dict):
                model_name, value = model_entry.items()[0]
                if isinstance(value, dict):  # is a model group
                    if not value.keys()[0] == "group_members":
                        raise KeyError, "Key for model " + model_name + " must be 'group_members'."
                    group_members = value["group_members"]
                    model_group = None
                    if 'group_by_attribute' in models_configuration[model_name]["controller"].keys():
                        group_dataset_name, group_attribute = models_configuration[model_name]["controller"]['group_by_attribute']
                        model_group = ModelGroup(SessionConfiguration().get_dataset_from_pool(group_dataset_name), group_attribute)
                    if not isinstance(group_members, list):
                        group_members = [group_members]
                    if group_members[0] == "_all_":  # see 'model_name_5' example above
                        if model_group is None:
                            raise KeyError, "Entry 'group_by_attribute' is missing for model %s" % model_name
                        group_members = model_group.get_member_names()
                    model_group_members_to_run[model_name] = [{}, model_group]
                    for member in group_members:
                        if isinstance(member, dict):
                            # see 'model_name_2' ('residential') in the comment above
                            member_name = member.keys()[0]
                            model_group_members_to_run[model_name][0][member_name] = member[member_name]
                            if not isinstance(model_group_members_to_run[model_name][0][member_name], list):
                                model_group_members_to_run[model_name][0][member_name] = [model_group_members_to_run[model_name][0][member_name]]
                            number_of_models += len(model_group_members_to_run[model_name][0][member_name])
                        else:  # see 'model_name_1'
                            model_group_members_to_run[model_name][0][member] = ["run"]
                            number_of_models += len(model_group_members_to_run[model_name][0][member])
                else:  # in the form 'model_name_3' in the comment above
                    model_group_members_to_run[model_name] = [{}, None]
                    if not isinstance(value, list):
                        number_of_models += 1
                    else:
                        number_of_models += len(value)
            else:  # in the form 'model_name_4' in the comment above
                model_group_members_to_run[model_entry] = [{}, None]
                number_of_models += 1
        return (number_of_models, model_group_members_to_run)

    def do_commands_from_gui(self, filename=None):
        """Poll a GUI command file: 'stop' exits the process, 'pause' keeps
        polling every 10 seconds, 'resume' returns to the caller."""
        if (filename is None) or not os.path.exists(filename):
            return
        while True:
            f = file(filename)
            line = f.read().strip()
            f.close()
            if line == 'stop':
                logger.log_warning('Simulation stopped.')
                sys.exit()
            elif line == 'resume':
                break
            elif line <> 'pause':
                logger.log_warning("Unknown command '%s'. Allowed commands: 'stop', 'pause', 'resume'." % line)
            time.sleep(10)

    def run_multiprocess(self, resources):
        """Run the simulation with one forked child process per year,
        handing each year a deterministic per-year seed."""
        resources = Resources(resources)
        profiler_name = resources.get("profile_filename", None)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()
        ### TODO: Get rid of this! There is absolutely no good reason to be
        ### changing the Configuration!
        resources['cache_directory'] = cache_directory
        log_file = os.path.join(cache_directory, 'run_multiprocess.log')
        logger.enable_file_logging(log_file)
        start_year = resources["years"][0]
        end_year = resources["years"][-1]
        nyears = end_year - start_year + 1
        root_seed = resources.get("seed", NO_SEED)
        if resources.get('_seed_dictionary_', None) is not None:
            # This is added by the RunManager to ensure reproducibility
            # including restarted runs
            seed_dict = resources.get('_seed_dictionary_')
            seed_array = array(map(lambda year: seed_dict[year], range(start_year, end_year + 1)))
        else:
            seed(root_seed)
            seed_array = randint(1, 2**30, nyears)
        logger.log_status("Running simulation for years %d thru %d" % (start_year, end_year))
        logger.log_status("Simulation root seed: %s" % root_seed)
        for iyear, year in enumerate(range(start_year, end_year + 1)):
            success = self._run_each_year_as_separate_process(iyear, year, seed=seed_array[iyear], resources=resources, profiler_name=profiler_name, log_file=log_file)
            if not success:
                # A failed year aborts the remaining years.
                break
        self._notify_stopped()
        if profiler_name is not None:  # insert original value
            resources["profile_filename"] = profiler_name
        logger.log_status("Done running simulation for years %d thru %d" % (start_year, end_year))

    #TODO: changing of configuration
    def _run_each_year_as_separate_process(self, iyear, year, seed=None, resources=None, profiler_name=None, log_file=None):
        """Fork a child process that runs a single year. Returns the child's
        success flag from _fork_new_process."""
        logger.start_block('Running simulation for year %d in new process' % year)
        resources['years'] = (year, year)
        # NOTE(review): the trailing comma makes this a 1-tuple (seed,) —
        # presumably the child expects that shape; confirm before changing.
        resources['seed'] = seed,
        if profiler_name is not None:
            # add year to the profile name
            resources["profile_filename"] = "%s_%s" % (profiler_name, year)
        optional_args = []
        if log_file:
            optional_args += ['--log-file-name', os.path.split(log_file)[-1]]
        success = False
        try:
            logger.disable_file_logging(log_file)
            success = self._fork_new_process('opus_core.model_coordinators.model_system', resources, optional_args=optional_args)
            logger.enable_file_logging(log_file, verbose=False)
        finally:
            logger.end_block()
        return success

    def run_in_one_process(self, resources, run_in_background=False, class_path='opus_core.model_coordinators.model_system'):
        """Run the whole simulation in a single forked child process."""
        resources = Resources(resources)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()
        ### TODO: Get rid of this! There is no good reason to be changing the
        ### Configuration.
        resources['cache_directory'] = cache_directory
        self._fork_new_process('%s' % class_path, resources, delete_temp_dir=False, run_in_background=run_in_background)
        self._notify_stopped()

    def run_in_same_process(self, resources, **kwargs):
        """Run the simulation in this process via RunModelSystem."""
        resources = Resources(resources)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()
        ### TODO: Get rid of this! There is no good reason to be changing the
        ### Configuration.
        resources['cache_directory'] = cache_directory
        self._notify_started()
        RunModelSystem(model_system=self, resources=resources, **kwargs)
        self._notify_stopped()

    def construct_arguments_from_config(self, config):
        """Render config['arguments'] as a 'key=value, ...' string suitable
        for splicing into the eval'ed model calls (values are inserted
        verbatim, so string values must already carry their own quotes)."""
        key = "arguments"
        if (key not in config.keys()) or (len(config[key].keys()) <= 0):
            return ""
        arg_dict = config[key]
        result = ""
        for arg_key in arg_dict.keys():
            result += "%s=%s, " % (arg_key, arg_dict[arg_key])
        return result

    def wait_for_start(self):
        """Block until another thread marks the run as started."""
        self.running_conditional.acquire()
        while not self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_finish(self):
        """Block until another thread marks the run as stopped."""
        self.running_conditional.acquire()
        while self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_process_or_finish(self, process_index):
        """Block until the process with the given index has been forked or the
        run has finished; returns a valid index into forked_processes."""
        self.running_conditional.acquire()
        while process_index >= len(self.forked_processes) and self.running:
            self.running_conditional.wait()
        self.running_conditional.release()
        if not self.running:
            process_index = len(self.forked_processes) - 1
        return process_index

    def _fork_new_process(self, module_name, resources, run_in_background=False, **key_args):
        """Fork a child running module_name; unless run_in_background, wait
        for it and clean up. Returns the fork's success flag."""
        self.running_conditional.acquire()
        self.running = True
        self.forked_processes.append(ForkProcess())
        key_args["run_in_background"] = run_in_background
        success = self.forked_processes[-1].fork_new_process(module_name, resources, **key_args)
        # Wake any thread blocked in the wait_for_* methods.
        self.running_conditional.notifyAll()
        self.running_conditional.release()
        if not run_in_background:
            self.forked_processes[-1].wait()
            self.forked_processes[-1].cleanup()
        return success

    def _notify_started(self):
        """Mark the run as started and wake waiters."""
        self.running_conditional.acquire()
        self.running = True
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def _notify_stopped(self):
        """Mark the run as stopped and wake waiters."""
        self.running_conditional.acquire()
        self.running = False
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def update_config_for_multiple_runs(self, config):
        """For each model listed in 'models_with_sampled_coefficients',
        register a '<model>_sampled_coef' variant (coefficients sampled from a
        normal distribution around the base-cache values) and substitute it
        into the first simulated year's model list."""
        models_to_update = config.get('models_with_sampled_coefficients', [])
        if 'models_in_year' not in config.keys():
            config['models_in_year'] = {}
        if config['models_in_year'].get(config['base_year'] + 1, None) is None:
            config['models_in_year'][config['base_year'] + 1] = config.get('models')
        for umodel in models_to_update:
            try:
                i = config['models_in_year'][config['base_year'] + 1].index(umodel)
                new_model_name = '%s_sampled_coef' % umodel
                config['models_in_year'][config['base_year'] + 1][i] = new_model_name
            except:
                pass
            # NOTE(review): if .index(umodel) above raised, new_model_name is
            # undefined (first iteration -> NameError) or stale from the
            # previous iteration — confirm this silent-skip is intended.
            config["models_configuration"][new_model_name] = Configuration(config["models_configuration"][umodel])
            config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"]["sample_coefficients"] = True
            config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"]["distribution"] = "'normal'"
            config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"]["cache_storage"] = "base_cache_storage"
class ModelExplorer(object): def __init__(self, model, year, scenario_name=None, model_group=None, configuration=None, xml_configuration=None, cache_directory=None): self.model_group = model_group self.explored_model = model if configuration is None: if xml_configuration is None: raise StandardError, "Either dictionary based or XML based configuration must be given." config = xml_configuration.get_run_configuration(scenario_name) else: config = Configuration(configuration) self.scenario_models = config['models'] if config.get('models_in_year', None) is not None and config['models_in_year'].get(year, None) is not None: del config['models_in_year'][year] if model is not None: dependent_models = config['models_configuration'][model]['controller'].get('dependencies', []) config['models'] = dependent_models if model_group is None: config['models'] = config['models'] + [{model: ["run"]}] else: config['models'] = config['models'] + [{model: {"group_members": [{model_group: ["run"]}]}}] else: config['models'] = [] config['years'] = [year, year] config["datasets_to_cache_after_each_model"]=[] config['flush_variables'] = False self.config = Resources(config) self.xml_configuration = xml_configuration if cache_directory is None: cache_directory = config['creating_baseyear_cache_configuration'].baseyear_cache.existing_cache_to_copy self.simulation_state = SimulationState(new_instance=True, base_cache_dir=cache_directory, start_time=config.get('base_year', 0)) self.config['cache_directory'] = cache_directory SessionConfiguration(new_instance=True, package_order=self.config['dataset_pool_configuration'].package_order, in_storage=AttributeCache()) def run(self): self.model_system = ModelSystem() self.model_system.run(self.config, write_datasets_to_cache_at_end_of_year=False, cleanup_datasets=False) logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory()) def get_agents_for_simulation(self): return self.get_active_agent_set() def get_model_name(self): 
return (self.explored_model, self.model_group) def get_specification(self): return self.get_model().get_specified_coefficients().specification def get_probabilities(self, submodel=-2): """Return a tuple of probabilities and choices, see ChoiceModel.get_probabilities_and_choices. Works only for the ChoiceModel class. """ model = self.get_model() #if isinstance(model, ChoiceModel): return model.get_probabilities_and_choices(submodel) #print '\nMethod is implemented only for ChoiceModels.\n' def export_probabilities(self, submodel=-2, filename='./choice_model.txt'): """Export probabilities and choices into a file. Works only for the ChoiceModel class""" model = self.get_model() #if isinstance(model, ChoiceModel): model.export_probabilities(submodel, file_name=filename) #else: # print '\nMethod is implemented only for ChoiceModels.\n' def get_model(self): """Return a model object.""" return self.model_system.run_year_namespace["model"] def get_dataset(self, dataset_name): """Return a Dataset object of the given name.""" ds = self.model_system.run_year_namespace.get(dataset_name, None) if ds is None: if dataset_name not in self.model_system.run_year_namespace["datasets"].keys(): ds = self.get_dataset_pool().get_dataset(dataset_name) else: ds = self.model_system.run_year_namespace["datasets"][dataset_name] return ds def get_data(self, coefficient, submodel=-2): """Calls method get_data of the Model object. Should return a data array for the given coefficient and submodel. Can be used only on in models that are estimable.""" return self.get_model().get_data(coefficient, submodel) def get_coefficient_names(self, submodel=-2): """Calls method get_coefficient_names of the Model object which should return coefficient names for the given submodel. 
Can be used only on in models that are estimable.""" return self.get_model().get_coefficient_names(submodel) def get_coefficients(self, submodel=-2): """Return an object of class SpecifiedCoefficientsFor1Submodel giving the model coefficients. Can be used only on in models that are estimable.""" return SpecifiedCoefficientsFor1Submodel(self.get_model().get_specified_coefficients(), submodel) def get_data_as_dataset(self, submodel=-2, **kwargs): """Calls method get_data_as_dataset of the Model object which should return an object of class Dataset containing model data. Works only for ChoiceModel (returns InteractionDataset), and for RegressionModel (returns Dataset). """ return self.get_model().get_data_as_dataset(submodel, **kwargs) def get_choice_set(self): """Return a Dataset of choices. Works only for the ChoiceModel class. """ return self.get_model().model_interaction.interaction_dataset.get_dataset(2) def get_choice_set_index(self): """Return an array of indices of choices. Works only for the ChoiceModel class. """ return self.get_model().model_interaction.interaction_dataset.get_index(2) def get_choice_set_index_for_submodel(self, submodel): """Return an array of indices of choices for the given submodel. Works only for the ChoiceModel class. """ index = self.get_choice_set_index() return take (index, indices=self.get_model().observations_mapping[submodel], axis=0) def get_active_choice_set(self, submodel=None): """Return choice set as seen by agents in the model. Works only for the ChoiceModel class. """ if submodel is None: choices = self.get_choice_set_index() else: choices = self.get_choice_set_index_for_submodel(submodel) choices = unique(choices.flatten()) ds = self.get_choice_set() return DatasetSubset(ds, choices) def get_agent_set(self): """Return a Dataset of all agents. Works only for the ChoiceModel class. 
""" return self.get_model().model_interaction.interaction_dataset.get_dataset(1) def get_agent_set_index(self): """Return an array of indices of agents that are the choosers. Works only for the ChoiceModel class. """ return self.get_model().model_interaction.interaction_dataset.get_index(1) def get_agent_set_index_for_submodel(self, submodel): """Return an array of indices of agents for the given submodel that are the choosers. Works only for the ChoiceModel class. """ model = self.get_model() return model.model_interaction.interaction_dataset.get_index(1)[model.observations_mapping[submodel]] def get_active_agent_set(self, submodel=None): """Return agent set that make choices in the model. Works only for the ChoiceModel class. """ agents = self.get_agent_set() if submodel is None: index = self.get_agent_set_index() else: index = self.get_agent_set_index_for_submodel(submodel) return DatasetSubset(agents, index) def agent_summary(self, submodel=None): ds = self.get_active_agent_set(submodel=submodel) ds.summary() def choice_summary(self, submodel=None): ds = self.get_active_choice_set(submodel=submodel) ds.summary() def data_summary(self, **kwargs): ds = self.get_data_as_dataset(**kwargs) ds.summary() def _get_before_after_dataset_from_attribute(self, var_name, storage, **kwargs): dataset_name = var_name.get_dataset_name() ds = self.get_dataset(dataset_name) ds.compute_variables([var_name], dataset_pool=self.get_dataset_pool()) ds.copy_attribute_by_reload(var_name, storage=storage, **kwargs) return ds def get_before_after_attribute(self, attribute_name): """Return a dictionary with elements 'before' (contains an array of the given attribute that is reloaded from the cache) and 'after' (contains an array of the given attribute with the current values). 
""" from opus_core.store.attribute_cache import AttributeCache var_name = VariableName(attribute_name) storage = AttributeCache(self.simulation_state.get_cache_directory()) ds = self._get_before_after_dataset_from_attribute(var_name, storage=storage, package_order=self.get_dataset_pool().get_package_order()) return {'after': ds[var_name.get_alias()], 'before': ds.get_attribute('%s_reload__' % var_name.get_alias())} def summary_before_after(self, attribute_name): """Print summary of the given attribute 'before' (values reloaded from the cache) and 'after' (current values). """ from opus_core.store.attribute_cache import AttributeCache var_name = VariableName(attribute_name) storage = AttributeCache(self.simulation_state.get_cache_directory()) ds = self._get_before_after_dataset_from_attribute(var_name, storage=storage, package_order=self.get_dataset_pool().get_package_order()) print '' print 'Before model run:' print '=================' ds.summary(names=['%s_reload__' % var_name.get_alias()]) print '' print 'After model run:' print '=================' #ds.summary(names=[var_name.get_alias()]) ds.summary(names=[var_name.get_alias()]) def model_dependencies(self, model=None, group=None): """Prints out all dependencies for the model.""" from opus_core.variables.dependency_query import DependencyChart if model is None: # current model model, group = self.get_model_name() spec = self.get_specification() else: spec = None if model == 'all': # print dependencies for all models for thismodel in self.scenario_models: thisgroups = None if isinstance(thismodel, dict): thisgroups = thismodel[thismodel.keys()[0]].get('group_members', None) thismodel = thismodel.keys()[0] if not isinstance(thisgroups, list): thisgroups = [thisgroups] for group in thisgroups: chart = DependencyChart(self.xml_configuration, model=thismodel, model_group=group) chart.print_model_dependencies() else: chart = DependencyChart(self.xml_configuration, model=model, model_group=group, specification=spec) 
chart.print_model_dependencies() def variable_dependencies(self, name): """Prints out dependencies of this variable. 'name' can be either an alias from the model specification or an expression.""" from opus_core.variables.dependency_query import DependencyChart varname = None allvars = self.get_specification().get_variable_names() for ivar in range(len(allvars)): thisvar = allvars[ivar] if not isinstance(thisvar, VariableName): thisvar = VariableName(thisvar) if name == thisvar.get_alias(): varname = thisvar break if varname is None: varname = VariableName(name) chart = DependencyChart(self.xml_configuration) chart.print_dependencies(varname.get_expression()) def compute_expression(self, attribute_name): """Compute any expression and return its values.""" var_name = VariableName(attribute_name) dataset_name = var_name.get_dataset_name() ds = self.get_dataset(dataset_name) return ds.compute_variables([var_name], dataset_pool=self.get_dataset_pool()) def get_dataset_pool(self): return self.model_system.run_year_namespace["dataset_pool"] def plot_histogram_before_after(self, attribute_name, bins=None): """Plot histograms of values returned by the method get_before_after_attribute.""" from opus_core.plot_functions import create_histogram, show_plots from matplotlib.pylab import figure values = self.get_before_after_attribute(attribute_name) alias = VariableName(attribute_name).get_alias() fig = figure() fig.add_subplot(121) create_histogram(values['before'], main='%s (before)' % alias, bins=bins) fig.add_subplot(122) create_histogram(values['after'], main='%s (after)' % alias, bins=bins) show_plots() def get_correlation(self, submodel=-2): """Return an array of correlations between all variables of the model data (for given submodel). 
Works only for ChoiceModel and RegressionModel""" ds = self.get_data_as_dataset(submodel) attrs = [attr for attr in ds.get_known_attribute_names() if attr not in ds.get_id_name()] return ds.correlation_matrix(attrs) def plot_correlation(self, submodel=-2, useR=False, **kwargs): """Plot correlations between all variables of the model data (for given submodel). Works only for ChoiceModel and RegressionModel""" ds = self.get_data_as_dataset(submodel) attrs = [attr for attr in ds.get_known_attribute_names() if attr not in ds.get_id_name()] ds.correlation_image(attrs, useR=useR, **kwargs) def plot_choice_set(self, agents_index=None, aggregate_to=None, matplotlib=True, **kwargs): """Plot map of the sampled choice set. agents_index can be given to restrict the set of agents to which the choice set belongs to. aggregate_to is a name of a dataset which the choice set should be aggregated to. If matplotlib is False, mapnik is used (and required). Additional arguments are passed to plot_map or plot_map_matplotlib. E.g. 
(choice set are buildings, aggregated to zones, for the first agent) er.plot_choice_set(aggregate_to='zone', matplotlib=False, project_name='psrc_parcel', file='choice_set0.png', agents_index=0) """ choice_set = self.get_choice_set() if agents_index is None: flatten_choice_index = self.get_choice_set_index().ravel() else: flatten_choice_index = self.get_choice_set_index()[agents_index,:].ravel() if aggregate_to is not None: ds_aggr = self.get_dataset(aggregate_to) result = ds_aggr.sum_over_ids(choice_set[ds_aggr.get_id_name()[0]][flatten_choice_index], ones(flatten_choice_index.size)) ds = ds_aggr else: result = choice_set.sum_over_ids(choice_set.get_id_attribute()[flatten_choice_index], ones(flatten_choice_index.size)) ds = choice_set dummy_attribute_name = '__sampled_choice_set__' ds.add_attribute(name=dummy_attribute_name, data=result) if matplotlib: coord_syst = None if ds.get_coordinate_system() is None and hasattr(ds, 'compute_coordinate_system'): coord_syst = ds.compute_coordinate_system(dataset_pool=self.get_dataset_pool()) ds.plot_map_matplotlib(dummy_attribute_name, background=-1, coordinate_system=coord_syst, **kwargs) else: ds.plot_map(dummy_attribute_name, background=-1, **kwargs) ds.delete_one_attribute(dummy_attribute_name) def plot_choice_set_attribute(self, name, agents_index=None, aggregate_to=None, function='sum', matplotlib=True, **kwargs): """Plot map of the given attribute for the sampled choice set. agents_index can be given to restrict the set of agents to which the choice set belongs to. aggregate_to is a name of a dataset which the choice set should be aggregated to. function defines the aggregating function (e.g. sum, mean, median, etc.) If matplotlib is False, mapnik is used (and required). Additional arguments are passed to plot_map or plot_map_matplotlib. E.g. 
er.plot_choice_set_attribute('residential_units', aggregate_to='zone', matplotlib=False, project_name='psrc_parcel', file='choice_resunits.png') """ choice_set = self.get_choice_set() if agents_index is None: flatten_choice_index = self.get_choice_set_index().ravel() else: flatten_choice_index = self.get_choice_set_index()[agents_index,:].ravel() filter_var = ones(choice_set.size(), dtype='int16') filter_var[unique(flatten_choice_index)] = 0 filter_idx = where(filter_var)[0] if aggregate_to is not None: ds_aggr = self.get_dataset(aggregate_to) result = ds_aggr.aggregate_over_ids(choice_set[ds_aggr.get_id_name()[0]][flatten_choice_index], what=choice_set[name][flatten_choice_index], function=function) filter = ds_aggr.sum_over_ids(choice_set[ds_aggr.get_id_name()[0]][filter_idx], ones(filter_idx.size)) filter = filter > 0 ds = ds_aggr else: result = choice_set.aggregate_over_ids(choice_set.get_id_attribute()[flatten_choice_index], what=choice_set[name][flatten_choice_index], function=function) filter = filter_var ds = choice_set dummy_attribute_name = '__sampled_choice_set_attribute__' ds.add_attribute(name=dummy_attribute_name, data=result) dummy_filter_name = '__sampled_choice_set_filter__' ds.add_attribute(name=dummy_filter_name, data=filter) if matplotlib: coord_syst = None if ds.get_coordinate_system() is None and hasattr(ds, 'compute_coordinate_system'): coord_syst = ds.compute_coordinate_system(dataset_pool=self.get_dataset_pool()) ds.plot_map_matplotlib(dummy_attribute_name, filter=dummy_filter_name, coordinate_system=coord_syst, **kwargs) else: ds.plot_map(dummy_attribute_name, filter=dummy_filter_name, **kwargs) ds.delete_one_attribute(dummy_attribute_name) ds.delete_one_attribute(dummy_filter_name) def plot_coefficients(self, submodel=-2, exclude_constant=True, eqidx=0, plot=True, store_values_to_file=None): """ Plot a barchart of coefficient values. This can be used in a regression model, when coefficients are standardized (i.e. 
using the estimation module opus_core.estimate_linear_regression_standardized). store_values_to_file can be a file name where the values are stored. """ coef = self.get_coefficients(submodel) values = coef.get_coefficient_values() names = coef.get_coefficient_names() sd = coef.get_standard_errors() idx=ones(names.shape[1], dtype="bool") if exclude_constant: pos = coef.get_constants_positions() if pos.size > 0: idx[pos]=0 if store_values_to_file is not None: n = idx.sum() result = concatenate((reshape(names[eqidx, idx], (n,1)), reshape(values[eqidx, idx], (n,1)), reshape(sd[eqidx, idx], (n,1))), axis=1) write_to_text_file(store_values_to_file, array(['coefficient_name', 'estimate', 'standard_error']), delimiter='\t') write_table_to_text_file(store_values_to_file, result, delimiter='\t', mode='a') if plot: plot_barchart(values[eqidx, idx], labels = names[eqidx, idx], errors=sd[eqidx, idx]) else: return {'names': names[eqidx, idx], 'values': values[eqidx, idx], 'errors': sd[eqidx, idx]} def create_latex_tables(self, directory, other_info_keys=None): from opus_core.latex_table_creator import LatexTableCreator LTC = LatexTableCreator() LTC.create_latex_table_for_coefficients_for_model( self.get_model().get_specified_coefficients().coefficients, self.explored_model, directory, other_info_keys=other_info_keys) LTC.create_latex_table_for_specifications_for_model( self.get_model().get_specified_coefficients().specification, self.explored_model, directory)
class ModelExplorer(GenericModelExplorer):
    """Explorer that runs a single model (and its dependencies) for one year
    so that its inputs/outputs can be inspected interactively.
    NOTE(review): an apparently identical copy of this class appears later in
    this file - confirm whether the duplication is intentional.
    """
    def __init__(self, model, year, scenario_name=None, model_group=None,
                 configuration=None, xml_configuration=None, cache_directory=None):
        self.model_group = model_group
        self.explored_model = model

        # Build the run configuration: either from the XML project (scenario_name
        # selects the scenario) or from a plain dictionary.
        if configuration is None:
            if xml_configuration is None:
                raise StandardError, "Either dictionary based or XML based configuration must be given."
            config = xml_configuration.get_run_configuration(scenario_name)
        else:
            config = Configuration(configuration)

        if model is not None:
            # Run the model's declared dependencies first, then the model itself.
            dependent_models = config['models_configuration'][model][
                'controller'].get('dependencies', [])
            config['models'] = dependent_models
            if model_group is None:
                config['models'] = config['models'] + [{model: ["run"]}]
            else:
                config['models'] = config['models'] + [{
                    model: {
                        "group_members": [{
                            model_group: ["run"]
                        }]
                    }
                }]
        else:
            config['models'] = []

        # Explore exactly one simulation year; disable caching/flushing so data
        # stay in memory for inspection.
        config['years'] = [year, year]
        config["datasets_to_cache_after_each_model"] = []
        config['flush_variables'] = False

        self.config = Resources(config)
        self.xml_configuration = xml_configuration

        if cache_directory is None:
            # Fall back to the baseyear cache configured for this scenario.
            cache_directory = config[
                'creating_baseyear_cache_configuration'].baseyear_cache.existing_cache_to_copy
        self.simulation_state = SimulationState(new_instance=True,
                                                base_cache_dir=cache_directory)
        self.config['cache_directory'] = cache_directory
        SessionConfiguration(
            new_instance=True,
            package_order=self.config['dataset_pool_configuration'].package_order,
            in_storage=AttributeCache())

    def run(self):
        """Run the configured model(s) for the single explored year."""
        self.model_system = ModelSystem()
        self.model_system.run(self.config, write_datasets_to_cache_at_end_of_year=False)
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())

    def get_agents_for_simulation(self):
        """Return the agent set active in the explored model."""
        return self.get_active_agent_set()

    def get_model_name(self):
        """Return the tuple (model name, model group) of the explored model."""
        return (self.explored_model, self.model_group)

    def get_specification(self):
        """Return the specification of the explored model's coefficients."""
        return self.get_model().get_specified_coefficients().specification

    def get_probabilities(self, submodel=-2):
        """Return a tuple of probabilities and choices,
        see ChoiceModel.get_probabilities_and_choices.
        Works only for the ChoiceModel class.
        """
        model = self.get_model()
        if isinstance(model, ChoiceModel):
            return model.get_probabilities_and_choices(submodel)
        # Non-choice models: print a notice and implicitly return None.
        print '\nMethod is implemented only for ChoiceModels.\n'

    def export_probabilities(self, submodel=-2, filename='./choice_model.txt'):
        """Export probabilities and choices into a file.
        Works only for the ChoiceModel class"""
        model = self.get_model()
        if isinstance(model, ChoiceModel):
            model.export_probabilities(submodel, file_name=filename)
        else:
            print '\nMethod is implemented only for ChoiceModels.\n'
class TestLagVariables(opus_unittest.OpusTestCase):
    """Integration test for *_lagN variables, run against a copied baseyear cache.
    NOTE(review): an apparently identical copy of this class appears later in
    this file - confirm whether the duplication is intentional.
    """
    def setUp(self):
        # Build a fresh simulation state and copy the configured baseyear cache
        # into it, then prepare pre-baseyear data so lag variables can reach
        # back before the base year.
        self.config = TestCacheConfiguration()
        self.simulation_state = SimulationState(new_instance=True)
        SessionConfiguration(self.config, new_instance=True,
                             package_order=['urbansim', 'opus_core'],
                             in_storage=AttributeCache())
        self.base_year = self.config['base_year']
        creating_baseyear_cache_configuration = self.config['creating_baseyear_cache_configuration']
        self.simulation_state.set_current_time(self.base_year)
        cache_directory = self.simulation_state.get_cache_directory()
        copytree(os.path.join(creating_baseyear_cache_configuration.baseyear_cache.existing_cache_to_copy,
                              str(self.base_year)),
                 os.path.join(cache_directory, str(self.base_year)))
        cacher = CacheScenarioDatabase()
        cacher.prepare_data_before_baseyear(cache_directory, self.base_year,
                                            creating_baseyear_cache_configuration)
        self.config['cache_directory'] = cache_directory
        cache_storage = AttributeCache().get_flt_storage_for_year(self.base_year)
        cache_directory = self.simulation_state.get_cache_directory()
        flt_directory = os.path.join(cache_directory, str(self.base_year))
        # Load the gridcell dataset directly from the base-year flt cache.
        self.gridcell = DatasetFactory().get_dataset('gridcell',
            package='urbansim',
            subdir='datasets',
            arguments={'in_storage':StorageFactory().get_storage('flt_storage', storage_location=flt_directory)}
            )

    def tearDown(self):
        # Also deletes the cache directory created in setUp.
        self.simulation_state.remove_singleton(delete_cache=True)

    def test_lag_variables(self):
        """Test lag variables"""
        # A weak test that computing a lag variable on a realistic dataset does not crash.
        self.gridcell.compute_variables('urbansim.gridcell.n_recent_transitions_to_developed',
                                        resources=self.config)
        # The following tests are fragile, since they need to know exactly what values are being
        # subtracted, and ignore any negative amount that is truncated at zero.
        # If you change the "subset" dataset to a different region, you will
        # have to update the expected value.
        self.gridcell.compute_variables('urbansim.gridcell.commercial_sqft', resources=self.config)
        self.gridcell.compute_variables('urbansim.gridcell.commercial_sqft_lag1', resources=self.config)
        self.gridcell.compute_variables('urbansim.gridcell.commercial_sqft_lag2', resources=self.config)
        sqft = self.gridcell.get_attribute('commercial_sqft').sum()
        sqft_lag1 = self.gridcell.get_attribute('commercial_sqft_lag1').sum()
        sqft_lag2 = self.gridcell.get_attribute('commercial_sqft_lag2').sum()
        logger.log_status('sqft = %s' % sqft)
        logger.log_status('sqft_lag1 = %s' % sqft_lag1)
        logger.log_status('sqft_lag2 = %s' % sqft_lag2)
        logger.log_status('base_year = %s' % self.base_year)
        self.assertEqual(self.base_year, SimulationState().get_current_time())
        # lag1 of the base year equals the base year itself.
        self.assertEqual(sqft, sqft_lag1)
        self.assertEqual(578+2083+1103+87, sqft_lag1 - sqft_lag2)
        # Do lag variables produce different results for derived attributes?
        self.gridcell.compute_variables('urbansim.gridcell.n_recent_development_projects',
                                        resources=self.config)
        self.gridcell.compute_variables('urbansim.gridcell.n_recent_development_projects_lag1',
                                        resources=self.config)
        n_recent_projects = self.gridcell.get_attribute('n_recent_development_projects').sum()
        n_recent_projects_lag1 = self.gridcell.get_attribute('n_recent_development_projects_lag1').sum()
        self.assertEqual(n_recent_projects, 11)
        self.assertEqual(n_recent_projects_lag1, 15)
        # Do lag_variables produce different results for derived attributes without lags?
        self.gridcell.compute_variables('urbansim.gridcell.ln_commercial_sqft', resources=self.config)
        self.gridcell.compute_variables('urbansim.gridcell.ln_commercial_sqft_lag4', resources=self.config)
        sqft = self.gridcell.get_attribute('ln_commercial_sqft').sum()
        sqft_lag4 = self.gridcell.get_attribute('ln_commercial_sqft_lag4').sum()
        self.assertNotEqual(sqft, sqft_lag4)
class ModelExplorer(GenericModelExplorer): def __init__(self, model, year, scenario_name=None, model_group=None, configuration=None, xml_configuration=None, cache_directory=None): self.model_group = model_group self.explored_model = model if configuration is None: if xml_configuration is None: raise StandardError, "Either dictionary based or XML based configuration must be given." config = xml_configuration.get_run_configuration(scenario_name) else: config = Configuration(configuration) if model is not None: dependent_models = config['models_configuration'][model]['controller'].get('dependencies', []) config['models'] = dependent_models if model_group is None: config['models'] = config['models'] + [{model: ["run"]}] else: config['models'] = config['models'] + [{model: {"group_members": [{model_group: ["run"]}]}}] else: config['models'] = [] config['years'] = [year, year] config["datasets_to_cache_after_each_model"]=[] config['flush_variables'] = False self.config = Resources(config) self.xml_configuration = xml_configuration if cache_directory is None: cache_directory = config['creating_baseyear_cache_configuration'].baseyear_cache.existing_cache_to_copy self.simulation_state = SimulationState(new_instance=True, base_cache_dir=cache_directory) self.config['cache_directory'] = cache_directory SessionConfiguration(new_instance=True, package_order=self.config['dataset_pool_configuration'].package_order, in_storage=AttributeCache()) def run(self): self.model_system = ModelSystem() self.model_system.run(self.config, write_datasets_to_cache_at_end_of_year=False) logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory()) def get_agents_for_simulation(self): return self.get_active_agent_set() def get_model_name(self): return (self.explored_model, self.model_group) def get_specification(self): return self.get_model().get_specified_coefficients().specification def get_probabilities(self, submodel=-2): """Return a tuple of probabilities and choices, 
see ChoiceModel.get_probabilities_and_choices. Works only for the ChoiceModel class. """ model = self.get_model() if isinstance(model, ChoiceModel): return model.get_probabilities_and_choices(submodel) print '\nMethod is implemented only for ChoiceModels.\n' def export_probabilities(self, submodel=-2, filename='./choice_model.txt'): """Export probabilities and choices into a file. Works only for the ChoiceModel class""" model = self.get_model() if isinstance(model, ChoiceModel): model.export_probabilities(submodel, file_name=filename) else: print '\nMethod is implemented only for ChoiceModels.\n'
class TestLagVariables(opus_unittest.OpusTestCase):
    """Integration test for *_lagN variables, run against a copied baseyear cache.
    NOTE(review): this class also appears earlier in the file with identical
    behavior - confirm whether the duplication is intentional.
    """
    def setUp(self):
        # Fresh simulation state; copy the configured baseyear cache and
        # prepare pre-baseyear data so lag variables can look back in time.
        self.config = TestCacheConfiguration()
        self.simulation_state = SimulationState(new_instance=True)
        SessionConfiguration(self.config, new_instance=True,
                             package_order=['urbansim', 'opus_core'],
                             in_storage=AttributeCache())
        self.base_year = self.config['base_year']
        creating_baseyear_cache_configuration = self.config[
            'creating_baseyear_cache_configuration']
        self.simulation_state.set_current_time(self.base_year)
        cache_directory = self.simulation_state.get_cache_directory()
        copytree(
            os.path.join(
                creating_baseyear_cache_configuration.baseyear_cache.
                existing_cache_to_copy, str(self.base_year)),
            os.path.join(cache_directory, str(self.base_year)))
        cacher = CacheScenarioDatabase()
        cacher.prepare_data_before_baseyear(
            cache_directory, self.base_year,
            creating_baseyear_cache_configuration)
        self.config['cache_directory'] = cache_directory
        cache_storage = AttributeCache().get_flt_storage_for_year(
            self.base_year)
        cache_directory = self.simulation_state.get_cache_directory()
        flt_directory = os.path.join(cache_directory, str(self.base_year))
        # Load the gridcell dataset directly from the base-year flt cache.
        self.gridcell = DatasetFactory().get_dataset(
            'gridcell',
            package='urbansim',
            subdir='datasets',
            arguments={
                'in_storage':
                StorageFactory().get_storage('flt_storage',
                                             storage_location=flt_directory)
            })

    def tearDown(self):
        # Also deletes the cache directory created in setUp.
        self.simulation_state.remove_singleton(delete_cache=True)

    def test_lag_variables(self):
        """Test lag variables"""
        # A weak test that computing a lag variable on a realistic dataset does not crash.
        self.gridcell.compute_variables(
            'urbansim.gridcell.n_recent_transitions_to_developed',
            resources=self.config)
        # The following tests are fragile, since they need to know exactly what values are being
        # subtracted, and ignore any negative amount that is truncated at zero.
        # If you change the "subset" dataset to a different region, you will
        # have to update the expected value.
        self.gridcell.compute_variables('urbansim.gridcell.commercial_sqft',
                                        resources=self.config)
        self.gridcell.compute_variables(
            'urbansim.gridcell.commercial_sqft_lag1', resources=self.config)
        self.gridcell.compute_variables(
            'urbansim.gridcell.commercial_sqft_lag2', resources=self.config)
        sqft = self.gridcell.get_attribute('commercial_sqft').sum()
        sqft_lag1 = self.gridcell.get_attribute('commercial_sqft_lag1').sum()
        sqft_lag2 = self.gridcell.get_attribute('commercial_sqft_lag2').sum()
        logger.log_status('sqft = %s' % sqft)
        logger.log_status('sqft_lag1 = %s' % sqft_lag1)
        logger.log_status('sqft_lag2 = %s' % sqft_lag2)
        logger.log_status('base_year = %s' % self.base_year)
        self.assertEqual(self.base_year, SimulationState().get_current_time())
        # lag1 of the base year equals the base year itself.
        self.assertEqual(sqft, sqft_lag1)
        self.assertEqual(578 + 2083 + 1103 + 87, sqft_lag1 - sqft_lag2)
        # Do lag variables produce different results for derived attributes?
        self.gridcell.compute_variables(
            'urbansim.gridcell.n_recent_development_projects',
            resources=self.config)
        self.gridcell.compute_variables(
            'urbansim.gridcell.n_recent_development_projects_lag1',
            resources=self.config)
        n_recent_projects = self.gridcell.get_attribute(
            'n_recent_development_projects').sum()
        n_recent_projects_lag1 = self.gridcell.get_attribute(
            'n_recent_development_projects_lag1').sum()
        self.assertEqual(n_recent_projects, 11)
        self.assertEqual(n_recent_projects_lag1, 15)
        # Do lag_variables produce different results for derived attributes without lags?
        self.gridcell.compute_variables('urbansim.gridcell.ln_commercial_sqft',
                                        resources=self.config)
        self.gridcell.compute_variables(
            'urbansim.gridcell.ln_commercial_sqft_lag4',
            resources=self.config)
        sqft = self.gridcell.get_attribute('ln_commercial_sqft').sum()
        sqft_lag4 = self.gridcell.get_attribute(
            'ln_commercial_sqft_lag4').sum()
        self.assertNotEqual(sqft, sqft_lag4)