def run_simulation(self, simulation_instance=None):
    """Run the simulation described by self.config and log the cache location.

    'simulation_instance' is the object whose run() method is invoked with
    self.config. If it is None, a new ModelSystem is created and used.
    """
    system = ModelSystem() if simulation_instance is None else simulation_instance
    system.run(self.config)
    # Alternative entry point kept for reference:
    # system.run_multiprocess(self.config, is_run_subset=True)
    cache_directory = self.simulation_state.get_cache_directory()
    logger.log_status("Data cache in %s" % cache_directory)
class IterativeMetaModel(Model):
    """ This meta model iterates over a set of given models and stops when a given condition is fulfilled.
    """
    model_name = "Iterative Meta Model"

    def __init__(self, models, configuration, datasets_to_preload=None):
        """
        'models' is a list of strings determining the models to be run.
        'configuration' is a dictionary based configuration used for ModelSystem.
        Its entry 'models_configuration' must contain the given 'models'.
        'datasets_to_preload' is a list of dataset names that should be pre-loaded
        for the use of the 'models'. If it is None, all datasets in
        configuration['datasets_to_preload'] are loaded prior to each run.
        Setting this entry can speed the run-time, since all pre-loaded datasets
        are also cached after each iteration.
        """
        self.config = Resources(configuration)
        self.config['models'] = models
        if datasets_to_preload is not None:
            # Keep only the requested datasets, reusing their existing settings
            # (or an empty dictionary if a dataset has no entry yet).
            new_datasets_to_preload = {}
            for dataset in datasets_to_preload:
                new_datasets_to_preload[dataset] = self.config['datasets_to_preload'].get(dataset, {})
            self.config['datasets_to_preload'] = new_datasets_to_preload
        self.model_system = ModelSystem()

    def run(self, year, condition=None, max_iter=10):
        """
        'year' is the current year of the simulation.
        'condition' should be a boolean expression defined on any dataset.
        The method iterates over the given models until all values of the
        expression are True.
        'max_iter' gives the maximum number of iterations to run, if 'condition'
        is not fulfilled. If it is None, there is no limit and thus, the
        condition must be fulfilled in order to terminate.
        If 'condition' is None, the set of models is run only once.

        Returns the result of the last call to
        ModelSystem.run_in_same_process, or None if the models were never run
        (the condition held already, or 'max_iter' is 0).
        """
        self.config['years'] = (year, year)
        if condition is None:
            return self.model_system.run_in_same_process(self.config)

        variable_name = VariableName(condition)
        condition_value = self._compute_condition(variable_name, recompute=False)
        result = None
        iterations = 0  # number of completed passes over the models
        while not alltrue(condition_value):
            # Only a non-None 'max_iter' limits the number of passes.
            if max_iter is not None and iterations >= max_iter:
                logger.log_status('%s did not converge. Maximum number of iterations (%s) reached.' % (self.model_name, max_iter))
                return result
            result = self.model_system.run_in_same_process(self.config)
            iterations += 1
            # Re-evaluate after every pass so the reported status reflects the
            # state produced by the last run.
            condition_value = self._compute_condition(variable_name, recompute=True)
        logger.log_status('%s converged in %s iterations.' % (self.model_name, iterations))
        return result

    def _compute_condition(self, variable_name, recompute):
        """Evaluate the condition variable on its dataset from the session's dataset pool.

        If 'recompute' is True, computed attributes of the dataset are deleted
        first, which forces the condition to be computed anew.
        """
        dataset_pool = SessionConfiguration().get_dataset_pool()
        dataset = dataset_pool.get_dataset(variable_name.get_dataset_name())
        if recompute:
            dataset.delete_computed_attributes()
        return dataset.compute_variables(variable_name, dataset_pool=dataset_pool)
def run_simulation(self, simulation_instance=None):
    """Run the simulation inside a logger block named after the scenario database.

    'simulation_instance' is the object whose run() method is invoked with
    self.config. If it is None, a new ModelSystem is created and used.
    The logger block is closed even if the run raises; the cache directory
    is logged only after a successful run.
    """
    database_name = self.config["scenario_database_configuration"].database_name
    logger.start_block("Simulation on database %s" % database_name)
    try:
        system = simulation_instance
        if system is None:
            system = ModelSystem()
        system.run(self.config)
        # Alternative entry point kept for reference:
        # system.run_multiprocess(self.config, is_run_subset=True)
    finally:
        logger.end_block()
    cache_directory = self.simulation_state.get_cache_directory()
    logger.log_status("Data cache in %s" % cache_directory)
def run_simulation(self, simulation_instance=None):
    """Run the simulation inside a logger block named after the scenario database.

    'simulation_instance' is the object whose run() method is invoked with
    self.config. If it is None, a new ModelSystem is created and used.
    The logger block is closed even if the run raises; the cache directory
    is logged only after a successful run.
    """
    scenario_db = self.config['scenario_database_configuration']
    logger.start_block('Simulation on database %s' % scenario_db.database_name)
    try:
        runner = ModelSystem() if simulation_instance is None else simulation_instance
        runner.run(self.config)
        # Alternative entry point kept for reference:
        # runner.run_multiprocess(self.config, is_run_subset=True)
    finally:
        logger.end_block()
    logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())
def __init__(self, config=None, save_estimation_results=False):
    """Set up the simulation state, session configuration and model system for an estimation.

    'config' is a dictionary-like configuration. It must contain a non-None
    'cache_directory' entry and a 'dataset_pool_configuration' entry with a
    'package_order' attribute. Optional entries read here are 'base_year',
    'debuglevel', 'models', 'model_name', 'models_configuration' and
    'config_changes_for_estimation'.
    'save_estimation_results' is stored on the instance unchanged.

    Raises KeyError if 'config' is None or has no usable 'cache_directory'.
    """
    # A missing configuration is reported the same way as a missing
    # 'cache_directory' entry instead of failing with an unrelated TypeError.
    if config is None or config.get('cache_directory') is None:
        raise KeyError("The cache directory must be specified in the "
            "given configuration, giving the filesystem path to the cache "
            "directory containing the data with which to estimate. Please "
            "check that your configuration contains the 'cache_directory' "
            "entry and that it is not None.")

    self.simulation_state = SimulationState(new_instance=True,
                                            start_time=config.get('base_year', 0))
    self.simulation_state.set_cache_directory(config['cache_directory'])

    SessionConfiguration(new_instance=True,
                         package_order=config['dataset_pool_configuration'].package_order,
                         in_storage=AttributeCache())
    self.config = Resources(config)
    self.save_estimation_results = save_estimation_results
    self.debuglevel = self.config.get("debuglevel", 4)
    self.model_system = ModelSystem()
    self.agents_index_for_prediction = None

    models = self.config.get('models', [])

    # The model to estimate is either named explicitly or is the first model
    # entry whose action is (or whose action list contains) "estimate".
    self.model_name = None
    if "model_name" in config:
        self.model_name = config["model_name"]
    else:
        for model in models:
            if not isinstance(model, dict):
                continue
            # list(...) keeps this working where keys() is not indexable.
            model_name = list(model.keys())[0]
            actions = model[model_name]
            if actions == "estimate" or (isinstance(actions, list) and "estimate" in actions):
                self.model_name = model_name
                break

    estimate_config_changes = self.config.get('config_changes_for_estimation', {}).get('estimate_config', {})
    if len(estimate_config_changes) > 0:
        change = Resources({'models_configuration': {self.model_name: {'controller': {'init': {'arguments': {}}}}}})
        estimate_config_str = self.config['models_configuration'].get(self.model_name, {}).get('controller', {}).get('init', {}).get('arguments', {}).get('estimate_config', '{}')
        try:
            # NOTE(review): eval() executes text taken from the configuration;
            # this is only acceptable while configurations are trusted input.
            estimate_config = eval(estimate_config_str)
            # The default '{}' evaluates to a plain dict, which has no merge().
            if not isinstance(estimate_config, Resources):
                estimate_config = Resources(estimate_config)
        except Exception:
            # Best effort: an unusable expression falls back to an empty
            # configuration, so only the requested changes are applied.
            estimate_config = Resources({})

        estimate_config.merge(estimate_config_changes)
        self.config.merge(change)
        self.config['models_configuration'][self.model_name]['controller']['init']['arguments']['estimate_config'] = 'Resources(%s)' % estimate_config
def __init__(self, models, configuration, datasets_to_preload=None):
    """Store the configuration restricted to the given models and create a ModelSystem.

    'models' is a list of strings naming the models to be run.
    'configuration' is a dictionary based configuration used for ModelSystem;
    its entry 'models_configuration' must contain the given 'models'.
    'datasets_to_preload' is a list of names of datasets that should be
    pre-loaded for the use of the 'models'. If it is None, all datasets in
    configuration['datasets_to_preload'] are loaded prior to each run.
    Setting this entry can speed up the run-time, since all pre-loaded
    datasets are also cached after each iteration.
    """
    self.config = Resources(configuration)
    self.config['models'] = models
    if datasets_to_preload is not None:
        # Requested datasets keep their existing settings; unknown ones get {}.
        self.config['datasets_to_preload'] = dict(
            (name, self.config['datasets_to_preload'].get(name, {}))
            for name in datasets_to_preload)
    self.model_system = ModelSystem()
def __init__(self, models, configuration, datasets_to_preload=None):
    """Store the configuration restricted to the given models and create a ModelSystem.

    'models' is a list of strings naming the models to be run.
    'configuration' is a dictionary based configuration used for ModelSystem;
    its entry 'models_configuration' must contain the given 'models'.
    'datasets_to_preload' is a list of names of datasets that should be
    pre-loaded for the use of the 'models'. If it is None, all datasets in
    configuration['datasets_to_preload'] are loaded prior to each run.
    Setting this entry can speed up the run-time, since all pre-loaded
    datasets are also cached after each iteration.
    """
    self.config = Resources(configuration)
    self.config['models'] = models
    if datasets_to_preload is not None:
        # Requested datasets keep their existing settings; unknown ones get {}.
        selected = dict(
            (name, self.config['datasets_to_preload'].get(name, {}))
            for name in datasets_to_preload)
        self.config['datasets_to_preload'] = selected
    self.model_system = ModelSystem()
def _run_each_year_as_separate_process(self, iyear, year, seed=None, resources=None, profiler_name=None, log_file=None):
    """Run the core model system for one year, then the travel models.

    If 'resources' has a true 'skip_urbansim' entry and 'iyear' is 0, nothing
    is run and True is returned. Otherwise the result of
    CoreModelSystem._run_each_year_as_separate_process is returned if it is
    false; if it is true, the result of
    _run_travel_models_from_resources_in_separate_processes is returned.
    """
    skip_requested = resources.get('skip_urbansim', False)
    if skip_requested and iyear == 0:
        return True

    # run urbansim
    urbansim_ok = CoreModelSystem._run_each_year_as_separate_process(
        self, iyear, year,
        seed=seed,
        resources=resources,
        profiler_name=profiler_name,
        log_file=log_file)
    if not urbansim_ok:
        return urbansim_ok

    # travel models are attempted only after a successful urbansim run
    return self._run_travel_models_from_resources_in_separate_processes(year, resources)
class ModelExplorer(object):
    """Runs one model (with its dependencies) for a single year and gives access to its internals.

    After run() has been called, the accessor methods read the model, the
    datasets and the dataset pool from the 'run_year_namespace' of the
    ModelSystem that executed the run.
    """

    def __init__(self, model, year, scenario_name=None, model_group=None,
                 configuration=None, xml_configuration=None, cache_directory=None):
        """Build the run configuration for exploring 'model' in 'year'.

        'model' is the name of the model to explore; if it is None, no model
        is scheduled. 'model_group' optionally selects a group member of the
        model. Either 'configuration' (dictionary based) or
        'xml_configuration' (from which the run configuration of
        'scenario_name' is taken) must be given. If 'cache_directory' is None,
        the 'existing_cache_to_copy' of the base year cache configuration is
        used.

        Raises StandardError if neither configuration is given.
        """
        self.model_group = model_group
        self.explored_model = model

        if configuration is None:
            if xml_configuration is None:
                # Call form of raise; the exception type is unchanged.
                raise StandardError("Either dictionary based or XML based configuration must be given.")
            config = xml_configuration.get_run_configuration(scenario_name)
        else:
            config = Configuration(configuration)

        # Remember the scenario's full model list; config['models'] is
        # overwritten below with only what is needed for the explored model.
        self.scenario_models = config['models']
        if config.get('models_in_year', None) is not None and config['models_in_year'].get(year, None) is not None:
            del config['models_in_year'][year]
        if model is not None:
            dependent_models = config['models_configuration'][model]['controller'].get('dependencies', [])
            config['models'] = dependent_models
            if model_group is None:
                config['models'] = config['models'] + [{model: ["run"]}]
            else:
                config['models'] = config['models'] + [{model: {"group_members": [{model_group: ["run"]}]}}]
        else:
            config['models'] = []

        config['years'] = [year, year]
        config["datasets_to_cache_after_each_model"] = []
        config['flush_variables'] = False

        self.config = Resources(config)
        self.xml_configuration = xml_configuration

        if cache_directory is None:
            cache_directory = config['creating_baseyear_cache_configuration'].baseyear_cache.existing_cache_to_copy
        self.simulation_state = SimulationState(new_instance=True, base_cache_dir=cache_directory,
                                                start_time=config.get('base_year', 0))
        self.config['cache_directory'] = cache_directory

        SessionConfiguration(new_instance=True,
                             package_order=self.config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())

    def run(self):
        """Run the configured models in a new ModelSystem and log the cache directory.

        Datasets are neither written to the cache at the end of the year nor
        cleaned up, so they remain available to the accessor methods.
        """
        self.model_system = ModelSystem()
        self.model_system.run(self.config, write_datasets_to_cache_at_end_of_year=False,
                              cleanup_datasets=False)
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())

    def get_agents_for_simulation(self):
        """Return the active agent set (see get_active_agent_set)."""
        return self.get_active_agent_set()

    def get_model_name(self):
        """Return a tuple of the explored model name and the model group."""
        return (self.explored_model, self.model_group)

    def get_specification(self):
        """Return the specification of the model's specified coefficients."""
        return self.get_model().get_specified_coefficients().specification

    def get_probabilities(self, submodel=-2):
        """Return a tuple of probabilities and choices, see ChoiceModel.get_probabilities_and_choices.
        Works only for the ChoiceModel class.
        """
        return self.get_model().get_probabilities_and_choices(submodel)

    def export_probabilities(self, submodel=-2, filename='./choice_model.txt'):
        """Export probabilities and choices into a file. Works only for the ChoiceModel class"""
        self.get_model().export_probabilities(submodel, file_name=filename)

    def get_model(self):
        """Return a model object."""
        return self.model_system.run_year_namespace["model"]

    def get_dataset(self, dataset_name):
        """Return a Dataset object of the given name.

        The run namespace is searched first, then its 'datasets' entry;
        otherwise the dataset is requested from the dataset pool.
        """
        namespace = self.model_system.run_year_namespace
        ds = namespace.get(dataset_name, None)
        if ds is None:
            if dataset_name not in namespace["datasets"].keys():
                ds = self.get_dataset_pool().get_dataset(dataset_name)
            else:
                ds = namespace["datasets"][dataset_name]
        return ds

    def get_data(self, coefficient, submodel=-2):
        """Calls method get_data of the Model object. Should return a data array for the
        given coefficient and submodel. Can be used only in models that are estimable."""
        return self.get_model().get_data(coefficient, submodel)

    def get_coefficient_names(self, submodel=-2):
        """Calls method get_coefficient_names of the Model object which should return
        coefficient names for the given submodel. Can be used only in models that are estimable."""
        return self.get_model().get_coefficient_names(submodel)

    def get_coefficients(self, submodel=-2):
        """Return an object of class SpecifiedCoefficientsFor1Submodel giving the model coefficients.
        Can be used only in models that are estimable."""
        return SpecifiedCoefficientsFor1Submodel(self.get_model().get_specified_coefficients(), submodel)

    def get_data_as_dataset(self, submodel=-2, **kwargs):
        """Calls method get_data_as_dataset of the Model object which should return an object of class Dataset
        containing model data.
        Works only for ChoiceModel (returns InteractionDataset),
        and for RegressionModel (returns Dataset).
        """
        return self.get_model().get_data_as_dataset(submodel, **kwargs)

    def get_choice_set(self):
        """Return a Dataset of choices. Works only for the ChoiceModel class.
        """
        return self.get_model().model_interaction.interaction_dataset.get_dataset(2)

    def get_choice_set_index(self):
        """Return an array of indices of choices. Works only for the ChoiceModel class.
        """
        return self.get_model().model_interaction.interaction_dataset.get_index(2)

    def get_choice_set_index_for_submodel(self, submodel):
        """Return an array of indices of choices for the given submodel.
        Works only for the ChoiceModel class.
        """
        index = self.get_choice_set_index()
        return take(index, indices=self.get_model().observations_mapping[submodel], axis=0)

    def get_active_choice_set(self, submodel=None):
        """Return choice set as seen by agents in the model.
        Works only for the ChoiceModel class.
        """
        if submodel is None:
            choices = self.get_choice_set_index()
        else:
            choices = self.get_choice_set_index_for_submodel(submodel)
        choices = unique(choices.flatten())
        ds = self.get_choice_set()
        return DatasetSubset(ds, choices)

    def get_agent_set(self):
        """Return a Dataset of all agents. Works only for the ChoiceModel class.
        """
        return self.get_model().model_interaction.interaction_dataset.get_dataset(1)

    def get_agent_set_index(self):
        """Return an array of indices of agents that are the choosers.
        Works only for the ChoiceModel class.
        """
        return self.get_model().model_interaction.interaction_dataset.get_index(1)

    def get_agent_set_index_for_submodel(self, submodel):
        """Return an array of indices of agents for the given submodel that are the choosers.
        Works only for the ChoiceModel class.
        """
        model = self.get_model()
        return model.model_interaction.interaction_dataset.get_index(1)[model.observations_mapping[submodel]]

    def get_active_agent_set(self, submodel=None):
        """Return agent set that make choices in the model.
        Works only for the ChoiceModel class.
        """
        agents = self.get_agent_set()
        if submodel is None:
            index = self.get_agent_set_index()
        else:
            index = self.get_agent_set_index_for_submodel(submodel)
        return DatasetSubset(agents, index)

    def agent_summary(self, submodel=None):
        """Call summary() on the active agent set of the given submodel."""
        ds = self.get_active_agent_set(submodel=submodel)
        ds.summary()

    def choice_summary(self, submodel=None):
        """Call summary() on the active choice set of the given submodel."""
        ds = self.get_active_choice_set(submodel=submodel)
        ds.summary()

    def data_summary(self, **kwargs):
        """Call summary() on the dataset returned by get_data_as_dataset(**kwargs)."""
        ds = self.get_data_as_dataset(**kwargs)
        ds.summary()

    def _get_before_after_dataset_from_attribute(self, var_name, storage, **kwargs):
        """Compute 'var_name' on its dataset and copy the attribute by reloading it from 'storage'.

        Returns the dataset; additional keyword arguments are passed to
        copy_attribute_by_reload.
        """
        dataset_name = var_name.get_dataset_name()
        ds = self.get_dataset(dataset_name)
        ds.compute_variables([var_name], dataset_pool=self.get_dataset_pool())
        ds.copy_attribute_by_reload(var_name, storage=storage, **kwargs)
        return ds

    def _before_after_dataset(self, attribute_name):
        """Return (VariableName, dataset) with the attribute reloaded from the simulation cache."""
        from opus_core.store.attribute_cache import AttributeCache
        var_name = VariableName(attribute_name)
        storage = AttributeCache(self.simulation_state.get_cache_directory())
        ds = self._get_before_after_dataset_from_attribute(
            var_name, storage=storage,
            package_order=self.get_dataset_pool().get_package_order())
        return var_name, ds

    def get_before_after_attribute(self, attribute_name):
        """Return a dictionary with elements 'before' (contains an array of the given attribute
        that is reloaded from the cache) and 'after' (contains an array of the given attribute
        with the current values).
        """
        var_name, ds = self._before_after_dataset(attribute_name)
        return {'after': ds[var_name.get_alias()],
                'before': ds.get_attribute('%s_reload__' % var_name.get_alias())}

    def summary_before_after(self, attribute_name):
        """Print summary of the given attribute 'before' (values
        reloaded from the cache) and 'after' (current values).
        """
        var_name, ds = self._before_after_dataset(attribute_name)
        # Single-argument print calls produce the same output whether print
        # is a statement or a function.
        print('')
        print('Before model run:')
        print('=================')
        ds.summary(names=['%s_reload__' % var_name.get_alias()])
        print('')
        print('After model run:')
        print('=================')
        ds.summary(names=[var_name.get_alias()])

    @staticmethod
    def _model_name_and_groups(model_entry):
        """Split one entry of a models list into (model name, list of groups).

        An entry is either a model name or a one-key dictionary mapping the
        name to its settings. Groups are read from the 'group_members' entry
        when the settings are a dictionary; in all other cases (e.g. a list
        of actions such as ['run']) the model has the single group None.
        """
        groups = None
        name = model_entry
        if isinstance(model_entry, dict):
            name = list(model_entry.keys())[0]
            settings = model_entry[name]
            # Settings that are a list of actions have no 'group_members'.
            if isinstance(settings, dict):
                groups = settings.get('group_members', None)
        if not isinstance(groups, list):
            groups = [groups]
        return name, groups

    def model_dependencies(self, model=None, group=None):
        """Prints out all dependencies for the model.

        If 'model' is None, the explored model, its group and its
        specification are used. If 'model' is 'all', dependencies are printed
        for every model (and group) of the scenario.
        """
        from opus_core.variables.dependency_query import DependencyChart
        if model is None:  # current model
            model, group = self.get_model_name()
            spec = self.get_specification()
        else:
            spec = None
        if model == 'all':  # print dependencies for all models
            for model_entry in self.scenario_models:
                thismodel, thisgroups = self._model_name_and_groups(model_entry)
                for group in thisgroups:
                    chart = DependencyChart(self.xml_configuration, model=thismodel, model_group=group)
                    chart.print_model_dependencies()
        else:
            chart = DependencyChart(self.xml_configuration, model=model, model_group=group,
                                    specification=spec)
            chart.print_model_dependencies()

    def variable_dependencies(self, name):
        """Prints out dependencies of this variable. 'name' can be either an alias from
        the model specification or an expression."""
        from opus_core.variables.dependency_query import DependencyChart
        varname = None
        # Prefer a variable of the specification whose alias matches 'name'.
        for thisvar in self.get_specification().get_variable_names():
            if not isinstance(thisvar, VariableName):
                thisvar = VariableName(thisvar)
            if name == thisvar.get_alias():
                varname = thisvar
                break
        if varname is None:
            # No alias matched, so 'name' is treated as an expression.
            varname = VariableName(name)
        chart = DependencyChart(self.xml_configuration)
        chart.print_dependencies(varname.get_expression())

    def compute_expression(self, attribute_name):
        """Compute any expression and return its values."""
        var_name = VariableName(attribute_name)
        dataset_name = var_name.get_dataset_name()
        ds = self.get_dataset(dataset_name)
        return ds.compute_variables([var_name], dataset_pool=self.get_dataset_pool())

    def get_dataset_pool(self):
        """Return the dataset pool stored in the run namespace of the model system."""
        return self.model_system.run_year_namespace["dataset_pool"]

    def plot_histogram_before_after(self, attribute_name, bins=None):
        """Plot histograms of values returned by the method get_before_after_attribute."""
        from opus_core.plot_functions import create_histogram, show_plots
        from matplotlib.pylab import figure
        values = self.get_before_after_attribute(attribute_name)
        alias = VariableName(attribute_name).get_alias()
        fig = figure()
        fig.add_subplot(121)
        create_histogram(values['before'], main='%s (before)' % alias, bins=bins)
        fig.add_subplot(122)
        create_histogram(values['after'], main='%s (after)' % alias, bins=bins)
        show_plots()

    def get_correlation(self, submodel=-2):
        """Return an array of correlations between all variables of the model data (for given submodel).
        Works only for ChoiceModel and RegressionModel"""
        ds = self.get_data_as_dataset(submodel)
        attrs = [attr for attr in ds.get_known_attribute_names() if attr not in ds.get_id_name()]
        return ds.correlation_matrix(attrs)

    def plot_correlation(self, submodel=-2, useR=False, **kwargs):
        """Plot correlations between all variables of the model data (for given submodel).
        Works only for ChoiceModel and RegressionModel"""
        ds = self.get_data_as_dataset(submodel)
        attrs = [attr for attr in ds.get_known_attribute_names() if attr not in ds.get_id_name()]
        ds.correlation_image(attrs, useR=useR, **kwargs)

    def plot_choice_set(self, agents_index=None, aggregate_to=None, matplotlib=True, **kwargs):
        """Plot map of the sampled choice set.
        agents_index can be given to restrict the set of agents to which the choice set belongs to.
        aggregate_to is a name of a dataset which the choice set should be aggregated to.
        If matplotlib is False, mapnik is used (and required).
        Additional arguments are passed to plot_map or plot_map_matplotlib.
        E.g. (choice set are buildings, aggregated to zones, for the first agent)
        er.plot_choice_set(aggregate_to='zone', matplotlib=False, project_name='psrc_parcel',
                            file='choice_set0.png', agents_index=0)
        """
        choice_set = self.get_choice_set()
        if agents_index is None:
            flatten_choice_index = self.get_choice_set_index().ravel()
        else:
            flatten_choice_index = self.get_choice_set_index()[agents_index, :].ravel()
        if aggregate_to is not None:
            ds_aggr = self.get_dataset(aggregate_to)
            result = ds_aggr.sum_over_ids(choice_set[ds_aggr.get_id_name()[0]][flatten_choice_index],
                                          ones(flatten_choice_index.size))
            ds = ds_aggr
        else:
            result = choice_set.sum_over_ids(choice_set.get_id_attribute()[flatten_choice_index],
                                             ones(flatten_choice_index.size))
            ds = choice_set
        # The counts are attached as a temporary attribute for plotting and
        # removed again afterwards.
        dummy_attribute_name = '__sampled_choice_set__'
        ds.add_attribute(name=dummy_attribute_name, data=result)
        if matplotlib:
            coord_syst = None
            if ds.get_coordinate_system() is None and hasattr(ds, 'compute_coordinate_system'):
                coord_syst = ds.compute_coordinate_system(dataset_pool=self.get_dataset_pool())
            ds.plot_map_matplotlib(dummy_attribute_name, background=-1, coordinate_system=coord_syst, **kwargs)
        else:
            ds.plot_map(dummy_attribute_name, background=-1, **kwargs)
        ds.delete_one_attribute(dummy_attribute_name)

    def plot_choice_set_attribute(self, name, agents_index=None, aggregate_to=None, function='sum',
                                  matplotlib=True, **kwargs):
        """Plot map of the given attribute for the sampled choice set.
        agents_index can be given to restrict the set of agents to which the choice set belongs to.
        aggregate_to is a name of a dataset which the choice set should be aggregated to.
        function defines the aggregating function (e.g. sum, mean, median, etc.)
        If matplotlib is False, mapnik is used (and required).
        Additional arguments are passed to plot_map or plot_map_matplotlib.
        E.g. er.plot_choice_set_attribute('residential_units', aggregate_to='zone', matplotlib=False,
                                    project_name='psrc_parcel', file='choice_resunits.png')
        """
        choice_set = self.get_choice_set()
        if agents_index is None:
            flatten_choice_index = self.get_choice_set_index().ravel()
        else:
            flatten_choice_index = self.get_choice_set_index()[agents_index, :].ravel()
        # filter_var is 1 for choice set members that were not sampled.
        filter_var = ones(choice_set.size(), dtype='int16')
        filter_var[unique(flatten_choice_index)] = 0
        filter_idx = where(filter_var)[0]
        if aggregate_to is not None:
            ds_aggr = self.get_dataset(aggregate_to)
            result = ds_aggr.aggregate_over_ids(choice_set[ds_aggr.get_id_name()[0]][flatten_choice_index],
                                                what=choice_set[name][flatten_choice_index], function=function)
            filter_values = ds_aggr.sum_over_ids(choice_set[ds_aggr.get_id_name()[0]][filter_idx],
                                                 ones(filter_idx.size))
            filter_values = filter_values > 0
            ds = ds_aggr
        else:
            result = choice_set.aggregate_over_ids(choice_set.get_id_attribute()[flatten_choice_index],
                                                   what=choice_set[name][flatten_choice_index], function=function)
            filter_values = filter_var
            ds = choice_set
        # Values and filter are attached as temporary attributes for plotting
        # and removed again afterwards.
        dummy_attribute_name = '__sampled_choice_set_attribute__'
        ds.add_attribute(name=dummy_attribute_name, data=result)
        dummy_filter_name = '__sampled_choice_set_filter__'
        ds.add_attribute(name=dummy_filter_name, data=filter_values)
        if matplotlib:
            coord_syst = None
            if ds.get_coordinate_system() is None and hasattr(ds, 'compute_coordinate_system'):
                coord_syst = ds.compute_coordinate_system(dataset_pool=self.get_dataset_pool())
            ds.plot_map_matplotlib(dummy_attribute_name, filter=dummy_filter_name, coordinate_system=coord_syst, **kwargs)
        else:
            ds.plot_map(dummy_attribute_name, filter=dummy_filter_name, **kwargs)
        ds.delete_one_attribute(dummy_attribute_name)
        ds.delete_one_attribute(dummy_filter_name)

    def plot_coefficients(self, submodel=-2, exclude_constant=True, eqidx=0, plot=True,
                          store_values_to_file=None):
        """ Plot a barchart of coefficient values. This can be used in a regression model,
        when coefficients are standardized
        (i.e. using the estimation module opus_core.estimate_linear_regression_standardized).
        store_values_to_file can be a file name where the values are stored.
        If plot is False, nothing is plotted and a dictionary with the entries
        'names', 'values' and 'errors' is returned instead.
        """
        coef = self.get_coefficients(submodel)
        values = coef.get_coefficient_values()
        names = coef.get_coefficient_names()
        sd = coef.get_standard_errors()
        # Boolean mask over coefficients; constants are optionally masked out.
        idx = ones(names.shape[1], dtype="bool")
        if exclude_constant:
            pos = coef.get_constants_positions()
            if pos.size > 0:
                idx[pos] = 0
        if store_values_to_file is not None:
            n = idx.sum()
            result = concatenate((reshape(names[eqidx, idx], (n, 1)),
                                  reshape(values[eqidx, idx], (n, 1)),
                                  reshape(sd[eqidx, idx], (n, 1))), axis=1)
            # Header line first, then the table appended below it.
            write_to_text_file(store_values_to_file, array(['coefficient_name', 'estimate', 'standard_error']),
                               delimiter='\t')
            write_table_to_text_file(store_values_to_file, result, delimiter='\t', mode='a')
        if plot:
            plot_barchart(values[eqidx, idx], labels=names[eqidx, idx], errors=sd[eqidx, idx])
        else:
            return {'names': names[eqidx, idx], 'values': values[eqidx, idx], 'errors': sd[eqidx, idx]}

    def create_latex_tables(self, directory, other_info_keys=None):
        """Create LaTeX tables of the model's coefficients and specification in 'directory'."""
        from opus_core.latex_table_creator import LatexTableCreator
        LTC = LatexTableCreator()
        LTC.create_latex_table_for_coefficients_for_model(
            self.get_model().get_specified_coefficients().coefficients, self.explored_model,
            directory, other_info_keys=other_info_keys)
        LTC.create_latex_table_for_specifications_for_model(
            self.get_model().get_specified_coefficients().specification, self.explored_model, directory)
class IterativeMetaModel(Model):
    """ This meta model iterates over a set of given models and stops when a given condition is fulfilled.
    """
    model_name = "Iterative Meta Model"

    def __init__(self, models, configuration, datasets_to_preload=None):
        """
        'models' is a list of strings determining the models to be run.
        'configuration' is a dictionary based configuration used for ModelSystem.
        Its entry 'models_configuration' must contain the given 'models'.
        'datasets_to_preload' is a list of dataset names that should be pre-loaded
        for the use of the 'models'. If it is None, all datasets in
        configuration['datasets_to_preload'] are loaded prior to each run.
        Setting this entry can speed the run-time, since all pre-loaded datasets
        are also cached after each iteration.
        """
        self.config = Resources(configuration)
        self.config['models'] = models
        if datasets_to_preload is not None:
            # Keep only the requested datasets, reusing their existing settings
            # (or an empty dictionary if a dataset has no entry yet).
            new_datasets_to_preload = {}
            for dataset in datasets_to_preload:
                new_datasets_to_preload[dataset] = self.config['datasets_to_preload'].get(dataset, {})
            self.config['datasets_to_preload'] = new_datasets_to_preload
        self.model_system = ModelSystem()

    def run(self, year, condition=None, max_iter=10):
        """
        'year' is the current year of the simulation.
        'condition' should be a boolean expression defined on any dataset.
        The method iterates over the given models until all values of the
        expression are True.
        'max_iter' gives the maximum number of iterations to run, if 'condition'
        is not fulfilled. If it is None, there is no limit and thus, the
        condition must be fulfilled in order to terminate.
        If 'condition' is None, the set of models is run only once.

        Returns the result of the last call to
        ModelSystem.run_in_same_process, or None if the models were never run
        (the condition held already, or 'max_iter' is 0).
        """
        self.config['years'] = (year, year)
        if condition is None:
            return self.model_system.run_in_same_process(self.config)

        variable_name = VariableName(condition)
        condition_value = self._compute_condition(variable_name, recompute=False)
        result = None
        iterations = 0  # number of completed passes over the models
        while not alltrue(condition_value):
            # Only a non-None 'max_iter' limits the number of passes.
            if max_iter is not None and iterations >= max_iter:
                logger.log_status('%s did not converge. Maximum number of iterations (%s) reached.' % (self.model_name, max_iter))
                return result
            result = self.model_system.run_in_same_process(self.config)
            iterations += 1
            # Re-evaluate after every pass so the reported status reflects the
            # state produced by the last run.
            condition_value = self._compute_condition(variable_name, recompute=True)
        logger.log_status('%s converged in %s iterations.' % (self.model_name, iterations))
        return result

    def _compute_condition(self, variable_name, recompute):
        """Evaluate the condition variable on its dataset from the session's dataset pool.

        If 'recompute' is True, computed attributes of the dataset are deleted
        first, which forces the condition to be computed anew.
        """
        dataset_pool = SessionConfiguration().get_dataset_pool()
        dataset = dataset_pool.get_dataset(variable_name.get_dataset_name())
        if recompute:
            dataset.delete_computed_attributes()
        return dataset.compute_variables(variable_name, dataset_pool=dataset_pool)
def __init__(self):
    """Initialize the instance by delegating to CoreModelSystem.__init__; no other state is set here."""
    CoreModelSystem.__init__(self)
def run(self):
    """Run the configured models in a new ModelSystem and log the cache directory.

    The new ModelSystem is stored as self.model_system. Datasets are neither
    written to the cache at the end of the year nor cleaned up.
    """
    self.model_system = ModelSystem()
    run_options = {
        'write_datasets_to_cache_at_end_of_year': False,
        'cleanup_datasets': False,
    }
    self.model_system.run(self.config, **run_options)
    cache_directory = self.simulation_state.get_cache_directory()
    logger.log_status("Data cache in %s" % cache_directory)
class ModelExplorer(object):
    """Run one model for one year and inspect its data afterwards.

    The constructor prepares a run configuration restricted to the explored
    model (plus its declared dependencies); run() executes it, and the
    remaining methods give access to the model object, its datasets,
    coefficients and a number of summaries and plots.
    """

    def __init__(self, model, year, scenario_name=None, model_group=None,
                 configuration=None, xml_configuration=None,
                 cache_directory=None):
        """Build the configuration for exploring 'model' in 'year'.

        Either 'configuration' (dictionary based) or 'xml_configuration'
        must be given; with the latter the run configuration of
        'scenario_name' is used. If 'model' is None, no models are run.
        'model_group' selects a group member of the model. If
        'cache_directory' is None, the existing baseyear cache named in the
        configuration is used.

        Raises StandardError if neither configuration is given.
        """
        self.model_group = model_group
        self.explored_model = model

        if configuration is None:
            if xml_configuration is None:
                raise StandardError("Either dictionary based or XML based configuration must be given.")
            config = xml_configuration.get_run_configuration(scenario_name)
        else:
            config = Configuration(configuration)

        # Remember the scenario's full model list before it is replaced.
        self.scenario_models = config['models']
        if (config.get('models_in_year', None) is not None
                and config['models_in_year'].get(year, None) is not None):
            # A year-specific model list would override config['models'].
            del config['models_in_year'][year]

        if model is not None:
            dependent_models = config['models_configuration'][model]['controller'].get('dependencies', [])
            if model_group is None:
                own_entry = {model: ["run"]}
            else:
                own_entry = {model: {"group_members": [{model_group: ["run"]}]}}
            config['models'] = dependent_models + [own_entry]
        else:
            config['models'] = []

        config['years'] = [year, year]
        config["datasets_to_cache_after_each_model"] = []
        config['flush_variables'] = False

        self.config = Resources(config)
        self.xml_configuration = xml_configuration

        if cache_directory is None:
            cache_directory = config['creating_baseyear_cache_configuration'].baseyear_cache.existing_cache_to_copy
        self.simulation_state = SimulationState(new_instance=True,
                                                base_cache_dir=cache_directory,
                                                start_time=config.get('base_year', 0))
        self.config['cache_directory'] = cache_directory

        SessionConfiguration(new_instance=True,
                             package_order=self.config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())

    def run(self):
        """Run the prepared configuration with a new ModelSystem.

        Datasets are neither written to the cache at the end of the year nor
        cleaned up, so they remain available to the accessor methods.
        """
        self.model_system = ModelSystem()
        self.model_system.run(self.config,
                              write_datasets_to_cache_at_end_of_year=False,
                              cleanup_datasets=False)
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())

    def get_agents_for_simulation(self):
        """Return the active agent set (see get_active_agent_set)."""
        return self.get_active_agent_set()

    def get_model_name(self):
        """Return a tuple (explored model name, model group)."""
        return (self.explored_model, self.model_group)

    def get_specification(self):
        """Return the specification of the model's specified coefficients."""
        return self.get_model().get_specified_coefficients().specification

    def get_probabilities(self, submodel=-2):
        """Return a tuple of probabilities and choices, see
        ChoiceModel.get_probabilities_and_choices.
        Works only for the ChoiceModel class.
        """
        model = self.get_model()
        return model.get_probabilities_and_choices(submodel)

    def export_probabilities(self, submodel=-2, filename='./choice_model.txt'):
        """Export probabilities and choices into a file.
        Works only for the ChoiceModel class.
        """
        model = self.get_model()
        model.export_probabilities(submodel, file_name=filename)

    def get_model(self):
        """Return a model object."""
        return self.model_system.run_year_namespace["model"]

    def get_dataset(self, dataset_name):
        """Return a Dataset object of the given name.

        The run namespace is searched first, then its "datasets" entry; if
        the name is in neither, the dataset is taken from the dataset pool.
        """
        namespace = self.model_system.run_year_namespace
        ds = namespace.get(dataset_name, None)
        if ds is None:
            if dataset_name not in namespace["datasets"].keys():
                ds = self.get_dataset_pool().get_dataset(dataset_name)
            else:
                ds = namespace["datasets"][dataset_name]
        return ds

    def get_data(self, coefficient, submodel=-2):
        """Calls method get_data of the Model object. Should return a data
        array for the given coefficient and submodel. Can be used only in
        models that are estimable.
        """
        return self.get_model().get_data(coefficient, submodel)

    def get_coefficient_names(self, submodel=-2):
        """Calls method get_coefficient_names of the Model object which
        should return coefficient names for the given submodel. Can be used
        only in models that are estimable.
        """
        return self.get_model().get_coefficient_names(submodel)

    def get_coefficients(self, submodel=-2):
        """Return an object of class SpecifiedCoefficientsFor1Submodel giving
        the model coefficients. Can be used only in models that are
        estimable.
        """
        return SpecifiedCoefficientsFor1Submodel(self.get_model().get_specified_coefficients(), submodel)

    def get_data_as_dataset(self, submodel=-2, **kwargs):
        """Calls method get_data_as_dataset of the Model object which should
        return an object of class Dataset containing model data.
        Works only for ChoiceModel (returns InteractionDataset), and for
        RegressionModel (returns Dataset).
        """
        return self.get_model().get_data_as_dataset(submodel, **kwargs)

    def get_choice_set(self):
        """Return a Dataset of choices. Works only for the ChoiceModel class."""
        return self.get_model().model_interaction.interaction_dataset.get_dataset(2)

    def get_choice_set_index(self):
        """Return an array of indices of choices. Works only for the
        ChoiceModel class.
        """
        return self.get_model().model_interaction.interaction_dataset.get_index(2)

    def get_choice_set_index_for_submodel(self, submodel):
        """Return an array of indices of choices for the given submodel.
        Works only for the ChoiceModel class.
        """
        index = self.get_choice_set_index()
        return take(index, indices=self.get_model().observations_mapping[submodel], axis=0)

    def get_active_choice_set(self, submodel=None):
        """Return choice set as seen by agents in the model.
        Works only for the ChoiceModel class.
        """
        if submodel is None:
            choices = self.get_choice_set_index()
        else:
            choices = self.get_choice_set_index_for_submodel(submodel)
        choices = unique(choices.flatten())
        ds = self.get_choice_set()
        return DatasetSubset(ds, choices)

    def get_agent_set(self):
        """Return a Dataset of all agents."""
        return self.get_model().get_agent_set()

    def get_agent_set_index(self):
        """Return an array of indices of agents active in the model."""
        return self.get_model().get_agent_set_index()

    def get_agent_set_index_for_submodel(self, submodel):
        """Return an array of indices of agents for the given submodel."""
        return self.get_model().get_agent_set_index_for_submodel(submodel)

    def get_active_agent_set(self, submodel=None):
        """Return agent set that make choices in the model.
        Works only for the ChoiceModel class.
        """
        agents = self.get_agent_set()
        if submodel is None:
            index = self.get_agent_set_index()
        else:
            index = self.get_agent_set_index_for_submodel(submodel)
        return DatasetSubset(agents, index)

    def agent_summary(self, submodel=None):
        """Print a summary of the active agent set."""
        ds = self.get_active_agent_set(submodel=submodel)
        ds.summary()

    def choice_summary(self, submodel=None):
        """Print a summary of the active choice set."""
        ds = self.get_active_choice_set(submodel=submodel)
        ds.summary()

    def data_summary(self, **kwargs):
        """Print a summary of the model data (see get_data_as_dataset)."""
        ds = self.get_data_as_dataset(**kwargs)
        ds.summary()

    def _get_before_after_dataset_from_attribute(self, var_name, storage, **kwargs):
        """Compute 'var_name' on its dataset and reload a copy from 'storage'.

        Returns the dataset, which then holds both the current values and
        the reloaded copy of the attribute.
        """
        dataset_name = var_name.get_dataset_name()
        ds = self.get_dataset(dataset_name)
        ds.compute_variables([var_name], dataset_pool=self.get_dataset_pool())
        ds.copy_attribute_by_reload(var_name, storage=storage, **kwargs)
        return ds

    def get_before_after_attribute(self, attribute_name):
        """Return a dictionary with elements 'before' (contains an array of
        the given attribute that is reloaded from the cache) and 'after'
        (contains an array of the given attribute with the current values).
        """
        from opus_core.store.attribute_cache import AttributeCache
        var_name = VariableName(attribute_name)
        storage = AttributeCache(self.simulation_state.get_cache_directory())
        ds = self._get_before_after_dataset_from_attribute(
            var_name, storage=storage,
            package_order=self.get_dataset_pool().get_package_order())
        return {'after': ds[var_name.get_alias()],
                'before': ds.get_attribute('%s_reload__' % var_name.get_alias())}

    def summary_before_after(self, attribute_name):
        """Print summary of the given attribute 'before' (values reloaded
        from the cache) and 'after' (current values).
        """
        from opus_core.store.attribute_cache import AttributeCache
        var_name = VariableName(attribute_name)
        storage = AttributeCache(self.simulation_state.get_cache_directory())
        ds = self._get_before_after_dataset_from_attribute(
            var_name, storage=storage,
            package_order=self.get_dataset_pool().get_package_order())
        # Single-argument print(...) behaves the same as the Python 2 print
        # statement and also parses under Python 3.
        print('')
        print('Before model run:')
        print('=================')
        ds.summary(names=['%s_reload__' % var_name.get_alias()])
        print('')
        print('After model run:')
        print('=================')
        ds.summary(names=[var_name.get_alias()])

    def model_dependencies(self, model=None, group=None):
        """Prints out all dependencies for the model.

        With model=None the explored model (and its specification) is used;
        with model='all' the dependencies of every model of the scenario
        are printed.
        """
        from opus_core.variables.dependency_query import DependencyChart
        if model is None:  # current model
            model, group = self.get_model_name()
            spec = self.get_specification()
        else:
            spec = None
        if model == 'all':  # print dependencies for all models
            for thismodel in self.scenario_models:
                thisgroups = None
                if isinstance(thismodel, dict):
                    model_key = list(thismodel.keys())[0]
                    settings = thismodel[model_key]
                    # Entries such as {model: ["run"]} carry a list, which
                    # has no group members to look up.
                    if isinstance(settings, dict):
                        thisgroups = settings.get('group_members', None)
                    thismodel = model_key
                if not isinstance(thisgroups, list):
                    thisgroups = [thisgroups]
                for group in thisgroups:
                    chart = DependencyChart(self.xml_configuration, model=thismodel, model_group=group)
                    chart.print_model_dependencies()
        else:
            chart = DependencyChart(self.xml_configuration, model=model, model_group=group,
                                    specification=spec)
            chart.print_model_dependencies()

    def variable_dependencies(self, name):
        """Prints out dependencies of this variable. 'name' can be either an
        alias from the model specification or an expression.
        """
        from opus_core.variables.dependency_query import DependencyChart
        varname = None
        for thisvar in self.get_specification().get_variable_names():
            if not isinstance(thisvar, VariableName):
                thisvar = VariableName(thisvar)
            if name == thisvar.get_alias():
                varname = thisvar
                break
        if varname is None:
            # Not an alias of the specification: treat it as an expression.
            varname = VariableName(name)
        chart = DependencyChart(self.xml_configuration)
        chart.print_dependencies(varname.get_expression())

    def compute_expression(self, attribute_name):
        """Compute any expression and return its values."""
        var_name = VariableName(attribute_name)
        dataset_name = var_name.get_dataset_name()
        ds = self.get_dataset(dataset_name)
        return ds.compute_variables([var_name], dataset_pool=self.get_dataset_pool())

    def get_dataset_pool(self):
        """Return the dataset pool of the last run."""
        return self.model_system.run_year_namespace["dataset_pool"]

    def plot_histogram_before_after(self, attribute_name, bins=None):
        """Plot histograms of values returned by the method
        get_before_after_attribute.
        """
        from opus_core.plot_functions import create_histogram, show_plots
        from matplotlib.pylab import figure
        values = self.get_before_after_attribute(attribute_name)
        alias = VariableName(attribute_name).get_alias()
        fig = figure()
        fig.add_subplot(121)
        create_histogram(values['before'], main='%s (before)' % alias, bins=bins)
        fig.add_subplot(122)
        create_histogram(values['after'], main='%s (after)' % alias, bins=bins)
        show_plots()

    def get_correlation(self, submodel=-2):
        """Return an array of correlations between all variables of the model
        data (for given submodel).
        Works only for ChoiceModel and RegressionModel.
        """
        ds = self.get_data_as_dataset(submodel)
        attrs = [attr for attr in ds.get_known_attribute_names() if attr not in ds.get_id_name()]
        return ds.correlation_matrix(attrs)

    def plot_correlation(self, submodel=-2, useR=False, **kwargs):
        """Plot correlations between all variables of the model data (for
        given submodel).
        Works only for ChoiceModel and RegressionModel.
        """
        ds = self.get_data_as_dataset(submodel)
        attrs = [attr for attr in ds.get_known_attribute_names() if attr not in ds.get_id_name()]
        ds.correlation_image(attrs, useR=useR, **kwargs)

    def plot_choice_set(self, agents_index=None, aggregate_to=None, matplotlib=True, **kwargs):
        """Plot map of the sampled choice set.
        agents_index can be given to restrict the set of agents to which the
        choice set belongs to.
        aggregate_to is a name of a dataset which the choice set should be
        aggregated to.
        If matplotlib is False, mapnik is used (and required).
        Additional arguments are passed to plot_map or plot_map_matplotlib.
        E.g. (choice set are buildings, aggregated to zones, for the first
        agent)
        er.plot_choice_set(aggregate_to='zone', matplotlib=False,
                           project_name='psrc_parcel', file='choice_set0.png',
                           agents_index=0)
        """
        choice_set = self.get_choice_set()
        if agents_index is None:
            flatten_choice_index = self.get_choice_set_index().ravel()
        else:
            flatten_choice_index = self.get_choice_set_index()[agents_index, :].ravel()
        if aggregate_to is not None:
            ds_aggr = self.get_dataset(aggregate_to)
            result = ds_aggr.sum_over_ids(choice_set[ds_aggr.get_id_name()[0]][flatten_choice_index],
                                          ones(flatten_choice_index.size))
            ds = ds_aggr
        else:
            result = choice_set.sum_over_ids(choice_set.get_id_attribute()[flatten_choice_index],
                                             ones(flatten_choice_index.size))
            ds = choice_set
        dummy_attribute_name = '__sampled_choice_set__'
        ds.add_attribute(name=dummy_attribute_name, data=result)
        try:
            if matplotlib:
                coord_syst = None
                if ds.get_coordinate_system() is None and hasattr(ds, 'compute_coordinate_system'):
                    coord_syst = ds.compute_coordinate_system(dataset_pool=self.get_dataset_pool())
                ds.plot_map_matplotlib(dummy_attribute_name, background=-1,
                                       coordinate_system=coord_syst, **kwargs)
            else:
                ds.plot_map(dummy_attribute_name, background=-1, **kwargs)
        finally:
            # Remove the temporary attribute even if plotting fails.
            ds.delete_one_attribute(dummy_attribute_name)

    def plot_choice_set_attribute(self, name, agents_index=None, aggregate_to=None,
                                  function='sum', matplotlib=True, **kwargs):
        """Plot map of the given attribute for the sampled choice set.
        agents_index can be given to restrict the set of agents to which the
        choice set belongs to.
        aggregate_to is a name of a dataset which the choice set should be
        aggregated to.
        function defines the aggregating function (e.g. sum, mean, median,
        etc.)
        If matplotlib is False, mapnik is used (and required).
        Additional arguments are passed to plot_map or plot_map_matplotlib.
        E.g.
        er.plot_choice_set_attribute('residential_units', aggregate_to='zone',
                                     matplotlib=False, project_name='psrc_parcel',
                                     file='choice_resunits.png')
        """
        choice_set = self.get_choice_set()
        if agents_index is None:
            flatten_choice_index = self.get_choice_set_index().ravel()
        else:
            flatten_choice_index = self.get_choice_set_index()[agents_index, :].ravel()
        # filter_var is 1 for choices that were not sampled, 0 otherwise.
        filter_var = ones(choice_set.size(), dtype='int16')
        filter_var[unique(flatten_choice_index)] = 0
        filter_idx = where(filter_var)[0]
        if aggregate_to is not None:
            ds_aggr = self.get_dataset(aggregate_to)
            result = ds_aggr.aggregate_over_ids(choice_set[ds_aggr.get_id_name()[0]][flatten_choice_index],
                                                what=choice_set[name][flatten_choice_index],
                                                function=function)
            filter_values = ds_aggr.sum_over_ids(choice_set[ds_aggr.get_id_name()[0]][filter_idx],
                                                 ones(filter_idx.size))
            filter_values = filter_values > 0
            ds = ds_aggr
        else:
            result = choice_set.aggregate_over_ids(choice_set.get_id_attribute()[flatten_choice_index],
                                                   what=choice_set[name][flatten_choice_index],
                                                   function=function)
            filter_values = filter_var
            ds = choice_set
        dummy_attribute_name = '__sampled_choice_set_attribute__'
        dummy_filter_name = '__sampled_choice_set_filter__'
        ds.add_attribute(name=dummy_attribute_name, data=result)
        try:
            ds.add_attribute(name=dummy_filter_name, data=filter_values)
            try:
                if matplotlib:
                    coord_syst = None
                    if ds.get_coordinate_system() is None and hasattr(ds, 'compute_coordinate_system'):
                        coord_syst = ds.compute_coordinate_system(dataset_pool=self.get_dataset_pool())
                    ds.plot_map_matplotlib(dummy_attribute_name, filter=dummy_filter_name,
                                           coordinate_system=coord_syst, **kwargs)
                else:
                    ds.plot_map(dummy_attribute_name, filter=dummy_filter_name, **kwargs)
            finally:
                ds.delete_one_attribute(dummy_filter_name)
        finally:
            # Remove the temporary attributes even if plotting fails.
            ds.delete_one_attribute(dummy_attribute_name)

    def plot_coefficients(self, submodel=-2, exclude_constant=True, eqidx=0, plot=True,
                          store_values_to_file=None):
        """Plot a barchart of coefficient values. This can be used in a
        regression model, when coefficients are standardized (i.e. using
        the estimation module
        opus_core.estimate_linear_regression_standardized).
        store_values_to_file can be a file name where the values are stored.
        If plot is False, a dictionary with 'names', 'values' and 'errors'
        is returned instead of plotting.
        """
        coef = self.get_coefficients(submodel)
        values = coef.get_coefficient_values()
        names = coef.get_coefficient_names()
        sd = coef.get_standard_errors()
        idx = ones(names.shape[1], dtype="bool")
        if exclude_constant:
            pos = coef.get_constants_positions()
            if pos.size > 0:
                idx[pos] = 0
        if store_values_to_file is not None:
            n = idx.sum()
            result = concatenate((reshape(names[eqidx, idx], (n, 1)),
                                  reshape(values[eqidx, idx], (n, 1)),
                                  reshape(sd[eqidx, idx], (n, 1))), axis=1)
            # Header first, then the table appended to the same file.
            write_to_text_file(store_values_to_file,
                               array(['coefficient_name', 'estimate', 'standard_error']),
                               delimiter='\t')
            write_table_to_text_file(store_values_to_file, result, delimiter='\t', mode='a')
        if plot:
            plot_barchart(values[eqidx, idx], labels=names[eqidx, idx], errors=sd[eqidx, idx])
        else:
            return {'names': names[eqidx, idx], 'values': values[eqidx, idx], 'errors': sd[eqidx, idx]}

    def create_latex_tables(self, directory, other_info_keys=None):
        """Create LaTeX tables of the model's coefficients and specification
        in 'directory' using LatexTableCreator.
        """
        from opus_core.latex_table_creator import LatexTableCreator
        LTC = LatexTableCreator()
        LTC.create_latex_table_for_coefficients_for_model(
            self.get_model().get_specified_coefficients().coefficients, self.explored_model, directory,
            other_info_keys=other_info_keys)
        LTC.create_latex_table_for_specifications_for_model(
            self.get_model().get_specified_coefficients().specification, self.explored_model, directory)
class Estimator(ModelExplorer):
    """Estimate a model, re-estimate it with another specification and run
    predictions with the estimated coefficients.
    """

    def __init__(self, config=None, save_estimation_results=False):
        """Prepare the simulation state and configuration for estimation.

        'config' must contain a non-None 'cache_directory' entry pointing to
        the cache with the data to estimate on. The estimated model is taken
        from config['model_name'] or, if missing, is the first entry of
        config['models'] that is marked with "estimate". Entries of
        config['config_changes_for_estimation']['estimate_config'] are
        merged into the model's 'estimate_config' init argument.

        Raises KeyError if the cache directory is not specified.
        """
        if config is None or 'cache_directory' not in config or config['cache_directory'] is None:
            raise KeyError("The cache directory must be specified in the "
                "given configuration, giving the filesystem path to the cache "
                "directory containing the data with which to estimate. Please "
                "check that your configuration contains the 'cache_directory' "
                "entry and that it is not None.")

        self.simulation_state = SimulationState(new_instance=True,
                                                start_time=config.get('base_year', 0))
        self.simulation_state.set_cache_directory(config['cache_directory'])

        SessionConfiguration(new_instance=True,
                             package_order=config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())
        self.config = Resources(config)
        self.save_estimation_results = save_estimation_results
        self.debuglevel = self.config.get("debuglevel", 4)
        self.model_system = ModelSystem()
        self.agents_index_for_prediction = None

        models = self.config.get('models', [])

        self.model_name = None
        if "model_name" in config.keys():
            self.model_name = config["model_name"]
        else:
            for model in models:
                if isinstance(model, dict):
                    model_name = list(model.keys())[0]
                    actions = model[model_name]
                    if (actions == "estimate") or (isinstance(actions, list) and ("estimate" in actions)):
                        self.model_name = model_name
                        break

        estimate_config_changes = self.config.get('config_changes_for_estimation', {}).get('estimate_config', {})
        if len(estimate_config_changes) > 0:
            change = Resources({'models_configuration': {self.model_name: {'controller': {'init': {'arguments': {}}}}}})
            estimate_config_str = self.config['models_configuration'].get(self.model_name, {}).get('controller', {}).get('init', {}).get('arguments', {}).get('estimate_config', '{}')
            # NOTE(review): eval of a configuration string; the configuration
            # must come from a trusted source.
            try:
                # Wrap in Resources so that a plain dict (e.g. the default
                # '{}') also supports merge().
                estimate_config = Resources(eval(estimate_config_str))
            except Exception:
                # Best effort: fall back to an empty configuration when the
                # string cannot be evaluated in this scope.
                estimate_config = Resources({})
            estimate_config.merge(estimate_config_changes)
            self.config.merge(change)
            self.config['models_configuration'][self.model_name]['controller']['init']['arguments']['estimate_config'] = 'Resources(%s)' % estimate_config

    def estimate(self, out_storage=None):
        """Run the estimation, extract coefficients and specification,
        optionally save the results to 'out_storage', and log the results.
        """
        self.model_system.run(self.config, write_datasets_to_cache_at_end_of_year=False)
        self.extract_coefficients_and_specification()

        if self.save_estimation_results:
            self.save_results(out_storage=out_storage)
        self.log_results()

    def reestimate(self, specification_module_name=None, specification_dict=None, out_storage=None,
                   type=None, submodels=None):
        """Re-estimate the model with a new specification.

        specification_module_name is name of a module that contains a
        dictionary called 'specification'. If it is not given, the argument
        specification_dict must be given which is a dictionary object.
        'type' is the name of model member, such as 'commercial',
        'residential'. The specification dictionary is expected to have an
        entry of this name.
        If 'submodels' is given (list or a number), the re-estimation is done
        only for those submodels.

        Raises ValueError if a requested submodel is not in the
        specification.
        """
        if specification_module_name is not None:
            import importlib
            # (Re)load the module so that edits made to the specification
            # file since the last import are picked up.
            specification_module = importlib.import_module(specification_module_name)
            specification_module = reload(specification_module)
            specification_dict = specification_module.specification
        if type is not None:
            specification_dict = specification_dict[type]
        if submodels is not None:
            # Remove all submodels but the given ones from the specification.
            submodels_to_be_deleted = list(specification_dict.keys())
            if not isinstance(submodels, list):
                submodels = [submodels]
            for sm in submodels:
                if sm not in submodels_to_be_deleted:
                    raise ValueError("Submodel %s not in the specification." % sm)
                submodels_to_be_deleted.remove(sm)
                # The shared variable definitions must survive the pruning.
                if "_definition_" in submodels_to_be_deleted:
                    submodels_to_be_deleted.remove("_definition_")
            for sm in submodels_to_be_deleted:
                del specification_dict[sm]
        self.specification = EquationSpecification(specification_dict=specification_dict)
        new_namespace = self.model_system.run_year_namespace
        keys_coeff_spec = self.get_keys_for_coefficients_and_specification()
        new_namespace[keys_coeff_spec["specification"]] = self.specification
        self.coefficients, coeff_dict_dummy = self.model_system.do_process(new_namespace)
        # Update run_year_namespace explicitly since do_process does not.
        self.model_system.run_year_namespace = new_namespace
        self.model_system.run_year_namespace[keys_coeff_spec["coefficients"]] = self.coefficients

        if self.save_estimation_results:
            self.save_results(out_storage=out_storage)

    def predict(self, predicted_choice_id_name, agents_index=None):
        """Run prediction. Currently makes sense only for choice models.

        The model's 'run' process is executed with the estimated
        coefficients for the agents in 'agents_index' (by default the agents
        that were active in the estimation). The predicted choices are
        stored in the agents' attribute 'predicted_choice_id_name'; the
        agents' current choices are left as they were.

        Returns True on success and False if an error was logged.
        """
        # NOTE(review): Resources(self.config) is presumably a shallow copy,
        # in which case the nested assignments below also change
        # self.config - confirm.
        tmp_config = Resources(self.config)

        if self.agents_index_for_prediction is None:
            self.agents_index_for_prediction = self.get_agent_set_index().copy()

        if agents_index is None:
            agents_index = self.agents_index_for_prediction

        run_arguments = tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']
        # These are names looked up in the run namespace filled in below.
        run_arguments['coefficients'] = "coeff_est"
        run_arguments['agents_index'] = "agents_index"
        run_arguments['chunk_specification'] = "{'nchunks':1}"

        try:
            run_year_namespace = copy.copy(self.model_system.run_year_namespace)
        except Exception:
            logger.log_error("The estimate() method must be run first")
            return False

        try:
            agents = self.get_agent_set()
            choice_id_name = self.get_choice_set().get_id_name()[0]
            # Save current choices of agents and reset them for the agents
            # in agents_index.
            current_choices = agents.get_attribute(choice_id_name).copy()
            dummy_data = zeros(current_choices.size, dtype=current_choices.dtype) - 1
            agents.modify_attribute(name=choice_id_name, data=dummy_data, index=agents_index)
            try:
                run_year_namespace["process"] = "run"
                run_year_namespace["coeff_est"] = self.coefficients
                run_year_namespace["agents_index"] = agents_index
                run_year_namespace["processmodel_config"] = tmp_config['models_configuration'][self.model_name]['controller']['run']
                new_choices = self.model_system.do_process(run_year_namespace)
            finally:
                # Always restore the original choices, also when the model
                # run fails, so agents are never left with reset choices.
                agents.modify_attribute(name=choice_id_name, data=current_choices)
            dummy_data[agents_index] = new_choices
            if predicted_choice_id_name not in agents.get_known_attribute_names():
                agents.add_primary_attribute(name=predicted_choice_id_name, data=dummy_data)
            else:
                agents.modify_attribute(name=predicted_choice_id_name, data=dummy_data)
            logger.log_status("Predictions saved into attribute " + predicted_choice_id_name)
            return True
        except Exception as e:
            logger.log_error("Error encountered in prediction: %s" % e)
            logger.log_stack_trace()
            return False