def __init__(self, regression_procedure="opus_core.linear_regression",
             submodel_string=None, run_config=None, estimate_config=None,
             debuglevel=0, dataset_pool=None):
    self.debug = DebugPrinter(debuglevel)
    self.dataset_pool = self.create_dataset_pool(dataset_pool)
    self.regression = RegressionModelFactory().get_model(name=regression_procedure)
    if self.regression is None:
        raise StandardError("No regression procedure given.")
    self.submodel_string = submodel_string
    # Normalize run_config and estimate_config to Resources objects.
    self.run_config = run_config
    if self.run_config is None:
        self.run_config = Resources()
    if not isinstance(self.run_config, Resources) and isinstance(self.run_config, dict):
        self.run_config = Resources(self.run_config)
    self.estimate_config = estimate_config
    if self.estimate_config is None:
        self.estimate_config = Resources()
    if not isinstance(self.estimate_config, Resources) and isinstance(self.estimate_config, dict):
        self.estimate_config = Resources(self.estimate_config)
    self.data = {}
    self.coefficient_names = {}
    ChunkModel.__init__(self)
    self.get_status_for_gui().initialize_pieces(
        3, pieces_description=array(['initialization', 'computing variables', 'submodel: 1']))
def estimate(self, specification, agent_set, agents_index=None, procedure=None,
             data_objects=None, estimate_config=None, debuglevel=0):
    """Computes capacity if required and calls the estimate method of ChoiceModel.
    See ChoiceModel.estimate for details on arguments.
    """
    if agents_index is None:
        agents_index = arange(agent_set.size())
    if agents_index.size <= 0:
        logger.log_status("Nothing to be done.")
        return (None, None)
    if estimate_config is None:
        estimate_config = Resources()
    self.estimate_config = estimate_config.merge_with_defaults(self.estimate_config)
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    if self.location_id_string is not None:
        agent_set.compute_variables(self.location_id_string, dataset_pool=self.dataset_pool)
    capacity_for_estimation = None
    if self.estimate_config.get("compute_capacity_flag", False):
        capacity_string_for_estimation = self.estimate_config.get("capacity_string", None)
        capacity_for_estimation = self.determine_capacity(
            capacity_string=capacity_string_for_estimation,
            agent_set=agent_set, agents_index=agents_index)
        self.estimate_config.merge({"capacity": capacity_for_estimation})
    return ChoiceModel.estimate(self, specification, agent_set, agents_index, procedure,
                                estimate_config=self.estimate_config, debuglevel=debuglevel)
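# Hedged sketch (assumption, not from the source): an 'estimate_config' that
# triggers the capacity computation in estimate() above. The keys
# 'compute_capacity_flag' and 'capacity_string' are the ones read above;
# the capacity variable name is a hypothetical example.
from opus_core.resources import Resources

estimate_config = Resources({
    "compute_capacity_flag": True,
    "capacity_string": "urbansim.gridcell.vacant_residential_units",  # hypothetical variable
})
# model.estimate(specification, households, estimate_config=estimate_config)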
def prepare_for_estimate(self, specification_dict=None, specification_storage=None,
                         specification_table=None, events_for_estimation_storage=None,
                         events_for_estimation_table=None, agents_filter='',
                         compute_variables=[], data_objects={}):
    specification = get_specification_for_estimation(specification_dict,
                                                     specification_storage,
                                                     specification_table)
    projects = None
    # create agents for estimation
    if events_for_estimation_storage is not None:
        projects = Dataset(in_storage=events_for_estimation_storage,
                           in_table_name=events_for_estimation_table,
                           id_name=[], dataset_name='development_project')
        if compute_variables:
            projects.compute_variables(compute_variables, resources=Resources(data_objects))
            # needs to be a primary attribute because of the join method below
            #projects.add_primary_attribute(estimation_set.get_attribute(location_id_variable),
            #                               VariableName(location_id_variable).get_alias())
        if agents_filter:
            values = projects.compute_variables(agents_filter, resources=Resources(data_objects))
            index = where(values > 0)[0]
            projects.subset_by_index(index, flush_attributes_if_not_loaded=False)
    return (specification, projects)
def load(self, resources=None, in_storage=None, in_table_name=None):
    """Load coefficients from 'in_storage' (of type Storage); the table field names
    can be overridden via 'resources'."""
    local_resources = Resources(resources)
    local_resources.merge_with_defaults({
        "field_submodel_id": self.field_submodel_id,
        "field_coefficient_name": self.field_coefficient_name,
        "field_estimate": self.field_estimate,
        "field_standard_error": self.field_standard_error,
        "other_fields": self.other_fields})
    if in_storage is not None:
        self.in_storage = in_storage
    if not isinstance(self.in_storage, Storage):
        logger.log_warning("in_storage has to be of type Storage. No coefficients loaded.")
    else:
        data = self.in_storage.load_table(table_name=in_table_name)
        submodels = data[local_resources["field_submodel_id"]]
        self.names = data[local_resources["field_coefficient_name"]]
        self.values = data[local_resources["field_estimate"]]
        self.standard_errors = data[local_resources["field_standard_error"]]
        for measure in local_resources["other_fields"]:
            if measure in data.keys():
                self.other_measures[measure] = data[measure]
        if submodels.max() >= 0:
            self.submodels = submodels
        self.check_consistency()
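# Hedged sketch (assumption, not from the source): a minimal coefficients table
# written to in-memory dict storage and then read back by load() above. The
# field names shown ('sub_model_id', 'coefficient_name', 'estimate',
# 'standard_error') are illustrative; the real defaults come from the
# self.field_* attributes merged above.
from numpy import array
from opus_core.storage_factory import StorageFactory

storage = StorageFactory().get_storage('dict_storage')
storage.write_table(table_name='hlcm_coefficients', table_data={
    'sub_model_id': array([1, 1, 2]),
    'coefficient_name': array(['b_income', 'b_age', 'b_income']),
    'estimate': array([0.7, -0.1, 0.5]),
    'standard_error': array([0.06, 0.02, 0.09]),
})
# coefficients.load(in_storage=storage, in_table_name='hlcm_coefficients')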
def prepare_for_simulation(self, config, cache_directory=None):
    self.config = Resources(config)
    base_cache_dir = self.config['creating_baseyear_cache_configuration'].cache_directory_root
    self.simulation_state = SimulationState(new_instance=True,
                                            base_cache_dir=base_cache_dir,
                                            start_time=self.config.get('base_year', 0))
    ### TODO: Get rid of this! There is no good reason to be changing the
    ### Configuration.
    if self.config['cache_directory'] is None:
        self.config['cache_directory'] = self.simulation_state.get_cache_directory()
    SessionConfiguration(new_instance=True,
                         package_order=self.config['dataset_pool_configuration'].package_order,
                         in_storage=AttributeCache())
    if config['creating_baseyear_cache_configuration'].cache_from_database:
        ForkProcess().fork_new_process(
            self.config['creating_baseyear_cache_configuration'].cache_scenario_database,
            self.config)
    else:
        CacheFltData().run(self.config)
def run(self, specification, coefficients, dataset, index=None, chunk_specification=None,
        data_objects=None, run_config=None, initial_values=None, procedure=None, debuglevel=0):
    """'specification' is of type EquationSpecification,
    'coefficients' is of type Coefficients,
    'dataset' is of type Dataset,
    'index' are indices of individuals in dataset for which the model runs.
        If it is None, the whole dataset is considered.
    'chunk_specification' determines the number of chunks in which the simulation is processed.
    'data_objects' is a dictionary where each key is the name of a data object
        ('zone', ...) and its value is an object of class Dataset.
    'run_config' is of type Resources; it gives additional arguments for the run.
    If 'procedure' is given, it overwrites the regression_procedure of the constructor.
    'initial_values' is an array of the initial values of the results. It will be overwritten
        by the results for those elements that are handled by the model (defined by submodels
        in the specification). By default the results are initialized with 0.
    'debuglevel' overwrites the constructor 'debuglevel'.
    """
    self.debug.flag = debuglevel
    if run_config is None:
        run_config = Resources()
    if not isinstance(run_config, Resources) and isinstance(run_config, dict):
        run_config = Resources(run_config)
    self.run_config = run_config.merge_with_defaults(self.run_config)
    self.run_config.merge({"debug": self.debug})
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    self.dataset_pool.replace_dataset(dataset.get_dataset_name(), dataset)
    if procedure is not None:
        self.regression = RegressionModelFactory().get_model(name=procedure)
    if initial_values is None:
        self.initial_values = zeros((dataset.size(),), dtype=float32)
    else:
        self.initial_values = zeros((dataset.size(),), dtype=initial_values.dtype)
        self.initial_values[index] = initial_values
    if dataset.size() <= 0:  # no data loaded yet
        dataset.get_id_attribute()
    if index is None:
        index = arange(dataset.size())
    result = ChunkModel.run(self, chunk_specification, dataset, index, float32,
                            specification=specification, coefficients=coefficients)
    return result
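# Hedged usage sketch (assumption, not from the source): invoking the regression
# run() above on a subset of a dataset. 'households', 'zones', 'specification'
# and 'coefficients' are placeholder objects, so the call is left commented.
# predicted = model.run(specification, coefficients, households,
#                       index=arange(100),            # only the first 100 rows
#                       data_objects={"zone": zones}, # merged into the dataset pool
#                       procedure="opus_core.linear_regression",  # overrides the constructor
#                       debuglevel=1)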
def prepare_for_estimate(self, specification_dict=None, specification_storage=None,
                         specification_table=None, agent_set=None,
                         agents_for_estimation_storage=None, agents_for_estimation_table=None,
                         join_datasets=False, index_to_unplace=None, portion_to_unplace=1.0,
                         agent_filter=None, data_objects={}):
    from opus_core.model import get_specification_for_estimation
    specification = get_specification_for_estimation(specification_dict,
                                                     specification_storage,
                                                     specification_table)
    if (agent_set is not None) and (index_to_unplace is not None):
        if self.location_id_string is not None:
            agent_set.compute_variables(self.location_id_string,
                                        resources=Resources(data_objects))
        if portion_to_unplace < 1:
            unplace_size = int(portion_to_unplace * index_to_unplace.size)
            end_index_to_unplace = sample_noreplace(index_to_unplace, unplace_size)
        else:
            end_index_to_unplace = index_to_unplace
        logger.log_status("Unplace " + str(end_index_to_unplace.size) + " agents.")
        agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                   -1 * ones(end_index_to_unplace.size),
                                   end_index_to_unplace)
    # create agents for estimation
    if agents_for_estimation_storage is not None:
        estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                 in_table_name=agents_for_estimation_table,
                                 id_name=agent_set.get_id_name(),
                                 dataset_name=agent_set.get_dataset_name())
        if agent_filter is not None:
            estimation_set.compute_variables(agent_filter, resources=Resources(data_objects))
            index = where(estimation_set.get_attribute(agent_filter) > 0)[0]
            estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)
        if join_datasets:
            agent_set.join_by_rows(estimation_set, require_all_attributes=False,
                                   change_ids_if_not_unique=True)
            index = arange(agent_set.size() - estimation_set.size(), agent_set.size())
        else:
            index = agent_set.get_id_index(estimation_set.get_id_attribute())
    else:
        index = arange(agent_set.size())
    return (specification, index)
def __init__(self, location_set, model_name=None, short_name=None,
             sampler="opus_core.samplers.weighted_sampler",
             utilities="opus_core.linear_utilities",
             probabilities="opus_core.mnl_probabilities",
             choices="opus_core.random_choices",
             filter=None, submodel_string=None, location_id_string=None,
             run_config=None, estimate_config=None, debuglevel=0, dataset_pool=None,
             variable_package="urbansim", **kwargs):
    """
    :number_of_units_string: used to determine whether a choice is over-filled,
        by comparing it with number_of_agents_string in get_locations_vacancy().
        TODO: How does it differ from capacity_string?
    """
    if model_name is not None:
        self.model_name = model_name
    if short_name is not None:
        self.model_short_name = short_name
    if (run_config is not None) and not isinstance(run_config, Resources):
        run_config = Resources(run_config)
    if (estimate_config is not None) and not isinstance(estimate_config, Resources):
        estimate_config = Resources(estimate_config)
    self.add_prefix_to_variable_names(
        ["capacity_string", "number_of_agents_string", "number_of_units_string"],
        location_set, variable_package, run_config)
    self.add_prefix_to_variable_names("weights_for_estimation_string",
                                      location_set, variable_package, estimate_config)
    LocationChoiceModel.__init__(self, location_set=location_set, sampler=sampler,
                                 utilities=utilities, probabilities=probabilities,
                                 choices=choices, filter=filter,
                                 submodel_string=submodel_string,
                                 location_id_string=location_id_string,
                                 run_config=run_config, estimate_config=estimate_config,
                                 debuglevel=debuglevel, dataset_pool=dataset_pool, **kwargs)
def run(self, data=None, coefficients=None, resources=None):
    local_resources = Resources()
    if resources:
        local_resources.merge(resources)
    last_result = self.compute_utilities(data=data, coefficients=coefficients,
                                         resources=local_resources)
    this_result = self.compute_probabilities(resources=local_resources)
    if this_result is not None:
        last_result = this_result
    this_result = self.compute_choices(resources=local_resources)
    if this_result is not None:
        last_result = this_result
    return last_result
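# Minimal sketch of the chaining pattern used by run() above: each stage may
# return None ("nothing new computed"), and the last non-None result wins.
# Plain Python, no Opus dependencies.
def run_pipeline(stages):
    last_result = None
    for stage in stages:
        this_result = stage()
        if this_result is not None:
            last_result = this_result
    return last_result

# e.g. run_pipeline([lambda: [0.2, 0.8], lambda: None]) returns [0.2, 0.8]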
def _compute_vacancy_variables(self, location_set, dev_model_configs, resources):
    compute_resources = Resources(resources)
    compute_resources.merge({"debug": self.debug})
    self.units_variable = {}
    self.variable_for_vacancy = {}
    for project_type in dev_model_configs:
        self.units_variable[project_type] = dev_model_configs[project_type]["units"]
        self.variable_for_vacancy[project_type] = compute_resources.get(
            "%s_vacant_variable" % project_type,
            "urbansim.%s.vacant_%s" % (location_set.get_dataset_name(),
                                       self.units_variable[project_type]))
        location_set.compute_variables([self.variable_for_vacancy[project_type]],
                                       resources=compute_resources)
def preprocess_projects(self, agent_set, agents_index=None, data_objects=None):
    """Split projects that don't find enough choices into smaller ones (of average size).
    """
    resources = Resources(data_objects)
    resources.merge({"debug": self.debug})
    self.choice_set.compute_variables([self.developable_maximum_unit_full_name,
                                       self.developable_minimum_unit_full_name],
                                      resources=resources)
    max_capacity = self.choice_set.get_attribute(self.developable_maximum_unit_short_name)
    min_capacity = self.choice_set.get_attribute(self.developable_minimum_unit_short_name)
    self.set_choice_set_size()
    nchoices = self.get_choice_set_size()
    project_average_size = agent_set.get_attribute(agent_set.get_attribute_name()).mean()
    add_projects = 0
    remove_projects = 0
    if agents_index is None:
        agents_index = arange(agent_set.size())
    # order agents by size (descending)
    ordered_indices = argsort(-1 * agent_set.get_attribute_by_index(
        agent_set.get_attribute_name(), agents_index))
    improvement_values = []
    projects_ids = agent_set.get_id_attribute()[agents_index].tolist()
    # how many projects fit in each developable location
    project_sizes = agent_set.get_attribute_by_index(agent_set.get_attribute_name(),
                                                     agents_index)
    for iagent in ordered_indices:
        project_size = project_sizes[iagent]
        capacity = logical_and(project_size > min_capacity,
                               (max_capacity / project_size) > 0)
        if where(capacity)[0].size < nchoices:  # not enough choices found
            nsplitted = int(project_size / project_average_size)
            add_projects += nsplitted
            remove_projects += 1
            projects_ids.remove(agent_set.get_id_attribute()[agents_index[iagent]])
            improvement_values = improvement_values + \
                nsplitted * [agent_set.get_attribute_by_index("improvement_value",
                                                              agents_index[iagent])]
        else:
            break  # we can break here, since the projects are sorted by size
    if remove_projects > 0:
        agent_set.remove_elements(agents_index[ordered_indices[0:remove_projects]])
        agents_index = agent_set.get_id_index(projects_ids)
    if add_projects > 0:
        max_id = agent_set.get_attribute(agent_set.get_id_name()[0]).max()
        ids = arange(max_id + 1, max_id + 1 + add_projects)
        agent_set.add_elements(data={
            "project_id": ids,
            self.location_set.get_id_name()[0]: zeros((add_projects,)),
            "improvement_value": array(improvement_values),
            agent_set.get_attribute_name(): project_average_size * ones((add_projects,))},
            require_all_attributes=False)
        agents_index = agent_set.get_id_index(projects_ids + ids.tolist())
def test_my_inputs(self):
    storage = StorageFactory().get_storage('dict_storage')

    building_types_table_name = 'building_types'
    storage.write_table(table_name=building_types_table_name,
                        table_data={
                            'building_type_id': array([1, 2]),
                            'name': array(['residential', 'commercial']),
                            'units': array(['residential_units', 'commercial_sqft'])})

    buildings_table_name = 'buildings'
    storage.write_table(table_name=buildings_table_name,
                        table_data={
                            'building_id': arange(7) + 1,
                            'building_type_id': array([1, 2, 1, 2, 1, 1, 2]),
                            'sqft': array([100, 350, 1000, 0, 430, 95, 750]),
                            'residential_units': array([300, 0, 100, 0, 1300, 600, 10])})

    building_types = BuildingTypeDataset(in_storage=storage,
                                         in_table_name=building_types_table_name)
    buildings = BuildingDataset(in_storage=storage, in_table_name=buildings_table_name,
                                resources=Resources({
                                    'building_categories': {
                                        'residential': array([200, 500, 1200]),
                                        'commercial': array([200, 500])}}))

    variable_names = map(lambda type: '%s_%s' % (self.variable_name_prefix, type),
                         ['commercial', 'residential'])
    buildings.compute_variables(variable_names,
                                resources=Resources({'building_type': building_types}))

    should_be_residential = array([2, 0, 1, 0, 4, 3, 0])
    should_be_commercial = array([0, 2, 0, 1, 0, 0, 3])
    values_commercial = buildings.get_attribute(variable_names[0])
    values_residential = buildings.get_attribute(variable_names[1])

    self.assert_(ma.allequal(values_commercial, should_be_commercial),
                 'Error in ' + variable_names[0])
    self.assert_(ma.allequal(values_residential, should_be_residential),
                 'Error in ' + variable_names[1])
def __init__(self, resources=None, dataset1=None, dataset2=None, index1=None, index2=None,
             debuglevel=0):
    debug = DebugPrinter(debuglevel)
    debug.print_debug("Creating object %s.%s" % (self.__class__.__module__,
                                                 self.__class__.__name__), 2)
    local_resources = Resources(resources)
    local_resources.merge_if_not_None({
        "dataset1": dataset1,
        "dataset2": dataset2,
        "debug": debug,
        "index1": index1,
        "index2": index2})
    CoreInteractionDataset.__init__(self, resources=local_resources)
def load(self, resources=None, in_storage=None, in_table_name=None, variables=[]):
    local_resources = Resources(resources)
    local_resources.merge_with_defaults({
        "field_submodel_id": self.field_submodel_id,
        "field_equation_id": self.field_equation_id,
        "field_coefficient_name": self.field_coefficient_name,
        "field_variable_name": self.field_variable_name,
        "field_fixed_value": self.field_fixed_value})
    if in_storage is not None:
        self.in_storage = in_storage
    if not isinstance(self.in_storage, Storage):
        logger.log_warning("in_storage is not of type Storage. "
                           "No EquationSpecification loaded.")
    else:
        data = self.in_storage.load_table(table_name=in_table_name)
        equations = array([-1])
        if local_resources["field_equation_id"] in data:
            equations = data[local_resources["field_equation_id"]]
        vars = data[local_resources["field_variable_name"]]
        self.variables = tuple(map(lambda x: VariableName(x), vars))
        self.coefficients = data[local_resources["field_coefficient_name"]]
        if local_resources["field_submodel_id"] in data:
            submodels = data[local_resources["field_submodel_id"]]
        else:
            submodels = array([-2] * self.coefficients.size, dtype="int32")
        self.submodels = submodels
        if equations.max() >= 0:
            self.equations = equations
        if local_resources["field_fixed_value"] in data:
            self.fixed_values = data[local_resources["field_fixed_value"]]
        for field in data:
            if field not in [local_resources["field_submodel_id"],
                             local_resources["field_equation_id"],
                             local_resources["field_variable_name"],
                             local_resources["field_coefficient_name"],
                             local_resources["field_fixed_value"]]:
                self.other_fields[field] = data[field]
        self.set_other_dim_field_names()
        if variables:
            self.shrink(variables)
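# Hedged sketch (assumption, not from the source): a minimal specification table
# as load() above expects it. The field names ('sub_model_id', 'variable_name',
# 'coefficient_name') are illustrative defaults; the real ones come from the
# self.field_* attributes, and the variable names are hypothetical examples.
from numpy import array
from opus_core.storage_factory import StorageFactory

storage = StorageFactory().get_storage('dict_storage')
storage.write_table(table_name='my_specification', table_data={
    'sub_model_id': array([1, 1, 2]),
    'variable_name': array(['urbansim.gridcell.population',
                            'constant',
                            'urbansim.gridcell.average_income']),
    'coefficient_name': array(['b_pop', 'constant', 'b_inc']),
})
# specification.load(in_storage=storage, in_table_name='my_specification')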
def run(self, specification, coefficients, agent_set, agents_index=None,
        chunk_specification=None, data_objects=None, run_config=None, debuglevel=0):
    """Run a simulation and return a numpy array of length agents_index, giving agent
    choices (ids of locations).
    'specification' is of type EquationSpecification,
    'coefficients' is of type Coefficients,
    'agent_set' is of type Dataset,
    'agents_index' are indices of individuals in the agent_set for which the model runs.
        If it is None, the whole agent_set is considered.
    'chunk_specification' determines the number of chunks in which the simulation is
        processed. Default is to use 300 rows per chunk.
    'data_objects' is a dictionary where each key is the name of a data object
        ('zone', ...) and its value is an object of class Dataset.
    'run_config' is of type Resources; it gives additional arguments for the run.
    'debuglevel' overwrites the constructor 'debuglevel'.
    """
    if run_config is None:
        run_config = Resources()
    self.run_config = run_config.merge_with_defaults(self.run_config)
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    self.dataset_pool.add_datasets_if_not_included(
        {agent_set.get_dataset_name(): agent_set})

    ## what is the use of computing the location_id string in run? it gets new values anyway
    #if self.location_id_string is not None:
    #    location_id = agent_set.compute_variables(self.location_id_string, dataset_pool=self.dataset_pool)

    ## done in choice_model
    #location_id_name = self.choice_set.get_id_name()[0]
    #if (location_id_name not in agent_set.get_known_attribute_names()):
    #    agent_set.add_attribute(name=location_id_name, data=resize(array([-1]), agent_set.size()))

    if self.run_config.get("agent_units_string", None):
        # used when agents take a different amount of capacity from the total capacity
        agent_set.compute_variables([self.run_config["agent_units_string"]],
                                    dataset_pool=self.dataset_pool)

    self.compute_capacity_flag = self.run_config.get("compute_capacity_flag", False)
    capacity_string = None
    self.capacity = None
    if self.compute_capacity_flag:
        capacity_string = self.run_config.get("capacity_string", None)
        if capacity_string is None:
            raise KeyError("Entry 'capacity_string' has to be specified in 'run_config' "
                           "if 'compute_capacity_flag' is True")

    ## if weights is None, use capacity for weights
    if self.run_config.get("weights_for_simulation_string", None) is None and \
            capacity_string is not None:
        self.run_config.merge({"weights_for_simulation_string": capacity_string})

    return ChoiceModel.run(self, specification, coefficients, agent_set,
                           agents_index=agents_index,
                           chunk_specification=chunk_specification,
                           run_config=self.run_config, debuglevel=debuglevel)
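# Hedged sketch (assumption, not from the source): a 'run_config' for the
# capacity-aware run() above. The keys ('compute_capacity_flag', 'capacity_string',
# 'weights_for_simulation_string') are the ones read above; the variable name is
# a hypothetical example.
from opus_core.resources import Resources

run_config = Resources({
    "compute_capacity_flag": True,
    "capacity_string": "urbansim.gridcell.vacant_residential_units",  # hypothetical variable
    # 'weights_for_simulation_string' omitted on purpose: run() then falls back
    # to capacity_string as the sampling weights.
})
# choices = model.run(specification, coefficients, households, run_config=run_config)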
def __init__(self, **kargs):
    # Estimator.__init__(self, settings=None, run_land_price_model_before_estimation=False, **kargs) # <-- old __init__
    # Estimator.__init__(self, config=None, save_estimation_results=True) # <-- new __init__ doesn't work, but not needed
    parent_dir_path = package().get_package_parent_path()
    package_path = OpusPackage().get_path_for_package("biocomplexity")
    self.storage = StorageFactory().get_storage(
        'tab_storage', storage_location=os.path.join(package_path, 'data'))

    ## 1. directory path of the full (4-county spatial extent) dataset
    flt_directory = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_4County")

    ## 2. select (uncomment) one of the following choices of directory paths of
    ##    subsetted sample input data/variables
    #flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_small_test_set_opus")
    flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_estimation_all")
    #flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_estimation_all_orig")
    #flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_suburban_orig")
    #flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban")
    #flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban_orig")

    ## note - must rename lct-forusewith91sample.Float32 to lct.lf4 if doing 1991-1995
    ## note - must rename lct-forusewith95sample.Float32 to lct.lf4 if doing 1995-1999

    ## 3. select (uncomment) one of the following choices of land cover data
    ##    (input data) date pairs (years)
    #years = [1991, 1995]
    years = [1995, 1999]
    #years = [1999, 2002]

    self.lc1 = LandCoverDataset(
        in_storage=StorageFactory().get_storage(
            "flt_storage",
            storage_location=os.path.join(flt_directory_est, str(years[0]))),
        resources=Resources({"lowercase": 1}))
    self.lc2 = LandCoverDataset(
        in_storage=StorageFactory().get_storage(
            "flt_storage",
            storage_location=os.path.join(flt_directory_est, str(years[1]))),
        resources=Resources({"lowercase": 1}))
    self.lc1_all = LandCoverDataset(
        in_storage=StorageFactory().get_storage(
            "flt_storage",
            storage_location=os.path.join(flt_directory, str(years[0]))),
        resources=Resources({"lowercase": 1}))
    self.lc1_all.flush_dataset()
    self.lc2_all = LandCoverDataset(
        in_storage=StorageFactory().get_storage(
            "flt_storage",
            storage_location=os.path.join(flt_directory, str(years[1]))),
        resources=Resources({"lowercase": 1}))
    self.lc2_all.flush_dataset()
def __init__(self, resources=None, dataset1=None, dataset2=None, index1=None, index2=None,
             dataset_name=None, debug=None):
    """Argument 'resources' is of type Resources. It is merged with the remaining arguments.
    It should contain:
        dataset1 - agent class
        dataset2 - class of the choice dataset
    Optional:
        index1 - 1D array, indices of dataset1
        index2 - If a 2D array: row i contains indices of individuals of dataset2 that
            belong to the i-th individual of dataset1[index1].
            If a 1D array: indices of individuals of dataset2 for all individuals of
            dataset1[index1].
        dataset_name - subdirectory in which the implementation of the interaction
            variables is placed (default "")
    dataset1.resources and dataset2.resources should contain the key 'dataset_name'
    (see Dataset.get_dataset_name()).
    """
    self.resources = Resources(resources)
    self.resources.merge_if_not_None({
        "dataset1": dataset1,
        "dataset2": dataset2,
        "index1": index1,
        "index2": index2,
        "dataset_name": dataset_name,
        "debug": debug})
    self.attribute_boxes = {}
    self.attribute_names = []
    self.debug = self.resources.get("debug", 0)
    if not isinstance(self.debug, DebugPrinter):
        self.debug = DebugPrinter(self.debug)
    self.resources.check_obligatory_keys(["dataset1", "dataset2"])
    self.dataset1 = self.resources["dataset1"]
    self.dataset2 = self.resources["dataset2"]
    self.index1 = self.resources.get("index1", None)
    self.index2 = self.resources.get("index2", None)
    self.dataset_name = self.resources.get("dataset_name", None)
    if self.dataset_name is None:
        self.dataset_name = self.dataset1.get_dataset_name() + '_x_' + \
                            self.dataset2.get_dataset_name()
    self._primary_attribute_names = []
    self.index1_mapping = {}
    if self.index1 is not None:
        self.index1_mapping = do_id_mapping_dict_from_array(self.index1)
    self._id_names = None  # for compatibility with Dataset
    self.variable_factory = VariableFactory()
    self._aliases = {}  # for compatibility with Dataset
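# Minimal sketch of the 'index2' semantics described in the docstring above,
# using plain numpy (no Opus objects): with a 2D index2, row i lists the
# dataset2 members offered to the i-th member of dataset1[index1].
from numpy import array

index1 = array([0, 2])            # two agents from dataset1
index2 = array([[1, 3],           # choices sampled for agent index1[0]
                [0, 3]])          # choices sampled for agent index1[1]
# A 1D index2 (e.g. array([1, 3])) would instead mean: the same dataset2
# members are used for every agent in dataset1[index1].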
def run(self, specification, coefficients, agent_set, agents_index=None, agents_filter=None,
        chunk_specification=None, data_objects=None, run_config=None, debuglevel=0,
        maximum_runs=10):
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    if agents_index is None:
        if agents_filter is not None:
            agent_set.compute_variables(agents_filter, dataset_pool=self.dataset_pool)
            agents_index = where(agent_set.get_attribute(
                VariableName(agents_filter).get_alias()))[0]
        else:
            agents_index = arange(agent_set.size())
    if not isinstance(agents_index, ndarray):
        try:
            agents_index = array(agents_index)
        except:
            raise TypeError("Argument agents_index is of wrong type "
                            "(numpy array or list allowed).")
    if agents_index.size <= 0:
        logger.log_status("Nothing to be done.")
        return array([], dtype='int32')
    if run_config is None:
        run_config = Resources()
    self.run_config = run_config.merge_with_defaults(self.run_config)
    self.number_of_units_string = self.run_config.get("number_of_units_string", None)
    self.number_of_agents_string = self.run_config.get(
        "number_of_agents_string",
        "%s.number_of_agents(%s)" % (self.choice_set.get_dataset_name(),
                                     agent_set.get_dataset_name()))
    if self.number_of_units_string is None:
        maximum_runs = 1
    unplaced = arange(agents_index.size)
    id_name = self.choice_set.get_id_name()[0]
    for run in range(maximum_runs):
        unplaced_size_before_model = unplaced.size
        choices = LocationChoiceModel.run(self, specification, coefficients, agent_set,
                                          agents_index[unplaced], chunk_specification,
                                          debuglevel=debuglevel)
        if run == 0:
            all_choices = choices
        else:
            all_choices[unplaced] = choices
        unplaced = self.get_movers_from_overfilled_locations(agent_set, agents_index,
                                                             config=run_config)
        if (unplaced.size <= 0) or (unplaced_size_before_model == unplaced.size) or \
           (unplaced.size == (unplaced_size_before_model -
                              self.observations_mapping['mapped_index'].size)):
            break
        agent_set.set_values_of_one_attribute(id_name, -1, agents_index[unplaced])
    return all_choices
def setUp(self):
    run_configuration = TestCacheConfiguration()
    SimulationState(new_instance=True)
    SessionConfiguration(run_configuration, new_instance=True,
                         package_order=['urbansim', 'opus_core'],
                         in_storage=AttributeCache())
    self.base_year = run_configuration['base_year']
    self.temp_dir = tempfile.mkdtemp(prefix='opus_tmp')

    # Use the test cache.
    opus_core_path = package().get_opus_core_path()
    test_cache_path = os.path.join(opus_core_path, 'data', 'test_cache')
    new_cache_path = os.path.join(self.temp_dir, 'cache')
    copytree(test_cache_path, new_cache_path)

    # Make sure the copied files are writable.
    for (dirpath, dirnames, filenames) in os.walk(new_cache_path):
        for file_name in filenames:
            full_path = os.path.join(dirpath, file_name)
            os.chmod(full_path, S_IWRITE | S_IREAD)

    SimulationState().set_cache_directory(new_cache_path)
    SimulationState().set_current_time(self.base_year)
    self.config = Resources(run_configuration)

    cache_directory = SimulationState().get_cache_directory()
    self.assertEqual(self.temp_dir, os.path.split(cache_directory)[0])
def test_read_resources_from_string(self):
    data = {"arg1": 1, "arg2": "2", "dict1": {"three": 3, "four": 4}}
    resources = Resources(data)
    write_resources_to_file(self.file_name, resources)
    resources_string = read_file_content(self.file_name)
    loaded_resources = get_resources_from_string(resources_string)
    self.assertEquals(resources, loaded_resources)
def apply_filter(self, filter, agent_set, agents_index, submodel=-2):
    """Apply the filter by comparing to the mean project size per submodel instead of 0,
    i.e. by shifting self.filter."""
    project_size_filter = None
    if filter is not None:
        if isinstance(filter, dict):
            submodel_filter = filter[submodel]
        else:
            submodel_filter = filter
        mean_project_size = agent_set.get_attribute(
            agent_set.get_attribute_name())[agents_index].mean()
        if isinstance(submodel_filter, str):
            resources = Resources({"debug": self.debug})
            self.choice_set.compute_variables([submodel_filter],
                                              dataset_pool=self.dataset_pool,
                                              resources=resources)
            filter_name = VariableName(submodel_filter)
            project_size_filter = self.choice_set.get_attribute(
                filter_name.get_alias()) - mean_project_size
        else:
            project_size_filter = submodel_filter - mean_project_size
    return LocationChoiceModel.apply_filter(self, project_size_filter,
                                            agent_set=agent_set,
                                            agents_index=agents_index,
                                            submodel=submodel)
def _compute_variable_for_prior_year(self, dataset, full_name, time, resources=None):
    """Create a new dataset for this variable, compute the variable, and then return
    the values for this variable."""
    calling_dataset_pool = SessionConfiguration().get_dataset_pool()
    calling_time = SimulationState().get_current_time()
    SimulationState().set_current_time(time)
    try:
        # Get an empty dataset pool with same search paths.
        my_dataset_pool = DatasetPool(
            package_order=calling_dataset_pool.get_package_order(),
            storage=AttributeCache())
        ds = dataset.empty_dataset_like_me(in_storage=AttributeCache())
        # Don't pass any datasets via resources, since they may be from a different time.
        my_resources = Resources(resources)
        for key in list(my_resources.keys()):  # copy the keys: entries are deleted below
            if isinstance(my_resources[key], Dataset):
                del my_resources[key]
        ds.compute_variables(full_name, my_dataset_pool, resources=my_resources)
        values = ds.get_attribute(full_name)
        return values
    finally:
        SimulationState().set_current_time(calling_time)
def _compute_vacancy_and_total_units_variables(self, location_set, project_types,
                                               resources=None):
    compute_resources = Resources(resources)
    compute_resources.merge({"debug": self.debug})
    self.variable_for_vacancy = {}
    self.variable_for_total_units = {}
    for ptype in project_types:
        self.variable_for_vacancy[ptype] = compute_resources.get(
            "%s_vacant_variable" % ptype,
            "urbansim_zone.%s.vacant_%s" % (location_set.get_dataset_name(),
                                            self.project_specific_units[ptype]))
        self.variable_for_total_units[ptype] = compute_resources.get(
            "%s_total_units_variable" % ptype,
            "%s.aggregate(urbansim_zone.building.total_%s)" % (
                location_set.get_dataset_name(), self.project_specific_units[ptype]))
        location_set.compute_variables(
            [self.variable_for_vacancy[ptype], self.variable_for_total_units[ptype]],
            dataset_pool=self.dataset_pool, resources=compute_resources)
def prepare_for_running_macro(parser):
    from opus_core.file_utilities import get_resources_from_file
    parser.add_option("-r", "--resources", dest="resources_file_name",
                      action="store", type="string",
                      help="Name of file containing resources")
    parser.add_option("-y", "--year", dest="year",
                      action="store", type="int",
                      help="Year in which to 'run' the travel model")
    parser.add_option("-o", "--output-file", dest="output_file",
                      action="store", type="string", default=None,
                      help="Output log file. If not given, it is written into the "
                           "urbansim cache directory.")
    (options, args) = parser.parse_args()
    resources = Resources(get_resources_from_file(options.resources_file_name))
    SessionConfiguration(new_instance=True,
                         package_order=resources['dataset_pool_configuration'].package_order,
                         in_storage=AttributeCache())
    return (resources, options)
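# Hedged usage sketch (assumption, not from the source): how a travel-model
# script might drive prepare_for_running_macro(). The script name is
# hypothetical; the -r/-y/-o options are the ones registered above.
#
#   python run_travel_model_macro.py -r resources.pickle -y 2005 -o travel.log
#
# from optparse import OptionParser
# parser = OptionParser()
# resources, options = prepare_for_running_macro(parser)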
def prepare_for_run(self, specification_storage=None, specification_table=None,
                    coefficients_storage=None, coefficients_table=None,
                    agent_set=None, agents_filter=None, data_objects=None, **kwargs):
    spec, coeff = prepare_specification_and_coefficients(
        specification_storage=specification_storage,
        specification_table=specification_table,
        coefficients_storage=coefficients_storage,
        coefficients_table=coefficients_table, **kwargs)
    if agents_filter is not None:
        agent_set.compute_variables(agents_filter, resources=Resources(data_objects))
        index = where(agent_set.get_attribute(
            VariableName(agents_filter).get_alias()) > 0)[0]
    else:
        index = None  # no filter: callers get the full agent set
    return (spec, coeff, index)
def run_model():
    households = HouseholdDataset(in_storage=storage, in_table_name='households')
    hlcm = RegionalHouseholdLocationChoiceModel(
        location_set=gridcells,
        compute_capacity_flag=False,
        choices="opus_core.random_choices_from_index",
        sample_size_locations=4)
    hlcm.run(specification, coefficients, agent_set=households, debuglevel=1)

    # get results
    gridcells.compute_variables(["urbansim.gridcell.number_of_households"],
                                resources=Resources({"household": households}))
    result_area1 = gridcells.get_attribute_by_id("number_of_households",
                                                 arange(ngcs_attr) + 1)
    result_area2 = gridcells.get_attribute_by_id("number_of_households",
                                                 arange(ngcs_attr + 1, ngcs + 1))
    gridcells.delete_one_attribute("number_of_households")
    result = concatenate((result_area1, result_area2))
    return result
def test_my_inputs(self):
    storage = StorageFactory().get_storage('dict_storage')

    job_building_types_table_name = 'job_building_types'
    storage.write_table(table_name=job_building_types_table_name,
                        table_data={
                            'id': array([1, 2, 3, 4]),
                            'home_based': array([1, 0, 1, 0])})

    jobs_table_name = 'jobs'
    storage.write_table(table_name=jobs_table_name,
                        table_data={
                            'job_id': arange(10) + 1,
                            'building_type': array([3, 3, 2, 2, 4, 2, 1, 3, 4, 1])})

    job_building_types = JobBuildingTypeDataset(in_storage=storage,
                                                in_table_name=job_building_types_table_name)
    jobs = JobDataset(in_storage=storage, in_table_name=jobs_table_name)
    jobs.compute_variables(self.variable_name,
                           resources=Resources({'job_building_type': job_building_types}))
    values = jobs.get_attribute(self.variable_name)

    should_be = array([0, 0, 1, 1, 1, 1, 0, 0, 1, 0])
    self.assert_(ma.allequal(values, should_be), 'Error in ' + self.variable_name)
def run_model_2():
    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='households', table_data=household_data)
    households = HouseholdDataset(in_storage=storage, in_table_name='households')

    storage.write_table(table_name='gridcells', table_data=gridcell_data)
    gridcells = GridcellDataset(in_storage=storage, in_table_name='gridcells')

    hlcm = HouseholdLocationChoiceModelCreator().get_model(
        location_set=gridcells,
        compute_capacity_flag=False,
        choices="opus_core.random_choices_from_index",
        sample_size_locations=8)
    hlcm.run(specification, coefficients, agent_set=households, debuglevel=1)

    # get results
    gridcells.compute_variables(["urbansim.gridcell.number_of_households"],
                                resources=Resources({"household": households}))
    result_more_attractive = gridcells.get_attribute_by_id("number_of_households",
                                                           arange(ngcs_attr) + 1)
    result_less_attractive = gridcells.get_attribute_by_id("number_of_households",
                                                           arange(ngcs_attr + 1, ngcs + 1))
    return array([result_more_attractive.sum(), result_less_attractive.sum()])
def prepare_for_simulation(self, run_configuration, cache_directory=None):
    self.config = Resources(run_configuration)
    self.simulation_state = SimulationState(new_instance=True,
                                            base_cache_dir=cache_directory)
    ### TODO: Get rid of this! There is no good reason to be changing the
    ### Configuration.
    if self.config['cache_directory'] is None:
        self.config['cache_directory'] = self.simulation_state.get_cache_directory()
    SessionConfiguration(new_instance=True,
                         package_order=self.config['dataset_pool_configuration'].package_order,
                         in_storage=AttributeCache())
    ForkProcess().fork_new_process(
        self.config['creating_baseyear_cache_configuration'].cache_scenario_database,
        self.config)
    # Create output database (normally done by run manager)
    if 'estimation_database_configuration' in self.config:
        db_server = DatabaseServer(self.config['estimation_database_configuration'])
        if not db_server.has_database(
                self.config['estimation_database_configuration'].database_name):
            db_server.create_database(
                self.config['estimation_database_configuration'].database_name)
def test_my_inputs(self):
    storage = StorageFactory().get_storage('dict_storage')

    building_types_table_name = 'building_types'
    storage.write_table(table_name=building_types_table_name,
                        table_data={
                            'building_type_id': array([0, 2]),
                            'name': array(['foo', 'commercial'])})

    buildings_table_name = 'buildings'
    storage.write_table(table_name=buildings_table_name,
                        table_data={
                            'building_id': array([1, 2, 3]),
                            'building_type_id': array([2, 0, 2])})

    building_types = BuildingTypeDataset(in_storage=storage,
                                         in_table_name=building_types_table_name)
    buildings = BuildingDataset(in_storage=storage, in_table_name=buildings_table_name)

    buildings.compute_variables(self.variable_name,
                                resources=Resources({'building_type': building_types}))
    values = buildings.get_attribute(self.variable_name)

    should_be = array([1, 0, 1])
    self.assert_(ma.allequal(values, should_be), 'Error in ' + self.variable_name)
def predict(self, predicted_choice_id_name, agents_index=None):
    """Run prediction. Currently makes sense only for choice models."""
    # Create a temporary configuration where all words 'estimate' are replaced by 'run'
    tmp_config = Resources(self.config)

    if self.agents_index_for_prediction is None:
        self.agents_index_for_prediction = self.get_agent_set_index().copy()
    if agents_index is None:
        agents_index = self.agents_index_for_prediction

    tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['coefficients'] = "coeff_est"
    tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['agents_index'] = "agents_index"
    tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['chunk_specification'] = "{'nchunks':1}"

    ### save specification and coefficients to cache (no matter the save_estimation_results flag)
    ### so that the prepare_for_run method could load specification and coefficients from there
    #output_configuration = self.config['output_configuration']
    #del self.config['output_configuration']
    #self.save_results()
    #self.config['output_configuration'] = output_configuration
    #self.model_system.run_year_namespace["coefficients"] = self.coefficients
    #del tmp_config['models_configuration'][self.model_name]['controller']['prepare_for_run']

    try:
        run_year_namespace = copy.copy(self.model_system.run_year_namespace)
    except:
        logger.log_error("The estimate() method must be run first")
        return False

    try:
        agents = self.get_agent_set()
        choice_id_name = self.get_choice_set().get_id_name()[0]
        # save current locations of agents
        current_choices = agents.get_attribute(choice_id_name).copy()
        dummy_data = zeros(current_choices.size, dtype=current_choices.dtype) - 1
        #agents.modify_attribute(name=choice_id_name, data=dummy_data)  # reset choices for all agents
        # reset choices for agents in agents_index
        agents.modify_attribute(name=choice_id_name, data=dummy_data, index=agents_index)

        run_year_namespace["process"] = "run"
        run_year_namespace["coeff_est"] = self.coefficients
        run_year_namespace["agents_index"] = agents_index
        run_year_namespace["processmodel_config"] = \
            tmp_config['models_configuration'][self.model_name]['controller']['run']
        new_choices = self.model_system.do_process(run_year_namespace)

        #self.model_system.run(tmp_config, write_datasets_to_cache_at_end_of_year=False)
        #new_choices = agents.get_attribute(choice_id_name).copy()
        agents.modify_attribute(name=choice_id_name, data=current_choices)
        dummy_data[agents_index] = new_choices
        if predicted_choice_id_name not in agents.get_known_attribute_names():
            agents.add_primary_attribute(name=predicted_choice_id_name, data=dummy_data)
        else:
            agents.modify_attribute(name=predicted_choice_id_name, data=dummy_data)
        logger.log_status("Predictions saved into attribute " + predicted_choice_id_name)
        return True
    except Exception as e:
        logger.log_error("Error encountered in prediction: %s" % e)
        logger.log_stack_trace()
def test_number_of_agents_expression(self):
    expr = "mygridcell.number_of_agents(myjob)+10"
    storage = StorageFactory().get_storage('dict_storage')
    gridcell_grid_id = array([1, 2, 3])
    # specify an array of 4 jobs, 1st job's grid_id = 2 (it's in gridcell 2), etc.
    job_grid_id = array([2, 1, 3, 1])
    storage.write_table(table_name='gridcells', table_data={'gid': gridcell_grid_id})
    storage.write_table(table_name='jobs',
                        table_data={'jid': arange(4) + 1, 'gid': job_grid_id})
    gs = Dataset(in_storage=storage, in_table_name='gridcells', id_name="gid",
                 dataset_name="mygridcell")
    jobs = Dataset(in_storage=storage, in_table_name='jobs', id_name="jid",
                   dataset_name="myjob")
    values = gs.compute_variables([expr],
                                  resources=Resources({"myjob": jobs, "mygridcell": gs}))
    should_be = array([12, 11, 11])
    self.assert_(ma.allclose(values, should_be, rtol=1e-7), msg="Error in " + expr)
def estimate(self, specification, dataset, outcome_attribute="unit_price", index=None,
             procedure="opus_core.estimate_linear_regression", data_objects=None,
             estimate_config=None, debuglevel=0):
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    if self.filter_attribute is not None:
        res = Resources({"debug": debuglevel})
        index = dataset.get_filtered_index(self.filter_attribute, threshold=0, index=index,
                                           dataset_pool=self.dataset_pool, resources=res)
    return RegressionModelWithAdditionInitialResiduals.estimate(
        self, specification, dataset, outcome_attribute, index, procedure,
        estimate_config=estimate_config, debuglevel=debuglevel)
def create_from_data(self, resources=None, id_name=None, in_storage=None, dataset_name=None,
                     out_storage=None, in_table_name=None, out_table_name=None):
    self.resources = Resources(resources)
    self.resources.merge_if_not_None({
        "id_name": id_name,
        "dataset_name": dataset_name,
        "in_storage": in_storage,
        "out_storage": out_storage,
        "in_table_name": in_table_name,
        "out_table_name": out_table_name})
    self.resources.merge_with_defaults({"dataset_name": "dataset"})
    self.dataset_name = self.resources.get("dataset_name", None)
    self.attribute_cache = AttributeCache()
    self._aliases = {}
    self._id_names = self.resources.get("id_name", [])
    if not isinstance(self._id_names, list):
        self._id_names = [self._id_names]
    self.variable_factory = VariableFactory()
    self.debug = self.resources.get("debug", 0)
    # Load the table into a pandas DataFrame indexed by the id attribute(s).
    self.df = pd.DataFrame(
        self.resources.get('in_storage').load_table(self.resources.get('in_table_name')))
    self._primary_attribute_names = self.get_attribute_names()
    self.df.set_index(self._id_names, inplace=True)
    self.attribute_boxes = {}
    for attr in self._primary_attribute_names:
        self.attribute_boxes[attr] = AttributeBox(
            self, [],
            variable_name=self.create_and_check_qualified_variable_name(attr),
            type=AttributeType.PRIMARY,
            is_in_memory=True,
            header=None,
            version=0)
    self.n = self.df.shape[0]
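# Hedged usage sketch (assumption, not from the source): feeding create_from_data()
# above from an in-memory dict storage. 'PandasDataset' is a hypothetical name for
# whatever class defines the method, so its calls are left commented.
from numpy import array, arange
from opus_core.storage_factory import StorageFactory

storage = StorageFactory().get_storage('dict_storage')
storage.write_table(table_name='parcels', table_data={
    'parcel_id': arange(3) + 1,
    'land_value': array([10.0, 25.5, 7.2]),
})
# ds = PandasDataset()  # hypothetical class owning create_from_data()
# ds.create_from_data(in_storage=storage, in_table_name='parcels', id_name='parcel_id')
# ds.df is then a pandas DataFrame indexed by 'parcel_id', and ds.n == 3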
def run_model():
    hlcm = HouseholdLocationChoiceModelCreator().get_model(
        location_set=gridcells,
        compute_capacity_flag=False,
        choices="opus_core.random_choices_from_index",
        sample_size_locations=8)
    hlcm.run(specification, coefficients, agent_set=households, debuglevel=1)

    # get results
    gridcells.compute_variables(["urbansim.gridcell.number_of_households"],
                                resources=Resources({"household": households}))
    result_more_attractive = gridcells.get_attribute_by_id("number_of_households",
                                                           arange(ngcs_attr) + 1)
    result_less_attractive = gridcells.get_attribute_by_id("number_of_households",
                                                           arange(ngcs_attr + 1, ngcs + 1))
    households.set_values_of_one_attribute(attribute="grid_id", values=hh_grid_ids)
    gridcells.delete_one_attribute("number_of_households")
    result = concatenate((result_more_attractive, result_less_attractive))
    return result
def run_chunk(self, index, dataset, specification, coefficients):
    self.specified_coefficients = SpecifiedCoefficients().create(coefficients,
                                                                 specification, neqs=1)
    compute_resources = Resources({"debug": self.debug})
    submodels = self.specified_coefficients.get_submodels()
    self.get_status_for_gui().update_pieces_using_submodels(submodels=submodels,
                                                            leave_pieces=2)
    self.map_agents_to_submodels(submodels, self.submodel_string, dataset, index,
                                 dataset_pool=self.dataset_pool,
                                 resources=compute_resources)
    variables = self.specified_coefficients.get_full_variable_names_without_constants()
    self.debug.print_debug("Compute variables ...", 4)
    self.increment_current_status_piece()
    dataset.compute_variables(variables, dataset_pool=self.dataset_pool,
                              resources=compute_resources)
    data = {}
    coef = {}
    outcome = self.initial_values[index].copy()
    for submodel in submodels:
        coef[submodel] = SpecifiedCoefficientsFor1Submodel(self.specified_coefficients,
                                                           submodel)
        self.coefficient_names[submodel] = \
            coef[submodel].get_coefficient_names_without_constant()[0, :]
        self.debug.print_debug("Compute regression for submodel " + str(submodel), 4)
        self.increment_current_status_piece()
        self.data[submodel] = dataset.create_regression_data(
            coef[submodel], index=index[self.observations_mapping[submodel]])
        nan_index = where(isnan(self.data[submodel]))[1]
        inf_index = where(isinf(self.data[submodel]))[1]
        vnames = asarray(coef[submodel].get_variable_names())
        if nan_index.size > 0:
            nan_var_index = unique(nan_index)
            self.data[submodel] = nan_to_num(self.data[submodel])
            logger.log_warning("NaN (Not a Number) is returned from variable %s; "
                               "it is replaced with %s." % (vnames[nan_var_index],
                                                            nan_to_num(nan)))
            #raise ValueError, "NaN(Not A Number) is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % vnames[nan_var_index]
        if inf_index.size > 0:
            inf_var_index = unique(inf_index)
            self.data[submodel] = nan_to_num(self.data[submodel])
            logger.log_warning("Inf is returned from variable %s; it is replaced with %s." %
                               (vnames[inf_var_index], nan_to_num(inf)))
            #raise ValueError, "Inf is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % vnames[inf_var_index]
        if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0):
            # observations for this submodel available
            outcome[self.observations_mapping[submodel]] = \
                self.regression.run(self.data[submodel],
                                    coef[submodel].get_coefficient_values()[0, :],
                                    resources=self.run_config).astype(outcome.dtype)
    return outcome
def test_number_of_agents(self):
    expr = "mygridcell.number_of_agents(myjob)"
    storage = StorageFactory().get_storage('dict_storage')
    gridcell_grid_id = array([1, 2, 3])
    # specify an array of 4 jobs, 1st job's grid_id = 2 (it's in gridcell 2), etc.
    job_grid_id = array([2, 1, 3, 1])
    storage.write_table(table_name='gridcells', table_data={'gid': gridcell_grid_id})
    storage.write_table(table_name='jobs',
                        table_data={'jid': arange(4) + 1, 'gid': job_grid_id})
    gs = Dataset(in_storage=storage, in_table_name='gridcells', id_name="gid",
                 dataset_name="mygridcell")
    jobs = Dataset(in_storage=storage, in_table_name='jobs', id_name="jid",
                   dataset_name="myjob")
    values = gs.compute_variables([expr],
                                  resources=Resources({"myjob": jobs, "mygridcell": gs}))
    should_be = array([2, 1, 1])
    self.assert_(ma.allclose(values, should_be, rtol=1e-7), msg="Error in " + expr)
    # change gids of jobs (to test if computing dependencies is working)
    jobs.modify_attribute(name="gid", data=array([1, 1, 1, 1]))
    values2 = gs.compute_variables([expr],
                                   resources=Resources({"myjob": jobs, "mygridcell": gs}))
    should_be2 = array([4, 0, 0])
    self.assert_(ma.allclose(values2, should_be2, rtol=1e-7), msg="Error in " + expr)
def _search_for_dataset_helper(self, dataset_name, package_order, use_hidden_id, **kwargs):
    # this part of the search_for_dataset code is factored into a helper method, rather than
    # passing in use_hidden_id as a keyword parameter with a default value of False, so that
    # we don't pass this keyword parameter along to the get_dataset method
    for package_name in package_order:
        try:
            dataset = self.get_dataset(dataset_name, package=package_name, **kwargs)
            if dataset is not None:
                break
        except ImportError:
            continue
    else:
        # no break above: no package provided this dataset; create a generic Dataset instead
        from opus_core.datasets.dataset import Dataset
        from opus_core.resources import Resources
        resources = Resources(kwargs.get('arguments', {}))
        if use_hidden_id:
            id_name_default = []
        else:
            id_name_default = "%s_id" % dataset_name
        (table_name, module_name, class_name) = \
            self._table_module_class_names_for_dataset(dataset_name)
        ## set table_name and id_name_default as default values in resources (arguments)
        resources.merge_with_defaults({'dataset_name': dataset_name,
                                       'in_table_name': table_name,
                                       'out_table_name': table_name,
                                       'id_name': id_name_default})
        try:
            dataset = Dataset(resources=resources)
        except:
            # try to create a dataset using deprecated values
            (table_name, module_name, class_name) = \
                self._table_module_class_names_for_dataset_deprecated(dataset_name)
            resources = Resources(kwargs.get('arguments', {}))
            resources.merge_with_defaults({'dataset_name': dataset_name,
                                           'in_table_name': table_name,
                                           'out_table_name': table_name,
                                           'id_name': id_name_default})
            try:
                dataset = Dataset(resources=resources)
            except:
                logger.log_warning("Could not create a generic Dataset '%s'." % dataset_name)
                raise
            #TODO: uncomment this warning when we change to singular
            #logger.log_warning("Dataset %s was created using deprecated table name - using plural will not be supported in the future." % dataset_name)
    return dataset
def __init__(self, config=None, save_estimation_results=False):
    if 'cache_directory' not in config or config['cache_directory'] is None:
        raise KeyError("The cache directory must be specified in the "
                       "given configuration, giving the filesystem path to the cache "
                       "directory containing the data with which to estimate. Please "
                       "check that your configuration contains the 'cache_directory' "
                       "entry and that it is not None.")

    self.simulation_state = SimulationState(new_instance=True)
    self.simulation_state.set_cache_directory(config['cache_directory'])

    SessionConfiguration(new_instance=True,
                         package_order=config['dataset_pool_configuration'].package_order,
                         in_storage=AttributeCache())
    self.config = Resources(config)
    self.save_estimation_results = save_estimation_results
    self.debuglevel = self.config.get("debuglevel", 4)
    self.model_system = ModelSystem()
    self.agents_index_for_prediction = None

    models = self.config.get('models', [])

    self.model_name = None
    if "model_name" in config.keys():
        self.model_name = config["model_name"]
    else:
        for model in models:
            if isinstance(model, dict):
                model_name = model.keys()[0]
                if (model[model_name] == "estimate") or \
                   (isinstance(model[model_name], list) and
                        ("estimate" in model[model_name])):
                    self.model_name = model_name
                    break

    estimate_config_changes = self.config.get(
        'config_changes_for_estimation', {}).get('estimate_config', {})
    if len(estimate_config_changes) > 0:
        change = Resources({'models_configuration':
                            {self.model_name:
                             {'controller': {'init': {'arguments': {}}}}}})
        estimate_config_str = self.config['models_configuration'].get(
            self.model_name, {}).get('controller', {}).get('init', {}).get(
            'arguments', {}).get('estimate_config', '{}')
        estimate_config = Resources({})
        try:
            estimate_config = eval(estimate_config_str)
        except:
            pass
        estimate_config.merge(estimate_config_changes)
        self.config.merge(change)
        self.config['models_configuration'][self.model_name]['controller']['init'][
            'arguments']['estimate_config'] = 'Resources(%s)' % estimate_config
def run(self, specification, coefficients, dataset, index=None, chunk_specification=None,
        data_objects=None, run_config=None, initial_values=None, procedure=None, debuglevel=0):
    """'specification' is of type EquationSpecification,
    'coefficients' is of type Coefficients,
    'dataset' is of type Dataset,
    'index' are indices of individuals in dataset for which the model runs.
        If it is None, the whole dataset is considered.
    'chunk_specification' determines the number of chunks in which the simulation is processed.
    'data_objects' is a dictionary where each key is the name of a data object
        ('zone', ...) and its value is an object of class Dataset.
    'run_config' is of type Resources; it gives additional arguments for the run.
    If 'procedure' is given, it overwrites the regression_procedure of the constructor.
    'initial_values' is an array of the initial values of the results. It will be overwritten
        by the results for those elements that are handled by the model (defined by submodels
        in the specification). By default the results are initialized with 0.
    'debuglevel' overwrites the constructor 'debuglevel'.
    """
    self.debug.flag = debuglevel
    if run_config is None:
        run_config = Resources()
    if not isinstance(run_config, Resources) and isinstance(run_config, dict):
        run_config = Resources(run_config)
    self.run_config = run_config.merge_with_defaults(self.run_config)
    self.run_config.merge({"debug": self.debug})
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    self.dataset_name = dataset.get_dataset_name()
    self.dataset_pool.replace_dataset(self.dataset_name, dataset)
    if procedure is not None:
        self.regression = RegressionModelFactory().get_model(name=procedure)
    if initial_values is None:
        self.initial_values = zeros((dataset.size(),), dtype=float32)
    else:
        self.initial_values = zeros((dataset.size(),), dtype=initial_values.dtype)
        self.initial_values[index] = initial_values
    if dataset.size() <= 0:  # no data loaded yet
        dataset.get_id_attribute()
    if index is None:
        index = arange(dataset.size())
    result = ChunkModel.run(self, chunk_specification, dataset, index, float32,
                            specification=specification, coefficients=coefficients)
    return result
def write(self, resources=None, out_storage=None, out_table_name=None):
    """Write this EquationSpecification into the given storage. Field names are
    taken from 'resources', merged with the instance defaults."""
    local_resources = Resources(resources)
    local_resources.merge_with_defaults({
        "field_submodel_id": self.field_submodel_id,
        "field_equation_id": self.field_equation_id,
        "field_coefficient_name": self.field_coefficient_name,
        "field_variable_name": self.field_variable_name,
        "field_fixed_value": self.field_fixed_value,
        "out_table_name": out_table_name})
    if out_storage is not None:
        self.out_storage = out_storage
    if not isinstance(self.out_storage, Storage):
        logger.log_warning("out_storage has to be of type Storage. No EquationSpecifications written.")
        return

    submodel_ids = self.get_submodels()
    if submodel_ids.size == 0:
        # set sub_model_id to -2 when there are no submodels, or only one
        submodel_ids = resize(array([-2], dtype="int32"), len(self.get_coefficient_names()))
    equation_ids = self.get_equations()
    if equation_ids.size == 0:
        equation_ids = resize(array([-2], dtype="int32"), submodel_ids.size)
    values = {local_resources["field_submodel_id"]: submodel_ids,
              local_resources["field_equation_id"]: equation_ids,
              local_resources["field_coefficient_name"]: self.get_coefficient_names(),
              local_resources["field_variable_name"]: self.get_long_variable_names()}
    if self.fixed_values.size > 0:
        values[local_resources["field_fixed_value"]] = self.fixed_values
    for field in self.other_fields.keys():
        values[field] = self.other_fields[field]
    types = {local_resources["field_submodel_id"]: 'integer',
             local_resources["field_equation_id"]: 'integer',
             local_resources["field_coefficient_name"]: 'text',
             local_resources["field_variable_name"]: 'text'}
    local_resources.merge({"values": values, 'valuetypes': types, "drop_table_flag": 1})
    self.out_storage.write_table(table_name=local_resources['out_table_name'],
                                 table_data=local_resources['values'])
class RunSimulationFromMysql:
    def prepare_for_simulation(self, run_configuration, cache_directory=None):
        self.config = Resources(run_configuration)
        self.simulation_state = SimulationState(new_instance=True,
                                                base_cache_dir=cache_directory,
                                                start_time=self.config.get("base_year", 0))

        ### TODO: Get rid of this! There is no good reason to be changing the
        ### Configuration.
        if self.config["cache_directory"] is None:
            self.config["cache_directory"] = self.simulation_state.get_cache_directory()

        SessionConfiguration(new_instance=True,
                             package_order=self.config["dataset_pool_configuration"].package_order,
                             in_storage=AttributeCache())

        ForkProcess().fork_new_process(
            self.config["creating_baseyear_cache_configuration"].cache_scenario_database,
            self.config)

        # Create output database (normally done by run manager)
        if "estimation_database_configuration" in self.config:
            db_server = DatabaseServer(self.config["estimation_database_configuration"])
            if not db_server.has_database(self.config["estimation_database_configuration"].database_name):
                db_server.create_database(self.config["estimation_database_configuration"].database_name)

    def run_simulation(self, simulation_instance=None):
        logger.start_block("Simulation on database %s"
            % self.config["scenario_database_configuration"].database_name)
        try:
            if simulation_instance is None:
                simulation_instance = ModelSystem()
            simulation_instance.run(self.config)
            #simulation_instance.run_multiprocess(self.config, is_run_subset=True)
        finally:
            logger.end_block()
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())

    def cleanup(self, remove_cache, remove_output_database=False):
        """Remove all outputs of this simulation."""
        self.simulation_state.remove_singleton(delete_cache=remove_cache)
        # Remove SessionConfiguration singleton, if it exists
        Singleton().remove_singleton_for_class(SessionConfiguration)

        if remove_cache:
            cache_dir = self.config["cache_directory"]
            if os.path.exists(cache_dir):
                rmtree(cache_dir)
        if remove_output_database and ("estimation_database_configuration" in self.config):
            db_server = DatabaseServer(self.config["estimation_database_configuration"])
            db_server.drop_database(self.config["estimation_database_configuration"].database_name)

    def prepare_and_run(self, run_configuration, simulation_instance=None, remove_cache=True):
        self.prepare_for_simulation(run_configuration)
        self.run_simulation(simulation_instance)
        self.cleanup(remove_cache)
def write(self, resources=None, out_storage=None, out_table_name=None):
    """Write the coefficients into the given storage: estimates, standard errors
    and any other stored measures. Field names are taken from 'resources',
    merged with the instance defaults."""
    local_resources = Resources(resources)
    local_resources.merge_with_defaults({
        "field_submodel_id": self.field_submodel_id,
        "field_coefficient_name": self.field_coefficient_name,
        "field_estimate": self.field_estimate,
        "field_standard_error": self.field_standard_error,
        "other_fields": self.other_fields,
        "out_table_name": out_table_name})
    if out_storage is not None:
        self.out_storage = out_storage
    if not isinstance(self.out_storage, Storage):
        logger.log_warning("out_storage has to be of type Storage. No coefficients written.")
        return

    submodels = self.get_submodels()
    if submodels.size <= 0:
        submodels = resize(array([-2], dtype=int32), self.size())
    values = {local_resources["field_submodel_id"]: submodels,
              local_resources["field_coefficient_name"]: self.get_names(),
              local_resources["field_estimate"]: self.get_values(),
              local_resources["field_standard_error"]: self.get_standard_errors()}
    for measure in self.other_measures.keys():
        values[measure] = self.other_measures[measure]
    types = {local_resources["field_submodel_id"]: 'integer',
             local_resources["field_coefficient_name"]: 'text',
             local_resources["field_estimate"]: 'double',
             local_resources["field_standard_error"]: 'double'}
    attrtypes = {local_resources["field_submodel_id"]: AttributeType.PRIMARY,
                 local_resources["field_coefficient_name"]: AttributeType.PRIMARY,
                 local_resources["field_estimate"]: AttributeType.PRIMARY,
                 local_resources["field_standard_error"]: AttributeType.PRIMARY}
    for measure in self.other_measures.keys():
        types[measure] = 'double'
        attrtypes[measure] = AttributeType.PRIMARY
    local_resources.merge({"values": values, 'valuetypes': types,
                           "drop_table_flag": 1, "attrtype": attrtypes})
    self.out_storage.write_table(table_name=local_resources['out_table_name'],
                                 table_data=local_resources['values'])
def run_multiprocess(self, resources):
    resources = Resources(resources)
    profiler_name = resources.get("profile_filename", None)
    if resources['cache_directory'] is not None:
        cache_directory = resources['cache_directory']
    else:
        cache_directory = SimulationState().get_cache_directory()

    ### TODO: Get rid of this! There is absolutely no good reason to be
    ### changing the Configuration!
    resources['cache_directory'] = cache_directory

    log_file = os.path.join(cache_directory, 'run_multiprocess.log')
    logger.enable_file_logging(log_file)

    start_year = resources["years"][0]
    end_year = resources["years"][-1]
    nyears = end_year - start_year + 1
    root_seed = resources.get("seed", NO_SEED)
    if resources.get('_seed_dictionary_', None) is not None:
        # This dictionary is added by the RunManager to ensure reproducibility,
        # including for restarted runs.
        seed_dict = resources.get('_seed_dictionary_')
        seed_array = array(map(lambda year: seed_dict[year], range(start_year, end_year+1)))
    else:
        seed(root_seed)
        seed_array = randint(1, 2**30, nyears)
    logger.log_status("Running simulation for years %d through %d" % (start_year, end_year))
    logger.log_status("Simulation root seed: %s" % root_seed)

    for iyear, year in enumerate(range(start_year, end_year+1)):
        success = self._run_each_year_as_separate_process(iyear, year,
                                                          seed=seed_array[iyear],
                                                          resources=resources,
                                                          profiler_name=profiler_name,
                                                          log_file=log_file)
        if not success:
            break

    self._notify_stopped()
    if profiler_name is not None:  # restore the original value
        resources["profile_filename"] = profiler_name
    logger.log_status("Done running simulation for years %d through %d" % (start_year, end_year))
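# A minimal, runnable sketch of the per-year seeding scheme used above. The
# year range and root seed are made up; '_seed_dictionary_' is normally
# injected by the RunManager, so this only illustrates the mechanism:
from numpy import array
from numpy.random import seed, randint

start_year, end_year = 2000, 2002
seed(42)                                              # root seed
seed_array = randint(1, 2**30, end_year - start_year + 1)
# Restarting from, say, 2001 with the same {year: seed} dictionary reproduces
# the original random streams for the remaining years:
seed_dict = dict(zip(range(start_year, end_year + 1), seed_array))
restart_seeds = array([seed_dict[y] for y in range(2001, end_year + 1)])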
def plot_map(self, name, gridcell=None, **opt_args):
    if gridcell is None:
        gridcell = Resources()["gridcell"]
    gridcell.compute_variables("urbansim.gridcell.fazdistrict_id")
    name = VariableName(name).get_alias()
    if name in self.get_known_attribute_names():  # attribute of fazdistricts
        new_name = name + '_of_fazdistrict'
        gridcell.join(self, name=name, new_name=new_name)
    elif name in gridcell.get_known_attribute_names():  # attribute of gridcells
        new_name = name
    else:
        raise StandardError, "Attribute " + name + " not known."
    gridcell.plot_map(new_name, **opt_args)
def run(self, data, coefficients, resources=None):
    """Like linear_utilities, but in addition it runs linear utilities on
    modified data and stores the resulting utilities: for each variable,
    utilities are computed with that variable set to its 5% and 95% quantiles
    while the remaining variables are held at their medians. The last row of
    the resulting file is the difference between these two utilities.
    The file name can be passed in resources - entry 'utilities_diagnose_file'.
    """
    if data.ndim < 3:
        raise StandardError, "Argument 'data' must be a 3D numpy array."
    if not isinstance(resources, Resources):
        resources = Resources(resources)
    nobs, neqs, nvar = data.shape
    medians = zeros(nvar, dtype=float32)
    quant = zeros((2, nvar), dtype=float32)
    data_with_medians = array(data[0,:,:])
    for ivar in range(nvar):  # compute median and quantiles for each variable
        medians[ivar], quant[0, ivar], quant[1, ivar] = quantile(
            data[:,:,ivar].ravel(), array([0.5, 0.05, 0.95]))
        data_with_medians[:, ivar] = medians[ivar]

    file_name = resources.get("utilities_diagnose_file", "util")
    if resources.get("submodel", None) is not None:
        file_name = "%s_submodel_%s" % (file_name, resources.get("submodel", 1))
    diagnose_utilities = zeros((3, nvar), dtype=float32)
    argcor = ()
    for ivar in range(nvar):  # iterate over variables
        for iquant in [0, 1]:  # 0 for 5% quantile, 1 for 95% quantile
            mod_data = array(data_with_medians).reshape(1, neqs, nvar)  # copy original data
            mod_data[0, :, ivar] = quant[iquant, ivar]
            utility = linear_utilities.run(self, mod_data, coefficients, resources)
            diagnose_utilities[iquant, ivar] = utility[0, 0]
        argcor = argcor + (data[:,:,ivar].ravel(),)
    diagnose_utilities[2,:] = diagnose_utilities[1,:] - diagnose_utilities[0,:]
    coef_names = resources.get("coefficient_names", map(lambda x: 'x%s' % x, arange(nvar)+1))
    write_to_text_file(file_name, coef_names, delimiter=' ')
    write_table_to_text_file(file_name, diagnose_utilities, mode='ab')
    logger.log_status("Diagnosed utilities written into %s." % file_name)
    return linear_utilities.run(self, data, coefficients, resources)
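# A small, self-contained illustration of the median/quantile construction
# above, using plain numpy in place of opus_core's quantile helper; the input
# numbers are made up:
from numpy import array, zeros, float32, percentile

data = array([[[1.0, 10.0], [2.0, 20.0]],
              [[3.0, 30.0], [4.0, 40.0]]])   # shape (nobs=2, neqs=2, nvar=2)
nobs, neqs, nvar = data.shape
data_with_medians = array(data[0, :, :])
quant = zeros((2, nvar), dtype=float32)
for ivar in range(nvar):
    values = data[:, :, ivar].ravel()
    data_with_medians[:, ivar] = percentile(values, 50)
    quant[0, ivar], quant[1, ivar] = percentile(values, 5), percentile(values, 95)
# Each diagnosed utility is then computed from data_with_medians with one
# column swapped to quant[0/1, ivar], exactly as in the loop above.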
def get_resources_for_dataset(self,
        dataset_name,
        in_storage,
        out_storage,
        resources={},
        in_table_name_pair=(None, None),
        out_table_name_pair=(None, None),
        attributes_pair=(None, None),
        id_name_pair=(None, None),
        nchunks_pair=(None, None),
        debug_pair=(None, None)):
    """Create an object of class Resources to be used in a Dataset object.
    The created resources are merged with the resources given as the argument
    'resources'. The first element of each tuple of the remaining arguments
    contains the desired value; the second element contains the default value,
    used when the first element is None. Entries in 'resources' with the same
    name as an argument are overwritten if the corresponding first tuple
    element is not None.
    """
    # merge resources with arguments
    local_resources = Resources(resources)
    local_resources.merge_if_not_None({
        "in_storage": in_storage,
        "out_storage": out_storage,
        "nchunks": nchunks_pair[0],
        "attributes": attributes_pair[0],
        "in_table_name": in_table_name_pair[0],
        "out_table_name": out_table_name_pair[0],
        "id_name": id_name_pair[0],
        "debug": debug_pair[0],
        "dataset_name": dataset_name})
    # merge resources with default values
    local_resources.merge_with_defaults({
        "nchunks": nchunks_pair[1],
        "attributes": attributes_pair[1],
        "in_table_name": in_table_name_pair[1],
        "out_table_name": out_table_name_pair[1],
        "id_name": id_name_pair[1],
        "debug": debug_pair[1],
        "dataset_name": dataset_name})
    return local_resources
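# A hedged example of the (desired, default) pair semantics; the factory and
# storage objects are hypothetical and not defined in this module:
#
#   res = factory.get_resources_for_dataset(
#       'household', in_storage=my_in_storage, out_storage=my_out_storage,
#       in_table_name_pair=('households_2005', 'households'),  # desired wins
#       id_name_pair=(None, 'household_id'))                   # None -> default wins
#
#   # res["in_table_name"] == 'households_2005'
#   # res["id_name"] == 'household_id'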
def load(self, resources=None, in_storage=None, in_table_name=None, variables=[]):
    local_resources = Resources(resources)
    local_resources.merge_with_defaults({
        "field_submodel_id": self.field_submodel_id,
        "field_equation_id": self.field_equation_id,
        "field_coefficient_name": self.field_coefficient_name,
        "field_variable_name": self.field_variable_name,
        "field_fixed_value": self.field_fixed_value})
    if in_storage is not None:
        self.in_storage = in_storage
    if not isinstance(self.in_storage, Storage):
        logger.log_warning("in_storage is not of type Storage. No EquationSpecification loaded.")
    else:
        data = self.in_storage.load_table(table_name=in_table_name)
        equations = array([-1])
        if local_resources["field_equation_id"] in data:
            equations = data[local_resources["field_equation_id"]]
        variable_names = data[local_resources["field_variable_name"]]
        self.variables = tuple(map(lambda x: VariableName(x), variable_names))
        self.coefficients = data[local_resources["field_coefficient_name"]]
        if local_resources["field_submodel_id"] in data:
            submodels = data[local_resources["field_submodel_id"]]
        else:
            submodels = array([-2]*self.coefficients.size, dtype="int32")
        self.submodels = submodels
        if equations.max() >= 0:
            self.equations = equations
        if local_resources["field_fixed_value"] in data:
            self.fixed_values = data[local_resources["field_fixed_value"]]
        for field in data:
            if field not in [local_resources["field_submodel_id"],
                             local_resources["field_equation_id"],
                             local_resources["field_variable_name"],
                             local_resources["field_coefficient_name"],
                             local_resources["field_fixed_value"]]:
                self.other_fields[field] = data[field]
        self.set_other_dim_field_names()
        if variables:
            self.shrink(variables)
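# For orientation, a sketch of the expected on-storage layout. The column
# names below are assumed to match the instance's field_* defaults; the actual
# names come from 'resources'. A missing submodel column is filled with -2,
# and equations are only kept if at least one equation id is >= 0:
#
#   sub_model_id | equation_id | coefficient_name | variable_name
#   -------------+-------------+------------------+---------------------------------
#   -2           | -2          | b_income         | urbansim.gridcell.average_income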
def run(self,
        config=None,  ### TODO: Get rid of this parameter!
        unroll_gridcells=None,  ### TODO: Get rid of this parameter!
        cache_directory=None,
        base_year=None,
        creating_baseyear_cache_configuration=None,
        debuglevel=None):
    """Copy large baseyear datasets from MySQL into cache."""
    config = Resources(config)
    if unroll_gridcells is None:
        unroll_gridcells = config['creating_baseyear_cache_configuration'].unroll_gridcells
    if cache_directory is None:
        cache_directory = config['cache_directory']
    if base_year is None:
        base_year = config['base_year']
    if creating_baseyear_cache_configuration is None:
        creating_baseyear_cache_configuration = copy.deepcopy(config['creating_baseyear_cache_configuration'])
    if debuglevel is None:
        debuglevel = config.get('debuglevel', 3)

    CoreCacheScenarioDatabase().run(config)
    self.prepare_data_before_baseyear(cache_directory, base_year,
                                      creating_baseyear_cache_configuration)
def skip_test_estimation_one_var(self):
    """Test a regression estimation for a model with one independent variable."""
    # First, use scipy to get reference values to compare the results of our
    # R-based regression to.
    # Example regression from:
    # http://www2.warwick.ac.uk/fac/sci/moac/currentstudents/peter_cock/python/lin_reg/
    from scipy import stats
    x = [5.05, 6.75, 3.21, 2.66]
    y = [1.65, 26.5, -5.93, 7.96]
    gradient, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    r_squared = r_value**2
    # Expected: gradient and intercept 5.3935773612 -16.2811279931
    # Expected: R-squared 0.524806275136
    # Expected: p-value 0.275564857882

    # Next, set up the call to estimate_linear_regression_r.run(...),
    # whose prototype is:
    #   def run(self, data, regression=None, resources=None):
    # ('regression' is not used by the run method.)
    # Things that need to be stored in resources:
    #   constant_position = resources.get("constant_position", array([], dtype='int32'))  # position of the intercept
    #   coef_names = resources.get("coefficient_names", nvar*[])
    #   outcome = resources["outcome"].astype("float64")

    # Create resources
    coeff = array(['EX'])
    resources = Resources()
    # No constant
    resources.add("constant_position", array([], dtype='int32'))
    resources.add("coefficient_names", coeff)
    resources.add("outcome", array(y))
    data = resize(array([x]), (len(x), 1))

    # run RPy-based regression
    estimateR = estimate_linear_regression_r()
    result = estimateR.run(data, resources=resources)

    # Finally, compare the scipy-based regression to the R-based regression.
    # Compare the coefficient estimate
    self.assertEqual(round(intercept, 4), round(result['estimators'][0], 4))
    # Compare the R-squared
    self.assertEqual(round(r_squared, 6), round(result['other_info']['R-Squared'], 6))
def _search_for_dataset_helper(self, dataset_name, package_order, use_hidden_id, **kwargs):
    # This part of the search_for_dataset code is factored into a helper method,
    # rather than passing in use_hidden_id as a keyword parameter with a default
    # value of False, so that we don't pass this keyword parameter along to the
    # get_dataset method.
    for package_name in package_order:
        try:
            dataset = self.get_dataset(dataset_name, package=package_name, **kwargs)
            if dataset is not None:
                break
        except ImportError:
            continue
    else:
        # the for-else runs only if no package provided the dataset (no break)
        from opus_core.datasets.dataset import Dataset
        from opus_core.resources import Resources

        resources = Resources(kwargs.get("arguments", {}))
        if use_hidden_id:
            id_name_default = []
        else:
            id_name_default = "%s_id" % dataset_name
        (table_name, module_name, class_name) = self._table_module_class_names_for_dataset(dataset_name)
        # set table_name and id_name_default as default values in resources (arguments)
        resources.merge_with_defaults({
            "dataset_name": dataset_name,
            "in_table_name": table_name,
            "out_table_name": table_name,
            "id_name": id_name_default,
        })
        try:
            dataset = Dataset(resources=resources)
        except:
            # try to create a dataset using deprecated values
            (table_name, module_name, class_name) = self._table_module_class_names_for_dataset_deprecated(dataset_name)
            resources = Resources(kwargs.get("arguments", {}))
            resources.merge_with_defaults({
                "dataset_name": dataset_name,
                "in_table_name": table_name,
                "out_table_name": table_name,
                "id_name": id_name_default,
            })
            try:
                dataset = Dataset(resources=resources)
            except:
                logger.log_warning("Could not create a generic Dataset '%s'." % dataset_name)
                raise
            # TODO: uncomment this warning when we change to singular
            #logger.log_warning("Dataset %s was created using deprecated table name - using plural will not be supported in the future." % dataset_name)
    return dataset
def openev_plot(self, name, gridcell=None, **opt_args):
    if gridcell is None:
        gridcell = Resources()["gridcell"]
    gridcell.compute_variables("urbansim.gridcell.city_id")
    if name in self.get_known_attribute_names():  # attribute of cities
        new_name = name + '_of_city'
        gridcell.join(self, name=name, new_name=new_name)
    elif name in gridcell.get_known_attribute_names():  # attribute of gridcells
        new_name = name
    else:
        raise StandardError, "Attribute " + name + " not known."
    gridcell.openev_plot(new_name, **opt_args)
class RunSimulation(object):
    def prepare_for_simulation(self, config, cache_directory=None):
        self.config = Resources(config)
        base_cache_dir = self.config['creating_baseyear_cache_configuration'].cache_directory_root
        self.simulation_state = SimulationState(new_instance=True,
                                                base_cache_dir=base_cache_dir,
                                                start_time=self.config.get('base_year', 0))

        ### TODO: Get rid of this! There is no good reason to be changing the
        ### Configuration.
        if self.config['cache_directory'] is None:
            self.config['cache_directory'] = self.simulation_state.get_cache_directory()

        SessionConfiguration(new_instance=True,
                             package_order=self.config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())

        if config['creating_baseyear_cache_configuration'].cache_from_database:
            ForkProcess().fork_new_process(
                self.config['creating_baseyear_cache_configuration'].cache_scenario_database,
                self.config)
        else:
            CacheFltData().run(self.config)

    def run_simulation(self, simulation_instance=None):
        if simulation_instance is None:
            simulation_instance = ModelSystem()
        simulation_instance.run(self.config)
        #simulation_instance.run_multiprocess(self.config, is_run_subset=True)
        logger.log_status("Data cache in %s" % self.simulation_state.get_cache_directory())

    def cleanup(self, remove_cache=True):
        """Remove all outputs of this simulation."""
        self.simulation_state.remove_singleton(delete_cache=remove_cache)
        SessionConfiguration().remove_singleton()
        if remove_cache:
            cache_dir = self.config['cache_directory']
            if os.path.exists(cache_dir):
                rmtree(cache_dir)

    def prepare_and_run(self, run_configuration, simulation_instance=None, remove_cache=True):
        self.prepare_for_simulation(run_configuration)
        self.run_simulation(simulation_instance)
        self.cleanup(remove_cache)
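# Hedged usage sketch; 'run_configuration' is assumed to be a fully populated
# Opus configuration dictionary, which is not constructed here:
#
#   simulation = RunSimulation()
#   simulation.prepare_and_run(run_configuration, remove_cache=True)
#
# prepare_and_run caches the base-year data (from the database or from flt
# files), runs the model system for the configured years, and finally removes
# the simulation cache.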
def __init__(self, resources=None, dataset1=None, dataset2=None, index1=None,
             index2=None, dataset_name=None, debug=None):
    """
    Argument 'resources' is of type Resources. It is merged with the remaining
    arguments. It should contain:
        dataset1 - agent class
        dataset2 - class of the choice dataset
    Optional:
        index1 - 1D array, indices of dataset1
        index2 - if a 2D array: row i contains indices of individuals of dataset2
                 that belong to the i-th individual of dataset1[index1];
                 if a 1D array: indices of individuals of dataset2 for all
                 individuals of dataset1[index1]
        dataset_name - subdirectory in which the implementation of the
                 interaction variables is placed (default "")
    dataset1.resources and dataset2.resources should contain the key
    'dataset_name' (see Dataset.get_dataset_name()).
    """
    self.resources = Resources(resources)
    self.resources.merge_if_not_None({
        "dataset1": dataset1,
        "dataset2": dataset2,
        "index1": index1,
        "index2": index2,
        "dataset_name": dataset_name,
        "debug": debug})
    self.attribute_boxes = {}
    self.attribute_names = []
    self.debug = self.resources.get("debug", 0)
    if not isinstance(self.debug, DebugPrinter):
        self.debug = DebugPrinter(self.debug)
    self.resources.check_obligatory_keys(["dataset1", "dataset2"])
    self.dataset1 = self.resources["dataset1"]
    self.dataset2 = self.resources["dataset2"]
    self.index1 = self.resources.get("index1", None)
    self.index2 = self.resources.get("index2", None)
    self.dataset_name = self.resources.get("dataset_name", None)
    if self.dataset_name is None:
        self.dataset_name = self.dataset1.get_dataset_name() + '_x_' + self.dataset2.get_dataset_name()
    self._primary_attribute_names = []
    self.index1_mapping = {}
    if self.index1 is not None:
        self.index1_mapping = do_id_mapping_dict_from_array(self.index1)
    self._id_names = None  # for compatibility with Dataset
    self.variable_factory = VariableFactory()
    self._aliases = {}  # for compatibility with Dataset
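# A runnable sketch of the 'index1'/'index2' convention described in the
# docstring above, using plain numpy arrays in place of Dataset objects:
from numpy import array

index1 = array([0, 2])        # two agents selected from dataset1
index2 = array([[1, 3, 5],    # choices (indices into dataset2) for agent index1[0]
                [0, 2, 4]])   # choices for agent index1[1]
# Row i of the 2D index2 lists the dataset2 indices belonging to the i-th
# agent of dataset1[index1]. A 1D index2, e.g. array([1, 3, 5]), would give
# the same choice set to every agent.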