def __init__(self, choice_set, nested_structure=None, stratum=None, **kwargs):
    """Initialize the base choice model, then the nest structure and the model interaction.

    'nested_structure' is a dictionary with keys being the nest identifiers and
    each value being a list of identifiers of the elemental alternatives
    belonging to that nest.
    'stratum' is either a string giving the name of the variable/expression that
    determines the membership of the choice set's elements in nests, or an
    array of the same size as the choice set giving that membership directly.
    Either 'nested_structure' or 'stratum' must be given.
    All arguments of the ChoiceModel can be used; they are passed via 'kwargs'.
    """
    ChoiceModel.__init__(self, choice_set, **kwargs)
    # The same kwargs are forwarded unchanged to both set-up calls below.
    self.create_nested_and_tree_structure(nested_structure, stratum, **kwargs)
    self.set_model_interaction(**kwargs)
def get_sampling_weights(self, config, **kwargs):
    """Reset self.filter_index and return ChoiceModel.get_sampling_weights(config, **kwargs)."""
    ## There are cases where filter and weights are mutually dependent (e.g. DPLCM).
    ## The filter is then passed to apply_filter through self.filter_index,
    ## which is either a boolean array of the same size as self.choice_set or
    ## an index into self.choice_set.
    self.filter_index = None
    return ChoiceModel.get_sampling_weights(self, config, **kwargs)
def prepare_for_estimate(self, agents_for_estimation_storage, agents_for_estimation_table, agent_set, **kwargs):
    """Load the estimation agents and households and prepare the model for estimation.

    A Dataset is read from 'agents_for_estimation_table' in
    'agents_for_estimation_storage', using the id name and dataset name of
    'agent_set'. Households are read from the table
    'households_for_estimation' of the same storage. Both datasets replace the
    corresponding entries in self.dataset_pool.

    Remaining keyword arguments are forwarded to
    ChoiceModel.prepare_for_estimate.

    Returns a tuple (specification, index, estimation dataset).
    """
    agents_est = Dataset(
        in_storage=agents_for_estimation_storage,
        in_table_name=agents_for_estimation_table,
        id_name=agent_set.get_id_name(),
        dataset_name=agent_set.get_dataset_name(),
    )
    households_est = HouseholdDataset(
        in_storage=agents_for_estimation_storage,
        in_table_name='households_for_estimation',
    )

    # Swap the estimation data into the pool so that variables are computed on it.
    pool = self.dataset_pool
    pool.replace_dataset('household', households_est)
    pool.replace_dataset(agents_est.get_dataset_name(), agents_est)

    specification, index = ChoiceModel.prepare_for_estimate(self, agents_est, **kwargs)
    return (specification, index, agents_est)
def prepare_for_estimate_hh(self, estimation_storage, agents_for_estimation_table, agent_set, persons_for_estimation_table=None, **kwargs):
    """Load the estimation agents (and optionally persons) and prepare for estimation.

    A Dataset is read from 'agents_for_estimation_table' in
    'estimation_storage', using the id name and dataset name of 'agent_set'.
    If 'persons_for_estimation_table' is given, a PersonDataset is read from it
    and replaces the 'person' dataset in self.dataset_pool. The estimation
    dataset replaces the pool entry registered under its own dataset name.

    Remaining keyword arguments are forwarded to
    ChoiceModel.prepare_for_estimate.

    Returns a tuple (specification, index, estimation dataset).
    """
    agents_est = Dataset(
        in_storage=estimation_storage,
        in_table_name=agents_for_estimation_table,
        id_name=agent_set.get_id_name(),
        dataset_name=agent_set.get_dataset_name(),
    )
    pool = self.dataset_pool

    if persons_for_estimation_table is not None:
        persons_est = PersonDataset(
            in_storage=estimation_storage,
            in_table_name=persons_for_estimation_table,
        )
        pool.replace_dataset('person', persons_est)

    pool.replace_dataset(agents_est.get_dataset_name(), agents_est)

    specification, index = ChoiceModel.prepare_for_estimate(self, agents_est, **kwargs)
    return (specification, index, agents_est)
def __init__(self, location_set, filter=None, location_id_string=None, dataset_pool=None, *args, **kwargs):
    """Create a location choice model on top of ChoiceModel.

    Arguments:
        location_set - Dataset of locations to be chosen from. It is passed to
            ChoiceModel as its choice set.
        filter - filter applied on location weights for sampling (by
            multiplication). It is either a string specifying an attribute
            name of the filter, or a 1D/2D array giving the filter directly,
            or a dictionary specifying a filter for each submodel. If it is
            None, no filter is applied.
        location_id_string - character string giving the fully qualified name
            of the agent attribute that specifies the location. Only needed
            when the attribute is a variable. Use it without the "as" clause.
            It is stored as a VariableName and also passed to ChoiceModel as
            'choice_attribute_name'.
        dataset_pool - passed to self.create_dataset_pool together with the
            package order ["urbansim", "opus_core"].

    All other positional and keyword arguments are forwarded to
    ChoiceModel.__init__. According to the original documentation these
    include:
        sampler - name of the sampling module to be used for sampling
            locations. If it is None, no sampling is performed and all
            locations are considered for choice.
        utilities - name of the utilities module
        probabilities - name of the probabilities module
        choices - name of the module for computing agent choices
        submodel_string - character string specifying what agent attribute
            determines submodels.
        run_config - collection of additional arguments that control a
            simulation run. It is of class Resources.
        estimate_config - collection of additional arguments that control an
            estimation run. It is of class Resources.
        debuglevel - debuglevel for the constructor. The level is overwritten
            by the argument in the run and estimate methods.
    """
    self.dataset_pool = self.create_dataset_pool(dataset_pool, ["urbansim", "opus_core"])

    # NOTE(review): the original documentation says an alias (the id name of
    # the location set) is added to this variable name, but no alias is set
    # in this constructor - confirm where that happens.
    if location_id_string is None:
        self.location_id_string = None
    else:
        self.location_id_string = VariableName(location_id_string)

    # ChoiceModel receives the raw string, not the VariableName built above.
    ChoiceModel.__init__(
        self,
        choice_set=location_set,
        dataset_pool=self.dataset_pool,
        choice_attribute_name=location_id_string,
        *args,
        **kwargs
    )
    self.filter = filter
def estimate(
    self,
    specification,
    agent_set,
    agents_index=None,
    procedure=None,
    data_objects=None,
    estimate_config=None,
    debuglevel=0,
):
    """Compute capacity if required and call the estimate method of ChoiceModel.

    See ChoiceModel.estimate for details on the arguments 'specification',
    'agent_set', 'agents_index', 'procedure' and 'debuglevel'.

    'agents_index' defaults to all agents of 'agent_set'. If it is empty,
    nothing is estimated and (None, None) is returned.
    'data_objects', if given, is added to self.dataset_pool (datasets already
    included are not added again).
    'estimate_config' is merged with the defaults in self.estimate_config. Its
    entries 'compute_capacity_flag' and 'capacity_string' control whether and
    how the capacity is determined; the result (or None) is stored in the
    configuration under 'capacity'.

    Returns whatever ChoiceModel.estimate returns.
    """
    if agents_index is None:
        agents_index = arange(agent_set.size())
    if agents_index.size <= 0:
        logger.log_status("Nothing to be done.")
        return (None, None)

    # The conditional is evaluated first and the result formatted afterwards,
    # so the message keeps its prefix even when there is no dataset pool.
    if self.dataset_pool is not None:
        package_order = self.dataset_pool.get_package_order()
    else:
        package_order = None
    logger.log_note("Using dataset pool: %s" % (package_order,))

    if estimate_config is None:
        estimate_config = Resources()
    self.estimate_config = estimate_config.merge_with_defaults(self.estimate_config)

    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    if self.location_id_string is not None:
        agent_set.compute_variables(self.location_id_string, dataset_pool=self.dataset_pool)

    self.capacity = None
    if self.estimate_config.get("compute_capacity_flag", False):
        capacity_string_for_estimation = self.estimate_config.get("capacity_string", None)
        self.capacity = self.determine_capacity(
            capacity_string=capacity_string_for_estimation,
            agent_set=agent_set,
            agents_index=agents_index,
        )

    # 'capacity' is always set in the configuration, possibly to None.
    self.estimate_config.merge({"capacity": self.capacity})
    return ChoiceModel.estimate(
        self,
        specification,
        agent_set,
        agents_index,
        procedure,
        estimate_config=self.estimate_config,
        debuglevel=debuglevel,
    )
def prepare_for_run(self, agent_set=None, agent_filter=None, agents_index=None, convert_index_to_person=False, filter_threshold=0, **kwargs):
    """Combine agent_filter and agents_index.

    ChoiceModel.prepare_for_run is called with 'agent_set', 'agent_filter',
    'filter_threshold' and the remaining keyword arguments; it yields the
    specification, the coefficients and an index of agents (or None).

    If 'agents_index' is given, the returned index contains only agents that
    are in 'agents_index' and, when ChoiceModel returned an index, also in
    that index.

    If 'convert_index_to_person' is True, 'agents_index' is an index of
    households whereas 'agent_set' is a person dataset, so the index is first
    converted to the persons belonging to those households.

    Returns a tuple (specification, coefficients, index).
    """
    spec, coef, index = ChoiceModel.prepare_for_run(
        self,
        agent_set=agent_set,
        agent_filter=agent_filter,
        filter_threshold=filter_threshold,
        **kwargs
    )
    if agents_index is not None:
        if convert_index_to_person:
            # Select the persons whose household id is among the indexed households.
            hhs = self.dataset_pool.get_dataset('household')
            agents_index = where(
                ismember(
                    agent_set['%s' % hhs.get_id_name()[0]],
                    hhs.get_id_attribute()[agents_index],
                )
            )

        # Boolean masks over the whole agent set; the builtin bool is the
        # portable spelling of the deprecated 'bool8' dtype alias.
        in_agents_index = zeros(agent_set.size(), dtype=bool)
        in_agents_index[agents_index] = True
        if index is not None:
            in_filter_index = zeros(agent_set.size(), dtype=bool)
            in_filter_index[index] = True
            in_agents_index = logical_and(in_agents_index, in_filter_index)
        index = where(in_agents_index)[0]
    return (spec, coef, index)
def run(self, specification, coefficients, agent_set, agents_index=None, sync_persons=False, **kwargs):
    """Run ChoiceModel.run and optionally copy the choices to the persons table.

    Set 'sync_persons' to True if the model is run on the households level and
    the persons table should be synchronized: the choice id of each agent is
    then disaggregated to the 'person' dataset of self.dataset_pool and
    written for the persons that belong to the agents in 'agents_index'.

    Afterwards the results are written to the agent set under the alias of
    self.choice_attribute_name.

    Returns the results of ChoiceModel.run.
    """
    results = ChoiceModel.run(self, specification, coefficients, agent_set, agents_index=agents_index, **kwargs)

    if sync_persons:
        persons = self.dataset_pool.get_dataset('person')
        choice_id_name = self.choice_set.get_id_name()[0]
        values = persons.compute_variables(
            ['_tmp_ = person.disaggregate(%s.%s)' % (agent_set.get_dataset_name(), choice_id_name)],
            dataset_pool=self.dataset_pool,
        )
        # Identity test: '== None' on a numpy array compares element-wise and
        # cannot be used as a condition.
        if agents_index is None:
            agents_index = arange(agent_set.size())
        pers_idx = where(
            ismember(
                persons['%s' % agent_set.get_id_name()[0]],
                agent_set.get_id_attribute()[agents_index],
            )
        )
        if choice_id_name not in persons.get_known_attribute_names():
            persons.add_primary_attribute(data=zeros(persons.size(), dtype=values.dtype), name=choice_id_name)
        # NOTE(review): 'values' is computed for the whole persons dataset while
        # 'pers_idx' may select only a subset - confirm that modify_attribute
        # accepts data of full length together with an index.
        persons.modify_attribute(data=values, name=choice_id_name, index=pers_idx)
        persons.delete_one_attribute('_tmp_')

    # NOTE(review): placed at function level, i.e. executed whether or not
    # persons are synchronized - confirm against the intended behavior.
    agent_set.modify_attribute(data=results, name=self.choice_attribute_name.get_alias(), index=agents_index)
    return results
def run_chunk(self, agents_index, agent_set, specification, coefficients):
    """Run one chunk of agents, recomputing the capacity first if required.

    The location id of all agents in 'agents_index' is set to -1 (unplaced).
    If self.compute_capacity_flag is set, the capacity is determined for this
    chunk using the 'capacity_string' entry of self.run_config, with masked
    values filled by 0.0. When no capacity is left and the run_config entry
    'accept_unavailability_of_choices' is true, an int32 array of -1 (one per
    agent) is returned without running the choice model.

    Otherwise the capacity is put into self.run_config for the duration of
    ChoiceModel.run_chunk and removed again afterwards. If the run_config
    entry 'agent_units_string' is set, the values of that agent attribute for
    'agents_index' are stored under 'agent_units_all'.

    Returns the choices produced by ChoiceModel.run_chunk.
    """
    # Mark the agents of this chunk as unplaced.
    location_id_name = self.choice_set.get_id_name()[0]
    agent_set.set_values_of_one_attribute(
        location_id_name, resize(array([-1]), agents_index.size), agents_index
    )

    # Capacity may need to be re-computed for every chunk.
    if self.compute_capacity_flag:
        self.capacity = ma.filled(
            self.determine_capacity(
                capacity_string=self.run_config.get("capacity_string", None),
                agent_set=agent_set,
                agents_index=agents_index,
            ),
            0.0,
        )
        if self.capacity is not None:
            total_capacity = self.capacity.sum()
            logger.log_status("Available capacity: %s units." % total_capacity)
            if total_capacity <= 0 and self.run_config.get("accept_unavailability_of_choices", False):
                return array(agents_index.size * [-1], dtype="int32")

    self.run_config.merge({"capacity": self.capacity})
    if self.run_config.get("agent_units_string", None):
        self.run_config["agent_units_all"] = agent_set.get_attribute_by_index(
            self.run_config["agent_units_string"], agents_index
        )

    # Writing the chosen locations to the agent set is done in choice_model.
    choices = ChoiceModel.run_chunk(self, agents_index, agent_set, specification, coefficients)

    # The capacity is valid for this chunk only ('in' replaces dict.has_key).
    if "capacity" in self.run_config:
        del self.run_config["capacity"]
    return choices
def run_chunk(self, agents_index, agent_set, specification, coefficients):
    """Add the logsum to the specification, initialize nest membership and run the chunk.

    Returns the result of ChoiceModel.run_chunk called with the same arguments.
    """
    # Both calls are made before delegating to the parent class.
    self.add_logsum_to_specification(specification, coefficients)
    self.init_membership_in_nests()
    return ChoiceModel.run_chunk(self, agents_index, agent_set, specification, coefficients)
def run(
    self,
    specification,
    coefficients,
    agent_set,
    agents_index=None,
    chunk_specification=None,
    data_objects=None,
    run_config=None,
    debuglevel=0,
):
    """Run a simulation and return a numpy array of length agents_index, giving
    agent choices (ids of locations).

    'specification' is of type EquationSpecification,
    'coefficients' is of type Coefficients,
    'agent_set' is of type Dataset,
    'agents_index' are indices of individuals in the agent_set for which the
        model runs. If it is None, the whole agent_set is considered.
    'chunk_specification' determines the number of chunks in which the
        simulation is processed. Default is to use 300 rows per chunk.
    'data_objects' is a dictionary where each key is the name of a data object
        ('zone', ...) and its value is an object of class Dataset. It is added
        to self.dataset_pool.
    'run_config' is of type Resources, it gives additional arguments for the
        run. It is merged with the defaults in self.run_config.
    'debuglevel' overwrites the constructor 'debuglevel'.

    Raises KeyError if 'compute_capacity_flag' is set in the run configuration
    but 'capacity_string' is missing.
    """
    if run_config is None:
        run_config = Resources()
    self.run_config = run_config.merge_with_defaults(self.run_config)
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)

    # The location id variable is not computed here since it gets new values
    # anyway; adding the location id attribute to the agent set is done in
    # choice_model.

    # Used when agents take different amounts of capacity from the total capacity.
    if self.run_config.get("agent_units_string", None):
        agent_set.compute_variables([self.run_config["agent_units_string"]], dataset_pool=self.dataset_pool)

    self.compute_capacity_flag = self.run_config.get("compute_capacity_flag", False)
    capacity_string = None
    self.capacity = None
    if self.compute_capacity_flag:
        capacity_string = self.run_config.get("capacity_string", None)
        if capacity_string is None:
            raise KeyError(
                "Entry 'capacity_string' has to be specified in 'run_config' if 'compute_capacity_flag' is True"
            )

    # If no weights are given, use the capacity for weights.
    if self.run_config.get("weights_for_simulation_string", None) is None and capacity_string is not None:
        self.run_config.merge({"weights_for_simulation_string": capacity_string})

    return ChoiceModel.run(
        self,
        specification,
        coefficients,
        agent_set,
        agents_index=agents_index,
        chunk_specification=chunk_specification,
        run_config=self.run_config,
        debuglevel=debuglevel,
    )
def estimate_step(self):
    """Run one estimation step of ChoiceModel and add the logsum to its result.

    Returns the result of ChoiceModel.estimate_step after it has been passed to
    self.add_logsum_to_coefficients.
    """
    # Set-up calls made before delegating to the parent class.
    self.set_correct_for_sampling()
    self.init_membership_in_nests()
    result = ChoiceModel.estimate_step(self)
    self.add_logsum_to_coefficients(result)
    return result
def set_choice_set_size(self, **kwargs):
    """Default the sampler size to the number of elemental alternatives, then delegate.

    If self.sampler_size is None, it is set to the total number of
    alternatives listed in self.nested_structure (the sum of the lengths of
    all nests). The keyword arguments are forwarded to
    ChoiceModel.set_choice_set_size.
    """
    if self.sampler_size is None:
        # Only the nest contents matter here; values() works on Python 2 and 3.
        self.sampler_size = sum(len(alternatives) for alternatives in self.nested_structure.values())
    ChoiceModel.set_choice_set_size(self, **kwargs)
def create_interaction_datasets(self, agent_set, agents_index, config, submodels=[], **kwargs):
    """Create the interaction dataset with or without sampling of alternatives.

    Arguments to sampler_class are passed through 'config' (run_config or
    estimation_config in the configuration file), such as:
    'include_chosen_choice', 'with_replacement', 'stratum',
    'sample_size_from_each_stratum', 'sample_size_from_chosen_stratum'
    (for stratified sampler).

    Entries of 'config' read directly in this method:
        'sample_alternatives_by_submodel', 'sample_alternatives_by_group',
        'group_definition_for_sampling_alternatives',
        'chunk_specification_for_sampling',
        'accept_unavailability_of_choices'.

    'submodels' is the list of groups used when alternatives are sampled by
    submodel. It is only read, never modified, in this method.

    The resulting interaction dataset is stored in
    self.model_interaction.interaction_dataset (None if no alternatives or
    agents are available) and the choice set size is updated accordingly.
    The method itself returns None, except when neither sampling nor a filter
    applies, in which case the result of
    ChoiceModel.create_interaction_datasets is returned.

    Raises StandardError if no alternatives are available and
    'accept_unavailability_of_choices' is not set in 'config'.
    """
    nchoices = self.get_choice_set_size()
    sampling = True
    iterate_by_submodels = False
    # The filter is submodel-specific if it is a dict or contains the SUBMODEL placeholder.
    if self.filter is not None and (
        isinstance(self.filter, dict) or re.search("SUBMODEL", self.filter) is not None
    ):
        iterate_by_submodels = True
    ## apply (alternative) filter when alternative size equals to the size of choice set, or sampler class is None
    if (self.sampler_class is None) or (nchoices == self.choice_set.size()):
        if self.filter is None:
            return ChoiceModel.create_interaction_datasets(self, agent_set, agents_index, config)
        elif iterate_by_submodels:
            sampling = False
        else:  # apply filter without doing sampling if filter is not defined by submodels
            filter_index = self.apply_filter(self.filter, agent_set, agents_index)
            self.model_interaction.create_interaction_datasets(agents_index, filter_index)
            self.update_choice_set_size(filter_index.size)
            return
    sampling_weights = self.get_sampling_weights(config, agent_set=agent_set, agents_index=agents_index)
    interaction_dataset = None
    # if filter is specified by submodel in a dict, call sampler submodel by submodel
    sampling_by_groups = False
    if (
        iterate_by_submodels
        or config.get("sample_alternatives_by_submodel", False)
        or config.get("sample_alternatives_by_group", False)
    ):
        groups_equal_submodels = True
        groups = submodels
        sampling_by_groups = True
        if config.get("sample_alternatives_by_group", False):
            group_var = config.get("group_definition_for_sampling_alternatives", None)
            if group_var is None:
                logger.log_warning(
                    'No group variable defined for sampling alternatives. Set "group_definition_for_sampling_alternatives" in run_config/estimate_config.'
                )
                if isinstance(self.filter, dict):
                    logger.log_warning("Alternatives are sampled by submodel.")
                else:
                    # NOTE(review): with groups == [] the loop below never runs and the
                    # 'interaction_dataset is None' branch after it returns early, so the
                    # un-grouped sampling at the end of this method does not appear to be
                    # reachable from here - confirm the intended fallback.
                    groups = []
                    sampling_by_groups = False
            else:
                # Groups are the distinct values of the group variable among the given agents.
                group_values = agent_set.compute_variables([group_var], dataset_pool=self.dataset_pool)[
                    agents_index
                ]
                groups = unique(group_values)
                groups_equal_submodels = False
        # Alternatives per agent, merged over all groups; -1 marks an empty slot.
        index2 = -1 + zeros((agents_index.size, nchoices), dtype="int32")
        attributes = {}
        ###TODO: it may be possible to merge this loop with sample_alternatives_by_chunk or put it in a common function
        for group in groups:
            # Positions (within agents_index) of the agents belonging to this group.
            if groups_equal_submodels:
                where_group = self.observations_mapping[group]
            else:
                where_group = where(group_values == group)[0]
            if where_group.size == 0:
                continue
            agents_index_in_group = agents_index[where_group]
            choice_index = self.apply_filter(
                self.filter,
                agent_set=agent_set,
                agents_index=agents_index_in_group,
                submodel=group,
                replace_dict={"SUBMODEL": group} if groups_equal_submodels else {"GROUP": group},
            )
            if choice_index is not None and choice_index.size == 0:
                logger.log_error(
                    "There is no alternative that passes filter %s for %s=%s; %s agents with id %s will remain unplaced."
                    % (
                        self.filter,
                        "SUBMODEL" if groups_equal_submodels else "GROUP",
                        group,
                        agents_index_in_group.size,
                        agent_set.get_id_attribute()[agents_index_in_group],
                    )
                )
                continue
            if sampling:
                group_sampling_weights = sampling_weights
                # Weights given as an expression may contain the group placeholder.
                if isinstance(sampling_weights, str):
                    group_sampling_weights = re.sub(
                        "SUBMODEL" if groups_equal_submodels else "GROUP", str(int(group)), sampling_weights
                    )
                chunk_specification = config.get("chunk_specification_for_sampling", {"nchunks": 1})
                # NOTE(review): a string value from the configuration is passed to eval() -
                # make sure the configuration comes from a trusted source.
                if type(chunk_specification) == str:
                    chunk_specification = eval(chunk_specification)
                chunk_specification = ChunkSpecification(chunk_specification)
                nchunks = chunk_specification.nchunks(agents_index_in_group)
                chunksize = chunk_specification.chunk_size(agents_index_in_group)
                interaction_dataset = self.sample_alternatives_by_chunk(
                    agent_set,
                    agents_index_in_group,
                    choice_index,
                    nchoices,
                    weights=group_sampling_weights,
                    config=config,
                    nchunks=nchunks,
                    chunksize=chunksize,
                )
                filter_idx = interaction_dataset.index2
            else:
                # No sampling: all alternatives that pass the filter are used for this group.
                self.model_interaction.create_interaction_datasets(agents_index_in_group, choice_index)
                interaction_dataset = self.model_interaction.interaction_dataset
                filter_idx = -1 * ones((agents_index_in_group.size, nchoices), dtype="int32")
                filter_idx[:, interaction_dataset.index2] = interaction_dataset.index2
            if (
                not config.get("accept_unavailability_of_choices", False)
                and interaction_dataset.get_reduced_m() == 0
            ):
                raise StandardError, "There are no locations available for the given sampling weights for group %s." % group
            # Collect the results of this group only if they need to be merged afterwards.
            if len(groups) > 1 or (agents_index.size > agents_index_in_group.size):
                if interaction_dataset.get_reduced_m() > 0:
                    index2[where_group, :] = filter_idx
                    for name in interaction_dataset.get_known_attribute_names():
                        attr_val = interaction_dataset.get_attribute(name)
                        if not attributes.has_key(name):
                            attributes[name] = zeros(index2.shape, dtype=attr_val.dtype)
                        attributes[name][where_group, :] = attr_val
        # No group produced an interaction dataset.
        if interaction_dataset is None:
            logger.log_warning(
                "There is no agent for groups %s. " % (groups)
                + "This may be due to mismatch between agent_filter and submodels included in specification."
            )
            self.model_interaction.interaction_dataset = None
            return
        if len(groups) > 1 or (
            agents_index.size > agents_index_in_group.size
        ):  ## if there are more than 1 group, merge the data by submodel and recreate interaction_dataset
            if sampling:
                interaction_dataset = self.sampler_class.create_interaction_dataset(
                    interaction_dataset.dataset1, interaction_dataset.dataset2, index1=agents_index, index2=index2
                )
                for name in attributes.keys():
                    interaction_dataset.add_primary_attribute(attributes[name], name)
            else:
                self.model_interaction.create_interaction_datasets(agents_index, index2)
                interaction_dataset = self.model_interaction.interaction_dataset
        self.update_choice_set_size(interaction_dataset.get_reduced_m())
    if not sampling_by_groups:  # no sampling by submodels/groups
        choice_index = self.apply_filter(self.filter, agent_set=agent_set, agents_index=agents_index)
        if choice_index is not None and choice_index.size == 0:
            message = (
                "There is no alternative that passes filter %s; %s agents with id %s will remain unplaced."
                % (self.filter, agents_index.size, agent_set.get_id_attribute()[agents_index])
            )
            if not config.get("accept_unavailability_of_choices", False):
                raise StandardError, message
            logger.log_error(message)
            self.model_interaction.interaction_dataset = None
            return
        chunk_specification = config.get("chunk_specification_for_sampling", {"nchunks": 1})
        # NOTE(review): same eval() on a configuration string as in the group loop above.
        if type(chunk_specification) == str:
            chunk_specification = eval(chunk_specification)
        chunk_specification = ChunkSpecification(chunk_specification)
        nchunks = chunk_specification.nchunks(agents_index)
        chunksize = chunk_specification.chunk_size(agents_index)
        interaction_dataset = self.sample_alternatives_by_chunk(
            agent_set,
            agents_index,
            choice_index,
            nchoices,
            weights=sampling_weights,
            config=config,
            nchunks=nchunks,
            chunksize=chunksize,
        )
        if not config.get("accept_unavailability_of_choices", False) and interaction_dataset.get_reduced_m() == 0:
            raise StandardError, "There are no locations available for the given sampling weights."
        self.update_choice_set_size(interaction_dataset.get_reduced_m())
    self.model_interaction.interaction_dataset = interaction_dataset
def simulate_chunk(self, *args, **kwargs):
    """Select the agent units of the current chunk and delegate to ChoiceModel.

    If the run configuration holds an 'agent_units_all' entry, its elements at
    self.observations_mapping["mapped_index"] are stored under 'agent_units'.
    All arguments are forwarded to ChoiceModel.simulate_chunk, whose result is
    returned.
    """
    units_of_all_agents = self.run_config.get("agent_units_all", None)
    if units_of_all_agents is not None:
        mapped_index = self.observations_mapping["mapped_index"]
        self.run_config["agent_units"] = units_of_all_agents[mapped_index]
    return ChoiceModel.simulate_chunk(self, *args, **kwargs)
def estimate(self, specification, *args, **kwargs): self.init_membership_in_nests() # This is because there will be __logsum_ variables in the specification when configured from the GUI, # in order to define starting values. They are not supposed to be included there. self.delete_logsum_from_specification(specification) return ChoiceModel.estimate(self, specification, *args, **kwargs)