def _assign_job_to_worker(self, worker_index, job_index):
    logger.log_status("Attempt to assign %s jobs to %s workers" % (job_index.size, worker_index.size))
    if worker_index.size >= job_index.size:
        # the number of at-home workers exceeds the available choices (home-based jobs by default)
        assigned_worker_index = sample_noreplace(worker_index, job_index.size)
        assigned_job_index = job_index
    else:
        assigned_worker_index = worker_index
        assigned_job_index = sample_noreplace(job_index, worker_index.size)
    logger.log_status("Assigned %s jobs to %s workers" % (assigned_job_index.size, assigned_worker_index.size))
    return (assigned_worker_index, assigned_job_index)
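# A minimal standalone sketch (not from the original source) of the matching
# contract above: only min(n_workers, n_jobs) pairs are ever formed, and the
# larger side is thinned without replacement. numpy.random.choice stands in
# for opus_core's sample_noreplace; _demo_assign is a hypothetical name.
import numpy

def _demo_assign(worker_index, job_index):
    if worker_index.size >= job_index.size:
        return numpy.random.choice(worker_index, job_index.size, replace=False), job_index
    return worker_index, numpy.random.choice(job_index, worker_index.size, replace=False)

workers = numpy.arange(10)       # 10 at-home workers
jobs = numpy.arange(100, 104)    # only 4 home-based jobs
w, j = _demo_assign(workers, jobs)
assert w.size == j.size == 4     # 4 workers end up matched to the 4 jobs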
def _delete(self, agents_pool, amount, agent_dataset, location_dataset,
            this_refinement, dataset_pool):
    """Similar to the subtract action, but instead of unplacing agents it removes
    them from the agent dataset; those agents won't be available for later actions.
    """
    fit_index = self.get_fit_agents_index(agent_dataset,
                                          this_refinement.agent_filter,
                                          '',
                                          dataset_pool)
    if amount > fit_index.size or amount < 0:
        logger.log_warning("Request to delete %i agents, but there are %i agents in total satisfying %s; "
                           "deleting %i agents instead" % (amount, fit_index.size,
                                                           this_refinement.agent_filter,
                                                           fit_index.size))
        amount = fit_index.size
    if amount == fit_index.size:
        movers_index = fit_index
    else:
        movers_index = sample_noreplace(fit_index, amount)
    agents_pool = list(set(agents_pool) - set(movers_index))
    agent_dataset.remove_elements(array(movers_index))
def sample_and_unplace_agents(self, spaces_with_negatives, dataset,
                              unique_redevelopment_building_ids_index, index_overfilled_spaces):
    """ Using overfilled spaces, agents, and buildings, randomly sample and unplace
    agents until the spaces are no longer overfilled.
    - spaces_with_negatives is an array of vacant residential or job spaces that
      includes negative values (overfilled)
    - dataset is the dataset of agents overfilling the spaces specified in
      spaces_with_negatives
    """
    # set up table for logging unplaced building occupants
    occupants_log = PrettyTable()
    occupants_log.set_field_names(["building_id", "HH unplaced", "Jobs unplaced", "HB Jobs unplaced"])
    number_of_overfilled_spaces = abs(spaces_with_negatives[index_overfilled_spaces].astype('int'))
    overfilled_spaces_building_ids = self.buildings_dataset.get_id_attribute()[unique_redevelopment_building_ids_index]
    overfilled_spaces_building_ids = overfilled_spaces_building_ids[index_overfilled_spaces]
    look_for_home_based_jobs = False
    dataset_name = dataset.get_dataset_name()
    if 'household' in dataset_name:
        look_for_home_based_jobs = True
    for building_id, number_of_agents_to_unplace in zip(overfilled_spaces_building_ids,
                                                        number_of_overfilled_spaces):
        occupants_log_line = []
        occupants_log_line.append(building_id)
        index_of_agents_to_sample_from = dataset.get_filtered_index('%s.building_id==%s' % (dataset_name, building_id))
        sample_of_agents_to_unplace = sample_noreplace(index_of_agents_to_sample_from,
                                                       number_of_agents_to_unplace)
        # unplace the sampled agents by resetting their building_id
        dataset.set_values_of_one_attribute('building_id', array([-1]), sample_of_agents_to_unplace)
        if look_for_home_based_jobs:
            occupants_log_line.append(number_of_agents_to_unplace)
            occupants_log_line.append(0)
            number_of_home_based_jobs_to_unplace = self.unplace_home_based_jobs(building_id)
            occupants_log_line.append(number_of_home_based_jobs_to_unplace)
        else:
            occupants_log_line.append(0)
            occupants_log_line.append(number_of_agents_to_unplace)
            occupants_log_line.append(0)
        occupants_log.add_row(occupants_log_line)
    return occupants_log
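# Hedged numpy sketch of the bookkeeping above: negative vacant-space counts
# mark overfilled buildings, and their absolute values are the numbers of
# agents to unplace. The arrays here are toy data, not from the source.
import numpy
spaces = numpy.array([3, -2, 0, -1])          # vacant spaces; negative = overfilled
overfilled = numpy.where(spaces < 0)[0]
to_unplace = abs(spaces[overfilled].astype('int'))
print overfilled, to_unplace                  # -> [1 3] [2 1]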
def _do_sector_for_businesses(self, sector, diff, business_set, is_in_sector):
    available_business_index = where(is_in_sector)[0]
    if diff < 0:  # remove businesses, preferring those without an assigned location
        sample_array, non_placed, size_non_placed = \
            get_array_without_non_placed_agents(business_set, available_business_index,
                                                -1 * diff, self.location_id_name)
        self.remove_businesses = concatenate((self.remove_businesses, non_placed,
                                              sample_noreplace(sample_array,
                                                               max(0, abs(diff) - size_non_placed))))
    if diff > 0:  # create new businesses by sampling (with replacement) from the existing ones
        self.new_businesses[self.location_id_name] = concatenate((self.new_businesses[self.location_id_name],
                                                                  zeros((diff,))))
        self.new_businesses["sector_id"] = concatenate((self.new_businesses["sector_id"],
                                                        sector * ones((diff,))))
        sampled_business = probsample_replace(available_business_index, diff, None)
        self.new_businesses["sqft"] = concatenate((self.new_businesses["sqft"],
                                                   business_set.get_attribute("sqft")[sampled_business]))
        self.new_businesses["employment"] = concatenate((self.new_businesses["employment"],
                                                         business_set.get_attribute("employment")[sampled_business]))
        self.new_businesses["activity_id"] = concatenate((self.new_businesses["activity_id"],
                                                          business_set.get_attribute("activity_id")[sampled_business]))
        new_max_id = self.max_id + diff
        self.new_businesses[self.business_id_name] = concatenate((self.new_businesses[self.business_id_name],
                                                                  arange(self.max_id + 1, new_max_id + 1)))
        self.max_id = new_max_id
def run(self, specification, coefficients, agent_set, agents_index=None, **kwargs):
    choices = ChoiceModel.run(self, specification, coefficients, agent_set,
                              agents_index=agents_index, **kwargs)
    if agents_index is None:
        agents_index = arange(agent_set.size())
    movers_indices = agents_index[where(choices > 0)]
    if self.movers_ratio is not None:
        n = rint(self.movers_ratio * agents_index.size)
        if n < movers_indices.size:
            movers_indices = sample_noreplace(movers_indices, n)
    # add unplaced agents
    unplaced_agents = agents_index[agent_set.get_attribute_by_index(self.location_id_name,
                                                                    agents_index) <= 0]
    logger.log_status("%s agents selected by the logit model; %s agents without %s." %
                      (movers_indices.size, unplaced_agents.size, self.location_id_name))
    movers_indices = unique(concatenate((movers_indices, unplaced_agents)))
    logger.log_status("Number of movers: " + str(movers_indices.size))
    return movers_indices
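# Hedged numpy sketch of the movers_ratio cap above: at most
# rint(ratio * n_agents) of the agents chosen by the model are kept as movers.
# numpy.random.choice stands in for sample_noreplace; all data is toy data.
import numpy
agents_index = numpy.arange(1000)
choices = numpy.random.random(1000) < 0.4       # toy logit outcome: ~400 movers
movers = agents_index[numpy.where(choices)[0]]
n = int(numpy.rint(0.25 * agents_index.size))   # ratio 0.25 -> cap of 250
if n < movers.size:
    movers = numpy.random.choice(movers, n, replace=False)
assert movers.size <= 250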
def map_agents_to_submodels(self, submodels, submodel_string, agent_set, agents_index,
                            dataset_pool=None, resources=None, submodel_size_max=None):
    """ Creates a class attribute self.observations_mapping which is a dictionary
    where each entry corresponds to one submodel. It contains indices of agents
    (within agents_index) that belong to that submodel.
    Additionally, self.observations_mapping has an entry 'index' which contains
    agents_index, and an entry 'mapped_index' which contains only those indices
    of agents_index that are included in any of the submodel entries of
    observations_mapping. Thus, entries of 'index' missing from 'mapped_index'
    are agents that do not belong to any submodel.
    'submodels' is a list of submodels to be considered.
    'submodel_string' specifies the name of the attribute/variable that
    distinguishes submodels.
    'resources' are passed to the computation of the variable 'submodel_string'.
    'submodel_size_max' determines the maximum size of a submodel. If the actual
    size exceeds this number, agents are randomly sampled so that the submodel
    size matches this number.
    """
    self.observations_mapping = {}  # maps each observation to the submodel it belongs to
    nsubmodels = len(submodels)
    if (nsubmodels > 1) or ((nsubmodels == 1) and (submodels[0] != -2)):
        try:
            agent_set.compute_variables(submodel_string, dataset_pool=dataset_pool,
                                        resources=resources)
        except:
            pass
        submodel_alias = None
        if submodel_string is not None:
            submodel_alias = VariableName(submodel_string).get_alias()
        if (nsubmodels == 1) and ((submodel_string is None) or
                                  (submodel_alias not in agent_set.get_known_attribute_names())):
            self.observations_mapping[submodels[0]] = arange(agents_index.size)
        else:
            for submodel in submodels:  # map agents to submodels
                w = where(agent_set.get_attribute_by_index(submodel_alias,
                                                           agents_index) == submodel)[0]
                if submodel_size_max is not None and submodel_size_max < w.size:
                    # sub-sample from the submodel
                    wnew = sample_noreplace(w, submodel_size_max)
                    logger.log_status('Number of records in submodel %s reduced from %s to %s.' %
                                      (submodel, w.size, wnew.size))
                    w = sort(wnew)
                self.observations_mapping[submodel] = w
    else:  # no submodel distinction
        self.observations_mapping[-2] = arange(agents_index.size)

    mapped = zeros(agents_index.size, dtype='bool8')
    for submodel, index in self.observations_mapping.iteritems():
        mapped[index] = True
    self.observations_mapping["index"] = agents_index
    self.observations_mapping["mapped_index"] = where(mapped)[0]
def _delete(self, agents_pool, amount, agent_dataset, location_dataset,
            this_refinement, dataset_pool):
    """Similar to the subtract action, but instead of unplacing agents it removes
    them from the agent dataset; those agents won't be available for later actions.
    """
    fit_index = self.get_fit_agents_index(agent_dataset,
                                          this_refinement.agent_expression,
                                          this_refinement.location_expression,
                                          dataset_pool)
    if amount > fit_index.size or amount < 0:
        logger.log_warning("Refinement requests to delete %i agents, but there are %i agents in total satisfying %s; "
                           "deleting %i agents instead" % (amount, fit_index.size,
                                                           ' and '.join([this_refinement.agent_expression,
                                                                         this_refinement.location_expression]).strip(' and '),
                                                           fit_index.size))
        amount = fit_index.size
    if amount == fit_index.size:
        movers_index = fit_index
    else:
        movers_index = sample_noreplace(fit_index, amount)
    agents_pool = list(set(agents_pool) - set(movers_index))

    ## modify location capacity attribute if specified
    if this_refinement.location_capacity_attribute is not None and len(this_refinement.location_capacity_attribute) > 0:
        location_dataset = dataset_pool.get_dataset(
            VariableName(this_refinement.location_expression).get_dataset_name())
        movers_location_id = agent_dataset.get_attribute(location_dataset.get_id_name()[0])[movers_index]
        movers_location_index = location_dataset.get_id_index(movers_location_id)
        # count movers per location (bins cover every location index)
        num_of_movers_by_location = histogram(movers_location_index,
                                              bins=arange(location_dataset.size() + 1))[0]
        num_of_agents_by_location = location_dataset.compute_variables(
            "number_of_agents=%s.number_of_agents(%s)" %
            (location_dataset.dataset_name, agent_dataset.dataset_name),
            dataset_pool=dataset_pool)
        shrink_factor = safe_array_divide(
            (num_of_agents_by_location - num_of_movers_by_location).astype('float32'),
            num_of_agents_by_location,
            return_value_if_denominator_is_zero=1.0)
        new_values = round_(shrink_factor * location_dataset.get_attribute(this_refinement.location_capacity_attribute))
        location_dataset.modify_attribute(this_refinement.location_capacity_attribute, new_values)
        self._add_refinement_info_to_dataset(location_dataset, self.id_names,
                                             this_refinement, index=movers_location_index)

    agent_dataset.remove_elements(array(movers_index))
def test_sample_noreplace(self):
    start_time = time.time()
    sample = sample_noreplace(self.all, self.size, return_index=True)
    logger.log_status("sample_noreplace %s from %s items array in " % (self.size, self.n) +
                      str(time.time() - start_time) + " sec")
    self.assertEqual(sample.size, self.size, msg="sample size not equal to size parameter")
    assert isinstance(sample, ndarray), "sample is not of type ndarray"
    assert 0 <= sample.min() <= self.n - 1, "sampled elements not between min and max of source array"
    assert 0 <= sample.max() <= self.n - 1, "sampled elements not between min and max of source array"
    assert not sometrue(find_duplicates(sample)), "there are duplicates in samples"
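# A minimal sketch (an assumption, not opus_core's actual implementation) of a
# sample_noreplace satisfying the contract tested above: a duplicate-free
# ndarray of the requested size drawn from the source array. The return_index
# option of the real function is not modeled here.
from numpy.random import permutation

def sample_noreplace_sketch(source_array, size):
    assert size <= source_array.size, "cannot sample more elements than available"
    return source_array[permutation(source_array.size)[:size]]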
def prepare_for_estimate(self, specification_dict=None, specification_storage=None,
                         specification_table=None, agent_set=None,
                         agents_for_estimation_storage=None,
                         agents_for_estimation_table=None, join_datasets=False,
                         index_to_unplace=None, portion_to_unplace=1.0,
                         agent_filter=None, data_objects={}):
    from opus_core.model import get_specification_for_estimation
    specification = get_specification_for_estimation(specification_dict,
                                                     specification_storage,
                                                     specification_table)
    if (agent_set is not None) and (index_to_unplace is not None):
        if self.location_id_string is not None:
            agent_set.compute_variables(self.location_id_string,
                                        resources=Resources(data_objects))
        if portion_to_unplace < 1:
            unplace_size = int(portion_to_unplace * index_to_unplace.size)
            end_index_to_unplace = sample_noreplace(index_to_unplace, unplace_size)
        else:
            end_index_to_unplace = index_to_unplace
        logger.log_status("Unplace " + str(end_index_to_unplace.size) + " agents.")
        agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                   -1 * ones(end_index_to_unplace.size),
                                   end_index_to_unplace)
    # create agents for estimation
    if agents_for_estimation_storage is not None:
        estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                 in_table_name=agents_for_estimation_table,
                                 id_name=agent_set.get_id_name(),
                                 dataset_name=agent_set.get_dataset_name())
        if agent_filter is not None:
            estimation_set.compute_variables(agent_filter,
                                             resources=Resources(data_objects))
            index = where(estimation_set.get_attribute(agent_filter) > 0)[0]
            estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)
        if join_datasets:
            agent_set.join_by_rows(estimation_set, require_all_attributes=False,
                                   change_ids_if_not_unique=True)
            index = arange(agent_set.size() - estimation_set.size(), agent_set.size())
        else:
            index = agent_set.get_id_index(estimation_set.get_id_attribute())
    else:
        index = arange(agent_set.size())
    return (specification, index)
def _subtract(self, agents_pool, amount, agent_dataset, location_dataset,
              this_refinement, dataset_pool):
    fit_index = self.get_fit_agents_index(agent_dataset,
                                          this_refinement.agent_expression,
                                          this_refinement.location_expression,
                                          dataset_pool)
    if amount > fit_index.size:
        logger.log_warning("Refinement requests to subtract %i agents, but there are %i agents in total satisfying %s; "
                           "subtracting %i agents instead" % (amount, fit_index.size,
                                                              ' and '.join([this_refinement.agent_expression,
                                                                            this_refinement.location_expression]).strip(' and '),
                                                              fit_index.size))
        amount = fit_index.size
    if amount == fit_index.size:
        movers_index = fit_index
    else:
        movers_index = sample_noreplace(fit_index, amount)
    agents_pool += movers_index.tolist()

    ## modify location capacity attribute if specified
    if this_refinement.location_capacity_attribute is not None and len(this_refinement.location_capacity_attribute) > 0:
        location_dataset = dataset_pool.get_dataset(
            VariableName(this_refinement.location_expression).get_dataset_name())
        movers_location_id = agent_dataset.get_attribute(location_dataset.get_id_name()[0])[movers_index]
        movers_location_index = location_dataset.get_id_index(movers_location_id)
        # count movers per location; relies on numpy >= 1.2.1 histogram semantics
        num_of_movers_by_location = histogram(movers_location_index,
                                              bins=arange(location_dataset.size() + 1))[0]
        num_of_agents_by_location = location_dataset.compute_variables(
            "number_of_agents=%s.number_of_agents(%s)" %
            (location_dataset.dataset_name, agent_dataset.dataset_name),
            dataset_pool=dataset_pool)
        shrink_factor = safe_array_divide(
            (num_of_agents_by_location - num_of_movers_by_location).astype('float32'),
            num_of_agents_by_location,
            return_value_if_denominator_is_zero=1.0)
        new_values = round_(shrink_factor * location_dataset.get_attribute(this_refinement.location_capacity_attribute))
        location_dataset.modify_attribute(this_refinement.location_capacity_attribute, new_values)
        self._add_refinement_info_to_dataset(location_dataset,
                                             ("refinement_id", "transaction_id"),
                                             this_refinement,
                                             index=movers_location_index)

    agent_dataset.modify_attribute(location_dataset.get_id_name()[0],
                                   -1 * ones(movers_index.size, dtype='int32'),
                                   index=movers_index)
    self._add_refinement_info_to_dataset(agent_dataset, self.id_names,
                                         this_refinement, index=movers_index)
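# Hedged numpy sketch of the capacity-shrinking step above: if a location loses
# k of its m agents, its capacity attribute is scaled by (m - k) / m.
# numpy.where stands in for opus_core's safe_array_divide; the data is toy data.
import numpy
movers_location_index = numpy.array([0, 0, 2])   # two movers leave location 0, one leaves location 2
num_locations = 3
num_movers = numpy.histogram(movers_location_index, bins=numpy.arange(num_locations + 1))[0]
num_agents = numpy.array([4, 5, 1], dtype='float32')
shrink = numpy.where(num_agents > 0, (num_agents - num_movers) / num_agents, 1.0)
capacity = numpy.array([8, 10, 2])
print numpy.round_(shrink * capacity)            # -> [  4.  10.   0.]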
def run(self, household, person, work_eligible='person.age>15', full_time_ratio=1.0, **kwargs):
    if 'employment_status' in person.get_known_attribute_names():
        employment_status = person['employment_status']
        assigned_workers = household.compute_variables('household.aggregate(person.employment_status)')
    else:
        employment_status = zeros(person.size(), dtype='i4')
        assigned_workers = zeros(household.size(), dtype='i4')
    if 'full_time' in person.get_known_attribute_names():
        full_time = person['full_time']
    else:
        full_time = zeros(person.size(), dtype='i4')
    predicted_workers = household['workers']
    diff = predicted_workers - assigned_workers
    indices = where(diff != 0)[0]
    eligible = person.compute_variables(work_eligible)
    logger.log_status('Updating employment_status for {} workers in {} households'.format(
        abs(diff).sum(), indices.size))
    for index in indices:
        in_hh = person['household_id'] == household['household_id'][index]
        if diff[index] > 0:
            # promote eligible non-workers into employment
            sample_pool = where((~employment_status) & eligible & in_hh)[0]
            new_workers = sample_noreplace(sample_pool, diff[index])
            employment_status[new_workers] = 1
            chance = random(new_workers.size)
            full_time[new_workers] = ((1 - full_time_ratio) < chance).astype('i4')
        else:
            # retire surplus workers
            sample_pool = where((employment_status) & in_hh)[0]
            exit_workers = sample_noreplace(sample_pool, -diff[index])
            employment_status[exit_workers] = 0
            full_time[exit_workers] = -1
    if 'employment_status' in person.get_known_attribute_names():
        person.modify_attribute('employment_status', employment_status)
    else:
        person.add_primary_attribute(employment_status, 'employment_status')
    if 'full_time' in person.get_known_attribute_names():
        person.modify_attribute('full_time', full_time)
    else:
        person.add_primary_attribute(full_time, 'full_time')
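# Hedged sketch of the full-time draw above: a uniform draw exceeds
# (1 - full_time_ratio) with probability full_time_ratio, so roughly that share
# of new workers is flagged full-time. Toy data, not from the source.
import numpy
full_time_ratio = 0.7
chance = numpy.random.random(10000)
full_time = ((1 - full_time_ratio) < chance).astype('i4')
print full_time.mean()   # approximately 0.7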
def run(self, dataset_pool):
    workers = dataset_pool['person']
    faz_ids = workers.compute_variables(
        'faz_id = person.disaggregate(zone.faz_id, intermediates=[parcel, building, household])',
        dataset_pool=dataset_pool)
    is_worker = workers.compute_variables('urbansim_parcel.person.is_worker',
                                          dataset_pool=dataset_pool)
    workers_jobs = workers['job_id']
    job_ids = arange(self.job_id_range[0], self.job_id_range[1] + 1)
    for area, values in self.faz_worker_mapping.iteritems():
        fazes = array(values[0])
        amount = values[1]
        indicator = logical_and(ismember(faz_ids, fazes), is_worker)
        job_idx = where(job_ids > 0)[0]  # jobs not yet taken
        sampled_jobs = sample_noreplace(job_idx, amount)
        workers_idx = where(indicator > 0)[0]
        sampled_workers = sample_noreplace(workers_idx, amount)
        workers_jobs[sampled_workers] = job_ids[sampled_jobs]
        job_ids[sampled_jobs] = 0  # mark the assigned jobs as taken
    workers.modify_attribute(name='job_id', data=workers_jobs)
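# Hedged sketch of the bookkeeping above: assigned job ids are zeroed out so
# they cannot be drawn again for another area. numpy.random.choice stands in
# for sample_noreplace; the ids are toy data.
import numpy
job_ids = numpy.arange(100, 110)
taken = numpy.random.choice(numpy.where(job_ids > 0)[0], 3, replace=False)
job_ids[taken] = 0
assert (job_ids > 0).sum() == 7   # three jobs are no longer available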
def run(self, location_set, deletion_event_set, current_year, dataset_pool=None):
    """ The deletion_event_set is expected to have the attributes:
    grid_id, scheduled_year, number_of_jobs, number_of_households.
    The method finds jobs/households located in the given locations (grid_id),
    then samples the given amount for this year and unplaces them.
    If the value for number_of_jobs/number_of_households is -2, the model
    removes all jobs/households from the location.
    """
    if not deletion_event_set or (deletion_event_set.size() == 0):
        logger.log_status("No jobs/households to be deleted.")
        return
    idx_of_events_this_year = deletion_event_set.get_attribute("scheduled_year") == current_year
    if idx_of_events_this_year.sum() == 0:
        logger.log_status("No jobs/households to be deleted.")
        return
    self.dataset_pool = self.create_dataset_pool(dataset_pool)
    location_id_name = location_set.get_id_name()[0]
    location_ids_in_event_set = deletion_event_set.get_attribute_by_index(location_id_name,
                                                                          idx_of_events_this_year)

    agents_dict = {"number_of_jobs": "job", "number_of_households": "household"}
    # load the attributes and datasets needed
    delete_agents = {"job": False, "household": False}
    agent_sets = {}
    for attribute_name, dataset_name in agents_dict.iteritems():
        if attribute_name in deletion_event_set.get_known_attribute_names():
            values = deletion_event_set.get_attribute_by_index(attribute_name,
                                                               idx_of_events_this_year)
            if values.sum() > 0:
                agent_sets[dataset_name] = self.dataset_pool.get_dataset(dataset_name)
                if location_id_name not in agent_sets[dataset_name].get_known_attribute_names():
                    # compute the agents' locations
                    agent_sets[dataset_name].compute_one_variable_with_unknown_package(
                        location_id_name, self.dataset_pool)
                delete_agents[dataset_name] = True

    for attribute_name, dataset_name in agents_dict.iteritems():
        if not delete_agents[dataset_name]:
            continue
        # iterate over locations
        for location_id in location_ids_in_event_set:
            # getattr replaces the original eval() call; it is equivalent and safer
            number_of_agents = getattr(
                deletion_event_set.get_data_element_by_id((location_id, current_year)),
                attribute_name)
            agent_ids = agent_sets[dataset_name].get_attribute(location_id_name)
            agent_index = where(agent_ids == location_id)[0]
            if (number_of_agents == -2) or (agent_index.size <= number_of_agents):
                # unplace all agents
                unplace_index = agent_index
            else:
                # sample agents
                unplace_index = sample_noreplace(agent_index, number_of_agents)
            agent_sets[dataset_name].modify_attribute(
                name=location_id_name,
                data=resize(array([-1], dtype=agent_ids.dtype), unplace_index.size),
                index=unplace_index)
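# Hedged sketch of the unplacing idiom above: resize broadcasts the -1 marker
# to one entry per agent being unplaced. Toy arrays, not from the source.
import numpy
agent_location = numpy.array([7, 7, 9, 7, 3], dtype='int32')
unplace_index = numpy.array([0, 3])   # agents to unplace
agent_location[unplace_index] = numpy.resize(numpy.array([-1], dtype=agent_location.dtype),
                                             unplace_index.size)
print agent_location                  # -> [-1  7  9 -1  3]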
def get_array_without_non_placed_agents(business_set, arr, max_value=None,
                                        location_id_name="grid_id"):
    if location_id_name in business_set.get_known_attribute_names():
        non_placed = where(business_set.get_attribute_by_index(location_id_name, arr) <= 0)[0]
    else:
        non_placed = array([], dtype='int32')
    size_non_placed = non_placed.size
    if size_non_placed <= 0:
        return (arr, non_placed, 0)
    if (max_value is not None) and (size_non_placed > max_value):
        non_placed = sample_noreplace(non_placed, max_value)
        size_non_placed = non_placed.size
    a = ones((arr.size,), dtype="int8")
    a[non_placed] = 0
    return (compress(a, arr), arr[non_placed], size_non_placed)
def prepare_for_estimate(self, agent_set=None, index_to_unplace=None,
                         portion_to_unplace=1.0, compute_lambda=False,
                         grouping_location_set=None, movers_variable=None,
                         movers_index=None, location_id_variable=None,
                         data_objects={}, *args, **kwargs):
    """Always pass 'location_id_variable' if the location id is to be computed
    on the estimation set, i.e. if it is not a primary attribute of the
    estimation set. Set 'index_to_unplace' to None if 'compute_lambda' is True;
    in that case the annual supply is estimated without unplacing agents.
    'grouping_location_set', 'movers_variable' and 'movers_index' must be given
    if 'compute_lambda' is True.
    """
    from urbansim.functions import compute_supply_and_add_to_location_set
    if (agent_set is not None) and (index_to_unplace is not None):
        if self.location_id_string is not None:
            agent_set.compute_variables(self.location_id_string,
                                        resources=Resources(data_objects))
        if portion_to_unplace < 1:
            unplace_size = int(portion_to_unplace * index_to_unplace.size)
            end_index_to_unplace = sample_noreplace(index_to_unplace, unplace_size)
        else:
            end_index_to_unplace = index_to_unplace
        logger.log_status("Unplace " + str(end_index_to_unplace.size) + " agents.")
        agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                   resize(array([-1]), end_index_to_unplace.size),
                                   end_index_to_unplace)
    if compute_lambda:
        movers = zeros(agent_set.size(), dtype="bool8")
        if movers_index is not None:
            movers[movers_index] = 1
        agent_set.add_primary_attribute(movers, "potential_movers")
        self.estimate_config["weights_for_estimation_string"] = \
            self.estimate_config["weights_for_estimation_string"] + "_from_lambda"
        compute_supply_and_add_to_location_set(self.choice_set, grouping_location_set,
                                               self.run_config["number_of_units_string"],
                                               self.run_config["capacity_string"],
                                               movers_variable,
                                               self.estimate_config["weights_for_estimation_string"],
                                               resources=Resources(data_objects))
    specification, index = prepare_for_estimate(agent_set=agent_set, *args, **kwargs)
    return (specification, index)
def _remove(self, amount=0, attribute='', dataset=None, index=None, **kwargs):
    if index is None:
        index = arange(dataset.size())
    if index.size < amount:
        logger.log_warning("Number of observations satisfying event condition (%s) is less than "
                           "the number to be removed (%s); removing %s instead" %
                           (index.size, amount, index.size))
        amount = index.size
    if index.size == amount:
        to_be_removed = index
    else:
        to_be_removed = sample_noreplace(index, amount)
    if to_be_removed.size > 0:
        dataset.remove_elements(to_be_removed)
def run(self, jobs, in_storage, out_storage=None):
    dataset_pool = DatasetPool(storage=in_storage,
                               package_order=['psrc_parcel', 'urbansim_parcel',
                                              'urbansim', 'opus_core'])
    if jobs is None:
        jobs = dataset_pool.get_dataset('job')
    else:
        dataset_pool.replace_dataset('job', jobs)
    hhs = dataset_pool.get_dataset('household')
    buildings = dataset_pool.get_dataset('building')
    buildings.compute_variables(["psrc_parcel.building.census_block_group_id",
                                 "psrc_parcel.building.number_of_home_based_jobs",
                                 "urbansim_parcel.building.number_of_households",
                                 "urbansim_parcel.building.residential_units"],
                                dataset_pool=dataset_pool)
    # identify one record per home-based business
    ubusiness, ubusiness_idx = unique(jobs['business_id'] * (jobs['home_based_status'] == 1),
                                      return_index=True)
    jobs_ubusiness = zeros(jobs.size(), dtype='bool8')
    jobs_ubusiness[ubusiness_idx] = True
    jobs_ubusiness[jobs['home_based_status'] == 0] = False
    # number of home-based businesses per building, capped by residential units
    nhbbus = minimum(ndsum(jobs_ubusiness, labels=jobs['building_id'],
                           index=buildings['building_id']),
                     buildings["residential_units"])
    affected_buildings_ind = logical_and((buildings["number_of_households"] - nhbbus) < 0,
                                         buildings["number_of_households"] < buildings["residential_units"])
    not_affected_buildings_ind = logical_and(logical_not(affected_buildings_ind),
                                             buildings["number_of_home_based_jobs"] == 0)
    blocks = unique(buildings["census_block_group_id"][where(affected_buildings_ind)])
    hh_building_id = hhs['building_id'].copy()
    seed(1)
    logger.log_status("%s buildings in %s census block groups affected for moving households to jobs." %
                      (affected_buildings_ind.sum(), blocks.size))
    logger.start_block("Moving households to jobs")
    for block in blocks:
        bidx = where(logical_and(affected_buildings_ind,
                                 buildings["census_block_group_id"] == block))[0]
        bidx_out = where(logical_and(not_affected_buildings_ind,
                                     buildings["census_block_group_id"] == block))[0]
        if bidx_out.size == 0:
            continue
        hh_idx = where(in1d(hhs['building_id'], buildings['building_id'][bidx_out]))[0]
        if hh_idx.size == 0:
            continue
        nhh_needed = maximum(nhbbus[bidx] - buildings["number_of_households"][bidx], 0)
        if nhh_needed.sum() <= 0:
            continue
        for i in arange(bidx.size):
            if nhh_needed[i] == 0:
                continue
            hh_idx_sampled = sample_noreplace(hh_idx, nhh_needed[i])
            hh_building_id[hh_idx_sampled] = buildings['building_id'][bidx[i]]
    logger.end_block()
    number_relocated = (hh_building_id != hhs['building_id']).sum()
    # commit the new assignment back to the household dataset before writing it out
    # (the original modified only a copy and referenced an undefined 'households' name)
    hhs.modify_attribute(name='building_id', data=hh_building_id)
    if out_storage is not None:
        hhs.write_dataset(out_storage=out_storage, out_table_name="households")
    logger.log_status("%s households re-located." % number_relocated)
def get_array_without_non_placed_agents(dataset, arr, max_value=None, location_id_name="grid_id"):
    """ 'arr' is an index within dataset. The function returns a triple
    (arr_without_nonplaced, arr_nonplaced, nonplaced_size).
    'arr_without_nonplaced' is 'arr' with those elements removed that don't have
    any location assigned. 'arr_nonplaced' holds the elements of 'arr' that were
    removed from 'arr_without_nonplaced'. 'nonplaced_size' is the size of the
    second item of the triple.
    """
    if location_id_name in dataset.get_known_attribute_names():
        non_placed = where(dataset.get_attribute_by_index(location_id_name, arr) <= 0)[0]
    else:
        non_placed = array([], dtype='int32')
    size_non_placed = non_placed.size
    if size_non_placed <= 0:
        return (arr, non_placed, 0)
    if (max_value is not None) and (size_non_placed > max_value):
        non_placed = sample_noreplace(non_placed, max_value)
        size_non_placed = non_placed.size
    a = ones((arr.size,))
    a[non_placed] = 0
    return (compress(a, arr), arr[non_placed], size_non_placed)
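# Hedged standalone illustration of the splitting above with plain arrays:
# indices whose location id is <= 0 are separated out (shown here without the
# dataset wrapper and without the max_value sampling).
import numpy
location_ids = numpy.array([5, -1, 3, 0, 8])   # per-index locations; <= 0 means non-placed
arr = numpy.arange(5)
non_placed = numpy.where(location_ids <= 0)[0]
mask = numpy.ones(arr.size, dtype='int8')
mask[non_placed] = 0
placed = numpy.compress(mask, arr)
print placed, arr[non_placed]                  # -> [0 2 4] [1 3]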
def unplace_home_based_jobs(self, building_id):
    """ If households are being unplaced, check the building_ids that those
    households occupied for any home-based jobs that need to be unplaced as well.
    """
    # get the index of the building to check
    building_index = self.buildings_dataset.get_id_index(building_id)
    # compute some necessary variables
    number_of_home_based_jobs = self.buildings_dataset.compute_variables(
        "urbansim_zone.building.number_of_home_based_jobs",
        dataset_pool=self.dataset_pool).astype('int')
    number_of_home_based_job_spaces = self.buildings_dataset.compute_variables(
        "urbansim_zone.building.total_home_based_job_spaces",
        dataset_pool=self.dataset_pool).astype('int')
    number_of_home_based_jobs_in_building = number_of_home_based_jobs[building_index]
    number_of_home_based_job_spaces_in_building = number_of_home_based_job_spaces[building_index]
    number_of_home_based_jobs_to_unplace = (number_of_home_based_jobs_in_building -
                                            number_of_home_based_job_spaces_in_building)
    if number_of_home_based_jobs_to_unplace < 1:
        return 0
    # unplace jobs: get jobs to sample from
    index_of_jobs_to_sample_from = self.jobs_dataset.get_filtered_index(
        'job.building_id==%s' % building_id)
    sample_of_jobs_to_unplace = sample_noreplace(index_of_jobs_to_sample_from,
                                                 number_of_home_based_jobs_to_unplace)
    self.jobs_dataset.set_values_of_one_attribute('building_id', array([-1]),
                                                  sample_of_jobs_to_unplace)
    return number_of_home_based_jobs_to_unplace
def run(self, in_storage, out_storage=None, business_dsname="business", zone_dsname=None):
    dataset_pool = DatasetPool(storage=in_storage,
                               package_order=['psrc_parcel', 'urbansim_parcel',
                                              'urbansim', 'opus_core'])
    seed(1)
    allbusinesses = dataset_pool.get_dataset(business_dsname)
    parcels = dataset_pool.get_dataset('parcel')
    buildings = dataset_pool.get_dataset('building')
    parcels.compute_variables([
        "urbansim_parcel.parcel.residential_units",
        "number_of_buildings = parcel.number_of_agents(building)",
        "non_residential_sqft = (parcel.aggregate(building.non_residential_sqft)).astype(int32)",
        "number_of_res_buildings = parcel.aggregate(urbansim_parcel.building.is_residential)",
        "number_of_nonres_buildings = parcel.aggregate(urbansim_parcel.building.is_non_residential)",
        "number_of_mixed_use_buildings = parcel.aggregate(urbansim_parcel.building.is_generic_building_type_6)"
        ], dataset_pool=dataset_pool)
    restypes = [12, 4, 19, 11, 34, 10, 33]
    reslutypes = [13, 14, 15, 24]
    is_valid_business = ones(allbusinesses.size(), dtype='bool8')
    parcels_not_matched = logical_and(in1d(allbusinesses["parcel_id"],
                                           parcels.get_id_attribute(), invert=True),
                                      allbusinesses["parcel_id"] > 0)
    if parcels_not_matched.sum() > 0:
        is_valid_business[where(parcels_not_matched)] = False
        logger.log_warning(message="No parcel exists for %s businesses (%s jobs)" % (
            parcels_not_matched.sum(),
            allbusinesses[self.number_of_jobs_attr][where(parcels_not_matched)].sum()))
    zero_parcel = allbusinesses["parcel_id"] <= 0
    if zero_parcel.sum() > 0:
        is_valid_business[where(zero_parcel)] = False
        logger.log_warning(message="%s businesses (%s jobs) located on zero parcel_id" % (
            zero_parcel.sum(),
            allbusinesses[self.number_of_jobs_attr][where(zero_parcel)].sum()))
    zero_size = logical_and(is_valid_business,
                            allbusinesses[self.number_of_jobs_attr].round() == 0)
    if sum(zero_size) > 0:
        is_valid_business[where(zero_size)] = False
        logger.log_warning(message="%s businesses are of size 0." % sum(zero_size))

    businesses = DatasetSubset(allbusinesses, index=where(is_valid_business)[0])
    parcels.add_attribute(name="number_of_workplaces",
                          data=parcels.sum_dataset_over_ids(businesses, constant=1))
    # classify each parcel by its building mix (buildings_code):
    # 1 - single residential building
    has_single_res_buildings = logical_and(parcels["number_of_buildings"] == 1,
                                           parcels["number_of_res_buildings"] == 1)
    parcels.add_attribute(data=has_single_res_buildings.astype("int32"), name="buildings_code")
    # 2 - multiple residential buildings
    has_mult_res_buildings = logical_and(parcels["number_of_buildings"] > 1,
                                         parcels["number_of_nonres_buildings"] == 0)
    parcels.modify_attribute("buildings_code", data=2 * ones(has_mult_res_buildings.sum()),
                             index=where(has_mult_res_buildings))
    # 3 - single non-residential building (not mixed-use)
    has_single_nonres_buildings = logical_and(
        logical_and(parcels["number_of_buildings"] == 1,
                    parcels["number_of_nonres_buildings"] == 1),
        parcels["number_of_mixed_use_buildings"] == 0)
    parcels.modify_attribute("buildings_code", data=3 * ones(has_single_nonres_buildings.sum()),
                             index=where(has_single_nonres_buildings))
    # 4 - multiple non-residential buildings (not mixed-use)
    has_mult_nonres_buildings = logical_and(
        logical_and(parcels["number_of_buildings"] > 1,
                    parcels["number_of_res_buildings"] == 0),
        parcels["number_of_mixed_use_buildings"] == 0)
    parcels.modify_attribute("buildings_code", data=4 * ones(has_mult_nonres_buildings.sum()),
                             index=where(has_mult_nonres_buildings))
    # 5 - single mixed-use building
    has_single_mixed_buildings = logical_and(parcels["number_of_buildings"] == 1,
                                             parcels["number_of_mixed_use_buildings"] == 1)
    parcels.modify_attribute("buildings_code", data=5 * ones(has_single_mixed_buildings.sum()),
                             index=where(has_single_mixed_buildings))
    # 6 - multiple mixed-type buildings
    has_mult_mixed_buildings = logical_and(
        parcels["number_of_buildings"] > 1,
        logical_or(logical_and(parcels["number_of_res_buildings"] > 0,
                               parcels["number_of_nonres_buildings"] > 0),
                   logical_or(parcels["number_of_mixed_use_buildings"] > 1,
                              logical_and(parcels["number_of_res_buildings"] == 0,
                                          parcels["number_of_mixed_use_buildings"] > 0))))
    parcels.modify_attribute("buildings_code", data=6 * ones(has_mult_mixed_buildings.sum()),
                             index=where(has_mult_mixed_buildings))
    # 7 - vacant parcel with residential land-use type
    has_no_building_res_lutype = logical_and(parcels["number_of_buildings"] == 0,
                                             in1d(parcels["land_use_type_id"], reslutypes))
    parcels.modify_attribute("buildings_code", data=7 * ones(has_no_building_res_lutype.sum()),
                             index=where(has_no_building_res_lutype))
    # 8 - vacant parcel with non-residential land-use type
    has_no_building_nonres_lutype = logical_and(parcels["number_of_buildings"] == 0,
                                                in1d(parcels["land_use_type_id"], reslutypes) == 0)
    parcels.modify_attribute("buildings_code", data=8 * ones(has_no_building_nonres_lutype.sum()),
                             index=where(has_no_building_nonres_lutype))

    business_sizes = businesses[self.number_of_jobs_attr].round().astype("int32")
    business_location = {}
    business_location1wrkpl = zeros(businesses.size(), dtype="int32")
    business_location1wrkplres = zeros(businesses.size(), dtype="int32")
    business_ids = businesses.get_id_attribute()
    # sample one building per business for cases when sampling is required
    for ibusid in range(businesses.size()):
        idx = where(buildings['parcel_id'] == businesses['parcel_id'][ibusid])[0]
        bldgids = buildings['building_id'][idx]
        business_location[business_ids[ibusid]] = bldgids
        if bldgids.size == 1:
            business_location1wrkpl[ibusid] = bldgids[0]
        elif bldgids.size > 1:
            business_location1wrkpl[ibusid] = bldgids[sample_noreplace(arange(bldgids.size), 1)]
            if buildings['residential_units'][idx].sum() > 0:
                # residential buildings are sampled with probabilities proportional
                # to their residential units
                business_location1wrkplres[ibusid] = bldgids[probsample_noreplace(
                    arange(bldgids.size), 1, prob_array=buildings['residential_units'][idx])]
            else:
                business_location1wrkplres[ibusid] = business_location1wrkpl[ibusid]

    home_based = zeros(business_sizes.sum(), dtype="bool8")
    job_building_id = zeros(business_sizes.sum(), dtype="int32")
    job_array_labels = business_ids.repeat(business_sizes)
    job_assignment_case = zeros(business_sizes.sum(), dtype="int32")
    processed_bindicator = zeros(businesses.size(), dtype="bool8")
    business_codes = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"])
    business_nworkplaces = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"])
    logger.log_status("Total number of jobs: %s" % home_based.size)

    # 1. 1-2 worker business in 1 residential building
    idx_sngl_wrk_1bld_fit = where(logical_and(business_sizes < 3, business_codes == 1))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_1bld_fit])
    home_based[jidx] = True
    job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_1bld_fit].repeat(
        business_sizes[idx_sngl_wrk_1bld_fit])
    job_assignment_case[jidx] = 1
    processed_bindicator[idx_sngl_wrk_1bld_fit] = True
    logger.log_status("1. %s jobs (%s businesses) set as home-based due to 1-2 worker x 1 residential building fit." % (
        business_sizes[idx_sngl_wrk_1bld_fit].sum(), idx_sngl_wrk_1bld_fit.size))

    # 2. 1-2 worker business in multiple residential buildings
    idx_sngl_wrk_multbld_fit = where(logical_and(logical_and(processed_bindicator == 0,
                                                             business_sizes < 3),
                                                 business_codes == 2))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_multbld_fit])
    home_based[jidx] = True
    job_building_id[jidx] = business_location1wrkplres[idx_sngl_wrk_multbld_fit].repeat(
        business_sizes[idx_sngl_wrk_multbld_fit])
    job_assignment_case[jidx] = 2
    processed_bindicator[idx_sngl_wrk_multbld_fit] = True
    logger.log_status("2. %s jobs (%s businesses) set as home-based due to 1-2 worker x multiple residential buildings fit." % (
        business_sizes[idx_sngl_wrk_multbld_fit].sum(), idx_sngl_wrk_multbld_fit.size))

    # 3. 1-2 worker in single non-res building (not mixed-use)
    idx_sngl_wrk_single_nonres_fit = where(logical_and(logical_and(processed_bindicator == 0,
                                                                   business_sizes < 3),
                                                       business_codes == 3))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_single_nonres_fit])
    job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_single_nonres_fit].repeat(
        business_sizes[idx_sngl_wrk_single_nonres_fit])
    job_assignment_case[jidx] = 3
    processed_bindicator[idx_sngl_wrk_single_nonres_fit] = True
    logger.log_status("3. %s jobs (%s businesses) placed due to 1-2 worker x single non-res building fit." % (
        business_sizes[idx_sngl_wrk_single_nonres_fit].sum(), idx_sngl_wrk_single_nonres_fit.size))

    # 4. 1-2 worker in multiple non-res buildings (not mixed-use)
    idx_sngl_wrk_mult_nonres_fit = where(logical_and(logical_and(processed_bindicator == 0,
                                                                 business_sizes < 3),
                                                     business_codes == 4))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mult_nonres_fit])
    job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mult_nonres_fit].repeat(
        business_sizes[idx_sngl_wrk_mult_nonres_fit])
    job_assignment_case[jidx] = 4
    processed_bindicator[idx_sngl_wrk_mult_nonres_fit] = True
    logger.log_status("4. %s jobs (%s businesses) placed due to 1-2 worker x multiple non-res building fit." % (
        business_sizes[idx_sngl_wrk_mult_nonres_fit].sum(), idx_sngl_wrk_mult_nonres_fit.size))

    # 5. 1-2 worker in single mixed-use building
    idx_sngl_wrk_smu_fit = where(logical_and(logical_and(processed_bindicator == 0,
                                                         business_sizes < 3),
                                             business_codes == 5))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_smu_fit])
    job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_smu_fit].repeat(
        business_sizes[idx_sngl_wrk_smu_fit])
    job_assignment_case[jidx] = 5
    processed_bindicator[idx_sngl_wrk_smu_fit] = True
    logger.log_status("5. %s jobs (%s businesses) in 1-2 worker x single mixed-use building." % (
        business_sizes[idx_sngl_wrk_smu_fit].sum(), idx_sngl_wrk_smu_fit.size))

    # 6. 1-2 worker in multiple mixed-type buildings
    idx_sngl_wrk_mmu_fit = where(logical_and(logical_and(processed_bindicator == 0,
                                                         business_sizes < 3),
                                             business_codes == 6))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mmu_fit])
    job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mmu_fit].repeat(
        business_sizes[idx_sngl_wrk_mmu_fit])
    bldtype = buildings.get_attribute_by_id("building_type_id",
                                            business_location1wrkpl[idx_sngl_wrk_mmu_fit])
    is_bldtype_res = in1d(bldtype, restypes)
    home_based[in1d(job_array_labels,
                    business_ids[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)])] = True
    job_assignment_case[jidx] = 6
    processed_bindicator[idx_sngl_wrk_mmu_fit] = True
    logger.log_status("6. %s jobs (%s businesses) in 1-2 worker x multiple mixed-type buildings. %s jobs classified as home-based." % (
        business_sizes[idx_sngl_wrk_mmu_fit].sum(), idx_sngl_wrk_mmu_fit.size,
        business_sizes[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)].sum()))

    # 7. 1-2 worker business in residential parcel with no building
    idx_sngl_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator == 0,
                                                            business_sizes < 3),
                                                business_codes == 7))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_vacant_res])
    job_assignment_case[jidx] = 7
    home_based[jidx] = True
    processed_bindicator[idx_sngl_wrk_vacant_res] = True
    logger.log_status("7. %s jobs (%s businesses of size 1-2) could not be placed due to non-existing buildings in parcels with residential LU type." % (
        business_sizes[idx_sngl_wrk_vacant_res].sum(), idx_sngl_wrk_vacant_res.size))

    # 8. 3+ workers of governmental workplaces in 1+ residential buildings
    ind_business_case8 = logical_and(
        logical_and(processed_bindicator == 0,
                    logical_and(business_sizes > 2,
                                in1d(businesses['sector_id'], [18, 19]))),
        in1d(business_codes, [1, 2]))
    idx_wrk_fit = where(ind_business_case8)[0]
    jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
    job_assignment_case[jidx] = 8
    processed_bindicator[idx_wrk_fit] = True
    logger.log_status("8. %s governmental jobs (%s businesses of size 3+) could not be placed due to residing in residential buildings only." % (
        business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))

    # 9. 3-30 workers in single residential building; make two of them home-based
    idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator == 0,
                                                     logical_and(business_sizes > 2,
                                                                 business_sizes <= 30)),
                                         business_codes == 1))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit])
    job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_fit].repeat(
        business_sizes[idx_sngl_wrk_fit])
    # interweave 2 and the remaining business size
    bsizeminus2 = vstack((2 * ones(idx_sngl_wrk_fit.size),
                          business_sizes[idx_sngl_wrk_fit] - 2)).ravel("F").astype("int32")
    # set the first two jobs of every business to True, the others to False
    hbidx = tile(array([True, False]), bsizeminus2.size / 2).repeat(bsizeminus2)
    home_based[(where(jidx)[0])[hbidx]] = True
    job_assignment_case[jidx] = 9
    processed_bindicator[idx_sngl_wrk_fit] = True
    logger.log_status("9. %s jobs (%s businesses) in 3-30 worker x single residential building. %s jobs assigned as home-based." % (
        business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, hbidx.sum()))

    # 10. 3-30 workers in multiple residential buildings; make two of them home-based
    idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator == 0,
                                                     logical_and(business_sizes > 2,
                                                                 business_sizes <= 30)),
                                         business_codes == 2))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit])
    job_assignment_case[jidx] = 10
    processed_bindicator[idx_sngl_wrk_fit] = True
    # sample buildings for businesses by parcel
    bpcls = unique(businesses["parcel_id"][idx_sngl_wrk_fit])
    for ipcl in range(bpcls.size):
        bidx = where(buildings['parcel_id'] == bpcls[ipcl])[0]
        bldgids = buildings['building_id'][bidx]
        bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]],
                              business_ids[idx_sngl_wrk_fit])
        # multiply by units in order to sample proportionally to units rather than buildings
        bldgids = bldgids.repeat(maximum(1, buildings['residential_units'][bidx].astype('int32')))
        if bldgids.size < bussids.size:
            bldarray = bldgids.repeat(1 + ceil((bussids.size - bldgids.size) / float(bldgids.size)))
        else:
            bldarray = bldgids
        shuffle(bldarray)  # randomly reorder in-place
        for ib in range(bussids.size):
            jidx = where(job_array_labels == bussids[ib])[0]
            job_building_id[jidx] = bldarray[ib]
            home_based[jidx[0:2]] = True
    logger.log_status("10. %s jobs (%s businesses) in 3-30 worker x multiple residential building. %s jobs assigned as home-based." % (
        business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, idx_sngl_wrk_fit.size * 2))

    # 11. single workplace, 3+ workers in single non-res or mixed-use building
    idx_sngl_wrkplace_2plus_workers = where(logical_and(
        logical_and(logical_and(processed_bindicator == 0, business_sizes > 2),
                    logical_or(business_codes == 3, business_codes == 5)),
        business_nworkplaces == 1))[0]
    which_labels = where(in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers]))[0]
    job_building_id[which_labels] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(
        business_sizes[idx_sngl_wrkplace_2plus_workers])
    job_assignment_case[which_labels] = 11
    processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True
    logger.log_status("11. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x single non-res/mixed-use building fit." % (
        business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size))
    # 12. single workplace, 3+ workers in multiple mixed-type buildings
    idx_sngl_wrkplace_2plus_workers = where(logical_and(
        logical_and(logical_and(processed_bindicator == 0, business_sizes > 2),
                    logical_or(business_codes == 4, business_codes == 6)),
        business_nworkplaces == 1))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers])
    job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(
        business_sizes[idx_sngl_wrkplace_2plus_workers])
    job_assignment_case[jidx] = 12
    processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True
    logger.log_status("12. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x multiple non-res/mixed building fit." % (
        business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size))

    # 13. multiple workplaces, 3+ workers in single non-res or mixed building
    idx_mult_wrkplace_2plus_workers = where(logical_and(
        logical_and(logical_and(processed_bindicator == 0, business_sizes > 2),
                    logical_or(business_codes == 3, business_codes == 5)),
        business_nworkplaces > 1))[0]
    jidx = in1d(job_array_labels, business_ids[idx_mult_wrkplace_2plus_workers])
    job_building_id[jidx] = business_location1wrkpl[idx_mult_wrkplace_2plus_workers].repeat(
        business_sizes[idx_mult_wrkplace_2plus_workers])
    job_assignment_case[jidx] = 13
    processed_bindicator[idx_mult_wrkplace_2plus_workers] = True
    logger.log_status("13. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x single non-res/mixed building fit." % (
        business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size))

    # 14. multiple workplaces, 3+ workers in multiple non-res or mixed buildings
    idx_mult_wrkplace_2plus_workers = where(logical_and(
        logical_and(logical_and(processed_bindicator == 0, business_sizes > 2),
                    logical_or(business_codes == 4, business_codes == 6)),
        business_nworkplaces > 1))[0]
    processed_bindicator[idx_mult_wrkplace_2plus_workers] = True
    # sample buildings for businesses by parcel
    bpcls = unique(businesses["parcel_id"][idx_mult_wrkplace_2plus_workers])
    for ipcl in range(bpcls.size):
        bldgids = buildings['building_id'][buildings['parcel_id'] == bpcls[ipcl]]
        bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]],
                              business_ids[idx_mult_wrkplace_2plus_workers])
        if bldgids.size < bussids.size:
            bldarray = bldgids.repeat(1 + ceil((bussids.size - bldgids.size) / float(bldgids.size)))
        else:
            bldarray = bldgids
        shuffle(bldarray)  # randomly reorder in-place
        for ib in range(bussids.size):
            jidx = where(job_array_labels == bussids[ib])
            job_building_id[jidx] = bldarray[ib]
            job_assignment_case[jidx] = 14
    logger.log_status("14. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x multiple non-res/mixed building fit." % (
        business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size))

    # 15. 3+ workers in residential parcel with no building
    idx_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator == 0,
                                                       business_sizes > 2),
                                           business_codes == 7))[0]
    jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_res])
    job_assignment_case[jidx] = 15
    processed_bindicator[idx_wrk_vacant_res] = True
    logger.log_status("15. %s jobs (%s businesses of 3+ workers) could not be placed due to non-existing buildings in parcels with residential LU type." % (
        business_sizes[idx_wrk_vacant_res].sum(), idx_wrk_vacant_res.size))
nonresidential parcel with no building idx_wrk_vacant_nonres = where(logical_and(processed_bindicator==0, business_codes == 8))[0] jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_nonres]) job_assignment_case[jidx] = 16 processed_bindicator[idx_wrk_vacant_nonres] = True logger.log_status("16. %s jobs (%s businesses) could not be placed due to non-existing buildings in parcels with non-residential LU type." % ( business_sizes[idx_wrk_vacant_nonres].sum(), idx_wrk_vacant_nonres.size)) # 17. 31+ workers in single residential building. Do not place - will go into ELCM. idx_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 30), business_codes == 1))[0] jidx = in1d(job_array_labels, business_ids[idx_wrk_fit]) job_assignment_case[jidx] = 17 processed_bindicator[idx_wrk_fit] = True logger.log_status("17. %s jobs (%s businesses) in 31+ workers x single residential building." % ( business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size)) # 18. 31+ workers in multiple residential buildings. idx_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 30), business_codes == 2))[0] jidx = in1d(job_array_labels, business_ids[idx_wrk_fit]) job_assignment_case[jidx] = 18 processed_bindicator[idx_wrk_fit] = True logger.log_status("18. %s jobs (%s businesses) in 31+ workers x multiple residential building." % ( business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size)) # jobs in messy buildings idx_messy_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 0), business_codes == 0))[0] processed_bindicator[idx_messy_fit] = True logger.log_status("%s jobs (%s businesses) could not be placed due to messy buildings." % ( business_sizes[idx_messy_fit].sum(), idx_messy_fit.size)) # build new buildings for jobs in cases 7, 8, 15 and 16 jidx_no_bld = where(in1d(job_assignment_case, [7,8,15,16]))[0] bus = unique(job_array_labels[jidx_no_bld]) bsidx = businesses.get_id_index(bus) # first create buildings for single workplaces per parcel single_workplace_idx = where(business_nworkplaces[bsidx] == 1)[0] newbld_parcel_id = businesses['parcel_id'][bsidx][single_workplace_idx] newbld_bt = sector2building_type(businesses['sector_id'][bsidx][single_workplace_idx]) newbids = arange(buildings.get_id_attribute().max()+1, buildings.get_id_attribute().max()+single_workplace_idx.size+1) bbldid = zeros(bsidx.size, dtype='int32') bbldid[single_workplace_idx] = newbids # for parcels with multiple workplaces select the largest business to determine its building type mult_bsidx = bsidx[where(business_nworkplaces[bsidx] > 1)[0]] empty_parcels = businesses['parcel_id'][mult_bsidx] uempty_parcels = unique(empty_parcels) bsize_on_empty_pcl = ndmax(business_sizes[mult_bsidx], labels=empty_parcels, index=uempty_parcels) newbld2_sec = zeros(uempty_parcels.size, dtype='int32') newbids2 = arange(newbids.max()+1, newbids.max()+uempty_parcels.size+1) for ipcl in range(uempty_parcels.size): newbld2_sec[ipcl] = businesses['sector_id'][mult_bsidx][logical_and(businesses['parcel_id'][mult_bsidx] == uempty_parcels[ipcl], business_sizes[mult_bsidx]==bsize_on_empty_pcl[ipcl])][0] this_bidx = where(businesses['parcel_id'][bsidx] == uempty_parcels[ipcl]) bbldid[this_bidx] = newbids2[ipcl] newbld_parcel_id = concatenate((newbld_parcel_id, uempty_parcels)) newbld_bt = concatenate((newbld_bt, sector2building_type(newbld2_sec))) newbldgs = {'building_id': concatenate((newbids, newbids2)), 'parcel_id': newbld_parcel_id, 'building_type_id': newbld_bt, }
buildings.add_elements(newbldgs, require_all_attributes=False) jidx = where(in1d(job_array_labels, business_ids[bsidx]))[0] job_building_id[jidx] = bbldid.repeat(business_sizes[bsidx]) logger.log_status("Built %s new buildings to accommodate %s jobs (out of which %s are governmental) from cases 7, 8, 15 and 16." % ( newbld_parcel_id.size, jidx.size, business_sizes[bsidx][where(in1d(businesses['sector_id'][bsidx], [18,19]))].sum())) logger.log_status("Assigned %s (%s percent) home-based jobs." % (home_based.sum(), round(home_based.sum()/(home_based.size/100.),2))) logger.log_status("Finished: %s percent of jobs processed (%s jobs in %s businesses). %s jobs (%s businesses) remain to be processed." % \ (round(business_sizes[processed_bindicator].sum()/(home_based.size/100.),2), business_sizes[processed_bindicator].sum(), processed_bindicator.sum(), business_sizes[logical_not(processed_bindicator)].sum(), business_sizes[logical_not(processed_bindicator)].size)) logger.start_block("Storing jobs data.") # create job dataset job_data = {"job_id": (arange(job_building_id.size)+1).astype("int32"), "home_based_status" : home_based, "building_id": job_building_id, "business_id": job_array_labels.astype("int32"), "sector_id": businesses['sector_id'].repeat(business_sizes).astype("int32"), "parcel_id": businesses['parcel_id'].repeat(business_sizes).astype("int32"), "assignment_case": job_assignment_case} # join with zones if zone_dsname is not None: zones = dataset_pool.get_dataset(zone_dsname) idname = zones.get_id_name()[0] #jpcls = buildings.get_attribute_by_id('parcel_id', job_building_id) job_data[idname] = parcels.get_attribute_by_id(idname, job_data["parcel_id"]) dictstorage = StorageFactory().get_storage('dict_storage') dictstorage.write_table(table_name="jobs", table_data=job_data) jobs = Dataset(in_storage=dictstorage, in_table_name="jobs", dataset_name="job", id_name="job_id") if out_storage is not None: jobs.write_dataset(out_storage=out_storage, out_table_name="jobs") buildings.write_dataset(out_storage=out_storage, attributes=AttributeType.PRIMARY) logger.end_block() return jobs
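# A minimal standalone sketch (illustrative arrays, not part of the model above) of
# the interleaving trick from case 9: flag the first two jobs of every business as
# home-based without a Python loop, by repeating an alternating True/False pattern
# with run lengths (2, size-2) per business.
import numpy as np

sizes = np.array([3, 5, 4])  # jobs per business, all > 2 as in case 9
runs = np.vstack((2 * np.ones(sizes.size), sizes - 2)).ravel("F").astype("int32")
# runs == [2, 1, 2, 3, 2, 2]: one (2, size-2) pair per business
mask = np.tile(np.array([True, False]), runs.size // 2).repeat(runs)
# mask is True for exactly the first two jobs of each business
assert mask.sum() == 2 * sizes.size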
def select_proposals_within_parcels(self, nmax=2, weight_string=None, compete_among_types=False, filter_threshold=75, MU_same_weight=False, transpose_interpcl_weight=True): # Allow only nmax proposals per parcel in order to not disadvantage parcels with small amount of proposals. # It takes proposals with the highest weights. #parcels_with_proposals = unique(self.proposal_set['parcel_id']) #parcel_set = self.dataset_pool.get_dataset('parcel') if weight_string is not None: within_parcel_weights = self.proposal_set.compute_variables([weight_string], dataset_pool=self.dataset_pool) else: within_parcel_weights = self.weight egligible = logical_and(self.weight > 0, self.proposal_set['status_id'] == self.proposal_set.id_tentative) wegligible = where(egligible)[0] if wegligible.size <=0: return #parcels_with_proposals = unique(self.proposal_set['parcel_id'][wegligible]) #min_type = {} #egligible_proposals = {} tobechosen_ind = ones(wegligible.size).astype('bool8') if not compete_among_types: for key in self.column_names: utypes_all = unique(self.proposal_component_set[key]) categories = zeros(self.proposal_set.size(), dtype='int32') for btype in utypes_all: w = where(ndimage.sum(self.proposal_component_set[key] == btype, labels=self.proposal_component_set['proposal_id'], index=self.proposal_set.get_id_attribute() ) == self.proposal_set["number_of_components"])[0] categories[w] = btype # categories equal zero means mix-used type with components of different type utypes = unique(categories[wegligible]) for value in utypes: type_is_value_ind = categories[wegligible]==value for i in range(nmax): parcels_with_proposals = (unique(self.proposal_set['parcel_id'][wegligible][where(type_is_value_ind)])).astype(int32) if parcels_with_proposals.size <= 0: continue labels = (self.proposal_set['parcel_id'][wegligible])*type_is_value_ind chosen_prop = array(maximum_position(within_parcel_weights[wegligible], labels=labels, index=parcels_with_proposals)).flatten().astype(int32) egligible[wegligible[chosen_prop]] = False type_is_value_ind[chosen_prop] = False else: parcels_with_proposals = unique(self.proposal_set['parcel_id'][wegligible]).astype(int32) max_prop = array(maximum_position(within_parcel_weights[wegligible], labels=self.proposal_set['parcel_id'][wegligible], index=parcels_with_proposals)).flatten().astype(int32) max_value_by_parcel = within_parcel_weights[wegligible][max_prop] incompetition = ones(wegligible.size, dtype='bool8') incompetition[max_prop] = False egligible[wegligible[max_prop]] = False for i in range(nmax-1): labels = (self.proposal_set['parcel_id'][wegligible])*incompetition valid_parcels = where(in1d(parcels_with_proposals, self.proposal_set['parcel_id'][wegligible][where(incompetition)]))[0] if valid_parcels.size <= 0: break chosen_prop = array(maximum_position(within_parcel_weights[wegligible], labels=labels, index=parcels_with_proposals[valid_parcels])).flatten().astype(int32) percent = within_parcel_weights[wegligible][chosen_prop]/(max_value_by_parcel[valid_parcels]/100.0) where_lower = where(in1d(self.proposal_set['parcel_id'][wegligible], parcels_with_proposals[valid_parcels][percent <= filter_threshold]))[0] egligible[wegligible[setdiff1d(chosen_prop, where_lower)]] = False # proposals with egligible=True get eliminated, so we dont want to set it to False for the where_lower ones incompetition[union1d(chosen_prop, where_lower)] = False if incompetition.sum() <= 0: break self.proposal_set['status_id'][where(egligible)] = 
self.proposal_set.id_eliminated_in_within_parcel_selection if MU_same_weight: # Set weights of mix-use proposals within the same parcel to the same value parcels = self.dataset_pool.get_dataset('parcel') # parcels.compute_variables(['mu_ind = parcel.aggregate(numpy.logical_or(development_project_proposal_component.building_type_id==4, development_project_proposal_component.building_type_id==12) + numpy.logical_or(development_project_proposal_component.building_type_id==3, development_project_proposal_component.building_type_id==13), intermediates=[development_project_proposal])'], # dataset_pool=self.dataset_pool) # pcl_ids = parcels.get_id_attribute()[parcels['mu_ind'] > 1] # is_mu = logical_and(logical_and(self.weight > 0, # self.proposal_set['status_id'] == self.proposal_set.id_tentative), # in1d(self.proposal_set['parcel_id'], pcl_ids)) # where_mu = where(is_mu)[0] # if where_mu.size <= 0: # return # trans_weights = self.weight[where_mu] # if transpose_interpcl_weight: # trans_weights = log(trans_weights) # pcl_idx = parcels.get_id_index(self.proposal_set['parcel_id'][where_mu]) # upcl_idx = unique(pcl_idx) # weight_mean = array(ndimage_mean(trans_weights, labels=pcl_idx, index=upcl_idx)) # if transpose_interpcl_weight: # weight_mean = exp(weight_mean) # weight_mean_tmp = zeros(upcl_idx.max()+1).astype(weight_mean.dtype) # weight_mean_tmp[upcl_idx]=weight_mean # self.weight[where_mu]=weight_mean_tmp[pcl_idx] self.proposal_set.compute_variables(['is_mfres = development_project_proposal.aggregate(numpy.logical_or(development_project_proposal_component.building_type_id==4, development_project_proposal_component.building_type_id==12))'], dataset_pool=self.dataset_pool) parcels.compute_variables(['mu_ind = (parcel.aggregate(development_project_proposal.is_mfres)>0) * (parcel.mix_split_id > 0)'], dataset_pool=self.dataset_pool) pcl_ids = parcels.get_id_attribute()[parcels['mu_ind'] > 0] egligible_props = logical_and(self.weight > 0, logical_and( self.proposal_set['status_id'] == self.proposal_set.id_tentative, self.proposal_set['is_mfres']>0)) where_prop_to_modify = where(logical_and(egligible_props, in1d(self.proposal_set['parcel_id'], pcl_ids)))[0] if where_prop_to_modify.size <= 0: return upcl = unique(self.proposal_set['parcel_id'][where_prop_to_modify]) npcl_to_modify = int(upcl.size/10.0) if npcl_to_modify == 0: return pcls_to_modify = sample_noreplace(upcl, npcl_to_modify) where_prop_to_modify_final = where(logical_and(egligible_props, in1d(self.proposal_set['parcel_id'], pcls_to_modify)))[0] trans_weights = self.weight[where_prop_to_modify_final] if transpose_interpcl_weight: trans_weights = log(trans_weights) #trans_weights = 1.2*trans_weights if transpose_interpcl_weight: trans_weights = exp(trans_weights) self.weight[where_prop_to_modify_final] = trans_weights return
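# A hedged sketch of the per-parcel argmax used repeatedly above:
# scipy.ndimage.maximum_position returns, for each label value in index, the
# position of the largest input element. The arrays here are illustrative only.
import numpy as np
from scipy import ndimage

weights = np.array([0.2, 0.9, 0.5, 0.7, 0.1])   # proposal weights
parcel_ids = np.array([1, 1, 2, 2, 2])          # parcel of each proposal
upcl = np.unique(parcel_ids)
best = np.array(ndimage.maximum_position(weights, labels=parcel_ids, index=upcl)).flatten()
# best == [1, 3]: the highest-weight proposal within each parcel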
def run(self, year, business_set, control_totals, data_objects=None, resources=None): business_id_name = business_set.get_id_name()[0] control_totals.get_attribute("total_number_of_businesses") idx = where(control_totals.get_attribute("year") == year) sectors = unique( control_totals.get_attribute_by_index("building_use_id", idx)) max_id = business_set.get_id_attribute().max() business_size = business_set.size() new_businesses = { self.location_id_name: array([], dtype='int32'), "building_use_id": array([], dtype='int32'), business_id_name: array([], dtype='int32'), "sqft": array([], dtype=int32), "employees": array([], dtype=int32), } compute_resources = Resources(data_objects) # compute_resources.merge({job_building_types.get_dataset_name():job_building_types, "debug":self.debug}) business_set.compute_variables(map( lambda x: "%s.%s.is_sector_%s" % (self.variable_package, business_set.get_dataset_name(), x), sectors), resources=compute_resources) remove_businesses = array([], dtype='int32') for sector in sectors: total_businesses = control_totals.get_data_element_by_id( (year, sector)).total_number_of_businesses is_in_sector = business_set.get_attribute("is_sector_%s" % sector) diff = int(total_businesses - is_in_sector.astype(int8).sum()) if diff < 0: # w = where(is_in_sector == 1)[0] sample_array, non_placed, size_non_placed = \ get_array_without_non_placed_agents(business_set, w, -1*diff, self.location_id_name) remove_businesses = concatenate( (remove_businesses, non_placed, sample_noreplace(sample_array, max(0, abs(diff) - size_non_placed)))) if diff > 0: # new_businesses[self.location_id_name] = concatenate( (new_businesses[self.location_id_name], zeros((diff, ), dtype="int32"))) new_businesses["building_use_id"] = concatenate( (new_businesses["building_use_id"], sector * ones( (diff, ), dtype="int32"))) available_business_index = where(is_in_sector)[0] sampled_business = probsample_replace(available_business_index, diff, None) new_businesses["sqft"] = concatenate( (new_businesses["sqft"], business_set.get_attribute("sqft")[sampled_business])) new_businesses["employees"] = concatenate(( new_businesses["employees"], business_set.get_attribute("employees")[sampled_business])) new_max_id = max_id + diff new_businesses[business_id_name] = concatenate( (new_businesses[business_id_name], arange(max_id + 1, new_max_id + 1))) max_id = new_max_id business_set.remove_elements(remove_businesses) business_set.add_elements(new_businesses, require_all_attributes=False) difference = business_set.size() - business_size self.debug.print_debug( "Difference in number of businesses: %s (original %s," " new %s, created %s, deleted %s)" % (difference, business_size, business_set.size(), new_businesses[business_id_name].size, remove_businesses.size), 3) self.debug.print_debug( "Number of unplaced businesses: %s" % where(business_set.get_attribute(self.location_id_name) <= 0) [0].size, 3) return difference
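# A simplified stand-in (hypothetical helper; the model itself uses
# sample_noreplace, probsample_replace and get_array_without_non_placed_agents)
# for the control-total pattern above: compare the target count with the current
# count for one sector and either sample agents to remove or report how many to
# clone.
import numpy as np

def match_control_total(current_index, total, rng=np.random):
    """Return (indices_to_remove, number_to_create) for one sector."""
    diff = total - current_index.size
    if diff < 0:
        return rng.choice(current_index, size=-diff, replace=False), 0
    return np.array([], dtype=current_index.dtype), diff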
def run(self, in_storage, out_storage, business_table="business", jobs_table="jobs", control_totals_table=None): logger.log_status("Unrolling %s table." % business_table) # get attributes from the establishments table business_dataset = BusinessDataset(in_storage=in_storage, in_table_name=business_table) business_sizes = business_dataset.get_attribute(self.number_of_jobs_attr).astype("int32") sectors = business_dataset.get_attribute("sector_id") tazes = business_dataset.get_attribute(self.geography_id_attr).astype("int32") building_ids = array([], dtype='int32') if "building_id" in business_dataset.get_primary_attribute_names(): building_ids = business_dataset.get_attribute("building_id") parcel_ids = array([], dtype='int32') if "parcel_id" in business_dataset.get_primary_attribute_names(): parcel_ids = business_dataset.get_attribute("parcel_id") home_based = array([], dtype='int16') if "home_based" in business_dataset.get_primary_attribute_names(): home_based = business_dataset.get_attribute("home_based") building_sqft = business_dataset.get_attribute(self.sqft_attr) building_sqft[building_sqft <= 0] = 0 join_flags = None if "join_flag" in business_dataset.get_primary_attribute_names(): join_flags = business_dataset.get_attribute("join_flag") impute_sqft_flag = None if "impute_building_sqft_flag" in business_dataset.get_primary_attribute_names(): impute_sqft_flag = business_dataset.get_attribute("impute_building_sqft_flag") # initialize jobs attributes total_size = business_sizes.sum() jobs_data = {} jobs_data["sector_id"] = resize(array([-1], dtype=sectors.dtype), total_size) jobs_data["building_id"] = resize(array([-1], dtype=building_ids.dtype), total_size) jobs_data["parcel_id"] = resize(array([-1], dtype=parcel_ids.dtype), total_size) jobs_data[self.geography_id_attr] = resize(array([-1], dtype=tazes.dtype), total_size) jobs_data["building_type"] = resize(array([-1], dtype=home_based.dtype), total_size) jobs_data["sqft"] = resize(array([], dtype=building_sqft.dtype), total_size) if join_flags is not None: jobs_data["join_flag"] = resize(array([], dtype=join_flags.dtype), total_size) if impute_sqft_flag is not None: jobs_data["impute_building_sqft_flag"] = resize(array([], dtype=impute_sqft_flag.dtype), total_size) indices = cumsum(business_sizes) # iterate over establishments.
For each business create the corresponding number of jobs by filling the corresponding part # of the arrays start_index=0 for i in range(business_dataset.size()): end_index = indices[i] jobs_data["sector_id"][start_index:end_index] = sectors[i] if building_ids.size > 0: jobs_data["building_id"][start_index:end_index] = building_ids[i] if parcel_ids.size > 0: jobs_data["parcel_id"][start_index:end_index] = parcel_ids[i] jobs_data[self.geography_id_attr][start_index:end_index] = tazes[i] if home_based.size > 0: jobs_data["building_type"][start_index:end_index] = home_based[i] if self.compute_sqft_per_job: jobs_data["sqft"][start_index:end_index] = round((building_sqft[i]-building_sqft[i]/10.0)/float(business_sizes[i])) # sqft per employee else: jobs_data["sqft"][start_index:end_index] = building_sqft[i] if join_flags is not None: jobs_data["join_flag"][start_index:end_index] = join_flags[i] if impute_sqft_flag is not None: jobs_data["impute_building_sqft_flag"][start_index:end_index] = impute_sqft_flag[i] start_index = end_index jobs_data["job_id"] = arange(total_size)+1 if self.compute_sqft_per_job: jobs_data["sqft"] = clip(jobs_data["sqft"], 0, self.maximum_sqft) jobs_data["sqft"][logical_and(jobs_data["sqft"]>0, jobs_data["sqft"]<self.minimum_sqft)] = self.minimum_sqft # correct missing job_building_types wmissing_bt = where(jobs_data["building_type"]<=0)[0] if wmissing_bt.size > 0: jobs_data["building_type"][wmissing_bt] = 2 # assign non-homebased type for now. It can be re-classified in the assign_bldgs_to_jobs... script # create jobs table and write it out storage = StorageFactory().get_storage('dict_storage') storage.write_table( table_name="jobs", table_data=jobs_data ) job_dataset = JobDataset(in_storage=storage) if self.unplace_jobs_with_non_existing_buildings: self.do_unplace_jobs_with_non_existing_buildings(job_dataset, out_storage) # Match to control totals (only eliminate jobs if control totals are smaller than the actual number of jobs). 
if control_totals_table is not None: logger.log_status("Matching to control totals.") control_totals = ControlTotalDataset(what='employment', id_name=['zone_id', 'sector_id'], in_table_name=control_totals_table, in_storage=in_storage) control_totals.load_dataset(attributes=['zone_id', 'sector_id', 'jobs']) zones_sectors = control_totals.get_id_attribute() njobs = control_totals.get_attribute('jobs') remove = array([], dtype='int32') for i in range(zones_sectors.shape[0]): zone, sector = zones_sectors[i,:] in_sector = job_dataset.get_attribute("sector_id") == sector in_zone_in_sector = logical_and(in_sector, job_dataset.get_attribute("zone_id") == zone) if in_zone_in_sector.sum() <= njobs[i]: continue to_be_removed = in_zone_in_sector.sum() - njobs[i] this_removal = 0 not_considered = ones(job_dataset.size(), dtype='bool8') for unit in ['parcel_id', 'building_id', None]: # first consider jobs without parcel id, then without building_id, then all if unit is not None: wnunit = job_dataset.get_attribute(unit) <= 0 eligible = logical_and(not_considered, logical_and(in_zone_in_sector, wnunit)) not_considered[where(wnunit)] = False else: eligible = logical_and(not_considered, in_zone_in_sector) eligible_sum = eligible.sum() if eligible_sum > 0: where_eligible = where(eligible)[0] if eligible_sum <= to_be_removed-this_removal: draw = arange(eligible_sum) else: draw = sample_noreplace(where_eligible, to_be_removed-this_removal, eligible_sum) remove = concatenate((remove, where_eligible[draw])) this_removal += draw.size if this_removal >= to_be_removed: break job_dataset.remove_elements(remove) logger.log_status("%s jobs removed." % remove.size) logger.log_status("Write jobs table.") job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage) logger.log_status("Created %s jobs." % job_dataset.size())
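# A small sketch of the unrolling step above: per-business attributes are
# expanded to one row per job. For plain attributes, the cumsum/slice loop in
# run() is equivalent to numpy's repeat; the arrays below are illustrative.
import numpy as np

business_sizes = np.array([2, 3, 1])
sector_id = np.array([5, 7, 9])
expected = sector_id.repeat(business_sizes)  # [5, 5, 7, 7, 7, 9]
out = np.empty(business_sizes.sum(), dtype=sector_id.dtype)
ends = np.cumsum(business_sizes)
start = 0
for i in range(business_sizes.size):  # the loop variant, as in run()
    out[start:ends[i]] = sector_id[i]
    start = ends[i]
assert (out == expected).all()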
def run(self, job_dataset, dataset_pool, out_storage=None, jobs_table="jobs"): """ Algorithm: 1. For all non_home_based jobs that have parcel_id assigned but no building_id, try to choose a building from all buildings in that parcel. Draw the building with probabilities given by the sector-building_type distribution. The job sizes are fitted into the available space (the attribute job.sqft is updated). 2. For all non_home_based jobs for which no building was found in step 1, check if the parcel has residential buildings. In such a case, re-assign the jobs to be home-based. Otherwise, if sum of non_residential_sqft over the involved buildings is 0, for all jobs that have impute_building_sqft_flag=True draw a building using the sector-building_type distribution and impute the corresponding sqft to the non_residential_sqft of that building. 3. For all home_based jobs that have parcel_id assigned but no building_id, try to choose a building from all buildings in that parcel. The capacity of a single-family building is determined from sizes of the households living there (for each household the minimum of number of members and 2 is taken). For multi-family buildings the capacity is 50. 4. Assign a building type to jobs that have missing building type. It is sampled from the regional-wide distribution of home based and non-home based jobs. 5. Update the table 'building_sqft_per_job' using the updated job.sqft. 'in_storage' should contain the jobs table and the zone_averages_table. The 'dataset_pool_storage' should contain all other tables needed (buildings, households, building_types). """ parcel_ids = job_dataset.get_attribute("parcel_id") building_ids = job_dataset.get_attribute("building_id") building_types = job_dataset.get_attribute("building_type") try: impute_sqft_flags = job_dataset.get_attribute("impute_building_sqft_flag") except: impute_sqft_flags = zeros(job_dataset.size()) is_considered = logical_and(parcel_ids > 0, building_ids <= 0) # jobs that have assigned parcel but not building job_index_home_based = where(logical_and(is_considered, building_types == 1))[0] job_index_governmental = where(logical_and(is_considered, building_types == 3))[0] building_dataset = dataset_pool.get_dataset('building') parcel_ids_in_bldgs = building_dataset.get_attribute("parcel_id") bldg_ids_in_bldgs = building_dataset.get_id_attribute() bldg_types_in_bldgs = building_dataset.get_attribute("building_type_id") non_res_sqft = building_dataset.get_attribute("non_residential_sqft") occupied = building_dataset.compute_variables(["urbansim_parcel.building.occupied_building_sqft_by_jobs"], dataset_pool=dataset_pool) is_governmental = building_dataset.compute_variables(["building.disaggregate(building_type.generic_building_type_id == 7)"], dataset_pool=dataset_pool) # assign buildings to governmental jobs randomly unique_parcels = unique(parcel_ids[job_index_governmental]) logger.log_status("Placing governmental jobs ...") for parcel in unique_parcels: idx_in_bldgs = where(parcel_ids_in_bldgs[is_governmental] == parcel)[0] if idx_in_bldgs.size <= 0: continue idx_in_jobs = where(parcel_ids[job_index_governmental] == parcel)[0] draw = sample_replace(idx_in_bldgs, idx_in_jobs.size) building_ids[job_index_governmental[idx_in_jobs]] = bldg_ids_in_bldgs[where(is_governmental)[0][draw]] logger.log_status("%s governmental jobs (out of %s gov. jobs) were placed." 
% ( (building_ids[job_index_governmental]>0).sum(), job_index_governmental.size)) logger.log_status("The not-placed governmental jobs will be added to the non-home based jobs.") # consider the unplaced governmental jobs together with other non-home-based jobs is_now_considered = logical_and(is_considered, building_ids <= 0) job_index_non_home_based = where(logical_and(is_now_considered, logical_or(building_types == 2, building_types == 3)))[0] # assign buildings to non_home_based jobs based on available space unique_parcels = unique(parcel_ids[job_index_non_home_based]) job_building_types = job_dataset.compute_variables(["bldgs_building_type_id = job.disaggregate(building.building_type_id)"], dataset_pool=dataset_pool) where_valid_jbt = where(logical_and(job_building_types>0, logical_or(building_types == 2, building_types==3)))[0] building_type_dataset = dataset_pool.get_dataset("building_type") available_building_types= building_type_dataset.get_id_attribute() idx_available_bt = building_type_dataset.get_id_index(available_building_types) sectors = job_dataset.get_attribute("sector_id") unique_sectors = unique(sectors) sector_bt_distribution = zeros((unique_sectors.size, building_type_dataset.size()), dtype="float32") jobs_sqft = job_dataset.get_attribute_by_index("sqft", job_index_non_home_based).astype("float32") job_dataset._compute_if_needed("urbansim_parcel.job.zone_id", dataset_pool=dataset_pool) jobs_zones = job_dataset.get_attribute_by_index("zone_id", job_index_non_home_based) new_jobs_sqft = job_dataset.get_attribute("sqft").copy() # find sector -> building_type distribution sector_index_mapping = {} for isector in range(unique_sectors.size): idx = where(sectors[where_valid_jbt]==unique_sectors[isector])[0] if idx.size == 0: continue o = ones(idx.size, dtype="int32") sector_bt_distribution[isector,:] = ndimage_sum(o, labels=job_building_types[where_valid_jbt[idx]], index=available_building_types) sector_bt_distribution[isector,:] = sector_bt_distribution[isector,:]/sector_bt_distribution[isector,:].sum() sector_index_mapping[unique_sectors[isector]] = isector # create a lookup table for zonal average per building type of sqft per employee zone_average_dataset = dataset_pool.get_dataset("building_sqft_per_job") zone_bt_lookup = zone_average_dataset.get_building_sqft_as_table(job_dataset.get_attribute("zone_id").max(), available_building_types.max()) counter_zero_capacity = 0 counter_zero_distr = 0 # iterate over parcels logger.log_status("Placing non-home-based jobs ...") for parcel in unique_parcels: idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0] if idx_in_bldgs.size <= 0: continue idx_in_jobs = where(parcel_ids[job_index_non_home_based] == parcel)[0] capacity = maximum(non_res_sqft[idx_in_bldgs] - occupied[idx_in_bldgs],0) #capacity = non_res_sqft[idx_in_bldgs] - occupied[idx_in_bldgs] if capacity.sum() <= 0: counter_zero_capacity += idx_in_jobs.size continue this_jobs_sectors = sectors[job_index_non_home_based][idx_in_jobs] this_jobs_sqft_table = resize(jobs_sqft[idx_in_jobs], (idx_in_bldgs.size, idx_in_jobs.size)) wn = jobs_sqft[idx_in_jobs] <= 0 for i in range(idx_in_bldgs.size): this_jobs_sqft_table[i, where(wn)[0]] = zone_bt_lookup[jobs_zones[idx_in_jobs[wn]], bldg_types_in_bldgs[idx_in_bldgs[i]]] supply_demand_ratio = (resize(capacity, (capacity.size, 1))/this_jobs_sqft_table.astype("float32").sum(axis=0))/float(idx_in_jobs.size)*0.9 if any(supply_demand_ratio < 1): # correct only if supply is smaller than demand this_jobs_sqft_table = this_jobs_sqft_table * 
supply_demand_ratio probcomb = zeros(this_jobs_sqft_table.shape) bt = bldg_types_in_bldgs[idx_in_bldgs] ibt = building_type_dataset.get_id_index(bt) for i in range(probcomb.shape[0]): for j in range(probcomb.shape[1]): probcomb[i,j] = sector_bt_distribution[sector_index_mapping[this_jobs_sectors[j]],ibt[i]] pcs = probcomb.sum(axis=0) probcomb = probcomb/pcs wz = where(pcs<=0)[0] counter_zero_distr += wz.size probcomb[:, wz] = 0 # to avoid nan values taken = zeros(capacity.shape) has_sqft = this_jobs_sqft_table > 0 while True: if (has_sqft * probcomb).sum() <= 0: break req = (this_jobs_sqft_table * probcomb).sum(axis=0) maxi = req.max() wmaxi = where(req==maxi)[0] drawjob = sample_noreplace(arange(wmaxi.size), 1) # draw job from jobs with the maximum size imax_req = wmaxi[drawjob] weights = has_sqft[:,imax_req] * probcomb[:,imax_req] draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],))) if (taken[draw] + this_jobs_sqft_table[draw,imax_req]) > capacity[draw]: probcomb[draw,imax_req]=0 continue taken[draw] = taken[draw] + this_jobs_sqft_table[draw,imax_req] building_ids[job_index_non_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]] probcomb[:,imax_req] = 0 new_jobs_sqft[job_index_non_home_based[idx_in_jobs[imax_req]]] = int(min(self.maximum_sqft, max(round(this_jobs_sqft_table[draw,imax_req]), self.minimum_sqft))) logger.log_status("%s non-home-based jobs (out of %s nhb jobs) were placed." % ( (building_ids[job_index_non_home_based]>0).sum(), job_index_non_home_based.size)) logger.log_status("Unplaced due to zero capacity: %s" % counter_zero_capacity) logger.log_status("Unplaced due to zero distribution: %s" % counter_zero_distr) job_dataset.modify_attribute(name="building_id", data = building_ids) # re-classify unplaced non-home-based jobs to home-based if parcels contain residential buildings bldgs_is_residential = logical_and(logical_not(is_governmental), building_dataset.compute_variables(["urbansim_parcel.building.is_residential"], dataset_pool=dataset_pool)) is_now_considered = logical_and(parcel_ids > 0, building_ids <= 0) job_index_non_home_based_unplaced = where(logical_and(is_now_considered, building_types == 2))[0] unique_parcels = unique(parcel_ids[job_index_non_home_based_unplaced]) imputed_sqft = 0 logger.log_status("Try to reclassify non-home-based jobs (excluding governmental jobs) ...") for parcel in unique_parcels: idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0] if idx_in_bldgs.size <= 0: continue idx_in_jobs = where(parcel_ids[job_index_non_home_based_unplaced] == parcel)[0] where_residential = where(bldgs_is_residential[idx_in_bldgs])[0] if where_residential.size > 0: building_types[job_index_non_home_based_unplaced[idx_in_jobs]] = 1 # set to home-based jobs elif non_res_sqft[idx_in_bldgs].sum() <= 0: # impute non_residential_sqft and assign buildings this_jobs_sectors = sectors[job_index_non_home_based_unplaced][idx_in_jobs] this_jobs_sqft_table = resize(jobs_sqft[idx_in_jobs], (idx_in_bldgs.size, idx_in_jobs.size)) wn = jobs_sqft[idx_in_jobs] <= 0 for i in range(idx_in_bldgs.size): this_jobs_sqft_table[i, where(wn)[0]] = zone_bt_lookup[jobs_zones[idx_in_jobs[wn]], bldg_types_in_bldgs[idx_in_bldgs[i]]] probcomb = zeros(this_jobs_sqft_table.shape) bt = bldg_types_in_bldgs[idx_in_bldgs] ibt = building_type_dataset.get_id_index(bt) for i in range(probcomb.shape[0]): for j in range(probcomb.shape[1]): probcomb[i,j] =
sector_bt_distribution[sector_index_mapping[this_jobs_sectors[j]],ibt[i]] for ijob in range(probcomb.shape[1]): if (probcomb[:,ijob].sum() <= 0) or (impute_sqft_flags[job_index_non_home_based_unplaced[ijob]] == 0): continue weights = probcomb[:,ijob] draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],))) non_res_sqft[idx_in_bldgs[draw]] += this_jobs_sqft_table[draw,ijob] imputed_sqft += this_jobs_sqft_table[draw,ijob] building_ids[job_index_non_home_based_unplaced[idx_in_jobs[ijob]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]] new_jobs_sqft[job_index_non_home_based[idx_in_jobs[ijob]]] = int(min(self.maximum_sqft, max(round(this_jobs_sqft_table[draw,ijob]), self.minimum_sqft))) building_dataset.modify_attribute(name="non_residential_sqft", data = non_res_sqft) job_dataset.modify_attribute(name="building_id", data = building_ids) job_dataset.modify_attribute(name="building_type", data = building_types) job_dataset.modify_attribute(name="sqft", data = new_jobs_sqft) old_nhb_size = job_index_non_home_based.size job_index_home_based = where(logical_and(is_considered, building_types == 1))[0] job_index_non_home_based = where(logical_and(is_considered, building_types == 2))[0] logger.log_status("%s non-home-based jobs reclassified as home-based." % (old_nhb_size-job_index_non_home_based.size)) logger.log_status("%s non-residential sqft imputed." % imputed_sqft) logger.log_status("Additionally, %s non-home-based jobs were placed due to imputed sqft." % \ (building_ids[job_index_non_home_based_unplaced]>0).sum()) # home_based jobs unique_parcels = unique(parcel_ids[job_index_home_based]) capacity_in_buildings = building_dataset.compute_variables([ "urbansim_parcel.building.vacant_home_based_job_space"], dataset_pool=dataset_pool) parcels_with_exceeded_capacity = [] # iterate over parcels logger.log_status("Placing home-based jobs ...") for parcel in unique_parcels: idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0] idx_in_jobs = where(parcel_ids[job_index_home_based] == parcel)[0] capacity = capacity_in_buildings[idx_in_bldgs] if capacity.sum() <= 0: continue probcomb = ones((idx_in_bldgs.size, idx_in_jobs.size)) taken = zeros(capacity.shape, dtype="int32") while True: zero_cap = where((capacity - taken) <= 0)[0] probcomb[zero_cap,:] = 0 if probcomb.sum() <= 0: break req = probcomb.sum(axis=0) wmaxi = where(req==req.max())[0] drawjob = sample_noreplace(arange(wmaxi.size), 1) # draw job from available jobs imax_req = wmaxi[drawjob] weights = probcomb[:,imax_req] # sample building draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],))) taken[draw] = taken[draw] + 1 building_ids[job_index_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]] probcomb[:,imax_req] = 0 if -1 in building_ids[job_index_home_based[idx_in_jobs]]: parcels_with_exceeded_capacity.append(parcel) parcels_with_exceeded_capacity = array(parcels_with_exceeded_capacity) logger.log_status("%s home-based jobs (out of %s hb jobs) were placed."
% ((building_ids[job_index_home_based]>0).sum(), job_index_home_based.size)) # assign building type where missing # determine regional distribution idx_home_based = where(building_types == 1)[0] idx_non_home_based = where(building_types == 2)[0] idx_bt_missing = where(building_types <= 0)[0] if idx_bt_missing.size > 0: # sample building types sample_bt = probsample_replace(array([1,2]), idx_bt_missing.size, array([idx_home_based.size, idx_non_home_based.size])/float(idx_home_based.size + idx_non_home_based.size)) # coerce to int32 (on a 64 bit machine, sample_bt will be of type int64) building_types[idx_bt_missing] = sample_bt.astype(int32) job_dataset.modify_attribute(name="building_type", data = building_types) if out_storage is not None: job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage, attributes=AttributeType.PRIMARY) building_dataset.write_dataset(out_table_name='buildings', out_storage=out_storage, attributes=AttributeType.PRIMARY) logger.log_status("Assigning building_id to jobs done.")
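# A hedged sketch of how the sector-by-building-type distribution above is
# formed: count jobs per building type with scipy.ndimage.sum over labels, then
# normalize. The arrays are illustrative, not taken from the model.
import numpy as np
from scipy import ndimage

job_building_types = np.array([3, 3, 4, 12, 4, 3])  # one sector's jobs
available_building_types = np.array([3, 4, 12])
counts = np.array(ndimage.sum(np.ones(job_building_types.size),
                              labels=job_building_types,
                              index=available_building_types))
distribution = counts / counts.sum()  # [0.5, 0.333..., 0.166...]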
def estimate(self, specification, dataset, outcome_attribute, index = None, procedure=None, data_objects=None, estimate_config=None, debuglevel=0): """'specification' is of type EquationSpecification, 'dataset' is of type Dataset, 'outcome_attribute' - string that determines the dependent variable, 'index' are indices of individuals in dataset for which the model runs. If it is None, the whole dataset is considered. 'procedure' - name of the estimation procedure. If it is None, there should be an entry "estimation" in 'estimate_config' that determines the procedure. The class must have a method 'run' that takes as arguments 'data', 'regression_procedure' and 'resources'. It returns a dictionary with entries 'estimators', 'standard_errors' and 't_values' (all 1D numpy arrays). 'data_objects' is a dictionary where each key is the name of a data object ('zone', ...) and its value is an object of class Dataset. 'estimate_config' is of type Resources; it gives additional arguments for the estimation procedure. 'debuglevel' overwrites the class 'debuglevel'. """ #import wingdbstub self.debug.flag = debuglevel if estimate_config == None: estimate_config = Resources() if not isinstance(estimate_config,Resources) and isinstance(estimate_config, dict): estimate_config = Resources(estimate_config) self.estimate_config = estimate_config.merge_with_defaults(self.estimate_config) if data_objects is not None: self.dataset_pool.add_datasets_if_not_included(data_objects) self.procedure=procedure if self.procedure == None: self.procedure = self.estimate_config.get("estimation", None) if self.procedure is not None: self.procedure = ModelComponentCreator().get_model_component(self.procedure) else: logger.log_warning("No estimation procedure given, or problems with loading the corresponding module.") compute_resources = Resources({"debug":self.debug}) if dataset.size()<=0: # no data loaded yet dataset.get_id_attribute() if index == None: index = arange(dataset.size()) if not isinstance(index,ndarray): index=array(index) estimation_size_agents = self.estimate_config.get("estimation_size_agents", None) # should be a proportion of the agent_set if estimation_size_agents == None: estimation_size_agents = 1.0 else: estimation_size_agents = max(min(estimation_size_agents,1.0),0.0) # between 0 and 1 if estimation_size_agents < 1.0: self.debug.print_debug("Sampling agents for estimation ...",3) estimation_idx = sample_noreplace(arange(index.size), int(index.size*estimation_size_agents)) else: estimation_idx = arange(index.size) estimation_idx = index[estimation_idx] self.debug.print_debug("Number of observations for estimation: " + str(estimation_idx.size),2) if estimation_idx.size <= 0: self.debug.print_debug("Nothing to be done.",2) return (None, None) coefficients = create_coefficient_from_specification(specification) self.specified_coefficients = SpecifiedCoefficients().create(coefficients, specification, neqs=1) submodels = self.specified_coefficients.get_submodels() self.get_status_for_gui().update_pieces_using_submodels(submodels=submodels, leave_pieces=2) self.map_agents_to_submodels(submodels, self.submodel_string, dataset, estimation_idx, dataset_pool=self.dataset_pool, resources = compute_resources, submodel_size_max=self.estimate_config.get('submodel_size_max', None)) variables = self.specified_coefficients.get_full_variable_names_without_constants() self.debug.print_debug("Compute variables ...",4) self.increment_current_status_piece() dataset.compute_variables(variables, dataset_pool=self.dataset_pool,
resources = compute_resources) coef = {} estimated_coef={} self.outcome = {} dataset.compute_variables([outcome_attribute], dataset_pool=self.dataset_pool, resources=compute_resources) regression_resources=Resources(estimate_config) regression_resources.merge({"debug":self.debug}) outcome_variable_name = VariableName(outcome_attribute) for submodel in submodels: coef[submodel] = SpecifiedCoefficientsFor1Submodel(self.specified_coefficients,submodel) self.increment_current_status_piece() logger.log_status("Estimate regression for submodel " +str(submodel), tags=["estimate"], verbosity_level=2) #logger.log_status("Number of observations: " +str(self.observations_mapping[submodel].size), #tags=["estimate"], verbosity_level=2) self.data[submodel] = dataset.create_regression_data_for_estimation(coef[submodel], index = estimation_idx[self.observations_mapping[submodel]]) self.coefficient_names[submodel] = coef[submodel].get_coefficient_names_without_constant()[0,:] if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0) and (self.procedure is not None): # observations for this submodel available self.outcome[submodel] = dataset.get_attribute_by_index(outcome_variable_name.get_alias(), estimation_idx[self.observations_mapping[submodel]]) regression_resources.merge({"outcome": self.outcome[submodel]}) regression_resources.merge({"coefficient_names":self.coefficient_names[submodel].tolist(), "constant_position": coef[submodel].get_constants_positions()}) regression_resources.merge({"submodel": submodel}) estimated_coef[submodel] = self.procedure.run(self.data[submodel], self.regression, resources=regression_resources) if "estimators" in estimated_coef[submodel].keys(): coef[submodel].set_coefficient_values(estimated_coef[submodel]["estimators"]) if "standard_errors" in estimated_coef[submodel].keys(): coef[submodel].set_standard_errors(estimated_coef[submodel]["standard_errors"]) if "other_measures" in estimated_coef[submodel].keys(): for measure in estimated_coef[submodel]["other_measures"].keys(): coef[submodel].set_measure(measure, estimated_coef[submodel]["other_measures"][measure]) if "other_info" in estimated_coef[submodel].keys(): for info in estimated_coef[submodel]["other_info"]: coef[submodel].set_other_info(info, estimated_coef[submodel]["other_info"][info]) coefficients.fill_coefficients(coef) self.specified_coefficients.coefficients = coefficients self.save_predicted_values_and_errors(specification, coefficients, dataset, outcome_variable_name, index=index, data_objects=data_objects) return (coefficients, estimated_coef)
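# A minimal sketch (illustrative values) of the estimation_size_agents
# subsampling above: when a proportion below 1.0 is requested, a
# without-replacement subsample of observation indices is drawn before
# estimating; numpy's choice stands in for sample_noreplace here.
import numpy as np

index = np.arange(1000)
estimation_size_agents = 0.2
n = int(index.size * estimation_size_agents)
estimation_idx = index[np.random.choice(index.size, size=n, replace=False)]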
def _do_run_for_this_year(self, job_set): building_type = job_set.get_attribute("building_type") sectors = unique(self.control_totals_for_this_year.get_attribute("sector_id")) self._compute_sector_variables(sectors, job_set) for sector in sectors: isector = where(self.control_totals_for_this_year.get_attribute("sector_id") == sector)[0] total_hb_jobs = self.control_totals_for_this_year.get_attribute("total_home_based_employment")[isector] total_nhb_jobs = self.control_totals_for_this_year.get_attribute("total_non_home_based_employment")[isector] is_in_sector_hb = job_set.get_attribute("is_in_employment_sector_%s_home_based" % sector) is_in_sector_nhb = job_set.get_attribute("is_in_employment_sector_%s_non_home_based" % sector) diff_hb = int(total_hb_jobs - is_in_sector_hb.astype(int8).sum()) diff_nhb = int(total_nhb_jobs - is_in_sector_nhb.astype(int8).sum()) if diff_hb < 0: # home based jobs to be removed w = where(is_in_sector_hb == 1)[0] sample_array, non_placed, size_non_placed = \ get_array_without_non_placed_agents(job_set, w, -1*diff_hb, self.location_id_name) self.remove_jobs = concatenate((self.remove_jobs, non_placed, sample_noreplace(sample_array, max(0,abs(diff_hb)-size_non_placed)))) if diff_nhb < 0: # non home based jobs to be removed w = where(is_in_sector_nhb == 1)[0] sample_array, non_placed, size_non_placed = \ get_array_without_non_placed_agents(job_set, w, -1*diff_nhb, self.location_id_name) self.remove_jobs = concatenate((self.remove_jobs, non_placed, sample_noreplace(sample_array, max(0,abs(diff_nhb)-size_non_placed)))) if diff_hb > 0: # home based jobs to be created self.new_jobs[self.location_id_name] = concatenate((self.new_jobs[self.location_id_name], zeros((diff_hb,), dtype=self.new_jobs[self.location_id_name].dtype.type))) self.new_jobs["sector_id"] = concatenate((self.new_jobs["sector_id"], (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_hb)))) if 1 in is_in_sector_hb: building_type_distribution = array(ndimage_sum(is_in_sector_hb, labels=building_type, index=self.available_building_types)) elif 1 in job_set.get_attribute("is_home_based_job"): # take the building type distribution from the whole region building_type_distribution = array(ndimage_sum( job_set.get_attribute("is_home_based_job"), labels=building_type, index=self.available_building_types)) else: # there are no home-based jobs in the region, take uniform distribution building_type_distribution = ones(self.available_building_types.size) building_type_distribution = building_type_distribution/building_type_distribution.sum() sampled_building_types = probsample_replace( self.available_building_types, diff_hb, building_type_distribution/ float(building_type_distribution.sum())) self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"], sampled_building_types.astype(self.new_jobs["building_type"].dtype.type))) new_max_id = self.max_id + diff_hb self.new_jobs[self.job_id_name] = concatenate((self.new_jobs[self.job_id_name], arange(self.max_id+1, new_max_id+1))) self.max_id = new_max_id if diff_nhb > 0: # non home based jobs to be created self.new_jobs[self.location_id_name]=concatenate((self.new_jobs[self.location_id_name], zeros((diff_nhb,), dtype=self.new_jobs[self.location_id_name].dtype.type))) self.new_jobs["sector_id"]=concatenate((self.new_jobs["sector_id"], (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_nhb)))) if 1 in is_in_sector_nhb: building_type_distribution = array(ndimage_sum(is_in_sector_nhb, labels=building_type, 
index=self.available_building_types)) elif 1 in job_set.get_attribute("is_non_home_based_job"): # take the building type distribution from the whole region building_type_distribution = array(ndimage_sum( job_set.get_attribute("is_non_home_based_job"), labels=building_type, index=self.available_building_types)) else: # there are no non-home-based jobs in the region, take uniform distribution building_type_distribution = ones(self.available_building_types.size) building_type_distribution = building_type_distribution/building_type_distribution.sum() sampled_building_types = probsample_replace( self.available_building_types, diff_nhb, building_type_distribution/ float(building_type_distribution.sum())) self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"], sampled_building_types.astype(self.new_jobs["building_type"].dtype.type))) new_max_id = self.max_id+diff_nhb self.new_jobs[self.job_id_name]=concatenate((self.new_jobs[self.job_id_name], arange(self.max_id+1, new_max_id+1))) self.max_id = new_max_id
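# A hedged numpy stand-in for probsample_replace as used above: sample building
# types with replacement according to the empirical distribution computed from
# the existing jobs. The values below are illustrative.
import numpy as np

available_building_types = np.array([1, 2, 3])
building_type_distribution = np.array([0.5, 0.3, 0.2])  # normalized counts
diff_hb = 4  # home-based jobs to create
sampled = np.random.choice(available_building_types, size=diff_hb,
                           p=building_type_distribution)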
class HouseholdTransitionModel(Model): """Creates and removes households from household_set. New households are duplicated from the existing households, keeping the joint distribution of all characteristics. """ model_name = "Household Transition Model" def __init__(self, location_id_name="grid_id", dataset_pool=None, debuglevel=0): self.debug = DebugPrinter(debuglevel) self.location_id_name = location_id_name self.dataset_pool = self.create_dataset_pool(dataset_pool, ["urbansim", "opus_core"]) def run(self, year, household_set, control_totals, characteristics, resources=None): self._do_initialize_for_run(household_set) control_totals.get_attribute( "total_number_of_households") # to make sure they are loaded self.characteristics = characteristics self.all_categories = self.characteristics.get_attribute( "characteristic") self.all_categories = array( map(lambda x: x.lower(), self.all_categories)) self.scaled_characteristic_names = get_distinct_names( self.all_categories).tolist() self.marginal_characteristic_names = copy(control_totals.get_id_name()) index_year = self.marginal_characteristic_names.index("year") self.marginal_characteristic_names.remove("year") idx = where(control_totals.get_attribute("year") == year)[0] self.control_totals_for_this_year = DatasetSubset(control_totals, idx) self._do_run_for_this_year(household_set) return self._update_household_set(household_set) def _update_household_set(self, household_set): index_of_duplicated_hhs = household_set.duplicate_rows( self.mapping_existing_hhs_to_new_hhs) household_set.modify_attribute( name=self.location_id_name, data=-1 * ones( (index_of_duplicated_hhs.size, ), dtype=household_set.get_data_type(self.location_id_name)), index=index_of_duplicated_hhs) household_set.remove_elements(self.remove_households) if self.new_households[self.location_id_name].size > 0: max_id = household_set.get_id_attribute().max() self.new_households[self.household_id_name] = concatenate( (self.new_households[self.household_id_name], arange( max_id + 1, max_id + self.new_households[self.location_id_name].size + 1))) household_set.add_elements(self.new_households, require_all_attributes=False) difference = household_set.size() - self.household_size self.debug.print_debug( "Difference in number of households: %s" " (original %s, new %s, created %s, deleted %s)" % (difference, self.household_size, household_set.size(), self.new_households[self.household_id_name].size + self.mapping_existing_hhs_to_new_hhs.size, self.remove_households.size), 3) if self.location_id_name in household_set.get_attribute_names(): self.debug.print_debug( "Number of unplaced households: %s" % where(household_set.get_attribute(self.location_id_name) <= 0) [0].size, 3) return difference def _do_initialize_for_run(self, household_set): self.household_id_name = household_set.get_id_name()[0] self.new_households = { self.location_id_name: array([], dtype=household_set.get_data_type(self.location_id_name, int32)), self.household_id_name: array([], dtype=household_set.get_data_type(self.household_id_name, int32)) } self.remove_households = array([], dtype='int32') self.household_size = household_set.size() self.max_id = household_set.get_id_attribute().max() self.arrays_from_categories = {} self.arrays_from_categories_mapping = {} self.mapping_existing_hhs_to_new_hhs = array( [], dtype=household_set.get_data_type(self.household_id_name, int32)) def _do_run_for_this_year(self, household_set): self.household_set = household_set groups = 
self.control_totals_for_this_year.get_id_attribute() self.create_arrays_from_categories(self.household_set) all_characteristics = self.arrays_from_categories.keys() self.household_set.load_dataset_if_not_loaded( attributes=all_characteristics ) # prevents from lazy loading to save runtime idx_shape = [] number_of_combinations = 1 num_attributes = len(all_characteristics) for iattr in range(num_attributes): attr = all_characteristics[iattr] max_bins = self.arrays_from_categories[attr].max() + 1 idx_shape.append(max_bins) number_of_combinations = number_of_combinations * max_bins if attr not in self.new_households.keys(): self.new_households[attr] = array( [], dtype=self.household_set.get_data_type(attr, float32)) self.number_of_combinations = int(number_of_combinations) idx_tmp = indices(tuple(idx_shape)) categories_index = zeros((self.number_of_combinations, num_attributes)) for i in range(num_attributes): #create indices of all combinations categories_index[:, i] = idx_tmp[i].ravel() categories_index_mapping = {} for i in range(self.number_of_combinations): categories_index_mapping[tuple(categories_index[i, ].tolist())] = i def get_category(values): bins = map(lambda x, y: self.arrays_from_categories[x][int(y)], all_characteristics, values) try: return categories_index_mapping[tuple(bins)] except KeyError, msg: where_error = where(array(bins) == -1)[0] if where_error.size > 0: raise KeyError, \ "Invalid value of %s for attribute %s. It is not included in the characteristics groups." % ( array(values)[where_error], array(all_characteristics)[where_error]) raise KeyError, msg if num_attributes > 0: # the next array must be a copy of the household values, otherwise, it changes the original values values_array = reshape( array(self.household_set.get_attribute( all_characteristics[0])), (self.household_set.size(), 1)) if num_attributes > 1: for attr in all_characteristics[1:]: values_array = concatenate( (values_array, reshape(array(self.household_set.get_attribute(attr)), (self.household_set.size(), 1))), axis=1) for i in range(values_array.shape[1]): if values_array[:, i].max() > 10000: values_array[:, i] = values_array[:, i] / 10 values_array[:, i] = clip( values_array[:, i], 0, self.arrays_from_categories[all_characteristics[i]].size - 1) # determine for each household to what category it belongs to self.household_categories = array( map(lambda x: get_category(x), values_array)) # performance bottleneck number_of_households_in_categories = array( ndimage_sum(ones((self.household_categories.size, )), labels=self.household_categories + 1, index=arange(self.number_of_combinations) + 1)) else: # no marginal characteristics; consider just one group self.household_categories = zeros(self.household_set.size(), dtype='int32') number_of_households_in_categories = array( [self.household_set.size()]) g = arange(num_attributes) #iterate over marginal characteristics for group in groups: if groups.ndim <= 1: # there is only one group (no marginal char.) id = group else: id = tuple(group.tolist()) group_element = self.control_totals_for_this_year.get_data_element_by_id( id) total = group_element.total_number_of_households for i in range(g.size): g[i] = eval("group_element." 
+ self.arrays_from_categories.keys()[i]) if g.size <= 0: l = ones((number_of_households_in_categories.size, )) else: l = categories_index[:, 0] == g[0] for i in range(1, num_attributes): l = logical_and(l, categories_index[:, i] == g[i]) # l has 1's for combinations of this group number_in_group = array( ndimage_sum(number_of_households_in_categories, labels=l, index=1)) diff = int(total - number_in_group) if diff < 0: # households to be removed is_in_group = l[self.household_categories] w = where(is_in_group)[0] sample_array, non_placed, size_non_placed = \ get_array_without_non_placed_agents(self.household_set, w, -1*diff, self.location_id_name) self.remove_households = concatenate( (self.remove_households, non_placed, sample_noreplace(sample_array, max(0, abs(diff) - size_non_placed)))) if diff > 0: # households to be created self._create_households(diff, l)
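# A small sketch of the category bookkeeping in _do_run_for_this_year above:
# every combination of characteristic bins gets an integer category index via a
# tuple-keyed mapping, so each household falls into exactly one cell. The bin
# counts here are illustrative.
import numpy as np

idx_shape = (2, 3)  # bins per characteristic, e.g. 2 age bins x 3 income bins
combos = np.indices(idx_shape).reshape(len(idx_shape), -1).T
mapping = dict((tuple(row.tolist()), i) for i, row in enumerate(combos))
# mapping[(age_bin, income_bin)] -> category index, e.g. mapping[(1, 2)] == 5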
def run(self, in_storage, out_storage, business_table="business", jobs_table="jobs", control_totals_table=None):
    logger.log_status("Unrolling %s table." % business_table)
    # get attributes from the establishments table
    business_dataset = BusinessDataset(in_storage=in_storage, in_table_name=business_table)
    business_sizes = business_dataset.get_attribute(self.number_of_jobs_attr).astype("int32")
    sectors = business_dataset.get_attribute("sector_id")
    tazes = business_dataset.get_attribute(self.geography_id_attr).astype("int32")
    building_ids = array([], dtype='int32')
    if "building_id" in business_dataset.get_primary_attribute_names():
        building_ids = business_dataset.get_attribute("building_id")
    parcel_ids = array([], dtype='int32')
    if "parcel_id" in business_dataset.get_primary_attribute_names():
        parcel_ids = business_dataset.get_attribute("parcel_id")
    home_based = array([], dtype='int16')
    if "home_based" in business_dataset.get_primary_attribute_names():
        home_based = business_dataset.get_attribute("home_based")
    building_sqft = business_dataset.get_attribute(self.sqft_attr)
    building_sqft[building_sqft <= 0] = 0
    join_flags = None
    if "join_flag" in business_dataset.get_primary_attribute_names():
        join_flags = business_dataset.get_attribute("join_flag")
    impute_sqft_flag = None
    if "impute_building_sqft_flag" in business_dataset.get_primary_attribute_names():
        impute_sqft_flag = business_dataset.get_attribute("impute_building_sqft_flag")

    # initialize jobs attributes
    total_size = business_sizes.sum()
    jobs_data = {}
    jobs_data["sector_id"] = resize(array([-1], dtype=sectors.dtype), total_size)
    jobs_data["building_id"] = resize(array([-1], dtype=building_ids.dtype), total_size)
    jobs_data["parcel_id"] = resize(array([-1], dtype=parcel_ids.dtype), total_size)
    jobs_data[self.geography_id_attr] = resize(array([-1], dtype=tazes.dtype), total_size)
    jobs_data["building_type"] = resize(array([-1], dtype=home_based.dtype), total_size)
    jobs_data["sqft"] = resize(array([], dtype=building_sqft.dtype), total_size)
    if join_flags is not None:
        jobs_data["join_flag"] = resize(array([], dtype=join_flags.dtype), total_size)
    if impute_sqft_flag is not None:
        jobs_data["impute_building_sqft_flag"] = resize(array([], dtype=impute_sqft_flag.dtype), total_size)

    indices = cumsum(business_sizes)
    # iterate over establishments; for each business create the corresponding
    # number of jobs by filling the corresponding part of the arrays
    start_index = 0
    for i in range(business_dataset.size()):
        end_index = indices[i]
        jobs_data["sector_id"][start_index:end_index] = sectors[i]
        if building_ids.size > 0:
            jobs_data["building_id"][start_index:end_index] = building_ids[i]
        if parcel_ids.size > 0:
            jobs_data["parcel_id"][start_index:end_index] = parcel_ids[i]
        jobs_data[self.geography_id_attr][start_index:end_index] = tazes[i]
        if home_based.size > 0:
            jobs_data["building_type"][start_index:end_index] = home_based[i]
        if self.compute_sqft_per_job:  # sqft per employee
            jobs_data["sqft"][start_index:end_index] = round((building_sqft[i] - building_sqft[i] / 10.0) / float(business_sizes[i]))
        else:
            jobs_data["sqft"][start_index:end_index] = building_sqft[i]
        if join_flags is not None:
            jobs_data["join_flag"][start_index:end_index] = join_flags[i]
        if impute_sqft_flag is not None:
            jobs_data["impute_building_sqft_flag"][start_index:end_index] = impute_sqft_flag[i]
        start_index = end_index

    jobs_data["job_id"] = arange(total_size) + 1
    if self.compute_sqft_per_job:
        jobs_data["sqft"] = clip(jobs_data["sqft"], 0, self.maximum_sqft)
        jobs_data["sqft"][logical_and(jobs_data["sqft"] > 0, jobs_data["sqft"] < self.minimum_sqft)] = self.minimum_sqft

    # correct missing job_building_types
    wmissing_bt = where(jobs_data["building_type"] <= 0)[0]
    if wmissing_bt.size > 0:
        # assign non-home-based type for now; it can be re-classified in the
        # assign_bldgs_to_jobs... script
        jobs_data["building_type"][wmissing_bt] = 2

    # create jobs table and write it out
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name="jobs", table_data=jobs_data)
    job_dataset = JobDataset(in_storage=storage)
    if self.unplace_jobs_with_non_existing_buildings:
        self.do_unplace_jobs_with_non_existing_buildings(job_dataset, out_storage)

    # match to control totals (only eliminate jobs if control totals are smaller
    # than the actual number of jobs)
    if control_totals_table is not None:
        logger.log_status("Matching to control totals.")
        control_totals = ControlTotalDataset(what='employment', id_name=['zone_id', 'sector_id'],
                                             in_table_name=control_totals_table, in_storage=in_storage)
        control_totals.load_dataset(attributes=['zone_id', 'sector_id', 'jobs'])
        zones_sectors = control_totals.get_id_attribute()
        njobs = control_totals.get_attribute('jobs')
        remove = array([], dtype='int32')
        for i in range(zones_sectors.shape[0]):
            zone, sector = zones_sectors[i, :]
            in_sector = job_dataset.get_attribute("sector_id") == sector
            in_zone_in_sector = logical_and(in_sector, job_dataset.get_attribute("zone_id") == zone)
            if in_zone_in_sector.sum() <= njobs[i]:
                continue
            to_be_removed = in_zone_in_sector.sum() - njobs[i]
            this_removal = 0
            not_considered = ones(job_dataset.size(), dtype='bool8')
            # first consider jobs without parcel id, then without building_id, then all
            for unit in ['parcel_id', 'building_id', None]:
                if unit is not None:
                    wnunit = job_dataset.get_attribute(unit) <= 0
                    eligible = logical_and(not_considered, logical_and(in_zone_in_sector, wnunit))
                    not_considered[where(wnunit)] = False
                else:
                    eligible = logical_and(not_considered, in_zone_in_sector)
                eligible_sum = eligible.sum()
                if eligible_sum > 0:
                    where_eligible = where(eligible)[0]
                    if eligible_sum <= to_be_removed - this_removal:
                        draw = arange(eligible_sum)
                    else:
                        draw = sample_noreplace(where_eligible, to_be_removed - this_removal, eligible_sum)
                    remove = concatenate((remove, where_eligible[draw]))
                    this_removal += draw.size
                    if this_removal >= to_be_removed:
                        break
        job_dataset.remove_elements(remove)
        logger.log_status("%s jobs removed." % remove.size)

    logger.log_status("Write jobs table.")
    job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage)
    logger.log_status("Created %s jobs." % job_dataset.size())
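# A minimal usage sketch (illustration only, not from the source): unrolling a
# two-row in-memory business table into 4 + 2 job records via dict_storage.
# The model class name 'UnrollJobsFromEstablishments' and the configured
# attribute names (number_of_jobs_attr='employees', geography_id_attr='zone_id',
# sqft_attr='building_sqft') are assumptions for illustration.
def example_unroll_businesses():
    from numpy import array
    from opus_core.storage_factory import StorageFactory
    in_storage = StorageFactory().get_storage('dict_storage')
    out_storage = StorageFactory().get_storage('dict_storage')
    in_storage.write_table(table_name="business", table_data={
        "business_id": array([1, 2]),
        "sector_id": array([3, 5]),
        "zone_id": array([10, 11]),
        "employees": array([4, 2]),          # unrolled into 4 and 2 job rows
        "building_sqft": array([2000, 800]),
        })
    model = UnrollJobsFromEstablishments()   # hypothetical class name
    model.run(in_storage, out_storage, business_table="business", jobs_table="jobs")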
def prepare_for_estimate(self, specification_dict=None, specification_storage=None,
                         specification_table=None, agent_set=None,
                         agents_for_estimation_storage=None, agents_for_estimation_table=None,
                         join_datasets=False, index_to_unplace=None, portion_to_unplace=1.0,
                         compute_lambda=False, grouping_location_set=None,
                         movers_variable=None, movers_index=None,
                         filter=None, location_id_variable=None, data_objects={}):
    """Always pass 'location_id_variable' if the location id is to be computed on the
    estimation set, i.e. if it is not a primary attribute of the estimation set.
    Set 'index_to_unplace' to None if 'compute_lambda' is True; in that case the annual
    supply is estimated without unplacing agents. 'grouping_location_set',
    'movers_variable' and 'movers_index' must be given if 'compute_lambda' is True.
    """
    from opus_core.model import get_specification_for_estimation
    from urbansim.functions import compute_supply_and_add_to_location_set
    specification = get_specification_for_estimation(specification_dict,
                                                     specification_storage,
                                                     specification_table)
    if (agent_set is not None) and (index_to_unplace is not None):
        if self.location_id_string is not None:
            agent_set.compute_variables(self.location_id_string, resources=Resources(data_objects))
        if portion_to_unplace < 1:
            unplace_size = int(portion_to_unplace * index_to_unplace.size)
            end_index_to_unplace = sample_noreplace(index_to_unplace, unplace_size)
        else:
            end_index_to_unplace = index_to_unplace
        logger.log_status("Unplace " + str(end_index_to_unplace.size) + " agents.")
        agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                   resize(array([-1]), end_index_to_unplace.size),
                                   end_index_to_unplace)
    if compute_lambda:
        movers = zeros(agent_set.size(), dtype="bool8")
        if movers_index is not None:
            movers[movers_index] = 1
        agent_set.add_primary_attribute(movers, "potential_movers")
        self.estimate_config["weights_for_estimation_string"] = \
            self.estimate_config["weights_for_estimation_string"] + "_from_lambda"
        compute_supply_and_add_to_location_set(self.choice_set, grouping_location_set,
                                               self.run_config["number_of_units_string"],
                                               self.run_config["capacity_string"],
                                               movers_variable,
                                               self.estimate_config["weights_for_estimation_string"],
                                               resources=Resources(data_objects))

    # create agents for estimation
    if (agents_for_estimation_storage is not None) and (agents_for_estimation_table is not None):
        estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                 in_table_name=agents_for_estimation_table,
                                 id_name=agent_set.get_id_name(),
                                 dataset_name=agent_set.get_dataset_name())
        if location_id_variable is not None:
            estimation_set.compute_variables(location_id_variable, resources=Resources(data_objects))
            # needs to be a primary attribute because of the join method below
            estimation_set.add_primary_attribute(estimation_set.get_attribute(location_id_variable),
                                                 VariableName(location_id_variable).get_alias())
        if filter:
            values = estimation_set.compute_variables(filter, resources=Resources(data_objects))
            index = where(values > 0)[0]
            estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)
        if join_datasets:
            agent_set.join_by_rows(estimation_set, require_all_attributes=False,
                                   change_ids_if_not_unique=True)
            index = arange(agent_set.size() - estimation_set.size(), agent_set.size())
        else:
            index = agent_set.get_id_index(estimation_set.get_id_attribute())
    else:
        if agent_set is not None:
            if filter is not None:
                values = agent_set.compute_variables(filter, resources=Resources(data_objects))
                index = where(values > 0)[0]
            else:
                index = arange(agent_set.size())
        else:
            index = None
    return (specification, index)
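# Hedged usage sketch (not from the source): estimating with 30% of the placed
# movers unplaced first. 'hlcm', 'spec_storage', 'households' and
# 'placed_movers_index' are placeholders for illustration.
#
# specification, index = hlcm.prepare_for_estimate(
#     specification_storage=spec_storage,
#     specification_table="household_location_choice_model_specification",
#     agent_set=households,
#     index_to_unplace=placed_movers_index,
#     portion_to_unplace=0.3)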
def _do_run_for_this_year(self, job_set):
    building_type = job_set.get_attribute("building_type")
    sectors = unique(self.control_totals_for_this_year.get_attribute("sector_id"))
    self._compute_sector_variables(sectors, job_set)
    for sector in sectors:
        isector = where(self.control_totals_for_this_year.get_attribute("sector_id") == sector)[0]
        total_hb_jobs = self.control_totals_for_this_year.get_attribute("total_home_based_employment")[isector]
        total_nhb_jobs = self.control_totals_for_this_year.get_attribute("total_non_home_based_employment")[isector]
        is_in_sector_hb = job_set.get_attribute("is_in_employment_sector_%s_home_based" % sector)
        is_in_sector_nhb = job_set.get_attribute("is_in_employment_sector_%s_non_home_based" % sector)
        diff_hb = int(total_hb_jobs - is_in_sector_hb.astype(int8).sum())
        diff_nhb = int(total_nhb_jobs - is_in_sector_nhb.astype(int8).sum())

        if diff_hb < 0:  # home-based jobs to be removed
            w = where(is_in_sector_hb == 1)[0]
            sample_array, non_placed, size_non_placed = \
                get_array_without_non_placed_agents(job_set, w, -1 * diff_hb, self.location_id_name)
            self.remove_jobs = concatenate((self.remove_jobs, non_placed,
                                            sample_noreplace(sample_array,
                                                             max(0, abs(diff_hb) - size_non_placed))))
        if diff_nhb < 0:  # non-home-based jobs to be removed
            w = where(is_in_sector_nhb == 1)[0]
            sample_array, non_placed, size_non_placed = \
                get_array_without_non_placed_agents(job_set, w, -1 * diff_nhb, self.location_id_name)
            self.remove_jobs = concatenate((self.remove_jobs, non_placed,
                                            sample_noreplace(sample_array,
                                                             max(0, abs(diff_nhb) - size_non_placed))))

        if diff_hb > 0:  # home-based jobs to be created
            self.new_jobs[self.location_id_name] = concatenate(
                (self.new_jobs[self.location_id_name],
                 zeros((diff_hb,), dtype=self.new_jobs[self.location_id_name].dtype.type)))
            self.new_jobs["sector_id"] = concatenate(
                (self.new_jobs["sector_id"],
                 resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_hb)))
            if 1 in is_in_sector_hb:
                building_type_distribution = array(ndimage_sum(is_in_sector_hb,
                                                               labels=building_type,
                                                               index=self.available_building_types))
            elif 1 in job_set.get_attribute("is_home_based_job"):
                # take the building type distribution from the whole region
                building_type_distribution = array(ndimage_sum(job_set.get_attribute("is_home_based_job"),
                                                               labels=building_type,
                                                               index=self.available_building_types))
            else:  # there are no home-based jobs in the region; take a uniform distribution
                building_type_distribution = ones(self.available_building_types.size)
            building_type_distribution = building_type_distribution / float(building_type_distribution.sum())
            sampled_building_types = probsample_replace(self.available_building_types, diff_hb,
                                                        building_type_distribution)
            self.new_jobs["building_type"] = concatenate(
                (self.new_jobs["building_type"],
                 sampled_building_types.astype(self.new_jobs["building_type"].dtype.type)))
            new_max_id = self.max_id + diff_hb
            self.new_jobs[self.job_id_name] = concatenate(
                (self.new_jobs[self.job_id_name], arange(self.max_id + 1, new_max_id + 1)))
            self.max_id = new_max_id

        if diff_nhb > 0:  # non-home-based jobs to be created
            self.new_jobs[self.location_id_name] = concatenate(
                (self.new_jobs[self.location_id_name],
                 zeros((diff_nhb,), dtype=self.new_jobs[self.location_id_name].dtype.type)))
            self.new_jobs["sector_id"] = concatenate(
                (self.new_jobs["sector_id"],
                 resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_nhb)))
            if 1 in is_in_sector_nhb:
                building_type_distribution = array(ndimage_sum(is_in_sector_nhb,
                                                               labels=building_type,
                                                               index=self.available_building_types))
            elif 1 in job_set.get_attribute("is_non_home_based_job"):
                # take the building type distribution from the whole region
                building_type_distribution = array(ndimage_sum(job_set.get_attribute("is_non_home_based_job"),
                                                               labels=building_type,
                                                               index=self.available_building_types))
            else:  # there are no non-home-based jobs in the region; take a uniform distribution
                building_type_distribution = ones(self.available_building_types.size)
            building_type_distribution = building_type_distribution / float(building_type_distribution.sum())
            sampled_building_types = probsample_replace(self.available_building_types, diff_nhb,
                                                        building_type_distribution)
            self.new_jobs["building_type"] = concatenate(
                (self.new_jobs["building_type"],
                 sampled_building_types.astype(self.new_jobs["building_type"].dtype.type)))
            new_max_id = self.max_id + diff_nhb
            self.new_jobs[self.job_id_name] = concatenate(
                (self.new_jobs[self.job_id_name], arange(self.max_id + 1, new_max_id + 1)))
            self.max_id = new_max_id
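# Illustration only (invented numbers) of the building-type sampling above:
# probsample_replace draws with replacement from the normalized histogram of
# existing jobs' building types. The scipy import path is an assumption about
# how ndimage_sum is bound in this module.
def example_building_type_sampling():
    from numpy import array
    from scipy.ndimage import sum as ndimage_sum
    from opus_core.sampling_toolbox import probsample_replace
    available_building_types = array([1, 2, 3])
    building_type = array([1, 1, 2, 3, 3, 3])    # types of existing jobs
    is_in_sector_hb = array([1, 1, 1, 0, 1, 0])  # sector membership flags
    counts = array(ndimage_sum(is_in_sector_hb, labels=building_type,
                               index=available_building_types))
    distribution = counts / float(counts.sum())  # -> [0.5, 0.25, 0.25]
    # draw building types for 10 newly created home-based jobs
    return probsample_replace(available_building_types, 10, distribution)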
def run(my, cache_dir=None, year=None):
    global parcel_set, z, node_set, submarket, esubmarket, isr, parcelfees, costdiscount
    '''
    if 0:
        z = Zoning()
        p = Parcels()
        cPickle.dump((z,p),open('databaseinfo.jar','w'))
    else:
        print "Reading db info from jar..."
        z,p = cPickle.load(open(os.path.join(os.environ['OPUS_DATA'],'bay_area_parcel/databaseinfo.jar')))
    '''
    ## when developer_model is invoked alone from command line
    if cache_dir is not None and year is not None:
        # data_path restored (was commented out) so the hard-coded debug run
        # directory below resolves
        data_path = paths.get_opus_data_path_path()
        cache_dir = os.path.join(data_path, 'bay_area_parcel/runs/run_105.2012_05_03_09')
        #year = 2011
        simulation_state = SimulationState()
        simulation_state.set_current_time(year)
        SimulationState().set_cache_directory(cache_dir)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(new_instance=True,
                                            package_order=['bayarea', 'urbansim_parcel',
                                                           'urbansim', 'opus_core'],
                                            in_storage=attribute_cache).get_dataset_pool()

    dataset_pool = SessionConfiguration().get_dataset_pool()
    current_year = SimulationState().get_current_time()
    cache_dir = SimulationState().get_cache_directory()

    parcel_set = dataset_pool.get_dataset('parcel')
    building_set = dataset_pool.get_dataset('building')
    household_set = dataset_pool.get_dataset('household')
    node_set = dataset_pool.get_dataset('node')
    unit_set = dataset_pool.get_dataset('residential_unit')
    submarket = dataset_pool.get_dataset('submarket')
    esubmarket = dataset_pool.get_dataset('employment_submarket')
    #print numpy.array(unit_set['rent'] > 0).size
    #for i in range(unit_set.size()):
    #    print unit_set['unit_price'][i], unit_set['unit_sqft'][i]
    #transit_set = dataset_pool.get_dataset('transit_station')
    #print dataset_pool.datasets_in_pool()
    '''
    from bayarea.node import transit_type_DDD_within_DDD_meters
    for i in range(7):
        print i
        v = transit_type_DDD_within_DDD_meters.transit_type_DDD_within_DDD_meters(i,500)
        d = v.compute(dataset_pool)
        print d.size
        found = d[numpy.nonzero(d)]
        print found.size
    sys.exit()
    '''
    compute_devmdl_accvars(node_set)

    ######################
    ### CAREFUL - THIS IS WHERE SCENARIO SPECIFIC INFO GOES
    ######################
    current_year = SimulationState().get_current_time()
    z = Zoning(my.scenario, current_year)
    isr = None
    if my.scenario.startswith('Transit'):
        isr = ISR()
    parcelfees = None
    if my.scenario.startswith('Preferred'):
        parcelfees = ParcelFees(dataset_pool.get_dataset('parcelfees_preferred'))
    #elif my.scenario.startswith('Transit'):
    #    parcelfees = ParcelFees(dataset_pool.get_dataset('parcelfees_transit'))
    elif my.scenario.startswith('Equity'):
        parcelfees = ParcelFees(dataset_pool.get_dataset('parcelfees_equity'))
    elif my.scenario.startswith('Infill'):
        parcelfees = ParcelFees(dataset_pool.get_dataset('parcelfees_infill'))
    costdiscount = 0.0
    if not my.scenario.startswith('No Project') and not my.scenario.startswith('Equity'):
        costdiscount = .01
    #################################
    #################################

    from numpy import logical_not
    empty_parcels = parcel_set.compute_variables("(parcel.number_of_agents(building)==0)*(parcel.node_id>0)*(parcel.shape_area>80)")
    res_parcels = parcel_set.compute_variables("(parcel.number_of_agents(building)>0)*(parcel.node_id>0)*(parcel.shape_area>80)")
    bart_parcels = parcel_set.compute_variables("(parcel.disaggregate(bayarea.node.transit_type_1_within_800_meters))")
    caltrain_parcels = parcel_set.compute_variables("(parcel.disaggregate(bayarea.node.transit_type_2_within_800_meters))")
    #pda_parcels = parcel_set.compute_variables("(parcel.pda_id > -1)*(numpy.logical_not(parcel.county_id==38))")
    pda_parcels = parcel_set.compute_variables("(parcel.pda_id > -1)")

    SAMPLE_RATE = 0.01
    from opus_core.sampling_toolbox import sample_noreplace
    from numpy import concatenate, where
    sampled_res_parcels_index = sample_noreplace(where(res_parcels)[0],
                                                 int(SAMPLE_RATE * parcel_set.size()))
    test_parcels = concatenate((where(empty_parcels == 1)[0], sampled_res_parcels_index,
                                where(bart_parcels == 1)[0], where(caltrain_parcels == 1)[0],
                                where(pda_parcels == 1)[0]))
    test_parcels = sample_noreplace(test_parcels, int(.08 * 154877))
    numpy.random.shuffle(test_parcels)
    """
    sample = []
    for i in range(parcel_set.size()):
        if empty_parcels[i] == 1:
            sample.append(i+1)
        elif res_parcels[i] == 1 and numpy.random.ranf() < SAMPLE_RATE:
            sample.append(i+1)
    test_parcels = array(sample)
    """
    #empty_parcels = parcel_set.compute_variables("(parcel.node_id>0)*(parcel.shape_area>80)")
    #test_parcels = numpy.where(empty_parcels==1)[0]

    global building_sqft, building_price
    building_sqft = parcel_set.compute_variables('parcel.aggregate(building.building_sqft)')
    building_price_owner_residential = parcel_set.compute_variables('building_price_owner_res=parcel.aggregate((residential_unit.sale_price)*(residential_unit.sale_price>0),intermediates=[building])')
    building_price_rental_residential = parcel_set.compute_variables('building_price_rental_res=parcel.aggregate((residential_unit.rent*12*17.9)*(residential_unit.rent>0),intermediates=[building])')
    building_price_nonresidential = parcel_set.compute_variables('building_price_nonres = parcel.aggregate((building.non_residential_rent*7*building.non_residential_sqft))')
    sum_building_p = parcel_set.compute_variables('sum_building_price = parcel.building_price_owner_res + parcel.building_price_rental_res + building_price_nonres')
    ##sum_building_price = building_price_owner_residential + building_price_rental_residential + building_price_nonresidential
    vacant_parcel = parcel_set.compute_variables('parcel.sum_building_price == 0')
    price_per_sqft_land = (parcel_set.compute_variables('parcel.disaggregate(safe_array_divide(zone.aggregate(parcel.sum_building_price),zone.aggregate(building.building_sqft)))')) / 4
    parcel_land_area = parcel_set.compute_variables('parcel.shape_area')
    vacant_land_price = vacant_parcel * price_per_sqft_land * parcel_land_area
    building_price = sum_building_p + vacant_land_price
    #land_price = (sum_building_p==0) * (parcel_set.compute_variables('parcel.disaggregate(safe_array_divide(zone.aggregate(parcel.sum_building_price),zone.aggregate(building.building_sqft)))')) * parcel_set.compute_variables('parcel.land_area')

    # info used to match from proposal_component to submarket
    parcel_set.compute_variables(["bayarea.parcel.within_half_mile_transit",
                                  "bayarea.parcel.schooldistrict",
                                  "bayarea.parcel.jurisdiction_id",
                                  ])

    #test_parcels = array([i+1 for i in range(parcel_set.size())])
    #test_parcels = test_parcels[:10000]
    #test_parcels = test_parcels[:150]
    #test_parcels = numpy.where(parcel_set['parcel_id'] == 1608920)[0]
    #print test_parcels
    logger.log_status("%s parcels to test" % (test_parcels.size))
    print "Num of parcels:", test_parcels.size

    import time
    HOTSHOT = 0
    if MP:
        from multiprocessing import Pool, Queue
        pool = Pool(processes=4)
    import hotshot, hotshot.stats  #, test.pystone
    if HOTSHOT:
        prof = hotshot.Profile('devmdl.prof')
        prof.start()

    outf = open(os.path.join(cache_dir, 'buildings-%d.csv' % current_year), 'w')
    outf.write('pid,county,dev_btype,stories,sqft,res_sqft,nonres_sqft,tenure,year_built,res_units,npv,actualfee,btype\n')
    debugf = open(os.path.join(cache_dir, 'proforma-debug-%d.csv' % current_year), 'w')
    bformdbg = 'county_id,far,height,max_dua,bform.sf_builtarea(),bform.sfunitsizes,bform.mf_builtarea(),bform.mfunitsizes,bform.num_units,bform.nonres_sqft,bform.buildable_area'
    otherdbg = 'isr,parcelfees,existing_sqft,existing_price,lotsize,unitsize,unitsize2,bform.sales_absorption,bform.rent_absorption,bform.leases_absorption,bform.sales_vacancy_rates,bform.vacancy_rates'
    debugf.write('pid,btype,npv,actualfee,pricesf,pricemf,rentsf,rentmf,rentof,rentret,rentind,%s,%s\n' % (bformdbg, otherdbg))
    t1 = time.time()
    aggd = {}

    def chunks(l, n):
        for i in xrange(0, len(l), n):
            yield l[i:i + n]

    for test_chunk in chunks(test_parcels, 1000):
        print "Executing CHUNK"
        sales_absorption = submarket.compute_variables('bayarea.submarket.sales_absorption')
        rent_absorption = submarket.compute_variables('bayarea.submarket.rent_absorption')
        vacancy_rates = submarket.compute_variables('bayarea.submarket.vacancy_rates')
        leases_absorption = esubmarket.compute_variables('bayarea.employment_submarket.leases_absorption')
        nr_vacancy_rates = esubmarket.compute_variables('bayarea.employment_submarket.vacancy_rates')
        if HOTSHOT:
            results = []
            for p in test_chunk:
                r = process_parcel(p)
                if r is not None and r != -1:
                    results.append(list(r))
        else:
            if MP:
                results = pool.map(process_parcel, test_chunk)
            else:
                results = [process_parcel(p) for p in test_chunk]
            results_bldg = [list(x[0]) for x in results if x is not None and x[0] != -1]
            # each row of units represents number of units of [1, 2, 3, 4] bedrooms
            units = array([x[1][0] for x in results if x is not None and x[0] != -1])
            sqft_per_unit = array([x[1][1] for x in results if x is not None and x[0] != -1])
            for x in results:
                if x is not None:
                    debugf.write(x[2])
            results = results_bldg

        for result in results:
            #print result
            out_btype = devmdltypes[int(result[2]) - 1]
            outf.write(string.join([str(x) for x in result] + [str(out_btype)], sep=',') + '\n')

        ##TODO: id of buildings to be demolished
        buildings_to_demolish = []
        idx_buildings_to_demolish = building_set.get_id_index(buildings_to_demolish)
        JAMM = JoinAttributeModificationModel()
        JAMM.run(household_set, building_set, index=idx_buildings_to_demolish, value=-1)
        building_set.remove_elements(idx_buildings_to_demolish)

        column_names = ["parcel_id", "county", "building_type_id", "stories",
                        "building_sqft", "residential_sqft", "non_residential_sqft",
                        "tenure", "year_built", "residential_units"]
        buildings_data = copy.deepcopy(results)
        for i in range(len(buildings_data)):
            buildings_data[i][2] = devmdltypes[int(buildings_data[i][2]) - 1]
        buildings_data = array(buildings_data)
        new_buildings = {}
        available_bldg_id = building_set['building_id'].max() + 1
        new_bldg_ids = arange(available_bldg_id, available_bldg_id + buildings_data.shape[0],
                              dtype=building_set['building_id'].dtype)
        if buildings_data.size > 0:
            for icol, col_name in enumerate(column_names):
                if col_name in building_set.get_known_attribute_names():
                    ddtype = building_set[col_name].dtype
                    new_buildings[col_name] = (buildings_data[:, icol]).astype(ddtype)
                else:
                    # if the col_name is not in the dataset, it will be discarded anyway
                    pass
            new_buildings['building_id'] = new_bldg_ids
            # recode tenure: 1 - rent, 2 - own from 0 - own, 1 - rent
            new_buildings['tenure'][new_buildings['tenure'] == 0] = 2
            ## pid is the index to parcel_set; convert them to actual parcel_id
            #new_buildings['parcel_id'] = parcel_set['parcel_id'][new_buildings['parcel_id']]
            building_set.add_elements(new_buildings, require_all_attributes=False,
                                      change_ids_if_not_unique=True)
            building_set.flush_dataset()

            assert new_bldg_ids.size == units.shape[0] == sqft_per_unit.shape[0]
            units_bldg_ids = repeat(new_bldg_ids, 4)
            # oversized pattern; only the first units.size entries (after ravel) are indexed below
            bedrooms = array([1, 2, 3, 4] * units.size)
            units = numpy.round(units.ravel())  # numpy.round: the builtin round() cannot handle arrays
            sqft_per_unit = sqft_per_unit.ravel()
            new_units = {'building_id': array([], dtype='i4'),
                         'bedrooms': array([], dtype='i4'),
                         'sqft_per_unit': array([], dtype='i4')}
            for i_unit, unit in enumerate(units):
                if unit <= 0:
                    continue
                new_units['building_id'] = concatenate((new_units['building_id'],
                                                        repeat(units_bldg_ids[i_unit], unit)))
                new_units['bedrooms'] = concatenate((new_units['bedrooms'],
                                                     repeat(bedrooms[i_unit], unit)))
                new_units['sqft_per_unit'] = concatenate((new_units['sqft_per_unit'],
                                                          repeat(sqft_per_unit[i_unit], unit)))
            ## force dtype conversion to the same dtype as unit_set
            for col_name in ['building_id', 'bedrooms', 'sqft_per_unit']:
                if col_name in unit_set.get_known_attribute_names():
                    new_units[col_name] = new_units[col_name].astype(unit_set[col_name].dtype)
            unit_set.add_elements(new_units, require_all_attributes=False,
                                  change_ids_if_not_unique=True)
            unit_set.flush_dataset()

        for result in results:
            units = result[-1]
            nonres_sqft = 1  # result[6]/1000.0
            county = result[1]
            btype = result[2]
            key = (county, btype)
            aggd.setdefault(key, 0)
            if btype < 7:
                aggd[key] += units
            else:
                aggd[key] += nonres_sqft
            aggd.setdefault(county, 0)
            aggd[county] += units

    aggf = open('county_aggregations-%d.csv' % current_year, 'w')
    county_names = {49: 'son', 41: 'smt', 1: 'ala', 43: 'scl', 28: 'nap', 38: 'sfr',
                    7: 'cnc', 48: 'sol', 21: 'mar', 0: 'n/a'}
    btype_names = {1: 'SF', 2: 'SFBUILD', 3: 'MF', 4: 'MXMF', 5: 'CONDO', 6: 'MXC',
                   7: 'OF', 8: 'MXO', 9: 'CHOOD', 10: 'CAUTO', 11: 'CBOX', 12: 'MANU', 13: 'WHE'}
    aggf.write('county,total,' + string.join(btype_names.values(), sep=',') + '\n')
    for county in [38, 41, 43, 1, 7, 48, 28, 49, 21]:
        aggf.write(county_names[county] + ',' + str(aggd.get(county, 0)))
        for btype in btype_names.keys():
            key = (county, btype)
            val = aggd.get(key, 0)
            aggf.write(',' + str(val))
        aggf.write('\n')

    t2 = time.time()
    print "Finished in %f seconds" % (t2 - t1)
    print "Ran optimization %d times" % devmdl_optimize.OBJCNT
    global NOZONINGCNT, NOBUILDTYPES
    print "Did not find zoning for parcel %d times" % NOZONINGCNT
    print "Did not find building types for parcel %d times" % NOBUILDTYPES
    print "DONE"
    my.post_run()  # remove price_shifter & cost_shifter to avoid them being cached
    if HOTSHOT:
        prof.stop()
        prof.close()
        stats = hotshot.stats.load('devmdl.prof')
        stats.strip_dirs()
        stats.sort_stats('cumulative')
        stats.print_stats(20)
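# Quick illustration (not from the source) of the chunks() helper above: it
# yields consecutive slices of at most n elements, so 2500 test parcels are
# processed in chunks of 1000, 1000 and 500.
def example_chunks():
    def chunks(l, n):
        for i in xrange(0, len(l), n):
            yield l[i:i + n]
    return [len(c) for c in chunks(range(2500), 1000)]  # -> [1000, 1000, 500]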
def run(self, year, business_set, control_totals, data_objects=None, resources=None):
    business_id_name = business_set.get_id_name()[0]
    control_totals.get_attribute("total_number_of_businesses")  # ensure the attribute is loaded
    idx = where(control_totals.get_attribute("year") == year)
    sectors = unique(control_totals.get_attribute_by_index("building_use_id", idx))
    max_id = business_set.get_id_attribute().max()
    business_size = business_set.size()
    new_businesses = {self.location_id_name: array([], dtype='int32'),
                      "building_use_id": array([], dtype='int32'),
                      business_id_name: array([], dtype='int32'),
                      "sqft": array([], dtype='int32'),
                      "employees": array([], dtype='int32')}
    compute_resources = Resources(data_objects)
    #compute_resources.merge({job_building_types.get_dataset_name():job_building_types, "debug":self.debug})
    business_set.compute_variables(
        map(lambda x: "%s.%s.is_sector_%s" % (self.variable_package, business_set.get_dataset_name(), x),
            sectors),
        resources=compute_resources)
    remove_businesses = array([], dtype='int32')

    for sector in sectors:
        total_businesses = control_totals.get_data_element_by_id((year, sector)).total_number_of_businesses
        is_in_sector = business_set.get_attribute("is_sector_%s" % sector)
        diff = int(total_businesses - is_in_sector.astype(int8).sum())
        if diff < 0:  # remove businesses, preferring non-placed ones
            w = where(is_in_sector == 1)[0]
            sample_array, non_placed, size_non_placed = \
                get_array_without_non_placed_agents(business_set, w, -1 * diff, self.location_id_name)
            remove_businesses = concatenate((remove_businesses, non_placed,
                                             sample_noreplace(sample_array,
                                                              max(0, abs(diff) - size_non_placed))))
        if diff > 0:  # create new businesses by cloning randomly sampled ones in this sector
            new_businesses[self.location_id_name] = concatenate((new_businesses[self.location_id_name],
                                                                 zeros((diff,), dtype="int32")))
            new_businesses["building_use_id"] = concatenate((new_businesses["building_use_id"],
                                                             sector * ones((diff,), dtype="int32")))
            available_business_index = where(is_in_sector)[0]
            sampled_business = probsample_replace(available_business_index, diff, None)
            new_businesses["sqft"] = concatenate((new_businesses["sqft"],
                                                  business_set.get_attribute("sqft")[sampled_business]))
            new_businesses["employees"] = concatenate((new_businesses["employees"],
                                                       business_set.get_attribute("employees")[sampled_business]))
            new_max_id = max_id + diff
            new_businesses[business_id_name] = concatenate((new_businesses[business_id_name],
                                                            arange(max_id + 1, new_max_id + 1)))
            max_id = new_max_id

    business_set.remove_elements(remove_businesses)
    business_set.add_elements(new_businesses, require_all_attributes=False)
    difference = business_set.size() - business_size
    self.debug.print_debug("Difference in number of businesses: %s (original %s,"
                           " new %s, created %s, deleted %s)"
                           % (difference, business_size, business_set.size(),
                              new_businesses[business_id_name].size, remove_businesses.size), 3)
    self.debug.print_debug("Number of unplaced businesses: %s"
                           % where(business_set.get_attribute(self.location_id_name) <= 0)[0].size, 3)
    return difference
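# Toy walk-through (values invented) of the control-total arithmetic above:
# diff = target - current per sector; a positive diff clones that many
# randomly drawn businesses of the sector, a negative diff removes |diff|
# businesses, drawing non-placed ones first.
def example_business_diffs():
    from numpy import array
    is_in_sector = {1: array([1, 1, 1, 0, 0], dtype='int8'),
                    2: array([0, 0, 0, 1, 1], dtype='int8')}
    targets = {1: 5, 2: 1}
    # -> {1: 2} (create two clones) and {2: -1} (remove one business)
    return dict((sector, int(targets[sector] - flags.sum()))
                for sector, flags in is_in_sector.items())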
def run(self, location_set, agent_event_set, agent_set, current_year, dataset_pool=None):
    """The agent_event_set is expected to have attributes:
    grid_id, scheduled_year, total_number, is_percentage, change_type,
    (optionally other agent characteristics).
    'grid_id' is not a mandatory name, but it must match the id name of the
    location_set; thus the model works on any geography level.
    'is_percentage' (bool) determines whether 'total_number' is a percentage of
    existing agents (True) or an absolute number (False) - it is relevant only
    when deleting agents.
    'change_type' can have values 'D' (delete), 'A' (add), 'R' (replace) and
    determines the type of change for the agents. If this column is missing,
    the model considers 'D' as default for all entries in the agent_event_set.
    If the change type is 'D', the method finds agents from the agent_set
    (jobs, households) located in the given locations (e.g. grid_id), then
    samples the given amount for the current_year and unplaces them.
    If other characteristics columns are contained in the agent_event_set,
    their names must match column names in the agent_set (e.g. 'sector_id'
    for jobs). In such a case the deletion is done among agents that match
    the given characteristics.
    """
    if not agent_event_set or (agent_event_set.size() == 0):
        logger.log_status("No %s agents for event processing." % agent_set.get_dataset_name())
        return
    idx_of_events_this_year = agent_event_set.get_attribute("scheduled_year") == current_year
    if idx_of_events_this_year.sum() == 0:
        logger.log_status("No %s agents for this year event processing." % agent_set.get_dataset_name())
        return
    self.dataset_pool = self.create_dataset_pool(dataset_pool)
    location_id_name = location_set.get_id_name()[0]
    location_ids_in_event_set = agent_event_set.get_attribute_by_index(location_id_name,
                                                                       idx_of_events_this_year)
    other_characteristics = agent_event_set.get_known_attribute_names()
    for name in agent_event_set.get_id_name():
        other_characteristics.remove(name)
    other_characteristics.remove("total_number")
    totals = agent_event_set.get_attribute_by_index("total_number", idx_of_events_this_year)
    if "change_type" not in agent_event_set.get_known_attribute_names():
        types_of_change = array(idx_of_events_this_year.sum() * ['D'])
    else:
        types_of_change = agent_event_set.get_attribute_by_index("change_type", idx_of_events_this_year)
        other_characteristics.remove("change_type")
    if "is_percentage" not in agent_event_set.get_known_attribute_names():
        is_percentage = zeros(idx_of_events_this_year.sum(), dtype='bool8')
    else:
        is_percentage = agent_event_set.get_attribute_by_index("is_percentage", idx_of_events_this_year)
        other_characteristics.remove("is_percentage")
    # pre-load other characteristics
    for name in other_characteristics:
        agent_event_set.get_attribute(name)
    if location_id_name not in agent_set.get_known_attribute_names():
        # compute agents locations
        agent_set.compute_one_variable_with_unknown_package(location_id_name, self.dataset_pool)

    # iterate over locations
    for ilocation_id in range(location_ids_in_event_set.size):
        agent_ids = agent_set.get_attribute(location_id_name)
        location_id = location_ids_in_event_set[ilocation_id]
        change_type = types_of_change[ilocation_id]
        agents_to_consider = agent_ids == location_id
        for characteristics in other_characteristics:
            characteristics_value = getattr(
                agent_event_set.get_data_element_by_id((location_id, current_year)),
                characteristics)
            agents_to_consider = logical_and(
                agents_to_consider,
                agent_set.get_attribute(characteristics) == characteristics_value)
        number_of_agents = totals[ilocation_id]
        agent_index = where(agents_to_consider)[0]
        if is_percentage[ilocation_id]:
            # number_of_agents means percentage; convert to an absolute number
            number_of_agents = agent_index.size * number_of_agents / 100.0
        number_of_agents = int(number_of_agents)
        if change_type == 'D':
            if number_of_agents > 0:
                if agent_index.size <= number_of_agents:  # unplace all agents
                    unplace_index = agent_index
                else:  # sample agents
                    unplace_index = sample_noreplace(agent_index, number_of_agents)
                agent_set.modify_attribute(name=location_id_name,
                                           data=resize(array([-1], dtype=agent_ids.dtype),
                                                       unplace_index.size),
                                           index=unplace_index)
        elif change_type == 'A':
            if number_of_agents > 0:
                data = {agent_set.get_id_name()[0]:
                            arange(1, number_of_agents + 1, 1) + agent_set.get_id_attribute().max()}
                data[location_id_name] = array([location_id] * number_of_agents)
                for characteristics in other_characteristics:
                    characteristics_value = getattr(
                        agent_event_set.get_data_element_by_id((location_id, current_year)),
                        characteristics)
                    data[characteristics] = array([characteristics_value] * number_of_agents)
                agent_set.add_elements(data, require_all_attributes=False)
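# Hedged sketch (invented values) of the event-table contract documented
# above, written to dict_storage: one 'D' event unplacing 10 sector-2 jobs
# from grid cell 4, and one 'A' event adding 5 agents to grid cell 9.
def example_agent_event_table():
    from numpy import array
    from opus_core.storage_factory import StorageFactory
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name="agent_events", table_data={
        "grid_id":        array([4, 9]),
        "scheduled_year": array([2010, 2010]),
        "total_number":   array([10, 5]),
        "is_percentage":  array([False, False]),
        "change_type":    array(['D', 'A']),
        "sector_id":      array([2, 2]),  # extra characteristic; must be a column of the agent_set
        })
    return storage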
def _add(self, agents_pool, amount, agent_dataset, location_dataset,
         this_refinement, dataset_pool):
    fit_index = self.get_fit_agents_index(agent_dataset,
                                          this_refinement.agent_expression,
                                          this_refinement.location_expression,
                                          dataset_pool)
    movers_index = array([], dtype="int32")
    amount_from_agents_pool = min(amount, len(agents_pool))
    if amount_from_agents_pool > 0:
        agents_index_from_agents_pool = sample_noreplace(agents_pool, amount_from_agents_pool)
        [agents_pool.remove(i) for i in agents_index_from_agents_pool]
        if fit_index.size == 0:
            ## cannot find agents to copy their location from or clone; place agents from agents_pool
            if amount > amount_from_agents_pool:
                logger.log_warning("Refinement requests to add %i agents, but there are only %i agents "
                                   "subtracted from previous action(s) and no agents satisfying %s to "
                                   "clone from; add %i agents instead"
                                   % (amount, amount_from_agents_pool,
                                      ' and '.join([this_refinement.agent_expression,
                                                    this_refinement.location_expression]).strip(' and '),
                                      amount_from_agents_pool,))
                amount = amount_from_agents_pool
            # sample from all suitable locations
            is_suitable_location = location_dataset.compute_variables(this_refinement.location_expression,
                                                                      dataset_pool=dataset_pool)
            location_id_for_agents_pool = sample_replace(
                location_dataset.get_id_attribute()[is_suitable_location],
                amount_from_agents_pool)
        else:
            # sample from locations of suitable agents
            agents_index_for_location = sample_replace(fit_index, amount_from_agents_pool)
            location_id_for_agents_pool = agent_dataset.get_attribute(
                location_dataset.get_id_name()[0])[agents_index_for_location]
            movers_index = concatenate((movers_index, agents_index_for_location))
    elif fit_index.size == 0:
        ## no agents in agents_pool and no agents to clone either --> fail
        logger.log_error("Action 'add' failed: there is no agent subtracted from previous action, "
                         "and no suitable agents satisfying %s to clone from."
                         % ' and '.join([this_refinement.agent_expression,
                                         this_refinement.location_expression]).strip(' and '))
        return

    if amount > amount_from_agents_pool:
        agents_index_to_clone = sample_replace(fit_index, amount - amount_from_agents_pool)
        movers_index = concatenate((movers_index, agents_index_to_clone))

    if movers_index.size > 0 and this_refinement.location_capacity_attribute is not None \
            and len(this_refinement.location_capacity_attribute) > 0:
        movers_location_id = agent_dataset.get_attribute(location_dataset.get_id_name()[0])[movers_index]
        movers_location_index = location_dataset.get_id_index(movers_location_id)
        # see previous comment about histogram function
        num_of_movers_by_location = histogram(movers_location_index,
                                              bins=arange(location_dataset.size() + 1))[0]
        num_of_agents_by_location = location_dataset.compute_variables(
            "number_of_agents=%s.number_of_agents(%s)" % (location_dataset.dataset_name,
                                                          agent_dataset.dataset_name),
            dataset_pool=dataset_pool)
        expand_factor = safe_array_divide(
            (num_of_agents_by_location + num_of_movers_by_location).astype('float32'),
            num_of_agents_by_location,
            return_value_if_denominator_is_zero=1.0)
        new_values = round_(expand_factor *
                            location_dataset.get_attribute(this_refinement.location_capacity_attribute))
        location_dataset.modify_attribute(this_refinement.location_capacity_attribute, new_values)
        self._add_refinement_info_to_dataset(location_dataset, self.id_names, this_refinement,
                                             index=movers_location_index)

    if amount_from_agents_pool > 0:
        agent_dataset.modify_attribute(location_dataset.get_id_name()[0],
                                       location_id_for_agents_pool,
                                       agents_index_from_agents_pool)
        self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement,
                                             index=agents_index_from_agents_pool)
    if amount > amount_from_agents_pool:
        new_agents_index = agent_dataset.duplicate_rows(agents_index_to_clone)
        self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement,
                                             index=agents_index_to_clone)
        self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement,
                                             index=new_agents_index)
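# Numeric illustration (made-up numbers; the opus_core.misc import path is an
# assumption) of the capacity adjustment above: locations receiving movers get
# their capacity attribute scaled by (agents + movers) / agents, so a location
# with 50 agents gaining 2 movers has its capacity multiplied by 1.04.
def example_expand_factor():
    from numpy import array, arange, histogram, round_
    from opus_core.misc import safe_array_divide
    movers_location_index = array([0, 0, 2])   # 2 movers to location 0, 1 to location 2
    num_locations = 3
    num_of_movers = histogram(movers_location_index, bins=arange(num_locations + 1))[0]
    num_of_agents = array([50, 20, 0])
    expand_factor = safe_array_divide(
        (num_of_agents + num_of_movers).astype('float32'),
        num_of_agents, return_value_if_denominator_is_zero=1.0)
    capacity = array([100, 40, 10])
    return round_(expand_factor * capacity)    # -> [104., 40., 10.]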
def _subtract(self, agents_pool, amount, agent_dataset, location_dataset,
              this_refinement, dataset_pool):
    fit_index = self.get_fit_agents_index(agent_dataset,
                                          this_refinement.agent_expression,
                                          this_refinement.location_expression,
                                          dataset_pool)
    if amount > fit_index.size:
        logger.log_warning("Refinement requests to subtract %i agents, but there are %i agents "
                           "in total satisfying %s; subtract %i agents instead"
                           % (amount, fit_index.size,
                              ' and '.join([this_refinement.agent_expression,
                                            this_refinement.location_expression]).strip(' and '),
                              fit_index.size))
        amount = fit_index.size
    if amount == fit_index.size:
        movers_index = fit_index
    else:
        movers_index = sample_noreplace(fit_index, amount)
    agents_pool += movers_index.tolist()

    ## modify location capacity attribute if specified
    if this_refinement.location_capacity_attribute is not None \
            and len(this_refinement.location_capacity_attribute) > 0:
        location_dataset = dataset_pool.get_dataset(
            VariableName(this_refinement.location_expression).get_dataset_name())
        movers_location_id = agent_dataset.get_attribute(location_dataset.get_id_name()[0])[movers_index]
        movers_location_index = location_dataset.get_id_index(movers_location_id)
        # backward-compatibility code for older numpy versions is no longer required,
        # since numpy 1.2.1 or greater is assumed (see the numpy release notes on histogram)
        num_of_movers_by_location = histogram(movers_location_index,
                                              bins=arange(location_dataset.size() + 1))[0]
        num_of_agents_by_location = location_dataset.compute_variables(
            "number_of_agents=%s.number_of_agents(%s)" % (location_dataset.dataset_name,
                                                          agent_dataset.dataset_name),
            dataset_pool=dataset_pool)
        shrink_factor = safe_array_divide(
            (num_of_agents_by_location - num_of_movers_by_location).astype('float32'),
            num_of_agents_by_location,
            return_value_if_denominator_is_zero=1.0)
        new_values = round_(shrink_factor *
                            location_dataset.get_attribute(this_refinement.location_capacity_attribute))
        location_dataset.modify_attribute(this_refinement.location_capacity_attribute, new_values)
        self._add_refinement_info_to_dataset(location_dataset, ("refinement_id", "transaction_id"),
                                             this_refinement, index=movers_location_index)

    agent_dataset.modify_attribute(location_dataset.get_id_name()[0],
                                   -1 * ones(movers_index.size, dtype='int32'),
                                   index=movers_index)
    self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement,
                                         index=movers_index)
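# Minimal sketch of how _subtract feeds a later _add: subtracted agent indices
# accumulate in agents_pool, and an 'add' refinement draws from that pool
# (without replacement) before falling back to cloning. Values are invented.
def example_pool_roundtrip():
    from numpy import array
    from opus_core.sampling_toolbox import sample_noreplace
    agents_pool = []
    movers_index = array([3, 8, 15])      # indices unplaced by a subtract action
    agents_pool += movers_index.tolist()
    drawn = sample_noreplace(agents_pool, min(2, len(agents_pool)))
    for i in drawn:
        agents_pool.remove(i)             # mirror of the removal done in _add
    return agents_pool, drawn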
def run(self, location_set, deletion_event_set, current_year, dataset_pool=None):
    """The deletion_event_set is expected to have attributes:
    grid_id, scheduled_year, number_of_jobs, number_of_households.
    The method finds jobs/households located in the given locations (grid_id),
    then samples the given amount for this year and unplaces them.
    If the value for number_of_jobs/number_of_households is -2, the model
    removes all jobs/households from the location.
    """
    if not deletion_event_set or (deletion_event_set.size() == 0):
        logger.log_status("No jobs/households to be deleted.")
        return
    idx_of_events_this_year = deletion_event_set.get_attribute("scheduled_year") == current_year
    if idx_of_events_this_year.sum() == 0:
        logger.log_status("No jobs/households to be deleted.")
        return
    self.dataset_pool = self.create_dataset_pool(dataset_pool)
    location_id_name = location_set.get_id_name()[0]
    location_ids_in_event_set = deletion_event_set.get_attribute_by_index(location_id_name,
                                                                          idx_of_events_this_year)
    agents_dict = {"number_of_jobs": "job", "number_of_households": "household"}
    # load attributes and datasets needed
    delete_agents = {"job": False, "household": False}
    agent_sets = {}
    for attribute_name, dataset_name in agents_dict.iteritems():
        if attribute_name in deletion_event_set.get_known_attribute_names():
            values = deletion_event_set.get_attribute_by_index(attribute_name, idx_of_events_this_year)
            if values.sum() > 0:
                agent_sets[dataset_name] = self.dataset_pool.get_dataset(dataset_name)
                if location_id_name not in agent_sets[dataset_name].get_known_attribute_names():
                    # compute agents locations
                    agent_sets[dataset_name].compute_one_variable_with_unknown_package(
                        location_id_name, self.dataset_pool)
                delete_agents[dataset_name] = True

    for attribute_name, dataset_name in agents_dict.iteritems():
        if not delete_agents[dataset_name]:
            continue
        # iterate over locations
        for location_id in location_ids_in_event_set:
            number_of_agents = getattr(
                deletion_event_set.get_data_element_by_id((location_id, current_year)),
                attribute_name)
            agent_ids = agent_sets[dataset_name].get_attribute(location_id_name)
            agent_index = where(agent_ids == location_id)[0]
            if (number_of_agents == -2) or (agent_index.size <= number_of_agents):
                # unplace all agents
                unplace_index = agent_index
            else:  # sample agents
                unplace_index = sample_noreplace(agent_index, number_of_agents)
            agent_sets[dataset_name].modify_attribute(
                name=location_id_name,
                data=resize(array([-1], dtype=agent_ids.dtype), unplace_index.size),
                index=unplace_index)
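# Illustrative event rows (invented values) for the deletion model above:
# number_of_jobs == -2 clears all jobs from grid cell 12, while 25 households
# are sampled without replacement from grid cell 7 in 2005.
def example_deletion_event_table():
    from numpy import array
    from opus_core.storage_factory import StorageFactory
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name="deletion_events", table_data={
        "grid_id":              array([12, 7]),
        "scheduled_year":       array([2005, 2005]),
        "number_of_jobs":       array([-2, 0]),
        "number_of_households": array([0, 25]),
        })
    return storage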
def estimate(self, specification, dataset, outcome_attribute, index=None, procedure=None,
             data_objects=None, estimate_config=None, debuglevel=0):
    """'specification' is of type EquationSpecification,
    'dataset' is of type Dataset,
    'outcome_attribute' - string that determines the dependent variable,
    'index' - indices of individuals in dataset for which the model runs. If it is None,
        the whole dataset is considered.
    'procedure' - name of the estimation procedure. If it is None, there should be an entry
        "estimation" in 'estimate_config' that determines the procedure. The class must have
        a method 'run' that takes as arguments 'data', 'regression_procedure' and 'resources'.
        It returns a dictionary with entries 'estimators', 'standard_errors' and 't_values'
        (all 1D numpy arrays).
    'data_objects' is a dictionary where each key is the name of a data object ('zone', ...)
        and its value is an object of class Dataset.
    'estimate_config' is of type Resources; it gives additional arguments for the estimation
        procedure.
    'debuglevel' overwrites the class 'debuglevel'.
    """
    #import wingdbstub
    self.debug.flag = debuglevel
    if estimate_config is None:
        estimate_config = Resources()
    if not isinstance(estimate_config, Resources) and isinstance(estimate_config, dict):
        estimate_config = Resources(estimate_config)
    self.estimate_config = estimate_config.merge_with_defaults(self.estimate_config)
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    self.procedure = procedure
    if self.procedure is None:
        self.procedure = self.estimate_config.get("estimation", None)
    if self.procedure is not None:
        self.procedure = ModelComponentCreator().get_model_component(self.procedure)
    else:
        logger.log_warning("No estimation procedure given, or problems with loading "
                           "the corresponding module.")

    compute_resources = Resources({"debug": self.debug})
    if dataset.size() <= 0:  # no data loaded yet
        dataset.get_id_attribute()
    if index is None:
        index = arange(dataset.size())
    if not isinstance(index, ndarray):
        index = array(index)

    estimation_size_agents = self.estimate_config.get("estimation_size_agents", None)
    # should be a proportion of the agent_set
    if estimation_size_agents is None:
        estimation_size_agents = 1.0
    else:
        estimation_size_agents = max(min(estimation_size_agents, 1.0), 0.0)  # between 0 and 1
    if estimation_size_agents < 1.0:
        self.debug.print_debug("Sampling agents for estimation ...", 3)
        estimation_idx = sample_noreplace(arange(index.size),
                                          int(index.size * estimation_size_agents))
    else:
        estimation_idx = arange(index.size)
    estimation_idx = index[estimation_idx]
    self.debug.print_debug("Number of observations for estimation: " + str(estimation_idx.size), 2)
    if estimation_idx.size <= 0:
        self.debug.print_debug("Nothing to be done.", 2)
        return (None, None)

    coefficients = create_coefficient_from_specification(specification)
    specified_coefficients = SpecifiedCoefficients().create(coefficients, specification, neqs=1)
    submodels = specified_coefficients.get_submodels()
    self.get_status_for_gui().update_pieces_using_submodels(submodels=submodels, leave_pieces=2)
    self.map_agents_to_submodels(submodels, self.submodel_string, dataset, estimation_idx,
                                 dataset_pool=self.dataset_pool, resources=compute_resources,
                                 submodel_size_max=self.estimate_config.get('submodel_size_max', None))
    variables = specified_coefficients.get_full_variable_names_without_constants()
    self.debug.print_debug("Compute variables ...", 4)
    self.increment_current_status_piece()
    dataset.compute_variables(variables, dataset_pool=self.dataset_pool,
                              resources=compute_resources)
    coef = {}
    estimated_coef = {}
    self.outcome = {}
    dataset.compute_variables([outcome_attribute], dataset_pool=self.dataset_pool,
                              resources=compute_resources)
    regression_resources = Resources(estimate_config)
    regression_resources.merge({"debug": self.debug})
    outcome_variable_name = VariableName(outcome_attribute)
    for submodel in submodels:
        coef[submodel] = SpecifiedCoefficientsFor1Submodel(specified_coefficients, submodel)
        self.increment_current_status_piece()
        logger.log_status("Estimate regression for submodel " + str(submodel),
                          tags=["estimate"], verbosity_level=2)
        logger.log_status("Number of observations: " + str(self.observations_mapping[submodel].size),
                          tags=["estimate"], verbosity_level=2)
        self.data[submodel] = dataset.create_regression_data_for_estimation(
            coef[submodel], index=estimation_idx[self.observations_mapping[submodel]])
        self.coefficient_names[submodel] = coef[submodel].get_coefficient_names_without_constant()[0, :]
        if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0) \
                and (self.procedure is not None):
            # observations for this submodel are available
            self.outcome[submodel] = dataset.get_attribute_by_index(
                outcome_variable_name.get_alias(),
                estimation_idx[self.observations_mapping[submodel]])
            regression_resources.merge({"outcome": self.outcome[submodel]})
            regression_resources.merge({
                "coefficient_names": self.coefficient_names[submodel].tolist(),
                "constant_position": coef[submodel].get_constants_positions()})
            estimated_coef[submodel] = self.procedure.run(self.data[submodel], self.regression,
                                                          resources=regression_resources)
            if "estimators" in estimated_coef[submodel].keys():
                coef[submodel].set_coefficient_values(estimated_coef[submodel]["estimators"])
            if "standard_errors" in estimated_coef[submodel].keys():
                coef[submodel].set_standard_errors(estimated_coef[submodel]["standard_errors"])
            if "other_measures" in estimated_coef[submodel].keys():
                for measure in estimated_coef[submodel]["other_measures"].keys():
                    coef[submodel].set_measure(measure,
                                               estimated_coef[submodel]["other_measures"][measure])
            if "other_info" in estimated_coef[submodel].keys():
                for info in estimated_coef[submodel]["other_info"]:
                    coef[submodel].set_other_info(info, estimated_coef[submodel]["other_info"][info])
    coefficients.fill_coefficients(coef)
    self.save_predicted_values_and_errors(specification, coefficients, dataset,
                                          outcome_variable_name, index=index,
                                          data_objects=data_objects)
    return (coefficients, estimated_coef)
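# Hedged sketch of the estimation-procedure contract described in the
# docstring above: any object whose run(data, regression_procedure, resources)
# returns 'estimators', 'standard_errors' and 't_values' arrays can be plugged
# in. This tiny least-squares stub is illustrative, not the shipped opus_core
# procedure.
class MinimalOLSProcedure(object):
    def run(self, data, regression_procedure=None, resources=None):
        from numpy import dot, sqrt, diag
        from numpy.linalg import lstsq, inv
        outcome = resources["outcome"]    # dependent variable, as merged in above
        beta = lstsq(data, outcome)[0]
        resid = outcome - dot(data, beta)
        # unbiased residual variance estimate (n - k degrees of freedom)
        sigma2 = (resid ** 2).sum() / max(1, data.shape[0] - data.shape[1])
        se = sqrt(diag(sigma2 * inv(dot(data.T, data))))
        return {"estimators": beta, "standard_errors": se, "t_values": beta / se}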