def run(self, year, job_set, control_totals, job_building_types, data_objects=None, resources=None):
    self._do_initialize_for_run(job_set, job_building_types, data_objects)
    large_area_ids = control_totals.get_attribute("large_area_id")
    jobs_large_area_ids = job_set.compute_variables("washtenaw.job.large_area_id")
    unique_large_areas = unique(large_area_ids)
    is_year = control_totals.get_attribute("year") == year
    all_jobs_index = arange(job_set.size())
    sectors = unique(control_totals.get_attribute("sector_id")[is_year])
    self._compute_sector_variables(sectors, job_set)
    for area in unique_large_areas:
        idx = where(logical_and(is_year, large_area_ids == area))[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        jobs_index = where(jobs_large_area_ids == area)[0]
        jobs_for_this_area = DatasetSubset(job_set, jobs_index)
        logger.log_status("ETM for area %s (currently %s jobs)" % (area, jobs_for_this_area.size()))
        last_remove_idx = self.remove_jobs.size
        self._do_run_for_this_year(jobs_for_this_area)
        add_jobs_size = self.new_jobs[self.location_id_name].size - self.new_jobs["large_area_id"].size
        remove_jobs_size = self.remove_jobs.size - last_remove_idx
        logger.log_status("add %s, remove %s, total %s" % (add_jobs_size, remove_jobs_size,
                          jobs_for_this_area.size() + add_jobs_size - remove_jobs_size))
        self.new_jobs["large_area_id"] = concatenate((self.new_jobs["large_area_id"],
                                                      array(add_jobs_size * [area], dtype="int32")))
        # transform indices of jobs to be removed into indices of the whole dataset
        self.remove_jobs[last_remove_idx:self.remove_jobs.size] = \
            all_jobs_index[jobs_index[self.remove_jobs[last_remove_idx:self.remove_jobs.size]]]
    self._update_job_set(job_set)
    idx_new_jobs = arange(job_set.size() - self.new_jobs["large_area_id"].size, job_set.size())
    jobs_large_area_ids = job_set.compute_variables("washtenaw.job.large_area_id")
    jobs_large_area_ids[idx_new_jobs] = self.new_jobs["large_area_id"]
    job_set.delete_one_attribute("large_area_id")
    job_set.add_attribute(jobs_large_area_ids, "large_area_id", metadata=AttributeType.PRIMARY)
    # return an index of the new jobs
    return idx_new_jobs
def choose_agents_to_move_from_overfilled_locations(self, capacity, agent_set, agents_index, agents_locations):
    """Agents with the smallest number of units should move again."""
    if capacity is None:
        return array([], dtype='int32')
    index_valid_agents_locations = where(agents_locations > 0)[0]
    valid_agents_locations = agents_locations[index_valid_agents_locations].astype("int32")
    unique_locations = unique(valid_agents_locations).astype("int32")
    index_consider_capacity = self.choice_set.get_id_index(unique_locations)
    capacity_of_affected_locations = capacity[index_consider_capacity]
    overfilled = where(capacity_of_affected_locations < 0)[0]
    movers = array([], dtype='int32')
    indexed_individuals = DatasetSubset(agent_set, agents_index[index_valid_agents_locations])
    ordered_agent_indices = self.get_agents_order(indexed_individuals)
    sizes = indexed_individuals.get_attribute(self.units_full_name)[ordered_agent_indices]
    choice_ids = self.choice_set.get_id_attribute()
    for loc in overfilled:
        agents_to_move = where(valid_agents_locations == choice_ids[index_consider_capacity[loc]])[0]
        if agents_to_move.size > 0:
            n = int(-1 * capacity_of_affected_locations[loc])
            this_sizes = sizes[agents_to_move]
            csum = this_sizes[arange(this_sizes.size - 1, -1, -1)].cumsum()  # ordered increasingly
            csum = csum[arange(csum.size - 1, -1, -1)]  # ordered back decreasingly
            w = where(csum < n)[0]
            if w.size < agents_to_move.size:  # add one more agent so that the cumulative sum exceeds n
                w = concatenate((array([agents_to_move.size - w.size - 1]), w))
            idx = ordered_agent_indices[agents_to_move[w]]
            movers = concatenate((movers, idx))
    return movers
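# Illustrative sketch (plain numpy, made-up values; not part of the model) of the
# tail-cumulative-sum selection used above: walk from the end of the ordered agent
# list and stop once the combined sizes cover the deficit n of an overfilled location.
import numpy as np

sizes = np.array([5, 4, 3, 2, 1])   # unit counts, in the order produced by get_agents_order
n = 4                               # units over capacity

csum = sizes[::-1].cumsum()[::-1]   # csum[j] = sizes[j:].sum() -> [15, 10, 6, 3, 1]
w = np.where(csum < n)[0]           # trailing agents whose tail sum is still < n -> [3, 4]
if w.size < sizes.size:             # add one more agent so the selected total reaches n
    w = np.concatenate(([sizes.size - w.size - 1], w))
print(sizes[w].sum() >= n)          # True: agents at positions [2, 3, 4] cover the deficit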
def estimate_mu(self):
    iout = -1
    self.values_from_mr = {}
    for quantity in self.observed_data.get_quantity_objects():
        dataset_name = quantity.get_dataset_name()
        variable = quantity.get_variable_name()
        iout += 1
        dimension_reduced = False
        quantity_ids = quantity.get_dataset().get_id_attribute()
        for i in range(self.number_of_runs):
            ds = self._compute_variable_for_one_run(i, variable, dataset_name,
                                                    self.get_calibration_year(), quantity)
            if isinstance(ds, InteractionDataset):
                ds = ds.get_flatten_dataset()
            if i == 0:  # first run
                self.mu[iout] = zeros((self.y[iout].size, self.number_of_runs), dtype=float32)
                ids = ds.get_id_attribute()
            else:
                if ds.size() > ids.shape[0]:
                    ds = DatasetSubset(ds, ds.get_id_index(ids))
                    dimension_reduced = True
            scale = self.get_scales(ds, i + 1, variable)
            matching_index = ds.get_id_index(quantity_ids)
            values = scale[matching_index] * ds.get_attribute(variable)[matching_index]
            self.mu[iout][:, i] = try_transformation(values, quantity.get_transformation())
        self.values_from_mr[variable.get_expression()] = self.mu[iout]
        if dimension_reduced:
            self.y[iout] = self.y[iout][quantity.get_dataset().get_id_index(ids)]
def run(self, location_set, development_event_set, *args, **kwargs):
    changed_indices, processed_development_event_indices = \
        EventsCoordinator.run(self, location_set, development_event_set, *args, **kwargs)
    if development_event_set is not None:
        subset = DatasetSubset(development_event_set, processed_development_event_indices)
        subset.write_dataset(out_storage=AttributeCache())
    return (changed_indices, processed_development_event_indices)
def run(self, chunk_specification, dataset, dataset_index=None, result_array_type=float32, **kwargs):
    """'chunk_specification' - determines the number of chunks to use when computing over the dataset.
    'dataset' - an object of class Dataset that is to be chunked.
    'dataset_index' - index of individuals in dataset to be chunked.
    'result_array_type' - type of the resulting array. Can be any numerical type of numpy array.
    **kwargs - keyword arguments.
    The method chunks dataset_index into the desired number of chunks (minimum is 1) and for each
    chunk it calls the method 'run_chunk'. The order of the individuals entering the chunking is
    determined by the method 'get_agents_order'.
    """
    if dataset_index is None:
        dataset_index = arange(dataset.size())
    if not isinstance(dataset_index, ndarray):
        dataset_index = array(dataset_index)
    logger.log_status("Total number of individuals: %s" % dataset_index.size)
    result_array = zeros(dataset_index.size, dtype=result_array_type)
    if dataset_index.size <= 0:
        logger.log_status("Nothing to be done.")
        return result_array
    all_indexed_individuals = DatasetSubset(dataset, dataset_index)
    ordered_agent_indices = self.get_agents_order(all_indexed_individuals)  # set order of individuals in chunks
    # TODO: Remove next six lines after we inherit chunk specification as a text string.
    if chunk_specification is None:
        chunk_specification = {'nchunks': 1}
    chunker = ChunkSpecification(chunk_specification)
    self.number_of_chunks = chunker.nchunks(dataset_index)
    chunksize = int(ceil(all_indexed_individuals.size() / float(self.number_of_chunks)))
    for ichunk in range(self.number_of_chunks):
        logger.start_block("%s chunk %d out of %d." % (self.model_short_name,
                                                       (ichunk + 1), self.number_of_chunks))
        self.index_of_current_chunk = ichunk
        try:
            chunk_agent_indices = ordered_agent_indices[arange((ichunk * chunksize),
                                                               min((ichunk + 1) * chunksize,
                                                                   all_indexed_individuals.size()))]
            logger.log_status("Number of agents in this chunk: %s" % chunk_agent_indices.size)
            result_array[chunk_agent_indices] = self.run_chunk(dataset_index[chunk_agent_indices],
                                                               dataset, **kwargs).astype(result_array_type)
        finally:
            logger.end_block()
    return result_array
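# Minimal usage sketch, assuming this method belongs to a chunked model base class such
# as opus_core's ChunkModel; the subclass, attribute name, and dataset below are
# hypothetical, only the calling convention is taken from the method above.
class MyChunkedModel(ChunkModel):
    model_short_name = "MyChunkedModel"

    def run_chunk(self, agents_index, dataset, **kwargs):
        # per-chunk work: return one value per agent in this chunk
        return dataset.get_attribute_by_index("some_attribute", agents_index) * 2

# model = MyChunkedModel()
# result = model.run(chunk_specification={'nchunks': 4}, dataset=my_dataset)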
def run(self, current_year_emme2_dir, current_year, dataset_pool, config=None):
    """Writes an emme2 input file to [current_year_emme2_dir]/tripgen/inputtg/tazdata.ma2."""
    missing_dataset = ''
    try:
        missing_dataset = 'constant_taz_column'
        taz_col_set = dataset_pool.get_dataset("constant_taz_column")
        taz_col_set.load_dataset()
        missing_dataset = 'zone'
        zone_set = dataset_pool.get_dataset("zone")
        zone_set.load_dataset()
        missing_dataset = 'household'
        household_set = dataset_pool.get_dataset("household")
    except:
        raise Exception("Dataset %s is missing from dataset_pool" % missing_dataset)
    # specify travel input file name: [current_year_emme2_dir]/tripgen/inputtg/tazdata.ma2
    full_path = os.path.join(current_year_emme2_dir, 'tripgen', 'inputtg')
    if not os.path.exists(full_path):
        os.makedirs(full_path)
    tm_input_file = os.path.join(full_path, 'tazdata.ma2')
    tm_year = self._decade_floor(current_year)
    logger.log_status("calculating entries for emme2 input file")
    taz_col_set.compute_variables("zone_id=constant_taz_column.taz")
    current_taz_col = DatasetSubset(taz_col_set,
                                    index=where(taz_col_set.get_attribute("year") == tm_year)[0])
    current_taz_col._id_names = ['taz']
    current_taz_col._create_id_mapping()
    zone_set.join(current_taz_col, "pctmf", join_attribute='zone_id')
    zone_set.join(current_taz_col, "gqi", join_attribute='zone_id')
    zone_set.join(current_taz_col, "gqn", join_attribute='zone_id')
    zone_set.join(current_taz_col, "fteuniv", join_attribute='zone_id')
    zone_set.join(current_taz_col, "den", new_name='density', join_attribute='zone_id')
    value_122 = zeros(zone_set.size())
    index_122 = zone_set.try_get_id_index(array([58, 59, 60, 71, 72, 73, 84, 85, 86, 150,
                                                 251, 266, 489, 578, 687, 688, 797, 868]))
    value_122[index_122[index_122 != -1]] = 1
    zone_set.add_attribute(data=value_122, name="v122")
    value_123 = zeros(zone_set.size())
    index_123 = zone_set.try_get_id_index(array([531, 646, 847, 850, 888, 894, 899, 910]))
    value_123[index_123[index_123 != -1]] = 1
    zone_set.add_attribute(data=value_123, name="v123")
    value_124 = logical_not(value_122 + value_123)
    zone_set.add_attribute(data=value_124, name="v124")
    # specify which variables are passed from urbansim to the travel model; the order matters
    variables_list = self.get_variables_list(dataset_pool)
    zone_set.compute_variables(variables_list, dataset_pool=dataset_pool)
    return self._write_to_file(zone_set, variables_list, tm_input_file)
def run(self, year, job_set, control_totals, job_building_types, data_objects=None, resources=None):
    self._do_initialize_for_run(job_set, job_building_types, data_objects)
    idx = where(control_totals.get_attribute("year") == year)[0]
    self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
    self._do_run_for_this_year(job_set)
    return self._update_job_set(job_set)
def run(self, vacancy_table, history_table, year, location_set, dataset_pool=None, resources=None):
    self.dataset_pool = dataset_pool
    building_types = self.dataset_pool.get_dataset('building_type')
    target_vacancy_this_year = DatasetSubset(vacancy_table,
                                             index=where(vacancy_table.get_attribute("year") == year)[0])
    building_type_ids = target_vacancy_this_year.get_attribute('building_type_id')
    building_type_idx = building_types.get_id_index(building_type_ids)
    self.used_building_types = DatasetSubset(building_types, index=building_type_idx)
    project_types = self.used_building_types.get_attribute('building_type_name')
    is_residential = self.used_building_types.get_attribute('is_residential')
    unit_names = where(is_residential, 'residential_units', 'non_residential_sqft')
    specific_unit_names = where(is_residential, 'residential_units', '_sqft')
    rates = target_vacancy_this_year.get_attribute('target_total_vacancy')
    self.project_units = {}
    self.project_specific_units = {}
    target_rates = {}
    for i in range(self.used_building_types.size()):
        self.project_units[project_types[i]] = unit_names[i]
        if is_residential[i]:
            self.project_specific_units[project_types[i]] = specific_unit_names[i]
        else:
            self.project_specific_units[project_types[i]] = "%s%s" % (project_types[i], specific_unit_names[i])
        target_rates[building_type_ids[i]] = rates[i]
    self._compute_vacancy_and_total_units_variables(location_set, project_types, resources)
    self.pre_check(location_set, target_vacancy_this_year, project_types)
    projects = None
    for project_type_id, target_vacancy_rate in target_rates.iteritems():
        # determine current-year vacancy rates
        project_type = building_types.get_attribute_by_id('building_type_name', project_type_id)
        vacant_units_sum = location_set.get_attribute(self.variable_for_vacancy[project_type]).sum()
        units_sum = float(location_set.get_attribute(self.variable_for_total_units[project_type]).sum())
        should_develop_units = int(round(max(0,
            (target_vacancy_rate * units_sum - vacant_units_sum) / (1 - target_vacancy_rate))))
        logger.log_status(project_type + ": vacant units: %d, should be vacant: %f, sum units: %d"
                          % (vacant_units_sum, target_vacancy_rate * units_sum, units_sum))
        if not should_develop_units:
            logger.log_note(("Will not build any " + project_type + " units, because the current vacancy of %d units\n"
                             + "is more than the %d units desired for the vacancy rate of %f.")
                            % (vacant_units_sum, target_vacancy_rate * units_sum, target_vacancy_rate))
        # create projects
        if should_develop_units > 0:
            this_project = self._create_projects(should_develop_units, project_type, project_type_id,
                                                 history_table, location_set, units_sum, resources)
            if projects is None:
                projects = this_project
            else:
                projects.join_by_rows(this_project, change_ids_if_not_unique=True)
    return projects
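# Worked example (illustrative numbers) of the should_develop_units formula above.
# New units add to both the total and the vacant stock, so the shortfall is grossed
# up by 1/(1 - target): solving (vacant + d) / (units + d) = target for d gives
# d = (target * units - vacant) / (1 - target).
units_sum = 10000.0
vacant_units_sum = 300.0
target_vacancy_rate = 0.05
d = max(0, (target_vacancy_rate * units_sum - vacant_units_sum) / (1 - target_vacancy_rate))
print(int(round(d)))                              # 211
print((vacant_units_sum + d) / (units_sum + d))   # ~0.05, back at the target rate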
def run(self, model_configuration, vacancy_table, history_table, year, location_set, resources=None):
    large_area_ids = vacancy_table.get_attribute("large_area_id")
    locations_large_area_ids = location_set.compute_variables(
        "washtenaw.%s.large_area_id" % location_set.get_dataset_name())
    unique_large_areas = unique(large_area_ids)
    self._compute_vacancy_variables(location_set,
                                    model_configuration['development_project_types'], resources)
    projects = {}
    for area in unique_large_areas:
        location_index = where(locations_large_area_ids == area)[0]
        locations_for_this_area = DatasetSubset(location_set, location_index)
        logger.log_status("DPLCM for area %s" % area)
        target_residential_vacancy_rate, target_non_residential_vacancy_rate = \
            self._get_target_vacancy_rates(vacancy_table, year, area)
        for project_type in model_configuration['development_project_types']:
            # determine current-year vacancy rates
            vacant_units_sum = locations_for_this_area.get_attribute(self.variable_for_vacancy[project_type]).sum()
            units_sum = float(locations_for_this_area.get_attribute(self.units_variable[project_type]).sum())
            vacant_rate = self.safe_divide(vacant_units_sum, units_sum)
            if model_configuration['development_project_types'][project_type]['residential']:
                target_vacancy_rate = target_residential_vacancy_rate
            else:
                target_vacancy_rate = target_non_residential_vacancy_rate
            should_develop_units = int(round(max(0,
                (target_vacancy_rate * units_sum - vacant_units_sum) / (1 - target_vacancy_rate))))
            logger.log_status(project_type + ": vacant units: %d, should be vacant: %f, sum units: %d, will develop: %d"
                              % (vacant_units_sum, target_vacancy_rate * units_sum, units_sum, should_develop_units))
            # create projects
            if should_develop_units > 0:
                project_dataset = self._create_projects(should_develop_units, project_type, history_table,
                                                        locations_for_this_area, units_sum,
                                                        model_configuration['development_project_types'],
                                                        resources)
                project_dataset.add_attribute(array(project_dataset.size() * [area]), "large_area_id",
                                              metadata=AttributeType.PRIMARY)
                if (project_type not in projects.keys()) or (projects[project_type] is None):
                    projects[project_type] = project_dataset
                else:
                    projects[project_type].join_by_rows(project_dataset, change_ids_if_not_unique=True)
    for project_type in model_configuration['development_project_types']:
        if project_type not in projects.keys():
            projects[project_type] = None
        if projects[project_type] is None:
            size = 0
        else:
            projects[project_type].add_submodel_categories()
            size = projects[project_type].size()
        logger.log_status("%s %s projects to be built" % (size, project_type))
    return projects
def run(self, year, household_set, control_totals, characteristics, resources=None):
    # self.person_set = person_set
    self._do_initialize_for_run(household_set)
    control_totals.get_attribute("total_number_of_households")  # to make sure they are loaded
    self.characteristics = characteristics
    self.all_categories = self.characteristics.get_attribute("characteristic")
    self.all_categories = array(map(lambda x: x.lower(), self.all_categories))
    self.scaled_characteristic_names = get_distinct_names(self.all_categories).tolist()
    self.marginal_characteristic_names = copy(control_totals.get_id_name())
    index_year = self.marginal_characteristic_names.index("year")
    self.marginal_characteristic_names.remove("year")
    self.marginal_characteristic_names.remove(self.subarea_id_name)
    region_ids = control_totals.get_attribute(self.subarea_id_name)
    households_region_ids = household_set.compute_one_variable_with_unknown_package(
        variable_name="%s" % self.subarea_id_name, dataset_pool=self.dataset_pool)
    unique_regions = unique(region_ids)
    is_year = control_totals.get_attribute("year") == year
    all_households_index = arange(household_set.size())
    for area in unique_regions:
        idx = where(logical_and(is_year, region_ids == area))[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        households_index = where(households_region_ids == area)[0]
        if households_index.size == 0:
            continue
        households_for_this_area = DatasetSubset(household_set, households_index)
        logger.log_status("HTM for area %s (currently %s households)" % (area, households_for_this_area.size()))
        last_remove_idx = self.remove_households.size
        last_new_hhs_idx = self.mapping_existing_hhs_to_new_hhs.size
        self._do_run_for_this_year(households_for_this_area)
        add_hhs_size = (self.new_households[self.location_id_name].size
                        - self.new_households[self.subarea_id_name].size
                        + self.mapping_existing_hhs_to_new_hhs.size - last_new_hhs_idx)
        remove_hhs_size = self.remove_households.size - last_remove_idx
        logger.log_status("add %s, remove %s, total %s" % (add_hhs_size, remove_hhs_size,
                          households_for_this_area.size() + add_hhs_size - remove_hhs_size))
        self.new_households[self.subarea_id_name] = concatenate((
            self.new_households[self.subarea_id_name],
            array((self.new_households[self.location_id_name].size
                   - self.new_households[self.subarea_id_name].size) * [area], dtype="int32")))
        # transform indices of households to be removed into indices of the whole dataset
        self.remove_households[last_remove_idx:self.remove_households.size] = \
            all_households_index[households_index[self.remove_households[last_remove_idx:self.remove_households.size]]]
        # do the same for households to be duplicated
        self.mapping_existing_hhs_to_new_hhs[last_new_hhs_idx:self.mapping_existing_hhs_to_new_hhs.size] = \
            all_households_index[households_index[self.mapping_existing_hhs_to_new_hhs[last_new_hhs_idx:self.mapping_existing_hhs_to_new_hhs.size]]]
    self._update_household_set(household_set)
    idx_new_households = arange(household_set.size() - self.new_households[self.subarea_id_name].size,
                                household_set.size())
    #household_region_ids = household_set.compute_variables("urbansim_parcel.household.%s" % self.subarea_id_name)
    #household_region_ids[idx_new_households] = self.new_households[self.subarea_id_name]
    region_ids = household_set.get_attribute(self.subarea_id_name).copy()
    household_set.delete_one_attribute(self.subarea_id_name)
    household_set.add_attribute(region_ids, self.subarea_id_name, metadata=AttributeType.PRIMARY)
    # return an index of the new households
    return idx_new_households
def _convert_lccm_input(self, flt_directory_in, flt_directory_out):
    gc.collect()
    t1 = time()
    lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
                              'flt_storage', storage_location=flt_directory_in),
                          out_storage=StorageFactory().get_storage(
                              'flt_storage', storage_location=flt_directory_out))
    # lc.get_header()  # added 23 june 2009 by mm
    mask = lc.get_mask()
    idx = where(mask == 0)[0]
    lcsubset = DatasetSubset(lc, idx)
    print "Converting:"
    lcsubset.write_dataset(attributes=["relative_x"], out_table_name="land_covers")
    lc.delete_one_attribute("relative_x")
    lcsubset.write_dataset(attributes=["relative_y"], out_table_name="land_covers")
    lc.delete_one_attribute("relative_y")
    lc.flush_dataset()
    gc.collect()
    # lc_names = lc.get_primary_attribute_names()
    for attr in lc.get_primary_attribute_names():
        print " ", attr
        lcsubset.write_dataset(attributes=[attr], out_table_name="land_covers")
        lc.delete_one_attribute(attr)
    logger.log_status("Data conversion done. " + str(time() - t1) + " s")
def run(self, employment_events, jobs, current_year):
    # select events for the current year
    events_for_this_year = DatasetSubset(employment_events,
        index=where(employment_events.get_attribute('scheduled_year') == current_year)[0])
    # create control totals on the fly
    control_totals = self.create_control_totals(events_for_this_year, jobs, year=current_year)
    # run the subarea employment transition model in order to create or delete the given number of jobs
    ETM = SubareaEmploymentTransitionModel(subarea_id_name=self.location_dataset.get_id_name()[0],
                                           location_id_name=self._job_location_id_name,
                                           dataset_pool=self.dataset_pool)
    etm_result = ETM.run(current_year, jobs, control_totals,
                         self.dataset_pool.get_dataset('job_building_type'))
    self.place_jobs_into_buildings(events_for_this_year, jobs, etm_result)
    return etm_result
def compute(self, dataset_pool):
    jobs = dataset_pool.get_dataset('job')
    nhb_jobs = DatasetSubset(jobs, where(jobs.get_attribute('is_home_based_job') == 0)[0])
    return self.get_dataset().sum_dataset_over_ids(nhb_jobs, self.job_is_in_employment_sector_group)
def run(self, agent_set, **kwargs):
    large_areas = agent_set.get_attribute(self.large_area_id_name)
    valid_large_area = where(large_areas > 0)[0]
    result = array([], dtype="int32")  # initialized up front so the no-valid-area path still returns
    if valid_large_area.size > 0:
        unique_large_areas = unique(large_areas[valid_large_area])
        cond_array = zeros(agent_set.size(), dtype="bool8")
        cond_array[valid_large_area] = True
        for area in unique_large_areas:
            new_index = where(logical_and(cond_array, large_areas == area))[0]
            agent_subset = DatasetSubset(agent_set, new_index)
            logger.log_status("ARM for area %s (%s agents)" % (area, agent_subset.size()))
            this_result = AgentRelocationModel.run(self, agent_subset, **kwargs)
            result = concatenate((result, new_index[this_result]))
    no_large_area = where(large_areas <= 0)[0]
    result = concatenate((result, no_large_area))
    return result
def run(self, n=500, realestate_dataset_name='building', current_year=None, **kwargs):
    target_vacancy = self.dataset_pool.get_dataset('target_vacancy')
    if current_year is None:
        year = SimulationState().get_current_time()
    else:
        year = current_year
    self.current_year = year
    this_year_index = where(target_vacancy['year'] == year)[0]
    target_vacancy_for_this_year = DatasetSubset(target_vacancy, this_year_index)
    if target_vacancy_for_this_year.size() == 0:
        raise IOError, 'No target vacancy defined for year %s.' % year
    self.all_btypes_size = target_vacancy_for_this_year.size()
    return DevelopmentProjectProposalSamplingModelWithMinimum.run(self, n=n,
        realestate_dataset_name=realestate_dataset_name, current_year=current_year, **kwargs)
def get_active_agent_set(self, submodel=None):
    """Return the agent set that makes choices in the model.
    Works only for the ChoiceModel class.
    """
    agents = self.get_agent_set()
    if submodel is None:
        index = self.get_agent_set_index()
    else:
        index = self.get_agent_set_index_for_submodel(submodel)
    return DatasetSubset(agents, index)
def get_active_choice_set(self, submodel=None):
    """Return the choice set as seen by agents in the model.
    Works only for the ChoiceModel class.
    """
    if submodel is None:
        choices = self.get_choice_set_index()
    else:
        choices = self.get_choice_set_index_for_submodel(submodel)
    choices = unique(choices.flatten())
    ds = self.get_choice_set()
    return DatasetSubset(ds, choices)
def _write_input_file_1(self, current_year_emme2_dir, input_dir, current_year, dataset_pool, config=None):
    missing_dataset = ''
    try:
        missing_dataset = 'group_quarter'
        taz_col_set = dataset_pool.get_dataset("group_quarter")
        taz_col_set.load_dataset()
        missing_dataset = 'zone'
        zone_set = dataset_pool.get_dataset("zone")
        zone_set.load_dataset()
        missing_dataset = 'household'
        household_set = dataset_pool.get_dataset("household")
    except:
        raise Exception("Dataset %s is missing from dataset_pool" % missing_dataset)
    # specify travel input file name
    if not os.path.exists(input_dir):
        os.makedirs(input_dir)
    tm_input_file = os.path.join(input_dir, 'tazdata.in')
    tm_year = self._get_tm_year(current_year, taz_col_set)
    logger.log_status("calculating entries for emme%s input file" % self.emme_version)
    taz_col_set.compute_variables("zone_id=group_quarter.taz")
    current_taz_col = DatasetSubset(taz_col_set,
                                    index=where(taz_col_set.get_attribute("year") == tm_year)[0])
    current_taz_col._id_names = ['taz']
    current_taz_col._create_id_mapping()
    zone_set.join(current_taz_col, "gqdorm", join_attribute='zone_id')
    zone_set.join(current_taz_col, "gqmil", join_attribute='zone_id')
    zone_set.join(current_taz_col, "gqoth", join_attribute='zone_id')
    zone_set.join(current_taz_col, "fteuniv", join_attribute='zone_id')
    # specify which variables are passed from urbansim to the travel model; the order matters
    variables_list = self.get_variables_list(dataset_pool)
    zone_set.compute_variables(variables_list, dataset_pool=dataset_pool)
    return self._write_to_file(zone_set, variables_list, tm_input_file, tm_year)
def run(self, year, household_set, control_totals, characteristics, resources=None):
    self._do_initialize_for_run(household_set)
    control_totals.get_attribute("total_number_of_households")  # to make sure they are loaded
    self.characteristics = characteristics
    self.all_categories = self.characteristics.get_attribute("characteristic")
    self.all_categories = array(map(lambda x: x.lower(), self.all_categories))
    self.scaled_characteristic_names = get_distinct_names(self.all_categories).tolist()
    self.marginal_characteristic_names = copy(control_totals.get_id_name())
    index_year = self.marginal_characteristic_names.index("year")
    self.marginal_characteristic_names.remove("year")
    idx = where(control_totals.get_attribute("year") == year)[0]
    self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
    self._do_run_for_this_year(household_set)
    return self._update_household_set(household_set)
def _do_run(self, location_set, agent_set, agents_index, data_objects=None, resources=None):
    location_id_name = location_set.get_id_name()[0]
    jobsubset = DatasetSubset(agent_set, agents_index)
    if jobsubset.size() <= 0:
        return array([], dtype='int32')
    # unplace jobs
    agent_set.set_values_of_one_attribute(location_id_name,
                                          resize(array([-1.0]), jobsubset.size()), agents_index)
    sector_ids = jobsubset.get_attribute("sector_id")
    sectors = unique(sector_ids)
    counts = ndimage_sum(ones((jobsubset.size(),)), labels=sector_ids.astype('int32'),
                         index=sectors.astype('int32'))
    if sectors.size <= 1:
        counts = array([counts])
    variables = map(lambda x: "number_of_jobs_of_sector_" + str(int(x)), sectors)
    compute_variables = map(lambda var: self.variable_package + "."
                            + location_set.get_dataset_name() + "." + var, variables)
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    self.dataset_pool.add_datasets_if_not_included({agent_set.get_dataset_name(): agent_set})
    location_set.compute_variables(compute_variables, dataset_pool=self.dataset_pool)
    if self.filter is None:
        location_index = arange(location_set.size())
    else:
        filter_values = location_set.compute_variables([self.filter], dataset_pool=self.dataset_pool)
        location_index = where(filter_values > 0)[0]
    if location_index.size <= 0:
        logger.log_status("No locations available. Nothing to be done.")
        return array([])
    location_subset = DatasetSubset(location_set, location_index)
    i = 0
    for sector in sectors:
        distr = location_subset.get_attribute(variables[i])
        if ma.allclose(distr.sum(), 0):
            uniform_prob = 1.0 / distr.size
            distr = resize(array([uniform_prob], dtype='float64'), distr.size)
            logger.log_warning("Probabilities in scaling model for sector " + str(sector)
                               + " sum to 0.0. Substituting uniform distribution!")
        # random_sample = sample(location_set.get_attribute("grid_id"), k=int(counts[i]),
        #                        probabilities=distr)
        distr = distr / float(distr.sum())
        random_sample = probsample_replace(location_subset.get_id_attribute(), size=int(counts[i]),
                                           prob_array=distr)
        idx = where(sector_ids == sector)[0]
        # modify job locations
        agent_set.set_values_of_one_attribute(location_id_name, random_sample, agents_index[idx])
        i += 1
    return agent_set.get_attribute_by_index(location_id_name, agents_index)
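# Small sketch (plain numpy, made-up data) of the per-sector scaling placement above:
# jobs of a sector are placed in proportion to that sector's existing presence at each
# location, with a uniform fallback when the sector is absent everywhere.
import numpy as np

location_ids = np.array([101, 102, 103])
existing = np.array([8.0, 2.0, 0.0])    # jobs of one sector already at each location
count = 5                               # jobs of that sector to place

if np.allclose(existing.sum(), 0):
    distr = np.ones(existing.size) / existing.size   # uniform fallback
else:
    distr = existing / existing.sum()                # [0.8, 0.2, 0.0]
np.random.seed(0)
placed = np.random.choice(location_ids, size=count, p=distr)  # with replacement, like probsample_replace
print(placed)   # mostly 101, sometimes 102, never 103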
def run(self, year=None, dataset_pool=None, **kwargs):
    if dataset_pool is None:
        dataset_pool = SessionConfiguration().get_dataset_pool()
    if year is None:
        year = SimulationState().get_current_time()
    this_year_index = where(self.scheduled_events.get_attribute('year') == year)[0]
    scheduled_events_for_this_year = DatasetSubset(self.scheduled_events, this_year_index)
    scheduled_events_for_this_year.load_dataset_if_not_loaded()
    column_names = list(set(self.scheduled_events.get_known_attribute_names())
                        - set(['year', 'action', 'attribute', 'amount', 'event_id', '_hidden_id_']))
    column_names.sort()
    # column_values = dict([(name, scheduled_events_for_this_year.get_attribute(name)) for name in column_names])
    for index in range(scheduled_events_for_this_year.size()):
        indicator = ones(self.dataset.size(), dtype='bool')
        event_attr = {}
        for attribute in column_names:
            if attribute in self.dataset.get_known_attribute_names():
                dataset_attribute = self.dataset.get_attribute(attribute)
            else:
                ## this is done inside the loop because some action may delete computed attributes,
                ## such as dataset.add_elements()
                try:
                    dataset_attribute = self.dataset.compute_one_variable_with_unknown_package(
                        attribute, dataset_pool=dataset_pool)
                except:
                    raise ValueError, "attribute %s used in scheduled events dataset can not be found in dataset %s" % (attribute, self.dataset.get_dataset_name())
            # if attribute in column_names:
            aval = scheduled_events_for_this_year.get_attribute(attribute)[index]
            if aval == -1:
                continue  # ignore if column value is -1
            else:
                indicator *= dataset_attribute == aval
                event_attr.update({attribute: aval})
        # agents in dataset satisfying all conditions are identified by indicator
        legit_index = where(indicator)[0]
        this_event = scheduled_events_for_this_year.get_data_element(index)
        if not hasattr(this_event, 'attribute'):
            action_attr_name = ''
        else:
            action_attr_name = this_event.attribute
        action_function = getattr(self, '_' + this_event.action.strip().lower())
        action_function(amount=this_event.amount,
                        attribute=action_attr_name,
                        dataset=self.dataset,
                        index=legit_index,
                        data_dict=event_attr)
        self.post_run(self.dataset, legit_index, **kwargs)
    return self.dataset
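# Minimal sketch (plain numpy, invented columns) of the indicator conjunction above:
# each event column that is not -1 contributes one equality condition, and the
# elementwise boolean product selects the agents matching all of them.
import numpy as np

building_type = np.array([1, 2, 2, 3, 2])
zone_id = np.array([10, 10, 20, 20, 10])

indicator = np.ones(5, dtype='bool')
for values, aval in [(building_type, 2), (zone_id, 10)]:
    if aval == -1:
        continue                   # -1 means "any value" for this column
    indicator *= values == aval    # boolean AND via elementwise product
print(np.where(indicator)[0])      # [1 4]: type-2 records in zone 10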
def _do_run(self, location_set, agent_set, agents_index, resources=None):
    location_id_name = location_set.get_id_name()[0]
    asubset = DatasetSubset(agent_set, agents_index)
    if asubset.size() <= 0:
        return array([], dtype='int32')
    # unplace agents
    agent_set.modify_attribute(location_id_name, resize(array([-1]), asubset.size()), agents_index)
    if self.filter is None:
        location_index = arange(location_set.size())
    else:
        filter_values = location_set.compute_variables([self.filter], dataset_pool=self.dataset_pool)
        location_index = where(filter_values > 0)[0]
    if location_index.size <= 0:
        logger.log_status("No locations available. Nothing to be done.")
        return array([])
    location_subset = DatasetSubset(location_set, location_index)
    if self.consider_capacity:
        location_set.compute_variables([self.capacity_attribute], dataset_pool=self.dataset_pool)
        weights = location_subset[self.capacity_attribute]
        if self.number_of_agents_attribute is not None:
            location_set.compute_variables([self.number_of_agents_attribute],
                                           dataset_pool=self.dataset_pool)
            weights = clip(weights - location_subset[self.number_of_agents_attribute],
                           0, location_subset[self.capacity_attribute])
    else:
        weights = ones(location_subset.size())
    if weights.sum() <= 0:
        logger.log_status("Locations' capacity sums to zero. Nothing to be done.")
        return array([])
    distr = weights / float(weights.sum())
    random_sample = probsample_replace(location_subset.get_id_attribute(), size=asubset.size(),
                                       prob_array=distr)
    agent_set.modify_attribute(location_id_name, random_sample, agents_index)
    return agent_set.get_attribute_by_index(location_id_name, agents_index)
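# Small sketch (plain numpy, made-up numbers) of the capacity-weighted placement above:
# remaining room becomes the sampling weight, so emptier locations attract
# proportionally more relocated agents, and full ones attract none.
import numpy as np

capacity = np.array([100, 50, 10])
occupied = np.array([90, 10, 10])
weights = np.clip(capacity - occupied, 0, capacity)   # remaining room: [10, 40, 0]
distr = weights / float(weights.sum())                # [0.2, 0.8, 0.0]

location_ids = np.array([1, 2, 3])
np.random.seed(0)
placed = np.random.choice(location_ids, size=8, p=distr)  # with replacement, like probsample_replace
print(placed)   # drawn from locations 1 and 2 only; location 3 has no room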
class HouseholdTransitionModel(Model):
    """Creates and removes households from household_set. New households are duplicated from
    the existing households, keeping the joint distribution of all characteristics.
    """

    model_name = "Household Transition Model"

    def __init__(self, location_id_name="grid_id", dataset_pool=None, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.location_id_name = location_id_name
        self.dataset_pool = self.create_dataset_pool(dataset_pool, ["urbansim", "opus_core"])

    def run(self, year, household_set, control_totals, characteristics, resources=None):
        self._do_initialize_for_run(household_set)
        control_totals.get_attribute("total_number_of_households")  # to make sure they are loaded
        self.characteristics = characteristics
        self.all_categories = self.characteristics.get_attribute("characteristic")
        self.all_categories = array(map(lambda x: x.lower(), self.all_categories))
        self.scaled_characteristic_names = get_distinct_names(self.all_categories).tolist()
        self.marginal_characteristic_names = copy(control_totals.get_id_name())
        index_year = self.marginal_characteristic_names.index("year")
        self.marginal_characteristic_names.remove("year")
        idx = where(control_totals.get_attribute("year") == year)[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        self._do_run_for_this_year(household_set)
        return self._update_household_set(household_set)

    def _update_household_set(self, household_set):
        index_of_duplicated_hhs = household_set.duplicate_rows(self.mapping_existing_hhs_to_new_hhs)
        household_set.modify_attribute(name=self.location_id_name,
                                       data=-1 * ones((index_of_duplicated_hhs.size,),
                                                      dtype=household_set.get_data_type(self.location_id_name)),
                                       index=index_of_duplicated_hhs)
        household_set.remove_elements(self.remove_households)
        if self.new_households[self.location_id_name].size > 0:
            max_id = household_set.get_id_attribute().max()
            self.new_households[self.household_id_name] = concatenate((
                self.new_households[self.household_id_name],
                arange(max_id + 1, max_id + self.new_households[self.location_id_name].size + 1)))
            household_set.add_elements(self.new_households, require_all_attributes=False)
        difference = household_set.size() - self.household_size
        self.debug.print_debug("Difference in number of households: %s"
                               " (original %s, new %s, created %s, deleted %s)"
                               % (difference, self.household_size, household_set.size(),
                                  self.new_households[self.household_id_name].size
                                  + self.mapping_existing_hhs_to_new_hhs.size,
                                  self.remove_households.size), 3)
        if self.location_id_name in household_set.get_attribute_names():
            self.debug.print_debug("Number of unplaced households: %s"
                                   % where(household_set.get_attribute(self.location_id_name) <= 0)[0].size, 3)
        return difference

    def _do_initialize_for_run(self, household_set):
        self.household_id_name = household_set.get_id_name()[0]
        self.new_households = {
            self.location_id_name: array([], dtype=household_set.get_data_type(self.location_id_name, int32)),
            self.household_id_name: array([], dtype=household_set.get_data_type(self.household_id_name, int32))
        }
        self.remove_households = array([], dtype='int32')
        self.household_size = household_set.size()
        self.max_id = household_set.get_id_attribute().max()
        self.arrays_from_categories = {}
        self.arrays_from_categories_mapping = {}
        self.mapping_existing_hhs_to_new_hhs = array([], dtype=household_set.get_data_type(self.household_id_name, int32))

    def _do_run_for_this_year(self, household_set):
        self.household_set = household_set
        groups = self.control_totals_for_this_year.get_id_attribute()
        self.create_arrays_from_categories(self.household_set)
        all_characteristics = self.arrays_from_categories.keys()
        self.household_set.load_dataset_if_not_loaded(attributes=all_characteristics)  # prevent lazy loading to save runtime
        idx_shape = []
        number_of_combinations = 1
        num_attributes = len(all_characteristics)
        for iattr in range(num_attributes):
            attr = all_characteristics[iattr]
            max_bins = self.arrays_from_categories[attr].max() + 1
            idx_shape.append(max_bins)
            number_of_combinations = number_of_combinations * max_bins
            if attr not in self.new_households.keys():
                self.new_households[attr] = array([], dtype=self.household_set.get_data_type(attr, float32))
        self.number_of_combinations = int(number_of_combinations)
        idx_tmp = indices(tuple(idx_shape))
        categories_index = zeros((self.number_of_combinations, num_attributes))
        for i in range(num_attributes):  # create indices of all combinations
            categories_index[:, i] = idx_tmp[i].ravel()
        categories_index_mapping = {}
        for i in range(self.number_of_combinations):
            categories_index_mapping[tuple(categories_index[i, ].tolist())] = i

        def get_category(values):
            bins = map(lambda x, y: self.arrays_from_categories[x][int(y)], all_characteristics, values)
            try:
                return categories_index_mapping[tuple(bins)]
            except KeyError, msg:
                where_error = where(array(bins) == -1)[0]
                if where_error.size > 0:
                    raise KeyError, \
                        "Invalid value of %s for attribute %s. It is not included in the characteristics groups." % (
                            array(values)[where_error], array(all_characteristics)[where_error])
                raise KeyError, msg

        if num_attributes > 0:
            # the next array must be a copy of the household values; otherwise it changes the original values
            values_array = reshape(array(self.household_set.get_attribute(all_characteristics[0])),
                                   (self.household_set.size(), 1))
            if num_attributes > 1:
                for attr in all_characteristics[1:]:
                    values_array = concatenate((values_array,
                                                reshape(array(self.household_set.get_attribute(attr)),
                                                        (self.household_set.size(), 1))), axis=1)
            for i in range(values_array.shape[1]):
                if values_array[:, i].max() > 10000:
                    values_array[:, i] = values_array[:, i] / 10
                values_array[:, i] = clip(values_array[:, i], 0,
                                          self.arrays_from_categories[all_characteristics[i]].size - 1)
            # determine for each household to what category it belongs
            self.household_categories = array(map(lambda x: get_category(x), values_array))  # performance bottleneck
            number_of_households_in_categories = array(ndimage_sum(ones((self.household_categories.size,)),
                                                                   labels=self.household_categories + 1,
                                                                   index=arange(self.number_of_combinations) + 1))
        else:
            # no marginal characteristics; consider just one group
            self.household_categories = zeros(self.household_set.size(), dtype='int32')
            number_of_households_in_categories = array([self.household_set.size()])
        g = arange(num_attributes)
        # iterate over marginal characteristics
        for group in groups:
            if groups.ndim <= 1:  # there is only one group (no marginal char.)
                id = group
            else:
                id = tuple(group.tolist())
            group_element = self.control_totals_for_this_year.get_data_element_by_id(id)
            total = group_element.total_number_of_households
            for i in range(g.size):
                g[i] = eval("group_element." + self.arrays_from_categories.keys()[i])
            if g.size <= 0:
                l = ones((number_of_households_in_categories.size,))
            else:
                l = categories_index[:, 0] == g[0]
                for i in range(1, num_attributes):
                    l = logical_and(l, categories_index[:, i] == g[i])
            # l has 1's for combinations of this group
            number_in_group = array(ndimage_sum(number_of_households_in_categories, labels=l, index=1))
            diff = int(total - number_in_group)
            if diff < 0:  # households to be removed
                is_in_group = l[self.household_categories]
                w = where(is_in_group)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(self.household_set, w, -1 * diff,
                                                        self.location_id_name)
                self.remove_households = concatenate((self.remove_households, non_placed,
                                                      sample_noreplace(sample_array,
                                                                       max(0, abs(diff) - size_non_placed))))
            if diff > 0:  # households to be created
                self._create_households(diff, l)
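# Tiny illustration (plain numpy) of how indices() enumerates every combination of
# characteristic bins in _do_run_for_this_year: for two characteristics with 2 and 3
# bins, each row of categories_index is one (bin_a, bin_b) pair.
import numpy as np

idx_shape = (2, 3)                  # e.g. 2 income bins x 3 age bins (made up)
idx_tmp = np.indices(idx_shape)
categories_index = np.zeros((2 * 3, 2), dtype='int32')
for i in range(2):
    categories_index[:, i] = idx_tmp[i].ravel()
print(categories_index.tolist())    # [[0, 0], [0, 1], [0, 2], [1, 0], [1, 1], [1, 2]]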
def run(self, n=500,
        realestate_dataset_name='building',
        current_year=None,
        occupied_spaces_variable="occupied_spaces",
        total_spaces_variable="total_spaces",
        run_config=None, debuglevel=0):
    """
    run method of the Development Project Proposal Sampling Model

    **Parameters**

        **n** : int, sample size for each iteration

               sample n proposals at a time, which are then evaluated one by one
               until the target vacancies are satisfied or proposals run out

        **realestate_dataset_name** : string, name of the real estate dataset

        **current_year** : int, simulation year. If None, get value from SimulationState

        **occupied_spaces_variable** : string, variable name for calculating how many
                                       spaces are currently occupied

                                       It can either be a variable of the real_estate dataset
                                       that returns the amount of spaces being occupied, or a
                                       target_vacancy attribute that contains the names of
                                       real_estate variables.

        **total_spaces_variable** : string, variable name for calculating total existing spaces

    **Returns**

        **proposal_set** : indices to proposal_set that are accepted

        **demolished_buildings** : buildings to be demolished for re-development
    """
    self.accepted_proposals = []
    self.demolished_buildings = []  # ids of buildings to be demolished
    if self.proposal_set.n <= 0:
        logger.log_status("The size of proposal_set is 0; no proposals to consider, skipping DPPSM.")
        return (self.proposal_set, self.demolished_buildings)

    target_vacancy = self.dataset_pool.get_dataset('target_vacancy')
    if current_year is None:
        year = SimulationState().get_current_time()
    else:
        year = current_year
    this_year_index = where(target_vacancy['year'] == year)[0]
    target_vacancy_for_this_year = DatasetSubset(target_vacancy, this_year_index)
    if target_vacancy_for_this_year.size() == 0:
        raise IOError, 'No target vacancy defined for year %s.' % year

    ## current_target_vacancy.target_attribute_name = 'target_vacancy_rate'
    ## each column provides a category for which a target vacancy is specified
    self.column_names = list(set(target_vacancy.get_known_attribute_names())
                             - set([target_vacancy.target_attribute_name,
                                    'year', '_hidden_id_',
                                    occupied_spaces_variable, total_spaces_variable]))
    self.column_names.sort(reverse=True)

    ## buildings table provides existing stocks
    self.realestate_dataset = self.dataset_pool.get_dataset(realestate_dataset_name)

    occupied_spaces_variables = [occupied_spaces_variable]
    total_spaces_variables = [total_spaces_variable]
    if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
        occupied_spaces_variables += unique(target_vacancy_for_this_year[occupied_spaces_variable]).tolist()
    if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
        total_spaces_variables += unique(target_vacancy_for_this_year[total_spaces_variable]).tolist()

    self._compute_variables_for_dataset_if_needed(self.realestate_dataset,
                                                  self.column_names + occupied_spaces_variables + total_spaces_variables)
    self._compute_variables_for_dataset_if_needed(self.proposal_component_set,
                                                  self.column_names + total_spaces_variables)
    self.proposal_set.compute_variables("urbansim_parcel.development_project_proposal.number_of_components",
                                        dataset_pool=self.dataset_pool)

    n_column = len(self.column_names)
    target_vacancy_for_this_year.column_values = target_vacancy_for_this_year.get_multiple_attributes(self.column_names).reshape((-1, n_column))
    self.realestate_dataset.column_values = self.realestate_dataset.get_multiple_attributes(self.column_names).reshape((-1, n_column))
    self.proposal_component_set.column_values = self.proposal_component_set.get_multiple_attributes(self.column_names).reshape((-1, n_column))

    # defaults; can be changed later by the spaces_variable specified in target_vacancy rates
    self.realestate_dataset.total_spaces = self.realestate_dataset[total_spaces_variable]
    self.proposal_component_set.total_spaces = self.proposal_component_set[total_spaces_variable]
    self.realestate_dataset.occupied_spaces = self.realestate_dataset[occupied_spaces_variable]

    self.accounting = {}
    self.logging = {}
    #has_needed_components = zeros(self.proposal_set.size(), dtype='bool')
    for index in range(target_vacancy_for_this_year.size()):
        column_value = tuple(target_vacancy_for_this_year.column_values[index, :].tolist())
        accounting = {'target_vacancy': target_vacancy_for_this_year[target_vacancy.target_attribute_name][index]}

        realestate_indexes = self.get_index_by_condition(self.realestate_dataset.column_values, column_value)
        component_indexes = self.get_index_by_condition(self.proposal_component_set.column_values, column_value)

        this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
        ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
        ## or in the target_vacancy_rate dataset for each vacancy category
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_occupied_spaces_variable = target_vacancy_for_this_year[occupied_spaces_variable][index]
            self.realestate_dataset.occupied_spaces[realestate_indexes] = \
                (self.realestate_dataset[this_occupied_spaces_variable][realestate_indexes]
                 ).astype(self.realestate_dataset.occupied_spaces.dtype)

        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_total_spaces_variable = target_vacancy_for_this_year[total_spaces_variable][index]
            self.realestate_dataset.total_spaces[realestate_indexes] = \
                (self.realestate_dataset[this_total_spaces_variable][realestate_indexes]
                 ).astype(self.realestate_dataset.total_spaces.dtype)
            self.proposal_component_set.total_spaces[component_indexes] = \
                (self.proposal_component_set[this_total_spaces_variable][component_indexes]
                 ).astype(self.proposal_component_set.total_spaces.dtype)

        accounting["total_spaces_variable"] = this_total_spaces_variable
        accounting["total_spaces"] = self.realestate_dataset.total_spaces[realestate_indexes].sum()
        accounting["occupied_spaces_variable"] = this_occupied_spaces_variable
        accounting["occupied_spaces"] = self.realestate_dataset.occupied_spaces[realestate_indexes].sum()
        accounting["target_spaces"] = int(round(accounting["occupied_spaces"]
                                                / (1 - accounting["target_vacancy"])))
        accounting["proposed_spaces"] = 0
        accounting["demolished_spaces"] = 0

        self.accounting[column_value] = accounting

        if self._is_target_reached(column_value):
            proposal_indexes = self.proposal_set.get_id_index(
                unique(self.proposal_component_set['proposal_id'][component_indexes]))
            single_component_indexes = where(self.proposal_set["number_of_components"] == 1)[0]
            self.weight[intersect1d(proposal_indexes, single_component_indexes)] = 0.0

    ## handle planned proposals: all proposals with status_id == is_planned
    ## and start_year == year are accepted
    planned_proposal_indexes = where(logical_and(
        self.proposal_set.get_attribute("status_id") == self.proposal_set.id_planned,
        self.proposal_set.get_attribute("start_year") == year))[0]
    self.consider_proposals(planned_proposal_indexes, force_accepting=True)

    # consider proposals (in this order: proposed, tentative)
    for status in [self.proposal_set.id_proposed, self.proposal_set.id_tentative]:
        stat = (self.proposal_set.get_attribute("status_id") == status)
        if stat.sum() == 0:
            continue

        logger.log_status("Sampling from %s eligible proposals of status %s." % (stat.sum(), status))
        iteration = 0
        while (not self._is_target_reached()):
            ## prevent proposals from being sampled for vacancy types whose target is reached
            #for column_value in self.accounting.keys():

            if self.weight[stat].sum() == 0.0:
                logger.log_warning("Running out of proposals of status %s before vacancy targets are reached; there aren't any proposals with non-zero weight" % status)
                break

            available_indexes = where(logical_and(stat, self.weight > 0))[0]
            sample_size = minimum(available_indexes.size, n)
            sampled_proposal_indexes = probsample_noreplace(available_indexes, sample_size,
                                                            prob_array=self.weight[available_indexes],
                                                            return_index=False)
            self.consider_proposals(sampled_proposal_indexes)
            self.weight[sampled_proposal_indexes] = 0
            #sample_size = 1
            #sampled_proposal_index = probsample_noreplace(available_indexes, sample_size,
            #                                              prob_array=self.weight[available_indexes],
            #                                              return_index=False)
            #self.consider_proposal(sampled_proposal_index)
            #self.weight[sampled_proposal_index] = 0
            iteration += 1

    self._log_status()

    # set status of accepted proposals to 'active'
    self.proposal_set.modify_attribute(name="status_id",
                                       data=self.proposal_set.id_active,
                                       index=array(self.accepted_proposals, dtype='int32'))

    # Code added by Jesse Ayers, MAG, 7/20/2009
    # Get the active projects:
    stat_id = self.proposal_set.get_attribute('status_id')
    actv = where(stat_id == 1)[0]
    # Where there are active projects, compute the total_land_area_taken
    # and store it on the development_project_proposals dataset
    # so it can be used by the building_construction_model for the proper
    # computation of units_proposed for those projects with velocity curves
    if actv.size > 0:
        total_land_area_taken_computed = self.proposal_set.get_attribute(
            'urbansim_parcel.development_project_proposal.land_area_taken')
        self.proposal_set.modify_attribute('total_land_area_taken',
                                           total_land_area_taken_computed[actv], actv)

    return (self.proposal_set,
            self.realestate_dataset.get_id_attribute()[self.demolished_buildings])
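# Worked example (illustrative numbers) of the target_spaces bookkeeping above.
# If occupied spaces must equal a (1 - target_vacancy) share of the stock, the
# stock needed is occupied / (1 - target_vacancy):
occupied_spaces = 9500
target_vacancy = 0.05
target_spaces = int(round(occupied_spaces / (1 - target_vacancy)))
print(target_spaces)                    # 10000
print(target_spaces - occupied_spaces)  # 500 spaces can be vacant at the target rate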
def run(self, n=500, realestate_dataset_name='building', current_year=None,
        occupied_spaces_variable="occupied_spaces", total_spaces_variable="total_spaces",
        minimum_spaces_attribute="minimum_spaces",
        within_parcel_selection_weight_string=None,
        within_parcel_selection_n=0,
        within_parcel_selection_compete_among_types=False,
        within_parcel_selection_threshold=75,
        within_parcel_selection_MU_same_weight=False,
        within_parcel_selection_transpose_interpcl_weight=True,
        run_config=None, debuglevel=0):
    """run method of the Development Project Proposal Sampling Model

    **Parameters**

        **n** : int, sample size for each iteration

            sample n proposals at a time, which are then evaluated one by one
            until the target vacancies are satisfied or proposals run out

        **realestate_dataset_name** : string, name of the real estate dataset

        **current_year** : int, simulation year. If None, the value is taken from SimulationState

        **occupied_spaces_variable** : string, variable name for calculating how many spaces are currently occupied

            It can either be a variable of the real estate dataset that returns the
            number of occupied spaces, or a target_vacancy attribute that contains
            the names of real estate variables.

        **total_spaces_variable** : string, variable name for calculating the total number of existing spaces

    **Returns**

        **proposal_set** : indices to proposal_set that are accepted

        **demolished_buildings** : buildings to be demolished for re-development
    """
    self.accepted_proposals = []
    self.demolished_buildings = []  # ids of buildings to be demolished
    if self.proposal_set.n <= 0:
        logger.log_status("The size of proposal_set is 0; no proposals to consider, skipping DPPSM.")
        return (self.proposal_set, self.demolished_buildings)

    target_vacancy = self.dataset_pool.get_dataset('target_vacancy')
    if current_year is None:
        year = SimulationState().get_current_time()
    else:
        year = current_year
    this_year_index = where(target_vacancy['year'] == year)[0]
    target_vacancy_for_this_year = DatasetSubset(target_vacancy, this_year_index)
    if target_vacancy_for_this_year.size() == 0:
        raise IOError, 'No target vacancy defined for year %s.' % year

    ## current_target_vacancy.target_attribute_name = 'target_vacancy_rate'
    ## each column provides a category for which a target vacancy is specified
    self.column_names = list(set(target_vacancy.get_known_attribute_names()) -
                             set([target_vacancy.target_attribute_name,
                                  'year', '_hidden_id_',
                                  minimum_spaces_attribute,
                                  occupied_spaces_variable,
                                  total_spaces_variable]))
    self.column_names.sort(reverse=True)

    ## buildings table provides existing stocks
    self.realestate_dataset = self.dataset_pool.get_dataset(realestate_dataset_name)

    occupied_spaces_variables = [occupied_spaces_variable]
    total_spaces_variables = [total_spaces_variable]
    if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
        occupied_spaces_variables += unique(target_vacancy_for_this_year[occupied_spaces_variable]).tolist()
    if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
        total_spaces_variables += unique(target_vacancy_for_this_year[total_spaces_variable]).tolist()

    self._compute_variables_for_dataset_if_needed(self.realestate_dataset,
                                                  self.column_names + occupied_spaces_variables + total_spaces_variables)
    self._compute_variables_for_dataset_if_needed(self.proposal_component_set,
                                                  self.column_names + total_spaces_variables)
    self.proposal_set.compute_variables(["urbansim_parcel.development_project_proposal.number_of_components",
                                         "urbansim_parcel.development_project_proposal.land_area_taken"],
                                        dataset_pool=self.dataset_pool)

    n_column = len(self.column_names)
    self.column_names_index = {}
    for iname in range(n_column):
        self.column_names_index[self.column_names[iname]] = iname

    target_vacancy_for_this_year.column_values = target_vacancy_for_this_year.get_multiple_attributes(self.column_names).reshape((-1, n_column))
    self.realestate_dataset.column_values = self.realestate_dataset.get_multiple_attributes(self.column_names).reshape((-1, n_column))
    self.proposal_component_set.column_values = self.proposal_component_set.get_multiple_attributes(self.column_names).reshape((-1, n_column))

    # defaults; can be overridden later by a spaces_variable specified in the target_vacancy rates
    self.realestate_dataset.total_spaces = self.realestate_dataset[total_spaces_variable]
    self.proposal_component_set.total_spaces = self.proposal_component_set[total_spaces_variable]
    self.realestate_dataset.occupied_spaces = self.realestate_dataset[occupied_spaces_variable]

    self.accounting = {}
    self.logging = {}
    #has_needed_components = zeros(self.proposal_set.size(), dtype='bool')
    for index in range(target_vacancy_for_this_year.size()):
        column_value = tuple(target_vacancy_for_this_year.column_values[index, :].tolist())
        accounting = {'target_vacancy': target_vacancy_for_this_year[target_vacancy.target_attribute_name][index]}
        if minimum_spaces_attribute in target_vacancy_for_this_year.get_known_attribute_names():
            accounting['minimum_spaces'] = target_vacancy_for_this_year[minimum_spaces_attribute][index]

        realestate_indexes = self.get_index_by_condition(self.realestate_dataset.column_values, column_value)
        component_indexes = self.get_index_by_condition(self.proposal_component_set.column_values, column_value)

        this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
        ## total/occupied_spaces_variable can be specified either as a universal name for all real estate
        ## or in the target_vacancy_rate dataset for each vacancy category
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_occupied_spaces_variable = target_vacancy_for_this_year[occupied_spaces_variable][index]
            self.realestate_dataset.occupied_spaces[realestate_indexes] = \
                (self.realestate_dataset[this_occupied_spaces_variable][realestate_indexes]).astype(self.realestate_dataset.occupied_spaces.dtype)
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_total_spaces_variable = target_vacancy_for_this_year[total_spaces_variable][index]
            self.realestate_dataset.total_spaces[realestate_indexes] = \
                (self.realestate_dataset[this_total_spaces_variable][realestate_indexes]).astype(self.realestate_dataset.total_spaces.dtype)
            self.proposal_component_set.total_spaces[component_indexes] = \
                (self.proposal_component_set[this_total_spaces_variable][component_indexes]).astype(self.proposal_component_set.total_spaces.dtype)

        accounting["total_spaces_variable"] = this_total_spaces_variable
        accounting["total_spaces"] = self.realestate_dataset.total_spaces[realestate_indexes].sum()
        accounting["occupied_spaces_variable"] = this_occupied_spaces_variable
        accounting["occupied_spaces"] = self.realestate_dataset.occupied_spaces[realestate_indexes].sum()
        accounting["target_spaces"] = int(round(accounting["occupied_spaces"] /
                                                (1 - accounting["target_vacancy"])))
        accounting["proposed_spaces"] = 0
        accounting["demolished_spaces"] = 0
        self.accounting[column_value] = accounting

        if self._is_target_reached(column_value):
            proposal_indexes = self.proposal_set.get_id_index(unique(self.proposal_component_set['proposal_id'][component_indexes]))
            if n_column == 1:
                comp_indexes = where(ndimage.sum(self.proposal_component_set[self.column_names[0]] == column_value[0],
                                                 labels=self.proposal_component_set['proposal_id'],
                                                 index=self.proposal_set.get_id_attribute()
                                                 ) == self.proposal_set["number_of_components"])[0]
            else:
                comp_indexes = where(self.proposal_set["number_of_components"] == 1)[0]
            target_reached_prop_idx = intersect1d(proposal_indexes, comp_indexes)
            self.weight[target_reached_prop_idx] = 0.0
            self.proposal_set["status_id"][intersect1d(target_reached_prop_idx,
                                                       where(self.proposal_set["status_id"] == self.proposal_set.id_tentative)[0])] = self.proposal_set.id_no_demand

    ## handle planned proposals: all proposals with status_id == id_planned
    ## and start_year == year are accepted
    planned_proposal_indexes = where(logical_and(
        self.proposal_set.get_attribute("status_id") == self.proposal_set.id_planned,
        self.proposal_set.get_attribute("start_year") == year))[0]
    logger.start_block("Processing %s planned proposals" % planned_proposal_indexes.size)
    self.consider_proposals(planned_proposal_indexes, force_accepting=True)
    logger.end_block()

    if within_parcel_selection_n > 0:
        logger.start_block("Selecting proposals within parcels (%s proposals per parcel)" % within_parcel_selection_n)
        self.select_proposals_within_parcels(nmax=within_parcel_selection_n,
                                             weight_string=within_parcel_selection_weight_string,
                                             compete_among_types=within_parcel_selection_compete_among_types,
                                             filter_threshold=within_parcel_selection_threshold,
                                             MU_same_weight=within_parcel_selection_MU_same_weight,
                                             transpose_interpcl_weight=within_parcel_selection_transpose_interpcl_weight)
        logger.end_block()

    # consider proposals (in this order: proposed, tentative)
    for status in [self.proposal_set.id_proposed, self.proposal_set.id_tentative]:
        stat = (self.proposal_set.get_attribute("status_id") == status)
        if stat.sum() == 0:
            continue
        logger.log_status("Sampling from %s eligible proposals of status %s." % (stat.sum(), status))
        iteration = 0
        while (not self._is_target_reached()):
            ## prevent proposals from being sampled for vacancy types whose target is reached
            #for column_value in self.accounting.keys():
            if self.weight[stat].sum() == 0.0:
                logger.log_warning("Running out of proposals of status %s before vacancy targets are reached; there aren't any proposals with non-zero weight" % status)
                break
            available_indexes = where(logical_and(stat, self.weight > 0))[0]
            sample_size = minimum(available_indexes.size, n)
            sampled_proposal_indexes = probsample_noreplace(available_indexes, sample_size,
                                                            prob_array=self.weight[available_indexes],
                                                            return_index=False)
            #sorted_sampled_indices = argsort(self.weight[sampled_proposal_indexes])
            #self.consider_proposals(sampled_proposal_indexes[sorted_sampled_indices][::-1])
            self.consider_proposals(sampled_proposal_indexes)
            self.weight[sampled_proposal_indexes] = 0
            iteration += 1

    self._log_status()

    # set status of accepted proposals to 'active'
    self.proposal_set.modify_attribute(name="status_id", data=self.proposal_set.id_active,
                                       index=array(self.accepted_proposals, dtype='int32'))

    # Code added by Jesse Ayers, MAG, 7/20/2009
    # Get the active projects:
    stat_id = self.proposal_set.get_attribute('status_id')
    actv = where(stat_id == 1)[0]
    # Where there are active projects, compute the total_land_area_taken
    # and store it on the development_project_proposals dataset
    # so it can be used by the building_construction_model for the proper
    # computation of units_proposed for those projects with velocity curves
    if actv.size > 0:
        total_land_area_taken_computed = self.proposal_set['land_area_taken']
        self.proposal_set.modify_attribute('total_land_area_taken', total_land_area_taken_computed[actv], actv)

    return (self.proposal_set, self.realestate_dataset.get_id_attribute()[self.demolished_buildings])
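# --- Illustration (not part of the model code) ---------------------------
# A minimal standalone sketch of the vacancy bookkeeping above, on toy
# numbers: a category's target stock is its occupied spaces grossed up by
# the target vacancy rate, and proposals keep being accepted while
# existing + proposed stock stays below that target. All values here are
# illustrative.
occupied, existing, target_vacancy = 900, 1000, 0.08
target_spaces = int(round(occupied / (1 - target_vacancy)))
proposed_spaces = 0
is_target_reached = existing + proposed_spaces >= target_spaces
print(target_spaces, is_target_reached)  # 978 True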
def run(self, dataset, outcome_attribute, weight_attribute, control_totals, current_year,
        control_total_attribute=None, year_attribute='year', capacity_attribute=None,
        add_quantity=False, dataset_pool=None):
    """'dataset' is a Dataset for which a quantity 'outcome_attribute' is created.
    The total amount of the quantity is given by the attribute 'control_total_attribute'
    of the 'control_totals' Dataset. If it is not given, it is assumed to have the same
    name as 'outcome_attribute'. The 'weight_attribute' of 'dataset' determines the
    allocation weights. The 'control_totals' Dataset contains an attribute 'year'
    (or alternatively, an attribute given by the 'year_attribute' argument) and optionally
    other attributes that must be known to the 'dataset' (such as a geography). For each
    row of the control_totals dataset for which the year matches 'current_year', the total
    amount is distributed among the corresponding members of 'dataset' according to weights.
    If a 'capacity_attribute' is given (an attribute of 'dataset'), the algorithm removes
    any allocations that exceed the capacity and redistributes them among the remaining
    members. The resulting values are appended to 'dataset' as 'outcome_attribute'
    (as a primary attribute). If add_quantity is True and 'outcome_attribute' exists in
    dataset, the resulting values are added to the current values of 'outcome_attribute'.
    """
    ct_attr = control_totals.get_known_attribute_names()
    if year_attribute not in ct_attr:
        raise StandardError, "Year attribute '%s' must be a known attribute of the control totals dataset." % year_attribute
    ct_attr.remove(year_attribute)
    if control_total_attribute is None:
        control_total_attribute = outcome_attribute
    if control_total_attribute not in ct_attr:
        raise StandardError, "Attribute '%s' must be a known attribute of the control totals dataset." % control_total_attribute
    ct_attr.remove(control_total_attribute)
    if control_totals._is_hidden_id():
        ct_attr.remove(control_totals.id_name()[0])

    # compute weights and other attributes necessary for the allocation
    attrs_to_compute = [weight_attribute] + ct_attr
    if capacity_attribute is not None:
        attrs_to_compute.append(capacity_attribute)
    for attr in attrs_to_compute:
        try:
            dataset.compute_variables(attr, dataset_pool=dataset_pool)
        except:
            dataset.compute_one_variable_with_unknown_package(attr, dataset_pool=dataset_pool)

    # create a subset of control totals for the current year
    year_index = where(control_totals.get_attribute(year_attribute) == current_year)[0]
    if year_index.size <= 0:
        logger.log_warning("No control total for year %s" % current_year)
        return None
    control_totals_for_this_year = DatasetSubset(control_totals, year_index)

    # check capacity
    if capacity_attribute is not None:
        if dataset.get_attribute(capacity_attribute).sum() < control_totals_for_this_year.get_attribute(control_total_attribute).sum():
            logger.log_warning("Capacity (%s) is smaller than the amount to allocate (%s)." % (
                dataset.get_attribute(capacity_attribute).sum(),
                control_totals_for_this_year.get_attribute(control_total_attribute).sum()))
        C = dataset.get_attribute(capacity_attribute).astype('int32')

    all_weights = dataset.get_attribute(weight_attribute)
    outcome = zeros(dataset.size(), dtype='int32')
    for ct_row in range(control_totals_for_this_year.size()):
        is_considered = ones(dataset.size(), dtype='bool8')
        for characteristics in ct_attr:
            is_considered = logical_and(is_considered,
                                        dataset.get_attribute(characteristics) ==
                                        control_totals_for_this_year.get_attribute(characteristics)[ct_row])
        T = control_totals_for_this_year.get_attribute(control_total_attribute)[ct_row]
        it = 1
        while True:
            is_considered_idx = where(is_considered)[0]
            weights = all_weights[is_considered_idx]
            weights_sum = float(weights.sum())
            outcome[is_considered_idx] = round_(outcome[is_considered_idx] +
                                                T * (weights / weights_sum)).astype('int32')
            if capacity_attribute is None:
                break
            diff = outcome[is_considered_idx] - C[is_considered_idx]
            outcome[is_considered_idx] = clip(outcome[is_considered_idx], 0, C[is_considered_idx])
            if it == 1 and C[is_considered_idx].sum() < T:
                logger.log_warning("Control total %s cannot be met due to a capacity restriction of %s" % (T, C[is_considered_idx].sum()))
            T = where(diff < 0, 0, diff).sum()
            if T <= 0:
                break
            is_considered = logical_and(is_considered, outcome < C)
            it += 1

    if add_quantity and (outcome_attribute in dataset.get_known_attribute_names()):
        dataset.modify_attribute(name=outcome_attribute,
                                 data=outcome + dataset.get_attribute(outcome_attribute))
        logger.log_status('New values added to the attribute %s of dataset %s.' % (outcome_attribute, dataset.get_dataset_name()))
    else:
        dataset.add_primary_attribute(name=outcome_attribute, data=outcome)
        logger.log_status('New values stored into attribute %s of dataset %s.' % (outcome_attribute, dataset.get_dataset_name()))
    dataset.flush_attribute(outcome_attribute)
    return outcome
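# --- Illustration (not part of the model code) ---------------------------
# A plain-NumPy sketch of the capacity-constrained allocation loop above:
# a total T is spread proportionally to weights, overflow is clipped at
# capacity, and the clipped remainder is redistributed among members that
# still have room. Names and numbers are illustrative; the model itself
# operates on Opus datasets.
import numpy as np

def allocate_with_capacity(T, weights, capacity):
    outcome = np.zeros(weights.size, dtype='int32')
    open_idx = np.arange(weights.size)
    while T > 0 and open_idx.size > 0:
        w = weights[open_idx].astype(float)
        outcome[open_idx] += np.round(T * w / w.sum()).astype('int32')
        overflow = outcome[open_idx] - capacity[open_idx]
        outcome[open_idx] = np.clip(outcome[open_idx], 0, capacity[open_idx])
        T = overflow[overflow > 0].sum()        # amount to redistribute
        open_idx = np.where(outcome < capacity)[0]
    return outcome

print(allocate_with_capacity(100, np.array([1., 2., 7.]),
                             np.array([10, 50, 60], dtype='int32')))
# -> [10 30 60]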
index_attribute = "lc0207_100k_0"
# 4. Years - date pair of input data; the year is appended to the flt_directory_in specified in #1
#years = [1991, 1995]
#years = [1995, 1999]
#years = [2002]
#years = sys.argv[3]
years = [2007, 2007]
lc1 = LandCoverDataset(in_storage=StorageFactory().get_storage('flt_storage',
                           storage_location=os.path.join(flt_directory_in, str(years[0]))),
                       out_storage=StorageFactory().get_storage('flt_storage',
                           storage_location=os.path.join(flt_directory_out, str(years[0]))))
agents_index = where(lc1.get_attribute(index_attribute))[0]
lc1subset = DatasetSubset(lc1, agents_index)
print "Writing set 1:"
for attr in lc1.get_primary_attribute_names():
    print "  ", attr
    lc1subset.write_dataset(attributes=[attr], out_table_name="land_covers")
    # leaving this line in causes the processing of every other input data file;
    # commenting it out causes a memory error
    lc1.delete_one_attribute(attr)
lc2 = LandCoverDataset(in_storage=StorageFactory().get_storage('flt_storage',
                           storage_location=os.path.join(flt_directory_in, str(years[1]))),
                       out_storage=StorageFactory().get_storage('flt_storage',
                           storage_location=os.path.join(flt_directory_out, str(years[1]))))
lc2subset = DatasetSubset(lc2, agents_index)
print "Writing set 2:"
for attr in lc2.get_primary_attribute_names():
    print "  ", attr
def run(self, year, job_set, control_totals, job_building_types, data_objects=None, resources=None):
    self._do_initialize_for_run(job_set, job_building_types, data_objects)
    idx = where(control_totals.get_attribute("year") == year)[0]
    self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
    self._do_run_for_this_year(job_set)
    return self._update_job_set(job_set)
def run(self, realestate_dataset,
        year=None,
        occupied_spaces_variable="occupied_units",
        total_spaces_variable="total_units",
        target_attribute_name='target_vacancy_rate',
        sample_from_dataset=None,
        sample_filter="",
        reset_attribute_value={},
        year_built='year_built',
        dataset_pool=None,
        append_to_realestate_dataset=False,
        table_name="development_projects",
        dataset_name="development_project",
        id_name='development_project_id',
        **kwargs):
    """ sample_filter attribute/variable indicates which records in the dataset
        are eligible in the sampling for removal or cloning
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
    """
    if self.target_vancy_dataset is None:
        raise RuntimeError, "target_vacancy_rate dataset is unspecified."
    if not sample_from_dataset:
        sample_from_dataset = realestate_dataset
    #if dataset_pool is None:
    #    dataset_pool = SessionConfiguration().get_dataset_pool()
    alldata = dataset_pool.get_dataset('alldata')
    unit_names = dataset_pool.get_dataset('building_type').get_attribute('unit_name')
    sqft_per_job = dataset_pool.get_dataset('building_sqft_per_job')
    zones = realestate_dataset.compute_variables("building.disaggregate(parcel.zone_id)")
    type_ids = realestate_dataset.get_attribute("building_type_id")
    building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(zones.max(), type_ids.max())
    if year is None:
        year = SimulationState().get_current_time()
    this_year_index = where(self.target_vancy_dataset.get_attribute('year') == year)[0]
    target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)

    column_names = list(set(self.target_vancy_dataset.get_known_attribute_names()) -
                        set([target_attribute_name, occupied_spaces_variable,
                             total_spaces_variable, 'year', '_hidden_id_']))
    column_names.sort(reverse=True)
    column_values = dict([(name, target_vacancy_for_this_year.get_attribute(name))
                          for name in column_names + [target_attribute_name]])

    independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
    dataset_known_attributes = realestate_dataset.get_known_attribute_names()
    sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
    for variable in independent_variables:
        if variable not in dataset_known_attributes:
            realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
        if variable not in sample_dataset_known_attributes:
            sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
    dataset_known_attributes = realestate_dataset.get_known_attribute_names()  # update after compute

    if sample_filter:
        short_name = VariableName(sample_filter).get_alias()
        if short_name not in dataset_known_attributes:
            filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
        else:
            filter_indicator = sample_from_dataset.get_attribute(short_name)
    else:
        filter_indicator = 1

    sampled_index = array([], dtype=int32)

    # log header
    if PrettyTable is not None:
        status_log = PrettyTable()
        status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
    else:
        logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
    error_log = ''
    for index in range(target_vacancy_for_this_year.size()):
        this_sampled_index = array([], dtype=int32)
        indicator = ones(realestate_dataset.size(), dtype='bool')
        sample_indicator = ones(sample_from_dataset.size(), dtype='bool')
        criterion = {}  # for logging
        for attribute in independent_variables:
            if attribute in dataset_known_attributes:
                dataset_attribute = realestate_dataset.get_attribute(attribute)
                sample_attribute = sample_from_dataset.get_attribute(attribute)
            else:
                raise ValueError, "attribute %s used in the target vacancy dataset cannot be found in dataset %s" % (attribute, realestate_dataset.get_dataset_name())

            if attribute + '_min' in column_names:
                amin = target_vacancy_for_this_year.get_attribute(attribute + '_min')[index]
                criterion.update({attribute + '_min': amin})
                if amin != -1:
                    indicator *= dataset_attribute >= amin
                    sample_indicator *= sample_attribute >= amin
            if attribute + '_max' in column_names:
                amax = target_vacancy_for_this_year.get_attribute(attribute + '_max')[index]
                criterion.update({attribute + '_max': amax})
                if amax != -1:
                    indicator *= dataset_attribute <= amax
                    sample_indicator *= sample_attribute <= amax
            if attribute in column_names:
                aval = column_values[attribute][index]
                criterion.update({attribute: aval})
                if aval == -1:
                    continue
                elif aval == -2:
                    ## treat -2 in a control totals column as the complement set,
                    ## i.e. all other values not already specified in this column
                    indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                    sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                else:
                    indicator *= dataset_attribute == aval
                    sample_indicator *= sample_attribute == aval

        this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
        ## total/occupied_spaces_variable can be specified either as a universal name for all real estate
        ## or in the target_vacancy_rate dataset for each vacancy category
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]

        logger.be_quiet()  # temporarily disable logging
        realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
        realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
        sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
        if unit_names[index] == "residential_units":
            num_units = alldata.compute_variables("alldata.aggregate_all(household.building_type_id==%s)" % (index + 1))
            #persons = household_set.compute_variables("%s.number_of_agents(%s)" % (hh_ds_name, person_ds_name), resources=resources)
            num_units = num_units[0]
        else:
            num_units = alldata.compute_variables("alldata.aggregate_all(job.disaggregate(employment_submarket.building_type_id)==%s)" % (index + 1))
            num_units = num_units * building_sqft_per_job_table[1, (index + 1)]
            num_units = num_units[0]
        # need to make sure that the job employment submarket doesn't rely on building...
        # Must do non-home-based jobs only and then multiply by building_sqft
        logger.talk()

        actual_num = (indicator * realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
        #target_num = int(round((indicator * realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /
        target_num = int(round(num_units /
                               (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
        diff = target_num - actual_num
        if diff > 0:
            total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
            legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
            if legit_index.size > 0:
                mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                num_of_projects_to_sample = int(diff / mean_size)
                ## sample at least 1 project when diff > 0; otherwise it is an endless loop when num_of_projects_to_sample = 0
                num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                    lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                    this_sampled_index = concatenate((this_sampled_index, lucky_index))
                this_sampled_index = this_sampled_index[0:(1 + searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                sampled_index = concatenate((sampled_index, this_sampled_index))
            else:
                error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                             ','.join([col + "=" + str(criterion[col]) for col in column_names]) + '\n'
        #if diff < 0: #TODO demolition; not yet supported

        ## log status
        action = "0"
        if this_sampled_index.size > 0:
            action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
            if diff > 0:
                action = "+" + str(action_num)
            if diff < 0:
                action = "-" + str(action_num)
        cat = [str(criterion[col]) for col in column_names]
        cat += [str(actual_num), str(target_num), str(diff), action]
        if PrettyTable is not None:
            status_log.add_row(cat)
        else:
            logger.log_status("\t".join(cat))

    if PrettyTable is not None:
        logger.log_status("\n" + status_log.get_string())
    if error_log:
        logger.log_error(error_log)

    result_data = {}
    result_dataset = None
    index = array([], dtype='int32')
    if sampled_index.size > 0:
        ### ideally duplicate_rows() is all that is needed to add newly cloned rows;
        ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
        ##realestate_dataset.duplicate_rows(sampled_index)
        result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32'))
        ## also add 'independent_variables' to the new dataset
        for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
            if reset_attribute_value.has_key(attribute):
                result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
            else:
                result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
        if id_name and result_data and id_name not in result_data:
            result_data[id_name] = arange(sampled_index.size, dtype='int32') + 1
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name=table_name, table_data=result_data)
        result_dataset = Dataset(id_name=id_name,
                                 in_storage=storage,
                                 in_table_name=table_name,
                                 dataset_name=dataset_name)
        index = arange(result_dataset.size())

    if append_to_realestate_dataset:
        if len(result_data) > 0:
            index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                    change_ids_if_not_unique=True)
        result_dataset = realestate_dataset

    return (result_dataset, index)
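# --- Illustration (not part of the model code) ---------------------------
# A standalone sketch of the sampling step above: when `diff` spaces are
# missing, projects are drawn with replacement from the eligible pool until
# their cumulative size covers diff, and searchsorted trims the draw to the
# smallest sufficient prefix. np.random.choice stands in for opus_core's
# sample_replace; names and numbers are illustrative.
import numpy as np

def sample_until_covered(spaces, legit_index, diff):
    n_per_draw = max(int(diff / spaces[legit_index].mean()), 1)
    sampled = np.array([], dtype='int32')
    while spaces[sampled].sum() < diff:
        lucky = np.random.choice(legit_index, n_per_draw)
        sampled = np.concatenate((sampled, lucky)).astype('int32')
    cut = 1 + np.searchsorted(np.cumsum(spaces[sampled]), diff)
    return sampled[:cut]

spaces = np.array([12, 5, 30, 8, 20])
picked = sample_until_covered(spaces, np.array([0, 2, 4]), 55)
print(picked, spaces[picked].sum())  # the picked projects cover at least 55 spaces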
    ] })
#CacheScenarioDatabase().run(gridcell_config)

# step 2: cache water demand data
dbcon = ScenarioDatabase(database_name="water_demand_seattle2")

print "Create Storage object."
from opus_core.storage_factory import StorageFactory
storage = StorageFactory().get_storage(type="mysql_storage", storage_location=dbcon)

from waterdemand.datasets.consumption_dataset import ConsumptionDataset

consumption_types = ['wrmr', 'wcsr', 'wrsr']  # 'wcmr'
for consumption_type in consumption_types:
    consumption = ConsumptionDataset(in_storage=storage, in_table_name=consumption_type + '_grid')
    for year in range(1990, 2001):
        print "%s %s" % (consumption_type, year)
        year_index = where(consumption.get_attribute("billyear") == year)[0]
        out_storage = StorageFactory().get_storage(type="flt_storage",
                                                   storage_location=os.path.join(cache_directory, str(year)))
        consumption_subset = DatasetSubset(consumption, year_index)
        consumption_subset.write_dataset(out_storage=out_storage,
                                         out_table_name=consumption_type.lower())
class EmploymentTransitionModel(Model):
    """Creates and removes jobs from job_set."""

    model_name = "Employment Transition Model"
    location_id_name_default = "grid_id"
    variable_package_default = "urbansim"

    def __init__(self, location_id_name=None, variable_package=None, dataset_pool=None, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.location_id_name = self.location_id_name_default
        self.variable_package = self.variable_package_default
        if location_id_name is not None:
            self.location_id_name = location_id_name
        if variable_package is not None:
            self.variable_package = variable_package
        self.dataset_pool = self.create_dataset_pool(dataset_pool, ["urbansim", "opus_core"])

    def run(self, year, job_set, control_totals, job_building_types, data_objects=None, resources=None):
        self._do_initialize_for_run(job_set, job_building_types, data_objects)
        idx = where(control_totals.get_attribute("year") == year)[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        self._do_run_for_this_year(job_set)
        return self._update_job_set(job_set)

    def _do_initialize_for_run(self, job_set, job_building_types, data_objects=None):
        self.max_id = job_set.get_id_attribute().max()
        self.job_size = job_set.size()
        self.job_id_name = job_set.get_id_name()[0]
        self.new_jobs = {
            self.location_id_name: array([], dtype=job_set.get_data_type(self.location_id_name, int32)),
            "sector_id": array([], dtype=job_set.get_data_type("sector_id", int32)),
            self.job_id_name: array([], dtype=job_set.get_data_type(self.job_id_name, int32)),
            "building_type": array([], dtype=job_set.get_data_type("building_type", int8))
        }
        self.remove_jobs = array([], dtype=int32)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included({job_building_types.get_dataset_name(): job_building_types})
        self.available_building_types = job_building_types.get_id_attribute()

    def _compute_sector_variables(self, sectors, job_set):
        compute_resources = Resources({"debug": self.debug})
        job_set.compute_variables(
            map(lambda x: "%s.%s.is_in_employment_sector_%s_home_based" % (self.variable_package, job_set.get_dataset_name(), x), sectors) +
            map(lambda x: "%s.%s.is_in_employment_sector_%s_non_home_based" % (self.variable_package, job_set.get_dataset_name(), x), sectors) +
            ["is_non_home_based_job", "is_home_based_job"],
            dataset_pool=self.dataset_pool,
            resources=compute_resources)

    def _do_run_for_this_year(self, job_set):
        building_type = job_set.get_attribute("building_type")
        sectors = unique(self.control_totals_for_this_year.get_attribute("sector_id"))
        self._compute_sector_variables(sectors, job_set)
        for sector in sectors:
            isector = where(self.control_totals_for_this_year.get_attribute("sector_id") == sector)[0]
            total_hb_jobs = self.control_totals_for_this_year.get_attribute("total_home_based_employment")[isector]
            total_nhb_jobs = self.control_totals_for_this_year.get_attribute("total_non_home_based_employment")[isector]
            is_in_sector_hb = job_set.get_attribute("is_in_employment_sector_%s_home_based" % sector)
            is_in_sector_nhb = job_set.get_attribute("is_in_employment_sector_%s_non_home_based" % sector)
            diff_hb = int(total_hb_jobs - is_in_sector_hb.astype(int8).sum())
            diff_nhb = int(total_nhb_jobs - is_in_sector_nhb.astype(int8).sum())

            if diff_hb < 0:  # home-based jobs to be removed
                w = where(is_in_sector_hb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1 * diff_hb, self.location_id_name)
                self.remove_jobs = concatenate((self.remove_jobs, non_placed,
                                                sample_noreplace(sample_array, max(0, abs(diff_hb) - size_non_placed))))

            if diff_nhb < 0:  # non-home-based jobs to be removed
                w = where(is_in_sector_nhb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1 * diff_nhb, self.location_id_name)
                self.remove_jobs = concatenate((self.remove_jobs, non_placed,
                                                sample_noreplace(sample_array, max(0, abs(diff_nhb) - size_non_placed))))

            if diff_hb > 0:  # home-based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate((self.new_jobs[self.location_id_name],
                    zeros((diff_hb,), dtype=self.new_jobs[self.location_id_name].dtype.type)))
                self.new_jobs["sector_id"] = concatenate((self.new_jobs["sector_id"],
                    (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_hb))))
                if 1 in is_in_sector_hb:
                    building_type_distribution = array(ndimage_sum(is_in_sector_hb,
                                                                   labels=building_type,
                                                                   index=self.available_building_types))
                elif 1 in job_set.get_attribute("is_home_based_job"):
                    # take the building type distribution from the whole region
                    building_type_distribution = array(ndimage_sum(job_set.get_attribute("is_home_based_job"),
                                                                   labels=building_type,
                                                                   index=self.available_building_types))
                else:
                    # there are no home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(self.available_building_types.size)
                    building_type_distribution = building_type_distribution / building_type_distribution.sum()
                sampled_building_types = probsample_replace(self.available_building_types, diff_hb,
                    building_type_distribution / float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"],
                    sampled_building_types.astype(self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_hb
                self.new_jobs[self.job_id_name] = concatenate((self.new_jobs[self.job_id_name],
                                                               arange(self.max_id + 1, new_max_id + 1)))
                self.max_id = new_max_id

            if diff_nhb > 0:  # non-home-based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate((self.new_jobs[self.location_id_name],
                    zeros((diff_nhb,), dtype=self.new_jobs[self.location_id_name].dtype.type)))
                self.new_jobs["sector_id"] = concatenate((self.new_jobs["sector_id"],
                    (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_nhb))))
                if 1 in is_in_sector_nhb:
                    building_type_distribution = array(ndimage_sum(is_in_sector_nhb,
                                                                   labels=building_type,
                                                                   index=self.available_building_types))
                elif 1 in job_set.get_attribute("is_non_home_based_job"):
                    # take the building type distribution from the whole region
                    building_type_distribution = array(ndimage_sum(job_set.get_attribute("is_non_home_based_job"),
                                                                   labels=building_type,
                                                                   index=self.available_building_types))
                else:
                    # there are no non-home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(self.available_building_types.size)
                    building_type_distribution = building_type_distribution / building_type_distribution.sum()
                sampled_building_types = probsample_replace(self.available_building_types, diff_nhb,
                    building_type_distribution / float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"],
                    sampled_building_types.astype(self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_nhb
                self.new_jobs[self.job_id_name] = concatenate((self.new_jobs[self.job_id_name],
                                                               arange(self.max_id + 1, new_max_id + 1)))
                self.max_id = new_max_id

    def _update_job_set(self, job_set):
        job_set.remove_elements(self.remove_jobs)
        job_set.add_elements(self.new_jobs, require_all_attributes=False)
        difference = job_set.size() - self.job_size
        self.debug.print_debug("Difference in number of jobs: %s (original %s,"
                               " new %s, created %s, deleted %s)" %
                               (difference, self.job_size, job_set.size(),
                                self.new_jobs[self.job_id_name].size, self.remove_jobs.size), 3)
        self.debug.print_debug("Number of unplaced jobs: %s" %
                               where(job_set.get_attribute(self.location_id_name) <= 0)[0].size, 3)
        return difference

    def prepare_for_run(self, storage, **kwargs):
        from urbansim.datasets.control_total_dataset import ControlTotalDataset
        control_totals = ControlTotalDataset(in_storage=storage, what="employment")
        sample_control_totals(storage, control_totals, **kwargs)
        return control_totals
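# --- Illustration (not part of the model code) ---------------------------
# The building-type draw in _do_run_for_this_year, in isolation: new jobs
# get building types sampled with replacement, with probabilities
# proportional to the sector's existing distribution (uniform if the
# sector is empty). np.random.choice stands in for opus_core's
# probsample_replace; ids and counts are illustrative.
import numpy as np

available_building_types = np.array([1, 2, 3])
counts = np.array([40., 10., 0.])        # stand-in for the ndimage_sum result
if counts.sum() == 0:
    counts = np.ones_like(counts)        # no jobs anywhere: uniform fallback
p = counts / counts.sum()
print(np.random.choice(available_building_types, size=5, p=p))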
def run(self, n=500, run_config=None, current_year=None, debuglevel=0):
    """
    n - sample n proposals at a time, evaluate them one by one
    """
    self.demolished_buildings = array([], dtype='int32')  # ids of buildings to be demolished
    if current_year is None:
        current_year = SimulationState().get_current_time()
    if not self.positive_proposals:
        logger.log_status("Proposal Set size <= 0, no proposals to consider, skipping DPPSM.")
        return (self.proposal_set, self.demolished_buildings)
    self.proposal_component_set.compute_variables([
        'urbansim_parcel.development_project_proposal_component.units_proposed',
        'urbansim_parcel.development_project_proposal_component.is_residential'],
        dataset_pool=self.dataset_pool)
    self.proposal_set.compute_variables([
        'urbansim_parcel.development_project_proposal.number_of_components',
        'zone_id=development_project_proposal.disaggregate(parcel.zone_id)',
        #'occurence_frequency = development_project_proposal.disaggregate(development_template.sample_size)'
        ], dataset_pool=self.dataset_pool)
    buildings = self.dataset_pool.get_dataset("building")
    buildings.compute_variables([
        "occupied_units_for_jobs = urbansim_parcel.building.number_of_non_home_based_jobs",
        "units_for_jobs = urbansim_parcel.building.total_non_home_based_job_space",
        "occupied_residential_units = urbansim_parcel.building.number_of_households",
        #"urbansim_parcel.building.existing_units",
        "urbansim_parcel.building.is_residential"
        ], dataset_pool=self.dataset_pool)

    ## define unit_name by whether a building is residential or not (with the is_residential attribute):
    ## if it is non-residential (0), count units by number of job spaces (units_for_jobs);
    ## if it is residential (1), count units by residential units
    self.unit_name = array(["units_for_jobs", "residential_units"])

    target_vacancy = self.dataset_pool.get_dataset('target_vacancy')
    target_vacancy.compute_variables(['is_residential = target_vacancy.disaggregate(building_type.is_residential)'],
                                     dataset_pool=self.dataset_pool)
    # This try-except block checks to see if the object has a subarea_id_name;
    # if it does, it calculates the vacancy rates by subarea_id_name
    try:
        # Check for subarea_id_name in the target_vacancies dataset:
        # if it is present, vacancy rates are specified by subarea_id_name;
        # if it is not, vacancy rates are specified region-wide.
        target_vacancy.load_dataset()
        if self.subarea_id_name in target_vacancy.get_attribute_names():
            current_target_vacancy_this_year = DatasetSubset(target_vacancy,
                index=where(target_vacancy.get_attribute("year") == current_year)[0])
            current_target_vacancy = DatasetSubset(current_target_vacancy_this_year,
                index=where(current_target_vacancy_this_year.get_attribute(self.subarea_id_name) == self.area_id)[0])
        else:
            current_target_vacancy = DatasetSubset(target_vacancy,
                index=where(target_vacancy.get_attribute("year") == current_year)[0])
    except AttributeError:
        # vacancy rates are specified region-wide:
        current_target_vacancy = DatasetSubset(target_vacancy,
            index=where(target_vacancy.get_attribute("year") == current_year)[0])

    if current_target_vacancy.size() == 0:
        raise IOError, 'No target vacancy defined for year %s.' % current_year

    self.existing_units = {}    # total existing units by land_use type
    self.occupied_units = {}    # total occupied units by land_use type
    self.proposed_units = {}    # total proposed units by land_use type
    self.demolished_units = {}  # total (to be) demolished units by land_use type

    components_building_type_ids = self.proposal_component_set.get_attribute("building_type_id").astype("int32")
    proposal_ids = self.proposal_set.get_id_attribute()
    proposal_ids_in_component_set = self.proposal_component_set.get_attribute("proposal_id")
    all_units_proposed = self.proposal_component_set.get_attribute("units_proposed")
    number_of_components_in_proposals = self.proposal_set.get_attribute("number_of_components")

    self.accepting_proposals = zeros(current_target_vacancy.get_attribute("building_type_id").max() + 1,
                                     dtype='bool8')  # whether to accept new proposals, for each building type
    self.accepted_proposals = []  # index of accepted proposals

    self.target_vacancies = {}
    tv_building_types = current_target_vacancy.get_attribute("building_type_id")
    tv_rate = current_target_vacancy.get_attribute("target_vacancy_rate")
    for itype in range(tv_building_types.size):
        self.target_vacancies[tv_building_types[itype]] = tv_rate[itype]

    self.check_vacancy_rates(current_target_vacancy)  # initialize self.accepting_proposals based on the current vacancy rate

    sqft_per_job = self.dataset_pool.get_dataset("building_sqft_per_job")
    zones_of_proposals = self.proposal_set.get_attribute("zone_id")
    self.building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(zones_of_proposals.max(),
                                                                               tv_building_types.max())
    # consider only those proposals that have all components of an accepted type and a sum of proposed units > 0
    is_accepted_type = self.accepting_proposals[components_building_type_ids]
    sum_is_accepted_type_over_proposals = array(ndimage.sum(is_accepted_type,
                                                            labels=proposal_ids_in_component_set,
                                                            index=proposal_ids))
    sum_of_units_proposed = array(ndimage.sum(all_units_proposed,
                                              labels=proposal_ids_in_component_set,
                                              index=proposal_ids))
    is_proposal_eligible = logical_and(sum_is_accepted_type_over_proposals == number_of_components_in_proposals,
                                       sum_of_units_proposed > 0)
    is_proposal_eligible = logical_and(is_proposal_eligible,
                                       self.proposal_set.get_attribute("start_year") == current_year)

    ## handle planned proposals: all proposals with status_id == id_planned
    ## and start_year == current_year are accepted
    planned_proposal_indexes = where(logical_and(
        self.proposal_set.get_attribute("status_id") == self.proposal_set.id_planned,
        self.proposal_set.get_attribute("start_year") == current_year))[0]
    self.consider_proposals(planned_proposal_indexes, force_accepting=True)

    # consider proposals (in this order: planned, proposed, tentative)
    for status in [self.proposal_set.id_proposed, self.proposal_set.id_tentative]:
        idx = where(logical_and(self.proposal_set.get_attribute("status_id") == status, is_proposal_eligible))[0]
        if idx.size <= 0:
            continue
        logger.log_status("Sampling from %s eligible proposals with status %s." % (idx.size, status))
        while (True in self.accepting_proposals):
            if self.weight[idx].sum() == 0.0:
                logger.log_warning("Running out of proposals; there aren't any proposals with non-zero weight")
                break
            idx = idx[self.weight[idx] > 0]
            n = minimum(idx.size, n)
            sampled_proposal_indexes = probsample_noreplace(proposal_ids[idx], n,
                                                            prob_array=(self.weight[idx] / float(self.weight[idx].sum())),
                                                            exclude_index=None, return_index=True)
            self.consider_proposals(arange(self.proposal_set.size())[idx[sampled_proposal_indexes]])
            self.weight[idx[sampled_proposal_indexes]] = 0

    # set status of accepted proposals to 'active'
    self.proposal_set.modify_attribute(name="status_id", data=self.proposal_set.id_active,
                                       index=array(self.accepted_proposals, dtype='int32'))
    building_types = self.dataset_pool.get_dataset("building_type")
    logger.log_status("Status of %s development proposals set to active." % len(self.accepted_proposals))
    logger.log_status("Target/existing vacancy rates (reached using eligible proposals) by building type:")
    for type_id in self.existing_units.keys():
        units_stock = self._get_units_stock(type_id)
        vr = self._get_vacancy_rates(type_id)
        ## units = residential_units if building_type is residential
        ## units = number of job spaces if building_type is non-residential
        logger.log_status(
            """%(type_id)s[%(type_name)s]: %(vr)s = ((existing_units:%(existing_units)s + units_proposed:%(units_proposed)s - units_to_be_demolished:%(units_demolished)s) - units_occupied:%(units_occupied)s) / units_stock:%(units_stock)s""" % {
                'type_id': type_id,
                'type_name': building_types.get_attribute_by_id("building_type_name", type_id),
                'vr': vr,
                'existing_units': int(self.existing_units[type_id]),
                'units_occupied': int(self.occupied_units[type_id]),
                'units_proposed': int(self.proposed_units[type_id]),
                'units_demolished': int(self.demolished_units[type_id]),
                'units_stock': int(units_stock)
            })

    # Code added by Jesse Ayers, MAG, 7/20/2009
    # Get the active projects:
    stat_id = self.proposal_set.get_attribute('status_id')
    actv = where(stat_id == 1)[0]
    # Where there are active projects, compute the total_land_area_taken
    # and store it on the development_project_proposals dataset
    # so it can be used by the building_construction_model for the proper
    # computation of units_proposed for those projects with velocity curves
    if actv.size > 0:
        total_land_area_taken_computed = self.proposal_set.get_attribute('urbansim_parcel.development_project_proposal.land_area_taken')
        self.proposal_set.modify_attribute('total_land_area_taken', total_land_area_taken_computed[actv], actv)

    return (self.proposal_set, self.demolished_buildings)
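# --- Illustration (not part of the model code) ---------------------------
# The vacancy identity logged above, on toy numbers: the post-development
# vacancy rate is (stock after proposals and demolitions minus occupied
# units) over that same stock. All values here are illustrative.
existing_units, units_proposed, units_demolished, units_occupied = 1000, 150, 50, 980
units_stock = existing_units + units_proposed - units_demolished
vr = (units_stock - units_occupied) / float(units_stock)
print(units_stock, round(vr, 4))  # 1100 0.1091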
print flt_directory_out
test_flag = options.test_flag

#shutil.rmtree(flt_directory_out)
#os.mkdir(flt_directory_out)

logger.log_status("Convert input data from ", str(input_year))

lc = LandCoverDataset(in_storage=StorageFactory().get_storage('flt_storage', storage_location=flt_directory_in),
                      out_storage=StorageFactory().get_storage('flt_storage', storage_location=flt_directory_out))
lc.get_header()  # added 23 june 2009 by mm
mask = lc.get_mask()
idx = where(mask == 0)[0]
lcsubset = DatasetSubset(lc, idx)
print "Converting:"
lcsubset.write_dataset(attributes=["relative_x"], out_table_name="land_covers")
#lcsubset.write_dataset(attributes=["relative_x"], out_table_name="land_covers",
#                       valuetypes=valuetypes)
lc.delete_one_attribute("relative_x")
lcsubset.write_dataset(attributes=["relative_y"], out_table_name="land_covers")
#lcsubset.write_dataset(attributes=["relative_y"], out_table_name="land_covers",
#                       valuetypes=valuetypes)
lc.delete_one_attribute("relative_y")
#srcdir = os.path.join(flt_directory_out, "land_covers", "computed")
#shutil.move(os.path.join(srcdir, "relative_x.li4"), os.path.join(flt_directory_out, "land_covers"))
#shutil.move(os.path.join(srcdir, "relative_y.li4"), os.path.join(flt_directory_out, "land_covers"))
#shutil.rmtree(srcdir)
for attr in lc.get_primary_attribute_names():
    print "  ", attr
def _do_run(self, location_set, agent_set, agents_index, data_objects=None, resources=None):
    location_id_name = location_set.get_id_name()[0]
    jobsubset = DatasetSubset(agent_set, agents_index)
    if jobsubset.size() <= 0:
        return array([], dtype='int32')
    # unplace jobs
    agent_set.set_values_of_one_attribute(location_id_name,
                                          resize(array([-1.0]), jobsubset.size()), agents_index)
    sector_ids = jobsubset.get_attribute("sector_id")
    sectors = unique(sector_ids)
    counts = ndimage_sum(ones((jobsubset.size(),)),
                         labels=sector_ids.astype('int32'),
                         index=sectors.astype('int32'))
    if sectors.size <= 1:
        counts = array([counts])
    variables = map(lambda x: "number_of_jobs_of_sector_" + str(int(x)), sectors)
    compute_variables = map(lambda var: self.variable_package + "." +
                            location_set.get_dataset_name() + "." + var, variables)
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    self.dataset_pool.add_datasets_if_not_included({agent_set.get_dataset_name(): agent_set})
    location_set.compute_variables(compute_variables, dataset_pool=self.dataset_pool)
    if self.filter is None:
        location_index = arange(location_set.size())
    else:
        filter_values = location_set.compute_variables([self.filter], dataset_pool=self.dataset_pool)
        location_index = where(filter_values > 0)[0]
    if location_index.size <= 0:
        logger.log_status("No locations available. Nothing to be done.")
        return array([])
    location_subset = DatasetSubset(location_set, location_index)
    i = 0
    for sector in sectors:
        distr = location_subset.get_attribute(variables[i])
        if ma.allclose(distr.sum(), 0):
            uniform_prob = 1.0 / distr.size
            distr = resize(array([uniform_prob], dtype='float64'), distr.size)
            logger.log_warning("Probabilities in scaling model for sector " + str(sector) +
                               " sum to 0.0. Substituting uniform distribution!")
        #random_sample = sample(location_set.get_attribute("grid_id"), k=int(counts[i]),
        #                       probabilities=distr)
        distr = distr / float(distr.sum())
        random_sample = probsample_replace(location_subset.get_id_attribute(),
                                           size=int(counts[i]),
                                           prob_array=distr)
        idx = where(sector_ids == sector)[0]
        # modify job locations
        agent_set.set_values_of_one_attribute(location_id_name, random_sample, agents_index[idx])
        i += 1
    return agent_set.get_attribute_by_index(location_id_name, agents_index)
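# --- Illustration (not part of the model code) ---------------------------
# The scaling step above in miniature: re-placed jobs of one sector are
# assigned locations drawn with replacement, with probabilities
# proportional to the sector's existing job counts per location (uniform
# if the sector is empty everywhere). np.random.choice is a plain-NumPy
# stand-in for probsample_replace; ids and counts are illustrative.
import numpy as np

location_ids = np.array([101, 102, 103])
jobs_of_sector = np.array([0., 30., 70.])
if np.allclose(jobs_of_sector.sum(), 0):
    jobs_of_sector = np.ones_like(jobs_of_sector)  # uniform fallback
p = jobs_of_sector / jobs_of_sector.sum()
print(np.random.choice(location_ids, size=10, p=p))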
def test_agents_placed_in_appropriate_types(self):
    """Create 1000 unplaced industrial jobs and 1 commercial job. Allocate 50
    commercial gridcells with enough space for 10 commercial jobs per gridcell.
    After running the EmploymentLocationChoiceModel, the 1 commercial job should
    be placed, but the 1000 industrial jobs should remain unplaced.
    """
    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='job_building_types',
        table_data={
            'id': array([2, 1]),
            'name': array(['commercial', 'industrial'])
        })
    job_building_types = JobBuildingTypeDataset(in_storage=storage, in_table_name='job_building_types')

    storage.write_table(table_name='jobs',
        table_data={
            'job_id': arange(1001) + 1,
            'grid_id': array([0] * 1001),
            'building_type': array([1] * 1000 + [2])
        })
    jobs = JobDataset(in_storage=storage, in_table_name='jobs')

    storage.write_table(table_name='gridcells',
        table_data={
            'grid_id': arange(50) + 1,
            'commercial_sqft': array([1000] * 50),
            'commercial_sqft_per_job': array([100] * 50)
        })
    gridcells = GridcellDataset(in_storage=storage, in_table_name='gridcells')

    coefficients = Coefficients(names=("dummy",), values=(0.1,))
    specification = EquationSpecification(variables=("gridcell.commercial_sqft",),
                                          coefficients=("dummy",))

    compute_resources = Resources({"job": jobs, "job_building_type": job_building_types})
    agents_index = where(jobs.get_attribute("grid_id") == 0)[0]
    unplace_jobs = DatasetSubset(jobs, agents_index)
    agents_index = where(unplace_jobs.get_attribute("building_type") == 2)[0]

    gridcells.compute_variables(["urbansim.gridcell.number_of_commercial_jobs"], resources=compute_resources)
    commercial_jobs = gridcells.get_attribute("number_of_commercial_jobs")
    gridcells.compute_variables(["urbansim.gridcell.number_of_industrial_jobs"], resources=compute_resources)
    industrial_jobs = gridcells.get_attribute("number_of_industrial_jobs")
    model_group = ModelGroup(job_building_types, "name")
    elcm = EmploymentLocationChoiceModel(ModelGroupMember(model_group, "commercial"),
                                         location_set=gridcells,
                                         agents_grouping_attribute="job.building_type",
                                         choices="opus_core.random_choices_from_index",
                                         sample_size_locations=30)
    elcm.run(specification, coefficients, agent_set=jobs, agents_index=agents_index, debuglevel=1)

    gridcells.compute_variables(["urbansim.gridcell.number_of_commercial_jobs"], resources=compute_resources)
    commercial_jobs = gridcells.get_attribute("number_of_commercial_jobs")
    gridcells.compute_variables(["urbansim.gridcell.number_of_industrial_jobs"], resources=compute_resources)
    industrial_jobs = gridcells.get_attribute("number_of_industrial_jobs")

    self.assertEqual(commercial_jobs.sum() == 1, True,
                     "Error, there should only be a total of 1 commercial job")
    self.assertEqual(industrial_jobs.sum() == 0, True,
                     "Error, there should be no industrial jobs because there's no space for them")
def run(
    self,
    realestate_dataset,
    year=None,
    occupied_spaces_variable="occupied_units",
    total_spaces_variable="total_units",
    target_attribute_name="target_vacancy_rate",
    sample_from_dataset=None,
    sample_filter="",
    reset_attribute_value={},
    year_built="year_built",
    dataset_pool=None,
    append_to_realestate_dataset=False,
    table_name="development_projects",
    dataset_name="development_project",
    id_name="development_project_id",
    **kwargs
):
    """
    sample_filter attribute/variable indicates which records in the dataset
    are eligible in the sampling for removal or cloning
    append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
    """
    if self.target_vancy_dataset is None:
        raise RuntimeError, "target_vacancy_rate dataset is unspecified."
    if not sample_from_dataset:
        sample_from_dataset = realestate_dataset
    # if dataset_pool is None:
    #     dataset_pool = SessionConfiguration().get_dataset_pool()
    if year is None:
        year = SimulationState().get_current_time()
    this_year_index = where(self.target_vancy_dataset.get_attribute("year") == year)[0]
    target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)
    column_names = list(
        set(self.target_vancy_dataset.get_known_attribute_names())
        - set([target_attribute_name, occupied_spaces_variable, total_spaces_variable, "year", "_hidden_id_"])
    )
    column_names.sort(reverse=True)
    column_values = dict(
        [
            (name, target_vacancy_for_this_year.get_attribute(name))
            for name in column_names + [target_attribute_name]
        ]
    )
    # NOTE: in Python 2 the list comprehension below leaks its loop variable,
    # so later references to `col` see the last entry of column_names.
    independent_variables = list(set([re.sub("_max$", "", re.sub("_min$", "", col)) for col in column_names]))
    dataset_known_attributes = realestate_dataset.get_known_attribute_names()
    sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
    for variable in independent_variables:
        if variable not in dataset_known_attributes:
            realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
        if variable not in sample_dataset_known_attributes:
            sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
    dataset_known_attributes = realestate_dataset.get_known_attribute_names()  # update after compute
    if sample_filter:
        short_name = VariableName(sample_filter).get_alias()
        if short_name not in dataset_known_attributes:
            filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
        else:
            filter_indicator = sample_from_dataset.get_attribute(short_name)
    else:
        filter_indicator = 1

    sampled_index = array([], dtype=int32)

    # log header
    if PrettyTable is not None:
        status_log = PrettyTable()
        status_log.set_field_names(column_names + ["actual", "target", "expected", "difference", "action"])
    else:
        logger.log_status("\t".join(column_names + ["actual", "target", "expected", "difference", "action"]))
    error_log = ""
    for index in range(target_vacancy_for_this_year.size()):
        this_sampled_index = array([], dtype=int32)
        indicator = ones(realestate_dataset.size(), dtype="bool")
        sample_indicator = ones(sample_from_dataset.size(), dtype="bool")
        criterion = {}  # for logging
        for attribute in independent_variables:
            if attribute in dataset_known_attributes:
                dataset_attribute = realestate_dataset.get_attribute(attribute)
                sample_attribute = sample_from_dataset.get_attribute(attribute)
            else:
                raise ValueError, "attribute %s used in the target vacancy dataset cannot be found in dataset %s" % (
                    attribute,
                    realestate_dataset.get_dataset_name(),
                )
            if attribute + "_min" in column_names:
                amin = target_vacancy_for_this_year.get_attribute(attribute + "_min")[index]
                criterion.update({attribute + "_min": amin})
                if amin != -1:
                    indicator *= dataset_attribute >= amin
                    sample_indicator *= sample_attribute >= amin
            if attribute + "_max" in column_names:
                amax = target_vacancy_for_this_year.get_attribute(attribute + "_max")[index]
                criterion.update({attribute + "_max": amax})
                if amax != -1:
                    indicator *= dataset_attribute <= amax
                    sample_indicator *= sample_attribute <= amax
            if attribute in column_names:
                aval = column_values[attribute][index]
                criterion.update({attribute: aval})
                if aval == -1:
                    continue
                elif (
                    aval == -2
                ):  ## treat -2 in a control totals column as the complement set, i.e. all other values not already specified in this column
                    indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                    sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                else:
                    indicator *= dataset_attribute == aval
                    sample_indicator *= sample_attribute == aval
        this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
        ## total/occupied_spaces_variable can be specified either as a universal name for all real estate
        ## or in the target_vacancy_rate dataset for each vacancy category
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[
                index
            ]
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
        # `col` here is the leaked loop variable (the last entry of column_names),
        # i.e. the stratifying column of this customized version
        this_total_spaces_variable += "_" + str(criterion[col])
        this_occupied_spaces_variable += "_" + str(criterion[col])
        logger.be_quiet()  # temporarily disable logging
        realestate_dataset.compute_one_variable_with_unknown_package(
            this_occupied_spaces_variable, dataset_pool=dataset_pool
        )
        realestate_dataset.compute_one_variable_with_unknown_package(
            this_total_spaces_variable, dataset_pool=dataset_pool
        )
        sample_from_dataset.compute_one_variable_with_unknown_package(
            this_total_spaces_variable, dataset_pool=dataset_pool
        )
        logger.talk()
        actual_num = (realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
        # target_num is obsolete with this version.
        target_num = int(
            round(
                (realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum()
                / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
            )
        )
        """If the target vacancy is very small and the inflow to the region big,
        it is not enough to check only the current simulation year's vacancy.
        The simulation is more robust if the BTM anticipates the next year's
        population (of households and jobs).
        #TODO: Make the code more general to cover various stratifications in the real estate market.
        """
        if criterion[col] == 1:
            idx = where(self.control_totals.get_attribute("year") == year + 1)[0]
            this_years_control_totals = DatasetSubset(self.control_totals, idx)
            expected_num = int(
                round(
                    this_years_control_totals.get_attribute("total_number_of_households").sum()
                    / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
                )
            )
        if criterion[col] == 0:
            idx = where(self.employment_control_totals.get_attribute("year") == year + 1)[0]
            next_years_control_totals = DatasetSubset(self.employment_control_totals, idx)
            expected_num = int(
                round(
                    next_years_control_totals.get_attribute("number_of_jobs").sum()
                    / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
                )
            )
        diff = expected_num - actual_num
        # Previous version, which checked the current year's occupancy:
        # diff = target_num - actual_num
        if diff > 0:
            total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
            legit_index = where(
                logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0
            )[0]
            if legit_index.size > 0:
                mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                num_of_projects_to_sample = int(diff / mean_size)
                ## sample at least 1 project when diff > 0; otherwise it is an endless loop when num_of_projects_to_sample = 0
                num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                    lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                    this_sampled_index = concatenate((this_sampled_index, lucky_index))
                this_sampled_index = this_sampled_index[
                    0 : (1 + searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))
                ]
                sampled_index = concatenate((sampled_index, this_sampled_index))
            else:
                error_log += (
                    "There is nothing to sample from %s and no new development will happen for "
                    % sample_from_dataset.get_dataset_name()
                    + ",".join([col + "=" + str(criterion[col]) for col in column_names])
                    + "\n"
                )
        # if diff < 0: #TODO demolition; not yet supported

        ## log status
        action = "0"
        if this_sampled_index.size > 0:
            action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
            if diff > 0:
                action = "+" + str(action_num)
            if diff < 0:
                action = "-" + str(action_num)
        cat = [str(criterion[col]) for col in column_names]
        cat += [str(actual_num), str(target_num), str(expected_num), str(diff), action]
        if PrettyTable is not None:
            status_log.add_row(cat)
        else:
            logger.log_status("\t".join(cat))
    if PrettyTable is not None:
        logger.log_status("\n" + status_log.get_string())
    if error_log:
        logger.log_error(error_log)
    result_data = {}
    result_dataset = None
    index = array([], dtype="int32")
    if True:  # sampled_index.size > 0:
        ### ideally duplicate_rows() is all that is needed to add newly cloned rows;
        ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
        ## realestate_dataset.duplicate_rows(sampled_index)
        result_data.setdefault(year_built, resize(year, sampled_index.size).astype("int32"))
        ## also add 'independent_variables' to the new dataset
        for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
            if reset_attribute_value.has_key(attribute):
                result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
            else:
                result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
        if id_name and result_data and id_name not in result_data:
            result_data[id_name] = arange(sampled_index.size, dtype="int32") + 1
        storage = StorageFactory().get_storage("dict_storage")
        storage.write_table(table_name=table_name, table_data=result_data)
        result_dataset = Dataset(
            id_name=id_name, in_storage=storage, in_table_name=table_name, dataset_name=dataset_name
        )
        index = arange(result_dataset.size())
    if append_to_realestate_dataset:
        if len(result_data) > 0:
            index = realestate_dataset.add_elements(
                result_data, require_all_attributes=False, change_ids_if_not_unique=True
            )
        result_dataset = realestate_dataset
    return (result_dataset, index)
def prepare_for_run(self, dataset_pool,
                    create_proposal_set=True,
                    parcel_filter_for_new_development=None,
                    parcel_filter_for_redevelopment=None,
                    template_filter=None,
                    spec_replace_module_variable_pair=None,
                    proposed_units_variable="urbansim_parcel.development_project_proposal.units_proposed",
                    **kwargs):
    """create a development project proposal dataset from parcels and development templates.
    spec_replace_module_variable_pair is a tuple with two elements: the module name and the
    variable within the module that contains a dictionary of model variables to be replaced
    in the specification.
    """
    specification, coefficients, dummy = RegressionModel.prepare_for_run(self, **kwargs)
    try:
        existing_proposal_set_parent = dataset_pool.get_dataset('development_project_proposal')
        # load proposals whose status_id is neither id_tentative nor id_not_available
        available_idx = where(logical_and(
            existing_proposal_set_parent.get_attribute("status_id") != DevelopmentProjectProposalDataset.id_tentative,
            existing_proposal_set_parent.get_attribute("status_id") != DevelopmentProjectProposalDataset.id_not_available))[0]
        existing_proposal_set = DatasetSubset(existing_proposal_set_parent, available_idx)
        # Code updated by Hanyi Li, MAG 6/8/2010
        # Replacing the cached 'development_project_proposal' dataset with
        # the filtered dataset 'existing_proposal_set'
        dataset_pool.replace_dataset(existing_proposal_set_parent.get_dataset_name(), existing_proposal_set)
    except:
        existing_proposal_set = None

    parcels = dataset_pool.get_dataset('parcel')
    templates = dataset_pool.get_dataset('development_template')

    # It is important that no variable flushing happens during this method, since
    # we create datasets of the same name for different purposes (new development and redevelopment)
    # and flushing would mix them up
    flush_variables_current = SessionConfiguration().get('flush_variables', False)
    SessionConfiguration().put_data({'flush_variables': False})

    # Code added by Jesse Ayers, MAG, 9/14/2009
    # Getting an index of parcels that have actively developing projects (those on a velocity function)
    # and making sure that new proposals are not generated for them
    if existing_proposal_set:
        parcels_with_proposals = existing_proposal_set.get_attribute('parcel_id')
        parcels_with_proposals_idx = parcels.get_id_index(parcels_with_proposals)
        if parcel_filter_for_new_development is not None:
            if parcel_filter_for_new_development[parcel_filter_for_new_development.find('=') + 1] == '=':
                filter = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
            else:
                parcel_filter_for_new_development = parcel_filter_for_new_development[parcel_filter_for_new_development.find('=') + 1:].lstrip()
                filter = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
            index1 = where(parcels.compute_variables(filter))[0]
    else:
        if parcel_filter_for_new_development is not None:
            index1 = where(parcels.compute_variables(parcel_filter_for_new_development))[0]
        else:
            index1 = None

    if template_filter is not None:
        try:
            index2 = where(templates.compute_variables(template_filter))[0]
        except Exception, e:
            logger.log_warning("template_filter is set to %s, but there is an error when computing it: %s" % (template_filter, e))
            index2 = None
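# --- Illustration (not part of the model code) ---------------------------
# The filter-string surgery above in isolation: if the user-supplied parcel
# filter is an assignment ("name = expr") rather than a bare comparison,
# only its right-hand side is kept before being AND-ed with the condition
# that the parcel has no active proposals. The strings are illustrative;
# no Opus expression is evaluated here.
user_filter = "flter = parcel.is_developable == 1"
if user_filter[user_filter.find('=') + 1] == '=':
    rhs = user_filter                  # already a bare '==' comparison
else:
    rhs = user_filter[user_filter.find('=') + 1:].lstrip()
print('flter = numpy.logical_and('
      'parcel.number_of_agents(development_project_proposal) == 0, %s)' % rhs)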
def get_active_agent_set(self): """Return agent set that make choices in the model. Works only for the ChoiceModel class. """ agents = self.get_agent_set() return DatasetSubset(agents, self.get_agent_set_index())
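# Usage note (a hedged sketch; attribute name is illustrative): DatasetSubset is a
# lightweight view that serves attributes from the parent dataset restricted to the
# given index, without copying the data.
#
#     active = model.get_active_agent_set()
#     active.size()                      # number of agents that make choices
#     active.get_attribute("sector_id")  # e.g. attribute values of just those agents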
def run(self, development_proposal_set, building_dataset, dataset_pool, buildings_to_be_demolished=[], consider_amount_built_in_parcels=False, current_year=None): self.demolish_buildings(buildings_to_be_demolished, building_dataset, dataset_pool) if development_proposal_set.size() <= 0: logger.log_status( "Proposal set is empty. Nothing to be constructed.") return development_proposal_set # load velocity function dataset try: velocity_function_set = dataset_pool.get_dataset( "velocity_function") except: velocity_function_set = None # choose active projects is_active = development_proposal_set.get_attribute( "status_id") == development_proposal_set.id_active is_delayed_or_active = logical_or( is_active, development_proposal_set.get_attribute("status_id") == development_proposal_set.id_with_velocity) active_idx = where(is_delayed_or_active)[0] if active_idx.size <= 0: logger.log_status("No new buildings built.") return development_proposal_set if current_year is None: current_year = SimulationState().get_current_time() active_proposal_set = DatasetSubset(development_proposal_set, active_idx) # create proposal_component_set from the active proposals proposal_component_set = create_from_proposals_and_template_components( active_proposal_set, dataset_pool.get_dataset('development_template_component')) dataset_pool.replace_dataset(proposal_component_set.get_dataset_name(), proposal_component_set) # determine building types and corresponding unit names of the involved building_types building_type_id = proposal_component_set.get_attribute( "building_type_id") building_type_set = dataset_pool.get_dataset("building_type") # unit_names = building_type_set.compute_variables([ # 'building_type.disaggregate(generic_building_type.unit_name)'], dataset_pool=dataset_pool) unit_names = building_type_set.get_attribute("unit_name") # get unique values of the involved generic building types and unique unit names unique_building_types = unique(building_type_id) index_in_building_types = building_type_set.get_id_index( unique_building_types) unit_names = unit_names[index_in_building_types] is_residential = building_type_set.get_attribute( "is_residential")[index_in_building_types] == 1 unique_unit_names = unique(unit_names) # determine existing units on parcels parcels = dataset_pool.get_dataset("parcel") parcels.compute_variables( ["urbansim_parcel.parcel.vacant_land_area"] + ["urbansim_parcel.parcel.residential_units"] + map(lambda x: "urbansim_parcel.parcel.%s" % x, unique_unit_names), dataset_pool=dataset_pool) parcel_is_lut_vacant = parcels.compute_variables( ["urbansim_parcel.parcel.is_land_use_type_vacant"], dataset_pool=dataset_pool) parcel_lut = parcels.get_attribute("land_use_type_id") component_land_use_types = proposal_component_set.compute_variables( [ 'development_project_proposal_component.disaggregate(development_template.land_use_type_id, [development_project_proposal])' ], dataset_pool=dataset_pool) # from the velocity function determine the amount to be built for each component (in %) if velocity_function_set is not None: cummulative_amount_of_development = proposal_component_set.compute_variables( [ "urbansim_parcel.development_project_proposal_component.cummulative_amount_of_development" ], dataset_pool=dataset_pool) percent_of_development_this_year = proposal_component_set.compute_variables( [ "urbansim_parcel.development_project_proposal_component.percent_of_development_this_year" ], dataset_pool=dataset_pool) else: # if there is no velocity function, all components have velocity of 100% 
percent_of_development_this_year = resize( array([100], dtype="int32"), proposal_component_set.size()) # amount to be built to_be_built = proposal_component_set.compute_variables( [ 'urbansim_parcel.development_project_proposal_component.units_proposed' ], dataset_pool=dataset_pool ) / 100.0 * percent_of_development_this_year # initializing for new buildings max_building_id = building_dataset.get_id_attribute().max() new_buildings = {} new_buildings["parcel_id"] = array([], dtype="int32") new_buildings["residential_units"] = array([], dtype="int32") new_buildings["non_residential_sqft"] = array([], dtype="int32") new_buildings["building_type_id"] = array([], dtype="int32") new_buildings["sqft_per_unit"] = array( [], dtype=building_dataset.get_attribute("sqft_per_unit").dtype) new_buildings["land_area"] = array( [], dtype=building_dataset.get_attribute("land_area").dtype) new_buildings["improvement_value"] = array( [], dtype=building_dataset.get_attribute("improvement_value").dtype) new_buildings["template_id"] = array([], dtype="int32") sqft_per_unit = proposal_component_set.get_attribute( "building_sqft_per_unit").astype( new_buildings["sqft_per_unit"].dtype) # Compute land_area_taken properly if velocity function is present if velocity_function_set is not None: larea_taken = proposal_component_set.compute_variables([ 'urbansim_parcel.development_project_proposal_component.land_area_taken' ], dataset_pool= dataset_pool ) pct_dev_this_yr_conv = (percent_of_development_this_year / 100.0) land_area_taken = larea_taken * pct_dev_this_yr_conv else: land_area_taken = proposal_component_set.compute_variables( [ 'urbansim_parcel.development_project_proposal_component.land_area_taken' ], dataset_pool=dataset_pool).astype( new_buildings["land_area"].dtype) construction_cost = proposal_component_set.compute_variables( [ 'urbansim_parcel.development_project_proposal_component.construction_cost' ], dataset_pool=dataset_pool).astype( new_buildings["improvement_value"].dtype) template_ids = proposal_component_set.get_attribute("template_id") number_of_new_buildings = {} number_of_new_buildings_by_template_id = {} # iterate over building types that are unique over the involved proposals for itype in range(unique_building_types.size): this_building_type = unique_building_types[itype] number_of_new_buildings[this_building_type] = 0 unit_name = unit_names[itype] if is_residential[itype]: unit_name = 'residential_units' component_index = where(building_type_id == this_building_type)[0] parcel_ids_in_components = proposal_component_set.get_attribute_by_index( "parcel_id", component_index) unique_parcels = unique(parcel_ids_in_components) # iterate over involved parcels for parcel_id in unique_parcels: pidx = component_index[parcel_ids_in_components == parcel_id] parcel_index = parcels.get_id_index(parcel_id) # what is already built on this parcel if consider_amount_built_in_parcels: amount_built = parcels.get_attribute_by_index( unit_name, parcel_index) else: amount_built = 0 # what is proposed on this parcel amount_proposed = to_be_built[pidx].sum() # build if needed if rint(amount_proposed) > amount_built: if unit_name == "residential_units": bunit = "residential_units" bnunit = "non_residential_sqft" else: bnunit = "residential_units" bunit = "non_residential_sqft" to_be_built_cumsum = rint(cumsum( to_be_built[pidx])).astype("int32") idx_to_be_built = where( to_be_built_cumsum > amount_built)[0] new_buildings["parcel_id"] = concatenate( (new_buildings["parcel_id"], array(idx_to_be_built.size * [parcel_id], 
dtype="int32"))) new_buildings[bunit] = concatenate( (new_buildings[bunit], rint(to_be_built[pidx][idx_to_be_built]).astype( new_buildings[bunit].dtype))) new_buildings[bnunit] = concatenate( (new_buildings[bnunit], array(idx_to_be_built.size * [0], dtype="int32"))) new_buildings["building_type_id"] = concatenate( (new_buildings["building_type_id"], array(idx_to_be_built.size * [this_building_type], dtype="int32"))) new_buildings["sqft_per_unit"] = concatenate( (new_buildings["sqft_per_unit"], sqft_per_unit[pidx][idx_to_be_built])) new_buildings["land_area"] = concatenate( (new_buildings["land_area"], land_area_taken[pidx][idx_to_be_built])) new_buildings["improvement_value"] = concatenate( (new_buildings["improvement_value"], construction_cost[pidx][idx_to_be_built])) new_buildings["template_id"] = concatenate( (new_buildings["template_id"], template_ids[pidx][idx_to_be_built])) number_of_new_buildings[ this_building_type] += idx_to_be_built.size if parcel_is_lut_vacant[parcel_index]: parcel_lut[parcel_index] = component_land_use_types[ pidx][idx_to_be_built][0] # count number of buildings by template ids for icomp in range(idx_to_be_built.size): if template_ids[pidx[idx_to_be_built[ icomp]]] not in number_of_new_buildings_by_template_id.keys( ): number_of_new_buildings_by_template_id[ template_ids[pidx[idx_to_be_built[icomp]]]] = 0 number_of_new_buildings_by_template_id[template_ids[ pidx[idx_to_be_built[icomp]]]] += 1 # add created buildings to the existing building dataset buildings_id_name = building_dataset.get_id_name()[0] new_buildings[buildings_id_name] = max_building_id + arange( 1, new_buildings["parcel_id"].size + 1) new_buildings['year_built'] = resize( array([current_year], dtype="int32"), new_buildings["parcel_id"].size) building_dataset.add_elements(new_buildings, require_all_attributes=False) if "zone_id" in building_dataset.get_known_attribute_names(): zone_ids = building_dataset.compute_variables( ['building.disaggregate(parcel.zone_id)'], dataset_pool=dataset_pool) building_dataset.modify_attribute(name="zone_id", data=zone_ids) if "county" in building_dataset.get_known_attribute_names(): county_ids = building_dataset.compute_variables( ['building.disaggregate(parcel.county)'], dataset_pool=dataset_pool) building_dataset.modify_attribute(name="county", data=county_ids) logger.log_status("%s new buildings built." 
                          % new_buildings["parcel_id"].size)
        for type_id in number_of_new_buildings.keys():
            logger.log_status("building type %s: %s" % (type_id, number_of_new_buildings[type_id]))
        logger.log_status("Number of new buildings by template ids:")
        logger.log_status(number_of_new_buildings_by_template_id)
        # recompute the cumulative development amount
        if velocity_function_set is not None:
            # determine whether everything has been built or whether it should be considered next year
            cummulative_amount_of_development = development_proposal_set.compute_variables([
                "development_project_proposal.aggregate(urbansim_parcel.development_project_proposal_component.cummulative_amount_of_development)/urbansim_parcel.development_project_proposal.number_of_components"
            ], dataset_pool=dataset_pool)
        else:
            # if there is no velocity function, all components have a velocity of 100%
            ## TODO: needs to be reviewed, probably by Hana
            ## changed from proposal_component_set to development_proposal_set
            ## so it will have the same shape as is_delayed_or_active
            cummulative_amount_of_development = resize(array([100], dtype="int32"), development_proposal_set.size())
        will_be_delayed = cummulative_amount_of_development < 100
        velocity_idx = where(logical_and(is_delayed_or_active, will_be_delayed))[0]
        if velocity_idx.size > 0:
            # set the status_id of unfinished projects to id_with_velocity
            development_proposal_set.set_values_of_one_attribute("status_id", development_proposal_set.id_with_velocity, index=velocity_idx)
        not_velocity_idx = where(logical_and(is_delayed_or_active, logical_not(will_be_delayed)))[0]
        if not_velocity_idx.size > 0:
            # set the status_id of the remaining (finished) projects to id_not_available
            development_proposal_set.set_values_of_one_attribute("status_id", development_proposal_set.id_not_available, index=not_velocity_idx)
        dataset_pool._remove_dataset(proposal_component_set.get_dataset_name())
        return development_proposal_set
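# Worked sketch of the velocity bookkeeping above (the numbers are illustrative): a
# component on a multi-year velocity function might report
#
#     cummulative_amount_of_development = 75   # percent finished through last year
#     percent_of_development_this_year  = 25   # percent scheduled for this year
#
# so a component with units_proposed = 200 builds 200 / 100.0 * 25 = 50 units this
# year. Its proposal keeps status id_with_velocity while the aggregated cumulative
# amount stays below 100, and is switched to id_not_available once it reaches 100.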
class DevelopmentProjectTransitionModel( Model ): """ Creates development projects. Each development project is for a single type of development, e.g. 'industrial' or 'commercial'. This model creates enough development projects to match the desired vacancy rates, as defined in the target_vacancies table. It does not place any projects in locations; that is the job of the development project location choice models. The distribution of project sizes (amount of space, value of space) is determined by sampling from the projects in the development_event_history table. """ model_name = "Development Project Transition Model" def __init__( self, debuglevel=0 ): self.debug = DebugPrinter( debuglevel ) def pre_check( self, location_set, vacancy_table, types ): for ptype in types: self.check_for_space( location_set.get_attribute(self.variable_for_total_units[ptype])) self.check_target_vacancy_is_not_100_percent( vacancy_table.get_attribute( "target_total_vacancy")) def check_for_space( self, values ): """Check that this array of values sums to something > 0.""" self.do_check( "x > 0", array( [values.sum()] ) ) def check_target_vacancy_is_not_100_percent( self, value ): """Check that the target vacancy rate is not 100% (ratio == 1), because it doesn't make sense, and it also causes a divide by 0 error.""" self.do_check( "x < 1", value ) def run( self, vacancy_table, history_table, year, location_set, dataset_pool=None, resources=None ): self.dataset_pool=dataset_pool building_types = self.dataset_pool.get_dataset('building_type') target_vacancy_this_year = DatasetSubset(vacancy_table, index=where(vacancy_table.get_attribute("year")==year)[0]) building_type_ids = target_vacancy_this_year.get_attribute('building_type_id') building_type_idx = building_types.get_id_index(building_type_ids) self.used_building_types = DatasetSubset(building_types, index=building_type_idx) project_types = self.used_building_types.get_attribute('building_type_name') is_residential = self.used_building_types.get_attribute('is_residential') unit_names = where(is_residential, 'residential_units', 'non_residential_sqft') specific_unit_names = where(is_residential, 'residential_units', '_sqft') rates = target_vacancy_this_year.get_attribute('target_total_vacancy') self.project_units = {} self.project_specific_units = {} target_rates = {} for i in range(self.used_building_types.size()): self.project_units[project_types[i]] = unit_names[i] if is_residential[i]: self.project_specific_units[project_types[i]] = specific_unit_names[i] else: self.project_specific_units[project_types[i]] = "%s%s" % (project_types[i], specific_unit_names[i]) target_rates[building_type_ids[i]] = rates[i] self._compute_vacancy_and_total_units_variables(location_set, project_types, resources) self.pre_check( location_set, target_vacancy_this_year, project_types) projects = None for project_type_id, target_vacancy_rate in target_rates.iteritems(): # determine current-year vacancy rates project_type = building_types.get_attribute_by_id('building_type_name', project_type_id) vacant_units_sum = location_set.get_attribute(self.variable_for_vacancy[project_type]).sum() units_sum = float( location_set.get_attribute(self.variable_for_total_units[project_type]).sum() ) should_develop_units = int(round(max( 0, ( target_vacancy_rate * units_sum - vacant_units_sum ) / ( 1 - target_vacancy_rate ) ))) logger.log_status(project_type + ": vacant units: %d, should be vacant: %f, sum units: %d" % (vacant_units_sum, target_vacancy_rate * units_sum, units_sum)) if not 
should_develop_units:
                logger.log_note(("Will not build any " + project_type + " units, because the current vacancy of %d units\n"
                                 + "is more than the %.0f units desired for the vacancy rate of %f.")
                                % (vacant_units_sum, target_vacancy_rate * units_sum, target_vacancy_rate))
            # create projects
            if should_develop_units > 0:
                this_project = self._create_projects(should_develop_units, project_type, project_type_id,
                                                     history_table, location_set, units_sum, resources)
                if projects is None:
                    projects = this_project
                else:
                    projects.join_by_rows(this_project, change_ids_if_not_unique=True)
        return projects

    def _compute_vacancy_and_total_units_variables(self, location_set, project_types, resources=None):
        compute_resources = Resources(resources)
        compute_resources.merge({"debug": self.debug})
        self.variable_for_vacancy = {}
        self.variable_for_total_units = {}
        for ptype in project_types:
            self.variable_for_vacancy[ptype] = compute_resources.get(
                "%s_vacant_variable" % ptype,
                "urbansim_zone.%s.vacant_%s" % (location_set.get_dataset_name(), self.project_specific_units[ptype]))
            self.variable_for_total_units[ptype] = compute_resources.get(
                "%s_total_units_variable" % ptype,
                "%s.aggregate(urbansim_zone.building.total_%s)" % (location_set.get_dataset_name(), self.project_specific_units[ptype]))
            location_set.compute_variables([self.variable_for_vacancy[ptype], self.variable_for_total_units[ptype]],
                                           dataset_pool=self.dataset_pool, resources=compute_resources)

    def _create_projects(self, should_develop_units, project_type, project_type_id, history_table, location_set, units_sum, resources=None):
        history_values = history_table.get_attribute(self.project_units[project_type])
        type_code_values = history_table.get_change_type_code_attribute(self.project_units[project_type])
        # take only non-zero history values and those that don't represent demolished buildings
        history_values_without_zeros = history_values[logical_and(history_values > 0,
                                                                  type_code_values != DevelopmentEventTypeOfChange.DELETE)]
        mean_size = history_values_without_zeros.mean()
        idx = array([], dtype="int32")
        # Ensure that there are some development projects to choose from.
        num_of_projects_to_select = int(max(10, round_(should_develop_units / mean_size)))
        while True:
            idx = concatenate((idx, randint(0, history_values_without_zeros.size, num_of_projects_to_select)))
            csum = history_values_without_zeros[idx].cumsum()
            idx1 = idx[csum <= should_develop_units]
            if idx1.size == 0:  # at least one project should be selected
                idx = array([idx[0]], dtype="int32")
            else:
                idx = idx1
            if csum[-1] >= should_develop_units:
                break
        data = {"residential_units": zeros((idx.size,), dtype=int32),
                "non_residential_sqft": zeros((idx.size,), dtype=int32),
                'building_type_id': array(idx.size * [project_type_id]),
                "project_id": arange(idx.size) + 1,
                "building_id": zeros((idx.size,), dtype=int32)}
        data[self.project_units[project_type]] = history_values_without_zeros[idx]
        storage = StorageFactory().get_storage('dict_storage')
        development_projects_table_name = 'development_projects'
        storage.write_table(table_name=development_projects_table_name, table_data=data)
        return Dataset(in_storage=storage, in_table_name=development_projects_table_name, id_name='project_id')
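# Worked example of the target-vacancy arithmetic in run() (illustrative numbers):
# with units_sum = 10000 existing units, vacant_units_sum = 300 of them vacant and
# target_vacancy_rate = 0.05,
#
#     should_develop_units = round(max(0, (0.05 * 10000 - 300) / (1 - 0.05)))
#                          = round(200 / 0.95) = 211
#
# i.e. 211 new units are requested so that (300 + 211) / (10000 + 211) is roughly
# 0.05. The numerator is the shortfall of vacant units; dividing by (1 - rate)
# accounts for the new units enlarging the total stock as well.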
def run(self, in_storage, out_storage=None, business_dsname="business", zone_dsname=None): dataset_pool = DatasetPool(storage=in_storage, package_order=['psrc_parcel', 'urbansim_parcel', 'urbansim', 'opus_core'] ) seed(1) allbusinesses = dataset_pool.get_dataset(business_dsname) parcels = dataset_pool.get_dataset('parcel') buildings = dataset_pool.get_dataset('building') parcels.compute_variables(["urbansim_parcel.parcel.residential_units", "number_of_buildings = parcel.number_of_agents(building)", "non_residential_sqft = (parcel.aggregate(building.non_residential_sqft)).astype(int32)", "number_of_res_buildings = parcel.aggregate(urbansim_parcel.building.is_residential)", "number_of_nonres_buildings = parcel.aggregate(urbansim_parcel.building.is_non_residential)", "number_of_mixed_use_buildings = parcel.aggregate(urbansim_parcel.building.is_generic_building_type_6)" ], dataset_pool=dataset_pool) restypes = [12, 4, 19, 11, 34, 10, 33] reslutypes = [13,14,15,24] is_valid_business = ones(allbusinesses.size(), dtype='bool8') parcels_not_matched = logical_and(in1d(allbusinesses["parcel_id"], parcels.get_id_attribute(), invert=True), allbusinesses["parcel_id"] > 0) if(parcels_not_matched.sum() > 0): is_valid_business[where(parcels_not_matched)] = False logger.log_warning(message="No parcel exists for %s businesses (%s jobs)" % (parcels_not_matched.sum(), allbusinesses[self.number_of_jobs_attr][where(parcels_not_matched)].sum())) zero_parcel = allbusinesses["parcel_id"]<=0 if zero_parcel.sum() > 0: is_valid_business[where(zero_parcel)] = False logger.log_warning(message="%s businesses (%s jobs) located on zero parcel_id" % (zero_parcel.sum(), allbusinesses[self.number_of_jobs_attr][where(zero_parcel)].sum())) zero_size = logical_and(is_valid_business, allbusinesses[self.number_of_jobs_attr].round() == 0) if(sum(zero_size) > 0): is_valid_business[where(zero_size)] = False logger.log_warning(message="%s businesses are of size 0." 
% sum(zero_size)) businesses = DatasetSubset(allbusinesses, index=where(is_valid_business)[0]) parcels.add_attribute(name="number_of_workplaces", data=parcels.sum_dataset_over_ids(businesses, constant=1)) has_single_res_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_res_buildings"] == 1) # 1 (1 residential) parcels.add_attribute(data=has_single_res_buildings.astype("int32"), name="buildings_code") has_mult_res_buildings = logical_and(parcels["number_of_buildings"] > 1, parcels["number_of_nonres_buildings"] == 0) # 2 (mult residential) parcels.modify_attribute("buildings_code", data=2*ones(has_mult_res_buildings.sum()), index=where(has_mult_res_buildings)) has_single_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_nonres_buildings"] == 1), parcels["number_of_mixed_use_buildings"] == 0) # 3 (1 non-res) parcels.modify_attribute("buildings_code", data=3*ones(has_single_nonres_buildings.sum()), index=where(has_single_nonres_buildings)) has_mult_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] > 1, parcels["number_of_res_buildings"] == 0), parcels["number_of_mixed_use_buildings"] == 0) # 4 (mult non-res) parcels.modify_attribute("buildings_code", data=4*ones(has_mult_nonres_buildings.sum()), index=where(has_mult_nonres_buildings)) has_single_mixed_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_mixed_use_buildings"] == 1) # 5 (1 mixed-use) parcels.modify_attribute("buildings_code", data=5*ones(has_single_mixed_buildings.sum()), index=where(has_single_mixed_buildings)) has_mult_mixed_buildings = logical_and(parcels["number_of_buildings"] > 1, logical_or(logical_and(parcels["number_of_res_buildings"] > 0, parcels["number_of_nonres_buildings"] > 0), logical_or(parcels["number_of_mixed_use_buildings"] > 1, logical_and(parcels["number_of_res_buildings"] == 0, parcels["number_of_mixed_use_buildings"] > 0)))) # 6 parcels.modify_attribute("buildings_code", data=6*ones(has_mult_mixed_buildings.sum()), index=where(has_mult_mixed_buildings)) has_no_building_res_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)) # 7 (vacant with res LU type) parcels.modify_attribute("buildings_code", data=7*ones(has_no_building_res_lutype.sum()), index=where(has_no_building_res_lutype)) has_no_building_nonres_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)==0) # 8 (vacant with non-res LU type) parcels.modify_attribute("buildings_code", data=8*ones(has_no_building_nonres_lutype.sum()), index=where(has_no_building_nonres_lutype)) business_sizes = businesses[self.number_of_jobs_attr].round().astype("int32") business_location = {} business_location1wrkpl = zeros(businesses.size(), dtype="int32") business_location1wrkplres = zeros(businesses.size(), dtype="int32") business_ids = businesses.get_id_attribute() # sample one building for cases when sampling is required. 
for ibusid in range(businesses.size()): idx = where(buildings['parcel_id'] == businesses['parcel_id'][ibusid])[0] bldgids = buildings['building_id'][idx] business_location[business_ids[ibusid]] = bldgids if bldgids.size == 1: business_location1wrkpl[ibusid] = bldgids[0] elif bldgids.size > 1: business_location1wrkpl[ibusid] = bldgids[sample_noreplace(arange(bldgids.size), 1)] if buildings['residential_units'][idx].sum() > 0: # Residential buildings are sampled with probabilities proportional to residential units business_location1wrkplres[ibusid] = bldgids[probsample_noreplace(arange(bldgids.size), 1, prob_array=buildings['residential_units'][idx])] else: business_location1wrkplres[ibusid] = business_location1wrkpl[ibusid] home_based = zeros(business_sizes.sum(), dtype="bool8") job_building_id = zeros(business_sizes.sum(), dtype="int32") job_array_labels = business_ids.repeat(business_sizes) job_assignment_case = zeros(business_sizes.sum(), dtype="int32") processed_bindicator = zeros(businesses.size(), dtype="bool8") business_codes = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"]) business_nworkplaces = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"]) logger.log_status("Total number of jobs: %s" % home_based.size) # 1. 1-2 worker business in 1 residential building idx_sngl_wrk_1bld_fit = where(logical_and(business_sizes < 3, business_codes == 1))[0] jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_1bld_fit]) home_based[jidx] = True job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_1bld_fit].repeat(business_sizes[idx_sngl_wrk_1bld_fit]) job_assignment_case[jidx] = 1 processed_bindicator[idx_sngl_wrk_1bld_fit] = True logger.log_status("1. %s jobs (%s businesses) set as home-based due to 1-2 worker x 1 residential building fit." % ( business_sizes[idx_sngl_wrk_1bld_fit].sum(), idx_sngl_wrk_1bld_fit.size)) # 2. 1-2 worker business in multiple residential buildings idx_sngl_wrk_multbld_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 2))[0] jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_multbld_fit]) home_based[jidx] = True job_building_id[jidx] = business_location1wrkplres[idx_sngl_wrk_multbld_fit].repeat(business_sizes[idx_sngl_wrk_multbld_fit]) job_assignment_case[jidx] = 2 processed_bindicator[idx_sngl_wrk_multbld_fit] = True logger.log_status("2. %s jobs (%s businesses) set as home-based due to 1-2 worker x multiple residential buildings fit." % ( business_sizes[idx_sngl_wrk_multbld_fit].sum(), idx_sngl_wrk_multbld_fit.size)) # 3. 1-2 worker in single non-res building (not mixed-use) idx_sngl_wrk_single_nonres_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 3))[0] jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_single_nonres_fit]) job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_single_nonres_fit].repeat(business_sizes[idx_sngl_wrk_single_nonres_fit]) job_assignment_case[jidx] = 3 processed_bindicator[idx_sngl_wrk_single_nonres_fit] = True logger.log_status("3. %s jobs (%s businesses) placed due to 1-2 worker x single non-res building fit." % ( business_sizes[idx_sngl_wrk_single_nonres_fit].sum(), idx_sngl_wrk_single_nonres_fit.size)) # 4. 
1-2 worker in multiple non-res building (not mixed-use) idx_sngl_wrk_mult_nonres_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 4))[0] jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mult_nonres_fit]) job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mult_nonres_fit].repeat(business_sizes[idx_sngl_wrk_mult_nonres_fit]) job_assignment_case[jidx] = 4 processed_bindicator[idx_sngl_wrk_mult_nonres_fit] = True logger.log_status("4. %s jobs (%s businesses) placed due to 1-2 worker x multiple non-res building fit." % ( business_sizes[idx_sngl_wrk_mult_nonres_fit].sum(), idx_sngl_wrk_mult_nonres_fit.size)) # 5. 1-2 worker in single mixed-use building idx_sngl_wrk_smu_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 5))[0] jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_smu_fit]) job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_smu_fit].repeat(business_sizes[idx_sngl_wrk_smu_fit]) job_assignment_case[jidx] = 5 processed_bindicator[idx_sngl_wrk_smu_fit] = True logger.log_status("5. %s jobs (%s businesses) in 1-2 worker x single mixed-use building." % ( business_sizes[idx_sngl_wrk_smu_fit].sum(), idx_sngl_wrk_smu_fit.size)) # 6. 1-2 worker in multiple mixed-type buildings idx_sngl_wrk_mmu_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 6))[0] jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mmu_fit]) job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mmu_fit].repeat(business_sizes[idx_sngl_wrk_mmu_fit]) bldtype = buildings.get_attribute_by_id("building_type_id", business_location1wrkpl[idx_sngl_wrk_mmu_fit]) is_bldtype_res = in1d(bldtype, restypes) home_based[in1d(job_array_labels, business_ids[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)])] = True job_assignment_case[jidx] = 6 processed_bindicator[idx_sngl_wrk_mmu_fit] = True logger.log_status("6. %s jobs (%s businesses) in 1-2 worker x multiple mixed-type buildings. %s jobs classified as home-based." % ( business_sizes[idx_sngl_wrk_mmu_fit].sum(), idx_sngl_wrk_mmu_fit.size, business_sizes[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)].sum())) # 7. 1-2 worker business in residential parcel with no building idx_sngl_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 7))[0] jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_vacant_res]) job_assignment_case[jidx] = 7 home_based[jidx] = True processed_bindicator[idx_sngl_wrk_vacant_res] = True logger.log_status("7. %s jobs (%s businesses of size 1-2) could not be placed due to non-existing buildings in parcels with residential LU type." % ( business_sizes[idx_sngl_wrk_vacant_res].sum(), idx_sngl_wrk_vacant_res.size)) # 8. 3+ workers of governmental workplaces in 1+ residential building ind_bussiness_case8 = logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, in1d(businesses['sector_id'], [18,19]))), in1d(business_codes, [1,2])) idx_wrk_fit = where(ind_bussiness_case8)[0] jidx = in1d(job_array_labels, business_ids[idx_wrk_fit]) job_assignment_case[jidx] = 8 processed_bindicator[idx_wrk_fit] = True logger.log_status("8. %s governmental jobs (%s businesses of size 3+) could not be placed due to residing in residential buildings only." % ( business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size)) # 9. 3-30 workers in single residential building. Make two of them home based. 
idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, business_sizes <= 30)), business_codes == 1))[0] jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit]) job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_fit].repeat(business_sizes[idx_sngl_wrk_fit]) bsizeminus2 = vstack((2*ones(idx_sngl_wrk_fit.size), business_sizes[idx_sngl_wrk_fit]-2)).ravel("F").astype("int32") # interweaving 2 and remaining business size hbidx = tile(array([True, False]), bsizeminus2.size/2).repeat(bsizeminus2) # set the first two jobs of every business to True, others to False home_based[(where(jidx)[0])[hbidx]] = True job_assignment_case[jidx] = 9 processed_bindicator[idx_sngl_wrk_fit] = True logger.log_status("9. %s jobs (%s businesses) in 3-30 worker x single residential building. %s jobs assigned as home-based." % ( business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, hbidx.sum())) # 10. 3-30 workers in multiple residential buildings. Make two of them home based. idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, business_sizes <= 30)), business_codes == 2))[0] jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit]) job_assignment_case[jidx] = 10 processed_bindicator[idx_sngl_wrk_fit] = True # sample buildings to businesses by parcels bpcls = unique(businesses["parcel_id"][idx_sngl_wrk_fit]) for ipcl in range(bpcls.size): bidx = where(buildings['parcel_id'] == bpcls[ipcl])[0] bldgids = buildings['building_id'][bidx] bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]], business_ids[idx_sngl_wrk_fit]) # multiply by units for sampling prop. to units rather than buildings bldgids = bldgids.repeat(maximum(1, buildings['residential_units'][bidx].astype('int32'))) if bldgids.size < bussids.size: bldarray = bldgids.repeat(1+ceil((bussids.size - bldgids.size)/float(bldgids.size)) ) else: bldarray = bldgids shuffle(bldarray) # randomly reorder in-place for ib in range(bussids.size): jidx = where(job_array_labels == bussids[ib])[0] job_building_id[jidx] = bldarray[ib] home_based[jidx[0:2]] = True logger.log_status("10. %s jobs (%s businesses) in 3-30 worker x multiple residential building. %s jobs assigned as home-based." % ( business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, idx_sngl_wrk_fit.size*2)) # 11. single workplace, 3+ workers in single non-res or mixed-use building (11.) idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2), logical_or(business_codes==3, business_codes==5)), business_nworkplaces==1))[0] which_labels = where(in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers]))[0] job_building_id[which_labels] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(business_sizes[idx_sngl_wrkplace_2plus_workers]) job_assignment_case[which_labels] = 11 processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True logger.log_status("11. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x single non-res/mixed-use building fit." % ( business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size)) # 12. 
single workplace, 3+ workers in multiple mixed-type building idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2), logical_or(business_codes==4, business_codes==6)), business_nworkplaces==1))[0] jidx = in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers]) job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(business_sizes[idx_sngl_wrkplace_2plus_workers]) job_assignment_case[jidx] = 12 processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True logger.log_status("12. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x multiple non-res/mixed building fit." % ( business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size)) # 13. multiple workplaces, 3+ workers in single non-res or mixed building idx_mult_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2), logical_or(business_codes==3, business_codes==5)), business_nworkplaces > 1))[0] jidx = in1d(job_array_labels, business_ids[idx_mult_wrkplace_2plus_workers]) job_building_id[jidx] = business_location1wrkpl[idx_mult_wrkplace_2plus_workers].repeat(business_sizes[idx_mult_wrkplace_2plus_workers]) job_assignment_case[jidx] = 13 processed_bindicator[idx_mult_wrkplace_2plus_workers] = True logger.log_status("13. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x single non-res/mixed building fit." % ( business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size)) # 14. multiple workplaces, 3+ workers in multiple non-res or mixed building idx_mult_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2), logical_or(business_codes==4, business_codes==6)), business_nworkplaces > 1))[0] processed_bindicator[idx_mult_wrkplace_2plus_workers] = True # sample buildings to businesses by parcels bpcls = unique(businesses["parcel_id"][idx_mult_wrkplace_2plus_workers]) #hbasedsum = home_based.sum() for ipcl in range(bpcls.size): bldgids = buildings['building_id'][buildings['parcel_id'] == bpcls[ipcl]] bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]], business_ids[idx_mult_wrkplace_2plus_workers]) if bldgids.size < bussids.size: bldarray = bldgids.repeat(1+ceil((bussids.size - bldgids.size)/float(bldgids.size))) else: bldarray = bldgids shuffle(bldarray) # randomly reorder in-place is_res = in1d(bldarray, restypes) for ib in range(bussids.size): jidx = where(job_array_labels == bussids[ib]) job_building_id[jidx] = bldarray[ib] #home_based[jidx] = is_res job_assignment_case[jidx] = 14 logger.log_status("14. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x multiple non-res/mixed building fit." % ( business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size)) # 15. 3+ workers in residential parcel with no building idx_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 2), business_codes == 7))[0] jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_res]) job_assignment_case[jidx] = 15 processed_bindicator[idx_wrk_vacant_res] = True logger.log_status("15. %s jobs (%s businesses of 3+ workers) could not be placed due to non-existing buildings in parcels with residential LU type." % ( business_sizes[idx_wrk_vacant_res].sum(), idx_wrk_vacant_res.size)) # 16. 
non-residential parcel with no building
        idx_wrk_vacant_nonres = where(logical_and(processed_bindicator==0, business_codes == 8))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_nonres])
        job_assignment_case[jidx] = 16
        processed_bindicator[idx_wrk_vacant_nonres] = True
        logger.log_status("16. %s jobs (%s businesses) could not be placed due to non-existing buildings in parcels with non-residential LU type." % (
            business_sizes[idx_wrk_vacant_nonres].sum(), idx_wrk_vacant_nonres.size))

        # 17. 31+ workers in single residential building. Do not place - will go into ELCM.
        idx_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 30), business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 17
        processed_bindicator[idx_wrk_fit] = True
        logger.log_status("17. %s jobs (%s businesses) in 31+ workers x single residential building." % (
            business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))

        # 18. 31+ workers in multiple residential buildings.
        idx_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 30), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 18
        processed_bindicator[idx_wrk_fit] = True
        logger.log_status("18. %s jobs (%s businesses) in 31+ workers x multiple residential buildings." % (
            business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))

        # jobs in messy buildings
        idx_messy_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 0), business_codes == 0))[0]
        processed_bindicator[idx_messy_fit] = True
        logger.log_status("%s jobs (%s businesses) could not be placed due to messy buildings." % (
            business_sizes[idx_messy_fit].sum(), idx_messy_fit.size))

        # build new buildings for jobs in cases 7, 8, 15 and 16
        jidx_no_bld = where(in1d(job_assignment_case, [7,8,15,16]))[0]
        bus = unique(job_array_labels[jidx_no_bld])
        bsidx = businesses.get_id_index(bus)
        # first create buildings for single workplaces per parcel
        single_workplace_idx = where(business_nworkplaces[bsidx] == 1)[0]
        newbld_parcel_id = businesses['parcel_id'][bsidx][single_workplace_idx]
        newbld_bt = sector2building_type(businesses['sector_id'][bsidx][single_workplace_idx])
        newbids = arange(buildings.get_id_attribute().max()+1, buildings.get_id_attribute().max()+single_workplace_idx.size+1)
        bbldid = zeros(bsidx.size, dtype='int32')
        bbldid[single_workplace_idx] = newbids
        # for parcels with multiple workplaces select the largest business to determine its building type
        mult_bsidx = bsidx[where(business_nworkplaces[bsidx] > 1)[0]]
        empty_parcels = businesses['parcel_id'][mult_bsidx]
        uempty_parcels = unique(empty_parcels)
        bsize_on_empty_pcl = ndmax(business_sizes[mult_bsidx], labels=empty_parcels, index=uempty_parcels)
        newbld2_sec = zeros(uempty_parcels.size, dtype='int32')
        newbids2 = arange(newbids.max()+1, newbids.max()+uempty_parcels.size+1)
        for ipcl in range(uempty_parcels.size):
            newbld2_sec[ipcl] = businesses['sector_id'][mult_bsidx][logical_and(businesses['parcel_id'][mult_bsidx] == uempty_parcels[ipcl],
                                                                                business_sizes[mult_bsidx]==bsize_on_empty_pcl[ipcl])][0]
            this_bidx = where(businesses['parcel_id'][bsidx] == uempty_parcels[ipcl])
            bbldid[this_bidx] = newbids2[ipcl]
        newbld_parcel_id = concatenate((newbld_parcel_id, uempty_parcels))
        newbld_bt = concatenate((newbld_bt, sector2building_type(newbld2_sec)))
        newbldgs = {'building_id': concatenate((newbids, newbids2)),
                    'parcel_id': newbld_parcel_id,
                    'building_type_id': newbld_bt,
                    }
        buildings.add_elements(newbldgs, require_all_attributes=False)
        jidx = where(in1d(job_array_labels, business_ids[bsidx]))[0]
        job_building_id[jidx] = bbldid.repeat(business_sizes[bsidx])
        logger.log_status("Built %s new buildings to accommodate %s jobs (out of which %s are governmental) from cases 7, 8, 15 and 16." % (
            newbld_parcel_id.size, jidx.size,
            business_sizes[bsidx][where(in1d(businesses['sector_id'][bsidx], [18,19]))].sum()))
        logger.log_status("Assigned %s (%s percent) home-based jobs." % (home_based.sum(),
                                                                         round(home_based.sum()/(home_based.size/100.),2)))
        logger.log_status("Finished: %s percent of jobs processed (%s jobs in %s businesses); %s jobs (%s businesses) remain to be processed." % \
                          (round(business_sizes[processed_bindicator].sum()/(home_based.size/100.),2),
                           business_sizes[processed_bindicator].sum(), processed_bindicator.sum(),
                           business_sizes[logical_not(processed_bindicator)].sum(),
                           business_sizes[logical_not(processed_bindicator)].size))
        logger.start_block("Storing jobs data.")
        # create job dataset
        job_data = {"job_id": (arange(job_building_id.size)+1).astype("int32"),
                    "home_based_status": home_based,
                    "building_id": job_building_id,
                    "business_id": job_array_labels.astype("int32"),
                    "sector_id": businesses['sector_id'].repeat(business_sizes).astype("int32"),
                    "parcel_id": businesses['parcel_id'].repeat(business_sizes).astype("int32"),
                    "assignment_case": job_assignment_case}
        # join with zones
        if zone_dsname is not None:
            zones = dataset_pool.get_dataset(zone_dsname)
            idname = zones.get_id_name()[0]
            #jpcls = buildings.get_attribute_by_id('parcel_id', job_building_id)
            job_data[idname] = parcels.get_attribute_by_id(idname, job_data["parcel_id"])
        dictstorage = StorageFactory().get_storage('dict_storage')
        dictstorage.write_table(table_name="jobs", table_data=job_data)
        jobs = Dataset(in_storage=dictstorage, in_table_name="jobs", dataset_name="job", id_name="job_id")
        if out_storage is not None:
            jobs.write_dataset(out_storage=out_storage, out_table_name="jobs")
            buildings.write_dataset(out_storage=out_storage, attributes=AttributeType.PRIMARY)
        logger.end_block()
        return jobs
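# Sketch of the business-to-job expansion used throughout this method (values are
# illustrative): each per-business value is repeated once per job with numpy.repeat,
# which keeps every job-level array aligned with business_sizes.
#
#     from numpy import array
#     business_ids   = array([10, 11, 12])
#     business_sizes = array([3, 1, 2])
#     business_ids.repeat(business_sizes)  # -> array([10, 10, 10, 11, 12, 12])
#
# job_array_labels, sector_id and parcel_id in job_data are all built this way, so
# boolean masks computed per business can be broadcast to jobs via in1d.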
class EmploymentTransitionModel(Model): """Creates and removes jobs from job_set.""" model_name = "Employment Transition Model" location_id_name_default = "grid_id" variable_package_default = "urbansim" def __init__(self, location_id_name=None, variable_package=None, dataset_pool=None, debuglevel=0): self.debug = DebugPrinter(debuglevel) self.location_id_name = self.location_id_name_default self.variable_package = self.variable_package_default if location_id_name is not None: self.location_id_name = location_id_name if variable_package is not None: self.variable_package = variable_package self.dataset_pool = self.create_dataset_pool(dataset_pool, ["urbansim", "opus_core"]) def run(self, year, job_set, control_totals, job_building_types, data_objects=None, resources=None): self._do_initialize_for_run(job_set, job_building_types, data_objects) idx = where(control_totals.get_attribute("year") == year)[0] self.control_totals_for_this_year = DatasetSubset(control_totals, idx) self._do_run_for_this_year(job_set) return self._update_job_set(job_set) def _do_initialize_for_run(self, job_set, job_building_types, data_objects=None): self.max_id = job_set.get_id_attribute().max() self.job_size = job_set.size() self.job_id_name = job_set.get_id_name()[0] self.new_jobs = { self.location_id_name: array([], dtype=job_set.get_data_type(self.location_id_name, int32)), "sector_id": array([], dtype=job_set.get_data_type("sector_id", int32)), self.job_id_name: array([], dtype=job_set.get_data_type(self.job_id_name, int32)), "building_type": array([], dtype=job_set.get_data_type("building_type", int8)) } self.remove_jobs = array([], dtype=int32) if data_objects is not None: self.dataset_pool.add_datasets_if_not_included(data_objects) self.dataset_pool.add_datasets_if_not_included( {job_building_types.get_dataset_name(): job_building_types}) self.available_building_types = job_building_types.get_id_attribute() def _compute_sector_variables(self, sectors, job_set): compute_resources = Resources({"debug": self.debug}) job_set.compute_variables(map( lambda x: "%s.%s.is_in_employment_sector_%s_home_based" % (self.variable_package, job_set.get_dataset_name(), x), sectors) + map( lambda x: "%s.%s.is_in_employment_sector_%s_non_home_based" % (self.variable_package, job_set.get_dataset_name(), x), sectors) + ["is_non_home_based_job", "is_home_based_job"], dataset_pool=self.dataset_pool, resources=compute_resources) def _do_run_for_this_year(self, job_set): building_type = job_set.get_attribute("building_type") sectors = unique( self.control_totals_for_this_year.get_attribute("sector_id")) self._compute_sector_variables(sectors, job_set) for sector in sectors: isector = where( self.control_totals_for_this_year.get_attribute("sector_id") == sector)[0] total_hb_jobs = self.control_totals_for_this_year.get_attribute( "total_home_based_employment")[isector] total_nhb_jobs = self.control_totals_for_this_year.get_attribute( "total_non_home_based_employment")[isector] is_in_sector_hb = job_set.get_attribute( "is_in_employment_sector_%s_home_based" % sector) is_in_sector_nhb = job_set.get_attribute( "is_in_employment_sector_%s_non_home_based" % sector) diff_hb = int(total_hb_jobs - is_in_sector_hb.astype(int8).sum()) diff_nhb = int(total_nhb_jobs - is_in_sector_nhb.astype(int8).sum()) if diff_hb < 0: # home based jobs to be removed w = where(is_in_sector_hb == 1)[0] sample_array, non_placed, size_non_placed = \ get_array_without_non_placed_agents(job_set, w, -1*diff_hb, self.location_id_name) self.remove_jobs = concatenate( 
(self.remove_jobs, non_placed, sample_noreplace(sample_array, max(0, abs(diff_hb) - size_non_placed)))) if diff_nhb < 0: # non home based jobs to be removed w = where(is_in_sector_nhb == 1)[0] sample_array, non_placed, size_non_placed = \ get_array_without_non_placed_agents(job_set, w, -1*diff_nhb, self.location_id_name) self.remove_jobs = concatenate( (self.remove_jobs, non_placed, sample_noreplace(sample_array, max(0, abs(diff_nhb) - size_non_placed)))) if diff_hb > 0: # home based jobs to be created self.new_jobs[self.location_id_name] = concatenate( (self.new_jobs[self.location_id_name], zeros( (diff_hb, ), dtype=self.new_jobs[self.location_id_name].dtype.type) )) self.new_jobs["sector_id"] = concatenate( (self.new_jobs["sector_id"], (resize( array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_hb)))) if 1 in is_in_sector_hb: building_type_distribution = array( ndimage_sum(is_in_sector_hb, labels=building_type, index=self.available_building_types)) elif 1 in job_set.get_attribute( "is_home_based_job" ): # take the building type distribution from the whole region building_type_distribution = array( ndimage_sum(job_set.get_attribute("is_home_based_job"), labels=building_type, index=self.available_building_types)) else: # there are no home-based jobs in the region, take uniform distribution building_type_distribution = ones( self.available_building_types.size) building_type_distribution = building_type_distribution / building_type_distribution.sum( ) sampled_building_types = probsample_replace( self.available_building_types, diff_hb, building_type_distribution / float(building_type_distribution.sum())) self.new_jobs["building_type"] = concatenate( (self.new_jobs["building_type"], sampled_building_types.astype( self.new_jobs["building_type"].dtype.type))) new_max_id = self.max_id + diff_hb self.new_jobs[self.job_id_name] = concatenate( (self.new_jobs[self.job_id_name], arange(self.max_id + 1, new_max_id + 1))) self.max_id = new_max_id if diff_nhb > 0: # non home based jobs to be created self.new_jobs[self.location_id_name] = concatenate( (self.new_jobs[self.location_id_name], zeros( (diff_nhb, ), dtype=self.new_jobs[self.location_id_name].dtype.type) )) self.new_jobs["sector_id"] = concatenate( (self.new_jobs["sector_id"], (resize( array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_nhb)))) if 1 in is_in_sector_nhb: building_type_distribution = array( ndimage_sum(is_in_sector_nhb, labels=building_type, index=self.available_building_types)) elif 1 in job_set.get_attribute( "is_non_home_based_job" ): # take the building type distribution from the whole region building_type_distribution = array( ndimage_sum( job_set.get_attribute("is_non_home_based_job"), labels=building_type, index=self.available_building_types)) else: # there are no non-home-based jobs in the region, take uniform distribution building_type_distribution = ones( self.available_building_types.size) building_type_distribution = building_type_distribution / building_type_distribution.sum( ) sampled_building_types = probsample_replace( self.available_building_types, diff_nhb, building_type_distribution / float(building_type_distribution.sum())) self.new_jobs["building_type"] = concatenate( (self.new_jobs["building_type"], sampled_building_types.astype( self.new_jobs["building_type"].dtype.type))) new_max_id = self.max_id + diff_nhb self.new_jobs[self.job_id_name] = concatenate( (self.new_jobs[self.job_id_name], arange(self.max_id + 1, new_max_id + 1))) self.max_id = new_max_id def 
_update_job_set(self, job_set): job_set.remove_elements(self.remove_jobs) job_set.add_elements(self.new_jobs, require_all_attributes=False) difference = job_set.size() - self.job_size self.debug.print_debug( "Difference in number of jobs: %s (original %s," " new %s, created %s, deleted %s)" % (difference, self.job_size, job_set.size(), self.new_jobs[self.job_id_name].size, self.remove_jobs.size), 3) self.debug.print_debug( "Number of unplaced jobs: %s" % where(job_set.get_attribute(self.location_id_name) <= 0)[0].size, 3) return difference def prepare_for_run(self, storage, **kwargs): from urbansim.datasets.control_total_dataset import ControlTotalDataset control_totals = ControlTotalDataset(in_storage=storage, what="employment") sample_control_totals(storage, control_totals, **kwargs) return control_totals
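# Minimal usage sketch (hedged; it assumes the standard urbansim control-total table
# layout, one row per year and sector, with total_home_based_employment and
# total_non_home_based_employment columns):
#
#     storage = StorageFactory().get_storage("dict_storage")
#     storage.write_table(table_name="annual_employment_control_totals",
#                         table_data={"year": array([2005, 2005], dtype="int32"),
#                                     "sector_id": array([1, 2], dtype="int32"),
#                                     "total_home_based_employment": array([50, 10]),
#                                     "total_non_home_based_employment": array([1500, 900])})
#     model = EmploymentTransitionModel()
#     control_totals = model.prepare_for_run(storage)
#     model.run(2005, job_set, control_totals, job_building_types)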