def run(self, year, job_set, control_totals, job_building_types, data_objects=None, resources=None):
    self._do_initialize_for_run(job_set, job_building_types, data_objects)
    large_area_ids = control_totals.get_attribute("large_area_id")
    jobs_large_area_ids = job_set.compute_variables("washtenaw.job.large_area_id")
    unique_large_areas = unique(large_area_ids)
    is_year = control_totals.get_attribute("year") == year
    all_jobs_index = arange(job_set.size())
    sectors = unique(control_totals.get_attribute("sector_id")[is_year])
    self._compute_sector_variables(sectors, job_set)
    for area in unique_large_areas:
        idx = where(logical_and(is_year, large_area_ids == area))[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        jobs_index = where(jobs_large_area_ids == area)[0]
        jobs_for_this_area = DatasetSubset(job_set, jobs_index)
        logger.log_status("ETM for area %s (currently %s jobs)" % (area, jobs_for_this_area.size()))
        last_remove_idx = self.remove_jobs.size
        self._do_run_for_this_year(jobs_for_this_area)
        add_jobs_size = self.new_jobs[self.location_id_name].size - self.new_jobs["large_area_id"].size
        remove_jobs_size = self.remove_jobs.size - last_remove_idx
        logger.log_status("add %s, remove %s, total %s" % (add_jobs_size, remove_jobs_size,
                          jobs_for_this_area.size() + add_jobs_size - remove_jobs_size))
        self.new_jobs["large_area_id"] = concatenate((self.new_jobs["large_area_id"],
                                                      array(add_jobs_size * [area], dtype="int32")))
        # transform indices of removing jobs into indices of the whole dataset
        self.remove_jobs[last_remove_idx:self.remove_jobs.size] = all_jobs_index[
            jobs_index[self.remove_jobs[last_remove_idx:self.remove_jobs.size]]]
    self._update_job_set(job_set)
    idx_new_jobs = arange(job_set.size() - self.new_jobs["large_area_id"].size, job_set.size())
    jobs_large_area_ids = job_set.compute_variables("washtenaw.job.large_area_id")
    jobs_large_area_ids[idx_new_jobs] = self.new_jobs["large_area_id"]
    job_set.delete_one_attribute("large_area_id")
    job_set.add_attribute(jobs_large_area_ids, "large_area_id", metadata=AttributeType.PRIMARY)
    # return an index of new jobs
    return arange(job_set.size() - self.new_jobs["large_area_id"].size, job_set.size())
def run(self, year, job_set, control_totals, job_building_types, data_objects=None, resources=None):
    self._do_initialize_for_run(job_set, job_building_types, data_objects)
    subarea_ids = control_totals.get_attribute(self.subarea_id_name)
    jobs_subarea_ids = job_set.compute_one_variable_with_unknown_package(
        variable_name="%s" % (self.subarea_id_name), dataset_pool=self.dataset_pool)
    unique_subareas = unique(subarea_ids)
    is_year = control_totals.get_attribute("year") == year
    all_jobs_index = arange(job_set.size())
    sectors = unique(control_totals.get_attribute("sector_id")[is_year])
    self._compute_sector_variables(sectors, job_set)
    for area in unique_subareas:
        idx = where(logical_and(is_year, subarea_ids == area))[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        jobs_index = where(jobs_subarea_ids == area)[0]
        jobs_for_this_area = DatasetSubset(job_set, jobs_index)
        logger.log_status("ETM for area %s (currently %s jobs)" % (area, jobs_for_this_area.size()))
        last_remove_idx = self.remove_jobs.size
        self._do_run_for_this_year(jobs_for_this_area)
        add_jobs_size = self.new_jobs[self.location_id_name].size - self.new_jobs[self.subarea_id_name].size
        remove_jobs_size = self.remove_jobs.size - last_remove_idx
        logger.log_status("add %s, remove %s, total %s" % (add_jobs_size, remove_jobs_size,
                          jobs_for_this_area.size() + add_jobs_size - remove_jobs_size))
        self.new_jobs[self.subarea_id_name] = concatenate((self.new_jobs[self.subarea_id_name],
                                                           array(add_jobs_size * [area], dtype="int32")))
        # transform indices of removing jobs into indices of the whole dataset
        self.remove_jobs[last_remove_idx:self.remove_jobs.size] = all_jobs_index[
            jobs_index[self.remove_jobs[last_remove_idx:self.remove_jobs.size]]]
    self._update_job_set(job_set)
    idx_new_jobs = arange(job_set.size() - self.new_jobs[self.subarea_id_name].size, job_set.size())
    jobs_subarea_ids = job_set.compute_one_variable_with_unknown_package(
        variable_name="%s" % (self.subarea_id_name), dataset_pool=self.dataset_pool)
    jobs_subarea_ids[idx_new_jobs] = self.new_jobs[self.subarea_id_name]
    job_set.delete_one_attribute(self.subarea_id_name)
    job_set.add_attribute(jobs_subarea_ids, self.subarea_id_name, metadata=AttributeType.PRIMARY)
    # return an index of new jobs
    return arange(job_set.size() - self.new_jobs[self.subarea_id_name].size, job_set.size())
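# A minimal sketch (hypothetical numbers, not from the model) of the index
# translation used by the two run() methods above: indices produced against a
# DatasetSubset are relative to the subset, so they must be mapped through
# jobs_index back to positions in the full job set before they can be stored
# in self.remove_jobs, which indexes the whole dataset.
#
#     from numpy import array, arange
#
#     all_jobs_index = arange(10)            # positions in the full job set
#     jobs_index = array([2, 5, 7, 9])       # members of one subarea
#     local_remove_idx = array([0, 3])       # subset-relative removals
#     global_remove_idx = all_jobs_index[jobs_index[local_remove_idx]]
#     assert (global_remove_idx == array([2, 9])).all()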
def run(self, chunk_specification, dataset, dataset_index=None, result_array_type=float32, **kwargs):
    """ 'chunk_specification' - determines number of chunks to use when computing over the dataset set.
        'dataset' - an object of class Dataset that is to be chunked.
        'dataset_index' - index of individuals in dataset to be chunked.
        'result_array_type' - type of the resulting array. Can be any numerical type of numpy array.
        **kwargs - keyword arguments.
        The method chunks dataset_index in the desired number of chunks (minimum is 1)
        and for each chunk it calls the method 'run_chunk'. The order of the individuals
        entering the chunking is determined by the method 'get_agents_order'.
    """
    if dataset_index is None:
        dataset_index = arange(dataset.size())
    if not isinstance(dataset_index, ndarray):
        dataset_index = array(dataset_index)
    logger.log_status("Total number of individuals: %s" % dataset_index.size)
    result_array = zeros(dataset_index.size, dtype=result_array_type)
    if dataset_index.size <= 0:
        logger.log_status("Nothing to be done.")
        return result_array
    all_indexed_individuals = DatasetSubset(dataset, dataset_index)
    ordered_agent_indices = self.get_agents_order(all_indexed_individuals)  # set order of individuals in chunks
    # TODO: Remove next six lines after we inherit chunk specification as a text string.
    if chunk_specification is None:
        chunk_specification = {'nchunks': 1}
    chunker = ChunkSpecification(chunk_specification)
    self.number_of_chunks = chunker.nchunks(dataset_index)
    chunksize = int(ceil(all_indexed_individuals.size() / float(self.number_of_chunks)))
    for ichunk in range(self.number_of_chunks):
        logger.start_block("%s chunk %d out of %d." % (self.model_short_name,
                           (ichunk + 1), self.number_of_chunks))
        self.index_of_current_chunk = ichunk
        try:
            chunk_agent_indices = ordered_agent_indices[arange((ichunk * chunksize),
                min((ichunk + 1) * chunksize, all_indexed_individuals.size()))]
            logger.log_status("Number of agents in this chunk: %s" % chunk_agent_indices.size)
            result_array[chunk_agent_indices] = self.run_chunk(dataset_index[chunk_agent_indices],
                dataset, **kwargs).astype(result_array_type)
        finally:
            logger.end_block()
    return result_array
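# A short sketch of the chunk-sizing arithmetic used above, under the
# assumption of a hypothetical 10-agent index and {'nchunks': 3}: the chunk
# size is the ceiling of size/nchunks, so the last chunk is simply shorter
# when the total does not divide evenly.
#
#     from math import ceil
#     from numpy import arange
#
#     dataset_index = arange(10)
#     nchunks = 3
#     chunksize = int(ceil(dataset_index.size / float(nchunks)))   # -> 4
#     chunks = [dataset_index[i * chunksize:(i + 1) * chunksize] for i in range(nchunks)]
#     assert [c.size for c in chunks] == [4, 4, 2]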
def run(self, year, household_set, control_totals, characteristics, resources=None):
    # self.person_set = person_set
    self._do_initialize_for_run(household_set)
    control_totals.get_attribute("total_number_of_households")  # to make sure they are loaded
    self.characteristics = characteristics
    self.all_categories = self.characteristics.get_attribute("characteristic")
    self.all_categories = array(map(lambda x: x.lower(), self.all_categories))
    self.scaled_characteristic_names = get_distinct_names(self.all_categories).tolist()
    self.marginal_characteristic_names = copy(control_totals.get_id_name())
    index_year = self.marginal_characteristic_names.index("year")
    self.marginal_characteristic_names.remove("year")
    self.marginal_characteristic_names.remove(self.subarea_id_name)
    region_ids = control_totals.get_attribute(self.subarea_id_name)
    households_region_ids = household_set.compute_one_variable_with_unknown_package(
        variable_name="%s" % (self.subarea_id_name), dataset_pool=self.dataset_pool)
    unique_regions = unique(region_ids)
    is_year = control_totals.get_attribute("year") == year
    all_households_index = arange(household_set.size())
    for area in unique_regions:
        idx = where(logical_and(is_year, region_ids == area))[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        households_index = where(households_region_ids == area)[0]
        if households_index.size == 0:
            continue
        households_for_this_area = DatasetSubset(household_set, households_index)
        logger.log_status("HTM for area %s (currently %s households)" % (area, households_for_this_area.size()))
        last_remove_idx = self.remove_households.size
        last_new_hhs_idx = self.mapping_existing_hhs_to_new_hhs.size
        self._do_run_for_this_year(households_for_this_area)
        add_hhs_size = (self.new_households[self.location_id_name].size -
                        self.new_households[self.subarea_id_name].size +
                        self.mapping_existing_hhs_to_new_hhs.size - last_new_hhs_idx)
        remove_hhs_size = self.remove_households.size - last_remove_idx
        logger.log_status("add %s, remove %s, total %s" % (add_hhs_size, remove_hhs_size,
                          households_for_this_area.size() + add_hhs_size - remove_hhs_size))
        self.new_households[self.subarea_id_name] = concatenate((self.new_households[self.subarea_id_name],
            array((self.new_households[self.location_id_name].size -
                   self.new_households[self.subarea_id_name].size) * [area], dtype="int32")))
        # transform indices of removing households into indices of the whole dataset
        self.remove_households[last_remove_idx:self.remove_households.size] = all_households_index[
            households_index[self.remove_households[last_remove_idx:self.remove_households.size]]]
        # do the same for households to be duplicated
        self.mapping_existing_hhs_to_new_hhs[last_new_hhs_idx:self.mapping_existing_hhs_to_new_hhs.size] = \
            all_households_index[households_index[
                self.mapping_existing_hhs_to_new_hhs[last_new_hhs_idx:self.mapping_existing_hhs_to_new_hhs.size]]]
    self._update_household_set(household_set)
    idx_new_households = arange(household_set.size() - self.new_households[self.subarea_id_name].size,
                                household_set.size())
    #household_region_ids = household_set.compute_variables("urbansim_parcel.household.%s" % self.subarea_id_name)
    #household_region_ids[idx_new_households] = self.new_households[self.subarea_id_name]
    region_ids = household_set.get_attribute(self.subarea_id_name).copy()
    household_set.delete_one_attribute(self.subarea_id_name)
    household_set.add_attribute(region_ids, self.subarea_id_name, metadata=AttributeType.PRIMARY)
    # return an index of new households
    return idx_new_households
def _do_run(self, location_set, agent_set, agents_index, data_objects=None, resources=None):
    location_id_name = location_set.get_id_name()[0]
    jobsubset = DatasetSubset(agent_set, agents_index)
    if jobsubset.size() <= 0:
        return array([], dtype='int32')
    # unplace jobs
    agent_set.set_values_of_one_attribute(location_id_name,
                                          resize(array([-1.0]), jobsubset.size()), agents_index)
    sector_ids = jobsubset.get_attribute("sector_id")
    sectors = unique(sector_ids)
    counts = ndimage_sum(ones((jobsubset.size(),)),
                         labels=sector_ids.astype('int32'), index=sectors.astype('int32'))
    if sectors.size <= 1:
        counts = array([counts])
    variables = map(lambda x: "number_of_jobs_of_sector_" + str(int(x)), sectors)
    compute_variables = map(lambda var: self.variable_package + "." +
                            location_set.get_dataset_name() + "." + var, variables)
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    self.dataset_pool.add_datasets_if_not_included({agent_set.get_dataset_name(): agent_set})
    location_set.compute_variables(compute_variables, dataset_pool=self.dataset_pool)
    if self.filter is None:
        location_index = arange(location_set.size())
    else:
        filter_values = location_set.compute_variables([self.filter], dataset_pool=self.dataset_pool)
        location_index = where(filter_values > 0)[0]
    if location_index.size <= 0:
        logger.log_status("No locations available. Nothing to be done.")
        return array([])
    location_subset = DatasetSubset(location_set, location_index)
    i = 0
    for sector in sectors:
        distr = location_subset.get_attribute(variables[i])
        if ma.allclose(distr.sum(), 0):
            uniform_prob = 1.0 / distr.size
            distr = resize(array([uniform_prob], dtype='float64'), distr.size)
            logger.log_warning("Probabilities in scaling model for sector " + str(sector) +
                               " sum to 0.0. Substituting uniform distribution!")
        # random_sample = sample(location_set.get_attribute("grid_id"), k=int(counts[i]),
        #                        probabilities=distr)
        distr = distr / float(distr.sum())
        random_sample = probsample_replace(location_subset.get_id_attribute(),
                                           size=int(counts[i]), prob_array=distr)
        idx = where(sector_ids == sector)[0]
        # modify job locations
        agent_set.set_values_of_one_attribute(location_id_name, random_sample, agents_index[idx])
        i += 1
    return agent_set.get_attribute_by_index(location_id_name, agents_index)
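# A hedged sketch of the proportional sampling used above: locations are drawn
# with replacement, with probability proportional to how many jobs of the
# sector they already hold. probsample_replace comes from opus_core;
# numpy.random.choice is used here as a stand-in, and all numbers are invented.
#
#     from numpy import array
#     from numpy.random import choice, seed
#
#     seed(1)
#     location_ids = array([101, 102, 103])
#     jobs_of_sector = array([8.0, 2.0, 0.0])       # existing jobs per location
#     distr = jobs_of_sector / jobs_of_sector.sum()
#     placed = choice(location_ids, size=5, p=distr)  # ~80% land in 101
#     assert 103 not in placed                        # zero weight is never drawn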
def run(self, year=None, dataset_pool=None, **kwargs):
    """ """
    if dataset_pool is None:
        dataset_pool = SessionConfiguration().get_dataset_pool()
    if year is None:
        year = SimulationState().get_current_time()
    this_year_index = where(self.scheduled_events.get_attribute('year') == year)[0]
    scheduled_events_for_this_year = DatasetSubset(self.scheduled_events, this_year_index)
    scheduled_events_for_this_year.load_dataset_if_not_loaded()
    column_names = list(set(self.scheduled_events.get_known_attribute_names()) -
                        set(['year', 'action', 'attribute', 'amount', 'event_id', '_hidden_id_']))
    column_names.sort()
    # column_values = dict([(name, scheduled_events_for_this_year.get_attribute(name)) for name in column_names])
    for index in range(scheduled_events_for_this_year.size()):
        indicator = ones(self.dataset.size(), dtype='bool')
        event_attr = {}
        for attribute in column_names:
            if attribute in self.dataset.get_known_attribute_names():
                dataset_attribute = self.dataset.get_attribute(attribute)
            else:
                ## this is done inside the loop because some action may delete computed attributes, such as dataset.add_elements()
                try:
                    dataset_attribute = self.dataset.compute_one_variable_with_unknown_package(
                        attribute, dataset_pool=dataset_pool)
                except:
                    raise ValueError, "attribute %s used in scheduled events dataset can not be found in dataset %s" % (
                        attribute, self.dataset.get_dataset_name())
            # if attribute in column_names:
            aval = scheduled_events_for_this_year.get_attribute(attribute)[index]
            if aval == -1:
                continue  # ignore if column value is -1
            else:
                indicator *= dataset_attribute == aval
                event_attr.update({attribute: aval})
        # agents in dataset satisfying all conditions are identified by indicator
        legit_index = where(indicator)[0]
        this_event = scheduled_events_for_this_year.get_data_element(index)
        if not hasattr(this_event, 'attribute'):
            action_attr_name = ''
        else:
            action_attr_name = this_event.attribute
        action_function = getattr(self, '_' + this_event.action.strip().lower())
        action_function(amount=this_event.amount,
                        attribute=action_attr_name,
                        dataset=self.dataset,
                        index=legit_index,
                        data_dict=event_attr)
        self.post_run(self.dataset, legit_index, **kwargs)
    return self.dataset
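# A hypothetical scheduled_events row (values invented for illustration) and
# how the loop above interprets it: every column other than
# year/action/attribute/amount becomes an equality filter on the dataset,
# a value of -1 means "ignore this column", and the matching action method
# (e.g. _delete) is applied to the surviving index.
#
#     from numpy import array, ones, where
#
#     example_event = {'year': 2005, 'action': 'delete', 'amount': 10,
#                      'building_type_id': 3, 'zone_id': -1}   # zone_id=-1: ignored
#     building_type_id = array([3, 1, 3, 2])
#     indicator = ones(4, dtype='bool')
#     indicator *= building_type_id == example_event['building_type_id']
#     assert (where(indicator)[0] == array([0, 2])).all()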
def run(self, n=500, realestate_dataset_name='building', current_year=None, **kwargs):
    target_vacancy = self.dataset_pool.get_dataset('target_vacancy')
    if current_year is None:
        year = SimulationState().get_current_time()
    else:
        year = current_year
    self.current_year = year
    this_year_index = where(target_vacancy['year'] == year)[0]
    target_vacancy_for_this_year = DatasetSubset(target_vacancy, this_year_index)
    if target_vacancy_for_this_year.size() == 0:
        raise IOError, 'No target vacancy defined for year %s.' % year
    self.all_btypes_size = target_vacancy_for_this_year.size()
    return DevelopmentProjectProposalSamplingModelWithMinimum.run(self, n=n,
        realestate_dataset_name=realestate_dataset_name, current_year=current_year, **kwargs)
def _do_run(self, location_set, agent_set, agents_index, resources=None):
    location_id_name = location_set.get_id_name()[0]
    asubset = DatasetSubset(agent_set, agents_index)
    if asubset.size() <= 0:
        return array([], dtype='int32')
    # unplace agents
    agent_set.modify_attribute(location_id_name, resize(array([-1]), asubset.size()), agents_index)
    if self.filter is None:
        location_index = arange(location_set.size())
    else:
        filter_values = location_set.compute_variables([self.filter], dataset_pool=self.dataset_pool)
        location_index = where(filter_values > 0)[0]
    if location_index.size <= 0:
        logger.log_status("No locations available. Nothing to be done.")
        return array([])
    location_subset = DatasetSubset(location_set, location_index)
    if self.consider_capacity:
        location_set.compute_variables([self.capacity_attribute], dataset_pool=self.dataset_pool)
        weights = location_subset[self.capacity_attribute]
        if self.number_of_agents_attribute is not None:
            location_set.compute_variables([self.number_of_agents_attribute],
                                           dataset_pool=self.dataset_pool)
            weights = clip(weights - location_subset[self.number_of_agents_attribute],
                           0, location_subset[self.capacity_attribute])
    else:
        weights = ones(location_subset.size())
    if weights.sum() <= 0:
        logger.log_status("Locations' capacity sums to zero. Nothing to be done.")
        return array([])
    distr = weights / float(weights.sum())
    random_sample = probsample_replace(location_subset.get_id_attribute(),
                                       size=asubset.size(), prob_array=distr)
    agent_set.modify_attribute(location_id_name, random_sample, agents_index)
    return agent_set.get_attribute_by_index(location_id_name, agents_index)
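# A small sketch of the capacity weighting above (numbers invented): the
# remaining capacity is capacity minus current occupants, clipped to the range
# [0, capacity], so an overfull location gets zero weight rather than a
# negative one.
#
#     from numpy import array, clip
#
#     capacity = array([10, 5, 8])
#     occupants = array([4, 7, 8])                  # the second location is overfull
#     weights = clip(capacity - occupants, 0, capacity)
#     assert (weights == array([6, 0, 0])).all()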
def run(self, agent_set, **kwargs):
    large_areas = agent_set.get_attribute(self.large_area_id_name)
    valid_large_area = where(large_areas > 0)[0]
    result = array([], dtype="int32")  # ensure an (empty) result even if no valid large area exists
    if valid_large_area.size > 0:
        unique_large_areas = unique(large_areas[valid_large_area])
        cond_array = zeros(agent_set.size(), dtype="bool8")
        cond_array[valid_large_area] = True
        for area in unique_large_areas:
            new_index = where(logical_and(cond_array, large_areas == area))[0]
            agent_subset = DatasetSubset(agent_set, new_index)
            logger.log_status("ARM for area %s (%s agents)" % (area, agent_subset.size()))
            this_result = AgentRelocationModel.run(self, agent_subset, **kwargs)
            result = concatenate((result, new_index[this_result]))
        no_large_area = where(large_areas <= 0)[0]
        result = concatenate((result, no_large_area))
    return result
def run(self, in_storage, out_storage=None, business_dsname="business", zone_dsname=None):
    dataset_pool = DatasetPool(storage=in_storage,
                               package_order=['psrc_parcel', 'urbansim_parcel', 'urbansim', 'opus_core'])
    seed(1)
    allbusinesses = dataset_pool.get_dataset(business_dsname)
    parcels = dataset_pool.get_dataset('parcel')
    buildings = dataset_pool.get_dataset('building')
    parcels.compute_variables(["urbansim_parcel.parcel.residential_units",
                               "number_of_buildings = parcel.number_of_agents(building)",
                               "non_residential_sqft = (parcel.aggregate(building.non_residential_sqft)).astype(int32)",
                               "number_of_res_buildings = parcel.aggregate(urbansim_parcel.building.is_residential)",
                               "number_of_nonres_buildings = parcel.aggregate(urbansim_parcel.building.is_non_residential)",
                               "number_of_mixed_use_buildings = parcel.aggregate(urbansim_parcel.building.is_generic_building_type_6)"
                               ], dataset_pool=dataset_pool)
    restypes = [12, 4, 19, 11, 34, 10, 33]
    reslutypes = [13, 14, 15, 24]
    is_valid_business = ones(allbusinesses.size(), dtype='bool8')
    parcels_not_matched = logical_and(in1d(allbusinesses["parcel_id"], parcels.get_id_attribute(), invert=True),
                                      allbusinesses["parcel_id"] > 0)
    if parcels_not_matched.sum() > 0:
        is_valid_business[where(parcels_not_matched)] = False
        logger.log_warning(message="No parcel exists for %s businesses (%s jobs)" % (
            parcels_not_matched.sum(),
            allbusinesses[self.number_of_jobs_attr][where(parcels_not_matched)].sum()))
    zero_parcel = allbusinesses["parcel_id"] <= 0
    if zero_parcel.sum() > 0:
        is_valid_business[where(zero_parcel)] = False
        logger.log_warning(message="%s businesses (%s jobs) located on zero parcel_id" % (
            zero_parcel.sum(),
            allbusinesses[self.number_of_jobs_attr][where(zero_parcel)].sum()))
    zero_size = logical_and(is_valid_business, allbusinesses[self.number_of_jobs_attr].round() == 0)
    if sum(zero_size) > 0:
        is_valid_business[where(zero_size)] = False
        logger.log_warning(message="%s businesses are of size 0." % sum(zero_size))
    businesses = DatasetSubset(allbusinesses, index=where(is_valid_business)[0])
    parcels.add_attribute(name="number_of_workplaces",
                          data=parcels.sum_dataset_over_ids(businesses, constant=1))
    has_single_res_buildings = logical_and(parcels["number_of_buildings"] == 1,
                                           parcels["number_of_res_buildings"] == 1)  # 1 (1 residential)
    parcels.add_attribute(data=has_single_res_buildings.astype("int32"), name="buildings_code")
    has_mult_res_buildings = logical_and(parcels["number_of_buildings"] > 1,
                                         parcels["number_of_nonres_buildings"] == 0)  # 2 (mult residential)
    parcels.modify_attribute("buildings_code", data=2 * ones(has_mult_res_buildings.sum()),
                             index=where(has_mult_res_buildings))
    has_single_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] == 1,
                                                          parcels["number_of_nonres_buildings"] == 1),
                                              parcels["number_of_mixed_use_buildings"] == 0)  # 3 (1 non-res)
    parcels.modify_attribute("buildings_code", data=3 * ones(has_single_nonres_buildings.sum()),
                             index=where(has_single_nonres_buildings))
    has_mult_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] > 1,
                                                        parcels["number_of_res_buildings"] == 0),
                                            parcels["number_of_mixed_use_buildings"] == 0)  # 4 (mult non-res)
    parcels.modify_attribute("buildings_code", data=4 * ones(has_mult_nonres_buildings.sum()),
                             index=where(has_mult_nonres_buildings))
    has_single_mixed_buildings = logical_and(parcels["number_of_buildings"] == 1,
                                             parcels["number_of_mixed_use_buildings"] == 1)  # 5 (1 mixed-use)
    parcels.modify_attribute("buildings_code", data=5 * ones(has_single_mixed_buildings.sum()),
                             index=where(has_single_mixed_buildings))
    has_mult_mixed_buildings = logical_and(parcels["number_of_buildings"] > 1,
        logical_or(logical_and(parcels["number_of_res_buildings"] > 0,
                               parcels["number_of_nonres_buildings"] > 0),
                   logical_or(parcels["number_of_mixed_use_buildings"] > 1,
                              logical_and(parcels["number_of_res_buildings"] == 0,
                                          parcels["number_of_mixed_use_buildings"] > 0))))  # 6
    parcels.modify_attribute("buildings_code", data=6 * ones(has_mult_mixed_buildings.sum()),
                             index=where(has_mult_mixed_buildings))
    has_no_building_res_lutype = logical_and(parcels["number_of_buildings"] == 0,
                                             in1d(parcels["land_use_type_id"], reslutypes))  # 7 (vacant with res LU type)
    parcels.modify_attribute("buildings_code", data=7 * ones(has_no_building_res_lutype.sum()),
                             index=where(has_no_building_res_lutype))
    has_no_building_nonres_lutype = logical_and(parcels["number_of_buildings"] == 0,
                                                in1d(parcels["land_use_type_id"], reslutypes) == 0)  # 8 (vacant with non-res LU type)
    parcels.modify_attribute("buildings_code", data=8 * ones(has_no_building_nonres_lutype.sum()),
                             index=where(has_no_building_nonres_lutype))
    business_sizes = businesses[self.number_of_jobs_attr].round().astype("int32")
    business_location = {}
    business_location1wrkpl = zeros(businesses.size(), dtype="int32")
    business_location1wrkplres = zeros(businesses.size(), dtype="int32")
    business_ids = businesses.get_id_attribute()
    # sample one building for cases when sampling is required.
    for ibusid in range(businesses.size()):
        idx = where(buildings['parcel_id'] == businesses['parcel_id'][ibusid])[0]
        bldgids = buildings['building_id'][idx]
        business_location[business_ids[ibusid]] = bldgids
        if bldgids.size == 1:
            business_location1wrkpl[ibusid] = bldgids[0]
        elif bldgids.size > 1:
            business_location1wrkpl[ibusid] = bldgids[sample_noreplace(arange(bldgids.size), 1)]
            if buildings['residential_units'][idx].sum() > 0:
                # Residential buildings are sampled with probabilities proportional to residential units
                business_location1wrkplres[ibusid] = bldgids[probsample_noreplace(arange(bldgids.size), 1,
                    prob_array=buildings['residential_units'][idx])]
            else:
                business_location1wrkplres[ibusid] = business_location1wrkpl[ibusid]
    home_based = zeros(business_sizes.sum(), dtype="bool8")
    job_building_id = zeros(business_sizes.sum(), dtype="int32")
    job_array_labels = business_ids.repeat(business_sizes)
    job_assignment_case = zeros(business_sizes.sum(), dtype="int32")
    processed_bindicator = zeros(businesses.size(), dtype="bool8")
    business_codes = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"])
    business_nworkplaces = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"])
    logger.log_status("Total number of jobs: %s" % home_based.size)

    # 1. 1-2 worker business in 1 residential building
    idx_sngl_wrk_1bld_fit = where(logical_and(business_sizes < 3, business_codes == 1))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_1bld_fit])
    home_based[jidx] = True
    job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_1bld_fit].repeat(
        business_sizes[idx_sngl_wrk_1bld_fit])
    job_assignment_case[jidx] = 1
    processed_bindicator[idx_sngl_wrk_1bld_fit] = True
    logger.log_status("1. %s jobs (%s businesses) set as home-based due to 1-2 worker x 1 residential building fit." % (
        business_sizes[idx_sngl_wrk_1bld_fit].sum(), idx_sngl_wrk_1bld_fit.size))

    # 2. 1-2 worker business in multiple residential buildings
    idx_sngl_wrk_multbld_fit = where(logical_and(logical_and(processed_bindicator == 0,
        business_sizes < 3), business_codes == 2))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_multbld_fit])
    home_based[jidx] = True
    job_building_id[jidx] = business_location1wrkplres[idx_sngl_wrk_multbld_fit].repeat(
        business_sizes[idx_sngl_wrk_multbld_fit])
    job_assignment_case[jidx] = 2
    processed_bindicator[idx_sngl_wrk_multbld_fit] = True
    logger.log_status("2. %s jobs (%s businesses) set as home-based due to 1-2 worker x multiple residential buildings fit." % (
        business_sizes[idx_sngl_wrk_multbld_fit].sum(), idx_sngl_wrk_multbld_fit.size))

    # 3. 1-2 worker in single non-res building (not mixed-use)
    idx_sngl_wrk_single_nonres_fit = where(logical_and(logical_and(processed_bindicator == 0,
        business_sizes < 3), business_codes == 3))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_single_nonres_fit])
    job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_single_nonres_fit].repeat(
        business_sizes[idx_sngl_wrk_single_nonres_fit])
    job_assignment_case[jidx] = 3
    processed_bindicator[idx_sngl_wrk_single_nonres_fit] = True
    logger.log_status("3. %s jobs (%s businesses) placed due to 1-2 worker x single non-res building fit." % (
        business_sizes[idx_sngl_wrk_single_nonres_fit].sum(), idx_sngl_wrk_single_nonres_fit.size))

    # 4. 1-2 worker in multiple non-res buildings (not mixed-use)
    idx_sngl_wrk_mult_nonres_fit = where(logical_and(logical_and(processed_bindicator == 0,
        business_sizes < 3), business_codes == 4))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mult_nonres_fit])
    job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mult_nonres_fit].repeat(
        business_sizes[idx_sngl_wrk_mult_nonres_fit])
    job_assignment_case[jidx] = 4
    processed_bindicator[idx_sngl_wrk_mult_nonres_fit] = True
    logger.log_status("4. %s jobs (%s businesses) placed due to 1-2 worker x multiple non-res building fit." % (
        business_sizes[idx_sngl_wrk_mult_nonres_fit].sum(), idx_sngl_wrk_mult_nonres_fit.size))

    # 5. 1-2 worker in single mixed-use building
    idx_sngl_wrk_smu_fit = where(logical_and(logical_and(processed_bindicator == 0,
        business_sizes < 3), business_codes == 5))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_smu_fit])
    job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_smu_fit].repeat(
        business_sizes[idx_sngl_wrk_smu_fit])
    job_assignment_case[jidx] = 5
    processed_bindicator[idx_sngl_wrk_smu_fit] = True
    logger.log_status("5. %s jobs (%s businesses) in 1-2 worker x single mixed-use building." % (
        business_sizes[idx_sngl_wrk_smu_fit].sum(), idx_sngl_wrk_smu_fit.size))

    # 6. 1-2 worker in multiple mixed-type buildings
    idx_sngl_wrk_mmu_fit = where(logical_and(logical_and(processed_bindicator == 0,
        business_sizes < 3), business_codes == 6))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mmu_fit])
    job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mmu_fit].repeat(
        business_sizes[idx_sngl_wrk_mmu_fit])
    bldtype = buildings.get_attribute_by_id("building_type_id", business_location1wrkpl[idx_sngl_wrk_mmu_fit])
    is_bldtype_res = in1d(bldtype, restypes)
    home_based[in1d(job_array_labels, business_ids[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)])] = True
    job_assignment_case[jidx] = 6
    processed_bindicator[idx_sngl_wrk_mmu_fit] = True
    logger.log_status("6. %s jobs (%s businesses) in 1-2 worker x multiple mixed-type buildings. %s jobs classified as home-based." % (
        business_sizes[idx_sngl_wrk_mmu_fit].sum(), idx_sngl_wrk_mmu_fit.size,
        business_sizes[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)].sum()))

    # 7. 1-2 worker business in residential parcel with no building
    idx_sngl_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator == 0,
        business_sizes < 3), business_codes == 7))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_vacant_res])
    job_assignment_case[jidx] = 7
    home_based[jidx] = True
    processed_bindicator[idx_sngl_wrk_vacant_res] = True
    logger.log_status("7. %s jobs (%s businesses of size 1-2) could not be placed due to non-existing buildings in parcels with residential LU type." % (
        business_sizes[idx_sngl_wrk_vacant_res].sum(), idx_sngl_wrk_vacant_res.size))

    # 8. 3+ workers of governmental workplaces in 1+ residential building
    ind_business_case8 = logical_and(logical_and(processed_bindicator == 0,
        logical_and(business_sizes > 2, in1d(businesses['sector_id'], [18, 19]))),
        in1d(business_codes, [1, 2]))
    idx_wrk_fit = where(ind_business_case8)[0]
    jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
    job_assignment_case[jidx] = 8
    processed_bindicator[idx_wrk_fit] = True
    logger.log_status("8. %s governmental jobs (%s businesses of size 3+) could not be placed due to residing in residential buildings only." % (
        business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))

    # 9. 3-30 workers in single residential building. Make two of them home based.
    idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator == 0,
        logical_and(business_sizes > 2, business_sizes <= 30)), business_codes == 1))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit])
    job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_fit].repeat(business_sizes[idx_sngl_wrk_fit])
    bsizeminus2 = vstack((2 * ones(idx_sngl_wrk_fit.size),
                          business_sizes[idx_sngl_wrk_fit] - 2)).ravel("F").astype("int32")  # interweaving 2 and remaining business size
    hbidx = tile(array([True, False]), bsizeminus2.size / 2).repeat(bsizeminus2)  # set the first two jobs of every business to True, others to False
    home_based[(where(jidx)[0])[hbidx]] = True
    job_assignment_case[jidx] = 9
    processed_bindicator[idx_sngl_wrk_fit] = True
    logger.log_status("9. %s jobs (%s businesses) in 3-30 worker x single residential building. %s jobs assigned as home-based." % (
        business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, hbidx.sum()))

    # 10. 3-30 workers in multiple residential buildings. Make two of them home based.
    idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator == 0,
        logical_and(business_sizes > 2, business_sizes <= 30)), business_codes == 2))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit])
    job_assignment_case[jidx] = 10
    processed_bindicator[idx_sngl_wrk_fit] = True
    # sample buildings to businesses by parcels
    bpcls = unique(businesses["parcel_id"][idx_sngl_wrk_fit])
    for ipcl in range(bpcls.size):
        bidx = where(buildings['parcel_id'] == bpcls[ipcl])[0]
        bldgids = buildings['building_id'][bidx]
        bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]],
                              business_ids[idx_sngl_wrk_fit])
        # multiply by units for sampling prop. to units rather than buildings
        bldgids = bldgids.repeat(maximum(1, buildings['residential_units'][bidx].astype('int32')))
        if bldgids.size < bussids.size:
            bldarray = bldgids.repeat(1 + ceil((bussids.size - bldgids.size) / float(bldgids.size)))
        else:
            bldarray = bldgids
        shuffle(bldarray)  # randomly reorder in-place
        for ib in range(bussids.size):
            jidx = where(job_array_labels == bussids[ib])[0]
            job_building_id[jidx] = bldarray[ib]
            home_based[jidx[0:2]] = True
    logger.log_status("10. %s jobs (%s businesses) in 3-30 worker x multiple residential building. %s jobs assigned as home-based." % (
        business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, idx_sngl_wrk_fit.size * 2))

    # 11. single workplace, 3+ workers in single non-res or mixed-use building
    idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator == 0,
        business_sizes > 2), logical_or(business_codes == 3, business_codes == 5)),
        business_nworkplaces == 1))[0]
    which_labels = where(in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers]))[0]
    job_building_id[which_labels] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(
        business_sizes[idx_sngl_wrkplace_2plus_workers])
    job_assignment_case[which_labels] = 11
    processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True
    logger.log_status("11. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x single non-res/mixed-use building fit." % (
        business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size))

    # 12. single workplace, 3+ workers in multiple mixed-type buildings
    idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator == 0,
        business_sizes > 2), logical_or(business_codes == 4, business_codes == 6)),
        business_nworkplaces == 1))[0]
    jidx = in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers])
    job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(
        business_sizes[idx_sngl_wrkplace_2plus_workers])
    job_assignment_case[jidx] = 12
    processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True
    logger.log_status("12. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x multiple non-res/mixed building fit." % (
        business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size))

    # 13. multiple workplaces, 3+ workers in single non-res or mixed building
    idx_mult_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator == 0,
        business_sizes > 2), logical_or(business_codes == 3, business_codes == 5)),
        business_nworkplaces > 1))[0]
    jidx = in1d(job_array_labels, business_ids[idx_mult_wrkplace_2plus_workers])
    job_building_id[jidx] = business_location1wrkpl[idx_mult_wrkplace_2plus_workers].repeat(
        business_sizes[idx_mult_wrkplace_2plus_workers])
    job_assignment_case[jidx] = 13
    processed_bindicator[idx_mult_wrkplace_2plus_workers] = True
    logger.log_status("13. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x single non-res/mixed building fit." % (
        business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size))

    # 14. multiple workplaces, 3+ workers in multiple non-res or mixed buildings
    idx_mult_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator == 0,
        business_sizes > 2), logical_or(business_codes == 4, business_codes == 6)),
        business_nworkplaces > 1))[0]
    processed_bindicator[idx_mult_wrkplace_2plus_workers] = True
    # sample buildings to businesses by parcels
    bpcls = unique(businesses["parcel_id"][idx_mult_wrkplace_2plus_workers])
    #hbasedsum = home_based.sum()
    for ipcl in range(bpcls.size):
        bldgids = buildings['building_id'][buildings['parcel_id'] == bpcls[ipcl]]
        bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]],
                              business_ids[idx_mult_wrkplace_2plus_workers])
        if bldgids.size < bussids.size:
            bldarray = bldgids.repeat(1 + ceil((bussids.size - bldgids.size) / float(bldgids.size)))
        else:
            bldarray = bldgids
        shuffle(bldarray)  # randomly reorder in-place
        is_res = in1d(bldarray, restypes)
        for ib in range(bussids.size):
            jidx = where(job_array_labels == bussids[ib])
            job_building_id[jidx] = bldarray[ib]
            #home_based[jidx] = is_res
            job_assignment_case[jidx] = 14
    logger.log_status("14. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x multiple non-res/mixed building fit." % (
        business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size))

    # 15. 3+ workers in residential parcel with no building
    idx_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator == 0,
        business_sizes > 2), business_codes == 7))[0]
    jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_res])
    job_assignment_case[jidx] = 15
    processed_bindicator[idx_wrk_vacant_res] = True
    logger.log_status("15. %s jobs (%s businesses of 3+ workers) could not be placed due to non-existing buildings in parcels with residential LU type." % (
        business_sizes[idx_wrk_vacant_res].sum(), idx_wrk_vacant_res.size))

    # 16. non-residential parcel with no building
    idx_wrk_vacant_nonres = where(logical_and(processed_bindicator == 0, business_codes == 8))[0]
    jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_nonres])
    job_assignment_case[jidx] = 16
    processed_bindicator[idx_wrk_vacant_nonres] = True
    logger.log_status("16. %s jobs (%s businesses) could not be placed due to non-existing buildings in parcels with non-residential LU type." % (
        business_sizes[idx_wrk_vacant_nonres].sum(), idx_wrk_vacant_nonres.size))

    # 17. 31+ workers in single residential building. Do not place - will go into ELCM.
    idx_wrk_fit = where(logical_and(logical_and(processed_bindicator == 0,
        business_sizes > 30), business_codes == 1))[0]
    jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
    job_assignment_case[jidx] = 17
    processed_bindicator[idx_wrk_fit] = True
    logger.log_status("17. %s jobs (%s businesses) in 31+ workers x single residential building." % (
        business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))

    # 18. 31+ workers in multiple residential buildings.
    idx_wrk_fit = where(logical_and(logical_and(processed_bindicator == 0,
        business_sizes > 30), business_codes == 2))[0]
    jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
    job_assignment_case[jidx] = 18
    processed_bindicator[idx_wrk_fit] = True
    logger.log_status("18. %s jobs (%s businesses) in 31+ workers x multiple residential building." % (
        business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))

    # jobs in messy buildings
    idx_messy_fit = where(logical_and(logical_and(processed_bindicator == 0,
        business_sizes > 0), business_codes == 0))[0]
    processed_bindicator[idx_messy_fit] = True
    logger.log_status("%s jobs (%s businesses) could not be placed due to messy buildings." % (
        business_sizes[idx_messy_fit].sum(), idx_messy_fit.size))

    # build new buildings for jobs in cases 7, 8, 15 and 16
    jidx_no_bld = where(in1d(job_assignment_case, [7, 8, 15, 16]))[0]
    bus = unique(job_array_labels[jidx_no_bld])
    bsidx = businesses.get_id_index(bus)
    # first create buildings for single workplaces per parcel
    single_workplace_idx = where(business_nworkplaces[bsidx] == 1)[0]
    newbld_parcel_id = businesses['parcel_id'][bsidx][single_workplace_idx]
    newbld_bt = sector2building_type(businesses['sector_id'][bsidx][single_workplace_idx])
    newbids = arange(buildings.get_id_attribute().max() + 1,
                     buildings.get_id_attribute().max() + single_workplace_idx.size + 1)
    bbldid = zeros(bsidx.size, dtype='int32')
    bbldid[single_workplace_idx] = newbids
    # for parcels with multiple workplaces select the largest business to determine its building type
    mult_bsidx = bsidx[where(business_nworkplaces[bsidx] > 1)[0]]
    empty_parcels = businesses['parcel_id'][mult_bsidx]
    uempty_parcels = unique(empty_parcels)
    bsize_on_empty_pcl = ndmax(business_sizes[mult_bsidx], labels=empty_parcels, index=uempty_parcels)
    newbld2_sec = zeros(uempty_parcels.size, dtype='int32')
    newbids2 = arange(newbids.max() + 1, newbids.max() + uempty_parcels.size + 1)
    for ipcl in range(uempty_parcels.size):
        newbld2_sec[ipcl] = businesses['sector_id'][mult_bsidx][logical_and(
            businesses['parcel_id'][mult_bsidx] == uempty_parcels[ipcl],
            business_sizes[mult_bsidx] == bsize_on_empty_pcl[ipcl])][0]
        this_bidx = where(businesses['parcel_id'][bsidx] == uempty_parcels[ipcl])
        bbldid[this_bidx] = newbids2[ipcl]
    newbld_parcel_id = concatenate((newbld_parcel_id, uempty_parcels))
    newbld_bt = concatenate((newbld_bt, sector2building_type(newbld2_sec)))
    newbldgs = {'building_id': concatenate((newbids, newbids2)),
                'parcel_id': newbld_parcel_id,
                'building_type_id': newbld_bt,
                }
    buildings.add_elements(newbldgs, require_all_attributes=False)
    jidx = where(in1d(job_array_labels, business_ids[bsidx]))[0]
    job_building_id[jidx] = bbldid.repeat(business_sizes[bsidx])
    logger.log_status("Built %s new buildings to accommodate %s jobs (out of which %s are governmental) from cases 7, 15, 16." % (
        newbld_parcel_id.size, jidx.size,
        business_sizes[bsidx][where(in1d(businesses['sector_id'][bsidx], [18, 19]))].sum()))
    logger.log_status("Assigned %s (%s percent) home-based jobs." % (
        home_based.sum(), round(home_based.sum() / (home_based.size / 100.), 2)))
    logger.log_status("Finished %s percent of jobs: %s jobs (%s businesses) processed; %s jobs (%s businesses) remain to be processed." % (
        round(business_sizes[processed_bindicator].sum() / (home_based.size / 100.), 2),
        business_sizes[processed_bindicator].sum(), processed_bindicator.sum(),
        business_sizes[logical_not(processed_bindicator)].sum(),
        business_sizes[logical_not(processed_bindicator)].size))
    logger.start_block("Storing jobs data.")
    # create job dataset
    job_data = {"job_id": (arange(job_building_id.size) + 1).astype("int32"),
                "home_based_status": home_based,
                "building_id": job_building_id,
                "business_id": job_array_labels.astype("int32"),
                "sector_id": businesses['sector_id'].repeat(business_sizes).astype("int32"),
                "parcel_id": businesses['parcel_id'].repeat(business_sizes).astype("int32"),
                "assignment_case": job_assignment_case}
    # join with zones
    if zone_dsname is not None:
        zones = dataset_pool.get_dataset(zone_dsname)
        idname = zones.get_id_name()[0]
        #jpcls = buildings.get_attribute_by_id('parcel_id', job_building_id)
        job_data[idname] = parcels.get_attribute_by_id(idname, job_data["parcel_id"])
    dictstorage = StorageFactory().get_storage('dict_storage')
    dictstorage.write_table(table_name="jobs", table_data=job_data)
    jobs = Dataset(in_storage=dictstorage, in_table_name="jobs", dataset_name="job", id_name="job_id")
    if out_storage is not None:
        jobs.write_dataset(out_storage=out_storage, out_table_name="jobs")
        buildings.write_dataset(out_storage=out_storage, attributes=AttributeType.PRIMARY)
    logger.end_block()
    return jobs
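# A condensed sketch of the job-expansion bookkeeping used throughout the
# numbered cases above (hypothetical sizes): each business id is repeated once
# per job, so the job arrays are business-aligned, and in1d() then selects all
# job slots belonging to a set of businesses in one vectorized step.
#
#     from numpy import array, in1d
#
#     business_ids = array([11, 12, 13])
#     business_sizes = array([2, 1, 3])
#     job_array_labels = business_ids.repeat(business_sizes)
#     assert (job_array_labels == array([11, 11, 12, 13, 13, 13])).all()
#     jidx = in1d(job_array_labels, array([11, 13]))    # all jobs of businesses 11 and 13
#     assert jidx.sum() == 5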
class DevelopmentProjectTransitionModel(Model):
    """
    Creates development projects. Each development project is for a single
    type of development, e.g. 'industrial' or 'commercial'. This model creates
    enough development projects to match the desired vacancy rates, as defined
    in the target_vacancies table. It does not place any projects in locations;
    that is the job of the development project location choice models. The
    distribution of project sizes (amount of space, value of space) is
    determined by sampling from the projects in the development_event_history
    table.
    """
    model_name = "Development Project Transition Model"

    def __init__(self, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)

    def pre_check(self, location_set, vacancy_table, types):
        for ptype in types:
            self.check_for_space(location_set.get_attribute(self.variable_for_total_units[ptype]))
        self.check_target_vacancy_is_not_100_percent(vacancy_table.get_attribute("target_total_vacancy"))

    def check_for_space(self, values):
        """Check that this array of values sums to something > 0."""
        self.do_check("x > 0", array([values.sum()]))

    def check_target_vacancy_is_not_100_percent(self, value):
        """Check that the target vacancy rate is not 100% (ratio == 1), because it
        doesn't make sense, and it also causes a divide by 0 error."""
        self.do_check("x < 1", value)

    def run(self, vacancy_table, history_table, year, location_set, dataset_pool=None, resources=None):
        self.dataset_pool = dataset_pool
        building_types = self.dataset_pool.get_dataset('building_type')
        target_vacancy_this_year = DatasetSubset(vacancy_table,
            index=where(vacancy_table.get_attribute("year") == year)[0])
        building_type_ids = target_vacancy_this_year.get_attribute('building_type_id')
        building_type_idx = building_types.get_id_index(building_type_ids)
        self.used_building_types = DatasetSubset(building_types, index=building_type_idx)
        project_types = self.used_building_types.get_attribute('building_type_name')
        is_residential = self.used_building_types.get_attribute('is_residential')
        unit_names = where(is_residential, 'residential_units', 'non_residential_sqft')
        specific_unit_names = where(is_residential, 'residential_units', '_sqft')
        rates = target_vacancy_this_year.get_attribute('target_total_vacancy')
        self.project_units = {}
        self.project_specific_units = {}
        target_rates = {}
        for i in range(self.used_building_types.size()):
            self.project_units[project_types[i]] = unit_names[i]
            if is_residential[i]:
                self.project_specific_units[project_types[i]] = specific_unit_names[i]
            else:
                self.project_specific_units[project_types[i]] = "%s%s" % (project_types[i], specific_unit_names[i])
            target_rates[building_type_ids[i]] = rates[i]
        self._compute_vacancy_and_total_units_variables(location_set, project_types, resources)
        self.pre_check(location_set, target_vacancy_this_year, project_types)
        projects = None
        for project_type_id, target_vacancy_rate in target_rates.iteritems():
            # determine current-year vacancy rates
            project_type = building_types.get_attribute_by_id('building_type_name', project_type_id)
            vacant_units_sum = location_set.get_attribute(self.variable_for_vacancy[project_type]).sum()
            units_sum = float(location_set.get_attribute(self.variable_for_total_units[project_type]).sum())
            should_develop_units = int(round(max(0,
                (target_vacancy_rate * units_sum - vacant_units_sum) / (1 - target_vacancy_rate))))
            logger.log_status(project_type + ": vacant units: %d, should be vacant: %f, sum units: %d" % (
                vacant_units_sum, target_vacancy_rate * units_sum, units_sum))
            if not should_develop_units:
                logger.log_note(("Will not build any " + project_type + " units, because the current vacancy of %d units\n"
                                 + "is more than the %d units desired for the vacancy rate of %f.") % (
                    vacant_units_sum, target_vacancy_rate * units_sum, target_vacancy_rate))
            # create projects
            if should_develop_units > 0:
                this_project = self._create_projects(should_develop_units, project_type, project_type_id,
                                                     history_table, location_set, units_sum, resources)
                if projects is None:
                    projects = this_project
                else:
                    projects.join_by_rows(this_project, change_ids_if_not_unique=True)
        return projects

    def _compute_vacancy_and_total_units_variables(self, location_set, project_types, resources=None):
        compute_resources = Resources(resources)
        compute_resources.merge({"debug": self.debug})
        self.variable_for_vacancy = {}
        self.variable_for_total_units = {}
        for ptype in project_types:
            self.variable_for_vacancy[ptype] = compute_resources.get(
                "%s_vacant_variable" % ptype,
                "urbansim_zone.%s.vacant_%s" % (location_set.get_dataset_name(),
                                                self.project_specific_units[ptype]))
            self.variable_for_total_units[ptype] = compute_resources.get(
                "%s_total_units_variable" % ptype,
                "%s.aggregate(urbansim_zone.building.total_%s)" % (location_set.get_dataset_name(),
                                                                   self.project_specific_units[ptype]))
            location_set.compute_variables([self.variable_for_vacancy[ptype],
                                            self.variable_for_total_units[ptype]],
                                           dataset_pool=self.dataset_pool, resources=compute_resources)

    def _create_projects(self, should_develop_units, project_type, project_type_id, history_table,
                         location_set, units_sum, resources=None):
        history_values = history_table.get_attribute(self.project_units[project_type])
        type_code_values = history_table.get_change_type_code_attribute(self.project_units[project_type])
        # take only non-zero history values and those that don't represent demolished buildings
        history_values_without_zeros = history_values[logical_and(
            history_values > 0, type_code_values != DevelopmentEventTypeOfChange.DELETE)]
        mean_size = history_values_without_zeros.mean()
        idx = array([], dtype="int32")
        # Ensure that there are some development projects to choose from.
        num_of_projects_to_select = max(10, round_(should_develop_units / mean_size))
        while True:
            idx = concatenate((idx, randint(0, history_values_without_zeros.size,
                                            num_of_projects_to_select)))
            csum = history_values_without_zeros[idx].cumsum()
            idx1 = idx[csum <= should_develop_units]
            if idx1.size == 0:  # at least one project should be selected
                idx = array([idx[0]], dtype="int32")
            else:
                idx = idx1
            if csum[-1] >= should_develop_units:
                break
        data = {"residential_units": zeros((idx.size,), dtype=int32),
                "non_residential_sqft": zeros((idx.size,), dtype=int32),
                'building_type_id': array(idx.size * [project_type_id]),
                "project_id": arange(idx.size) + 1,
                "building_id": zeros((idx.size,), dtype=int32)}
        data[self.project_units[project_type]] = history_values_without_zeros[idx]
        storage = StorageFactory().get_storage('dict_storage')
        development_projects_table_name = 'development_projects'
        storage.write_table(table_name=development_projects_table_name, table_data=data)
        return Dataset(in_storage=storage,
                       in_table_name=development_projects_table_name,
                       id_name='project_id')
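# A worked example of the should_develop_units formula used in run() above
# (numbers invented): with 1000 total units, 50 vacant, and a 10% target, the
# stock after building D units must satisfy (50 + D) / (1000 + D) = 0.10,
# which rearranges to D = (0.10 * 1000 - 50) / (1 - 0.10).
#
#     target, units_sum, vacant = 0.10, 1000.0, 50.0
#     should_develop = max(0, (target * units_sum - vacant) / (1 - target))
#     assert int(round(should_develop)) == 56
#     assert abs((vacant + should_develop) / (units_sum + should_develop) - target) < 1e-9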
def run(self, realestate_dataset,
        year=None,
        occupied_spaces_variable="occupied_units",
        total_spaces_variable="total_units",
        target_attribute_name='target_vacancy_rate',
        sample_from_dataset=None,
        sample_filter="",
        reset_attribute_value={},
        year_built='year_built',
        dataset_pool=None,
        append_to_realestate_dataset=False,
        table_name="development_projects",
        dataset_name="development_project",
        id_name=[],
        **kwargs):
    """
    sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
    append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
    """
    if self.target_vancy_dataset is None:
        raise RuntimeError("target_vacancy_rate dataset is unspecified.")

    if not sample_from_dataset:
        sample_from_dataset = realestate_dataset

    #if dataset_pool is None:
    #    dataset_pool = SessionConfiguration().get_dataset_pool()
    if year is None:
        year = SimulationState().get_current_time()
    this_year_index = where(self.target_vancy_dataset.get_attribute('year') == year)[0]
    target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)

    column_names = list(set(self.target_vancy_dataset.get_known_attribute_names()) -
                        set([target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_']))
    column_names.sort(reverse=True)
    column_values = dict([(name, target_vacancy_for_this_year.get_attribute(name))
                          for name in column_names + [target_attribute_name]])

    independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
    dataset_known_attributes = realestate_dataset.get_known_attribute_names()
    for variable in independent_variables:
        if variable not in dataset_known_attributes:
            realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)

    dataset_known_attributes = realestate_dataset.get_known_attribute_names()  #update after compute
    if sample_filter:
        short_name = VariableName(sample_filter).get_alias()
        if short_name not in dataset_known_attributes:
            filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
        else:
            filter_indicator = sample_from_dataset.get_attribute(short_name)
    else:
        filter_indicator = 1

    sampled_index = array([], dtype=int32)

    #log header
    if PrettyTable is not None:
        status_log = PrettyTable()
        status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
    else:
        logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
    error_log = ''
    for index in range(target_vacancy_for_this_year.size()):
        this_sampled_index = array([], dtype=int32)
        indicator = ones(realestate_dataset.size(), dtype='bool')
        sample_indicator = ones(sample_from_dataset.size(), dtype='bool')
        criterion = {}  # for logging
        for attribute in independent_variables:
            if attribute in dataset_known_attributes:
                dataset_attribute = realestate_dataset.get_attribute(attribute)
                sample_attribute = sample_from_dataset.get_attribute(attribute)
            else:
                raise ValueError("attribute %s used in target vacancy dataset can not be found in dataset %s"
                                 % (attribute, realestate_dataset.get_dataset_name()))

            if attribute + '_min' in column_names:
                amin = target_vacancy_for_this_year.get_attribute(attribute + '_min')[index]
                criterion.update({attribute + '_min': amin})
                if amin != -1:
                    indicator *= dataset_attribute >= amin
                    sample_indicator *= sample_attribute >= amin
            if attribute + '_max' in column_names:
                amax = target_vacancy_for_this_year.get_attribute(attribute + '_max')[index]
                criterion.update({attribute + '_max': amax})
                if amax != -1:
                    indicator *= dataset_attribute <= amax
                    sample_indicator *= sample_attribute <= amax
            if attribute in column_names:
                aval = column_values[attribute][index]
                criterion.update({attribute: aval})
                if aval == -1:
                    continue
                elif aval == -2:
                    ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                    indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                    sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                else:
                    indicator *= dataset_attribute == aval
                    sample_indicator *= sample_attribute == aval

        this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
        ## total/occupied_spaces_variable can be specified either as a universal name for all real estate
        ## or in the target_vacancy_rate dataset for each vacancy category
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]

        logger.be_quiet()  #temporarily disable logging
        realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
        realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
        sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
        logger.talk()

        actual_num = (indicator * realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
        target_num = int(round((indicator * realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /
                               (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
        diff = target_num - actual_num
        if diff > 0:
            total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
            legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
            if legit_index.size > 0:
                mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                num_of_projects_to_sample = int(diff / mean_size)
                ## guard added: sample at least 1 project when diff > 0; int(diff/mean_size) can be 0,
                ## which would make the loop below run forever (same fix as in the later variant of this model)
                num_of_projects_to_sample = max(1, num_of_projects_to_sample)
                while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                    lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                    this_sampled_index = concatenate((this_sampled_index, lucky_index))
                this_sampled_index = this_sampled_index[0:(1 + searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                sampled_index = concatenate((sampled_index, this_sampled_index))
            else:
                error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                             ','.join([col + "=" + str(criterion[col]) for col in column_names]) + '\n'
        #if diff < 0: #TODO demolition; not yet supported

        ##log status
        action = "0"
        if this_sampled_index.size > 0:
            action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
            if diff > 0: action = "+" + str(action_num)
            if diff < 0: action = "-" + str(action_num)
        cat = [str(criterion[col]) for col in column_names]
        cat += [str(actual_num), str(target_num), str(diff), action]

        if PrettyTable is not None:
            status_log.add_row(cat)
        else:
            logger.log_status("\t".join(cat))

    if PrettyTable is not None:
        logger.log_status("\n" + status_log.get_string())
    if error_log:
        logger.log_error(error_log)

    result_data = {}
    result_dataset = None
    index = array([], dtype='int32')
    if sampled_index.size > 0:
        ### ideally duplicate_rows() is all needed to add newly cloned rows
        ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
        ##realestate_dataset.duplicate_rows(sampled_index)
        result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32'))
        for attribute in sample_from_dataset.get_primary_attribute_names():
            if attribute in reset_attribute_value:
                result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
            else:
                result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)

        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name=table_name, table_data=result_data)

        result_dataset = Dataset(id_name=id_name,
                                 in_storage=storage,
                                 in_table_name=table_name,
                                 dataset_name=dataset_name)
        index = arange(result_dataset.size())

    if append_to_realestate_dataset:
        if len(result_data) > 0:
            index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                    change_ids_if_not_unique=True)
        result_dataset = realestate_dataset

    return (result_dataset, index)
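# Illustrative sketch (not part of the model above): the core arithmetic of the
# vacancy-driven sampler in plain numpy. The names (occupied, total, rate,
# pool_sizes) are hypothetical stand-ins for the dataset attributes; the target
# stock solves (target - occupied) / target == rate, and projects are sampled
# with replacement until their sizes cover the shortfall, then trimmed.
def _demo_vacancy_target_sampling():
    from numpy import array, concatenate, cumsum, searchsorted, int32
    from numpy.random import randint, seed
    seed(0)
    occupied, total, rate = 900.0, 1000, 0.25
    target = int(round(occupied / (1 - rate)))   # 1200: stock needed to hit the vacancy target
    diff = target - total                        # 200 spaces still to be added
    pool_sizes = array([40, 75, 120, 15, 60])    # total_units of the candidate projects
    sampled = array([], dtype=int32)
    while pool_sizes[sampled].sum() < diff:
        # draw one candidate index at a time, with replacement
        sampled = concatenate((sampled, randint(0, pool_sizes.size, 1)))
    # cut the oversampled tail so the cumulative size just covers diff
    sampled = sampled[0:(1 + searchsorted(cumsum(pool_sizes[sampled]), diff))]
    return target, diff, sampled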
def run(self, n=500, run_config=None, current_year=None, debuglevel=0):
    """
    n - sample n proposals at a time, evaluate them one by one
    """
    self.demolished_buildings = array([], dtype='int32')  #id of buildings to be demolished
    if current_year is None:
        current_year = SimulationState().get_current_time()
    if not self.positive_proposals:
        logger.log_status("Proposal Set size <= 0, no proposals to consider, skipping DPPSM.")
        return (self.proposal_set, self.demolished_buildings)
    self.proposal_component_set.compute_variables([
        'urbansim_parcel.development_project_proposal_component.units_proposed',
        'urbansim_parcel.development_project_proposal_component.is_residential'],
        dataset_pool=self.dataset_pool)
    self.proposal_set.compute_variables([
        'urbansim_parcel.development_project_proposal.number_of_components',
        'zone_id=development_project_proposal.disaggregate(parcel.zone_id)',
        #'occurence_frequency = development_project_proposal.disaggregate(development_template.sample_size)'
        ], dataset_pool=self.dataset_pool)
    buildings = self.dataset_pool.get_dataset("building")
    buildings.compute_variables([
        "occupied_units_for_jobs = urbansim_parcel.building.number_of_non_home_based_jobs",
        "units_for_jobs = urbansim_parcel.building.total_non_home_based_job_space",
        "occupied_residential_units = urbansim_parcel.building.number_of_households",
        #"urbansim_parcel.building.existing_units",
        "urbansim_parcel.building.is_residential"
        ], dataset_pool=self.dataset_pool)

    ## define unit_name by whether a building is residential or not (with is_residential attribute)
    ## if it is non-residential (0), count units by number of job spaces (units_for_jobs)
    ## if it is residential (1), count units by residential units
    self.unit_name = array(["units_for_jobs", "residential_units"])

    target_vacancy = self.dataset_pool.get_dataset('target_vacancy')
    target_vacancy.compute_variables(['is_residential = target_vacancy.disaggregate(building_type.is_residential)'],
                                     dataset_pool=self.dataset_pool)
    # This try-except block checks to see if the object has a subarea_id_name;
    # if it does, it calculates the vacancy rates by subarea_id_name
    try:
        # Check for subarea_id_name in the target_vacancy dataset:
        # if it is present, vacancy rates are specified by subarea_id_name;
        # if it is not, vacancy rates are specified region-wide.
        target_vacancy.load_dataset()
        if self.subarea_id_name in target_vacancy.get_attribute_names():
            current_target_vacancy_this_year = DatasetSubset(target_vacancy,
                index=where(target_vacancy.get_attribute("year") == current_year)[0])
            current_target_vacancy = DatasetSubset(current_target_vacancy_this_year,
                index=where(current_target_vacancy_this_year.get_attribute(self.subarea_id_name) == self.area_id)[0])
        else:
            current_target_vacancy = DatasetSubset(target_vacancy,
                index=where(target_vacancy.get_attribute("year") == current_year)[0])
    except AttributeError:
        # vacancy rates are specified region-wide:
        current_target_vacancy = DatasetSubset(target_vacancy,
            index=where(target_vacancy.get_attribute("year") == current_year)[0])

    if current_target_vacancy.size() == 0:
        raise IOError("No target vacancy defined for year %s." % current_year)

    self.existing_units = {}    #total existing units by land_use type
    self.occupied_units = {}    #total occupied units by land_use type
    self.proposed_units = {}    #total proposed units by land_use type
    self.demolished_units = {}  #total (to be) demolished units by land_use type

    components_building_type_ids = self.proposal_component_set.get_attribute("building_type_id").astype("int32")
    proposal_ids = self.proposal_set.get_id_attribute()
    proposal_ids_in_component_set = self.proposal_component_set.get_attribute("proposal_id")
    all_units_proposed = self.proposal_component_set.get_attribute("units_proposed")
    number_of_components_in_proposals = self.proposal_set.get_attribute("number_of_components")

    self.accepting_proposals = zeros(current_target_vacancy.get_attribute("building_type_id").max() + 1,
                                     dtype='bool8')  #whether accepting new proposals, for each building type
    self.accepted_proposals = []  # index of accepted proposals

    self.target_vacancies = {}
    tv_building_types = current_target_vacancy.get_attribute("building_type_id")
    tv_rate = current_target_vacancy.get_attribute("target_vacancy_rate")
    for itype in range(tv_building_types.size):
        self.target_vacancies[tv_building_types[itype]] = tv_rate[itype]

    self.check_vacancy_rates(current_target_vacancy)  #initialize self.accepting_proposals based on current vacancy rate

    sqft_per_job = self.dataset_pool.get_dataset("building_sqft_per_job")
    zones_of_proposals = self.proposal_set.get_attribute("zone_id")
    self.building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(zones_of_proposals.max(),
                                                                               tv_building_types.max())
    # consider only those proposals that have all components of accepted type and sum of proposed units > 0
    is_accepted_type = self.accepting_proposals[components_building_type_ids]
    sum_is_accepted_type_over_proposals = array(ndimage.sum(is_accepted_type,
                                                            labels=proposal_ids_in_component_set,
                                                            index=proposal_ids))
    sum_of_units_proposed = array(ndimage.sum(all_units_proposed,
                                              labels=proposal_ids_in_component_set,
                                              index=proposal_ids))
    is_proposal_eligible = logical_and(sum_is_accepted_type_over_proposals == number_of_components_in_proposals,
                                       sum_of_units_proposed > 0)
    is_proposal_eligible = logical_and(is_proposal_eligible,
                                       self.proposal_set.get_attribute("start_year") == current_year)

    ## handle planned proposals: all proposals with status_id == id_planned
    ## and start_year == current_year are accepted
    planned_proposal_indexes = where(logical_and(
        self.proposal_set.get_attribute("status_id") == self.proposal_set.id_planned,
        self.proposal_set.get_attribute("start_year") == current_year))[0]
    self.consider_proposals(planned_proposal_indexes, force_accepting=True)

    # consider proposals (in this order: planned, proposed, tentative)
    for status in [self.proposal_set.id_proposed, self.proposal_set.id_tentative]:
        idx = where(logical_and(self.proposal_set.get_attribute("status_id") == status,
                                is_proposal_eligible))[0]
        if idx.size <= 0:
            continue
        logger.log_status("Sampling from %s eligible proposals with status %s." % (idx.size, status))
        while (True in self.accepting_proposals):
            if self.weight[idx].sum() == 0.0:
                logger.log_warning("Running out of proposals; there aren't any proposals with non-zero weight")
                break
            idx = idx[self.weight[idx] > 0]
            n = minimum(idx.size, n)
            sampled_proposal_indexes = probsample_noreplace(proposal_ids[idx], n,
                                                            prob_array=(self.weight[idx] / float(self.weight[idx].sum())),
                                                            exclude_index=None, return_index=True)
            self.consider_proposals(arange(self.proposal_set.size())[idx[sampled_proposal_indexes]])
            self.weight[idx[sampled_proposal_indexes]] = 0

    # set status of accepted proposals to 'active'
    self.proposal_set.modify_attribute(name="status_id", data=self.proposal_set.id_active,
                                       index=array(self.accepted_proposals, dtype='int32'))
    building_types = self.dataset_pool.get_dataset("building_type")
    logger.log_status("Status of %s development proposals set to active." % len(self.accepted_proposals))
    logger.log_status("Target/existing vacancy rates (reached using eligible proposals) by building type:")
    for type_id in self.existing_units.keys():
        units_stock = self._get_units_stock(type_id)
        vr = self._get_vacancy_rates(type_id)
        ## units = residential_units if building_type is residential
        ## units = number of job spaces if building_type is non-residential
        logger.log_status(
            """%(type_id)s[%(type_name)s]: %(vr)s = ((existing_units:%(existing_units)s + units_proposed:%(units_proposed)s - units_to_be_demolished:%(units_demolished)s) - units_occupied:%(units_occupied)s) / units_stock:%(units_stock)s""" % \
            {'type_id': type_id,
             'type_name': building_types.get_attribute_by_id("building_type_name", type_id),
             'vr': vr,
             'existing_units': int(self.existing_units[type_id]),
             'units_occupied': int(self.occupied_units[type_id]),
             'units_proposed': int(self.proposed_units[type_id]),
             'units_demolished': int(self.demolished_units[type_id]),
             'units_stock': int(units_stock)
             })

    # Code added by Jesse Ayers, MAG, 7/20/2009
    # Get the active projects:
    stat_id = self.proposal_set.get_attribute('status_id')
    actv = where(stat_id == 1)[0]
    # Where there are active projects, compute the total_land_area_taken
    # and store it on the development_project_proposals dataset
    # so it can be used by the building_construction_model for the proper
    # computation of units_proposed for those projects with velocity curves
    if actv.size > 0:
        total_land_area_taken_computed = self.proposal_set.get_attribute('urbansim_parcel.development_project_proposal.land_area_taken')
        self.proposal_set.modify_attribute('total_land_area_taken', total_land_area_taken_computed[actv], actv)

    return (self.proposal_set, self.demolished_buildings)
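# Illustrative sketch (not part of the model above): the batched
# sampling-without-replacement loop used for proposals, with numpy.random.choice
# standing in for opus_core's probsample_noreplace. Weights of proposals already
# considered are zeroed so each proposal is evaluated at most once. All names
# are hypothetical; 'weight' is a float numpy array.
def _demo_proposal_sampling(weight, n=3):
    from numpy import flatnonzero
    from numpy.random import choice, seed
    seed(0)
    considered = []
    while weight.sum() > 0:
        idx = flatnonzero(weight > 0)
        k = min(idx.size, n)
        batch = choice(idx, size=k, replace=False,
                       p=weight[idx] / float(weight[idx].sum()))
        considered.extend(batch.tolist())  # evaluate the sampled proposals one by one here
        weight[batch] = 0                  # never draw the same proposal again
    return considered
# e.g. _demo_proposal_sampling(array([0.5, 1.0, 0.0, 2.0], dtype='float64'))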
def run(self, dataset, outcome_attribute, weight_attribute, control_totals, current_year,
        control_total_attribute=None, year_attribute='year', capacity_attribute=None,
        add_quantity=False, dataset_pool=None):
    """'dataset' is a Dataset for which a quantity 'outcome_attribute' is created.
    The total amount of the quantity is given by the attribute 'control_total_attribute'
    of the 'control_totals' Dataset. If it is not given, it is assumed to have the same
    name as 'outcome_attribute'. The 'weight_attribute' of 'dataset' determines the
    allocation weights. The 'control_totals' Dataset contains an attribute 'year' (or
    alternatively, an attribute given by the 'year_attribute' argument) and optionally
    other attributes that must be known to the 'dataset' (such as a geography). For each
    row of the control_totals dataset for which the year matches 'current_year', the total
    amount is distributed among the corresponding members of 'dataset' according to weights.
    If a 'capacity_attribute' is given (attribute of 'dataset'), the algorithm removes any
    allocations that exceed the capacity and redistributes them among the remaining members.
    The resulting values are appended to 'dataset' as 'outcome_attribute' (as a primary
    attribute). If add_quantity is True and the 'outcome_attribute' exists in dataset,
    the resulting values are added to the current values of 'outcome_attribute'.
    """
    ct_attr = control_totals.get_known_attribute_names()
    if year_attribute not in ct_attr:
        raise StandardError("Year attribute '%s' must be a known attribute of the control totals dataset." % year_attribute)
    ct_attr.remove(year_attribute)
    if control_total_attribute is None:
        control_total_attribute = outcome_attribute
    if control_total_attribute not in ct_attr:
        raise StandardError("Attribute '%s' must be a known attribute of the control totals dataset." % control_total_attribute)
    ct_attr.remove(control_total_attribute)
    if control_totals._is_hidden_id():
        ct_attr.remove(control_totals.id_name()[0])

    # compute weights and other attributes necessary for allocation
    attrs_to_compute = [weight_attribute] + ct_attr
    if capacity_attribute is not None:
        attrs_to_compute.append(capacity_attribute)
    for attr in attrs_to_compute:
        try:
            dataset.compute_variables(attr, dataset_pool=dataset_pool)
        except:
            dataset.compute_one_variable_with_unknown_package(attr, dataset_pool=dataset_pool)

    # create subset of control totals for the current year
    year_index = where(control_totals.get_attribute(year_attribute) == current_year)[0]
    if year_index.size <= 0:
        logger.log_warning("No control total for year %s" % current_year)
        return None
    control_totals_for_this_year = DatasetSubset(control_totals, year_index)

    # check capacity
    if capacity_attribute is not None:
        if dataset.get_attribute(capacity_attribute).sum() < control_totals_for_this_year.get_attribute(control_total_attribute).sum():
            logger.log_warning("Capacity (%s) is smaller than the amount to allocate (%s)." % (
                dataset.get_attribute(capacity_attribute).sum(),
                control_totals_for_this_year.get_attribute(control_total_attribute).sum()))
        C = dataset.get_attribute(capacity_attribute).astype('int32')

    all_weights = dataset.get_attribute(weight_attribute)
    outcome = zeros(dataset.size(), dtype='int32')
    for ct_row in range(control_totals_for_this_year.size()):
        is_considered = ones(dataset.size(), dtype='bool8')
        for characteristics in ct_attr:
            is_considered = logical_and(is_considered,
                                        dataset.get_attribute(characteristics) ==
                                        control_totals_for_this_year.get_attribute(characteristics)[ct_row])
        T = control_totals_for_this_year.get_attribute(control_total_attribute)[ct_row]
        it = 1
        while True:
            is_considered_idx = where(is_considered)[0]
            weights = all_weights[is_considered_idx]
            weights_sum = float(weights.sum())
            outcome[is_considered_idx] = round_(outcome[is_considered_idx] + T * (weights / weights_sum)).astype('int32')
            if capacity_attribute is None:
                break
            diff = outcome[is_considered_idx] - C[is_considered_idx]
            outcome[is_considered_idx] = clip(outcome[is_considered_idx], 0, C[is_considered_idx])
            if it == 1 and C[is_considered_idx].sum() < T:
                logger.log_warning("Control total %s cannot be met due to a capacity restriction of %s" % (T, C[is_considered_idx].sum()))
            T = where(diff < 0, 0, diff).sum()
            if T <= 0:
                break
            is_considered = logical_and(is_considered, outcome < C)
            it += 1

    if add_quantity and (outcome_attribute in dataset.get_known_attribute_names()):
        dataset.modify_attribute(name=outcome_attribute, data=outcome + dataset.get_attribute(outcome_attribute))
        logger.log_status('New values added to the attribute %s of dataset %s.' % (outcome_attribute, dataset.get_dataset_name()))
    else:
        dataset.add_primary_attribute(name=outcome_attribute, data=outcome)
        logger.log_status('New values stored into attribute %s of dataset %s.' % (outcome_attribute, dataset.get_dataset_name()))
    dataset.flush_attribute(outcome_attribute)
    return outcome
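# Illustrative sketch (not part of the model above): proportional allocation with
# capacity clipping and redistribution, the same scheme the per-row loop above
# implements. All names are hypothetical; 'weights' and 'capacity' are numpy arrays.
def _demo_allocate(T, weights, capacity):
    from numpy import round_, clip, zeros, minimum
    outcome = zeros(weights.size, dtype='int32')
    while True:
        active = outcome < capacity              # members that can still absorb quantity
        w = weights * active
        if T <= 0 or w.sum() == 0:
            break
        alloc = round_(T * w / float(w.sum())).astype('int32')
        overflow = clip(outcome + alloc - capacity, 0, None)  # amount above capacity
        outcome = minimum(outcome + alloc, capacity)
        T = int(overflow.sum())                  # redistribute the clipped amount next pass
        if T == 0:
            break
    return outcome
# e.g. _demo_allocate(10, array([1.0, 1.0]), array([3, 100])) -> array([3, 7]);
# note that, as in the model above, rounding can leave a small remainder unallocated.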
def run(self, realestate_dataset,
        year=None,
        occupied_spaces_variable="occupied_units",
        total_spaces_variable="total_units",
        target_attribute_name='target_vacancy_rate',
        sample_from_dataset=None,
        sample_filter="",
        reset_attribute_value={},
        year_built='year_built',
        dataset_pool=None,
        append_to_realestate_dataset=False,
        table_name="development_projects",
        dataset_name="development_project",
        id_name='development_project_id',
        **kwargs):
    """
    sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
    append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
    """
    if self.target_vancy_dataset is None:
        raise RuntimeError("target_vacancy_rate dataset is unspecified.")

    if not sample_from_dataset:
        sample_from_dataset = realestate_dataset

    #if dataset_pool is None:
    #    dataset_pool = SessionConfiguration().get_dataset_pool()
    alldata = dataset_pool.get_dataset('alldata')
    unit_names = dataset_pool.get_dataset('building_type').get_attribute('unit_name')
    sqft_per_job = dataset_pool.get_dataset('building_sqft_per_job')
    zones = realestate_dataset.compute_variables("building.disaggregate(parcel.zone_id)")
    type_ids = realestate_dataset.get_attribute("building_type_id")
    building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(zones.max(), type_ids.max())
    if year is None:
        year = SimulationState().get_current_time()
    this_year_index = where(self.target_vancy_dataset.get_attribute('year') == year)[0]
    target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)

    column_names = list(set(self.target_vancy_dataset.get_known_attribute_names()) -
                        set([target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_']))
    column_names.sort(reverse=True)
    column_values = dict([(name, target_vacancy_for_this_year.get_attribute(name))
                          for name in column_names + [target_attribute_name]])

    independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
    dataset_known_attributes = realestate_dataset.get_known_attribute_names()
    sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
    for variable in independent_variables:
        if variable not in dataset_known_attributes:
            realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
        if variable not in sample_dataset_known_attributes:
            sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)

    dataset_known_attributes = realestate_dataset.get_known_attribute_names()  #update after compute
    if sample_filter:
        short_name = VariableName(sample_filter).get_alias()
        if short_name not in dataset_known_attributes:
            filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
        else:
            filter_indicator = sample_from_dataset.get_attribute(short_name)
    else:
        filter_indicator = 1

    sampled_index = array([], dtype=int32)

    #log header
    if PrettyTable is not None:
        status_log = PrettyTable()
        status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
    else:
        logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
    error_log = ''
    for index in range(target_vacancy_for_this_year.size()):
        this_sampled_index = array([], dtype=int32)
        indicator = ones(realestate_dataset.size(), dtype='bool')
        sample_indicator = ones(sample_from_dataset.size(), dtype='bool')
        criterion = {}  # for logging
        for attribute in independent_variables:
            if attribute in dataset_known_attributes:
                dataset_attribute = realestate_dataset.get_attribute(attribute)
                sample_attribute = sample_from_dataset.get_attribute(attribute)
            else:
                raise ValueError("attribute %s used in target vacancy dataset can not be found in dataset %s"
                                 % (attribute, realestate_dataset.get_dataset_name()))

            if attribute + '_min' in column_names:
                amin = target_vacancy_for_this_year.get_attribute(attribute + '_min')[index]
                criterion.update({attribute + '_min': amin})
                if amin != -1:
                    indicator *= dataset_attribute >= amin
                    sample_indicator *= sample_attribute >= amin
            if attribute + '_max' in column_names:
                amax = target_vacancy_for_this_year.get_attribute(attribute + '_max')[index]
                criterion.update({attribute + '_max': amax})
                if amax != -1:
                    indicator *= dataset_attribute <= amax
                    sample_indicator *= sample_attribute <= amax
            if attribute in column_names:
                aval = column_values[attribute][index]
                criterion.update({attribute: aval})
                if aval == -1:
                    continue
                elif aval == -2:
                    ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                    indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                    sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                else:
                    indicator *= dataset_attribute == aval
                    sample_indicator *= sample_attribute == aval

        this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
        ## total/occupied_spaces_variable can be specified either as a universal name for all real estate
        ## or in the target_vacancy_rate dataset for each vacancy category
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]

        logger.be_quiet()  #temporarily disable logging
        realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
        realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
        sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
        if unit_names[index] == "residential_units":
            num_units = alldata.compute_variables("alldata.aggregate_all(household.building_type_id==%s)" % (index + 1))
            #persons = household_set.compute_variables("%s.number_of_agents(%s)" % (hh_ds_name, person_ds_name), resources=resources)
            num_units = num_units[0]
        else:
            num_units = alldata.compute_variables("alldata.aggregate_all(job.disaggregate(employment_submarket.building_type_id)==%s)" % (index + 1))
            num_units = num_units * building_sqft_per_job_table[1, (index + 1)]
            num_units = num_units[0]
        #need to make sure that job empsubmarket doesn't rely on building...
        #Must do non-home-based jobs only and then multiply by building_sqft
        logger.talk()

        actual_num = (indicator * realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
        ## original formula used occupied spaces from the realestate dataset:
        #target_num = int(round((indicator * realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /
        #                       (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
        target_num = int(round(num_units /
                               (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
        diff = target_num - actual_num
        if diff > 0:
            total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
            legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
            if legit_index.size > 0:
                mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                num_of_projects_to_sample = int(diff / mean_size)
                ##sample at least 1 project when diff > 0, otherwise it is an endless loop when num_of_projects_to_sample = 0
                num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                    lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                    this_sampled_index = concatenate((this_sampled_index, lucky_index))
                this_sampled_index = this_sampled_index[0:(1 + searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                sampled_index = concatenate((sampled_index, this_sampled_index))
            else:
                error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                             ','.join([col + "=" + str(criterion[col]) for col in column_names]) + '\n'
        #if diff < 0: #TODO demolition; not yet supported

        ##log status
        action = "0"
        if this_sampled_index.size > 0:
            action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
            if diff > 0: action = "+" + str(action_num)
            if diff < 0: action = "-" + str(action_num)
        cat = [str(criterion[col]) for col in column_names]
        cat += [str(actual_num), str(target_num), str(diff), action]

        if PrettyTable is not None:
            status_log.add_row(cat)
        else:
            logger.log_status("\t".join(cat))

    if PrettyTable is not None:
        logger.log_status("\n" + status_log.get_string())
    if error_log:
        logger.log_error(error_log)

    result_data = {}
    result_dataset = None
    index = array([], dtype='int32')
    if sampled_index.size > 0:
        ### ideally duplicate_rows() is all needed to add newly cloned rows
        ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
        ##realestate_dataset.duplicate_rows(sampled_index)
        result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32'))
        ## also add 'independent_variables' to the new dataset
        for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
            if attribute in reset_attribute_value:
                result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
            else:
                result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)

        if id_name and result_data and id_name not in result_data:
            result_data[id_name] = arange(sampled_index.size, dtype='int32') + 1

        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name=table_name, table_data=result_data)

        result_dataset = Dataset(id_name=id_name,
                                 in_storage=storage,
                                 in_table_name=table_name,
                                 dataset_name=dataset_name)
        index = arange(result_dataset.size())

    if append_to_realestate_dataset:
        if len(result_data) > 0:
            index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                    change_ids_if_not_unique=True)
        result_dataset = realestate_dataset

    return (result_dataset, index)
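# Illustrative sketch (not part of the model above): how this variant turns
# demand into a target stock -- residential demand counted in occupied units,
# non-residential demand converted to sqft via a sqft-per-job factor (the
# building_sqft_per_job_table lookup above). All numbers are hypothetical.
def _demo_demand_to_target():
    jobs, sqft_per_job, vacancy = 250, 400, 0.1
    nonres_demand_sqft = jobs * sqft_per_job                 # 100000 sqft occupied by jobs
    target_sqft = int(round(nonres_demand_sqft / (1 - vacancy)))
    households, res_vacancy = 950, 0.05
    target_units = int(round(households / (1 - res_vacancy)))
    return target_sqft, target_units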
def run(self, in_storage, business_dsname="business"):
    dataset_pool = DatasetPool(storage=in_storage,
                               package_order=['psrc_parcel', 'urbansim_parcel', 'urbansim', 'opus_core'])
    seed(1)
    allbusinesses = dataset_pool.get_dataset(business_dsname)
    parcels = dataset_pool.get_dataset('parcel')
    buildings = dataset_pool.get_dataset('building')
    parcels.compute_variables(["urbansim_parcel.parcel.residential_units",
                               "number_of_buildings = parcel.number_of_agents(building)",
                               "non_residential_sqft = (parcel.aggregate(building.non_residential_sqft)).astype(int32)",
                               "number_of_res_buildings = parcel.aggregate(urbansim_parcel.building.is_residential)",
                               "number_of_nonres_buildings = parcel.aggregate(urbansim_parcel.building.is_non_residential)",
                               "number_of_mixed_use_buildings = parcel.aggregate(urbansim_parcel.building.is_generic_building_type_6)"
                               ], dataset_pool=dataset_pool)
    restypes = [12, 4, 19, 11, 34, 10, 33]
    reslutypes = [13, 14, 15, 24]

    is_valid_business = ones(allbusinesses.size(), dtype='bool8')
    parcels_not_matched = logical_and(in1d(allbusinesses["parcel_id"], parcels.get_id_attribute(), invert=True),
                                      allbusinesses["parcel_id"] > 0)
    if (parcels_not_matched.sum() > 0):
        is_valid_business[where(parcels_not_matched)] = False
        logger.log_warning(message="No parcel exists for %s businesses (%s jobs)" % (
            parcels_not_matched.sum(),
            allbusinesses[self.number_of_jobs_attr][where(parcels_not_matched)].sum()))
    zero_parcel = allbusinesses["parcel_id"] <= 0
    if zero_parcel.sum() > 0:
        is_valid_business[where(zero_parcel)] = False
        logger.log_warning(message="%s businesses (%s jobs) located on zero parcel_id" % (
            zero_parcel.sum(),
            allbusinesses[self.number_of_jobs_attr][where(zero_parcel)].sum()))
    zero_size = logical_and(is_valid_business, allbusinesses[self.number_of_jobs_attr].round() == 0)
    if (sum(zero_size) > 0):
        is_valid_business[where(zero_size)] = False
        logger.log_warning(message="%s businesses are of size 0." % sum(zero_size))

    businesses = DatasetSubset(allbusinesses, index=where(is_valid_business)[0])

    parcels.add_attribute(name="number_of_workplaces", data=parcels.sum_dataset_over_ids(businesses, constant=1))

    has_single_res_buildings = logical_and(parcels["number_of_buildings"] == 1,
                                           parcels["number_of_res_buildings"] == 1)  # 1 (1 residential)
    parcels.add_attribute(data=has_single_res_buildings.astype("int32"), name="buildings_code")
    has_mult_res_buildings = logical_and(parcels["number_of_buildings"] > 1,
                                         parcels["number_of_nonres_buildings"] == 0)  # 2 (mult residential)
    parcels.modify_attribute("buildings_code", data=2*ones(has_mult_res_buildings.sum()),
                             index=where(has_mult_res_buildings))
    has_single_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] == 1,
                                                          parcels["number_of_nonres_buildings"] == 1),
                                              parcels["number_of_mixed_use_buildings"] == 0)  # 3 (1 non-res)
    parcels.modify_attribute("buildings_code", data=3*ones(has_single_nonres_buildings.sum()),
                             index=where(has_single_nonres_buildings))
    has_mult_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] > 1,
                                                        parcels["number_of_res_buildings"] == 0),
                                            parcels["number_of_mixed_use_buildings"] == 0)  # 4 (mult non-res)
    parcels.modify_attribute("buildings_code", data=4*ones(has_mult_nonres_buildings.sum()),
                             index=where(has_mult_nonres_buildings))
    has_single_mixed_buildings = logical_and(parcels["number_of_buildings"] == 1,
                                             parcels["number_of_mixed_use_buildings"] == 1)  # 5 (1 mixed-use)
    parcels.modify_attribute("buildings_code", data=5*ones(has_single_mixed_buildings.sum()),
                             index=where(has_single_mixed_buildings))
    has_mult_mixed_buildings = logical_and(parcels["number_of_buildings"] > 1,
                                           logical_or(logical_and(parcels["number_of_res_buildings"] > 0,
                                                                  parcels["number_of_nonres_buildings"] > 0),
                                                      logical_or(parcels["number_of_mixed_use_buildings"] > 1,
                                                                 logical_and(parcels["number_of_res_buildings"] == 0,
                                                                             parcels["number_of_mixed_use_buildings"] > 0))))  # 6 (mult mixed-use)
    parcels.modify_attribute("buildings_code", data=6*ones(has_mult_mixed_buildings.sum()),
                             index=where(has_mult_mixed_buildings))
    has_no_building_res_lutype = logical_and(parcels["number_of_buildings"] == 0,
                                             in1d(parcels["land_use_type_id"], reslutypes))  # 7 (vacant with res LU type)
    parcels.modify_attribute("buildings_code", data=7*ones(has_no_building_res_lutype.sum()),
                             index=where(has_no_building_res_lutype))
    has_no_building_nonres_lutype = logical_and(parcels["number_of_buildings"] == 0,
                                                in1d(parcels["land_use_type_id"], reslutypes) == 0)  # 8 (vacant with non-res LU type)
    parcels.modify_attribute("buildings_code", data=8*ones(has_no_building_nonres_lutype.sum()),
                             index=where(has_no_building_nonres_lutype))

    business_sizes = businesses[self.number_of_jobs_attr].round().astype("int32")
    business_location = {}
    business_location1wrkpl = zeros(businesses.size(), dtype="int32")
    business_location1wrkplres = zeros(businesses.size(), dtype="int32")
    # sample one building for cases when sampling is required.
    for ibusid in range(businesses.size()):
        idx = where(buildings['parcel_id'] == businesses['parcel_id'][ibusid])[0]
        bldgids = buildings['building_id'][idx]
        business_location[businesses['business_id'][ibusid]] = bldgids
        if bldgids.size == 1:
            business_location1wrkpl[ibusid] = bldgids[0]
        elif bldgids.size > 1:
            business_location1wrkpl[ibusid] = bldgids[sample_noreplace(arange(bldgids.size), 1)]
            if buildings['residential_units'][idx].sum() > 0:
                # Residential buildings are sampled with probabilities proportional to residential units
                business_location1wrkplres[ibusid] = bldgids[probsample_noreplace(arange(bldgids.size), 1,
                                                             prob_array=buildings['residential_units'][idx])]
            else:
                business_location1wrkplres[ibusid] = business_location1wrkpl[ibusid]

    home_based = zeros(business_sizes.sum(), dtype="bool8")
    job_building_id = zeros(business_sizes.sum(), dtype="int32")
    job_array_labels = businesses['business_id'].repeat(business_sizes)
    job_assignment_case = zeros(business_sizes.sum(), dtype="int32")
    processed_bindicator = zeros(businesses.size(), dtype="bool8")
    logger.log_status("Total number of jobs: %s" % home_based.size)

    # 1. up to 5 workers-business in 1 residential building
    idx_single_worker = where(business_sizes < 6)[0]
    bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])
    idx_sngl_wrk_1bld_fit = where(bcode == 1)[0]
    jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_1bld_fit]])
    home_based[jidx] = True
    job_building_id[jidx] = business_location1wrkpl[idx_single_worker[idx_sngl_wrk_1bld_fit]].repeat(
        business_sizes[idx_single_worker[idx_sngl_wrk_1bld_fit]])
    job_assignment_case[jidx] = 1
    processed_bindicator[idx_single_worker[idx_sngl_wrk_1bld_fit]] = True
    logger.log_status("1. %s jobs (%s businesses) set as home-based due to <6 worker x 1 residential building fit." % (
        business_sizes[idx_single_worker[idx_sngl_wrk_1bld_fit]].sum(), idx_sngl_wrk_1bld_fit.size))

    # 2. up to 5 workers-business in multiple residential buildings
    idx_single_worker = where(logical_and(processed_bindicator == 0, business_sizes < 6))[0]
    bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])
    idx_sngl_wrk_multbld_fit = where(bcode == 2)[0]
    jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_multbld_fit]])
    home_based[jidx] = True
    job_building_id[jidx] = business_location1wrkplres[idx_single_worker[idx_sngl_wrk_multbld_fit]].repeat(
        business_sizes[idx_single_worker[idx_sngl_wrk_multbld_fit]])
    job_assignment_case[jidx] = 2
    processed_bindicator[idx_single_worker[idx_sngl_wrk_multbld_fit]] = True
    logger.log_status("2. %s jobs (%s businesses) set as home-based due to single worker x multiple residential buildings fit." % (
        business_sizes[idx_single_worker[idx_sngl_wrk_multbld_fit]].sum(), idx_sngl_wrk_multbld_fit.size))

    # 3. single worker in single non-res building (not mixed-use)
    idx_single_worker = where(logical_and(processed_bindicator == 0, business_sizes == 1))[0]
    bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])
    idx_sngl_wrk_single_nonres_fit = where(bcode == 3)[0]
    jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_single_nonres_fit]])
    job_building_id[jidx] = business_location1wrkpl[idx_single_worker[idx_sngl_wrk_single_nonres_fit]]
    job_assignment_case[jidx] = 3
    processed_bindicator[idx_single_worker[idx_sngl_wrk_single_nonres_fit]] = True
    logger.log_status("3. %s jobs could be placed due to single worker x single non-res building fit."
                      % idx_sngl_wrk_single_nonres_fit.size)

    # 4. single worker in multiple non-res building (not mixed-use)
    idx_single_worker = where(logical_and(processed_bindicator == 0, business_sizes == 1))[0]
    bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])
    idx_sngl_wrk_mult_nonres_fit = where(bcode == 4)[0]
    jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_mult_nonres_fit]])
    job_building_id[jidx] = business_location1wrkpl[idx_single_worker[idx_sngl_wrk_mult_nonres_fit]]
    job_assignment_case[jidx] = 4
    processed_bindicator[idx_single_worker[idx_sngl_wrk_mult_nonres_fit]] = True
    logger.log_status("4. %s jobs could be placed due to single worker x multiple non-res building fit."
                      % idx_sngl_wrk_mult_nonres_fit.size)

    # 5. single worker in single mixed-use building
    idx_single_worker = where(logical_and(processed_bindicator == 0, business_sizes == 1))[0]
    bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])
    idx_sngl_wrk_smu_fit = where(bcode == 5)[0]
    jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_smu_fit]])
    job_building_id[jidx] = business_location1wrkpl[idx_single_worker[idx_sngl_wrk_smu_fit]]
    job_assignment_case[jidx] = 5
    processed_bindicator[idx_single_worker[idx_sngl_wrk_smu_fit]] = True
    logger.log_status("5. %s jobs in single worker x single mixed-use building." % idx_sngl_wrk_smu_fit.size)

    # 6. single worker in multiple mixed-type buildings
    idx_single_worker = where(logical_and(processed_bindicator == 0, business_sizes == 1))[0]
    bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])
    idx_sngl_wrk_mmu_fit = where(bcode == 6)[0]
    jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_mmu_fit]])
    job_building_id[jidx] = business_location1wrkpl[idx_single_worker[idx_sngl_wrk_mmu_fit]]
    bldtype = buildings.get_attribute_by_id("building_type_id",
                                            business_location1wrkpl[idx_single_worker[idx_sngl_wrk_mmu_fit]])
    is_bldtype_res = in1d(bldtype, restypes)
    home_based[in1d(job_array_labels,
                    businesses['business_id'][idx_single_worker[idx_sngl_wrk_mmu_fit]][where(is_bldtype_res)])] = True
    job_assignment_case[jidx] = 6
    processed_bindicator[idx_single_worker[idx_sngl_wrk_mmu_fit]] = True
    logger.log_status("6. %s jobs in single worker x multiple mixed-type buildings. %s jobs classified as home-based." % (
        idx_sngl_wrk_mmu_fit.size, is_bldtype_res.sum()))

    # 7. up to 5 workers-business in residential parcel with no building
    idx_single_worker = where(logical_and(processed_bindicator == 0, business_sizes < 6))[0]
    bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])
    idx_sngl_wrk_vacant_res = where(bcode == 7)[0]
    jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_vacant_res]])
    job_assignment_case[jidx] = 7
    home_based[jidx] = True
    processed_bindicator[idx_single_worker[idx_sngl_wrk_vacant_res]] = True
    logger.log_status("7. %s jobs (%s businesses of size <6) could not be placed due to non-existing buildings in parcels with residential LU type." % (
        business_sizes[idx_single_worker[idx_sngl_wrk_vacant_res]].sum(), idx_sngl_wrk_vacant_res.size))
    # 9. 6+ workers in single residential building: do not place - will go into ELCM
    idx_more_workers = where(logical_and(processed_bindicator == 0, business_sizes > 5))[0]
    bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_more_workers])
    idx_sngl_wrk_fit = where(bcode == 1)[0]
    jidx = in1d(job_array_labels, businesses['business_id'][idx_more_workers[idx_sngl_wrk_fit]])
    #job_building_id[jidx] = business_location1wrkpl[idx_more_workers[idx_sngl_wrk_fit]].repeat(business_sizes[idx_more_workers[idx_sngl_wrk_fit]])
    #home_based[jidx] = True
    job_assignment_case[jidx] = 9
    processed_bindicator[idx_more_workers[idx_sngl_wrk_fit]] = True
    logger.log_status("9. %s jobs (%s businesses) in 6+ worker x single residential building. Not placed." % (
        business_sizes[idx_more_workers[idx_sngl_wrk_fit]].sum(), idx_sngl_wrk_fit.size))

    # 10. 6+ workers in multiple residential building: do not place - will go into ELCM
    idx_more_workers = where(logical_and(processed_bindicator == 0, business_sizes > 5))[0]
    bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_more_workers])
    idx_sngl_wrk_fit = where(bcode == 2)[0]
    jidx = in1d(job_array_labels, businesses['business_id'][idx_more_workers[idx_sngl_wrk_fit]])
    job_assignment_case[jidx] = 10
    processed_bindicator[idx_more_workers[idx_sngl_wrk_fit]] = True
    # sample buildings to businesses by parcels
    #bpcls = unique(businesses["parcel_id"][idx_more_workers[idx_sngl_wrk_fit]])
    #for ipcl in range(bpcls.size):
    #    bidx = where(buildings['parcel_id'] == bpcls[ipcl])[0]
    #    bldgids = buildings['building_id'][bidx]
    #    bussids = businesses['business_id'][businesses["parcel_id"] == bpcls[ipcl]]
    #    # multiply by units for sampling prop. to units rather than buildings
    #    bldgids = bldgids.repeat(maximum(1, buildings['residential_units'][bidx].astype('int32')))
    #    if bldgids.size < bussids.size:
    #        bldarray = bldgids.repeat(1+ceil((bussids.size - bldgids.size)/float(bldgids.size)))
    #    else:
    #        bldarray = bldgids
    #    shuffle(bldarray) # randomly reorder in-place
    #    for ib in range(bussids.size):
    #        jidx = where(job_array_labels == bussids[ib])
    #        job_building_id[jidx] = bldarray[ib]
    #        home_based[jidx] = True
    #        job_assignment_case[jidx] = 10
    logger.log_status("10. %s jobs (%s businesses) in 6+ worker x multiple residential building. Not placed." % (
        business_sizes[idx_more_workers[idx_sngl_wrk_fit]].sum(), idx_sngl_wrk_fit.size))

    # 11. single workplace, 2+ workers in single non-res or mixed-use building
    idx_2plus_workers = where(logical_and(processed_bindicator == 0, business_sizes > 1))[0]
    bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_2plus_workers])
    workplace_filter = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"][idx_2plus_workers])
    idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_or(bcode == 3, bcode == 5),
                                                        workplace_filter == 1))[0]
    which_labels = where(in1d(job_array_labels,
                              businesses['business_id'][idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]]))[0]
    job_building_id[which_labels] = business_location1wrkpl[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]].repeat(
        business_sizes[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]])
    job_assignment_case[which_labels] = 11
    processed_bindicator[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]] = True
    logger.log_status("11. %s jobs (%s businesses) could be placed due to single workplace x 2+ workers x single non-res/mixed-use building fit." % (
        business_sizes[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]].sum(), idx_sngl_wrkplace_2plus_workers.size))

    # 12. single workplace, 2+ workers in multiple mixed-type building
    idx_2plus_workers = where(logical_and(processed_bindicator == 0, business_sizes > 1))[0]
    bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_2plus_workers])
    workplace_filter = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"][idx_2plus_workers])
    idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_or(bcode == 6, bcode == 4),
                                                        workplace_filter == 1))[0]
    jidx = in1d(job_array_labels, businesses['business_id'][idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]])
    job_building_id[jidx] = business_location1wrkpl[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]].repeat(
        business_sizes[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]])
    job_assignment_case[jidx] = 12
    processed_bindicator[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]] = True
    logger.log_status("12. %s jobs (%s businesses) could be placed due to single workplace x 2+ workers x multiple non-res/mixed building fit." % (
        business_sizes[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]].sum(), idx_sngl_wrkplace_2plus_workers.size))

    # 13. multiple workplaces, 2+ workers in single non-res or mixed building
    idx_2plus_workers = where(logical_and(processed_bindicator == 0, business_sizes > 1))[0]
    bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_2plus_workers])
    workplace_filter = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"][idx_2plus_workers])
    idx_mult_wrkplace_2plus_workers = where(logical_and(logical_or(bcode == 3, bcode == 5),
                                                        workplace_filter > 1))[0]
    jidx = in1d(job_array_labels, businesses['business_id'][idx_2plus_workers[idx_mult_wrkplace_2plus_workers]])
    job_building_id[jidx] = business_location1wrkpl[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]].repeat(
        business_sizes[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]])
    job_assignment_case[jidx] = 13
    processed_bindicator[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]] = True
    logger.log_status("13. %s jobs (%s businesses) could be placed due to multiple workplaces x 2+ workers x single non-res/mixed building fit." % (
        business_sizes[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]].sum(), idx_mult_wrkplace_2plus_workers.size))
    # 14. multiple workplaces, 2+ workers in multiple non-res or mixed building
    idx_2plus_workers = where(logical_and(processed_bindicator == 0, business_sizes > 1))[0]
    bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_2plus_workers])
    workplace_filter = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"][idx_2plus_workers])
    idx_mult_wrkplace_2plus_workers = where(logical_and(logical_or(bcode == 4, bcode == 6),
                                                        workplace_filter > 1))[0]
    processed_bindicator[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]] = True
    # sample buildings to businesses by parcels
    bpcls = unique(businesses["parcel_id"][idx_2plus_workers[idx_mult_wrkplace_2plus_workers]])
    hbasedsum = home_based.sum()
    for ipcl in range(bpcls.size):
        bldgids = buildings['building_id'][buildings['parcel_id'] == bpcls[ipcl]]
        bussids = businesses['business_id'][businesses["parcel_id"] == bpcls[ipcl]]
        if bldgids.size < bussids.size:
            bldarray = bldgids.repeat(int(1 + ceil((bussids.size - bldgids.size) / float(bldgids.size))))
        else:
            bldarray = bldgids
        shuffle(bldarray)  # randomly reorder in-place
        ## look up building types to decide home-based status (mirrors case 6);
        ## comparing raw building ids against restypes would be a bug
        bldtype = buildings.get_attribute_by_id("building_type_id", bldarray)
        is_res = in1d(bldtype, restypes)
        for ib in range(bussids.size):
            jidx = where(job_array_labels == bussids[ib])
            job_building_id[jidx] = bldarray[ib]
            home_based[jidx] = is_res[ib]
            job_assignment_case[jidx] = 14
    logger.log_status("14. %s jobs (%s businesses) could be placed due to multiple workplaces x 2+ workers x multiple non-res/mixed building fit. Classify %s jobs as home-based." % (
        business_sizes[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]].sum(),
        idx_mult_wrkplace_2plus_workers.size, home_based.sum() - hbasedsum))

    # 15. 6+ workers in residential parcel with no building
    idx_2plus_workers = where(logical_and(processed_bindicator == 0, business_sizes > 5))[0]
    bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_2plus_workers])
    idx_wrk_vacant_res = where(bcode == 7)[0]
    jidx = in1d(job_array_labels, businesses['business_id'][idx_2plus_workers[idx_wrk_vacant_res]])
    job_assignment_case[jidx] = 15
    processed_bindicator[idx_2plus_workers[idx_wrk_vacant_res]] = True
    logger.log_status("15. %s jobs (%s businesses of 6+ workers) could not be placed due to non-existing buildings in parcels with residential LU type." % (
        business_sizes[idx_2plus_workers[idx_wrk_vacant_res]].sum(), idx_wrk_vacant_res.size))

    # 16. nonresidential parcel with no building
    idx_any_workers = where(processed_bindicator == 0)[0]
    bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_any_workers])
    idx_wrk_vacant_nonres = where(bcode == 8)[0]
    jidx = in1d(job_array_labels, businesses['business_id'][idx_any_workers[idx_wrk_vacant_nonres]])
    job_assignment_case[jidx] = 16
    processed_bindicator[idx_any_workers[idx_wrk_vacant_nonres]] = True
    logger.log_status("16. %s jobs (%s businesses) could not be placed due to non-existing buildings in parcels with non-residential LU type." % (
        business_sizes[idx_any_workers[idx_wrk_vacant_nonres]].sum(), idx_wrk_vacant_nonres.size))

    # jobs in messy buildings
    idx_worker = where(logical_and(processed_bindicator == 0, business_sizes > 0))[0]
    bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_worker])
    idx_messy_fit = where(bcode == 0)[0]
    processed_bindicator[idx_worker[idx_messy_fit]] = True
    logger.log_status("%s jobs (%s businesses) could not be placed due to messy buildings." % (
        business_sizes[idx_worker[idx_messy_fit]].sum(), idx_messy_fit.size))

    logger.log_status("So far %s (%s percent) home-based jobs."
                      % (home_based.sum(), round(home_based.sum() / (home_based.size / 100.), 2)))
    logger.log_status("So far %s percent (%s) jobs (%s businesses) processed. %s jobs (%s businesses) remain to be processed." % \
                      (round(business_sizes[processed_bindicator].sum() / (home_based.size / 100.), 2),
                       business_sizes[processed_bindicator].sum(), processed_bindicator.sum(),
                       business_sizes[logical_not(processed_bindicator)].sum(),
                       business_sizes[logical_not(processed_bindicator)].size))

    # create job dataset
    job_data = {"job_id": arange(job_building_id.size) + 1,
                "home_based": home_based,
                "building_id": job_building_id,
                "business_id": job_array_labels,
                "sector_id": zeros(job_building_id.size),
                "parcel_id": zeros(job_building_id.size),
                "assignment_case": job_assignment_case}
    for ib in range(businesses.size()):
        idx = where(job_data['business_id'] == businesses['business_id'][ib])
        job_data["sector_id"][idx] = businesses['sector_id'][ib]
        job_data["parcel_id"][idx] = businesses['parcel_id'][ib]

    dictstorage = StorageFactory().get_storage('dict_storage')
    dictstorage.write_table(table_name="jobs", table_data=job_data)
    return Dataset(in_storage=dictstorage, in_table_name="jobs", dataset_name="job", id_name="job_id")
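# Illustrative sketch (not part of the model above): the job-array bookkeeping
# used throughout the assignment -- one row per job, labeled by business via
# repeat(), with boolean masks built by in1d() to address all jobs of the
# businesses a case selects. All values are hypothetical.
def _demo_job_labels():
    from numpy import array, in1d, zeros
    business_id = array([7, 9, 12])
    business_size = array([2, 1, 3])                # jobs per business
    job_labels = business_id.repeat(business_size)  # [7, 7, 9, 12, 12, 12]
    job_building = zeros(job_labels.size, dtype='int32')
    selected = array([7, 12])                       # businesses matched by some case
    jidx = in1d(job_labels, selected)               # mask over the jobs of those businesses
    # building ids repeated per business, one entry per selected job
    job_building[jidx] = array([501, 501, 502, 502, 502])
    return job_labels, job_building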
def run(self, year=None, dataset_pool=None, **kwargs):
    if dataset_pool is None:
        dataset_pool = SessionConfiguration().get_dataset_pool()
    if year is None:
        year = SimulationState().get_current_time()

    this_year_index = where(self.scheduled_events.get_attribute('year') == year)[0]
    scheduled_events_for_this_year = DatasetSubset(self.scheduled_events, this_year_index)
    scheduled_events_for_this_year.load_dataset_if_not_loaded()
    column_names = list(set(self.scheduled_events.get_known_attribute_names()) -
                        set(['year', 'action', 'attribute', 'amount', 'event_id', '_hidden_id_']))
    column_names.sort()
    #column_values = dict([(name, scheduled_events_for_this_year.get_attribute(name)) for name in column_names])

    for index in range(scheduled_events_for_this_year.size()):
        indicator = ones(self.dataset.size(), dtype='bool')
        event_attr = {}
        for attribute in column_names:
            if attribute in self.dataset.get_known_attribute_names():
                dataset_attribute = self.dataset.get_attribute(attribute)
            else:
                ## this is done inside the loop because some action may delete computed attributes, such as dataset.add_elements()
                try:
                    dataset_attribute = self.dataset.compute_one_variable_with_unknown_package(attribute, dataset_pool=dataset_pool)
                except:
                    raise ValueError("attribute %s used in scheduled events dataset can not be found in dataset %s"
                                     % (attribute, self.dataset.get_dataset_name()))

            #if attribute in column_names:
            aval = scheduled_events_for_this_year.get_attribute(attribute)[index]
            if aval == -1:
                continue  # ignore if column value is -1
            else:
                indicator *= dataset_attribute == aval
                event_attr.update({attribute: aval})

        #agents in dataset satisfying all conditions are identified by indicator
        legit_index = where(indicator)[0]

        this_event = scheduled_events_for_this_year.get_data_element(index)
        if not hasattr(this_event, 'attribute'):
            action_attr_name = ''
        else:
            action_attr_name = this_event.attribute
        action_function = getattr(self, '_' + this_event.action.strip().lower())
        action_function(amount=this_event.amount,
                        attribute=action_attr_name,
                        dataset=self.dataset,
                        index=legit_index,
                        data_dict=event_attr)

        self.post_run(self.dataset, legit_index, **kwargs)

    return self.dataset
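# Illustrative sketch (not part of the model above): the event-matching rule used
# by the scheduled-events loop -- every event column must equal the corresponding
# dataset attribute unless the event stores -1, which acts as a wildcard.
# The column names and values are hypothetical.
def _demo_event_match():
    from numpy import array, ones
    zone = array([1, 1, 2, 3])
    building_type = array([4, 5, 4, 4])
    event = {'zone_id': 1, 'building_type_id': -1}  # -1: match any building type
    indicator = ones(zone.size, dtype='bool')
    for name, values in [('zone_id', zone), ('building_type_id', building_type)]:
        aval = event[name]
        if aval == -1:
            continue                 # wildcard column, no restriction
        indicator &= values == aval
    return indicator                 # [True, True, False, False]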
    def run(self, dataset, outcome_attribute, weight_attribute, control_totals, current_year,
            control_total_attribute=None, year_attribute='year', capacity_attribute=None,
            add_quantity=False, dataset_pool=None):
        """'dataset' is a Dataset for which a quantity 'outcome_attribute' is created.
        The total amount of the quantity is given by the attribute 'control_total_attribute'
        of the 'control_totals' Dataset. If it is not given, it is assumed to have the same
        name as 'outcome_attribute'. The 'weight_attribute' of 'dataset' determines the
        allocation weights. The 'control_totals' Dataset contains an attribute 'year'
        (or alternatively, an attribute given by the 'year_attribute' argument) and optionally
        other attributes that must be known to the 'dataset' (such as a geography). For each
        row of the control_totals dataset for which year matches the 'current_year', the total
        amount is distributed among the corresponding members of 'dataset' according to weights.
        If a 'capacity_attribute' is given (attribute of 'dataset'), the algorithm removes
        any allocation that exceeds the capacity and redistributes it among the remaining
        members. The resulting values are appended to 'dataset' as 'outcome_attribute'
        (as a primary attribute). If add_quantity is True and the 'outcome_attribute' exists
        in dataset, the resulting values are added to the current values of 'outcome_attribute'.
        """
        python_version = '2.%s' % (sys.version_info[1])
        ct_attr = control_totals.get_known_attribute_names()
        if year_attribute not in ct_attr:
            raise StandardError, "Year attribute '%s' must be a known attribute of the control totals dataset." % year_attribute
        ct_attr.remove(year_attribute)
        if control_total_attribute is None:
            control_total_attribute = outcome_attribute
        if control_total_attribute not in ct_attr:
            raise StandardError, "Attribute '%s' must be a known attribute of the control totals dataset." % control_total_attribute
        ct_attr.remove(control_total_attribute)
        if control_totals._is_hidden_id():
            ct_attr.remove(control_totals.id_name()[0])
        # compute weights and other attributes necessary for allocation
        attrs_to_compute = [weight_attribute] + ct_attr
        if capacity_attribute is not None:
            attrs_to_compute.append(capacity_attribute)
        for attr in attrs_to_compute:
            try:
                dataset.compute_variables(attr, dataset_pool=dataset_pool)
            except:
                dataset.compute_one_variable_with_unknown_package(attr, dataset_pool=dataset_pool)
        # create subset of control totals for the current year
        year_index = where(control_totals.get_attribute(year_attribute) == current_year)[0]
        if year_index.size <= 0:
            logger.log_warning("No control total for year %s" % current_year)
            return None
        control_totals_for_this_year = DatasetSubset(control_totals, year_index)
        # check capacity
        if capacity_attribute is not None:
            if dataset.get_attribute(capacity_attribute).sum() < control_totals_for_this_year.get_attribute(control_total_attribute).sum():
                logger.log_warning("Capacity (%s) is smaller than the amount to allocate (%s)." % (
                    dataset.get_attribute(capacity_attribute).sum(),
                    control_totals_for_this_year.get_attribute(control_total_attribute).sum()))
            C = dataset.get_attribute(capacity_attribute).astype('int32')
        all_weights = dataset.get_attribute(weight_attribute)
        outcome = zeros(dataset.size(), dtype='int32')
        for ct_row in range(control_totals_for_this_year.size()):
            is_considered = ones(dataset.size(), dtype='bool8')
            for characteristics in ct_attr:
                is_considered = logical_and(is_considered,
                                            dataset.get_attribute(characteristics) == control_totals_for_this_year.get_attribute(characteristics)[ct_row])
            T = control_totals_for_this_year.get_attribute(control_total_attribute)[ct_row]
            it = 1
            while True:
                is_considered_idx = where(is_considered)[0]
                # jump out of the loop when no index is considered, otherwise cum_prob[-1] below raises an index-out-of-bounds error
                if is_considered_idx.size < 1:
                    break
                weights = all_weights[is_considered_idx]
                weights_sum = float(weights.sum())
                # draw the allocation by probability sampling so the total matches exactly --Hanyi
                prob_array = (weights*1.0/weights_sum).astype('float64')
                cum_prob = cumsum(prob_array, dtype='float64')
                sample_prob = uniform(0, cum_prob[-1], T)
                sampled_index = searchsorted(cum_prob, sample_prob)
                sampled_index = sampled_index.astype('int32')
                # due to precision problems, searchsorted could return index = cum_prob.size
                sampled_index = clip(sampled_index, 0, cum_prob.size-1)
                if python_version == '2.6':
                    # Python 2.6 version: tally the draws with an explicit loop
                    count = zeros(prob_array.size).astype('int32')
                    for i in range(0, prob_array.size):
                        sub_indx = where(sampled_index == i)[0]
                        count[i] = sub_indx.size
                elif python_version == '2.7':
                    # Python 2.7 version: tally the draws with collections.Counter
                    from collections import Counter
                    dict_count_ini = dict([(x, 0) for x in range(prob_array.size)])
                    ## Alternative init dict method
                    #from numpy import arange
                    #dict_count_ini_keys = arange(prob_array.size).astype('int32')
                    #dict_count_ini_values = zeros(prob_array.size).astype('int32')
                    #dict_count_ini = dict(zip(dict_count_ini_keys, dict_count_ini_values))
                    ## Alternative End
                    dict_count = Counter(sampled_index)
                    dict_count_ini.update(dict(dict_count))
                    count = array(dict(sorted(dict_count_ini.items())).values())
                outcome[is_considered_idx] = outcome[is_considered_idx] + count
                # deterministic rounding replaced by the sampling above --Hanyi
                #outcome[is_considered_idx] = round_(outcome[is_considered_idx] + T * (weights/weights_sum)).astype('int32')
                if capacity_attribute is None:
                    break
                diff = outcome[is_considered_idx] - C[is_considered_idx]
                outcome[is_considered_idx] = clip(outcome[is_considered_idx], 0, C[is_considered_idx])
                if it == 1 and C[is_considered_idx].sum() < T:
                    logger.log_warning("Control total %s cannot be met due to a capacity restriction of %s" % (T, C[is_considered_idx].sum()))
                T = where(diff < 0, 0, diff).sum()
                if T <= 0:
                    break
                is_considered = logical_and(is_considered, outcome < C)
                it += 1
        if add_quantity and (outcome_attribute in dataset.get_known_attribute_names()):
            dataset.modify_attribute(name=outcome_attribute, data=outcome+dataset.get_attribute(outcome_attribute))
            logger.log_status('New values added to the attribute %s of dataset %s.' % (outcome_attribute, dataset.get_dataset_name()))
        else:
            dataset.add_primary_attribute(name=outcome_attribute, data=outcome)
            logger.log_status('New values stored into attribute %s of dataset %s.' % (outcome_attribute, dataset.get_dataset_name()))
        dataset.flush_attribute(outcome_attribute)
        return outcome
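# The two version-specific tally branches above just count how often each index was
# drawn. A minimal, self-contained sketch of the same tally with numpy.bincount
# (available with the minlength argument on NumPy >= 1.6); the toy weights and
# sample size are assumptions for illustration only:
from numpy import array, cumsum, searchsorted, clip, bincount
from numpy.random import uniform
weights = array([1.0, 3.0, 6.0])
prob_array = weights / weights.sum()
cum_prob = cumsum(prob_array)
sample_prob = uniform(0, cum_prob[-1], 20)
sampled_index = clip(searchsorted(cum_prob, sample_prob), 0, cum_prob.size - 1)
count = bincount(sampled_index, minlength=prob_array.size)  # per-index tally, zeros included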
    def prepare_for_run(self, dataset_pool, create_proposal_set=True,
                        parcel_filter_for_new_development=None, parcel_filter_for_redevelopment=None,
                        template_filter=None, spec_replace_module_variable_pair=None,
                        proposed_units_variable="urbansim_parcel.development_project_proposal.units_proposed",
                        **kwargs):
        """create development project proposal dataset from parcels and development templates.
        spec_replace_module_variable_pair is a tuple with two elements: module name, variable
        within the module that contains a dictionary of model variables to be replaced in the specification.
        """
        specification, coefficients, dummy = RegressionModel.prepare_for_run(self, **kwargs)
        try:
            existing_proposal_set_parent = dataset_pool.get_dataset('development_project_proposal')
            if 'units_proposed' not in existing_proposal_set_parent.get_known_attribute_names():
                ## compute 'units_proposed' and add it as a primary attribute (as it may be missing when loaded from the base_year_data)
                units_proposed = existing_proposal_set_parent.compute_variables(proposed_units_variable, dataset_pool)
                existing_proposal_set_parent.add_attribute(units_proposed, "units_proposed", AttributeType.PRIMARY)
            #load proposals whose status_id are not of id_tentative or id_not_available
            available_idx = where(in1d(existing_proposal_set_parent.get_attribute("status_id"),
                                       array([DevelopmentProjectProposalDataset.id_active,
                                              DevelopmentProjectProposalDataset.id_proposed,
                                              DevelopmentProjectProposalDataset.id_planned,
                                              DevelopmentProjectProposalDataset.id_with_velocity])))[0]
            existing_proposal_set = DatasetSubset(existing_proposal_set_parent, available_idx)
            # Code updated by Hanyi Li, MAG 6/8/2010
            # Replacing the cached 'development_project_proposal' dataset with
            # the filtered dataset 'existing_proposal_set'
            dataset_pool.replace_dataset(existing_proposal_set_parent.get_dataset_name(), existing_proposal_set)
        except:
            existing_proposal_set = None
        parcels = dataset_pool.get_dataset('parcel')
        templates = dataset_pool.get_dataset('development_template')
        # It is important that during this method no variable flushing happens, since
        # we create datasets of the same name for different purposes (new development and redevelopment)
        # and flushing would mix them up
        flush_variables_current = SimulationState().get_flush_datasets()
        SimulationState().set_flush_datasets(False)
        # Code added by Jesse Ayers, MAG, 9/14/2009
        # Getting an index of parcels that have actively developing projects (those on a velocity function)
        # and making sure that new proposals are not generated for them
        if existing_proposal_set and existing_proposal_set.size() > 0:
            parcels_with_proposals = existing_proposal_set.get_attribute('parcel_id')
            parcels_with_proposals_idx = parcels.get_id_index(parcels_with_proposals)
            if parcel_filter_for_new_development is not None:
                if parcel_filter_for_new_development[parcel_filter_for_new_development.find('=')+1] == '=':
                    filter = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
                else:
                    parcel_filter_for_new_development = parcel_filter_for_new_development[parcel_filter_for_new_development.find('=')+1:].lstrip()
                    filter = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
                index1 = where(parcels.compute_variables(filter))[0]
        else:
            if parcel_filter_for_new_development is not None:
                index1 = where(parcels.compute_variables(parcel_filter_for_new_development))[0]
            else:
                index1 = None
        if template_filter is not None:
            try:
                index2 = where(templates.compute_variables(template_filter))[0]
            except Exception, e:
                logger.log_warning("template_filter is set to %s, but there is an error when computing it: %s" % (template_filter, e))
                index2 = None
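# For illustration: the filter string built above follows the Opus expression
# convention "alias = expression". A hedged sketch of composing such a filter,
# where 'parcel.is_developable' is a hypothetical variable name assumed only
# for this example:
user_filter = "parcel.is_developable == 1"
flt = "flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)" % user_filter
# parcels.compute_variables(flt) would evaluate the combined condition and
# expose the resulting boolean array under the alias 'flter'.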
    def run(self, year=None, target_attribute_name='number_of_households', sample_filter="",
            reset_dataset_attribute_value={}, dataset_pool=None, **kwargs):
        """ sample_filter attribute/variable indicates which records in the dataset are
        eligible in the sampling for removal or cloning """
        #if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.control_totals.get_attribute('year') == year)[0]
        control_totals_for_this_year = DatasetSubset(self.control_totals, this_year_index)
        column_names = list(set(self.control_totals.get_known_attribute_names()) -
                            set([target_attribute_name, 'year', '_hidden_id_']))
        column_names.sort(reverse=True)
        column_values = dict([(name, control_totals_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        dataset_known_attributes = self.dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                self.dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
        dataset_known_attributes = self.dataset.get_known_attribute_names() #update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = self.dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = self.dataset.get_attribute(short_name)
        else:
            filter_indicator = 1
        to_be_cloned = array([], dtype=int32)
        to_be_removed = array([], dtype=int32)
        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
        error_log = ''
        for index in range(control_totals_for_this_year.size()):
            lucky_index = None
            indicator = ones(self.dataset.size(), dtype='bool')
            criterion = {}
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = self.dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in control total dataset can not be found in dataset %s" % (attribute, self.dataset.get_dataset_name())
                if attribute + '_min' in column_names:
                    amin = column_values[attribute + '_min'][index]
                    criterion.update({attribute + '_min': amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                if attribute + '_max' in column_names:
                    amax = column_values[attribute + '_max'][index]
                    criterion.update({attribute + '_max': amax})
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                if attribute in column_names:
                    aval = column_values[attribute][index]
                    criterion.update({attribute: aval})
                    if aval == -1:
                        continue
                    elif aval == -2:
                        ## treat -2 in control totals column as the complement set, i.e. all other values not already specified in this column
                        complement_values = setdiff1d(dataset_attribute, column_values[attribute])
                        has_one_of_the_complement_value = zeros(dataset_attribute.size, dtype='bool')
                        for value in complement_values:
                            has_one_of_the_complement_value += dataset_attribute == value
                        indicator *= has_one_of_the_complement_value
                    else:
                        indicator *= dataset_attribute == aval
            target_num = column_values[target_attribute_name][index]
            ## if accounting attribute is None, count number of agents with indicator = True
            if self.dataset_accounting_attribute is None:
                actual_num = indicator.sum()
                action_num = 0
                diff = target_num - actual_num
                if actual_num != target_num:
                    legit_index = where(logical_and(indicator, filter_indicator))[0]
                    if legit_index.size > 0:
                        if actual_num < target_num:
                            lucky_index = sample_replace(legit_index, target_num - actual_num)
                            to_be_cloned = concatenate((to_be_cloned, lucky_index))
                        elif actual_num > target_num:
                            lucky_index = sample_noreplace(legit_index, actual_num - target_num)
                            to_be_removed = concatenate((to_be_removed, lucky_index))
                        action_num = lucky_index.size
                    else:
                        error_log += "There is nothing to sample from %s and no action will happen for " % self.dataset.get_dataset_name() + \
                                     ','.join([col + "=" + str(criterion[col]) for col in column_names]) + '\n'
            else:
                ## sum accounting attribute for agents with indicator = True;
                ## assume dataset_accounting_attribute is a primary attribute
                accounting = self.dataset.get_attribute(self.dataset_accounting_attribute) * indicator
                actual_num = accounting.sum()
                mean_size = float(actual_num) / indicator.sum()
                action_num = 0
                diff = target_num - actual_num
                if actual_num != target_num:
                    legit_index = where(logical_and(indicator, filter_indicator))[0]
                    if legit_index.size > 0:
                        while actual_num + action_num < target_num:
                            lucky_index = sample_replace(legit_index, ceil((target_num - actual_num - action_num)/mean_size))
                            action_num += accounting[lucky_index].sum()
                            to_be_cloned = concatenate((to_be_cloned, lucky_index))
                        while actual_num - action_num > target_num:
                            lucky_index = sample_noreplace(legit_index, ceil((actual_num - target_num - action_num)/mean_size))
                            action_num += accounting[lucky_index].sum()
                            to_be_removed = concatenate((to_be_removed, lucky_index))
                    else:
                        error_log += "There is nothing to sample from %s and no action will happen for " % self.dataset.get_dataset_name() + \
                                     ','.join([col + "=" + str(criterion[col]) for col in column_names]) + '\n'
            ##log status
            action = "0"
            if lucky_index is not None:
                if actual_num < target_num: action = "+" + str(action_num)
                if actual_num > target_num: action = "-" + str(action_num)
            cat = [str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(diff), action]
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:
                logger.log_status("\t".join(cat))
        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
        clone_data = {}
        if to_be_cloned.size > 0:
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##self.dataset.duplicate_rows(to_be_cloned)
            logger.log_status()
            for attribute in dataset_known_attributes:
                if reset_dataset_attribute_value.has_key(attribute):
                    clone_data[attribute] = resize(array(reset_dataset_attribute_value[attribute]), to_be_cloned.size)
                else:
                    clone_data[attribute] = self.dataset.get_attribute_by_index(attribute, to_be_cloned)
        self.post_run(self.dataset, to_be_cloned, to_be_removed, **kwargs)
        if to_be_removed.size > 0:
            logger.log_status()
            self.dataset.remove_elements(to_be_removed)
        if clone_data:
            self.dataset.add_elements(data=clone_data, change_ids_if_not_unique=True)
        return self.dataset
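# A minimal sketch of the clone/remove bookkeeping above on toy data: a deficit is
# filled by drawing eligible indices with replacement (a plain-numpy stand-in for
# the sample_replace helper used by the model); all values are assumptions:
from numpy import array, where, ones, concatenate, int32
from numpy.random import randint
indicator = array([True, True, True, False, False])   # matches this control-total row
filter_ok = ones(5, dtype='bool')                     # sample_filter passes everyone
legit_index = where(indicator * filter_ok)[0]         # -> [0, 1, 2]
actual_num, target_num = legit_index.size, 5
lucky_index = legit_index[randint(0, legit_index.size, target_num - actual_num)]
to_be_cloned = concatenate((array([], dtype=int32), lucky_index))  # 2 records cloned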
    def run(self, dataset, outcome_attribute, weight_attribute, control_totals, current_year,
            control_total_attribute=None, year_attribute='year', capacity_attribute=None,
            add_quantity=False, dataset_pool=None):
        """'dataset' is a Dataset for which a quantity 'outcome_attribute' is created.
        The total amount of the quantity is given by the attribute 'control_total_attribute'
        of the 'control_totals' Dataset. If it is not given, it is assumed to have the same
        name as 'outcome_attribute'. The 'weight_attribute' of 'dataset' determines the
        allocation weights. The 'control_totals' Dataset contains an attribute 'year'
        (or alternatively, an attribute given by the 'year_attribute' argument) and optionally
        other attributes that must be known to the 'dataset' (such as a geography). For each
        row of the control_totals dataset for which year matches the 'current_year', the total
        amount is distributed among the corresponding members of 'dataset' according to weights.
        If a 'capacity_attribute' is given (attribute of 'dataset'), the algorithm removes
        any allocation that exceeds the capacity and redistributes it among the remaining
        members. The resulting values are appended to 'dataset' as 'outcome_attribute'
        (as a primary attribute). If add_quantity is True and the 'outcome_attribute' exists
        in dataset, the resulting values are added to the current values of 'outcome_attribute'.
        """
        ct_attr = control_totals.get_known_attribute_names()
        if year_attribute not in ct_attr:
            raise StandardError, "Year attribute '%s' must be a known attribute of the control totals dataset." % year_attribute
        ct_attr.remove(year_attribute)
        if control_total_attribute is None:
            control_total_attribute = outcome_attribute
        if control_total_attribute not in ct_attr:
            raise StandardError, "Attribute '%s' must be a known attribute of the control totals dataset." % control_total_attribute
        ct_attr.remove(control_total_attribute)
        if control_totals._is_hidden_id():
            ct_attr.remove(control_totals.id_name()[0])
        # compute weights and other attributes necessary for allocation
        attrs_to_compute = [weight_attribute] + ct_attr
        if capacity_attribute is not None:
            attrs_to_compute.append(capacity_attribute)
        for attr in attrs_to_compute:
            try:
                dataset.compute_variables(attr, dataset_pool=dataset_pool)
            except:
                dataset.compute_one_variable_with_unknown_package(attr, dataset_pool=dataset_pool)
        # create subset of control totals for the current year
        year_index = where(control_totals.get_attribute(year_attribute) == current_year)[0]
        if year_index.size <= 0:
            logger.log_warning("No control total for year %s" % current_year)
            return None
        control_totals_for_this_year = DatasetSubset(control_totals, year_index)
        # check capacity
        if capacity_attribute is not None:
            if dataset.get_attribute(capacity_attribute).sum() < control_totals_for_this_year.get_attribute(control_total_attribute).sum():
                logger.log_warning("Capacity (%s) is smaller than the amount to allocate (%s)." % (
                    dataset.get_attribute(capacity_attribute).sum(),
                    control_totals_for_this_year.get_attribute(control_total_attribute).sum()))
            C = dataset.get_attribute(capacity_attribute).astype('int32')
        all_weights = dataset.get_attribute(weight_attribute)
        outcome = zeros(dataset.size(), dtype='int32')
        for ct_row in range(control_totals_for_this_year.size()):
            is_considered = ones(dataset.size(), dtype='bool8')
            for characteristics in ct_attr:
                is_considered = logical_and(is_considered,
                                            dataset.get_attribute(characteristics) == control_totals_for_this_year.get_attribute(characteristics)[ct_row])
            T = control_totals_for_this_year.get_attribute(control_total_attribute)[ct_row]
            it = 1
            while True:
                is_considered_idx = where(is_considered)[0]
                weights = all_weights[is_considered_idx]
                weights_sum = float(weights.sum())
                outcome[is_considered_idx] = round_(outcome[is_considered_idx] + T * (weights / weights_sum)).astype('int32')
                if capacity_attribute is None:
                    break
                diff = outcome[is_considered_idx] - C[is_considered_idx]
                outcome[is_considered_idx] = clip(outcome[is_considered_idx], 0, C[is_considered_idx])
                if it == 1 and C[is_considered_idx].sum() < T:
                    logger.log_warning("Control total %s cannot be met due to a capacity restriction of %s" % (T, C[is_considered_idx].sum()))
                T = where(diff < 0, 0, diff).sum()
                if T <= 0:
                    break
                is_considered = logical_and(is_considered, outcome < C)
                it += 1
        if add_quantity and (outcome_attribute in dataset.get_known_attribute_names()):
            dataset.modify_attribute(name=outcome_attribute, data=outcome + dataset.get_attribute(outcome_attribute))
            logger.log_status('New values added to the attribute %s of dataset %s.' % (outcome_attribute, dataset.get_dataset_name()))
        else:
            dataset.add_primary_attribute(name=outcome_attribute, data=outcome)
            logger.log_status('New values stored into attribute %s of dataset %s.' % (outcome_attribute, dataset.get_dataset_name()))
        dataset.flush_attribute(outcome_attribute)
        return outcome
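# A worked toy example of one iteration of the deterministic allocation above
# (illustrative values only): distribute T=10 over weights [1, 2, 2] with
# capacities [1, 10, 10], clip to capacity, and carry the overflow into the
# next iteration exactly as the loop does:
from numpy import array, round_, clip, where
w = array([1.0, 2.0, 2.0])
C = array([1, 10, 10])
T = 10
outcome = round_(T * w / w.sum()).astype('int32')   # -> [2, 4, 4]
diff = outcome - C                                  # -> [1, -6, -6]
outcome = clip(outcome, 0, C)                       # -> [1, 4, 4]
T = where(diff < 0, 0, diff).sum()                  # 1 unit left, redistributed
                                                    # among members still under capacity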
    def run(self, n=500, realestate_dataset_name='building', current_year=None,
            occupied_spaces_variable="occupied_spaces", total_spaces_variable="total_spaces",
            minimum_spaces_attribute="minimum_spaces",
            within_parcel_selection_weight_string=None, within_parcel_selection_n=0,
            within_parcel_selection_compete_among_types=False, within_parcel_selection_threshold=75,
            within_parcel_selection_MU_same_weight=False, within_parcel_selection_transpose_interpcl_weight=True,
            run_config=None, debuglevel=0):
        """ Run method of the Development Project Proposal Sampling Model.

        **Parameters**

        **n** : int, sample size for each iteration

            Sample n proposals at a time, which are then evaluated one by one until the
            target vacancies are satisfied or the proposals run out.

        **realestate_dataset_name** : string, name of the real estate dataset

        **current_year** : int, simulation year. If None, get value from SimulationState

        **occupied_spaces_variable** : string, variable name for calculating how many spaces are currently occupied

            It can either be a variable of the real_estate dataset that returns the amount of
            occupied spaces, or a target_vacancy attribute that contains the name of such
            real_estate variables.

        **total_spaces_variable** : string, variable name for calculating total existing spaces

        **Returns**

        **proposal_set** : indices to proposal_set that are accepted

        **demolished_buildings** : buildings to be demolished for re-development
        """
        self.accepted_proposals = []
        self.demolished_buildings = [] # id of buildings to be demolished
        if self.proposal_set.n <= 0:
            logger.log_status("The size of proposal_set is 0; no proposals to consider, skipping DPPSM.")
            return (self.proposal_set, self.demolished_buildings)
        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')
        if current_year is None:
            year = SimulationState().get_current_time()
        else:
            year = current_year
        this_year_index = where(target_vacancy['year'] == year)[0]
        target_vacancy_for_this_year = DatasetSubset(target_vacancy, this_year_index)
        if target_vacancy_for_this_year.size() == 0:
            raise IOError, 'No target vacancy defined for year %s.' % year
        ## current_target_vacancy.target_attribute_name = 'target_vacancy_rate'
        ## each column provides a category for which a target vacancy is specified
        self.column_names = list(set(target_vacancy.get_known_attribute_names()) - \
                                 set([target_vacancy.target_attribute_name, 'year', '_hidden_id_',
                                      minimum_spaces_attribute, occupied_spaces_variable, total_spaces_variable]))
        self.column_names.sort(reverse=True)
        ## buildings table provides existing stocks
        self.realestate_dataset = self.dataset_pool.get_dataset(realestate_dataset_name)
        occupied_spaces_variables = [occupied_spaces_variable]
        total_spaces_variables = [total_spaces_variable]
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            occupied_spaces_variables += unique(target_vacancy_for_this_year[occupied_spaces_variable]).tolist()
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            total_spaces_variables += unique(target_vacancy_for_this_year[total_spaces_variable]).tolist()
        self._compute_variables_for_dataset_if_needed(self.realestate_dataset, self.column_names + occupied_spaces_variables + total_spaces_variables)
        self._compute_variables_for_dataset_if_needed(self.proposal_component_set, self.column_names + total_spaces_variables)
        self.proposal_set.compute_variables(["urbansim_parcel.development_project_proposal.number_of_components",
                                             "urbansim_parcel.development_project_proposal.land_area_taken"],
                                            dataset_pool=self.dataset_pool)
        n_column = len(self.column_names)
        self.column_names_index = {}
        for iname in range(n_column):
            self.column_names_index[self.column_names[iname]] = iname
        target_vacancy_for_this_year.column_values = target_vacancy_for_this_year.get_multiple_attributes(self.column_names).reshape((-1, n_column))
        self.realestate_dataset.column_values = self.realestate_dataset.get_multiple_attributes(self.column_names).reshape((-1, n_column))
        self.proposal_component_set.column_values = self.proposal_component_set.get_multiple_attributes(self.column_names).reshape((-1, n_column))
        # defaults, can be changed later by spaces_variable specified in target_vacancy rates
        self.realestate_dataset.total_spaces = self.realestate_dataset[total_spaces_variable]
        self.proposal_component_set.total_spaces = self.proposal_component_set[total_spaces_variable]
        self.realestate_dataset.occupied_spaces = self.realestate_dataset[occupied_spaces_variable]
        self.accounting = {}; self.logging = {}
        #has_needed_components = zeros(self.proposal_set.size(), dtype='bool')
        for index in range(target_vacancy_for_this_year.size()):
            column_value = tuple(target_vacancy_for_this_year.column_values[index, :].tolist())
            accounting = {'target_vacancy': target_vacancy_for_this_year[target_vacancy.target_attribute_name][index]}
            if minimum_spaces_attribute in target_vacancy_for_this_year.get_known_attribute_names():
                accounting['minimum_spaces'] = target_vacancy_for_this_year[minimum_spaces_attribute][index]
            realestate_indexes = self.get_index_by_condition(self.realestate_dataset.column_values, column_value)
            component_indexes = self.get_index_by_condition(self.proposal_component_set.column_values, column_value)
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable =
target_vacancy_for_this_year[occupied_spaces_variable][index] self.realestate_dataset.occupied_spaces[realestate_indexes] = (self.realestate_dataset[this_occupied_spaces_variable][realestate_indexes] ).astype(self.realestate_dataset.occupied_spaces.dtype) if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names(): this_total_spaces_variable = target_vacancy_for_this_year[total_spaces_variable][index] self.realestate_dataset.total_spaces[realestate_indexes] = (self.realestate_dataset[this_total_spaces_variable][realestate_indexes] ).astype(self.realestate_dataset.total_spaces.dtype) self.proposal_component_set.total_spaces[component_indexes] = (self.proposal_component_set[this_total_spaces_variable][component_indexes] ).astype(self.proposal_component_set.total_spaces.dtype) accounting["total_spaces_variable"] = this_total_spaces_variable accounting["total_spaces"] = self.realestate_dataset.total_spaces[realestate_indexes].sum() accounting["occupied_spaces_variable"] = this_occupied_spaces_variable accounting["occupied_spaces"] = self.realestate_dataset.occupied_spaces[realestate_indexes].sum() accounting["target_spaces"] = int( round( accounting["occupied_spaces"] /\ (1 - accounting["target_vacancy"]) ) ) accounting["proposed_spaces"] = 0 accounting["demolished_spaces"] = 0 self.accounting[column_value] = accounting if self._is_target_reached(column_value): proposal_indexes = self.proposal_set.get_id_index(unique(self.proposal_component_set['proposal_id'][component_indexes])) if n_column == 1: comp_indexes = where(ndimage.sum(self.proposal_component_set[self.column_names[0]]==column_value[0], labels=self.proposal_component_set['proposal_id'], index=self.proposal_set.get_id_attribute() ) == self.proposal_set["number_of_components"])[0] else: comp_indexes = where(self.proposal_set["number_of_components"]==1)[0] target_reached_prop_idx = intersect1d(proposal_indexes, comp_indexes) self.weight[target_reached_prop_idx] = 0.0 self.proposal_set["status_id"][intersect1d(target_reached_prop_idx, where(self.proposal_set["status_id"]==self.proposal_set.id_tentative)[0])] = self.proposal_set.id_no_demand ## handle planned proposals: all proposals with status_id == is_planned ## and start_year == year are accepted planned_proposal_indexes = where(logical_and( self.proposal_set.get_attribute("status_id") == self.proposal_set.id_planned, self.proposal_set.get_attribute("start_year") == year ) )[0] logger.start_block("Processing %s planned proposals" % planned_proposal_indexes.size) self.consider_proposals(planned_proposal_indexes, force_accepting=True) logger.end_block() if within_parcel_selection_n > 0: logger.start_block("Selecting proposals within parcels (%s proposals per parcel)" % within_parcel_selection_n) self.select_proposals_within_parcels(nmax=within_parcel_selection_n, weight_string=within_parcel_selection_weight_string, compete_among_types=within_parcel_selection_compete_among_types, filter_threshold=within_parcel_selection_threshold, MU_same_weight=within_parcel_selection_MU_same_weight, transpose_interpcl_weight=within_parcel_selection_transpose_interpcl_weight) logger.end_block() # consider proposals (in this order: proposed, tentative) for status in [self.proposal_set.id_proposed, self.proposal_set.id_tentative]: stat = (self.proposal_set.get_attribute("status_id") == status) if stat.sum() == 0: continue logger.log_status("Sampling from %s eligible proposals of status %s." 
% (stat.sum(), status)) iteration = 0 while (not self._is_target_reached()): ## prevent proposals from being sampled for vacancy type whose target is reached #for column_value in self.accounting.keys(): if self.weight[stat].sum() == 0.0: logger.log_warning("Running out of proposals of status %s before vacancy targets are reached; there aren't any proposals with non-zero weight" % status) break available_indexes = where(logical_and(stat, self.weight > 0))[0] sample_size = minimum(available_indexes.size, n) sampled_proposal_indexes = probsample_noreplace(available_indexes, sample_size, prob_array=self.weight[available_indexes], return_index=False) #sorted_sampled_indices = argsort(self.weight[sampled_proposal_indexes]) #self.consider_proposals(sampled_proposal_indexes[sorted_sampled_indices][::-1]) self.consider_proposals(sampled_proposal_indexes) self.weight[sampled_proposal_indexes] = 0 iteration += 1 self._log_status() # set status of accepted proposals to 'active' self.proposal_set.modify_attribute(name="status_id", data=self.proposal_set.id_active, index=array(self.accepted_proposals, dtype='int32')) # Code added by Jesse Ayers, MAG, 7/20/2009 # Get the active projects: stat_id = self.proposal_set.get_attribute('status_id') actv = where(stat_id==1)[0] # Where there are active projects, compute the total_land_area_taken # and store it on the development_project_proposals dataset # so it can be used by the building_construction_model for the proper # computation of units_proposed for those projects with velocity curves if actv.size > 0: total_land_area_taken_computed = self.proposal_set['land_area_taken'] self.proposal_set.modify_attribute('total_land_area_taken', total_land_area_taken_computed[actv], actv) return (self.proposal_set, self.realestate_dataset.get_id_attribute()[self.demolished_buildings])
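# A toy check of the vacancy accounting used above (the numbers are assumptions
# for illustration only): with 900 occupied spaces and a 10% target vacancy,
# the target stock works out to 1000 spaces, and proposals for the category keep
# being accepted while total + proposed - demolished stays below that target:
occupied_spaces = 900
target_vacancy = 0.10
target_spaces = int(round(occupied_spaces / (1 - target_vacancy)))  # -> 1000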
def run(self, realestate_dataset, living_units_dataset, year=None, occupied_spaces_variable="occupied_units", total_spaces_variable="total_units", target_attribute_name='target_vacancy_rate', sample_from_dataset = None, living_units_from_dataset = None, sample_filter="", reset_attribute_value={}, year_built = 'year_built', dataset_pool=None, append_to_realestate_dataset = False, table_name = "development_projects", dataset_name = "development_project", id_name = 'development_project_id', **kwargs): """ sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning append_to_realestate_dataset - whether to append the new dataset to realestate_dataset """ if self.target_vancy_dataset is None: raise RuntimeError, "target_vacancy_rate dataset is unspecified." if not sample_from_dataset or not living_units_from_dataset: logger.log_note('No development projects or no living units of development projects to sample from. Development projects are taken from building dataset and thus living units from living_units dataset.') sample_from_dataset = realestate_dataset living_units_from_dataset = living_units_dataset if dataset_pool is None: dataset_pool = SessionConfiguration().get_dataset_pool() if year is None: year = SimulationState().get_current_time() this_year_index = where(self.target_vancy_dataset.get_attribute('year')==year)[0] target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index) column_names = list(set( self.target_vancy_dataset.get_known_attribute_names() ) - set( [ target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_'] )) column_names.sort(reverse=True) column_values = dict([ (name, target_vacancy_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]]) independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names])) sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names() for attribute in independent_variables: if attribute not in sample_dataset_known_attributes: sample_from_dataset.compute_one_variable_with_unknown_package(attribute, dataset_pool=dataset_pool) sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names() #update after compute if sample_filter: short_name = VariableName(sample_filter).get_alias() if short_name not in sample_dataset_known_attributes: filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool) else: filter_indicator = sample_from_dataset.get_attribute(short_name) else: filter_indicator = 1 sampled_index = array([], dtype=int32) #log header if PrettyTable is not None: status_log = PrettyTable() status_log.set_field_names(column_names + ["actual", "target", "expected", "difference", "action"]) else: logger.log_status("\t".join(column_names + ["actual", "target", "expected", "difference", "action"])) error_log = '' for index in range(target_vacancy_for_this_year.size()): sample_indicator = ones( sample_from_dataset.size(), dtype='bool' ) criterion = {} # for logging for attribute in independent_variables: if attribute in sample_dataset_known_attributes: sample_attribute = sample_from_dataset.get_attribute(attribute) else: raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (attribute, realestate_dataset.get_dataset_name()) if attribute + '_min' in column_names: amin = 
target_vacancy_for_this_year.get_attribute(attribute + '_min')[index]
                    criterion.update({attribute + '_min': amin})
                    if amin != -1:
                        sample_indicator *= sample_attribute >= amin
                if attribute + '_max' in column_names:
                    amax = target_vacancy_for_this_year.get_attribute(attribute + '_max')[index]
                    criterion.update({attribute + '_max': amax})
                    if amax != -1:
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names:
                    aval = column_values[attribute][index]
                    criterion.update({attribute: aval})
                    if aval == -1:
                        continue
                    elif aval == -2:
                        ## treat -2 in control totals column as the complement set, i.e. all other values not already specified in this column
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        sample_indicator *= sample_attribute == aval
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]
            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
            this_total_spaces_variable += '_' + str(criterion[col])
            this_occupied_spaces_variable += '_' + str(criterion[col])
            logger.be_quiet() # temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
            realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            logger.talk()
            actual_num = (realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            # target_num is obsolete with this version.
            target_num = int(round((realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /\
                             (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
            '''If the target vacancy is very small and the inflow to the region big, it is not
            enough to check only the current simulation year's vacancy. The simulation is more
            robust if the BTM anticipates the next year's population (of households and jobs).
            This version calculates the non-residential spaces based on sqft requirements of
            jobs per sector.
            #TODO: Make code more general to cover various stratifications in the real estate market.
            '''
            if criterion[col] == 0:
                """ Option without demography model
                idx = where(self.control_totals.get_attribute("year")==year + 1)[0]
                this_years_control_totals = DatasetSubset(self.control_totals, idx)
                expected_num = int(round( this_years_control_totals.get_attribute('total_number_of_households').sum() /\
                                  (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))"""
                hh_dataset = dataset_pool.get_dataset('household')
                number_of_hh = hh_dataset.size()
                expected_num = int(round(number_of_hh /\
                                   (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
            if criterion[col] > 0:
                # Getting control totals per sector in a dictionary
                idx = where(self.employment_control_totals.get_attribute("year") == year)[0] # Create index to get the subset of control totals for the next simulation year.
                this_years_control_totals = DatasetSubset(self.employment_control_totals, idx) # Create the subset of control totals.
                # Create index of non home based control totals in the current sector. Only non home based jobs are supported. TODO: Support home based jobs.
                idx_non_home_based = where(logical_and(this_years_control_totals['home_based_status'] == 0,
                                                       this_years_control_totals['sector_id'] == criterion[col]))[0]
                this_years_control_totals = DatasetSubset(this_years_control_totals, idx_non_home_based)
                # idx_current_sector = where(this_years_control_totals['sector_id'] == criterion[col])[0]
                next_years_jobs = this_years_control_totals['number_of_jobs']
                controled_sectors = this_years_control_totals['sector_id']
                # dictionary with sector ids as keys and numbers of jobs as values, to ensure multiplication with the right requirements
                sector_job_totals = dict(zip(controled_sectors, next_years_jobs.T))
                # Getting info on required sqft per sector.
                # a_zone_id = min(self.building_sqft_per_job['zone_id']) # Get a zone number from the definition table. Here choose to take the minimum which is arbitrary. This code assumes constant sqft requirements in all zones. TODO: Support different sqft requirements per zone.
                # idx_zone = where(self.building_sqft_per_job['zone_id'] == a_zone_id)[0]
                # subset_sqft_per_job = DatasetSubset(self.building_sqft_per_job, idx_zone)
                # sqft_per_job = subset_sqft_per_job['building_sqft_per_job']
                # sectors_with_requirements = subset_sqft_per_job['sector_id']
                # requirements_by_sector = dict(zip(sectors_with_requirements, sqft_per_job.T))
                #
                # needed_sqft_over_all_sectors = sector_job_totals[criterion[col]] * requirements_by_sector[criterion[col]]
                # expected_num = int(round( needed_sqft_over_all_sectors /\
                #                   (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
                idx_sector = where(self.sectors['sector_id'] == criterion[col])
                subset_sqft_per_job_sector = DatasetSubset(self.sectors, idx_sector)
                needed_sqft_current_sector = sector_job_totals[criterion[col]] * subset_sqft_per_job_sector.get_attribute('sqm_per_job')
                expected_num = int(round(needed_sqft_current_sector /\
                                   (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
            diff = expected_num - actual_num
            # Previous version which checked the current year's occupation:
            #diff = target_num - actual_num
            this_sampled_index = array([], dtype=int32)
            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int(diff / mean_size)
                    ## sample at least 1 project when diff > 0, otherwise it is an endless loop when num_of_projects_to_sample = 0
                    num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[0:(1+searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                                 ','.join([col + "=" + str(criterion[col]) for col in column_names]) + '\n'
            #if diff < 0: #TODO demolition; not yet supported
            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0: action = "+" + str(action_num)
                if diff < 0: action = "-" + str(action_num)
            cat = [str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(expected_num), str(diff), action]
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:
                logger.log_status("\t".join(cat))
        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
        #logger.log_note("Updating attributes of %s sampled development events." % sampled_index.size)
        result_data = {}
        result_dataset = None
        index = array([], dtype='int32')
        if sampled_index.size > 0:
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            #result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32')) # Reset of year_built commented out because it is overwritten in the for loop afterwards.
            ## also add 'independent_variables' to the new dataset
            for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
                if reset_attribute_value.has_key(attribute):
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
            # Reset the year_built attribute.
            result_data['year_built'] = resize(year, sampled_index.size).astype('int32')
            # TODO: Uncomment the following three lines to reset land_area, tax_exempt, zgde. Test still to be done. parcel_id should be changed by location choice model.
#result_data['land_area'] = resize(-1, sampled_index.size).astype('int32') #result_data['tax_exempt'] = resize(-1, sampled_index.size).astype('int32') #result_data['zgde'] = resize(-1, sampled_index.size).astype('int32') if id_name and result_data and id_name not in result_data: result_data[id_name] = arange(sampled_index.size, dtype='int32') + 1 storage = StorageFactory().get_storage('dict_storage') storage.write_table(table_name=table_name, table_data=result_data) result_dataset = Dataset(id_name = id_name, in_storage = storage, in_table_name = table_name, dataset_name = dataset_name ) index = arange(result_dataset.size()) if append_to_realestate_dataset: if len(result_data) > 0: logger.start_block('Appending development events and living units') logger.log_note("Append %d sampled development events to real estate dataset." % len(result_data[result_data.keys()[0]])) index = realestate_dataset.add_elements(result_data, require_all_attributes=False, change_ids_if_not_unique=True) logger.start_block('Creating id mapping') # remember the ids from the development_event_history dataset. mapping_new_old = self.get_mapping_of_old_ids_to_new_ids(result_data, realestate_dataset, index) logger.end_block() '''Getting living units associated to selected development events by iterating over the mapping dictionary and selecting each time all the living units according to the old building ids. The living units are then added to selected_living_units_dict which is then added to living_units dataset. A dictionary is needed to use the add_elements method. Creating a dictionary also clones the records. The subset is only a view on the original table.''' selected_living_units_dict = {} counter = 0 for new_id in mapping_new_old: if counter == 0: logger.log_note("Log assignment of every 100th development event") counter +=1 if counter % 100 == 0: logger.log_note("Assembling living units for development event %s" % new_id) sel_index = [i for i in range(0, len(living_units_from_dataset['building_id'])) if living_units_from_dataset['building_id'][i] == mapping_new_old[new_id]] living_units_this_sampled_building = DatasetSubset(living_units_from_dataset, sel_index) if len(selected_living_units_dict) == 0: logger.start_block('Assign new building id') for attribute_name in living_units_this_sampled_building.get_primary_attribute_names(): column = living_units_this_sampled_building.get_attribute(attribute_name) if attribute_name == 'building_id': new_ids = array(living_units_this_sampled_building.size() * [new_id], dtype=int32) selected_living_units_dict.update({attribute_name: new_ids}) else: selected_living_units_dict.update({attribute_name: column}) logger.end_block() else: this_living_units_dict ={} for attribute_name in living_units_this_sampled_building.get_primary_attribute_names(): column = living_units_this_sampled_building.get_attribute(attribute_name) if attribute_name == 'building_id': new_ids = array(living_units_this_sampled_building.size() * [new_id], dtype=int32) this_living_units_dict.update({attribute_name: new_ids}) else: this_living_units_dict.update({attribute_name: column}) for attribute_name in living_units_this_sampled_building.get_primary_attribute_names(): selected_living_units_dict[attribute_name] = concatenate([selected_living_units_dict[attribute_name], this_living_units_dict[attribute_name]]) # Reset year_built attribute of living units selected_living_units_dict['year_built'] = resize(year, len(selected_living_units_dict['year_built'])).astype('int32') # TODO: Uncomment the following 
two lines to reset rent_price, zgde. Test still to be done # selected_living_units_dict['rent_price'] = resize(-1, len(selected_living_units_dict['rent_price'])).astype('int32') # selected_living_units_dict['zgde'] = resize(-1, len(selected_living_units_dict['zgde'])).astype('int32') index_units = living_units_dataset.add_elements(selected_living_units_dict, require_all_attributes=False, change_ids_if_not_unique=True) # Check consistency of buildings and living units. All living units must belong to a building if SimulationState().get_current_time() - SimulationState().get_start_time() == 1: for building_id in living_units_dataset['building_id']: if building_id not in realestate_dataset['building_id']: logger.log_warning('Living unit with building_id %d has no corresponding building.' % (building_id)) # Uncomment next line to enforce consistency of living units and building dataset. Then you may uncomment the two previous lines. # assert(building_id in realestate_dataset['building_id']), 'Living unit with building_id %d has no corresponding building.' % (building_id) result_dataset = realestate_dataset logger.end_block() # It is recommended to derive all variables of buildings in relation to living units via expression variables. # However, if the building dataset contains attributes derived from living units these attributes should be consistent # with the living units table. Below an example. # Residential_units attribute of each building should be consistent with the number of living units associated. # self.check_consistency_of_living_units_per_building(realestate_dataset, living_units_dataset, mapping_new_old) return (result_dataset, index)
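# The list comprehension above scans all living units once per development event.
# A minimal sketch of the same selection done with numpy, on toy arrays assumed
# only for illustration; where/in1d avoid the per-element Python loop:
from numpy import array, where, in1d
unit_building_ids = array([10, 10, 11, 12, 12, 12])  # building_id per living unit
old_building_id = 12
sel_index = where(unit_building_ids == old_building_id)[0]      # -> [3, 4, 5]
several_old_ids = array([10, 12])
sel_multi = where(in1d(unit_building_ids, several_old_ids))[0]  # -> [0, 1, 3, 4, 5]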
def run( self, realestate_dataset, year=None, occupied_spaces_variable="occupied_units", total_spaces_variable="total_units", target_attribute_name="target_vacancy_rate", sample_from_dataset=None, sample_filter="", reset_attribute_value={}, year_built="year_built", dataset_pool=None, append_to_realestate_dataset=False, table_name="development_projects", dataset_name="development_project", id_name="development_project_id", **kwargs ): """ sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning append_to_realestate_dataset - whether to append the new dataset to realestate_dataset """ if self.target_vancy_dataset is None: raise RuntimeError, "target_vacancy_rate dataset is unspecified." if not sample_from_dataset: sample_from_dataset = realestate_dataset # if dataset_pool is None: # dataset_pool = SessionConfiguration().get_dataset_pool() if year is None: year = SimulationState().get_current_time() this_year_index = where(self.target_vancy_dataset.get_attribute("year") == year)[0] target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index) column_names = list( set(self.target_vancy_dataset.get_known_attribute_names()) - set([target_attribute_name, occupied_spaces_variable, total_spaces_variable, "year", "_hidden_id_"]) ) column_names.sort(reverse=True) column_values = dict( [ (name, target_vacancy_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name] ] ) independent_variables = list(set([re.sub("_max$", "", re.sub("_min$", "", col)) for col in column_names])) dataset_known_attributes = realestate_dataset.get_known_attribute_names() sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names() for variable in independent_variables: if variable not in dataset_known_attributes: realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool) if variable not in sample_dataset_known_attributes: sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool) dataset_known_attributes = realestate_dataset.get_known_attribute_names() # update after compute if sample_filter: short_name = VariableName(sample_filter).get_alias() if short_name not in dataset_known_attributes: filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool) else: filter_indicator = sample_from_dataset.get_attribute(short_name) else: filter_indicator = 1 sampled_index = array([], dtype=int32) # log header if PrettyTable is not None: status_log = PrettyTable() status_log.set_field_names(column_names + ["actual", "target", "expected", "difference", "action"]) else: logger.log_status("\t".join(column_names + ["actual", "target", "expected", "difference", "action"])) error_log = "" for index in range(target_vacancy_for_this_year.size()): this_sampled_index = array([], dtype=int32) indicator = ones(realestate_dataset.size(), dtype="bool") sample_indicator = ones(sample_from_dataset.size(), dtype="bool") criterion = {} # for logging for attribute in independent_variables: if attribute in dataset_known_attributes: dataset_attribute = realestate_dataset.get_attribute(attribute) sample_attribute = sample_from_dataset.get_attribute(attribute) else: raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % ( attribute, realestate_dataset.get_dataset_name(), ) if attribute + "_min" in column_names: amin = 
target_vacancy_for_this_year.get_attribute(attribute + "_min")[index]
                    criterion.update({attribute + "_min": amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                        sample_indicator *= sample_attribute >= amin
                if attribute + "_max" in column_names:
                    amax = target_vacancy_for_this_year.get_attribute(attribute + "_max")[index]
                    criterion.update({attribute + "_max": amax})
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names:
                    aval = column_values[attribute][index]
                    criterion.update({attribute: aval})
                    if aval == -1:
                        continue
                    elif aval == -2:
                        ## treat -2 in control totals column as the complement set, i.e. all other values not already specified in this column
                        indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        indicator *= dataset_attribute == aval
                        sample_indicator *= sample_attribute == aval
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]
            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
            this_total_spaces_variable += "_" + str(criterion[col])
            this_occupied_spaces_variable += "_" + str(criterion[col])
            logger.be_quiet() # temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
            realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            logger.talk()
            actual_num = (realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            # target_num is obsolete with this version.
            target_num = int(round((realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum()
                             / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
            """If the target vacancy is very small and the inflow to the region big, it is not
            enough to check only the current simulation year's vacancy. The simulation is more
            robust if the BTM anticipates the next year's population (of households and jobs).
            #TODO: Make code more general to cover various stratifications in the real estate market.
            """
            if criterion[col] == 1:
                idx = where(self.control_totals.get_attribute("year") == year + 1)[0]
                this_years_control_totals = DatasetSubset(self.control_totals, idx)
                expected_num = int(round(this_years_control_totals.get_attribute("total_number_of_households").sum()
                                   / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
            if criterion[col] == 0:
                idx = where(self.employment_control_totals.get_attribute("year") == year + 1)[0]
                next_years_control_totals = DatasetSubset(self.employment_control_totals, idx)
                expected_num = int(round(next_years_control_totals.get_attribute("number_of_jobs").sum()
                                   / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
            diff = expected_num - actual_num
            # Previous version which checked the current year's occupation:
            # diff = target_num - actual_num
            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int(diff / mean_size)
                    ## sample at least 1 project when diff > 0, otherwise it is an endless loop when num_of_projects_to_sample = 0
                    num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[0:(1 + searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += ("There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name()
                                  + ",".join([col + "=" + str(criterion[col]) for col in column_names]) + "\n")
            # if diff < 0: #TODO demolition; not yet supported
            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0: action = "+" + str(action_num)
                if diff < 0: action = "-" + str(action_num)
            cat = [str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(expected_num), str(diff), action]
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:
                logger.log_status("\t".join(cat))
        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
        result_data = {}
        result_dataset = None
        index = array([], dtype="int32")
        if True: # sampled_index.size > 0:
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            result_data.setdefault(year_built, resize(year, sampled_index.size).astype("int32"))
            ## also add 'independent_variables' to the new dataset
            for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
                if reset_attribute_value.has_key(attribute):
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
            if id_name and result_data and id_name not in result_data:
                result_data[id_name] = arange(sampled_index.size,
dtype="int32") + 1 storage = StorageFactory().get_storage("dict_storage") storage.write_table(table_name=table_name, table_data=result_data) result_dataset = Dataset( id_name=id_name, in_storage=storage, in_table_name=table_name, dataset_name=dataset_name ) index = arange(result_dataset.size()) if append_to_realestate_dataset: if len(result_data) > 0: index = realestate_dataset.add_elements( result_data, require_all_attributes=False, change_ids_if_not_unique=True ) result_dataset = realestate_dataset return (result_dataset, index)
class DevelopmentProjectTransitionModel(Model):
    """
    Creates development projects. Each development project is for a single type
    of development, e.g. 'industrial' or 'commercial'. This model creates enough
    development projects to match the desired vacancy rates, as defined in the
    target_vacancies table. It does not place any projects in locations; that is
    the job of the development project location choice models. The distribution
    of project sizes (amount of space, value of space) is determined by sampling
    from the projects in the development_event_history table.
    """
    model_name = "Development Project Transition Model"

    def __init__(self, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)

    def pre_check(self, location_set, vacancy_table, types):
        for ptype in types:
            self.check_for_space(location_set.get_attribute(self.variable_for_total_units[ptype]))
        self.check_target_vacancy_is_not_100_percent(vacancy_table.get_attribute("target_total_vacancy"))

    def check_for_space(self, values):
        """Check that this array of values sums to something > 0."""
        self.do_check("x > 0", array([values.sum()]))

    def check_target_vacancy_is_not_100_percent(self, value):
        """Check that the target vacancy rate is not 100% (ratio == 1), because that
        does not make sense, and it also causes a divide-by-zero error."""
        self.do_check("x < 1", value)

    def run(self, vacancy_table, history_table, year, location_set, dataset_pool=None, resources=None):
        self.dataset_pool = dataset_pool
        building_types = self.dataset_pool.get_dataset('building_type')
        target_vacancy_this_year = DatasetSubset(vacancy_table,
                                                 index=where(vacancy_table.get_attribute("year") == year)[0])
        building_type_ids = target_vacancy_this_year.get_attribute('building_type_id')
        building_type_idx = building_types.get_id_index(building_type_ids)
        self.used_building_types = DatasetSubset(building_types, index=building_type_idx)
        project_types = self.used_building_types.get_attribute('building_type_name')
        is_residential = self.used_building_types.get_attribute('is_residential')
        unit_names = where(is_residential, 'residential_units', 'non_residential_sqft')
        specific_unit_names = where(is_residential, 'residential_units', '_sqft')
        rates = target_vacancy_this_year.get_attribute('target_total_vacancy')
        self.project_units = {}
        self.project_specific_units = {}
        target_rates = {}
        for i in range(self.used_building_types.size()):
            self.project_units[project_types[i]] = unit_names[i]
            if is_residential[i]:
                self.project_specific_units[project_types[i]] = specific_unit_names[i]
            else:
                self.project_specific_units[project_types[i]] = "%s%s" % (project_types[i], specific_unit_names[i])
            target_rates[building_type_ids[i]] = rates[i]

        self._compute_vacancy_and_total_units_variables(location_set, project_types, resources)
        self.pre_check(location_set, target_vacancy_this_year, project_types)

        projects = None
        for project_type_id, target_vacancy_rate in target_rates.iteritems():
            # determine current-year vacancy rates
            project_type = building_types.get_attribute_by_id('building_type_name', project_type_id)
            vacant_units_sum = location_set.get_attribute(self.variable_for_vacancy[project_type]).sum()
            units_sum = float(location_set.get_attribute(self.variable_for_total_units[project_type]).sum())
            should_develop_units = int(round(max(0,
                (target_vacancy_rate * units_sum - vacant_units_sum) / (1 - target_vacancy_rate))))
            logger.log_status(project_type + ": vacant units: %d, should be vacant: %f, sum units: %d"
                              % (vacant_units_sum, target_vacancy_rate * units_sum, units_sum))

            if not should_develop_units:
                logger.log_note(("Will not build any " + project_type + " units, because the current vacancy of %d units\n"
                                 + "is more than the %d units desired for the vacancy rate of %f.")
                                % (vacant_units_sum, target_vacancy_rate * units_sum, target_vacancy_rate))
            # create projects
            if should_develop_units > 0:
                this_project = self._create_projects(should_develop_units, project_type, project_type_id,
                                                     history_table, location_set, units_sum, resources)
                if projects is None:
                    projects = this_project
                else:
                    projects.join_by_rows(this_project, change_ids_if_not_unique=True)
        return projects

    def _compute_vacancy_and_total_units_variables(self, location_set, project_types, resources=None):
        compute_resources = Resources(resources)
        compute_resources.merge({"debug": self.debug})
        self.variable_for_vacancy = {}
        self.variable_for_total_units = {}
        for ptype in project_types:
            self.variable_for_vacancy[ptype] = compute_resources.get(
                "%s_vacant_variable" % ptype,
                "urbansim_zone.%s.vacant_%s" % (location_set.get_dataset_name(),
                                                self.project_specific_units[ptype]))
            self.variable_for_total_units[ptype] = compute_resources.get(
                "%s_total_units_variable" % ptype,
                "%s.aggregate(urbansim_zone.building.total_%s)" % (location_set.get_dataset_name(),
                                                                   self.project_specific_units[ptype]))
            location_set.compute_variables([self.variable_for_vacancy[ptype],
                                            self.variable_for_total_units[ptype]],
                                           dataset_pool=self.dataset_pool,
                                           resources=compute_resources)

    def _create_projects(self, should_develop_units, project_type, project_type_id, history_table,
                         location_set, units_sum, resources=None):
        history_values = history_table.get_attribute(self.project_units[project_type])
        type_code_values = history_table.get_change_type_code_attribute(self.project_units[project_type])
        # take only non-zero history values and those that don't represent demolished buildings
        history_values_without_zeros = history_values[logical_and(
            history_values > 0, type_code_values != DevelopmentEventTypeOfChange.DELETE)]
        mean_size = history_values_without_zeros.mean()
        idx = array([], dtype="int32")
        # Ensure that there are some development projects to choose from
        # (cast to int, since randint expects an integer sample size).
        num_of_projects_to_select = max(10, int(round_(should_develop_units / mean_size)))
        while True:
            idx = concatenate((idx, randint(0, history_values_without_zeros.size, num_of_projects_to_select)))
            csum = history_values_without_zeros[idx].cumsum()
            idx1 = idx[csum <= should_develop_units]
            if idx1.size == 0:
                # at least one project should be selected
                idx = array([idx[0]], dtype="int32")
            else:
                idx = idx1
            if csum[-1] >= should_develop_units:
                break
        data = {"residential_units": zeros((idx.size,), dtype=int32),
                "non_residential_sqft": zeros((idx.size,), dtype=int32),
                'building_type_id': array(idx.size * [project_type_id]),
                "project_id": arange(idx.size) + 1,
                "building_id": zeros((idx.size,), dtype=int32)}
        data[self.project_units[project_type]] = history_values_without_zeros[idx]
        storage = StorageFactory().get_storage('dict_storage')
        development_projects_table_name = 'development_projects'
        storage.write_table(table_name=development_projects_table_name, table_data=data)
        return Dataset(in_storage=storage,
                       in_table_name=development_projects_table_name,
                       id_name='project_id')
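# --- Illustrative sketch (standalone, hypothetical numbers; numpy only) of the
# cumulative-sum draw used by _create_projects above: over-sample project sizes
# with replacement from the event history, then keep just enough draws for their
# cumulative size to cover the units still to be developed. The real method loops
# and re-samples if the draw falls short of the target.
#
#     from numpy import array, cumsum, searchsorted
#     from numpy.random import randint
#
#     history = array([12, 40, 8, 25, 60])        # non-zero project sizes from history
#     target = 100                                # should_develop_units
#     idx = randint(0, history.size, 10)          # candidate projects, with replacement
#     csum = cumsum(history[idx])
#     idx = idx[0:searchsorted(csum, target) + 1] # first draws covering >= target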
    def run(self, n=500,
            realestate_dataset_name='building',
            current_year=None,
            occupied_spaces_variable="occupied_spaces",
            total_spaces_variable="total_spaces",
            run_config=None,
            debuglevel=0):
        """
        run method of the Development Project Proposal Sampling Model

        **Parameters**

            **n** : int, sample size for each iteration

                   sample n proposals at a time, which are then evaluated one by one
                   until the target vacancies are satisfied or the proposals run out

            **realestate_dataset_name** : string, name of the real estate dataset

            **current_year** : int, simulation year. If None, get the value from SimulationState

            **occupied_spaces_variable** : string, variable name for calculating how much space is
                   currently occupied

                   It can either be a variable for the real_estate dataset that returns the amount
                   of space being occupied, or a target_vacancy attribute that contains the names
                   of real_estate variables.

            **total_spaces_variable** : string, variable name for calculating total existing space

        **Returns**

            **proposal_set** : indices to proposal_set that are accepted

            **demolished_buildings** : buildings to be demolished for re-development
        """
        self.accepted_proposals = []
        self.demolished_buildings = []  # ids of buildings to be demolished

        if self.proposal_set.n <= 0:
            logger.log_status("The size of proposal_set is 0; no proposals to consider, skipping DPPSM.")
            return (self.proposal_set, self.demolished_buildings)

        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')

        if current_year is None:
            year = SimulationState().get_current_time()
        else:
            year = current_year
        this_year_index = where(target_vacancy['year'] == year)[0]
        target_vacancy_for_this_year = DatasetSubset(target_vacancy, this_year_index)
        if target_vacancy_for_this_year.size() == 0:
            raise IOError, 'No target vacancy defined for year %s.' % year

        ## current_target_vacancy.target_attribute_name = 'target_vacancy_rate'
        ## each column provides a category for which a target vacancy is specified
        self.column_names = list(set(target_vacancy.get_known_attribute_names()) -
                                 set([target_vacancy.target_attribute_name,
                                      'year', '_hidden_id_',
                                      occupied_spaces_variable, total_spaces_variable]))
        self.column_names.sort(reverse=True)

        ## buildings table provides existing stocks
        self.realestate_dataset = self.dataset_pool.get_dataset(realestate_dataset_name)

        occupied_spaces_variables = [occupied_spaces_variable]
        total_spaces_variables = [total_spaces_variable]
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            occupied_spaces_variables += unique(target_vacancy_for_this_year[occupied_spaces_variable]).tolist()
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            total_spaces_variables += unique(target_vacancy_for_this_year[total_spaces_variable]).tolist()

        self._compute_variables_for_dataset_if_needed(self.realestate_dataset,
                                                      self.column_names + occupied_spaces_variables + total_spaces_variables)
        self._compute_variables_for_dataset_if_needed(self.proposal_component_set,
                                                      self.column_names + total_spaces_variables)
        self.proposal_set.compute_variables("urbansim_parcel.development_project_proposal.number_of_components",
                                            dataset_pool=self.dataset_pool)

        n_column = len(self.column_names)
        target_vacancy_for_this_year.column_values = target_vacancy_for_this_year.get_multiple_attributes(self.column_names).reshape((-1, n_column))
        self.realestate_dataset.column_values = self.realestate_dataset.get_multiple_attributes(self.column_names).reshape((-1, n_column))
        self.proposal_component_set.column_values = self.proposal_component_set.get_multiple_attributes(self.column_names).reshape((-1, n_column))

        # defaults; can be changed later by the spaces_variable specified in target_vacancy rates
        self.realestate_dataset.total_spaces = self.realestate_dataset[total_spaces_variable]
        self.proposal_component_set.total_spaces = self.proposal_component_set[total_spaces_variable]
        self.realestate_dataset.occupied_spaces = self.realestate_dataset[occupied_spaces_variable]

        self.accounting = {}
        self.logging = {}
        #has_needed_components = zeros(self.proposal_set.size(), dtype='bool')
        for index in range(target_vacancy_for_this_year.size()):
            column_value = tuple(target_vacancy_for_this_year.column_values[index, :].tolist())

            accounting = {'target_vacancy': target_vacancy_for_this_year[target_vacancy.target_attribute_name][index]}

            realestate_indexes = self.get_index_by_condition(self.realestate_dataset.column_values, column_value)
            component_indexes = self.get_index_by_condition(self.proposal_component_set.column_values, column_value)

            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year[occupied_spaces_variable][index]
                self.realestate_dataset.occupied_spaces[realestate_indexes] = \
                    (self.realestate_dataset[this_occupied_spaces_variable][realestate_indexes]
                     ).astype(self.realestate_dataset.occupied_spaces.dtype)

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year[total_spaces_variable][index]
                self.realestate_dataset.total_spaces[realestate_indexes] = \
                    (self.realestate_dataset[this_total_spaces_variable][realestate_indexes]
                     ).astype(self.realestate_dataset.total_spaces.dtype)
                self.proposal_component_set.total_spaces[component_indexes] = \
                    (self.proposal_component_set[this_total_spaces_variable][component_indexes]
                     ).astype(self.proposal_component_set.total_spaces.dtype)

            accounting["total_spaces_variable"] = this_total_spaces_variable
            accounting["total_spaces"] = self.realestate_dataset.total_spaces[realestate_indexes].sum()
            accounting["occupied_spaces_variable"] = this_occupied_spaces_variable
            accounting["occupied_spaces"] = self.realestate_dataset.occupied_spaces[realestate_indexes].sum()
            accounting["target_spaces"] = int(round(accounting["occupied_spaces"] /
                                                    (1 - accounting["target_vacancy"])))
            accounting["proposed_spaces"] = 0
            accounting["demolished_spaces"] = 0

            self.accounting[column_value] = accounting

            if self._is_target_reached(column_value):
                proposal_indexes = self.proposal_set.get_id_index(
                    unique(self.proposal_component_set['proposal_id'][component_indexes]))
                single_component_indexes = where(self.proposal_set["number_of_components"] == 1)[0]
                self.weight[intersect1d(proposal_indexes, single_component_indexes)] = 0.0

        ## handle planned proposals: all proposals with status_id == is_planned
        ## and start_year == year are accepted
        planned_proposal_indexes = where(logical_and(
            self.proposal_set.get_attribute("status_id") == self.proposal_set.id_planned,
            self.proposal_set.get_attribute("start_year") == year))[0]

        self.consider_proposals(planned_proposal_indexes, force_accepting=True)

        # consider proposals (in this order: proposed, tentative)
        for status in [self.proposal_set.id_proposed, self.proposal_set.id_tentative]:
            stat = (self.proposal_set.get_attribute("status_id") == status)
            if stat.sum() == 0:
                continue

            logger.log_status("Sampling from %s eligible proposals of status %s." % (stat.sum(), status))
            iteration = 0
            while (not self._is_target_reached()):
                ## prevent proposals from being sampled for vacancy types whose targets are reached
                #for column_value in self.accounting.keys():

                if self.weight[stat].sum() == 0.0:
                    logger.log_warning("Running out of proposals of status %s before vacancy targets "
                                       "are reached; there aren't any proposals with non-zero weight" % status)
                    break

                available_indexes = where(logical_and(stat, self.weight > 0))[0]
                sample_size = minimum(available_indexes.size, n)
                sampled_proposal_indexes = probsample_noreplace(available_indexes, sample_size,
                                                                prob_array=self.weight[available_indexes],
                                                                return_index=False)

                self.consider_proposals(sampled_proposal_indexes)
                self.weight[sampled_proposal_indexes] = 0

                #sample_size = 1
                #sampled_proposal_index = probsample_noreplace(available_indexes, sample_size,
                #                                              prob_array=self.weight[available_indexes],
                #                                              return_index=False)
                #self.consider_proposal(sampled_proposal_index)
                #self.weight[sampled_proposal_index] = 0

                iteration += 1

        self._log_status()

        # set status of accepted proposals to 'active'
        self.proposal_set.modify_attribute(name="status_id",
                                           data=self.proposal_set.id_active,
                                           index=array(self.accepted_proposals, dtype='int32'))

        # Code added by Jesse Ayers, MAG, 7/20/2009
        # Get the active projects:
        stat_id = self.proposal_set.get_attribute('status_id')
        actv = where(stat_id == 1)[0]
        # Where there are active projects, compute the total_land_area_taken
        # and store it on the development_project_proposals dataset
        # so it can be used by the building_construction_model for the proper
        # computation of units_proposed for those projects with velocity curves
        if actv.size > 0:
            total_land_area_taken_computed = self.proposal_set.get_attribute(
                'urbansim_parcel.development_project_proposal.land_area_taken')
            self.proposal_set.modify_attribute('total_land_area_taken',
                                               total_land_area_taken_computed[actv], actv)

        return (self.proposal_set,
                self.realestate_dataset.get_id_attribute()[self.demolished_buildings])
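    # --- Illustrative sketch (standalone; numpy.random.choice stands in here for
    # the opus probsample_noreplace helper) of one sampling pass of the loop above:
    # draw up to n eligible proposals with probability proportional to their weight,
    # then zero the weights of the drawn proposals so they are not considered again.
    #
    #     import numpy
    #     weight = numpy.array([0.0, 2.0, 1.0, 4.0, 0.0, 3.0])
    #     eligible = numpy.where(weight > 0)[0]
    #     size = min(eligible.size, 2)                    # n = 2
    #     p = weight[eligible] / weight[eligible].sum()
    #     drawn = numpy.random.choice(eligible, size=size, replace=False, p=p)
    #     weight[drawn] = 0                               # never reconsidered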
    def _do_run(self, location_set, agent_set, agents_index, data_objects=None, resources=None):
        location_id_name = location_set.get_id_name()[0]
        jobsubset = DatasetSubset(agent_set, agents_index)
        if jobsubset.size() <= 0:
            return array([], dtype='int32')
        # unplace jobs
        agent_set.set_values_of_one_attribute(location_id_name,
                                              resize(array([-1.0]), jobsubset.size()),
                                              agents_index)
        sector_ids = jobsubset.get_attribute("sector_id")
        sectors = unique(sector_ids)
        counts = ndimage_sum(ones((jobsubset.size(),)),
                             labels=sector_ids.astype('int32'),
                             index=sectors.astype('int32'))
        if sectors.size <= 1:
            counts = array([counts])
        variables = map(lambda x: "number_of_jobs_of_sector_" + str(int(x)), sectors)
        compute_variables = map(lambda var: self.variable_package + "." +
                                location_set.get_dataset_name() + "." + var, variables)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included({agent_set.get_dataset_name(): agent_set})
        location_set.compute_variables(compute_variables, dataset_pool=self.dataset_pool)
        if self.filter is None:
            location_index = arange(location_set.size())
        else:
            filter_values = location_set.compute_variables([self.filter], dataset_pool=self.dataset_pool)
            location_index = where(filter_values > 0)[0]
        if location_index.size <= 0:
            logger.log_status("No locations available. Nothing to be done.")
            return array([])
        location_subset = DatasetSubset(location_set, location_index)
        i = 0
        for sector in sectors:
            distr = location_subset.get_attribute(variables[i])
            if ma.allclose(distr.sum(), 0):
                uniform_prob = 1.0 / distr.size
                distr = resize(array([uniform_prob], dtype='float64'), distr.size)
                logger.log_warning("Probabilities in scaling model for sector " + str(sector) +
                                   " sum to 0.0. Substituting uniform distribution!")
            #random_sample = sample(location_set.get_attribute("grid_id"), k=int(counts[i]),
            #                       probabilities=distr)
            distr = distr / float(distr.sum())
            random_sample = probsample_replace(location_subset.get_id_attribute(),
                                               size=int(counts[i]),
                                               prob_array=distr)
            idx = where(sector_ids == sector)[0]
            # modify job locations
            agent_set.set_values_of_one_attribute(location_id_name, random_sample, agents_index[idx])
            i += 1
        return agent_set.get_attribute_by_index(location_id_name, agents_index)
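    # --- Illustrative sketch (standalone; numpy.random.choice with replace=True
    # stands in here for the opus probsample_replace helper) of the scaling step
    # above: each unplaced job of a sector picks a location with probability
    # proportional to the number of jobs of that sector already at the location.
    #
    #     import numpy
    #     location_ids = numpy.array([101, 102, 103])
    #     jobs_of_sector = numpy.array([50.0, 30.0, 20.0])  # existing jobs per location
    #     distr = jobs_of_sector / jobs_of_sector.sum()     # -> [0.5, 0.3, 0.2]
    #     placed = numpy.random.choice(location_ids, size=7, replace=True, p=distr)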