def _add(self, amount=0, attribute='', dataset=None, index=None, data_dict={}, **kwargs):
    new_data = {}
    dataset_known_attributes = dataset.get_known_attribute_names()
    if index.size > 0:  # sample from agents
        lucky_index = sample_replace(index, amount)
        for attr in dataset_known_attributes:
            new_data[attr] = dataset.get_attribute_by_index(attr, lucky_index)
    else:
        ## if attributes are not fully specified, the missing attributes will be filled with 0's
        for attr in dataset.get_primary_attribute_names():
            if attr in data_dict:
                new_data[attr] = resize(array(data_dict[attr]), amount)
            else:
                if attr == dataset.get_id_name()[0]:
                    new_data[attr] = zeros(amount, dtype=dataset.get_id_attribute().dtype)
                else:
                    logger.log_warning("Attribute %s is unspecified for 'add' event; "
                                       "its value will be sampled from all %s values of %s."
                                       % (attr, attr, dataset.get_dataset_name()))
                    new_data[attr] = sample_replace(dataset.get_attribute(attr), amount)
    dataset.add_elements(data=new_data, change_ids_if_not_unique=True)
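# --- Illustration (not part of the model code): a minimal stand-in for
# sample_replace, with semantics inferred from the call sites in this file:
# it draws `size` elements uniformly with replacement, and with
# return_index=True it returns positions into source_array rather than the
# sampled values. A sketch of the assumed behavior, not opus' implementation.
from numpy import asarray
from numpy.random import randint

def sample_replace_sketch(source_array, size, return_index=False):
    source_array = asarray(source_array)
    positions = randint(0, source_array.size, size)  # drawn with replacement
    if return_index:
        return positions
    return source_array[positions]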
def run(self, person_set, tour_set, person_index=None, tour_filter=None, dataset_pool=None):
    if person_index is None:
        person_index = arange(person_set.size())
    if tour_filter is not None:
        tour_index = where(tour_set.compute_variables(tour_filter))[0]
    else:
        tour_index = arange(tour_set.size())
    sampled_tour_id = sample_replace(tour_set.get_id_attribute()[tour_index],
                                     person_index.size, return_index=False)
    if 'tour_id' in person_set.get_known_attribute_names():
        person_set.set_values_of_one_attribute('tour_id', sampled_tour_id, person_index)
    else:
        tour_id = -1 * ones(person_set.size(), dtype="int32")
        tour_id[person_index] = sampled_tour_id
        person_set.add_primary_attribute(tour_id, 'tour_id')
    return sampled_tour_id
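# --- Illustration (not part of the model code): the initialize-to-minus-one,
# then scatter pattern used above, in isolation with toy sizes; randint
# stands in for sample_replace.
from numpy import arange, ones
from numpy.random import randint

n_persons = 10
person_index = arange(3, 8)                    # persons receiving a tour
sampled_tour_id = randint(100, 200, person_index.size)
tour_id = -1 * ones(n_persons, dtype="int32")  # -1 marks "no tour assigned"
tour_id[person_index] = sampled_tour_id        # persons outside person_index keep -1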
def _clone(self, agents_pool, amount, agent_dataset, location_dataset,
           this_refinement, dataset_pool):
    """Clone the given amount of agents satisfying the conditions specified by
    agent_expression and location_expression, and add them to agents_pool.
    Useful for adding agents to locations where no such agent existed before.
    """
    fit_index = self.get_fit_agents_index(agent_dataset,
                                          this_refinement.agent_expression,
                                          this_refinement.location_expression,
                                          dataset_pool)
    if fit_index.size == 0:
        logger.log_error("Refinement requests to clone %i agents, but there are no agents satisfying %s."
                         % (amount,
                            ' and '.join([this_refinement.agent_expression,
                                          this_refinement.location_expression]).strip(' and ')))
        return
    clone_index = sample_replace(fit_index, amount)
    agents_pool += clone_index.tolist()
    agent_dataset.modify_attribute(location_dataset.get_id_name()[0],
                                   -1 * ones(clone_index.size, dtype='int32'),
                                   index=clone_index)
    self._add_refinement_info_to_dataset(agent_dataset, self.id_names,
                                         this_refinement, index=clone_index)
def _create_households(self, diff, l):
    # sample existing households to copy
    is_hh_in_group = l[self.household_categories]
    consider_hhs_idx = where(is_hh_in_group)[0]
    if consider_hhs_idx.size > 0:
        sample_from_existing_hhs = sample_replace(consider_hhs_idx, diff)
        self.mapping_existing_hhs_to_new_hhs = concatenate(
            (self.mapping_existing_hhs_to_new_hhs, sample_from_existing_hhs))
def test_sample_replace(self):
    start_time = time.time()
    sample = sample_replace(self.all, self.size, return_index=True)
    logger.log_status("sample_replace %s from %s items array in " % (self.size, self.n)
                      + str(time.time() - start_time) + " sec")
    self.assertEqual(sample.size, self.size, msg="sample size not equal to size parameter")
    assert isinstance(sample, ndarray), "sample is not of type ndarray"
    assert 0 <= sample.min() <= self.n - 1, "sampled elements not in between min and max of source array"
    assert 0 <= sample.max() <= self.n - 1, "sampled elements not in between min and max of source array"
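# --- Illustration (not part of the test suite): why the assertions above
# bound the draw by n-1. With return_index=True the returned values are
# positions into the source array, so they fall in [0, len(source)-1]
# regardless of the element values. Toy check with a numpy stand-in:
from numpy import arange
from numpy.random import randint

all_items, n, size = arange(1000) * 10, 1000, 50
sample = randint(0, n, size)   # stands in for sample_replace(..., return_index=True)
assert sample.size == size and 0 <= sample.min() and sample.max() <= n - 1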
def run(self, job_dataset, dataset_pool, out_storage=None, jobs_table="jobs"):
    """
    Algorithm:
    1. For all non_home_based jobs that have parcel_id assigned but no building_id, try
       to choose a building from all buildings in that parcel. Draw the building with
       probabilities given by the sector-building_type distribution. The job sizes are
       fitted into the available space (the attribute job.sqft is updated).
    2. For all non_home_based jobs for which no building was found in step 1, check
       if the parcel has residential buildings. In such a case, re-assign the jobs to be
       home-based. Otherwise, if the sum of non_residential_sqft over the involved buildings
       is 0, for all jobs that have impute_building_sqft_flag=True draw a building using the
       sector-building_type distribution and impute the corresponding sqft to the
       non_residential_sqft of that building.
    3. For all home_based jobs that have parcel_id assigned but no building_id, try
       to choose a building from all buildings in that parcel. The capacity of a
       single-family building is determined from the sizes of the households living there
       (for each household the minimum of number of members and 2 is taken). For
       multi-family buildings the capacity is 50.
    4. Assign a building type to jobs that have missing building type. It is sampled
       from the region-wide distribution of home-based and non-home-based jobs.
    5. Update the table 'building_sqft_per_job' using the updated job.sqft.
    'in_storage' should contain the jobs table and the zone_averages_table. The
    'dataset_pool_storage' should contain all other tables needed (buildings, households,
    building_types).
    """
    parcel_ids = job_dataset.get_attribute("parcel_id")
    building_ids = job_dataset.get_attribute("building_id")
    building_types = job_dataset.get_attribute("building_type")
    try:
        impute_sqft_flags = job_dataset.get_attribute("impute_building_sqft_flag")
    except:
        impute_sqft_flags = zeros(job_dataset.size())
    is_considered = logical_and(parcel_ids > 0, building_ids <= 0)  # jobs that have an assigned parcel but no building
    job_index_home_based = where(logical_and(is_considered, building_types == 1))[0]
    job_index_governmental = where(logical_and(is_considered, building_types == 3))[0]

    building_dataset = dataset_pool.get_dataset('building')
    parcel_ids_in_bldgs = building_dataset.get_attribute("parcel_id")
    bldg_ids_in_bldgs = building_dataset.get_id_attribute()
    bldg_types_in_bldgs = building_dataset.get_attribute("building_type_id")
    non_res_sqft = building_dataset.get_attribute("non_residential_sqft")
    occupied = building_dataset.compute_variables(
        ["urbansim_parcel.building.occupied_building_sqft_by_jobs"], dataset_pool=dataset_pool)
    is_governmental = building_dataset.compute_variables(
        ["building.disaggregate(building_type.generic_building_type_id == 7)"], dataset_pool=dataset_pool)

    # assign buildings to governmental jobs randomly
    unique_parcels = unique(parcel_ids[job_index_governmental])
    logger.log_status("Placing governmental jobs ...")
    for parcel in unique_parcels:
        idx_in_bldgs = where(parcel_ids_in_bldgs[is_governmental] == parcel)[0]
        if idx_in_bldgs.size <= 0:
            continue
        idx_in_jobs = where(parcel_ids[job_index_governmental] == parcel)[0]
        draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
        building_ids[job_index_governmental[idx_in_jobs]] = bldg_ids_in_bldgs[where(is_governmental)[0][draw]]
    logger.log_status("%s governmental jobs (out of %s gov. jobs) were placed."
                      % ((building_ids[job_index_governmental] > 0).sum(), job_index_governmental.size))
    logger.log_status("The unplaced governmental jobs will be added to the non-home based jobs.")

    # consider the unplaced governmental jobs together with other non-home-based jobs
    is_now_considered = logical_and(is_considered, building_ids <= 0)
    job_index_non_home_based = where(logical_and(is_now_considered,
                                                 logical_or(building_types == 2, building_types == 3)))[0]

    # assign buildings to non_home_based jobs based on available space
    unique_parcels = unique(parcel_ids[job_index_non_home_based])
    job_building_types = job_dataset.compute_variables(
        ["bldgs_building_type_id = job.disaggregate(building.building_type_id)"], dataset_pool=dataset_pool)
    where_valid_jbt = where(logical_and(job_building_types > 0,
                                        logical_or(building_types == 2, building_types == 3)))[0]
    building_type_dataset = dataset_pool.get_dataset("building_type")
    available_building_types = building_type_dataset.get_id_attribute()
    idx_available_bt = building_type_dataset.get_id_index(available_building_types)
    sectors = job_dataset.get_attribute("sector_id")
    unique_sectors = unique(sectors)
    sector_bt_distribution = zeros((unique_sectors.size, building_type_dataset.size()), dtype="float32")

    jobs_sqft = job_dataset.get_attribute_by_index("sqft", job_index_non_home_based).astype("float32")
    job_dataset._compute_if_needed("urbansim_parcel.job.zone_id", dataset_pool=dataset_pool)
    jobs_zones = job_dataset.get_attribute_by_index("zone_id", job_index_non_home_based)
    new_jobs_sqft = job_dataset.get_attribute("sqft").copy()

    # find sector -> building_type distribution
    sector_index_mapping = {}
    for isector in range(unique_sectors.size):
        idx = where(sectors[where_valid_jbt] == unique_sectors[isector])[0]
        if idx.size == 0:
            continue
        o = ones(idx.size, dtype="int32")
        sector_bt_distribution[isector, :] = ndimage_sum(o,
                                                         labels=job_building_types[where_valid_jbt[idx]],
                                                         index=available_building_types)
        sector_bt_distribution[isector, :] = sector_bt_distribution[isector, :] / sector_bt_distribution[isector, :].sum()
        sector_index_mapping[unique_sectors[isector]] = isector

    # create a lookup table for the zonal average of sqft per employee, per building type
    zone_average_dataset = dataset_pool.get_dataset("building_sqft_per_job")
    zone_bt_lookup = zone_average_dataset.get_building_sqft_as_table(
        job_dataset.get_attribute("zone_id").max(), available_building_types.max())

    counter_zero_capacity = 0
    counter_zero_distr = 0
    # iterate over parcels
    logger.log_status("Placing non-home-based jobs ...")
    for parcel in unique_parcels:
        idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
        if idx_in_bldgs.size <= 0:
            continue
        idx_in_jobs = where(parcel_ids[job_index_non_home_based] == parcel)[0]
        capacity = maximum(non_res_sqft[idx_in_bldgs] - occupied[idx_in_bldgs], 0)
        #capacity = non_res_sqft[idx_in_bldgs] - occupied[idx_in_bldgs]
        if capacity.sum() <= 0:
            counter_zero_capacity += idx_in_jobs.size
            continue
        this_jobs_sectors = sectors[job_index_non_home_based][idx_in_jobs]
        this_jobs_sqft_table = resize(jobs_sqft[idx_in_jobs], (idx_in_bldgs.size, idx_in_jobs.size))
        wn = jobs_sqft[idx_in_jobs] <= 0
        for i in range(idx_in_bldgs.size):
            this_jobs_sqft_table[i, where(wn)[0]] = zone_bt_lookup[jobs_zones[idx_in_jobs[wn]],
                                                                   bldg_types_in_bldgs[idx_in_bldgs[i]]]
        supply_demand_ratio = (resize(capacity, (capacity.size, 1)) /
                               this_jobs_sqft_table.astype("float32").sum(axis=0)) / float(idx_in_jobs.size) * 0.9
        if any(supply_demand_ratio < 1):  # correct only if supply is smaller than demand
            this_jobs_sqft_table = this_jobs_sqft_table * supply_demand_ratio
        probcomb = zeros(this_jobs_sqft_table.shape)
        bt = bldg_types_in_bldgs[idx_in_bldgs]
        ibt = building_type_dataset.get_id_index(bt)
        for i in range(probcomb.shape[0]):
            for j in range(probcomb.shape[1]):
                probcomb[i, j] = sector_bt_distribution[sector_index_mapping[this_jobs_sectors[j]], ibt[i]]
        pcs = probcomb.sum(axis=0)
        probcomb = probcomb / pcs
        wz = where(pcs <= 0)[0]
        counter_zero_distr += wz.size
        probcomb[:, wz] = 0  # to avoid nan values
        taken = zeros(capacity.shape)
        has_sqft = this_jobs_sqft_table > 0
        while True:
            if (has_sqft * probcomb).sum() <= 0:
                break
            req = (this_jobs_sqft_table * probcomb).sum(axis=0)
            maxi = req.max()
            wmaxi = where(req == maxi)[0]
            drawjob = sample_noreplace(arange(wmaxi.size), 1)  # draw job from jobs with the maximum size
            imax_req = wmaxi[drawjob]
            weights = has_sqft[:, imax_req] * probcomb[:, imax_req]
            draw = probsample_noreplace(arange(probcomb.shape[0]), 1,
                                        resize(weights / weights.sum(), (probcomb.shape[0],)))
            if (taken[draw] + this_jobs_sqft_table[draw, imax_req]) > capacity[draw]:
                probcomb[draw, imax_req] = 0
                continue
            taken[draw] = taken[draw] + this_jobs_sqft_table[draw, imax_req]
            building_ids[job_index_non_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
            probcomb[:, imax_req] = 0
            new_jobs_sqft[job_index_non_home_based[idx_in_jobs[imax_req]]] = int(
                min(self.maximum_sqft, max(round(this_jobs_sqft_table[draw, imax_req]), self.minimum_sqft)))

    logger.log_status("%s non home based jobs (out of %s nhb jobs) were placed."
                      % ((building_ids[job_index_non_home_based] > 0).sum(), job_index_non_home_based.size))
    logger.log_status("Unplaced due to zero capacity: %s" % counter_zero_capacity)
    logger.log_status("Unplaced due to zero distribution: %s" % counter_zero_distr)

    job_dataset.modify_attribute(name="building_id", data=building_ids)

    # re-classify unplaced non-home based jobs to home-based if parcels contain residential buildings
    bldgs_is_residential = logical_and(logical_not(is_governmental),
                                       building_dataset.compute_variables(
                                           ["urbansim_parcel.building.is_residential"],
                                           dataset_pool=dataset_pool))
    is_now_considered = logical_and(parcel_ids > 0, building_ids <= 0)
    job_index_non_home_based_unplaced = where(logical_and(is_now_considered, building_types == 2))[0]
    unique_parcels = unique(parcel_ids[job_index_non_home_based_unplaced])
    imputed_sqft = 0
    logger.log_status("Try to reclassify non-home-based jobs (excluding governmental jobs) ...")
    for parcel in unique_parcels:
        idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
        if idx_in_bldgs.size <= 0:
            continue
        idx_in_jobs = where(parcel_ids[job_index_non_home_based_unplaced] == parcel)[0]
        where_residential = where(bldgs_is_residential[idx_in_bldgs])[0]
        if where_residential.size > 0:
            building_types[job_index_non_home_based_unplaced[idx_in_jobs]] = 1  # set to home-based jobs
        elif non_res_sqft[idx_in_bldgs].sum() <= 0:
            # impute non_residential_sqft and assign buildings
            this_jobs_sectors = sectors[job_index_non_home_based_unplaced][idx_in_jobs]
            this_jobs_sqft_table = resize(jobs_sqft[idx_in_jobs], (idx_in_bldgs.size, idx_in_jobs.size))
            wn = jobs_sqft[idx_in_jobs] <= 0
            for i in range(idx_in_bldgs.size):
                this_jobs_sqft_table[i, where(wn)[0]] = zone_bt_lookup[jobs_zones[idx_in_jobs[wn]],
                                                                       bldg_types_in_bldgs[idx_in_bldgs[i]]]
            probcomb = zeros(this_jobs_sqft_table.shape)
            bt = bldg_types_in_bldgs[idx_in_bldgs]
            ibt = building_type_dataset.get_id_index(bt)
            for i in range(probcomb.shape[0]):
                for j in range(probcomb.shape[1]):
                    probcomb[i, j] = sector_bt_distribution[sector_index_mapping[this_jobs_sectors[j]], ibt[i]]
            for ijob in range(probcomb.shape[1]):
                if (probcomb[:, ijob].sum() <= 0) or (impute_sqft_flags[job_index_non_home_based_unplaced[ijob]] == 0):
                    continue
                weights = probcomb[:, ijob]
                draw = probsample_noreplace(arange(probcomb.shape[0]), 1,
                                            resize(weights / weights.sum(), (probcomb.shape[0],)))
                non_res_sqft[idx_in_bldgs[draw]] += this_jobs_sqft_table[draw, ijob]
                imputed_sqft += this_jobs_sqft_table[draw, ijob]
                building_ids[job_index_non_home_based_unplaced[idx_in_jobs[ijob]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                new_jobs_sqft[job_index_non_home_based[idx_in_jobs[ijob]]] = int(
                    min(self.maximum_sqft, max(round(this_jobs_sqft_table[draw, ijob]), self.minimum_sqft)))

    building_dataset.modify_attribute(name="non_residential_sqft", data=non_res_sqft)
    job_dataset.modify_attribute(name="building_id", data=building_ids)
    job_dataset.modify_attribute(name="building_type", data=building_types)
    job_dataset.modify_attribute(name="sqft", data=new_jobs_sqft)

    old_nhb_size = job_index_non_home_based.size
    job_index_home_based = where(logical_and(is_considered, building_types == 1))[0]
    job_index_non_home_based = where(logical_and(is_considered, building_types == 2))[0]
    logger.log_status("%s non-home based jobs reclassified as home-based."
                      % (old_nhb_size - job_index_non_home_based.size))
    logger.log_status("%s non-residential sqft imputed." % imputed_sqft)
    logger.log_status("Additionally, %s non home based jobs were placed due to imputed sqft."
                      % (building_ids[job_index_non_home_based_unplaced] > 0).sum())

    # home_based jobs
    unique_parcels = unique(parcel_ids[job_index_home_based])
    capacity_in_buildings = building_dataset.compute_variables(
        ["urbansim_parcel.building.vacant_home_based_job_space"], dataset_pool=dataset_pool)
    parcels_with_exceeded_capacity = []
    # iterate over parcels
    logger.log_status("Placing home-based jobs ...")
    for parcel in unique_parcels:
        idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
        idx_in_jobs = where(parcel_ids[job_index_home_based] == parcel)[0]
        capacity = capacity_in_buildings[idx_in_bldgs]
        if capacity.sum() <= 0:
            continue
        probcomb = ones((idx_in_bldgs.size, idx_in_jobs.size))
        taken = zeros(capacity.shape, dtype="int32")
        while True:
            zero_cap = where((capacity - taken) <= 0)[0]
            probcomb[zero_cap, :] = 0
            if probcomb.sum() <= 0:
                break
            req = probcomb.sum(axis=0)
            wmaxi = where(req == req.max())[0]
            drawjob = sample_noreplace(arange(wmaxi.size), 1)  # draw job from available jobs
            imax_req = wmaxi[drawjob]
            weights = probcomb[:, imax_req]
            # sample building
            draw = probsample_noreplace(arange(probcomb.shape[0]), 1,
                                        resize(weights / weights.sum(), (probcomb.shape[0],)))
            taken[draw] = taken[draw] + 1
            building_ids[job_index_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
            probcomb[:, imax_req] = 0
        if -1 in building_ids[job_index_home_based[idx_in_jobs]]:
            parcels_with_exceeded_capacity.append(parcel)
    parcels_with_exceeded_capacity = array(parcels_with_exceeded_capacity)

    logger.log_status("%s home based jobs (out of %s hb jobs) were placed."
                      % ((building_ids[job_index_home_based] > 0).sum(), job_index_home_based.size))

    # assign building type where missing
    # determine regional distribution
    idx_home_based = where(building_types == 1)[0]
    idx_non_home_based = where(building_types == 2)[0]
    idx_bt_missing = where(building_types <= 0)[0]
    if idx_bt_missing.size > 0:
        # sample building types
        sample_bt = probsample_replace(array([1, 2]), idx_bt_missing.size,
                                       array([idx_home_based.size, idx_non_home_based.size]) /
                                       float(idx_home_based.size + idx_non_home_based.size))
        # coerce to int32 (on a 64 bit machine, sample_bt will be of type int64)
        building_types[idx_bt_missing] = sample_bt.astype(int32)
        job_dataset.modify_attribute(name="building_type", data=building_types)

    if out_storage is not None:
        job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage,
                                  attributes=AttributeType.PRIMARY)
        building_dataset.write_dataset(out_table_name='buildings', out_storage=out_storage,
                                       attributes=AttributeType.PRIMARY)
    logger.log_status("Assigning building_id to jobs done.")
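# --- Illustration (not part of the model code): the sector -> building_type
# distribution computed above, on toy inputs. ndimage_sum counts, for each
# entry of `index`, how many jobs carry that building type as a label;
# normalizing the counts gives the sampling distribution for one sector.
from numpy import array, ones
from scipy.ndimage import sum as ndimage_sum

job_building_types = array([3, 3, 8, 13, 3])     # building type of each valid job in one sector
available_building_types = array([3, 8, 13, 20])
counts = ndimage_sum(ones(job_building_types.size),
                     labels=job_building_types,
                     index=available_building_types)
distribution = counts / counts.sum()             # -> [0.6, 0.2, 0.2, 0.0]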
def run(self, individual_dataset, counts_dataset, fraction_dataset,
        id_name1="blockgroup_id", id_name2="zone_id",
        fraction_attribute_name="fraction", out_storage=None):
    assert id_name1 in individual_dataset.get_known_attribute_names()
    if id_name2 not in individual_dataset.get_known_attribute_names():
        individual_dataset.add_primary_attribute(-1 * ones(individual_dataset.size()), id_name2)

    lucky_household_index = array([], dtype="int32")
    hh_zone_id = array([], dtype="int32")
    output_data = {}
    logger.start_block("Start assigning individuals")
    zone_ids = counts_dataset.get_attribute(id_name2)
    building_types = counts_dataset.get_attribute("building_type_id")
    households = counts_dataset.get_attribute("households")
    for zone_id, building_type, n in zip(zone_ids, building_types, households):
        logger.log_status("n(%s=%i & %s=%i) = %s:" % (id_name2, zone_id, "building_type_id", building_type, n))
        fraction_index = where(fraction_dataset.get_attribute(id_name2) == zone_id)
        blockgroup_ids = fraction_dataset.get_attribute_by_index(id_name1, fraction_index)
        fractions = fraction_dataset.get_attribute_by_index(fraction_attribute_name, fraction_index)
        for blockgroup_id, fraction in zip(blockgroup_ids, fractions):
            nn = int(round(n * fraction))
            logger.log_status("\tfrac(%s=%s) = %s, n = %s" % ("blockgroup_id", blockgroup_id, fraction, nn))
            if nn >= 1:
                suitable_household_index = where(logical_and(
                    individual_dataset.get_attribute(id_name1) == blockgroup_id,
                    individual_dataset.get_attribute("building_type_id") == building_type))[0]
                logger.log_status("\t\tsample %s from %s suitable households"
                                  % (nn, suitable_household_index.size))
                if suitable_household_index.size == 0:
                    logger.log_warning("\tNo suitable households")
                    continue
                lucky_household_index = concatenate((lucky_household_index,
                                                     sample_replace(suitable_household_index, nn)))
                hh_zone_id = concatenate((hh_zone_id, [zone_id] * nn))

    for attribute_name in individual_dataset.get_known_attribute_names():
        output_data[attribute_name] = individual_dataset.get_attribute_by_index(attribute_name,
                                                                                lucky_household_index)
    output_data["original_household_id"] = output_data["household_id"]
    output_data["household_id"] = 1 + arange(lucky_household_index.size)
    output_data["zone_id"] = hh_zone_id

    storage = StorageFactory().get_storage("dict_storage")
    storage.write_table(table_name="households", table_data=output_data)
    output_dataset = Dataset(in_storage=storage, id_name=["household_id"], in_table_name="households")
    output_dataset.write_dataset(out_storage=out_storage, out_table_name="households")
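# --- Illustration (not part of the model code): the allocation rule above
# gives each blockgroup round(n * fraction) sampled households. Rounding is
# done per blockgroup, so the allocations need not sum exactly to n:
n = 40
allocated = [int(round(n * fraction)) for fraction in (0.5, 0.3, 0.2)]
# -> [20, 12, 8]; with other fractions the sum can drift by +/-1 per blockgroup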
def _add(self, agents_pool, amount, agent_dataset, location_dataset,
         this_refinement, dataset_pool):
    fit_index = self.get_fit_agents_index(agent_dataset,
                                          this_refinement.agent_expression,
                                          this_refinement.location_expression,
                                          dataset_pool)
    movers_index = array([], dtype="int32")
    amount_from_agents_pool = min(amount, len(agents_pool))
    if amount_from_agents_pool > 0:
        agents_index_from_agents_pool = sample_noreplace(agents_pool, amount_from_agents_pool)
        [agents_pool.remove(i) for i in agents_index_from_agents_pool]
        if fit_index.size == 0:
            ## cannot find agents to copy their location from or to clone; place agents from agents_pool
            logger.log_warning("Refinement requests to add %i agents, but there are only %i agents "
                               "subtracted from previous action(s) and no agents satisfying %s to clone from; "
                               "adding %i agents instead"
                               % (amount, amount_from_agents_pool,
                                  ' and '.join([this_refinement.agent_expression,
                                                this_refinement.location_expression]).strip(' and '),
                                  amount_from_agents_pool))
            amount = amount_from_agents_pool
            is_suitable_location = location_dataset.compute_variables(this_refinement.location_expression,
                                                                      dataset_pool=dataset_pool)
            location_id_for_agents_pool = sample_replace(
                location_dataset.get_id_attribute()[is_suitable_location],
                amount_from_agents_pool)
        else:
            agents_index_for_location = sample_replace(fit_index, amount_from_agents_pool)
            location_id_for_agents_pool = agent_dataset.get_attribute(
                location_dataset.get_id_name()[0])[agents_index_for_location]
            movers_index = concatenate((movers_index, agents_index_for_location))
    elif fit_index.size == 0:
        ## no agents in agents_pool and no agents to clone either --> fail
        logger.log_error("Action 'add' failed: there is no agent subtracted from previous action, "
                         "and no suitable agents satisfying %s to clone from."
                         % ' and '.join([this_refinement.agent_expression,
                                         this_refinement.location_expression]).strip('and'))
        return

    if amount > amount_from_agents_pool:
        agents_index_to_clone = sample_replace(fit_index, amount - amount_from_agents_pool)
        movers_index = concatenate((movers_index, agents_index_to_clone))

    if movers_index.size > 0 and this_refinement.location_capacity_attribute is not None \
            and len(this_refinement.location_capacity_attribute) > 0:
        movers_location_id = agent_dataset.get_attribute(location_dataset.get_id_name()[0])[movers_index]
        movers_location_index = location_dataset.get_id_index(movers_location_id)
        # see previous comment about histogram function
        num_of_movers_by_location = histogram(movers_location_index,
                                              bins=arange(location_dataset.size() + 1))[0]
        num_of_agents_by_location = location_dataset.compute_variables(
            "number_of_agents=%s.number_of_agents(%s)"
            % (location_dataset.dataset_name, agent_dataset.dataset_name),
            dataset_pool=dataset_pool)
        expand_factor = safe_array_divide(
            (num_of_agents_by_location + num_of_movers_by_location).astype('float32'),
            num_of_agents_by_location,
            return_value_if_denominator_is_zero=1.0)
        new_values = round_(expand_factor *
                            location_dataset.get_attribute(this_refinement.location_capacity_attribute))
        location_dataset.modify_attribute(this_refinement.location_capacity_attribute, new_values)
        self._add_refinement_info_to_dataset(location_dataset, self.id_names,
                                             this_refinement, index=movers_location_index)

    if amount_from_agents_pool > 0:
        agent_dataset.modify_attribute(location_dataset.get_id_name()[0],
                                       location_id_for_agents_pool,
                                       agents_index_from_agents_pool)
        self._add_refinement_info_to_dataset(agent_dataset, self.id_names,
                                             this_refinement, index=agents_index_from_agents_pool)
    if amount > amount_from_agents_pool:
        new_agents_index = agent_dataset.duplicate_rows(agents_index_to_clone)
        self._add_refinement_info_to_dataset(agent_dataset, self.id_names,
                                             this_refinement, index=agents_index_to_clone)
        self._add_refinement_info_to_dataset(agent_dataset, self.id_names,
                                             this_refinement, index=new_agents_index)
def _add(self, agents_pool, amount, agent_dataset, location_dataset,
         this_refinement, dataset_pool):
    fit_index = self.get_fit_agents_index(agent_dataset,
                                          this_refinement.agent_expression,
                                          this_refinement.location_expression,
                                          dataset_pool)
    movers_index = array([], dtype="int32")
    amount_from_agents_pool = min(amount, len(agents_pool))
    if amount_from_agents_pool > 0:
        agents_index_from_agents_pool = sample_noreplace(agents_pool, amount_from_agents_pool)
        [agents_pool.remove(i) for i in agents_index_from_agents_pool]
        if fit_index.size == 0:
            ## cannot find agents to copy their location from or to clone; place agents from agents_pool
            if amount > amount_from_agents_pool:
                logger.log_warning("Refinement requests to add %i agents, but there are only %i agents "
                                   "subtracted from previous action(s) and no agents satisfying %s to clone from; "
                                   "adding %i agents instead"
                                   % (amount, amount_from_agents_pool,
                                      ' and '.join([this_refinement.agent_expression,
                                                    this_refinement.location_expression]).strip(' and '),
                                      amount_from_agents_pool))
                amount = amount_from_agents_pool
            # sample from all suitable locations
            is_suitable_location = location_dataset.compute_variables(this_refinement.location_expression,
                                                                      dataset_pool=dataset_pool)
            location_id_for_agents_pool = sample_replace(
                location_dataset.get_id_attribute()[is_suitable_location],
                amount_from_agents_pool)
        else:
            # sample from locations of suitable agents
            agents_index_for_location = sample_replace(fit_index, amount_from_agents_pool)
            location_id_for_agents_pool = agent_dataset.get_attribute(
                location_dataset.get_id_name()[0])[agents_index_for_location]
            movers_index = concatenate((movers_index, agents_index_for_location))
    elif fit_index.size == 0:
        ## no agents in agents_pool and no agents to clone either --> fail
        logger.log_error("Action 'add' failed: there is no agent subtracted from previous action, "
                         "and no suitable agents satisfying %s to clone from."
                         % ' and '.join([this_refinement.agent_expression,
                                         this_refinement.location_expression]).strip('and'))
        return

    if amount > amount_from_agents_pool:
        agents_index_to_clone = sample_replace(fit_index, amount - amount_from_agents_pool)
        movers_index = concatenate((movers_index, agents_index_to_clone))

    if movers_index.size > 0 and this_refinement.location_capacity_attribute is not None \
            and len(this_refinement.location_capacity_attribute) > 0:
        movers_location_id = agent_dataset.get_attribute(location_dataset.get_id_name()[0])[movers_index]
        movers_location_index = location_dataset.get_id_index(movers_location_id)
        # see previous comment about histogram function
        num_of_movers_by_location = histogram(movers_location_index,
                                              bins=arange(location_dataset.size() + 1))[0]
        num_of_agents_by_location = location_dataset.compute_variables(
            "number_of_agents=%s.number_of_agents(%s)"
            % (location_dataset.dataset_name, agent_dataset.dataset_name),
            dataset_pool=dataset_pool)
        expand_factor = safe_array_divide(
            (num_of_agents_by_location + num_of_movers_by_location).astype('float32'),
            num_of_agents_by_location,
            return_value_if_denominator_is_zero=1.0)
        new_values = round_(expand_factor *
                            location_dataset.get_attribute(this_refinement.location_capacity_attribute))
        location_dataset.modify_attribute(this_refinement.location_capacity_attribute, new_values)
        self._add_refinement_info_to_dataset(location_dataset, self.id_names,
                                             this_refinement, index=movers_location_index)

    if amount_from_agents_pool > 0:
        agent_dataset.modify_attribute(location_dataset.get_id_name()[0],
                                       location_id_for_agents_pool,
                                       agents_index_from_agents_pool)
        self._add_refinement_info_to_dataset(agent_dataset, self.id_names,
                                             this_refinement, index=agents_index_from_agents_pool)
    if amount > amount_from_agents_pool:
        new_agents_index = agent_dataset.duplicate_rows(agents_index_to_clone)
        self._add_refinement_info_to_dataset(agent_dataset, self.id_names,
                                             this_refinement, index=agents_index_to_clone)
        self._add_refinement_info_to_dataset(agent_dataset, self.id_names,
                                             this_refinement, index=new_agents_index)
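# --- Illustration (not part of the model code): counting movers per location
# with numpy.histogram, as in the capacity-expansion step above. With
# bins=arange(size + 1) each location index gets its own unit-width bin.
from numpy import arange, histogram

movers_location_index = [0, 2, 2, 3]
n_locations = 5
num_of_movers_by_location = histogram(movers_location_index,
                                      bins=arange(n_locations + 1))[0]
# -> array([1, 0, 2, 1, 0])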
def run(self, job_dataset, dataset_pool, out_storage=None, jobs_table="jobs"):
    """
    Algorithm:
    1. For all non_home_based jobs that have parcel_id assigned but no building_id, try
       to choose a building from all buildings in that parcel. Draw the building with
       probabilities given by the sector-building_type distribution. The job sizes are
       fitted into the available space (the attribute job.sqft is updated).
    2. For all non_home_based jobs for which no building was found in step 1, check
       if the parcel has residential buildings. In such a case, re-assign the jobs to be
       home-based. Otherwise, if the sum of non_residential_sqft over the involved buildings
       is 0, for all jobs that have impute_building_sqft_flag=True draw a building using the
       sector-building_type distribution and impute the corresponding sqft to the
       non_residential_sqft of that building.
    3. For all home_based jobs that have parcel_id assigned but no building_id, try
       to choose a building from all buildings in that parcel. The capacity of a
       single-family building is determined from the sizes of the households living there
       (for each household the minimum of number of members and 2 is taken). For
       multi-family buildings the capacity is 50.
    4. Assign a building type to jobs that have missing building type. It is sampled
       from the region-wide distribution of home-based and non-home-based jobs.
    5. Update the table 'building_sqft_per_job' using the updated job.sqft.
    'in_storage' should contain the jobs table and the zone_averages_table. The
    'dataset_pool_storage' should contain all other tables needed (buildings, households,
    building_types).
    """
    parcel_ids = job_dataset.get_attribute("parcel_id")
    building_ids = job_dataset.get_attribute("building_id")
    home_base_status = job_dataset.get_attribute("home_based_status")
    sectors = job_dataset.get_attribute("sector_id")
    is_considered = logical_and(parcel_ids > 0, building_ids <= 0)  # jobs that have an assigned parcel but no building
    job_index_home_based = where(logical_and(is_considered, home_base_status == 0))[0]
    is_governmental_job = sectors == 18
    is_edu_job = sectors == 19
    job_index_governmental = where(logical_and(is_considered, is_governmental_job))[0]
    job_index_edu = where(logical_and(is_considered, is_edu_job))[0]

    building_dataset = dataset_pool.get_dataset('building')
    parcel_ids_in_bldgs = building_dataset.get_attribute("parcel_id")
    bldg_ids_in_bldgs = building_dataset.get_id_attribute()
    bldg_types_in_bldgs = building_dataset.get_attribute("building_type_id")
    non_res_sqft = building_dataset.get_attribute("non_residential_sqft")
    preferred_nhb_btypes = (building_dataset['building.building_type_id'] == 3) + \
                           (building_dataset['building.building_type_id'] == 8) + \
                           (building_dataset['building.building_type_id'] == 13) + \
                           (building_dataset['building.building_type_id'] == 20) + \
                           (building_dataset['building.building_type_id'] == 21)
    non_res_sqft_preferred = non_res_sqft * preferred_nhb_btypes

    is_governmental = building_dataset.compute_variables([
        "numpy.logical_and(building.disaggregate(building_type.generic_building_type_id == 7), building.building_type_id != 18)"],
        dataset_pool=dataset_pool)
    idx_gov = where(is_governmental)[0]
    is_edu = building_dataset['building.building_type_id'] == 18
    idx_edu = where(is_edu)[0]
    bldgs_is_residential = logical_and(logical_not(logical_or(is_governmental, is_edu)),
                                       building_dataset.compute_variables(
                                           ["urbansim_parcel.building.is_residential"],
                                           dataset_pool=dataset_pool))
    bldgs_isnot_residential = logical_not(bldgs_is_residential)

    # assign buildings to educational jobs randomly
    unique_parcels = unique(parcel_ids[job_index_edu])
    logger.log_status("Placing educational jobs ...")
    for parcel in unique_parcels:
        idx_in_bldgs = where(parcel_ids_in_bldgs[idx_edu] == parcel)[0]
        if idx_in_bldgs.size <= 0:
            continue
        idx_in_jobs = where(parcel_ids[job_index_edu] == parcel)[0]
        draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
        building_ids[job_index_edu[idx_in_jobs]] = bldg_ids_in_bldgs[idx_edu[draw]]
    logger.log_status("%s educational jobs (out of %s edu. jobs) were placed."
                      % ((building_ids[job_index_edu] > 0).sum(), job_index_edu.size))

    # assign buildings to governmental jobs randomly
    unique_parcels = unique(parcel_ids[job_index_governmental])
    logger.log_status("Placing governmental jobs ...")
    for parcel in unique_parcels:
        idx_in_bldgs = where(parcel_ids_in_bldgs[idx_gov] == parcel)[0]
        if idx_in_bldgs.size <= 0:
            continue
        idx_in_jobs = where(parcel_ids[job_index_governmental] == parcel)[0]
        draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
        building_ids[job_index_governmental[idx_in_jobs]] = bldg_ids_in_bldgs[idx_gov[draw]]
    logger.log_status("%s governmental jobs (out of %s gov. jobs) were placed."
                      % ((building_ids[job_index_governmental] > 0).sum(), job_index_governmental.size))
    logger.log_status("The unplaced governmental jobs will be added to the non-home based jobs.")
    #tmp = unique(parcel_ids[job_index_governmental][building_ids[job_index_governmental]<=0])
    #output_dir = "/Users/hana"
    #write_to_text_file(os.path.join(output_dir, 'parcels_with_no_gov_bldg.txt'), tmp, delimiter='\n')

    # consider the unplaced governmental jobs together with other non-home-based jobs
    is_now_considered = logical_and(is_considered, building_ids <= 0)
    job_index_non_home_based = where(logical_and(is_now_considered,
                                                 logical_or(home_base_status == 0, is_governmental_job)))[0]

    # assign buildings to non_home_based jobs based on available space
    unique_parcels = unique(parcel_ids[job_index_non_home_based])
    # iterate over parcels
    logger.log_status("Placing non-home-based jobs ...")
    nhb_not_placed = 0
    for parcel in unique_parcels:
        idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
        if idx_in_bldgs.size <= 0:
            continue
        idx_in_jobs = where(parcel_ids[job_index_non_home_based] == parcel)[0]
        # sample proportionally to the building size
        weights = non_res_sqft_preferred[idx_in_bldgs]      # 1. preference: preferred building types with non-res sqft
        if weights.sum() <= 0:
            weights = preferred_nhb_btypes[idx_in_bldgs]    # 2. preference: preferred building types
        if weights.sum() <= 0:
            weights = non_res_sqft[idx_in_bldgs]            # 3. preference: any building with non-res sqft
        if weights.sum() <= 0:
            weights = bldgs_isnot_residential[idx_in_bldgs] # 4. preference: any non-res building
        if weights.sum() <= 0:
            nhb_not_placed = nhb_not_placed + idx_in_jobs.size
            continue
        draw = probsample_replace(idx_in_bldgs, idx_in_jobs.size, weights / float(weights.sum()))
        building_ids[job_index_non_home_based[idx_in_jobs]] = bldg_ids_in_bldgs[draw]
    logger.log_status("%s non home based jobs (out of %s nhb jobs) were placed. No capacity in buildings for %s jobs."
                      % ((building_ids[job_index_non_home_based] > 0).sum(),
                         job_index_non_home_based.size, nhb_not_placed))
    job_dataset.modify_attribute(name="building_id", data=building_ids)

    # re-classify unplaced non-home based jobs to home-based if parcels contain residential buildings
    is_now_considered = logical_and(parcel_ids > 0, building_ids <= 0)
    job_index_non_home_based_unplaced = where(logical_and(is_now_considered,
                                                          logical_and(home_base_status == 0,
                                                                      logical_not(is_governmental_job))))[0]
    unique_parcels = unique(parcel_ids[job_index_non_home_based_unplaced])
    logger.log_status("Try to reclassify non-home-based jobs (excluding governmental jobs) ...")
    nhb_reclass = 0
    for parcel in unique_parcels:
        idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
        if idx_in_bldgs.size <= 0:
            continue
        idx_in_jobs = where(parcel_ids[job_index_non_home_based_unplaced] == parcel)[0]
        where_residential = where(bldgs_is_residential[idx_in_bldgs])[0]
        if where_residential.size > 0:
            #home_base_status[job_index_non_home_based_unplaced[idx_in_jobs]] = 1 # set to home-based jobs
            nhb_reclass = nhb_reclass + idx_in_jobs.size
        else:
            draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
            #building_ids[job_index_non_home_based_unplaced[idx_in_jobs]] = bldg_ids_in_bldgs[draw]
    #job_dataset.modify_attribute(name="home_base_status", data = home_base_status)
    #job_dataset.modify_attribute(name="building_id", data = building_ids)

    job_index_home_based = where(logical_and(is_considered, home_base_status == 1))[0]
    logger.log_status("%s non-home based jobs reclassified as home-based." % nhb_reclass)

    # home_based jobs
    unique_parcels = unique(parcel_ids[job_index_home_based])
    capacity_in_buildings = building_dataset.compute_variables([
        "clip_to_zero(urbansim_parcel.building.total_home_based_job_space-building.aggregate(job.home_based_status==1))"],
        dataset_pool=dataset_pool)
    parcels_with_exceeded_capacity = []
    # iterate over parcels
    logger.log_status("Placing home-based jobs ...")
    for parcel in unique_parcels:
        idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
        idx_in_jobs = where(parcel_ids[job_index_home_based] == parcel)[0]
        capacity = capacity_in_buildings[idx_in_bldgs]
        if capacity.sum() <= 0:
            continue
        probcomb = ones((idx_in_bldgs.size, idx_in_jobs.size))
        taken = zeros(capacity.shape, dtype="int32")
        while True:
            zero_cap = where((capacity - taken) <= 0)[0]
            probcomb[zero_cap, :] = 0
            if probcomb.sum() <= 0:
                break
            req = probcomb.sum(axis=0)
            wmaxi = where(req == req.max())[0]
            drawjob = sample_noreplace(arange(wmaxi.size), 1)  # draw job from available jobs
            imax_req = wmaxi[drawjob]
            weights = probcomb[:, imax_req]
            # sample building
            draw = probsample_noreplace(arange(probcomb.shape[0]), 1,
                                        resize(weights / weights.sum(), (probcomb.shape[0],)))
            taken[draw] = taken[draw] + 1
            building_ids[job_index_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
            probcomb[:, imax_req] = 0
        if -1 in building_ids[job_index_home_based[idx_in_jobs]]:
            parcels_with_exceeded_capacity.append(parcel)
    parcels_with_exceeded_capacity = array(parcels_with_exceeded_capacity)

    logger.log_status("%s home based jobs (out of %s hb jobs) were placed."
                      % ((building_ids[job_index_home_based] > 0).sum(), job_index_home_based.size))

    # assign home-based status where missing
    # determine regional distribution
    idx_home_based = where(home_base_status == 1)[0]
    idx_non_home_based = where(home_base_status == 0)[0]
    idx_bt_missing = where(home_base_status <= 0)[0]
    # NB: <= 0 also matches the non-home-based code 0; missing statuses are presumably negative
    if idx_bt_missing.size > 0:
        # sample home-based status
        sample_bt = probsample_replace(array([1, 0]), idx_bt_missing.size,
                                       array([idx_home_based.size, idx_non_home_based.size]) /
                                       float(idx_home_based.size + idx_non_home_based.size))
        # coerce to int32 (on a 64 bit machine, sample_bt will be of type int64)
        home_base_status[idx_bt_missing] = sample_bt.astype(int32)
        job_dataset.modify_attribute(name="home_based_status", data=home_base_status)

    if out_storage is not None:
        job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage,
                                  attributes=AttributeType.PRIMARY)
    logger.log_status("Assigning building_id to jobs done.")
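# --- Illustration (not part of the model code): the four-level weight
# fallback used when placing non-home-based jobs above, as a small helper.
# A sketch; the candidate arrays stand in for the model's per-parcel slices.
from numpy import array

def first_positive_weights(candidates):
    """Return the first weight vector with positive mass, or None."""
    for weights in candidates:
        if weights.sum() > 0:
            return weights
    return None

weights = first_positive_weights([array([0, 0]),     # preferred types with non-res sqft
                                  array([0, 0]),     # preferred types
                                  array([120, 80]),  # any non-res sqft -> chosen here
                                  array([1, 1])])    # any non-res building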
def run(self, realestate_dataset,
        year=None,
        occupied_spaces_variable="occupied_units",
        total_spaces_variable="total_units",
        target_attribute_name='target_vacancy_rate',
        sample_from_dataset=None,
        sample_filter="",
        reset_attribute_value={},
        year_built='year_built',
        dataset_pool=None,
        append_to_realestate_dataset=False,
        table_name="development_projects",
        dataset_name="development_project",
        id_name='development_project_id',
        **kwargs):
    """
    sample_filter attribute/variable indicates which records in the dataset are eligible
    in the sampling for removal or cloning
    append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
    """
    if self.target_vancy_dataset is None:
        raise RuntimeError("target_vacancy_rate dataset is unspecified.")
    if not sample_from_dataset:
        sample_from_dataset = realestate_dataset
    #if dataset_pool is None:
    #    dataset_pool = SessionConfiguration().get_dataset_pool()

    alldata = dataset_pool.get_dataset('alldata')
    unit_names = dataset_pool.get_dataset('building_type').get_attribute('unit_name')
    sqft_per_job = dataset_pool.get_dataset('building_sqft_per_job')
    zones = realestate_dataset.compute_variables("building.disaggregate(parcel.zone_id)")
    type_ids = realestate_dataset.get_attribute("building_type_id")
    building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(zones.max(), type_ids.max())
    if year is None:
        year = SimulationState().get_current_time()
    this_year_index = where(self.target_vancy_dataset.get_attribute('year') == year)[0]
    target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)

    column_names = list(set(self.target_vancy_dataset.get_known_attribute_names())
                        - set([target_attribute_name, occupied_spaces_variable,
                               total_spaces_variable, 'year', '_hidden_id_']))
    column_names.sort(reverse=True)
    column_values = dict([(name, target_vacancy_for_this_year.get_attribute(name))
                          for name in column_names + [target_attribute_name]])

    independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
    dataset_known_attributes = realestate_dataset.get_known_attribute_names()
    sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
    for variable in independent_variables:
        if variable not in dataset_known_attributes:
            realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
        if variable not in sample_dataset_known_attributes:
            sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
    dataset_known_attributes = realestate_dataset.get_known_attribute_names()  # update after compute

    if sample_filter:
        short_name = VariableName(sample_filter).get_alias()
        if short_name not in dataset_known_attributes:
            filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
        else:
            filter_indicator = sample_from_dataset.get_attribute(short_name)
    else:
        filter_indicator = 1

    sampled_index = array([], dtype=int32)

    # log header
    if PrettyTable is not None:
        status_log = PrettyTable()
        status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
    else:
        logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
    error_log = ''
    for index in range(target_vacancy_for_this_year.size()):
        this_sampled_index = array([], dtype=int32)
        indicator = ones(realestate_dataset.size(), dtype='bool')
        sample_indicator = ones(sample_from_dataset.size(), dtype='bool')
        criterion = {}  # for logging
        for attribute in independent_variables:
            if attribute in dataset_known_attributes:
                dataset_attribute = realestate_dataset.get_attribute(attribute)
                sample_attribute = sample_from_dataset.get_attribute(attribute)
            else:
                raise ValueError("attribute %s used in target vacancy dataset cannot be found in dataset %s"
                                 % (attribute, realestate_dataset.get_dataset_name()))
            if attribute + '_min' in column_names:
                amin = target_vacancy_for_this_year.get_attribute(attribute + '_min')[index]
                criterion.update({attribute + '_min': amin})
                if amin != -1:
                    indicator *= dataset_attribute >= amin
                    sample_indicator *= sample_attribute >= amin
            if attribute + '_max' in column_names:
                amax = target_vacancy_for_this_year.get_attribute(attribute + '_max')[index]
                criterion.update({attribute + '_max': amax})
                if amax != -1:
                    indicator *= dataset_attribute <= amax
                    sample_indicator *= sample_attribute <= amax
            if attribute in column_names:
                aval = column_values[attribute][index]
                criterion.update({attribute: aval})
                if aval == -1:
                    continue
                elif aval == -2:
                    ## treat -2 in control totals column as complement set,
                    ## i.e. all other values not already specified in this column
                    indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                    sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                else:
                    indicator *= dataset_attribute == aval
                    sample_indicator *= sample_attribute == aval

        this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
        ## total/occupied_spaces_variable can be specified either as a universal name for all real estate
        ## or in the target_vacancy_rate dataset for each vacancy category
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]

        logger.be_quiet()  # temporarily disable logging
        realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
        realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
        sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
        if unit_names[index] == "residential_units":
            num_units = alldata.compute_variables("alldata.aggregate_all(household.building_type_id==%s)" % (index + 1))
            #persons = household_set.compute_variables("%s.number_of_agents(%s)" % (hh_ds_name, person_ds_name), resources=resources)
            num_units = num_units[0]
        else:
            num_units = alldata.compute_variables("alldata.aggregate_all(job.disaggregate(employment_submarket.building_type_id)==%s)" % (index + 1))
            num_units = num_units * building_sqft_per_job_table[1, (index + 1)]
            num_units = num_units[0]
            # need to make sure that job empsubmarket doesn't rely on building...
            # Must do non-home-based jobs only and then multiply by building_sqft
        logger.talk()
        actual_num = (indicator * realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
        #target_num = int(round( (indicator * realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /\
        target_num = int(round(num_units /
                               (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
        diff = target_num - actual_num
        if diff > 0:
            total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
            legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
            if legit_index.size > 0:
                mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                num_of_projects_to_sample = int(diff / mean_size)
                ## sample at least 1 project when diff > 0; otherwise it is an endless loop when num_of_projects_to_sample = 0
                num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                    lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                    this_sampled_index = concatenate((this_sampled_index, lucky_index))
                this_sampled_index = this_sampled_index[0:(1 + searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                sampled_index = concatenate((sampled_index, this_sampled_index))
            else:
                error_log += ("There is nothing to sample from %s and no new development will happen for "
                              % sample_from_dataset.get_dataset_name()
                              + ','.join([col + "=" + str(criterion[col]) for col in column_names]) + '\n')
        #if diff < 0:  # TODO: demolition; not yet supported

        ## log status
        action = "0"
        if this_sampled_index.size > 0:
            action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
            if diff > 0:
                action = "+" + str(action_num)
            if diff < 0:
                action = "-" + str(action_num)
        cat = [str(criterion[col]) for col in column_names]
        cat += [str(actual_num), str(target_num), str(diff), action]
        if PrettyTable is not None:
            status_log.add_row(cat)
        else:
            logger.log_status("\t".join(cat))

    if PrettyTable is not None:
        logger.log_status("\n" + status_log.get_string())
    if error_log:
        logger.log_error(error_log)

    result_data = {}
    result_dataset = None
    index = array([], dtype='int32')
    if sampled_index.size > 0:
        ### ideally duplicate_rows() is all that is needed to add newly cloned rows
        ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
        ##realestate_dataset.duplicate_rows(sampled_index)
        result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32'))
        ## also add 'independent_variables' to the new dataset
        for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
            if attribute in reset_attribute_value:
                result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
            else:
                result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
        if id_name and result_data and id_name not in result_data:
            result_data[id_name] = arange(sampled_index.size, dtype='int32') + 1

        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name=table_name, table_data=result_data)
        result_dataset = Dataset(id_name=id_name,
                                 in_storage=storage,
                                 in_table_name=table_name,
                                 dataset_name=dataset_name)
        index = arange(result_dataset.size())

    if append_to_realestate_dataset:
        if len(result_data) > 0:
            index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                    change_ids_if_not_unique=True)
        result_dataset = realestate_dataset

    return (result_dataset, index)
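# --- Illustration (not part of the model code): the over-sample-and-trim
# pattern above, in isolation. Keep drawing with replacement until the
# cumulative project size covers diff, then cut the tail at the first prefix
# reaching diff via searchsorted (randint stands in for sample_replace).
from numpy import array, concatenate, cumsum, int32, searchsorted
from numpy.random import randint

sizes = array([10, 25, 40, 15])   # total spaces of candidate projects
legit_index = array([0, 1, 2, 3])
diff = 60
sampled = array([], dtype=int32)
while sizes[sampled].sum() < diff:
    sampled = concatenate((sampled, legit_index[randint(0, legit_index.size, 2)]))
sampled = sampled[0:(1 + searchsorted(cumsum(sizes[sampled]), diff))]
assert sizes[sampled].sum() >= diff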
def run(self, realestate_dataset, living_units_dataset,
        year=None,
        occupied_spaces_variable="occupied_units",
        total_spaces_variable="total_units",
        target_attribute_name='target_vacancy_rate',
        sample_from_dataset=None,
        living_units_from_dataset=None,
        sample_filter="",
        reset_attribute_value={},
        year_built='year_built',
        dataset_pool=None,
        append_to_realestate_dataset=False,
        table_name="development_projects",
        dataset_name="development_project",
        id_name='development_project_id',
        **kwargs):
    """
    sample_filter attribute/variable indicates which records in the dataset are eligible
    in the sampling for removal or cloning
    append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
    """
    if self.target_vancy_dataset is None:
        raise RuntimeError("target_vacancy_rate dataset is unspecified.")
    if not sample_from_dataset or not living_units_from_dataset:
        logger.log_note('No development projects or no living units of development projects to sample from. '
                        'Development projects are taken from the building dataset and thus living units '
                        'from the living_units dataset.')
        sample_from_dataset = realestate_dataset
        living_units_from_dataset = living_units_dataset

    if dataset_pool is None:
        dataset_pool = SessionConfiguration().get_dataset_pool()
    if year is None:
        year = SimulationState().get_current_time()
    this_year_index = where(self.target_vancy_dataset.get_attribute('year') == year)[0]
    target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)

    column_names = list(set(self.target_vancy_dataset.get_known_attribute_names())
                        - set([target_attribute_name, occupied_spaces_variable,
                               total_spaces_variable, 'year', '_hidden_id_']))
    column_names.sort(reverse=True)
    column_values = dict([(name, target_vacancy_for_this_year.get_attribute(name))
                          for name in column_names + [target_attribute_name]])

    independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
    sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
    for attribute in independent_variables:
        if attribute not in sample_dataset_known_attributes:
            sample_from_dataset.compute_one_variable_with_unknown_package(attribute, dataset_pool=dataset_pool)
    sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()  # update after compute

    if sample_filter:
        short_name = VariableName(sample_filter).get_alias()
        if short_name not in sample_dataset_known_attributes:
            filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
        else:
            filter_indicator = sample_from_dataset.get_attribute(short_name)
    else:
        filter_indicator = 1

    sampled_index = array([], dtype=int32)

    # log header
    if PrettyTable is not None:
        status_log = PrettyTable()
        status_log.set_field_names(column_names + ["actual", "target", "expected", "difference", "action"])
    else:
        logger.log_status("\t".join(column_names + ["actual", "target", "expected", "difference", "action"]))
    error_log = ''
    for index in range(target_vacancy_for_this_year.size()):
        sample_indicator = ones(sample_from_dataset.size(), dtype='bool')
        criterion = {}  # for logging
        for attribute in independent_variables:
            if attribute in sample_dataset_known_attributes:
                sample_attribute = sample_from_dataset.get_attribute(attribute)
            else:
                raise ValueError("attribute %s used in target vacancy dataset cannot be found in dataset %s"
                                 % (attribute, realestate_dataset.get_dataset_name()))
            if attribute + '_min' in column_names:
                amin = target_vacancy_for_this_year.get_attribute(attribute + '_min')[index]
                criterion.update({attribute + '_min': amin})
                if amin != -1:
                    sample_indicator *= sample_attribute >= amin
            if attribute + '_max' in column_names:
                amax = target_vacancy_for_this_year.get_attribute(attribute + '_max')[index]
                criterion.update({attribute + '_max': amax})
                if amax != -1:
                    sample_indicator *= sample_attribute <= amax
            if attribute in column_names:
                aval = column_values[attribute][index]
                criterion.update({attribute: aval})
                if aval == -1:
                    continue
                elif aval == -2:
                    ## treat -2 in control totals column as complement set,
                    ## i.e. all other values not already specified in this column
                    sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                else:
                    sample_indicator *= sample_attribute == aval

        this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
        ## total/occupied_spaces_variable can be specified either as a universal name for all real estate
        ## or in the target_vacancy_rate dataset for each vacancy category
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
        # NB: `col` below appears to be the variable leaked from the list comprehension
        # over column_names above (Python 2 scoping), i.e. the last column name
        this_total_spaces_variable += '_' + str(criterion[col])
        this_occupied_spaces_variable += '_' + str(criterion[col])

        logger.be_quiet()  # temporarily disable logging
        realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
        realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
        sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
        logger.talk()

        actual_num = (realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
        # target_num is obsolete with this version.
        target_num = int(round((realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /
                               (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
        '''If the target vacancy is very small and the inflow to the region big, it is not
        enough to check only the current simulation year's vacancy. The simulation is more
        robust if the BTM anticipates the next year's population (of households and jobs).
        This version calculates the non-residential spaces based on sqft requirements of
        jobs per sector.
        #TODO: Make code more general to cover various stratifications in the real estate market.
        '''
        if criterion[col] == 0:
            """ Option without demography model
            idx = where(self.control_totals.get_attribute("year")==year + 1)[0]
            this_years_control_totals = DatasetSubset(self.control_totals, idx)
            expected_num = int(round( this_years_control_totals.get_attribute('total_number_of_households').sum() /\
                        (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))"""
            hh_dataset = dataset_pool.get_dataset('household')
            number_of_hh = hh_dataset.size()
            expected_num = int(round(number_of_hh /
                                     (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
        if criterion[col] > 0:
            # Getting control totals per sector in a dictionary
            idx = where(self.employment_control_totals.get_attribute("year") == year)[0]
            # Create index to get the subset of control totals for the next simulation year.
            this_years_control_totals = DatasetSubset(self.employment_control_totals, idx)
            # Create the subset of control totals.
            idx_non_home_based = where(logical_and(this_years_control_totals['home_based_status'] == 0,
                                                   this_years_control_totals['sector_id'] == criterion[col]))[0]
            # Index of non-home-based control totals in the current sector.
            # Only non-home-based jobs are supported. TODO: Support home-based jobs.
            this_years_control_totals = DatasetSubset(this_years_control_totals, idx_non_home_based)
            # idx_current_sector = where(this_years_control_totals['sector_id'] == criterion[col])[0]
            next_years_jobs = this_years_control_totals['number_of_jobs']
            controled_sectors = this_years_control_totals['sector_id']
            sector_job_totals = dict(zip(controled_sectors, next_years_jobs.T))
            # dictionary with sector ids as keys and numbers of jobs as values,
            # to ensure multiplication with the right requirements

            # Getting info on required sqft per sector.
            # a_zone_id = min(self.building_sqft_per_job['zone_id'])  # Get a zone number from the
            #     definition table; taking the minimum is arbitrary. This code assumes constant sqft
            #     requirements in all zones. TODO: Support different sqft requirements per zone.
            # idx_zone = where(self.building_sqft_per_job['zone_id'] == a_zone_id)[0]
            # subset_sqft_per_job = DatasetSubset(self.building_sqft_per_job, idx_zone)
            # sqft_per_job = subset_sqft_per_job['building_sqft_per_job']
            # sectors_with_requirements = subset_sqft_per_job['sector_id']
            # requirements_by_sector = dict(zip(sectors_with_requirements, sqft_per_job.T))
            #
            # needed_sqft_over_all_sectors = sector_job_totals[criterion[col]] * requirements_by_sector[criterion[col]]
            # expected_num = int(round( needed_sqft_over_all_sectors /\
            #                (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))

            idx_sector = where(self.sectors['sector_id'] == criterion[col])
            subset_sqft_per_job_sector = DatasetSubset(self.sectors, idx_sector)
            needed_sqft_current_sector = sector_job_totals[criterion[col]] * \
                                         subset_sqft_per_job_sector.get_attribute('sqm_per_job')
            expected_num = int(round(needed_sqft_current_sector /
                                     (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))

        diff = expected_num - actual_num
        # previous version, which checked the current year's occupation:
        #diff = target_num - actual_num

        this_sampled_index = array([], dtype=int32)
        if diff > 0:
            total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
            legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
            if legit_index.size > 0:
                mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                num_of_projects_to_sample = int(diff / mean_size)
                ## sample at least 1 project when diff > 0; otherwise it is an endless loop when num_of_projects_to_sample = 0
                num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                    lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                    this_sampled_index = concatenate((this_sampled_index, lucky_index))
                this_sampled_index = this_sampled_index[0:(1 + searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                sampled_index = concatenate((sampled_index, this_sampled_index))
            else:
                error_log += ("There is nothing to sample from %s and no new development will happen for "
                              % sample_from_dataset.get_dataset_name()
                              + ','.join([col + "=" + str(criterion[col]) for col in column_names]) + '\n')
        #if diff < 0:  # TODO: demolition; not yet supported

        ## log status
        action = "0"
        if this_sampled_index.size > 0:
            action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
            if diff > 0:
                action = "+" + str(action_num)
            if diff < 0:
                action = "-" + str(action_num)
        cat = [str(criterion[col]) for col in column_names]
        cat += [str(actual_num), str(target_num), str(expected_num), str(diff), action]
        if PrettyTable is not None:
            status_log.add_row(cat)
        else:
            logger.log_status("\t".join(cat))

    if PrettyTable is not None:
        logger.log_status("\n" + status_log.get_string())
    if error_log:
        logger.log_error(error_log)

    #logger.log_note("Updating attributes of %s sampled development events." % sampled_index.size)
    result_data = {}
    result_dataset = None
    index = array([], dtype='int32')
    if sampled_index.size > 0:
        ### ideally duplicate_rows() is all that is needed to add newly cloned rows
        ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
        ##realestate_dataset.duplicate_rows(sampled_index)
        #result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32'))
        # Resetting year_built here is commented out because it is overwritten in the loop below.
        ## also add 'independent_variables' to the new dataset
        for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
            if attribute in reset_attribute_value:
                result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
            else:
                result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
        # Reset the year_built attribute.
        result_data['year_built'] = resize(year, sampled_index.size).astype('int32')
        # TODO: Uncomment the following three lines to reset land_area, tax_exempt, zgde.
        # Test still to be done. parcel_id should be changed by the location choice model.
#result_data['land_area'] = resize(-1, sampled_index.size).astype('int32') #result_data['tax_exempt'] = resize(-1, sampled_index.size).astype('int32') #result_data['zgde'] = resize(-1, sampled_index.size).astype('int32') if id_name and result_data and id_name not in result_data: result_data[id_name] = arange(sampled_index.size, dtype='int32') + 1 storage = StorageFactory().get_storage('dict_storage') storage.write_table(table_name=table_name, table_data=result_data) result_dataset = Dataset(id_name = id_name, in_storage = storage, in_table_name = table_name, dataset_name = dataset_name ) index = arange(result_dataset.size()) if append_to_realestate_dataset: if len(result_data) > 0: logger.start_block('Appending development events and living units') logger.log_note("Append %d sampled development events to real estate dataset." % len(result_data[result_data.keys()[0]])) index = realestate_dataset.add_elements(result_data, require_all_attributes=False, change_ids_if_not_unique=True) logger.start_block('Creating id mapping') # remember the ids from the development_event_history dataset. mapping_new_old = self.get_mapping_of_old_ids_to_new_ids(result_data, realestate_dataset, index) logger.end_block() '''Getting living units associated to selected development events by iterating over the mapping dictionary and selecting each time all the living units according to the old building ids. The living units are then added to selected_living_units_dict which is then added to living_units dataset. A dictionary is needed to use the add_elements method. Creating a dictionary also clones the records. The subset is only a view on the original table.''' selected_living_units_dict = {} counter = 0 for new_id in mapping_new_old: if counter == 0: logger.log_note("Log assignment of every 100th development event") counter +=1 if counter % 100 == 0: logger.log_note("Assembling living units for development event %s" % new_id) sel_index = [i for i in range(0, len(living_units_from_dataset['building_id'])) if living_units_from_dataset['building_id'][i] == mapping_new_old[new_id]] living_units_this_sampled_building = DatasetSubset(living_units_from_dataset, sel_index) if len(selected_living_units_dict) == 0: logger.start_block('Assign new building id') for attribute_name in living_units_this_sampled_building.get_primary_attribute_names(): column = living_units_this_sampled_building.get_attribute(attribute_name) if attribute_name == 'building_id': new_ids = array(living_units_this_sampled_building.size() * [new_id], dtype=int32) selected_living_units_dict.update({attribute_name: new_ids}) else: selected_living_units_dict.update({attribute_name: column}) logger.end_block() else: this_living_units_dict ={} for attribute_name in living_units_this_sampled_building.get_primary_attribute_names(): column = living_units_this_sampled_building.get_attribute(attribute_name) if attribute_name == 'building_id': new_ids = array(living_units_this_sampled_building.size() * [new_id], dtype=int32) this_living_units_dict.update({attribute_name: new_ids}) else: this_living_units_dict.update({attribute_name: column}) for attribute_name in living_units_this_sampled_building.get_primary_attribute_names(): selected_living_units_dict[attribute_name] = concatenate([selected_living_units_dict[attribute_name], this_living_units_dict[attribute_name]]) # Reset year_built attribute of living units selected_living_units_dict['year_built'] = resize(year, len(selected_living_units_dict['year_built'])).astype('int32') # TODO: Uncomment the following 
two lines to reset rent_price, zgde. Test still to be done # selected_living_units_dict['rent_price'] = resize(-1, len(selected_living_units_dict['rent_price'])).astype('int32') # selected_living_units_dict['zgde'] = resize(-1, len(selected_living_units_dict['zgde'])).astype('int32') index_units = living_units_dataset.add_elements(selected_living_units_dict, require_all_attributes=False, change_ids_if_not_unique=True) # Check consistency of buildings and living units. All living units must belong to a building if SimulationState().get_current_time() - SimulationState().get_start_time() == 1: for building_id in living_units_dataset['building_id']: if building_id not in realestate_dataset['building_id']: logger.log_warning('Living unit with building_id %d has no corresponding building.' % (building_id)) # Uncomment next line to enforce consistency of living units and building dataset. Then you may uncomment the two previous lines. # assert(building_id in realestate_dataset['building_id']), 'Living unit with building_id %d has no corresponding building.' % (building_id) result_dataset = realestate_dataset logger.end_block() # It is recommended to derive all variables of buildings in relation to living units via expression variables. # However, if the building dataset contains attributes derived from living units these attributes should be consistent # with the living units table. Below an example. # Residential_units attribute of each building should be consistent with the number of living units associated. # self.check_consistency_of_living_units_per_building(realestate_dataset, living_units_dataset, mapping_new_old) return (result_dataset, index)
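# -- Illustration (not part of the model): the core of the sampling step in run() above.
# Projects are drawn with replacement until their summed capacity covers the deficit `diff`;
# searchsorted on the cumulative sum then trims the draw at the first project that crosses
# the target. A standalone numpy sketch with made-up numbers; randint stands in for
# sample_replace(legit_index, 2).
if __name__ == '__main__':
    from numpy import array, concatenate, cumsum, searchsorted, int32
    from numpy.random import randint

    total_spaces = array([10, 40, 25, 5])   # units per candidate project
    legit_index = array([0, 1, 2, 3])       # indices eligible for sampling
    diff = 60                               # units still needed
    sampled = array([], dtype=int32)
    while total_spaces[sampled].sum() < diff:
        lucky = legit_index[randint(0, legit_index.size, 2)]
        sampled = concatenate((sampled, lucky))
    # keep only the prefix of draws needed to reach the deficit
    sampled = sampled[0:1 + searchsorted(cumsum(total_spaces[sampled]), diff)]
    print sampled, total_spaces[sampled].sum()   # total is >= 60 with minimal overshoot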
def _add(self, agents_pool, amount, agent_dataset, location_dataset,
         this_refinement, dataset_pool):
    fit_index = self.get_fit_agents_index(agent_dataset,
                                          this_refinement.agent_expression,
                                          this_refinement.location_expression,
                                          dataset_pool)
    if this_refinement.agent_expression is not None and len(this_refinement.agent_expression) > 0:
        agents_index = where(agent_dataset.compute_variables(this_refinement.agent_expression,
                                                             dataset_pool=dataset_pool) > 0)[0]
    else:
        agents_index = arange(agent_dataset.size())
    movers_index = array([], dtype="int32")
    ar_pool = array(agents_pool)
    fitted_agents_pool = ar_pool[in1d(ar_pool, agents_index)]
    amount_from_agents_pool = min(amount, fitted_agents_pool.size)
    prob_string = self.probability_attributes.get(agent_dataset.get_dataset_name(), None)
    if prob_string is not None:
        probs_values = (agent_dataset.compute_variables([prob_string], dataset_pool=dataset_pool)).astype('int32')
        uprobs_values = unique(probs_values[fit_index])
        if uprobs_values.size > 0:
            probs_existing = array(ndimage_sum(ones(fit_index.size),
                                               labels=probs_values[fit_index], index=uprobs_values))
    if amount_from_agents_pool > 0:
        if prob_string is not None and uprobs_values.size > 0:
            prob_pool_values = probs_values[fitted_agents_pool]
            probs_pool = zeros(prob_pool_values.size)
            for i in range(uprobs_values.size):
                probpoolidx = where(prob_pool_values == uprobs_values[i])[0]
                if probpoolidx.size == 0:
                    continue
                probs_pool[probpoolidx] = probs_existing[i] / float(probpoolidx.size)
            probs_pool[probs_pool <= 0] = (probs_existing.min() / 10.0) / float((probs_pool <= 0).sum())
        else:
            probs_pool = ones(fitted_agents_pool.size)

        agents_index_from_agents_pool = probsample_noreplace(fitted_agents_pool, amount_from_agents_pool,
                                                             prob_array=probs_pool)
        [agents_pool.remove(i) for i in agents_index_from_agents_pool]
        if fit_index.size == 0:
            ## cannot find agents to copy their location from or to clone; place agents from agents_pool
            if amount > amount_from_agents_pool:
                logger.log_warning("Refinement requests to add %i agents, but there are only %i agents "
                                   "subtracted from previous action(s) and no agents satisfying %s to clone from; "
                                   "adding %i agents instead" % (amount, amount_from_agents_pool,
                                                                 ' and '.join([this_refinement.agent_expression,
                                                                               this_refinement.location_expression]).strip(' and '),
                                                                 amount_from_agents_pool,))
                amount = amount_from_agents_pool
            # sample from all suitable locations
            is_suitable_location = location_dataset.compute_variables(this_refinement.location_expression,
                                                                      dataset_pool=dataset_pool)
            location_id_for_agents_pool = sample_replace(location_dataset.get_id_attribute()[is_suitable_location],
                                                         amount_from_agents_pool)
        else:
            # sample from the locations of suitable agents
            agents_index_for_location = sample_replace(fit_index, amount_from_agents_pool)
            location_id_for_agents_pool = agent_dataset.get_attribute(location_dataset.get_id_name()[0])[agents_index_for_location]
            movers_index = concatenate((movers_index, agents_index_for_location))
    elif fit_index.size == 0:
        ## no agents in agents_pool and no agents to clone either --> fail
        logger.log_error("Action 'add' failed: there is no agent subtracted from a previous action, "
                         "and no suitable agents satisfying %s to clone from."
                         % ' and '.join([this_refinement.agent_expression,
                                         this_refinement.location_expression]).strip('and'))
        return

    if amount > amount_from_agents_pool:
        agents_index_to_clone = sample_replace(fit_index, amount - amount_from_agents_pool)
        movers_index = concatenate((movers_index, agents_index_to_clone))

    if movers_index.size > 0 and this_refinement.location_capacity_attribute is not None \
            and len(this_refinement.location_capacity_attribute) > 0:
        movers_location_id = agent_dataset.get_attribute(location_dataset.get_id_name()[0])[movers_index]
        movers_location_index = location_dataset.get_id_index(movers_location_id)
        # see the previous comment about the histogram function
        num_of_movers_by_location = histogram(movers_location_index, bins=arange(location_dataset.size() + 1))[0]
        num_of_agents_by_location = location_dataset.compute_variables("number_of_agents=%s.number_of_agents(%s)" %
                                                                       (location_dataset.dataset_name,
                                                                        agent_dataset.dataset_name),
                                                                       dataset_pool=dataset_pool)
        expand_factor = safe_array_divide((num_of_agents_by_location + num_of_movers_by_location).astype('float32'),
                                          num_of_agents_by_location,
                                          return_value_if_denominator_is_zero=1.0)
        new_values = round_(expand_factor * location_dataset.get_attribute(this_refinement.location_capacity_attribute))
        location_dataset.modify_attribute(this_refinement.location_capacity_attribute, new_values)
        self._add_refinement_info_to_dataset(location_dataset, self.id_names, this_refinement,
                                             index=movers_location_index)

    if amount_from_agents_pool > 0:
        agent_dataset.modify_attribute('building_id',
                                       -1 * ones(agents_index_from_agents_pool.size, dtype='int32'),
                                       agents_index_from_agents_pool)
        agent_dataset.modify_attribute(location_dataset.get_id_name()[0],
                                       location_id_for_agents_pool,
                                       agents_index_from_agents_pool)
        self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement,
                                             index=agents_index_from_agents_pool)
        self.processed_locations['add'] = concatenate((self.processed_locations.get('add', array([])),
                                                       unique(location_dataset[self.subarea_id_name][location_dataset.get_id_index(location_id_for_agents_pool)])))

    if amount > amount_from_agents_pool:
        new_agents_index = agent_dataset.duplicate_rows(agents_index_to_clone)
        self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement,
                                             index=agents_index_to_clone)
        self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement,
                                             index=new_agents_index)
        if location_dataset.get_dataset_name() != 'building':
            agent_dataset.modify_attribute('building_id',
                                           -1 * ones(new_agents_index.size, dtype='int32'),
                                           new_agents_index)
        self.processed_locations['add'] = concatenate((self.processed_locations.get('add', array([])),
                                                       unique(agent_dataset[self.subarea_id_name][new_agents_index])))
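# -- Illustration (not part of the model): how the location capacity update in _add()
# scales capacities. If movers raise a location's agent count from n to n + m, its capacity
# attribute is multiplied by (n + m) / n, falling back to 1.0 where n == 0 (this replicates
# the safe_array_divide(..., return_value_if_denominator_is_zero=1.0) call inline).
# Standalone numpy sketch with made-up numbers.
if __name__ == '__main__':
    from numpy import array, round_, where

    agents = array([10., 0., 4.])     # existing agents per location
    movers = array([5., 2., 0.])      # newly placed movers per location
    expand_factor = (agents + movers) / where(agents > 0, agents, 1.0)
    expand_factor[agents == 0] = 1.0  # 1.0 where the denominator is zero
    capacity = array([100., 50., 20.])
    print round_(expand_factor * capacity)   # -> [ 150.   50.   20.]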
def run(self, individual_dataset, counts_dataset, fraction_dataset,
        id_name1='blockgroup_id', id_name2='zone_id',
        fraction_attribute_name='fraction', out_storage=None):
    """Assign individuals (households) to zones: for each (zone, building type) pair,
    the given household count is distributed over blockgroups according to the given
    fractions, sampling matching households with replacement."""
    assert id_name1 in individual_dataset.get_known_attribute_names()
    if id_name2 not in individual_dataset.get_known_attribute_names():
        individual_dataset.add_primary_attribute(-1 * ones(individual_dataset.size()), id_name2)
    lucky_household_index = array([], dtype="int32")
    hh_zone_id = array([], dtype="int32")
    output_data = {}
    logger.start_block("Start assigning individuals")
    zone_ids = counts_dataset.get_attribute(id_name2)
    building_types = counts_dataset.get_attribute("building_type_id")
    households = counts_dataset.get_attribute("households")
    for zone_id, building_type, n in zip(zone_ids, building_types, households):
        logger.log_status("n(%s=%i & %s=%i) = %s:" % (id_name2, zone_id, "building_type_id", building_type, n))
        fraction_index = where(fraction_dataset.get_attribute(id_name2) == zone_id)
        blockgroup_ids = fraction_dataset.get_attribute_by_index(id_name1, fraction_index)
        fractions = fraction_dataset.get_attribute_by_index(fraction_attribute_name, fraction_index)
        for blockgroup_id, fraction in zip(blockgroup_ids, fractions):
            nn = int(round(n * fraction))
            logger.log_status("\tfrac(%s=%s) = %s, n = %s" % ("blockgroup_id", blockgroup_id, fraction, nn))
            if nn >= 1:
                suitable_household_index = where(logical_and(
                    individual_dataset.get_attribute(id_name1) == blockgroup_id,
                    individual_dataset.get_attribute("building_type_id") == building_type))[0]
                logger.log_status("\t\t sample %s from %s suitable households" % (nn, suitable_household_index.size))
                if suitable_household_index.size == 0:
                    logger.log_warning("\tNo suitable households")
                    continue
                lucky_household_index = concatenate((lucky_household_index,
                                                     sample_replace(suitable_household_index, nn)))
                hh_zone_id = concatenate((hh_zone_id, [zone_id] * nn))
    for attribute_name in individual_dataset.get_known_attribute_names():
        output_data[attribute_name] = individual_dataset.get_attribute_by_index(attribute_name, lucky_household_index)
    output_data["original_household_id"] = output_data["household_id"]
    output_data["household_id"] = 1 + arange(lucky_household_index.size)
    output_data["zone_id"] = hh_zone_id

    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name="households", table_data=output_data)
    output_dataset = Dataset(in_storage=storage, id_name=["household_id"], in_table_name="households")
    output_dataset.write_dataset(out_storage=out_storage, out_table_name="households")
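# -- Illustration (not part of the model): the allocation rule used above. The household
# count n for a (zone, building_type) pair is split across blockgroups by the given
# fractions, rounding each share to the nearest integer; in general the rounded shares
# need not sum exactly to n. Values below are made up.
if __name__ == '__main__':
    n = 7
    fractions = {101: 0.6, 102: 0.3, 103: 0.1}   # hypothetical blockgroup fractions
    for blockgroup_id, fraction in fractions.items():
        nn = int(round(n * fraction))
        print blockgroup_id, nn   # -> 101 4, 102 2, 103 1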
def run(self, location_set, agent_event_set, agent_set, current_year,
        disaggregate_to=None, location_characteristics=[], dataset_pool=None):
    """
    The agent_event_set is expected to have the attributes:
    grid_id, scheduled_year, total_number, is_percentage, change_type,
    (optionally other agent characteristics).
    'grid_id' is not a mandatory name, but it must match the id name of the location_set;
    thus the model works on any geography level. If it is an aggregated geography and the
    agents need to be disaggregated into a lower geography, pass the dataset to disaggregate
    to in the 'disaggregate_to' argument (e.g. location_set=zone, disaggregate_to=building).
    'is_percentage' (bool) determines whether 'total_number' is a percentage of the existing
    agents (True) or an absolute number (False).
    'change_type' can take the values 'D' (delete) and 'A' (add) and determines the type of
    change for the agents. If this column is missing, the model uses 'D' as the default for
    all entries in the agent_event_set.
    If the change type is 'D', the method finds agents from the agent_set (jobs, households)
    located in the given locations (e.g. grid_id), then samples the given amount for the
    current_year and unplaces them. If other characteristics columns are contained in the
    agent_event_set, their names must match column names in the agent_set (e.g. 'sector_id'
    for jobs); in such a case the deletion is restricted to agents matching the given
    characteristics.
    If the change type is 'A', the agent_event_set can contain attributes of the location
    set. They determine which agents are used for sampling the missing characteristics of
    the added agents, for example values of income or persons. Values of these
    characteristics can be -1 if no restriction on the sampling is desired. Such location
    attributes must be passed in the 'location_characteristics' argument.
    """
    if not agent_event_set or (agent_event_set.size() == 0):
        logger.log_status("No %s agents for event processing." % agent_set.get_dataset_name())
        return
    idx_of_events_this_year = agent_event_set.get_attribute("scheduled_year") == current_year
    if idx_of_events_this_year.sum() == 0:
        logger.log_status("No %s agents for this year's event processing." % agent_set.get_dataset_name())
        return
    self.dataset_pool = self.create_dataset_pool(dataset_pool)
    location_id_name = location_set.get_id_name()[0]
    location_ids_in_event_set = agent_event_set.get_attribute_by_index(location_id_name, idx_of_events_this_year)
    other_characteristics = agent_event_set.get_known_attribute_names()
    for name in agent_event_set.get_id_name() + [location_id_name, "scheduled_year", "total_number",
                                                 "is_percentage", "change_type"] + location_characteristics:
        if name in other_characteristics:
            other_characteristics.remove(name)
    totals = agent_event_set.get_attribute_by_index("total_number", idx_of_events_this_year)
    if "change_type" not in agent_event_set.get_known_attribute_names():
        types_of_change = array(idx_of_events_this_year.sum() * ['D'])
    else:
        types_of_change = agent_event_set.get_attribute_by_index("change_type", idx_of_events_this_year)
    if "is_percentage" not in agent_event_set.get_known_attribute_names():
        is_percentage = zeros(idx_of_events_this_year.sum(), dtype='bool8')
    else:
        is_percentage = agent_event_set.get_attribute_by_index("is_percentage", idx_of_events_this_year)
    # pre-load the other characteristics
    for name in other_characteristics:
        agent_event_set.get_attribute(name)
    if location_id_name not in agent_set.get_known_attribute_names():
        # compute the agents' locations
        agent_set.compute_one_variable_with_unknown_package(location_id_name, self.dataset_pool)

    # iterate over the rows of the event set
    for ilocation_id in range(location_ids_in_event_set.size):
        agent_ids = agent_set.get_attribute(location_id_name)  # agents' location ids
        location_id = location_ids_in_event_set[ilocation_id]
        change_type = types_of_change[ilocation_id]
        agents_to_consider = agent_ids == location_id
        for characteristics in other_characteristics:
            characteristics_value = agent_event_set[characteristics][idx_of_events_this_year][ilocation_id]
            agents_to_consider = logical_and(agents_to_consider,
                                             agent_set.get_attribute(characteristics) == characteristics_value)
        number_of_agents = totals[ilocation_id]
        agent_index = where(agents_to_consider)[0]
        if is_percentage[ilocation_id]:  # number_of_agents is a percentage; convert to an absolute number
            number_of_agents = agent_index.size * number_of_agents / 100.0
        number_of_agents = int(number_of_agents)
        if change_type == 'D':
            if number_of_agents > 0:
                if agent_index.size <= number_of_agents:  # unplace all agents
                    unplace_index = agent_index
                else:  # sample agents
                    unplace_index = sample_noreplace(agent_index, number_of_agents)
                agent_set.modify_attribute(name=location_id_name,
                                           data=resize(array([-1], dtype=agent_ids.dtype), unplace_index.size),
                                           index=unplace_index)
                logger.log_status('%s agents deleted from location %s' % (number_of_agents, location_id))
        elif change_type == 'A':
            if number_of_agents <= 0:
                continue
            data = {agent_set.get_id_name()[0]: arange(1, number_of_agents + 1, 1) + agent_set.get_id_attribute().max()}
            if disaggregate_to is not None:
                if location_id_name not in disaggregate_to.get_known_attribute_names():
                    disaggregate_to.compute_one_variable_with_unknown_package(location_id_name, self.dataset_pool)
                disaggr_idx = where(disaggregate_to[location_id_name] == location_id)[0]
                if disaggr_idx.size <= 0:
                    logger.log_warning('No %s locations found for %s=%s. %s agents not created.' % (
                        disaggregate_to.get_dataset_name(), location_id_name, location_id, number_of_agents))
                    continue
                # sample disaggregated locations
                disaggr_sidx = sample_replace(disaggr_idx, number_of_agents)
                data[disaggregate_to.get_id_name()[0]] = disaggregate_to.get_id_attribute()[disaggr_sidx]
            else:
                data[location_id_name] = array([location_id] * number_of_agents)
            for characteristics in other_characteristics:
                characteristics_value = agent_event_set[characteristics][idx_of_events_this_year][ilocation_id]
                data[characteristics] = array([characteristics_value] * number_of_agents)
            # determine agents with the desired characteristics in order to impute
            # the missing characteristics of the new agents
            loc_indicator = ones(agent_set.size(), dtype='bool8')
            for locchar in location_characteristics:
                if agent_event_set[locchar][idx_of_events_this_year][ilocation_id] == -1:
                    continue
                var = agent_set.compute_one_variable_with_unknown_package(locchar, self.dataset_pool)
                loc_indicator = logical_and(loc_indicator,
                                            var == agent_event_set[locchar][idx_of_events_this_year][ilocation_id])
            if loc_indicator.sum() == 0:
                loc_indicator[:] = True
            clone_attr_index = sample_replace(where(loc_indicator)[0], number_of_agents)
            # impute the remaining attributes
            for attr in agent_set.get_primary_attribute_names():
                if attr not in data:
                    data[attr] = agent_set[attr][clone_attr_index]
            agent_set.add_elements(data, require_all_attributes=False)
            if location_id_name not in agent_set.get_known_attribute_names():
                # re-compute the agents' locations, because add_elements deleted all computed attributes
                agent_set.compute_one_variable_with_unknown_package(location_id_name, self.dataset_pool)
            logger.log_status('%s agents added to location %s' % (number_of_agents, location_id))
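# -- Illustration (not part of the model): a minimal agent_event_set as described in the
# docstring above, written as a dict_storage-style table. All values are made up; the id
# column name ('event_id') is hypothetical, and 'grid_id' must match the id name of the
# location_set actually used.
if __name__ == '__main__':
    example_agent_events = {
        "event_id":       [1, 2, 3],
        "grid_id":        [4001, 4001, 5002],
        "scheduled_year": [2005, 2006, 2005],
        "total_number":   [50, 10, 200],
        "is_percentage":  [False, True, False],  # row 2: remove 10% of the matching agents
        "change_type":    ['D', 'D', 'A'],       # 'D' = delete (the default), 'A' = add
        "sector_id":      [3, 2, 5],             # optional agent characteristic (e.g. for jobs)
    }
    print example_agent_events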
def run(self, year=None, target_attribute_name='number_of_households', sample_filter="",
        reset_dataset_attribute_value={}, dataset_pool=None, **kwargs):
    """
    sample_filter attribute/variable indicates which records in the dataset are eligible
    for sampling for removal or cloning.
    """
    # if dataset_pool is None:
    #     dataset_pool = SessionConfiguration().get_dataset_pool()
    if year is None:
        year = SimulationState().get_current_time()
    this_year_index = where(self.control_totals.get_attribute('year') == year)[0]
    control_totals_for_this_year = DatasetSubset(self.control_totals, this_year_index)
    column_names = list(set(self.control_totals.get_known_attribute_names()) -
                        set([target_attribute_name, 'year', '_hidden_id_']))
    column_names.sort(reverse=True)
    column_values = dict([(name, control_totals_for_this_year.get_attribute(name))
                          for name in column_names + [target_attribute_name]])

    independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
    dataset_known_attributes = self.dataset.get_known_attribute_names()
    for variable in independent_variables:
        if variable not in dataset_known_attributes:
            self.dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
    dataset_known_attributes = self.dataset.get_known_attribute_names()  # update after compute

    if sample_filter:
        short_name = VariableName(sample_filter).get_alias()
        if short_name not in dataset_known_attributes:
            filter_indicator = self.dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
        else:
            filter_indicator = self.dataset.get_attribute(short_name)
    else:
        filter_indicator = 1

    to_be_cloned = array([], dtype=int32)
    to_be_removed = array([], dtype=int32)
    # log header
    if PrettyTable is not None:
        status_log = PrettyTable()
        status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
    else:
        logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
    error_log = ''
    for index in range(control_totals_for_this_year.size()):
        lucky_index = None
        indicator = ones(self.dataset.size(), dtype='bool')
        criterion = {}
        for attribute in independent_variables:
            if attribute in dataset_known_attributes:
                dataset_attribute = self.dataset.get_attribute(attribute)
            else:
                raise ValueError("attribute %s used in the control total dataset cannot be found in dataset %s"
                                 % (attribute, self.dataset.get_dataset_name()))

            if attribute + '_min' in column_names:
                amin = column_values[attribute + '_min'][index]
                criterion.update({attribute + '_min': amin})
                if amin != -1:
                    indicator *= dataset_attribute >= amin
            if attribute + '_max' in column_names:
                amax = column_values[attribute + '_max'][index]
                criterion.update({attribute + '_max': amax})
                if amax != -1:
                    indicator *= dataset_attribute <= amax
            if attribute in column_names:
                aval = column_values[attribute][index]
                criterion.update({attribute: aval})
                if aval == -1:
                    continue
                elif aval == -2:
                    ## treat -2 in a control totals column as the complement set,
                    ## i.e. all other values not already specified in this column
                    complement_values = setdiff1d(dataset_attribute, column_values[attribute])
                    has_one_of_the_complement_value = zeros(dataset_attribute.size, dtype='bool')
                    for value in complement_values:
                        has_one_of_the_complement_value += dataset_attribute == value
                    indicator *= has_one_of_the_complement_value
                else:
                    indicator *= dataset_attribute == aval

        target_num = column_values[target_attribute_name][index]
        ## if the accounting attribute is None, count the number of agents with indicator == True
        if self.dataset_accounting_attribute is None:
            actual_num = indicator.sum()
            action_num = 0
            diff = target_num - actual_num
            if actual_num != target_num:
                legit_index = where(logical_and(indicator, filter_indicator))[0]
                if legit_index.size > 0:
                    if actual_num < target_num:
                        lucky_index = sample_replace(legit_index, target_num - actual_num)
                        to_be_cloned = concatenate((to_be_cloned, lucky_index))
                    elif actual_num > target_num:
                        lucky_index = sample_noreplace(legit_index, actual_num - target_num)
                        to_be_removed = concatenate((to_be_removed, lucky_index))
                    action_num = lucky_index.size
                else:
                    error_log += "There is nothing to sample from %s and no action will happen for " % self.dataset.get_dataset_name() + \
                                 ','.join([col + "=" + str(criterion[col]) for col in column_names]) + '\n'
        else:
            ## sum the accounting attribute for agents with indicator == True;
            ## assumes dataset_accounting_attribute is a primary attribute
            accounting = self.dataset.get_attribute(self.dataset_accounting_attribute) * indicator
            actual_num = accounting.sum()
            mean_size = float(actual_num) / indicator.sum()
            action_num = 0
            diff = target_num - actual_num
            if actual_num != target_num:
                legit_index = where(logical_and(indicator, filter_indicator))[0]
                if legit_index.size > 0:
                    while actual_num + action_num < target_num:
                        lucky_index = sample_replace(legit_index, int(ceil((target_num - actual_num - action_num) / mean_size)))
                        action_num += accounting[lucky_index].sum()
                        to_be_cloned = concatenate((to_be_cloned, lucky_index))
                    while actual_num - action_num > target_num:
                        lucky_index = sample_noreplace(legit_index, int(ceil((actual_num - target_num - action_num) / mean_size)))
                        action_num += accounting[lucky_index].sum()
                        to_be_removed = concatenate((to_be_removed, lucky_index))
                else:
                    error_log += "There is nothing to sample from %s and no action will happen for " % self.dataset.get_dataset_name() + \
                                 ','.join([col + "=" + str(criterion[col]) for col in column_names]) + '\n'

        ## log status
        action = "0"
        if lucky_index is not None:
            if actual_num < target_num:
                action = "+" + str(action_num)
            if actual_num > target_num:
                action = "-" + str(action_num)
        cat = [str(criterion[col]) for col in column_names]
        cat += [str(actual_num), str(target_num), str(diff), action]

        if PrettyTable is not None:
            status_log.add_row(cat)
        else:
            logger.log_status("\t".join(cat))

    if PrettyTable is not None:
        logger.log_status("\n" + status_log.get_string())
    if error_log:
        logger.log_error(error_log)

    clone_data = {}
    if to_be_cloned.size > 0:
        ### ideally duplicate_rows() is all that is needed to add the newly cloned rows;
        ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
        ## self.dataset.duplicate_rows(to_be_cloned)
        logger.log_status()
        for attribute in dataset_known_attributes:
            if attribute in reset_dataset_attribute_value:
                clone_data[attribute] = resize(array(reset_dataset_attribute_value[attribute]), to_be_cloned.size)
            else:
                clone_data[attribute] = self.dataset.get_attribute_by_index(attribute, to_be_cloned)

    self.post_run(self.dataset, to_be_cloned, to_be_removed, **kwargs)

    if to_be_removed.size > 0:
        logger.log_status()
        self.dataset.remove_elements(to_be_removed)

    if clone_data:
        self.dataset.add_elements(data=clone_data, change_ids_if_not_unique=True)

    return self.dataset
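# -- Illustration (not part of the model): the control-total column conventions handled
# above, applied to one made-up row. A value of -1 means "no constraint"; -2 means the
# complement set (any value not listed elsewhere in the column); `attribute`_min/_max
# bound a range. A row with age_of_head_min=35, age_of_head_max=-1, persons=2 matches
# households with age_of_head >= 35 and exactly 2 persons.
if __name__ == '__main__':
    from numpy import array, ones

    age_of_head = array([30, 40, 55, 62])
    persons = array([1, 2, 3, 5])
    indicator = ones(4, dtype='bool')
    amin, amax, aval = 35, -1, 2           # one hypothetical control-total row
    if amin != -1: indicator *= age_of_head >= amin
    if amax != -1: indicator *= age_of_head <= amax
    if aval not in (-1, -2): indicator *= persons == aval
    print indicator                        # -> [False  True False False]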
def run(self, realestate_dataset,
        year=None,
        occupied_spaces_variable="occupied_units",
        total_spaces_variable="total_units",
        target_attribute_name="target_vacancy_rate",
        sample_from_dataset=None,
        sample_filter="",
        reset_attribute_value={},
        year_built="year_built",
        dataset_pool=None,
        append_to_realestate_dataset=False,
        table_name="development_projects",
        dataset_name="development_project",
        id_name="development_project_id",
        **kwargs):
    """
    sample_filter attribute/variable indicates which records in the dataset are eligible
    for sampling for removal or cloning.
    append_to_realestate_dataset - whether to append the new dataset to realestate_dataset.
    """
    if self.target_vancy_dataset is None:
        raise RuntimeError("target_vacancy_rate dataset is unspecified.")

    if not sample_from_dataset:
        sample_from_dataset = realestate_dataset

    # if dataset_pool is None:
    #     dataset_pool = SessionConfiguration().get_dataset_pool()
    if year is None:
        year = SimulationState().get_current_time()
    this_year_index = where(self.target_vancy_dataset.get_attribute("year") == year)[0]
    target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)

    column_names = list(set(self.target_vancy_dataset.get_known_attribute_names()) -
                        set([target_attribute_name, occupied_spaces_variable, total_spaces_variable, "year", "_hidden_id_"]))
    column_names.sort(reverse=True)
    column_values = dict([(name, target_vacancy_for_this_year.get_attribute(name))
                          for name in column_names + [target_attribute_name]])

    # NOTE: the list comprehension below leaks its loop variable `col` (Python 2 scoping);
    # `criterion[col]` further down relies on `col` being the last element of column_names.
    independent_variables = list(set([re.sub("_max$", "", re.sub("_min$", "", col)) for col in column_names]))
    dataset_known_attributes = realestate_dataset.get_known_attribute_names()
    sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
    for variable in independent_variables:
        if variable not in dataset_known_attributes:
            realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
        if variable not in sample_dataset_known_attributes:
            sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
    dataset_known_attributes = realestate_dataset.get_known_attribute_names()  # update after compute

    if sample_filter:
        short_name = VariableName(sample_filter).get_alias()
        if short_name not in dataset_known_attributes:
            filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
        else:
            filter_indicator = sample_from_dataset.get_attribute(short_name)
    else:
        filter_indicator = 1

    sampled_index = array([], dtype=int32)

    # log header
    if PrettyTable is not None:
        status_log = PrettyTable()
        status_log.set_field_names(column_names + ["actual", "target", "expected", "difference", "action"])
    else:
        logger.log_status("\t".join(column_names + ["actual", "target", "expected", "difference", "action"]))
    error_log = ""
    for index in range(target_vacancy_for_this_year.size()):
        this_sampled_index = array([], dtype=int32)
        indicator = ones(realestate_dataset.size(), dtype="bool")
        sample_indicator = ones(sample_from_dataset.size(), dtype="bool")
        criterion = {}  # for logging
        for attribute in independent_variables:
            if attribute in dataset_known_attributes:
                dataset_attribute = realestate_dataset.get_attribute(attribute)
                sample_attribute = sample_from_dataset.get_attribute(attribute)
            else:
                raise ValueError("attribute %s used in the target vacancy dataset cannot be found in dataset %s"
                                 % (attribute, realestate_dataset.get_dataset_name()))

            if attribute + "_min" in column_names:
                amin = target_vacancy_for_this_year.get_attribute(attribute + "_min")[index]
                criterion.update({attribute + "_min": amin})
                if amin != -1:
                    indicator *= dataset_attribute >= amin
                    sample_indicator *= sample_attribute >= amin
            if attribute + "_max" in column_names:
                amax = target_vacancy_for_this_year.get_attribute(attribute + "_max")[index]
                criterion.update({attribute + "_max": amax})
                if amax != -1:
                    indicator *= dataset_attribute <= amax
                    sample_indicator *= sample_attribute <= amax
            if attribute in column_names:
                aval = column_values[attribute][index]
                criterion.update({attribute: aval})
                if aval == -1:
                    continue
                elif aval == -2:
                    ## treat -2 in a control totals column as the complement set,
                    ## i.e. all other values not already specified in this column
                    indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                    sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                else:
                    indicator *= dataset_attribute == aval
                    sample_indicator *= sample_attribute == aval

        this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
        ## total/occupied_spaces_variable can be specified either as a universal name for all real estate
        ## or in the target_vacancy_rate dataset for each vacancy category
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]

        this_total_spaces_variable += "_" + str(criterion[col])
        this_occupied_spaces_variable += "_" + str(criterion[col])

        logger.be_quiet()  # temporarily disable logging
        realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
        realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
        sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
        logger.talk()

        actual_num = (realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
        # target_num is obsolete in this version.
        target_num = int(round((realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /
                               (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
        """If the target vacancy is very small and the inflow to the region is big, it is not enough to
        check only the current simulation year's vacancy. The simulation is more robust if the BTM
        anticipates the next year's population (of households and jobs).
        #TODO: Make the code more general to cover various stratifications in the real estate market.
        """
        # The category value distinguishes residential real estate (1, sized by household
        # control totals) from non-residential (0, sized by employment control totals).
        if criterion[col] == 1:
            idx = where(self.control_totals.get_attribute("year") == year + 1)[0]
            this_years_control_totals = DatasetSubset(self.control_totals, idx)
            expected_num = int(round(this_years_control_totals.get_attribute("total_number_of_households").sum() /
                                     (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
        if criterion[col] == 0:
            idx = where(self.employment_control_totals.get_attribute("year") == year + 1)[0]
            next_years_control_totals = DatasetSubset(self.employment_control_totals, idx)
            expected_num = int(round(next_years_control_totals.get_attribute("number_of_jobs").sum() /
                                     (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))

        diff = expected_num - actual_num
        # Previous version, which checked the current year's occupation:
        # diff = target_num - actual_num
        if diff > 0:
            total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
            legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
            if legit_index.size > 0:
                mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                num_of_projects_to_sample = int(diff / mean_size)
                ## sample at least 1 project when diff > 0; otherwise the loop below
                ## never terminates when num_of_projects_to_sample == 0
                num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                    lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                    this_sampled_index = concatenate((this_sampled_index, lucky_index))
                this_sampled_index = this_sampled_index[0:(1 + searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                sampled_index = concatenate((sampled_index, this_sampled_index))
            else:
                error_log += ("There is nothing to sample from %s and no new development will happen for "
                              % sample_from_dataset.get_dataset_name()
                              + ",".join([col + "=" + str(criterion[col]) for col in column_names]) + "\n")
        # if diff < 0: # TODO: demolition; not yet supported

        ## log status
        action = "0"
        if this_sampled_index.size > 0:
            action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
            if diff > 0:
                action = "+" + str(action_num)
            if diff < 0:
                action = "-" + str(action_num)
        cat = [str(criterion[col]) for col in column_names]
        cat += [str(actual_num), str(target_num), str(expected_num), str(diff), action]

        if PrettyTable is not None:
            status_log.add_row(cat)
        else:
            logger.log_status("\t".join(cat))

    if PrettyTable is not None:
        logger.log_status("\n" + status_log.get_string())
    if error_log:
        logger.log_error(error_log)

    result_data = {}
    result_dataset = None
    index = array([], dtype="int32")
    if True:  # sampled_index.size > 0: -- the result dataset is built even when nothing was sampled
        ### ideally duplicate_rows() is all that is needed to add the newly cloned rows;
        ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
        ## realestate_dataset.duplicate_rows(sampled_index)
        result_data.setdefault(year_built, resize(year, sampled_index.size).astype("int32"))
        ## also add 'independent_variables' to the new dataset
        for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
            if attribute in reset_attribute_value:
                result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
            else:
                result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
        if id_name and result_data and id_name not in result_data:
            result_data[id_name] = arange(sampled_index.size, dtype="int32") + 1

        storage = StorageFactory().get_storage("dict_storage")
        storage.write_table(table_name=table_name, table_data=result_data)
        result_dataset = Dataset(id_name=id_name,
                                 in_storage=storage,
                                 in_table_name=table_name,
                                 dataset_name=dataset_name)
        index = arange(result_dataset.size())

    if append_to_realestate_dataset:
        if len(result_data) > 0:
            index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                    change_ids_if_not_unique=True)
        result_dataset = realestate_dataset

    return (result_dataset, index)
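# -- Illustration (not part of the model): the anticipating variant above sizes the stock
# from NEXT year's control totals instead of the current occupancy:
#     expected_num = round(N_next_year / (1 - v)),  diff = expected_num - actual_num,
# where v is the target vacancy rate. A worked example with made-up numbers:
if __name__ == '__main__':
    next_year_households, actual_num, vacancy = 1900, 1950, 0.05
    expected_num = int(round(next_year_households / (1 - vacancy)))
    print expected_num, expected_num - actual_num   # -> 2000 50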
def run(self, realestate_dataset,
        year=None,
        occupied_spaces_variable="occupied_units",
        total_spaces_variable="total_units",
        target_attribute_name='target_vacancy_rate',
        sample_from_dataset=None,
        sample_filter="",
        reset_attribute_value={},
        year_built='year_built',
        dataset_pool=None,
        append_to_realestate_dataset=False,
        table_name="development_projects",
        dataset_name="development_project",
        id_name=[],
        **kwargs):
    """
    sample_filter attribute/variable indicates which records in the dataset are eligible
    for sampling for removal or cloning.
    append_to_realestate_dataset - whether to append the new dataset to realestate_dataset.
    """
    if self.target_vancy_dataset is None:
        raise RuntimeError("target_vacancy_rate dataset is unspecified.")

    if not sample_from_dataset:
        sample_from_dataset = realestate_dataset

    # if dataset_pool is None:
    #     dataset_pool = SessionConfiguration().get_dataset_pool()
    if year is None:
        year = SimulationState().get_current_time()
    this_year_index = where(self.target_vancy_dataset.get_attribute('year') == year)[0]
    target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)

    column_names = list(set(self.target_vancy_dataset.get_known_attribute_names()) -
                        set([target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_']))
    column_names.sort(reverse=True)
    column_values = dict([(name, target_vacancy_for_this_year.get_attribute(name))
                          for name in column_names + [target_attribute_name]])

    independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
    dataset_known_attributes = realestate_dataset.get_known_attribute_names()
    for variable in independent_variables:
        if variable not in dataset_known_attributes:
            realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
    dataset_known_attributes = realestate_dataset.get_known_attribute_names()  # update after compute

    if sample_filter:
        short_name = VariableName(sample_filter).get_alias()
        if short_name not in dataset_known_attributes:
            filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
        else:
            filter_indicator = sample_from_dataset.get_attribute(short_name)
    else:
        filter_indicator = 1

    sampled_index = array([], dtype=int32)

    # log header
    if PrettyTable is not None:
        status_log = PrettyTable()
        status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
    else:
        logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
    error_log = ''
    for index in range(target_vacancy_for_this_year.size()):
        this_sampled_index = array([], dtype=int32)
        indicator = ones(realestate_dataset.size(), dtype='bool')
        sample_indicator = ones(sample_from_dataset.size(), dtype='bool')
        criterion = {}  # for logging
        for attribute in independent_variables:
            if attribute in dataset_known_attributes:
                dataset_attribute = realestate_dataset.get_attribute(attribute)
                sample_attribute = sample_from_dataset.get_attribute(attribute)
            else:
                raise ValueError("attribute %s used in the target vacancy dataset cannot be found in dataset %s"
                                 % (attribute, realestate_dataset.get_dataset_name()))

            if attribute + '_min' in column_names:
                amin = target_vacancy_for_this_year.get_attribute(attribute + '_min')[index]
                criterion.update({attribute + '_min': amin})
                if amin != -1:
                    indicator *= dataset_attribute >= amin
                    sample_indicator *= sample_attribute >= amin
            if attribute + '_max' in column_names:
                amax = target_vacancy_for_this_year.get_attribute(attribute + '_max')[index]
                criterion.update({attribute + '_max': amax})
                if amax != -1:
                    indicator *= dataset_attribute <= amax
                    sample_indicator *= sample_attribute <= amax
            if attribute in column_names:
                aval = column_values[attribute][index]
                criterion.update({attribute: aval})
                if aval == -1:
                    continue
                elif aval == -2:
                    ## treat -2 in a control totals column as the complement set,
                    ## i.e. all other values not already specified in this column
                    indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                    sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                else:
                    indicator *= dataset_attribute == aval
                    sample_indicator *= sample_attribute == aval

        this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
        ## total/occupied_spaces_variable can be specified either as a universal name for all real estate
        ## or in the target_vacancy_rate dataset for each vacancy category
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]

        logger.be_quiet()  # temporarily disable logging
        realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
        realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
        sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
        logger.talk()

        actual_num = (indicator * realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
        target_num = int(round((indicator * realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /
                               (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
        diff = target_num - actual_num
        if diff > 0:
            total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
            legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
            if legit_index.size > 0:
                mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                num_of_projects_to_sample = int(diff / mean_size)
                ## sample at least 1 project when diff > 0 (as in the variants above);
                ## otherwise the loop below never terminates when num_of_projects_to_sample == 0
                num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                    lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                    this_sampled_index = concatenate((this_sampled_index, lucky_index))
                this_sampled_index = this_sampled_index[0:(1 + searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                sampled_index = concatenate((sampled_index, this_sampled_index))
            else:
                error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                             ','.join([col + "=" + str(criterion[col]) for col in column_names]) + '\n'
        # if diff < 0: # TODO: demolition; not yet supported

        ## log status
        action = "0"
        if this_sampled_index.size > 0:
            action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
            if diff > 0:
                action = "+" + str(action_num)
            if diff < 0:
                action = "-" + str(action_num)
        cat = [str(criterion[col]) for col in column_names]
        cat += [str(actual_num), str(target_num), str(diff), action]

        if PrettyTable is not None:
            status_log.add_row(cat)
        else:
            logger.log_status("\t".join(cat))

    if PrettyTable is not None:
        logger.log_status("\n" + status_log.get_string())
    if error_log:
        logger.log_error(error_log)

    result_data = {}
    result_dataset = None
    index = array([], dtype='int32')
    if sampled_index.size > 0:
        ### ideally duplicate_rows() is all that is needed to add the newly cloned rows;
        ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
        ## realestate_dataset.duplicate_rows(sampled_index)
        result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32'))
        for attribute in sample_from_dataset.get_primary_attribute_names():
            if attribute in reset_attribute_value:
                result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
            else:
                result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)

        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name=table_name, table_data=result_data)
        result_dataset = Dataset(id_name=id_name,
                                 in_storage=storage,
                                 in_table_name=table_name,
                                 dataset_name=dataset_name)
        index = arange(result_dataset.size())

    if append_to_realestate_dataset:
        if len(result_data) > 0:
            index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                    change_ids_if_not_unique=True)
        result_dataset = realestate_dataset

    return (result_dataset, index)
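# -- Illustration (not part of the model): the target computed in the variant above. With
# occupied units O and target vacancy rate v, the desired stock is
#     target_num = round(O / (1 - v)),
# so the deficit to fill by sampling projects is diff = target_num - actual_num.
# A worked example with made-up numbers:
if __name__ == '__main__':
    occupied, actual_num, vacancy = 950, 980, 0.05
    target_num = int(round(occupied / (1 - vacancy)))
    print target_num, target_num - actual_num   # -> 1000 20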