Example #1
    def _do_run(self, location_set, agent_set, agents_index, data_objects=None, resources=None):
        location_id_name = location_set.get_id_name()[0]
        jobsubset = DatasetSubset(agent_set, agents_index)
        if jobsubset.size() <= 0:
            return array([], dtype='int32')
        #unplace jobs
        agent_set.set_values_of_one_attribute(location_id_name,
                                              resize(array([-1]), jobsubset.size()), agents_index)
        sector_ids = jobsubset.get_attribute("sector_id")
        sectors = unique(sector_ids)
        counts = ndimage_sum(ones((jobsubset.size(),)), labels=sector_ids.astype('int32'), index=sectors.astype('int32'))
        if sectors.size <= 1:
            counts = array([counts])
        variables = map(lambda x: "number_of_jobs_of_sector_"+str(int(x)), sectors)
        compute_variables = map(lambda var: self.variable_package + "." + 
            location_set.get_dataset_name()+ "." + var, variables)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included({agent_set.get_dataset_name():agent_set})
        location_set.compute_variables(compute_variables, dataset_pool=self.dataset_pool)
        if self.filter is None:
            location_index = arange(location_set.size())
        else:
            filter_values = location_set.compute_variables([self.filter], dataset_pool=self.dataset_pool)
            location_index = where(filter_values > 0)[0]
        if location_index.size <= 0:
            logger.log_status("No locations available. Nothing to be done.")
            return array([])
        location_subset = DatasetSubset(location_set, location_index)
        for i, sector in enumerate(sectors):
            distr = location_subset.get_attribute(variables[i])
            if ma.allclose(distr.sum(), 0):
                uniform_prob = 1.0/distr.size
                distr = resize(array([uniform_prob], dtype='float64'), distr.size)
                logger.log_warning("Probabilities in scaling model for sector " + str(sector) + " sum to 0.0.  Substituting uniform distribution!")
#                random_sample = sample(location_set.get_attribute("grid_id"), k=int(counts[i]), \
#                                   probabilities = distr)
            distr = distr/float(distr.sum())
            random_sample = probsample_replace(location_subset.get_id_attribute(), size=int(counts[i]), 
                                       prob_array=distr)
            idx = where(sector_ids == sector)[0]
            #modify job locations
            agent_set.set_values_of_one_attribute(location_id_name, random_sample, agents_index[idx])
        return agent_set.get_attribute_by_index(location_id_name, agents_index)
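
The heart of the method above is probsample_replace: draw location ids with replacement, with probabilities proportional to per-location weights. A minimal sketch of what that opus_core helper is assumed to do, using numpy.random.choice as a stand-in:

import numpy

def probsample_replace_sketch(ids, size, prob_array):
    # normalize the weights to probabilities, then sample with replacement
    p = prob_array / float(prob_array.sum())
    return numpy.random.choice(ids, size=size, replace=True, p=p)

# e.g. distribute 5 jobs over locations 101-103 weighted 1:2:7
# probsample_replace_sketch(numpy.array([101, 102, 103]), 5, numpy.array([1., 2., 7.]))
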
    def _do_run(self, location_set, agent_set, agents_index, resources=None):
        location_id_name = location_set.get_id_name()[0]
        asubset = DatasetSubset(agent_set, agents_index)
        if asubset.size() <= 0:
            return array([], dtype='int32')
        # unplace agents
        agent_set.modify_attribute(location_id_name,
                                   resize(array([-1]), asubset.size()), agents_index)
        if self.filter is None:
            location_index = arange(location_set.size())
        else:
            filter_values = location_set.compute_variables([self.filter], dataset_pool=self.dataset_pool)
            location_index = where(filter_values > 0)[0]
        if location_index.size <= 0:
            logger.log_status("No locations available. Nothing to be done.")
            return array([])

        location_subset = DatasetSubset(location_set, location_index)
        if self.consider_capacity:
            location_set.compute_variables([self.capacity_attribute],
                                           dataset_pool=self.dataset_pool)
            weights = location_subset[self.capacity_attribute]
            if self.number_of_agents_attribute is not None:
                location_set.compute_variables([self.number_of_agents_attribute],
                                               dataset_pool=self.dataset_pool)
                weights = clip(weights - location_subset[self.number_of_agents_attribute],
                               0, location_subset[self.capacity_attribute])
        else:
            weights = ones(location_subset.size())

        if weights.sum() <= 0:
            logger.log_status("Locations' capacity sums to zero. Nothing to be done.")
            return array([])
        distr = weights/float(weights.sum())
        random_sample = probsample_replace(location_subset.get_id_attribute(), size=asubset.size(),
                                           prob_array=distr)
        agent_set.modify_attribute(location_id_name, random_sample, agents_index)
        return agent_set.get_attribute_by_index(location_id_name, agents_index)
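
When consider_capacity is set, the sampling weights above are the remaining capacity of each location. A hedged sketch of that weighting, assuming plain numpy arrays for capacity and current occupancy:

import numpy

def remaining_capacity_weights(capacity, occupied):
    # free capacity per location; over-occupied locations get weight 0
    return numpy.clip(capacity - occupied, 0, capacity)

# remaining_capacity_weights(numpy.array([10, 5, 8]), numpy.array([12, 1, 8]))
# -> array([0, 4, 0])
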
class HouseholdTransitionModel(Model):
    """Creates and removes households from household_set. New households are duplicated from the existing households, keeping 
       the joint distribution of all characteristics. 
    """

    model_name = "Household Transition Model"

    def __init__(self, location_id_name="grid_id", dataset_pool=None, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.location_id_name = location_id_name
        self.dataset_pool = self.create_dataset_pool(dataset_pool, ["urbansim", "opus_core"])

    def run(self, year, household_set, control_totals, characteristics, resources=None):
        self._do_initialize_for_run(household_set)
        control_totals.get_attribute("total_number_of_households") # to make sure they are loaded
        self.characteristics = characteristics
        self.all_categories = self.characteristics.get_attribute("characteristic")
        self.all_categories = array(map(lambda x: x.lower(), self.all_categories))
        self.scaled_characteristic_names = get_distinct_names(self.all_categories).tolist()
        self.marginal_characteristic_names = copy(control_totals.get_id_name())
        index_year = self.marginal_characteristic_names.index("year")
        self.marginal_characteristic_names.remove("year")
        idx = where(control_totals.get_attribute("year")==year)[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        self._do_run_for_this_year(household_set)
        return self._update_household_set(household_set)
        
    def _update_household_set(self, household_set):
        index_of_duplicated_hhs = household_set.duplicate_rows(self.mapping_existing_hhs_to_new_hhs)
        household_set.modify_attribute(name=self.location_id_name, data=-1 * ones((index_of_duplicated_hhs.size,), 
                                                                              dtype=household_set.get_data_type(self.location_id_name)),
                                                                          index=index_of_duplicated_hhs)
        household_set.remove_elements(self.remove_households)
        if self.new_households[self.location_id_name].size > 0:
            max_id = household_set.get_id_attribute().max()
            self.new_households[self.household_id_name]=concatenate((self.new_households[self.household_id_name],
                                                             arange(max_id+1, max_id+self.new_households[self.location_id_name].size+1)))
            household_set.add_elements(self.new_households, require_all_attributes=False)

        difference = household_set.size()-self.household_size
        self.debug.print_debug("Difference in number of households: %s"
            " (original %s, new %s, created %s, deleted %s)"
                % (difference,
                   self.household_size,
                   household_set.size(),
                   self.new_households[self.household_id_name].size + self.mapping_existing_hhs_to_new_hhs.size,
                   self.remove_households.size),
            3)
        if self.location_id_name in household_set.get_attribute_names():
            self.debug.print_debug("Number of unplaced households: %s"
                % where(household_set.get_attribute(self.location_id_name) <=0)[0].size,
                3)
        return difference

    def _do_initialize_for_run(self, household_set):
        self.household_id_name = household_set.get_id_name()[0]
        self.new_households = {
           self.location_id_name:array([], dtype=household_set.get_data_type(self.location_id_name, int32)),
           self.household_id_name:array([], dtype=household_set.get_data_type(self.household_id_name, int32))
                   }
        self.remove_households = array([], dtype='int32')
        self.household_size = household_set.size()
        self.max_id = household_set.get_id_attribute().max()
        self.arrays_from_categories = {}
        self.arrays_from_categories_mapping = {}
        self.mapping_existing_hhs_to_new_hhs = array([], dtype=household_set.get_data_type(self.household_id_name, int32))
        
    def _do_run_for_this_year(self, household_set):
        self.household_set = household_set
        groups = self.control_totals_for_this_year.get_id_attribute()
        self.create_arrays_from_categories(self.household_set)

        all_characteristics = self.arrays_from_categories.keys()
        self.household_set.load_dataset_if_not_loaded(attributes = all_characteristics) # prevent lazy loading, to save runtime
        idx_shape = []
        number_of_combinations=1
        num_attributes=len(all_characteristics)
        for iattr in range(num_attributes):
            attr = all_characteristics[iattr]
            max_bins = self.arrays_from_categories[attr].max()+1
            idx_shape.append(max_bins)
            number_of_combinations=number_of_combinations*max_bins
            if attr not in self.new_households.keys():
                self.new_households[attr] = array([], dtype=self.household_set.get_data_type(attr, float32))

        self.number_of_combinations = int(number_of_combinations)
        idx_tmp = indices(tuple(idx_shape))
        
        categories_index = zeros((self.number_of_combinations,num_attributes))

        for i in range(num_attributes): #create indices of all combinations
            categories_index[:,i] = idx_tmp[i].ravel()

        categories_index_mapping = {}
        for i in range(self.number_of_combinations):
            categories_index_mapping[tuple(categories_index[i,].tolist())] = i

        def get_category(values):
            bins = map(lambda x, y: self.arrays_from_categories[x][int(y)], all_characteristics, values)
            try:
                return categories_index_mapping[tuple(bins)]
            except KeyError, msg: 
                where_error = where(array(bins) == -1)[0]
                if where_error.size > 0:
                    raise KeyError, \
                        "Invalid value of %s for attribute %s. It is not included in the characteristics groups." % (
                                                                               array(values)[where_error], 
                                                                               array(all_characteristics)[where_error])
                raise KeyError, msg

        if num_attributes > 0:
            # the next array must be a copy of the household values; otherwise it would change the original values
            values_array = reshape(array(self.household_set.get_attribute(all_characteristics[0])), (self.household_set.size(),1))
            if num_attributes > 1:
                for attr in all_characteristics[1:]:
                    values_array = concatenate((values_array, reshape(array(self.household_set.get_attribute(attr)),
                                                                      (self.household_set.size(),1))), axis=1)
            for i in range(values_array.shape[1]):
                if values_array[:,i].max() > 10000:
                    values_array[:,i] = values_array[:,i]/10
                values_array[:,i] = clip(values_array[:,i], 0, self.arrays_from_categories[all_characteristics[i]].size-1)
    
            # determine to which category each household belongs
            self.household_categories = array(map(lambda x: get_category(x), values_array)) # performance bottleneck
    
            number_of_households_in_categories = array(ndimage_sum(ones((self.household_categories.size,)),
                                                                    labels=self.household_categories+1,
                                                                    index = arange(self.number_of_combinations)+1))
        else:
            # no marginal characteristics; consider just one group
            self.household_categories = zeros(self.household_set.size(), dtype='int32')
            number_of_households_in_categories = array([self.household_set.size()])

        g=arange(num_attributes)

        #iterate over marginal characteristics
        for group in groups:
            if groups.ndim <= 1: # there is only one group (no marginal char.)
                id = group
            else:
                id = tuple(group.tolist())
            group_element = self.control_totals_for_this_year.get_data_element_by_id(id)
            total = group_element.total_number_of_households
            for i in range(g.size):
                g[i] = getattr(group_element, self.arrays_from_categories.keys()[i])
            if g.size <= 0:
                l = ones((number_of_households_in_categories.size,))
            else:
                l = categories_index[:,0] == g[0]
                for i in range(1,num_attributes):
                    l = logical_and(l, categories_index[:,i] == g[i])
            # l has 1's for combinations of this group
            number_in_group = array(ndimage_sum(number_of_households_in_categories, labels=l, index = 1))
            diff = int(total - number_in_group)
            if diff < 0: # households to be removed
                is_in_group = l[self.household_categories]
                w = where(is_in_group)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(self.household_set, w, -1*diff,
                                                          self.location_id_name)
                self.remove_households = concatenate((self.remove_households, non_placed, sample_noreplace(sample_array,
                                                                                   max(0,abs(diff)-size_non_placed))))
            if diff > 0: # households to be created
                self._create_households(diff, l)
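
The combination bookkeeping in _do_run_for_this_year above (indices() plus a tuple-to-index dict) flattens a multi-dimensional bin index in C order, which is also what numpy.ravel_multi_index computes. A small equivalent sketch of the category id and per-category counts:

import numpy

idx_shape = (3, 2)  # bins per characteristic, e.g. 3 age bins x 2 income bins
# per-household bin index for each characteristic (one row per household)
bins = numpy.array([[0, 1], [2, 0], [1, 1]])
# flat category id per household, matching the tuple -> index dict lookup
categories = numpy.ravel_multi_index(bins.T, idx_shape)  # -> [1, 4, 3]
# household counts per joint category (3*2 = 6 combinations)
counts = numpy.bincount(categories, minlength=int(numpy.prod(idx_shape)))
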
    def run(self, in_storage, out_storage=None, business_dsname="business", zone_dsname=None):
        dataset_pool = DatasetPool(storage=in_storage, package_order=['psrc_parcel', 'urbansim_parcel', 'urbansim', 'opus_core'] )
        seed(1)
        allbusinesses = dataset_pool.get_dataset(business_dsname)
        parcels = dataset_pool.get_dataset('parcel')
        buildings = dataset_pool.get_dataset('building')
        parcels.compute_variables(["urbansim_parcel.parcel.residential_units", "number_of_buildings = parcel.number_of_agents(building)", 
                                   "non_residential_sqft = (parcel.aggregate(building.non_residential_sqft)).astype(int32)",
                                   "number_of_res_buildings = parcel.aggregate(urbansim_parcel.building.is_residential)",
                                   "number_of_nonres_buildings = parcel.aggregate(urbansim_parcel.building.is_non_residential)",
                                   "number_of_mixed_use_buildings = parcel.aggregate(urbansim_parcel.building.is_generic_building_type_6)"
                                   ], 
                                  dataset_pool=dataset_pool)
        restypes = [12, 4, 19, 11, 34, 10, 33]
        reslutypes = [13,14,15,24]
        is_valid_business = ones(allbusinesses.size(), dtype='bool8')
        parcels_not_matched = logical_and(in1d(allbusinesses["parcel_id"], parcels.get_id_attribute(), invert=True), allbusinesses["parcel_id"] > 0)
        if parcels_not_matched.sum() > 0:
            is_valid_business[where(parcels_not_matched)] = False
            logger.log_warning(message="No parcel exists for %s businesses (%s jobs)" % (parcels_not_matched.sum(), 
                                                                                         allbusinesses[self.number_of_jobs_attr][where(parcels_not_matched)].sum()))
        zero_parcel = allbusinesses["parcel_id"]<=0
        if zero_parcel.sum() > 0:
            is_valid_business[where(zero_parcel)] = False
            logger.log_warning(message="%s businesses (%s jobs) located on zero parcel_id" % (zero_parcel.sum(), 
                                                                                         allbusinesses[self.number_of_jobs_attr][where(zero_parcel)].sum()))            
            
        zero_size = logical_and(is_valid_business, allbusinesses[self.number_of_jobs_attr].round() == 0)
        if zero_size.sum() > 0:
            is_valid_business[where(zero_size)] = False
            logger.log_warning(message="%s businesses are of size 0." % zero_size.sum())
        
        businesses = DatasetSubset(allbusinesses, index=where(is_valid_business)[0])
        
        parcels.add_attribute(name="number_of_workplaces", data=parcels.sum_dataset_over_ids(businesses, constant=1))
        
        has_single_res_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_res_buildings"] == 1) # 1 (1 residential)
        parcels.add_attribute(data=has_single_res_buildings.astype("int32"), name="buildings_code")
        has_mult_res_buildings = logical_and(parcels["number_of_buildings"] > 1,  parcels["number_of_nonres_buildings"] == 0) # 2 (mult residential)
        parcels.modify_attribute("buildings_code", data=2*ones(has_mult_res_buildings.sum()), index=where(has_mult_res_buildings)) 
        has_single_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_nonres_buildings"] == 1), parcels["number_of_mixed_use_buildings"] == 0) # 3 (1 non-res)
        parcels.modify_attribute("buildings_code", data=3*ones(has_single_nonres_buildings.sum()), index=where(has_single_nonres_buildings)) 
        has_mult_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] > 1, parcels["number_of_res_buildings"] == 0), parcels["number_of_mixed_use_buildings"] == 0) # 4 (mult non-res)
        parcels.modify_attribute("buildings_code", data=4*ones(has_mult_nonres_buildings.sum()), index=where(has_mult_nonres_buildings))
        has_single_mixed_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_mixed_use_buildings"] == 1) # 5 (1 mixed-use)
        parcels.modify_attribute("buildings_code", data=5*ones(has_single_mixed_buildings.sum()), index=where(has_single_mixed_buildings))
        has_mult_mixed_buildings = logical_and(parcels["number_of_buildings"] > 1, 
                                               logical_or(logical_and(parcels["number_of_res_buildings"] > 0, parcels["number_of_nonres_buildings"] > 0), 
                                                          logical_or(parcels["number_of_mixed_use_buildings"] > 1, 
                                                                     logical_and(parcels["number_of_res_buildings"] == 0, 
                                                                                 parcels["number_of_mixed_use_buildings"] > 0)))) # 6
        parcels.modify_attribute("buildings_code", data=6*ones(has_mult_mixed_buildings.sum()), index=where(has_mult_mixed_buildings))
        has_no_building_res_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)) # 7 (vacant with res LU type)
        parcels.modify_attribute("buildings_code", data=7*ones(has_no_building_res_lutype.sum()), index=where(has_no_building_res_lutype)) 
        has_no_building_nonres_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)==0) # 8 (vacant with non-res LU type)
        parcels.modify_attribute("buildings_code", data=8*ones(has_no_building_nonres_lutype.sum()), index=where(has_no_building_nonres_lutype))
        
        business_sizes = businesses[self.number_of_jobs_attr].round().astype("int32") 
        business_location = {}
        business_location1wrkpl = zeros(businesses.size(), dtype="int32")
        business_location1wrkplres = zeros(businesses.size(), dtype="int32")
        business_ids = businesses.get_id_attribute()
        # sample one building for cases when sampling is required.
        for ibusid in range(businesses.size()):
            idx = where(buildings['parcel_id'] == businesses['parcel_id'][ibusid])[0]
            bldgids = buildings['building_id'][idx]
            business_location[business_ids[ibusid]] = bldgids
            if bldgids.size == 1:
                business_location1wrkpl[ibusid] = bldgids[0]
            elif bldgids.size > 1:
                business_location1wrkpl[ibusid] = bldgids[sample_noreplace(arange(bldgids.size), 1)]
                if buildings['residential_units'][idx].sum() > 0:
                    # Residential buildings are sampled with probabilities proportional to residential units
                    business_location1wrkplres[ibusid] = bldgids[probsample_noreplace(arange(bldgids.size), 1, prob_array=buildings['residential_units'][idx])]
                else:
                    business_location1wrkplres[ibusid] = business_location1wrkpl[ibusid]
        
        home_based = zeros(business_sizes.sum(), dtype="bool8")
        job_building_id = zeros(business_sizes.sum(), dtype="int32")
        job_array_labels = business_ids.repeat(business_sizes)
        job_assignment_case = zeros(business_sizes.sum(), dtype="int32")
        processed_bindicator = zeros(businesses.size(), dtype="bool8")
        business_codes = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"])
        business_nworkplaces = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"])
        logger.log_status("Total number of jobs: %s" % home_based.size)
        
        # 1. 1-2 worker business in 1 residential building
        idx_sngl_wrk_1bld_fit = where(logical_and(business_sizes < 3, business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_1bld_fit])
        home_based[jidx] = True
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_1bld_fit].repeat(business_sizes[idx_sngl_wrk_1bld_fit])
        job_assignment_case[jidx] = 1
        processed_bindicator[idx_sngl_wrk_1bld_fit] = True
        logger.log_status("1. %s jobs (%s businesses) set as home-based due to 1-2 worker x 1 residential building fit." % (
            business_sizes[idx_sngl_wrk_1bld_fit].sum(), idx_sngl_wrk_1bld_fit.size))
        
        # 2. 1-2 worker business in multiple residential buildings
        idx_sngl_wrk_multbld_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_multbld_fit])
        home_based[jidx] = True
        job_building_id[jidx] = business_location1wrkplres[idx_sngl_wrk_multbld_fit].repeat(business_sizes[idx_sngl_wrk_multbld_fit])
        job_assignment_case[jidx] = 2
        processed_bindicator[idx_sngl_wrk_multbld_fit] = True
        logger.log_status("2. %s jobs (%s businesses) set as home-based due to 1-2 worker x multiple residential buildings fit." % (
            business_sizes[idx_sngl_wrk_multbld_fit].sum(), idx_sngl_wrk_multbld_fit.size))
               
        # 3. 1-2 worker in single non-res building (not mixed-use)
        idx_sngl_wrk_single_nonres_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 3))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_single_nonres_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_single_nonres_fit].repeat(business_sizes[idx_sngl_wrk_single_nonres_fit])
        job_assignment_case[jidx] = 3
        processed_bindicator[idx_sngl_wrk_single_nonres_fit] = True
        logger.log_status("3. %s jobs (%s businesses) placed due to 1-2 worker x single non-res building fit." % (
                          business_sizes[idx_sngl_wrk_single_nonres_fit].sum(), idx_sngl_wrk_single_nonres_fit.size))     
        
        # 4. 1-2 worker in multiple non-res building (not mixed-use)
        idx_sngl_wrk_mult_nonres_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 4))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mult_nonres_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mult_nonres_fit].repeat(business_sizes[idx_sngl_wrk_mult_nonres_fit])
        job_assignment_case[jidx] = 4
        processed_bindicator[idx_sngl_wrk_mult_nonres_fit] = True
        logger.log_status("4. %s jobs (%s businesses) placed due to 1-2 worker x multiple non-res building fit." % (
            business_sizes[idx_sngl_wrk_mult_nonres_fit].sum(), idx_sngl_wrk_mult_nonres_fit.size))      
                
        # 5. 1-2 worker in single mixed-use building
        idx_sngl_wrk_smu_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 5))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_smu_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_smu_fit].repeat(business_sizes[idx_sngl_wrk_smu_fit])
        job_assignment_case[jidx] = 5
        processed_bindicator[idx_sngl_wrk_smu_fit] = True
        logger.log_status("5. %s jobs (%s businesses) in 1-2 worker x single mixed-use building." % (
            business_sizes[idx_sngl_wrk_smu_fit].sum(), idx_sngl_wrk_smu_fit.size))       
        
        # 6. 1-2 worker in multiple mixed-type buildings
        idx_sngl_wrk_mmu_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 6))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mmu_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mmu_fit].repeat(business_sizes[idx_sngl_wrk_mmu_fit])
        bldtype = buildings.get_attribute_by_id("building_type_id", business_location1wrkpl[idx_sngl_wrk_mmu_fit])
        is_bldtype_res = in1d(bldtype, restypes)
        home_based[in1d(job_array_labels, business_ids[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)])] = True
        job_assignment_case[jidx] = 6
        processed_bindicator[idx_sngl_wrk_mmu_fit] = True
        logger.log_status("6. %s jobs (%s businesses) in 1-2 worker x multiple mixed-type buildings. %s jobs classified as home-based." % (
            business_sizes[idx_sngl_wrk_mmu_fit].sum(), idx_sngl_wrk_mmu_fit.size, business_sizes[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)].sum()))            

        # 7. 1-2 worker business in residential parcel with no building
        idx_sngl_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 7))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_vacant_res])
        job_assignment_case[jidx] = 7
        home_based[jidx] = True
        processed_bindicator[idx_sngl_wrk_vacant_res] = True
        logger.log_status("7. %s jobs (%s businesses of size 1-2) could not be placed due to non-existing buildings in parcels with residential LU type." % (
            business_sizes[idx_sngl_wrk_vacant_res].sum(), idx_sngl_wrk_vacant_res.size))        

        # 8. 3+ workers of governmental workplaces in 1+ residential building
        ind_business_case8 = logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, in1d(businesses['sector_id'], [18,19]))), in1d(business_codes, [1,2]))
        idx_wrk_fit = where(ind_business_case8)[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 8
        processed_bindicator[idx_wrk_fit] = True
        logger.log_status("8. %s governmental jobs (%s businesses of size 3+) could not be placed due to residing in residential buildings only." % (
                    business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))
        
        # 9. 3-30 workers in single residential building. Make two of them home based.
        idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, business_sizes <= 30)), business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_fit].repeat(business_sizes[idx_sngl_wrk_fit])
        bsizeminus2 = vstack((2*ones(idx_sngl_wrk_fit.size), business_sizes[idx_sngl_wrk_fit]-2)).ravel("F").astype("int32") # interweaving 2 and remaining business size
        hbidx = tile(array([True, False]), bsizeminus2.size/2).repeat(bsizeminus2) # set the first two jobs of every business to True, others to False
        home_based[(where(jidx)[0])[hbidx]] = True
        job_assignment_case[jidx] = 9
        processed_bindicator[idx_sngl_wrk_fit] = True        
        logger.log_status("9. %s jobs (%s businesses) in 3-30 worker x single residential building. %s jobs assigned as home-based." % (
            business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, hbidx.sum()))      
        
        # 10. 3-30 workers in multiple residential buildings. Make two of them home based.
        idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, business_sizes <= 30)), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit])
        job_assignment_case[jidx] = 10
        processed_bindicator[idx_sngl_wrk_fit] = True
        # sample buildings to businesses by parcels 
        bpcls = unique(businesses["parcel_id"][idx_sngl_wrk_fit])
        for ipcl in range(bpcls.size):
            bidx = where(buildings['parcel_id'] == bpcls[ipcl])[0]
            bldgids = buildings['building_id'][bidx]
            bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]], business_ids[idx_sngl_wrk_fit])
            # multiply by units for sampling prop. to units rather than buildings
            bldgids = bldgids.repeat(maximum(1, buildings['residential_units'][bidx].astype('int32'))) 
            if bldgids.size < bussids.size:
                bldarray = bldgids.repeat(int(1 + ceil((bussids.size - bldgids.size)/float(bldgids.size))))  # repeat count must be an integer
            else:
                bldarray = bldgids
            shuffle(bldarray) # randomly reorder in-place
            for ib in range(bussids.size):
                jidx = where(job_array_labels == bussids[ib])[0]
                job_building_id[jidx] = bldarray[ib]
                home_based[jidx[0:2]] = True
        logger.log_status("10. %s jobs (%s businesses) in 3-30 worker x multiple residential building. %s jobs assigned as home-based." % (
            business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, idx_sngl_wrk_fit.size*2))        


        # 11. single workplace, 3+ workers in single non-res or mixed-use building
        idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2), 
                                                            logical_or(business_codes==3, business_codes==5)),
                                                business_nworkplaces==1))[0]
        which_labels = where(in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers]))[0]
        job_building_id[which_labels] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(business_sizes[idx_sngl_wrkplace_2plus_workers])   
        job_assignment_case[which_labels] = 11
        processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True
        logger.log_status("11. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x single non-res/mixed-use building fit." % (
            business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size))
        
        # 12. single workplace, 3+ workers in multiple mixed-type building
        idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==4, business_codes==6)),
                                                            business_nworkplaces==1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(business_sizes[idx_sngl_wrkplace_2plus_workers])    
        job_assignment_case[jidx] = 12
        processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True
        logger.log_status("12. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x multiple non-res/mixed building fit." % (
            business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size))

        # 13. multiple workplaces, 3+ workers in single non-res or mixed building
        idx_mult_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==3, business_codes==5)),
                                                            business_nworkplaces > 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_mult_wrkplace_2plus_workers])
        job_building_id[jidx] = business_location1wrkpl[idx_mult_wrkplace_2plus_workers].repeat(business_sizes[idx_mult_wrkplace_2plus_workers])
        job_assignment_case[jidx] = 13
        processed_bindicator[idx_mult_wrkplace_2plus_workers] = True
        logger.log_status("13. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x single non-res/mixed building fit." % (
            business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size))
        
        # 14. multiple workplaces, 3+ workers in multiple non-res or mixed building
        idx_mult_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==4, business_codes==6)),
                                                            business_nworkplaces > 1))[0]
        processed_bindicator[idx_mult_wrkplace_2plus_workers] = True
        # sample buildings to businesses by parcels 
        bpcls = unique(businesses["parcel_id"][idx_mult_wrkplace_2plus_workers])
        #hbasedsum = home_based.sum()
        for ipcl in range(bpcls.size):
            bldgids = buildings['building_id'][buildings['parcel_id'] == bpcls[ipcl]]
            bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]], business_ids[idx_mult_wrkplace_2plus_workers])
            if bldgids.size < bussids.size:
                bldarray = bldgids.repeat(int(1 + ceil((bussids.size - bldgids.size)/float(bldgids.size))))  # repeat count must be an integer
            else:
                bldarray = bldgids
            shuffle(bldarray) # randomly reorder in-place
            is_res = in1d(bldarray, restypes)
            for ib in range(bussids.size):
                jidx = where(job_array_labels == bussids[ib])
                job_building_id[jidx] = bldarray[ib]
                #home_based[jidx] = is_res
                job_assignment_case[jidx] = 14
        logger.log_status("14. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x multiple non-res/mixed building fit." % (
            business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size))
        
        
        # 15. 3+ workers in residential parcel with no building
        idx_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 2), business_codes == 7))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_res])
        job_assignment_case[jidx] = 15
        processed_bindicator[idx_wrk_vacant_res] = True
        logger.log_status("15. %s jobs (%s businesses of 3+ workers) could not be placed due to non-existing buildings in parcels with residential LU type." % (
            business_sizes[idx_wrk_vacant_res].sum(), idx_wrk_vacant_res.size))
        
        # 16. nonresidential parcel with no building
        idx_wrk_vacant_nonres = where(logical_and(processed_bindicator==0, business_codes == 8))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_nonres])
        job_assignment_case[jidx] = 16
        processed_bindicator[idx_wrk_vacant_nonres] = True
        logger.log_status("16. %s jobs (%s businesses) could not be placed due to non-existing buildings in parcels with non-esidential LU type." % (
            business_sizes[idx_wrk_vacant_nonres].sum(), idx_wrk_vacant_nonres.size))        
        
        # 17. 31+ workers in single residential building. Do not place - will go into ELCM.
        idx_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 30), business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 17
        processed_bindicator[idx_wrk_fit] = True        
        logger.log_status("17. %s jobs (%s businesses) in 31+ workers x single residential building." % (
            business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))         
    
        # 18. 31+ workers in multiple residential buildings.
        idx_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 30), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 18
        processed_bindicator[idx_wrk_fit] = True
        logger.log_status("18. %s jobs (%s businesses) in 31+ workers x multiple residential building." % (
            business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))                

        # jobs in messy buildings
        idx_messy_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 0), business_codes == 0))[0]
        processed_bindicator[idx_messy_fit] = True
        logger.log_status("%s jobs (%s businesses) could not be placed due to messy buildings." % (
            business_sizes[idx_messy_fit].sum(), idx_messy_fit.size))         
         
        # build new buildings for jobs in cases 7, 8, 15 and 16
        jidx_no_bld = where(in1d(job_assignment_case, [7,8,15,16]))[0]
        bus = unique(job_array_labels[jidx_no_bld])
        bsidx = businesses.get_id_index(bus)
        # first create buildings for single workplaces per parcel
        single_workplace_idx = where(business_nworkplaces[bsidx] == 1)[0]
        newbld_parcel_id = businesses['parcel_id'][bsidx][single_workplace_idx]
        newbld_bt = sector2building_type(businesses['sector_id'][bsidx][single_workplace_idx])
        newbids = arange(buildings.get_id_attribute().max()+1, buildings.get_id_attribute().max()+single_workplace_idx.size+1)
        bbldid = zeros(bsidx.size, dtype='int32')
        bbldid[single_workplace_idx] = newbids
        # for parcels with multiple workplaces select the largest business to determine its building type
        mult_bsidx = bsidx[where(business_nworkplaces[bsidx] > 1)[0]]
        empty_parcels = businesses['parcel_id'][mult_bsidx]
        uempty_parcels = unique(empty_parcels)
        bsize_on_empty_pcl = ndmax(business_sizes[mult_bsidx], labels=empty_parcels, index=uempty_parcels)
        newbld2_sec = zeros(uempty_parcels.size, dtype='int32')
        newbids2 = arange(newbids.max()+1, newbids.max()+uempty_parcels.size+1)
        for ipcl in range(uempty_parcels.size):
            newbld2_sec[ipcl] = businesses['sector_id'][mult_bsidx][logical_and(businesses['parcel_id'][mult_bsidx] == uempty_parcels[ipcl], 
                                                                                business_sizes[mult_bsidx]==bsize_on_empty_pcl[ipcl])][0]
            this_bidx = where(businesses['parcel_id'][bsidx] == uempty_parcels[ipcl])
            bbldid[this_bidx] = newbids2[ipcl]
            
        newbld_parcel_id = concatenate((newbld_parcel_id, uempty_parcels))
        newbld_bt = concatenate((newbld_bt, sector2building_type(newbld2_sec)))    
        
        newbldgs = {'building_id': concatenate((newbids, newbids2)),
                    'parcel_id': newbld_parcel_id,
                    'building_type_id': newbld_bt,
                    }
        buildings.add_elements(newbldgs, require_all_attributes=False)
        jidx = where(in1d(job_array_labels, business_ids[bsidx]))[0]
        job_building_id[jidx] = bbldid.repeat(business_sizes[bsidx])
        logger.log_status("Build %s new buildings to accommodate %s jobs (out of which %s are governmental) from cases 7, 15, 16." % (
            newbld_parcel_id.size, jidx.size, business_sizes[bsidx][where(in1d(businesses['sector_id'][bsidx], [18,19]))].sum()))
        
        
        logger.log_status("Assigned %s (%s percent) home-based jobs." % (home_based.sum(), round(home_based.sum()/(home_based.size/100.),2)))
        logger.log_status("Finished %s percent (%s) jobs (%s businesses) processed. %s jobs (%s businesses) remain to be processed." % \
                          (round(business_sizes[processed_bindicator].sum()/(home_based.size/100.),2),
                           business_sizes[processed_bindicator].sum(), processed_bindicator.sum(),
                          business_sizes[logical_not(processed_bindicator)].sum(), business_sizes[logical_not(processed_bindicator)].size))
        
        logger.start_block("Storing jobs data.")
        # create job dataset
        job_data = {"job_id": (arange(job_building_id.size)+1).astype("int32"),
                    "home_based_status" : home_based,
                    "building_id": job_building_id,
                    "business_id": job_array_labels.astype("int32"),
                    "sector_id": businesses['sector_id'].repeat(business_sizes).astype("int32"), 
                    "parcel_id": businesses['parcel_id'].repeat(business_sizes).astype("int32"), 
                    "assignment_case": job_assignment_case}

        # join with zones
        if zone_dsname is not None:
            zones = dataset_pool.get_dataset(zone_dsname)
            idname = zones.get_id_name()[0]
            #jpcls = buildings.get_attribute_by_id('parcel_id', job_building_id)
            job_data[idname] = parcels.get_attribute_by_id(idname, job_data["parcel_id"])
            
            
        dictstorage = StorageFactory().get_storage('dict_storage')
        dictstorage.write_table(table_name="jobs", table_data=job_data)
        jobs = Dataset(in_storage=dictstorage, in_table_name="jobs", dataset_name="job", id_name="job_id")
        if out_storage is not None:
            jobs.write_dataset(out_storage=out_storage, out_table_name="jobs")
            buildings.write_dataset(out_storage=out_storage, attributes=AttributeType.PRIMARY)
        logger.end_block()        
        return jobs
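
The run method above expands businesses into individual job records by repeating each business id by its rounded job count, and then addresses blocks of jobs through in1d masks on those labels. A tiny illustration of the pattern:

import numpy

business_ids = numpy.array([7, 8, 9])
business_sizes = numpy.array([2, 0, 3])
job_labels = business_ids.repeat(business_sizes)  # -> [7, 7, 9, 9, 9]
# mask of the jobs belonging to a subset of businesses, as done with in1d above
jidx = numpy.in1d(job_labels, numpy.array([9]))   # -> [False, False, True, True, True]
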
Example #6
class EstablishmentReappearanceModel(TransitionModel):
    """
    """
    
    model_name = "Establishment Reappearance Model"
    model_short_name = "ERM"
    
    def run(self, 
            year=None, 
            target_attribute_name='number_of_jobs', 
            sampling_filter="establishment.disappeared == 1", 
            reset_dataset_attribute_value={'disappeared':0, 'building_id':-1}, 
            dataset_pool=None,  
            **kwargs
            ):

        """         
        """

        id_name = 'control_total_id'
        ct_known_attributes = self.control_totals_all.get_primary_attribute_names()

        if target_attribute_name not in ct_known_attributes:
            raise AttributeError, "Target attribute %s must be an attribute of control_total dataset" % target_attribute_name
        
        if id_name not in ct_known_attributes:
            self.control_totals_all.add_attribute(name=id_name,
                                                  data = np.arange(1, self.control_totals_all.size()+1)
                                                  )
        if self.control_totals_all.get_id_name() != [id_name]:
            self.control_totals_all._id_names = [id_name]

        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = np.where(self.control_totals_all['year']==year)[0]
        self.control_totals = DatasetSubset(self.control_totals_all, this_year_index)

        if dataset_pool is None:
            try:
                dataset_pool = SessionConfiguration().get_dataset_pool()
            except AttributeError:
                dataset_pool = DatasetPool(datasets_dict={
                                           self.dataset.dataset_name:self.dataset,
                                           #sync_dataset.dataset_name:sync_dataset,
                                           'control_total': self.control_totals
                                            })
        column_names = list( set( ct_known_attributes  ) \
                           - set( [ target_attribute_name, 
                                   'year', 
                                   '_hidden_id_',
                                   id_name, 
                                   '_actual_',
                                  ] )
                           )
        column_names.sort(reverse=True)
        #column_values = dict([ (name, self.control_totals.get_attribute(name)) 
        #                       for name in column_names + [target_attribute_name]])

        self._code_control_total_id(column_names,
                                    dataset_pool=dataset_pool)
        
        target = self.control_totals[target_attribute_name]
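        # default accounting attribute: each record counts as one unit toward the target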
        if self.dataset_accounting_attribute is None:
            self.dataset_accounting_attribute = '_one_'
            self.dataset.add_attribute(name = self.dataset_accounting_attribute,
                                       data = ones(self.dataset.size(), 
                                                   dtype=target.dtype))

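        # actual totals: sum the accounting attribute over the records in each
        # control-total category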
        exp_actual = '_actual_ = control_total.aggregate(%s.%s)' % \
                        (self.dataset.dataset_name,
                         self.dataset_accounting_attribute)
        
        actual = self.control_totals_all.compute_variables(exp_actual,
                                    dataset_pool=dataset_pool)[this_year_index]
        actual = actual.astype(target.dtype)

        dataset_known_attributes = self.dataset.get_known_attribute_names() #update after compute

        #update control_total_id after removing disappeared
        column_names_new = list(set(column_names) - set(["disappeared"]))
        #self.control_totals_all.touch_attribute(target_attribute_name)
        self._code_control_total_id(column_names_new, 
                                    dataset_pool=dataset_pool)
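        # indicator of records eligible to reappear (by default, disappeared == 1)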
        if sampling_filter:
            short_name = VariableName(sampling_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = self.dataset.compute_variables(sampling_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = self.dataset[short_name]
        else:
            filter_indicator = 1

        to_reappear = np.array([], dtype=np.int32)
        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "difference", "action", "N", "note"])
        else:        
            logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action", "N", "note"]))
            
        error_log = ''
        error_num = 1
        
        def log_status():
            ##log status
            action = "0"
            N = "0"
            if lucky_index is not None:
                if actual_num < target_num: 
                    action = "+" + str(action_num)
                    N = "+" + str(lucky_index.size)
                if actual_num > target_num: 
                    action = "-" + str(action_num)
                    N = "-" + str(lucky_index.size)
            
            cat = [ str(self.control_totals[col][index]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(diff), action, N, error_str]
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:
                logger.log_status("\t".join(cat))        

        for index, control_total_id in enumerate(self.control_totals.get_id_attribute()):
            target_num = target[index]
            actual_num = actual[index]
            action_num = 0
            n_num = 0
            diff = target_num - actual_num
            
            accounting = self.dataset[self.dataset_accounting_attribute]
            lucky_index = None
            error_str = ''
             
            if actual_num < target_num:

                indicator = self.dataset[id_name]==control_total_id
                n_indicator = indicator.sum()

                # do sampling from legitimate records
                legit_index = np.where(np.logical_and(indicator, filter_indicator))[0]
                legit_size = sum(accounting[legit_index])
                if legit_size > diff:  
                    ##there are more establishments that are marked as 'disappeared' than the gap between target and actual
                    ##sample required
                    mean_size = float(legit_size) / n_indicator if n_indicator != 0 else 1
                    n = int(np.ceil(diff / mean_size))

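                    # sample in batches; whenever accepting a batch would
                    # overshoot the target, reject it and shrink the next one
                    # via STEP_SIZE (assumed > 1)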
                    i = 0
                    while diff > 0 and action_num < diff:
                        if n > 1:  # adjust number of records to sample in each iteration
                            n = int( np.ceil((diff - action_num) / (mean_size * STEP_SIZE**i)) )
                        sampleable_index = legit_index[np.logical_not(np.in1d(legit_index, to_reappear))]
                        if n < sampleable_index.size:
                            lucky_index = sample_noreplace(sampleable_index, n)
                        else:
                            lucky_index = sampleable_index

                        temp_num = accounting[lucky_index].sum()
                        
                        if action_num + temp_num <= diff:
                            ## accept the last batch of samples only when it does not overshoot
                            to_reappear = np.concatenate((to_reappear, lucky_index))
                            action_num += temp_num
                        else:
                            ## already overshoot, reject the last batch and reduce the number of samples
                            i += 1
                        
                        if i > MAX_ITERATIONS:
                            ## we're in trouble
                            error_str = str(error_num)
                            error_log += "%s. We exhausted %s iterations and could not find samples to match target %s exactly.\n" % \
                                                                             (error_num, MAX_ITERATIONS, target_num)
                            error_num += 1
                            break
                elif 0 < legit_size <= diff:
                    # let all re-appear
                    lucky_index = legit_index
                    to_reappear = np.concatenate((to_reappear, lucky_index))
                    action_num += legit_size
                else:
                    error_str = str(error_num)
                    error_log += "%s. There is no suitable %s to sample from.\n" % (error_num, self.dataset.get_dataset_name())
                      #+ \ ','.join([col+"="+str(self.control_totals[col][index]) for col in column_names]) + '\n'
                    error_num += 1
            
            log_status()

        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string() + '\n')
            
        if error_log:
            logger.log_error( '\n' + error_log)
        
        ## TODO: this sequence of add_elements first and then remove_elements works only when
        ## add_elements method appends data to the end of dataset and doesn't change the
        ## indices of existing elements.
        if to_reappear.size > 0:
            self._reset_attribute(self.dataset, 
                                 reset_attribute_dict = reset_dataset_attribute_value, 
                                 index=to_reappear)
           
        return self.dataset
    def _do_run(self,
                location_set,
                agent_set,
                agents_index,
                data_objects=None,
                resources=None):
        location_id_name = location_set.get_id_name()[0]
        jobsubset = DatasetSubset(agent_set, agents_index)
        if jobsubset.size() <= 0:
            return array([], dtype='int32')
        #unplace jobs
        agent_set.set_values_of_one_attribute(
            location_id_name, resize(array([-1.0]), jobsubset.size()),
            agents_index)
        sector_ids = jobsubset.get_attribute("sector_id")
        sectors = unique(sector_ids)
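        # counts[i] = number of jobs to place in sector sectors[i]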
        counts = ndimage_sum(ones((jobsubset.size(), )),
                             labels=sector_ids.astype('int32'),
                             index=sectors.astype('int32'))
        if sectors.size <= 1:
            counts = array([counts])
        variables = ["number_of_jobs_of_sector_" + str(int(x))
                     for x in sectors]
        compute_variables = [self.variable_package + "." +
                             location_set.get_dataset_name() + "." + var
                             for var in variables]
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included(
            {agent_set.get_dataset_name(): agent_set})
        location_set.compute_variables(compute_variables,
                                       dataset_pool=self.dataset_pool)
        if self.filter is None:
            location_index = arange(location_set.size())
        else:
            filter_values = location_set.compute_variables(
                [self.filter], dataset_pool=self.dataset_pool)
            location_index = where(filter_values > 0)[0]
        if location_index.size <= 0:
            logger.log_status("No locations available. Nothing to be done.")
            return array([], dtype='int32')
        location_subset = DatasetSubset(location_set, location_index)
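        # place each sector's jobs, sampling locations with probability
        # proportional to the sector's current job counts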
        i = 0
        for sector in sectors:
            distr = location_subset.get_attribute(variables[i])
            if ma.allclose(distr.sum(), 0):
                uniform_prob = 1.0 / distr.size
                distr = resize(array([uniform_prob], dtype='float64'),
                               distr.size)
                logger.log_warning(
                    "Probabilities in scaling model for sector " +
                    str(sector) +
                    " sum to 0.0.  Substituting uniform distribution!")


#                random_sample = sample(location_set.get_attribute("grid_id"), k=int(counts[i]), \
#                                   probabilities = distr)
            distr = distr / float(distr.sum())
            random_sample = probsample_replace(
                location_subset.get_id_attribute(),
                size=int(counts[i]),
                prob_array=distr)
            idx = where(sector_ids == sector)[0]
            #modify job locations
            agent_set.set_values_of_one_attribute(location_id_name,
                                                  random_sample,
                                                  agents_index[idx])
            i += 1
        return agent_set.get_attribute_by_index(location_id_name, agents_index)
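For reference, probsample_replace above draws location ids with replacement, with probability proportional to each location's current job count in the sector. A plain-numpy sketch of that behavior (the exact semantics of the OPUS helper may differ; this mirrors how it is called here):

import numpy as np

def probsample_replace_sketch(ids, size, prob_array, seed=None):
    # weighted sampling with replacement; weights are normalized to sum to 1
    rng = np.random.default_rng(seed)
    p = np.asarray(prob_array, dtype=float)
    return rng.choice(np.asarray(ids), size=size, replace=True, p=p / p.sum())

ids = np.array([101, 102, 103])
weights = np.array([10.0, 0.0, 30.0])           # location 102 can never be drawn
print(probsample_replace_sketch(ids, size=5, prob_array=weights, seed=0))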