def test_probsample_noreplace_ordering(self):
        probs = array([1, 1, 1, 1, 100, 1, 1, 1, 1, 1])
        probsum = float(probs.sum())
        first = []
        n = 100
        #seed(10)
        for i in range(n):
            sample = probsample_noreplace(arange(10), 5, prob_array=probs/probsum, return_index=False)
            # keep the first element sampled
            first.append(sample[0])
        # How many times the fifth element (which has highest probability) came out first. It should be about 90% of the time.
        freq4 = (array(first) == 4).sum()/float(n)
        assert freq4 > 0.85, "Error in ordering elements in probsample_noreplace"
        
        # check the second sampled element
        probs = array([1, 1, 100, 1, 1, 1, 1000, 1, 1, 1, 1])
        probsum = float(probs.sum())
        second = []
        while True:
            sample = probsample_noreplace(arange(11), 5, prob_array=probs/probsum, return_index=False)
            if sample[0] == 6:
                second.append(sample[1])
                if len(second) >= n:
                    break

        # How many times the third element came out second given the seventh element came out first. It should be about 90% of the time.
        freq2 = (array(second) == 2).sum()/float(n)
        assert freq2 > 0.85, "Error in ordering elements in probsample_noreplace"
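The ordering test above only makes sense if probsample_noreplace draws elements one at a time: after each draw the chosen weight is zeroed and the rest renormalized, so the highest-weight item is the most likely to come out first (here with probability 100/109, roughly 92%). A minimal numpy sketch of that behavior; weighted_sample_noreplace is a hypothetical stand-in, and the real helper additionally supports exclude_index and return_index.

import numpy as np

def weighted_sample_noreplace(source, size, prob, rng=np.random):
    # sequential weighted draws without replacement (illustrative sketch)
    prob = np.asarray(prob, dtype=float).copy()
    out = []
    for _ in range(size):
        p = prob / prob.sum()              # renormalize the remaining weights
        i = rng.choice(len(source), p=p)   # draw a single index
        out.append(source[i])
        prob[i] = 0.0                      # a drawn element cannot repeat
    return np.array(out)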
    def get_movers_from_overfilled_locations(self, agent_set, agents_index, config=None):
        """Returns an index (relative to agents_index) of agents that should be removed from their locations.
        """
        id_name = self.choice_set.get_id_name()[0]
        agents_locations = agent_set.get_attribute_by_index(id_name, agents_index)
        # check if there was an overfilling of locations
        movers = array([], dtype='int32')

        if self.compute_capacity_flag:
            overfilled_string = config.get("is_choice_overfilled_string", None) 
            if overfilled_string:
                tmp_agent_set = copy.copy(agent_set)
                overfilled_locations = where(self.choice_set.compute_variables(overfilled_string, self.dataset_pool))[0]
                current_agents_in_overfilled_locations = intersect1d(agents_locations, overfilled_locations)
                while current_agents_in_overfilled_locations.size > 0:
                    for location in current_agents_in_overfilled_locations:
                        agents_of_this_location = where(agents_locations == location)[0]
                        if agents_of_this_location.size > 1:
                            sampled_agents = probsample_noreplace(agents_of_this_location, 1)
                        else:
                            sampled_agents = agents_of_this_location
                        movers = concatenate((movers, sampled_agents))
                        
                    tmp_agent_set.set_values_of_one_attribute(id_name, -1, agents_index[movers])
                    agents_locations = tmp_agent_set.get_attribute_by_index(id_name, agents_index)
                    self.dataset_pool.replace_dataset(tmp_agent_set.get_dataset_name(), tmp_agent_set)
                    overfilled_locations = where(self.choice_set.compute_variables(overfilled_string, self.dataset_pool))[0]
                    current_agents_in_overfilled_locations = intersect1d(agents_locations, overfilled_locations)
                self.dataset_pool.replace_dataset(agent_set.get_dataset_name(), agent_set)
            else:
                new_locations_vacancy = self.get_locations_vacancy(agent_set)
                movers = self.choose_agents_to_move_from_overfilled_locations(new_locations_vacancy,
                                                            agent_set, agents_index, agents_locations)
        return concatenate((movers, where(agents_locations <= 0)[0]))
Example #3
    def _sample_by_agent_and_stratum(self, index1, index2, stratum, prob_array,
                                     chosen_choice_index,
                                     strata_sample_setting):
        """agent by agent and stratum by stratum stratified sampling, suitable for 2d prob_array and/or sample_size varies for agents
        this method is slower than _sample_by_stratum, for simpler stratified sampling use _sample_by_stratum instead"""

        rank_of_prob = rank(prob_array)
        rank_of_strata = rank(strata_sample_setting)

        J = self.__determine_sampled_index_size(strata_sample_setting,
                                                rank_of_strata)
        sampled_index = zeros((index1.size, J), dtype=DTYPE) - 1
        self._sampling_probability = zeros((index1.size, J), dtype=float32)
        self._stratum_id = ones((index1.size, J), dtype=DTYPE) * NO_STRATUM_ID

        for i in range(index1.size):
            if rank_of_strata == 3:
                strata_sample_pairs = strata_sample_setting[i, :]
            else:
                strata_sample_pairs = strata_sample_setting

            if rank_of_prob == 2:
                prob = prob_array[i, :]
            else:
                prob = prob_array

            j = 0
            for (this_stratum, this_size) in strata_sample_pairs:
                if this_size <= 0: continue
                index_not_in_stratum = where(stratum != this_stratum)[0]
                this_prob = copy.copy(prob)

                this_prob[index_not_in_stratum] = 0.0
                this_prob = normalize(this_prob)

                if nonzerocounts(this_prob) < this_size:
                    logger.log_warning(
                        "weight array doesn't have enough non-zero counts; using sample with replacement"
                    )

                # chosen_index_to_index2 = where(index2 == chosen_choice_index[i])[0]
                # exclude_index passed to probsample_noreplace needs to be indexed to index2
                this_sampled_index = probsample_noreplace(
                    index2,
                    sample_size=this_size,
                    prob_array=this_prob,
                    exclude_index=chosen_choice_index[i],
                    return_index=True)
                sampled_index[i, j:j + this_size] = this_sampled_index

                self._sampling_probability[
                    i, j:j + this_size] = this_prob[this_sampled_index]
                self._stratum_id[i, j:j + this_size] = ones(
                    (this_sampled_index.size, ), dtype=DTYPE) * this_stratum

                j += this_size

        return index2[sampled_index]
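As the unpacking loop shows, strata_sample_setting is a sequence of (stratum_id, sample_size) pairs, or a 3-d array carrying one pair list per agent; within each stratum the weights outside that stratum are zeroed and renormalized. A hedged usage sketch (values are illustrative):

# Illustrative setting: draw 3 alternatives from stratum 1 and 2 from
# stratum 2 for every agent; a 3-d array would give per-agent pairs.
strata_sample_setting = array([(1, 3),
                               (2, 2)])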
 def test_probsample_noreplace(self):
     start_time = time.time()
     sample = probsample_noreplace(self.all, self.size, self.prob, return_index=True)
     logger.log_status("probsample_noreplace %s from %s items array in " % (self.size,self.n) + str(time.time() - start_time) + " sec")
     self.assertEqual(sample.size, self.size, msg ="sample size not equal to size parameter")
     assert isinstance(sample, ndarray), "sample is not of type ndarray"
     assert 0 <= sample.min() <= self.n-1, "sampled elements not in between min and max of source array"
     assert 0 <= sample.max() <= self.n-1, "sampled elements not in between min and max of source array"
     assert alltrue(not_equal(self.prob[sample], 0.0)), "elements with zero weight in the sample"
     assert not sometrue(find_duplicates(sample)), "there are duplicates in samples"
Example #6
 def run(self, zones, run_choice_model=True, choose_job_only_in_residence_zone=True, **kwargs):
     agent_set = kwargs['agent_set']
     agents_index = kwargs.get('agents_index', None)
     if agents_index is None:
         agents_index = arange(agent_set.size())
     cond_array = zeros(agent_set.size(), dtype="bool8")
     cond_array[agents_index] = True
     zone_ids = zones.get_id_attribute()
     agents_zones = agent_set.compute_variables(['urbansim_parcel.%s.%s' % (agent_set.get_dataset_name(),
                                                     zones.get_id_name()[0])], dataset_pool=self.dataset_pool)
     if self.filter is not None:
         jobs_set_index = where( self.job_set.compute_variables(self.filter) )[0]
     else:
         jobs_set_index = arange( self.job_set.size() )  
     #self.job_set.compute_variables("urbansim_parcel.job.zone_id")
     agent_set.compute_variables("urbansim_parcel.person.zone_id")
     # remove job links from all workers
     agent_set.set_values_of_one_attribute(self.choice_attribute_name, -1*ones(agents_index.size, dtype='int32'), 
                                           index=agents_index)
     for zone_id in zone_ids:
         new_index = where(logical_and(cond_array, agents_zones == zone_id))[0]
         logger.log_status("%s for zone %s" % (self.model_short_name, zone_id))
          # the zone's home-based jobs are needed below even when run_choice_model is False
          job_set_in_this_zone = jobs_set_index[self.job_set['zone_id'][jobs_set_index] == zone_id]
          number_of_hb_jobs = job_set_in_this_zone.size
          if run_choice_model:
              kwargs['agents_index'] = new_index
              choices = ChoiceModel.run(self, **kwargs)
              prob_work_at_home = self.upc_sequence.get_probabilities()[:, 1]
             # sample workers for the number of jobs
             draw = probsample_noreplace(kwargs['agents_index'], min(kwargs['agents_index'].size, number_of_hb_jobs), 
                                         prob_work_at_home)
             agent_set.set_values_of_one_attribute(self.choice_attribute_name, 
                                               ones(draw.size, dtype=agent_set[self.choice_attribute_name].dtype), 
                                               index=draw)
             logger.log_status("%s workers choose to work at home, %s workers chose to work out of home." % 
                           (where(agent_set.get_attribute_by_index(self.choice_attribute_name, kwargs['agents_index']) == 1)[0].size,
                            where(agent_set.get_attribute_by_index(self.choice_attribute_name, kwargs['agents_index']) == 0)[0].size))            
         at_home_worker_in_this_zone = kwargs['agents_index'][agent_set[self.choice_attribute_name][kwargs['agents_index']] == 1]
         assigned_worker_in_this_zone, assigned_job_set_in_this_zone = self._assign_job_to_worker(at_home_worker_in_this_zone, 
                                                                                                  job_set_in_this_zone)
         agent_set.set_values_of_one_attribute(self.job_set.get_id_name()[0], 
                                           self.job_set.get_id_attribute()[assigned_job_set_in_this_zone], 
                                           index=assigned_worker_in_this_zone)
         agent_set.compute_variables([self.location_id_name], dataset_pool=self.dataset_pool)
         self.job_set.modify_attribute(name=VariableName(self.location_id_name).get_alias(), 
                                   data=agent_set.get_attribute_by_index(self.location_id_name, assigned_worker_in_this_zone),
                                   index=assigned_job_set_in_this_zone)
         agent_set.flush_dataset()
         self.job_set.flush_dataset()
         
         
     logger.log_status("Total: %s workers work at home, %s workers work out of home." % 
                       (where(agent_set.get_attribute(self.choice_attribute_name) == 1)[0].size,
                        where(agent_set.get_attribute(self.choice_attribute_name) == 0)[0].size
                       ))
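The draw above caps the number of at-home workers at the zone's count of home-based jobs while weighting each worker by the modeled work-at-home probability. A self-contained numpy sketch of that pattern (draw_at_home_workers is a hypothetical name, not the model's API):

import numpy as np

def draw_at_home_workers(worker_ids, prob_work_at_home, number_of_hb_jobs, rng=np.random):
    # never select more workers than there are home-based jobs in the zone
    n = min(worker_ids.size, number_of_hb_jobs)
    p = prob_work_at_home / prob_work_at_home.sum()    # normalize to probabilities
    return rng.choice(worker_ids, size=n, replace=False, p=p)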
 def choose_agents_to_move_from_overfilled_locations(self, capacity,
                                                     agent_set, agents_index, agents_locations):
     """Iterates over locations that are overfilled and selects randomly agents placed in those locations
     to be removed."""
     if capacity is None:
         return array([], dtype='int32')
     index_valid_agents_locations = where(agents_locations > 0)[0]
     valid_agents_locations = agents_locations[index_valid_agents_locations]
     index_consider_capacity = unique(self.choice_set.get_id_index(valid_agents_locations))
     capacity_of_affected_locations = capacity[index_consider_capacity]
     overfilled = where(capacity_of_affected_locations < 0)[0]
     movers = array([], dtype='int32')
     choice_ids = self.choice_set.get_id_attribute()
     for loc in overfilled:
         agents_to_move = where(valid_agents_locations == choice_ids[index_consider_capacity[loc]])[0]
         if agents_to_move.size > 0:
             n = int(-1*capacity_of_affected_locations[loc])
             sampled_agents = probsample_noreplace(index_valid_agents_locations[agents_to_move], 
                                                   min(n, agents_to_move.size))
             movers = concatenate((movers, sampled_agents))
     return movers
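In the method above a location with capacity -n is overfilled by n agents, so min(n, occupants) of its occupants are drawn with equal weights (probsample_noreplace called without prob_array, presumably defaulting to uniform). A toy numpy illustration of that rule:

import numpy as np

agents_locations = np.array([7, 7, 7, 9])    # toy data: three agents at location 7
capacity = -2                                # location 7 is two agents over capacity
occupants = np.where(agents_locations == 7)[0]
movers = np.random.choice(occupants, size=min(-capacity, occupants.size), replace=False)
# movers now holds two of the indices {0, 1, 2}, chosen uniformly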
Example #10
    def run(self, in_storage, business_dsname="business"):
        dataset_pool = DatasetPool(storage=in_storage, package_order=['psrc_parcel', 'urbansim_parcel', 'urbansim', 'opus_core'] )
        seed(1)
        allbusinesses = dataset_pool.get_dataset(business_dsname)
        parcels = dataset_pool.get_dataset('parcel')
        buildings = dataset_pool.get_dataset('building')
        parcels.compute_variables(["urbansim_parcel.parcel.residential_units", "number_of_buildings = parcel.number_of_agents(building)", 
                                   "non_residential_sqft = (parcel.aggregate(building.non_residential_sqft)).astype(int32)",
                                   "number_of_res_buildings = parcel.aggregate(urbansim_parcel.building.is_residential)",
                                   "number_of_nonres_buildings = parcel.aggregate(urbansim_parcel.building.is_non_residential)",
                                   "number_of_mixed_use_buildings = parcel.aggregate(urbansim_parcel.building.is_generic_building_type_6)"
                                   ], 
                                  dataset_pool=dataset_pool)
        restypes = [12, 4, 19, 11, 34, 10, 33]
        reslutypes = [13,14,15,24]
        is_valid_business = ones(allbusinesses.size(), dtype='bool8')
        parcels_not_matched = logical_and(in1d(allbusinesses["parcel_id"], parcels.get_id_attribute(), invert=True), allbusinesses["parcel_id"] > 0)
        if(parcels_not_matched.sum() > 0):
            is_valid_business[where(parcels_not_matched)] = False
            logger.log_warning(message="No parcel exists for %s businesses (%s jobs)" % (parcels_not_matched.sum(), 
                                                                                         allbusinesses[self.number_of_jobs_attr][where(parcels_not_matched)].sum()))
        zero_parcel = allbusinesses["parcel_id"]<=0
        if zero_parcel.sum() > 0:
            is_valid_business[where(zero_parcel)] = False
            logger.log_warning(message="%s businesses (%s jobs) located on zero parcel_id" % (zero_parcel.sum(), 
                                                                                         allbusinesses[self.number_of_jobs_attr][where(zero_parcel)].sum()))            
            
        zero_size = logical_and(is_valid_business, allbusinesses[self.number_of_jobs_attr].round() == 0)
        if(sum(zero_size) > 0):
            is_valid_business[where(zero_size)] = False
            logger.log_warning(message="%s businesses are of size 0." % sum(zero_size))
        
        businesses = DatasetSubset(allbusinesses, index=where(is_valid_business)[0])
            
        parcels.add_attribute(name="number_of_workplaces", data=parcels.sum_dataset_over_ids(businesses, constant=1))
        
        has_single_res_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_res_buildings"] == 1) # 1 (1 residential)
        parcels.add_attribute(data=has_single_res_buildings.astype("int32"), name="buildings_code")
        has_mult_res_buildings = logical_and(parcels["number_of_buildings"] > 1,  parcels["number_of_nonres_buildings"] == 0) # 2 (mult residential)
        parcels.modify_attribute("buildings_code", data=2*ones(has_mult_res_buildings.sum()), index=where(has_mult_res_buildings)) 
        has_single_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_nonres_buildings"] == 1), parcels["number_of_mixed_use_buildings"] == 0) # 3 (1 non-res)
        parcels.modify_attribute("buildings_code", data=3*ones(has_single_nonres_buildings.sum()), index=where(has_single_nonres_buildings)) 
        has_mult_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] > 1, parcels["number_of_res_buildings"] == 0), parcels["number_of_mixed_use_buildings"] == 0) # 4 (mult non-res)
        parcels.modify_attribute("buildings_code", data=4*ones(has_mult_nonres_buildings.sum()), index=where(has_mult_nonres_buildings))
        has_single_mixed_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_mixed_use_buildings"] == 1) # 5 (1 mixed-use)
        parcels.modify_attribute("buildings_code", data=5*ones(has_single_mixed_buildings.sum()), index=where(has_single_mixed_buildings))
        has_mult_mixed_buildings = logical_and(parcels["number_of_buildings"] > 1, 
                                               logical_or(logical_and(parcels["number_of_res_buildings"] > 0, parcels["number_of_nonres_buildings"] > 0), 
                                                          logical_or(parcels["number_of_mixed_use_buildings"] > 1, 
                                                                     logical_and(parcels["number_of_res_buildings"] == 0, 
                                                                                 parcels["number_of_mixed_use_buildings"] > 0)))) # 6
        parcels.modify_attribute("buildings_code", data=6*ones(has_mult_mixed_buildings.sum()), index=where(has_mult_mixed_buildings))
        has_no_building_res_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)) # 7 (vacant with res LU type)
        parcels.modify_attribute("buildings_code", data=7*ones(has_no_building_res_lutype.sum()), index=where(has_no_building_res_lutype)) 
        has_no_building_nonres_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)==0) # 8 (vacant with non-res LU type)
        parcels.modify_attribute("buildings_code", data=8*ones(has_no_building_nonres_lutype.sum()), index=where(has_no_building_nonres_lutype))
        
        business_sizes = businesses[self.number_of_jobs_attr].round().astype("int32") 
        business_location = {}
        business_location1wrkpl = zeros(businesses.size(), dtype="int32")
        business_location1wrkplres = zeros(businesses.size(), dtype="int32")
        # sample one building for cases when sampling is required.
        for ibusid in range(businesses.size()):
            idx = where(buildings['parcel_id'] == businesses['parcel_id'][ibusid])[0]
            bldgids = buildings['building_id'][idx]
            business_location[businesses['business_id'][ibusid]] = bldgids
            if bldgids.size == 1:
                business_location1wrkpl[ibusid] = bldgids[0]
            elif bldgids.size > 1:
                business_location1wrkpl[ibusid] = bldgids[sample_noreplace(arange(bldgids.size), 1)]
                if buildings['residential_units'][idx].sum() > 0:
                    # Residential buildings are sampled with probabilities proportional to residential units
                    business_location1wrkplres[ibusid] = bldgids[probsample_noreplace(arange(bldgids.size), 1, prob_array=buildings['residential_units'][idx])]
                else:
                    business_location1wrkplres[ibusid] = business_location1wrkpl[ibusid]
        
        home_based = zeros(business_sizes.sum(), dtype="bool8")
        job_building_id = zeros(business_sizes.sum(), dtype="int32")
        job_array_labels = businesses['business_id'].repeat(business_sizes)
        job_assignment_case = zeros(business_sizes.sum(), dtype="int32")
        processed_bindicator = zeros(businesses.size(), dtype="bool8")
        logger.log_status("Total number of jobs: %s" % home_based.size)
        
        # 1. up to 5 workers-business in 1 residential building
        idx_single_worker = where(business_sizes < 6)[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])
        idx_sngl_wrk_1bld_fit = where(bcode == 1)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_1bld_fit]])
        home_based[jidx] = True
        job_building_id[jidx] = business_location1wrkpl[idx_single_worker[idx_sngl_wrk_1bld_fit]].repeat(business_sizes[idx_single_worker[idx_sngl_wrk_1bld_fit]])
        job_assignment_case[jidx] = 1
        processed_bindicator[idx_single_worker[idx_sngl_wrk_1bld_fit]] = True
        logger.log_status("1. %s jobs (%s businesses) set as home-based due to <6 worker x 1 residential building fit." % (
            business_sizes[idx_single_worker[idx_sngl_wrk_1bld_fit]].sum(), idx_sngl_wrk_1bld_fit.size))
        
        # 2. up to 5 workers-business in multiple residential buildings
        idx_single_worker = where(logical_and(processed_bindicator==0, business_sizes < 6))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])
        idx_sngl_wrk_multbld_fit = where(bcode == 2)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_multbld_fit]])
        home_based[jidx] = True
        job_building_id[jidx] = business_location1wrkplres[idx_single_worker[idx_sngl_wrk_multbld_fit]].repeat(business_sizes[idx_single_worker[idx_sngl_wrk_multbld_fit]])
        job_assignment_case[jidx] = 2
        processed_bindicator[idx_single_worker[idx_sngl_wrk_multbld_fit]] = True
        logger.log_status("2. %s jobs (%s businesses) set as home-based due to single worker x multiple residential buildings fit." % (
            business_sizes[idx_single_worker[idx_sngl_wrk_multbld_fit]].sum(), idx_sngl_wrk_multbld_fit.size))
               
        # 3. single worker in single non-res building (not mixed-use)
        idx_single_worker = where(logical_and(processed_bindicator==0, business_sizes == 1))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])        
        idx_sngl_wrk_single_nonres_fit = where(bcode == 3)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_single_nonres_fit]])
        job_building_id[jidx] = business_location1wrkpl[idx_single_worker[idx_sngl_wrk_single_nonres_fit]] 
        job_assignment_case[jidx] = 3
        processed_bindicator[idx_single_worker[idx_sngl_wrk_single_nonres_fit]] = True
        logger.log_status("3. %s jobs could be placed due to single worker x single non-res building fit." % idx_sngl_wrk_single_nonres_fit.size)        
        
        # 4. single worker in multiple non-res building (not mixed-use)
        idx_single_worker = where(logical_and(processed_bindicator==0, business_sizes == 1))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])        
        idx_sngl_wrk_mult_nonres_fit = where(bcode == 4)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_mult_nonres_fit]])
        job_building_id[jidx] = business_location1wrkpl[idx_single_worker[idx_sngl_wrk_mult_nonres_fit]]
        job_assignment_case[jidx] = 4
        processed_bindicator[idx_single_worker[idx_sngl_wrk_mult_nonres_fit]] = True
        logger.log_status("4. %s jobs could be placed due to single worker x multiple non-res building fit." % idx_sngl_wrk_mult_nonres_fit.size)        

                
        # 5. single worker in single mixed-use building
        idx_single_worker = where(logical_and(processed_bindicator==0, business_sizes == 1))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])        
        idx_sngl_wrk_smu_fit = where(bcode == 5)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_smu_fit]])
        job_building_id[jidx] = business_location1wrkpl[idx_single_worker[idx_sngl_wrk_smu_fit]]  
        job_assignment_case[jidx] = 5
        processed_bindicator[idx_single_worker[idx_sngl_wrk_smu_fit]] = True
        logger.log_status("5. %s jobs in single worker x single mixed-use building." % idx_sngl_wrk_smu_fit.size)          
        
        # 6. single worker in multiple mixed-type buildings
        idx_single_worker = where(logical_and(processed_bindicator==0, business_sizes == 1))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])        
        idx_sngl_wrk_mmu_fit = where(bcode == 6)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_mmu_fit]])
        job_building_id[jidx] = business_location1wrkpl[idx_single_worker[idx_sngl_wrk_mmu_fit]]
        bldtype = buildings.get_attribute_by_id("building_type_id", business_location1wrkpl[idx_single_worker[idx_sngl_wrk_mmu_fit]])
        is_bldtype_res = in1d(bldtype, restypes)
        home_based[in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_mmu_fit]][where(is_bldtype_res)])] = True
        job_assignment_case[jidx] = 6
        processed_bindicator[idx_single_worker[idx_sngl_wrk_mmu_fit]] = True
        logger.log_status("6. %s jobs in single worker x multiple mixed-type buildings. %s jobs classified as home-based." % (idx_sngl_wrk_mmu_fit.size, is_bldtype_res.sum()))            

        # 7. up to 5 workers-business in residential parcel with no building
        idx_single_worker = where(logical_and(processed_bindicator==0, business_sizes < 6))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker]) 
        idx_sngl_wrk_vacant_res = where(bcode == 7)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_vacant_res]])
        job_assignment_case[jidx] = 7
        home_based[jidx] = True
        processed_bindicator[idx_single_worker[idx_sngl_wrk_vacant_res]] = True
        logger.log_status("7. %s jobs (%s businesses of size <6) could not be placed due to non-existing buildings in parcels with residential LU type." % (
            business_sizes[idx_single_worker[idx_sngl_wrk_vacant_res]].sum(), idx_sngl_wrk_vacant_res.size))
        
        

        # 9. 6+ workers in single residential building: do not place - will go into ELCM
        idx_more_workers = where(logical_and(processed_bindicator==0, business_sizes > 5))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_more_workers])
        idx_sngl_wrk_fit = where(bcode == 1)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_more_workers[idx_sngl_wrk_fit]])
        #job_building_id[jidx] = business_location1wrkpl[idx_more_workers[idx_sngl_wrk_fit]].repeat(business_sizes[idx_more_workers[idx_sngl_wrk_fit]])
        #home_based[jidx] = True
        job_assignment_case[jidx] = 9
        processed_bindicator[idx_more_workers[idx_sngl_wrk_fit]] = True        
        logger.log_status("9. %s jobs (%s businesses) in 6+ worker x single residential building. Not placed." % (
            business_sizes[idx_more_workers[idx_sngl_wrk_fit]].sum(), idx_sngl_wrk_fit.size))
        
        # 10. 6+ workers in multiple residential building: do not place - will go into ELCM
        idx_more_workers = where(logical_and(processed_bindicator==0, business_sizes > 5))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_more_workers])
        idx_sngl_wrk_fit = where(bcode == 2)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_more_workers[idx_sngl_wrk_fit]])
        job_assignment_case[jidx] = 10
        processed_bindicator[idx_more_workers[idx_sngl_wrk_fit]] = True
        # sample buildings to businesses by parcels 
        #bpcls = unique(businesses["parcel_id"][idx_more_workers[idx_sngl_wrk_fit]])
        #for ipcl in range(bpcls.size):
            #bidx = where(buildings['parcel_id'] == bpcls[ipcl])[0]
            #bldgids = buildings['building_id'][bidx]
            #bussids = businesses['business_id'][businesses["parcel_id"] == bpcls[ipcl]]
            ## multiply by units for sampling prop. to units rather than buildings
            #bldgids = bldgids.repeat(maximum(1, buildings['residential_units'][bidx].astype('int32'))) 
            #if bldgids.size < bussids.size:
                #bldarray = bldgids.repeat(1+ceil((bussids.size - bldgids.size)/float(bldgids.size)) )
            #else:
                #bldarray = bldgids
            #shuffle(bldarray) # randomly reorder in-place
            #for ib in range(bussids.size):
                #jidx = where(job_array_labels == bussids[ib])
                #job_building_id[jidx] = bldarray[ib]
                #home_based[jidx] = True
                #job_assignment_case[jidx] = 10
        logger.log_status("10. %s jobs (%s businesses) in 6+ worker x multiple residential building. Not placed." % (
            business_sizes[idx_more_workers[idx_sngl_wrk_fit]].sum(), idx_sngl_wrk_fit.size))        


        # 11. single workplace, 2+ workers in single non-res or mixed-use building (11.)
        idx_2plus_workers = where(logical_and(processed_bindicator==0, business_sizes > 1))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_2plus_workers])
        workplace_filter = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"][idx_2plus_workers])
        idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_or(bcode==3, bcode==5), workplace_filter==1))[0]
        which_labels = where(in1d(job_array_labels, businesses['business_id'][idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]]))[0]
        job_building_id[which_labels] = business_location1wrkpl[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]].repeat(business_sizes[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]])   
        job_assignment_case[which_labels] = 11
        processed_bindicator[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]] = True
        logger.log_status("11. %s jobs (%s businesses) could be placed due to single workplace x 2+ workers x single non-res/mixed-use building fit." % (
            business_sizes[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]].sum(), idx_sngl_wrkplace_2plus_workers.size))
        
        # 12. single workplace, 2+ workers in multiple mixed-type building
        idx_2plus_workers = where(logical_and(processed_bindicator==0, business_sizes > 1))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_2plus_workers])
        workplace_filter = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"][idx_2plus_workers])
        idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_or(bcode==6, bcode==4), workplace_filter==1))[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]])
        job_building_id[jidx] = business_location1wrkpl[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]].repeat(business_sizes[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]])    
        job_assignment_case[jidx] = 12
        processed_bindicator[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]] = True
        logger.log_status("12. %s jobs (%s businesses) could be placed due to single workplace x 2+ workers x multiple non-res/mixed building fit." % (
            business_sizes[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]].sum(), idx_sngl_wrkplace_2plus_workers.size))

        # 13. multiple workplaces, 2+ workers in single non-res or mixed building
        idx_2plus_workers = where(logical_and(processed_bindicator==0, business_sizes > 1))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_2plus_workers])
        workplace_filter = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"][idx_2plus_workers])
        idx_mult_wrkplace_2plus_workers = where(logical_and(logical_or(bcode==3, bcode==5), workplace_filter > 1))[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_2plus_workers[idx_mult_wrkplace_2plus_workers]])
        job_building_id[jidx] = business_location1wrkpl[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]].repeat(business_sizes[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]])
        job_assignment_case[jidx] = 13
        processed_bindicator[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]] = True
        logger.log_status("13. %s jobs (%s businesses) could be placed due to multiple workplaces x 2+ workers x single non-res/mixed building fit." % (
            business_sizes[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]].sum(), idx_mult_wrkplace_2plus_workers.size))
        
        # 14. multiple workplaces, 2+ workers in multiple non-res or mixed building
        idx_2plus_workers = where(logical_and(processed_bindicator==0, business_sizes > 1))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_2plus_workers])
        workplace_filter = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"][idx_2plus_workers])
        idx_mult_wrkplace_2plus_workers = where(logical_and(logical_or(bcode==4, bcode==6), workplace_filter > 1))[0]
        processed_bindicator[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]] = True
        # sample buildings to businesses by parcels 
        bpcls = unique(businesses["parcel_id"][idx_2plus_workers[idx_mult_wrkplace_2plus_workers]])
        hbasedsum = home_based.sum()
        for ipcl in range(bpcls.size):
            bidx = where(buildings['parcel_id'] == bpcls[ipcl])[0]
            bldgids = buildings['building_id'][bidx]
            bldgtypes = buildings['building_type_id'][bidx]
            bussids = businesses['business_id'][businesses["parcel_id"] == bpcls[ipcl]]
            if bldgids.size < bussids.size:
                ntimes = 1 + int(ceil((bussids.size - bldgids.size)/float(bldgids.size)))
                bldarray = bldgids.repeat(ntimes)
                typearray = bldgtypes.repeat(ntimes)
            else:
                bldarray = bldgids
                typearray = bldgtypes
            order = arange(bldarray.size)
            shuffle(order) # randomly reorder; apply the same order to ids and types
            bldarray = bldarray[order]
            # a building is residential if its *type* is in restypes (bldarray holds ids, not types)
            is_res = in1d(typearray[order], restypes)
            for ib in range(bussids.size):
                jidx = where(job_array_labels == bussids[ib])
                job_building_id[jidx] = bldarray[ib]
                home_based[jidx] = is_res[ib]
                job_assignment_case[jidx] = 14
        logger.log_status("14. %s jobs (%s businesses) could be placed due to multiple workplaces x 2+ workers x multiple non-res/mixed building fit. Classify %s jobs as home-based." % (
            business_sizes[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]].sum(), idx_mult_wrkplace_2plus_workers.size, home_based.sum()-hbasedsum))
        
        # 15. 6+ workers in residential parcel with no building
        idx_2plus_workers = where(logical_and(processed_bindicator==0, business_sizes > 5))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_2plus_workers]) 
        idx_wrk_vacant_res = where(bcode == 7)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_2plus_workers[idx_wrk_vacant_res]])
        job_assignment_case[jidx] = 15
        processed_bindicator[idx_2plus_workers[idx_wrk_vacant_res]] = True
        logger.log_status("15. %s jobs (%s businesses of 6+ workers) could not be placed due to non-existing buildings in parcels with residential LU type." % (
            business_sizes[idx_2plus_workers[idx_wrk_vacant_res]].sum(), idx_wrk_vacant_res.size))
        
        # 16. nonresidential parcel with no building
        idx_any_workers = where(processed_bindicator==0)[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_any_workers]) 
        idx_wrk_vacant_nonres = where(bcode == 8)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_any_workers[idx_wrk_vacant_nonres]])
        job_assignment_case[jidx] = 16
        processed_bindicator[idx_any_workers[idx_wrk_vacant_nonres]] = True
        logger.log_status("16. %s jobs (%s businesses) could not be placed due to non-existing buildings in parcels with rnon-esidential LU type." % (
            business_sizes[idx_any_workers[idx_wrk_vacant_nonres]].sum(), idx_wrk_vacant_nonres.size))        
        
        
        
        # jobs in messy buildings
        idx_worker = where(logical_and(processed_bindicator==0, business_sizes > 0))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_worker])
        idx_messy_fit = where(bcode == 0)[0]
        processed_bindicator[idx_worker[idx_messy_fit]] = True
        logger.log_status("%s jobs (%s businesses) could not be placed due to messy buildings." % (
            business_sizes[idx_worker[idx_messy_fit]].sum(), idx_messy_fit.size))         

        
        logger.log_status("So far %s (%s percent) home-based jobs." % (home_based.sum(), round(home_based.sum()/(home_based.size/100.),2)))
        logger.log_status("So far %s percent (%s) jobs (%s businesses) processed. %s jobs (%s businesses) remain to be processed." % \
                          (round(business_sizes[processed_bindicator].sum()/(home_based.size/100.),2),
                           business_sizes[processed_bindicator].sum(), processed_bindicator.sum(),
                          business_sizes[logical_not(processed_bindicator)].sum(), business_sizes[logical_not(processed_bindicator)].size))
        
        # create job dataset
        job_data = {"job_id": arange(job_building_id.size)+1,
                    "home_based" : home_based,
                    "building_id": job_building_id,
                    "business_id": job_array_labels,
                    "sector_id": zeros(job_building_id.size),
                    "parcel_id": zeros(job_building_id.size),
                    "assignment_case": job_assignment_case}
        
        for ib in range(businesses.size()):
            idx = where(job_data['business_id'] == businesses['business_id'][ib])
            job_data["sector_id"][idx] = businesses['sector_id'][ib]
            job_data["parcel_id"][idx] = businesses['parcel_id'][ib]

        dictstorage = StorageFactory().get_storage('dict_storage')
        dictstorage.write_table(table_name="jobs", table_data=job_data)        
        return Dataset(in_storage=dictstorage, in_table_name="jobs", dataset_name="job", id_name="job_id")
Example #11
    def run(self, job_dataset, dataset_pool, out_storage=None, jobs_table="jobs"):
        """
        Algorithm:
            1. For all non_home_based jobs that have parcel_id assigned but no building_id, try
                to choose a building from all buildings in that parcel. Draw the building with probabilities
                given by the sector-building_type distribution. The job sizes are
                fitted into the available space (the attribute job.sqft is updated).
            2. For all non_home_based jobs for which no building was found in step 1, check
                if the parcel has residential buildings. In such a case, re-assign the jobs to be
                home-based.
                Otherwise, if sum of non_residential_sqft over the involved buildings is 0,
                for all jobs that have impute_building_sqft_flag=True draw a building using
                the sector-building_type distribution and impute the corresponding sqft to 
                the non_residential_sqft of that building.
            3. For all home_based jobs that have parcel_id assigned but no building_id, try
                to choose a building from all buildings in that parcel. 
                The capacity of a single-family building is determined from sizes of the households living there 
                (for each household the minimum of number of members and 2 is taken). 
                For multi-family buildings the capacity is 50.
            4. Assign a building type to jobs that have missing building type. It is sampled 
                from the regional-wide distribution of home based and non-home based jobs.
            5. Update the table 'building_sqft_per_job' using the updated job.sqft.
        'in_storage' should contain the jobs table and the zone_averages_table. The 'dataset_pool_storage'
        should contain all other tables needed (buildings, households, building_types). 
        """
        parcel_ids = job_dataset.get_attribute("parcel_id")
        building_ids = job_dataset.get_attribute("building_id")
        home_base_status = job_dataset.get_attribute("home_based_status")
        sectors = job_dataset.get_attribute("sector_id")
        
        is_considered = logical_and(parcel_ids > 0, building_ids <= 0) # jobs that have assigned parcel but not building
        job_index_home_based = where(logical_and(is_considered, home_base_status == 1))[0]
        is_governmental_job = sectors == 18
        is_edu_job = sectors == 19
        job_index_governmental = where(logical_and(is_considered, is_governmental_job))[0]
        job_index_edu = where(logical_and(is_considered, is_edu_job))[0]
        
        building_dataset = dataset_pool.get_dataset('building')
        parcel_ids_in_bldgs = building_dataset.get_attribute("parcel_id")
        bldg_ids_in_bldgs = building_dataset.get_id_attribute()
        bldg_types_in_bldgs = building_dataset.get_attribute("building_type_id")
        
        non_res_sqft = building_dataset.get_attribute("non_residential_sqft")

        preferred_nhb_btypes = (building_dataset['building.building_type_id'] == 3) + \
                               (building_dataset['building.building_type_id'] == 8) + \
                               (building_dataset['building.building_type_id'] == 13) + \
                               (building_dataset['building.building_type_id'] == 20) + \
                               (building_dataset['building.building_type_id'] == 21)
        non_res_sqft_preferred = non_res_sqft * preferred_nhb_btypes
                                          
        is_governmental = building_dataset.compute_variables([
            "numpy.logical_and(building.disaggregate(building_type.generic_building_type_id == 7), building.building_type_id <> 18)"],
                                                                     dataset_pool=dataset_pool)
        idx_gov = where(is_governmental)[0]
        is_edu = building_dataset['building.building_type_id'] == 18
        idx_edu = where(is_edu)[0]
        
        bldgs_is_residential = logical_and(logical_not(logical_or(is_governmental, is_edu)), 
                                           building_dataset.compute_variables(["urbansim_parcel.building.is_residential"], 
                                                           dataset_pool=dataset_pool))
        
        bldgs_isnot_residential = logical_not(bldgs_is_residential)
        
        # assign buildings to educational jobs randomly
        unique_parcels = unique(parcel_ids[job_index_edu])
        logger.log_status("Placing educational jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs[idx_edu] == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_edu] == parcel)[0]
            draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
            building_ids[job_index_edu[idx_in_jobs]] = bldg_ids_in_bldgs[idx_edu[draw]]
        logger.log_status("%s educational jobs (out of %s edu. jobs) were placed." % (
                                        (building_ids[job_index_edu]>0).sum(), job_index_edu.size))
        
        # assign buildings to governmental jobs randomly
        unique_parcels = unique(parcel_ids[job_index_governmental])
        logger.log_status("Placing governmental jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs[idx_gov] == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_governmental] == parcel)[0]
            draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
            building_ids[job_index_governmental[idx_in_jobs]] = bldg_ids_in_bldgs[idx_gov[draw]]
        logger.log_status("%s governmental jobs (out of %s gov. jobs) were placed." % (
                        (building_ids[job_index_governmental]>0).sum(), job_index_governmental.size))
        logger.log_status("The unplaced governmental jobs will be added to the non-home based jobs.")
        
        #tmp = unique(parcel_ids[job_index_governmental][building_ids[job_index_governmental]<=0])
        #output_dir =  "/Users/hana"
        #write_to_text_file(os.path.join(output_dir, 'parcels_with_no_gov_bldg.txt'), tmp, delimiter='\n')
        
        # consider the unplaced governmental jobs together with other non-home-based jobs
        is_now_considered = logical_and(is_considered, building_ids <= 0)
        job_index_non_home_based = where(logical_and(is_now_considered, logical_or(home_base_status == 0, is_governmental_job)))[0]
                                    
        # assign buildings to non_home_based jobs based on available space
        unique_parcels = unique(parcel_ids[job_index_non_home_based])
        # iterate over parcels
        logger.log_status("Placing non-home-based jobs ...")
        nhb_not_placed = 0
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_non_home_based] == parcel)[0]
            # sample proportionally to the building size
            weights = non_res_sqft_preferred[idx_in_bldgs] # 1.preference: preferred building types with non-res sqft 
            if weights.sum() <= 0:
                weights = preferred_nhb_btypes[idx_in_bldgs] # 2.preference: preferred building types
                if weights.sum() <= 0:
                    weights = non_res_sqft[idx_in_bldgs] # 3.preference: any building with non-res sqft 
                    if weights.sum() <= 0: 
                        weights = bldgs_isnot_residential[idx_in_bldgs] # 4.preference: any non-res building
                        if weights.sum() <= 0: 
                            nhb_not_placed = nhb_not_placed + idx_in_jobs.size
                            continue
            draw = probsample_replace(idx_in_bldgs, idx_in_jobs.size, weights/float(weights.sum()))
            building_ids[job_index_non_home_based[idx_in_jobs]] = bldg_ids_in_bldgs[draw]
            
        logger.log_status("%s non home based jobs (out of %s nhb jobs) were placed. No capacity in buildings for %s jobs." % (
                                                                (building_ids[job_index_non_home_based]>0).sum(),
                                                                 job_index_non_home_based.size, nhb_not_placed))
        
        job_dataset.modify_attribute(name="building_id", data = building_ids)
        
        # re-classify unplaced non-home based jobs to home-based if parcels contain residential buildings

        is_now_considered = logical_and(parcel_ids > 0, building_ids <= 0)
        job_index_non_home_based_unplaced = where(logical_and(is_now_considered, 
                                               logical_and(home_base_status == 0, logical_not(is_governmental_job))))[0]
        unique_parcels = unique(parcel_ids[job_index_non_home_based_unplaced])

        logger.log_status("Try to reclassify non-home-based jobs (excluding governmental jobs) ...")
        nhb_reclass = 0
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_non_home_based_unplaced] == parcel)[0]
            where_residential = where(bldgs_is_residential[idx_in_bldgs])[0]
            if where_residential.size > 0:
                #home_base_status[job_index_non_home_based_unplaced[idx_in_jobs]] = 1 # set to home-based jobs
                nhb_reclass = nhb_reclass + idx_in_jobs.size
            else:
                draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
                #building_ids[job_index_non_home_based_unplaced[idx_in_jobs]] = bldg_ids_in_bldgs[draw]

        #job_dataset.modify_attribute(name="home_base_status", data = home_base_status)
        #job_dataset.modify_attribute(name="building_id", data = building_ids)
        
        job_index_home_based = where(logical_and(is_considered, home_base_status == 1))[0]
        logger.log_status("%s non-home based jobs reclassified as home-based." % nhb_reclass)

        # home_based jobs
        unique_parcels = unique(parcel_ids[job_index_home_based])
        capacity_in_buildings = building_dataset.compute_variables([
                          "clip_to_zero(urbansim_parcel.building.total_home_based_job_space-building.aggregate(job.home_based_status==1))"],
                             dataset_pool=dataset_pool)
        parcels_with_exceeded_capacity = []
        # iterate over parcels
        logger.log_status("Placing home-based jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            idx_in_jobs = where(parcel_ids[job_index_home_based] == parcel)[0]
            capacity = capacity_in_buildings[idx_in_bldgs]
            if capacity.sum() <= 0:
                continue
            probcomb = ones((idx_in_bldgs.size, idx_in_jobs.size))
            taken = zeros(capacity.shape, dtype="int32")
            while True:
                zero_cap = where((capacity - taken) <= 0)[0]
                probcomb[zero_cap,:] = 0
                if probcomb.sum() <= 0:
                    break
                req = probcomb.sum(axis=0)
                wmaxi = where(req==req.max())[0]
                drawjob = sample_noreplace(arange(wmaxi.size), 1) # draw job from available jobs
                imax_req = wmaxi[drawjob]
                weights = probcomb[:,imax_req]
                # sample building
                draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                taken[draw] = taken[draw] + 1
                building_ids[job_index_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                probcomb[:,imax_req] = 0
            if -1 in building_ids[job_index_home_based[idx_in_jobs]]:
                parcels_with_exceeded_capacity.append(parcel)
        parcels_with_exceeded_capacity = array(parcels_with_exceeded_capacity)    
        
        logger.log_status("%s home based jobs (out of %s hb jobs) were placed." % ((building_ids[job_index_home_based]>0).sum(),
                                                                         job_index_home_based.size))
        
        # assign building type where missing
        # determine regional distribution
        idx_home_based = where(home_base_status == 1)[0]
        idx_non_home_based = where(home_base_status == 0)[0]
        idx_bt_missing = where(home_base_status <= 0)[0]
        if idx_bt_missing.size > 0:
            # sample building types
            sample_bt = probsample_replace(array([1,0]), idx_bt_missing.size, 
               array([idx_home_based.size, idx_non_home_based.size])/float(idx_home_based.size + idx_non_home_based.size))
            # coerce to int32 (on a 64 bit machine, sample_bt will be of type int64)
            home_base_status[idx_bt_missing] = sample_bt.astype(int32)
            job_dataset.modify_attribute(name="home_based_status", data = home_base_status) 
        
        if out_storage is not None:
            job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage, attributes=AttributeType.PRIMARY)
        logger.log_status("Assigning building_id to jobs done.")
Example #12
                
                market_ids = m.choice_set.compute_one_variable_with_unknown_package( id_name, dataset_pool=dataset_pool)
                market_ids_2d = market_ids[m.model_interaction.get_choice_index()]
                model_data[i].update({'market_id':market_ids_2d, 'market_share':ms})

            logger.end_block()
        training_data.append(model_data)
        
    config = xmlconfig.get_run_configuration(options.scenario_name)
    if not options.agents_index:
        agent_set = dataset_pool.get_dataset(options.agent_set)
        agents_size = agent_set.size()
        if options.agents_filter:
            is_valid = agent_set.compute_variables(options.agents_filter)
            options.agents_index = probsample_noreplace(arange(agents_size),
                                                        options.sample_size,
                                                        prob_array=is_valid
                                                       ).tolist()
        else:
            options.agents_index = randint(0, agents_size, size=options.sample_size).tolist()

    ## regularization data
    population_data = []
    for h, hierarchy in enumerate(options.meta_models):
        model_data = []
        for i, model_name in enumerate(hierarchy):
            logger.start_block('%s' % model_name)

            config['models_configuration'][model_name]['controller']['run']['arguments']['agents_index'] = options.agents_index
            config['models'] = [{model_name:["run"]}]
            config['years'] = [options.year, options.year]
            config['seed'] = options.seed
    def run(self,
            n=500,
            realestate_dataset_name='building',
            current_year=None,
            occupied_spaces_variable="occupied_spaces",
            total_spaces_variable="total_spaces",
            run_config=None,
            debuglevel=0):
        """
        run method of the Development Project Proposal Sampling Model
        
        **Parameters**
        
            **n** : int, sample size for each iteration
                   
                   sample n proposals at a time, which are then evaluated one by one until the
                   target vacancies are satisfied or the proposals run out
                   
            **realestate_dataset_name** : string, name of the real estate dataset
            
            **current_year**: int, simulation year. If None, get value from SimulationState
            
            **occupied_spaces_variable** : string, variable name for calculating how much space is currently occupied
                                        
                                          It can either be a variable of the real estate dataset that returns
                                          the amount of occupied space, or a target_vacancy attribute
                                          that contains the names of real estate variables.
            
            **total_spaces_variable** : string, variable name for calculating total existing spaces
            
        **Returns**
        
            **proposal_set** : the proposal set, with accepted proposals marked active
            
            **demolished_buildings** : ids of buildings to be demolished for re-development
        """

        self.accepted_proposals = []
        self.demolished_buildings = []  #id of buildings to be demolished
        if self.proposal_set.n <= 0:
            logger.log_status(
                "The size of proposal_set is 0; no proposals to consider, skipping DPPSM."
            )
            return (self.proposal_set, self.demolished_buildings)

        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')

        if current_year is None:
            year = SimulationState().get_current_time()
        else:
            year = current_year
        this_year_index = where(target_vacancy['year'] == year)[0]
        target_vacancy_for_this_year = DatasetSubset(target_vacancy,
                                                     this_year_index)
        if target_vacancy_for_this_year.size() == 0:
            raise IOError('No target vacancy defined for year %s.' % year)

        ## current_target_vacancy.target_attribute_name = 'target_vacancy_rate'
        ## each column provides a category for which a target vacancy is specified
        self.column_names = list(set( target_vacancy.get_known_attribute_names() ) - \
                            set( [ target_vacancy.target_attribute_name,
                                   'year', '_hidden_id_',
                                   occupied_spaces_variable, total_spaces_variable
                                   ] )
                            )
        self.column_names.sort(reverse=True)

        ## buildings table provides existing stocks
        self.realestate_dataset = self.dataset_pool.get_dataset(
            realestate_dataset_name)

        occupied_spaces_variables = [occupied_spaces_variable]
        total_spaces_variables = [total_spaces_variable]
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names(
        ):
            occupied_spaces_variables += unique(
                target_vacancy_for_this_year[occupied_spaces_variable]).tolist(
                )
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names(
        ):
            total_spaces_variables += unique(
                target_vacancy_for_this_year[total_spaces_variable]).tolist()

        self._compute_variables_for_dataset_if_needed(
            self.realestate_dataset, self.column_names +
            occupied_spaces_variables + total_spaces_variables)
        self._compute_variables_for_dataset_if_needed(
            self.proposal_component_set,
            self.column_names + total_spaces_variables)
        self.proposal_set.compute_variables(
            "urbansim_parcel.development_project_proposal.number_of_components",
            dataset_pool=self.dataset_pool)

        n_column = len(self.column_names)
        target_vacancy_for_this_year.column_values = target_vacancy_for_this_year.get_multiple_attributes(
            self.column_names).reshape((-1, n_column))
        self.realestate_dataset.column_values = self.realestate_dataset.get_multiple_attributes(
            self.column_names).reshape((-1, n_column))
        self.proposal_component_set.column_values = self.proposal_component_set.get_multiple_attributes(
            self.column_names).reshape((-1, n_column))
        #defaults, can be changed later by spaces_variable specified in target_vacancy rates
        self.realestate_dataset.total_spaces = self.realestate_dataset[
            total_spaces_variable]
        self.proposal_component_set.total_spaces = self.proposal_component_set[
            total_spaces_variable]
        self.realestate_dataset.occupied_spaces = self.realestate_dataset[
            occupied_spaces_variable]

        self.accounting = {}
        self.logging = {}
        #has_needed_components = zeros(self.proposal_set.size(), dtype='bool')
        for index in range(target_vacancy_for_this_year.size()):
            column_value = tuple(
                target_vacancy_for_this_year.column_values[index, :].tolist())
            accounting = {
                'target_vacancy':
                target_vacancy_for_this_year[
                    target_vacancy.target_attribute_name][index]
            }

            realestate_indexes = self.get_index_by_condition(
                self.realestate_dataset.column_values, column_value)
            component_indexes = self.get_index_by_condition(
                self.proposal_component_set.column_values, column_value)

            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
            ## or in target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names(
            ):
                this_occupied_spaces_variable = target_vacancy_for_this_year[
                    occupied_spaces_variable][index]
                self.realestate_dataset.occupied_spaces[realestate_indexes] = (
                    self.realestate_dataset[this_occupied_spaces_variable]
                    [realestate_indexes]).astype(
                        self.realestate_dataset.occupied_spaces.dtype)

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names(
            ):
                this_total_spaces_variable = target_vacancy_for_this_year[
                    total_spaces_variable][index]
                self.realestate_dataset.total_spaces[realestate_indexes] = (
                    self.realestate_dataset[this_total_spaces_variable]
                    [realestate_indexes]).astype(
                        self.realestate_dataset.total_spaces.dtype)
                self.proposal_component_set.total_spaces[component_indexes] = (
                    self.proposal_component_set[this_total_spaces_variable]
                    [component_indexes]).astype(
                        self.proposal_component_set.total_spaces.dtype)

            accounting["total_spaces_variable"] = this_total_spaces_variable
            accounting["total_spaces"] = self.realestate_dataset.total_spaces[
                realestate_indexes].sum()
            accounting[
                "occupied_spaces_variable"] = this_occupied_spaces_variable
            accounting[
                "occupied_spaces"] = self.realestate_dataset.occupied_spaces[
                    realestate_indexes].sum()
            accounting["target_spaces"] = int( round( accounting["occupied_spaces"] /\
                                                     (1 - accounting["target_vacancy"])
                                               ) )
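            # e.g. 900 occupied spaces with target_vacancy 0.10 give
            # target_spaces = round(900 / (1 - 0.10)) = 1000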
            accounting["proposed_spaces"] = 0
            accounting["demolished_spaces"] = 0

            self.accounting[column_value] = accounting

            if self._is_target_reached(column_value):
                proposal_indexes = self.proposal_set.get_id_index(
                    unique(self.proposal_component_set['proposal_id']
                           [component_indexes]))
                single_component_indexes = where(
                    self.proposal_set["number_of_components"] == 1)[0]
                self.weight[intersect1d(proposal_indexes,
                                        single_component_indexes)] = 0.0

        ## handle planned proposals: all proposals with status_id == is_planned
        ## and start_year == year are accepted
        planned_proposal_indexes = where(
            logical_and(
                self.proposal_set.get_attribute("status_id") ==
                self.proposal_set.id_planned,
                self.proposal_set.get_attribute("start_year") == year))[0]

        self.consider_proposals(planned_proposal_indexes, force_accepting=True)

        # consider proposals (in this order: proposed, tentative)
        for status in [
                self.proposal_set.id_proposed, self.proposal_set.id_tentative
        ]:
            stat = (self.proposal_set.get_attribute("status_id") == status)
            if stat.sum() == 0:
                continue

            logger.log_status(
                "Sampling from %s eligible proposals of status %s." %
                (stat.sum(), status))
            iteration = 0
            while (not self._is_target_reached()):
                ## prevent proposals from being sampled for vacancy type whose target is reached
                #for column_value in self.accounting.keys():

                if self.weight[stat].sum() == 0.0:
                    logger.log_warning(
                        "Running out of proposals of status %s before vacancy targets are reached; there aren't any proposals with non-zero weight"
                        % status)
                    break

                available_indexes = where(logical_and(stat,
                                                      self.weight > 0))[0]
                sample_size = minimum(available_indexes.size, n)
                sampled_proposal_indexes = probsample_noreplace(
                    available_indexes,
                    sample_size,
                    prob_array=self.weight[available_indexes],
                    return_index=False)

                self.consider_proposals(sampled_proposal_indexes)
                self.weight[sampled_proposal_indexes] = 0
                #sample_size = 1
                #sampled_proposal_index = probsample_noreplace(available_indexes, sample_size,
                #prob_array=self.weight[available_indexes],
                #return_index=False)

                #self.consider_proposal(sampled_proposal_index)

                #self.weight[sampled_proposal_index] = 0
                iteration += 1

        self._log_status()

        # set status of accepted proposals to 'active'
        self.proposal_set.modify_attribute(name="status_id",
                                           data=self.proposal_set.id_active,
                                           index=array(self.accepted_proposals,
                                                       dtype='int32'))

        # Code added by Jesse Ayers, MAG, 7/20/2009
        # Get the active projects:
        stat_id = self.proposal_set.get_attribute('status_id')
        actv = where(stat_id == 1)[0]
        # Where there are active projects, compute the total_land_area_taken
        # and store it on the development_project_proposals dataset
        # so it can be used by the building_construction_model for the proper
        # computation of units_proposed for those projects with velocity curves
        if actv.size > 0:
            total_land_area_taken_computed = self.proposal_set.get_attribute(
                'urbansim_parcel.development_project_proposal.land_area_taken')
            self.proposal_set.modify_attribute(
                'total_land_area_taken', total_land_area_taken_computed[actv],
                actv)

        return (self.proposal_set, self.realestate_dataset.get_id_attribute()[
            self.demolished_buildings])
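
Stripped of the accounting detail, the sampling phase of the run method above is a simple control loop: sample a batch of up to n proposals without replacement (weighted), consider them, zero their weights so they are never drawn again, and stop once the targets are met or no weighted proposals remain. A hedged, self-contained sketch of that loop (names are hypothetical; numpy.random.choice with replace=False stands in for probsample_noreplace):

import numpy

def sample_until_target(weight, spaces, target, n=500):
    """Accept proposals in batches of up to n, drawn without replacement with
    probability proportional to weight, until accepted spaces reach target."""
    weight = weight.astype('float64').copy()
    accepted, proposed = [], 0
    while proposed < target:
        available = numpy.where(weight > 0)[0]
        if available.size == 0:
            break  # ran out of weighted proposals before reaching the target
        size = min(available.size, n)
        p = weight[available] / weight[available].sum()
        sampled = numpy.random.choice(available, size, replace=False, p=p)
        for i in sampled:
            if proposed >= target:
                break
            accepted.append(i)
            proposed += spaces[i]
        weight[sampled] = 0  # a sampled proposal is never considered twice
    return accepted, proposed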
Example #14
    def run(self, n=500, 
            realestate_dataset_name = 'building',
            current_year=None,
            occupied_spaces_variable="occupied_spaces",
            total_spaces_variable="total_spaces",
            minimum_spaces_attribute="minimum_spaces",
            within_parcel_selection_weight_string=None,
            within_parcel_selection_n=0,
            within_parcel_selection_compete_among_types=False,
            within_parcel_selection_threshold=75,
            within_parcel_selection_MU_same_weight=False,
            within_parcel_selection_transpose_interpcl_weight=True,
            run_config=None,
            debuglevel=0):
        """
        run method of the Development Project Proposal Sampling Model
        
        **Parameters**
        
            **n** : int, sample size for each iteration
                   
                   sample n proposals at a time, which are then evaluated one by one until the
                   target vacancies are satisfied or the proposals run out
                   
            **realestate_dataset_name** : string, name of the real estate dataset
            
            **current_year**: int, simulation year. If None, get value from SimulationState
            
            **occupied_spaces_variable** : string, variable name for calculating how much space is currently occupied
                                        
                                          It can either be a variable of the real estate dataset that returns
                                          the amount of occupied space, or a target_vacancy attribute
                                          that contains the names of real estate variables.
            
            **total_spaces_variable** : string, variable name for calculating total existing spaces
            
        **Returns**
        
            **proposal_set** : the proposal set, with accepted proposals marked active
            
            **demolished_buildings** : ids of buildings to be demolished for re-development
        """

        self.accepted_proposals = []
        self.demolished_buildings = []  #id of buildings to be demolished

        if self.proposal_set.n <= 0:
            logger.log_status("The size of proposal_set is 0; no proposals to consider, skipping DPPSM.")
            return (self.proposal_set, self.demolished_buildings)

        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')

        if current_year is None:
            year = SimulationState().get_current_time()
        else:
            year = current_year
        this_year_index = where(target_vacancy['year']==year)[0]
        target_vacancy_for_this_year = DatasetSubset(target_vacancy, this_year_index)
        if target_vacancy_for_this_year.size() == 0:
            raise IOError('No target vacancy defined for year %s.' % year)
        
        ## current_target_vacancy.target_attribute_name = 'target_vacancy_rate'
        ## each column provides a category for which a target vacancy is specified
        self.column_names = list(set( target_vacancy.get_known_attribute_names() ) - \
                            set( [ target_vacancy.target_attribute_name, 
                                   'year', '_hidden_id_', minimum_spaces_attribute,
                                   occupied_spaces_variable, total_spaces_variable
                                   ] )
                            )
        self.column_names.sort(reverse=True)
            
        ## buildings table provides existing stocks
        self.realestate_dataset = self.dataset_pool.get_dataset(realestate_dataset_name)
        
        occupied_spaces_variables = [occupied_spaces_variable]
        total_spaces_variables = [total_spaces_variable]
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            occupied_spaces_variables += unique(target_vacancy_for_this_year[occupied_spaces_variable]).tolist()
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            total_spaces_variables += unique(target_vacancy_for_this_year[total_spaces_variable]).tolist()
            
        self._compute_variables_for_dataset_if_needed(self.realestate_dataset, self.column_names + occupied_spaces_variables + total_spaces_variables)
        self._compute_variables_for_dataset_if_needed(self.proposal_component_set, self.column_names + total_spaces_variables)
        self.proposal_set.compute_variables(["urbansim_parcel.development_project_proposal.number_of_components", 
                                             "urbansim_parcel.development_project_proposal.land_area_taken"],
                                            dataset_pool=self.dataset_pool)
        
        n_column = len(self.column_names)
        self.column_names_index = {}
        for iname in range(n_column):
            self.column_names_index[self.column_names[iname]] = iname
 
        target_vacancy_for_this_year.column_values = target_vacancy_for_this_year.get_multiple_attributes(self.column_names).reshape((-1, n_column))
        self.realestate_dataset.column_values = self.realestate_dataset.get_multiple_attributes(self.column_names).reshape((-1, n_column))
        self.proposal_component_set.column_values = self.proposal_component_set.get_multiple_attributes(self.column_names).reshape((-1, n_column))
        #defaults, can be changed later by spaces_variable specified in target_vacancy rates
        self.realestate_dataset.total_spaces = self.realestate_dataset[total_spaces_variable]
        self.proposal_component_set.total_spaces = self.proposal_component_set[total_spaces_variable]
        self.realestate_dataset.occupied_spaces = self.realestate_dataset[occupied_spaces_variable]
        
        self.accounting = {}; self.logging = {}
        #has_needed_components = zeros(self.proposal_set.size(), dtype='bool')
        for index in range(target_vacancy_for_this_year.size()):
            column_value = tuple(target_vacancy_for_this_year.column_values[index,:].tolist())
            accounting = {'target_vacancy': target_vacancy_for_this_year[target_vacancy.target_attribute_name][index]}
            if minimum_spaces_attribute in target_vacancy_for_this_year.get_known_attribute_names():
                accounting['minimum_spaces'] = target_vacancy_for_this_year[minimum_spaces_attribute][index]
            realestate_indexes = self.get_index_by_condition(self.realestate_dataset.column_values, column_value)
            component_indexes = self.get_index_by_condition(self.proposal_component_set.column_values, column_value)
            
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
            ## or in target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year[occupied_spaces_variable][index]
                self.realestate_dataset.occupied_spaces[realestate_indexes] = (self.realestate_dataset[this_occupied_spaces_variable][realestate_indexes]
                                                                               ).astype(self.realestate_dataset.occupied_spaces.dtype)
    
            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year[total_spaces_variable][index]    
                self.realestate_dataset.total_spaces[realestate_indexes] = (self.realestate_dataset[this_total_spaces_variable][realestate_indexes]
                                                                            ).astype(self.realestate_dataset.total_spaces.dtype)
                self.proposal_component_set.total_spaces[component_indexes] = (self.proposal_component_set[this_total_spaces_variable][component_indexes]
                                                                               ).astype(self.proposal_component_set.total_spaces.dtype)
                
            accounting["total_spaces_variable"] = this_total_spaces_variable
            accounting["total_spaces"] = self.realestate_dataset.total_spaces[realestate_indexes].sum()
            accounting["occupied_spaces_variable"] = this_occupied_spaces_variable
            accounting["occupied_spaces"] = self.realestate_dataset.occupied_spaces[realestate_indexes].sum()
            accounting["target_spaces"] = int( round( accounting["occupied_spaces"] /\
                                                     (1 - accounting["target_vacancy"])
                                               ) )
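            # e.g. 900 occupied spaces with target_vacancy 0.10 give round(900/0.9) = 1000 target spaces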
            accounting["proposed_spaces"] = 0
            accounting["demolished_spaces"] = 0
            
            self.accounting[column_value] = accounting
            
            if self._is_target_reached(column_value):
                proposal_indexes = self.proposal_set.get_id_index(unique(self.proposal_component_set['proposal_id'][component_indexes]))
                if n_column == 1:
                    comp_indexes = where(ndimage.sum(self.proposal_component_set[self.column_names[0]]==column_value[0], 
                                    labels=self.proposal_component_set['proposal_id'], 
                                    index=self.proposal_set.get_id_attribute()
                                    ) == self.proposal_set["number_of_components"])[0]
                else:
                    comp_indexes = where(self.proposal_set["number_of_components"]==1)[0]
                target_reached_prop_idx = intersect1d(proposal_indexes, comp_indexes)
                self.weight[target_reached_prop_idx] = 0.0
                self.proposal_set["status_id"][intersect1d(target_reached_prop_idx, where(self.proposal_set["status_id"]==self.proposal_set.id_tentative)[0])] = self.proposal_set.id_no_demand
                
        ## handle planned proposals: all proposals with status_id == is_planned 
        ## and start_year == year are accepted
        planned_proposal_indexes = where(logical_and(
                                                  self.proposal_set.get_attribute("status_id") == self.proposal_set.id_planned, 
                                                  self.proposal_set.get_attribute("start_year") == year ) 
                                        )[0]
        
        logger.start_block("Processing %s planned proposals" % planned_proposal_indexes.size)
        self.consider_proposals(planned_proposal_indexes, force_accepting=True)
        logger.end_block()
        
        if within_parcel_selection_n > 0:
            logger.start_block("Selecting proposals within parcels (%s proposals per parcel)" % within_parcel_selection_n)
            self.select_proposals_within_parcels(nmax=within_parcel_selection_n, weight_string=within_parcel_selection_weight_string,
                                                 compete_among_types=within_parcel_selection_compete_among_types, 
                                                 filter_threshold=within_parcel_selection_threshold,
                                                 MU_same_weight=within_parcel_selection_MU_same_weight,
                                                 transpose_interpcl_weight=within_parcel_selection_transpose_interpcl_weight)
            logger.end_block()
        
        # consider proposals (in this order: proposed, tentative)
        for status in [self.proposal_set.id_proposed, self.proposal_set.id_tentative]:
            stat = (self.proposal_set.get_attribute("status_id") == status)
            if stat.sum() == 0:
                continue
            
            logger.log_status("Sampling from %s eligible proposals of status %s." % (stat.sum(), status))
            iteration = 0
            while (not self._is_target_reached()):
                ## prevent proposals from being sampled for vacancy type whose target is reached
                #for column_value in self.accounting.keys():
                
                if self.weight[stat].sum() == 0.0:
                    logger.log_warning("Running out of proposals of status %s before vacancy targets are reached; there aren't any proposals with non-zero weight" % status)
                    break
                
                available_indexes = where(logical_and(stat, self.weight > 0))[0]
                sample_size = minimum(available_indexes.size, n)
                sampled_proposal_indexes = probsample_noreplace(available_indexes, sample_size, 
                                                                prob_array=self.weight[available_indexes],
                                                                return_index=False)
                #sorted_sampled_indices = argsort(self.weight[sampled_proposal_indexes])
                #self.consider_proposals(sampled_proposal_indexes[sorted_sampled_indices][::-1])
                self.consider_proposals(sampled_proposal_indexes)
                self.weight[sampled_proposal_indexes] = 0
                iteration += 1
        
        self._log_status()
        
        # set status of accepted proposals to 'active'
        self.proposal_set.modify_attribute(name="status_id", 
                                           data=self.proposal_set.id_active,
                                           index=array(self.accepted_proposals, dtype='int32'))
        
        # Code added by Jesse Ayers, MAG, 7/20/2009
        # Get the active projects:
        stat_id = self.proposal_set.get_attribute('status_id')
        actv = where(stat_id==1)[0]
        # Where there are active projects, compute the total_land_area_taken
        # and store it on the development_project_proposals dataset
        # so it can be used by the building_construction_model for the proper
        # computation of units_proposed for those projects with velocity curves
        if actv.size > 0:          
            total_land_area_taken_computed = self.proposal_set['land_area_taken']
            self.proposal_set.modify_attribute('total_land_area_taken', total_land_area_taken_computed[actv], actv)

        return (self.proposal_set, self.realestate_dataset.get_id_attribute()[self.demolished_buildings])
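
A hypothetical invocation of this variant, kept as comments because it assumes an already-constructed model instance (dppsm) and illustrative argument values:

# proposal_set, demolished = dppsm.run(n=500,
#                                      current_year=2015,
#                                      within_parcel_selection_n=2)
# proposal_set comes back with accepted proposals set to active status;
# demolished holds the ids of buildings to be removed for re-development.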
    def run(self, run_choice_model=True, choose_job_only_in_residence_zone=False, 
            residence_id='zone_id', *args, **kwargs):
        agent_set = kwargs['agent_set']
        agents_index = kwargs.get('agents_index', None)
        if agents_index is None:
            agents_index = arange(agent_set.size())
        if agents_index.size <= 0:
            logger.log_status("Nothing to be done.")
            return
        
        if self.filter is not None:
            jobs_set_index = where( self.job_set.compute_variables(self.filter) )[0]
        else:
            jobs_set_index = arange( self.job_set.size() )
            
        if run_choice_model:
            choices = ChoiceModel.run(self, *args, **kwargs)
            if self.match_number_of_jobs:
                prob_work_at_home = self.upc_sequence.probabilities[:, 1]
                # sample as many workers as there are jobs
                draw = probsample_noreplace(arange(agents_index.size), min(agents_index.size, jobs_set_index.size), 
                                            prob_work_at_home)
                choices = zeros(agents_index.size, dtype='int32')
                choices[draw] = 1
                
            agent_set.set_values_of_one_attribute(self.choice_attribute_name, 
                                                  choices, 
                                                  index=agents_index)
            at_home_worker_index = agents_index[choices==1]
            logger.log_status("%s workers choose to work at home, %s workers chose to work out of home." % 
                              (where(agent_set.get_attribute_by_index(self.choice_attribute_name, agents_index) == 1)[0].size,
                               where(agent_set.get_attribute_by_index(self.choice_attribute_name, agents_index) == 0)[0].size))            
        else:
            at_home_worker_index = where(logical_and( 
                                                     agent_set.get_attribute(self.choice_attribute_name) == 1,
                                                     agent_set.get_attribute('job_id') <= 0
                                                     )
                                        )[0]
        
            
        logger.log_status("Total: %s workers work at home, (%s workers work out of home), will try to assign %s workers to %s jobs." % 
                          (where(agent_set.get_attribute(self.choice_attribute_name) == 1)[0].size,
                           where(agent_set.get_attribute(self.choice_attribute_name) == 0)[0].size,
                          at_home_worker_index.size,
                          jobs_set_index.size
                          ))

        if not choose_job_only_in_residence_zone:
            assigned_worker_index, assigned_job_index = self._assign_job_to_worker(at_home_worker_index, jobs_set_index)
        else:
            agent_set.compute_one_variable_with_unknown_package(residence_id, dataset_pool=self.dataset_pool)
            self.job_set.compute_one_variable_with_unknown_package(residence_id, dataset_pool=self.dataset_pool)
            agent_zone_ids = agent_set.get_attribute_by_index(residence_id, at_home_worker_index)
            job_zone_ids = self.job_set.get_attribute_by_index(residence_id, jobs_set_index)
            unique_zones = unique(job_zone_ids)
            assigned_worker_index = array([], dtype="int32")
            assigned_job_index = array([], dtype="int32")
            for this_zone in unique_zones:
                logger.log_status("%s: %s" % (residence_id, this_zone))
                if this_zone <= 0: continue
                at_home_worker_in_this_zone = where(agent_zone_ids == this_zone)[0]
                job_set_in_this_zone = where(job_zone_ids == this_zone)[0]
                assigned_worker_in_this_zone, assigned_job_set_in_this_zone = self._assign_job_to_worker(at_home_worker_in_this_zone, job_set_in_this_zone)
                assigned_worker_index = concatenate((assigned_worker_index, at_home_worker_index[assigned_worker_in_this_zone]))
                assigned_job_index = concatenate((assigned_job_index, jobs_set_index[assigned_job_set_in_this_zone]))

        ## each worker can only be assigned to 1 job
        #assert assigned_worker_index.size == unique(assigned_worker_index).size
        agent_set.set_values_of_one_attribute(self.job_set.get_id_name()[0], 
                                              self.job_set.get_id_attribute()[assigned_job_index], 
                                              index=assigned_worker_index)
        agent_set.compute_variables([self.location_id_name], dataset_pool=self.dataset_pool)
        self.job_set.modify_attribute(name=VariableName(self.location_id_name).get_alias(), 
                                      data=agent_set.get_attribute_by_index(self.location_id_name, assigned_worker_index),
                                      index=assigned_job_index)
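
When match_number_of_jobs is set, the method above overrides the raw choice-model output so that exactly min(n_workers, n_jobs) workers are marked as working at home, drawn without replacement with probability proportional to each worker's at-home probability. A minimal sketch of that step under the same assumptions (numpy.random.choice replaces probsample_noreplace; all names are illustrative):

import numpy

def choose_at_home_workers(prob_work_at_home, njobs):
    """Return a 0/1 array marking min(nworkers, njobs) workers as at-home,
    sampled without replacement, weighted by prob_work_at_home."""
    nworkers = prob_work_at_home.size
    p = prob_work_at_home / prob_work_at_home.sum()
    draw = numpy.random.choice(nworkers, min(nworkers, njobs), replace=False, p=p)
    choices = numpy.zeros(nworkers, dtype='int32')
    choices[draw] = 1
    return choices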
    def run(self,
            dataset1,
            dataset2,
            index1=None,
            index2=None,
            sample_size=10,
            weight=None,
            include_chosen_choice=False,
            with_replacement=False,
            resources=None,
            dataset_pool=None):
        """this function samples number of sample_size (scalar value) alternatives from dataset2
        for agent set specified by dataset1.
        If index1 is not None, only samples alterantives for agents with indices in index1;
        if index2 is not None, only samples alternatives from indices in index2.
        sample_size specifies number of alternatives to be sampled for each agent.
        weight, to be used as sampling weight, is either an attribute name of dataset2, or a 1d
        array of the same length as index2 or 2d array of shape (index1.size, index2.size).

        Also refer to document of interaction_dataset"""

        if dataset_pool is None:
            try:
                sc = SessionConfiguration()
                dataset_pool = sc.get_dataset_pool()
            except:
                dataset_pool = DatasetPool()

        local_resources = Resources(resources)
        local_resources.merge_if_not_None({
            "dataset1":
            dataset1,
            "dataset2":
            dataset2,
            "index1":
            index1,
            "index2":
            index2,
            "sample_size":
            sample_size,
            "weight":
            weight,
            "with_replacement":
            with_replacement,
            "include_chosen_choice":
            include_chosen_choice
        })

        local_resources.check_obligatory_keys(
            ['dataset1', 'dataset2', 'sample_size'])
        agent = local_resources["dataset1"]
        index1 = local_resources.get("index1", None)
        if index1 is None:
            index1 = arange(agent.size())
        choice = local_resources["dataset2"]
        index2 = local_resources.get("index2", None)
        if index2 is None:
            index2 = arange(choice.size())

        if index1.size == 0 or index2.size == 0:
            err_msg = "either choice size or agent size is zero, return None"
            logger.log_warning(err_msg)
            return None

        include_chosen_choice = local_resources.get("include_chosen_choice",
                                                    False)
        J = local_resources["sample_size"]
        if include_chosen_choice:
            J = J - 1
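            # one slot of the requested sample_size is reserved for the agent's
            # currently chosen alternative, which is prepended to the sample below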

        with_replacement = local_resources.get("with_replacement")

        weight = local_resources.get("weight", None)
        if isinstance(weight, str):
            if weight in choice.get_known_attribute_names():
                weight = choice.get_attribute(weight)
                rank_of_weight = 1
            else:
                varname = VariableName(weight)
                if varname.get_dataset_name() == choice.get_dataset_name():
                    weight = choice.compute_variables(
                        weight, dataset_pool=dataset_pool)
                    rank_of_weight = 1
                elif varname.get_interaction_set_names() is not None:
                    ## weights can be an interaction variable
                    interaction_dataset = InteractionDataset(local_resources)
                    weight = interaction_dataset.compute_variables(
                        weight, dataset_pool=dataset_pool)
                    rank_of_weight = 2
                    assert (len(weight.shape) >= rank_of_weight)
                else:
                    err_msg = ("weight is neither a known attribute name "
                               "nor a simple variable from the choice dataset "
                               "nor an interaction variable: '%s'" % weight)
                    logger.log_error(err_msg)
                    raise ValueError(err_msg)
        elif isinstance(weight, ndarray):
            rank_of_weight = weight.ndim
        elif not weight:  ## weight is None or empty string
            weight = ones(index2.size)
            rank_of_weight = 1
        else:
            err_msg = "unkown weight type"
            logger.log_error(err_msg)
            raise TypeError, err_msg

        if (weight.size != index2.size) and (weight.shape[rank_of_weight - 1]
                                             != index2.size):
            if weight.shape[rank_of_weight - 1] == choice.size():
                if rank_of_weight == 1:
                    weight = take(weight, index2)
                if rank_of_weight == 2:
                    weight = take(weight, index2, axis=1)
            else:
                err_msg = "weight array size doesn't match the size of dataset2 or its index"
                logger.log_error(err_msg)
                raise ValueError(err_msg)

        prob = normalize(weight)

        #chosen_choice = ones(index1.size) * UNPLACED_ID
        chosen_choice_id = agent.get_attribute(choice.get_id_name()[0])[index1]
        #index_of_placed_agent = where(greater(chosen_choice_id, UNPLACED_ID))[0]
        chosen_choice_index = choice.try_get_id_index(
            chosen_choice_id, return_value_if_not_found=UNPLACED_ID)
        chosen_choice_index_to_index2 = lookup(chosen_choice_index,
                                               index2,
                                               index_if_not_found=UNPLACED_ID)

        if rank_of_weight == 1:  # if weight_array is 1d, then each agent shares the same weight for choices
            replace = with_replacement  # sampling with/without replacement
            non_zero_counts = nonzerocounts(weight)
            if non_zero_counts < J:
                logger.log_warning(
                    "weight array dosen't have enough non-zero counts, use sample with replacement"
                )
                replace = True
            if non_zero_counts > 0:
                sampled_index = prob2dsample(
                    index2,
                    sample_size=(index1.size, J),
                    prob_array=prob,
                    exclude_index=chosen_choice_index_to_index2,
                    replace=replace,
                    return_index=True)
            else:
                # all alternatives have a zero weight
                sampled_index = zeros((index1.size, 0), dtype=DTYPE)
            #return index2[sampled_index]

        if rank_of_weight == 2:
            sampled_index = zeros((index1.size, J), dtype=DTYPE) - 1

            for i in range(index1.size):
                replace = with_replacement  # sampling with/without replacement
                i_prob = prob[i, :]
                if nonzerocounts(i_prob) < J:
                    logger.log_warning(
                        "weight array dosen't have enough non-zero counts, use sample with replacement"
                    )
                    replace = True

                #exclude_index passed to probsample_noreplace needs to be indexed to index2
                sampled_index[i, :] = probsample_noreplace(
                    index2,
                    sample_size=J,
                    prob_array=i_prob,
                    exclude_index=chosen_choice_index_to_index2[i],
                    return_index=True)
        sampling_prob = take(prob, sampled_index)
        sampled_index_within_prob = sampled_index.copy()
        sampled_index = index2[sampled_index]
        is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
        #chosen_choice = -1 * ones(chosen_choice_index.size, dtype="int32")
        if include_chosen_choice:
            sampled_index = column_stack(
                (chosen_choice_index[:, newaxis], sampled_index))
            is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
            is_chosen_choice[chosen_choice_index != UNPLACED_ID, 0] = 1
            #chosen_choice[where(is_chosen_choice)[0]] = where(is_chosen_choice)[1]
            ## this is necessary because prob is indexed to index2, not to the choice set (as is chosen_choice_index)
            sampling_prob_for_chosen_choices = take(
                prob, chosen_choice_index_to_index2[:, newaxis])
            ## if chosen choice chosen equals unplaced_id then the sampling prob is 0
            sampling_prob_for_chosen_choices[where(
                chosen_choice_index == UNPLACED_ID)[0], ] = 0.0
            sampling_prob = column_stack(
                [sampling_prob_for_chosen_choices, sampling_prob])

        interaction_dataset = self.create_interaction_dataset(
            dataset1, dataset2, index1, sampled_index)
        interaction_dataset.add_attribute(sampling_prob,
                                          '__sampling_probability')
        interaction_dataset.add_attribute(is_chosen_choice, 'chosen_choice')

        if local_resources.get("include_mnl_bias_correction_term", False):
            if include_chosen_choice:
                sampled_index_within_prob = column_stack(
                    (chosen_choice_index_to_index2[:, newaxis],
                     sampled_index_within_prob))
            interaction_dataset.add_mnl_bias_correction_term(
                prob, sampled_index_within_prob)

        ## to get the older returns
        #sampled_index = interaction_dataset.get_2d_index()
        #chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
        #where_chosen = where(interaction_dataset.get_attribute("chosen_choice"))
        #chosen_choices[where_chosen[0]]=where_chosen[1]
        #return (sampled_index, chosen_choice)

        return interaction_dataset
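
To make the include_chosen_choice bookkeeping above concrete, here is a tiny standalone demonstration (toy data, not from the model) of how each agent's chosen alternative ends up in column 0 of the sampled index:

import numpy
from numpy import column_stack, newaxis

sampled_index = numpy.array([[5, 7], [2, 9], [4, 1]])  # 2 sampled alternatives for 3 agents
chosen_choice_index = numpy.array([3, 9, 0])           # each agent's current choice
stacked = column_stack((chosen_choice_index[:, newaxis], sampled_index))
# stacked -> [[3, 5, 7], [9, 2, 9], [0, 4, 1]]
is_chosen = numpy.zeros(stacked.shape, dtype='bool')
is_chosen[:, 0] = True  # column 0 holds the chosen alternative
# (the model additionally masks agents whose chosen id is UNPLACED_ID)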
    def run(self, in_storage, out_storage=None, business_dsname="business", zone_dsname=None):
        dataset_pool = DatasetPool(storage=in_storage, package_order=['psrc_parcel', 'urbansim_parcel', 'urbansim', 'opus_core'] )
        seed(1)
        allbusinesses = dataset_pool.get_dataset(business_dsname)
        parcels = dataset_pool.get_dataset('parcel')
        buildings = dataset_pool.get_dataset('building')
        parcels.compute_variables(["urbansim_parcel.parcel.residential_units", "number_of_buildings = parcel.number_of_agents(building)", 
                                   "non_residential_sqft = (parcel.aggregate(building.non_residential_sqft)).astype(int32)",
                                   "number_of_res_buildings = parcel.aggregate(urbansim_parcel.building.is_residential)",
                                   "number_of_nonres_buildings = parcel.aggregate(urbansim_parcel.building.is_non_residential)",
                                   "number_of_mixed_use_buildings = parcel.aggregate(urbansim_parcel.building.is_generic_building_type_6)"
                                   ], 
                                  dataset_pool=dataset_pool)
        restypes = [12, 4, 19, 11, 34, 10, 33]
        reslutypes = [13,14,15,24]
        is_valid_business = ones(allbusinesses.size(), dtype='bool8')
        parcels_not_matched = logical_and(in1d(allbusinesses["parcel_id"], parcels.get_id_attribute(), invert=True), allbusinesses["parcel_id"] > 0)
        if(parcels_not_matched.sum() > 0):
            is_valid_business[where(parcels_not_matched)] = False
            logger.log_warning(message="No parcel exists for %s businesses (%s jobs)" % (parcels_not_matched.sum(), 
                                                                                         allbusinesses[self.number_of_jobs_attr][where(parcels_not_matched)].sum()))
        zero_parcel = allbusinesses["parcel_id"]<=0
        if zero_parcel.sum() > 0:
            is_valid_business[where(zero_parcel)] = False
            logger.log_warning(message="%s businesses (%s jobs) located on zero parcel_id" % (zero_parcel.sum(), 
                                                                                         allbusinesses[self.number_of_jobs_attr][where(zero_parcel)].sum()))            
            
        zero_size = logical_and(is_valid_business, allbusinesses[self.number_of_jobs_attr].round() == 0)
        if(sum(zero_size) > 0):
            is_valid_business[where(zero_size)] = False
            logger.log_warning(message="%s businesses are of size 0." % sum(zero_size))
        
        businesses = DatasetSubset(allbusinesses, index=where(is_valid_business)[0])
        
        parcels.add_attribute(name="number_of_workplaces", data=parcels.sum_dataset_over_ids(businesses, constant=1))
        
        has_single_res_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_res_buildings"] == 1) # 1 (1 residential)
        parcels.add_attribute(data=has_single_res_buildings.astype("int32"), name="buildings_code")
        has_mult_res_buildings = logical_and(parcels["number_of_buildings"] > 1,  parcels["number_of_nonres_buildings"] == 0) # 2 (mult residential)
        parcels.modify_attribute("buildings_code", data=2*ones(has_mult_res_buildings.sum()), index=where(has_mult_res_buildings)) 
        has_single_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_nonres_buildings"] == 1), parcels["number_of_mixed_use_buildings"] == 0) # 3 (1 non-res)
        parcels.modify_attribute("buildings_code", data=3*ones(has_single_nonres_buildings.sum()), index=where(has_single_nonres_buildings)) 
        has_mult_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] > 1, parcels["number_of_res_buildings"] == 0), parcels["number_of_mixed_use_buildings"] == 0) # 4 (mult non-res)
        parcels.modify_attribute("buildings_code", data=4*ones(has_mult_nonres_buildings.sum()), index=where(has_mult_nonres_buildings))
        has_single_mixed_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_mixed_use_buildings"] == 1) # 5 (1 mixed-use)
        parcels.modify_attribute("buildings_code", data=5*ones(has_single_mixed_buildings.sum()), index=where(has_single_mixed_buildings))
        has_mult_mixed_buildings = logical_and(parcels["number_of_buildings"] > 1, 
                                               logical_or(logical_and(parcels["number_of_res_buildings"] > 0, parcels["number_of_nonres_buildings"] > 0), 
                                                          logical_or(parcels["number_of_mixed_use_buildings"] > 1, 
                                                                     logical_and(parcels["number_of_res_buildings"] == 0, 
                                                                                 parcels["number_of_mixed_use_buildings"] > 0)))) # 6
        parcels.modify_attribute("buildings_code", data=6*ones(has_mult_mixed_buildings.sum()), index=where(has_mult_mixed_buildings))
        has_no_building_res_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)) # 7 (vacant with res LU type)
        parcels.modify_attribute("buildings_code", data=7*ones(has_no_building_res_lutype.sum()), index=where(has_no_building_res_lutype)) 
        has_no_building_nonres_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)==0) # 8 (vacant with non-res LU type)
        parcels.modify_attribute("buildings_code", data=8*ones(has_no_building_nonres_lutype.sum()), index=where(has_no_building_nonres_lutype))
        
        business_sizes = businesses[self.number_of_jobs_attr].round().astype("int32") 
        business_location = {}
        business_location1wrkpl = zeros(businesses.size(), dtype="int32")
        business_location1wrkplres = zeros(businesses.size(), dtype="int32")
        business_ids = businesses.get_id_attribute()
        # sample one building for cases when sampling is required.
        for ibusid in range(businesses.size()):
            idx = where(buildings['parcel_id'] == businesses['parcel_id'][ibusid])[0]
            bldgids = buildings['building_id'][idx]
            business_location[business_ids[ibusid]] = bldgids
            if bldgids.size == 1:
                business_location1wrkpl[ibusid] = bldgids[0]
            elif bldgids.size > 1:
                business_location1wrkpl[ibusid] = bldgids[sample_noreplace(arange(bldgids.size), 1)]
                if buildings['residential_units'][idx].sum() > 0:
                    # Residential buildings are sampled with probabilities proportional to residential units
                    business_location1wrkplres[ibusid] = bldgids[probsample_noreplace(arange(bldgids.size), 1, prob_array=buildings['residential_units'][idx])]
                else:
                    business_location1wrkplres[ibusid] = business_location1wrkpl[ibusid]
        
        home_based = zeros(business_sizes.sum(), dtype="bool8")
        job_building_id = zeros(business_sizes.sum(), dtype="int32")
        job_array_labels = business_ids.repeat(business_sizes)
        job_assignment_case = zeros(business_sizes.sum(), dtype="int32")
        processed_bindicator = zeros(businesses.size(), dtype="bool8")
        business_codes = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"])
        business_nworkplaces = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"])
        logger.log_status("Total number of jobs: %s" % home_based.size)
        
        # 1. 1-2 worker business in 1 residential building
        idx_sngl_wrk_1bld_fit = where(logical_and(business_sizes < 3, business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_1bld_fit])
        home_based[jidx] = True
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_1bld_fit].repeat(business_sizes[idx_sngl_wrk_1bld_fit])
        job_assignment_case[jidx] = 1
        processed_bindicator[idx_sngl_wrk_1bld_fit] = True
        logger.log_status("1. %s jobs (%s businesses) set as home-based due to 1-2 worker x 1 residential building fit." % (
            business_sizes[idx_sngl_wrk_1bld_fit].sum(), idx_sngl_wrk_1bld_fit.size))
        
        # 2. 1-2 worker business in multiple residential buildings
        idx_sngl_wrk_multbld_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_multbld_fit])
        home_based[jidx] = True
        job_building_id[jidx] = business_location1wrkplres[idx_sngl_wrk_multbld_fit].repeat(business_sizes[idx_sngl_wrk_multbld_fit])
        job_assignment_case[jidx] = 2
        processed_bindicator[idx_sngl_wrk_multbld_fit] = True
        logger.log_status("2. %s jobs (%s businesses) set as home-based due to 1-2 worker x multiple residential buildings fit." % (
            business_sizes[idx_sngl_wrk_multbld_fit].sum(), idx_sngl_wrk_multbld_fit.size))
               
        # 3. 1-2 worker in single non-res building (not mixed-use)
        idx_sngl_wrk_single_nonres_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 3))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_single_nonres_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_single_nonres_fit].repeat(business_sizes[idx_sngl_wrk_single_nonres_fit])
        job_assignment_case[jidx] = 3
        processed_bindicator[idx_sngl_wrk_single_nonres_fit] = True
        logger.log_status("3. %s jobs (%s businesses) placed due to 1-2 worker x single non-res building fit." % (
                          business_sizes[idx_sngl_wrk_single_nonres_fit].sum(), idx_sngl_wrk_single_nonres_fit.size))     
        
        # 4. 1-2 worker in multiple non-res building (not mixed-use)
        idx_sngl_wrk_mult_nonres_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 4))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mult_nonres_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mult_nonres_fit].repeat(business_sizes[idx_sngl_wrk_mult_nonres_fit])
        job_assignment_case[jidx] = 4
        processed_bindicator[idx_sngl_wrk_mult_nonres_fit] = True
        logger.log_status("4. %s jobs (%s businesses) placed due to 1-2 worker x multiple non-res building fit." % (
            business_sizes[idx_sngl_wrk_mult_nonres_fit].sum(), idx_sngl_wrk_mult_nonres_fit.size))      
                
        # 5. 1-2 worker in single mixed-use building
        idx_sngl_wrk_smu_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 5))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_smu_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_smu_fit].repeat(business_sizes[idx_sngl_wrk_smu_fit])
        job_assignment_case[jidx] = 5
        processed_bindicator[idx_sngl_wrk_smu_fit] = True
        logger.log_status("5. %s jobs (%s businesses) in 1-2 worker x single mixed-use building." % (
            business_sizes[idx_sngl_wrk_smu_fit].sum(), idx_sngl_wrk_smu_fit.size))       
        
        # 6. 1-2 worker in multiple mixed-type buildings
        idx_sngl_wrk_mmu_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 6))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mmu_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mmu_fit].repeat(business_sizes[idx_sngl_wrk_mmu_fit])
        bldtype = buildings.get_attribute_by_id("building_type_id", business_location1wrkpl[idx_sngl_wrk_mmu_fit])
        is_bldtype_res = in1d(bldtype, restypes)
        home_based[in1d(job_array_labels, business_ids[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)])] = True
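        # Hypothetical illustration: if restypes = [19, 20] and the assigned
        # buildings have types [19, 4], only the jobs of the first business get
        # home_based=True; the job-level mask is re-derived per business via in1d.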
        job_assignment_case[jidx] = 6
        processed_bindicator[idx_sngl_wrk_mmu_fit] = True
        logger.log_status("6. %s jobs (%s businesses) in 1-2 worker x multiple mixed-type buildings. %s jobs classified as home-based." % (
            business_sizes[idx_sngl_wrk_mmu_fit].sum(), idx_sngl_wrk_mmu_fit.size, business_sizes[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)].sum()))            

        # 7. 1-2 worker business in residential parcel with no building
        idx_sngl_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 7))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_vacant_res])
        job_assignment_case[jidx] = 7
        home_based[jidx] = True
        processed_bindicator[idx_sngl_wrk_vacant_res] = True
        logger.log_status("7. %s jobs (%s businesses of size 1-2) could not be placed due to non-existing buildings in parcels with residential LU type." % (
            business_sizes[idx_sngl_wrk_vacant_res].sum(), idx_sngl_wrk_vacant_res.size))        

        # 8. 3+ workers of governmental workplaces in 1+ residential building
        ind_bussiness_case8 = logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, in1d(businesses['sector_id'], [18,19]))), in1d(business_codes, [1,2]))
        idx_wrk_fit = where(ind_bussiness_case8)[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 8
        processed_bindicator[idx_wrk_fit] = True
        logger.log_status("8. %s governmental jobs (%s businesses of size 3+) could not be placed due to residing in residential buildings only." % (
                    business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))
        
        # 9. 3-30 workers in single residential building. Make two of them home based.
        idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, business_sizes <= 30)), business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_fit].repeat(business_sizes[idx_sngl_wrk_fit])
        bsizeminus2 = vstack((2*ones(idx_sngl_wrk_fit.size), business_sizes[idx_sngl_wrk_fit]-2)).ravel("F").astype("int32") # interweaving 2 and remaining business size
        hbidx = tile(array([True, False]), bsizeminus2.size/2).repeat(bsizeminus2) # set the first two jobs of every business to True, others to False
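        # Worked toy example (hypothetical sizes): for business_sizes [3, 5],
        # bsizeminus2 = [2, 1, 2, 3] (pairs of 2 and size-2 per business) and
        # hbidx = [T,T,F, T,T,F,F,F], i.e. exactly the first two job slots of
        # each business are flagged; (where(jidx)[0])[hbidx] maps those flags
        # back to positions in the global job arrays.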
        home_based[(where(jidx)[0])[hbidx]] = True
        job_assignment_case[jidx] = 9
        processed_bindicator[idx_sngl_wrk_fit] = True        
        logger.log_status("9. %s jobs (%s businesses) in 3-30 worker x single residential building. %s jobs assigned as home-based." % (
            business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, hbidx.sum()))      
        
        # 10. 3-30 workers in multiple residential buildings. Make two of them home based.
        idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, business_sizes <= 30)), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit])
        job_assignment_case[jidx] = 10
        processed_bindicator[idx_sngl_wrk_fit] = True
        # sample buildings to businesses by parcels 
        bpcls = unique(businesses["parcel_id"][idx_sngl_wrk_fit])
        for ipcl in range(bpcls.size):
            bidx = where(buildings['parcel_id'] == bpcls[ipcl])[0]
            bldgids = buildings['building_id'][bidx]
            bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]], business_ids[idx_sngl_wrk_fit])
            # multiply by units for sampling prop. to units rather than buildings
            bldgids = bldgids.repeat(maximum(1, buildings['residential_units'][bidx].astype('int32'))) 
            if bldgids.size < bussids.size:
                bldarray = bldgids.repeat(int(1 + ceil((bussids.size - bldgids.size)/float(bldgids.size))))
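                # Hypothetical example: 2 building slots for 5 businesses gives
                # a repeat factor of 1 + ceil(3/2) = 3, i.e. 6 candidate slots,
                # so every business can draw a building after the shuffle below.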
            else:
                bldarray = bldgids
            shuffle(bldarray) # randomly reorder in-place
            for ib in range(bussids.size):
                jidx = where(job_array_labels == bussids[ib])[0]
                job_building_id[jidx] = bldarray[ib]
                home_based[jidx[0:2]] = True
        logger.log_status("10. %s jobs (%s businesses) in 3-30 worker x multiple residential building. %s jobs assigned as home-based." % (
            business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, idx_sngl_wrk_fit.size*2))        


        # 11. single workplace, 3+ workers in single non-res or mixed-use building
        idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2), 
                                                            logical_or(business_codes==3, business_codes==5)),
                                                business_nworkplaces==1))[0]
        which_labels = where(in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers]))[0]
        job_building_id[which_labels] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(business_sizes[idx_sngl_wrkplace_2plus_workers])   
        job_assignment_case[which_labels] = 11
        processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True
        logger.log_status("11. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x single non-res/mixed-use building fit." % (
            business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size))
        
        # 12. single workplace, 3+ workers in multiple mixed-type building
        idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==4, business_codes==6)),
                                                            business_nworkplaces==1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(business_sizes[idx_sngl_wrkplace_2plus_workers])    
        job_assignment_case[jidx] = 12
        processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True
        logger.log_status("12. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x multiple non-res/mixed building fit." % (
            business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size))

        # 13. multiple workplaces, 3+ workers in single non-res or mixed building
        idx_mult_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==3, business_codes==5)),
                                                            business_nworkplaces > 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_mult_wrkplace_2plus_workers])
        job_building_id[jidx] = business_location1wrkpl[idx_mult_wrkplace_2plus_workers].repeat(business_sizes[idx_mult_wrkplace_2plus_workers])
        job_assignment_case[jidx] = 13
        processed_bindicator[idx_mult_wrkplace_2plus_workers] = True
        logger.log_status("13. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x single non-res/mixed building fit." % (
            business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size))
        
        # 14. multiple workplaces, 3+ workers in multiple non-res or mixed building
        idx_mult_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==4, business_codes==6)),
                                                            business_nworkplaces > 1))[0]
        processed_bindicator[idx_mult_wrkplace_2plus_workers] = True
        # sample buildings to businesses by parcels 
        bpcls = unique(businesses["parcel_id"][idx_mult_wrkplace_2plus_workers])
        #hbasedsum = home_based.sum()
        for ipcl in range(bpcls.size):
            bldgids = buildings['building_id'][buildings['parcel_id'] == bpcls[ipcl]]
            bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]], business_ids[idx_mult_wrkplace_2plus_workers])
            if bldgids.size < bussids.size:
                bldarray = bldgids.repeat(int(1 + ceil((bussids.size - bldgids.size)/float(bldgids.size))))
            else:
                bldarray = bldgids
            shuffle(bldarray) # randomly reorder in-place
            is_res = in1d(bldarray, restypes)
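            # Note: bldarray holds building ids rather than building type ids,
            # so this membership test against restypes matches only by
            # coincidence; it currently feeds nothing but the commented-out
            # home_based assignment below.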
            for ib in range(bussids.size):
                jidx = where(job_array_labels == bussids[ib])
                job_building_id[jidx] = bldarray[ib]
                #home_based[jidx] = is_res
                job_assignment_case[jidx] = 14
        logger.log_status("14. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x multiple non-res/mixed building fit." % (
            business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size))
        
        
        # 15. 3+ workers in residential parcel with no building
        idx_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 2), business_codes == 7))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_res])
        job_assignment_case[jidx] = 15
        processed_bindicator[idx_wrk_vacant_res] = True
        logger.log_status("15. %s jobs (%s businesses of 3+ workers) could not be placed due to non-existing buildings in parcels with residential LU type." % (
            business_sizes[idx_wrk_vacant_res].sum(), idx_wrk_vacant_res.size))
        
        # 16. nonresidential parcel with no building
        idx_wrk_vacant_nonres = where(logical_and(processed_bindicator==0, business_codes == 8))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_nonres])
        job_assignment_case[jidx] = 16
        processed_bindicator[idx_wrk_vacant_nonres] = True
        logger.log_status("16. %s jobs (%s businesses) could not be placed due to non-existing buildings in parcels with non-esidential LU type." % (
            business_sizes[idx_wrk_vacant_nonres].sum(), idx_wrk_vacant_nonres.size))        
        
        # 17. 31+ workers in single residential building. Do not place - will go into ELCM.
        idx_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 30), business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 17
        processed_bindicator[idx_wrk_fit] = True        
        logger.log_status("17. %s jobs (%s businesses) in 31+ workers x single residential building." % (
            business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))         
    
        # 18. 31+ workers in multiple residential buildings.
        idx_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 30), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 18
        processed_bindicator[idx_wrk_fit] = True
        logger.log_status("18. %s jobs (%s businesses) in 31+ workers x multiple residential building." % (
            business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))                

        # jobs in messy buildings
        idx_messy_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 0), business_codes == 0))[0]
        processed_bindicator[idx_messy_fit] = True
        logger.log_status("%s jobs (%s businesses) could not be placed due to messy buildings." % (
            business_sizes[idx_messy_fit].sum(), idx_messy_fit.size))         
         
        # build new buildings for jobs in cases 7, 8, 15 and 16
        jidx_no_bld = where(in1d(job_assignment_case, [7,8,15,16]))[0]
        bus = unique(job_array_labels[jidx_no_bld])
        bsidx = businesses.get_id_index(bus)
        # first create buildings for single workplaces per parcel
        single_workplace_idx = where(business_nworkplaces[bsidx] == 1)[0]
        newbld_parcel_id = businesses['parcel_id'][bsidx][single_workplace_idx]
        newbld_bt = sector2building_type(businesses['sector_id'][bsidx][single_workplace_idx])
        newbids = arange(buildings.get_id_attribute().max()+1, buildings.get_id_attribute().max()+single_workplace_idx.size+1)
        bbldid = zeros(bsidx.size, dtype='int32')
        bbldid[single_workplace_idx] = newbids
        # for parcels with multiple workplaces select the largest business to determine its building type
        mult_bsidx = bsidx[where(business_nworkplaces[bsidx] > 1)[0]]
        empty_parcels = businesses['parcel_id'][mult_bsidx]
        uempty_parcels = unique(empty_parcels)
        bsize_on_empty_pcl = ndmax(business_sizes[mult_bsidx], labels=empty_parcels, index=uempty_parcels)
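        # ndmax is assumed to behave like a labeled maximum (cf.
        # scipy.ndimage.maximum): for each parcel id in uempty_parcels it
        # returns the largest business size among businesses on that parcel,
        # so the biggest business on each empty parcel determines the new
        # building's type below.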
        newbld2_sec = zeros(uempty_parcels.size, dtype='int32')
        newbids2 = arange(newbids.max()+1, newbids.max()+uempty_parcels.size+1)
        for ipcl in range(uempty_parcels.size):
            newbld2_sec[ipcl] = businesses['sector_id'][mult_bsidx][logical_and(businesses['parcel_id'][mult_bsidx] == uempty_parcels[ipcl], 
                                                                                business_sizes[mult_bsidx]==bsize_on_empty_pcl[ipcl])][0]
            this_bidx = where(businesses['parcel_id'][bsidx] == uempty_parcels[ipcl])
            bbldid[this_bidx] = newbids2[ipcl]
            
        newbld_parcel_id = concatenate((newbld_parcel_id, uempty_parcels))
        newbld_bt = concatenate((newbld_bt, sector2building_type(newbld2_sec)))    
        
        newbldgs = {'building_id': concatenate((newbids, newbids2)),
                    'parcel_id': newbld_parcel_id,
                    'building_type_id': newbld_bt,
                    }
        buildings.add_elements(newbldgs, require_all_attributes=False)
        jidx = where(in1d(job_array_labels, business_ids[bsidx]))[0]
        job_building_id[jidx] = bbldid.repeat(business_sizes[bsidx])
        logger.log_status("Build %s new buildings to accommodate %s jobs (out of which %s are governmental) from cases 7, 15, 16." % (
            newbld_parcel_id.size, jidx.size, business_sizes[bsidx][where(in1d(businesses['sector_id'][bsidx], [18,19]))].sum()))
        
        
        logger.log_status("Assigned %s (%s percent) home-based jobs." % (home_based.sum(), round(home_based.sum()/(home_based.size/100.),2)))
        logger.log_status("Finished %s percent (%s) jobs (%s businesses) processed. %s jobs (%s businesses) remain to be processed." % \
                          (round(business_sizes[processed_bindicator].sum()/(home_based.size/100.),2),
                           business_sizes[processed_bindicator].sum(), processed_bindicator.sum(),
                          business_sizes[logical_not(processed_bindicator)].sum(), business_sizes[logical_not(processed_bindicator)].size))
        
        logger.start_block("Storing jobs data.")
        # create job dataset
        job_data = {"job_id": (arange(job_building_id.size)+1).astype("int32"),
                    "home_based_status" : home_based,
                    "building_id": job_building_id,
                    "business_id": job_array_labels.astype("int32"),
                    "sector_id": businesses['sector_id'].repeat(business_sizes).astype("int32"), 
                    "parcel_id": businesses['parcel_id'].repeat(business_sizes).astype("int32"), 
                    "assignment_case": job_assignment_case}

        # join with zones
        if zone_dsname is not None:
            zones = dataset_pool.get_dataset(zone_dsname)
            idname = zones.get_id_name()[0]
            #jpcls = buildings.get_attribute_by_id('parcel_id', job_building_id)
            job_data[idname] = parcels.get_attribute_by_id(idname, job_data["parcel_id"])
            
            
        dictstorage = StorageFactory().get_storage('dict_storage')
        dictstorage.write_table(table_name="jobs", table_data=job_data)
        jobs = Dataset(in_storage=dictstorage, in_table_name="jobs", dataset_name="job", id_name="job_id")
        if out_storage is not None:
            jobs.write_dataset(out_storage=out_storage, out_table_name="jobs")
            buildings.write_dataset(out_storage=out_storage, attributes=AttributeType.PRIMARY)
        logger.end_block()        
        return jobs
    def run(self, dataset1, dataset2, index1=None, index2=None, sample_size=10, weight=None,
            include_chosen_choice=False, with_replacement=False, resources=None, dataset_pool=None):
        
        """this function samples number of sample_size (scalar value) alternatives from dataset2
        for agent set specified by dataset1.
        If index1 is not None, only samples alterantives for agents with indices in index1;
        if index2 is not None, only samples alternatives from indices in index2.
        sample_size specifies number of alternatives to be sampled for each agent.
        weight, to be used as sampling weight, is either an attribute name of dataset2, or a 1d
        array of the same length as index2 or 2d array of shape (index1.size, index2.size).

        Also refer to document of interaction_dataset"""

        if dataset_pool is None:
            try:
                sc = SessionConfiguration()
                dataset_pool=sc.get_dataset_pool()
            except:
                dataset_pool = DatasetPool()
        
        local_resources = Resources(resources)
        local_resources.merge_if_not_None(
                {"dataset1": dataset1, "dataset2": dataset2,
                "index1":index1, "index2": index2,
                "sample_size": sample_size, "weight": weight,
                "with_replacement": with_replacement,
                "include_chosen_choice": include_chosen_choice})

        local_resources.check_obligatory_keys(['dataset1', 'dataset2', 'sample_size'])
        agent = local_resources["dataset1"]
        index1 = local_resources.get("index1", None)
        if index1 is None:
            index1 = arange(agent.size())
        choice = local_resources["dataset2"]
        index2 = local_resources.get("index2", None)
        if index2 is None:
            index2 = arange(choice.size())
            
        if index1.size == 0 or index2.size == 0:
            err_msg = "either choice size or agent size is zero, return None"
            logger.log_warning(err_msg)
            return None
        
        include_chosen_choice = local_resources.get("include_chosen_choice",  False)
        J = local_resources["sample_size"]
        if include_chosen_choice:
            J = J - 1
            
        with_replacement = local_resources.get("with_replacement")
            
        weight = local_resources.get("weight", None)
        if isinstance(weight, str):
            if weight in choice.get_known_attribute_names():
                weight=choice.get_attribute(weight)
                rank_of_weight = 1 
            elif VariableName(weight).get_dataset_name() == choice.get_dataset_name():
                weight=choice.compute_variables(weight, dataset_pool=dataset_pool)
                rank_of_weight = 1
            else:
                ## weights can be an interaction variable
                interaction_dataset = InteractionDataset(local_resources)
                weight=interaction_dataset.compute_variables(weight, dataset_pool=dataset_pool)
                rank_of_weight = 2
        elif isinstance(weight, ndarray):
            rank_of_weight = weight.ndim
        elif not weight:  ## weight is None or empty string
            weight = ones(index2.size)
            rank_of_weight = 1
        else:
            err_msg = "unkown weight type"
            logger.log_error(err_msg)
            raise TypeError, err_msg

        if (weight.size <> index2.size) and (weight.shape[rank_of_weight-1] <> index2.size):
            if weight.shape[rank_of_weight-1] == choice.size():
                if rank_of_weight == 1:
                    weight = take(weight, index2)
                if rank_of_weight == 2:
                    weight = take(weight, index2, axis=1)
            else:
                err_msg = "weight array size doesn't match to size of dataset2 or its index"
                logger.log_error(err_msg)
                raise ValueError(err_msg)

        prob = normalize(weight)
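        # normalize is assumed to rescale the weights into probabilities that
        # sum to 1 (per row for a 2d weight array), matching the prob_array
        # contract of the samplers used below.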

        #chosen_choice = ones(index1.size) * UNPLACED_ID
        chosen_choice_id = agent.get_attribute(choice.get_id_name()[0])[index1]
        #index_of_placed_agent = where(greater(chosen_choice_id, UNPLACED_ID))[0]
        chosen_choice_index = choice.try_get_id_index(chosen_choice_id, return_value_if_not_found=UNPLACED_ID)
        chosen_choice_index_to_index2 = lookup(chosen_choice_index, index2, index_if_not_found=UNPLACED_ID)
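        # lookup is assumed to return, for each entry of chosen_choice_index,
        # its position within index2; agents whose current choice is absent
        # from index2 (or unplaced) carry the UNPLACED_ID sentinel, which the
        # samplers presumably ignore when excluding the chosen alternative.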
        
        if rank_of_weight == 1: # if weight_array is 1d, then each agent shares the same weight for choices
            replace = with_replacement           # default to the caller's with_replacement setting
            if nonzerocounts(weight) < J:
                logger.log_warning("weight array dosen't have enough non-zero counts, use sample with replacement")
                replace = True
            sampled_index = prob2dsample( index2, sample_size=(index1.size, J),
                                        prob_array=prob, exclude_index=chosen_choice_index_to_index2,
                                        replace=replace, return_index=True )
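            # prob2dsample is assumed to draw an (index1.size, J) matrix of
            # positions into index2 in one vectorized pass, excluding each
            # agent's current choice so it cannot reappear among alternatives.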
            #return index2[sampled_index]

        if rank_of_weight == 2:
            sampled_index = zeros((index1.size,J), dtype="int32") - 1
                
            for i in range(index1.size):
                replace = with_replacement          # sampling with/without replacement
                i_prob = prob[i,:]
                if nonzerocounts(i_prob) < J:
                    logger.log_warning("weight array dosen't have enough non-zero counts, use sample with replacement")
                    replace = True

                #exclude_index passed to probsample_noreplace needs to be indexed to index2
                sampled_index[i,:] = probsample_noreplace( index2, sample_size=J, prob_array=i_prob,
                                                     exclude_index=chosen_choice_index_to_index2[i],
                                                     return_index=True )
        sampling_prob = take(prob, sampled_index)
        sampled_index = index2[sampled_index]
        is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
        #chosen_choice = -1 * ones(chosen_choice_index.size, dtype="int32")
        if include_chosen_choice:
            sampled_index = column_stack((chosen_choice_index[:,newaxis],sampled_index))
            is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
            is_chosen_choice[chosen_choice_index!=UNPLACED_ID, 0] = 1
            #chosen_choice[where(is_chosen_choice)[0]] = where(is_chosen_choice)[1]
            ## this is necessary because prob is indexed to index2, not to the choice set (as is chosen_choice_index)
            sampling_prob_for_chosen_choices = take(prob, chosen_choice_index_to_index2[:, newaxis])
            ## if chosen choice chosen equals unplaced_id then the sampling prob is 0
            sampling_prob_for_chosen_choices[where(chosen_choice_index==UNPLACED_ID)[0],] = 0.0
            sampling_prob = column_stack([sampling_prob_for_chosen_choices, sampling_prob])
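            # Resulting shapes: sampled_index and sampling_prob are
            # (index1.size, J+1), with the chosen choice (and its probability,
            # 0 for unplaced agents) in column 0 and the J sampled
            # alternatives in the remaining columns.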
        
        interaction_dataset = self.create_interaction_dataset(dataset1, dataset2, index1, sampled_index)
        interaction_dataset.add_attribute(sampling_prob, '__sampling_probability')
        interaction_dataset.add_attribute(is_chosen_choice, 'chosen_choice')
        
        ## to get the older returns
        #sampled_index = interaction_dataset.get_2d_index()
        #chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32") 
        #where_chosen = where(interaction_dataset.get_attribute("chosen_choice"))
        #chosen_choices[where_chosen[0]]=where_chosen[1]
        #return (sampled_index, chosen_choice)
        
        return interaction_dataset
Example #19
    def _add(self, agents_pool, amount, 
             agent_dataset, location_dataset, 
             this_refinement,
             dataset_pool ):
        
        fit_index = self.get_fit_agents_index(agent_dataset, 
                                              this_refinement.agent_expression, 
                                              this_refinement.location_expression,
                                              dataset_pool)
        if this_refinement.agent_expression is not None and len(this_refinement.agent_expression) > 0:
            agents_index = where(agent_dataset.compute_variables(this_refinement.agent_expression, 
                                                               dataset_pool=dataset_pool)>0)[0]
        else:
            agents_index = arange(agent_dataset.size())
        movers_index = array([],dtype="int32")
        ar_pool = array(agents_pool)
        fitted_agents_pool = ar_pool[in1d(ar_pool, agents_index)]
        amount_from_agents_pool = min( amount, fitted_agents_pool.size )
        prob_string = self.probability_attributes.get(agent_dataset.get_dataset_name(),None)
        if prob_string is not None:
            probs_values = (agent_dataset.compute_variables([prob_string], dataset_pool=dataset_pool)).astype('int32')
            uprobs_values = unique(probs_values[fit_index])
            if uprobs_values.size > 0:
                probs_existing = array(ndimage_sum(ones(fit_index.size), 
                                         labels=probs_values[fit_index], index=uprobs_values))
        if amount_from_agents_pool > 0:        
            if prob_string is not None and uprobs_values.size > 0:                
                prob_pool_values = probs_values[fitted_agents_pool]
                probs_pool=zeros(prob_pool_values.size)
                for i in range(uprobs_values.size):
                    probpoolidx = where(prob_pool_values == uprobs_values[i])[0]
                    if probpoolidx.size == 0:
                        continue
                    probs_pool[probpoolidx]=probs_existing[i]/float(probpoolidx.size)
                probs_pool[probs_pool<=0] = (probs_existing.min()/10.0)/float((probs_pool<=0).sum())
            else:
                probs_pool=ones(fitted_agents_pool.size)
            
            agents_index_from_agents_pool = probsample_noreplace( fitted_agents_pool, amount_from_agents_pool, prob_array=probs_pool )
            for i in agents_index_from_agents_pool:
                agents_pool.remove(i)
            if fit_index.size == 0:
                ##cannot find agents to copy their location or clone them, place agents in agents_pool
                if amount > amount_from_agents_pool:                   
                    logger.log_warning("Refinement requests to add %i agents,  but there are only %i agents subtracted from previous action(s) and no agents satisfying %s to clone from;" \
                                   "add %i agents instead" % (amount, amount_from_agents_pool, 
                                                              ' and '.join( [this_refinement.agent_expression, 
                                                                           this_refinement.location_expression]).strip(' and '), 
                                                              amount_from_agents_pool,) )
                    amount = amount_from_agents_pool
                # sample from all suitable locations
                is_suitable_location = location_dataset.compute_variables( this_refinement.location_expression,
                                                                           dataset_pool=dataset_pool )
                location_id_for_agents_pool = sample_replace( location_dataset.get_id_attribute()[is_suitable_location],
                                                                 amount_from_agents_pool )
            else:
                #sample from locations of suitable agents            
                agents_index_for_location = sample_replace( fit_index, amount_from_agents_pool)
                location_id_for_agents_pool = agent_dataset.get_attribute( location_dataset.get_id_name()[0] 
                                                                         )[agents_index_for_location]
                movers_index = concatenate( (movers_index, agents_index_for_location) )

        elif fit_index.size == 0:
            ## no agents in agents_pool and no agents to clone either, --> fail
            logger.log_error( "Action 'add' failed: there is no agent subtracted from previous action, and no suitable agents satisfying %s to clone from." % \
                              ' and '.join( [this_refinement.agent_expression, this_refinement.location_expression] ).strip(' and ') )
            return
            
        if amount > amount_from_agents_pool:
            agents_index_to_clone = sample_replace( fit_index, amount - amount_from_agents_pool)
            movers_index = concatenate( (movers_index, agents_index_to_clone) )

        if movers_index.size > 0 and this_refinement.location_capacity_attribute is not None and len(this_refinement.location_capacity_attribute) > 0:
            movers_location_id = agent_dataset.get_attribute( location_dataset.get_id_name()[0] )[movers_index]
            movers_location_index = location_dataset.get_id_index( movers_location_id )
            # see previous comment about histogram function
            num_of_movers_by_location = histogram( movers_location_index, bins=arange(location_dataset.size() +1) )[0]
            num_of_agents_by_location = location_dataset.compute_variables( "number_of_agents=%s.number_of_agents(%s)" % \
                                                                            ( location_dataset.dataset_name,
                                                                            agent_dataset.dataset_name ),
                                                                            dataset_pool=dataset_pool)
            
            expand_factor = safe_array_divide( (num_of_agents_by_location + num_of_movers_by_location ).astype('float32'),
                                                num_of_agents_by_location, return_value_if_denominator_is_zero = 1.0 )
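            # Hypothetical example: a location with 10 agents receiving 2
            # movers gets expand_factor 12/10 = 1.2, so a capacity attribute
            # of 50 is scaled and rounded to 60 below; empty locations keep
            # factor 1.0 via return_value_if_denominator_is_zero.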
            new_values = round_( expand_factor * location_dataset.get_attribute(this_refinement.location_capacity_attribute) )
            location_dataset.modify_attribute( this_refinement.location_capacity_attribute, 
                                               new_values
                                           )
            self._add_refinement_info_to_dataset(location_dataset, self.id_names, this_refinement, index=movers_location_index)
        if amount_from_agents_pool > 0:
            agent_dataset.modify_attribute( 'building_id',
                                            -1 * ones( agents_index_from_agents_pool.size, dtype='int32' ),
                                            agents_index_from_agents_pool
                                            )
            agent_dataset.modify_attribute( location_dataset.get_id_name()[0],
                                            location_id_for_agents_pool,
                                            agents_index_from_agents_pool
                                            )

            self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement, index=agents_index_from_agents_pool)
            self.processed_locations['add'] = concatenate((self.processed_locations.get('add', array([])), 
                                                unique(location_dataset[self.subarea_id_name][location_dataset.get_id_index(location_id_for_agents_pool)])))
            
        if amount > amount_from_agents_pool:
            new_agents_index = agent_dataset.duplicate_rows(agents_index_to_clone)
            self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement, index=agents_index_to_clone)
            self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement, index=new_agents_index)
            if location_dataset.get_dataset_name() <> 'building':
                agent_dataset.modify_attribute( 'building_id',
                                            -1 * ones( new_agents_index.size, dtype='int32' ),
                                            new_agents_index
                                            )
            self.processed_locations['add'] = concatenate((self.processed_locations.get('add', array([])), 
                                                unique(agent_dataset[self.subarea_id_name][new_agents_index])))
Example #20
    def run(self, n=500, run_config=None, current_year=None, debuglevel=0):
        """
        n - sample n proposals at a time, evaluate them one by one
        """
        self.demolished_buildings = array([], dtype='int32')  #id of buildings to be demolished
        if current_year is None:
            current_year = SimulationState().get_current_time()
        if not self.positive_proposals:
            logger.log_status("Proposal Set size <= 0, no proposals to consider, skipping DPPSM.")
            return (self.proposal_set, self.demolished_buildings) 
        self.proposal_component_set.compute_variables([
            'urbansim_parcel.development_project_proposal_component.units_proposed',
            'urbansim_parcel.development_project_proposal_component.is_residential'],
                                        dataset_pool=self.dataset_pool)
        self.proposal_set.compute_variables([
            'urbansim_parcel.development_project_proposal.number_of_components',
            'zone_id=development_project_proposal.disaggregate(parcel.zone_id)',
            #'occurence_frequency = development_project_proposal.disaggregate(development_template.sample_size)'
            ],
                                        dataset_pool=self.dataset_pool)
        buildings = self.dataset_pool.get_dataset("building")
        buildings.compute_variables([
                                "occupied_units_for_jobs = urbansim_parcel.building.number_of_non_home_based_jobs",
                                "units_for_jobs = urbansim_parcel.building.total_non_home_based_job_space",
                                "occupied_residential_units = urbansim_parcel.building.number_of_households",
#                                "urbansim_parcel.building.existing_units",
                                "urbansim_parcel.building.is_residential"
                                    ],
                                    dataset_pool=self.dataset_pool)

        ## define unit_name by whether a building is residential or not (with is_residential attribute)
        ## if it is non-residential (0), count units by number of job spaces (units_for_jobs)
        ## if it is residential (1), count units by residential units
        self.unit_name = array(["units_for_jobs", "residential_units"])
                
        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')
        target_vacancy.compute_variables(['is_residential = target_vacancy.disaggregate(building_type.is_residential)'],
                                         dataset_pool=self.dataset_pool)
        # This try-except block checks to see if the object has a subarea_id_name,
        # if it does, it calculates the vacancy rates by subarea_id_name
        try:
            # Check for subarea_id_name in target_vacancies dataset
            # if it is present, vacancy rates are specified by subarea_id_name
            # if it is not, vacancy rates are specified region wide
            target_vacancy.load_dataset()
            if self.subarea_id_name in target_vacancy.get_attribute_names():
                current_target_vacancy_this_year = DatasetSubset(target_vacancy, index=where(target_vacancy.get_attribute("year")==current_year)[0])
                current_target_vacancy = DatasetSubset(current_target_vacancy_this_year, index=where(current_target_vacancy_this_year.get_attribute(self.subarea_id_name)==self.area_id)[0])
            else:
                current_target_vacancy = DatasetSubset(target_vacancy, index=where(target_vacancy.get_attribute("year")==current_year)[0])
        except AttributeError:
            # vacancy rates are specified region wide:
            current_target_vacancy = DatasetSubset(target_vacancy, index=where(target_vacancy.get_attribute("year")==current_year)[0])

        if current_target_vacancy.size() == 0:
            raise IOError('No target vacancy defined for year %s.' % current_year)
        
        self.existing_units = {}   #total existing units by land_use type
        self.occupied_units = {}   #total occupied units by land_use type
        self.proposed_units = {}   #total proposed units by land_use type
        self.demolished_units = {} #total (to be) demolished units by land_use type

        components_building_type_ids = self.proposal_component_set.get_attribute("building_type_id").astype("int32")
        proposal_ids = self.proposal_set.get_id_attribute()
        proposal_ids_in_component_set = self.proposal_component_set.get_attribute("proposal_id")
        all_units_proposed = self.proposal_component_set.get_attribute("units_proposed")
        number_of_components_in_proposals = self.proposal_set.get_attribute("number_of_components")
        
        self.accepting_proposals = zeros(current_target_vacancy.get_attribute("building_type_id").max()+1, dtype='bool8')  #whether accepting new proposals, for each building type
        self.accepted_proposals = [] # index of accepted proposals

        self.target_vacancies = {}
        tv_building_types = current_target_vacancy.get_attribute("building_type_id")
        tv_rate = current_target_vacancy.get_attribute("target_vacancy_rate")
        for itype in range(tv_building_types.size):
            self.target_vacancies[tv_building_types[itype]] = tv_rate[itype]
            
        self.check_vacancy_rates(current_target_vacancy)  #initialize self.accepting_proposal based on current vacancy rate

        sqft_per_job = self.dataset_pool.get_dataset("building_sqft_per_job")
        zones_of_proposals = self.proposal_set.get_attribute("zone_id")
        self.building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(zones_of_proposals.max(), 
                                                                                   tv_building_types.max())
        # consider only those proposals that have all components of accepted type and sum of proposed units > 0
        is_accepted_type = self.accepting_proposals[components_building_type_ids]
        sum_is_accepted_type_over_proposals = array(ndimage.sum(is_accepted_type, labels = proposal_ids_in_component_set, 
                                                          index = proposal_ids))
        sum_of_units_proposed = array(ndimage.sum(all_units_proposed, labels = proposal_ids_in_component_set, 
                                                          index = proposal_ids))
        is_proposal_eligible = logical_and(sum_is_accepted_type_over_proposals == number_of_components_in_proposals,
                                           sum_of_units_proposed > 0)

        is_proposal_eligible = logical_and(is_proposal_eligible,
                                           self.proposal_set.get_attribute("start_year")==current_year )
        ## handle planned proposals: all proposals with status_id == is_planned 
        ## and start_year == current_year are accepted
        planned_proposal_indexes = where(logical_and(
                                                  self.proposal_set.get_attribute("status_id") == self.proposal_set.id_planned, 
                                                  self.proposal_set.get_attribute("start_year") == current_year ) 
                                        )[0] 
                                   
        self.consider_proposals(planned_proposal_indexes, force_accepting=True)
        # consider proposals (in this order: planned, proposed, tentative)
        for status in [self.proposal_set.id_proposed, self.proposal_set.id_tentative]:
            idx = where(logical_and(self.proposal_set.get_attribute("status_id") == status, is_proposal_eligible))[0]
            if idx.size <= 0:
                continue
            logger.log_status("Sampling from %s eligible proposals with status %s." % (idx.size, status))
            while (True in self.accepting_proposals):
                if self.weight[idx].sum() == 0.0:
                    logger.log_warning("Running out of proposals; there aren't any proposals with non-zero weight")
                    break
                
                idx = idx[self.weight[idx] > 0]
                n = minimum(idx.size, n)
                sampled_proposal_indexes = probsample_noreplace(proposal_ids[idx], n, 
                                                prob_array=(self.weight[idx]/float(self.weight[idx].sum())),                                                                
                                                exclude_index=None, return_index=True)
                self.consider_proposals(arange(self.proposal_set.size())[idx[sampled_proposal_indexes]])
                self.weight[idx[sampled_proposal_indexes]] = 0
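                # Zeroing the weights of sampled proposals makes this loop a
                # batched sampling without replacement: each proposal is
                # considered at most once, and the loop ends when either all
                # building types stop accepting or all weights reach zero.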

        # set status of accepted proposals to 'active'
        self.proposal_set.modify_attribute(name="status_id", data=self.proposal_set.id_active,
                                          index=array(self.accepted_proposals, dtype='int32'))
        building_types = self.dataset_pool.get_dataset("building_type")
        logger.log_status("Status of %s development proposals set to active." % len(self.accepted_proposals))
        logger.log_status("Target/existing vacancy rates (reached using eligible proposals) by building type:")
        for type_id in self.existing_units.keys():
            units_stock = self._get_units_stock(type_id)
            vr = self._get_vacancy_rates(type_id)
            ## units = residential_units if building_type is residential
            ## units = number of job spaces if building_type is non-residential
            logger.log_status(
                              """%(type_id)s[%(type_name)s]: %(vr)s = ((existing_units:%(existing_units)s + 
                              units_proposed:%(units_proposed)s - units_to_be_demolished:%(units_demolished)s) 
                              - units_occupied:%(units_occupied)s) / units_stock:%(units_stock)s""" %  \
                                          { 'type_id': type_id,
                                            'type_name': building_types.get_attribute_by_id("building_type_name", type_id),
                                            'vr':  vr,
                                            'existing_units': int(self.existing_units[type_id]),
                                            'units_occupied': int(self.occupied_units[type_id]),
                                            'units_proposed': int(self.proposed_units[type_id]),
                                            'units_demolished': int(self.demolished_units[type_id]),
                                            'units_stock': int(units_stock)
                                          }
                            )
        # Code added by Jesse Ayers, MAG, 7/20/2009
        # Get the active projects:
        stat_id = self.proposal_set.get_attribute('status_id')
        actv = where(stat_id==self.proposal_set.id_active)[0]
        # Where there are active projects, compute the total_land_area_taken
        # and store it on the development_project_proposals dataset
        # so it can be used by the building_construction_model for the proper
        # computation of units_proposed for those projects with velocity curves
        if actv.size > 0:          
            total_land_area_taken_computed = self.proposal_set.get_attribute('urbansim_parcel.development_project_proposal.land_area_taken')
            self.proposal_set.modify_attribute('total_land_area_taken', total_land_area_taken_computed[actv], actv)

        return (self.proposal_set, self.demolished_buildings) 
    def run(self, job_dataset, dataset_pool, out_storage=None, jobs_table="jobs"):
        """
        Algorithm:
            1. For all non_home_based jobs that have parcel_id assigned but no building_id, try
                to choose a building from all buildings in that parcel. Draw the building with probabilities
                given by the sector-building_type distribution. The job sizes are
                fitted into the available space (the attribute job.sqft is updated).
            2. For all non_home_based jobs for which no building was found in step 1, check
                if the parcel has residential buildings. In such a case, re-assign the jobs to be
                home-based.
                Otherwise, if sum of non_residential_sqft over the involved buildings is 0,
                for all jobs that have impute_building_sqft_flag=True draw a building using
                the sector-building_type distribution and impute the corresponding sqft to 
                the non_residential_sqft of that building.
            3. For all home_based jobs that have parcel_id assigned but no building_id, try
                to choose a building from all buildings in that parcel. 
                The capacity of a single-family building is determined from sizes of the households living there 
                (for each household the minimum of number of members and 2 is taken). 
                For multi-family buildings the capacity is 50.
            4. Assign a building type to jobs that have missing building type. It is sampled 
                from the region-wide distribution of home-based and non-home-based jobs.
            5. Update the table 'building_sqft_per_job' using the updated job.sqft.
        The 'job_dataset' argument provides the jobs table. The 'dataset_pool' should
        contain all other datasets needed (buildings, households, building_types,
        building_sqft_per_job).
        """
        parcel_ids = job_dataset.get_attribute("parcel_id")
        building_ids = job_dataset.get_attribute("building_id")
        building_types = job_dataset.get_attribute("building_type")
        try:
            impute_sqft_flags = job_dataset.get_attribute("impute_building_sqft_flag")
        except Exception:
            # default: no job is flagged for sqft imputation
            impute_sqft_flags = zeros(job_dataset.size())
        is_considered = logical_and(parcel_ids > 0, building_ids <= 0) # jobs that have assigned parcel but not building
        job_index_home_based = where(logical_and(is_considered, building_types == 1))[0]
        job_index_governmental = where(logical_and(is_considered, building_types == 3))[0]
        
        building_dataset = dataset_pool.get_dataset('building')
        parcel_ids_in_bldgs = building_dataset.get_attribute("parcel_id")
        bldg_ids_in_bldgs = building_dataset.get_id_attribute()
        bldg_types_in_bldgs = building_dataset.get_attribute("building_type_id")
        
        non_res_sqft = building_dataset.get_attribute("non_residential_sqft")
        occupied = building_dataset.compute_variables(["urbansim_parcel.building.occupied_building_sqft_by_jobs"],
                                                                     dataset_pool=dataset_pool)
        is_governmental = building_dataset.compute_variables(["building.disaggregate(building_type.generic_building_type_id == 7)"],
                                                                     dataset_pool=dataset_pool)
        
        # assign buildings to governmental jobs randomly
        unique_parcels = unique(parcel_ids[job_index_governmental])
        logger.log_status("Placing governmental jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs[is_governmental] == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_governmental] == parcel)[0]
            draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
            building_ids[job_index_governmental[idx_in_jobs]] = bldg_ids_in_bldgs[where(is_governmental)[0][draw]]
        logger.log_status("%s governmental jobs (out of %s gov. jobs) were placed." % (
                                                                (building_ids[job_index_governmental]>0).sum(),
                                                                 job_index_governmental.size))
        logger.log_status("The not-placed governmental jobs will be added to the non-home based jobs.")
        
        # consider the unplaced governmental jobs together with other non-home-based jobs
        is_now_considered = logical_and(is_considered, building_ids <= 0)
        job_index_non_home_based = where(logical_and(is_now_considered, logical_or(building_types == 2, building_types == 3)))[0]
                                    
        # assign buildings to non_home_based jobs based on available space
        unique_parcels = unique(parcel_ids[job_index_non_home_based])
        job_building_types = job_dataset.compute_variables(["bldgs_building_type_id = job.disaggregate(building.building_type_id)"], 
                                                           dataset_pool=dataset_pool)
        where_valid_jbt = where(logical_and(job_building_types>0, logical_or(building_types == 2, building_types==3)))[0]
        building_type_dataset = dataset_pool.get_dataset("building_type")
        available_building_types= building_type_dataset.get_id_attribute()
        idx_available_bt = building_type_dataset.get_id_index(available_building_types)
        sectors = job_dataset.get_attribute("sector_id")
        unique_sectors = unique(sectors)
        sector_bt_distribution = zeros((unique_sectors.size, building_type_dataset.size()), dtype="float32")
        
        jobs_sqft = job_dataset.get_attribute_by_index("sqft", job_index_non_home_based).astype("float32")
        job_dataset._compute_if_needed("urbansim_parcel.job.zone_id", dataset_pool=dataset_pool) 
        jobs_zones = job_dataset.get_attribute_by_index("zone_id", job_index_non_home_based)
        new_jobs_sqft = job_dataset.get_attribute("sqft").copy()
        
        # find sector -> building_type distribution
        sector_index_mapping = {}
        for isector in range(unique_sectors.size):
            idx = where(sectors[where_valid_jbt]==unique_sectors[isector])[0]
            if idx.size == 0: continue
            o = ones(idx.size, dtype="int32")
            sector_bt_distribution[isector,:] = ndimage_sum(o, labels=job_building_types[where_valid_jbt[idx]], 
                                                            index=available_building_types)
            sector_bt_distribution[isector,:] = sector_bt_distribution[isector,:]/sector_bt_distribution[isector,:].sum()
            sector_index_mapping[unique_sectors[isector]] = isector
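        # Illustrative sketch of the histogram step above, assuming ndimage_sum is
        # scipy.ndimage.sum; the toy label/index values are made up:
        #   >>> from numpy import array, ones
        #   >>> from scipy.ndimage import sum as ndimage_sum
        #   >>> ndimage_sum(ones(4), labels=array([3, 3, 8, 19]), index=array([3, 8, 19]))
        #   array([2., 1., 1.])
        # i.e. it counts, per building type, the jobs of one sector sitting in
        # buildings of that type; dividing by the row sum yields a distribution.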
               
        # create a lookup table for zonal average per building type of sqft per employee
        zone_average_dataset = dataset_pool.get_dataset("building_sqft_per_job")
        zone_bt_lookup = zone_average_dataset.get_building_sqft_as_table(job_dataset.get_attribute("zone_id").max(),
                                                                         available_building_types.max())
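        # zone_bt_lookup is a 2-D array indexed as [zone_id, building_type_id],
        # holding the average sqft per job; it is used below to impute a sqft
        # value for jobs whose sqft attribute is <= 0.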

        counter_zero_capacity = 0
        counter_zero_distr = 0
        # iterate over parcels
        logger.log_status("Placing non-home-based jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_non_home_based] == parcel)[0]
            capacity = maximum(non_res_sqft[idx_in_bldgs] - occupied[idx_in_bldgs],0)
            if capacity.sum() <= 0:
                counter_zero_capacity += idx_in_jobs.size
                continue
            this_jobs_sectors = sectors[job_index_non_home_based][idx_in_jobs]
            this_jobs_sqft_table = resize(jobs_sqft[idx_in_jobs], (idx_in_bldgs.size, idx_in_jobs.size))
            wn = jobs_sqft[idx_in_jobs] <= 0
            for i in range(idx_in_bldgs.size):
                this_jobs_sqft_table[i, where(wn)[0]] = zone_bt_lookup[jobs_zones[idx_in_jobs[wn]], bldg_types_in_bldgs[idx_in_bldgs[i]]]
            supply_demand_ratio = (resize(capacity, (capacity.size, 1))/this_jobs_sqft_table.astype("float32").sum(axis=0))/float(idx_in_jobs.size)*0.9
            if any(supply_demand_ratio < 1): # shrink job sqft only if demand exceeds (90% of) supply
                this_jobs_sqft_table = this_jobs_sqft_table * supply_demand_ratio
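            # Worked example (illustrative numbers): 2 buildings with free
            # capacity [1000, 500] sqft and 3 jobs of 600 sqft each. Each column
            # of this_jobs_sqft_table then sums to 2*600 = 1200, so
            #   supply_demand_ratio = [[1000], [500]] / 1200.0 / 3 * 0.9
            #                       = [[0.25], [0.125]]
            # and the per-building requirements shrink to 150 resp. 75 sqft,
            # which fit: 3*150 = 450 <= 1000 and 3*75 = 225 <= 500.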
            probcomb = zeros(this_jobs_sqft_table.shape)
            bt = bldg_types_in_bldgs[idx_in_bldgs]
            ibt = building_type_dataset.get_id_index(bt)
            for i in range(probcomb.shape[0]):
                for j in range(probcomb.shape[1]):
                    probcomb[i,j] = sector_bt_distribution[sector_index_mapping[this_jobs_sectors[j]],ibt[i]]
            pcs = probcomb.sum(axis=0)
            probcomb = probcomb/pcs
            wz = where(pcs<=0)[0]
            counter_zero_distr += wz.size
            probcomb[:, wz] = 0 # to avoid nan values
            taken = zeros(capacity.shape)
            has_sqft = this_jobs_sqft_table > 0
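            # Greedy placement: repeatedly take the job with the largest
            # expected sqft requirement (ties broken at random), sample a
            # building for it with probabilities from the sector ->
            # building_type distribution, and place it there if the building
            # still has capacity; otherwise zero out that building/job pair
            # and retry until no positive-probability pair with sqft remains.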
            while True:
                if (has_sqft * probcomb).sum() <= 0:
                    break
                req =  (this_jobs_sqft_table * probcomb).sum(axis=0)
                maxi = req.max()
                wmaxi = where(req==maxi)[0]
                drawjob = sample_noreplace(arange(wmaxi.size), 1) # break ties randomly among jobs with the maximal requirement
                imax_req = wmaxi[drawjob]
                weights = has_sqft[:,imax_req] * probcomb[:,imax_req]
                draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                if (taken[draw] + this_jobs_sqft_table[draw,imax_req]) > capacity[draw]:
                    probcomb[draw,imax_req]=0
                    continue
                taken[draw] = taken[draw] + this_jobs_sqft_table[draw,imax_req]
                building_ids[job_index_non_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                probcomb[:,imax_req] = 0
                new_jobs_sqft[job_index_non_home_based[idx_in_jobs[imax_req]]] = int(min(self.maximum_sqft, max(round(this_jobs_sqft_table[draw,imax_req]), 
                                                                                     self.minimum_sqft)))
            
        logger.log_status("%s non home based jobs (out of %s nhb jobs) were placed." % (
                                                                (building_ids[job_index_non_home_based]>0).sum(),
                                                                 job_index_non_home_based.size))
        logger.log_status("Unplaced due to zero capacity: %s" % counter_zero_capacity)
        logger.log_status("Unplaced due to zero distribution: %s" % counter_zero_distr)
        
        job_dataset.modify_attribute(name="building_id", data = building_ids)
        
        # re-classify unplaced non-home based jobs to home-based if parcels contain residential buildings
        bldgs_is_residential = logical_and(logical_not(is_governmental), building_dataset.compute_variables(["urbansim_parcel.building.is_residential"], 
                                                           dataset_pool=dataset_pool))
        is_now_considered = logical_and(parcel_ids > 0, building_ids <= 0)
        job_index_non_home_based_unplaced = where(logical_and(is_now_considered, building_types == 2))[0]
        unique_parcels = unique(parcel_ids[job_index_non_home_based_unplaced])
        imputed_sqft = 0
        logger.log_status("Try to reclassify non-home-based jobs (excluding governemtal jobs) ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_non_home_based_unplaced] == parcel)[0]
            where_residential = where(bldgs_is_residential[idx_in_bldgs])[0]
            if where_residential.size > 0:
                building_types[job_index_non_home_based_unplaced[idx_in_jobs]] = 1 # set to home-based jobs
            elif non_res_sqft[idx_in_bldgs].sum() <= 0:
                # impute non_residential_sqft and assign buildings
                this_jobs_sectors = sectors[job_index_non_home_based_unplaced][idx_in_jobs]
                this_jobs_sqft_table = resize(jobs_sqft[idx_in_jobs], (idx_in_bldgs.size, idx_in_jobs.size))
                wn = jobs_sqft[idx_in_jobs] <= 0
                for i in range(idx_in_bldgs.size):
                    this_jobs_sqft_table[i, where(wn)[0]] = zone_bt_lookup[jobs_zones[idx_in_jobs[wn]], bldg_types_in_bldgs[idx_in_bldgs[i]]]
                probcomb = zeros(this_jobs_sqft_table.shape)
                bt = bldg_types_in_bldgs[idx_in_bldgs]
                ibt = building_type_dataset.get_id_index(bt)
                for i in range(probcomb.shape[0]):
                    for j in range(probcomb.shape[1]):
                        probcomb[i,j] = sector_bt_distribution[sector_index_mapping[this_jobs_sectors[j]],ibt[i]]
                for ijob in range(probcomb.shape[1]):
                    if (probcomb[:,ijob].sum() <= 0) or (impute_sqft_flags[job_index_non_home_based_unplaced[idx_in_jobs[ijob]]] == 0):
                        continue
                    weights = probcomb[:,ijob]
                    draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                    non_res_sqft[idx_in_bldgs[draw]] += this_jobs_sqft_table[draw,ijob]
                    imputed_sqft += this_jobs_sqft_table[draw,ijob]
                    building_ids[job_index_non_home_based_unplaced[idx_in_jobs[ijob]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                    new_jobs_sqft[job_index_non_home_based_unplaced[idx_in_jobs[ijob]]] = int(min(self.maximum_sqft, max(round(this_jobs_sqft_table[draw,ijob]), 
                                                                                     self.minimum_sqft)))
                    
        building_dataset.modify_attribute(name="non_residential_sqft", data = non_res_sqft)
        job_dataset.modify_attribute(name="building_id", data = building_ids)
        job_dataset.modify_attribute(name="building_type", data = building_types)
        job_dataset.modify_attribute(name="sqft", data = new_jobs_sqft)
        
        old_nhb_size = job_index_non_home_based.size
        job_index_home_based = where(logical_and(is_considered, building_types == 1))[0]
        job_index_non_home_based = where(logical_and(is_considered, building_types == 2))[0]
        logger.log_status("%s non-home based jobs reclassified as home-based." % (old_nhb_size-job_index_non_home_based.size))
        logger.log_status("%s non-residential sqft imputed." % imputed_sqft)
        logger.log_status("Additionaly, %s non home based jobs were placed due to imputed sqft." % \
                                                (building_ids[job_index_non_home_based_unplaced]>0).sum())
        # home_based jobs
        unique_parcels = unique(parcel_ids[job_index_home_based])
        capacity_in_buildings = building_dataset.compute_variables([
                          "urbansim_parcel.building.vacant_home_based_job_space"],
                             dataset_pool=dataset_pool)
        parcels_with_exceeded_capacity = []
        # iterate over parcels
        logger.log_status("Placing home-based jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            idx_in_jobs = where(parcel_ids[job_index_home_based] == parcel)[0]
            capacity = capacity_in_buildings[idx_in_bldgs]
            if capacity.sum() <= 0:
                continue
            probcomb = ones((idx_in_bldgs.size, idx_in_jobs.size))
            taken = zeros(capacity.shape, dtype="int32")
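            # Same greedy scheme as for the non-home-based jobs above, but with
            # uniform building weights and integer capacities: each placed job
            # consumes one unit of vacant_home_based_job_space.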
            while True:
                zero_cap = where((capacity - taken) <= 0)[0]
                probcomb[zero_cap,:] = 0
                if probcomb.sum() <= 0:
                    break
                req =  probcomb.sum(axis=0)
                wmaxi = where(req==req.max())[0]
                drawjob = sample_noreplace(arange(wmaxi.size), 1) # draw job from available jobs
                imax_req = wmaxi[drawjob]
                weights = probcomb[:,imax_req]
                # sample building
                draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                taken[draw] = taken[draw] + 1
                building_ids[job_index_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                probcomb[:,imax_req] = 0
            if (building_ids[job_index_home_based[idx_in_jobs]] <= 0).any():
                parcels_with_exceeded_capacity.append(parcel)
        parcels_with_exceeded_capacity = array(parcels_with_exceeded_capacity)    
        
        logger.log_status("%s home based jobs (out of %s hb jobs) were placed." % ((building_ids[job_index_home_based]>0).sum(),
                                                                         job_index_home_based.size))
        
        # assign building type where missing
        # determine regional distribution
        idx_home_based = where(building_types == 1)[0]
        idx_non_home_based = where(building_types == 2)[0]
        idx_bt_missing = where(building_types <= 0)[0]
        if idx_bt_missing.size > 0:
            # sample building types
            sample_bt = probsample_replace(array([1,2]), idx_bt_missing.size, 
               array([idx_home_based.size, idx_non_home_based.size])/float(idx_home_based.size + idx_non_home_based.size))
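            # e.g. with 300 home-based and 700 non-home-based jobs region-wide,
            # missing types are drawn from [1, 2] with probabilities [0.3, 0.7]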
            # coerce to int32 (on a 64 bit machine, sample_bt will be of type int64)
            building_types[idx_bt_missing] = sample_bt.astype(int32)
            job_dataset.modify_attribute(name="building_type", data = building_types) 
        
        if out_storage is not None:
            job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage, attributes=AttributeType.PRIMARY)
            building_dataset.write_dataset(out_table_name='buildings', out_storage=out_storage, attributes=AttributeType.PRIMARY)
        logger.log_status("Assigning building_id to jobs done.")