# Example #1
# 0
    def _do_sector_for_businesses(self, sector, diff, business_set,
                                  is_in_sector):
        """Absorb the control-total difference ``diff`` for one sector.

        When ``diff`` is negative, schedules businesses for removal
        (non-placed ones first, the remainder sampled without replacement).
        When positive, appends ``diff`` new unplaced businesses to
        ``self.new_businesses``, copying sqft/employment/activity_id from
        randomly drawn existing businesses of the sector, and advances
        ``self.max_id`` past the freshly assigned ids.
        """
        candidates = where(is_in_sector)[0]

        if diff < 0:  # shrink the sector
            # Remove non-placed businesses first; sample the remainder.
            placed, non_placed, n_non_placed = \
                get_array_without_non_placed_agents(
                    business_set, candidates, -1 * diff,
                    self.location_id_name)
            extra = sample_noreplace(placed, max(0, abs(diff) - n_non_placed))
            self.remove_businesses = concatenate(
                (self.remove_businesses, non_placed, extra))

        if diff > 0:  # grow the sector
            # New businesses start unplaced, tagged with this sector.
            self.new_businesses[self.location_id_name] = concatenate(
                (self.new_businesses[self.location_id_name], zeros((diff, ))))
            self.new_businesses["sector_id"] = concatenate(
                (self.new_businesses["sector_id"], sector * ones((diff, ))))
            # Clone per-business attributes from randomly drawn existing ones.
            drawn = probsample_replace(candidates, diff, None)
            for attr in ("sqft", "employment", "activity_id"):
                self.new_businesses[attr] = concatenate(
                    (self.new_businesses[attr],
                     business_set.get_attribute(attr)[drawn]))

            # Hand out consecutive ids above the current maximum.
            top_id = self.max_id + diff
            self.new_businesses[self.business_id_name] = concatenate(
                (self.new_businesses[self.business_id_name],
                 arange(self.max_id + 1, top_id + 1)))
            self.max_id = top_id
 def test_probsample_replace(self):
     """Smoke-test probsample_replace: timing log, result size/type,
     index range, and exclusion of zero-weight elements."""
     t0 = time.time()
     sample = probsample_replace(self.all, self.size, self.prob, return_index=True)
     elapsed = time.time() - t0
     logger.log_status("probsample_replace %s from %s items array in " % (self.size,self.n) + str(elapsed) + " sec")
     self.assertEqual(sample.size, self.size, msg="sample size not equal to size parameter")
     assert isinstance(sample, ndarray), "sample is not of type ndarray"
     # Every sampled index must lie within the source array's index range.
     for extreme in (sample.min(), sample.max()):
         assert 0 <= extreme <= self.n-1, "sampled elements not in between min and max of source array"
     # Zero-probability items must never appear in the sample.
     assert alltrue(not_equal(self.prob[sample], 0.0)), "elements with zero weight in the sample"
# Example #3
# 0
    def _simulate_submodel(self, submodel, location_set, agent_set, agents_index, ignore_agents_distribution=False):
        """Relocate the agents of one submodel by probabilistic scaling.

        Locations are drawn with replacement with probability proportional to
        the current distribution of this submodel's agents over locations
        (or uniform counts when ``ignore_agents_distribution`` is True),
        optionally multiplied by ``self.weights`` and restricted by
        ``self.filter`` (both may contain the placeholder 'SUBMODEL').
        Returns the sampled location ids for the submodel's agents, an empty
        int32 array when the submodel has no agents, or an array of -1 when
        no location passes the filters.
        """
        location_id_name = location_set.get_id_name()[0]
        # Agents (within agents_index) belonging to this submodel.
        subm_agents_index = agents_index[self.observations_mapping[submodel]]
        if self.submodel_string is not None:
            all_agents_in_subm = where(agent_set[self.submodel_string]==submodel)[0]
        else:
            all_agents_in_subm = arange(agent_set.size())
        if subm_agents_index.size <= 0:
            return array([], dtype='int32')
        #unplace agents
        agent_set.set_values_of_one_attribute(location_id_name, 
                                resize(array([-1]), subm_agents_index.size), subm_agents_index)
        
        if not ignore_agents_distribution:
            # Count of this submodel's agents currently in each location.
            agent_distr_in_loc = array(ndimage_sum(ones(all_agents_in_subm.size), 
                                         labels=agent_set[location_id_name][all_agents_in_subm], 
                                  index=location_set.get_id_attribute()))
        else:
            agent_distr_in_loc = ones(location_set.size(), dtype="int32")
 
        # Candidate locations: must pass the filter and have positive weight.
        location_ind = ones(location_set.size(), dtype='bool')
        if self.filter is not None:
            submodel_filter = re.sub('SUBMODEL', str(submodel), self.filter)
            filter_values = location_set.compute_variables([submodel_filter], dataset_pool=self.dataset_pool)
            location_ind = logical_and(location_ind, filter_values > 0)
        if self.weights is not None:
            submodel_weights = re.sub('SUBMODEL', str(submodel), self.weights)
            weight_values = location_set.compute_variables([submodel_weights], dataset_pool=self.dataset_pool)
            location_ind = logical_and(location_ind, weight_values > 0)
        
        location_index = where(location_ind)[0]
        if location_index.size <= 0:
            logger.log_status("No locations available. Nothing to be done.")
            return array(subm_agents_index.size*[-1], dtype="int32")
        logger.log_status("Submodel %s: %s %s(s) are scaled into %s %s(s)." % (submodel, 
                                                    subm_agents_index.size, agent_set.get_dataset_name(), 
                                                    location_index.size, location_set.get_dataset_name()))
        distr = agent_distr_in_loc[location_index]
        if self.weights is not None:
            distr = distr * weight_values[location_index]
        if ma.allclose(distr.sum(), 0):
            # All probabilities zero - fall back to a uniform distribution.
            uniform_prob = 1.0/distr.size
            distr = resize(array([uniform_prob], dtype='float64'), distr.size)
            logger.log_warning("Probabilities in scaling model for submodel " + str(submodel) + " sum to 0.0.  Substituting uniform distribution!")
        distr = distr/float(distr.sum())
        random_sample = probsample_replace(location_set.get_id_attribute()[location_index], size=subm_agents_index.size, 
                                       prob_array=distr)
        #modify agents locations
        agent_set.set_values_of_one_attribute(location_id_name, random_sample, subm_agents_index)
        return random_sample
# Example #4
# 0
    def _do_run(self, location_set, agent_set, agents_index, data_objects=None, resources=None):
        """Scale the jobs in ``agents_index`` over locations, sector by sector.

        Jobs are unplaced, then each sector's jobs are reassigned to
        (optionally filtered) locations with probability proportional to the
        current number of jobs of that sector per location. Returns the new
        location ids of the affected jobs (empty int32 array when there are
        no jobs or no locations).
        """
        location_id_name = location_set.get_id_name()[0]
        jobsubset = DatasetSubset(agent_set, agents_index)
        if jobsubset.size() <= 0:
            return array([], dtype='int32')
        # Unplace the jobs so they do not count in the current distribution.
        agent_set.set_values_of_one_attribute(location_id_name,
                                              resize(array([-1.0]), jobsubset.size()), agents_index)
        sector_ids = jobsubset.get_attribute("sector_id")
        sectors = unique(sector_ids)
        counts = ndimage_sum(ones((jobsubset.size(),)), labels=sector_ids.astype('int32'), index=sectors.astype('int32'))
        if sectors.size <= 1:
            # ndimage_sum returns a scalar for a single label; normalize to an array.
            counts = array([counts])
        # Build lists (not `map` iterators): `variables` is indexed in the
        # loop below, which would fail on Python 3's lazy map objects.
        variables = ["number_of_jobs_of_sector_" + str(int(x)) for x in sectors]
        compute_variables = [self.variable_package + "." +
            location_set.get_dataset_name() + "." + var for var in variables]
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included({agent_set.get_dataset_name():agent_set})
        location_set.compute_variables(compute_variables, dataset_pool=self.dataset_pool)
        if self.filter is None:
            location_index = arange(location_set.size())
        else:
            filter_values = location_set.compute_variables([self.filter], dataset_pool=self.dataset_pool)
            location_index = where(filter_values > 0)[0]
        if location_index.size <= 0:
            logger.log_status("No locations available. Nothing to be done.")
            # dtype kept consistent with the other empty return above.
            return array([], dtype='int32')
        location_subset = DatasetSubset(location_set, location_index)
        for i, sector in enumerate(sectors):
            distr = location_subset.get_attribute(variables[i])
            if ma.allclose(distr.sum(), 0):
                # No jobs of this sector anywhere - fall back to uniform.
                uniform_prob = 1.0/distr.size
                distr = resize(array([uniform_prob], dtype='float64'), distr.size)
                logger.log_warning("Probabilities in scaling model for sector " + str(sector) + " sum to 0.0.  Substituting uniform distribution!")
            distr = distr/float(distr.sum())
            random_sample = probsample_replace(location_subset.get_id_attribute(), size=int(counts[i]),
                                       prob_array=distr)
            idx = where(sector_ids == sector)[0]
            # Assign the sampled locations to this sector's jobs.
            agent_set.set_values_of_one_attribute(location_id_name, random_sample, agents_index[idx])
        return agent_set.get_attribute_by_index(location_id_name, agents_index)
# Example #5
# 0
 def _do_run(self, location_set, agent_set, agents_index, resources=None):
     """Scale agents over (optionally filtered) locations.

     Agents are unplaced, then reassigned locations sampled with
     replacement, with probabilities proportional to location capacity
     (remaining capacity when ``self.number_of_agents_attribute`` is set)
     or uniform when capacity is not considered. Returns the new location
     ids of the affected agents (empty int32 array when nothing can be
     done).
     """
     location_id_name = location_set.get_id_name()[0]
     asubset = DatasetSubset(agent_set, agents_index)
     if asubset.size() <= 0:
         return array([], dtype='int32')
     # Unplace agents so they do not occupy capacity during sampling.
     agent_set.modify_attribute(location_id_name,
                             resize(array([-1]), asubset.size()), agents_index)
     if self.filter is None:
         location_index = arange(location_set.size())
     else:
         filter_values = location_set.compute_variables([self.filter], dataset_pool=self.dataset_pool)
         location_index = where(filter_values > 0)[0]
     if location_index.size <= 0:
         logger.log_status("No locations available. Nothing to be done.")
         # dtype made consistent with the empty return above.
         return array([], dtype='int32')

     location_subset = DatasetSubset(location_set, location_index)
     if self.consider_capacity:
         location_set.compute_variables([self.capacity_attribute],
                                        dataset_pool=self.dataset_pool)
         weights = location_subset[self.capacity_attribute]
         if self.number_of_agents_attribute is not None:
             # Remaining capacity = capacity - occupants, clipped to [0, capacity].
             location_set.compute_variables([self.number_of_agents_attribute],
                                        dataset_pool=self.dataset_pool)
             weights = clip(weights - location_subset[self.number_of_agents_attribute],
                                        0, location_subset[self.capacity_attribute])
     else:
         weights = ones(location_subset.size())

     if weights.sum() <= 0:
         logger.log_status("Locations' capacity sums to zero. Nothing to be done.")
         # dtype made consistent with the empty return above.
         return array([], dtype='int32')
     distr = weights/float(weights.sum())
     random_sample = probsample_replace(location_subset.get_id_attribute(), size=asubset.size(),
                                    prob_array=distr)
     agent_set.modify_attribute(location_id_name, random_sample, agents_index)
     return agent_set.get_attribute_by_index(location_id_name, agents_index)
 def _do_sector_for_businesses(self, sector, diff, business_set, is_in_sector):
     """Add or remove businesses of one sector to absorb the difference ``diff``.

     diff < 0: schedules -diff businesses for removal (non-placed ones
     first, the remainder sampled without replacement). diff > 0: appends
     diff new unplaced businesses to ``self.new_businesses``, copying
     sqft/employment/activity_id from randomly sampled existing businesses
     of the sector, and advances ``self.max_id`` past the new ids.
     """
     available_business_index = where(is_in_sector)[0]
     if diff < 0: # removing businesses
         # Prefer removing non-placed businesses; sample the remainder.
         sample_array, non_placed, size_non_placed = \
             get_array_without_non_placed_agents(business_set, available_business_index, -1*diff, 
                                                  self.location_id_name)
         self.remove_businesses = concatenate((self.remove_businesses, non_placed, 
                                    sample_noreplace(sample_array, max(0,abs(diff)-size_non_placed))))
                         
     if diff > 0: # creating businesses
         # New businesses start unplaced (location id 0), tagged with this sector.
         self.new_businesses[self.location_id_name]=concatenate((self.new_businesses[self.location_id_name],zeros((diff,))))
         self.new_businesses["sector_id"]=concatenate((self.new_businesses["sector_id"], sector*ones((diff,))))
         sampled_business = probsample_replace(available_business_index, diff, None)
         self.new_businesses["sqft"] = concatenate((self.new_businesses["sqft"],
                                              business_set.get_attribute("sqft")[sampled_business]))
         self.new_businesses["employment"] = concatenate((self.new_businesses["employment"],
                                                    business_set.get_attribute("employment")[sampled_business]))
         self.new_businesses["activity_id"] = concatenate((self.new_businesses["activity_id"],
                                                    business_set.get_attribute("activity_id")[sampled_business]))
         
         # Hand out consecutive ids above the current maximum.
         new_max_id = self.max_id+diff
         self.new_businesses[self.business_id_name]=concatenate((self.new_businesses[self.business_id_name], 
                                                                 arange(self.max_id+1, new_max_id+1)))
         self.max_id = new_max_id
    def run(self, year, business_set,
            control_totals,
            data_objects=None,
            resources=None):
        business_id_name = business_set.get_id_name()[0]
        control_totals.get_attribute("total_number_of_businesses")
        idx = where(control_totals.get_attribute("year")==year)
        sectors = unique(control_totals.get_attribute_by_index("building_use_id", idx))
        max_id = business_set.get_id_attribute().max()
        business_size = business_set.size()
        new_businesses = {self.location_id_name:array([], dtype='int32'),
                          "building_use_id":array([], dtype='int32'),
                          business_id_name:array([], dtype='int32'),
                          "sqft":array([], dtype=int32),
                          "employees":array([], dtype=int32),}
        compute_resources = Resources(data_objects)
#        compute_resources.merge({job_building_types.get_dataset_name():job_building_types, "debug":self.debug})
        business_set.compute_variables(
            map(lambda x: "%s.%s.is_sector_%s"
                    % (self.variable_package, business_set.get_dataset_name(), x),
                sectors),
            resources = compute_resources)
        remove_businesses = array([], dtype='int32')

        for sector in sectors:
            total_businesses = control_totals.get_data_element_by_id((year,sector)).total_number_of_businesses
            is_in_sector = business_set.get_attribute("is_sector_%s" % sector)
            diff = int(total_businesses - is_in_sector.astype(int8).sum())

            if diff < 0: #
                w = where(is_in_sector == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(business_set, w, -1*diff,
                                                         self.location_id_name)
                remove_businesses = concatenate((remove_businesses, non_placed,
                                           sample_noreplace(sample_array, max(0,abs(diff)-size_non_placed))))

            if diff > 0: #
                new_businesses[self.location_id_name]=concatenate((new_businesses[self.location_id_name],zeros((diff,), dtype="int32")))
                new_businesses["building_use_id"]=concatenate((new_businesses["building_use_id"],
                                                               sector*ones((diff,), dtype="int32")))

                available_business_index = where(is_in_sector)[0]
                sampled_business = probsample_replace(available_business_index, diff, None)

                new_businesses["sqft"] = concatenate((new_businesses["sqft"],
                                                     business_set.get_attribute("sqft")[sampled_business]))
                new_businesses["employees"] = concatenate((new_businesses["employees"],
                                                           business_set.get_attribute("employees")[sampled_business]))

                new_max_id = max_id+diff
                new_businesses[business_id_name]=concatenate((new_businesses[business_id_name], arange(max_id+1, new_max_id+1)))
                max_id = new_max_id

        business_set.remove_elements(remove_businesses)
        business_set.add_elements(new_businesses, require_all_attributes=False)
        difference = business_set.size()-business_size
        self.debug.print_debug("Difference in number of businesses: %s (original %s,"
            " new %s, created %s, deleted %s)"
                % (difference,
                   business_size,
                   business_set.size(),
                   new_businesses[business_id_name].size,
                   remove_businesses.size),
            3)
        self.debug.print_debug("Number of unplaced businesses: %s"
            % where(business_set.get_attribute(self.location_id_name) <=0)[0].size,
            3)
        return difference
    def run( self, vacancy_table, frequency_table, template_table, year, location_set, resources=None ):
        self.pre_check( location_set, vacancy_table, [] )
        target_residential_vacancy_rate = vacancy_table.get_data_element_by_id( year ).target_total_residential_vacancy
        target_non_residential_vacancy_rate = vacancy_table.get_data_element_by_id( year ).target_total_non_residential_vacancy
        compute_resources = Resources(resources)
#        compute_resources.merge({"household":household_set, "job":job_set, "debug":self.debug})
        location_set.compute_variables( ["urbansim.gridcell.vacant_residential_units",
                                        "urbansim.gridcell.vacant_commercial_sqft",
                                        "urbansim.gridcell.vacant_industrial_sqft"],
                                        resources = compute_resources )

        # determine current-year vacancy rates
        vacant_resunits_sum = location_set.get_attribute( "vacant_residential_units" ).sum()
        resunits_sum = float( location_set.get_attribute( "residential_units" ).sum() )
        vacant_residential_rate = self.safe_divide(vacant_resunits_sum, resunits_sum)

        vacant_commercial_sqft_sum = location_set.get_attribute( "vacant_commercial_sqft" ).sum()
        commercial_sqft_sum =  float( location_set.get_attribute( "commercial_sqft" ).sum() )
        vacant_commercial_rate =  self.safe_divide(vacant_commercial_sqft_sum, commercial_sqft_sum)

        vacant_industrial_sqft_sum = location_set.get_attribute( "vacant_industrial_sqft" ).sum()
        industrial_sqft_sum = float( location_set.get_attribute( "industrial_sqft" ).sum() )
        vacant_industrial_rate =  self.safe_divide(vacant_industrial_sqft_sum, industrial_sqft_sum)

        logger.log_status("Res: vacant res units: %d, should be vacant: %f, sum res units: %d"
                          % (vacant_resunits_sum, target_residential_vacancy_rate * resunits_sum, resunits_sum))
        logger.log_status("Com: vacant sqft: %d, should be vacant: %f, sum sqft: %d"
                          % (vacant_commercial_sqft_sum, target_non_residential_vacancy_rate * commercial_sqft_sum,
                             commercial_sqft_sum))
        logger.log_status("Ind: vacant sqft: %d, should be vacant: %f, sum sqft: %d"
                          % (vacant_industrial_sqft_sum, target_non_residential_vacancy_rate * industrial_sqft_sum,
                             industrial_sqft_sum))

        should_develop_resunits = max( 0, ( target_residential_vacancy_rate * resunits_sum - vacant_resunits_sum ) /
                                         ( 1 - target_residential_vacancy_rate ) )
        if not should_develop_resunits:
            logger.log_note(("Will not build any residential units, because the current residential vacancy of %d units\n"
                             + "is more than the %d units desired for the vacancy rate of %f.")
                            % (vacant_resunits_sum,
                               target_residential_vacancy_rate * resunits_sum,
                               target_residential_vacancy_rate))
        should_develop_commercial = max( 0, ( target_non_residential_vacancy_rate * commercial_sqft_sum - vacant_commercial_sqft_sum ) /
                                           ( 1 - target_non_residential_vacancy_rate ) )
        if not should_develop_commercial:
            logger.log_note(("Will not build any commercial sqft, because the current commercial vacancy of %d sqft\n"
                             + "is more than the %d sqft desired for the vacancy rate of %f.")
                            % (vacant_commercial_sqft_sum,
                               target_non_residential_vacancy_rate * commercial_sqft_sum,
                               target_non_residential_vacancy_rate))
        should_develop_industrial = max( 0, ( target_non_residential_vacancy_rate * industrial_sqft_sum - vacant_industrial_sqft_sum ) /
                                           ( 1 - target_non_residential_vacancy_rate ) )
        if not should_develop_industrial:
            logger.log_note(("Will not build any industrial sqft, because the current industrial vacancy of %d sqft\n"
                             + "is more than the %d sqft desired for the vacancy rate of %f.")
                            % (vacant_industrial_sqft_sum,
                               target_non_residential_vacancy_rate * industrial_sqft_sum,
                               target_non_residential_vacancy_rate))

#        projects = {}
#        should_develop = {"residential":should_develop_resunits,
#                          "commercial":should_develop_commercial,
#                          "industrial":should_develop_industrial}

#        average_improvement_value = {}
#        average_improvement_value["residential"] = self.safe_divide(
#            location_set.get_attribute("residential_improvement_value" ).sum(), resunits_sum)
#        average_improvement_value["commercial"] = self.safe_divide(
#            location_set.get_attribute("commercial_improvement_value" ).sum(), commercial_sqft_sum)
#        average_improvement_value["industrial"] = self.safe_divide(
#            location_set.get_attribute("industrial_improvement_value" ).sum(), industrial_sqft_sum)

        #create projects

        development_type_ids = []
        units = []; com_sqfts=[]; ind_sqfts=[]; gov_sqfts=[];
        while should_develop_resunits > 0 or should_develop_commercial > 0 or should_develop_industrial > 0:
            n = 1   # sample n developments at a time
            sampled_ids = probsample_replace(frequency_table.get_attribute('development_type_id'),
                                             n,
                                             frequency_table.get_attribute('frequency').astype(float32)/frequency_table.get_attribute('frequency').sum())
            for id in sampled_ids:
                index = where(template_table.get_attribute('development_type_id') == id)[0]
                res_unit = template_table.get_attribute_by_index('residential_units', index)
                com_sqft = template_table.get_attribute_by_index('commercial_sqft', index)
                ind_sqft = template_table.get_attribute_by_index('industrial_sqft', index)
                gov_sqft = template_table.get_attribute_by_index('governmental_sqft', index)

                should_develop_resunits -= res_unit[0]
                should_develop_commercial -= com_sqft[0]
                should_develop_industrial -= ind_sqft[0]

                development_type_ids.append(id)
                units.append(res_unit)
                com_sqfts.append(com_sqft)
                ind_sqfts.append(ind_sqft)
                gov_sqfts.append(gov_sqft)

        sizes = len(development_type_ids)
        if sizes > 0:
            storage = StorageFactory().get_storage('dict_storage')

            developments_table_name = 'developments'
            storage.write_table(
                    table_name=developments_table_name,
                    table_data={
                        "landuse_development_id": arange( sizes ),
                        "grid_id": -1 * ones( ( sizes, ), dtype=int32),
                        "development_type_id": array(development_type_ids),
                        "residential_units":array(units),
                        "commercial_sqft":array(com_sqfts),
                        "industrial_sqft":array(ind_sqfts),
                        "governmental_sqft":array(gov_sqfts),
                        "improvement_value": zeros( ( sizes, ), dtype="int32"),
                        },
                )

            developments = LandUseDevelopmentDataset(
                in_storage = storage,
                in_table_name = developments_table_name,
                )

        else:
            developments = None

        return developments
    def run(self,
            year,
            business_set,
            control_totals,
            data_objects=None,
            resources=None):
        """Adjust business_set to match the control totals for ``year``.

        For each building-use sector listed in the control totals of the
        given year, businesses are removed (non-placed ones first) or
        created (sqft/employees sampled from existing businesses of that
        sector, starting unplaced) until the sector count matches its total.
        Returns the net change in the number of businesses.
        """
        business_id_name = business_set.get_id_name()[0]
        control_totals.get_attribute("total_number_of_businesses")
        idx = where(control_totals.get_attribute("year") == year)
        sectors = unique(
            control_totals.get_attribute_by_index("building_use_id", idx))
        max_id = business_set.get_id_attribute().max()
        business_size = business_set.size()
        new_businesses = {
            self.location_id_name: array([], dtype='int32'),
            "building_use_id": array([], dtype='int32'),
            business_id_name: array([], dtype='int32'),
            "sqft": array([], dtype=int32),
            "employees": array([], dtype=int32),
        }
        compute_resources = Resources(data_objects)
        #        compute_resources.merge({job_building_types.get_dataset_name():job_building_types, "debug":self.debug})
        # Compute an is_sector_X indicator attribute for every sector present.
        business_set.compute_variables(map(
            lambda x: "%s.%s.is_sector_%s" %
            (self.variable_package, business_set.get_dataset_name(), x),
            sectors),
                                       resources=compute_resources)
        remove_businesses = array([], dtype='int32')

        for sector in sectors:
            total_businesses = control_totals.get_data_element_by_id(
                (year, sector)).total_number_of_businesses
            is_in_sector = business_set.get_attribute("is_sector_%s" % sector)
            diff = int(total_businesses - is_in_sector.astype(int8).sum())

            if diff < 0:  # removing businesses
                # Prefer removing non-placed businesses; sample the remainder.
                w = where(is_in_sector == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(business_set, w, -1*diff,
                                                         self.location_id_name)
                remove_businesses = concatenate(
                    (remove_businesses, non_placed,
                     sample_noreplace(sample_array,
                                      max(0,
                                          abs(diff) - size_non_placed))))

            if diff > 0:  # creating businesses
                # New businesses start unplaced, copying attributes from
                # randomly sampled existing businesses of the sector.
                new_businesses[self.location_id_name] = concatenate(
                    (new_businesses[self.location_id_name],
                     zeros((diff, ), dtype="int32")))
                new_businesses["building_use_id"] = concatenate(
                    (new_businesses["building_use_id"], sector * ones(
                        (diff, ), dtype="int32")))

                available_business_index = where(is_in_sector)[0]
                sampled_business = probsample_replace(available_business_index,
                                                      diff, None)

                new_businesses["sqft"] = concatenate(
                    (new_businesses["sqft"],
                     business_set.get_attribute("sqft")[sampled_business]))
                new_businesses["employees"] = concatenate((
                    new_businesses["employees"],
                    business_set.get_attribute("employees")[sampled_business]))

                # Hand out consecutive ids above the current maximum.
                new_max_id = max_id + diff
                new_businesses[business_id_name] = concatenate(
                    (new_businesses[business_id_name],
                     arange(max_id + 1, new_max_id + 1)))
                max_id = new_max_id

        business_set.remove_elements(remove_businesses)
        business_set.add_elements(new_businesses, require_all_attributes=False)
        difference = business_set.size() - business_size
        self.debug.print_debug(
            "Difference in number of businesses: %s (original %s,"
            " new %s, created %s, deleted %s)" %
            (difference, business_size, business_set.size(),
             new_businesses[business_id_name].size, remove_businesses.size), 3)
        self.debug.print_debug(
            "Number of unplaced businesses: %s" %
            where(business_set.get_attribute(self.location_id_name) <= 0)
            [0].size, 3)
        return difference
    def _do_run_for_this_year(self, job_set):
        building_type = job_set.get_attribute("building_type")
        sectors = unique(self.control_totals_for_this_year.get_attribute("sector_id"))
        self._compute_sector_variables(sectors, job_set)
        for sector in sectors:
            isector = where(self.control_totals_for_this_year.get_attribute("sector_id") == sector)[0]
            total_hb_jobs = self.control_totals_for_this_year.get_attribute("total_home_based_employment")[isector]
            total_nhb_jobs = self.control_totals_for_this_year.get_attribute("total_non_home_based_employment")[isector]
            is_in_sector_hb = job_set.get_attribute("is_in_employment_sector_%s_home_based" % sector)
            is_in_sector_nhb = job_set.get_attribute("is_in_employment_sector_%s_non_home_based" % sector)
            diff_hb = int(total_hb_jobs - is_in_sector_hb.astype(int8).sum())
            diff_nhb = int(total_nhb_jobs - is_in_sector_nhb.astype(int8).sum())
            if diff_hb < 0: # home based jobs to be removed
                w = where(is_in_sector_hb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_hb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate((self.remove_jobs, non_placed,
                                           sample_noreplace(sample_array, max(0,abs(diff_hb)-size_non_placed))))
            if diff_nhb < 0: # non home based jobs to be removed
                w = where(is_in_sector_nhb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_nhb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate((self.remove_jobs, non_placed,
                                           sample_noreplace(sample_array, max(0,abs(diff_nhb)-size_non_placed))))

            if diff_hb > 0: # home based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate((self.new_jobs[self.location_id_name],
                                   zeros((diff_hb,), dtype=self.new_jobs[self.location_id_name].dtype.type)))
                self.new_jobs["sector_id"] = concatenate((self.new_jobs["sector_id"],
                                   (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_hb))))
                if 1 in is_in_sector_hb:
                    building_type_distribution = array(ndimage_sum(is_in_sector_hb,
                                                                    labels=building_type,
                                                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute("is_home_based_job"): # take the building type distribution from the whole region
                    building_type_distribution = array(ndimage_sum(
                                                                job_set.get_attribute("is_home_based_job"),
                                                                labels=building_type,
                                                                index=self.available_building_types))
                else: # there are no home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(self.available_building_types.size)
                    building_type_distribution = building_type_distribution/building_type_distribution.sum()
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_hb, building_type_distribution/
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"],
                            sampled_building_types.astype(self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_hb
                self.new_jobs[self.job_id_name] = concatenate((self.new_jobs[self.job_id_name],
                                                     arange(self.max_id+1, new_max_id+1)))
                self.max_id = new_max_id

            if diff_nhb > 0: # non home based jobs to be created
                self.new_jobs[self.location_id_name]=concatenate((self.new_jobs[self.location_id_name],
                                     zeros((diff_nhb,), dtype=self.new_jobs[self.location_id_name].dtype.type)))
                self.new_jobs["sector_id"]=concatenate((self.new_jobs["sector_id"],
                                           (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_nhb))))
                if 1 in is_in_sector_nhb:
                    building_type_distribution = array(ndimage_sum(is_in_sector_nhb,
                                                                    labels=building_type,
                                                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute("is_non_home_based_job"): # take the building type distribution from the whole region
                    building_type_distribution = array(ndimage_sum(
                                                        job_set.get_attribute("is_non_home_based_job"),
                                                        labels=building_type,
                                                        index=self.available_building_types))
                else: # there are no non-home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(self.available_building_types.size)
                    building_type_distribution = building_type_distribution/building_type_distribution.sum()
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_nhb, building_type_distribution/
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"],
                                        sampled_building_types.astype(self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id+diff_nhb
                self.new_jobs[self.job_id_name]=concatenate((self.new_jobs[self.job_id_name], arange(self.max_id+1, 
                                                                                                     new_max_id+1)))
                self.max_id = new_max_id
    def run(self, job_dataset, dataset_pool, out_storage=None, jobs_table="jobs"):
        """
        Assign building_id (and where needed building_type and sqft) to jobs.

        Algorithm:
            1. For all non_home_based jobs that have parcel_id assigned but no building_id, try
                to choose a building from all buildings in that parcel. Draw the building with probabilities
                given by the sector-building_type distribution. The job sizes are
                fitted into the available space (the attribute job.sqft is updated).
            2. For all non_home_based jobs for which no building was found in step 1, check
                if the parcel has residential buildings. In such a case, re-assign the jobs to be
                home-based.
                Otherwise, if sum of non_residential_sqft over the involved buildings is 0,
                for all jobs that have impute_building_sqft_flag=True draw a building using
                the sector-building_type distribution and impute the corresponding sqft to 
                the non_residential_sqft of that building.
            3. For all home_based jobs that have parcel_id assigned but no building_id, try
                to choose a building from all buildings in that parcel. 
                The capacity of a single-family building is determined from sizes of the households living there 
                (for each household the minimum of number of members and 2 is taken). 
                For multi-family buildings the capacity is 50.
            4. Assign a building type to jobs that have missing building type. It is sampled 
                from the regional-wide distribution of home based and non-home based jobs.
            5. Update the table 'building_sqft_per_job' using the updated job.sqft.
        'in_storage' should contain the jobs table and the zone_averages_table. The 'dataset_pool_storage'
        should contain all other tables needed (buildings, households, building_types). 

        If out_storage is given, the modified jobs and buildings tables are written out.
        """
        parcel_ids = job_dataset.get_attribute("parcel_id")
        building_ids = job_dataset.get_attribute("building_id")
        building_types = job_dataset.get_attribute("building_type")
        try:
            impute_sqft_flags = job_dataset.get_attribute("impute_building_sqft_flag")
        except Exception:  # attribute not present -> never impute sqft
            impute_sqft_flags = zeros(job_dataset.size())
        is_considered = logical_and(parcel_ids > 0, building_ids <= 0) # jobs that have assigned parcel but not building
        job_index_home_based = where(logical_and(is_considered, building_types == 1))[0]
        job_index_governmental = where(logical_and(is_considered, building_types == 3))[0]
        
        building_dataset = dataset_pool.get_dataset('building')
        parcel_ids_in_bldgs = building_dataset.get_attribute("parcel_id")
        bldg_ids_in_bldgs = building_dataset.get_id_attribute()
        bldg_types_in_bldgs = building_dataset.get_attribute("building_type_id")
        
        non_res_sqft = building_dataset.get_attribute("non_residential_sqft")
        occupied = building_dataset.compute_variables(["urbansim_parcel.building.occupied_building_sqft_by_jobs"],
                                                                     dataset_pool=dataset_pool)
        is_governmental = building_dataset.compute_variables(["building.disaggregate(building_type.generic_building_type_id == 7)"],
                                                                     dataset_pool=dataset_pool)
        
        # assign buildings to governmental jobs randomly (no capacity constraint)
        unique_parcels = unique(parcel_ids[job_index_governmental])
        logger.log_status("Placing governmental jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs[is_governmental] == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_governmental] == parcel)[0]
            draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
            building_ids[job_index_governmental[idx_in_jobs]] = bldg_ids_in_bldgs[where(is_governmental)[0][draw]]
        logger.log_status("%s governmental jobs (out of %s gov. jobs) were placed." % (
                                                                (building_ids[job_index_governmental]>0).sum(),
                                                                 job_index_governmental.size))
        logger.log_status("The not-placed governmental jobs will be added to the non-home based jobs.")
        
        # consider the unplaced governmental jobs together with other non-home-based jobs
        is_now_considered = logical_and(is_considered, building_ids <= 0)
        job_index_non_home_based = where(logical_and(is_now_considered, logical_or(building_types == 2, building_types == 3)))[0]
                                    
        # step 1: assign buildings to non_home_based jobs based on available space
        unique_parcels = unique(parcel_ids[job_index_non_home_based])
        job_building_types = job_dataset.compute_variables(["bldgs_building_type_id = job.disaggregate(building.building_type_id)"], 
                                                           dataset_pool=dataset_pool)
        where_valid_jbt = where(logical_and(job_building_types>0, logical_or(building_types == 2, building_types==3)))[0]
        building_type_dataset = dataset_pool.get_dataset("building_type")
        available_building_types= building_type_dataset.get_id_attribute()
        idx_available_bt = building_type_dataset.get_id_index(available_building_types)
        sectors = job_dataset.get_attribute("sector_id")
        unique_sectors = unique(sectors)
        sector_bt_distribution = zeros((unique_sectors.size, building_type_dataset.size()), dtype="float32")
        
        jobs_sqft = job_dataset.get_attribute_by_index("sqft", job_index_non_home_based).astype("float32")
        job_dataset._compute_if_needed("urbansim_parcel.job.zone_id", dataset_pool=dataset_pool) 
        jobs_zones = job_dataset.get_attribute_by_index("zone_id", job_index_non_home_based)
        new_jobs_sqft = job_dataset.get_attribute("sqft").copy()
        
        # find sector -> building_type distribution (row-normalized counts of placed jobs)
        sector_index_mapping = {}
        for isector in range(unique_sectors.size):
            idx = where(sectors[where_valid_jbt]==unique_sectors[isector])[0]
            if idx.size == 0: continue
            o = ones(idx.size, dtype="int32")
            sector_bt_distribution[isector,:] = ndimage_sum(o, labels=job_building_types[where_valid_jbt[idx]], 
                                                            index=available_building_types)
            sector_bt_distribution[isector,:] = sector_bt_distribution[isector,:]/sector_bt_distribution[isector,:].sum()
            sector_index_mapping[unique_sectors[isector]] = isector
               
        # create a lookup table for zonal average per building type of sqft per employee
        zone_average_dataset = dataset_pool.get_dataset("building_sqft_per_job")
        zone_bt_lookup = zone_average_dataset.get_building_sqft_as_table(job_dataset.get_attribute("zone_id").max(),
                                                                         available_building_types.max())

        counter_zero_capacity = 0
        counter_zero_distr = 0
        # iterate over parcels
        logger.log_status("Placing non-home-based jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_non_home_based] == parcel)[0]
            capacity = maximum(non_res_sqft[idx_in_bldgs] - occupied[idx_in_bldgs],0)
            if capacity.sum() <= 0:
                counter_zero_capacity += idx_in_jobs.size
                continue
            this_jobs_sectors = sectors[job_index_non_home_based][idx_in_jobs]
            this_jobs_sqft_table = resize(jobs_sqft[idx_in_jobs], (idx_in_bldgs.size, idx_in_jobs.size))
            # jobs without own sqft get the zonal average for the candidate building's type
            wn = jobs_sqft[idx_in_jobs] <= 0
            for i in range(idx_in_bldgs.size):
                this_jobs_sqft_table[i, where(wn)[0]] = zone_bt_lookup[jobs_zones[idx_in_jobs[wn]], bldg_types_in_bldgs[idx_in_bldgs[i]]]
            # shrink requested sqft where total demand exceeds supply (0.9 leaves slack)
            supply_demand_ratio = (resize(capacity, (capacity.size, 1))/this_jobs_sqft_table.astype("float32").sum(axis=0))/float(idx_in_jobs.size)*0.9
            if any(supply_demand_ratio < 1): # correct only if supply is smaller than demand 
                this_jobs_sqft_table = this_jobs_sqft_table * supply_demand_ratio
            # probcomb[i,j]: prob. of placing job j into building i, from sector->bldg-type distribution
            probcomb = zeros(this_jobs_sqft_table.shape)
            bt = bldg_types_in_bldgs[idx_in_bldgs]
            ibt = building_type_dataset.get_id_index(bt)
            for i in range(probcomb.shape[0]):
                for j in range(probcomb.shape[1]):
                    probcomb[i,j] = sector_bt_distribution[sector_index_mapping[this_jobs_sectors[j]],ibt[i]]
            pcs = probcomb.sum(axis=0)
            probcomb = probcomb/pcs
            wz = where(pcs<=0)[0]
            counter_zero_distr += wz.size
            probcomb[:, wz] = 0 # to avoid nan values
            taken = zeros(capacity.shape)
            has_sqft = this_jobs_sqft_table > 0
            # greedily place the largest remaining job until no placeable job is left
            while True:
                if (has_sqft * probcomb).sum() <= 0:
                    break
                req =  (this_jobs_sqft_table * probcomb).sum(axis=0)
                maxi = req.max()
                wmaxi = where(req==maxi)[0]
                drawjob = sample_noreplace(arange(wmaxi.size), 1) # draw job from jobs with the maximum size
                imax_req = wmaxi[drawjob]
                weights = has_sqft[:,imax_req] * probcomb[:,imax_req]
                draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                if (taken[draw] + this_jobs_sqft_table[draw,imax_req]) > capacity[draw]:
                    probcomb[draw,imax_req]=0  # building full for this job; retry elsewhere
                    continue
                taken[draw] = taken[draw] + this_jobs_sqft_table[draw,imax_req]
                building_ids[job_index_non_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                probcomb[:,imax_req] = 0  # job placed; remove from further draws
                new_jobs_sqft[job_index_non_home_based[idx_in_jobs[imax_req]]] = int(min(self.maximum_sqft, max(round(this_jobs_sqft_table[draw,imax_req]), 
                                                                                     self.minimum_sqft)))
            
        logger.log_status("%s non home based jobs (out of %s nhb jobs) were placed." % (
                                                                (building_ids[job_index_non_home_based]>0).sum(),
                                                                 job_index_non_home_based.size))
        logger.log_status("Unplaced due to zero capacity: %s" % counter_zero_capacity)
        logger.log_status("Unplaced due to zero distribution: %s" % counter_zero_distr)
        
        job_dataset.modify_attribute(name="building_id", data = building_ids)
        
        # step 2: re-classify unplaced non-home based jobs to home-based if parcels contain residential buildings
        bldgs_is_residential = logical_and(logical_not(is_governmental), building_dataset.compute_variables(["urbansim_parcel.building.is_residential"], 
                                                           dataset_pool=dataset_pool))
        is_now_considered = logical_and(parcel_ids > 0, building_ids <= 0)
        job_index_non_home_based_unplaced = where(logical_and(is_now_considered, building_types == 2))[0]
        unique_parcels = unique(parcel_ids[job_index_non_home_based_unplaced])
        imputed_sqft = 0
        logger.log_status("Try to reclassify non-home-based jobs (excluding governemtal jobs) ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_non_home_based_unplaced] == parcel)[0]
            where_residential = where(bldgs_is_residential[idx_in_bldgs])[0]
            if where_residential.size > 0:
                building_types[job_index_non_home_based_unplaced[idx_in_jobs]] = 1 # set to home-based jobs
            elif non_res_sqft[idx_in_bldgs].sum() <= 0:
                # impute non_residential_sqft and assign buildings
                this_jobs_sectors = sectors[job_index_non_home_based_unplaced][idx_in_jobs]
                this_jobs_sqft_table = resize(jobs_sqft[idx_in_jobs], (idx_in_bldgs.size, idx_in_jobs.size))
                wn = jobs_sqft[idx_in_jobs] <= 0
                for i in range(idx_in_bldgs.size):
                    this_jobs_sqft_table[i, where(wn)[0]] = zone_bt_lookup[jobs_zones[idx_in_jobs[wn]], bldg_types_in_bldgs[idx_in_bldgs[i]]]
                probcomb = zeros(this_jobs_sqft_table.shape)
                bt = bldg_types_in_bldgs[idx_in_bldgs]
                ibt = building_type_dataset.get_id_index(bt)
                for i in range(probcomb.shape[0]):
                    for j in range(probcomb.shape[1]):
                        probcomb[i,j] = sector_bt_distribution[sector_index_mapping[this_jobs_sectors[j]],ibt[i]]
                for ijob in range(probcomb.shape[1]):
                    # FIX: ijob is local to this parcel; map through idx_in_jobs to reach the global job index
                    # (previously indexed impute_sqft_flags[job_index_non_home_based_unplaced[ijob]])
                    if (probcomb[:,ijob].sum() <= 0) or (impute_sqft_flags[job_index_non_home_based_unplaced[idx_in_jobs[ijob]]] == 0):
                        continue
                    weights = probcomb[:,ijob]
                    draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                    non_res_sqft[idx_in_bldgs[draw]] += this_jobs_sqft_table[draw,ijob]
                    imputed_sqft += this_jobs_sqft_table[draw,ijob]
                    building_ids[job_index_non_home_based_unplaced[idx_in_jobs[ijob]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                    # FIX: write sqft for the job just placed (unplaced index set), not job_index_non_home_based
                    new_jobs_sqft[job_index_non_home_based_unplaced[idx_in_jobs[ijob]]] = int(min(self.maximum_sqft, max(round(this_jobs_sqft_table[draw,ijob]), 
                                                                                     self.minimum_sqft)))
                    
        building_dataset.modify_attribute(name="non_residential_sqft", data = non_res_sqft)
        job_dataset.modify_attribute(name="building_id", data = building_ids)
        job_dataset.modify_attribute(name="building_type", data = building_types)
        job_dataset.modify_attribute(name="sqft", data = new_jobs_sqft)
        
        old_nhb_size = job_index_non_home_based.size
        job_index_home_based = where(logical_and(is_considered, building_types == 1))[0]
        job_index_non_home_based = where(logical_and(is_considered, building_types == 2))[0]
        logger.log_status("%s non-home based jobs reclassified as home-based." % (old_nhb_size-job_index_non_home_based.size))
        logger.log_status("%s non-residential sqft imputed." % imputed_sqft)
        logger.log_status("Additionaly, %s non home based jobs were placed due to imputed sqft." % \
                                                (building_ids[job_index_non_home_based_unplaced]>0).sum())
        # step 3: home_based jobs
        unique_parcels = unique(parcel_ids[job_index_home_based])
        capacity_in_buildings = building_dataset.compute_variables([
                          "urbansim_parcel.building.vacant_home_based_job_space"],
                             dataset_pool=dataset_pool)
        parcels_with_exceeded_capacity = []
        # iterate over parcels
        logger.log_status("Placing home-based jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            idx_in_jobs = where(parcel_ids[job_index_home_based] == parcel)[0]
            capacity = capacity_in_buildings[idx_in_bldgs]
            if capacity.sum() <= 0:
                continue
            probcomb = ones((idx_in_bldgs.size, idx_in_jobs.size))
            taken = zeros(capacity.shape, dtype="int32")
            while True:
                zero_cap = where((capacity - taken) <= 0)[0]
                probcomb[zero_cap,:] = 0
                if probcomb.sum() <= 0:
                    break
                req =  probcomb.sum(axis=0)
                wmaxi = where(req==req.max())[0]
                drawjob = sample_noreplace(arange(wmaxi.size), 1) # draw job from available jobs
                imax_req = wmaxi[drawjob]
                weights = probcomb[:,imax_req]
                # sample building
                draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                taken[draw] = taken[draw] + 1
                building_ids[job_index_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                probcomb[:,imax_req] = 0
            # NOTE(review): tests for -1 specifically, while "unplaced" is tested as <= 0 elsewhere — confirm
            if -1 in building_ids[job_index_home_based[idx_in_jobs]]:
                parcels_with_exceeded_capacity.append(parcel)
        parcels_with_exceeded_capacity = array(parcels_with_exceeded_capacity)    
        
        logger.log_status("%s home based jobs (out of %s hb jobs) were placed." % ((building_ids[job_index_home_based]>0).sum(),
                                                                         job_index_home_based.size))
        
        # step 4: assign building type where missing
        # determine regional distribution
        idx_home_based = where(building_types == 1)[0]
        idx_non_home_based = where(building_types == 2)[0]
        idx_bt_missing = where(building_types <= 0)[0]
        if idx_bt_missing.size > 0:
            # sample building types
            sample_bt = probsample_replace(array([1,2]), idx_bt_missing.size, 
               array([idx_home_based.size, idx_non_home_based.size])/float(idx_home_based.size + idx_non_home_based.size))
            # coerce to int32 (on a 64 bit machine, sample_bt will be of type int64)
            building_types[idx_bt_missing] = sample_bt.astype(int32)
            job_dataset.modify_attribute(name="building_type", data = building_types) 
        
        if out_storage is not None:
            job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage, attributes=AttributeType.PRIMARY)
            building_dataset.write_dataset(out_table_name='buildings', out_storage=out_storage, attributes=AttributeType.PRIMARY)
        logger.log_status("Assigning building_id to jobs done.")
    def _do_run_for_this_year(self, job_set):
        """
        Adjust jobs per sector towards this year's control totals.

        For each sector, compare the current number of home-based and
        non-home-based jobs with the control totals; schedule removals
        (self.remove_jobs) when there is a surplus and create new job
        records (self.new_jobs) when there is a deficit. The home-based
        and non-home-based branches share the same logic, so both are
        delegated to the private helpers below.
        """
        building_type = job_set.get_attribute("building_type")
        sectors = unique(
            self.control_totals_for_this_year.get_attribute("sector_id"))
        self._compute_sector_variables(sectors, job_set)
        for sector in sectors:
            isector = where(
                self.control_totals_for_this_year.get_attribute("sector_id") ==
                sector)[0]
            total_hb_jobs = self.control_totals_for_this_year.get_attribute(
                "total_home_based_employment")[isector]
            total_nhb_jobs = self.control_totals_for_this_year.get_attribute(
                "total_non_home_based_employment")[isector]
            is_in_sector_hb = job_set.get_attribute(
                "is_in_employment_sector_%s_home_based" % sector)
            is_in_sector_nhb = job_set.get_attribute(
                "is_in_employment_sector_%s_non_home_based" % sector)
            # difference between control total and current count for each segment
            diff_hb = int(total_hb_jobs - is_in_sector_hb.astype(int8).sum())
            diff_nhb = int(total_nhb_jobs -
                           is_in_sector_nhb.astype(int8).sum())
            if diff_hb < 0:  # home based jobs to be removed
                self._remove_sector_jobs(job_set, is_in_sector_hb, diff_hb)
            if diff_nhb < 0:  # non home based jobs to be removed
                self._remove_sector_jobs(job_set, is_in_sector_nhb, diff_nhb)
            if diff_hb > 0:  # home based jobs to be created
                self._add_sector_jobs(job_set, building_type, sector, diff_hb,
                                      is_in_sector_hb, "is_home_based_job")
            if diff_nhb > 0:  # non home based jobs to be created
                self._add_sector_jobs(job_set, building_type, sector, diff_nhb,
                                      is_in_sector_nhb, "is_non_home_based_job")

    def _remove_sector_jobs(self, job_set, is_in_sector, diff):
        """Schedule -diff jobs of one sector segment for removal.

        Non-placed jobs are removed first; the remainder is sampled
        without replacement from the placed jobs. Appends to
        self.remove_jobs.
        """
        w = where(is_in_sector == 1)[0]
        sample_array, non_placed, size_non_placed = \
            get_array_without_non_placed_agents(job_set, w, -1*diff,
                                                 self.location_id_name)
        self.remove_jobs = concatenate(
            (self.remove_jobs, non_placed,
             sample_noreplace(sample_array,
                              max(0,
                                  abs(diff) - size_non_placed))))

    def _add_sector_jobs(self, job_set, building_type, sector, diff,
                         is_in_sector, regional_flag_name):
        """Append diff new unplaced jobs of the given sector to self.new_jobs.

        Building types are sampled from the sector's own building-type
        distribution; if the sector has no jobs of this segment, fall back
        to the regional distribution over jobs flagged by
        regional_flag_name ("is_home_based_job"/"is_non_home_based_job"),
        and to a uniform distribution if the region has none either.
        New job ids continue from self.max_id, which is updated.
        """
        self.new_jobs[self.location_id_name] = concatenate(
            (self.new_jobs[self.location_id_name],
             zeros(
                 (diff, ),
                 dtype=self.new_jobs[self.location_id_name].dtype.type)))
        self.new_jobs["sector_id"] = concatenate(
            (self.new_jobs["sector_id"],
             resize(
                 array([sector],
                       dtype=self.new_jobs["sector_id"].dtype.type),
                 diff)))
        if 1 in is_in_sector:
            building_type_distribution = array(
                ndimage_sum(is_in_sector,
                            labels=building_type,
                            index=self.available_building_types))
        elif 1 in job_set.get_attribute(regional_flag_name):
            # take the building type distribution from the whole region
            building_type_distribution = array(
                ndimage_sum(job_set.get_attribute(regional_flag_name),
                            labels=building_type,
                            index=self.available_building_types))
        else:
            # no such jobs anywhere in the region; take uniform distribution
            building_type_distribution = ones(
                self.available_building_types.size)
            building_type_distribution = \
                building_type_distribution / building_type_distribution.sum()
        sampled_building_types = probsample_replace(
            self.available_building_types, diff,
            building_type_distribution /
            float(building_type_distribution.sum()))
        self.new_jobs["building_type"] = concatenate(
            (self.new_jobs["building_type"],
             sampled_building_types.astype(
                 self.new_jobs["building_type"].dtype.type)))
        new_max_id = self.max_id + diff
        self.new_jobs[self.job_id_name] = concatenate(
            (self.new_jobs[self.job_id_name],
             arange(self.max_id + 1, new_max_id + 1)))
        self.max_id = new_max_id
    def run(self,
            vacancy_table,
            frequency_table,
            template_table,
            year,
            location_set,
            resources=None):
        self.pre_check(location_set, vacancy_table, [])
        target_residential_vacancy_rate = vacancy_table.get_data_element_by_id(
            year).target_total_residential_vacancy
        target_non_residential_vacancy_rate = vacancy_table.get_data_element_by_id(
            year).target_total_non_residential_vacancy
        compute_resources = Resources(resources)
        #        compute_resources.merge({"household":household_set, "job":job_set, "debug":self.debug})
        location_set.compute_variables([
            "urbansim.gridcell.vacant_residential_units",
            "urbansim.gridcell.vacant_commercial_sqft",
            "urbansim.gridcell.vacant_industrial_sqft"
        ],
                                       resources=compute_resources)

        # determine current-year vacancy rates
        vacant_resunits_sum = location_set.get_attribute(
            "vacant_residential_units").sum()
        resunits_sum = float(
            location_set.get_attribute("residential_units").sum())
        vacant_residential_rate = self.safe_divide(vacant_resunits_sum,
                                                   resunits_sum)

        vacant_commercial_sqft_sum = location_set.get_attribute(
            "vacant_commercial_sqft").sum()
        commercial_sqft_sum = float(
            location_set.get_attribute("commercial_sqft").sum())
        vacant_commercial_rate = self.safe_divide(vacant_commercial_sqft_sum,
                                                  commercial_sqft_sum)

        vacant_industrial_sqft_sum = location_set.get_attribute(
            "vacant_industrial_sqft").sum()
        industrial_sqft_sum = float(
            location_set.get_attribute("industrial_sqft").sum())
        vacant_industrial_rate = self.safe_divide(vacant_industrial_sqft_sum,
                                                  industrial_sqft_sum)

        logger.log_status(
            "Res: vacant res units: %d, should be vacant: %f, sum res units: %d"
            % (vacant_resunits_sum,
               target_residential_vacancy_rate * resunits_sum, resunits_sum))
        logger.log_status(
            "Com: vacant sqft: %d, should be vacant: %f, sum sqft: %d" %
            (vacant_commercial_sqft_sum, target_non_residential_vacancy_rate *
             commercial_sqft_sum, commercial_sqft_sum))
        logger.log_status(
            "Ind: vacant sqft: %d, should be vacant: %f, sum sqft: %d" %
            (vacant_industrial_sqft_sum, target_non_residential_vacancy_rate *
             industrial_sqft_sum, industrial_sqft_sum))

        should_develop_resunits = max(
            0, (target_residential_vacancy_rate * resunits_sum -
                vacant_resunits_sum) / (1 - target_residential_vacancy_rate))
        if not should_develop_resunits:
            logger.log_note((
                "Will not build any residential units, because the current residential vacancy of %d units\n"
                +
                "is more than the %d units desired for the vacancy rate of %f."
            ) % (vacant_resunits_sum, target_residential_vacancy_rate *
                 resunits_sum, target_residential_vacancy_rate))
        should_develop_commercial = max(
            0, (target_non_residential_vacancy_rate * commercial_sqft_sum -
                vacant_commercial_sqft_sum) /
            (1 - target_non_residential_vacancy_rate))
        if not should_develop_commercial:
            logger.log_note((
                "Will not build any commercial sqft, because the current commercial vacancy of %d sqft\n"
                +
                "is more than the %d sqft desired for the vacancy rate of %f."
            ) % (vacant_commercial_sqft_sum,
                 target_non_residential_vacancy_rate * commercial_sqft_sum,
                 target_non_residential_vacancy_rate))
        should_develop_industrial = max(
            0, (target_non_residential_vacancy_rate * industrial_sqft_sum -
                vacant_industrial_sqft_sum) /
            (1 - target_non_residential_vacancy_rate))
        if not should_develop_industrial:
            logger.log_note((
                "Will not build any industrial sqft, because the current industrial vacancy of %d sqft\n"
                +
                "is more than the %d sqft desired for the vacancy rate of %f."
            ) % (vacant_industrial_sqft_sum,
                 target_non_residential_vacancy_rate * industrial_sqft_sum,
                 target_non_residential_vacancy_rate))

#        projects = {}
#        should_develop = {"residential":should_develop_resunits,
#                          "commercial":should_develop_commercial,
#                          "industrial":should_develop_industrial}

#        average_improvement_value = {}
#        average_improvement_value["residential"] = self.safe_divide(
#            location_set.get_attribute("residential_improvement_value" ).sum(), resunits_sum)
#        average_improvement_value["commercial"] = self.safe_divide(
#            location_set.get_attribute("commercial_improvement_value" ).sum(), commercial_sqft_sum)
#        average_improvement_value["industrial"] = self.safe_divide(
#            location_set.get_attribute("industrial_improvement_value" ).sum(), industrial_sqft_sum)

#create projects

        development_type_ids = []
        units = []
        com_sqfts = []
        ind_sqfts = []
        gov_sqfts = []
        while should_develop_resunits > 0 or should_develop_commercial > 0 or should_develop_industrial > 0:
            n = 1  # sample n developments at a time
            sampled_ids = probsample_replace(
                frequency_table.get_attribute('development_type_id'), n,
                frequency_table.get_attribute('frequency').astype(float32) /
                frequency_table.get_attribute('frequency').sum())
            for id in sampled_ids:
                index = where(
                    template_table.get_attribute('development_type_id') ==
                    id)[0]
                res_unit = template_table.get_attribute_by_index(
                    'residential_units', index)
                com_sqft = template_table.get_attribute_by_index(
                    'commercial_sqft', index)
                ind_sqft = template_table.get_attribute_by_index(
                    'industrial_sqft', index)
                gov_sqft = template_table.get_attribute_by_index(
                    'governmental_sqft', index)

                should_develop_resunits -= res_unit[0]
                should_develop_commercial -= com_sqft[0]
                should_develop_industrial -= ind_sqft[0]

                development_type_ids.append(id)
                units.append(res_unit)
                com_sqfts.append(com_sqft)
                ind_sqfts.append(ind_sqft)
                gov_sqfts.append(gov_sqft)

        sizes = len(development_type_ids)
        if sizes > 0:
            storage = StorageFactory().get_storage('dict_storage')

            developments_table_name = 'developments'
            storage.write_table(
                table_name=developments_table_name,
                table_data={
                    "landuse_development_id": arange(sizes),
                    "grid_id": -1 * ones((sizes, ), dtype=int32),
                    "development_type_id": array(development_type_ids),
                    "residential_units": array(units),
                    "commercial_sqft": array(com_sqfts),
                    "industrial_sqft": array(ind_sqfts),
                    "governmental_sqft": array(gov_sqfts),
                    "improvement_value": zeros((sizes, ), dtype="int32"),
                },
            )

            developments = LandUseDevelopmentDataset(
                in_storage=storage,
                in_table_name=developments_table_name,
            )

        else:
            developments = None

        return developments
    def _do_run(self,
                location_set,
                agent_set,
                agents_index,
                data_objects=None,
                resources=None):
        """Redistribute the jobs in ``agents_index`` across locations.

        The jobs are first unplaced (location id set to -1).  Then, for
        each sector present in the subset, the jobs of that sector are
        re-assigned to locations sampled with replacement, with
        probabilities proportional to the number of jobs of the same
        sector already in each location (optionally restricted to
        locations passing ``self.filter``).  If a sector's distribution
        sums to zero, a uniform distribution is substituted.

        Returns the new location ids of the affected jobs, or an empty
        array if there are no jobs or no locations to work with.
        """
        location_id_name = location_set.get_id_name()[0]
        jobsubset = DatasetSubset(agent_set, agents_index)
        if jobsubset.size() <= 0:
            return array([], dtype='int32')
        # unplace jobs
        agent_set.set_values_of_one_attribute(
            location_id_name, resize(array([-1.0]), jobsubset.size()),
            agents_index)
        sector_ids = jobsubset.get_attribute("sector_id")
        sectors = unique(sector_ids)
        # number of jobs per sector (ndimage_sum returns a scalar for a
        # single label, hence the wrapping below)
        counts = ndimage_sum(ones((jobsubset.size(), )),
                             labels=sector_ids.astype('int32'),
                             index=sectors.astype('int32'))
        if sectors.size <= 1:
            counts = array([counts])
        # Bug fix: use list comprehensions instead of map().  Under
        # Python 3, map() returns an iterator, which would break the
        # variables[i] indexing in the loop below.
        variables = ["number_of_jobs_of_sector_" + str(int(x))
                     for x in sectors]
        compute_variables = [
            self.variable_package + "." +
            location_set.get_dataset_name() + "." + var
            for var in variables]
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included(
            {agent_set.get_dataset_name(): agent_set})
        location_set.compute_variables(compute_variables,
                                       dataset_pool=self.dataset_pool)
        if self.filter is None:
            location_index = arange(location_set.size())
        else:
            filter_values = location_set.compute_variables(
                [self.filter], dataset_pool=self.dataset_pool)
            location_index = where(filter_values > 0)[0]
        if location_index.size <= 0:
            logger.log_status("No locations available. Nothing to be done.")
            return array([])
        location_subset = DatasetSubset(location_set, location_index)
        for i, sector in enumerate(sectors):
            distr = location_subset.get_attribute(variables[i])
            if ma.allclose(distr.sum(), 0):
                # no jobs of this sector anywhere: fall back to uniform
                uniform_prob = 1.0 / distr.size
                distr = resize(array([uniform_prob], dtype='float64'),
                               distr.size)
                logger.log_warning(
                    "Probabilities in scaling model for sector " +
                    str(sector) +
                    " sum to 0.0.  Substituting uniform distribution!")
            distr = distr / float(distr.sum())
            random_sample = probsample_replace(
                location_subset.get_id_attribute(),
                size=int(counts[i]),
                prob_array=distr)
            idx = where(sector_ids == sector)[0]
            # modify job locations
            agent_set.set_values_of_one_attribute(location_id_name,
                                                  random_sample,
                                                  agents_index[idx])
        return agent_set.get_attribute_by_index(location_id_name, agents_index)
Example #15
0
    def run(self, job_dataset, dataset_pool, out_storage=None, jobs_table="jobs"):
        """
        Algorithm:
            1. For all non_home_based jobs that have parcel_id assigned but no building_id, try
                to choose a building from all buildings in that parcel. Draw the building with probabilities
                given by the sector-building_type distribution. The job sizes are
                fitted into the available space (the attribute job.sqft is updated).
            2. For all non_home_based jobs for which no building was found in step 1, check
                if the parcel has residential buildings. In such a case, re-assign the jobs to be
                home-based.
                Otherwise, if sum of non_residential_sqft over the involved buildings is 0,
                for all jobs that have impute_building_sqft_flag=True draw a building using
                the sector-building_type distribution and impute the corresponding sqft to 
                the non_residential_sqft of that building.
            3. For all home_based jobs that have parcel_id assigned but no building_id, try
                to choose a building from all buildings in that parcel. 
                The capacity of a single-family building is determined from sizes of the households living there 
                (for each household the minimum of number of members and 2 is taken). 
                For multi-family buildings the capacity is 50.
            4. Assign a building type to jobs that have missing building type. It is sampled 
                from the regional-wide distribution of home based and non-home based jobs.
            5. Update the table 'building_sqft_per_job' using the updated job.sqft.
        'in_storage' should contain the jobs table and the zone_averages_table. The 'dataset_pool_storage'
        should contain all other tables needed (buildings, households, building_types). 
        """
        # Job attributes; building_ids and home_base_status are modified
        # in place and written back via modify_attribute below.
        parcel_ids = job_dataset.get_attribute("parcel_id")
        building_ids = job_dataset.get_attribute("building_id")
        home_base_status = job_dataset.get_attribute("home_based_status")
        sectors = job_dataset.get_attribute("sector_id")
        
        is_considered = logical_and(parcel_ids > 0, building_ids <= 0) # jobs that have assigned parcel but not building
        # NOTE(review): despite its name, this index selects status == 0
        # (non-home-based); it is unused and overwritten with == 1 further
        # below, so this line appears to be dead code.
        job_index_home_based = where(logical_and(is_considered, home_base_status == 0))[0]
        # hard-coded sector ids: 18 = governmental, 19 = educational
        is_governmental_job = sectors == 18
        is_edu_job = sectors == 19
        job_index_governmental = where(logical_and(is_considered, is_governmental_job))[0]
        job_index_edu = where(logical_and(is_considered, is_edu_job))[0]
        
        building_dataset = dataset_pool.get_dataset('building')
        parcel_ids_in_bldgs = building_dataset.get_attribute("parcel_id")
        bldg_ids_in_bldgs = building_dataset.get_id_attribute()
        bldg_types_in_bldgs = building_dataset.get_attribute("building_type_id")
        
        non_res_sqft = building_dataset.get_attribute("non_residential_sqft")

        # boolean mask of building types preferred for non-home-based jobs
        # (hard-coded building_type_ids 3, 8, 13, 20, 21); '+' acts as
        # logical OR on the boolean arrays
        preferred_nhb_btypes =   (building_dataset['building.building_type_id'] == 3) + \
                                (building_dataset['building.building_type_id'] == 8) + \
                                (building_dataset['building.building_type_id'] == 13) + \
                                (building_dataset['building.building_type_id'] == 20) + \
                                (building_dataset['building.building_type_id'] == 21)
        non_res_sqft_preferred =  non_res_sqft  * preferred_nhb_btypes              
                                          
        # governmental buildings: generic building type 7, but excluding the
        # educational type (18); '<>' is the expression-language not-equal
        is_governmental = building_dataset.compute_variables([
            "numpy.logical_and(building.disaggregate(building_type.generic_building_type_id == 7), building.building_type_id <> 18)"],
                                                                     dataset_pool=dataset_pool)
        idx_gov = where(is_governmental)[0]
        is_edu = building_dataset['building.building_type_id'] == 18
        idx_edu = where(is_edu)[0]
        
        # residential = flagged residential AND neither governmental nor educational
        bldgs_is_residential = logical_and(logical_not(logical_or(is_governmental, is_edu)), 
                                           building_dataset.compute_variables(["urbansim_parcel.building.is_residential"], 
                                                           dataset_pool=dataset_pool))
        
        bldgs_isnot_residential = logical_not(bldgs_is_residential)
        
        # assign buildings to educational jobs randomly
        unique_parcels = unique(parcel_ids[job_index_edu])
        logger.log_status("Placing educational jobs ...")
        for parcel in unique_parcels:
            # educational buildings on this parcel
            idx_in_bldgs = where(parcel_ids_in_bldgs[idx_edu] == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_edu] == parcel)[0]
            draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
            building_ids[job_index_edu[idx_in_jobs]] = bldg_ids_in_bldgs[idx_edu[draw]]
        logger.log_status("%s educational jobs (out of %s edu. jobs) were placed." % (
                                        (building_ids[job_index_edu]>0).sum(), job_index_edu.size))
        
        # assign buildings to governmental jobs randomly
        unique_parcels = unique(parcel_ids[job_index_governmental])
        logger.log_status("Placing governmental jobs ...")
        for parcel in unique_parcels:
            # governmental buildings on this parcel
            idx_in_bldgs = where(parcel_ids_in_bldgs[idx_gov] == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_governmental] == parcel)[0]
            draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
            building_ids[job_index_governmental[idx_in_jobs]] = bldg_ids_in_bldgs[idx_gov[draw]]
        logger.log_status("%s governmental jobs (out of %s gov. jobs) were placed." % (
                        (building_ids[job_index_governmental]>0).sum(), job_index_governmental.size))
        logger.log_status("The unplaced governmental jobs will be added to the non-home based jobs.")
        
        #tmp = unique(parcel_ids[job_index_governmental][building_ids[job_index_governmental]<=0])
        #output_dir =  "/Users/hana"
        #write_to_text_file(os.path.join(output_dir, 'parcels_with_no_gov_bldg.txt'), tmp, delimiter='\n')
        
        # consider the unplaced governmental jobs together with other non-home-based jobs
        is_now_considered = logical_and(is_considered, building_ids <= 0)
        job_index_non_home_based = where(logical_and(is_now_considered, logical_or(home_base_status == 0, is_governmental_job)))[0]
                                    
        # assign buildings to non_home_based jobs based on available space
        unique_parcels = unique(parcel_ids[job_index_non_home_based])
        # iterate over parcels
        logger.log_status("Placing non-home-based jobs ...")
        nhb_not_placed = 0
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_non_home_based] == parcel)[0]
            # sample proportionally to the building size
            weights = non_res_sqft_preferred[idx_in_bldgs] # 1.preference: preferred building types with non-res sqft 
            if weights.sum() <= 0:
                weights = preferred_nhb_btypes[idx_in_bldgs] # 2.preference: preferred building types
                if weights.sum() <= 0:
                    weights = non_res_sqft[idx_in_bldgs] # 3.preference: any building with non-res sqft 
                    if weights.sum() <= 0: 
                        weights = bldgs_isnot_residential[idx_in_bldgs] # 4.preference: any non-res building
                        if weights.sum() <= 0: 
                            # no candidate building at all on this parcel
                            nhb_not_placed = nhb_not_placed + idx_in_jobs.size
                            continue
            draw = probsample_replace(idx_in_bldgs, idx_in_jobs.size, weights/float(weights.sum()))
            building_ids[job_index_non_home_based[idx_in_jobs]] = bldg_ids_in_bldgs[draw]
            
        logger.log_status("%s non home based jobs (out of %s nhb jobs) were placed. No capacity in buildings for %s jobs." % (
                                                                (building_ids[job_index_non_home_based]>0).sum(),
                                                                 job_index_non_home_based.size, nhb_not_placed))
        
        job_dataset.modify_attribute(name="building_id", data = building_ids)
        
        # re-classify unplaced non-home based jobs to home-based if parcels contain residential buildings

        is_now_considered = logical_and(parcel_ids > 0, building_ids <= 0)
        job_index_non_home_based_unplaced = where(logical_and(is_now_considered, 
                                               logical_and(home_base_status == 0, logical_not(is_governmental_job))))[0]
        unique_parcels = unique(parcel_ids[job_index_non_home_based_unplaced])

        logger.log_status("Try to reclassify non-home-based jobs (excluding governmental jobs) ...")
        nhb_reclass = 0
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_non_home_based_unplaced] == parcel)[0]
            where_residential = where(bldgs_is_residential[idx_in_bldgs])[0]
            if where_residential.size > 0:
                # NOTE(review): the actual reclassification is commented out
                # here (and below), so only the count is accumulated/reported.
                #home_base_status[job_index_non_home_based_unplaced[idx_in_jobs]] = 1 # set to home-based jobs
                nhb_reclass = nhb_reclass + idx_in_jobs.size
            else:
                draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
                #building_ids[job_index_non_home_based_unplaced[idx_in_jobs]] = bldg_ids_in_bldgs[draw]

        #job_dataset.modify_attribute(name="home_base_status", data = home_base_status)
        #job_dataset.modify_attribute(name="building_id", data = building_ids)
        
        job_index_home_based = where(logical_and(is_considered, home_base_status == 1))[0]
        logger.log_status("%s non-home based jobs reclassified as home-based." % nhb_reclass)

        # home_based jobs
        unique_parcels = unique(parcel_ids[job_index_home_based])
        # remaining home-based job capacity per building (clipped at zero)
        capacity_in_buildings = building_dataset.compute_variables([
                          "clip_to_zero(urbansim_parcel.building.total_home_based_job_space-building.aggregate(job.home_based_status==1))"],
                             dataset_pool=dataset_pool)
        parcels_with_exceeded_capacity = []
        # iterate over parcels
        logger.log_status("Placing home-based jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            idx_in_jobs = where(parcel_ids[job_index_home_based] == parcel)[0]
            capacity = capacity_in_buildings[idx_in_bldgs]
            if capacity.sum() <= 0:
                continue
            # probcomb[b, j] == 1 while building b can still take job j;
            # greedy loop: repeatedly place one job into one building until
            # either all jobs are placed or no capacity remains
            probcomb = ones((idx_in_bldgs.size, idx_in_jobs.size))
            taken = zeros(capacity.shape, dtype="int32")
            while True:
                zero_cap = where((capacity - taken) <= 0)[0]
                probcomb[zero_cap,:] = 0  # full buildings take no more jobs
                if probcomb.sum() <= 0:
                    break
                # pick a not-yet-placed job with the most remaining options
                req =  probcomb.sum(axis=0)
                wmaxi = where(req==req.max())[0]
                drawjob = sample_noreplace(arange(wmaxi.size), 1) # draw job from available jobs
                imax_req = wmaxi[drawjob]
                weights = probcomb[:,imax_req]
                # sample building
                draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                taken[draw] = taken[draw] + 1
                building_ids[job_index_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                probcomb[:,imax_req] = 0  # mark this job as placed
            # NOTE(review): this checks for -1, while unplaced jobs elsewhere
            # are detected with <= 0 — presumably building_id of unplaced
            # jobs is -1 here; verify against the jobs table.
            if -1 in building_ids[job_index_home_based[idx_in_jobs]]:
                parcels_with_exceeded_capacity.append(parcel)
        parcels_with_exceeded_capacity = array(parcels_with_exceeded_capacity)    
        
        logger.log_status("%s home based jobs (out of %s hb jobs) were placed." % ((building_ids[job_index_home_based]>0).sum(),
                                                                         job_index_home_based.size))
        
        # assign building type where missing
        # determine regional distribution
        idx_home_based = where(home_base_status == 1)[0]
        idx_non_home_based = where(home_base_status == 0)[0]
        # NOTE(review): '<= 0' also matches status 0 (non-home-based);
        # presumably "missing" is encoded as a negative value — verify.
        idx_bt_missing = where(home_base_status <= 0)[0]
        if idx_bt_missing.size > 0:
            # sample building types proportionally to the region-wide
            # home-based / non-home-based split
            sample_bt = probsample_replace(array([1,0]), idx_bt_missing.size, 
               array([idx_home_based.size, idx_non_home_based.size])/float(idx_home_based.size + idx_non_home_based.size))
            # coerce to int32 (on a 64 bit machine, sample_bt will be of type int64)
            home_base_status[idx_bt_missing] = sample_bt.astype(int32)
            job_dataset.modify_attribute(name="home_based_status", data = home_base_status) 
        
        if out_storage is not None:
            job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage, attributes=AttributeType.PRIMARY)
        logger.log_status("Assigning building_id to jobs done.")