def compute(self, dataset_pool):
     zone_set = self.get_dataset()
     travel_data = dataset_pool.get_dataset('travel_data')
     from_zone_id = travel_data.get_attribute('from_zone_id')
     zone_ids = zone_set.get_attribute('zone_id')
     time = travel_data.get_attribute(self.time_attribute_name)
     trips = travel_data.get_attribute(self.trips_attribute_name)
     
     numerator = array(ndimage_sum(time * trips,
                                    labels = from_zone_id, index=zone_ids))
     denominator = array(ndimage_sum(trips,
                                      labels = from_zone_id, index=zone_ids), dtype=float32)
     
     # if there is a division by zero, substitute the values from the zone one below that one
     # if there are contiguous runs of zero division, the values propagate with each iteration
     no_trips_from_here = where(denominator == 0)[0]
     while no_trips_from_here.size != 0:
         if no_trips_from_here.size == denominator.size:
             logger.log_warning("%s attribute of travel_data is all zeros; %s returns all zeros" % (self.trips_attribute_name, 
                                                                                                    self.name()
                                                                                                    ))
             break
              
         substitute_locations = no_trips_from_here - 1    # a mapping, what zone the new data will come from
         if substitute_locations[0] < 0: substitute_locations[0] = 1
         numerator[no_trips_from_here] = numerator[substitute_locations]
         denominator[no_trips_from_here] = denominator[substitute_locations] 
         no_trips_from_here = where(denominator == 0)[0]
         
     return safe_array_divide(numerator, denominator)
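All of these variables rely on the same trick: ndimage_sum (scipy's ndimage.sum) acts as a group-by sum over the origin-zone labels, so a trip-weighted average per zone is just two grouped sums and a division. A minimal, self-contained sketch with toy arrays (not part of the original code):

from numpy import array, float32
from scipy.ndimage import sum as ndimage_sum

from_zone_id = array([1, 1, 2, 3, 3, 3])   # origin zone of each O-D pair
time = array([10., 20., 5., 7., 8., 9.])   # travel time of each O-D pair
trips = array([2., 0., 1., 4., 0., 1.])    # trips on each O-D pair
zone_ids = array([1, 2, 3])

numerator = array(ndimage_sum(time * trips, labels=from_zone_id, index=zone_ids))
denominator = array(ndimage_sum(trips, labels=from_zone_id, index=zone_ids), dtype=float32)
print(numerator / denominator)             # trip-weighted mean time per origin zone: [10.  5.  7.4]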
Example 2
 def compute(self, dataset_pool):
     zone_set = self.get_dataset()
     travel_data = dataset_pool.get_dataset('travel_data')
     logsum_hbw_am_income = travel_data.get_attribute(self.logsum_hbw_am_income)
     trips_hbw_am_income = travel_data.get_attribute(self.trips_hbw_am_income)
     from_zone_id = travel_data.get_attribute('from_zone_id')
     zone_ids = zone_set.get_attribute('zone_id')
     
     numerator = array(ndimage_sum(travel_data.get_attribute(self.trips_hbw_am_income) *
                              travel_data.get_attribute(self.logsum_hbw_am_income),
                                 labels = from_zone_id, index=zone_ids))
     denominator = array(ndimage_sum(travel_data.get_attribute(self.trips_hbw_am_income),
                                 labels = from_zone_id, index=zone_ids), dtype=float32)
     
     # if there is a division by zero, substitute the values from the zone one below that one
     # if there are contiguous runs of zero division, the values propagate with each iteration
     no_trips_from_here = where(denominator == 0)[0]
     while no_trips_from_here.size != 0:
         substitute_locations = no_trips_from_here - 1    # a mapping, what zone the new data will come from
         if substitute_locations[0] < 0: substitute_locations[0] = 1
         numerator[no_trips_from_here] = numerator[substitute_locations]
         denominator[no_trips_from_here] = denominator[substitute_locations] 
         no_trips_from_here = where(denominator == 0)[0]
         
     return numerator / denominator
    def compute(self, dataset_pool):
        zone_set = self.get_dataset()
        travel_data = dataset_pool.get_dataset("travel_data")
        logsum_hbw_am_income = travel_data.get_attribute(self.logsum_hbw_am_income)
        trips_hbw_am_income = travel_data.get_attribute(self.trips_hbw_am_income)
        from_zone_id = travel_data.get_attribute("from_zone_id")
        zone_ids = zone_set.get_attribute("zone_id")

        numerator = array(
            ndimage_sum(
                travel_data.get_attribute(self.trips_hbw_am_income)
                * travel_data.get_attribute(self.logsum_hbw_am_income),
                labels=from_zone_id,
                index=zone_ids,
            )
        )
        denominator = array(
            ndimage_sum(travel_data.get_attribute(self.trips_hbw_am_income), labels=from_zone_id, index=zone_ids),
            dtype=float32,
        )

        # if there is a division by zero, substitute the values from the zone one below that one
        # if there are contiguous runs of zero division, the values propagate with each iteration
        no_trips_from_here = where(denominator == 0)[0]
        while no_trips_from_here.size != 0:
            substitute_locations = no_trips_from_here - 1  # a mapping, what zone the new data will come from
            if substitute_locations[0] < 0:
                substitute_locations[0] = 1
            numerator[no_trips_from_here] = numerator[substitute_locations]
            denominator[no_trips_from_here] = denominator[substitute_locations]
            no_trips_from_here = where(denominator == 0)[0]

        return numerator / denominator
 def get_average_omega(self, omega, probability, index, nsupply, demand):
     omega_prob = omega[:, newaxis]*probability
     omega_prob_sum_over_i = array(ndimage_sum(omega_prob, labels=index+1, index=arange(nsupply)+1))
     prob_sum_over_i = array(ndimage_sum(probability, labels=index+1, index=arange(nsupply)+1))
     average_omega = ma.filled(omega_prob_sum_over_i/
                   ma.masked_where(prob_sum_over_i==0, prob_sum_over_i), 0.0)
     return average_omega
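For context, a minimal sketch of the masked-divide pattern used above, assuming 'probability' and 'index' are (observations x sampled alternatives) arrays and 'omega' holds one weight per observation (toy values, not from the original code):

import numpy as np
import numpy.ma as ma
from scipy.ndimage import sum as ndimage_sum

nsupply = 5                                    # number of supply locations
omega = np.array([1.0, 2.0, 0.5])              # one weight per observation
probability = np.array([[0.6, 0.4],            # choice probabilities over sampled alternatives
                        [0.3, 0.7],
                        [0.5, 0.5]])
index = np.array([[0, 2],                      # supply location of each sampled alternative
                  [1, 2],
                  [0, 3]])

omega_prob = omega[:, np.newaxis] * probability
num = np.array(ndimage_sum(omega_prob, labels=index + 1, index=np.arange(nsupply) + 1))
den = np.array(ndimage_sum(probability, labels=index + 1, index=np.arange(nsupply) + 1))
# locations nobody sampled (den == 0) are masked, then filled with 0.0 instead of producing NaN
average_omega = ma.filled(num / ma.masked_where(den == 0, den), 0.0)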
Example 5
 def get_average_omega(self, omega, probability, index, nsupply, demand):
     omega_prob = omega[:, newaxis] * probability
     omega_prob_sum_over_i = array(
         ndimage_sum(omega_prob,
                     labels=index + 1,
                     index=arange(nsupply) + 1))
     prob_sum_over_i = array(
         ndimage_sum(probability,
                     labels=index + 1,
                     index=arange(nsupply) + 1))
     average_omega = ma.filled(
         omega_prob_sum_over_i /
         ma.masked_where(prob_sum_over_i == 0, prob_sum_over_i), 0.0)
     return average_omega
    def compute(self, dataset_pool):
        zone_set = self.get_dataset()
        zone_ids = zone_set.get_attribute('zone_id')
        
        numerator = zeros(len(zone_ids)).astype(float32)
        for mode in self.numerator_modes:
            numerator += ndimage_sum(zone_set.get_attribute(mode), labels = zone_ids, index=zone_ids)
        denominator = zeros(len(zone_ids)).astype(float32)
        for mode in self.denominator_modes:
            denominator += ndimage_sum(zone_set.get_attribute(mode), labels = zone_ids, index=zone_ids)
                
        """if there is a divide by zero then replace with 1"""
        denominator[where(denominator == 0)] = 1

        return numerator / denominator
Example 7
 def get_choice_histogram(self, units_to_occupy, choices, nchoices):
     """Counts the number of agents that decided for each choice.
     """
     return array(
         ndimage_sum(ones((choices.size, )),
                     labels=choices + 1,
                     index=arange(nchoices) + 1))
Example 8
 def get_demand(self, index, probability, nsupply):
     flat_index = index.ravel()
     l = flat_index + 1
     demand = array(
         ndimage_sum(probability.ravel(),
                     labels=l,
                     index=arange(nsupply) + 1))
     return demand
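A small illustration of what get_demand computes: the choice probabilities of all agents are summed per supply location, which is the same group-by sum numpy.bincount would give (toy arrays, not from the original code):

import numpy as np
from scipy.ndimage import sum as ndimage_sum

nsupply = 4
index = np.array([[0, 2], [1, 2], [0, 3]])                    # sampled supply location per (agent, alternative)
probability = np.array([[0.6, 0.4], [0.3, 0.7], [0.5, 0.5]])  # choice probabilities

l = index.ravel() + 1
demand = np.array(ndimage_sum(probability.ravel(), labels=l, index=np.arange(nsupply) + 1))
# equivalent formulation with bincount:
assert np.allclose(demand, np.bincount(index.ravel(), weights=probability.ravel(), minlength=nsupply))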
Example 9
    def compute(self, dataset_pool):
        zone_set = self.get_dataset()
        zone_ids = zone_set.get_attribute('zone_id')

        numerator = zeros(len(zone_ids)).astype(float32)
        for mode in self.numerator_modes:
            numerator += ndimage_sum(zone_set.get_attribute(mode),
                                     labels=zone_ids,
                                     index=zone_ids)
        denominator = zeros(len(zone_ids)).astype(float32)
        for mode in self.denominator_modes:
            denominator += ndimage_sum(zone_set.get_attribute(mode),
                                       labels=zone_ids,
                                       index=zone_ids)
        """if there is a divide by zero then replace with 1"""
        denominator[where(denominator == 0)] = 1

        return numerator / denominator
Example 10
 def get_choice_histogram(self, units_to_occupy, choices, nchoices):
     """Return a histogram of agent choices, where each agents occupy number of units given
     in 'units_to_occupy'. 'choices' are the agent choices of a location (as an index).
     'nchoices' is a number of unique values for possible choices.
     """
     return array(
         ndimage_sum(units_to_occupy,
                     labels=choices + 1,
                     index=arange(nchoices) + 1))
 def compute(self, dataset_pool):
     zone_set = self.get_dataset()
     travel_data = dataset_pool.get_dataset('travel_data')
     from_zone_id = travel_data.get_attribute('from_zone_id')
     zone_ids = zone_set.get_attribute('zone_id')
     
     results = zeros(len(zone_ids)).astype(float32)
     for matrix in self.matrices:
         results += array(ndimage_sum(travel_data.get_attribute(matrix), labels = from_zone_id, index=zone_ids))
     return results
    def get_average_omega(self, omega, probability, index, nsupply, nobs,
                          demand):
        omega_prob = ma.filled(ma.resize(omega, (nobs, 1)) * probability, 0.0)
        average_omega_nom = array(
            ndimage_sum(omega_prob,
                        labels=index + 1,
                        index=arange(nsupply) + 1))

        average_omega = ma.filled(
            average_omega_nom / ma.masked_where(demand == 0, demand), 0.0)
        return average_omega
Example 13
    def compute(self,  dataset_pool):
        zone_ids = self.get_dataset().get_id_attribute()
        travel_data = dataset_pool.get_dataset("travel_data")
        time = power(travel_data.get_attribute(self.mode), 2)
        
        to_zone_id = travel_data.get_attribute("to_zone_id")
        zone_index = self.get_dataset().get_id_index(to_zone_id)
        num_jobs = self.get_dataset().get_attribute('employment')[zone_index]

        from_zone_id = travel_data.get_attribute("from_zone_id")        
        results = array(ndimage_sum(num_jobs / time.astype(float32), labels = from_zone_id, index=zone_ids))
        
        return results
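In effect this is a gravity-type accessibility measure: for each origin zone i it computes the sum over destinations j of employment_j / t_ij**2, where t_ij is the skim named by self.mode, and ndimage_sum performs the summation over all O-D pairs that share the same from_zone_id.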
Example 14
    def compute(self, dataset_pool):
        zone_set = self.get_dataset()
        travel_data = dataset_pool.get_dataset('travel_data')
        from_zone_id = travel_data.get_attribute('from_zone_id')
        zone_ids = zone_set.get_attribute('zone_id')

        results = zeros(len(zone_ids)).astype(float32)
        for matrix in self.matrices:
            results += array(
                ndimage_sum(travel_data.get_attribute(matrix),
                            labels=from_zone_id,
                            index=zone_ids))
        return results
 def compute(self, dataset_pool):
     zone_set = self.get_dataset()
     travel_data = dataset_pool.get_dataset('travel_data')
     logsum_hbw_am_income = travel_data.get_attribute(self.logsum_hbw_am_income)
     from_zone_id = travel_data.get_attribute('from_zone_id')
     to_zone_id = travel_data.get_attribute('to_zone_id')
     td_attr_by_idx = travel_data.get_attribute_by_index
     zone_ids = zone_set.get_attribute('zone_id')
     njobs = zone_set.get_attribute_by_id(self.number_of_jobs, to_zone_id)
     sums_by_from_zone = ndimage_sum(
         njobs * exp(travel_data.get_attribute(self.logsum_hbw_am_income)), 
         labels=from_zone_id, index=zone_ids)
     return log(array(sums_by_from_zone))
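In effect this is a destination-choice logsum per origin zone: access_i = log(sum over destinations j of number_of_jobs_j * exp(logsum_ij)), with the inner sum again grouped by from_zone_id via ndimage_sum.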
    def compute(self, dataset_pool):
        """
        zone_ids = zone_set.get_attribute('zone_id')
        am_VMT = travel_data.get_attribute(self._am_VMT_attr)
        md_VMT = travel_data.get_attribute(self._md_VMT_attr)
        pm_ev_ni_VMT = travel_data.get_attribute(self._pm_ev_ni_VMT_attr)
        """

        zone_ids = self.get_dataset().get_id_attribute()
        travel_data = dataset_pool.get_dataset('travel_data')

        from_zone_id = travel_data.get_attribute("from_zone_id")

        am_VMT_attr = travel_data.get_attribute(self._am_VMT_attr)
        md_VMT_attr = travel_data.get_attribute(self._md_VMT_attr)
        pm_ev_ni_VMT_attr = travel_data.get_attribute(self._pm_ev_ni_VMT_attr)

        results =   array(ndimage_sum(am_VMT_attr, labels = from_zone_id, index=zone_ids)) + \
                    array(ndimage_sum(md_VMT_attr, labels = from_zone_id, index=zone_ids)) + \
                    array(ndimage_sum(pm_ev_ni_VMT_attr, labels = from_zone_id, index=zone_ids))

        return results
Example 17
    def compute(self, dataset_pool):
        zone_ids = self.get_dataset().get_id_attribute()
        travel_data = dataset_pool.get_dataset('travel_data')
        time = power(travel_data.get_attribute('am_total_transit_time_walk'), 2)
        
        from_zone_id = travel_data.get_attribute("from_zone_id")
        zone_index = self.get_dataset().get_id_index(from_zone_id)
        population = self.get_dataset().get_attribute('population')[zone_index]

        to_zone_id = travel_data.get_attribute("to_zone_id")        
        results = array(ndimage_sum(population / time.astype(float32), labels = to_zone_id, index=zone_ids))
        
        return results
 def compute(self, dataset_pool):
     zone_set = self.get_dataset()
     travel_data = dataset_pool.get_dataset("travel_data")
     logsum_hbw_am_income = travel_data.get_attribute(self.logsum_hbw_am_income)
     from_zone_id = travel_data.get_attribute("from_zone_id")
     to_zone_id = travel_data.get_attribute("to_zone_id")
     td_attr_by_idx = travel_data.get_attribute_by_index
     zone_ids = zone_set.get_attribute("zone_id")
     nhouseholds = zone_set.get_attribute_by_id(self.number_of_households, from_zone_id)
     sums_by_from_zone = ndimage_sum(
         nhouseholds * exp(travel_data.get_attribute(self.logsum_hbw_am_income)), labels=to_zone_id, index=zone_ids
     )
     return array(sums_by_from_zone)
 def compute(self, dataset_pool):
     """
     zone_ids = zone_set.get_attribute('zone_id')
     am_VMT = travel_data.get_attribute(self._am_VMT_attr)
     md_VMT = travel_data.get_attribute(self._md_VMT_attr)
     pm_ev_ni_VMT = travel_data.get_attribute(self._pm_ev_ni_VMT_attr)
     """
     
     zone_ids = self.get_dataset().get_id_attribute()
     travel_data = dataset_pool.get_dataset('travel_data')
     
     from_zone_id = travel_data.get_attribute("from_zone_id")       
     
     am_VMT_attr = travel_data.get_attribute(self._am_VMT_attr)  
     md_VMT_attr = travel_data.get_attribute(self._md_VMT_attr)  
     pm_ev_ni_VMT_attr = travel_data.get_attribute(self._pm_ev_ni_VMT_attr)  
     
     results =   array(ndimage_sum(am_VMT_attr, labels = from_zone_id, index=zone_ids)) + \
                 array(ndimage_sum(md_VMT_attr, labels = from_zone_id, index=zone_ids)) + \
                 array(ndimage_sum(pm_ev_ni_VMT_attr, labels = from_zone_id, index=zone_ids))        
     
     return results
    def compute(self, dataset_pool):
        zone_ids = self.get_dataset().get_id_attribute()
        travel_data = dataset_pool.get_dataset('travel_data')
        time = power(travel_data.get_attribute('am_total_transit_time_walk'), 2)
        
        to_zone_id = travel_data.get_attribute("to_zone_id")
        zone_index = self.get_dataset().get_id_index(to_zone_id)
        num_jobs = self.get_dataset().get_attribute('number_of_jobs')[zone_index]

        from_zone_id = travel_data.get_attribute("from_zone_id")        
        results = array(ndimage_sum(num_jobs / time.astype(float32), labels = from_zone_id, index=zone_ids))
        
        return results
    def compute(self, dataset_pool):
        zone_ids = self.get_dataset().get_id_attribute()
        travel_data = dataset_pool.get_dataset('travel_data')
        time = power(travel_data.get_attribute('am_walk_time_in_minutes'), 2)
        
        to_zone_id = travel_data.get_attribute("to_zone_id")
        zone_index = self.get_dataset().get_id_index(to_zone_id)
        num_jobs = self.get_dataset().get_attribute('number_of_jobs')[zone_index]

        from_zone_id = travel_data.get_attribute("from_zone_id")        
        results = array(ndimage_sum(num_jobs / time.astype(float32), labels = from_zone_id, index=zone_ids))
        
        return results
    def compute(self, dataset_pool):
        zone_ids = self.get_dataset().get_id_attribute()
        travel_data = dataset_pool.get_dataset("travel_data")
        time = power(travel_data.get_attribute("am_walk_time_in_minutes"), 2)

        to_zone_id = travel_data.get_attribute("to_zone_id")
        zone_index = self.get_dataset().get_id_index(to_zone_id)
        num_jobs = self.get_dataset().get_attribute("number_of_jobs")[zone_index]

        from_zone_id = travel_data.get_attribute("from_zone_id")
        results = array(ndimage_sum(num_jobs / time.astype(float32), labels=from_zone_id, index=zone_ids))

        return results
 def compute(self, dataset_pool):
     zone_set = self.get_dataset()
     travel_data = dataset_pool.get_dataset('travel_data')
     logsum_hbw_am_income = travel_data.get_attribute(
         self.logsum_hbw_am_income)
     from_zone_id = travel_data.get_attribute('from_zone_id')
     to_zone_id = travel_data.get_attribute('to_zone_id')
     td_attr_by_idx = travel_data.get_attribute_by_index
     zone_ids = zone_set.get_attribute('zone_id')
     njobs = zone_set.get_attribute_by_id(self.number_of_jobs, to_zone_id)
     sums_by_from_zone = ndimage_sum(
         njobs * exp(travel_data.get_attribute(self.logsum_hbw_am_income)),
         labels=from_zone_id,
         index=zone_ids)
     return log(array(sums_by_from_zone))
 def _unroll_field(self, attr_name, gridcells, buildings, bldgs_idx, dataset_pool=None):
     """Unroll the values for this field.
     """
     grid_ids = buildings.get_attribute('grid_id')[bldgs_idx]
     unique_grid_ids = unique(grid_ids)
     grid_idx = gridcells.get_id_index(unique_grid_ids)
     attr_values = gridcells.get_attribute_by_index(attr_name, grid_idx)
     buildings.compute_variables("urbansim.%s.%s" % (buildings.get_dataset_name(), attr_name), 
                                 dataset_pool=dataset_pool)
     # sum the amount over the same gridcells
     change_amounts = array(ndimage_sum(buildings.get_attribute(attr_name)[bldgs_idx], labels=grid_ids,
                                         index=unique_grid_ids))
     attr_values = clip(attr_values - change_amounts,
                        0, attr_values.max())
     gridcells.set_values_of_one_attribute(attr_name, attr_values, index=grid_idx)
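A compressed sketch of the unrolling step above: building-level amounts are summed per gridcell and subtracted from the gridcell totals, clipped at zero (toy arrays; the names are illustrative only):

import numpy as np
from scipy.ndimage import sum as ndimage_sum

grid_ids = np.array([10, 10, 20, 30])         # gridcell of each building being unrolled
amounts = np.array([5.0, 2.0, 4.0, 1.0])      # building-level attribute values
gridcell_totals = np.array([6.0, 3.0, 10.0])  # current totals for gridcells 10, 20, 30

unique_grid_ids = np.unique(grid_ids)
change = np.array(ndimage_sum(amounts, labels=grid_ids, index=unique_grid_ids))
new_totals = np.clip(gridcell_totals - change, 0, gridcell_totals.max())
# new_totals == [0., 0., 9.]  -- the first two cells would go negative, so they are clipped at 0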
    def _do_run_for_this_year(self, job_set):
        building_type = job_set.get_attribute("building_type")
        sectors = unique(
            self.control_totals_for_this_year.get_attribute("sector_id"))
        self._compute_sector_variables(sectors, job_set)
        for sector in sectors:
            isector = where(
                self.control_totals_for_this_year.get_attribute("sector_id") ==
                sector)[0]
            total_hb_jobs = self.control_totals_for_this_year.get_attribute(
                "total_home_based_employment")[isector]
            total_nhb_jobs = self.control_totals_for_this_year.get_attribute(
                "total_non_home_based_employment")[isector]
            is_in_sector_hb = job_set.get_attribute(
                "is_in_employment_sector_%s_home_based" % sector)
            is_in_sector_nhb = job_set.get_attribute(
                "is_in_employment_sector_%s_non_home_based" % sector)
            diff_hb = int(total_hb_jobs - is_in_sector_hb.astype(int8).sum())
            diff_nhb = int(total_nhb_jobs -
                           is_in_sector_nhb.astype(int8).sum())
            if diff_hb < 0:  # home based jobs to be removed
                w = where(is_in_sector_hb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_hb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate(
                    (self.remove_jobs, non_placed,
                     sample_noreplace(sample_array,
                                      max(0,
                                          abs(diff_hb) - size_non_placed))))
            if diff_nhb < 0:  # non home based jobs to be removed
                w = where(is_in_sector_nhb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_nhb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate(
                    (self.remove_jobs, non_placed,
                     sample_noreplace(sample_array,
                                      max(0,
                                          abs(diff_nhb) - size_non_placed))))

            if diff_hb > 0:  # home based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate(
                    (self.new_jobs[self.location_id_name],
                     zeros(
                         (diff_hb, ),
                         dtype=self.new_jobs[self.location_id_name].dtype.type)
                     ))
                self.new_jobs["sector_id"] = concatenate(
                    (self.new_jobs["sector_id"], (resize(
                        array([sector],
                              dtype=self.new_jobs["sector_id"].dtype.type),
                        diff_hb))))
                if 1 in is_in_sector_hb:
                    building_type_distribution = array(
                        ndimage_sum(is_in_sector_hb,
                                    labels=building_type,
                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute(
                        "is_home_based_job"
                ):  # take the building type distribution from the whole region
                    building_type_distribution = array(
                        ndimage_sum(job_set.get_attribute("is_home_based_job"),
                                    labels=building_type,
                                    index=self.available_building_types))
                else:  # there are no home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(
                        self.available_building_types.size)
                    building_type_distribution = building_type_distribution / building_type_distribution.sum(
                    )
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_hb,
                    building_type_distribution /
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate(
                    (self.new_jobs["building_type"],
                     sampled_building_types.astype(
                         self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_hb
                self.new_jobs[self.job_id_name] = concatenate(
                    (self.new_jobs[self.job_id_name],
                     arange(self.max_id + 1, new_max_id + 1)))
                self.max_id = new_max_id

            if diff_nhb > 0:  # non home based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate(
                    (self.new_jobs[self.location_id_name],
                     zeros(
                         (diff_nhb, ),
                         dtype=self.new_jobs[self.location_id_name].dtype.type)
                     ))
                self.new_jobs["sector_id"] = concatenate(
                    (self.new_jobs["sector_id"], (resize(
                        array([sector],
                              dtype=self.new_jobs["sector_id"].dtype.type),
                        diff_nhb))))
                if 1 in is_in_sector_nhb:
                    building_type_distribution = array(
                        ndimage_sum(is_in_sector_nhb,
                                    labels=building_type,
                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute(
                        "is_non_home_based_job"
                ):  # take the building type distribution from the whole region
                    building_type_distribution = array(
                        ndimage_sum(
                            job_set.get_attribute("is_non_home_based_job"),
                            labels=building_type,
                            index=self.available_building_types))
                else:  # there are no non-home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(
                        self.available_building_types.size)
                    building_type_distribution = building_type_distribution / building_type_distribution.sum(
                    )
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_nhb,
                    building_type_distribution /
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate(
                    (self.new_jobs["building_type"],
                     sampled_building_types.astype(
                         self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_nhb
                self.new_jobs[self.job_id_name] = concatenate(
                    (self.new_jobs[self.job_id_name],
                     arange(self.max_id + 1, new_max_id + 1)))
                self.max_id = new_max_id
    def run(self, job_dataset, dataset_pool, out_storage=None, jobs_table="jobs"):
        """
        Algorithm:
            1. For all non_home_based jobs that have parcel_id assigned but no building_id, try
                to choose a building from all buildings in that parcel. Draw the building with probabilities
                given by the sector-building_type distribution. The job sizes are
                fitted into the available space (the attribute job.sqft is updated).
            2. For all non_home_based jobs for which no building was found in step 1, check
                if the parcel has residential buildings. In such a case, re-assign the jobs to be
                home-based.
                Otherwise, if sum of non_residential_sqft over the involved buildings is 0,
                for all jobs that have impute_building_sqft_flag=True draw a building using
                the sector-building_type distribution and impute the corresponding sqft to 
                the non_residential_sqft of that building.
            3. For all home_based jobs that have parcel_id assigned but no building_id, try
                to choose a building from all buildings in that parcel. 
                The capacity of a single-family building is determined from sizes of the households living there 
                (for each household the minimum of number of members and 2 is taken). 
                For multi-family buildings the capacity is 50.
            4. Assign a building type to jobs that have a missing building type. It is sampled
                from the region-wide distribution of home-based and non-home-based jobs.
            5. Update the table 'building_sqft_per_job' using the updated job.sqft.
        'in_storage' should contain the jobs table and the zone_averages_table. The 'dataset_pool_storage'
        should contain all other tables needed (buildings, households, building_types). 
        """
        parcel_ids = job_dataset.get_attribute("parcel_id")
        building_ids = job_dataset.get_attribute("building_id")
        building_types = job_dataset.get_attribute("building_type")
        try:
            impute_sqft_flags = job_dataset.get_attribute("impute_building_sqft_flag")
        except:
            impute_sqft_flags = zeros(job_dataset.size())
        is_considered = logical_and(parcel_ids > 0, building_ids <= 0) # jobs that have assigned parcel but not building
        job_index_home_based = where(logical_and(is_considered, building_types == 1))[0]
        job_index_governmental = where(logical_and(is_considered, building_types == 3))[0]
        
        building_dataset = dataset_pool.get_dataset('building')
        parcel_ids_in_bldgs = building_dataset.get_attribute("parcel_id")
        bldg_ids_in_bldgs = building_dataset.get_id_attribute()
        bldg_types_in_bldgs = building_dataset.get_attribute("building_type_id")
        
        non_res_sqft = building_dataset.get_attribute("non_residential_sqft")
        occupied = building_dataset.compute_variables(["urbansim_parcel.building.occupied_building_sqft_by_jobs"],
                                                                     dataset_pool=dataset_pool)
        is_governmental = building_dataset.compute_variables(["building.disaggregate(building_type.generic_building_type_id == 7)"],
                                                                     dataset_pool=dataset_pool)
        
        # assign buildings to governmental jobs randomly
        unique_parcels = unique(parcel_ids[job_index_governmental])
        logger.log_status("Placing governmental jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs[is_governmental] == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_governmental] == parcel)[0]
            draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
            building_ids[job_index_governmental[idx_in_jobs]] = bldg_ids_in_bldgs[where(is_governmental)[0][draw]]
        logger.log_status("%s governmental jobs (out of %s gov. jobs) were placed." % (
                                                                (building_ids[job_index_governmental]>0).sum(),
                                                                 job_index_governmental.size))
        logger.log_status("The not-placed governmental jobs will be added to the non-home based jobs.")
        
        # consider the unplaced governmental jobs together with other non-home-based jobs
        is_now_considered = logical_and(is_considered, building_ids <= 0)
        job_index_non_home_based = where(logical_and(is_now_considered, logical_or(building_types == 2, building_types == 3)))[0]
                                    
        # assign buildings to non_home_based jobs based on available space
        unique_parcels = unique(parcel_ids[job_index_non_home_based])
        job_building_types = job_dataset.compute_variables(["bldgs_building_type_id = job.disaggregate(building.building_type_id)"], 
                                                           dataset_pool=dataset_pool)
        where_valid_jbt = where(logical_and(job_building_types>0, logical_or(building_types == 2, building_types==3)))[0]
        building_type_dataset = dataset_pool.get_dataset("building_type")
        available_building_types= building_type_dataset.get_id_attribute()
        idx_available_bt = building_type_dataset.get_id_index(available_building_types)
        sectors = job_dataset.get_attribute("sector_id")
        unique_sectors = unique(sectors)
        sector_bt_distribution = zeros((unique_sectors.size, building_type_dataset.size()), dtype="float32")
        
        jobs_sqft = job_dataset.get_attribute_by_index("sqft", job_index_non_home_based).astype("float32")
        job_dataset._compute_if_needed("urbansim_parcel.job.zone_id", dataset_pool=dataset_pool) 
        jobs_zones = job_dataset.get_attribute_by_index("zone_id", job_index_non_home_based)
        new_jobs_sqft = job_dataset.get_attribute("sqft").copy()
        
        # find sector -> building_type distribution
        sector_index_mapping = {}
        for isector in range(unique_sectors.size):
            idx = where(sectors[where_valid_jbt]==unique_sectors[isector])[0]
            if idx.size == 0: continue
            o = ones(idx.size, dtype="int32")
            sector_bt_distribution[isector,:] = ndimage_sum(o, labels=job_building_types[where_valid_jbt[idx]], 
                                                            index=available_building_types)
            sector_bt_distribution[isector,:] = sector_bt_distribution[isector,:]/sector_bt_distribution[isector,:].sum()
            sector_index_mapping[unique_sectors[isector]] = isector
               
        # create a lookup table for zonal average per building type of sqft per employee
        zone_average_dataset = dataset_pool.get_dataset("building_sqft_per_job")
        zone_bt_lookup = zone_average_dataset.get_building_sqft_as_table(job_dataset.get_attribute("zone_id").max(),
                                                                         available_building_types.max())

        counter_zero_capacity = 0
        counter_zero_distr = 0
        # iterate over parcels
        logger.log_status("Placing non-home-based jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_non_home_based] == parcel)[0]
            capacity = maximum(non_res_sqft[idx_in_bldgs] - occupied[idx_in_bldgs],0)
            #capacity = non_res_sqft[idx_in_bldgs] - occupied[idx_in_bldgs]
            if capacity.sum() <= 0:
                counter_zero_capacity += idx_in_jobs.size
                continue
            this_jobs_sectors = sectors[job_index_non_home_based][idx_in_jobs]
            this_jobs_sqft_table = resize(jobs_sqft[idx_in_jobs], (idx_in_bldgs.size, idx_in_jobs.size))
            wn = jobs_sqft[idx_in_jobs] <= 0
            for i in range(idx_in_bldgs.size):
                this_jobs_sqft_table[i, where(wn)[0]] = zone_bt_lookup[jobs_zones[idx_in_jobs[wn]], bldg_types_in_bldgs[idx_in_bldgs[i]]]
            supply_demand_ratio = (resize(capacity, (capacity.size, 1))/this_jobs_sqft_table.astype("float32").sum(axis=0))/float(idx_in_jobs.size)*0.9
            if any(supply_demand_ratio < 1): # correct only if supply is smaller than demand 
                this_jobs_sqft_table = this_jobs_sqft_table * supply_demand_ratio
            probcomb = zeros(this_jobs_sqft_table.shape)
            bt = bldg_types_in_bldgs[idx_in_bldgs]
            ibt = building_type_dataset.get_id_index(bt)
            for i in range(probcomb.shape[0]):
                for j in range(probcomb.shape[1]):
                    probcomb[i,j] = sector_bt_distribution[sector_index_mapping[this_jobs_sectors[j]],ibt[i]]
            pcs = probcomb.sum(axis=0)
            probcomb = probcomb/pcs
            wz = where(pcs<=0)[0]
            counter_zero_distr += wz.size
            probcomb[:, wz] = 0 # to avoid nan values
            taken = zeros(capacity.shape)
            has_sqft = this_jobs_sqft_table > 0
            while True:
                if (has_sqft * probcomb).sum() <= 0:
                    break
                req =  (this_jobs_sqft_table * probcomb).sum(axis=0)
                maxi = req.max()
                wmaxi = where(req==maxi)[0]
                drawjob = sample_noreplace(arange(wmaxi.size), 1) # draw job from jobs with the maximum size
                imax_req = wmaxi[drawjob]
                weights = has_sqft[:,imax_req] * probcomb[:,imax_req]
                draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                if (taken[draw] + this_jobs_sqft_table[draw,imax_req]) > capacity[draw]:
                    probcomb[draw,imax_req]=0
                    continue
                taken[draw] = taken[draw] + this_jobs_sqft_table[draw,imax_req]
                building_ids[job_index_non_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                probcomb[:,imax_req] = 0
                new_jobs_sqft[job_index_non_home_based[idx_in_jobs[imax_req]]] = int(min(self.maximum_sqft, max(round(this_jobs_sqft_table[draw,imax_req]), 
                                                                                     self.minimum_sqft)))
            
        logger.log_status("%s non home based jobs (out of %s nhb jobs) were placed." % (
                                                                (building_ids[job_index_non_home_based]>0).sum(),
                                                                 job_index_non_home_based.size))
        logger.log_status("Unplaced due to zero capacity: %s" % counter_zero_capacity)
        logger.log_status("Unplaced due to zero distribution: %s" % counter_zero_distr)
        
        job_dataset.modify_attribute(name="building_id", data = building_ids)
        
        # re-classify unplaced non-home based jobs to home-based if parcels contain residential buildings
        bldgs_is_residential = logical_and(logical_not(is_governmental), building_dataset.compute_variables(["urbansim_parcel.building.is_residential"], 
                                                           dataset_pool=dataset_pool))
        is_now_considered = logical_and(parcel_ids > 0, building_ids <= 0)
        job_index_non_home_based_unplaced = where(logical_and(is_now_considered, building_types == 2))[0]
        unique_parcels = unique(parcel_ids[job_index_non_home_based_unplaced])
        imputed_sqft = 0
        logger.log_status("Try to reclassify non-home-based jobs (excluding governemtal jobs) ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_non_home_based_unplaced] == parcel)[0]
            where_residential = where(bldgs_is_residential[idx_in_bldgs])[0]
            if where_residential.size > 0:
                building_types[job_index_non_home_based_unplaced[idx_in_jobs]] = 1 # set to home-based jobs
            elif non_res_sqft[idx_in_bldgs].sum() <= 0:
                # impute non_residential_sqft and assign buildings
                this_jobs_sectors = sectors[job_index_non_home_based_unplaced][idx_in_jobs]
                this_jobs_sqft_table = resize(jobs_sqft[idx_in_jobs], (idx_in_bldgs.size, idx_in_jobs.size))
                wn = jobs_sqft[idx_in_jobs] <= 0
                for i in range(idx_in_bldgs.size):
                    this_jobs_sqft_table[i, where(wn)[0]] = zone_bt_lookup[jobs_zones[idx_in_jobs[wn]], bldg_types_in_bldgs[idx_in_bldgs[i]]]
                probcomb = zeros(this_jobs_sqft_table.shape)
                bt = bldg_types_in_bldgs[idx_in_bldgs]
                ibt = building_type_dataset.get_id_index(bt)
                for i in range(probcomb.shape[0]):
                    for j in range(probcomb.shape[1]):
                        probcomb[i,j] = sector_bt_distribution[sector_index_mapping[this_jobs_sectors[j]],ibt[i]]
                for ijob in range(probcomb.shape[1]):
                    if (probcomb[:,ijob].sum() <= 0) or (impute_sqft_flags[job_index_non_home_based_unplaced[ijob]] == 0):
                        continue
                    weights = probcomb[:,ijob]
                    draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                    non_res_sqft[idx_in_bldgs[draw]] += this_jobs_sqft_table[draw,ijob]
                    imputed_sqft += this_jobs_sqft_table[draw,ijob]
                    building_ids[job_index_non_home_based_unplaced[idx_in_jobs[ijob]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                    new_jobs_sqft[job_index_non_home_based[idx_in_jobs[ijob]]] = int(min(self.maximum_sqft, max(round(this_jobs_sqft_table[draw,ijob]), 
                                                                                     self.minimum_sqft)))
                    
        building_dataset.modify_attribute(name="non_residential_sqft", data = non_res_sqft)
        job_dataset.modify_attribute(name="building_id", data = building_ids)
        job_dataset.modify_attribute(name="building_type", data = building_types)
        job_dataset.modify_attribute(name="sqft", data = new_jobs_sqft)
        
        old_nhb_size = job_index_non_home_based.size
        job_index_home_based = where(logical_and(is_considered, building_types == 1))[0]
        job_index_non_home_based = where(logical_and(is_considered, building_types == 2))[0]
        logger.log_status("%s non-home based jobs reclassified as home-based." % (old_nhb_size-job_index_non_home_based.size))
        logger.log_status("%s non-residential sqft imputed." % imputed_sqft)
        logger.log_status("Additionaly, %s non home based jobs were placed due to imputed sqft." % \
                                                (building_ids[job_index_non_home_based_unplaced]>0).sum())
        # home_based jobs
        unique_parcels = unique(parcel_ids[job_index_home_based])
        capacity_in_buildings = building_dataset.compute_variables([
                          "urbansim_parcel.building.vacant_home_based_job_space"],
                             dataset_pool=dataset_pool)
        parcels_with_exceeded_capacity = []
        # iterate over parcels
        logger.log_status("Placing home-based jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            idx_in_jobs = where(parcel_ids[job_index_home_based] == parcel)[0]
            capacity = capacity_in_buildings[idx_in_bldgs]
            if capacity.sum() <= 0:
                continue
            probcomb = ones((idx_in_bldgs.size, idx_in_jobs.size))
            taken = zeros(capacity.shape, dtype="int32")
            while True:
                zero_cap = where((capacity - taken) <= 0)[0]
                probcomb[zero_cap,:] = 0
                if probcomb.sum() <= 0:
                    break
                req =  probcomb.sum(axis=0)
                wmaxi = where(req==req.max())[0]
                drawjob = sample_noreplace(arange(wmaxi.size), 1) # draw job from available jobs
                imax_req = wmaxi[drawjob]
                weights = probcomb[:,imax_req]
                # sample building
                draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                taken[draw] = taken[draw] + 1
                building_ids[job_index_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                probcomb[:,imax_req] = 0
            if -1 in building_ids[job_index_home_based[idx_in_jobs]]:
                parcels_with_exceeded_capacity.append(parcel)
        parcels_with_exceeded_capacity = array(parcels_with_exceeded_capacity)    
        
        logger.log_status("%s home based jobs (out of %s hb jobs) were placed." % ((building_ids[job_index_home_based]>0).sum(),
                                                                         job_index_home_based.size))
        
        # assign building type where missing
        # determine regional distribution
        idx_home_based = where(building_types == 1)[0]
        idx_non_home_based = where(building_types == 2)[0]
        idx_bt_missing = where(building_types <= 0)[0]
        if idx_bt_missing.size > 0:
            # sample building types
            sample_bt = probsample_replace(array([1,2]), idx_bt_missing.size, 
               array([idx_home_based.size, idx_non_home_based.size])/float(idx_home_based.size + idx_non_home_based.size))
            # coerce to int32 (on a 64 bit machine, sample_bt will be of type int64)
            building_types[idx_bt_missing] = sample_bt.astype(int32)
            job_dataset.modify_attribute(name="building_type", data = building_types) 
        
        if out_storage is not None:
            job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage, attributes=AttributeType.PRIMARY)
            building_dataset.write_dataset(out_table_name='buildings', out_storage=out_storage, attributes=AttributeType.PRIMARY)
        logger.log_status("Assigning building_id to jobs done.")
 def get_choice_histogram(self, units_to_occupy, choices, nchoices):
     """Return a histogram of agent choices, where each agents occupy number of units given
     in 'units_to_occupy'. 'choices' are the agent choices of a location (as an index).
     'nchoices' is a number of unique values for possible choices.
     """
     return array(ndimage_sum(units_to_occupy, labels=choices+1, index=arange(nchoices)+1))
    def _do_run_for_this_year(self, job_set):
        building_type = job_set.get_attribute("building_type")
        sectors = unique(self.control_totals_for_this_year.get_attribute("sector_id"))
        self._compute_sector_variables(sectors, job_set)
        for sector in sectors:
            isector = where(self.control_totals_for_this_year.get_attribute("sector_id") == sector)[0]
            total_hb_jobs = self.control_totals_for_this_year.get_attribute("total_home_based_employment")[isector]
            total_nhb_jobs = self.control_totals_for_this_year.get_attribute("total_non_home_based_employment")[isector]
            is_in_sector_hb = job_set.get_attribute("is_in_employment_sector_%s_home_based" % sector)
            is_in_sector_nhb = job_set.get_attribute("is_in_employment_sector_%s_non_home_based" % sector)
            diff_hb = int(total_hb_jobs - is_in_sector_hb.astype(int8).sum())
            diff_nhb = int(total_nhb_jobs - is_in_sector_nhb.astype(int8).sum())
            if diff_hb < 0: # home based jobs to be removed
                w = where(is_in_sector_hb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_hb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate((self.remove_jobs, non_placed,
                                           sample_noreplace(sample_array, max(0,abs(diff_hb)-size_non_placed))))
            if diff_nhb < 0: # non home based jobs to be removed
                w = where(is_in_sector_nhb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_nhb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate((self.remove_jobs, non_placed,
                                           sample_noreplace(sample_array, max(0,abs(diff_nhb)-size_non_placed))))

            if diff_hb > 0: # home based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate((self.new_jobs[self.location_id_name],
                                   zeros((diff_hb,), dtype=self.new_jobs[self.location_id_name].dtype.type)))
                self.new_jobs["sector_id"] = concatenate((self.new_jobs["sector_id"],
                                   (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_hb))))
                if 1 in is_in_sector_hb:
                    building_type_distribution = array(ndimage_sum(is_in_sector_hb,
                                                                    labels=building_type,
                                                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute("is_home_based_job"): # take the building type distribution from the whole region
                    building_type_distribution = array(ndimage_sum(
                                                                job_set.get_attribute("is_home_based_job"),
                                                                labels=building_type,
                                                                index=self.available_building_types))
                else: # there are no home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(self.available_building_types.size)
                    building_type_distribution = building_type_distribution/building_type_distribution.sum()
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_hb, building_type_distribution/
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"],
                            sampled_building_types.astype(self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_hb
                self.new_jobs[self.job_id_name] = concatenate((self.new_jobs[self.job_id_name],
                                                     arange(self.max_id+1, new_max_id+1)))
                self.max_id = new_max_id

            if diff_nhb > 0: # non home based jobs to be created
                self.new_jobs[self.location_id_name]=concatenate((self.new_jobs[self.location_id_name],
                                     zeros((diff_nhb,), dtype=self.new_jobs[self.location_id_name].dtype.type)))
                self.new_jobs["sector_id"]=concatenate((self.new_jobs["sector_id"],
                                           (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_nhb))))
                if 1 in is_in_sector_nhb:
                    building_type_distribution = array(ndimage_sum(is_in_sector_nhb,
                                                                    labels=building_type,
                                                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute("is_non_home_based_job"): # take the building type distribution from the whole region
                    building_type_distribution = array(ndimage_sum(
                                                        job_set.get_attribute("is_non_home_based_job"),
                                                        labels=building_type,
                                                        index=self.available_building_types))
                else: # there are no non-home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(self.available_building_types.size)
                    building_type_distribution = building_type_distribution/building_type_distribution.sum()
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_nhb, building_type_distribution/
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"],
                                        sampled_building_types.astype(self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id+diff_nhb
                self.new_jobs[self.job_id_name]=concatenate((self.new_jobs[self.job_id_name], arange(self.max_id+1, 
                                                                                                     new_max_id+1)))
                self.max_id = new_max_id
 def get_choice_histogram(self, units_to_occupy, choices, nchoices):
     """Counts the number of agents that decided for each choice.
     """
     return array(ndimage_sum(ones((choices.size,)), labels=choices + 1, index=arange(nchoices) + 1))
class HouseholdTransitionModel(Model):
    """Creates and removes households from household_set. New households are duplicated from the existing households, keeping 
       the joint distribution of all characteristics. 
    """

    model_name = "Household Transition Model"

    def __init__(self,
                 location_id_name="grid_id",
                 dataset_pool=None,
                 debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.location_id_name = location_id_name
        self.dataset_pool = self.create_dataset_pool(dataset_pool,
                                                     ["urbansim", "opus_core"])

    def run(self,
            year,
            household_set,
            control_totals,
            characteristics,
            resources=None):
        self._do_initialize_for_run(household_set)
        control_totals.get_attribute(
            "total_number_of_households")  # to make sure they are loaded
        self.characteristics = characteristics
        self.all_categories = self.characteristics.get_attribute(
            "characteristic")
        self.all_categories = array(
            map(lambda x: x.lower(), self.all_categories))
        self.scaled_characteristic_names = get_distinct_names(
            self.all_categories).tolist()
        self.marginal_characteristic_names = copy(control_totals.get_id_name())
        index_year = self.marginal_characteristic_names.index("year")
        self.marginal_characteristic_names.remove("year")
        idx = where(control_totals.get_attribute("year") == year)[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        self._do_run_for_this_year(household_set)
        return self._update_household_set(household_set)

    def _update_household_set(self, household_set):
        index_of_duplicated_hhs = household_set.duplicate_rows(
            self.mapping_existing_hhs_to_new_hhs)
        household_set.modify_attribute(
            name=self.location_id_name,
            data=-1 * ones(
                (index_of_duplicated_hhs.size, ),
                dtype=household_set.get_data_type(self.location_id_name)),
            index=index_of_duplicated_hhs)
        household_set.remove_elements(self.remove_households)
        if self.new_households[self.location_id_name].size > 0:
            max_id = household_set.get_id_attribute().max()
            self.new_households[self.household_id_name] = concatenate(
                (self.new_households[self.household_id_name],
                 arange(
                     max_id + 1, max_id +
                     self.new_households[self.location_id_name].size + 1)))
            household_set.add_elements(self.new_households,
                                       require_all_attributes=False)

        difference = household_set.size() - self.household_size
        self.debug.print_debug(
            "Difference in number of households: %s"
            " (original %s, new %s, created %s, deleted %s)" %
            (difference, self.household_size, household_set.size(),
             self.new_households[self.household_id_name].size +
             self.mapping_existing_hhs_to_new_hhs.size,
             self.remove_households.size), 3)
        if self.location_id_name in household_set.get_attribute_names():
            self.debug.print_debug(
                "Number of unplaced households: %s" %
                where(household_set.get_attribute(self.location_id_name) <= 0)
                [0].size, 3)
        return difference

    def _do_initialize_for_run(self, household_set):
        self.household_id_name = household_set.get_id_name()[0]
        self.new_households = {
            self.location_id_name:
            array([],
                  dtype=household_set.get_data_type(self.location_id_name,
                                                    int32)),
            self.household_id_name:
            array([],
                  dtype=household_set.get_data_type(self.household_id_name,
                                                    int32))
        }
        self.remove_households = array([], dtype='int32')
        self.household_size = household_set.size()
        self.max_id = household_set.get_id_attribute().max()
        self.arrays_from_categories = {}
        self.arrays_from_categories_mapping = {}
        self.mapping_existing_hhs_to_new_hhs = array(
            [],
            dtype=household_set.get_data_type(self.household_id_name, int32))

    def _do_run_for_this_year(self, household_set):
        self.household_set = household_set
        groups = self.control_totals_for_this_year.get_id_attribute()
        self.create_arrays_from_categories(self.household_set)

        all_characteristics = self.arrays_from_categories.keys()
        self.household_set.load_dataset_if_not_loaded(
            attributes=all_characteristics
        )  # prevent lazy loading, to save runtime
        idx_shape = []
        number_of_combinations = 1
        num_attributes = len(all_characteristics)
        for iattr in range(num_attributes):
            attr = all_characteristics[iattr]
            max_bins = self.arrays_from_categories[attr].max() + 1
            idx_shape.append(max_bins)
            number_of_combinations = number_of_combinations * max_bins
            if attr not in self.new_households.keys():
                self.new_households[attr] = array(
                    [], dtype=self.household_set.get_data_type(attr, float32))

        self.number_of_combinations = int(number_of_combinations)
        idx_tmp = indices(tuple(idx_shape))

        categories_index = zeros((self.number_of_combinations, num_attributes))

        for i in range(num_attributes):  #create indices of all combinations
            categories_index[:, i] = idx_tmp[i].ravel()

        categories_index_mapping = {}
        for i in range(self.number_of_combinations):
            categories_index_mapping[tuple(categories_index[i, ].tolist())] = i

        def get_category(values):
            bins = map(lambda x, y: self.arrays_from_categories[x][int(y)],
                       all_characteristics, values)
            try:
                return categories_index_mapping[tuple(bins)]
            except KeyError, msg:
                where_error = where(array(bins) == -1)[0]
                if where_error.size > 0:
                    raise KeyError, \
                        "Invalid value of %s for attribute %s. It is not included in the characteristics groups." % (
                                                                               array(values)[where_error],
                                                                               array(all_characteristics)[where_error])
                raise KeyError, msg

        if num_attributes > 0:
            # the next array must be a copy of the household values; otherwise it would modify the original values
            values_array = reshape(
                array(self.household_set.get_attribute(
                    all_characteristics[0])), (self.household_set.size(), 1))
            if num_attributes > 1:
                for attr in all_characteristics[1:]:
                    values_array = concatenate(
                        (values_array,
                         reshape(array(self.household_set.get_attribute(attr)),
                                 (self.household_set.size(), 1))),
                        axis=1)
            for i in range(values_array.shape[1]):
                if values_array[:, i].max() > 10000:
                    values_array[:, i] = values_array[:, i] / 10
                values_array[:, i] = clip(
                    values_array[:, i], 0,
                    self.arrays_from_categories[all_characteristics[i]].size -
                    1)

            # determine which category each household belongs to
            self.household_categories = array(
                map(lambda x: get_category(x),
                    values_array))  # performance bottleneck

            number_of_households_in_categories = array(
                ndimage_sum(ones((self.household_categories.size, )),
                            labels=self.household_categories + 1,
                            index=arange(self.number_of_combinations) + 1))
        else:
            # no marginal characteristics; consider just one group
            self.household_categories = zeros(self.household_set.size(),
                                              dtype='int32')
            number_of_households_in_categories = array(
                [self.household_set.size()])

        g = arange(num_attributes)

        # iterate over marginal characteristics
        for group in groups:
            if groups.ndim <= 1:  # there is only one group (no marginal char.)
                id = group
            else:
                id = tuple(group.tolist())
            group_element = self.control_totals_for_this_year.get_data_element_by_id(
                id)
            total = group_element.total_number_of_households
            for i in range(g.size):
                g[i] = getattr(group_element,
                               self.arrays_from_categories.keys()[i])
            if g.size <= 0:
                l = ones((number_of_households_in_categories.size, ))
            else:
                l = categories_index[:, 0] == g[0]
                for i in range(1, num_attributes):
                    l = logical_and(l, categories_index[:, i] == g[i])
            # l has 1's for combinations of this group
            number_in_group = array(
                ndimage_sum(number_of_households_in_categories,
                            labels=l,
                            index=1))
            diff = int(total - number_in_group)
            if diff < 0:  # households to be removed
                is_in_group = l[self.household_categories]
                w = where(is_in_group)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(self.household_set, w, -1*diff,
                                                          self.location_id_name)
                self.remove_households = concatenate(
                    (self.remove_households, non_placed,
                     sample_noreplace(sample_array,
                                      max(0,
                                          abs(diff) - size_non_placed))))
            if diff > 0:  # households to be created
                self._create_households(diff, l)

    def get_average_omega(self, omega, probability, index, nsupply, nobs, demand):
        # weight each individual's choice probabilities by its omega
        omega_prob = ma.filled(ma.resize(omega,(nobs,1))*probability,0.0)
        # sum the omega-weighted probabilities per alternative (labels shifted by 1 so that label 0 is unused)
        average_omega_nom = array(ndimage_sum(omega_prob, labels=index+1, index=arange(nsupply)+1))
        # normalize by demand, returning 0 where demand is zero
        average_omega = ma.filled(average_omega_nom / ma.masked_where(demand==0, demand), 0.0)
        return average_omega
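
A compact, self-contained sketch of the category machinery used in the example above, written with plain numpy/scipy and hypothetical bin counts and household values (not the original dataset API): all combinations of characteristic bins are enumerated with indices(), each bin tuple is mapped to a flat category id, ndimage_sum counts households per category, and a 0/1 label array with index=1 sums only the categories belonging to one control-total group.

from numpy import array, arange, indices, ones, zeros
from scipy.ndimage import sum as ndimage_sum

# two characteristics with 2 and 3 bins respectively -> 6 combinations
idx_shape = (2, 3)
idx_tmp = indices(idx_shape)
number_of_combinations = 2 * 3
categories_index = zeros((number_of_combinations, len(idx_shape)), dtype='int32')
for i in range(len(idx_shape)):
    categories_index[:, i] = idx_tmp[i].ravel()
# map each bin tuple to its flat category id
categories_index_mapping = dict(
    (tuple(categories_index[i, :].tolist()), i) for i in range(number_of_combinations))

# five households already expressed as bin tuples
household_bins = [(0, 0), (1, 2), (1, 2), (0, 1), (1, 0)]
household_categories = array([categories_index_mapping[b] for b in household_bins])

# count households per category; labels are shifted by 1 so that label 0 is never used
counts = array(ndimage_sum(ones(household_categories.size),
                           labels=household_categories + 1,
                           index=arange(number_of_combinations) + 1))

# sum only over the categories flagged by a 0/1 mask (cf. number_in_group above)
mask = (categories_index[:, 0] == 1).astype('int32')   # group: first characteristic in bin 1
number_in_group = ndimage_sum(counts, labels=mask, index=1)
print(counts, number_in_group)   # per-category counts and the group total (3.0 here)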
Example n. 32
    def run(self, probability, resources=None):
        """ Compute choices according to given probability -- Constrain Location Choice procedure.
        'probability' is a 2D numpy array (nobservation x nequations).
        The returned value is a 1D array of length nobservations containing choice indices in [0, nequations-1].
        The argument 'resources' must contain an entry 'capacity', a 1D array whose number of elements
        equals the number of choices.
        Optional entry 'index' (1D or 2D array) gives indices of the choices.
        """
        if probability.ndim < 2:
            raise StandardError, "Argument 'probability' must be a 2D numpy array."
            
        resources.check_obligatory_keys(["capacity"])
        supply = resources["capacity"]
        if not isinstance(supply, ndarray):
            supply = array(supply)
        nsupply = supply.size
#        logger.log_status('Supply.shape:',supply.shape)
#        logger.log_status('supply.sum:', supply.sum())
        max_iter = resources.get("max_iterations", None)
        if max_iter is None:
            max_iter = 100 # default
        
        
        index = resources.get("index", None)
        if index is None:
            index = arange(nsupply)
#        logger.log_status('index.shape:',index.shape)

        neqs = probability.shape[1]
        nobs = probability.shape[0]

        if supply.sum() < nobs:
            raise StandardError, "Aggregate supply must be at least as large as aggregate demand."


        if index.ndim <= 1:
            index = repeat(reshape(index, (1,index.shape[0])), nobs, axis=0)
        resources.merge({"index":index})
#        logger.log_status('index.shape:',index.shape)


        flat_index = index.ravel()
        unique_index = unique(flat_index)
#        logger.log_status('flat_index.shape:',flat_index.shape)
#        logger.log_status('unique_index.shape',unique_index.shape)
#        logger.log_status(unique_index)
        l = flat_index + 1
        demand = array(ndimage_sum(probability.ravel(), labels=l, index=arange(nsupply)+1))
#        logger.log_status('demand.shape:',demand.shape)
#        logger.log_status('demand.sum:', demand.sum())
#        logger.log_status('probability.sum:',probability.sum())
        #initial calculations
        
        sdratio = ma.filled(supply/ma.masked_where(demand==0, demand),1.0)
#        logger.log_status('sdratio.shape:',sdratio.shape)
        constrained_locations = where(sdratio<1,1,0)
        unconstrained_locations = 1-constrained_locations
        
        # Compute the iteration zero omegas
        
        sdratio_matrix = sdratio[index]
        constrained_locations_matrix = constrained_locations[index]
        unconstrained_locations_matrix = unconstrained_locations[index]
        prob_sum = 1-(probability*constrained_locations_matrix).sum(axis=1)
        omega = (1-(probability*constrained_locations_matrix*sdratio_matrix).sum(axis=1))/ \
                ma.masked_where(prob_sum ==0, prob_sum)
        pi = sdratio_matrix / ma.resize(omega, (nobs,1)) * constrained_locations_matrix + unconstrained_locations_matrix
        average_omega = ma.filled((ma.resize(omega,(nobs,1))*probability).sum(axis=0)/\
                      ma.masked_where(demand[index]==0, demand[index]),0.0)
        number_constrained_locations = zeros((max_iter,))
        # Iterative Constrained Location Procedure
        for i in range(max_iter):
            logger.log_status('Iteration ',i+1, 'Average Omega:',average_omega[0:4])
            # Recompute the constrained locations using iteration zero value of Omega
            constrained_locations_matrix = where(supply[index]<(average_omega*demand[index]),1,0)
            unconstrained_locations_matrix = 1-constrained_locations_matrix
            # Update values of Omega using new Constrained Locations
            prob_sum = 1-(probability*constrained_locations_matrix).sum(axis=1)
            omega = (1-(probability*constrained_locations_matrix*sdratio_matrix).sum(axis=1))/\
                    ma.masked_where(prob_sum ==0, prob_sum)
#            pi = sdratio_matrix / ma.resize(omega, (nobs,1)) * constrained_locations_matrix + unconstrained_locations_matrix       
#            logger.log_status('sdratio_matrix',sdratio_matrix.shape)
#            logger.log_status('constrained_locations_matrix',constrained_locations_matrix.shape)
#            logger.log_status('omega',omega.shape)
#            logger.log_status('unconstrained_locations_matrix',unconstrained_locations_matrix.shape)
#            pi_ta = (sdratio_matrix*constrained_locations_matrix)
#            logger.log_status('pi+ta',pi_ta.shape)
#            pi_tb = ma.resize(omega,(nobs,neqs))*unconstrained_locations_matrix
#            logger.log_status('pi_tb',pi_tb.shape)
            pi_t = (sdratio_matrix*constrained_locations_matrix)+ma.resize(omega,(nobs,neqs))*unconstrained_locations_matrix
#            logger.log_status('pi_tilde:',pi_t.shape)
            # Update the values of average Omegas per alternative
            average_omega = ma.filled((ma.resize(omega,(nobs,1))*probability).sum(axis=0)/
                          ma.masked_where(demand[index]==0, demand[index]),0.0)
            number_constrained_locations[i]= constrained_locations_matrix.sum()
            # Test for Convergence and if Reached, Exit
            if i > 0:
                if number_constrained_locations[i] == number_constrained_locations[i-1]:
                    break
          
        # update probabilities
#        new_probability = ma.filled(probability*ma.resize(omega,(nobs,1))*pi,0.0)
        new_probability = ma.filled(probability*pi_t,0.0)
        choices = lottery_choices().run(new_probability, resources)
        return choices
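
A simplified, self-contained sketch of the "initial calculations" step in the run() method above, using a toy probability matrix and capacities with plain numpy/scipy (no opus Resources object); names and sizes are illustrative only. Aggregate demand per alternative is summed from the choice probabilities, compared with supply to flag constrained alternatives, and the iteration-zero omega is formed per observation.

from numpy import array, arange, where
import numpy.ma as ma
from scipy.ndimage import sum as ndimage_sum

probability = array([[0.7, 0.2, 0.1],            # 2 observations x 3 alternatives
                     [0.1, 0.3, 0.6]])
supply = array([0.5, 2.0, 1.0])                  # capacity of each alternative
index = array([[0, 1, 2],                        # per observation: alternative ids of its choice set
               [0, 1, 2]])
nobs, neqs = probability.shape
nsupply = supply.size

# demand per alternative: choice probabilities summed by alternative id (labels shifted by 1)
demand = array(ndimage_sum(probability.ravel(),
                           labels=index.ravel() + 1,
                           index=arange(nsupply) + 1))

# supply/demand ratio; alternatives with sdratio < 1 are constrained
sdratio = ma.filled(supply / ma.masked_where(demand == 0, demand), 1.0)
constrained_locations = where(sdratio < 1, 1, 0)

# iteration-zero omega: (1 - constrained probability scaled by sdratio) / (1 - constrained probability)
constrained_matrix = constrained_locations[index]
prob_sum = 1 - (probability * constrained_matrix).sum(axis=1)
omega = (1 - (probability * constrained_matrix * sdratio[index]).sum(axis=1)) / \
        ma.masked_where(prob_sum == 0, prob_sum)
print(demand, sdratio, constrained_locations, omega)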
        
Example n. 33
    def run(self, probability, resources=None):
        """ Compute choices according to given probability -- Constrain Location Choice procedure.
        'probability' is a 2D numpy array (nobservation x nequations).
        The returned value is a 1D array of length nobservations containing choice indices in [0, nequations-1].
        The argument 'resources' must contain an entry 'capacity', a 1D array whose number of elements
        equals the number of choices.
        Optional entry 'index' (1D or 2D array) gives indices of the choices.
        """
        if probability.ndim < 2:
            raise StandardError, "Argument 'probability' must be a 2D numpy array."

        resources.check_obligatory_keys(["capacity"])
        supply = resources["capacity"]
        if not isinstance(supply, ndarray):
            supply = array(supply)
        nsupply = supply.size

        max_iter = resources.get("max_iterations", None)
        if max_iter is None:
            max_iter = 100 # default

        index = resources.get("index", None)
        if index is None:
            index = arange(nsupply)

        neqs = probability.shape[1]
        nobs = probability.shape[0]

        if index.ndim <= 1:
            index = repeat(reshape(index, (1,index.shape[0])), nobs, axis=0)
        resources.merge({"index":index})

        flat_index = index.ravel()
        unique_index = unique(flat_index)
        l = flat_index + 1
        demand = array(ndimage_sum(probability.ravel(), labels=l, index=arange(nsupply)+1))

        #initial calculations
        sdratio = ma.filled(supply/ma.masked_where(demand==0, demand),2.0)
        constrained_locations = logical_and(sdratio<1,demand-supply>0.1).astype("int8")
        unconstrained_locations = 1-constrained_locations
        excess_demand = (demand-supply)*constrained_locations
        global_excess_demand = excess_demand.sum()

        # Compute the iteration zero omegas

        sdratio_matrix = sdratio[index]
        constrained_locations_matrix = constrained_locations[index]
# Would like to include the following print statements in debug printing
#        logger.log_status('Total demand:',demand.sum())
#        logger.log_status('Total supply:',supply.sum())
        logger.log_status('Global excess demand:',global_excess_demand)
#        logger.log_status('Constrained locations:',constrained_locations.sum())
        unconstrained_locations_matrix = unconstrained_locations[index]
        prob_sum = 1-(probability*constrained_locations_matrix).sum(axis=1)

        # The recoding of prob_sum and omega below handles extreme values of omega and zero-divide problems.
        # A complete solution involves stratifying the choice set in the initialization to ensure that
        # there is always a mixture of constrained and unconstrained alternatives in each choice set.

        prob_sum = where(prob_sum==0,-1,prob_sum)
        omega = (1-(probability*constrained_locations_matrix*sdratio_matrix).sum(axis=1))/prob_sum
        omega = where(omega>5,5,omega)
        omega = where(omega<.5,5,omega)
        omega = where(prob_sum<0,5,omega)

# Debug print statements
#        logger.log_status('Minimum omega',minimum(omega))
#        logger.log_status('Maximum omega',maximum(omega))
#        logger.log_status('Median omega',median(omega))
#        logger.log_status('Omega < 0',(where(omega<0,1,0)).sum())
#        logger.log_status('Omega < 1',(where(omega<1,1,0)).sum())
#        logger.log_status('Omega > 30',(where(omega>30,1,0)).sum())
#        logger.log_status('Omega > 100',(where(omega>100,1,0)).sum())
#        logger.log_status('Omega histogram:',histogram(omega,0,30,30))
#        logger.log_status('Excess demand max:',maximum(excess_demand))
#        logger.log_status('Excess demand 0-1000:',histogram(excess_demand,0,1000,20))
#        logger.log_status('Excess demand 0-10:',histogram(excess_demand,0,10,20))

        pi = sdratio_matrix / ma.resize(omega, (nobs,1)) * constrained_locations_matrix + unconstrained_locations_matrix

        omega_prob = ma.filled(ma.resize(omega,(nobs,1))*probability,0.0)
        average_omega_nom = array(ndimage_sum(omega_prob, labels=index+1, index=arange(nsupply)+1))

        average_omega = ma.filled(average_omega_nom/
                      ma.masked_where(demand==0, demand), 0.0)

#        logger.log_status('Total demand:',new_demand.sum())
#        logger.log_status('Excess demand:',excess_demand)
        number_constrained_locations=zeros((max_iter,))
        # Iterative Constrained Location Procedure
        for i in range(max_iter):
            logger.log_status()
            logger.log_status('Constrained location choice iteration ',i+1)
            # Recompute the constrained locations using the preceding iteration's value of Omega
            constrained_locations = where((average_omega*demand-supply>0.1),1,0)
            unconstrained_locations = 1-constrained_locations
            constrained_locations_matrix = constrained_locations[index]
            unconstrained_locations_matrix = unconstrained_locations[index]
#            logger.log_status('supply.shape,average_omega.shape,demand.shape',supply.shape,average_omega.shape,demand.shape)
#            logger.log_status('constrained_locations_matrix',constrained_locations_matrix)
#            logger.log_status('constrained_locations_matrix.shape',constrained_locations_matrix.shape)
#            logger.log_status('unconstrained_locations_matrix',unconstrained_locations_matrix)
            # Update values of Omega using new Constrained Locations
            prob_sum = 1-(probability*constrained_locations_matrix).sum(axis=1)
            prob_sum = where(prob_sum==0,-1,prob_sum)
            omega = (1-(probability*constrained_locations_matrix*sdratio_matrix).sum(axis=1))/prob_sum
            omega = where(omega>5,5,omega)
            omega = where(omega<.5,5,omega)
            omega = where(prob_sum<0,5,omega)
            pi = sdratio_matrix / ma.resize(omega, (nobs,1)) * constrained_locations_matrix + unconstrained_locations_matrix
            # Update the values of average Omegas per alternative
            omega_prob = ma.filled(ma.resize(omega,(nobs,1)), 1.0)*probability
            average_omega_num = array(ndimage_sum(omega_prob, labels=index+1, index=arange(nsupply)+1))

            average_omega = ma.filled(average_omega_num/
                      ma.masked_where(demand==0, demand), 0.0)

            number_constrained_locations[i] = constrained_locations.sum()
            new_probability = ma.filled(probability*ma.resize(omega,(nobs,1))*pi,0.0)
            new_demand = array(ndimage_sum(new_probability.ravel(), labels=l, index=arange(nsupply)+1))
            excess_demand = (new_demand-supply)*constrained_locations
            global_excess_demand = excess_demand.sum()
#            logger.log_status('Total demand:',new_demand.sum())
            logger.log_status('Global excess demand:',global_excess_demand)
#            logger.log_status('Constrained locations:', number_constrained_locations[i])
#            logger.log_status('Minimum omega',minimum(omega))
#            logger.log_status('Maximum omega',maximum(omega))
#            logger.log_status('Median omega',median(omega))
#            logger.log_status('Omega < 0',(where(omega<0,1,0)).sum())
#            logger.log_status('Omega < 1',(where(omega<1,1,0)).sum())
#            logger.log_status('Omega > 30',(where(omega>30,1,0)).sum())
#            logger.log_status('Omega > 100',(where(omega>100,1,0)).sum())
#            logger.log_status('Omega histogram:',histogram(omega,0,30,30))
#            logger.log_status('Excess demand max:',maximum(excess_demand))
#            logger.log_status('Excess demand 0-5:',histogram(excess_demand,0,5,20))
#            logger.log_status('Excess demand 0-1:',histogram(excess_demand,0,1,20))
            # Test for Convergence and if Reached, Exit
            if i > 0:
                if number_constrained_locations[i] == number_constrained_locations[i-1]:
                    logger.log_status()
                    logger.log_status('Constrained choices converged.')
                    break

        # update probabilities
        new_probability = ma.filled(probability*ma.resize(omega,(nobs,1))*pi,0.0)
        choices = lottery_choices().run(new_probability, resources)
        return choices

    def get_demand(self, index, probability, nsupply):
        flat_index = index.ravel()
        # aggregate choice probabilities by alternative id; labels are shifted by 1 so that label 0 is unused
        l = flat_index + 1
        demand = array(ndimage_sum(probability.ravel(), labels=l, index=arange(nsupply)+1))
        return demand
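
A small sketch, with hypothetical values, isolating the omega guard used in the example above: a zero denominator is recoded to -1 to avoid a zero divide, and any omega that is extreme or that came from a recoded denominator is pinned to 5.

from numpy import array, where

prob_sum = array([0.4, 0.0, 0.05])             # 1 - constrained probability, per observation
numerator = array([0.8, 0.3, 0.9])             # 1 - constrained probability scaled by sdratio

prob_sum = where(prob_sum == 0, -1, prob_sum)  # avoid division by zero
omega = numerator / prob_sum
omega = where(omega > 5, 5, omega)             # cap very large omegas
omega = where(omega < .5, 5, omega)            # recode very small (or negative) omegas
omega = where(prob_sum < 0, 5, omega)          # observations whose denominator was recoded
print(omega)                                   # -> [2., 5., 5.]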