Code Example #1
    def _add(self,
             amount=0,
             attribute='',
             dataset=None,
             index=None,
             data_dict={},
             **kwargs):
        new_data = {}
        dataset_known_attributes = dataset.get_known_attribute_names()
        if index.size > 0:  # sample from agents
            lucky_index = sample_replace(index, amount)
            for attr in dataset_known_attributes:
                new_data[attr] = dataset.get_attribute_by_index(
                    attr, lucky_index)
        else:
            ## if attributes are not fully specified, the id attribute is filled with 0's and the remaining missing attributes are sampled from existing values
            for attr in dataset.get_primary_attribute_names():
                if attr in data_dict:
                    new_data[attr] = resize(array(data_dict[attr]), amount)
                else:
                    if attr == dataset.get_id_name()[0]:
                        new_data[attr] = zeros(
                            amount, dtype=dataset.get_id_attribute().dtype)
                    else:
                        logger.log_warning(
                            "Attribute %s is unspecified for 'add' event; its value will be sampled from all %s values of %s."
                            % (attr, attr, dataset.get_dataset_name()))
                        new_data[attr] = sample_replace(
                            dataset.get_attribute(attr), amount)

        dataset.add_elements(data=new_data, change_ids_if_not_unique=True)
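Every example on this page calls `sample_replace`. As a reading aid, here is a minimal sketch of the semantics these call sites appear to assume -- a uniform draw with replacement that returns either positions or values -- not the actual opus_core implementation:

    import numpy as np

    def sample_replace(source_array, size, return_index=False):
        # Hypothetical stand-in: draw `size` positions uniformly with
        # replacement; return the positions or the sampled values.
        source_array = np.asarray(source_array)
        index = np.random.randint(0, source_array.size, size=size)
        return index if return_index else source_array[index]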
Code Example #2
    def run(self,
            person_set,
            tour_set,
            person_index=None,
            tour_filter=None,
            dataset_pool=None):
        if person_index is None:
            person_index = arange(person_set.size())

        if tour_filter is not None:
            tour_index = where(tour_set.compute_variables(tour_filter))[0]
        else:
            tour_index = arange(tour_set.size())

        sampled_tour_id = sample_replace(
            tour_set.get_id_attribute()[tour_index],
            person_index.size,
            return_index=False)
        if 'tour_id' in person_set.get_known_attribute_names():
            person_set.set_values_of_one_attribute('tour_id', sampled_tour_id,
                                                   person_index)
        else:
            tour_id = -1 * ones(person_set.size(), dtype="int32")
            tour_id[person_index] = sampled_tour_id
            person_set.add_primary_attribute(tour_id, 'tour_id')

        return sampled_tour_id
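The fallback branch above shows a common idiom: create the attribute prefilled with a sentinel, then overwrite only the selected rows via fancy indexing. A standalone NumPy sketch of that idiom (all values here are made up):

    import numpy as np

    tour_ids = np.array([101, 102, 103])     # candidate tour ids
    person_index = np.array([0, 2, 4])       # persons to assign
    n_persons = 6

    # one tour id per selected person, drawn with replacement
    sampled = tour_ids[np.random.randint(0, tour_ids.size, person_index.size)]

    tour_id = -1 * np.ones(n_persons, dtype="int32")  # -1 marks "no tour"
    tour_id[person_index] = sampled                   # e.g. [103 -1 101 -1 102 -1]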
Code Example #3
File: refinement_model.py  Project: psrc/urbansim
 def _clone(self, agents_pool, amount, 
               agent_dataset, location_dataset, 
               this_refinement,
               dataset_pool ):
     """ clone certain amount of agent satisfying condition specified by agent_expression and location_expression
     and add them to agents_pool.  Useful to add certain agents to location where there is no such agent exist previously.
     """
     
     fit_index = self.get_fit_agents_index(agent_dataset, 
                                           this_refinement.agent_expression, 
                                           this_refinement.location_expression,
                                           dataset_pool)
     if fit_index.size == 0:
         logger.log_error("Refinement requests to clone %i agents,  but there are no agents satisfying %s." \
                             % (amount,  ' and '.join( [this_refinement.agent_expression, 
                                                        this_refinement.location_expression] ).strip(' and '),
                                                      ))
         return
    
     
     clone_index = sample_replace( fit_index, amount )
         
     agents_pool += clone_index.tolist()
        
     agent_dataset.modify_attribute(location_dataset.get_id_name()[0], 
                                    -1 * ones( clone_index.size, dtype='int32' ),
                                    index = clone_index
                                    )
     self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement, index=clone_index)
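Stripped of the dataset machinery, the cloning step reduces to: sample qualifying rows with replacement, queue them in the pool, and reset their location id so a later action can re-place them. A minimal sketch with hypothetical arrays:

    import numpy as np

    fit_index = np.array([3, 7, 9, 12])   # agents matching the refinement
    amount = 6
    agents_pool = []

    # draw `amount` agents with replacement from the qualifying set
    clone_index = fit_index[np.random.randint(0, fit_index.size, amount)]
    agents_pool += clone_index.tolist()   # queue them for later placement

    location_id = np.arange(20)           # per-agent location attribute
    location_id[clone_index] = -1         # -1: location to be assigned later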
Code Example #4
 def _create_households(self, diff, l):
     # sample existing households to copy
     is_hh_in_group = l[self.household_categories]
     consider_hhs_idx = where(is_hh_in_group)[0]
     if consider_hhs_idx.size > 0:
         sample_from_existing_hhs = sample_replace(consider_hhs_idx, diff)
         self.mapping_existing_hhs_to_new_hhs = concatenate((self.mapping_existing_hhs_to_new_hhs, sample_from_existing_hhs))
Code Example #5
 def test_sample_replace(self):
     start_time = time.time()
     sample = sample_replace(self.all, self.size, return_index=True)
     logger.log_status("sample_replace %s from %s items array in " % (self.size,self.n) + str(time.time() - start_time) + " sec")
     self.assertEqual(sample.size, self.size, msg ="sample size not equal to size parameter")
     assert isinstance(sample, ndarray), "sample is not of type ndarray"
     assert 0 <= sample.min() <= self.n-1, "sampled elements not in between min and max of source array"
     assert 0 <= sample.max() <= self.n-1, "sampled elements not in between min and max of source array"
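The contract this test pins down -- the result has exactly the requested size and every index stays within [0, n-1] -- is what distinguishes sampling with replacement: the request may exceed the source size. A quick check of the same contract with NumPy's Generator (seeded for reproducibility):

    import numpy as np

    rng = np.random.default_rng(0)
    n, size = 1000, 5000                    # size > n is fine with replacement
    sample = rng.integers(0, n, size=size)  # index draw, like return_index=True
    assert sample.size == size
    assert 0 <= sample.min() and sample.max() <= n - 1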
Code Example #9
    def run(self, job_dataset, dataset_pool, out_storage=None, jobs_table="jobs"):
        """
        Algorithm:
            1. For all non_home_based jobs that have parcel_id assigned but no building_id, try
                to choose a building from all buildings in that parcel. Draw the building with probabilities
                given by the sector-building_type distribution. The job sizes are
                fitted into the available space (the attribute job.sqft is updated).
            2. For all non_home_based jobs for which no building was found in step 1, check
                if the parcel has residential buildings. In such a case, re-assign the jobs to be
                home-based.
                Otherwise, if sum of non_residential_sqft over the involved buildings is 0,
                for all jobs that have impute_building_sqft_flag=True draw a building using
                the sector-building_type distribution and impute the corresponding sqft to 
                the non_residential_sqft of that building.
            3. For all home_based jobs that have parcel_id assigned but no building_id, try
                to choose a building from all buildings in that parcel. 
                The capacity of a single-family building is determined from sizes of the households living there 
                (for each household the minimum of number of members and 2 is taken). 
                For multi-family buildings the capacity is 50.
            4. Assign a building type to jobs that have missing building type. It is sampled 
                from the regional-wide distribution of home based and non-home based jobs.
            5. Update the table 'building_sqft_per_job' using the updated job.sqft.
        'in_storage' should contain the jobs table and the zone_averages_table. The 'dataset_pool_storage'
        should contain all other tables needed (buildings, households, building_types). 
        """
        parcel_ids = job_dataset.get_attribute("parcel_id")
        building_ids = job_dataset.get_attribute("building_id")
        building_types = job_dataset.get_attribute("building_type")
        try:
            impute_sqft_flags = job_dataset.get_attribute("impute_building_sqft_flag")
        except Exception:
            impute_sqft_flags = zeros(job_dataset.size())
        is_considered = logical_and(parcel_ids > 0, building_ids <= 0) # jobs that have assigned parcel but not building
        job_index_home_based = where(logical_and(is_considered, building_types == 1))[0]
        job_index_governmental = where(logical_and(is_considered, building_types == 3))[0]
        
        building_dataset = dataset_pool.get_dataset('building')
        parcel_ids_in_bldgs = building_dataset.get_attribute("parcel_id")
        bldg_ids_in_bldgs = building_dataset.get_id_attribute()
        bldg_types_in_bldgs = building_dataset.get_attribute("building_type_id")
        
        non_res_sqft = building_dataset.get_attribute("non_residential_sqft")
        occupied = building_dataset.compute_variables(["urbansim_parcel.building.occupied_building_sqft_by_jobs"],
                                                                     dataset_pool=dataset_pool)
        is_governmental = building_dataset.compute_variables(["building.disaggregate(building_type.generic_building_type_id == 7)"],
                                                                     dataset_pool=dataset_pool)
        
        # assign buildings to governmental jobs randomly
        unique_parcels = unique(parcel_ids[job_index_governmental])
        logger.log_status("Placing governmental jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs[is_governmental] == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_governmental] == parcel)[0]
            draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
            building_ids[job_index_governmental[idx_in_jobs]] = bldg_ids_in_bldgs[where(is_governmental)[0][draw]]
        logger.log_status("%s governmental jobs (out of %s gov. jobs) were placed." % (
                                                                (building_ids[job_index_governmental]>0).sum(),
                                                                 job_index_governmental.size))
        logger.log_status("The not-placed governmental jobs will be added to the non-home based jobs.")
        
        # consider the unplaced governmental jobs together with other non-home-based jobs
        is_now_considered = logical_and(is_considered, building_ids <= 0)
        job_index_non_home_based = where(logical_and(is_now_considered, logical_or(building_types == 2, building_types == 3)))[0]
                                    
        # assign buildings to non_home_based jobs based on available space
        unique_parcels = unique(parcel_ids[job_index_non_home_based])
        job_building_types = job_dataset.compute_variables(["bldgs_building_type_id = job.disaggregate(building.building_type_id)"], 
                                                           dataset_pool=dataset_pool)
        where_valid_jbt = where(logical_and(job_building_types>0, logical_or(building_types == 2, building_types==3)))[0]
        building_type_dataset = dataset_pool.get_dataset("building_type")
        available_building_types= building_type_dataset.get_id_attribute()
        idx_available_bt = building_type_dataset.get_id_index(available_building_types)
        sectors = job_dataset.get_attribute("sector_id")
        unique_sectors = unique(sectors)
        sector_bt_distribution = zeros((unique_sectors.size, building_type_dataset.size()), dtype="float32")
        
        jobs_sqft = job_dataset.get_attribute_by_index("sqft", job_index_non_home_based).astype("float32")
        job_dataset._compute_if_needed("urbansim_parcel.job.zone_id", dataset_pool=dataset_pool) 
        jobs_zones = job_dataset.get_attribute_by_index("zone_id", job_index_non_home_based)
        new_jobs_sqft = job_dataset.get_attribute("sqft").copy()
        
        # find sector -> building_type distribution
        sector_index_mapping = {}
        for isector in range(unique_sectors.size):
            idx = where(sectors[where_valid_jbt]==unique_sectors[isector])[0]
            if idx.size == 0: continue
            o = ones(idx.size, dtype="int32")
            sector_bt_distribution[isector,:] = ndimage_sum(o, labels=job_building_types[where_valid_jbt[idx]], 
                                                            index=available_building_types)
            sector_bt_distribution[isector,:] = sector_bt_distribution[isector,:]/sector_bt_distribution[isector,:].sum()
            sector_index_mapping[unique_sectors[isector]] = isector
               
        # create a lookup table for zonal average per building type of sqft per employee
        zone_average_dataset = dataset_pool.get_dataset("building_sqft_per_job")
        zone_bt_lookup = zone_average_dataset.get_building_sqft_as_table(job_dataset.get_attribute("zone_id").max(),
                                                                         available_building_types.max())

        counter_zero_capacity = 0
        counter_zero_distr = 0
        # iterate over parcels
        logger.log_status("Placing non-home-based jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_non_home_based] == parcel)[0]
            capacity = maximum(non_res_sqft[idx_in_bldgs] - occupied[idx_in_bldgs],0)
            #capacity = non_res_sqft[idx_in_bldgs] - occupied[idx_in_bldgs]
            if capacity.sum() <= 0:
                counter_zero_capacity += idx_in_jobs.size
                continue
            this_jobs_sectors = sectors[job_index_non_home_based][idx_in_jobs]
            this_jobs_sqft_table = resize(jobs_sqft[idx_in_jobs], (idx_in_bldgs.size, idx_in_jobs.size))
            wn = jobs_sqft[idx_in_jobs] <= 0
            for i in range(idx_in_bldgs.size):
                this_jobs_sqft_table[i, where(wn)[0]] = zone_bt_lookup[jobs_zones[idx_in_jobs[wn]], bldg_types_in_bldgs[idx_in_bldgs[i]]]
            supply_demand_ratio = (resize(capacity, (capacity.size, 1))/this_jobs_sqft_table.astype("float32").sum(axis=0))/float(idx_in_jobs.size)*0.9
            if any(supply_demand_ratio < 1): # correct only if supply is smaller than demand 
                this_jobs_sqft_table = this_jobs_sqft_table * supply_demand_ratio
            probcomb = zeros(this_jobs_sqft_table.shape)
            bt = bldg_types_in_bldgs[idx_in_bldgs]
            ibt = building_type_dataset.get_id_index(bt)
            for i in range(probcomb.shape[0]):
                for j in range(probcomb.shape[1]):
                    probcomb[i,j] = sector_bt_distribution[sector_index_mapping[this_jobs_sectors[j]],ibt[i]]
            pcs = probcomb.sum(axis=0)
            probcomb = probcomb/pcs
            wz = where(pcs<=0)[0]
            counter_zero_distr += wz.size
            probcomb[:, wz] = 0 # to avoid nan values
            taken = zeros(capacity.shape)
            has_sqft = this_jobs_sqft_table > 0
            while True:
                if (has_sqft * probcomb).sum() <= 0:
                    break
                req =  (this_jobs_sqft_table * probcomb).sum(axis=0)
                maxi = req.max()
                wmaxi = where(req==maxi)[0]
                drawjob = sample_noreplace(arange(wmaxi.size), 1) # draw job from jobs with the maximum size
                imax_req = wmaxi[drawjob]
                weights = has_sqft[:,imax_req] * probcomb[:,imax_req]
                draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                if (taken[draw] + this_jobs_sqft_table[draw,imax_req]) > capacity[draw]:
                    probcomb[draw,imax_req]=0
                    continue
                taken[draw] = taken[draw] + this_jobs_sqft_table[draw,imax_req]
                building_ids[job_index_non_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                probcomb[:,imax_req] = 0
                new_jobs_sqft[job_index_non_home_based[idx_in_jobs[imax_req]]] = int(min(self.maximum_sqft, max(round(this_jobs_sqft_table[draw,imax_req]), 
                                                                                     self.minimum_sqft)))
            
        logger.log_status("%s non home based jobs (out of %s nhb jobs) were placed." % (
                                                                (building_ids[job_index_non_home_based]>0).sum(),
                                                                 job_index_non_home_based.size))
        logger.log_status("Unplaced due to zero capacity: %s" % counter_zero_capacity)
        logger.log_status("Unplaced due to zero distribution: %s" % counter_zero_distr)
        
        job_dataset.modify_attribute(name="building_id", data = building_ids)
        
        # re-classify unplaced non-home based jobs to home-based if parcels contain residential buildings
        bldgs_is_residential = logical_and(logical_not(is_governmental), building_dataset.compute_variables(["urbansim_parcel.building.is_residential"], 
                                                           dataset_pool=dataset_pool))
        is_now_considered = logical_and(parcel_ids > 0, building_ids <= 0)
        job_index_non_home_based_unplaced = where(logical_and(is_now_considered, building_types == 2))[0]
        unique_parcels = unique(parcel_ids[job_index_non_home_based_unplaced])
        imputed_sqft = 0
        logger.log_status("Try to reclassify non-home-based jobs (excluding governemtal jobs) ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_non_home_based_unplaced] == parcel)[0]
            where_residential = where(bldgs_is_residential[idx_in_bldgs])[0]
            if where_residential.size > 0:
                building_types[job_index_non_home_based_unplaced[idx_in_jobs]] = 1 # set to home-based jobs
            elif non_res_sqft[idx_in_bldgs].sum() <= 0:
                # impute non_residential_sqft and assign buildings
                this_jobs_sectors = sectors[job_index_non_home_based_unplaced][idx_in_jobs]
                this_jobs_sqft_table = resize(jobs_sqft[idx_in_jobs], (idx_in_bldgs.size, idx_in_jobs.size))
                wn = jobs_sqft[idx_in_jobs] <= 0
                for i in range(idx_in_bldgs.size):
                    this_jobs_sqft_table[i, where(wn)[0]] = zone_bt_lookup[jobs_zones[idx_in_jobs[wn]], bldg_types_in_bldgs[idx_in_bldgs[i]]]
                probcomb = zeros(this_jobs_sqft_table.shape)
                bt = bldg_types_in_bldgs[idx_in_bldgs]
                ibt = building_type_dataset.get_id_index(bt)
                for i in range(probcomb.shape[0]):
                    for j in range(probcomb.shape[1]):
                        probcomb[i,j] = sector_bt_distribution[sector_index_mapping[this_jobs_sectors[j]],ibt[i]]
                for ijob in range(probcomb.shape[1]):
                    if (probcomb[:,ijob].sum() <= 0) or (impute_sqft_flags[job_index_non_home_based_unplaced[idx_in_jobs[ijob]]] == 0):
                        continue
                    weights = probcomb[:,ijob]
                    draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                    non_res_sqft[idx_in_bldgs[draw]] += this_jobs_sqft_table[draw,ijob]
                    imputed_sqft += this_jobs_sqft_table[draw,ijob]
                    building_ids[job_index_non_home_based_unplaced[idx_in_jobs[ijob]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                    new_jobs_sqft[job_index_non_home_based_unplaced[idx_in_jobs[ijob]]] = int(min(self.maximum_sqft, max(round(this_jobs_sqft_table[draw,ijob]), 
                                                                                     self.minimum_sqft)))
                    
        building_dataset.modify_attribute(name="non_residential_sqft", data = non_res_sqft)
        job_dataset.modify_attribute(name="building_id", data = building_ids)
        job_dataset.modify_attribute(name="building_type", data = building_types)
        job_dataset.modify_attribute(name="sqft", data = new_jobs_sqft)
        
        old_nhb_size = job_index_non_home_based.size
        job_index_home_based = where(logical_and(is_considered, building_types == 1))[0]
        job_index_non_home_based = where(logical_and(is_considered, building_types == 2))[0]
        logger.log_status("%s non-home based jobs reclassified as home-based." % (old_nhb_size-job_index_non_home_based.size))
        logger.log_status("%s non-residential sqft imputed." % imputed_sqft)
        logger.log_status("Additionaly, %s non home based jobs were placed due to imputed sqft." % \
                                                (building_ids[job_index_non_home_based_unplaced]>0).sum())
        # home_based jobs
        unique_parcels = unique(parcel_ids[job_index_home_based])
        capacity_in_buildings = building_dataset.compute_variables([
                          "urbansim_parcel.building.vacant_home_based_job_space"],
                             dataset_pool=dataset_pool)
        parcels_with_exceeded_capacity = []
        # iterate over parcels
        logger.log_status("Placing home-based jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            idx_in_jobs = where(parcel_ids[job_index_home_based] == parcel)[0]
            capacity = capacity_in_buildings[idx_in_bldgs]
            if capacity.sum() <= 0:
                continue
            probcomb = ones((idx_in_bldgs.size, idx_in_jobs.size))
            taken = zeros(capacity.shape, dtype="int32")
            while True:
                zero_cap = where((capacity - taken) <= 0)[0]
                probcomb[zero_cap,:] = 0
                if probcomb.sum() <= 0:
                    break
                req =  probcomb.sum(axis=0)
                wmaxi = where(req==req.max())[0]
                drawjob = sample_noreplace(arange(wmaxi.size), 1) # draw job from available jobs
                imax_req = wmaxi[drawjob]
                weights = probcomb[:,imax_req]
                # sample building
                draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                taken[draw] = taken[draw] + 1
                building_ids[job_index_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                probcomb[:,imax_req] = 0
            if -1 in building_ids[job_index_home_based[idx_in_jobs]]:
                parcels_with_exceeded_capacity.append(parcel)
        parcels_with_exceeded_capacity = array(parcels_with_exceeded_capacity)    
        
        logger.log_status("%s home based jobs (out of %s hb jobs) were placed." % ((building_ids[job_index_home_based]>0).sum(),
                                                                         job_index_home_based.size))
        
        # assign building type where missing
        # determine regional distribution
        idx_home_based = where(building_types == 1)[0]
        idx_non_home_based = where(building_types == 2)[0]
        idx_bt_missing = where(building_types <= 0)[0]
        if idx_bt_missing.size > 0:
            # sample building types
            sample_bt = probsample_replace(array([1,2]), idx_bt_missing.size, 
               array([idx_home_based.size, idx_non_home_based.size])/float(idx_home_based.size + idx_non_home_based.size))
            # coerce to int32 (on a 64 bit machine, sample_bt will be of type int64)
            building_types[idx_bt_missing] = sample_bt.astype(int32)
            job_dataset.modify_attribute(name="building_type", data = building_types) 
        
        if out_storage is not None:
            job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage, attributes=AttributeType.PRIMARY)
            building_dataset.write_dataset(out_table_name='buildings', out_storage=out_storage, attributes=AttributeType.PRIMARY)
        logger.log_status("Assigning building_id to jobs done.")
Code Example #10
    def run(
        self,
        individual_dataset,
        counts_dataset,
        fraction_dataset,
        id_name1="blockgroup_id",
        id_name2="zone_id",
        fraction_attribute_name="fraction",
        out_storage=None,
    ):

        """
        """
        assert id_name1 in individual_dataset.get_known_attribute_names()
        if id_name2 not in individual_dataset.get_known_attribute_names():
            individual_dataset.add_primary_attribute(-1 * ones(individual_dataset.size()), id_name2)

        lucky_household_index = array([], dtype="int32")
        hh_zone_id = array([], dtype="int32")
        output_data = {}

        logger.start_block("Start assigning individuals")
        zone_ids = counts_dataset.get_attribute(id_name2)
        building_types = counts_dataset.get_attribute("building_type_id")
        households = counts_dataset.get_attribute("households")
        for zone_id, building_type, n in zip(zone_ids, building_types, households):
            logger.log_status("n(%s=%i & %s=%i) = %s:" % (id_name2, zone_id, "building_type_id", building_type, n))
            fraction_index = where(fraction_dataset.get_attribute(id_name2) == zone_id)

            blockgroup_ids = fraction_dataset.get_attribute_by_index(id_name1, fraction_index)
            fractions = fraction_dataset.get_attribute_by_index(fraction_attribute_name, fraction_index)
            for blockgroup_id, fraction in zip(blockgroup_ids, fractions):
                nn = int(round(n * fraction))
                logger.log_status("\tfrac(%s=%s) = %s, n = %s" % ("blockgroup_id", blockgroup_id, fraction, nn))
                if nn >= 1:
                    suitable_household_index = where(
                        logical_and(
                            individual_dataset.get_attribute(id_name1) == blockgroup_id,
                            individual_dataset.get_attribute("building_type_id") == building_type,
                        )
                    )[0]
                    logger.log_status(
                        "\t\t sample %s from %s suitable households" % (nn, suitable_household_index.size)
                    )
                    if suitable_household_index.size == 0:
                        logger.log_warning("\tNo suitable households")
                        continue
                    lucky_household_index = concatenate(
                        (lucky_household_index, sample_replace(suitable_household_index, nn))
                    )
                    hh_zone_id = concatenate((hh_zone_id, [zone_id] * nn))

        for attribute_name in individual_dataset.get_known_attribute_names():
            output_data[attribute_name] = individual_dataset.get_attribute_by_index(
                attribute_name, lucky_household_index
            )
        output_data["original_household_id"] = output_data["household_id"]
        output_data["household_id"] = 1 + arange(lucky_household_index.size)
        output_data["zone_id"] = hh_zone_id

        storage = StorageFactory().get_storage("dict_storage")
        storage.write_table(table_name="households", table_data=output_data)
        output_dataset = Dataset(in_storage=storage, id_name=["household_id"], in_table_name="households")
        output_dataset.write_dataset(out_storage=out_storage, out_table_name="households")
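One subtlety in the allocation above: each blockgroup's share is rounded independently (`nn = int(round(n * fraction))`), so the shares need not add back up to the zone total n. A two-line check makes the effect concrete:

    n = 5
    fractions = [0.33, 0.33, 0.34]
    shares = [int(round(n * f)) for f in fractions]
    print(shares, sum(shares))   # [2, 2, 2] 6 -- one household more than n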
Code Example #11
    def _add(self, agents_pool, amount, agent_dataset, location_dataset,
             this_refinement, dataset_pool):

        fit_index = self.get_fit_agents_index(
            agent_dataset, this_refinement.agent_expression,
            this_refinement.location_expression, dataset_pool)
        movers_index = array([], dtype="int32")
        amount_from_agents_pool = min(amount, len(agents_pool))
        if amount_from_agents_pool > 0:
            agents_index_from_agents_pool = sample_noreplace(
                agents_pool, amount_from_agents_pool)
            [agents_pool.remove(i) for i in agents_index_from_agents_pool]
            if fit_index.size == 0:
                ##cannot find agents to copy their location or clone them, place agents in agents_pool
                if amount > amount_from_agents_pool:
                    logger.log_warning("Refinement requests to add %i agents, but there are only %i agents subtracted from previous action(s) and no agents satisfying %s to clone from; " \
                                       "add %i agents instead" % (amount, amount_from_agents_pool,
                                                                  ' and '.join( [this_refinement.agent_expression,
                                                                                 this_refinement.location_expression]).strip(' and '),
                                                                  amount_from_agents_pool,) )
                    amount = amount_from_agents_pool

                is_suitable_location = location_dataset.compute_variables(
                    this_refinement.location_expression,
                    dataset_pool=dataset_pool)
                location_id_for_agents_pool = sample_replace(
                    location_dataset.get_id_attribute()[is_suitable_location],
                    amount_from_agents_pool)
            else:

                agents_index_for_location = sample_replace(
                    fit_index, amount_from_agents_pool)
                location_id_for_agents_pool = agent_dataset.get_attribute(
                    location_dataset.get_id_name()
                    [0])[agents_index_for_location]
                movers_index = concatenate(
                    (movers_index, agents_index_for_location))

        elif fit_index.size == 0:
            ## no agents in agents_pool and no agents to clone either, --> fail
            logger.log_error( "Action 'add' failed: there is no agent subtracted from previous action, and no suitable agents satisfying %s to clone from." % \
                              ' and '.join( [this_refinement.agent_expression, this_refinement.location_expression] ).strip(' and ') )
            return

        if amount > amount_from_agents_pool:
            agents_index_to_clone = sample_replace(
                fit_index, amount - amount_from_agents_pool)
            movers_index = concatenate((movers_index, agents_index_to_clone))

        if movers_index.size > 0 and this_refinement.location_capacity_attribute is not None and len(
                this_refinement.location_capacity_attribute) > 0:
            movers_location_id = agent_dataset.get_attribute(
                location_dataset.get_id_name()[0])[movers_index]
            movers_location_index = location_dataset.get_id_index(
                movers_location_id)
            # see previous comment about histogram function
            num_of_movers_by_location = histogram(
                movers_location_index,
                bins=arange(location_dataset.size() + 1))[0]
            num_of_agents_by_location = location_dataset.compute_variables( "number_of_agents=%s.number_of_agents(%s)" % \
                                                                            ( location_dataset.dataset_name,
                                                                            agent_dataset.dataset_name ),
                                                                            dataset_pool=dataset_pool)

            expand_factor = safe_array_divide(
                (num_of_agents_by_location +
                 num_of_movers_by_location).astype('float32'),
                num_of_agents_by_location,
                return_value_if_denominator_is_zero=1.0)
            new_values = round_(expand_factor * location_dataset.get_attribute(
                this_refinement.location_capacity_attribute))
            location_dataset.modify_attribute(
                this_refinement.location_capacity_attribute, new_values)
            self._add_refinement_info_to_dataset(location_dataset,
                                                 self.id_names,
                                                 this_refinement,
                                                 index=movers_location_index)
        if amount_from_agents_pool > 0:
            agent_dataset.modify_attribute(location_dataset.get_id_name()[0],
                                           location_id_for_agents_pool,
                                           agents_index_from_agents_pool)
            self._add_refinement_info_to_dataset(
                agent_dataset,
                self.id_names,
                this_refinement,
                index=agents_index_from_agents_pool)
        if amount > amount_from_agents_pool:
            new_agents_index = agent_dataset.duplicate_rows(
                agents_index_to_clone)
            self._add_refinement_info_to_dataset(agent_dataset,
                                                 self.id_names,
                                                 this_refinement,
                                                 index=agents_index_to_clone)
            self._add_refinement_info_to_dataset(agent_dataset,
                                                 self.id_names,
                                                 this_refinement,
                                                 index=new_agents_index)
Code Example #12
File: refinement_model.py  Project: psrc/urbansim
    def _add(self, agents_pool, amount, 
             agent_dataset, location_dataset, 
             this_refinement,
             dataset_pool ):
        
        fit_index = self.get_fit_agents_index(agent_dataset, 
                                              this_refinement.agent_expression, 
                                              this_refinement.location_expression,
                                              dataset_pool)
        movers_index = array([],dtype="int32")
        amount_from_agents_pool = min( amount, len(agents_pool) )
        if amount_from_agents_pool > 0:
            agents_index_from_agents_pool = sample_noreplace( agents_pool, amount_from_agents_pool )
            [ agents_pool.remove(i) for i in agents_index_from_agents_pool ]
            if fit_index.size == 0:
                ##cannot find agents to copy their location or clone them, place agents in agents_pool
                if amount > amount_from_agents_pool:                   
                    logger.log_warning("Refinement requests to add %i agents,  but there are only %i agents subtracted from previous action(s) and no agents satisfying %s to clone from;" \
                                   "add %i agents instead" % (amount, amount_from_agents_pool, 
                                                              ' and '.join( [this_refinement.agent_expression, 
                                                                           this_refinement.location_expression]).strip(' and '), 
                                                              amount_from_agents_pool,) )
                    amount = amount_from_agents_pool
                # sample from all suitable locations
                is_suitable_location = location_dataset.compute_variables( this_refinement.location_expression,
                                                                           dataset_pool=dataset_pool )
                location_id_for_agents_pool = sample_replace( location_dataset.get_id_attribute()[is_suitable_location],
                                                                 amount_from_agents_pool )
            else:
                #sample from locations of suitable agents            
                agents_index_for_location = sample_replace( fit_index, amount_from_agents_pool)
                location_id_for_agents_pool = agent_dataset.get_attribute( location_dataset.get_id_name()[0] 
                                                                         )[agents_index_for_location]
                movers_index = concatenate( (movers_index, agents_index_for_location) )

        elif fit_index.size == 0:
            ## no agents in agents_pool and no agents to clone either, --> fail
            logger.log_error( "Action 'add' failed: there is no agent subtracted from previous action, and no suitable agents satisfying %s to clone from." % \
                              ' and '.join( [this_refinement.agent_expression, this_refinement.location_expression] ).strip(' and ') )
            return
            
        if amount > amount_from_agents_pool:
            agents_index_to_clone = sample_replace( fit_index, amount - amount_from_agents_pool)
            movers_index = concatenate( (movers_index, agents_index_to_clone) )

        if movers_index.size > 0 and this_refinement.location_capacity_attribute is not None and len(this_refinement.location_capacity_attribute) > 0:
            movers_location_id = agent_dataset.get_attribute( location_dataset.get_id_name()[0] )[movers_index]
            movers_location_index = location_dataset.get_id_index( movers_location_id )
            # see previous comment about histogram function
            num_of_movers_by_location = histogram( movers_location_index, bins=arange(location_dataset.size() +1) )[0]
            num_of_agents_by_location = location_dataset.compute_variables( "number_of_agents=%s.number_of_agents(%s)" % \
                                                                            ( location_dataset.dataset_name,
                                                                            agent_dataset.dataset_name ),
                                                                            dataset_pool=dataset_pool)
            
            expand_factor = safe_array_divide( (num_of_agents_by_location + num_of_movers_by_location ).astype('float32'),
                                                num_of_agents_by_location, return_value_if_denominator_is_zero = 1.0 )
            new_values = round_( expand_factor * location_dataset.get_attribute(this_refinement.location_capacity_attribute) )
            location_dataset.modify_attribute( this_refinement.location_capacity_attribute, 
                                               new_values
                                           )
            self._add_refinement_info_to_dataset(location_dataset, self.id_names, this_refinement, index=movers_location_index)
        if amount_from_agents_pool > 0:
            agent_dataset.modify_attribute( location_dataset.get_id_name()[0],
                                            location_id_for_agents_pool,
                                            agents_index_from_agents_pool
                                            )
            self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement, index=agents_index_from_agents_pool)
        if amount > amount_from_agents_pool:
            new_agents_index = agent_dataset.duplicate_rows(agents_index_to_clone)
            self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement, index=agents_index_to_clone)
            self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement, index=new_agents_index)
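When movers land in locations, the code above inflates the capacity attribute by (agents + movers) / agents, with a guard for empty locations. A NumPy sketch of that expansion; `safe_array_divide` here is a stand-in with the behavior the call site implies, not the opus_core original:

    import numpy as np

    def safe_array_divide(num, den, return_value_if_denominator_is_zero=1.0):
        # elementwise num / den, substituting a default where den == 0
        out = np.full(num.shape, return_value_if_denominator_is_zero)
        np.divide(num, den, out=out, where=(den != 0))
        return out

    agents = np.array([10., 0., 4.])   # current agents per location
    movers = np.array([5., 3., 0.])    # newly placed agents per location
    capacity = np.array([20., 8., 6.])

    expand = safe_array_divide(agents + movers, agents)   # [1.5, 1.0, 1.0]
    new_capacity = np.round(expand * capacity)            # [30., 8., 6.]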
Code Example #13
    def run(self, job_dataset, dataset_pool, out_storage=None, jobs_table="jobs"):
        """
        Algorithm:
            1. For all non_home_based jobs that have parcel_id assigned but no building_id, try
                to choose a building from all buildings in that parcel. Draw the building with probabilities
                given by the sector-building_type distribution. The job sizes are
                fitted into the available space (the attribute job.sqft is updated).
            2. For all non_home_based jobs for which no building was found in step 1, check
                if the parcel has residential buildings. In such a case, re-assign the jobs to be
                home-based.
                Otherwise, if sum of non_residential_sqft over the involved buildings is 0,
                for all jobs that have impute_building_sqft_flag=True draw a building using
                the sector-building_type distribution and impute the corresponding sqft to 
                the non_residential_sqft of that building.
            3. For all home_based jobs that have parcel_id assigned but no building_id, try
                to choose a building from all buildings in that parcel. 
                The capacity of a single-family building is determined from sizes of the households living there 
                (for each household the minimum of number of members and 2 is taken). 
                For multi-family buildings the capacity is 50.
            4. Assign a building type to jobs that have missing building type. It is sampled 
                from the regional-wide distribution of home based and non-home based jobs.
            5. Update the table 'building_sqft_per_job' using the updated job.sqft.
        'in_storage' should contain the jobs table and the zone_averages_table. The 'dataset_pool_storage'
        should contain all other tables needed (buildings, households, building_types). 
        """
        parcel_ids = job_dataset.get_attribute("parcel_id")
        building_ids = job_dataset.get_attribute("building_id")
        home_base_status = job_dataset.get_attribute("home_based_status")
        sectors = job_dataset.get_attribute("sector_id")
        
        is_considered = logical_and(parcel_ids > 0, building_ids <= 0) # jobs that have assigned parcel but not building
        job_index_home_based = where(logical_and(is_considered, home_base_status == 0))[0]
        is_governmental_job = sectors == 18
        is_edu_job = sectors == 19
        job_index_governmental = where(logical_and(is_considered, is_governmental_job))[0]
        job_index_edu = where(logical_and(is_considered, is_edu_job))[0]
        
        building_dataset = dataset_pool.get_dataset('building')
        parcel_ids_in_bldgs = building_dataset.get_attribute("parcel_id")
        bldg_ids_in_bldgs = building_dataset.get_id_attribute()
        bldg_types_in_bldgs = building_dataset.get_attribute("building_type_id")
        
        non_res_sqft = building_dataset.get_attribute("non_residential_sqft")

        preferred_nhb_btypes =   (building_dataset['building.building_type_id'] == 3) + \
                                (building_dataset['building.building_type_id'] == 8) + \
                                (building_dataset['building.building_type_id'] == 13) + \
                                (building_dataset['building.building_type_id'] == 20) + \
                                (building_dataset['building.building_type_id'] == 21)
        non_res_sqft_preferred =  non_res_sqft  * preferred_nhb_btypes              
                                          
        is_governmental = building_dataset.compute_variables([
            "numpy.logical_and(building.disaggregate(building_type.generic_building_type_id == 7), building.building_type_id <> 18)"],
                                                                     dataset_pool=dataset_pool)
        idx_gov = where(is_governmental)[0]
        is_edu = building_dataset['building.building_type_id'] == 18
        idx_edu = where(is_edu)[0]
        
        bldgs_is_residential = logical_and(logical_not(logical_or(is_governmental, is_edu)), 
                                           building_dataset.compute_variables(["urbansim_parcel.building.is_residential"], 
                                                           dataset_pool=dataset_pool))
        
        bldgs_isnot_residential = logical_not(bldgs_is_residential)
        
        # assign buildings to educational jobs randomly
        unique_parcels = unique(parcel_ids[job_index_edu])
        logger.log_status("Placing educational jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs[idx_edu] == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_edu] == parcel)[0]
            draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
            building_ids[job_index_edu[idx_in_jobs]] = bldg_ids_in_bldgs[idx_edu[draw]]
        logger.log_status("%s educational jobs (out of %s edu. jobs) were placed." % (
                                        (building_ids[job_index_edu]>0).sum(), job_index_edu.size))
        
        # assign buildings to governmental jobs randomly
        unique_parcels = unique(parcel_ids[job_index_governmental])
        logger.log_status("Placing governmental jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs[idx_gov] == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_governmental] == parcel)[0]
            draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
            building_ids[job_index_governmental[idx_in_jobs]] = bldg_ids_in_bldgs[idx_gov[draw]]
        logger.log_status("%s governmental jobs (out of %s gov. jobs) were placed." % (
                        (building_ids[job_index_governmental]>0).sum(), job_index_governmental.size))
        logger.log_status("The unplaced governmental jobs will be added to the non-home based jobs.")
        
        #tmp = unique(parcel_ids[job_index_governmental][building_ids[job_index_governmental]<=0])
        #output_dir =  "/Users/hana"
        #write_to_text_file(os.path.join(output_dir, 'parcels_with_no_gov_bldg.txt'), tmp, delimiter='\n')
        
        # consider the unplaced governmental jobs together with other non-home-based jobs
        is_now_considered = logical_and(is_considered, building_ids <= 0)
        job_index_non_home_based = where(logical_and(is_now_considered, logical_or(home_base_status == 0, is_governmental_job)))[0]
                                    
        # assign buildings to non_home_based jobs based on available space
        unique_parcels = unique(parcel_ids[job_index_non_home_based])
        # iterate over parcels
        logger.log_status("Placing non-home-based jobs ...")
        nhb_not_placed = 0
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_non_home_based] == parcel)[0]
            # sample proportionally to the building size
            weights = non_res_sqft_preferred[idx_in_bldgs] # 1.preference: preferred building types with non-res sqft 
            if weights.sum() <= 0:
                weights = preferred_nhb_btypes[idx_in_bldgs] # 2.preference: preferred building types
                if weights.sum() <= 0:
                    weights = non_res_sqft[idx_in_bldgs] # 3.preference: any building with non-res sqft 
                    if weights.sum() <= 0: 
                        weights = bldgs_isnot_residential[idx_in_bldgs] # 4.preference: any non-res building
                        if weights.sum() <= 0: 
                            nhb_not_placed = nhb_not_placed + idx_in_jobs.size
                            continue
            draw = probsample_replace(idx_in_bldgs, idx_in_jobs.size, weights/float(weights.sum()))
            building_ids[job_index_non_home_based[idx_in_jobs]] = bldg_ids_in_bldgs[draw]
            
        logger.log_status("%s non home based jobs (out of %s nhb jobs) were placed. No capacity in buildings for %s jobs." % (
                                                                (building_ids[job_index_non_home_based]>0).sum(),
                                                                 job_index_non_home_based.size, nhb_not_placed))
        
        job_dataset.modify_attribute(name="building_id", data = building_ids)
        
        # re-classify unplaced non-home based jobs to home-based if parcels contain residential buildings

        is_now_considered = logical_and(parcel_ids > 0, building_ids <= 0)
        job_index_non_home_based_unplaced = where(logical_and(is_now_considered, 
                                               logical_and(home_base_status == 0, logical_not(is_governmental_job))))[0]
        unique_parcels = unique(parcel_ids[job_index_non_home_based_unplaced])

        logger.log_status("Try to reclassify non-home-based jobs (excluding governmental jobs) ...")
        nhb_reclass = 0
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_non_home_based_unplaced] == parcel)[0]
            where_residential = where(bldgs_is_residential[idx_in_bldgs])[0]
            if where_residential.size > 0:
                #home_base_status[job_index_non_home_based_unplaced[idx_in_jobs]] = 1 # set to home-based jobs
                nhb_reclass = nhb_reclass + idx_in_jobs.size
            else:
                draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
                #building_ids[job_index_non_home_based_unplaced[idx_in_jobs]] = bldg_ids_in_bldgs[draw]

        #job_dataset.modify_attribute(name="home_base_status", data = home_base_status)
        #job_dataset.modify_attribute(name="building_id", data = building_ids)
        
        job_index_home_based = where(logical_and(is_considered, home_base_status == 1))[0]
        logger.log_status("%s non-home based jobs reclassified as home-based." % nhb_reclass)

        # home_based jobs
        unique_parcels = unique(parcel_ids[job_index_home_based])
        capacity_in_buildings = building_dataset.compute_variables([
                          "clip_to_zero(urbansim_parcel.building.total_home_based_job_space-building.aggregate(job.home_based_status==1))"],
                             dataset_pool=dataset_pool)
        parcels_with_exceeded_capacity = []
        # iterate over parcels
        logger.log_status("Placing home-based jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            idx_in_jobs = where(parcel_ids[job_index_home_based] == parcel)[0]
            capacity = capacity_in_buildings[idx_in_bldgs]
            if capacity.sum() <= 0:
                continue
            probcomb = ones((idx_in_bldgs.size, idx_in_jobs.size))
            taken = zeros(capacity.shape, dtype="int32")
            while True:
                zero_cap = where((capacity - taken) <= 0)[0]
                probcomb[zero_cap,:] = 0
                if probcomb.sum() <= 0:
                    break
                req =  probcomb.sum(axis=0)
                wmaxi = where(req==req.max())[0]
                drawjob = sample_noreplace(arange(wmaxi.size), 1) # draw job from available jobs
                imax_req = wmaxi[drawjob]
                weights = probcomb[:,imax_req]
                # sample building
                draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                taken[draw] = taken[draw] + 1
                building_ids[job_index_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                probcomb[:,imax_req] = 0
            if -1 in building_ids[job_index_home_based[idx_in_jobs]]:
                parcels_with_exceeded_capacity.append(parcel)
        parcels_with_exceeded_capacity = array(parcels_with_exceeded_capacity)    
        
        logger.log_status("%s home based jobs (out of %s hb jobs) were placed." % ((building_ids[job_index_home_based]>0).sum(),
                                                                         job_index_home_based.size))
        
        # assign building type where missing
        # determine regional distribution
        idx_home_based = where(home_base_status == 1)[0]
        idx_non_home_based = where(home_base_status == 0)[0]
        idx_bt_missing = where(home_base_status <= 0)[0]
        if idx_bt_missing.size > 0:
            # sample building types
            sample_bt = probsample_replace(array([1,0]), idx_bt_missing.size, 
               array([idx_home_based.size, idx_non_home_based.size])/float(idx_home_based.size + idx_non_home_based.size))
            # coerce to int32 (on a 64 bit machine, sample_bt will be of type int64)
            home_base_status[idx_bt_missing] = sample_bt.astype(int32)
            job_dataset.modify_attribute(name="home_based_status", data = home_base_status) 
        
        if out_storage is not None:
            job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage, attributes=AttributeType.PRIMARY)
        logger.log_status("Assigning building_id to jobs done.")
Code Example #14
    def run(self, realestate_dataset,
            year=None, 
            occupied_spaces_variable="occupied_units",
            total_spaces_variable="total_units",
            target_attribute_name='target_vacancy_rate',
            sample_from_dataset = None,
            sample_filter="",
            reset_attribute_value={}, 
            year_built = 'year_built',
            dataset_pool=None,
            append_to_realestate_dataset = False,
            table_name = "development_projects",
            dataset_name = "development_project",
            id_name = 'development_project_id',
            **kwargs):
        """         
        sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
        """
        
        if self.target_vancy_dataset is None:
            raise RuntimeError, "target_vacancy_rate dataset is unspecified."
        
        if not sample_from_dataset:
            sample_from_dataset = realestate_dataset
            
        #if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()
        alldata = dataset_pool.get_dataset('alldata')
        unit_names = dataset_pool.get_dataset('building_type').get_attribute('unit_name')
        sqft_per_job = dataset_pool.get_dataset('building_sqft_per_job')
        zones = realestate_dataset.compute_variables("building.disaggregate(parcel.zone_id)")
        type_ids = realestate_dataset.get_attribute("building_type_id")
        building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(zones.max(), type_ids.max())
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute('year')==year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)
        
        column_names = list(set( self.target_vancy_dataset.get_known_attribute_names() ) - set( [ target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_'] ))
        column_names.sort(reverse=True)
        column_values = dict([ (name, target_vacancy_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        dataset_known_attributes = realestate_dataset.get_known_attribute_names()
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
            if variable not in sample_dataset_known_attributes:
                sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
                
        dataset_known_attributes = realestate_dataset.get_known_attribute_names() #update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1
                
        sampled_index = array([], dtype=int32)

        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
        error_log = ''
        for index in range(target_vacancy_for_this_year.size()):
            this_sampled_index = array([], dtype=int32)
            indicator = ones( realestate_dataset.size(), dtype='bool' )
            sample_indicator = ones( sample_from_dataset.size(), dtype='bool' )
            criterion = {}   # for logging
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = realestate_dataset.get_attribute(attribute)
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (attribute, realestate_dataset.get_dataset_name())
                
                if attribute + '_min' in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute+'_min')[index] 
                    criterion.update({attribute + '_min':amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                        sample_indicator *= sample_attribute >= amin
                if attribute + '_max' in column_names: 
                    amax = target_vacancy_for_this_year.get_attribute(attribute+'_max')[index]
                    criterion.update({attribute + '_max':amax}) 
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names: 
                    aval = column_values[attribute][index] 
                    criterion.update({attribute:aval}) 
                    if aval == -1:
                        continue
                    elif aval == -2:  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        indicator *= dataset_attribute == aval
                        sample_indicator *= sample_attribute == aval
                        
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
            
            logger.be_quiet() #temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
            realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            if unit_names[index]=="residential_units":
                num_units = alldata.compute_variables("alldata.aggregate_all(household.building_type_id==%s)" % (index+1))
                #persons = household_set.compute_variables("%s.number_of_agents(%s)" % (hh_ds_name, person_ds_name), resources=resources)
                num_units = num_units[0]
            else:
                num_units = alldata.compute_variables("alldata.aggregate_all(job.disaggregate(employment_submarket.building_type_id)==%s)" % (index+1))
                num_units = num_units * building_sqft_per_job_table[1, (index+1)]
                num_units = num_units[0]
            #need to make sure that job empsubmarket doesn't rely on building...
            #Must do non-home-based jobs only and then multiply by building_sqft
            logger.talk()
            
            actual_num = (indicator * realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            #target_num = int(round( (indicator * realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /\
            target_num = int(round( num_units /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index]) 
                            ))
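            # Worked example (hypothetical numbers): with num_units = 950 occupied units
            # and a target vacancy rate of 0.05, target_num = round(950 / (1 - 0.05)) = 1000.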
            diff = target_num - actual_num
            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int( diff / mean_size )
                    ## sample at least 1 project when diff > 0; otherwise the loop below never terminates when num_of_projects_to_sample = 0
                    num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[0:(1+searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                              ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
            #if diff < 0: #TODO demolition; not yet supported
            
            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0: action = "+" + str(action_num)
                if diff < 0: action = "-" + str(action_num)
            cat = [ str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(diff), action]
            
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:                
                logger.log_status("\t".join(cat))
            
        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
            
        result_data = {}
        result_dataset = None
        index = array([], dtype='int32')
        if sampled_index.size > 0:
            ### ideally duplicate_rows() is all that is needed to add the newly cloned rows;
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32'))
            ## also add 'independent_variables' to the new dataset
            for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
                if attribute in reset_attribute_value:
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
        
            if id_name and result_data and id_name not in result_data:
                result_data[id_name] = arange(sampled_index.size, dtype='int32') + 1
        
            storage = StorageFactory().get_storage('dict_storage')
            storage.write_table(table_name=table_name, table_data=result_data)
            
            result_dataset = Dataset(id_name = id_name,
                                      in_storage = storage,
                                      in_table_name = table_name,
                                      dataset_name = dataset_name
                                      )
            index = arange(result_dataset.size())
        
            
        if append_to_realestate_dataset:
            if len(result_data) > 0:
                index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)                
            result_dataset = realestate_dataset
        
        return (result_dataset, index)
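
Note: the heart of the example above is the oversample-and-truncate step: projects are drawn with replacement until their cumulative size covers the space deficit, then the draw list is cut at the first position where the cumulative sum reaches the deficit. A minimal sketch with made-up sizes, mimicking opus_core's sample_replace with numpy.random.choice (an assumed, behavior-equivalent substitute):

    import numpy as np

    sizes = np.array([10, 40, 25, 5, 80])        # hypothetical total_units of the sampleable projects
    legit_index = np.arange(sizes.size)
    diff = 120                                   # space deficit to fill
    num_to_sample = max(int(diff / sizes[legit_index].mean()), 1)  # at least 1, or the loop never terminates
    sampled = np.array([], dtype=np.int32)
    while sizes[sampled].sum() < diff:
        sampled = np.concatenate((sampled, np.random.choice(legit_index, num_to_sample)))
    # cut at the first position where the cumulative size reaches diff
    sampled = sampled[0:(1 + np.searchsorted(np.cumsum(sizes[sampled]), diff))]
    assert sizes[sampled].sum() >= diff

The final draw typically overshoots diff by part of one project, which is why the status log above reports the actually added spaces ("action") next to the difference.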
Code example #15
    def run(self, realestate_dataset,
            living_units_dataset,
            year=None, 
            occupied_spaces_variable="occupied_units",
            total_spaces_variable="total_units",
            target_attribute_name='target_vacancy_rate',
            sample_from_dataset = None,
            living_units_from_dataset = None,
            sample_filter="",
            reset_attribute_value={}, 
            year_built = 'year_built',
            dataset_pool=None,
            append_to_realestate_dataset = False,
            table_name = "development_projects",
            dataset_name = "development_project",
            id_name = 'development_project_id',
            **kwargs):
        """         
        sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
        """
        
        if self.target_vancy_dataset is None:
            raise RuntimeError("target_vacancy_rate dataset is unspecified.")
        
        if not sample_from_dataset or not living_units_from_dataset:
            logger.log_note('No development projects (or no living units of development projects) to sample from; development projects are taken from the building dataset and living units from the living_units dataset.')
            sample_from_dataset = realestate_dataset
            living_units_from_dataset = living_units_dataset
            
        if dataset_pool is None:
            dataset_pool = SessionConfiguration().get_dataset_pool()
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute('year')==year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)
        
        column_names = list(set( self.target_vancy_dataset.get_known_attribute_names() ) - set( [ target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_'] ))
        column_names.sort(reverse=True)
        column_values = dict([ (name, target_vacancy_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        
        
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
        for attribute in independent_variables:
            if attribute not in sample_dataset_known_attributes:
                sample_from_dataset.compute_one_variable_with_unknown_package(attribute, dataset_pool=dataset_pool)
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names() #update after compute
                
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in sample_dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1
                
        sampled_index = array([], dtype=int32)

        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "expected", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "expected", "difference", "action"]))
        error_log = ''
        for index in range(target_vacancy_for_this_year.size()):
            sample_indicator = ones( sample_from_dataset.size(), dtype='bool' )
            criterion = {}   # for logging
            for attribute in independent_variables:
                if attribute in sample_dataset_known_attributes:
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (attribute, realestate_dataset.get_dataset_name())
                
                if attribute + '_min' in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute+'_min')[index] 
                    criterion.update({attribute + '_min':amin})
                    if amin != -1:
                        sample_indicator *= sample_attribute >= amin
                if attribute + '_max' in column_names: 
                    amax = target_vacancy_for_this_year.get_attribute(attribute+'_max')[index]
                    criterion.update({attribute + '_max':amax}) 
                    if amax != -1:
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names: 
                    aval = column_values[attribute][index] 
                    criterion.update({attribute:aval}) 
                    if aval == -1:
                        continue
                    elif aval == -2:  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        sample_indicator *= sample_attribute == aval
                        
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
            
            # NOTE: 'col' is never assigned in this method body; under Python 2 scoping it
            # leaks from the list comprehension over column_names above and holds the last
            # column name, i.e. the stratification column, whose values are treated as
            # sector ids below (0 meaning residential).
            this_total_spaces_variable += '_' + str(criterion[col])
            this_occupied_spaces_variable += '_' + str(criterion[col])
            
            logger.be_quiet() #temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
            realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            logger.talk()
            
            actual_num = (realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            #target_num is obsolete with this version.
            target_num = int(round( (realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
            '''If the target vacancy is very small and the inflow to the region is large, it is
            not enough to check only the current simulation year's vacancy. The simulation is
            more robust if the BTM anticipates the next year's population (of households and jobs).
            This version calculates the non-residential spaces based on the sqft requirements of
            jobs per sector.
            #TODO: Make code more general to cover various stratifications in the real estate market.
            '''
            if criterion[col] == 0:
                """ Option without demography model
                idx = where(self.control_totals.get_attribute("year")==year + 1)[0]
                this_years_control_totals = DatasetSubset(self.control_totals, idx)
                expected_num = int(round( this_years_control_totals.get_attribute('total_number_of_households').sum() /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))""" 
                hh_dataset = dataset_pool.get_dataset( 'household' )
                number_of_hh = hh_dataset.size()
                expected_num = int(round( number_of_hh /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index]))) 
            if criterion[col] > 0:
                # Getting control totals per sector in a dictionary
                idx = where(self.employment_control_totals.get_attribute("year")==year)[0] # index of the control totals for this simulation year
                this_years_control_totals = DatasetSubset(self.employment_control_totals, idx) # subset of the control totals
                idx_non_home_based = where(logical_and(this_years_control_totals['home_based_status'] == 0,this_years_control_totals['sector_id'] == criterion[col]))[0] # non-home-based control totals of the current sector; only non-home-based jobs are supported. TODO: Support home-based jobs.
                this_years_control_totals = DatasetSubset(this_years_control_totals, idx_non_home_based)
#                idx_current_sector = where(this_years_control_totals['sector_id'] == criterion[col])[0]
                next_years_jobs = this_years_control_totals['number_of_jobs']
                controled_sectors = this_years_control_totals['sector_id']                
                sector_job_totals = dict(zip(controled_sectors, next_years_jobs.T)) # dictionary with sector ids as keys and numbers of jobs as values, to ensure multiplication with the right requirements

                # Get info on the required sqft per sector.
#                a_zone_id = min(self.building_sqft_per_job['zone_id']) # Get a zone number from the definition table. Here choose to take the minimum which is arbitrary. This code assumes constant sqft requirements in all zones. TODO: Support different sqft requirements per zone.
#                idx_zone = where(self.building_sqft_per_job['zone_id'] == a_zone_id)[0]
#                subset_sqft_per_job = DatasetSubset(self.building_sqft_per_job, idx_zone)
#                sqft_per_job = subset_sqft_per_job['building_sqft_per_job']
#                sectors_with_requirements = subset_sqft_per_job['sector_id']
#                requirements_by_sector = dict(zip(sectors_with_requirements, sqft_per_job.T))
#                
#                needed_sqft_over_all_sectors = sector_job_totals[criterion[col]] * requirements_by_sector[criterion[col]]
#                expected_num = int(round( needed_sqft_over_all_sectors /\
#                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
                
                idx_sector = where(self.sectors['sector_id'] == criterion[col])
                subset_sqft_per_job_sector = DatasetSubset(self.sectors, idx_sector)
                needed_sqft_current_sector = sector_job_totals[criterion[col]] * subset_sqft_per_job_sector.get_attribute('sqm_per_job')
                expected_num = int(round( needed_sqft_current_sector /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
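                # Worked example (hypothetical numbers): 500 expected jobs in this sector
                # and 30 sqm per job give 15000 sqm needed; with a target vacancy rate of
                # 0.04, expected_num = round(15000 / (1 - 0.04)) = 15625.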

            diff = expected_num - actual_num
            
            #Previous version which is checking the current years occupation.
            #diff = target_num - actual_num
            
            this_sampled_index = array([], dtype=int32)
            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int( diff / mean_size )
                    ## sample at least 1 project when diff > 0; otherwise the loop below never terminates when num_of_projects_to_sample = 0
                    num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[0:(1+searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                              ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
            #if diff < 0: #TODO demolition; not yet supported
            
            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0: action = "+" + str(action_num)
                if diff < 0: action = "-" + str(action_num)
            cat = [ str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(expected_num), str(diff), action]
            
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:                
                logger.log_status("\t".join(cat))
            
        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
        
        
        #logger.log_note("Updating attributes of %s sampled development events." % sampled_index.size)
        result_data = {}
        result_dataset = None
        index = array([], dtype='int32')
        if sampled_index.size > 0:
            ### ideally duplicate_rows() is all that is needed to add the newly cloned rows;
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            #result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32')) # Commented out because year_built is reset after the loop below.
            ## also add 'independent_variables' to the new dataset
            for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
                if attribute in reset_attribute_value:
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
            # Reset the year_built attribute.
            result_data['year_built'] = resize(year, sampled_index.size).astype('int32')
            # TODO: Uncomment the following three lines to reset land_area, tax_exempt, zgde. Test still to be done. parcel_id should be changed by location choice model.
            #result_data['land_area'] = resize(-1, sampled_index.size).astype('int32')
            #result_data['tax_exempt'] = resize(-1, sampled_index.size).astype('int32')
            #result_data['zgde'] = resize(-1, sampled_index.size).astype('int32')
            
            if id_name and result_data and id_name not in result_data:
                result_data[id_name] = arange(sampled_index.size, dtype='int32') + 1
            storage = StorageFactory().get_storage('dict_storage')
            storage.write_table(table_name=table_name, table_data=result_data)
            
            result_dataset = Dataset(id_name = id_name,
                                      in_storage = storage,
                                      in_table_name = table_name,
                                      dataset_name = dataset_name
                                      )
            index = arange(result_dataset.size())

        if append_to_realestate_dataset:
            if len(result_data) > 0:
                logger.start_block('Appending development events and living units')
                logger.log_note("Append %d sampled development events to real estate dataset." % len(result_data[result_data.keys()[0]]))
                index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)
                logger.start_block('Creating id mapping')
                # remember the ids from the development_event_history dataset.
                mapping_new_old = self.get_mapping_of_old_ids_to_new_ids(result_data, realestate_dataset, index)
                logger.end_block()
                
                '''Get the living units associated with the selected development events by iterating
                over the mapping dictionary and selecting, for each entry, all living units carrying
                the old building id. The living units are collected in selected_living_units_dict,
                which is then added to the living_units dataset. A dictionary is needed to use the
                add_elements method; building it also clones the records, since a DatasetSubset is
                only a view on the original table.'''
                selected_living_units_dict = {}
                counter = 0
                for new_id in mapping_new_old:
                    if counter == 0:
                        logger.log_note("Log assignment of every 100th development event")
                    counter +=1
                    if counter % 100 == 0:
                        logger.log_note("Assembling living units for development event %s" % new_id)
                    sel_index = [i for i in range(0, len(living_units_from_dataset['building_id'])) if living_units_from_dataset['building_id'][i] == mapping_new_old[new_id]]
                    living_units_this_sampled_building = DatasetSubset(living_units_from_dataset, sel_index) 
                    if len(selected_living_units_dict) == 0:
                        logger.start_block('Assign new building id')
                        for attribute_name in living_units_this_sampled_building.get_primary_attribute_names():
                            column = living_units_this_sampled_building.get_attribute(attribute_name)
                            if attribute_name == 'building_id':
                                new_ids = array(living_units_this_sampled_building.size() * [new_id], dtype=int32)
                                selected_living_units_dict.update({attribute_name: new_ids})
                            else:
                                selected_living_units_dict.update({attribute_name: column})
                        logger.end_block()
                    else:
                        this_living_units_dict ={}
                        for attribute_name in living_units_this_sampled_building.get_primary_attribute_names():
                            column = living_units_this_sampled_building.get_attribute(attribute_name)
                            if attribute_name == 'building_id':
                                new_ids = array(living_units_this_sampled_building.size() * [new_id], dtype=int32)
                                this_living_units_dict.update({attribute_name: new_ids})
                            else:
                                this_living_units_dict.update({attribute_name: column})
                        for attribute_name in living_units_this_sampled_building.get_primary_attribute_names():
                            selected_living_units_dict[attribute_name] = concatenate([selected_living_units_dict[attribute_name], this_living_units_dict[attribute_name]])
                # Reset year_built attribute of living units
                selected_living_units_dict['year_built'] = resize(year, len(selected_living_units_dict['year_built'])).astype('int32')
                # TODO: Uncomment the following two lines to reset rent_price, zgde. Test still to be done
                # selected_living_units_dict['rent_price'] = resize(-1, len(selected_living_units_dict['rent_price'])).astype('int32')
                # selected_living_units_dict['zgde'] = resize(-1, len(selected_living_units_dict['zgde'])).astype('int32')


                
                index_units = living_units_dataset.add_elements(selected_living_units_dict, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)
                
                # Check consistency of buildings and living units. All living units must belong to a building
                if SimulationState().get_current_time() - SimulationState().get_start_time() == 1:
                    for building_id in living_units_dataset['building_id']:
                        if building_id not in realestate_dataset['building_id']:
                            logger.log_warning('Living unit with building_id %d has no corresponding building.' % (building_id))
                        # Uncomment the next line to enforce consistency of the living units and building datasets; the warning above may then be commented out.
#                        assert(building_id in realestate_dataset['building_id']), 'Living unit with building_id %d has no corresponding building.' % (building_id)

                logger.end_block()  # matches start_block('Appending development events and living units')
            result_dataset = realestate_dataset

        # It is recommended to derive all building variables that relate to living units via expression variables.
        # However, if the building dataset contains attributes derived from living units, those attributes should be
        # kept consistent with the living units table. Below is an example: the residential_units attribute of each
        # building should be consistent with the number of living units associated with it.
#        self.check_consistency_of_living_units_per_building(realestate_dataset, living_units_dataset, mapping_new_old)

        return (result_dataset, index)
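
Note: the living-unit cloning above boils down to remapping a foreign key: for every new building id, copy the child rows of the old building it was cloned from and point them at the new id. A compact sketch with hypothetical ids and a made-up rent_price column (np.where replaces the per-element list comprehension used above; the selection is the same, just vectorized):

    import numpy as np

    mapping_new_old = {101: 7, 102: 3}           # new building id -> old id it was cloned from
    living_units = {'building_id': np.array([7, 7, 3, 9]),
                    'rent_price':  np.array([900, 1100, 800, 1200])}

    cloned = {'building_id': [], 'rent_price': []}
    for new_id, old_id in mapping_new_old.items():
        sel = np.where(living_units['building_id'] == old_id)[0]
        cloned['building_id'].extend([new_id] * sel.size)    # re-point the clones at the new building
        cloned['rent_price'].extend(living_units['rent_price'][sel].tolist())
    cloned = dict((k, np.array(v)) for k, v in cloned.items())
    # e.g. cloned['building_id'] == array([101, 101, 102]); ready for add_elements(...)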
Code example #16
    def _add(self, agents_pool, amount, 
             agent_dataset, location_dataset, 
             this_refinement,
             dataset_pool ):
        
        fit_index = self.get_fit_agents_index(agent_dataset, 
                                              this_refinement.agent_expression, 
                                              this_refinement.location_expression,
                                              dataset_pool)
        if this_refinement.agent_expression is not None and len(this_refinement.agent_expression) > 0:
            agents_index = where(agent_dataset.compute_variables(this_refinement.agent_expression, 
                                                               dataset_pool=dataset_pool)>0)[0]
        else:
            agents_index = arange(agent_dataset.size())
        movers_index = array([],dtype="int32")
        ar_pool = array(agents_pool)
        fitted_agents_pool = ar_pool[in1d(ar_pool, agents_index)]
        amount_from_agents_pool = min( amount, fitted_agents_pool.size )
        prob_string = self.probability_attributes.get(agent_dataset.get_dataset_name(),None)
        if prob_string is not None:
            probs_values = (agent_dataset.compute_variables([prob_string], dataset_pool=dataset_pool)).astype('int32')
            uprobs_values = unique(probs_values[fit_index])
            if uprobs_values.size > 0:
                probs_existing = array(ndimage_sum(ones(fit_index.size), 
                                         labels=probs_values[fit_index], index=uprobs_values))
        if amount_from_agents_pool > 0:        
            if prob_string is not None and uprobs_values.size > 0:                
                prob_pool_values = probs_values[fitted_agents_pool]
                probs_pool=zeros(prob_pool_values.size)
                for i in range(uprobs_values.size):
                    probpoolidx = where(prob_pool_values == uprobs_values[i])[0]
                    if probpoolidx.size == 0:
                        continue
                    probs_pool[probpoolidx]=probs_existing[i]/float(probpoolidx.size)
                probs_pool[probs_pool<=0] = (probs_existing.min()/10.0)/float((probs_pool<=0).sum())
            else:
                probs_pool=ones(fitted_agents_pool.size)
            
            agents_index_from_agents_pool = probsample_noreplace( fitted_agents_pool, amount_from_agents_pool, prob_array=probs_pool )
            for i in agents_index_from_agents_pool:
                agents_pool.remove(i)
            if fit_index.size == 0:
                ##cannot find agents to copy their location or clone them, place agents in agents_pool
                if amount > amount_from_agents_pool:                   
                    logger.log_warning("Refinement requests to add %i agents,  but there are only %i agents subtracted from previous action(s) and no agents satisfying %s to clone from;" \
                                   "add %i agents instead" % (amount, amount_from_agents_pool, 
                                                              ' and '.join( [this_refinement.agent_expression, 
                                                                           this_refinement.location_expression]).strip(' and '), 
                                                              amount_from_agents_pool,) )
                    amount = amount_from_agents_pool
                # sample from all suitable locations
                is_suitable_location = location_dataset.compute_variables( this_refinement.location_expression,
                                                                           dataset_pool=dataset_pool )
                location_id_for_agents_pool = sample_replace( location_dataset.get_id_attribute()[is_suitable_location],
                                                                 amount_from_agents_pool )
            else:
                #sample from locations of suitable agents            
                agents_index_for_location = sample_replace( fit_index, amount_from_agents_pool)
                location_id_for_agents_pool = agent_dataset.get_attribute( location_dataset.get_id_name()[0] 
                                                                         )[agents_index_for_location]
                movers_index = concatenate( (movers_index, agents_index_for_location) )

        elif fit_index.size == 0:
            ## no agents in agents_pool and no agents to clone either, --> fail
            logger.log_error( "Action 'add' failed: there is no agent subtracted from previous action, and no suitable agents satisfying %s to clone from." % \
                              ' and '.join( [this_refinement.agent_expression, this_refinement.location_expression] ).strip(' and ') )
            return
            
        if amount > amount_from_agents_pool:
            agents_index_to_clone = sample_replace( fit_index, amount - amount_from_agents_pool)
            movers_index = concatenate( (movers_index, agents_index_to_clone) )

        if movers_index.size > 0 and this_refinement.location_capacity_attribute is not None and len(this_refinement.location_capacity_attribute) > 0:
            movers_location_id = agent_dataset.get_attribute( location_dataset.get_id_name()[0] )[movers_index]
            movers_location_index = location_dataset.get_id_index( movers_location_id )
            # see previous comment about histogram function
            num_of_movers_by_location = histogram( movers_location_index, bins=arange(location_dataset.size() +1) )[0]
            num_of_agents_by_location = location_dataset.compute_variables( "number_of_agents=%s.number_of_agents(%s)" % \
                                                                            ( location_dataset.dataset_name,
                                                                            agent_dataset.dataset_name ),
                                                                            dataset_pool=dataset_pool)
            
            expand_factor = safe_array_divide( (num_of_agents_by_location + num_of_movers_by_location ).astype('float32'),
                                                num_of_agents_by_location, return_value_if_denominator_is_zero = 1.0 )
            new_values = round_( expand_factor * location_dataset.get_attribute(this_refinement.location_capacity_attribute) )
            location_dataset.modify_attribute( this_refinement.location_capacity_attribute, 
                                               new_values
                                           )
            self._add_refinement_info_to_dataset(location_dataset, self.id_names, this_refinement, index=movers_location_index)
        if amount_from_agents_pool > 0:
            agent_dataset.modify_attribute( 'building_id',
                                            -1 * ones( agents_index_from_agents_pool.size, dtype='int32' ),
                                            agents_index_from_agents_pool
                                            )
            agent_dataset.modify_attribute( location_dataset.get_id_name()[0],
                                            location_id_for_agents_pool,
                                            agents_index_from_agents_pool
                                            )

            self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement, index=agents_index_from_agents_pool)
            self.processed_locations['add'] = concatenate((self.processed_locations.get('add', array([])), 
                                                unique(location_dataset[self.subarea_id_name][location_dataset.get_id_index(location_id_for_agents_pool)])))
            
        if amount > amount_from_agents_pool:
            new_agents_index = agent_dataset.duplicate_rows(agents_index_to_clone)
            self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement, index=agents_index_to_clone)
            self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement, index=new_agents_index)
            if location_dataset.get_dataset_name() != 'building':
                agent_dataset.modify_attribute( 'building_id',
                                            -1 * ones( new_agents_index.size, dtype='int32' ),
                                            new_agents_index
                                            )
            self.processed_locations['add'] = concatenate((self.processed_locations.get('add', array([])), 
                                                unique(agent_dataset[self.subarea_id_name][new_agents_index])))
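
Note: the pool weighting in the block above gives each pooled agent a chance proportional to how common its category (the probability attribute) is among agents already satisfying the refinement, with the category weight shared equally inside each category and a small positive floor for categories unseen so far. A self-contained sketch, substituting numpy for the opus_core helpers (np.unique counts for ndimage_sum, np.random.choice for probsample_noreplace; both substitutions are assumptions):

    import numpy as np

    existing_cat = np.array([1, 1, 1, 2, 2, 3])   # categories of agents already satisfying the refinement
    pool_cat     = np.array([1, 2, 2, 3, 3, 4])   # categories of the agents available in the pool
    cats, counts = np.unique(existing_cat, return_counts=True)

    probs = np.zeros(pool_cat.size)
    for c, n in zip(cats, counts):
        idx = np.where(pool_cat == c)[0]
        if idx.size:
            probs[idx] = n / float(idx.size)      # category weight shared equally within the category
    probs[probs <= 0] = (counts.min() / 10.0) / float((probs <= 0).sum())  # floor for unseen categories
    picked = np.random.choice(np.arange(pool_cat.size), size=3, replace=False, p=probs / probs.sum())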
Code example #17
    def run(self,
            individual_dataset,
            counts_dataset,
            fraction_dataset,
            id_name1='blockgroup_id',
            id_name2='zone_id',
            fraction_attribute_name='fraction',
            out_storage=None):
        """
        """
        assert id_name1 in individual_dataset.get_known_attribute_names()
        if id_name2 not in individual_dataset.get_known_attribute_names():
            individual_dataset.add_primary_attribute(
                -1 * ones(individual_dataset.size()), id_name2)

        lucky_household_index = array([], dtype="int32")
        hh_zone_id = array([], dtype="int32")
        output_data = {}

        logger.start_block("Start assigning individuals")
        zone_ids = counts_dataset.get_attribute(id_name2)
        building_types = counts_dataset.get_attribute("building_type_id")
        households = counts_dataset.get_attribute("households")
        for zone_id, building_type, n in zip(zone_ids, building_types,
                                             households):
            logger.log_status(
                "n(%s=%i & %s=%i) = %s:" %
                (id_name2, zone_id, "building_type_id", building_type, n))
            fraction_index = where(
                fraction_dataset.get_attribute(id_name2) == zone_id)

            blockgroup_ids = fraction_dataset.get_attribute_by_index(
                id_name1, fraction_index)
            fractions = fraction_dataset.get_attribute_by_index(
                fraction_attribute_name, fraction_index)
            for blockgroup_id, fraction in zip(blockgroup_ids, fractions):
                nn = int(round(n * fraction))
                logger.log_status(
                    "\tfrac(%s=%s) = %s, n = %s" %
                    ("blockgroup_id", blockgroup_id, fraction, nn))
                if nn >= 1:
                    suitable_household_index = where(
                        logical_and(
                            individual_dataset.get_attribute(id_name1) ==
                            blockgroup_id,
                            individual_dataset.get_attribute(
                                "building_type_id") == building_type))[0]
                    logger.log_status(
                        "\t\t sample %s from %s suitable households" %
                        (nn, suitable_household_index.size))
                    if suitable_household_index.size == 0:
                        logger.log_warning("\tNo suitable households")
                        continue
                    lucky_household_index = concatenate(
                        (lucky_household_index,
                         sample_replace(suitable_household_index, nn)))
                    hh_zone_id = concatenate((hh_zone_id, [zone_id] * nn))

        logger.end_block()  # closes 'Start assigning individuals'

        for attribute_name in individual_dataset.get_known_attribute_names():
            output_data[
                attribute_name] = individual_dataset.get_attribute_by_index(
                    attribute_name, lucky_household_index)
        output_data["original_household_id"] = output_data["household_id"]
        output_data["household_id"] = 1 + arange(lucky_household_index.size)
        output_data["zone_id"] = hh_zone_id

        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name="households", table_data=output_data)
        output_dataset = Dataset(in_storage=storage,
                                 id_name=["household_id"],
                                 in_table_name="households")
        output_dataset.write_dataset(out_storage=out_storage,
                                     out_table_name="households")
Code example #18
    def run(self, location_set, agent_event_set, agent_set, current_year, disaggregate_to=None, 
            location_characteristics=[], dataset_pool=None):
        """ The agent_event_set is expected to have attributes:
                grid_id, scheduled_year, total_number, is_percentage, change_type, (optionally other agent characteristics)
            'grid_id' is not a mandatory name, but it must match to the id name of the location_set.
            Thus, the model works on any geography level. If it's a aggregated geography and there is a need to 
            disaggregate the agents into lower geography, set the dataset to be disaggregated to in the 
            'disaggregate_to' argument (e.g. location_set=zone, disaggregate_to=building).
            'is_percentage' (bool) determines if the 'total_number' is a percentage of existing agents (True) or 
            an absolute number (False).
            'change_type' can have values 'D' (delete), 'A' (add) and determines the type
            of change for the agents. If this column is missing, the model considers 'D' as default
            for all entries in the agent_event_set.
            If the change of type is 'D', 
            the method finds agents from the agent_set (jobs, households) located in the given locations 
            (e.g. grid_id), then samples the given amount for the current_year and unplaces them.
            If other characteristics columns are contained in the agent_event_set, their names
            must match column names in the agent_set (e.g. 'sector_id' for jobs).
            In such a case the deletion is done among agents that match the given characteristics.
            If the change of type is 'A', the agent_event_set can contain attributes of the location set.
            It determines agents used for sampling missing characteristics of the added agents, for example
            values of income or persons. Values of these characteristics can be -1 if no restriction 
            for the sampling is desired. Such location attributes must be passed in the argument 
            'location_characteristics'. 
        """        
        if not agent_event_set or (agent_event_set.size() == 0): 
            logger.log_status("No %s agents for event processing." % agent_set.get_dataset_name())
            return

        idx_of_events_this_year = agent_event_set.get_attribute("scheduled_year") == current_year
        if idx_of_events_this_year.sum() == 0:
            logger.log_status("No %s agents for this year event processing." % agent_set.get_dataset_name())
            return
        
        self.dataset_pool = self.create_dataset_pool(dataset_pool)
        
        location_id_name = location_set.get_id_name()[0]
        location_ids_in_event_set = agent_event_set.get_attribute_by_index(location_id_name, 
                                                                           idx_of_events_this_year)
        
        other_characteristics = agent_event_set.get_known_attribute_names()
        for name in agent_event_set.get_id_name() + [location_id_name, 
                    "scheduled_year", "total_number", "is_percentage", "change_type"] + location_characteristics:
            if name in other_characteristics:
                other_characteristics.remove(name)
        
        totals = agent_event_set.get_attribute_by_index("total_number", idx_of_events_this_year)
        if "change_type" not in agent_event_set.get_known_attribute_names():
            types_of_change = array(idx_of_events_this_year.sum()*['D'])
        else:
            types_of_change = agent_event_set.get_attribute_by_index("change_type", 
                                                                           idx_of_events_this_year)
        if "is_percentage" not in agent_event_set.get_known_attribute_names():
            is_percentage = zeros(idx_of_events_this_year.sum(), dtype='bool8')
        else:
            is_percentage = agent_event_set.get_attribute_by_index("is_percentage", 
                                                                           idx_of_events_this_year)
        
        # pre-load other characteristics
        for name in other_characteristics:
            agent_event_set.get_attribute(name)
            
        if location_id_name not in agent_set.get_known_attribute_names():
            # compute agents locations
            agent_set.compute_one_variable_with_unknown_package(location_id_name, self.dataset_pool)
                    
        # iterate over rows in the event set
        for ilocation_id in range(location_ids_in_event_set.size):
            agent_ids = agent_set.get_attribute(location_id_name)
            location_id = location_ids_in_event_set[ilocation_id]
            change_type = types_of_change[ilocation_id]

            agents_to_consider = agent_ids == location_id
            for characteristics in other_characteristics:
                characteristics_value = agent_event_set[characteristics][idx_of_events_this_year][ilocation_id]
                agents_to_consider = logical_and(agents_to_consider, 
                                                 agent_set.get_attribute(characteristics) == characteristics_value)
            number_of_agents = totals[ilocation_id]
            agent_index = where(agents_to_consider)[0]  
            if  is_percentage[ilocation_id]: # number_of_agents means percentage; convert to absolute number
                number_of_agents = agent_index.size*number_of_agents/100.0
            number_of_agents = int(number_of_agents)
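            # e.g. (hypothetical) 250 agents at this location with total_number = 10 and
            # is_percentage = True yield int(250 * 10 / 100.0) = 25 agents to process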
            if change_type == 'D':
                if number_of_agents > 0:
                    if agent_index.size <= number_of_agents: # unplace all agents
                        unplace_index = agent_index
                    else: # sample agents
                        unplace_index = sample_noreplace(agent_index, number_of_agents)
                    agent_set.modify_attribute(name=location_id_name, 
                                           data=resize(array([-1], dtype=agent_ids.dtype), unplace_index.size),
                                           index = unplace_index)
                    logger.log_status('%s agents deleted from location %s' % (number_of_agents, location_id))
            elif change_type == 'A':
                if number_of_agents <= 0:
                    continue
                data = {agent_set.get_id_name()[0]: arange(1, number_of_agents+1, 1) + agent_set.get_id_attribute().max()}
                if disaggregate_to is not None:
                    if location_id_name not in disaggregate_to.get_known_attribute_names():
                        disaggregate_to.compute_one_variable_with_unknown_package(location_id_name, self.dataset_pool)
                    disaggr_idx = where(disaggregate_to[location_id_name] == location_id)[0]
                    if disaggr_idx.size <= 0:
                        logger.log_warning('No %s locations found for %s=%s. %s agents not created.' % (
                                disaggregate_to.get_dataset_name(), location_id_name, location_id, number_of_agents))
                        continue
                    # sample disaggregated locations
                    disaggr_sidx = sample_replace(disaggr_idx, number_of_agents)
                    data[disaggregate_to.get_id_name()[0]] = disaggregate_to.get_id_attribute()[disaggr_sidx]
                else:
                    data[location_id_name] = array([location_id] * number_of_agents)
                
                for characteristics in other_characteristics:
                    characteristics_value = agent_event_set[characteristics][idx_of_events_this_year][ilocation_id]
                    data[characteristics] = array([characteristics_value] * number_of_agents)
                
                # determine agents with the desired characteristics to impute missing characteristics to the new agents
                loc_indicator = ones(agent_set.size(), dtype='bool8')
                for locchar in location_characteristics:
                    if agent_event_set[locchar][idx_of_events_this_year][ilocation_id] == -1:
                        continue
                    var = agent_set.compute_one_variable_with_unknown_package(locchar, self.dataset_pool)
                    loc_indicator = logical_and(loc_indicator, var == agent_event_set[locchar][idx_of_events_this_year][ilocation_id])
                if loc_indicator.sum() == 0:
                    loc_indicator[:] = True
                clone_attr_index = sample_replace(where(loc_indicator)[0], number_of_agents)
                # impute remaining attributes
                for attr in agent_set.get_primary_attribute_names():
                    if attr not in data:
                        data[attr] = agent_set[attr][clone_attr_index]
                
                agent_set.add_elements(data, require_all_attributes=False)
                if location_id_name not in agent_set.get_known_attribute_names():
                    # re-compute agents locations, because the add_elements method deleted all computed attributes
                    agent_set.compute_one_variable_with_unknown_package(location_id_name, self.dataset_pool)
                logger.log_status('%s agents added to location %s' % (number_of_agents, location_id))
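
Note: per the docstring above, a minimal agent_event_set could be assembled as follows. The column values, the extra sector_id characteristic and the id_name are made up, and the import paths are assumed to follow opus_core conventions:

    import numpy as np
    from opus_core.storage_factory import StorageFactory
    from opus_core.datasets.dataset import Dataset

    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='agent_events', table_data={
        'event_id':       np.array([1, 2]),
        'grid_id':        np.array([42, 17]),        # must match the id name of the location_set
        'scheduled_year': np.array([2015, 2015]),
        'total_number':   np.array([10, 50]),
        'is_percentage':  np.array([True, False]),   # row 1: unplace 10% of the matching agents
        'change_type':    np.array(['D', 'A']),      # row 2: add 50 agents
        'sector_id':      np.array([3, 5]),          # optional characteristic: filters the 'D' event
                                                     # and is assigned to agents created by the 'A' event
    })
    agent_event_set = Dataset(in_storage=storage, in_table_name='agent_events',
                              id_name='event_id', dataset_name='agent_event')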
Code example #19
    def run(self, year=None, 
            target_attribute_name='number_of_households', 
            sample_filter="", 
            reset_dataset_attribute_value={}, 
            dataset_pool=None,  **kwargs):
        """ sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        """
        #if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()

        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.control_totals.get_attribute('year')==year)[0]
        control_totals_for_this_year = DatasetSubset(self.control_totals, this_year_index)
        column_names = list(set( self.control_totals.get_known_attribute_names() ) - set( [ target_attribute_name, 'year', '_hidden_id_'] ))
        column_names.sort(reverse=True)
        column_values = dict([ (name, control_totals_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        dataset_known_attributes = self.dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                self.dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
        dataset_known_attributes = self.dataset.get_known_attribute_names() #update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = self.dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = self.dataset.get_attribute(short_name)
        else:
            filter_indicator = 1

        to_be_cloned = array([], dtype=int32)
        to_be_removed = array([], dtype=int32)
        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
        else:        
            logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
        error_log = ''
        for index in range(control_totals_for_this_year.size()):
            lucky_index = None
            indicator = ones( self.dataset.size(), dtype='bool' )
            criterion = {}
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = self.dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in control total dataset can not be found in dataset %s" % (attribute, self.dataset.get_dataset_name())
                if attribute + '_min' in column_names:
                    amin = column_values[attribute + '_min'][index]
                    criterion.update({attribute + '_min':amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                if attribute + '_max' in column_names: 
                    amax = column_values[attribute+'_max'][index]
                    criterion.update({attribute + '_max':amax}) 
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                if attribute in column_names: 
                    aval = column_values[attribute][index] 
                    criterion.update({attribute:aval}) 
                    if aval == -1:
                        continue
                    elif aval == -2:   ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        complement_values = setdiff1d( dataset_attribute, column_values[attribute] )
                        has_one_of_the_complement_value = zeros(dataset_attribute.size, dtype='bool')
                        for value in complement_values:
                            has_one_of_the_complement_value += dataset_attribute == value
                        indicator *= has_one_of_the_complement_value
                    else:
                        indicator *= dataset_attribute == aval
                        
            target_num = column_values[target_attribute_name][index]
            ## if accounting attribute is None, count number of agents with indicator = True 
            if self.dataset_accounting_attribute is None:
                actual_num = indicator.sum()
                action_num = 0
                diff = target_num - actual_num
                if actual_num != target_num:
                    legit_index = where(logical_and(indicator, filter_indicator))[0]
                    if legit_index.size > 0:                    
                        if actual_num < target_num:
                            lucky_index = sample_replace(legit_index, target_num - actual_num)
                            to_be_cloned = concatenate((to_be_cloned, lucky_index))
                        elif actual_num > target_num:
                            lucky_index = sample_noreplace(legit_index, actual_num-target_num)
                            to_be_removed = concatenate((to_be_removed, lucky_index))
                        action_num = lucky_index.size
                    else:
                        error_log += "There is nothing to sample from %s and no action will happen for" % self.dataset.get_dataset_name() + \
                                  ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
                        
            else: 
                ## sum accounting attribute for agents with indicator = True; 
                ## assume dataset_accounting_attribute is a primary attribute 
                accounting = self.dataset.get_attribute(self.dataset_accounting_attribute) * indicator
                actual_num = accounting.sum()
                mean_size = float(actual_num) / indicator.sum()
                action_num = 0
                diff = target_num - actual_num
                if actual_num != target_num:
                    legit_index = where(logical_and(indicator, filter_indicator))[0]
                    if legit_index.size > 0:
                        while actual_num + action_num < target_num:
                            lucky_index = sample_replace(legit_index, ceil((target_num - actual_num - action_num)/mean_size) )
                            action_num += accounting[lucky_index].sum()
                            to_be_cloned = concatenate((to_be_cloned, lucky_index))
                        while actual_num - action_num > target_num:
                            lucky_index = sample_noreplace(legit_index, ceil((actual_num - target_num - action_num)/mean_size) )
                            action_num += accounting[lucky_index].sum()
                            to_be_removed = concatenate((to_be_removed, lucky_index))                
                    else:
                        error_log += "There is nothing to sample from %s and no action will happen for " % self.dataset.get_dataset_name() + \
                                  ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
            
            ##log status
            action = "0"
            if lucky_index is not None:                    
                if actual_num < target_num: action = "+" + str(action_num)
                if actual_num > target_num: action = "-" + str(action_num)
                    
            cat = [ str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(diff), action]
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:
                logger.log_status("\t".join(cat))

        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
                    
        clone_data = {}
        if to_be_cloned.size > 0:
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##self.dataset.duplicate_rows(to_be_cloned)
            logger.log_status()
            for attribute in dataset_known_attributes:
                if reset_dataset_attribute_value.has_key(attribute):
                    clone_data[attribute] = resize(array(reset_dataset_attribute_value[attribute]), to_be_cloned.size)
                else:
                    clone_data[attribute] = self.dataset.get_attribute_by_index(attribute, to_be_cloned)
                    
        self.post_run(self.dataset, to_be_cloned, to_be_removed, **kwargs)
        
        if to_be_removed.size > 0:
            logger.log_status()
            self.dataset.remove_elements(to_be_removed)
            
        if clone_data:
            self.dataset.add_elements(data=clone_data, change_ids_if_not_unique=True)
            
        return self.dataset
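A hypothetical invocation of this model, assuming the surrounding class has wired up self.dataset and self.control_totals (the variable names `model` and `dataset_pool`, the filter expression, and the reset value are invented examples). Note the control-total conventions the loop above relies on: -1 in a column means "any value", -2 means the complement of all values listed elsewhere in that column, and attr_min/attr_max bound a range (disabled by -1).

# Hypothetical call; the filter and reset value are illustrative only.
households = model.run(
    year=2005,
    target_attribute_name='number_of_households',
    sample_filter='household.persons > 0',              # rows eligible for cloning/removal
    reset_dataset_attribute_value={'building_id': -1},  # cloned rows get building_id reset
    dataset_pool=dataset_pool,
)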
Code example #20
    def run(
        self,
        realestate_dataset,
        year=None,
        occupied_spaces_variable="occupied_units",
        total_spaces_variable="total_units",
        target_attribute_name="target_vacancy_rate",
        sample_from_dataset=None,
        sample_filter="",
        reset_attribute_value={},
        year_built="year_built",
        dataset_pool=None,
        append_to_realestate_dataset=False,
        table_name="development_projects",
        dataset_name="development_project",
        id_name="development_project_id",
        **kwargs
    ):
        """         
        The sample_filter attribute/variable indicates which records in the dataset are eligible to be sampled for removal or cloning.
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
        """

        if self.target_vancy_dataset is None:
            raise RuntimeError, "target_vacancy_rate dataset is unspecified."

        if not sample_from_dataset:
            sample_from_dataset = realestate_dataset

        # if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute("year") == year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)

        column_names = list(
            set(self.target_vancy_dataset.get_known_attribute_names())
            - set([target_attribute_name, occupied_spaces_variable, total_spaces_variable, "year", "_hidden_id_"])
        )
        column_names.sort(reverse=True)
        column_values = dict(
            [
                (name, target_vacancy_for_this_year.get_attribute(name))
                for name in column_names + [target_attribute_name]
            ]
        )

        independent_variables = list(set([re.sub("_max$", "", re.sub("_min$", "", col)) for col in column_names]))
        dataset_known_attributes = realestate_dataset.get_known_attribute_names()
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
            if variable not in sample_dataset_known_attributes:
                sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)

        dataset_known_attributes = realestate_dataset.get_known_attribute_names()  # update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1

        sampled_index = array([], dtype=int32)

        # log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "expected", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "expected", "difference", "action"]))
        error_log = ""
        for index in range(target_vacancy_for_this_year.size()):
            this_sampled_index = array([], dtype=int32)
            indicator = ones(realestate_dataset.size(), dtype="bool")
            sample_indicator = ones(sample_from_dataset.size(), dtype="bool")
            criterion = {}  # for logging
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = realestate_dataset.get_attribute(attribute)
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (
                        attribute,
                        realestate_dataset.get_dataset_name(),
                    )

                if attribute + "_min" in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute + "_min")[index]
                    criterion.update({attribute + "_min": amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                        sample_indicator *= sample_attribute >= amin
                if attribute + "_max" in column_names:
                    amax = target_vacancy_for_this_year.get_attribute(attribute + "_max")[index]
                    criterion.update({attribute + "_max": amax})
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names:
                    aval = column_values[attribute][index]
                    criterion.update({attribute: aval})
                    if aval == -1:
                        continue
                    elif (
                        aval == -2
                    ):  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        indicator *= dataset_attribute == aval
                        sample_indicator *= sample_attribute == aval

            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
        ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[
                    index
                ]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]

            # NOTE: `col` below is the loop variable leaked from the list comprehension
            # that builds independent_variables (Python 2 scoping), i.e. the last entry
            # of the reverse-sorted column_names.
            this_total_spaces_variable += "_" + str(criterion[col])
            this_occupied_spaces_variable += "_" + str(criterion[col])

            logger.be_quiet()  # temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(
                this_occupied_spaces_variable, dataset_pool=dataset_pool
            )
            realestate_dataset.compute_one_variable_with_unknown_package(
                this_total_spaces_variable, dataset_pool=dataset_pool
            )
            sample_from_dataset.compute_one_variable_with_unknown_package(
                this_total_spaces_variable, dataset_pool=dataset_pool
            )
            logger.talk()

            actual_num = (realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            # target_num is obsolete with this version.
            target_num = int(
                round(
                    (realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum()
                    / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
                )
            )
            """If the target vacancy is very small and the inflow to the region big it is not enough to check
            only the current simulation year's vacancy. The simulation is more robust if the BTM is anticipating the
            next year's population (of households and jobs).
            #TODO: Make code more general to cover various stratifications in the real estate market.
            """
            # criterion[col] == 1 flags the household category and 0 the job category,
            # judging by the control totals used in each branch below.
            if criterion[col] == 1:
                idx = where(self.control_totals.get_attribute("year") == year + 1)[0]
                this_years_control_totals = DatasetSubset(self.control_totals, idx)
                expected_num = int(
                    round(
                        this_years_control_totals.get_attribute("total_number_of_households").sum()
                        / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
                    )
                )
            if criterion[col] == 0:
                idx = where(self.employment_control_totals.get_attribute("year") == year + 1)[0]
                next_years_control_totals = DatasetSubset(self.employment_control_totals, idx)
                expected_num = int(
                    round(
                        next_years_control_totals.get_attribute("number_of_jobs").sum()
                        / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
                    )
                )

            diff = expected_num - actual_num

            # Previous version, which checked the current year's occupancy:
            # diff = target_num - actual_num

            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(
                    logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0
                )[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int(diff / mean_size)
                    ## sample at least 1 project when diff > 0; otherwise it is an endless loop when num_of_projects_to_sample == 0
                    num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[
                        0 : (1 + searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))
                    ]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += (
                        "There is nothing to sample from %s and no new development will happen for "
                        % sample_from_dataset.get_dataset_name()
                        + ",".join([col + "=" + str(criterion[col]) for col in column_names])
                        + "\n"
                    )
            # if diff < 0: #TODO demolition; not yet supported

            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0:
                    action = "+" + str(action_num)
                if diff < 0:
                    action = "-" + str(action_num)
            cat = [str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(expected_num), str(diff), action]

            if PrettyTable is not None:
                status_log.add_row(cat)
            else:
                logger.log_status("\t".join(cat))

        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)

        result_data = {}
        result_dataset = None
        index = array([], dtype="int32")
        if True:  # sampled_index.size > 0:
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            result_data.setdefault(year_built, resize(year, sampled_index.size).astype("int32"))
            ## also add 'independent_variables' to the new dataset
            for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
                if reset_attribute_value.has_key(attribute):
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)

            if id_name and result_data and id_name not in result_data:
                result_data[id_name] = arange(sampled_index.size, dtype="int32") + 1

            storage = StorageFactory().get_storage("dict_storage")
            storage.write_table(table_name=table_name, table_data=result_data)

            result_dataset = Dataset(
                id_name=id_name, in_storage=storage, in_table_name=table_name, dataset_name=dataset_name
            )
            index = arange(result_dataset.size())

        if append_to_realestate_dataset:
            if len(result_data) > 0:
                index = realestate_dataset.add_elements(
                    result_data, require_all_attributes=False, change_ids_if_not_unique=True
                )
            result_dataset = realestate_dataset

        return (result_dataset, index)
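The sizing logic above reduces to demand / (1 - vacancy_rate): the model asks how many total units are needed so that the occupied (or anticipated) units leave exactly the target vacancy. A worked sketch of the arithmetic, with invented numbers:

# Worked example of the vacancy-target arithmetic (numbers invented).
occupied = 9500          # occupied units in the current category
vacancy_rate = 0.05      # target_vacancy_rate for this row
target = int(round(occupied / (1 - vacancy_rate)))  # 10000 total units needed
actual = 9800            # existing total units
diff = target - actual   # 200 units to add by sampling projects
print(target, diff)      # 10000 200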
Code example #21
    def run(self, realestate_dataset,
            year=None, 
            occupied_spaces_variable="occupied_units",
            total_spaces_variable="total_units",
            target_attribute_name='target_vacancy_rate',
            sample_from_dataset = None,
            sample_filter="",
            reset_attribute_value={}, 
            year_built = 'year_built',
            dataset_pool=None,
            append_to_realestate_dataset = False,
            table_name = "development_projects",
            dataset_name = "development_project",
            id_name = [],
            **kwargs):
        """         
        The sample_filter attribute/variable indicates which records in the dataset are eligible to be sampled for removal or cloning.
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
        """
        
        if self.target_vancy_dataset is None:
            raise RuntimeError, "target_vacancy_rate dataset is unspecified."
        
        if not sample_from_dataset:
            sample_from_dataset = realestate_dataset
            
        #if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute('year')==year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)
        
        column_names = list(set( self.target_vancy_dataset.get_known_attribute_names() ) - set( [ target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_'] ))
        column_names.sort(reverse=True)
        column_values = dict([ (name, target_vacancy_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        dataset_known_attributes = realestate_dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
                sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
                
        dataset_known_attributes = realestate_dataset.get_known_attribute_names() #update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1
                
        sampled_index = array([], dtype=int32)

        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
        error_log = ''
        for index in range(target_vacancy_for_this_year.size()):
            this_sampled_index = array([], dtype=int32)
            indicator = ones( realestate_dataset.size(), dtype='bool' )
            sample_indicator = ones( sample_from_dataset.size(), dtype='bool' )
            criterion = {}   # for logging
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = realestate_dataset.get_attribute(attribute)
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (attribute, realestate_dataset.get_dataset_name())
                
                if attribute + '_min' in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute+'_min')[index] 
                    criterion.update({attribute + '_min':amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                        sample_indicator *= sample_attribute >= amin
                if attribute + '_max' in column_names: 
                    amax = target_vacancy_for_this_year.get_attribute(attribute+'_max')[index]
                    criterion.update({attribute + '_max':amax}) 
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names: 
                    aval = column_values[attribute][index] 
                    criterion.update({attribute:aval}) 
                    if aval == -1:
                        continue
                    elif aval == -2:  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        indicator *= dataset_attribute == aval
                        sample_indicator *= sample_attribute == aval
                        
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate 
        ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
            
            logger.be_quiet() #temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
            realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            logger.talk()
            
            actual_num = (indicator * realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            target_num = int(round( (indicator * realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index]) 
                            ))
            diff = target_num - actual_num
            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int( diff / mean_size )
                    ## guard against an endless loop when diff < mean_size (same guard as in code example #20)
                    num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[0:(1+searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                              ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
            #if diff < 0: #TODO demolition; not yet supported
            
            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0: action = "+" + str(action_num)
                if diff < 0: action = "-" + str(action_num)
            cat = [ str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(diff), action]
            
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:                
                logger.log_status("\t".join(cat))
            
        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
            
        result_data = {}
        result_dataset = None
        index = array([], dtype='int32')
        if sampled_index.size > 0:
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32'))
            for attribute in sample_from_dataset.get_primary_attribute_names():
                if reset_attribute_value.has_key(attribute):
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
        
            storage = StorageFactory().get_storage('dict_storage')
            storage.write_table(table_name=table_name, table_data=result_data)
    
            result_dataset = Dataset(id_name = id_name,
                                      in_storage = storage,
                                      in_table_name = table_name,
                                      dataset_name = dataset_name
                                      )
            index = arange(result_dataset.size())
            
        if append_to_realestate_dataset:
            if len(result_data) > 0:
                index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)                
            result_dataset = realestate_dataset
        
        return (result_dataset, index)
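The sampling loop above keeps drawing projects with replacement until their combined capacity covers the shortfall, then trims the draw to the shortest prefix whose cumulative capacity reaches diff (the cumsum/searchsorted line). A minimal numpy sketch of that trimming trick, with invented numbers, where numpy.random.choice stands in for sample_replace:

import numpy as np

rng = np.random.default_rng(0)
spaces = np.array([120, 80, 300, 50, 200])  # total units per candidate project
diff = 400                                  # shortfall to fill

sampled = np.array([], dtype=int)
while spaces[sampled].sum() < diff:
    # draw a batch with replacement, as sample_replace does above
    sampled = np.concatenate((sampled, rng.choice(len(spaces), size=2, replace=True)))

# trim to the shortest prefix whose cumulative capacity covers diff
cut = 1 + np.searchsorted(np.cumsum(spaces[sampled]), diff)
sampled = sampled[:cut]
assert spaces[sampled].sum() >= diff

searchsorted returns the first position whose cumulative sum reaches diff, so keeping one element past it guarantees the sampled projects supply at least diff units while discarding any surplus draws.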