def run(self, year, job_set, control_totals, job_building_types, data_objects=None, resources=None):
     self._do_initialize_for_run(job_set, job_building_types, data_objects)
     large_area_ids = control_totals.get_attribute("large_area_id")
     jobs_large_area_ids = job_set.compute_variables("washtenaw.job.large_area_id")
     unique_large_areas = unique(large_area_ids)
     is_year = control_totals.get_attribute("year")==year
     all_jobs_index = arange(job_set.size())
     sectors = unique(control_totals.get_attribute("sector_id")[is_year])
     self._compute_sector_variables(sectors, job_set)
     for area in unique_large_areas:
         idx = where(logical_and(is_year, large_area_ids == area))[0]
         self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
         jobs_index = where(jobs_large_area_ids == area)[0]
         jobs_for_this_area = DatasetSubset(job_set, jobs_index)
         logger.log_status("ETM for area %s (currently %s jobs)" % (area, jobs_for_this_area.size()))
         last_remove_idx = self.remove_jobs.size
         self._do_run_for_this_year(jobs_for_this_area)
         add_jobs_size = self.new_jobs[self.location_id_name].size-self.new_jobs["large_area_id"].size
         remove_jobs_size = self.remove_jobs.size-last_remove_idx
         logger.log_status("add %s, remove %s, total %s" % (add_jobs_size, remove_jobs_size,
                                                            jobs_for_this_area.size()+add_jobs_size-remove_jobs_size))
         self.new_jobs["large_area_id"] = concatenate((self.new_jobs["large_area_id"],
                 array(add_jobs_size*[area], dtype="int32")))
         # transform indices of removing jobs into indices of the whole dataset
         self.remove_jobs[last_remove_idx:self.remove_jobs.size] = all_jobs_index[jobs_index[self.remove_jobs[last_remove_idx:self.remove_jobs.size]]]
     self._update_job_set(job_set)
     idx_new_jobs = arange(job_set.size()-self.new_jobs["large_area_id"].size, job_set.size())
     jobs_large_area_ids = job_set.compute_variables("washtenaw.job.large_area_id")
     jobs_large_area_ids[idx_new_jobs] = self.new_jobs["large_area_id"]
     job_set.delete_one_attribute("large_area_id")
     job_set.add_attribute(jobs_large_area_ids, "large_area_id", metadata=AttributeType.PRIMARY)
     # return an index of new jobs
     return arange(job_set.size()-self.new_jobs["large_area_id"].size, job_set.size())  
Exemple #2
0
 def run(self,
         year,
         job_set,
         control_totals,
         job_building_types,
         data_objects=None,
         resources=None):
     self._do_initialize_for_run(job_set, job_building_types, data_objects)
     subarea_ids = control_totals.get_attribute(self.subarea_id_name)
     jobs_subarea_ids = job_set.compute_one_variable_with_unknown_package(
         variable_name="%s" % (self.subarea_id_name),
         dataset_pool=self.dataset_pool)
     unique_subareas = unique(subarea_ids)
     is_year = control_totals.get_attribute("year") == year
     all_jobs_index = arange(job_set.size())
     sectors = unique(control_totals.get_attribute("sector_id")[is_year])
     self._compute_sector_variables(sectors, job_set)
     for area in unique_subareas:
         idx = where(logical_and(is_year, subarea_ids == area))[0]
         self.control_totals_for_this_year = DatasetSubset(
             control_totals, idx)
         jobs_index = where(jobs_subarea_ids == area)[0]
         jobs_for_this_area = DatasetSubset(job_set, jobs_index)
         logger.log_status("ETM for area %s (currently %s jobs)" %
                           (area, jobs_for_this_area.size()))
         last_remove_idx = self.remove_jobs.size
         self._do_run_for_this_year(jobs_for_this_area)
         add_jobs_size = self.new_jobs[
             self.location_id_name].size - self.new_jobs[
                 self.subarea_id_name].size
         remove_jobs_size = self.remove_jobs.size - last_remove_idx
         logger.log_status(
             "add %s, remove %s, total %s" %
             (add_jobs_size, remove_jobs_size,
              jobs_for_this_area.size() + add_jobs_size - remove_jobs_size))
         self.new_jobs[self.subarea_id_name] = concatenate(
             (self.new_jobs[self.subarea_id_name],
              array(add_jobs_size * [area], dtype="int32")))
         # transform indices of removing jobs into indices of the whole dataset
         self.remove_jobs[last_remove_idx:self.remove_jobs.
                          size] = all_jobs_index[jobs_index[
                              self.remove_jobs[last_remove_idx:self.
                                               remove_jobs.size]]]
     self._update_job_set(job_set)
     idx_new_jobs = arange(
         job_set.size() - self.new_jobs[self.subarea_id_name].size,
         job_set.size())
     jobs_subarea_ids = job_set.compute_one_variable_with_unknown_package(
         variable_name="%s" % (self.subarea_id_name),
         dataset_pool=self.dataset_pool)
     jobs_subarea_ids[idx_new_jobs] = self.new_jobs[self.subarea_id_name]
     job_set.delete_one_attribute(self.subarea_id_name)
     job_set.add_attribute(jobs_subarea_ids,
                           self.subarea_id_name,
                           metadata=AttributeType.PRIMARY)
     # return an index of new jobs
     return arange(
         job_set.size() - self.new_jobs[self.subarea_id_name].size,
         job_set.size())
    def run(self,
            chunk_specification,
            dataset,
            dataset_index=None,
            result_array_type=float32,
            **kwargs):
        """ 'chunk_specification' - determines number of chunks to use when computing over
                the dataset set.
            'dataset' - an object of class Dataset that is to be chunked.
            'dataset_index' - index of individuals in dataset to be chunked.
            'result_array_type' - type of the resulting array. Can be any numerical type of numpy array.
            **kwargs - keyword arguments.
            The method chunks dataset_index in the desired number of chunks (minimum is 1) and for each chunk it calls the method
            'run_chunk'. The order of the individuals entering the chunking is determined by the method 'get_agents_order'.
        """
        if dataset_index == None:
            dataset_index = arange(dataset.size())
        if not isinstance(dataset_index, ndarray):
            dataset_index = array(dataset_index)
        logger.log_status("Total number of individuals: %s" %
                          dataset_index.size)
        result_array = zeros(dataset_index.size, dtype=result_array_type)

        if dataset_index.size <= 0:
            logger.log_status("Nothing to be done.")
            return result_array

        all_indexed_individuals = DatasetSubset(dataset, dataset_index)
        ordered_agent_indices = self.get_agents_order(
            all_indexed_individuals)  # set order of individuals in chunks

        # TODO: Remove next six lines after we inherit chunk specification as a text string.
        if (chunk_specification is None):
            chunk_specification = {'nchunks': 1}
        chunker = ChunkSpecification(chunk_specification)
        self.number_of_chunks = chunker.nchunks(dataset_index)
        chunksize = int(
            ceil(all_indexed_individuals.size() /
                 float(self.number_of_chunks)))
        for ichunk in range(self.number_of_chunks):
            logger.start_block("%s chunk %d out of %d." %
                               (self.model_short_name,
                                (ichunk + 1), self.number_of_chunks))
            self.index_of_current_chunk = ichunk
            try:
                chunk_agent_indices = ordered_agent_indices[arange(
                    (ichunk * chunksize),
                    min((ichunk + 1) * chunksize,
                        all_indexed_individuals.size()))]
                logger.log_status("Number of agents in this chunk: %s" %
                                  chunk_agent_indices.size)
                result_array[chunk_agent_indices] = self.run_chunk(
                    dataset_index[chunk_agent_indices], dataset,
                    **kwargs).astype(result_array_type)
            finally:
                logger.end_block()

        return result_array
    def run(self, year, household_set, control_totals, characteristics, resources=None):
#        self.person_set = person_set
        self._do_initialize_for_run(household_set)
        control_totals.get_attribute("total_number_of_households") # to make sure they are loaded
        self.characteristics = characteristics
        self.all_categories = self.characteristics.get_attribute("characteristic")
        self.all_categories = array(map(lambda x: x.lower(), self.all_categories))
        self.scaled_characteristic_names = get_distinct_names(self.all_categories).tolist()
        self.marginal_characteristic_names = copy(control_totals.get_id_name())
        index_year = self.marginal_characteristic_names.index("year")
        self.marginal_characteristic_names.remove("year")
        self.marginal_characteristic_names.remove(self.subarea_id_name)
        region_ids = control_totals.get_attribute(self.subarea_id_name)
        households_region_ids = household_set.compute_one_variable_with_unknown_package(variable_name="%s" % (self.subarea_id_name), dataset_pool=self.dataset_pool)

        unique_regions = unique(region_ids)
        is_year = control_totals.get_attribute("year")==year
        all_households_index = arange(household_set.size())
        for area in unique_regions:
            idx = where(logical_and(is_year, region_ids == area))[0]
            self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
            households_index = where(households_region_ids == area)[0]
            if households_index.size == 0:
                continue
            households_for_this_area = DatasetSubset(household_set, households_index)
            logger.log_status("HTM for area %s (currently %s households)" % (area, households_for_this_area.size()))
            last_remove_idx = self.remove_households.size
            last_new_hhs_idx = self.mapping_existing_hhs_to_new_hhs.size
            self._do_run_for_this_year(households_for_this_area)
            add_hhs_size = self.new_households[self.location_id_name].size-self.new_households[self.subarea_id_name].size+self.mapping_existing_hhs_to_new_hhs.size-last_new_hhs_idx
            remove_hhs_size = self.remove_households.size-last_remove_idx
            logger.log_status("add %s, remove %s, total %s" % (add_hhs_size, remove_hhs_size,
                                                               households_for_this_area.size()+add_hhs_size-remove_hhs_size
                                                               ))
            self.new_households[self.subarea_id_name] = concatenate((self.new_households[self.subarea_id_name],
                                            array((self.new_households[self.location_id_name].size-self.new_households[self.subarea_id_name].size)*[area], dtype="int32")))
            # transform indices of removing households into indices of the whole dataset
            self.remove_households[last_remove_idx:self.remove_households.size] = all_households_index[households_index[self.remove_households[last_remove_idx:self.remove_households.size]]]
            # do the same for households to be duplicated
            self.mapping_existing_hhs_to_new_hhs[last_new_hhs_idx:self.mapping_existing_hhs_to_new_hhs.size] = all_households_index[households_index[self.mapping_existing_hhs_to_new_hhs[last_new_hhs_idx:self.mapping_existing_hhs_to_new_hhs.size]]]
            
        self._update_household_set(household_set)
        idx_new_households = arange(household_set.size()-self.new_households[self.subarea_id_name].size, household_set.size())
        #household_region_ids = household_set.compute_variables("urbansim_parcel.household.%s" % self.subarea_id_name)
        #household_region_ids[idx_new_households] = self.new_households[self.subarea_id_name]
        region_ids = household_set.get_attribute(self.subarea_id_name).copy()
        household_set.delete_one_attribute(self.subarea_id_name)
        household_set.add_attribute(region_ids, self.subarea_id_name, metadata=AttributeType.PRIMARY)
        # return an index of new households
        return idx_new_households
    def run(self, year, household_set, control_totals, characteristics, resources=None):
#        self.person_set = person_set
        self._do_initialize_for_run(household_set)
        control_totals.get_attribute("total_number_of_households") # to make sure they are loaded
        self.characteristics = characteristics
        self.all_categories = self.characteristics.get_attribute("characteristic")
        self.all_categories = array(map(lambda x: x.lower(), self.all_categories))
        self.scaled_characteristic_names = get_distinct_names(self.all_categories).tolist()
        self.marginal_characteristic_names = copy(control_totals.get_id_name())
        index_year = self.marginal_characteristic_names.index("year")
        self.marginal_characteristic_names.remove("year")
        self.marginal_characteristic_names.remove(self.subarea_id_name)
        region_ids = control_totals.get_attribute(self.subarea_id_name)
        households_region_ids = household_set.compute_one_variable_with_unknown_package(variable_name="%s" % (self.subarea_id_name), dataset_pool=self.dataset_pool)

        unique_regions = unique(region_ids)
        is_year = control_totals.get_attribute("year")==year
        all_households_index = arange(household_set.size())
        for area in unique_regions:
            idx = where(logical_and(is_year, region_ids == area))[0]
            self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
            households_index = where(households_region_ids == area)[0]
            if households_index.size == 0:
                continue
            households_for_this_area = DatasetSubset(household_set, households_index)
            logger.log_status("HTM for area %s (currently %s households)" % (area, households_for_this_area.size()))
            last_remove_idx = self.remove_households.size
            last_new_hhs_idx = self.mapping_existing_hhs_to_new_hhs.size
            self._do_run_for_this_year(households_for_this_area)
            add_hhs_size = self.new_households[self.location_id_name].size-self.new_households[self.subarea_id_name].size+self.mapping_existing_hhs_to_new_hhs.size-last_new_hhs_idx
            remove_hhs_size = self.remove_households.size-last_remove_idx
            logger.log_status("add %s, remove %s, total %s" % (add_hhs_size, remove_hhs_size,
                                                               households_for_this_area.size()+add_hhs_size-remove_hhs_size
                                                               ))
            self.new_households[self.subarea_id_name] = concatenate((self.new_households[self.subarea_id_name],
                                            array((self.new_households[self.location_id_name].size-self.new_households[self.subarea_id_name].size)*[area], dtype="int32")))
            # transform indices of removing households into indices of the whole dataset
            self.remove_households[last_remove_idx:self.remove_households.size] = all_households_index[households_index[self.remove_households[last_remove_idx:self.remove_households.size]]]
            # do the same for households to be duplicated
            self.mapping_existing_hhs_to_new_hhs[last_new_hhs_idx:self.mapping_existing_hhs_to_new_hhs.size] = all_households_index[households_index[self.mapping_existing_hhs_to_new_hhs[last_new_hhs_idx:self.mapping_existing_hhs_to_new_hhs.size]]]
            
        self._update_household_set(household_set)
        idx_new_households = arange(household_set.size()-self.new_households[self.subarea_id_name].size, household_set.size())
        #household_region_ids = household_set.compute_variables("urbansim_parcel.household.%s" % self.subarea_id_name)
        #household_region_ids[idx_new_households] = self.new_households[self.subarea_id_name]
        region_ids = household_set.get_attribute(self.subarea_id_name).copy()
        household_set.delete_one_attribute(self.subarea_id_name)
        household_set.add_attribute(region_ids, self.subarea_id_name, metadata=AttributeType.PRIMARY)
        # return an index of new households
        return idx_new_households
Exemple #6
0
    def _do_run(self, location_set, agent_set, agents_index, data_objects=None, resources=None):
        location_id_name = location_set.get_id_name()[0]
        jobsubset = DatasetSubset(agent_set, agents_index)
        if jobsubset.size() <= 0:
            return array([], dtype='int32')
        #unplace jobs
        agent_set.set_values_of_one_attribute(location_id_name, 
                                              resize(array([-1.0]), jobsubset.size()), agents_index)
        sector_ids = jobsubset.get_attribute("sector_id")
        sectors = unique(sector_ids)
        counts = ndimage_sum(ones((jobsubset.size(),)), labels=sector_ids.astype('int32'), index=sectors.astype('int32'))
        if sectors.size <=1 :
            counts = array([counts])
        variables = map(lambda x: "number_of_jobs_of_sector_"+str(int(x)), sectors)
        compute_variables = map(lambda var: self.variable_package + "." + 
            location_set.get_dataset_name()+ "." + var, variables)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included({agent_set.get_dataset_name():agent_set})
        location_set.compute_variables(compute_variables, dataset_pool=self.dataset_pool)
        if self.filter is None:
            location_index = arange(location_set.size())
        else:
            filter_values = location_set.compute_variables([self.filter], dataset_pool=self.dataset_pool)
            location_index = where(filter_values > 0)[0]
        if location_index.size <= 0:
            logger.log_status("No locations available. Nothing to be done.")
            return array([])
        location_subset = DatasetSubset(location_set, location_index)
        i=0
        for sector in sectors:
            distr = location_subset.get_attribute(variables[i])
            if ma.allclose(distr.sum(), 0):
                uniform_prob = 1.0/distr.size
                distr = resize(array([uniform_prob], dtype='float64'), distr.size)
                logger.log_warning("Probabilities in scaling model for sector " + str(sector) + " sum to 0.0.  Substituting uniform distribution!")
#                random_sample = sample(location_set.get_attribute("grid_id"), k=int(counts[i]), \
#                                   probabilities = distr)
            distr = distr/float(distr.sum())
            random_sample = probsample_replace(location_subset.get_id_attribute(), size=int(counts[i]), 
                                       prob_array=distr)
            idx = where(sector_ids == sector)[0]
            #modify job locations
            agent_set.set_values_of_one_attribute(location_id_name, random_sample, agents_index[idx])
            i+=1
        return agent_set.get_attribute_by_index(location_id_name, agents_index)
    def run(self, chunk_specification, dataset, dataset_index=None, result_array_type=float32, **kwargs):
        """ 'chunk_specification' - determines number of chunks to use when computing over
                the dataset set.
            'dataset' - an object of class Dataset that is to be chunked.
            'dataset_index' - index of individuals in dataset to be chunked.
            'result_array_type' - type of the resulting array. Can be any numerical type of numpy array.
            **kwargs - keyword arguments.
            The method chunks dataset_index in the desired number of chunks (minimum is 1) and for each chunk it calls the method
            'run_chunk'. The order of the individuals entering the chunking is determined by the method 'get_agents_order'.
        """
        if dataset_index==None:
            dataset_index=arange(dataset.size())
        if not isinstance(dataset_index,ndarray):
            dataset_index=array(dataset_index)
        logger.log_status("Total number of individuals: %s" % dataset_index.size)
        result_array = zeros(dataset_index.size, dtype=result_array_type)

        if dataset_index.size <= 0:
            logger.log_status("Nothing to be done.")
            return result_array

        all_indexed_individuals = DatasetSubset(dataset, dataset_index)
        ordered_agent_indices = self.get_agents_order(all_indexed_individuals)# set order of individuals in chunks

        # TODO: Remove next six lines after we inherit chunk specification as a text string.
        if (chunk_specification is None):
            chunk_specification = {'nchunks':1}
        chunker = ChunkSpecification(chunk_specification)
        self.number_of_chunks = chunker.nchunks(dataset_index)
        chunksize = int(ceil(all_indexed_individuals.size()/float(self.number_of_chunks)))
        for ichunk in range(self.number_of_chunks):
            logger.start_block("%s chunk %d out of %d."
                               % (self.model_short_name, (ichunk+1), self.number_of_chunks))
            self.index_of_current_chunk = ichunk
            try:
                chunk_agent_indices = ordered_agent_indices[arange((ichunk*chunksize),
                                                                   min((ichunk+1)*chunksize,
                                                                       all_indexed_individuals.size()))]
                logger.log_status("Number of agents in this chunk: %s" % chunk_agent_indices.size)
                result_array[chunk_agent_indices] = self.run_chunk(dataset_index[chunk_agent_indices],
                                                                   dataset, **kwargs).astype(result_array_type)
            finally:
                logger.end_block()

        return result_array
    def run(self, year=None,
            dataset_pool=None,  **kwargs):
        """
        """
        if dataset_pool is None:
            dataset_pool = SessionConfiguration().get_dataset_pool()

        if year is None:
            year = SimulationState().get_current_time()
        
        this_year_index = where(self.scheduled_events.get_attribute('year')==year)[0]
        scheduled_events_for_this_year = DatasetSubset(self.scheduled_events, this_year_index)
        scheduled_events_for_this_year.load_dataset_if_not_loaded()
        column_names = list(set( self.scheduled_events.get_known_attribute_names() ) - set( [ 'year', 'action', 'attribute', 'amount', 'event_id', '_hidden_id_'] ))
        column_names.sort()
#        column_values = dict([ (name, scheduled_events_for_this_year.get_attribute(name)) for name in column_names])
        
        for index in range(scheduled_events_for_this_year.size()):
            indicator = ones( self.dataset.size(), dtype='bool' )
            event_attr = {}
            for attribute in column_names:
                if attribute in self.dataset.get_known_attribute_names():
                    dataset_attribute = self.dataset.get_attribute(attribute)
                else:
                    ## this is done inside the loop because some action may delete computed attributes, such as dataset.add_elements()
                    try:
                        dataset_attribute = self.dataset.compute_one_variable_with_unknown_package(attribute, dataset_pool=dataset_pool)
                    except:
                        raise ValueError, "attribute %s used in scheduled events dataset can not be found in dataset %s" % (attribute, self.dataset.get_dataset_name())
                
#                if attribute in column_names: 
                aval = scheduled_events_for_this_year.get_attribute(attribute)[index]
                if aval == -1:
                    continue    # ignore if column value is -1
                else:
                    indicator *= dataset_attribute == aval
                    event_attr.update({attribute:aval})
            
            #agents in dataset satisfying all conditions are identified by indicator
            legit_index = where(indicator)[0]
            
            this_event = scheduled_events_for_this_year.get_data_element(index)
            if not hasattr(this_event, 'attribute'):
                action_attr_name = ''
            else:
                action_attr_name = this_event.attribute
            action_function = getattr(self, '_' + this_event.action.strip().lower())
            action_function( amount=this_event.amount,
                             attribute=action_attr_name,
                             dataset=self.dataset, 
                             index=legit_index,
                             data_dict=event_attr )
            
            self.post_run(self.dataset, legit_index, **kwargs)

        return self.dataset
Exemple #9
0
    def run(self, n=500, 
            realestate_dataset_name = 'building',
            current_year=None,
            **kwargs):

        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')

        if current_year is None:
            year = SimulationState().get_current_time()
        else:
            year = current_year
        self.current_year = year
        this_year_index = where(target_vacancy['year']==year)[0]
        target_vacancy_for_this_year = DatasetSubset(target_vacancy, this_year_index)
        if target_vacancy_for_this_year.size() == 0:
            raise IOError, 'No target vacancy defined for year %s.' % year
        self.all_btypes_size = target_vacancy_for_this_year.size()
        return DevelopmentProjectProposalSamplingModelWithMinimum.run(self, n=n, realestate_dataset_name=realestate_dataset_name,
                                                                      current_year=current_year, **kwargs)
 def _do_run(self, location_set, agent_set, agents_index, resources=None):
     location_id_name = location_set.get_id_name()[0]
     asubset = DatasetSubset(agent_set, agents_index)
     if asubset.size() <= 0:
         return array([], dtype='int32')
     #unplace agents
     agent_set.modify_attribute(location_id_name, 
                             resize(array([-1]), asubset.size()), agents_index)
     if self.filter is None:
         location_index = arange(location_set.size())
     else:
         filter_values = location_set.compute_variables([self.filter], dataset_pool=self.dataset_pool)
         location_index = where(filter_values > 0)[0]
     if location_index.size <= 0:
         logger.log_status("No locations available. Nothing to be done.")
         return array([])
     
     location_subset = DatasetSubset(location_set, location_index)
     if self.consider_capacity:
         location_set.compute_variables([self.capacity_attribute], 
                                        dataset_pool=self.dataset_pool)
         weights = location_subset[self.capacity_attribute]
         if self.number_of_agents_attribute is not None:
             location_set.compute_variables([self.number_of_agents_attribute], 
                                        dataset_pool=self.dataset_pool)
             weights = clip(weights - location_subset[self.number_of_agents_attribute],
                                        0, location_subset[self.capacity_attribute])
     else:
         weights = ones(location_subset.size())
     
     if weights.sum() <=0:
         logger.log_status("Locations' capacity sums to zero. Nothing to be done.")
         return array([])        
     distr = weights/float(weights.sum())
     random_sample = probsample_replace(location_subset.get_id_attribute(), size=asubset.size(), 
                                    prob_array=distr)
     agent_set.modify_attribute(location_id_name, random_sample, agents_index)
     return agent_set.get_attribute_by_index(location_id_name, agents_index)
    def run(self, agent_set, **kwargs):

        large_areas = agent_set.get_attribute(self.large_area_id_name)
        valid_large_area = where(large_areas > 0)[0]
        if valid_large_area.size > 0:
            unique_large_areas = unique(large_areas[valid_large_area])
            cond_array = zeros(agent_set.size(), dtype="bool8")
            cond_array[valid_large_area] = True
            result = array([], dtype="int32")
            for area in unique_large_areas:
                new_index = where(logical_and(cond_array, large_areas == area))[0]
                agent_subset =  DatasetSubset(agent_set, new_index)
                logger.log_status("ARM for area %s (%s agents)" % (area, agent_subset.size()))
                this_result = AgentRelocationModel.run(self, agent_subset, **kwargs)
                result = concatenate((result, new_index[this_result]))
        no_large_area = where(large_areas <= 0)[0]
        result = concatenate((result, no_large_area))
        return result
    def run(self, agent_set, **kwargs):

        large_areas = agent_set.get_attribute(self.large_area_id_name)
        valid_large_area = where(large_areas > 0)[0]
        if valid_large_area.size > 0:
            unique_large_areas = unique(large_areas[valid_large_area])
            cond_array = zeros(agent_set.size(), dtype="bool8")
            cond_array[valid_large_area] = True
            result = array([], dtype="int32")
            for area in unique_large_areas:
                new_index = where(logical_and(cond_array,
                                              large_areas == area))[0]
                agent_subset = DatasetSubset(agent_set, new_index)
                logger.log_status("ARM for area %s (%s agents)" %
                                  (area, agent_subset.size()))
                this_result = AgentRelocationModel.run(self, agent_subset,
                                                       **kwargs)
                result = concatenate((result, new_index[this_result]))
        no_large_area = where(large_areas <= 0)[0]
        result = concatenate((result, no_large_area))
        return result
    def run(self, in_storage, out_storage=None, business_dsname="business", zone_dsname=None):
        dataset_pool = DatasetPool(storage=in_storage, package_order=['psrc_parcel', 'urbansim_parcel', 'urbansim', 'opus_core'] )
        seed(1)
        allbusinesses = dataset_pool.get_dataset(business_dsname)
        parcels = dataset_pool.get_dataset('parcel')
        buildings = dataset_pool.get_dataset('building')
        parcels.compute_variables(["urbansim_parcel.parcel.residential_units", "number_of_buildings = parcel.number_of_agents(building)", 
                                   "non_residential_sqft = (parcel.aggregate(building.non_residential_sqft)).astype(int32)",
                                   "number_of_res_buildings = parcel.aggregate(urbansim_parcel.building.is_residential)",
                                   "number_of_nonres_buildings = parcel.aggregate(urbansim_parcel.building.is_non_residential)",
                                   "number_of_mixed_use_buildings = parcel.aggregate(urbansim_parcel.building.is_generic_building_type_6)"
                                   ], 
                                  dataset_pool=dataset_pool)
        restypes = [12, 4, 19, 11, 34, 10, 33]
        reslutypes = [13,14,15,24]
        is_valid_business = ones(allbusinesses.size(), dtype='bool8')
        parcels_not_matched = logical_and(in1d(allbusinesses["parcel_id"], parcels.get_id_attribute(), invert=True), allbusinesses["parcel_id"] > 0)
        if(parcels_not_matched.sum() > 0):
            is_valid_business[where(parcels_not_matched)] = False
            logger.log_warning(message="No parcel exists for %s businesses (%s jobs)" % (parcels_not_matched.sum(), 
                                                                                         allbusinesses[self.number_of_jobs_attr][where(parcels_not_matched)].sum()))
        zero_parcel = allbusinesses["parcel_id"]<=0
        if zero_parcel.sum() > 0:
            is_valid_business[where(zero_parcel)] = False
            logger.log_warning(message="%s businesses (%s jobs) located on zero parcel_id" % (zero_parcel.sum(), 
                                                                                         allbusinesses[self.number_of_jobs_attr][where(zero_parcel)].sum()))            
            
        zero_size = logical_and(is_valid_business, allbusinesses[self.number_of_jobs_attr].round() == 0)
        if(sum(zero_size) > 0):
            is_valid_business[where(zero_size)] = False
            logger.log_warning(message="%s businesses are of size 0." % sum(zero_size))
        
        businesses = DatasetSubset(allbusinesses, index=where(is_valid_business)[0])
        
        parcels.add_attribute(name="number_of_workplaces", data=parcels.sum_dataset_over_ids(businesses, constant=1))
        
        has_single_res_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_res_buildings"] == 1) # 1 (1 residential)
        parcels.add_attribute(data=has_single_res_buildings.astype("int32"), name="buildings_code")
        has_mult_res_buildings = logical_and(parcels["number_of_buildings"] > 1,  parcels["number_of_nonres_buildings"] == 0) # 2 (mult residential)
        parcels.modify_attribute("buildings_code", data=2*ones(has_mult_res_buildings.sum()), index=where(has_mult_res_buildings)) 
        has_single_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_nonres_buildings"] == 1), parcels["number_of_mixed_use_buildings"] == 0) # 3 (1 non-res)
        parcels.modify_attribute("buildings_code", data=3*ones(has_single_nonres_buildings.sum()), index=where(has_single_nonres_buildings)) 
        has_mult_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] > 1, parcels["number_of_res_buildings"] == 0), parcels["number_of_mixed_use_buildings"] == 0) # 4 (mult non-res)
        parcels.modify_attribute("buildings_code", data=4*ones(has_mult_nonres_buildings.sum()), index=where(has_mult_nonres_buildings))
        has_single_mixed_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_mixed_use_buildings"] == 1) # 5 (1 mixed-use)
        parcels.modify_attribute("buildings_code", data=5*ones(has_single_mixed_buildings.sum()), index=where(has_single_mixed_buildings))
        has_mult_mixed_buildings = logical_and(parcels["number_of_buildings"] > 1, 
                                               logical_or(logical_and(parcels["number_of_res_buildings"] > 0, parcels["number_of_nonres_buildings"] > 0), 
                                                          logical_or(parcels["number_of_mixed_use_buildings"] > 1, 
                                                                     logical_and(parcels["number_of_res_buildings"] == 0, 
                                                                                 parcels["number_of_mixed_use_buildings"] > 0)))) # 6
        parcels.modify_attribute("buildings_code", data=6*ones(has_mult_mixed_buildings.sum()), index=where(has_mult_mixed_buildings))
        has_no_building_res_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)) # 7 (vacant with res LU type)
        parcels.modify_attribute("buildings_code", data=7*ones(has_no_building_res_lutype.sum()), index=where(has_no_building_res_lutype)) 
        has_no_building_nonres_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)==0) # 8 (vacant with non-res LU type)
        parcels.modify_attribute("buildings_code", data=8*ones(has_no_building_nonres_lutype.sum()), index=where(has_no_building_nonres_lutype))
        
        business_sizes = businesses[self.number_of_jobs_attr].round().astype("int32") 
        business_location = {}
        business_location1wrkpl = zeros(businesses.size(), dtype="int32")
        business_location1wrkplres = zeros(businesses.size(), dtype="int32")
        business_ids = businesses.get_id_attribute()
        # sample one building for cases when sampling is required.
        for ibusid in range(businesses.size()):
            idx = where(buildings['parcel_id'] == businesses['parcel_id'][ibusid])[0]
            bldgids = buildings['building_id'][idx]
            business_location[business_ids[ibusid]] = bldgids
            if bldgids.size == 1:
                business_location1wrkpl[ibusid] = bldgids[0]
            elif bldgids.size > 1:
                business_location1wrkpl[ibusid] = bldgids[sample_noreplace(arange(bldgids.size), 1)]
                if buildings['residential_units'][idx].sum() > 0:
                    # Residential buildings are sampled with probabilities proportional to residential units
                    business_location1wrkplres[ibusid] = bldgids[probsample_noreplace(arange(bldgids.size), 1, prob_array=buildings['residential_units'][idx])]
                else:
                    business_location1wrkplres[ibusid] = business_location1wrkpl[ibusid]
        
        home_based = zeros(business_sizes.sum(), dtype="bool8")
        job_building_id = zeros(business_sizes.sum(), dtype="int32")
        job_array_labels = business_ids.repeat(business_sizes)
        job_assignment_case = zeros(business_sizes.sum(), dtype="int32")
        processed_bindicator = zeros(businesses.size(), dtype="bool8")
        business_codes = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"])
        business_nworkplaces = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"])
        logger.log_status("Total number of jobs: %s" % home_based.size)
        
        # 1. 1-2 worker business in 1 residential building
        idx_sngl_wrk_1bld_fit = where(logical_and(business_sizes < 3, business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_1bld_fit])
        home_based[jidx] = True
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_1bld_fit].repeat(business_sizes[idx_sngl_wrk_1bld_fit])
        job_assignment_case[jidx] = 1
        processed_bindicator[idx_sngl_wrk_1bld_fit] = True
        logger.log_status("1. %s jobs (%s businesses) set as home-based due to 1-2 worker x 1 residential building fit." % (
            business_sizes[idx_sngl_wrk_1bld_fit].sum(), idx_sngl_wrk_1bld_fit.size))
        
        # 2. 1-2 worker business in multiple residential buildings
        idx_sngl_wrk_multbld_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_multbld_fit])
        home_based[jidx] = True
        job_building_id[jidx] = business_location1wrkplres[idx_sngl_wrk_multbld_fit].repeat(business_sizes[idx_sngl_wrk_multbld_fit])
        job_assignment_case[jidx] = 2
        processed_bindicator[idx_sngl_wrk_multbld_fit] = True
        logger.log_status("2. %s jobs (%s businesses) set as home-based due to 1-2 worker x multiple residential buildings fit." % (
            business_sizes[idx_sngl_wrk_multbld_fit].sum(), idx_sngl_wrk_multbld_fit.size))
               
        # 3. 1-2 worker in single non-res building (not mixed-use)
        idx_sngl_wrk_single_nonres_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 3))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_single_nonres_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_single_nonres_fit].repeat(business_sizes[idx_sngl_wrk_single_nonres_fit])
        job_assignment_case[jidx] = 3
        processed_bindicator[idx_sngl_wrk_single_nonres_fit] = True
        logger.log_status("3. %s jobs (%s businesses) placed due to 1-2 worker x single non-res building fit." % (
                          business_sizes[idx_sngl_wrk_single_nonres_fit].sum(), idx_sngl_wrk_single_nonres_fit.size))     
        
        # 4. 1-2 worker in multiple non-res building (not mixed-use)
        idx_sngl_wrk_mult_nonres_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 4))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mult_nonres_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mult_nonres_fit].repeat(business_sizes[idx_sngl_wrk_mult_nonres_fit])
        job_assignment_case[jidx] = 4
        processed_bindicator[idx_sngl_wrk_mult_nonres_fit] = True
        logger.log_status("4. %s jobs (%s businesses) placed due to 1-2 worker x multiple non-res building fit." % (
            business_sizes[idx_sngl_wrk_mult_nonres_fit].sum(), idx_sngl_wrk_mult_nonres_fit.size))      
                
        # 5. 1-2 worker in single mixed-use building
        idx_sngl_wrk_smu_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 5))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_smu_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_smu_fit].repeat(business_sizes[idx_sngl_wrk_smu_fit])
        job_assignment_case[jidx] = 5
        processed_bindicator[idx_sngl_wrk_smu_fit] = True
        logger.log_status("5. %s jobs (%s businesses) in 1-2 worker x single mixed-use building." % (
            business_sizes[idx_sngl_wrk_smu_fit].sum(), idx_sngl_wrk_smu_fit.size))       
        
        # 6. 1-2 worker in multiple mixed-type buildings
        idx_sngl_wrk_mmu_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 6))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mmu_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mmu_fit].repeat(business_sizes[idx_sngl_wrk_mmu_fit])
        bldtype = buildings.get_attribute_by_id("building_type_id", business_location1wrkpl[idx_sngl_wrk_mmu_fit])
        is_bldtype_res = in1d(bldtype, restypes)
        home_based[in1d(job_array_labels, business_ids[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)])] = True
        job_assignment_case[jidx] = 6
        processed_bindicator[idx_sngl_wrk_mmu_fit] = True
        logger.log_status("6. %s jobs (%s businesses) in 1-2 worker x multiple mixed-type buildings. %s jobs classified as home-based." % (
            business_sizes[idx_sngl_wrk_mmu_fit].sum(), idx_sngl_wrk_mmu_fit.size, business_sizes[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)].sum()))            

        # 7. 1-2 worker business in residential parcel with no building
        idx_sngl_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 7))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_vacant_res])
        job_assignment_case[jidx] = 7
        home_based[jidx] = True
        processed_bindicator[idx_sngl_wrk_vacant_res] = True
        logger.log_status("7. %s jobs (%s businesses of size 1-2) could not be placed due to non-existing buildings in parcels with residential LU type." % (
            business_sizes[idx_sngl_wrk_vacant_res].sum(), idx_sngl_wrk_vacant_res.size))        

        # 8. 3+ workers of governmental workplaces in 1+ residential building
        ind_bussiness_case8 = logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, in1d(businesses['sector_id'], [18,19]))), in1d(business_codes, [1,2]))
        idx_wrk_fit = where(ind_bussiness_case8)[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 8
        processed_bindicator[idx_wrk_fit] = True
        logger.log_status("8. %s governmental jobs (%s businesses of size 3+) could not be placed due to residing in residential buildings only." % (
                    business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))
        
        # 9. 3-30 workers in single residential building. Make two of them home based.
        idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, business_sizes <= 30)), business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_fit].repeat(business_sizes[idx_sngl_wrk_fit])
        bsizeminus2 = vstack((2*ones(idx_sngl_wrk_fit.size), business_sizes[idx_sngl_wrk_fit]-2)).ravel("F").astype("int32") # interweaving 2 and remaining business size
        hbidx = tile(array([True, False]), bsizeminus2.size/2).repeat(bsizeminus2) # set the first two jobs of every business to True, others to False
        home_based[(where(jidx)[0])[hbidx]] = True
        job_assignment_case[jidx] = 9
        processed_bindicator[idx_sngl_wrk_fit] = True        
        logger.log_status("9. %s jobs (%s businesses) in 3-30 worker x single residential building. %s jobs assigned as home-based." % (
            business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, hbidx.sum()))      
        
        # 10. 3-30 workers in multiple residential buildings. Make two of them home based.
        idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, business_sizes <= 30)), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit])
        job_assignment_case[jidx] = 10
        processed_bindicator[idx_sngl_wrk_fit] = True
        # sample buildings to businesses by parcels 
        bpcls = unique(businesses["parcel_id"][idx_sngl_wrk_fit])
        for ipcl in range(bpcls.size):
            bidx = where(buildings['parcel_id'] == bpcls[ipcl])[0]
            bldgids = buildings['building_id'][bidx]
            bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]], business_ids[idx_sngl_wrk_fit])
            # multiply by units for sampling prop. to units rather than buildings
            bldgids = bldgids.repeat(maximum(1, buildings['residential_units'][bidx].astype('int32'))) 
            if bldgids.size < bussids.size:
                bldarray = bldgids.repeat(1+ceil((bussids.size - bldgids.size)/float(bldgids.size)) )
            else:
                bldarray = bldgids
            shuffle(bldarray) # randomly reorder in-place
            for ib in range(bussids.size):
                jidx = where(job_array_labels == bussids[ib])[0]
                job_building_id[jidx] = bldarray[ib]
                home_based[jidx[0:2]] = True
        logger.log_status("10. %s jobs (%s businesses) in 3-30 worker x multiple residential building. %s jobs assigned as home-based." % (
            business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, idx_sngl_wrk_fit.size*2))        


        # 11. single workplace, 3+ workers in single non-res or mixed-use building (11.)
        idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2), 
                                                            logical_or(business_codes==3, business_codes==5)),
                                                business_nworkplaces==1))[0]
        which_labels = where(in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers]))[0]
        job_building_id[which_labels] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(business_sizes[idx_sngl_wrkplace_2plus_workers])   
        job_assignment_case[which_labels] = 11
        processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True
        logger.log_status("11. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x single non-res/mixed-use building fit." % (
            business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size))
        
        # 12. single workplace, 3+ workers in multiple mixed-type building
        idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==4, business_codes==6)),
                                                            business_nworkplaces==1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(business_sizes[idx_sngl_wrkplace_2plus_workers])    
        job_assignment_case[jidx] = 12
        processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True
        logger.log_status("12. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x multiple non-res/mixed building fit." % (
            business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size))

        # 13. multiple workplaces, 3+ workers in single non-res or mixed building
        idx_mult_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==3, business_codes==5)),
                                                            business_nworkplaces > 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_mult_wrkplace_2plus_workers])
        job_building_id[jidx] = business_location1wrkpl[idx_mult_wrkplace_2plus_workers].repeat(business_sizes[idx_mult_wrkplace_2plus_workers])
        job_assignment_case[jidx] = 13
        processed_bindicator[idx_mult_wrkplace_2plus_workers] = True
        logger.log_status("13. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x single non-res/mixed building fit." % (
            business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size))
        
        # 14. multiple workplaces, 3+ workers in multiple non-res or mixed building
        idx_mult_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==4, business_codes==6)),
                                                            business_nworkplaces > 1))[0]
        processed_bindicator[idx_mult_wrkplace_2plus_workers] = True
        # sample buildings to businesses by parcels 
        bpcls = unique(businesses["parcel_id"][idx_mult_wrkplace_2plus_workers])
        #hbasedsum = home_based.sum()
        for ipcl in range(bpcls.size):
            bldgids = buildings['building_id'][buildings['parcel_id'] == bpcls[ipcl]]
            bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]], business_ids[idx_mult_wrkplace_2plus_workers])
            if bldgids.size < bussids.size:
                bldarray = bldgids.repeat(1+ceil((bussids.size - bldgids.size)/float(bldgids.size)))
            else:
                bldarray = bldgids
            shuffle(bldarray) # randomly reorder in-place
            is_res = in1d(bldarray, restypes)
            for ib in range(bussids.size):
                jidx = where(job_array_labels == bussids[ib])
                job_building_id[jidx] = bldarray[ib]
                #home_based[jidx] = is_res
                job_assignment_case[jidx] = 14
        logger.log_status("14. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x multiple non-res/mixed building fit." % (
            business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size))
        
        
        # 15. 3+ workers in residential parcel with no building
        idx_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 2), business_codes == 7))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_res])
        job_assignment_case[jidx] = 15
        processed_bindicator[idx_wrk_vacant_res] = True
        logger.log_status("15. %s jobs (%s businesses of 3+ workers) could not be placed due to non-existing buildings in parcels with residential LU type." % (
            business_sizes[idx_wrk_vacant_res].sum(), idx_wrk_vacant_res.size))
        
        # 16. nonresidential parcel with no building
        idx_wrk_vacant_nonres = where(logical_and(processed_bindicator==0, business_codes == 8))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_nonres])
        job_assignment_case[jidx] = 16
        processed_bindicator[idx_wrk_vacant_nonres] = True
        logger.log_status("16. %s jobs (%s businesses) could not be placed due to non-existing buildings in parcels with non-esidential LU type." % (
            business_sizes[idx_wrk_vacant_nonres].sum(), idx_wrk_vacant_nonres.size))        
        
        # 17. 31+ workers in single residential building. Do not place - will go into ELCM.
        idx_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 30), business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 17
        processed_bindicator[idx_wrk_fit] = True        
        logger.log_status("17. %s jobs (%s businesses) in 31+ workers x single residential building." % (
            business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))         
    
        # 18. 31+ workers in multiple residential buildings.
        idx_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 30), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 18
        processed_bindicator[idx_wrk_fit] = True
        logger.log_status("18. %s jobs (%s businesses) in 31+ workers x multiple residential building." % (
            business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))                

        # jobs in messy buildings
        idx_messy_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 0), business_codes == 0))[0]
        processed_bindicator[idx_messy_fit] = True
        logger.log_status("%s jobs (%s businesses) could not be placed due to messy buildings." % (
            business_sizes[idx_messy_fit].sum(), idx_messy_fit.size))         
         
        # build new buildings for jobs in cases 7, 8, 15 and 16
        jidx_no_bld = where(in1d(job_assignment_case, [7,8,15,16]))[0]
        bus = unique(job_array_labels[jidx_no_bld])
        bsidx = businesses.get_id_index(bus)
        # first create buildings for single workplaces per parcel
        single_workplace_idx = where(business_nworkplaces[bsidx] == 1)[0]
        newbld_parcel_id = businesses['parcel_id'][bsidx][single_workplace_idx]
        newbld_bt = sector2building_type(businesses['sector_id'][bsidx][single_workplace_idx])
        newbids = arange(buildings.get_id_attribute().max()+1, buildings.get_id_attribute().max()+single_workplace_idx.size+1)
        bbldid = zeros(bsidx.size, dtype='int32')
        bbldid[single_workplace_idx] = newbids
        # for parcels with multiple workplaces select the largest business to determine its building type
        mult_bsidx = bsidx[where(business_nworkplaces[bsidx] > 1)[0]]
        empty_parcels = businesses['parcel_id'][mult_bsidx]
        uempty_parcels = unique(empty_parcels)
        bsize_on_empty_pcl = ndmax(business_sizes[mult_bsidx], labels=empty_parcels, index=uempty_parcels)
        newbld2_sec = zeros(uempty_parcels.size, dtype='int32')
        newbids2 = arange(newbids.max()+1, newbids.max()+uempty_parcels.size+1)
        for ipcl in range(uempty_parcels.size):
            newbld2_sec[ipcl] = businesses['sector_id'][mult_bsidx][logical_and(businesses['parcel_id'][mult_bsidx] == uempty_parcels[ipcl], 
                                                                                business_sizes[mult_bsidx]==bsize_on_empty_pcl[ipcl])][0]
            this_bidx = where(businesses['parcel_id'][bsidx] == uempty_parcels[ipcl])
            bbldid[this_bidx] = newbids2[ipcl]
            
        newbld_parcel_id = concatenate((newbld_parcel_id, uempty_parcels))
        newbld_bt = concatenate((newbld_bt, sector2building_type(newbld2_sec)))    
        
        newbldgs = {'building_id': concatenate((newbids, newbids2)),
                    'parcel_id': newbld_parcel_id,
                    'building_type_id': newbld_bt,
                    }
        buildings.add_elements(newbldgs, require_all_attributes=False)
        jidx = where(in1d(job_array_labels, business_ids[bsidx]))[0]
        job_building_id[jidx] = bbldid.repeat(business_sizes[bsidx])
        logger.log_status("Build %s new buildings to accommodate %s jobs (out of which %s are governmental) from cases 7, 15, 16." % (
            newbld_parcel_id.size, jidx.size, business_sizes[bsidx][where(in1d(businesses['sector_id'][bsidx], [18,19]))].sum()))
        
        
        logger.log_status("Assigned %s (%s percent) home-based jobs." % (home_based.sum(), round(home_based.sum()/(home_based.size/100.),2)))
        logger.log_status("Finished %s percent (%s) jobs (%s businesses) processed. %s jobs (%s businesses) remain to be processed." % \
                          (round(business_sizes[processed_bindicator].sum()/(home_based.size/100.),2),
                           business_sizes[processed_bindicator].sum(), processed_bindicator.sum(),
                          business_sizes[logical_not(processed_bindicator)].sum(), business_sizes[logical_not(processed_bindicator)].size))
        
        logger.start_block("Storing jobs data.")
        # create job dataset
        job_data = {"job_id": (arange(job_building_id.size)+1).astype("int32"),
                    "home_based_status" : home_based,
                    "building_id": job_building_id,
                    "business_id": job_array_labels.astype("int32"),
                    "sector_id": businesses['sector_id'].repeat(business_sizes).astype("int32"), 
                    "parcel_id": businesses['parcel_id'].repeat(business_sizes).astype("int32"), 
                    "assignment_case": job_assignment_case}

        # join with zones
        if zone_dsname is not None:
            zones = dataset_pool.get_dataset(zone_dsname)
            idname = zones.get_id_name()[0]
            #jpcls = buildings.get_attribute_by_id('parcel_id', job_building_id)
            job_data[idname] = parcels.get_attribute_by_id(idname, job_data["parcel_id"])
            
            
        dictstorage = StorageFactory().get_storage('dict_storage')
        dictstorage.write_table(table_name="jobs", table_data=job_data)
        jobs = Dataset(in_storage=dictstorage, in_table_name="jobs", dataset_name="job", id_name="job_id")
        if out_storage is not None:
            jobs.write_dataset(out_storage=out_storage, out_table_name="jobs")
            buildings.write_dataset(out_storage=out_storage, attributes=AttributeType.PRIMARY)
        logger.end_block()        
        return jobs
class DevelopmentProjectTransitionModel(Model):
    """
    Creates development projects. Each development project is for a single type
    of development, e.g. 'industrial' or 'commercial'.  This model creates
    enough development projects to match the desired vacancy rates, as defined in the target_vacancies
    table.  It does not place any projects in locations; that is the job of the development project
    location choice models.  The distribution of project sizes (amount of space, value of space) is
    determined by sampling from the projects in the development_event_history table.
    """
    model_name = "Development Project Transition Model"

    def __init__(self, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)

    def pre_check(self, location_set, vacancy_table, types):
        for ptype in types:
            self.check_for_space(
                location_set.get_attribute(
                    self.variable_for_total_units[ptype]))
        self.check_target_vacancy_is_not_100_percent(
            vacancy_table.get_attribute("target_total_vacancy"))

    def check_for_space(self, values):
        """Check that this array of values sums to something > 0."""
        self.do_check("x > 0", array([values.sum()]))

    def check_target_vacancy_is_not_100_percent(self, value):
        """Check that the target vacancy rate is not 100% (ratio == 1), because it doesn't make sense,
        and it also causes a divide by 0 error."""
        self.do_check("x < 1", value)

    def run(self,
            vacancy_table,
            history_table,
            year,
            location_set,
            dataset_pool=None,
            resources=None):
        self.dataset_pool = dataset_pool
        building_types = self.dataset_pool.get_dataset('building_type')
        target_vacancy_this_year = DatasetSubset(
            vacancy_table,
            index=where(vacancy_table.get_attribute("year") == year)[0])
        building_type_ids = target_vacancy_this_year.get_attribute(
            'building_type_id')
        building_type_idx = building_types.get_id_index(building_type_ids)
        self.used_building_types = DatasetSubset(building_types,
                                                 index=building_type_idx)
        project_types = self.used_building_types.get_attribute(
            'building_type_name')
        is_residential = self.used_building_types.get_attribute(
            'is_residential')
        unit_names = where(is_residential, 'residential_units',
                           'non_residential_sqft')
        specific_unit_names = where(is_residential, 'residential_units',
                                    '_sqft')
        rates = target_vacancy_this_year.get_attribute('target_total_vacancy')
        self.project_units = {}
        self.project_specific_units = {}
        target_rates = {}
        for i in range(self.used_building_types.size()):
            self.project_units[project_types[i]] = unit_names[i]
            if is_residential[i]:
                self.project_specific_units[
                    project_types[i]] = specific_unit_names[i]
            else:
                self.project_specific_units[project_types[i]] = "%s%s" % (
                    project_types[i], specific_unit_names[i])
            target_rates[building_type_ids[i]] = rates[i]

        self._compute_vacancy_and_total_units_variables(
            location_set, project_types, resources)
        self.pre_check(location_set, target_vacancy_this_year, project_types)

        projects = None
        for project_type_id, target_vacancy_rate in target_rates.iteritems():
            # determine current-year vacancy rates
            project_type = building_types.get_attribute_by_id(
                'building_type_name', project_type_id)
            vacant_units_sum = location_set.get_attribute(
                self.variable_for_vacancy[project_type]).sum()
            units_sum = float(
                location_set.get_attribute(
                    self.variable_for_total_units[project_type]).sum())
            should_develop_units = int(
                round(
                    max(0,
                        (target_vacancy_rate * units_sum - vacant_units_sum) /
                        (1 - target_vacancy_rate))))
            logger.log_status(
                project_type +
                ": vacant units: %d, should be vacant: %f, sum units: %d" %
                (vacant_units_sum, target_vacancy_rate * units_sum, units_sum))

            if not should_develop_units:
                logger.log_note((
                    "Will not build any " + project_type +
                    " units, because the current vacancy of %d units\n" +
                    "is more than the %d units desired for the vacancy rate of %f."
                ) % (vacant_units_sum, target_vacancy_rate * units_sum,
                     target_vacancy_rate))
            #create projects
            if should_develop_units > 0:
                this_project = self._create_projects(
                    should_develop_units, project_type, project_type_id,
                    history_table, location_set, units_sum, resources)
                if projects is None:
                    projects = this_project
                else:
                    projects.join_by_rows(this_project,
                                          change_ids_if_not_unique=True)
        return projects

    def _compute_vacancy_and_total_units_variables(self,
                                                   location_set,
                                                   project_types,
                                                   resources=None):
        compute_resources = Resources(resources)
        compute_resources.merge({"debug": self.debug})
        self.variable_for_vacancy = {}
        self.variable_for_total_units = {}
        for ptype in project_types:
            self.variable_for_vacancy[ptype] = compute_resources.get(
                "%s_vacant_variable" % ptype, "urbansim_zone.%s.vacant_%s" %
                (location_set.get_dataset_name(),
                 self.project_specific_units[ptype]))
            self.variable_for_total_units[ptype] = compute_resources.get(
                "%s_total_units_variable" % ptype,
                "%s.aggregate(urbansim_zone.building.total_%s)" %
                (location_set.get_dataset_name(),
                 self.project_specific_units[ptype]))
            location_set.compute_variables([
                self.variable_for_vacancy[ptype],
                self.variable_for_total_units[ptype]
            ],
                                           dataset_pool=self.dataset_pool,
                                           resources=compute_resources)

    def _create_projects(self,
                         should_develop_units,
                         project_type,
                         project_type_id,
                         history_table,
                         location_set,
                         units_sum,
                         resources=None):
        history_values = history_table.get_attribute(
            self.project_units[project_type])
        type_code_values = history_table.get_change_type_code_attribute(
            self.project_units[project_type])
        # take only non-zero history values and those that don't represent demolished buildings
        history_values_without_zeros = history_values[logical_and(
            history_values > 0,
            type_code_values != DevelopmentEventTypeOfChange.DELETE)]
        mean_size = history_values_without_zeros.mean()
        idx = array([], dtype="int32")
        # Ensure that there are some development projects to choose from.
        num_of_projects_to_select = max(
            10, round_(should_develop_units / mean_size))
        while True:
            idx = concatenate((idx,
                               randint(0, history_values_without_zeros.size,
                                       num_of_projects_to_select)))
            csum = history_values_without_zeros[idx].cumsum()
            idx1 = idx[csum <= should_develop_units]
            if idx1.size == 0:  # at least one project should be selected
                idx = array([idx[0]], dtype="int32")
            else:
                idx = idx1
            if csum[-1] >= should_develop_units:
                break
        data = {
            "residential_units": zeros((idx.size, ), dtype=int32),
            "non_residential_sqft": zeros((idx.size, ), dtype=int32),
            'building_type_id': array(idx.size * [project_type_id]),
            "project_id": arange(idx.size) + 1,
            "building_id": zeros((idx.size, ), dtype=int32)
        }
        data[self.
             project_units[project_type]] = history_values_without_zeros[idx]
        storage = StorageFactory().get_storage('dict_storage')

        development_projects_table_name = 'development_projects'
        storage.write_table(table_name=development_projects_table_name,
                            table_data=data)

        return Dataset(in_storage=storage,
                       in_table_name=development_projects_table_name,
                       id_name='project_id')
    def run(self, realestate_dataset,
            year=None, 
            occupied_spaces_variable="occupied_units",
            total_spaces_variable="total_units",
            target_attribute_name='target_vacancy_rate',
            sample_from_dataset = None,
            sample_filter="",
            reset_attribute_value={}, 
            year_built = 'year_built',
            dataset_pool=None,
            append_to_realestate_dataset = False,
            table_name = "development_projects",
            dataset_name = "development_project",
            id_name = [],
            **kwargs):
        """         
        sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
        """
        
        if self.target_vancy_dataset is None:
            raise RuntimeError, "target_vacancy_rate dataset is unspecified."
        
        if not sample_from_dataset:
            sample_from_dataset = realestate_dataset
            
        #if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute('year')==year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)
        
        column_names = list(set( self.target_vancy_dataset.get_known_attribute_names() ) - set( [ target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_'] ))
        column_names.sort(reverse=True)
        column_values = dict([ (name, target_vacancy_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        dataset_known_attributes = realestate_dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
                sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
                
        dataset_known_attributes = realestate_dataset.get_known_attribute_names() #update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1
                
        sampled_index = array([], dtype=int32)

        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
        error_log = ''
        for index in range(target_vacancy_for_this_year.size()):
            this_sampled_index = array([], dtype=int32)
            indicator = ones( realestate_dataset.size(), dtype='bool' )
            sample_indicator = ones( sample_from_dataset.size(), dtype='bool' )
            criterion = {}   # for logging
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = realestate_dataset.get_attribute(attribute)
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (attribute, realestate_dataset.get_dataset_name())
                
                if attribute + '_min' in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute+'_min')[index] 
                    criterion.update({attribute + '_min':amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                        sample_indicator *= sample_attribute >= amin
                if attribute + '_max' in column_names: 
                    amax = target_vacancy_for_this_year.get_attribute(attribute+'_max')[index]
                    criterion.update({attribute + '_max':amax}) 
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names: 
                    aval = column_values[attribute][index] 
                    criterion.update({attribute:aval}) 
                    if aval == -1:
                        continue
                    elif aval == -2:  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        indicator *= dataset_attribute == aval
                        sample_indicator *= sample_attribute == aval
                        
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate 
            ## or in targe_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
            
            logger.be_quiet() #temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
            realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            logger.talk()
            
            actual_num = (indicator * realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            target_num = int(round( (indicator * realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index]) 
                            ))
            diff = target_num - actual_num
            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int( diff / mean_size )
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[0:(1+searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                              ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
            #if diff < 0: #TODO demolition; not yet supported
            
            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0: action = "+" + str(action_num)
                if diff < 0: action = "-" + str(action_num)
            cat = [ str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(diff), action]
            
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:                
                logger.log_status("\t".join(cat))
            
        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
            
        result_data = {}
        result_dataset = None
        index = array([], dtype='int32')
        if sampled_index.size > 0:
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32'))
            for attribute in sample_from_dataset.get_primary_attribute_names():
                if reset_attribute_value.has_key(attribute):
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
        
            storage = StorageFactory().get_storage('dict_storage')
            storage.write_table(table_name=table_name, table_data=result_data)
    
            result_dataset = Dataset(id_name = id_name,
                                      in_storage = storage,
                                      in_table_name = table_name,
                                      dataset_name = dataset_name
                                      )
            index = arange(result_dataset.size())
            
        if append_to_realestate_dataset:
            if len(result_data) > 0:
                index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)                
            result_dataset = realestate_dataset
        
        return (result_dataset, index)
Exemple #16
0
    def run(self, n=500, run_config=None, current_year=None, debuglevel=0):
        """
        n - sample n proposals at a time, evaluate them one by one
        """
        self.demolished_buildings = array(
            [], dtype='int32')  #id of buildings to be demolished
        if current_year is None:
            current_year = SimulationState().get_current_time()
        if not self.positive_proposals:
            logger.log_status(
                "Proposal Set size <= 0, no proposals to consider, skipping DPPSM."
            )
            return (self.proposal_set, self.demolished_buildings)
        self.proposal_component_set.compute_variables([
            'urbansim_parcel.development_project_proposal_component.units_proposed',
            'urbansim_parcel.development_project_proposal_component.is_residential'
        ],
                                                      dataset_pool=self.
                                                      dataset_pool)
        self.proposal_set.compute_variables(
            [
                'urbansim_parcel.development_project_proposal.number_of_components',
                'zone_id=development_project_proposal.disaggregate(parcel.zone_id)',
                #'occurence_frequency = development_project_proposal.disaggregate(development_template.sample_size)'
            ],
            dataset_pool=self.dataset_pool)
        buildings = self.dataset_pool.get_dataset("building")
        buildings.compute_variables(
            [
                "occupied_units_for_jobs = urbansim_parcel.building.number_of_non_home_based_jobs",
                "units_for_jobs = urbansim_parcel.building.total_non_home_based_job_space",
                "occupied_residential_units = urbansim_parcel.building.number_of_households",
                #                                "urbansim_parcel.building.existing_units",
                "urbansim_parcel.building.is_residential"
            ],
            dataset_pool=self.dataset_pool)

        ## define unit_name by whether a building is residential or not (with is_residential attribute)
        ## if it is non-residential (0), count units by number of job spaces (units_for_jobs)
        ## if it is residential (1), count units by residenital units
        self.unit_name = array(["units_for_jobs", "residential_units"])

        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')
        target_vacancy.compute_variables([
            'is_residential = target_vacancy.disaggregate(building_type.is_residential)'
        ],
                                         dataset_pool=self.dataset_pool)
        # This try-except block checks to see if the object has a subarea_id_name,
        # if it does, it calculates the vacancy rates by subarea_id_name
        try:
            # Check for subarea_id_name in target_vacancies dataset
            # if it is present, vacancy rates are specified by subarea_id_name
            # if it is not, vacancy rates are specified region wide
            target_vacancy.load_dataset()
            if self.subarea_id_name in target_vacancy.get_attribute_names():
                current_target_vacancy_this_year = DatasetSubset(
                    target_vacancy,
                    index=where(
                        target_vacancy.get_attribute("year") ==
                        current_year)[0])
                current_target_vacancy = DatasetSubset(
                    current_target_vacancy_this_year,
                    index=where(
                        current_target_vacancy_this_year.get_attribute(
                            self.subarea_id_name) == self.area_id)[0])
            else:
                current_target_vacancy = DatasetSubset(
                    target_vacancy,
                    index=where(
                        target_vacancy.get_attribute("year") ==
                        current_year)[0])
        except AttributeError:
            # vacancy rates are specified region wide:
            current_target_vacancy = DatasetSubset(
                target_vacancy,
                index=where(
                    target_vacancy.get_attribute("year") == current_year)[0])

        if current_target_vacancy.size() == 0:
            raise IOError, 'No target vacancy defined for year %s.' % current_year

        self.existing_units = {}  #total existing units by land_use type
        self.occupied_units = {}  #total occupied units by land_use type
        self.proposed_units = {}  #total proposed units by land_use type
        self.demolished_units = {
        }  #total (to be) demolished units by land_use type

        components_building_type_ids = self.proposal_component_set.get_attribute(
            "building_type_id").astype("int32")
        proposal_ids = self.proposal_set.get_id_attribute()
        proposal_ids_in_component_set = self.proposal_component_set.get_attribute(
            "proposal_id")
        all_units_proposed = self.proposal_component_set.get_attribute(
            "units_proposed")
        number_of_components_in_proposals = self.proposal_set.get_attribute(
            "number_of_components")

        self.accepting_proposals = zeros(
            current_target_vacancy.get_attribute("building_type_id").max() + 1,
            dtype='bool8'
        )  #whether accepting new proposals, for each building type
        self.accepted_proposals = []  # index of accepted proposals

        self.target_vacancies = {}
        tv_building_types = current_target_vacancy.get_attribute(
            "building_type_id")
        tv_rate = current_target_vacancy.get_attribute("target_vacancy_rate")
        for itype in range(tv_building_types.size):
            self.target_vacancies[tv_building_types[itype]] = tv_rate[itype]

        self.check_vacancy_rates(
            current_target_vacancy
        )  #initialize self.accepting_proposal based on current vacancy rate

        sqft_per_job = self.dataset_pool.get_dataset("building_sqft_per_job")
        zones_of_proposals = self.proposal_set.get_attribute("zone_id")
        self.building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(
            zones_of_proposals.max(), tv_building_types.max())
        # consider only those proposals that have all components of accepted type and sum of proposed units > 0
        is_accepted_type = self.accepting_proposals[
            components_building_type_ids]
        sum_is_accepted_type_over_proposals = array(
            ndimage.sum(is_accepted_type,
                        labels=proposal_ids_in_component_set,
                        index=proposal_ids))
        sum_of_units_proposed = array(
            ndimage.sum(all_units_proposed,
                        labels=proposal_ids_in_component_set,
                        index=proposal_ids))
        is_proposal_eligible = logical_and(
            sum_is_accepted_type_over_proposals ==
            number_of_components_in_proposals, sum_of_units_proposed > 0)

        is_proposal_eligible = logical_and(
            is_proposal_eligible,
            self.proposal_set.get_attribute("start_year") == current_year)
        ## handle planned proposals: all proposals with status_id == is_planned
        ## and start_year == current_year are accepted
        planned_proposal_indexes = where(
            logical_and(
                self.proposal_set.get_attribute(
                    "status_id") == self.proposal_set.id_planned,
                self.proposal_set.get_attribute("start_year") ==
                current_year))[0]

        self.consider_proposals(planned_proposal_indexes, force_accepting=True)
        # consider proposals (in this order: planned, proposed, tentative)
        for status in [
                self.proposal_set.id_proposed, self.proposal_set.id_tentative
        ]:
            idx = where(
                logical_and(
                    self.proposal_set.get_attribute("status_id") == status,
                    is_proposal_eligible))[0]
            if idx.size <= 0:
                continue
            logger.log_status(
                "Sampling from %s eligible proposals with status %s." %
                (idx.size, status))
            while (True in self.accepting_proposals):
                if self.weight[idx].sum() == 0.0:
                    logger.log_warning(
                        "Running out of proposals; there aren't any proposals with non-zero weight"
                    )
                    break

                idx = idx[self.weight[idx] > 0]
                n = minimum(idx.size, n)
                sampled_proposal_indexes = probsample_noreplace(
                    proposal_ids[idx],
                    n,
                    prob_array=(self.weight[idx] /
                                float(self.weight[idx].sum())),
                    exclude_index=None,
                    return_index=True)
                self.consider_proposals(
                    arange(self.proposal_set.size())[
                        idx[sampled_proposal_indexes]])
                self.weight[idx[sampled_proposal_indexes]] = 0

        # set status of accepted proposals to 'active'
        self.proposal_set.modify_attribute(name="status_id",
                                           data=self.proposal_set.id_active,
                                           index=array(self.accepted_proposals,
                                                       dtype='int32'))
        building_types = self.dataset_pool.get_dataset("building_type")
        logger.log_status("Status of %s development proposals set to active." %
                          len(self.accepted_proposals))
        logger.log_status(
            "Target/existing vacancy rates (reached using eligible proposals) by building type:"
        )
        for type_id in self.existing_units.keys():
            units_stock = self._get_units_stock(type_id)
            vr = self._get_vacancy_rates(type_id)
            ## units = residential_units if building_type is residential
            ## units = number of job spaces if building_type is non-residential
            logger.log_status(
                              """%(type_id)s[%(type_name)s]: %(vr)s = ((existing_units:%(existing_units)s + 
                              units_proposed:%(units_proposed)s - units_to_be_demolished:%(units_demolished)s) 
                              - units_occupied:%(units_occupied)s) / units_stock:%(units_stock)s""" %  \
                                          { 'type_id': type_id,
                                            'type_name': building_types.get_attribute_by_id("building_type_name", type_id),
                                            'vr':  vr,
                                            'existing_units': int(self.existing_units[type_id]),
                                            'units_occupied': int(self.occupied_units[type_id]),
                                            'units_proposed': int(self.proposed_units[type_id]),
                                            'units_demolished': int(self.demolished_units[type_id]),
                                            'units_stock': int(units_stock)
                                          }
                            )
        # Code added by Jesse Ayers, MAG, 7/20/2009
        # Get the active projects:
        stat_id = self.proposal_set.get_attribute('status_id')
        actv = where(stat_id == 1)[0]
        # Where there are active projects, compute the total_land_area_taken
        # and store it on the development_project_proposals dataset
        # so it can be used by the building_construction_model for the proper
        # computation of units_proposed for those projects with velocity curves
        if actv.size > 0:
            total_land_area_taken_computed = self.proposal_set.get_attribute(
                'urbansim_parcel.development_project_proposal.land_area_taken')
            self.proposal_set.modify_attribute(
                'total_land_area_taken', total_land_area_taken_computed[actv],
                actv)

        return (self.proposal_set, self.demolished_buildings)
    def run(self, n=500, run_config=None, current_year=None, debuglevel=0):
        """
        n - sample n proposals at a time, evaluate them one by one
        """
        self.demolished_buildings = array([], dtype='int32')  #id of buildings to be demolished
        if current_year is None:
            current_year = SimulationState().get_current_time()
        if not self.positive_proposals:
            logger.log_status("Proposal Set size <= 0, no proposals to consider, skipping DPPSM.")
            return (self.proposal_set, self.demolished_buildings) 
        self.proposal_component_set.compute_variables([
            'urbansim_parcel.development_project_proposal_component.units_proposed',
            'urbansim_parcel.development_project_proposal_component.is_residential'],
                                        dataset_pool=self.dataset_pool)
        self.proposal_set.compute_variables([
            'urbansim_parcel.development_project_proposal.number_of_components',
            'zone_id=development_project_proposal.disaggregate(parcel.zone_id)',
            #'occurence_frequency = development_project_proposal.disaggregate(development_template.sample_size)'
            ],
                                        dataset_pool=self.dataset_pool)
        buildings = self.dataset_pool.get_dataset("building")
        buildings.compute_variables([
                                "occupied_units_for_jobs = urbansim_parcel.building.number_of_non_home_based_jobs",
                                "units_for_jobs = urbansim_parcel.building.total_non_home_based_job_space",
                                "occupied_residential_units = urbansim_parcel.building.number_of_households",
#                                "urbansim_parcel.building.existing_units",
                                "urbansim_parcel.building.is_residential"
                                    ],
                                    dataset_pool=self.dataset_pool)

        ## define unit_name by whether a building is residential or not (with is_residential attribute)
        ## if it is non-residential (0), count units by number of job spaces (units_for_jobs)
        ## if it is residential (1), count units by residenital units
        self.unit_name = array(["units_for_jobs", "residential_units"])
                
        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')
        target_vacancy.compute_variables(['is_residential = target_vacancy.disaggregate(building_type.is_residential)'],
                                         dataset_pool=self.dataset_pool)
        # This try-except block checks to see if the object has a subarea_id_name,
        # if it does, it calculates the vacancy rates by subarea_id_name
        try:
            # Check for subarea_id_name in target_vacancies dataset
            # if it is present, vacancy rates are specified by subarea_id_name
            # if it is not, vacancy rates are specified region wide
            target_vacancy.load_dataset()
            if self.subarea_id_name in target_vacancy.get_attribute_names():
                current_target_vacancy_this_year = DatasetSubset(target_vacancy, index=where(target_vacancy.get_attribute("year")==current_year)[0])
                current_target_vacancy = DatasetSubset(current_target_vacancy_this_year, index=where(current_target_vacancy_this_year.get_attribute(self.subarea_id_name)==self.area_id)[0])
            else:
                current_target_vacancy = DatasetSubset(target_vacancy, index=where(target_vacancy.get_attribute("year")==current_year)[0])
        except AttributeError:
            # vacancy rates are specified region wide:
            current_target_vacancy = DatasetSubset(target_vacancy, index=where(target_vacancy.get_attribute("year")==current_year)[0])

        if current_target_vacancy.size() == 0:
            raise IOError, 'No target vacancy defined for year %s.' % current_year
        
        self.existing_units = {}   #total existing units by land_use type
        self.occupied_units = {}   #total occupied units by land_use type
        self.proposed_units = {}   #total proposed units by land_use type
        self.demolished_units = {} #total (to be) demolished units by land_use type

        components_building_type_ids = self.proposal_component_set.get_attribute("building_type_id").astype("int32")
        proposal_ids = self.proposal_set.get_id_attribute()
        proposal_ids_in_component_set = self.proposal_component_set.get_attribute("proposal_id")
        all_units_proposed = self.proposal_component_set.get_attribute("units_proposed")
        number_of_components_in_proposals = self.proposal_set.get_attribute("number_of_components")
        
        self.accepting_proposals = zeros(current_target_vacancy.get_attribute("building_type_id").max()+1, dtype='bool8')  #whether accepting new proposals, for each building type
        self.accepted_proposals = [] # index of accepted proposals

        self.target_vacancies = {}
        tv_building_types = current_target_vacancy.get_attribute("building_type_id")
        tv_rate = current_target_vacancy.get_attribute("target_vacancy_rate")
        for itype in range(tv_building_types.size):
            self.target_vacancies[tv_building_types[itype]] = tv_rate[itype]
            
        self.check_vacancy_rates(current_target_vacancy)  #initialize self.accepting_proposal based on current vacancy rate

        sqft_per_job = self.dataset_pool.get_dataset("building_sqft_per_job")
        zones_of_proposals = self.proposal_set.get_attribute("zone_id")
        self.building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(zones_of_proposals.max(), 
                                                                                   tv_building_types.max())
        # consider only those proposals that have all components of accepted type and sum of proposed units > 0
        is_accepted_type = self.accepting_proposals[components_building_type_ids]
        sum_is_accepted_type_over_proposals = array(ndimage.sum(is_accepted_type, labels = proposal_ids_in_component_set, 
                                                          index = proposal_ids))
        sum_of_units_proposed = array(ndimage.sum(all_units_proposed, labels = proposal_ids_in_component_set, 
                                                          index = proposal_ids))
        is_proposal_eligible = logical_and(sum_is_accepted_type_over_proposals == number_of_components_in_proposals,
                                           sum_of_units_proposed > 0)

        is_proposal_eligible = logical_and(is_proposal_eligible,
                                           self.proposal_set.get_attribute("start_year")==current_year )
        ## handle planned proposals: all proposals with status_id == is_planned 
        ## and start_year == current_year are accepted
        planned_proposal_indexes = where(logical_and(
                                                  self.proposal_set.get_attribute("status_id") == self.proposal_set.id_planned, 
                                                  self.proposal_set.get_attribute("start_year") == current_year ) 
                                        )[0] 
                                   
        self.consider_proposals(planned_proposal_indexes, force_accepting=True)
        # consider proposals (in this order: planned, proposed, tentative)
        for status in [self.proposal_set.id_proposed, self.proposal_set.id_tentative]:
            idx = where(logical_and(self.proposal_set.get_attribute("status_id") == status, is_proposal_eligible))[0]
            if idx.size <= 0:
                continue
            logger.log_status("Sampling from %s eligible proposals with status %s." % (idx.size, status))
            while (True in self.accepting_proposals):
                if self.weight[idx].sum() == 0.0:
                    logger.log_warning("Running out of proposals; there aren't any proposals with non-zero weight")
                    break
                
                idx = idx[self.weight[idx] > 0]
                n = minimum(idx.size, n)
                sampled_proposal_indexes = probsample_noreplace(proposal_ids[idx], n, 
                                                prob_array=(self.weight[idx]/float(self.weight[idx].sum())),                                                                
                                                exclude_index=None, return_index=True)
                self.consider_proposals(arange(self.proposal_set.size())[idx[sampled_proposal_indexes]])
                self.weight[idx[sampled_proposal_indexes]] = 0

        # set status of accepted proposals to 'active'
        self.proposal_set.modify_attribute(name="status_id", data=self.proposal_set.id_active,
                                          index=array(self.accepted_proposals, dtype='int32'))
        building_types = self.dataset_pool.get_dataset("building_type")
        logger.log_status("Status of %s development proposals set to active." % len(self.accepted_proposals))
        logger.log_status("Target/existing vacancy rates (reached using eligible proposals) by building type:")
        for type_id in self.existing_units.keys():
            units_stock = self._get_units_stock(type_id)
            vr = self._get_vacancy_rates(type_id)
            ## units = residential_units if building_type is residential
            ## units = number of job spaces if building_type is non-residential
            logger.log_status(
                              """%(type_id)s[%(type_name)s]: %(vr)s = ((existing_units:%(existing_units)s + 
                              units_proposed:%(units_proposed)s - units_to_be_demolished:%(units_demolished)s) 
                              - units_occupied:%(units_occupied)s) / units_stock:%(units_stock)s""" %  \
                                          { 'type_id': type_id,
                                            'type_name': building_types.get_attribute_by_id("building_type_name", type_id),
                                            'vr':  vr,
                                            'existing_units': int(self.existing_units[type_id]),
                                            'units_occupied': int(self.occupied_units[type_id]),
                                            'units_proposed': int(self.proposed_units[type_id]),
                                            'units_demolished': int(self.demolished_units[type_id]),
                                            'units_stock': int(units_stock)
                                          }
                            )
        # Code added by Jesse Ayers, MAG, 7/20/2009
        # Get the active projects:
        stat_id = self.proposal_set.get_attribute('status_id')
        actv = where(stat_id==1)[0]
        # Where there are active projects, compute the total_land_area_taken
        # and store it on the development_project_proposals dataset
        # so it can be used by the building_construction_model for the proper
        # computation of units_proposed for those projects with velocity curves
        if actv.size > 0:          
            total_land_area_taken_computed = self.proposal_set.get_attribute('urbansim_parcel.development_project_proposal.land_area_taken')
            self.proposal_set.modify_attribute('total_land_area_taken', total_land_area_taken_computed[actv], actv)

        return (self.proposal_set, self.demolished_buildings) 
 def run(self, dataset, outcome_attribute, weight_attribute, 
              control_totals, current_year, control_total_attribute=None, 
              year_attribute='year', capacity_attribute=None, add_quantity=False, dataset_pool=None):
     """'dataset' is a Dataset for which a quantity 'outcome_attribute' is created. The total amount of the quantity is 
     given by the attribute 'control_total_attribute' of the 'control_totals' Dataset. If it is not given, it is assumed 
     to have the same name as 'outcome_attribute'. The 'weight_attribute' of 'dataset' determines the allocation weights.
     The 'control_totals' Dataset contains an attribute 'year' (or alternatively, an attribute given by the 'year_attribute' argument)
     and optionally other attributes that must be known to the 'dataset' (such as a geography). For each row of the control_totals dataset
     for which year matches the 'current_year', the total amount is distributed among the corresponding members of 'dataset' according to weights.
     If a 'capacity_attribute' is given (attribute of 'dataset'), the algorithm removes any allocations that exceeds the capacity and 
     redistributes it among remaining members. The resulting values are appended to 'dataset' as 'outcome_attribute' (as primary attribute).
     If add_quantity is True and the 'outcome_attribute' exists in dataset, the resulting values are added to the current values of 
     'outcome_attribute'.
     """
     ct_attr = control_totals.get_known_attribute_names()
     if year_attribute not in ct_attr:
         raise StandardError, "Year attribute '%s' must be a known attribute of the control totals dataset." % year_attribute
     ct_attr.remove(year_attribute)
     if control_total_attribute is None:
         control_total_attribute = outcome_attribute
     if control_total_attribute not in ct_attr:
         raise StandardError, "Attribute '%s' must be a known attribute of the control totals dataset." % control_total_attribute
     ct_attr.remove(control_total_attribute)
     if control_totals._is_hidden_id():
         ct_attr.remove(control_totals.id_name()[0])
         
     # compute weights and other attributes necessary for allocation
     attrs_to_compute = [weight_attribute] + ct_attr
     if capacity_attribute is not None:
         attrs_to_compute.append(capacity_attribute)
     for attr in attrs_to_compute:
         try:
             dataset.compute_variables(attr, dataset_pool=dataset_pool)
         except:
             dataset.compute_one_variable_with_unknown_package(attr, dataset_pool=dataset_pool)
     
     # create subset of control totals for the current year
     year_index = where(control_totals.get_attribute(year_attribute) == current_year)[0]
     if year_index.size <= 0:
         logger.log_warning("No control total for year %s" % current_year)
         return None
     control_totals_for_this_year = DatasetSubset(control_totals, year_index)
     
     # check capacity
     if capacity_attribute is not None:
         if dataset.get_attribute(capacity_attribute).sum() < control_totals_for_this_year.get_attribute(control_total_attribute).sum():
             logger.log_warning("Capacity (%s) is smaller than the amount to allocate (%s)." % (dataset.get_attribute(capacity_attribute).sum(), 
                                                                                               control_totals_for_this_year.get_attribute(control_total_attribute).sum()))
         C = dataset.get_attribute(capacity_attribute).astype('int32')
         
     all_weights = dataset.get_attribute(weight_attribute)
     outcome = zeros(dataset.size(), dtype='int32')
     for ct_row in range(control_totals_for_this_year.size()):
         is_considered = ones(dataset.size(), dtype='bool8')
         for characteristics in ct_attr:
             is_considered = logical_and(is_considered, dataset.get_attribute(characteristics) == control_totals_for_this_year.get_attribute(characteristics)[ct_row])
         T = control_totals_for_this_year.get_attribute(control_total_attribute)[ct_row]
         it = 1
         while True:
             is_considered_idx = where(is_considered)[0]
             weights = all_weights[is_considered_idx]
             weights_sum = float(weights.sum())
             outcome[is_considered_idx] = round_(outcome[is_considered_idx] + T * (weights/weights_sum)).astype('int32')
             if capacity_attribute is None:
                 break
             diff = outcome[is_considered_idx] - C[is_considered_idx]
             outcome[is_considered_idx] = clip(outcome[is_considered_idx], 0, C[is_considered_idx])
             if it == 1 and C[is_considered_idx].sum() < T:
                 logger.log_warning("Control total %s cannot be met due to a capacity restriction of %s" % (T, C[is_considered_idx].sum()))
             T = where(diff < 0, 0, diff).sum()
             if T <= 0:
                 break
             is_considered = logical_and(is_considered, outcome < C)
             it += 1
     if add_quantity and (outcome_attribute in dataset.get_known_attribute_names()):
         dataset.modify_attribute(name=outcome_attribute, data=outcome+dataset.get_attribute(outcome_attribute))
         logger.log_status('New values added to the attribute %s of dataset %s.' % (outcome_attribute, dataset.get_dataset_name()))
     else:
         dataset.add_primary_attribute(name=outcome_attribute, data=outcome)
         logger.log_status('New values stored into attribute %s of dataset %s.' % (outcome_attribute, dataset.get_dataset_name()))
     dataset.flush_attribute(outcome_attribute)
     return outcome
Exemple #19
0
    def run(self, realestate_dataset,
            year=None, 
            occupied_spaces_variable="occupied_units",
            total_spaces_variable="total_units",
            target_attribute_name='target_vacancy_rate',
            sample_from_dataset = None,
            sample_filter="",
            reset_attribute_value={}, 
            year_built = 'year_built',
            dataset_pool=None,
            append_to_realestate_dataset = False,
            table_name = "development_projects",
            dataset_name = "development_project",
            id_name = 'development_project_id',
            **kwargs):
        """         
        sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
        """
        
        if self.target_vancy_dataset is None:
            raise RuntimeError, "target_vacancy_rate dataset is unspecified."
        
        if not sample_from_dataset:
            sample_from_dataset = realestate_dataset
            
        #if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()
        alldata = dataset_pool.get_dataset('alldata')
        unit_names = dataset_pool.get_dataset('building_type').get_attribute('unit_name')
        sqft_per_job = dataset_pool.get_dataset('building_sqft_per_job')
        zones = realestate_dataset.compute_variables("building.disaggregate(parcel.zone_id)")
        type_ids = realestate_dataset.get_attribute("building_type_id")
        building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(zones.max(), type_ids.max())
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute('year')==year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)
        
        column_names = list(set( self.target_vancy_dataset.get_known_attribute_names() ) - set( [ target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_'] ))
        column_names.sort(reverse=True)
        column_values = dict([ (name, target_vacancy_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        dataset_known_attributes = realestate_dataset.get_known_attribute_names()
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
            if variable not in sample_dataset_known_attributes:
                sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
                
        dataset_known_attributes = realestate_dataset.get_known_attribute_names() #update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1
                
        sampled_index = array([], dtype=int32)

        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
        error_log = ''
        for index in range(target_vacancy_for_this_year.size()):
            this_sampled_index = array([], dtype=int32)
            indicator = ones( realestate_dataset.size(), dtype='bool' )
            sample_indicator = ones( sample_from_dataset.size(), dtype='bool' )
            criterion = {}   # for logging
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = realestate_dataset.get_attribute(attribute)
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (attribute, realestate_dataset.get_dataset_name())
                
                if attribute + '_min' in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute+'_min')[index] 
                    criterion.update({attribute + '_min':amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                        sample_indicator *= sample_attribute >= amin
                if attribute + '_max' in column_names: 
                    amax = target_vacancy_for_this_year.get_attribute(attribute+'_max')[index]
                    criterion.update({attribute + '_max':amax}) 
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names: 
                    aval = column_values[attribute][index] 
                    criterion.update({attribute:aval}) 
                    if aval == -1:
                        continue
                    elif aval == -2:  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        indicator *= dataset_attribute == aval
                        sample_indicator *= sample_attribute == aval
                        
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate 
            ## or in targe_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
            
            logger.be_quiet() #temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
            realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            if unit_names[index]=="residential_units":
                num_units = alldata.compute_variables("alldata.aggregate_all(household.building_type_id==%s)" % (index+1))
                #persons = household_set.compute_variables("%s.number_of_agents(%s)" % (hh_ds_name, person_ds_name), resources=resources)
                num_units = num_units[0]
            else:
                num_units = alldata.compute_variables("alldata.aggregate_all(job.disaggregate(employment_submarket.building_type_id)==%s)" % (index+1))
                num_units = num_units * building_sqft_per_job_table[1, (index+1)]
                num_units = num_units[0]
            #need to make sure that job empsubmarket doesn't rely on building...
            #Must do non-home-based jobs only and then multiply by building_sqft
            logger.talk()
            
            actual_num = (indicator * realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            #target_num = int(round( (indicator * realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /\
            target_num = int(round( num_units /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index]) 
                            ))
            diff = target_num - actual_num
            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int( diff / mean_size )
                    ##sampled at least 1 project when diff > 0, otherwise it is a endless loop when num_of_projects_to_sample = 0
                    num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[0:(1+searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                              ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
            #if diff < 0: #TODO demolition; not yet supported
            
            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0: action = "+" + str(action_num)
                if diff < 0: action = "-" + str(action_num)
            cat = [ str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(diff), action]
            
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:                
                logger.log_status("\t".join(cat))
            
        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
            
        result_data = {}
        result_dataset = None
        index = array([], dtype='int32')
        if sampled_index.size > 0:
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32'))
            ## also add 'independent_variables' to the new dataset
            for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
                if reset_attribute_value.has_key(attribute):
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
        
            if id_name and result_data and id_name not in result_data:
                result_data[id_name] = arange(sampled_index.size, dtype='int32') + 1
        
            storage = StorageFactory().get_storage('dict_storage')
            storage.write_table(table_name=table_name, table_data=result_data)
            
            result_dataset = Dataset(id_name = id_name,
                                      in_storage = storage,
                                      in_table_name = table_name,
                                      dataset_name = dataset_name
                                      )
            index = arange(result_dataset.size())
        
            
        if append_to_realestate_dataset:
            if len(result_data) > 0:
                index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)                
            result_dataset = realestate_dataset
        
        return (result_dataset, index)
Exemple #20
0
    def run(self, in_storage, business_dsname="business"):
        dataset_pool = DatasetPool(storage=in_storage, package_order=['psrc_parcel', 'urbansim_parcel', 'urbansim', 'opus_core'] )
        seed(1)
        allbusinesses = dataset_pool.get_dataset(business_dsname)
        parcels = dataset_pool.get_dataset('parcel')
        buildings = dataset_pool.get_dataset('building')
        parcels.compute_variables(["urbansim_parcel.parcel.residential_units", "number_of_buildings = parcel.number_of_agents(building)", 
                                   "non_residential_sqft = (parcel.aggregate(building.non_residential_sqft)).astype(int32)",
                                   "number_of_res_buildings = parcel.aggregate(urbansim_parcel.building.is_residential)",
                                   "number_of_nonres_buildings = parcel.aggregate(urbansim_parcel.building.is_non_residential)",
                                   "number_of_mixed_use_buildings = parcel.aggregate(urbansim_parcel.building.is_generic_building_type_6)"
                                   ], 
                                  dataset_pool=dataset_pool)
        restypes = [12, 4, 19, 11, 34, 10, 33]
        reslutypes = [13,14,15,24]
        is_valid_business = ones(allbusinesses.size(), dtype='bool8')
        parcels_not_matched = logical_and(in1d(allbusinesses["parcel_id"], parcels.get_id_attribute(), invert=True), allbusinesses["parcel_id"] > 0)
        if(parcels_not_matched.sum() > 0):
            is_valid_business[where(parcels_not_matched)] = False
            logger.log_warning(message="No parcel exists for %s businesses (%s jobs)" % (parcels_not_matched.sum(), 
                                                                                         allbusinesses[self.number_of_jobs_attr][where(parcels_not_matched)].sum()))
        zero_parcel = allbusinesses["parcel_id"]<=0
        if zero_parcel.sum() > 0:
            is_valid_business[where(zero_parcel)] = False
            logger.log_warning(message="%s businesses (%s jobs) located on zero parcel_id" % (zero_parcel.sum(), 
                                                                                         allbusinesses[self.number_of_jobs_attr][where(zero_parcel)].sum()))            
            
        zero_size = logical_and(is_valid_business, allbusinesses[self.number_of_jobs_attr].round() == 0)
        if(sum(zero_size) > 0):
            is_valid_business[where(zero_size)] = False
            logger.log_warning(message="%s businesses are of size 0." % sum(zero_size))
        
        businesses = DatasetSubset(allbusinesses, index=where(is_valid_business)[0])
            
        parcels.add_attribute(name="number_of_workplaces", data=parcels.sum_dataset_over_ids(businesses, constant=1))
        
        has_single_res_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_res_buildings"] == 1) # 1 (1 residential)
        parcels.add_attribute(data=has_single_res_buildings.astype("int32"), name="buildings_code")
        has_mult_res_buildings = logical_and(parcels["number_of_buildings"] > 1,  parcels["number_of_nonres_buildings"] == 0) # 2 (mult residential)
        parcels.modify_attribute("buildings_code", data=2*ones(has_mult_res_buildings.sum()), index=where(has_mult_res_buildings)) 
        has_single_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_nonres_buildings"] == 1), parcels["number_of_mixed_use_buildings"] == 0) # 3 (1 non-res)
        parcels.modify_attribute("buildings_code", data=3*ones(has_single_nonres_buildings.sum()), index=where(has_single_nonres_buildings)) 
        has_mult_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] > 1, parcels["number_of_res_buildings"] == 0), parcels["number_of_mixed_use_buildings"] == 0) # 4 (mult non-res)
        parcels.modify_attribute("buildings_code", data=4*ones(has_mult_nonres_buildings.sum()), index=where(has_mult_nonres_buildings))
        has_single_mixed_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_mixed_use_buildings"] == 1) # 5 (1 mixed-use)
        parcels.modify_attribute("buildings_code", data=5*ones(has_single_mixed_buildings.sum()), index=where(has_single_mixed_buildings))
        has_mult_mixed_buildings = logical_and(parcels["number_of_buildings"] > 1, 
                                               logical_or(logical_and(parcels["number_of_res_buildings"] > 0, parcels["number_of_nonres_buildings"] > 0), 
                                                          logical_or(parcels["number_of_mixed_use_buildings"] > 1, 
                                                                     logical_and(parcels["number_of_res_buildings"] == 0, 
                                                                                 parcels["number_of_mixed_use_buildings"] > 0)))) # 6
        parcels.modify_attribute("buildings_code", data=6*ones(has_mult_mixed_buildings.sum()), index=where(has_mult_mixed_buildings))
        has_no_building_res_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)) # 7 (vacant with res LU type)
        parcels.modify_attribute("buildings_code", data=7*ones(has_no_building_res_lutype.sum()), index=where(has_no_building_res_lutype)) 
        has_no_building_nonres_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)==0) # 8 (vacant with non-res LU type)
        parcels.modify_attribute("buildings_code", data=8*ones(has_no_building_nonres_lutype.sum()), index=where(has_no_building_nonres_lutype))
        
        business_sizes = businesses[self.number_of_jobs_attr].round().astype("int32") 
        business_location = {}
        business_location1wrkpl = zeros(businesses.size(), dtype="int32")
        business_location1wrkplres = zeros(businesses.size(), dtype="int32")
        # sample one building for cases when sampling is required.
        for ibusid in range(businesses.size()):
            idx = where(buildings['parcel_id'] == businesses['parcel_id'][ibusid])[0]
            bldgids = buildings['building_id'][idx]
            business_location[businesses['business_id'][ibusid]] = bldgids
            if bldgids.size == 1:
                business_location1wrkpl[ibusid] = bldgids[0]
            elif bldgids.size > 1:
                business_location1wrkpl[ibusid] = bldgids[sample_noreplace(arange(bldgids.size), 1)]
                if buildings['residential_units'][idx].sum() > 0:
                    # Residential buildings are sampled with probabilities proportional to residential units
                    business_location1wrkplres[ibusid] = bldgids[probsample_noreplace(arange(bldgids.size), 1, prob_array=buildings['residential_units'][idx])]
                else:
                    business_location1wrkplres[ibusid] = business_location1wrkpl[ibusid]
        
        home_based = zeros(business_sizes.sum(), dtype="bool8")
        job_building_id = zeros(business_sizes.sum(), dtype="int32")
        job_array_labels = businesses['business_id'].repeat(business_sizes)
        job_assignment_case = zeros(business_sizes.sum(), dtype="int32")
        processed_bindicator = zeros(businesses.size(), dtype="bool8")
        logger.log_status("Total number of jobs: %s" % home_based.size)
        
        # 1. up to 5 workers-business in 1 residential building
        idx_single_worker = where(business_sizes < 6)[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])
        idx_sngl_wrk_1bld_fit = where(bcode == 1)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_1bld_fit]])
        home_based[jidx] = True
        job_building_id[jidx] = business_location1wrkpl[idx_single_worker[idx_sngl_wrk_1bld_fit]].repeat(business_sizes[idx_single_worker[idx_sngl_wrk_1bld_fit]])
        job_assignment_case[jidx] = 1
        processed_bindicator[idx_single_worker[idx_sngl_wrk_1bld_fit]] = True
        logger.log_status("1. %s jobs (%s businesses) set as home-based due to <6 worker x 1 residential building fit." % (
            business_sizes[idx_single_worker[idx_sngl_wrk_1bld_fit]].sum(), idx_sngl_wrk_1bld_fit.size))
        
        # 2. up to 5 workers-business in multiple residential buildings
        idx_single_worker = where(logical_and(processed_bindicator==0, business_sizes < 6))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])
        idx_sngl_wrk_multbld_fit = where(bcode == 2)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_multbld_fit]])
        home_based[jidx] = True
        job_building_id[jidx] = business_location1wrkplres[idx_single_worker[idx_sngl_wrk_multbld_fit]].repeat(business_sizes[idx_single_worker[idx_sngl_wrk_multbld_fit]])
        job_assignment_case[jidx] = 2
        processed_bindicator[idx_single_worker[idx_sngl_wrk_multbld_fit]] = True
        logger.log_status("2. %s jobs (%s businesses) set as home-based due to single worker x multiple residential buildings fit." % (
            business_sizes[idx_single_worker[idx_sngl_wrk_multbld_fit]].sum(), idx_sngl_wrk_multbld_fit.size))
               
        # 3. single worker in single non-res building (not mixed-use)
        idx_single_worker = where(logical_and(processed_bindicator==0, business_sizes == 1))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])        
        idx_sngl_wrk_single_nonres_fit = where(bcode == 3)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_single_nonres_fit]])
        job_building_id[jidx] = business_location1wrkpl[idx_single_worker[idx_sngl_wrk_single_nonres_fit]] 
        job_assignment_case[jidx] = 3
        processed_bindicator[idx_single_worker[idx_sngl_wrk_single_nonres_fit]] = True
        logger.log_status("3. %s jobs could be placed due to single worker x single non-res building fit." % idx_sngl_wrk_single_nonres_fit.size)        
        
        # 4. single worker in multiple non-res building (not mixed-use)
        idx_single_worker = where(logical_and(processed_bindicator==0, business_sizes == 1))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])        
        idx_sngl_wrk_mult_nonres_fit = where(bcode == 4)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_mult_nonres_fit]])
        job_building_id[jidx] = business_location1wrkpl[idx_single_worker[idx_sngl_wrk_mult_nonres_fit]]
        job_assignment_case[jidx] = 4
        processed_bindicator[idx_single_worker[idx_sngl_wrk_mult_nonres_fit]] = True
        logger.log_status("4. %s jobs could be placed due to single worker x multiple non-res building fit." % idx_sngl_wrk_mult_nonres_fit.size)        

                
        # 5. single worker in single mixed-use building
        idx_single_worker = where(logical_and(processed_bindicator==0, business_sizes == 1))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])        
        idx_sngl_wrk_smu_fit = where(bcode == 5)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_smu_fit]])
        job_building_id[jidx] = business_location1wrkpl[idx_single_worker[idx_sngl_wrk_smu_fit]]  
        job_assignment_case[jidx] = 5
        processed_bindicator[idx_single_worker[idx_sngl_wrk_smu_fit]] = True
        logger.log_status("5. %s jobs in single worker x single mixed-use building." % idx_sngl_wrk_smu_fit.size)          
        
        # 6. single worker in multiple mixed-type buildings
        idx_single_worker = where(logical_and(processed_bindicator==0, business_sizes == 1))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker])        
        idx_sngl_wrk_mmu_fit = where(bcode == 6)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_mmu_fit]])
        job_building_id[jidx] = business_location1wrkpl[idx_single_worker[idx_sngl_wrk_mmu_fit]]
        bldtype = buildings.get_attribute_by_id("building_type_id", business_location1wrkpl[idx_single_worker[idx_sngl_wrk_mmu_fit]])
        is_bldtype_res = in1d(bldtype, restypes)
        home_based[in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_mmu_fit]][where(is_bldtype_res)])] = True
        job_assignment_case[jidx] = 6
        processed_bindicator[idx_single_worker[idx_sngl_wrk_mmu_fit]] = True
        logger.log_status("6. %s jobs in single worker x multiple mixed-type buildings. %s jobs classified as home-based." % (idx_sngl_wrk_mmu_fit.size, is_bldtype_res.sum()))            

        # 7. up to 5 workers-business in residential parcel with no building
        idx_single_worker = where(logical_and(processed_bindicator==0, business_sizes < 6))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_single_worker]) 
        idx_sngl_wrk_vacant_res = where(bcode == 7)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_single_worker[idx_sngl_wrk_vacant_res]])
        job_assignment_case[jidx] = 7
        home_based[jidx] = True
        processed_bindicator[idx_single_worker[idx_sngl_wrk_vacant_res]] = True
        logger.log_status("7. %s jobs (%s businesses of size <6) could not be placed due to non-existing buildings in parcels with residential LU type." % (
            business_sizes[idx_single_worker[idx_sngl_wrk_vacant_res]].sum(), idx_sngl_wrk_vacant_res.size))
        
        

        # 9. 6+ workers in single residential building: do not place - will go into ELCM
        idx_more_workers = where(logical_and(processed_bindicator==0, business_sizes > 5))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_more_workers])
        idx_sngl_wrk_fit = where(bcode == 1)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_more_workers[idx_sngl_wrk_fit]])
        #job_building_id[jidx] = business_location1wrkpl[idx_more_workers[idx_sngl_wrk_fit]].repeat(business_sizes[idx_more_workers[idx_sngl_wrk_fit]])
        #home_based[jidx] = True
        job_assignment_case[jidx] = 9
        processed_bindicator[idx_more_workers[idx_sngl_wrk_fit]] = True        
        logger.log_status("9. %s jobs (%s businesses) in 6+ worker x single residential building. Not placed." % (
            business_sizes[idx_more_workers[idx_sngl_wrk_fit]].sum(), idx_sngl_wrk_fit.size))
        
        # 10. 6+ workers in multiple residential building: do not place - will go into ELCM
        idx_more_workers = where(logical_and(processed_bindicator==0, business_sizes > 5))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_more_workers])
        idx_sngl_wrk_fit = where(bcode == 2)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_more_workers[idx_sngl_wrk_fit]])
        job_assignment_case[jidx] = 10
        processed_bindicator[idx_more_workers[idx_sngl_wrk_fit]] = True
        # sample buildings to businesses by parcels 
        #bpcls = unique(businesses["parcel_id"][idx_more_workers[idx_sngl_wrk_fit]])
        #for ipcl in range(bpcls.size):
            #bidx = where(buildings['parcel_id'] == bpcls[ipcl])[0]
            #bldgids = buildings['building_id'][bidx]
            #bussids = businesses['business_id'][businesses["parcel_id"] == bpcls[ipcl]]
            ## multiply by units for sampling prop. to units rather than buildings
            #bldgids = bldgids.repeat(maximum(1, buildings['residential_units'][bidx].astype('int32'))) 
            #if bldgids.size < bussids.size:
                #bldarray = bldgids.repeat(1+ceil((bussids.size - bldgids.size)/float(bldgids.size)) )
            #else:
                #bldarray = bldgids
            #shuffle(bldarray) # randomly reorder in-place
            #for ib in range(bussids.size):
                #jidx = where(job_array_labels == bussids[ib])
                #job_building_id[jidx] = bldarray[ib]
                #home_based[jidx] = True
                #job_assignment_case[jidx] = 10
        logger.log_status("10. %s jobs (%s businesses) in 6+ worker x multiple residential building. Not placed." % (
            business_sizes[idx_more_workers[idx_sngl_wrk_fit]].sum(), idx_sngl_wrk_fit.size))        


        # 11. single workplace, 2+ workers in single non-res or mixed-use building (11.)
        idx_2plus_workers = where(logical_and(processed_bindicator==0, business_sizes > 1))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_2plus_workers])
        workplace_filter = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"][idx_2plus_workers])
        idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_or(bcode==3, bcode==5), workplace_filter==1))[0]
        which_labels = where(in1d(job_array_labels, businesses['business_id'][idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]]))[0]
        job_building_id[which_labels] = business_location1wrkpl[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]].repeat(business_sizes[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]])   
        job_assignment_case[which_labels] = 11
        processed_bindicator[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]] = True
        logger.log_status("11. %s jobs (%s businesses) could be placed due to single workplace x 2+ workers x single non-res/mixed-use building fit." % (
            business_sizes[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]].sum(), idx_sngl_wrkplace_2plus_workers.size))
        
        # 12. single workplace, 2+ workers in multiple mixed-type building
        idx_2plus_workers = where(logical_and(processed_bindicator==0, business_sizes > 1))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_2plus_workers])
        workplace_filter = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"][idx_2plus_workers])
        idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_or(bcode==6, bcode==4), workplace_filter==1))[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]])
        job_building_id[jidx] = business_location1wrkpl[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]].repeat(business_sizes[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]])    
        job_assignment_case[jidx] = 12
        processed_bindicator[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]] = True
        logger.log_status("12. %s jobs (%s businesses) could be placed due to single workplace x 2+ workers x multiple non-res/mixed building fit." % (
            business_sizes[idx_2plus_workers[idx_sngl_wrkplace_2plus_workers]].sum(), idx_sngl_wrkplace_2plus_workers.size))

        # 13. multiple workplaces, 2+ workers in single non-res or mixed building
        idx_2plus_workers = where(logical_and(processed_bindicator==0, business_sizes > 1))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_2plus_workers])
        workplace_filter = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"][idx_2plus_workers])
        idx_mult_wrkplace_2plus_workers = where(logical_and(logical_or(bcode==3, bcode==5), workplace_filter > 1))[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_2plus_workers[idx_mult_wrkplace_2plus_workers]])
        job_building_id[jidx] = business_location1wrkpl[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]].repeat(business_sizes[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]])
        job_assignment_case[jidx] = 13
        processed_bindicator[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]] = True
        logger.log_status("13. %s jobs (%s businesses) could be placed due to multiple workplaces x 2+ workers x single non-res/mixed building fit." % (
            business_sizes[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]].sum(), idx_mult_wrkplace_2plus_workers.size))
        
        # 14. multiple workplaces, 2+ workers in multiple non-res or mixed building
        idx_2plus_workers = where(logical_and(processed_bindicator==0, business_sizes > 1))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_2plus_workers])
        workplace_filter = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"][idx_2plus_workers])
        idx_mult_wrkplace_2plus_workers = where(logical_and(logical_or(bcode==4, bcode==6), workplace_filter > 1))[0]
        processed_bindicator[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]] = True
        # sample buildings to businesses by parcels 
        bpcls = unique(businesses["parcel_id"][idx_2plus_workers[idx_mult_wrkplace_2plus_workers]])
        hbasedsum = home_based.sum()
        for ipcl in range(bpcls.size):
            bldgids = buildings['building_id'][buildings['parcel_id'] == bpcls[ipcl]]
            bussids = businesses['business_id'][businesses["parcel_id"] == bpcls[ipcl]]
            if bldgids.size < bussids.size:
                bldarray = bldgids.repeat(1+ceil((bussids.size - bldgids.size)/float(bldgids.size)))
            else:
                bldarray = bldgids
            shuffle(bldarray) # randomly reorder in-place
            is_res = in1d(bldarray, restypes)
            for ib in range(bussids.size):
                jidx = where(job_array_labels == bussids[ib])
                job_building_id[jidx] = bldarray[ib]   
                home_based[jidx] = is_res
                job_assignment_case[jidx] = 14
        logger.log_status("14. %s jobs (%s businesses) could be placed due to multiple workplaces x 2+ workers x multiple non-res/mixed building fit. Classify %s jobs as home-based." % (
            business_sizes[idx_2plus_workers[idx_mult_wrkplace_2plus_workers]].sum(), idx_mult_wrkplace_2plus_workers.size, home_based.sum()-hbasedsum))
        
        # 15. 6+ workers in residential parcel with no building
        idx_2plus_workers = where(logical_and(processed_bindicator==0, business_sizes > 5))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_2plus_workers]) 
        idx_wrk_vacant_res = where(bcode == 7)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_2plus_workers[idx_wrk_vacant_res]])
        job_assignment_case[jidx] = 15
        processed_bindicator[idx_2plus_workers[idx_wrk_vacant_res]] = True
        logger.log_status("15. %s jobs (%s businesses of 6+ workers) could not be placed due to non-existing buildings in parcels with residential LU type." % (
            business_sizes[idx_2plus_workers[idx_wrk_vacant_res]].sum(), idx_wrk_vacant_res.size))
        
        # 16. nonresidential parcel with no building
        idx_any_workers = where(processed_bindicator==0)[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_any_workers]) 
        idx_wrk_vacant_nonres = where(bcode == 8)[0]
        jidx = in1d(job_array_labels, businesses['business_id'][idx_any_workers[idx_wrk_vacant_nonres]])
        job_assignment_case[jidx] = 16
        processed_bindicator[idx_any_workers[idx_wrk_vacant_nonres]] = True
        logger.log_status("16. %s jobs (%s businesses) could not be placed due to non-existing buildings in parcels with rnon-esidential LU type." % (
            business_sizes[idx_any_workers[idx_wrk_vacant_nonres]].sum(), idx_wrk_vacant_nonres.size))        
        
        
        
        # jobs in messy buildings
        idx_worker = where(logical_and(processed_bindicator==0, business_sizes > 0))[0]
        bcode = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"][idx_worker])
        idx_messy_fit = where(bcode == 0)[0]
        processed_bindicator[idx_worker[idx_messy_fit]] = True
        logger.log_status("%s jobs (%s businesses) could not be placed due to messy buildings." % (
            business_sizes[idx_worker[idx_messy_fit]].sum(), idx_messy_fit.size))         

        
        logger.log_status("So far %s (%s percent) home-based jobs." % (home_based.sum(), round(home_based.sum()/(home_based.size/100.),2)))
        logger.log_status("So far %s percent (%s) jobs (%s businesses) processed. %s jobs (%s businesses) remain to be processed." % \
                          (round(business_sizes[processed_bindicator].sum()/(home_based.size/100.),2),
                           business_sizes[processed_bindicator].sum(), processed_bindicator.sum(),
                          business_sizes[logical_not(processed_bindicator)].sum(), business_sizes[logical_not(processed_bindicator)].size))
        
        # create job dataset
        job_data = {"job_id": arange(job_building_id.size)+1,
                    "home_based" : home_based,
                    "building_id": job_building_id,
                    "business_id": job_array_labels,
                    "sector_id": zeros(job_building_id.size),
                    "parcel_id": zeros(job_building_id.size),
                    "assignment_case": job_assignment_case}
        
        for ib in range(businesses.size()):
            idx = where(job_data['business_id'] == businesses['business_id'][ib])
            job_data["sector_id"][idx] = businesses['sector_id'][ib]
            job_data["parcel_id"][idx] = businesses['parcel_id'][ib]

        dictstorage = StorageFactory().get_storage('dict_storage')
        dictstorage.write_table(table_name="jobs", table_data=job_data)        
        return Dataset(in_storage=dictstorage, in_table_name="jobs", dataset_name="job", id_name="job_id")
    def run(self, year=None, dataset_pool=None, **kwargs):
        """
        """
        if dataset_pool is None:
            dataset_pool = SessionConfiguration().get_dataset_pool()

        if year is None:
            year = SimulationState().get_current_time()

        this_year_index = where(
            self.scheduled_events.get_attribute('year') == year)[0]
        scheduled_events_for_this_year = DatasetSubset(self.scheduled_events,
                                                       this_year_index)
        scheduled_events_for_this_year.load_dataset_if_not_loaded()
        column_names = list(
            set(self.scheduled_events.get_known_attribute_names()) - set([
                'year', 'action', 'attribute', 'amount', 'event_id',
                '_hidden_id_'
            ]))
        column_names.sort()
        #        column_values = dict([ (name, scheduled_events_for_this_year.get_attribute(name)) for name in column_names])

        for index in range(scheduled_events_for_this_year.size()):
            indicator = ones(self.dataset.size(), dtype='bool')
            event_attr = {}
            for attribute in column_names:
                if attribute in self.dataset.get_known_attribute_names():
                    dataset_attribute = self.dataset.get_attribute(attribute)
                else:
                    ## this is done inside the loop because some action may delete computed attributes, such as dataset.add_elements()
                    try:
                        dataset_attribute = self.dataset.compute_one_variable_with_unknown_package(
                            attribute, dataset_pool=dataset_pool)
                    except:
                        raise ValueError, "attribute %s used in scheduled events dataset can not be found in dataset %s" % (
                            attribute, self.dataset.get_dataset_name())


#                if attribute in column_names:
                aval = scheduled_events_for_this_year.get_attribute(
                    attribute)[index]
                if aval == -1:
                    continue  # ignore if column value is -1
                else:
                    indicator *= dataset_attribute == aval
                    event_attr.update({attribute: aval})

            #agents in dataset satisfying all conditions are identified by indicator
            legit_index = where(indicator)[0]

            this_event = scheduled_events_for_this_year.get_data_element(index)
            if not hasattr(this_event, 'attribute'):
                action_attr_name = ''
            else:
                action_attr_name = this_event.attribute
            action_function = getattr(self,
                                      '_' + this_event.action.strip().lower())
            action_function(amount=this_event.amount,
                            attribute=action_attr_name,
                            dataset=self.dataset,
                            index=legit_index,
                            data_dict=event_attr)

            self.post_run(self.dataset, legit_index, **kwargs)

        return self.dataset
Exemple #22
0
 def run(self, dataset, outcome_attribute, weight_attribute, 
              control_totals, current_year, control_total_attribute=None, 
              year_attribute='year', capacity_attribute=None, add_quantity=False, dataset_pool=None):
     """'dataset' is a Dataset for which a quantity 'outcome_attribute' is created. The total amount of the quantity is 
     given by the attribute 'control_total_attribute' of the 'control_totals' Dataset. If it is not given, it is assumed 
     to have the same name as 'outcome_attribute'. The 'weight_attribute' of 'dataset' determines the allocation weights.
     The 'control_totals' Dataset contains an attribute 'year' (or alternatively, an attribute given by the 'year_attribute' argument)
     and optionally other attributes that must be known to the 'dataset' (such as a geography). For each row of the control_totals dataset
     for which year matches the 'current_year', the total amount is distributed among the corresponding members of 'dataset' according to weights.
     If a 'capacity_attribute' is given (attribute of 'dataset'), the algorithm removes any allocations that exceeds the capacity and 
     redistributes it among remaining members. The resulting values are appended to 'dataset' as 'outcome_attribute' (as primary attribute).
     If add_quantity is True and the 'outcome_attribute' exists in dataset, the resulting values are added to the current values of 
     'outcome_attribute'.
     """
     python_version = '2.%s' % (sys.version_info[1])
     ct_attr = control_totals.get_known_attribute_names()
     if year_attribute not in ct_attr:
         raise StandardError, "Year attribute '%s' must be a known attribute of the control totals dataset." % year_attribute
     ct_attr.remove(year_attribute)
     if control_total_attribute is None:
         control_total_attribute = outcome_attribute
     if control_total_attribute not in ct_attr:
         raise StandardError, "Attribute '%s' must be a known attribute of the control totals dataset." % control_total_attribute
     ct_attr.remove(control_total_attribute)
     if control_totals._is_hidden_id():
         ct_attr.remove(control_totals.id_name()[0])
         
     # compute weights and other attributes necessary for allocation
     attrs_to_compute = [weight_attribute] + ct_attr
     if capacity_attribute is not None:
         attrs_to_compute.append(capacity_attribute)
     for attr in attrs_to_compute:
         try:
             dataset.compute_variables(attr, dataset_pool=dataset_pool)
         except:
             dataset.compute_one_variable_with_unknown_package(attr, dataset_pool=dataset_pool)
     
     # create subset of control totals for the current year
     year_index = where(control_totals.get_attribute(year_attribute) == current_year)[0]
     if year_index.size <= 0:
         logger.log_warning("No control total for year %s" % current_year)
         return None
     control_totals_for_this_year = DatasetSubset(control_totals, year_index)
     
     # check capacity
     if capacity_attribute is not None:
         if dataset.get_attribute(capacity_attribute).sum() < control_totals_for_this_year.get_attribute(control_total_attribute).sum():
             logger.log_warning("Capacity (%s) is smaller than the amount to allocate (%s)." % (dataset.get_attribute(capacity_attribute).sum(), 
                                                                                               control_totals_for_this_year.get_attribute(control_total_attribute).sum()))
         C = dataset.get_attribute(capacity_attribute).astype('int32')
         
     all_weights = dataset.get_attribute(weight_attribute)
     outcome = zeros(dataset.size(), dtype='int32')
     for ct_row in range(control_totals_for_this_year.size()):
         is_considered = ones(dataset.size(), dtype='bool8')
         for characteristics in ct_attr:
             is_considered = logical_and(is_considered, dataset.get_attribute(characteristics) == control_totals_for_this_year.get_attribute(characteristics)[ct_row])
         T = control_totals_for_this_year.get_attribute(control_total_attribute)[ct_row]
         it = 1
         while True:
             is_considered_idx = where(is_considered)[0]
             # Make sure to jump out of the loop when no index is considered, or else cause the index out of boundary error in cum_prob[-1]
             if is_considered_idx.size < 1:
                 break
             # End of Make sure
             weights = all_weights[is_considered_idx]
             weights_sum = float(weights.sum())
             # Add: Do the prob sample for exact match --Hanyi
             prob_array = (weights*1.0/weights_sum).astype('float64')
             cum_prob = cumsum(prob_array, dtype='float64')
             sample_prob = uniform(0, cum_prob[-1], T)
             sampled_index = searchsorted(cum_prob, sample_prob)
             sampled_index = sampled_index.astype('int32')
             # due to precision problems, searchsorted could return index = cum_prob.size
             sampled_index = clip(sampled_index, 0, cum_prob.size-1)
             if python_version == '2.6':
                 # Start: Python 2.6 version
                 count = zeros(prob_array.size).astype('int32')
                
                 for i in range(0,prob_array.size):
                     sub_indx = where(sampled_index==i)[0]
                     count[i] = sub_indx.size
                 # End: Python 2.6 version
             elif python_version == '2.7':
                 # Start: Python 2.7 version
                 from collections import Counter
                 dict_count_ini = dict([(x,0) for x in range(prob_array.size)])
                 ## Alternative init dict method
                 #from numpy import arange
                 #dict_count_ini_keys = arange(prob_array.size).astype('int32')
                 #dict_count_ini_values = zeros(prob_array.size).astype('int32')
                 #dict_count_ini = dict(zip(dict_count_ini_keys, dict_count_ini_values))
                 ## Alternative End
                 dict_count = Counter(sampled_index)
                 dict_count_ini.update(dict(dict_count))
                 count = array(dict(sorted(dict_count_ini.items())).values())
                 # End: Python 2.7 version
             
             outcome[is_considered_idx] = outcome[is_considered_idx] + count
             # End
             # Comment out the line below --Hanyi
             #outcome[is_considered_idx] = round_(outcome[is_considered_idx] + T * (weights/weights_sum)).astype('int32')
             if capacity_attribute is None:
                 break
             diff = outcome[is_considered_idx] - C[is_considered_idx]
             outcome[is_considered_idx] = clip(outcome[is_considered_idx], 0, C[is_considered_idx])
             if it == 1 and C[is_considered_idx].sum() < T:
                 logger.log_warning("Control total %s cannot be met due to a capacity restriction of %s" % (T, C[is_considered_idx].sum()))
             T = where(diff < 0, 0, diff).sum()
             if T <= 0:
                 break
             is_considered = logical_and(is_considered, outcome < C)
             it += 1
     if add_quantity and (outcome_attribute in dataset.get_known_attribute_names()):
         dataset.modify_attribute(name=outcome_attribute, data=outcome+dataset.get_attribute(outcome_attribute))
         logger.log_status('New values added to the attribute %s of dataset %s.' % (outcome_attribute, dataset.get_dataset_name()))
     else:
         dataset.add_primary_attribute(name=outcome_attribute, data=outcome)
         logger.log_status('New values stored into attribute %s of dataset %s.' % (outcome_attribute, dataset.get_dataset_name()))
     dataset.flush_attribute(outcome_attribute)
     return outcome
    def prepare_for_run(self, dataset_pool, 
                        create_proposal_set=True,
                        parcel_filter_for_new_development=None, 
                        parcel_filter_for_redevelopment=None, 
                        template_filter=None,
                        spec_replace_module_variable_pair=None,
                        proposed_units_variable="urbansim_parcel.development_project_proposal.units_proposed",
                        **kwargs):
        """create development project proposal dataset from parcels and development templates.
        spec_replace_module_variable_pair is a tuple with two elements: module name, variable within the module
        that contans a dictionary of model variables to be replaced in the specification.
        """
        specification, coefficients, dummy = RegressionModel.prepare_for_run(self, **kwargs)
        try:
            existing_proposal_set_parent = dataset_pool.get_dataset('development_project_proposal')
            if 'units_proposed' not in existing_proposal_set_parent.get_known_attribute_names():
                ## compute 'units_proposed' and add it as a primary attribute (as it may be missing when loaded from the base_year_data)
                units_proposed = existing_proposal_set_parent.compute_variables(proposed_units_variable, dataset_pool)
                existing_proposal_set_parent.add_attribute(units_proposed, "units_proposed", AttributeType.PRIMARY)
            
            #load proposals whose status_id are not of id_tentative or id_not_available
            available_idx = where(in1d(existing_proposal_set_parent.get_attribute("status_id"), 
                                       array([DevelopmentProjectProposalDataset.id_active,
                                              DevelopmentProjectProposalDataset.id_proposed,
                                              DevelopmentProjectProposalDataset.id_planned,
                                              DevelopmentProjectProposalDataset.id_with_velocity])))[0]
            existing_proposal_set = DatasetSubset(existing_proposal_set_parent, available_idx)
            # Code updated by Hanyi Li, MAG 6/8/2010
            # Replacing the cached 'development_project_proposal' dataset with
            # the filtered dataset 'existing_proposal_set'
            dataset_pool.replace_dataset(existing_proposal_set_parent.get_dataset_name(), existing_proposal_set)
        except:
            existing_proposal_set = None
        
        parcels = dataset_pool.get_dataset('parcel')
        templates = dataset_pool.get_dataset('development_template')

        # It is important that during this method no variable flushing happens, since
        # we create datasets of the same name for different purposes (new development and redevelopment)
        # and flushing would mix them up
        flush_variables_current = SimulationState().get_flush_datasets()
        SimulationState().set_flush_datasets(False)
        
        # Code added by Jesse Ayers, MAG, 9/14/2009
        # Getting an index of parcels that have actively developing projects (those on a velocity function)
        # and making sure that new proposals are not generated for them
        if existing_proposal_set and existing_proposal_set.size()>0:
            parcels_with_proposals = existing_proposal_set.get_attribute('parcel_id')
            parcels_with_proposals_idx = parcels.get_id_index(parcels_with_proposals)
            if parcel_filter_for_new_development is not None:
                if parcel_filter_for_new_development[parcel_filter_for_new_development.find('=')+1] == '=':
                    filter = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
                else:
                    parcel_filter_for_new_development = parcel_filter_for_new_development[parcel_filter_for_new_development.find('=')+1:].lstrip()
                    filter = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
                index1 = where(parcels.compute_variables(filter))[0]

        else:
            if parcel_filter_for_new_development is not None:
                index1 = where(parcels.compute_variables(parcel_filter_for_new_development))[0]
            else:
                index1 = None
            
        if template_filter is not None:
            try:
                index2 = where(templates.compute_variables(template_filter))[0]
            except Exception, e:
                logger.log_warning( "template_filter is set to %s, but there is an error when computing it: %s"
                                   % (template_filter, e) )
                index2 = None
    def run(self, year=None, 
            target_attribute_name='number_of_households', 
            sample_filter="", 
            reset_dataset_attribute_value={}, 
            dataset_pool=None,  **kwargs):
        """ sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        """
        #if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()

        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.control_totals.get_attribute('year')==year)[0]
        control_totals_for_this_year = DatasetSubset(self.control_totals, this_year_index)
        column_names = list(set( self.control_totals.get_known_attribute_names() ) - set( [ target_attribute_name, 'year', '_hidden_id_'] ))
        column_names.sort(reverse=True)
        column_values = dict([ (name, control_totals_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        dataset_known_attributes = self.dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                self.dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
        dataset_known_attributes = self.dataset.get_known_attribute_names() #update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = self.dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = self.dataset.get_attribute(short_name)
        else:
            filter_indicator = 1

        to_be_cloned = array([], dtype=int32)
        to_be_removed = array([], dtype=int32)
        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
        else:        
            logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
        error_log = ''
        for index in range(control_totals_for_this_year.size()):
            lucky_index = None
            indicator = ones( self.dataset.size(), dtype='bool' )
            criterion = {}
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = self.dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in control total dataset can not be found in dataset %s" % (attribute, self.dataset.get_dataset_name())
                if attribute + '_min' in column_names:
                    amin = column_values[attribute + '_min'][index]
                    criterion.update({attribute + '_min':amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                if attribute + '_max' in column_names: 
                    amax = column_values[attribute+'_max'][index]
                    criterion.update({attribute + '_max':amax}) 
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                if attribute in column_names: 
                    aval = column_values[attribute][index] 
                    criterion.update({attribute:aval}) 
                    if aval == -1:
                        continue
                    elif aval == -2:   ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        complement_values = setdiff1d( dataset_attribute, column_values[attribute] )
                        has_one_of_the_complement_value = zeros(dataset_attribute.size, dtype='bool')
                        for value in complement_values:
                            has_one_of_the_complement_value += dataset_attribute == value
                        indicator *= has_one_of_the_complement_value
                    else:
                        indicator *= dataset_attribute == aval
                        
            target_num = column_values[target_attribute_name][index]
            ## if accounting attribute is None, count number of agents with indicator = True 
            if self.dataset_accounting_attribute is None:
                actual_num = indicator.sum()
                action_num = 0
                diff = target_num - actual_num
                if actual_num != target_num:
                    legit_index = where(logical_and(indicator, filter_indicator))[0]
                    if legit_index.size > 0:                    
                        if actual_num < target_num:
                            lucky_index = sample_replace(legit_index, target_num - actual_num)
                            to_be_cloned = concatenate((to_be_cloned, lucky_index))
                        elif actual_num > target_num:
                            lucky_index = sample_noreplace(legit_index, actual_num-target_num)
                            to_be_removed = concatenate((to_be_removed, lucky_index))
                        action_num = lucky_index.size
                    else:
                        error_log += "There is nothing to sample from %s and no action will happen for" % self.dataset.get_dataset_name() + \
                                  ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
                        
            else: 
                ## sum accounting attribute for agents with indicator = True; 
                ## assume dataset_accouting_attribute is a primary attribute 
                accounting = self.dataset.get_attribute(self.dataset_accounting_attribute) * indicator
                actual_num = accounting.sum()
                mean_size = float(actual_num) / indicator.sum()
                action_num = 0
                diff = target_num - actual_num
                if actual_num != target_num:
                    legit_index = where(logical_and(indicator, filter_indicator))[0]
                    if legit_index.size > 0:
                        while actual_num + action_num < target_num:
                            lucky_index = sample_replace(legit_index, ceil((target_num - actual_num - action_num)/mean_size) )
                            action_num += accounting[lucky_index].sum()
                            to_be_cloned = concatenate((to_be_cloned, lucky_index))
                        while actual_num - action_num > target_num:
                            lucky_index = sample_noreplace(legit_index, ceil((actual_num - target_num - action_num)/mean_size) )
                            action_num += accounting[lucky_index].sum()
                            to_be_removed = concatenate((to_be_removed, lucky_index))                
                    else:
                        error_log += "There is nothing to sample from %s and no action will happen for " % self.dataset.get_dataset_name() + \
                                  ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
            
            ##log status
            action = "0"
            if lucky_index is not None:                    
                if actual_num < target_num: action = "+" + str(action_num)
                if actual_num > target_num: action = "-" + str(action_num)
                    
            cat = [ str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(diff), action]
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:
                logger.log_status("\t".join(cat))

        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
                    
        clone_data = {}
        if to_be_cloned.size > 0:
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##self.dataset.duplicate_rows(to_be_cloned)
            logger.log_status()
            for attribute in dataset_known_attributes:
                if reset_dataset_attribute_value.has_key(attribute):
                    clone_data[attribute] = resize(array(reset_dataset_attribute_value[attribute]), to_be_cloned.size)
                else:
                    clone_data[attribute] = self.dataset.get_attribute_by_index(attribute, to_be_cloned)
                    
        self.post_run(self.dataset, to_be_cloned, to_be_removed, **kwargs)
        
        if to_be_removed.size > 0:
            logger.log_status()
            self.dataset.remove_elements(to_be_removed)
            
        if clone_data:
            self.dataset.add_elements(data=clone_data, change_ids_if_not_unique=True)
            
        return self.dataset
    def run(self,
            dataset,
            outcome_attribute,
            weight_attribute,
            control_totals,
            current_year,
            control_total_attribute=None,
            year_attribute='year',
            capacity_attribute=None,
            add_quantity=False,
            dataset_pool=None):
        """'dataset' is a Dataset for which a quantity 'outcome_attribute' is created. The total amount of the quantity is 
        given by the attribute 'control_total_attribute' of the 'control_totals' Dataset. If it is not given, it is assumed 
        to have the same name as 'outcome_attribute'. The 'weight_attribute' of 'dataset' determines the allocation weights.
        The 'control_totals' Dataset contains an attribute 'year' (or alternatively, an attribute given by the 'year_attribute' argument)
        and optionally other attributes that must be known to the 'dataset' (such as a geography). For each row of the control_totals dataset
        for which year matches the 'current_year', the total amount is distributed among the corresponding members of 'dataset' according to weights.
        If a 'capacity_attribute' is given (attribute of 'dataset'), the algorithm removes any allocations that exceeds the capacity and 
        redistributes it among remaining members. The resulting values are appended to 'dataset' as 'outcome_attribute' (as primary attribute).
        If add_quantity is True and the 'outcome_attribute' exists in dataset, the resulting values are added to the current values of 
        'outcome_attribute'.
        """
        ct_attr = control_totals.get_known_attribute_names()
        if year_attribute not in ct_attr:
            raise StandardError, "Year attribute '%s' must be a known attribute of the control totals dataset." % year_attribute
        ct_attr.remove(year_attribute)
        if control_total_attribute is None:
            control_total_attribute = outcome_attribute
        if control_total_attribute not in ct_attr:
            raise StandardError, "Attribute '%s' must be a known attribute of the control totals dataset." % control_total_attribute
        ct_attr.remove(control_total_attribute)
        if control_totals._is_hidden_id():
            ct_attr.remove(control_totals.id_name()[0])

        # compute weights and other attributes necessary for allocation
        attrs_to_compute = [weight_attribute] + ct_attr
        if capacity_attribute is not None:
            attrs_to_compute.append(capacity_attribute)
        for attr in attrs_to_compute:
            try:
                dataset.compute_variables(attr, dataset_pool=dataset_pool)
            except:
                dataset.compute_one_variable_with_unknown_package(
                    attr, dataset_pool=dataset_pool)

        # create subset of control totals for the current year
        year_index = where(
            control_totals.get_attribute(year_attribute) == current_year)[0]
        if year_index.size <= 0:
            logger.log_warning("No control total for year %s" % current_year)
            return None
        control_totals_for_this_year = DatasetSubset(control_totals,
                                                     year_index)

        # check capacity
        if capacity_attribute is not None:
            if dataset.get_attribute(capacity_attribute).sum(
            ) < control_totals_for_this_year.get_attribute(
                    control_total_attribute).sum():
                logger.log_warning(
                    "Capacity (%s) is smaller than the amount to allocate (%s)."
                    % (dataset.get_attribute(capacity_attribute).sum(),
                       control_totals_for_this_year.get_attribute(
                           control_total_attribute).sum()))
            C = dataset.get_attribute(capacity_attribute).astype('int32')

        all_weights = dataset.get_attribute(weight_attribute)
        outcome = zeros(dataset.size(), dtype='int32')
        for ct_row in range(control_totals_for_this_year.size()):
            is_considered = ones(dataset.size(), dtype='bool8')
            for characteristics in ct_attr:
                is_considered = logical_and(
                    is_considered,
                    dataset.get_attribute(characteristics) ==
                    control_totals_for_this_year.get_attribute(
                        characteristics)[ct_row])
            T = control_totals_for_this_year.get_attribute(
                control_total_attribute)[ct_row]
            it = 1
            while True:
                is_considered_idx = where(is_considered)[0]
                weights = all_weights[is_considered_idx]
                weights_sum = float(weights.sum())
                outcome[is_considered_idx] = round_(
                    outcome[is_considered_idx] + T *
                    (weights / weights_sum)).astype('int32')
                if capacity_attribute is None:
                    break
                diff = outcome[is_considered_idx] - C[is_considered_idx]
                outcome[is_considered_idx] = clip(outcome[is_considered_idx],
                                                  0, C[is_considered_idx])
                if it == 1 and C[is_considered_idx].sum() < T:
                    logger.log_warning(
                        "Control total %s cannot be met due to a capacity restriction of %s"
                        % (T, C[is_considered_idx].sum()))
                T = where(diff < 0, 0, diff).sum()
                if T <= 0:
                    break
                is_considered = logical_and(is_considered, outcome < C)
                it += 1
        if add_quantity and (outcome_attribute
                             in dataset.get_known_attribute_names()):
            dataset.modify_attribute(name=outcome_attribute,
                                     data=outcome +
                                     dataset.get_attribute(outcome_attribute))
            logger.log_status(
                'New values added to the attribute %s of dataset %s.' %
                (outcome_attribute, dataset.get_dataset_name()))
        else:
            dataset.add_primary_attribute(name=outcome_attribute, data=outcome)
            logger.log_status(
                'New values stored into attribute %s of dataset %s.' %
                (outcome_attribute, dataset.get_dataset_name()))
        dataset.flush_attribute(outcome_attribute)
        return outcome
Exemple #26
0
    def run(self, n=500, 
            realestate_dataset_name = 'building',
            current_year=None,
            occupied_spaces_variable="occupied_spaces",
            total_spaces_variable="total_spaces",
            minimum_spaces_attribute="minimum_spaces",
            within_parcel_selection_weight_string=None,
            within_parcel_selection_n=0,
            within_parcel_selection_compete_among_types=False,
            within_parcel_selection_threshold=75,
            within_parcel_selection_MU_same_weight=False,
            within_parcel_selection_transpose_interpcl_weight=True,
            run_config=None,
            debuglevel=0):
        """
        run method of the Development Project Proposal Sampling Model
        
        **Parameters**
        
            **n** : int, sample size for each iteration
                   
                   sample n proposals at a time, which are then evaluated one by one until the 
                   target vacancies are satisfied or proposals are running out
                   
            **realestate_dataset_name** : string, name of real estate dataset
            
            **current_year**: int, simulation year. If None, get value from SimulationState
            
            **occupied_spaces_variable** : string, variable name for calculating how much spaces are currently occupied
                                        
                                          It can either be a variable for real_estate dataset that returns 
                                          the amount spaces being occupied or a target_vacancy attribute 
                                          that contains the name of real_estate variables.   
            
            **total_spaces_variable** : string, variable name for calculating total existing spaces
            
        **Returns**
        
            **proposal_set** : indices to proposal_set that are accepted 
            
            **demolished_buildings** : buildings to be demolished for re-development
        """

        self.accepted_proposals = []
        self.demolished_buildings = []  #id of buildings to be demolished

        if self.proposal_set.n <= 0:
            logger.log_status("The size of proposal_set is 0; no proposals to consider, skipping DPPSM.")
            return (self.proposal_set, self.demolished_buildings)

        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')

        if current_year is None:
            year = SimulationState().get_current_time()
        else:
            year = current_year
        this_year_index = where(target_vacancy['year']==year)[0]
        target_vacancy_for_this_year = DatasetSubset(target_vacancy, this_year_index)
        if target_vacancy_for_this_year.size() == 0:
            raise IOError, 'No target vacancy defined for year %s.' % year
        
        ## current_target_vacancy.target_attribute_name = 'target_vacancy_rate'
        ## each column provides a category for which a target vacancy is specified
        self.column_names = list(set( target_vacancy.get_known_attribute_names() ) - \
                            set( [ target_vacancy.target_attribute_name, 
                                   'year', '_hidden_id_', minimum_spaces_attribute,
                                   occupied_spaces_variable, total_spaces_variable
                                   ] )
                            )
        self.column_names.sort(reverse=True)
            
        ## buildings table provides existing stocks
        self.realestate_dataset = self.dataset_pool.get_dataset(realestate_dataset_name)
        
        occupied_spaces_variables = [occupied_spaces_variable]
        total_spaces_variables = [total_spaces_variable]
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            occupied_spaces_variables += unique(target_vacancy_for_this_year[occupied_spaces_variable]).tolist()
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            total_spaces_variables += unique(target_vacancy_for_this_year[total_spaces_variable]).tolist()
            
        self._compute_variables_for_dataset_if_needed(self.realestate_dataset, self.column_names + occupied_spaces_variables + total_spaces_variables)
        self._compute_variables_for_dataset_if_needed(self.proposal_component_set, self.column_names + total_spaces_variables)
        self.proposal_set.compute_variables(["urbansim_parcel.development_project_proposal.number_of_components", 
                                             "urbansim_parcel.development_project_proposal.land_area_taken"],
                                            dataset_pool=self.dataset_pool)
        
        n_column = len(self.column_names)
        self.column_names_index = {}
        for iname in range(n_column):
            self.column_names_index[self.column_names[iname]] = iname
 
        target_vacancy_for_this_year.column_values = target_vacancy_for_this_year.get_multiple_attributes(self.column_names).reshape((-1, n_column))
        self.realestate_dataset.column_values = self.realestate_dataset.get_multiple_attributes(self.column_names).reshape((-1, n_column))
        self.proposal_component_set.column_values = self.proposal_component_set.get_multiple_attributes(self.column_names).reshape((-1, n_column))
        #defaults, can be changed later by spaces_variable specified in target_vacancy rates
        self.realestate_dataset.total_spaces = self.realestate_dataset[total_spaces_variable]
        self.proposal_component_set.total_spaces = self.proposal_component_set[total_spaces_variable]
        self.realestate_dataset.occupied_spaces = self.realestate_dataset[occupied_spaces_variable]
        
        self.accounting = {}; self.logging = {}
        #has_needed_components = zeros(self.proposal_set.size(), dtype='bool')
        for index in range(target_vacancy_for_this_year.size()):
            column_value = tuple(target_vacancy_for_this_year.column_values[index,:].tolist())
            accounting = {'target_vacancy': target_vacancy_for_this_year[target_vacancy.target_attribute_name][index]}
            if minimum_spaces_attribute in target_vacancy_for_this_year.get_known_attribute_names():
                accounting['minimum_spaces'] = target_vacancy_for_this_year[minimum_spaces_attribute][index]
            realestate_indexes = self.get_index_by_condition(self.realestate_dataset.column_values, column_value)
            component_indexes = self.get_index_by_condition(self.proposal_component_set.column_values, column_value)
            
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
            ## or in targe_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year[occupied_spaces_variable][index]
                self.realestate_dataset.occupied_spaces[realestate_indexes] = (self.realestate_dataset[this_occupied_spaces_variable][realestate_indexes]
                                                                               ).astype(self.realestate_dataset.occupied_spaces.dtype)
    
            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year[total_spaces_variable][index]    
                self.realestate_dataset.total_spaces[realestate_indexes] = (self.realestate_dataset[this_total_spaces_variable][realestate_indexes]
                                                                            ).astype(self.realestate_dataset.total_spaces.dtype)
                self.proposal_component_set.total_spaces[component_indexes] = (self.proposal_component_set[this_total_spaces_variable][component_indexes]
                                                                               ).astype(self.proposal_component_set.total_spaces.dtype)
                
            accounting["total_spaces_variable"] = this_total_spaces_variable
            accounting["total_spaces"] = self.realestate_dataset.total_spaces[realestate_indexes].sum()
            accounting["occupied_spaces_variable"] = this_occupied_spaces_variable
            accounting["occupied_spaces"] = self.realestate_dataset.occupied_spaces[realestate_indexes].sum()
            accounting["target_spaces"] = int( round( accounting["occupied_spaces"] /\
                                                     (1 - accounting["target_vacancy"])
                                               ) )
            accounting["proposed_spaces"] = 0
            accounting["demolished_spaces"] = 0
            
            self.accounting[column_value] = accounting
            
            if self._is_target_reached(column_value):
                proposal_indexes = self.proposal_set.get_id_index(unique(self.proposal_component_set['proposal_id'][component_indexes]))
                if n_column == 1:
                    comp_indexes = where(ndimage.sum(self.proposal_component_set[self.column_names[0]]==column_value[0], 
                                    labels=self.proposal_component_set['proposal_id'], 
                                    index=self.proposal_set.get_id_attribute()
                                    ) == self.proposal_set["number_of_components"])[0]
                else:
                    comp_indexes = where(self.proposal_set["number_of_components"]==1)[0]
                target_reached_prop_idx = intersect1d(proposal_indexes, comp_indexes)
                self.weight[target_reached_prop_idx] = 0.0
                self.proposal_set["status_id"][intersect1d(target_reached_prop_idx, where(self.proposal_set["status_id"]==self.proposal_set.id_tentative)[0])] = self.proposal_set.id_no_demand
                
        ## handle planned proposals: all proposals with status_id == is_planned 
        ## and start_year == year are accepted
        planned_proposal_indexes = where(logical_and(
                                                  self.proposal_set.get_attribute("status_id") == self.proposal_set.id_planned, 
                                                  self.proposal_set.get_attribute("start_year") == year ) 
                                        )[0]
        
        logger.start_block("Processing %s planned proposals" % planned_proposal_indexes.size)
        self.consider_proposals(planned_proposal_indexes, force_accepting=True)
        logger.end_block()
        
        if within_parcel_selection_n > 0:
            logger.start_block("Selecting proposals within parcels (%s proposals per parcel)" % within_parcel_selection_n)
            self.select_proposals_within_parcels(nmax=within_parcel_selection_n, weight_string=within_parcel_selection_weight_string,
                                                 compete_among_types=within_parcel_selection_compete_among_types, 
                                                 filter_threshold=within_parcel_selection_threshold,
                                                 MU_same_weight=within_parcel_selection_MU_same_weight,
                                                 transpose_interpcl_weight=within_parcel_selection_transpose_interpcl_weight)
            logger.end_block()
        
        # consider proposals (in this order: proposed, tentative)
        for status in [self.proposal_set.id_proposed, self.proposal_set.id_tentative]:
            stat = (self.proposal_set.get_attribute("status_id") == status)
            if stat.sum() == 0:
                continue
            
            logger.log_status("Sampling from %s eligible proposals of status %s." % (stat.sum(), status))
            iteration = 0
            while (not self._is_target_reached()):
                ## prevent proposals from being sampled for vacancy type whose target is reached
                #for column_value in self.accounting.keys():
                
                if self.weight[stat].sum() == 0.0:
                    logger.log_warning("Running out of proposals of status %s before vacancy targets are reached; there aren't any proposals with non-zero weight" % status)
                    break
                
                available_indexes = where(logical_and(stat, self.weight > 0))[0]
                sample_size = minimum(available_indexes.size, n)
                sampled_proposal_indexes = probsample_noreplace(available_indexes, sample_size, 
                                                                prob_array=self.weight[available_indexes],
                                                                return_index=False)
                #sorted_sampled_indices = argsort(self.weight[sampled_proposal_indexes])
                #self.consider_proposals(sampled_proposal_indexes[sorted_sampled_indices][::-1])
                self.consider_proposals(sampled_proposal_indexes)
                self.weight[sampled_proposal_indexes] = 0
                iteration += 1
        
        self._log_status()
        
        # set status of accepted proposals to 'active'
        self.proposal_set.modify_attribute(name="status_id", 
                                           data=self.proposal_set.id_active,
                                           index=array(self.accepted_proposals, dtype='int32'))
        
        # Code added by Jesse Ayers, MAG, 7/20/2009
        # Get the active projects:
        stat_id = self.proposal_set.get_attribute('status_id')
        actv = where(stat_id==1)[0]
        # Where there are active projects, compute the total_land_area_taken
        # and store it on the development_project_proposals dataset
        # so it can be used by the building_construction_model for the proper
        # computation of units_proposed for those projects with velocity curves
        if actv.size > 0:          
            total_land_area_taken_computed = self.proposal_set['land_area_taken']
            self.proposal_set.modify_attribute('total_land_area_taken', total_land_area_taken_computed[actv], actv)

        return (self.proposal_set, self.realestate_dataset.get_id_attribute()[self.demolished_buildings])
    def run(self, realestate_dataset,
            living_units_dataset,
            year=None, 
            occupied_spaces_variable="occupied_units",
            total_spaces_variable="total_units",
            target_attribute_name='target_vacancy_rate',
            sample_from_dataset = None,
            living_units_from_dataset = None,
            sample_filter="",
            reset_attribute_value={}, 
            year_built = 'year_built',
            dataset_pool=None,
            append_to_realestate_dataset = False,
            table_name = "development_projects",
            dataset_name = "development_project",
            id_name = 'development_project_id',
            **kwargs):
        """         
        sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
        """
        
        if self.target_vancy_dataset is None:
            raise RuntimeError, "target_vacancy_rate dataset is unspecified."
        
        if not sample_from_dataset or not living_units_from_dataset:
            logger.log_note('No development projects or no living units of development projects to sample from. Development projects are taken from building dataset and thus living units from living_units dataset.')
            sample_from_dataset = realestate_dataset
            living_units_from_dataset = living_units_dataset
            
        if dataset_pool is None:
            dataset_pool = SessionConfiguration().get_dataset_pool()
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute('year')==year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)
        
        column_names = list(set( self.target_vancy_dataset.get_known_attribute_names() ) - set( [ target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_'] ))
        column_names.sort(reverse=True)
        column_values = dict([ (name, target_vacancy_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        
        
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
        for attribute in independent_variables:
            if attribute not in sample_dataset_known_attributes:
                sample_from_dataset.compute_one_variable_with_unknown_package(attribute, dataset_pool=dataset_pool)
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names() #update after compute
                
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in sample_dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1
                
        sampled_index = array([], dtype=int32)

        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "expected", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "expected", "difference", "action"]))
        error_log = ''
        for index in range(target_vacancy_for_this_year.size()):
            sample_indicator = ones( sample_from_dataset.size(), dtype='bool' )
            criterion = {}   # for logging
            for attribute in independent_variables:
                if attribute in sample_dataset_known_attributes:
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (attribute, realestate_dataset.get_dataset_name())
                
                if attribute + '_min' in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute+'_min')[index] 
                    criterion.update({attribute + '_min':amin})
                    if amin != -1:
                        sample_indicator *= sample_attribute >= amin
                if attribute + '_max' in column_names: 
                    amax = target_vacancy_for_this_year.get_attribute(attribute+'_max')[index]
                    criterion.update({attribute + '_max':amax}) 
                    if amax != -1:
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names: 
                    aval = column_values[attribute][index] 
                    criterion.update({attribute:aval}) 
                    if aval == -1:
                        continue
                    elif aval == -2:  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        sample_indicator *= sample_attribute == aval
                        
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate 
            ## or in targe_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
            
            this_total_spaces_variable += '_' + str(criterion[col])
            this_occupied_spaces_variable += '_' + str(criterion[col])
            
            logger.be_quiet() #temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
            realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            logger.talk()
            
            actual_num = (realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            #target_num is obsolete with this version.
            target_num = int(round( (realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
            '''If the target vacancy is very small and the inflow to the region big it is not enough to check
            only the current simulation year's vacancy. The simulation is more robust if the BTM is anticipating the
            next year's population (of households and jobs).
            This version calculates the non residential spaces based on sqft requirements of jobs per sector. 
            #TODO: Make code more general to cover various stratifications in the real estate market.
            '''
            if criterion[col] == 0:
                """ Option without demography model
                idx = where(self.control_totals.get_attribute("year")==year + 1)[0]
                this_years_control_totals = DatasetSubset(self.control_totals, idx)
                expected_num = int(round( this_years_control_totals.get_attribute('total_number_of_households').sum() /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))""" 
                hh_dataset = dataset_pool.get_dataset( 'household' )
                number_of_hh = hh_dataset.size()
                expected_num = int(round( number_of_hh /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index]))) 
            if criterion[col] > 0:
                # Getting control totals per sector in a dictionary
                idx = where(self.employment_control_totals.get_attribute("year")==year)[0] # Create index to get the subset of control totals for the next simulation year.
                this_years_control_totals = DatasetSubset(self.employment_control_totals, idx) # Create the subset of control totals.
                idx_non_home_based = where(logical_and(this_years_control_totals['home_based_status'] == 0,this_years_control_totals['sector_id'] == criterion[col]))[0] # Create index of non home based control totals in current sector. Only non home based jobs are supported. TODO: Support home based jobs.
                this_years_control_totals = DatasetSubset(this_years_control_totals, idx_non_home_based)
#                idx_current_sector = where(this_years_control_totals['sector_id'] == criterion[col])[0]
                next_years_jobs = this_years_control_totals['number_of_jobs']
                controled_sectors = this_years_control_totals['sector_id']                
                sector_job_totals = dict(zip(controled_sectors, next_years_jobs.T)) # creating dictionary with sector id's as key and number of jobs as values to ensure multiplication with right requiremtents.

                # Getting infos on required sqft per sector. 
#                a_zone_id = min(self.building_sqft_per_job['zone_id']) # Get a zone number from the definition table. Here choose to take the minimum which is arbitrary. This code assumes constant sqft requirements in all zones. TODO: Support different sqft requirements per zone.
#                idx_zone = where(self.building_sqft_per_job['zone_id'] == a_zone_id)[0]
#                subset_sqft_per_job = DatasetSubset(self.building_sqft_per_job, idx_zone)
#                sqft_per_job = subset_sqft_per_job['building_sqft_per_job']
#                sectors_with_requirements = subset_sqft_per_job['sector_id']
#                requirements_by_sector = dict(zip(sectors_with_requirements, sqft_per_job.T))
#                
#                needed_sqft_over_all_sectors = sector_job_totals[criterion[col]] * requirements_by_sector[criterion[col]]
#                expected_num = int(round( needed_sqft_over_all_sectors /\
#                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
                
                idx_sector = where(self.sectors['sector_id'] == criterion[col])
                subset_sqft_per_job_sector = DatasetSubset(self.sectors, idx_sector)
                needed_sqft_current_sector = sector_job_totals[criterion[col]] * subset_sqft_per_job_sector.get_attribute('sqm_per_job')
                expected_num = int(round( needed_sqft_current_sector /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))

            diff = expected_num - actual_num
            
            #Previous version which is checking the current years occupation.
            #diff = target_num - actual_num
            
            this_sampled_index = array([], dtype=int32)
            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int( diff / mean_size )
                    ##sampled at least 1 project when diff > 0, otherwise it is a endless loop when num_of_projects_to_sample = 0
                    num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[0:(1+searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                              ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
            #if diff < 0: #TODO demolition; not yet supported
            
            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0: action = "+" + str(action_num)
                if diff < 0: action = "-" + str(action_num)
            cat = [ str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(expected_num), str(diff), action]
            
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:                
                logger.log_status("\t".join(cat))
            
        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
        
        
        #logger.log_note("Updating attributes of %s sampled development events." % sampled_index.size)
        result_data = {}
        result_dataset = None
        index = array([], dtype='int32')
        if sampled_index.size > 0:
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            #result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32')) # Reset the year_built attribute. Uncommented because it is overwritten in the for loop afterwards.
            ## also add 'independent_variables' to the new dataset
            for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
                if reset_attribute_value.has_key(attribute):
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
            # Reset the year_built attribute.
            result_data['year_built'] = resize(year, sampled_index.size).astype('int32')
            # TODO: Uncomment the following three lines to reset land_area, tax_exempt, zgde. Test still to be done. parcel_id should be changed by location choice model.
            #result_data['land_area'] = resize(-1, sampled_index.size).astype('int32')
            #result_data['tax_exempt'] = resize(-1, sampled_index.size).astype('int32')
            #result_data['zgde'] = resize(-1, sampled_index.size).astype('int32')
            
            if id_name and result_data and id_name not in result_data:
                result_data[id_name] = arange(sampled_index.size, dtype='int32') + 1
            storage = StorageFactory().get_storage('dict_storage')
            storage.write_table(table_name=table_name, table_data=result_data)
            
            result_dataset = Dataset(id_name = id_name,
                                      in_storage = storage,
                                      in_table_name = table_name,
                                      dataset_name = dataset_name
                                      )
            index = arange(result_dataset.size())

        if append_to_realestate_dataset:
            if len(result_data) > 0:
                logger.start_block('Appending development events and living units')
                logger.log_note("Append %d sampled development events to real estate dataset." % len(result_data[result_data.keys()[0]]))
                index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)
                logger.start_block('Creating id mapping')
                # remember the ids from the development_event_history dataset.
                mapping_new_old = self.get_mapping_of_old_ids_to_new_ids(result_data, realestate_dataset, index)
                logger.end_block()
                
                '''Getting living units associated to selected development events by iterating over the mapping dictionary and 
                selecting each time all the living units according to the old building ids.
                The living units are then added to selected_living_units_dict which is then
                added to living_units dataset. A dictionary is needed to use the add_elements method.
                Creating a dictionary also clones the records. The subset is only a view on the original table.'''
                selected_living_units_dict = {}
                counter = 0
                for new_id in mapping_new_old:
                    if counter == 0:
                        logger.log_note("Log assignment of every 100th development event")
                    counter +=1
                    if counter % 100 == 0:
                        logger.log_note("Assembling living units for development event %s" % new_id)
                    sel_index = [i for i in range(0, len(living_units_from_dataset['building_id'])) if living_units_from_dataset['building_id'][i] == mapping_new_old[new_id]]
                    living_units_this_sampled_building = DatasetSubset(living_units_from_dataset, sel_index) 
                    if len(selected_living_units_dict) == 0:
                        logger.start_block('Assign new building id')
                        for attribute_name in living_units_this_sampled_building.get_primary_attribute_names():
                            column = living_units_this_sampled_building.get_attribute(attribute_name)
                            if attribute_name == 'building_id':
                                new_ids = array(living_units_this_sampled_building.size() * [new_id], dtype=int32)
                                selected_living_units_dict.update({attribute_name: new_ids})
                            else:
                                selected_living_units_dict.update({attribute_name: column})
                        logger.end_block()
                    else:
                        this_living_units_dict ={}
                        for attribute_name in living_units_this_sampled_building.get_primary_attribute_names():
                            column = living_units_this_sampled_building.get_attribute(attribute_name)
                            if attribute_name == 'building_id':
                                new_ids = array(living_units_this_sampled_building.size() * [new_id], dtype=int32)
                                this_living_units_dict.update({attribute_name: new_ids})
                            else:
                                this_living_units_dict.update({attribute_name: column})
                        for attribute_name in living_units_this_sampled_building.get_primary_attribute_names():
                            selected_living_units_dict[attribute_name] = concatenate([selected_living_units_dict[attribute_name], this_living_units_dict[attribute_name]])
                # Reset year_built attribute of living units
                selected_living_units_dict['year_built'] = resize(year, len(selected_living_units_dict['year_built'])).astype('int32')
                # TODO: Uncomment the following two lines to reset rent_price, zgde. Test still to be done
                # selected_living_units_dict['rent_price'] = resize(-1, len(selected_living_units_dict['rent_price'])).astype('int32')
                # selected_living_units_dict['zgde'] = resize(-1, len(selected_living_units_dict['zgde'])).astype('int32')


                
                index_units = living_units_dataset.add_elements(selected_living_units_dict, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)
                
                # Check consistency of buildings and living units. All living units must belong to a building
                if SimulationState().get_current_time() - SimulationState().get_start_time() == 1:
                    for building_id in living_units_dataset['building_id']:
                        if building_id not in realestate_dataset['building_id']:
                            logger.log_warning('Living unit with building_id %d has no corresponding building.' % (building_id))
                        # Uncomment next line to enforce consistency of living units and building dataset. Then you may uncomment the two previous lines.
#                        assert(building_id in realestate_dataset['building_id']), 'Living unit with building_id %d has no corresponding building.' % (building_id)

            result_dataset = realestate_dataset
        logger.end_block()

        # It is recommended to derive all variables of buildings in relation to living units via expression variables.
        # However, if the building dataset contains attributes derived from living units these attributes should be consistent
        # with the living units table. Below an example.
        # Residential_units attribute of each building should be consistent with the number of living units associated.
#        self.check_consistency_of_living_units_per_building(realestate_dataset, living_units_dataset, mapping_new_old)

        return (result_dataset, index)
    def run(
        self,
        realestate_dataset,
        year=None,
        occupied_spaces_variable="occupied_units",
        total_spaces_variable="total_units",
        target_attribute_name="target_vacancy_rate",
        sample_from_dataset=None,
        sample_filter="",
        reset_attribute_value={},
        year_built="year_built",
        dataset_pool=None,
        append_to_realestate_dataset=False,
        table_name="development_projects",
        dataset_name="development_project",
        id_name="development_project_id",
        **kwargs
    ):
        """         
        sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
        """

        if self.target_vancy_dataset is None:
            raise RuntimeError, "target_vacancy_rate dataset is unspecified."

        if not sample_from_dataset:
            sample_from_dataset = realestate_dataset

        # if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute("year") == year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)

        column_names = list(
            set(self.target_vancy_dataset.get_known_attribute_names())
            - set([target_attribute_name, occupied_spaces_variable, total_spaces_variable, "year", "_hidden_id_"])
        )
        column_names.sort(reverse=True)
        column_values = dict(
            [
                (name, target_vacancy_for_this_year.get_attribute(name))
                for name in column_names + [target_attribute_name]
            ]
        )

        independent_variables = list(set([re.sub("_max$", "", re.sub("_min$", "", col)) for col in column_names]))
        dataset_known_attributes = realestate_dataset.get_known_attribute_names()
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
            if variable not in sample_dataset_known_attributes:
                sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)

        dataset_known_attributes = realestate_dataset.get_known_attribute_names()  # update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1

        sampled_index = array([], dtype=int32)

        # log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "expected", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "expected", "difference", "action"]))
        error_log = ""
        for index in range(target_vacancy_for_this_year.size()):
            this_sampled_index = array([], dtype=int32)
            indicator = ones(realestate_dataset.size(), dtype="bool")
            sample_indicator = ones(sample_from_dataset.size(), dtype="bool")
            criterion = {}  # for logging
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = realestate_dataset.get_attribute(attribute)
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (
                        attribute,
                        realestate_dataset.get_dataset_name(),
                    )

                if attribute + "_min" in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute + "_min")[index]
                    criterion.update({attribute + "_min": amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                        sample_indicator *= sample_attribute >= amin
                if attribute + "_max" in column_names:
                    amax = target_vacancy_for_this_year.get_attribute(attribute + "_max")[index]
                    criterion.update({attribute + "_max": amax})
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names:
                    aval = column_values[attribute][index]
                    criterion.update({attribute: aval})
                    if aval == -1:
                        continue
                    elif (
                        aval == -2
                    ):  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        indicator *= dataset_attribute == aval
                        sample_indicator *= sample_attribute == aval

            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
            ## or in targe_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[
                    index
                ]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]

            this_total_spaces_variable += "_" + str(criterion[col])
            this_occupied_spaces_variable += "_" + str(criterion[col])

            logger.be_quiet()  # temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(
                this_occupied_spaces_variable, dataset_pool=dataset_pool
            )
            realestate_dataset.compute_one_variable_with_unknown_package(
                this_total_spaces_variable, dataset_pool=dataset_pool
            )
            sample_from_dataset.compute_one_variable_with_unknown_package(
                this_total_spaces_variable, dataset_pool=dataset_pool
            )
            logger.talk()

            actual_num = (realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            # target_num is obsolete with this version.
            target_num = int(
                round(
                    (realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum()
                    / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
                )
            )
            """If the target vacancy is very small and the inflow to the region big it is not enough to check
            only the current simulation year's vacancy. The simulation is more robust if the BTM is anticipating the
            next year's population (of households and jobs).
            #TODO: Make code more general to cover various stratifications in the real estate market.
            """
            if criterion[col] == 1:
                idx = where(self.control_totals.get_attribute("year") == year + 1)[0]
                this_years_control_totals = DatasetSubset(self.control_totals, idx)
                expected_num = int(
                    round(
                        this_years_control_totals.get_attribute("total_number_of_households").sum()
                        / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
                    )
                )
            if criterion[col] == 0:
                idx = where(self.employment_control_totals.get_attribute("year") == year + 1)[0]
                next_years_control_totals = DatasetSubset(self.employment_control_totals, idx)
                expected_num = int(
                    round(
                        next_years_control_totals.get_attribute("number_of_jobs").sum()
                        / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
                    )
                )

            diff = expected_num - actual_num

            # Previous version which is checking the current years occupation.
            # diff = target_num - actual_num

            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(
                    logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0
                )[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int(diff / mean_size)
                    ##sampled at least 1 project when diff > 0, otherwise it is a endless loop when num_of_projects_to_sample = 0
                    num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[
                        0 : (1 + searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))
                    ]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += (
                        "There is nothing to sample from %s and no new development will happen for "
                        % sample_from_dataset.get_dataset_name()
                        + ",".join([col + "=" + str(criterion[col]) for col in column_names])
                        + "\n"
                    )
            # if diff < 0: #TODO demolition; not yet supported

            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0:
                    action = "+" + str(action_num)
                if diff < 0:
                    action = "-" + str(action_num)
            cat = [str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(expected_num), str(diff), action]

            if PrettyTable is not None:
                status_log.add_row(cat)
            else:
                logger.log_status("\t".join(cat))

        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)

        result_data = {}
        result_dataset = None
        index = array([], dtype="int32")
        if True:  # sampled_index.size > 0:
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            result_data.setdefault(year_built, resize(year, sampled_index.size).astype("int32"))
            ## also add 'independent_variables' to the new dataset
            for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
                if reset_attribute_value.has_key(attribute):
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)

            if id_name and result_data and id_name not in result_data:
                result_data[id_name] = arange(sampled_index.size, dtype="int32") + 1

            storage = StorageFactory().get_storage("dict_storage")
            storage.write_table(table_name=table_name, table_data=result_data)

            result_dataset = Dataset(
                id_name=id_name, in_storage=storage, in_table_name=table_name, dataset_name=dataset_name
            )
            index = arange(result_dataset.size())

        if append_to_realestate_dataset:
            if len(result_data) > 0:
                index = realestate_dataset.add_elements(
                    result_data, require_all_attributes=False, change_ids_if_not_unique=True
                )
            result_dataset = realestate_dataset

        return (result_dataset, index)
Exemple #29
0
class DevelopmentProjectTransitionModel( Model ):
    """
    Creates development projects. Each development project is for a single type
    of development, e.g. 'industrial' or 'commercial'.  This model creates
    enough development projects to match the desired vacancy rates, as defined in the target_vacancies
    table.  It does not place any projects in locations; that is the job of the development project
    location choice models.  The distribution of project sizes (amount of space, value of space) is
    determined by sampling from the projects in the development_event_history table.
    """
    model_name = "Development Project Transition Model"
    
    def __init__( self, debuglevel=0 ):
        self.debug = DebugPrinter( debuglevel )

    def pre_check( self, location_set, vacancy_table, types ):
        for ptype in types:
            self.check_for_space( location_set.get_attribute(self.variable_for_total_units[ptype]))
        self.check_target_vacancy_is_not_100_percent( vacancy_table.get_attribute( "target_total_vacancy"))

    def check_for_space( self, values ):
        """Check that this array of values sums to something > 0."""
        self.do_check( "x > 0", array( [values.sum()] ) )

    def check_target_vacancy_is_not_100_percent( self, value ):
        """Check that the target vacancy rate is not 100% (ratio == 1), because it doesn't make sense,
        and it also causes a divide by 0 error."""
        self.do_check( "x < 1", value )

    def run( self, vacancy_table, history_table, year, location_set, dataset_pool=None, resources=None ):
        self.dataset_pool=dataset_pool
        building_types = self.dataset_pool.get_dataset('building_type')
        target_vacancy_this_year = DatasetSubset(vacancy_table, index=where(vacancy_table.get_attribute("year")==year)[0])
        building_type_ids = target_vacancy_this_year.get_attribute('building_type_id')
        building_type_idx = building_types.get_id_index(building_type_ids)
        self.used_building_types = DatasetSubset(building_types, index=building_type_idx)
        project_types =  self.used_building_types.get_attribute('building_type_name')
        is_residential = self.used_building_types.get_attribute('is_residential')
        unit_names =  where(is_residential, 'residential_units', 'non_residential_sqft')
        specific_unit_names =  where(is_residential, 'residential_units', '_sqft')
        rates =  target_vacancy_this_year.get_attribute('target_total_vacancy')
        self.project_units = {}
        self.project_specific_units = {}
        target_rates = {}
        for i in range(self.used_building_types.size()):
            self.project_units[project_types[i]] = unit_names[i]
            if is_residential[i]:
                self.project_specific_units[project_types[i]] = specific_unit_names[i]
            else:
                self.project_specific_units[project_types[i]] = "%s%s" % (project_types[i], specific_unit_names[i])
            target_rates[building_type_ids[i]] = rates[i]
            
        self._compute_vacancy_and_total_units_variables(location_set, project_types, resources)
        self.pre_check( location_set, target_vacancy_this_year, project_types)
    
        projects = None
        for project_type_id, target_vacancy_rate in target_rates.iteritems():
            # determine current-year vacancy rates
            project_type = building_types.get_attribute_by_id('building_type_name', project_type_id)
            vacant_units_sum = location_set.get_attribute(self.variable_for_vacancy[project_type]).sum()
            units_sum = float( location_set.get_attribute(self.variable_for_total_units[project_type]).sum() )
            should_develop_units = int(round(max( 0, ( target_vacancy_rate * units_sum - vacant_units_sum ) /
                                         ( 1 - target_vacancy_rate ) )))
            logger.log_status(project_type + ": vacant units: %d, should be vacant: %f, sum units: %d"
                          % (vacant_units_sum, target_vacancy_rate * units_sum, units_sum))

            if not should_develop_units:
                logger.log_note(("Will not build any " + project_type + " units, because the current vacancy of %d units\n"
                             + "is more than the %d units desired for the vacancy rate of %f.")
                            % (vacant_units_sum,
                               target_vacancy_rate * units_sum,
                               target_vacancy_rate))
            #create projects
            if should_develop_units > 0:
                this_project = self._create_projects(should_develop_units, project_type, project_type_id, history_table,
                                                               location_set, units_sum, resources)
                if projects is None:
                    projects = this_project
                else:
                    projects.join_by_rows(this_project, change_ids_if_not_unique=True)
        return projects

    
    def _compute_vacancy_and_total_units_variables(self, location_set, project_types, resources=None):
        compute_resources = Resources(resources)
        compute_resources.merge({"debug":self.debug})
        self.variable_for_vacancy = {}
        self.variable_for_total_units = {}
        for ptype in project_types:
            self.variable_for_vacancy[ptype] = compute_resources.get(
                                    "%s_vacant_variable" % ptype,
                                    "urbansim_zone.%s.vacant_%s" % (location_set.get_dataset_name(),
                                                                     self.project_specific_units[ptype]))
            self.variable_for_total_units[ptype] = compute_resources.get(
                                    "%s_total_units_variable" % ptype,
                                    "%s.aggregate(urbansim_zone.building.total_%s)" % (location_set.get_dataset_name(), 
                                                             self.project_specific_units[ptype]))
            location_set.compute_variables([self.variable_for_vacancy[ptype], self.variable_for_total_units[ptype]], 
                                           dataset_pool=self.dataset_pool, resources = compute_resources)
            
    def _create_projects(self, should_develop_units, project_type, project_type_id, history_table, location_set, units_sum, resources=None):
        history_values = history_table.get_attribute(self.project_units[project_type])
        type_code_values = history_table.get_change_type_code_attribute(self.project_units[project_type])
        # take only non-zero history values and those that don't represent demolished buildings 
        history_values_without_zeros = history_values[logical_and( history_values > 0, 
                                                                  type_code_values !=  DevelopmentEventTypeOfChange.DELETE)]
        mean_size = history_values_without_zeros.mean()
        idx = array( [], dtype="int32" )
        # Ensure that there are some development projects to choose from.
        num_of_projects_to_select = max( 10, round_( should_develop_units / mean_size ) )
        while True:
            idx = concatenate( ( idx, randint( 0, history_values_without_zeros.size,
                                                num_of_projects_to_select ) ) )
            csum = history_values_without_zeros[idx].cumsum()
            idx1 = idx[csum <= should_develop_units]
            if idx1.size == 0: # at least one project should be selected
                idx = array([idx[0]], dtype="int32")
            else:
                idx = idx1
            if csum[-1] >= should_develop_units:
                break
        data = {"residential_units": zeros( ( idx.size, ), dtype=int32),
                "non_residential_sqft": zeros( ( idx.size, ), dtype=int32),
                'building_type_id': array(idx.size* [project_type_id]),
                "project_id": arange( idx.size ) + 1,
                "building_id": zeros( ( idx.size, ), dtype=int32)}
        data[self.project_units[project_type]]= history_values_without_zeros[idx]
        storage = StorageFactory().get_storage('dict_storage')

        development_projects_table_name = 'development_projects'
        storage.write_table(table_name=development_projects_table_name, table_data=data)

        return Dataset(
            in_storage = storage,
            in_table_name = development_projects_table_name,
            id_name='project_id'
            )
    def run(self,
            n=500,
            realestate_dataset_name='building',
            current_year=None,
            occupied_spaces_variable="occupied_spaces",
            total_spaces_variable="total_spaces",
            run_config=None,
            debuglevel=0):
        """
        run method of the Development Project Proposal Sampling Model
        
        **Parameters**
        
            **n** : int, sample size for each iteration
                   
                   sample n proposals at a time, which are then evaluated one by one until the 
                   target vacancies are satisfied or proposals are running out
                   
            **realestate_dataset_name** : string, name of real estate dataset
            
            **current_year**: int, simulation year. If None, get value from SimulationState
            
            **occupied_spaces_variable** : string, variable name for calculating how much spaces are currently occupied
                                        
                                          It can either be a variable for real_estate dataset that returns 
                                          the amount spaces being occupied or a target_vacancy attribute 
                                          that contains the name of real_estate variables.   
            
            **total_spaces_variable** : string, variable name for calculating total existing spaces
            
        **Returns**
        
            **proposal_set** : indices to proposal_set that are accepted 
            
            **demolished_buildings** : buildings to be demolished for re-development
        """

        self.accepted_proposals = []
        self.demolished_buildings = []  #id of buildings to be demolished
        if self.proposal_set.n <= 0:
            logger.log_status(
                "The size of proposal_set is 0; no proposals to consider, skipping DPPSM."
            )
            return (self.proposal_set, self.demolished_buildings)

        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')

        if current_year is None:
            year = SimulationState().get_current_time()
        else:
            year = current_year
        this_year_index = where(target_vacancy['year'] == year)[0]
        target_vacancy_for_this_year = DatasetSubset(target_vacancy,
                                                     this_year_index)
        if target_vacancy_for_this_year.size() == 0:
            raise IOError, 'No target vacancy defined for year %s.' % year

        ## current_target_vacancy.target_attribute_name = 'target_vacancy_rate'
        ## each column provides a category for which a target vacancy is specified
        self.column_names = list(set( target_vacancy.get_known_attribute_names() ) - \
                            set( [ target_vacancy.target_attribute_name,
                                   'year', '_hidden_id_',
                                   occupied_spaces_variable, total_spaces_variable
                                   ] )
                            )
        self.column_names.sort(reverse=True)

        ## buildings table provides existing stocks
        self.realestate_dataset = self.dataset_pool.get_dataset(
            realestate_dataset_name)

        occupied_spaces_variables = [occupied_spaces_variable]
        total_spaces_variables = [total_spaces_variable]
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names(
        ):
            occupied_spaces_variables += unique(
                target_vacancy_for_this_year[occupied_spaces_variable]).tolist(
                )
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names(
        ):
            total_spaces_variables += unique(
                target_vacancy_for_this_year[total_spaces_variable]).tolist()

        self._compute_variables_for_dataset_if_needed(
            self.realestate_dataset, self.column_names +
            occupied_spaces_variables + total_spaces_variables)
        self._compute_variables_for_dataset_if_needed(
            self.proposal_component_set,
            self.column_names + total_spaces_variables)
        self.proposal_set.compute_variables(
            "urbansim_parcel.development_project_proposal.number_of_components",
            dataset_pool=self.dataset_pool)

        n_column = len(self.column_names)
        target_vacancy_for_this_year.column_values = target_vacancy_for_this_year.get_multiple_attributes(
            self.column_names).reshape((-1, n_column))
        self.realestate_dataset.column_values = self.realestate_dataset.get_multiple_attributes(
            self.column_names).reshape((-1, n_column))
        self.proposal_component_set.column_values = self.proposal_component_set.get_multiple_attributes(
            self.column_names).reshape((-1, n_column))
        #defaults, can be changed later by spaces_variable specified in target_vacancy rates
        self.realestate_dataset.total_spaces = self.realestate_dataset[
            total_spaces_variable]
        self.proposal_component_set.total_spaces = self.proposal_component_set[
            total_spaces_variable]
        self.realestate_dataset.occupied_spaces = self.realestate_dataset[
            occupied_spaces_variable]

        self.accounting = {}
        self.logging = {}
        #has_needed_components = zeros(self.proposal_set.size(), dtype='bool')
        for index in range(target_vacancy_for_this_year.size()):
            column_value = tuple(
                target_vacancy_for_this_year.column_values[index, :].tolist())
            accounting = {
                'target_vacancy':
                target_vacancy_for_this_year[
                    target_vacancy.target_attribute_name][index]
            }

            realestate_indexes = self.get_index_by_condition(
                self.realestate_dataset.column_values, column_value)
            component_indexes = self.get_index_by_condition(
                self.proposal_component_set.column_values, column_value)

            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
            ## or in targe_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names(
            ):
                this_occupied_spaces_variable = target_vacancy_for_this_year[
                    occupied_spaces_variable][index]
                self.realestate_dataset.occupied_spaces[realestate_indexes] = (
                    self.realestate_dataset[this_occupied_spaces_variable]
                    [realestate_indexes]).astype(
                        self.realestate_dataset.occupied_spaces.dtype)

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names(
            ):
                this_total_spaces_variable = target_vacancy_for_this_year[
                    total_spaces_variable][index]
                self.realestate_dataset.total_spaces[realestate_indexes] = (
                    self.realestate_dataset[this_total_spaces_variable]
                    [realestate_indexes]).astype(
                        self.realestate_dataset.total_spaces.dtype)
                self.proposal_component_set.total_spaces[component_indexes] = (
                    self.proposal_component_set[this_total_spaces_variable]
                    [component_indexes]).astype(
                        self.proposal_component_set.total_spaces.dtype)

            accounting["total_spaces_variable"] = this_total_spaces_variable
            accounting["total_spaces"] = self.realestate_dataset.total_spaces[
                realestate_indexes].sum()
            accounting[
                "occupied_spaces_variable"] = this_occupied_spaces_variable
            accounting[
                "occupied_spaces"] = self.realestate_dataset.occupied_spaces[
                    realestate_indexes].sum()
            accounting["target_spaces"] = int( round( accounting["occupied_spaces"] /\
                                                     (1 - accounting["target_vacancy"])
                                               ) )
            accounting["proposed_spaces"] = 0
            accounting["demolished_spaces"] = 0

            self.accounting[column_value] = accounting

            if self._is_target_reached(column_value):
                proposal_indexes = self.proposal_set.get_id_index(
                    unique(self.proposal_component_set['proposal_id']
                           [component_indexes]))
                single_component_indexes = where(
                    self.proposal_set["number_of_components"] == 1)[0]
                self.weight[intersect1d(proposal_indexes,
                                        single_component_indexes)] = 0.0

        ## handle planned proposals: all proposals with status_id == is_planned
        ## and start_year == year are accepted
        planned_proposal_indexes = where(
            logical_and(
                self.proposal_set.get_attribute("status_id") ==
                self.proposal_set.id_planned,
                self.proposal_set.get_attribute("start_year") == year))[0]

        self.consider_proposals(planned_proposal_indexes, force_accepting=True)

        # consider proposals (in this order: proposed, tentative)
        for status in [
                self.proposal_set.id_proposed, self.proposal_set.id_tentative
        ]:
            stat = (self.proposal_set.get_attribute("status_id") == status)
            if stat.sum() == 0:
                continue

            logger.log_status(
                "Sampling from %s eligible proposals of status %s." %
                (stat.sum(), status))
            iteration = 0
            while (not self._is_target_reached()):
                ## prevent proposals from being sampled for vacancy type whose target is reached
                #for column_value in self.accounting.keys():

                if self.weight[stat].sum() == 0.0:
                    logger.log_warning(
                        "Running out of proposals of status %s before vacancy targets are reached; there aren't any proposals with non-zero weight"
                        % status)
                    break

                available_indexes = where(logical_and(stat,
                                                      self.weight > 0))[0]
                sample_size = minimum(available_indexes.size, n)
                sampled_proposal_indexes = probsample_noreplace(
                    available_indexes,
                    sample_size,
                    prob_array=self.weight[available_indexes],
                    return_index=False)

                self.consider_proposals(sampled_proposal_indexes)
                self.weight[sampled_proposal_indexes] = 0
                #sample_size = 1
                #sampled_proposal_index = probsample_noreplace(available_indexes, sample_size,
                #prob_array=self.weight[available_indexes],
                #return_index=False)

                #self.consider_proposal(sampled_proposal_index)

                #self.weight[sampled_proposal_index] = 0
                iteration += 1

        self._log_status()

        # set status of accepted proposals to 'active'
        self.proposal_set.modify_attribute(name="status_id",
                                           data=self.proposal_set.id_active,
                                           index=array(self.accepted_proposals,
                                                       dtype='int32'))

        # Code added by Jesse Ayers, MAG, 7/20/2009
        # Get the active projects:
        stat_id = self.proposal_set.get_attribute('status_id')
        actv = where(stat_id == 1)[0]
        # Where there are active projects, compute the total_land_area_taken
        # and store it on the development_project_proposals dataset
        # so it can be used by the building_construction_model for the proper
        # computation of units_proposed for those projects with velocity curves
        if actv.size > 0:
            total_land_area_taken_computed = self.proposal_set.get_attribute(
                'urbansim_parcel.development_project_proposal.land_area_taken')
            self.proposal_set.modify_attribute(
                'total_land_area_taken', total_land_area_taken_computed[actv],
                actv)

        return (self.proposal_set, self.realestate_dataset.get_id_attribute()[
            self.demolished_buildings])
    def _do_run(self,
                location_set,
                agent_set,
                agents_index,
                data_objects=None,
                resources=None):
        location_id_name = location_set.get_id_name()[0]
        jobsubset = DatasetSubset(agent_set, agents_index)
        if jobsubset.size() <= 0:
            return array([], dtype='int32')
        #unplace jobs
        agent_set.set_values_of_one_attribute(
            location_id_name, resize(array([-1.0]), jobsubset.size()),
            agents_index)
        sector_ids = jobsubset.get_attribute("sector_id")
        sectors = unique(sector_ids)
        counts = ndimage_sum(ones((jobsubset.size(), )),
                             labels=sector_ids.astype('int32'),
                             index=sectors.astype('int32'))
        if sectors.size <= 1:
            counts = array([counts])
        variables = map(lambda x: "number_of_jobs_of_sector_" + str(int(x)),
                        sectors)
        compute_variables = map(
            lambda var: self.variable_package + "." + location_set.
            get_dataset_name() + "." + var, variables)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included(
            {agent_set.get_dataset_name(): agent_set})
        location_set.compute_variables(compute_variables,
                                       dataset_pool=self.dataset_pool)
        if self.filter is None:
            location_index = arange(location_set.size())
        else:
            filter_values = location_set.compute_variables(
                [self.filter], dataset_pool=self.dataset_pool)
            location_index = where(filter_values > 0)[0]
        if location_index.size <= 0:
            logger.log_status("No locations available. Nothing to be done.")
            return array([])
        location_subset = DatasetSubset(location_set, location_index)
        i = 0
        for sector in sectors:
            distr = location_subset.get_attribute(variables[i])
            if ma.allclose(distr.sum(), 0):
                uniform_prob = 1.0 / distr.size
                distr = resize(array([uniform_prob], dtype='float64'),
                               distr.size)
                logger.log_warning(
                    "Probabilities in scaling model for sector " +
                    str(sector) +
                    " sum to 0.0.  Substituting uniform distribution!")


#                random_sample = sample(location_set.get_attribute("grid_id"), k=int(counts[i]), \
#                                   probabilities = distr)
            distr = distr / float(distr.sum())
            random_sample = probsample_replace(
                location_subset.get_id_attribute(),
                size=int(counts[i]),
                prob_array=distr)
            idx = where(sector_ids == sector)[0]
            #modify job locations
            agent_set.set_values_of_one_attribute(location_id_name,
                                                  random_sample,
                                                  agents_index[idx])
            i += 1
        return agent_set.get_attribute_by_index(location_id_name, agents_index)