Example #1
    def run( self, vacancy_table, history_table, year, location_set, dataset_pool=None, resources=None ):
        self.dataset_pool=dataset_pool
        building_types = self.dataset_pool.get_dataset('building_type')
        target_vacancy_this_year = DatasetSubset(vacancy_table, index=where(vacancy_table.get_attribute("year")==year)[0])
        building_type_ids = target_vacancy_this_year.get_attribute('building_type_id')
        building_type_idx = building_types.get_id_index(building_type_ids)
        self.used_building_types = DatasetSubset(building_types, index=building_type_idx)
        project_types =  self.used_building_types.get_attribute('building_type_name')
        is_residential = self.used_building_types.get_attribute('is_residential')
        unit_names =  where(is_residential, 'residential_units', 'non_residential_sqft')
        specific_unit_names =  where(is_residential, 'residential_units', '_sqft')
        rates =  target_vacancy_this_year.get_attribute('target_total_vacancy')
        self.project_units = {}
        self.project_specific_units = {}
        target_rates = {}
        for i in range(self.used_building_types.size()):
            self.project_units[project_types[i]] = unit_names[i]
            if is_residential[i]:
                self.project_specific_units[project_types[i]] = specific_unit_names[i]
            else:
                self.project_specific_units[project_types[i]] = "%s%s" % (project_types[i], specific_unit_names[i])
            target_rates[building_type_ids[i]] = rates[i]
            
        self._compute_vacancy_and_total_units_variables(location_set, project_types, resources)
        self.pre_check( location_set, target_vacancy_this_year, project_types)
    
        projects = None
        for project_type_id, target_vacancy_rate in target_rates.iteritems():
            # determine current-year vacancy rates
            project_type = building_types.get_attribute_by_id('building_type_name', project_type_id)
            vacant_units_sum = location_set.get_attribute(self.variable_for_vacancy[project_type]).sum()
            units_sum = float( location_set.get_attribute(self.variable_for_total_units[project_type]).sum() )
            should_develop_units = int(round(max( 0, ( target_vacancy_rate * units_sum - vacant_units_sum ) /
                                         ( 1 - target_vacancy_rate ) )))
            logger.log_status(project_type + ": vacant units: %d, should be vacant: %f, sum units: %d"
                          % (vacant_units_sum, target_vacancy_rate * units_sum, units_sum))

            if not should_develop_units:
                logger.log_note(("Will not build any " + project_type + " units, because the current vacancy of %d units\n"
                             + "is more than the %d units desired for the vacancy rate of %f.")
                            % (vacant_units_sum,
                               target_vacancy_rate * units_sum,
                               target_vacancy_rate))
            #create projects
            if should_develop_units > 0:
                this_project = self._create_projects(should_develop_units, project_type, project_type_id, history_table,
                                                               location_set, units_sum, resources)
                if projects is None:
                    projects = this_project
                else:
                    projects.join_by_rows(this_project, change_ids_if_not_unique=True)
        return projects
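The loop above sizes each project type by solving the target-vacancy identity for the unknown number of new units: if n vacant units are added, post-development vacancy is (vacant + n) / (units + n); setting that equal to the target rate gives n = (rate * units - vacant) / (1 - rate), clipped at zero. A minimal standalone sketch of that calculation (hypothetical numbers, plain Python rather than the opus_core datasets):

def units_to_develop(target_rate, total_units, vacant_units):
    # solve (vacant + n) / (total + n) == target_rate for n; never negative
    n = (target_rate * total_units - vacant_units) / (1.0 - target_rate)
    return int(round(max(0, n)))

# 5% target vacancy, 10000 units of which 300 are vacant:
# n = (0.05 * 10000 - 300) / 0.95 ~= 211, giving 511 / 10211 ~= 5% vacancy
print(units_to_develop(0.05, 10000, 300))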
Example #2
    def run( self, model_configuration, vacancy_table, history_table, year, 
             location_set, resources=None):
        large_area_ids = vacancy_table.get_attribute("large_area_id")
        locations_large_area_ids = location_set.compute_variables("washtenaw.%s.large_area_id" % location_set.get_dataset_name())
        unique_large_areas = unique(large_area_ids)
        self._compute_vacancy_variables(location_set, 
                                        model_configuration['development_project_types'], 
                                        resources)

        projects = {}
        for area in unique_large_areas:
            location_index = where(locations_large_area_ids == area)[0]
            locations_for_this_area = DatasetSubset(location_set, location_index)
            logger.log_status("DPLCM for area %s", area)
            target_residential_vacancy_rate, target_non_residential_vacancy_rate = self._get_target_vacancy_rates(vacancy_table, year, area)
            for project_type in model_configuration['development_project_types']:
                # determine current-year vacancy rates
                vacant_units_sum = locations_for_this_area.get_attribute(self.variable_for_vacancy[project_type]).sum()
                units_sum = float( locations_for_this_area.get_attribute(self.units_variable[project_type]).sum() )
                vacant_rate = self.safe_divide(vacant_units_sum, units_sum)
                if model_configuration['development_project_types'][project_type]['residential']:
                    target_vacancy_rate = target_residential_vacancy_rate
                else:
                    target_vacancy_rate = target_non_residential_vacancy_rate
                should_develop_units = int(round(max( 0, ( target_vacancy_rate * units_sum - vacant_units_sum ) /
                                             ( 1 - target_vacancy_rate ) )))
                logger.log_status(project_type + ": vacant units: %d, should be vacant: %f, sum units: %d, will develop: %d"
                          % (vacant_units_sum, target_vacancy_rate * units_sum, units_sum, should_develop_units))
                #create projects
                if should_develop_units > 0:
                    project_dataset = self._create_projects(should_develop_units, project_type, history_table,
                                                                   locations_for_this_area, units_sum, 
                                                                   model_configuration['development_project_types'], 
                                                                   resources)
                    project_dataset.add_attribute(array(project_dataset.size()*[area]), "large_area_id", 
                                                  metadata=AttributeType.PRIMARY)
                    if (project_type not in projects.keys()) or (projects[project_type] is None):
                        projects[project_type] = project_dataset
                    else:
                        projects[project_type].join_by_rows(project_dataset, change_ids_if_not_unique=True)
 
        for project_type in model_configuration['development_project_types']:
            if project_type not in projects.keys():
                projects[project_type] = None
            if projects[project_type] is None:
                size = 0
            else:
                projects[project_type].add_submodel_categories()
                size = projects[project_type].size()
            logger.log_status("%s %s projects to be built" % (size, project_type))  
        return projects
Example #3
 def estimate_mu(self):
     iout = -1
     self.values_from_mr = {}
     for quantity in self.observed_data.get_quantity_objects():
         dataset_name = quantity.get_dataset_name()
         variable = quantity.get_variable_name()
         iout += 1
         dimension_reduced = False
         quantity_ids = quantity.get_dataset().get_id_attribute()
         for i in range(self.number_of_runs):
             ds = self._compute_variable_for_one_run(i, variable, dataset_name, self.get_calibration_year(), quantity)
             if isinstance(ds, InteractionDataset):
                 ds = ds.get_flatten_dataset()
             if i == 0: # first run
                 self.mu[iout] = zeros((self.y[iout].size, self.number_of_runs), dtype=float32)
                 ids = ds.get_id_attribute()
             else:
                 if ds.size() > ids.shape[0]:
                     ds = DatasetSubset(ds, ds.get_id_index(ids))
                     dimension_reduced = True
             scale = self.get_scales(ds, i+1, variable)
             matching_index = ds.get_id_index(quantity_ids)
             values = scale[matching_index] * ds.get_attribute(variable)[matching_index]
             self.mu[iout][:,i] = try_transformation(values, quantity.get_transformation())
             
         self.values_from_mr[variable.get_expression()] = self.mu[iout]
         if dimension_reduced:
             self.y[iout] = self.y[iout][quantity.get_dataset().get_id_index(ids)]
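The id bookkeeping in estimate_mu (get_id_index plus DatasetSubset) is what keeps every run's column of mu aligned to the same observations. A small sketch of the same alignment in plain numpy, with searchsorted standing in for get_id_index (hypothetical data):

import numpy as np

def align_by_id(reference_ids, run_ids, run_values):
    # reorder run_values to match reference_ids; assumes run_ids is sorted
    # and contains every reference id, as get_id_index guarantees above
    return run_values[np.searchsorted(run_ids, reference_ids)]

reference_ids = np.array([2, 5, 9])
mu = np.zeros((reference_ids.size, 2))        # observations x runs
run_ids = np.array([1, 2, 3, 5, 9])           # this run covers extra ids
run_values = np.array([10., 20., 30., 50., 90.])
mu[:, 0] = align_by_id(reference_ids, run_ids, run_values)   # -> [20., 50., 90.]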
Example #4
 def choose_agents_to_move_from_overfilled_locations(self, capacity,
                                                     agent_set, agents_index, agents_locations):
     """Agents with the smallest number of units should move again.
     """
     if capacity is None:
         return array([], dtype='int32')
     index_valid_agents_locations = where(agents_locations > 0)[0]
     valid_agents_locations = agents_locations[index_valid_agents_locations].astype("int32")
     unique_locations = unique(valid_agents_locations).astype("int32")
     index_consider_capacity = self.choice_set.get_id_index(unique_locations)
     capacity_of_affected_locations = capacity[index_consider_capacity]
     overfilled = where(capacity_of_affected_locations < 0)[0]
     movers = array([], dtype='int32')
     indexed_individuals = DatasetSubset(agent_set, agents_index[index_valid_agents_locations])
     ordered_agent_indices = self.get_agents_order(indexed_individuals)
     sizes = indexed_individuals.get_attribute(self.units_full_name)[ordered_agent_indices]
     choice_ids = self.choice_set.get_id_attribute()
     for loc in overfilled:
         agents_to_move = where(valid_agents_locations == choice_ids[index_consider_capacity[loc]])[0]
         if agents_to_move.size > 0:
             n = int(-1*capacity_of_affected_locations[loc])
             this_sizes = sizes[agents_to_move]
             csum = this_sizes[arange(this_sizes.size-1,-1,-1)].cumsum() # cumulative sum in increasing order of size
             csum = csum[arange(csum.size-1, -1,-1)] # restore decreasing order
             w = where(csum < n)[0]
             if w.size < agents_to_move.size: # add one more agent so that the cumulative sum exceeds n
                 w = concatenate((array([agents_to_move.size-w.size-1]), w))
             idx = ordered_agent_indices[agents_to_move[w]]
             movers = concatenate((movers, idx))
     return movers
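The reversed cumulative sum in the loop above is a compact way to pick, from agents ordered by decreasing size, the smallest tail whose units just cover the overfill n. A standalone sketch with hypothetical sizes:

import numpy as np

def pick_smallest_to_cover(sizes_desc, n):
    # sizes_desc: unit counts in decreasing order; returns indices of the
    # smallest agents whose sizes sum to at least n
    csum = sizes_desc[::-1].cumsum()[::-1]    # csum[i] == sizes_desc[i:].sum()
    w = np.where(csum < n)[0]                 # tails that are strictly too small
    if w.size < sizes_desc.size:              # add one more agent to reach n
        w = np.concatenate(([sizes_desc.size - w.size - 1], w))
    return w

sizes = np.array([8, 5, 3, 2, 1])
print(pick_smallest_to_cover(sizes, 4))       # -> [2 3 4]; 3 + 2 + 1 >= 4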
Example #5
    def _do_run(self, location_set, agent_set, agents_index, data_objects=None, resources=None):
        location_id_name = location_set.get_id_name()[0]
        jobsubset = DatasetSubset(agent_set, agents_index)
        if jobsubset.size() <= 0:
            return array([], dtype='int32')
        #unplace jobs
        agent_set.set_values_of_one_attribute(location_id_name, 
                                              resize(array([-1.0]), jobsubset.size()), agents_index)
        sector_ids = jobsubset.get_attribute("sector_id")
        sectors = unique(sector_ids)
        counts = ndimage_sum(ones((jobsubset.size(),)), labels=sector_ids.astype('int32'), index=sectors.astype('int32'))
        if sectors.size <= 1:
            counts = array([counts])
        variables = map(lambda x: "number_of_jobs_of_sector_"+str(int(x)), sectors)
        compute_variables = map(lambda var: self.variable_package + "." + 
            location_set.get_dataset_name()+ "." + var, variables)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included({agent_set.get_dataset_name():agent_set})
        location_set.compute_variables(compute_variables, dataset_pool=self.dataset_pool)
        if self.filter is None:
            location_index = arange(location_set.size())
        else:
            filter_values = location_set.compute_variables([self.filter], dataset_pool=self.dataset_pool)
            location_index = where(filter_values > 0)[0]
        if location_index.size <= 0:
            logger.log_status("No locations available. Nothing to be done.")
            return array([])
        location_subset = DatasetSubset(location_set, location_index)
        i=0
        for sector in sectors:
            distr = location_subset.get_attribute(variables[i])
            if ma.allclose(distr.sum(), 0):
                uniform_prob = 1.0/distr.size
                distr = resize(array([uniform_prob], dtype='float64'), distr.size)
                logger.log_warning("Probabilities in scaling model for sector " + str(sector) + " sum to 0.0.  Substituting uniform distribution!")
#                random_sample = sample(location_set.get_attribute("grid_id"), k=int(counts[i]), \
#                                   probabilities = distr)
            distr = distr/float(distr.sum())
            random_sample = probsample_replace(location_subset.get_id_attribute(), size=int(counts[i]), 
                                       prob_array=distr)
            idx = where(sector_ids == sector)[0]
            #modify job locations
            agent_set.set_values_of_one_attribute(location_id_name, random_sample, agents_index[idx])
            i+=1
        return agent_set.get_attribute_by_index(location_id_name, agents_index)
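probsample_replace above draws location ids with replacement, with probability proportional to the per-location job counts. Its effect can be approximated with numpy.random.choice (a sketch under that assumption, not the opus_core implementation):

import numpy as np

def probsample_replace_sketch(ids, size, prob_array):
    # draw `size` ids with replacement, probability proportional to prob_array
    p = np.asarray(prob_array, dtype='float64')
    return np.random.choice(ids, size=size, replace=True, p=p / p.sum())

location_ids = np.array([101, 102, 103])
weights = np.array([10., 0., 30.])            # e.g. number_of_jobs_of_sector_*
print(probsample_replace_sketch(location_ids, 5, weights))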
Example #6
    def run(self, year=None,
            dataset_pool=None,  **kwargs):
        """
        """
        if dataset_pool is None:
            dataset_pool = SessionConfiguration().get_dataset_pool()

        if year is None:
            year = SimulationState().get_current_time()
        
        this_year_index = where(self.scheduled_events.get_attribute('year')==year)[0]
        scheduled_events_for_this_year = DatasetSubset(self.scheduled_events, this_year_index)
        scheduled_events_for_this_year.load_dataset_if_not_loaded()
        column_names = list(set( self.scheduled_events.get_known_attribute_names() ) - set( [ 'year', 'action', 'attribute', 'amount', 'event_id', '_hidden_id_'] ))
        column_names.sort()
#        column_values = dict([ (name, scheduled_events_for_this_year.get_attribute(name)) for name in column_names])
        
        for index in range(scheduled_events_for_this_year.size()):
            indicator = ones( self.dataset.size(), dtype='bool' )
            event_attr = {}
            for attribute in column_names:
                if attribute in self.dataset.get_known_attribute_names():
                    dataset_attribute = self.dataset.get_attribute(attribute)
                else:
                    ## this is done inside the loop because some actions may delete computed attributes, e.g. via dataset.add_elements()
                    try:
                        dataset_attribute = self.dataset.compute_one_variable_with_unknown_package(attribute, dataset_pool=dataset_pool)
                    except:
                        raise ValueError, "attribute %s used in scheduled events dataset cannot be found in dataset %s" % (attribute, self.dataset.get_dataset_name())
                
#                if attribute in column_names: 
                aval = scheduled_events_for_this_year.get_attribute(attribute)[index]
                if aval == -1:
                    continue    # ignore if column value is -1
                else:
                    indicator *= dataset_attribute == aval
                    event_attr.update({attribute:aval})
            
            #agents in dataset satisfying all conditions are identified by indicator
            legit_index = where(indicator)[0]
            
            this_event = scheduled_events_for_this_year.get_data_element(index)
            if not hasattr(this_event, 'attribute'):
                action_attr_name = ''
            else:
                action_attr_name = this_event.attribute
            action_function = getattr(self, '_' + this_event.action.strip().lower())
            action_function( amount=this_event.amount,
                             attribute=action_attr_name,
                             dataset=self.dataset, 
                             index=legit_index,
                             data_dict=event_attr )
            
            self.post_run(self.dataset, legit_index, **kwargs)

        return self.dataset
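For reference, a scheduled_events table of the shape this run() expects might look like the following. This is a hypothetical example (the action names are illustrative); every column other than year/action/attribute/amount/event_id acts as a matching condition, with -1 meaning "match any value":

# in 2010, remove 10 agents with sector_id == 3 in any zone; in 2011,
# add 5 agents in zone 12 regardless of sector
scheduled_events_data = {
    'event_id':  [1, 2],
    'year':      [2010, 2011],
    'action':    ['delete', 'add'],
    'attribute': ['', ''],
    'amount':    [10, 5],
    'sector_id': [3, -1],
    'zone_id':   [-1, 12],
}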
Example #7
 def choose_agents_to_move_from_overfilled_locations(
         self, capacity, agent_set, agents_index, agents_locations):
     """Agents with the smallest number of units should move again.
     """
     if capacity is None:
         return array([], dtype='int32')
     index_valid_agents_locations = where(agents_locations > 0)[0]
     valid_agents_locations = agents_locations[
         index_valid_agents_locations].astype("int32")
     unique_locations = unique(valid_agents_locations).astype("int32")
     index_consider_capacity = self.choice_set.get_id_index(
         unique_locations)
     capacity_of_affected_locations = capacity[index_consider_capacity]
     overfilled = where(capacity_of_affected_locations < 0)[0]
     movers = array([], dtype='int32')
     indexed_individuals = DatasetSubset(
         agent_set, agents_index[index_valid_agents_locations])
     ordered_agent_indices = self.get_agents_order(indexed_individuals)
     sizes = indexed_individuals.get_attribute(
         self.units_full_name)[ordered_agent_indices]
     choice_ids = self.choice_set.get_id_attribute()
     for loc in overfilled:
         agents_to_move = where(valid_agents_locations == choice_ids[
             index_consider_capacity[loc]])[0]
         if agents_to_move.size > 0:
             n = int(-1 * capacity_of_affected_locations[loc])
             this_sizes = sizes[agents_to_move]
             csum = this_sizes[arange(this_sizes.size - 1, -1,
                                      -1)].cumsum()  # cumulative sum in increasing order of size
             csum = csum[arange(csum.size - 1, -1,
                                -1)]  # restore decreasing order
             w = where(csum < n)[0]
             if w.size < agents_to_move.size:  # add one more agent so that the cumulative sum exceeds n
                 w = concatenate(
                     (array([agents_to_move.size - w.size - 1]), w))
             idx = ordered_agent_indices[agents_to_move[w]]
             movers = concatenate((movers, idx))
     return movers
Example #8
    def run(self, realestate_dataset,
            year=None, 
            occupied_spaces_variable="occupied_units",
            total_spaces_variable="total_units",
            target_attribute_name='target_vacancy_rate',
            sample_from_dataset = None,
            sample_filter="",
            reset_attribute_value={}, 
            year_built = 'year_built',
            dataset_pool=None,
            append_to_realestate_dataset = False,
            table_name = "development_projects",
            dataset_name = "development_project",
            id_name = [],
            **kwargs):
        """         
        sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
        """
        
        if self.target_vancy_dataset is None:
            raise RuntimeError, "target_vacancy_rate dataset is unspecified."
        
        if not sample_from_dataset:
            sample_from_dataset = realestate_dataset
            
        #if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute('year')==year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)
        
        column_names = list(set( self.target_vancy_dataset.get_known_attribute_names() ) - set( [ target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_'] ))
        column_names.sort(reverse=True)
        column_values = dict([ (name, target_vacancy_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        dataset_known_attributes = realestate_dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
                sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
                
        dataset_known_attributes = realestate_dataset.get_known_attribute_names() #update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1
                
        sampled_index = array([], dtype=int32)

        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
        error_log = ''
        for index in range(target_vacancy_for_this_year.size()):
            this_sampled_index = array([], dtype=int32)
            indicator = ones( realestate_dataset.size(), dtype='bool' )
            sample_indicator = ones( sample_from_dataset.size(), dtype='bool' )
            criterion = {}   # for logging
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = realestate_dataset.get_attribute(attribute)
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (attribute, realestate_dataset.get_dataset_name())
                
                if attribute + '_min' in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute+'_min')[index] 
                    criterion.update({attribute + '_min':amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                        sample_indicator *= sample_attribute >= amin
                if attribute + '_max' in column_names: 
                    amax = target_vacancy_for_this_year.get_attribute(attribute+'_max')[index]
                    criterion.update({attribute + '_max':amax}) 
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names: 
                    aval = column_values[attribute][index] 
                    criterion.update({attribute:aval}) 
                    if aval == -1:
                        continue
                    elif aval == -2:  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        indicator *= dataset_attribute == aval
                        sample_indicator *= sample_attribute == aval
                        
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all real estate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
            
            logger.be_quiet() #temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
            realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            logger.talk()
            
            actual_num = (indicator * realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            target_num = int(round( (indicator * realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index]) 
                            ))
            diff = target_num - actual_num
            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = max(1, int( diff / mean_size ))  # at least 1, so the sampling loop below terminates
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[0:(1+searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                              ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
            #if diff < 0: #TODO demolition; not yet supported
            
            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0: action = "+" + str(action_num)
                if diff < 0: action = "-" + str(action_num)
            cat = [ str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(diff), action]
            
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:                
                logger.log_status("\t".join(cat))
            
        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
            
        result_data = {}
        result_dataset = None
        index = array([], dtype='int32')
        if sampled_index.size > 0:
            ### ideally duplicate_rows() is all that's needed to add the newly cloned rows;
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32'))
            for attribute in sample_from_dataset.get_primary_attribute_names():
                if reset_attribute_value.has_key(attribute):
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
        
            storage = StorageFactory().get_storage('dict_storage')
            storage.write_table(table_name=table_name, table_data=result_data)
    
            result_dataset = Dataset(id_name = id_name,
                                      in_storage = storage,
                                      in_table_name = table_name,
                                      dataset_name = dataset_name
                                      )
            index = arange(result_dataset.size())
            
        if append_to_realestate_dataset:
            if len(result_data) > 0:
                index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)                
            result_dataset = realestate_dataset
        
        return (result_dataset, index)
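The -1/-2 convention in the matching loop above (-1 ignores a column, -2 matches the complement of every value listed elsewhere in that column) reduces to elementwise boolean products. A small numpy sketch of one such indicator, with made-up data and np.in1d standing in for opus_core's ismember:

import numpy as np

building_sqft = np.array([500, 1500, 2500, 3500])
building_type = np.array([1, 2, 3, 2])
column_values_type = np.array([1, 2, -2])     # the -2 row means "all other types"

indicator = np.ones(building_sqft.size, dtype='bool')
indicator &= building_sqft >= 1000            # sqft_min = 1000
indicator &= building_sqft <= 3000            # sqft_max = 3000
indicator &= ~np.in1d(building_type, column_values_type)   # the aval == -2 case
print(np.where(indicator)[0])                 # -> [2]; only the type-3 building matches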
Example #9
class EmploymentTransitionModel(Model):
    """Creates and removes jobs from job_set."""

    model_name = "Employment Transition Model"
    location_id_name_default = "grid_id"
    variable_package_default = "urbansim"

    def __init__(self, location_id_name=None, variable_package=None, dataset_pool=None, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.location_id_name = self.location_id_name_default
        self.variable_package = self.variable_package_default
        if location_id_name is not None:
            self.location_id_name = location_id_name
        if variable_package is not None:
            self.variable_package = variable_package
        self.dataset_pool = self.create_dataset_pool(dataset_pool, ["urbansim", "opus_core"])

    def run(self, year, job_set, control_totals, job_building_types, data_objects=None, resources=None):
        self._do_initialize_for_run(job_set, job_building_types, data_objects)
        idx = where(control_totals.get_attribute("year")==year)[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        self._do_run_for_this_year(job_set)
        return self._update_job_set(job_set)
        
    def _do_initialize_for_run(self, job_set, job_building_types, data_objects=None):
        self.max_id = job_set.get_id_attribute().max()
        self.job_size = job_set.size()
        self.job_id_name = job_set.get_id_name()[0]
        self.new_jobs = {
            self.location_id_name:array([], dtype=job_set.get_data_type(self.location_id_name, int32)),
            "sector_id":array([], dtype=job_set.get_data_type("sector_id", int32)),
            self.job_id_name:array([], dtype=job_set.get_data_type(self.job_id_name, int32)),
            "building_type":array([], dtype=job_set.get_data_type("building_type", int8))
                    }
        self.remove_jobs = array([], dtype=int32)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included({job_building_types.get_dataset_name():job_building_types})
        self.available_building_types = job_building_types.get_id_attribute()

    def _compute_sector_variables(self, sectors, job_set):
        compute_resources = Resources({"debug":self.debug})
        job_set.compute_variables(
            map(lambda x: "%s.%s.is_in_employment_sector_%s_home_based"
                    % (self.variable_package, job_set.get_dataset_name(), x),
                sectors) +
            map(lambda x: "%s.%s.is_in_employment_sector_%s_non_home_based"
                    % (self.variable_package, job_set.get_dataset_name(), x),
                sectors) + ["is_non_home_based_job", "is_home_based_job"],
            dataset_pool = self.dataset_pool,
            resources = compute_resources)
        
    def _do_run_for_this_year(self, job_set):
        building_type = job_set.get_attribute("building_type")
        sectors = unique(self.control_totals_for_this_year.get_attribute("sector_id"))
        self._compute_sector_variables(sectors, job_set)
        for sector in sectors:
            isector = where(self.control_totals_for_this_year.get_attribute("sector_id") == sector)[0]
            total_hb_jobs = self.control_totals_for_this_year.get_attribute("total_home_based_employment")[isector]
            total_nhb_jobs = self.control_totals_for_this_year.get_attribute("total_non_home_based_employment")[isector]
            is_in_sector_hb = job_set.get_attribute("is_in_employment_sector_%s_home_based" % sector)
            is_in_sector_nhb = job_set.get_attribute("is_in_employment_sector_%s_non_home_based" % sector)
            diff_hb = int(total_hb_jobs - is_in_sector_hb.astype(int8).sum())
            diff_nhb = int(total_nhb_jobs - is_in_sector_nhb.astype(int8).sum())
            if diff_hb < 0: # home based jobs to be removed
                w = where(is_in_sector_hb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_hb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate((self.remove_jobs, non_placed,
                                           sample_noreplace(sample_array, max(0,abs(diff_hb)-size_non_placed))))
            if diff_nhb < 0: # non home based jobs to be removed
                w = where(is_in_sector_nhb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_nhb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate((self.remove_jobs, non_placed,
                                           sample_noreplace(sample_array, max(0,abs(diff_nhb)-size_non_placed))))

            if diff_hb > 0: # home based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate((self.new_jobs[self.location_id_name],
                                   zeros((diff_hb,), dtype=self.new_jobs[self.location_id_name].dtype.type)))
                self.new_jobs["sector_id"] = concatenate((self.new_jobs["sector_id"],
                                   (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_hb))))
                if 1 in is_in_sector_hb:
                    building_type_distribution = array(ndimage_sum(is_in_sector_hb,
                                                                    labels=building_type,
                                                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute("is_home_based_job"): # take the building type distribution from the whole region
                    building_type_distribution = array(ndimage_sum(
                                                                job_set.get_attribute("is_home_based_job"),
                                                                labels=building_type,
                                                                index=self.available_building_types))
                else: # there are no home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(self.available_building_types.size)
                    building_type_distribution = building_type_distribution/building_type_distribution.sum()
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_hb, building_type_distribution/
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"],
                            sampled_building_types.astype(self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_hb
                self.new_jobs[self.job_id_name] = concatenate((self.new_jobs[self.job_id_name],
                                                     arange(self.max_id+1, new_max_id+1)))
                self.max_id = new_max_id

            if diff_nhb > 0: # non home based jobs to be created
                self.new_jobs[self.location_id_name]=concatenate((self.new_jobs[self.location_id_name],
                                     zeros((diff_nhb,), dtype=self.new_jobs[self.location_id_name].dtype.type)))
                self.new_jobs["sector_id"]=concatenate((self.new_jobs["sector_id"],
                                           (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_nhb))))
                if 1 in is_in_sector_nhb:
                    building_type_distribution = array(ndimage_sum(is_in_sector_nhb,
                                                                    labels=building_type,
                                                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute("is_non_home_based_job"): # take the building type distribution from the whole region
                    building_type_distribution = array(ndimage_sum(
                                                        job_set.get_attribute("is_non_home_based_job"),
                                                        labels=building_type,
                                                        index=self.available_building_types))
                else: # there are no non-home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(self.available_building_types.size)
                    building_type_distribution = building_type_distribution/building_type_distribution.sum()
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_nhb, building_type_distribution/
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"],
                                        sampled_building_types.astype(self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id+diff_nhb
                self.new_jobs[self.job_id_name]=concatenate((self.new_jobs[self.job_id_name], arange(self.max_id+1, 
                                                                                                     new_max_id+1)))
                self.max_id = new_max_id

    def _update_job_set(self, job_set):
        job_set.remove_elements(self.remove_jobs)
        job_set.add_elements(self.new_jobs, require_all_attributes=False)
        difference = job_set.size()-self.job_size
        self.debug.print_debug("Difference in number of jobs: %s (original %s,"
            " new %s, created %s, deleted %s)"
                % (difference,
                   self.job_size,
                   job_set.size(),
                   self.new_jobs[self.job_id_name].size,
                   self.remove_jobs.size),
            3)
        self.debug.print_debug("Number of unplaced jobs: %s"
            % where(job_set.get_attribute(self.location_id_name) <=0)[0].size,
            3)
        return difference

    def prepare_for_run(self, storage, **kwargs):
        from urbansim.datasets.control_total_dataset import ControlTotalDataset
        control_totals = ControlTotalDataset(in_storage=storage, what="employment")
        sample_control_totals(storage, control_totals, **kwargs)
        return control_totals
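When jobs are created above, the building types of new jobs are sampled from the empirical distribution of existing jobs of the same kind, tallied per type with ndimage_sum. A standalone sketch of that pattern using scipy.ndimage.sum and numpy.random.choice in place of the urbansim helpers (hypothetical data):

import numpy as np
from scipy import ndimage

is_in_sector_hb = np.array([1, 0, 1, 1, 0])   # indicator over existing jobs
building_type = np.array([1, 2, 1, 3, 3])     # building type of each job
available_building_types = np.array([1, 2, 3])

# per-type counts of home-based jobs in this sector: [2., 0., 1.]
distribution = np.array(ndimage.sum(is_in_sector_hb, labels=building_type,
                                    index=available_building_types), dtype='float64')
diff_hb = 4                                   # number of new jobs to create
sampled_types = np.random.choice(available_building_types, size=diff_hb,
                                 p=distribution / distribution.sum())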
Example #10
    def run(
        self,
        percent_active_development=100,
        build_minimum_units=False,
        year=None,
        start_year=None,
        dataset_pool=None,
        capacity_this_year_variable="mag_zone.active_development.capacity_this_year",
    ):
        # General TODO:
        #    - deal w/ "other_spaces" columns
        #    - look at generalizing the weight used when building units
        #    - build unit test for minimum build feature

        # LIST OF MODEL ASSUMPTIONS:
        #    - TODO: can i generalize the need for these pre-defined variables?
        #    - the model expects variables to exist that correspond to this naming pattern
        #      for every is_developing building_type_name in the building_types dataset:
        #        - total_<building_type_name>_units_col
        #        - occupied_<building_type_name>_units_col
        #    - building_type_name must be unique, lowercase, contain no spaces
        #    - target_vacancy.is_developing defines which building_types are considered

        # Minimum build feature
        #    - The user can specify 2 additional columns in the building_types dataset:
        #        - adm_minimum_annual_build_units
        #        - adm_minimum_annual_build_max_year
        #    - If these fields are present, and the "build_minimum_units" run option is set to True
        #        - The model will use these fields to build the minimum number of units annually,
        #          as specified in the building_types table, up to the maximum year given there.  This feature
        #          simulates the case where, even though demand is too low to justify new units, some are built anyway

        # CURRENT LIST OF KNOWN ISSUES:
        #    -

        # Get current simulation year
        if year is None:
            simulation_year = SimulationState().get_current_time()
        else:
            simulation_year = year

        # only run once start_year has been reached
        if start_year and start_year > simulation_year:
            return

        # Get the percent_active_development
        # convert it to a float
        percent_active_development = percent_active_development / 100.0

        # Get the dataset pool
        if dataset_pool is None:
            dataset_pool = SessionConfiguration().get_dataset_pool()

        # get the active_developments dataset, subset it for actually active projects
        # compute some variables
        developments_dataset = dataset_pool.get_dataset("active_developments")
        active_developments_capacity = developments_dataset.compute_variables([capacity_this_year_variable])
        # TODO: need to further filter active developments, not only by start_year<=simulation_year,
        #       but also by whether they are built out, etc.
        active_developments_index = where(developments_dataset.get_attribute("start_year") <= simulation_year)[0]
        active_developments_capacity_this_year = active_developments_capacity[active_developments_index]
        # debug help
        self.debug.print_debug("\n*** BEGIN DEBUG INFO:", 1)
        self.debug.print_debug("len(active_developments_index) = %s" % len(active_developments_index), 1)
        self.debug.print_debug("len(active_developments_index) = %s" % len(active_developments_index), 1)
        self.debug.print_debug(
            "len(active_developments_capacity_this_year) = %s" % len(active_developments_capacity_this_year), 1
        )
        self.debug.print_debug("END DEBUG INFO ***\n", 1)

        # get the target_vacancy_rates dataset
        target_vacancy_rates_dataset = dataset_pool.get_dataset("target_vacancy")
        # get target vacancy rates for this simulation_year
        this_year_index = where(target_vacancy_rates_dataset.get_attribute("year") == simulation_year)[0]
        target_vacancies_for_this_year = DatasetSubset(target_vacancy_rates_dataset, this_year_index)
        # get some columns
        bldg_types = target_vacancies_for_this_year.get_attribute("building_type_id")
        tgt_vacancies = target_vacancies_for_this_year.get_attribute("target_vacancy")
        # get unique building types
        unique_building_types = unique1d(bldg_types)
        # build a dictionary containing building_type_id:{'target_vacancy_rate':<float>}
        developing_building_types_info = {}
        for unique_building_type in unique_building_types:
            unique_building_type_index = where(bldg_types == unique_building_type)[0]
            developing_building_types_info[unique_building_type] = {
                "target_vacancy_rate": tgt_vacancies[unique_building_type_index].mean()
            }
        # debug help
        if self.debuglevel > 0:
            self.debug_printer("developing_building_types_info", developing_building_types_info)

        # get the building_types dataset
        building_types_dataset = dataset_pool.get_dataset("building_type")
        # get the attribute names
        # I don't think this next line is used at all:
        # building_types_dataset_attribute_names = building_types_dataset.get_attribute_names()

        # get only the developing building types
        developing_types_index = where(building_types_dataset.get_attribute("is_developing") == 1)[0]
        developing_building_types_dataset = DatasetSubset(building_types_dataset, developing_types_index)
        # calculate active development capacity this simulation_year
        developing_building_type_ids = developing_building_types_dataset.get_attribute("building_type_id")
        building_type_names = developing_building_types_dataset.get_attribute("building_type_name")

        # add building_type_name to the dictionary
        # now the dictionary takes the form of:
        #    building_type_id:{'target_vacancy_rate':<float>,'building_type_name':<string>}
        counter = 0
        for developing_building_type_id in developing_building_type_ids:
            try:
                developing_building_types_info[developing_building_type_id]["building_type_name"] = building_type_names[
                    counter
                ]
                counter += 1
            except:
                logger.log_warning(
                    "You may have a mismatch in the building_type_ids between those in the target_vacancies dataset and the developing types in the building_types dataset."
                )
        # debug help
        if self.debuglevel > 0:
            self.debug_printer("developing_building_types_info", developing_building_types_info)

        # add 'is_residential' to the developing_building_types_info dictionary
        # now the dictionary takes the form of:
        #    building_type_id:{'target_vacancy_rate':<float>,'building_type_name':<string>,'is_residential':<integer>}
        for developing_building_type in developing_building_types_info:
            indx = where(building_types_dataset.get_attribute("building_type_id") == developing_building_type)[0]
            developing_building_types_info[developing_building_type][
                "is_residential"
            ] = building_types_dataset.get_attribute("is_residential")[indx][0]
        # debug help
        if self.debuglevel > 0:
            self.debug_printer("developing_building_types_info", developing_building_types_info)

        # add 'adm_minimum_annual_build_units' and 'adm_minimum_annual_build_max_year' to the developing_building_types_info dictionary
        # now the dictionary takes the form of:
        #    building_type_id:{'target_vacancy_rate':<float>,'building_type_name':<string>,'is_residential':<integer>,'adm_minimum_annual_build_units':<integer>, 'adm_minimum_annual_build_max_year':<integer>}
        if build_minimum_units:
            try:
                for developing_building_type in developing_building_types_info:
                    indx = where(building_types_dataset.get_attribute("building_type_id") == developing_building_type)[
                        0
                    ]
                    developing_building_types_info[developing_building_type][
                        "adm_minimum_annual_build_units"
                    ] = building_types_dataset.get_attribute("adm_minimum_annual_build_units")[indx][0]
                for developing_building_type in developing_building_types_info:
                    indx = where(building_types_dataset.get_attribute("building_type_id") == developing_building_type)[
                        0
                    ]
                    developing_building_types_info[developing_building_type][
                        "adm_minimum_annual_build_max_year"
                    ] = building_types_dataset.get_attribute("adm_minimum_annual_build_max_year")[indx][0]
            except:
                logger.log_error(
                    '\n\nYou have the option "build_minimum_units" set to "True" but appear to be missing the "adm_minimum_annual_build_units" and "adm_minimum_annual_build_max_year" units in your "building_types" dataset.\n'
                )
                return

        # build a list of total and occupied units variables to compute of the form
        #     ['occupied_rsf_units_col','total_rsf_units_col', ...]
        # The variables that this section creates and computes need to be defined in the buildings
        #     dataset aliases.py file
        building_variables = []
        for building_type_id, dict_of_info in developing_building_types_info.iteritems():
            try:
                total, occupied = (
                    "total_%s_units_col" % dict_of_info["building_type_name"],
                    "occupied_%s_units_col" % dict_of_info["building_type_name"],
                )
                building_variables.append(total)
                building_variables.append(occupied)
            except:
                logger.log_warning(
                    "You may have a mismatch in the building_type_ids between those in the target_vacancies dataset and the developing types in the building_types dataset."
                )
        # debug help
        if self.debuglevel > 0:
            self.debug_printer("building_variables", building_variables)

        # get the buildings dataset
        buildings_dataset = dataset_pool.get_dataset("building")
        # compute total and occupied units variables
        buildings_dataset.compute_variables(building_variables)
        # sum up those variables into a dictionary of the form:
        #    {'occupied_rsf_units':<integer>, 'total_rsf_units':<integer>, ...}
        total_and_occupied_variable_sums = {}
        for building_variable in building_variables:
            summed_attribute = buildings_dataset.get_attribute("%s" % building_variable).sum()
            total_and_occupied_variable_sums[building_variable.replace("_col", "")] = summed_attribute
        # debug help
        if self.debuglevel > 0:
            self.debug_printer("total_and_occupied_variable_sums", total_and_occupied_variable_sums)

        # set up a table to log into
        status_log = PrettyTable()
        status_log.set_field_names(
            [  # "Type",
                "Name",
                "Occ Units",
                "Tot Units",
                "CurrentVR",
                "Target Units",
                "TargetVR",
                "Difference",
                "Max Act Dev Action",
                "Avail Act Dev",
                "Build Action",
            ]
        )

        # compute target units, vacancy rates, etc
        # go over each developing building type and compute target units, differences, total development required,
        #    available capacity in active_developments, and action to take in active_developments
        for developing_building_type in developing_building_types_info:
            # compute target variables
            # compute target variables into developing_building_types_info dict
            developing_building_types_info[developing_building_type][
                "target_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"]
            ] = int(
                round(
                    total_and_occupied_variable_sums[
                        "occupied_%s_units"
                        % developing_building_types_info[developing_building_type]["building_type_name"]
                    ]
                    / (1 - developing_building_types_info[developing_building_type]["target_vacancy_rate"])
                )
            )

            # compute difference variables
            # compute difference variables into developing_building_types_info dict
            developing_building_types_info[developing_building_type][
                "%s_diff" % developing_building_types_info[developing_building_type]["building_type_name"]
            ] = (
                developing_building_types_info[developing_building_type][
                    "target_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"]
                ]
                - total_and_occupied_variable_sums[
                    "total_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"]
                ]
            )

            # compute action variables
            # if the computed difference is 0 or negative (no demand for units of this type):
            if (
                developing_building_types_info[developing_building_type][
                    "%s_diff" % developing_building_types_info[developing_building_type]["building_type_name"]
                ]
                < 1
            ):
                # consider whether to build the minimum units
                # check simulation year against maximum annual build year
                if (
                    build_minimum_units
                    and developing_building_types_info[developing_building_type]["adm_minimum_annual_build_max_year"]
                    >= simulation_year
                ):
                    # build minimum
                    developing_building_types_info[developing_building_type][
                        "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"]
                    ] = developing_building_types_info[developing_building_type]["adm_minimum_annual_build_units"]
                else:
                    # build nothing
                    developing_building_types_info[developing_building_type][
                        "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"]
                    ] = 0
            # the computed difference is positive (demand for units of this type);
            # decide how much to build: the actual number demanded, or the minimum
            else:
                # compute the difference * the percent_active_development
                diff_with_pct_active = int(
                    developing_building_types_info[developing_building_type][
                        "%s_diff" % developing_building_types_info[developing_building_type]["building_type_name"]
                    ]
                    * percent_active_development
                )
                # if the diff_with_pct_active is greater than the minimum development:
                if (
                    build_minimum_units
                    and diff_with_pct_active
                    > developing_building_types_info[developing_building_type]["adm_minimum_annual_build_units"]
                ):
                    # just build the diff_with_pct_active
                    developing_building_types_info[developing_building_type][
                        "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"]
                    ] = diff_with_pct_active
                # diff_with_pct_active < minimum build and the max year for annual build has not passed:
                elif (
                    build_minimum_units
                    and developing_building_types_info[developing_building_type]["adm_minimum_annual_build_max_year"]
                    >= simulation_year
                ):
                    # build the minimum
                    developing_building_types_info[developing_building_type][
                        "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"]
                    ] = developing_building_types_info[developing_building_type]["adm_minimum_annual_build_units"]
                # last case is the demand < minimum, but the simulation year > max year:
                else:
                    # build the pct_diff_with_pct_active
                    developing_building_types_info[developing_building_type][
                        "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"]
                    ] = diff_with_pct_active

            # compute how much development is available in active developments
            # add this information to the developing_building_types_info dictionary:
            #     building_type_id:{'target_vacancy_rate':<float>,'building_type_name':<string>,'available_active_capacity_this_year':<integer>}
            indx = where(
                developments_dataset.get_attribute("building_type_id")[active_developments_index]
                == developing_building_type
            )
            developing_building_types_info[developing_building_type][
                "active_developments_capacity_this_year_index"
            ] = indx
            developing_building_types_info[developing_building_type][
                "available_active_capacity_this_year"
            ] = active_developments_capacity_this_year[indx].sum()

            # compute actual action to take
            action = developing_building_types_info[developing_building_type][
                "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"]
            ]
            available = developing_building_types_info[developing_building_type]["available_active_capacity_this_year"]
            actual_action = self.lesser(action, available)
            # revise actual action if minimum build units is in effect:
            if (
                build_minimum_units
                and developing_building_types_info[developing_building_type]["adm_minimum_annual_build_max_year"]
                >= simulation_year
            ):
                actual_action = self.greater(
                    actual_action,
                    developing_building_types_info[developing_building_type]["adm_minimum_annual_build_units"],
                )
            developing_building_types_info[developing_building_type]["action_to_take_this_year"] = actual_action

            # create status line for logging
            status_line = [  # developing_building_type,
                developing_building_types_info[developing_building_type]["building_type_name"],
                total_and_occupied_variable_sums[
                    "occupied_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"]
                ],
                total_and_occupied_variable_sums[
                    "total_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"]
                ],
                round(
                    1
                    - (
                        total_and_occupied_variable_sums[
                            "occupied_%s_units"
                            % developing_building_types_info[developing_building_type]["building_type_name"]
                        ]
                        / total_and_occupied_variable_sums[
                            "total_%s_units"
                            % developing_building_types_info[developing_building_type]["building_type_name"]
                        ]
                    ),
                    4,
                ),
                developing_building_types_info[developing_building_type][
                    "target_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"]
                ],
                developing_building_types_info[developing_building_type]["target_vacancy_rate"],
                developing_building_types_info[developing_building_type][
                    "%s_diff" % developing_building_types_info[developing_building_type]["building_type_name"]
                ],
                developing_building_types_info[developing_building_type][
                    "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"]
                ],
                developing_building_types_info[developing_building_type]["available_active_capacity_this_year"],
                actual_action,
            ]
            status_log.add_row(status_line)

        # print the status table to the log
        logger.log_status(status_log)

        # debug help
        if self.debuglevel > 0:
            self.debug_printer("developing_building_types_info", developing_building_types_info)

        # update the active_developments and buildings datasets with new units
        for developing_building_type in developing_building_types_info:
            if developing_building_types_info[developing_building_type]["action_to_take_this_year"] > 0:
                # update 'current_built_units' column in active_developments dataset

                # get the index of the records of the current developing_building_type
                indx = developing_building_types_info[developing_building_type][
                    "active_developments_capacity_this_year_index"
                ]
                # get the total number of units to build this year
                total_action = developing_building_types_info[developing_building_type]["action_to_take_this_year"]
                # compute the weight as build_out capacity - current_built_units
                buildout_capacity = developments_dataset.get_attribute("build_out_capacity")[active_developments_index][
                    indx
                ]
                current_built_units = developments_dataset.get_attribute("current_built_units")[
                    active_developments_index
                ][indx]
                weights = buildout_capacity - current_built_units
                weights_sum = float(weights.sum())
                weight_array = weights / weights_sum
                # distribute the total to build against the weight
                action_array = (total_action * weight_array).astype("int32")
                new_built_units = current_built_units + action_array
                # make sure we are not going to build more than the buildout_capacity
                check = buildout_capacity - new_built_units
                check_lt_zero = where(check < 0)
                if check_lt_zero[0].size > 0:
                    # We have a problem; set new_built_units = buildout_capacity
                    #  for those records that would exceed the buildout of the development
                    new_built_units[check_lt_zero] = buildout_capacity[check_lt_zero]
                # update the current_built_units column with new values
                developments_building_ids = developments_dataset.get_attribute("building_id")
                building_ids_to_be_updated = developments_building_ids[active_developments_index][indx]
                if self.debuglevel > 0:
                    self.debug_printer("building_ids_to_be_updated", building_ids_to_be_updated)
                building_ids_to_be_updated_index_on_developments = in1d(
                    developments_building_ids, building_ids_to_be_updated
                )
                developments_dataset.set_values_of_one_attribute(
                    "current_built_units", new_built_units, building_ids_to_be_updated_index_on_developments
                )
                # debug help
                if self.debuglevel > 0:
                    self.debug_printer("new_built_units", new_built_units)

                # update the relevant units column on the buildings dataset with new units
                # debug help
                if self.debuglevel > 0:
                    self.debug_printer("building_ids_to_be_updated", building_ids_to_be_updated)
                building_ids_to_be_updated_index_on_buildings = buildings_dataset.get_id_index(
                    building_ids_to_be_updated
                )
                # debug help
                if self.debuglevel > 0:
                    self.debug_printer(
                        "building_ids_to_be_updated_index_on_buildings", building_ids_to_be_updated_index_on_buildings
                    )
                if developing_building_types_info[developing_building_type]["is_residential"]:
                    buildings_dataset.set_values_of_one_attribute(
                        "residential_units", new_built_units, building_ids_to_be_updated_index_on_buildings
                    )
                else:
                    buildings_dataset.set_values_of_one_attribute(
                        "non_residential_sqft", new_built_units, building_ids_to_be_updated_index_on_buildings
                    )
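
The update above distributes the year's build action across a building type's active developments in proportion to remaining capacity (build_out_capacity - current_built_units), then clips any record that would overshoot its buildout. Below is a minimal sketch of that allocation with made-up numbers; the names mirror the snippet, but nothing here comes from a real dataset:

# capacity-weighted allocation sketch (hypothetical inputs)
from numpy import array, where

buildout_capacity   = array([100, 250, 50])   # build_out_capacity per record
current_built_units = array([ 80, 100, 50])   # current_built_units per record
total_action = 120                            # units to build this year

weights = buildout_capacity - current_built_units    # remaining capacity
weight_array = weights / float(weights.sum())        # shares summing to 1.0
action_array = (total_action * weight_array).astype("int32")
new_built_units = current_built_units + action_array

# clip records that would exceed their buildout capacity
check_lt_zero = where(buildout_capacity - new_built_units < 0)
new_built_units[check_lt_zero] = buildout_capacity[check_lt_zero]
print(new_built_units)   # [ 94 205  50]

Note that astype("int32") truncates, so the allocated total (here 119) can fall slightly short of total_action, just as in the model itself.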
Example #11
class DevelopmentProjectTransitionModel( Model ):
    """
    Creates development projects. Each development project is for a single type
    of development, e.g. 'industrial' or 'commercial'.  This model creates
    enough development projects to match the desired vacancy rates, as defined in the target_vacancies
    table.  It does not place any projects in locations; that is the job of the development project
    location choice models.  The distribution of project sizes (amount of space, value of space) is
    determined by sampling from the projects in the development_event_history table.
    """
    model_name = "Development Project Transition Model"
    
    def __init__( self, debuglevel=0 ):
        self.debug = DebugPrinter( debuglevel )

    def pre_check( self, location_set, vacancy_table, types ):
        for ptype in types:
            self.check_for_space( location_set.get_attribute(self.variable_for_total_units[ptype]))
        self.check_target_vacancy_is_not_100_percent( vacancy_table.get_attribute( "target_total_vacancy"))

    def check_for_space( self, values ):
        """Check that this array of values sums to something > 0."""
        self.do_check( "x > 0", array( [values.sum()] ) )

    def check_target_vacancy_is_not_100_percent( self, value ):
        """Check that the target vacancy rate is not 100% (ratio == 1), because it doesn't make sense,
        and it also causes a divide by 0 error."""
        self.do_check( "x < 1", value )

    def run( self, vacancy_table, history_table, year, location_set, dataset_pool=None, resources=None ):
        self.dataset_pool=dataset_pool
        building_types = self.dataset_pool.get_dataset('building_type')
        target_vacancy_this_year = DatasetSubset(vacancy_table, index=where(vacancy_table.get_attribute("year")==year)[0])
        building_type_ids = target_vacancy_this_year.get_attribute('building_type_id')
        building_type_idx = building_types.get_id_index(building_type_ids)
        self.used_building_types = DatasetSubset(building_types, index=building_type_idx)
        project_types =  self.used_building_types.get_attribute('building_type_name')
        is_residential = self.used_building_types.get_attribute('is_residential')
        unit_names =  where(is_residential, 'residential_units', 'non_residential_sqft')
        specific_unit_names =  where(is_residential, 'residential_units', '_sqft')
        rates =  target_vacancy_this_year.get_attribute('target_total_vacancy')
        self.project_units = {}
        self.project_specific_units = {}
        target_rates = {}
        for i in range(self.used_building_types.size()):
            self.project_units[project_types[i]] = unit_names[i]
            if is_residential[i]:
                self.project_specific_units[project_types[i]] = specific_unit_names[i]
            else:
                self.project_specific_units[project_types[i]] = "%s%s" % (project_types[i], specific_unit_names[i])
            target_rates[building_type_ids[i]] = rates[i]
            
        self._compute_vacancy_and_total_units_variables(location_set, project_types, resources)
        self.pre_check( location_set, target_vacancy_this_year, project_types)
    
        projects = None
        for project_type_id, target_vacancy_rate in target_rates.iteritems():
            # determine current-year vacancy rates
            project_type = building_types.get_attribute_by_id('building_type_name', project_type_id)
            vacant_units_sum = location_set.get_attribute(self.variable_for_vacancy[project_type]).sum()
            units_sum = float( location_set.get_attribute(self.variable_for_total_units[project_type]).sum() )
            should_develop_units = int(round(max( 0, ( target_vacancy_rate * units_sum - vacant_units_sum ) /
                                         ( 1 - target_vacancy_rate ) )))
            logger.log_status(project_type + ": vacant units: %d, should be vacant: %f, sum units: %d"
                          % (vacant_units_sum, target_vacancy_rate * units_sum, units_sum))

            if not should_develop_units:
                logger.log_note(("Will not build any " + project_type + " units, because the current vacancy of %d units\n"
                             + "is more than the %d units desired for the vacancy rate of %f.")
                            % (vacant_units_sum,
                               target_vacancy_rate * units_sum,
                               target_vacancy_rate))
            #create projects
            if should_develop_units > 0:
                this_project = self._create_projects(should_develop_units, project_type, project_type_id, history_table,
                                                               location_set, units_sum, resources)
                if projects is None:
                    projects = this_project
                else:
                    projects.join_by_rows(this_project, change_ids_if_not_unique=True)
        return projects

    
    def _compute_vacancy_and_total_units_variables(self, location_set, project_types, resources=None):
        compute_resources = Resources(resources)
        compute_resources.merge({"debug":self.debug})
        self.variable_for_vacancy = {}
        self.variable_for_total_units = {}
        for ptype in project_types:
            self.variable_for_vacancy[ptype] = compute_resources.get(
                                    "%s_vacant_variable" % ptype,
                                    "urbansim_zone.%s.vacant_%s" % (location_set.get_dataset_name(),
                                                                     self.project_specific_units[ptype]))
            self.variable_for_total_units[ptype] = compute_resources.get(
                                    "%s_total_units_variable" % ptype,
                                    "%s.aggregate(urbansim_zone.building.total_%s)" % (location_set.get_dataset_name(), 
                                                             self.project_specific_units[ptype]))
            location_set.compute_variables([self.variable_for_vacancy[ptype], self.variable_for_total_units[ptype]], 
                                           dataset_pool=self.dataset_pool, resources = compute_resources)
            
    def _create_projects(self, should_develop_units, project_type, project_type_id, history_table, location_set, units_sum, resources=None):
        history_values = history_table.get_attribute(self.project_units[project_type])
        type_code_values = history_table.get_change_type_code_attribute(self.project_units[project_type])
        # take only non-zero history values and those that don't represent demolished buildings 
        history_values_without_zeros = history_values[logical_and( history_values > 0, 
                                                                  type_code_values !=  DevelopmentEventTypeOfChange.DELETE)]
        mean_size = history_values_without_zeros.mean()
        idx = array( [], dtype="int32" )
        # Ensure that there are some development projects to choose from.
        num_of_projects_to_select = max( 10, round_( should_develop_units / mean_size ) )
        while True:
            idx = concatenate( ( idx, randint( 0, history_values_without_zeros.size,
                                                num_of_projects_to_select ) ) )
            csum = history_values_without_zeros[idx].cumsum()
            idx1 = idx[csum <= should_develop_units]
            if idx1.size == 0: # at least one project should be selected
                idx = array([idx[0]], dtype="int32")
            else:
                idx = idx1
            if csum[-1] >= should_develop_units:
                break
        data = {"residential_units": zeros( ( idx.size, ), dtype=int32),
                "non_residential_sqft": zeros( ( idx.size, ), dtype=int32),
                'building_type_id': array(idx.size* [project_type_id]),
                "project_id": arange( idx.size ) + 1,
                "building_id": zeros( ( idx.size, ), dtype=int32)}
        data[self.project_units[project_type]]= history_values_without_zeros[idx]
        storage = StorageFactory().get_storage('dict_storage')

        development_projects_table_name = 'development_projects'
        storage.write_table(table_name=development_projects_table_name, table_data=data)

        return Dataset(
            in_storage = storage,
            in_table_name = development_projects_table_name,
            id_name='project_id'
            )
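
The sampling loop in _create_projects keeps drawing project sizes from the development event history until their cumulative size covers the target, and guarantees at least one project is selected. Here is a self-contained sketch of the same idea, with a hypothetical history array standing in for history_values_without_zeros:

# history-sampling sketch (hypothetical history values)
from numpy import array, concatenate
from numpy.random import randint

history = array([10, 40, 25, 5, 60])   # non-zero historical project sizes
should_develop_units = 120
num_to_select = int(max(10, round(should_develop_units / float(history.mean()))))

idx = array([], dtype="int32")
while True:
    idx = concatenate((idx, randint(0, history.size, num_to_select)))
    csum = history[idx].cumsum()
    idx1 = idx[csum <= should_develop_units]   # keep draws within the target
    idx = idx1 if idx1.size > 0 else array([idx[0]], dtype="int32")
    if csum[-1] >= should_develop_units:       # enough was drawn; stop
        break
print(history[idx], history[idx].sum())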
Example #12
    def run(self, realestate_dataset,
            year=None, 
            occupied_spaces_variable="occupied_units",
            total_spaces_variable="total_units",
            target_attribute_name='target_vacancy_rate',
            sample_from_dataset = None,
            sample_filter="",
            reset_attribute_value={}, 
            year_built = 'year_built',
            dataset_pool=None,
            append_to_realestate_dataset = False,
            table_name = "development_projects",
            dataset_name = "development_project",
            id_name = 'development_project_id',
            **kwargs):
        """         
        sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
        """
        
        if self.target_vancy_dataset is None:
            raise RuntimeError, "target_vacancy_rate dataset is unspecified."
        
        if not sample_from_dataset:
            sample_from_dataset = realestate_dataset
            
        #if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()
        alldata = dataset_pool.get_dataset('alldata')
        unit_names = dataset_pool.get_dataset('building_type').get_attribute('unit_name')
        sqft_per_job = dataset_pool.get_dataset('building_sqft_per_job')
        zones = realestate_dataset.compute_variables("building.disaggregate(parcel.zone_id)")
        type_ids = realestate_dataset.get_attribute("building_type_id")
        building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(zones.max(), type_ids.max())
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute('year')==year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)
        
        column_names = list(set( self.target_vancy_dataset.get_known_attribute_names() ) - set( [ target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_'] ))
        column_names.sort(reverse=True)
        column_values = dict([ (name, target_vacancy_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        dataset_known_attributes = realestate_dataset.get_known_attribute_names()
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
            if variable not in sample_dataset_known_attributes:
                sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
                
        dataset_known_attributes = realestate_dataset.get_known_attribute_names() #update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1
                
        sampled_index = array([], dtype=int32)

        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
        error_log = ''
        for index in range(target_vacancy_for_this_year.size()):
            this_sampled_index = array([], dtype=int32)
            indicator = ones( realestate_dataset.size(), dtype='bool' )
            sample_indicator = ones( sample_from_dataset.size(), dtype='bool' )
            criterion = {}   # for logging
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = realestate_dataset.get_attribute(attribute)
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (attribute, realestate_dataset.get_dataset_name())
                
                if attribute + '_min' in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute+'_min')[index] 
                    criterion.update({attribute + '_min':amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                        sample_indicator *= sample_attribute >= amin
                if attribute + '_max' in column_names: 
                    amax = target_vacancy_for_this_year.get_attribute(attribute+'_max')[index]
                    criterion.update({attribute + '_max':amax}) 
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names: 
                    aval = column_values[attribute][index] 
                    criterion.update({attribute:aval}) 
                    if aval == -1:
                        continue
                    elif aval == -2:  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        indicator *= dataset_attribute == aval
                        sample_indicator *= sample_attribute == aval
                        
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
            
            logger.be_quiet() #temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
            realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            if unit_names[index]=="residential_units":
                num_units = alldata.compute_variables("alldata.aggregate_all(household.building_type_id==%s)" % (index+1))
                #persons = household_set.compute_variables("%s.number_of_agents(%s)" % (hh_ds_name, person_ds_name), resources=resources)
                num_units = num_units[0]
            else:
                num_units = alldata.compute_variables("alldata.aggregate_all(job.disaggregate(employment_submarket.building_type_id)==%s)" % (index+1))
                num_units = num_units * building_sqft_per_job_table[1, (index+1)]
                num_units = num_units[0]
            # need to make sure that the job employment_submarket doesn't rely on building...
            # must do non-home-based jobs only and then multiply by building_sqft
            logger.talk()
            
            actual_num = (indicator * realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            #target_num = int(round( (indicator * realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /\
            target_num = int(round( num_units /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index]) 
                            ))
            diff = target_num - actual_num
            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int( diff / mean_size )
                    ## sample at least 1 project when diff > 0; otherwise it is an endless loop when num_of_projects_to_sample = 0
                    num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[0:(1+searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                              ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
            #if diff < 0: #TODO demolition; not yet supported
            
            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0: action = "+" + str(action_num)
                if diff < 0: action = "-" + str(action_num)
            cat = [ str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(diff), action]
            
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:                
                logger.log_status("\t".join(cat))
            
        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
            
        result_data = {}
        result_dataset = None
        index = array([], dtype='int32')
        if sampled_index.size > 0:
            ### ideally duplicate_rows() is all that is needed to add the newly cloned rows;
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32'))
            ## also add 'independent_variables' to the new dataset
            for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
                if reset_attribute_value.has_key(attribute):
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
        
            if id_name and result_data and id_name not in result_data:
                result_data[id_name] = arange(sampled_index.size, dtype='int32') + 1
        
            storage = StorageFactory().get_storage('dict_storage')
            storage.write_table(table_name=table_name, table_data=result_data)
            
            result_dataset = Dataset(id_name = id_name,
                                      in_storage = storage,
                                      in_table_name = table_name,
                                      dataset_name = dataset_name
                                      )
            index = arange(result_dataset.size())
        
            
        if append_to_realestate_dataset:
            if len(result_data) > 0:
                index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)                
            result_dataset = realestate_dataset
        
        return (result_dataset, index)
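
The indicator logic in this run method encodes a small convention in the target vacancy table: -1 in a column means "match any value", and -2 means "the complement set", i.e. every value not already listed elsewhere in that column. A toy sketch of that matching follows; ismember in the snippet is an Opus helper, and numpy's in1d plays the same role here:

# -1/-2 matching convention sketch (made-up arrays)
from numpy import array, ones, logical_not, in1d

building_type = array([1, 2, 3, 3, 4])   # attribute values on the dataset
column_values = array([1, 2, -2])        # values present in the target table column

indicator = ones(building_type.size, dtype='bool')
aval = -2                                # the current target row's value
if aval == -1:
    pass                                 # -1: no restriction on this column
elif aval == -2:
    # -2: complement set of everything listed in this column
    indicator *= logical_not(in1d(building_type, column_values))
else:
    indicator *= building_type == aval
print(indicator)   # [False False  True  True  True]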
    def run(self,
            vacancy_table,
            history_table,
            year,
            location_set,
            dataset_pool=None,
            resources=None):
        self.dataset_pool = dataset_pool
        building_types = self.dataset_pool.get_dataset('building_type')
        target_vacancy_this_year = DatasetSubset(
            vacancy_table,
            index=where(vacancy_table.get_attribute("year") == year)[0])
        building_type_ids = target_vacancy_this_year.get_attribute(
            'building_type_id')
        building_type_idx = building_types.get_id_index(building_type_ids)
        self.used_building_types = DatasetSubset(building_types,
                                                 index=building_type_idx)
        project_types = self.used_building_types.get_attribute(
            'building_type_name')
        is_residential = self.used_building_types.get_attribute(
            'is_residential')
        unit_names = where(is_residential, 'residential_units',
                           'non_residential_sqft')
        specific_unit_names = where(is_residential, 'residential_units',
                                    '_sqft')
        rates = target_vacancy_this_year.get_attribute('target_total_vacancy')
        self.project_units = {}
        self.project_specific_units = {}
        target_rates = {}
        for i in range(self.used_building_types.size()):
            self.project_units[project_types[i]] = unit_names[i]
            if is_residential[i]:
                self.project_specific_units[
                    project_types[i]] = specific_unit_names[i]
            else:
                self.project_specific_units[project_types[i]] = "%s%s" % (
                    project_types[i], specific_unit_names[i])
            target_rates[building_type_ids[i]] = rates[i]

        self._compute_vacancy_and_total_units_variables(
            location_set, project_types, resources)
        self.pre_check(location_set, target_vacancy_this_year, project_types)

        projects = None
        for project_type_id, target_vacancy_rate in target_rates.iteritems():
            # determine current-year vacancy rates
            project_type = building_types.get_attribute_by_id(
                'building_type_name', project_type_id)
            vacant_units_sum = location_set.get_attribute(
                self.variable_for_vacancy[project_type]).sum()
            units_sum = float(
                location_set.get_attribute(
                    self.variable_for_total_units[project_type]).sum())
            should_develop_units = int(
                round(
                    max(0,
                        (target_vacancy_rate * units_sum - vacant_units_sum) /
                        (1 - target_vacancy_rate))))
            logger.log_status(
                project_type +
                ": vacant units: %d, should be vacant: %f, sum units: %d" %
                (vacant_units_sum, target_vacancy_rate * units_sum, units_sum))

            if not should_develop_units:
                logger.log_note((
                    "Will not build any " + project_type +
                    " units, because the current vacancy of %d units\n" +
                    "is more than the %d units desired for the vacancy rate of %f."
                ) % (vacant_units_sum, target_vacancy_rate * units_sum,
                     target_vacancy_rate))
            #create projects
            if should_develop_units > 0:
                this_project = self._create_projects(
                    should_develop_units, project_type, project_type_id,
                    history_table, location_set, units_sum, resources)
                if projects is None:
                    projects = this_project
                else:
                    projects.join_by_rows(this_project,
                                          change_ids_if_not_unique=True)
        return projects
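
The should_develop_units expression is the closed-form solution of the vacancy target: newly built units are assumed vacant, so solving (vacant + x) / (units + x) = target_rate for x gives x = (target_rate * units - vacant) / (1 - target_rate). A worked instance with hypothetical numbers:

# worked should_develop_units example (hypothetical inputs)
target_vacancy_rate = 0.05
units_sum = 10000.0
vacant_units_sum = 200.0

should_develop_units = int(round(max(
    0, (target_vacancy_rate * units_sum - vacant_units_sum)
       / (1 - target_vacancy_rate))))
print(should_develop_units)   # 316

# check: the post-construction vacancy rate is back at the target
print((vacant_units_sum + should_develop_units)
      / (units_sum + should_develop_units))   # ~0.0500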
    def run(self, year=None, dataset_pool=None, **kwargs):
        """
        """
        if dataset_pool is None:
            dataset_pool = SessionConfiguration().get_dataset_pool()

        if year is None:
            year = SimulationState().get_current_time()

        this_year_index = where(
            self.scheduled_events.get_attribute('year') == year)[0]
        scheduled_events_for_this_year = DatasetSubset(self.scheduled_events,
                                                       this_year_index)
        scheduled_events_for_this_year.load_dataset_if_not_loaded()
        column_names = list(
            set(self.scheduled_events.get_known_attribute_names()) - set([
                'year', 'action', 'attribute', 'amount', 'event_id',
                '_hidden_id_'
            ]))
        column_names.sort()
        #        column_values = dict([ (name, scheduled_events_for_this_year.get_attribute(name)) for name in column_names])

        for index in range(scheduled_events_for_this_year.size()):
            indicator = ones(self.dataset.size(), dtype='bool')
            event_attr = {}
            for attribute in column_names:
                if attribute in self.dataset.get_known_attribute_names():
                    dataset_attribute = self.dataset.get_attribute(attribute)
                else:
                    ## this is done inside the loop because some actions may delete computed attributes, e.g. dataset.add_elements()
                    try:
                        dataset_attribute = self.dataset.compute_one_variable_with_unknown_package(
                            attribute, dataset_pool=dataset_pool)
                    except:
                        raise ValueError, "attribute %s used in scheduled events dataset can not be found in dataset %s" % (
                            attribute, self.dataset.get_dataset_name())


#                if attribute in column_names:
                aval = scheduled_events_for_this_year.get_attribute(
                    attribute)[index]
                if aval == -1:
                    continue  # ignore if column value is -1
                else:
                    indicator *= dataset_attribute == aval
                    event_attr.update({attribute: aval})

            #agents in dataset satisfying all conditions are identified by indicator
            legit_index = where(indicator)[0]

            this_event = scheduled_events_for_this_year.get_data_element(index)
            if not hasattr(this_event, 'attribute'):
                action_attr_name = ''
            else:
                action_attr_name = this_event.attribute
            action_function = getattr(self,
                                      '_' + this_event.action.strip().lower())
            action_function(amount=this_event.amount,
                            attribute=action_attr_name,
                            dataset=self.dataset,
                            index=legit_index,
                            data_dict=event_attr)

            self.post_run(self.dataset, legit_index, **kwargs)

        return self.dataset
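
The getattr dispatch above maps the 'action' column of the scheduled events table to a method named '_<action>' (so an 'Add' action calls self._add). Below is a toy illustration of that pattern; the class and its methods are hypothetical, not part of the model:

# action-dispatch sketch (illustrative class, not from the model)
class EventRunner(object):
    def _add(self, amount, **kwargs):
        print("adding %s" % amount)
    def _delete(self, amount, **kwargs):
        print("deleting %s" % amount)
    def run_event(self, action, amount):
        # same normalization as the model: strip whitespace, lowercase
        action_function = getattr(self, '_' + action.strip().lower())
        action_function(amount=amount)

runner = EventRunner()
runner.run_event(" Add ", 10)    # -> adding 10
runner.run_event("delete", 3)    # -> deleting 3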
    def test_agents_placed_in_appropriate_types(self):
        """Create 1000 unplaced industrial jobs and 1 commercial job. Allocate 50 commercial
        gridcells with enough space for 10 commercial jobs per gridcell. After running the
        EmploymentLocationChoiceModel, the 1 commercial job should be placed,
        but the 1000 industrial jobs should remain unplaced
        """
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='job_building_types',
            table_data = {
                'id':array([2,1]),
                'name': array(['commercial', 'industrial'])
                }
            )
        job_building_types = JobBuildingTypeDataset(in_storage=storage, in_table_name='job_building_types')

        storage.write_table(table_name='jobs',
            table_data = {
                'job_id': arange(1001)+1,
                'grid_id': array([0]*1001),
                'building_type': array([1]*1000 + [2])
                }
            )
        jobs = JobDataset(in_storage=storage, in_table_name='jobs')

        storage.write_table(table_name='gridcells',
            table_data = {
                'grid_id': arange(50)+1,
                'commercial_sqft': array([1000]*50),
                'commercial_sqft_per_job': array([100]*50)
                }
            )
        gridcells = GridcellDataset(in_storage=storage, in_table_name='gridcells')

        coefficients = Coefficients(names=("dummy",), values=(0.1,))
        specification = EquationSpecification(variables=("gridcell.commercial_sqft",), coefficients=("dummy",))

        compute_resources = Resources({"job":jobs, "job_building_type": job_building_types})
        agents_index = where(jobs.get_attribute("grid_id") == 0)
        unplace_jobs = DatasetSubset(jobs, agents_index)
        agents_index = where(unplace_jobs.get_attribute("building_type") == 2)[0]
        gridcells.compute_variables(["urbansim.gridcell.number_of_commercial_jobs"],
                                    resources=compute_resources)
        commercial_jobs = gridcells.get_attribute("number_of_commercial_jobs")

        gridcells.compute_variables(["urbansim.gridcell.number_of_industrial_jobs"],
                                    resources=compute_resources)
        industrial_jobs = gridcells.get_attribute("number_of_industrial_jobs")
        model_group = ModelGroup(job_building_types, "name")
        elcm = EmploymentLocationChoiceModel(ModelGroupMember(model_group,"commercial"), location_set=gridcells,
               agents_grouping_attribute = "job.building_type",
               choices = "opus_core.random_choices_from_index", sample_size_locations = 30)
        elcm.run(specification, coefficients, agent_set = jobs, agents_index=agents_index, debuglevel=1)

        gridcells.compute_variables(["urbansim.gridcell.number_of_commercial_jobs"],
                                    resources=compute_resources)
        commercial_jobs = gridcells.get_attribute("number_of_commercial_jobs")

        gridcells.compute_variables(["urbansim.gridcell.number_of_industrial_jobs"],
                                    resources=compute_resources)
        industrial_jobs = gridcells.get_attribute("number_of_industrial_jobs")

        self.assertEqual(commercial_jobs.sum() == 1,
                         True, "Error, there should only be a total of 1 commercial job")
        self.assertEqual(industrial_jobs.sum() == 0,
                         True, "Error, there should be no industrial jobs because there's no space for them")
    def run(self, n=500, run_config=None, current_year=None, debuglevel=0):
        """
        n - sample n proposals at a time, evaluate them one by one
        """
        self.demolished_buildings = array([], dtype='int32')  #id of buildings to be demolished
        if current_year is None:
            current_year = SimulationState().get_current_time()
        if not self.positive_proposals:
            logger.log_status("Proposal Set size <= 0, no proposals to consider, skipping DPPSM.")
            return (self.proposal_set, self.demolished_buildings) 
        self.proposal_component_set.compute_variables([
            'urbansim_parcel.development_project_proposal_component.units_proposed',
            'urbansim_parcel.development_project_proposal_component.is_residential'],
                                        dataset_pool=self.dataset_pool)
        self.proposal_set.compute_variables([
            'urbansim_parcel.development_project_proposal.number_of_components',
            'zone_id=development_project_proposal.disaggregate(parcel.zone_id)',
            #'occurence_frequency = development_project_proposal.disaggregate(development_template.sample_size)'
            ],
                                        dataset_pool=self.dataset_pool)
        buildings = self.dataset_pool.get_dataset("building")
        buildings.compute_variables([
                                "occupied_units_for_jobs = urbansim_parcel.building.number_of_non_home_based_jobs",
                                "units_for_jobs = urbansim_parcel.building.total_non_home_based_job_space",
                                "occupied_residential_units = urbansim_parcel.building.number_of_households",
#                                "urbansim_parcel.building.existing_units",
                                "urbansim_parcel.building.is_residential"
                                    ],
                                    dataset_pool=self.dataset_pool)

        ## define unit_name by whether a building is residential or not (via the is_residential attribute)
        ## if it is non-residential (0), count units by the number of job spaces (units_for_jobs)
        ## if it is residential (1), count units by residential_units
        self.unit_name = array(["units_for_jobs", "residential_units"])
                
        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')
        target_vacancy.compute_variables(['is_residential = target_vacancy.disaggregate(building_type.is_residential)'],
                                         dataset_pool=self.dataset_pool)
        # This try-except block checks whether the object has a subarea_id_name;
        # if it does, vacancy rates are calculated by subarea_id_name
        try:
            # Check for subarea_id_name in target_vacancies dataset
            # if it is present, vacancy rates are specified by subarea_id_name
            # if it is not, vacancy rates are specified region wide
            target_vacancy.load_dataset()
            if self.subarea_id_name in target_vacancy.get_attribute_names():
                current_target_vacancy_this_year = DatasetSubset(target_vacancy, index=where(target_vacancy.get_attribute("year")==current_year)[0])
                current_target_vacancy = DatasetSubset(current_target_vacancy_this_year, index=where(current_target_vacancy_this_year.get_attribute(self.subarea_id_name)==self.area_id)[0])
            else:
                current_target_vacancy = DatasetSubset(target_vacancy, index=where(target_vacancy.get_attribute("year")==current_year)[0])
        except AttributeError:
            # vacancy rates are specified region wide:
            current_target_vacancy = DatasetSubset(target_vacancy, index=where(target_vacancy.get_attribute("year")==current_year)[0])

        if current_target_vacancy.size() == 0:
            raise IOError, 'No target vacancy defined for year %s.' % current_year
        
        self.existing_units = {}   #total existing units by land_use type
        self.occupied_units = {}   #total occupied units by land_use type
        self.proposed_units = {}   #total proposed units by land_use type
        self.demolished_units = {} #total (to be) demolished units by land_use type

        components_building_type_ids = self.proposal_component_set.get_attribute("building_type_id").astype("int32")
        proposal_ids = self.proposal_set.get_id_attribute()
        proposal_ids_in_component_set = self.proposal_component_set.get_attribute("proposal_id")
        all_units_proposed = self.proposal_component_set.get_attribute("units_proposed")
        number_of_components_in_proposals = self.proposal_set.get_attribute("number_of_components")
        
        self.accepting_proposals = zeros(current_target_vacancy.get_attribute("building_type_id").max()+1, dtype='bool8')  #whether accepting new proposals, for each building type
        self.accepted_proposals = [] # index of accepted proposals

        self.target_vacancies = {}
        tv_building_types = current_target_vacancy.get_attribute("building_type_id")
        tv_rate = current_target_vacancy.get_attribute("target_vacancy_rate")
        for itype in range(tv_building_types.size):
            self.target_vacancies[tv_building_types[itype]] = tv_rate[itype]
            
        self.check_vacancy_rates(current_target_vacancy)  #initialize self.accepting_proposal based on current vacancy rate

        sqft_per_job = self.dataset_pool.get_dataset("building_sqft_per_job")
        zones_of_proposals = self.proposal_set.get_attribute("zone_id")
        self.building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(zones_of_proposals.max(), 
                                                                                   tv_building_types.max())
        # consider only those proposals that have all components of accepted type and sum of proposed units > 0
        is_accepted_type = self.accepting_proposals[components_building_type_ids]
        sum_is_accepted_type_over_proposals = array(ndimage.sum(is_accepted_type, labels = proposal_ids_in_component_set, 
                                                          index = proposal_ids))
        sum_of_units_proposed = array(ndimage.sum(all_units_proposed, labels = proposal_ids_in_component_set, 
                                                          index = proposal_ids))
        is_proposal_eligible = logical_and(sum_is_accepted_type_over_proposals == number_of_components_in_proposals,
                                           sum_of_units_proposed > 0)

        is_proposal_eligible = logical_and(is_proposal_eligible,
                                           self.proposal_set.get_attribute("start_year")==current_year )
        ## handle planned proposals: all proposals with status_id == id_planned
        ## and start_year == current_year are accepted
        planned_proposal_indexes = where(logical_and(
                                                  self.proposal_set.get_attribute("status_id") == self.proposal_set.id_planned, 
                                                  self.proposal_set.get_attribute("start_year") == current_year ) 
                                        )[0] 
                                   
        self.consider_proposals(planned_proposal_indexes, force_accepting=True)
        # consider proposals (in this order: planned, proposed, tentative)
        for status in [self.proposal_set.id_proposed, self.proposal_set.id_tentative]:
            idx = where(logical_and(self.proposal_set.get_attribute("status_id") == status, is_proposal_eligible))[0]
            if idx.size <= 0:
                continue
            logger.log_status("Sampling from %s eligible proposals with status %s." % (idx.size, status))
            while (True in self.accepting_proposals):
                if self.weight[idx].sum() == 0.0:
                    logger.log_warning("Running out of proposals; there aren't any proposals with non-zero weight")
                    break
                
                idx = idx[self.weight[idx] > 0]
                n = minimum(idx.size, n)
                sampled_proposal_indexes = probsample_noreplace(proposal_ids[idx], n, 
                                                prob_array=(self.weight[idx]/float(self.weight[idx].sum())),                                                                
                                                exclude_index=None, return_index=True)
                self.consider_proposals(arange(self.proposal_set.size())[idx[sampled_proposal_indexes]])
                self.weight[idx[sampled_proposal_indexes]] = 0

        # set status of accepted proposals to 'active'
        self.proposal_set.modify_attribute(name="status_id", data=self.proposal_set.id_active,
                                          index=array(self.accepted_proposals, dtype='int32'))
        building_types = self.dataset_pool.get_dataset("building_type")
        logger.log_status("Status of %s development proposals set to active." % len(self.accepted_proposals))
        logger.log_status("Target/existing vacancy rates (reached using eligible proposals) by building type:")
        for type_id in self.existing_units.keys():
            units_stock = self._get_units_stock(type_id)
            vr = self._get_vacancy_rates(type_id)
            ## units = residential_units if building_type is residential
            ## units = number of job spaces if building_type is non-residential
            logger.log_status(
                              """%(type_id)s[%(type_name)s]: %(vr)s = ((existing_units:%(existing_units)s + 
                              units_proposed:%(units_proposed)s - units_to_be_demolished:%(units_demolished)s) 
                              - units_occupied:%(units_occupied)s) / units_stock:%(units_stock)s""" %  \
                                          { 'type_id': type_id,
                                            'type_name': building_types.get_attribute_by_id("building_type_name", type_id),
                                            'vr':  vr,
                                            'existing_units': int(self.existing_units[type_id]),
                                            'units_occupied': int(self.occupied_units[type_id]),
                                            'units_proposed': int(self.proposed_units[type_id]),
                                            'units_demolished': int(self.demolished_units[type_id]),
                                            'units_stock': int(units_stock)
                                          }
                            )
        # Code added by Jesse Ayers, MAG, 7/20/2009
        # Get the active projects:
        stat_id = self.proposal_set.get_attribute('status_id')
        actv = where(stat_id==1)[0]
        # Where there are active projects, compute the total_land_area_taken
        # and store it on the development_project_proposals dataset
        # so it can be used by the building_construction_model for the proper
        # computation of units_proposed for those projects with velocity curves
        if actv.size > 0:          
            total_land_area_taken_computed = self.proposal_set.get_attribute('urbansim_parcel.development_project_proposal.land_area_taken')
            self.proposal_set.modify_attribute('total_land_area_taken', total_land_area_taken_computed[actv], actv)

        return (self.proposal_set, self.demolished_buildings) 
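
The vacancy rate logged for each building type follows the template in the message: vr = (units_stock - units_occupied) / units_stock, where the stock reflects this year's proposals and demolitions. A worked instance with made-up numbers, assuming units_stock = existing + proposed - demolished as the log template suggests (the actual _get_units_stock helper is not shown here):

# worked vacancy-rate example (hypothetical inputs)
existing_units = 9000.0
units_proposed = 500.0
units_demolished = 100.0
units_occupied = 8900.0

units_stock = existing_units + units_proposed - units_demolished   # 9400.0
vr = (units_stock - units_occupied) / units_stock
print(round(vr, 4))   # 0.0532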
    def run(self, year=None, 
            target_attribute_name='number_of_households', 
            sample_filter="", 
            reset_dataset_attribute_value={}, 
            dataset_pool=None,  **kwargs):
        """ sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        """
        #if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()

        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.control_totals.get_attribute('year')==year)[0]
        control_totals_for_this_year = DatasetSubset(self.control_totals, this_year_index)
        column_names = list(set( self.control_totals.get_known_attribute_names() ) - set( [ target_attribute_name, 'year', '_hidden_id_'] ))
        column_names.sort(reverse=True)
        column_values = dict([ (name, control_totals_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        dataset_known_attributes = self.dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                self.dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
        dataset_known_attributes = self.dataset.get_known_attribute_names() #update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = self.dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = self.dataset.get_attribute(short_name)
        else:
            filter_indicator = 1

        to_be_cloned = array([], dtype=int32)
        to_be_removed = array([], dtype=int32)
        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
        else:        
            logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
        error_log = ''
        for index in range(control_totals_for_this_year.size()):
            lucky_index = None
            indicator = ones( self.dataset.size(), dtype='bool' )
            criterion = {}
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = self.dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in control total dataset can not be found in dataset %s" % (attribute, self.dataset.get_dataset_name())
                if attribute + '_min' in column_names:
                    amin = column_values[attribute + '_min'][index]
                    criterion.update({attribute + '_min':amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                if attribute + '_max' in column_names: 
                    amax = column_values[attribute+'_max'][index]
                    criterion.update({attribute + '_max':amax}) 
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                if attribute in column_names: 
                    aval = column_values[attribute][index] 
                    criterion.update({attribute:aval}) 
                    if aval == -1:
                        continue
                    elif aval == -2:   ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        complement_values = setdiff1d( dataset_attribute, column_values[attribute] )
                        has_one_of_the_complement_value = zeros(dataset_attribute.size, dtype='bool')
                        for value in complement_values:
                            has_one_of_the_complement_value += dataset_attribute == value
                        indicator *= has_one_of_the_complement_value
                    else:
                        indicator *= dataset_attribute == aval
                        
            target_num = column_values[target_attribute_name][index]
            ## if accounting attribute is None, count number of agents with indicator = True 
            if self.dataset_accounting_attribute is None:
                actual_num = indicator.sum()
                action_num = 0
                diff = target_num - actual_num
                if actual_num != target_num:
                    legit_index = where(logical_and(indicator, filter_indicator))[0]
                    if legit_index.size > 0:                    
                        if actual_num < target_num:
                            lucky_index = sample_replace(legit_index, target_num - actual_num)
                            to_be_cloned = concatenate((to_be_cloned, lucky_index))
                        elif actual_num > target_num:
                            lucky_index = sample_noreplace(legit_index, actual_num-target_num)
                            to_be_removed = concatenate((to_be_removed, lucky_index))
                        action_num = lucky_index.size
                    else:
                        error_log += "There is nothing to sample from %s and no action will happen for " % self.dataset.get_dataset_name() + \
                                  ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
                        
            else: 
                ## sum accounting attribute for agents with indicator = True; 
                ## assume dataset_accounting_attribute is a primary attribute 
                accounting = self.dataset.get_attribute(self.dataset_accounting_attribute) * indicator
                actual_num = accounting.sum()
                mean_size = float(actual_num) / indicator.sum()
                action_num = 0
                diff = target_num - actual_num
                if actual_num != target_num:
                    legit_index = where(logical_and(indicator, filter_indicator))[0]
                    if legit_index.size > 0:
                        while actual_num + action_num < target_num:
                            lucky_index = sample_replace(legit_index, int(ceil((target_num - actual_num - action_num)/mean_size)) )
                            action_num += accounting[lucky_index].sum()
                            to_be_cloned = concatenate((to_be_cloned, lucky_index))
                        while actual_num - action_num > target_num:
                            lucky_index = sample_noreplace(legit_index, int(ceil((actual_num - target_num - action_num)/mean_size)) )
                            action_num += accounting[lucky_index].sum()
                            to_be_removed = concatenate((to_be_removed, lucky_index))                
                    else:
                        error_log += "There is nothing to sample from %s and no action will happen for " % self.dataset.get_dataset_name() + \
                                  ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
            
            ##log status
            action = "0"
            if lucky_index is not None:                    
                if actual_num < target_num: action = "+" + str(action_num)
                if actual_num > target_num: action = "-" + str(action_num)
                    
            cat = [ str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(diff), action]
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:
                logger.log_status("\t".join(cat))

        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
                    
        clone_data = {}
        if to_be_cloned.size > 0:
            ### ideally duplicate_rows() would be all that's needed to add newly cloned rows;
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##self.dataset.duplicate_rows(to_be_cloned)
            logger.log_status()
            for attribute in dataset_known_attributes:
                if reset_dataset_attribute_value.has_key(attribute):
                    clone_data[attribute] = resize(array(reset_dataset_attribute_value[attribute]), to_be_cloned.size)
                else:
                    clone_data[attribute] = self.dataset.get_attribute_by_index(attribute, to_be_cloned)
                    
        self.post_run(self.dataset, to_be_cloned, to_be_removed, **kwargs)
        
        if to_be_removed.size > 0:
            logger.log_status()
            self.dataset.remove_elements(to_be_removed)
            
        if clone_data:
            self.dataset.add_elements(data=clone_data, change_ids_if_not_unique=True)
            
        return self.dataset
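The clone/remove bookkeeping above reduces to three steps per control-total row: build a boolean indicator from the row's *_min/*_max/value columns (with -1 acting as a wildcard), intersect it with the sample filter, and sample indices to clone (with replacement) or remove (without). Below is a minimal NumPy-only sketch of that core outside the Opus dataset machinery; the 'age' attribute, the bounds, and the target are invented, and numpy.random.choice stands in for opus_core's sample_replace/sample_noreplace.

import numpy as np

# Invented example data: one attribute, one control-total row.
age = np.array([5, 12, 25, 37, 44, 61, 70])
amin, amax = 18, 64            # -1 would mean "no bound on this side"
target_num = 5

indicator = np.ones(age.size, dtype=bool)
if amin != -1:
    indicator &= age >= amin
if amax != -1:
    indicator &= age <= amax

legit_index = np.where(indicator)[0]    # agents eligible for cloning/removal
actual_num = indicator.sum()            # 4 matching agents here
if actual_num < target_num:
    # shortfall: sample agents to clone, with replacement
    to_be_cloned = np.random.choice(legit_index, target_num - actual_num, replace=True)
elif actual_num > target_num:
    # surplus: sample agents to remove, without replacement
    to_be_removed = np.random.choice(legit_index, actual_num - target_num, replace=False)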
    def run(self,
            dataset,
            outcome_attribute,
            weight_attribute,
            control_totals,
            current_year,
            control_total_attribute=None,
            year_attribute='year',
            capacity_attribute=None,
            add_quantity=False,
            dataset_pool=None):
        """'dataset' is a Dataset for which a quantity 'outcome_attribute' is created. The total amount of the quantity is 
        given by the attribute 'control_total_attribute' of the 'control_totals' Dataset. If it is not given, it is assumed 
        to have the same name as 'outcome_attribute'. The 'weight_attribute' of 'dataset' determines the allocation weights.
        The 'control_totals' Dataset contains an attribute 'year' (or alternatively, an attribute given by the 'year_attribute' argument)
        and optionally other attributes that must be known to the 'dataset' (such as a geography). For each row of the control_totals dataset
        for which year matches the 'current_year', the total amount is distributed among the corresponding members of 'dataset' according to weights.
        If a 'capacity_attribute' is given (attribute of 'dataset'), the algorithm removes any allocations that exceeds the capacity and 
        redistributes it among remaining members. The resulting values are appended to 'dataset' as 'outcome_attribute' (as primary attribute).
        If add_quantity is True and the 'outcome_attribute' exists in dataset, the resulting values are added to the current values of 
        'outcome_attribute'.
        """
        ct_attr = control_totals.get_known_attribute_names()
        if year_attribute not in ct_attr:
            raise StandardError, "Year attribute '%s' must be a known attribute of the control totals dataset." % year_attribute
        ct_attr.remove(year_attribute)
        if control_total_attribute is None:
            control_total_attribute = outcome_attribute
        if control_total_attribute not in ct_attr:
            raise StandardError, "Attribute '%s' must be a known attribute of the control totals dataset." % control_total_attribute
        ct_attr.remove(control_total_attribute)
        if control_totals._is_hidden_id():
            ct_attr.remove(control_totals.id_name()[0])

        # compute weights and other attributes necessary for allocation
        attrs_to_compute = [weight_attribute] + ct_attr
        if capacity_attribute is not None:
            attrs_to_compute.append(capacity_attribute)
        for attr in attrs_to_compute:
            try:
                dataset.compute_variables(attr, dataset_pool=dataset_pool)
            except:
                dataset.compute_one_variable_with_unknown_package(
                    attr, dataset_pool=dataset_pool)

        # create subset of control totals for the current year
        year_index = where(
            control_totals.get_attribute(year_attribute) == current_year)[0]
        if year_index.size <= 0:
            logger.log_warning("No control total for year %s" % current_year)
            return None
        control_totals_for_this_year = DatasetSubset(control_totals,
                                                     year_index)

        # check capacity
        if capacity_attribute is not None:
            if dataset.get_attribute(capacity_attribute).sum(
            ) < control_totals_for_this_year.get_attribute(
                    control_total_attribute).sum():
                logger.log_warning(
                    "Capacity (%s) is smaller than the amount to allocate (%s)."
                    % (dataset.get_attribute(capacity_attribute).sum(),
                       control_totals_for_this_year.get_attribute(
                           control_total_attribute).sum()))
            C = dataset.get_attribute(capacity_attribute).astype('int32')

        all_weights = dataset.get_attribute(weight_attribute)
        outcome = zeros(dataset.size(), dtype='int32')
        for ct_row in range(control_totals_for_this_year.size()):
            is_considered = ones(dataset.size(), dtype='bool8')
            for characteristics in ct_attr:
                is_considered = logical_and(
                    is_considered,
                    dataset.get_attribute(characteristics) ==
                    control_totals_for_this_year.get_attribute(
                        characteristics)[ct_row])
            T = control_totals_for_this_year.get_attribute(
                control_total_attribute)[ct_row]
            it = 1
            while True:
                is_considered_idx = where(is_considered)[0]
                weights = all_weights[is_considered_idx]
                weights_sum = float(weights.sum())
                outcome[is_considered_idx] = round_(
                    outcome[is_considered_idx] + T *
                    (weights / weights_sum)).astype('int32')
                if capacity_attribute is None:
                    break
                diff = outcome[is_considered_idx] - C[is_considered_idx]
                outcome[is_considered_idx] = clip(outcome[is_considered_idx],
                                                  0, C[is_considered_idx])
                if it == 1 and C[is_considered_idx].sum() < T:
                    logger.log_warning(
                        "Control total %s cannot be met due to a capacity restriction of %s"
                        % (T, C[is_considered_idx].sum()))
                T = where(diff < 0, 0, diff).sum()
                if T <= 0:
                    break
                is_considered = logical_and(is_considered, outcome < C)
                it += 1
        if add_quantity and (outcome_attribute
                             in dataset.get_known_attribute_names()):
            dataset.modify_attribute(name=outcome_attribute,
                                     data=outcome +
                                     dataset.get_attribute(outcome_attribute))
            logger.log_status(
                'New values added to the attribute %s of dataset %s.' %
                (outcome_attribute, dataset.get_dataset_name()))
        else:
            dataset.add_primary_attribute(name=outcome_attribute, data=outcome)
            logger.log_status(
                'New values stored into attribute %s of dataset %s.' %
                (outcome_attribute, dataset.get_dataset_name()))
        dataset.flush_attribute(outcome_attribute)
        return outcome
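The while loop above is an iterative proportional allocation with capacity clipping: distribute the total T in proportion to weights, clip each member at its capacity, and redistribute the clipped excess among members that still have headroom. A self-contained sketch of the same loop on invented weights, capacities, and total; it covers a single control-total row and omits the characteristics matching.

import numpy as np

weights = np.array([1.0, 3.0, 6.0])
capacity = np.array([10, 10, 40], dtype='int32')
T = 60
outcome = np.zeros(weights.size, dtype='int32')
considered = np.ones(weights.size, dtype=bool)

while True:
    idx = np.where(considered)[0]
    w = weights[idx]
    outcome[idx] = np.round(outcome[idx] + T * (w / w.sum())).astype('int32')
    diff = outcome[idx] - capacity[idx]      # positive where over capacity
    outcome[idx] = np.clip(outcome[idx], 0, capacity[idx])
    T = diff[diff > 0].sum()                 # clipped excess to redistribute
    if T <= 0:
        break
    considered &= outcome < capacity         # drop members already at capacity
    # (like the method above, this assumes total capacity >= T)

print(outcome)   # [10 10 40]: capacity binds everywhere for these inputs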
    def _do_run(self,
                location_set,
                agent_set,
                agents_index,
                data_objects=None,
                resources=None):
        location_id_name = location_set.get_id_name()[0]
        jobsubset = DatasetSubset(agent_set, agents_index)
        if jobsubset.size() <= 0:
            return array([], dtype='int32')
        #unplace jobs
        agent_set.set_values_of_one_attribute(
            location_id_name, resize(array([-1.0]), jobsubset.size()),
            agents_index)
        sector_ids = jobsubset.get_attribute("sector_id")
        sectors = unique(sector_ids)
        counts = ndimage_sum(ones((jobsubset.size(), )),
                             labels=sector_ids.astype('int32'),
                             index=sectors.astype('int32'))
        if sectors.size <= 1:
            counts = array([counts])
        variables = map(lambda x: "number_of_jobs_of_sector_" + str(int(x)),
                        sectors)
        compute_variables = map(
            lambda var: self.variable_package + "." + location_set.
            get_dataset_name() + "." + var, variables)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included(
            {agent_set.get_dataset_name(): agent_set})
        location_set.compute_variables(compute_variables,
                                       dataset_pool=self.dataset_pool)
        if self.filter is None:
            location_index = arange(location_set.size())
        else:
            filter_values = location_set.compute_variables(
                [self.filter], dataset_pool=self.dataset_pool)
            location_index = where(filter_values > 0)[0]
        if location_index.size <= 0:
            logger.log_status("No locations available. Nothing to be done.")
            return array([])
        location_subset = DatasetSubset(location_set, location_index)
        i = 0
        for sector in sectors:
            distr = location_subset.get_attribute(variables[i])
            if ma.allclose(distr.sum(), 0):
                uniform_prob = 1.0 / distr.size
                distr = resize(array([uniform_prob], dtype='float64'),
                               distr.size)
                logger.log_warning(
                    "Probabilities in scaling model for sector " +
                    str(sector) +
                    " sum to 0.0.  Substituting uniform distribution!")
            # random_sample = sample(location_set.get_attribute("grid_id"), k=int(counts[i]),
            #                        probabilities=distr)
            distr = distr / float(distr.sum())
            random_sample = probsample_replace(
                location_subset.get_id_attribute(),
                size=int(counts[i]),
                prob_array=distr)
            idx = where(sector_ids == sector)[0]
            #modify job locations
            agent_set.set_values_of_one_attribute(location_id_name,
                                                  random_sample,
                                                  agents_index[idx])
            i += 1
        return agent_set.get_attribute_by_index(location_id_name, agents_index)
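Per sector, the placement above is probability-proportional sampling with replacement, with a uniform fallback whenever a sector's distribution sums to zero. A short sketch with invented location ids and a made-up count; numpy.random.choice stands in for opus_core's probsample_replace.

import numpy as np

location_ids = np.array([101, 102, 103, 104])
distr = np.array([0.0, 0.0, 0.0, 0.0])   # degenerate case for this sector
count = 5                                 # jobs to place in this sector

if np.allclose(distr.sum(), 0):
    # substitute a uniform distribution, mirroring the warning branch above
    distr = np.full(distr.size, 1.0 / distr.size)
distr = distr / distr.sum()
placed = np.random.choice(location_ids, size=count, replace=True, p=distr)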
Example #21
    def run(self, n=500, run_config=None, current_year=None, debuglevel=0):
        """
        n - sample n proposals at a time, evaluate them one by one
        """
        self.demolished_buildings = array(
            [], dtype='int32')  #id of buildings to be demolished
        if current_year is None:
            current_year = SimulationState().get_current_time()
        if not self.positive_proposals:
            logger.log_status(
                "Proposal Set size <= 0, no proposals to consider, skipping DPPSM."
            )
            return (self.proposal_set, self.demolished_buildings)
        self.proposal_component_set.compute_variables([
            'urbansim_parcel.development_project_proposal_component.units_proposed',
            'urbansim_parcel.development_project_proposal_component.is_residential'
        ], dataset_pool=self.dataset_pool)
        self.proposal_set.compute_variables(
            [
                'urbansim_parcel.development_project_proposal.number_of_components',
                'zone_id=development_project_proposal.disaggregate(parcel.zone_id)',
                #'occurence_frequency = development_project_proposal.disaggregate(development_template.sample_size)'
            ],
            dataset_pool=self.dataset_pool)
        buildings = self.dataset_pool.get_dataset("building")
        buildings.compute_variables(
            [
                "occupied_units_for_jobs = urbansim_parcel.building.number_of_non_home_based_jobs",
                "units_for_jobs = urbansim_parcel.building.total_non_home_based_job_space",
                "occupied_residential_units = urbansim_parcel.building.number_of_households",
                #                                "urbansim_parcel.building.existing_units",
                "urbansim_parcel.building.is_residential"
            ],
            dataset_pool=self.dataset_pool)

        ## define unit_name by whether a building is residential or not (with is_residential attribute)
        ## if it is non-residential (0), count units by number of job spaces (units_for_jobs)
        ## if it is residential (1), count units by residential units
        self.unit_name = array(["units_for_jobs", "residential_units"])

        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')
        target_vacancy.compute_variables([
            'is_residential = target_vacancy.disaggregate(building_type.is_residential)'
        ],
                                         dataset_pool=self.dataset_pool)
        # This try-except block checks whether the model has a subarea_id_name;
        # if it does, it calculates the vacancy rates by subarea_id_name
        try:
            # Check for subarea_id_name in target_vacancies dataset
            # if it is present, vacancy rates are specified by subarea_id_name
            # if it is not, vacancy rates are specified region wide
            target_vacancy.load_dataset()
            if self.subarea_id_name in target_vacancy.get_attribute_names():
                current_target_vacancy_this_year = DatasetSubset(
                    target_vacancy,
                    index=where(
                        target_vacancy.get_attribute("year") ==
                        current_year)[0])
                current_target_vacancy = DatasetSubset(
                    current_target_vacancy_this_year,
                    index=where(
                        current_target_vacancy_this_year.get_attribute(
                            self.subarea_id_name) == self.area_id)[0])
            else:
                current_target_vacancy = DatasetSubset(
                    target_vacancy,
                    index=where(
                        target_vacancy.get_attribute("year") ==
                        current_year)[0])
        except AttributeError:
            # vacancy rates are specified region wide:
            current_target_vacancy = DatasetSubset(
                target_vacancy,
                index=where(
                    target_vacancy.get_attribute("year") == current_year)[0])

        if current_target_vacancy.size() == 0:
            raise IOError, 'No target vacancy defined for year %s.' % current_year

        self.existing_units = {}  #total existing units by land_use type
        self.occupied_units = {}  #total occupied units by land_use type
        self.proposed_units = {}  #total proposed units by land_use type
        self.demolished_units = {}  #total (to be) demolished units by land_use type

        components_building_type_ids = self.proposal_component_set.get_attribute(
            "building_type_id").astype("int32")
        proposal_ids = self.proposal_set.get_id_attribute()
        proposal_ids_in_component_set = self.proposal_component_set.get_attribute(
            "proposal_id")
        all_units_proposed = self.proposal_component_set.get_attribute(
            "units_proposed")
        number_of_components_in_proposals = self.proposal_set.get_attribute(
            "number_of_components")

        self.accepting_proposals = zeros(
            current_target_vacancy.get_attribute("building_type_id").max() + 1,
            dtype='bool8'
        )  #whether accepting new proposals, for each building type
        self.accepted_proposals = []  # index of accepted proposals

        self.target_vacancies = {}
        tv_building_types = current_target_vacancy.get_attribute(
            "building_type_id")
        tv_rate = current_target_vacancy.get_attribute("target_vacancy_rate")
        for itype in range(tv_building_types.size):
            self.target_vacancies[tv_building_types[itype]] = tv_rate[itype]

        self.check_vacancy_rates(current_target_vacancy)  #initialize self.accepting_proposals based on current vacancy rate

        sqft_per_job = self.dataset_pool.get_dataset("building_sqft_per_job")
        zones_of_proposals = self.proposal_set.get_attribute("zone_id")
        self.building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(
            zones_of_proposals.max(), tv_building_types.max())
        # consider only those proposals that have all components of accepted type and sum of proposed units > 0
        is_accepted_type = self.accepting_proposals[
            components_building_type_ids]
        sum_is_accepted_type_over_proposals = array(
            ndimage.sum(is_accepted_type,
                        labels=proposal_ids_in_component_set,
                        index=proposal_ids))
        sum_of_units_proposed = array(
            ndimage.sum(all_units_proposed,
                        labels=proposal_ids_in_component_set,
                        index=proposal_ids))
        is_proposal_eligible = logical_and(
            sum_is_accepted_type_over_proposals ==
            number_of_components_in_proposals, sum_of_units_proposed > 0)

        is_proposal_eligible = logical_and(
            is_proposal_eligible,
            self.proposal_set.get_attribute("start_year") == current_year)
        ## handle planned proposals: all proposals with status_id == is_planned
        ## and start_year == current_year are accepted
        planned_proposal_indexes = where(
            logical_and(
                self.proposal_set.get_attribute(
                    "status_id") == self.proposal_set.id_planned,
                self.proposal_set.get_attribute("start_year") ==
                current_year))[0]

        self.consider_proposals(planned_proposal_indexes, force_accepting=True)
        # consider proposals (in this order: planned, proposed, tentative)
        for status in [
                self.proposal_set.id_proposed, self.proposal_set.id_tentative
        ]:
            idx = where(
                logical_and(
                    self.proposal_set.get_attribute("status_id") == status,
                    is_proposal_eligible))[0]
            if idx.size <= 0:
                continue
            logger.log_status(
                "Sampling from %s eligible proposals with status %s." %
                (idx.size, status))
            while (True in self.accepting_proposals):
                if self.weight[idx].sum() == 0.0:
                    logger.log_warning(
                        "Running out of proposals; there aren't any proposals with non-zero weight"
                    )
                    break

                idx = idx[self.weight[idx] > 0]
                n = minimum(idx.size, n)
                sampled_proposal_indexes = probsample_noreplace(
                    proposal_ids[idx],
                    n,
                    prob_array=(self.weight[idx] /
                                float(self.weight[idx].sum())),
                    exclude_index=None,
                    return_index=True)
                self.consider_proposals(
                    arange(self.proposal_set.size())[
                        idx[sampled_proposal_indexes]])
                self.weight[idx[sampled_proposal_indexes]] = 0

        # set status of accepted proposals to 'active'
        self.proposal_set.modify_attribute(name="status_id",
                                           data=self.proposal_set.id_active,
                                           index=array(self.accepted_proposals,
                                                       dtype='int32'))
        building_types = self.dataset_pool.get_dataset("building_type")
        logger.log_status("Status of %s development proposals set to active." %
                          len(self.accepted_proposals))
        logger.log_status(
            "Target/existing vacancy rates (reached using eligible proposals) by building type:"
        )
        for type_id in self.existing_units.keys():
            units_stock = self._get_units_stock(type_id)
            vr = self._get_vacancy_rates(type_id)
            ## units = residential_units if building_type is residential
            ## units = number of job spaces if building_type is non-residential
            logger.log_status(
                              """%(type_id)s[%(type_name)s]: %(vr)s = ((existing_units:%(existing_units)s + 
                              units_proposed:%(units_proposed)s - units_to_be_demolished:%(units_demolished)s) 
                              - units_occupied:%(units_occupied)s) / units_stock:%(units_stock)s""" %  \
                                          { 'type_id': type_id,
                                            'type_name': building_types.get_attribute_by_id("building_type_name", type_id),
                                            'vr':  vr,
                                            'existing_units': int(self.existing_units[type_id]),
                                            'units_occupied': int(self.occupied_units[type_id]),
                                            'units_proposed': int(self.proposed_units[type_id]),
                                            'units_demolished': int(self.demolished_units[type_id]),
                                            'units_stock': int(units_stock)
                                          }
                            )
        # Code added by Jesse Ayers, MAG, 7/20/2009
        # Get the active projects:
        stat_id = self.proposal_set.get_attribute('status_id')
        actv = where(stat_id == 1)[0]
        # Where there are active projects, compute the total_land_area_taken
        # and store it on the development_project_proposals dataset
        # so it can be used by the building_construction_model for the proper
        # computation of units_proposed for those projects with velocity curves
        if actv.size > 0:
            total_land_area_taken_computed = self.proposal_set.get_attribute(
                'urbansim_parcel.development_project_proposal.land_area_taken')
            self.proposal_set.modify_attribute(
                'total_land_area_taken', total_land_area_taken_computed[actv],
                actv)

        return (self.proposal_set, self.demolished_buildings)
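The eligibility test near the top of this method ("all components of accepted type and sum of proposed units > 0") hinges on labeled sums over the component table: summing a per-component indicator by proposal_id and comparing it with number_of_components. A sketch of that trick on invented data, with numpy.bincount standing in for scipy's ndimage.sum.

import numpy as np

# Invented component table: proposal 1 has 2 components, 2 has 1, 3 has 3.
proposal_id_of_component = np.array([1, 1, 2, 3, 3, 3])
is_accepted_type = np.array([1, 1, 0, 1, 1, 1])
units_proposed = np.array([10, 5, 8, 0, 0, 0])

n_components = np.bincount(proposal_id_of_component)[1:]
accepted_sum = np.bincount(proposal_id_of_component, weights=is_accepted_type)[1:]
units_sum = np.bincount(proposal_id_of_component, weights=units_proposed)[1:]

# Eligible iff every component is of an accepted type and some units are proposed:
eligible = np.logical_and(accepted_sum == n_components, units_sum > 0)
# -> [True, False, False]: proposal 2 fails the type test, proposal 3 proposes 0 units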
    def prepare_for_run(
            self,
            dataset_pool,
            create_proposal_set=True,
            parcel_filter_for_new_development=None,
            parcel_filter_for_redevelopment=None,
            template_filter=None,
            spec_replace_module_variable_pair=None,
            proposed_units_variable="urbansim_parcel.development_project_proposal.units_proposed",
            **kwargs):
        """create development project proposal dataset from parcels and development templates.
        spec_replace_module_variable_pair is a tuple with two elements: module name, variable within the module
        that contains a dictionary of model variables to be replaced in the specification.
        """
        specification, coefficients, dummy = RegressionModel.prepare_for_run(
            self, **kwargs)
        try:
            existing_proposal_set_parent = dataset_pool.get_dataset(
                'development_project_proposal')
            #load proposals whose status_id is neither id_tentative nor id_not_available
            available_idx = where(
                logical_and(
                    existing_proposal_set_parent.get_attribute("status_id") !=
                    DevelopmentProjectProposalDataset.id_tentative,
                    existing_proposal_set_parent.get_attribute("status_id") !=
                    DevelopmentProjectProposalDataset.id_not_available))[0]
            existing_proposal_set = DatasetSubset(existing_proposal_set_parent,
                                                  available_idx)
            # Code updated by Hanyi Li, MAG 6/8/2010
            # Replacing the cached 'development_project_proposal' dataset with
            # the filtered dataset 'existing_proposal_set'
            dataset_pool.replace_dataset(
                existing_proposal_set_parent.get_dataset_name(),
                existing_proposal_set)
        except:
            existing_proposal_set = None

        parcels = dataset_pool.get_dataset('parcel')
        templates = dataset_pool.get_dataset('development_template')

        # It is important that during this method no variable flushing happens, since
        # we create datasets of the same name for different purposes (new development and redevelopment)
        # and flushing would mix them up
        flush_variables_current = SessionConfiguration().get(
            'flush_variables', False)
        SessionConfiguration().put_data({'flush_variables': False})

        # Code added by Jesse Ayers, MAG, 9/14/2009
        # Getting an index of parcels that have actively developing projects (those on a velocity function)
        # and making sure that new proposals are not generated for them
        if existing_proposal_set:
            parcels_with_proposals = existing_proposal_set.get_attribute(
                'parcel_id')
            parcels_with_proposals_idx = parcels.get_id_index(
                parcels_with_proposals)
            if parcel_filter_for_new_development is not None:
                if parcel_filter_for_new_development[
                        parcel_filter_for_new_development.find('=') +
                        1] == '=':
                    filter = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
                else:
                    parcel_filter_for_new_development = parcel_filter_for_new_development[
                        parcel_filter_for_new_development.find('=') +
                        1:].lstrip()
                    filter = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
                index1 = where(parcels.compute_variables(filter))[0]

        else:
            if parcel_filter_for_new_development is not None:
                index1 = where(
                    parcels.compute_variables(
                        parcel_filter_for_new_development))[0]
            else:
                index1 = None

        if template_filter is not None:
            try:
                index2 = where(templates.compute_variables(template_filter))[0]
            except Exception, e:
                logger.log_warning(
                    "template_filter is set to %s, but there is an error when computing it: %s"
                    % (template_filter, e))
                index2 = None
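The string surgery on parcel_filter_for_new_development above distinguishes a bare boolean expression (whose first '=' belongs to an '==') from an 'alias = expression' form, stripping the alias in the latter case before wrapping the filter in the no-existing-proposals condition. A sketch of just that dispatch on two invented filter strings; the 'flter' alias matches the expression built in the code above.

# Invented filter strings illustrating the two forms handled above.
for f in ["parcel.parcel_sqft == 2000", "myfilter = parcel.parcel_sqft > 2000"]:
    if f[f.find('=') + 1] == '=':
        expression = f                               # bare expression: first '=' is part of '=='
    else:
        expression = f[f.find('=') + 1:].lstrip()    # 'alias = expr': keep only expr
    combined = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % expression
    print(combined)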
Example #23
    def test_agents_placed_in_appropriate_types(self):
        """Create 1000 unplaced industrial jobs and 1 commercial job. Allocate 50 commercial
        gridcells with enough space for 10 commercial jobs per gridcell. After running the
        EmploymentLocationChoiceModel, the 1 commercial job should be placed,
        but the 1000 industrial jobs should remain unplaced
        """
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='job_building_types',
                            table_data={
                                'id': array([2, 1]),
                                'name': array(['commercial', 'industrial'])
                            })
        job_building_types = JobBuildingTypeDataset(
            in_storage=storage, in_table_name='job_building_types')

        storage.write_table(table_name='jobs',
                            table_data={
                                'job_id': arange(1001) + 1,
                                'grid_id': array([0] * 1001),
                                'building_type': array([1] * 1000 + [2])
                            })
        jobs = JobDataset(in_storage=storage, in_table_name='jobs')

        storage.write_table(table_name='gridcells',
                            table_data={
                                'grid_id': arange(50) + 1,
                                'commercial_sqft': array([1000] * 50),
                                'commercial_sqft_per_job': array([100] * 50)
                            })
        gridcells = GridcellDataset(in_storage=storage,
                                    in_table_name='gridcells')

        coefficients = Coefficients(names=("dummy", ), values=(0.1, ))
        specification = EquationSpecification(
            variables=("gridcell.commercial_sqft", ), coefficients=("dummy", ))

        compute_resources = Resources({
            "job": jobs,
            "job_building_type": job_building_types
        })
        agents_index = where(jobs.get_attribute("grid_id") == 0)[0]
        unplace_jobs = DatasetSubset(jobs, agents_index)
        agents_index = where(
            unplace_jobs.get_attribute("building_type") == 2)[0]
        gridcells.compute_variables(
            ["urbansim.gridcell.number_of_commercial_jobs"],
            resources=compute_resources)
        commercial_jobs = gridcells.get_attribute("number_of_commercial_jobs")

        gridcells.compute_variables(
            ["urbansim.gridcell.number_of_industrial_jobs"],
            resources=compute_resources)
        industrial_jobs = gridcells.get_attribute("number_of_industrial_jobs")
        model_group = ModelGroup(job_building_types, "name")
        elcm = EmploymentLocationChoiceModel(
            ModelGroupMember(model_group, "commercial"),
            location_set=gridcells,
            agents_grouping_attribute="job.building_type",
            choices="opus_core.random_choices_from_index",
            sample_size_locations=30)
        elcm.run(specification,
                 coefficients,
                 agent_set=jobs,
                 agents_index=agents_index,
                 debuglevel=1)

        gridcells.compute_variables(
            ["urbansim.gridcell.number_of_commercial_jobs"],
            resources=compute_resources)
        commercial_jobs = gridcells.get_attribute("number_of_commercial_jobs")

        gridcells.compute_variables(
            ["urbansim.gridcell.number_of_industrial_jobs"],
            resources=compute_resources)
        industrial_jobs = gridcells.get_attribute("number_of_industrial_jobs")

        self.assertEqual(
            commercial_jobs.sum() == 1, True,
            "Error, there should only be a total of 1 commercial job")
        self.assertEqual(
            industrial_jobs.sum() == 0, True,
            "Error, there should be no industrial jobs because there's no space for them"
        )
Example #24
 def run(self, dataset, outcome_attribute, weight_attribute, 
              control_totals, current_year, control_total_attribute=None, 
              year_attribute='year', capacity_attribute=None, add_quantity=False, dataset_pool=None):
     """'dataset' is a Dataset for which a quantity 'outcome_attribute' is created. The total amount of the quantity is 
     given by the attribute 'control_total_attribute' of the 'control_totals' Dataset. If it is not given, it is assumed 
     to have the same name as 'outcome_attribute'. The 'weight_attribute' of 'dataset' determines the allocation weights.
     The 'control_totals' Dataset contains an attribute 'year' (or alternatively, an attribute given by the 'year_attribute' argument)
     and optionally other attributes that must be known to the 'dataset' (such as a geography). For each row of the control_totals dataset
     for which year matches the 'current_year', the total amount is distributed among the corresponding members of 'dataset' according to weights.
     If a 'capacity_attribute' is given (attribute of 'dataset'), the algorithm removes any allocations that exceeds the capacity and 
     redistributes it among remaining members. The resulting values are appended to 'dataset' as 'outcome_attribute' (as primary attribute).
     If add_quantity is True and the 'outcome_attribute' exists in dataset, the resulting values are added to the current values of 
     'outcome_attribute'.
     """
     python_version = '2.%s' % (sys.version_info[1])
     ct_attr = control_totals.get_known_attribute_names()
     if year_attribute not in ct_attr:
         raise StandardError, "Year attribute '%s' must be a known attribute of the control totals dataset." % year_attribute
     ct_attr.remove(year_attribute)
     if control_total_attribute is None:
         control_total_attribute = outcome_attribute
     if control_total_attribute not in ct_attr:
         raise StandardError, "Attribute '%s' must be a known attribute of the control totals dataset." % control_total_attribute
     ct_attr.remove(control_total_attribute)
     if control_totals._is_hidden_id():
         ct_attr.remove(control_totals.id_name()[0])
         
     # compute weights and other attributes necessary for allocation
     attrs_to_compute = [weight_attribute] + ct_attr
     if capacity_attribute is not None:
         attrs_to_compute.append(capacity_attribute)
     for attr in attrs_to_compute:
         try:
             dataset.compute_variables(attr, dataset_pool=dataset_pool)
         except:
             dataset.compute_one_variable_with_unknown_package(attr, dataset_pool=dataset_pool)
     
     # create subset of control totals for the current year
     year_index = where(control_totals.get_attribute(year_attribute) == current_year)[0]
     if year_index.size <= 0:
         logger.log_warning("No control total for year %s" % current_year)
         return None
     control_totals_for_this_year = DatasetSubset(control_totals, year_index)
     
     # check capacity
     if capacity_attribute is not None:
         if dataset.get_attribute(capacity_attribute).sum() < control_totals_for_this_year.get_attribute(control_total_attribute).sum():
             logger.log_warning("Capacity (%s) is smaller than the amount to allocate (%s)." % (dataset.get_attribute(capacity_attribute).sum(), 
                                                                                               control_totals_for_this_year.get_attribute(control_total_attribute).sum()))
         C = dataset.get_attribute(capacity_attribute).astype('int32')
         
     all_weights = dataset.get_attribute(weight_attribute)
     outcome = zeros(dataset.size(), dtype='int32')
     for ct_row in range(control_totals_for_this_year.size()):
         is_considered = ones(dataset.size(), dtype='bool8')
         for characteristics in ct_attr:
             is_considered = logical_and(is_considered, dataset.get_attribute(characteristics) == control_totals_for_this_year.get_attribute(characteristics)[ct_row])
         T = control_totals_for_this_year.get_attribute(control_total_attribute)[ct_row]
         it = 1
         while True:
             is_considered_idx = where(is_considered)[0]
              # Make sure to jump out of the loop when no index is considered; otherwise cum_prob[-1] would raise an index-out-of-bounds error
             if is_considered_idx.size < 1:
                 break
             # End of Make sure
             weights = all_weights[is_considered_idx]
             weights_sum = float(weights.sum())
             # Add: Do the prob sample for exact match --Hanyi
             prob_array = (weights*1.0/weights_sum).astype('float64')
             cum_prob = cumsum(prob_array, dtype='float64')
             sample_prob = uniform(0, cum_prob[-1], T)
             sampled_index = searchsorted(cum_prob, sample_prob)
             sampled_index = sampled_index.astype('int32')
             # due to precision problems, searchsorted could return index = cum_prob.size
             sampled_index = clip(sampled_index, 0, cum_prob.size-1)
             if python_version == '2.6':
                 # Start: Python 2.6 version
                 count = zeros(prob_array.size).astype('int32')
                
                 for i in range(0,prob_array.size):
                     sub_indx = where(sampled_index==i)[0]
                     count[i] = sub_indx.size
                 # End: Python 2.6 version
             elif python_version == '2.7':
                 # Start: Python 2.7 version
                 from collections import Counter
                 dict_count_ini = dict([(x,0) for x in range(prob_array.size)])
                 ## Alternative init dict method
                 #from numpy import arange
                 #dict_count_ini_keys = arange(prob_array.size).astype('int32')
                 #dict_count_ini_values = zeros(prob_array.size).astype('int32')
                 #dict_count_ini = dict(zip(dict_count_ini_keys, dict_count_ini_values))
                 ## Alternative End
                 dict_count = Counter(sampled_index)
                 dict_count_ini.update(dict(dict_count))
                 # sort by index key before extracting counts; plain dict ordering is arbitrary
                 count = array([cnt for _idx, cnt in sorted(dict_count_ini.items())])
                 # End: Python 2.7 version
             
             outcome[is_considered_idx] = outcome[is_considered_idx] + count
             # End
             # Comment out the line below --Hanyi
             #outcome[is_considered_idx] = round_(outcome[is_considered_idx] + T * (weights/weights_sum)).astype('int32')
             if capacity_attribute is None:
                 break
             diff = outcome[is_considered_idx] - C[is_considered_idx]
             outcome[is_considered_idx] = clip(outcome[is_considered_idx], 0, C[is_considered_idx])
             if it == 1 and C[is_considered_idx].sum() < T:
                 logger.log_warning("Control total %s cannot be met due to a capacity restriction of %s" % (T, C[is_considered_idx].sum()))
             T = where(diff < 0, 0, diff).sum()
             if T <= 0:
                 break
             is_considered = logical_and(is_considered, outcome < C)
             it += 1
     if add_quantity and (outcome_attribute in dataset.get_known_attribute_names()):
         dataset.modify_attribute(name=outcome_attribute, data=outcome+dataset.get_attribute(outcome_attribute))
         logger.log_status('New values added to the attribute %s of dataset %s.' % (outcome_attribute, dataset.get_dataset_name()))
     else:
         dataset.add_primary_attribute(name=outcome_attribute, data=outcome)
         logger.log_status('New values stored into attribute %s of dataset %s.' % (outcome_attribute, dataset.get_dataset_name()))
     dataset.flush_attribute(outcome_attribute)
     return outcome
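The point of the modification in this variant: round_(T * prob) can over- or under-shoot the control total through rounding, whereas drawing T samples and tallying them per index always sums to exactly T. The cumsum/searchsorted pair is a hand-rolled multinomial draw. A condensed sketch with invented weights:

import numpy as np
from collections import Counter

weights = np.array([2.0, 1.0, 1.0])
T = 8
cum_prob = np.cumsum(weights / weights.sum())
draws = np.searchsorted(cum_prob, np.random.uniform(0, cum_prob[-1], T))
draws = np.clip(draws, 0, cum_prob.size - 1)   # guard against precision spill at the top end

counts = np.zeros(weights.size, dtype='int32')
for i, c in Counter(draws).items():
    counts[i] = c
assert counts.sum() == T                       # exact match, by construction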
Example #25
    def run(self,
            model_configuration,
            vacancy_table,
            history_table,
            year,
            location_set,
            resources=None):
        large_area_ids = vacancy_table.get_attribute("large_area_id")
        locations_large_area_ids = location_set.compute_variables(
            "washtenaw.%s.large_area_id" % location_set.get_dataset_name())
        unique_large_areas = unique(large_area_ids)
        self._compute_vacancy_variables(
            location_set, model_configuration['development_project_types'],
            resources)

        projects = {}
        for area in unique_large_areas:
            location_index = where(locations_large_area_ids == area)[0]
            locations_for_this_area = DatasetSubset(location_set,
                                                    location_index)
            logger.log_status("DPLCM for area %s", area)
            target_residential_vacancy_rate, target_non_residential_vacancy_rate = self._get_target_vacancy_rates(
                vacancy_table, year, area)
            for project_type in model_configuration[
                    'development_project_types']:
                # determine current-year vacancy rates
                vacant_units_sum = locations_for_this_area.get_attribute(
                    self.variable_for_vacancy[project_type]).sum()
                units_sum = float(
                    locations_for_this_area.get_attribute(
                        self.units_variable[project_type]).sum())
                vacant_rate = self.safe_divide(vacant_units_sum, units_sum)
                if model_configuration['development_project_types'][
                        project_type]['residential']:
                    target_vacancy_rate = target_residential_vacancy_rate
                else:
                    target_vacancy_rate = target_non_residential_vacancy_rate
                should_develop_units = int(
                    round(
                        max(0,
                            (target_vacancy_rate * units_sum -
                             vacant_units_sum) / (1 - target_vacancy_rate))))
                logger.log_status(
                    project_type +
                    ": vacant units: %d, should be vacant: %f, sum units: %d, will develop: %d"
                    % (vacant_units_sum, target_vacancy_rate * units_sum,
                       units_sum, should_develop_units))
                #create projects
                if should_develop_units > 0:
                    project_dataset = self._create_projects(
                        should_develop_units, project_type, history_table,
                        locations_for_this_area, units_sum,
                        model_configuration['development_project_types'],
                        resources)
                    project_dataset.add_attribute(
                        array(project_dataset.size() * [area]),
                        "large_area_id",
                        metadata=AttributeType.PRIMARY)
                    if (project_type not in projects.keys()) or (
                            projects[project_type] is None):
                        projects[project_type] = project_dataset
                    else:
                        projects[project_type].join_by_rows(
                            project_dataset, change_ids_if_not_unique=True)

        for project_type in model_configuration['development_project_types']:
            if project_type not in projects.keys():
                projects[project_type] = None
            if projects[project_type] is None:
                size = 0
            else:
                projects[project_type].add_submodel_categories()
                size = projects[project_type].size()
            logger.log_status("%s %s projects to be built" %
                              (size, project_type))
        return projects
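The should_develop_units expression used in this model (and in the other vacancy-driven models above) follows from solving (vacant + x) / (units + x) = target for x, where x is the number of new, initially vacant units needed to reach the target rate: x = (target * units - vacant) / (1 - target), floored at zero. A worked numeric sketch with invented inputs:

units_sum = 10000.0
vacant_units_sum = 300.0
target_vacancy_rate = 0.05

x = max(0, (target_vacancy_rate * units_sum - vacant_units_sum)
           / (1 - target_vacancy_rate))
should_develop_units = int(round(x))   # 211
# check: (300 + 211) / (10000 + 211) = 0.05002..., right at the 5% target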
class EmploymentTransitionModel(Model):
    """Creates and removes jobs from job_set."""

    model_name = "Employment Transition Model"
    location_id_name_default = "grid_id"
    variable_package_default = "urbansim"

    def __init__(self,
                 location_id_name=None,
                 variable_package=None,
                 dataset_pool=None,
                 debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.location_id_name = self.location_id_name_default
        self.variable_package = self.variable_package_default
        if location_id_name is not None:
            self.location_id_name = location_id_name
        if variable_package is not None:
            self.variable_package = variable_package
        self.dataset_pool = self.create_dataset_pool(dataset_pool,
                                                     ["urbansim", "opus_core"])

    def run(self,
            year,
            job_set,
            control_totals,
            job_building_types,
            data_objects=None,
            resources=None):
        self._do_initialize_for_run(job_set, job_building_types, data_objects)
        idx = where(control_totals.get_attribute("year") == year)[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        self._do_run_for_this_year(job_set)
        return self._update_job_set(job_set)

    def _do_initialize_for_run(self,
                               job_set,
                               job_building_types,
                               data_objects=None):
        self.max_id = job_set.get_id_attribute().max()
        self.job_size = job_set.size()
        self.job_id_name = job_set.get_id_name()[0]
        self.new_jobs = {
            self.location_id_name:
            array([],
                  dtype=job_set.get_data_type(self.location_id_name, int32)),
            "sector_id":
            array([], dtype=job_set.get_data_type("sector_id", int32)),
            self.job_id_name:
            array([], dtype=job_set.get_data_type(self.job_id_name, int32)),
            "building_type":
            array([], dtype=job_set.get_data_type("building_type", int8))
        }
        self.remove_jobs = array([], dtype=int32)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included(
            {job_building_types.get_dataset_name(): job_building_types})
        self.available_building_types = job_building_types.get_id_attribute()

    def _compute_sector_variables(self, sectors, job_set):
        compute_resources = Resources({"debug": self.debug})
        job_set.compute_variables(map(
            lambda x: "%s.%s.is_in_employment_sector_%s_home_based" %
            (self.variable_package, job_set.get_dataset_name(), x),
            sectors) + map(
                lambda x: "%s.%s.is_in_employment_sector_%s_non_home_based" %
                (self.variable_package, job_set.get_dataset_name(), x),
                sectors) + ["is_non_home_based_job", "is_home_based_job"],
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)

    def _do_run_for_this_year(self, job_set):
        building_type = job_set.get_attribute("building_type")
        sectors = unique(
            self.control_totals_for_this_year.get_attribute("sector_id"))
        self._compute_sector_variables(sectors, job_set)
        for sector in sectors:
            isector = where(
                self.control_totals_for_this_year.get_attribute("sector_id") ==
                sector)[0]
            total_hb_jobs = self.control_totals_for_this_year.get_attribute(
                "total_home_based_employment")[isector]
            total_nhb_jobs = self.control_totals_for_this_year.get_attribute(
                "total_non_home_based_employment")[isector]
            is_in_sector_hb = job_set.get_attribute(
                "is_in_employment_sector_%s_home_based" % sector)
            is_in_sector_nhb = job_set.get_attribute(
                "is_in_employment_sector_%s_non_home_based" % sector)
            diff_hb = int(total_hb_jobs - is_in_sector_hb.astype(int8).sum())
            diff_nhb = int(total_nhb_jobs -
                           is_in_sector_nhb.astype(int8).sum())
            if diff_hb < 0:  # home based jobs to be removed
                w = where(is_in_sector_hb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_hb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate(
                    (self.remove_jobs, non_placed,
                     sample_noreplace(sample_array,
                                      max(0,
                                          abs(diff_hb) - size_non_placed))))
            if diff_nhb < 0:  # non home based jobs to be removed
                w = where(is_in_sector_nhb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_nhb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate(
                    (self.remove_jobs, non_placed,
                     sample_noreplace(sample_array,
                                      max(0,
                                          abs(diff_nhb) - size_non_placed))))

            if diff_hb > 0:  # home based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate(
                    (self.new_jobs[self.location_id_name],
                     zeros(
                         (diff_hb, ),
                         dtype=self.new_jobs[self.location_id_name].dtype.type)
                     ))
                self.new_jobs["sector_id"] = concatenate(
                    (self.new_jobs["sector_id"], (resize(
                        array([sector],
                              dtype=self.new_jobs["sector_id"].dtype.type),
                        diff_hb))))
                if 1 in is_in_sector_hb:
                    building_type_distribution = array(
                        ndimage_sum(is_in_sector_hb,
                                    labels=building_type,
                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute(
                        "is_home_based_job"
                ):  # take the building type distribution from the whole region
                    building_type_distribution = array(
                        ndimage_sum(job_set.get_attribute("is_home_based_job"),
                                    labels=building_type,
                                    index=self.available_building_types))
                else:  # there are no home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(
                        self.available_building_types.size)
                    building_type_distribution = (
                        building_type_distribution /
                        building_type_distribution.sum())
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_hb,
                    building_type_distribution /
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate(
                    (self.new_jobs["building_type"],
                     sampled_building_types.astype(
                         self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_hb
                self.new_jobs[self.job_id_name] = concatenate(
                    (self.new_jobs[self.job_id_name],
                     arange(self.max_id + 1, new_max_id + 1)))
                self.max_id = new_max_id

            if diff_nhb > 0:  # non home based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate(
                    (self.new_jobs[self.location_id_name],
                     zeros(
                         (diff_nhb, ),
                         dtype=self.new_jobs[self.location_id_name].dtype.type)
                     ))
                self.new_jobs["sector_id"] = concatenate(
                    (self.new_jobs["sector_id"], (resize(
                        array([sector],
                              dtype=self.new_jobs["sector_id"].dtype.type),
                        diff_nhb))))
                if 1 in is_in_sector_nhb:
                    building_type_distribution = array(
                        ndimage_sum(is_in_sector_nhb,
                                    labels=building_type,
                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute(
                        "is_non_home_based_job"
                ):  # take the building type distribution from the whole region
                    building_type_distribution = array(
                        ndimage_sum(
                            job_set.get_attribute("is_non_home_based_job"),
                            labels=building_type,
                            index=self.available_building_types))
                else:  # there are no non-home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(
                        self.available_building_types.size)
                    building_type_distribution = (
                        building_type_distribution /
                        building_type_distribution.sum())
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_nhb,
                    building_type_distribution /
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate(
                    (self.new_jobs["building_type"],
                     sampled_building_types.astype(
                         self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_nhb
                self.new_jobs[self.job_id_name] = concatenate(
                    (self.new_jobs[self.job_id_name],
                     arange(self.max_id + 1, new_max_id + 1)))
                self.max_id = new_max_id

    def _update_job_set(self, job_set):
        job_set.remove_elements(self.remove_jobs)
        job_set.add_elements(self.new_jobs, require_all_attributes=False)
        difference = job_set.size() - self.job_size
        self.debug.print_debug(
            "Difference in number of jobs: %s (original %s,"
            " new %s, created %s, deleted %s)" %
            (difference, self.job_size, job_set.size(),
             self.new_jobs[self.job_id_name].size, self.remove_jobs.size), 3)
        self.debug.print_debug(
            "Number of unplaced jobs: %s" %
            where(job_set.get_attribute(self.location_id_name) <= 0)[0].size,
            3)
        return difference

    def prepare_for_run(self, storage, **kwargs):
        from urbansim.datasets.control_total_dataset import ControlTotalDataset
        control_totals = ControlTotalDataset(in_storage=storage,
                                             what="employment")
        sample_control_totals(storage, control_totals, **kwargs)
        return control_totals
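The model above reconciles the job set with sector control totals: for each sector it computes diff = control_total - current_count, removes jobs when diff is negative (unplaced jobs first, the rest sampled without replacement), and appends new rows when diff is positive. Below is a minimal, self-contained sketch of that differencing step; it uses numpy.random.choice as a stand-in for opus_core's sample_noreplace, and all names and data are hypothetical:

from numpy import array, where, concatenate
from numpy.random import choice  # stand-in for opus_core's sample_noreplace

location_id = array([1, 0, 3, 0, 2, 5])   # 0 means "not yet placed"
in_sector   = array([1, 1, 1, 0, 1, 1])   # membership indicator for one sector
target      = 3                           # control total for this sector

diff = target - in_sector.sum()
if diff < 0:
    members = where(in_sector == 1)[0]
    non_placed = members[location_id[members] == 0]
    placed = members[location_id[members] != 0]
    # remove unplaced jobs first, then sample the remainder without replacement
    n_more = max(0, -diff - non_placed.size)
    remove = concatenate((non_placed, choice(placed, n_more, replace=False)))
    print "removing job indices:", remove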
    def prepare_for_run(self, dataset_pool, 
                        create_proposal_set=True,
                        parcel_filter_for_new_development=None, 
                        parcel_filter_for_redevelopment=None, 
                        template_filter=None,
                        spec_replace_module_variable_pair=None,
                        proposed_units_variable="urbansim_parcel.development_project_proposal.units_proposed",
                        **kwargs):
        """create development project proposal dataset from parcels and development templates.
        spec_replace_module_variable_pair is a tuple with two elements: module name, variable within the module
        that contans a dictionary of model variables to be replaced in the specification.
        """
        specification, coefficients, dummy = RegressionModel.prepare_for_run(self, **kwargs)
        try:
            existing_proposal_set_parent = dataset_pool.get_dataset('development_project_proposal')
            # load proposals whose status_id is neither id_tentative nor id_not_available
            available_idx = where(logical_and(existing_proposal_set_parent.get_attribute("status_id") != DevelopmentProjectProposalDataset.id_tentative,
                                              existing_proposal_set_parent.get_attribute("status_id") != DevelopmentProjectProposalDataset.id_not_available))[0]
            existing_proposal_set = DatasetSubset(existing_proposal_set_parent, available_idx)
            # Code updated by Hanyi Li, MAG 6/8/2010
            # Replacing the cached 'development_project_proposal' dataset with
            # the filtered dataset 'existing_proposal_set'
            dataset_pool.replace_dataset(existing_proposal_set_parent.get_dataset_name(), existing_proposal_set)
        except:  # no existing proposal set available in the dataset pool
            existing_proposal_set = None
        
        parcels = dataset_pool.get_dataset('parcel')
        templates = dataset_pool.get_dataset('development_template')

        # It is important that during this method no variable flushing happens, since
        # we create datasets of the same name for different purposes (new development and redevelopment)
        # and flushing would mix them up
        flush_variables_current = SessionConfiguration().get('flush_variables', False)
        SessionConfiguration().put_data({'flush_variables': False})
        
        # Code added by Jesse Ayers, MAG, 9/14/2009
        # Getting an index of parcels that have actively developing projects (those on a velocity function)
        # and making sure that new proposals are not generated for them
        if existing_proposal_set:
            parcels_with_proposals = existing_proposal_set.get_attribute('parcel_id')
            parcels_with_proposals_idx = parcels.get_id_index(parcels_with_proposals)
            if parcel_filter_for_new_development is not None:
                if parcel_filter_for_new_development[parcel_filter_for_new_development.find('=')+1] == '=':
                    # the filter is a bare expression (the first '=' belongs to a '==' comparison)
                    filter_expression = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
                else:
                    # the filter is an assignment ('alias = expression'); keep only its right-hand side
                    parcel_filter_for_new_development = parcel_filter_for_new_development[parcel_filter_for_new_development.find('=')+1:].lstrip()
                    filter_expression = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
                index1 = where(parcels.compute_variables(filter_expression))[0]
            else:
                index1 = None

        else:
            if parcel_filter_for_new_development is not None:
                index1 = where(parcels.compute_variables(parcel_filter_for_new_development))[0]
            else:
                index1 = None
            
        if template_filter is not None:
            try:
                index2 = where(templates.compute_variables(template_filter))[0]
            except Exception, e:
                logger.log_warning( "template_filter is set to %s, but there is an error when computing it: %s"
                                   % (template_filter, e) )
                index2 = None
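The parcel-filter handling above hinges on whether the filter string is a bare expression or an assignment of the form "alias = expression": only the right-hand side can be embedded into a larger logical_and() expression. A small sketch of that rewriting, with a hypothetical function name and made-up sample filters:

def embed_in_no_proposal_filter(parcel_filter):
    # an opus expression may arrive as "alias = expression" or as a bare expression;
    # a single '=' (not '==') marks an assignment whose left-hand side must be dropped
    eq = parcel_filter.find('=')
    if eq != -1 and parcel_filter[eq + 1] != '=':
        parcel_filter = parcel_filter[eq + 1:].lstrip()
    return ('flter = numpy.logical_and('
            'parcel.number_of_agents(development_project_proposal) == 0, %s)'
            % parcel_filter)

print embed_in_no_proposal_filter("is_ok = parcel.vacant_land_area > 0")
print embed_in_no_proposal_filter("parcel.vacant_land_area > 0")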
    def run(self, realestate_dataset,
            living_units_dataset,
            year=None,
            occupied_spaces_variable="occupied_units",
            total_spaces_variable="total_units",
            target_attribute_name='target_vacancy_rate',
            sample_from_dataset=None,
            living_units_from_dataset=None,
            sample_filter="",
            reset_attribute_value={},
            year_built='year_built',
            dataset_pool=None,
            append_to_realestate_dataset=False,
            table_name="development_projects",
            dataset_name="development_project",
            id_name='development_project_id',
            **kwargs):
        """
        sample_filter: attribute/variable indicating which records in the dataset are eligible for sampling (removal or cloning)
        append_to_realestate_dataset: whether to append the new dataset to realestate_dataset
        """
        
        if self.target_vancy_dataset is None:
            raise RuntimeError, "target_vacancy_rate dataset is unspecified."
        
        if not sample_from_dataset or not living_units_from_dataset:
            logger.log_note('No development projects or living units to sample from; sampling development projects from the building dataset and living units from the living_units dataset instead.')
            sample_from_dataset = realestate_dataset
            living_units_from_dataset = living_units_dataset
            
        if dataset_pool is None:
            dataset_pool = SessionConfiguration().get_dataset_pool()
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute('year')==year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)
        
        column_names = list(set( self.target_vancy_dataset.get_known_attribute_names() ) - set( [ target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_'] ))
        column_names.sort(reverse=True)
        column_values = dict([ (name, target_vacancy_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        
        
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
        for attribute in independent_variables:
            if attribute not in sample_dataset_known_attributes:
                sample_from_dataset.compute_one_variable_with_unknown_package(attribute, dataset_pool=dataset_pool)
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names() #update after compute
                
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in sample_dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1
                
        sampled_index = array([], dtype=int32)

        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "expected", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "expected", "difference", "action"]))
        error_log = ''
        for index in range(target_vacancy_for_this_year.size()):
            sample_indicator = ones( sample_from_dataset.size(), dtype='bool' )
            criterion = {}   # for logging
            for attribute in independent_variables:
                if attribute in sample_dataset_known_attributes:
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (attribute, realestate_dataset.get_dataset_name())
                
                if attribute + '_min' in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute+'_min')[index] 
                    criterion.update({attribute + '_min':amin})
                    if amin != -1:
                        sample_indicator *= sample_attribute >= amin
                if attribute + '_max' in column_names: 
                    amax = target_vacancy_for_this_year.get_attribute(attribute+'_max')[index]
                    criterion.update({attribute + '_max':amax}) 
                    if amax != -1:
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names: 
                    aval = column_values[attribute][index] 
                    criterion.update({attribute:aval}) 
                    if aval == -1:
                        continue
                    elif aval == -2:  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        sample_indicator *= sample_attribute == aval
                        
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all real estate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
            
            # NOTE: 'col' here is the last entry of column_names, leaked from the
            # list comprehension above (Python 2 comprehension scoping); it selects the
            # stratifying column whose value distinguishes this vacancy category.
            this_total_spaces_variable += '_' + str(criterion[col])
            this_occupied_spaces_variable += '_' + str(criterion[col])
            
            logger.be_quiet() #temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
            realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            logger.talk()
            
            actual_num = (realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            #target_num is obsolete with this version.
            target_num = int(round( (realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
            '''If the target vacancy is very small and the inflow to the region is big, it is not enough to check
            only the current simulation year's vacancy. The simulation is more robust if the BTM anticipates the
            next year's population (of households and jobs).
            This version calculates the non-residential spaces based on the sqft requirements of jobs per sector.
            #TODO: Make code more general to cover various stratifications in the real estate market.
            '''
            if criterion[col] == 0:
                """ Option without demography model
                idx = where(self.control_totals.get_attribute("year")==year + 1)[0]
                this_years_control_totals = DatasetSubset(self.control_totals, idx)
                expected_num = int(round( this_years_control_totals.get_attribute('total_number_of_households').sum() /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))""" 
                hh_dataset = dataset_pool.get_dataset( 'household' )
                number_of_hh = hh_dataset.size()
                expected_num = int(round( number_of_hh /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index]))) 
            if criterion[col] > 0:
                # Getting control totals per sector in a dictionary
                idx = where(self.employment_control_totals.get_attribute("year")==year)[0] # index of the employment control totals for this simulation year
                this_years_control_totals = DatasetSubset(self.employment_control_totals, idx) # subset of control totals
                # Index of the non-home-based control totals in the current sector.
                # Only non-home-based jobs are supported. TODO: Support home-based jobs.
                idx_non_home_based = where(logical_and(this_years_control_totals['home_based_status'] == 0,
                                                       this_years_control_totals['sector_id'] == criterion[col]))[0]
                this_years_control_totals = DatasetSubset(this_years_control_totals, idx_non_home_based)
#                idx_current_sector = where(this_years_control_totals['sector_id'] == criterion[col])[0]
                next_years_jobs = this_years_control_totals['number_of_jobs']
                controlled_sectors = this_years_control_totals['sector_id']
                # Dictionary keyed by sector_id with the number of jobs as values, to ensure
                # multiplication with the right requirements.
                sector_job_totals = dict(zip(controlled_sectors, next_years_jobs.T))

                # Getting info on the required sqft per job per sector.
#                a_zone_id = min(self.building_sqft_per_job['zone_id']) # Get a zone number from the definition table. Here choose to take the minimum which is arbitrary. This code assumes constant sqft requirements in all zones. TODO: Support different sqft requirements per zone.
#                idx_zone = where(self.building_sqft_per_job['zone_id'] == a_zone_id)[0]
#                subset_sqft_per_job = DatasetSubset(self.building_sqft_per_job, idx_zone)
#                sqft_per_job = subset_sqft_per_job['building_sqft_per_job']
#                sectors_with_requirements = subset_sqft_per_job['sector_id']
#                requirements_by_sector = dict(zip(sectors_with_requirements, sqft_per_job.T))
#                
#                needed_sqft_over_all_sectors = sector_job_totals[criterion[col]] * requirements_by_sector[criterion[col]]
#                expected_num = int(round( needed_sqft_over_all_sectors /\
#                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
                
                idx_sector = where(self.sectors['sector_id'] == criterion[col])[0]
                subset_sqft_per_job_sector = DatasetSubset(self.sectors, idx_sector)
                needed_sqft_current_sector = sector_job_totals[criterion[col]] * subset_sqft_per_job_sector.get_attribute('sqm_per_job')
                expected_num = int(round( needed_sqft_current_sector /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))

            diff = expected_num - actual_num
            
            #Previous version which is checking the current years occupation.
            #diff = target_num - actual_num
            
            this_sampled_index = array([], dtype=int32)
            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int( diff / mean_size )
                    ## sample at least 1 project when diff > 0; otherwise num_of_projects_to_sample = 0 gives an endless loop
                    num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[0:(1+searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                              ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
            #if diff < 0: #TODO demolition; not yet supported
            
            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0: action = "+" + str(action_num)
                if diff < 0: action = "-" + str(action_num)
            cat = [ str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(expected_num), str(diff), action]
            
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:                
                logger.log_status("\t".join(cat))
            
        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
        
        
        #logger.log_note("Updating attributes of %s sampled development events." % sampled_index.size)
        result_data = {}
        result_dataset = None
        index = array([], dtype='int32')
        if sampled_index.size > 0:
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            #result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32')) # Reset the year_built attribute. Commented out because it is overwritten below anyway.
            ## also add 'independent_variables' to the new dataset
            for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
                if reset_attribute_value.has_key(attribute):
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
            # Reset the year_built attribute.
            result_data['year_built'] = resize(year, sampled_index.size).astype('int32')
            # TODO: Uncomment the following three lines to reset land_area, tax_exempt, zgde. Test still to be done. parcel_id should be changed by location choice model.
            #result_data['land_area'] = resize(-1, sampled_index.size).astype('int32')
            #result_data['tax_exempt'] = resize(-1, sampled_index.size).astype('int32')
            #result_data['zgde'] = resize(-1, sampled_index.size).astype('int32')
            
            if id_name and result_data and id_name not in result_data:
                result_data[id_name] = arange(sampled_index.size, dtype='int32') + 1
            storage = StorageFactory().get_storage('dict_storage')
            storage.write_table(table_name=table_name, table_data=result_data)
            
            result_dataset = Dataset(id_name = id_name,
                                      in_storage = storage,
                                      in_table_name = table_name,
                                      dataset_name = dataset_name
                                      )
            index = arange(result_dataset.size())

        if append_to_realestate_dataset:
            if len(result_data) > 0:
                logger.start_block('Appending development events and living units')
                logger.log_note("Append %d sampled development events to real estate dataset." % len(result_data[result_data.keys()[0]]))
                index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)
                logger.start_block('Creating id mapping')
                # remember the ids from the development_event_history dataset.
                mapping_new_old = self.get_mapping_of_old_ids_to_new_ids(result_data, realestate_dataset, index)
                logger.end_block()
                
                '''Get the living units associated with the selected development events by iterating over the
                mapping dictionary and selecting, for each entry, all living units with the old building id.
                The living units are collected in selected_living_units_dict, which is then added to the
                living_units dataset. A dictionary is needed to use the add_elements method; creating the
                dictionary also clones the records, whereas the subset is only a view on the original table.'''
                selected_living_units_dict = {}
                counter = 0
                for new_id in mapping_new_old:
                    if counter == 0:
                        logger.log_note("Logging the assembly of every 100th development event")
                    counter += 1
                    if counter % 100 == 0:
                        logger.log_note("Assembling living units for development event %s" % new_id)
                    # indices of the living units that belong to the sampled (old) building
                    sel_index = [i for i in range(0, len(living_units_from_dataset['building_id']))
                                 if living_units_from_dataset['building_id'][i] == mapping_new_old[new_id]]
                    living_units_this_sampled_building = DatasetSubset(living_units_from_dataset, sel_index)
                    if len(selected_living_units_dict) == 0:
                        logger.start_block('Assign new building id')
                        for attribute_name in living_units_this_sampled_building.get_primary_attribute_names():
                            column = living_units_this_sampled_building.get_attribute(attribute_name)
                            if attribute_name == 'building_id':
                                new_ids = array(living_units_this_sampled_building.size() * [new_id], dtype=int32)
                                selected_living_units_dict.update({attribute_name: new_ids})
                            else:
                                selected_living_units_dict.update({attribute_name: column})
                        logger.end_block()
                    else:
                        this_living_units_dict = {}
                        for attribute_name in living_units_this_sampled_building.get_primary_attribute_names():
                            column = living_units_this_sampled_building.get_attribute(attribute_name)
                            if attribute_name == 'building_id':
                                new_ids = array(living_units_this_sampled_building.size() * [new_id], dtype=int32)
                                this_living_units_dict.update({attribute_name: new_ids})
                            else:
                                this_living_units_dict.update({attribute_name: column})
                        for attribute_name in living_units_this_sampled_building.get_primary_attribute_names():
                            selected_living_units_dict[attribute_name] = concatenate([selected_living_units_dict[attribute_name], this_living_units_dict[attribute_name]])
                # Reset year_built attribute of living units
                selected_living_units_dict['year_built'] = resize(year, len(selected_living_units_dict['year_built'])).astype('int32')
                # TODO: Uncomment the following two lines to reset rent_price, zgde. Test still to be done
                # selected_living_units_dict['rent_price'] = resize(-1, len(selected_living_units_dict['rent_price'])).astype('int32')
                # selected_living_units_dict['zgde'] = resize(-1, len(selected_living_units_dict['zgde'])).astype('int32')


                
                index_units = living_units_dataset.add_elements(selected_living_units_dict, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)
                
                # Check consistency of buildings and living units: every living unit must belong to a building.
                if SimulationState().get_current_time() - SimulationState().get_start_time() == 1:
                    for building_id in living_units_dataset['building_id']:
                        if building_id not in realestate_dataset['building_id']:
                            logger.log_warning('Living unit with building_id %d has no corresponding building.' % (building_id))
                        # Uncomment the next line to enforce consistency of the living units and building datasets;
                        # the warning above then becomes redundant.
#                        assert(building_id in realestate_dataset['building_id']), 'Living unit with building_id %d has no corresponding building.' % (building_id)

            result_dataset = realestate_dataset
        logger.end_block()

        # It is recommended to derive all building variables that relate to living units via expression variables.
        # However, if the building dataset contains attributes derived from living units, those attributes should be
        # kept consistent with the living units table. For example, the residential_units attribute of each building
        # should match the number of living units associated with it.
#        self.check_consistency_of_living_units_per_building(realestate_dataset, living_units_dataset, mapping_new_old)

        return (result_dataset, index)
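Both run() variants use the same oversample-then-trim idiom when diff > 0: draw candidate projects with replacement until their cumulative capacity covers the deficit, then cut the draw at the first position where the running total reaches the target. A toy sketch of just that loop, with hypothetical data:

from numpy import array, concatenate, cumsum, searchsorted, int32
from numpy.random import randint

spaces = array([120, 80, 300, 50, 220])       # capacity of each candidate project
diff = 500                                    # units still needed
per_draw = max(1, int(diff / spaces.mean()))  # draw at least 1 to avoid an endless loop

picked = array([], dtype=int32)
while spaces[picked].sum() < diff:
    picked = concatenate((picked, randint(0, spaces.size, per_draw)))
# trim to the shortest prefix whose capacity sum reaches diff
picked = picked[:1 + searchsorted(cumsum(spaces[picked]), diff)]
print "picked:", picked, "total capacity:", spaces[picked].sum()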
    def run(
        self,
        realestate_dataset,
        year=None,
        occupied_spaces_variable="occupied_units",
        total_spaces_variable="total_units",
        target_attribute_name="target_vacancy_rate",
        sample_from_dataset=None,
        sample_filter="",
        reset_attribute_value={},
        year_built="year_built",
        dataset_pool=None,
        append_to_realestate_dataset=False,
        table_name="development_projects",
        dataset_name="development_project",
        id_name="development_project_id",
        **kwargs
    ):
        """         
        sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
        """

        if self.target_vancy_dataset is None:
            raise RuntimeError, "target_vacancy_rate dataset is unspecified."

        if not sample_from_dataset:
            sample_from_dataset = realestate_dataset

        # if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute("year") == year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)

        column_names = list(
            set(self.target_vancy_dataset.get_known_attribute_names())
            - set([target_attribute_name, occupied_spaces_variable, total_spaces_variable, "year", "_hidden_id_"])
        )
        column_names.sort(reverse=True)
        column_values = dict(
            [
                (name, target_vacancy_for_this_year.get_attribute(name))
                for name in column_names + [target_attribute_name]
            ]
        )

        independent_variables = list(set([re.sub("_max$", "", re.sub("_min$", "", col)) for col in column_names]))
        dataset_known_attributes = realestate_dataset.get_known_attribute_names()
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
            if variable not in sample_dataset_known_attributes:
                sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)

        dataset_known_attributes = realestate_dataset.get_known_attribute_names()  # update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1

        sampled_index = array([], dtype=int32)

        # log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "expected", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "expected", "difference", "action"]))
        error_log = ""
        for index in range(target_vacancy_for_this_year.size()):
            this_sampled_index = array([], dtype=int32)
            indicator = ones(realestate_dataset.size(), dtype="bool")
            sample_indicator = ones(sample_from_dataset.size(), dtype="bool")
            criterion = {}  # for logging
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = realestate_dataset.get_attribute(attribute)
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (
                        attribute,
                        realestate_dataset.get_dataset_name(),
                    )

                if attribute + "_min" in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute + "_min")[index]
                    criterion.update({attribute + "_min": amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                        sample_indicator *= sample_attribute >= amin
                if attribute + "_max" in column_names:
                    amax = target_vacancy_for_this_year.get_attribute(attribute + "_max")[index]
                    criterion.update({attribute + "_max": amax})
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names:
                    aval = column_values[attribute][index]
                    criterion.update({attribute: aval})
                    if aval == -1:
                        continue
                    elif (
                        aval == -2
                    ):  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        indicator *= dataset_attribute == aval
                        sample_indicator *= sample_attribute == aval

            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all real estate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[
                    index
                ]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]

            this_total_spaces_variable += "_" + str(criterion[col])
            this_occupied_spaces_variable += "_" + str(criterion[col])

            logger.be_quiet()  # temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(
                this_occupied_spaces_variable, dataset_pool=dataset_pool
            )
            realestate_dataset.compute_one_variable_with_unknown_package(
                this_total_spaces_variable, dataset_pool=dataset_pool
            )
            sample_from_dataset.compute_one_variable_with_unknown_package(
                this_total_spaces_variable, dataset_pool=dataset_pool
            )
            logger.talk()

            actual_num = (realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            # target_num is obsolete with this version.
            target_num = int(
                round(
                    (realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum()
                    / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
                )
            )
            """If the target vacancy is very small and the inflow to the region big it is not enough to check
            only the current simulation year's vacancy. The simulation is more robust if the BTM is anticipating the
            next year's population (of households and jobs).
            #TODO: Make code more general to cover various stratifications in the real estate market.
            """
            if criterion[col] == 1:
                idx = where(self.control_totals.get_attribute("year") == year + 1)[0]
                this_years_control_totals = DatasetSubset(self.control_totals, idx)
                expected_num = int(
                    round(
                        this_years_control_totals.get_attribute("total_number_of_households").sum()
                        / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
                    )
                )
            if criterion[col] == 0:
                idx = where(self.employment_control_totals.get_attribute("year") == year + 1)[0]
                next_years_control_totals = DatasetSubset(self.employment_control_totals, idx)
                expected_num = int(
                    round(
                        next_years_control_totals.get_attribute("number_of_jobs").sum()
                        / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
                    )
                )

            diff = expected_num - actual_num

            # Previous version which is checking the current years occupation.
            # diff = target_num - actual_num

            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(
                    logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0
                )[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int(diff / mean_size)
                    ## sample at least 1 project when diff > 0; otherwise num_of_projects_to_sample = 0 gives an endless loop
                    num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[
                        0 : (1 + searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))
                    ]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += (
                        "There is nothing to sample from %s and no new development will happen for "
                        % sample_from_dataset.get_dataset_name()
                        + ",".join([col + "=" + str(criterion[col]) for col in column_names])
                        + "\n"
                    )
            # if diff < 0: #TODO demolition; not yet supported

            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0:
                    action = "+" + str(action_num)
                if diff < 0:
                    action = "-" + str(action_num)
            cat = [str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(expected_num), str(diff), action]

            if PrettyTable is not None:
                status_log.add_row(cat)
            else:
                logger.log_status("\t".join(cat))

        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)

        result_data = {}
        result_dataset = None
        index = array([], dtype="int32")
        if True:  # sampled_index.size > 0: -- guard disabled so a (possibly empty) result dataset is always built
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            result_data.setdefault(year_built, resize(year, sampled_index.size).astype("int32"))
            ## also add 'independent_variables' to the new dataset
            for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
                if reset_attribute_value.has_key(attribute):
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)

            if id_name and result_data and id_name not in result_data:
                result_data[id_name] = arange(sampled_index.size, dtype="int32") + 1

            storage = StorageFactory().get_storage("dict_storage")
            storage.write_table(table_name=table_name, table_data=result_data)

            result_dataset = Dataset(
                id_name=id_name, in_storage=storage, in_table_name=table_name, dataset_name=dataset_name
            )
            index = arange(result_dataset.size())

        if append_to_realestate_dataset:
            if len(result_data) > 0:
                index = realestate_dataset.add_elements(
                    result_data, require_all_attributes=False, change_ids_if_not_unique=True
                )
            result_dataset = realestate_dataset

        return (result_dataset, index)
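The expected_num computation in both variants rests on one identity: with O occupied units and a target vacancy rate v, the total stock should be O / (1 - v), so the shortfall is that figure minus the units that already exist. A tiny worked example (the numbers are made up):

occupied = 9000.0
existing = 9500
target_vacancy = 0.10

expected_total = int(round(occupied / (1 - target_vacancy)))  # 9000 / 0.9 = 10000
to_build = max(0, expected_total - existing)                  # 10000 - 9500 = 500
print "expected total: %d, to build: %d" % (expected_total, to_build)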
class DevelopmentProjectTransitionModel(Model):
    """
    Creates development projects. Each development project is for a single type
    of development, e.g. 'industrial' or 'commercial'.  This model creates
    enough development projects to match the desired vacancy rates, as defined in the target_vacancies
    table.  It does not place any projects in locations; that is the job of the development project
    location choice models.  The distribution of project sizes (amount of space, value of space) is
    determined by sampling from the projects in the development_event_history table.
    """
    model_name = "Development Project Transition Model"

    def __init__(self, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)

    def pre_check(self, location_set, vacancy_table, types):
        for ptype in types:
            self.check_for_space(
                location_set.get_attribute(
                    self.variable_for_total_units[ptype]))
        self.check_target_vacancy_is_not_100_percent(
            vacancy_table.get_attribute("target_total_vacancy"))

    def check_for_space(self, values):
        """Check that this array of values sums to something > 0."""
        self.do_check("x > 0", array([values.sum()]))

    def check_target_vacancy_is_not_100_percent(self, value):
        """Check that the target vacancy rate is not 100% (ratio == 1), because it doesn't make sense,
        and it also causes a divide by 0 error."""
        self.do_check("x < 1", value)

    def run(self,
            vacancy_table,
            history_table,
            year,
            location_set,
            dataset_pool=None,
            resources=None):
        self.dataset_pool = dataset_pool
        building_types = self.dataset_pool.get_dataset('building_type')
        target_vacancy_this_year = DatasetSubset(
            vacancy_table,
            index=where(vacancy_table.get_attribute("year") == year)[0])
        building_type_ids = target_vacancy_this_year.get_attribute(
            'building_type_id')
        building_type_idx = building_types.get_id_index(building_type_ids)
        self.used_building_types = DatasetSubset(building_types,
                                                 index=building_type_idx)
        project_types = self.used_building_types.get_attribute(
            'building_type_name')
        is_residential = self.used_building_types.get_attribute(
            'is_residential')
        unit_names = where(is_residential, 'residential_units',
                           'non_residential_sqft')
        specific_unit_names = where(is_residential, 'residential_units',
                                    '_sqft')
        rates = target_vacancy_this_year.get_attribute('target_total_vacancy')
        self.project_units = {}
        self.project_specific_units = {}
        target_rates = {}
        for i in range(self.used_building_types.size()):
            self.project_units[project_types[i]] = unit_names[i]
            if is_residential[i]:
                self.project_specific_units[
                    project_types[i]] = specific_unit_names[i]
            else:
                self.project_specific_units[project_types[i]] = "%s%s" % (
                    project_types[i], specific_unit_names[i])
            target_rates[building_type_ids[i]] = rates[i]

        self._compute_vacancy_and_total_units_variables(
            location_set, project_types, resources)
        self.pre_check(location_set, target_vacancy_this_year, project_types)

        projects = None
        for project_type_id, target_vacancy_rate in target_rates.iteritems():
            # determine current-year vacancy rates
            project_type = building_types.get_attribute_by_id(
                'building_type_name', project_type_id)
            vacant_units_sum = location_set.get_attribute(
                self.variable_for_vacancy[project_type]).sum()
            units_sum = float(
                location_set.get_attribute(
                    self.variable_for_total_units[project_type]).sum())
            should_develop_units = int(
                round(
                    max(0,
                        (target_vacancy_rate * units_sum - vacant_units_sum) /
                        (1 - target_vacancy_rate))))
            logger.log_status(
                project_type +
                ": vacant units: %d, should be vacant: %f, sum units: %d" %
                (vacant_units_sum, target_vacancy_rate * units_sum, units_sum))

            if not should_develop_units:
                logger.log_note((
                    "Will not build any " + project_type +
                    " units, because the current vacancy of %d units\n" +
                    "is more than the %d units desired for the vacancy rate of %f."
                ) % (vacant_units_sum, target_vacancy_rate * units_sum,
                     target_vacancy_rate))
            #create projects
            if should_develop_units > 0:
                this_project = self._create_projects(
                    should_develop_units, project_type, project_type_id,
                    history_table, location_set, units_sum, resources)
                if projects is None:
                    projects = this_project
                else:
                    projects.join_by_rows(this_project,
                                          change_ids_if_not_unique=True)
        return projects

    def _compute_vacancy_and_total_units_variables(self,
                                                   location_set,
                                                   project_types,
                                                   resources=None):
        compute_resources = Resources(resources)
        compute_resources.merge({"debug": self.debug})
        self.variable_for_vacancy = {}
        self.variable_for_total_units = {}
        for ptype in project_types:
            self.variable_for_vacancy[ptype] = compute_resources.get(
                "%s_vacant_variable" % ptype, "urbansim_zone.%s.vacant_%s" %
                (location_set.get_dataset_name(),
                 self.project_specific_units[ptype]))
            self.variable_for_total_units[ptype] = compute_resources.get(
                "%s_total_units_variable" % ptype,
                "%s.aggregate(urbansim_zone.building.total_%s)" %
                (location_set.get_dataset_name(),
                 self.project_specific_units[ptype]))
            location_set.compute_variables([
                self.variable_for_vacancy[ptype],
                self.variable_for_total_units[ptype]
            ],
                                           dataset_pool=self.dataset_pool,
                                           resources=compute_resources)

    def _create_projects(self,
                         should_develop_units,
                         project_type,
                         project_type_id,
                         history_table,
                         location_set,
                         units_sum,
                         resources=None):
        history_values = history_table.get_attribute(
            self.project_units[project_type])
        type_code_values = history_table.get_change_type_code_attribute(
            self.project_units[project_type])
        # take only non-zero history values and those that don't represent demolished buildings
        history_values_without_zeros = history_values[logical_and(
            history_values > 0,
            type_code_values != DevelopmentEventTypeOfChange.DELETE)]
        mean_size = history_values_without_zeros.mean()
        idx = array([], dtype="int32")
        # Ensure that there are some development projects to choose from.
        num_of_projects_to_select = max(
            10, round_(should_develop_units / mean_size))
        while True:
            idx = concatenate((idx,
                               randint(0, history_values_without_zeros.size,
                                       num_of_projects_to_select)))
            csum = history_values_without_zeros[idx].cumsum()
            idx1 = idx[csum <= should_develop_units]
            if idx1.size == 0:  # at least one project should be selected
                idx = array([idx[0]], dtype="int32")
            else:
                idx = idx1
            if csum[-1] >= should_develop_units:
                break
        data = {
            "residential_units": zeros((idx.size, ), dtype=int32),
            "non_residential_sqft": zeros((idx.size, ), dtype=int32),
            'building_type_id': array(idx.size * [project_type_id]),
            "project_id": arange(idx.size) + 1,
            "building_id": zeros((idx.size, ), dtype=int32)
        }
        data[self.project_units[project_type]] = history_values_without_zeros[idx]
        storage = StorageFactory().get_storage('dict_storage')

        development_projects_table_name = 'development_projects'
        storage.write_table(table_name=development_projects_table_name,
                            table_data=data)

        return Dataset(in_storage=storage,
                       in_table_name=development_projects_table_name,
                       id_name='project_id')
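The should_develop_units formula in run() above can be derived by noting that newly built units start out vacant: after adding x units the vacancy rate is (vacant + x) / (units + x). Setting that equal to the target rate v and solving for x gives x = (v * units - vacant) / (1 - v), clipped at zero. A quick numeric check with made-up figures:

vacant, units, v = 300.0, 10000.0, 0.05
x = max(0, (v * units - vacant) / (1 - v))   # (500 - 300) / 0.95 ~= 210.5
print "should develop %d units" % round(x)   # 211; check: (300+210.5)/(10000+210.5) ~= 0.05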