Example #1
 def run(self,
         year,
         job_set,
         control_totals,
         job_building_types,
         data_objects=None,
         resources=None):
     self._do_initialize_for_run(job_set, job_building_types, data_objects)
     subarea_ids = control_totals.get_attribute(self.subarea_id_name)
     jobs_subarea_ids = job_set.compute_one_variable_with_unknown_package(
         variable_name="%s" % (self.subarea_id_name),
         dataset_pool=self.dataset_pool)
     unique_subareas = unique(subarea_ids)
     is_year = control_totals.get_attribute("year") == year
     all_jobs_index = arange(job_set.size())
     sectors = unique(control_totals.get_attribute("sector_id")[is_year])
     self._compute_sector_variables(sectors, job_set)
     for area in unique_subareas:
         idx = where(logical_and(is_year, subarea_ids == area))[0]
         self.control_totals_for_this_year = DatasetSubset(
             control_totals, idx)
         jobs_index = where(jobs_subarea_ids == area)[0]
         jobs_for_this_area = DatasetSubset(job_set, jobs_index)
         logger.log_status("ETM for area %s (currently %s jobs)" %
                           (area, jobs_for_this_area.size()))
         last_remove_idx = self.remove_jobs.size
         self._do_run_for_this_year(jobs_for_this_area)
         add_jobs_size = (self.new_jobs[self.location_id_name].size -
                          self.new_jobs[self.subarea_id_name].size)
         remove_jobs_size = self.remove_jobs.size - last_remove_idx
         logger.log_status(
             "add %s, remove %s, total %s" %
             (add_jobs_size, remove_jobs_size,
              jobs_for_this_area.size() + add_jobs_size - remove_jobs_size))
         self.new_jobs[self.subarea_id_name] = concatenate(
             (self.new_jobs[self.subarea_id_name],
              array(add_jobs_size * [area], dtype="int32")))
         # transform indices of jobs to be removed into indices of the whole dataset
         self.remove_jobs[last_remove_idx:self.remove_jobs.size] = all_jobs_index[
             jobs_index[self.remove_jobs[last_remove_idx:self.remove_jobs.size]]]
     self._update_job_set(job_set)
     idx_new_jobs = arange(
         job_set.size() - self.new_jobs[self.subarea_id_name].size,
         job_set.size())
     jobs_subarea_ids = job_set.compute_one_variable_with_unknown_package(
         variable_name="%s" % (self.subarea_id_name),
         dataset_pool=self.dataset_pool)
     jobs_subarea_ids[idx_new_jobs] = self.new_jobs[self.subarea_id_name]
     job_set.delete_one_attribute(self.subarea_id_name)
     job_set.add_attribute(jobs_subarea_ids,
                           self.subarea_id_name,
                           metadata=AttributeType.PRIMARY)
     # return an index of new jobs
     return idx_new_jobs
Example #2
    def run(self, year, household_set, control_totals, characteristics, resources=None):
#        self.person_set = person_set
        self._do_initialize_for_run(household_set)
        control_totals.get_attribute("total_number_of_households") # to make sure they are loaded
        self.characteristics = characteristics
        self.all_categories = self.characteristics.get_attribute("characteristic")
        self.all_categories = array(map(lambda x: x.lower(), self.all_categories))
        self.scaled_characteristic_names = get_distinct_names(self.all_categories).tolist()
        self.marginal_characteristic_names = copy(control_totals.get_id_name())
        index_year = self.marginal_characteristic_names.index("year")
        self.marginal_characteristic_names.remove("year")
        self.marginal_characteristic_names.remove(self.subarea_id_name)
        region_ids = control_totals.get_attribute(self.subarea_id_name)
        households_region_ids = household_set.compute_one_variable_with_unknown_package(variable_name="%s" % (self.subarea_id_name), dataset_pool=self.dataset_pool)

        unique_regions = unique(region_ids)
        is_year = control_totals.get_attribute("year")==year
        all_households_index = arange(household_set.size())
        for area in unique_regions:
            idx = where(logical_and(is_year, region_ids == area))[0]
            self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
            households_index = where(households_region_ids == area)[0]
            if households_index.size == 0:
                continue
            households_for_this_area = DatasetSubset(household_set, households_index)
            logger.log_status("HTM for area %s (currently %s households)" % (area, households_for_this_area.size()))
            last_remove_idx = self.remove_households.size
            last_new_hhs_idx = self.mapping_existing_hhs_to_new_hhs.size
            self._do_run_for_this_year(households_for_this_area)
            add_hhs_size = (self.new_households[self.location_id_name].size -
                            self.new_households[self.subarea_id_name].size +
                            self.mapping_existing_hhs_to_new_hhs.size - last_new_hhs_idx)
            remove_hhs_size = self.remove_households.size-last_remove_idx
            logger.log_status("add %s, remove %s, total %s" % (add_hhs_size, remove_hhs_size,
                                                               households_for_this_area.size()+add_hhs_size-remove_hhs_size
                                                               ))
            self.new_households[self.subarea_id_name] = concatenate((
                self.new_households[self.subarea_id_name],
                array((self.new_households[self.location_id_name].size -
                       self.new_households[self.subarea_id_name].size) * [area],
                      dtype="int32")))
            # transform indices of households to be removed into indices of the whole dataset
            self.remove_households[last_remove_idx:self.remove_households.size] = all_households_index[
                households_index[self.remove_households[last_remove_idx:self.remove_households.size]]]
            # do the same for households to be duplicated
            self.mapping_existing_hhs_to_new_hhs[last_new_hhs_idx:self.mapping_existing_hhs_to_new_hhs.size] = \
                all_households_index[households_index[
                    self.mapping_existing_hhs_to_new_hhs[last_new_hhs_idx:self.mapping_existing_hhs_to_new_hhs.size]]]
            
        self._update_household_set(household_set)
        idx_new_households = arange(household_set.size()-self.new_households[self.subarea_id_name].size, household_set.size())
        #household_region_ids = household_set.compute_variables("urbansim_parcel.household.%s" % self.subarea_id_name)
        #household_region_ids[idx_new_households] = self.new_households[self.subarea_id_name]
        region_ids = household_set.get_attribute(self.subarea_id_name).copy()
        household_set.delete_one_attribute(self.subarea_id_name)
        household_set.add_attribute(region_ids, self.subarea_id_name, metadata=AttributeType.PRIMARY)
        # return an index of new households
        return idx_new_households
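
The two "transform indices" lines above are the trickiest part of this example: indices returned for a DatasetSubset are relative to the subset and must be composed through the subset's own index array to address the full dataset. A small self-contained sketch of that remapping (toy data):

from numpy import arange, array

all_index        = arange(10)          # positions in the full household set
households_index = array([2, 5, 7])    # positions of this area's households
remove_relative  = array([0, 2])       # indices relative to the area subset

# compose: subset-relative -> full-dataset positions
remove_absolute = all_index[households_index[remove_relative]]
print(remove_absolute)                 # [2 7]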
Example #3
 def compute(self, dataset_pool):
     jobs = dataset_pool.get_dataset('job')
     nhb_jobs = DatasetSubset(
         jobs,
         where(jobs.get_attribute('is_home_based_job') == 0)[0])
     return self.get_dataset().sum_dataset_over_ids(
         nhb_jobs, self.job_is_in_employment_sector_group)
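
sum_dataset_over_ids aggregates the subset's values over the id attribute of the calling dataset. A numpy-only equivalent of that grouping, assuming small consecutive integer zone ids (toy data, not the opus_core implementation):

from numpy import array, bincount

job_zone_ids = array([1, 1, 2, 3, 3, 3])   # zone of each non-home-based job
in_group     = array([1, 0, 1, 1, 1, 0])   # job_is_in_employment_sector_group
n_zones = 3

# sum the indicator per zone id (ids assumed to run 1..n_zones)
per_zone = bincount(job_zone_ids, weights=in_group, minlength=n_zones + 1)[1:]
print(per_zone)                            # -> [1. 1. 2.]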
Example #4
 def _convert_lccm_input(self, flt_directory_in, flt_directory_out):
     gc.collect()
     t1 = time()
     lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
         'flt_storage', storage_location=flt_directory_in),
                           out_storage=StorageFactory().get_storage(
                               'flt_storage',
                               storage_location=flt_directory_out))
     #        lc.get_header() # added 23 june 2009 by mm
     mask = lc.get_mask()
     idx = where(mask == 0)[0]
     lcsubset = DatasetSubset(lc, idx)
     print "Converting:"
     lcsubset.write_dataset(attributes=["relative_x"],
                            out_table_name="land_covers")
     lc.delete_one_attribute("relative_x")
     lcsubset.write_dataset(attributes=["relative_y"],
                            out_table_name="land_covers")
     lc.delete_one_attribute("relative_y")
     lc.flush_dataset()
     gc.collect()
     #        lc_names = lc.get_primary_attribute_names()
     for attr in lc.get_primary_attribute_names():
         print "   ", attr
         lcsubset.write_dataset(attributes=[attr],
                                out_table_name="land_covers")
         lc.delete_one_attribute(attr)
     logger.log_status("Data conversion done. " + str(time() - t1) + " s")
Example #5
    def run(self, employment_events, jobs, current_year):
        # select events for the current year
        events_for_this_year = DatasetSubset(
            employment_events,
            index=where(
                employment_events.get_attribute('scheduled_year') ==
                current_year)[0])

        # create control totals on the fly
        control_totals = self.create_control_totals(events_for_this_year,
                                                    jobs,
                                                    year=current_year)

        # run subarea employment transition model in order to create or delete the given number of jobs
        ETM = SubareaEmploymentTransitionModel(
            subarea_id_name=self.location_dataset.get_id_name()[0],
            location_id_name=self._job_location_id_name,
            dataset_pool=self.dataset_pool)
        etm_result = ETM.run(
            current_year, jobs, control_totals,
            self.dataset_pool.get_dataset('job_building_type'))

        self.place_jobs_into_buildings(events_for_this_year, jobs, etm_result)

        return etm_result
Example #6
 def estimate_mu(self):
     iout = -1
     self.values_from_mr = {}
     for quantity in self.observed_data.get_quantity_objects():
         dataset_name = quantity.get_dataset_name()
         variable = quantity.get_variable_name()
         iout += 1
         dimension_reduced = False
         quantity_ids = quantity.get_dataset().get_id_attribute()
         for i in range(self.number_of_runs):
             ds = self._compute_variable_for_one_run(i, variable, dataset_name, self.get_calibration_year(), quantity)
             if isinstance(ds, InteractionDataset):
                 ds = ds.get_flatten_dataset()
             if i == 0: # first run
                 self.mu[iout] = zeros((self.y[iout].size, self.number_of_runs), dtype=float32)
                 ids = ds.get_id_attribute()
             else:
                 if ds.size() > ids.shape[0]:
                     ds = DatasetSubset(ds, ds.get_id_index(ids))
                     dimension_reduced = True
             scale = self.get_scales(ds, i+1, variable)
             matching_index = ds.get_id_index(quantity_ids)
             values = scale[matching_index] * ds.get_attribute(variable)[matching_index]
             self.mu[iout][:,i] = try_transformation(values, quantity.get_transformation())
             
         self.values_from_mr[variable.get_expression()] = self.mu[iout]
         if dimension_reduced:
             self.y[iout] = self.y[iout][quantity.get_dataset().get_id_index(ids)]
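
The matching_index step aligns each run's output with the ids the observed data is defined on. A minimal stand-in for get_id_index, assuming sorted integer ids (toy data):

from numpy import array, searchsorted

ds_ids       = array([10, 20, 30, 40])   # ids present in this run's dataset (sorted)
quantity_ids = array([20, 40])           # ids the observed quantity is defined on

matching_index = searchsorted(ds_ids, quantity_ids)   # positions of quantity_ids in ds_ids
values = array([1.0, 2.0, 3.0, 4.0])[matching_index]  # run values aligned to observed ids
print(values)                                         # -> [2. 4.]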
Example #7
    def run(self,
            chunk_specification,
            dataset,
            dataset_index=None,
            result_array_type=float32,
            **kwargs):
        """ 'chunk_specification' - determines number of chunks to use when computing over
                the dataset set.
            'dataset' - an object of class Dataset that is to be chunked.
            'dataset_index' - index of individuals in dataset to be chunked.
            'result_array_type' - type of the resulting array. Can be any numerical type of numpy array.
            **kwargs - keyword arguments.
            The method chunks dataset_index in the desired number of chunks (minimum is 1) and for each chunk it calls the method
            'run_chunk'. The order of the individuals entering the chunking is determined by the method 'get_agents_order'.
        """
        if dataset_index is None:
            dataset_index = arange(dataset.size())
        if not isinstance(dataset_index, ndarray):
            dataset_index = array(dataset_index)
        logger.log_status("Total number of individuals: %s" %
                          dataset_index.size)
        result_array = zeros(dataset_index.size, dtype=result_array_type)

        if dataset_index.size <= 0:
            logger.log_status("Nothing to be done.")
            return result_array

        all_indexed_individuals = DatasetSubset(dataset, dataset_index)
        ordered_agent_indices = self.get_agents_order(
            all_indexed_individuals)  # set order of individuals in chunks

        # TODO: Remove next six lines after we inherit chunk specification as a text string.
        if (chunk_specification is None):
            chunk_specification = {'nchunks': 1}
        chunker = ChunkSpecification(chunk_specification)
        self.number_of_chunks = chunker.nchunks(dataset_index)
        chunksize = int(
            ceil(all_indexed_individuals.size() /
                 float(self.number_of_chunks)))
        for ichunk in range(self.number_of_chunks):
            logger.start_block("%s chunk %d out of %d." %
                               (self.model_short_name,
                                (ichunk + 1), self.number_of_chunks))
            self.index_of_current_chunk = ichunk
            try:
                chunk_agent_indices = ordered_agent_indices[arange(
                    (ichunk * chunksize),
                    min((ichunk + 1) * chunksize,
                        all_indexed_individuals.size()))]
                logger.log_status("Number of agents in this chunk: %s" %
                                  chunk_agent_indices.size)
                result_array[chunk_agent_indices] = self.run_chunk(
                    dataset_index[chunk_agent_indices], dataset,
                    **kwargs).astype(result_array_type)
            finally:
                logger.end_block()

        return result_array
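
The chunking arithmetic in this example is worth isolating: chunksize is the ceiling of size/nchunks, and the last chunk is clipped to the dataset size. A runnable sketch of just that slicing (toy numbers):

from math import ceil
from numpy import arange

n, nchunks = 10, 3
chunksize = int(ceil(n / float(nchunks)))
for ichunk in range(nchunks):
    chunk = arange(ichunk * chunksize, min((ichunk + 1) * chunksize, n))
    print("chunk %d: %s" % (ichunk + 1, chunk))
# chunk 1: [0 1 2 3], chunk 2: [4 5 6 7], chunk 3: [8 9]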
Example #8
 def run(self, location_set, development_event_set, *args, **kwargs):
     changed_indices, processed_development_event_indices = \
                     EventsCoordinator.run(self, location_set,
                                            development_event_set, *args, **kwargs)
     if development_event_set is not None:
         subset = DatasetSubset(development_event_set,
                                processed_development_event_indices)
         subset.write_dataset(out_storage=AttributeCache())
     return (changed_indices, processed_development_event_indices)
Example #9
    def run(self, current_year_emme2_dir, current_year, dataset_pool, config=None):
        """Writes to the an emme2 input file in the [current_year_emme2_dir]/tripgen/inputtg/tazdata.ma2.
        """
        
        missing_dataset = ''
        try:
            missing_dataset = 'constant_taz_column'
            taz_col_set = dataset_pool.get_dataset("constant_taz_column")
            taz_col_set.load_dataset()
            missing_dataset = 'zone'
            zone_set = dataset_pool.get_dataset("zone")
            zone_set.load_dataset()
            missing_dataset = 'household'
            household_set = dataset_pool.get_dataset("household")
        except Exception:
            raise Exception("Dataset %s is missing from dataset_pool" % missing_dataset)
        
        """specify travel input file name: [current_year_emme2_dir]/tripgen/inputtg/tazdata.ma2 """
        full_path = os.path.join(current_year_emme2_dir, 'tripgen', 'inputtg')
        if not os.path.exists(full_path):
            os.makedirs(full_path)
        tm_input_file = os.path.join(full_path, 'tazdata.ma2')
        
        tm_year = self._decade_floor(current_year)
        
        logger.log_status("calculating entries for emme2 input file")
        taz_col_set.compute_variables("zone_id=constant_taz_column.taz")
        current_taz_col = DatasetSubset(taz_col_set, index=where(taz_col_set.get_attribute("year")==tm_year)[0])
        
        current_taz_col._id_names = ['taz']
        current_taz_col._create_id_mapping()
        zone_set.join(current_taz_col, "pctmf", join_attribute='zone_id')
        zone_set.join(current_taz_col, "gqi", join_attribute='zone_id')
        zone_set.join(current_taz_col, "gqn", join_attribute='zone_id')
        zone_set.join(current_taz_col, "fteuniv", join_attribute='zone_id')
        zone_set.join(current_taz_col, "den", new_name='density', join_attribute='zone_id')

        value_122 = zeros(zone_set.size())
        index_122 = zone_set.try_get_id_index(array([58,59,60,71,72,73,84,85,86,150,251,266,489,578,687,688,797,868]))
        value_122[index_122[index_122 != -1]] = 1
        zone_set.add_attribute(data=value_122, name="v122")
        
        value_123 = zeros(zone_set.size())
        index_123 = zone_set.try_get_id_index(array([531,646,847,850,888,894,899,910]))
        value_123[index_123[index_123 != -1]] = 1
        zone_set.add_attribute(data=value_123, name="v123")
        
        value_124 = logical_not(value_122 + value_123)
        zone_set.add_attribute(data=value_124, name="v124")
                
        """specify which variables are passing from urbansim to travel model; the order matters"""
        variables_list = self.get_variables_list(dataset_pool)
        
        zone_set.compute_variables(variables_list, dataset_pool=dataset_pool )

        return self._write_to_file(zone_set, variables_list, tm_input_file)
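
The v122/v123 flags rely on try_get_id_index returning -1 for ids that are absent from the zone set; the -1 entries are filtered out before assignment. A small sketch of that sentinel pattern, with a hypothetical lookup standing in for try_get_id_index:

from numpy import array, zeros

zone_ids = array([58, 59, 100, 251])    # ids actually present in the zone set

def id_index(ids, wanted):
    # stand-in for try_get_id_index: position of each wanted id, -1 if absent
    lookup = dict((zid, i) for i, zid in enumerate(ids))
    return array([lookup.get(w, -1) for w in wanted])

value_122 = zeros(zone_ids.size)
index_122 = id_index(zone_ids, [58, 59, 60, 251])
value_122[index_122[index_122 != -1]] = 1   # skip the -1 sentinels
print(value_122)                            # -> [1. 1. 0. 1.]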
Example #10
 def get_active_agent_set(self, submodel=None):
     """Return agent set that make choices in the model.
     Works only for the ChoiceModel class.
     """
     agents = self.get_agent_set()
     if submodel is None:
         index = self.get_agent_set_index()
     else:
         index = self.get_agent_set_index_for_submodel(submodel)
     return DatasetSubset(agents, index)
Example #11
 def get_active_choice_set(self, submodel=None):
     """Return choice set as seen by agents in the model.
     Works only for the ChoiceModel class.
     """
     if submodel is None:
         choices = self.get_choice_set_index()
     else:
         choices = self.get_choice_set_index_for_submodel(submodel)
     choices = unique(choices.flatten())
     ds = self.get_choice_set()
     return DatasetSubset(ds, choices)
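
Here the sampled choice index is typically a 2-D array (one row of alternatives per agent, possibly with repeats), so it is flattened and deduplicated before the subset is built. The reduction in isolation (toy data):

from numpy import array, unique

choice_index = array([[3, 1, 4],
                      [1, 5, 3]])            # alternatives sampled per agent
choices = unique(choice_index.flatten())     # distinct alternatives actually seen
print(choices)                               # -> [1 3 4 5]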
Example #12
 def run(self,
         year,
         job_set,
         control_totals,
         job_building_types,
         data_objects=None,
         resources=None):
     self._do_initialize_for_run(job_set, job_building_types, data_objects)
     idx = where(control_totals.get_attribute("year") == year)[0]
     self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
     self._do_run_for_this_year(job_set)
     return self._update_job_set(job_set)
Example #13
    def run(self, agent_set, **kwargs):

        large_areas = agent_set.get_attribute(self.large_area_id_name)
        valid_large_area = where(large_areas > 0)[0]
        result = array([], dtype="int32")
        if valid_large_area.size > 0:
            unique_large_areas = unique(large_areas[valid_large_area])
            cond_array = zeros(agent_set.size(), dtype="bool8")
            cond_array[valid_large_area] = True
            for area in unique_large_areas:
                new_index = where(logical_and(cond_array,
                                              large_areas == area))[0]
                agent_subset = DatasetSubset(agent_set, new_index)
                logger.log_status("ARM for area %s (%s agents)" %
                                  (area, agent_subset.size()))
                this_result = AgentRelocationModel.run(self, agent_subset,
                                                       **kwargs)
                result = concatenate((result, new_index[this_result]))
        no_large_area = where(large_areas <= 0)[0]
        result = concatenate((result, no_large_area))
        return result
Example #14
 def choose_agents_to_move_from_overfilled_locations(
         self, capacity, agent_set, agents_index, agents_locations):
     """Agents with the smallest number of units should move again.
     """
     if capacity is None:
         return array([], dtype='int32')
     index_valid_agents_locations = where(agents_locations > 0)[0]
     valid_agents_locations = agents_locations[
         index_valid_agents_locations].astype("int32")
     unique_locations = unique(valid_agents_locations).astype("int32")
     index_consider_capacity = self.choice_set.get_id_index(
         unique_locations)
     capacity_of_affected_locations = capacity[index_consider_capacity]
     overfilled = where(capacity_of_affected_locations < 0)[0]
     movers = array([], dtype='int32')
     indexed_individuals = DatasetSubset(
         agent_set, agents_index[index_valid_agents_locations])
     ordered_agent_indices = self.get_agents_order(indexed_individuals)
     sizes = indexed_individuals.get_attribute(
         self.units_full_name)[ordered_agent_indices]
     choice_ids = self.choice_set.get_id_attribute()
     for loc in overfilled:
         agents_to_move = where(valid_agents_locations == choice_ids[
             index_consider_capacity[loc]])[0]
         if agents_to_move.size > 0:
             n = int(-1 * capacity_of_affected_locations[loc])
             this_sizes = sizes[agents_to_move]
              # cumulative sum over sizes taken from the last (smallest) agent upward,
              # then flipped back so it aligns with the original ordering
              csum = this_sizes[arange(this_sizes.size - 1, -1, -1)].cumsum()
              csum = csum[arange(csum.size - 1, -1, -1)]
              w = where(csum < n)[0]
              if w.size < agents_to_move.size:  # add one more agent so that the cumulative sum exceeds n
                  w = concatenate(
                      (array([agents_to_move.size - w.size - 1]), w))
             idx = ordered_agent_indices[agents_to_move[w]]
             movers = concatenate((movers, idx))
     return movers
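
The double-reversed cumulative sum above is the heart of this method: it computes, for each agent, the total units held by that agent plus all smaller ones, so the smallest agents whose combined size stays below the overfill n are chosen to move. A toy illustration, with sizes already in the order produced by get_agents_order:

from numpy import arange, array, where

sizes = array([5, 3, 2, 1])   # units per agent at one overfilled location
n = 4                         # units that must be freed

csum = sizes[arange(sizes.size - 1, -1, -1)].cumsum()   # from the smallest agent upward
csum = csum[arange(csum.size - 1, -1, -1)]              # flipped back into place
w = where(csum < n)[0]
print("csum=%s, movers=%s" % (csum, w))                 # csum=[11  6  3  1], movers=[2 3]

Agents 2 and 3 hold only 2+1=3 units, still below n=4, which is why the code above prepends one more agent so the freed units exceed n.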
Example #15
 def run(self,
         year,
         household_set,
         control_totals,
         characteristics,
         resources=None):
     self._do_initialize_for_run(household_set)
     control_totals.get_attribute(
         "total_number_of_households")  # to make sure they are loaded
     self.characteristics = characteristics
     self.all_categories = self.characteristics.get_attribute(
         "characteristic")
     self.all_categories = array(
         map(lambda x: x.lower(), self.all_categories))
     self.scaled_characteristic_names = get_distinct_names(
         self.all_categories).tolist()
     self.marginal_characteristic_names = copy(control_totals.get_id_name())
     index_year = self.marginal_characteristic_names.index("year")
     self.marginal_characteristic_names.remove("year")
     idx = where(control_totals.get_attribute("year") == year)[0]
     self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
     self._do_run_for_this_year(household_set)
     return self._update_household_set(household_set)
Example #16
    def run(self, realestate_dataset,
            year=None, 
            occupied_spaces_variable="occupied_units",
            total_spaces_variable="total_units",
            target_attribute_name='target_vacancy_rate',
            sample_from_dataset = None,
            sample_filter="",
            reset_attribute_value={}, 
            year_built = 'year_built',
            dataset_pool=None,
            append_to_realestate_dataset = False,
            table_name = "development_projects",
            dataset_name = "development_project",
            id_name = [],
            **kwargs):
        """         
        sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
        """
        
        if self.target_vancy_dataset is None:
            raise RuntimeError, "target_vacancy_rate dataset is unspecified."
        
        if sample_from_dataset is None:
            sample_from_dataset = realestate_dataset
            
        #if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute('year')==year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)
        
        column_names = list(set( self.target_vancy_dataset.get_known_attribute_names() ) - set( [ target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_'] ))
        column_names.sort(reverse=True)
        column_values = dict([ (name, target_vacancy_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        dataset_known_attributes = realestate_dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
                sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
                
        dataset_known_attributes = realestate_dataset.get_known_attribute_names() #update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1
                
        sampled_index = array([], dtype=int32)

        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
        error_log = ''
        for index in range(target_vacancy_for_this_year.size()):
            this_sampled_index = array([], dtype=int32)
            indicator = ones( realestate_dataset.size(), dtype='bool' )
            sample_indicator = ones( sample_from_dataset.size(), dtype='bool' )
            criterion = {}   # for logging
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = realestate_dataset.get_attribute(attribute)
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (attribute, realestate_dataset.get_dataset_name())
                
                if attribute + '_min' in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute+'_min')[index] 
                    criterion.update({attribute + '_min':amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                        sample_indicator *= sample_attribute >= amin
                if attribute + '_max' in column_names: 
                    amax = target_vacancy_for_this_year.get_attribute(attribute+'_max')[index]
                    criterion.update({attribute + '_max':amax}) 
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names: 
                    aval = column_values[attribute][index] 
                    criterion.update({attribute:aval}) 
                    if aval == -1:
                        continue
                    elif aval == -2:  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        indicator *= dataset_attribute == aval
                        sample_indicator *= sample_attribute == aval
                        
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all real estate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
            
            logger.be_quiet() #temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
            realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            logger.talk()
            
            actual_num = (indicator * realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            target_num = int(round( (indicator * realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index]) 
                            ))
            diff = target_num - actual_num
            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = max(1, int(diff / mean_size))  # at least one, or the sampling loop below could never terminate
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[0:(1+searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                              ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
            #if diff < 0: #TODO demolition; not yet supported
            
            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0: action = "+" + str(action_num)
                if diff < 0: action = "-" + str(action_num)
            cat = [ str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(diff), action]
            
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:                
                logger.log_status("\t".join(cat))
            
        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
            
        result_data = {}
        result_dataset = None
        index = array([], dtype='int32')
        if sampled_index.size > 0:
            ### ideally duplicate_rows() alone would be enough to add the newly cloned rows;
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32'))
            for attribute in sample_from_dataset.get_primary_attribute_names():
                if reset_attribute_value.has_key(attribute):
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
        
            storage = StorageFactory().get_storage('dict_storage')
            storage.write_table(table_name=table_name, table_data=result_data)
    
            result_dataset = Dataset(id_name = id_name,
                                      in_storage = storage,
                                      in_table_name = table_name,
                                      dataset_name = dataset_name
                                      )
            index = arange(result_dataset.size())
            
        if append_to_realestate_dataset:
            if len(result_data) > 0:
                index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)                
            result_dataset = realestate_dataset
        
        return (result_dataset, index)
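
The sizing rule at the center of this model: given occupied units and a target vacancy rate, the needed stock solves (stock - occupied) / stock = rate, i.e. stock = occupied / (1 - rate), and the shortfall against the actual stock is what gets sampled. Toy arithmetic for one vacancy category (hypothetical numbers):

occupied = 900                    # occupied spaces matching the category
actual = 1000                     # existing spaces matching the category
target_vacancy_rate = 0.25

target = int(round(occupied / (1 - target_vacancy_rate)))
diff = target - actual            # spaces to add by sampling, if positive
print("target=%s, diff=%s" % (target, diff))   # target=1200, diff=200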
Example #17
    #        shutil.rmtree(flt_directory_out)
    #        os.mkdir(flt_directory_out)

    logger.log_status("Convert input data from " + str(input_year))

    lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
        'flt_storage', storage_location=flt_directory_in),
                          out_storage=StorageFactory().get_storage(
                              'flt_storage',
                              storage_location=flt_directory_out))

    lc.get_header()  # added 23 june 2009 by mm
    mask = lc.get_mask()
    idx = where(mask == 0)[0]
    lcsubset = DatasetSubset(lc, idx)
    print "Converting:"
    lcsubset.write_dataset(attributes=["relative_x"],
                           out_table_name="land_covers")
    #lcsubset.write_dataset(attributes=["relative_x"], out_table_name="land_covers",
    #                            valuetypes=valuetypes)
    lc.delete_one_attribute("relative_x")
    lcsubset.write_dataset(attributes=["relative_y"],
                           out_table_name="land_covers")
    #lcsubset.write_dataset(attributes=["relative_y"], out_table_name="land_covers",
    #                            valuetypes=valuetypes)
    lc.delete_one_attribute("relative_y")
    #    srcdir = os.path.join(flt_directory_out, "land_covers", "computed")
    #    shutil.move(os.path.join(srcdir,"relative_x.li4"), os.path.join(flt_directory_out, "land_covers"))
    #    shutil.move(os.path.join(srcdir,"relative_y.li4"), os.path.join(flt_directory_out, "land_covers"))
    #    shutil.rmtree(srcdir)
Example #18
    def run(self, **kwargs):
        """Runs the parent model for each subarea separately.
        """
        buildings = self.dataset_pool.get_dataset("building")
        buildings.compute_variables([
            "occupied_units_for_jobs = urbansim_parcel.building.number_of_non_home_based_jobs",
            "units_for_jobs = urbansim_parcel.building.total_non_home_based_job_space",
            "occupied_residential_units = urbansim_parcel.building.number_of_households",
            "urbansim_parcel.building.existing_units",
        ], dataset_pool=self.dataset_pool)
        buildings.compute_one_variable_with_unknown_package(
            variable_name="%s" % (self.subarea_id_name),
            dataset_pool=self.dataset_pool)
        # keep copy of the weights
        original_weight = self.weight.copy()
        self.all_demolished_buildings = array([], dtype='int32')

        regions = self.proposal_set.compute_one_variable_with_unknown_package(
            variable_name="%s" % (self.subarea_id_name),
            dataset_pool=self.dataset_pool)

        unique_regions = unique(regions)
        original_status = self.proposal_set.get_attribute("status_id").copy()
        bldgs_regions = buildings.get_attribute(self.subarea_id_name)
        for area_index in range(unique_regions.size):
            self.area_id = unique_regions[area_index]
            status = self.proposal_set.get_attribute("status_id")
            where_area = regions == self.area_id
            idx_area_in_proposal = where(where_area)[0]
            if idx_area_in_proposal.size <= 0:
                logger.log_status("No proposals for area %s" % self.area_id)
                continue
            bldgs_area_idx = where(bldgs_regions == self.area_id)[0]
            bldgs_subset = DatasetSubset(buildings, index=bldgs_area_idx)
            self.dataset_pool.replace_dataset('building', bldgs_subset)
            idx_out_area_not_active_not_refused = where(
                logical_and(
                    logical_and(status != self.proposal_set.id_active,
                                status != self.proposal_set.id_refused),
                    logical_not(where_area)))[0]
            status[idx_area_in_proposal] = original_status[idx_area_in_proposal]
            status[idx_out_area_not_active_not_refused] = self.proposal_set.id_not_available
            self.proposal_set.modify_attribute(name="status_id", data=status)
            self.weight[:] = original_weight[:]

            logger.log_status("\nDPSM for area %s" % self.area_id)
            dummy, demolished_bldgs = DevelopmentProjectProposalSamplingModel.run(
                self, **kwargs)

            self.all_demolished_buildings = concatenate(
                (self.all_demolished_buildings, demolished_bldgs))
            status = self.proposal_set.get_attribute("status_id")
            where_not_active = where(
                status[idx_area_in_proposal] != self.proposal_set.id_active)[0]
            status[idx_area_in_proposal[
                where_not_active]] = self.proposal_set.id_refused
            self.proposal_set.modify_attribute(name="status_id", data=status)

        # set all proposals that were not set to 'active' to their original status
        idx = where(status != self.proposal_set.id_active)[0]
        self.proposal_set.set_values_of_one_attribute("status_id",
                                                      original_status[idx],
                                                      idx)
        self.dataset_pool.replace_dataset('building', buildings)
        return (self.proposal_set, self.all_demolished_buildings)
Example #19
    def test_agents_placed_in_appropriate_types(self):
        """Create 1000 unplaced industrial jobs and 1 commercial job. Allocate 50 commercial
        gridcells with enough space for 10 commercial jobs per gridcell. After running the
        EmploymentLocationChoiceModel, the 1 commercial job should be placed,
        but the 1000 industrial jobs should remain unplaced
        """
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='job_building_types',
                            table_data={
                                'id': array([2, 1]),
                                'name': array(['commercial', 'industrial'])
                            })
        job_building_types = JobBuildingTypeDataset(
            in_storage=storage, in_table_name='job_building_types')

        storage.write_table(table_name='jobs',
                            table_data={
                                'job_id': arange(1001) + 1,
                                'grid_id': array([0] * 1001),
                                'building_type': array([1] * 1000 + [2])
                            })
        jobs = JobDataset(in_storage=storage, in_table_name='jobs')

        storage.write_table(table_name='gridcells',
                            table_data={
                                'grid_id': arange(50) + 1,
                                'commercial_sqft': array([1000] * 50),
                                'commercial_sqft_per_job': array([100] * 50)
                            })
        gridcells = GridcellDataset(in_storage=storage,
                                    in_table_name='gridcells')

        coefficients = Coefficients(names=("dummy", ), values=(0.1, ))
        specification = EquationSpecification(
            variables=("gridcell.commercial_sqft", ), coefficients=("dummy", ))

        compute_resources = Resources({
            "job": jobs,
            "job_building_type": job_building_types
        })
        agents_index = where(jobs.get_attribute("grid_id") == 0)[0]
        unplace_jobs = DatasetSubset(jobs, agents_index)
        agents_index = where(
            unplace_jobs.get_attribute("building_type") == 2)[0]
        gridcells.compute_variables(
            ["urbansim.gridcell.number_of_commercial_jobs"],
            resources=compute_resources)
        commercial_jobs = gridcells.get_attribute("number_of_commercial_jobs")

        gridcells.compute_variables(
            ["urbansim.gridcell.number_of_industrial_jobs"],
            resources=compute_resources)
        industrial_jobs = gridcells.get_attribute("number_of_industrial_jobs")
        model_group = ModelGroup(job_building_types, "name")
        elcm = EmploymentLocationChoiceModel(
            ModelGroupMember(model_group, "commercial"),
            location_set=gridcells,
            agents_grouping_attribute="job.building_type",
            choices="opus_core.random_choices_from_index",
            sample_size_locations=30)
        elcm.run(specification,
                 coefficients,
                 agent_set=jobs,
                 agents_index=agents_index,
                 debuglevel=1)

        gridcells.compute_variables(
            ["urbansim.gridcell.number_of_commercial_jobs"],
            resources=compute_resources)
        commercial_jobs = gridcells.get_attribute("number_of_commercial_jobs")

        gridcells.compute_variables(
            ["urbansim.gridcell.number_of_industrial_jobs"],
            resources=compute_resources)
        industrial_jobs = gridcells.get_attribute("number_of_industrial_jobs")

        self.assertEqual(
            commercial_jobs.sum() == 1, True,
            "Error, there should only be a total of 1 commercial job")
        self.assertEqual(
            industrial_jobs.sum() == 0, True,
            "Error, there should be no industrial jobs because there's no space for them"
        )
Example #20
    def run(self, n=500, run_config=None, current_year=None, debuglevel=0):
        """
        n - sample n proposals at a time, evaluate them one by one
        """
        self.demolished_buildings = array([], dtype='int32')  # ids of buildings to be demolished
        if current_year is None:
            current_year = SimulationState().get_current_time()
        if not self.positive_proposals:
            logger.log_status(
                "Proposal Set size <= 0, no proposals to consider, skipping DPPSM."
            )
            return (self.proposal_set, self.demolished_buildings)
        self.proposal_component_set.compute_variables([
            'urbansim_parcel.development_project_proposal_component.units_proposed',
            'urbansim_parcel.development_project_proposal_component.is_residential'
        ], dataset_pool=self.dataset_pool)
        self.proposal_set.compute_variables(
            [
                'urbansim_parcel.development_project_proposal.number_of_components',
                'zone_id=development_project_proposal.disaggregate(parcel.zone_id)',
                #'occurence_frequency = development_project_proposal.disaggregate(development_template.sample_size)'
            ],
            dataset_pool=self.dataset_pool)
        buildings = self.dataset_pool.get_dataset("building")
        buildings.compute_variables(
            [
                "occupied_units_for_jobs = urbansim_parcel.building.number_of_non_home_based_jobs",
                "units_for_jobs = urbansim_parcel.building.total_non_home_based_job_space",
                "occupied_residential_units = urbansim_parcel.building.number_of_households",
                #                                "urbansim_parcel.building.existing_units",
                "urbansim_parcel.building.is_residential"
            ],
            dataset_pool=self.dataset_pool)

        ## define unit_name by whether a building is residential or not (with is_residential attribute)
        ## if it is non-residential (0), count units by number of job spaces (units_for_jobs)
        ## if it is residential (1), count units by residential units
        self.unit_name = array(["units_for_jobs", "residential_units"])

        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')
        target_vacancy.compute_variables([
            'is_residential = target_vacancy.disaggregate(building_type.is_residential)'
        ], dataset_pool=self.dataset_pool)
        # This try-except block checks to see if the object has a subarea_id_name,
        # if it does, it calculates the vacancy rates by subarea_id_name
        try:
            # Check for subarea_id_name in target_vacancies dataset
            # if it is present, vacancy rates are specified by subarea_id_name
            # if it is not, vacancy rates are specified region wide
            target_vacancy.load_dataset()
            if self.subarea_id_name in target_vacancy.get_attribute_names():
                current_target_vacancy_this_year = DatasetSubset(
                    target_vacancy,
                    index=where(
                        target_vacancy.get_attribute("year") ==
                        current_year)[0])
                current_target_vacancy = DatasetSubset(
                    current_target_vacancy_this_year,
                    index=where(
                        current_target_vacancy_this_year.get_attribute(
                            self.subarea_id_name) == self.area_id)[0])
            else:
                current_target_vacancy = DatasetSubset(
                    target_vacancy,
                    index=where(
                        target_vacancy.get_attribute("year") ==
                        current_year)[0])
        except AttributeError:
            # vacancy rates are specified region wide:
            current_target_vacancy = DatasetSubset(
                target_vacancy,
                index=where(
                    target_vacancy.get_attribute("year") == current_year)[0])

        if current_target_vacancy.size() == 0:
            raise IOError, 'No target vacancy defined for year %s.' % current_year

        self.existing_units = {}    # total existing units by land use type
        self.occupied_units = {}    # total occupied units by land use type
        self.proposed_units = {}    # total proposed units by land use type
        self.demolished_units = {}  # total (to be) demolished units by land use type

        components_building_type_ids = self.proposal_component_set.get_attribute(
            "building_type_id").astype("int32")
        proposal_ids = self.proposal_set.get_id_attribute()
        proposal_ids_in_component_set = self.proposal_component_set.get_attribute(
            "proposal_id")
        all_units_proposed = self.proposal_component_set.get_attribute(
            "units_proposed")
        number_of_components_in_proposals = self.proposal_set.get_attribute(
            "number_of_components")

        self.accepting_proposals = zeros(
            current_target_vacancy.get_attribute("building_type_id").max() + 1,
            dtype='bool8')  # whether accepting new proposals, per building type
        self.accepted_proposals = []  # index of accepted proposals

        self.target_vacancies = {}
        tv_building_types = current_target_vacancy.get_attribute(
            "building_type_id")
        tv_rate = current_target_vacancy.get_attribute("target_vacancy_rate")
        for itype in range(tv_building_types.size):
            self.target_vacancies[tv_building_types[itype]] = tv_rate[itype]

        self.check_vacancy_rates(current_target_vacancy)  # initialize self.accepting_proposals from current vacancy rates

        sqft_per_job = self.dataset_pool.get_dataset("building_sqft_per_job")
        zones_of_proposals = self.proposal_set.get_attribute("zone_id")
        self.building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(
            zones_of_proposals.max(), tv_building_types.max())
        # consider only those proposals that have all components of accepted type and sum of proposed units > 0
        is_accepted_type = self.accepting_proposals[
            components_building_type_ids]
        sum_is_accepted_type_over_proposals = array(
            ndimage.sum(is_accepted_type,
                        labels=proposal_ids_in_component_set,
                        index=proposal_ids))
        sum_of_units_proposed = array(
            ndimage.sum(all_units_proposed,
                        labels=proposal_ids_in_component_set,
                        index=proposal_ids))
        is_proposal_eligible = logical_and(
            sum_is_accepted_type_over_proposals ==
            number_of_components_in_proposals, sum_of_units_proposed > 0)

        is_proposal_eligible = logical_and(
            is_proposal_eligible,
            self.proposal_set.get_attribute("start_year") == current_year)
        ## handle planned proposals: all proposals with status_id == is_planned
        ## and start_year == current_year are accepted
        planned_proposal_indexes = where(
            logical_and(
                self.proposal_set.get_attribute(
                    "status_id") == self.proposal_set.id_planned,
                self.proposal_set.get_attribute("start_year") ==
                current_year))[0]

        self.consider_proposals(planned_proposal_indexes, force_accepting=True)
        # consider proposals (in this order: planned, proposed, tentative)
        for status in [
                self.proposal_set.id_proposed, self.proposal_set.id_tentative
        ]:
            idx = where(
                logical_and(
                    self.proposal_set.get_attribute("status_id") == status,
                    is_proposal_eligible))[0]
            if idx.size <= 0:
                continue
            logger.log_status(
                "Sampling from %s eligible proposals with status %s." %
                (idx.size, status))
            while (True in self.accepting_proposals):
                if self.weight[idx].sum() == 0.0:
                    logger.log_warning(
                        "Running out of proposals; there aren't any proposals with non-zero weight"
                    )
                    break

                idx = idx[self.weight[idx] > 0]
                n = minimum(idx.size, n)
                sampled_proposal_indexes = probsample_noreplace(
                    proposal_ids[idx],
                    n,
                    prob_array=(self.weight[idx] /
                                float(self.weight[idx].sum())),
                    exclude_index=None,
                    return_index=True)
                self.consider_proposals(
                    arange(self.proposal_set.size())[
                        idx[sampled_proposal_indexes]])
                self.weight[idx[sampled_proposal_indexes]] = 0

        # set status of accepted proposals to 'active'
        self.proposal_set.modify_attribute(name="status_id",
                                           data=self.proposal_set.id_active,
                                           index=array(self.accepted_proposals,
                                                       dtype='int32'))
        building_types = self.dataset_pool.get_dataset("building_type")
        logger.log_status("Status of %s development proposals set to active." %
                          len(self.accepted_proposals))
        logger.log_status(
            "Target/existing vacancy rates (reached using eligible proposals) by building type:"
        )
        for type_id in self.existing_units.keys():
            units_stock = self._get_units_stock(type_id)
            vr = self._get_vacancy_rates(type_id)
            ## units = residential_units if building_type is residential
            ## units = number of job spaces if building_type is non-residential
            logger.log_status(
                              """%(type_id)s[%(type_name)s]: %(vr)s = ((existing_units:%(existing_units)s + 
                              units_proposed:%(units_proposed)s - units_to_be_demolished:%(units_demolished)s) 
                              - units_occupied:%(units_occupied)s) / units_stock:%(units_stock)s""" %  \
                                          { 'type_id': type_id,
                                            'type_name': building_types.get_attribute_by_id("building_type_name", type_id),
                                            'vr':  vr,
                                            'existing_units': int(self.existing_units[type_id]),
                                            'units_occupied': int(self.occupied_units[type_id]),
                                            'units_proposed': int(self.proposed_units[type_id]),
                                            'units_demolished': int(self.demolished_units[type_id]),
                                            'units_stock': int(units_stock)
                                          }
                            )
        # Code added by Jesse Ayers, MAG, 7/20/2009
        # Get the active projects:
        stat_id = self.proposal_set.get_attribute('status_id')
        actv = where(stat_id == 1)[0]
        # Where there are active projects, compute the total_land_area_taken
        # and store it on the development_project_proposals dataset
        # so it can be used by the building_construction_model for the proper
        # computation of units_proposed for those projects with velocity curves
        if actv.size > 0:
            total_land_area_taken_computed = self.proposal_set.get_attribute(
                'urbansim_parcel.development_project_proposal.land_area_taken')
            self.proposal_set.modify_attribute(
                'total_land_area_taken', total_land_area_taken_computed[actv],
                actv)

        return (self.proposal_set, self.demolished_buildings)
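
The sampling loop above repeatedly draws proposals in proportion to self.weight and then zeroes the weight of whatever was drawn, so nothing is considered twice. The weight bookkeeping in isolation (toy numbers; the probability-weighted draw itself is elided):

from numpy import arange, array

weights = array([0.0, 2.0, 1.0, 1.0])
idx = arange(weights.size)

idx = idx[weights[idx] > 0]        # keep only proposals that can still be drawn
print(idx)                         # -> [1 2 3]
weights[idx[:2]] = 0               # pretend proposals 1 and 2 were just sampled
idx = idx[weights[idx] > 0]
print(idx)                         # -> [3]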
Example #21
    def run(self,
            n=500,
            realestate_dataset_name='building',
            current_year=None,
            occupied_spaces_variable="occupied_spaces",
            total_spaces_variable="total_spaces",
            run_config=None,
            debuglevel=0):
        """
        run method of the Development Project Proposal Sampling Model
        
        **Parameters**
        
            **n** : int, sample size for each iteration
                   
                   sample n proposals at a time, which are then evaluated one by one until the 
                   target vacancies are satisfied or proposals run out
                   
            **realestate_dataset_name** : string, name of real estate dataset
            
            **current_year**: int, simulation year. If None, get value from SimulationState
            
            **occupied_spaces_variable** : string, variable name for calculating how many spaces are currently occupied

                                          It can either be a variable of the real_estate dataset that returns
                                          the amount of occupied space, or a target_vacancy attribute
                                          that contains the names of real_estate variables.
            
            **total_spaces_variable** : string, variable name for calculating total existing spaces
            
        **Returns**
        
            **proposal_set** : the proposal set, with accepted proposals set to active status 
            
            **demolished_buildings** : ids of buildings to be demolished for re-development
        """

        self.accepted_proposals = []
        self.demolished_buildings = []  #id of buildings to be demolished
        if self.proposal_set.n <= 0:
            logger.log_status(
                "The size of proposal_set is 0; no proposals to consider, skipping DPPSM."
            )
            return (self.proposal_set, self.demolished_buildings)

        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')

        if current_year is None:
            year = SimulationState().get_current_time()
        else:
            year = current_year
        this_year_index = where(target_vacancy['year'] == year)[0]
        target_vacancy_for_this_year = DatasetSubset(target_vacancy,
                                                     this_year_index)
        if target_vacancy_for_this_year.size() == 0:
            raise IOError, 'No target vacancy defined for year %s.' % year

        ## current_target_vacancy.target_attribute_name = 'target_vacancy_rate'
        ## each column provides a category for which a target vacancy is specified
        self.column_names = list(
            set(target_vacancy.get_known_attribute_names()) -
            set([target_vacancy.target_attribute_name, 'year', '_hidden_id_',
                 occupied_spaces_variable, total_spaces_variable]))
        self.column_names.sort(reverse=True)

        ## buildings table provides existing stocks
        self.realestate_dataset = self.dataset_pool.get_dataset(
            realestate_dataset_name)

        occupied_spaces_variables = [occupied_spaces_variable]
        total_spaces_variables = [total_spaces_variable]
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            occupied_spaces_variables += unique(
                target_vacancy_for_this_year[occupied_spaces_variable]).tolist()
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            total_spaces_variables += unique(
                target_vacancy_for_this_year[total_spaces_variable]).tolist()

        self._compute_variables_for_dataset_if_needed(
            self.realestate_dataset, self.column_names +
            occupied_spaces_variables + total_spaces_variables)
        self._compute_variables_for_dataset_if_needed(
            self.proposal_component_set,
            self.column_names + total_spaces_variables)
        self.proposal_set.compute_variables(
            "urbansim_parcel.development_project_proposal.number_of_components",
            dataset_pool=self.dataset_pool)

        n_column = len(self.column_names)
        target_vacancy_for_this_year.column_values = target_vacancy_for_this_year.get_multiple_attributes(
            self.column_names).reshape((-1, n_column))
        self.realestate_dataset.column_values = self.realestate_dataset.get_multiple_attributes(
            self.column_names).reshape((-1, n_column))
        self.proposal_component_set.column_values = self.proposal_component_set.get_multiple_attributes(
            self.column_names).reshape((-1, n_column))
        # defaults; may be overridden below by the spaces variables specified in the target_vacancy dataset
        self.realestate_dataset.total_spaces = self.realestate_dataset[
            total_spaces_variable]
        self.proposal_component_set.total_spaces = self.proposal_component_set[
            total_spaces_variable]
        self.realestate_dataset.occupied_spaces = self.realestate_dataset[
            occupied_spaces_variable]

        self.accounting = {}
        self.logging = {}
        #has_needed_components = zeros(self.proposal_set.size(), dtype='bool')
        for index in range(target_vacancy_for_this_year.size()):
            column_value = tuple(
                target_vacancy_for_this_year.column_values[index, :].tolist())
            accounting = {
                'target_vacancy':
                target_vacancy_for_this_year[
                    target_vacancy.target_attribute_name][index]
            }

            realestate_indexes = self.get_index_by_condition(
                self.realestate_dataset.column_values, column_value)
            component_indexes = self.get_index_by_condition(
                self.proposal_component_set.column_values, column_value)

            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year[
                    occupied_spaces_variable][index]
                self.realestate_dataset.occupied_spaces[realestate_indexes] = (
                    self.realestate_dataset[this_occupied_spaces_variable]
                    [realestate_indexes]).astype(
                        self.realestate_dataset.occupied_spaces.dtype)

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year[
                    total_spaces_variable][index]
                self.realestate_dataset.total_spaces[realestate_indexes] = (
                    self.realestate_dataset[this_total_spaces_variable]
                    [realestate_indexes]).astype(
                        self.realestate_dataset.total_spaces.dtype)
                self.proposal_component_set.total_spaces[component_indexes] = (
                    self.proposal_component_set[this_total_spaces_variable]
                    [component_indexes]).astype(
                        self.proposal_component_set.total_spaces.dtype)

            accounting["total_spaces_variable"] = this_total_spaces_variable
            accounting["total_spaces"] = self.realestate_dataset.total_spaces[
                realestate_indexes].sum()
            accounting[
                "occupied_spaces_variable"] = this_occupied_spaces_variable
            accounting[
                "occupied_spaces"] = self.realestate_dataset.occupied_spaces[
                    realestate_indexes].sum()
            accounting["target_spaces"] = int( round( accounting["occupied_spaces"] /\
                                                     (1 - accounting["target_vacancy"])
                                               ) )
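            # Illustration (hypothetical numbers): occupied_spaces=900 and
            # target_vacancy=0.10 give target_spaces = round(900 / 0.9) = 1000,
            # i.e. 100 more spaces are needed to reach a 10% vacancy rate.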
            accounting["proposed_spaces"] = 0
            accounting["demolished_spaces"] = 0

            self.accounting[column_value] = accounting

            if self._is_target_reached(column_value):
                proposal_indexes = self.proposal_set.get_id_index(
                    unique(self.proposal_component_set['proposal_id']
                           [component_indexes]))
                single_component_indexes = where(
                    self.proposal_set["number_of_components"] == 1)[0]
                self.weight[intersect1d(proposal_indexes,
                                        single_component_indexes)] = 0.0

        ## handle planned proposals: all proposals with status_id == is_planned
        ## and start_year == year are accepted
        planned_proposal_indexes = where(
            logical_and(
                self.proposal_set.get_attribute("status_id") ==
                self.proposal_set.id_planned,
                self.proposal_set.get_attribute("start_year") == year))[0]

        self.consider_proposals(planned_proposal_indexes, force_accepting=True)

        # consider proposals (in this order: proposed, tentative)
        for status in [
                self.proposal_set.id_proposed, self.proposal_set.id_tentative
        ]:
            stat = (self.proposal_set.get_attribute("status_id") == status)
            if stat.sum() == 0:
                continue

            logger.log_status(
                "Sampling from %s eligible proposals of status %s." %
                (stat.sum(), status))
            iteration = 0
            while (not self._is_target_reached()):
                ## prevent proposals from being sampled for vacancy types whose target has been reached
                #for column_value in self.accounting.keys():

                if self.weight[stat].sum() == 0.0:
                    logger.log_warning(
                        "Running out of proposals of status %s before vacancy targets are reached; there aren't any proposals with non-zero weight"
                        % status)
                    break

                available_indexes = where(logical_and(stat,
                                                      self.weight > 0))[0]
                sample_size = minimum(available_indexes.size, n)
                sampled_proposal_indexes = probsample_noreplace(
                    available_indexes,
                    sample_size,
                    prob_array=self.weight[available_indexes],
                    return_index=False)

                self.consider_proposals(sampled_proposal_indexes)
                self.weight[sampled_proposal_indexes] = 0
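                # zero out the weights of the proposals just considered so they
                # cannot be drawn again in a later sampling iteration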
                #sample_size = 1
                #sampled_proposal_index = probsample_noreplace(available_indexes, sample_size,
                #prob_array=self.weight[available_indexes],
                #return_index=False)

                #self.consider_proposal(sampled_proposal_index)

                #self.weight[sampled_proposal_index] = 0
                iteration += 1

        self._log_status()

        # set status of accepted proposals to 'active'
        self.proposal_set.modify_attribute(name="status_id",
                                           data=self.proposal_set.id_active,
                                           index=array(self.accepted_proposals,
                                                       dtype='int32'))

        # Code added by Jesse Ayers, MAG, 7/20/2009
        # Get the active projects:
        stat_id = self.proposal_set.get_attribute('status_id')
        actv = where(stat_id == 1)[0]
        # Where there are active projects, compute the total_land_area_taken
        # and store it on the development_project_proposals dataset
        # so it can be used by the building_construction_model for the proper
        # computation of units_proposed for those projects with velocity curves
        if actv.size > 0:
            total_land_area_taken_computed = self.proposal_set.get_attribute(
                'urbansim_parcel.development_project_proposal.land_area_taken')
            self.proposal_set.modify_attribute(
                'total_land_area_taken', total_land_area_taken_computed[actv],
                actv)

        return (self.proposal_set, self.realestate_dataset.get_id_attribute()[
            self.demolished_buildings])
    def prepare_for_run(
            self,
            dataset_pool,
            create_proposal_set=True,
            parcel_filter_for_new_development=None,
            parcel_filter_for_redevelopment=None,
            template_filter=None,
            spec_replace_module_variable_pair=None,
            proposed_units_variable="urbansim_parcel.development_project_proposal.units_proposed",
            **kwargs):
        """create development project proposal dataset from parcels and development templates.
        spec_replace_module_variable_pair is a tuple with two elements: module name, variable within the module
        that contains a dictionary of model variables to be replaced in the specification.
        """
        specification, coefficients, dummy = RegressionModel.prepare_for_run(
            self, **kwargs)
        try:
            existing_proposal_set_parent = dataset_pool.get_dataset(
                'development_project_proposal')
            # load proposals whose status_id is neither id_tentative nor id_not_available
            available_idx = where(
                logical_and(
                    existing_proposal_set_parent.get_attribute("status_id") !=
                    DevelopmentProjectProposalDataset.id_tentative,
                    existing_proposal_set_parent.get_attribute("status_id") !=
                    DevelopmentProjectProposalDataset.id_not_available))[0]
            existing_proposal_set = DatasetSubset(existing_proposal_set_parent,
                                                  available_idx)
            # Code updated by Hanyi Li, MAG 6/8/2010
            # Replacing the cached 'development_project_proposal' dataset with
            # the filtered dataset 'existing_proposal_set'
            dataset_pool.replace_dataset(
                existing_proposal_set_parent.get_dataset_name(),
                existing_proposal_set)
        except:
            existing_proposal_set = None

        parcels = dataset_pool.get_dataset('parcel')
        templates = dataset_pool.get_dataset('development_template')

        # It is important that during this method no variable flushing happens, since
        # we create datasets of the same name for different purposes (new development and redevelopment)
        # and flushing would mix them up
        flush_variables_current = SessionConfiguration().get(
            'flush_variables', False)
        SessionConfiguration().put_data({'flush_variables': False})

        # Code added by Jesse Ayers, MAG, 9/14/2009
        # Getting an index of parcels that have actively developing projects (those on a velocity function)
        # and making sure that new proposals are not generated for them
        if existing_proposal_set:
            parcels_with_proposals = existing_proposal_set.get_attribute(
                'parcel_id')
            parcels_with_proposals_idx = parcels.get_id_index(
                parcels_with_proposals)
            if parcel_filter_for_new_development is not None:
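                # the character right after the first '=' tells a boolean
                # expression ('x == 1', kept as is) apart from an assignment
                # ('name = expr', whose left-hand side must be stripped first)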
                if parcel_filter_for_new_development[
                        parcel_filter_for_new_development.find('=') +
                        1] == '=':
                    filter = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
                else:
                    parcel_filter_for_new_development = parcel_filter_for_new_development[
                        parcel_filter_for_new_development.find('=') +
                        1:].lstrip()
                    filter = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
                index1 = where(parcels.compute_variables(filter))[0]

        else:
            if parcel_filter_for_new_development is not None:
                index1 = where(
                    parcels.compute_variables(
                        parcel_filter_for_new_development))[0]
            else:
                index1 = None

        if template_filter is not None:
            try:
                index2 = where(templates.compute_variables(template_filter))[0]
            except Exception, e:
                logger.log_warning(
                    "template_filter is set to %s, but there is an error when computing it: %s"
                    % (template_filter, e))
                index2 = None
    def run(self, year=None, dataset_pool=None, **kwargs):
        """
        """
        if dataset_pool is None:
            dataset_pool = SessionConfiguration().get_dataset_pool()

        if year is None:
            year = SimulationState().get_current_time()

        this_year_index = where(
            self.scheduled_events.get_attribute('year') == year)[0]
        scheduled_events_for_this_year = DatasetSubset(self.scheduled_events,
                                                       this_year_index)
        scheduled_events_for_this_year.load_dataset_if_not_loaded()
        column_names = list(
            set(self.scheduled_events.get_known_attribute_names()) - set([
                'year', 'action', 'attribute', 'amount', 'event_id',
                '_hidden_id_'
            ]))
        column_names.sort()
        #        column_values = dict([ (name, scheduled_events_for_this_year.get_attribute(name)) for name in column_names])

        for index in range(scheduled_events_for_this_year.size()):
            indicator = ones(self.dataset.size(), dtype='bool')
            event_attr = {}
            for attribute in column_names:
                if attribute in self.dataset.get_known_attribute_names():
                    dataset_attribute = self.dataset.get_attribute(attribute)
                else:
                    ## this is done inside the loop because some action may delete computed attributes, such as dataset.add_elements()
                    try:
                        dataset_attribute = self.dataset.compute_one_variable_with_unknown_package(
                            attribute, dataset_pool=dataset_pool)
                    except:
                        raise ValueError, "attribute %s used in the scheduled events dataset cannot be found in dataset %s" % (
                            attribute, self.dataset.get_dataset_name())


            # if attribute in column_names:
                aval = scheduled_events_for_this_year.get_attribute(
                    attribute)[index]
                if aval == -1:
                    continue  # ignore if column value is -1
                else:
                    indicator *= dataset_attribute == aval
                    event_attr.update({attribute: aval})

            #agents in dataset satisfying all conditions are identified by indicator
            legit_index = where(indicator)[0]

            this_event = scheduled_events_for_this_year.get_data_element(index)
            if not hasattr(this_event, 'attribute'):
                action_attr_name = ''
            else:
                action_attr_name = this_event.attribute
            action_function = getattr(self,
                                      '_' + this_event.action.strip().lower())
            action_function(amount=this_event.amount,
                            attribute=action_attr_name,
                            dataset=self.dataset,
                            index=legit_index,
                            data_dict=event_attr)

            self.post_run(self.dataset, legit_index, **kwargs)

        return self.dataset
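A minimal, self-contained sketch (plain numpy, made-up data) of the event-matching logic in the loop above: the indicator mask is narrowed by multiplying boolean equality tests, and a column value of -1 acts as a wildcard.

import numpy

dataset = {'sector_id': numpy.array([1, 1, 2, 2, 3]),
           'zone_id': numpy.array([10, 20, 10, 20, 10])}
event = {'sector_id': 2, 'zone_id': -1}  # -1 means "match any zone"

indicator = numpy.ones(5, dtype='bool')
for attribute, aval in event.items():
    if aval == -1:
        continue  # ignore wildcard columns, as the model above does
    indicator *= dataset[attribute] == aval

print(numpy.where(indicator)[0])  # -> [2 3]: the rows this event applies to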
Example #24
0
        ]
    })

    #CacheScenarioDatabase().run(gridcell_config)

    # step 2: cache water demand data by year
    dbcon = ScenarioDatabase(database_name="water_demand_seattle2")

    print "Create Storage object."
    from opus_core.storage_factory import StorageFactory
    storage = StorageFactory().get_storage(type="mysql_storage",
                                           storage_location=dbcon)

    from waterdemand.datasets.consumption_dataset import ConsumptionDataset
    consumption_types = ['wrmr', 'wcsr', 'wrsr']  #'wcmr'
    for consumption_type in consumption_types:

        consumption = ConsumptionDataset(in_storage=storage,
                                         in_table_name=consumption_type +
                                         '_grid')

        for year in range(1990, 2001):
            print "%s %s" % (consumption_type, year)
            year_index = where(consumption.get_attribute("billyear") == year)[0]
            out_storage = StorageFactory().get_storage(
                type="flt_storage",
                storage_location=os.path.join(cache_directory, str(year)))
            consumption_subset = DatasetSubset(consumption, year_index)
            consumption_subset.write_dataset(
                out_storage=out_storage,
                out_table_name=consumption_type.lower())
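A rough sketch (plain numpy, invented table) of the per-year caching pattern above: select the rows of one year with where(), take the subset, and write it to a year-specific location.

import numpy

billyear = numpy.array([1990, 1991, 1990, 1992])
consumption = numpy.array([10.5, 12.0, 9.75, 11.25])

for year in range(1990, 1993):
    year_index = numpy.where(billyear == year)[0]
    subset = consumption[year_index]
    # in the real script this subset is written via DatasetSubset to
    # os.path.join(cache_directory, str(year)) as an flt table
    print("%s %s" % (year, subset))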
    def get_active_agent_set(self):
        """Return the agent set that makes choices in the model.
        Works only for the ChoiceModel class.
        """
        agents = self.get_agent_set()
        return DatasetSubset(agents, self.get_agent_set_index())
    def _do_run(self,
                location_set,
                agent_set,
                agents_index,
                data_objects=None,
                resources=None):
        location_id_name = location_set.get_id_name()[0]
        jobsubset = DatasetSubset(agent_set, agents_index)
        if jobsubset.size() <= 0:
            return array([], dtype='int32')
        #unplace jobs
        agent_set.set_values_of_one_attribute(
            location_id_name, resize(array([-1.0]), jobsubset.size()),
            agents_index)
        sector_ids = jobsubset.get_attribute("sector_id")
        sectors = unique(sector_ids)
        counts = ndimage_sum(ones((jobsubset.size(), )),
                             labels=sector_ids.astype('int32'),
                             index=sectors.astype('int32'))
        if sectors.size <= 1:
            counts = array([counts])
        variables = map(lambda x: "number_of_jobs_of_sector_" + str(int(x)),
                        sectors)
        compute_variables = map(
            lambda var: self.variable_package + "." +
                        location_set.get_dataset_name() + "." + var,
            variables)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included(
            {agent_set.get_dataset_name(): agent_set})
        location_set.compute_variables(compute_variables,
                                       dataset_pool=self.dataset_pool)
        if self.filter is None:
            location_index = arange(location_set.size())
        else:
            filter_values = location_set.compute_variables(
                [self.filter], dataset_pool=self.dataset_pool)
            location_index = where(filter_values > 0)[0]
        if location_index.size <= 0:
            logger.log_status("No locations available. Nothing to be done.")
            return array([])
        location_subset = DatasetSubset(location_set, location_index)
        i = 0
        for sector in sectors:
            distr = location_subset.get_attribute(variables[i])
            if ma.allclose(distr.sum(), 0):
                uniform_prob = 1.0 / distr.size
                distr = resize(array([uniform_prob], dtype='float64'),
                               distr.size)
                logger.log_warning(
                    "Probabilities in scaling model for sector " +
                    str(sector) +
                    " sum to 0.0.  Substituting uniform distribution!")


            # random_sample = sample(location_set.get_attribute("grid_id"), k=int(counts[i]),
            #                        probabilities=distr)
            distr = distr / float(distr.sum())
            random_sample = probsample_replace(
                location_subset.get_id_attribute(),
                size=int(counts[i]),
                prob_array=distr)
            idx = where(sector_ids == sector)[0]
            #modify job locations
            agent_set.set_values_of_one_attribute(location_id_name,
                                                  random_sample,
                                                  agents_index[idx])
            i += 1
        return agent_set.get_attribute_by_index(location_id_name, agents_index)
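A sketch (numpy only, degenerate weights chosen on purpose) of the fallback used in the loop above: when the location weights of a sector sum to zero, a uniform distribution is substituted before sampling locations with replacement in proportion to the weights; numpy's choice stands in for probsample_replace.

import numpy

distr = numpy.zeros(4, dtype='float64')  # weights that sum to 0.0
if numpy.allclose(distr.sum(), 0):
    distr = numpy.resize(numpy.array([1.0 / distr.size], dtype='float64'), distr.size)
distr = distr / float(distr.sum())

ids = numpy.array([101, 102, 103, 104])
rng = numpy.random.RandomState(0)
print(rng.choice(ids, size=3, replace=True, p=distr))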
Example #27
0
    def run(self,
            model_configuration,
            vacancy_table,
            history_table,
            year,
            location_set,
            resources=None):
        large_area_ids = vacancy_table.get_attribute("large_area_id")
        locations_large_area_ids = location_set.compute_variables(
            "washtenaw.%s.large_area_id" % location_set.get_dataset_name())
        unique_large_areas = unique(large_area_ids)
        self._compute_vacancy_variables(
            location_set, model_configuration['development_project_types'],
            resources)

        projects = {}
        for area in unique_large_areas:
            location_index = where(locations_large_area_ids == area)[0]
            locations_for_this_area = DatasetSubset(location_set,
                                                    location_index)
            logger.log_status("DPLCM for area %s", area)
            target_residential_vacancy_rate, target_non_residential_vacancy_rate = self._get_target_vacancy_rates(
                vacancy_table, year, area)
            for project_type in model_configuration[
                    'development_project_types']:
                # determine current-year vacancy rates
                vacant_units_sum = locations_for_this_area.get_attribute(
                    self.variable_for_vacancy[project_type]).sum()
                units_sum = float(
                    locations_for_this_area.get_attribute(
                        self.units_variable[project_type]).sum())
                vacant_rate = self.safe_divide(vacant_units_sum, units_sum)
                if model_configuration['development_project_types'][
                        project_type]['residential']:
                    target_vacancy_rate = target_residential_vacancy_rate
                else:
                    target_vacancy_rate = target_non_residential_vacancy_rate
                should_develop_units = int(
                    round(
                        max(0,
                            (target_vacancy_rate * units_sum -
                             vacant_units_sum) / (1 - target_vacancy_rate))))
                logger.log_status(
                    project_type +
                    ": vacant units: %d, should be vacant: %f, sum units: %d, will develop: %d"
                    % (vacant_units_sum, target_vacancy_rate * units_sum,
                       units_sum, should_develop_units))
                #create projects
                if should_develop_units > 0:
                    project_dataset = self._create_projects(
                        should_develop_units, project_type, history_table,
                        locations_for_this_area, units_sum,
                        model_configuration['development_project_types'],
                        resources)
                    project_dataset.add_attribute(
                        array(project_dataset.size() * [area]),
                        "large_area_id",
                        metadata=AttributeType.PRIMARY)
                    if (project_type not in projects.keys()) or (
                            projects[project_type] is None):
                        projects[project_type] = project_dataset
                    else:
                        projects[project_type].join_by_rows(
                            project_dataset, change_ids_if_not_unique=True)

        for project_type in model_configuration['development_project_types']:
            if project_type not in projects.keys():
                projects[project_type] = None
            if projects[project_type] is None:
                size = 0
            else:
                projects[project_type].add_submodel_categories()
                size = projects[project_type].size()
            logger.log_status("%s %s projects to be built" %
                              (size, project_type))
        return projects
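A worked example (hypothetical numbers) of the units-to-develop formula above: with 10000 units, 300 of them vacant, and a 5% vacancy target, (0.05 * 10000 - 300) / (1 - 0.05) rounds to 211; the division by (1 - target) accounts for the fact that each newly built unit is itself vacant and enlarges the stock.

units_sum, vacant_units_sum, target_vacancy_rate = 10000.0, 300.0, 0.05
should_develop_units = int(round(max(
    0, (target_vacancy_rate * units_sum - vacant_units_sum)
    / (1 - target_vacancy_rate))))
print(should_develop_units)  # -> 211 (stock 10211, vacant 511, rate ~5.0%)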
Example #28
0
#years = [1991, 1995]
#years = [1995, 1999]
#years = [2002]
#years = sys.argv[3]
years = [2007, 2007]

lc1 = LandCoverDataset(in_storage=StorageFactory().get_storage(
    'flt_storage',
    storage_location=os.path.join(flt_directory_in, str(years[0]))),
                       out_storage=StorageFactory().get_storage(
                           'flt_storage',
                           storage_location=os.path.join(
                               flt_directory_out, str(years[0]))))

agents_index = where(lc1.get_attribute(index_attribute))[0]
lc1subset = DatasetSubset(lc1, agents_index)
print "Writing set 1:"
for attr in lc1.get_primary_attribute_names():
    print "   ", attr
    lc1subset.write_dataset(attributes=[attr], out_table_name="land_covers")
    # Leaving the next line in causes the processing of every other input data
    # file; commenting it out causes a memory error.
    lc1.delete_one_attribute(attr)

lc2 = LandCoverDataset(in_storage=StorageFactory().get_storage(
    'flt_storage',
    storage_location=os.path.join(flt_directory_in, str(years[1]))),
                       out_storage=StorageFactory().get_storage(
                           'flt_storage',
                           storage_location=os.path.join(
                               flt_directory_out, str(years[1]))))
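A sketch (in-memory stand-ins, hypothetical attribute names) of the memory-bounding pattern above: write one attribute at a time and drop it immediately, so only a single attribute array is resident at any moment.

import numpy

attributes = {'devgrid_id': numpy.arange(5), 'lct': numpy.ones(5, dtype='int32')}
for name in list(attributes.keys()):
    # stand-in for lc1subset.write_dataset(attributes=[name], out_table_name="land_covers")
    print("writing %s: %s" % (name, attributes[name]))
    del attributes[name]  # stand-in for lc1.delete_one_attribute(name)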
Example #29
0
    def run(self,
            development_proposal_set,
            building_dataset,
            dataset_pool,
            buildings_to_be_demolished=[],
            consider_amount_built_in_parcels=False,
            current_year=None):

        self.demolish_buildings(buildings_to_be_demolished, building_dataset,
                                dataset_pool)

        if development_proposal_set.size() <= 0:
            logger.log_status(
                "Proposal set is empty. Nothing to be constructed.")
            return development_proposal_set

        # load velocity function dataset
        try:
            velocity_function_set = dataset_pool.get_dataset(
                "velocity_function")
        except:
            velocity_function_set = None

        # choose active projects
        is_active = development_proposal_set.get_attribute(
            "status_id") == development_proposal_set.id_active
        is_delayed_or_active = logical_or(
            is_active,
            development_proposal_set.get_attribute("status_id") ==
            development_proposal_set.id_with_velocity)
        active_idx = where(is_delayed_or_active)[0]

        if active_idx.size <= 0:
            logger.log_status("No new buildings built.")
            return development_proposal_set

        if current_year is None:
            current_year = SimulationState().get_current_time()

        active_proposal_set = DatasetSubset(development_proposal_set,
                                            active_idx)

        # create proposal_component_set from the active proposals
        proposal_component_set = create_from_proposals_and_template_components(
            active_proposal_set,
            dataset_pool.get_dataset('development_template_component'))
        dataset_pool.replace_dataset(proposal_component_set.get_dataset_name(),
                                     proposal_component_set)

        # determine building types and corresponding unit names of the involved building_types
        building_type_id = proposal_component_set.get_attribute(
            "building_type_id")
        building_type_set = dataset_pool.get_dataset("building_type")
        #        unit_names = building_type_set.compute_variables([
        #                                  'building_type.disaggregate(generic_building_type.unit_name)'], dataset_pool=dataset_pool)
        unit_names = building_type_set.get_attribute("unit_name")
        # get unique values of the involved generic building types and unique unit names
        unique_building_types = unique(building_type_id)
        index_in_building_types = building_type_set.get_id_index(
            unique_building_types)
        unit_names = unit_names[index_in_building_types]
        is_residential = building_type_set.get_attribute(
            "is_residential")[index_in_building_types] == 1
        unique_unit_names = unique(unit_names)

        # determine existing units on parcels
        parcels = dataset_pool.get_dataset("parcel")
        parcels.compute_variables(
            ["urbansim_parcel.parcel.vacant_land_area"] +
            ["urbansim_parcel.parcel.residential_units"] +
            map(lambda x: "urbansim_parcel.parcel.%s" % x, unique_unit_names),
            dataset_pool=dataset_pool)
        parcel_is_lut_vacant = parcels.compute_variables(
            ["urbansim_parcel.parcel.is_land_use_type_vacant"],
            dataset_pool=dataset_pool)
        parcel_lut = parcels.get_attribute("land_use_type_id")
        component_land_use_types = proposal_component_set.compute_variables(
            [
                'development_project_proposal_component.disaggregate(development_template.land_use_type_id, [development_project_proposal])'
            ],
            dataset_pool=dataset_pool)

        # from the velocity function determine the amount to be built for each component (in %)
        if velocity_function_set is not None:
            cummulative_amount_of_development = proposal_component_set.compute_variables(
                [
                    "urbansim_parcel.development_project_proposal_component.cummulative_amount_of_development"
                ],
                dataset_pool=dataset_pool)
            percent_of_development_this_year = proposal_component_set.compute_variables(
                [
                    "urbansim_parcel.development_project_proposal_component.percent_of_development_this_year"
                ],
                dataset_pool=dataset_pool)
        else:  # if there is no velocity function, all components have velocity of 100%
            percent_of_development_this_year = resize(
                array([100], dtype="int32"), proposal_component_set.size())

        # amount to be built
        to_be_built = proposal_component_set.compute_variables(
            [
                'urbansim_parcel.development_project_proposal_component.units_proposed'
            ],
            dataset_pool=dataset_pool
        ) / 100.0 * percent_of_development_this_year

        # initializing for new buildings
        max_building_id = building_dataset.get_id_attribute().max()
        new_buildings = {}
        new_buildings["parcel_id"] = array([], dtype="int32")
        new_buildings["residential_units"] = array([], dtype="int32")
        new_buildings["non_residential_sqft"] = array([], dtype="int32")
        new_buildings["building_type_id"] = array([], dtype="int32")
        new_buildings["sqft_per_unit"] = array(
            [], dtype=building_dataset.get_attribute("sqft_per_unit").dtype)
        new_buildings["land_area"] = array(
            [], dtype=building_dataset.get_attribute("land_area").dtype)
        new_buildings["improvement_value"] = array(
            [],
            dtype=building_dataset.get_attribute("improvement_value").dtype)
        new_buildings["template_id"] = array([], dtype="int32")

        sqft_per_unit = proposal_component_set.get_attribute(
            "building_sqft_per_unit").astype(
                new_buildings["sqft_per_unit"].dtype)
        # Compute land_area_taken properly if velocity function is present
        if velocity_function_set is not None:
            larea_taken = proposal_component_set.compute_variables(
                ['urbansim_parcel.development_project_proposal_component.land_area_taken'],
                dataset_pool=dataset_pool)
            pct_dev_this_yr_conv = percent_of_development_this_year / 100.0
            land_area_taken = larea_taken * pct_dev_this_yr_conv
        else:
            land_area_taken = proposal_component_set.compute_variables(
                [
                    'urbansim_parcel.development_project_proposal_component.land_area_taken'
                ],
                dataset_pool=dataset_pool).astype(
                    new_buildings["land_area"].dtype)
        construction_cost = proposal_component_set.compute_variables(
            [
                'urbansim_parcel.development_project_proposal_component.construction_cost'
            ],
            dataset_pool=dataset_pool).astype(
                new_buildings["improvement_value"].dtype)
        template_ids = proposal_component_set.get_attribute("template_id")
        number_of_new_buildings = {}
        number_of_new_buildings_by_template_id = {}

        # iterate over building types that are unique over the involved proposals
        for itype in range(unique_building_types.size):
            this_building_type = unique_building_types[itype]
            number_of_new_buildings[this_building_type] = 0
            unit_name = unit_names[itype]
            if is_residential[itype]:
                unit_name = 'residential_units'
            component_index = where(building_type_id == this_building_type)[0]
            parcel_ids_in_components = proposal_component_set.get_attribute_by_index(
                "parcel_id", component_index)
            unique_parcels = unique(parcel_ids_in_components)
            # iterate over involved parcels
            for parcel_id in unique_parcels:
                pidx = component_index[parcel_ids_in_components == parcel_id]
                parcel_index = parcels.get_id_index(parcel_id)
                # what is already built on this parcel
                if consider_amount_built_in_parcels:
                    amount_built = parcels.get_attribute_by_index(
                        unit_name, parcel_index)
                else:
                    amount_built = 0
                # what is proposed on this parcel
                amount_proposed = to_be_built[pidx].sum()
                # build if needed
                if rint(amount_proposed) > amount_built:
                    if unit_name == "residential_units":
                        bunit = "residential_units"
                        bnunit = "non_residential_sqft"
                    else:
                        bnunit = "residential_units"
                        bunit = "non_residential_sqft"
                    to_be_built_cumsum = rint(cumsum(
                        to_be_built[pidx])).astype("int32")
                    idx_to_be_built = where(
                        to_be_built_cumsum > amount_built)[0]
                    new_buildings["parcel_id"] = concatenate(
                        (new_buildings["parcel_id"],
                         array(idx_to_be_built.size * [parcel_id],
                               dtype="int32")))
                    new_buildings[bunit] = concatenate(
                        (new_buildings[bunit],
                         rint(to_be_built[pidx][idx_to_be_built]).astype(
                             new_buildings[bunit].dtype)))
                    new_buildings[bnunit] = concatenate(
                        (new_buildings[bnunit],
                         array(idx_to_be_built.size * [0], dtype="int32")))
                    new_buildings["building_type_id"] = concatenate(
                        (new_buildings["building_type_id"],
                         array(idx_to_be_built.size * [this_building_type],
                               dtype="int32")))
                    new_buildings["sqft_per_unit"] = concatenate(
                        (new_buildings["sqft_per_unit"],
                         sqft_per_unit[pidx][idx_to_be_built]))
                    new_buildings["land_area"] = concatenate(
                        (new_buildings["land_area"],
                         land_area_taken[pidx][idx_to_be_built]))
                    new_buildings["improvement_value"] = concatenate(
                        (new_buildings["improvement_value"],
                         construction_cost[pidx][idx_to_be_built]))
                    new_buildings["template_id"] = concatenate(
                        (new_buildings["template_id"],
                         template_ids[pidx][idx_to_be_built]))
                    number_of_new_buildings[
                        this_building_type] += idx_to_be_built.size
                    if parcel_is_lut_vacant[parcel_index]:
                        parcel_lut[parcel_index] = component_land_use_types[
                            pidx][idx_to_be_built][0]
                    # count number of buildings by template ids
                    for icomp in range(idx_to_be_built.size):
                        tid = template_ids[pidx[idx_to_be_built[icomp]]]
                        if tid not in number_of_new_buildings_by_template_id.keys():
                            number_of_new_buildings_by_template_id[tid] = 0
                        number_of_new_buildings_by_template_id[tid] += 1

        # add created buildings to the existing building dataset
        buildings_id_name = building_dataset.get_id_name()[0]
        new_buildings[buildings_id_name] = max_building_id + arange(
            1, new_buildings["parcel_id"].size + 1)
        new_buildings['year_built'] = resize(
            array([current_year], dtype="int32"),
            new_buildings["parcel_id"].size)
        building_dataset.add_elements(new_buildings,
                                      require_all_attributes=False)
        if "zone_id" in building_dataset.get_known_attribute_names():
            zone_ids = building_dataset.compute_variables(
                ['building.disaggregate(parcel.zone_id)'],
                dataset_pool=dataset_pool)
            building_dataset.modify_attribute(name="zone_id", data=zone_ids)
        if "county" in building_dataset.get_known_attribute_names():
            county_ids = building_dataset.compute_variables(
                ['building.disaggregate(parcel.county)'],
                dataset_pool=dataset_pool)
            building_dataset.modify_attribute(name="county", data=county_ids)

        logger.log_status("%s new buildings built." %
                          new_buildings["parcel_id"].size)
        for type_id in number_of_new_buildings.keys():
            logger.log_status("building type %s: %s" %
                              (type_id, number_of_new_buildings[type_id]))
        logger.log_status("Number of new buildings by template ids:")
        logger.log_status(number_of_new_buildings_by_template_id)

        # recompute the cumulative development amount
        if velocity_function_set is not None:
            # determine whether everything has been built or whether the proposal should be considered again next year
            cummulative_amount_of_development = development_proposal_set.compute_variables(
                [
                    "development_project_proposal.aggregate(urbansim_parcel.development_project_proposal_component.cummulative_amount_of_development)/urbansim_parcel.development_project_proposal.number_of_components"
                ],
                dataset_pool=dataset_pool)
        else:  # if there is no velocity function, all components have velocity of 100%
            ## TODO: need to be reviewed, probably by Hana
            ## changed from proposal_component_set to development_proposal_set
            ## so it will have the same shape as is_delayed_or_active
            cummulative_amount_of_development = resize(
                array([100], dtype="int32"), development_proposal_set.size())
        will_be_delayed = cummulative_amount_of_development < 100
        velocity_idx = where(logical_and(is_delayed_or_active,
                                         will_be_delayed))[0]
        if velocity_idx.size > 0:
            # for the unfinished projects set the status_id to id_with_velocity
            development_proposal_set.set_values_of_one_attribute(
                "status_id",
                development_proposal_set.id_with_velocity,
                index=velocity_idx)
        not_velocity_idx = where(
            logical_and(is_delayed_or_active, logical_not(will_be_delayed)))[0]
        if not_velocity_idx.size > 0:
            # for the remaining projects set the status_id to id_not_available
            development_proposal_set.set_values_of_one_attribute(
                "status_id",
                development_proposal_set.id_not_available,
                index=not_velocity_idx)

        dataset_pool._remove_dataset(proposal_component_set.get_dataset_name())
        return development_proposal_set
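A sketch (numpy, invented amounts) of the cumulative-sum test used above: a component is built this year only once the running total of units proposed on the parcel exceeds what already stands there.

import numpy

to_be_built = numpy.array([40.0, 60.0, 100.0])  # units per component on one parcel
amount_built = 90                               # units already standing
to_be_built_cumsum = numpy.rint(numpy.cumsum(to_be_built)).astype("int32")  # [40, 100, 200]
idx_to_be_built = numpy.where(to_be_built_cumsum > amount_built)[0]
print(idx_to_be_built)  # -> [1 2]: only the last two components trigger construction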
    def run(self,
            dataset,
            outcome_attribute,
            weight_attribute,
            control_totals,
            current_year,
            control_total_attribute=None,
            year_attribute='year',
            capacity_attribute=None,
            add_quantity=False,
            dataset_pool=None):
        """'dataset' is a Dataset for which a quantity 'outcome_attribute' is created. The total amount of the quantity is 
        given by the attribute 'control_total_attribute' of the 'control_totals' Dataset. If it is not given, it is assumed 
        to have the same name as 'outcome_attribute'. The 'weight_attribute' of 'dataset' determines the allocation weights.
        The 'control_totals' Dataset contains an attribute 'year' (or alternatively, an attribute given by the 'year_attribute' argument)
        and optionally other attributes that must be known to the 'dataset' (such as a geography). For each row of the control_totals dataset
        whose year matches the 'current_year', the total amount is distributed among the corresponding members of 'dataset' according to weights.
        If a 'capacity_attribute' is given (an attribute of 'dataset'), the algorithm removes any allocation that exceeds the capacity and 
        redistributes it among the remaining members. The resulting values are appended to 'dataset' as 'outcome_attribute' (as a primary attribute).
        If add_quantity is True and the 'outcome_attribute' exists in dataset, the resulting values are added to the current values of 
        'outcome_attribute'.
        """
        ct_attr = control_totals.get_known_attribute_names()
        if year_attribute not in ct_attr:
            raise StandardError, "Year attribute '%s' must be a known attribute of the control totals dataset." % year_attribute
        ct_attr.remove(year_attribute)
        if control_total_attribute is None:
            control_total_attribute = outcome_attribute
        if control_total_attribute not in ct_attr:
            raise StandardError, "Attribute '%s' must be a known attribute of the control totals dataset." % control_total_attribute
        ct_attr.remove(control_total_attribute)
        if control_totals._is_hidden_id():
            ct_attr.remove(control_totals.id_name()[0])

        # compute weights and other attributes necessary for allocation
        attrs_to_compute = [weight_attribute] + ct_attr
        if capacity_attribute is not None:
            attrs_to_compute.append(capacity_attribute)
        for attr in attrs_to_compute:
            try:
                dataset.compute_variables(attr, dataset_pool=dataset_pool)
            except:
                dataset.compute_one_variable_with_unknown_package(
                    attr, dataset_pool=dataset_pool)

        # create subset of control totals for the current year
        year_index = where(
            control_totals.get_attribute(year_attribute) == current_year)[0]
        if year_index.size <= 0:
            logger.log_warning("No control total for year %s" % current_year)
            return None
        control_totals_for_this_year = DatasetSubset(control_totals,
                                                     year_index)

        # check capacity
        if capacity_attribute is not None:
            capacity_sum = dataset.get_attribute(capacity_attribute).sum()
            amount_to_allocate = control_totals_for_this_year.get_attribute(
                control_total_attribute).sum()
            if capacity_sum < amount_to_allocate:
                logger.log_warning(
                    "Capacity (%s) is smaller than the amount to allocate (%s)."
                    % (capacity_sum, amount_to_allocate))
            C = dataset.get_attribute(capacity_attribute).astype('int32')

        all_weights = dataset.get_attribute(weight_attribute)
        outcome = zeros(dataset.size(), dtype='int32')
        for ct_row in range(control_totals_for_this_year.size()):
            is_considered = ones(dataset.size(), dtype='bool8')
            for characteristics in ct_attr:
                is_considered = logical_and(
                    is_considered,
                    dataset.get_attribute(characteristics) ==
                    control_totals_for_this_year.get_attribute(
                        characteristics)[ct_row])
            T = control_totals_for_this_year.get_attribute(
                control_total_attribute)[ct_row]
            it = 1
            while True:
                is_considered_idx = where(is_considered)[0]
                weights = all_weights[is_considered_idx]
                weights_sum = float(weights.sum())
                outcome[is_considered_idx] = round_(
                    outcome[is_considered_idx] + T *
                    (weights / weights_sum)).astype('int32')
                if capacity_attribute is None:
                    break
                diff = outcome[is_considered_idx] - C[is_considered_idx]
                outcome[is_considered_idx] = clip(outcome[is_considered_idx],
                                                  0, C[is_considered_idx])
                if it == 1 and C[is_considered_idx].sum() < T:
                    logger.log_warning(
                        "Control total %s cannot be met due to a capacity restriction of %s"
                        % (T, C[is_considered_idx].sum()))
                T = where(diff < 0, 0, diff).sum()
                if T <= 0:
                    break
                is_considered = logical_and(is_considered, outcome < C)
                it += 1
        if add_quantity and (outcome_attribute
                             in dataset.get_known_attribute_names()):
            dataset.modify_attribute(name=outcome_attribute,
                                     data=outcome +
                                     dataset.get_attribute(outcome_attribute))
            logger.log_status(
                'New values added to the attribute %s of dataset %s.' %
                (outcome_attribute, dataset.get_dataset_name()))
        else:
            dataset.add_primary_attribute(name=outcome_attribute, data=outcome)
            logger.log_status(
                'New values stored into attribute %s of dataset %s.' %
                (outcome_attribute, dataset.get_dataset_name()))
        dataset.flush_attribute(outcome_attribute)
        return outcome
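A compact, self-contained sketch (plain numpy, invented weights and capacities) of the capacity-constrained loop above: distribute T in proportion to weights, clip at capacity, then redistribute the overflow among members that still have headroom until nothing is left.

import numpy

weights = numpy.array([4.0, 3.0, 2.0, 1.0])
C = numpy.array([50, 30, 25, 100], dtype='int32')  # capacities
T = 150                                            # control total
outcome = numpy.zeros(4, dtype='int32')
is_considered = numpy.ones(4, dtype='bool')
while True:
    idx = numpy.where(is_considered)[0]
    w = weights[idx]
    outcome[idx] = numpy.round_(outcome[idx] + T * (w / float(w.sum()))).astype('int32')
    diff = outcome[idx] - C[idx]
    outcome[idx] = numpy.clip(outcome[idx], 0, C[idx])
    T = numpy.where(diff < 0, 0, diff).sum()
    if T <= 0:
        break
    is_considered = numpy.logical_and(is_considered, outcome < C)
print("%s sum=%s" % (outcome, outcome.sum()))  # -> [50 30 25 45] sum=150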