def run(self, year, job_set, control_totals, job_building_types, data_objects=None, resources=None):
     self._do_initialize_for_run(job_set, job_building_types, data_objects)
     large_area_ids = control_totals.get_attribute("large_area_id")
     jobs_large_area_ids = job_set.compute_variables("washtenaw.job.large_area_id")
     unique_large_areas = unique(large_area_ids)
     is_year = control_totals.get_attribute("year")==year
     all_jobs_index = arange(job_set.size())
     sectors = unique(control_totals.get_attribute("sector_id")[is_year])
     self._compute_sector_variables(sectors, job_set)
     for area in unique_large_areas:
         idx = where(logical_and(is_year, large_area_ids == area))[0]
         self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
         jobs_index = where(jobs_large_area_ids == area)[0]
         jobs_for_this_area = DatasetSubset(job_set, jobs_index)
         logger.log_status("ETM for area %s (currently %s jobs)" % (area, jobs_for_this_area.size()))
         last_remove_idx = self.remove_jobs.size
         self._do_run_for_this_year(jobs_for_this_area)
         add_jobs_size = self.new_jobs[self.location_id_name].size-self.new_jobs["large_area_id"].size
         remove_jobs_size = self.remove_jobs.size-last_remove_idx
         logger.log_status("add %s, remove %s, total %s" % (add_jobs_size, remove_jobs_size,
                                                            jobs_for_this_area.size()+add_jobs_size-remove_jobs_size))
         self.new_jobs["large_area_id"] = concatenate((self.new_jobs["large_area_id"],
                 array(add_jobs_size*[area], dtype="int32")))
         # transform indices of removing jobs into indices of the whole dataset
         self.remove_jobs[last_remove_idx:self.remove_jobs.size] = all_jobs_index[jobs_index[self.remove_jobs[last_remove_idx:self.remove_jobs.size]]]
     self._update_job_set(job_set)
     idx_new_jobs = arange(job_set.size()-self.new_jobs["large_area_id"].size, job_set.size())
     jobs_large_area_ids = job_set.compute_variables("washtenaw.job.large_area_id")
     jobs_large_area_ids[idx_new_jobs] = self.new_jobs["large_area_id"]
     job_set.delete_one_attribute("large_area_id")
     job_set.add_attribute(jobs_large_area_ids, "large_area_id", metadata=AttributeType.PRIMARY)
     # return the index of the new jobs
     return idx_new_jobs
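# A standalone sketch (plain numpy, hypothetical values) of the index
# translation above: removal indices local to one area's DatasetSubset are
# mapped through that area's jobs_index back into indices of the whole job set.
import numpy as np

all_jobs_index = np.arange(10)          # indices into the full job dataset
jobs_index = np.array([2, 5, 7, 9])     # this area's jobs within the full set
remove_local = np.array([0, 3])         # removals, indexed within the area
print(all_jobs_index[jobs_index[remove_local]])   # -> [2 9]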
 def choose_agents_to_move_from_overfilled_locations(self, capacity,
                                                     agent_set, agents_index, agents_locations):
     """Agents with the smallest number of units should move again.
     """
     if capacity is None:
         return array([], dtype='int32')
     index_valid_agents_locations = where(agents_locations > 0)[0]
     valid_agents_locations = agents_locations[index_valid_agents_locations].astype("int32")
     unique_locations = unique(valid_agents_locations).astype("int32")
     index_consider_capacity = self.choice_set.get_id_index(unique_locations)
     capacity_of_affected_locations = capacity[index_consider_capacity]
     overfilled = where(capacity_of_affected_locations < 0)[0]
     movers = array([], dtype='int32')
     indexed_individuals = DatasetSubset(agent_set, agents_index[index_valid_agents_locations])
     ordered_agent_indices = self.get_agents_order(indexed_individuals)
     sizes = indexed_individuals.get_attribute(self.units_full_name)[ordered_agent_indices]
     choice_ids = self.choice_set.get_id_attribute()
     for loc in overfilled:
         agents_to_move = where(valid_agents_locations == choice_ids[index_consider_capacity[loc]])[0]
         if agents_to_move.size > 0:
             n = int(-1*capacity_of_affected_locations[loc])
             this_sizes = sizes[agents_to_move]
              csum = this_sizes[arange(this_sizes.size-1,-1,-1)].cumsum() # cumulative sum over reversed sizes
              csum = csum[arange(csum.size-1, -1,-1)] # reverse back, so csum[i] == this_sizes[i:].sum()
              w = where(csum < n)[0]
              if w.size < agents_to_move.size: # add one more agent so that the selected sizes cover n
                  w = concatenate((array([agents_to_move.size-w.size-1]), w))
             idx = ordered_agent_indices[agents_to_move[w]]
             movers = concatenate((movers, idx))
     return movers
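# A standalone sketch (plain numpy, hypothetical sizes) of the suffix-sum
# selection above: walk the size-ordered agents from the tail, take those whose
# suffix sum is still below the overfill n, then add one more agent so the
# selected sizes cover n.
import numpy as np

sizes = np.array([5, 4, 3, 2, 1])       # agent sizes in the model's ordering
n = 6                                   # units that must be vacated

csum = sizes[::-1].cumsum()[::-1]       # csum[i] == sizes[i:].sum()
w = np.where(csum < n)[0]               # tail agents whose suffix sum is < n
if w.size < sizes.size:                 # one more agent so the total covers n
    w = np.concatenate(([sizes.size - w.size - 1], w))
print(w)                                # -> [2 3 4]
print(sizes[w].sum())                   # -> 6, which covers n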
 def estimate_mu(self):
     iout = -1
     self.values_from_mr = {}
     for quantity in self.observed_data.get_quantity_objects():
         dataset_name = quantity.get_dataset_name()
         variable = quantity.get_variable_name()
         iout += 1
         dimension_reduced = False
         quantity_ids = quantity.get_dataset().get_id_attribute()
         for i in range(self.number_of_runs):
             ds = self._compute_variable_for_one_run(i, variable, dataset_name, self.get_calibration_year(), quantity)
             if isinstance(ds, InteractionDataset):
                 ds = ds.get_flatten_dataset()
             if i == 0: # first run
                 self.mu[iout] = zeros((self.y[iout].size, self.number_of_runs), dtype=float32)
                 ids = ds.get_id_attribute()
             else:
                 if ds.size() > ids.shape[0]:
                     ds = DatasetSubset(ds, ds.get_id_index(ids))
                     dimension_reduced = True
             scale = self.get_scales(ds, i+1, variable)
             matching_index = ds.get_id_index(quantity_ids)
             values = scale[matching_index] * ds.get_attribute(variable)[matching_index]
             self.mu[iout][:,i] = try_transformation(values, quantity.get_transformation())
             
         self.values_from_mr[variable.get_expression()] = self.mu[iout]
         if dimension_reduced:
             self.y[iout] = self.y[iout][quantity.get_dataset().get_id_index(ids)]
 def run(self, location_set, development_event_set, *args, **kwargs):
     changed_indices, processed_development_event_indices = \
                     EventsCoordinator.run(self, location_set, 
                                            development_event_set, *args, **kwargs)
     if development_event_set is not None:
         subset = DatasetSubset(development_event_set, processed_development_event_indices)
         subset.write_dataset(out_storage=AttributeCache())
     return (changed_indices, processed_development_event_indices)                               
    def run(self,
            chunk_specification,
            dataset,
            dataset_index=None,
            result_array_type=float32,
            **kwargs):
        """ 'chunk_specification' - determines number of chunks to use when computing over
                the dataset set.
            'dataset' - an object of class Dataset that is to be chunked.
            'dataset_index' - index of individuals in dataset to be chunked.
            'result_array_type' - type of the resulting array. Can be any numerical type of numpy array.
            **kwargs - keyword arguments.
            The method chunks dataset_index in the desired number of chunks (minimum is 1) and for each chunk it calls the method
            'run_chunk'. The order of the individuals entering the chunking is determined by the method 'get_agents_order'.
        """
        if dataset_index is None:
            dataset_index = arange(dataset.size())
        if not isinstance(dataset_index, ndarray):
            dataset_index = array(dataset_index)
        logger.log_status("Total number of individuals: %s" %
                          dataset_index.size)
        result_array = zeros(dataset_index.size, dtype=result_array_type)

        if dataset_index.size <= 0:
            logger.log_status("Nothing to be done.")
            return result_array

        all_indexed_individuals = DatasetSubset(dataset, dataset_index)
        ordered_agent_indices = self.get_agents_order(
            all_indexed_individuals)  # set order of individuals in chunks

        # TODO: Remove next six lines after we inherit chunk specification as a text string.
        if (chunk_specification is None):
            chunk_specification = {'nchunks': 1}
        chunker = ChunkSpecification(chunk_specification)
        self.number_of_chunks = chunker.nchunks(dataset_index)
        chunksize = int(
            ceil(all_indexed_individuals.size() /
                 float(self.number_of_chunks)))
        for ichunk in range(self.number_of_chunks):
            logger.start_block("%s chunk %d out of %d." %
                               (self.model_short_name,
                                (ichunk + 1), self.number_of_chunks))
            self.index_of_current_chunk = ichunk
            try:
                chunk_agent_indices = ordered_agent_indices[arange(
                    (ichunk * chunksize),
                    min((ichunk + 1) * chunksize,
                        all_indexed_individuals.size()))]
                logger.log_status("Number of agents in this chunk: %s" %
                                  chunk_agent_indices.size)
                result_array[chunk_agent_indices] = self.run_chunk(
                    dataset_index[chunk_agent_indices], dataset,
                    **kwargs).astype(result_array_type)
            finally:
                logger.end_block()

        return result_array
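# A hedged sketch of the chunk arithmetic above, independent of
# ChunkSpecification: N ordered indices are split into nchunks slices of
# ceil(N / nchunks) elements each, with the last chunk possibly shorter.
from math import ceil
import numpy as np

N, nchunks = 10, 3                      # hypothetical sizes
ordered = np.arange(N)                  # stands in for ordered_agent_indices
chunksize = int(ceil(N / float(nchunks)))
for ichunk in range(nchunks):
    chunk = ordered[ichunk * chunksize : min((ichunk + 1) * chunksize, N)]
    print(chunk)                        # -> slices of size 4, 4, 2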
    def run(self, current_year_emme2_dir, current_year, dataset_pool, config=None):
        """Writes to the an emme2 input file in the [current_year_emme2_dir]/tripgen/inputtg/tazdata.ma2.
        """
        
        missing_dataset = ''
        try:
            missing_dataset = 'constant_taz_column'
            taz_col_set = dataset_pool.get_dataset("constant_taz_column")
            taz_col_set.load_dataset()
            missing_dataset = 'zone'
            zone_set = dataset_pool.get_dataset("zone")
            zone_set.load_dataset()
            missing_dataset = 'household'
            household_set = dataset_pool.get_dataset("household")
        except:
            raise Exception("Dataset %s is missing from dataset_pool" % missing_dataset)
        
        """specify travel input file name: [current_year_emme2_dir]/tripgen/inputtg/tazdata.ma2 """
        full_path = os.path.join(current_year_emme2_dir, 'tripgen', 'inputtg')
        if not os.path.exists(full_path):
            os.makedirs(full_path)
        tm_input_file = os.path.join(full_path, 'tazdata.ma2')
        
        tm_year = self._decade_floor(current_year)
        
        logger.log_status("calculating entries for emme2 input file")
        taz_col_set.compute_variables("zone_id=constant_taz_column.taz")
        current_taz_col = DatasetSubset(taz_col_set, index=where(taz_col_set.get_attribute("year")==tm_year)[0])
        
        current_taz_col._id_names = ['taz']
        current_taz_col._create_id_mapping()
        zone_set.join(current_taz_col, "pctmf", join_attribute='zone_id')
        zone_set.join(current_taz_col, "gqi", join_attribute='zone_id')
        zone_set.join(current_taz_col, "gqn", join_attribute='zone_id')
        zone_set.join(current_taz_col, "fteuniv", join_attribute='zone_id')
        zone_set.join(current_taz_col, "den", new_name='density', join_attribute='zone_id')

        value_122 = zeros(zone_set.size())
        index_122 = zone_set.try_get_id_index(array([58,59,60,71,72,73,84,85,86,150,251,266,489,578,687,688,797,868]))
        value_122[index_122[index_122 != -1]] = 1
        zone_set.add_attribute(data=value_122, name="v122")
        
        value_123 = zeros(zone_set.size())
        index_123 = zone_set.try_get_id_index(array([531,646,847,850,888,894,899,910]))
        value_123[index_123[index_123 != -1]] = 1
        zone_set.add_attribute(data=value_123, name="v123")
        
        value_124 = logical_not(value_122 + value_123)
        zone_set.add_attribute(data=value_124, name="v124")
                
        """specify which variables are passing from urbansim to travel model; the order matters"""
        variables_list = self.get_variables_list(dataset_pool)
        
        zone_set.compute_variables(variables_list, dataset_pool=dataset_pool )

        return self._write_to_file(zone_set, variables_list, tm_input_file)
 def run(self,
         year,
         job_set,
         control_totals,
         job_building_types,
         data_objects=None,
         resources=None):
     self._do_initialize_for_run(job_set, job_building_types, data_objects)
     idx = where(control_totals.get_attribute("year") == year)[0]
     self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
     self._do_run_for_this_year(job_set)
     return self._update_job_set(job_set)
    def run( self, vacancy_table, history_table, year, location_set, dataset_pool=None, resources=None ):
        self.dataset_pool=dataset_pool
        building_types = self.dataset_pool.get_dataset('building_type')
        target_vacancy_this_year = DatasetSubset(vacancy_table, index=where(vacancy_table.get_attribute("year")==year)[0])
        building_type_ids = target_vacancy_this_year.get_attribute('building_type_id')
        building_type_idx = building_types.get_id_index(building_type_ids)
        self.used_building_types = DatasetSubset(building_types, index=building_type_idx)
        project_types =  self.used_building_types.get_attribute('building_type_name')
        is_residential = self.used_building_types.get_attribute('is_residential')
        unit_names =  where(is_residential, 'residential_units', 'non_residential_sqft')
        specific_unit_names =  where(is_residential, 'residential_units', '_sqft')
        rates =  target_vacancy_this_year.get_attribute('target_total_vacancy')
        self.project_units = {}
        self.project_specific_units = {}
        target_rates = {}
        for i in range(self.used_building_types.size()):
            self.project_units[project_types[i]] = unit_names[i]
            if is_residential[i]:
                self.project_specific_units[project_types[i]] = specific_unit_names[i]
            else:
                self.project_specific_units[project_types[i]] = "%s%s" % (project_types[i], specific_unit_names[i])
            target_rates[building_type_ids[i]] = rates[i]
            
        self._compute_vacancy_and_total_units_variables(location_set, project_types, resources)
        self.pre_check( location_set, target_vacancy_this_year, project_types)
    
        projects = None
        for project_type_id, target_vacancy_rate in target_rates.iteritems():
            # determine current-year vacancy rates
            project_type = building_types.get_attribute_by_id('building_type_name', project_type_id)
            vacant_units_sum = location_set.get_attribute(self.variable_for_vacancy[project_type]).sum()
            units_sum = float( location_set.get_attribute(self.variable_for_total_units[project_type]).sum() )
            should_develop_units = int(round(max( 0, ( target_vacancy_rate * units_sum - vacant_units_sum ) /
                                         ( 1 - target_vacancy_rate ) )))
            logger.log_status(project_type + ": vacant units: %d, should be vacant: %f, sum units: %d"
                          % (vacant_units_sum, target_vacancy_rate * units_sum, units_sum))

            if not should_develop_units:
                logger.log_note(("Will not build any " + project_type + " units, because the current vacancy of %d units\n"
                             + "is more than the %d units desired for the vacancy rate of %f.")
                            % (vacant_units_sum,
                               target_vacancy_rate * units_sum,
                               target_vacancy_rate))
            #create projects
            if should_develop_units > 0:
                this_project = self._create_projects(should_develop_units, project_type, project_type_id, history_table,
                                                               location_set, units_sum, resources)
                if projects is None:
                    projects = this_project
                else:
                    projects.join_by_rows(this_project, change_ids_if_not_unique=True)
        return projects
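# A worked check (not the model's code) of the should_develop_units formula:
# building d vacant units moves the rate to (vacant + d) / (units + d), and
# solving (vacant + d) / (units + d) == target for d gives
# d = (target * units - vacant) / (1 - target).
vacant_units_sum, units_sum, target_vacancy_rate = 50.0, 1000.0, 0.08
d = int(round(max(0, (target_vacancy_rate * units_sum - vacant_units_sum)
                     / (1 - target_vacancy_rate))))
print(d)                                           # -> 33
print((vacant_units_sum + d) / (units_sum + d))    # -> ~0.0803, near the target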
    def run( self, model_configuration, vacancy_table, history_table, year, 
             location_set, resources=None):
        large_area_ids = vacancy_table.get_attribute("large_area_id")
        locations_large_area_ids = location_set.compute_variables("washtenaw.%s.large_area_id" % location_set.get_dataset_name())
        unique_large_areas = unique(large_area_ids)
        self._compute_vacancy_variables(location_set, 
                                        model_configuration['development_project_types'], 
                                        resources)

        projects = {}
        for area in unique_large_areas:
            location_index = where(locations_large_area_ids == area)[0]
            locations_for_this_area = DatasetSubset(location_set, location_index)
            logger.log_status("DPLCM for area %s", area)
            target_residential_vacancy_rate, target_non_residential_vacancy_rate = self._get_target_vacancy_rates(vacancy_table, year, area)
            for project_type in model_configuration['development_project_types']:
                # determine current-year vacancy rates
                vacant_units_sum = locations_for_this_area.get_attribute(self.variable_for_vacancy[project_type]).sum()
                units_sum = float( locations_for_this_area.get_attribute(self.units_variable[project_type]).sum() )
                vacant_rate = self.safe_divide(vacant_units_sum, units_sum)
                if model_configuration['development_project_types'][project_type]['residential']:
                    target_vacancy_rate = target_residential_vacancy_rate
                else:
                    target_vacancy_rate = target_non_residential_vacancy_rate
                should_develop_units = int(round(max( 0, ( target_vacancy_rate * units_sum - vacant_units_sum ) /
                                             ( 1 - target_vacancy_rate ) )))
                logger.log_status(project_type + ": vacant units: %d, should be vacant: %f, sum units: %d, will develop: %d"
                          % (vacant_units_sum, target_vacancy_rate * units_sum, units_sum, should_develop_units))
                #create projects
                if should_develop_units > 0:
                    project_dataset = self._create_projects(should_develop_units, project_type, history_table,
                                                                   locations_for_this_area, units_sum, 
                                                                   model_configuration['development_project_types'], 
                                                                   resources)
                    project_dataset.add_attribute(array(project_dataset.size()*[area]), "large_area_id", 
                                                  metadata=AttributeType.PRIMARY)
                    if (project_type not in projects.keys()) or (projects[project_type] is None):
                        projects[project_type] = project_dataset
                    else:
                        projects[project_type].join_by_rows(project_dataset, change_ids_if_not_unique=True)
 
        for project_type in model_configuration['development_project_types']:
            if project_type not in projects.keys():
                projects[project_type] = None
            if projects[project_type] is None:
                size = 0
            else:
                projects[project_type].add_submodel_categories()
                size = projects[project_type].size()
            logger.log_status("%s %s projects to be built" % (size, project_type))  
        return projects
    def run(self, year, household_set, control_totals, characteristics, resources=None):
#        self.person_set = person_set
        self._do_initialize_for_run(household_set)
        control_totals.get_attribute("total_number_of_households") # to make sure they are loaded
        self.characteristics = characteristics
        self.all_categories = self.characteristics.get_attribute("characteristic")
        self.all_categories = array(map(lambda x: x.lower(), self.all_categories))
        self.scaled_characteristic_names = get_distinct_names(self.all_categories).tolist()
        self.marginal_characteristic_names = copy(control_totals.get_id_name())
        index_year = self.marginal_characteristic_names.index("year")
        self.marginal_characteristic_names.remove("year")
        self.marginal_characteristic_names.remove(self.subarea_id_name)
        region_ids = control_totals.get_attribute(self.subarea_id_name)
        households_region_ids = household_set.compute_one_variable_with_unknown_package(variable_name="%s" % (self.subarea_id_name), dataset_pool=self.dataset_pool)

        unique_regions = unique(region_ids)
        is_year = control_totals.get_attribute("year")==year
        all_households_index = arange(household_set.size())
        for area in unique_regions:
            idx = where(logical_and(is_year, region_ids == area))[0]
            self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
            households_index = where(households_region_ids == area)[0]
            if households_index.size == 0:
                continue
            households_for_this_area = DatasetSubset(household_set, households_index)
            logger.log_status("HTM for area %s (currently %s households)" % (area, households_for_this_area.size()))
            last_remove_idx = self.remove_households.size
            last_new_hhs_idx = self.mapping_existing_hhs_to_new_hhs.size
            self._do_run_for_this_year(households_for_this_area)
            add_hhs_size = self.new_households[self.location_id_name].size-self.new_households[self.subarea_id_name].size+self.mapping_existing_hhs_to_new_hhs.size-last_new_hhs_idx
            remove_hhs_size = self.remove_households.size-last_remove_idx
            logger.log_status("add %s, remove %s, total %s" % (add_hhs_size, remove_hhs_size,
                                                               households_for_this_area.size()+add_hhs_size-remove_hhs_size
                                                               ))
            self.new_households[self.subarea_id_name] = concatenate((self.new_households[self.subarea_id_name],
                                            array((self.new_households[self.location_id_name].size-self.new_households[self.subarea_id_name].size)*[area], dtype="int32")))
            # transform indices of removing households into indices of the whole dataset
            self.remove_households[last_remove_idx:self.remove_households.size] = all_households_index[households_index[self.remove_households[last_remove_idx:self.remove_households.size]]]
            # do the same for households to be duplicated
            self.mapping_existing_hhs_to_new_hhs[last_new_hhs_idx:self.mapping_existing_hhs_to_new_hhs.size] = all_households_index[households_index[self.mapping_existing_hhs_to_new_hhs[last_new_hhs_idx:self.mapping_existing_hhs_to_new_hhs.size]]]
            
        self._update_household_set(household_set)
        idx_new_households = arange(household_set.size()-self.new_households[self.subarea_id_name].size, household_set.size())
        #household_region_ids = household_set.compute_variables("urbansim_parcel.household.%s" % self.subarea_id_name)
        #household_region_ids[idx_new_households] = self.new_households[self.subarea_id_name]
        region_ids = household_set.get_attribute(self.subarea_id_name).copy()
        household_set.delete_one_attribute(self.subarea_id_name)
        household_set.add_attribute(region_ids, self.subarea_id_name, metadata=AttributeType.PRIMARY)
        # return an index of new households
        return idx_new_households
 def _convert_lccm_input(self, flt_directory_in, flt_directory_out):
     gc.collect()
     t1 = time()
     lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
         'flt_storage', storage_location=flt_directory_in),
                           out_storage=StorageFactory().get_storage(
                               'flt_storage',
                               storage_location=flt_directory_out))
     #        lc.get_header() # added 23 june 2009 by mm
     mask = lc.get_mask()
     idx = where(mask == 0)[0]
     lcsubset = DatasetSubset(lc, idx)
     print "Converting:"
     lcsubset.write_dataset(attributes=["relative_x"],
                            out_table_name="land_covers")
     lc.delete_one_attribute("relative_x")
     lcsubset.write_dataset(attributes=["relative_y"],
                            out_table_name="land_covers")
     lc.delete_one_attribute("relative_y")
     lc.flush_dataset()
     gc.collect()
     #        lc_names = lc.get_primary_attribute_names()
     for attr in lc.get_primary_attribute_names():
         print "   ", attr
         lcsubset.write_dataset(attributes=[attr],
                                out_table_name="land_covers")
         lc.delete_one_attribute(attr)
     logger.log_status("Data conversion done. " + str(time() - t1) + " s")
    def run(self, employment_events, jobs, current_year):
        # select events for the current year
        events_for_this_year = DatasetSubset(
            employment_events,
            index=where(
                employment_events.get_attribute('scheduled_year') ==
                current_year)[0])

        # create control totals on the fly
        control_totals = self.create_control_totals(events_for_this_year,
                                                    jobs,
                                                    year=current_year)

        # run subarea employment transition model in order to create or delete the given number of jobs
        ETM = SubareaEmploymentTransitionModel(
            subarea_id_name=self.location_dataset.get_id_name()[0],
            location_id_name=self._job_location_id_name,
            dataset_pool=self.dataset_pool)
        etm_result = ETM.run(
            current_year, jobs, control_totals,
            self.dataset_pool.get_dataset('job_building_type'))

        self.place_jobs_into_buildings(events_for_this_year, jobs, etm_result)

        return etm_result
 def compute(self, dataset_pool):
     jobs = dataset_pool.get_dataset('job')
     nhb_jobs = DatasetSubset(
         jobs,
         where(jobs.get_attribute('is_home_based_job') == 0)[0])
     return self.get_dataset().sum_dataset_over_ids(
         nhb_jobs, self.job_is_in_employment_sector_group)
    def run(self, agent_set, **kwargs):

        large_areas = agent_set.get_attribute(self.large_area_id_name)
        valid_large_area = where(large_areas > 0)[0]
        result = array([], dtype="int32")  # defined up front so it exists even when no agent has a valid large area
        if valid_large_area.size > 0:
            unique_large_areas = unique(large_areas[valid_large_area])
            cond_array = zeros(agent_set.size(), dtype="bool8")
            cond_array[valid_large_area] = True
            for area in unique_large_areas:
                new_index = where(logical_and(cond_array, large_areas == area))[0]
                agent_subset =  DatasetSubset(agent_set, new_index)
                logger.log_status("ARM for area %s (%s agents)" % (area, agent_subset.size()))
                this_result = AgentRelocationModel.run(self, agent_subset, **kwargs)
                result = concatenate((result, new_index[this_result]))
        no_large_area = where(large_areas <= 0)[0]
        result = concatenate((result, no_large_area))
        return result
    def run(self, n=500, 
            realestate_dataset_name = 'building',
            current_year=None,
            **kwargs):

        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')

        if current_year is None:
            year = SimulationState().get_current_time()
        else:
            year = current_year
        self.current_year = year
        this_year_index = where(target_vacancy['year']==year)[0]
        target_vacancy_for_this_year = DatasetSubset(target_vacancy, this_year_index)
        if target_vacancy_for_this_year.size() == 0:
            raise IOError, 'No target vacancy defined for year %s.' % year
        self.all_btypes_size = target_vacancy_for_this_year.size()
        return DevelopmentProjectProposalSamplingModelWithMinimum.run(self, n=n, realestate_dataset_name=realestate_dataset_name,
                                                                      current_year=current_year, **kwargs)
 def get_active_agent_set(self, submodel=None):
     """Return agent set that make choices in the model.
     Works only for the ChoiceModel class.
     """
     agents = self.get_agent_set()
     if submodel is None:
         index = self.get_agent_set_index()
     else:
         index = self.get_agent_set_index_for_submodel(submodel)
     return DatasetSubset(agents, index)
 def get_active_choice_set(self, submodel=None):
     """Return choice set as seen by agents in the model.
     Works only for the ChoiceModel class.
     """
     if submodel is None:
         choices = self.get_choice_set_index()
     else:
         choices = self.get_choice_set_index_for_submodel(submodel)
     choices = unique(choices.flatten())
     ds = self.get_choice_set()
     return DatasetSubset(ds, choices)
    def _write_input_file_1(self, current_year_emme2_dir, input_dir, current_year, dataset_pool, config=None):
        missing_dataset = ''
        try:
            missing_dataset = 'group_quarter'
            taz_col_set = dataset_pool.get_dataset("group_quarter")
            taz_col_set.load_dataset()
            missing_dataset = 'zone'
            zone_set = dataset_pool.get_dataset("zone")
            zone_set.load_dataset()
            missing_dataset = 'household'
            household_set = dataset_pool.get_dataset("household")
        except:
            raise Exception("Dataset %s is missing from dataset_pool" % missing_dataset)
        
        """specify travel input file name """
        if not os.path.exists(input_dir):
            os.makedirs(input_dir)
        tm_input_file = os.path.join(input_dir, 'tazdata.in')
        
        tm_year = self._get_tm_year(current_year, taz_col_set)
        
        logger.log_status("calculating entries for emme%s input file" % self.emme_version)
        taz_col_set.compute_variables("zone_id=group_quarter.taz")
        current_taz_col = DatasetSubset(taz_col_set, index=where(taz_col_set.get_attribute("year")==tm_year)[0])
        
        current_taz_col._id_names = ['taz']
        current_taz_col._create_id_mapping()
        zone_set.join(current_taz_col, "gqdorm", join_attribute='zone_id')
        zone_set.join(current_taz_col, "gqmil", join_attribute='zone_id')
        zone_set.join(current_taz_col, "gqoth", join_attribute='zone_id')
        zone_set.join(current_taz_col, "fteuniv", join_attribute='zone_id')
              
        """specify which variables are passing from urbansim to travel model; the order matters"""
        variables_list = self.get_variables_list(dataset_pool)
        
        zone_set.compute_variables(variables_list, dataset_pool=dataset_pool )

        return self._write_to_file(zone_set, variables_list, tm_input_file, tm_year)
 def run(self, year, household_set, control_totals, characteristics, resources=None):
     self._do_initialize_for_run(household_set)
     control_totals.get_attribute("total_number_of_households") # to make sure they are loaded
     self.characteristics = characteristics
     self.all_categories = self.characteristics.get_attribute("characteristic")
     self.all_categories = array(map(lambda x: x.lower(), self.all_categories))
     self.scaled_characteristic_names = get_distinct_names(self.all_categories).tolist()
     self.marginal_characteristic_names = copy(control_totals.get_id_name())
     index_year = self.marginal_characteristic_names.index("year")
     self.marginal_characteristic_names.remove("year")
     idx = where(control_totals.get_attribute("year")==year)[0]
     self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
     self._do_run_for_this_year(household_set)
     return self._update_household_set(household_set)
    def _do_run(self, location_set, agent_set, agents_index, data_objects=None, resources=None):
        location_id_name = location_set.get_id_name()[0]
        jobsubset = DatasetSubset(agent_set, agents_index)
        if jobsubset.size() <= 0:
            return array([], dtype='int32')
        #unplace jobs
        agent_set.set_values_of_one_attribute(location_id_name, 
                                              resize(array([-1.0]), jobsubset.size()), agents_index)
        sector_ids = jobsubset.get_attribute("sector_id")
        sectors = unique(sector_ids)
        counts = ndimage_sum(ones((jobsubset.size(),)), labels=sector_ids.astype('int32'), index=sectors.astype('int32'))
        if sectors.size <= 1:
            counts = array([counts])
        variables = map(lambda x: "number_of_jobs_of_sector_"+str(int(x)), sectors)
        compute_variables = map(lambda var: self.variable_package + "." + 
            location_set.get_dataset_name()+ "." + var, variables)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included({agent_set.get_dataset_name():agent_set})
        location_set.compute_variables(compute_variables, dataset_pool=self.dataset_pool)
        if self.filter is None:
            location_index = arange(location_set.size())
        else:
            filter_values = location_set.compute_variables([self.filter], dataset_pool=self.dataset_pool)
            location_index = where(filter_values > 0)[0]
        if location_index.size <= 0:
            logger.log_status("No locations available. Nothing to be done.")
            return array([])
        location_subset = DatasetSubset(location_set, location_index)
        i=0
        for sector in sectors:
            distr = location_subset.get_attribute(variables[i])
            if ma.allclose(distr.sum(), 0):
                uniform_prob = 1.0/distr.size
                distr = resize(array([uniform_prob], dtype='float64'), distr.size)
                logger.log_warning("Probabilities in scaling model for sector " + str(sector) + " sum to 0.0.  Substituting uniform distribution!")
#                random_sample = sample(location_set.get_attribute("grid_id"), k=int(counts[i]), \
#                                   probabilities = distr)
            distr = distr/float(distr.sum())
            random_sample = probsample_replace(location_subset.get_id_attribute(), size=int(counts[i]), 
                                       prob_array=distr)
            idx = where(sector_ids == sector)[0]
            #modify job locations
            agent_set.set_values_of_one_attribute(location_id_name, random_sample, agents_index[idx])
            i+=1
        return agent_set.get_attribute_by_index(location_id_name, agents_index)
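# probsample_replace above draws location ids with replacement according to
# prob_array; a rough numpy equivalent (an illustration, not opus_core's
# implementation) using numpy.random.choice:
import numpy as np

location_ids = np.array([101, 102, 103])
weights = np.array([10.0, 30.0, 60.0])     # e.g. number_of_jobs_of_sector_* counts
distr = weights / float(weights.sum())     # normalize, as the model does
sample = np.random.choice(location_ids, size=5, replace=True, p=distr)
print(sample)                              # e.g. [103 102 103 103 101]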
    def run(self, year=None,
            dataset_pool=None,  **kwargs):
        """
        """
        if dataset_pool is None:
            dataset_pool = SessionConfiguration().get_dataset_pool()

        if year is None:
            year = SimulationState().get_current_time()
        
        this_year_index = where(self.scheduled_events.get_attribute('year')==year)[0]
        scheduled_events_for_this_year = DatasetSubset(self.scheduled_events, this_year_index)
        scheduled_events_for_this_year.load_dataset_if_not_loaded()
        column_names = list(set( self.scheduled_events.get_known_attribute_names() ) - set( [ 'year', 'action', 'attribute', 'amount', 'event_id', '_hidden_id_'] ))
        column_names.sort()
#        column_values = dict([ (name, scheduled_events_for_this_year.get_attribute(name)) for name in column_names])
        
        for index in range(scheduled_events_for_this_year.size()):
            indicator = ones( self.dataset.size(), dtype='bool' )
            event_attr = {}
            for attribute in column_names:
                if attribute in self.dataset.get_known_attribute_names():
                    dataset_attribute = self.dataset.get_attribute(attribute)
                else:
                    ## this is done inside the loop because some action may delete computed attributes, such as dataset.add_elements()
                    try:
                        dataset_attribute = self.dataset.compute_one_variable_with_unknown_package(attribute, dataset_pool=dataset_pool)
                    except:
                        raise ValueError, "attribute %s used in the scheduled events dataset cannot be found in dataset %s" % (attribute, self.dataset.get_dataset_name())
                
#                if attribute in column_names: 
                aval = scheduled_events_for_this_year.get_attribute(attribute)[index]
                if aval == -1:
                    continue    # ignore if column value is -1
                else:
                    indicator *= dataset_attribute == aval
                    event_attr.update({attribute:aval})
            
            #agents in dataset satisfying all conditions are identified by indicator
            legit_index = where(indicator)[0]
            
            this_event = scheduled_events_for_this_year.get_data_element(index)
            if not hasattr(this_event, 'attribute'):
                action_attr_name = ''
            else:
                action_attr_name = this_event.attribute
            action_function = getattr(self, '_' + this_event.action.strip().lower())
            action_function( amount=this_event.amount,
                             attribute=action_attr_name,
                             dataset=self.dataset, 
                             index=legit_index,
                             data_dict=event_attr )
            
            self.post_run(self.dataset, legit_index, **kwargs)

        return self.dataset
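# The indicator trick above in isolation: multiplying boolean equality masks
# ANDs the per-attribute conditions, leaving True only where every scheduled
# event attribute matches; a column value of -1 acts as a wildcard.
import numpy as np

sector = np.array([1, 1, 2, 1])
zone = np.array([7, 8, 7, 7])
indicator = np.ones(4, dtype='bool')
for dataset_attribute, aval in ((sector, 1), (zone, 7)):  # hypothetical event row
    if aval == -1:
        continue                       # -1 means "match any value"
    indicator *= dataset_attribute == aval
print(np.where(indicator)[0])          # -> [0 3]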
 def _do_run(self, location_set, agent_set, agents_index, resources=None):
     location_id_name = location_set.get_id_name()[0]
     asubset = DatasetSubset(agent_set, agents_index)
     if asubset.size() <= 0:
         return array([], dtype='int32')
     #unplace agents
     agent_set.modify_attribute(location_id_name, 
                             resize(array([-1]), asubset.size()), agents_index)
     if self.filter is None:
         location_index = arange(location_set.size())
     else:
         filter_values = location_set.compute_variables([self.filter], dataset_pool=self.dataset_pool)
         location_index = where(filter_values > 0)[0]
     if location_index.size <= 0:
         logger.log_status("No locations available. Nothing to be done.")
         return array([])
     
     location_subset = DatasetSubset(location_set, location_index)
     if self.consider_capacity:
         location_set.compute_variables([self.capacity_attribute], 
                                        dataset_pool=self.dataset_pool)
         weights = location_subset[self.capacity_attribute]
         if self.number_of_agents_attribute is not None:
             location_set.compute_variables([self.number_of_agents_attribute], 
                                        dataset_pool=self.dataset_pool)
             weights = clip(weights - location_subset[self.number_of_agents_attribute],
                                        0, location_subset[self.capacity_attribute])
     else:
         weights = ones(location_subset.size())
     
     if weights.sum() <=0:
         logger.log_status("Locations' capacity sums to zero. Nothing to be done.")
         return array([])        
     distr = weights/float(weights.sum())
     random_sample = probsample_replace(location_subset.get_id_attribute(), size=asubset.size(), 
                                    prob_array=distr)
     agent_set.modify_attribute(location_id_name, random_sample, agents_index)
     return agent_set.get_attribute_by_index(location_id_name, agents_index)
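# The capacity weighting above in isolation: the sampling weight of each
# location is its remaining capacity, clipped to [0, capacity] so overfull
# locations get zero weight instead of a negative one.
import numpy as np

capacity = np.array([10, 5, 8])        # hypothetical capacity_attribute values
occupants = np.array([4, 9, 8])        # hypothetical number_of_agents values
weights = np.clip(capacity - occupants, 0, capacity)
print(weights)                         # -> [6 0 0]
print(weights / float(weights.sum()))  # -> [ 1.  0.  0.]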
class HouseholdTransitionModel(Model):
    """Creates and removes households from household_set. New households are duplicated from the existing households, keeping 
       the joint distribution of all characteristics. 
    """

    model_name = "Household Transition Model"

    def __init__(self, location_id_name="grid_id", dataset_pool=None, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.location_id_name = location_id_name
        self.dataset_pool = self.create_dataset_pool(dataset_pool, ["urbansim", "opus_core"])

    def run(self, year, household_set, control_totals, characteristics, resources=None):
        self._do_initialize_for_run(household_set)
        control_totals.get_attribute("total_number_of_households") # to make sure they are loaded
        self.characteristics = characteristics
        self.all_categories = self.characteristics.get_attribute("characteristic")
        self.all_categories = array(map(lambda x: x.lower(), self.all_categories))
        self.scaled_characteristic_names = get_distinct_names(self.all_categories).tolist()
        self.marginal_characteristic_names = copy(control_totals.get_id_name())
        index_year = self.marginal_characteristic_names.index("year")
        self.marginal_characteristic_names.remove("year")
        idx = where(control_totals.get_attribute("year")==year)[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        self._do_run_for_this_year(household_set)
        return self._update_household_set(household_set)
        
    def _update_household_set(self, household_set):
        index_of_duplicated_hhs = household_set.duplicate_rows(self.mapping_existing_hhs_to_new_hhs)
        household_set.modify_attribute(name=self.location_id_name, data=-1 * ones((index_of_duplicated_hhs.size,), 
                                                                              dtype=household_set.get_data_type(self.location_id_name)),
                                                                          index=index_of_duplicated_hhs)
        household_set.remove_elements(self.remove_households)
        if self.new_households[self.location_id_name].size > 0:
            max_id = household_set.get_id_attribute().max()
            self.new_households[self.household_id_name]=concatenate((self.new_households[self.household_id_name],
                                                             arange(max_id+1, max_id+self.new_households[self.location_id_name].size+1)))
            household_set.add_elements(self.new_households, require_all_attributes=False)

        difference = household_set.size()-self.household_size
        self.debug.print_debug("Difference in number of households: %s"
            " (original %s, new %s, created %s, deleted %s)"
                % (difference,
                   self.household_size,
                   household_set.size(),
                   self.new_households[self.household_id_name].size + self.mapping_existing_hhs_to_new_hhs.size,
                   self.remove_households.size),
            3)
        if self.location_id_name in household_set.get_attribute_names():
            self.debug.print_debug("Number of unplaced households: %s"
                % where(household_set.get_attribute(self.location_id_name) <=0)[0].size,
                3)
        return difference

    def _do_initialize_for_run(self, household_set):
        self.household_id_name = household_set.get_id_name()[0]
        self.new_households = {
           self.location_id_name:array([], dtype=household_set.get_data_type(self.location_id_name, int32)),
           self.household_id_name:array([], dtype=household_set.get_data_type(self.household_id_name, int32))
                   }
        self.remove_households = array([], dtype='int32')
        self.household_size = household_set.size()
        self.max_id = household_set.get_id_attribute().max()
        self.arrays_from_categories = {}
        self.arrays_from_categories_mapping = {}
        self.mapping_existing_hhs_to_new_hhs = array([], dtype=household_set.get_data_type(self.household_id_name, int32))
        
    def _do_run_for_this_year(self, household_set):
        self.household_set = household_set
        groups = self.control_totals_for_this_year.get_id_attribute()
        self.create_arrays_from_categories(self.household_set)

        all_characteristics = self.arrays_from_categories.keys()
        self.household_set.load_dataset_if_not_loaded(attributes = all_characteristics) # prevents lazy loading, to save runtime
        idx_shape = []
        number_of_combinations=1
        num_attributes=len(all_characteristics)
        for iattr in range(num_attributes):
            attr = all_characteristics[iattr]
            max_bins = self.arrays_from_categories[attr].max()+1
            idx_shape.append(max_bins)
            number_of_combinations=number_of_combinations*max_bins
            if attr not in self.new_households.keys():
                self.new_households[attr] = array([], dtype=self.household_set.get_data_type(attr, float32))

        self.number_of_combinations = int(number_of_combinations)
        idx_tmp = indices(tuple(idx_shape))
        
        categories_index = zeros((self.number_of_combinations,num_attributes))

        for i in range(num_attributes): #create indices of all combinations
            categories_index[:,i] = idx_tmp[i].ravel()

        categories_index_mapping = {}
        for i in range(self.number_of_combinations):
            categories_index_mapping[tuple(categories_index[i,].tolist())] = i

        def get_category(values):
            bins = map(lambda x, y: self.arrays_from_categories[x][int(y)], all_characteristics, values)
            try:
                return categories_index_mapping[tuple(bins)]
            except KeyError, msg: 
                where_error = where(array(bins) == -1)[0]
                if where_error.size > 0:
                    raise KeyError, \
                        "Invalid value of %s for attribute %s. It is not included in the characteristics groups." % (
                                                                               array(values)[where_error], 
                                                                               array(all_characteristics)[where_error])
                raise KeyError, msg

        if num_attributes > 0:
            # the next array must be a copy of the household values; otherwise it would change the original values
            values_array = reshape(array(self.household_set.get_attribute(all_characteristics[0])), (self.household_set.size(),1))
            if num_attributes > 1:
                for attr in all_characteristics[1:]:
                    values_array = concatenate((values_array, reshape(array(self.household_set.get_attribute(attr)),
                                                                      (self.household_set.size(),1))), axis=1)
            for i in range(values_array.shape[1]):
                if values_array[:,i].max() > 10000:
                    values_array[:,i] = values_array[:,i]/10
                values_array[:,i] = clip(values_array[:,i], 0, self.arrays_from_categories[all_characteristics[i]].size-1)
    
            # determine for each household to what category it belongs to
            self.household_categories = array(map(lambda x: get_category(x), values_array)) # performance bottleneck
    
            number_of_households_in_categories = array(ndimage_sum(ones((self.household_categories.size,)),
                                                                    labels=self.household_categories+1,
                                                                    index = arange(self.number_of_combinations)+1))
        else:
            # no marginal characteristics; consider just one group
            self.household_categories = zeros(self.household_set.size(), dtype='int32')
            number_of_households_in_categories = array([self.household_set.size()])

        g=arange(num_attributes)

        #iterate over marginal characteristics
        for group in groups:
            if groups.ndim <= 1: # there is only one group (no marginal char.)
                id = group
            else:
                id = tuple(group.tolist())
            group_element = self.control_totals_for_this_year.get_data_element_by_id(id)
            total = group_element.total_number_of_households
            for i in range(g.size):
                g[i] = getattr(group_element, self.arrays_from_categories.keys()[i])
            if g.size <= 0:
                l = ones((number_of_households_in_categories.size,))
            else:
                l = categories_index[:,0] == g[0]
                for i in range(1,num_attributes):
                    l = logical_and(l, categories_index[:,i] == g[i])
            # l has 1's for combinations of this group
            number_in_group = array(ndimage_sum(number_of_households_in_categories, labels=l, index = 1))
            diff = int(total - number_in_group)
            if diff < 0: # households to be removed
                is_in_group = l[self.household_categories]
                w = where(is_in_group)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(self.household_set, w, -1*diff,
                                                          self.location_id_name)
                self.remove_households = concatenate((self.remove_households, non_placed, sample_noreplace(sample_array,
                                                                                   max(0,abs(diff)-size_non_placed))))
            if diff > 0: # households to be created
                self._create_households(diff, l)
    def run(self,
            n=500,
            realestate_dataset_name='building',
            current_year=None,
            occupied_spaces_variable="occupied_spaces",
            total_spaces_variable="total_spaces",
            run_config=None,
            debuglevel=0):
        """
        run method of the Development Project Proposal Sampling Model
        
        **Parameters**
        
            **n** : int, sample size for each iteration
                   
                   sample n proposals at a time, which are then evaluated one by one until the 
                   target vacancies are satisfied or the proposals run out
                   
            **realestate_dataset_name** : string, name of the real estate dataset
            
            **current_year** : int, simulation year. If None, get the value from SimulationState
            
            **occupied_spaces_variable** : string, variable name for calculating how many spaces are currently occupied
                                        
                                          It can either be a variable of the real_estate dataset that returns 
                                          the amount of space being occupied, or a target_vacancy attribute 
                                          that contains the names of real_estate variables.   
            
            **total_spaces_variable** : string, variable name for calculating the total existing spaces
            
        **Returns**
        
            **proposal_set** : the proposal set, with the status_id of accepted proposals set to id_active 
            
            **demolished_buildings** : ids of buildings to be demolished for re-development
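        
        **Example**
        
            a minimal sketch; the constructor arguments and variable names here are 
            illustrative, not part of this method::
            
                model = DevelopmentProjectProposalSamplingModel(proposal_set, dataset_pool=dataset_pool)
                proposals, demolished_ids = model.run(n=250, current_year=2010)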
        """

        self.accepted_proposals = []
        self.demolished_buildings = []  #id of buildings to be demolished
        if self.proposal_set.n <= 0:
            logger.log_status(
                "The size of proposal_set is 0; no proposals to consider, skipping DPPSM."
            )
            return (self.proposal_set, self.demolished_buildings)

        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')

        if current_year is None:
            year = SimulationState().get_current_time()
        else:
            year = current_year
        this_year_index = where(target_vacancy['year'] == year)[0]
        target_vacancy_for_this_year = DatasetSubset(target_vacancy,
                                                     this_year_index)
        if target_vacancy_for_this_year.size() == 0:
            raise IOError, 'No target vacancy defined for year %s.' % year

        ## current_target_vacancy.target_attribute_name = 'target_vacancy_rate'
        ## each column provides a category for which a target vacancy is specified
        self.column_names = list(set( target_vacancy.get_known_attribute_names() ) - \
                            set( [ target_vacancy.target_attribute_name,
                                   'year', '_hidden_id_',
                                   occupied_spaces_variable, total_spaces_variable
                                   ] )
                            )
        self.column_names.sort(reverse=True)

        ## buildings table provides existing stocks
        self.realestate_dataset = self.dataset_pool.get_dataset(
            realestate_dataset_name)

        occupied_spaces_variables = [occupied_spaces_variable]
        total_spaces_variables = [total_spaces_variable]
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names(
        ):
            occupied_spaces_variables += unique(
                target_vacancy_for_this_year[occupied_spaces_variable]).tolist(
                )
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names(
        ):
            total_spaces_variables += unique(
                target_vacancy_for_this_year[total_spaces_variable]).tolist()

        self._compute_variables_for_dataset_if_needed(
            self.realestate_dataset, self.column_names +
            occupied_spaces_variables + total_spaces_variables)
        self._compute_variables_for_dataset_if_needed(
            self.proposal_component_set,
            self.column_names + total_spaces_variables)
        self.proposal_set.compute_variables(
            "urbansim_parcel.development_project_proposal.number_of_components",
            dataset_pool=self.dataset_pool)

        n_column = len(self.column_names)
        target_vacancy_for_this_year.column_values = target_vacancy_for_this_year.get_multiple_attributes(
            self.column_names).reshape((-1, n_column))
        self.realestate_dataset.column_values = self.realestate_dataset.get_multiple_attributes(
            self.column_names).reshape((-1, n_column))
        self.proposal_component_set.column_values = self.proposal_component_set.get_multiple_attributes(
            self.column_names).reshape((-1, n_column))
        #defaults, can be changed later by spaces_variable specified in target_vacancy rates
        self.realestate_dataset.total_spaces = self.realestate_dataset[
            total_spaces_variable]
        self.proposal_component_set.total_spaces = self.proposal_component_set[
            total_spaces_variable]
        self.realestate_dataset.occupied_spaces = self.realestate_dataset[
            occupied_spaces_variable]

        self.accounting = {}
        self.logging = {}
        #has_needed_components = zeros(self.proposal_set.size(), dtype='bool')
        for index in range(target_vacancy_for_this_year.size()):
            column_value = tuple(
                target_vacancy_for_this_year.column_values[index, :].tolist())
            accounting = {
                'target_vacancy':
                target_vacancy_for_this_year[
                    target_vacancy.target_attribute_name][index]
            }

            realestate_indexes = self.get_index_by_condition(
                self.realestate_dataset.column_values, column_value)
            component_indexes = self.get_index_by_condition(
                self.proposal_component_set.column_values, column_value)

            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names(
            ):
                this_occupied_spaces_variable = target_vacancy_for_this_year[
                    occupied_spaces_variable][index]
                self.realestate_dataset.occupied_spaces[realestate_indexes] = (
                    self.realestate_dataset[this_occupied_spaces_variable]
                    [realestate_indexes]).astype(
                        self.realestate_dataset.occupied_spaces.dtype)

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names(
            ):
                this_total_spaces_variable = target_vacancy_for_this_year[
                    total_spaces_variable][index]
                self.realestate_dataset.total_spaces[realestate_indexes] = (
                    self.realestate_dataset[this_total_spaces_variable]
                    [realestate_indexes]).astype(
                        self.realestate_dataset.total_spaces.dtype)
                self.proposal_component_set.total_spaces[component_indexes] = (
                    self.proposal_component_set[this_total_spaces_variable]
                    [component_indexes]).astype(
                        self.proposal_component_set.total_spaces.dtype)

            accounting["total_spaces_variable"] = this_total_spaces_variable
            accounting["total_spaces"] = self.realestate_dataset.total_spaces[
                realestate_indexes].sum()
            accounting[
                "occupied_spaces_variable"] = this_occupied_spaces_variable
            accounting[
                "occupied_spaces"] = self.realestate_dataset.occupied_spaces[
                    realestate_indexes].sum()
            accounting["target_spaces"] = int( round( accounting["occupied_spaces"] /\
                                                     (1 - accounting["target_vacancy"])
                                               ) )
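            # e.g. 900 occupied spaces with a target vacancy of 0.10 give
            # round(900 / 0.9) = 1000 target spaces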
            accounting["proposed_spaces"] = 0
            accounting["demolished_spaces"] = 0

            self.accounting[column_value] = accounting

            if self._is_target_reached(column_value):
                proposal_indexes = self.proposal_set.get_id_index(
                    unique(self.proposal_component_set['proposal_id']
                           [component_indexes]))
                single_component_indexes = where(
                    self.proposal_set["number_of_components"] == 1)[0]
                self.weight[intersect1d(proposal_indexes,
                                        single_component_indexes)] = 0.0

        ## handle planned proposals: all proposals with status_id == id_planned
        ## and start_year == year are accepted
        planned_proposal_indexes = where(
            logical_and(
                self.proposal_set.get_attribute("status_id") ==
                self.proposal_set.id_planned,
                self.proposal_set.get_attribute("start_year") == year))[0]

        self.consider_proposals(planned_proposal_indexes, force_accepting=True)

        # consider proposals (in this order: proposed, tentative)
        for status in [
                self.proposal_set.id_proposed, self.proposal_set.id_tentative
        ]:
            stat = (self.proposal_set.get_attribute("status_id") == status)
            if stat.sum() == 0:
                continue

            logger.log_status(
                "Sampling from %s eligible proposals of status %s." %
                (stat.sum(), status))
            iteration = 0
            while (not self._is_target_reached()):
                ## prevent proposals from being sampled for vacancy type whose target is reached
                if self.weight[stat].sum() == 0.0:
                    logger.log_warning(
                        "Running out of proposals of status %s before vacancy targets are reached; there aren't any proposals with non-zero weight"
                        % status)
                    break

                available_indexes = where(logical_and(stat,
                                                      self.weight > 0))[0]
                sample_size = minimum(available_indexes.size, n)
                sampled_proposal_indexes = probsample_noreplace(
                    available_indexes,
                    sample_size,
                    prob_array=self.weight[available_indexes],
                    return_index=False)

                self.consider_proposals(sampled_proposal_indexes)
                self.weight[sampled_proposal_indexes] = 0
                iteration += 1

        self._log_status()

        # set status of accepted proposals to 'active'
        self.proposal_set.modify_attribute(name="status_id",
                                           data=self.proposal_set.id_active,
                                           index=array(self.accepted_proposals,
                                                       dtype='int32'))

        # Code added by Jesse Ayers, MAG, 7/20/2009
        # Get the active projects:
        stat_id = self.proposal_set.get_attribute('status_id')
        actv = where(stat_id == 1)[0]
        # Where there are active projects, compute the total_land_area_taken
        # and store it on the development_project_proposals dataset
        # so it can be used by the building_construction_model for the proper
        # computation of units_proposed for those projects with velocity curves
        if actv.size > 0:
            total_land_area_taken_computed = self.proposal_set.get_attribute(
                'urbansim_parcel.development_project_proposal.land_area_taken')
            self.proposal_set.modify_attribute(
                'total_land_area_taken', total_land_area_taken_computed[actv],
                actv)

        return (self.proposal_set, self.realestate_dataset.get_id_attribute()[
            self.demolished_buildings])
    def run(self, year=None, dataset_pool=None, **kwargs):
        """
        """
        if dataset_pool is None:
            dataset_pool = SessionConfiguration().get_dataset_pool()

        if year is None:
            year = SimulationState().get_current_time()

        this_year_index = where(
            self.scheduled_events.get_attribute('year') == year)[0]
        scheduled_events_for_this_year = DatasetSubset(self.scheduled_events,
                                                       this_year_index)
        scheduled_events_for_this_year.load_dataset_if_not_loaded()
        column_names = list(
            set(self.scheduled_events.get_known_attribute_names()) - set([
                'year', 'action', 'attribute', 'amount', 'event_id',
                '_hidden_id_'
            ]))
        column_names.sort()

        for index in range(scheduled_events_for_this_year.size()):
            indicator = ones(self.dataset.size(), dtype='bool')
            event_attr = {}
            for attribute in column_names:
                if attribute in self.dataset.get_known_attribute_names():
                    dataset_attribute = self.dataset.get_attribute(attribute)
                else:
                    ## this is done inside the loop because some action may delete computed attributes, such as dataset.add_elements()
                    try:
                        dataset_attribute = self.dataset.compute_one_variable_with_unknown_package(
                            attribute, dataset_pool=dataset_pool)
                    except:
                        raise ValueError, "attribute %s used in the scheduled events dataset cannot be found in dataset %s" % (
                            attribute, self.dataset.get_dataset_name())


                aval = scheduled_events_for_this_year.get_attribute(
                    attribute)[index]
                if aval == -1:
                    continue  # ignore if column value is -1
                else:
                    indicator *= dataset_attribute == aval
                    event_attr.update({attribute: aval})

            #agents in dataset satisfying all conditions are identified by indicator
            legit_index = where(indicator)[0]

            this_event = scheduled_events_for_this_year.get_data_element(index)
            if not hasattr(this_event, 'attribute'):
                action_attr_name = ''
            else:
                action_attr_name = this_event.attribute
            action_function = getattr(self,
                                      '_' + this_event.action.strip().lower())
            action_function(amount=this_event.amount,
                            attribute=action_attr_name,
                            dataset=self.dataset,
                            index=legit_index,
                            data_dict=event_attr)

            self.post_run(self.dataset, legit_index, **kwargs)

        return self.dataset
    def run(self, n=500, 
            realestate_dataset_name = 'building',
            current_year=None,
            occupied_spaces_variable="occupied_spaces",
            total_spaces_variable="total_spaces",
            minimum_spaces_attribute="minimum_spaces",
            within_parcel_selection_weight_string=None,
            within_parcel_selection_n=0,
            within_parcel_selection_compete_among_types=False,
            within_parcel_selection_threshold=75,
            within_parcel_selection_MU_same_weight=False,
            within_parcel_selection_transpose_interpcl_weight=True,
            run_config=None,
            debuglevel=0):
        """
        run method of the Development Project Proposal Sampling Model
        
        **Parameters**
        
            **n** : int, sample size for each iteration
                   
                   sample n proposals at a time, which are then evaluated one by one until the 
                   target vacancies are satisfied or the proposals run out
                   
            **realestate_dataset_name** : string, name of the real estate dataset
            
            **current_year** : int, simulation year. If None, get the value from SimulationState
            
            **occupied_spaces_variable** : string, variable name for calculating how many spaces are currently occupied
                                        
                                          It can either be a variable of the real_estate dataset that returns 
                                          the amount of space being occupied, or a target_vacancy attribute 
                                          that contains the names of real_estate variables.   
            
            **total_spaces_variable** : string, variable name for calculating the total existing spaces
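            
            **minimum_spaces_attribute** : string, name of an optional target_vacancy attribute 
                                           holding a minimum number of spaces per vacancy category; 
                                           it is excluded from the category columns and stored in 
                                           the accounting as 'minimum_spaces'
            
            **within_parcel_selection_n** : int, if positive, at most this many proposals per parcel 
                                            are pre-selected via select_proposals_within_parcels; the 
                                            remaining within_parcel_selection_* arguments are passed 
                                            through to that method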
            
        **Returns**
        
            **proposal_set** : the proposal set, with the status_id of accepted proposals set to id_active 
            
            **demolished_buildings** : ids of buildings to be demolished for re-development
        """

        self.accepted_proposals = []
        self.demolished_buildings = []  #id of buildings to be demolished

        if self.proposal_set.n <= 0:
            logger.log_status("The size of proposal_set is 0; no proposals to consider, skipping DPPSM.")
            return (self.proposal_set, self.demolished_buildings)

        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')

        if current_year is None:
            year = SimulationState().get_current_time()
        else:
            year = current_year
        this_year_index = where(target_vacancy['year']==year)[0]
        target_vacancy_for_this_year = DatasetSubset(target_vacancy, this_year_index)
        if target_vacancy_for_this_year.size() == 0:
            raise IOError, 'No target vacancy defined for year %s.' % year
        
        ## current_target_vacancy.target_attribute_name = 'target_vacancy_rate'
        ## each column provides a category for which a target vacancy is specified
        self.column_names = list(set( target_vacancy.get_known_attribute_names() ) - \
                            set( [ target_vacancy.target_attribute_name, 
                                   'year', '_hidden_id_', minimum_spaces_attribute,
                                   occupied_spaces_variable, total_spaces_variable
                                   ] )
                            )
        self.column_names.sort(reverse=True)
            
        ## buildings table provides existing stocks
        self.realestate_dataset = self.dataset_pool.get_dataset(realestate_dataset_name)
        
        occupied_spaces_variables = [occupied_spaces_variable]
        total_spaces_variables = [total_spaces_variable]
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            occupied_spaces_variables += unique(target_vacancy_for_this_year[occupied_spaces_variable]).tolist()
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            total_spaces_variables += unique(target_vacancy_for_this_year[total_spaces_variable]).tolist()
            
        self._compute_variables_for_dataset_if_needed(self.realestate_dataset, self.column_names + occupied_spaces_variables + total_spaces_variables)
        self._compute_variables_for_dataset_if_needed(self.proposal_component_set, self.column_names + total_spaces_variables)
        self.proposal_set.compute_variables(["urbansim_parcel.development_project_proposal.number_of_components", 
                                             "urbansim_parcel.development_project_proposal.land_area_taken"],
                                            dataset_pool=self.dataset_pool)
        
        n_column = len(self.column_names)
        self.column_names_index = {}
        for iname in range(n_column):
            self.column_names_index[self.column_names[iname]] = iname
 
        target_vacancy_for_this_year.column_values = target_vacancy_for_this_year.get_multiple_attributes(self.column_names).reshape((-1, n_column))
        self.realestate_dataset.column_values = self.realestate_dataset.get_multiple_attributes(self.column_names).reshape((-1, n_column))
        self.proposal_component_set.column_values = self.proposal_component_set.get_multiple_attributes(self.column_names).reshape((-1, n_column))
        #defaults, can be changed later by spaces_variable specified in target_vacancy rates
        self.realestate_dataset.total_spaces = self.realestate_dataset[total_spaces_variable]
        self.proposal_component_set.total_spaces = self.proposal_component_set[total_spaces_variable]
        self.realestate_dataset.occupied_spaces = self.realestate_dataset[occupied_spaces_variable]
        
        self.accounting = {}
        self.logging = {}
        #has_needed_components = zeros(self.proposal_set.size(), dtype='bool')
        for index in range(target_vacancy_for_this_year.size()):
            column_value = tuple(target_vacancy_for_this_year.column_values[index,:].tolist())
            accounting = {'target_vacancy': target_vacancy_for_this_year[target_vacancy.target_attribute_name][index]}
            if minimum_spaces_attribute in target_vacancy_for_this_year.get_known_attribute_names():
                accounting['minimum_spaces'] = target_vacancy_for_this_year[minimum_spaces_attribute][index]
            realestate_indexes = self.get_index_by_condition(self.realestate_dataset.column_values, column_value)
            component_indexes = self.get_index_by_condition(self.proposal_component_set.column_values, column_value)
            
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year[occupied_spaces_variable][index]
                self.realestate_dataset.occupied_spaces[realestate_indexes] = (self.realestate_dataset[this_occupied_spaces_variable][realestate_indexes]
                                                                               ).astype(self.realestate_dataset.occupied_spaces.dtype)
    
            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year[total_spaces_variable][index]    
                self.realestate_dataset.total_spaces[realestate_indexes] = (self.realestate_dataset[this_total_spaces_variable][realestate_indexes]
                                                                            ).astype(self.realestate_dataset.total_spaces.dtype)
                self.proposal_component_set.total_spaces[component_indexes] = (self.proposal_component_set[this_total_spaces_variable][component_indexes]
                                                                               ).astype(self.proposal_component_set.total_spaces.dtype)
                
            accounting["total_spaces_variable"] = this_total_spaces_variable
            accounting["total_spaces"] = self.realestate_dataset.total_spaces[realestate_indexes].sum()
            accounting["occupied_spaces_variable"] = this_occupied_spaces_variable
            accounting["occupied_spaces"] = self.realestate_dataset.occupied_spaces[realestate_indexes].sum()
            accounting["target_spaces"] = int( round( accounting["occupied_spaces"] /\
                                                     (1 - accounting["target_vacancy"])
                                               ) )
            accounting["proposed_spaces"] = 0
            accounting["demolished_spaces"] = 0
            
            self.accounting[column_value] = accounting
            
            if self._is_target_reached(column_value):
                proposal_indexes = self.proposal_set.get_id_index(unique(self.proposal_component_set['proposal_id'][component_indexes]))
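                ## zero out the weights of proposals all of whose components fall into this
                ## (already satisfied) category, so that they can no longer be sampled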
                if n_column == 1:
                    comp_indexes = where(ndimage.sum(self.proposal_component_set[self.column_names[0]]==column_value[0], 
                                    labels=self.proposal_component_set['proposal_id'], 
                                    index=self.proposal_set.get_id_attribute()
                                    ) == self.proposal_set["number_of_components"])[0]
                else:
                    comp_indexes = where(self.proposal_set["number_of_components"]==1)[0]
                target_reached_prop_idx = intersect1d(proposal_indexes, comp_indexes)
                self.weight[target_reached_prop_idx] = 0.0
                self.proposal_set["status_id"][intersect1d(target_reached_prop_idx, where(self.proposal_set["status_id"]==self.proposal_set.id_tentative)[0])] = self.proposal_set.id_no_demand
                
        ## handle planned proposals: all proposals with status_id == id_planned 
        ## and start_year == year are accepted
        planned_proposal_indexes = where(logical_and(
                                                  self.proposal_set.get_attribute("status_id") == self.proposal_set.id_planned, 
                                                  self.proposal_set.get_attribute("start_year") == year ) 
                                        )[0]
        
        logger.start_block("Processing %s planned proposals" % planned_proposal_indexes.size)
        self.consider_proposals(planned_proposal_indexes, force_accepting=True)
        logger.end_block()
        
        if within_parcel_selection_n > 0:
            logger.start_block("Selecting proposals within parcels (%s proposals per parcel)" % within_parcel_selection_n)
            self.select_proposals_within_parcels(nmax=within_parcel_selection_n, weight_string=within_parcel_selection_weight_string,
                                                 compete_among_types=within_parcel_selection_compete_among_types, 
                                                 filter_threshold=within_parcel_selection_threshold,
                                                 MU_same_weight=within_parcel_selection_MU_same_weight,
                                                 transpose_interpcl_weight=within_parcel_selection_transpose_interpcl_weight)
            logger.end_block()
        
        # consider proposals (in this order: proposed, tentative)
        for status in [self.proposal_set.id_proposed, self.proposal_set.id_tentative]:
            stat = (self.proposal_set.get_attribute("status_id") == status)
            if stat.sum() == 0:
                continue
            
            logger.log_status("Sampling from %s eligible proposals of status %s." % (stat.sum(), status))
            iteration = 0
            while (not self._is_target_reached()):
                ## prevent proposals from being sampled for vacancy type whose target is reached
                if self.weight[stat].sum() == 0.0:
                    logger.log_warning("Running out of proposals of status %s before vacancy targets are reached; there aren't any proposals with non-zero weight" % status)
                    break
                
                available_indexes = where(logical_and(stat, self.weight > 0))[0]
                sample_size = minimum(available_indexes.size, n)
                sampled_proposal_indexes = probsample_noreplace(available_indexes, sample_size, 
                                                                prob_array=self.weight[available_indexes],
                                                                return_index=False)
                self.consider_proposals(sampled_proposal_indexes)
                self.weight[sampled_proposal_indexes] = 0
                iteration += 1
        
        self._log_status()
        
        # set status of accepted proposals to 'active'
        self.proposal_set.modify_attribute(name="status_id", 
                                           data=self.proposal_set.id_active,
                                           index=array(self.accepted_proposals, dtype='int32'))
        
        # Code added by Jesse Ayers, MAG, 7/20/2009
        # Get the active projects:
        stat_id = self.proposal_set.get_attribute('status_id')
        actv = where(stat_id==1)[0]
        # Where there are active projects, compute the total_land_area_taken
        # and store it on the development_project_proposals dataset
        # so it can be used by the building_construction_model for the proper
        # computation of units_proposed for those projects with velocity curves
        if actv.size > 0:          
            total_land_area_taken_computed = self.proposal_set['land_area_taken']
            self.proposal_set.modify_attribute('total_land_area_taken', total_land_area_taken_computed[actv], actv)

        return (self.proposal_set, self.realestate_dataset.get_id_attribute()[self.demolished_buildings])
    def run(self,
            dataset,
            outcome_attribute,
            weight_attribute,
            control_totals,
            current_year,
            control_total_attribute=None,
            year_attribute='year',
            capacity_attribute=None,
            add_quantity=False,
            dataset_pool=None):
        """'dataset' is a Dataset for which a quantity 'outcome_attribute' is created. The total amount of the quantity is 
        given by the attribute 'control_total_attribute' of the 'control_totals' Dataset. If it is not given, it is assumed 
        to have the same name as 'outcome_attribute'. The 'weight_attribute' of 'dataset' determines the allocation weights.
        The 'control_totals' Dataset contains an attribute 'year' (or alternatively, an attribute given by the 'year_attribute' argument)
        and optionally other attributes that must be known to the 'dataset' (such as a geography). For each row of the control_totals dataset
        for which year matches the 'current_year', the total amount is distributed among the corresponding members of 'dataset' according to weights.
        If a 'capacity_attribute' is given (attribute of 'dataset'), the algorithm removes any allocations that exceeds the capacity and 
        redistributes it among remaining members. The resulting values are appended to 'dataset' as 'outcome_attribute' (as primary attribute).
        If add_quantity is True and the 'outcome_attribute' exists in dataset, the resulting values are added to the current values of 
        'outcome_attribute'.
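        
        A worked example of the capacity logic: with weights (1, 3) and a control total of 100, 
        the initial allocation is (25, 75); with capacities (50, 60), the second member is 
        clipped to 60 and the excess 15 is reallocated to the first member, giving (40, 60).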
        """
        ct_attr = control_totals.get_known_attribute_names()
        if year_attribute not in ct_attr:
            raise StandardError, "Year attribute '%s' must be a known attribute of the control totals dataset." % year_attribute
        ct_attr.remove(year_attribute)
        if control_total_attribute is None:
            control_total_attribute = outcome_attribute
        if control_total_attribute not in ct_attr:
            raise StandardError, "Attribute '%s' must be a known attribute of the control totals dataset." % control_total_attribute
        ct_attr.remove(control_total_attribute)
        if control_totals._is_hidden_id():
            ct_attr.remove(control_totals.id_name()[0])

        # compute weights and other attributes necessary for allocation
        attrs_to_compute = [weight_attribute] + ct_attr
        if capacity_attribute is not None:
            attrs_to_compute.append(capacity_attribute)
        for attr in attrs_to_compute:
            try:
                dataset.compute_variables(attr, dataset_pool=dataset_pool)
            except:
                dataset.compute_one_variable_with_unknown_package(
                    attr, dataset_pool=dataset_pool)

        # create subset of control totals for the current year
        year_index = where(
            control_totals.get_attribute(year_attribute) == current_year)[0]
        if year_index.size <= 0:
            logger.log_warning("No control total for year %s" % current_year)
            return None
        control_totals_for_this_year = DatasetSubset(control_totals,
                                                     year_index)

        # check capacity
        if capacity_attribute is not None:
            if dataset.get_attribute(capacity_attribute).sum(
            ) < control_totals_for_this_year.get_attribute(
                    control_total_attribute).sum():
                logger.log_warning(
                    "Capacity (%s) is smaller than the amount to allocate (%s)."
                    % (dataset.get_attribute(capacity_attribute).sum(),
                       control_totals_for_this_year.get_attribute(
                           control_total_attribute).sum()))
            C = dataset.get_attribute(capacity_attribute).astype('int32')

        all_weights = dataset.get_attribute(weight_attribute)
        outcome = zeros(dataset.size(), dtype='int32')
        for ct_row in range(control_totals_for_this_year.size()):
            is_considered = ones(dataset.size(), dtype='bool8')
            for characteristics in ct_attr:
                is_considered = logical_and(
                    is_considered,
                    dataset.get_attribute(characteristics) ==
                    control_totals_for_this_year.get_attribute(
                        characteristics)[ct_row])
            T = control_totals_for_this_year.get_attribute(
                control_total_attribute)[ct_row]
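            # iteratively allocate T proportionally to the weights, clip at capacity,
            # and redistribute the clipped excess among members still under capacity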
            it = 1
            while True:
                is_considered_idx = where(is_considered)[0]
                weights = all_weights[is_considered_idx]
                weights_sum = float(weights.sum())
                outcome[is_considered_idx] = round_(
                    outcome[is_considered_idx] + T *
                    (weights / weights_sum)).astype('int32')
                if capacity_attribute is None:
                    break
                diff = outcome[is_considered_idx] - C[is_considered_idx]
                outcome[is_considered_idx] = clip(outcome[is_considered_idx],
                                                  0, C[is_considered_idx])
                if it == 1 and C[is_considered_idx].sum() < T:
                    logger.log_warning(
                        "Control total %s cannot be met due to a capacity restriction of %s"
                        % (T, C[is_considered_idx].sum()))
                T = where(diff < 0, 0, diff).sum()
                if T <= 0:
                    break
                is_considered = logical_and(is_considered, outcome < C)
                it += 1
        if add_quantity and (outcome_attribute
                             in dataset.get_known_attribute_names()):
            dataset.modify_attribute(name=outcome_attribute,
                                     data=outcome +
                                     dataset.get_attribute(outcome_attribute))
            logger.log_status(
                'New values added to the attribute %s of dataset %s.' %
                (outcome_attribute, dataset.get_dataset_name()))
        else:
            dataset.add_primary_attribute(name=outcome_attribute, data=outcome)
            logger.log_status(
                'New values stored into attribute %s of dataset %s.' %
                (outcome_attribute, dataset.get_dataset_name()))
        dataset.flush_attribute(outcome_attribute)
        return outcome
index_attribute = "lc0207_100k_0"

# 4. Years - date pair of input data; year is concatenated to flt_directory_in specified in #1
#years = [1991, 1995]
#years = [1995, 1999]
#years = [2002]
#years = sys.argv[3]
years = [2007, 2007]

lc1 =  LandCoverDataset(in_storage = StorageFactory().get_storage('flt_storage', 
        storage_location = os.path.join(flt_directory_in, str(years[0]))),
    out_storage = StorageFactory().get_storage('flt_storage', 
        storage_location = os.path.join(flt_directory_out, str(years[0]))))

agents_index = where(lc1.get_attribute(index_attribute))[0]
lc1subset = DatasetSubset(lc1, agents_index)
print "Writing set 1:"
for attr in lc1.get_primary_attribute_names():
    print "   ", attr
    lc1subset.write_dataset(attributes=[attr], out_table_name="land_covers")
    lc1.delete_one_attribute(attr) # leaving this line in causes every other input data file to be processed; commenting it out causes a memory error
    
lc2 =  LandCoverDataset(in_storage = StorageFactory().get_storage('flt_storage', 
        storage_location = os.path.join(flt_directory_in, str(years[1]))),
    out_storage = StorageFactory().get_storage('flt_storage',
        storage_location = os.path.join(flt_directory_out, str(years[1]))))
                  
lc2subset = DatasetSubset(lc2, agents_index)
print "Writing set 2:"
for attr in lc2.get_primary_attribute_names():
    print "   ", attr
    lc2subset.write_dataset(attributes=[attr], out_table_name="land_covers")
    lc2.delete_one_attribute(attr) # free memory after writing, mirroring the first loop
    def run(self, realestate_dataset,
            year=None, 
            occupied_spaces_variable="occupied_units",
            total_spaces_variable="total_units",
            target_attribute_name='target_vacancy_rate',
            sample_from_dataset = None,
            sample_filter="",
            reset_attribute_value={}, 
            year_built = 'year_built',
            dataset_pool=None,
            append_to_realestate_dataset = False,
            table_name = "development_projects",
            dataset_name = "development_project",
            id_name = 'development_project_id',
            **kwargs):
        """         
        sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
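        Returns a tuple (result_dataset, index): the dataset of newly sampled (cloned) records -- 
        or realestate_dataset itself if append_to_realestate_dataset is True -- and the index 
        of the new elements within that dataset.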
        """
        
        if self.target_vancy_dataset is None:
            raise RuntimeError, "target_vacancy_rate dataset is unspecified."
        
        if not sample_from_dataset:
            sample_from_dataset = realestate_dataset
            
        #if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()
        alldata = dataset_pool.get_dataset('alldata')
        unit_names = dataset_pool.get_dataset('building_type').get_attribute('unit_name')
        sqft_per_job = dataset_pool.get_dataset('building_sqft_per_job')
        zones = realestate_dataset.compute_variables("building.disaggregate(parcel.zone_id)")
        type_ids = realestate_dataset.get_attribute("building_type_id")
        building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(zones.max(), type_ids.max())
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute('year')==year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)
        
        column_names = list(set( self.target_vancy_dataset.get_known_attribute_names() ) - set( [ target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_'] ))
        column_names.sort(reverse=True)
        column_values = dict([ (name, target_vacancy_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        dataset_known_attributes = realestate_dataset.get_known_attribute_names()
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
            if variable not in sample_dataset_known_attributes:
                sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
                
        dataset_known_attributes = realestate_dataset.get_known_attribute_names() #update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1
                
        sampled_index = array([], dtype=int32)

        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
        error_log = ''
        for index in range(target_vacancy_for_this_year.size()):
            this_sampled_index = array([], dtype=int32)
            indicator = ones( realestate_dataset.size(), dtype='bool' )
            sample_indicator = ones( sample_from_dataset.size(), dtype='bool' )
            criterion = {}   # for logging
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = realestate_dataset.get_attribute(attribute)
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (attribute, realestate_dataset.get_dataset_name())
                
                if attribute + '_min' in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute+'_min')[index] 
                    criterion.update({attribute + '_min':amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                        sample_indicator *= sample_attribute >= amin
                if attribute + '_max' in column_names: 
                    amax = target_vacancy_for_this_year.get_attribute(attribute+'_max')[index]
                    criterion.update({attribute + '_max':amax}) 
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names: 
                    aval = column_values[attribute][index] 
                    criterion.update({attribute:aval}) 
                    if aval == -1:
                        continue
                    elif aval == -2:  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        indicator *= dataset_attribute == aval
                        sample_indicator *= sample_attribute == aval
                        
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate 
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
            
            logger.be_quiet() #temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
            realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            if unit_names[index]=="residential_units":
                num_units = alldata.compute_variables("alldata.aggregate_all(household.building_type_id==%s)" % (index+1))
                #persons = household_set.compute_variables("%s.number_of_agents(%s)" % (hh_ds_name, person_ds_name), resources=resources)
                num_units = num_units[0]
            else:
                num_units = alldata.compute_variables("alldata.aggregate_all(job.disaggregate(employment_submarket.building_type_id)==%s)" % (index+1))
                num_units = num_units * building_sqft_per_job_table[1, (index+1)]
                num_units = num_units[0]
            #need to make sure that job empsubmarket doesn't rely on building...
            #Must do non-home-based jobs only and then multiply by building_sqft
            logger.talk()
            
            actual_num = (indicator * realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            #target_num = int(round( (indicator * realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /\
            target_num = int(round( num_units /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index]) 
                            ))
            diff = target_num - actual_num
            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int( diff / mean_size )
                    ## sample at least 1 project when diff > 0; otherwise num_of_projects_to_sample = 0 would cause an endless loop
                    num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
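                    ## keep only as many sampled projects as needed to just cover diff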
                    this_sampled_index = this_sampled_index[0:(1+searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                              ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
            #if diff < 0: #TODO demolition; not yet supported
            
            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0: action = "+" + str(action_num)
                if diff < 0: action = "-" + str(action_num)
            cat = [ str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(diff), action]
            
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:                
                logger.log_status("\t".join(cat))
            
        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
            
        result_data = {}
        result_dataset = None
        index = array([], dtype='int32')
        if sampled_index.size > 0:
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32'))
            ## also add 'independent_variables' to the new dataset
            for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
                if reset_attribute_value.has_key(attribute):
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
        
            if id_name and result_data and id_name not in result_data:
                result_data[id_name] = arange(sampled_index.size, dtype='int32') + 1
        
            storage = StorageFactory().get_storage('dict_storage')
            storage.write_table(table_name=table_name, table_data=result_data)
            
            result_dataset = Dataset(id_name = id_name,
                                      in_storage = storage,
                                      in_table_name = table_name,
                                      dataset_name = dataset_name
                                      )
            index = arange(result_dataset.size())
        
            
        if append_to_realestate_dataset:
            if len(result_data) > 0:
                index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)                
            result_dataset = realestate_dataset
        
        return (result_dataset, index)

    #CacheScenarioDatabase().run(gridcell_config)

    # step 2: cache water demand data by year
    dbcon = ScenarioDatabase(database_name="water_demand_seattle2")

    print "Create Storage object."
    from opus_core.storage_factory import StorageFactory
    storage = StorageFactory().get_storage(type="mysql_storage",
                                           storage_location=dbcon)

    from waterdemand.datasets.consumption_dataset import ConsumptionDataset
    consumption_types = ['wrmr', 'wcsr', 'wrsr']  #'wcmr'
    for consumption_type in consumption_types:

        consumption = ConsumptionDataset(in_storage=storage,
                                         in_table_name=consumption_type +
                                         '_grid')

        for year in range(1990, 2001):
            print "%s %s" % (consumption_type, year)
            year_index = where(consumption.get_attribute("billyear") == year)[0]
            out_storage = StorageFactory().get_storage(
                type="flt_storage",
                storage_location=os.path.join(cache_directory, str(year)))
            consumption_subset = DatasetSubset(consumption, year_index)
            consumption_subset.write_dataset(
                out_storage=out_storage,
                out_table_name=consumption_type.lower())
class EmploymentTransitionModel(Model):
    """Creates and removes jobs from job_set."""

    model_name = "Employment Transition Model"
    location_id_name_default = "grid_id"
    variable_package_default = "urbansim"

    def __init__(self, location_id_name=None, variable_package=None, dataset_pool=None, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.location_id_name = self.location_id_name_default
        self.variable_package = self.variable_package_default
        if location_id_name is not None:
            self.location_id_name = location_id_name
        if variable_package is not None:
            self.variable_package = variable_package
        self.dataset_pool = self.create_dataset_pool(dataset_pool, ["urbansim", "opus_core"])

    def run(self, year, job_set, control_totals, job_building_types, data_objects=None, resources=None):
        self._do_initialize_for_run(job_set, job_building_types, data_objects)
        idx = where(control_totals.get_attribute("year")==year)[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        self._do_run_for_this_year(job_set)
        return self._update_job_set(job_set)
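    # a hypothetical invocation (the year and variable names are illustrative):
    #   etm = EmploymentTransitionModel(location_id_name="grid_id")
    #   new_jobs_index = etm.run(2005, job_set, control_totals, job_building_types)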
        
    def _do_initialize_for_run(self, job_set, job_building_types, data_objects=None):
        self.max_id = job_set.get_id_attribute().max()
        self.job_size = job_set.size()
        self.job_id_name = job_set.get_id_name()[0]
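        # accumulators for jobs to be created and removed across all sectors this year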
        self.new_jobs = {
            self.location_id_name:array([], dtype=job_set.get_data_type(self.location_id_name, int32)),
            "sector_id":array([], dtype=job_set.get_data_type("sector_id", int32)),
            self.job_id_name:array([], dtype=job_set.get_data_type(self.job_id_name, int32)),
            "building_type":array([], dtype=job_set.get_data_type("building_type", int8))
                    }
        self.remove_jobs = array([], dtype=int32)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included({job_building_types.get_dataset_name():job_building_types})
        self.available_building_types = job_building_types.get_id_attribute()

    def _compute_sector_variables(self, sectors, job_set):
        compute_resources = Resources({"debug":self.debug})
        job_set.compute_variables(
            ["%s.%s.is_in_employment_sector_%s_home_based"
                 % (self.variable_package, job_set.get_dataset_name(), sector)
             for sector in sectors] +
            ["%s.%s.is_in_employment_sector_%s_non_home_based"
                 % (self.variable_package, job_set.get_dataset_name(), sector)
             for sector in sectors] + ["is_non_home_based_job", "is_home_based_job"],
            dataset_pool = self.dataset_pool,
            resources = compute_resources)
        
    def _do_run_for_this_year(self, job_set):
        building_type = job_set.get_attribute("building_type")
        sectors = unique(self.control_totals_for_this_year.get_attribute("sector_id"))
        self._compute_sector_variables(sectors, job_set)
        for sector in sectors:
            isector = where(self.control_totals_for_this_year.get_attribute("sector_id") == sector)[0]
            total_hb_jobs = self.control_totals_for_this_year.get_attribute("total_home_based_employment")[isector]
            total_nhb_jobs = self.control_totals_for_this_year.get_attribute("total_non_home_based_employment")[isector]
            is_in_sector_hb = job_set.get_attribute("is_in_employment_sector_%s_home_based" % sector)
            is_in_sector_nhb = job_set.get_attribute("is_in_employment_sector_%s_non_home_based" % sector)
            diff_hb = int(total_hb_jobs - is_in_sector_hb.astype(int8).sum())
            diff_nhb = int(total_nhb_jobs - is_in_sector_nhb.astype(int8).sum())
            if diff_hb < 0: # home based jobs to be removed
                w = where(is_in_sector_hb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_hb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate((self.remove_jobs, non_placed,
                                           sample_noreplace(sample_array, max(0,abs(diff_hb)-size_non_placed))))
            if diff_nhb < 0: # non home based jobs to be removed
                w = where(is_in_sector_nhb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_nhb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate((self.remove_jobs, non_placed,
                                           sample_noreplace(sample_array, max(0,abs(diff_nhb)-size_non_placed))))

            if diff_hb > 0: # home based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate((self.new_jobs[self.location_id_name],
                                   zeros((diff_hb,), dtype=self.new_jobs[self.location_id_name].dtype.type)))
                self.new_jobs["sector_id"] = concatenate((self.new_jobs["sector_id"],
                                   (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_hb))))
                if 1 in is_in_sector_hb:
                    building_type_distribution = array(ndimage_sum(is_in_sector_hb,
                                                                    labels=building_type,
                                                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute("is_home_based_job"): # take the building type distribution from the whole region
                    building_type_distribution = array(ndimage_sum(
                                                                job_set.get_attribute("is_home_based_job"),
                                                                labels=building_type,
                                                                index=self.available_building_types))
                else: # there are no home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(self.available_building_types.size)
                    building_type_distribution = building_type_distribution/building_type_distribution.sum()
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_hb, building_type_distribution/
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"],
                            sampled_building_types.astype(self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_hb
                self.new_jobs[self.job_id_name] = concatenate((self.new_jobs[self.job_id_name],
                                                     arange(self.max_id+1, new_max_id+1)))
                self.max_id = new_max_id

            if diff_nhb > 0: # non home based jobs to be created
                self.new_jobs[self.location_id_name]=concatenate((self.new_jobs[self.location_id_name],
                                     zeros((diff_nhb,), dtype=self.new_jobs[self.location_id_name].dtype.type)))
                self.new_jobs["sector_id"]=concatenate((self.new_jobs["sector_id"],
                                           (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_nhb))))
                if 1 in is_in_sector_nhb:
                    building_type_distribution = array(ndimage_sum(is_in_sector_nhb,
                                                                    labels=building_type,
                                                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute("is_non_home_based_job"): # take the building type distribution from the whole region
                    building_type_distribution = array(ndimage_sum(
                                                        job_set.get_attribute("is_non_home_based_job"),
                                                        labels=building_type,
                                                        index=self.available_building_types))
                else: # there are no non-home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(self.available_building_types.size)
                    building_type_distribution = building_type_distribution/building_type_distribution.sum()
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_nhb, building_type_distribution/
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"],
                                        sampled_building_types.astype(self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id+diff_nhb
                self.new_jobs[self.job_id_name]=concatenate((self.new_jobs[self.job_id_name], arange(self.max_id+1, 
                                                                                                     new_max_id+1)))
                self.max_id = new_max_id

    def _update_job_set(self, job_set):
        job_set.remove_elements(self.remove_jobs)
        job_set.add_elements(self.new_jobs, require_all_attributes=False)
        difference = job_set.size()-self.job_size
        self.debug.print_debug("Difference in number of jobs: %s (original %s,"
            " new %s, created %s, deleted %s)"
                % (difference,
                   self.job_size,
                   job_set.size(),
                   self.new_jobs[self.job_id_name].size,
                   self.remove_jobs.size),
            3)
        self.debug.print_debug("Number of unplaced jobs: %s"
            % where(job_set.get_attribute(self.location_id_name) <=0)[0].size,
            3)
        return difference

    def prepare_for_run(self, storage, **kwargs):
        from urbansim.datasets.control_total_dataset import ControlTotalDataset
        control_totals = ControlTotalDataset(in_storage=storage, what="employment")
        sample_control_totals(storage, control_totals, **kwargs)
        return control_totals
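
# The per-sector bookkeeping in _do_run_for_this_year reduces to: compare the
# control total with the current job count, then either sample jobs for removal
# (without replacement) or append that many new jobs. A simplified sketch, with
# numpy.random.choice standing in for opus_core's sample_noreplace (names are
# illustrative, not the model's API):
import numpy as np

def sketch_sector_transition(job_sectors, sector, control_total, rng=np.random):
    in_sector = np.where(job_sectors == sector)[0]
    diff = control_total - in_sector.size
    if diff < 0:
        # too many jobs: sample |diff| of them for removal, without replacement
        remove = rng.choice(in_sector, size=-diff, replace=False)
        return remove, 0
    # too few jobs: diff new jobs would be created (locations zeroed, building
    # types drawn from the observed distribution via probsample_replace)
    return np.array([], dtype="int32"), diff

remove, n_new = sketch_sector_transition(np.array([1, 1, 2, 1]), 1, control_total=2)
print "remove %s jobs, create %s jobs" % (remove.size, n_new)
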
    def run(self, n=500, run_config=None, current_year=None, debuglevel=0):
        """
        n - sample n proposals at a time, evaluate them one by one
        """
        self.demolished_buildings = array([], dtype='int32')  #id of buildings to be demolished
        if current_year is None:
            current_year = SimulationState().get_current_time()
        if not self.positive_proposals:
            logger.log_status("Proposal Set size <= 0, no proposals to consider, skipping DPPSM.")
            return (self.proposal_set, self.demolished_buildings) 
        self.proposal_component_set.compute_variables([
            'urbansim_parcel.development_project_proposal_component.units_proposed',
            'urbansim_parcel.development_project_proposal_component.is_residential'],
                                        dataset_pool=self.dataset_pool)
        self.proposal_set.compute_variables([
            'urbansim_parcel.development_project_proposal.number_of_components',
            'zone_id=development_project_proposal.disaggregate(parcel.zone_id)',
            #'occurence_frequency = development_project_proposal.disaggregate(development_template.sample_size)'
            ],
                                        dataset_pool=self.dataset_pool)
        buildings = self.dataset_pool.get_dataset("building")
        buildings.compute_variables([
                                "occupied_units_for_jobs = urbansim_parcel.building.number_of_non_home_based_jobs",
                                "units_for_jobs = urbansim_parcel.building.total_non_home_based_job_space",
                                "occupied_residential_units = urbansim_parcel.building.number_of_households",
#                                "urbansim_parcel.building.existing_units",
                                "urbansim_parcel.building.is_residential"
                                    ],
                                    dataset_pool=self.dataset_pool)

        ## define unit_name by whether a building is residential or not (is_residential attribute):
        ## if it is non-residential (0), count units by number of job spaces (units_for_jobs);
        ## if it is residential (1), count units by residential_units
        self.unit_name = array(["units_for_jobs", "residential_units"])
                
        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')
        target_vacancy.compute_variables(['is_residential = target_vacancy.disaggregate(building_type.is_residential)'],
                                         dataset_pool=self.dataset_pool)
        # If the target_vacancy dataset carries a subarea_id_name attribute,
        # vacancy rates are specified per subarea; otherwise they apply region wide.
        try:
            target_vacancy.load_dataset()
            if self.subarea_id_name in target_vacancy.get_attribute_names():
                current_target_vacancy_this_year = DatasetSubset(target_vacancy, index=where(target_vacancy.get_attribute("year")==current_year)[0])
                current_target_vacancy = DatasetSubset(current_target_vacancy_this_year, index=where(current_target_vacancy_this_year.get_attribute(self.subarea_id_name)==self.area_id)[0])
            else:
                current_target_vacancy = DatasetSubset(target_vacancy, index=where(target_vacancy.get_attribute("year")==current_year)[0])
        except AttributeError:
            # vacancy rates are specified region wide:
            current_target_vacancy = DatasetSubset(target_vacancy, index=where(target_vacancy.get_attribute("year")==current_year)[0])

        if current_target_vacancy.size() == 0:
            raise IOError, 'No target vacancy defined for year %s.' % current_year
        
        self.existing_units = {}   #total existing units by land_use type
        self.occupied_units = {}   #total occupied units by land_use type
        self.proposed_units = {}   #total proposed units by land_use type
        self.demolished_units = {} #total (to be) demolished units by land_use type

        components_building_type_ids = self.proposal_component_set.get_attribute("building_type_id").astype("int32")
        proposal_ids = self.proposal_set.get_id_attribute()
        proposal_ids_in_component_set = self.proposal_component_set.get_attribute("proposal_id")
        all_units_proposed = self.proposal_component_set.get_attribute("units_proposed")
        number_of_components_in_proposals = self.proposal_set.get_attribute("number_of_components")
        
        self.accepting_proposals = zeros(current_target_vacancy.get_attribute("building_type_id").max()+1, dtype='bool8')  #whether accepting new proposals, for each building type
        self.accepted_proposals = [] # index of accepted proposals

        self.target_vacancies = {}
        tv_building_types = current_target_vacancy.get_attribute("building_type_id")
        tv_rate = current_target_vacancy.get_attribute("target_vacancy_rate")
        for itype in range(tv_building_types.size):
            self.target_vacancies[tv_building_types[itype]] = tv_rate[itype]
            
        self.check_vacancy_rates(current_target_vacancy)  #initialize self.accepting_proposal based on current vacancy rate

        sqft_per_job = self.dataset_pool.get_dataset("building_sqft_per_job")
        zones_of_proposals = self.proposal_set.get_attribute("zone_id")
        self.building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(zones_of_proposals.max(), 
                                                                                   tv_building_types.max())
        # consider only those proposals that have all components of accepted type and sum of proposed units > 0
        is_accepted_type = self.accepting_proposals[components_building_type_ids]
        sum_is_accepted_type_over_proposals = array(ndimage.sum(is_accepted_type, labels = proposal_ids_in_component_set, 
                                                          index = proposal_ids))
        sum_of_units_proposed = array(ndimage.sum(all_units_proposed, labels = proposal_ids_in_component_set, 
                                                          index = proposal_ids))
        is_proposal_eligible = logical_and(sum_is_accepted_type_over_proposals == number_of_components_in_proposals,
                                           sum_of_units_proposed > 0)

        is_proposal_eligible = logical_and(is_proposal_eligible,
                                           self.proposal_set.get_attribute("start_year")==current_year )
        ## handle planned proposals: all proposals with status_id == is_planned 
        ## and start_year == current_year are accepted
        planned_proposal_indexes = where(logical_and(
                                                  self.proposal_set.get_attribute("status_id") == self.proposal_set.id_planned, 
                                                  self.proposal_set.get_attribute("start_year") == current_year ) 
                                        )[0] 
                                   
        self.consider_proposals(planned_proposal_indexes, force_accepting=True)
        # consider proposals (in this order: planned, proposed, tentative)
        for status in [self.proposal_set.id_proposed, self.proposal_set.id_tentative]:
            idx = where(logical_and(self.proposal_set.get_attribute("status_id") == status, is_proposal_eligible))[0]
            if idx.size <= 0:
                continue
            logger.log_status("Sampling from %s eligible proposals with status %s." % (idx.size, status))
            while (True in self.accepting_proposals):
                if self.weight[idx].sum() == 0.0:
                    logger.log_warning("Running out of proposals; there aren't any proposals with non-zero weight")
                    break
                
                idx = idx[self.weight[idx] > 0]
                n = minimum(idx.size, n)
                sampled_proposal_indexes = probsample_noreplace(proposal_ids[idx], n, 
                                                prob_array=(self.weight[idx]/float(self.weight[idx].sum())),                                                                
                                                exclude_index=None, return_index=True)
                self.consider_proposals(arange(self.proposal_set.size())[idx[sampled_proposal_indexes]])
                self.weight[idx[sampled_proposal_indexes]] = 0

        # set status of accepted proposals to 'active'
        self.proposal_set.modify_attribute(name="status_id", data=self.proposal_set.id_active,
                                          index=array(self.accepted_proposals, dtype='int32'))
        building_types = self.dataset_pool.get_dataset("building_type")
        logger.log_status("Status of %s development proposals set to active." % len(self.accepted_proposals))
        logger.log_status("Target/existing vacancy rates (reached using eligible proposals) by building type:")
        for type_id in self.existing_units.keys():
            units_stock = self._get_units_stock(type_id)
            vr = self._get_vacancy_rates(type_id)
            ## units = residential_units if building_type is residential
            ## units = number of job spaces if building_type is non-residential
            logger.log_status(
                              """%(type_id)s[%(type_name)s]: %(vr)s = ((existing_units:%(existing_units)s + 
                              units_proposed:%(units_proposed)s - units_to_be_demolished:%(units_demolished)s) 
                              - units_occupied:%(units_occupied)s) / units_stock:%(units_stock)s""" %  \
                                          { 'type_id': type_id,
                                            'type_name': building_types.get_attribute_by_id("building_type_name", type_id),
                                            'vr':  vr,
                                            'existing_units': int(self.existing_units[type_id]),
                                            'units_occupied': int(self.occupied_units[type_id]),
                                            'units_proposed': int(self.proposed_units[type_id]),
                                            'units_demolished': int(self.demolished_units[type_id]),
                                            'units_stock': int(units_stock)
                                          }
                            )
        # Code added by Jesse Ayers, MAG, 7/20/2009
        # Get the active projects:
        stat_id = self.proposal_set.get_attribute('status_id')
        actv = where(stat_id==1)[0]
        # Where there are active projects, compute the total_land_area_taken
        # and store it on the development_project_proposals dataset
        # so it can be used by the building_construction_model for the proper
        # computation of units_proposed for those projects with velocity curves
        if actv.size > 0:          
            total_land_area_taken_computed = self.proposal_set.get_attribute('urbansim_parcel.development_project_proposal.land_area_taken')
            self.proposal_set.modify_attribute('total_land_area_taken', total_land_area_taken_computed[actv], actv)

        return (self.proposal_set, self.demolished_buildings) 
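
# The sampling loop above keeps drawing eligible proposals, weighted and without
# replacement, for as long as at least one building type still accepts new
# development; a type stops accepting once its stock-wide vacancy reaches the
# target rate. The acceptance test itself is just the following (a sketch,
# assuming units are already aggregated per building type):
def sketch_is_accepting(existing_units, occupied_units, target_vacancy_rate):
    vacancy_rate = (existing_units - occupied_units) / float(existing_units)
    return vacancy_rate < target_vacancy_rate

print sketch_is_accepting(1000, 980, 0.05)  # True: only 2% vacant, keep accepting
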
        print flt_directory_out
        
        test_flag = options.test_flag
        
#        shutil.rmtree(flt_directory_out)
#        os.mkdir(flt_directory_out)
        
        logger.log_status("Convert input data from ", str(input_year))
    
    lc = LandCoverDataset(in_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_directory_in),
        out_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_directory_out))
    
    lc.get_header() # added 23 june 2009 by mm
    mask = lc.get_mask()
    idx = where(mask==0)[0]
    lcsubset = DatasetSubset(lc, idx)
    print "Converting:"
    lcsubset.write_dataset(attributes=["relative_x"], out_table_name="land_covers")
    #lcsubset.write_dataset(attributes=["relative_x"], out_table_name="land_covers",
    #                            valuetypes=valuetypes)
    lc.delete_one_attribute("relative_x")
    lcsubset.write_dataset(attributes=["relative_y"], out_table_name="land_covers")
    #lcsubset.write_dataset(attributes=["relative_y"], out_table_name="land_covers",
    #                            valuetypes=valuetypes)
    lc.delete_one_attribute("relative_y")
#    srcdir = os.path.join(flt_directory_out, "land_covers", "computed")
#    shutil.move(os.path.join(srcdir,"relative_x.li4"), os.path.join(flt_directory_out, "land_covers"))
#    shutil.move(os.path.join(srcdir,"relative_y.li4"), os.path.join(flt_directory_out, "land_covers"))
#    shutil.rmtree(srcdir)
    for attr in lc.get_primary_attribute_names():
        print "   ", attr
    def _do_run(self,
                location_set,
                agent_set,
                agents_index,
                data_objects=None,
                resources=None):
        location_id_name = location_set.get_id_name()[0]
        jobsubset = DatasetSubset(agent_set, agents_index)
        if jobsubset.size() <= 0:
            return array([], dtype='int32')
        #unplace jobs
        agent_set.set_values_of_one_attribute(
            location_id_name, resize(array([-1.0]), jobsubset.size()),
            agents_index)
        sector_ids = jobsubset.get_attribute("sector_id")
        sectors = unique(sector_ids)
        counts = ndimage_sum(ones((jobsubset.size(), )),
                             labels=sector_ids.astype('int32'),
                             index=sectors.astype('int32'))
        if sectors.size <= 1:
            counts = array([counts])
        variables = map(lambda x: "number_of_jobs_of_sector_" + str(int(x)),
                        sectors)
        compute_variables = map(
            lambda var: self.variable_package + "." + location_set.
            get_dataset_name() + "." + var, variables)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included(
            {agent_set.get_dataset_name(): agent_set})
        location_set.compute_variables(compute_variables,
                                       dataset_pool=self.dataset_pool)
        if self.filter is None:
            location_index = arange(location_set.size())
        else:
            filter_values = location_set.compute_variables(
                [self.filter], dataset_pool=self.dataset_pool)
            location_index = where(filter_values > 0)[0]
        if location_index.size <= 0:
            logger.log_status("No locations available. Nothing to be done.")
            return array([], dtype='int32')
        location_subset = DatasetSubset(location_set, location_index)
        i = 0
        for sector in sectors:
            distr = location_subset.get_attribute(variables[i])
            if ma.allclose(distr.sum(), 0):
                uniform_prob = 1.0 / distr.size
                distr = resize(array([uniform_prob], dtype='float64'),
                               distr.size)
                logger.log_warning(
                    "Probabilities in scaling model for sector " +
                    str(sector) +
                    " sum to 0.0.  Substituting uniform distribution!")


#                random_sample = sample(location_set.get_attribute("grid_id"), k=int(counts[i]), \
#                                   probabilities = distr)
            distr = distr / float(distr.sum())
            random_sample = probsample_replace(
                location_subset.get_id_attribute(),
                size=int(counts[i]),
                prob_array=distr)
            idx = where(sector_ids == sector)[0]
            #modify job locations
            agent_set.set_values_of_one_attribute(location_id_name,
                                                  random_sample,
                                                  agents_index[idx])
            i += 1
        return agent_set.get_attribute_by_index(location_id_name, agents_index)
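
# The scaling procedure above places each sector's unplaced jobs by sampling
# locations with probability proportional to the number of same-sector jobs
# already there, falling back to a uniform distribution when all weights are
# zero. A sketch with numpy.random.choice standing in for probsample_replace:
import numpy as np

def sketch_scale_jobs(location_ids, sector_job_counts, n_unplaced, rng=np.random):
    weights = sector_job_counts.astype("float64")
    if weights.sum() == 0.0:
        weights = np.ones_like(weights)       # uniform fallback, as warned above
    prob = weights / weights.sum()
    return rng.choice(location_ids, size=n_unplaced, replace=True, p=prob)

print sketch_scale_jobs(np.arange(1, 4), np.array([5, 0, 15]), n_unplaced=4)
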
    def test_agents_placed_in_appropriate_types(self):
        """Create 1000 unplaced industrial jobs and 1 commercial job. Allocate 50 commercial
        gridcells with enough space for 10 commercial jobs per gridcell. After running the
        EmploymentLocationChoiceModel, the 1 commercial job should be placed,
        but the 100 industrial jobs should remain unplaced
        """
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='job_building_types',
            table_data = {
                'id':array([2,1]),
                'name': array(['commercial', 'industrial'])
                }
            )
        job_building_types = JobBuildingTypeDataset(in_storage=storage, in_table_name='job_building_types')

        storage.write_table(table_name='jobs',
            table_data = {
                'job_id': arange(1001)+1,
                'grid_id': array([0]*1001),
                'building_type': array([1]*1000 + [2])
                }
            )
        jobs = JobDataset(in_storage=storage, in_table_name='jobs')

        storage.write_table(table_name='gridcells',
            table_data = {
                'grid_id': arange(50)+1,
                'commercial_sqft': array([1000]*50),
                'commercial_sqft_per_job': array([100]*50)
                }
            )
        gridcells = GridcellDataset(in_storage=storage, in_table_name='gridcells')

        coefficients = Coefficients(names=("dummy",), values=(0.1,))
        specification = EquationSpecification(variables=("gridcell.commercial_sqft",), coefficients=("dummy",))

        compute_resources = Resources({"job":jobs, "job_building_type": job_building_types})
        agents_index = where(jobs.get_attribute("grid_id") == 0)[0]
        unplace_jobs = DatasetSubset(jobs, agents_index)
        agents_index = where(unplace_jobs.get_attribute("building_type") == 2)[0]
        gridcells.compute_variables(["urbansim.gridcell.number_of_commercial_jobs"],
                                    resources=compute_resources)
        commercial_jobs = gridcells.get_attribute("number_of_commercial_jobs")

        gridcells.compute_variables(["urbansim.gridcell.number_of_industrial_jobs"],
                                    resources=compute_resources)
        industrial_jobs = gridcells.get_attribute("number_of_industrial_jobs")
        model_group = ModelGroup(job_building_types, "name")
        elcm = EmploymentLocationChoiceModel(ModelGroupMember(model_group,"commercial"), location_set=gridcells,
               agents_grouping_attribute = "job.building_type",
               choices = "opus_core.random_choices_from_index", sample_size_locations = 30)
        elcm.run(specification, coefficients, agent_set = jobs, agents_index=agents_index, debuglevel=1)

        gridcells.compute_variables(["urbansim.gridcell.number_of_commercial_jobs"],
                                    resources=compute_resources)
        commercial_jobs = gridcells.get_attribute("number_of_commercial_jobs")

        gridcells.compute_variables(["urbansim.gridcell.number_of_industrial_jobs"],
                                    resources=compute_resources)
        industrial_jobs = gridcells.get_attribute("number_of_industrial_jobs")

        self.assertEqual(commercial_jobs.sum() == 1,
                         True, "Error, there should only be a total of 1 commercial job")
        self.assertEqual(industrial_jobs.sum() == 0,
                         True, "Error, there should be no industrial jobs because there's no space for them")
    def run(
        self,
        realestate_dataset,
        year=None,
        occupied_spaces_variable="occupied_units",
        total_spaces_variable="total_units",
        target_attribute_name="target_vacancy_rate",
        sample_from_dataset=None,
        sample_filter="",
        reset_attribute_value={},
        year_built="year_built",
        dataset_pool=None,
        append_to_realestate_dataset=False,
        table_name="development_projects",
        dataset_name="development_project",
        id_name="development_project_id",
        **kwargs
    ):
        """         
        sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
        """

        if self.target_vancy_dataset is None:
            raise RuntimeError, "target_vacancy_rate dataset is unspecified."

        if not sample_from_dataset:
            sample_from_dataset = realestate_dataset

        # if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute("year") == year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)

        column_names = list(
            set(self.target_vancy_dataset.get_known_attribute_names())
            - set([target_attribute_name, occupied_spaces_variable, total_spaces_variable, "year", "_hidden_id_"])
        )
        column_names.sort(reverse=True)
        column_values = dict(
            [
                (name, target_vacancy_for_this_year.get_attribute(name))
                for name in column_names + [target_attribute_name]
            ]
        )

        independent_variables = list(set([re.sub("_max$", "", re.sub("_min$", "", col)) for col in column_names]))
        dataset_known_attributes = realestate_dataset.get_known_attribute_names()
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
            if variable not in sample_dataset_known_attributes:
                sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)

        dataset_known_attributes = realestate_dataset.get_known_attribute_names()  # update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1

        sampled_index = array([], dtype=int32)

        # log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "expected", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "expected", "difference", "action"]))
        error_log = ""
        for index in range(target_vacancy_for_this_year.size()):
            this_sampled_index = array([], dtype=int32)
            indicator = ones(realestate_dataset.size(), dtype="bool")
            sample_indicator = ones(sample_from_dataset.size(), dtype="bool")
            criterion = {}  # for logging
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = realestate_dataset.get_attribute(attribute)
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset cannot be found in dataset %s" % (
                        attribute,
                        realestate_dataset.get_dataset_name(),
                    )

                if attribute + "_min" in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute + "_min")[index]
                    criterion.update({attribute + "_min": amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                        sample_indicator *= sample_attribute >= amin
                if attribute + "_max" in column_names:
                    amax = target_vacancy_for_this_year.get_attribute(attribute + "_max")[index]
                    criterion.update({attribute + "_max": amax})
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names:
                    aval = column_values[attribute][index]
                    criterion.update({attribute: aval})
                    if aval == -1:
                        continue
                    elif (
                        aval == -2
                    ):  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        indicator *= dataset_attribute == aval
                        sample_indicator *= sample_attribute == aval

            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[
                    index
                ]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]

            # `col` must name the stratifying column of the target vacancy table;
            # assumed here to be the last entry of column_names (typically `is_residential`)
            col = column_names[-1]
            this_total_spaces_variable += "_" + str(criterion[col])
            this_occupied_spaces_variable += "_" + str(criterion[col])

            logger.be_quiet()  # temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(
                this_occupied_spaces_variable, dataset_pool=dataset_pool
            )
            realestate_dataset.compute_one_variable_with_unknown_package(
                this_total_spaces_variable, dataset_pool=dataset_pool
            )
            sample_from_dataset.compute_one_variable_with_unknown_package(
                this_total_spaces_variable, dataset_pool=dataset_pool
            )
            logger.talk()

            actual_num = (realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            # target_num is obsolete with this version.
            target_num = int(
                round(
                    (realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum()
                    / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
                )
            )
            """If the target vacancy is very small and the inflow to the region big it is not enough to check
            only the current simulation year's vacancy. The simulation is more robust if the BTM is anticipating the
            next year's population (of households and jobs).
            #TODO: Make code more general to cover various stratifications in the real estate market.
            """
            if criterion[col] == 1:  # residential: anticipate next year's household control totals
                idx = where(self.control_totals.get_attribute("year") == year + 1)[0]
                this_years_control_totals = DatasetSubset(self.control_totals, idx)
                expected_num = int(
                    round(
                        this_years_control_totals.get_attribute("total_number_of_households").sum()
                        / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
                    )
                )
            if criterion[col] == 0:  # non-residential: anticipate next year's employment control totals
                idx = where(self.employment_control_totals.get_attribute("year") == year + 1)[0]
                next_years_control_totals = DatasetSubset(self.employment_control_totals, idx)
                expected_num = int(
                    round(
                        next_years_control_totals.get_attribute("number_of_jobs").sum()
                        / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
                    )
                )

            diff = expected_num - actual_num

            # Previous version which is checking the current years occupation.
            # diff = target_num - actual_num

            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(
                    logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0
                )[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int(diff / mean_size)
                    ## sample at least 1 project when diff > 0; otherwise num_of_projects_to_sample = 0
                    ## would make the while loop below endless
                    num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[
                        0 : (1 + searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))
                    ]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += (
                        "There is nothing to sample from %s and no new development will happen for "
                        % sample_from_dataset.get_dataset_name()
                        + ",".join([col + "=" + str(criterion[col]) for col in column_names])
                        + "\n"
                    )
            # if diff < 0: #TODO demolition; not yet supported

            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0:
                    action = "+" + str(action_num)
                if diff < 0:
                    action = "-" + str(action_num)
            cat = [str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(expected_num), str(diff), action]

            if PrettyTable is not None:
                status_log.add_row(cat)
            else:
                logger.log_status("\t".join(cat))

        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)

        result_data = {}
        result_dataset = None
        index = array([], dtype="int32")
        if True:  # sampled_index.size > 0:
            ### ideally duplicate_rows() would be all that is needed to add the newly cloned rows;
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            result_data.setdefault(year_built, resize(year, sampled_index.size).astype("int32"))
            ## also add 'independent_variables' to the new dataset
            for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
                if reset_attribute_value.has_key(attribute):
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)

            if id_name and result_data and id_name not in result_data:
                result_data[id_name] = arange(sampled_index.size, dtype="int32") + 1

            storage = StorageFactory().get_storage("dict_storage")
            storage.write_table(table_name=table_name, table_data=result_data)

            result_dataset = Dataset(
                id_name=id_name, in_storage=storage, in_table_name=table_name, dataset_name=dataset_name
            )
            index = arange(result_dataset.size())

        if append_to_realestate_dataset:
            if len(result_data) > 0:
                index = realestate_dataset.add_elements(
                    result_data, require_all_attributes=False, change_ids_if_not_unique=True
                )
            result_dataset = realestate_dataset

        return (result_dataset, index)
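
# The core sampling trick in the run above: draw candidate projects with
# replacement until their summed capacity covers the deficit, then trim the
# draw with cumsum/searchsorted so it overshoots by at most one project. A
# sketch with numpy.random.choice standing in for opus_core's sample_replace:
import numpy as np

def sketch_sample_to_cover(capacities, legit_index, diff, rng=np.random):
    num_to_sample = max(int(diff / capacities[legit_index].mean()), 1)
    sampled = np.array([], dtype="int32")
    while capacities[sampled].sum() < diff:
        sampled = np.concatenate((sampled, rng.choice(legit_index, size=num_to_sample)))
    # keep just enough of the draw to reach `diff`
    return sampled[:1 + np.searchsorted(np.cumsum(capacities[sampled]), diff)]

capacities = np.array([50.0, 20.0, 80.0])
print sketch_sample_to_cover(capacities, np.arange(3), diff=100.0)
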
    def prepare_for_run(self, dataset_pool, 
                        create_proposal_set=True,
                        parcel_filter_for_new_development=None, 
                        parcel_filter_for_redevelopment=None, 
                        template_filter=None,
                        spec_replace_module_variable_pair=None,
                        proposed_units_variable="urbansim_parcel.development_project_proposal.units_proposed",
                        **kwargs):
        """create development project proposal dataset from parcels and development templates.
        spec_replace_module_variable_pair is a tuple with two elements: module name, variable within the module
        that contans a dictionary of model variables to be replaced in the specification.
        """
        specification, coefficients, dummy = RegressionModel.prepare_for_run(self, **kwargs)
        try:
            existing_proposal_set_parent = dataset_pool.get_dataset('development_project_proposal')
            # load proposals whose status_id is neither id_tentative nor id_not_available
            available_idx = where(logical_and(existing_proposal_set_parent.get_attribute("status_id") != DevelopmentProjectProposalDataset.id_tentative,
                                              existing_proposal_set_parent.get_attribute("status_id") != DevelopmentProjectProposalDataset.id_not_available))[0]
            existing_proposal_set = DatasetSubset(existing_proposal_set_parent, available_idx)
            # Code updated by Hanyi Li, MAG 6/8/2010
            # Replacing the cached 'development_project_proposal' dataset with
            # the filtered dataset 'existing_proposal_set'
            dataset_pool.replace_dataset(existing_proposal_set_parent.get_dataset_name(), existing_proposal_set)
        except:
            existing_proposal_set = None
        
        parcels = dataset_pool.get_dataset('parcel')
        templates = dataset_pool.get_dataset('development_template')

        # It is important that during this method no variable flushing happens, since
        # we create datasets of the same name for different purposes (new development and redevelopment)
        # and flushing would mix them up
        flush_variables_current = SessionConfiguration().get('flush_variables', False)
        SessionConfiguration().put_data({'flush_variables': False})
        
        # Code added by Jesse Ayers, MAG, 9/14/2009
        # Getting an index of parcels that have actively developing projects (those on a velocity function)
        # and making sure that new proposals are not generated for them
        if existing_proposal_set:
            parcels_with_proposals = existing_proposal_set.get_attribute('parcel_id')
            parcels_with_proposals_idx = parcels.get_id_index(parcels_with_proposals)
            if parcel_filter_for_new_development is not None:
                if parcel_filter_for_new_development[parcel_filter_for_new_development.find('=')+1] == '=':
                    filter = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
                else:
                    parcel_filter_for_new_development = parcel_filter_for_new_development[parcel_filter_for_new_development.find('=')+1:].lstrip()
                    filter = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
                index1 = where(parcels.compute_variables(filter))[0]

        else:
            if parcel_filter_for_new_development is not None:
                index1 = where(parcels.compute_variables(parcel_filter_for_new_development))[0]
            else:
                index1 = None
            
        if template_filter is not None:
            try:
                index2 = where(templates.compute_variables(template_filter))[0]
            except Exception, e:
                logger.log_warning( "template_filter is set to %s, but there is an error when computing it: %s"
                                   % (template_filter, e) )
                index2 = None
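
# The string handling above distinguishes "alias = expression" filters from bare
# boolean expressions (whose first '=' belongs to '=='): if the character after
# the first '=' is another '=', the filter is embedded as-is; otherwise the
# alias is stripped first. Either way it is AND-ed with the "no proposals on
# this parcel yet" condition. A sketch of that string surgery in isolation:
def sketch_combine_filter(user_filter):
    eq = user_filter.find('=')
    if user_filter[eq + 1] != '=':                   # "alias = expression" form
        user_filter = user_filter[eq + 1:].lstrip()  # drop the alias
    return ('flter = numpy.logical_and('
            'parcel.number_of_agents(development_project_proposal) == 0, %s)'
            % user_filter)

print sketch_combine_filter('developable = parcel.vacant_land_area > 0')
print sketch_combine_filter('parcel.parcel_sqft == 0')
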
 def get_active_agent_set(self):
     """Return agent set that make choices in the model.
     Works only for the ChoiceModel class.
     """
     agents = self.get_agent_set()
     return DatasetSubset(agents, self.get_agent_set_index())
    def run(self,
            development_proposal_set,
            building_dataset,
            dataset_pool,
            buildings_to_be_demolished=[],
            consider_amount_built_in_parcels=False,
            current_year=None):

        self.demolish_buildings(buildings_to_be_demolished, building_dataset,
                                dataset_pool)

        if development_proposal_set.size() <= 0:
            logger.log_status(
                "Proposal set is empty. Nothing to be constructed.")
            return development_proposal_set

        # load velocity function dataset
        try:
            velocity_function_set = dataset_pool.get_dataset(
                "velocity_function")
        except:
            velocity_function_set = None

        # choose active projects
        is_active = development_proposal_set.get_attribute(
            "status_id") == development_proposal_set.id_active
        is_delayed_or_active = logical_or(
            is_active,
            development_proposal_set.get_attribute("status_id") ==
            development_proposal_set.id_with_velocity)
        active_idx = where(is_delayed_or_active)[0]

        if active_idx.size <= 0:
            logger.log_status("No new buildings built.")
            return development_proposal_set

        if current_year is None:
            current_year = SimulationState().get_current_time()

        active_proposal_set = DatasetSubset(development_proposal_set,
                                            active_idx)

        # create proposal_component_set from the active proposals
        proposal_component_set = create_from_proposals_and_template_components(
            active_proposal_set,
            dataset_pool.get_dataset('development_template_component'))
        dataset_pool.replace_dataset(proposal_component_set.get_dataset_name(),
                                     proposal_component_set)

        # determine building types and corresponding unit names of the involved building_types
        building_type_id = proposal_component_set.get_attribute(
            "building_type_id")
        building_type_set = dataset_pool.get_dataset("building_type")
        #        unit_names = building_type_set.compute_variables([
        #                                  'building_type.disaggregate(generic_building_type.unit_name)'], dataset_pool=dataset_pool)
        unit_names = building_type_set.get_attribute("unit_name")
        # get unique values of the involved generic building types and unique unit names
        unique_building_types = unique(building_type_id)
        index_in_building_types = building_type_set.get_id_index(
            unique_building_types)
        unit_names = unit_names[index_in_building_types]
        is_residential = building_type_set.get_attribute(
            "is_residential")[index_in_building_types] == 1
        unique_unit_names = unique(unit_names)

        # determine existing units on parcels
        parcels = dataset_pool.get_dataset("parcel")
        parcels.compute_variables(
            ["urbansim_parcel.parcel.vacant_land_area"] +
            ["urbansim_parcel.parcel.residential_units"] +
            map(lambda x: "urbansim_parcel.parcel.%s" % x, unique_unit_names),
            dataset_pool=dataset_pool)
        parcel_is_lut_vacant = parcels.compute_variables(
            ["urbansim_parcel.parcel.is_land_use_type_vacant"],
            dataset_pool=dataset_pool)
        parcel_lut = parcels.get_attribute("land_use_type_id")
        component_land_use_types = proposal_component_set.compute_variables(
            [
                'development_project_proposal_component.disaggregate(development_template.land_use_type_id, [development_project_proposal])'
            ],
            dataset_pool=dataset_pool)

        # from the velocity function determine the amount to be built for each component (in %)
        if velocity_function_set is not None:
            cummulative_amount_of_development = proposal_component_set.compute_variables(
                [
                    "urbansim_parcel.development_project_proposal_component.cummulative_amount_of_development"
                ],
                dataset_pool=dataset_pool)
            percent_of_development_this_year = proposal_component_set.compute_variables(
                [
                    "urbansim_parcel.development_project_proposal_component.percent_of_development_this_year"
                ],
                dataset_pool=dataset_pool)
        else:  # if there is no velocity function, all components have velocity of 100%
            percent_of_development_this_year = resize(
                array([100], dtype="int32"), proposal_component_set.size())

        # amount to be built
        to_be_built = proposal_component_set.compute_variables(
            [
                'urbansim_parcel.development_project_proposal_component.units_proposed'
            ],
            dataset_pool=dataset_pool
        ) / 100.0 * percent_of_development_this_year

        # initializing for new buildings
        max_building_id = building_dataset.get_id_attribute().max()
        new_buildings = {}
        new_buildings["parcel_id"] = array([], dtype="int32")
        new_buildings["residential_units"] = array([], dtype="int32")
        new_buildings["non_residential_sqft"] = array([], dtype="int32")
        new_buildings["building_type_id"] = array([], dtype="int32")
        new_buildings["sqft_per_unit"] = array(
            [], dtype=building_dataset.get_attribute("sqft_per_unit").dtype)
        new_buildings["land_area"] = array(
            [], dtype=building_dataset.get_attribute("land_area").dtype)
        new_buildings["improvement_value"] = array(
            [],
            dtype=building_dataset.get_attribute("improvement_value").dtype)
        new_buildings["template_id"] = array([], dtype="int32")

        sqft_per_unit = proposal_component_set.get_attribute(
            "building_sqft_per_unit").astype(
                new_buildings["sqft_per_unit"].dtype)
        # Compute land_area_taken properly if velocity function is present
        if velocity_function_set is not None:
            larea_taken = proposal_component_set.compute_variables(
                ['urbansim_parcel.development_project_proposal_component.land_area_taken'],
                dataset_pool=dataset_pool)
            pct_dev_this_yr_conv = (percent_of_development_this_year / 100.0)
            land_area_taken = larea_taken * pct_dev_this_yr_conv
        else:
            land_area_taken = proposal_component_set.compute_variables(
                [
                    'urbansim_parcel.development_project_proposal_component.land_area_taken'
                ],
                dataset_pool=dataset_pool).astype(
                    new_buildings["land_area"].dtype)
        construction_cost = proposal_component_set.compute_variables(
            [
                'urbansim_parcel.development_project_proposal_component.construction_cost'
            ],
            dataset_pool=dataset_pool).astype(
                new_buildings["improvement_value"].dtype)
        template_ids = proposal_component_set.get_attribute("template_id")
        number_of_new_buildings = {}
        number_of_new_buildings_by_template_id = {}

        # iterate over building types that are unique over the involved proposals
        for itype in range(unique_building_types.size):
            this_building_type = unique_building_types[itype]
            number_of_new_buildings[this_building_type] = 0
            unit_name = unit_names[itype]
            if is_residential[itype]:
                unit_name = 'residential_units'
            component_index = where(building_type_id == this_building_type)[0]
            parcel_ids_in_components = proposal_component_set.get_attribute_by_index(
                "parcel_id", component_index)
            unique_parcels = unique(parcel_ids_in_components)
            # iterate over involved parcels
            for parcel_id in unique_parcels:
                pidx = component_index[parcel_ids_in_components == parcel_id]
                parcel_index = parcels.get_id_index(parcel_id)
                # what is already built on this parcel
                if consider_amount_built_in_parcels:
                    amount_built = parcels.get_attribute_by_index(
                        unit_name, parcel_index)
                else:
                    amount_built = 0
                # what is proposed on this parcel
                amount_proposed = to_be_built[pidx].sum()
                # build if needed
                if rint(amount_proposed) > amount_built:
                    if unit_name == "residential_units":
                        bunit = "residential_units"
                        bnunit = "non_residential_sqft"
                    else:
                        bnunit = "residential_units"
                        bunit = "non_residential_sqft"
                    to_be_built_cumsum = rint(cumsum(
                        to_be_built[pidx])).astype("int32")
                    idx_to_be_built = where(
                        to_be_built_cumsum > amount_built)[0]
                    new_buildings["parcel_id"] = concatenate(
                        (new_buildings["parcel_id"],
                         array(idx_to_be_built.size * [parcel_id],
                               dtype="int32")))
                    new_buildings[bunit] = concatenate(
                        (new_buildings[bunit],
                         rint(to_be_built[pidx][idx_to_be_built]).astype(
                             new_buildings[bunit].dtype)))
                    new_buildings[bnunit] = concatenate(
                        (new_buildings[bnunit],
                         array(idx_to_be_built.size * [0], dtype="int32")))
                    new_buildings["building_type_id"] = concatenate(
                        (new_buildings["building_type_id"],
                         array(idx_to_be_built.size * [this_building_type],
                               dtype="int32")))
                    new_buildings["sqft_per_unit"] = concatenate(
                        (new_buildings["sqft_per_unit"],
                         sqft_per_unit[pidx][idx_to_be_built]))
                    new_buildings["land_area"] = concatenate(
                        (new_buildings["land_area"],
                         land_area_taken[pidx][idx_to_be_built]))
                    new_buildings["improvement_value"] = concatenate(
                        (new_buildings["improvement_value"],
                         construction_cost[pidx][idx_to_be_built]))
                    new_buildings["template_id"] = concatenate(
                        (new_buildings["template_id"],
                         template_ids[pidx][idx_to_be_built]))
                    number_of_new_buildings[
                        this_building_type] += idx_to_be_built.size
                    if parcel_is_lut_vacant[parcel_index]:
                        parcel_lut[parcel_index] = component_land_use_types[
                            pidx][idx_to_be_built][0]
                    # count number of buildings by template ids
                    for icomp in range(idx_to_be_built.size):
                        tid = template_ids[pidx[idx_to_be_built[icomp]]]
                        if tid not in number_of_new_buildings_by_template_id:
                            number_of_new_buildings_by_template_id[tid] = 0
                        number_of_new_buildings_by_template_id[tid] += 1

        # add created buildings to the existing building dataset
        buildings_id_name = building_dataset.get_id_name()[0]
        new_buildings[buildings_id_name] = max_building_id + arange(
            1, new_buildings["parcel_id"].size + 1)
        new_buildings['year_built'] = resize(
            array([current_year], dtype="int32"),
            new_buildings["parcel_id"].size)
        building_dataset.add_elements(new_buildings,
                                      require_all_attributes=False)
        if "zone_id" in building_dataset.get_known_attribute_names():
            zone_ids = building_dataset.compute_variables(
                ['building.disaggregate(parcel.zone_id)'],
                dataset_pool=dataset_pool)
            building_dataset.modify_attribute(name="zone_id", data=zone_ids)
        if "county" in building_dataset.get_known_attribute_names():
            county_ids = building_dataset.compute_variables(
                ['building.disaggregate(parcel.county)'],
                dataset_pool=dataset_pool)
            building_dataset.modify_attribute(name="county", data=county_ids)

        logger.log_status("%s new buildings built." %
                          new_buildings["parcel_id"].size)
        for type_id in number_of_new_buildings.keys():
            logger.log_status("building type %s: %s" %
                              (type_id, number_of_new_buildings[type_id]))
        logger.log_status("Number of new buildings by template ids:")
        logger.log_status(number_of_new_buildings_by_template_id)

        # recompute the cumulative development amount
        if velocity_function_set is not None:
            # determine if everything has been built or if it should be carried over to next year
            cummulative_amount_of_development = development_proposal_set.compute_variables(
                [
                    "development_project_proposal.aggregate(urbansim_parcel.development_project_proposal_component.cummulative_amount_of_development)/urbansim_parcel.development_project_proposal.number_of_components"
                ],
                dataset_pool=dataset_pool)
        else:  # if there is no velocity function, all components have velocity of 100%
            ## TODO: need to be reviewed, probably by Hana
            ## changed from proposal_component_set to development_proposal_set
            ## so it will have the same shape as is_delayed_or_active
            cummulative_amount_of_development = resize(
                array([100], dtype="int32"), development_proposal_set.size())
        will_be_delayed = cummulative_amount_of_development < 100
        velocity_idx = where(logical_and(is_delayed_or_active,
                                         will_be_delayed))[0]
        if velocity_idx.size > 0:
            # for the unfinished projects set the status_id to id_with_velocity
            development_proposal_set.set_values_of_one_attribute(
                "status_id",
                development_proposal_set.id_with_velocity,
                index=velocity_idx)
        not_velocity_idx = where(
            logical_and(is_delayed_or_active, logical_not(will_be_delayed)))[0]
        if not_velocity_idx.size > 0:
            # for the remaining projects set the status_id to id_not_available
            development_proposal_set.set_values_of_one_attribute(
                "status_id",
                development_proposal_set.id_not_available,
                index=not_velocity_idx)

        dataset_pool._remove_dataset(proposal_component_set.get_dataset_name())
        return development_proposal_set
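
# A minimal standalone sketch of the cumulative-sum selection used in the method
# above: given the amounts proposed by a parcel's components and the amount already
# built there, keep only the components whose running total exceeds the existing
# stock. The function name (demo_select_components_to_build) is illustrative only.
def demo_select_components_to_build(proposed_amounts, amount_built):
    from numpy import array, cumsum, rint, where
    proposed = array(proposed_amounts)
    csum = rint(cumsum(proposed)).astype("int32")
    # components that add capacity beyond what already stands on the parcel
    return where(csum > amount_built)[0]

# e.g. proposed amounts [40, 30, 50] with 60 units built keep components 1 and 2
# (running totals 40, 70, 120; only 70 and 120 exceed 60)
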
class DevelopmentProjectTransitionModel( Model ):
    """
    Creates development projects. Each development project is for a single type
    of development, e.g. 'industrial' or 'commercial'.  This model creates
    enough development projects to match the desired vacancy rates, as defined in the target_vacancies
    table.  It does not place any projects in locations; that is the job of the development project
    location choice models.  The distribution of project sizes (amount of space, value of space) is
    determined by sampling from the projects in the development_event_history table.
    """
    model_name = "Development Project Transition Model"
    
    def __init__( self, debuglevel=0 ):
        self.debug = DebugPrinter( debuglevel )

    def pre_check( self, location_set, vacancy_table, types ):
        for ptype in types:
            self.check_for_space( location_set.get_attribute(self.variable_for_total_units[ptype]))
        self.check_target_vacancy_is_not_100_percent( vacancy_table.get_attribute( "target_total_vacancy"))

    def check_for_space( self, values ):
        """Check that this array of values sums to something > 0."""
        self.do_check( "x > 0", array( [values.sum()] ) )

    def check_target_vacancy_is_not_100_percent( self, value ):
        """Check that the target vacancy rate is not 100% (ratio == 1), because it doesn't make sense,
        and it also causes a divide by 0 error."""
        self.do_check( "x < 1", value )

    def run( self, vacancy_table, history_table, year, location_set, dataset_pool=None, resources=None ):
        self.dataset_pool=dataset_pool
        building_types = self.dataset_pool.get_dataset('building_type')
        target_vacancy_this_year = DatasetSubset(vacancy_table, index=where(vacancy_table.get_attribute("year")==year)[0])
        building_type_ids = target_vacancy_this_year.get_attribute('building_type_id')
        building_type_idx = building_types.get_id_index(building_type_ids)
        self.used_building_types = DatasetSubset(building_types, index=building_type_idx)
        project_types =  self.used_building_types.get_attribute('building_type_name')
        is_residential = self.used_building_types.get_attribute('is_residential')
        unit_names =  where(is_residential, 'residential_units', 'non_residential_sqft')
        specific_unit_names =  where(is_residential, 'residential_units', '_sqft')
        rates =  target_vacancy_this_year.get_attribute('target_total_vacancy')
        self.project_units = {}
        self.project_specific_units = {}
        target_rates = {}
        for i in range(self.used_building_types.size()):
            self.project_units[project_types[i]] = unit_names[i]
            if is_residential[i]:
                self.project_specific_units[project_types[i]] = specific_unit_names[i]
            else:
                self.project_specific_units[project_types[i]] = "%s%s" % (project_types[i], specific_unit_names[i])
            target_rates[building_type_ids[i]] = rates[i]
            
        self._compute_vacancy_and_total_units_variables(location_set, project_types, resources)
        self.pre_check( location_set, target_vacancy_this_year, project_types)
    
        projects = None
        for project_type_id, target_vacancy_rate in target_rates.iteritems():
            # determine current-year vacancy rates
            project_type = building_types.get_attribute_by_id('building_type_name', project_type_id)
            vacant_units_sum = location_set.get_attribute(self.variable_for_vacancy[project_type]).sum()
            units_sum = float( location_set.get_attribute(self.variable_for_total_units[project_type]).sum() )
            should_develop_units = int(round(max( 0, ( target_vacancy_rate * units_sum - vacant_units_sum ) /
                                         ( 1 - target_vacancy_rate ) )))
            logger.log_status(project_type + ": vacant units: %d, should be vacant: %f, sum units: %d"
                          % (vacant_units_sum, target_vacancy_rate * units_sum, units_sum))

            if not should_develop_units:
                logger.log_note(("Will not build any " + project_type + " units, because the current vacancy of %d units\n"
                             + "is more than the %d units desired for the vacancy rate of %f.")
                            % (vacant_units_sum,
                               target_vacancy_rate * units_sum,
                               target_vacancy_rate))
            # create projects
            if should_develop_units > 0:
                this_project = self._create_projects(should_develop_units, project_type, project_type_id, history_table,
                                                               location_set, units_sum, resources)
                if projects is None:
                    projects = this_project
                else:
                    projects.join_by_rows(this_project, change_ids_if_not_unique=True)
        return projects

    
    def _compute_vacancy_and_total_units_variables(self, location_set, project_types, resources=None):
        compute_resources = Resources(resources)
        compute_resources.merge({"debug":self.debug})
        self.variable_for_vacancy = {}
        self.variable_for_total_units = {}
        for ptype in project_types:
            self.variable_for_vacancy[ptype] = compute_resources.get(
                                    "%s_vacant_variable" % ptype,
                                    "urbansim_zone.%s.vacant_%s" % (location_set.get_dataset_name(),
                                                                     self.project_specific_units[ptype]))
            self.variable_for_total_units[ptype] = compute_resources.get(
                                    "%s_total_units_variable" % ptype,
                                    "%s.aggregate(urbansim_zone.building.total_%s)" % (location_set.get_dataset_name(), 
                                                             self.project_specific_units[ptype]))
            location_set.compute_variables([self.variable_for_vacancy[ptype], self.variable_for_total_units[ptype]], 
                                           dataset_pool=self.dataset_pool, resources = compute_resources)
            
    def _create_projects(self, should_develop_units, project_type, project_type_id, history_table, location_set, units_sum, resources=None):
        history_values = history_table.get_attribute(self.project_units[project_type])
        type_code_values = history_table.get_change_type_code_attribute(self.project_units[project_type])
        # take only non-zero history values and those that don't represent demolished buildings 
        history_values_without_zeros = history_values[logical_and( history_values > 0, 
                                                                  type_code_values !=  DevelopmentEventTypeOfChange.DELETE)]
        mean_size = history_values_without_zeros.mean()
        idx = array( [], dtype="int32" )
        # Ensure that there are some development projects to choose from.
        num_of_projects_to_select = int(max( 10, round_( should_develop_units / mean_size ) ))
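        # Sample project sizes from the history with replacement until their cumulative
        # size reaches the target; projects beyond the target are trimmed off, but at
        # least one project is always kept.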
        while True:
            idx = concatenate( ( idx, randint( 0, history_values_without_zeros.size,
                                                num_of_projects_to_select ) ) )
            csum = history_values_without_zeros[idx].cumsum()
            idx1 = idx[csum <= should_develop_units]
            if idx1.size == 0: # at least one project should be selected
                idx = array([idx[0]], dtype="int32")
            else:
                idx = idx1
            if csum[-1] >= should_develop_units:
                break
        data = {"residential_units": zeros( ( idx.size, ), dtype=int32),
                "non_residential_sqft": zeros( ( idx.size, ), dtype=int32),
                'building_type_id': array(idx.size* [project_type_id]),
                "project_id": arange( idx.size ) + 1,
                "building_id": zeros( ( idx.size, ), dtype=int32)}
        data[self.project_units[project_type]]= history_values_without_zeros[idx]
        storage = StorageFactory().get_storage('dict_storage')

        development_projects_table_name = 'development_projects'
        storage.write_table(table_name=development_projects_table_name, table_data=data)

        return Dataset(
            in_storage = storage,
            in_table_name = development_projects_table_name,
            id_name='project_id'
            )
    def run(self, in_storage, out_storage=None, business_dsname="business", zone_dsname=None):
        dataset_pool = DatasetPool(storage=in_storage, package_order=['psrc_parcel', 'urbansim_parcel', 'urbansim', 'opus_core'] )
        seed(1)
        allbusinesses = dataset_pool.get_dataset(business_dsname)
        parcels = dataset_pool.get_dataset('parcel')
        buildings = dataset_pool.get_dataset('building')
        parcels.compute_variables(["urbansim_parcel.parcel.residential_units", "number_of_buildings = parcel.number_of_agents(building)", 
                                   "non_residential_sqft = (parcel.aggregate(building.non_residential_sqft)).astype(int32)",
                                   "number_of_res_buildings = parcel.aggregate(urbansim_parcel.building.is_residential)",
                                   "number_of_nonres_buildings = parcel.aggregate(urbansim_parcel.building.is_non_residential)",
                                   "number_of_mixed_use_buildings = parcel.aggregate(urbansim_parcel.building.is_generic_building_type_6)"
                                   ], 
                                  dataset_pool=dataset_pool)
        restypes = [12, 4, 19, 11, 34, 10, 33]
        reslutypes = [13,14,15,24]
        is_valid_business = ones(allbusinesses.size(), dtype='bool8')
        parcels_not_matched = logical_and(in1d(allbusinesses["parcel_id"], parcels.get_id_attribute(), invert=True), allbusinesses["parcel_id"] > 0)
        if parcels_not_matched.sum() > 0:
            is_valid_business[where(parcels_not_matched)] = False
            logger.log_warning(message="No parcel exists for %s businesses (%s jobs)" % (parcels_not_matched.sum(), 
                                                                                         allbusinesses[self.number_of_jobs_attr][where(parcels_not_matched)].sum()))
        zero_parcel = allbusinesses["parcel_id"] <= 0
        if zero_parcel.sum() > 0:
            is_valid_business[where(zero_parcel)] = False
            logger.log_warning(message="%s businesses (%s jobs) have no parcel assigned (parcel_id <= 0)" % (zero_parcel.sum(),
                                                                                         allbusinesses[self.number_of_jobs_attr][where(zero_parcel)].sum()))
            
        zero_size = logical_and(is_valid_business, allbusinesses[self.number_of_jobs_attr].round() == 0)
        if zero_size.sum() > 0:
            is_valid_business[where(zero_size)] = False
            logger.log_warning(message="%s businesses are of size 0." % zero_size.sum())
        
        businesses = DatasetSubset(allbusinesses, index=where(is_valid_business)[0])
        
        parcels.add_attribute(name="number_of_workplaces", data=parcels.sum_dataset_over_ids(businesses, constant=1))
        
        has_single_res_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_res_buildings"] == 1) # 1 (1 residential)
        parcels.add_attribute(data=has_single_res_buildings.astype("int32"), name="buildings_code")
        has_mult_res_buildings = logical_and(parcels["number_of_buildings"] > 1,  parcels["number_of_nonres_buildings"] == 0) # 2 (mult residential)
        parcels.modify_attribute("buildings_code", data=2*ones(has_mult_res_buildings.sum()), index=where(has_mult_res_buildings)) 
        has_single_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_nonres_buildings"] == 1), parcels["number_of_mixed_use_buildings"] == 0) # 3 (1 non-res)
        parcels.modify_attribute("buildings_code", data=3*ones(has_single_nonres_buildings.sum()), index=where(has_single_nonres_buildings)) 
        has_mult_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] > 1, parcels["number_of_res_buildings"] == 0), parcels["number_of_mixed_use_buildings"] == 0) # 4 (mult non-res)
        parcels.modify_attribute("buildings_code", data=4*ones(has_mult_nonres_buildings.sum()), index=where(has_mult_nonres_buildings))
        has_single_mixed_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_mixed_use_buildings"] == 1) # 5 (1 mixed-use)
        parcels.modify_attribute("buildings_code", data=5*ones(has_single_mixed_buildings.sum()), index=where(has_single_mixed_buildings))
        has_mult_mixed_buildings = logical_and(parcels["number_of_buildings"] > 1, 
                                               logical_or(logical_and(parcels["number_of_res_buildings"] > 0, parcels["number_of_nonres_buildings"] > 0), 
                                                          logical_or(parcels["number_of_mixed_use_buildings"] > 1, 
                                                                     logical_and(parcels["number_of_res_buildings"] == 0, 
                                                                                 parcels["number_of_mixed_use_buildings"] > 0)))) # 6
        parcels.modify_attribute("buildings_code", data=6*ones(has_mult_mixed_buildings.sum()), index=where(has_mult_mixed_buildings))
        has_no_building_res_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)) # 7 (vacant with res LU type)
        parcels.modify_attribute("buildings_code", data=7*ones(has_no_building_res_lutype.sum()), index=where(has_no_building_res_lutype)) 
        has_no_building_nonres_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)==0) # 8 (vacant with non-res LU type)
        parcels.modify_attribute("buildings_code", data=8*ones(has_no_building_nonres_lutype.sum()), index=where(has_no_building_nonres_lutype))
        
        business_sizes = businesses[self.number_of_jobs_attr].round().astype("int32") 
        business_location = {}
        business_location1wrkpl = zeros(businesses.size(), dtype="int32")
        business_location1wrkplres = zeros(businesses.size(), dtype="int32")
        business_ids = businesses.get_id_attribute()
        # pre-select one building per business for the single-workplace cases below:
        # a uniform pick and, where residential units exist, a units-weighted pick
        for ibusid in range(businesses.size()):
            idx = where(buildings['parcel_id'] == businesses['parcel_id'][ibusid])[0]
            bldgids = buildings['building_id'][idx]
            business_location[business_ids[ibusid]] = bldgids
            if bldgids.size == 1:
                business_location1wrkpl[ibusid] = bldgids[0]
            elif bldgids.size > 1:
                business_location1wrkpl[ibusid] = bldgids[sample_noreplace(arange(bldgids.size), 1)]
                if buildings['residential_units'][idx].sum() > 0:
                    # Residential buildings are sampled with probabilities proportional to residential units
                    business_location1wrkplres[ibusid] = bldgids[probsample_noreplace(arange(bldgids.size), 1, prob_array=buildings['residential_units'][idx])]
                else:
                    business_location1wrkplres[ibusid] = business_location1wrkpl[ibusid]
        
        home_based = zeros(business_sizes.sum(), dtype="bool8")
        job_building_id = zeros(business_sizes.sum(), dtype="int32")
        job_array_labels = business_ids.repeat(business_sizes)
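        # job_array_labels maps each job row to the id of its business; the per-case
        # masks below are built with in1d against a selection of business ids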
        job_assignment_case = zeros(business_sizes.sum(), dtype="int32")
        processed_bindicator = zeros(businesses.size(), dtype="bool8")
        business_codes = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"])
        business_nworkplaces = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"])
        logger.log_status("Total number of jobs: %s" % home_based.size)
        
        # 1. 1-2 worker business in 1 residential building
        idx_sngl_wrk_1bld_fit = where(logical_and(business_sizes < 3, business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_1bld_fit])
        home_based[jidx] = True
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_1bld_fit].repeat(business_sizes[idx_sngl_wrk_1bld_fit])
        job_assignment_case[jidx] = 1
        processed_bindicator[idx_sngl_wrk_1bld_fit] = True
        logger.log_status("1. %s jobs (%s businesses) set as home-based due to 1-2 worker x 1 residential building fit." % (
            business_sizes[idx_sngl_wrk_1bld_fit].sum(), idx_sngl_wrk_1bld_fit.size))
        
        # 2. 1-2 worker business in multiple residential buildings
        idx_sngl_wrk_multbld_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_multbld_fit])
        home_based[jidx] = True
        job_building_id[jidx] = business_location1wrkplres[idx_sngl_wrk_multbld_fit].repeat(business_sizes[idx_sngl_wrk_multbld_fit])
        job_assignment_case[jidx] = 2
        processed_bindicator[idx_sngl_wrk_multbld_fit] = True
        logger.log_status("2. %s jobs (%s businesses) set as home-based due to 1-2 worker x multiple residential buildings fit." % (
            business_sizes[idx_sngl_wrk_multbld_fit].sum(), idx_sngl_wrk_multbld_fit.size))
               
        # 3. 1-2 worker in single non-res building (not mixed-use)
        idx_sngl_wrk_single_nonres_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 3))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_single_nonres_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_single_nonres_fit].repeat(business_sizes[idx_sngl_wrk_single_nonres_fit])
        job_assignment_case[jidx] = 3
        processed_bindicator[idx_sngl_wrk_single_nonres_fit] = True
        logger.log_status("3. %s jobs (%s businesses) placed due to 1-2 worker x single non-res building fit." % (
                          business_sizes[idx_sngl_wrk_single_nonres_fit].sum(), idx_sngl_wrk_single_nonres_fit.size))     
        
        # 4. 1-2 worker in multiple non-res building (not mixed-use)
        idx_sngl_wrk_mult_nonres_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 4))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mult_nonres_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mult_nonres_fit].repeat(business_sizes[idx_sngl_wrk_mult_nonres_fit])
        job_assignment_case[jidx] = 4
        processed_bindicator[idx_sngl_wrk_mult_nonres_fit] = True
        logger.log_status("4. %s jobs (%s businesses) placed due to 1-2 worker x multiple non-res building fit." % (
            business_sizes[idx_sngl_wrk_mult_nonres_fit].sum(), idx_sngl_wrk_mult_nonres_fit.size))      
                
        # 5. 1-2 worker in single mixed-use building
        idx_sngl_wrk_smu_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 5))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_smu_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_smu_fit].repeat(business_sizes[idx_sngl_wrk_smu_fit])
        job_assignment_case[jidx] = 5
        processed_bindicator[idx_sngl_wrk_smu_fit] = True
        logger.log_status("5. %s jobs (%s businesses) in 1-2 worker x single mixed-use building." % (
            business_sizes[idx_sngl_wrk_smu_fit].sum(), idx_sngl_wrk_smu_fit.size))       
        
        # 6. 1-2 worker in multiple mixed-type buildings
        idx_sngl_wrk_mmu_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 6))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mmu_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mmu_fit].repeat(business_sizes[idx_sngl_wrk_mmu_fit])
        bldtype = buildings.get_attribute_by_id("building_type_id", business_location1wrkpl[idx_sngl_wrk_mmu_fit])
        is_bldtype_res = in1d(bldtype, restypes)
        home_based[in1d(job_array_labels, business_ids[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)])] = True
        job_assignment_case[jidx] = 6
        processed_bindicator[idx_sngl_wrk_mmu_fit] = True
        logger.log_status("6. %s jobs (%s businesses) in 1-2 worker x multiple mixed-type buildings. %s jobs classified as home-based." % (
            business_sizes[idx_sngl_wrk_mmu_fit].sum(), idx_sngl_wrk_mmu_fit.size, business_sizes[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)].sum()))            

        # 7. 1-2 worker business in residential parcel with no building
        idx_sngl_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 7))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_vacant_res])
        job_assignment_case[jidx] = 7
        home_based[jidx] = True
        processed_bindicator[idx_sngl_wrk_vacant_res] = True
        logger.log_status("7. %s jobs (%s businesses of size 1-2) could not be placed due to non-existing buildings in parcels with residential LU type." % (
            business_sizes[idx_sngl_wrk_vacant_res].sum(), idx_sngl_wrk_vacant_res.size))        

        # 8. 3+ worker governmental businesses (sectors 18 and 19) located only in residential buildings
        ind_business_case8 = logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, in1d(businesses['sector_id'], [18,19]))), in1d(business_codes, [1,2]))
        idx_wrk_fit = where(ind_business_case8)[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 8
        processed_bindicator[idx_wrk_fit] = True
        logger.log_status("8. %s governmental jobs (%s businesses of size 3+) could not be placed due to residing in residential buildings only." % (
                    business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))
        
        # 9. 3-30 workers in single residential building. Make two of them home based.
        idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, business_sizes <= 30)), business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_fit].repeat(business_sizes[idx_sngl_wrk_fit])
        bsizeminus2 = vstack((2*ones(idx_sngl_wrk_fit.size), business_sizes[idx_sngl_wrk_fit]-2)).ravel("F").astype("int32") # interweaving 2 and remaining business size
        hbidx = tile(array([True, False]), bsizeminus2.size/2).repeat(bsizeminus2) # set the first two jobs of every business to True, others to False
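        # e.g. business sizes [5, 3] give bsizeminus2 [2, 3, 2, 1] and hbidx
        # [T, T, F, F, F, T, T, F], i.e. the first two jobs of each business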
        home_based[(where(jidx)[0])[hbidx]] = True
        job_assignment_case[jidx] = 9
        processed_bindicator[idx_sngl_wrk_fit] = True        
        logger.log_status("9. %s jobs (%s businesses) in 3-30 worker x single residential building. %s jobs assigned as home-based." % (
            business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, hbidx.sum()))      
        
        # 10. 3-30 workers in multiple residential buildings. Make two of them home based.
        idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, business_sizes <= 30)), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit])
        job_assignment_case[jidx] = 10
        processed_bindicator[idx_sngl_wrk_fit] = True
        # assign buildings to businesses by sampling within each parcel
        bpcls = unique(businesses["parcel_id"][idx_sngl_wrk_fit])
        for ipcl in range(bpcls.size):
            bidx = where(buildings['parcel_id'] == bpcls[ipcl])[0]
            bldgids = buildings['building_id'][bidx]
            bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]], business_ids[idx_sngl_wrk_fit])
            # multiply by units for sampling prop. to units rather than buildings
            bldgids = bldgids.repeat(maximum(1, buildings['residential_units'][bidx].astype('int32'))) 
            if bldgids.size < bussids.size:
                bldarray = bldgids.repeat(int(1 + ceil((bussids.size - bldgids.size) / float(bldgids.size))))
            else:
                bldarray = bldgids
            shuffle(bldarray) # randomly reorder in-place
            for ib in range(bussids.size):
                jidx = where(job_array_labels == bussids[ib])[0]
                job_building_id[jidx] = bldarray[ib]
                home_based[jidx[0:2]] = True
        logger.log_status("10. %s jobs (%s businesses) in 3-30 worker x multiple residential building. %s jobs assigned as home-based." % (
            business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, idx_sngl_wrk_fit.size*2))        


        # 11. single workplace, 3+ workers in single non-res or mixed-use building
        idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2), 
                                                            logical_or(business_codes==3, business_codes==5)),
                                                business_nworkplaces==1))[0]
        which_labels = where(in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers]))[0]
        job_building_id[which_labels] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(business_sizes[idx_sngl_wrkplace_2plus_workers])   
        job_assignment_case[which_labels] = 11
        processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True
        logger.log_status("11. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x single non-res/mixed-use building fit." % (
            business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size))
        
        # 12. single workplace, 3+ workers in multiple mixed-type building
        idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==4, business_codes==6)),
                                                            business_nworkplaces==1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(business_sizes[idx_sngl_wrkplace_2plus_workers])    
        job_assignment_case[jidx] = 12
        processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True
        logger.log_status("12. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x multiple non-res/mixed building fit." % (
            business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size))

        # 13. multiple workplaces, 3+ workers in single non-res or mixed building
        idx_mult_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==3, business_codes==5)),
                                                            business_nworkplaces > 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_mult_wrkplace_2plus_workers])
        job_building_id[jidx] = business_location1wrkpl[idx_mult_wrkplace_2plus_workers].repeat(business_sizes[idx_mult_wrkplace_2plus_workers])
        job_assignment_case[jidx] = 13
        processed_bindicator[idx_mult_wrkplace_2plus_workers] = True
        logger.log_status("13. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x single non-res/mixed building fit." % (
            business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size))
        
        # 14. multiple workplaces, 3+ workers in multiple non-res or mixed building
        idx_mult_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==4, business_codes==6)),
                                                            business_nworkplaces > 1))[0]
        processed_bindicator[idx_mult_wrkplace_2plus_workers] = True
        # assign buildings to businesses by sampling within each parcel
        bpcls = unique(businesses["parcel_id"][idx_mult_wrkplace_2plus_workers])
        #hbasedsum = home_based.sum()
        for ipcl in range(bpcls.size):
            bldgids = buildings['building_id'][buildings['parcel_id'] == bpcls[ipcl]]
            bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]], business_ids[idx_mult_wrkplace_2plus_workers])
            if bldgids.size < bussids.size:
                bldarray = bldgids.repeat(int(1 + ceil((bussids.size - bldgids.size) / float(bldgids.size))))
            else:
                bldarray = bldgids
            shuffle(bldarray) # randomly reorder in-place
            is_res = in1d(bldarray, restypes)
            for ib in range(bussids.size):
                jidx = where(job_array_labels == bussids[ib])
                job_building_id[jidx] = bldarray[ib]
                #home_based[jidx] = is_res
                job_assignment_case[jidx] = 14
        logger.log_status("14. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x multiple non-res/mixed building fit." % (
            business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size))
        
        
        # 15. 3+ workers in residential parcel with no building
        idx_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 2), business_codes == 7))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_res])
        job_assignment_case[jidx] = 15
        processed_bindicator[idx_wrk_vacant_res] = True
        logger.log_status("15. %s jobs (%s businesses of 3+ workers) could not be placed due to non-existing buildings in parcels with residential LU type." % (
            business_sizes[idx_wrk_vacant_res].sum(), idx_wrk_vacant_res.size))
        
        # 16. nonresidential parcel with no building
        idx_wrk_vacant_nonres = where(logical_and(processed_bindicator==0, business_codes == 8))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_nonres])
        job_assignment_case[jidx] = 16
        processed_bindicator[idx_wrk_vacant_nonres] = True
        logger.log_status("16. %s jobs (%s businesses) could not be placed due to non-existing buildings in parcels with non-esidential LU type." % (
            business_sizes[idx_wrk_vacant_nonres].sum(), idx_wrk_vacant_nonres.size))        
        
        # 17. 31+ workers in single residential building. Do not place - will go into ELCM.
        idx_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 30), business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 17
        processed_bindicator[idx_wrk_fit] = True        
        logger.log_status("17. %s jobs (%s businesses) in 31+ workers x single residential building." % (
            business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))         
    
        # 18. 31+ workers in multiple residential buildings.
        idx_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 30), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 18
        processed_bindicator[idx_wrk_fit] = True
        logger.log_status("18. %s jobs (%s businesses) in 31+ workers x multiple residential building." % (
            business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))                

        # jobs in messy buildings
        idx_messy_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 0), business_codes == 0))[0]
        processed_bindicator[idx_messy_fit] = True
        logger.log_status("%s jobs (%s businesses) could not be placed due to messy buildings." % (
            business_sizes[idx_messy_fit].sum(), idx_messy_fit.size))         
         
        # build new buildings for jobs in cases 7, 8, 15 and 16
        jidx_no_bld = where(in1d(job_assignment_case, [7,8,15,16]))[0]
        bus = unique(job_array_labels[jidx_no_bld])
        bsidx = businesses.get_id_index(bus)
        # first create buildings for single workplaces per parcel
        single_workplace_idx = where(business_nworkplaces[bsidx] == 1)[0]
        newbld_parcel_id = businesses['parcel_id'][bsidx][single_workplace_idx]
        newbld_bt = sector2building_type(businesses['sector_id'][bsidx][single_workplace_idx])
        newbids = arange(buildings.get_id_attribute().max()+1, buildings.get_id_attribute().max()+single_workplace_idx.size+1)
        bbldid = zeros(bsidx.size, dtype='int32')
        bbldid[single_workplace_idx] = newbids
        # for parcels with multiple workplaces select the largest business to determine its building type
        mult_bsidx = bsidx[where(business_nworkplaces[bsidx] > 1)[0]]
        empty_parcels = businesses['parcel_id'][mult_bsidx]
        uempty_parcels = unique(empty_parcels)
        bsize_on_empty_pcl = ndmax(business_sizes[mult_bsidx], labels=empty_parcels, index=uempty_parcels)
        newbld2_sec = zeros(uempty_parcels.size, dtype='int32')
        newbids2 = arange(newbids.max()+1, newbids.max()+uempty_parcels.size+1)
        for ipcl in range(uempty_parcels.size):
            newbld2_sec[ipcl] = businesses['sector_id'][mult_bsidx][logical_and(businesses['parcel_id'][mult_bsidx] == uempty_parcels[ipcl], 
                                                                                business_sizes[mult_bsidx]==bsize_on_empty_pcl[ipcl])][0]
            this_bidx = where(businesses['parcel_id'][bsidx] == uempty_parcels[ipcl])
            bbldid[this_bidx] = newbids2[ipcl]
            
        newbld_parcel_id = concatenate((newbld_parcel_id, uempty_parcels))
        newbld_bt = concatenate((newbld_bt, sector2building_type(newbld2_sec)))    
        
        newbldgs = {'building_id': concatenate((newbids, newbids2)),
                    'parcel_id': newbld_parcel_id,
                    'building_type_id': newbld_bt,
                    }
        buildings.add_elements(newbldgs, require_all_attributes=False)
        jidx = where(in1d(job_array_labels, business_ids[bsidx]))[0]
        job_building_id[jidx] = bbldid.repeat(business_sizes[bsidx])
        logger.log_status("Build %s new buildings to accommodate %s jobs (out of which %s are governmental) from cases 7, 15, 16." % (
            newbld_parcel_id.size, jidx.size, business_sizes[bsidx][where(in1d(businesses['sector_id'][bsidx], [18,19]))].sum()))
        
        
        logger.log_status("Assigned %s (%s percent) home-based jobs." % (home_based.sum(), round(home_based.sum()/(home_based.size/100.),2)))
        logger.log_status("Finished %s percent (%s) jobs (%s businesses) processed. %s jobs (%s businesses) remain to be processed." % \
                          (round(business_sizes[processed_bindicator].sum()/(home_based.size/100.),2),
                           business_sizes[processed_bindicator].sum(), processed_bindicator.sum(),
                          business_sizes[logical_not(processed_bindicator)].sum(), business_sizes[logical_not(processed_bindicator)].size))
        
        logger.start_block("Storing jobs data.")
        # create job dataset
        job_data = {"job_id": (arange(job_building_id.size)+1).astype("int32"),
                    "home_based_status" : home_based,
                    "building_id": job_building_id,
                    "business_id": job_array_labels.astype("int32"),
                    "sector_id": businesses['sector_id'].repeat(business_sizes).astype("int32"), 
                    "parcel_id": businesses['parcel_id'].repeat(business_sizes).astype("int32"), 
                    "assignment_case": job_assignment_case}

        # join with zones
        if zone_dsname is not None:
            zones = dataset_pool.get_dataset(zone_dsname)
            idname = zones.get_id_name()[0]
            #jpcls = buildings.get_attribute_by_id('parcel_id', job_building_id)
            job_data[idname] = parcels.get_attribute_by_id(idname, job_data["parcel_id"])
            
            
        dictstorage = StorageFactory().get_storage('dict_storage')
        dictstorage.write_table(table_name="jobs", table_data=job_data)
        jobs = Dataset(in_storage=dictstorage, in_table_name="jobs", dataset_name="job", id_name="job_id")
        if out_storage is not None:
            jobs.write_dataset(out_storage=out_storage, out_table_name="jobs")
            buildings.write_dataset(out_storage=out_storage, attributes=AttributeType.PRIMARY)
        logger.end_block()        
        return jobs
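
# A minimal standalone sketch of the expand-and-mask pattern used throughout run()
# above: business-level attributes are expanded to one row per job with repeat(), and
# the per-case job masks come from in1d() on the expanded business-id labels. The
# function name (demo_expand_businesses) is illustrative only, not part of the model.
def demo_expand_businesses():
    from numpy import array, in1d
    business_ids = array([101, 102, 103])
    business_sizes = array([2, 1, 3])                  # jobs per business
    job_labels = business_ids.repeat(business_sizes)   # [101 101 102 103 103 103]
    # boolean mask selecting every job belonging to businesses 101 and 103
    jidx = in1d(job_labels, array([101, 103]))         # [T T F T T T]
    return job_labels, jidx
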
class EmploymentTransitionModel(Model):
    """Creates and removes jobs from job_set."""

    model_name = "Employment Transition Model"
    location_id_name_default = "grid_id"
    variable_package_default = "urbansim"

    def __init__(self,
                 location_id_name=None,
                 variable_package=None,
                 dataset_pool=None,
                 debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.location_id_name = self.location_id_name_default
        self.variable_package = self.variable_package_default
        if location_id_name is not None:
            self.location_id_name = location_id_name
        if variable_package is not None:
            self.variable_package = variable_package
        self.dataset_pool = self.create_dataset_pool(dataset_pool,
                                                     ["urbansim", "opus_core"])

    def run(self,
            year,
            job_set,
            control_totals,
            job_building_types,
            data_objects=None,
            resources=None):
        self._do_initialize_for_run(job_set, job_building_types, data_objects)
        idx = where(control_totals.get_attribute("year") == year)[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        self._do_run_for_this_year(job_set)
        return self._update_job_set(job_set)

    def _do_initialize_for_run(self,
                               job_set,
                               job_building_types,
                               data_objects=None):
        self.max_id = job_set.get_id_attribute().max()
        self.job_size = job_set.size()
        self.job_id_name = job_set.get_id_name()[0]
        self.new_jobs = {
            self.location_id_name:
            array([],
                  dtype=job_set.get_data_type(self.location_id_name, int32)),
            "sector_id":
            array([], dtype=job_set.get_data_type("sector_id", int32)),
            self.job_id_name:
            array([], dtype=job_set.get_data_type(self.job_id_name, int32)),
            "building_type":
            array([], dtype=job_set.get_data_type("building_type", int8))
        }
        self.remove_jobs = array([], dtype=int32)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included(
            {job_building_types.get_dataset_name(): job_building_types})
        self.available_building_types = job_building_types.get_id_attribute()

    def _compute_sector_variables(self, sectors, job_set):
        compute_resources = Resources({"debug": self.debug})
        job_set.compute_variables(map(
            lambda x: "%s.%s.is_in_employment_sector_%s_home_based" %
            (self.variable_package, job_set.get_dataset_name(), x),
            sectors) + map(
                lambda x: "%s.%s.is_in_employment_sector_%s_non_home_based" %
                (self.variable_package, job_set.get_dataset_name(), x),
                sectors) + ["is_non_home_based_job", "is_home_based_job"],
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)

    def _do_run_for_this_year(self, job_set):
        building_type = job_set.get_attribute("building_type")
        sectors = unique(
            self.control_totals_for_this_year.get_attribute("sector_id"))
        self._compute_sector_variables(sectors, job_set)
        for sector in sectors:
            isector = where(
                self.control_totals_for_this_year.get_attribute("sector_id") ==
                sector)[0]
            total_hb_jobs = self.control_totals_for_this_year.get_attribute(
                "total_home_based_employment")[isector]
            total_nhb_jobs = self.control_totals_for_this_year.get_attribute(
                "total_non_home_based_employment")[isector]
            is_in_sector_hb = job_set.get_attribute(
                "is_in_employment_sector_%s_home_based" % sector)
            is_in_sector_nhb = job_set.get_attribute(
                "is_in_employment_sector_%s_non_home_based" % sector)
            diff_hb = int(total_hb_jobs - is_in_sector_hb.astype(int8).sum())
            diff_nhb = int(total_nhb_jobs -
                           is_in_sector_nhb.astype(int8).sum())
            if diff_hb < 0:  # home based jobs to be removed
                w = where(is_in_sector_hb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_hb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate(
                    (self.remove_jobs, non_placed,
                     sample_noreplace(sample_array,
                                      max(0,
                                          abs(diff_hb) - size_non_placed))))
            if diff_nhb < 0:  # non home based jobs to be removed
                w = where(is_in_sector_nhb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_nhb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate(
                    (self.remove_jobs, non_placed,
                     sample_noreplace(sample_array,
                                      max(0,
                                          abs(diff_nhb) - size_non_placed))))

            if diff_hb > 0:  # home based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate(
                    (self.new_jobs[self.location_id_name],
                     zeros(
                         (diff_hb, ),
                         dtype=self.new_jobs[self.location_id_name].dtype.type)
                     ))
                self.new_jobs["sector_id"] = concatenate(
                    (self.new_jobs["sector_id"], (resize(
                        array([sector],
                              dtype=self.new_jobs["sector_id"].dtype.type),
                        diff_hb))))
                if 1 in is_in_sector_hb:
                    building_type_distribution = array(
                        ndimage_sum(is_in_sector_hb,
                                    labels=building_type,
                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute(
                        "is_home_based_job"
                ):  # take the building type distribution from the whole region
                    building_type_distribution = array(
                        ndimage_sum(job_set.get_attribute("is_home_based_job"),
                                    labels=building_type,
                                    index=self.available_building_types))
                else:  # there are no home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(
                        self.available_building_types.size)
                    building_type_distribution = (building_type_distribution /
                                                  building_type_distribution.sum())
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_hb,
                    building_type_distribution /
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate(
                    (self.new_jobs["building_type"],
                     sampled_building_types.astype(
                         self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_hb
                self.new_jobs[self.job_id_name] = concatenate(
                    (self.new_jobs[self.job_id_name],
                     arange(self.max_id + 1, new_max_id + 1)))
                self.max_id = new_max_id

            if diff_nhb > 0:  # non home based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate(
                    (self.new_jobs[self.location_id_name],
                     zeros(
                         (diff_nhb, ),
                         dtype=self.new_jobs[self.location_id_name].dtype.type)
                     ))
                self.new_jobs["sector_id"] = concatenate(
                    (self.new_jobs["sector_id"], (resize(
                        array([sector],
                              dtype=self.new_jobs["sector_id"].dtype.type),
                        diff_nhb))))
                if 1 in is_in_sector_nhb:
                    building_type_distribution = array(
                        ndimage_sum(is_in_sector_nhb,
                                    labels=building_type,
                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute(
                        "is_non_home_based_job"
                ):  # take the building type distribution from the whole region
                    building_type_distribution = array(
                        ndimage_sum(
                            job_set.get_attribute("is_non_home_based_job"),
                            labels=building_type,
                            index=self.available_building_types))
                else:  # there are no non-home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(
                        self.available_building_types.size)
                    building_type_distribution = (building_type_distribution /
                                                  building_type_distribution.sum())
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_nhb,
                    building_type_distribution /
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate(
                    (self.new_jobs["building_type"],
                     sampled_building_types.astype(
                         self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_nhb
                self.new_jobs[self.job_id_name] = concatenate(
                    (self.new_jobs[self.job_id_name],
                     arange(self.max_id + 1, new_max_id + 1)))
                self.max_id = new_max_id

    def _update_job_set(self, job_set):
        job_set.remove_elements(self.remove_jobs)
        job_set.add_elements(self.new_jobs, require_all_attributes=False)
        difference = job_set.size() - self.job_size
        self.debug.print_debug(
            "Difference in number of jobs: %s (original %s,"
            " new %s, created %s, deleted %s)" %
            (difference, self.job_size, job_set.size(),
             self.new_jobs[self.job_id_name].size, self.remove_jobs.size), 3)
        self.debug.print_debug(
            "Number of unplaced jobs: %s" %
            where(job_set.get_attribute(self.location_id_name) <= 0)[0].size,
            3)
        return difference

    def prepare_for_run(self, storage, **kwargs):
        from urbansim.datasets.control_total_dataset import ControlTotalDataset
        control_totals = ControlTotalDataset(in_storage=storage,
                                             what="employment")
        sample_control_totals(storage, control_totals, **kwargs)
        return control_totals
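
# A minimal standalone sketch of how _do_run_for_this_year derives the building-type
# distribution for newly created jobs: count the sector's existing jobs per building
# type (an ndimage sum over a 0/1 indicator labeled by building type), normalize, and
# sample types with replacement. numpy.random.choice stands in for opus_core's
# probsample_replace here; all names and values are illustrative, not the model's code.
def demo_sample_building_types(n_new_jobs=4):
    from numpy import array
    from numpy.random import choice
    from scipy.ndimage import sum as ndimage_sum
    is_in_sector = array([1, 0, 1, 1, 0])    # indicator over existing jobs
    building_type = array([1, 2, 1, 3, 3])   # building type of each existing job
    available_types = array([1, 2, 3])
    counts = array(ndimage_sum(is_in_sector, labels=building_type, index=available_types))
    # counts == [2., 0., 1.]; sample proportionally to the observed distribution
    return choice(available_types, size=n_new_jobs, p=counts / counts.sum())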