def _assign_job_to_worker(self, worker_index, job_index):
     logger.log_status("Attempt to assign %s jobs to %s workers" % (job_index.size, worker_index.size))
     if worker_index.size >= job_index.size: 
          # number of at home workers is greater than the available choice (home_based jobs by default)
         assigned_worker_index = sample_noreplace(worker_index, job_index.size)
         assigned_job_index = job_index
     else:
         assigned_worker_index = worker_index
          assigned_job_index = sample_noreplace(job_index, worker_index.size)
     logger.log_status("Assigned %s jobs to %s workers" % (assigned_job_index.size, assigned_worker_index.size))
     
     return (assigned_worker_index, assigned_job_index)
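A minimal sketch of the same pairing rule with plain numpy; the sample_noreplace below is a stand-in built on numpy.random.choice, an assumption about opus_core's sampler, not its actual code. Whichever side is larger gets down-sampled so both index arrays end up the same length.

import numpy

def sample_noreplace(source, size):
    # stand-in (assumption) for opus_core's sample_noreplace
    return numpy.random.choice(source, size=size, replace=False)

worker_index = numpy.arange(10)  # ten at-home workers
job_index = numpy.arange(4)      # four home-based jobs
if worker_index.size >= job_index.size:
    assigned_worker_index = sample_noreplace(worker_index, job_index.size)
    assigned_job_index = job_index
else:
    assigned_worker_index = worker_index
    assigned_job_index = sample_noreplace(job_index, worker_index.size)
assert assigned_worker_index.size == assigned_job_index.size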
 def _delete(self, agents_pool, amount, 
               agent_dataset, location_dataset, 
               this_refinement,
               dataset_pool ):
     """similar to subtract action, instead of unplacing agents delete remove agents from the agent dataset,
     those agents won't be available for later action
     """
     
     fit_index = self.get_fit_agents_index(agent_dataset, 
                                           this_refinement.agent_filter, 
                                           '',
                                           dataset_pool)
     
     if amount > fit_index.size or amount < 0:
         logger.log_warning("Request to delete %i agents,  but there are %i agents in total satisfying %s;" \
                            "delete %i agents instead" % (amount, fit_index.size, 
                                                            this_refinement.agent_filter,
                                                            fit_index.size) )
         amount = fit_index.size
     
     if amount == fit_index.size:
         movers_index = fit_index
     else:
         movers_index = sample_noreplace( fit_index, amount )
         
     agents_pool = list( set(agents_pool) - set(movers_index) )
         
     agent_dataset.remove_elements( array(movers_index) )
Example #3
    def sample_and_unplace_agents(self, spaces_with_negatives, dataset, unique_redevelopment_building_ids_index, index_overfilled_spaces):
        """
        Using overfilled spaces, agents, and buildings, randomly sample and unplace agents until the spaces are 
        no longer overfilled.
            - spaces_with_negatives is an array of vacant residential or job spaces that includes negative values (overfilled)
            - dataset is the dataset of agents overfilling the spaces specified in spaces_with_negatives
        """

        # set up table for logging unplaced building occupants
        occupants_log = PrettyTable()
        occupants_log.set_field_names(["building_id","HH unplaced","Jobs unplaced","HB Jobs unplaced"])
        number_of_overfilled_spaces = abs(spaces_with_negatives[index_overfilled_spaces].astype('int'))
        overfilled_spaces_building_ids = self.buildings_dataset.get_id_attribute()[unique_redevelopment_building_ids_index]
        overfilled_spaces_building_ids = overfilled_spaces_building_ids[index_overfilled_spaces]
        look_for_home_based_jobs = False
        dataset_name = dataset.get_dataset_name()
        if 'household' in dataset_name:
            look_for_home_based_jobs = True
        for building_id, number_of_agents_to_unplace in zip(overfilled_spaces_building_ids, number_of_overfilled_spaces):
            occupants_log_line = []
            occupants_log_line.append(building_id)
            index_of_agents_to_sample_from = dataset.get_filtered_index('%s.building_id==%s' % (dataset_name,building_id))
            sample_of_agents_to_unplace = sample_noreplace(index_of_agents_to_sample_from, number_of_agents_to_unplace)
            dataset.set_values_of_one_attribute('building_id', array([-1]), sample_of_agents_to_unplace)
            if look_for_home_based_jobs:
                occupants_log_line.append(number_of_agents_to_unplace)
                occupants_log_line.append(0)
                number_of_home_based_jobs_to_unplace = self.unplace_home_based_jobs(building_id)
                occupants_log_line.append(number_of_home_based_jobs_to_unplace)
            else:
                occupants_log_line.append(0)
                occupants_log_line.append(number_of_agents_to_unplace)
                occupants_log_line.append(0)
            occupants_log.add_row(occupants_log_line)
        return occupants_log
Example #4
    def _do_sector_for_businesses(self, sector, diff, business_set,
                                  is_in_sector):
        available_business_index = where(is_in_sector)[0]
        if diff < 0:  # sector shrinks: sample businesses to remove
            sample_array, non_placed, size_non_placed = \
                get_array_without_non_placed_agents(business_set, available_business_index, -1*diff,
                                                     self.location_id_name)
            self.remove_businesses = concatenate(
                (self.remove_businesses, non_placed,
                 sample_noreplace(sample_array,
                                  max(0,
                                      abs(diff) - size_non_placed))))

        if diff > 0:  # sector grows: create new businesses from sampled templates
            self.new_businesses[self.location_id_name] = concatenate(
                (self.new_businesses[self.location_id_name], zeros((diff, ))))
            self.new_businesses["sector_id"] = concatenate(
                (self.new_businesses["sector_id"], sector * ones((diff, ))))
            sampled_business = probsample_replace(available_business_index,
                                                  diff, None)
            self.new_businesses["sqft"] = concatenate(
                (self.new_businesses["sqft"],
                 business_set.get_attribute("sqft")[sampled_business]))
            self.new_businesses["employment"] = concatenate(
                (self.new_businesses["employment"],
                 business_set.get_attribute("employment")[sampled_business]))
            self.new_businesses["activity_id"] = concatenate(
                (self.new_businesses["activity_id"],
                 business_set.get_attribute("activity_id")[sampled_business]))

            new_max_id = self.max_id + diff
            self.new_businesses[self.business_id_name] = concatenate(
                (self.new_businesses[self.business_id_name],
                 arange(self.max_id + 1, new_max_id + 1)))
            self.max_id = new_max_id
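The two branches above rely on different samplers: shrinking a sector samples without replacement (a business can be removed at most once), while growing a sector samples with replacement (one business may serve as the template for several new ones). A toy illustration of that distinction, using numpy.random.choice as a stand-in for both opus_core samplers and assuming probsample_replace with None weights samples uniformly:

import numpy

existing = numpy.array([101, 102, 103, 104, 105])  # hypothetical business indices
# shrinking: without replacement, a business is removed at most once
removed = numpy.random.choice(existing, size=2, replace=False)
# growing: with replacement, one business may template several new ones
templates = numpy.random.choice(existing, size=8, replace=True)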
 def run(self,
         specification,
         coefficients,
         agent_set,
         agents_index=None,
         **kwargs):
     choices = ChoiceModel.run(self,
                               specification,
                               coefficients,
                               agent_set,
                               agents_index=agents_index,
                               **kwargs)
     if agents_index is None:
         agents_index = arange(agent_set.size())
     movers_indices = agents_index[where(choices > 0)]
     if self.movers_ratio is not None:
         n = rint(self.movers_ratio * agents_index.size)
         if n < movers_indices.size:
             movers_indices = sample_noreplace(movers_indices, n)
     # add unplaced agents
     unplaced_agents = agents_index[agent_set.get_attribute_by_index(
         self.location_id_name, agents_index) <= 0]
     logger.log_status(
         "%s agents selected by the logit model; %s agents without %s." %
         (movers_indices.size, unplaced_agents.size, self.location_id_name))
     movers_indices = unique(concatenate((movers_indices, unplaced_agents)))
     logger.log_status("Number of movers: " + str(movers_indices.size))
     return movers_indices
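The movers_ratio cap can be replayed in isolation: the logit choices nominate movers, rint fixes the cap relative to the whole agents_index, and unplaced agents are merged back in afterwards, so the final count can exceed the cap. A sketch with toy arrays, using numpy.random.choice in place of sample_noreplace:

import numpy

choices = numpy.array([0, 1, 1, 0, 1, 1, 0, 1])
location_id = numpy.array([5, 3, -1, 2, 7, 4, 1, 6])  # -1 marks an unplaced agent
agents_index = numpy.arange(choices.size)
movers_indices = agents_index[numpy.where(choices > 0)]
n = int(numpy.rint(0.25 * agents_index.size))          # cap: 2 of 8 agents
if n < movers_indices.size:
    movers_indices = numpy.random.choice(movers_indices, size=n, replace=False)
unplaced = agents_index[location_id <= 0]
movers_indices = numpy.unique(numpy.concatenate((movers_indices, unplaced)))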
    def _assign_job_to_worker(self, worker_index, job_index):
        logger.log_status("Atempt to assign %s jobs to %s workers" %
                          (worker_index.size, job_index.size))
        if worker_index.size >= job_index.size:
            #number of at home workers is greater than the available choice (home_based jobs by default)
            assigned_worker_index = sample_noreplace(worker_index,
                                                     job_index.size)
            assigned_job_index = job_index
        else:
            assigned_worker_index = worker_index
            assigned_job_index = sample_noreplace(job_index, worker_index.size)
        logger.log_status(
            "Assigned %s jobs to %s workers" %
            (assigned_job_index.size, assigned_worker_index.size))

        return (assigned_worker_index, assigned_job_index)
 def _delete(self, agents_pool, amount, 
               agent_dataset, location_dataset, 
               this_refinement,
               dataset_pool ):
     """similar to subtract action, instead of unplacing agents delete remove agents from the agent dataset,
     those agents won't be available for later action
     """
     
     fit_index = self.get_fit_agents_index(agent_dataset, 
                                           this_refinement.agent_filter, 
                                           '',
                                           dataset_pool)
     
     if amount > fit_index.size or amount < 0:
         logger.log_warning("Request to delete %i agents,  but there are %i agents in total satisfying %s;" \
                            "delete %i agents instead" % (amount, fit_index.size, 
                                                            this_refinement.agent_filter,
                                                            fit_index.size) )
         amount = fit_index.size
     
     if amount == fit_index.size:
         movers_index = fit_index
     else:
         movers_index = sample_noreplace( fit_index, amount )
         
     agents_pool = list( set(agents_pool) - set(movers_index) )
         
     agent_dataset.remove_elements( array(movers_index) )
Example #8
    def map_agents_to_submodels(self,
                                submodels,
                                submodel_string,
                                agent_set,
                                agents_index,
                                dataset_pool=None,
                                resources=None,
                                submodel_size_max=None):
        """ Creates a class attribute self.observations_mapping which is a dictionary
        where each entry corresponds to one submodel. It contains indices
        of agents (within agents_index) that belong to that submodel.
        Additionally, self.observations_mapping has an entry 'index' which contains agents_index, and an entry
        'mapped_index' which contains only indices of agents_index that are included in any of the submodel entries of 
        observations_mapping. Thus, missing entries of 'index' are agents that do not belong to any submodel. 
        'submodels' is a list of submodels to be considered.
        'submodel_string' specifies the name of attribute/variable that distinguishes submodels.
        'resources' are passed to the computation of variable 'submodel_string'.
        'submodel_size_max' determines the maximum size of a submodel. If the actual size exceeds this number,
        agents are randomly sampled so that the submodel size matches this number.
        """
        self.observations_mapping = {}  # maps each observation to the submodel it belongs to
        nsubmodels = len(submodels)
        if (nsubmodels > 1) or ((nsubmodels == 1) and (submodels[0] != -2)):
            try:
                agent_set.compute_variables(submodel_string,
                                            dataset_pool=dataset_pool,
                                            resources=resources)
            except:
                pass
            submodel_alias = None
            if submodel_string is not None:
                submodel_alias = VariableName(submodel_string).get_alias()
            if (nsubmodels == 1) and (
                (submodel_string is None) or
                (submodel_alias not in agent_set.get_known_attribute_names())):
                self.observations_mapping[submodels[0]] = arange(
                    agents_index.size)
            else:
                for submodel in submodels:  #mapping agents to submodels
                    w = where(
                        agent_set.get_attribute_by_index(
                            submodel_alias, agents_index) == submodel)[0]
                    if submodel_size_max is not None and submodel_size_max < w.size:
                        # sub-sample from submodel
                        wnew = sample_noreplace(w, submodel_size_max)
                        logger.log_status(
                            'Number of records in submodel %s reduced from %s to %s.'
                            % (submodel, w.size, wnew.size))
                        w = sort(wnew)
                    self.observations_mapping[submodel] = w
        else:  # no submodel distinction
            self.observations_mapping[-2] = arange(agents_index.size)

        mapped = zeros(agents_index.size, dtype='bool8')
        for submodel, index in self.observations_mapping.iteritems():
            mapped[index] = True
        self.observations_mapping["index"] = agents_index
        self.observations_mapping["mapped_index"] = where(mapped)[0]
    def _delete(self, agents_pool, amount, agent_dataset, location_dataset,
                this_refinement, dataset_pool):
        """similar to subtract action, instead of unplacing agents delete remove agents from the agent dataset,
        those agents won't be available for later action
        """

        fit_index = self.get_fit_agents_index(
            agent_dataset, this_refinement.agent_expression,
            this_refinement.location_expression, dataset_pool)

        if amount > fit_index.size or amount < 0:
            logger.log_warning("Refinement requests to delete %i agents,  but there are %i agents in total satisfying %s;" \
                               "delete %i agents instead" % (amount, fit_index.size,
                                                               ' and '.join( [this_refinement.agent_expression,
                                                                            this_refinement.location_expression] ).strip(' and '),
                                                               fit_index.size) )
            amount = fit_index.size

        if amount == fit_index.size:
            movers_index = fit_index
        else:
            movers_index = sample_noreplace(fit_index, amount)

        agents_pool = list(set(agents_pool) - set(movers_index))
        ## modify location capacity attribute if specified
        if this_refinement.location_capacity_attribute is not None and len(
                this_refinement.location_capacity_attribute) > 0:
            location_dataset = dataset_pool.get_dataset(
                VariableName(
                    this_refinement.location_expression).get_dataset_name())

            movers_location_id = agent_dataset.get_attribute(
                location_dataset.get_id_name()[0])[movers_index]
            movers_location_index = location_dataset.get_id_index(
                movers_location_id)
            # see previous comment about histogram function
            num_of_movers_by_location = histogram(
                movers_location_index,
                bins=arange(location_dataset.size() + 1))[0]
            num_of_agents_by_location = location_dataset.compute_variables( "number_of_agents=%s.number_of_agents(%s)" % \
                                                                            (location_dataset.dataset_name,
                                                                            agent_dataset.dataset_name),
                                                                            dataset_pool=dataset_pool)

            shrink_factor = safe_array_divide(
                (num_of_agents_by_location -
                 num_of_movers_by_location).astype('float32'),
                num_of_agents_by_location,
                return_value_if_denominator_is_zero=1.0)
            new_values = round_(shrink_factor * location_dataset.get_attribute(
                this_refinement.location_capacity_attribute))
            location_dataset.modify_attribute(
                this_refinement.location_capacity_attribute, new_values)
            self._add_refinement_info_to_dataset(location_dataset,
                                                 self.id_names,
                                                 this_refinement,
                                                 index=movers_location_index)

        agent_dataset.remove_elements(array(movers_index))
 def test_sample_noreplace(self):
     start_time = time.time()
     sample = sample_noreplace(self.all, self.size, return_index=True)
     logger.log_status("sample_noreplace %s from %s items array in " % (self.size,self.n) + str(time.time() - start_time) + " sec")
     self.assertEqual(sample.size, self.size, msg ="sample size not equal to size parameter")
     assert isinstance(sample, ndarray), "sample is not of type ndarray"
     assert 0 <= sample.min() <= self.n-1, "sampled elements not in between min and max of source array"
     assert 0 <= sample.max() <= self.n-1, "sampled elements not in between min and max of source array"
     assert not sometrue(find_duplicates(sample)), "there are duplicates in samples"
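The test pins down the sampler's contract rather than its implementation: an ndarray of exactly the requested size, values drawn from the source, and no duplicates. A permutation-based function satisfying that contract (a sketch, not opus_core's actual code, and ignoring the return_index option the test exercises):

import numpy

def sample_noreplace_sketch(source_array, size):
    # shuffle all positions, keep the first `size`; no element can repeat
    idx = numpy.random.permutation(source_array.size)[:size]
    return source_array[idx]

sample = sample_noreplace_sketch(numpy.arange(1000), 100)
assert sample.size == 100 and numpy.unique(sample).size == 100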
Example #11
    def prepare_for_estimate(self,
                             specification_dict=None,
                             specification_storage=None,
                             specification_table=None,
                             agent_set=None,
                             agents_for_estimation_storage=None,
                             agents_for_estimation_table=None,
                             join_datasets=False,
                             index_to_unplace=None,
                             portion_to_unplace=1.0,
                             agent_filter=None,
                             data_objects={}):
        from opus_core.model import get_specification_for_estimation
        specification = get_specification_for_estimation(
            specification_dict, specification_storage, specification_table)
        if (agent_set is not None) and (index_to_unplace is not None):
            if self.location_id_string is not None:
                agent_set.compute_variables(self.location_id_string,
                                            resources=Resources(data_objects))
            if portion_to_unplace < 1:
                unplace_size = int(portion_to_unplace * index_to_unplace.size)
                end_index_to_unplace = sample_noreplace(
                    index_to_unplace, unplace_size)
            else:
                end_index_to_unplace = index_to_unplace
            logger.log_status("Unplace " + str(end_index_to_unplace.size) +
                              " agents.")
            agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                       -1 * ones(end_index_to_unplace.size),
                                       end_index_to_unplace)
        # create agents for estimation
        if agents_for_estimation_storage is not None:
            estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                     in_table_name=agents_for_estimation_table,
                                     id_name=agent_set.get_id_name(),
                                     dataset_name=agent_set.get_dataset_name())
            if agent_filter is not None:
                estimation_set.compute_variables(
                    agent_filter, resources=Resources(data_objects))
                index = where(
                    estimation_set.get_attribute(agent_filter) > 0)[0]
                estimation_set.subset_by_index(
                    index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                agent_set.join_by_rows(estimation_set,
                                       require_all_attributes=False,
                                       change_ids_if_not_unique=True)
                index = arange(agent_set.size() - estimation_set.size(),
                               agent_set.size())
            else:
                index = agent_set.get_id_index(
                    estimation_set.get_id_attribute())
        else:
            index = arange(agent_set.size())
        return (specification, index)
Example #12
    def _subtract(self, agents_pool, amount, 
                  agent_dataset, location_dataset, 
                  this_refinement,
                  dataset_pool ):
        
        fit_index = self.get_fit_agents_index(agent_dataset, 
                                              this_refinement.agent_expression, 
                                              this_refinement.location_expression,
                                              dataset_pool)
        
        if amount > fit_index.size:
            logger.log_warning("Refinement requests to subtract %i agents,  but there are %i agents in total satisfying %s;" \
                               "subtract %i agents instead" % (amount, fit_index.size, 
                                                               ' and '.join( [this_refinement.agent_expression, 
                                                                            this_refinement.location_expression] ).strip(' and '),
                                                               fit_index.size) )
            amount = fit_index.size
        
        if amount == fit_index.size:
            movers_index = fit_index
        else:
            movers_index = sample_noreplace( fit_index, amount )
            
        agents_pool += movers_index.tolist()
        ## modify location capacity attribute if specified
        if this_refinement.location_capacity_attribute is not None and len(this_refinement.location_capacity_attribute) > 0:
            location_dataset = dataset_pool.get_dataset( VariableName( this_refinement.location_expression ).get_dataset_name() )

            movers_location_id = agent_dataset.get_attribute( location_dataset.get_id_name()[0] )[movers_index]
            movers_location_index = location_dataset.get_id_index( movers_location_id )
            # backward compatibility code for older versions of numpy -- no longer required since we need numpy 1.2.1 or greater
            # new=False argument to histogram tells it to use deprecated behavior for now (to be removed in numpy 1.3)
            # See numpy release notes -- search for histogram
            # if numpy.__version__ >= '1.2.0':
            #    num_of_movers_by_location = histogram( movers_location_index, bins=arange(location_dataset.size()), new=False)[0]
            # else:
            #    num_of_movers_by_location = histogram( movers_location_index, bins=arange(location_dataset.size()))[0]
            num_of_movers_by_location = histogram( movers_location_index, bins=arange(location_dataset.size() +1) )[0]
            num_of_agents_by_location = location_dataset.compute_variables( "number_of_agents=%s.number_of_agents(%s)" % \
                                                                            (location_dataset.dataset_name,
                                                                            agent_dataset.dataset_name),
                                                                            dataset_pool=dataset_pool)
            
            shrink_factor = safe_array_divide( (num_of_agents_by_location - num_of_movers_by_location ).astype('float32'),
                                                num_of_agents_by_location, return_value_if_denominator_is_zero = 1.0  )
            new_values = round_( shrink_factor * location_dataset.get_attribute(this_refinement.location_capacity_attribute) )
            location_dataset.modify_attribute( this_refinement.location_capacity_attribute, 
                                               new_values
                                               )
            self._add_refinement_info_to_dataset(location_dataset, ("refinement_id", "transaction_id"), this_refinement, index=movers_location_index)
            
        agent_dataset.modify_attribute(location_dataset.get_id_name()[0], 
                                       -1 * ones( movers_index.size, dtype='int32' ),
                                       index = movers_index
                                       )
        self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement, index=movers_index)
Example #13
    def run(self, household, person, work_eligible='person.age>15', full_time_ratio=1.0, **kwargs):
        if 'employment_status' in person.get_known_attribute_names():
            employment_status = person['employment_status']
            assigned_workers = household.compute_variables('household.aggregate(person.employment_status)')
        else:
            employment_status = zeros(person.size(), dtype='i4')
            assigned_workers = zeros(household.size(), dtype='i4')

        if 'full_time' in person.get_known_attribute_names():
            full_time = person['full_time']
        else:
            full_time = zeros(person.size(), dtype='i4')

        predicted_workers = household['workers']

        diff = predicted_workers - assigned_workers
        indices = where(diff != 0)[0]
        
        eligible = person.compute_variables(work_eligible)
        logger.log_status('Updating employment_status for {} workers in {} households'.format(abs(diff).sum(), indices.size))
        for index in indices:
            in_hh = person['household_id'] == household['household_id'][index]
            if diff[index] > 0:
                sample_pool = where( (~ employment_status) & eligible & in_hh )[0]
                new_workers = sample_noreplace(sample_pool, diff[index])
                employment_status[new_workers] = 1
                chance = random(new_workers.size)
                full_time[new_workers] = ((1 - full_time_ratio) < chance).astype('i4')
            else:
                sample_pool = where( (employment_status) & in_hh )[0]
                exit_workers = sample_noreplace(sample_pool, -diff[index])
                employment_status[exit_workers] = 0
                full_time[exit_workers] = -1

        if 'employment_status' in person.get_known_attribute_names():
            person.modify_attribute('employment_status', employment_status)
        else:
            person.add_primary_attribute(employment_status, 'employment_status')

        if 'full_time' in person.get_known_attribute_names():
            person.modify_attribute('full_time', full_time)
        else:
            person.add_primary_attribute(full_time, 'full_time')
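The full-time draw in the positive branch encodes the probability directly: with chance uniform on [0, 1), the condition (1 - full_time_ratio) < chance holds with probability full_time_ratio, so that parameter is the expected share of new workers flagged full-time. A quick numerical check:

import numpy

full_time_ratio = 0.7
chance = numpy.random.random(100000)
share_full_time = ((1 - full_time_ratio) < chance).mean()  # approximately 0.7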
Example #14
0
 def run(self, dataset_pool):
     workers = dataset_pool['person']
     faz_ids = workers.compute_variables('faz_id = person.disaggregate(zone.faz_id, intermediates=[parcel, building, household])',
                                            dataset_pool=dataset_pool)
     is_worker = workers.compute_variables('urbansim_parcel.person.is_worker', dataset_pool=dataset_pool)
     workers_jobs = workers['job_id']
     job_ids = arange(self.job_id_range[0], self.job_id_range[1]+1)
     for area, values in self.faz_worker_mapping.iteritems():
         fazes = array(values[0])
         amount = values[1]
         indicator = logical_and(ismember(faz_ids, fazes), is_worker)
         job_idx = where(job_ids > 0)[0]
         sampled_jobs = sample_noreplace(job_idx, amount)
         workers_idx = where(indicator > 0)[0]
         sampled_workers = sample_noreplace(workers_idx, amount)
         workers_jobs[sampled_workers] = job_ids[sampled_jobs]
         job_ids[sampled_jobs] = 0
         
     workers.modify_attribute(name='job_id', data=workers_jobs)
    def run(self, location_set, deletion_event_set, current_year, dataset_pool=None):
        """ The deletion_event_set is expected to have attributes:
                grid_id, scheduled_year, number_of_jobs, number_of_households
            The method finds jobs/households located in the given locations (grid_id),
            then samples the given amount for this year and unplaces them.
            If the value for number_of_jobs/number_of_households is -2, the model removes 
            all jobs/households from the location.
        """
        if not deletion_event_set or (deletion_event_set.size() == 0): 
            logger.log_status("No jobs/households to be deleted.")
            return

        idx_of_events_this_year = deletion_event_set.get_attribute("scheduled_year") == current_year
        if idx_of_events_this_year.sum() == 0:
            logger.log_status("No jobs/households to be deleted.")
            return
        
        self.dataset_pool = self.create_dataset_pool(dataset_pool)
        
        location_id_name = location_set.get_id_name()[0]
        location_ids_in_event_set = deletion_event_set.get_attribute_by_index(location_id_name, 
                                                                              idx_of_events_this_year)
        
        agents_dict = {"number_of_jobs": "job", "number_of_households": "household"}
        # load attributes and datasets needed
        delete_agents = {"job": False, "household": False}
        agent_sets = {}
        for attribute_name, dataset_name in agents_dict.iteritems():
            if attribute_name in deletion_event_set.get_known_attribute_names():
                values = deletion_event_set.get_attribute_by_index(attribute_name, idx_of_events_this_year)
                if values.sum() > 0:
                    agent_sets[dataset_name] = self.dataset_pool.get_dataset(dataset_name)
                    if location_id_name not in agent_sets[dataset_name].get_known_attribute_names():
                        # compute agents locations
                        agent_sets[dataset_name].compute_one_variable_with_unknown_package(location_id_name, 
                                                                                           self.dataset_pool)
                    delete_agents[dataset_name] = True
                    
        for attribute_name, dataset_name in agents_dict.iteritems():
            if not delete_agents[dataset_name]:
                continue
            # iterate over locations
            for location_id in location_ids_in_event_set:
                number_of_agents = getattr(deletion_event_set.get_data_element_by_id((location_id, current_year)), attribute_name)
                agent_ids = agent_sets[dataset_name].get_attribute(location_id_name)
                agent_index = where(agent_ids == location_id)[0]
                if (number_of_agents == -2) or (agent_index.size <= number_of_agents): # unplace all agents
                    unplace_index = agent_index
                else: # sample agents
                    unplace_index = sample_noreplace(agent_index, number_of_agents)
                agent_sets[dataset_name].modify_attribute(name=location_id_name, 
                                                      data=resize(array([-1], dtype=agent_ids.dtype), unplace_index.size),
                                                      index = unplace_index)
def get_array_without_non_placed_agents(business_set, arr, max_value=None, location_id_name="grid_id"):
    if location_id_name in business_set.get_known_attribute_names():
        non_placed = where(business_set.get_attribute_by_index(location_id_name, arr) <= 0)[0]
    else:
        non_placed=array([], dtype='int32')
    size_non_placed = non_placed.size
    if size_non_placed <= 0:
        return (arr, non_placed, 0)
    if (max_value is not None) and (size_non_placed > max_value):
        non_placed = sample_noreplace(non_placed, max_value)
        size_non_placed = non_placed.size
    a = ones((arr.size,), dtype="int8")
    a[non_placed] = 0
    return (compress(a, arr), arr[non_placed], size_non_placed)
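The compress-based split can be seen on a toy index array; assuming positions 1 and 3 of arr point at unplaced agents, the function returns the placed subset, the removed elements, and their count:

import numpy

arr = numpy.array([10, 11, 12, 13, 14])
non_placed = numpy.array([1, 3])             # positions within arr (toy values)
mask = numpy.ones((arr.size,), dtype='int8')
mask[non_placed] = 0
placed = numpy.compress(mask, arr)           # array([10, 12, 14])
removed = arr[non_placed]                    # array([11, 13])
assert placed.size + removed.size == arr.size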
    def prepare_for_estimate(self, 
                             agent_set=None, 
                             index_to_unplace=None, 
                             portion_to_unplace=1.0,
                             compute_lambda=False, 
                             grouping_location_set=None,
                             movers_variable=None, 
                             movers_index=None,
                             location_id_variable=None,
                             data_objects={},
                             *args, **kwargs
                            ):
        """Put 'location_id_variable' always in, if the location id is to be computed on the estimation set,
        i.e. if it is not a primary attribute of the estimation set. Set 'index_to_unplace' to None, if 'compute_lambda' is True.
        In such a case, the annual supply is estimated without unplacing agents. 'grouping_location_set', 'movers_variable' and
        'movers_index' must be given, if 'compute_lambda' is True.
        """
        from urbansim.functions import compute_supply_and_add_to_location_set

        if (agent_set is not None) and (index_to_unplace is not None):
            if self.location_id_string is not None:
                agent_set.compute_variables(self.location_id_string, 
                                            resources=Resources(data_objects))
            if portion_to_unplace < 1:
                unplace_size = int(portion_to_unplace*index_to_unplace.size)
                end_index_to_unplace = sample_noreplace(index_to_unplace, unplace_size)
            else:
                end_index_to_unplace = index_to_unplace
            logger.log_status("Unplace " + str(end_index_to_unplace.size) + " agents.")
            agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                        resize(array([-1]), end_index_to_unplace.size), 
                                       end_index_to_unplace)
        if compute_lambda:
            movers = zeros(agent_set.size(), dtype="bool8")
            if movers_index is not None:
                movers[movers_index] = 1
            agent_set.add_primary_attribute(movers, "potential_movers")
            self.estimate_config["weights_for_estimation_string"] = self.estimate_config["weights_for_estimation_string"]+"_from_lambda"
            compute_supply_and_add_to_location_set(self.choice_set, grouping_location_set,
                                                   self.run_config["number_of_units_string"],
                                                   self.run_config["capacity_string"],
                                                   movers_variable,
                                                   self.estimate_config["weights_for_estimation_string"],
                                                   resources=Resources(data_objects))

        specification, index = prepare_for_estimate(agent_set=agent_set,
                                                    *args, **kwargs)

        return (specification, index)
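The unplacing step is driven by a single size computation: portion_to_unplace scales index_to_unplace, and sampling only happens when the portion is below 1. In isolation, with numpy.random.choice standing in for sample_noreplace:

import numpy

index_to_unplace = numpy.arange(200)
portion_to_unplace = 0.25
if portion_to_unplace < 1:
    unplace_size = int(portion_to_unplace * index_to_unplace.size)  # 50
    end_index_to_unplace = numpy.random.choice(index_to_unplace,
                                               size=unplace_size, replace=False)
else:
    end_index_to_unplace = index_to_unplace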
Example #18
    def _delete(self, agents_pool, amount, 
                  agent_dataset, location_dataset, 
                  this_refinement,
                  dataset_pool ):
        """similar to subtract action, instead of unplacing agents delete remove agents from the agent dataset,
        those agents won't be available for later action
        """
        
        fit_index = self.get_fit_agents_index(agent_dataset, 
                                              this_refinement.agent_expression, 
                                              this_refinement.location_expression,
                                              dataset_pool)
        
        if amount > fit_index.size or amount < 0:
            logger.log_warning("Refinement requests to delete %i agents,  but there are %i agents in total satisfying %s;" \
                               "delete %i agents instead" % (amount, fit_index.size, 
                                                               ' and '.join( [this_refinement.agent_expression, 
                                                                            this_refinement.location_expression] ).strip(' and '),
                                                               fit_index.size) )
            amount = fit_index.size
        
        if amount == fit_index.size:
            movers_index = fit_index
        else:
            movers_index = sample_noreplace( fit_index, amount )
            
        agents_pool = list( set(agents_pool) - set(movers_index) )
        ## modify location capacity attribute if specified
        if this_refinement.location_capacity_attribute is not None and len(this_refinement.location_capacity_attribute) > 0:
            location_dataset = dataset_pool.get_dataset( VariableName( this_refinement.location_expression ).get_dataset_name() )

            movers_location_id = agent_dataset.get_attribute( location_dataset.get_id_name()[0] )[movers_index]
            movers_location_index = location_dataset.get_id_index( movers_location_id )
            # see previous comment about histogram function
            num_of_movers_by_location = histogram( movers_location_index, bins=arange(location_dataset.size() +1) )[0]
            num_of_agents_by_location = location_dataset.compute_variables( "number_of_agents=%s.number_of_agents(%s)" % \
                                                                            (location_dataset.dataset_name,
                                                                            agent_dataset.dataset_name),
                                                                            dataset_pool=dataset_pool)
            
            shrink_factor = safe_array_divide( (num_of_agents_by_location - num_of_movers_by_location ).astype('float32'),
                                                num_of_agents_by_location, return_value_if_denominator_is_zero = 1.0  )
            new_values = round_( shrink_factor * location_dataset.get_attribute(this_refinement.location_capacity_attribute) )
            location_dataset.modify_attribute( this_refinement.location_capacity_attribute, 
                                               new_values
                                               )
            self._add_refinement_info_to_dataset(location_dataset, self.id_names, this_refinement, index=movers_location_index)
            
        agent_dataset.remove_elements( array(movers_index) )
 def _remove(self, amount=0, attribute='', dataset=None, index=None, **kwargs):
     if index is None:
         index = arange(dataset.size())
     if index.size < amount:
         logger.log_warning("Number of observations satisfying event condition (%s) is less than the number to be removed (%s); remove %s instead" % 
                            (index.size, amount, index.size))
         amount = index.size
     
     if index.size == amount:
         to_be_removed = index
     else:
         to_be_removed = sample_noreplace(index, amount)
     
     if to_be_removed.size > 0:
         dataset.remove_elements(to_be_removed)
 def run(self, specification, coefficients, agent_set, agents_index=None, **kwargs):
     choices = ChoiceModel.run(self, specification, coefficients, agent_set, agents_index=agents_index, **kwargs)
     if agents_index is None:
         agents_index=arange(agent_set.size())
     movers_indices = agents_index[where(choices>0)]
     if self.movers_ratio is not None:
         n = rint(self.movers_ratio*agents_index.size)
         if n < movers_indices.size:
             movers_indices = sample_noreplace(movers_indices, n)
     # add unplaced agents
     unplaced_agents = agents_index[agent_set.get_attribute_by_index(self.location_id_name, agents_index) <= 0]
     logger.log_status("%s agents selected by the logit model; %s agents without %s." % 
                       (movers_indices.size, unplaced_agents.size, self.location_id_name))
     movers_indices = unique(concatenate((movers_indices, unplaced_agents)))
     logger.log_status("Number of movers: " + str(movers_indices.size))
     return movers_indices
    def run(self, jobs, in_storage, out_storage=None):
        dataset_pool = DatasetPool(storage=in_storage, package_order=['psrc_parcel', 'urbansim_parcel', 'urbansim', 'opus_core'] )
        if jobs is None:
            jobs =  dataset_pool.get_dataset('job')
        else:
            dataset_pool.replace_dataset('job', jobs)
        hhs = dataset_pool.get_dataset('household')
        buildings = dataset_pool.get_dataset('building')
        buildings.compute_variables(["psrc_parcel.building.census_block_group_id", "psrc_parcel.building.number_of_home_based_jobs",
                                     "urbansim_parcel.building.number_of_households", "urbansim_parcel.building.residential_units"
                                           ], 
                                          dataset_pool=dataset_pool)
        ubusiness, ubusiness_idx = unique(jobs['business_id']*(jobs['home_based_status']==1), return_index=True)
        jobs_ubusiness = zeros(jobs.size(), dtype='bool8')
        jobs_ubusiness[ubusiness_idx] = True
        jobs_ubusiness[jobs['home_based_status']==0] = False
        nhbbus = minimum(ndsum(jobs_ubusiness, labels=jobs['building_id'], index=buildings['building_id']), buildings["residential_units"])        
        affected_buildings_ind = logical_and((buildings["number_of_households"] - nhbbus) < 0, buildings["number_of_households"] < buildings["residential_units"])
        not_affected_buildings_ind = logical_and(logical_not(affected_buildings_ind), buildings["number_of_home_based_jobs"] == 0)
        blocks = unique(buildings["census_block_group_id"][where(affected_buildings_ind)])

        hh_building_id = hhs['building_id'].copy()
        seed(1)
        logger.log_status("%s buildings in %s census block affected for moving households to jobs." % (affected_buildings_ind.sum(), blocks.size))
        logger.start_block("Moving households to jobs")
        for block in blocks:
            bidx = where(logical_and(affected_buildings_ind, buildings["census_block_group_id"] == block))[0]
            bidx_out = where(logical_and(not_affected_buildings_ind, buildings["census_block_group_id"] == block))[0]
            if bidx_out.size == 0:
                continue
            hh_idx = where(in1d(hhs['building_id'], buildings['building_id'][bidx_out]))[0]
            if hh_idx.size == 0:
                continue
            nhh_needed = maximum(nhbbus[bidx] - buildings["number_of_households"][bidx], 0)
            if nhh_needed.sum() <= 0:
                continue
            for i in arange(bidx.size):
                if nhh_needed[i] == 0:
                    continue
                hh_idx_sampled = sample_noreplace(hh_idx, nhh_needed[i])
                hh_building_id[hh_idx_sampled] = buildings['building_id'][bidx[i]]
        logger.end_block() 
        if out_storage is not None:
            hhs.write_dataset(out_storage=out_storage, out_table_name="households")
        logger.log_status("%s households re-located." % (hh_building_id <> hhs['building_id']).sum())
def get_array_without_non_placed_agents(business_set,
                                        arr,
                                        max_value=None,
                                        location_id_name="grid_id"):
    if location_id_name in business_set.get_known_attribute_names():
        non_placed = where(
            business_set.get_attribute_by_index(location_id_name, arr) <= 0)[0]
    else:
        non_placed = array([], dtype='int32')
    size_non_placed = non_placed.size
    if size_non_placed <= 0:
        return (arr, non_placed, 0)
    if (max_value is not None) and (size_non_placed > max_value):
        non_placed = sample_noreplace(non_placed, max_value)
        size_non_placed = non_placed.size
    a = ones((arr.size, ), dtype="int8")
    a[non_placed] = 0
    return (compress(a, arr), arr[non_placed], size_non_placed)
Example #23
 def map_agents_to_submodels(self, submodels, submodel_string, agent_set, agents_index,
                              dataset_pool=None, resources=None, submodel_size_max=None):
     """ Creates a class attribute self.observations_mapping which is a dictionary
     where each entry corresponds to one submodel. It contains indices
     of agents (within agents_index) that belong to that submodel.
     Additionally, self.observations_mapping has an entry 'index' which contains agents_index, and an entry
     'mapped_index' which contains only indices of agents_index that are included in any of the submodel entries of 
     observations_mapping. Thus, missing entries of 'index' are agents that do not belong to any submodel. 
     'submodels' is a list of submodels to be considered.
     'submodel_string' specifies the name of attribute/variable that distinguishes submodels.
     'resources' are passed to the computation of variable 'submodel_string'.
     'submodel_size_max' determines the maximum size of a submodel. If the actual size exceeds this number,
     agents are randomly sampled so that the submodel size matches this number.
     """
      self.observations_mapping = {} # maps each observation to the submodel it belongs to
     nsubmodels = len(submodels)
      if (nsubmodels > 1) or ((nsubmodels == 1) and (submodels[0] != -2)):
         try:
             agent_set.compute_variables(submodel_string, dataset_pool=dataset_pool, resources=resources)
         except:
             pass
         submodel_alias = None
         if submodel_string is not None:
             submodel_alias = VariableName(submodel_string).get_alias()
         if (nsubmodels == 1) and ((submodel_string is None) or (submodel_alias not in agent_set.get_known_attribute_names())):
             self.observations_mapping[submodels[0]] = arange(agents_index.size)
         else:
             for submodel in submodels: #mapping agents to submodels
                 w = where(agent_set.get_attribute_by_index(submodel_alias,
                                                            agents_index) == submodel)[0]
                 if submodel_size_max is not None and submodel_size_max < w.size:
                     # sub-sample from submodel
                     wnew = sample_noreplace(w, submodel_size_max)
                     logger.log_status('Number of records in submodel %s reduced from %s to %s.' % (submodel, w.size, wnew.size))
                     w = sort(wnew)
                 self.observations_mapping[submodel] = w
     else: # no submodel distinction
         self.observations_mapping[-2] = arange(agents_index.size)
     
     mapped = zeros(agents_index.size, dtype='bool8')
     for submodel, index in self.observations_mapping.iteritems():
         mapped[index] = True
     self.observations_mapping["index"] = agents_index
     self.observations_mapping["mapped_index"] = where(mapped)[0]
def get_array_without_non_placed_agents(dataset, arr, max_value=None, location_id_name="grid_id"):
    """ 'arr' is an index within dataset. The function returns a triple
    (arr_without_nonplaced, arr_nonplaced, nonplaced_size).
    'arr_without_nonplaced' is 'arr' with those elements being removed that don't
    have any location assigned. 'arr_nonplaced' are the elements of 'arr' that were removed
    from 'arr_without_nonplaced'. 'nonplaced_size' is the size of the second item of the triple.
    """
    if location_id_name in dataset.get_known_attribute_names():
        non_placed = where(dataset.get_attribute_by_index(location_id_name, arr) <= 0)[0]
    else:
        non_placed=array([], dtype='int32')
    size_non_placed = non_placed.size
    if size_non_placed <= 0:
        return (arr, non_placed, 0)
    if (max_value is not None) and (size_non_placed > max_value):
        non_placed = sample_noreplace(non_placed, max_value)
        size_non_placed = non_placed.size
    a = ones((arr.size,))
    a[non_placed] = 0
    return (compress(a, arr), arr[non_placed], size_non_placed)
Example #25
    def prepare_for_estimate(self, specification_dict = None, specification_storage=None,
                              specification_table=None, agent_set=None,
                              agents_for_estimation_storage=None,
                              agents_for_estimation_table=None, join_datasets=False,
                              index_to_unplace=None, portion_to_unplace=1.0,
                              agent_filter=None,
                              data_objects={}):
        from opus_core.models.model import get_specification_for_estimation
        specification = get_specification_for_estimation(specification_dict,
                                                          specification_storage,
                                                          specification_table)
        if (agent_set is not None) and (index_to_unplace is not None):
            if self.location_id_string is not None:
                agent_set.compute_variables(self.location_id_string, resources=Resources(data_objects))
            if portion_to_unplace < 1:
                unplace_size = int(portion_to_unplace*index_to_unplace.size)
                end_index_to_unplace = sample_noreplace(index_to_unplace, unplace_size)
            else:
                end_index_to_unplace = index_to_unplace
            logger.log_status("Unplace " + str(end_index_to_unplace.size) + " agents.")
            agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                        -1*ones(end_index_to_unplace.size), end_index_to_unplace)
        # create agents for estimation
        if agents_for_estimation_storage is not None:
            estimation_set = Dataset(in_storage = agents_for_estimation_storage,
                                      in_table_name=agents_for_estimation_table,
                                      id_name=agent_set.get_id_name(), dataset_name=agent_set.get_dataset_name())
            if agent_filter is not None:
                estimation_set.compute_variables(agent_filter, resources=Resources(data_objects))
                index = where(estimation_set.get_attribute(agent_filter) > 0)[0]
                estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                agent_set.join_by_rows(estimation_set, require_all_attributes=False,
                                    change_ids_if_not_unique=True)
                index = arange(agent_set.size()-estimation_set.size(),agent_set.size())
            else:
                index = agent_set.get_id_index(estimation_set.get_id_attribute())
        else:
            index = arange(agent_set.size())
        return (specification, index)
Example #26
 def unplace_home_based_jobs(self, building_id):
     """
      If households are being unplaced, check the building_ids those households occupied for any home-based jobs
      that need to be unplaced as well.
     """
     # get the index of the building to check
     building_index = self.buildings_dataset.get_id_index(building_id)
     # compute some necessary variables
     number_of_home_based_jobs = self.buildings_dataset.compute_variables("urbansim_zone.building.number_of_home_based_jobs", dataset_pool=self.dataset_pool).astype('int')
     number_of_home_based_job_spaces = self.buildings_dataset.compute_variables("urbansim_zone.building.total_home_based_job_spaces", dataset_pool=self.dataset_pool).astype('int')
     number_of_home_based_jobs_in_building = number_of_home_based_jobs[building_index]
     number_of_home_based_job_spaces_in_building = number_of_home_based_job_spaces[building_index]
     number_of_home_based_jobs_to_unplace = number_of_home_based_jobs_in_building - number_of_home_based_job_spaces_in_building
     if number_of_home_based_jobs_to_unplace < 1:
         return 0
     # unplace jobs
     # get jobs to sample from
     index_of_jobs_to_sample_from = self.jobs_dataset.get_filtered_index('job.building_id==%s' % building_id)
     sample_of_jobs_to_unplace = sample_noreplace(index_of_jobs_to_sample_from, number_of_home_based_jobs_to_unplace)
     self.jobs_dataset.set_values_of_one_attribute('building_id', array([-1]), sample_of_jobs_to_unplace)
     return number_of_home_based_jobs_to_unplace
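The overfill count is a plain difference clipped at zero: occupied home-based job slots minus capacity, per building. The same arithmetic vectorized over toy per-building totals:

import numpy

occupied = numpy.array([5, 2, 7])   # home-based jobs per building (toy totals)
capacity = numpy.array([3, 4, 7])   # home-based job spaces per building
to_unplace = numpy.maximum(occupied - capacity, 0)  # array([2, 0, 0])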
    def _remove(self,
                amount=0,
                attribute='',
                dataset=None,
                index=None,
                **kwargs):
        if index is None:
            index = arange(dataset.size())
        if index.size < amount:
            logger.log_warning(
                "Number of observations satisfying event condition (%s) is less than the number to be removed (%s); remove %s instead"
                % (index.size, amount, index.size))
            amount = index.size

        if index.size == amount:
            to_be_removed = index
        else:
            to_be_removed = sample_noreplace(index, amount)

        if to_be_removed.size > 0:
            dataset.remove_elements(to_be_removed)
 def _do_sector_for_businesses(self, sector, diff, business_set, is_in_sector):
     available_business_index = where(is_in_sector)[0]
      if diff < 0: # sector shrinks: sample businesses to remove
         sample_array, non_placed, size_non_placed = \
             get_array_without_non_placed_agents(business_set, available_business_index, -1*diff, 
                                                  self.location_id_name)
         self.remove_businesses = concatenate((self.remove_businesses, non_placed, 
                                    sample_noreplace(sample_array, max(0,abs(diff)-size_non_placed))))
                         
      if diff > 0: # sector grows: create new businesses from sampled templates
         self.new_businesses[self.location_id_name]=concatenate((self.new_businesses[self.location_id_name],zeros((diff,))))
         self.new_businesses["sector_id"]=concatenate((self.new_businesses["sector_id"], sector*ones((diff,))))
         sampled_business = probsample_replace(available_business_index, diff, None)
         self.new_businesses["sqft"] = concatenate((self.new_businesses["sqft"],
                                              business_set.get_attribute("sqft")[sampled_business]))
         self.new_businesses["employment"] = concatenate((self.new_businesses["employment"],
                                                    business_set.get_attribute("employment")[sampled_business]))
         self.new_businesses["activity_id"] = concatenate((self.new_businesses["activity_id"],
                                                    business_set.get_attribute("activity_id")[sampled_business]))
         
         new_max_id = self.max_id+diff
         self.new_businesses[self.business_id_name]=concatenate((self.new_businesses[self.business_id_name], 
                                                                 arange(self.max_id+1, new_max_id+1)))
         self.max_id = new_max_id
def get_array_without_non_placed_agents(dataset,
                                        arr,
                                        max_value=None,
                                        location_id_name="grid_id"):
    """ 'arr' is an index within dataset. The function returns a triple
    (arr_without_nonplaced, arr_nonplaced, nonplaced_size).
    'arr_without_nonplaced' is 'arr' with those elements being removed that don't
    have any location assigned. 'arr_nonplaced' are the elements of 'arr' that were removed
    from 'arr_without_nonplaced'. 'nonplaced_size' is the size of the second item of the triple.
    """
    if location_id_name in dataset.get_known_attribute_names():
        non_placed = where(
            dataset.get_attribute_by_index(location_id_name, arr) <= 0)[0]
    else:
        non_placed = array([], dtype='int32')
    size_non_placed = non_placed.size
    if size_non_placed <= 0:
        return (arr, non_placed, 0)
    if (max_value is not None) and (size_non_placed > max_value):
        non_placed = sample_noreplace(non_placed, max_value)
        size_non_placed = non_placed.size
    a = ones((arr.size, ))
    a[non_placed] = 0
    return (compress(a, arr), arr[non_placed], size_non_placed)
    def run(self, in_storage, out_storage=None, business_dsname="business", zone_dsname=None):
        dataset_pool = DatasetPool(storage=in_storage, package_order=['psrc_parcel', 'urbansim_parcel', 'urbansim', 'opus_core'] )
        seed(1)
        allbusinesses = dataset_pool.get_dataset(business_dsname)
        parcels = dataset_pool.get_dataset('parcel')
        buildings = dataset_pool.get_dataset('building')
        parcels.compute_variables(["urbansim_parcel.parcel.residential_units", "number_of_buildings = parcel.number_of_agents(building)", 
                                   "non_residential_sqft = (parcel.aggregate(building.non_residential_sqft)).astype(int32)",
                                   "number_of_res_buildings = parcel.aggregate(urbansim_parcel.building.is_residential)",
                                   "number_of_nonres_buildings = parcel.aggregate(urbansim_parcel.building.is_non_residential)",
                                   "number_of_mixed_use_buildings = parcel.aggregate(urbansim_parcel.building.is_generic_building_type_6)"
                                   ], 
                                  dataset_pool=dataset_pool)
        restypes = [12, 4, 19, 11, 34, 10, 33] # building_type_ids treated as residential
        reslutypes = [13,14,15,24] # land_use_type_ids treated as residential
        is_valid_business = ones(allbusinesses.size(), dtype='bool8')
        parcels_not_matched = logical_and(in1d(allbusinesses["parcel_id"], parcels.get_id_attribute(), invert=True), allbusinesses["parcel_id"] > 0)
        if(parcels_not_matched.sum() > 0):
            is_valid_business[where(parcels_not_matched)] = False
            logger.log_warning(message="No parcel exists for %s businesses (%s jobs)" % (parcels_not_matched.sum(), 
                                                                                         allbusinesses[self.number_of_jobs_attr][where(parcels_not_matched)].sum()))
        zero_parcel = allbusinesses["parcel_id"]<=0
        if zero_parcel.sum() > 0:
            is_valid_business[where(zero_parcel)] = False
            logger.log_warning(message="%s businesses (%s jobs) located on zero parcel_id" % (zero_parcel.sum(), 
                                                                                         allbusinesses[self.number_of_jobs_attr][where(zero_parcel)].sum()))            
            
        zero_size = logical_and(is_valid_business, allbusinesses[self.number_of_jobs_attr].round() == 0)
        if(sum(zero_size) > 0):
            is_valid_business[where(zero_size)] = False
            logger.log_warning(message="%s businesses are of size 0." % sum(zero_size))
        
        businesses = DatasetSubset(allbusinesses, index=where(is_valid_business)[0])
        
        parcels.add_attribute(name="number_of_workplaces", data=parcels.sum_dataset_over_ids(businesses, constant=1))
        
        has_single_res_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_res_buildings"] == 1) # 1 (1 residential)
        parcels.add_attribute(data=has_single_res_buildings.astype("int32"), name="buildings_code")
        has_mult_res_buildings = logical_and(parcels["number_of_buildings"] > 1,  parcels["number_of_nonres_buildings"] == 0) # 2 (mult residential)
        parcels.modify_attribute("buildings_code", data=2*ones(has_mult_res_buildings.sum()), index=where(has_mult_res_buildings)) 
        has_single_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_nonres_buildings"] == 1), parcels["number_of_mixed_use_buildings"] == 0) # 3 (1 non-res)
        parcels.modify_attribute("buildings_code", data=3*ones(has_single_nonres_buildings.sum()), index=where(has_single_nonres_buildings)) 
        has_mult_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] > 1, parcels["number_of_res_buildings"] == 0), parcels["number_of_mixed_use_buildings"] == 0) # 4 (mult non-res)
        parcels.modify_attribute("buildings_code", data=4*ones(has_mult_nonres_buildings.sum()), index=where(has_mult_nonres_buildings))
        has_single_mixed_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_mixed_use_buildings"] == 1) # 5 (1 mixed-use)
        parcels.modify_attribute("buildings_code", data=5*ones(has_single_mixed_buildings.sum()), index=where(has_single_mixed_buildings))
        has_mult_mixed_buildings = logical_and(parcels["number_of_buildings"] > 1, 
                                               logical_or(logical_and(parcels["number_of_res_buildings"] > 0, parcels["number_of_nonres_buildings"] > 0), 
                                                          logical_or(parcels["number_of_mixed_use_buildings"] > 1, 
                                                                     logical_and(parcels["number_of_res_buildings"] == 0, 
                                                                                 parcels["number_of_mixed_use_buildings"] > 0)))) # 6
        parcels.modify_attribute("buildings_code", data=6*ones(has_mult_mixed_buildings.sum()), index=where(has_mult_mixed_buildings))
        has_no_building_res_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)) # 7 (vacant with res LU type)
        parcels.modify_attribute("buildings_code", data=7*ones(has_no_building_res_lutype.sum()), index=where(has_no_building_res_lutype)) 
        has_no_building_nonres_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)==0) # 8 (vacant with non-res LU type)
        parcels.modify_attribute("buildings_code", data=8*ones(has_no_building_nonres_lutype.sum()), index=where(has_no_building_nonres_lutype))
        
        business_sizes = businesses[self.number_of_jobs_attr].round().astype("int32") 
        business_location = {}
        business_location1wrkpl = zeros(businesses.size(), dtype="int32")
        business_location1wrkplres = zeros(businesses.size(), dtype="int32")
        business_ids = businesses.get_id_attribute()
        # sample one building for cases when sampling is required.
        for ibusid in range(businesses.size()):
            idx = where(buildings['parcel_id'] == businesses['parcel_id'][ibusid])[0]
            bldgids = buildings['building_id'][idx]
            business_location[business_ids[ibusid]] = bldgids
            if bldgids.size == 1:
                business_location1wrkpl[ibusid] = bldgids[0]
            elif bldgids.size > 1:
                business_location1wrkpl[ibusid] = bldgids[sample_noreplace(arange(bldgids.size), 1)]
                if buildings['residential_units'][idx].sum() > 0:
                    # Residential buildings are sampled with probabilities proportional to residential units
                    business_location1wrkplres[ibusid] = bldgids[probsample_noreplace(arange(bldgids.size), 1, prob_array=buildings['residential_units'][idx])]
                else:
                    business_location1wrkplres[ibusid] = business_location1wrkpl[ibusid]
        
        home_based = zeros(business_sizes.sum(), dtype="bool8")
        job_building_id = zeros(business_sizes.sum(), dtype="int32")
        job_array_labels = business_ids.repeat(business_sizes)
        job_assignment_case = zeros(business_sizes.sum(), dtype="int32")
        processed_bindicator = zeros(businesses.size(), dtype="bool8")
        business_codes = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"])
        business_nworkplaces = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"])
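        # job_array_labels repeats each business_id once per job, so masks built
        # with in1d(job_array_labels, <business ids>) select all jobs of the
        # chosen businesses at once (used throughout the cases below)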
        logger.log_status("Total number of jobs: %s" % home_based.size)
        
        # 1. 1-2 worker business in 1 residential building
        idx_sngl_wrk_1bld_fit = where(logical_and(business_sizes < 3, business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_1bld_fit])
        home_based[jidx] = True
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_1bld_fit].repeat(business_sizes[idx_sngl_wrk_1bld_fit])
        job_assignment_case[jidx] = 1
        processed_bindicator[idx_sngl_wrk_1bld_fit] = True
        logger.log_status("1. %s jobs (%s businesses) set as home-based due to 1-2 worker x 1 residential building fit." % (
            business_sizes[idx_sngl_wrk_1bld_fit].sum(), idx_sngl_wrk_1bld_fit.size))
        
        # 2. 1-2 worker business in multiple residential buildings
        idx_sngl_wrk_multbld_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_multbld_fit])
        home_based[jidx] = True
        job_building_id[jidx] = business_location1wrkplres[idx_sngl_wrk_multbld_fit].repeat(business_sizes[idx_sngl_wrk_multbld_fit])
        job_assignment_case[jidx] = 2
        processed_bindicator[idx_sngl_wrk_multbld_fit] = True
        logger.log_status("2. %s jobs (%s businesses) set as home-based due to 1-2 worker x multiple residential buildings fit." % (
            business_sizes[idx_sngl_wrk_multbld_fit].sum(), idx_sngl_wrk_multbld_fit.size))
               
        # 3. 1-2 worker in single non-res building (not mixed-use)
        idx_sngl_wrk_single_nonres_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 3))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_single_nonres_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_single_nonres_fit].repeat(business_sizes[idx_sngl_wrk_single_nonres_fit])
        job_assignment_case[jidx] = 3
        processed_bindicator[idx_sngl_wrk_single_nonres_fit] = True
        logger.log_status("3. %s jobs (%s businesses) placed due to 1-2 worker x single non-res building fit." % (
                          business_sizes[idx_sngl_wrk_single_nonres_fit].sum(), idx_sngl_wrk_single_nonres_fit.size))     
        
        # 4. 1-2 worker in multiple non-res building (not mixed-use)
        idx_sngl_wrk_mult_nonres_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 4))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mult_nonres_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mult_nonres_fit].repeat(business_sizes[idx_sngl_wrk_mult_nonres_fit])
        job_assignment_case[jidx] = 4
        processed_bindicator[idx_sngl_wrk_mult_nonres_fit] = True
        logger.log_status("4. %s jobs (%s businesses) placed due to 1-2 worker x multiple non-res building fit." % (
            business_sizes[idx_sngl_wrk_mult_nonres_fit].sum(), idx_sngl_wrk_mult_nonres_fit.size))      
                
        # 5. 1-2 worker in single mixed-use building
        idx_sngl_wrk_smu_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 5))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_smu_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_smu_fit].repeat(business_sizes[idx_sngl_wrk_smu_fit])
        job_assignment_case[jidx] = 5
        processed_bindicator[idx_sngl_wrk_smu_fit] = True
        logger.log_status("5. %s jobs (%s businesses) in 1-2 worker x single mixed-use building." % (
            business_sizes[idx_sngl_wrk_smu_fit].sum(), idx_sngl_wrk_smu_fit.size))       
        
        # 6. 1-2 worker in multiple mixed-type buildings
        idx_sngl_wrk_mmu_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 6))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mmu_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mmu_fit].repeat(business_sizes[idx_sngl_wrk_mmu_fit])
        bldtype = buildings.get_attribute_by_id("building_type_id", business_location1wrkpl[idx_sngl_wrk_mmu_fit])
        is_bldtype_res = in1d(bldtype, restypes)
        home_based[in1d(job_array_labels, business_ids[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)])] = True
        job_assignment_case[jidx] = 6
        processed_bindicator[idx_sngl_wrk_mmu_fit] = True
        logger.log_status("6. %s jobs (%s businesses) in 1-2 worker x multiple mixed-type buildings. %s jobs classified as home-based." % (
            business_sizes[idx_sngl_wrk_mmu_fit].sum(), idx_sngl_wrk_mmu_fit.size, business_sizes[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)].sum()))            

        # 7. 1-2 worker business in residential parcel with no building
        idx_sngl_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 7))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_vacant_res])
        job_assignment_case[jidx] = 7
        home_based[jidx] = True
        processed_bindicator[idx_sngl_wrk_vacant_res] = True
        logger.log_status("7. %s jobs (%s businesses of size 1-2) could not be placed due to non-existing buildings in parcels with residential LU type." % (
            business_sizes[idx_sngl_wrk_vacant_res].sum(), idx_sngl_wrk_vacant_res.size))        

        # 8. 3+ workers of governmental workplaces in 1+ residential building
        ind_business_case8 = logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, in1d(businesses['sector_id'], [18,19]))), in1d(business_codes, [1,2]))
        idx_wrk_fit = where(ind_business_case8)[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 8
        processed_bindicator[idx_wrk_fit] = True
        logger.log_status("8. %s governmental jobs (%s businesses of size 3+) could not be placed due to residing in residential buildings only." % (
                    business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))
        
        # 9. 3-30 workers in single residential building. Make two of them home based.
        idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, business_sizes <= 30)), business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_fit].repeat(business_sizes[idx_sngl_wrk_fit])
        bsizeminus2 = vstack((2*ones(idx_sngl_wrk_fit.size), business_sizes[idx_sngl_wrk_fit]-2)).ravel("F").astype("int32") # interleaving 2 and the remaining business size
        hbidx = tile(array([True, False]), bsizeminus2.size/2).repeat(bsizeminus2) # set the first two jobs of every business to True, others to False
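        # e.g. business sizes [5, 3] give bsizeminus2 = [2, 3, 2, 1] and
        # hbidx = [T, T, F, F, F, T, T, F]: the first two jobs of each business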
        home_based[(where(jidx)[0])[hbidx]] = True
        job_assignment_case[jidx] = 9
        processed_bindicator[idx_sngl_wrk_fit] = True        
        logger.log_status("9. %s jobs (%s businesses) in 3-30 worker x single residential building. %s jobs assigned as home-based." % (
            business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, hbidx.sum()))      
        
        # 10. 3-30 workers in multiple residential buildings. Make two of them home based.
        idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, business_sizes <= 30)), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit])
        job_assignment_case[jidx] = 10
        processed_bindicator[idx_sngl_wrk_fit] = True
        # sample buildings for businesses, parcel by parcel
        bpcls = unique(businesses["parcel_id"][idx_sngl_wrk_fit])
        for ipcl in range(bpcls.size):
            bidx = where(buildings['parcel_id'] == bpcls[ipcl])[0]
            bldgids = buildings['building_id'][bidx]
            bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]], business_ids[idx_sngl_wrk_fit])
            # repeat building ids by their residential units so sampling is proportional to units rather than buildings
            bldgids = bldgids.repeat(maximum(1, buildings['residential_units'][bidx].astype('int32'))) 
            if bldgids.size < bussids.size:
                bldarray = bldgids.repeat(int(1+ceil((bussids.size - bldgids.size)/float(bldgids.size))))
            else:
                bldarray = bldgids
            shuffle(bldarray) # randomly reorder in-place
            for ib in range(bussids.size):
                jidx = where(job_array_labels == bussids[ib])[0]
                job_building_id[jidx] = bldarray[ib]
                home_based[jidx[0:2]] = True
        logger.log_status("10. %s jobs (%s businesses) in 3-30 worker x multiple residential building. %s jobs assigned as home-based." % (
            business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, idx_sngl_wrk_fit.size*2))        


        # 11. single workplace, 3+ workers in single non-res or mixed-use building (11.)
        idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2), 
                                                            logical_or(business_codes==3, business_codes==5)),
                                                business_nworkplaces==1))[0]
        which_labels = where(in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers]))[0]
        job_building_id[which_labels] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(business_sizes[idx_sngl_wrkplace_2plus_workers])   
        job_assignment_case[which_labels] = 11
        processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True
        logger.log_status("11. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x single non-res/mixed-use building fit." % (
            business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size))
        
        # 12. single workplace, 3+ workers in multiple mixed-type building
        idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==4, business_codes==6)),
                                                            business_nworkplaces==1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(business_sizes[idx_sngl_wrkplace_2plus_workers])    
        job_assignment_case[jidx] = 12
        processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True
        logger.log_status("12. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x multiple non-res/mixed building fit." % (
            business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size))

        # 13. multiple workplaces, 3+ workers in single non-res or mixed building
        idx_mult_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==3, business_codes==5)),
                                                            business_nworkplaces > 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_mult_wrkplace_2plus_workers])
        job_building_id[jidx] = business_location1wrkpl[idx_mult_wrkplace_2plus_workers].repeat(business_sizes[idx_mult_wrkplace_2plus_workers])
        job_assignment_case[jidx] = 13
        processed_bindicator[idx_mult_wrkplace_2plus_workers] = True
        logger.log_status("13. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x single non-res/mixed building fit." % (
            business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size))
        
        # 14. multiple workplaces, 3+ workers in multiple non-res or mixed building
        idx_mult_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==4, business_codes==6)),
                                                            business_nworkplaces > 1))[0]
        processed_bindicator[idx_mult_wrkplace_2plus_workers] = True
        # sample buildings for businesses, parcel by parcel
        bpcls = unique(businesses["parcel_id"][idx_mult_wrkplace_2plus_workers])
        #hbasedsum = home_based.sum()
        for ipcl in range(bpcls.size):
            bldgids = buildings['building_id'][buildings['parcel_id'] == bpcls[ipcl]]
            bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]], business_ids[idx_mult_wrkplace_2plus_workers])
            if bldgids.size < bussids.size:
                bldarray = bldgids.repeat(int(1+ceil((bussids.size - bldgids.size)/float(bldgids.size))))
            else:
                bldarray = bldgids
            shuffle(bldarray) # randomly reorder in-place
            is_res = in1d(bldarray, restypes)
            for ib in range(bussids.size):
                jidx = where(job_array_labels == bussids[ib])
                job_building_id[jidx] = bldarray[ib]
                #home_based[jidx] = is_res
                job_assignment_case[jidx] = 14
        logger.log_status("14. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x multiple non-res/mixed building fit." % (
            business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size))
        
        
        # 15. 3+ workers in residential parcel with no building
        idx_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 2), business_codes == 7))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_res])
        job_assignment_case[jidx] = 15
        processed_bindicator[idx_wrk_vacant_res] = True
        logger.log_status("15. %s jobs (%s businesses of 3+ workers) could not be placed due to non-existing buildings in parcels with residential LU type." % (
            business_sizes[idx_wrk_vacant_res].sum(), idx_wrk_vacant_res.size))
        
        # 16. nonresidential parcel with no building
        idx_wrk_vacant_nonres = where(logical_and(processed_bindicator==0, business_codes == 8))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_nonres])
        job_assignment_case[jidx] = 16
        processed_bindicator[idx_wrk_vacant_nonres] = True
        logger.log_status("16. %s jobs (%s businesses) could not be placed due to non-existing buildings in parcels with non-esidential LU type." % (
            business_sizes[idx_wrk_vacant_nonres].sum(), idx_wrk_vacant_nonres.size))        
        
        # 17. 31+ workers in single residential building. Do not place - will go into ELCM.
        idx_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 30), business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 17
        processed_bindicator[idx_wrk_fit] = True        
        logger.log_status("17. %s jobs (%s businesses) in 31+ workers x single residential building." % (
            business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))         
    
        # 18. 31+ workers in multiple residential buildings.
        idx_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 30), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 18
        processed_bindicator[idx_wrk_fit] = True
        logger.log_status("18. %s jobs (%s businesses) in 31+ workers x multiple residential building." % (
            business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))                

        # jobs in messy buildings
        idx_messy_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 0), business_codes == 0))[0]
        processed_bindicator[idx_messy_fit] = True
        logger.log_status("%s jobs (%s businesses) could not be placed due to messy buildings." % (
            business_sizes[idx_messy_fit].sum(), idx_messy_fit.size))         
         
        # build new buildings for jobs in cases 7, 8, 15 and 16
        jidx_no_bld = where(in1d(job_assignment_case, [7,8,15,16]))[0]
        bus = unique(job_array_labels[jidx_no_bld])
        bsidx = businesses.get_id_index(bus)
        # first create buildings for single workplaces per parcel
        single_workplace_idx = where(business_nworkplaces[bsidx] == 1)[0]
        newbld_parcel_id = businesses['parcel_id'][bsidx][single_workplace_idx]
        newbld_bt = sector2building_type(businesses['sector_id'][bsidx][single_workplace_idx])
        newbids = arange(buildings.get_id_attribute().max()+1, buildings.get_id_attribute().max()+single_workplace_idx.size+1)
        bbldid = zeros(bsidx.size, dtype='int32')
        bbldid[single_workplace_idx] = newbids
        # for parcels with multiple workplaces select the largest business to determine its building type
        mult_bsidx = bsidx[where(business_nworkplaces[bsidx] > 1)[0]]
        empty_parcels = businesses['parcel_id'][mult_bsidx]
        uempty_parcels = unique(empty_parcels)
        bsize_on_empty_pcl = ndmax(business_sizes[mult_bsidx], labels=empty_parcels, index=uempty_parcels)
        newbld2_sec = zeros(uempty_parcels.size, dtype='int32')
        newbids2 = arange(newbids.max()+1, newbids.max()+uempty_parcels.size+1)
        for ipcl in range(uempty_parcels.size):
            newbld2_sec[ipcl] = businesses['sector_id'][mult_bsidx][logical_and(businesses['parcel_id'][mult_bsidx] == uempty_parcels[ipcl], 
                                                                                business_sizes[mult_bsidx]==bsize_on_empty_pcl[ipcl])][0]
            this_bidx = where(businesses['parcel_id'][bsidx] == uempty_parcels[ipcl])
            bbldid[this_bidx] = newbids2[ipcl]
            
        newbld_parcel_id = concatenate((newbld_parcel_id, uempty_parcels))
        newbld_bt = concatenate((newbld_bt, sector2building_type(newbld2_sec)))    
        
        newbldgs = {'building_id': concatenate((newbids, newbids2)),
                    'parcel_id': newbld_parcel_id,
                    'building_type_id': newbld_bt,
                    }
        buildings.add_elements(newbldgs, require_all_attributes=False)
        jidx = where(in1d(job_array_labels, business_ids[bsidx]))[0]
        job_building_id[jidx] = bbldid.repeat(business_sizes[bsidx])
        logger.log_status("Build %s new buildings to accommodate %s jobs (out of which %s are governmental) from cases 7, 15, 16." % (
            newbld_parcel_id.size, jidx.size, business_sizes[bsidx][where(in1d(businesses['sector_id'][bsidx], [18,19]))].sum()))
        
        
        logger.log_status("Assigned %s (%s percent) home-based jobs." % (home_based.sum(), round(home_based.sum()/(home_based.size/100.),2)))
        logger.log_status("Finished %s percent (%s) jobs (%s businesses) processed. %s jobs (%s businesses) remain to be processed." % \
                          (round(business_sizes[processed_bindicator].sum()/(home_based.size/100.),2),
                           business_sizes[processed_bindicator].sum(), processed_bindicator.sum(),
                          business_sizes[logical_not(processed_bindicator)].sum(), business_sizes[logical_not(processed_bindicator)].size))
        
        logger.start_block("Storing jobs data.")
        # create job dataset
        job_data = {"job_id": (arange(job_building_id.size)+1).astype("int32"),
                    "home_based_status" : home_based,
                    "building_id": job_building_id,
                    "business_id": job_array_labels.astype("int32"),
                    "sector_id": businesses['sector_id'].repeat(business_sizes).astype("int32"), 
                    "parcel_id": businesses['parcel_id'].repeat(business_sizes).astype("int32"), 
                    "assignment_case": job_assignment_case}

        # join with zones
        if zone_dsname is not None:
            zones = dataset_pool.get_dataset(zone_dsname)
            idname = zones.get_id_name()[0]
            #jpcls = buildings.get_attribute_by_id('parcel_id', job_building_id)
            job_data[idname] = parcels.get_attribute_by_id(idname, job_data["parcel_id"])
            
            
        dictstorage = StorageFactory().get_storage('dict_storage')
        dictstorage.write_table(table_name="jobs", table_data=job_data)
        jobs = Dataset(in_storage=dictstorage, in_table_name="jobs", dataset_name="job", id_name="job_id")
        if out_storage is not None:
            jobs.write_dataset(out_storage=out_storage, out_table_name="jobs")
            buildings.write_dataset(out_storage=out_storage, attributes=AttributeType.PRIMARY)
        logger.end_block()        
        return jobs
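The dict_storage round-trip used above can be exercised on its own; a toy sketch with made-up job data, assuming an opus_core installation:

from numpy import array, arange
from opus_core.storage_factory import StorageFactory
from opus_core.datasets.dataset import Dataset

storage = StorageFactory().get_storage('dict_storage')
storage.write_table(table_name="jobs",
                    table_data={"job_id": arange(3)+1,
                                "building_id": array([10, 10, 12])})
jobs = Dataset(in_storage=storage, in_table_name="jobs",
               dataset_name="job", id_name="job_id")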
Example #31
    def select_proposals_within_parcels(self, nmax=2, weight_string=None, compete_among_types=False, filter_threshold=75, 
                                        MU_same_weight=False, transpose_interpcl_weight=True):
        # Allow only nmax proposals per parcel so that parcels with a small number of proposals are not disadvantaged.
        # The proposals with the highest weights are kept.
        #parcels_with_proposals = unique(self.proposal_set['parcel_id'])
        #parcel_set = self.dataset_pool.get_dataset('parcel')
        if weight_string is not None:
            within_parcel_weights = self.proposal_set.compute_variables([weight_string], dataset_pool=self.dataset_pool)
        else:
            within_parcel_weights = self.weight
        
        egligible = logical_and(self.weight > 0, 
                                self.proposal_set['status_id'] == self.proposal_set.id_tentative)
        wegligible = where(egligible)[0]
        if wegligible.size <=0:
            return
        #parcels_with_proposals = unique(self.proposal_set['parcel_id'][wegligible])
        #min_type = {}
        #egligible_proposals = {}
        tobechosen_ind = ones(wegligible.size).astype('bool8')
        if not compete_among_types:
            for key in self.column_names:
                utypes_all = unique(self.proposal_component_set[key])
                categories = zeros(self.proposal_set.size(), dtype='int32')
                for btype in utypes_all:
                    w = where(ndimage.sum(self.proposal_component_set[key] == btype,
                                          labels=self.proposal_component_set['proposal_id'], 
                                          index=self.proposal_set.get_id_attribute()
                                          ) == self.proposal_set["number_of_components"])[0]
                    categories[w] = btype
                # a category of zero means a mixed-use proposal whose components have different types

                utypes = unique(categories[wegligible])           
                for value in utypes:
                    type_is_value_ind = categories[wegligible]==value
                    for i in range(nmax):
                        parcels_with_proposals = (unique(self.proposal_set['parcel_id'][wegligible][where(type_is_value_ind)])).astype(int32)
                        if parcels_with_proposals.size <= 0:
                            continue
                        labels = (self.proposal_set['parcel_id'][wegligible])*type_is_value_ind               
                        chosen_prop = array(maximum_position(within_parcel_weights[wegligible], 
                                            labels=labels, 
                                            index=parcels_with_proposals)).flatten().astype(int32)               
                        egligible[wegligible[chosen_prop]] = False
                        type_is_value_ind[chosen_prop] = False
        else:
            parcels_with_proposals = unique(self.proposal_set['parcel_id'][wegligible]).astype(int32)
            max_prop = array(maximum_position(within_parcel_weights[wegligible], 
                                            labels=self.proposal_set['parcel_id'][wegligible], 
                                            index=parcels_with_proposals)).flatten().astype(int32)                                            
            max_value_by_parcel = within_parcel_weights[wegligible][max_prop]
            incompetition = ones(wegligible.size, dtype='bool8')
            incompetition[max_prop] = False
            egligible[wegligible[max_prop]] = False            
            for i in range(nmax-1):
                labels = (self.proposal_set['parcel_id'][wegligible])*incompetition 
                valid_parcels = where(in1d(parcels_with_proposals, self.proposal_set['parcel_id'][wegligible][where(incompetition)]))[0]
                if valid_parcels.size <= 0:
                    break
                chosen_prop = array(maximum_position(within_parcel_weights[wegligible], 
                                            labels=labels, 
                                            index=parcels_with_proposals[valid_parcels])).flatten().astype(int32)
                percent = within_parcel_weights[wegligible][chosen_prop]/(max_value_by_parcel[valid_parcels]/100.0)
                where_lower = where(in1d(self.proposal_set['parcel_id'][wegligible], parcels_with_proposals[valid_parcels][percent <= filter_threshold]))[0]
                egligible[wegligible[setdiff1d(chosen_prop, where_lower)]] = False   # proposals with egligible=True get eliminated, so we don't want to set it to False for the where_lower ones
                incompetition[union1d(chosen_prop, where_lower)] = False
                if incompetition.sum() <= 0:
                    break
             
            self.proposal_set['status_id'][where(egligible)] = self.proposal_set.id_eliminated_in_within_parcel_selection
            if MU_same_weight:
                # Set weights of mixed-use proposals within the same parcel to the same
                # value (the parcel-averaging variant below is commented out)
                parcels = self.dataset_pool.get_dataset('parcel')
#                parcels.compute_variables(['mu_ind = parcel.aggregate(numpy.logical_or(development_project_proposal_component.building_type_id==4, development_project_proposal_component.building_type_id==12) + numpy.logical_or(development_project_proposal_component.building_type_id==3, development_project_proposal_component.building_type_id==13), intermediates=[development_project_proposal])'], 
#                                                    dataset_pool=self.dataset_pool)
#                pcl_ids = parcels.get_id_attribute()[parcels['mu_ind'] > 1]
#                is_mu = logical_and(logical_and(self.weight > 0, 
#                                self.proposal_set['status_id'] == self.proposal_set.id_tentative),
#                                       in1d(self.proposal_set['parcel_id'], pcl_ids))
#                where_mu = where(is_mu)[0]
#                if where_mu.size <= 0:
#                    return
#                trans_weights = self.weight[where_mu]
#                if transpose_interpcl_weight:
#                    trans_weights = log(trans_weights)
#                pcl_idx = parcels.get_id_index(self.proposal_set['parcel_id'][where_mu])
#                upcl_idx = unique(pcl_idx)
#                weight_mean = array(ndimage_mean(trans_weights, labels=pcl_idx,  index=upcl_idx))
#                if transpose_interpcl_weight:
#                    weight_mean = exp(weight_mean)
#                weight_mean_tmp = zeros(upcl_idx.max()+1).astype(weight_mean.dtype)
#                weight_mean_tmp[upcl_idx]=weight_mean
#                self.weight[where_mu]=weight_mean_tmp[pcl_idx]
                self.proposal_set.compute_variables(['is_mfres = development_project_proposal.aggregate(numpy.logical_or(development_project_proposal_component.building_type_id==4, development_project_proposal_component.building_type_id==12))'],
                                                    dataset_pool=self.dataset_pool)
                parcels.compute_variables(['mu_ind = (parcel.aggregate(development_project_proposal.is_mfres)>0) * (parcel.mix_split_id > 0)'], 
                                                    dataset_pool=self.dataset_pool)
                pcl_ids = parcels.get_id_attribute()[parcels['mu_ind'] > 0]
                egligible_props = logical_and(self.weight > 0, logical_and(
                                self.proposal_set['status_id'] == self.proposal_set.id_tentative,
                                self.proposal_set['is_mfres']>0))
                where_prop_to_modify = where(logical_and(egligible_props,
                                       in1d(self.proposal_set['parcel_id'], pcl_ids)))[0]
                if where_prop_to_modify.size <= 0:
                    return
                upcl = unique(self.proposal_set['parcel_id'][where_prop_to_modify])               
                npcl_to_modify = int(upcl.size/10.0)
                if npcl_to_modify == 0:
                    return
                pcls_to_modify = sample_noreplace(upcl, npcl_to_modify)
                where_prop_to_modify_final = where(logical_and(egligible_props,
                                       in1d(self.proposal_set['parcel_id'], pcls_to_modify)))[0]
                trans_weights = self.weight[where_prop_to_modify_final]
                if transpose_interpcl_weight:
                    trans_weights = log(trans_weights)
                #trans_weights = 1.2*trans_weights
                if transpose_interpcl_weight:
                    trans_weights = exp(trans_weights)
                # note: with the scaling line above commented out, the log/exp
                # round-trip leaves the weights unchanged
                self.weight[where_prop_to_modify_final] = trans_weights
            return
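A toy sketch of the per-parcel maximum selection driving this method, using scipy.ndimage.maximum_position as in the code above (values are illustrative):

from numpy import array
from scipy.ndimage import maximum_position

weights = array([0.2, 0.9, 0.5, 0.7, 0.1])   # one weight per proposal
parcels = array([ 10,  10,  20,  20,  20])   # parcel_id of each proposal
best = array(maximum_position(weights, labels=parcels,
                              index=array([10, 20]))).flatten()
# best -> [1, 3]: index of the highest-weight proposal in each parcel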
            
    def run(self,
            year,
            business_set,
            control_totals,
            data_objects=None,
            resources=None):
        business_id_name = business_set.get_id_name()[0]
        control_totals.get_attribute("total_number_of_businesses") # ensure the attribute is loaded
        idx = where(control_totals.get_attribute("year") == year)
        sectors = unique(
            control_totals.get_attribute_by_index("building_use_id", idx))
        max_id = business_set.get_id_attribute().max()
        business_size = business_set.size()
        new_businesses = {
            self.location_id_name: array([], dtype='int32'),
            "building_use_id": array([], dtype='int32'),
            business_id_name: array([], dtype='int32'),
            "sqft": array([], dtype=int32),
            "employees": array([], dtype=int32),
        }
        compute_resources = Resources(data_objects)
        #        compute_resources.merge({job_building_types.get_dataset_name():job_building_types, "debug":self.debug})
        business_set.compute_variables(map(
            lambda x: "%s.%s.is_sector_%s" %
            (self.variable_package, business_set.get_dataset_name(), x),
            sectors),
                                       resources=compute_resources)
        remove_businesses = array([], dtype='int32')

        for sector in sectors:
            total_businesses = control_totals.get_data_element_by_id(
                (year, sector)).total_number_of_businesses
            is_in_sector = business_set.get_attribute("is_sector_%s" % sector)
            diff = int(total_businesses - is_in_sector.astype(int8).sum())

            if diff < 0:  # more businesses in this sector than the control total: remove the surplus
                w = where(is_in_sector == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(business_set, w, -1*diff,
                                                         self.location_id_name)
                remove_businesses = concatenate(
                    (remove_businesses, non_placed,
                     sample_noreplace(sample_array,
                                      max(0,
                                          abs(diff) - size_non_placed))))

            if diff > 0:  # fewer businesses than the control total: create new ones by cloning sampled businesses
                new_businesses[self.location_id_name] = concatenate(
                    (new_businesses[self.location_id_name],
                     zeros((diff, ), dtype="int32")))
                new_businesses["building_use_id"] = concatenate(
                    (new_businesses["building_use_id"], sector * ones(
                        (diff, ), dtype="int32")))

                available_business_index = where(is_in_sector)[0]
                sampled_business = probsample_replace(available_business_index,
                                                      diff, None)

                new_businesses["sqft"] = concatenate(
                    (new_businesses["sqft"],
                     business_set.get_attribute("sqft")[sampled_business]))
                new_businesses["employees"] = concatenate((
                    new_businesses["employees"],
                    business_set.get_attribute("employees")[sampled_business]))

                new_max_id = max_id + diff
                new_businesses[business_id_name] = concatenate(
                    (new_businesses[business_id_name],
                     arange(max_id + 1, new_max_id + 1)))
                max_id = new_max_id

        business_set.remove_elements(remove_businesses)
        business_set.add_elements(new_businesses, require_all_attributes=False)
        difference = business_set.size() - business_size
        self.debug.print_debug(
            "Difference in number of businesses: %s (original %s,"
            " new %s, created %s, deleted %s)" %
            (difference, business_size, business_set.size(),
             new_businesses[business_id_name].size, remove_businesses.size), 3)
        self.debug.print_debug(
            "Number of unplaced businesses: %s" %
            where(business_set.get_attribute(self.location_id_name) <= 0)
            [0].size, 3)
        return difference
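A short sketch of the two samplers used above (toy data; assuming the opus_core.sampling_toolbox module that defines them): surplus businesses are removed by sampling without replacement, and new ones are cloned by sampling with replacement.

from numpy import arange
from opus_core.sampling_toolbox import sample_noreplace, probsample_replace

candidates = arange(100)
removed = sample_noreplace(candidates, 10)         # 10 distinct elements
cloned = probsample_replace(candidates, 25, None)  # 25 draws, uniform probabilities, repeats allowed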
 def run(self, in_storage, out_storage, business_table="business", jobs_table="jobs", control_totals_table=None):
     logger.log_status("Unrolling %s table." % business_table)
      # get attributes from the establishments table
     business_dataset = BusinessDataset(in_storage=in_storage, in_table_name=business_table)
     business_sizes = business_dataset.get_attribute(self.number_of_jobs_attr).astype("int32")
     sectors = business_dataset.get_attribute("sector_id")
     tazes = business_dataset.get_attribute(self.geography_id_attr).astype("int32")
     building_ids = array([], dtype='int32')
     if "building_id" in business_dataset.get_primary_attribute_names():
         building_ids = business_dataset.get_attribute("building_id")
     parcel_ids = array([], dtype='int32')
     if "parcel_id" in business_dataset.get_primary_attribute_names():
         parcel_ids = business_dataset.get_attribute("parcel_id")
     home_based = array([], dtype='int16')
     if "home_based" in business_dataset.get_primary_attribute_names():
         home_based = business_dataset.get_attribute("home_based")
     building_sqft = business_dataset.get_attribute(self.sqft_attr)
     building_sqft[building_sqft <= 0] = 0
     join_flags = None
     if "join_flag" in business_dataset.get_primary_attribute_names():
         join_flags = business_dataset.get_attribute("join_flag")
     impute_sqft_flag = None
     if "impute_building_sqft_flag" in business_dataset.get_primary_attribute_names():
         impute_sqft_flag = business_dataset.get_attribute("impute_building_sqft_flag")
     
      # initialize jobs attributes
     total_size = business_sizes.sum()
     jobs_data = {}
     jobs_data["sector_id"] = resize(array([-1], dtype=sectors.dtype), total_size)
     jobs_data["building_id"] = resize(array([-1], dtype=building_ids.dtype), total_size)
     jobs_data["parcel_id"] = resize(array([-1], dtype=parcel_ids.dtype), total_size)
     jobs_data[self.geography_id_attr] = resize(array([-1], dtype=tazes.dtype), total_size)
     jobs_data["building_type"] = resize(array([-1], dtype=home_based.dtype), total_size)
     jobs_data["sqft"] = resize(array([], dtype=building_sqft.dtype), total_size)
     if join_flags is not None:
         jobs_data["join_flag"] = resize(array([], dtype=join_flags.dtype), total_size)
     if impute_sqft_flag is not None:
         jobs_data["impute_building_sqft_flag"] = resize(array([], dtype=impute_sqft_flag.dtype), total_size)
     
     indices = cumsum(business_sizes)
      # iterate over establishments: for each business, create its number of jobs
      # by filling the matching slice of the arrays
     start_index=0
     for i in range(business_dataset.size()):
         end_index = indices[i]
         jobs_data["sector_id"][start_index:end_index] = sectors[i]
         if building_ids.size > 0:
             jobs_data["building_id"][start_index:end_index] = building_ids[i]
         if parcel_ids.size > 0:
             jobs_data["parcel_id"][start_index:end_index] = parcel_ids[i]
         jobs_data[self.geography_id_attr][start_index:end_index] = tazes[i]
         if home_based.size > 0:
             jobs_data["building_type"][start_index:end_index] = home_based[i]
         if self.compute_sqft_per_job:
             jobs_data["sqft"][start_index:end_index] = round((building_sqft[i]-building_sqft[i]/10.0)/float(business_sizes[i])) # sqft per employee
         else:
             jobs_data["sqft"][start_index:end_index] = building_sqft[i]
         if join_flags is not None:
             jobs_data["join_flag"][start_index:end_index] = join_flags[i]
         if impute_sqft_flag is not None:
             jobs_data["impute_building_sqft_flag"][start_index:end_index]  = impute_sqft_flag[i]
         start_index = end_index
         
     jobs_data["job_id"] = arange(total_size)+1
     if self.compute_sqft_per_job:
         jobs_data["sqft"] = clip(jobs_data["sqft"], 0, self.maximum_sqft)
         jobs_data["sqft"][logical_and(jobs_data["sqft"]>0, jobs_data["sqft"]<self.minimum_sqft)] = self.minimum_sqft
     
     # correct missing job_building_types
     wmissing_bt = where(jobs_data["building_type"]<=0)[0]
     if wmissing_bt.size > 0:
         jobs_data["building_type"][wmissing_bt] = 2 # assign non-homebased type for now. It can be re-classified in the assign_bldgs_to_jobs... script
     
     # create jobs table and write it out
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(
             table_name="jobs",
             table_data=jobs_data
             )
     job_dataset = JobDataset(in_storage=storage)
     if self.unplace_jobs_with_non_existing_buildings:
         self.do_unplace_jobs_with_non_existing_buildings(job_dataset, out_storage)
     
     # Match to control totals (only eliminate jobs if control totals are smaller than the actual number of jobs). 
     if control_totals_table is not None:
         logger.log_status("Matching to control totals.")
         control_totals = ControlTotalDataset(what='employment', id_name=['zone_id', 'sector_id'], 
                                              in_table_name=control_totals_table, in_storage=in_storage)
         control_totals.load_dataset(attributes=['zone_id', 'sector_id', 'jobs'])
         zones_sectors = control_totals.get_id_attribute()
         njobs = control_totals.get_attribute('jobs')
         remove = array([], dtype='int32')
         for i in range(zones_sectors.shape[0]):
             zone, sector = zones_sectors[i,:]
             in_sector = job_dataset.get_attribute("sector_id") == sector
             in_zone_in_sector = logical_and(in_sector, job_dataset.get_attribute("zone_id") == zone)
             if in_zone_in_sector.sum() <= njobs[i]:
                 continue
             to_be_removed = in_zone_in_sector.sum() - njobs[i]
             this_removal = 0
             not_considered = ones(job_dataset.size(), dtype='bool8')
             for unit in ['parcel_id', 'building_id', None]: # first consider jobs without parcel id, then without building_id, then all
                 if unit is not None:
                     wnunit = job_dataset.get_attribute(unit) <= 0
                     eligible = logical_and(not_considered, logical_and(in_zone_in_sector, wnunit))
                     not_considered[where(wnunit)] = False
                 else:
                     eligible = logical_and(not_considered, in_zone_in_sector)
                 eligible_sum = eligible.sum()
                 if eligible_sum > 0:
                     where_eligible = where(eligible)[0]
                     if eligible_sum <= to_be_removed-this_removal:
                         draw = arange(eligible_sum)
                     else:
                         draw = sample_noreplace(where_eligible, to_be_removed-this_removal, eligible_sum)
                     remove = concatenate((remove, where_eligible[draw]))
                     this_removal += draw.size
                     if this_removal >= to_be_removed:
                         break
             
         job_dataset.remove_elements(remove)
         logger.log_status("%s jobs removed." % remove.size)
         
     
     logger.log_status("Write jobs table.")
     job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage)
     logger.log_status("Created %s jobs." % job_dataset.size())
    def run(self, job_dataset, dataset_pool, out_storage=None, jobs_table="jobs"):
        """
        Algorithm:
            1. For all non_home_based jobs that have parcel_id assigned but no building_id, try
                to choose a building from all buildings in that parcel. Draw the building with probabilities
                given by the sector-building_type distribution. The job sizes are
                fitted into the available space (the attribute job.sqft is updated).
            2. For all non_home_based jobs for which no building was found in step 1, check
                if the parcel has residential buildings. In such a case, re-assign the jobs to be
                home-based.
                Otherwise, if sum of non_residential_sqft over the involved buildings is 0,
                for all jobs that have impute_building_sqft_flag=True draw a building using
                the sector-building_type distribution and impute the corresponding sqft to 
                the non_residential_sqft of that building.
            3. For all home_based jobs that have parcel_id assigned but no building_id, try
                to choose a building from all buildings in that parcel. 
                The capacity of a single-family building is determined from sizes of the households living there 
                (for each household the minimum of number of members and 2 is taken). 
                For multi-family buildings the capacity is 50.
            4. Assign a building type to jobs with a missing building type, sampled 
                from the region-wide distribution of home-based and non-home-based jobs.
            5. Update the table 'building_sqft_per_job' using the updated job.sqft.
        'in_storage' should contain the jobs table and the zone_averages_table. The 'dataset_pool_storage'
        should contain all other tables needed (buildings, households, building_types). 
        """
        parcel_ids = job_dataset.get_attribute("parcel_id")
        building_ids = job_dataset.get_attribute("building_id")
        building_types = job_dataset.get_attribute("building_type")
        try:
            impute_sqft_flags = job_dataset.get_attribute("impute_building_sqft_flag")
        except:
            impute_sqft_flags = zeros(job_dataset.size())
        is_considered = logical_and(parcel_ids > 0, building_ids <= 0) # jobs that have assigned parcel but not building
        job_index_home_based = where(logical_and(is_considered, building_types == 1))[0]
        job_index_governmental = where(logical_and(is_considered, building_types == 3))[0]
        
        building_dataset = dataset_pool.get_dataset('building')
        parcel_ids_in_bldgs = building_dataset.get_attribute("parcel_id")
        bldg_ids_in_bldgs = building_dataset.get_id_attribute()
        bldg_types_in_bldgs = building_dataset.get_attribute("building_type_id")
        
        non_res_sqft = building_dataset.get_attribute("non_residential_sqft")
        occupied = building_dataset.compute_variables(["urbansim_parcel.building.occupied_building_sqft_by_jobs"],
                                                                     dataset_pool=dataset_pool)
        is_governmental = building_dataset.compute_variables(["building.disaggregate(building_type.generic_building_type_id == 7)"],
                                                                     dataset_pool=dataset_pool)
        
        # assign buildings to governmental jobs randomly
        unique_parcels = unique(parcel_ids[job_index_governmental])
        logger.log_status("Placing governmental jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs[is_governmental] == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_governmental] == parcel)[0]
            draw = sample_replace(idx_in_bldgs, idx_in_jobs.size)
            building_ids[job_index_governmental[idx_in_jobs]] = bldg_ids_in_bldgs[where(is_governmental)[0][draw]]
        logger.log_status("%s governmental jobs (out of %s gov. jobs) were placed." % (
                                                                (building_ids[job_index_governmental]>0).sum(),
                                                                 job_index_governmental.size))
        logger.log_status("The not-placed governmental jobs will be added to the non-home based jobs.")
        
        # consider the unplaced governmental jobs together with other non-home-based jobs
        is_now_considered = logical_and(is_considered, building_ids <= 0)
        job_index_non_home_based = where(logical_and(is_now_considered, logical_or(building_types == 2, building_types == 3)))[0]
                                    
        # assign buildings to non_home_based jobs based on available space
        unique_parcels = unique(parcel_ids[job_index_non_home_based])
        job_building_types = job_dataset.compute_variables(["bldgs_building_type_id = job.disaggregate(building.building_type_id)"], 
                                                           dataset_pool=dataset_pool)
        where_valid_jbt = where(logical_and(job_building_types>0, logical_or(building_types == 2, building_types==3)))[0]
        building_type_dataset = dataset_pool.get_dataset("building_type")
        available_building_types= building_type_dataset.get_id_attribute()
        idx_available_bt = building_type_dataset.get_id_index(available_building_types)
        sectors = job_dataset.get_attribute("sector_id")
        unique_sectors = unique(sectors)
        sector_bt_distribution = zeros((unique_sectors.size, building_type_dataset.size()), dtype="float32")
        
        jobs_sqft = job_dataset.get_attribute_by_index("sqft", job_index_non_home_based).astype("float32")
        job_dataset._compute_if_needed("urbansim_parcel.job.zone_id", dataset_pool=dataset_pool) 
        jobs_zones = job_dataset.get_attribute_by_index("zone_id", job_index_non_home_based)
        new_jobs_sqft = job_dataset.get_attribute("sqft").copy()
        
        # find sector -> building_type distribution
        sector_index_mapping = {}
        for isector in range(unique_sectors.size):
            idx = where(sectors[where_valid_jbt]==unique_sectors[isector])[0]
            if idx.size == 0: continue
            o = ones(idx.size, dtype="int32")
            sector_bt_distribution[isector,:] = ndimage_sum(o, labels=job_building_types[where_valid_jbt[idx]], 
                                                            index=available_building_types)
            sector_bt_distribution[isector,:] = sector_bt_distribution[isector,:]/sector_bt_distribution[isector,:].sum()
            sector_index_mapping[unique_sectors[isector]] = isector
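        # e.g. building types [3, 5] and a sector whose valid jobs sit in types
        # [3, 3, 5]: ndimage_sum counts [2, 1], normalized to [0.667, 0.333]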
               
        # create a lookup table for zonal average per building type of sqft per employee
        zone_average_dataset = dataset_pool.get_dataset("building_sqft_per_job")
        zone_bt_lookup = zone_average_dataset.get_building_sqft_as_table(job_dataset.get_attribute("zone_id").max(),
                                                                         available_building_types.max())
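        # zone_bt_lookup is indexed as [zone_id, building_type_id] and holds the
        # zonal average sqft per job, used below to impute missing job sqft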

        counter_zero_capacity = 0
        counter_zero_distr = 0
        # iterate over parcels
        logger.log_status("Placing non-home-based jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_non_home_based] == parcel)[0]
            capacity = maximum(non_res_sqft[idx_in_bldgs] - occupied[idx_in_bldgs],0)
            #capacity = non_res_sqft[idx_in_bldgs] - occupied[idx_in_bldgs]
            if capacity.sum() <= 0:
                counter_zero_capacity += idx_in_jobs.size
                continue
            this_jobs_sectors = sectors[job_index_non_home_based][idx_in_jobs]
            this_jobs_sqft_table = resize(jobs_sqft[idx_in_jobs], (idx_in_bldgs.size, idx_in_jobs.size))
            wn = jobs_sqft[idx_in_jobs] <= 0
            for i in range(idx_in_bldgs.size):
                this_jobs_sqft_table[i, where(wn)[0]] = zone_bt_lookup[jobs_zones[idx_in_jobs[wn]], bldg_types_in_bldgs[idx_in_bldgs[i]]]
            supply_demand_ratio = (resize(capacity, (capacity.size, 1))/this_jobs_sqft_table.astype("float32").sum(axis=0))/float(idx_in_jobs.size)*0.9
            if any(supply_demand_ratio < 1): # correct only if supply is smaller than demand 
                this_jobs_sqft_table = this_jobs_sqft_table * supply_demand_ratio
            probcomb = zeros(this_jobs_sqft_table.shape)
            bt = bldg_types_in_bldgs[idx_in_bldgs]
            ibt = building_type_dataset.get_id_index(bt)
            for i in range(probcomb.shape[0]):
                for j in range(probcomb.shape[1]):
                    probcomb[i,j] = sector_bt_distribution[sector_index_mapping[this_jobs_sectors[j]],ibt[i]]
            pcs = probcomb.sum(axis=0)
            probcomb = probcomb/pcs
            wz = where(pcs<=0)[0]
            counter_zero_distr += wz.size
            probcomb[:, wz] = 0 # to avoid nan values
            taken = zeros(capacity.shape)
            has_sqft = this_jobs_sqft_table > 0
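            # greedy placement: repeatedly draw the job with the largest
            # expected sqft requirement and sample a building for it with
            # probability proportional to the sector/building-type affinity,
            # skipping buildings whose remaining capacity it would exceed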
            while True:
                if (has_sqft * probcomb).sum() <= 0:
                    break
                req =  (this_jobs_sqft_table * probcomb).sum(axis=0)
                maxi = req.max()
                wmaxi = where(req==maxi)[0]
                drawjob = sample_noreplace(arange(wmaxi.size), 1) # draw job from jobs with the maximum size
                imax_req = wmaxi[drawjob]
                weights = has_sqft[:,imax_req] * probcomb[:,imax_req]
                draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                if (taken[draw] + this_jobs_sqft_table[draw,imax_req]) > capacity[draw]:
                    probcomb[draw,imax_req]=0
                    continue
                taken[draw] = taken[draw] + this_jobs_sqft_table[draw,imax_req]
                building_ids[job_index_non_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                probcomb[:,imax_req] = 0
                new_jobs_sqft[job_index_non_home_based[idx_in_jobs[imax_req]]] = int(min(self.maximum_sqft, max(round(this_jobs_sqft_table[draw,imax_req]), 
                                                                                     self.minimum_sqft)))
            
        logger.log_status("%s non home based jobs (out of %s nhb jobs) were placed." % (
                                                                (building_ids[job_index_non_home_based]>0).sum(),
                                                                 job_index_non_home_based.size))
        logger.log_status("Unplaced due to zero capacity: %s" % counter_zero_capacity)
        logger.log_status("Unplaced due to zero distribution: %s" % counter_zero_distr)
        
        job_dataset.modify_attribute(name="building_id", data = building_ids)
        
        # re-classify unplaced non-home based jobs to home-based if parcels contain residential buildings
        bldgs_is_residential = logical_and(logical_not(is_governmental), building_dataset.compute_variables(["urbansim_parcel.building.is_residential"], 
                                                           dataset_pool=dataset_pool))
        is_now_considered = logical_and(parcel_ids > 0, building_ids <= 0)
        job_index_non_home_based_unplaced = where(logical_and(is_now_considered, building_types == 2))[0]
        unique_parcels = unique(parcel_ids[job_index_non_home_based_unplaced])
        imputed_sqft = 0
        logger.log_status("Try to reclassify non-home-based jobs (excluding governemtal jobs) ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            if idx_in_bldgs.size <= 0:
                continue
            idx_in_jobs = where(parcel_ids[job_index_non_home_based_unplaced] == parcel)[0]
            where_residential = where(bldgs_is_residential[idx_in_bldgs])[0]
            if where_residential.size > 0:
                building_types[job_index_non_home_based_unplaced[idx_in_jobs]] = 1 # set to home-based jobs
            elif non_res_sqft[idx_in_bldgs].sum() <= 0:
                # impute non_residential_sqft and assign buildings
                this_jobs_sectors = sectors[job_index_non_home_based_unplaced][idx_in_jobs]
                this_jobs_sqft_table = resize(jobs_sqft[idx_in_jobs], (idx_in_bldgs.size, idx_in_jobs.size))
                wn = jobs_sqft[idx_in_jobs] <= 0
                for i in range(idx_in_bldgs.size):
                    this_jobs_sqft_table[i, where(wn)[0]] = zone_bt_lookup[jobs_zones[idx_in_jobs[wn]], bldg_types_in_bldgs[idx_in_bldgs[i]]]
                probcomb = zeros(this_jobs_sqft_table.shape)
                bt = bldg_types_in_bldgs[idx_in_bldgs]
                ibt = building_type_dataset.get_id_index(bt)
                for i in range(probcomb.shape[0]):
                    for j in range(probcomb.shape[1]):
                        probcomb[i,j] = sector_bt_distribution[sector_index_mapping[this_jobs_sectors[j]],ibt[i]]
                for ijob in range(probcomb.shape[1]):
                    # ijob indexes jobs within this parcel, hence the idx_in_jobs lookup
                    if (probcomb[:,ijob].sum() <= 0) or (impute_sqft_flags[job_index_non_home_based_unplaced[idx_in_jobs[ijob]]] == 0):
                        continue
                    weights = probcomb[:,ijob]
                    draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                    non_res_sqft[idx_in_bldgs[draw]] += this_jobs_sqft_table[draw,ijob]
                    imputed_sqft += this_jobs_sqft_table[draw,ijob]
                    building_ids[job_index_non_home_based_unplaced[idx_in_jobs[ijob]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                    # index into the unplaced set, consistent with the building_id assignment above
                    new_jobs_sqft[job_index_non_home_based_unplaced[idx_in_jobs[ijob]]] = int(min(self.maximum_sqft, max(round(this_jobs_sqft_table[draw,ijob]), 
                                                                                     self.minimum_sqft)))
                    
        building_dataset.modify_attribute(name="non_residential_sqft", data = non_res_sqft)
        job_dataset.modify_attribute(name="building_id", data = building_ids)
        job_dataset.modify_attribute(name="building_type", data = building_types)
        job_dataset.modify_attribute(name="sqft", data = new_jobs_sqft)
        
        old_nhb_size = job_index_non_home_based.size
        job_index_home_based = where(logical_and(is_considered, building_types == 1))[0]
        job_index_non_home_based = where(logical_and(is_considered, building_types == 2))[0]
        logger.log_status("%s non-home based jobs reclassified as home-based." % (old_nhb_size-job_index_non_home_based.size))
        logger.log_status("%s non-residential sqft imputed." % imputed_sqft)
        logger.log_status("Additionaly, %s non home based jobs were placed due to imputed sqft." % \
                                                (building_ids[job_index_non_home_based_unplaced]>0).sum())
        # home_based jobs
        unique_parcels = unique(parcel_ids[job_index_home_based])
        capacity_in_buildings = building_dataset.compute_variables([
                          "urbansim_parcel.building.vacant_home_based_job_space"],
                             dataset_pool=dataset_pool)
        parcels_with_exceeded_capacity = []
        # iterate over parcels
        logger.log_status("Placing home-based jobs ...")
        for parcel in unique_parcels:
            idx_in_bldgs = where(parcel_ids_in_bldgs == parcel)[0]
            idx_in_jobs = where(parcel_ids[job_index_home_based] == parcel)[0]
            capacity = capacity_in_buildings[idx_in_bldgs]
            if capacity.sum() <= 0:
                continue
            probcomb = ones((idx_in_bldgs.size, idx_in_jobs.size))
            taken = zeros(capacity.shape, dtype="int32")
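            # greedy placement: repeatedly draw a job at random from those with
            # the most remaining building options and put it into a building
            # sampled uniformly from those with spare home-based job capacity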
            while True:
                zero_cap = where((capacity - taken) <= 0)[0]
                probcomb[zero_cap,:] = 0
                if probcomb.sum() <= 0:
                    break
                req =  probcomb.sum(axis=0)
                wmaxi = where(req==req.max())[0]
                drawjob = sample_noreplace(arange(wmaxi.size), 1) # draw job from available jobs
                imax_req = wmaxi[drawjob]
                weights = probcomb[:,imax_req]
                # sample building
                draw = probsample_noreplace(arange(probcomb.shape[0]), 1, resize(weights/weights.sum(), (probcomb.shape[0],)))
                taken[draw] = taken[draw] + 1
                building_ids[job_index_home_based[idx_in_jobs[imax_req]]] = bldg_ids_in_bldgs[idx_in_bldgs[draw]]
                probcomb[:,imax_req] = 0
            if -1 in building_ids[job_index_home_based[idx_in_jobs]]:
                parcels_with_exceeded_capacity.append(parcel)
        parcels_with_exceeded_capacity = array(parcels_with_exceeded_capacity)    
        
        logger.log_status("%s home based jobs (out of %s hb jobs) were placed." % ((building_ids[job_index_home_based]>0).sum(),
                                                                         job_index_home_based.size))
        
        # assign building type where missing
        # determine regional distribution
        idx_home_based = where(building_types == 1)[0]
        idx_non_home_based = where(building_types == 2)[0]
        idx_bt_missing = where(building_types <= 0)[0]
        if idx_bt_missing.size > 0:
            # sample building types
            sample_bt = probsample_replace(array([1,2]), idx_bt_missing.size, 
               array([idx_home_based.size, idx_non_home_based.size])/float(idx_home_based.size + idx_non_home_based.size))
            # coerce to int32 (on a 64 bit machine, sample_bt will be of type int64)
            building_types[idx_bt_missing] = sample_bt.astype(int32)
            job_dataset.modify_attribute(name="building_type", data = building_types) 
        
        if out_storage is not None:
            job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage, attributes=AttributeType.PRIMARY)
            building_dataset.write_dataset(out_table_name='buildings', out_storage=out_storage, attributes=AttributeType.PRIMARY)
        logger.log_status("Assigning building_id to jobs done.")
    def estimate(self, specification, dataset, outcome_attribute, index = None, procedure=None, data_objects=None,
                        estimate_config=None,  debuglevel=0):
        """'specification' is of type EquationSpecification,
            'dataset' is of type Dataset,
            'outcome_attribute' - string that determines the dependent variable,
            'index' are indices of individuals in dataset for which
                    the model runs. If it is None, the whole dataset is considered.
            'procedure' - name of the estimation procedure. If it is None,
                there should be an entry "estimation" in 'estimate_config' that determines the procedure. The class
                must have a method 'run' that takes as arguments 'data', 'regression_procedure' and 'resources'.
                It returns a dictionary with entries 'estimators', 'standard_errors' and 't_values' (all 1D numpy arrays).
            'data_objects' is a dictionary where each key is the name of an data object
                    ('zone', ...) and its value is an object of class  Dataset.
            'estimate_config' is of type Resources, it gives additional arguments for the estimation procedure.
            'debuglevel' overwrites the class 'debuglevel'.
        """
        self.debug.flag = debuglevel
        if estimate_config is None:
            estimate_config = Resources()
        if not isinstance(estimate_config, Resources) and isinstance(estimate_config, dict):
            estimate_config = Resources(estimate_config)
        self.estimate_config = estimate_config.merge_with_defaults(self.estimate_config)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.procedure = procedure
        if self.procedure is None:
            self.procedure = self.estimate_config.get("estimation", None)
        if self.procedure is not None:
            self.procedure = ModelComponentCreator().get_model_component(self.procedure)
        else:
            logger.log_warning("No estimation procedure given, or problems with loading the corresponding module.")

        compute_resources = Resources({"debug":self.debug})
        if dataset.size()<=0: # no data loaded yet
            dataset.get_id_attribute()
        if index is None:
            index = arange(dataset.size())
        if not isinstance(index, ndarray):
            index = array(index)

        estimation_size_agents = self.estimate_config.get("estimation_size_agents", None) # should be a proportion of the agent_set
        if estimation_size_agents is None:
            estimation_size_agents = 1.0
        else:
            estimation_size_agents = max(min(estimation_size_agents,1.0),0.0) # between 0 and 1

        if estimation_size_agents < 1.0:
            self.debug.print_debug("Sampling agents for estimation ...",3)
            estimation_idx = sample_noreplace(arange(index.size),
                                                         int(index.size*estimation_size_agents))
        else:
            estimation_idx = arange(index.size)

        estimation_idx = index[estimation_idx]
        self.debug.print_debug("Number of observations for estimation: " + str(estimation_idx.size),2)
        if estimation_idx.size <= 0:
            self.debug.print_debug("Nothing to be done.",2)
            return (None, None)

        coefficients = create_coefficient_from_specification(specification)
        self.specified_coefficients = SpecifiedCoefficients().create(coefficients, specification, neqs=1)
        submodels = self.specified_coefficients.get_submodels()
        self.get_status_for_gui().update_pieces_using_submodels(submodels=submodels, leave_pieces=2)
        self.map_agents_to_submodels(submodels, self.submodel_string, dataset, estimation_idx,
                                      dataset_pool=self.dataset_pool, resources = compute_resources,
                                      submodel_size_max=self.estimate_config.get('submodel_size_max', None))
        variables = self.specified_coefficients.get_full_variable_names_without_constants()
        self.debug.print_debug("Compute variables ...",4)
        self.increment_current_status_piece()
        dataset.compute_variables(variables, dataset_pool=self.dataset_pool, resources = compute_resources)

        coef = {}
        estimated_coef={}
        self.outcome = {}
        dataset.compute_variables([outcome_attribute], dataset_pool=self.dataset_pool, resources=compute_resources)
        regression_resources=Resources(estimate_config)
        regression_resources.merge({"debug":self.debug})
        outcome_variable_name = VariableName(outcome_attribute)
        for submodel in submodels:
            coef[submodel] = SpecifiedCoefficientsFor1Submodel(self.specified_coefficients,submodel)
            self.increment_current_status_piece()
            logger.log_status("Estimate regression for submodel " +str(submodel),
                               tags=["estimate"], verbosity_level=2)
            #logger.log_status("Number of observations: " +str(self.observations_mapping[submodel].size),
                               #tags=["estimate"], verbosity_level=2)
            self.data[submodel] = dataset.create_regression_data_for_estimation(coef[submodel],
                                                            index = estimation_idx[self.observations_mapping[submodel]])
            self.coefficient_names[submodel] = coef[submodel].get_coefficient_names_without_constant()[0,:]
            if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0) and (self.procedure is not None): # observations for this submodel available
                self.outcome[submodel] = dataset.get_attribute_by_index(outcome_variable_name.get_alias(), estimation_idx[self.observations_mapping[submodel]])   
                regression_resources.merge({"outcome":  self.outcome[submodel]})
                regression_resources.merge({"coefficient_names":self.coefficient_names[submodel].tolist(),
                            "constant_position": coef[submodel].get_constants_positions()})
                regression_resources.merge({"submodel": submodel})
                estimated_coef[submodel] = self.procedure.run(self.data[submodel], self.regression,
                                                        resources=regression_resources)
                if "estimators" in estimated_coef[submodel].keys():
                    coef[submodel].set_coefficient_values(estimated_coef[submodel]["estimators"])
                if "standard_errors" in estimated_coef[submodel].keys():
                    coef[submodel].set_standard_errors(estimated_coef[submodel]["standard_errors"])
                if "other_measures" in estimated_coef[submodel].keys():
                    for measure in estimated_coef[submodel]["other_measures"].keys():
                        coef[submodel].set_measure(measure,
                              estimated_coef[submodel]["other_measures"][measure])
                if "other_info" in estimated_coef[submodel].keys():
                    for info in estimated_coef[submodel]["other_info"]:
                        coef[submodel].set_other_info(info,
                              estimated_coef[submodel]["other_info"][info])
        coefficients.fill_coefficients(coef)
        self.specified_coefficients.coefficients = coefficients
        self.save_predicted_values_and_errors(specification, coefficients, dataset, outcome_variable_name, index=index, data_objects=data_objects)
            
        return (coefficients, estimated_coef)
    def _do_run_for_this_year(self, job_set):
        building_type = job_set.get_attribute("building_type")
        sectors = unique(self.control_totals_for_this_year.get_attribute("sector_id"))
        self._compute_sector_variables(sectors, job_set)
        for sector in sectors:
            isector = where(self.control_totals_for_this_year.get_attribute("sector_id") == sector)[0]
            total_hb_jobs = self.control_totals_for_this_year.get_attribute("total_home_based_employment")[isector]
            total_nhb_jobs = self.control_totals_for_this_year.get_attribute("total_non_home_based_employment")[isector]
            is_in_sector_hb = job_set.get_attribute("is_in_employment_sector_%s_home_based" % sector)
            is_in_sector_nhb = job_set.get_attribute("is_in_employment_sector_%s_non_home_based" % sector)
            diff_hb = int(total_hb_jobs - is_in_sector_hb.astype(int8).sum())
            diff_nhb = int(total_nhb_jobs - is_in_sector_nhb.astype(int8).sum())
            if diff_hb < 0: # home based jobs to be removed
                w = where(is_in_sector_hb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_hb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate((self.remove_jobs, non_placed,
                                           sample_noreplace(sample_array, max(0,abs(diff_hb)-size_non_placed))))
            if diff_nhb < 0: # non home based jobs to be removed
                w = where(is_in_sector_nhb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_nhb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate((self.remove_jobs, non_placed,
                                           sample_noreplace(sample_array, max(0,abs(diff_nhb)-size_non_placed))))

            if diff_hb > 0: # home based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate((self.new_jobs[self.location_id_name],
                                   zeros((diff_hb,), dtype=self.new_jobs[self.location_id_name].dtype.type)))
                self.new_jobs["sector_id"] = concatenate((self.new_jobs["sector_id"],
                                   (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_hb))))
                if 1 in is_in_sector_hb:
                    building_type_distribution = array(ndimage_sum(is_in_sector_hb,
                                                                    labels=building_type,
                                                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute("is_home_based_job"): # take the building type distribution from the whole region
                    building_type_distribution = array(ndimage_sum(
                                                                job_set.get_attribute("is_home_based_job"),
                                                                labels=building_type,
                                                                index=self.available_building_types))
                else: # there are no home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(self.available_building_types.size)
                    building_type_distribution = building_type_distribution/building_type_distribution.sum()
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_hb, building_type_distribution/
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"],
                            sampled_building_types.astype(self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_hb
                self.new_jobs[self.job_id_name] = concatenate((self.new_jobs[self.job_id_name],
                                                     arange(self.max_id+1, new_max_id+1)))
                self.max_id = new_max_id

            if diff_nhb > 0: # non home based jobs to be created
                self.new_jobs[self.location_id_name]=concatenate((self.new_jobs[self.location_id_name],
                                     zeros((diff_nhb,), dtype=self.new_jobs[self.location_id_name].dtype.type)))
                self.new_jobs["sector_id"]=concatenate((self.new_jobs["sector_id"],
                                           (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_nhb))))
                if 1 in is_in_sector_nhb:
                    building_type_distribution = array(ndimage_sum(is_in_sector_nhb,
                                                                    labels=building_type,
                                                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute("is_non_home_based_job"): # take the building type distribution from the whole region
                    building_type_distribution = array(ndimage_sum(
                                                        job_set.get_attribute("is_non_home_based_job"),
                                                        labels=building_type,
                                                        index=self.available_building_types))
                else: # there are no non-home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(self.available_building_types.size)
                    building_type_distribution = building_type_distribution/building_type_distribution.sum()
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_nhb, building_type_distribution/
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"],
                                        sampled_building_types.astype(self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id+diff_nhb
                self.new_jobs[self.job_id_name]=concatenate((self.new_jobs[self.job_id_name], arange(self.max_id+1, 
                                                                                                     new_max_id+1)))
                self.max_id = new_max_id
class HouseholdTransitionModel(Model):
    """Creates and removes households from household_set. New households are duplicated from the existing households, keeping 
       the joint distribution of all characteristics. 
    """

    model_name = "Household Transition Model"

    def __init__(self,
                 location_id_name="grid_id",
                 dataset_pool=None,
                 debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.location_id_name = location_id_name
        self.dataset_pool = self.create_dataset_pool(dataset_pool,
                                                     ["urbansim", "opus_core"])

    def run(self,
            year,
            household_set,
            control_totals,
            characteristics,
            resources=None):
        self._do_initialize_for_run(household_set)
        control_totals.get_attribute(
            "total_number_of_households")  # to make sure they are loaded
        self.characteristics = characteristics
        self.all_categories = self.characteristics.get_attribute(
            "characteristic")
        self.all_categories = array(
            map(lambda x: x.lower(), self.all_categories))
        self.scaled_characteristic_names = get_distinct_names(
            self.all_categories).tolist()
        self.marginal_characteristic_names = copy(control_totals.get_id_name())
        index_year = self.marginal_characteristic_names.index("year")
        self.marginal_characteristic_names.remove("year")
        idx = where(control_totals.get_attribute("year") == year)[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        self._do_run_for_this_year(household_set)
        return self._update_household_set(household_set)

    def _update_household_set(self, household_set):
        index_of_duplicated_hhs = household_set.duplicate_rows(
            self.mapping_existing_hhs_to_new_hhs)
        household_set.modify_attribute(
            name=self.location_id_name,
            data=-1 * ones(
                (index_of_duplicated_hhs.size, ),
                dtype=household_set.get_data_type(self.location_id_name)),
            index=index_of_duplicated_hhs)
        household_set.remove_elements(self.remove_households)
        if self.new_households[self.location_id_name].size > 0:
            max_id = household_set.get_id_attribute().max()
            self.new_households[self.household_id_name] = concatenate(
                (self.new_households[self.household_id_name],
                 arange(
                     max_id + 1, max_id +
                     self.new_households[self.location_id_name].size + 1)))
            household_set.add_elements(self.new_households,
                                       require_all_attributes=False)

        difference = household_set.size() - self.household_size
        self.debug.print_debug(
            "Difference in number of households: %s"
            " (original %s, new %s, created %s, deleted %s)" %
            (difference, self.household_size, household_set.size(),
             self.new_households[self.household_id_name].size +
             self.mapping_existing_hhs_to_new_hhs.size,
             self.remove_households.size), 3)
        if self.location_id_name in household_set.get_attribute_names():
            self.debug.print_debug(
                "Number of unplaced households: %s" %
                where(household_set.get_attribute(self.location_id_name) <= 0)
                [0].size, 3)
        return difference

    def _do_initialize_for_run(self, household_set):
        self.household_id_name = household_set.get_id_name()[0]
        self.new_households = {
            self.location_id_name:
            array([],
                  dtype=household_set.get_data_type(self.location_id_name,
                                                    int32)),
            self.household_id_name:
            array([],
                  dtype=household_set.get_data_type(self.household_id_name,
                                                    int32))
        }
        self.remove_households = array([], dtype='int32')
        self.household_size = household_set.size()
        self.max_id = household_set.get_id_attribute().max()
        self.arrays_from_categories = {}
        self.arrays_from_categories_mapping = {}
        self.mapping_existing_hhs_to_new_hhs = array(
            [],
            dtype=household_set.get_data_type(self.household_id_name, int32))

    def _do_run_for_this_year(self, household_set):
        self.household_set = household_set
        groups = self.control_totals_for_this_year.get_id_attribute()
        self.create_arrays_from_categories(self.household_set)

        all_characteristics = self.arrays_from_categories.keys()
        self.household_set.load_dataset_if_not_loaded(
            attributes=all_characteristics
        )  # prevents from lazy loading to save runtime
        idx_shape = []
        number_of_combinations = 1
        num_attributes = len(all_characteristics)
        for iattr in range(num_attributes):
            attr = all_characteristics[iattr]
            max_bins = self.arrays_from_categories[attr].max() + 1
            idx_shape.append(max_bins)
            number_of_combinations = number_of_combinations * max_bins
            if attr not in self.new_households.keys():
                self.new_households[attr] = array(
                    [], dtype=self.household_set.get_data_type(attr, float32))

        self.number_of_combinations = int(number_of_combinations)
        idx_tmp = indices(tuple(idx_shape))

        categories_index = zeros((self.number_of_combinations, num_attributes))

        for i in range(num_attributes):  #create indices of all combinations
            categories_index[:, i] = idx_tmp[i].ravel()

        categories_index_mapping = {}
        for i in range(self.number_of_combinations):
            categories_index_mapping[tuple(categories_index[i, ].tolist())] = i
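        # e.g. for idx_shape = [2, 3] the combinations (0,0), (0,1), (0,2),
        # (1,0), (1,1), (1,2) are mapped to the flat indices 0..5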

        def get_category(values):
            bins = map(lambda x, y: self.arrays_from_categories[x][int(y)],
                       all_characteristics, values)
            try:
                return categories_index_mapping[tuple(bins)]
            except KeyError, msg:
                where_error = where(array(bins) == -1)[0]
                if where_error.size > 0:
                    raise KeyError, \
                        "Invalid value of %s for attribute %s. It is not included in the characteristics groups." % (
                                                                               array(values)[where_error],
                                                                               array(all_characteristics)[where_error])
                raise KeyError, msg

        if num_attributes > 0:
            # the next array must be a copy of the household values, otherwise, it changes the original values
            values_array = reshape(
                array(self.household_set.get_attribute(
                    all_characteristics[0])), (self.household_set.size(), 1))
            if num_attributes > 1:
                for attr in all_characteristics[1:]:
                    values_array = concatenate(
                        (values_array,
                         reshape(array(self.household_set.get_attribute(attr)),
                                 (self.household_set.size(), 1))),
                        axis=1)
            for i in range(values_array.shape[1]):
                if values_array[:, i].max() > 10000:
                    values_array[:, i] = values_array[:, i] / 10
                values_array[:, i] = clip(
                    values_array[:, i], 0,
                    self.arrays_from_categories[all_characteristics[i]].size -
                    1)

            # determine for each household to what category it belongs to
            self.household_categories = array(
                map(lambda x: get_category(x),
                    values_array))  # performance bottleneck

            number_of_households_in_categories = array(
                ndimage_sum(ones((self.household_categories.size, )),
                            labels=self.household_categories + 1,
                            index=arange(self.number_of_combinations) + 1))
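            # the array above counts households per category combination; the
            # labels/index are shifted by 1, presumably to keep combination 0
            # distinct from ndimage's background label 0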
        else:
            # no marginal characteristics; consider just one group
            self.household_categories = zeros(self.household_set.size(),
                                              dtype='int32')
            number_of_households_in_categories = array(
                [self.household_set.size()])

        g = arange(num_attributes)

        #iterate over marginal characteristics
        for group in groups:
            if groups.ndim <= 1:  # there is only one group (no marginal char.)
                id = group
            else:
                id = tuple(group.tolist())
            group_element = self.control_totals_for_this_year.get_data_element_by_id(
                id)
            total = group_element.total_number_of_households
            for i in range(g.size):
                g[i] = eval("group_element." +
                            self.arrays_from_categories.keys()[i])
            if g.size <= 0:
                l = ones((number_of_households_in_categories.size, ))
            else:
                l = categories_index[:, 0] == g[0]
                for i in range(1, num_attributes):
                    l = logical_and(l, categories_index[:, i] == g[i])
            # l has 1's for combinations of this group
            number_in_group = array(
                ndimage_sum(number_of_households_in_categories,
                            labels=l,
                            index=1))
            diff = int(total - number_in_group)
            if diff < 0:  # households to be removed
                is_in_group = l[self.household_categories]
                w = where(is_in_group)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(self.household_set, w, -1*diff,
                                                          self.location_id_name)
                self.remove_households = concatenate(
                    (self.remove_households, non_placed,
                     sample_noreplace(sample_array,
                                      max(0,
                                          abs(diff) - size_non_placed))))
            if diff > 0:  # households to be created
                self._create_households(diff, l)
    def run(self,
            in_storage,
            out_storage,
            business_table="business",
            jobs_table="jobs",
            control_totals_table=None):
        logger.log_status("Unrolling %s table." % business_table)
        # get attributes from the establishments table
        business_dataset = BusinessDataset(in_storage=in_storage,
                                           in_table_name=business_table)
        business_sizes = business_dataset.get_attribute(
            self.number_of_jobs_attr).astype("int32")
        sectors = business_dataset.get_attribute("sector_id")
        tazes = business_dataset.get_attribute(
            self.geography_id_attr).astype("int32")
        building_ids = array([], dtype='int32')
        if "building_id" in business_dataset.get_primary_attribute_names():
            building_ids = business_dataset.get_attribute("building_id")
        parcel_ids = array([], dtype='int32')
        if "parcel_id" in business_dataset.get_primary_attribute_names():
            parcel_ids = business_dataset.get_attribute("parcel_id")
        home_based = array([], dtype='int16')
        if "home_based" in business_dataset.get_primary_attribute_names():
            home_based = business_dataset.get_attribute("home_based")
        building_sqft = business_dataset.get_attribute(self.sqft_attr)
        building_sqft[building_sqft <= 0] = 0
        join_flags = None
        if "join_flag" in business_dataset.get_primary_attribute_names():
            join_flags = business_dataset.get_attribute("join_flag")
        impute_sqft_flag = None
        if "impute_building_sqft_flag" in business_dataset.get_primary_attribute_names(
        ):
            impute_sqft_flag = business_dataset.get_attribute(
                "impute_building_sqft_flag")

        # initialize jobs attributes
        total_size = business_sizes.sum()
        jobs_data = {}
        jobs_data["sector_id"] = resize(array([-1], dtype=sectors.dtype),
                                        total_size)
        jobs_data["building_id"] = resize(
            array([-1], dtype=building_ids.dtype), total_size)
        jobs_data["parcel_id"] = resize(array([-1], dtype=parcel_ids.dtype),
                                        total_size)
        jobs_data[self.geography_id_attr] = resize(
            array([-1], dtype=tazes.dtype), total_size)
        jobs_data["building_type"] = resize(
            array([-1], dtype=home_based.dtype), total_size)
        jobs_data["sqft"] = resize(array([], dtype=building_sqft.dtype),
                                   total_size)
        if join_flags is not None:
            jobs_data["join_flag"] = resize(array([], dtype=join_flags.dtype),
                                            total_size)
        if impute_sqft_flag is not None:
            jobs_data["impute_building_sqft_flag"] = resize(
                array([], dtype=impute_sqft_flag.dtype), total_size)

        indices = cumsum(business_sizes)
        # iterate over businesses; for each one create the corresponding number of jobs
        # by filling the corresponding slice of the job arrays
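        # e.g. business_sizes = [2, 3, 1] gives indices = [2, 5, 6]; the three
        # businesses fill the job slices [0:2], [2:5] and [5:6] respectively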
        start_index = 0
        for i in range(business_dataset.size()):
            end_index = indices[i]
            jobs_data["sector_id"][start_index:end_index] = sectors[i]
            if building_ids.size > 0:
                jobs_data["building_id"][start_index:end_index] = building_ids[
                    i]
            if parcel_ids.size > 0:
                jobs_data["parcel_id"][start_index:end_index] = parcel_ids[i]
            jobs_data[self.geography_id_attr][start_index:end_index] = tazes[i]
            if home_based.size > 0:
                jobs_data["building_type"][start_index:end_index] = home_based[
                    i]
            if self.compute_sqft_per_job:
                jobs_data["sqft"][start_index:end_index] = round(
                    (building_sqft[i] - building_sqft[i] / 10.0) /
                    float(business_sizes[i]))  # sqft per employee
            else:
                jobs_data["sqft"][start_index:end_index] = building_sqft[i]
            if join_flags is not None:
                jobs_data["join_flag"][start_index:end_index] = join_flags[i]
            if impute_sqft_flag is not None:
                jobs_data["impute_building_sqft_flag"][
                    start_index:end_index] = impute_sqft_flag[i]
            start_index = end_index

        jobs_data["job_id"] = arange(total_size) + 1
        if self.compute_sqft_per_job:
            jobs_data["sqft"] = clip(jobs_data["sqft"], 0, self.maximum_sqft)
            jobs_data["sqft"][logical_and(
                jobs_data["sqft"] > 0,
                jobs_data["sqft"] < self.minimum_sqft)] = self.minimum_sqft

        # correct missing job_building_types
        wmissing_bt = where(jobs_data["building_type"] <= 0)[0]
        if wmissing_bt.size > 0:
            jobs_data["building_type"][
                wmissing_bt] = 2  # assign non-homebased type for now. It can be re-classified in the assign_bldgs_to_jobs... script

        # create jobs table and write it out
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name="jobs", table_data=jobs_data)
        job_dataset = JobDataset(in_storage=storage)
        if self.unplace_jobs_with_non_existing_buildings:
            self.do_unplace_jobs_with_non_existing_buildings(
                job_dataset, out_storage)

        # Match to control totals (only eliminate jobs if control totals are smaller than the actual number of jobs).
        if control_totals_table is not None:
            logger.log_status("Matching to control totals.")
            control_totals = ControlTotalDataset(
                what='employment',
                id_name=['zone_id', 'sector_id'],
                in_table_name=control_totals_table,
                in_storage=in_storage)
            control_totals.load_dataset(
                attributes=['zone_id', 'sector_id', 'jobs'])
            zones_sectors = control_totals.get_id_attribute()
            njobs = control_totals.get_attribute('jobs')
            remove = array([], dtype='int32')
            for i in range(zones_sectors.shape[0]):
                zone, sector = zones_sectors[i, :]
                in_sector = job_dataset.get_attribute("sector_id") == sector
                in_zone_in_sector = logical_and(
                    in_sector,
                    job_dataset.get_attribute("zone_id") == zone)
                if in_zone_in_sector.sum() <= njobs[i]:
                    continue
                to_be_removed = in_zone_in_sector.sum() - njobs[i]
                this_removal = 0
                not_considered = ones(job_dataset.size(), dtype='bool8')
                for unit in [
                        'parcel_id', 'building_id', None
                ]:  # first consider jobs without parcel id, then without building_id, then all
                    if unit is not None:
                        wnunit = job_dataset.get_attribute(unit) <= 0
                        eligible = logical_and(
                            not_considered,
                            logical_and(in_zone_in_sector, wnunit))
                        not_considered[where(wnunit)] = False
                    else:
                        eligible = logical_and(not_considered,
                                               in_zone_in_sector)
                    eligible_sum = eligible.sum()
                    if eligible_sum > 0:
                        where_eligible = where(eligible)[0]
                        if eligible_sum <= to_be_removed - this_removal:
                            draw = arange(eligible_sum)
                        else:
                            draw = sample_noreplace(
                                where_eligible, to_be_removed - this_removal,
                                eligible_sum)
                        remove = concatenate((remove, where_eligible[draw]))
                        this_removal += draw.size
                        if this_removal >= to_be_removed:
                            break

            job_dataset.remove_elements(remove)
            logger.log_status("%s jobs removed." % remove.size)

        logger.log_status("Write jobs table.")
        job_dataset.write_dataset(out_table_name=jobs_table,
                                  out_storage=out_storage)
        logger.log_status("Created %s jobs." % job_dataset.size())
    def prepare_for_estimate(self, specification_dict = None, specification_storage=None,
                              specification_table=None, agent_set=None, 
                              agents_for_estimation_storage=None,
                              agents_for_estimation_table=None, join_datasets=False,
                              index_to_unplace=None, portion_to_unplace=1.0,
                              compute_lambda=False, grouping_location_set=None,
                              movers_variable=None, movers_index=None,
                              filter=None, location_id_variable=None,
                              data_objects={}):
        """Put 'location_id_variable' always in, if the location id is to be computed on the estimation set,
        i.e. if it is not a primary attribute of the estimation set. Set 'index_to_unplace' to None, if 'compute_lambda' is True.
        In such a case, the annual supply is estimated without unplacing agents. 'grouping_location_set', 'movers_variable' and
        'movers_index' must be given, if 'compute_lambda' is True.
        """
        from opus_core.model import get_specification_for_estimation
        from urbansim.functions import compute_supply_and_add_to_location_set
        specification = get_specification_for_estimation(specification_dict,
                                                          specification_storage,
                                                          specification_table)
        if (agent_set is not None) and (index_to_unplace is not None):
            if self.location_id_string is not None:
                agent_set.compute_variables(self.location_id_string, resources=Resources(data_objects))
            if portion_to_unplace < 1:
                unplace_size = int(portion_to_unplace*index_to_unplace.size)
                end_index_to_unplace = sample_noreplace(index_to_unplace, unplace_size)
            else:
                end_index_to_unplace = index_to_unplace
            logger.log_status("Unplace " + str(end_index_to_unplace.size) + " agents.")
            agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                        resize(array([-1]), end_index_to_unplace.size), end_index_to_unplace)
        if compute_lambda:
            movers = zeros(agent_set.size(), dtype="bool8")
            if movers_index is not None:
                movers[movers_index] = 1
            agent_set.add_primary_attribute(movers, "potential_movers")
            self.estimate_config["weights_for_estimation_string"] = self.estimate_config["weights_for_estimation_string"]+"_from_lambda"
            compute_supply_and_add_to_location_set(self.choice_set, grouping_location_set,
                                                   self.run_config["number_of_units_string"],
                                                   self.run_config["capacity_string"],
                                                   movers_variable,
                                                   self.estimate_config["weights_for_estimation_string"],
                                                   resources=Resources(data_objects))

        # create agents for estimation
        if (agents_for_estimation_storage is not None) and (agents_for_estimation_table is not None):
            estimation_set = Dataset(in_storage = agents_for_estimation_storage,
                                      in_table_name=agents_for_estimation_table,
                                      id_name=agent_set.get_id_name(), dataset_name=agent_set.get_dataset_name())
            if location_id_variable is not None:
                estimation_set.compute_variables(location_id_variable, resources=Resources(data_objects))
                # needs to be a primary attribute because of the join method below
                estimation_set.add_primary_attribute(estimation_set.get_attribute(location_id_variable), VariableName(location_id_variable).get_alias())
            if filter:
                values = estimation_set.compute_variables(filter, resources=Resources(data_objects))
                index = where(values > 0)[0]
                estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                agent_set.join_by_rows(estimation_set, require_all_attributes=False,
                                    change_ids_if_not_unique=True)
                index = arange(agent_set.size()-estimation_set.size(),agent_set.size())
            else:
                index = agent_set.get_id_index(estimation_set.get_id_attribute())
        else:
            if agent_set is not None:
                if filter is not None:
                    values = agent_set.compute_variables(filter, resources=Resources(data_objects))
                    index = where(values > 0)[0]
                else:
                    index = arange(agent_set.size())
            else:
                index = None
        return (specification, index)
Example #41
  def run(my, cache_dir=None, year=None):
    global parcel_set, z, node_set, submarket, esubmarket, isr, parcelfees, costdiscount


    ## when developer_model is invoked alone from the command line
    if cache_dir is not None and year is not None:
        simulation_state = SimulationState()
        simulation_state.set_current_time(year)
        simulation_state.set_cache_directory(cache_dir)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(new_instance=True,
                             package_order=['bayarea', 'urbansim_parcel',
                                            'urbansim', 'opus_core'],
                             in_storage=attribute_cache
                            ).get_dataset_pool()
        
    dataset_pool = SessionConfiguration().get_dataset_pool()
    current_year = SimulationState().get_current_time()
    cache_dir = SimulationState().get_cache_directory()

    parcel_set = dataset_pool.get_dataset('parcel')
    building_set = dataset_pool.get_dataset('building')
    household_set = dataset_pool.get_dataset('household')
    node_set = dataset_pool.get_dataset('node')
    unit_set = dataset_pool.get_dataset('residential_unit')
    submarket = dataset_pool.get_dataset('submarket')
    esubmarket = dataset_pool.get_dataset('employment_submarket')
    #print numpy.array(unit_set['rent'] > 0).size
    #for i in range(unit_set.size()):
    #    print unit_set['unit_price'][i], unit_set['unit_sqft'][i]
    
    #transit_set = dataset_pool.get_dataset('transit_station')
    #print dataset_pool.datasets_in_pool()
    '''
    from bayarea.node import transit_type_DDD_within_DDD_meters
    for i in range(7):
        print i
        v = transit_type_DDD_within_DDD_meters.transit_type_DDD_within_DDD_meters(i,500)
        d = v.compute(dataset_pool)
        print d.size
        found = d[numpy.nonzero(d)]
        print found.size
    sys.exit()
    '''
   
    compute_devmdl_accvars(node_set) 

    ######################
    ### CAREFUL - THIS IS WHERE SCENARIO SPECIFIC INFO GOES
    ######################

    current_year = SimulationState().get_current_time()
    z = Zoning(my.scenario,current_year)
    isr = None
    if my.scenario.startswith('Transit'): isr = ISR()
    parcelfees = None
    if my.scenario.startswith('Preferred'):
        parcelfees = ParcelFees(dataset_pool.get_dataset('parcelfees_preferred'))
    #elif my.scenario.startswith('Transit'):
    #    parcelfees = ParcelFees(dataset_pool.get_dataset('parcelfees_transit'))
    elif my.scenario.startswith('Equity'):
        parcelfees = ParcelFees(dataset_pool.get_dataset('parcelfees_equity'))
    elif my.scenario.startswith('Infill'):
        parcelfees = ParcelFees(dataset_pool.get_dataset('parcelfees_infill'))
    costdiscount = 0.0
    if not my.scenario.startswith('No Project') and not my.scenario.startswith('Equity'):
        costdiscount = .01

    #################################
    #################################
    from numpy import logical_not
    empty_parcels = parcel_set.compute_variables("(parcel.number_of_agents(building)==0)*(parcel.node_id>0)*(parcel.shape_area>80)")
    res_parcels = parcel_set.compute_variables("(parcel.number_of_agents(building)>0)*(parcel.node_id>0)*(parcel.shape_area>80)")
    bart_parcels = parcel_set.compute_variables("(parcel.disaggregate(bayarea.node.transit_type_1_within_800_meters))")
    caltrain_parcels = parcel_set.compute_variables("(parcel.disaggregate(bayarea.node.transit_type_2_within_800_meters))")
    #pda_parcels = parcel_set.compute_variables("(parcel.pda_id > -1)*(numpy.logical_not(parcel.county_id==38))")
    pda_parcels = parcel_set.compute_variables("(parcel.pda_id > -1)")
    SAMPLE_RATE = 0.01
    from opus_core.sampling_toolbox import sample_noreplace
    from numpy import concatenate, where
    sampled_res_parcels_index = sample_noreplace(where(res_parcels)[0], int(SAMPLE_RATE * parcel_set.size()))
    test_parcels = concatenate((where(empty_parcels==1)[0], sampled_res_parcels_index,where(bart_parcels==1)[0],where(caltrain_parcels==1)[0],where(pda_parcels==1)[0]))
    test_parcels = sample_noreplace(test_parcels, int(.08 * 154877))  # cap the sample at 8% of a hardcoded constant (154877)
    numpy.random.shuffle(test_parcels)
 
    """
    sample = []
    for i in range(parcel_set.size()):
        if empty_parcels[i] == 1:
            sample.append(i+1)
        elif res_parcels[i] == 1 and numpy.random.ranf() < SAMPLE_RATE:
            sample.append(i+1)
    test_parcels = array(sample)
    """

    #empty_parcels = parcel_set.compute_variables("(parcel.node_id>0)*(parcel.shape_area>80)")
    #test_parcels = numpy.where(empty_parcels==1)[0]
    
    global building_sqft, building_price
    building_sqft = parcel_set.compute_variables('parcel.aggregate(building.building_sqft)')

    building_price_owner_residential=parcel_set.compute_variables('building_price_owner_res=parcel.aggregate((residential_unit.sale_price)*(residential_unit.sale_price>0),intermediates=[building])')
    building_price_rental_residential=parcel_set.compute_variables('building_price_rental_res=parcel.aggregate((residential_unit.rent*12*17.9)*(residential_unit.rent>0),intermediates=[building])')
    building_price_nonresidential = parcel_set.compute_variables('building_price_nonres = parcel.aggregate((building.non_residential_rent*7*building.non_residential_sqft))')
    sum_building_p = parcel_set.compute_variables('sum_building_price = parcel.building_price_owner_res + parcel.building_price_rental_res + building_price_nonres')
    ##sum_building_price = building_price_owner_residential + building_price_rental_residential + building_price_nonresidential
    vacant_parcel = parcel_set.compute_variables('parcel.sum_building_price == 0')
    price_per_sqft_land = (parcel_set.compute_variables('parcel.disaggregate(safe_array_divide(zone.aggregate(parcel.sum_building_price),zone.aggregate(building.building_sqft)))'))/4
    parcel_land_area = parcel_set.compute_variables('parcel.shape_area')
    vacant_land_price = vacant_parcel*price_per_sqft_land*parcel_land_area
    building_price = sum_building_p + vacant_land_price



    #land_price = (sum_building_p==0) * (parcel_set.compute_variables('parcel.disaggregate(safe_array_divide(zone.aggregate(parcel.sum_building_price),zone.aggregate(building.building_sqft)))'))* parcel_set.compute_variables('parcel.land_area')

    #info used to match from proposal_component to submarket
    parcel_set.compute_variables(["bayarea.parcel.within_half_mile_transit", 
                                  "bayarea.parcel.schooldistrict",
                                  "bayarea.parcel.jurisdiction_id",
                                 ])
    #test_parcels = array([i+1 for i in range(parcel_set.size())])
    #test_parcels = test_parcels[:10000]

    #test_parcels = test_parcels[:150]
    #test_parcels = numpy.where(parcel_set['parcel_id'] == 1608920)[0]
    #print test_parcels
    logger.log_status("%s parcels to test" % (test_parcels.size))
    print "Num of parcels:", test_parcels.size
    import time

    HOTSHOT = 0
    if MP:
        from multiprocessing import Pool, Queue
        pool = Pool(processes=4)

    import hotshot, hotshot.stats#, test.pystone
    if HOTSHOT:
        prof = hotshot.Profile('devmdl.prof')
        prof.start()

    outf = open(os.path.join(cache_dir,'buildings-%d.csv' % current_year),'w')
    outf.write('pid,county,dev_btype,stories,sqft,res_sqft,nonres_sqft,tenure,year_built,res_units,npv,actualfee,btype\n')
    debugf = open(os.path.join(cache_dir,'proforma-debug-%d.csv' % current_year),'w')
    bformdbg = 'county_id,far,height,max_dua,bform.sf_builtarea(),bform.sfunitsizes,bform.mf_builtarea(),bform.mfunitsizes,bform.num_units,bform.nonres_sqft,bform.buildable_area'
    otherdbg = 'isr,parcelfees,existing_sqft,existing_price,lotsize,unitsize,unitsize2,bform.sales_absorption,bform.rent_absorption,bform.leases_absorption,bform.sales_vacancy_rates,bform.vacancy_rates'
    debugf.write('pid,btype,npv,actualfee,pricesf,pricemf,rentsf,rentmf,rentof,rentret,rentind,%s,%s\n' % (bformdbg,otherdbg))
    t1 = time.time()
    aggd = {}

    def chunks(l, n):
        # yield successive n-sized chunks of list l
        for i in xrange(0, len(l), n):
            yield l[i:i+n]

    for test_chunk in chunks(test_parcels,1000):

        print "Executing CHUNK"

        sales_absorption = submarket.compute_variables('bayarea.submarket.sales_absorption')
        rent_absorption = submarket.compute_variables('bayarea.submarket.rent_absorption')
        vacancy_rates = submarket.compute_variables('bayarea.submarket.vacancy_rates')
        leases_absorption = esubmarket.compute_variables('bayarea.employment_submarket.leases_absorption')
        nr_vacancy_rates = esubmarket.compute_variables('bayarea.employment_submarket.vacancy_rates')

        if HOTSHOT:
            results = []
            for p in test_chunk: 
                r = process_parcel(p)
                if r is not None and r != -1: results.append(list(r))
        else:
            if MP:
                results = pool.map(process_parcel,test_chunk)
            else:
                results = [process_parcel(p) for p in test_chunk]
            results_bldg = [list(x[0]) for x in results if x is not None and x[0] != -1]
            #each row of units represents number of units of [1, 2, 3, 4] bedrooms
            units = array([x[1][0] for x in results if x is not None and x[0] != -1])
            sqft_per_unit = array([x[1][1] for x in results if x is not None and x[0] != -1])
            for x in results:
                if x is not None:
                    debugf.write(x[2])

            results = results_bldg
        for result in results:
            #print result
            out_btype = devmdltypes[int(result[2])-1]
            outf.write(','.join([str(x) for x in result]+[str(out_btype)])+'\n')

        ##TODO: id of buildings to be demolished
    
        buildings_to_demolish = []
        idx_buildings_to_demolish = building_set.get_id_index(buildings_to_demolish)
        
        JAMM = JoinAttributeModificationModel()
        JAMM.run(household_set, building_set, index=idx_buildings_to_demolish, value=-1)

        building_set.remove_elements(idx_buildings_to_demolish)
        column_names = ["parcel_id","county","building_type_id","stories",
                    "building_sqft","residential_sqft","non_residential_sqft",
                    "tenure","year_built","residential_units"]
        buildings_data = copy.deepcopy(results)
        for i in range(len(buildings_data)):
            buildings_data[i][2] = devmdltypes[int(buildings_data[i][2])-1]
        buildings_data = array(buildings_data)
        new_buildings = {}
        available_bldg_id = building_set['building_id'].max() + 1
        new_bldg_ids = arange(available_bldg_id, available_bldg_id+buildings_data.shape[0],
                              dtype=building_set['building_id'].dtype)
        if buildings_data.size > 0:
            for icol, col_name in enumerate(column_names):
                if col_name in building_set.get_known_attribute_names():
                    ddtype = building_set[col_name].dtype
                    new_buildings[col_name] = (buildings_data[:, icol]).astype(ddtype)
                else:
                    #if the col_name is not in dataset, it will be discarded anyway
                    pass

            new_buildings['building_id'] = new_bldg_ids
            # recode tenure from (0 = own, 1 = rent) to (1 = rent, 2 = own)
            new_buildings['tenure'][new_buildings['tenure']==0] = 2
            ## pid is the index to parcel_set; convert them to actual parcel_id
            #new_buildings['parcel_id'] = parcel_set['parcel_id'][new_buildings['parcel_id']]
            building_set.add_elements(new_buildings, require_all_attributes=False,
                                      change_ids_if_not_unique=True)
            building_set.flush_dataset()

            assert new_bldg_ids.size == units.shape[0] == sqft_per_unit.shape[0]
            units_bldg_ids = repeat(new_bldg_ids, 4)
            # one [1, 2, 3, 4] bedroom pattern per new building (units is n x 4 here)
            bedrooms = array([1, 2, 3, 4] * new_bldg_ids.size)
            units = numpy.round(units.ravel())
            sqft_per_unit = sqft_per_unit.ravel()
            new_units = {'building_id': array([], dtype='i4'),
                         'bedrooms': array([], dtype='i4'),
                         'sqft_per_unit': array([], dtype='i4')
                        }
            
            for i_unit, unit in enumerate(units):
                if unit <= 0:
                    continue
                new_units['building_id'] = concatenate((new_units['building_id'],
                                                        repeat(units_bldg_ids[i_unit], unit))
                                                       )
                new_units['bedrooms'] = concatenate((new_units['bedrooms'],
                                                     repeat(bedrooms[i_unit], unit))
                                                    )
                new_units['sqft_per_unit'] = concatenate((new_units['sqft_per_unit'],
                                                          repeat(sqft_per_unit[i_unit], unit))
                                                         )

            ##force dtype conversion to the same dtype as unit_set
            for col_name in ['building_id', 'bedrooms', 'sqft_per_unit']:
                if col_name in unit_set.get_known_attribute_names():
                    new_units[col_name] = new_units[col_name].astype(unit_set[col_name].dtype)

            unit_set.add_elements(new_units, require_all_attributes=False,
                                  change_ids_if_not_unique=True)
            unit_set.flush_dataset()

        for result in results:
            units = result[-1]
            nonres_sqft = 1 #result[6]/1000.0
            county = result[1]
            btype = result[2]
            key = (county,btype)
            aggd.setdefault(key,0)
            if btype < 7: aggd[key] += units
            else: aggd[key] += nonres_sqft
            aggd.setdefault(county,0)
            aggd[county] += units
   
    aggf = open('county_aggregations-%d.csv' % current_year,'w')
    county_names = {49:'son',41:'smt',1:'ala',43:'scl',28:'nap',38:'sfr',7:'cnc',48:'sol',21:'mar',0:'n/a'}
    btype_names = {1:'SF',2:'SFBUILD',3:'MF',4:'MXMF',5:'CONDO',6:'MXC',7:'OF',8:'MXO',9:'CHOOD',10:'CAUTO',11:'CBOX',12:'MANU',13:'WHE'}
    aggf.write('county,total,'+','.join(btype_names.values())+'\n')
    for county in [38,41,43,1,7,48,28,49,21]:
        aggf.write(county_names[county]+','+str(aggd.get(county,0)))
        for btype in btype_names.keys():
            key = (county,btype)
            val = aggd.get(key,0) 
            aggf.write(','+str(val))
        aggf.write('\n')

    t2 = time.time()

    print "Finished in %f seconds" % (t2-t1)
    print "Ran optimization %d times" % devmdl_optimize.OBJCNT
    global NOZONINGCNT, NOBUILDTYPES
    print "Did not find zoning for parcel %d times" % NOZONINGCNT
    print "Did not find building types for parcel %d times" % NOBUILDTYPES
    print "DONE"

    my.post_run() #remove price_shifter & cost_shifter to avoid them being cached

    if HOTSHOT:
        prof.stop()
        prof.close()
        stats = hotshot.stats.load('devmdl.prof')
        stats.strip_dirs()
        stats.sort_stats('cumulative')
        stats.print_stats(20)
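A minimal sketch of the chunked Pool.map pattern used in the run above, assuming a module-level stand-in for process_parcel (a result of None means "skip this parcel"):

from multiprocessing import Pool

def process_item(i):
    # stand-in for process_parcel; signal "skip" with None
    if i % 2 == 0:
        return i * i
    return None

def chunks(l, n):
    # yield successive n-sized chunks of list l
    for i in range(0, len(l), n):
        yield l[i:i+n]

if __name__ == '__main__':
    pool = Pool(processes=4)
    for chunk in chunks(range(20), 5):
        results = [r for r in pool.map(process_item, chunk) if r is not None]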
    def run(self, year, business_set,
            control_totals,
            data_objects=None,
            resources=None):
        business_id_name = business_set.get_id_name()[0]
        control_totals.get_attribute("total_number_of_businesses") # make sure the attribute is loaded
        idx = where(control_totals.get_attribute("year")==year)
        sectors = unique(control_totals.get_attribute_by_index("building_use_id", idx))
        max_id = business_set.get_id_attribute().max()
        business_size = business_set.size()
        new_businesses = {self.location_id_name:array([], dtype='int32'),
                          "building_use_id":array([], dtype='int32'),
                          business_id_name:array([], dtype='int32'),
                          "sqft":array([], dtype=int32),
                          "employees":array([], dtype=int32),}
        compute_resources = Resources(data_objects)
#        compute_resources.merge({job_building_types.get_dataset_name():job_building_types, "debug":self.debug})
        business_set.compute_variables(
            map(lambda x: "%s.%s.is_sector_%s"
                    % (self.variable_package, business_set.get_dataset_name(), x),
                sectors),
            resources = compute_resources)
        remove_businesses = array([], dtype='int32')

        for sector in sectors:
            total_businesses = control_totals.get_data_element_by_id((year,sector)).total_number_of_businesses
            is_in_sector = business_set.get_attribute("is_sector_%s" % sector)
            diff = int(total_businesses - is_in_sector.astype(int8).sum())

            if diff < 0: # too many businesses in this sector; remove some
                w = where(is_in_sector == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(business_set, w, -1*diff,
                                                         self.location_id_name)
                remove_businesses = concatenate((remove_businesses, non_placed,
                                           sample_noreplace(sample_array, max(0,abs(diff)-size_non_placed))))

            if diff > 0: # not enough businesses in this sector; create some
                new_businesses[self.location_id_name]=concatenate((new_businesses[self.location_id_name],zeros((diff,), dtype="int32")))
                new_businesses["building_use_id"]=concatenate((new_businesses["building_use_id"],
                                                               sector*ones((diff,), dtype="int32")))

                available_business_index = where(is_in_sector)[0]
                sampled_business = probsample_replace(available_business_index, diff, None)

                new_businesses["sqft"] = concatenate((new_businesses["sqft"],
                                                     business_set.get_attribute("sqft")[sampled_business]))
                new_businesses["employees"] = concatenate((new_businesses["employees"],
                                                           business_set.get_attribute("employees")[sampled_business]))

                new_max_id = max_id+diff
                new_businesses[business_id_name]=concatenate((new_businesses[business_id_name], arange(max_id+1, new_max_id+1)))
                max_id = new_max_id

        business_set.remove_elements(remove_businesses)
        business_set.add_elements(new_businesses, require_all_attributes=False)
        difference = business_set.size()-business_size
        self.debug.print_debug("Difference in number of businesses: %s (original %s,"
            " new %s, created %s, deleted %s)"
                % (difference,
                   business_size,
                   business_set.size(),
                   new_businesses[business_id_name].size,
                   remove_businesses.size),
            3)
        self.debug.print_debug("Number of unplaced businesses: %s"
            % where(business_set.get_attribute(self.location_id_name) <=0)[0].size,
            3)
        return difference
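The run above follows the usual transition-model pattern: compare a control total against the current count per sector, then either sample businesses to remove or sample templates (with replacement) to clone. A minimal numpy-only sketch of that diff step, all values hypothetical:

import numpy

total_target = 120                                # control total for one sector
is_in_sector = numpy.zeros(200, dtype='bool')
is_in_sector[:100] = True                         # 100 existing businesses
diff = total_target - is_in_sector.sum()
members = numpy.where(is_in_sector)[0]
if diff < 0:
    # remove |diff| members, sampled without replacement
    to_remove = numpy.random.choice(members, size=-diff, replace=False)
elif diff > 0:
    # clone diff members, sampled with replacement, copying their attributes
    templates = numpy.random.choice(members, size=diff, replace=True)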
    def run(self,
            location_set,
            agent_event_set,
            agent_set,
            current_year,
            dataset_pool=None):
        """ The agent_event_set is expected to have attributes:
                grid_id, scheduled_year, total_number, is_percentage, change_type, (optionally other agent characteristics)
            'grid_id' is not a mandatory name, but it must match the id name of the location_set.
            Thus, the model works on any geography level.
            'is_percentage' (bool) determines if the 'total_number' is a percentage of existing agents (True) or 
            an absolute number (False) - it is relevant only when deleting agents.
            'change_type' can have values 'D' (delete), 'A' (add), 'R' (replace) and determines the type
            of change for the agents. If this column is missing, the model considers 'D' as default
            for all entries in the agent_event_set.
            If the change type is 'D', 
            the method finds agents from the agent_set (jobs, households) located in the given locations 
            (e.g. grid_id), then samples the given amount for the current_year and unplaces them.
            If other characteristics columns are contained in the agent_event_set, their names
            must match column names in the agent_set (e.g. 'sector_id' for jobs).
            In such a case the deletion is done among agents that match the given characteristics.
        """
        if not agent_event_set or (agent_event_set.size() == 0):
            logger.log_status("No %s agents for event processing." %
                              agent_set.get_dataset_name())
            return

        idx_of_events_this_year = agent_event_set.get_attribute(
            "scheduled_year") == current_year
        if idx_of_events_this_year.sum() == 0:
            logger.log_status("No %s agents for this year event processing." %
                              agent_set.get_dataset_name())
            return

        self.dataset_pool = self.create_dataset_pool(dataset_pool)

        location_id_name = location_set.get_id_name()[0]
        location_ids_in_event_set = agent_event_set.get_attribute_by_index(
            location_id_name, idx_of_events_this_year)

        other_characteristics = agent_event_set.get_known_attribute_names()
        for name in agent_event_set.get_id_name():
            other_characteristics.remove(name)
        other_characteristics.remove("total_number")

        totals = agent_event_set.get_attribute_by_index(
            "total_number", idx_of_events_this_year)
        if "change_type" not in agent_event_set.get_known_attribute_names():
            types_of_change = array(idx_of_events_this_year.sum() * ['D'])
        else:
            types_of_change = agent_event_set.get_attribute_by_index(
                "change_type", idx_of_events_this_year)
            other_characteristics.remove("change_type")
        if "is_percentage" not in agent_event_set.get_known_attribute_names():
            is_percentage = zeros(idx_of_events_this_year.sum(), dtype='bool8')
        else:
            is_percentage = agent_event_set.get_attribute_by_index(
                "is_percentage", idx_of_events_this_year)
            other_characteristics.remove("is_percentage")

        # pre-load other characteristics
        for name in other_characteristics:
            agent_event_set.get_attribute(name)

        if location_id_name not in agent_set.get_known_attribute_names():
            # compute agents locations
            agent_set.compute_one_variable_with_unknown_package(
                location_id_name, self.dataset_pool)

        # iterate over locations
        for ilocation_id in range(location_ids_in_event_set.size):
            agent_ids = agent_set.get_attribute(location_id_name)
            location_id = location_ids_in_event_set[ilocation_id]
            change_type = types_of_change[ilocation_id]

            agents_to_consider = agent_ids == location_id
            for characteristics in other_characteristics:
                characteristics_value = eval(
                    "agent_event_set.get_data_element_by_id((location_id, current_year)).%s"
                    % characteristics)
                agents_to_consider = logical_and(
                    agents_to_consider,
                    agent_set.get_attribute(characteristics) ==
                    characteristics_value)
            number_of_agents = totals[ilocation_id]
            agent_index = where(agents_to_consider)[0]
            if is_percentage[
                    ilocation_id]:  # number_of_agents means percentage; convert to absolute number
                number_of_agents = agent_index.size * number_of_agents / 100.0
            number_of_agents = int(number_of_agents)
            if change_type == 'D':
                if number_of_agents > 0:
                    if agent_index.size <= number_of_agents:  # unplace all agents
                        unplace_index = agent_index
                    else:  # sample agents
                        unplace_index = sample_noreplace(
                            agent_index, number_of_agents)
                    agent_set.modify_attribute(
                        name=location_id_name,
                        data=resize(array([-1], dtype=agent_ids.dtype),
                                    unplace_index.size),
                        index=unplace_index)
            elif change_type == 'A':
                if number_of_agents > 0:
                    data = {
                        agent_set.get_id_name()[0]:
                        arange(1, number_of_agents + 1, 1) +
                        agent_set.get_id_attribute().max()
                    }
                    data[location_id_name] = array([location_id] *
                                                   number_of_agents)

                    for characteristics in other_characteristics:
                        characteristics_value = eval(
                            "agent_event_set.get_data_element_by_id((location_id, current_year)).%s"
                            % characteristics)
                        data[characteristics] = array([characteristics_value] *
                                                      number_of_agents)

                    agent_set.add_elements(data, require_all_attributes=False)
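A hypothetical agent_event_set, to make the schema in the docstring concrete (all values invented):

# grid_id  scheduled_year  total_number  is_percentage  change_type
#    12         2010            10           False          'D'    -> unplace 10 agents from location 12
#    45         2010            25           True           'D'    -> unplace 25% of the agents in location 45
#    12         2010             5           False          'A'    -> create 5 agents and place them in location 12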
    def prepare_for_estimate(self,
                             specification_dict=None,
                             specification_storage=None,
                             specification_table=None,
                             agent_set=None,
                             agents_for_estimation_storage=None,
                             agents_for_estimation_table=None,
                             join_datasets=False,
                             index_to_unplace=None,
                             portion_to_unplace=1.0,
                             compute_lambda=False,
                             grouping_location_set=None,
                             movers_variable=None,
                             movers_index=None,
                             filter=None,
                             location_id_variable=None,
                             data_objects={}):
        """Put 'location_id_variable' always in, if the location id is to be computed on the estimation set,
        i.e. if it is not a primary attribute of the estimation set. Set 'index_to_unplace' to None, if 'compute_lambda' is True.
        In such a case, the annual supply is estimated without unplacing agents. 'grouping_location_set', 'movers_variable' and
        'movers_index' must be given, if 'compute_lambda' is True.
        """
        from opus_core.model import get_specification_for_estimation
        from urbansim.functions import compute_supply_and_add_to_location_set
        specification = get_specification_for_estimation(
            specification_dict, specification_storage, specification_table)
        if (agent_set is not None) and (index_to_unplace is not None):
            if self.location_id_string is not None:
                agent_set.compute_variables(self.location_id_string,
                                            resources=Resources(data_objects))
            if portion_to_unplace < 1:
                unplace_size = int(portion_to_unplace * index_to_unplace.size)
                end_index_to_unplace = sample_noreplace(
                    index_to_unplace, unplace_size)
            else:
                end_index_to_unplace = index_to_unplace
            logger.log_status("Unplace " + str(end_index_to_unplace.size) +
                              " agents.")
            agent_set.modify_attribute(
                self.choice_set.get_id_name()[0],
                resize(array([-1]), end_index_to_unplace.size),
                end_index_to_unplace)
        if compute_lambda:
            movers = zeros(agent_set.size(), dtype="bool8")
            if movers_index is not None:
                movers[movers_index] = 1
            agent_set.add_primary_attribute(movers, "potential_movers")
            self.estimate_config[
                "weights_for_estimation_string"] = self.estimate_config[
                    "weights_for_estimation_string"] + "_from_lambda"
            compute_supply_and_add_to_location_set(
                self.choice_set,
                grouping_location_set,
                self.run_config["number_of_units_string"],
                self.run_config["capacity_string"],
                movers_variable,
                self.estimate_config["weights_for_estimation_string"],
                resources=Resources(data_objects))

        # create agents for estimation
        if (agents_for_estimation_storage
                is not None) and (agents_for_estimation_table is not None):
            estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                     in_table_name=agents_for_estimation_table,
                                     id_name=agent_set.get_id_name(),
                                     dataset_name=agent_set.get_dataset_name())
            if location_id_variable is not None:
                estimation_set.compute_variables(
                    location_id_variable, resources=Resources(data_objects))
                # needs to be a primary attribute because of the join method below
                estimation_set.add_primary_attribute(
                    estimation_set.get_attribute(location_id_variable),
                    VariableName(location_id_variable).get_alias())
            if filter:
                values = estimation_set.compute_variables(
                    filter, resources=Resources(data_objects))
                index = where(values > 0)[0]
                estimation_set.subset_by_index(
                    index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                agent_set.join_by_rows(estimation_set,
                                       require_all_attributes=False,
                                       change_ids_if_not_unique=True)
                index = arange(agent_set.size() - estimation_set.size(),
                               agent_set.size())
            else:
                index = agent_set.get_id_index(
                    estimation_set.get_id_attribute())
        else:
            if agent_set is not None:
                if filter is not None:
                    values = agent_set.compute_variables(
                        filter, resources=Resources(data_objects))
                    index = where(values > 0)[0]
                else:
                    index = arange(agent_set.size())
            else:
                index = None
        return (specification, index)
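The index_to_unplace handling in prepare_for_estimate reduces to one sampling step. A minimal sketch with numpy.random.choice standing in for opus_core's sample_noreplace (all values hypothetical):

import numpy

index_to_unplace = numpy.arange(1000)    # agents eligible for unplacement
portion_to_unplace = 0.25
if portion_to_unplace < 1:
    unplace_size = int(portion_to_unplace * index_to_unplace.size)
    end_index_to_unplace = numpy.random.choice(index_to_unplace, size=unplace_size, replace=False)
else:
    end_index_to_unplace = index_to_unplace
# the sampled agents then get their location id set to -1 (unplaced)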
Example #45
    def _add(self, agents_pool, amount, 
             agent_dataset, location_dataset, 
             this_refinement,
             dataset_pool ):
        
        fit_index = self.get_fit_agents_index(agent_dataset, 
                                              this_refinement.agent_expression, 
                                              this_refinement.location_expression,
                                              dataset_pool)
        movers_index = array([],dtype="int32")
        amount_from_agents_pool = min( amount, len(agents_pool) )
        if amount_from_agents_pool > 0:
            agents_index_from_agents_pool = sample_noreplace( agents_pool, amount_from_agents_pool )
            for i in agents_index_from_agents_pool:
                agents_pool.remove(i)
            if fit_index.size == 0:
                ##cannot find agents to copy their location or clone them, place agents in agents_pool
                if amount > amount_from_agents_pool:                   
                    logger.log_warning("Refinement requests to add %i agents,  but there are only %i agents subtracted from previous action(s) and no agents satisfying %s to clone from;" \
                                   "add %i agents instead" % (amount, amount_from_agents_pool, 
                                                              ' and '.join( [this_refinement.agent_expression, 
                                                                           this_refinement.location_expression]).strip(' and '), 
                                                              amount_from_agents_pool,) )
                    amount = amount_from_agents_pool
                # sample from all suitable locations
                is_suitable_location = location_dataset.compute_variables( this_refinement.location_expression,
                                                                           dataset_pool=dataset_pool )
                location_id_for_agents_pool = sample_replace( location_dataset.get_id_attribute()[is_suitable_location],
                                                                 amount_from_agents_pool )
            else:
                #sample from locations of suitable agents            
                agents_index_for_location = sample_replace( fit_index, amount_from_agents_pool)
                location_id_for_agents_pool = agent_dataset.get_attribute( location_dataset.get_id_name()[0] 
                                                                         )[agents_index_for_location]
                movers_index = concatenate( (movers_index, agents_index_for_location) )

        elif fit_index.size == 0:
            ## no agents in agents_pool and no agents to clone either, --> fail
            logger.log_error( "Action 'add' failed: there is no agent subtracted from previous action, and no suitable agents satisfying %s to clone from." % \
                              ' and '.join( [this_refinement.agent_expression, this_refinement.location_expression] ).strip(' and ') )
            return
            
        if amount > amount_from_agents_pool:
            agents_index_to_clone = sample_replace( fit_index, amount - amount_from_agents_pool)
            movers_index = concatenate( (movers_index, agents_index_to_clone) )

        if movers_index.size > 0 and this_refinement.location_capacity_attribute is not None and len(this_refinement.location_capacity_attribute) > 0:
            movers_location_id = agent_dataset.get_attribute( location_dataset.get_id_name()[0] )[movers_index]
            movers_location_index = location_dataset.get_id_index( movers_location_id )
            # see previous comment about histogram function
            num_of_movers_by_location = histogram( movers_location_index, bins=arange(location_dataset.size() +1) )[0]
            num_of_agents_by_location = location_dataset.compute_variables( "number_of_agents=%s.number_of_agents(%s)" % \
                                                                            ( location_dataset.dataset_name,
                                                                            agent_dataset.dataset_name ),
                                                                            dataset_pool=dataset_pool)
            
            expand_factor = safe_array_divide( (num_of_agents_by_location + num_of_movers_by_location ).astype('float32'),
                                                num_of_agents_by_location, return_value_if_denominator_is_zero = 1.0 )
            new_values = round_( expand_factor * location_dataset.get_attribute(this_refinement.location_capacity_attribute) )
            location_dataset.modify_attribute( this_refinement.location_capacity_attribute, 
                                               new_values
                                           )
            self._add_refinement_info_to_dataset(location_dataset, self.id_names, this_refinement, index=movers_location_index)
        if amount_from_agents_pool > 0:
            agent_dataset.modify_attribute( location_dataset.get_id_name()[0],
                                            location_id_for_agents_pool,
                                            agents_index_from_agents_pool
                                            )
            self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement, index=agents_index_from_agents_pool)
        if amount > amount_from_agents_pool:
            new_agents_index = agent_dataset.duplicate_rows(agents_index_to_clone)
            self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement, index=agents_index_to_clone)
            self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement, index=new_agents_index)
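The capacity adjustment in _add scales each touched location's capacity attribute by (existing agents + movers) / existing agents, guarding against division by zero. A numpy-only sketch of that computation, all arrays hypothetical:

import numpy

n_locations = 5
movers_location_index = numpy.array([0, 0, 3, 4, 4, 4])    # location index per mover
num_movers = numpy.histogram(movers_location_index, bins=numpy.arange(n_locations + 1))[0]
num_agents = numpy.array([10.0, 0.0, 8.0, 2.0, 6.0])
safe_denominator = numpy.where(num_agents > 0, num_agents, 1.0)
expand_factor = numpy.where(num_agents > 0, (num_agents + num_movers) / safe_denominator, 1.0)
capacity = numpy.array([12, 0, 9, 3, 7])
new_capacity = numpy.round(expand_factor * capacity)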
    def _add(self, agents_pool, amount, agent_dataset, location_dataset,
             this_refinement, dataset_pool):

        fit_index = self.get_fit_agents_index(
            agent_dataset, this_refinement.agent_expression,
            this_refinement.location_expression, dataset_pool)
        movers_index = array([], dtype="int32")
        amount_from_agents_pool = min(amount, len(agents_pool))
        if amount_from_agents_pool > 0:
            agents_index_from_agents_pool = sample_noreplace(
                agents_pool, amount_from_agents_pool)
            for i in agents_index_from_agents_pool:
                agents_pool.remove(i)
            if fit_index.size == 0:
                ##cannot find agents to copy their location or clone them, place agents in agents_pool
                logger.log_warning("Refinement requests to add %i agents,  but there are only %i agents subtracted from previous action(s) and no agents satisfying %s to clone from;" \
                                   "add %i agents instead" % (amount, amount_from_agents_pool,
                                                              ' and '.join( [this_refinement.agent_expression,
                                                                           this_refinement.location_expression]).strip(' and '),
                                                              amount_from_agents_pool,) )

                amount = amount_from_agents_pool

                is_suitable_location = location_dataset.compute_variables(
                    this_refinement.location_expression,
                    dataset_pool=dataset_pool)
                location_id_for_agents_pool = sample_replace(
                    location_dataset.get_id_attribute()[is_suitable_location],
                    amount_from_agents_pool)
            else:

                agents_index_for_location = sample_replace(
                    fit_index, amount_from_agents_pool)
                location_id_for_agents_pool = agent_dataset.get_attribute(
                    location_dataset.get_id_name()
                    [0])[agents_index_for_location]
                movers_index = concatenate(
                    (movers_index, agents_index_for_location))

        elif fit_index.size == 0:
            ## no agents in agents_pool and no agents to clone either, --> fail
            logger.log_error( "Action 'add' failed: there is no agent subtracted from previous action, and no suitable agents satisfying %s to clone from." % \
                              ' and '.join( [this_refinement.agent_expression, this_refinement.location_expression] ).strip(' and ') )
            return

        if amount > amount_from_agents_pool:
            agents_index_to_clone = sample_replace(
                fit_index, amount - amount_from_agents_pool)
            movers_index = concatenate((movers_index, agents_index_to_clone))

        if movers_index.size > 0 and this_refinement.location_capacity_attribute is not None and len(
                this_refinement.location_capacity_attribute) > 0:
            movers_location_id = agent_dataset.get_attribute(
                location_dataset.get_id_name()[0])[movers_index]
            movers_location_index = location_dataset.get_id_index(
                movers_location_id)
            # see previous comment about histogram function
            num_of_movers_by_location = histogram(
                movers_location_index,
                bins=arange(location_dataset.size() + 1))[0]
            num_of_agents_by_location = location_dataset.compute_variables( "number_of_agents=%s.number_of_agents(%s)" % \
                                                                            ( location_dataset.dataset_name,
                                                                            agent_dataset.dataset_name ),
                                                                            dataset_pool=dataset_pool)

            expand_factor = safe_array_divide(
                (num_of_agents_by_location +
                 num_of_movers_by_location).astype('float32'),
                num_of_agents_by_location,
                return_value_if_denominator_is_zero=1.0)
            new_values = round_(expand_factor * location_dataset.get_attribute(
                this_refinement.location_capacity_attribute))
            location_dataset.modify_attribute(
                this_refinement.location_capacity_attribute, new_values)
            self._add_refinement_info_to_dataset(location_dataset,
                                                 self.id_names,
                                                 this_refinement,
                                                 index=movers_location_index)
        if amount_from_agents_pool > 0:
            agent_dataset.modify_attribute(location_dataset.get_id_name()[0],
                                           location_id_for_agents_pool,
                                           agents_index_from_agents_pool)
            self._add_refinement_info_to_dataset(
                agent_dataset,
                self.id_names,
                this_refinement,
                index=agents_index_from_agents_pool)
        if amount > amount_from_agents_pool:
            new_agents_index = agent_dataset.duplicate_rows(
                agents_index_to_clone)
            self._add_refinement_info_to_dataset(agent_dataset,
                                                 self.id_names,
                                                 this_refinement,
                                                 index=agents_index_to_clone)
            self._add_refinement_info_to_dataset(agent_dataset,
                                                 self.id_names,
                                                 this_refinement,
                                                 index=new_agents_index)
    def _subtract(self, agents_pool, amount, agent_dataset, location_dataset,
                  this_refinement, dataset_pool):

        fit_index = self.get_fit_agents_index(
            agent_dataset, this_refinement.agent_expression,
            this_refinement.location_expression, dataset_pool)

        if amount > fit_index.size:
            logger.log_warning("Refinement requests to subtract %i agents,  but there are %i agents in total satisfying %s;" \
                               "subtract %i agents instead" % (amount, fit_index.size,
                                                               ' and '.join( [this_refinement.agent_expression,
                                                                            this_refinement.location_expression] ).strip(' and '),
                                                               fit_index.size) )
            amount = fit_index.size

        if amount == fit_index.size:
            movers_index = fit_index
        else:
            movers_index = sample_noreplace(fit_index, amount)

        agents_pool += movers_index.tolist()
        ## modify location capacity attribute if specified
        if this_refinement.location_capacity_attribute is not None and len(
                this_refinement.location_capacity_attribute) > 0:
            location_dataset = dataset_pool.get_dataset(
                VariableName(
                    this_refinement.location_expression).get_dataset_name())

            movers_location_id = agent_dataset.get_attribute(
                location_dataset.get_id_name()[0])[movers_index]
            movers_location_index = location_dataset.get_id_index(
                movers_location_id)
            # backward compatibility code for older versions of numpy -- no longer required since we need numpy 1.2.1 or greater
            # new=False argument to histogram tells it to use deprecated behavior for now (to be removed in numpy 1.3)
            # See numpy release notes -- search for histogram
            # if numpy.__version__ >= '1.2.0':
            #    num_of_movers_by_location = histogram( movers_location_index, bins=arange(location_dataset.size()), new=False)[0]
            # else:
            #    num_of_movers_by_location = histogram( movers_location_index, bins=arange(location_dataset.size()))[0]
            num_of_movers_by_location = histogram(
                movers_location_index,
                bins=arange(location_dataset.size() + 1))[0]
            num_of_agents_by_location = location_dataset.compute_variables( "number_of_agents=%s.number_of_agents(%s)" % \
                                                                            (location_dataset.dataset_name,
                                                                            agent_dataset.dataset_name),
                                                                            dataset_pool=dataset_pool)

            shrink_factor = safe_array_divide(
                (num_of_agents_by_location -
                 num_of_movers_by_location).astype('float32'),
                num_of_agents_by_location,
                return_value_if_denominator_is_zero=1.0)
            new_values = round_(shrink_factor * location_dataset.get_attribute(
                this_refinement.location_capacity_attribute))
            location_dataset.modify_attribute(
                this_refinement.location_capacity_attribute, new_values)
            self._add_refinement_info_to_dataset(
                location_dataset, ("refinement_id", "transaction_id"),
                this_refinement,
                index=movers_location_index)

        agent_dataset.modify_attribute(location_dataset.get_id_name()[0],
                                       -1 *
                                       ones(movers_index.size, dtype='int32'),
                                       index=movers_index)
        self._add_refinement_info_to_dataset(agent_dataset,
                                             self.id_names,
                                             this_refinement,
                                             index=movers_index)
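_subtract and _add communicate through agents_pool: subtract appends the indices of agents it unplaces, and a later add draws from that pool before cloning new agents. A minimal pure-Python sketch of the handoff, all values hypothetical:

agents_pool = []                          # shared across refinement actions
# subtract: unplace three agents and remember their indices
movers_index = [17, 42, 99]
agents_pool += movers_index
# add: reuse pooled agents first, clone only the shortfall
amount = 5
amount_from_pool = min(amount, len(agents_pool))
reused = agents_pool[:amount_from_pool]
agents_pool = agents_pool[amount_from_pool:]
amount_to_clone = amount - amount_from_pool   # 2 agents still need cloning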
Example #48
    def run(self,
            location_set,
            deletion_event_set,
            current_year,
            dataset_pool=None):
        """ The deletion_event_set is expected to have attributes:
                grid_id, scheduled_year, number_of_jobs, number_of_households
            The method finds jobs/households located in the given locations (grid_id),
            then samples the given amount for this year and unplaces them.
            If the value for number_of_jobs/number_of_households is -2, the model removes 
            all jobs/households from the location.
        """
        if not deletion_event_set or (deletion_event_set.size() == 0):
            logger.log_status("No jobs/households to be deleted.")
            return

        idx_of_events_this_year = deletion_event_set.get_attribute(
            "scheduled_year") == current_year
        if idx_of_events_this_year.sum() == 0:
            logger.log_status("No jobs/households to be deleted.")
            return

        self.dataset_pool = self.create_dataset_pool(dataset_pool)

        location_id_name = location_set.get_id_name()[0]
        location_ids_in_event_set = deletion_event_set.get_attribute_by_index(
            location_id_name, idx_of_events_this_year)

        agents_dict = {
            "number_of_jobs": "job",
            "number_of_households": "household"
        }
        # load attributes and datasets needed
        delete_agents = {"job": False, "household": False}
        agent_sets = {}
        for attribute_name, dataset_name in agents_dict.iteritems():
            if attribute_name in deletion_event_set.get_known_attribute_names(
            ):
                values = deletion_event_set.get_attribute_by_index(
                    attribute_name, idx_of_events_this_year)
                if values.sum() > 0:
                    agent_sets[dataset_name] = self.dataset_pool.get_dataset(
                        dataset_name)
                    if location_id_name not in agent_sets[
                            dataset_name].get_known_attribute_names():
                        # compute agents locations
                        agent_sets[
                            dataset_name].compute_one_variable_with_unknown_package(
                                location_id_name, self.dataset_pool)
                    delete_agents[dataset_name] = True

        for attribute_name, dataset_name in agents_dict.iteritems():
            if not delete_agents[dataset_name]:
                continue
            # iterate over locations
            for location_id in location_ids_in_event_set:
                number_of_agents = eval(
                    "deletion_event_set.get_data_element_by_id((location_id, current_year)).%s"
                    % attribute_name)
                agent_ids = agent_sets[dataset_name].get_attribute(
                    location_id_name)
                agent_index = where(agent_ids == location_id)[0]
                if (number_of_agents
                        == -2) or (agent_index.size <=
                                   number_of_agents):  # unplace all agents
                    unplace_index = agent_index
                else:  # sample agents
                    unplace_index = sample_noreplace(agent_index,
                                                     number_of_agents)
                agent_sets[dataset_name].modify_attribute(
                    name=location_id_name,
                    data=resize(array([-1], dtype=agent_ids.dtype),
                                unplace_index.size),
                    index=unplace_index)
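A minimal sketch of the unplace decision above: -2 is a sentinel meaning "remove everyone", otherwise sample without replacement (numpy.random.choice standing in for opus_core's sample_noreplace; values hypothetical):

import numpy

agent_index = numpy.array([3, 8, 15, 22, 40])   # agents at this location
number_of_agents = 2                             # from the event set; -2 means all
if number_of_agents == -2 or agent_index.size <= number_of_agents:
    unplace_index = agent_index
else:
    unplace_index = numpy.random.choice(agent_index, size=number_of_agents, replace=False)
# these agents then get their location id set to -1 (unplaced)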
Example #49
    def run(self, location_set, agent_event_set, agent_set, current_year, dataset_pool=None):
        """ The agent_event_set is expected to have attributes:
                grid_id, scheduled_year, total_number, is_percentage, change_type, (optionally other agent characteristics)
            'grid_id' is not a mandatory name, but it must match the id name of the location_set.
            Thus, the model works on any geography level.
            'is_percentage' (bool) determines if the 'total_number' is a percentage of existing agents (True) or 
            an absolute number (False) - it is relevant only when deleting agents.
            'change_type' can have values 'D' (delete), 'A' (add), 'R' (replace) and determines the type
            of change for the agents. If this column is missing, the model considers 'D' as default
            for all entries in the agent_event_set.
            If the change type is 'D', 
            the method finds agents from the agent_set (jobs, households) located in the given locations 
            (e.g. grid_id), then samples the given amount for the current_year and unplaces them.
            If other characteristics columns are contained in the agent_event_set, their names
            must match column names in the agent_set (e.g. 'sector_id' for jobs).
            In such a case the deletion is done among agents that match the given characteristics.
        """        
        if not agent_event_set or (agent_event_set.size() == 0): 
            logger.log_status("No %s agents for event processing." % agent_set.get_dataset_name())
            return

        idx_of_events_this_year = agent_event_set.get_attribute("scheduled_year") == current_year
        if idx_of_events_this_year.sum() == 0:
            logger.log_status("No %s agents for this year event processing." % agent_set.get_dataset_name())
            return
        
        self.dataset_pool = self.create_dataset_pool(dataset_pool)
        
        location_id_name = location_set.get_id_name()[0]
        location_ids_in_event_set = agent_event_set.get_attribute_by_index(location_id_name, 
                                                                           idx_of_events_this_year)
        
        other_characteristics = agent_event_set.get_known_attribute_names()
        for name in agent_event_set.get_id_name():
            other_characteristics.remove(name)
        other_characteristics.remove("total_number")
        
        totals = agent_event_set.get_attribute_by_index("total_number", idx_of_events_this_year)
        if "change_type" not in agent_event_set.get_known_attribute_names():
            types_of_change = array(idx_of_events_this_year.sum()*['D'])
        else:
            types_of_change = agent_event_set.get_attribute_by_index("change_type", 
                                                                           idx_of_events_this_year)
            other_characteristics.remove("change_type")
        if "is_percentage" not in agent_event_set.get_known_attribute_names():
            is_percentage = zeros(idx_of_events_this_year.sum(), dtype='bool8')
        else:
            is_percentage = agent_event_set.get_attribute_by_index("is_percentage", 
                                                                           idx_of_events_this_year)
            other_characteristics.remove("is_percentage")
        
        # pre-load other characteristics
        for name in other_characteristics:
            agent_event_set.get_attribute(name)
            
        if location_id_name not in agent_set.get_known_attribute_names():
            # compute agents locations
            agent_set.compute_one_variable_with_unknown_package(location_id_name, self.dataset_pool)
                    
        # iterate over locations
        for ilocation_id in range(location_ids_in_event_set.size):
            agent_ids = agent_set.get_attribute(location_id_name)
            location_id = location_ids_in_event_set[ilocation_id]
            change_type = types_of_change[ilocation_id]

            agents_to_consider = agent_ids == location_id
            for characteristics in other_characteristics:
                characteristics_value = eval("agent_event_set.get_data_element_by_id((location_id, current_year)).%s" % 
                                             characteristics)
                agents_to_consider = logical_and(agents_to_consider, 
                                                 agent_set.get_attribute(characteristics) == characteristics_value)
            number_of_agents = totals[ilocation_id]
            agent_index = where(agents_to_consider)[0]  
            if is_percentage[ilocation_id]: # number_of_agents means percentage; convert to absolute number
                number_of_agents = agent_index.size*number_of_agents/100.0
            number_of_agents = int(number_of_agents)
            if change_type == 'D':
                if number_of_agents > 0:
                    if agent_index.size <= number_of_agents: # unplace all agents
                        unplace_index = agent_index
                    else: # sample agents
                        unplace_index = sample_noreplace(agent_index, number_of_agents)
                    agent_set.modify_attribute(name=location_id_name, 
                                           data=resize(array([-1], dtype=agent_ids.dtype), unplace_index.size),
                                           index = unplace_index)
            elif change_type == 'A':
                if number_of_agents > 0:
                    data = {agent_set.get_id_name()[0]: arange(1, number_of_agents+1, 1) + agent_set.get_id_attribute().max()}
                    data[location_id_name] = array([location_id] * number_of_agents)
                
                    for characteristics in other_characteristics:
                        characteristics_value = eval("agent_event_set.get_data_element_by_id((location_id, current_year)).%s" % 
                                                     characteristics)
                        data[characteristics] = array([characteristics_value] * number_of_agents)
                
                    agent_set.add_elements(data, require_all_attributes=False)
    def estimate(self,
                 specification,
                 dataset,
                 outcome_attribute,
                 index=None,
                 procedure=None,
                 data_objects=None,
                 estimate_config=None,
                 debuglevel=0):
        """'specification' is of type EquationSpecification,
            'dataset' is of type Dataset,
            'outcome_attribute' - string that determines the dependent variable,
            'index' are indices of individuals in dataset for which
                    the model runs. If it is None, the whole dataset is considered.
            'procedure' - name of the estimation procedure. If it is None,
                there should be an entry "estimation" in 'estimate_config' that determines the procedure. The class
                must have a method 'run' that takes as arguments 'data', 'regression_procedure' and 'resources'.
                It returns a dictionary with entries 'estimators', 'standard_errors' and 't_values' (all 1D numpy arrays).
            'data_objects' is a dictionary where each key is the name of a data object
                    ('zone', ...) and its value is an object of class Dataset.
            'estimate_config' is of type Resources; it gives additional arguments for the estimation procedure.
            'debuglevel' overrides the class attribute 'debuglevel'.
        """
        self.debug.flag = debuglevel
        if estimate_config is None:
            estimate_config = Resources()
        if not isinstance(estimate_config, Resources) and isinstance(estimate_config, dict):
            estimate_config = Resources(estimate_config)
        self.estimate_config = estimate_config.merge_with_defaults(
            self.estimate_config)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.procedure = procedure
        if self.procedure is None:
            self.procedure = self.estimate_config.get("estimation", None)
        if self.procedure is not None:
            self.procedure = ModelComponentCreator().get_model_component(
                self.procedure)
        else:
            logger.log_warning(
                "No estimation procedure given, or problems with loading the corresponding module."
            )

        compute_resources = Resources({"debug": self.debug})
        if dataset.size() <= 0:  # no data loaded yet
            dataset.get_id_attribute()
        if index is None:
            index = arange(dataset.size())
        if not isinstance(index, ndarray):
            index = array(index)

        estimation_size_agents = self.estimate_config.get(
            "estimation_size_agents", None)  # should be a proportion of the agent set
        if estimation_size_agents is None:
            estimation_size_agents = 1.0
        else:
            estimation_size_agents = max(min(estimation_size_agents, 1.0),
                                         0.0)  # between 0 and 1

        if estimation_size_agents < 1.0:
            self.debug.print_debug("Sampling agents for estimation ...", 3)
            estimation_idx = sample_noreplace(
                arange(index.size), int(index.size * estimation_size_agents))
        else:
            estimation_idx = arange(index.size)

        estimation_idx = index[estimation_idx]
        self.debug.print_debug(
            "Number of observations for estimation: " +
            str(estimation_idx.size), 2)
        if estimation_idx.size <= 0:
            self.debug.print_debug("Nothing to be done.", 2)
            return (None, None)

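        # build coefficient containers from the specification and partition agents into submodels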
        coefficients = create_coefficient_from_specification(specification)
        specified_coefficients = SpecifiedCoefficients().create(coefficients,
                                                                specification,
                                                                neqs=1)
        submodels = specified_coefficients.get_submodels()
        self.get_status_for_gui().update_pieces_using_submodels(
            submodels=submodels, leave_pieces=2)
        self.map_agents_to_submodels(
            submodels,
            self.submodel_string,
            dataset,
            estimation_idx,
            dataset_pool=self.dataset_pool,
            resources=compute_resources,
            submodel_size_max=self.estimate_config.get('submodel_size_max',
                                                       None))
        variables = specified_coefficients.get_full_variable_names_without_constants()
        self.debug.print_debug("Compute variables ...", 4)
        self.increment_current_status_piece()
        dataset.compute_variables(variables,
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)

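        # estimate each submodel separately, collecting its data, outcome and fitted coefficients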
        coef = {}
        estimated_coef = {}
        self.outcome = {}
        dataset.compute_variables([outcome_attribute],
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)
        regression_resources = Resources(estimate_config)
        regression_resources.merge({"debug": self.debug})
        outcome_variable_name = VariableName(outcome_attribute)
        for submodel in submodels:
            coef[submodel] = SpecifiedCoefficientsFor1Submodel(
                specified_coefficients, submodel)
            self.increment_current_status_piece()
            logger.log_status("Estimate regression for submodel " +
                              str(submodel),
                              tags=["estimate"],
                              verbosity_level=2)
            logger.log_status("Number of observations: " +
                              str(self.observations_mapping[submodel].size),
                              tags=["estimate"],
                              verbosity_level=2)
            self.data[submodel] = dataset.create_regression_data_for_estimation(
                coef[submodel],
                index=estimation_idx[self.observations_mapping[submodel]])
            self.coefficient_names[submodel] = \
                coef[submodel].get_coefficient_names_without_constant()[0, :]
            if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0) \
                    and (self.procedure is not None):  # observations for this submodel available
                self.outcome[submodel] = dataset.get_attribute_by_index(
                    outcome_variable_name.get_alias(),
                    estimation_idx[self.observations_mapping[submodel]])
                regression_resources.merge({"outcome": self.outcome[submodel]})
                regression_resources.merge({
                    "coefficient_names":
                    self.coefficient_names[submodel].tolist(),
                    "constant_position":
                    coef[submodel].get_constants_positions()
                })
                estimated_coef[submodel] = self.procedure.run(
                    self.data[submodel],
                    self.regression,
                    resources=regression_resources)
                if "estimators" in estimated_coef[submodel].keys():
                    coef[submodel].set_coefficient_values(
                        estimated_coef[submodel]["estimators"])
                if "standard_errors" in estimated_coef[submodel].keys():
                    coef[submodel].set_standard_errors(
                        estimated_coef[submodel]["standard_errors"])
                if "other_measures" in estimated_coef[submodel].keys():
                    for measure in estimated_coef[submodel][
                            "other_measures"].keys():
                        coef[submodel].set_measure(
                            measure, estimated_coef[submodel]["other_measures"]
                            [measure])
                if "other_info" in estimated_coef[submodel].keys():
                    for info in estimated_coef[submodel]["other_info"]:
                        coef[submodel].set_other_info(
                            info, estimated_coef[submodel]["other_info"][info])
        coefficients.fill_coefficients(coef)

        self.save_predicted_values_and_errors(specification,
                                              coefficients,
                                              dataset,
                                              outcome_variable_name,
                                              index=index,
                                              data_objects=data_objects)

        return (coefficients, estimated_coef)
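For reference, the 'estimation_size_agents' handling above clamps the requested proportion to [0, 1] and, when it is below 1.0, estimates on a random subsample of the index. A self-contained sketch of just that selection step (numpy only; numpy.random.choice with replace=False stands in for sample_noreplace; the function name is hypothetical):

from numpy import arange
from numpy.random import choice

def select_estimation_subsample(index, estimation_size_agents=None):
    """Return a subset of 'index' for estimation, sampled without replacement."""
    if estimation_size_agents is None:
        proportion = 1.0
    else:
        proportion = max(min(estimation_size_agents, 1.0), 0.0)  # clamp to [0, 1]
    if proportion < 1.0:
        subsample = choice(arange(index.size),
                           int(index.size * proportion), replace=False)
    else:
        subsample = arange(index.size)
    return index[subsample]

# e.g. select_estimation_subsample(arange(100), 0.25) returns 25 of the 100 indices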