def prepare_for_estimate(specification_dict=None,
                         specification_storage=None,
                         specification_table=None,
                         agent_set=None,
                         household_set=None,
                         agents_for_estimation_storage=None,
                         agents_for_estimation_table=None,
                         households_for_estimation_table=None,
                         join_datasets=False,
                         filter=None,
                         data_objects=None):
    specification = get_specification_for_estimation(specification_dict,
                                                     specification_storage,
                                                     specification_table)
    if agents_for_estimation_storage is not None:
        estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                 in_table_name=agents_for_estimation_table,
                                 id_name=agent_set.get_id_name(),
                                 dataset_name=agent_set.get_dataset_name())
        hh_estimation_set = None
        if households_for_estimation_table is not None:
            hh_estimation_set = Dataset(
                in_storage=agents_for_estimation_storage,
                in_table_name=households_for_estimation_table,
                id_name=household_set.get_id_name(),
                dataset_name=household_set.get_dataset_name())

        filter_index = arange(estimation_set.size())
        if filter:
            estimation_set.compute_variables(filter,
                                             resources=Resources(data_objects))
            filter_index = where(estimation_set.get_attribute(filter) > 0)[0]
            #estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)

        if join_datasets:
            if hh_estimation_set is not None:
                household_set.join_by_rows(hh_estimation_set,
                                           require_all_attributes=False,
                                           change_ids_if_not_unique=True)

            agent_set.join_by_rows(estimation_set,
                                   require_all_attributes=False,
                                   change_ids_if_not_unique=True)
            index = arange(agent_set.size() - estimation_set.size(),
                           agent_set.size())[filter_index]
        else:
            index = agent_set.get_id_index(
                estimation_set.get_id_attribute()[filter_index])
    else:
        if agent_set is not None:
            index = arange(agent_set.size())
        else:
            index = None

    return (specification, index)
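# A minimal usage sketch of the join_datasets=True branch above, assuming
# opus_core is installed; the table names and contents are hypothetical.
from numpy import arange, array
from opus_core.storage_factory import StorageFactory
from opus_core.datasets.dataset import Dataset

sketch_storage = StorageFactory().get_storage('dict_storage')
sketch_storage.write_table(table_name='households',
                           table_data={'household_id': arange(5) + 1,
                                       'income': array([10, 40, 25, 5, 30])})
sketch_storage.write_table(table_name='households_for_estimation',
                           table_data={'household_id': arange(3) + 101,
                                       'income': array([20, 50, 15])})
sketch_agents = Dataset(in_storage=sketch_storage, in_table_name='households',
                        id_name='household_id', dataset_name='household')
sketch_estimation = Dataset(in_storage=sketch_storage,
                            in_table_name='households_for_estimation',
                            id_name=sketch_agents.get_id_name(),
                            dataset_name=sketch_agents.get_dataset_name())

# join_by_rows appends the estimation rows to the agent set, so the
# estimation index is simply the appended tail of the joined dataset:
sketch_agents.join_by_rows(sketch_estimation, require_all_attributes=False,
                           change_ids_if_not_unique=True)
sketch_index = arange(sketch_agents.size() - sketch_estimation.size(),
                      sketch_agents.size())
assert sketch_index.size == 3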
def prepare_for_estimate(specification_dict=None,
                         specification_storage=None,
                         specification_table=None,
                         agent_set=None,
                         agents_for_estimation_storage=None,
                         agents_for_estimation_table=None,
                         join_datasets=False,
                         filter=None,
                         agents_filter=None,
                         data_objects=None):
    """
    filter is an alias for agents_filter, kept for backward compatibility; agents_filter is the more specific name.
    """

    if agents_filter is None and filter is not None:
        agents_filter = filter

    specification = get_specification_for_estimation(specification_dict,
                                                     specification_storage,
                                                     specification_table)
    if agents_for_estimation_storage is not None:
        estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                 in_table_name=agents_for_estimation_table,
                                 id_name=agent_set.get_id_name(),
                                 dataset_name=agent_set.get_dataset_name())

        filter_index = arange(estimation_set.size())
        if agents_filter:
            filter_condition = estimation_set.compute_variables(
                agents_filter, resources=Resources(data_objects))
            filter_index = where(filter_condition)[0]

        if join_datasets:
            agent_set.join_by_rows(estimation_set,
                                   require_all_attributes=False,
                                   change_ids_if_not_unique=True)
            index = arange(agent_set.size() - estimation_set.size(),
                           agent_set.size())[filter_index]
        else:
            index = agent_set.get_id_index(
                estimation_set.get_id_attribute()[filter_index])
    else:
        if agent_set is not None:
            index = arange(agent_set.size())
        else:
            index = None

    return (specification, index)
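# agents_filter takes an Opus variable expression; only rows where it
# computes to a nonzero value survive. A hedged sketch, reusing
# sketch_storage and sketch_agents from the sketch above; the expression
# and the empty specification_dict are hypothetical.
specification, index = prepare_for_estimate(
    specification_dict={},
    agent_set=sketch_agents,
    agents_for_estimation_storage=sketch_storage,
    agents_for_estimation_table='households_for_estimation',
    agents_filter='household.income > 15',
    join_datasets=True)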
def get_households_for_estimation(agent_set,
                                  in_storage,
                                  agents_for_estimation_table_name,
                                  exclude_condition=None,
                                  join_datasets=True):
    estimation_set = Dataset(in_storage=in_storage,
                             in_table_name=agents_for_estimation_table_name,
                             id_name=agent_set.get_id_name(),
                             dataset_name=agent_set.get_dataset_name())
    agent_set.unload_primary_attributes()
    agent_set.load_dataset(attributes='*')
    estimation_set.load_dataset(
        attributes=agent_set.get_primary_attribute_names())
    if join_datasets:
        agent_set.join_by_rows(estimation_set,
                               require_all_attributes=False,
                               change_ids_if_not_unique=True)
        index = arange(agent_set.size() - estimation_set.size(),
                       agent_set.size())
    else:
        index = agent_set.get_id_index(estimation_set.get_id_attribute())

    exclude_ids = []
    if exclude_condition is not None:
        exclude_ids = agent_set.get_id_attribute()[where(
            agent_set.compute_variables(exclude_condition))]

    for id in exclude_ids:
        minus = agent_set.get_id_index(id)
        if minus in index:
            index = index[index != minus]

    return (agent_set, index)
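# The per-id exclusion loop above is quadratic in the worst case. Since
# get_id_index already accepts an array of ids (see the else branch), the
# same result can be computed in one step; a drop-in sketch for the loop,
# using numpy's in1d, which, unlike setdiff1d, keeps the order of index:
from numpy import in1d

if len(exclude_ids) > 0:
    exclude_index = agent_set.get_id_index(exclude_ids)
    index = index[~in1d(index, exclude_index)]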
    def prepare_for_estimate(self,
                             specification_dict=None,
                             specification_storage=None,
                             specification_table=None,
                             agent_set=None,
                             agents_for_estimation_storage=None,
                             agents_for_estimation_table=None,
                             join_datasets=False,
                             index_to_unplace=None,
                             portion_to_unplace=1.0,
                             agent_filter=None,
                             data_objects={}):
        from opus_core.model import get_specification_for_estimation
        specification = get_specification_for_estimation(
            specification_dict, specification_storage, specification_table)
        if (agent_set is not None) and (index_to_unplace is not None):
            if self.location_id_string is not None:
                agent_set.compute_variables(self.location_id_string,
                                            resources=Resources(data_objects))
            if portion_to_unplace < 1:
                unplace_size = int(portion_to_unplace * index_to_unplace.size)
                end_index_to_unplace = sample_noreplace(
                    index_to_unplace, unplace_size)
            else:
                end_index_to_unplace = index_to_unplace
            logger.log_status("Unplace " + str(end_index_to_unplace.size) +
                              " agents.")
            agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                       -1 * ones(end_index_to_unplace.size),
                                       end_index_to_unplace)
        # create agents for estimation
        if agents_for_estimation_storage is not None:
            estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                     in_table_name=agents_for_estimation_table,
                                     id_name=agent_set.get_id_name(),
                                     dataset_name=agent_set.get_dataset_name())
            if agent_filter is not None:
                estimation_set.compute_variables(
                    agent_filter, resources=Resources(data_objects))
                index = where(
                    estimation_set.get_attribute(agent_filter) > 0)[0]
                estimation_set.subset_by_index(
                    index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                agent_set.join_by_rows(estimation_set,
                                       require_all_attributes=False,
                                       change_ids_if_not_unique=True)
                index = arange(agent_set.size() - estimation_set.size(),
                               agent_set.size())
            else:
                index = agent_set.get_id_index(
                    estimation_set.get_id_attribute())
        else:
            index = arange(agent_set.size())
        return (specification, index)
    def test_dict_dataset(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(
            table_name='dataset',
            table_data={
                "id": array([1, 2, 3, 4]),
                "attr": array([4, 7, 2, 1])
            })

        ds = Dataset(in_storage=storage, in_table_name='dataset', id_name="id")

        self.assert_(ds.get_attribute("attr").sum() == 14,
                     "Something is wrong with the dataset.")
        self.assert_(ds.size() == 4, "Wrong size of dataset.")
    def prepare_for_estimate(self, dataset=None,
                             dataset_for_estimation_storage=None,
                             dataset_for_estimation_table=None,
                             join_datasets=False, **kwargs):
        from opus_core.model import get_specification_for_estimation
        from opus_core.datasets.dataset import Dataset
        spec = get_specification_for_estimation(**kwargs)
        if (dataset_for_estimation_storage is not None) and (dataset_for_estimation_table is not None):
            estimation_set = Dataset(in_storage=dataset_for_estimation_storage,
                                     in_table_name=dataset_for_estimation_table,
                                     id_name=dataset.get_id_name(),
                                     dataset_name=dataset.get_dataset_name())
            if join_datasets:
                dataset.join_by_rows(estimation_set,
                                     require_all_attributes=False,
                                     change_ids_if_not_unique=True)
                index = arange(dataset.size() - estimation_set.size(),
                               dataset.size())
            else:
                index = dataset.get_id_index(estimation_set.get_id_attribute())
        else:
            index = None
        return (spec, index)
    def prepare_for_estimate(self, specification_dict=None,
                             specification_storage=None,
                             specification_table=None,
                             agent_set=None,
                             agents_for_estimation_storage=None,
                             agents_for_estimation_table=None,
                             filter_for_estimation_set=None,
                             data_objects=None):
        specification = get_specification_for_estimation(specification_dict,
                                                         specification_storage,
                                                         specification_table)
        if self.filter is not None:
            agents_index = where(self.proposal_set.compute_variables(self.filter))[0]
        else:
            # guard for the unfiltered case so agents_index is always defined
            agents_index = arange(self.proposal_set.size())

        id_attribute_name = ['parcel_id', 'template_id', 'is_redevelopment']
        if agents_for_estimation_storage is not None:
            estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                     in_table_name=agents_for_estimation_table,
                                     id_name=id_attribute_name,
                                     dataset_name=agent_set.get_dataset_name())

            filter_index = arange(estimation_set.size())
            if filter_for_estimation_set:
                filter_index = where(estimation_set.compute_variables(
                    filter_for_estimation_set,
                    resources=Resources(data_objects)))[0]
                estimation_set.subset_by_index(filter_index,
                                               flush_attributes_if_not_loaded=False)

            id_attributes = None
            for attr_name in id_attribute_name:
                attr_value = agent_set.get_attribute_as_column(attr_name)
                if id_attributes is None:
                    id_attributes = attr_value
                else:
                    id_attributes = concatenate((id_attributes, attr_value), axis=1)

            id_index = estimation_set.try_get_id_index(id_attributes,
                                                       return_value_if_not_found=-1)

            status_id = 2 * ones(agent_set.size(), dtype="int8")
            status_id[where(id_index != -1)] = 1
            name = self.choice_attribute_name.get_alias()
            if name in agent_set.get_known_attribute_names():
                agent_set.set_values_of_one_attribute(name,
                                                      status_id[where(id_index != -1)],
                                                      where(id_index != -1)[0])
            else:
                agent_set.add_primary_attribute(status_id, name)

        return (specification, agents_index)
    def create_edges(self, input_file_dir, input_file_name, output_file_name):
        storage = StorageFactory().get_storage(type='tab_storage',
                                               subdir='store',
                                               storage_location=input_file_dir)
        dataset = Dataset(in_storage=storage,
                          id_name=['stop_id', 'sch_time'],
                          in_table_name=input_file_name)

        n = dataset.size()
        trip_ids = dataset.get_attribute("stop_id")
        unique_trip_ids = unique(trip_ids)
        source_list = list()
        target_list = list()
        time_list = list()

        for trip in unique_trip_ids:
            idx = where(dataset.get_attribute("stop_id") == trip)[0]
            nodes = dataset.get_attribute_by_index("node_id", idx)
            times = dataset.get_attribute_by_index("sch_time", idx)
            for inode in range(nodes.size - 1):
                source_list.append(nodes[inode])
                target_list.append(nodes[inode + 1])
                time_list.append(times[inode + 1] - times[inode])

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(
            table_name='edges',
            table_data={
                'edge_id': arange(len(source_list)) + 1,
                'source': array(source_list),  # output field, use array
                'target': array(target_list),  # output field, use array
                'cost': array(time_list, dtype=int32)
            })

        edges = Dataset(in_storage=storage,
                        in_table_name='edges',
                        id_name="edge_id")

        edges.write_dataset(attributes=["source", "target", "cost"],
                            out_storage=storage,
                            out_table_name=output_file_name)
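# create_edges assumes the rows within each stop_id group arrive already
# ordered by sch_time; otherwise edge costs can come out negative. If the
# input file does not guarantee that order, sorting each group first is a
# cheap safeguard; a sketch of the adjusted loop (argsort comes with numpy):
for trip in unique_trip_ids:
    idx = where(dataset.get_attribute("stop_id") == trip)[0]
    nodes = dataset.get_attribute_by_index("node_id", idx)
    times = dataset.get_attribute_by_index("sch_time", idx)
    order = times.argsort()  # force time order within the trip
    nodes, times = nodes[order], times[order]
    for inode in range(nodes.size - 1):
        source_list.append(nodes[inode])
        target_list.append(nodes[inode + 1])
        time_list.append(times[inode + 1] - times[inode])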
class Test(opus_unittest.OpusTestCase):
    def setUp(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(
            table_name='households',
            table_data={
                'household_id': arange(10) + 1,
                #  'household_id':array([1, 2, 3, 4, 5, 6, 7, 8]),
                #  'income'      :array([1, 3, 2, 1, 3, 8, 5, 4]),
                # #'category_id' :array([1, 2, 2, 1, 2, 3, 3, 2]),
                #  'building_id' :array([1, 2, 4, 3, 3, 2, 4, 2]),
                ##'large_area_id':array([1, 1, 2, 3, 3, 1, 2, 1]),
                #
                'grid_id': arange(-1, 9, 1) + 1,
                'lucky': array([1, 0, 1, 0, 1, 1, 1, 1, 0, 0])
            })

        storage.write_table(
            table_name='gridcells',
            table_data={
                #'building_id':   array([1, 2, 3, 4]),
                #'large_area_id': array([1, 1, 3, 2]),
                'grid_id': arange(15) + 1,
                'filter': array([0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1]),
                'weight': array([0.1, 9, 15, 2, 5, 1, 6, 2.1, .3, 4, 3, 1, 10, 8, 7])
            })
        dataset_pool = SessionConfiguration(
            in_storage=storage).get_dataset_pool()

        #create households
        self.households = Dataset(in_storage=storage,
                                  in_table_name='households',
                                  id_name="household_id",
                                  dataset_name="household")

        # create gridcells
        self.gridcells = Dataset(in_storage=storage,
                                 in_table_name='gridcells',
                                 id_name="grid_id",
                                 dataset_name="gridcell")
        dataset_pool.replace_dataset('household', self.households)
        dataset_pool.replace_dataset('gridcell', self.gridcells)

    def test_1(self):
        """"""
        sample_size = 5
        # check the individual gridcells
        # This is a stochastic model, so it may legitimately fail occassionally.
        index1 = where(self.households.get_attribute("lucky"))[0]
        #index2 = where(self.gridcells.get_attribute("filter"))[0]
        weight = self.gridcells.get_attribute("weight")
        estimation_config = {
            "agent_category_definition": ["household.lucky"],
            "choice_category_definition": ["gridcell.filter+1"]
        }
        for icc in [0, 1]:  #include_chosen_choice?
            #icc = sample([0,1],1)
            sampler_ret = weighted_sampler_by_category().run(
                dataset1=self.households,
                dataset2=self.gridcells,
                index1=index1,
                sample_size=sample_size,
                include_chosen_choice=icc,
                resources=estimation_config)
            # get results
            sampled_index = sampler_ret.get_2d_index()
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype=DTYPE)
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]] = where_chosen[1]

            self.assertEqual(sampled_index.shape, (index1.size, sample_size))
            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                    self.households.get_attribute("grid_id")[index1],
                    UNPLACED_ID)
                chosen_choice_index = resize(array([UNPLACED_ID], dtype=DTYPE),
                                             index1.shape)
                w = where(chosen_choices >= 0)[0]
                # for 64 bit machines, need to coerce the type to int32 -- on a
                # 32 bit machine the astype(int32) doesn't do anything
                chosen_choice_index[w] = sampled_index[
                    w, chosen_choices[w]].astype(int32)
                self.assert_(
                    alltrue(equal(placed_agents_index, chosen_choice_index)))
                sampled_index = sampled_index[:, 1:]

            self.assert_(
                alltrue(
                    lookup(sampled_index.ravel(),
                           arange(self.gridcells.size()),
                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))
            self.assert_(all(not_equal(weight[sampled_index], 0.0)))
    def prepare_for_estimate(
        self,
        add_member_prefix=True,
        specification_dict=None,
        specification_storage=None,
        specification_table=None,
        building_set=None,
        buildings_for_estimation_storage=None,
        buildings_for_estimation_table=None,
        constants=None,
        base_year=0,
        building_categories=None,
        location_id_variable=None,
        join_datasets=False,
        data_objects=None,
        **kwargs
    ):
        #        buildings = None

        if building_set is not None:
            if location_id_variable is not None:
                building_set.compute_variables(location_id_variable, resources=Resources(data_objects))

        # create agents for estimation
        if buildings_for_estimation_storage is not None:
            estimation_set = Dataset(
                in_storage=buildings_for_estimation_storage,
                in_table_name=buildings_for_estimation_table,
                id_name=building_set.get_id_name(),
                dataset_name=building_set.get_dataset_name(),
            )
            if location_id_variable:
                estimation_set.compute_variables(location_id_variable, resources=Resources(data_objects))
                # needs to be a primary attribute because of the join method below
                estimation_set.add_primary_attribute(
                    estimation_set.get_attribute(location_id_variable), VariableName(location_id_variable).alias()
                )

            years = estimation_set.get_attribute("scheduled_year")
            recent_years = constants["recent_years"]
            indicator = zeros(estimation_set.size(), dtype="int32")
            for year in range(base_year - recent_years, base_year + 1):
                indicator = logical_or(indicator, years == year)
            idx = where(logical_not(indicator))[0]
            estimation_set.remove_elements(idx)

            # if filter:
            # estimation_set.compute_variables(filter, resources=Resources(data_objects))
            # index = where(estimation_set.get_attribute(filter) > 0)[0]
            # estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                building_set.join_by_rows(estimation_set, require_all_attributes=False, change_ids_if_not_unique=True)
                index = arange(building_set.size() - estimation_set.size(), building_set.size())
            else:
                index = building_set.get_id_index(estimation_set.get_id_attribute())
        else:
            if building_set is not None:
                index = arange(building_set.size())
            else:
                index = None

        if add_member_prefix:
            specification_table = self.group_member.add_member_prefix_to_table_names([specification_table])

        from opus_core.model import get_specification_for_estimation

        # from urbansim.functions import compute_supply_and_add_to_location_set
        specification = get_specification_for_estimation(specification_dict, specification_storage, specification_table)

        # specification, dummy = AgentLocationChoiceModelMember.prepare_for_estimate(self, add_member_prefix,
        # specification_dict, specification_storage,
        # specification_table,
        # location_id_variable=location_id_variable,
        # data_objects=data_objects, **kwargs)
        return (specification, index)
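# The scheduled_year loop above marks buildings in a contiguous window of
# years, so the indicator needs no loop; an equivalent sketch:
from numpy import logical_and

indicator = logical_and(years >= base_year - recent_years,
                        years <= base_year)  # same mask as the logical_or loop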
    def prepare_for_estimate(self,
                             specification_dict=None,
                             specification_storage=None,
                             specification_table=None,
                             agent_set=None,
                             agents_for_estimation_storage=None,
                             agents_for_estimation_table=None,
                             join_datasets=False,
                             index_to_unplace=None,
                             portion_to_unplace=1.0,
                             compute_lambda=False,
                             grouping_location_set=None,
                             movers_variable=None,
                             movers_index=None,
                             filter=None,
                             location_id_variable=None,
                             data_objects={}):
        """Put 'location_id_variable' always in, if the location id is to be computed on the estimation set,
        i.e. if it is not a primary attribute of the estimation set. Set 'index_to_unplace' to None, if 'compute_lambda' is True.
        In such a case, the annual supply is estimated without unplacing agents. 'grouping_location_set', 'movers_variable' and
        'movers_index' must be given, if 'compute_lambda' is True.
        """
        from opus_core.model import get_specification_for_estimation
        from urbansim.functions import compute_supply_and_add_to_location_set
        specification = get_specification_for_estimation(
            specification_dict, specification_storage, specification_table)
        if (agent_set is not None) and (index_to_unplace is not None):
            if self.location_id_string is not None:
                agent_set.compute_variables(self.location_id_string,
                                            resources=Resources(data_objects))
            if portion_to_unplace < 1:
                unplace_size = int(portion_to_unplace * index_to_unplace.size)
                end_index_to_unplace = sample_noreplace(
                    index_to_unplace, unplace_size)
            else:
                end_index_to_unplace = index_to_unplace
            logger.log_status("Unplace " + str(end_index_to_unplace.size) +
                              " agents.")
            agent_set.modify_attribute(
                self.choice_set.get_id_name()[0],
                resize(array([-1]), end_index_to_unplace.size),
                end_index_to_unplace)
        if compute_lambda:
            movers = zeros(agent_set.size(), dtype="bool8")
            if movers_index is not None:
                movers[movers_index] = 1
            agent_set.add_primary_attribute(movers, "potential_movers")
            self.estimate_config[
                "weights_for_estimation_string"] = self.estimate_config[
                    "weights_for_estimation_string"] + "_from_lambda"
            compute_supply_and_add_to_location_set(
                self.choice_set,
                grouping_location_set,
                self.run_config["number_of_units_string"],
                self.run_config["capacity_string"],
                movers_variable,
                self.estimate_config["weights_for_estimation_string"],
                resources=Resources(data_objects))

        # create agents for estimation
        if (agents_for_estimation_storage
                is not None) and (agents_for_estimation_table is not None):
            estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                     in_table_name=agents_for_estimation_table,
                                     id_name=agent_set.get_id_name(),
                                     dataset_name=agent_set.get_dataset_name())
            if location_id_variable is not None:
                estimation_set.compute_variables(
                    location_id_variable, resources=Resources(data_objects))
                # needs to be a primary attribute because of the join method below
                estimation_set.add_primary_attribute(
                    estimation_set.get_attribute(location_id_variable),
                    VariableName(location_id_variable).get_alias())
            if filter:
                values = estimation_set.compute_variables(
                    filter, resources=Resources(data_objects))
                index = where(values > 0)[0]
                estimation_set.subset_by_index(
                    index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                agent_set.join_by_rows(estimation_set,
                                       require_all_attributes=False,
                                       change_ids_if_not_unique=True)
                index = arange(agent_set.size() - estimation_set.size(),
                               agent_set.size())
            else:
                index = agent_set.get_id_index(
                    estimation_set.get_id_attribute())
        else:
            if agent_set is not None:
                if filter is not None:
                    values = agent_set.compute_variables(
                        filter, resources=Resources(data_objects))
                    index = where(values > 0)[0]
                else:
                    index = arange(agent_set.size())
            else:
                index = None
        return (specification, index)
    def run(
        self,
        realestate_dataset,
        year=None,
        occupied_spaces_variable="occupied_units",
        total_spaces_variable="total_units",
        target_attribute_name="target_vacancy_rate",
        sample_from_dataset=None,
        sample_filter="",
        reset_attribute_value={},
        year_built="year_built",
        dataset_pool=None,
        append_to_realestate_dataset=False,
        table_name="development_projects",
        dataset_name="development_project",
        id_name="development_project_id",
        **kwargs
    ):
        """         
        sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
        """

        if self.target_vancy_dataset is None:
            raise RuntimeError("target_vacancy_rate dataset is unspecified.")

        if not sample_from_dataset:
            sample_from_dataset = realestate_dataset

        # if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute("year") == year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)

        column_names = list(
            set(self.target_vancy_dataset.get_known_attribute_names())
            - set([target_attribute_name, occupied_spaces_variable, total_spaces_variable, "year", "_hidden_id_"])
        )
        column_names.sort(reverse=True)
        column_values = dict(
            [
                (name, target_vacancy_for_this_year.get_attribute(name))
                for name in column_names + [target_attribute_name]
            ]
        )

        independent_variables = list(set([re.sub("_max$", "", re.sub("_min$", "", col)) for col in column_names]))
        dataset_known_attributes = realestate_dataset.get_known_attribute_names()
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
            if variable not in sample_dataset_known_attributes:
                sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)

        dataset_known_attributes = realestate_dataset.get_known_attribute_names()  # update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1

        sampled_index = array([], dtype=int32)

        # log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "expected", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "expected", "difference", "action"]))
        error_log = ""
        for index in range(target_vacancy_for_this_year.size()):
            this_sampled_index = array([], dtype=int32)
            indicator = ones(realestate_dataset.size(), dtype="bool")
            sample_indicator = ones(sample_from_dataset.size(), dtype="bool")
            criterion = {}  # for logging
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = realestate_dataset.get_attribute(attribute)
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError(
                        "attribute %s used in target vacancy dataset cannot be "
                        "found in dataset %s"
                        % (attribute, realestate_dataset.get_dataset_name())
                    )

                if attribute + "_min" in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute + "_min")[index]
                    criterion.update({attribute + "_min": amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                        sample_indicator *= sample_attribute >= amin
                if attribute + "_max" in column_names:
                    amax = target_vacancy_for_this_year.get_attribute(attribute + "_max")[index]
                    criterion.update({attribute + "_max": amax})
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names:
                    aval = column_values[attribute][index]
                    criterion.update({attribute: aval})
                    if aval == -1:
                        continue
                    elif (
                        aval == -2
                    ):  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        indicator *= dataset_attribute == aval
                        sample_indicator *= sample_attribute == aval

            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all realestate
            ## or in targe_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[
                    index
                ]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]

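            # NOTE: 'col' below is the loop variable leaked from the list
            # comprehension that builds independent_variables above (Python 2
            # scoping), i.e. the last entry of the reverse-sorted column_names.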
            this_total_spaces_variable += "_" + str(criterion[col])
            this_occupied_spaces_variable += "_" + str(criterion[col])

            logger.be_quiet()  # temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(
                this_occupied_spaces_variable, dataset_pool=dataset_pool
            )
            realestate_dataset.compute_one_variable_with_unknown_package(
                this_total_spaces_variable, dataset_pool=dataset_pool
            )
            sample_from_dataset.compute_one_variable_with_unknown_package(
                this_total_spaces_variable, dataset_pool=dataset_pool
            )
            logger.talk()

            actual_num = (realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            # target_num is obsolete with this version.
            target_num = int(
                round(
                    (realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum()
                    / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
                )
            )
            """If the target vacancy is very small and the inflow to the region big it is not enough to check
            only the current simulation year's vacancy. The simulation is more robust if the BTM is anticipating the
            next year's population (of households and jobs).
            #TODO: Make code more general to cover various stratifications in the real estate market.
            """
            if criterion[col] == 1:
                idx = where(self.control_totals.get_attribute("year") == year + 1)[0]
                this_years_control_totals = DatasetSubset(self.control_totals, idx)
                expected_num = int(
                    round(
                        this_years_control_totals.get_attribute("total_number_of_households").sum()
                        / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
                    )
                )
            if criterion[col] == 0:
                idx = where(self.employment_control_totals.get_attribute("year") == year + 1)[0]
                next_years_control_totals = DatasetSubset(self.employment_control_totals, idx)
                expected_num = int(
                    round(
                        next_years_control_totals.get_attribute("number_of_jobs").sum()
                        / (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])
                    )
                )

            diff = expected_num - actual_num

            # Previous version which is checking the current years occupation.
            # diff = target_num - actual_num

            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(
                    logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0
                )[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int(diff / mean_size)
                    ##sampled at least 1 project when diff > 0, otherwise it is a endless loop when num_of_projects_to_sample = 0
                    num_of_projects_to_sample = num_of_projects_to_sample if num_of_projects_to_sample > 0 else 1
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[
                        0 : (1 + searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))
                    ]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += (
                        "There is nothing to sample from %s and no new development will happen for "
                        % sample_from_dataset.get_dataset_name()
                        + ",".join([col + "=" + str(criterion[col]) for col in column_names])
                        + "\n"
                    )
            # if diff < 0: #TODO demolition; not yet supported

            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0:
                    action = "+" + str(action_num)
                if diff < 0:
                    action = "-" + str(action_num)
            cat = [str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(expected_num), str(diff), action]

            if PrettyTable is not None:
                status_log.add_row(cat)
            else:
                logger.log_status("\t".join(cat))

        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)

        result_data = {}
        result_dataset = None
        index = array([], dtype="int32")
        if True:  # sampled_index.size > 0:
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            result_data.setdefault(year_built, resize(year, sampled_index.size).astype("int32"))
            ## also add 'independent_variables' to the new dataset
            for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
                if attribute in reset_attribute_value:
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)

            if id_name and result_data and id_name not in result_data:
                result_data[id_name] = arange(sampled_index.size, dtype="int32") + 1

            storage = StorageFactory().get_storage("dict_storage")
            storage.write_table(table_name=table_name, table_data=result_data)

            result_dataset = Dataset(
                id_name=id_name, in_storage=storage, in_table_name=table_name, dataset_name=dataset_name
            )
            index = arange(result_dataset.size())

        if append_to_realestate_dataset:
            if len(result_data) > 0:
                index = realestate_dataset.add_elements(
                    result_data, require_all_attributes=False, change_ids_if_not_unique=True
                )
            result_dataset = realestate_dataset

        return (result_dataset, index)
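The sampling loop in run() oversamples with replacement and then trims: it draws batches of candidate projects until their combined spaces cover diff, then cuts the batch at the first position where the cumulative spaces reach diff. Below is a minimal, self-contained sketch of that technique; numpy's random.choice stands in for Opus's sample_replace (assumed here to have equivalent draw-with-replacement semantics), and the data is made up.

from numpy import array, concatenate, cumsum, searchsorted
from numpy.random import choice

def oversample_and_trim(spaces, legit_index, diff, batch_size):
    """Sample indices with replacement until their spaces cover diff, then trim the tail."""
    sampled = array([], dtype='int32')
    while spaces[sampled].sum() < diff:
        batch = choice(legit_index, size=batch_size)  # stand-in for sample_replace
        sampled = concatenate((sampled, batch))
    # keep just enough draws for the cumulative spaces to reach diff
    return sampled[0:(1 + searchsorted(cumsum(spaces[sampled]), diff))]

spaces = array([10, 40, 25, 5])  # total spaces per candidate project
print(oversample_and_trim(spaces, array([0, 1, 2, 3]), diff=60, batch_size=2))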
Example #19
# Working with datasets
import os
import urbansim
us_path = urbansim.__path__[0]
from opus_core.storage_factory import StorageFactory
storage = StorageFactory().get_storage('tab_storage',
    storage_location = os.path.join(us_path, "data/tutorial"))

from opus_core.datasets.dataset import Dataset
households = Dataset(in_storage=storage,
                     in_table_name='households',
                     id_name='household_id',
                     dataset_name='household')
households.get_attribute_names()   # names of attributes loaded so far
households.get_id_attribute()
households.size()
households.get_attribute("income")
households.get_attribute_names()   # "income" now appears among the loaded attributes
households.load_dataset()          # load all primary attributes from storage
households.get_attribute_names()
#households.plot_histogram("income", bins = 10)
#households.r_histogram("income")
#households.r_scatter("persons", "income")
households.correlation_coefficient("persons", "income")
households.correlation_matrix(["persons", "income"])
households.summary()
households.add_primary_attribute(data=[4,6,9,2,4,8,2,1,3,2], name="location")
households.get_attribute_names()
households.modify_attribute(name="location", data=[0,0], index=[0,1])
households.get_attribute("location")
households.get_data_element_by_id(5).location
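The same Dataset API works with purely in-memory tables via dict_storage, which is convenient for experiments; a short sketch reusing only calls shown above and in the test fixtures below (table and attribute values are made up):

from numpy import array, arange
from opus_core.storage_factory import StorageFactory
from opus_core.datasets.dataset import Dataset

mem_storage = StorageFactory().get_storage('dict_storage')
mem_storage.write_table(table_name='toy_households',
    table_data={'household_id': arange(5) + 1,
                'persons': array([1, 2, 4, 3, 2]),
                'income': array([20, 35, 90, 60, 40])})
toy = Dataset(in_storage=mem_storage, in_table_name='toy_households',
              id_name='household_id', dataset_name='household')
toy.correlation_coefficient("persons", "income")
toy.summary()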
    def run(self, realestate_dataset,
            living_units_dataset,
            year=None,
            occupied_spaces_variable="occupied_units",
            total_spaces_variable="total_units",
            target_attribute_name='target_vacancy_rate',
            sample_from_dataset=None,
            living_units_from_dataset=None,
            sample_filter="",
            reset_attribute_value={},
            year_built='year_built',
            dataset_pool=None,
            append_to_realestate_dataset=False,
            table_name="development_projects",
            dataset_name="development_project",
            id_name='development_project_id',
            **kwargs):
        """         
        sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
        """
        
        if self.target_vancy_dataset is None:
            raise RuntimeError("target_vacancy_rate dataset is unspecified.")
        
        if not sample_from_dataset or not living_units_from_dataset:
            logger.log_note('No development projects or living units to sample from; development projects are taken from the building dataset and living units from the living_units dataset.')
            sample_from_dataset = realestate_dataset
            living_units_from_dataset = living_units_dataset
            
        if dataset_pool is None:
            dataset_pool = SessionConfiguration().get_dataset_pool()
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute('year')==year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)
        
        column_names = list(set( self.target_vancy_dataset.get_known_attribute_names() ) - set( [ target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_'] ))
        column_names.sort(reverse=True)
        column_values = dict([ (name, target_vacancy_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        
        
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
        for attribute in independent_variables:
            if attribute not in sample_dataset_known_attributes:
                sample_from_dataset.compute_one_variable_with_unknown_package(attribute, dataset_pool=dataset_pool)
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names() #update after compute
                
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in sample_dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1
                
        sampled_index = array([], dtype=int32)

        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "expected", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "expected", "difference", "action"]))
        error_log = ''
        for index in range(target_vacancy_for_this_year.size()):
            sample_indicator = ones( sample_from_dataset.size(), dtype='bool' )
            criterion = {}   # for logging
            for attribute in independent_variables:
                if attribute in sample_dataset_known_attributes:
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (attribute, realestate_dataset.get_dataset_name())
                
                if attribute + '_min' in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute+'_min')[index] 
                    criterion.update({attribute + '_min':amin})
                    if amin != -1:
                        sample_indicator *= sample_attribute >= amin
                if attribute + '_max' in column_names: 
                    amax = target_vacancy_for_this_year.get_attribute(attribute+'_max')[index]
                    criterion.update({attribute + '_max':amax}) 
                    if amax != -1:
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names: 
                    aval = column_values[attribute][index] 
                    criterion.update({attribute:aval}) 
                    if aval == -1:
                        continue
                    elif aval == -2:  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        sample_indicator *= sample_attribute == aval
                        
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all real estate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
            
            ## NOTE: `col` is the loop variable leaked from the list comprehension above
            ## (Python 2 scoping); it refers to the last entry of column_names.
            this_total_spaces_variable += '_' + str(criterion[col])
            this_occupied_spaces_variable += '_' + str(criterion[col])
            
            logger.be_quiet() #temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
            realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            logger.talk()
            
            actual_num = (realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            #target_num is obsolete with this version.
            target_num = int(round( (realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
            '''If the target vacancy is very small and the inflow to the region is large, it is not enough to check
            only the current simulation year's vacancy. The simulation is more robust if the BTM anticipates
            next year's population (of households and jobs).
            This version calculates the non-residential spaces based on the sqft requirements of jobs per sector.
            #TODO: Make the code more general to cover various stratifications in the real estate market.
            '''
            if criterion[col] == 0:
                """ Option without demography model
                idx = where(self.control_totals.get_attribute("year")==year + 1)[0]
                this_years_control_totals = DatasetSubset(self.control_totals, idx)
                expected_num = int(round( this_years_control_totals.get_attribute('total_number_of_households').sum() /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))""" 
                hh_dataset = dataset_pool.get_dataset( 'household' )
                number_of_hh = hh_dataset.size()
                expected_num = int(round( number_of_hh /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index]))) 
            if criterion[col] > 0:
                # Get the control totals per sector in a dictionary.
                idx = where(self.employment_control_totals.get_attribute("year")==year)[0] # index of the control totals for this simulation year
                this_years_control_totals = DatasetSubset(self.employment_control_totals, idx)
                # Only non-home-based control totals in the current sector are used; home-based jobs are not supported. TODO: Support home-based jobs.
                idx_non_home_based = where(logical_and(this_years_control_totals['home_based_status'] == 0,
                                                       this_years_control_totals['sector_id'] == criterion[col]))[0]
                this_years_control_totals = DatasetSubset(this_years_control_totals, idx_non_home_based)
#                idx_current_sector = where(this_years_control_totals['sector_id'] == criterion[col])[0]
                next_years_jobs = this_years_control_totals['number_of_jobs']
                controled_sectors = this_years_control_totals['sector_id']
                # dictionary keyed by sector id with the number of jobs as values, to ensure
                # multiplication with the right requirements
                sector_job_totals = dict(zip(controled_sectors, next_years_jobs.T))

                # Get info on the required sqft per sector.
#                a_zone_id = min(self.building_sqft_per_job['zone_id']) # Get a zone number from the definition table. Here choose to take the minimum which is arbitrary. This code assumes constant sqft requirements in all zones. TODO: Support different sqft requirements per zone.
#                idx_zone = where(self.building_sqft_per_job['zone_id'] == a_zone_id)[0]
#                subset_sqft_per_job = DatasetSubset(self.building_sqft_per_job, idx_zone)
#                sqft_per_job = subset_sqft_per_job['building_sqft_per_job']
#                sectors_with_requirements = subset_sqft_per_job['sector_id']
#                requirements_by_sector = dict(zip(sectors_with_requirements, sqft_per_job.T))
#                
#                needed_sqft_over_all_sectors = sector_job_totals[criterion[col]] * requirements_by_sector[criterion[col]]
#                expected_num = int(round( needed_sqft_over_all_sectors /\
#                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))
                
                idx_sector = where(self.sectors['sector_id'] == criterion[col])
                subset_sqft_per_job_sector = DatasetSubset(self.sectors, idx_sector)
                needed_sqft_current_sector = sector_job_totals[criterion[col]] * subset_sqft_per_job_sector.get_attribute('sqm_per_job')
                expected_num = int(round( needed_sqft_current_sector /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index])))

            diff = expected_num - actual_num
            
            #Previous version, which checked the current year's occupancy:
            #diff = target_num - actual_num
            
            this_sampled_index = array([], dtype=int32)
            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int( diff / mean_size )
                    ## sample at least 1 project when diff > 0; otherwise num_of_projects_to_sample = 0 makes the loop below endless
                    num_of_projects_to_sample = max(num_of_projects_to_sample, 1)
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[0:(1+searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                              ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
            #if diff < 0: #TODO demolition; not yet supported
            
            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0: action = "+" + str(action_num)
                if diff < 0: action = "-" + str(action_num)
            cat = [ str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(expected_num), str(diff), action]
            
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:                
                logger.log_status("\t".join(cat))
            
        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
        
        
        #logger.log_note("Updating attributes of %s sampled development events." % sampled_index.size)
        result_data = {}
        result_dataset = None
        index = array([], dtype='int32')
        if sampled_index.size > 0:
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            #result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32')) # commented out: year_built is reset explicitly below
            ## also add 'independent_variables' to the new dataset
            for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
                if attribute in reset_attribute_value:
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
            # Reset the year_built attribute.
            result_data['year_built'] = resize(year, sampled_index.size).astype('int32')
            # TODO: Uncomment the following three lines to reset land_area, tax_exempt, zgde. Tests still to be done. parcel_id should be changed by the location choice model.
            #result_data['land_area'] = resize(-1, sampled_index.size).astype('int32')
            #result_data['tax_exempt'] = resize(-1, sampled_index.size).astype('int32')
            #result_data['zgde'] = resize(-1, sampled_index.size).astype('int32')
            
            if id_name and result_data and id_name not in result_data:
                result_data[id_name] = arange(sampled_index.size, dtype='int32') + 1
            storage = StorageFactory().get_storage('dict_storage')
            storage.write_table(table_name=table_name, table_data=result_data)
            
            result_dataset = Dataset(id_name = id_name,
                                      in_storage = storage,
                                      in_table_name = table_name,
                                      dataset_name = dataset_name
                                      )
            index = arange(result_dataset.size())

        if append_to_realestate_dataset:
            if len(result_data) > 0:
                logger.start_block('Appending development events and living units')
                logger.log_note("Append %d sampled development events to real estate dataset." % len(result_data[result_data.keys()[0]]))
                index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)
                logger.start_block('Creating id mapping')
                # remember the ids from the development_event_history dataset.
                mapping_new_old = self.get_mapping_of_old_ids_to_new_ids(result_data, realestate_dataset, index)
                logger.end_block()
                
                '''Get the living units associated with the selected development events by iterating over the
                mapping dictionary and selecting, for each entry, all living units that match the old building id.
                The living units are collected in selected_living_units_dict, which is then added to the
                living_units dataset. A dictionary is needed to use the add_elements method; building it also
                clones the records, since a DatasetSubset is only a view on the original table.'''
                selected_living_units_dict = {}
                counter = 0
                for new_id in mapping_new_old:
                    if counter == 0:
                        logger.log_note("Log assignment of every 100th development event")
                    counter +=1
                    if counter % 100 == 0:
                        logger.log_note("Assembling living units for development event %s" % new_id)
                    sel_index = [i for i in range(len(living_units_from_dataset['building_id']))
                                 if living_units_from_dataset['building_id'][i] == mapping_new_old[new_id]]
                    living_units_this_sampled_building = DatasetSubset(living_units_from_dataset, sel_index) 
                    if len(selected_living_units_dict) == 0:
                        logger.start_block('Assign new building id')
                        for attribute_name in living_units_this_sampled_building.get_primary_attribute_names():
                            column = living_units_this_sampled_building.get_attribute(attribute_name)
                            if attribute_name == 'building_id':
                                new_ids = array(living_units_this_sampled_building.size() * [new_id], dtype=int32)
                                selected_living_units_dict.update({attribute_name: new_ids})
                            else:
                                selected_living_units_dict.update({attribute_name: column})
                        logger.end_block()
                    else:
                        this_living_units_dict = {}
                        for attribute_name in living_units_this_sampled_building.get_primary_attribute_names():
                            column = living_units_this_sampled_building.get_attribute(attribute_name)
                            if attribute_name == 'building_id':
                                new_ids = array(living_units_this_sampled_building.size() * [new_id], dtype=int32)
                                this_living_units_dict.update({attribute_name: new_ids})
                            else:
                                this_living_units_dict.update({attribute_name: column})
                        for attribute_name in living_units_this_sampled_building.get_primary_attribute_names():
                            selected_living_units_dict[attribute_name] = concatenate([selected_living_units_dict[attribute_name], this_living_units_dict[attribute_name]])
                # Reset year_built attribute of living units
                selected_living_units_dict['year_built'] = resize(year, len(selected_living_units_dict['year_built'])).astype('int32')
                # TODO: Uncomment the following two lines to reset rent_price, zgde. Tests still to be done.
                # selected_living_units_dict['rent_price'] = resize(-1, len(selected_living_units_dict['rent_price'])).astype('int32')
                # selected_living_units_dict['zgde'] = resize(-1, len(selected_living_units_dict['zgde'])).astype('int32')

                index_units = living_units_dataset.add_elements(selected_living_units_dict, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)
                
                # Check consistency of buildings and living units. All living units must belong to a building
                if SimulationState().get_current_time() - SimulationState().get_start_time() == 1:
                    for building_id in living_units_dataset['building_id']:
                        if building_id not in realestate_dataset['building_id']:
                            logger.log_warning('Living unit with building_id %d has no corresponding building.' % (building_id))
                        # Uncomment the next line to enforce consistency of the living units and building datasets; then you may comment out the two previous lines.
#                        assert(building_id in realestate_dataset['building_id']), 'Living unit with building_id %d has no corresponding building.' % (building_id)

                logger.end_block()  # closes the 'Appending development events and living units' block
            result_dataset = realestate_dataset

        # It is recommended to derive all building variables that relate to living units via expression variables.
        # However, if the building dataset contains attributes derived from living units, these attributes should be
        # consistent with the living units table. Below is an example: the residential_units attribute of each
        # building should be consistent with the number of associated living units.
#        self.check_consistency_of_living_units_per_building(realestate_dataset, living_units_dataset, mapping_new_old)

        return (result_dataset, index)
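get_mapping_of_old_ids_to_new_ids is defined elsewhere in this class; judging from its use above, it pairs the ids that add_elements assigned with the ids the sampled rows carried in result_data, so living units can be re-pointed at the cloned buildings. A toy sketch of that pairing under this assumption (all names and values are illustrative):

from numpy import array

def map_new_to_old_ids(old_ids, new_ids):
    """Pair each newly assigned building id with the id the cloned row had before appending."""
    return dict(zip(new_ids, old_ids))

old_ids = array([101, 102, 103])  # ids carried in result_data
new_ids = array([501, 502, 503])  # ids assigned by add_elements
mapping_new_old = map_new_to_old_ids(old_ids, new_ids)
# living units matching mapping_new_old[new_id] on the old building_id are then cloned and re-keyed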
    def run(self, realestate_dataset,
            year=None,
            occupied_spaces_variable="occupied_units",
            total_spaces_variable="total_units",
            target_attribute_name='target_vacancy_rate',
            sample_from_dataset=None,
            sample_filter="",
            reset_attribute_value={},
            year_built='year_built',
            dataset_pool=None,
            append_to_realestate_dataset=False,
            table_name="development_projects",
            dataset_name="development_project",
            id_name=[],
            **kwargs):
        """         
        sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
        """
        
        if self.target_vancy_dataset is None:
            raise RuntimeError("target_vacancy_rate dataset is unspecified.")
        
        if not sample_from_dataset:
            sample_from_dataset = realestate_dataset
            
        #if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute('year')==year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)
        
        column_names = list(set( self.target_vancy_dataset.get_known_attribute_names() ) - set( [ target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_'] ))
        column_names.sort(reverse=True)
        column_values = dict([ (name, target_vacancy_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        dataset_known_attributes = realestate_dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
                sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
                
        dataset_known_attributes = realestate_dataset.get_known_attribute_names() #update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1
                
        sampled_index = array([], dtype=int32)

        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
        error_log = ''
        for index in range(target_vacancy_for_this_year.size()):
            this_sampled_index = array([], dtype=int32)
            indicator = ones( realestate_dataset.size(), dtype='bool' )
            sample_indicator = ones( sample_from_dataset.size(), dtype='bool' )
            criterion = {}   # for logging
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = realestate_dataset.get_attribute(attribute)
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (attribute, realestate_dataset.get_dataset_name())
                
                if attribute + '_min' in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute+'_min')[index] 
                    criterion.update({attribute + '_min':amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                        sample_indicator *= sample_attribute >= amin
                if attribute + '_max' in column_names: 
                    amax = target_vacancy_for_this_year.get_attribute(attribute+'_max')[index]
                    criterion.update({attribute + '_max':amax}) 
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names: 
                    aval = column_values[attribute][index] 
                    criterion.update({attribute:aval}) 
                    if aval == -1:
                        continue
                    elif aval == -2:  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        indicator *= dataset_attribute == aval
                        sample_indicator *= sample_attribute == aval
                        
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all real estate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
            
            logger.be_quiet() #temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
            realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            logger.talk()
            
            actual_num = (indicator * realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            target_num = int(round( (indicator * realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index]) 
                            ))
            diff = target_num - actual_num
            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int( diff / mean_size )
                    ## sample at least 1 project when diff > 0; otherwise num_of_projects_to_sample = 0 makes the loop below endless
                    num_of_projects_to_sample = max(num_of_projects_to_sample, 1)
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[0:(1+searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                              ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
            #if diff < 0: #TODO demolition; not yet supported
            
            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0: action = "+" + str(action_num)
                if diff < 0: action = "-" + str(action_num)
            cat = [ str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(diff), action]
            
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:                
                logger.log_status("\t".join(cat))
            
        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
            
        result_data = {}
        result_dataset = None
        index = array([], dtype='int32')
        if sampled_index.size > 0:
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32'))
            for attribute in sample_from_dataset.get_primary_attribute_names():
                if attribute in reset_attribute_value:
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
        
            storage = StorageFactory().get_storage('dict_storage')
            storage.write_table(table_name=table_name, table_data=result_data)
    
            result_dataset = Dataset(id_name = id_name,
                                      in_storage = storage,
                                      in_table_name = table_name,
                                      dataset_name = dataset_name
                                      )
            index = arange(result_dataset.size())
            
        if append_to_realestate_dataset:
            if len(result_data) > 0:
                index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)                
            result_dataset = realestate_dataset
        
        return (result_dataset, index)
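Every variant above derives its target from the identity occupied = (1 - vacancy_rate) * total, so the space count that achieves a target vacancy rate is target = occupied / (1 - rate). A quick worked check with made-up numbers:

occupied = 900
target_vacancy_rate = 0.05
target_num = int(round(occupied / (1 - target_vacancy_rate)))
print(target_num)         # 947 total spaces needed for 5% vacancy
print(target_num - 920)   # with 920 existing spaces, diff = 27 spaces to sample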
Example #22
    def run(self, realestate_dataset,
            year=None,
            occupied_spaces_variable="occupied_units",
            total_spaces_variable="total_units",
            target_attribute_name='target_vacancy_rate',
            sample_from_dataset=None,
            sample_filter="",
            reset_attribute_value={},
            year_built='year_built',
            dataset_pool=None,
            append_to_realestate_dataset=False,
            table_name="development_projects",
            dataset_name="development_project",
            id_name='development_project_id',
            **kwargs):
        """         
        sample_filter attribute/variable indicates which records in the dataset are eligible in the sampling for removal or cloning
        append_to_realestate_dataset - whether to append the new dataset to realestate_dataset
        """
        
        if self.target_vancy_dataset is None:
            raise RuntimeError("target_vacancy_rate dataset is unspecified.")
        
        if not sample_from_dataset:
            sample_from_dataset = realestate_dataset
            
        #if dataset_pool is None:
        #    dataset_pool = SessionConfiguration().get_dataset_pool()
        alldata = dataset_pool.get_dataset('alldata')
        unit_names = dataset_pool.get_dataset('building_type').get_attribute('unit_name')
        sqft_per_job = dataset_pool.get_dataset('building_sqft_per_job')
        zones = realestate_dataset.compute_variables("building.disaggregate(parcel.zone_id)")
        type_ids = realestate_dataset.get_attribute("building_type_id")
        building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(zones.max(), type_ids.max())
        if year is None:
            year = SimulationState().get_current_time()
        this_year_index = where(self.target_vancy_dataset.get_attribute('year')==year)[0]
        target_vacancy_for_this_year = DatasetSubset(self.target_vancy_dataset, this_year_index)
        
        column_names = list(set( self.target_vancy_dataset.get_known_attribute_names() ) - set( [ target_attribute_name, occupied_spaces_variable, total_spaces_variable, 'year', '_hidden_id_'] ))
        column_names.sort(reverse=True)
        column_values = dict([ (name, target_vacancy_for_this_year.get_attribute(name)) for name in column_names + [target_attribute_name]])
        
        independent_variables = list(set([re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names]))
        dataset_known_attributes = realestate_dataset.get_known_attribute_names()
        sample_dataset_known_attributes = sample_from_dataset.get_known_attribute_names()
        for variable in independent_variables:
            if variable not in dataset_known_attributes:
                realestate_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
            if variable not in sample_dataset_known_attributes:
                sample_from_dataset.compute_one_variable_with_unknown_package(variable, dataset_pool=dataset_pool)
                
        dataset_known_attributes = realestate_dataset.get_known_attribute_names() #update after compute
        if sample_filter:
            short_name = VariableName(sample_filter).get_alias()
            if short_name not in dataset_known_attributes:
                filter_indicator = sample_from_dataset.compute_variables(sample_filter, dataset_pool=dataset_pool)
            else:
                filter_indicator = sample_from_dataset.get_attribute(short_name)
        else:
            filter_indicator = 1
                
        sampled_index = array([], dtype=int32)

        #log header
        if PrettyTable is not None:
            status_log = PrettyTable()
            status_log.set_field_names(column_names + ["actual", "target", "difference", "action"])
        else:
            logger.log_status("\t".join(column_names + ["actual", "target", "difference", "action"]))
        error_log = ''
        for index in range(target_vacancy_for_this_year.size()):
            this_sampled_index = array([], dtype=int32)
            indicator = ones( realestate_dataset.size(), dtype='bool' )
            sample_indicator = ones( sample_from_dataset.size(), dtype='bool' )
            criterion = {}   # for logging
            for attribute in independent_variables:
                if attribute in dataset_known_attributes:
                    dataset_attribute = realestate_dataset.get_attribute(attribute)
                    sample_attribute = sample_from_dataset.get_attribute(attribute)
                else:
                    raise ValueError, "attribute %s used in target vacancy dataset can not be found in dataset %s" % (attribute, realestate_dataset.get_dataset_name())
                
                if attribute + '_min' in column_names:
                    amin = target_vacancy_for_this_year.get_attribute(attribute+'_min')[index] 
                    criterion.update({attribute + '_min':amin})
                    if amin != -1:
                        indicator *= dataset_attribute >= amin
                        sample_indicator *= sample_attribute >= amin
                if attribute + '_max' in column_names: 
                    amax = target_vacancy_for_this_year.get_attribute(attribute+'_max')[index]
                    criterion.update({attribute + '_max':amax}) 
                    if amax != -1:
                        indicator *= dataset_attribute <= amax
                        sample_indicator *= sample_attribute <= amax
                if attribute in column_names: 
                    aval = column_values[attribute][index] 
                    criterion.update({attribute:aval}) 
                    if aval == -1:
                        continue
                    elif aval == -2:  ##treat -2 in control totals column as complement set, i.e. all other values not already specified in this column
                        indicator *= logical_not(ismember(dataset_attribute, column_values[attribute]))
                        sample_indicator *= logical_not(ismember(sample_attribute, column_values[attribute]))
                    else:
                        indicator *= dataset_attribute == aval
                        sample_indicator *= sample_attribute == aval
                        
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all real estate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year.get_attribute(occupied_spaces_variable)[index]

            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year.get_attribute(total_spaces_variable)[index]
            
            logger.be_quiet() #temporarily disable logging
            realestate_dataset.compute_one_variable_with_unknown_package(this_occupied_spaces_variable, dataset_pool=dataset_pool)
            realestate_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            sample_from_dataset.compute_one_variable_with_unknown_package(this_total_spaces_variable, dataset_pool=dataset_pool)
            if unit_names[index]=="residential_units":
                num_units = alldata.compute_variables("alldata.aggregate_all(household.building_type_id==%s)" % (index+1))
                #persons = household_set.compute_variables("%s.number_of_agents(%s)" % (hh_ds_name, person_ds_name), resources=resources)
                num_units = num_units[0]
            else:
                num_units = alldata.compute_variables("alldata.aggregate_all(job.disaggregate(employment_submarket.building_type_id)==%s)" % (index+1))
                num_units = num_units * building_sqft_per_job_table[1, (index+1)]
                num_units = num_units[0]
            #need to make sure that job empsubmarket doesn't rely on building...
            #Must do non-home-based jobs only and then multiply by building_sqft
            logger.talk()
            
            actual_num = (indicator * realestate_dataset.get_attribute(this_total_spaces_variable)).sum()
            #target_num = int(round( (indicator * realestate_dataset.get_attribute(this_occupied_spaces_variable)).sum() /\
            target_num = int(round( num_units /\
                                    (1 - target_vacancy_for_this_year.get_attribute(target_attribute_name)[index]) 
                            ))
            diff = target_num - actual_num
            if diff > 0:
                total_spaces_in_sample_dataset = sample_from_dataset.get_attribute(this_total_spaces_variable)
                legit_index = where(logical_and(sample_indicator, filter_indicator) * total_spaces_in_sample_dataset > 0)[0]
                if legit_index.size > 0:
                    mean_size = total_spaces_in_sample_dataset[legit_index].mean()
                    num_of_projects_to_sample = int( diff / mean_size )
                    ## sample at least 1 project when diff > 0; otherwise num_of_projects_to_sample = 0 makes the loop below endless
                    num_of_projects_to_sample = max(num_of_projects_to_sample, 1)
                    while total_spaces_in_sample_dataset[this_sampled_index].sum() < diff:
                        lucky_index = sample_replace(legit_index, num_of_projects_to_sample)
                        this_sampled_index = concatenate((this_sampled_index, lucky_index))
                    this_sampled_index = this_sampled_index[0:(1+searchsorted(cumsum(total_spaces_in_sample_dataset[this_sampled_index]), diff))]
                    sampled_index = concatenate((sampled_index, this_sampled_index))
                else:
                    error_log += "There is nothing to sample from %s and no new development will happen for " % sample_from_dataset.get_dataset_name() + \
                              ','.join([col+"="+str(criterion[col]) for col in column_names]) + '\n'
            #if diff < 0: #TODO demolition; not yet supported
            
            ##log status
            action = "0"
            if this_sampled_index.size > 0:
                action_num = total_spaces_in_sample_dataset[this_sampled_index].sum()
                if diff > 0: action = "+" + str(action_num)
                if diff < 0: action = "-" + str(action_num)
            cat = [ str(criterion[col]) for col in column_names]
            cat += [str(actual_num), str(target_num), str(diff), action]
            
            if PrettyTable is not None:
                status_log.add_row(cat)
            else:                
                logger.log_status("\t".join(cat))
            
        if PrettyTable is not None:
            logger.log_status("\n" + status_log.get_string())
        if error_log:
            logger.log_error(error_log)
            
        result_data = {}
        result_dataset = None
        index = array([], dtype='int32')
        if sampled_index.size > 0:
            ### ideally duplicate_rows() is all needed to add newly cloned rows
            ### to be more cautious, copy the data to be cloned, remove elements, then append the cloned data
            ##realestate_dataset.duplicate_rows(sampled_index)
            result_data.setdefault(year_built, resize(year, sampled_index.size).astype('int32'))
            ## also add 'independent_variables' to the new dataset
            for attribute in set(sample_from_dataset.get_primary_attribute_names() + independent_variables):
                if attribute in reset_attribute_value:
                    result_data[attribute] = resize(array(reset_attribute_value[attribute]), sampled_index.size)
                else:
                    result_data[attribute] = sample_from_dataset.get_attribute_by_index(attribute, sampled_index)
        
            if id_name and result_data and id_name not in result_data:
                result_data[id_name] = arange(sampled_index.size, dtype='int32') + 1
        
            storage = StorageFactory().get_storage('dict_storage')
            storage.write_table(table_name=table_name, table_data=result_data)
            
            result_dataset = Dataset(id_name = id_name,
                                      in_storage = storage,
                                      in_table_name = table_name,
                                      dataset_name = dataset_name
                                      )
            index = arange(result_dataset.size())
        if append_to_realestate_dataset:
            if len(result_data) > 0:
                index = realestate_dataset.add_elements(result_data, require_all_attributes=False,
                                                        change_ids_if_not_unique=True)                
            result_dataset = realestate_dataset
        
        return (result_dataset, index)
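The -1/-2 convention used in the criterion columns (-1 imposes no restriction, -2 matches the complement of every value named in the column) can be demonstrated with plain numpy; in1d stands in for Opus's ismember, which is assumed here to have equivalent membership semantics:

from numpy import array, in1d, logical_not, ones

column_values = array([3, 5, -2])       # one criterion column from the target vacancy table
sample_attribute = array([3, 5, 7, 9])  # attribute values in the sample dataset

def indicator_for(aval):
    ind = ones(sample_attribute.size, dtype='bool')
    if aval == -1:   # wildcard: no restriction
        return ind
    if aval == -2:   # complement of all values named in the column
        return ind * logical_not(in1d(sample_attribute, column_values))
    return ind * (sample_attribute == aval)

print(indicator_for(3))    # [ True False False False]
print(indicator_for(-2))   # [False False  True  True]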
class Test(opus_unittest.OpusTestCase):
    
    def setUp(self):
        storage = StorageFactory().get_storage('dict_storage')
    
        storage.write_table(table_name='households',
            table_data={
                'household_id': arange(10)+1,
                'grid_id': arange(-1, 9, 1)+1,
                'lucky':array([1,0,1, 0,1,1, 1,1,0, 0])
                }
            )
    
        storage.write_table(table_name='gridcells',
            table_data={
                'grid_id': arange(15)+1,
                'filter':array([0,1,1, 1,1,1, 1,1,1, 0,1,0, 1,1,1]),
                'weight':array([0.1,9,15, 2,5,1, 6,2.1,.3, 4,3,1, 10,8,7])
                }
            )
    
        #create households
        self.households = Dataset(in_storage=storage, in_table_name='households', id_name="household_id", dataset_name="household")
    
        # create gridcells
        self.gridcells = Dataset(in_storage=storage, in_table_name='gridcells', id_name="grid_id", dataset_name="gridcell")


    def test_1d_weight_array(self):
        """"""
        sample_size = 5
        # check the individual gridcells
        # This is a stochastic model, so it may legitimately fail occassionally.
        index1 = where(self.households.get_attribute("lucky"))[0]
        index2 = where(self.gridcells.get_attribute("filter"))[0]
        weight=self.gridcells.get_attribute("weight")
        for icc in [0,1]: #include_chosen_choice?
            #icc = sample([0,1],1)
            sampler_ret = weighted_sampler().run(dataset1=self.households, dataset2=self.gridcells, index1=index1,
                            index2=index2, sample_size=sample_size, weight="weight",include_chosen_choice=icc)
            # get results
            sampled_index = sampler_ret.get_2d_index()
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32") 
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]]=where_chosen[1]

            sample_results = sampled_index, chosen_choices
            sampled_index = sample_results[0]
            self.assertEqual(sampled_index.shape, (index1.size, sample_size))
            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                                        self.households.get_attribute("grid_id")[index1],UNPLACED_ID)
                chosen_choice_index = resize(array([UNPLACED_ID], dtype="int32"), index1.shape)
                w = where(chosen_choices>=0)[0]
                # for 64 bit machines, need to coerce the type to int32 -- on a
                # 32 bit machine the astype(int32) doesn't do anything
                chosen_choice_index[w] = sampled_index[w, chosen_choices[w]].astype(int32)
                self.assert_( alltrue(equal(placed_agents_index, chosen_choice_index)) )
                sampled_index = sampled_index[:,1:]
            
            self.assert_( alltrue(lookup(sampled_index.ravel(), index2, index_if_not_found=UNPLACED_ID)!=UNPLACED_ID) )
            self.assert_( all(not_equal(weight[sampled_index], 0.0)) )

    def test_2d_weight_array(self):
        #2d weight
        sample_size = 5
        n = self.households.size()
        index1 = where(self.households.get_attribute("lucky"))[0]
        index2 = where(self.gridcells.get_attribute("filter"))[0]
        lucky = self.households.get_attribute("lucky")
        weight = repeat(self.gridcells.get_attribute("weight")[newaxis, :], n, axis=0)
        for i in range(n):
            weight[i,:] += lucky[i]

        for icc in [0,1]:
            sampler_ret = weighted_sampler().run(dataset1=self.households, dataset2=self.gridcells, index1=index1,
                            index2=index2, sample_size=sample_size, weight=weight,include_chosen_choice=icc)

            sampled_index = sampler_ret.get_2d_index()
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32") 
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]]=where_chosen[1]

            self.assertEqual(sampled_index.shape, (index1.size, sample_size))

            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                                        self.households.get_attribute("grid_id")[index1],UNPLACED_ID)

                chosen_choice_index = resize(array([UNPLACED_ID], dtype="int32"), index1.shape)
                w = where(chosen_choices>=0)[0]
                chosen_choice_index[w] = sampled_index[w, chosen_choices[w]].astype(int32)
                self.assert_( alltrue(equal(placed_agents_index, chosen_choice_index)) )
                sampled_index = sampled_index[:,1:]
                
            self.assert_( alltrue(lookup(sampled_index.ravel(), index2, index_if_not_found=UNPLACED_ID)!=UNPLACED_ID) )

            for j in range(sample_size):
                self.assert_( all(not_equal(weight[j, sampled_index[j,:]], 0.0)) )
    def prepare_for_estimate(self, specification_dict = None, specification_storage=None,
                              specification_table=None, agent_set=None, 
                              agents_for_estimation_storage=None,
                              agents_for_estimation_table=None, join_datasets=False,
                              index_to_unplace=None, portion_to_unplace=1.0,
                              compute_lambda=False, grouping_location_set=None,
                              movers_variable=None, movers_index=None,
                              filter=None, location_id_variable=None,
                              data_objects={}):
        """Put 'location_id_variable' always in, if the location id is to be computed on the estimation set,
        i.e. if it is not a primary attribute of the estimation set. Set 'index_to_unplace' to None, if 'compute_lambda' is True.
        In such a case, the annual supply is estimated without unplacing agents. 'grouping_location_set', 'movers_variable' and
        'movers_index' must be given, if 'compute_lambda' is True.
        """
        from opus_core.model import get_specification_for_estimation
        from urbansim.functions import compute_supply_and_add_to_location_set
        specification = get_specification_for_estimation(specification_dict,
                                                          specification_storage,
                                                          specification_table)
        if (agent_set is not None) and (index_to_unplace is not None):
            if self.location_id_string is not None:
                agent_set.compute_variables(self.location_id_string, resources=Resources(data_objects))
            if portion_to_unplace < 1:
                unplace_size = int(portion_to_unplace*index_to_unplace.size)
                end_index_to_unplace = sample_noreplace(index_to_unplace, unplace_size)
            else:
                end_index_to_unplace = index_to_unplace
            logger.log_status("Unplace " + str(end_index_to_unplace.size) + " agents.")
            agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                        resize(array([-1]), end_index_to_unplace.size), end_index_to_unplace)
        if compute_lambda:
            movers = zeros(agent_set.size(), dtype="bool8")
            if movers_index is not None:
                movers[movers_index] = 1
            agent_set.add_primary_attribute(movers, "potential_movers")
            self.estimate_config["weights_for_estimation_string"] = self.estimate_config["weights_for_estimation_string"]+"_from_lambda"
            compute_supply_and_add_to_location_set(self.choice_set, grouping_location_set,
                                                   self.run_config["number_of_units_string"],
                                                   self.run_config["capacity_string"],
                                                   movers_variable,
                                                   self.estimate_config["weights_for_estimation_string"],
                                                   resources=Resources(data_objects))

        # create agents for estimation
        if (agents_for_estimation_storage is not None) and (agents_for_estimation_table is not None):
            estimation_set = Dataset(in_storage = agents_for_estimation_storage,
                                      in_table_name=agents_for_estimation_table,
                                      id_name=agent_set.get_id_name(), dataset_name=agent_set.get_dataset_name())
            if location_id_variable is not None:
                estimation_set.compute_variables(location_id_variable, resources=Resources(data_objects))
                # needs to be a primary attribute because of the join method below
                estimation_set.add_primary_attribute(estimation_set.get_attribute(location_id_variable), VariableName(location_id_variable).get_alias())
            if filter:
                values = estimation_set.compute_variables(filter, resources=Resources(data_objects))
                index = where(values > 0)[0]
                estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                agent_set.join_by_rows(estimation_set, require_all_attributes=False,
                                       change_ids_if_not_unique=True)
                index = arange(agent_set.size() - estimation_set.size(), agent_set.size())
            else:
                index = agent_set.get_id_index(estimation_set.get_id_attribute())
        else:
            if agent_set is not None:
                if filter is not None:
                    values = agent_set.compute_variables(filter, resources=Resources(data_objects))
                    index = where(values > 0)[0]
                else:
                    index = arange(agent_set.size())
            else:
                index = None
        return (specification, index)
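
# A minimal, self-contained sketch (plain numpy standing in for opus_core's
# sample_noreplace) of the unplacing step in prepare_for_estimate above: take a
# portion of the eligible agent index, sample it without replacement, and mark
# the sampled agents' location ids as -1. All names here are illustrative only.
from numpy import arange, array, resize
from numpy.random import permutation

index_to_unplace = arange(10)      # agents eligible for unplacing (hypothetical)
portion_to_unplace = 0.5
unplace_size = int(portion_to_unplace * index_to_unplace.size)
# sampling without replacement via a random permutation
end_index_to_unplace = index_to_unplace[permutation(index_to_unplace.size)[:unplace_size]]

locations = arange(100, 110)       # one hypothetical location id per agent
locations[end_index_to_unplace] = resize(array([-1]), end_index_to_unplace.size)
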
class Test(opus_unittest.OpusTestCase):
    def setUp(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='households',
                            table_data={
                                'household_id': arange(10) + 1,
                                'grid_id': arange(-1, 9, 1) + 1,
                                'lucky': array([1, 0, 1, 0, 1, 1, 1, 1, 0, 0])
                            })

        storage.write_table(
            table_name='gridcells',
            table_data={
                'grid_id':
                arange(15) + 1,
                'filter':
                array([0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1]),
                'weight':
                array([0.1, 9, 15, 2, 5, 1, 6, 2.1, .3, 4, 3, 1, 10, 8, 7])
            })

        #create households
        self.households = Dataset(in_storage=storage,
                                  in_table_name='households',
                                  id_name="household_id",
                                  dataset_name="household")

        # create gridcells
        self.gridcells = Dataset(in_storage=storage,
                                 in_table_name='gridcells',
                                 id_name="grid_id",
                                 dataset_name="gridcell")

    def test_1d_weight_array(self):
        """Each agent samples alternatives from one shared 1-D weight vector."""
        sample_size = 5
        # check the individual gridcells
        # This is a stochastic model, so it may legitimately fail occasionally.
        index1 = where(self.households.get_attribute("lucky"))[0]
        index2 = where(self.gridcells.get_attribute("filter"))[0]
        weight = self.gridcells.get_attribute("weight")
        for icc in [0, 1]:  # include_chosen_choice?
            sampler_ret = weighted_sampler().run(dataset1=self.households,
                                                 dataset2=self.gridcells,
                                                 index1=index1,
                                                 index2=index2,
                                                 sample_size=sample_size,
                                                 weight="weight",
                                                 include_chosen_choice=icc)
            # get results
            sampled_index = sampler_ret.get_2d_index()
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype=DTYPE)
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]] = where_chosen[1]

            self.assertEqual(sampled_index.shape, (index1.size, sample_size))
            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                    self.households.get_attribute("grid_id")[index1],
                    UNPLACED_ID)
                chosen_choice_index = resize(array([UNPLACED_ID], dtype=DTYPE),
                                             index1.shape)
                w = where(chosen_choices >= 0)[0]
                # chosen_choice_index was created with dtype=DTYPE above, so no
                # explicit type coercion is needed before this assignment
                chosen_choice_index[w] = sampled_index[w, chosen_choices[w]]
                self.assert_(
                    alltrue(equal(placed_agents_index, chosen_choice_index)))
                sampled_index = sampled_index[:, 1:]

            self.assert_(
                alltrue(
                    lookup(sampled_index.ravel(),
                           index2,
                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))
            self.assert_(all(not_equal(weight[sampled_index], 0.0)))

    def test_2d_weight_array(self):
        """Each agent samples alternatives from its own row of a 2-D weight array."""
        sample_size = 5
        n = self.households.size()
        index1 = where(self.households.get_attribute("lucky"))[0]
        index2 = where(self.gridcells.get_attribute("filter"))[0]
        lucky = self.households.get_attribute("lucky")
        weight = repeat(self.gridcells.get_attribute("weight")[newaxis, :],
                        n,
                        axis=0)
        for i in range(n):
            weight[i, :] += lucky[i]

        for icc in [0, 1]:
            sampler_ret = weighted_sampler().run(dataset1=self.households,
                                                 dataset2=self.gridcells,
                                                 index1=index1,
                                                 index2=index2,
                                                 sample_size=sample_size,
                                                 weight=weight,
                                                 include_chosen_choice=icc)

            sampled_index = sampler_ret.get_2d_index()
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype=DTYPE)
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]] = where_chosen[1]

            self.assertEqual(sampled_index.shape, (index1.size, sample_size))

            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                    self.households.get_attribute("grid_id")[index1],
                    UNPLACED_ID)

                chosen_choice_index = resize(array([UNPLACED_ID], dtype=DTYPE),
                                             index1.shape)
                w = where(chosen_choices >= 0)[0]
                chosen_choice_index[w] = sampled_index[w, chosen_choices[w]]
                self.assert_(
                    alltrue(equal(placed_agents_index, chosen_choice_index)))
                sampled_index = sampled_index[:, 1:]

            self.assert_(
                alltrue(
                    lookup(sampled_index.ravel(),
                           index2,
                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))

            for j in range(index1.size):
                # row j of sampled_index belongs to household index1[j], so
                # check that row of the weight matrix
                self.assert_(
                    all(not_equal(weight[index1[j], sampled_index[j, :]], 0.0)))
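
# A hedged numpy sketch (an illustration, not opus_core's sampler) of what the
# two tests above verify. In the 1-D case every agent draws sample_size distinct
# alternatives from one shared weight vector restricted to index2; in the 2-D
# case each household owns a weight row, and row j of the sampled index matrix
# belongs to household index1[j] -- hence weight[index1[j], ...] in the final
# assertion.
import numpy

base = numpy.array([0.1, 9, 15, 2, 5, 1, 6, 2.1, .3, 4, 3, 1, 10, 8, 7])
lucky = numpy.array([1, 0, 1, 0, 1, 1, 1, 1, 0, 0])
index1 = numpy.where(lucky)[0]                      # sampled-for households
index2 = numpy.where(numpy.array([0,1,1, 1,1,1, 1,1,1, 0,1,0, 1,1,1]))[0]
rng = numpy.random.default_rng(0)
sample_size = 5

# 1-D case: one probability vector shared by all agents
p = base[index2] / base[index2].sum()
sampled_index = numpy.array([rng.choice(index2, size=sample_size, replace=False, p=p)
                             for _ in index1])
assert (base[sampled_index] != 0).all()             # mirrors test_1d_weight_array

# 2-D case: per-household rows, base weights plus the household's lucky flag
weight = numpy.repeat(base[numpy.newaxis, :], lucky.size, axis=0) + lucky[:, numpy.newaxis]
for j in range(index1.size):
    row_p = weight[index1[j], index2] / weight[index1[j], index2].sum()
    draws = rng.choice(index2, size=sample_size, replace=False, p=row_p)
    assert (weight[index1[j], draws] != 0).all()    # mirrors test_2d_weight_array
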
# Working with datasets
import os
import urbansim
us_path = urbansim.__path__[0]
from opus_core.storage_factory import StorageFactory
storage = StorageFactory().get_storage('tab_storage',
    storage_location = os.path.join(us_path, "data/tutorial"))

from opus_core.datasets.dataset import Dataset
households = Dataset(in_storage = storage,
                         in_table_name = 'households', 
                         id_name='household_id',
                         dataset_name='household')
# inspect the dataset: attribute names, ids, size, and a single attribute
households.get_attribute_names()
households.get_id_attribute()
households.size()
households.get_attribute("income")
households.get_attribute_names()
# load all attributes from storage into memory
households.load_dataset()
households.get_attribute_names()
#households.plot_histogram("income", bins = 10)
#households.r_histogram("income")
#households.r_scatter("persons", "income")
# simple descriptive statistics
households.correlation_coefficient("persons", "income")
households.correlation_matrix(["persons", "income"])
households.summary()
# add a new primary attribute, modify two of its values, and read it back
households.add_primary_attribute(data=[4,6,9,2,4,8,2,1,3,2], name="location")
households.get_attribute_names()
households.modify_attribute(name="location", data=[0,0], index=[0,1])
households.get_attribute("location")
households.get_data_element_by_id(5).location
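
# A hedged continuation of the tutorial, using only Dataset calls that appear
# elsewhere in this document: pick the households above an arbitrary income
# threshold (the threshold choice here is illustrative) and restrict the
# dataset to them in place.
from numpy import where
mean_income = households.get_attribute("income").mean()
index = where(households.get_attribute("income") > mean_income)[0]
households.subset_by_index(index, flush_attributes_if_not_loaded=False)
households.size()
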
class Test(opus_unittest.OpusTestCase):

    def setUp(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='households',
            table_data={
                'household_id': arange(10)+1,
                'grid_id': arange(-1, 9, 1)+1,
                'lucky':array([1,0,1, 0,1,1, 1,1,0, 0])
                }
            )

        storage.write_table(table_name='gridcells',
            table_data={
                'grid_id': arange(15)+1,
                'filter':array([0,1,1, 1,1,1, 1,1,1, 0,1,0, 1,1,1]),
                'weight':array([0.1,9,15, 2,5,1, 6,2.1,.3, 4,3,1, 10,8,7])
                }
            )
        dataset_pool = SessionConfiguration(in_storage=storage).get_dataset_pool()

        #create households
        self.households = Dataset(in_storage=storage, in_table_name='households', id_name="household_id", dataset_name="household")

        # create gridcells
        self.gridcells = Dataset(in_storage=storage, in_table_name='gridcells', id_name="grid_id", dataset_name="gridcell")
        dataset_pool.replace_dataset('household', self.households)
        dataset_pool.replace_dataset('gridcell', self.gridcells)

    def test_1(self):
        """Weighted sampling stratified by agent and choice categories."""
        sample_size = 5
        # check the individual gridcells
        # This is a stochastic model, so it may legitimately fail occasionally.
        index1 = where(self.households.get_attribute("lucky"))[0]
        #index2 = where(self.gridcells.get_attribute("filter"))[0]
        weight=self.gridcells.get_attribute("weight")
        estimation_config = {"agent_category_definition":["household.lucky"],
                             "choice_category_definition":["gridcell.filter+1"]
                            }
        for icc in [0,1]:  # include_chosen_choice?
            sampler_ret = weighted_sampler_by_category().run(dataset1=self.households, dataset2=self.gridcells, index1=index1,
                                                                sample_size=sample_size,
                                                                include_chosen_choice=icc, resources=estimation_config)
            # get results
            sampled_index = sampler_ret.get_2d_index()
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32") 
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]]=where_chosen[1]

            self.assertEqual(sampled_index.shape, (index1.size, sample_size))
            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                                        self.households.get_attribute("grid_id")[index1],UNPLACED_ID)
                chosen_choice_index = resize(array([UNPLACED_ID], dtype="int32"), index1.shape)
                w = where(chosen_choices>=0)[0]
                # for 64 bit machines, need to coerce the type to int32 -- on a
                # 32 bit machine the astype(int32) doesn't do anything
                chosen_choice_index[w] = sampled_index[w, chosen_choices[w]].astype(int32)
                self.assert_( alltrue(equal(placed_agents_index, chosen_choice_index)) )
                sampled_index = sampled_index[:,1:]
                
            self.assert_( alltrue(lookup(sampled_index.ravel(), arange(self.gridcells.size()), index_if_not_found=UNPLACED_ID)!=UNPLACED_ID) )
            self.assert_( all(not_equal(weight[sampled_index], 0.0)) )
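
# A hedged numpy sketch of what category-based sampling means in the test
# above: households are bucketed by household.lucky and gridcells by
# gridcell.filter+1, and each household draws alternatives from the gridcell
# bucket paired with its own category. The pairing rule below
# (lucky -> filter+1 == lucky+1) is an assumption for illustration, not a
# statement of weighted_sampler_by_category's internals.
import numpy

lucky = numpy.array([1, 0, 1, 0, 1, 1, 1, 1, 0, 0])          # agent categories
gc_filter = numpy.array([0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1])
choice_category = gc_filter + 1                               # choice categories 1 and 2
weight = numpy.array([0.1, 9, 15, 2, 5, 1, 6, 2.1, .3, 4, 3, 1, 10, 8, 7])
rng = numpy.random.default_rng(0)
for agent_category in numpy.unique(lucky):
    pool = numpy.where(choice_category == agent_category + 1)[0]  # assumed pairing
    p = weight[pool] / weight[pool].sum()
    draws = rng.choice(pool, size=min(5, pool.size), replace=False, p=p)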