def prepare_for_estimate(self, specification_dict=None,
                         specification_storage=None,
                         specification_table=None,
                         agent_set=None,
                         agents_for_estimation_storage=None,
                         agents_for_estimation_table=None,
                         filter_for_estimation_set=None,
                         data_objects=None):
    """Load the estimation specification and flag agents found in the estimation set.

    Args:
        specification_dict, specification_storage, specification_table:
            Passed through unchanged to ``get_specification_for_estimation``.
        agent_set: Dataset whose agents are matched against the estimation
            set. Required when ``agents_for_estimation_storage`` is given.
        agents_for_estimation_storage: Storage holding the estimation agents;
            when ``None`` the agent set is left untouched.
        agents_for_estimation_table: Table name of the estimation agents.
        filter_for_estimation_set: Optional expression; only estimation
            agents for which it evaluates to a non-zero value are kept.
        data_objects: Passed to ``Resources`` when evaluating
            ``filter_for_estimation_set``.

    Returns:
        A ``(specification, agents_index)`` tuple. ``agents_index`` holds the
        indices of ``self.proposal_set`` that satisfy ``self.filter``, or
        ``None`` when no filter is configured.
    """
    specification = get_specification_for_estimation(specification_dict,
                                                     specification_storage,
                                                     specification_table)

    # Bind a default first: without it the final return raised
    # UnboundLocalError whenever self.filter was None.
    # NOTE(review): None is presumably read by callers as "no restriction" --
    # confirm against the caller.
    agents_index = None
    if self.filter is not None:
        agents_index = where(self.proposal_set.compute_variables(self.filter))[0]

    id_attribute_name = ['parcel_id', 'template_id', 'is_redevelopment']
    if agents_for_estimation_storage is not None:
        estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                 in_table_name=agents_for_estimation_table,
                                 id_name=id_attribute_name,
                                 dataset_name=agent_set.get_dataset_name())

        if filter_for_estimation_set:
            filter_index = where(estimation_set.compute_variables(
                filter_for_estimation_set,
                resources=Resources(data_objects)))[0]
            estimation_set.subset_by_index(filter_index,
                                           flush_attributes_if_not_loaded=False)

        # Stack the id columns side by side so each row is one composite id.
        id_attributes = concatenate(
            [agent_set.get_attribute_as_column(attr_name)
             for attr_name in id_attribute_name],
            axis=1)

        id_index = estimation_set.try_get_id_index(id_attributes,
                                                   return_value_if_not_found=-1)
        found = where(id_index != -1)[0]

        # 1 for agents present in the estimation set, 2 for all others.
        status_id = 2 * ones(agent_set.size(), dtype="int8")
        status_id[found] = 1
        name = self.choice_attribute_name.get_alias()
        if name in agent_set.get_known_attribute_names():
            # Existing attribute: only overwrite the matched agents.
            agent_set.set_values_of_one_attribute(name, status_id[found], found)
        else:
            agent_set.add_primary_attribute(status_id, name)

    return (specification, agents_index)
# Example #2
# 0
class Test(opus_unittest.OpusTestCase):
    """Tests for ``weighted_sampler_by_category`` on a small in-memory fixture."""

    def setUp(self):
        """Create household and gridcell datasets and put them in the dataset pool."""
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(
            table_name='households',
            table_data={
                'household_id': arange(10) + 1,
                # grid_id values are 0..9; 0 matches no gridcell id (1..15).
                'grid_id': arange(-1, 9, 1) + 1,
                'lucky': array([1, 0, 1, 0, 1, 1, 1, 1, 0, 0])
            })

        storage.write_table(
            table_name='gridcells',
            table_data={
                'grid_id':
                arange(15) + 1,
                'filter':
                array([0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1]),
                'weight':
                array([0.1, 9, 15, 2, 5, 1, 6, 2.1, .3, 4, 3, 1, 10, 8, 7])
            })
        dataset_pool = SessionConfiguration(
            in_storage=storage).get_dataset_pool()

        self.households = Dataset(in_storage=storage,
                                  in_table_name='households',
                                  id_name="household_id",
                                  dataset_name="household")

        self.gridcells = Dataset(in_storage=storage,
                                 in_table_name='gridcells',
                                 id_name="grid_id",
                                 dataset_name="gridcell")
        dataset_pool.replace_dataset('household', self.households)
        dataset_pool.replace_dataset('gridcell', self.gridcells)

    def test_1(self):
        """Sample gridcells for the lucky households, with and without the chosen choice."""
        sample_size = 5
        # This is a stochastic model, so it may legitimately fail occasionally.
        index1 = where(self.households.get_attribute("lucky"))[0]
        weight = self.gridcells.get_attribute("weight")
        estimation_config = {
            "agent_category_definition": ["household.lucky"],
            "choice_category_definition": ["gridcell.filter+1"]
        }
        for icc in [0, 1]:  # include_chosen_choice?
            sampler_ret = weighted_sampler_by_category().run(
                dataset1=self.households,
                dataset2=self.gridcells,
                index1=index1,
                sample_size=sample_size,
                include_chosen_choice=icc,
                resources=estimation_config)

            sampled_index = sampler_ret.get_2d_index()
            # Column of the chosen choice per agent; UNPLACED_ID where none is flagged.
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype=DTYPE)
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]] = where_chosen[1]

            self.assertEqual(sampled_index.shape, (index1.size, sample_size))
            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                    self.households.get_attribute("grid_id")[index1],
                    UNPLACED_ID)
                chosen_choice_index = resize(array([UNPLACED_ID], dtype=DTYPE),
                                             index1.shape)
                w = where(chosen_choices >= 0)[0]
                # for 64 bit machines, need to coerce the type to int32 -- on a
                # 32 bit machine the astype(int32) doesn't do anything
                chosen_choice_index[w] = sampled_index[
                    w, chosen_choices[w]].astype(int32)
                self.assertTrue(
                    alltrue(equal(placed_agents_index, chosen_choice_index)))
                # Drop the first column before the remaining checks
                # (presumably the chosen choice -- confirm against the sampler).
                sampled_index = sampled_index[:, 1:]

            # Every sampled index must be a valid gridcell index.
            self.assertTrue(
                alltrue(
                    lookup(sampled_index.ravel(),
                           arange(self.gridcells.size()),
                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))
            self.assertTrue(all(not_equal(weight[sampled_index], 0.0)))
class Test(opus_unittest.OpusTestCase):
    """Tests for ``weighted_sampler`` with 1-D and 2-D weight inputs."""

    def setUp(self):
        """Create the household and gridcell datasets used by the tests."""
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='households',
                            table_data={
                                'household_id': arange(10) + 1,
                                # grid_id values are 0..9; 0 matches no
                                # gridcell id (1..15).
                                'grid_id': arange(-1, 9, 1) + 1,
                                'lucky': array([1, 0, 1, 0, 1, 1, 1, 1, 0, 0])
                            })

        storage.write_table(
            table_name='gridcells',
            table_data={
                'grid_id':
                arange(15) + 1,
                'filter':
                array([0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1]),
                'weight':
                array([0.1, 9, 15, 2, 5, 1, 6, 2.1, .3, 4, 3, 1, 10, 8, 7])
            })

        self.households = Dataset(in_storage=storage,
                                  in_table_name='households',
                                  id_name="household_id",
                                  dataset_name="household")

        self.gridcells = Dataset(in_storage=storage,
                                 in_table_name='gridcells',
                                 id_name="grid_id",
                                 dataset_name="gridcell")

    def test_1d_weight_array(self):
        """Sample with the weight given as the name of a gridcell attribute."""
        sample_size = 5
        # This is a stochastic model, so it may legitimately fail occasionally.
        index1 = where(self.households.get_attribute("lucky"))[0]
        index2 = where(self.gridcells.get_attribute("filter"))[0]
        weight = self.gridcells.get_attribute("weight")
        for icc in [0, 1]:  # include_chosen_choice?
            sampler_ret = weighted_sampler().run(dataset1=self.households,
                                                 dataset2=self.gridcells,
                                                 index1=index1,
                                                 index2=index2,
                                                 sample_size=sample_size,
                                                 weight="weight",
                                                 include_chosen_choice=icc)

            sampled_index = sampler_ret.get_2d_index()
            # Column of the chosen choice per agent; UNPLACED_ID where none is flagged.
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype=DTYPE)
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]] = where_chosen[1]

            self.assertEqual(sampled_index.shape, (index1.size, sample_size))
            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                    self.households.get_attribute("grid_id")[index1],
                    UNPLACED_ID)
                chosen_choice_index = resize(array([UNPLACED_ID], dtype=DTYPE),
                                             index1.shape)
                w = where(chosen_choices >= 0)[0]
                chosen_choice_index[w] = sampled_index[w, chosen_choices[w]]
                self.assertTrue(
                    alltrue(equal(placed_agents_index, chosen_choice_index)))
                # Drop the first column before the remaining checks
                # (presumably the chosen choice -- confirm against the sampler).
                sampled_index = sampled_index[:, 1:]

            # Every sampled index must come from the filtered gridcells.
            self.assertTrue(
                alltrue(
                    lookup(sampled_index.ravel(),
                           index2,
                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))
            self.assertTrue(all(not_equal(weight[sampled_index], 0.0)))

    def test_2d_weight_array(self):
        """Sample with an explicit (households x gridcells) weight matrix."""
        sample_size = 5
        n = self.households.size()
        index1 = where(self.households.get_attribute("lucky"))[0]
        index2 = where(self.gridcells.get_attribute("filter"))[0]
        lucky = self.households.get_attribute("lucky")
        # One row of gridcell weights per household, shifted by that
        # household's "lucky" value.
        weight = repeat(self.gridcells.get_attribute("weight")[newaxis, :],
                        n,
                        axis=0)
        for i in range(n):
            weight[i, :] += lucky[i]

        for icc in [0, 1]:  # include_chosen_choice?
            sampler_ret = weighted_sampler().run(dataset1=self.households,
                                                 dataset2=self.gridcells,
                                                 index1=index1,
                                                 index2=index2,
                                                 sample_size=sample_size,
                                                 weight=weight,
                                                 include_chosen_choice=icc)

            sampled_index = sampler_ret.get_2d_index()
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype=DTYPE)
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]] = where_chosen[1]

            self.assertEqual(sampled_index.shape, (index1.size, sample_size))

            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                    self.households.get_attribute("grid_id")[index1],
                    UNPLACED_ID)

                chosen_choice_index = resize(array([UNPLACED_ID], dtype=DTYPE),
                                             index1.shape)
                w = where(chosen_choices >= 0)[0]
                chosen_choice_index[w] = sampled_index[w, chosen_choices[w]]
                self.assertTrue(
                    alltrue(equal(placed_agents_index, chosen_choice_index)))
                sampled_index = sampled_index[:, 1:]

            self.assertTrue(
                alltrue(
                    lookup(sampled_index.ravel(),
                           index2,
                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))

            # sampled_index has one row per agent in index1, whereas weight
            # has one row per household: pair row `row` with household
            # index1[row] and cover every sampled row.
            for row, agent in enumerate(index1):
                self.assertTrue(
                    all(not_equal(weight[agent, sampled_index[row, :]], 0.0)))
class Test(opus_unittest.OpusTestCase):
    """Tests for ``weighted_sampler_by_category`` on a small in-memory fixture."""

    def setUp(self):
        """Create household and gridcell datasets and put them in the dataset pool."""
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='households',
            table_data={
                'household_id': arange(10)+1,
                # grid_id values are 0..9; 0 matches no gridcell id (1..15).
                'grid_id': arange(-1, 9, 1)+1,
                'lucky': array([1,0,1, 0,1,1, 1,1,0, 0])
                }
            )

        storage.write_table(table_name='gridcells',
            table_data={
                'grid_id': arange(15)+1,
                'filter': array([0,1,1, 1,1,1, 1,1,1, 0,1,0, 1,1,1]),
                'weight': array([0.1,9,15, 2,5,1, 6,2.1,.3, 4,3,1, 10,8,7])
                }
            )
        dataset_pool = SessionConfiguration(in_storage=storage).get_dataset_pool()

        self.households = Dataset(in_storage=storage, in_table_name='households',
                                  id_name="household_id", dataset_name="household")

        self.gridcells = Dataset(in_storage=storage, in_table_name='gridcells',
                                 id_name="grid_id", dataset_name="gridcell")
        dataset_pool.replace_dataset('household', self.households)
        dataset_pool.replace_dataset('gridcell', self.gridcells)

    def test_1(self):
        """Sample gridcells for the lucky households, with and without the chosen choice."""
        sample_size = 5
        # This is a stochastic model, so it may legitimately fail occasionally.
        index1 = where(self.households.get_attribute("lucky"))[0]
        weight = self.gridcells.get_attribute("weight")
        estimation_config = {"agent_category_definition": ["household.lucky"],
                             "choice_category_definition": ["gridcell.filter+1"]
                            }
        for icc in [0, 1]:  # include_chosen_choice?
            sampler_ret = weighted_sampler_by_category().run(
                dataset1=self.households, dataset2=self.gridcells, index1=index1,
                sample_size=sample_size,
                include_chosen_choice=icc, resources=estimation_config)

            sampled_index = sampler_ret.get_2d_index()
            # Column of the chosen choice per agent; UNPLACED_ID where none is flagged.
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]] = where_chosen[1]

            self.assertEqual(sampled_index.shape, (index1.size, sample_size))
            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                    self.households.get_attribute("grid_id")[index1], UNPLACED_ID)
                chosen_choice_index = resize(array([UNPLACED_ID], dtype="int32"), index1.shape)
                w = where(chosen_choices >= 0)[0]
                # for 64 bit machines, need to coerce the type to int32 -- on a
                # 32 bit machine the astype(int32) doesn't do anything
                chosen_choice_index[w] = sampled_index[w, chosen_choices[w]].astype(int32)
                self.assertTrue(alltrue(equal(placed_agents_index, chosen_choice_index)))
                # Drop the first column before the remaining checks
                # (presumably the chosen choice -- confirm against the sampler).
                sampled_index = sampled_index[:, 1:]

            # Every sampled index must be a valid gridcell index.
            self.assertTrue(alltrue(lookup(sampled_index.ravel(),
                                           arange(self.gridcells.size()),
                                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))
            self.assertTrue(all(not_equal(weight[sampled_index], 0.0)))
class Test(opus_unittest.OpusTestCase):
    """Tests for ``weighted_sampler`` with 1-D and 2-D weight inputs."""

    def setUp(self):
        """Create the household and gridcell datasets used by the tests."""
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='households',
            table_data={
                'household_id': arange(10)+1,
                # grid_id values are 0..9; 0 matches no gridcell id (1..15).
                'grid_id': arange(-1, 9, 1)+1,
                'lucky': array([1,0,1, 0,1,1, 1,1,0, 0])
                }
            )

        storage.write_table(table_name='gridcells',
            table_data={
                'grid_id': arange(15)+1,
                'filter': array([0,1,1, 1,1,1, 1,1,1, 0,1,0, 1,1,1]),
                'weight': array([0.1,9,15, 2,5,1, 6,2.1,.3, 4,3,1, 10,8,7])
                }
            )

        self.households = Dataset(in_storage=storage, in_table_name='households',
                                  id_name="household_id", dataset_name="household")

        self.gridcells = Dataset(in_storage=storage, in_table_name='gridcells',
                                 id_name="grid_id", dataset_name="gridcell")

    def test_1d_weight_array(self):
        """Sample with the weight given as the name of a gridcell attribute."""
        sample_size = 5
        # This is a stochastic model, so it may legitimately fail occasionally.
        index1 = where(self.households.get_attribute("lucky"))[0]
        index2 = where(self.gridcells.get_attribute("filter"))[0]
        weight = self.gridcells.get_attribute("weight")
        for icc in [0, 1]:  # include_chosen_choice?
            sampler_ret = weighted_sampler().run(
                dataset1=self.households, dataset2=self.gridcells, index1=index1,
                index2=index2, sample_size=sample_size, weight="weight",
                include_chosen_choice=icc)

            sampled_index = sampler_ret.get_2d_index()
            # Column of the chosen choice per agent; UNPLACED_ID where none is flagged.
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]] = where_chosen[1]

            self.assertEqual(sampled_index.shape, (index1.size, sample_size))
            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                    self.households.get_attribute("grid_id")[index1], UNPLACED_ID)
                chosen_choice_index = resize(array([UNPLACED_ID], dtype="int32"), index1.shape)
                w = where(chosen_choices >= 0)[0]
                # for 64 bit machines, need to coerce the type to int32 -- on a
                # 32 bit machine the astype(int32) doesn't do anything
                chosen_choice_index[w] = sampled_index[w, chosen_choices[w]].astype(int32)
                self.assertTrue(alltrue(equal(placed_agents_index, chosen_choice_index)))
                # Drop the first column before the remaining checks
                # (presumably the chosen choice -- confirm against the sampler).
                sampled_index = sampled_index[:, 1:]

            # Every sampled index must come from the filtered gridcells.
            self.assertTrue(alltrue(lookup(sampled_index.ravel(), index2,
                                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))
            self.assertTrue(all(not_equal(weight[sampled_index], 0.0)))

    def test_2d_weight_array(self):
        """Sample with an explicit (households x gridcells) weight matrix."""
        sample_size = 5
        n = self.households.size()
        index1 = where(self.households.get_attribute("lucky"))[0]
        index2 = where(self.gridcells.get_attribute("filter"))[0]
        lucky = self.households.get_attribute("lucky")
        # One row of gridcell weights per household, shifted by that
        # household's "lucky" value.
        weight = repeat(self.gridcells.get_attribute("weight")[newaxis, :], n, axis=0)
        for i in range(n):
            weight[i, :] += lucky[i]

        for icc in [0, 1]:  # include_chosen_choice?
            sampler_ret = weighted_sampler().run(
                dataset1=self.households, dataset2=self.gridcells, index1=index1,
                index2=index2, sample_size=sample_size, weight=weight,
                include_chosen_choice=icc)

            sampled_index = sampler_ret.get_2d_index()
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]] = where_chosen[1]

            self.assertEqual(sampled_index.shape, (index1.size, sample_size))

            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                    self.households.get_attribute("grid_id")[index1], UNPLACED_ID)

                chosen_choice_index = resize(array([UNPLACED_ID], dtype="int32"), index1.shape)
                w = where(chosen_choices >= 0)[0]
                chosen_choice_index[w] = sampled_index[w, chosen_choices[w]].astype(int32)
                self.assertTrue(alltrue(equal(placed_agents_index, chosen_choice_index)))
                sampled_index = sampled_index[:, 1:]

            self.assertTrue(alltrue(lookup(sampled_index.ravel(), index2,
                                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))

            # sampled_index has one row per agent in index1, whereas weight
            # has one row per household: pair row `row` with household
            # index1[row] and cover every sampled row.
            for row, agent in enumerate(index1):
                self.assertTrue(all(not_equal(weight[agent, sampled_index[row, :]], 0.0)))