Example #1
0
            def run_model():
                """Run the household transition model for year 2000 and return mean ages by income bracket.

                Uses ``storage``, ``household_data``, ``hct_set`` and ``hc_set`` from the
                enclosing scope. The 'households' table is (re)written on every call before
                the dataset is built from it.

                Returns an array with the mean ``age_of_head`` of the households whose income is
                <= 1000, in (1000, 5000], in (5000, 10000] and > 10000, in that order.
                """
                storage.write_table(table_name = 'households', table_data = household_data)
                # NOTE(review): the households table is loaded through
                # HouseholdCharacteristicDataset here -- confirm this is the intended class.
                households = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='households')

                model = HouseholdTransitionModel()
                model.run(year=2000, household_set=households, control_totals=hct_set, characteristics=hc_set)
                income = households.get_attribute("income")
                age = households.get_attribute("age_of_head")
                # Indices of the households falling into each of the four income brackets.
                idx1 = where(income <= 1000)[0]
                idx2 = where(logical_and(income <= 5000, income > 1000))[0]
                idx3 = where(logical_and(income <= 10000, income > 5000))[0]
                idx4 = where(income > 10000)[0]
                results = array([age[idx1].mean(), age[idx2].mean(), age[idx3].mean(), age[idx4].mean()])
                # Parenthesised form prints the same on Python 2 and also parses on Python 3.
                print(results)
                return results
 def prepare_for_run(self, storage, **kwargs):
     """Create the datasets needed for a run and return them.

     A household ControlTotalDataset and a HouseholdCharacteristicDataset are built
     from ``storage``; ``sample_control_totals`` is then called with the storage, the
     control totals and any extra keyword arguments.

     Returns the tuple ``(control_totals, characteristics)``.
     """
     from urbansim.datasets.control_total_dataset import ControlTotalDataset
     from urbansim.datasets.household_characteristic_dataset import HouseholdCharacteristicDataset
     from urbansim.models.employment_transition_model import sample_control_totals
     hh_control_totals = ControlTotalDataset(what="household", in_storage=storage)
     hh_characteristics = HouseholdCharacteristicDataset(in_storage=storage)
     sample_control_totals(storage, hh_control_totals, **kwargs)
     datasets = (hh_control_totals, hh_characteristics)
     return datasets
Example #3
0
    def test_controlling_with_one_marginal_characteristic(self):
        """Control on a single marginal characteristic, age_of_head.

        The control totals hold two rows for year 2000: 25000 households for age_of_head
        group 0 and 15000 for group 1. After the run this test checks the overall number of
        households, the totals of household groups 1-4 and 5-8, and that the distribution
        inside each of these two larger groups is the same as before the run.
        """
        failure_template = "Error, should_be: %s, but result: %s"

        # IMPORTANT: the grouping indices of a marginal characteristic have to start at 0.
        # Index 0 is the first large group (groups 1-4), index 1 the second one (groups 5-8).
        control_totals_table = {
            "year": array([2000, 2000]),
            "age_of_head": array([0,1]),
            "total_number_of_households": array([25000, 15000])
            }

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=self.households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set', table_data=control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what='household',
                                      id_name=['year' ,'age_of_head'])

        storage.write_table(table_name='hc_set', table_data=self.household_characteristics_for_ht_data)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

        storage.write_table(table_name='prs_set', table_data=self.person_data)
        prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

        HouseholdTransitionModel().run(year=2000, person_set=prs_set, household_set=hh_set,
                                       control_totals=hct_set, characteristics=hc_set)

        # The two control totals add up to 40000 households.
        results = hh_set.size()
        should_be = [40000]
        sizes_match = ma.allclose(should_be, results, rtol=1e-1)
        self.assertEqual(sizes_match, True, failure_template % (should_be, results))

        # Groups 1-4 grow by 10000 households, groups 5-8 shrink by 3000.
        results = self.get_count_all_groups(hh_set)
        should_be = [25000, 15000]
        large_group_totals = [sum(results[0:4]), sum(results[4:8])]
        totals_match = ma.allclose(large_group_totals, should_be, rtol=1e-1)
        self.assertEqual(totals_match, True, failure_template % (should_be, results))

        # Within each large group the shares of the four member groups must be unchanged:
        # scale the counts from before the run (15000 and 18000 in total) to the new totals.
        first_half_before = [6000.0, 2000.0, 3000.0, 4000.0]
        second_half_before = [2000.0, 5000.0, 3000.0, 8000.0]
        should_be = ([n/15000.0*25000.0 for n in first_half_before] +
                     [n/18000.0*15000.0 for n in second_half_before])
        distribution_match = ma.allclose(results, should_be, rtol=0.05)
        self.assertEqual(distribution_match, True, failure_template % (should_be, results))
Example #4
0
    def test_controlling_with_three_marginal_characteristics(self):
        """Controlling with all three possible marginal characteristics in this example, age_of_head, income, and persons,
        this would partition the 8 groups into the same 8 groups, and with a control total specified for each group, we must
        ensure that the control totals for each group exactly meet the specifications.
        """

        #IMPORTANT: marginal characteristics grouping indices have to start at 0!
        annual_household_control_totals_data = {
            "year": array(8*[2000]),
            "age_of_head": array(4*[0] + 4*[1]),
            "income": array(2*[0] + 2*[1] + 2*[0] + 2*[1]),
            "persons": array([0,1,0,1,0,1,0,1]),
            "total_number_of_households": array([4000, 5000, 1000, 3000, 0, 6000, 3000, 8000])
            }
        #size of columns was not even, removed last element of min and max
        household_characteristics_for_ht_data = {
            "characteristic": array(2*['age_of_head'] + 2*['income'] + 2*['persons']),
            "min": array([0, 50, 0, 40000, 0, 3]),
            "max": array([49, 100, 39999, -1, 2, -1]) 
            }
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=self.households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set', table_data=annual_household_control_totals_data)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what='household', id_name=['year' ,'age_of_head', 'income', 'persons'])

        storage.write_table(table_name='hc_set', table_data=household_characteristics_for_ht_data)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

        # unplace some households: every household whose building_id is not 10 gets building_id 0
        # ("!=" replaces the obsolete "<>" spelling, which Python 3 no longer accepts)
        where10 = where(hh_set.get_attribute("building_id") != 10)[0]
        hh_set.modify_attribute(name="building_id", data=zeros(where10.size), index=where10)

        storage.write_table(table_name='prs_set', table_data=self.person_data)
        prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')
        
        model = HouseholdTransitionModel()
        model.run(year=2000, person_set=prs_set, household_set=hh_set, control_totals=hct_set, characteristics=hc_set)

        #check that there are indeed 30000 total households after running the model
        #(the sum of the eight control totals above)
        results = hh_set.size()
        should_be = [30000]
        self.assertEqual(ma.allclose(should_be, results, rtol=1e-1),
                         True, "Error, should_be: %s, but result: %s" % (should_be, results))

        #check that the number of households in each group exactly match the control totals specified
        results = self.get_count_all_groups(hh_set)
        should_be = [4000, 5000, 1000, 3000, 0, 6000, 3000, 8000]
        self.assertEqual(ma.allclose(results, should_be),
                         True, "Error, should_be: %s, but result: %s" % (should_be, results))
Example #5
0
    def test_same_distribution_after_household_addition(self):
        """Add households using only a total control (no marginal characteristics).

        The single control total for year 2000 is 50000 households. The test checks the
        resulting size of the household set, the number of unplaced households, that the
        distribution over the groups is unchanged, and the data types of three attributes.
        """
        failure_template = "Error, should_be: %s, but result: %s"
        control_totals_table = {
            "year": array([2000]),
            "total_number_of_households": array([50000])
            }

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=self.households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='prs_set', table_data=self.person_data)
        prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

        storage.write_table(table_name='hct_set', table_data=control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what="household", id_name="year")

        storage.write_table(table_name='hc_set', table_data=self.household_characteristics_for_ht_data)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

        HouseholdTransitionModel().run(year=2000, person_set=prs_set, household_set=hh_set,
                                       control_totals=hct_set, characteristics=hc_set)

        # The household set must have grown to the control total.
        results = hh_set.size()
        should_be = [50000]
        self.assertEqual(ma.allclose(should_be, results, rtol=1e-1), True,
                         failure_template % (should_be, results))

        # Households with building_id <= 0 are the unplaced ones; their number must equal
        # the number of newly created households.
        unplaced = where(hh_set.get_attribute("building_id")<=0)[0]
        results = unplaced.size
        should_be = [17000]
        self.assertEqual(ma.allclose(should_be, results, rtol=1e-1), True,
                         failure_template % (should_be, results))

        # Each group keeps its share: scale the counts from before the run (33000 in total)
        # up to the new total of 50000.
        counts_before = [6000.0, 2000.0, 3000.0, 4000.0, 2000.0, 5000.0, 3000.0, 8000.0]
        results = self.get_count_all_groups(hh_set)
        should_be = array([n/33000.0*50000.0 for n in counts_before])
        self.assertEqual(ma.allclose(results, should_be, rtol=0.05), True,
                         failure_template % (should_be, results))

        # The attributes of the enlarged household set must have these data types.
        age_dtype = hh_set.get_attribute("age_of_head").dtype
        self.assertEqual(age_dtype, int32,
                         "Error in data type of the new household set. Should be: int32, is: %s" % str(age_dtype))
        income_dtype = hh_set.get_attribute("income").dtype
        self.assertEqual(income_dtype, int32,
                         "Error in data type of the new household set. Should be: int32, is: %s" % str(income_dtype))
        persons_dtype = hh_set.get_attribute("persons").dtype
        self.assertEqual(persons_dtype, int8,
                         "Error in data type of the new household set. Should be: int8, is: %s" % str(persons_dtype))
    def test_same_distribution_after_household_subtraction(self):
        """Remove households using only a total control (no marginal characteristics).

        The single control total for year 2000 is 20000 households. The test checks the
        resulting size of the household set and that the distribution over the groups
        is the same as before the run. No person set is passed to the model.
        """
        control_totals_table = {
            "year": array([2000]),
            "total_number_of_households": array([20000])
        }

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=self.households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set', table_data=control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                      what="household", id_name="year")

        storage.write_table(table_name='hc_set',
                            table_data=self.household_characteristics_for_ht_data)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

        transition_model = HouseholdTransitionModel()
        transition_model.run(year=2000, household_set=hh_set,
                             control_totals=hct_set, characteristics=hc_set)

        # The household set must have shrunk to the control total.
        results = hh_set.size()
        should_be = [20000]
        size_ok = ma.allclose(should_be, results, rtol=1e-1)
        self.assertEqual(size_ok, True,
                         "Error, should_be: %s, but result: %s" % (should_be, results))

        # Each group keeps its share: scale the counts from before the run (33000 in total)
        # down to the new total of 20000.
        counts_before = [6000.0, 2000.0, 3000.0, 4000.0, 2000.0, 5000.0, 3000.0, 8000.0]
        results = self.get_count_all_groups(hh_set)
        should_be = [n / 33000.0 * 20000.0 for n in counts_before]
        distribution_ok = ma.allclose(results, should_be, rtol=0.05)
        self.assertEqual(distribution_ok, True,
                         "Error, should_be: %s,\n but result: %s" % (should_be, results))
            def run_model():
                """Run the household transition model for year 2000 and return mean ages by income bracket.

                Uses ``storage``, ``household_data``, ``hct_set`` and ``hc_set`` from the
                enclosing scope. The 'households' table is (re)written on every call before
                the dataset is built from it.

                Returns an array with the mean ``age_of_head`` of the households whose income is
                <= 1000, in (1000, 5000], in (5000, 10000] and > 10000, in that order.
                """
                storage.write_table(table_name='households',
                                    table_data=household_data)
                # NOTE(review): the households table is loaded through
                # HouseholdCharacteristicDataset here -- confirm this is the intended class.
                households = HouseholdCharacteristicDataset(
                    in_storage=storage, in_table_name='households')

                model = HouseholdTransitionModel()
                model.run(year=2000,
                          household_set=households,
                          control_totals=hct_set,
                          characteristics=hc_set)
                income = households.get_attribute("income")
                age = households.get_attribute("age_of_head")
                # Indices of the households falling into each of the four income brackets.
                idx1 = where(income <= 1000)[0]
                idx2 = where(logical_and(income <= 5000, income > 1000))[0]
                idx3 = where(logical_and(income <= 10000, income > 5000))[0]
                idx4 = where(income > 10000)[0]
                results = array([
                    age[idx1].mean(), age[idx2].mean(), age[idx3].mean(),
                    age[idx4].mean()
                ])
                # Parenthesised form prints the same on Python 2 and also parses on Python 3.
                print(results)
                return results
    def test_same_distribution_after_household_subtraction(self):
        """Remove households per subarea using only total controls (no marginal characteristics).

        The control totals for year 2000 are 8000 households for faz_id 1 and 12000 for
        faz_id 2. The test checks the number of households per faz_id after the run and
        that the distribution over the groups is the same as before the run.
        """
        control_totals_table = {
            "year": array([2000, 2000]),
            "total_number_of_households": array([8000, 12000]),
             "faz_id": array([1,2])
            }

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=self.households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set', table_data=control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what="household")

        storage.write_table(table_name='hc_set', table_data=self.household_characteristics_for_ht_data)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

        # This run is made without a person set.
        subarea_model = SubareaHouseholdTransitionModel(subarea_id_name="faz_id")
        subarea_model.run(year=2000, household_set=hh_set, control_totals=hct_set, characteristics=hc_set)

        # Count the households in each of the two areas and compare with the control totals.
        faz_of_household = hh_set.get_attribute("faz_id")
        results = array([0,0])
        for position, faz in enumerate([1,2]):
            results[position] = where(faz_of_household == faz)[0].size
        should_be = [8000, 12000]
        self.assertEqual(ma.allclose(should_be, results, rtol=1e-1), True,
                         "Error, should_be: %s, but result: %s" % (should_be, results))

        # Each group keeps its share within its area: scale the per-area counts from before
        # the run (16500 in total per area) to 8000 (area 1) and 12000 (area 2).
        counts_per_area_before = [3000.0, 1000.0, 1500.0, 2000.0, 1000.0, 2500.0, 1500.0, 4000.0]
        results = self.get_count_all_groups(hh_set)
        should_be = array([n/16500.0*8000.0 for n in counts_per_area_before] +
                          [n/16500.0*12000.0 for n in counts_per_area_before])
        self.assertEqual(ma.allclose(results, should_be, rtol=0.1), True,
                         "Error, should_be: %s,\n but result: %s" % (should_be, results))
Example #9
0
    def test_person_dataset(self):
        """Check that the person set is kept in step with the household set.

        Four households with nine persons are controlled on the 'persons' characteristic
        with totals of 0 households for category 0 and 4 for category 1. Afterwards the
        size of the person set and the household ids of its members are checked.
        """
        hh_table = {
            "household_id":arange(4)+1,
            "building_id": array([3,6,1,2], dtype=int32),
            "persons": array([1,2,2,4], dtype=int32)
            }
        characteristics_table = {
            "characteristic": array(2*['persons']),
            "min": array([1, 3]),
            "max": array([2,-1])
            }
        persons_table = {
            "person_id": arange(9)+1,
            "household_id": array([1,2,2,3,3,4,4,4,4]),
            "job_id": array([30, 50, 0, 1, 23, 54, 78, 2, 6]),
            }
        control_totals_table = {
            "year": array(2*[2000]),
            "persons": array([0,1]),
            "total_number_of_households": array([0, 4])
            }

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=hh_table)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='prs_set', table_data=persons_table)
        prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

        storage.write_table(table_name='hct_set', table_data=control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what="household",
                                      id_name=["year", "persons"])

        storage.write_table(table_name='hc_set', table_data=characteristics_table)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

        transition_model = HouseholdTransitionModel(debuglevel=3)
        transition_model.run(year=2000, person_set=prs_set, household_set=hh_set,
                             control_totals=hct_set, characteristics=hc_set)

        # Expected outcome: the first three households and their 5 persons are removed and
        # 3 copies of the last household (12 persons) are added, leaving 4 + 12 = 16 persons.
        self.assertEqual(prs_set.size(), 16,
                         "Error in size of the person_set. Should be 16, is %s." % prs_set.size())
        expected_household_ids = array(4*[4] + 4*[5] + 4*[6] + 4*[7])
        ids_match = ma.allequal(prs_set.get_attribute('household_id'), expected_household_ids)
        self.assertEqual(ids_match, True, "Error in assigning household_id to new persons.")
    def test_controlling_with_one_marginal_characteristic(self):
        """Control on the marginal characteristic age_of_head separately in each subarea.

        The control totals for year 2000 are given per faz_id and age_of_head group:
        20000 and 10000 households for faz_id 1, 5000 and 5000 for faz_id 2. The test
        checks the number of households per faz_id, the totals of each block of four
        household groups, and that the distribution inside each block is unchanged.
        """
        failure_template = "Error, should_be: %s, but result: %s"

        # IMPORTANT: the grouping indices of a marginal characteristic have to start at 0.
        control_totals_table = {
            "year": array([2000, 2000, 2000, 2000]),
            "age_of_head": array([0, 1, 0, 1]),
            "total_number_of_households": array([20000, 10000, 5000, 5000]),
            "faz_id": array([1, 1, 2, 2])
        }

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=self.households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set', table_data=control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what='household')

        storage.write_table(table_name='prs_set', table_data=self.person_data)
        prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

        storage.write_table(table_name='hc_set',
                            table_data=self.household_characteristics_for_ht_data)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

        subarea_model = SubareaHouseholdTransitionModel(subarea_id_name="faz_id")
        subarea_model.run(year=2000, person_set=prs_set, household_set=hh_set,
                          control_totals=hct_set, characteristics=hc_set)

        # Households per area: 30000 in faz_id 1 and 10000 in faz_id 2 (40000 in total).
        faz_of_household = hh_set.get_attribute("faz_id")
        results = array([0, 0])
        for position, faz in enumerate([1, 2]):
            results[position] = where(faz_of_household == faz)[0].size
        should_be = [30000, 10000]
        self.assertEqual(ma.allclose(should_be, results, rtol=1e-1), True,
                         failure_template % (should_be, results))

        # Every consecutive block of four household groups must add up to its control total.
        results = self.get_count_all_groups(hh_set)
        should_be = [20000, 10000, 5000, 5000]
        block_sums = [results[arange(start, start + 4)].sum() for start in (0, 4, 8, 12)]
        self.assertEqual(ma.allclose(block_sums, should_be, rtol=1e-1), True,
                         failure_template % (should_be, array(block_sums)))

        # Within each block the shares of the four groups must be the same as before the
        # run: scale the earlier counts (7500 and 9000 per block) to the control totals.
        first_block_before = [3000.0, 1000.0, 1500.0, 2000.0]
        second_block_before = [1000.0, 2500.0, 1500.0, 4000.0]
        should_be = (
            # area 1
            [n / 7500.0 * 20000.0 for n in first_block_before] +
            [n / 9000.0 * 10000.0 for n in second_block_before] +
            # area 2
            [n / 7500.0 * 5000.0 for n in first_block_before] +
            [n / 9000.0 * 5000.0 for n in second_block_before]
        )
        self.assertEqual(ma.allclose(results, should_be, rtol=0.1), True,
                         failure_template % (should_be, results))
Example #11
0
    def test_controlling_age_of_head(self):
        """Control on the single marginal characteristic age_of_head for the years 2000-2002.

        The model is run once per year on the same household and person sets. After every
        run the size of the household set and the number of households in each of the
        three age_of_head categories are compared with that year's control totals.
        """
        failure_template = "Error, should_be: %s, but result: %s"

        # Three rows per year, one for each age_of_head category.
        control_totals_table = {
            "year": array([2000, 2000, 2000, 2001, 2001, 2001, 2002, 2002, 2002]),
            "age_of_head": array([0,1,2,0,1,2, 0,1,2]),
            "total_number_of_households": array([25013, 21513, 18227,  # 2000
                                                 10055, 15003, 17999, # 2001
                                                 15678, 14001, 20432]) # 2002
            }

        characteristics_table = {
            "characteristic": array(3*['age_of_head']),
            "min": array([0, 35, 65]),
            "max": array([34, 64, -1])
            }

        hh_table = {
            "household_id":arange(15000)+1,
            "building_id": array(15000*[1]),
            "age_of_head": array(1000*[25] + 1000*[28] + 2000*[32] + 1000*[34] +
                            2000*[35] + 1000*[40] + 1000*[54]+ 1000*[62] +
                            1000*[65] + 1000*[68] + 2000*[71] + 1000*[98]),
            "persons": array(1000*[2] + 2000*[3] + 1000*[1] + 1000*[6] + 1000*[1] + 1000*[4] +
                                3000*[1]+ 5000*[5], dtype=int8)
            }
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=hh_table)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set', table_data=control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what='household',
                                      id_name=['year' ,'age_of_head'])

        storage.write_table(table_name='hc_set', table_data=characteristics_table)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

        storage.write_table(table_name='prs_set', table_data=self.person_data)
        prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

        def count_by_category():
            # Number of households per age_of_head category: the first category is bounded
            # only by its max, the last only by its min, the ones in between by both.
            ages = hh_set.get_attribute('age_of_head')
            lower = hc_set.get_attribute("min")
            upper = hc_set.get_attribute("max")
            last = hc_set.size()-1
            counts = zeros(hc_set.size(), dtype=int32)
            counts[0] = where(ages <= upper[0], 1,0).sum()
            for i in range(1, last):
                counts[i] = logical_and(where(ages >= lower[i], 1,0),
                                        where(ages <= upper[i], 1,0)).sum()
            counts[last] = where(ages >= lower[last], 1,0).sum()
            return counts

        model = HouseholdTransitionModel(debuglevel=3)
        # Starting from 5000 households per category, the 2000 totals add households in
        # every category, the 2001 totals remove households in every category, and the
        # 2002 totals both add and remove. The slices follow the row layout written above.
        yearly_rows = ((2000, slice(0, 3)), (2001, slice(3, 6)), (2002, slice(6, 9)))
        for year, rows in yearly_rows:
            model.run(year=year, person_set=prs_set, household_set=hh_set,
                      control_totals=hct_set, characteristics=hc_set)

            results = hh_set.size()
            should_be = [(hct_set.get_attribute("total_number_of_households")[rows]).sum()]
            self.assertEqual(ma.allclose(should_be, results, rtol=1e-1), True,
                             failure_template % (should_be, results))

            results = count_by_category()
            should_be = hct_set.get_attribute("total_number_of_households")[rows]
            self.assertEqual(ma.allclose(results, should_be, rtol=1e-6), True,
                             failure_template % (should_be, results))
Example #12
0
    def test_controlling_income(self):
        """Control household totals by one marginal characteristic, income.

        The model is run for the years 2000, 2001 and 2002 on the same
        household set.  After every run the test checks that

        * the total number of households is within 10% of the sum of that
          year's control totals, and
        * the number of households in each of the four income categories
          matches the corresponding control total (rtol=1e-6).

        The rows of the household characteristics table are deliberately not
        stored in ascending income order; ``hc_sorted_index`` lists the rows
        in ascending order.
        """
        annual_household_control_totals_data = {
            "year": array([2000, 2000, 2000, 2000, 2001, 2001, 2001, 2001, 2002, 2002, 2002, 2002]),
            "income": array([0,1,2,3,0,1,2,3, 0,1,2,3]),
            "total_number_of_households": array([25013, 21513, 18227, 18493, # 2000
                                                 10055, 15003, 17999, 17654, # 2001
                                                 15678, 14001, 20432, 14500]) # 2002
            }

        household_characteristics_for_ht_data = {
            "characteristic": array(4*['income']),
            "min": array([0, 40000, 120000, 70000]), # category 120000 has index 3 and category 70000 has index 2
            "max": array([39999, 69999, -1, 119999]) # (testing row invariance)
            }
        # rows of the characteristics table in ascending income order
        hc_sorted_index = array([0,1,3,2])
        households_data = {
            "household_id":arange(20000)+1,
            "building_id": array(19950*[1] + 50*[0]),
            "income": array(1000*[1000] + 1000*[10000] + 2000*[20000] + 1000*[35000] + 2000*[45000] +
                                1000*[50000] + 2000*[67000]+ 2000*[90000] + 1000*[100005] + 2000*[110003] +
                                1000*[120000] + 1000*[200000] + 2000*[500000] + 1000*[630000]),
            "persons": array(3000*[2] + 2000*[3] + 1000*[1] + 1000*[6] + 1000*[1] + 1000*[4] +
                                3000*[1]+ 8000*[5], dtype=int8)
            }
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set', table_data=annual_household_control_totals_data)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what='household', id_name=['year' ,'income'])

        storage.write_table(table_name='hc_set', table_data=household_characteristics_for_ht_data)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

        storage.write_table(table_name='prs_set', table_data=self.person_data)
        prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

        model = HouseholdTransitionModel(debuglevel=3)
        error_message = "Error, should_be: %s, but result: %s"
        n_categories = hc_set.size()

        def count_households_per_category():
            """Count the current households in each income category,
            ordered by ascending income."""
            income = hh_set.get_attribute('income')
            lower = hc_set.get_attribute("min")
            upper = hc_set.get_attribute("max")
            counts = zeros(n_categories, dtype=int32)
            # lowest category: only the upper bound is checked
            counts[0] = (income <= upper[hc_sorted_index[0]]).sum()
            for i in range(1, n_categories - 1):
                row = hc_sorted_index[i]
                counts[i] = logical_and(income >= lower[row], income <= upper[row]).sum()
            # highest category: only the lower bound is checked (its max is -1)
            counts[-1] = (income >= lower[hc_sorted_index[-1]]).sum()
            return counts

        def run_and_check(year, first_row):
            """Run the model for ``year`` and compare the household set with
            the control totals stored in rows
            ``first_row .. first_row + n_categories - 1``."""
            model.run(year=year, person_set=prs_set, household_set=hh_set, control_totals=hct_set, characteristics=hc_set)
            year_totals = hct_set.get_attribute("total_number_of_households")[first_row:first_row + n_categories]

            # overall size of the household set (loose 10% tolerance)
            results = hh_set.size()
            should_be = [year_totals.sum()]
            self.assertTrue(ma.allclose(should_be, results, rtol=1e-1),
                            error_message % (should_be, results))

            # size of every income category
            results = count_households_per_category()
            self.assertTrue(ma.allclose(results, year_totals, rtol=1e-6),
                            error_message % (year_totals, results))

        # this run should add households in all four categories
        run_and_check(2000, 0)
        # this run should remove households in all four categories
        run_and_check(2001, n_categories)
        # this run should add and remove households
        run_and_check(2002, 2 * n_categories)
Example #13
0
        def xtest_power_HTM_controlling_with_marginal_characteristics(self):
            """Estimate how often the stochastic test detects a known error.

            A population of 5000 households in four income groups is reduced
            to 3000 by the household transition model.  The mean age of head
            per income group before the run is used as the expected result;
            the result returned to the stochastic test has ``self.wrong_number``
            added to its last element.  For every combination of ``r`` and
            significance level, ``run_stochastic_test`` is called ``R`` times
            and the share of calls that raised is printed as the power.

            The ``x`` prefix keeps the default unittest loader from collecting
            this method.
            """
            nhhs = 5000
            ngroups = 4
            # Floor division: the group sizes are used as list repetition
            # counts and therefore must be integers.
            nhhsg = nhhs // ngroups
            nhhslg = nhhs - (ngroups - 1) * nhhsg
            should_nhhs = nhhs - 2000
            half = nhhsg // 2
            half_last = nhhslg // 2
            logger.be_quiet()
            household_data = {
                "age_of_head": array(half * [18] + (nhhsg - half) * [35] +
                                     half * [30] + (nhhsg - half) * [40] +
                                     half * [38] + (nhhsg - half) * [65] +
                                     half_last * [50] + (nhhslg - half_last) * [80]),
                "income": array(nhhsg * [500] + nhhsg * [2000] +
                                nhhsg * [7000] + nhhslg * [15000]),
                "household_id": arange(nhhs) + 1,
            }
            household_characteristics_for_ht_data = {
                "characteristic": array(4 * ["income"] + 4 * ["age_of_head"]),
                "min": array([0, 1001, 5001, 10001, 0, 31, 41, 61]),
                "max": array([1000, 5000, 10000, -1, 30, 40, 60, -1]),
            }
            annual_household_control_totals_data = {
                "year": array([2000]),
                "total_number_of_households": array([should_nhhs]),
            }

            storage = StorageFactory().get_storage('dict_storage')

            storage.write_table(table_name='hc_set', table_data=household_characteristics_for_ht_data)
            hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

            # Control totals and households are loaded with their own dataset
            # classes, the same way the other tests in this file set them up
            # (previously both were read as HouseholdCharacteristicDataset).
            storage.write_table(table_name='hct_set', table_data=annual_household_control_totals_data)
            hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                          what='household', id_name=['year'])

            storage.write_table(table_name='households', table_data=household_data)
            households = HouseholdDataset(in_storage=storage, in_table_name='households')

            def mean_age_by_income_group(household_set):
                """Return the mean age of head in each of the four income groups."""
                income = household_set.get_attribute("income")
                age = household_set.get_attribute("age_of_head")
                group_masks = [income <= 1000,
                               logical_and(income <= 5000, income > 1000),
                               logical_and(income <= 10000, income > 5000),
                               income > 10000]
                return array([age[mask].mean() for mask in group_masks])

            expected_results = mean_age_by_income_group(households)

            def run_model():
                """Run the model on a freshly loaded household table and
                return the group means with the last one deliberately shifted."""
                storage.write_table(table_name='households', table_data=household_data)
                fresh_households = HouseholdDataset(in_storage=storage, in_table_name='households')

                model = HouseholdTransitionModel()
                model.run(year=2000, household_set=fresh_households, control_totals=hct_set, characteristics=hc_set)
                results = mean_age_by_income_group(fresh_households)
                results[-1] = results[-1] + self.wrong_number
                return results

            R = 1000  # number of repetitions per (r, level) combination
            # other settings that have been tried: r = [2, 5, 10, 50, 100, 1000], r = [2, 5, 10, 15, 20]
            r = [2, 5]
            levels = [0.05, 0.01]
            # other settings: wrong_numbers = [0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, 2.5]
            wrong_numbers = [1]
            for wn in wrong_numbers:
                self.wrong_number = wn
                # Single-argument print calls produce the same output under
                # the print statement and the print function.
                print("Wrong number =  %s" % self.wrong_number)
                power = zeros((len(r), len(levels)))
                for ir, n_runs in enumerate(r):
                    for il, level in enumerate(levels):
                        print("r= %s , level= %s" % (n_runs, level))
                        seed(1)
                        for _ in range(R):
                            try:
                                self.run_stochastic_test(__file__, run_model, expected_results,
                                                         n_runs, significance_level=level, transformation=None)
                            except Exception:
                                # Every failure counts as a rejection.  Catching
                                # Exception (not a bare except) lets Ctrl-C
                                # interrupt this long loop.
                                # NOTE(review): setup errors are counted as
                                # rejections too; narrow this once the exception
                                # raised by run_stochastic_test is confirmed.
                                power[ir, il] += 1
                        print("Power:  %s" % (power[ir, il] / float(R)))
                print(power / float(R))
Example #14
0
def run_HTM(niter):
    """Run the household transition model ``niter`` times on the same data.

    A table of 5000 households in four income groups is written to an
    in-memory storage together with a control total of 3000 households for
    the year 2000.  Every iteration loads a fresh household dataset from that
    table, runs the model and records the mean age of head per income group.

    :param niter: number of model runs.
    :return: array of shape ``(niter, 4)``; row ``i`` holds the mean age of
        head of the income groups (<=1000, 1001-5000, 5001-10000, >10000)
        after run ``i``.
    """
    nhhs = 5000
    ngroups = 4
    # Floor division: the group sizes are used as list repetition counts and
    # must stay integers even when ``/`` means true division.
    nhhsg = nhhs // ngroups
    nhhslg = nhhs - (ngroups - 1) * nhhsg
    should_nhhs = nhhs - 2000
    half = nhhsg // 2
    half_last = nhhslg // 2

    storage = StorageFactory().get_storage('dict_storage')

    hc_set_table_name = 'hc_set'
    storage.write_table(
        table_name=hc_set_table_name,
        table_data={
            'characteristic': array(4 * ['income'] + 4 * ['age_of_head']),
            'min': array([0, 1001, 5001, 10001, 0, 31, 41, 61]),
            'max': array([1000, 5000, 10000, -1, 30, 40, 60, -1])
        },
    )

    hct_set_table_name = 'hct_set'
    storage.write_table(
        table_name=hct_set_table_name,
        table_data={
            'year': array([2000]),
            'total_number_of_households': array([should_nhhs])
        },
    )

    households_table_name = 'households'
    storage.write_table(
        table_name=households_table_name,
        table_data={
            'age_of_head':
            array(half * [18] + (nhhsg - half) * [35] +
                  half * [30] + (nhhsg - half) * [40] +
                  half * [38] + (nhhsg - half) * [65] +
                  half_last * [50] + (nhhslg - half_last) * [80]),
            'income':
            array(nhhsg * [500] + nhhsg * [2000] + nhhsg * [7000] +
                  nhhslg * [15000]),
            'household_id':
            arange(nhhs) + 1
        },
    )

    hc_set = HouseholdCharacteristicDataset(in_storage=storage,
                                            in_table_name=hc_set_table_name)
    hct_set = ControlTotalDataset(in_storage=storage,
                                  in_table_name=hct_set_table_name,
                                  what='household',
                                  id_name=['year'])

    logger.be_quiet()
    result = zeros((niter, 4))
    for iteration in range(niter):
        # A new dataset is created every time, so each run reads the
        # household table from storage again.
        households = HouseholdDataset(in_storage=storage,
                                      in_table_name=households_table_name)

        model = HouseholdTransitionModel()
        model.run(year=2000,
                  household_set=households,
                  control_totals=hct_set,
                  characteristics=hc_set)
        income = households.get_attribute('income')
        age = households.get_attribute('age_of_head')
        idx1 = where(income <= 1000)[0]
        idx2 = where(logical_and(income <= 5000, income > 1000))[0]
        idx3 = where(logical_and(income <= 10000, income > 5000))[0]
        idx4 = where(income > 10000)[0]
        result[iteration, :] = array([
            age[idx1].mean(), age[idx2].mean(), age[idx3].mean(),
            age[idx4].mean()
        ])

    return result
Example #15
0
    def test_same_distribution_after_household_addition(self):
        """Add households using control totals only (no marginal
        characteristics) and verify that the share of every household group
        within each large area is unchanged.
        """
        area_ids = [1, 2]
        area_targets = [20000, 30000]

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set',
                            table_data=self.households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        control_totals_table = {
            "year": array([2000, 2000]),
            "total_number_of_households": array(area_targets),
            "large_area_id": array(area_ids)
        }
        storage.write_table(table_name='hct_set',
                            table_data=control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage,
                                      in_table_name='hct_set',
                                      what="household")

        storage.write_table(
            table_name='hc_set',
            table_data=self.household_characteristics_for_ht_data)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage,
                                                in_table_name='hc_set')

        RegionalHouseholdTransitionModel().run(year=2000,
                                               household_set=hh_set,
                                               control_totals=hct_set,
                                               characteristics=hc_set)

        # household totals per large area: 20000 in area 1, 30000 in area 2
        # (compared with a relative tolerance of 10%)
        area_of_household = hh_set.get_attribute("large_area_id")
        results = array([
            where(area_of_household == area_id)[0].size
            for area_id in area_ids
        ])
        should_be = area_targets
        self.assertEqual(
            ma.allclose(should_be, results, rtol=1e-1), True,
            "Error, should_be: %s, but result: %s" % (should_be, results))

        # households without a grid cell (grid_id <= 0) should number about
        # as many as the newly created households
        is_unplaced = hh_set.get_attribute("grid_id") <= 0
        results = where(is_unplaced)[0].size
        should_be = [17000]
        self.assertEqual(
            ma.allclose(should_be, results, rtol=1e-1), True,
            "Error, should_be: %s, but result: %s" % (should_be, results))

        # each group keeps its share of the area total: the eight group
        # counts (out of 16500) are scaled to the new total of each area,
        # area 1 first, then area 2
        group_counts = [3000.0, 1000.0, 1500.0, 2000.0,
                        1000.0, 2500.0, 1500.0, 4000.0]
        results = self.get_count_all_groups(hh_set)
        should_be = array([
            count / 16500.0 * area_total
            for area_total in (20000.0, 30000.0)
            for count in group_counts
        ])
        self.assertEqual(
            ma.allclose(results, should_be, rtol=0.1), True,
            "Error, should_be: %s, but result: %s" % (should_be, results))

        # the attribute data types must be the expected ones after the run
        dtype_expectations = [
            ("age_of_head", int32,
             "Error in data type of the new household set. Should be: int32, is: %s"),
            ("income", int32,
             "Error in data type of the new household set. Should be: int32, is: %s"),
            ("persons", int8,
             "Error in data type of the new household set. Should be: int8, is: %s"),
        ]
        for attribute, wanted_dtype, message in dtype_expectations:
            actual_dtype = hh_set.get_attribute(attribute).dtype
            self.assertEqual(actual_dtype, wanted_dtype,
                             message % str(actual_dtype))
        def xtest_power_HTM_controlling_with_marginal_characteristics(self):
            """Estimate how often the stochastic test detects a known error.

            A population of 5000 households in four income groups is reduced
            to 3000 by the household transition model.  The mean age of head
            per income group before the run is used as the expected result;
            the result returned to the stochastic test has ``self.wrong_number``
            added to its last element.  For every combination of ``r`` and
            significance level, ``run_stochastic_test`` is called ``R`` times
            and the share of calls that raised is printed as the power.
            """
            nhhs = 5000
            ngroups = 4
            # Floor division: the group sizes are used as list repetition
            # counts and therefore must be integers.
            nhhsg = nhhs // ngroups
            nhhslg = nhhs - (ngroups - 1) * nhhsg
            should_nhhs = nhhs - 2000
            half = nhhsg // 2
            half_last = nhhslg // 2
            logger.be_quiet()
            household_data = {
                "age_of_head":
                array(half * [18] + (nhhsg - half) * [35] +
                      half * [30] + (nhhsg - half) * [40] +
                      half * [38] + (nhhsg - half) * [65] +
                      half_last * [50] + (nhhslg - half_last) * [80]),
                "income":
                array(nhhsg * [500] + nhhsg * [2000] + nhhsg * [7000] +
                      nhhslg * [15000]),
                "household_id":
                arange(nhhs) + 1
            }
            household_characteristics_for_ht_data = {
                "characteristic": array(4 * ["income"] + 4 * ["age_of_head"]),
                "min": array([0, 1001, 5001, 10001, 0, 31, 41, 61]),
                "max": array([1000, 5000, 10000, -1, 30, 40, 60, -1])
            }
            annual_household_control_totals_data = {
                "year": array([2000]),
                "total_number_of_households": array([should_nhhs])
            }

            storage = StorageFactory().get_storage('dict_storage')

            storage.write_table(
                table_name='hc_set',
                table_data=household_characteristics_for_ht_data)
            hc_set = HouseholdCharacteristicDataset(in_storage=storage,
                                                    in_table_name='hc_set')

            # Control totals and households are loaded with their own dataset
            # classes, the same way the other tests in this file set them up
            # (previously both were read as HouseholdCharacteristicDataset).
            storage.write_table(
                table_name='hct_set',
                table_data=annual_household_control_totals_data)
            hct_set = ControlTotalDataset(in_storage=storage,
                                          in_table_name='hct_set',
                                          what='household',
                                          id_name=['year'])

            storage.write_table(table_name='households',
                                table_data=household_data)
            households = HouseholdDataset(in_storage=storage,
                                          in_table_name='households')

            def mean_age_by_income_group(household_set):
                """Return the mean age of head in each of the four income groups."""
                income = household_set.get_attribute("income")
                age = household_set.get_attribute("age_of_head")
                group_masks = [
                    income <= 1000,
                    logical_and(income <= 5000, income > 1000),
                    logical_and(income <= 10000, income > 5000),
                    income > 10000,
                ]
                return array([age[mask].mean() for mask in group_masks])

            expected_results = mean_age_by_income_group(households)

            def run_model():
                """Run the model on a freshly loaded household table and
                return the group means with the last one deliberately shifted."""
                storage.write_table(table_name='households',
                                    table_data=household_data)
                fresh_households = HouseholdDataset(
                    in_storage=storage, in_table_name='households')

                model = HouseholdTransitionModel()
                model.run(year=2000,
                          household_set=fresh_households,
                          control_totals=hct_set,
                          characteristics=hc_set)
                results = mean_age_by_income_group(fresh_households)
                results[-1] = results[-1] + self.wrong_number
                return results

            R = 1000  # number of repetitions per (r, level) combination
            # other settings that have been tried: r = [2, 5, 10, 50, 100, 1000], r = [2, 5, 10, 15, 20]
            r = [2, 5]
            levels = [0.05, 0.01]
            # other settings: wrong_numbers = [0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, 2.5]
            wrong_numbers = [1]
            for wn in wrong_numbers:
                self.wrong_number = wn
                # Single-argument print calls produce the same output under
                # the print statement and the print function.
                print("Wrong number =  %s" % self.wrong_number)
                power = zeros((len(r), len(levels)))
                for ir, n_runs in enumerate(r):
                    for il, level in enumerate(levels):
                        print("r= %s , level= %s" % (n_runs, level))
                        seed(1)
                        for _ in range(R):
                            try:
                                self.run_stochastic_test(
                                    __file__,
                                    run_model,
                                    expected_results,
                                    n_runs,
                                    significance_level=level,
                                    transformation=None)
                            except Exception:
                                # Every failure counts as a rejection.  Catching
                                # Exception (not a bare except) lets Ctrl-C
                                # interrupt this long loop.
                                # NOTE(review): setup errors are counted as
                                # rejections too; narrow this once the exception
                                # raised by run_stochastic_test is confirmed.
                                power[ir, il] += 1
                        print("Power:  %s" % (power[ir, il] / float(R)))
                print(power / float(R))