def run_model():
    """Run the household transition model once and summarize the result.

    Rewrites the 'households' table from ``household_data``, reloads it as a
    dataset, runs the model for year 2000 and returns an array with the mean
    ``age_of_head`` in each of four income brackets: income <= 1000,
    (1000, 5000], (5000, 10000] and income > 10000.

    ``storage``, ``household_data``, ``hct_set`` and ``hc_set`` are free
    variables taken from the enclosing scope.
    """
    storage.write_table(table_name='households', table_data=household_data)
    hh_dataset = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='households')

    transition_model = HouseholdTransitionModel()
    transition_model.run(year=2000, household_set=hh_dataset,
                         control_totals=hct_set, characteristics=hc_set)

    income = hh_dataset.get_attribute("income")
    age = hh_dataset.get_attribute("age_of_head")

    # One boolean mask per income bracket, lowest bracket first.
    bracket_masks = (
        income <= 1000,
        logical_and(income <= 5000, income > 1000),
        logical_and(income <= 10000, income > 5000),
        income > 10000,
    )
    results = array([age[where(mask)[0]].mean() for mask in bracket_masks])
    print(results)
    return results
def prepare_for_run(self, storage, **kwargs):
    """Load the datasets needed to run the household transition model.

    Builds the household control totals and the household characteristics
    from ``storage``, then hands ``storage``, the control totals and any extra
    keyword arguments to ``sample_control_totals``.

    Returns the tuple ``(control_totals, characteristics)``.
    """
    from urbansim.datasets.control_total_dataset import ControlTotalDataset
    from urbansim.datasets.household_characteristic_dataset import HouseholdCharacteristicDataset
    from urbansim.models.employment_transition_model import sample_control_totals

    hh_control_totals = ControlTotalDataset(in_storage=storage, what="household")
    hh_characteristics = HouseholdCharacteristicDataset(in_storage=storage)
    sample_control_totals(storage, hh_control_totals, **kwargs)
    return (hh_control_totals, hh_characteristics)
def test_controlling_with_one_marginal_characteristic(self):
    """Control on one marginal characteristic (age_of_head).

    The control totals are keyed by year and age_of_head index, which lumps
    the 8 household groups into two large groups (groups 1-4 and groups 5-8).
    After one model run the test expects each large group to match its
    control total and the shares of the groups inside each large group to be
    the same as before the run.
    """
    # IMPORTANT: marginal characteristics grouping indices have to start at 0!
    # age_of_head index 0 (groups 1-4) is controlled to 25000 households and
    # index 1 (groups 5-8) to 15000 households for the year 2000.
    control_totals_data = {
        "year": array([2000, 2000]),
        "age_of_head": array([0, 1]),
        "total_number_of_households": array([25000, 15000]),
    }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='hh_set', table_data=self.households_data)
    households = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

    storage.write_table(table_name='hct_set', table_data=control_totals_data)
    control_totals = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                         what='household', id_name=['year', 'age_of_head'])

    storage.write_table(table_name='hc_set', table_data=self.household_characteristics_for_ht_data)
    characteristics = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

    storage.write_table(table_name='prs_set', table_data=self.person_data)
    persons = PersonDataset(in_storage=storage, in_table_name='prs_set')

    transition_model = HouseholdTransitionModel()
    transition_model.run(year=2000, person_set=persons, household_set=households,
                         control_totals=control_totals, characteristics=characteristics)

    failure_message = "Error, should_be: %s, but result: %s"

    # The two control totals add up to 40000 households overall.
    actual = households.size()
    expected = [40000]
    self.assertEqual(ma.allclose(expected, actual, rtol=1e-1), True,
                     failure_message % (expected, actual))

    # Groups 1-4 grow from 15000 to 25000 households (+10000) and
    # groups 5-8 shrink from 18000 to 15000 households (-3000).
    actual = self.get_count_all_groups(households)
    expected = [25000, 15000]
    large_group_totals = [sum(actual[0:4]), sum(actual[4:8])]
    self.assertEqual(ma.allclose(large_group_totals, expected, rtol=1e-1), True,
                     failure_message % (expected, actual))

    # Inside each large group every group keeps its share from before the
    # run, scaled to the new large-group total.
    expected = ([count / 15000.0 * 25000.0 for count in (6000.0, 2000.0, 3000.0, 4000.0)] +
                [count / 18000.0 * 15000.0 for count in (2000.0, 5000.0, 3000.0, 8000.0)])
    self.assertEqual(ma.allclose(actual, expected, rtol=0.05), True,
                     failure_message % (expected, actual))
def test_controlling_with_three_marginal_characteristics(self):
    """Control on all three marginal characteristics at once.

    Controlling on age_of_head, income and persons partitions the 8 household
    groups into the same 8 groups, each with its own control total.  After one
    model run the count in every group must match the control total specified
    for that group.
    """
    # IMPORTANT: marginal characteristics grouping indices have to start at 0!
    annual_household_control_totals_data = {
        "year": array(8*[2000]),
        "age_of_head": array(4*[0] + 4*[1]),
        "income": array(2*[0] + 2*[1] + 2*[0] + 2*[1]),
        "persons": array([0,1,0,1,0,1,0,1]),
        "total_number_of_households": array([4000, 5000, 1000, 3000, 0, 6000, 3000, 8000])
        }

    # Two categories per characteristic.  (The columns used to have uneven
    # sizes; the last element of min and max was removed.)
    household_characteristics_for_ht_data = {
        "characteristic": array(2*['age_of_head'] + 2*['income'] + 2*['persons']),
        "min": array([0, 50, 0, 40000, 0, 3]),
        "max": array([49, 100, 39999, -1, 2, -1])
        }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='hh_set', table_data=self.households_data)
    hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

    storage.write_table(table_name='hct_set', table_data=annual_household_control_totals_data)
    hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what='household',
                                  id_name=['year', 'age_of_head', 'income', 'persons'])

    storage.write_table(table_name='hc_set', table_data=household_characteristics_for_ht_data)
    hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

    # Unplace some households: every household whose building_id is not 10
    # gets building_id 0.  ("!=" replaces the "<>" spelling, which Python 3
    # no longer accepts; both mean the same in Python 2.)
    where10 = where(hh_set.get_attribute("building_id") != 10)[0]
    hh_set.modify_attribute(name="building_id", data=zeros(where10.size), index=where10)

    storage.write_table(table_name='prs_set', table_data=self.person_data)
    prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

    model = HouseholdTransitionModel()
    model.run(year=2000, person_set=prs_set, household_set=hh_set,
              control_totals=hct_set, characteristics=hc_set)

    # Check that there are indeed 30000 total households after running the
    # model (the sum of the eight control totals).
    results = hh_set.size()
    should_be = [30000]
    self.assertEqual(ma.allclose(should_be, results, rtol=1e-1), True,
                     "Error, should_be: %s, but result: %s" % (should_be, results))

    # Check that the number of households in each group matches the control
    # total specified for it (ma.allclose with its default tolerances).
    results = self.get_count_all_groups(hh_set)
    should_be = [4000, 5000, 1000, 3000, 0, 6000, 3000, 8000]
    self.assertEqual(ma.allclose(results, should_be), True,
                     "Error, should_be: %s, but result: %s" % (should_be, results))
def test_same_distribution_after_household_addition(self):
    """Add households with a single overall control total.

    With no marginal characteristics the model only has to reach 50000
    households in total.  The test checks the new total, the number of
    unplaced households, that the share of every group is unchanged, and
    that the attribute dtypes are int32/int32/int8.
    """
    control_totals_data = {
        "year": array([2000]),
        "total_number_of_households": array([50000]),
    }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='hh_set', table_data=self.households_data)
    households = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

    storage.write_table(table_name='prs_set', table_data=self.person_data)
    persons = PersonDataset(in_storage=storage, in_table_name='prs_set')

    storage.write_table(table_name='hct_set', table_data=control_totals_data)
    control_totals = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                         what="household", id_name="year")

    storage.write_table(table_name='hc_set', table_data=self.household_characteristics_for_ht_data)
    characteristics = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

    transition_model = HouseholdTransitionModel()
    transition_model.run(year=2000, person_set=persons, household_set=households,
                         control_totals=control_totals, characteristics=characteristics)

    failure_message = "Error, should_be: %s, but result: %s"

    # The household set must have grown to the control total of 50000.
    actual = households.size()
    expected = [50000]
    self.assertEqual(ma.allclose(expected, actual, rtol=1e-1), True,
                     failure_message % (expected, actual))

    # Households with building_id <= 0 are unplaced; there should be as many
    # of them as households were added (17000).
    actual = where(households.get_attribute("building_id") <= 0)[0].size
    expected = [17000]
    self.assertEqual(ma.allclose(expected, actual, rtol=1e-1), True,
                     failure_message % (expected, actual))

    # Every group keeps its share of the original 33000 households.
    actual = self.get_count_all_groups(households)
    original_counts = (6000.0, 2000.0, 3000.0, 4000.0, 2000.0, 5000.0, 3000.0, 8000.0)
    expected = array([count / 33000.0 * 50000.0 for count in original_counts])
    self.assertEqual(ma.allclose(actual, expected, rtol=0.05), True,
                     failure_message % (expected, actual))

    # The attributes must have these dtypes after the run.
    dtype_checks = (
        ("age_of_head", int32, "int32"),
        ("income", int32, "int32"),
        ("persons", int8, "int8"),
    )
    for attribute_name, expected_dtype, dtype_label in dtype_checks:
        actual_dtype = households.get_attribute(attribute_name).dtype
        self.assertEqual(actual_dtype, expected_dtype,
                         "Error in data type of the new household set. Should be: %s, is: %s"
                         % (dtype_label, str(actual_dtype)))
def test_same_distribution_after_household_subtraction(self):
    """Remove households with a single overall control total.

    With no marginal characteristics the model only has to shrink the
    household set to 20000.  The test checks the new total and that every
    group keeps the share it had before the run.
    """
    control_totals_data = {
        "year": array([2000]),
        "total_number_of_households": array([20000]),
    }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='hh_set', table_data=self.households_data)
    households = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

    storage.write_table(table_name='hct_set', table_data=control_totals_data)
    control_totals = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                         what="household", id_name="year")

    storage.write_table(table_name='hc_set', table_data=self.household_characteristics_for_ht_data)
    characteristics = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

    # No person set is passed to this run.
    transition_model = HouseholdTransitionModel()
    transition_model.run(year=2000, household_set=households,
                         control_totals=control_totals, characteristics=characteristics)

    # The household set must have shrunk to the control total of 20000.
    actual = households.size()
    expected = [20000]
    self.assertEqual(ma.allclose(expected, actual, rtol=1e-1), True,
                     "Error, should_be: %s, but result: %s" % (expected, actual))

    # Every group keeps its share of the original 33000 households.
    actual = self.get_count_all_groups(households)
    original_counts = (6000.0, 2000.0, 3000.0, 4000.0, 2000.0, 5000.0, 3000.0, 8000.0)
    expected = [count / 33000.0 * 20000.0 for count in original_counts]
    self.assertEqual(ma.allclose(actual, expected, rtol=0.05), True,
                     "Error, should_be: %s,\n but result: %s" % (expected, actual))
def run_model():
    """Run the household transition model and return mean ages by income.

    The 'households' table is rewritten from ``household_data`` and reloaded,
    the model is run for year 2000, and the mean ``age_of_head`` is computed
    for the income brackets <= 1000, (1000, 5000], (5000, 10000] and > 10000.
    The four means are printed and returned as an array.

    ``storage``, ``household_data``, ``hct_set`` and ``hc_set`` come from the
    enclosing scope.
    """
    storage.write_table(table_name='households', table_data=household_data)
    reloaded = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='households')
    HouseholdTransitionModel().run(year=2000, household_set=reloaded,
                                   control_totals=hct_set, characteristics=hc_set)

    income = reloaded.get_attribute("income")
    age = reloaded.get_attribute("age_of_head")

    # Row indices of the households falling into each income bracket.
    lowest = where(income <= 1000)[0]
    low = where(logical_and(income <= 5000, income > 1000))[0]
    high = where(logical_and(income <= 10000, income > 5000))[0]
    highest = where(income > 10000)[0]

    bracket_means = [age[rows].mean() for rows in (lowest, low, high, highest)]
    results = array(bracket_means)
    print(results)
    return results
def test_same_distribution_after_household_subtraction(self):
    """Remove households per subarea with one control total per faz_id.

    There are no marginal characteristics; area 1 is controlled to 8000
    households and area 2 to 12000.  The test checks the per-area totals and
    that the groups inside each area keep their shares.
    """
    control_totals_data = {
        "year": array([2000, 2000]),
        "faz_id": array([1, 2]),
        "total_number_of_households": array([8000, 12000]),
    }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='hh_set', table_data=self.households_data)
    households = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

    storage.write_table(table_name='hct_set', table_data=control_totals_data)
    control_totals = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what="household")

    storage.write_table(table_name='hc_set', table_data=self.household_characteristics_for_ht_data)
    characteristics = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

    # The variant of this run that also passes a person set is disabled:
    # storage.write_table(table_name='prs_set', table_data=self.person_data)
    # prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')
    # model.run(year=2000, person_set=prs_set, household_set=hh_set, control_totals=hct_set, characteristics=hc_set)
    subarea_model = SubareaHouseholdTransitionModel(subarea_id_name="faz_id")
    subarea_model.run(year=2000, household_set=households,
                      control_totals=control_totals, characteristics=characteristics)

    # Count the households per area: 8000 expected in area 1, 12000 in area 2.
    faz_of_household = households.get_attribute("faz_id")
    actual = array([0, 0])
    for position, faz in enumerate([1, 2]):
        actual[position] = where(faz_of_household == faz)[0].size
    expected = [8000, 12000]
    self.assertEqual(ma.allclose(expected, actual, rtol=1e-1), True,
                     "Error, should_be: %s, but result: %s" % (expected, actual))

    # Inside each area every group keeps its share of the original 16500
    # households, scaled to the area's control total.
    actual = self.get_count_all_groups(households)
    original_counts = (3000.0, 1000.0, 1500.0, 2000.0, 1000.0, 2500.0, 1500.0, 4000.0)
    expected = array([count / 16500.0 * 8000.0 for count in original_counts] +    # area 1
                     [count / 16500.0 * 12000.0 for count in original_counts])    # area 2
    self.assertEqual(ma.allclose(actual, expected, rtol=0.1), True,
                     "Error, should_be: %s,\n but result: %s" % (expected, actual))
def test_person_dataset(self):
    """Check that the person set is kept in step with the household set.

    Four households with 9 persons are controlled on the ``persons``
    characteristic: zero households in category 0 (1-2 persons) and four in
    category 1 (3 or more persons).  The test checks the size of the person
    set after the run and the household_id assigned to every person.
    """
    households_data = {
        "household_id": arange(4) + 1,
        "building_id": array([3, 6, 1, 2], dtype=int32),
        "persons": array([1, 2, 2, 4], dtype=int32),
    }
    characteristics_data = {
        "characteristic": array(2 * ['persons']),
        "min": array([1, 3]),
        "max": array([2, -1]),
    }
    person_data = {
        "person_id": arange(9) + 1,
        "household_id": array([1, 2, 2, 3, 3, 4, 4, 4, 4]),
        "job_id": array([30, 50, 0, 1, 23, 54, 78, 2, 6]),
    }
    control_totals_data = {
        "year": array(2 * [2000]),
        "persons": array([0, 1]),
        "total_number_of_households": array([0, 4]),
    }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='hh_set', table_data=households_data)
    households = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

    storage.write_table(table_name='prs_set', table_data=person_data)
    persons = PersonDataset(in_storage=storage, in_table_name='prs_set')

    storage.write_table(table_name='hct_set', table_data=control_totals_data)
    control_totals = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                         what="household", id_name=["year", "persons"])

    storage.write_table(table_name='hc_set', table_data=characteristics_data)
    characteristics = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

    transition_model = HouseholdTransitionModel(debuglevel=3)
    transition_model.run(year=2000, person_set=persons, household_set=households,
                         control_totals=control_totals, characteristics=characteristics)

    # The run should remove the first three households together with their
    # 5 persons and add 3 copies of the last household (12 new persons), so
    # 4 + 12 = 16 persons remain.
    person_count = persons.size()
    self.assertEqual(person_count, 16,
                     "Error in size of the person_set. Should be 16, is %s." % person_count)

    # Household 4 keeps its persons; the copies get household ids 5, 6 and 7.
    expected_household_ids = array([4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7])
    ids_match = ma.allequal(persons.get_attribute('household_id'), expected_household_ids)
    self.assertEqual(ids_match, True, "Error in assigning household_id to new persons.")
def test_controlling_with_one_marginal_characteristic(self):
    """Control on age_of_head separately within each subarea (faz_id).

    Each of the two areas has a control total for both age_of_head
    categories.  The test checks the total per area, the total per
    (area, age_of_head) block of four groups, and that the groups inside each
    block keep the shares they had before the run.
    """
    # IMPORTANT: marginal characteristics grouping indices have to start at 0!
    # Area 1 is controlled to 20000 (age_of_head index 0) and 10000 (index 1)
    # households, area 2 to 5000 and 5000.
    control_totals_data = {
        "year": array([2000, 2000, 2000, 2000]),
        "faz_id": array([1, 1, 2, 2]),
        "age_of_head": array([0, 1, 0, 1]),
        "total_number_of_households": array([20000, 10000, 5000, 5000]),
    }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='hh_set', table_data=self.households_data)
    households = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

    storage.write_table(table_name='hct_set', table_data=control_totals_data)
    control_totals = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what='household')

    storage.write_table(table_name='prs_set', table_data=self.person_data)
    persons = PersonDataset(in_storage=storage, in_table_name='prs_set')

    storage.write_table(table_name='hc_set', table_data=self.household_characteristics_for_ht_data)
    characteristics = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

    subarea_model = SubareaHouseholdTransitionModel(subarea_id_name="faz_id")
    subarea_model.run(year=2000, person_set=persons, household_set=households,
                      control_totals=control_totals, characteristics=characteristics)

    failure_message = "Error, should_be: %s, but result: %s"

    # Households per area: 30000 in area 1 and 10000 in area 2, i.e. 40000
    # households in total.
    faz_of_household = households.get_attribute("faz_id")
    actual = array([0, 0])
    for position, faz in enumerate([1, 2]):
        actual[position] = where(faz_of_household == faz)[0].size
    expected = [30000, 10000]
    self.assertEqual(ma.allclose(expected, actual, rtol=1e-1), True,
                     failure_message % (expected, actual))

    # Each consecutive block of four groups must add up to its control total.
    actual = self.get_count_all_groups(households)
    expected = [20000, 10000, 5000, 5000]
    block_totals = [actual[arange(start, start + 4)].sum() for start in (0, 4, 8, 12)]
    self.assertEqual(ma.allclose(block_totals, expected, rtol=1e-1), True,
                     failure_message % (expected, array(block_totals)))

    # Inside each block the groups keep their shares from before the run,
    # scaled to the block's control total.
    younger_counts = (3000.0, 1000.0, 1500.0, 2000.0)   # these add up to 7500
    older_counts = (1000.0, 2500.0, 1500.0, 4000.0)     # these add up to 9000
    expected = (
        # area 1
        [count / 7500.0 * 20000.0 for count in younger_counts] +
        [count / 9000.0 * 10000.0 for count in older_counts] +
        # area 2
        [count / 7500.0 * 5000.0 for count in younger_counts] +
        [count / 9000.0 * 5000.0 for count in older_counts]
    )
    self.assertEqual(ma.allclose(actual, expected, rtol=0.1), True,
                     failure_message % (expected, actual))
def test_controlling_age_of_head(self):
    """Control on one marginal characteristic, namely age_of_head.

    Three age categories (0-34, 35-64, 65 and above) are controlled for the
    years 2000, 2001 and 2002.  After each yearly run the test compares the
    total number of households and the number of households per age category
    with that year's control totals.
    """
    annual_household_control_totals_data = {
        "year": array([2000, 2000, 2000, 2001, 2001, 2001, 2002, 2002, 2002]),
        "age_of_head": array([0,1,2,0,1,2, 0,1,2]),
        "total_number_of_households": array([25013, 21513, 18227,  # 2000
                                             10055, 15003, 17999,  # 2001
                                             15678, 14001, 20432])  # 2002
        }

    household_characteristics_for_ht_data = {
        "characteristic": array(3*['age_of_head']),
        "min": array([0, 35, 65]),
        "max": array([34, 64, -1])
        }

    # 15000 households, 5000 in each of the three age categories.
    households_data = {
        "household_id": arange(15000)+1,
        "building_id": array(15000*[1]),
        "age_of_head": array(1000*[25] + 1000*[28] + 2000*[32] + 1000*[34] +
                             2000*[35] + 1000*[40] + 1000*[54] + 1000*[62] +
                             1000*[65] + 1000*[68] + 2000*[71] + 1000*[98]),
        "persons": array(1000*[2] + 2000*[3] + 1000*[1] + 1000*[6] + 1000*[1] +
                         1000*[4] + 3000*[1] + 5000*[5], dtype=int8)
        }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='hh_set', table_data=households_data)
    hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

    storage.write_table(table_name='hct_set', table_data=annual_household_control_totals_data)
    hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what='household',
                                  id_name=['year', 'age_of_head'])

    storage.write_table(table_name='hc_set', table_data=household_characteristics_for_ht_data)
    hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

    storage.write_table(table_name='prs_set', table_data=self.person_data)
    prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

    model = HouseholdTransitionModel(debuglevel=3)

    def count_households_per_category():
        # Count the current households in each age category: the first
        # category is bounded only by its max, the last only by its min.
        ages = hh_set.get_attribute('age_of_head')
        lower = hc_set.get_attribute("min")
        upper = hc_set.get_attribute("max")
        last = hc_set.size() - 1
        counts = zeros(hc_set.size(), dtype=int32)
        counts[0] = (ages <= upper[0]).sum()
        for i in range(1, last):
            counts[i] = logical_and(ages >= lower[i], ages <= upper[i]).sum()
        counts[last] = (ages >= lower[last]).sum()
        return counts

    def run_and_check(year, first, stop):
        # Run the model for `year` and compare the outcome with the control
        # totals stored in rows first..stop-1 of the control total table.
        model.run(year=year, person_set=prs_set, household_set=hh_set,
                  control_totals=hct_set, characteristics=hc_set)
        year_totals = hct_set.get_attribute("total_number_of_households")[first:stop]

        results = hh_set.size()
        should_be = [year_totals.sum()]
        self.assertEqual(ma.allclose(should_be, results, rtol=1e-1), True,
                         "Error, should_be: %s, but result: %s" % (should_be, results))

        results = count_households_per_category()
        should_be = year_totals
        self.assertEqual(ma.allclose(results, should_be, rtol=1e-6), True,
                         "Error, should_be: %s, but result: %s" % (should_be, results))

    # this run should add households in all three categories
    run_and_check(2000, 0, 3)
    # this run should remove households in all three categories
    run_and_check(2001, 3, 6)
    # this run should add and remove households
    run_and_check(2002, 6, 9)
def test_controlling_income(self):
    """Control on one marginal characteristic, namely income.

    Four income categories are controlled for the years 2000, 2001 and 2002.
    The rows of the characteristics table are deliberately not sorted by
    income, to test that the result does not depend on row order.  After each
    yearly run the test compares the total number of households and the
    number of households per income category with that year's control totals.
    """
    annual_household_control_totals_data = {
        "year": array([2000, 2000, 2000, 2000, 2001, 2001, 2001, 2001, 2002, 2002, 2002, 2002]),
        "income": array([0,1,2,3,0,1,2,3, 0,1,2,3]),
        "total_number_of_households": array([25013, 21513, 18227, 18493,  # 2000
                                             10055, 15003, 17999, 17654,  # 2001
                                             15678, 14001, 20432, 14500])  # 2002
        }

    household_characteristics_for_ht_data = {
        "characteristic": array(4*['income']),
        "min": array([0, 40000, 120000, 70000]),  # category 120000 has index 3 and category 70000 has index 2
        "max": array([39999, 69999, -1, 119999])  # (testing row invariance)
        }
    # Rows of the characteristics table in ascending order of income.
    hc_sorted_index = array([0,1,3,2])

    # 20000 households, 5000 in each of the four income categories.
    households_data = {
        "household_id": arange(20000)+1,
        "building_id": array(19950*[1] + 50*[0]),
        "income": array(1000*[1000] + 1000*[10000] + 2000*[20000] + 1000*[35000] +
                        2000*[45000] + 1000*[50000] + 2000*[67000] +
                        2000*[90000] + 1000*[100005] + 2000*[110003] +
                        1000*[120000] + 1000*[200000] + 2000*[500000] + 1000*[630000]),
        "persons": array(3000*[2] + 2000*[3] + 1000*[1] + 1000*[6] + 1000*[1] +
                         1000*[4] + 3000*[1] + 8000*[5], dtype=int8)
        }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='hh_set', table_data=households_data)
    hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

    storage.write_table(table_name='hct_set', table_data=annual_household_control_totals_data)
    hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what='household',
                                  id_name=['year', 'income'])

    storage.write_table(table_name='hc_set', table_data=household_characteristics_for_ht_data)
    hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

    storage.write_table(table_name='prs_set', table_data=self.person_data)
    prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

    model = HouseholdTransitionModel(debuglevel=3)

    def count_households_per_category():
        # Count the current households per income category, lowest category
        # first: the lowest category is bounded only by its max, the highest
        # only by its min.
        incomes = hh_set.get_attribute('income')
        lower = hc_set.get_attribute("min")
        upper = hc_set.get_attribute("max")
        counts = zeros(hc_set.size(), dtype=int32)
        counts[0] = (incomes <= upper[hc_sorted_index[0]]).sum()
        for i in range(1, hc_set.size()-1):
            row = hc_sorted_index[i]
            counts[i] = logical_and(incomes >= lower[row], incomes <= upper[row]).sum()
        counts[-1] = (incomes >= lower[hc_sorted_index[-1]]).sum()
        return counts

    def run_and_check(year, first, stop):
        # Run the model for `year` and compare the outcome with the control
        # totals stored in rows first..stop-1 of the control total table.
        model.run(year=year, person_set=prs_set, household_set=hh_set,
                  control_totals=hct_set, characteristics=hc_set)
        year_totals = hct_set.get_attribute("total_number_of_households")[first:stop]

        results = hh_set.size()
        should_be = [year_totals.sum()]
        self.assertEqual(ma.allclose(should_be, results, rtol=1e-1), True,
                         "Error, should_be: %s, but result: %s" % (should_be, results))

        results = count_households_per_category()
        should_be = year_totals
        self.assertEqual(ma.allclose(results, should_be, rtol=1e-6), True,
                         "Error, should_be: %s, but result: %s" % (should_be, results))

    # this run should add households in all four categories
    # (the four totals for 2000 add up to 83246)
    run_and_check(2000, 0, 4)
    # this run should remove households in all four categories
    run_and_check(2001, 4, 8)
    # this run should add and remove households; the 2002 totals are rows
    # 8..11, so the slice stops at 12 rather than running past the table
    run_and_check(2002, 8, 12)
def xtest_power_HTM_controlling_with_marginal_characteristics(self):
    """Estimate how often the stochastic test rejects a deliberately biased result.

    A synthetic population of 5000 households in four income groups is
    reduced by the HouseholdTransitionModel to a control total of 3000 for
    year 2000.  The mean age_of_head per income bin of the untouched
    population is the expected result; run_model() returns the same
    statistic after a model run, with self.wrong_number added to the last
    bin.  For every combination of r and significance level the stochastic
    test is repeated R times and the fraction of raised exceptions is
    printed as the power.

    NOTE(review): the name starts with 'xtest', presumably so that the
    default unittest collection skips this long-running experiment.
    """
    def report(*items):
        # Space-joined like the Python 2 print statement, but spelled as a
        # single-argument call so the block parses on Python 2 and 3 alike.
        print(" ".join(str(item) for item in items))

    def mean_age_by_income_bin(household_set):
        # Mean age_of_head within each of the four income bins.
        income = household_set.get_attribute("income")
        age = household_set.get_attribute("age_of_head")
        bins = [
            income <= 1000,
            logical_and(income <= 5000, income > 1000),
            logical_and(income <= 10000, income > 5000),
            income > 10000,
        ]
        return array([age[where(in_bin)[0]].mean() for in_bin in bins])

    nhhs = 5000
    ngroups = 4
    # Floor division: these values are used as list repetition counts and
    # must stay integers even when true division is in effect.
    nhhsg = nhhs // ngroups
    nhhslg = nhhs - (ngroups - 1) * nhhsg
    should_nhhs = nhhs - 2000
    logger.be_quiet()

    half = nhhsg // 2
    half_last = nhhslg // 2
    household_data = {
        # Each income group is split into a younger and an older half.
        "age_of_head": array(half * [18] + (nhhsg - half) * [35] +
                             half * [30] + (nhhsg - half) * [40] +
                             half * [38] + (nhhsg - half) * [65] +
                             half_last * [50] + (nhhslg - half_last) * [80]),
        "income": array(nhhsg * [500] + nhhsg * [2000] + nhhsg * [7000] + nhhslg * [15000]),
        "household_id": arange(nhhs) + 1,
    }
    household_characteristics_for_ht_data = {
        "characteristic": array(4 * ["income"] + 4 * ["age_of_head"]),
        "min": array([0, 1001, 5001, 10001, 0, 31, 41, 61]),
        # NOTE(review): -1 presumably marks an open-ended upper bound - confirm.
        "max": array([1000, 5000, 10000, -1, 30, 40, 60, -1]),
    }
    annual_household_control_totals_data = {
        "year": array([2000]),
        "total_number_of_households": array([should_nhhs]),
    }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='hc_set', table_data=household_characteristics_for_ht_data)
    hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

    # Control totals and households are loaded with their own dataset
    # classes, the same way the other tests in this file do it.
    storage.write_table(table_name='hct_set', table_data=annual_household_control_totals_data)
    hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                  what='household', id_name=['year'])

    storage.write_table(table_name='households', table_data=household_data)
    households = HouseholdDataset(in_storage=storage, in_table_name='households')

    expected_results = mean_age_by_income_bin(households)

    def run_model():
        # The table is rewritten and reloaded on every call, presumably so
        # that each run starts from the original population.
        storage.write_table(table_name='households', table_data=household_data)
        households = HouseholdDataset(in_storage=storage, in_table_name='households')
        model = HouseholdTransitionModel()
        model.run(year=2000, household_set=households, control_totals=hct_set,
                  characteristics=hc_set)
        results = mean_age_by_income_bin(households)
        # Bias the last bin on purpose; the experiment measures how often
        # this bias gets detected.
        results[-1] = results[-1] + self.wrong_number
        return results

    R = 1000  # repetitions of the stochastic test per (r, level) combination
    # Alternative settings kept from the original:
    #   r = [2, 5, 10, 50, 100, 1000] or [2, 5, 10, 15, 20]
    #   levels = [0.05]
    #   wrong_numbers = [0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, 2.5]
    r = [2, 5]
    levels = [0.05, 0.01]
    wrong_numbers = [1]
    for wn in wrong_numbers:
        self.wrong_number = wn
        report("Wrong number = ", self.wrong_number)
        power = zeros((len(r), len(levels)))
        for ir, r_value in enumerate(r):
            for il, level in enumerate(levels):
                report("r=", r_value, ", level=", level)
                seed(1)
                for _ in range(R):
                    try:
                        self.run_stochastic_test(__file__, run_model, expected_results,
                                                 r_value, significance_level=level,
                                                 transformation=None)
                    except Exception:
                        # A raised exception is counted as a rejection.
                        # NOTE(review): presumably only AssertionError signals
                        # a rejection; narrow this once confirmed so that
                        # setup errors cannot inflate the power.
                        power[ir, il] += 1
                report("Power: ", power[ir, il] / float(R))
        report(power / float(R))
def run_HTM(niter):
    """Run the household transition model niter times and collect mean head ages.

    A synthetic population of 5000 households in four income groups is
    written to a dict storage together with a characteristics table and a
    single control total of 3000 households for year 2000.  Every iteration
    loads a new HouseholdDataset from that storage, runs a
    HouseholdTransitionModel for year 2000 and records the mean age_of_head
    within each income bin.

    Arguments:
    niter -- number of model runs.

    Returns an array of shape (niter, 4); row i holds the four bin means
    (income <= 1000, 1001-5000, 5001-10000, > 10000) of run i.
    """
    nhhs = 5000
    ngroups = 4
    # Floor division: these values are used as list repetition counts and
    # must stay integers even when true division is in effect.
    nhhsg = nhhs // ngroups
    nhhslg = nhhs - (ngroups - 1) * nhhsg
    should_nhhs = nhhs - 2000
    half = nhhsg // 2
    half_last = nhhslg // 2

    storage = StorageFactory().get_storage('dict_storage')

    hc_set_table_name = 'hc_set'
    storage.write_table(
        table_name=hc_set_table_name,
        table_data={
            'characteristic': array(4 * ['income'] + 4 * ['age_of_head']),
            'min': array([0, 1001, 5001, 10001, 0, 31, 41, 61]),
            # NOTE(review): -1 presumably marks an open-ended upper bound - confirm.
            'max': array([1000, 5000, 10000, -1, 30, 40, 60, -1]),
        },
    )

    hct_set_table_name = 'hct_set'
    storage.write_table(
        table_name=hct_set_table_name,
        table_data={
            'year': array([2000]),
            'total_number_of_households': array([should_nhhs]),
        },
    )

    households_table_name = 'households'
    storage.write_table(
        table_name=households_table_name,
        table_data={
            # Each income group is split into a younger and an older half.
            'age_of_head': array(half * [18] + (nhhsg - half) * [35] +
                                 half * [30] + (nhhsg - half) * [40] +
                                 half * [38] + (nhhsg - half) * [65] +
                                 half_last * [50] + (nhhslg - half_last) * [80]),
            'income': array(nhhsg * [500] + nhhsg * [2000] + nhhsg * [7000] + nhhslg * [15000]),
            'household_id': arange(nhhs) + 1,
        },
    )

    hc_set = HouseholdCharacteristicDataset(in_storage=storage,
                                            in_table_name=hc_set_table_name)
    hct_set = ControlTotalDataset(in_storage=storage, in_table_name=hct_set_table_name,
                                  what='household', id_name=['year'])

    logger.be_quiet()
    result = zeros((niter, 4))
    for run in range(niter):
        # Load a new dataset from storage for every run.
        households = HouseholdDataset(in_storage=storage,
                                      in_table_name=households_table_name)
        model = HouseholdTransitionModel()
        model.run(year=2000, household_set=households, control_totals=hct_set,
                  characteristics=hc_set)

        income = households.get_attribute('income')
        age = households.get_attribute('age_of_head')
        bins = [
            income <= 1000,
            logical_and(income <= 5000, income > 1000),
            logical_and(income <= 10000, income > 5000),
            income > 10000,
        ]
        result[run, :] = array([age[where(in_bin)[0]].mean() for in_bin in bins])
    return result
def test_same_distribution_after_household_addition(self):
    """Add households per large area and verify the group shares are unchanged.

    Only control totals are used (no marginal characteristics): 20000
    households are requested for large area 1 and 30000 for large area 2.
    After the regional model has run, the totals per area, the number of
    unplaced households, the count in every group and the dtypes of the
    household attributes are checked.
    """
    mismatch = "Error, should_be: %s, but result: %s"

    control_totals_table = {
        "year": array([2000, 2000]),
        "total_number_of_households": array([20000, 30000]),
        "large_area_id": array([1, 2])
    }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='hh_set', table_data=self.households_data)
    hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

    storage.write_table(table_name='hct_set', table_data=control_totals_table)
    hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                  what="household")

    storage.write_table(table_name='hc_set',
                        table_data=self.household_characteristics_for_ht_data)
    hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

    model = RegionalHouseholdTransitionModel()
    model.run(year=2000, household_set=hh_set, control_totals=hct_set,
              characteristics=hc_set)

    # Totals per large area: about 20000 in area 1 and 30000 in area 2
    # (10% relative tolerance).
    areas = hh_set.get_attribute("large_area_id")
    households_per_area = array([where(areas == area_id)[0].size
                                 for area_id in (1, 2)])
    expected_per_area = [20000, 30000]
    self.assertEqual(
        ma.allclose(expected_per_area, households_per_area, rtol=1e-1), True,
        mismatch % (expected_per_area, households_per_area))

    # Households with a non-positive grid_id are the unplaced ones; their
    # number should match the number of newly created households (within 10%).
    unplaced = where(hh_set.get_attribute("grid_id") <= 0)[0].size
    expected_unplaced = [17000]
    self.assertEqual(
        ma.allclose(expected_unplaced, unplaced, rtol=1e-1), True,
        mismatch % (expected_unplaced, unplaced))

    # Every group should keep its share of its area: the eight counts below
    # add up to 16500 and are scaled to each area's control total.
    group_counts = (3000.0, 1000.0, 1500.0, 2000.0, 1000.0, 2500.0, 1500.0, 4000.0)
    area_totals = (20000.0, 30000.0)  # area 1, area 2
    counts_after_run = self.get_count_all_groups(hh_set)
    expected_counts = array([count / 16500.0 * total
                             for total in area_totals
                             for count in group_counts])
    self.assertEqual(
        ma.allclose(counts_after_run, expected_counts, rtol=0.1), True,
        mismatch % (expected_counts, counts_after_run))

    # The attributes of the enlarged household set must have the listed dtypes.
    dtype_checks = (
        ("age_of_head", int32,
         "Error in data type of the new household set. Should be: int32, is: %s"),
        ("income", int32,
         "Error in data type of the new household set. Should be: int32, is: %s"),
        ("persons", int8,
         "Error in data type of the new household set. Should be: int8, is: %s"),
    )
    for attribute, expected_dtype, message in dtype_checks:
        actual_dtype = hh_set.get_attribute(attribute).dtype
        self.assertEqual(actual_dtype, expected_dtype, message % str(actual_dtype))
def xtest_power_HTM_controlling_with_marginal_characteristics(self):
    """Estimate how often the stochastic test rejects a deliberately biased result.

    A synthetic population of 5000 households in four income groups is
    reduced by the HouseholdTransitionModel to a control total of 3000 for
    year 2000.  The mean age_of_head per income bin of the untouched
    population is the expected result; run_model() returns the same
    statistic after a model run, with self.wrong_number added to the last
    bin.  For every combination of r and significance level the stochastic
    test is repeated R times and the fraction of raised exceptions is
    printed as the power.

    NOTE(review): the name starts with 'xtest', presumably so that the
    default unittest collection skips this long-running experiment.
    """
    def report(*items):
        # Space-joined like the Python 2 print statement, but spelled as a
        # single-argument call so the block parses on Python 2 and 3 alike.
        print(" ".join(str(item) for item in items))

    def mean_age_by_income_bin(household_set):
        # Mean age_of_head within each of the four income bins.
        income = household_set.get_attribute("income")
        age = household_set.get_attribute("age_of_head")
        bins = [
            income <= 1000,
            logical_and(income <= 5000, income > 1000),
            logical_and(income <= 10000, income > 5000),
            income > 10000,
        ]
        return array([age[where(in_bin)[0]].mean() for in_bin in bins])

    nhhs = 5000
    ngroups = 4
    # Floor division: these values are used as list repetition counts and
    # must stay integers even when true division is in effect.
    nhhsg = nhhs // ngroups
    nhhslg = nhhs - (ngroups - 1) * nhhsg
    should_nhhs = nhhs - 2000
    logger.be_quiet()

    half = nhhsg // 2
    half_last = nhhslg // 2
    household_data = {
        # Each income group is split into a younger and an older half.
        "age_of_head": array(half * [18] + (nhhsg - half) * [35] +
                             half * [30] + (nhhsg - half) * [40] +
                             half * [38] + (nhhsg - half) * [65] +
                             half_last * [50] + (nhhslg - half_last) * [80]),
        "income": array(nhhsg * [500] + nhhsg * [2000] + nhhsg * [7000] + nhhslg * [15000]),
        "household_id": arange(nhhs) + 1,
    }
    household_characteristics_for_ht_data = {
        "characteristic": array(4 * ["income"] + 4 * ["age_of_head"]),
        "min": array([0, 1001, 5001, 10001, 0, 31, 41, 61]),
        # NOTE(review): -1 presumably marks an open-ended upper bound - confirm.
        "max": array([1000, 5000, 10000, -1, 30, 40, 60, -1]),
    }
    annual_household_control_totals_data = {
        "year": array([2000]),
        "total_number_of_households": array([should_nhhs]),
    }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(
        table_name='hc_set',
        table_data=household_characteristics_for_ht_data)
    hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

    # Control totals and households are loaded with their own dataset
    # classes, the same way the neighbouring tests in this file do it.
    storage.write_table(
        table_name='hct_set',
        table_data=annual_household_control_totals_data)
    hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                  what='household', id_name=['year'])

    storage.write_table(table_name='households', table_data=household_data)
    households = HouseholdDataset(in_storage=storage, in_table_name='households')

    expected_results = mean_age_by_income_bin(households)

    def run_model():
        # The table is rewritten and reloaded on every call, presumably so
        # that each run starts from the original population.
        storage.write_table(table_name='households', table_data=household_data)
        households = HouseholdDataset(in_storage=storage, in_table_name='households')
        model = HouseholdTransitionModel()
        model.run(year=2000, household_set=households, control_totals=hct_set,
                  characteristics=hc_set)
        results = mean_age_by_income_bin(households)
        # Bias the last bin on purpose; the experiment measures how often
        # this bias gets detected.
        results[-1] = results[-1] + self.wrong_number
        return results

    R = 1000  # repetitions of the stochastic test per (r, level) combination
    # Alternative settings kept from the original:
    #   r = [2, 5, 10, 50, 100, 1000] or [2, 5, 10, 15, 20]
    #   levels = [0.05]
    #   wrong_numbers = [0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, 2.5]
    r = [2, 5]
    levels = [0.05, 0.01]
    wrong_numbers = [1]
    for wn in wrong_numbers:
        self.wrong_number = wn
        report("Wrong number = ", self.wrong_number)
        power = zeros((len(r), len(levels)))
        for ir, r_value in enumerate(r):
            for il, level in enumerate(levels):
                report("r=", r_value, ", level=", level)
                seed(1)
                for _ in range(R):
                    try:
                        self.run_stochastic_test(
                            __file__, run_model, expected_results, r_value,
                            significance_level=level, transformation=None)
                    except Exception:
                        # A raised exception is counted as a rejection.
                        # NOTE(review): presumably only AssertionError signals
                        # a rejection; narrow this once confirmed so that
                        # setup errors cannot inflate the power.
                        power[ir, il] += 1
                report("Power: ", power[ir, il] / float(R))
        report(power / float(R))