def run_model():
    """Run the household transition model once and summarise the outcome.

    Rewrites the 'households' table from ``household_data``, loads it as a
    fresh dataset, runs ``HouseholdTransitionModel`` for year 2000 against
    ``hct_set`` / ``hc_set``, and returns the mean ``age_of_head`` within
    each of four income brackets (<=1000, 1001-5000, 5001-10000, >10000).

    ``storage``, ``household_data``, ``hct_set`` and ``hc_set`` are not
    defined here; they are read from the enclosing scope.

    Returns:
        An array with four entries, one mean age per income bracket, in
        ascending bracket order.
    """
    # Start every run from the same initial household table.
    storage.write_table(table_name='households', table_data=household_data)
    households = HouseholdCharacteristicDataset(in_storage=storage,
                                                in_table_name='households')

    model = HouseholdTransitionModel()
    model.run(year=2000, household_set=households,
              control_totals=hct_set, characteristics=hc_set)

    income = households.get_attribute("income")
    age = households.get_attribute("age_of_head")

    # One boolean mask per income bracket, in ascending order.
    bracket_masks = [
        income <= 1000,
        logical_and(income <= 5000, income > 1000),
        logical_and(income <= 10000, income > 5000),
        income > 10000,
    ]
    results = array([age[where(mask)[0]].mean() for mask in bracket_masks])

    # Parenthesised single-argument form: same output as the Python 2 print
    # statement, and also valid syntax on Python 3.
    print(results)
    return results
def run_model():
    """Execute one household-transition run and report mean ages by income.

    The 'households' table is written again from ``household_data`` so the
    model always starts from the same population. After running
    ``HouseholdTransitionModel`` for year 2000, the mean ``age_of_head`` is
    computed for the income brackets <=1000, (1000, 5000], (5000, 10000]
    and >10000.

    Relies on ``storage``, ``household_data``, ``hct_set`` and ``hc_set``
    from the surrounding scope.

    Returns:
        A four-element array of mean ages, ordered from the lowest to the
        highest income bracket.
    """
    storage.write_table(table_name='households', table_data=household_data)
    households = HouseholdCharacteristicDataset(
        in_storage=storage, in_table_name='households')

    model = HouseholdTransitionModel()
    model.run(year=2000,
              household_set=households,
              control_totals=hct_set,
              characteristics=hc_set)

    income = households.get_attribute("income")
    age = households.get_attribute("age_of_head")

    # Row indices of the households falling into each income bracket.
    bracket_rows = (
        where(income <= 1000)[0],
        where(logical_and(income <= 5000, income > 1000))[0],
        where(logical_and(income <= 10000, income > 5000))[0],
        where(income > 10000)[0],
    )
    results = array([age[rows].mean() for rows in bracket_rows])

    # print(x) with one argument behaves like the Python 2 statement
    # `print x` and does not break compilation under Python 3.
    print(results)
    return results
def test_controlling_age_of_head(self):
    """Control households by one marginal characteristic: age_of_head.

    Starts from 15000 households (5000 in each of the three age brackets
    0-34, 35-64 and 65+), runs the model for 2000, 2001 and 2002 and, after
    each run, checks that
      * the dataset size is within 10% of the summed control totals, and
      * the number of households in every age bracket matches that year's
        control totals (rtol=1e-6).
    """
    annual_household_control_totals_data = {
        "year": array([2000, 2000, 2000, 2001, 2001, 2001, 2002, 2002, 2002]),
        "age_of_head": array([0, 1, 2, 0, 1, 2, 0, 1, 2]),
        "total_number_of_households": array([25013, 21513, 18227,   # 2000
                                             10055, 15003, 17999,   # 2001
                                             15678, 14001, 20432])  # 2002
    }

    # The per-bracket check below never applies an upper bound to the last
    # bracket, so its max of -1 is not used as a literal limit there.
    household_characteristics_for_ht_data = {
        "characteristic": array(3*['age_of_head']),
        "min": array([0, 35, 65]),
        "max": array([34, 64, -1])
    }

    households_data = {
        "household_id": arange(15000)+1,
        "building_id": array(15000*[1]),
        "age_of_head": array(1000*[25] + 1000*[28] + 2000*[32] + 1000*[34] +
                             2000*[35] + 1000*[40] + 1000*[54] + 1000*[62] +
                             1000*[65] + 1000*[68] + 2000*[71] + 1000*[98]),
        "persons": array(1000*[2] + 2000*[3] + 1000*[1] + 1000*[6] + 1000*[1] +
                         1000*[4] + 3000*[1] + 5000*[5], dtype=int8)
    }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='hh_set', table_data=households_data)
    hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

    storage.write_table(table_name='hct_set',
                        table_data=annual_household_control_totals_data)
    hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                  what='household',
                                  id_name=['year', 'age_of_head'])

    storage.write_table(table_name='hc_set',
                        table_data=household_characteristics_for_ht_data)
    hc_set = HouseholdCharacteristicDataset(in_storage=storage,
                                            in_table_name='hc_set')

    storage.write_table(table_name='prs_set', table_data=self.person_data)
    prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

    model = HouseholdTransitionModel(debuglevel=3)

    def count_households_per_bracket():
        """Count the current households falling into each age bracket."""
        ages = hh_set.get_attribute('age_of_head')
        lower = hc_set.get_attribute("min")
        upper = hc_set.get_attribute("max")
        n_brackets = hc_set.size()
        counts = zeros(n_brackets, dtype=int32)
        # First bracket: upper bound only.
        counts[0] = (ages <= upper[0]).sum()
        # Interior brackets: both bounds, inclusive.
        for i in range(1, n_brackets - 1):
            counts[i] = logical_and(ages >= lower[i], ages <= upper[i]).sum()
        # Last bracket: lower bound only.
        counts[n_brackets - 1] = (ages >= lower[n_brackets - 1]).sum()
        return counts

    def verify_year(year, first_row, last_row):
        """Run the model for `year` and compare against control rows
        [first_row:last_row] of the control-totals table."""
        model.run(year=year, person_set=prs_set, household_set=hh_set,
                  control_totals=hct_set, characteristics=hc_set)
        targets = hct_set.get_attribute(
            "total_number_of_households")[first_row:last_row]

        # Overall size: loose tolerance.
        results = hh_set.size()
        should_be = [targets.sum()]
        self.assertTrue(ma.allclose(should_be, results, rtol=1e-1),
                        "Error, should_be: %s, but result: %s" % (should_be, results))

        # Per-bracket counts: effectively exact.
        results = count_households_per_bracket()
        should_be = targets
        self.assertTrue(ma.allclose(results, should_be, rtol=1e-6),
                        "Error, should_be: %s, but result: %s" % (should_be, results))

    # this run should add households in all three age brackets
    verify_year(2000, 0, 3)

    # this run should remove households in all three age brackets
    verify_year(2001, 3, 6)

    # this run should add and remove households
    verify_year(2002, 6, 9)
def test_controlling_income(self):
    """Control households by one marginal characteristic: income.

    Starts from 20000 households (5000 in each of the four income
    brackets), runs the model for 2000, 2001 and 2002 and, after each run,
    checks that
      * the dataset size is within 10% of the summed control totals, and
      * the number of households in every income bracket matches that
        year's control totals (rtol=1e-6).

    The characteristics table deliberately lists its rows out of ascending
    order, so the test also exercises row-order invariance.
    """
    annual_household_control_totals_data = {
        "year": array([2000, 2000, 2000, 2000,
                       2001, 2001, 2001, 2001,
                       2002, 2002, 2002, 2002]),
        "income": array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]),
        "total_number_of_households": array([25013, 21513, 18227, 18493,   # 2000
                                             10055, 15003, 17999, 17654,   # 2001
                                             15678, 14001, 20432, 14500])  # 2002
    }

    # Rows 2 and 3 are swapped relative to ascending income: row 2 holds the
    # open-ended 120000+ bracket and row 3 holds 70000-119999 (testing row
    # invariance).
    household_characteristics_for_ht_data = {
        "characteristic": array(4*['income']),
        "min": array([0, 40000, 120000, 70000]),
        "max": array([39999, 69999, -1, 119999])
    }
    # Maps ascending bracket order to row positions in the table above.
    hc_sorted_index = array([0, 1, 3, 2])

    households_data = {
        "household_id": arange(20000)+1,
        "building_id": array(19950*[1] + 50*[0]),
        "income": array(1000*[1000] + 1000*[10000] + 2000*[20000] + 1000*[35000] +
                        2000*[45000] + 1000*[50000] + 2000*[67000] +
                        2000*[90000] + 1000*[100005] + 2000*[110003] +
                        1000*[120000] + 1000*[200000] + 2000*[500000] + 1000*[630000]),
        "persons": array(3000*[2] + 2000*[3] + 1000*[1] + 1000*[6] + 1000*[1] +
                         1000*[4] + 3000*[1] + 8000*[5], dtype=int8)
    }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='hh_set', table_data=households_data)
    hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

    storage.write_table(table_name='hct_set',
                        table_data=annual_household_control_totals_data)
    hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                  what='household',
                                  id_name=['year', 'income'])

    storage.write_table(table_name='hc_set',
                        table_data=household_characteristics_for_ht_data)
    hc_set = HouseholdCharacteristicDataset(in_storage=storage,
                                            in_table_name='hc_set')

    storage.write_table(table_name='prs_set', table_data=self.person_data)
    prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

    model = HouseholdTransitionModel(debuglevel=3)

    def count_households_per_bracket():
        """Count current households per income bracket, in ascending
        bracket order (rows are looked up through hc_sorted_index)."""
        incomes = hh_set.get_attribute('income')
        lower = hc_set.get_attribute("min")
        upper = hc_set.get_attribute("max")
        n_brackets = hc_set.size()
        counts = zeros(n_brackets, dtype=int32)
        # Lowest bracket: upper bound only.
        counts[0] = (incomes <= upper[hc_sorted_index[0]]).sum()
        # Interior brackets: both bounds, inclusive.
        for i in range(1, n_brackets - 1):
            row = hc_sorted_index[i]
            counts[i] = logical_and(incomes >= lower[row],
                                    incomes <= upper[row]).sum()
        # Highest bracket: lower bound only.
        counts[-1] = (incomes >= lower[hc_sorted_index[-1]]).sum()
        return counts

    def verify_year(year, first_row, last_row):
        """Run the model for `year` and compare against control rows
        [first_row:last_row] of the control-totals table."""
        model.run(year=year, person_set=prs_set, household_set=hh_set,
                  control_totals=hct_set, characteristics=hc_set)
        targets = hct_set.get_attribute(
            "total_number_of_households")[first_row:last_row]

        # Overall size: loose tolerance.
        results = hh_set.size()
        should_be = [targets.sum()]
        self.assertTrue(ma.allclose(should_be, results, rtol=1e-1),
                        "Error, should_be: %s, but result: %s" % (should_be, results))

        # Per-bracket counts: effectively exact.
        results = count_households_per_bracket()
        should_be = targets
        self.assertTrue(ma.allclose(results, should_be, rtol=1e-6),
                        "Error, should_be: %s, but result: %s" % (should_be, results))

    # this run should add households in all four categories
    # (the four 2000 control totals sum to 83246)
    verify_year(2000, 0, 4)

    # this run should remove households in all four categories
    verify_year(2001, 4, 8)

    # this run should add and remove households
    # (rows 8..11; the table has exactly 12 rows)
    verify_year(2002, 8, 12)
def xtest_power_HTM_controlling_with_marginal_characteristics(self):
    """Estimate how often the stochastic test rejects a perturbed model.

    Builds 5000 households in four income groups, computes the mean
    age_of_head per income bracket as the expected result, and defines a
    ``run_model`` whose last result is shifted by ``self.wrong_number``.
    For every (r, significance level) combination, ``run_stochastic_test``
    is called R times; the fraction of calls that raise is printed as the
    "power".

    The ``x`` prefix keeps the unittest loader from collecting this method.
    """
    nhhs = 5000
    ngroups = 4
    # Floor division: these values are used as list repetition counts, so
    # they must stay integers on Python 3 as well as Python 2.
    nhhsg = nhhs // ngroups
    nhhslg = nhhs - (ngroups - 1) * nhhsg
    should_nhhs = nhhs - 2000
    half = nhhsg // 2
    half_last = nhhslg // 2
    logger.be_quiet()

    household_data = {
        "age_of_head": array(half*[18] + (nhhsg - half)*[35] +
                             half*[30] + (nhhsg - half)*[40] +
                             half*[38] + (nhhsg - half)*[65] +
                             half_last*[50] + (nhhslg - half_last)*[80]),
        "income": array(nhhsg*[500] + nhhsg*[2000] + nhhsg*[7000] + nhhslg*[15000]),
        "household_id": arange(nhhs) + 1
    }
    household_characteristics_for_ht_data = {
        "characteristic": array(4*["income"] + 4*["age_of_head"]),
        "min": array([0, 1001, 5001, 10001, 0, 31, 41, 61]),
        "max": array([1000, 5000, 10000, -1, 30, 40, 60, -1])
    }
    annual_household_control_totals_data = {
        "year": array([2000]),
        "total_number_of_households": array([should_nhhs])
    }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='hc_set',
                        table_data=household_characteristics_for_ht_data)
    hc_set = HouseholdCharacteristicDataset(in_storage=storage,
                                            in_table_name='hc_set')

    # NOTE(review): the control totals and the households are loaded through
    # HouseholdCharacteristicDataset here, unlike the enabled tests in this
    # file - confirm this is intended before re-enabling the test.
    storage.write_table(table_name='hct_set',
                        table_data=annual_household_control_totals_data)
    hct_set = HouseholdCharacteristicDataset(in_storage=storage,
                                             in_table_name='hct_set')

    storage.write_table(table_name='households', table_data=household_data)
    households = HouseholdCharacteristicDataset(in_storage=storage,
                                                in_table_name='households')

    def mean_age_by_income_bracket(dataset):
        """Mean age_of_head for the brackets <=1000, (1000, 5000],
        (5000, 10000] and >10000, in that order."""
        income = dataset.get_attribute("income")
        age = dataset.get_attribute("age_of_head")
        bracket_rows = [
            where(income <= 1000)[0],
            where(logical_and(income <= 5000, income > 1000))[0],
            where(logical_and(income <= 10000, income > 5000))[0],
            where(income > 10000)[0],
        ]
        return array([age[rows].mean() for rows in bracket_rows])

    expected_results = mean_age_by_income_bracket(households)

    def run_model():
        """Run the model on a fresh copy of the households and return the
        bracket means with the last one shifted by self.wrong_number."""
        storage.write_table(table_name='households', table_data=household_data)
        fresh_households = HouseholdCharacteristicDataset(
            in_storage=storage, in_table_name='households')
        model = HouseholdTransitionModel()
        model.run(year=2000, household_set=fresh_households,
                  control_totals=hct_set, characteristics=hc_set)
        results = mean_age_by_income_bracket(fresh_households)
        # Deliberately falsify the last value.
        results[-1] = results[-1] + self.wrong_number
        return results

    R = 1000  # trials per (r, level) combination
    # Other settings tried previously: r = [2, 5, 10, 50, 100, 1000],
    # r = [2, 5, 10, 15, 20], levels = [0.05],
    # wrong_numbers = [0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, 2.5]
    r = [2, 5]
    levels = [0.05, 0.01]
    wrong_numbers = [1]

    for wn in wrong_numbers:
        self.wrong_number = wn
        # Single-argument print calls reproduce the text emitted by the
        # Python 2 print statements and are valid Python 3 syntax.
        print("Wrong number =  %s" % (self.wrong_number,))
        power = zeros((len(r), len(levels)))
        for ir, r_value in enumerate(r):
            for il, level in enumerate(levels):
                print("r= %s , level= %s" % (r_value, level))
                seed(1)
                for _ in range(R):
                    try:
                        self.run_stochastic_test(__file__, run_model,
                                                 expected_results, r_value,
                                                 significance_level=level,
                                                 transformation=None)
                    except Exception:
                        # Any failure of the stochastic test counts as a
                        # rejection; KeyboardInterrupt/SystemExit are no
                        # longer swallowed, so the loop can be aborted.
                        power[ir, il] = power[ir, il] + 1
                print("Power:  %s" % (power[ir, il] / float(R),))
        print(power / float(R))
def xtest_power_HTM_controlling_with_marginal_characteristics(self):
    """Measure the rejection rate of the stochastic test on a wrong model.

    Creates 5000 households spread over four income groups and records the
    mean age_of_head per income bracket as the expected result. The nested
    ``run_model`` re-runs the transition model and then adds
    ``self.wrong_number`` to its last result. For each combination of ``r``
    and significance level, ``run_stochastic_test`` is invoked R times and
    the share of invocations that raise is printed as the "power".

    The leading ``x`` in the name keeps unittest from collecting the method.
    """
    nhhs = 5000
    ngroups = 4
    # `//` keeps these integers on Python 3 too; they are used below as
    # list repetition counts.
    nhhsg = nhhs // ngroups
    nhhslg = nhhs - (ngroups - 1) * nhhsg
    should_nhhs = nhhs - 2000
    logger.be_quiet()

    household_data = {
        "age_of_head":
        array(nhhsg // 2 * [18] + (nhhsg - nhhsg // 2) * [35] +
              nhhsg // 2 * [30] + (nhhsg - nhhsg // 2) * [40] +
              nhhsg // 2 * [38] + (nhhsg - nhhsg // 2) * [65] +
              nhhslg // 2 * [50] + (nhhslg - nhhslg // 2) * [80]),
        "income":
        array(nhhsg * [500] + nhhsg * [2000] + nhhsg * [7000] +
              nhhslg * [15000]),
        "household_id":
        arange(nhhs) + 1
    }
    household_characteristics_for_ht_data = {
        "characteristic": array(4 * ["income"] + 4 * ["age_of_head"]),
        "min": array([0, 1001, 5001, 10001, 0, 31, 41, 61]),
        "max": array([1000, 5000, 10000, -1, 30, 40, 60, -1])
    }
    annual_household_control_totals_data = {
        "year": array([2000]),
        "total_number_of_households": array([should_nhhs])
    }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(
        table_name='hc_set',
        table_data=household_characteristics_for_ht_data)
    hc_set = HouseholdCharacteristicDataset(in_storage=storage,
                                            in_table_name='hc_set')

    # NOTE(review): control totals and households are both wrapped in
    # HouseholdCharacteristicDataset, which differs from the enabled tests
    # in this file - verify before turning this test back on.
    storage.write_table(
        table_name='hct_set',
        table_data=annual_household_control_totals_data)
    hct_set = HouseholdCharacteristicDataset(in_storage=storage,
                                             in_table_name='hct_set')

    storage.write_table(table_name='households', table_data=household_data)
    households = HouseholdCharacteristicDataset(
        in_storage=storage, in_table_name='households')

    def bracket_mean_ages(dataset):
        """Return the mean age_of_head in each of the four income brackets
        (<=1000, 1001-5000, 5001-10000, >10000)."""
        income = dataset.get_attribute("income")
        age = dataset.get_attribute("age_of_head")
        idx1 = where(income <= 1000)[0]
        idx2 = where(logical_and(income <= 5000, income > 1000))[0]
        idx3 = where(logical_and(income <= 10000, income > 5000))[0]
        idx4 = where(income > 10000)[0]
        return array([
            age[idx1].mean(), age[idx2].mean(),
            age[idx3].mean(), age[idx4].mean()
        ])

    expected_results = bracket_mean_ages(households)

    def run_model():
        """Run the model from the initial household table and return the
        bracket means, with the last entry offset by self.wrong_number."""
        storage.write_table(table_name='households',
                            table_data=household_data)
        rerun_households = HouseholdCharacteristicDataset(
            in_storage=storage, in_table_name='households')
        model = HouseholdTransitionModel()
        model.run(year=2000,
                  household_set=rerun_households,
                  control_totals=hct_set,
                  characteristics=hc_set)
        results = bracket_mean_ages(rerun_households)
        # Inject the error the stochastic test is expected to detect.
        results[-1] = results[-1] + self.wrong_number
        return results

    R = 1000  # number of trials per (r, level) cell
    # Alternative settings used earlier: r = [2, 5, 10, 50, 100, 1000],
    # r = [2, 5, 10, 15, 20], levels = [0.05],
    # wrong_numbers = [0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, 2.5]
    r = [2, 5]
    levels = [0.05, 0.01]
    wrong_numbers = [1]

    for wn in wrong_numbers:
        self.wrong_number = wn
        # Formatted single-argument print calls emit the same text as the
        # former Python 2 print statements and also parse on Python 3.
        print("Wrong number =  %s" % (self.wrong_number,))
        power = zeros((len(r), len(levels)))
        for ir, r_value in enumerate(r):
            for il, level in enumerate(levels):
                print("r= %s , level= %s" % (r_value, level))
                seed(1)
                for _ in range(R):
                    try:
                        self.run_stochastic_test(
                            __file__,
                            run_model,
                            expected_results,
                            r_value,
                            significance_level=level,
                            transformation=None)
                    except Exception:
                        # Count every failed stochastic test as a rejection
                        # while letting KeyboardInterrupt/SystemExit
                        # propagate so the long loop stays interruptible.
                        power[ir, il] = power[ir, il] + 1
                print("Power:  %s" % (power[ir, il] / float(R),))
        print(power / float(R))