コード例 #1
0
            def run_model():
                """Run the household transition model once and summarise the result.

                On every call the 'households' table is rewritten from
                ``household_data`` and wrapped in a new dataset, the model is run
                for year 2000, and the mean ``age_of_head`` of the households in
                each of four income brackets (income <= 1000, (1000, 5000],
                (5000, 10000], income > 10000) is printed and returned as an array.
                """
                storage.write_table(table_name='households',
                                    table_data=household_data)
                hh_dataset = HouseholdCharacteristicDataset(
                    in_table_name='households', in_storage=storage)

                HouseholdTransitionModel().run(year=2000,
                                               household_set=hh_dataset,
                                               control_totals=hct_set,
                                               characteristics=hc_set)

                # The attributes are read only after the model has been run.
                hh_income = hh_dataset.get_attribute("income")
                hh_age = hh_dataset.get_attribute("age_of_head")

                # One boolean mask per income bracket, lowest bracket first.
                bracket_masks = (
                    hh_income <= 1000,
                    logical_and(hh_income > 1000, hh_income <= 5000),
                    logical_and(hh_income > 5000, hh_income <= 10000),
                    hh_income > 10000,
                )
                results = array([hh_age[where(mask)[0]].mean()
                                 for mask in bracket_masks])
                # Single parenthesised argument: prints the same with the
                # Python 2 print statement used elsewhere in this code.
                print(results)
                return results
コード例 #2
0
            def run_model():
                """Run the household transition model once and summarise the result.

                The 'households' table is rewritten from ``household_data`` and
                loaded into a new dataset on every call. After running the model
                for year 2000, the mean ``age_of_head`` is computed for four
                income brackets (income <= 1000, (1000, 5000], (5000, 10000],
                income > 10000); the four means are printed and returned.
                """
                storage.write_table(table_data=household_data,
                                    table_name='households')
                hh_dataset = HouseholdCharacteristicDataset(
                    in_storage=storage, in_table_name='households')

                transition_model = HouseholdTransitionModel()
                transition_model.run(year=2000, household_set=hh_dataset,
                                     control_totals=hct_set,
                                     characteristics=hc_set)

                hh_income = hh_dataset.get_attribute("income")
                hh_age = hh_dataset.get_attribute("age_of_head")

                def bracket_mean(selected):
                    # Mean age of head over the households flagged in `selected`.
                    return hh_age[where(selected)[0]].mean()

                results = array([
                    bracket_mean(hh_income <= 1000),
                    bracket_mean(logical_and(hh_income > 1000, hh_income <= 5000)),
                    bracket_mean(logical_and(hh_income > 5000, hh_income <= 10000)),
                    bracket_mean(hh_income > 10000),
                ])
                # Single parenthesised argument: prints the same with the
                # Python 2 print statement used elsewhere in this code.
                print(results)
                return results
コード例 #3
0
    def test_controlling_age_of_head(self):
        """Control household totals by one marginal characteristic, age_of_head.

        The model is run for 2000, 2001 and 2002 on the same household set.
        After every run the overall household count is compared loosely
        (rtol=1e-1) with the summed control totals of that year, and the
        count inside each of the three age categories is compared tightly
        (rtol=1e-6) with that category's own control total.
        """
        # Three control totals per year, one for each age category 0, 1, 2.
        control_totals_table = {
            "year": array([2000, 2000, 2000,
                           2001, 2001, 2001,
                           2002, 2002, 2002]),
            "age_of_head": array([0, 1, 2, 0, 1, 2, 0, 1, 2]),
            "total_number_of_households": array([
                25013, 21513, 18227,   # 2000
                10055, 15003, 17999,   # 2001
                15678, 14001, 20432,   # 2002
            ]),
        }

        # Category bounds. The checks below use only "max" for the first row
        # and only "min" for the last row, so the -1 is never compared.
        characteristics_table = {
            "characteristic": array(3 * ['age_of_head']),
            "min": array([0, 35, 65]),
            "max": array([34, 64, -1]),
        }

        # 15000 households; 5000 of them start in each age category.
        households_table = {
            "household_id": arange(15000) + 1,
            "building_id": array(15000 * [1]),
            "age_of_head": array(
                1000 * [25] + 1000 * [28] + 2000 * [32] + 1000 * [34] +
                2000 * [35] + 1000 * [40] + 1000 * [54] + 1000 * [62] +
                1000 * [65] + 1000 * [68] + 2000 * [71] + 1000 * [98]),
            "persons": array(
                1000 * [2] + 2000 * [3] + 1000 * [1] + 1000 * [6] +
                1000 * [1] + 1000 * [4] + 3000 * [1] + 5000 * [5],
                dtype=int8),
        }

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=households_table)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set', table_data=control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage,
                                      in_table_name='hct_set',
                                      what='household',
                                      id_name=['year', 'age_of_head'])

        storage.write_table(table_name='hc_set', table_data=characteristics_table)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage,
                                                in_table_name='hc_set')

        storage.write_table(table_name='prs_set', table_data=self.person_data)
        prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

        model = HouseholdTransitionModel(debuglevel=3)
        failure_text = "Error, should_be: %s, but result: %s"

        def control_totals(rows):
            # Control totals stored in the given rows of the control table.
            return hct_set.get_attribute("total_number_of_households")[rows]

        def households_per_category():
            # Count the current households falling into each age category.
            ages = hh_set.get_attribute('age_of_head')
            lower = hc_set.get_attribute("min")
            upper = hc_set.get_attribute("max")
            last = hc_set.size() - 1
            counts = zeros(hc_set.size(), dtype=int32)
            counts[0] = (ages <= upper[0]).sum()
            for k in range(1, last):
                counts[k] = logical_and(ages >= lower[k], ages <= upper[k]).sum()
            counts[last] = (ages >= lower[last]).sum()
            return counts

        # Relative to the preceding state, the 2000 targets are larger in all
        # three categories, the 2001 targets are smaller in all three, and the
        # 2002 targets are a mix of larger and smaller.
        yearly_rows = ((2000, slice(0, 3)),
                       (2001, slice(3, 6)),
                       (2002, slice(6, 9)))
        for year, rows in yearly_rows:
            model.run(year=year, person_set=prs_set, household_set=hh_set,
                      control_totals=hct_set, characteristics=hc_set)

            # Overall size against the summed control totals (loose tolerance).
            results = hh_set.size()
            should_be = [control_totals(rows).sum()]
            self.assertEqual(ma.allclose(should_be, results, rtol=1e-1),
                             True, failure_text % (should_be, results))

            # Per-category counts against their control totals (tight tolerance).
            results = households_per_category()
            should_be = control_totals(rows)
            self.assertEqual(ma.allclose(results, should_be, rtol=1e-6),
                             True, failure_text % (should_be, results))
コード例 #4
0
    def test_controlling_income(self):
        """Control household totals by one marginal characteristic, income.

        The model is run for 2000, 2001 and 2002 on the same household set.
        After every run the overall household count is compared loosely
        (rtol=1e-1) with the expected yearly total, and the count inside each
        of the four income categories is compared tightly (rtol=1e-6) with
        that category's own control total. The characteristics rows are
        stored out of order on purpose, to test row invariance.
        """
        # Four control totals per year, one for each income category 0..3.
        control_totals_table = {
            "year": array([2000, 2000, 2000, 2000,
                           2001, 2001, 2001, 2001,
                           2002, 2002, 2002, 2002]),
            "income": array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]),
            "total_number_of_households": array([
                25013, 21513, 18227, 18493,   # 2000
                10055, 15003, 17999, 17654,   # 2001
                15678, 14001, 20432, 14500,   # 2002
            ]),
        }

        # Rows 2 and 3 are swapped relative to ascending income order:
        # row 2 holds the open-ended top bracket (min 120000) and row 3 holds
        # 70000-119999. hc_sorted_index maps category number -> table row.
        characteristics_table = {
            "characteristic": array(4 * ['income']),
            "min": array([0, 40000, 120000, 70000]),
            "max": array([39999, 69999, -1, 119999]),
        }
        hc_sorted_index = array([0, 1, 3, 2])

        # 20000 households; 5000 of them start in each income category.
        households_table = {
            "household_id": arange(20000) + 1,
            "building_id": array(19950 * [1] + 50 * [0]),
            "income": array(
                1000 * [1000] + 1000 * [10000] + 2000 * [20000] +
                1000 * [35000] + 2000 * [45000] + 1000 * [50000] +
                2000 * [67000] + 2000 * [90000] + 1000 * [100005] +
                2000 * [110003] + 1000 * [120000] + 1000 * [200000] +
                2000 * [500000] + 1000 * [630000]),
            "persons": array(
                3000 * [2] + 2000 * [3] + 1000 * [1] + 1000 * [6] +
                1000 * [1] + 1000 * [4] + 3000 * [1] + 8000 * [5],
                dtype=int8),
        }

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=households_table)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set', table_data=control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage,
                                      in_table_name='hct_set',
                                      what='household',
                                      id_name=['year', 'income'])

        storage.write_table(table_name='hc_set', table_data=characteristics_table)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage,
                                                in_table_name='hc_set')

        storage.write_table(table_name='prs_set', table_data=self.person_data)
        prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

        model = HouseholdTransitionModel(debuglevel=3)
        failure_text = "Error, should_be: %s, but result: %s"

        def run_year(year):
            # One model run on the shared datasets.
            model.run(year=year, person_set=prs_set, household_set=hh_set,
                      control_totals=hct_set, characteristics=hc_set)

        def control_totals(rows):
            # Control totals stored in the given rows of the control table.
            return hct_set.get_attribute("total_number_of_households")[rows]

        def check_total(should_be):
            # Overall number of households, loose tolerance.
            results = hh_set.size()
            self.assertEqual(ma.allclose(should_be, results, rtol=1e-1),
                             True, failure_text % (should_be, results))

        def check_categories(rows):
            # Households per income category, tight tolerance. The bounds are
            # reordered so that position k describes category k.
            incomes = hh_set.get_attribute('income')
            lower = hc_set.get_attribute("min")[hc_sorted_index]
            upper = hc_set.get_attribute("max")[hc_sorted_index]
            results = zeros(hc_set.size(), dtype=int32)
            results[0] = (incomes <= upper[0]).sum()
            for k in range(1, hc_set.size() - 1):
                results[k] = logical_and(incomes >= lower[k],
                                         incomes <= upper[k]).sum()
            results[-1] = (incomes >= lower[-1]).sum()
            should_be = control_totals(rows)
            self.assertEqual(ma.allclose(results, should_be, rtol=1e-6),
                             True, failure_text % (should_be, results))

        # 2000: every category target is above the 5000 starting households.
        run_year(2000)
        check_total([83246])  # 25013 + 21513 + 18227 + 18493
        check_categories(slice(0, 4))

        # 2001: every category target is below its 2000 target.
        run_year(2001)
        check_total([control_totals(slice(4, 8)).sum()])
        check_categories(slice(4, 8))

        # 2002: some targets are above and some below their 2001 values.
        run_year(2002)
        check_total([control_totals(slice(8, 12)).sum()])
        check_categories(slice(8, 12))
コード例 #5
0
        def xtest_power_HTM_controlling_with_marginal_characteristics(self):
            nhhs = 5000
            ngroups = 4
            nhhsg = int(nhhs/ngroups)
            nhhslg = nhhs-(ngroups-1)*nhhsg
            should_nhhs = nhhs-2000
            logger.be_quiet()
            household_data = {"age_of_head": array(nhhsg/2*[18]+(nhhsg-nhhsg/2)*[35] +
                                nhhsg/2*[30] + (nhhsg-nhhsg/2)*[40] + 
                                nhhsg/2*[38] + (nhhsg-nhhsg/2)*[65] + 
                                nhhslg/2*[50] + (nhhslg-nhhslg/2)*[80]),
                               "income": array(nhhsg*[500] + nhhsg*[2000] + 
                                       nhhsg*[7000] + nhhslg*[15000]),
                               "household_id":arange(nhhs)+1}
            household_characteristics_for_ht_data = {"characteristic": array(4*["income"]+4*["age_of_head"]), 
                                                      "min":array([0,1001,5001, 10001, 0, 31, 41, 61]), 
                                                      "max":array([1000, 5000, 10000,-1, 30, 40, 60, -1])}
            annual_household_control_totals_data = {"year":array([2000]),
                                                     "total_number_of_households":array([should_nhhs])}
            
            storage = StorageFactory().get_storage('dict_storage')
           
            storage.write_table(table_name = 'hc_set', table_data = household_characteristics_for_ht_data)
            hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

            storage.write_table(table_name = 'hct_set', table_data = annual_household_control_totals_data)
            hct_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hct_set')
            
            storage.write_table(table_name = 'households', table_data = household_data)
            households = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='households')

            income = households.get_attribute("income")
            age = households.get_attribute("age_of_head")
            idx1 = where(income <= 1000)[0]
            idx2 = where(logical_and(income <= 5000, income > 1000))[0]
            idx3 = where(logical_and(income <= 10000, income > 5000))[0]
            idx4 = where(income > 10000)[0]
            expected_results = array([age[idx1].mean(), age[idx2].mean(), age[idx3].mean(), age[idx4].mean()]) 
                  
            def run_model():
                storage.write_table(table_name = 'households', table_data = household_data)
                households = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='households')
            
                model = HouseholdTransitionModel()
                model.run(year=2000, household_set=households, control_totals=hct_set, characteristics=hc_set)
                income = households.get_attribute("income")
                age = households.get_attribute("age_of_head")
                idx1 = where(income <= 1000)[0]
                idx2 = where(logical_and(income <= 5000, income > 1000))[0]
                idx3 = where(logical_and(income <= 10000, income > 5000))[0]
                idx4 = where(income > 10000)[0]
                results = array([age[idx1].mean(), age[idx2].mean(), age[idx3].mean(), age[idx4].mean()])
                results[-1] = results[-1]+self.wrong_number
                #print results
                return results
            #print expected_results      
            R = 1000
            #r = [2, 5, 10, 50, 100, 1000]
            #r = [2, 5, 10, 15, 20]
            r=[2,5]
            levels = [0.05,  0.01]
            #levels = [0.05]
            #wrong_numbers = [0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, 2.5]
            wrong_numbers = [1]
            for wn in wrong_numbers:
                self.wrong_number = wn
                print "Wrong number = ", self.wrong_number
                power = zeros((len(r), len(levels)))            
                for ir in range(len(r)):
                    for il in range(len(levels)):
                        print "r=", r[ir],", level=",levels[il]
                        seed(1)
                        for iR in range(R):                  
                            try:
                                self.run_stochastic_test(__file__, run_model, expected_results, 
                                                         r[ir], significance_level=levels[il], transformation=None)
                            except:
                                power[ir,il]=power[ir,il]+1
                        print "Power: ",power[ir,il]/float(R)
                print power/float(R)                                                 
コード例 #6
0
        def xtest_power_HTM_controlling_with_marginal_characteristics(self):
            nhhs = 5000
            ngroups = 4
            nhhsg = int(nhhs / ngroups)
            nhhslg = nhhs - (ngroups - 1) * nhhsg
            should_nhhs = nhhs - 2000
            logger.be_quiet()
            household_data = {
                "age_of_head":
                array(nhhsg / 2 * [18] + (nhhsg - nhhsg / 2) * [35] +
                      nhhsg / 2 * [30] + (nhhsg - nhhsg / 2) * [40] +
                      nhhsg / 2 * [38] + (nhhsg - nhhsg / 2) * [65] +
                      nhhslg / 2 * [50] + (nhhslg - nhhslg / 2) * [80]),
                "income":
                array(nhhsg * [500] + nhhsg * [2000] + nhhsg * [7000] +
                      nhhslg * [15000]),
                "household_id":
                arange(nhhs) + 1
            }
            household_characteristics_for_ht_data = {
                "characteristic": array(4 * ["income"] + 4 * ["age_of_head"]),
                "min": array([0, 1001, 5001, 10001, 0, 31, 41, 61]),
                "max": array([1000, 5000, 10000, -1, 30, 40, 60, -1])
            }
            annual_household_control_totals_data = {
                "year": array([2000]),
                "total_number_of_households": array([should_nhhs])
            }

            storage = StorageFactory().get_storage('dict_storage')

            storage.write_table(
                table_name='hc_set',
                table_data=household_characteristics_for_ht_data)
            hc_set = HouseholdCharacteristicDataset(in_storage=storage,
                                                    in_table_name='hc_set')

            storage.write_table(
                table_name='hct_set',
                table_data=annual_household_control_totals_data)
            hct_set = HouseholdCharacteristicDataset(in_storage=storage,
                                                     in_table_name='hct_set')

            storage.write_table(table_name='households',
                                table_data=household_data)
            households = HouseholdCharacteristicDataset(
                in_storage=storage, in_table_name='households')

            income = households.get_attribute("income")
            age = households.get_attribute("age_of_head")
            idx1 = where(income <= 1000)[0]
            idx2 = where(logical_and(income <= 5000, income > 1000))[0]
            idx3 = where(logical_and(income <= 10000, income > 5000))[0]
            idx4 = where(income > 10000)[0]
            expected_results = array([
                age[idx1].mean(), age[idx2].mean(), age[idx3].mean(),
                age[idx4].mean()
            ])

            def run_model():
                storage.write_table(table_name='households',
                                    table_data=household_data)
                households = HouseholdCharacteristicDataset(
                    in_storage=storage, in_table_name='households')

                model = HouseholdTransitionModel()
                model.run(year=2000,
                          household_set=households,
                          control_totals=hct_set,
                          characteristics=hc_set)
                income = households.get_attribute("income")
                age = households.get_attribute("age_of_head")
                idx1 = where(income <= 1000)[0]
                idx2 = where(logical_and(income <= 5000, income > 1000))[0]
                idx3 = where(logical_and(income <= 10000, income > 5000))[0]
                idx4 = where(income > 10000)[0]
                results = array([
                    age[idx1].mean(), age[idx2].mean(), age[idx3].mean(),
                    age[idx4].mean()
                ])
                results[-1] = results[-1] + self.wrong_number
                #print results
                return results

            #print expected_results
            R = 1000
            #r = [2, 5, 10, 50, 100, 1000]
            #r = [2, 5, 10, 15, 20]
            r = [2, 5]
            levels = [0.05, 0.01]
            #levels = [0.05]
            #wrong_numbers = [0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, 2.5]
            wrong_numbers = [1]
            for wn in wrong_numbers:
                self.wrong_number = wn
                print "Wrong number = ", self.wrong_number
                power = zeros((len(r), len(levels)))
                for ir in range(len(r)):
                    for il in range(len(levels)):
                        print "r=", r[ir], ", level=", levels[il]
                        seed(1)
                        for iR in range(R):
                            try:
                                self.run_stochastic_test(
                                    __file__,
                                    run_model,
                                    expected_results,
                                    r[ir],
                                    significance_level=levels[il],
                                    transformation=None)
                            except:
                                power[ir, il] = power[ir, il] + 1
                        print "Power: ", power[ir, il] / float(R)
                print power / float(R)