def prepare_for_run(self, storage, in_table_name, id_name, **kwargs):
    """Create the control-total dataset used by a model run.

    Arguments:
        storage -- storage object passed to ControlTotalDataset as in_storage
        in_table_name -- name of the table that holds the control totals
        id_name -- identifier attribute name(s), forwarded unchanged
        **kwargs -- accepted for interface compatibility; not used here

    Returns the newly constructed ControlTotalDataset.
    """
    # The dataset class is imported at call time rather than at module level.
    from urbansim.datasets.control_total_dataset import ControlTotalDataset

    dataset_arguments = {
        'in_storage': storage,
        'in_table_name': in_table_name,
        'id_name': id_name,
    }
    return ControlTotalDataset(**dataset_arguments)
    def test_accounting_attribute(self):
        """Run TransitionModel with 'jobs' as the dataset accounting attribute.

        The business table holds 1500 businesses with 10 jobs each, 500 per
        sector (1, 2 and 3). After the model is run for year 2000 against the
        'number_of_jobs' control totals, the summed 'jobs' attribute is
        compared with the first three control-total rows (the year-2000 rows),
        first overall and then sector by sector.
        """
        # NOTE(review): rtol=10 in the two checks below is a relative tolerance
        # of 1000%, which makes them very permissive -- confirm it is intended.
        failure_template = "Error, should_be: %s, but result: %s"

        control_totals_table = {
            "year":           array([2000,   2000,  2000,  2001]),
            "sector_id":      array([    1,     2,     3,     2]),
            "number_of_jobs": array([25013,  1513,  5000, 10055]),
        }
        businesses_table = {
            "business_id": arange(1500)+1,
            "grid_id": array(1500*[1]),
            "sector_id": array(500*[1] + 500*[2] + 500*[3]),
            "jobs": array(1500*[10]),
        }

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='bs_set', table_data=businesses_table)
        bs_set = BusinessDataset(in_storage=storage, in_table_name='bs_set')

        storage.write_table(table_name='ect_set', table_data=control_totals_table)
        ect_set = ControlTotalDataset(in_storage=storage, in_table_name='ect_set',
                                      what='', id_name=[])

        model = TransitionModel(bs_set, dataset_accounting_attribute='jobs',
                                control_total_dataset=ect_set)
        model.run(year=2000, target_attribute_name="number_of_jobs",
                  reset_dataset_attribute_value={'grid_id':-1})

        # Overall check: total jobs against the sum of the first three control totals.
        results = bs_set.get_attribute('jobs').sum()
        should_be = [(ect_set.get_attribute("number_of_jobs")[0:3]).sum()]
        self.assertEqual(ma.allclose(should_be, results, rtol=10), True,
                         failure_template % (should_be, results))

        # Per-sector check: jobs summed over the businesses of each of the
        # first three control-total sectors.
        sector_count = 3
        jobs_per_business = bs_set.get_attribute('jobs')
        business_sectors = bs_set.get_attribute('sector_id')
        control_sectors = ect_set.get_attribute("sector_id")
        results = zeros(sector_count, dtype=int32)
        for position in range(sector_count):
            in_sector = business_sectors == control_sectors[position]
            results[position] = (jobs_per_business*in_sector).sum()
        should_be = ect_set.get_attribute("number_of_jobs")[0:3]
        self.assertEqual(ma.allclose(results, should_be, rtol=10), True,
                         failure_template % (should_be, results))
 def prepare_for_run(self, storage, **kwargs):
     """Create the household control totals and household characteristics.

     Arguments:
         storage -- storage object both datasets are read from
         **kwargs -- forwarded unchanged to sample_control_totals

     Returns a (control totals, characteristics) tuple.
     """
     # Project modules are imported at call time rather than at module level.
     from urbansim.datasets.control_total_dataset import ControlTotalDataset
     from urbansim.datasets.household_characteristic_dataset import HouseholdCharacteristicDataset
     from urbansim.models.employment_transition_model import sample_control_totals

     household_totals = ControlTotalDataset(in_storage=storage, what="household")
     household_characteristics = HouseholdCharacteristicDataset(in_storage=storage)
     # Called for its effect only; its return value is not used.
     sample_control_totals(storage, household_totals, **kwargs)
     return (household_totals, household_characteristics)
Exemple #4
0
    def test_same_distribution_after_job_addition(self):
        """Run the regional employment transition model and verify job counts.

        The "total_non_home_based_employment" column of the control totals is
        set to [5750, 1400, 4000, 1600] before the model is run for year 2000.
        The test then checks the number of jobs in large areas 1 and 2 and the
        counts returned by get_count_all_sectors_and_areas.
        """
        failure_template = "Error, should_be: %s, but result: %s"
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='jobs_set', table_data=self.jobs_data)
        jobs_set = JobDataset(in_storage=storage, in_table_name='jobs_set')

        # The fixture dictionary itself is updated (no copy is made).
        control_totals_table = self.annual_employment_control_totals_data
        control_totals_table["total_non_home_based_employment"] = array(
            [5750, 1400, 4000, 1600])

        storage.write_table(table_name='ect_set', table_data=control_totals_table)
        ect_set = ControlTotalDataset(in_storage=storage,
                                      in_table_name='ect_set',
                                      what="employment")

        # run model
        RegionalEmploymentTransitionModel().run(
            year=2000,
            job_set=jobs_set,
            control_totals=ect_set,
            job_building_types=self.job_building_types)

        # Job counts in large areas 1 and 2 must match exactly
        # (8150 + 7600 = 15750 jobs in total).
        large_area_ids = jobs_set.get_attribute("large_area_id")
        results = array([where(large_area_ids == area_id)[0].size
                         for area_id in [1, 2]])
        should_be = [8150, 7600]
        self.assertEqual(ma.allequal(should_be, results), True,
                         failure_template % (should_be, results))

        # Counts per sector and area must be close to the control totals.
        results = self.get_count_all_sectors_and_areas(jobs_set)
        should_be = [5750, 1400, 1000, 4000, 1600, 2000]
        self.assertEqual(ma.allclose(results, should_be, rtol=0.00001), True,
                         failure_template % (should_be, results))
Exemple #5
0
    def test_controlling_with_one_marginal_characteristic(self):
        """Control households by the single marginal characteristic age_of_head.

        Two control totals are given for year 2000: 25000 households for
        age_of_head category 0 and 15000 for category 1. After the run the
        test checks the overall household count, the totals of groups 1-4 and
        5-8, and that the split inside each of those two large groups matches
        the expected proportions.
        """
        failure_template = "Error, should_be: %s, but result: %s"

        # IMPORTANT: marginal characteristics grouping indices have to start at 0!
        # Category 0 of "age_of_head" is the first large group (groups 1-4) and
        # should total 25000 households after one model year; category 1 is the
        # second large group (groups 5-8) and should total 15000.
        control_totals_table = {
            "year": array([2000, 2000]),
            "age_of_head": array([0,1]),
            "total_number_of_households": array([25000, 15000]),
        }

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=self.households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set', table_data=control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                      what='household', id_name=['year' ,'age_of_head'])

        storage.write_table(table_name='hc_set',
                            table_data=self.household_characteristics_for_ht_data)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

        storage.write_table(table_name='prs_set', table_data=self.person_data)
        prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

        HouseholdTransitionModel().run(year=2000, person_set=prs_set, household_set=hh_set,
                                       control_totals=hct_set, characteristics=hc_set)

        # About 40000 households in total (25000 + 15000).
        results = hh_set.size()
        should_be = [40000]
        self.assertEqual(ma.allclose(should_be, results, rtol=1e-1), True,
                         failure_template % (should_be, results))

        # Groups 1-4 together should hold about 25000 households and groups
        # 5-8 about 15000.
        results = self.get_count_all_groups(hh_set)
        should_be = [25000, 15000]
        large_group_totals = [sum(results[0:4]), sum(results[4:8])]
        self.assertEqual(ma.allclose(large_group_totals, should_be, rtol=1e-1), True,
                         failure_template % (should_be, results))

        # Inside each large group the shares are expected to be unchanged:
        # shares of 15000 scaled to 25000 for groups 1-4, and shares of 18000
        # scaled to 15000 for groups 5-8.
        first_group_counts = (6000.0, 2000.0, 3000.0, 4000.0)
        second_group_counts = (2000.0, 5000.0, 3000.0, 8000.0)
        should_be = ([count/15000.0*25000.0 for count in first_group_counts] +
                     [count/18000.0*15000.0 for count in second_group_counts])
        self.assertEqual(ma.allclose(results, should_be, rtol=0.05), True,
                         failure_template % (should_be, results))
    def test_unplaced_jobs_after_job_addition(self):
        """Check the total and unplaced job counts after adding unplaced jobs.

        2000 jobs with grid_id 0 (job ids 13001-15000, sector 1) are appended
        to the job set before the employment transition model is run for year
        2000. Afterwards the job set must hold 16750 jobs, of which 3750 have
        grid_id <= 0: the jobs appended here plus the jobs created by the
        model, which creates jobs but does not place them.
        """
        control_totals_table = self.annual_employment_control_totals_data
        # The fixture dictionary itself is updated (no copy is made).
        control_totals_table["total_non_home_based_employment"] = array(
            [10750, 3000])

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='jobs_set', table_data=self.jobs_data)
        jobs_set = JobDataset(in_storage=storage, in_table_name='jobs_set')

        storage.write_table(table_name='ect_set', table_data=control_totals_table)
        ect_set = ControlTotalDataset(in_storage=storage,
                                      in_table_name='ect_set',
                                      what="employment")

        # Extra jobs that start out unplaced (grid_id 0).
        unplaced_jobs = {
            "job_id": arange(13001, 15001),
            "grid_id": array(2000 * [0]),
            "sector_id": array(2000 * [1]),
            "building_type": array(2000 * [Constants._industrial_code]),
        }
        jobs_set.add_elements(unplaced_jobs)

        # run model with input databases
        EmploymentTransitionModel().run(
            year=2000,
            job_set=jobs_set,
            control_totals=ect_set,
            job_building_types=self.job_building_types)

        # Exactly 16750 jobs in total after the run.
        results = jobs_set.size()
        should_be = [16750]
        self.assertEqual(ma.allequal(should_be, results), True, "Error")

        # Unplaced jobs = jobs created by the model + jobs unplaced beforehand.
        grid_ids = jobs_set.get_attribute("grid_id")
        results = where(grid_ids <= 0)[0].size
        should_be = [3750.0]
        self.assertEqual(ma.allclose(results, should_be, rtol=0.00001), True)
Exemple #7
0
    def test_controlling_with_three_marginal_characteristics(self):
        """Control households by age_of_head, income and persons at once.

        Each of the three marginal characteristics has two categories, which
        gives eight groups, and a control total is specified for every group.
        Households whose building_id is not 10 are unplaced (building_id set
        to 0) before the run. Afterwards the test checks the overall household
        count (about 30000, the sum of the control totals) and that the count
        of every group matches its control total.
        """

        #IMPORTANT: marginal characteristics grouping indices have to start at 0!
        annual_household_control_totals_data = {
            "year": array(8*[2000]),
            "age_of_head": array(4*[0] + 4*[1]),
            "income": array(2*[0] + 2*[1] + 2*[0] + 2*[1]),
            "persons": array([0,1,0,1,0,1,0,1]),
            "total_number_of_households": array([4000, 5000, 1000, 3000, 0, 6000, 3000, 8000])
            }
        # All three columns must have the same length: two categories per
        # characteristic, six rows in total.
        household_characteristics_for_ht_data = {
            "characteristic": array(2*['age_of_head'] + 2*['income'] + 2*['persons']),
            "min": array([0, 50, 0, 40000, 0, 3]),
            "max": array([49, 100, 39999, -1, 2, -1])
            }
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=self.households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set', table_data=annual_household_control_totals_data)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what='household', id_name=['year' ,'age_of_head', 'income', 'persons'])

        storage.write_table(table_name='hc_set', table_data=household_characteristics_for_ht_data)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

        # unplace some households: every household not in building 10 gets
        # building_id 0. `!=` is used because the `<>` operator no longer
        # exists in Python 3.
        where10 = where(hh_set.get_attribute("building_id") != 10)[0]
        hh_set.modify_attribute(name="building_id", data=zeros(where10.size), index=where10)

        storage.write_table(table_name='prs_set', table_data=self.person_data)
        prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

        model = HouseholdTransitionModel()
        model.run(year=2000, person_set=prs_set, household_set=hh_set, control_totals=hct_set, characteristics=hc_set)

        #check that there are indeed 30000 total households after running the model
        results = hh_set.size()
        should_be = [30000]
        self.assertEqual(ma.allclose(should_be, results, rtol=1e-1),
                         True, "Error, should_be: %s, but result: %s" % (should_be, results))

        #check that the number of households in each group exactly match the control totals specified
        results = self.get_count_all_groups(hh_set)
        should_be = [4000, 5000, 1000, 3000, 0, 6000, 3000, 8000]
        self.assertEqual(ma.allclose(results, should_be),
                         True, "Error, should_be: %s, but result: %s" % (should_be, results))
    def test_same_distribution_after_household_subtraction(self):
        """Reduce the households to a single control total of 20000.

        Only a year-2000 control total is given (no marginal characteristics)
        and the model is run without a person set. Afterwards the household
        count must be about 20000, and the group counts must keep the shares
        6000, 2000, 3000, 4000, 2000, 5000, 3000 and 8000 out of 33000 that
        the test expects, within 5%.
        """
        control_totals_table = {
            "year": array([2000]),
            "total_number_of_households": array([20000]),
        }

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=self.households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set', table_data=control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                      what="household", id_name="year")

        storage.write_table(table_name='hc_set',
                            table_data=self.household_characteristics_for_ht_data)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage,
                                                in_table_name='hc_set')

        HouseholdTransitionModel().run(year=2000,
                                       household_set=hh_set,
                                       control_totals=hct_set,
                                       characteristics=hc_set)

        # About 20000 households in total after the run.
        results = hh_set.size()
        should_be = [20000]
        self.assertEqual(ma.allclose(should_be, results, rtol=1e-1), True,
                         "Error, should_be: %s, but result: %s" % (should_be, results))

        # Each group keeps its share of 33000, scaled to the new total.
        results = self.get_count_all_groups(hh_set)
        group_counts = (6000.0, 2000.0, 3000.0, 4000.0,
                        2000.0, 5000.0, 3000.0, 8000.0)
        should_be = [count / 33000.0 * 20000.0 for count in group_counts]
        self.assertEqual(ma.allclose(results, should_be, rtol=0.05), True,
                         "Error, should_be: %s,\n but result: %s" % (should_be, results))
Exemple #9
0
    def test_same_distribution_after_household_addition(self):
        """Grow the households to a single control total of 50000.

        Only a year-2000 control total is given (no marginal characteristics).
        After the run the test checks the overall household count (about
        50000), the number of households with building_id <= 0 (about 17000),
        the shares of the groups (unchanged shares of 33000, within 5%) and
        the dtypes of the age_of_head, income and persons attributes.
        """
        failure_template = "Error, should_be: %s, but result: %s"

        control_totals_table = {
            "year": array([2000]),
            "total_number_of_households": array([50000]),
        }

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=self.households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='prs_set', table_data=self.person_data)
        prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

        storage.write_table(table_name='hct_set', table_data=control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                      what="household", id_name="year")

        storage.write_table(table_name='hc_set',
                            table_data=self.household_characteristics_for_ht_data)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

        HouseholdTransitionModel().run(year=2000, person_set=prs_set, household_set=hh_set,
                                       control_totals=hct_set, characteristics=hc_set)

        # About 50000 households in total after the run.
        results = hh_set.size()
        should_be = [50000]
        self.assertEqual(ma.allclose(should_be, results, rtol=1e-1), True,
                         failure_template % (should_be, results))

        # Unplaced households (building_id <= 0) are the newly created ones.
        building_ids = hh_set.get_attribute("building_id")
        results = where(building_ids<=0)[0].size
        should_be = [17000]
        self.assertEqual(ma.allclose(should_be, results, rtol=1e-1), True,
                         failure_template % (should_be, results))

        # Each group keeps its share of 33000, scaled to the new total.
        results = self.get_count_all_groups(hh_set)
        group_counts = (6000.0, 2000.0, 3000.0, 4000.0,
                        2000.0, 5000.0, 3000.0, 8000.0)
        should_be = array([count/33000.0*50000.0 for count in group_counts])
        self.assertEqual(ma.allclose(results, should_be, rtol=0.05), True,
                         failure_template % (should_be, results))

        # check the types of the attributes
        dtype_expectations = (
            ("age_of_head", int32,
             "Error in data type of the new household set. Should be: int32, is: %s"),
            ("income", int32,
             "Error in data type of the new household set. Should be: int32, is: %s"),
            ("persons", int8,
             "Error in data type of the new household set. Should be: int8, is: %s"),
        )
        for attribute_name, expected_dtype, message in dtype_expectations:
            actual_dtype = hh_set.get_attribute(attribute_name).dtype
            self.assertEqual(actual_dtype, expected_dtype, message % str(actual_dtype))
    def test_same_distribution_after_household_subtraction(self):
        """Reduce households per subarea with the subarea transition model.

        Control totals for year 2000 are given per faz_id: 8000 households for
        faz 1 and 12000 for faz 2. The model is run with subarea_id_name
        "faz_id" and without a person set. Afterwards the household count per
        faz and the per-group counts (shares of 16500 scaled to each faz
        total) are checked with a 10% relative tolerance.
        """
        control_totals_table = {
            "year": array([2000, 2000]),
            "total_number_of_households": array([8000, 12000]),
            "faz_id": array([1,2]),
        }

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name = 'hh_set', table_data = self.households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name = 'hct_set', table_data = control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                      what="household")

        storage.write_table(table_name = 'hc_set',
                            table_data = self.household_characteristics_for_ht_data)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

        # No person_set is passed to this run.
        model = SubareaHouseholdTransitionModel(subarea_id_name="faz_id")
        model.run(year=2000, household_set=hh_set, control_totals=hct_set,
                  characteristics=hc_set)

        # About 8000 households in faz 1 and 12000 in faz 2 after the run.
        faz_ids = hh_set.get_attribute("faz_id")
        results = array([where(faz_ids == faz)[0].size for faz in [1,2]])
        should_be = [8000, 12000]
        self.assertEqual(ma.allclose(should_be, results, rtol=1e-1), True,
                         "Error, should_be: %s, but result: %s" % (should_be, results))

        # Within each faz every group keeps its share of 16500, scaled to the
        # control total of that faz (faz 1 first, then faz 2).
        results = self.get_count_all_groups(hh_set)
        group_counts = (3000.0, 1000.0, 1500.0, 2000.0,
                        1000.0, 2500.0, 1500.0, 4000.0)
        should_be = array([count/16500.0*8000.0 for count in group_counts] +
                          [count/16500.0*12000.0 for count in group_counts])
        self.assertEqual(ma.allclose(results, should_be, rtol=0.1), True,
                         "Error, should_be: %s,\n but result: %s" % (should_be, results))
Exemple #11
0
    def test_person_dataset(self):
        """Check that the person set follows the households through a run.

        Four households (1, 2, 2 and 4 persons) and their nine persons are
        controlled by the 'persons' characteristic with totals 0 for category
        0 and 4 for category 1. After the run the person set must contain 16
        persons whose household ids are four each of 4, 5, 6 and 7.
        """
        households_table = {
            "household_id": arange(4)+1,
            "building_id": array([3,6,1,2], dtype=int32),
            "persons": array([1,2,2,4], dtype=int32),
        }
        characteristics_table = {
            "characteristic": array(2*['persons']),
            "min": array([1, 3]),
            "max": array([2,-1]),
        }
        persons_table = {
            "person_id": arange(9)+1,
            "household_id": array([1,2,2,3,3,4,4,4,4]),
            "job_id": array([30, 50, 0, 1, 23, 54, 78, 2, 6]),
        }
        control_totals_table = {
            "year": array(2*[2000]),
            "persons": array([0,1]),
            "total_number_of_households": array([0, 4]),
        }

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=households_table)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='prs_set', table_data=persons_table)
        prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

        storage.write_table(table_name='hct_set', table_data=control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                      what="household", id_name=["year", "persons"])

        storage.write_table(table_name='hc_set', table_data=characteristics_table)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

        HouseholdTransitionModel(debuglevel=3).run(
            year=2000, person_set=prs_set, household_set=hh_set,
            control_totals=hct_set, characteristics=hc_set)

        # The run should remove the first three households with their five
        # persons and add three copies of the last household (12 persons),
        # leaving 4 + 12 = 16 persons.
        person_count = prs_set.size()
        self.assertEqual(person_count, 16,
                         "Error in size of the person_set. Should be 16, is %s." % person_count)
        expected_household_ids = array([4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7])
        self.assertEqual(
            ma.allequal(prs_set.get_attribute('household_id'), expected_household_ids),
            True,
            "Error in assigning household_id to new persons.")
Exemple #12
0
    def run(self):
        """Compute government and education jobs per zone and store them.

        Zone-level expressions for population and for the job components are
        evaluated on the 'zone' dataset. The government and education totals
        per zone are then rescaled so that their sums match the current-year
        control totals read from the 2010 cache table
        "annual_business_control_totals", rounded, and added to the zone
        dataset as the primary attributes 'gov_jobs' and 'edu_jobs'.
        """
        zone_set = SessionConfiguration().get_dataset_pool().get_dataset('zone')

        # Intermediate variables. Only the side effect of computing them on
        # the zone dataset is needed; later expressions refer to them by name,
        # so the order of evaluation is kept.
        intermediate_expressions = (
            '_zone_pop = zone.aggregate(household.persons,intermediates=[building,parcel])',
            '_county_pop = zone.aggregate(parcel.disaggregate(county.aggregate(household.persons,intermediates=[building,parcel])),function=median)',
            '_regional_pop = zone.disaggregate(alldata.aggregate_all(household.persons))',
            '_local_gov_jobs = zone._zone_pop * zone.disaggregate(zone_gov_ed_job.local_gov)',
            '_ed_k12 = zone._zone_pop * zone.disaggregate(zone_gov_ed_job.ed_k12)',
            '_county_gov_jobs = zone._county_pop * zone.disaggregate(zone_gov_ed_job.county_gov)',
            '_state_gov_jobs = zone._regional_pop * zone.disaggregate(zone_gov_ed_job.state_gov)',
            '_fed_gov_jobs = zone._regional_pop * zone.disaggregate(zone_gov_ed_job.fed_gov)',
            '_ed_high_jobs = zone._regional_pop * zone.disaggregate(zone_gov_ed_job.ed_high)',
        )
        for expression in intermediate_expressions:
            zone_set.compute_variables(expression)

        # Unscaled government and education jobs per zone.
        gov_jobs = zone_set.compute_variables('_gov_jobs = _local_gov_jobs + _county_gov_jobs + _state_gov_jobs + _fed_gov_jobs')
        edu_jobs = zone_set.compute_variables('_ed_jobs = _ed_k12 + _ed_high_jobs')

        current_year = SimulationState().get_current_time()
        # The control totals are always read from the cache of this fixed year.
        base_year = '2010'
        base_cache_storage = AttributeCache().get_flt_storage_for_year(base_year)
        control_totals = ControlTotalDataset(in_storage=base_cache_storage,
                                             in_table_name="annual_business_control_totals")
        number_of_jobs = control_totals.get_attribute("total_number_of_jobs")

        def control_total_for_sector(sector_id):
            # Sum of the control-total jobs of one sector in the current year.
            selected = logical_and(control_totals.get_attribute("year")==current_year,
                                   control_totals.get_attribute("sector_id")==sector_id)
            return number_of_jobs[where(selected)[0]].sum()

        # NOTE(review): the sector codes are presumably project-specific ids
        # for "other", "higher" and "K-12" education and for government --
        # verify against the sector table.
        jobs_current_edother = control_total_for_sector(618320)
        jobs_current_edhigh = control_total_for_sector(618330)
        jobs_current_edk12 = control_total_for_sector(618340)
        total_gov_jobs = control_total_for_sector(618319)
        total_edu_jobs = jobs_current_edother + jobs_current_edhigh + jobs_current_edk12

        # Ratios that make the zone sums equal the control totals; the factor
        # 1.0 forces floating-point division.
        gov_scaling_ratio = total_gov_jobs*1.0/gov_jobs.sum()
        edu_scaling_ratio = total_edu_jobs*1.0/edu_jobs.sum()

        zone_set.add_primary_attribute(name='gov_jobs',
                                       data=around(gov_jobs*gov_scaling_ratio))
        zone_set.add_primary_attribute(name='edu_jobs',
                                       data=around(edu_jobs*edu_scaling_ratio))
    def run(self,
            in_storage,
            out_storage,
            business_table="business",
            jobs_table="jobs",
            control_totals_table=None):
        logger.log_status("Unrolling %s table." % business_table)
        # get attributes from the establisments table
        business_dataset = BusinessDataset(in_storage=in_storage,
                                           in_table_name=business_table)
        business_sizes = business_dataset.get_attribute(
            self.number_of_jobs_attr).astype("int32")
        sectors = business_dataset.get_attribute("sector_id")
        tazes = business_dataset.get_attribute(
            self.geography_id_attr).astype("int32")
        building_ids = array([], dtype='int32')
        if "building_id" in business_dataset.get_primary_attribute_names():
            building_ids = business_dataset.get_attribute("building_id")
        parcel_ids = array([], dtype='int32')
        if "parcel_id" in business_dataset.get_primary_attribute_names():
            parcel_ids = business_dataset.get_attribute("parcel_id")
        home_based = array([], dtype='int16')
        if "home_based" in business_dataset.get_primary_attribute_names():
            home_based = business_dataset.get_attribute("home_based")
        building_sqft = business_dataset.get_attribute(self.sqft_attr)
        building_sqft[building_sqft <= 0] = 0
        join_flags = None
        if "join_flag" in business_dataset.get_primary_attribute_names():
            join_flags = business_dataset.get_attribute("join_flag")
        impute_sqft_flag = None
        if "impute_building_sqft_flag" in business_dataset.get_primary_attribute_names(
        ):
            impute_sqft_flag = business_dataset.get_attribute(
                "impute_building_sqft_flag")

        # inititalize jobs attributes
        total_size = business_sizes.sum()
        jobs_data = {}
        jobs_data["sector_id"] = resize(array([-1], dtype=sectors.dtype),
                                        total_size)
        jobs_data["building_id"] = resize(
            array([-1], dtype=building_ids.dtype), total_size)
        jobs_data["parcel_id"] = resize(array([-1], dtype=parcel_ids.dtype),
                                        total_size)
        jobs_data[self.geography_id_attr] = resize(
            array([-1], dtype=tazes.dtype), total_size)
        jobs_data["building_type"] = resize(
            array([-1], dtype=home_based.dtype), total_size)
        jobs_data["sqft"] = resize(array([], dtype=building_sqft.dtype),
                                   total_size)
        if join_flags is not None:
            jobs_data["join_flag"] = resize(array([], dtype=join_flags.dtype),
                                            total_size)
        if impute_sqft_flag is not None:
            jobs_data["impute_building_sqft_flag"] = resize(
                array([], dtype=impute_sqft_flag.dtype), total_size)

        indices = cumsum(business_sizes)
        # iterate over establishments. For each business create the corresponding number of jobs by filling the corresponding part
        # of the arrays
        start_index = 0
        for i in range(business_dataset.size()):
            end_index = indices[i]
            jobs_data["sector_id"][start_index:end_index] = sectors[i]
            if building_ids.size > 0:
                jobs_data["building_id"][start_index:end_index] = building_ids[
                    i]
            if parcel_ids.size > 0:
                jobs_data["parcel_id"][start_index:end_index] = parcel_ids[i]
            jobs_data[self.geography_id_attr][start_index:end_index] = tazes[i]
            if home_based.size > 0:
                jobs_data["building_type"][start_index:end_index] = home_based[
                    i]
            if self.compute_sqft_per_job:
                jobs_data["sqft"][start_index:end_index] = round(
                    (building_sqft[i] - building_sqft[i] / 10.0) /
                    float(business_sizes[i]))  # sqft per employee
            else:
                jobs_data["sqft"][start_index:end_index] = building_sqft[i]
            if join_flags is not None:
                jobs_data["join_flag"][start_index:end_index] = join_flags[i]
            if impute_sqft_flag is not None:
                jobs_data["impute_building_sqft_flag"][
                    start_index:end_index] = impute_sqft_flag[i]
            start_index = end_index

        jobs_data["job_id"] = arange(total_size) + 1
        if self.compute_sqft_per_job:
            jobs_data["sqft"] = clip(jobs_data["sqft"], 0, self.maximum_sqft)
            jobs_data["sqft"][logical_and(
                jobs_data["sqft"] > 0,
                jobs_data["sqft"] < self.minimum_sqft)] = self.minimum_sqft

        # correct missing job_building_types
        wmissing_bt = where(jobs_data["building_type"] <= 0)[0]
        if wmissing_bt.size > 0:
            jobs_data["building_type"][
                wmissing_bt] = 2  # assign non-homebased type for now. It can be re-classified in the assign_bldgs_to_jobs... script

        # create jobs table and write it out
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name="jobs", table_data=jobs_data)
        job_dataset = JobDataset(in_storage=storage)
        if self.unplace_jobs_with_non_existing_buildings:
            self.do_unplace_jobs_with_non_existing_buildings(
                job_dataset, out_storage)

        # Match to control totals (only eliminate jobs if control totals are smaller than the actual number of jobs).
        if control_totals_table is not None:
            logger.log_status("Matching to control totals.")
            control_totals = ControlTotalDataset(
                what='employment',
                id_name=['zone_id', 'sector_id'],
                in_table_name=control_totals_table,
                in_storage=in_storage)
            control_totals.load_dataset(
                attributes=['zone_id', 'sector_id', 'jobs'])
            zones_sectors = control_totals.get_id_attribute()
            njobs = control_totals.get_attribute('jobs')
            remove = array([], dtype='int32')
            for i in range(zones_sectors.shape[0]):
                zone, sector = zones_sectors[i, :]
                in_sector = job_dataset.get_attribute("sector_id") == sector
                in_zone_in_sector = logical_and(
                    in_sector,
                    job_dataset.get_attribute("zone_id") == zone)
                if in_zone_in_sector.sum() <= njobs[i]:
                    continue
                to_be_removed = in_zone_in_sector.sum() - njobs[i]
                this_removal = 0
                not_considered = ones(job_dataset.size(), dtype='bool8')
                for unit in [
                        'parcel_id', 'building_id', None
                ]:  # first consider jobs without parcel id, then without building_id, then all
                    if unit is not None:
                        wnunit = job_dataset.get_attribute(unit) <= 0
                        eligible = logical_and(
                            not_considered,
                            logical_and(in_zone_in_sector, wnunit))
                        not_considered[where(wnunit)] = False
                    else:
                        eligible = logical_and(not_considered,
                                               in_zone_in_sector)
                    eligible_sum = eligible.sum()
                    if eligible_sum > 0:
                        where_eligible = where(eligible)[0]
                        if eligible_sum <= to_be_removed - this_removal:
                            draw = arange(eligible_sum)
                        else:
                            draw = sample_noreplace(
                                where_eligible, to_be_removed - this_removal,
                                eligible_sum)
                        remove = concatenate((remove, where_eligible[draw]))
                        this_removal += draw.size
                        if this_removal >= to_be_removed:
                            break

            job_dataset.remove_elements(remove)
            logger.log_status("%s jobs removed." % remove.size)

        logger.log_status("Write jobs table.")
        job_dataset.write_dataset(out_table_name=jobs_table,
                                  out_storage=out_storage)
        logger.log_status("Created %s jobs." % job_dataset.size())
def run_HTM(niter):
    """Run the household transition model ``niter`` times on synthetic data.

    Every iteration loads a fresh copy of 5000 synthetic households (four
    income groups, each split between two ages of head), runs
    ``HouseholdTransitionModel`` for year 2000 against a control total of
    3000 households, and records the mean ``age_of_head`` per income bin.

    Args:
        niter: number of independent model runs.

    Returns:
        Array of shape ``(niter, 4)``. Row ``k`` holds the mean age of head
        after run ``k`` for incomes ``<= 1000``, ``(1000, 5000]``,
        ``(5000, 10000]`` and ``> 10000``, in that order.
    """
    nhhs = 5000
    ngroups = 4
    nhhsg = nhhs // ngroups
    # The last group absorbs whatever is left after the equal-sized groups.
    nhhslg = nhhs - (ngroups - 1) * nhhsg
    should_nhhs = nhhs - 2000

    def ages_for_group(group_size, first_age, second_age):
        # Floor division keeps the repetition count an int: with true
        # division the count is a float and ``float * list`` raises TypeError.
        half = group_size // 2
        return half * [first_age] + (group_size - half) * [second_age]

    storage = StorageFactory().get_storage('dict_storage')

    hc_set_table_name = 'hc_set'
    storage.write_table(
        table_name=hc_set_table_name,
        table_data={
            'characteristic': array(4 * ['income'] + 4 * ['age_of_head']),
            'min': array([0, 1001, 5001, 10001, 0, 31, 41, 61]),
            'max': array([1000, 5000, 10000, -1, 30, 40, 60, -1])
        },
    )

    hct_set_table_name = 'hct_set'
    storage.write_table(
        table_name=hct_set_table_name,
        table_data={
            'year': array([2000]),
            'total_number_of_households': array([should_nhhs])
        },
    )

    households_table_name = 'households'
    storage.write_table(
        table_name=households_table_name,
        table_data={
            'age_of_head':
            array(ages_for_group(nhhsg, 18, 35) +
                  ages_for_group(nhhsg, 30, 40) +
                  ages_for_group(nhhsg, 38, 65) +
                  ages_for_group(nhhslg, 50, 80)),
            'income':
            array(nhhsg * [500] + nhhsg * [2000] + nhhsg * [7000] +
                  nhhslg * [15000]),
            'household_id':
            arange(nhhs) + 1
        },
    )

    hc_set = HouseholdCharacteristicDataset(in_storage=storage,
                                            in_table_name=hc_set_table_name)
    hct_set = ControlTotalDataset(in_storage=storage,
                                  in_table_name=hct_set_table_name,
                                  what='household',
                                  id_name=['year'])

    logger.be_quiet()
    result = zeros((niter, 4))
    for run_index in range(niter):
        # Reload the households so every run starts from the same table.
        households = HouseholdDataset(in_storage=storage,
                                      in_table_name=households_table_name)

        model = HouseholdTransitionModel()
        model.run(year=2000,
                  household_set=households,
                  control_totals=hct_set,
                  characteristics=hc_set)
        income = households.get_attribute('income')
        age = households.get_attribute('age_of_head')
        income_bins = (
            income <= 1000,
            logical_and(income <= 5000, income > 1000),
            logical_and(income <= 10000, income > 5000),
            income > 10000,
        )
        result[run_index, :] = array(
            [age[in_bin].mean() for in_bin in income_bins])

    return result
Exemple #15
0
    def test_same_distribution_after_job_subtraction(self):
        """Check regional totals and job distributions after the model run.

        Runs ``RegionalEmploymentTransitionModel`` for year 2000 once and
        asserts the number of jobs per ``large_area_id``. It then hands two
        closures to ``run_stochastic_test``: one returning the counts from
        ``get_count_all_sectors_and_areas`` and one returning the job counts
        per sector and building type, each compared with its expected values.
        """
        jobs_set_table_name = 'jobs_set'

        def load_jobs(into_storage):
            # Write the fixture jobs into the given storage and wrap them.
            into_storage.write_table(
                table_name=jobs_set_table_name,
                table_data=self.jobs_data,
            )
            return JobDataset(in_storage=into_storage,
                              in_table_name=jobs_set_table_name)

        def run_transition(jobs):
            # One model run for year 2000 against the shared control totals.
            transition = RegionalEmploymentTransitionModel()
            transition.run(year=2000,
                           job_set=jobs,
                           control_totals=ect_set,
                           job_building_types=self.job_building_types)
            return jobs

        def run_on_fresh_jobs():
            # Each stochastic repetition starts from an untouched jobs table.
            return run_transition(
                load_jobs(StorageFactory().get_storage('dict_storage')))

        storage = StorageFactory().get_storage('dict_storage')
        jobs_set = load_jobs(storage)

        ect_set_table_name = 'ect_set'
        storage.write_table(
            table_name=ect_set_table_name,
            table_data=self.annual_employment_control_totals_data)
        ect_set = ControlTotalDataset(in_storage=storage,
                                      in_table_name=ect_set_table_name,
                                      what="employment")

        run_transition(jobs_set)

        # check the totals in regions
        areas = jobs_set.get_attribute("large_area_id")
        results = array(
            [where(areas == area_id)[0].size for area_id in (1, 2)])
        should_be = [4250, 7000]
        self.assertEqual(
            ma.allequal(should_be, results), True,
            "Error, should_be: %s, but result: %s" % (should_be, results))

        def run_model():
            # check that the distribution of jobs is the same before and after subtracting jobs
            return self.get_count_all_sectors_and_areas(run_on_fresh_jobs())

        expected_results = array([2250.0, 1000, 1000, 3000, 2000.0, 2000])

        self.run_stochastic_test(__file__, run_model, expected_results, 10)

        building_type_variables = [
            "urbansim.job.is_in_employment_sector_1_industrial",
            "urbansim.job.is_in_employment_sector_2_industrial",
            "urbansim.job.is_in_employment_sector_1_commercial",
            "urbansim.job.is_in_employment_sector_2_commercial",
            "urbansim.job.is_in_employment_sector_1_governmental",
            "urbansim.job.is_in_employment_sector_2_governmental"
        ]

        def run_model2():
            # check that the distribution of building type is the same before and after subtracting jobs
            fresh_jobs = run_on_fresh_jobs()
            fresh_jobs.compute_variables(
                building_type_variables,
                resources=Resources(
                    {"job_building_type": self.job_building_types}))
            # The computed attribute is stored under the last component of
            # the fully qualified variable name.
            return array([
                fresh_jobs.get_attribute(full_name.rsplit(".", 1)[-1]).sum()
                for full_name in building_type_variables
            ])

        expected_results = array([
            3500.0 / 7000.0 * 5250.0, 900, 3500.0 / 7000.0 * 5250.0, 1800, 0,
            300
        ])
        self.run_stochastic_test(__file__, run_model2, expected_results, 20)
    def test_controlling_income(self):
        """Control household totals on a single marginal: income.

        The control totals table holds four income bins (``income_min`` /
        ``income_max``, with ``-1`` as the open upper bound) for each of the
        years 2000, 2001 and 2002. After every ``TransitionModel`` run the
        test checks the overall household count loosely and the count per
        income bin tightly against that year's rows.
        """
        annual_household_control_totals_data = {
            "year": array([2000, 2000, 2000, 2000, 2001, 2001, 2001, 2001, 2002, 2002, 2002, 2002]),
            "income_min": array([    0,40000, 70000,120000,     0,40000, 70000,120000,     0,40000, 70000,120000]),
            "income_max": array([39999,69999,119999,    -1, 39999,69999,119999,    -1, 39999,69999,119999,    -1]),
            "total_number_of_households": array([25013, 21513, 18227, 18493, # 2000
                                                 10055, 15003, 17999, 17654, # 2001
                                                 15678, 14001, 20432, 14500]) # 2002
            }

        households_data = {
            "household_id":arange(20000)+1,
            "grid_id": array(19950*[1] + 50*[0]),
            "income": array(1000*[1000] + 1000*[10000] + 2000*[20000] + 1000*[35000] + 2000*[45000] +
                                1000*[50000] + 2000*[67000]+ 2000*[90000] + 1000*[100005] + 2000*[110003] +
                                1000*[120000] + 1000*[200000] + 2000*[500000] + 1000*[630000])
            }
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set', table_data=annual_household_control_totals_data)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what='household', id_name=[])

        model = TransitionModel(hh_set, control_total_dataset=hct_set)
        cats = 4
        failure = "Error, should_be: %s, but result: %s"

        def run_year(year):
            model.run(year=year, target_attribute_name="total_number_of_households",
                      reset_dataset_attribute_value={'grid_id':-1})

        def control_totals_from(first_row):
            # The `cats` control-total rows belonging to one year.
            return hct_set.get_attribute("total_number_of_households")[first_row:first_row + cats]

        def households_per_income_bin(first_row):
            # Count households per bin; the lowest bin has no lower bound
            # check and the highest bin has no upper bound check.
            income = hh_set.get_attribute('income')
            lower = hct_set.get_attribute("income_min")
            upper = hct_set.get_attribute("income_max")
            counts = zeros(cats, dtype=int32)
            counts[0] = (income <= upper[first_row]).sum()
            for k in range(1, cats-1):
                row = first_row + k
                counts[k] = logical_and(income >= lower[row], income <= upper[row]).sum()
            counts[-1] = (income >= lower[first_row + cats - 1]).sum()
            return counts

        def assert_size_close(should_be):
            results = hh_set.size()
            self.assertEqual(ma.allclose(should_be, results, rtol=1e-1),
                             True, failure % (should_be, results))

        def assert_bins_match(first_row):
            results = households_per_income_bin(first_row)
            should_be = control_totals_from(first_row)
            self.assertEqual(ma.allclose(results, should_be, rtol=1e-6),
                             True, failure % (should_be, results))

        run_year(2000)
        assert_size_close([83246])
        assert_bins_match(0)

        # this run should remove households in all four categories
        run_year(2001)
        assert_size_close([(control_totals_from(4)).sum()])
        assert_bins_match(4)

        # this run should add and remove households
        run_year(2002)
        assert_size_close([(control_totals_from(8)).sum()])
        assert_bins_match(8)
    def test_controlling_age_of_head(self):
        """Control household totals on a single marginal: age_of_head.

        The control totals table holds three age bins (``age_of_head_min`` /
        ``age_of_head_max``, with ``-1`` as the open upper bound) for each of
        the years 2000, 2001 and 2002. After every ``TransitionModel`` run the
        test checks the overall household count loosely and the count per age
        bin tightly against that year's rows.
        """
        annual_household_control_totals_data = {
            "year": array([2000, 2000, 2000, 2001, 2001, 2001, 2002, 2002, 2002]),
            "age_of_head_min": array([ 0,35,65,  0,35,65,  0,35,65]),
            "age_of_head_max": array([34,64,-1, 34,64,-1, 34,64,-1]),
            "total_number_of_households": array([25013, 21513, 18227,  # 2000
                                                 10055, 15003, 17999, # 2001
                                                 15678, 14001, 20432]) # 2002
            }

        households_data = {
            "household_id":arange(15000)+1,
            "grid_id": array(15000*[1]),
            "age_of_head": array(1000*[25] + 1000*[28] + 2000*[32] + 1000*[34] +
                            2000*[35] + 1000*[40] + 1000*[54]+ 1000*[62] +
                            1000*[65] + 1000*[68] + 2000*[71] + 1000*[98])
            }
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set', table_data=annual_household_control_totals_data)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what='household',
                                      id_name=[])

        model = TransitionModel(hh_set, control_total_dataset=hct_set)
        cats = 3
        failure = "Error, should_be: %s, but result: %s"

        def run_year(year):
            model.run(year=year, target_attribute_name="total_number_of_households",
                      reset_dataset_attribute_value={'grid_id':-1})

        def control_totals_from(first_row):
            # The `cats` control-total rows belonging to one year.
            return hct_set.get_attribute("total_number_of_households")[first_row:first_row + cats]

        def households_per_age_bin(first_row):
            # Every bound, including the first bin's upper bound, is read from
            # the rows of the year under test, so the check stays valid if the
            # bins ever differ between years.
            age = hh_set.get_attribute('age_of_head')
            lower = hct_set.get_attribute("age_of_head_min")
            upper = hct_set.get_attribute("age_of_head_max")
            counts = zeros(cats, dtype=int32)
            counts[0] = (age <= upper[first_row]).sum()
            for k in range(1, cats-1):
                row = first_row + k
                counts[k] = logical_and(age >= lower[row], age <= upper[row]).sum()
            counts[-1] = (age >= lower[first_row + cats - 1]).sum()
            return counts

        def assert_size_close(first_row):
            results = hh_set.size()
            should_be = [(control_totals_from(first_row)).sum()]
            self.assertEqual(ma.allclose(should_be, results, rtol=1e-1),
                             True, failure % (should_be, results))

        def assert_bins_match(first_row):
            results = households_per_age_bin(first_row)
            should_be = control_totals_from(first_row)
            self.assertEqual(ma.allclose(results, should_be, rtol=1e-6),
                             True, failure % (should_be, results))

        # this run should add households in all three categories
        run_year(2000)
        assert_size_close(0)
        assert_bins_match(0)

        # this run should remove households in all three categories
        run_year(2001)
        assert_size_close(3)
        assert_bins_match(3)

        # this run should add and remove households
        run_year(2002)
        assert_size_close(6)
        assert_bins_match(6)
Exemple #18
0
    def test_controlling_income(self):
        """Control household totals on a single marginal: income.

        Control totals are keyed by ``(year, income)`` where ``income`` is a
        bin number 0..3; the bin boundaries come from the household
        characteristics table. ``HouseholdTransitionModel`` is run for 2000,
        2001 and 2002, and after each run the overall household count is
        checked loosely and the count per income bin tightly against that
        year's control totals.
        """
        annual_household_control_totals_data = {
            "year": array([2000, 2000, 2000, 2000, 2001, 2001, 2001, 2001, 2002, 2002, 2002, 2002]),
            "income": array([0,1,2,3,0,1,2,3, 0,1,2,3]),
            "total_number_of_households": array([25013, 21513, 18227, 18493, # 2000
                                                 10055, 15003, 17999, 17654, # 2001
                                                 15678, 14001, 20432, 14500]) # 2002
            }

        # The last two rows are deliberately out of ascending order (the
        # open-ended 120000+ bin precedes the 70000-119999 bin) to test that
        # the result does not depend on row order.
        household_characteristics_for_ht_data = {
            "characteristic": array(4*['income']),
            "min": array([0, 40000, 120000, 70000]),
            "max": array([39999, 69999, -1, 119999])
            }
        # Row numbers of the characteristics table in ascending income order.
        hc_sorted_index = array([0,1,3,2])
        households_data = {
            "household_id":arange(20000)+1,
            "building_id": array(19950*[1] + 50*[0]),
            "income": array(1000*[1000] + 1000*[10000] + 2000*[20000] + 1000*[35000] + 2000*[45000] +
                                1000*[50000] + 2000*[67000]+ 2000*[90000] + 1000*[100005] + 2000*[110003] +
                                1000*[120000] + 1000*[200000] + 2000*[500000] + 1000*[630000]),
            "persons": array(3000*[2] + 2000*[3] + 1000*[1] + 1000*[6] + 1000*[1] + 1000*[4] +
                                3000*[1]+ 8000*[5], dtype=int8)
            }
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set', table_data=annual_household_control_totals_data)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what='household', id_name=['year' ,'income'])

        storage.write_table(table_name='hc_set', table_data=household_characteristics_for_ht_data)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

        storage.write_table(table_name='prs_set', table_data=self.person_data)
        prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

        model = HouseholdTransitionModel(debuglevel=3)
        failure = "Error, should_be: %s, but result: %s"

        def run_year(year):
            model.run(year=year, person_set=prs_set, household_set=hh_set,
                      control_totals=hct_set, characteristics=hc_set)

        def control_totals_from(first_row):
            # The four control-total rows of one year. The stop index stays
            # within the 12-row table instead of relying on slice clipping.
            return hct_set.get_attribute("total_number_of_households")[first_row:first_row + 4]

        def households_per_income_bin():
            # Count households per bin in ascending income order; the lowest
            # bin has no lower bound check and the highest no upper bound check.
            income = hh_set.get_attribute('income')
            lower = hc_set.get_attribute("min")
            upper = hc_set.get_attribute("max")
            nbins = hc_set.size()
            counts = zeros(nbins, dtype=int32)
            counts[0] = (income <= upper[hc_sorted_index[0]]).sum()
            for k in range(1, nbins-1):
                row = hc_sorted_index[k]
                counts[k] = logical_and(income >= lower[row], income <= upper[row]).sum()
            counts[-1] = (income >= lower[hc_sorted_index[-1]]).sum()
            return counts

        def assert_size_close(should_be):
            results = hh_set.size()
            self.assertEqual(ma.allclose(should_be, results, rtol=1e-1),
                             True, failure % (should_be, results))

        def assert_bins_match(first_row):
            results = households_per_income_bin()
            should_be = control_totals_from(first_row)
            self.assertEqual(ma.allclose(results, should_be, rtol=1e-6),
                             True, failure % (should_be, results))

        # this run should add households in all four categories
        run_year(2000)
        assert_size_close([83246])
        assert_bins_match(0)

        # this run should remove households in all four categories
        run_year(2001)
        assert_size_close([(control_totals_from(4)).sum()])
        assert_bins_match(4)

        # this run should add and remove households
        run_year(2002)
        assert_size_close([(control_totals_from(8)).sum()])
        assert_bins_match(8)
    def test_same_distribution_after_job_addition(self):
        """Add 1,750 new jobs of sector 1 without specifying a distribution across gridcells (so it is assumed equal)
        Test that the total number of jobs in each sector after the addition matches the totals specified
        in annual_employment_control_totals.
        Ensure that the number of unplaced jobs after the addition is exactly 1,750 because this model
        is not responsible for placing jobs, only for creating them.
        Finally check (stochastically) that the sector/building-type distribution is preserved
        and that the data types of the new job set are int32 (sector_id) and int8 (building_type).
        NOTE: unplaced jobs are indicated by grid_id <= 0
        """
        storage = StorageFactory().get_storage('dict_storage')

        jobs_set_table_name = 'jobs_set'
        storage.write_table(
            table_name=jobs_set_table_name,
            table_data=self.jobs_data,
        )
        jobs_set = JobDataset(in_storage=storage,
                              in_table_name=jobs_set_table_name)

        # Work on a shallow copy: overriding the target below must not leak
        # into the fixture that other tests read from self.
        annual_employment_control_totals_data = dict(
            self.annual_employment_control_totals_data)
        annual_employment_control_totals_data[
            "total_non_home_based_employment"] = array([8750, 3000])

        ect_set_table_name = 'ect_set'
        storage.write_table(
            table_name=ect_set_table_name,
            table_data=annual_employment_control_totals_data,
        )
        ect_set = ControlTotalDataset(in_storage=storage,
                                      in_table_name=ect_set_table_name,
                                      what="employment")

        # run model
        model = EmploymentTransitionModel()
        model.run(year=2000,
                  job_set=jobs_set,
                  control_totals=ect_set,
                  job_building_types=self.job_building_types)

        #check that there are indeed 14750 total jobs after running the model
        results = jobs_set.size()
        should_be = [14750]
        self.assertEqual(ma.allequal(should_be, results), True, "Error")

        #check that total #jobs within each sector are close to what was set in the control_totals
        results = self.get_count_all_sectors(jobs_set)
        should_be = [8750.0, 3000, 3000]
        self.assertEqual(ma.allclose(results, should_be, rtol=0.00001), True)

        #check that the number of unplaced jobs is the number of new jobs created (1750)
        results = where(jobs_set.get_attribute("grid_id") <= 0)[0].size
        should_be = [1750.0]
        self.assertEqual(ma.allclose(results, should_be, rtol=0.00001), True)

        # test distribution of building type
        # (order of the names defines the order of the entries in expected_results below)
        sector_building_type_flags = [
            "is_in_employment_sector_1_industrial",
            "is_in_employment_sector_2_industrial",
            "is_in_employment_sector_1_commercial",
            "is_in_employment_sector_2_commercial",
            "is_in_employment_sector_1_governmental",
            "is_in_employment_sector_2_governmental",
        ]

        def run_model():
            # each stochastic replication starts from a fresh copy of the job fixture
            storage = StorageFactory().get_storage('dict_storage')

            jobs_set_table_name = 'jobs_set'
            storage.write_table(table_name=jobs_set_table_name,
                                table_data=self.jobs_data)
            jobs_set = JobDataset(in_storage=storage,
                                  in_table_name=jobs_set_table_name)

            model = EmploymentTransitionModel()
            model.run(year=2000,
                      job_set=jobs_set,
                      control_totals=ect_set,
                      job_building_types=self.job_building_types)
            # check that the distribution of building type is the same before and after adding jobs
            jobs_set.compute_variables(
                ["urbansim.job." + flag for flag in sector_building_type_flags],
                resources=Resources({
                    "job_building_type": self.job_building_types
                }))
            result = array([
                jobs_set.get_attribute(flag).sum()
                for flag in sector_building_type_flags
            ])
            return result

        expected_results = array([
            3500.0 / 7000.0 * 8750.0, 900, 3500.0 / 7000.0 * 8750.0, 1800, 0,
            300
        ])
        #print expected_results
        self.run_stochastic_test(__file__, run_model, expected_results, 10)

        # check data types (on the job set of the first, non-replicated run)
        self.assertEqual(
            jobs_set.get_attribute("sector_id").dtype, int32,
            "Error in data type of the new job set. Should be: int32, is: %s" %
            str(jobs_set.get_attribute("sector_id").dtype))
        self.assertEqual(
            jobs_set.get_attribute("building_type").dtype, int8,
            "Error in data type of the new job set. Should be: int8, is: %s" %
            str(jobs_set.get_attribute("building_type").dtype))
    def test_same_distribution_after_job_subtraction(self):
        """Remove 1,750 sector_1 jobs without specifying the distribution across gridcells (so it is assumed equal).

        Checks the total number of jobs after the run and then, with stochastic tests,
        that the distribution of jobs over sectors and gridcells and over
        sector/building-type combinations stays relatively equal.
        """
        table_store = StorageFactory().get_storage('dict_storage')

        table_store.write_table(table_name='jobs_set',
                                table_data=self.jobs_data)
        jobs_set = JobDataset(in_storage=table_store, in_table_name='jobs_set')

        table_store.write_table(
            table_name='ect_set',
            table_data=self.annual_employment_control_totals_data)
        ect_set = ControlTotalDataset(in_storage=table_store,
                                      in_table_name='ect_set',
                                      what="employment")

        # unplace the first half of the jobs (grid_id set to 0)
        half = int(jobs_set.size() / 2)
        jobs_set.modify_attribute(name="grid_id",
                                  data=zeros(half),
                                  index=arange(half))

        # run the model once on the partially unplaced job set
        EmploymentTransitionModel().run(
            year=2000,
            job_set=jobs_set,
            control_totals=ect_set,
            job_building_types=self.job_building_types)
        self.assertEqual(ma.allequal([11250], jobs_set.size()), True, "Error")

        def jobs_after_fresh_run():
            # Load an untouched copy of the job fixture and run the model on it;
            # the control totals dataset is shared between replications.
            replication_store = StorageFactory().get_storage('dict_storage')
            replication_store.write_table(
                table_name='jobs_set',
                table_data=self.jobs_data,
            )
            fresh_jobs = JobDataset(in_storage=replication_store,
                                    in_table_name='jobs_set')
            EmploymentTransitionModel().run(
                year=2000,
                job_set=fresh_jobs,
                control_totals=ect_set,
                job_building_types=self.job_building_types)
            return fresh_jobs

        def run_model():
            # distribution of jobs over sectors and gridcells after subtracting jobs
            return self.get_count_all_sectors_and_gridcells(
                jobs_after_fresh_run())

        expected_by_sector_and_gridcell = array([
            4000.0 / 7000.0 * 5250.0, 1000, 1000, 2000.0 / 7000.0 * 5250.0,
            1000, 1000, 1000.0 / 7000.0 * 5250.0, 1000, 1000
        ])
        self.run_stochastic_test(__file__, run_model,
                                 expected_by_sector_and_gridcell, 10)

        # order of the names defines the order of the expected counts below
        building_type_flags = [
            "is_in_employment_sector_1_industrial",
            "is_in_employment_sector_2_industrial",
            "is_in_employment_sector_1_commercial",
            "is_in_employment_sector_2_commercial",
            "is_in_employment_sector_1_governmental",
            "is_in_employment_sector_2_governmental",
        ]

        def run_model2():
            # distribution of building type after subtracting jobs
            fresh_jobs = jobs_after_fresh_run()
            fresh_jobs.compute_variables(
                ["urbansim.job." + flag for flag in building_type_flags],
                resources=Resources({
                    "job_building_type": self.job_building_types
                }))
            return array([
                fresh_jobs.get_attribute(flag).sum()
                for flag in building_type_flags
            ])

        expected_by_building_type = array([
            3500.0 / 7000.0 * 5250.0, 900, 3500.0 / 7000.0 * 5250.0, 1800, 0,
            300
        ])
        self.run_stochastic_test(__file__, run_model2,
                                 expected_by_building_type, 20)
 def prepare_for_run(self, storage, **kwargs):
     """Create the employment control totals dataset from `storage`.

     The dataset is passed, together with `storage` and any extra keyword
     arguments, to sample_control_totals before it is returned.
     """
     from urbansim.datasets.control_total_dataset import ControlTotalDataset
     employment_totals = ControlTotalDataset(what="employment",
                                             in_storage=storage)
     sample_control_totals(storage, employment_totals, **kwargs)
     return employment_totals
    def test_controlling_sector(self):
        """ Controls for one marginal characteristic, namely sector_id.

        The model is run for the years 2000, 2001 and 2002 on the same job set.
        After each run the total number of jobs and the number of jobs per
        sector are compared with that year's control totals.
        """
        annual_employment_control_totals_data = {
            "year": array([2000, 2000, 2000, 2001, 2001, 2001, 2002, 2002, 2002]),
            "sector_id": array([ 1,2,3, 1,2,3,  1,2,3]),
            "number_of_jobs": array([25013, 21513, 18227,  # 2000
                                                 10055, 15003, 17999, # 2001
                                                 15678, 14001, 20432]) # 2002
            }


        jobs_data = {
            "job_id":arange(15000)+1,
            "grid_id": array(15000*[1]),
            "sector_id": array(1000*[1] + 1000*[1] + 2000*[1] + 1000*[1] +
                            2000*[2] + 1000*[2] + 1000*[2]+ 1000*[2] +
                            1000*[3] + 1000*[3] + 2000*[3] + 1000*[3])
            }
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='job_set', table_data=jobs_data)
        job_set = JobDataset(in_storage=storage, in_table_name='job_set')

        storage.write_table(table_name='ect_set', table_data=annual_employment_control_totals_data)
        ect_set = ControlTotalDataset(in_storage=storage, in_table_name='ect_set', what='',
                                      id_name=[])

        model = TransitionModel(job_set, control_total_dataset=ect_set)

        # The control totals table holds `cats` consecutive rows per year.
        # 2000: every sector target exceeds the initial 5000 jobs (jobs are added)
        # 2001: every sector target is below the 2000 target (jobs are removed)
        # 2002: sectors 1 and 3 grow while sector 2 shrinks (jobs are added and removed)
        cats = 3
        for year_index, year in enumerate([2000, 2001, 2002]):
            first_row = year_index * cats
            year_rows = slice(first_row, first_row + cats)

            model.run(year=year, target_attribute_name="number_of_jobs", reset_dataset_attribute_value={'grid_id':-1})

            # total number of jobs
            results = job_set.size()
            should_be = [(ect_set.get_attribute("number_of_jobs")[year_rows]).sum()]
            self.assertEqual(ma.allclose(should_be, results, rtol=1e-1),
                             True, "Error, should_be: %s, but result: %s" % (should_be, results))

            # number of jobs per sector
            results = zeros(cats, dtype=int32)
            for i in range(0, cats):
                results[i] = (job_set.get_attribute('sector_id') == ect_set.get_attribute("sector_id")[first_row + i]).sum()
            should_be = ect_set.get_attribute("number_of_jobs")[year_rows]
            self.assertEqual(ma.allclose(results, should_be, rtol=1e-6),
                             True, "Error, should_be: %s, but result: %s" % (should_be, results))
Exemple #23
0
    def test_controlling_age_of_head(self):
        """ Controls for one marginal characteristic, namely age_of_head.

        The household transition model is run for 2000, 2001 and 2002 on the
        same household set; after each run the total number of households and
        the number of households per age bin are compared with the control totals.
        """
        control_totals_table = {
            "year": array([2000, 2000, 2000, 2001, 2001, 2001, 2002, 2002, 2002]),
            "age_of_head": array([0,1,2,0,1,2, 0,1,2]),
            "total_number_of_households": array([25013, 21513, 18227,  # 2000
                                                 10055, 15003, 17999, # 2001
                                                 15678, 14001, 20432]) # 2002
            }

        # three age bins; max == -1 marks the last bin as open-ended
        age_bins_table = {
            "characteristic": array(3*['age_of_head']),
            "min": array([0, 35, 65]),
            "max": array([34, 64, -1])
            }

        # 5000 households in each of the three age bins
        households_table = {
            "household_id":arange(15000)+1,
            "building_id": array(15000*[1]),
            "age_of_head": array(1000*[25] + 1000*[28] + 2000*[32] + 1000*[34] +
                            2000*[35] + 1000*[40] + 1000*[54]+ 1000*[62] +
                            1000*[65] + 1000*[68] + 2000*[71] + 1000*[98]),
            "persons": array(1000*[2] + 2000*[3] + 1000*[1] + 1000*[6] + 1000*[1] + 1000*[4] +
                                3000*[1]+ 5000*[5], dtype=int8)
            }
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set', table_data=households_table)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set', table_data=control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set', what='household',
                                      id_name=['year' ,'age_of_head'])

        storage.write_table(table_name='hc_set', table_data=age_bins_table)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

        storage.write_table(table_name='prs_set', table_data=self.person_data)
        prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

        def households_per_age_bin():
            # First bin: bounded from above only; last bin: bounded from below only;
            # bins in between: bounded on both sides.
            ages = hh_set.get_attribute('age_of_head')
            lower = hc_set.get_attribute("min")
            upper = hc_set.get_attribute("max")
            last = hc_set.size() - 1
            counts = zeros(hc_set.size(), dtype=int32)
            counts[0] = (ages <= upper[0]).sum()
            for k in range(1, last):
                counts[k] = logical_and(ages >= lower[k], ages <= upper[k]).sum()
            counts[last] = (ages >= lower[last]).sum()
            return counts

        model = HouseholdTransitionModel(debuglevel=3)

        # 2000: targets exceed the initial 5000 households in every bin (households are added)
        # 2001: targets are below the 2000 targets in every bin (households are removed)
        # 2002: the first and last bin grow, the middle one shrinks (add and remove)
        yearly_rows = ((2000, slice(0, 3)), (2001, slice(3, 6)), (2002, slice(6, 9)))
        for year, rows in yearly_rows:
            model.run(year=year, person_set=prs_set, household_set=hh_set, control_totals=hct_set, characteristics=hc_set)

            # total number of households
            results = hh_set.size()
            should_be = [(hct_set.get_attribute("total_number_of_households")[rows]).sum()]
            self.assertEqual(ma.allclose(should_be, results, rtol=1e-1),
                             True, "Error, should_be: %s, but result: %s" % (should_be, results))

            # number of households per age bin
            results = households_per_age_bin()
            should_be = hct_set.get_attribute("total_number_of_households")[rows]
            self.assertEqual(ma.allclose(results, should_be, rtol=1e-6),
                             True, "Error, should_be: %s, but result: %s" % (should_be, results))
 def run(self, in_storage, out_storage, business_table="business", jobs_table="jobs", control_totals_table=None):
     """Unroll the business table into a jobs table with one record per job.

     Each business contributes as many job records as given by its attribute
     self.number_of_jobs_attr. Every job inherits sector_id and the geography id
     (self.geography_id_attr) of its business and, if these attributes are present
     in the business table, building_id, parcel_id, home_based (stored as
     building_type), join_flag and impute_building_sqft_flag.

     Arguments:
     in_storage -- storage with the business table and (optionally) the control totals table
     out_storage -- storage the jobs table is written to
     business_table -- name of the business table in in_storage
     jobs_table -- name of the jobs table written to out_storage
     control_totals_table -- optional name of a table in in_storage with the attributes
         zone_id, sector_id and jobs. If given, jobs are removed wherever a zone/sector
         combination has more jobs than its control total; jobs are never added.
     """
     logger.log_status("Unrolling %s table." % business_table)
     # get attributes from the establishments table
     business_dataset = BusinessDataset(in_storage=in_storage, in_table_name=business_table)
     business_sizes = business_dataset.get_attribute(self.number_of_jobs_attr).astype("int32")
     sectors = business_dataset.get_attribute("sector_id")
     tazes = business_dataset.get_attribute(self.geography_id_attr).astype("int32")
     # optional attributes: an empty array (or None) marks "not present in the business table"
     building_ids = array([], dtype='int32')
     if "building_id" in business_dataset.get_primary_attribute_names():
         building_ids = business_dataset.get_attribute("building_id")
     parcel_ids = array([], dtype='int32')
     if "parcel_id" in business_dataset.get_primary_attribute_names():
         parcel_ids = business_dataset.get_attribute("parcel_id")
     home_based = array([], dtype='int16')
     if "home_based" in business_dataset.get_primary_attribute_names():
         home_based = business_dataset.get_attribute("home_based")
     building_sqft = business_dataset.get_attribute(self.sqft_attr)
     building_sqft[building_sqft <= 0] = 0
     join_flags = None
     if "join_flag" in business_dataset.get_primary_attribute_names():
         join_flags = business_dataset.get_attribute("join_flag")
     impute_sqft_flag = None
     if "impute_building_sqft_flag" in business_dataset.get_primary_attribute_names():
         impute_sqft_flag = business_dataset.get_attribute("impute_building_sqft_flag")
     
     # initialize jobs attributes (-1 is the placeholder for "not set")
     total_size = business_sizes.sum()
     jobs_data = {}
     jobs_data["sector_id"] = resize(array([-1], dtype=sectors.dtype), total_size)
     jobs_data["building_id"] = resize(array([-1], dtype=building_ids.dtype), total_size)
     jobs_data["parcel_id"] = resize(array([-1], dtype=parcel_ids.dtype), total_size)
     jobs_data[self.geography_id_attr] = resize(array([-1], dtype=tazes.dtype), total_size)
     jobs_data["building_type"] = resize(array([-1], dtype=home_based.dtype), total_size)
     # the following arrays are completely overwritten in the loop below
     jobs_data["sqft"] = resize(array([], dtype=building_sqft.dtype), total_size)
     if join_flags is not None:
         jobs_data["join_flag"] = resize(array([], dtype=join_flags.dtype), total_size)
     if impute_sqft_flag is not None:
         jobs_data["impute_building_sqft_flag"] = resize(array([], dtype=impute_sqft_flag.dtype), total_size)
     
     indices = cumsum(business_sizes)
     # iterate over establishments. For each business create the corresponding number of jobs by filling the corresponding part 
     # of the arrays
     start_index=0
     for i in range(business_dataset.size()):
         end_index = indices[i]
         jobs_data["sector_id"][start_index:end_index] = sectors[i]
         if building_ids.size > 0:
             jobs_data["building_id"][start_index:end_index] = building_ids[i]
         if parcel_ids.size > 0:
             jobs_data["parcel_id"][start_index:end_index] = parcel_ids[i]
         jobs_data[self.geography_id_attr][start_index:end_index] = tazes[i]
         if home_based.size > 0:
             jobs_data["building_type"][start_index:end_index] = home_based[i]
         if self.compute_sqft_per_job:
             # 90% of the building sqft, split evenly among the jobs of the business
             jobs_data["sqft"][start_index:end_index] = round((building_sqft[i]-building_sqft[i]/10.0)/float(business_sizes[i])) # sqft per employee
         else:
             jobs_data["sqft"][start_index:end_index] = building_sqft[i]
         if join_flags is not None:
             jobs_data["join_flag"][start_index:end_index] = join_flags[i]
         if impute_sqft_flag is not None:
             jobs_data["impute_building_sqft_flag"][start_index:end_index]  = impute_sqft_flag[i]
         start_index = end_index
         
     jobs_data["job_id"] = arange(total_size)+1
     if self.compute_sqft_per_job:
         # cap at maximum_sqft and lift positive values below minimum_sqft up to minimum_sqft
         jobs_data["sqft"] = clip(jobs_data["sqft"], 0, self.maximum_sqft)
         jobs_data["sqft"][logical_and(jobs_data["sqft"]>0, jobs_data["sqft"]<self.minimum_sqft)] = self.minimum_sqft
     
     # correct missing job_building_types
     wmissing_bt = where(jobs_data["building_type"]<=0)[0]
     if wmissing_bt.size > 0:
         jobs_data["building_type"][wmissing_bt] = 2 # assign non-homebased type for now. It can be re-classified in the assign_bldgs_to_jobs... script
     
     # create jobs table and write it out
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(
             table_name="jobs",
             table_data=jobs_data
             )
     job_dataset = JobDataset(in_storage=storage)
     if self.unplace_jobs_with_non_existing_buildings:
         self.do_unplace_jobs_with_non_existing_buildings(job_dataset, out_storage)
     
     # Match to control totals (only eliminate jobs if control totals are smaller than the actual number of jobs). 
     if control_totals_table is not None:
         logger.log_status("Matching to control totals.")
         control_totals = ControlTotalDataset(what='employment', id_name=['zone_id', 'sector_id'], 
                                              in_table_name=control_totals_table, in_storage=in_storage)
         control_totals.load_dataset(attributes=['zone_id', 'sector_id', 'jobs'])
         zones_sectors = control_totals.get_id_attribute()
         njobs = control_totals.get_attribute('jobs')
         remove = array([], dtype='int32')
         for i in range(zones_sectors.shape[0]):
             zone, sector = zones_sectors[i,:]
             in_sector = job_dataset.get_attribute("sector_id") == sector
             in_zone_in_sector = logical_and(in_sector, job_dataset.get_attribute("zone_id") == zone)
             if in_zone_in_sector.sum() <= njobs[i]:
                 continue
             to_be_removed = in_zone_in_sector.sum() - njobs[i]
             this_removal = 0
             # 'bool' instead of the 'bool8' alias, which newer NumPy releases no longer accept
             not_considered = ones(job_dataset.size(), dtype='bool')
             for unit in ['parcel_id', 'building_id', None]: # first consider jobs without parcel id, then without building_id, then all
                 if unit is not None:
                     wnunit = job_dataset.get_attribute(unit) <= 0
                     eligible = logical_and(not_considered, logical_and(in_zone_in_sector, wnunit))
                     # jobs without this unit are not eligible again in the later passes
                     not_considered[where(wnunit)] = False
                 else:
                     eligible = logical_and(not_considered, in_zone_in_sector)
                 eligible_sum = eligible.sum()
                 if eligible_sum > 0:
                     where_eligible = where(eligible)[0]
                     if eligible_sum <= to_be_removed-this_removal:
                         # not more eligible jobs than still needed: take all of them
                         draw = arange(eligible_sum)
                     else:
                         # NOTE(review): the truthy third argument presumably makes sample_noreplace
                         # return positions into where_eligible rather than values - confirm
                         draw = sample_noreplace(where_eligible, to_be_removed-this_removal, eligible_sum)
                     remove = concatenate((remove, where_eligible[draw]))
                     this_removal += draw.size
                     if this_removal >= to_be_removed:
                         break
             
         job_dataset.remove_elements(remove)
         logger.log_status("%s jobs removed." % remove.size)
         
     
     logger.log_status("Write jobs table.")
     job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage)
     logger.log_status("Created %s jobs." % job_dataset.size())
    def test_controlling_with_one_marginal_characteristic(self):
        """Control by age_of_head within each faz_id subarea.

        age_of_head is used as a marginal characteristic that partitions the 8 household groups
        into two larger groups (those with age_of_head < 40 and >= 40). The test ensures that the
        control totals are met per subarea and per large group, and that the distribution within
        each large group is the same before and after running the model.
        """

        #IMPORTANT: marginal characteristics grouping indices have to start at 0!
        #Below there is one marginal characteristic, "age_of_head". Index 0 stands for the first
        #"large group" (groups 1-4, age_of_head < 40), index 1 for the second one. The targets are
        #20000 and 10000 households in faz 1, and 5000 and 5000 households in faz 2.
        control_totals_table = {
            "year": array([2000, 2000, 2000, 2000]),
            "age_of_head": array([0, 1, 0, 1]),
            "total_number_of_households": array([20000, 10000, 5000, 5000]),
            "faz_id": array([1, 1, 2, 2])
        }

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set',
                            table_data=self.households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set',
                            table_data=control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage,
                                      in_table_name='hct_set',
                                      what='household')

        storage.write_table(table_name='prs_set', table_data=self.person_data)
        prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

        storage.write_table(
            table_name='hc_set',
            table_data=self.household_characteristics_for_ht_data)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage,
                                                in_table_name='hc_set')

        SubareaHouseholdTransitionModel(subarea_id_name="faz_id").run(
            year=2000,
            person_set=prs_set,
            household_set=hh_set,
            control_totals=hct_set,
            characteristics=hc_set)

        #check the number of households per subarea (30000 in faz 1, 10000 in faz 2)
        faz_of_household = hh_set.get_attribute("faz_id")
        results = array(
            [where(faz_of_household == faz)[0].size for faz in [1, 2]])
        should_be = [30000, 10000]
        self.assertEqual(
            ma.allclose(should_be, results, rtol=1e-1), True,
            "Error, should_be: %s, but result: %s" % (should_be, results))

        #check that the number of households within the large groups correspond to the control totals
        #(the 16 group counts are summed in consecutive blocks of four)
        results = self.get_count_all_groups(hh_set)
        should_be = [20000, 10000, 5000, 5000]
        large_group_totals = array(
            [results[start:start + 4].sum() for start in range(0, 16, 4)])
        self.assertEqual(
            ma.allclose(large_group_totals, should_be, rtol=1e-1), True,
            "Error, should_be: %s, but result: %s" %
            (should_be, large_group_totals))

        #check that the distribution of households within the groups are the same before and after
        #running the model, respectively: each group keeps its share of its large group
        #(7500 households in the first, 9000 in the second), scaled to the subarea target
        first_group_sizes = [3000.0, 1000.0, 1500.0, 2000.0]
        second_group_sizes = [1000.0, 2500.0, 1500.0, 4000.0]
        should_be = []
        for first_target, second_target in [(20000.0, 10000.0),  # area 1
                                            (5000.0, 5000.0)]:  # area 2
            should_be += [
                size / 7500.0 * first_target for size in first_group_sizes
            ]
            should_be += [
                size / 9000.0 * second_target for size in second_group_sizes
            ]
        self.assertEqual(
            ma.allclose(results, should_be, rtol=0.1), True,
            "Error, should_be: %s, but result: %s" % (should_be, results))
Exemple #26
0
    def test_same_distribution_after_household_addition(self):
        """Run the regional transition model with control totals only
        (no marginal characteristics) and verify that households were added
        while the distribution within each group stayed the same.
        """
        area_ids = [1, 2]
        target_totals = [20000, 30000]
        mismatch_msg = "Error, should_be: %s, but result: %s"

        control_totals_table = {
            "year": array([2000, 2000]),
            "total_number_of_households": array(target_totals),
            "large_area_id": array(area_ids)
        }

        # Register each input table and wrap it in its dataset right away.
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='hh_set',
                            table_data=self.households_data)
        hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

        storage.write_table(table_name='hct_set',
                            table_data=control_totals_table)
        hct_set = ControlTotalDataset(in_storage=storage,
                                      in_table_name='hct_set',
                                      what="household")

        storage.write_table(
            table_name='hc_set',
            table_data=self.household_characteristics_for_ht_data)
        hc_set = HouseholdCharacteristicDataset(in_storage=storage,
                                                in_table_name='hc_set')

        transition_model = RegionalHouseholdTransitionModel()
        transition_model.run(year=2000,
                             household_set=hh_set,
                             control_totals=hct_set,
                             characteristics=hc_set)

        # Total households per large area should match the control totals
        # (20000 in area 1, 30000 in area 2) within a 10% relative tolerance.
        household_areas = hh_set.get_attribute("large_area_id")
        households_per_area = array([
            where(household_areas == area_id)[0].size for area_id in area_ids
        ])
        self.assertEqual(
            ma.allclose(target_totals, households_per_area, rtol=1e-1), True,
            mismatch_msg % (target_totals, households_per_area))

        # Households with a non-positive grid_id are the unplaced ones; their
        # number should correspond to the number of newly created households.
        grid_ids = hh_set.get_attribute("grid_id")
        unplaced_count = where(grid_ids <= 0)[0].size
        expected_unplaced = [17000]
        self.assertEqual(
            ma.allclose(expected_unplaced, unplaced_count, rtol=1e-1), True,
            mismatch_msg % (expected_unplaced, unplaced_count))

        # The distribution of households over the groups in each area should
        # be the same as before running the model: every group's weight out
        # of 16500 (the sum of the weights) scaled to the area's target
        # total, area 1 first and then area 2.
        group_counts = self.get_count_all_groups(hh_set)
        group_weights = (3000.0, 1000.0, 1500.0, 2000.0,
                         1000.0, 2500.0, 1500.0, 4000.0)
        expected_group_counts = array([
            weight / 16500.0 * area_total
            for area_total in (20000.0, 30000.0)
            for weight in group_weights
        ])
        self.assertEqual(
            ma.allclose(group_counts, expected_group_counts, rtol=0.1), True,
            mismatch_msg % (expected_group_counts, group_counts))

        # check the types of the attributes
        dtype_expectations = (
            ("age_of_head", int32,
             "Error in data type of the new household set. Should be: int32, is: %s"),
            ("income", int32,
             "Error in data type of the new household set. Should be: int32, is: %s"),
            ("persons", int8,
             "Error in data type of the new household set. Should be: int8, is: %s"),
        )
        for attribute_name, expected_dtype, dtype_msg in dtype_expectations:
            actual_dtype = hh_set.get_attribute(attribute_name).dtype
            self.assertEqual(actual_dtype, expected_dtype,
                             dtype_msg % str(actual_dtype))