def test_unplaced_jobs_after_job_addition(self):
    """Check that jobs created by the model are left unplaced.

    The initial jobs table is extended with 2,000 unplaced jobs
    (grid_id 0, sector 1).  The model then adds 1,750 new jobs, and the
    number of unplaced jobs afterwards must be exactly 3,750, because this
    model is not responsible for placing jobs, only for creating them.
    """
    # 2,000 additional jobs that are already unplaced before the model runs
    extra_jobs = {
        "job_id": arange(13001, 15001),
        "grid_id": array(2000 * [0]),
        "sector_id": array(2000 * [1]),
        "building_type": array(2000 * [Constants._industrial_code]),
    }

    # Work on a shallow copy so that the fixture reachable through ``self``
    # is not modified by this test.
    control_totals_data = dict(self.annual_employment_control_totals_data)
    control_totals_data["total_non_home_based_employment"] = array([10750, 3000])

    # create and populate the input tables for the model
    storage = StorageFactory().get_storage('dict_storage')

    jobs_set_table_name = 'jobs_set'
    storage.write_table(table_name=jobs_set_table_name, table_data=self.jobs_data)
    jobs_set = JobDataset(in_storage=storage, in_table_name=jobs_set_table_name)

    ect_set_table_name = 'ect_set'
    storage.write_table(table_name=ect_set_table_name, table_data=control_totals_data)
    ect_set = ControlTotalDataset(in_storage=storage, in_table_name=ect_set_table_name,
                                  what="employment")

    jobs_set.add_elements(extra_jobs)

    # run the model on the input datasets
    model = EmploymentTransitionModel()
    model.run(year=2000, job_set=jobs_set, control_totals=ect_set,
              job_building_types=self.job_building_types)

    # there must be 16750 jobs in total after running the model
    total_jobs = jobs_set.size()
    self.assertTrue(ma.allequal([16750], total_jobs),
                    "Expected 16750 jobs in total, got %s" % total_jobs)

    # unplaced jobs = new jobs created + jobs unplaced before the run
    unplaced_jobs = where(jobs_set.get_attribute("grid_id") <= 0)[0].size
    self.assertTrue(ma.allclose(unplaced_jobs, [3750.0], rtol=0.00001),
                    "Expected 3750 unplaced jobs, got %s" % unplaced_jobs)
def test_unplaced_jobs_after_job_addition(self):
    """Check that jobs created by the model are left unplaced.

    The initial jobs table is extended with 2,000 unplaced jobs
    (grid_id 0, sector 1).  The model then adds 1,750 new jobs, and the
    number of unplaced jobs afterwards must be exactly 3,750, because this
    model is not responsible for placing jobs, only for creating them.
    """
    # 2,000 additional jobs that are already unplaced before the model runs
    extra_jobs = {
        "job_id": arange(13001, 15001),
        "grid_id": array(2000 * [0]),
        "sector_id": array(2000 * [1]),
        "building_type": array(2000 * [Constants._industrial_code]),
    }

    # Work on a shallow copy so that the fixture reachable through ``self``
    # is not modified by this test.
    control_totals_data = dict(self.annual_employment_control_totals_data)
    control_totals_data["total_non_home_based_employment"] = array([10750, 3000])

    # create and populate the input tables for the model
    storage = StorageFactory().get_storage('dict_storage')

    jobs_set_table_name = 'jobs_set'
    storage.write_table(table_name=jobs_set_table_name, table_data=self.jobs_data)
    jobs_set = JobDataset(in_storage=storage, in_table_name=jobs_set_table_name)

    ect_set_table_name = 'ect_set'
    storage.write_table(table_name=ect_set_table_name, table_data=control_totals_data)
    ect_set = ControlTotalDataset(in_storage=storage, in_table_name=ect_set_table_name,
                                  what="employment")

    jobs_set.add_elements(extra_jobs)

    # run the model on the input datasets
    model = EmploymentTransitionModel()
    model.run(year=2000, job_set=jobs_set, control_totals=ect_set,
              job_building_types=self.job_building_types)

    # there must be 16750 jobs in total after running the model
    total_jobs = jobs_set.size()
    self.assertTrue(ma.allequal([16750], total_jobs),
                    "Expected 16750 jobs in total, got %s" % total_jobs)

    # unplaced jobs = new jobs created + jobs unplaced before the run
    unplaced_jobs = where(jobs_set.get_attribute("grid_id") <= 0)[0].size
    self.assertTrue(ma.allclose(unplaced_jobs, [3750.0], rtol=0.00001),
                    "Expected 3750 unplaced jobs, got %s" % unplaced_jobs)
def test_same_distribution_after_job_addition(self):
    """Add 1,750 new sector 1 jobs and check totals, placement and types.

    No distribution across gridcells is specified for the new jobs (so it
    is assumed equal).  The test checks that

    * the total number of jobs in each sector after the addition matches
      the totals given in the annual employment control totals,
    * exactly 1,750 jobs are unplaced after the addition, because this
      model is not responsible for placing jobs, only for creating them,
    * the counts of jobs per sector/building type combination match the
      expected values (stochastic test on a fresh jobs table),
    * ``sector_id`` and ``building_type`` have dtype int32 and int8.

    NOTE: unplaced jobs are indicated by grid_id <= 0
    """
    storage = StorageFactory().get_storage('dict_storage')

    jobs_set_table_name = 'jobs_set'
    storage.write_table(table_name=jobs_set_table_name, table_data=self.jobs_data)
    jobs_set = JobDataset(in_storage=storage, in_table_name=jobs_set_table_name)

    # Work on a shallow copy so that the fixture reachable through ``self``
    # is not modified by this test.
    control_totals_data = dict(self.annual_employment_control_totals_data)
    control_totals_data["total_non_home_based_employment"] = array([8750, 3000])

    ect_set_table_name = 'ect_set'
    storage.write_table(table_name=ect_set_table_name, table_data=control_totals_data)
    ect_set = ControlTotalDataset(in_storage=storage, in_table_name=ect_set_table_name,
                                  what="employment")

    # run model
    model = EmploymentTransitionModel()
    model.run(year=2000, job_set=jobs_set, control_totals=ect_set,
              job_building_types=self.job_building_types)

    # there must be 14750 jobs in total after running the model
    total_jobs = jobs_set.size()
    self.assertTrue(ma.allequal([14750], total_jobs),
                    "Expected 14750 jobs in total, got %s" % total_jobs)

    # total number of jobs within each sector must match the control totals
    jobs_per_sector = self.get_count_all_sectors(jobs_set)
    self.assertTrue(ma.allclose(jobs_per_sector, [8750.0, 3000, 3000], rtol=0.00001),
                    "Unexpected number of jobs per sector: %s" % (jobs_per_sector,))

    # the number of unplaced jobs is the number of new jobs created (1750)
    unplaced_jobs = where(jobs_set.get_attribute("grid_id") <= 0)[0].size
    self.assertTrue(ma.allclose(unplaced_jobs, [1750.0], rtol=0.00001),
                    "Expected 1750 unplaced jobs, got %s" % unplaced_jobs)

    # test distribution of building type
    indicators = (
        "is_in_employment_sector_1_industrial",
        "is_in_employment_sector_2_industrial",
        "is_in_employment_sector_1_commercial",
        "is_in_employment_sector_2_commercial",
        "is_in_employment_sector_1_governmental",
        "is_in_employment_sector_2_governmental",
    )

    def run_model():
        # Each stochastic run starts from a fresh copy of the jobs table;
        # the control totals dataset built above is reused.
        storage = StorageFactory().get_storage('dict_storage')
        jobs_set_table_name = 'jobs_set'
        storage.write_table(table_name=jobs_set_table_name, table_data=self.jobs_data)
        jobs_set = JobDataset(in_storage=storage, in_table_name=jobs_set_table_name)

        model = EmploymentTransitionModel()
        model.run(year=2000, job_set=jobs_set, control_totals=ect_set,
                  job_building_types=self.job_building_types)

        # count the jobs of each sector/building type combination after
        # adding jobs
        jobs_set.compute_variables(
            ["urbansim.job.%s" % name for name in indicators],
            resources=Resources({"job_building_type": self.job_building_types}))
        return array([jobs_set.get_attribute(name).sum() for name in indicators])

    # same order as ``indicators``
    expected_results = array([3500.0 / 7000.0 * 8750.0, 900,
                              3500.0 / 7000.0 * 8750.0, 1800,
                              0, 300])
    self.run_stochastic_test(__file__, run_model, expected_results, 10)

    # check data types
    self.assertEqual(jobs_set.get_attribute("sector_id").dtype, int32,
                     "Error in data type of the new job set. Should be: int32, is: %s"
                     % str(jobs_set.get_attribute("sector_id").dtype))
    self.assertEqual(jobs_set.get_attribute("building_type").dtype, int8,
                     "Error in data type of the new job set. Should be: int8, is: %s"
                     % str(jobs_set.get_attribute("building_type").dtype))
def test_same_distribution_after_job_subtraction(self):
    """Remove 1,750 sector_1 jobs and compare the resulting distributions.

    No distribution across gridcells is specified for the removal (so it is
    assumed equal).  The test checks that the distribution (in %) of
    sector 1 jobs across gridcells before and after the subtraction is
    relatively equal, and does the same for the building types.
    """
    table_of_jobs = 'jobs_set'
    table_of_totals = 'ect_set'

    dict_store = StorageFactory().get_storage('dict_storage')
    dict_store.write_table(table_name=table_of_jobs, table_data=self.jobs_data)
    jobs_set = JobDataset(in_storage=dict_store, in_table_name=table_of_jobs)
    dict_store.write_table(table_name=table_of_totals,
                           table_data=self.annual_employment_control_totals_data)
    ect_set = ControlTotalDataset(in_storage=dict_store, in_table_name=table_of_totals,
                                  what="employment")

    # unplace the first half of the jobs
    half = int(jobs_set.size() / 2)
    jobs_set.modify_attribute(name="grid_id", data=zeros(half), index=arange(half))

    # run the model with the input datasets
    transition = EmploymentTransitionModel()
    transition.run(year=2000, job_set=jobs_set, control_totals=ect_set,
                   job_building_types=self.job_building_types)

    self.assertEqual(ma.allequal([11250], jobs_set.size()), True, "Error")

    def run_model():
        # fresh jobs table for every stochastic run; control totals are shared
        fresh_store = StorageFactory().get_storage('dict_storage')
        fresh_store.write_table(table_name='jobs_set', table_data=self.jobs_data)
        fresh_jobs = JobDataset(in_storage=fresh_store, in_table_name='jobs_set')

        EmploymentTransitionModel().run(year=2000, job_set=fresh_jobs,
                                        control_totals=ect_set,
                                        job_building_types=self.job_building_types)
        # the distribution of jobs must be the same before and after
        # subtracting jobs
        return self.get_count_all_sectors_and_gridcells(fresh_jobs)

    expected_by_gridcell = array([
        4000.0 / 7000.0 * 5250.0, 1000, 1000,
        2000.0 / 7000.0 * 5250.0, 1000, 1000,
        1000.0 / 7000.0 * 5250.0, 1000, 1000,
    ])
    self.run_stochastic_test(__file__, run_model, expected_by_gridcell, 10)

    def run_model2():
        # fresh jobs table for every stochastic run; control totals are shared
        fresh_store = StorageFactory().get_storage('dict_storage')
        fresh_store.write_table(table_name='jobs_set', table_data=self.jobs_data)
        fresh_jobs = JobDataset(in_storage=fresh_store, in_table_name='jobs_set')

        EmploymentTransitionModel().run(year=2000, job_set=fresh_jobs,
                                        control_totals=ect_set,
                                        job_building_types=self.job_building_types)
        # the distribution of building type must be the same before and
        # after subtracting jobs
        fresh_jobs.compute_variables(
            ["urbansim.job.is_in_employment_sector_1_industrial",
             "urbansim.job.is_in_employment_sector_2_industrial",
             "urbansim.job.is_in_employment_sector_1_commercial",
             "urbansim.job.is_in_employment_sector_2_commercial",
             "urbansim.job.is_in_employment_sector_1_governmental",
             "urbansim.job.is_in_employment_sector_2_governmental"],
            resources=Resources({"job_building_type": self.job_building_types}))
        counted = ("is_in_employment_sector_1_industrial",
                   "is_in_employment_sector_2_industrial",
                   "is_in_employment_sector_1_commercial",
                   "is_in_employment_sector_2_commercial",
                   "is_in_employment_sector_1_governmental",
                   "is_in_employment_sector_2_governmental")
        return array([fresh_jobs.get_attribute(name).sum() for name in counted])

    expected_by_type = array([3500.0 / 7000.0 * 5250.0, 900,
                              3500.0 / 7000.0 * 5250.0, 1800,
                              0, 300])
    self.run_stochastic_test(__file__, run_model2, expected_by_type, 20)
def run(self, in_storage, out_storage, business_table="business", jobs_table="jobs",
        control_totals_table=None):
    """Unroll a table of establishments into a table of individual jobs.

    Every record of the business table is expanded into as many job records
    as given by its ``self.number_of_jobs_attr`` attribute.  Each job
    inherits the sector, geography id, building/parcel id (if present),
    building type (taken from ``home_based``, if present), sqft and the
    optional flags of its establishment.  The resulting jobs dataset is
    optionally reduced to match control totals and then written to
    ``out_storage``.

    Arguments:
    in_storage -- storage holding the business table and, if given, the
        control totals table.
    out_storage -- storage the jobs table is written to (also handed to
        ``do_unplace_jobs_with_non_existing_buildings``).
    business_table -- name of the establishments table in ``in_storage``.
    jobs_table -- name of the jobs table to be written.
    control_totals_table -- optional name of a table with attributes
        'zone_id', 'sector_id' and 'jobs'.  Jobs are only eliminated, i.e.
        only where a control total is smaller than the actual number of
        jobs in that zone and sector.
    """
    logger.log_status("Unrolling %s table." % business_table)
    # get attributes from the establishments table
    business_dataset = BusinessDataset(in_storage=in_storage, in_table_name=business_table)
    business_sizes = business_dataset.get_attribute(self.number_of_jobs_attr).astype("int32")
    sectors = business_dataset.get_attribute("sector_id")
    tazes = business_dataset.get_attribute(self.geography_id_attr).astype("int32")
    # Optional attributes stay empty arrays (or None) if the table lacks them.
    building_ids = array([], dtype='int32')
    if "building_id" in business_dataset.get_primary_attribute_names():
        building_ids = business_dataset.get_attribute("building_id")
    parcel_ids = array([], dtype='int32')
    if "parcel_id" in business_dataset.get_primary_attribute_names():
        parcel_ids = business_dataset.get_attribute("parcel_id")
    home_based = array([], dtype='int16')
    if "home_based" in business_dataset.get_primary_attribute_names():
        home_based = business_dataset.get_attribute("home_based")
    building_sqft = business_dataset.get_attribute(self.sqft_attr)
    building_sqft[building_sqft <= 0] = 0  # negative sqft is treated as 0
    join_flags = None
    if "join_flag" in business_dataset.get_primary_attribute_names():
        join_flags = business_dataset.get_attribute("join_flag")
    impute_sqft_flag = None
    if "impute_building_sqft_flag" in business_dataset.get_primary_attribute_names():
        impute_sqft_flag = business_dataset.get_attribute("impute_building_sqft_flag")

    # initialize jobs attributes
    total_size = business_sizes.sum()
    jobs_data = {}
    jobs_data["sector_id"] = resize(array([-1], dtype=sectors.dtype), total_size)
    jobs_data["building_id"] = resize(array([-1], dtype=building_ids.dtype), total_size)
    jobs_data["parcel_id"] = resize(array([-1], dtype=parcel_ids.dtype), total_size)
    jobs_data[self.geography_id_attr] = resize(array([-1], dtype=tazes.dtype), total_size)
    jobs_data["building_type"] = resize(array([-1], dtype=home_based.dtype), total_size)
    jobs_data["sqft"] = resize(array([], dtype=building_sqft.dtype), total_size)
    if join_flags is not None:
        jobs_data["join_flag"] = resize(array([], dtype=join_flags.dtype), total_size)
    if impute_sqft_flag is not None:
        jobs_data["impute_building_sqft_flag"] = resize(
            array([], dtype=impute_sqft_flag.dtype), total_size)
    indices = cumsum(business_sizes)

    # Iterate over establishments. For each business create the corresponding
    # number of jobs by filling the corresponding part of the arrays.
    start_index = 0
    for i in range(business_dataset.size()):
        if business_sizes[i] == 0:
            # An establishment without jobs owns an empty slice, so there is
            # nothing to fill. Skipping it also avoids dividing by zero in
            # the sqft-per-job computation below.
            continue
        end_index = indices[i]
        jobs_data["sector_id"][start_index:end_index] = sectors[i]
        if building_ids.size > 0:
            jobs_data["building_id"][start_index:end_index] = building_ids[i]
        if parcel_ids.size > 0:
            jobs_data["parcel_id"][start_index:end_index] = parcel_ids[i]
        jobs_data[self.geography_id_attr][start_index:end_index] = tazes[i]
        if home_based.size > 0:
            jobs_data["building_type"][start_index:end_index] = home_based[i]
        if self.compute_sqft_per_job:
            # sqft per employee: 90% of the building sqft spread over all jobs
            jobs_data["sqft"][start_index:end_index] = round(
                (building_sqft[i] - building_sqft[i] / 10.0) / float(business_sizes[i]))
        else:
            jobs_data["sqft"][start_index:end_index] = building_sqft[i]
        if join_flags is not None:
            jobs_data["join_flag"][start_index:end_index] = join_flags[i]
        if impute_sqft_flag is not None:
            jobs_data["impute_building_sqft_flag"][start_index:end_index] = impute_sqft_flag[i]
        start_index = end_index

    jobs_data["job_id"] = arange(total_size) + 1
    if self.compute_sqft_per_job:
        jobs_data["sqft"] = clip(jobs_data["sqft"], 0, self.maximum_sqft)
        # positive values below the minimum are raised to the minimum
        jobs_data["sqft"][logical_and(jobs_data["sqft"] > 0,
                                      jobs_data["sqft"] < self.minimum_sqft)] = self.minimum_sqft

    # correct missing job_building_types
    wmissing_bt = where(jobs_data["building_type"] <= 0)[0]
    if wmissing_bt.size > 0:
        # assign non-homebased type for now. It can be re-classified in the
        # assign_bldgs_to_jobs... script
        jobs_data["building_type"][wmissing_bt] = 2

    # create jobs table and write it out
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name="jobs", table_data=jobs_data)
    job_dataset = JobDataset(in_storage=storage)
    if self.unplace_jobs_with_non_existing_buildings:
        self.do_unplace_jobs_with_non_existing_buildings(job_dataset, out_storage)

    # Match to control totals (only eliminate jobs if control totals are
    # smaller than the actual number of jobs).
    if control_totals_table is not None:
        logger.log_status("Matching to control totals.")
        control_totals = ControlTotalDataset(what='employment',
                                             id_name=['zone_id', 'sector_id'],
                                             in_table_name=control_totals_table,
                                             in_storage=in_storage)
        control_totals.load_dataset(attributes=['zone_id', 'sector_id', 'jobs'])
        zones_sectors = control_totals.get_id_attribute()
        njobs = control_totals.get_attribute('jobs')
        remove = array([], dtype='int32')
        for i in range(zones_sectors.shape[0]):
            zone, sector = zones_sectors[i, :]
            in_sector = job_dataset.get_attribute("sector_id") == sector
            in_zone_in_sector = logical_and(in_sector,
                                            job_dataset.get_attribute("zone_id") == zone)
            if in_zone_in_sector.sum() <= njobs[i]:
                continue
            to_be_removed = in_zone_in_sector.sum() - njobs[i]
            this_removal = 0
            # ``bool`` instead of the 'bool8' alias, which NumPy 2.0 removed
            not_considered = ones(job_dataset.size(), dtype=bool)
            # first consider jobs without parcel id, then without
            # building_id, then all
            for unit in ['parcel_id', 'building_id', None]:
                if unit is not None:
                    wnunit = job_dataset.get_attribute(unit) <= 0
                    eligible = logical_and(not_considered,
                                           logical_and(in_zone_in_sector, wnunit))
                    not_considered[where(wnunit)] = False
                else:
                    eligible = logical_and(not_considered, in_zone_in_sector)
                eligible_sum = eligible.sum()
                if eligible_sum > 0:
                    where_eligible = where(eligible)[0]
                    if eligible_sum <= to_be_removed - this_removal:
                        # all eligible jobs are needed
                        draw = arange(eligible_sum)
                    else:
                        # NOTE(review): the result is used as an index into
                        # where_eligible, so the third argument presumably
                        # switches sample_noreplace to returning indices --
                        # confirm against its signature.
                        draw = sample_noreplace(where_eligible,
                                                to_be_removed - this_removal,
                                                eligible_sum)
                    remove = concatenate((remove, where_eligible[draw]))
                    this_removal += draw.size
                    if this_removal >= to_be_removed:
                        break

        job_dataset.remove_elements(remove)
        logger.log_status("%s jobs removed." % remove.size)

    logger.log_status("Write jobs table.")
    job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage)
    logger.log_status("Created %s jobs." % job_dataset.size())
def test_same_distribution_after_job_addition(self):
    """Add 1,750 new sector 1 jobs and check totals, placement and types.

    No distribution across gridcells is specified for the new jobs (so it
    is assumed equal).  The test checks that

    * the total number of jobs in each sector after the addition matches
      the totals given in the annual employment control totals,
    * exactly 1,750 jobs are unplaced after the addition, because this
      model is not responsible for placing jobs, only for creating them,
    * the counts of jobs per sector/building type combination match the
      expected values (stochastic test on a fresh jobs table),
    * ``sector_id`` and ``building_type`` have dtype int32 and int8.

    NOTE: unplaced jobs are indicated by grid_id <= 0
    """
    storage = StorageFactory().get_storage('dict_storage')

    jobs_set_table_name = 'jobs_set'
    storage.write_table(table_name=jobs_set_table_name, table_data=self.jobs_data)
    jobs_set = JobDataset(in_storage=storage, in_table_name=jobs_set_table_name)

    # Work on a shallow copy so that the fixture reachable through ``self``
    # is not modified by this test.
    control_totals_data = dict(self.annual_employment_control_totals_data)
    control_totals_data["total_non_home_based_employment"] = array([8750, 3000])

    ect_set_table_name = 'ect_set'
    storage.write_table(table_name=ect_set_table_name, table_data=control_totals_data)
    ect_set = ControlTotalDataset(in_storage=storage, in_table_name=ect_set_table_name,
                                  what="employment")

    # run model
    model = EmploymentTransitionModel()
    model.run(year=2000, job_set=jobs_set, control_totals=ect_set,
              job_building_types=self.job_building_types)

    # there must be 14750 jobs in total after running the model
    total_jobs = jobs_set.size()
    self.assertTrue(ma.allequal([14750], total_jobs),
                    "Expected 14750 jobs in total, got %s" % total_jobs)

    # total number of jobs within each sector must match the control totals
    jobs_per_sector = self.get_count_all_sectors(jobs_set)
    self.assertTrue(ma.allclose(jobs_per_sector, [8750.0, 3000, 3000], rtol=0.00001),
                    "Unexpected number of jobs per sector: %s" % (jobs_per_sector,))

    # the number of unplaced jobs is the number of new jobs created (1750)
    unplaced_jobs = where(jobs_set.get_attribute("grid_id") <= 0)[0].size
    self.assertTrue(ma.allclose(unplaced_jobs, [1750.0], rtol=0.00001),
                    "Expected 1750 unplaced jobs, got %s" % unplaced_jobs)

    # test distribution of building type
    indicators = (
        "is_in_employment_sector_1_industrial",
        "is_in_employment_sector_2_industrial",
        "is_in_employment_sector_1_commercial",
        "is_in_employment_sector_2_commercial",
        "is_in_employment_sector_1_governmental",
        "is_in_employment_sector_2_governmental",
    )

    def run_model():
        # Each stochastic run starts from a fresh copy of the jobs table;
        # the control totals dataset built above is reused.
        storage = StorageFactory().get_storage('dict_storage')
        jobs_set_table_name = 'jobs_set'
        storage.write_table(table_name=jobs_set_table_name, table_data=self.jobs_data)
        jobs_set = JobDataset(in_storage=storage, in_table_name=jobs_set_table_name)

        model = EmploymentTransitionModel()
        model.run(year=2000, job_set=jobs_set, control_totals=ect_set,
                  job_building_types=self.job_building_types)

        # count the jobs of each sector/building type combination after
        # adding jobs
        jobs_set.compute_variables(
            ["urbansim.job.%s" % name for name in indicators],
            resources=Resources({"job_building_type": self.job_building_types}))
        return array([jobs_set.get_attribute(name).sum() for name in indicators])

    # same order as ``indicators``
    expected_results = array([3500.0 / 7000.0 * 8750.0, 900,
                              3500.0 / 7000.0 * 8750.0, 1800,
                              0, 300])
    self.run_stochastic_test(__file__, run_model, expected_results, 10)

    # check data types
    self.assertEqual(jobs_set.get_attribute("sector_id").dtype, int32,
                     "Error in data type of the new job set. Should be: int32, is: %s"
                     % str(jobs_set.get_attribute("sector_id").dtype))
    self.assertEqual(jobs_set.get_attribute("building_type").dtype, int8,
                     "Error in data type of the new job set. Should be: int8, is: %s"
                     % str(jobs_set.get_attribute("building_type").dtype))
def test_same_distribution_after_job_subtraction(self):
    """Remove 1,750 sector_1 jobs and compare the resulting distributions.

    No distribution across gridcells is specified for the removal (so it is
    assumed equal).  The test checks that the distribution (in %) of
    sector 1 jobs across gridcells before and after the subtraction is
    relatively equal, and does the same for the building types.
    """
    table_of_jobs = 'jobs_set'
    table_of_totals = 'ect_set'

    dict_store = StorageFactory().get_storage('dict_storage')
    dict_store.write_table(table_name=table_of_jobs, table_data=self.jobs_data)
    jobs_set = JobDataset(in_storage=dict_store, in_table_name=table_of_jobs)
    dict_store.write_table(table_name=table_of_totals,
                           table_data=self.annual_employment_control_totals_data)
    ect_set = ControlTotalDataset(in_storage=dict_store, in_table_name=table_of_totals,
                                  what="employment")

    # unplace the first half of the jobs
    half = int(jobs_set.size() / 2)
    jobs_set.modify_attribute(name="grid_id", data=zeros(half), index=arange(half))

    # run the model with the input datasets
    transition = EmploymentTransitionModel()
    transition.run(year=2000, job_set=jobs_set, control_totals=ect_set,
                   job_building_types=self.job_building_types)

    self.assertEqual(ma.allequal([11250], jobs_set.size()), True, "Error")

    def run_model():
        # fresh jobs table for every stochastic run; control totals are shared
        fresh_store = StorageFactory().get_storage('dict_storage')
        fresh_store.write_table(table_name='jobs_set', table_data=self.jobs_data)
        fresh_jobs = JobDataset(in_storage=fresh_store, in_table_name='jobs_set')

        EmploymentTransitionModel().run(year=2000, job_set=fresh_jobs,
                                        control_totals=ect_set,
                                        job_building_types=self.job_building_types)
        # the distribution of jobs must be the same before and after
        # subtracting jobs
        return self.get_count_all_sectors_and_gridcells(fresh_jobs)

    expected_by_gridcell = array([
        4000.0 / 7000.0 * 5250.0, 1000, 1000,
        2000.0 / 7000.0 * 5250.0, 1000, 1000,
        1000.0 / 7000.0 * 5250.0, 1000, 1000,
    ])
    self.run_stochastic_test(__file__, run_model, expected_by_gridcell, 10)

    def run_model2():
        # fresh jobs table for every stochastic run; control totals are shared
        fresh_store = StorageFactory().get_storage('dict_storage')
        fresh_store.write_table(table_name='jobs_set', table_data=self.jobs_data)
        fresh_jobs = JobDataset(in_storage=fresh_store, in_table_name='jobs_set')

        EmploymentTransitionModel().run(year=2000, job_set=fresh_jobs,
                                        control_totals=ect_set,
                                        job_building_types=self.job_building_types)
        # the distribution of building type must be the same before and
        # after subtracting jobs
        fresh_jobs.compute_variables(
            ["urbansim.job.is_in_employment_sector_1_industrial",
             "urbansim.job.is_in_employment_sector_2_industrial",
             "urbansim.job.is_in_employment_sector_1_commercial",
             "urbansim.job.is_in_employment_sector_2_commercial",
             "urbansim.job.is_in_employment_sector_1_governmental",
             "urbansim.job.is_in_employment_sector_2_governmental"],
            resources=Resources({"job_building_type": self.job_building_types}))
        counted = ("is_in_employment_sector_1_industrial",
                   "is_in_employment_sector_2_industrial",
                   "is_in_employment_sector_1_commercial",
                   "is_in_employment_sector_2_commercial",
                   "is_in_employment_sector_1_governmental",
                   "is_in_employment_sector_2_governmental")
        return array([fresh_jobs.get_attribute(name).sum() for name in counted])

    expected_by_type = array([3500.0 / 7000.0 * 5250.0, 900,
                              3500.0 / 7000.0 * 5250.0, 1800,
                              0, 300])
    self.run_stochastic_test(__file__, run_model2, expected_by_type, 20)
def test_controlling_sector(self):
    """Control the number of jobs by one marginal characteristic, sector_id.

    A job set with 5,000 jobs in each of the sectors 1, 2 and 3 is run
    through the transition model for the years 2000, 2001 and 2002.  After
    each run the total number of jobs and the number of jobs per sector are
    compared with the control totals ("number_of_jobs") of that year.
    """
    annual_employment_control_totals_data = {
        "year": array([2000, 2000, 2000, 2001, 2001, 2001, 2002, 2002, 2002]),
        "sector_id": array([1, 2, 3, 1, 2, 3, 1, 2, 3]),
        "number_of_jobs": array([25013, 21513, 18227,   # 2000
                                 10055, 15003, 17999,   # 2001
                                 15678, 14001, 20432]),  # 2002
    }
    jobs_data = {
        "job_id": arange(15000) + 1,
        "grid_id": array(15000 * [1]),
        "sector_id": array(5000 * [1] + 5000 * [2] + 5000 * [3]),
    }

    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='job_set', table_data=jobs_data)
    job_set = JobDataset(in_storage=storage, in_table_name='job_set')

    storage.write_table(table_name='ect_set',
                        table_data=annual_employment_control_totals_data)
    ect_set = ControlTotalDataset(in_storage=storage, in_table_name='ect_set',
                                  what='', id_name=[])

    model = TransitionModel(job_set, control_total_dataset=ect_set)

    cats = 3  # number of sectors, i.e. control total rows per year
    # 2000: every sector grows; 2001: every sector shrinks;
    # 2002: sectors 1 and 3 grow while sector 2 shrinks.
    for position, year in enumerate((2000, 2001, 2002)):
        # rows of the control totals table that belong to this year
        rows = slice(position * cats, (position + 1) * cats)

        model.run(year=year, target_attribute_name="number_of_jobs",
                  reset_dataset_attribute_value={'grid_id': -1})

        # total number of jobs
        results = job_set.size()
        should_be = [(ect_set.get_attribute("number_of_jobs")[rows]).sum()]
        self.assertEqual(ma.allclose(should_be, results, rtol=1e-1), True,
                         "Error in year %s, should_be: %s, but result: %s"
                         % (year, should_be, results))

        # number of jobs in each sector
        controlled_sectors = ect_set.get_attribute("sector_id")[rows]
        results = zeros(cats, dtype=int32)
        for i in range(cats):
            results[i] = (job_set.get_attribute('sector_id') == controlled_sectors[i]).sum()
        should_be = ect_set.get_attribute("number_of_jobs")[rows]
        self.assertEqual(ma.allclose(results, should_be, rtol=1e-6), True,
                         "Error in year %s, should_be: %s, but result: %s"
                         % (year, should_be, results))
def run(self, in_storage, out_storage, business_table="business", jobs_table="jobs",
        control_totals_table=None):
    """Unroll a table of establishments into a table of individual jobs.

    Every record of the business table is expanded into as many job records
    as given by its ``self.number_of_jobs_attr`` attribute.  Each job
    inherits the sector, geography id, building/parcel id (if present),
    building type (taken from ``home_based``, if present), sqft and the
    optional flags of its establishment.  The resulting jobs dataset is
    optionally reduced to match control totals and then written to
    ``out_storage``.

    Arguments:
    in_storage -- storage holding the business table and, if given, the
        control totals table.
    out_storage -- storage the jobs table is written to (also handed to
        ``do_unplace_jobs_with_non_existing_buildings``).
    business_table -- name of the establishments table in ``in_storage``.
    jobs_table -- name of the jobs table to be written.
    control_totals_table -- optional name of a table with attributes
        'zone_id', 'sector_id' and 'jobs'.  Jobs are only eliminated, i.e.
        only where a control total is smaller than the actual number of
        jobs in that zone and sector.
    """
    logger.log_status("Unrolling %s table." % business_table)
    # get attributes from the establishments table
    business_dataset = BusinessDataset(in_storage=in_storage, in_table_name=business_table)
    business_sizes = business_dataset.get_attribute(self.number_of_jobs_attr).astype("int32")
    sectors = business_dataset.get_attribute("sector_id")
    tazes = business_dataset.get_attribute(self.geography_id_attr).astype("int32")
    # Optional attributes stay empty arrays (or None) if the table lacks them.
    building_ids = array([], dtype='int32')
    if "building_id" in business_dataset.get_primary_attribute_names():
        building_ids = business_dataset.get_attribute("building_id")
    parcel_ids = array([], dtype='int32')
    if "parcel_id" in business_dataset.get_primary_attribute_names():
        parcel_ids = business_dataset.get_attribute("parcel_id")
    home_based = array([], dtype='int16')
    if "home_based" in business_dataset.get_primary_attribute_names():
        home_based = business_dataset.get_attribute("home_based")
    building_sqft = business_dataset.get_attribute(self.sqft_attr)
    building_sqft[building_sqft <= 0] = 0  # negative sqft is treated as 0
    join_flags = None
    if "join_flag" in business_dataset.get_primary_attribute_names():
        join_flags = business_dataset.get_attribute("join_flag")
    impute_sqft_flag = None
    if "impute_building_sqft_flag" in business_dataset.get_primary_attribute_names():
        impute_sqft_flag = business_dataset.get_attribute("impute_building_sqft_flag")

    # initialize jobs attributes
    total_size = business_sizes.sum()
    jobs_data = {}
    jobs_data["sector_id"] = resize(array([-1], dtype=sectors.dtype), total_size)
    jobs_data["building_id"] = resize(array([-1], dtype=building_ids.dtype), total_size)
    jobs_data["parcel_id"] = resize(array([-1], dtype=parcel_ids.dtype), total_size)
    jobs_data[self.geography_id_attr] = resize(array([-1], dtype=tazes.dtype), total_size)
    jobs_data["building_type"] = resize(array([-1], dtype=home_based.dtype), total_size)
    jobs_data["sqft"] = resize(array([], dtype=building_sqft.dtype), total_size)
    if join_flags is not None:
        jobs_data["join_flag"] = resize(array([], dtype=join_flags.dtype), total_size)
    if impute_sqft_flag is not None:
        jobs_data["impute_building_sqft_flag"] = resize(
            array([], dtype=impute_sqft_flag.dtype), total_size)
    indices = cumsum(business_sizes)

    # Iterate over establishments. For each business create the corresponding
    # number of jobs by filling the corresponding part of the arrays.
    start_index = 0
    for i in range(business_dataset.size()):
        if business_sizes[i] == 0:
            # An establishment without jobs owns an empty slice, so there is
            # nothing to fill. Skipping it also avoids dividing by zero in
            # the sqft-per-job computation below.
            continue
        end_index = indices[i]
        jobs_data["sector_id"][start_index:end_index] = sectors[i]
        if building_ids.size > 0:
            jobs_data["building_id"][start_index:end_index] = building_ids[i]
        if parcel_ids.size > 0:
            jobs_data["parcel_id"][start_index:end_index] = parcel_ids[i]
        jobs_data[self.geography_id_attr][start_index:end_index] = tazes[i]
        if home_based.size > 0:
            jobs_data["building_type"][start_index:end_index] = home_based[i]
        if self.compute_sqft_per_job:
            # sqft per employee: 90% of the building sqft spread over all jobs
            jobs_data["sqft"][start_index:end_index] = round(
                (building_sqft[i] - building_sqft[i] / 10.0) / float(business_sizes[i]))
        else:
            jobs_data["sqft"][start_index:end_index] = building_sqft[i]
        if join_flags is not None:
            jobs_data["join_flag"][start_index:end_index] = join_flags[i]
        if impute_sqft_flag is not None:
            jobs_data["impute_building_sqft_flag"][start_index:end_index] = impute_sqft_flag[i]
        start_index = end_index

    jobs_data["job_id"] = arange(total_size) + 1
    if self.compute_sqft_per_job:
        jobs_data["sqft"] = clip(jobs_data["sqft"], 0, self.maximum_sqft)
        # positive values below the minimum are raised to the minimum
        jobs_data["sqft"][logical_and(jobs_data["sqft"] > 0,
                                      jobs_data["sqft"] < self.minimum_sqft)] = self.minimum_sqft

    # correct missing job_building_types
    wmissing_bt = where(jobs_data["building_type"] <= 0)[0]
    if wmissing_bt.size > 0:
        # assign non-homebased type for now. It can be re-classified in the
        # assign_bldgs_to_jobs... script
        jobs_data["building_type"][wmissing_bt] = 2

    # create jobs table and write it out
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name="jobs", table_data=jobs_data)
    job_dataset = JobDataset(in_storage=storage)
    if self.unplace_jobs_with_non_existing_buildings:
        self.do_unplace_jobs_with_non_existing_buildings(job_dataset, out_storage)

    # Match to control totals (only eliminate jobs if control totals are
    # smaller than the actual number of jobs).
    if control_totals_table is not None:
        logger.log_status("Matching to control totals.")
        control_totals = ControlTotalDataset(what='employment',
                                             id_name=['zone_id', 'sector_id'],
                                             in_table_name=control_totals_table,
                                             in_storage=in_storage)
        control_totals.load_dataset(attributes=['zone_id', 'sector_id', 'jobs'])
        zones_sectors = control_totals.get_id_attribute()
        njobs = control_totals.get_attribute('jobs')
        remove = array([], dtype='int32')
        for i in range(zones_sectors.shape[0]):
            zone, sector = zones_sectors[i, :]
            in_sector = job_dataset.get_attribute("sector_id") == sector
            in_zone_in_sector = logical_and(in_sector,
                                            job_dataset.get_attribute("zone_id") == zone)
            if in_zone_in_sector.sum() <= njobs[i]:
                continue
            to_be_removed = in_zone_in_sector.sum() - njobs[i]
            this_removal = 0
            # ``bool`` instead of the 'bool8' alias, which NumPy 2.0 removed
            not_considered = ones(job_dataset.size(), dtype=bool)
            # first consider jobs without parcel id, then without
            # building_id, then all
            for unit in ['parcel_id', 'building_id', None]:
                if unit is not None:
                    wnunit = job_dataset.get_attribute(unit) <= 0
                    eligible = logical_and(not_considered,
                                           logical_and(in_zone_in_sector, wnunit))
                    not_considered[where(wnunit)] = False
                else:
                    eligible = logical_and(not_considered, in_zone_in_sector)
                eligible_sum = eligible.sum()
                if eligible_sum > 0:
                    where_eligible = where(eligible)[0]
                    if eligible_sum <= to_be_removed - this_removal:
                        # all eligible jobs are needed
                        draw = arange(eligible_sum)
                    else:
                        # NOTE(review): the result is used as an index into
                        # where_eligible, so the third argument presumably
                        # switches sample_noreplace to returning indices --
                        # confirm against its signature.
                        draw = sample_noreplace(where_eligible,
                                                to_be_removed - this_removal,
                                                eligible_sum)
                    remove = concatenate((remove, where_eligible[draw]))
                    this_removal += draw.size
                    if this_removal >= to_be_removed:
                        break

        job_dataset.remove_elements(remove)
        logger.log_status("%s jobs removed." % remove.size)

    logger.log_status("Write jobs table.")
    job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage)
    logger.log_status("Created %s jobs." % job_dataset.size())