def download_tract_data(state_id, puma_id, output_dir, census_api_key, puma_tract_mappings,
                        households_data, persons_data):
    '''Load (or download) tract marginals and initialize a household allocator.

    The marginals are first read from a previously written csv file under
    ``output_dir``. If that fails for any reason, they are downloaded from the
    U.S. Census API using the puma-tract mappings file and then written to
    that csv path. The marginals are finally handed to
    ``HouseholdAllocator.from_cleaned_data`` to initialize an allocator,
    capable of allocating PUMS households as best as possible based on
    marginal census (currently tract) data using a cvx-solver.

    Args:
        state_id: 2-digit state fips code
        puma_id: 5-digit puma code
        output_dir: dir the marginals csv file is read from / written to
        census_api_key: key used to download data from the U.S. Census
        puma_tract_mappings: filepath to the puma-tract mappings
        households_data: pums households data, passed through unchanged to
            ``HouseholdAllocator.from_cleaned_data``
        persons_data: pums persons data, passed through unchanged to
            ``HouseholdAllocator.from_cleaned_data``

    Returns:
        A ``(marginals, allocator)`` tuple: the marginals that were loaded or
        downloaded, and the allocator described above.

    Raises:
        CensusFetchException: if the downloaded marginals hold no more than
            one entry (``len(marginals.data) <= 1``).
        SystemExit: with status 1 if the allocator cannot be built.
    '''
    # Imported locally: only needed on the allocation-failure path below.
    import sys

    marginal_path = os.path.join(
        output_dir, FILE_PATTERN.format(state_id, puma_id, 'marginals.csv'))

    try:  # Already have marginals file
        marginals = Marginals.from_csv(marginal_path)
    except Exception:  # Any failure to load it falls back to the Census API
        with builtins.open(puma_tract_mappings) as csv_file:
            csv_reader = csv.DictReader(csv_file)
            marginals = Marginals.from_census_data(
                csv_reader, census_api_key, state=state_id, pumas=puma_id)
            if len(marginals.data) <= 1:
                logging.exception(
                    'Couldn\'t fetch data from the census. Check your API key')
                raise CensusFetchException()
            logging.info(
                'Writing outWriter marginal file for state: %s, puma: %s', state_id, puma_id)
            marginals.write(marginal_path)

    # With the above marginal controls (tract data), the methods in
    # allocation.py allocate discrete PUMS households to the subject PUMA.
    try:
        allocator = HouseholdAllocator.from_cleaned_data(
            marginals=marginals, households_data=households_data, persons_data=persons_data)
    except Exception as e:
        logging.exception('Error Allocating state: %s, puma: %s\n%s', state_id, puma_id, e)
        # sys.exit is the supported way to terminate a program; a non-zero
        # status tells the calling shell that the allocation failed.
        sys.exit(1)
    return marginals, allocator
def test_fetch_marginals(self):
    '''Check the first row of marginals built from a patched census fetch.

    ``Marginals._fetch_from_census`` is patched to return the mock response,
    and the state/puma are taken from the first mock mapping row.
    '''
    state_fips = self._mock_marginals_file()[0]['STATEFP']
    puma_code = self._mock_marginals_file()[0]['PUMA5CE']

    fetch_target = 'doppelganger.marginals.Marginals._fetch_from_census'
    canned_response = self._mock_response()
    with patch(fetch_target, return_value=canned_response):
        mappings = self._mock_marginals_file()
        fetched = Marginals.from_census_data(
            puma_tract_mappings=mappings,
            census_key=None,
            state=state_fips,
            pumas={puma_code},
        )

    first_row = fetched.data.loc[0].to_dict()
    self.assertDictEqual(first_row, {
        # Geography identifiers
        'STATEFP': '06',
        'COUNTYFP': '075',
        'PUMA5CE': '07507',
        'TRACTCE': '023001',
        # Age controls
        'age_0-17': 909,
        'age_18-34': 1124,
        'age_35-64': 2334,
        'age_65+': 713,
        # Household size controls
        'num_people_count': 1335,
        'num_people_1': 168,
        'num_people_2': 341,
        'num_people_3': 304,
        'num_people_4+': 522,
        # Vehicle controls
        'num_vehicles_0': 0,
        'num_vehicles_1': 1,
        'num_vehicles_2': 2,
        'num_vehicles_3+': 3,
    })
def test_fetch_marginals(self):
    '''Check the first row of marginals built from a patched census fetch.

    ``Marginals._fetch_from_census`` is patched to return the mock response;
    the expected control values here are compared as strings.
    '''
    state_fips = self._mock_marginals_file()[0]['STATEFP']
    puma_code = self._mock_marginals_file()[0]['PUMA5CE']

    fetch_target = 'doppelganger.marginals.Marginals._fetch_from_census'
    canned_response = self._mock_response()
    with patch(fetch_target, return_value=canned_response):
        mappings = self._mock_marginals_file()
        fetched = Marginals.from_census_data(
            puma_tract_mappings=mappings,
            census_key=None,
            state=state_fips,
            pumas={puma_code},
        )

    first_row = fetched.data.loc[0].to_dict()
    self.assertDictEqual(first_row, {
        # Geography identifiers
        'STATEFP': '06',
        'COUNTYFP': '075',
        'PUMA5CE': '07507',
        'TRACTCE': '023001',
        # Age controls
        '0-17': '909',
        '18-34': '1124',
        '35-64': '2334',
        '65+': '713',
        # Household size controls
        'count': '1335',
        '1': '168',
        '2': '341',
        '3': '304',
        '4+': '522',
    })
def test_from_cleaned_data(self):
    '''Check the allocator built from mock cleaned pums data and marginals.

    Verifies that the allocator is truthy and that its allocated households
    have the expected shape and set of columns.
    '''
    # Wrap the mock pums fixtures, then the mock tract marginals
    cleaned_households = CleanedData(self._mock_household_data())
    cleaned_persons = CleanedData(self._mock_person_data())
    tract_marginals = Marginals(self._mock_tract_data())

    allocator = HouseholdAllocator.from_cleaned_data(
        tract_marginals, cleaned_households, cleaned_persons)
    self.assertTrue(allocator)

    self.assertEqual(allocator.allocated_households.shape, (114, 17))

    # Column order is not checked, only membership
    household_columns = [
        u'serial_number', u'num_people', u'num_vehicles', u'household_weight',
    ]
    control_columns = [
        u'num_people_1', u'num_people_2', u'num_people_3',
        u'num_vehicles_0', u'num_vehicles_1', u'num_vehicles_2', u'num_vehicles_3+',
        u'age_0-17', u'age_18-34', u'age_65+', u'age_35-64',
    ]
    bookkeeping_columns = [u'count', u'tract']
    wanted_columns = set(household_columns + control_columns + bookkeeping_columns)

    actual_columns = set(allocator.allocated_households.columns.tolist())
    self.assertEqual(actual_columns, wanted_columns)