def test_write_to_file(self):
    """Check that ``write`` forwards each file name to the matching ``to_csv``.

    Both data frames are replaced by mocks, so nothing touches the disk; the
    test only verifies which path each mock's ``to_csv`` received.
    """
    person_frame = MagicMock()
    household_frame = MagicMock()

    HouseholdAllocator(household_frame, person_frame).write(
        person_file='persons_file',
        household_file='households_file',
    )

    person_frame.to_csv.assert_called_once_with('persons_file')
    household_frame.to_csv.assert_called_once_with('households_file')
def _mock_allocated(self):
    """Return a HouseholdAllocator built from small hand-written frames.

    The fixture holds two person rows and two household rows, all sharing
    serial number ``'b'``; the household rows differ only in their tract.
    """
    person_fields = ('serial_number', 'age', 'sex', 'individual_income')
    household_fields = (
        'serial_number',
        'num_people',
        'num_vehicles',
        'household_income',
        'count',
        'tract',
    )

    def as_record(fields, *values):
        # Pair each column name with its value, keeping the column order.
        return dict(zip(fields, values))

    person_rows = [
        as_record(person_fields, 'b', '35-64', 'F', '40k+'),
        as_record(person_fields, 'b', '35-64', 'M', 'None'),
    ]
    household_rows = [
        as_record(household_fields, 'b', '6+', '2', '40k+', 2, tract_name)
        for tract_name in ('tract1', 'tract2')
    ]

    persons_frame = pandas.DataFrame(person_rows)
    households_frame = pandas.DataFrame(household_rows)
    return HouseholdAllocator(households_frame, persons_frame)
def test_read_from_file(self):
    """Check that ``from_csvs`` reads both files through ``pandas.read_csv``.

    ``pandas.read_csv`` is patched with a mock returning an empty frame, so
    the test only verifies the resulting type and the paths requested.
    """
    fake_read_csv = MagicMock(return_value=pandas.DataFrame())
    with patch('pandas.read_csv', fake_read_csv):
        loaded = HouseholdAllocator.from_csvs(
            'households_file', 'persons_file')
        assert type(loaded) == HouseholdAllocator
        for requested_path in ('households_file', 'persons_file'):
            fake_read_csv.assert_any_call(requested_path)
def download_tract_data(state_id, puma_id, output_dir, census_api_key,
                        puma_tract_mappings, households_data, persons_data):
    '''Load (or download) tract marginals and build a household allocator.

    The marginals are first read from a CSV file under ``output_dir``. If that
    read fails for any reason, they are fetched from the U.S. Census API using
    the puma-tract mappings file and then written to that same CSV path.
    With the marginals available, an allocator is initialized, capable of
    allocating PUMS households based on marginal census (currently tract)
    data.

    Args:
        state_id: 2-digit state fips code
        puma_id: 5-digit puma code
        output_dir: directory in which the marginals CSV is read/written
        census_api_key: key used to download data from the U.S. Census
        puma_tract_mappings: filepath to the puma-tract mappings
        households_data: pums households data frame
        persons_data: pums persons data frame

    Returns:
        A ``(marginals, allocator)`` tuple.

    Raises:
        CensusFetchException: if the downloaded marginals hold at most one
            entry (``len(marginals.data) <= 1``).
        SystemExit: with status 1 if building the allocator fails; the error
            is logged first.
    '''
    marginal_path = os.path.join(
        output_dir, FILE_PATTERN.format(state_id, puma_id, 'marginals.csv'))

    try:
        # Already have marginals file
        marginals = Marginals.from_csv(marginal_path)
    except Exception:
        # Deliberately broad: any failure to load the cached file simply
        # falls back to downloading. The download runs outside this handler
        # so its own errors are not reported with the cache-read traceback.
        marginals = None

    if marginals is None:
        # Download marginal data from the Census API
        with builtins.open(puma_tract_mappings) as csv_file:
            csv_reader = csv.DictReader(csv_file)
            marginals = Marginals.from_census_data(
                csv_reader, census_api_key, state=state_id, pumas=puma_id)

        if len(marginals.data) <= 1:
            # No exception is active here, so log a plain error rather than
            # using logging.exception.
            logging.error(
                'Couldn\'t fetch data from the census. Check your API key')
            raise CensusFetchException()

        logging.info(
            'Writing outWriter marginal file for state: %s, puma: %s',
            state_id, puma_id)
        marginals.write(marginal_path)

    # With the above marginal controls (tract data), the methods in
    # allocation.py allocate discrete PUMS households to the subject PUMA.
    try:
        allocator = HouseholdAllocator.from_cleaned_data(
            marginals=marginals,
            households_data=households_data,
            persons_data=persons_data)
    except Exception as e:
        logging.exception('Error Allocating state: %s, puma: %s\n%s',
                          state_id, puma_id, e)
        # Still terminates via SystemExit as before, but with a failing
        # status and without relying on the Python 2 ``__builtin__`` module
        # or the site-provided ``exit`` helper.
        raise SystemExit(1)

    return marginals, allocator
def test_from_cleaned_data(self):
    """Check the allocator built from mocked PUMS data and tract marginals.

    Verifies that the allocator is truthy, that ``allocated_households`` has
    the expected shape, and that its columns match the expected set
    (order-insensitive).
    """
    wanted_columns = {
        u'serial_number', u'num_people', u'num_vehicles',
        u'household_weight',
        u'num_people_1', u'num_people_2', u'num_people_3',
        u'num_vehicles_0', u'num_vehicles_1', u'num_vehicles_2',
        u'num_vehicles_3+',
        u'age_0-17', u'age_18-34', u'age_65+', u'age_35-64',
        u'count', u'tract',
    }
    wanted_shape = (114, 17)

    # Prepare pums data
    pums_households = CleanedData(self._mock_household_data())
    pums_persons = CleanedData(self._mock_person_data())

    # Prepare marginals
    tract_marginals = Marginals(self._mock_tract_data())

    allocator = HouseholdAllocator.from_cleaned_data(
        tract_marginals, pums_households, pums_persons)
    self.assertTrue(allocator)

    self.assertEqual(allocator.allocated_households.shape, wanted_shape)

    actual_columns = set(allocator.allocated_households.columns.tolist())
    self.assertEqual(actual_columns, wanted_columns)