コード例 #1
0
    def test_download_and_load_pums_data_download(self, mock_fetch_pums_data,
                                                  mock_exists,
                                                  mock_CleanedData,
                                                  mock_pandas_to_csv):
        '''Missing local pums files must trigger fetch_pums_data with the db arguments.

        mock_exists is made to return False (presumably it patches os.path.exists; the
        patch decorators are not visible from here -- confirm), so the function under
        test is expected to take its download branch. The test then checks the keyword
        arguments that reached the mocked fetch_pums_data.
        '''
        # The mocked fetch returns a (households, persons) pair of empty CleanedData.
        empty_households = CleanedData(pandas.DataFrame())
        empty_persons = CleanedData(pandas.DataFrame())
        mock_fetch_pums_data.return_value = (empty_households, empty_persons)
        configuration = self._mock_config()
        mock_exists.return_value = False

        params = self._mock_params
        forwarded_keys = ('output_dir', 'state_id', 'puma_id', 'db_host',
                          'db_database', 'db_schema', 'db_user', 'db_password')
        call_kwargs = {key: params[key] for key in forwarded_keys}
        download_allocate_generate.download_and_load_pums_data(
            configuration=configuration, **call_kwargs)

        # output_dir is deliberately absent: it is not passed on to fetch_pums_data.
        expected_kwargs = {
            'state_id': '01',
            'puma_id': '00001',
            'configuration': configuration,
            'db_host': 'host1',
            'db_database': 'database1',
            'db_schema': 'schema1',
            'db_user': '******',
            'db_password': '******',
        }
        mock_fetch_pums_data.assert_called()
        mock_fetch_pums_data.assert_called_with(**expected_kwargs)
コード例 #2
0
def download_and_load_pums_data(output_dir, state_id, puma_id, configuration,
                                db_host, db_database, db_schema, db_user,
                                db_password):
    '''Return pums household and person data, using csv files in output_dir as a cache.

    If both expected csv files already exist in output_dir, they are loaded with
    CleanedData.from_csv. If either one is missing, both datasets are fetched from
    the database with fetch_pums_data and written to output_dir as csv; the fetched
    objects are then returned directly (the freshly written csv files are not
    re-read).

    Args:
        output_dir: place to look for and write pums household and person data files
        state_id: 2-digit state fips code
        puma_id: 5-digit puma code
        configuration: keeps track of which variables/models belong to households and persons
        db_host: hostname of the POSTGRESQL instance to connect to
        db_database: database name to connect to
        db_schema: postgres schema name, which _must_ contain a persons and households table
            with pums fields referenced in doppelganger/inputs.py. E.g. if your schema is called
            "pums", then your schema should have a "pums.persons" table and a "pums.households"
            table
        db_user: username to connect with
        db_password: password to authenticate to the database

    Returns:
        A (households_data, persons_data) tuple, as produced by fetch_pums_data or
        by CleanedData.from_csv.
    '''
    # os.path.join (rather than joining on os.path.sep) keeps the paths relative when
    # output_dir is empty and avoids a doubled separator when output_dir already ends
    # with one.
    household_path = os.path.join(
        output_dir, FILE_PATTERN.format(state_id, puma_id, 'households_pums.csv'))
    person_path = os.path.join(
        output_dir, FILE_PATTERN.format(state_id, puma_id, 'persons_pums.csv'))

    if os.path.exists(household_path) and os.path.exists(person_path):
        # Both cached files are present: no database access is needed.
        households_data = CleanedData.from_csv(household_path)
        persons_data = CleanedData.from_csv(person_path)
        return households_data, persons_data

    logging.info(
        'Data not found at:\n%s\nor\n%s\n Downloading data from the db',
        household_path, person_path)

    households_data, persons_data = fetch_pums_data(
        state_id=state_id,
        puma_id=puma_id,
        configuration=configuration,
        db_host=db_host,
        db_database=db_database,
        db_schema=db_schema,
        db_user=db_user,
        db_password=db_password,
    )
    # Write the data to files so that it need not be downloaded again.
    households_data.data.to_csv(household_path)
    persons_data.data.to_csv(person_path)

    return households_data, persons_data
コード例 #3
0
 def test_from_cleaned_data(self):
     '''Build a HouseholdAllocator from cleaned pums data and check its allocation table.

     The allocator is constructed from the mock household, person and tract fixtures;
     the resulting allocated_households frame must have the expected shape and exactly
     the expected set of column names (column order is not checked).
     '''
     # Inputs: cleaned pums data plus the tract marginals.
     pums_households = CleanedData(self._mock_household_data())
     pums_persons = CleanedData(self._mock_person_data())
     tract_marginals = Marginals(self._mock_tract_data())

     allocator = HouseholdAllocator.from_cleaned_data(
         tract_marginals, pums_households, pums_persons)
     self.assertTrue(allocator)

     allocated = allocator.allocated_households
     self.assertEqual(allocated.shape, (114, 17))

     expected_columns = {
         u'serial_number', u'num_people', u'num_vehicles', u'household_weight',
         u'num_people_1', u'num_people_2', u'num_people_3',
         u'num_vehicles_0', u'num_vehicles_1', u'num_vehicles_2',
         u'num_vehicles_3+',
         u'age_0-17', u'age_18-34', u'age_65+', u'age_35-64',
         u'count', u'tract',
     }
     self.assertEqual(set(allocated.columns.tolist()), expected_columns)