コード例 #1
0
 def test_clean_data_one_field(self):
     """Clean the dirty household fixture requesting only NUM_PEOPLE.

     The row labelled 1 in the cleaned data must then consist of exactly
     that one field, holding the string '2'.
     """
     raw_frame = pandas.DataFrame(self._mock_dirty_household_input())
     source = datasource.PumsData(raw_frame)
     requested_fields = [inputs.NUM_PEOPLE.name]
     result = source.clean(requested_fields, Preprocessor())
     # .loc[1] is a label lookup, not a positional one.
     row = result.data.loc[1].to_dict()
     self.assertDictEqual(row, {inputs.NUM_PEOPLE.name: '2'})
コード例 #2
0
 def test_clean_data(self):
     """Clean the dirty household fixture for four household fields.

     Checks the row labelled 1 of the cleaned data against the expected
     string values for serial number, people, vehicles and income.
     """
     household_frame = pandas.DataFrame(self._mock_dirty_household_input())
     source = datasource.PumsData(household_frame)
     wanted_fields = [
         inputs.SERIAL_NUMBER.name,
         inputs.NUM_PEOPLE.name,
         inputs.NUM_VEHICLES.name,
         inputs.HOUSEHOLD_INCOME.name,
     ]
     result = source.clean(wanted_fields, Preprocessor())
     # .loc[1] is a label lookup, not a positional one.
     row = result.data.loc[1].to_dict()
     expected_row = {}
     expected_row[inputs.SERIAL_NUMBER.name] = 'b'
     expected_row[inputs.NUM_PEOPLE.name] = '2'
     expected_row[inputs.NUM_VEHICLES.name] = '3+'
     expected_row[inputs.HOUSEHOLD_INCOME.name] = '40000+'
     self.assertDictEqual(row, expected_row)
コード例 #3
0
 def test_clean_data_filter_length(self):
     """Check cleaned row counts with and without state/puma filters.

     The same fixture is cleaned four times: unfiltered, by state only,
     by puma only, and by both. The expected row counts are 3, 2, 2
     and 1 respectively.
     """
     fixture_frame = pandas.DataFrame(
         self._mock_dirty_household_puma_state_input())
     source = datasource.PumsData(fixture_frame)
     columns = [
         inputs.SERIAL_NUMBER.name, inputs.STATE.name, inputs.PUMA.name
     ]
     # (filter keyword arguments, expected number of cleaned rows)
     scenarios = [
         ({}, 3),
         ({'state': '06'}, 2),
         ({'puma': '00106'}, 2),
         ({'state': '06', 'puma': '00106'}, 1),
     ]
     # Run every clean before asserting anything, each with a fresh
     # Preprocessor instance.
     outcomes = [
         source.clean(columns, Preprocessor(), **filters)
         for filters, _ in scenarios
     ]
     for (_, expected_rows), outcome in zip(scenarios, outcomes):
         self.assertEqual(len(outcome.data), expected_rows)