def test_load_csv_with_invalid_header(self):
    """Bail loading a CSV when the header is invalid.

    `read_csv` is mocked to return a frame whose only column is `foo`.
    The test expects `load_csv` to call `read_csv` with the given path and
    to yield exactly one `None`.
    """
    data = {'foo': [1, 2, 3]}
    mock_pandas = MagicMock()
    mock_pandas.read_csv.return_value = pandas.DataFrame(data=data)
    filepath = 'path/name.csv'
    geo_type = 'state'

    rows = list(
        CsvImporter.load_csv(filepath, geo_type, pandas=mock_pandas))

    self.assertTrue(mock_pandas.read_csv.called)
    # assertEqual rather than assertTrue: with two arguments, assertTrue
    # treats the second one as the failure message and compares nothing.
    self.assertEqual(mock_pandas.read_csv.call_args[0][0], filepath)
    self.assertEqual(rows, [None])
def test_load_csv_with_valid_header(self):
    """Yield sanity checked `RowValues` from a valid CSV file.

    `read_csv` is mocked to return four rows, the last of which has an
    invalid geo_id. The three valid rows are expected to come back with
    numeric values, and the invalid one as `None`.
    """
    # NOTE(review): this file contains a later method with the same name.
    # If both live in the same class, the later definition replaces this
    # one and this test never runs -- confirm, then rename or remove.

    # one invalid geo_id, but otherwise valid
    data = {
        'geo_id': ['ca', 'tx', 'fl', '123'],
        'val': ['1.1', '1.2', '1.3', '1.4'],
        'se': ['2.1', '2.2', '2.3', '2.4'],
        'sample_size': ['301', '302', '303', '304'],
    }
    mock_pandas = MagicMock()
    mock_pandas.read_csv.return_value = pandas.DataFrame(data=data)
    filepath = 'path/name.csv'
    geo_type = 'state'

    rows = list(
        CsvImporter.load_csv(filepath, geo_type, pandas=mock_pandas))

    self.assertTrue(mock_pandas.read_csv.called)
    # assertEqual rather than assertTrue: with two arguments, assertTrue
    # treats the second one as the failure message and compares nothing.
    self.assertEqual(mock_pandas.read_csv.call_args[0][0], filepath)
    self.assertEqual(len(rows), 4)

    # (geo_value, value, stderr, sample_size) for each valid row, in order.
    expected = [
        ('ca', 1.1, 2.1, 301),
        ('tx', 1.2, 2.2, 302),
        ('fl', 1.3, 2.3, 303),
    ]
    for row, (geo_value, value, stderr, sample_size) in zip(rows, expected):
        self.assertEqual(row.geo_value, geo_value)
        self.assertEqual(row.value, value)
        self.assertEqual(row.stderr, stderr)
        self.assertEqual(row.sample_size, sample_size)

    # The row with the invalid geo_id ('123') is reported as None.
    self.assertIsNone(rows[3])
def test_load_csv_with_valid_header(self):
    """Yield sanity checked `RowValues` from a valid CSV file.

    Two scenarios are run against a mocked `read_csv`:

    1. Four rows, the last with an invalid geo_id; that row is expected
       to come back as `None`.
    2. Five rows containing missing values plus the `missing_*` code
       columns; missing values are expected to come back as `None` with
       the matching `Nans` code.
    """
    # NOTE(review): an earlier method in this file has the same name. If
    # both live in the same class, this definition replaces the earlier one.

    def check_rows(rows, fields, expected):
        """Compare `fields` of each row against the matching tuple."""
        for index, (row, wanted) in enumerate(zip(rows, expected)):
            for field, want in zip(fields, wanted):
                got = getattr(row, field)
                label = 'rows[{}].{}'.format(index, field)
                if want is None:
                    self.assertIsNone(got, label)
                else:
                    self.assertEqual(got, want, label)

    value_fields = ('geo_value', 'value', 'stderr', 'sample_size')
    missing_fields = (
        'missing_value', 'missing_stderr', 'missing_sample_size')

    filepath = 'path/name.csv'
    geo_type = 'state'

    # one invalid geo_id, but otherwise valid
    data = {
        'geo_id': ['ca', 'tx', 'fl', '123'],
        'val': ['1.1', '1.2', '1.3', '1.4'],
        'se': ['2.1', '2.2', '2.3', '2.4'],
        'sample_size': ['301', '302', '303', '304'],
    }
    mock_pandas = MagicMock()
    mock_pandas.read_csv.return_value = pandas.DataFrame(data=data)

    rows = list(
        CsvImporter.load_csv(filepath, geo_type, pandas=mock_pandas))

    self.assertTrue(mock_pandas.read_csv.called)
    # assertEqual rather than assertTrue: with two arguments, assertTrue
    # treats the second one as the failure message and compares nothing.
    self.assertEqual(mock_pandas.read_csv.call_args[0][0], filepath)
    self.assertEqual(len(rows), 4)
    check_rows(rows, value_fields, [
        ('ca', 1.1, 2.1, 301),
        ('tx', 1.2, 2.2, 302),
        ('fl', 1.3, 2.3, 303),
    ])
    # The row with the invalid geo_id ('123') is reported as None.
    self.assertIsNone(rows[3])

    # now with missing values!
    data = {
        'geo_id': ['ca', 'tx', 'fl', 'ak', 'wa'],
        'val': [np.nan, '1.2', '1.3', '1.4', '1.5'],
        'se': ['2.1', "na", '2.3', '2.4', '2.5'],
        'sample_size': ['301', '302', None, '304', None],
        'missing_value': (
            [Nans.NOT_APPLICABLE] + [Nans.NOT_MISSING] * 3 + [None]),
        'missing_stderr': [
            Nans.NOT_MISSING,
            Nans.REGION_EXCEPTION,
            Nans.NOT_MISSING,
            Nans.NOT_MISSING,
        ] + [None],
        'missing_sample_size': (
            [Nans.NOT_MISSING] * 2 + [Nans.REGION_EXCEPTION] * 2 + [None]),
    }
    mock_pandas = MagicMock()
    mock_pandas.read_csv.return_value = pandas.DataFrame(data=data)

    rows = list(
        CsvImporter.load_csv(filepath, geo_type, pandas=mock_pandas))

    self.assertTrue(mock_pandas.read_csv.called)
    self.assertEqual(mock_pandas.read_csv.call_args[0][0], filepath)
    self.assertEqual(len(rows), 5)
    # Expected output per row. Two rows deliberately differ from the input
    # codes: 'ak' is given REGION_EXCEPTION for a sample size that is
    # present and is expected back as NOT_MISSING; 'wa' is given no codes
    # at all and is expected back as NOT_MISSING / NOT_MISSING / OTHER.
    check_rows(rows, value_fields + missing_fields, [
        ('ca', None, 2.1, 301,
         Nans.NOT_APPLICABLE, Nans.NOT_MISSING, Nans.NOT_MISSING),
        ('tx', 1.2, None, 302,
         Nans.NOT_MISSING, Nans.REGION_EXCEPTION, Nans.NOT_MISSING),
        ('fl', 1.3, 2.3, None,
         Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.REGION_EXCEPTION),
        ('ak', 1.4, 2.4, 304,
         Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.NOT_MISSING),
        ('wa', 1.5, 2.5, None,
         Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.OTHER),
    ])