Example #1
0
    def test_load_csv_with_invalid_header(self):
        """Bail loading a CSV when the header is invalid.

        The mocked `read_csv` returns a frame whose only column is `foo`;
        the loader is expected to yield a single `None` for it.
        """

        data = {'foo': [1, 2, 3]}
        mock_pandas = MagicMock()
        mock_pandas.read_csv.return_value = pandas.DataFrame(data=data)
        filepath = 'path/name.csv'
        geo_type = 'state'

        rows = list(
            CsvImporter.load_csv(filepath, geo_type, pandas=mock_pandas))

        self.assertTrue(mock_pandas.read_csv.called)
        # `assertTrue(x, filepath)` would use `filepath` as the failure
        # message and pass for any truthy `x`; compare the values instead.
        self.assertEqual(mock_pandas.read_csv.call_args[0][0], filepath)
        self.assertEqual(rows, [None])
Example #2
0
    def test_load_csv_with_valid_header(self):
        """Yield sanity checked `RowValues` from a valid CSV file.

        Three rows carry a usable state `geo_id` and are expected to come
        back with numeric `value`, `stderr` and `sample_size`; the fourth
        row has an invalid `geo_id` and is expected to come back as `None`.
        """

        # one invalid geo_id, but otherwise valid
        data = {
            'geo_id': ['ca', 'tx', 'fl', '123'],
            'val': ['1.1', '1.2', '1.3', '1.4'],
            'se': ['2.1', '2.2', '2.3', '2.4'],
            'sample_size': ['301', '302', '303', '304'],
        }
        mock_pandas = MagicMock()
        mock_pandas.read_csv.return_value = pandas.DataFrame(data=data)
        filepath = 'path/name.csv'
        geo_type = 'state'

        rows = list(
            CsvImporter.load_csv(filepath, geo_type, pandas=mock_pandas))

        self.assertTrue(mock_pandas.read_csv.called)
        # `assertTrue(x, filepath)` would use `filepath` as the failure
        # message and pass for any truthy `x`; compare the values instead.
        self.assertEqual(mock_pandas.read_csv.call_args[0][0], filepath)
        self.assertEqual(len(rows), 4)

        self.assertEqual(rows[0].geo_value, 'ca')
        self.assertEqual(rows[0].value, 1.1)
        self.assertEqual(rows[0].stderr, 2.1)
        self.assertEqual(rows[0].sample_size, 301)

        self.assertEqual(rows[1].geo_value, 'tx')
        self.assertEqual(rows[1].value, 1.2)
        self.assertEqual(rows[1].stderr, 2.2)
        self.assertEqual(rows[1].sample_size, 302)

        self.assertEqual(rows[2].geo_value, 'fl')
        self.assertEqual(rows[2].value, 1.3)
        self.assertEqual(rows[2].stderr, 2.3)
        self.assertEqual(rows[2].sample_size, 303)

        # the row with the invalid geo_id
        self.assertIsNone(rows[3])
Example #3
0
    def test_load_csv_with_valid_header(self):
        """Yield sanity checked `RowValues` from a valid CSV file.

        Two scenarios are exercised through a mocked `pandas.read_csv`:

        1. Fully populated rows where one `geo_id` is invalid; that row is
           expected to come back as `None`.
        2. Rows with missing `val` / `se` / `sample_size` entries together
           with `missing_*` code columns; absent values are expected to be
           `None` and the codes are expected to match the assertions below.
        """

        # one invalid geo_id, but otherwise valid
        data = {
            'geo_id': ['ca', 'tx', 'fl', '123'],
            'val': ['1.1', '1.2', '1.3', '1.4'],
            'se': ['2.1', '2.2', '2.3', '2.4'],
            'sample_size': ['301', '302', '303', '304'],
        }
        mock_pandas = MagicMock()
        mock_pandas.read_csv.return_value = pandas.DataFrame(data=data)
        filepath = 'path/name.csv'
        geo_type = 'state'

        rows = list(
            CsvImporter.load_csv(filepath, geo_type, pandas=mock_pandas))

        self.assertTrue(mock_pandas.read_csv.called)
        # `assertTrue(x, filepath)` would use `filepath` as the failure
        # message and pass for any truthy `x`; compare the values instead.
        self.assertEqual(mock_pandas.read_csv.call_args[0][0], filepath)
        self.assertEqual(len(rows), 4)

        self.assertEqual(rows[0].geo_value, 'ca')
        self.assertEqual(rows[0].value, 1.1)
        self.assertEqual(rows[0].stderr, 2.1)
        self.assertEqual(rows[0].sample_size, 301)

        self.assertEqual(rows[1].geo_value, 'tx')
        self.assertEqual(rows[1].value, 1.2)
        self.assertEqual(rows[1].stderr, 2.2)
        self.assertEqual(rows[1].sample_size, 302)

        self.assertEqual(rows[2].geo_value, 'fl')
        self.assertEqual(rows[2].value, 1.3)
        self.assertEqual(rows[2].stderr, 2.3)
        self.assertEqual(rows[2].sample_size, 303)

        # the row with the invalid geo_id
        self.assertIsNone(rows[3])

        # now with missing values!
        data = {
            'geo_id': ['ca', 'tx', 'fl', 'ak', 'wa'],
            'val': [np.nan, '1.2', '1.3', '1.4', '1.5'],
            'se': ['2.1', "na", '2.3', '2.4', '2.5'],
            'sample_size': ['301', '302', None, '304', None],
            'missing_value':
            [Nans.NOT_APPLICABLE] + [Nans.NOT_MISSING] * 3 + [None],
            'missing_stderr': [
                Nans.NOT_MISSING, Nans.REGION_EXCEPTION, Nans.NOT_MISSING,
                Nans.NOT_MISSING
            ] + [None],
            'missing_sample_size':
            [Nans.NOT_MISSING] * 2 + [Nans.REGION_EXCEPTION] * 2 + [None]
        }
        # fresh mock so `called` / `call_args` reflect only the second load
        mock_pandas = MagicMock()
        mock_pandas.read_csv.return_value = pandas.DataFrame(data=data)
        filepath = 'path/name.csv'
        geo_type = 'state'

        rows = list(
            CsvImporter.load_csv(filepath, geo_type, pandas=mock_pandas))

        self.assertTrue(mock_pandas.read_csv.called)
        self.assertEqual(mock_pandas.read_csv.call_args[0][0], filepath)
        self.assertEqual(len(rows), 5)

        # missing value, code supplied
        self.assertEqual(rows[0].geo_value, 'ca')
        self.assertIsNone(rows[0].value)
        self.assertEqual(rows[0].stderr, 2.1)
        self.assertEqual(rows[0].sample_size, 301)
        self.assertEqual(rows[0].missing_value, Nans.NOT_APPLICABLE)
        self.assertEqual(rows[0].missing_stderr, Nans.NOT_MISSING)
        self.assertEqual(rows[0].missing_sample_size, Nans.NOT_MISSING)

        # missing stderr ("na"), code supplied
        self.assertEqual(rows[1].geo_value, 'tx')
        self.assertEqual(rows[1].value, 1.2)
        self.assertIsNone(rows[1].stderr)
        self.assertEqual(rows[1].sample_size, 302)
        self.assertEqual(rows[1].missing_value, Nans.NOT_MISSING)
        self.assertEqual(rows[1].missing_stderr, Nans.REGION_EXCEPTION)
        self.assertEqual(rows[1].missing_sample_size, Nans.NOT_MISSING)

        # missing sample size, code supplied
        self.assertEqual(rows[2].geo_value, 'fl')
        self.assertEqual(rows[2].value, 1.3)
        self.assertEqual(rows[2].stderr, 2.3)
        self.assertIsNone(rows[2].sample_size)
        self.assertEqual(rows[2].missing_value, Nans.NOT_MISSING)
        self.assertEqual(rows[2].missing_stderr, Nans.NOT_MISSING)
        self.assertEqual(rows[2].missing_sample_size, Nans.REGION_EXCEPTION)

        # sample size is present but its input code is REGION_EXCEPTION;
        # the expected output code is NOT_MISSING
        self.assertEqual(rows[3].geo_value, 'ak')
        self.assertEqual(rows[3].value, 1.4)
        self.assertEqual(rows[3].stderr, 2.4)
        self.assertEqual(rows[3].sample_size, 304)
        self.assertEqual(rows[3].missing_value, Nans.NOT_MISSING)
        self.assertEqual(rows[3].missing_stderr, Nans.NOT_MISSING)
        self.assertEqual(rows[3].missing_sample_size, Nans.NOT_MISSING)

        # no codes supplied at all: expect NOT_MISSING where the value is
        # present and OTHER where it is absent
        self.assertEqual(rows[4].geo_value, 'wa')
        self.assertEqual(rows[4].value, 1.5)
        self.assertEqual(rows[4].stderr, 2.5)
        self.assertIsNone(rows[4].sample_size)
        self.assertEqual(rows[4].missing_value, Nans.NOT_MISSING)
        self.assertEqual(rows[4].missing_stderr, Nans.NOT_MISSING)
        self.assertEqual(rows[4].missing_sample_size, Nans.OTHER)