def test_get_entry_failure(self):
  """Fail with a helpful message when a nested field doesn't exist."""

  obj = self.test_utils.load_sample_metadata()

  with self.assertRaises(CovidHospException):
    Utils.get_entry(obj, -1)
def test_launch_if_main_when_not_main(self):
  """Don't launch the main entry point."""

  mock_entry = MagicMock()

  Utils.launch_if_main(mock_entry, '__test__')

  mock_entry.assert_not_called()
def test_launch_if_main_when_main(self):
  """Launch the main entry point."""

  mock_entry = MagicMock()

  Utils.launch_if_main(mock_entry, '__main__')

  mock_entry.assert_called_once()
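# For reference, a minimal sketch of the contract the two launch tests above
# pin down; this is an assumption about Utils.launch_if_main, not its actual
# implementation: it simply guards the entry point on the module name.
def launch_if_main_sketch(entry_point, module_name):
  """Call `entry_point` only when `module_name` is '__main__'."""
  if module_name == '__main__':
    entry_point()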
def test_acquire_specific_issue(self):
  """Acquire a specific issue of the dataset."""

  # make sure the data does not yet exist
  with self.subTest(name='no data yet'):
    response = Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101))
    self.assertEqual(response['result'], -2)

  # acquire sample data into local database
  # mock out network calls to external hosts
  with Database.connect() as db:
    pre_max_issue = db.get_max_issue()
  self.assertEqual(pre_max_issue, pd.Timestamp('1900-01-01 00:00:00'))
  with self.subTest(name='first acquisition'), \
       patch.object(Network, 'fetch_metadata',
                    return_value=self.test_utils.load_sample_metadata()) as mock_fetch_meta, \
       patch.object(Network, 'fetch_dataset', side_effect=[
           self.test_utils.load_sample_dataset("dataset0.csv")
       ]) as mock_fetch:
    acquired = Utils.update_dataset(Database, Network,
                                    date(2021, 3, 12), date(2021, 3, 14))
  with Database.connect() as db:
    post_max_issue = db.get_max_issue()
  self.assertEqual(post_max_issue, pd.Timestamp('2021-03-13 00:00:00'))
  self.assertTrue(acquired)
def test_get_issue_from_revision(self):
  """Extract an issue date from a free-form revision string."""

  revisions = ('Tue, 11/03/2020 - 19:38', 'Mon, 11/16/2020 - 00:55', 'foo')
  issues = (20201103, 20201116, None)

  for revision, issue in zip(revisions, issues):
    with self.subTest(revision=revision):
      if issue:
        result = Utils.get_issue_from_revision(revision)
        self.assertEqual(result, issue)
      else:
        with self.assertRaises(CovidHospException):
          Utils.get_issue_from_revision(revision)
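# A hedged sketch of a parser consistent with this test, assuming the issue
# date is embedded as MM/DD/YYYY; the real Utils.get_issue_from_revision may
# differ in its pattern and error message.
import re

def get_issue_from_revision_sketch(revision):
  """Extract a YYYYMMDD int from strings like 'Tue, 11/03/2020 - 19:38'."""
  match = re.search(r'(\d{2})/(\d{2})/(\d{4})', revision)
  if not match:
    raise CovidHospException(f'unable to extract issue from "{revision}"')
  month, day, year = match.groups()
  return int(year) * 10000 + int(month) * 100 + int(day)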
def test_get_entry_success(self):
  """Get a deeply nested field from an arbitrary object."""

  obj = self.test_utils.load_sample_metadata()

  result = Utils.get_entry(obj, 'result', 0, 'tags', 2, 'id')

  self.assertEqual(result, '56f3cdad-8acb-46c8-bc71-aa1ded8407fb')
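# A minimal sketch of the traversal the two get_entry tests imply: each
# element of `path` indexes into the current object (dict key or list index),
# and any lookup failure is re-raised as CovidHospException. This is an
# assumption about Utils.get_entry, not its actual implementation.
def get_entry_sketch(obj, *path):
  """Walk `obj` along `path`, failing with a helpful message."""
  for key in path:
    try:
      obj = obj[key]
    except (KeyError, IndexError, TypeError) as ex:
      raise CovidHospException(f'unable to access entry {key!r}') from ex
  return obj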
def test_extract_resource_details(self):
  """Extract URL and revision from metadata."""

  with self.subTest(name='invalid success'):
    metadata = self.test_utils.load_sample_metadata()
    metadata['success'] = False
    with self.assertRaises(CovidHospException):
      Utils.extract_resource_details(metadata)

  with self.subTest(name='invalid result'):
    metadata = self.test_utils.load_sample_metadata()
    metadata['result'] = []
    with self.assertRaises(CovidHospException):
      Utils.extract_resource_details(metadata)

  with self.subTest(name='invalid resource'):
    metadata = self.test_utils.load_sample_metadata()
    metadata['result'][0]['resources'] = []
    with self.assertRaises(CovidHospException):
      Utils.extract_resource_details(metadata)

  with self.subTest(name='valid'):
    metadata = self.test_utils.load_sample_metadata()
    url, revision = Utils.extract_resource_details(metadata)
    expected_url = ('https://healthdata.gov/sites/default/files/'
                    'estimated_inpatient_all_20201213_1757.csv')
    self.assertEqual(url, expected_url)
    self.assertEqual(revision, 'Sun, 12/13/2020 - 22:36')
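# A hedged sketch consistent with the four subtests above. The metadata shape
# (a 'success' flag, a single entry under 'result', a single entry under
# 'resources') comes from the test; the field names holding the URL and
# revision are hypothetical, not the real Utils.extract_resource_details.
def extract_resource_details_sketch(metadata):
  """Return a (url, revision) pair or raise CovidHospException."""
  if not metadata.get('success'):
    raise CovidHospException('metadata does not indicate success')
  results = metadata.get('result', [])
  if len(results) != 1:
    raise CovidHospException('metadata does not contain a unique result')
  resources = results[0].get('resources', [])
  if len(resources) != 1:
    raise CovidHospException('result does not contain a unique resource')
  # 'url' and 'revision' are assumed field names, used for illustration only
  return resources[0]['url'], resources[0]['revision']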
def run(network=Network):
  """Acquire the most recent dataset, unless it was previously acquired.

  Returns
  -------
  bool
    Whether a new dataset was acquired.
  """

  return Utils.update_dataset(Database, network)
def test_merge(self):
  """Merging the set of updates in each batch is pretty tricky."""

  # generate a set of synthetic updates with overlapping keys
  N = 10
  dfs = []
  for i in range(5):
    # knock out every 2nd key, then every 3rd, then every 4th, etc
    dfs.append(pd.DataFrame(dict(
        state=range(1, N, i + 1),
        reporting_cutoff_start=range(N + 1, 2 * N, i + 1),
        **{spec[0]: i + 1 for spec in Database.ORDERED_CSV_COLUMNS[2:]})))
  # add a data frame with unseen keys
  dfs.append(pd.DataFrame(dict(
      state=[-1],
      reporting_cutoff_start=[-1],
      **{spec[0]: -1 for spec in Database.ORDERED_CSV_COLUMNS[2:]})))

  # now we need to know which data frame was used as the final value. the
  # above procedure is essentially a prime sieve, so we can derive the result
  # mathematically: for each state s in 1..N-1, the winning frame is the
  # greatest number 5 or less that evenly divides s - 1 (x below)
  value_from = [[i for i in range(5, 0, -1) if x / i == x // i][0]
                for x in range(N - 1)] + [-1]
  states = list(range(1, N)) + [-1]
  dates = list(range(N + 1, 2 * N)) + [-1]
  self.assertEqual(len(value_from), len(states))
  self.assertEqual(len(states), len(dates))
  expected = pd.DataFrame(dict(
      state=states,
      reporting_cutoff_start=dates,
      **{spec[0]: value_from for spec in Database.ORDERED_CSV_COLUMNS[2:]}
  )).astype({spec[0]: 'float64' for spec in Database.ORDERED_CSV_COLUMNS[2:]})

  result = Utils.merge_by_key_cols(dfs, Database.KEY_COLS)
  try:
    pd.testing.assert_frame_equal(result, expected)
  except AssertionError:
    assert False, f"result:\n{result}\n\nexpected:\n{expected}"
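# For reference, a minimal sketch of the merge semantics this test pins down,
# assuming Utils.merge_by_key_cols is equivalent to indexing each frame on the
# key columns and letting every later frame overwrite the earlier values; the
# real implementation may also handle row ordering and dtypes differently.
def merge_by_key_cols_sketch(dfs, key_cols):
  """Merge data frames; the last value seen for each key wins."""
  merged = dfs[0].set_index(key_cols)
  for df in dfs[1:]:
    update = df.set_index(key_cols)
    # values present in `update` win; rows unique to `merged` are kept
    merged = update.combine_first(merged)[merged.columns]
  return merged.reset_index()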
def test_parse_bool(self):
  """Parse a boolean value from a string."""

  with self.subTest(name='None'):
    self.assertIsNone(Utils.parse_bool(None))

  with self.subTest(name='empty'):
    self.assertIsNone(Utils.parse_bool(''))

  with self.subTest(name='true'):
    self.assertTrue(Utils.parse_bool('true'))
    self.assertTrue(Utils.parse_bool('True'))
    self.assertTrue(Utils.parse_bool('tRuE'))

  with self.subTest(name='false'):
    self.assertFalse(Utils.parse_bool('false'))
    self.assertFalse(Utils.parse_bool('False'))
    self.assertFalse(Utils.parse_bool('fAlSe'))

  with self.subTest(name='exception'):
    with self.assertRaises(CovidHospException):
      Utils.parse_bool('maybe')
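# A minimal sketch consistent with the contract above; an assumption about
# Utils.parse_bool, not its actual implementation.
def parse_bool_sketch(value):
  """Map None/'' to None, 'true'/'false' (any case) to bool, else raise."""
  if not value:
    return None
  if value.lower() == 'true':
    return True
  if value.lower() == 'false':
    return False
  raise CovidHospException(f'cannot convert "{value}" to a boolean')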
def test_run_skip_old_dataset(self):
  """Don't re-acquire an old dataset."""

  mock_network = MagicMock()
  mock_network.fetch_metadata.return_value = \
      self.test_utils.load_sample_metadata()
  mock_database = MagicMock()
  with mock_database.connect() as mock_connection:
    pass
  mock_connection.get_max_issue.return_value = pd.Timestamp("2200/1/1")

  result = Utils.update_dataset(database=mock_database, network=mock_network)

  self.assertFalse(result)
  mock_network.fetch_dataset.assert_not_called()
  mock_connection.insert_metadata.assert_not_called()
  mock_connection.insert_dataset.assert_not_called()
def test_issues_to_fetch(self):
  """Map issue dates strictly between two bounds to their archive links."""

  test_metadata = pd.DataFrame({
      "date": [
          pd.Timestamp("2021-03-13 00:00:00"),
          pd.Timestamp("2021-03-14 00:00:00"),
          pd.Timestamp("2021-03-15 00:00:01"),
          pd.Timestamp("2021-03-15 00:00:00"),
          pd.Timestamp("2021-03-16 00:00:00"),
      ],
      "Archive Link": ["a", "b", "d", "c", "e"],
  }).set_index("date")

  issues = Utils.issues_to_fetch(test_metadata,
                                 pd.Timestamp("2021-3-13"),
                                 pd.Timestamp("2021-3-16"))

  self.assertEqual(issues, {
      date(2021, 3, 14): [("b", pd.Timestamp("2021-03-14 00:00:00"))],
      date(2021, 3, 15): [("c", pd.Timestamp("2021-03-15 00:00:00")),
                          ("d", pd.Timestamp("2021-03-15 00:00:01"))],
  })
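# A hedged sketch of the selection logic asserted above: keep only rows with
# timestamps strictly between the two bounds, then group the archive links by
# calendar day, oldest first. This is inferred from the test, not taken from
# the real Utils.issues_to_fetch.
def issues_to_fetch_sketch(metadata, newer_than, newer_until):
  """Map each issue date to its [(url, timestamp), ...] pairs."""
  daily_issues = {}
  selected = metadata[(metadata.index > newer_than) &
                      (metadata.index < newer_until)].sort_index()
  for timestamp, row in selected.iterrows():
    daily_issues.setdefault(timestamp.date(), []).append(
        (row["Archive Link"], timestamp))
  return daily_issues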
def test_run_acquire_new_dataset(self):
  """Acquire a new dataset."""

  mock_network = MagicMock()
  mock_network.fetch_metadata.return_value = \
      self.test_utils.load_sample_metadata()
  fake_dataset = pd.DataFrame({
      "date": [pd.Timestamp("2020/1/1")],
      "state": ["ca"],
  })
  mock_network.fetch_dataset.return_value = fake_dataset
  mock_database = MagicMock()
  with mock_database.connect() as mock_connection:
    pass
  type(mock_connection).KEY_COLS = PropertyMock(return_value=["state", "date"])
  mock_connection.get_max_issue.return_value = pd.Timestamp("1900/1/1")

  with patch.object(Utils, 'issues_to_fetch') as mock_issues:
    mock_issues.return_value = {
        pd.Timestamp("2021/3/15"): [
            ("url1", pd.Timestamp("2021-03-15 00:00:00")),
            ("url2", pd.Timestamp("2021-03-15 00:00:00")),
        ]
    }
    result = Utils.update_dataset(database=mock_database,
                                  network=mock_network)

  self.assertTrue(result)
  mock_connection.insert_metadata.assert_called_once()
  args = mock_connection.insert_metadata.call_args[0]
  self.assertEqual(args[:2], (20210315, "url2"))
  pd.testing.assert_frame_equal(
      mock_connection.insert_dataset.call_args[0][1],
      pd.DataFrame({
          "state": ["ca"],
          "date": [pd.Timestamp("2020/1/1")],
      }))
  self.assertEqual(mock_connection.insert_dataset.call_args[0][0], 20210315)
def test_run_acquire_new_dataset(self):
  """Acquire a new dataset."""

  mock_network = MagicMock()
  mock_network.fetch_metadata.return_value = \
      self.test_utils.load_sample_metadata()
  fake_dataset = [1, 2, 3]
  mock_network.fetch_dataset.return_value = fake_dataset
  mock_database = MagicMock()
  with mock_database.connect() as mock_connection:
    pass
  mock_connection.contains_revision.return_value = False

  result = Utils.update_dataset(database=mock_database, network=mock_network)

  self.assertTrue(result)
  mock_connection.insert_metadata.assert_called_once()
  args = mock_connection.insert_metadata.call_args[0]
  self.assertEqual(args[:2], (20201213, 'Sun, 12/13/2020 - 22:36'))
  mock_connection.insert_dataset.assert_called_once_with(
      20201213, fake_dataset)
""" Acquires the "COVID-19 Reported Patient Impact and Hospital Capacity by State" dataset provided by the US Department of Health & Human Services via healthdata.gov. """ # first party from delphi.epidata.acquisition.covid_hosp.common.utils import Utils from delphi.epidata.acquisition.covid_hosp.state_daily.database import Database from delphi.epidata.acquisition.covid_hosp.state_daily.network import Network class Update: def run(network=Network): """Acquire the most recent dataset, unless it was previously acquired. Returns ------- bool Whether a new dataset was acquired. """ return Utils.update_dataset(Database, network) # main entry point Utils.launch_if_main(Update.run, __name__)
def test_int_from_date(self):
  """Convert a YYYY-MM-DD date string to a YYYYMMDD int."""

  self.assertEqual(Utils.int_from_date('2020-11-17'), 20201117)
  self.assertEqual(Utils.int_from_date('2020/11/17'), 20201117)
  self.assertEqual(Utils.int_from_date('2020/11/17 10:00:00'), 20201117)
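# A minimal sketch consistent with these assertions, assuming the conversion
# simply delegates date parsing to pandas; the real Utils.int_from_date may
# parse differently.
def int_from_date_sketch(date_string):
  """Convert a date string such as '2020-11-17' to the int 20201117."""
  return int(pd.to_datetime(date_string).strftime('%Y%m%d'))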