def test_main_unsuccessful(self):
    """Run the main program with failure, then commit changes."""

    # TODO: use an actual argparse object for the args instead of a MagicMock
    args = MagicMock(
        log_file=None,
        data_dir='data',
        is_wip_override=False,
        not_wip_override=False,
        specific_issue_date=False)
    mock_database = MagicMock()
    mock_database.count_all_rows.return_value = 0
    fake_database_impl = lambda: mock_database
    mock_upload_archive = MagicMock(side_effect=Exception('testing'))
    mock_collect_files = MagicMock()
    mock_collect_files.return_value = [("a", False)]

    with self.assertRaises(Exception):
        main(
            args,
            database_impl=fake_database_impl,
            collect_files_impl=mock_collect_files,
            upload_archive_impl=mock_upload_archive)

    self.assertTrue(mock_upload_archive.called)
    self.assertEqual(mock_upload_archive.call_args[0][0], [("a", False)])
    self.assertTrue(mock_database.connect.called)
    self.assertTrue(mock_database.disconnect.called)
    self.assertTrue(mock_database.disconnect.call_args[0][0])
def test_main_early_exit(self):
    """Run the main program with an empty receiving directory."""

    # TODO: use an actual argparse object for the args instead of a MagicMock
    args = MagicMock(
        log_file=None,
        data_dir='data',
        is_wip_override=False,
        not_wip_override=False,
        specific_issue_date=False)
    mock_database = MagicMock()
    mock_database.count_all_rows.return_value = 0
    fake_database_impl = lambda: mock_database
    mock_collect_files = MagicMock()
    mock_collect_files.return_value = []
    mock_upload_archive = MagicMock()

    main(
        args,
        database_impl=fake_database_impl,
        collect_files_impl=mock_collect_files,
        upload_archive_impl=mock_upload_archive)

    self.assertTrue(mock_collect_files.called)
    self.assertEqual(mock_collect_files.call_args[0][0], 'data')
    self.assertFalse(mock_upload_archive.called)
    self.assertFalse(mock_database.connect.called)
    self.assertFalse(mock_database.disconnect.called)
def test_main_successful(self):
    """Run the main program successfully, then commit changes."""

    # TODO: use an actual argparse object for the args instead of a MagicMock
    args = MagicMock(
        data_dir='data',
        is_wip_override=False,
        not_wip_override=False,
        specific_issue_date=False)
    mock_database = MagicMock()
    mock_database.count_all_rows.return_value = 0
    fake_database_impl = lambda: mock_database
    mock_collect_files = MagicMock()
    mock_collect_files.return_value = [("a", False)]
    mock_upload_archive = MagicMock()

    main(
        args,
        database_impl=fake_database_impl,
        collect_files_impl=mock_collect_files,
        upload_archive_impl=mock_upload_archive)

    self.assertTrue(mock_collect_files.called)
    self.assertEqual(mock_collect_files.call_args[0][0], 'data')
    self.assertTrue(mock_upload_archive.called)
    self.assertEqual(mock_upload_archive.call_args[0][0], [("a", False)])
    self.assertTrue(mock_database.connect.called)
    self.assertTrue(mock_database.disconnect.called)
    self.assertTrue(mock_database.disconnect.call_args[0][0])
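# A possible resolution for the TODOs above (a sketch, not wired into the
# tests): argparse.Namespace is a plain attribute container, so a real
# Namespace can stand in for the MagicMock args without changing any
# assertion. The helper name is hypothetical; field names and defaults are
# copied from the MagicMock calls in these tests.
def _make_args(log_file=None, data_dir='data', is_wip_override=False,
               not_wip_override=False, specific_issue_date=False):
    """Build an args object equivalent to the MagicMock used above."""
    import argparse
    return argparse.Namespace(
        log_file=log_file,
        data_dir=data_dir,
        is_wip_override=is_wip_override,
        not_wip_override=not_wip_override,
        specific_issue_date=specific_issue_date)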
def test_uploading(self):
    """Scan, parse, upload, archive, serve, and fetch a covidcast signal."""

    # print full diff if something unexpected comes out
    self.maxDiff = None

    # make some fake data files
    data_dir = 'covid/data'
    source_receiving_dir = data_dir + '/receiving/src-name'
    os.makedirs(source_receiving_dir, exist_ok=True)

    # valid
    with open(source_receiving_dir + '/20200419_state_test.csv', 'w') as f:
        f.write('geo_id,val,se,sample_size\n')
        f.write('ca,1,0.1,10\n')
        f.write('tx,2,0.2,20\n')
        f.write('fl,3,0.3,30\n')

    # valid wip
    with open(source_receiving_dir + '/20200419_state_wip_prototype.csv', 'w') as f:
        f.write('geo_id,val,se,sample_size\n')
        f.write('me,10,0.01,100\n')
        f.write('nd,20,0.02,200\n')
        f.write('wa,30,0.03,300\n')

    # valid wip, with a name length 32<x<64 that should be accepted
    with open(
            source_receiving_dir +
            '/20200419_state_wip_really_long_name_that_will_be_accepted.csv',
            'w') as f:
        f.write('geo_id,val,se,sample_size\n')
        f.write('pa,100,5.4,624\n')

    # invalid: name exceeds the 64-character limit
    with open(
            source_receiving_dir +
            '/20200419_state_wip_really_long_name_that_will_get_truncated_lorem_ipsum_dolor_sit_amet.csv',
            'w') as f:
        f.write('geo_id,val,se,sample_size\n')
        f.write('pa,100,5.4,624\n')

    # invalid
    with open(source_receiving_dir + '/20200420_state_test.csv', 'w') as f:
        f.write('this,header,is,wrong\n')

    # invalid
    with open(source_receiving_dir + '/hello.csv', 'w') as f:
        f.write('file name is wrong\n')

    # upload CSVs
    # TODO: use an actual argparse object for the args instead of a MagicMock
    args = MagicMock(
        data_dir=data_dir,
        is_wip_override=False,
        not_wip_override=False,
        specific_issue_date=False)
    main(args)

    # request CSV data from the API
    response = Epidata.covidcast('src-name', 'test', 'day', 'state', 20200419, '*')

    expected_issue_day = date.today()
    expected_issue = expected_issue_day.strftime("%Y%m%d")

    def apply_lag(expected_epidata):
        for dct in expected_epidata:
            dct['issue'] = int(expected_issue)
            time_value_day = date(
                year=dct['time_value'] // 10000,
                month=dct['time_value'] % 10000 // 100,
                day=dct['time_value'] % 100)
            expected_lag = (expected_issue_day - time_value_day).days
            dct['lag'] = expected_lag
        return expected_epidata

    # verify data matches the CSV
    # NB these are ordered by geo_value
    self.assertEqual(response, {
        'result': 1,
        'epidata': apply_lag([
            {
                'time_value': 20200419,
                'geo_value': 'ca',
                'value': 1,
                'stderr': 0.1,
                'sample_size': 10,
                'direction': None,
                'signal': 'test',
            },
            {
                'time_value': 20200419,
                'geo_value': 'fl',
                'value': 3,
                'stderr': 0.3,
                'sample_size': 30,
                'direction': None,
                'signal': 'test',
            },
            {
                'time_value': 20200419,
                'geo_value': 'tx',
                'value': 2,
                'stderr': 0.2,
                'sample_size': 20,
                'direction': None,
                'signal': 'test',
            },
        ]),
        'message': 'success',
    })

    # request CSV data from the API on WIP signal
    response = Epidata.covidcast(
        'src-name', 'wip_prototype', 'day', 'state', 20200419, '*')

    # verify data matches the CSV
    # NB these are ordered by geo_value
    self.assertEqual(response, {
        'result': 1,
        'epidata': apply_lag([
            {
                'time_value': 20200419,
                'geo_value': 'me',
                'value': 10,
                'stderr': 0.01,
                'sample_size': 100,
                'direction': None,
                'signal': 'wip_prototype',
            },
            {
                'time_value': 20200419,
                'geo_value': 'nd',
                'value': 20,
                'stderr': 0.02,
                'sample_size': 200,
                'direction': None,
                'signal': 'wip_prototype',
            },
            {
                'time_value': 20200419,
                'geo_value': 'wa',
                'value': 30,
                'stderr': 0.03,
                'sample_size': 300,
                'direction': None,
                'signal': 'wip_prototype',
            },
        ]),
        'message': 'success',
    })

    # request CSV data from the API on the signal with name length 32<x<64
    response = Epidata.covidcast(
        'src-name', 'wip_really_long_name_that_will_be_accepted', 'day', 'state',
        20200419, '*')

    # verify data matches the CSV
    self.assertEqual(response, {
        'result': 1,
        'message': 'success',
        'epidata': apply_lag([
            {
                'time_value': 20200419,
                'geo_value': 'pa',
                'value': 100,
                'stderr': 5.4,
                'sample_size': 624,
                'direction': None,
                'signal': 'wip_really_long_name_that_will_be_accepted',
            },
        ]),
    })

    # request CSV data from the API on the long-named signal
    response = Epidata.covidcast(
        'src-name', 'wip_really_long_name_that_will_get_truncated_lorem_ipsum_dolor_s',
        'day', 'state', 20200419, '*')

    # verify data matches the CSV
    # if the CSV failed correctly there should be no results
    self.assertEqual(response, {
        'result': -2,
        'message': 'no results',
    })

    # verify timestamps and default values are reasonable
    self.cur.execute(
        'select value_updated_timestamp, direction_updated_timestamp, direction '
        'from covidcast')
    for value_updated_timestamp, direction_updated_timestamp, direction in self.cur:
        self.assertGreater(value_updated_timestamp, 0)
        self.assertEqual(direction_updated_timestamp, 0)
        self.assertIsNone(direction)

    # verify that the CSVs were archived
    for sig in ["test", "wip_prototype"]:
        path = data_dir + f'/archive/successful/src-name/20200419_state_{sig}.csv.gz'
        self.assertIsNotNone(os.stat(path))
    path = data_dir + '/archive/failed/src-name/20200420_state_test.csv'
    self.assertIsNotNone(os.stat(path))
    path = data_dir + '/archive/failed/unknown/hello.csv'
    self.assertIsNotNone(os.stat(path))
def test_uploading(self):
    """Scan, parse, upload, archive, serve, and fetch a covidcast signal."""

    # print full diff if something unexpected comes out
    self.maxDiff = None

    # make some fake data files
    data_dir = 'covid/data'
    source_receiving_dir = data_dir + '/receiving/src-name'
    log_file_directory = "/var/log/"
    os.makedirs(source_receiving_dir, exist_ok=True)
    os.makedirs(log_file_directory, exist_ok=True)

    # TODO: use an actual argparse object for the args instead of a MagicMock
    args = MagicMock(
        log_file=log_file_directory + "output.log",
        data_dir=data_dir,
        is_wip_override=False,
        not_wip_override=False,
        specific_issue_date=False)

    uploader_column_rename = {
        "geo_id": "geo_value",
        "val": "value",
        "se": "stderr",
        "missing_val": "missing_value",
        "missing_se": "missing_stderr",
    }

    with self.subTest("Valid CSV with correct missing columns"):
        values = pd.DataFrame({
            "geo_id": ["ca", "fl", "tx"],
            "val": [1.0, 2.0, 3.0],
            "se": [0.1, 0.2, 0.3],
            "sample_size": [10.0, 20.0, 30.0],
            "missing_val": [Nans.NOT_MISSING] * 3,
            "missing_se": [Nans.NOT_MISSING] * 3,
            "missing_sample_size": [Nans.NOT_MISSING] * 3,
        })
        signal_name = "test"
        values.to_csv(
            source_receiving_dir + f'/20200419_state_{signal_name}.csv',
            index=False)

        # upload CSVs
        main(args)
        response = Epidata.covidcast(
            'src-name', signal_name, 'day', 'state', 20200419, '*')

        expected_values = pd.concat([
            values,
            pd.DataFrame({
                "time_value": [20200419] * 3,
                "signal": [signal_name] * 3,
                "direction": [None] * 3,
            })
        ], axis=1).rename(columns=uploader_column_rename).to_dict(orient="records")
        expected_response = {
            'result': 1,
            'epidata': self.apply_lag(expected_values),
            'message': 'success',
        }

        self.assertEqual(response, expected_response)
        self.verify_timestamps_and_defaults()

        # Verify that files were archived
        path = data_dir + '/archive/successful/src-name/20200419_state_test.csv.gz'
        self.assertIsNotNone(os.stat(path))

        self.tearDown()
        self.setUp()

    with self.subTest("Valid CSV with no missing columns should set intelligent defaults"):
        values = pd.DataFrame({
            "geo_id": ["ca", "fl", "tx"],
            "val": [None, 2.0, 3.0],
            "se": [0.1, None, 0.3],
            "sample_size": [10.0, 20.0, None],
        }, dtype=object)
        signal_name = "test_no_missing_cols"
        values.to_csv(
            source_receiving_dir + f'/20200419_state_{signal_name}.csv',
            index=False)

        # upload CSVs
        main(args)
        response = Epidata.covidcast(
            'src-name', signal_name, 'day', 'state', 20200419, '*')

        expected_values = pd.concat([
            values,
            pd.DataFrame({
                "time_value": [20200419] * 3,
                "signal": [signal_name] * 3,
                "direction": [None] * 3,
                "missing_value": [Nans.OTHER] + [Nans.NOT_MISSING] * 2,
                "missing_stderr": [Nans.NOT_MISSING, Nans.OTHER, Nans.NOT_MISSING],
                "missing_sample_size": [Nans.NOT_MISSING] * 2 + [Nans.OTHER],
            })
        ], axis=1).rename(columns=uploader_column_rename).to_dict(orient="records")
        expected_response = {
            'result': 1,
            'epidata': self.apply_lag(expected_values),
            'message': 'success',
        }

        self.assertEqual(response, expected_response)
        self.verify_timestamps_and_defaults()

        self.tearDown()
        self.setUp()

    with self.subTest("Invalid, missing with an inf value"):
        values = pd.DataFrame({
            "geo_id": ["tx"],
            "val": [np.inf],
            "se": [0.3],
            "sample_size": [None],
            "missing_value": [Nans.OTHER],
            "missing_stderr": [Nans.NOT_MISSING],
            "missing_sample_size": [Nans.NOT_MISSING],
        })
        signal_name = "test_with_inf"
        values.to_csv(
            source_receiving_dir + f'/20200419_state_{signal_name}.csv',
            index=False)

        # upload CSVs
        main(args)
        response = Epidata.covidcast(
            'src-name', signal_name, 'day', 'state', 20200419, '*')
        expected_response = {'result': -2, 'message': 'no results'}

        self.assertEqual(response, expected_response)
        self.verify_timestamps_and_defaults()

        self.tearDown()
        self.setUp()

    with self.subTest("Valid, missing with incorrect missing codes, fixed by acquisition"):
        values = pd.DataFrame({
            "geo_id": ["tx"],
            "val": [None],
            "se": [0.3],
            "sample_size": [30.0],
            "missing_val": [Nans.NOT_MISSING],
            "missing_se": [Nans.NOT_MISSING],
            "missing_sample_size": [Nans.OTHER],
        }).replace({np.nan: None})
        signal_name = "test_incorrect_missing_codes"
        values.to_csv(
            source_receiving_dir + f'/20200419_state_{signal_name}.csv',
            index=False)

        # upload CSVs
        main(args)
        response = Epidata.covidcast(
            'src-name', signal_name, 'day', 'state', 20200419, '*')

        expected_values_df = pd.concat([
            values,
            pd.DataFrame({
                "time_value": [20200419],
                "signal": [signal_name],
                "direction": [None],
            })
        ], axis=1).rename(columns=uploader_column_rename)
        expected_values_df["missing_value"].iloc[0] = Nans.OTHER
        expected_values_df["missing_sample_size"].iloc[0] = Nans.NOT_MISSING
        expected_values = expected_values_df.to_dict(orient="records")
        expected_response = {
            'result': 1,
            'epidata': self.apply_lag(expected_values),
            'message': 'success',
        }

        self.assertEqual(response, expected_response)
        self.verify_timestamps_and_defaults()

        self.tearDown()
        self.setUp()

    with self.subTest("Valid wip"):
        values = pd.DataFrame({
            "geo_id": ["me", "nd", "wa"],
            "val": [10.0, 20.0, 30.0],
            "se": [0.01, 0.02, 0.03],
            "sample_size": [100.0, 200.0, 300.0],
            "missing_val": [Nans.NOT_MISSING] * 3,
            "missing_se": [Nans.NOT_MISSING] * 3,
            "missing_sample_size": [Nans.NOT_MISSING] * 3,
        })
        signal_name = "wip_prototype"
        values.to_csv(
            source_receiving_dir + f'/20200419_state_{signal_name}.csv',
            index=False)

        # upload CSVs
        main(args)
        response = Epidata.covidcast(
            'src-name', signal_name, 'day', 'state', 20200419, '*')

        expected_values = pd.concat([
            values,
            pd.DataFrame({
                "time_value": [20200419] * 3,
                "signal": [signal_name] * 3,
                "direction": [None] * 3,
            })
        ], axis=1).rename(columns=uploader_column_rename).to_dict(orient="records")
        expected_response = {
            'result': 1,
            'epidata': self.apply_lag(expected_values),
            'message': 'success',
        }

        self.assertEqual(response, expected_response)
        self.verify_timestamps_and_defaults()

        # Verify that files were archived
        path = data_dir + '/archive/successful/src-name/20200419_state_wip_prototype.csv.gz'
        self.assertIsNotNone(os.stat(path))

        self.tearDown()
        self.setUp()

    with self.subTest("Valid signal with name length 32<x<64"):
        values = pd.DataFrame({
            "geo_id": ["pa"],
            "val": [100.0],
            "se": [5.4],
            "sample_size": [624.0],
            "missing_val": [Nans.NOT_MISSING],
            "missing_se": [Nans.NOT_MISSING],
            "missing_sample_size": [Nans.NOT_MISSING],
        })
        signal_name = "wip_really_long_name_that_will_be_accepted"
        values.to_csv(
            source_receiving_dir + f'/20200419_state_{signal_name}.csv',
            index=False)

        # upload CSVs
        main(args)
        response = Epidata.covidcast(
            'src-name', signal_name, 'day', 'state', 20200419, '*')

        expected_values = pd.concat([
            values,
            pd.DataFrame({
                "time_value": [20200419],
                "signal": [signal_name],
                "direction": [None],
            })
        ], axis=1).rename(columns=uploader_column_rename).to_dict(orient="records")
        expected_response = {
            'result': 1,
            'epidata': self.apply_lag(expected_values),
            'message': 'success',
        }

        self.assertEqual(response, expected_response)
        self.verify_timestamps_and_defaults()

        self.tearDown()
        self.setUp()

    with self.subTest("Invalid signal with a too-long name"):
        values = pd.DataFrame({
            "geo_id": ["pa"],
            "val": [100.0],
            "se": [5.4],
            "sample_size": [624.0],
            "missing_val": [Nans.NOT_MISSING],
            "missing_se": [Nans.NOT_MISSING],
            "missing_sample_size": [Nans.NOT_MISSING],
        })
        signal_name = "wip_really_long_name_that_will_get_truncated_lorem_ipsum_dolor_sit_amet"
        values.to_csv(
            source_receiving_dir + f'/20200419_state_{signal_name}.csv',
            index=False)

        # upload CSVs
        main(args)
        response = Epidata.covidcast(
            'src-name', signal_name, 'day', 'state', 20200419, '*')
        expected_response = {'result': -2, 'message': 'no results'}

        self.assertEqual(response, expected_response)
        self.verify_timestamps_and_defaults()

        self.tearDown()
        self.setUp()

    with self.subTest("Invalid file with a wrong header"):
        with open(source_receiving_dir + '/20200420_state_test.csv', 'w') as f:
            f.write('this,header,is,wrong\n')

        main(args)

        path = data_dir + '/archive/failed/src-name/20200420_state_test.csv'
        self.assertIsNotNone(os.stat(path))

        self.tearDown()
        self.setUp()

    with self.subTest("Invalid file with a wrong name"):
        with open(source_receiving_dir + '/hello.csv', 'w') as f:
            f.write('file name is wrong\n')

        main(args)

        path = data_dir + '/archive/failed/unknown/hello.csv'
        self.assertIsNotNone(os.stat(path))

        self.tearDown()
        self.setUp()