def __init__(self, region, target, use_weekly=True):
    """Load PAHO dengue data for `region` and index it by epiweek.

    Fetches rows for epiweeks 201401-202330 through `Epidata.paho_dengue`,
    passes the `target` column through `cum_to_week`, and stores each
    resulting observation under the 'stable' lag.

    Args:
        region: Location identifier forwarded to `Epidata.paho_dengue`.
        target: Name of the row field used as the observation; also
            forwarded to `DengueDataSource.new_instance`.
        use_weekly: Kept for interface compatibility; not read here.

    Raises:
        Whatever `Epidata.check` raises for an unsuccessful response, and
        KeyError if a row lacks 'epiweek' or `target`, or if the result of
        `cum_to_week` has no entry for a reported epiweek.
    """
    self.region = region
    self.target = target
    self.stts = 0

    weeks = Epidata.range(201401, 202330)
    rx = Epidata.check(Epidata.paho_dengue(self.region, weeks))

    self.data = {}
    self.valid = {}

    # Two-way mapping between epiweeks and consecutive integer indices.
    self.ew2i, self.i2ew = {}, {}
    for ew in EW.range_epiweeks(weeks['from'], weeks['to'], inclusive=True):
        index = len(self.ew2i)
        self.ew2i[ew] = index
        self.i2ew[index] = ew

    # NOTE(review): `cum_to_week` presumably turns cumulative counts into
    # weekly values -- confirm against its definition.
    reported = {elt['epiweek']: elt[self.target] for elt in rx}
    w_data = cum_to_week(reported)

    for elt in rx:
        ew = elt['epiweek']
        # Looked up before the range check so that a missing week still
        # fails loudly, exactly as it did before.
        observation = w_data[ew]
        if ew not in self.ew2i:
            continue
        index = self.ew2i[ew]
        if index not in self.data:
            self.data[index] = {}
            self.valid[index] = {'stable': False}
        self.data[index]['stable'] = observation
        self.valid[index]['stable'] = True

    self.weeks = sorted(self.data)
    self.dds = DengueDataSource.new_instance(target)
def get_weeks(epiweek):
    """Build the epiweek bounds and API ranges derived from `epiweek`.

    Returns:
        A tuple `(ew1, ew2, ew3, weeks0, weeks1)` where `ew1` is the fixed
        start week 200330, `ew2` is `epiweek`, `ew3` is one epiweek after
        `epiweek`, `weeks0` is `Epidata.range(ew1, ew2)` and `weeks1` is
        `Epidata.range(ew1, ew3)`.
    """
    first_week = 200330
    following_week = flu.add_epiweeks(epiweek, 1)
    through_current = Epidata.range(first_week, epiweek)
    through_following = Epidata.range(first_week, following_week)
    return (
        first_week,
        epiweek,
        following_week,
        through_current,
        through_following,
    )
def __init__(self, ew2):
    """Prefetch sensor readings and FluView rows up to epiweek `ew2`.

    For every location and sensor name returned by `get_all_sensors`, the
    sensor rows are stored in `self._sensors[name][location]`; the FluView
    rows of each location are stored in `self._fluview[location]`.
    Responses whose 'result' is not 1 are skipped.

    Raises:
        Exception: If a FluView row's region differs from the requested
            location by more than letter case.
    """
    first_week = FIRST_DATA_EPIWEEK
    print('prefetching %d--%d...' % (first_week, ew2))
    weeks = Epidata.range(first_week, ew2)
    si = StateInfo()  # constructed as before; not otherwise used here
    sensor_names, locations = get_all_sensors()

    self._sensors = {}
    self._fluview = {}
    sensor_rows, fluview_rows = 0, 0

    for loc in locations:
        for name in sensor_names:
            reply = Epidata.sensors(secrets.api.sensors, name, loc, weeks)
            if reply['result'] != 1:
                continue
            for row in reply['epidata']:
                row_name, row_loc = row['name'], row['location']
                by_location = self._sensors.setdefault(row_name, {})
                by_location.setdefault(row_loc, []).append(row)
                sensor_rows += 1

        reply = Epidata.fluview(loc, weeks, auth=secrets.api.fluview)
        if reply['result'] != 1:
            continue
        for row in reply['epidata']:
            # Make sure that the case of the returned location is consistent
            # with the case used elsewhere; lower for regions and upper for
            # states.
            if row['region'].lower() != loc.lower():
                raise Exception(row['region'], loc)
            row['region'] = loc
            self._fluview.setdefault(loc, []).append(row)
            fluview_rows += 1

    print('done (%d|%d)' % (sensor_rows, fluview_rows))
def test_acquire_specific_issue(self): """Acquire a new dataset.""" # make sure the data does not yet exist with self.subTest(name='no data yet'): response = Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101)) self.assertEqual(response['result'], -2) # acquire sample data into local database # mock out network calls to external hosts with Database.connect() as db: pre_max_issue = db.get_max_issue() self.assertEqual(pre_max_issue, pd.Timestamp('1900-01-01 00:00:00')) with self.subTest(name='first acquisition'), \ patch.object(Network, 'fetch_metadata', return_value=self.test_utils.load_sample_metadata()) as mock_fetch_meta, \ patch.object(Network, 'fetch_dataset', side_effect=[self.test_utils.load_sample_dataset("dataset0.csv")] ) as mock_fetch: acquired = Utils.update_dataset(Database, Network, date(2021, 3, 12), date(2021, 3, 14)) with Database.connect() as db: post_max_issue = db.get_max_issue() self.assertEqual(post_max_issue, pd.Timestamp('2021-03-13 00:00:00')) self.assertTrue(acquired)
def fetch(weeks):
    """Download each article/hour wiki series and merge them by epiweek.

    One request is made per (article, hour) pair, in order; the n-th request
    is stored under the n-th name in `fields`.

    Returns:
        A dict shaped like an API response: 'result' is 1, 'message' is None
        and 'epidata' is a list of per-epiweek dicts sorted by epiweek. Each
        dict holds 'epiweek' plus one value per series that reported it.
    """
    # epiweek -> {'epiweek': ew, <field name>: value, ...}
    merged = {}
    # position of the next unused name in `fields`
    next_field = 0

    # download each time series individually
    for article in articles:
        for hour in hours:
            reply = Epidata.wiki(article, epiweeks=weeks, hours=hour)
            series = Epidata.check(reply)
            column = fields[next_field]
            next_field += 1
            for row in series:
                week = row['epiweek']
                # create the entry for this epiweek on first sight, then
                # record this series' value in it
                merged.setdefault(week, {'epiweek': week})[column] = row['value']

    # flatten the map into a list matching the API epidata list
    ordered = [merged[week] for week in sorted(merged)]

    # spoof the API response
    return {'result': 1, 'message': None, 'epidata': ordered}
def __init__(self, region):
    """Collect complete 52-week wILI season curves for `region`.

    FluView rows for epiweeks 200330-202330 are grouped into seasons that
    start at week 30; a week before week 30 belongs to the season labelled
    with the previous year. Seasons without exactly 52 values are dropped.
    When both 2008 and 2009 survive, positions 40-51 of 2008 are overwritten
    with those of 2009 and season 2009 is removed.

    Sets `self.seasons` (year -> {offset: wili}), `self.years` (sorted
    season labels) and `self.curves` (year -> list of 52 values).
    """
    self.region = region
    span = Epidata.range(200330, 202330)
    rows = Epidata.check(Epidata.fluview(self.region, span))

    self.seasons = {}
    for row in rows:
        epiweek, value = row['epiweek'], row['wili']
        season, week = EW.split_epiweek(epiweek)
        if week < 30:
            season -= 1
        offset = EW.delta_epiweeks(EW.join_epiweek(season, 30), epiweek)
        bucket = self.seasons.setdefault(season, {})
        if 0 <= offset < 52:
            bucket[offset] = value

    # discard incomplete seasons (iterating over a sorted copy of the keys)
    for season in sorted(self.seasons):
        if len(self.seasons[season]) != 52:
            del self.seasons[season]

    if 2008 in self.seasons and 2009 in self.seasons:
        tail = {k: self.seasons[2009][k] for k in range(40, 52)}
        self.seasons[2008].update(tail)
        del self.seasons[2009]

    self.years = sorted(self.seasons)
    self.curves = {
        season: [self.seasons[season][k] for k in range(52)]
        for season in self.years
    }
def get_training_set_datasetname(location, epiweek, signal, target, signal_to_truth_ew_shift):
    """Pair signal values with ground-truth targets, keyed by truth epiweek.

    Each signal week is shifted by `signal_to_truth_ew_shift` epiweeks to
    find its ground-truth week. The week after `epiweek` (the one being
    predicted) is skipped, and weeks without ground truth are dropped with a
    printed warning.

    Returns:
        `(epiweeks, X, Y)`: the sorted ground-truth epiweeks, the matching
        signal values and the matching target values.
    """
    _, _, predicted_week, weeks0, _ = get_weeks(epiweek)

    auth = secrets.api.datasetname_targets
    target_rows = Epidata.check(
        Epidata.datasetname_targets(auth, target, location, weeks0))
    truth = {row['epiweek']: row['value'] for row in target_rows}

    paired = {}
    missing = 0
    for signal_week in signal:
        truth_week = flu.add_epiweeks(signal_week, signal_to_truth_ew_shift)
        # skip the week we're trying to predict
        if truth_week == predicted_week:
            continue
        features = signal[signal_week]
        if truth_week not in truth:
            missing += 1
            continue
        paired[truth_week] = {'x': features, 'y': truth[truth_week]}

    if missing:
        msg = 'warning: dropped %d/%d signal weeks because ground truth / target was unavailable'
        print(msg % (missing, len(signal)))

    epiweeks = sorted(paired)
    X = [paired[week]['x'] for week in epiweeks]
    Y = [paired[week]['y'] for week in epiweeks]
    return (epiweeks, X, Y)
def _forecast(self, ageGroup, epiweek):
    """Return one full trajectory per user submission for `ageGroup`.

    Each trajectory is the observed FluSurv values from week 40 of the test
    season through `epiweek` (as issued on `epiweek`), followed by one user's
    submitted values.

    Raises:
        Exception: If `epiweek` lies outside week 40 of `self.test_season`
            through week 17 of the following year, or if the API returns
            fewer or more rows than weeks requested.
    """
    # season setup and sanity check
    season_start = flu.join_epiweek(self.test_season, 40)
    season_end = flu.join_epiweek(self.test_season + 1, 17)
    print("test season:", self.test_season, "ew1:", season_start, "epiweek:", epiweek)
    inside_season = season_start <= epiweek <= season_end
    if not inside_season:
        raise Exception('`epiweek` outside of `test_season`')

    # get past values (left half) from the Epidata API
    observed_range = Epidata.range(season_start, epiweek)
    response = Epidata.flusurv('network_all', observed_range, issues=epiweek)
    pinned = [row[ageGroup] for row in Forecaster.Utils.decode(response)]
    expected_length = flu.delta_epiweeks(season_start, epiweek) + 1
    if len(pinned) != expected_length:
        raise Exception('missing ILINet data')

    # get the user submissions (right half) from the database
    print("ageGroup", ageGroup, "epiweek", epiweek)
    submissions = self.fetch_submissions(ageGroup, epiweek)
    user_count = len(submissions)
    self._num_users = user_count
    if self.verbose:
        print(' [EC] %d users found for %s on %d' % (user_count, ageGroup, epiweek))

    # concatenate observed data and user submissions
    trajectories = []
    for submission in submissions:
        trajectories.append(pinned + submission)
    return trajectories
def _get_partial_trajectory(self, epiweek, valid=True):
    """Return the wILI curve from week 30 of the season through `epiweek`.

    Values come from two FluView queries for `self.region`: the default
    ("stable") one, and the one issued on `epiweek` ("unstable"). Unstable
    values take precedence wherever both exist.

    Args:
        epiweek: Last epiweek of the curve. A week before week 30 belongs to
            the season that started in the previous year.
        valid: When true, stable values are used only for weeks more than
            five epiweeks before `epiweek`; the later weeks must come from
            the unstable query. When false, any stable value is accepted.

    Returns:
        A list of wILI values, one per epiweek, oldest first.

    Raises:
        Exception: If a week in the range has no usable value, or if the
            stable query itself fails (propagated from `Epidata.check`).
    """
    year, week = EW.split_epiweek(epiweek)
    if week < 30:
        year -= 1
    ew1 = EW.join_epiweek(year, 30)
    ew2 = epiweek
    limit = EW.add_epiweeks(ew2, -5)
    weeks = Epidata.range(ew1, ew2)

    stable = Epidata.check(Epidata.fluview(self.region, weeks))
    try:
        unstable = Epidata.check(
            Epidata.fluview(self.region, weeks, issues=ew2))
    except Exception:
        # Best effort: a missing issue is tolerated here and reported per
        # week below. `Exception` rather than a bare `except`, so that
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        unstable = []

    wili = {}
    for row in stable:
        ew, value = row['epiweek'], row['wili']
        if not valid or ew < limit:
            wili[ew] = value
    # unstable values overwrite stable ones for the same week
    for row in unstable:
        wili[row['epiweek']] = row['wili']

    curve = []
    for ew in EW.range_epiweeks(ew1, ew2, inclusive=True):
        if ew not in wili:
            kind = 'unstable' if valid else 'any'
            raise Exception('wILI (%s) not available for week %d' % (kind, ew))
        curve.append(wili[ew])

    # final consistency check between the requested span and the result
    n1 = EW.delta_epiweeks(ew1, ew2) + 1
    n2 = len(curve)
    if n1 != n2:
        raise Exception('missing data (expected %d, found %d)' % (n1, n2))
    return curve
def __init__(self, region, target):
    """Load target observations for `region` and index them by epiweek.

    Rows for epiweeks 199301-202330 are fetched through
    `Epidata.datasetname_targets`, passed through `mutate_rows_as_if_lagged`
    and stored per week index under the 'stable' lag.

    Sets `self.ew2i` / `self.i2ew` (epiweek <-> consecutive index),
    `self.data` (index -> {'stable': value}), `self.valid`
    (index -> {'stable': True}) and `self.weeks` (sorted indices with data).
    """
    self.region = region
    self.target = target
    weeks = Epidata.range(199301, 202330)
    auth = secrets.api.datasetname_targets
    # NOTE(review): the meaning of the 1000000 argument is defined by
    # `mutate_rows_as_if_lagged`, which is not visible here -- confirm.
    rx = mutate_rows_as_if_lagged(Epidata.check(Epidata.datasetname_targets(auth, self.target, self.region, weeks)), 1000000)
    self.data = {}
    self.valid = {}
    self.ew2i, self.i2ew = {}, {}
    # assign consecutive indices to every epiweek in the requested span
    for ew in EW.range_epiweeks(weeks['from'], weeks['to'], inclusive=True):
        # if 200916 <= ew <= 201015:
        #   continue
        i = len(self.ew2i)
        self.ew2i[ew] = i
        self.i2ew[i] = ew
    for row in rx:
        ew, observation, lag = row['epiweek'], row['value'], row['lag']
        # rows outside the indexed span are ignored
        if ew not in self.ew2i:
            continue
        i = self.ew2i[ew]
        if i not in self.data:
            self.data[i] = {}
            self.valid[i] = {'stable': False}
        # the row's own lag is discarded; everything is filed as 'stable'
        lag = 'stable'
        self.data[i][lag] = observation
        self.valid[i][lag] = True
    self.weeks = sorted(list(self.data.keys()))
    # NOTE(review): as shown, this loop has no effect -- its only statement
    # is `continue`. Confirm whether a loop body was lost.
    for i in self.weeks:
        if 'stable' not in self.data[i]:
            continue
def test_query_by_issue(self):
    """Query with and without specifying an issue."""

    checked_field = 'critical_staffing_shortage_today_yes'

    # insert dummy data
    def insert_issue(cur, issue, value, record_type):
        so_many_nulls = ', '.join(['null'] * 57)
        cur.execute(f'''insert into covid_hosp_state_timeseries values (
          0, {issue}, 'PA', 20201118, {value}, {so_many_nulls}, '{record_type}'
        )''')

    # inserting out of order to test server-side order by
    # also inserting two for 20201201 to test tiebreaker.
    fixtures = (
        (20201201, 123, 'T'),
        (20201201, 321, 'D'),
        (20201203, 789, 'T'),
        (20201202, 456, 'T'),
    )
    with Database.connect() as db, db.new_cursor() as cur:
        for issue, value, record_type in fixtures:
            insert_issue(cur, issue, value, record_type)

    # request without issue (defaulting to latest issue)
    with self.subTest(name='no issue (latest)'):
        response = Epidata.covid_hosp('PA', 20201118)
        self.assertEqual(response['result'], 1)
        self.assertEqual(len(response['epidata']), 1)
        latest = response['epidata'][0]
        self.assertEqual(latest['issue'], 20201203)
        self.assertEqual(latest[checked_field], 789)

    # request for specific issue
    with self.subTest(name='specific single issue'):
        response = Epidata.covid_hosp('PA', 20201118, issues=20201201)
        self.assertEqual(response['result'], 1)
        self.assertEqual(len(response['epidata']), 1)
        only = response['epidata'][0]
        self.assertEqual(only['issue'], 20201201)
        self.assertEqual(only[checked_field], 321)

    # request for multiple issues
    with self.subTest(name='specific multiple issues'):
        issues = Epidata.range(20201201, 20201231)
        response = Epidata.covid_hosp('PA', 20201118, issues=issues)
        self.assertEqual(response['result'], 1)
        self.assertEqual(len(response['epidata']), 3)
        rows = response['epidata']
        # first entry checks the tiebreaker, the rest the server-side order by
        expected = ((20201201, 321), (20201202, 456), (20201203, 789))
        for row, (issue, value) in zip(rows, expected):
            self.assertEqual(row['issue'], issue)
            self.assertEqual(row[checked_field], value)
def get_training_set(location, epiweek, signal, valid):
    """Pair signal values with wILI for every usable signal week.

    wILI as issued on `epiweek` ("unstable") is preferred; the default
    ("stable") wILI is the fallback. The week after `epiweek` is skipped,
    and weeks with no wILI at all are dropped with a printed warning.

    Args:
        location: Location forwarded to `Epidata.fluview`.
        epiweek: Most recent epiweek with data; the following week is the
            one being predicted.
        signal: Mapping from epiweek to signal value.
        valid: When true, a week within five epiweeks of the predicted week
            must have unstable wILI.

    Returns:
        Whatever `get_training_set_data` builds from the
        `{epiweek: {'x': signal, 'y': wili}}` mapping.

    Raises:
        Exception: If `valid` is set and a recent week lacks unstable wILI,
            or if the stable query fails (propagated from `Epidata.check`).
    """
    ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
    auth = secrets.api.fluview

    try:
        result = Epidata.fluview(location, weeks0, issues=ew2, auth=auth)
        rows = Epidata.check(result)
        unstable = extract(rows, ['wili'])
    except Exception:
        # Best effort: missing unstable data is handled per week below.
        # `Exception` rather than a bare `except`, so that KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        unstable = {}

    rows = Epidata.check(Epidata.fluview(location, weeks0, auth=auth))
    stable = extract(rows, ['wili'])

    data = {}
    num_dropped = 0
    for ew in signal:
        # skip the week being predicted
        if ew == ew3:
            continue
        sig = signal[ew]
        if ew in unstable:
            wili = unstable[ew]
        else:
            if valid and flu.delta_epiweeks(ew, ew3) <= 5:
                raise Exception('unstable wILI is not available on %d' % ew)
            if ew not in stable:
                num_dropped += 1
                continue
            wili = stable[ew]
        data[ew] = {'x': sig, 'y': wili}

    if num_dropped:
        msg = 'warning: dropped %d/%d signal weeks because (w)ILI was unavailable'
        print(msg % (num_dropped, len(signal)))
    return get_training_set_data(data)
def download_preliminary_fluview(f):
    """Write national wILI at lags 0, 1 and 2 to `f` as CSV lines.

    Each line is `<epiweek>,nat_<lag>,<wili to five decimals>`.
    """
    for lag in range(3):
        print('preliminary fluview', lag)
        label = 'nat_%d' % lag
        response = Epidata.fluview('nat', weeks, lag=lag, auth=secrets.api.fluview)
        for record in Epidata.check(response):
            epiweek = record['epiweek']
            wili = record['wili']
            f.write('%d,%s,%.5f\n' % (epiweek, label, wili))
def download_fluview(f):
    """Write wILI for every location in `Locations.region_list` to `f`.

    Each line is `<epiweek>,<location>,<wili to five decimals>`.
    """
    for region in Locations.region_list:
        print('fluview', region)
        response = Epidata.fluview(region, weeks, auth=secrets.api.fluview)
        for record in Epidata.check(response):
            epiweek = record['epiweek']
            wili = record['wili']
            f.write('%d,%s,%.5f\n' % (epiweek, region, wili))
def fetch(weeks):
    """Fetch GFT for `location`, clipping the start for post-2013 requests.

    The GFT model update of 2013 significantly improved the GFT signal, so
    much so that training on the old data will severely hurt the predictive
    power of the new data. To overcome this, GFT versions before and after
    mid-2013 are treated as different signals: when the requested range ends
    at or after 201340, it is clipped to start no earlier than 201331.
    """
    last_week = weeks['to']
    if last_week < 201340:
        # entirely the old model; request the range unchanged
        return Epidata.gft(location, weeks)
    # this is the new GFT model, so throw out data from the old model
    first_week = max(weeks['from'], 201331)
    return Epidata.gft(location, Epidata.range(first_week, last_week))
def test_async_epidata(self):
    """Check that batched async requests match standard covidcast calls."""

    # insert dummy data
    insert_sql = f'''
      INSERT INTO
        `covidcast` (`id`, `source`, `signal`, `time_type`, `geo_type`,
        `time_value`, `geo_value`, `value_updated_timestamp`,
        `value`, `stderr`, `sample_size`, `direction_updated_timestamp`,
        `direction`, `issue`, `lag`, `is_latest_issue`, `is_wip`,`missing_value`,
        `missing_stderr`,`missing_sample_size`)
      VALUES
        (0, 'src', 'sig', 'day', 'county', 20200414, '11111',
          123, 10, 11, 12, 456, 13, 20200414, 0, 1, False,
          {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
        (0, 'src', 'sig', 'day', 'county', 20200414, '22222',
          123, 20, 21, 22, 456, 23, 20200414, 0, 1, False,
          {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
        (0, 'src', 'sig', 'day', 'county', 20200414, '33333',
          123, 30, 31, 32, 456, 33, 20200414, 0, 1, False,
          {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
        (0, 'src', 'sig', 'day', 'msa', 20200414, '11111',
          123, 40, 41, 42, 456, 43, 20200414, 0, 1, False,
          {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
        (0, 'src', 'sig', 'day', 'msa', 20200414, '22222',
          123, 50, 51, 52, 456, 53, 20200414, 0, 1, False,
          {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
        (0, 'src', 'sig', 'day', 'msa', 20200414, '33333',
          123, 60, 61, 62, 456, 634, 20200414, 0, 1, False,
          {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING})
    '''
    self.cur.execute(insert_sql)
    self.cnx.commit()

    # one request for a county that was inserted, one for a county that was not
    present_county = {
        'source': 'covidcast',
        'data_source': 'src',
        'signals': 'sig',
        'time_type': 'day',
        'geo_type': 'county',
        'geo_value': '11111',
        'time_values': '20200414'
    }
    absent_county = {
        'source': 'covidcast',
        'data_source': 'src',
        'signals': 'sig',
        'time_type': 'day',
        'geo_type': 'county',
        'geo_value': '00000',
        'time_values': '20200414'
    }
    test_output = Epidata.async_epidata(
        [present_county, absent_county] * 12, batch_size=10)
    responses = [item[0] for item in test_output]

    # check response is same as standard covidcast call, using 24 calls to
    # test batch sizing
    expected_pair = [
        Epidata.covidcast('src', 'sig', 'day', 'county', 20200414, '11111'),
        Epidata.covidcast('src', 'sig', 'day', 'county', 20200414, '00000'),
    ]
    self.assertEqual(responses, expected_pair * 12)
def test_query_by_issue(self):
    """Query with and without specifying an issue."""

    checked_field = 'critical_staffing_shortage_today_yes'

    # inserting out of order to test server-side order by
    # also inserting two for 20201201 to test tiebreaker.
    fixtures = (
        (20201201, 123, 'T'),
        (20201201, 321, 'D'),
        (20201203, 789, 'T'),
        (20201202, 456, 'T'),
    )
    with Database.connect() as db, db.new_cursor() as cur:
        for issue, value, record_type in fixtures:
            self.insert_issue(cur, issue, value, record_type)

    # request without issue (defaulting to latest issue)
    with self.subTest(name='no issue (latest)'):
        response = Epidata.covid_hosp('PA', 20201118)
        self.assertEqual(response['result'], 1)
        self.assertEqual(len(response['epidata']), 1)
        latest = response['epidata'][0]
        self.assertEqual(latest['issue'], 20201203)
        self.assertEqual(latest[checked_field], 789)

    # request for specific issue
    with self.subTest(name='specific single issue'):
        response = Epidata.covid_hosp('PA', 20201118, issues=20201201)
        self.assertEqual(response['result'], 1)
        self.assertEqual(len(response['epidata']), 1)
        only = response['epidata'][0]
        self.assertEqual(only['issue'], 20201201)
        self.assertEqual(only[checked_field], 321)

    # request for multiple issues
    with self.subTest(name='specific multiple issues'):
        issues = Epidata.range(20201201, 20201231)
        response = Epidata.covid_hosp('PA', 20201118, issues=issues)
        self.assertEqual(response['result'], 1)
        self.assertEqual(len(response['epidata']), 3)
        rows = response['epidata']
        # first entry checks the tiebreaker, the rest the server-side order by
        expected = ((20201201, 321), (20201202, 456), (20201203, 789))
        for row, (issue, value) in zip(rows, expected):
            self.assertEqual(row['issue'], issue)
            self.assertEqual(row[checked_field], value)
def get_training_set(location, epiweek, signal, valid):
    """Pair signal values with PAHO dengue counts for matching epiweeks.

    The week after `epiweek` and any signal week without a dengue value are
    left out. `valid` is accepted but not read here.

    Returns:
        Whatever `get_training_set_data` builds from the
        `{epiweek: {'x': signal, 'y': num_dengue}}` mapping.
    """
    _, _, predicted_week, weeks0, _ = get_weeks(epiweek)
    response = Epidata.paho_dengue(location, weeks0)
    stable = extract(Epidata.check(response), 'num_dengue', to_weekly=True)

    paired = {
        week: {'x': signal[week], 'y': stable[week]}
        for week in signal
        if week != predicted_week and week in stable
    }
    return get_training_set_data(paired)
def test_async_epidata_fail(self):
    """Check that `async_epidata` raises a 404 `ClientResponseError`."""
    request = {
        'source': 'covidcast',
        'data_source': 'src',
        'signals': 'sig',
        'time_type': 'day',
        'geo_type': 'county',
        'geo_value': '11111',
        'time_values': '20200414'
    }
    expected_message = "404, message='NOT FOUND'"
    with pytest.raises(ClientResponseError, match=expected_message):
        Epidata.async_epidata([request])
def test_query_by_issue(self):
    """Query with and without specifying an issue."""

    checked_field = 'hospital_onset_covid'

    # insert dummy data
    def insert_issue(cur, issue, value):
        so_many_nulls = ', '.join(['null'] * 51)
        cur.execute(f'''insert into covid_hosp values (
          0, {issue}, 'PA', 20201118, {value}, {so_many_nulls}
        )''')

    # inserting out of order to test server-side order by
    fixtures = ((20201201, 123), (20201203, 789), (20201202, 456))
    with Database.connect() as db, db.new_cursor() as cur:
        for issue, value in fixtures:
            insert_issue(cur, issue, value)

    # request without issue (defaulting to latest issue)
    with self.subTest(name='no issue (latest)'):
        response = Epidata.covid_hosp('PA', 20201118)
        self.assertEqual(response['result'], 1)
        self.assertEqual(len(response['epidata']), 1)
        latest = response['epidata'][0]
        self.assertEqual(latest['issue'], 20201203)
        self.assertEqual(latest[checked_field], 789)

    # request for specific issue
    with self.subTest(name='specific single issue'):
        response = Epidata.covid_hosp('PA', 20201118, issues=20201201)
        self.assertEqual(response['result'], 1)
        self.assertEqual(len(response['epidata']), 1)
        only = response['epidata'][0]
        self.assertEqual(only['issue'], 20201201)
        self.assertEqual(only[checked_field], 123)

    # request for multiple issues
    with self.subTest(name='specific multiple issues'):
        issues = Epidata.range(20201201, 20201231)
        response = Epidata.covid_hosp('PA', 20201118, issues=issues)
        self.assertEqual(response['result'], 1)
        self.assertEqual(len(response['epidata']), 3)
        rows = response['epidata']
        expected = ((20201201, 123), (20201202, 456), (20201203, 789))
        for row, (issue, value) in zip(rows, expected):
            self.assertEqual(row['issue'], issue)
            self.assertEqual(row[checked_field], value)
def _get_unstable(self, region, lag):
    """Return `{epiweek: value}` at the given `lag` for past seasons.

    Covers week 40 through week 20 of the next year for every season from
    2010 up to, but not including, `self.test_season`. For the wILI forecast
    type the value is FluView 'wili' for `region`; otherwise it is the
    `region` column of the FluSurv 'network_all' rows.
    """
    ranges = [
        Epidata.range(flu.join_epiweek(season, 40), flu.join_epiweek(season + 1, 20))
        for season in range(2010, self.test_season)
    ]

    if self.forecast_type == ForecastType.WILI:
        response = Epidata.fluview(region, ranges, lag=lag)
        column = 'wili'
    else:
        response = Epidata.flusurv('network_all', ranges, lag=lag)
        column = region

    rows = Forecaster.Utils.decode(response)
    return {row['epiweek']: row[column] for row in rows}
def test_request_method(self, get, post):
    """Test that a GET request is default and POST is used if a 414 is returned."""
    query = ('src', 'sig', 'day', 'county', 20200414, '01234')

    with self.subTest(name='get request'):
        Epidata.covidcast(*query)
        get.assert_called_once()
        post.assert_not_called()

    with self.subTest(name='post request'):
        # make the mocked GET report status 414 so the client retries via POST
        get.return_value = MagicMock(status_code=414)
        Epidata.covidcast(*query)
        # one from post test and one from get test
        self.assertEqual(get.call_count, 2)
        post.assert_called_once()
def test_round_trip(self):
    """Make a simple round-trip with some sample data."""

    # insert dummy data
    insert_sql = '''
      INSERT INTO
        `fluview` (`id`, `release_date`, `issue`, `epiweek`, `region`,
        `lag`, `num_ili`, `num_patients`, `num_providers`, `wili`, `ili`,
        `num_age_0`, `num_age_1`, `num_age_2`, `num_age_3`, `num_age_4`, `num_age_5`)
      VALUES
        (0, "2020-04-07", 202021, 202020, "nat", 1, 2, 3, 4, 3.14159, 1.41421,
          10, 11, 12, 13, 14, 15),
        (0, "2020-04-28", 202022, 202022, "hhs1", 5, 6, 7, 8, 1.11111, 2.22222,
          20, 21, 22, 23, 24, 25)
    '''
    self.cur.execute(insert_sql)
    self.cnx.commit()

    # the metadata summarises both inserted rows
    expected = {
        'result': 1,
        'epidata': [{
            'latest_update': '2020-04-28',
            'latest_issue': 202022,
            'table_rows': 2,
        }],
        'message': 'success',
    }

    # make the request and assert that the right data came back
    response = Epidata.fluview_meta()
    self.assertEqual(response, expected)
def test_round_trip(self):
    """Make a simple round-trip with some sample data."""

    # insert dummy data
    insert_sql = '''
      insert into fluview values
        (0, "2020-04-07", 202021, 202020, "nat", 1, 2, 3, 4, 3.14159, 1.41421,
          10, 11, 12, 13, 14, 15),
        (0, "2020-04-28", 202022, 202022, "hhs1", 5, 6, 7, 8, 1.11111, 2.22222,
          20, 21, 22, 23, 24, 25)
    '''
    self.cur.execute(insert_sql)
    self.cnx.commit()

    # the metadata summarises both inserted rows
    expected = {
        'result': 1,
        'epidata': [{
            'latest_update': '2020-04-28',
            'latest_issue': 202022,
            'table_rows': 2,
        }],
        'message': 'success',
    }

    # make the request and assert that the right data came back
    response = Epidata.fluview_meta()
    self.assertEqual(response, expected)
def test_covidcast_meta(self):
    """Test that the covidcast_meta endpoint returns expected data."""

    # insert dummy data
    insert_sql = '''
      insert into covidcast values
        (0, 'src', 'sig', 'day', 'county', 20200414, '01234',
          123, 1.5, 2.5, 3.5, 456, 4)
    '''
    self.cur.execute(insert_sql)
    self.cnx.commit()

    # with a single row, min, max and mean all equal its value
    expected_entry = {
        'data_source': 'src',
        'signal': 'sig',
        'time_type': 'day',
        'geo_type': 'county',
        'min_time': 20200414,
        'max_time': 20200414,
        'num_locations': 1,
        'min_value': 1.5,
        'max_value': 1.5,
        'mean_value': 1.5,
        'stdev_value': 0,
        'last_update': 123,
    }
    expected = {
        'result': 1,
        'epidata': [expected_entry],
        'message': 'success',
    }

    # fetch data and check result
    response = Epidata.covidcast_meta()
    self.assertEqual(response, expected)
def test_covidcast(self):
    """Test that the covidcast endpoint returns expected data."""

    # insert dummy data
    insert_sql = '''
      insert into covidcast values
        (0, 'src', 'sig', 'day', 'county', 20200414, '01234',
          123, 1.5, 2.5, 3.5, 456, 4)
    '''
    self.cur.execute(insert_sql)
    self.cnx.commit()

    expected_row = {
        'time_value': 20200414,
        'geo_value': '01234',
        'value': 1.5,
        'stderr': 2.5,
        'sample_size': 3.5,
        'direction': 4,
    }
    expected = {
        'result': 1,
        'epidata': [expected_row],
        'message': 'success',
    }

    # fetch data and check result
    response = Epidata.covidcast(
        'src', 'sig', 'day', 'county', 20200414, '01234')
    self.assertEqual(response, expected)
def _get_stable(self, region):
    """Return `{epiweek: value}` for past seasons, without a lag filter.

    Covers week 40 plus the following 37 epiweeks for every season from 2003
    up to, but not including, `self.test_season`; season 2009 is left out.
    For the wILI forecast type the value is FluView 'wili' for `region`;
    otherwise it is the `region` column of the FluSurv 'network_all' rows.
    """
    ranges = []
    for season in range(2003, self.test_season):
        if season != 2009:
            first = flu.join_epiweek(season, 40)
            last = flu.add_epiweeks(first, 37)
            ranges.append(Epidata.range(first, last))

    if self.forecast_type == ForecastType.WILI:
        response = Epidata.fluview(region, ranges)
        column = 'wili'
    else:
        response = Epidata.flusurv('network_all', ranges)
        column = region

    rows = Forecaster.Utils.decode(response)
    return {row['epiweek']: row[column] for row in rows}
def _forecast(self, region, epiweek):
    """Return one full trajectory per user submission for `region`.

    Each trajectory is the observed wILI from week 40 of the test season
    through `epiweek` (as issued on `epiweek`), followed by one user's
    submitted values.

    Raises:
        Exception: If `epiweek` lies outside week 40 of `self.test_season`
            through week 20 of the following year, or if the API returns
            fewer or more rows than weeks requested.
    """
    # season setup and sanity check
    season_start = flu.join_epiweek(self.test_season, 40)
    season_end = flu.join_epiweek(self.test_season + 1, 20)
    inside_season = season_start <= epiweek <= season_end
    if not inside_season:
        raise Exception('`epiweek` outside of `test_season`')

    # get past values (left half) from the Epidata API
    observed_range = Epidata.range(season_start, epiweek)
    response = Epidata.fluview(region, observed_range, issues=epiweek)
    pinned = [row['wili'] for row in Forecaster.Utils.decode(response)]
    expected_length = flu.delta_epiweeks(season_start, epiweek) + 1
    if len(pinned) != expected_length:
        raise Exception('missing ILINet data')

    # get the user submissions (right half) from the database
    submissions = self.fetch_submissions(region, epiweek)
    user_count = len(submissions)
    self._num_users = user_count
    print(' [EC] %d users found for %s on %d' % (user_count, region, epiweek))

    # concatenate observed data and user submissions
    trajectories = []
    for submission in submissions:
        trajectories.append(pinned + submission)
    return trajectories
def fetch(weeks):
    """Fetch CDC rows for `location` with `fields` log-transformed in place.

    It appears that log-transformed counts provide a much better fit, so
    every column named in `fields` is replaced by `log(1 + value)`. A
    response without an 'epidata' key is returned untouched.
    """
    response = Epidata.cdc(secrets.api.cdc, weeks, location)
    if 'epidata' not in response:
        return response
    for record in response['epidata']:
        for column in fields:
            record[column] = np.log(1. + record[column])
    return response
def test_acquire_dataset(self):
    """Acquire a new dataset, then check that re-acquiring it is a no-op.

    Network access is replaced with mocks that return the sample metadata
    and sample datasets from `self.test_utils`.
    """

    # make sure the data does not yet exist
    with self.subTest(name='no data yet'):
        response = Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101))
        # the response is passed as the failure message to ease debugging
        self.assertEqual(response['result'], -2, response)

    # acquire sample data into local database
    # mock out network calls to external hosts
    # `fetch_dataset` returns the three side_effect items in order, one per call
    with self.subTest(name='first acquisition'), \
         patch.object(Network, 'fetch_metadata', return_value=self.test_utils.load_sample_metadata()) as mock_fetch_meta, \
         patch.object(Network, 'fetch_dataset', side_effect=[self.test_utils.load_sample_dataset("dataset0.csv"), # dataset for 3/13
                                                             self.test_utils.load_sample_dataset("dataset0.csv"), # first dataset for 3/15
                                                             self.test_utils.load_sample_dataset()] # second dataset for 3/15
                      ) as mock_fetch:
        acquired = Update.run()
        self.assertTrue(acquired)
        # metadata is expected to be fetched exactly once per run
        self.assertEqual(mock_fetch_meta.call_count, 1)

    # make sure the data now exists
    with self.subTest(name='initial data checks'):
        response = Epidata.covid_hosp('WY', Epidata.range(20200101, 20210101))
        self.assertEqual(response['result'], 1)
        self.assertEqual(len(response['epidata']), 1)
        row = response['epidata'][0]
        self.assertEqual(row['state'], 'WY')
        self.assertEqual(row['date'], 20201209)
        self.assertEqual(row['issue'], 20210315)
        self.assertEqual(row['critical_staffing_shortage_today_yes'], 8)
        self.assertEqual(row['total_patients_hospitalized_confirmed_influenza_covid_coverage'], 56)
        actual = row['inpatient_bed_covid_utilization']
        expected = 0.11729857819905214
        # float comparison, hence assertAlmostEqual
        self.assertAlmostEqual(actual, expected)
        self.assertIsNone(row['critical_staffing_shortage_today_no'])

        # expect 118 fields per row
        # NOTE(review): this comment previously said 61 fields (63 database
        # columns, except `id` and `record_type`), which contradicts the
        # assertion below -- confirm the current column count.
        self.assertEqual(len(row), 118)

    # the earlier issue (20210313) must be queryable as well
    with self.subTest(name='all date batches acquired'):
        response = Epidata.covid_hosp('WY', Epidata.range(20200101, 20210101), issues=20210313)
        self.assertEqual(response['result'], 1)

    # re-acquisition of the same dataset should be a no-op
    with self.subTest(name='second acquisition'), \
         patch.object(Network, 'fetch_metadata', return_value=self.test_utils.load_sample_metadata()) as mock_fetch_meta, \
         patch.object(Network, 'fetch_dataset', return_value=self.test_utils.load_sample_dataset()) as mock_fetch:
        acquired = Update.run()
        self.assertFalse(acquired)

    # make sure the data still exists
    with self.subTest(name='final data checks'):
        response = Epidata.covid_hosp('WY', Epidata.range(20200101, 20210101))
        self.assertEqual(response['result'], 1)
        self.assertEqual(len(response['epidata']), 1)