def _forecast(self, ageGroup, epiweek):
    """Return one candidate curve per user submission for `ageGroup`.

    Each curve is the list of observed values (week 40 of `self.test_season`
    through `epiweek`, fetched from `Epidata.flusurv` as of issue `epiweek`)
    followed by one user's submitted values.

    Side effects: stores the number of submissions in `self._num_users` and
    prints progress information.

    Raises:
        Exception: if `epiweek` lies outside week 40 of the test season
            through week 17 of the following year, or if the number of
            observed rows does not match the number of elapsed weeks.
    """
    # season setup and sanity check
    season_start = flu.join_epiweek(self.test_season, 40)
    season_end = flu.join_epiweek(self.test_season + 1, 17)
    print("test season:", self.test_season, "ew1:", season_start,
          "epiweek:", epiweek)
    in_season = season_start <= epiweek <= season_end
    if not in_season:
        raise Exception('`epiweek` outside of `test_season`')

    # observed values (left half of every curve) come from the Epidata API
    week_range = Epidata.range(season_start, epiweek)
    response = Epidata.flusurv('network_all', week_range, issues=epiweek)
    pinned = []
    for row in Forecaster.Utils.decode(response):
        pinned.append(row[ageGroup])
    if len(pinned) != flu.delta_epiweeks(season_start, epiweek) + 1:
        raise Exception('missing ILINet data')

    # user submissions (right half of every curve) come from the database
    print("ageGroup", ageGroup, "epiweek", epiweek)
    submissions = self.fetch_submissions(ageGroup, epiweek)
    user_count = len(submissions)
    self._num_users = user_count
    if self.verbose:
        print(' [EC] %d users found for %s on %d' %
              (user_count, ageGroup, epiweek))

    # stitch the observed prefix onto each submitted suffix
    curves = []
    for submitted in submissions:
        curves.append(pinned + submitted)
    return curves
def _get_unstable(self, region, lag):
    """Return `{epiweek: value}` fetched with the Epidata `lag` argument.

    Covers week 40 through week 20 of the following year for every season
    from 2010 up to (but not including) `self.test_season`. For the WILI
    forecast type the values are the `wili` field of `Epidata.fluview`;
    otherwise they are the `region` column of `Epidata.flusurv` for
    'network_all'.
    """
    ranges = [
        Epidata.range(flu.join_epiweek(s + 0, 40),
                      flu.join_epiweek(s + 1, 20))
        for s in range(2010, self.test_season)
    ]
    if self.forecast_type == ForecastType.WILI:
        response = Epidata.fluview(region, ranges, lag=lag)
        column = 'wili'
    else:
        response = Epidata.flusurv('network_all', ranges, lag=lag)
        column = region
    rows = Forecaster.Utils.decode(response)
    return {row['epiweek']: row[column] for row in rows}
def _get_partial_trajectory(self, epiweek, valid=True):
    """Return the wILI curve from week 30 of the season through `epiweek`.

    The season start is week 30 of the year containing `epiweek`, or of the
    previous year when the week number of `epiweek` is below 30.

    Values come from two `Epidata.fluview` queries for `self.region`:
    the default query ("stable") and the query as of issue `epiweek`
    ("unstable"). Unstable values always override stable ones. When `valid`
    is true, stable values are only used for weeks strictly earlier than
    five weeks before `epiweek`.

    Args:
        epiweek: last epiweek of the returned curve.
        valid: when true, ignore stable values for the most recent weeks.

    Returns:
        A list with one wILI value per week, in chronological order.

    Raises:
        Exception: if any week in the range has no usable value, or the
            curve length does not match the number of weeks in the range.
    """
    year, week = EW.split_epiweek(epiweek)
    if week < 30:
        year -= 1
    ew1 = EW.join_epiweek(year, 30)
    ew2 = epiweek
    limit = EW.add_epiweeks(ew2, -5)
    weeks = Epidata.range(ew1, ew2)

    stable = Epidata.check(Epidata.fluview(self.region, weeks))
    try:
        unstable = Epidata.check(
            Epidata.fluview(self.region, weeks, issues=ew2))
    except Exception:
        # Best effort: a failed issue-specific lookup means "no unstable
        # data". Catching `Exception` (rather than a bare `except`) lets
        # KeyboardInterrupt and SystemExit propagate.
        unstable = []

    wili = {}
    for row in stable:
        ew, value = row['epiweek'], row['wili']
        if not valid or ew < limit:
            wili[ew] = value
    # values as of issue `epiweek` take precedence over stable values
    for row in unstable:
        wili[row['epiweek']] = row['wili']

    curve = []
    for ew in EW.range_epiweeks(ew1, ew2, inclusive=True):
        if ew not in wili:
            kind = 'unstable' if valid else 'any'
            raise Exception(
                'wILI (%s) not available for week %d' % (kind, ew))
        curve.append(wili[ew])

    # defensive length check against the expected number of weeks
    n1 = EW.delta_epiweeks(ew1, ew2) + 1
    n2 = len(curve)
    if n1 != n2:
        raise Exception('missing data (expected %d, found %d)' % (n1, n2))
    return curve
def get_ilinet_data(row):
    """Convert one row of an ILINet export into a dict of typed fields.

    Returns `None` for header rows (a single-cell row, or a row whose first
    cell is 'REGION TYPE') and for rows whose unweighted ILI cell (index 5)
    is 'X'.

    Raises:
        Exception: if a 'REGION TYPE' header row differs from the expected
            column layout.
    """
    expected_header = [
        'REGION TYPE', 'REGION', 'YEAR', 'WEEK', '% WEIGHTED ILI',
        '%UNWEIGHTED ILI', 'AGE 0-4', 'AGE 25-49', 'AGE 25-64', 'AGE 5-24',
        'AGE 50-64', 'AGE 65', 'ILITOTAL', 'NUM. OF PROVIDERS',
        'TOTAL PATIENTS',
    ]
    is_header = row[0] == 'REGION TYPE'
    if is_header and row != expected_header:
        raise Exception('header row has changed')
    if len(row) == 1 or is_header:
        # this is a header row
        return None
    if row[5] == 'X':
        # ILI isn't reported, ignore this row
        return None

    record = {
        'location': fluview_locations.get_location_name(*row[:2]),
        'epiweek': join_epiweek(int(row[2]), int(row[3])),
        'wili': optional_float(*row[4:6]),
        'ili': float(row[5]),
    }
    # Output key -> source column. The age columns are not in ascending
    # order in the export, hence the explicit index for each key.
    int_columns = (
        ('age0', 6),
        ('age1', 9),
        ('age2', 8),
        ('age3', 7),
        ('age4', 10),
        ('age5', 11),
        ('n_ili', 12),
        ('n_providers', 13),
        ('n_patients', 14),
    )
    for key, index in int_columns:
        record[key] = optional_int(row[index])
    return record
def __init__(self, region):
    """Load historical wILI for `region` and organise it into seasons.

    Rows from `Epidata.fluview` for epiweeks 200330-202330 are grouped by
    season, where a season starts at week 30 and is keyed by the year of
    that week. Only week offsets 0..51 are kept, and seasons that do not
    have all 52 offsets are dropped.

    If both the 2008 and 2009 seasons are complete, offsets 40..51 of 2008
    are overwritten with the 2009 values and the 2009 season is removed
    (presumably to work around the 2009 pandemic - confirm with the
    original authors).

    Sets `self.region`, `self.seasons` ({year: {offset: wili}}),
    `self.years` (sorted season years) and `self.curves`
    ({year: [wili for offsets 0..51]}).
    """
    self.region = region
    weeks = Epidata.range(200330, 202330)
    rows = Epidata.check(Epidata.fluview(self.region, weeks))

    self.seasons = {}
    for row in rows:
        epiweek, value = row['epiweek'], row['wili']
        season, week = EW.split_epiweek(epiweek)
        if week < 30:
            # weeks before week 30 belong to the season that began last year
            season -= 1
        offset = EW.delta_epiweeks(EW.join_epiweek(season, 30), epiweek)
        by_offset = self.seasons.setdefault(season, {})
        if 0 <= offset < 52:
            by_offset[offset] = value

    # discard seasons that do not cover all 52 offsets
    for season in sorted(self.seasons):
        if len(self.seasons[season]) != 52:
            del self.seasons[season]

    if 2008 in self.seasons and 2009 in self.seasons:
        donor = self.seasons.pop(2009)
        for offset in range(40, 52):
            self.seasons[2008][offset] = donor[offset]

    self.years = sorted(self.seasons)
    self.curves = {
        year: [self.seasons[year][offset] for offset in range(52)]
        for year in self.years
    }
def _forecast(self, region, epiweek):
    """Return one candidate wILI curve per user submission for `region`.

    Each curve is the list of observed `wili` values (week 40 of
    `self.test_season` through `epiweek`, fetched from `Epidata.fluview` as
    of issue `epiweek`) followed by one user's submitted values.

    Side effects: stores the number of submissions in `self._num_users` and
    prints a one-line summary.

    Raises:
        Exception: if `epiweek` lies outside week 40 of the test season
            through week 20 of the following year, or if the number of
            observed rows does not match the number of elapsed weeks.
    """
    # season setup and sanity check
    season_start = flu.join_epiweek(self.test_season, 40)
    season_end = flu.join_epiweek(self.test_season + 1, 20)
    in_season = season_start <= epiweek <= season_end
    if not in_season:
        raise Exception('`epiweek` outside of `test_season`')

    # observed values (left half of every curve) come from the Epidata API
    week_range = Epidata.range(season_start, epiweek)
    response = Epidata.fluview(region, week_range, issues=epiweek)
    pinned = []
    for row in Forecaster.Utils.decode(response):
        pinned.append(row['wili'])
    if len(pinned) != flu.delta_epiweeks(season_start, epiweek) + 1:
        raise Exception('missing ILINet data')

    # user submissions (right half of every curve) come from the database
    submissions = self.fetch_submissions(region, epiweek)
    user_count = len(submissions)
    self._num_users = user_count
    print(' [EC] %d users found for %s on %d' % (user_count, region, epiweek))

    # stitch the observed prefix onto each submitted suffix
    curves = []
    for submitted in submissions:
        curves.append(pinned + submitted)
    return curves
def fetch_submissions(self, region, epiweek_now):
    """Return the complete user-submitted curves for one region and week.

    Queries the mturk epicast tables for non-debug users who submitted a
    forecast for `region` at `epiweek_now`, keeping forecast points up to
    week 20 of the year after `self.test_season` with positive `wili`.
    When `self.users` is not None, only those user ids are kept.

    A user's curve is returned only if it has exactly
    `flu.delta_epiweeks(epiweek_now, final_week)` points; otherwise a
    warning is printed and the curve is skipped.

    Args:
        region: value matched against `ec_fluv_regions.fluview_name`.
        epiweek_now: the epiweek at which the submissions were made.

    Returns:
        A list of curves (lists of wILI values ordered by epiweek), one per
        user, in ascending user-id order.
    """
    final_week = flu.join_epiweek(self.test_season + 1, 20)
    # Adjacent literals concatenate to exactly the original query text.
    query = (
        " SELECT u.`id` `user_id`, f.`epiweek`, f.`wili`"
        " FROM ("
        " SELECT u.*"
        " FROM `ec_fluv_users_mturk_2019` u"
        " JOIN `ec_fluv_defaults` d ON TRUE"
        " LEFT JOIN `ec_fluv_user_preferences_mturk` p"
        " ON p.`user_id` = u.`id` AND p.`name` = d.`name`"
        " WHERE d.`name` = '_debug' AND coalesce(p.`value`, d.`value`) = '0'"
        " ) u"
        " JOIN `ec_fluv_submissions_mturk` s ON s.`user_id` = u.`id`"
        " JOIN `ec_fluv_forecast_mturk` f"
        " ON f.`user_id` = u.`id` AND f.`region_id` = s.`region_id`"
        " AND f.`epiweek_now` = s.`epiweek_now`"
        " JOIN `ec_fluv_regions` r ON r.`id` = s.`region_id`"
        " WHERE r.`fluview_name` = %s AND s.`epiweek_now` = %s"
        " AND f.`epiweek` <= %s AND f.`wili` > 0"
        " ORDER BY u.`id` ASC, f.`epiweek` ASC "
    )

    submissions = {}
    self.cur = self.cnx.cursor()
    try:
        self.cur.execute(query, (region, epiweek_now, final_week))
        for user, _epiweek, wili in self.cur:
            if self.users is not None and user not in self.users:
                continue
            submissions.setdefault(user, []).append(wili)
    finally:
        # always release the cursor, even if the query or iteration fails
        self.cur.close()

    expected_weeks = flu.delta_epiweeks(epiweek_now, final_week)
    curves = []
    for user, values in submissions.items():
        if len(values) != expected_weeks:
            print(' [EC] warning: missing data in user sumission [%d|%s|%d]' %
                  (user, region, epiweek_now))
        else:
            curves.append(values)
    return curves
def train(self, epiweek):
    """Fit an `Archetype` model on seasons that ended by `epiweek`.

    A season keyed by `year` is used when `epiweek` is at or after week 29
    of `year + 1`. Stores the model in `self.model` and `epiweek` in
    `self.training_week`.

    Returns:
        A tuple `(curves, model)` with the training curves and the model.
    """
    finished = [
        self.curves[year]
        for year in self.years
        if epiweek >= EW.join_epiweek(year + 1, 29)
    ]
    self.model = Archetype(finished)
    self.training_week = epiweek
    return finished, self.model
def _get_current(self, region, epiweek, forecast_type):
    """Return the test-season values observed as of issue `epiweek`.

    Requests week 40 of `self.test_season` through week 20 of the following
    year. For the WILI forecast type the values are the `wili` field of
    `Epidata.fluview`; otherwise they are the `region` column of
    `Epidata.flusurv` for 'network_all'.

    NOTE(review): the `forecast_type` argument is not read; the branch is
    chosen from `self.forecast_type`.

    Raises:
        Exception: if the number of returned values differs from the number
            of weeks from week 40 through `epiweek` inclusive.
    """
    season_start = flu.join_epiweek(self.test_season + 0, 40)
    season_end = flu.join_epiweek(self.test_season + 1, 20)
    weeks = Epidata.range(season_start, season_end)

    is_wili = self.forecast_type == ForecastType.WILI
    if is_wili:
        print('fetching history data for:')
        print(region, epiweek, weeks)
        response = Epidata.fluview(region, weeks, issues=epiweek)
        column = 'wili'
    else:
        response = Epidata.flusurv('network_all', weeks, issues=epiweek)
        column = region

    data = [row[column] for row in Forecaster.Utils.decode(response)]
    if len(data) != flu.delta_epiweeks(season_start, epiweek) + 1:
        raise Exception('missing data')
    return data
def _forecast(self, region, epiweek):
    """Splice curves from the `past` and `future` forecasters.

    Both sub-forecasters are asked for curves for `region` at `epiweek`.
    Every output curve takes the first `i` points from a past curve and the
    remaining points from a future curve, where `i` is the number of weeks
    from week 40 of `self.test_season` to `epiweek`. The shorter curve list
    is cycled so that the output has `max(len(P), len(F))` curves.

    Calls `self._callback()` once after the curves are built, if it is set.
    """
    print('inside hybrid._forecast, region, epiweek:', region, epiweek)
    past_curves = self.past._forecast(region, epiweek)
    future_curves = self.future._forecast(region, epiweek)
    n_past, n_future = len(past_curves), len(future_curves)
    print('inside hybrid._forecast, len P, len F', n_past, n_future)

    season_start = flu.join_epiweek(self.test_season, 40)
    split = flu.delta_epiweeks(season_start, epiweek)

    curves = []
    for j in range(max(n_past, n_future)):
        head, tail = past_curves[j % n_past], future_curves[j % n_future]
        curves.append(list(head[:split]) + list(tail[split:]))

    if self._callback is not None:
        self._callback()
    return curves
def _forecast(self, region, epiweek):
    """Sample season curves from the empirical distribution for `region`.

    Starts from copies of the per-week empirical mean and variance
    (`self.emp_mean[region]`, `self.emp_var[region]`). For every week from
    week 40 of `self.test_season` through `epiweek`, the mean is replaced
    with the observed value and the variance with the backfill variance for
    that week's lag (`self.bf_var[region]`, clamped to its last entry).
    Curves are then drawn with `Forecaster.Utils.sample_normal_var`.

    When `self.do_sampling` is false, the part of each sampled curve after
    `epiweek` is overwritten with the corresponding part of a historical
    curve from `self.emp_curves[region]`, cycling through them.

    Returns:
        The sampled (and possibly overwritten) curves.
    """
    ew1 = flu.join_epiweek(self.test_season + 0, 40)
    print('fetching past data until week %d' % (epiweek))
    observed = self._get_current(region, epiweek, self.forecast_type)
    mean, var = self.emp_mean[region].copy(), self.emp_var[region].copy()

    # loop invariants: index of the latest observed week and backfill table
    last_index = flu.delta_epiweeks(ew1, epiweek)
    backfill = self.bf_var[region]
    max_lag = len(backfill) - 1

    for ew in flu.range_epiweeks(ew1, flu.add_epiweeks(epiweek, 1)):
        i = flu.delta_epiweeks(ew1, ew)
        # lag = how many weeks old this observation is; older weeks reuse
        # the variance of the largest available lag
        lag = min(last_index - i, max_lag)
        mean[i] = observed[i]
        var[i] = backfill[lag]

    curves = Forecaster.Utils.sample_normal_var(mean, var, self.num_samples)

    if not self.do_sampling:
        offset = last_index + 1
        history = self.emp_curves[region]
        for i, curve in enumerate(curves):
            curve[offset:] = history[i % len(history)][offset:]
    return curves
def test_from_epiweek(self):
    """`EpiDate.from_epiweek` round-trips epiweeks and rejects bad input.

    Checks the sample dates, every week of the years 2000-2019, and three
    invalid (year, week) pairs that must raise.
    """
    for y, m, d, epi_year, epi_week in FunctionTests.sample_epiweeks:
        expected = utils_epiweek.join_epiweek(epi_year, epi_week)
        with self.subTest(y=y, m=m, d=d, epwk=expected):
            from_calendar = EpiDate(y, m, d)
            from_week = EpiDate.from_epiweek(epi_year, epi_week)
            self.assertEqual(from_calendar.get_ew(), expected)
            self.assertEqual(from_week.get_ew(), expected)
            self.assertEqual(from_week.get_day_of_week(), 3)

    for year in range(2000, 2020):
        last_week = utils_epiweek.get_num_weeks(year)
        for week in range(1, last_week + 1):
            expected = utils_epiweek.join_epiweek(year, week)
            date = EpiDate.from_epiweek(year, week)
            self.assertEqual(date.get_ew(), expected)
            self.assertEqual(date.get_day_of_week(), 3)

    # week 0, week 53 of 2017, and year 0 must all be rejected
    for bad_year, bad_week in ((2017, 0), (2017, 53), (0, 30)):
        with self.assertRaises(Exception):
            EpiDate.from_epiweek(bad_year, bad_week)
def _train(self, region):
    """Compute empirical curve statistics and backfill variance for `region`.

    Does nothing if `region` already has an entry in `self.bf_var`.
    Two-character region names reuse the 'hhs4' entries of all four tables
    (a stop-gap for state-level ILI; 'hhs4' must already be trained).

    Otherwise one curve is built per past season from the stable data:
    37 weeks from week 40 for the WILI forecast type, 29 weeks otherwise.
    Their mean, sample variance and the curves themselves are stored in
    `self.emp_mean`, `self.emp_var` and `self.emp_curves`.

    `self.bf_var[region]` holds one variance per lag in
    `range(self.backfill_weeks)`, computed from the differences between
    stable and lagged values, or `[0]` when `self.backfill_weeks` is None.

    Raises:
        Exception: if fewer than two stable/unstable pairs exist for a lag.
    """
    if region in self.bf_var:
        # already trained
        return

    if len(region) == 2:
        # TODO: this is a hack for state ILI
        # assume backfill of region 4
        print('FIXME: setting backfill for %s as hhs4' % region)
        tables = (self.bf_var, self.emp_mean, self.emp_var, self.emp_curves)
        for table in tables:
            table[region] = table['hhs4']
        return

    stable = self._get_stable(region)
    season_starts = [flu.get_season(ew)[0] for ew in stable.keys()]
    seasons = {
        flu.split_epiweek(start)[0]
        for start in season_starts
        if start is not None
    }

    curves = []
    for season in seasons:
        first_week = flu.join_epiweek(season + 0, 40)
        is_wili = self.forecast_type == ForecastType.WILI
        end_week = flu.add_epiweeks(first_week, 37 if is_wili else 29)
        weeks = flu.range_epiweeks(first_week, end_week)
        curves.append([stable[ew] for ew in weeks])

    self.emp_mean[region] = np.mean(curves, axis=0)
    self.emp_var[region] = np.var(curves, axis=0, ddof=1)
    self.emp_curves[region] = curves

    if self.backfill_weeks is None:
        self.bf_var[region] = [0]
    else:
        # filled in place, one entry per lag
        self.bf_var[region] = []
        for lag in range(self.backfill_weeks):
            unstable = self._get_unstable(region, lag)
            shared_weeks = stable.keys() & unstable.keys()
            changes = [stable[ew] - unstable[ew] for ew in shared_weeks]
            if len(changes) < 2:
                raise Exception('not enough data')
            self.bf_var[region].append(np.var(changes, ddof=1))

    # report the backfill standard deviation for each lag
    std_devs = ['%.3f' % (b**0.5) for b in self.bf_var[region]]
    print(' %5s: %s' % (region, ' '.join(std_devs)))
def _get_stable(self, region):
    """Return `{epiweek: value}` from the default (latest) Epidata query.

    Covers 37 weeks starting at week 40 for every season from 2003 up to
    (but not including) `self.test_season`, skipping season 2009. For the
    WILI forecast type the values are the `wili` field of
    `Epidata.fluview`; otherwise they are the `region` column of
    `Epidata.flusurv` for 'network_all'.
    """
    season_starts = [
        flu.join_epiweek(season, 40)
        for season in range(2003, self.test_season)
        if season != 2009
    ]
    ranges = [
        Epidata.range(start, flu.add_epiweeks(start, 37))
        for start in season_starts
    ]
    if self.forecast_type == ForecastType.WILI:
        response = Epidata.fluview(region, ranges)
        column = 'wili'
    else:
        response = Epidata.flusurv('network_all', ranges)
        column = region
    rows = Forecaster.Utils.decode(response)
    return {row['epiweek']: row[column] for row in rows}
def get_public_data(row):
    """Convert one row of public health lab data into a dict of typed fields.

    Two layouts are accepted: a seasonal layout (`hrow1`, with a
    SEASON_DESCRIPTION column) and a weekly layout (`hrow2`, with YEAR and
    WEEK columns). Weekly rows are recognised by having as many cells as
    `hrow2`; their data columns are shifted right by one.

    Returns `None` for header rows (a single-cell row, or a row whose first
    cell is 'REGION TYPE') and for rows whose data is not reported ('X').

    For seasonal rows the epiweek is week 40 of the four-digit year found at
    characters 7-10 of the season description.

    Raises:
        Exception: if a 'REGION TYPE' header row matches neither layout.
    """
    hrow1 = [
        'REGION TYPE', 'REGION', 'SEASON_DESCRIPTION', 'TOTAL SPECIMENS',
        'A (2009 H1N1)', 'A (H3)', 'A (Subtyping not Performed)', 'B',
        'BVic', 'BYam', 'H3N2v',
    ]
    hrow2 = [
        'REGION TYPE', 'REGION', 'YEAR', 'WEEK', 'TOTAL SPECIMENS',
        'A (2009 H1N1)', 'A (H3)', 'A (Subtyping not Performed)', 'B',
        'BVic', 'BYam', 'H3N2v',
    ]
    if row[0] == 'REGION TYPE' and row != hrow1 and row != hrow2:
        raise Exception('header row has changed for public health lab data.')
    if len(row) == 1 or row[0] == 'REGION TYPE':
        # header row
        return None

    # handle case where data is reported by season, not by epiweek
    is_weekly = len(row) == len(hrow2)
    # row offset: weekly rows carry one extra leading column (WEEK)
    offset = 1 if is_weekly else 0

    # Data is not reported, ignore this row. TOTAL SPECIMENS sits at index
    # 3 + offset; index 3 is still checked so the original behavior is kept
    # for both layouts.
    if row[3] == 'X' or row[3 + offset] == 'X':
        return None

    # set epiweek
    if is_weekly:
        epiweek = join_epiweek(int(row[2]), int(row[3]))
    else:
        epiweek = int(row[2][7:11]) * 100 + 40

    return {
        'location': fluview_locations.get_location_name(*row[:2]),
        'epiweek': epiweek,
        'total_specimens': int(row[3 + offset]),
        'total_a_h1n1': optional_int(row[4 + offset]),
        'total_a_h3': optional_int(row[5 + offset]),
        'total_a_h3n2v': optional_int(row[10 + offset]),
        'total_a_no_sub': optional_int(row[6 + offset]),
        'total_b': optional_int(row[7 + offset]),
        'total_b_vic': optional_int(row[8 + offset]),
        'total_b_yam': optional_int(row[9 + offset]),
    }
def get_clinical_data(row):
    """Convert one row of clinical lab data into a dict of typed fields.

    Returns `None` for header rows (a single-cell row, or a row whose first
    cell is 'REGION TYPE') and for rows whose TOTAL SPECIMENS cell
    (index 4) is 'X'.

    Raises:
        Exception: if a 'REGION TYPE' header row differs from the expected
            column layout.
    """
    expected_header = [
        'REGION TYPE', 'REGION', 'YEAR', 'WEEK', 'TOTAL SPECIMENS',
        'TOTAL A', 'TOTAL B', 'PERCENT POSITIVE', 'PERCENT A', 'PERCENT B',
    ]
    is_header = row[0] == 'REGION TYPE'
    if is_header and row != expected_header:
        raise Exception('header row has changed for clinical lab data.')
    if len(row) == 1 or is_header:
        # this is a header row
        return None
    if row[4] == 'X':
        # data is not reported, ignore this row
        return None

    record = {
        'location': fluview_locations.get_location_name(*row[:2]),
        'epiweek': join_epiweek(int(row[2]), int(row[3])),
        'total_specimens': int(row[4]),
    }
    for key, index in (('total_a', 5), ('total_b', 6)):
        record[key] = optional_int(row[index])
    # percentages are stored as reported rather than recomputed
    for key, index in (('percent_positive', 7), ('percent_a', 8),
                       ('percent_b', 9)):
        record[key] = nullable_float(row[index])
    return record
def test_get_ew(self):
    """`EpiDate.get_ew` returns the expected epiweek for each sample date."""
    for year, month, day, epi_year, epi_week in FunctionTests.sample_epiweeks:
        expected = utils_epiweek.join_epiweek(epi_year, epi_week)
        with self.subTest(y=year, m=month, d=day, epwk=expected):
            actual = EpiDate(year, month, day).get_ew()
            self.assertEqual(actual, expected)
def forecast(self, epiweek):
    """
    Build a complete `Forecast` for every location in `self.locations`.

    `epiweek`: the most recent epiweek for which ILINet data is available

    For each location, sample curves are drawn with `self._forecast`, all
    targets are extracted from each curve, and the per-target samples are
    turned into distributions with `self.forecast_weeks` (onset, peak week)
    and `self.forecast_wili` (peak and the four lookaheads). Onset is only
    forecast for locations where `Locations.is_region` is true; those are
    also the only ones given a baseline.

    Raises an Exception if `epiweek` is not in `self.test_season`, if its
    week number is strictly between 20 and 40, or if either minimum bin
    probability would need more than the total probability mass.
    """
    # sanity checks
    flu.check_epiweek(epiweek)
    season_first_week = flu.get_season(epiweek)[0]
    season = flu.split_epiweek(season_first_week)[0]
    week = flu.split_epiweek(epiweek)[1]
    first_epiweek = flu.join_epiweek(season, 40)
    offset = flu.delta_epiweeks(first_epiweek, epiweek)
    if season != self.test_season:
        raise Exception('unable to forecast season %d' % season)
    if 20 < week < 40:
        raise Exception('unable to forecast week %02d' % week)

    # initialize forecast
    forecast = Forecast(self.test_season, datetime.now(), self.name, epiweek,
                        self.forecast_type)

    # aliases for readability
    num_week_bins = forecast.season_length
    num_wili_bins = forecast.num_ili_bins
    wili_bin_size = forecast.ili_bin_size

    # uniform blending weights (the week weight includes the `none` "bin")
    week_weight = self.min_week_prob * (num_week_bins + 1)
    wili_weight = self.min_wili_prob * num_wili_bins
    if week_weight > 1:
        raise Exception('`min_week_prob` is impossibly high')
    if wili_weight > 1:
        raise Exception('`min_wili_prob` is impossibly high')

    target_names = ('onset', 'peakweek', 'peak', 'x1', 'x2', 'x3', 'x4')
    lookahead_names = ('x1', 'x2', 'x3', 'x4')
    allow_no_pw = self.test_season < 2016

    # forecast each region
    for region in self.locations:
        # draw sample curves
        curves = self._forecast(region, epiweek)

        # regional info
        is_region = Locations.is_region(region)
        baseline = None
        if is_region:
            baseline = Targets.baselines[self.test_season][region]

        # get all targets, then regroup the samples by target name
        targets = [
            Targets.get_all_targets(
                c, baseline, offset, rule_season=self.test_season)
            for c in curves
        ]
        samples = {
            name: [t[name] for t in targets] for name in target_names
        }

        # forecast each target
        if is_region:
            # skip onset for states and hospitalization, and do it only
            # for regions
            onset = self.forecast_weeks(
                first_epiweek, num_week_bins, samples['onset'], week_weight,
                self.smooth_weeks_bw, True)
        peakweek = self.forecast_weeks(
            first_epiweek, num_week_bins, samples['peakweek'], week_weight,
            self.smooth_weeks_bw, allow_no_pw)
        peak = self.forecast_wili(
            wili_bin_size, num_wili_bins, samples['peak'], wili_weight,
            self.smooth_wili_bw)
        lookaheads = [
            self.forecast_wili(
                wili_bin_size, num_wili_bins, samples[name], wili_weight,
                self.smooth_wili_bw)
            for name in lookahead_names
        ]

        # fill in the forecast data
        fc = forecast.get_or_create_forecast(region)
        if is_region:
            fc.set_onset(*onset)
        fc.set_peakweek(*peakweek)
        fc.set_peak(*peak)
        for weeks_ahead, distribution in enumerate(lookaheads, start=1):
            fc.set_lookahead(weeks_ahead, *distribution)

    # sanity check completed forecast
    forecast.sanity_check()
    return forecast