def get_training_set(location, epiweek, signal, valid):
  """Pair signal weeks with (w)ILI ground truth for training.

  Prefers "unstable" wILI (as reported on issue ew2) over finalized
  "stable" wILI. Returns the (epiweeks, X, Y) tuple from
  get_training_set_data. Raises if `valid` is set and unstable wILI is
  missing within 5 weeks of the prediction target.
  """
  ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
  auth = secrets.api.fluview
  try:
    result = Epidata.fluview(location, weeks0, issues=ew2, auth=auth)
    rows = Epidata.check(result)
    unstable = extract(rows, ['wili'])
  except Exception:
    # versioned (unstable) data may be unavailable; fall back to stable only
    # (was a bare `except:`; narrowed to match the sibling implementation)
    unstable = {}
  rows = Epidata.check(Epidata.fluview(location, weeks0, auth=auth))
  stable = extract(rows, ['wili'])
  data = {}
  num_dropped = 0
  for ew in signal.keys():
    if ew == ew3:
      # never train on the week being predicted
      continue
    sig = signal[ew]
    if ew not in unstable:
      if valid and flu.delta_epiweeks(ew, ew3) <= 5:
        raise Exception('unstable wILI is not available on %d' % ew)
      if ew not in stable:
        num_dropped += 1
        continue
      wili = stable[ew]
    else:
      wili = unstable[ew]
    data[ew] = {'x': sig, 'y': wili}
  if num_dropped:
    msg = 'warning: dropped %d/%d signal weeks because (w)ILI was unavailable'
    print(msg % (num_dropped, len(signal)))
  return get_training_set_data(data)
def _get_partial_trajectory(self, epiweek, valid=True):
  """Return the wILI curve from week 30 of the season through `epiweek`.

  Stable values fill weeks older than 5 weeks back (or all weeks when
  `valid` is False); unstable values (as of issue `epiweek`) overwrite
  where available. Raises if any week in the range is missing.
  """
  y, w = EW.split_epiweek(epiweek)
  if w < 30:
    y -= 1
  ew1 = EW.join_epiweek(y, 30)
  ew2 = epiweek
  # only trust stable values strictly older than this cutoff when valid
  limit = EW.add_epiweeks(ew2, -5)
  weeks = Epidata.range(ew1, ew2)
  stable = Epidata.check(Epidata.fluview(self.region, weeks))
  try:
    unstable = Epidata.check(Epidata.fluview(self.region, weeks, issues=ew2))
  except Exception:
    # versioned data may be unavailable; proceed with stable only
    # (was a bare `except:`; narrowed to Exception)
    unstable = []
  wili = {}
  for row in stable:
    ew, value = row['epiweek'], row['wili']
    if not valid or ew < limit:
      wili[ew] = value
  for row in unstable:
    ew, value = row['epiweek'], row['wili']
    wili[ew] = value
  curve = []
  for ew in EW.range_epiweeks(ew1, ew2, inclusive=True):
    if ew not in wili:
      if valid:
        t = 'unstable'
      else:
        t = 'any'
      raise Exception('wILI (%s) not available for week %d' % (t, ew))
    curve.append(wili[ew])
  n1 = EW.delta_epiweeks(ew1, ew2) + 1
  n2 = len(curve)
  if n1 != n2:
    raise Exception('missing data (expected %d, found %d)' % (n1, n2))
  return curve
def __init__(self, region, target):
  # Load the full (stable) history of the datasetname target for this
  # region and index it by epiweek. self.data[i]['stable'] holds the
  # observation for week index i; self.valid mirrors availability.
  self.region = region
  self.target = target
  weeks = Epidata.range(199301, 202330)
  auth = secrets.api.datasetname_targets
  # fetch once and tag rows with a huge fake lag so they count as "stable"
  rx = mutate_rows_as_if_lagged(Epidata.check(Epidata.datasetname_targets(auth, self.target, self.region, weeks)), 1000000)
  self.data = {}
  self.valid = {}
  # bidirectional epiweek <-> dense index maps
  self.ew2i, self.i2ew = {}, {}
  for ew in EW.range_epiweeks(weeks['from'], weeks['to'], inclusive=True):
    # if 200916 <= ew <= 201015:
    #   continue
    i = len(self.ew2i)
    self.ew2i[ew] = i
    self.i2ew[i] = ew
  for row in rx:
    ew, observation, lag = row['epiweek'], row['value'], row['lag']
    if ew not in self.ew2i:
      continue
    i = self.ew2i[ew]
    if i not in self.data:
      self.data[i] = {}
      self.valid[i] = {'stable': False}
    # every row is treated as stable regardless of its reported lag
    lag = 'stable'
    self.data[i][lag] = observation
    self.valid[i][lag] = True
  self.weeks = sorted(list(self.data.keys()))
  # NOTE(review): this loop has no effect (body only continues) — it looks
  # like a vestige of the lag back-fill in the lagged variant of this
  # loader; confirm before removing.
  for i in self.weeks:
    if 'stable' not in self.data[i]:
      continue
def get_training_set_datasetname(location, epiweek, signal, target, signal_to_truth_ew_shift):
  """Pair shifted signal weeks with datasetname ground truth.

  Each signal week is shifted by `signal_to_truth_ew_shift` to find its
  ground-truth week; weeks without truth are dropped (with a warning),
  and the prediction week itself is skipped. Returns (epiweeks, X, Y).
  """
  ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
  auth = secrets.api.datasetname_targets
  truth_rows = Epidata.check(
      Epidata.datasetname_targets(auth, target, location, weeks0))
  ground_truth = {row['epiweek']: row['value'] for row in truth_rows}
  data = {}
  dropped_weeks = 0
  for signal_week in signal.keys():
    truth_week = flu.add_epiweeks(signal_week, signal_to_truth_ew_shift)
    # skip the week we're trying to predict
    if truth_week == ew3:
      continue
    if truth_week not in ground_truth:
      dropped_weeks += 1
      continue
    data[truth_week] = {'x': signal[signal_week], 'y': ground_truth[truth_week]}
  if dropped_weeks:
    msg = 'warning: dropped %d/%d signal weeks because ground truth / target was unavailable'
    print(msg % (dropped_weeks, len(signal)))
  ordered_weeks = sorted(list(data.keys()))
  X = [data[week]['x'] for week in ordered_weeks]
  Y = [data[week]['y'] for week in ordered_weeks]
  return (ordered_weeks, X, Y)
def fetch(weeks):
  """Download each article/hour wiki series and merge them by epiweek.

  Returns a dict shaped like an Epidata API response, with one row per
  epiweek carrying every fetched field.
  """
  # epiweek -> {'epiweek': ew, field: value, ...}
  merged = {}
  field_index = 0
  for article in articles:
    for hour in hours:
      # one API call per (article, hour) series
      response = Epidata.wiki(article, epiweeks=weeks, hours=hour)
      rows = Epidata.check(response)
      column = fields[field_index]
      field_index += 1
      for row in rows:
        ew = row['epiweek']
        # create the epiweek entry on first sight
        entry = merged.setdefault(ew, {'epiweek': ew})
        entry[column] = row['value']
  # flatten to a list ordered by epiweek, spoofing the API response shape
  epidata = [merged[ew] for ew in sorted(list(merged.keys()))]
  return {
    'result': 1,
    'message': None,
    'epidata': epidata,
  }
def __init__(self, region):
  """Load complete 52-week wILI season curves for `region`.

  Seasons run week 30 to week 29; incomplete seasons are discarded, and
  the pandemic-split 2008 season is patched with late-2009 weeks before
  2009 is dropped.
  """
  self.region = region
  weeks = Epidata.range(200330, 202330)
  rows = Epidata.check(Epidata.fluview(self.region, weeks))
  self.seasons = {}
  for row in rows:
    ew, wili = row['epiweek'], row['wili']
    season_year, week = EW.split_epiweek(ew)
    if week < 30:
      season_year -= 1
    # offset from week 30 of the season
    offset = EW.delta_epiweeks(EW.join_epiweek(season_year, 30), ew)
    if season_year not in self.seasons:
      self.seasons[season_year] = {}
    if 0 <= offset < 52:
      self.seasons[season_year][offset] = wili
  # drop any season that is missing weeks
  for year in sorted(list(self.seasons.keys())):
    if len(self.seasons[year]) != 52:
      del self.seasons[year]
  if 2008 in self.seasons and 2009 in self.seasons:
    # splice the tail of 2009 onto 2008, then discard 2009
    for offset in range(40, 52):
      self.seasons[2008][offset] = self.seasons[2009][offset]
    del self.seasons[2009]
  curve = lambda y: [self.seasons[y][i] for i in range(52)]
  self.years = sorted(list(self.seasons.keys()))
  self.curves = dict([(y, curve(y)) for y in self.years])
def __init__(self, region, target, use_weekly=True):
  """Load PAHO dengue history for `region`, converted to weekly counts.

  Builds epiweek<->index maps over the full range and stores the weekly
  observation for each week as 'stable'. `use_weekly` is accepted for
  interface compatibility but not consulted here.
  """
  self.region = region
  self.target = target
  self.stts = 0
  weeks = Epidata.range(201401, 202330)
  rx = Epidata.check(Epidata.paho_dengue(self.region, weeks))
  self.data = {}
  self.valid = {}
  # bidirectional epiweek <-> dense index maps
  self.ew2i, self.i2ew = {}, {}
  for ew in EW.range_epiweeks(weeks['from'], weeks['to'], inclusive=True):
    idx = len(self.ew2i)
    self.ew2i[ew] = idx
    self.i2ew[idx] = ew
  # convert cumulative counts to per-week counts
  # (removed an unused `values` list that duplicated this extraction)
  cumulative = {elt['epiweek']: elt[self.target] for elt in rx}
  w_data = cum_to_week(cumulative)
  for elt in rx:
    ew = elt['epiweek']
    observation = w_data[ew]
    if ew not in self.ew2i:
      continue
    idx = self.ew2i[ew]
    if idx not in self.data:
      self.data[idx] = {}
      self.valid[idx] = {'stable': False}
    # all rows are treated as finalized/stable
    self.data[idx]['stable'] = observation
    self.valid[idx]['stable'] = True
  self.weeks = sorted(list(self.data.keys()))
  self.dds = DengueDataSource.new_instance(target)
def download_preliminary_fluview(f):
  """Write preliminary (lag 0-2) national wILI rows to `f` as CSV."""
  auth = secrets.api.fluview
  for lag in range(3):
    print('preliminary fluview', lag)
    rows = Epidata.check(Epidata.fluview('nat', weeks, lag=lag, auth=auth))
    for row in rows:
      f.write('%d,%s,%.5f\n' % (row['epiweek'], 'nat_%d' % lag, row['wili']))
def download_fluview(f):
  """Write finalized wILI rows for every region to `f` as CSV."""
  auth = secrets.api.fluview
  for loc in Locations.region_list:
    print('fluview', loc)
    rows = Epidata.check(Epidata.fluview(loc, weeks, auth=auth))
    for row in rows:
      f.write('%d,%s,%.5f\n' % (row['epiweek'], loc, row['wili']))
def __init__(self, region, target):
  """Load datasetname target history with synthetic lag views.

  The same stable series is fetched four times and re-tagged as if it
  had lags 0, 1, 2, and "stable" (1000000); missing lagged values are
  back-filled from the stable value.
  """
  self.region = region
  self.target = target
  weeks = Epidata.range(199301, 202330)
  auth = secrets.api.datasetname_targets

  def fetch():
    # one stable pull of the full series
    return Epidata.check(
        Epidata.datasetname_targets(auth, self.target, self.region, weeks))

  rows = []
  for fake_lag in (0, 1, 2, 1000000):
    rows += mutate_rows_as_if_lagged(fetch(), fake_lag)
  self.data = {}
  self.valid = {}
  # bidirectional epiweek <-> dense index maps
  self.ew2i, self.i2ew = {}, {}
  for ew in EW.range_epiweeks(weeks['from'], weeks['to'], inclusive=True):
    idx = len(self.ew2i)
    self.ew2i[ew] = idx
    self.i2ew[idx] = ew
  for row in rows:
    ew, observation, lag = row['epiweek'], row['value'], row['lag']
    if ew not in self.ew2i:
      continue
    idx = self.ew2i[ew]
    if idx not in self.data:
      self.data[idx] = {}
      self.valid[idx] = {0: False, 1: False, 2: False, 'stable': False}
    if not (0 <= lag <= 2):
      # anything beyond lag 2 is treated as the finalized value
      lag = 'stable'
    self.data[idx][lag] = observation
    self.valid[idx][lag] = True
  self.weeks = sorted(list(self.data.keys()))
  for idx in self.weeks:
    if 'stable' not in self.data[idx]:
      continue
    # back-fill any missing lagged observation with the stable value
    for lag in range(3):
      if lag not in self.data[idx]:
        self.data[idx][lag] = self.data[idx]['stable']
def __init__(self, region):
  """Load fluview wILI history for `region` at lags 0-2 plus stable.

  Weeks 200916-201015 (pandemic period) are excluded from the index;
  missing lagged values are back-filled from the stable value.
  """
  self.region = region
  weeks = Epidata.range(200330, 202330)
  auth = secrets.api.fluview
  lagged = []
  for lag in range(3):
    lagged += Epidata.check(
        Epidata.fluview(self.region, weeks, lag=lag, auth=auth))
  stable = Epidata.check(Epidata.fluview(self.region, weeks, auth=auth))
  self.data = {}
  self.valid = {}
  # bidirectional epiweek <-> dense index maps
  self.ew2i, self.i2ew = {}, {}
  for ew in EW.range_epiweeks(weeks['from'], weeks['to'], inclusive=True):
    # exclude the 2009 pandemic period
    if 200916 <= ew <= 201015:
      continue
    idx = len(self.ew2i)
    self.ew2i[ew] = idx
    self.i2ew[idx] = ew
  for row in lagged + stable:
    ew, wili, lag = row['epiweek'], row['wili'], row['lag']
    if ew not in self.ew2i:
      continue
    idx = self.ew2i[ew]
    if idx not in self.data:
      self.data[idx] = {}
      self.valid[idx] = {0: False, 1: False, 2: False, 'stable': False}
    if not (0 <= lag <= 2):
      # anything beyond lag 2 is treated as the finalized value
      lag = 'stable'
    self.data[idx][lag] = wili
    self.valid[idx][lag] = True
  self.weeks = sorted(list(self.data.keys()))
  for idx in self.weeks:
    if 'stable' not in self.data[idx]:
      continue
    # back-fill any missing lagged observation with the stable value
    for lag in range(3):
      if lag not in self.data[idx]:
        self.data[idx][lag] = self.data[idx]['stable']
def get_training_set(location, epiweek, signal, valid):
  """Pair signal weeks with weekly dengue counts for training.

  Uses stable PAHO dengue data converted to weekly counts; weeks without
  ground truth, and the prediction week itself, are skipped. Returns the
  (epiweeks, X, Y) tuple from get_training_set_data.
  """
  ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
  rows = Epidata.check(Epidata.paho_dengue(location, weeks0))
  stable = extract(rows, 'num_dengue', to_weekly=True)
  data = {}
  for ew in signal.keys():
    # skip the prediction week and any week lacking ground truth
    if ew == ew3 or ew not in stable:
      continue
    data[ew] = {'x': signal[ew], 'y': stable[ew]}
  return get_training_set_data(data)
def __init__(self, region, target):
  """Load PAHO dengue history for `region` at lags 0-2 plus stable.

  Missing lagged values are back-filled from the stable value.
  """
  self.region = region
  self.target = target
  weeks = Epidata.range(201401, 202330)
  lagged = []
  for lag in range(3):
    lagged += Epidata.check(Epidata.paho_dengue(self.region, weeks, lag=lag))
  stable = Epidata.check(Epidata.paho_dengue(self.region, weeks))
  self.data = {}
  self.valid = {}
  # bidirectional epiweek <-> dense index maps
  self.ew2i, self.i2ew = {}, {}
  for ew in EW.range_epiweeks(weeks['from'], weeks['to'], inclusive=True):
    idx = len(self.ew2i)
    self.ew2i[ew] = idx
    self.i2ew[idx] = ew
  for row in lagged + stable:
    ew, observation, lag = row['epiweek'], row[self.target], row['lag']
    if ew not in self.ew2i:
      continue
    idx = self.ew2i[ew]
    if idx not in self.data:
      self.data[idx] = {}
      self.valid[idx] = {0: False, 1: False, 2: False, 'stable': False}
    if not (0 <= lag <= 2):
      # anything beyond lag 2 is treated as the finalized value
      lag = 'stable'
    self.data[idx][lag] = observation
    self.valid[idx][lag] = True
  self.weeks = sorted(list(self.data.keys()))
  for idx in self.weeks:
    if 'stable' not in self.data[idx]:
      continue
    # back-fill any missing lagged observation with the stable value
    for lag in range(3):
      if lag not in self.data[idx]:
        self.data[idx][lag] = self.data[idx]['stable']
def get_prediction(location, epiweek, name, fields, fetch, valid):
  """Fit a weighted regression on the signal and predict week ew3.

  Fetches the signal, validates availability, trains on historical
  (signal, wILI) pairs, and applies the model to the prediction week.
  """
  if type(fields) == str:
    # allow a single field name as shorthand for a one-element list
    fields = [fields]
  ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
  signal = extract(Epidata.check(fetch(weeks1)), fields)
  min_rows = 3 + len(fields)
  if ew3 not in signal:
    raise Exception('%s unavailable on %d' % (name, ew3))
  if len(signal) < min_rows:
    raise Exception('%s available less than %d weeks' % (name, min_rows))
  epiweeks, X, Y = get_training_set(location, epiweek, signal, valid)
  min_rows = min_rows - 1
  if len(Y) < min_rows:
    raise Exception('(w)ILI available less than %d weeks' % (min_rows))
  model = get_model(ew3, epiweeks, X, Y)
  return apply_model(ew3, model, signal[ew3])
def fit_loch_ness(location, epiweek, name, fields, fetch, valid):
  """Fit a weighted linear model of (w)ILI on `fields` and predict week ew3.

  Fetches the signal via `fetch`, pairs it with fluview (w)ILI ground
  truth, fits a weighted least-squares model (with constant and,
  when enough history exists, periodic bias terms), and returns the
  point prediction for the week after `epiweek`.
  """

  # Helper functions
  def get_weeks(epiweek):
    # week ranges: ew1..ew2 for training, ew1..ew3 for fetching the signal
    ew1 = 200330
    ew2 = epiweek
    ew3 = flu.add_epiweeks(epiweek, 1)
    weeks0 = Epidata.range(ew1, ew2)
    weeks1 = Epidata.range(ew1, ew3)
    return (ew1, ew2, ew3, weeks0, weeks1)

  def extract(rows, fields):
    # epiweek -> list of float field values
    data = {}
    for row in rows:
      data[row['epiweek']] = [float(row[f]) for f in fields]
    return data

  def get_training_set_data(data):
    # flatten {ew: {'x': ..., 'y': ...}} into parallel, week-sorted lists
    epiweeks = sorted(list(data.keys()))
    X = [data[ew]['x'] for ew in epiweeks]
    Y = [data[ew]['y'] for ew in epiweeks]
    return (epiweeks, X, Y)

  def get_training_set(location, epiweek, signal, valid):
    # pair signal weeks with (w)ILI, preferring unstable (issue ew2) values
    ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
    auth = secrets.api.fluview
    try:
      result = Epidata.fluview(location, weeks0, issues=ew2, auth=auth)
      rows = Epidata.check(result)
      unstable = extract(rows, ['wili'])
    except Exception:
      # versioned data may be unavailable; fall back to stable only
      unstable = {}
    rows = Epidata.check(Epidata.fluview(location, weeks0, auth=auth))
    stable = extract(rows, ['wili'])
    data = {}
    num_dropped = 0
    for ew in signal.keys():
      if ew == ew3:
        # never train on the week being predicted
        continue
      sig = signal[ew]
      if ew not in unstable:
        if valid and flu.delta_epiweeks(ew, ew3) <= 5:
          raise Exception('unstable wILI is not available on %d' % ew)
        if ew not in stable:
          num_dropped += 1
          continue
        wili = stable[ew]
      else:
        wili = unstable[ew]
      data[ew] = {'x': sig, 'y': wili}
    if num_dropped:
      msg = 'warning: dropped %d/%d signal weeks because (w)ILI was unavailable'
      print(msg % (num_dropped, len(signal)))
    return get_training_set_data(data)

  def dot(*Ms):
    """Compute the chained dot product of any number of matrices."""
    N = Ms[0]
    for M in Ms[1:]:
      N = np.dot(N, M)
    return N

  def get_weight(ew1, ew2):
    """
    This function gives the weight between two given
    epiweeks based on a function that:
      - drops sharply over the most recent ~3 weeks
      - falls off exponentially with time
      - puts extra emphasis on the past weeks at the
        same time of year (seasonality)
      - gives no week a weight of zero
    """
    dw = flu.delta_epiweeks(ew1, ew2)
    yr = 52.2
    hl1, hl2, bw = yr, 1, 4
    a = 0.05
    #b = (np.cos(2 * np.pi * (dw / yr)) + 1) / 2
    b = np.exp(-((min(dw % yr, yr - dw % yr) / bw) ** 2))
    c = 2 ** -(dw / hl1)
    d = 1 - 2 ** -(dw / hl2)
    return (a + (1 - a) * b) * c * d

  def get_periodic_bias(epiweek):
    # sin/cos features encoding position within the ~52.2-week year
    weeks_per_year = 52.2
    offset = flu.delta_epiweeks(200001, epiweek) % weeks_per_year
    angle = np.pi * 2 * offset / weeks_per_year
    return [np.sin(angle), np.cos(angle)]

  def apply_model(epiweek, beta, values):
    # evaluate the fitted model; bias terms must match those used in fitting
    bias0 = [1.]
    if beta.shape[0] > len(values) + 1:
      # constant and periodic bias
      bias1 = get_periodic_bias(epiweek)
      obs = np.array([values + bias0 + bias1])
    else:
      # constant bias only
      obs = np.array([values + bias0])
    return float(dot(obs, beta))

  def get_model(ew2, epiweeks, X, Y):
    # weighted least squares: beta = (X'WX)^-1 X'WY
    ne, nx1, nx2, ny = len(epiweeks), len(X), len(X[0]), len(Y)
    if ne != nx1 or nx1 != ny:
      raise Exception('length mismatch e=%d X=%d Y=%d' % (ne, nx1, ny))
    weights = np.diag([get_weight(ew1, ew2) for ew1 in epiweeks])
    X = np.array(X).reshape((nx1, nx2))
    Y = np.array(Y).reshape((ny, 1))
    bias0 = np.ones(Y.shape)
    if ne >= 26 and flu.delta_epiweeks(epiweeks[0], epiweeks[-1]) >= 52:
      # constant and periodic bias
      bias1 = np.array([get_periodic_bias(ew) for ew in epiweeks])
      X = np.hstack((X, bias0, bias1))
    else:
      # constant bias only
      X = np.hstack((X, bias0))
    XtXi = np.linalg.inv(dot(X.T, weights, X))
    XtY = dot(X.T, weights, Y)
    return np.dot(XtXi, XtY)

  if type(fields) == str:
    # allow a single field name as shorthand for a one-element list
    fields = [fields]
  ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
  rows = Epidata.check(fetch(weeks1))
  signal = extract(rows, fields)
  min_rows = 3 + len(fields)
  if ew3 not in signal:
    raise Exception('%s unavailable on %d' % (name, ew3))
  if len(signal) < min_rows:
    raise Exception('%s available less than %d weeks' % (name, min_rows))
  epiweeks, X, Y = get_training_set(location, epiweek, signal, valid)
  min_rows = min_rows - 1
  if len(Y) < min_rows:
    raise Exception('(w)ILI available less than %d weeks' % (min_rows))
  model = get_model(ew3, epiweeks, X, Y)
  value = apply_model(ew3, model, signal[ew3])
  return value
def get_epic(location, epiweek, valid):
  """Return the Epicast ('ec') one-week-ahead point prediction for `location`."""
  forecasts = Epidata.check(Epidata.delphi('ec', epiweek))
  return forecasts[0]['forecast']['data'][location]['x1']['point']
def fit_loch_ness(location, epiweek, name, fields, fetch, valid, target, signal_to_truth_ew_shift=0):
  """Fit a weighted linear model of a datasetname target and predict week ew3.

  Like the fluview variant, but ground truth comes from the datasetname
  targets endpoint and signal weeks may be shifted relative to truth by
  `signal_to_truth_ew_shift`.
  """
  # target_type is added for compatibility for other type of targets such as datasetname data

  # Helper functions
  def get_weeks(epiweek):
    # week ranges: ew1..ew2 for training, ew1..ew3 for fetching the signal
    ew1 = 199301
    ew2 = epiweek
    ew3 = flu.add_epiweeks(epiweek, 1)
    weeks0 = Epidata.range(ew1, ew2)
    weeks1 = Epidata.range(ew1, ew3)
    return (ew1, ew2, ew3, weeks0, weeks1)

  def extract(rows, fields, signal_to_truth_ew_shift):
    # shifted epiweek -> list of float field values
    data = {}
    for row in rows:
      data[flu.add_epiweeks(row['epiweek'], signal_to_truth_ew_shift)] = [
          float(row[f]) for f in fields
      ]
    return data

  def get_training_set_data(data):
    # flatten {ew: {'x': ..., 'y': ...}} into parallel, week-sorted lists
    epiweeks = sorted(list(data.keys()))
    X = [data[ew]['x'] for ew in epiweeks]
    Y = [data[ew]['y'] for ew in epiweeks]
    return (epiweeks, X, Y)

  def get_training_set_datasetname(location, epiweek, signal, target, signal_to_truth_ew_shift):
    # pair shifted signal weeks with datasetname ground truth
    ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
    groundTruth = dict()
    auth = secrets.api.datasetname_targets
    datasetnameData = Epidata.check(
        Epidata.datasetname_targets(auth, target, location, weeks0))
    for row in datasetnameData:
      groundTruth[row['epiweek']] = row['value']
    data = {}
    dropped_weeks = 0
    for signal_week in signal.keys():
      ground_truth_week = flu.add_epiweeks(signal_week, signal_to_truth_ew_shift)
      # skip the week we're trying to predict
      if ground_truth_week == ew3:
        continue
      sig = signal[signal_week]
      if ground_truth_week in groundTruth:
        label = groundTruth[ground_truth_week]
      else:
        dropped_weeks += 1
        continue
      data[ground_truth_week] = {'x': sig, 'y': label}
    if dropped_weeks:
      msg = 'warning: dropped %d/%d signal weeks because ground truth / target was unavailable'
      print(msg % (dropped_weeks, len(signal)))
    epiweeks = sorted(list(data.keys()))
    X = [data[week]['x'] for week in epiweeks]
    Y = [data[week]['y'] for week in epiweeks]
    return (epiweeks, X, Y)

  def dot(*Ms):
    """Compute the chained dot product of any number of matrices."""
    N = Ms[0]
    for M in Ms[1:]:
      N = np.dot(N, M)
    return N

  def get_weight(ew1, ew2):
    """
    This function gives the weight between two given
    epiweeks based on a function that:
      - drops sharply over the most recent ~3 weeks
      - falls off exponentially with time
      - puts extra emphasis on the past weeks at the
        same time of year (seasonality)
      - gives no week a weight of zero
    """
    dw = flu.delta_epiweeks(ew1, ew2)
    yr = 52.2
    hl1, hl2, bw = yr, 1, 4
    a = 0.05
    # b = (np.cos(2 * np.pi * (dw / yr)) + 1) / 2
    b = np.exp(-((min(dw % yr, yr - dw % yr) / bw)**2))
    c = 2**-(dw / hl1)
    d = 1 - 2**-(dw / hl2)
    return (a + (1 - a) * b) * c * d

  def get_periodic_bias(epiweek):
    # sin/cos features encoding position within the ~52.2-week year
    weeks_per_year = 52.2
    offset = flu.delta_epiweeks(200001, epiweek) % weeks_per_year
    angle = np.pi * 2 * offset / weeks_per_year
    return [np.sin(angle), np.cos(angle)]

  def apply_model(epiweek, beta, values):
    # evaluate the fitted model; bias terms must match those used in fitting
    bias0 = [1.]
    if beta.shape[0] > len(values) + 1:
      # constant and periodic bias
      bias1 = get_periodic_bias(epiweek)
      obs = np.array([values + bias0 + bias1])
    else:
      # constant bias only
      obs = np.array([values + bias0])
    return float(dot(obs, beta))

  def get_model(ew2, epiweeks, X, Y):
    # weighted least squares: beta = (X'WX)^-1 X'WY
    ne, nx1, nx2, ny = len(epiweeks), len(X), len(X[0]), len(Y)
    if ne != nx1 or nx1 != ny:
      raise Exception('length mismatch e=%d X=%d Y=%d' % (ne, nx1, ny))
    weights = np.diag([get_weight(ew1, ew2) for ew1 in epiweeks])
    X = np.array(X).reshape((nx1, nx2))
    Y = np.array(Y).reshape((ny, 1))
    bias0 = np.ones(Y.shape)
    if ne >= 26 and flu.delta_epiweeks(epiweeks[0], epiweeks[-1]) >= 52:
      # constant and periodic bias
      bias1 = np.array([get_periodic_bias(ew) for ew in epiweeks])
      X = np.hstack((X, bias0, bias1))
    else:
      # constant bias only
      X = np.hstack((X, bias0))
    XtXi = np.linalg.inv(dot(X.T, weights, X))
    XtY = dot(X.T, weights, Y)
    return np.dot(XtXi, XtY)

  if type(fields) == str:
    # allow a single field name as shorthand for a one-element list
    fields = [fields]
  ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
  rows = Epidata.check(fetch(weeks1))
  signal = extract(rows, fields, signal_to_truth_ew_shift)
  # rule of thumb: require num training instances >= 10x num features and >= 52
  min_rows = max(10 * len(fields), 52)
  if ew3 not in signal:
    raise Exception('%s unavailable on %d' % (name, ew3))
  if len(signal) < min_rows:
    raise Exception('%s available less than %d weeks' % (name, min_rows))
  epiweeks, X, Y = get_training_set_datasetname(
      location, epiweek, signal, target, signal_to_truth_ew_shift)
  min_rows = min_rows - 1
  if len(Y) < min_rows:
    raise Exception(
        'datasetname_targets available less than %d weeks' % (min_rows))
  model = get_model(ew3, epiweeks, X, Y)
  value = apply_model(ew3, model, signal[ew3])
  return value
def get_most_recent_issue():
  """Return the newest FluView issue among the last 10 weeks of data."""
  # search for FluView issues within the last 10 weeks
  current_week = EpiDate.today().get_ew()
  start_week = flu.add_epiweeks(current_week, -9)
  week_range = Epidata.range(start_week, current_week)
  rows = Epidata.check(Epidata.fluview('nat', week_range))
  return max(row['issue'] for row in rows)
def fit_loch_ness(location, epiweek, name, field, fetch, valid, target):
  """Fit a weighted linear model of weekly dengue counts and predict week ew3.

  Like the fluview variant, but ground truth is PAHO dengue data
  (cumulative counts converted to weekly) and the signal is a single
  scalar field.
  """
  # target_type is added for compatibility for other type of targets such as norovirus data

  # Helper functions
  def get_weeks(epiweek):
    # week ranges: ew1..ew2 for training, ew1..ew3 for fetching the signal
    ew1 = 201401
    ew2 = epiweek
    ew3 = flu.add_epiweeks(epiweek, 1)
    weeks0 = Epidata.range(ew1, ew2)
    weeks1 = Epidata.range(ew1, ew3)
    return (ew1, ew2, ew3, weeks0, weeks1)

  def extract(rows, field, to_weekly=False):
    # epiweek -> float value; optionally convert cumulative to weekly
    data = {}
    for row in rows:
      data[row['epiweek']] = float(row[field])
    if not to_weekly:
      return data
    else:
      w_data = cum_to_week(data)
      return w_data

  def get_training_set_data(data):
    # flatten {ew: {'x': ..., 'y': ...}} into parallel, week-sorted lists
    epiweeks = sorted(list(data.keys()))
    X = [data[ew]['x'] for ew in epiweeks]
    Y = [data[ew]['y'] for ew in epiweeks]
    return (epiweeks, X, Y)

  def get_training_set(location, epiweek, signal, valid):
    # pair signal weeks with weekly dengue counts; note `valid` is unused here
    ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
    result = Epidata.paho_dengue(location, weeks0)
    rows = Epidata.check(result)
    stable = extract(rows, 'num_dengue', to_weekly=True)
    data = {}
    for ew in signal.keys():
      if ew == ew3 or ew not in stable:
        continue
      sig = signal[ew]
      num_dengue = stable[ew]
      data[ew] = {'x': sig, 'y': num_dengue}
    return get_training_set_data(data)

  def dot(*Ms):
    """Compute the chained dot product of any number of matrices."""
    N = Ms[0]
    for M in Ms[1:]:
      N = np.dot(N, M)
    return N

  def get_weight(ew1, ew2):
    """
    This function gives the weight between two given
    epiweeks based on a function that:
      - drops sharply over the most recent ~3 weeks
      - falls off exponentially with time
      - puts extra emphasis on the past weeks at the
        same time of year (seasonality)
      - gives no week a weight of zero
    """
    dw = flu.delta_epiweeks(ew1, ew2)
    yr = 52.2
    hl1, hl2, bw = yr, 1, 4
    a = 0.05
    # b = (np.cos(2 * np.pi * (dw / yr)) + 1) / 2
    b = np.exp(-((min(dw % yr, yr - dw % yr) / bw) ** 2))
    c = 2 ** -(dw / hl1)
    d = 1 - 2 ** -(dw / hl2)
    return (a + (1 - a) * b) * c * d

  def get_periodic_bias(epiweek):
    # sin/cos features encoding position within the ~52.2-week year
    weeks_per_year = 52.2
    offset = flu.delta_epiweeks(201401, epiweek) % weeks_per_year
    angle = np.pi * 2 * offset / weeks_per_year
    return [np.sin(angle), np.cos(angle)]

  def apply_model(epiweek, beta, values):
    # evaluate the fitted model; bias terms must match those used in fitting
    bias0 = [1.]
    if beta.shape[0] > len(values) + 1:
      # constant and periodic bias
      bias1 = get_periodic_bias(epiweek)
      obs = np.array([values + bias0 + bias1])
    else:
      # constant bias only
      obs = np.array([values + bias0])
    return float(dot(obs, beta))

  def get_model(ew2, epiweeks, X, Y):
    # weighted least squares: beta = (X'WX)^-1 X'WY
    ne, nx1, ny = len(epiweeks), len(X), len(Y)
    # scalar signals are treated as one-column feature matrices
    if type(X[0]) == type([]):
      nx2 = len(X[0])
    else:
      nx2 = 1
    if ne != nx1 or nx1 != ny:
      raise Exception('length mismatch e=%d X=%d Y=%d' % (ne, nx1, ny))
    weights = np.diag([get_weight(ew1, ew2) for ew1 in epiweeks])
    X = np.array(X).reshape((nx1, nx2))
    Y = np.array(Y).reshape((ny, 1))
    bias0 = np.ones(Y.shape)
    if ne >= 26 and flu.delta_epiweeks(epiweeks[0], epiweeks[-1]) >= 52:
      # constant and periodic bias
      bias1 = np.array([get_periodic_bias(ew) for ew in epiweeks])
      X = np.hstack((X, bias0, bias1))
    else:
      # constant bias only
      X = np.hstack((X, bias0))
    XtXi = np.linalg.inv(dot(X.T, weights, X))
    XtY = dot(X.T, weights, Y)
    return np.dot(XtXi, XtY)

  ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
  rows = Epidata.check(fetch(weeks1))
  signal = extract(rows, field)
  min_rows = 4
  if ew3 not in signal:
    raise Exception('%s unavailable on %d' % (name, ew3))
  if len(signal) < min_rows:
    raise Exception('%s available less than %d weeks' % (name, min_rows))
  # NOTE(review): `target` is passed where get_training_set expects `valid`;
  # harmless today since that parameter is never read, but confirm intent.
  epiweeks, X, Y = get_training_set(location, epiweek, signal, target)
  min_rows = min_rows - 1
  if len(Y) < min_rows:
    raise Exception('paho_dengue available less than %d weeks' % (min_rows))
  model = get_model(ew3, epiweeks, X, Y)
  value = apply_model(ew3, model, [signal[ew3]])
  return value
def nowcast(epiweek, epidata_cache=None):
  """Produce a sensor-fusion nowcast of wILI for the given epiweek.

  Gathers all available sensor readings for `epiweek`, estimates each
  sensor's historical error against fluview ground truth, and fuses the
  readings into state-level estimates projected to all output locations.
  Returns (outputs, point_estimates, std_devs).
  """
  si = StateInfo()
  # all sensors and locations
  all_names, all_loc = get_all_sensors()
  # get sensors available on the target week
  rows = Epidata.check(
      Epidata.sensors(secrets.api.sensors, all_names, all_loc, epiweek))
  present = {}
  for row in rows:
    name, loc, value = row['name'], row['location'], row['value']
    if name not in present:
      present[name] = {}
    if loc not in present[name]:
      present[name][loc] = value
  # get the history of each available sensor (6 sec)
  past = {}
  sensor_locs = set()
  missing = set()
  past_weeks = Epidata.range(FIRST_DATA_EPIWEEK, flu.add_epiweeks(epiweek, -1))
  all_epiweeks = [
      w for w in flu.range_epiweeks(
          past_weeks['from'], past_weeks['to'], inclusive=True)
  ]
  num_obs = len(all_epiweeks)
  for name in present.keys():
    past[name] = {}
    for loc in present[name].keys():
      past[name][loc] = {}
      sensor_locs |= set([loc])
      try:
        if epidata_cache is not None:
          rows = epidata_cache.sensors(name, loc, past_weeks)
        else:
          rows = Epidata.check(
              Epidata.sensors(secrets.api.sensors, name, loc, past_weeks))
        if len(rows) < 2:
          # too little history to estimate error statistics
          raise Exception()
        for row in rows:
          past[name][loc][row['epiweek']] = row['value']
      except Exception:
        # was a bare `except:`; narrowed so KeyboardInterrupt etc. propagate
        missing |= set([(name, loc)])
  # remove sensors with zero past data
  for (n, l) in missing:
    del present[n][l]
    if len(present[n]) == 0:
      del present[n]
    del past[n][l]
    if len(past[n]) == 0:
      del past[n]
  # inventory
  all_sensors = []
  for n in all_names:
    for l in si.nat + si.hhs + si.cen + si.sta:
      if n in past and l in past[n]:
        all_sensors.append((n, l))
  num_sensors = len(all_sensors)
  # get historical ground truth for each sensor (4 sec)
  truth = {}
  auth = secrets.api.fluview
  for loc in sensor_locs:
    truth[loc] = {}
    if epidata_cache is not None:
      srows = epidata_cache.fluview(loc, past_weeks)
    else:
      srows = Epidata.check(Epidata.fluview(loc, past_weeks, auth=auth))
    sdata = dict([(r['epiweek'], r) for r in srows])
    udata = {}
    try:
      # prefer values as known on the most recent issue
      i = past_weeks['to']
      result = Epidata.fluview(loc, past_weeks, issues=i, auth=auth)
      urows = Epidata.check(result)
      udata = dict([(r['epiweek'], r) for r in urows])
    except Exception:
      # versioned data unavailable; stable data alone is acceptable
      # (was a bare `except:`; narrowed to Exception)
      pass
    rows = []
    for ew in all_epiweeks:
      if ew in udata:
        rows.append(udata[ew])
      else:
        rows.append(sdata[ew])
    for row in rows:
      truth[loc][row['epiweek']] = row['wili']
  # rows are epiweeks, cols are sensors; NaN marks missing observations
  X = np.zeros((num_obs, num_sensors)) * np.nan
  for (r, ew) in enumerate(all_epiweeks):
    for (c, (name, loc)) in enumerate(all_sensors):
      if name in past and loc in past[name] and ew in past[name][
          loc] and loc in truth and ew in truth[loc]:
        X[r, c] = past[name][loc][ew] - truth[loc][ew]
  # sparse precision matrix
  Ri = Fusion.precision(X, mean=np.zeros((1, num_sensors)), b=0.25)
  # prepare for sensor fusion
  inputs = all_sensors
  state = si.sta
  outputs = si.nat + si.hhs + si.cen + si.sta
  num_i, num_s, num_o = len(inputs), len(state), len(outputs)
  # input (z): [ num_i x 1 ]
  # state (x): [ num_s x 1 ]
  # output (y): [ num_o x 1 ]
  # S->I (H): [ num_i x num_s ]
  # S->O (W): [ num_o x num_s ]
  z = np.array([present[n][l] for (n, l) in inputs]).reshape((num_i, 1))
  H = np.zeros((num_i, num_s))
  W = np.zeros((num_o, num_s))
  # populate H, given input signals
  for (row, (name, location)) in enumerate(inputs):
    for (col, loc) in enumerate(state):
      if loc in si.within[location]:
        H[row, col] = si.weight[location][loc]
  if np.linalg.matrix_rank(np.dot(H.T, H)) != num_s:
    # fixed typo in message ('singluar' -> 'singular')
    raise Exception('H is singular')
  if not np.allclose(np.sum(H, axis=1), 1):
    raise Exception('H rows do not sum to 1')
  # populate W, given output locations
  for (row, location) in enumerate(outputs):
    for (col, loc) in enumerate(state):
      if loc in si.within[location]:
        W[row, col] = si.weight[location][loc]
  if not np.allclose(np.sum(W, axis=1), 1):
    raise Exception('W rows do not sum to 1')
  # sensor fusion
  x, P = Fusion.fuse(z, Ri, H)
  y, S = Fusion.extract(x, P, W)
  print(num_obs, num_i, num_s, num_o)
  pt = [float(v) for v in y.flatten()]
  std = [float(v) for v in np.sqrt(S).flatten()]
  return (outputs, pt, std)