Example #1
 def _get_features(self,
                   ew,
                   signal_to_truth_shift=0,
                   valid=False,
                   mask=None):
     if mask is None:
         # default: keep all 10 feature columns
         mask = np.ones(10, dtype=bool)
     X = np.zeros((1, 10))
     i = self.ew2i[ew]
     X[0, 0] = 1
     for lag in range(3):
         if valid and not self.valid[i - lag][lag]:
             w = self.i2ew[i - lag]
             raise Exception('missing unstable wILI (ew=%d|lag=%d)' %
                             (w, lag))
         try:
             X[0, 1 + lag] = np.log(
                 np.maximum(
                     0.01,
                     self.data[i - lag - signal_to_truth_shift]['stable']))
         except Exception:
             X[0, 1 + lag] = np.nan
     for holiday in range(4):
         if EW.split_epiweek(EW.add_epiweeks(ew, holiday))[1] == 1:
             X[0, 4 + holiday] = 1
     y, w = EW.split_epiweek(ew)
     N = EW.get_num_weeks(y)
     offset = np.pi * 2 * w / N
     X[0, 8] = np.sin(offset)
     X[0, 9] = np.cos(offset)
     # todo linear time trend covariate?
     return X[:, mask]
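
As a side note, the `mask` argument selects feature columns via NumPy boolean indexing. A minimal, self-contained sketch of that mechanism (the column layout and values below are illustrative only, not taken from the class above):

import numpy as np

# hypothetical 1x10 feature row: intercept, 3 lags, 4 holiday flags, sin, cos
X = np.arange(10, dtype=float).reshape(1, 10)

# keep only the intercept and the two seasonal columns
mask = np.zeros(10, dtype=bool)
mask[[0, 8, 9]] = True

print(X[:, mask])  # [[0. 8. 9.]]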
Example #2
 def _get_features(self, ew, valid=True):
   X = np.zeros((1, 7))
   i = self.ew2i[ew]
   X[0, 0] = 1
   for holiday in range(4):
     if EW.split_epiweek(EW.add_epiweeks(ew, holiday))[1] == 1:
       X[0, 1 + holiday] = 1
   y, w = EW.split_epiweek(ew)
   N = EW.get_num_weeks(y)
   offset = np.pi * 2 * w / N
   X[0, 5] = np.sin(offset)
   X[0, 6] = np.cos(offset)
   # todo linear time trend covariate?
   return X
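
The sine/cosine pair in columns 5 and 6 is a standard cyclic encoding of week-of-year, which keeps the last week of one year numerically close to the first week of the next. A standalone sketch of the idea (the helper name is my own, and a fixed 52-week year is assumed for simplicity; the code above uses EW.get_num_weeks to handle 53-week years):

import numpy as np

def cyclic_week_features(week, num_weeks=52):
    # map week 1..num_weeks onto the unit circle
    angle = 2 * np.pi * week / num_weeks
    return np.sin(angle), np.cos(angle)

print(cyclic_week_features(1))   # roughly (0.12, 0.99)
print(cyclic_week_features(52))  # roughly (0.00, 1.00)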
Example #3
 def _get_partial_trajectory(self, epiweek, valid=True):
   y, w = EW.split_epiweek(epiweek)
   if w < 30:
     y -= 1
   ew1 = EW.join_epiweek(y, 30)
   ew2 = epiweek
   limit = EW.add_epiweeks(ew2, -5)
   weeks = Epidata.range(ew1, ew2)
   stable = Epidata.check(Epidata.fluview(self.region, weeks))
   try:
     unstable = Epidata.check(Epidata.fluview(self.region, weeks, issues=ew2))
   except Exception:
     unstable = []
   wili = {}
   for row in stable:
     ew, value = row['epiweek'], row['wili']
     if not valid or ew < limit:
       wili[ew] = value
   for row in unstable:
     ew, value = row['epiweek'], row['wili']
     wili[ew] = value
   curve = []
   for ew in EW.range_epiweeks(ew1, ew2, inclusive=True):
     if ew not in wili:
       if valid:
         t = 'unstable'
       else:
         t = 'any'
       raise Exception('wILI (%s) not available for week %d' % (t, ew))
     curve.append(wili[ew])
   n1 = EW.delta_epiweeks(ew1, ew2) + 1
   n2 = len(curve)
   if n1 != n2:
     raise Exception('missing data (expected %d, found %d)' % (n1, n2))
   return curve
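
The core of the method above is a merge in which finalized ('stable') values are only trusted up to a cutoff week and as-of-issue ('unstable') values override them. A minimal dictionary sketch with made-up epiweeks and wILI values:

# finalized values, only trusted before the cutoff week
stable = {202001: 1.2, 202002: 1.4, 202003: 1.5}
limit = 202003

# values as reported at the time (later weeks may still be revised)
unstable = {202002: 1.3, 202003: 1.1}

wili = {ew: v for ew, v in stable.items() if ew < limit}
wili.update(unstable)  # preliminary values take precedence where available

print(wili)  # {202001: 1.2, 202002: 1.3, 202003: 1.1}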
Example #4
 def __init__(self, region):
   self.region = region
   weeks = Epidata.range(200330, 202330)
   rows = Epidata.check(Epidata.fluview(self.region, weeks))
   self.seasons = {}
   for row in rows:
     ew, wili = row['epiweek'], row['wili']
     y, w = EW.split_epiweek(ew)
     if w < 30:
       y -= 1
     i = EW.delta_epiweeks(EW.join_epiweek(y, 30), ew)
     if y not in self.seasons:
       self.seasons[y] = {}
     if 0 <= i < 52:
       self.seasons[y][i] = wili
   years = sorted(list(self.seasons.keys()))
   for year in years:
     if len(self.seasons[year]) != 52:
       del self.seasons[year]
   if 2008 in self.seasons and 2009 in self.seasons:
     for i in range(40, 52):
       self.seasons[2008][i] = self.seasons[2009][i]
     del self.seasons[2009]
   curve = lambda y: [self.seasons[y][i] for i in range(52)]
   self.years = sorted(list(self.seasons.keys()))
   self.curves = dict([(y, curve(y)) for y in self.years])
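
A season in this constructor runs from epiweek 30 of one year through epiweek 29 of the next, and each week is indexed by its offset from week 30. Below is a rough standalone version of that bucketing; `season_and_index` is my own helper and assumes 52-week years (the constructor above uses EW.delta_epiweeks, which also handles 53-week years):

def season_and_index(epiweek, season_start_week=30, weeks_per_year=52):
    """Map an epiweek like 201740 to (season year, weeks since week 30)."""
    year, week = divmod(epiweek, 100)
    if week < season_start_week:
        year -= 1
    return year, (week - season_start_week) % weeks_per_year

print(season_and_index(201740))  # (2017, 10)
print(season_and_index(201805))  # (2017, 27)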
Example #5
 def _ew2date(ew):
     # parse the epiweek
     year, week = flu.split_epiweek(ew)
     # get the date object (middle of the week; Wednesday)
     date = EpiDate.from_epiweek(year, week)
     # go to the first day of the week (Sunday)
     date = date.add_days(-3)
     # date as string
     return str(date)
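
For reference, the same epiweek-to-Sunday conversion can be done with only the standard library. `epiweek_sunday` below is my own helper (not part of EpiDate) and relies on the MMWR rule that week 1 is the Sunday-to-Saturday week containing at least four days of the new year:

from datetime import date, timedelta

def epiweek_sunday(year, week):
    # week 1's Sunday is the Sunday on or before January 4
    jan4 = date(year, 1, 4)
    week1_sunday = jan4 - timedelta(days=(jan4.weekday() + 1) % 7)
    return week1_sunday + timedelta(weeks=week - 1)

print(epiweek_sunday(2020, 1))   # 2019-12-29
print(epiweek_sunday(2020, 10))  # 2020-03-01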
Example #6
 def _get_features(self, ew, valid=True):
   X = np.zeros((1, 10))
   i = self.ew2i[ew]
   X[0, 0] = 1
   for lag in range(3):
     if valid and not self.valid[i - lag][lag]:
       w = self.i2ew[i - lag]
       raise Exception('missing unstable %s (ew=%d|lag=%d)' % (self.target, w, lag))
     X[0, 1 + lag] = self.data[i - lag][lag]
   for holiday in range(4):
     if EW.split_epiweek(EW.add_epiweeks(ew, holiday))[1] == 1:
       X[0, 4 + holiday] = 1
   y, w = EW.split_epiweek(ew)
   N = EW.get_num_weeks(y)
   offset = np.pi * 2 * w / N
   X[0, 8] = np.sin(offset)
   X[0, 9] = np.cos(offset)
   return X
Example #7
 def predict(self, epiweek, train=True, valid=True):
   if train:
     self.train(epiweek)
   if self.training_week > epiweek:
     raise Exception('trained on future data')
   y, w = EW.split_epiweek(epiweek)
   #if 30 <= w < 40:
   #  return float(self.model.mean[w - 30])
   #if 20 < w < 30:
   #  return float(self.model.mean[-(30 - w)])
    if 20 < w < 40:
      raise Exception('no prediction on weeks 21--39')
   curve = self._get_partial_trajectory(epiweek, valid=valid)
   arch = self._fit(curve)
   return float(arch[len(curve)])
Example #8
 def _train(self, region):
     if region in self.bf_var:
         # already trained
         return
     if len(region) == 2:
         # TODO: this is a hack for state ILI
         # assume backfill of region 4
         print('FIXME: setting backfill for %s as hhs4' % region)
         self.bf_var[region] = self.bf_var['hhs4']
         self.emp_mean[region] = self.emp_mean['hhs4']
         self.emp_var[region] = self.emp_var['hhs4']
         self.emp_curves[region] = self.emp_curves['hhs4']
         return
     stable = self._get_stable(region)
     start_weeks = [flu.get_season(ew)[0] for ew in stable.keys()]
     curves = []
     seasons = set(
         [flu.split_epiweek(ew)[0] for ew in start_weeks if ew is not None])
     for s in seasons:
         ew1 = flu.join_epiweek(s + 0, 40)
         if self.forecast_type == ForecastType.WILI:
             ew2 = flu.add_epiweeks(ew1, 37)
         else:
             ew2 = flu.add_epiweeks(ew1, 29)
         # print("stable: ", stable)
         # print("range_epiweeks: ", [i for i in flu.range_epiweeks(ew1, ew2)])
         curve = [stable[ew] for ew in flu.range_epiweeks(ew1, ew2)]
         curves.append(curve)
     self.emp_mean[region] = np.mean(curves, axis=0)
     self.emp_var[region] = np.var(curves, axis=0, ddof=1)
     self.emp_curves[region] = curves
     if self.backfill_weeks is None:
         self.bf_var[region] = [0]
     else:
         self.bf_var[region] = []
         for lag in range(self.backfill_weeks):
             unstable = self._get_unstable(region, lag)
             changes = [
                 stable[ew] - unstable[ew]
                 for ew in stable.keys() & unstable.keys()
             ]
             if len(changes) < 2:
                 raise Exception('not enough data')
             self.bf_var[region].append(np.var(changes, ddof=1))
     print(
         ' %5s: %s' %
         (region, ' '.join(['%.3f' % (b**0.5)
                            for b in self.bf_var[region]])))
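
The backfill variance collected above is just the sample variance of "stable minus first-reported" differences over the weeks where both values exist. A minimal sketch with invented numbers:

import numpy as np

# finalized and as-first-reported values for the same epiweeks (made up)
stable = {202001: 2.1, 202002: 2.6, 202003: 3.0}
unstable = {202001: 1.9, 202002: 2.4, 202003: 3.1}

changes = [stable[ew] - unstable[ew] for ew in stable.keys() & unstable.keys()]
print(np.var(changes, ddof=1))  # unbiased sample variance of the revisions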
Example #9
  def extract_epiweek_and_team(filename):
    """
    Extract the submission epiweek (epiweek of most recently published report)
    and the team name from the file name of a flu contest submission.

    The return value is a tuple of:
      1. the submission epiweek (e.g. 201751)
      2. the team name (e.g. "delphi-epicast")
    """

    # this is the naming convention for 2017 flu contest submissions
    pattern = re.compile(r'^EW(\d{2})-(.*)-(\d{4})-(\d{2})-(\d{2})\.csv$')
    match = pattern.match(os.path.basename(filename))
    if match is None:
      # only able to parse this specific naming convention
      raise Exception('unexpected filename format: %s' % filename)

    week = int(match.group(1))
    team = match.group(2)
    year = int(match.group(3))
    month = int(match.group(4))
    day = int(match.group(5))
    epiweek = EpiDate(year, month, day).get_ew()

    # We know the week number, but the year has to be inferred from the
    # submission date. Since the week of submission is never less than the week
    # of the most recent report, we can step backwards from the week of
    # submission until we find the expected week number. Ordinarily, this will
    # take exactly two steps. For example, data collected on 2017w51 is
    # reported on 2017w52, and our forecast is submitted on 2018w01; so we
    # start with 2018w01 and step backwards until find the first week 51, which
    # is 2017w51.
    if not 1 <= week <= 53:
      # prevent an infinite loop
      raise Exception('invalid week number: %d' % week)
    while Epiweek.split_epiweek(epiweek)[1] != week:
      epiweek = Epiweek.add_epiweeks(epiweek, -1)

    return epiweek, team
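
A quick illustration of the filename parsing and year inference, using a hypothetical submission name that follows the 2017 convention (the team name matches the docstring example; the date is invented):

import os
import re

filename = 'EW51-delphi-epicast-2018-01-02.csv'
pattern = re.compile(r'^EW(\d{2})-(.*)-(\d{4})-(\d{2})-(\d{2})\.csv$')
match = pattern.match(os.path.basename(filename))
print(match.groups())  # ('51', 'delphi-epicast', '2018', '01', '02')
# 2018-01-02 falls in 2018w01; stepping back to the first week numbered 51
# yields 2017w51, the epiweek of the most recently published report.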
Example #10
def cum_to_week(data):
    epiweeks = list(data.keys())
    all_epiweeks = list(
        EW.range_epiweeks(min(epiweeks), max(epiweeks), inclusive=True))

    result = np.zeros(len(all_epiweeks))
    last_valid = (-1, 0)  # (index, cumulative value) of the last valid report
    for i, ew in enumerate(all_epiweeks):
        if ew in data and data[ew] is not None:
            # evenly distribute the cumulative increase over any missing weeks
            result[last_valid[0] + 1:i + 1] = (
                (data[ew] - last_valid[1]) / float(i - last_valid[0]))
            last_valid = (i, data[ew])
        yr, wk = EW.split_epiweek(ew)
        if EW.get_num_weeks(yr) == wk:
            # last week of the year: no carry-over information, fill with 0s
            result[last_valid[0] + 1:i + 1] = 0
            last_valid = (i, 0)  # start the new year at 0
    return {all_epiweeks[i]: result[i] for i in range(len(all_epiweeks))}
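
The gap-filling step above spreads each cumulative increase evenly over any missing weeks. A self-contained sketch of just that step, without the epiweek bookkeeping (the counts below are made up):

import numpy as np

# cumulative totals with a reporting gap (None) in the middle
cumulative = [10, 25, None, 40, 55]

weekly = np.zeros(len(cumulative))
last = (-1, 0)  # (index, cumulative value) of the last report
for i, value in enumerate(cumulative):
    if value is not None:
        # spread the increase evenly across the gap since the last report
        weekly[last[0] + 1:i + 1] = (value - last[1]) / float(i - last[0])
        last = (i, value)

print(weekly)  # 10.0, 15.0, 7.5, 7.5, 15.0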
Example #11
  def load_csv(filename):
    timestamp = None
    epiweek, team = ForecastIO.extract_epiweek_and_team(filename)

    season_start = Epiweek.get_season(epiweek)[0]
    season = Epiweek.split_epiweek(season_start)[0]

    forecast = Forecast(season, timestamp, team, epiweek)

    # the default column layout
    # can be updated based on header row
    canonical_fields = [f.lower() for f in (
      'Location', 'Target', 'Type', 'Unit', 'Bin_start_incl',
      'Bin_end_notincl', 'Value'
    )]
    field_to_column = dict(zip(canonical_fields, range(len(canonical_fields))))

    # read the csv one row at a time
    with open(filename, 'r', newline='') as f:
      reader = csv.reader(f)
      for row in reader:
        # skip header row(s)
        fields = [f.lower() for f in row]
        if 'location' in fields:
          # update the field-to-column-index mapping
          field_to_column = dict(zip(fields, range(len(fields))))
          continue

        # extract values
        values = [row[field_to_column[f]] for f in canonical_fields]
        location, target, type_, unit, start, end, value = values

        # update forecast
        ForecastIO.__load_row(forecast, location, target, type_, start, value)

    # return the forecast (containing the per-location forecasts)
    return forecast
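
The header handling above maps lower-cased field names to column indexes so that submissions may order their columns arbitrarily. A standalone sketch of that pattern with a tiny made-up CSV:

import csv
import io

csv_text = 'Location,Target,Value\nnat,1 wk ahead,2.3\n'
field_to_column = {}
for row in csv.reader(io.StringIO(csv_text)):
    fields = [f.lower() for f in row]
    if 'location' in fields:
        # header row: rebuild the field-to-column-index mapping
        field_to_column = dict(zip(fields, range(len(fields))))
        continue
    print(row[field_to_column['value']])  # 2.3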
Example #12
def load_submission(file, system=None, epiweek=None, insane=False, test=False, verbose=False):
  # logging
  log = print if verbose else lambda x: x

  # load the forecast
  log('loading %s...' % file)
  fc = ForecastIO.load_csv(file)
  log(' forecast file parsed')

  # set the system (team) name
  if system is None:
    systems = [
      ('ec', 'delphi-epicast'),
      ('eb', 'delphi-eb'),
      ('sp', 'delphi-spline'),
      ('st', 'delphi-stat'),
      ('af', 'delphi-archefilter'),
    ]
    num_found = 0
    for (name, label) in systems:
      if label in fc.team.lower():
        num_found += 1
        system = name
    if num_found != 1:
      raise Exception('unrecognized system/team name')
  log(' system: %s' % system)

  # override epiweek
  if epiweek is not None and flu.check_epiweek(epiweek):
    fc.epiweek = epiweek
    fc.epiweek2 = flu.split_epiweek(epiweek)[1]
  epiweek = fc.epiweek
  log(' epiweek: %d' % epiweek)

  # sanity check
  if insane:
    log(' sanity check skipped')
  else:
    fc.sanity_check()
    log(' sanity check passed')

  # export JSON string
  fc_json = ForecastIO.export_json_delphi(fc)
  log(' forecast exported (%.1f KB)' % (len(fc_json) / 1024))

  # store forecast
  u, p = secrets.db.epi
  cnx = mysql.connector.connect(user=u, password=p, database='epidata')
  log(' connected to database')
  sql = """
    INSERT INTO
      `forecasts` (`system`, `epiweek`, `json`)
    VALUES
      (%s, %s, %s)
    ON DUPLICATE KEY UPDATE
      `json` = %s
  """
  values = (system, epiweek, fc_json, fc_json)
  if test:
    log(' test mode, no commit')
  else:
    cur = cnx.cursor()
    cur.execute(sql, values)
    cur.close()
    cnx.commit()
    log(' forecast committed')

  # cleanup
  cnx.close()
  log('forecast loaded')
Example #13
 def get_season(epiweek):
     """Return the first year of the season that contains the given epiweek."""
     year, week = split_epiweek(epiweek)
     if week < 40:
         year -= 1
     return year
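
Illustrative calls (these assume split_epiweek returns a (year, week) pair, as in the other snippets):

print(get_season(201740))  # 2017: week 40 opens the 2017/18 season
print(get_season(201815))  # 2017: week 15 still belongs to the 2017/18 season
print(get_season(201845))  # 2018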
Example #14
    def plot(forecasts, prefix, fig_label=''):
        # timing
        epiweek = forecasts[0][0].epiweek
        ew0, ew1 = flu.get_season(epiweek)
        num_weeks = flu.delta_epiweeks(ew0, ew1) + 1
        year = flu.split_epiweek(ew0)[0]

        # plot settings
        x_ticks = [i for i in range(0, num_weeks, 3)]
        x_tick_labels = [
            '%02d' % ForecastIO.get_index_week(i) for i in x_ticks
        ]
        y_ticks = [i for i in range(0, 14, 2)]
        regions = ['nat'] + ['hhs%s' % i for i in range(1, 11)]

        # TODO: avoid hardcoding these values everywhere
        baseline_values_2019 = [
            2.4, 1.9, 3.2, 1.9, 2.4, 1.9, 3.8, 1.7, 2.7, 2.4, 1.5
        ]
        baselines = dict(
            (r, v) for (r, v) in zip(regions, baseline_values_2019))
        bin_size = forecasts[0][0].ili_bin_size

        # get the somewhat sorted list of all unique locations
        locations = []
        for info in forecasts:
            fc = info[0]
            for loc in fc.get_locations():
                if loc not in locations:
                    locations.append(loc)

        # plot each region
        for region in locations:

            # only consider forecasts that include this location
            region_forecasts = []
            for info in forecasts:
                if info[0].has_forecast(region):
                    region_forecasts.append(info)

            # center subplot
            plt.figure(figsize=(12, 12))
            ax2 = plt.subplot(3, 2, 3)
            if region in baselines:
                plt.axhline(baselines[region], color='#888888')
            weeks = [i for i in range(flu.delta_epiweeks(ew0, epiweek) + 1)]
            values = Plotter.get_unstable_wILI(region, ew0, epiweek)
            plt.plot(weeks, values, color='#000000', linewidth=2)
            weeks = [flu.delta_epiweeks(ew0, epiweek) + i for i in range(1, 5)]
            for (forecast, label, color) in region_forecasts:
                fc = forecast.get_forecast(region)
                values = [fc.get_lookahead(i)['point'] for i in range(1, 5)]
                plt.plot(weeks, values, color=color, linewidth=2)
            ax2.set_xbound(0, 33)
            ax2.set_ybound(0, 12)
            ax2.set_xticks(x_ticks)
            ax2.set_yticks(y_ticks)
            ax2.set_xticklabels(x_tick_labels)
            ax2.get_xaxis().set_tick_params(labelbottom=True, labeltop=True)
            ax2.get_yaxis().set_tick_params(labelleft=True, labelright=True)

            # top subplot: peakweek
            top = Plotter.weekly_subplot(region_forecasts, region,
                                         plt.subplot(3, 2, 1), ax2, False)

            # bottom subplot: onset
            bottom = Plotter.weekly_subplot(region_forecasts, region,
                                            plt.subplot(3, 2, 5), ax2, True)

            # right subplot: peakheight
            right = Plotter.wili_subplot(region_forecasts, region,
                                         plt.subplot(3, 2, 4), ax2, bin_size)

            # top-right subplot: legend
            leg = plt.subplot(3, 2, 2)
            for (forecast, label, color) in forecasts:
                plt.plot([0], [0], color=color, label=label)
            plt.legend(loc='lower left')

            # other stuff
            top.set_ylabel('Pr(Peak Week)')
            top.get_yaxis().set_label_position('right')
            bottom.set_ylabel('Pr(Onset Week)')
            bottom.get_yaxis().set_label_position('right')
            right.set_xlabel('Pr(Peak Height)')
            right.get_xaxis().set_label_position('top')
            ax2.set_ylabel('%s %s' % (fig_label, region.upper()))
            ax2.get_yaxis().set_label_position('left')

            # show the finished figure
            if prefix is None:
                plt.show()
                break
            else:
                filename = '%s_%s.png' % (prefix, region)
                plt.savefig(filename, bbox_inches='tight')
                print('saved %s' % filename)
Example #15
    def forecast(self, epiweek):
        """
        `epiweek`: the most recent epiweek for which ILINet data is available
        """

        # sanity checks
        flu.check_epiweek(epiweek)
        season = flu.split_epiweek(flu.get_season(epiweek)[0])[0]
        week = flu.split_epiweek(epiweek)[1]
        first_epiweek = flu.join_epiweek(season, 40)
        offset = flu.delta_epiweeks(first_epiweek, epiweek)
        if season != self.test_season:
            raise Exception('unable to forecast season %d' % season)
        if 20 < week < 40:
            raise Exception('unable to forecast week %02d' % week)

        # initialize forecast
        forecast = Forecast(self.test_season, datetime.now(), self.name,
                            epiweek, self.forecast_type)

        # aliases for readability
        num_week_bins = forecast.season_length
        num_wili_bins = forecast.num_ili_bins
        wili_bin_size = forecast.ili_bin_size

        # if (forecast_type == ForecastType.HOSP):
        #     num_wili_bins = 601

        # uniform blending weights
        # the extra bin is the `none` "bin" (no onset)
        week_weight = self.min_week_prob * (num_week_bins + 1)
        wili_weight = self.min_wili_prob * num_wili_bins
        if week_weight > 1:
            raise Exception('`min_week_prob` is impossibly high')
        if wili_weight > 1:
            raise Exception('`min_wili_prob` is impossibly high')

        # forecast each region
        for region in self.locations:

            # draw sample curves
            curves = self._forecast(region, epiweek)

            # regional info
            if Locations.is_region(region):
                baseline = Targets.baselines[self.test_season][region]
            else:
                baseline = None

            # get all targets
            targets = [
                Targets.get_all_targets(c,
                                        baseline,
                                        offset,
                                        rule_season=self.test_season)
                for c in curves
            ]
            onsets = [t['onset'] for t in targets]
            peakweeks = [t['peakweek'] for t in targets]
            peaks = [t['peak'] for t in targets]
            x1s = [t['x1'] for t in targets]
            x2s = [t['x2'] for t in targets]
            x3s = [t['x3'] for t in targets]
            x4s = [t['x4'] for t in targets]

            # forecast each target
            allow_no_pw = self.test_season < 2016
            if Locations.is_region(region):
                # onset is only forecast for regions; skip it for states and hospitalization
                onset = self.forecast_weeks(first_epiweek, num_week_bins,
                                            onsets, week_weight,
                                            self.smooth_weeks_bw, True)

            peakweek = self.forecast_weeks(first_epiweek, num_week_bins,
                                           peakweeks, week_weight,
                                           self.smooth_weeks_bw, allow_no_pw)
            peak = self.forecast_wili(wili_bin_size, num_wili_bins, peaks,
                                      wili_weight, self.smooth_wili_bw)
            x1 = self.forecast_wili(wili_bin_size, num_wili_bins, x1s,
                                    wili_weight, self.smooth_wili_bw)
            x2 = self.forecast_wili(wili_bin_size, num_wili_bins, x2s,
                                    wili_weight, self.smooth_wili_bw)
            x3 = self.forecast_wili(wili_bin_size, num_wili_bins, x3s,
                                    wili_weight, self.smooth_wili_bw)
            x4 = self.forecast_wili(wili_bin_size, num_wili_bins, x4s,
                                    wili_weight, self.smooth_wili_bw)

            # fill in the forecast data
            fc = forecast.get_or_create_forecast(region)
            if Locations.is_region(region):
                fc.set_onset(*onset)
            fc.set_peakweek(*peakweek)
            fc.set_peak(*peak)
            fc.set_lookahead(1, *x1)
            fc.set_lookahead(2, *x2)
            fc.set_lookahead(3, *x3)
            fc.set_lookahead(4, *x4)

        # sanity check completed forecast
        forecast.sanity_check()
        return forecast
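
The min_week_prob / min_wili_prob checks guard a uniform-blending step: each predicted distribution is mixed with a uniform floor so that no bin ever receives zero probability. A generic sketch of that kind of blend (this is my own illustration of the idea, not the exact smoothing performed by forecast_weeks or forecast_wili):

import numpy as np

def blend_with_uniform(probs, min_prob):
    """Mix a distribution with a uniform floor of min_prob per bin."""
    probs = np.asarray(probs, dtype=float)
    weight = min_prob * probs.size
    if weight > 1:
        raise Exception('per-bin floor is impossibly high')
    return (1 - weight) * probs + min_prob

p = blend_with_uniform([0.0, 0.7, 0.3, 0.0], 0.01)
print(p, p.sum())  # no zero bins; still sums to 1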