def main():
  # args and usage
  parser = argparse.ArgumentParser()
  parser.add_argument('apikey', type=str, help='API key')
  parser.add_argument('startweek', type=int, help='first week (ex: 201440)')
  parser.add_argument('endweek', type=int, help='last week (ex: 201520)')
  parser.add_argument('location', type=str, help='location (ex: US)')
  parser.add_argument('term', type=str, help='term/query/topic (ex: /m/0cycc)')
  args = parser.parse_args()

  # get the data
  ght = GHT(args.apikey)
  result = ght.get_data(args.startweek, args.endweek, args.location, args.term)
  values = result['values']

  # sanity check
  expected_weeks = result['num_weeks']
  received_weeks = len([v for v in values if isinstance(v, float) and v >= 0])
  if expected_weeks != received_weeks:
    raise Exception('expected %d weeks, received %d' % (expected_weeks, received_weeks))

  # results
  epiweeks = list(flu.range_epiweeks(args.startweek, args.endweek, inclusive=True))
  for (epiweek, value) in zip(epiweeks, values):
    print('%6d: %.3f' % (epiweek, value))
Example #2
  def update(self, sensors, first_week, last_week):
    """
    Compute sensor readings and store them in the database.
    """

    # most recent issue
    if last_week is None:
      last_issue = get_most_recent_issue(self.epidata)
      last_week = flu.add_epiweeks(last_issue, +1)

    # connect
    with self.database as database:

      # update each sensor
      for (name, loc) in sensors:

        # update each location
        for location in get_location_list(loc):

          # timing
          ew1 = first_week
          if ew1 is None:
            ew1 = database.get_most_recent_epiweek(name, location)
            if ew1 is None:
              # If an existing sensor reading wasn't found in the database and
              # no start week was given, just assume that readings should start
              # at 2014w01.
              ew1 = 201401
              print('%s-%s not found, starting at %d' % (name, location, ew1))

          args = (name, location, ew1, last_week)
          print('Updating %s-%s from %d to %d.' % args)
          for test_week in flu.range_epiweeks(ew1, last_week, inclusive=True):
            self.update_single(database, test_week, name, location)
Example #3
 def _get_partial_trajectory(self, epiweek, valid=True):
   y, w = EW.split_epiweek(epiweek)
   if w < 30:
     y -= 1
   ew1 = EW.join_epiweek(y, 30)
   ew2 = epiweek
   limit = EW.add_epiweeks(ew2, -5)
   weeks = Epidata.range(ew1, ew2)
   stable = Epidata.check(Epidata.fluview(self.region, weeks))
   try:
     unstable = Epidata.check(Epidata.fluview(self.region, weeks, issues=ew2))
   except Exception:
     unstable = []
   wili = {}
   for row in stable:
     ew, value = row['epiweek'], row['wili']
     if not valid or ew < limit:
       wili[ew] = value
   for row in unstable:
     ew, value = row['epiweek'], row['wili']
     wili[ew] = value
   curve = []
   for ew in EW.range_epiweeks(ew1, ew2, inclusive=True):
     if ew not in wili:
       if valid:
         t = 'unstable'
       else:
         t = 'any'
       raise Exception('wILI (%s) not available for week %d' % (t, ew))
     curve.append(wili[ew])
   n1 = EW.delta_epiweeks(ew1, ew2) + 1
   n2 = len(curve)
   if n1 != n2:
     raise Exception('missing data (expected %d, found %d)' % (n1, n2))
   return curve
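
The snippet below is a standalone sketch (with hypothetical values, not part of the original code) of the merge order in _get_partial_trajectory above: stable values are kept only when they are old enough to be trusted (ew < limit), and any available unstable (versioned) values then take precedence.

# hypothetical wILI values illustrating the merge order
limit = 201710                       # ew2 minus 5 weeks
stable = {201708: 1.0, 201712: 1.5}  # finalized values
unstable = {201712: 1.4}             # values as of issue ew2
wili = {ew: v for ew, v in stable.items() if ew < limit}
wili.update(unstable)                # unstable overrides stable
assert wili == {201708: 1.0, 201712: 1.4}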
Example #4
def update(ew1, ew2, test_mode=False):
    # init
    si = StateInfo()
    sql = '''
    INSERT INTO
      `state_ili_imputed` (`epiweek`, `state`, `ili`)
    VALUES
      (%s, %s, %s)
    ON DUPLICATE KEY UPDATE
      `ili` = %s
    '''

    # connect
    u, p = secrets.db.epi
    cnx = mysql.connector.connect(user=u, password=p, database='epidata')
    cur = cnx.cursor()

    # get state ILI on each week
    for ew in flu.range_epiweeks(ew1, ew2, inclusive=True):
        print('epiweek:', ew)
        result = si.get_ili(ew)
        for state in si.sta:
            ili = result[state]
            if not (0 <= ili < 25):
                raise Exception('ILI for %s is %+.3f?' % (state, ili))
            print(' %s %.3f' % (state, ili))
            # upload
            if not test_mode:
                args = (ew, state, ili, ili)
                cur.execute(sql, args)

    # disconnect
    cur.close()
    cnx.commit()
    cnx.close()
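
A note on the upsert above (not part of the original code): the statement contains four %s placeholders, so the parameter tuple repeats ili, once for the inserted row and once for the ON DUPLICATE KEY UPDATE assignment. A minimal sketch of the binding, with hypothetical values:

ew, state, ili = 201740, 'pa', 2.345  # hypothetical row
args = (ew, state, ili, ili)          # ili is bound twice
# the four values bind, in order, to the four %s placeholders above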
Example #5
def update(sensors, first_week=None, last_week=None, valid=False, test_mode=False):
  # most recent issue
  last_issue = get_most_recent_issue()

  # location information
  loc_info = StateInfo()

  # connect
  u, p = secrets.db.epi
  cnx = mysql.connector.connect(user=u, password=p, database='epidata')
  cur = cnx.cursor()

  # update each sensor
  for (name, loc) in sensors:
    if loc == 'hhs':
      locations = loc_info.hhs
    elif loc == 'cen':
      locations = loc_info.cen
    elif loc in ('state', 'sta'):
      locations = loc_info.sta
    else:
      locations = [loc]
    # update each location
    print(locations)
    for location in locations:
      # timing
      ew1, ew2 = first_week, last_week
      if ew1 is None:
        ew1 = get_last_update(cur, name, location)
      if ew2 is None:
        ew2 = flu.add_epiweeks(last_issue, +1)
      print('Updating %s-%s from %d to %d.' % (name, location, ew1, ew2))
      for test_week in flu.range_epiweeks(ew1, ew2, inclusive=True):
        train_week = flu.add_epiweeks(test_week, -1)
        try:
          value = {
            'gft': get_gft,
            'ght': get_ght,
            'ghtj': get_ghtj,
            'twtr': get_twtr,
            'wiki': get_wiki,
            'cdc': get_cdc,
            'epic': get_epic,
            'sar3': get_sar3,
            'arch': get_arch,
            'quid': get_quid,
          }[name](location, train_week, valid)
          print(' %4s %5s %d -> %.3f' % (name, location, test_week, value))
          # upload
          store_value(cur, name, location, test_week, value)
        except Exception as ex:
          print(' failed: %4s %5s %d' % (name, location, test_week), ex)
          #raise ex
        sys.stdout.flush()

  # disconnect
  cur.close()
  if not test_mode:
    cnx.commit()
  cnx.close()
Example #6
    def update(self, first_week, last_week):
        """Nowcast the given range of weeks and save the result to the database."""

        # update the week range if needed
        first_week, last_week = self.get_update_range(first_week, last_week)
        print('nowcasting %d--%d' % (first_week, last_week))

        # prefetch bulk data
        self.data_source.prefetch(last_week)

        # compute the nowcast(s)
        weeks = list(range_epiweeks(first_week, last_week, inclusive=True))
        nowcasts = Nowcast(self.data_source,
                           DatasetnameLocationMapper).batch_nowcast(weeks)

        # save to database
        with self.database as db:

            # save each nowcast
            for week, nowcast in zip(weeks, nowcasts):
                for location, value, stdev in nowcast:
                    db.insert(self.target, week, location, float(value),
                              float(stdev))

            # update the timestamp
            db.set_last_update_time(self.target)
Example #7
 def __init__(self, region, target, use_weekly=True):
     self.region = region
     self.target = target
     self.stts = 0
     weeks = Epidata.range(201401, 202330)
     rx = Epidata.check(Epidata.paho_dengue(self.region, weeks))
     self.data = {}
     self.valid = {}
     self.ew2i, self.i2ew = {}, {}
     for ew in EW.range_epiweeks(weeks['from'], weeks['to'],
                                 inclusive=True):
         # if 200916 <= ew <= 201015:
         #   continue
         i = len(self.ew2i)
         self.ew2i[ew] = i
         self.i2ew[i] = ew
     epiweeks = [elt['epiweek'] for elt in rx]
     values = [elt[self.target] for elt in rx]
     data = {elt['epiweek']: elt[self.target] for elt in rx}
     w_data = cum_to_week(data)
     for i in range(len(rx)):
         ew, observation = epiweeks[i], w_data[epiweeks[i]]
         if ew not in self.ew2i:
             continue
         i = self.ew2i[ew]
         if i not in self.data:
             self.data[i] = {}
             self.valid[i] = {'stable': False}
         lag = 'stable'
         self.data[i][lag] = observation
         self.valid[i][lag] = True
     self.weeks = sorted(list(self.data.keys()))
     self.dds = DengueDataSource.new_instance(target)
Example #8
 def __init__(self, region, target):
   self.region = region
   self.target = target
   weeks = Epidata.range(199301, 202330)
   auth = secrets.api.datasetname_targets
   rx = mutate_rows_as_if_lagged(Epidata.check(
       Epidata.datasetname_targets(auth, self.target, self.region, weeks)),
       1000000)
   self.data = {}
   self.valid = {}
   self.ew2i, self.i2ew = {}, {}
   for ew in EW.range_epiweeks(weeks['from'], weeks['to'], inclusive=True):
     # if 200916 <= ew <= 201015:
     #   continue
     i = len(self.ew2i)
     self.ew2i[ew] = i
     self.i2ew[i] = ew
   for row in rx:
      ew, observation = row['epiweek'], row['value']
     if ew not in self.ew2i:
       continue
     i = self.ew2i[ew]
     if i not in self.data:
       self.data[i] = {}
       self.valid[i] = {'stable': False}
     lag = 'stable'
     self.data[i][lag] = observation
     self.valid[i][lag] = True
   self.weeks = sorted(list(self.data.keys()))
    for i in self.weeks:
      if 'stable' not in self.data[i]:
        continue
      for lag in range(3):
        if lag not in self.data[i]:
          self.data[i][lag] = self.data[i]['stable']
Example #9
def get_kcdc_data():
    issue = EpiDate.today().get_ew()
    last_season = issue // 100 + (1 if issue % 100 > 35 else 0)
    url = 'http://www.cdc.go.kr/npt/biz/npp/iss/influenzaListAjax.do'
    params = {
        'icdNm': 'influenza',
        'startYear': '2004',  # Started in 2004
        'endYear': str(last_season)
    }
    response = requests.post(url, data=params)
    datas = response.json()
    data = datas['data']
    ews = []
    ilis = []
    ew1 = 200436
    for year in range(2004, last_season):
        year_data = data[year - 2004]
        if year > 2004:
            ew1 = ews[-1] + 1
        ili_yr = year_data["VALUE"].split('`')
        ili_yr = [float(f) for f in ili_yr if f != '']
        ew2 = add_epiweeks(ew1, len(ili_yr))
        new_ews = list(range_epiweeks(ew1, ew2))
        for ew, ili in zip(new_ews, ili_yr):
            ilis.append(ili)
            ews.append(ew)
    return ews, ilis
Example #10
  def test_update(self):
    """Compute and store a nowcast."""

    database = MagicMock()
    database.__enter__.return_value = database
    database.__exit__.return_value = None
    data_source = MagicMock(
        get_truth_locations=lambda *a: ['nat', 'vi'],
        get_sensor_locations=lambda *a: ['nat', 'vi'],
        get_missing_locations=lambda *a: (),
        get_sensors=lambda *a: ['epic', 'sar3'],
        get_most_recent_issue=lambda *a: 201813,
        get_weeks=lambda *a: list(range_epiweeks(201713, 201814)),
        get_truth_value=lambda *a: random.random(),
        get_sensor_value=lambda *a: random.random(),
        prefetch=lambda *a: None)

    NowcastUpdate(database, data_source).update(201812, 201813)

    self.assertEqual(database.set_last_update_time.call_count, 1)
    self.assertEqual(database.insert.call_count, 4)

    epiweek_location_pairs = set()
    for args, kwargs in database.insert.call_args_list:
      epiweek_location_pairs.add(args[:2])

    self.assertIn((201812, 'nat'), epiweek_location_pairs)
    self.assertIn((201813, 'nat'), epiweek_location_pairs)
    self.assertIn((201812, 'vi'), epiweek_location_pairs)
    self.assertIn((201813, 'vi'), epiweek_location_pairs)
Example #11
 def get_weeks(self):
     """Return a list of weeks on which truth and sensors are both available."""
     latest_week = EpiDate.today().get_ew()
     latest_week = add_epiweeks(latest_week, -1)
     week_range = range_epiweeks(self.FIRST_DATA_EPIWEEK,
                                 latest_week,
                                 inclusive=True)
     return list(week_range)
Example #12
def extract(first_week=None, last_week=None, test_mode=False):
  # page title templates
  pages = [
    '%What You Should Know for the % Influenza Season%',
    '%What To Do If You Get Sick%',
    '%Flu Symptoms & Severity%',
    '%How Flu Spreads%',
    '%What You Should Know About Flu Antiviral Drugs%',
    '%Weekly US Map%',
    '%Basics%',
    '%Flu Activity & Surveillance%',
  ]

  # location information
  states = sorted(cdc_upload.STATES.values())

  # connect
  u, p = secrets.db.epi
  cnx = mysql.connector.connect(user=u, password=p, database='epidata')
  cur = cnx.cursor()

  # weeks to update
  if first_week is None:
    cur.execute('SELECT max(`epiweek`) FROM `cdc_extract`')
    for (first_week,) in cur:
      pass
  if last_week is None:
    cur.execute('SELECT max(`epiweek`) FROM `cdc_meta`')
    for (last_week,) in cur:
      pass
  print('extracting %d--%d' % (first_week, last_week))

  # update each epiweek
  for epiweek in flu.range_epiweeks(first_week, last_week, inclusive=True):
    # update each state
    for state in states:
      try:
        nums = [get_num_hits(cur, epiweek, state, page) for page in pages]
        total = get_total_hits(cur, epiweek, state)
        store_result(cur, epiweek, state, *nums, total)
        print(' %d-%s: %s (%d)' %
              (epiweek, state, ' '.join('%d' % n for n in nums), total))
      except Exception as ex:
        print(' %d-%s: failed' % (epiweek, state), ex)
        #raise ex
      sys.stdout.flush()

  # disconnect
  cur.close()
  if not test_mode:
    cnx.commit()
  cnx.close()
Example #13
 def fetch(weeks):
   # Impute missing weeks with 0%
   # This is actually correct because twitter does not store rows with `num` =
   # 0. So weeks with 0 `num` (and `percent`) are missing from the response.
   res = Epidata.twitter(secrets.api.twitter, location, epiweeks=weeks)
   if 'epidata' in res:
     epiweeks = {r['epiweek'] for r in res['epidata']}
     first, last = 201149, weeks['to']
     for ew in flu.range_epiweeks(first, last, inclusive=True):
       if ew not in epiweeks:
         res['epidata'].append({'epiweek': ew, 'percent': 0.})
   return res
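
A standalone sketch (hypothetical data, not part of the original code) of the zero-imputation idea above: any requested epiweek missing from the response is assumed to have had zero activity.

returned = {201149: 1.2, 201151: 0.8}  # hypothetical; 201150 is absent
weeks = [201149, 201150, 201151]
imputed = {ew: returned.get(ew, 0.0) for ew in weeks}
assert imputed == {201149: 1.2, 201150: 0.0, 201151: 0.8}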
Example #14
 def get_dengue_data(first_week, last_week):
     # Check week order
     if first_week > last_week:
         first_week, last_week = last_week, first_week
     # Bounds check
     if first_week < 200301 or last_week < 200301:
         raise Exception('week out of range')
     # Initialize data by week and location (zeroes are not reported)
     data = {}
     for week in range_epiweeks(first_week, add_epiweeks(last_week, 1)):
         data[week] = {}
         for location in NIDSS.LOCATION_TO_REGION.keys():
             data[week][location] = 0
     # Download CSV
     response = requests.get(NIDSS.DENGUE_URL)
     if response.status_code != 200:
         raise Exception('export Dengue failed [%d]' % response.status_code)
     csv = response.content.decode('big5-tw')
     # Parse the data
     lines = [l.strip() for l in csv.split('\n')[1:] if l.strip() != '']
     for line in lines:
         fields = line.split(',')
         location_b64 = base64.b64encode(fields[3].encode('utf-8'))
         location = NIDSS._TRANSLATED[location_b64]
         region = NIDSS.LOCATION_TO_REGION[location]
         imported_b64 = base64.b64encode(fields[6].encode('utf-8'))
         imported = imported_b64 == b'5piv'
         sex = fields[5]
         age = fields[7]
         count = int(fields[8])
         year = int(fields[1])
         week = int(fields[2])
          # Week 53 was reported each year in 2003-2007, but only 2003
          # actually had a week 53; clamp the other years to week 52
         if year < 2008 and year != 2003 and week > 52:
             week = 52
         # Epiweek system change in 2009
         # See also: http://research.undefinedx.com/forum/index.php?topic=300.0
         if year == 2009:
             week -= 1
             if week == 0:
                 year, week = 2008, 53
         epiweek = year * 100 + week
         if epiweek < first_week or epiweek > last_week:
             # Outside of the requested range
             continue
         if epiweek not in data or location not in data[epiweek]:
              # Not a valid epiweek/location pair
             raise Exception('data missing %d-%s' % (epiweek, location))
         # Add the counts to the location on this epiweek
         data[epiweek][location] += count
     # Return results indexed by week and location
     return data
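
A standalone sketch of the 2009 renumbering above (the helper name is illustrative): reported weeks in 2009 are shifted down by one, and week 0 wraps around to 2008w53.

def fix_2009(year, week):
    # mirror the epiweek-system correction in get_dengue_data
    if year == 2009:
        week -= 1
        if week == 0:
            year, week = 2008, 53
    return year * 100 + week

assert fix_2009(2009, 1) == 200853
assert fix_2009(2009, 2) == 200901
assert fix_2009(2010, 5) == 201005  # other years pass through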
Example #15
 def _train(self, region):
     if region in self.bf_var:
         # already trained
         return
     if len(region) == 2:
         # TODO: this is a hack for state ILI
         # assume backfill of region 4
         print('FIXME: setting backfill for %s as hhs4' % region)
         self.bf_var[region] = self.bf_var['hhs4']
         self.emp_mean[region] = self.emp_mean['hhs4']
         self.emp_var[region] = self.emp_var['hhs4']
         self.emp_curves[region] = self.emp_curves['hhs4']
         return
     stable = self._get_stable(region)
     start_weeks = [flu.get_season(ew)[0] for ew in stable.keys()]
     curves = []
      seasons = {flu.split_epiweek(ew)[0]
                 for ew in start_weeks if ew is not None}
     for s in seasons:
         ew1 = flu.join_epiweek(s + 0, 40)
         if self.forecast_type == ForecastType.WILI:
             ew2 = flu.add_epiweeks(ew1, 37)
         else:
             ew2 = flu.add_epiweeks(ew1, 29)
         # print("stable: ", stable)
         # print("range_epiweeks: ", [i for i in flu.range_epiweeks(ew1, ew2)])
         curve = [stable[ew] for ew in flu.range_epiweeks(ew1, ew2)]
         curves.append(curve)
     self.emp_mean[region] = np.mean(curves, axis=0)
     self.emp_var[region] = np.var(curves, axis=0, ddof=1)
     self.emp_curves[region] = curves
     if self.backfill_weeks is None:
         self.bf_var[region] = [0]
     else:
         self.bf_var[region] = []
         for lag in range(self.backfill_weeks):
             unstable = self._get_unstable(region, lag)
             changes = [
                 stable[ew] - unstable[ew]
                 for ew in stable.keys() & unstable.keys()
             ]
             if len(changes) < 2:
                 raise Exception('not enough data')
             self.bf_var[region].append(np.var(changes, ddof=1))
     print(
         ' %5s: %s' %
         (region, ' '.join(['%.3f' % (b**0.5)
                            for b in self.bf_var[region]])))
Example #16
    def prefetch(self, epiweek):
        """
    Fetch all data in all locations up to the given epiweek.

    Requests are batched. This is significantly more efficient (and faster)
    than querying each sensor/location/epiweek data point individually.
    """
        def extract(response):
            if response['result'] == -2:
                return []
            return self.epidata.check(response)

        weeks = Epidata.range(self.FIRST_DATA_EPIWEEK, epiweek)
        sensor_locations = set(self.get_sensor_locations())

        # loop over locations to avoid hitting the limit of ~3.5k rows
        for loc in self.get_truth_locations():
            print('fetching %s...' % loc)

            # default to None to prevent cache misses on missing values
            for week in range_epiweeks(self.FIRST_DATA_EPIWEEK,
                                       epiweek,
                                       inclusive=True):
                for name in ['datasetname_targets'] + self.get_sensors():
                    self.add_to_cache(name, self.target, loc, week, None)

            # ground truth
            auth = secrets.api.datasetname_targets
            datasetnameData = self.epidata.check(
                self.epidata.datasetname_targets(auth, self.target, loc,
                                                 weeks))
            for row in datasetnameData:
                self.add_to_cache('datasetname_targets', self.target, loc,
                                  row['epiweek'], row['value'])

            # sensor readings
            if loc not in sensor_locations:
                # skip withheld locations (i.e. a retrospective experiment)
                continue
            for sen in self.get_sensors():
                response = self.epidata.datasetname_sensors(
                    secrets.api.datasetname_sensors, self.target, sen, loc,
                    weeks)
                for row in extract(response):
                    self.add_to_cache(sen, self.target, loc, row['epiweek'],
                                      row['value'])
Example #17
  def get_heatmap_data(self):
    w0s, w1s = [], []
    for sensor in FluDataSource.SENSORS:
      x = self.get_sensor(sensor, 'nat')
      w0s.append(min(x))
      w1s.append(max(x))

    w0, w1 = min(w0s), max(w1s)
    weeks = list(Epiweek.range_epiweeks(w0, w1, inclusive=True))
    data = np.full((len(FluDataSource.SENSORS), len(weeks)), -1.0)
    for i, sensor in enumerate(FluDataSource.SENSORS):
      x = self.get_sensor(sensor, 'nat')
      for j, ew in enumerate(weeks):
        if ew in x:
          data[i, j] = x[ew]

    return data, FluDataSource.SENSORS, weeks
Example #18
def update(ew1, ew2, test_mode=False, epidata_cache=None):
    # database setup
    u, p = secrets.db.epi
    cnx = mysql.connector.connect(user=u, password=p, database='epidata')
    cur = cnx.cursor()
    sql = """
    INSERT INTO `nowcasts`
      (`epiweek`, `location`, `value`, `std`)
    VALUES
      (%s, %s, %s, %s)
    ON DUPLICATE KEY UPDATE
      value = %s, std = %s
  """

    for ew in flu.range_epiweeks(ew1, ew2, inclusive=True):
        try:
            print(ew)
            locations, values, stds = nowcast(ew, epidata_cache)
            print(' ', locations[0], values[0], stds[0])
            for (l, v, s) in zip(locations, values, stds):
                cur.execute(sql, (ew, l, v, s, v, s))
        except Exception as ex:
            print('failed: ', ew, ex)
            #raise ex
        sys.stdout.flush()

    # the Ugliest Hack Ever Written lies below. turn back now, cannot be unseen.
    # please fix me
    # store the unix timestamp in a meta row representing the last update time
    # the key to this row is `epiweek`=0, `location`='updated'
    # the timestamp is stored across the `value` and `std` fields
    # these are 32-bit floats, so precision is limited (hence, using both fields)
    t = round(time.time())
    a, b = t // 100000, t % 100000
    cur.execute(sql, (0, 'updated', a, b, a, b))
    # /hack

    # database cleanup
    cur.close()
    if test_mode:
        print('test mode - nowcasts not saved')
    else:
        cnx.commit()
    cnx.close()
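
The meta-row hack above splits a unix timestamp across two 32-bit float columns. Below is a standalone round-trip sketch (not part of the original code) using the same 100000 split factor; the helper names are illustrative.

def encode_timestamp(t):
    # split a unix timestamp into two values small enough for 32-bit floats
    return t // 100000, t % 100000

def decode_timestamp(a, b):
    # recombine the `value` and `std` fields into the original timestamp
    return int(a) * 100000 + int(b)

t = 1500000000  # hypothetical update time
assert decode_timestamp(*encode_timestamp(t)) == t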
Example #19
 def __init__(self, region, target):
   self.region = region
   self.target = target
   weeks = Epidata.range(199301, 202330)
   auth = secrets.api.datasetname_targets
   # r0 = Epidata.check(Epidata.fluview(self.region, weeks, lag=0, auth=auth))
   # r1 = Epidata.check(Epidata.fluview(self.region, weeks, lag=1, auth=auth))
   # r2 = Epidata.check(Epidata.fluview(self.region, weeks, lag=2, auth=auth))
   # rx = Epidata.check(Epidata.fluview(self.region, weeks, auth=auth))
   r0 = mutate_rows_as_if_lagged(Epidata.check(
       Epidata.datasetname_targets(auth, self.target, self.region, weeks)), 0)
   r1 = mutate_rows_as_if_lagged(Epidata.check(
       Epidata.datasetname_targets(auth, self.target, self.region, weeks)), 1)
   r2 = mutate_rows_as_if_lagged(Epidata.check(
       Epidata.datasetname_targets(auth, self.target, self.region, weeks)), 2)
   rx = mutate_rows_as_if_lagged(Epidata.check(
       Epidata.datasetname_targets(auth, self.target, self.region, weeks)), 1000000)
   self.data = {}
   self.valid = {}
   self.ew2i, self.i2ew = {}, {}
   for ew in EW.range_epiweeks(weeks['from'], weeks['to'], inclusive=True):
     # if 200916 <= ew <= 201015:
     #   continue
     i = len(self.ew2i)
     self.ew2i[ew] = i
     self.i2ew[i] = ew
   for row in r0 + r1 + r2 + rx:
     ew, observation, lag = row['epiweek'], row['value'], row['lag']
     if ew not in self.ew2i:
       continue
     i = self.ew2i[ew]
     if i not in self.data:
       self.data[i] = {}
       self.valid[i] = {0: False, 1: False, 2: False, 'stable': False}
     if not (0 <= lag <= 2):
       lag = 'stable'
     self.data[i][lag] = observation
     self.valid[i][lag] = True
   self.weeks = sorted(list(self.data.keys()))
   for i in self.weeks:
     if 'stable' not in self.data[i]:
       continue
     for lag in range(3):
       if lag not in self.data[i]:
         self.data[i][lag] = self.data[i]['stable']
Example #20
 def __init__(self, region):
     self.region = region
     weeks = Epidata.range(200330, 202330)
     auth = secrets.api.fluview
     r0 = Epidata.check(
         Epidata.fluview(self.region, weeks, lag=0, auth=auth))
     r1 = Epidata.check(
         Epidata.fluview(self.region, weeks, lag=1, auth=auth))
     r2 = Epidata.check(
         Epidata.fluview(self.region, weeks, lag=2, auth=auth))
     rx = Epidata.check(Epidata.fluview(self.region, weeks, auth=auth))
     self.data = {}
     self.valid = {}
     self.ew2i, self.i2ew = {}, {}
     for ew in EW.range_epiweeks(weeks['from'], weeks['to'],
                                 inclusive=True):
         if 200916 <= ew <= 201015:
             continue
         i = len(self.ew2i)
         self.ew2i[ew] = i
         self.i2ew[i] = ew
     for row in r0 + r1 + r2 + rx:
         ew, wili, lag = row['epiweek'], row['wili'], row['lag']
         if ew not in self.ew2i:
             continue
         i = self.ew2i[ew]
         if i not in self.data:
             self.data[i] = {}
             self.valid[i] = {0: False, 1: False, 2: False, 'stable': False}
         if not (0 <= lag <= 2):
             lag = 'stable'
         self.data[i][lag] = wili
         self.valid[i][lag] = True
     self.weeks = sorted(list(self.data.keys()))
     for i in self.weeks:
         if 'stable' not in self.data[i]:
             continue
         for lag in range(3):
             if lag not in self.data[i]:
                 self.data[i][lag] = self.data[i]['stable']
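
The loop at the end of __init__ above back-fills missing early-lag observations from the stable value. A standalone sketch with hypothetical values:

# week 0 has a stable value and a lag-0 value; lags 1 and 2 are missing
data = {0: {'stable': 2.0, 0: 1.8}}
for i in data:
    if 'stable' in data[i]:
        for lag in range(3):
            data[i].setdefault(lag, data[i]['stable'])
assert data[0] == {'stable': 2.0, 0: 1.8, 1: 2.0, 2: 2.0}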
Example #21
    def update(self, sensors, first_week, last_week):
        """
    Compute sensor readings and store them in the database.
    """

        # most recent issue
        if last_week is None:
            # last_issue = get_most_recent_issue(self.epidata)
            # last_week = flu.add_epiweeks(last_issue, +1)
            raise Exception(
                'last_week must be provided for now; TODO: select it based '
                'on the current time rather than on the ground truth data '
                'set, since the ground truth here is currently static, not '
                'streaming')

        # connect
        with self.database as database:

            # update each sensor
            for (name, loc) in sensors:

                # update each location
                for location in get_location_list(loc):

                    # timing
                    ew1 = first_week
                    if ew1 is None:
                        ew1 = database.get_most_recent_epiweek(name, location)
                        if ew1 is None:
                            # If an existing sensor reading wasn't found in the database and
                            # no start week was given, just assume that readings should start
                            # at 2010w40.
                            ew1 = 201040
                            print('%s-%s not found, starting at %d' %
                                  (name, location, ew1))

                    args = (name, location, ew1, last_week)
                    print('Updating %s-%s from %d to %d.' % args)
                    for test_week in flu.range_epiweeks(ew1,
                                                        last_week,
                                                        inclusive=True):
                        self.update_single(database, test_week, name, location)
Example #22
    def _forecast(self, region, epiweek):
        ew1 = flu.join_epiweek(self.test_season + 0, 40)
        ew2 = flu.join_epiweek(self.test_season + 1, 24)
        num_weeks = flu.delta_epiweeks(ew1, ew2)
        print('fetching past data until week %d' % (epiweek))
        observed = self._get_current(region, epiweek, self.forecast_type)

        mean, var = self.emp_mean[region].copy(), self.emp_var[region].copy()
        for ew in flu.range_epiweeks(ew1, flu.add_epiweeks(epiweek, 1)):
            i = flu.delta_epiweeks(ew1, ew)
            lag = flu.delta_epiweeks(ew1, epiweek) - i
            lag = min(lag, len(self.bf_var[region]) - 1)
            mean[i] = observed[i]
            var[i] = self.bf_var[region][lag]
        curves = Forecaster.Utils.sample_normal_var(mean, var,
                                                    self.num_samples)
        if not self.do_sampling:
            offset = flu.delta_epiweeks(ew1, epiweek) + 1
            for (i, curve) in enumerate(curves):
                index = i % len(self.emp_curves[region])
                curve[offset:] = self.emp_curves[region][index][offset:]
        return curves
Example #23
def cum_to_week(data):
    epiweeks = list(data.keys())
    all_epiweeks = list(
        EW.range_epiweeks(min(epiweeks), max(epiweeks), inclusive=True))

    result = np.zeros(len(all_epiweeks))
    last_valid = (-1, 0)  # (idx, value)
    for i, ew in enumerate(all_epiweeks):
        if ew in data and data[ew] is not None:
            # evenly distribute the increase across any intervening
            # weeks that are missing from `data`
            span = float(i - last_valid[0])
            result[last_valid[0] + 1:i + 1] = (data[ew] - last_valid[1]) / span
            last_valid = (i, data[ew])
        yr, wk = EW.split_epiweek(ew)
        if EW.get_num_weeks(yr) == wk:
            # end of the epi-year: no information crosses the boundary,
            # so fill the remainder of the year with 0s
            result[last_valid[0] + 1:i + 1] = 0
            last_valid = (i, 0)  # start the new year at 0
    return {all_epiweeks[i]: result[i] for i in range(len(all_epiweeks))}
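
A usage sketch for cum_to_week (hypothetical counts; assumes numpy and the epiweek utilities are imported as np and EW, as above): a week absent from the input has the next reported increase evenly distributed across it.

cumulative = {202001: 10, 202002: 15, 202004: 25}  # 202003 is missing
weekly = cum_to_week(cumulative)
# expected: {202001: 10.0, 202002: 5.0, 202003: 5.0, 202004: 5.0}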
Example #24
    def test_update(self):
        """Compute and store a nowcast."""

        database = MagicMock()
        database.__enter__.return_value = database
        database.__exit__.return_value = None
        data_source = MagicMock(
            ALL_LOCATIONS=['pa', 'va'],
            ATOMIC_LOCATIONS=['pa', 'va'],
            get_truth_locations=lambda *a: ['pa', 'va'],
            get_sensor_locations=lambda *a: ['pa', 'va'],
            get_missing_locations=lambda *a: (),
            get_sensors=lambda *a: ['ght', 'isch'],
            get_most_recent_issue=lambda *a: 201513,
            get_weeks=lambda *a: list(range_epiweeks(201413, 201514)),
            get_truth_value=lambda *a: random.random(),
            get_sensor_value=lambda *a: random.random(),
            prefetch=lambda *a: None)
        target = 'datasetname_rate'

        NowcastUpdate(database, data_source, target).update(201512, 201513)

        self.assertEqual(database.set_last_update_time.call_count, 1)
        self.assertEqual(database.insert.call_count, 4)

        target_epiweek_location_triplets = set()
        for args, kwargs in database.insert.call_args_list:
            target_epiweek_location_triplets.add(args[:3])

        self.assertIn(('datasetname_rate', 201512, 'pa'),
                      target_epiweek_location_triplets)
        self.assertIn(('datasetname_rate', 201513, 'pa'),
                      target_epiweek_location_triplets)
        self.assertIn(('datasetname_rate', 201512, 'va'),
                      target_epiweek_location_triplets)
        self.assertIn(('datasetname_rate', 201513, 'va'),
                      target_epiweek_location_triplets)
Example #25
 def __init__(self, region, target):
   self.region = region
   self.target = target
   weeks = Epidata.range(201401, 202330)
   r0 = Epidata.check(Epidata.paho_dengue(self.region, weeks, lag=0))
   r1 = Epidata.check(Epidata.paho_dengue(self.region, weeks, lag=1))
   r2 = Epidata.check(Epidata.paho_dengue(self.region, weeks, lag=2))
   rx = Epidata.check(Epidata.paho_dengue(self.region, weeks))
   self.data = {}
   self.valid = {}
   self.ew2i, self.i2ew = {}, {}
   for ew in EW.range_epiweeks(weeks['from'], weeks['to'], inclusive=True):
     # if 200916 <= ew <= 201015:
     #   continue
     i = len(self.ew2i)
     self.ew2i[ew] = i
     self.i2ew[i] = ew
   for row in r0 + r1 + r2 + rx:
     ew, observation, lag = row['epiweek'], row[self.target], row['lag']
     if ew not in self.ew2i:
       continue
     i = self.ew2i[ew]
     if i not in self.data:
       self.data[i] = {}
       self.valid[i] = {0: False, 1: False, 2: False, 'stable': False}
     if not (0 <= lag <= 2):
       lag = 'stable'
     self.data[i][lag] = observation
     self.valid[i][lag] = True
   self.weeks = sorted(list(self.data.keys()))
   for i in self.weeks:
     if 'stable' not in self.data[i]:
       continue
     for lag in range(3):
       if lag not in self.data[i]:
         self.data[i][lag] = self.data[i]['stable']
Example #26
def nowcast(epiweek, epidata_cache=None):
    si = StateInfo()
    # all sensors and locations
    all_names, all_loc = get_all_sensors()
    # get sensors available on the target week
    rows = Epidata.check(
        Epidata.sensors(secrets.api.sensors, all_names, all_loc, epiweek))
    present = {}
    for row in rows:
        name, loc, value = row['name'], row['location'], row['value']
        if name not in present:
            present[name] = {}
        if loc not in present[name]:
            present[name][loc] = value
    # get the history of each available sensor (6 sec)
    past = {}
    sensor_locs = set()
    missing = set()
    past_weeks = Epidata.range(FIRST_DATA_EPIWEEK,
                               flu.add_epiweeks(epiweek, -1))
    all_epiweeks = list(
        flu.range_epiweeks(
            past_weeks['from'], past_weeks['to'], inclusive=True))
    num_obs = len(all_epiweeks)
    for name in present.keys():
        past[name] = {}
        for loc in present[name].keys():
            past[name][loc] = {}
            sensor_locs.add(loc)
            #print(name, loc)
            try:
                if epidata_cache is not None:
                    rows = epidata_cache.sensors(name, loc, past_weeks)
                else:
                    rows = Epidata.check(
                        Epidata.sensors(secrets.api.sensors, name, loc,
                                        past_weeks))
                if len(rows) < 2:
                    raise Exception('not enough data')
                for row in rows:
                    past[name][loc][row['epiweek']] = row['value']
            except Exception:
                missing.add((name, loc))
    # remove sensors with zero past data
    for (n, l) in missing:
        del present[n][l]
        if len(present[n]) == 0:
            del present[n]
        del past[n][l]
        if len(past[n]) == 0:
            del past[n]
        #print(n, l, 'is missing')
    # inventory
    all_sensors = []
    for n in all_names:
        for l in si.nat + si.hhs + si.cen + si.sta:
            if n in past and l in past[n]:
                all_sensors.append((n, l))
    #print(all_sensors)
    num_sensors = len(all_sensors)
    # get historical ground truth for each sensor (4 sec)
    truth = {}
    auth = secrets.api.fluview
    for loc in sensor_locs:
        truth[loc] = {}
        if epidata_cache is not None:
            srows = epidata_cache.fluview(loc, past_weeks)
        else:
            srows = Epidata.check(Epidata.fluview(loc, past_weeks, auth=auth))
        sdata = {r['epiweek']: r for r in srows}
        udata = {}
        try:
            i = past_weeks['to']
            result = Epidata.fluview(loc, past_weeks, issues=i, auth=auth)
            urows = Epidata.check(result)
            udata = {r['epiweek']: r for r in urows}
        except Exception:
            pass
        rows = []
        for ew in all_epiweeks:
            if ew in udata:
                rows.append(udata[ew])
            else:
                rows.append(sdata[ew])
        for row in rows:
            truth[loc][row['epiweek']] = row['wili']
    # rows are epiweeks, cols are sensors
    X = np.full((num_obs, num_sensors), np.nan)
    for (r, ew) in enumerate(all_epiweeks):
        for (c, (name, loc)) in enumerate(all_sensors):
            if (name in past and loc in past[name] and ew in past[name][loc]
                    and loc in truth and ew in truth[loc]):
                X[r, c] = past[name][loc][ew] - truth[loc][ew]
    # sparse precision matrix
    Ri = Fusion.precision(X, mean=np.zeros((1, num_sensors)), b=0.25)
    # prepare for sensor fusion
    inputs = all_sensors
    state = si.sta
    outputs = si.nat + si.hhs + si.cen + si.sta
    num_i, num_s, num_o = len(inputs), len(state), len(outputs)
    # input  (z): [ num_i  x    1   ]
    # state  (x): [ num_s  x    1   ]
    # output (y): [ num_o  x    1   ]
    # S->I   (H): [ num_i  x  num_s ]
    # S->O   (W): [ num_o  x  num_s ]
    z = np.array([present[n][l] for (n, l) in inputs]).reshape((num_i, 1))
    H = np.zeros((num_i, num_s))
    W = np.zeros((num_o, num_s))
    # populate H, given input signals
    for (row, (name, location)) in enumerate(inputs):
        for (col, loc) in enumerate(state):
            if loc in si.within[location]:
                H[row, col] = si.weight[location][loc]
    if np.linalg.matrix_rank(np.dot(H.T, H)) != num_s:
        raise Exception('H is singular')
    if not np.allclose(np.sum(H, axis=1), 1):
        raise Exception('H rows do not sum to 1')
    # populate W, given output locations
    for (row, location) in enumerate(outputs):
        for (col, loc) in enumerate(state):
            if loc in si.within[location]:
                W[row, col] = si.weight[location][loc]
    if not np.allclose(np.sum(W, axis=1), 1):
        raise Exception('W rows do not sum to 1')
    # sensor fusion
    x, P = Fusion.fuse(z, Ri, H)
    y, S = Fusion.extract(x, P, W)
    print(num_obs, num_i, num_s, num_o)
    pt = [float(v) for v in y.flatten()]
    std = [float(v) for v in np.sqrt(S).flatten()]
    return (outputs, pt, std)
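
A toy sketch (hypothetical weights, not part of the original code) of the H-matrix construction above: each row maps one sensor's location onto the atomic state locations, and each row must sum to 1. The original also consults si.within before assigning a weight; here a plain dict lookup stands in for both checks.

import numpy as np

state = ['pa', 'tx']
inputs = [('ght', 'pa'), ('ght', 'nat')]  # (sensor, location) pairs
weight = {'pa': {'pa': 1.0}, 'nat': {'pa': 0.4, 'tx': 0.6}}  # hypothetical
H = np.zeros((len(inputs), len(state)))
for row, (_, location) in enumerate(inputs):
    for col, loc in enumerate(state):
        H[row, col] = weight[location].get(loc, 0.0)
assert np.allclose(H.sum(axis=1), 1)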
Example #27
  def test_implemented_methods(self):
    # sample data
    locations = ['ar', 'tx']
    sensors = ['epic', 'sar3']
    epiweek = 201812

    # helper that mimics an Epidata API response
    def fake_api(value=1, result=1, num_providers=1):
      return {
        'result': result,
        'epidata': [{
          'value': value,
          'wili': value,
          'num_providers': num_providers
        }]
      }

    # fake implementation of epidata.fluview
    def get_fluview(loc, week, auth):
      if loc == 'X':
        return fake_api(num_providers=0)
      if loc in locations:
        return fake_api()
      return fake_api(result=-2)

    # fake implementation of epidata.sensors
    def get_sensors(auth, name, loc, week):
      if name in sensors:
        return fake_api()
      return fake_api(result=-2)

    # create data source
    epidata = MagicMock(fluview=get_fluview, sensors=get_sensors)
    data_source = FluDataSource(epidata, sensors, Locations.region_list)
    data_source.get_most_recent_issue = lambda: epiweek

    # expected values
    expected_locations = set(Locations.region_list)
    expected_missing = set(Locations.atom_list) - set(locations)
    expected_sensors = set(sensors)
    expected_weeks = set(
        range_epiweeks(
            FluDataSource.FIRST_DATA_EPIWEEK, epiweek, inclusive=True))

    # actual values
    actual_locations = set(data_source.get_truth_locations())
    actual_missing = set(data_source.get_missing_locations(None))
    actual_sensors = set(data_source.get_sensors())
    actual_weeks = set(data_source.get_weeks())

    # compare values
    self.assertEqual(actual_locations, expected_locations)
    self.assertEqual(actual_missing, expected_missing)
    self.assertEqual(actual_sensors, expected_sensors)
    self.assertEqual(actual_weeks, expected_weeks)

    # don't have data
    self.assertIsNone(data_source.get_truth_value(None, None))
    self.assertIsNone(data_source.get_sensor_value(None, None, None))

    # have data, but location had no reporting providers
    self.assertIsNone(data_source.get_truth_value(None, 'X'))

    # have data
    self.assertIsNotNone(data_source.get_truth_value(None, 'tx'))
    self.assertIsNotNone(data_source.get_sensor_value(None, None, 'epic'))
Example #28
 def get_weeks(self):
   """Return a list of weeks on which truth and sensors are both available."""
   latest_week = self.get_most_recent_issue()
   week_range = range_epiweeks(
       FluDataSource.FIRST_DATA_EPIWEEK, latest_week, inclusive=True)
   return list(week_range)
Example #29
 def get_weeks(self):
   """Return a list of weeks on which truth and sensors are both available."""
   week_range = range_epiweeks(
       self.FIRST_DATA_EPIWEEK, self.LAST_DATA_EPIWEEK, inclusive=True)
   return list(week_range)