Example #1
0
 def __init__(self, region, target, use_weekly=True):
     """Fetch PAHO dengue rows for one region and index them by week.

     Populates `self.data` / `self.valid` (keyed by week index, then by the
     label 'stable'), the epiweek<->index maps `self.ew2i` / `self.i2ew`,
     and `self.weeks` (sorted indices that have data).

     Args:
         region: location passed to `Epidata.paho_dengue`.
         target: name of the row field holding the observation.
         use_weekly: kept for interface compatibility; not read here.
     """
     self.region = region
     self.target = target
     self.stts = 0
     weeks = Epidata.range(201401, 202330)
     rx = Epidata.check(Epidata.paho_dengue(self.region, weeks))
     self.data = {}
     self.valid = {}
     # assign consecutive indices to every epiweek of the requested range
     self.ew2i, self.i2ew = {}, {}
     for ew in EW.range_epiweeks(weeks['from'], weeks['to'],
                                 inclusive=True):
         index = len(self.ew2i)
         self.ew2i[ew] = index
         self.i2ew[index] = ew
     epiweeks = [elt['epiweek'] for elt in rx]
     data = {elt['epiweek']: elt[self.target] for elt in rx}
     # NOTE(review): presumably converts cumulative counts into per-week
     # counts -- confirm against `cum_to_week`
     w_data = cum_to_week(data)
     for ew in epiweeks:
         observation = w_data[ew]
         if ew not in self.ew2i:
             # row lies outside the indexed range
             continue
         index = self.ew2i[ew]
         if index not in self.data:
             self.data[index] = {}
             self.valid[index] = {'stable': False}
         self.data[index]['stable'] = observation
         self.valid[index]['stable'] = True
     self.weeks = sorted(self.data)
     self.dds = DengueDataSource.new_instance(target)
Example #2
0
def get_weeks(epiweek):
  """Return the week bounds and Epidata ranges derived from `epiweek`.

  The result is `(ew1, ew2, ew3, weeks0, weeks1)` where `ew1` is the fixed
  start week 200330, `ew2` is `epiweek`, `ew3` is one epiweek after
  `epiweek`, `weeks0` spans ew1..ew2 and `weeks1` spans ew1..ew3.
  """
  first_week = 200330
  following_week = flu.add_epiweeks(epiweek, 1)
  through_current = Epidata.range(first_week, epiweek)
  through_following = Epidata.range(first_week, following_week)
  return (
    first_week,
    epiweek,
    following_week,
    through_current,
    through_following,
  )
Example #3
0
 def __init__(self, ew2):
     """Prefetch sensor and fluview rows for every location up to `ew2`.

     Sensor rows are stored in `self._sensors[name][location]` and fluview
     rows in `self._fluview[location]`, each as a list of API rows.

     Raises:
         Exception: if a fluview row's region differs from the requested
             location by more than letter case.
     """
     ew1 = FIRST_DATA_EPIWEEK
     print('prefetching %d--%d...' % (ew1, ew2))
     weeks = Epidata.range(ew1, ew2)
     si = StateInfo()
     all_names, all_loc = get_all_sensors()
     self._sensors = {}
     self._fluview = {}
     num_sensor_rows = 0
     num_fluview_rows = 0
     for loc in all_loc:
         for name in all_names:
             sensor_res = Epidata.sensors(secrets.api.sensors, name, loc, weeks)
             if sensor_res['result'] != 1:
                 continue
             for row in sensor_res['epidata']:
                 row_name, row_loc = row['name'], row['location']
                 by_location = self._sensors.setdefault(row_name, {})
                 by_location.setdefault(row_loc, []).append(row)
                 num_sensor_rows += 1
         fluview_res = Epidata.fluview(loc, weeks, auth=secrets.api.fluview)
         if fluview_res['result'] != 1:
             continue
         for row in fluview_res['epidata']:
             # The returned location must match the requested one up to case;
             # it is then overwritten so that the case is consistent with the
             # case used elsewhere (lower for regions, upper for states).
             if row['region'].lower() != loc.lower():
                 raise Exception(row['region'], loc)
             row['region'] = loc
             self._fluview.setdefault(loc, []).append(row)
             num_fluview_rows += 1
     print('done (%d|%d)' % (num_sensor_rows, num_fluview_rows))
Example #4
0
  def test_acquire_specific_issue(self):
    """Acquire a dataset through `Utils.update_dataset` and check the max issue."""

    # before acquisition the API should report result -2
    with self.subTest(name='no data yet'):
      date_range = Epidata.range(20200101, 20210101)
      response = Epidata.covid_hosp('MA', date_range)
      self.assertEqual(response['result'], -2)

    # record the max issue before anything has been acquired
    with Database.connect() as db:
      issue_before = db.get_max_issue()
    self.assertEqual(issue_before, pd.Timestamp('1900-01-01 00:00:00'))

    # acquire sample data into the local database, with network calls to
    # external hosts replaced by canned sample files
    with self.subTest(name='first acquisition'), \
         patch.object(Network, 'fetch_metadata',
                      return_value=self.test_utils.load_sample_metadata()), \
         patch.object(Network, 'fetch_dataset',
                      side_effect=[self.test_utils.load_sample_dataset("dataset0.csv")]):
      acquired = Utils.update_dataset(
          Database, Network, date(2021, 3, 12), date(2021, 3, 14))
      with Database.connect() as db:
        issue_after = db.get_max_issue()
      self.assertEqual(issue_after, pd.Timestamp('2021-03-13 00:00:00'))
      self.assertTrue(acquired)
Example #5
0
 def fetch(weeks):
   """Download every article/hour series and merge them into one response.

   Each (article, hour) pair is fetched separately and stored under the
   next name from `fields`. The return value mimics an API response whose
   'epidata' list has one row per epiweek, sorted by epiweek.
   """
   # epiweek -> {'epiweek': ew, <field name>: value, ...}
   by_week = {}
   # position of the next unused name in `fields`
   field_index = 0
   for article in articles:
     for hour in hours:
       epidata = Epidata.check(
         Epidata.wiki(article, epiweeks=weeks, hours=hour))
       field_name = fields[field_index]
       field_index += 1
       for row in epidata:
         ew = row['epiweek']
         # create the entry for this epiweek on first sight, then set the field
         by_week.setdefault(ew, {'epiweek': ew})[field_name] = row['value']
   # spoof the API response
   return {
     'result': 1,
     'message': None,
     'epidata': [by_week[ew] for ew in sorted(by_week)],
   }
Example #6
0
 def __init__(self, region):
   """Load wILI for `region` and arrange it into complete 52-week seasons.

   A season is keyed by the year whose week 30 starts it. After loading,
   `self.years` lists the retained seasons in order and `self.curves` maps
   each of them to its list of 52 wILI values.
   """
   self.region = region
   weeks = Epidata.range(200330, 202330)
   rows = Epidata.check(Epidata.fluview(self.region, weeks))
   self.seasons = {}
   for row in rows:
     ew, wili = row['epiweek'], row['wili']
     season, week = EW.split_epiweek(ew)
     # weeks before 30 belong to the season that began the previous year
     if week < 30:
       season -= 1
     offset = EW.delta_epiweeks(EW.join_epiweek(season, 30), ew)
     by_offset = self.seasons.setdefault(season, {})
     if 0 <= offset < 52:
       by_offset[offset] = wili
   # drop every season that does not have all 52 offsets
   for season in sorted(self.seasons):
     if len(self.seasons[season]) != 52:
       del self.seasons[season]
   # when both are present, offsets 40..51 of 2008 are overwritten with
   # those of 2009 and the 2009 season is removed
   if 2008 in self.seasons and 2009 in self.seasons:
     for offset in range(40, 52):
       self.seasons[2008][offset] = self.seasons[2009][offset]
     del self.seasons[2009]
   self.years = sorted(self.seasons)
   self.curves = {
     season: [self.seasons[season][offset] for offset in range(52)]
     for season in self.years
   }
Example #7
0
 def get_training_set_datasetname(location, epiweek, signal, target,
                                  signal_to_truth_ew_shift):
     """Pair signal values with ground-truth target values, week by week.

     Each signal week is shifted by `signal_to_truth_ew_shift` epiweeks to
     find its ground-truth week. The week after `epiweek` is skipped, and
     weeks without ground truth are dropped (with a printed warning).

     Returns:
         A tuple `(epiweeks, X, Y)`: the sorted ground-truth weeks, the
         signal values and the target values, all in the same order.
     """
     _, _, ew3, weeks0, _ = get_weeks(epiweek)
     auth = secrets.api.datasetname_targets
     response = Epidata.datasetname_targets(auth, target, location, weeks0)
     truth_by_week = {
         row['epiweek']: row['value'] for row in Epidata.check(response)
     }
     examples = {}
     num_missing = 0
     for signal_week in signal.keys():
         truth_week = flu.add_epiweeks(signal_week,
                                       signal_to_truth_ew_shift)
         # skip the week we're trying to predict
         if truth_week == ew3:
             continue
         if truth_week not in truth_by_week:
             num_missing += 1
             continue
         examples[truth_week] = {
             'x': signal[signal_week],
             'y': truth_by_week[truth_week],
         }
     if num_missing:
         msg = 'warning: dropped %d/%d signal weeks because ground truth / target was unavailable'
         print(msg % (num_missing, len(signal)))
     epiweeks = sorted(examples)
     X = [examples[week]['x'] for week in epiweeks]
     Y = [examples[week]['y'] for week in epiweeks]
     return (epiweeks, X, Y)
Example #8
0
    def _forecast(self, ageGroup, epiweek):
        """Join observed flusurv values with each user's submitted values.

        Returns one list per submission: the observed values for
        `ageGroup` from week 40 of the test season through `epiweek`,
        followed by that submission.

        Raises:
            Exception: if `epiweek` lies outside the test season, or if the
                API returned fewer or more rows than there are weeks.
        """
        # the season runs from week 40 to week 17 of the following year
        season_start = flu.join_epiweek(self.test_season, 40)
        season_end = flu.join_epiweek(self.test_season + 1, 17)
        print("test season:", self.test_season, "ew1:", season_start,
              "epiweek:", epiweek)
        if not season_start <= epiweek <= season_end:
            raise Exception('`epiweek` outside of `test_season`')

        # observed values (left half), as issued on `epiweek`
        observed_weeks = Epidata.range(season_start, epiweek)
        response = Epidata.flusurv('network_all', observed_weeks,
                                   issues=epiweek)
        pinned = [
            row[ageGroup] for row in Forecaster.Utils.decode(response)
        ]
        expected_count = flu.delta_epiweeks(season_start, epiweek) + 1
        if len(pinned) != expected_count:
            raise Exception('missing ILINet data')

        # user submissions (right half)
        print("ageGroup", ageGroup, "epiweek", epiweek)
        submissions = self.fetch_submissions(ageGroup, epiweek)
        num_users = len(submissions)
        self._num_users = num_users
        if self.verbose:
            print(' [EC] %d users found for %s on %d' %
                  (num_users, ageGroup, epiweek))
        return [pinned + submission for submission in submissions]
Example #9
0
 def _get_partial_trajectory(self, epiweek, valid=True):
   """Return the wILI values from week 30 of the season through `epiweek`.

   Values issued on `epiweek` take precedence. Latest-issue values fill
   the remaining weeks; when `valid` is true they are used only for weeks
   more than five epiweeks before `epiweek`.

   Args:
     epiweek: last week of the trajectory.
     valid: restrict latest-issue values to weeks older than the limit.

   Returns:
     A list with one wILI value per week, in week order.

   Raises:
     Exception: if any week in the range has no usable value.
   """
   y, w = EW.split_epiweek(epiweek)
   # weeks before 30 belong to the season that began the previous year
   if w < 30:
     y -= 1
   ew1 = EW.join_epiweek(y, 30)
   ew2 = epiweek
   limit = EW.add_epiweeks(ew2, -5)
   weeks = Epidata.range(ew1, ew2)
   stable = Epidata.check(Epidata.fluview(self.region, weeks))
   try:
     unstable = Epidata.check(Epidata.fluview(self.region, weeks, issues=ew2))
   except Exception:
     # Best effort: carry on without the issue-specific rows. Catching
     # `Exception` rather than everything lets KeyboardInterrupt and
     # SystemExit propagate.
     unstable = []
   wili = {}
   for row in stable:
     ew, value = row['epiweek'], row['wili']
     if not valid or ew < limit:
       wili[ew] = value
   # rows issued on `ew2` overwrite the latest-issue values
   for row in unstable:
     ew, value = row['epiweek'], row['wili']
     wili[ew] = value
   curve = []
   for ew in EW.range_epiweeks(ew1, ew2, inclusive=True):
     if ew not in wili:
       t = 'unstable' if valid else 'any'
       raise Exception('wILI (%s) not available for week %d' % (t, ew))
     curve.append(wili[ew])
   # sanity check on the number of weeks collected
   n1 = EW.delta_epiweeks(ew1, ew2) + 1
   n2 = len(curve)
   if n1 != n2:
     raise Exception('missing data (expected %d, found %d)' % (n1, n2))
   return curve
Example #10
0
 def __init__(self, region, target):
   """Fetch target rows for one region and index them by week.

   Populates `self.data` / `self.valid` (keyed by week index, then by the
   label 'stable'), the epiweek<->index maps `self.ew2i` / `self.i2ew`,
   and `self.weeks` (sorted indices that have data).

   NOTE(review): the trailing loop at the end of this method has no
   effect as written; the method may have been truncated -- confirm
   against the full source.
   """
   self.region = region
   self.target = target
   weeks = Epidata.range(199301, 202330)
   auth = secrets.api.datasetname_targets
   # rows are passed through `mutate_rows_as_if_lagged` with 1000000;
   # presumably this adds the 'lag' field read below -- TODO confirm
   rx = mutate_rows_as_if_lagged(Epidata.check(Epidata.datasetname_targets(auth, self.target, self.region, weeks)), 1000000)
   self.data = {}
   self.valid = {}
   # assign consecutive indices to every epiweek of the requested range
   self.ew2i, self.i2ew = {}, {}
   for ew in EW.range_epiweeks(weeks['from'], weeks['to'], inclusive=True):
     # if 200916 <= ew <= 201015:
     #   continue
     i = len(self.ew2i)
     self.ew2i[ew] = i
     self.i2ew[i] = ew
   for row in rx:
     ew, observation, lag = row['epiweek'], row['value'], row['lag']
     if ew not in self.ew2i:
       # row lies outside the indexed range
       continue
     i = self.ew2i[ew]
     if i not in self.data:
       self.data[i] = {}
       self.valid[i] = {'stable': False}
     # the row's own lag is discarded; every observation is stored as 'stable'
     lag = 'stable'
     self.data[i][lag] = observation
     self.valid[i][lag] = True
   self.weeks = sorted(list(self.data.keys()))
   # no-op as written: the loop body only skips entries
   for i in self.weeks:
     if 'stable' not in self.data[i]:
       continue
    def test_query_by_issue(self):
        """Query with and without specifying an issue."""

        # writes one row for state PA, date 20201118, with the given issue,
        # value and record type; all remaining columns are null
        def insert_issue(cur, issue, value, record_type):
            padding = ', '.join(['null'] * 57)
            cur.execute(f'''insert into covid_hosp_state_timeseries values (
        0, {issue}, 'PA', 20201118, {value}, {padding}, '{record_type}'
      )''')

        # inserted out of order to test the server-side order by; two rows
        # share issue 20201201 to test the tiebreaker
        fixtures = (
            (20201201, 123, 'T'),
            (20201201, 321, 'D'),
            (20201203, 789, 'T'),
            (20201202, 456, 'T'),
        )
        with Database.connect() as db:
            with db.new_cursor() as cur:
                for issue, value, record_type in fixtures:
                    insert_issue(cur, issue, value, record_type)

        field = 'critical_staffing_shortage_today_yes'

        # request without issue (defaulting to latest issue)
        with self.subTest(name='no issue (latest)'):
            response = Epidata.covid_hosp('PA', 20201118)

            self.assertEqual(response['result'], 1)
            self.assertEqual(len(response['epidata']), 1)
            self.assertEqual(response['epidata'][0]['issue'], 20201203)
            self.assertEqual(response['epidata'][0][field], 789)

        # request for specific issue
        with self.subTest(name='specific single issue'):
            response = Epidata.covid_hosp('PA', 20201118, issues=20201201)

            self.assertEqual(response['result'], 1)
            self.assertEqual(len(response['epidata']), 1)
            self.assertEqual(response['epidata'][0]['issue'], 20201201)
            self.assertEqual(response['epidata'][0][field], 321)

        # request for multiple issues
        with self.subTest(name='specific multiple issues'):
            issues = Epidata.range(20201201, 20201231)
            response = Epidata.covid_hosp('PA', 20201118, issues=issues)

            self.assertEqual(response['result'], 1)
            self.assertEqual(len(response['epidata']), 3)
            rows = response['epidata']
            # first pair checks the tiebreaker, the rest the ordering
            expected = ((20201201, 321), (20201202, 456), (20201203, 789))
            for row, (issue, value) in zip(rows, expected):
                self.assertEqual(row['issue'], issue)
                self.assertEqual(row[field], value)
Example #12
0
def get_training_set(location, epiweek, signal, valid):
  """Pair signal values with wILI for every signal week.

  wILI issued on `epiweek` is preferred; the latest-issue value is the
  fallback. The week after `epiweek` is skipped, and weeks with no wILI
  at all are dropped (with a printed warning).

  Args:
    location: location passed to `Epidata.fluview`.
    epiweek: most recent week with data.
    signal: mapping from epiweek to signal value.
    valid: when true, refuse to fall back to latest-issue wILI for weeks
      within five epiweeks of the week after `epiweek`.

  Returns:
    Whatever `get_training_set_data` returns for the collected pairs.

  Raises:
    Exception: if `valid` is set and a recent week lacks issue-specific
      wILI.
  """
  ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
  auth = secrets.api.fluview
  try:
    result = Epidata.fluview(location, weeks0, issues=ew2, auth=auth)
    rows = Epidata.check(result)
    unstable = extract(rows, ['wili'])
  except Exception:
    # Best effort: carry on without the issue-specific values. Catching
    # `Exception` rather than everything lets KeyboardInterrupt and
    # SystemExit propagate.
    unstable = {}
  rows = Epidata.check(Epidata.fluview(location, weeks0, auth=auth))
  stable = extract(rows, ['wili'])
  data = {}
  num_dropped = 0
  for ew in signal.keys():
    # skip the week after `epiweek`
    if ew == ew3:
      continue
    sig = signal[ew]
    if ew not in unstable:
      if valid and flu.delta_epiweeks(ew, ew3) <= 5:
        raise Exception('unstable wILI is not available on %d' % ew)
      if ew not in stable:
        num_dropped += 1
        continue
      wili = stable[ew]
    else:
      wili = unstable[ew]
    data[ew] = {'x': sig, 'y': wili}
  if num_dropped:
    msg = 'warning: dropped %d/%d signal weeks because (w)ILI was unavailable'
    print(msg % (num_dropped, len(signal)))
  return get_training_set_data(data)
Example #13
0
def download_preliminary_fluview(f):
    """Write national wILI at lags 0, 1 and 2 to `f`.

    Each row becomes one `week,nat_<lag>,value` line with the value
    formatted to five decimals.
    """
    for lag in range(3):
        print('preliminary fluview', lag)
        label = 'nat_%d' % lag
        response = Epidata.fluview(
            'nat', weeks, lag=lag, auth=secrets.api.fluview)
        for row in Epidata.check(response):
            week, value = row['epiweek'], row['wili']
            f.write('%d,%s,%.5f\n' % (week, label, value))
Example #14
0
def download_fluview(f):
    """Write wILI for every location in `Locations.region_list` to `f`.

    Each row becomes one `week,location,value` line with the value
    formatted to five decimals.
    """
    for loc in Locations.region_list:
        print('fluview', loc)
        response = Epidata.fluview(loc, weeks, auth=secrets.api.fluview)
        for row in Epidata.check(response):
            week, value = row['epiweek'], row['wili']
            f.write('%d,%s,%.5f\n' % (week, loc, value))
Example #15
0
 def fetch(weeks):
   """Fetch GFT for `location`, clipping the range for the post-2013 model.

   The GFT model update of 2013 improved the signal so much that training
   on older data would severely hurt the predictive power of the newer
   data, so GFT before and after mid-2013 is treated as two different
   signals.
   """
   requested_end = weeks['to']
   if requested_end >= 201340:
     # new GFT model: throw out everything before 201331
     clipped_start = max(weeks['from'], 201331)
     weeks = Epidata.range(clipped_start, requested_end)
   return Epidata.gft(location, weeks)
  def test_async_epidata(self):
    # insert dummy data
    self.cur.execute(f'''
      INSERT INTO
        `covidcast` (`id`, `source`, `signal`, `time_type`, `geo_type`, 
	      `time_value`, `geo_value`, `value_updated_timestamp`, 
        `value`, `stderr`, `sample_size`, `direction_updated_timestamp`, 
        `direction`, `issue`, `lag`, `is_latest_issue`, `is_wip`,`missing_value`,
        `missing_stderr`,`missing_sample_size`) 
      VALUES
        (0, 'src', 'sig', 'day', 'county', 20200414, '11111',
          123, 10, 11, 12, 456, 13, 20200414, 0, 1, False,
          {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
        (0, 'src', 'sig', 'day', 'county', 20200414, '22222',
          123, 20, 21, 22, 456, 23, 20200414, 0, 1, False,
          {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
        (0, 'src', 'sig', 'day', 'county', 20200414, '33333',
          123, 30, 31, 32, 456, 33, 20200414, 0, 1, False,
          {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
        (0, 'src', 'sig', 'day', 'msa', 20200414, '11111',
          123, 40, 41, 42, 456, 43, 20200414, 0, 1, False,
          {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
        (0, 'src', 'sig', 'day', 'msa', 20200414, '22222',
          123, 50, 51, 52, 456, 53, 20200414, 0, 1, False,
          {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
        (0, 'src', 'sig', 'day', 'msa', 20200414, '33333',
          123, 60, 61, 62, 456, 634, 20200414, 0, 1, False,
          {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING})
    ''')
    self.cnx.commit()
    test_output = Epidata.async_epidata([
      {
        'source': 'covidcast',
        'data_source': 'src',
        'signals': 'sig',
        'time_type': 'day',
        'geo_type': 'county',
        'geo_value': '11111',
        'time_values': '20200414'
      },
      {
        'source': 'covidcast',
        'data_source': 'src',
        'signals': 'sig',
        'time_type': 'day',
        'geo_type': 'county',
        'geo_value': '00000',
        'time_values': '20200414'
      }
    ]*12, batch_size=10)
    responses = [i[0] for i in test_output]
    # check response is same as standard covidcast call, using 24 calls to test batch sizing
    self.assertEqual(responses,
                     [Epidata.covidcast('src', 'sig', 'day', 'county', 20200414, '11111'),
                      Epidata.covidcast('src', 'sig', 'day', 'county', 20200414, '00000')]*12
                     )
    def test_query_by_issue(self):
        """Query with and without specifying an issue."""

        # inserted out of order to test the server-side order by; two rows
        # share issue 20201201 to test the tiebreaker
        fixtures = (
            (20201201, 123, 'T'),
            (20201201, 321, 'D'),
            (20201203, 789, 'T'),
            (20201202, 456, 'T'),
        )
        with Database.connect() as db:
            with db.new_cursor() as cur:
                for issue, value, record_type in fixtures:
                    self.insert_issue(cur, issue, value, record_type)

        field = 'critical_staffing_shortage_today_yes'

        # request without issue (defaulting to latest issue)
        with self.subTest(name='no issue (latest)'):
            response = Epidata.covid_hosp('PA', 20201118)

            self.assertEqual(response['result'], 1)
            self.assertEqual(len(response['epidata']), 1)
            self.assertEqual(response['epidata'][0]['issue'], 20201203)
            self.assertEqual(response['epidata'][0][field], 789)

        # request for specific issue
        with self.subTest(name='specific single issue'):
            response = Epidata.covid_hosp('PA', 20201118, issues=20201201)

            self.assertEqual(response['result'], 1)
            self.assertEqual(len(response['epidata']), 1)
            self.assertEqual(response['epidata'][0]['issue'], 20201201)
            self.assertEqual(response['epidata'][0][field], 321)

        # request for multiple issues
        with self.subTest(name='specific multiple issues'):
            issues = Epidata.range(20201201, 20201231)
            response = Epidata.covid_hosp('PA', 20201118, issues=issues)

            self.assertEqual(response['result'], 1)
            self.assertEqual(len(response['epidata']), 3)
            rows = response['epidata']
            # first pair checks the tiebreaker, the rest the ordering
            expected = ((20201201, 321), (20201202, 456), (20201203, 789))
            for row, (issue, value) in zip(rows, expected):
                self.assertEqual(row['issue'], issue)
                self.assertEqual(row[field], value)
Example #18
0
 def get_training_set(location, epiweek, signal, valid):
   """Pair signal values with dengue counts for every usable signal week.

   The week after `epiweek` and weeks without a dengue value are left out.
   `valid` is accepted but not read. Returns whatever
   `get_training_set_data` returns for the collected pairs.
   """
   _, _, ew3, weeks0, _ = get_weeks(epiweek)
   rows = Epidata.check(Epidata.paho_dengue(location, weeks0))
   stable = extract(rows, 'num_dengue', to_weekly=True)
   data = {
     ew: {'x': signal[ew], 'y': stable[ew]}
     for ew in signal.keys()
     if ew != ew3 and ew in stable
   }
   return get_training_set_data(data)
 def test_async_epidata_fail(self):
   """Expect `async_epidata` to raise a 404 ClientResponseError for this request."""
   request = {
     'source': 'covidcast',
     'data_source': 'src',
     'signals': 'sig',
     'time_type': 'day',
     'geo_type': 'county',
     'geo_value': '11111',
     'time_values': '20200414'
   }
   with pytest.raises(ClientResponseError, match="404, message='NOT FOUND'"):
     Epidata.async_epidata([request])
    def test_query_by_issue(self):
        """Query with and without specifying an issue."""

        # writes one row for state PA, date 20201118, with the given issue
        # and value; all remaining columns are null
        def insert_issue(cur, issue, value):
            padding = ', '.join(['null'] * 51)
            cur.execute(f'''insert into covid_hosp values (
        0, {issue}, 'PA', 20201118, {value}, {padding}
      )''')

        # inserted out of order to test the server-side order by
        fixtures = ((20201201, 123), (20201203, 789), (20201202, 456))
        with Database.connect() as db:
            with db.new_cursor() as cur:
                for issue, value in fixtures:
                    insert_issue(cur, issue, value)

        field = 'hospital_onset_covid'

        # request without issue (defaulting to latest issue)
        with self.subTest(name='no issue (latest)'):
            response = Epidata.covid_hosp('PA', 20201118)

            self.assertEqual(response['result'], 1)
            self.assertEqual(len(response['epidata']), 1)
            self.assertEqual(response['epidata'][0]['issue'], 20201203)
            self.assertEqual(response['epidata'][0][field], 789)

        # request for specific issue
        with self.subTest(name='specific single issue'):
            response = Epidata.covid_hosp('PA', 20201118, issues=20201201)

            self.assertEqual(response['result'], 1)
            self.assertEqual(len(response['epidata']), 1)
            self.assertEqual(response['epidata'][0]['issue'], 20201201)
            self.assertEqual(response['epidata'][0][field], 123)

        # request for multiple issues
        with self.subTest(name='specific multiple issues'):
            issues = Epidata.range(20201201, 20201231)
            response = Epidata.covid_hosp('PA', 20201118, issues=issues)

            self.assertEqual(response['result'], 1)
            self.assertEqual(len(response['epidata']), 3)
            rows = response['epidata']
            # rows are expected in ascending issue order
            expected = ((20201201, 123), (20201202, 456), (20201203, 789))
            for row, (issue, value) in zip(rows, expected):
                self.assertEqual(row['issue'], issue)
                self.assertEqual(row[field], value)
Example #21
0
 def _get_unstable(self, region, lag):
     """Return values at a fixed lag for the seasons before the test season.

     One range per season from 2010 up to (not including)
     `self.test_season` is requested, each running from week 40 to week 20
     of the following year. The result maps epiweek to wILI for the WILI
     forecast type, and to the row's `region` field otherwise.
     """
     ranges = [
         Epidata.range(flu.join_epiweek(season + 0, 40),
                       flu.join_epiweek(season + 1, 20))
         for season in range(2010, self.test_season)
     ]
     if self.forecast_type == ForecastType.WILI:
         response = Epidata.fluview(region, ranges, lag=lag)
         field = 'wili'
     else:
         response = Epidata.flusurv('network_all', ranges, lag=lag)
         field = region
     epidata = Forecaster.Utils.decode(response)
     return {row['epiweek']: row[field] for row in epidata}
Example #22
0
 def test_request_method(self, get, post):
     """Test that a GET request is default and POST is used if a 414 is returned."""
     query = ('src', 'sig', 'day', 'county', 20200414, '01234')

     with self.subTest(name='get request'):
         Epidata.covidcast(*query)
         get.assert_called_once()
         post.assert_not_called()

     with self.subTest(name='post request'):
         # make the mocked GET answer with status 414
         uri_too_long = MagicMock()
         uri_too_long.status_code = 414
         get.return_value = uri_too_long
         Epidata.covidcast(*query)
         # one GET from this subtest plus one from the subtest above
         self.assertEqual(get.call_count, 2)
         post.assert_called_once()
Example #23
0
  def test_round_trip(self):
    """Make a simple round-trip with some sample data."""

    # insert two dummy rows, naming every column explicitly
    self.cur.execute('''
      INSERT INTO 
        `fluview` (`id`, `release_date`, `issue`, `epiweek`, `region`, 
        `lag`, `num_ili`, `num_patients`, `num_providers`, `wili`, `ili`, 
        `num_age_0`, `num_age_1`, `num_age_2`, `num_age_3`, `num_age_4`, `num_age_5`)
      VALUES
        (0, "2020-04-07", 202021, 202020, "nat", 1, 2, 3, 4, 3.14159, 1.41421,
          10, 11, 12, 13, 14, 15),
        (0, "2020-04-28", 202022, 202022, "hhs1", 5, 6, 7, 8, 1.11111, 2.22222,
          20, 21, 22, 23, 24, 25)
    ''')
    self.cnx.commit()

    # make the request
    response = Epidata.fluview_meta()

    # the metadata should reflect the later release and both rows
    expected_meta = {
      'latest_update': '2020-04-28',
      'latest_issue': 202022,
      'table_rows': 2,
    }
    expected = {
      'result': 1,
      'epidata': [expected_meta],
      'message': 'success',
    }
    self.assertEqual(response, expected)
  def test_round_trip(self):
    """Make a simple round-trip with some sample data."""

    # insert two dummy rows, relying on the table's column order
    self.cur.execute('''
      insert into fluview values
        (0, "2020-04-07", 202021, 202020, "nat", 1, 2, 3, 4, 3.14159, 1.41421,
          10, 11, 12, 13, 14, 15),
        (0, "2020-04-28", 202022, 202022, "hhs1", 5, 6, 7, 8, 1.11111, 2.22222,
          20, 21, 22, 23, 24, 25)
    ''')
    self.cnx.commit()

    # make the request
    response = Epidata.fluview_meta()

    # the metadata should reflect the later release and both rows
    expected_meta = {
      'latest_update': '2020-04-28',
      'latest_issue': 202022,
      'table_rows': 2,
    }
    expected = {
      'result': 1,
      'epidata': [expected_meta],
      'message': 'success',
    }
    self.assertEqual(response, expected)
Example #25
0
  def test_covidcast_meta(self):
    """Test that the covidcast_meta endpoint returns expected data."""

    # insert a single dummy row
    self.cur.execute('''
      insert into covidcast values
        (0, 'src', 'sig', 'day', 'county', 20200414, '01234',
          123, 1.5, 2.5, 3.5, 456, 4)
    ''')
    self.cnx.commit()

    # fetch data
    response = Epidata.covidcast_meta()

    # with one row, min, max and mean all equal its value
    expected_meta = {
      'data_source': 'src',
      'signal': 'sig',
      'time_type': 'day',
      'geo_type': 'county',
      'min_time': 20200414,
      'max_time': 20200414,
      'num_locations': 1,
      'min_value': 1.5,
      'max_value': 1.5,
      'mean_value': 1.5,
      'stdev_value': 0,
      'last_update': 123,
    }
    expected = {
      'result': 1,
      'epidata': [expected_meta],
      'message': 'success',
    }
    self.assertEqual(response, expected)
Example #26
0
  def test_covidcast(self):
    """Test that the covidcast endpoint returns expected data."""

    # insert a single dummy row
    self.cur.execute('''
      insert into covidcast values
        (0, 'src', 'sig', 'day', 'county', 20200414, '01234',
          123, 1.5, 2.5, 3.5, 456, 4)
    ''')
    self.cnx.commit()

    # fetch data
    query = ('src', 'sig', 'day', 'county', 20200414, '01234')
    response = Epidata.covidcast(*query)

    # check result
    expected_row = {
      'time_value': 20200414,
      'geo_value': '01234',
      'value': 1.5,
      'stderr': 2.5,
      'sample_size': 3.5,
      'direction': 4,
    }
    expected = {
      'result': 1,
      'epidata': [expected_row],
      'message': 'success',
    }
    self.assertEqual(response, expected)
Example #27
0
    def _get_stable(self, region):
        """Return latest-issue values for the seasons before the test season.

        One range per season from 2003 up to (not including)
        `self.test_season` is requested, skipping 2009; each starts at
        week 40 and ends 37 epiweeks later. The result maps epiweek to
        wILI for the WILI forecast type, and to the row's `region` field
        otherwise.
        """
        ranges = []
        for season in range(2003, self.test_season):
            if season != 2009:
                first = flu.join_epiweek(season, 40)
                last = flu.add_epiweeks(first, 37)
                ranges.append(Epidata.range(first, last))

        if self.forecast_type == ForecastType.WILI:
            response = Epidata.fluview(region, ranges)
            field = 'wili'
        else:
            response = Epidata.flusurv('network_all', ranges)
            field = region
        epidata = Forecaster.Utils.decode(response)
        return {row['epiweek']: row[field] for row in epidata}
Example #28
0
 def _forecast(self, region, epiweek):
   """Join observed wILI with each user's submitted values.

   Returns one list per submission: the wILI values for `region` from
   week 40 of the test season through `epiweek`, followed by that
   submission.

   Raises:
     Exception: if `epiweek` lies outside the test season, or if the API
       returned fewer or more rows than there are weeks.
   """
   # the season runs from week 40 to week 20 of the following year
   season_start = flu.join_epiweek(self.test_season, 40)
   season_end = flu.join_epiweek(self.test_season + 1, 20)
   if not season_start <= epiweek <= season_end:
     raise Exception('`epiweek` outside of `test_season`')
   # observed values (left half), as issued on `epiweek`
   observed_weeks = Epidata.range(season_start, epiweek)
   response = Epidata.fluview(region, observed_weeks, issues=epiweek)
   pinned = [row['wili'] for row in Forecaster.Utils.decode(response)]
   expected_count = flu.delta_epiweeks(season_start, epiweek) + 1
   if len(pinned) != expected_count:
     raise Exception('missing ILINet data')
   # user submissions (right half)
   submissions = self.fetch_submissions(region, epiweek)
   num_users = len(submissions)
   self._num_users = num_users
   print(' [EC] %d users found for %s on %d' % (num_users, region, epiweek))
   return [pinned + submission for submission in submissions]
Example #29
0
 def fetch(weeks):
   """Fetch CDC data for `location` and log-transform the listed fields.

   Every column named in `fields` is replaced in place by log(1 + value);
   log-transformed counts appear to provide a much better fit. A response
   without an 'epidata' key is returned untouched.
   """
   response = Epidata.cdc(secrets.api.cdc, weeks, location)
   if 'epidata' not in response:
     return response
   for row in response['epidata']:
     for column in fields:
       row[column] = np.log(1. + row[column])
   return response
Example #30
0
  def test_acquire_dataset(self):
    """Acquire a new dataset.

    Runs `Update.run()` twice against mocked network calls and checks the
    rows served by `Epidata.covid_hosp` after each run.
    """

    # make sure the data does not yet exist
    with self.subTest(name='no data yet'):
      response = Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101))
      # the response is passed as the failure message to ease debugging
      self.assertEqual(response['result'], -2, response)

    # acquire sample data into local database
    # mock out network calls to external hosts
    with self.subTest(name='first acquisition'), \
         patch.object(Network, 'fetch_metadata', return_value=self.test_utils.load_sample_metadata()) as mock_fetch_meta, \
         patch.object(Network, 'fetch_dataset', side_effect=[self.test_utils.load_sample_dataset("dataset0.csv"), # dataset for 3/13
                                                             self.test_utils.load_sample_dataset("dataset0.csv"), # first dataset for 3/15
                                                             self.test_utils.load_sample_dataset()] # second dataset for 3/15
                      ) as mock_fetch:
      acquired = Update.run()
      self.assertTrue(acquired)
      # metadata is expected to be fetched exactly once per run
      self.assertEqual(mock_fetch_meta.call_count, 1)

    # make sure the data now exists
    with self.subTest(name='initial data checks'):
      response = Epidata.covid_hosp('WY', Epidata.range(20200101, 20210101))
      self.assertEqual(response['result'], 1)
      self.assertEqual(len(response['epidata']), 1)
      row = response['epidata'][0]
      self.assertEqual(row['state'], 'WY')
      self.assertEqual(row['date'], 20201209)
      self.assertEqual(row['issue'], 20210315)
      self.assertEqual(row['critical_staffing_shortage_today_yes'], 8)
      self.assertEqual(row['total_patients_hospitalized_confirmed_influenza_covid_coverage'], 56)
      actual = row['inpatient_bed_covid_utilization']
      expected = 0.11729857819905214
      # float column, so compare approximately
      self.assertAlmostEqual(actual, expected)
      self.assertIsNone(row['critical_staffing_shortage_today_no'])

      # expect 118 fields per row
      # NOTE(review): an earlier comment here said 61 fields (63 database
      # columns, except `id` and `record_type`); confirm the current schema
      self.assertEqual(len(row), 118)

    # the earlier issue (3/13) should be queryable as well
    with self.subTest(name='all date batches acquired'):
      response = Epidata.covid_hosp('WY', Epidata.range(20200101, 20210101), issues=20210313)
      self.assertEqual(response['result'], 1)

    # re-acquisition of the same dataset should be a no-op
    with self.subTest(name='second acquisition'), \
         patch.object(Network, 'fetch_metadata', return_value=self.test_utils.load_sample_metadata()) as mock_fetch_meta, \
         patch.object(Network, 'fetch_dataset', return_value=self.test_utils.load_sample_dataset()) as mock_fetch:
      acquired = Update.run()
      self.assertFalse(acquired)

    # make sure the data still exists
    with self.subTest(name='final data checks'):
      response = Epidata.covid_hosp('WY', Epidata.range(20200101, 20210101))
      self.assertEqual(response['result'], 1)
      self.assertEqual(len(response['epidata']), 1)