def mmwrid_to_epiweek(mmwrid):
    """Convert a CDC week index into an epiweek."""
    # MMWR IDs are sequential, so the target epiweek is just a week offset
    # from a known reference point: MMWR ID 2179 <=> 2003w40 (2003-09-28).
    reference_date = EpiDate(2003, 9, 28)
    reference_mmwrid = 2179
    return reference_date.add_weeks(mmwrid - reference_mmwrid).get_ew()
def get_most_recent_issue(self):
    """Return the most recent epiweek for which FluView data is available."""
    # Look back ten weeks (inclusive) from the current epiweek.
    current_week = EpiDate.today().get_ew()
    start_week = add_epiweeks(current_week, -9)
    week_range = self.epidata.range(start_week, current_week)
    response = self.epidata.fluview('nat', week_range)
    rows = self.epidata.check(response)
    return max(row['issue'] for row in rows)
def get_kcdc_data():
    """Fetch KCDC influenza data and return it as (epiweeks, ili_values)."""
    issue = EpiDate.today().get_ew()
    # The season rolls over after week 35 of each year.
    last_season = issue // 100 + (1 if issue % 100 > 35 else 0)
    url = 'http://www.cdc.go.kr/npt/biz/npp/iss/influenzaListAjax.do'
    params = {
        'icdNm': 'influenza',
        'startYear': '2004',  # Started in 2004
        'endYear': str(last_season)
    }
    # NOTE(review): `params` is passed positionally, so requests sends it as
    # the POST body (form data), not as query-string parameters — confirm
    # this is what the endpoint expects.
    payload = requests.post(url, params).json()
    data = payload['data']
    ews = []
    ilis = []
    ew1 = 200436
    for year in range(2004, last_season):
        year_data = data[year - 2004]
        if year > 2004:
            # Each subsequent season continues right after the previous one.
            ew1 = ews[-1] + 1
        # Values come backtick-delimited; drop empty fields.
        values = [float(v) for v in year_data["VALUE"].split('`') if v != '']
        ew2 = add_epiweeks(ew1, len(values))
        season_weeks = list(range_epiweeks(ew1, ew2))
        for i, ew in enumerate(season_weeks):
            ilis.append(values[i])
            ews.append(ew)
    return ews, ilis
def main():
    """Entry point: fetch (or load) ECDC ILI data and update the database.

    With --file/--issue, loads a previously downloaded file; otherwise
    downloads current data, retrying up to five times since the download
    step does not always fetch every file.
    """
    import os
    import shutil

    # args and usage
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--test',
        action='store_true',
        help='do dry run only, do not update the database'
    )
    parser.add_argument(
        '--file',
        type=str,
        help='load an existing zip file (otherwise fetch current data)'
    )
    parser.add_argument(
        '--issue',
        type=int,
        help='issue of the file (e.g. 201740); used iff --file is given'
    )
    args = parser.parse_args()

    # --file and --issue only make sense together
    if (args.file is None) != (args.issue is None):
        raise Exception('--file and --issue must both be present or absent')

    date = datetime.datetime.now().strftime('%Y-%m-%d')
    print('assuming release date is today, %s' % date)

    ensure_tables_exist()
    if args.file:
        update_from_file(args.issue, date, args.file, test_mode=args.test)
        return

    # Code doesn't always download all files (unreproducible errors), so
    # try a few times and stop as soon as one attempt fully succeeds.
    max_tries = 5
    success = False
    for _attempt in range(max_tries):
        # Use a uniquely-named temporary directory so data from different
        # attempts is never mixed in the same folder.
        suffix = ''.join(
            random.choice('0123456789abcdefghijklmnopqrstuvwxyz')
            for _ in range(8))
        tmp_dir = 'downloads_' + suffix
        os.mkdir(tmp_dir)
        download_ecdc_data(download_dir=tmp_dir)
        issue = EpiDate.today().get_ew()
        files = glob.glob('%s/*.csv' % tmp_dir)
        # Touch each file's first line (presumably to skip/validate headers
        # before loading — TODO confirm this is still needed).
        for filename in files:
            with open(filename, 'r') as f:
                _ = f.readline()
        db_error = False
        for filename in files:
            try:
                update_from_file(issue, date, filename, test_mode=args.test)
                os.remove(filename)
            except Exception as e:
                # Log the failure instead of swallowing it silently, but
                # keep the best-effort retry behavior.
                print('failed to update from %s: %s' % (filename, e))
                db_error = True
        shutil.rmtree(tmp_dir)
        if not db_error:
            success = True
            break  # Exit loop with success
    # Warn only on genuine failure; the original warned even when the
    # final attempt succeeded.
    if not success:
        print('WARNING: Database `ecdc_ili` did not update successfully')
def get_weeks(self):
    """Return a list of weeks on which truth and sensors are both available."""
    # Stop one week before the current epiweek.
    end_week = add_epiweeks(EpiDate.today().get_ew(), -1)
    return list(
        range_epiweeks(self.FIRST_DATA_EPIWEEK, end_week, inclusive=True))
def get_most_recent_issue(self, location):
    """Return the most recent epiweek for which paho_dengue data is available
    in given location."""
    # Search the trailing 52 weeks for available data.
    end_week = EpiDate.today().get_ew()
    start_week = add_epiweeks(end_week, -52)
    week_range = self.epidata.range(start_week, end_week)
    response = self.epidata.paho_dengue(location, week_range)
    rows = self.epidata.check(response)
    return max(row['epiweek'] for row in rows)
def _ew2date(ew):
    """Return the first day (Sunday) of the given epiweek, as a string."""
    year, week = flu.split_epiweek(ew)
    # EpiDate.from_epiweek yields the middle of the week (Wednesday);
    # step back three days to land on Sunday, the first day of the week.
    sunday = EpiDate.from_epiweek(year, week).add_days(-3)
    return str(sunday)
def get_current_issue():
    """Scrape the current issue from the FluSurv main page."""
    # fetch the app metadata, which carries the load timestamp
    metadata = fetch_json('GetPhase03InitApp?appVersion=Public', None)
    # 'loaddatetime' is formatted like e.g. "Oct 12, 2017"
    loaded = datetime.strptime(metadata['loaddatetime'], '%b %d, %Y')
    # convert the load date to an epiweek
    return EpiDate(loaded.year, loaded.month, loaded.day).get_ew()
def season_db_to_epiweek(season_str, db_date_str, first_db_date_of_season_str="1-Aug"):
    """Convert a season string (e.g. "2017-18") and a day-month date string
    (e.g. "15-Jan") into an epiweek, resolving which of the season's two
    calendar years the date falls in."""
    first_year = int(season_str.split("-")[0])

    # FIXME check/enforce locale
    def _parse(day_month, year):
        # e.g. "1-Aug" + 2017 -> date(2017, 8, 1)
        return datetime.datetime.strptime(
            "%s-%d" % (day_month, year), "%d-%b-%Y").date()

    season_start = _parse(first_db_date_of_season_str, first_year)
    date = _parse(db_date_str, first_year)
    if date < season_start:
        # The date precedes the season start, so it belongs to the second
        # calendar year of the season.
        date = _parse(db_date_str, first_year + 1)
    return EpiDate(date.year, date.month, date.day).get_ew()
def main():
    """Entry point: fetch KCDC data and store it in the database."""
    # args and usage
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--test', action='store_true',
        help='do dry run only, do not update the database')
    args = parser.parse_args()

    release_date = datetime.datetime.now().strftime('%Y-%m-%d')
    print('assuming release date is today, %s' % release_date)
    issue = EpiDate.today().get_ew()

    ensure_tables_exist()
    ews, ilis = get_kcdc_data()
    update_from_data(ews, ilis, release_date, issue, test_mode=args.test)
def extract_epiweek_and_team(filename):
    """
    Extract the submission epiweek (epiweek of most recently published report)
    and the team name from the file name of a flu contest submission.

    The return value is a tuple of:
      1. the submission epiweek (e.g. 201751)
      2. the team name (e.g. "delphi-epicast")

    Raises Exception if the file name doesn't match the expected naming
    convention or encodes an impossible week number.
    """
    # this is the naming convention for 2017 flu contest submissions
    # (note: the dot before "csv" is now escaped; previously it matched any
    # character)
    pattern = re.compile(r'^EW(\d{2})-(.*)-(\d{4})-(\d{2})-(\d{2})\.csv$')
    match = pattern.match(os.path.basename(filename))
    if match is None:
        # only able to parse this specific naming convention; include the
        # offending name so failures are diagnosable
        raise Exception('unable to parse submission file name: %s' % filename)
    week = int(match.group(1))
    team = match.group(2)
    year = int(match.group(3))
    month = int(match.group(4))
    day = int(match.group(5))
    epiweek = EpiDate(year, month, day).get_ew()
    # We know the week number, but the year has to be inferred from the
    # submission date. Since the week of submission is never less than the week
    # of the most recent report, we can step backwards from the week of
    # submission until we find the expected week number. Ordinarily, this will
    # take exactly two steps. For example, data collected on 2017w51 is
    # reported on 2017w52, and our forecast is submitted on 2018w01; so we
    # start with 2018w01 and step backwards until find the first week 51, which
    # is 2017w51.
    if not 1 <= week <= 53:
        # prevent an infinite loop
        raise Exception('invalid week number: %d' % week)
    while Epiweek.split_epiweek(epiweek)[1] != week:
        epiweek = Epiweek.add_epiweeks(epiweek, -1)
    return epiweek, team
#print('Updated Epicast df for %d users.' % future._num_users) forecaster._callback = update_epicast_df print('Generating epicast for', epiweek) forecaster.open() forecast = forecaster.forecast( epiweek) # is this the forecast function in fc_abstract.py? filename = ForecastIO.save_csv(forecast) forecaster.close() print(filename) return filename if __name__ == '__main__': epiweek = EpiDate.today().add_weeks(-1).get_ew() print("epiweek: ", epiweek) print('WARNING: For testing only!') print(' - Using very small number of samples') print(' - Not uploading submissions to database') print(' - Not emailing submissions to CDC') print(' - Assuming last published wILI on %d' % epiweek) print(' - Limited locations') ec_age_groups = [ 'rate_overall', 'rate_age_0', 'rate_age_1', 'rate_age_2', 'rate_age_3', 'rate_age_4' ] sub = Submissions_Hosp(ec_age_groups, 1000) ec = None ec = sub.run_epicast(epiweek, 0.001, 0.001)
def update(issue, location_name, test_mode=False):
    """Fetch and store the currently available weekly FluSurv dataset.

    Args:
      issue: epiweek of publication (e.g. 201740)
      location_name: human-readable FluSurv location (key into
        flusurv.location_codes)
      test_mode: if True, skip the final commit (dry run)
    """
    # fetch data
    location_code = flusurv.location_codes[location_name]
    print('fetching data for', location_name, location_code)
    data = flusurv.get_data(location_code)

    # metadata
    epiweeks = sorted(data.keys())
    location = location_name
    release_date = str(EpiDate.today())

    # connect to the database
    u, p = secrets.db.epi
    cnx = mysql.connector.connect(user=u, password=p, database='epidata')
    try:
        cur = cnx.cursor()
        rows1 = get_rows(cur)
        print('rows before: %d' % rows1)

        # SQL for insert/update
        sql = '''
        INSERT INTO `flusurv` (
          `release_date`, `issue`, `epiweek`, `location`, `lag`,
          `rate_age_0`, `rate_age_1`, `rate_age_2`, `rate_age_3`,
          `rate_age_4`, `rate_overall`, `rate_age_5`, `rate_age_6`,
          `rate_age_7`
        )
        VALUES (
          %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
        )
        ON DUPLICATE KEY UPDATE
        `release_date` = least(`release_date`, %s),
        `rate_age_0` = coalesce(%s, `rate_age_0`),
        `rate_age_1` = coalesce(%s, `rate_age_1`),
        `rate_age_2` = coalesce(%s, `rate_age_2`),
        `rate_age_3` = coalesce(%s, `rate_age_3`),
        `rate_age_4` = coalesce(%s, `rate_age_4`),
        `rate_overall` = coalesce(%s, `rate_overall`),
        `rate_age_5` = coalesce(%s, `rate_age_5`),
        `rate_age_6` = coalesce(%s, `rate_age_6`),
        `rate_age_7` = coalesce(%s, `rate_age_7`)
        '''

        # insert/update each row of data (one per epiweek)
        for epiweek in epiweeks:
            lag = delta_epiweeks(epiweek, issue)
            if lag > 52:
                # Ignore values older than one year, as (1) they are assumed
                # not to change, and (2) it would adversely affect database
                # performance if all values (including duplicates) were stored
                # on each run.
                continue
            args_meta = [release_date, issue, epiweek, location, lag]
            args_insert = data[epiweek]
            args_update = [release_date] + data[epiweek]
            cur.execute(sql, tuple(args_meta + args_insert + args_update))

        # report the number of rows touched
        rows2 = get_rows(cur)
        print('rows after: %d (+%d)' % (rows2, rows2 - rows1))
        cur.close()

        # commit, unless doing a dry run
        if test_mode:
            print('test mode: not committing database changes')
        else:
            cnx.commit()
    finally:
        # ensure the connection is released even if an exception occurred
        # (the original leaked the connection on any error above)
        cnx.close()
def get_most_recent_issue():
    """Return the most recent FluView issue within the last 10 weeks."""
    end_week = EpiDate.today().get_ew()
    start_week = flu.add_epiweeks(end_week, -9)
    response = Epidata.fluview('nat', Epidata.range(start_week, end_week))
    return max(row['issue'] for row in Epidata.check(response))