def get_unstable_wILI(region, ew1, ew2):
    weeks = Epidata.range(ew1, ew2)
    epidata = AF_Utils._get(Epidata.fluview(region, weeks, issues=ew2))
    data = [row['wili'] for row in epidata]
    if len(data) != flu.delta_epiweeks(ew1, ew2) + 1:
        raise Exception('missing data')
    return data

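# Usage sketch (hypothetical values): wILI for HHS Region 1 over epiweeks
# 201940-202010, pinned to the data as published in issue 202010. Assumes the
# surrounding module's AF_Utils and flu (epiweek arithmetic) helpers are in scope.
unstable_wili = get_unstable_wILI('hhs1', 201940, 202010)
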
def get_wiki(ew1, ew2):
    # get the raw wiki data, broken down by epiweek, article, and hour
    epiweeks = Epidata.range(ew1, ew2)
    result = {}
    data = api_fetch(Epidata.wiki(ARTICLES, epiweeks=epiweeks, hours=HOURS))
    # index the data for fast access
    for row in data:
        epiweek, article = row['epiweek'], row['article']
        if epiweek not in result:
            result[epiweek] = {}
        if article not in result[epiweek]:
            result[epiweek][article] = {'c': [], 't': []}
        result[epiweek][article]['c'].append(row['count'])
        result[epiweek][article]['t'].append(row['total'])
    # group by epiweek and article (combining hours)
    data = []
    for epiweek in sorted(result.keys()):
        row = []
        for article in sorted(ARTICLES):
            count, total = result[epiweek][article]['c'], result[epiweek][article]['t']
            if len(count) != len(HOURS) or len(total) != len(HOURS):
                raise Exception('wiki is missing hours')
            row.append(1e6 * sum(count) / sum(total))
        data.append(row)
    # return a list of weekly data
    return data

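# Usage sketch: with the module's ARTICLES and HOURS constants in scope, this
# builds one row per epiweek containing a normalized hit rate (hits per
# million pageviews) per article; the epiweek range is hypothetical.
wiki_features = get_wiki(201540, 201620)
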
def pull_data_iteratively(states: set, dates: dict) -> list:
    """Pull Epidata API for a set of states and dates.

    To avoid Epidata API row limits, does not grab all values at once.
    Instead, it loops through each state and pulls all data for 50 hospitals
    at a time.

    Parameters
    ----------
    states: set
        Set of state codes (2 letter lowercase abbreviation) to get data for.
    dates: dict
        Dict of 'from' and 'to' dates output by Epidata.range().

    Returns
    -------
    List of dictionaries. Concatenation of all the response['epidata'] lists.
    """
    responses = []
    for state in states:
        lookup_response = Epidata.covid_hosp_facility_lookup(state)
        state_hospital_ids = [
            i["hospital_pk"] for i in lookup_response.get("epidata", [])
        ]
        for i in range(0, len(state_hospital_ids), 50):
            response = Epidata.covid_hosp_facility(
                state_hospital_ids[i:i + 50], dates)
            if response["result"] == 2:
                raise Exception(f"Bad result from Epidata: {response['message']}")
            responses += response.get("epidata", [])
    if len(responses) == 0:
        raise Exception("No results found.")
    return responses

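# Usage sketch (hypothetical inputs): facility-level rows for two states over
# January 2021, batched 50 hospitals per request.
facility_rows = pull_data_iteratively({"ma", "ri"},
                                      Epidata.range(20210101, 20210131))
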
def generate_date_ranges(start, end):
    """Take a start and end date and convert to a list of 30-day Epidata ranges.

    The final range may span only a few days, depending on the remainder of
    the full span modulo 30. The ranges partition the entire interval,
    inclusive of both endpoints, and do not overlap, i.e. they are of the form
    (start, start+30), (start+31, start+61), (start+62, start+92), ...

    Parameters
    ----------
    start: date
        datetime.date object for the first day.
    end: date
        datetime.date object for the last day.

    Returns
    -------
    Ordered list of dictionaries generated by Epidata.range specifying the
    partitioning intervals.
    """
    curr_end = start + timedelta(30)
    output = []
    while curr_end < end:
        output.append(Epidata.range(_date_to_int(start), _date_to_int(curr_end)))
        start += timedelta(31)
        curr_end = start + timedelta(30)
    output.append(Epidata.range(_date_to_int(start), _date_to_int(end)))
    return output

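# Worked illustration, assuming _date_to_int renders dates as YYYYMMDD
# integers: a 65-day span yields two full 31-day blocks plus a 3-day remainder.
#   generate_date_ranges(date(2020, 1, 1), date(2020, 3, 5)) ==
#       [{'from': 20200101, 'to': 20200131},
#        {'from': 20200201, 'to': 20200302},
#        {'from': 20200303, 'to': 20200305}]
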
def get_season(season, location):
    # end = (season + 1) * 100 + 29
    # epiweeks = Epidata.range(flu.add_epiweeks(end, -51), end)
    begin = season * 100 + 30
    epiweeks = Epidata.range(begin, flu.add_epiweeks(begin, 51))
    rows = AF_Utils._get(Epidata.ilinet(location, epiweeks))
    return [row['wili'] for row in rows]

def EpiCallForData(year, week, region, lag):
    if region == 'nat':
        fluData = Epidata.fluview(regions=['nat'],
                                  epiweeks=['{:04d}{:02d}'.format(year, week)],
                                  lag=lag)
    else:
        region = int(region)
        fluData = Epidata.fluview(regions=['HHS{:d}'.format(region)],
                                  epiweeks=['{:04d}{:02d}'.format(year, week)],
                                  lag=lag)
    return fluData

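# Usage sketch (hypothetical arguments): national wILI for epiweek 2018w05 as
# it looked 3 weeks after first publication.
flu_nat = EpiCallForData(2018, 5, 'nat', 3)
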
def get_historical_sensor_data(sensor: SensorConfig,
                               geo_value: str,
                               geo_type: str,
                               start_date: date,
                               end_date: date) -> Tuple[LocationSeries, list]:
    """Query Epidata API for historical sensorization data.

    Will only return values if they are not null. If any days are null or are
    not available, they will be listed as missing.

    Parameters
    ----------
    sensor
        SensorConfig specifying which sensor to retrieve.
    geo_value
        Geo value to retrieve.
    geo_type
        Geo type to retrieve.
    start_date
        First day to retrieve (inclusive).
    end_date
        Last day to retrieve (inclusive).

    Returns
    -------
    Tuple of (LocationSeries containing non-NaN data, list of dates without
    valid data). If no data was found, an empty LocationSeries is returned.
    """
    response = Epidata.covidcast_nowcast(
        data_source=sensor.source,
        signals=sensor.signal,
        time_type="day",
        geo_type=geo_type,
        time_values=Epidata.range(start_date.strftime("%Y%m%d"),
                                  end_date.strftime("%Y%m%d")),
        geo_value=geo_value,
        sensor_names=sensor.name,
        lag=sensor.lag)
    all_dates = [i.date() for i in date_range(start_date, end_date)]
    if response["result"] == 1:
        output = LocationSeries(
            geo_value=geo_value,
            geo_type=geo_type,
            data={datetime.strptime(str(i["time_value"]), "%Y%m%d").date(): i["value"]
                  for i in response.get("epidata", []) if not isnan(i["value"])})
        missing_dates = [i for i in all_dates if i not in output.dates]
        return output, missing_dates
    if response["result"] == -2:  # no results
        print("No historical results found")
        output = LocationSeries(geo_value=geo_value, geo_type=geo_type)
        return output, all_dates
    raise Exception(f"Bad result from Epidata: {response['message']}")

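# Usage sketch, assuming SensorConfig is the package's (source, signal, name,
# lag) configuration tuple; all values here are hypothetical.
config = SensorConfig("usa-facts", "confirmed_incidence_num", "ar3", 3)
series, missing = get_historical_sensor_data(config, "pa", "state",
                                             date(2020, 6, 1), date(2020, 6, 30))
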
def get_fluview_data(states, start, end):
    """Return a dictionary mapping each state to a DataFrame of its epiweeks."""
    ilinet_raw = {}
    for state in states:
        print("State {}".format(state))
        res = Epidata.fluview(
            regions=state,  # source
            epiweeks=[Epidata.range(start, end)])  # range 2009 to 2016
        if res['result'] == 1:
            print(res['result'], res['message'], len(res['epidata']))
            ilinet_raw[state] = pd.DataFrame(res['epidata'])
        else:
            print(res['result'], res['message'])
    return ilinet_raw

def _async_fetch_epidata(
        data_source: str,
        signal: str,  # pylint: disable=W0621
        start_day: date,
        end_day: date,
        geo_type: str,
        geo_value: Union[str, Iterable[str]],
        as_of: date,
        issues: Union[date, tuple, list],
        lag: int,
        time_type: str = "day") -> List[pd.DataFrame]:
    """Fetch data from Epidata API asynchronously.

    signal() wraps this to support fetching data over a range of dates and
    stacks the resulting data frames. Days with no data are skipped, so
    signal() can easily filter out these entries.
    """
    dfs = []
    params = []
    date_range = pd.date_range(start_day, end_day,
                               freq="D" if time_type == "day" else "W")
    for day in date_range:
        day_param = {
            "source": "covidcast",
            "data_source": data_source,
            "signals": signal,
            "time_type": time_type,
            "geo_type": geo_type,
            "geo_value": geo_value,
            "time_values": _date_to_api_string(day, time_type),
        }
        if as_of:
            day_param["as_of"] = _date_to_api_string(as_of, time_type)
        if issues:
            day_param["issues"] = _dates_to_api_strings(issues, time_type)
        if lag:
            day_param["lag"] = lag
        params.append(day_param)
    output = Epidata.async_epidata(params, batch_size=100)
    for day_data, request_params in output:
        if day_data["message"] == "no results":
            warnings.warn(
                f"No {data_source} {signal} data found on "
                f"{request_params['time_values']} for geography '{geo_type}'",
                NoDataWarning)
        if day_data["message"] not in {"success", "no results"}:
            warnings.warn(
                f"Problem obtaining {data_source} {signal} data on "
                f"{request_params['time_values']} for geography '{geo_type}': "
                f"{day_data['message']}", RuntimeWarning)
        if day_data.get("epidata"):
            dfs.append(pd.DataFrame.from_dict(day_data["epidata"]))
    return dfs

def _fetch_epidata(
        data_source: str,
        signal: str,  # pylint: disable=W0621
        start_day: date,
        end_day: date,
        geo_type: str,
        geo_value: Union[str, Iterable[str]],
        as_of: date,
        issues: Union[date, tuple, list],
        lag: int,
        time_type: str = "day") -> List[pd.DataFrame]:
    """Fetch data from Epidata API.

    signal() wraps this to support fetching data over a range of dates and
    stacks the resulting data frames. Days with no data are skipped, so
    signal() can easily filter out these entries.
    """
    as_of_str = _date_to_api_string(as_of, time_type) if as_of is not None else None
    issues_strs = _dates_to_api_strings(issues, time_type) if issues is not None else None
    cur_day = start_day
    dfs = []
    while cur_day <= end_day:
        day_str = _date_to_api_string(cur_day, time_type)
        day_data = Epidata.covidcast(data_source, signal,
                                     time_type=time_type,
                                     geo_type=geo_type,
                                     time_values=day_str,
                                     geo_value=geo_value,
                                     as_of=as_of_str,
                                     issues=issues_strs,
                                     lag=lag)
        # Two possible error conditions: no data or too much data.
        if day_data["message"] == "no results":
            warnings.warn(
                f"No {data_source} {signal} data found on {day_str} "
                f"for geography '{geo_type}'", NoDataWarning)
        if day_data["message"] not in {"success", "no results"}:
            warnings.warn(
                f"Problem obtaining {data_source} {signal} data on {day_str} "
                f"for geography '{geo_type}': {day_data['message']}",
                RuntimeWarning)
        # In the too-much-data case, we continue to try putting the truncated
        # data in our results. In the no-data case, skip this day entirely,
        # since there is no "epidata" in the response.
        if day_data.get("epidata"):
            dfs.append(pd.DataFrame.from_dict(day_data["epidata"]))
        cur_day += timedelta(1) if time_type == "day" else timedelta(7)
    return dfs

def get_ili(location, issue, ew1, ew2):
    result = {}
    epiweeks = Epidata.range(ew1, ew2)
    num_weeks = flu.delta_epiweeks(ew1, ew2) + 1
    # try to get unstable, but gracefully fall back to stable
    if issue is not None:
        res = Epidata.fluview(location, epiweeks, issues=issue)
        if res['result'] == 1:
            for row in res['epidata']:
                result[row['epiweek']] = row['wili']
    # check to see if another API call is needed
    if issue is None or res['result'] != 1 or len(res['epidata']) < num_weeks:
        # get stable data
        data = api_fetch(Epidata.fluview(location, epiweeks))
        for row in data:
            epiweek = row['epiweek']
            if epiweek not in result:
                result[epiweek] = row['wili']
    # return a list of weekly data
    return [[result[ew]] for ew in sorted(result.keys())]

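# Usage sketch (hypothetical epiweeks): weekly wILI for HHS Region 2,
# preferring values as known at issue 201820 and falling back to stable data.
ili_rows = get_ili('hhs2', 201820, 201740, 201815)
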
def get_influenza_counts_df():
    """Load influenza counts from the CMU Delphi API and return a pandas DataFrame."""
    # Retrieve the current date, format it "YYYY-mm-dd", and convert it to an epiweek
    today_obj = datetime.today()
    today_str = today_obj.strftime("%Y-%m-%d")
    epiweek = DataLoader.get_approx_epiweek_from_date(today_str)
    # Retrieve national fluview data for each epiweek from 2020 on
    results = Epidata.fluview(["nat"], [Epidata.range(202001, epiweek)])
    results_df = pd.DataFrame.from_records(
        results["epidata"]).sort_values(by=["epiweek"])
    results_df = results_df[[
        "epiweek", "lag", "num_ili", "num_patients", "num_providers", "wili", "ili"
    ]]
    # Convert epiweeks to approximate real dates for graphing
    results_df["date"] = results_df["epiweek"].apply(
        DataLoader.get_approx_date_from_epiweek)
    return results_df

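# Usage sketch: the returned frame has one row per epiweek since 2020w01 plus
# an approximate calendar date column for plotting.
flu_df = get_influenza_counts_df()
print(flu_df[["date", "wili"]].tail())
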
def load_us(states, latest=False):
    us_covid19_cases_path = os.path.join(config.base_data_dir, config.us_covid19_cases)
    # df_us = pd.read_csv(us_covid19_cases_path)
    import sys
    sys.path.append('src/')
    from delphi_epidata import Epidata
    from datetime import datetime
    start_date = 20200401
    stop_date = int(datetime.today().strftime('%Y%m%d'))
    for target_state in states:
        print(f'Processing data for state: {target_state} ' + ' *' * 10)
        print('Start date = ', start_date, ' End date = ', stop_date)
        res_incidence = Epidata.covidcast('jhu-csse', 'confirmed_7dav_incidence_num',
                                          'day', 'state',
                                          Epidata.range(start_date, stop_date),
                                          target_state)
        res_death = Epidata.covidcast('jhu-csse', 'deaths_7dav_incidence_num',
                                      'day', 'state',
                                      Epidata.range(start_date, stop_date),
                                      target_state)
        df_state = pd.DataFrame(columns=['Confirmed', 'Deceased', 'Recovered'])
        if res_incidence['result'] == 1 and res_death['result'] == 1:
            df_jhu_7day = pd.DataFrame(res_incidence['epidata'])
            df_jhu_7day_deaths = pd.DataFrame(res_death['epidata'])
            df_state['Date'] = pd.to_datetime(df_jhu_7day['time_value'], format='%Y%m%d')
            df_state['Confirmed'] = df_jhu_7day['value']
            df_state['Deceased'] = df_jhu_7day_deaths['value']
            df_state['Recovered'].fillna(value=0, inplace=True)
            # ensures sorting with respect to date
            df_state.index = pd.to_datetime(df_state.Date)
            df_state[['Total_Confirmed', 'Total_Deceased', 'Total_Recovered']] = \
                df_state[['Confirmed', 'Deceased', 'Recovered']].cumsum(axis=0, skipna=True)
            df_state.to_csv(os.path.join(config.base_data_dir,
                                         f'Cases_USA_{target_state}.csv'),
                            index=False)
        else:
            print(' *** Error: cannot import data from the Delphi database. '
                  'Check src/state_data_loader.py')
            exit()

def get_indicator_data(sensors: List[SensorConfig],
                       locations: List[LocationSeries],
                       as_of: date) -> Dict[Tuple, LocationSeries]:
    """Given lists of sensors and locations, asynchronously get covidcast data for all combinations.

    Parameters
    ----------
    sensors
        List of SensorConfigs for sensors to retrieve.
    locations
        List of LocationSeries, one for each location desired. This is only
        used for the list of locations; none of the dates or values are used.
    as_of
        Date that the data should be retrieved as of.

    Returns
    -------
    Dictionary of {(source, signal, geo_type, geo_value): LocationSeries}
    containing indicator data.
    """
    # gets all available data up to as_of day for now; could be optimized to only get a window
    output = {}
    all_combos = product(sensors, locations)
    as_of_str = as_of.strftime("%Y%m%d")
    all_params = [{
        "source": "covidcast",
        "data_source": sensor.source,
        "signals": sensor.signal,
        "time_type": "day",
        "geo_type": location.geo_type,
        "geo_value": location.geo_value,
        "time_values": f"{EPIDATA_START_DATE}-{as_of_str}",
        "as_of": as_of_str
    } for sensor, location in all_combos]
    responses = Epidata.async_epidata(all_params)
    for response, params in responses:
        # -2 = no results, 1 = success. Truncated data or server errors may
        # lead to this Exception.
        if response["result"] not in (-2, 1):
            raise Exception(f"Bad result from Epidata: {response['message']}")
        data = LocationSeries(
            geo_value=params["geo_value"],
            geo_type=params["geo_type"],
            data={datetime.strptime(str(i["time_value"]), "%Y%m%d").date(): i["value"]
                  for i in response.get("epidata", []) if not isnan(i["value"])})
        if data.data:
            output[(params["data_source"], params["signals"],
                    params["geo_type"], params["geo_value"])] = data
    return output

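# Usage sketch with hypothetical sensors and locations; each key of the
# returned dict is a (source, signal, geo_type, geo_value) tuple.
sensors = [SensorConfig("doctor-visits", "smoothed_adj_cli", "dv", 1)]
locations = [LocationSeries(geo_value="pa", geo_type="state")]
indicator_data = get_indicator_data(sensors, locations, date(2020, 10, 1))
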
def run_module(params):
    """Generate ground truth HHS hospitalization data.

    Parameters
    ----------
    params
        Dictionary containing indicator configuration. Expected to have the
        following structure:
        - "common":
            - "export_dir": str, directory to write output
            - "log_filename" (optional): str, name of file to write logs
    """
    start_time = time.time()
    logger = get_structured_logger(
        __name__,
        filename=params["common"].get("log_filename"),
        log_exceptions=params["common"].get("log_exceptions", True))
    mapper = GeoMapper()
    request_all_states = ",".join(mapper.get_geo_values("state_id"))
    today = date.today()
    past_reference_day = date(year=2020, month=1, day=1)  # first available date in DB
    date_range = generate_date_ranges(past_reference_day, today)
    dfs = []
    for r in date_range:
        response = Epidata.covid_hosp(request_all_states, r)
        # The last date range might only have recent days that don't have any
        # data, so don't error.
        if response["result"] != 1 and r != date_range[-1]:
            raise Exception(f"Bad result from Epidata: {response['message']}")
        if response["result"] == -2 and r == date_range[-1]:  # -2 code means no results
            continue
        dfs.append(pd.DataFrame(response['epidata']))
    all_columns = pd.concat(dfs)
    geo_mapper = GeoMapper()
    for sig in SIGNALS:
        state = geo_mapper.add_geocode(make_signal(all_columns, sig),
                                       "state_id",
                                       "state_code",
                                       from_col="state")
        for geo in GEOS:
            create_export_csv(make_geo(state, geo, geo_mapper),
                              params["common"]["export_dir"], geo, sig)
    elapsed_time_in_seconds = round(time.time() - start_time, 2)
    logger.info("Completed indicator run",
                elapsed_time_in_seconds=elapsed_time_in_seconds)

def grabDataFromEpicast(self):
    if self.region == '':
        self.fludata = Epidata.fluview(self.state,
                                       [Epidata.range(201040, self.todaysEW)])
    elif self.state == '':
        self.fludata = Epidata.fluview(self.region,
                                       [Epidata.range(201040, self.todaysEW)])
    else:
        self.fludata = Epidata.fluview(self.region + self.state,
                                       [Epidata.range(201040, self.todaysEW)])
    self.fludata_message = self.fludata['message']
    self.fludata_data = self.fludata['epidata']

def pull_data() -> pd.DataFrame:
    """Pull HHS data from Epidata API for all states and dates and convert to a DataFrame.

    Returns
    -------
    DataFrame of HHS data.
    """
    today = int(date.today().strftime("%Y%m%d"))
    past_reference_day = int(date(2020, 1, 1).strftime("%Y%m%d"))  # first available date in DB
    all_states = GeoMapper().get_geo_values("state_id")
    responses = pull_data_iteratively(all_states,
                                      Epidata.range(past_reference_day, today))
    all_columns = pd.DataFrame(responses).replace(NAN_VALUES, np.nan)
    all_columns["timestamp"] = pd.to_datetime(all_columns["collection_week"],
                                              format="%Y%m%d")
    return all_columns

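# Usage sketch: fetch the full facility-level history and inspect its coverage.
hhs_df = pull_data()
print(hhs_df["timestamp"].min(), hhs_df["timestamp"].max())
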
def run_module():
    """Generate ground truth HHS hospitalization data."""
    params = read_params()
    mapper = GeoMapper()
    request_all_states = ",".join(mapper.get_geo_values("state_id"))
    today = date.today()
    past_reference_day = date(year=2020, month=1, day=1)  # first available date in DB
    date_range = generate_date_ranges(past_reference_day, today)
    dfs = []
    for r in date_range:
        response = Epidata.covid_hosp(request_all_states, r)
        if response['result'] != 1:
            raise Exception(f"Bad result from Epidata: {response['message']}")
        dfs.append(pd.DataFrame(response['epidata']))
    all_columns = pd.concat(dfs)
    for sig in SIGNALS:
        create_export_csv(make_signal(all_columns, sig), params["export_dir"],
                          "state", sig)

        , 'quidel': ['smoothed_pct_negative', 'smoothed_tests_per_device']}
    return d


if __name__ == "__main__":
    todaysEW = fromToday2EpiWeek()
    todayYMD = todayYMD()
    variables = ['geo_value', 'time_value', 'value', 'stderr', 'sample_size']
    fromDataSource2Signal = fromDataSource2Signal()
    fips2name = listPACounties()
    for datasource in ['fb-survey', 'ght', 'doctor-visits', 'google-survey', 'quidel']:
        for signal in fromDataSource2Signal[datasource]:
            dataSet = DS(variables, datasource, signal)
            for county in fips2name:
                sys.stdout.write('\r{:s}--{:s}--{:06d}\r'.format(datasource, signal, county))
                sys.stdout.flush()
                dataFromAPI = Epidata.covidcast(datasource, signal, 'day', 'county',
                                                Epidata.range(20200101, todayYMD), county)
                if dataFromAPI["message"] == "no results":
                    continue
                if dataFromAPI['message'] == "success":
                    for data in dataFromAPI['epidata']:
                        dataSet.appendData(data)
            if dataSet.has_data():
                dataSet.convert2pandasDF().exportDF()

def _signal(name, region, epiweek):
    rows = AF_Utils._get(
        Epidata.signals(secrets.api.signals, name, region, epiweek))
    if len(rows) != 1:
        raise Exception('expected one signal row')
    return rows[0]['value']

cur = cnx.cursor(buffered=True)

# Get ground truth
history = {}
regions = [
    "nat", "hhs1", "hhs2", "hhs3", "hhs4", "hhs5", "hhs6", "hhs7", "hhs8",
    "hhs9", "hhs10", "ga", "pa", "dc", "tx", "or"
]
# For the 2017-18 season, 201744 is the first ground truth data we get after
# the competition starts (i.e., users forecasted for it in 201743).
season_start, season_end = 201744, 201820
for r in range(1, len(regions) + 1):
    history[r] = {}
    rows = Epidata.check(
        Epidata.fluview(regions[r - 1], Epidata.range(season_start, season_end)))
    truth = [(row['epiweek'], row['wili']) for row in rows]
    availableWeeks = [row[0] for row in truth]
    for row in truth:
        (epiweek, wili) = row
        history[r][epiweek] = wili
        print(regions[r - 1], epiweek, wili)

epiweek = availableWeeks[-1]
print("epiweek", epiweek)
if epiweek == 201801:
    forecast_made = 201752
else:
    forecast_made = epiweek - 1

# debug print
print("availableWeeks", availableWeeks)

"""
Collect actual wILI data using the Delphi API.
"""
from delphi_epidata import Epidata
from datetime import datetime
import pandas as pd
import pymmwr

BASELINE_URL = "https://raw.githubusercontent.com/cdcepi/FluSight-forecasts/master/wILI_Baseline.csv"

current_epiweek = pymmwr.date_to_mmwr_week()

# Range of epiweeks to gather data for
epiweek_start = 199710
epiweek_end = int(str(current_epiweek["year"]) + str(current_epiweek["week"]).zfill(2))
epiweek_range = Epidata.range(epiweek_start, epiweek_end)

regions = ["nat", *["hhs" + str(i) for i in range(1, 11)]]

# NOTE Lag value
# A lag of 0 means that the data for each week collected will be as observed
# at that point in time. Passing None as lag lets us collect the most recent
# data available.

df = {
    "epiweek": [],
    "region": [],
    "wili": []
}

def get_twitter(location, ew1, ew2):
    epiweeks = Epidata.range(ew1, ew2)
    data = api_fetch(
        Epidata.twitter(secrets.api.twitter, location, epiweeks=epiweeks))
    return [[row['percent']] for row in data]

def get_ght(ew1, ew2):
    epiweeks = Epidata.range(ew1, ew2)
    data = api_fetch(Epidata.ght(secrets.api.ght, 'US', epiweeks, '/m/0cycc'))
    return [[row['value']] for row in data]

def get_gft(location, ew1, ew2):
    epiweeks = Epidata.range(ew1, ew2)
    data = api_fetch(Epidata.gft(location, epiweeks))
    return [[1e-3 * row['num']] for row in data]

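# Usage sketch (hypothetical epiweek range): the three fetchers above return
# aligned weekly single-column rows, so they can be zipped into a feature
# matrix, assuming every source covers every week in the range.
ew1, ew2 = 201440, 201520
features = [t + g + f for t, g, f in zip(get_twitter('nat', ew1, ew2),
                                         get_ght(ew1, ew2),
                                         get_gft('nat', ew1, ew2))]
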
def EpiCallForLag(year, week, region):
    if region == 'nat':
        return Epidata.fluview(['nat'], ['{:04d}{:02d}'.format(year, week)])
    region = int(region)
    return Epidata.fluview(['HHS{:d}'.format(region)],
                           ['{:04d}{:02d}'.format(year, week)])

def _fetch_single_geo(
        data_source: str,
        signal: str,  # pylint: disable=W0621
        start_day: date,
        end_day: date,
        geo_type: str,
        geo_value: str,
        as_of: date,
        issues: Union[date, tuple, list],
        lag: int) -> Union[pd.DataFrame, None]:
    """Fetch data for a single geo.

    signal() wraps this to support fetching data over an iterable of
    geographies, and stacks the resulting data frames.

    If no data is found, return None, so signal() can easily filter out these
    entries.
    """
    as_of_str = _date_to_api_string(as_of) if as_of is not None else None
    issues_strs = _dates_to_api_strings(issues) if issues is not None else None
    cur_day = start_day
    dfs = []
    while cur_day <= end_day:
        day_str = _date_to_api_string(cur_day)
        day_data = Epidata.covidcast(data_source, signal,
                                     time_type="day",
                                     geo_type=geo_type,
                                     time_values=day_str,
                                     geo_value=geo_value,
                                     as_of=as_of_str,
                                     issues=issues_strs,
                                     lag=lag)
        # Two possible error conditions: no data or too much data.
        if day_data["message"] == "no results":
            warnings.warn(
                f"No {data_source} {signal} data found on {day_str} "
                f"for geography '{geo_type}'", NoDataWarning)
        if day_data["message"] not in {"success", "no results"}:
            warnings.warn(
                f"Problem obtaining {data_source} {signal} data on {day_str} "
                f"for geography '{geo_type}': {day_data['message']}",
                RuntimeWarning)
        # In the too-much-data case, we continue to try putting the truncated
        # data in our results. In the no-data case, skip this day entirely,
        # since there is no "epidata" in the response.
        if "epidata" in day_data:
            dfs.append(pd.DataFrame.from_dict(day_data["epidata"]))
        cur_day += timedelta(1)
    if len(dfs) > 0:
        out = pd.concat(dfs)
        out.drop("direction", axis=1, inplace=True)
        out["time_value"] = pd.to_datetime(out["time_value"], format="%Y%m%d")
        out["issue"] = pd.to_datetime(out["issue"], format="%Y%m%d")
        out["geo_type"] = geo_type
        out["data_source"] = data_source
        out["signal"] = signal
        return out
    return None

def metadata() -> pd.DataFrame:
    """Fetch COVIDcast surveillance stream metadata.

    Obtains a data frame of metadata describing all publicly available data
    streams from the COVIDcast API. See the `data source and signals
    documentation
    <https://cmu-delphi.github.io/delphi-epidata/api/covidcast_signals.html>`_
    for descriptions of the available sources.

    :returns: A data frame containing one row per available signal, with the
      following columns:

      ``data_source``
        Data source name.

      ``signal``
        Signal name.

      ``time_type``
        Temporal resolution at which this signal is reported. "day", for
        example, means the signal is reported daily.

      ``geo_type``
        Geographic level for which this signal is available, such as county,
        state, msa, hhs, hrr, or nation. Most signals are available at
        multiple geographic levels and will hence be listed in multiple rows
        with their own metadata.

      ``min_time``
        First day for which this signal is available. For weekly signals, will
        be the first day of the epiweek.

      ``max_time``
        Most recent day for which this signal is available. For weekly
        signals, will be the first day of the epiweek.

      ``num_locations``
        Number of distinct geographic locations available for this signal. For
        example, if `geo_type` is county, the number of counties for which
        this signal has ever been reported.

      ``min_value``
        The smallest value that has ever been reported.

      ``max_value``
        The largest value that has ever been reported.

      ``mean_value``
        The arithmetic mean of all reported values.

      ``stdev_value``
        The sample standard deviation of all reported values.

      ``last_update``
        The UTC datetime for when the signal value was last updated.

      ``max_issue``
        Most recent date data was issued.

      ``min_lag``
        Smallest lag from observation to issue, in days.

      ``max_lag``
        Largest lag from observation to issue, in days.
    """
    meta = Epidata.covidcast_meta()
    if meta["result"] != 1:
        # Something failed in the API and we did not get real metadata
        raise RuntimeError("Error when fetching metadata from the API",
                           meta["message"])
    meta_df = pd.DataFrame.from_dict(meta["epidata"])
    meta_df["min_time"] = meta_df.apply(
        lambda x: _parse_datetimes(x.min_time, x.time_type), axis=1)
    meta_df["max_time"] = meta_df.apply(
        lambda x: _parse_datetimes(x.max_time, x.time_type), axis=1)
    meta_df["last_update"] = pd.to_datetime(meta_df["last_update"], unit="s")
    return meta_df

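# Usage sketch: list the geographic levels at which a hypothetical signal is
# available, using the metadata frame fetched above.
meta = metadata()
fb_meta = meta[(meta.data_source == "fb-survey") & (meta.signal == "smoothed_cli")]
print(fb_meta[["geo_type", "min_time", "max_time", "num_locations"]])
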
        return obs

    unique_EWLagPairs = unique_EWLagPairs.apply(addLag, 1)
    return d.merge(unique_EWLagPairs, on=['EW', 'lag'])


def timeStamp():
    return datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")


if __name__ == "__main__":
    firstWeekOfSeason = datetime.datetime.strptime('2019-10-01', "%Y-%m-%d")
    epiWeeks = computeEpiWeeksWithData(firstWeekOfSeason)
    regions = createAllRegions()
    mostRecentEpiData = {'EW': [], 'region': [], 'wili': [], 'lag': [],
                         'releaseDate': [], 'releaseEW': []}
    for lag in np.arange(40, -1, -1):
        fluData = Epidata.fluview(regions=regions, epiweeks=epiWeeks, lag=lag)
        if fluData['message'] != 'success':
            print('could not download data-lag={:d}'.format(lag))
            continue
        print('Downloading data-lag={:d}'.format(lag))
        for data in fluData['epidata']:
            mostRecentEpiData['EW'].append(data['epiweek'])
            mostRecentEpiData['region'].append(data['region'])
            mostRecentEpiData['wili'].append(data['wili'])
            mostRecentEpiData['lag'].append(lag)
            mostRecentEpiData['releaseDate'].append(data['release_date'])
            releasedateDT = datetime.datetime.strptime(data['release_date'], "%Y-%m-%d")
            mostRecentEpiData['releaseEW'].append(fromDateTime2EW(releasedateDT))
    mostRecentEpiData = pd.DataFrame(mostRecentEpiData)