def _async_fetch_epidata(data_source: str,
                         signal: str,  # pylint: disable=W0621
                         start_day: date,
                         end_day: date,
                         geo_type: str,
                         geo_value: Union[str, Iterable[str]],
                         as_of: date,
                         issues: Union[date, tuple, list],
                         lag: int,
                         time_type: str = "day") -> List[pd.DataFrame]:
    """Fetch data from the Epidata API asynchronously, one request per time step.

    signal() wraps this to support fetching data over a range of dates
    and stacks the resulting data frames.

    One request is built for every step between ``start_day`` and ``end_day``
    (daily steps when ``time_type`` is ``"day"``, weekly steps otherwise) and
    all requests are submitted together via ``Epidata.async_epidata``.

    Parameters
    ----------
    data_source
        Name of the upstream data source, sent as ``data_source``.
    signal
        Name of the signal, sent as ``signals``.
    start_day, end_day
        Inclusive bounds of the range passed to ``pd.date_range``.
    geo_type, geo_value
        Geography type and value(s), passed through unchanged.
    as_of
        If truthy, sent as the ``as_of`` request parameter.
    issues
        If truthy, sent as the ``issues`` request parameter.
    lag
        If not None, sent as the ``lag`` request parameter. A lag of 0 is
        sent as well.
    time_type
        Time resolution; used for date formatting, for the step frequency,
        and as the ``time_type`` request parameter.

    Returns
    -------
    List with one DataFrame per response that carried a non-empty
    ``"epidata"`` payload. The list is empty when no request returned data.

    Warns
    -----
    NoDataWarning
        For each response whose message is ``"no results"``.
    RuntimeWarning
        For each response whose message is neither ``"success"`` nor
        ``"no results"``.
    """
    freq = "D" if time_type == "day" else "W"
    request_params = []
    for day in pd.date_range(start_day, end_day, freq=freq):
        day_param = {
            "source": "covidcast",
            "data_source": data_source,
            "signals": signal,
            # Must agree with how time_values / as_of / issues are formatted.
            "time_type": time_type,
            "geo_type": geo_type,
            "geo_value": geo_value,
            "time_values": _date_to_api_string(day, time_type),
        }
        if as_of:
            day_param["as_of"] = _date_to_api_string(as_of, time_type)
        if issues:
            day_param["issues"] = _dates_to_api_strings(issues, time_type)
        # Compare against None explicitly: lag=0 is falsy but still a
        # value the caller asked for.
        if lag is not None:
            day_param["lag"] = lag
        request_params.append(day_param)

    dfs = []
    responses = Epidata.async_epidata(request_params, batch_size=100)
    # Each item pairs a response with the parameters of the request that
    # produced it; a distinct name avoids shadowing the request list.
    for day_data, day_params in responses:
        message = day_data["message"]
        if message == "no results":
            warnings.warn(
                f"No {data_source} {signal} data found on {day_params['time_values']} "
                f"for geography '{geo_type}'",
                NoDataWarning)
        elif message != "success":
            warnings.warn(
                f"Problem obtaining {data_source} {signal} "
                f"data on {day_params['time_values']} "
                f"for geography '{geo_type}': {message}",
                RuntimeWarning)
        # A response may still carry rows alongside a warning; keep whatever
        # data was returned.
        if day_data.get("epidata"):
            dfs.append(pd.DataFrame.from_dict(day_data["epidata"]))
    return dfs
def get_indicator_data(sensors: List[SensorConfig],
                       locations: List[LocationSeries],
                       as_of: date) -> Dict[Tuple, LocationSeries]:
    """
    Given a list of sensors and locations, asynchronously gets covidcast data for all combinations.

    Parameters
    ----------
    sensors
        list of SensorConfigs for sensors to retrieve.
    locations
        list of LocationSeries, one for each location desired. This is only used for the list of
        locations; none of the dates or values are used.
    as_of
        Date that the data should be retrieved as of.

    Returns
    -------
        Dictionary of {(source, signal, geo_type, geo_value): LocationSeries} containing indicator
        data. Combinations for which no usable (non-null, non-NaN) values were returned are
        omitted.

    Raises
    ------
    Exception
        If any response has a result code other than -2 (no results) or 1 (success).
    """
    # gets all available data up to as_of day for now, could be optimized to only get a window
    as_of_str = as_of.strftime("%Y%m%d")
    all_params = [
        {
            "source": "covidcast",
            "data_source": sensor.source,
            "signals": sensor.signal,
            "time_type": "day",
            "geo_type": location.geo_type,
            "geo_value": location.geo_value,
            "time_values": f"{EPIDATA_START_DATE}-{as_of_str}",
            "as_of": as_of_str,
        }
        for sensor, location in product(sensors, locations)
    ]
    responses = Epidata.async_epidata(all_params)

    output = {}
    for response, params in responses:
        # -2 = no results, 1 = success. Truncated data or server errors may lead to this Exception.
        if response["result"] not in (-2, 1):
            raise Exception(f"Bad result from Epidata: {response['message']}")

        values_by_day = {}
        for row in response.get("epidata", []):
            value = row["value"]
            # Skip missing observations. The None check must come first because
            # isnan(None) raises TypeError instead of returning False.
            if value is None or isnan(value):
                continue
            day = datetime.strptime(str(row["time_value"]), "%Y%m%d").date()
            values_by_day[day] = value

        data = LocationSeries(geo_value=params["geo_value"],
                              geo_type=params["geo_type"],
                              data=values_by_day)
        # Only keep series that actually contain data.
        if data.data:
            key = (params["data_source"], params["signals"],
                   params["geo_type"], params["geo_value"])
            output[key] = data
    return output