Example #1
0
def _async_fetch_epidata(
        data_source: str,
        signal: str,  # pylint: disable=W0621
        start_day: date,
        end_day: date,
        geo_type: str,
        geo_value: Union[str, Iterable[str]],
        as_of: date,
        issues: Union[date, tuple, list],
        lag: int,
        time_type: str = "day") -> list:
    """Fetch data from Epidata API asynchronously.

    One request is built for every date produced by ``pd.date_range`` between
    ``start_day`` and ``end_day`` (daily steps when ``time_type`` is "day",
    weekly steps otherwise), and all requests are submitted together through
    ``Epidata.async_epidata``.

    signal() wraps this to support fetching data over a range of dates
    and stacks the resulting data frames.

    Parameters
    ----------
    data_source
        Name of the upstream data source, sent as ``data_source``.
    signal
        Name of the signal, sent as ``signals``.
    start_day, end_day
        Inclusive bounds of the date range to request.
    geo_type, geo_value
        Geography selection, forwarded unchanged to the API.
    as_of
        If truthy, formatted with ``time_type`` and sent as ``as_of``.
    issues
        If truthy, formatted with ``time_type`` and sent as ``issues``.
    lag
        If not None, sent as ``lag``.
    time_type
        Time resolution of the request; also selects the date range step.

    Returns
    -------
        List with one data frame per response that carried a non-empty
        "epidata" payload. The list is empty when no request returned data.

    Warns
    -----
    NoDataWarning
        For every response whose message is "no results".
    RuntimeWarning
        For every response whose message is neither "success" nor
        "no results".
    """
    # The optional filters are the same for every request, so build them once.
    optional_params = {}
    if as_of:
        optional_params["as_of"] = _date_to_api_string(as_of, time_type)
    if issues:
        optional_params["issues"] = _dates_to_api_strings(issues, time_type)
    # Compare against None so that an explicit lag of 0 is still sent.
    if lag is not None:
        optional_params["lag"] = lag

    date_range = pd.date_range(start_day,
                               end_day,
                               freq="D" if time_type == "day" else "W")
    request_params = [
        {
            "source": "covidcast",
            "data_source": data_source,
            "signals": signal,
            # Must agree with how time_values/as_of/issues are formatted.
            "time_type": time_type,
            "geo_type": geo_type,
            "geo_value": geo_value,
            "time_values": _date_to_api_string(day, time_type),
            **optional_params,
        }
        for day in date_range
    ]

    dfs = []
    responses = Epidata.async_epidata(request_params, batch_size=100)
    # Each item pairs a response with the parameters of the request behind it.
    for day_data, day_params in responses:
        message = day_data["message"]
        if message == "no results":
            warnings.warn(
                f"No {data_source} {signal} data found on {day_params['time_values']} "
                f"for geography '{geo_type}'", NoDataWarning)
        elif message != "success":
            warnings.warn(
                f"Problem obtaining {data_source} {signal} "
                f"data on {day_params['time_values']} "
                f"for geography '{geo_type}': {message}",
                RuntimeWarning)
        # A response may still carry rows alongside a non-success message.
        if day_data.get("epidata"):
            dfs.append(pd.DataFrame.from_dict(day_data["epidata"]))
    return dfs
Example #2
0
def get_indicator_data(sensors: List[SensorConfig],
                       locations: List[LocationSeries],
                       as_of: date) -> Dict[Tuple, LocationSeries]:
    """
    Asynchronously retrieve covidcast data for every sensor/location pairing.

    Parameters
    ----------
    sensors
        SensorConfigs to query; each contributes its source and signal.
    locations
        LocationSeries naming the geographies to query. Only geo_type and geo_value are read;
        any dates or values they hold are ignored.
    as_of
        Date the data should be retrieved as of. It also closes the requested time window.
    Returns
    -------
        Dictionary of {(source, signal, geo_type, geo_value): LocationSeries}. Pairings whose
        response yields no non-NaN values are left out.
    Raises
    ------
    Exception
        If a response's result code is neither -2 (no results) nor 1 (success).
    """
    # The window always starts at EPIDATA_START_DATE; it could be narrowed as an optimization.
    pairings = product(sensors, locations)
    as_of_str = as_of.strftime("%Y%m%d")
    requests = []
    for sensor, location in pairings:
        requests.append({
            "source": "covidcast",
            "data_source": sensor.source,
            "signals": sensor.signal,
            "time_type": "day",
            "geo_type": location.geo_type,
            "geo_value": location.geo_value,
            "time_values": f"{EPIDATA_START_DATE}-{as_of_str}",
            "as_of": as_of_str
        })

    indicator_data = {}
    for response, params in Epidata.async_epidata(requests):
        # Anything other than -2 (no results) or 1 (success) means truncated data or a
        # server-side problem, so abort.
        if response["result"] not in (-2, 1):
            raise Exception(f"Bad result from Epidata: {response['message']}")

        geo_value = params["geo_value"]
        geo_type = params["geo_type"]

        # Map each reporting date to its value, dropping NaN observations.
        values_by_day = {}
        for row in response.get("epidata", []):
            if isnan(row["value"]):
                continue
            day = datetime.strptime(str(row["time_value"]), "%Y%m%d").date()
            values_by_day[day] = row["value"]

        series = LocationSeries(geo_value=geo_value,
                                geo_type=geo_type,
                                data=values_by_day)
        if not series.data:
            continue
        key = (params["data_source"], params["signals"],
               params["geo_type"], params["geo_value"])
        indicator_data[key] = series
    return indicator_data