Example #1
0
 def get_unstable_wILI(region, ew1, ew2):
     """Return the wILI values for `region` over epiweeks ew1..ew2, as of issue ew2.

     The fluview request is pinned to `issues=ew2`. An Exception is raised when
     the number of rows returned differs from
     `flu.delta_epiweeks(ew1, ew2) + 1`.
     """
     response = Epidata.fluview(region, Epidata.range(ew1, ew2), issues=ew2)
     wili_values = []
     for row in AF_Utils._get(response):
         wili_values.append(row['wili'])
     expected_count = flu.delta_epiweeks(ew1, ew2) + 1
     if len(wili_values) != expected_count:
         raise Exception('missing data')
     return wili_values
Example #2
0
def get_wiki(ew1, ew2):
    """Return weekly wiki access rates for every article in ARTICLES.

    The result is a list with one entry per epiweek (ascending); each entry is
    a list with one value per article (in sorted article order) equal to
    1e6 * sum(count) / sum(total) over the requested HOURS.

    Raises an Exception if an article does not have exactly len(HOURS) rows
    for some epiweek.
    """
    # fetch the raw rows: one per (epiweek, article, hour)
    epiweeks = Epidata.range(ew1, ew2)
    rows = api_fetch(Epidata.wiki(ARTICLES, epiweeks=epiweeks, hours=HOURS))

    # index as epiweek -> article -> hourly count/total lists
    by_week = {}
    for row in rows:
        epiweek, article = row['epiweek'], row['article']
        hourly = by_week.setdefault(epiweek, {}).setdefault(
            article, {'c': [], 't': []})
        hourly['c'].append(row['count'])
        hourly['t'].append(row['total'])

    # collapse the hours of each (epiweek, article) pair into a single rate
    weekly = []
    for epiweek in sorted(by_week):
        rates = []
        for article in sorted(ARTICLES):
            hourly = by_week[epiweek][article]
            counts, totals = hourly['c'], hourly['t']
            if not (len(counts) == len(HOURS) and len(totals) == len(HOURS)):
                raise Exception('wiki is missing hours')
            rates.append(1e6 * sum(counts) / sum(totals))
        weekly.append(rates)
    return weekly
Example #3
0
def pull_data_iteratively(states: set, dates: dict, batch_size: int = 50) -> list:
    """
    Pull Epidata API for a set of states and dates.

    To avoid Epidata API row limits, does not grab all values at once. Instead, it loops through
    each state and pulls all data for `batch_size` hospitals at a time.

    Parameters
    ----------
    states: set
      Set of state codes (2 letter lowercase abbreviation) to get data for.
    dates: dict
      Dict of 'from' and 'to' dates output by Epidata.range().
    batch_size: int
      Number of hospital ids sent per covid_hosp_facility request. Defaults to 50.

    Returns
    -------
    List of dictionaries. Concatenation of all the response['epidata'] lists.

    Raises
    ------
    ValueError
      If batch_size is smaller than 1.
    Exception
      If any facility response carries result code 2 (documented by the Epidata API as
      "too many results"), or if no rows at all were returned.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    responses = []
    for state in states:
        lookup_response = Epidata.covid_hosp_facility_lookup(state)
        state_hospital_ids = [
            i["hospital_pk"] for i in lookup_response.get("epidata", [])
        ]
        for offset in range(0, len(state_hospital_ids), batch_size):
            response = Epidata.covid_hosp_facility(
                state_hospital_ids[offset:offset + batch_size], dates)
            if response["result"] == 2:
                raise Exception(
                    f"Bad result from Epidata: {response['message']}")
            # Responses without an "epidata" key contribute nothing.
            responses.extend(response.get("epidata", []))
    if not responses:
        raise Exception("No results found.")
    return responses
def generate_date_ranges(start, end):
    """
    Split the span from start to end into consecutive Epidata ranges of 31 calendar days.

    Each range covers (chunk_start, chunk_start + 30 days); the next one begins on the
    following day, so the ranges do not overlap and together cover both endpoints, i.e.
    (start, start+30), (start+31, start+61), (start+62, start+92), ...
    The final range ends exactly at `end` and may therefore be shorter than the others.

    Parameters
    ----------
    start: date
      datetime.date object for the first day.
    end: date
      datetime.date object for the last day.

    Returns
    -------
    Ordered list of dictionaries generated by Epidata.range specifying the partitioning intervals.
    """
    window = timedelta(30)
    stride = timedelta(31)
    ranges = []
    chunk_start = start
    while True:
        chunk_end = chunk_start + window
        if chunk_end >= end:
            break
        ranges.append(
            Epidata.range(_date_to_int(chunk_start), _date_to_int(chunk_end)))
        chunk_start += stride
    # Whatever remains (possibly a single day) becomes the last range.
    ranges.append(Epidata.range(_date_to_int(chunk_start), _date_to_int(end)))
    return ranges
Example #5
0
 def get_season(season, location):
     """Return the ilinet wILI values for one season at `location`.

     The window starts at epiweek 30 of year `season` and ends at
     `flu.add_epiweeks(start, 51)`.
     """
     first_week = season * 100 + 30
     last_week = flu.add_epiweeks(first_week, 51)
     response = Epidata.ilinet(location, Epidata.range(first_week, last_week))
     wili_values = []
     for row in AF_Utils._get(response):
         wili_values.append(row['wili'])
     return wili_values
def EpiCallForData(year, week, region, lag):
    """Query fluview for a single epiweek and region at the given lag.

    `region` is either the string 'nat' or something `int()` accepts, in which
    case it is turned into the region name 'HHS<n>'. The raw Epidata response
    is returned unchanged.
    """
    if region == 'nat':
        region_name = 'nat'
    else:
        region_name = 'HHS{:d}'.format(int(region))
    epiweek = '{:04d}{:02d}'.format(year, week)
    return Epidata.fluview(regions=[region_name], epiweeks=[epiweek], lag=lag)
Example #7
0
def get_historical_sensor_data(sensor: SensorConfig, geo_value: str,
                               geo_type: str, start_date: date,
                               end_date: date) -> Tuple[LocationSeries, list]:
    """
    Fetch previously computed sensor values from the Epidata covidcast_nowcast endpoint.

    Only non-NaN values are kept. Every day in [start_date, end_date] that has no kept value
    is reported as missing.

    Parameters
    ----------
    sensor
        SensorConfig specifying which sensor to retrieve.
    geo_value
        Geo value to retrieve.
    geo_type
        Geo type to retrieve.
    start_date
        First day to retrieve (inclusive).
    end_date
        Last day to retrieve (inclusive).
    Returns
    -------
        Tuple of (LocationSeries containing non-na data, list of dates without valid data). If the
        API reports no results (code -2), an empty LocationSeries and the full list of dates are
        returned. Any other non-success result raises an Exception.
    """
    day_format = "%Y%m%d"
    requested_days = Epidata.range(start_date.strftime(day_format),
                                   end_date.strftime(day_format))
    response = Epidata.covidcast_nowcast(data_source=sensor.source,
                                         signals=sensor.signal,
                                         time_type="day",
                                         geo_type=geo_type,
                                         time_values=requested_days,
                                         geo_value=geo_value,
                                         sensor_names=sensor.name,
                                         lag=sensor.lag)
    all_dates = [stamp.date() for stamp in date_range(start_date, end_date)]
    status = response["result"]

    if status == 1:
        values_by_day = {}
        for record in response.get("epidata", []):
            if isnan(record["value"]):
                continue
            day = datetime.strptime(str(record["time_value"]),
                                    day_format).date()
            values_by_day[day] = record["value"]
        series = LocationSeries(geo_value=geo_value,
                                geo_type=geo_type,
                                data=values_by_day)
        missing = [day for day in all_dates if day not in series.dates]
        return series, missing

    if status == -2:  # no results
        print("No historical results found")
        empty_series = LocationSeries(geo_value=geo_value, geo_type=geo_type)
        return empty_series, all_dates

    raise Exception(f"Bad result from Epidata: {response['message']}")
Example #8
0
def get_fluview_data(states, start, end):
    """
    Fetch fluview rows for each state over the epiweek range start..end.

    Returns a dictionary mapping each state to a DataFrame built from the
    response's 'epidata' rows. States whose request does not come back with
    result code 1 are left out of the dictionary; the actual result code and
    message of the failed request are printed so the cause is visible.
    """
    ilinet_raw = {}
    epiweeks = [Epidata.range(start, end)]
    for state in states:
        print("State {}".format(state))
        res = Epidata.fluview(regions=state, epiweeks=epiweeks)
        if res['result'] != 1:
            # Report what the API really said instead of a canned message.
            print((res['result'], res.get('message')))
            continue
        print(res['result'], res['message'], len(res['epidata']))
        ilinet_raw[state] = pd.DataFrame(res['epidata'])
    return ilinet_raw
Example #9
0
def _async_fetch_epidata(
        data_source: str,
        signal: str,  # pylint: disable=W0621
        start_day: date,
        end_day: date,
        geo_type: str,
        geo_value: Union[str, Iterable[str]],
        as_of: date,
        issues: Union[date, tuple, list],
        lag: int,
        time_type: str = "day") -> list:
    """Fetch data from Epidata API asynchronously.

    One request is built per time value between start_day and end_day (daily
    steps when time_type is "day", weekly steps otherwise) and all requests
    are sent through Epidata.async_epidata in batches of 100.

    signal() wraps this to support fetching data over a range of dates
    and stacks the resulting data frames.

    Returns a list with one data frame per response that contained rows. The
    list is empty when no data was found. A NoDataWarning is issued for each
    "no results" response and a RuntimeWarning for every other non-success
    message.
    """
    dfs = []
    params = []
    date_range = pd.date_range(start_day,
                               end_day,
                               freq="D" if time_type == "day" else "W")
    for day in date_range:
        day_param = {
            "source": "covidcast",
            "data_source": data_source,
            "signals": signal,
            # Must agree with the format of time_values below, which is
            # produced from the same time_type.
            "time_type": time_type,
            "geo_type": geo_type,
            "geo_value": geo_value,
            "time_values": _date_to_api_string(day, time_type),
        }
        # Optional filters are only sent when they were supplied.
        if as_of:
            day_param["as_of"] = _date_to_api_string(as_of, time_type)
        if issues:
            day_param["issues"] = _dates_to_api_strings(issues, time_type)
        if lag:
            day_param["lag"] = lag
        params.append(day_param)
    output = Epidata.async_epidata(params, batch_size=100)
    # Each item pairs a response with the request parameters that produced it.
    for day_data, day_params in output:
        if day_data["message"] == "no results":
            warnings.warn(
                f"No {data_source} {signal} data found on {day_params['time_values']} "
                f"for geography '{geo_type}'", NoDataWarning)
        if day_data["message"] not in {"success", "no results"}:
            warnings.warn(
                f"Problem obtaining {data_source} {signal} "
                f"data on {day_params['time_values']} "
                f"for geography '{geo_type}': {day_data['message']}",
                RuntimeWarning)
        # Truncated responses still carry rows; keep whatever was returned.
        if day_data.get("epidata"):
            dfs.append(pd.DataFrame.from_dict(day_data["epidata"]))
    return dfs
Example #10
0
def _fetch_epidata(
        data_source: str,
        signal: str,  # pylint: disable=W0621
        start_day: date,
        end_day: date,
        geo_type: str,
        geo_value: Union[str, Iterable[str]],
        as_of: date,
        issues: Union[date, tuple, list],
        lag: int,
        time_type: str = "day") -> Union[pd.DataFrame, None]:
    """Fetch data from Epidata API, one request per time value.

    Steps from start_day to end_day (inclusive) by one day when time_type is
    "day" and by seven days otherwise, querying Epidata.covidcast each time.

    signal() wraps this to support fetching data over a range of dates
    and stacks the resulting data frames.

    As written, the function returns the list of per-request data frames that
    contained rows (an empty list when nothing was found). A NoDataWarning is
    issued for each "no results" response and a RuntimeWarning for any other
    non-success message.

    """
    as_of_str = None if as_of is None else _date_to_api_string(
        as_of, time_type)
    issues_strs = None if issues is None else _dates_to_api_strings(
        issues, time_type)
    step = timedelta(1) if time_type == "day" else timedelta(7)

    frames = []
    current = start_day
    while current <= end_day:
        day_str = _date_to_api_string(current, time_type)
        day_data = Epidata.covidcast(data_source,
                                     signal,
                                     time_type=time_type,
                                     geo_type=geo_type,
                                     time_values=day_str,
                                     geo_value=geo_value,
                                     as_of=as_of_str,
                                     issues=issues_strs,
                                     lag=lag)

        # Anything other than "success" is reported as a warning, never raised.
        message = day_data["message"]
        if message == "no results":
            warnings.warn(
                f"No {data_source} {signal} data found on {day_str} "
                f"for geography '{geo_type}'", NoDataWarning)
        elif message != "success":
            warnings.warn(
                f"Problem obtaining {data_source} {signal} data on {day_str} "
                f"for geography '{geo_type}': {day_data['message']}",
                RuntimeWarning)

        # Truncated responses still contain rows and are kept; responses
        # without (or with empty) "epidata" are skipped.
        if day_data.get("epidata"):
            frames.append(pd.DataFrame.from_dict(day_data["epidata"]))
        current += step
    return frames
Example #11
0
def get_ili(location, issue, ew1, ew2):
    """Return weekly wILI for `location` over ew1..ew2 as a list of [wili] lists.

    When `issue` is given, values as of that issue are requested first. Stable
    (latest) values are fetched as well whenever no issue was given, the
    issue-specific request failed, or it returned fewer rows than
    `flu.delta_epiweeks(ew1, ew2) + 1`; they only fill epiweeks that the
    issue-specific data did not provide. Rows are ordered by epiweek.
    """
    epiweeks = Epidata.range(ew1, ew2)
    num_weeks = flu.delta_epiweeks(ew1, ew2) + 1
    wili_by_week = {}

    # try to get unstable, but gracefully fall back to stable
    need_stable = True
    if issue is not None:
        res = Epidata.fluview(location, epiweeks, issues=issue)
        if res['result'] == 1:
            unstable_rows = res['epidata']
            for row in unstable_rows:
                wili_by_week[row['epiweek']] = row['wili']
            need_stable = len(unstable_rows) < num_weeks

    if need_stable:
        for row in api_fetch(Epidata.fluview(location, epiweeks)):
            # never overwrite a value that came from the requested issue
            wili_by_week.setdefault(row['epiweek'], row['wili'])

    # return a list of weekly data
    return [[wili_by_week[ew]] for ew in sorted(wili_by_week)]
Example #12
0
    def get_influenza_counts_df():
        """Load national influenza counts from the CMU Delphi API as a pandas dataframe.

        Requests fluview rows for region "nat" from epiweek 202001 up to the
        epiweek derived from today's date, sorts them by epiweek, keeps a fixed
        set of columns and adds a "date" column computed from each epiweek.
        """
        # Today's date as "YYYY-mm-dd", converted to an (approximate) epiweek
        today_str = datetime.today().strftime("%Y-%m-%d")
        current_epiweek = DataLoader.get_approx_epiweek_from_date(today_str)

        response = Epidata.fluview(["nat"],
                                   [Epidata.range(202001, current_epiweek)])

        kept_columns = [
            "epiweek", "lag", "num_ili", "num_patients", "num_providers",
            "wili", "ili"
        ]
        frame = pd.DataFrame.from_records(response["epidata"])
        frame = frame.sort_values(by=["epiweek"])[kept_columns]

        # Approximate calendar date for each epiweek, for graphing
        frame["date"] = frame["epiweek"].apply(
            DataLoader.get_approx_date_from_epiweek)
        return frame
Example #13
0
def load_us(states, latest=False):
    """Download JHU-CSSE 7-day-average case and death counts per state and write CSVs.

    For every state in `states`, the covidcast signals
    'confirmed_7dav_incidence_num' and 'deaths_7dav_incidence_num' are fetched
    from 20200401 through today, and a file `Cases_USA_<state>.csv` is written
    to `config.base_data_dir` with daily and cumulative Confirmed / Deceased /
    Recovered columns (Recovered is always 0; no recovered signal is fetched).

    Parameters
    ----------
    states
        Iterable of state identifiers passed to the API as geo_value.
    latest
        Unused; kept for interface compatibility.

    Exits the process with status 1 if either request for a state returns no
    'epidata' rows.
    """
    import sys
    # Make the bundled delphi_epidata client importable; avoid growing
    # sys.path on repeated calls.
    if 'src/' not in sys.path:
        sys.path.append('src/')
    from delphi_epidata import Epidata
    from datetime import datetime

    start_date = 20200401
    stop_date = int(datetime.today().strftime('%Y%m%d'))
    time_values = Epidata.range(start_date, stop_date)

    for target_state in states:
        print(f'Processing data for state: {target_state} ' + ' *' * 10)
        print('Start date = ', start_date, ' End date = ', stop_date)

        res_incidence = Epidata.covidcast('jhu-csse', 'confirmed_7dav_incidence_num', 'day', 'state',
                                          time_values, target_state)
        res_death = Epidata.covidcast('jhu-csse', 'deaths_7dav_incidence_num', 'day', 'state',
                                      time_values, target_state)

        # The response is always a non-empty dict, so test for actual rows
        # rather than for the dict's length.
        if not res_incidence.get('epidata') or not res_death.get('epidata'):
            print(' *** Error: Can not import data from Delphi database. Check src/state_data_loader.py')
            sys.exit(1)

        df_jhu_7day = pd.DataFrame(res_incidence['epidata'])
        df_jhu_7day_deaths = pd.DataFrame(res_death['epidata'])

        df_state = pd.DataFrame(columns=['Confirmed', 'Deceased', 'Recovered'])
        df_state['Date'] = pd.to_datetime(df_jhu_7day['time_value'], format='%Y%m%d')
        df_state['Confirmed'] = df_jhu_7day['value']
        # NOTE(review): deaths are aligned to cases by row position, not by
        # time_value - confirm both responses always cover the same days.
        df_state['Deceased'] = df_jhu_7day_deaths['value']
        # Assign directly; a chained inplace fillna may operate on a copy.
        df_state['Recovered'] = 0

        # ensures sorting with respect to date
        df_state.index = pd.to_datetime(df_state.Date)
        df_state[['Total_Confirmed', 'Total_Deceased', 'Total_Recovered']] \
            = df_state[['Confirmed', 'Deceased', 'Recovered']].cumsum(axis=0, skipna=True)
        df_state.to_csv(os.path.join(config.base_data_dir, f'Cases_USA_{target_state}.csv'), index=False)
Example #14
0
def get_indicator_data(sensors: List[SensorConfig],
                       locations: List[LocationSeries],
                       as_of: date) -> Dict[Tuple, LocationSeries]:
    """
    Asynchronously fetch covidcast data for every (sensor, location) combination.

    Parameters
    ----------
    sensors
        list of SensorConfigs for sensors to retrieve.
    locations
        list of LocationSeries, one for each location desired. Only geo_type and geo_value are
        read; none of the dates or values are used.
    as_of
        Date that the data should be retrieved as of.
    Returns
    -------
        Dictionary of {(source, signal, geo_type, geo_value): LocationSeries} containing indicator
        data. Combinations without any non-NaN value are omitted. An Exception is raised for any
        response whose result code is neither 1 (success) nor -2 (no results).
    """
    # Requests everything from EPIDATA_START_DATE up to as_of; could be narrowed to a window.
    all_combos = product(sensors, locations)
    as_of_str = as_of.strftime("%Y%m%d")
    all_params = []
    for sensor, location in all_combos:
        all_params.append({
            "source": "covidcast",
            "data_source": sensor.source,
            "signals": sensor.signal,
            "time_type": "day",
            "geo_type": location.geo_type,
            "geo_value": location.geo_value,
            "time_values": f"{EPIDATA_START_DATE}-{as_of_str}",
            "as_of": as_of_str
        })

    output = {}
    for response, params in Epidata.async_epidata(all_params):
        # Truncated data or server errors end up here.
        if response["result"] not in (-2, 1):
            raise Exception(f"Bad result from Epidata: {response['message']}")
        values_by_day = {}
        for record in response.get("epidata", []):
            if isnan(record["value"]):
                continue
            day = datetime.strptime(str(record["time_value"]),
                                    "%Y%m%d").date()
            values_by_day[day] = record["value"]
        series = LocationSeries(geo_value=params["geo_value"],
                                geo_type=params["geo_type"],
                                data=values_by_day)
        if series.data:
            key = (params["data_source"], params["signals"],
                   params["geo_type"], params["geo_value"])
            output[key] = series
    return output
Example #15
0
def run_module(params):
    """
    Generate ground truth HHS hospitalization data.

    Pulls covid_hosp data for all states from 2020-01-01 through today in consecutive date
    ranges, then exports one CSV per (signal, geo) combination.

    Parameters
    ----------
    params
        Dictionary containing indicator configuration. Expected to have the following structure:
        - "common":
            - "export_dir": str, directory to write output
            - "log_filename" (optional): str, name of file to write logs

    Raises
    ------
    Exception
        If any date range returns a non-success result. The only tolerated failure is
        "no results" (code -2) on the last range, which may cover only very recent days.
    """
    start_time = time.time()
    logger = get_structured_logger(
        __name__,
        filename=params["common"].get("log_filename"),
        log_exceptions=params["common"].get("log_exceptions", True))
    geo_mapper = GeoMapper()
    request_all_states = ",".join(geo_mapper.get_geo_values("state_id"))
    today = date.today()
    past_reference_day = date(year=2020, month=1,
                              day=1)  # first available date in DB
    date_range = generate_date_ranges(past_reference_day, today)
    last_index = len(date_range) - 1
    dfs = []
    for index, r in enumerate(date_range):
        response = Epidata.covid_hosp(request_all_states, r)
        # The last date range might only have recent days that don't have any data, so don't error.
        if response["result"] == -2 and index == last_index:  # -2 code means no results
            continue
        # Every other non-success result is an error, including on the last range, where it
        # would otherwise surface as a KeyError or silently accept truncated data.
        if response["result"] != 1:
            raise Exception(f"Bad result from Epidata: {response['message']}")
        dfs.append(pd.DataFrame(response['epidata']))
    all_columns = pd.concat(dfs)

    for sig in SIGNALS:
        state = geo_mapper.add_geocode(make_signal(all_columns, sig),
                                       "state_id",
                                       "state_code",
                                       from_col="state")
        for geo in GEOS:
            create_export_csv(make_geo(state, geo, geo_mapper),
                              params["common"]["export_dir"], geo, sig)

    elapsed_time_in_seconds = round(time.time() - start_time, 2)
    logger.info("Completed indicator run",
                elapsed_time_in_seconds=elapsed_time_in_seconds)
Example #16
0
 def grabDataFromEpicast(self):
     """Fetch fluview data from epiweek 201040 through self.todaysEW.

     The queried location is self.state when self.region is empty,
     self.region when self.state is empty, and self.region + self.state
     otherwise. The raw response is stored in self.fludata, and its
     'message' and 'epidata' entries in self.fludata_message and
     self.fludata_data.
     """
     if self.region == '':
         location = self.state
     elif self.state == '':
         location = self.region
     else:
         location = self.region + self.state

     epiweeks = [Epidata.range(201040, self.todaysEW)]
     self.fludata = Epidata.fluview(location, epiweeks)

     response = self.fludata
     self.fludata_message = response['message']
     self.fludata_data = response['epidata']
Example #17
0
def pull_data() -> pd.DataFrame:
    """
    Pull HHS data from the Epidata API for all states, from 2020-01-01 through today.

    Values listed in NAN_VALUES are replaced by NaN, and a "timestamp" column is added by
    parsing "collection_week" as a YYYYMMDD date.

    Returns
    -------
    DataFrame of HHS data.
    """
    day_format = "%Y%m%d"
    today = int(date.today().strftime(day_format))
    # first available date in DB
    first_day = int(date(2020, 1, 1).strftime(day_format))
    state_ids = GeoMapper().get_geo_values("state_id")
    rows = pull_data_iteratively(state_ids, Epidata.range(first_day, today))
    frame = pd.DataFrame(rows).replace(NAN_VALUES, np.nan)
    frame["timestamp"] = pd.to_datetime(frame["collection_week"],
                                        format=day_format)
    return frame
def run_module():
    """Generate ground truth HHS hospitalization data.

    Reads the run parameters, pulls covid_hosp data for all states from
    2020-01-01 through today in consecutive date ranges, and writes one
    state-level export per signal into params["export_dir"]. Any range whose
    result code is not 1 raises an Exception.
    """
    params = read_params()
    state_ids = GeoMapper().get_geo_values("state_id")
    request_all_states = ",".join(state_ids)

    today = date.today()
    # first available date in DB
    first_day = date(year=2020, month=1, day=1)

    frames = []
    for time_range in generate_date_ranges(first_day, today):
        response = Epidata.covid_hosp(request_all_states, time_range)
        if response['result'] != 1:
            raise Exception(f"Bad result from Epidata: {response['message']}")
        frames.append(pd.DataFrame(response['epidata']))
    all_columns = pd.concat(frames)

    for sig in SIGNALS:
        signal_frame = make_signal(all_columns, sig)
        create_export_csv(signal_frame, params["export_dir"], "state", sig)
Example #19
0
     ,'quidel'       :['smoothed_pct_negative','smoothed_tests_per_device']}
    return d

# Script entry point: for every (data source, signal) pair, download the
# county-level covidcast series for each county in fips2name and export the
# collected rows.
if __name__ == "__main__":

    todaysEW = fromToday2EpiWeek()
    # NOTE: this rebinds the name `todayYMD` from the function to its result,
    # so the function cannot be called again afterwards.
    todayYMD = todayYMD()
    
    variables = ['geo_value','time_value','value','stderr','sample_size']

    # NOTE: same rebinding pattern - the function name now refers to the
    # mapping it returned.
    fromDataSource2Signal = fromDataSource2Signal()
    fips2name = listPACounties()
    
    for datasource in ['fb-survey','ght','doctor-visits','google-survey','quidel']:
        for signal in fromDataSource2Signal[datasource]:
            
            # One DS accumulator per (datasource, signal); filled across all counties.
            dataSet = DS(variables,datasource,signal)
            for county in fips2name:
                # Progress indicator on a single console line; the {:06d}
                # format requires `county` to be an integer.
                sys.stdout.write('\r{:s}--{:s}--{:06d}\r'.format(datasource,signal,county))
                sys.stdout.flush()
                
                # Daily county-level values from 20200101 through todayYMD.
                dataFromAPI = Epidata.covidcast(datasource,signal,'day','county',Epidata.range(20200101,todayYMD),county)
                if dataFromAPI["message"] == "no results":
                    continue
                
                # Any message other than "success" (or "no results" above) is
                # skipped silently.
                if dataFromAPI['message'] == "success":
                    for data in dataFromAPI['epidata']:
                        dataSet.appendData(data)
            # Only export when at least one county produced data.
            if dataSet.has_data():
                dataSet.convert2pandasDF().exportDF()
Example #20
0
 def _signal(name, region, epiweek):
     """Return the value of the single signal row for (name, region, epiweek).

     Raises an Exception unless the API returns exactly one row.
     """
     response = Epidata.signals(secrets.api.signals, name, region, epiweek)
     rows = AF_Utils._get(response)
     if len(rows) == 1:
         return rows[0]['value']
     raise Exception('expected one signal row')
Example #21
0
# Buffered cursor on the existing connection `cnx` (created outside this excerpt).
cur = cnx.cursor(buffered=True)

# Get ground truth
# history maps a 1-based region index -> {epiweek: wili}.
history = {}
regions = [
    "nat", "hhs1", "hhs2", "hhs3", "hhs4", "hhs5", "hhs6", "hhs7", "hhs8",
    "hhs9", "hhs10", "ga", "pa", "dc", "tx", "or"
]
# for 2017-18 season, 201744 is the first ground truth data we get after the competition starts (i.e., users forecasted for it in 201743)
#############################################################
season_start, season_end = 201744, 201820

# r is the 1-based position of the region in `regions`; regions[r - 1] is its name.
for r in range(1, len(regions) + 1):
    history[r] = {}
    # Epidata.check is applied to the raw response and its return value is iterated as rows.
    rows = Epidata.check(
        Epidata.fluview(regions[r - 1], Epidata.range(season_start,
                                                      season_end)))
    truth = [(row['epiweek'], row['wili']) for row in rows]
    # NOTE: overwritten on every iteration, so after the loop this holds the
    # epiweeks of the last region only.
    availableWeeks = [row[0] for row in truth]
    for row in truth:
        (epiweek, wili) = row
        history[r][epiweek] = wili
        print(regions[r - 1], epiweek, wili)

# Last epiweek returned for the last region.
epiweek = availableWeeks[-1]
print("epiweek", epiweek)
# The forecast was made one week before the latest ground-truth week. Plain
# subtraction is wrong across the year boundary, which is special-cased for
# 201801 only.
if (epiweek == 201801): forecast_made = 201752
else: forecast_made = epiweek - 1

# debug print
print("availableWeeks", availableWeeks)
Example #22
0
Collect actual wILI data using the Delphi Epidata API
"""

from delphi_epidata import Epidata
from datetime import datetime
import pandas as pd
import pymmwr

# Location of the wILI baseline CSV in the cdcepi/FluSight-forecasts repository.
BASELINE_URL = "https://raw.githubusercontent.com/cdcepi/FluSight-forecasts/master/wILI_Baseline.csv"
# Mapping with at least "year" and "week" keys (both are read below);
# presumably describes today's date when called without arguments - confirm
# against the pymmwr documentation.
current_epiweek = pymmwr.date_to_mmwr_week()

# Range of epiweeks to gather data for
epiweek_start = 199710
# Build the YYYYWW integer, zero-padding the week to two digits.
epiweek_end = int(str(current_epiweek["year"]) + str(current_epiweek["week"]).zfill(2))

epiweek_range = Epidata.range(epiweek_start, epiweek_end)

# "nat" plus "hhs1" .. "hhs10"
regions = ["nat", *["hhs" + str(i) for i in range(1, 11)]]

# NOTE Lag value
# A lag of 0 means that the data for each week collected will be
# as observed at that point in time.
# Pass None as lag will let us collect the most recent data
# available

# Column-oriented accumulator: one list per output column.
df = {
    "epiweek": [],
    "region": [],
    "wili": []
}
Example #23
0
def get_twitter(location, ew1, ew2):
    """Return the twitter 'percent' value for each row in ew1..ew2 as [percent] lists."""
    epiweeks = Epidata.range(ew1, ew2)
    response = Epidata.twitter(secrets.api.twitter, location, epiweeks=epiweeks)
    weekly = []
    for row in api_fetch(response):
        weekly.append([row['percent']])
    return weekly
Example #24
0
def get_ght(ew1, ew2):
    """Return the GHT 'value' for location 'US' and query '/m/0cycc' over ew1..ew2.

    Each returned row becomes a one-element list [value].
    """
    epiweeks = Epidata.range(ew1, ew2)
    response = Epidata.ght(secrets.api.ght, 'US', epiweeks, '/m/0cycc')
    weekly = []
    for row in api_fetch(response):
        weekly.append([row['value']])
    return weekly
Example #25
0
def get_gft(location, ew1, ew2):
    """Return GFT 'num' values for `location` over ew1..ew2, scaled by 1e-3.

    Each returned row becomes a one-element list [1e-3 * num].
    """
    epiweeks = Epidata.range(ew1, ew2)
    response = Epidata.gft(location, epiweeks)
    weekly = []
    for row in api_fetch(response):
        weekly.append([1e-3 * row['num']])
    return weekly
def EpiCallForLag(year, week, region):
    """Query fluview for a single epiweek and region without specifying a lag.

    `region` is either the string 'nat' or something `int()` accepts, in which
    case it is turned into the region name 'HHS<n>'. The raw Epidata response
    is returned unchanged.
    """
    if region == 'nat':
        region_name = 'nat'
    else:
        region_name = 'HHS{:d}'.format(int(region))
    epiweek = '{:04d}{:02d}'.format(year, week)
    return Epidata.fluview([region_name], [epiweek])
Example #27
0
def _fetch_single_geo(
        data_source: str,
        signal: str,  # pylint: disable=W0621
        start_day: date,
        end_day: date,
        geo_type: str,
        geo_value: str,
        as_of: date,
        issues: Union[date, tuple, list],
        lag: int) -> Union[pd.DataFrame, None]:
    """Fetch daily data for a single geo, one API request per day.

    signal() wraps this to support fetching data over an iterable of
    geographies, and stacks the resulting data frames.

    The per-day frames are concatenated, the "direction" column is dropped,
    "time_value" and "issue" are parsed as dates, and "geo_type",
    "data_source" and "signal" columns are added. If no response contained an
    "epidata" entry, None is returned so signal() can easily filter out these
    entries.

    """
    as_of_str = None if as_of is None else _date_to_api_string(as_of)
    issues_strs = None if issues is None else _dates_to_api_strings(issues)

    one_day = timedelta(1)
    frames = []
    current = start_day
    while current <= end_day:
        day_str = _date_to_api_string(current)

        day_data = Epidata.covidcast(data_source,
                                     signal,
                                     time_type="day",
                                     geo_type=geo_type,
                                     time_values=day_str,
                                     geo_value=geo_value,
                                     as_of=as_of_str,
                                     issues=issues_strs,
                                     lag=lag)

        # Anything other than "success" is reported as a warning, never raised.
        message = day_data["message"]
        if message == "no results":
            warnings.warn(
                f"No {data_source} {signal} data found on {day_str} "
                f"for geography '{geo_type}'", NoDataWarning)
        elif message != "success":
            warnings.warn(
                f"Problem obtaining {data_source} {signal} data on {day_str} "
                f"for geography '{geo_type}': {day_data['message']}",
                RuntimeWarning)

        # Truncated responses still carry "epidata" and are kept; "no results"
        # responses have no such key and are skipped.
        if "epidata" in day_data:
            frames.append(pd.DataFrame.from_dict(day_data["epidata"]))

        current += one_day

    if not frames:
        return None

    out = pd.concat(frames)
    out.drop(columns=["direction"], inplace=True)
    for date_column in ("time_value", "issue"):
        out[date_column] = pd.to_datetime(out[date_column], format="%Y%m%d")
    out["geo_type"] = geo_type
    out["data_source"] = data_source
    out["signal"] = signal
    return out
Example #28
0
def metadata() -> pd.DataFrame:
    """Fetch COVIDcast surveillance stream metadata.

    Builds a data frame of metadata describing all publicly available data
    streams from the COVIDcast API. See the `data source and signals
    documentation
    <https://cmu-delphi.github.io/delphi-epidata/api/covidcast_signals.html>`_
    for descriptions of the available sources.

    :raises RuntimeError: If the API does not report success (result code 1).

    :returns: A data frame containing one row per available signal, with the
      following columns:

      ``data_source``
        Data source name.

      ``signal``
        Signal name.

      ``time_type``
        Temporal resolution at which this signal is reported. "day", for
        example, means the signal is reported daily.

      ``geo_type``
        Geographic level for which this signal is available, such as county,
        state, msa, hhs, hrr, or nation. Most signals are available at multiple
        geographic levels and will hence be listed in multiple rows with their
        own metadata.

      ``min_time``
        First day for which this signal is available. For weekly signals, will be
        the first day of the epiweek.

      ``max_time``
        Most recent day for which this signal is available. For weekly signals, will be
        the first day of the epiweek.

      ``num_locations``
        Number of distinct geographic locations available for this signal. For
        example, if `geo_type` is county, the number of counties for which this
        signal has ever been reported.

      ``min_value``
        The smallest value that has ever been reported.

      ``max_value``
        The largest value that has ever been reported.

      ``mean_value``
        The arithmetic mean of all reported values.

      ``stdev_value``
        The sample standard deviation of all reported values.

      ``last_update``
        The UTC datetime for when the signal value was last updated.

      ``max_issue``
        Most recent date data was issued.

      ``min_lag``
        Smallest lag from observation to issue, in days.

      ``max_lag``
        Largest lag from observation to issue, in days.
    """
    meta = Epidata.covidcast_meta()

    # Anything but result code 1 means no real metadata came back.
    if meta["result"] != 1:
        raise RuntimeError("Error when fetching metadata from the API",
                           meta["message"])

    meta_df = pd.DataFrame.from_dict(meta["epidata"])
    # Parse both time bounds row by row, since the format depends on time_type.
    for column in ("min_time", "max_time"):
        meta_df[column] = meta_df.apply(
            lambda row, column=column: _parse_datetimes(row[column], row.time_type),
            axis=1)
    # last_update arrives as seconds since the epoch.
    meta_df["last_update"] = pd.to_datetime(meta_df["last_update"], unit="s")
    return meta_df
        return obs
    unique_EWLagPairs= unique_EWLagPairs.apply(addLag,1)
    return d.merge( unique_EWLagPairs, on = ['EW','lag'])

def timeStamp():
    """Return the current local time formatted as 'YYYY-MM-DDTHH:MM:SS'."""
    now = datetime.datetime.now()
    return now.isoformat(timespec="seconds")

# Script entry point: download fluview wILI for all regions and season
# epiweeks at every lag from 40 down to 0 and collect the rows into one
# DataFrame.
if __name__ == "__main__":

    # Despite the name, this is a datetime (2019-10-01) handed to
    # computeEpiWeeksWithData to derive the epiweeks to request.
    firstWeekOfSeason = datetime.datetime.strptime('2019-10-01',"%Y-%m-%d")
    epiWeeks = computeEpiWeeksWithData(firstWeekOfSeason)
    regions = createAllRegions()
    
    # Column-oriented accumulator: one list per output column.
    mostRecentEpiData = {'EW':[],'region':[],'wili':[],'lag':[],'releaseDate':[],'releaseEW':[]}
    # Lags are visited in descending order: 40, 39, ..., 0.
    for lag in np.arange(40,-1,-1):
        fluData = Epidata.fluview(regions = regions ,epiweeks = epiWeeks,lag=lag)
        # Any non-success message skips this lag; the download is best-effort.
        if fluData['message'] != 'success':
            print('could not download data-lag={:d}'.format(lag))
            continue
        print('Downloading data-lag={:d}'.format(lag))
        for data in fluData['epidata']:
            mostRecentEpiData['EW'].append(data['epiweek'])
            mostRecentEpiData['region'].append(data['region'])
            mostRecentEpiData['wili'].append(data['wili'])
            mostRecentEpiData['lag'].append(lag)
            mostRecentEpiData['releaseDate'].append(data['release_date'])

            # release_date is parsed as 'YYYY-MM-DD' and converted to an
            # epiweek with fromDateTime2EW.
            releasedateDT = datetime.datetime.strptime(data['release_date'],"%Y-%m-%d")
            mostRecentEpiData['releaseEW'].append( fromDateTime2EW(releasedateDT ))
            
    mostRecentEpiData = pd.DataFrame(mostRecentEpiData)