Example #1
def generate_date_ranges(start, end):
    """
    Take a start and end date and convert to list of 30 day Epidata ranges.

    The final range may cover only a few days, depending on the remainder of the full span modulo 30.
    The ranges partition the entire span, inclusive of both endpoints, without overlapping,
    i.e. they will be of the form (start, start+30), (start+31, start+61), (start+62, start+92), ...

    Parameters
    ----------
    start: date
      datetime.date object for first day.
    end: date
      datetime.date object for last day.

    Returns
    -------
    Ordered list of dictionaries generated by Epidata.range specifying the partitioning intervals.
    """
    curr_end = start + timedelta(30)
    output = []
    while curr_end < end:
        output.append(
            Epidata.range(_date_to_int(start), _date_to_int(curr_end)))
        start += timedelta(31)
        curr_end = start + timedelta(30)
    output.append(Epidata.range(_date_to_int(start), _date_to_int(end)))
    return output
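In the delphi_epidata Python client, Epidata.range(a, b) returns a plain {'from': a, 'to': b} dictionary. A minimal sketch of the output above, assuming _date_to_int (not shown in this snippet) converts a date to its YYYYMMDD integer:

from datetime import date, timedelta

def _date_to_int(d):
    # assumed helper: date -> YYYYMMDD integer
    return int(d.strftime('%Y%m%d'))

generate_date_ranges(date(2020, 1, 1), date(2020, 2, 15))
# -> [{'from': 20200101, 'to': 20200131},
#     {'from': 20200201, 'to': 20200215}]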
Example #2
def grabDataFromEpicast(self):
    if self.region == '':
        self.fludata = Epidata.fluview(self.state, [Epidata.range(201040, self.todaysEW)])
    elif self.state == '':
        self.fludata = Epidata.fluview(self.region, [Epidata.range(201040, self.todaysEW)])
    else:
        self.fludata = Epidata.fluview(self.region + self.state, [Epidata.range(201040, self.todaysEW)])

    self.fludata_message = self.fludata['message']
    self.fludata_data = self.fludata['epidata']
Example #3
def get_season(season, location):
    #end = (season + 1) * 100 + 29
    #epiweeks = Epidata.range(flu.add_epiweeks(end, -51), end)
    begin = season * 100 + 30
    epiweeks = Epidata.range(begin, flu.add_epiweeks(begin, 51))
    rows = AF_Utils._get(Epidata.ilinet(location, epiweeks))
    return [row['wili'] for row in rows]
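Epiweeks in these examples are encoded as YYYYWW integers, so week 30 of a season's starting year is season * 100 + 30. A quick illustration, assuming flu.add_epiweeks wraps week numbers across year boundaries:

season = 2015
begin = season * 100 + 30  # 201530, i.e. week 30 of 2015
# flu.add_epiweeks(begin, 51) then ends the 52-week window at
# week 29 of the following year (201629).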
Example #4
def get_wiki(ew1, ew2):
    # get the raw wiki data, broken down by epiweek, article, and hour
    epiweeks = Epidata.range(ew1, ew2)
    result = {}
    data = api_fetch(Epidata.wiki(ARTICLES, epiweeks=epiweeks, hours=HOURS))
    # index the data for fast access
    for row in data:
        epiweek, article = row['epiweek'], row['article']
        if epiweek not in result:
            result[epiweek] = {}
        if article not in result[epiweek]:
            result[epiweek][article] = {'c': [], 't': []}
        result[epiweek][article]['c'].append(row['count'])
        result[epiweek][article]['t'].append(row['total'])
    # group by epiweek and article (combining hours)
    data = []
    for epiweek in sorted(list(result.keys())):
        row = []
        for article in sorted(ARTICLES):
            count, total = result[epiweek][article]['c'], result[epiweek][article]['t']
            if len(count) != len(HOURS) or len(total) != len(HOURS):
                raise Exception('wiki is missing hours')
            row.append(1e6 * sum(count) / sum(total))
        data.append(row)
    # return a list of weekly data
    return data
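Several of these examples call an api_fetch helper whose definition is not shown. From its usage it evidently validates the response and unwraps the 'epidata' list; a minimal sketch under that assumption (not the original implementation):

def api_fetch(response):
    # assumed behavior: fail unless the API reports success (result == 1)
    if response['result'] != 1:
        raise Exception('API request failed: {}'.format(response.get('message')))
    return response['epidata']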
Example #5
def get_unstable_wILI(region, ew1, ew2):
    weeks = Epidata.range(ew1, ew2)
    epidata = AF_Utils._get(Epidata.fluview(region, weeks, issues=ew2))
    data = [row['wili'] for row in epidata]
    if len(data) != flu.delta_epiweeks(ew1, ew2) + 1:
        raise Exception('missing data')
    return data
Example #6
def get_historical_sensor_data(sensor: SensorConfig, geo_value: str,
                               geo_type: str, start_date: date,
                               end_date: date) -> Tuple[LocationSeries, list]:
    """
    Query Epidata API for historical sensorization data.

    Only non-null values are returned; any days whose values are null or
    unavailable are reported in the list of missing dates.

    Parameters
    ----------
    sensor
        SensorConfig specifying which sensor to retrieve.
    geo_value
        Geo value to retrieve.
    geo_type
        Geo type to retrieve.
    start_date
        First day to retrieve (inclusive).
    end_date
        Last day to retrieve (inclusive).

    Returns
    -------
        Tuple of (LocationSeries containing non-na data, list of dates without valid data). If no
        data was found, an empty LocationSeries is returned.
    """
    response = Epidata.covidcast_nowcast(data_source=sensor.source,
                                         signals=sensor.signal,
                                         time_type="day",
                                         geo_type=geo_type,
                                         time_values=Epidata.range(
                                             start_date.strftime("%Y%m%d"),
                                             end_date.strftime("%Y%m%d")),
                                         geo_value=geo_value,
                                         sensor_names=sensor.name,
                                         lag=sensor.lag)
    all_dates = [i.date() for i in date_range(start_date, end_date)]
    if response["result"] == 1:
        output = LocationSeries(geo_value=geo_value,
                                geo_type=geo_type,
                                data={
                                    datetime.strptime(str(i["time_value"]),
                                                      "%Y%m%d").date():
                                    i["value"]
                                    for i in response.get("epidata", [])
                                    if not isnan(i["value"])
                                })
        missing_dates = [i for i in all_dates if i not in output.dates]
        return output, missing_dates
    if response["result"] == -2:  # no results
        print("No historical results found")
        output = LocationSeries(geo_value=geo_value, geo_type=geo_type)
        return output, all_dates
    raise Exception(f"Bad result from Epidata: {response['message']}")
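The branches above follow the response convention visible throughout these examples: each call returns a dictionary with 'result' and 'message' keys, plus 'epidata' on success, where result == 1 means success and -2 means no results. Illustrative shapes:

{'result': 1, 'message': 'success', 'epidata': [...]}  # data found
{'result': -2, 'message': 'no results'}                # nothing matched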
Example #7
def load_us(states, latest=False):
    us_covid19_cases_path = os.path.join(config.base_data_dir, config.us_covid19_cases)
 
    #df_us = pd.read_csv(us_covid19_cases_path)
    import sys
    sys.path.append('src/')
    from delphi_epidata import Epidata
    
    start_date = 20200401
    
    from datetime import datetime
    stop_date = int(datetime.today().strftime('%Y%m%d'))
 
    for target_state in states:
        print(f'Processing data for state: {target_state} ' + ' *' * 10)
        print('Start date = ', start_date, ' End date = ', stop_date)
       
        # a single Epidata.range already covers start_date inclusively,
        # so there is no need to also pass start_date as a separate value
        res_incidence = Epidata.covidcast('jhu-csse', 'confirmed_7dav_incidence_num', 'day', 'state',
                                          Epidata.range(start_date, stop_date), target_state)
        res_death = Epidata.covidcast('jhu-csse', 'deaths_7dav_incidence_num', 'day', 'state',
                                      Epidata.range(start_date, stop_date), target_state)
        
        df_state = pd.DataFrame(columns=['Confirmed', 'Deceased', 'Recovered'])
        # check the API result codes rather than the (always nonzero) dict lengths
        if res_incidence['result'] == 1 and res_death['result'] == 1:
            df_jhu_7day = pd.DataFrame(res_incidence['epidata'])
            df_jhu_7day_deaths = pd.DataFrame(res_death['epidata'])

            df_state['Date'] = pd.to_datetime(df_jhu_7day['time_value'], format='%Y%m%d')
            df_state['Confirmed'] = df_jhu_7day['value']
            df_state['Deceased'] = df_jhu_7day_deaths['value']
            df_state['Recovered'] = df_state['Recovered'].fillna(0)
            
            # ensures sorting with respect to date
            df_state.index = pd.to_datetime(df_state.Date)
            df_state[['Total_Confirmed', 'Total_Deceased', 'Total_Recovered']] \
                = df_state[['Confirmed', 'Deceased', 'Recovered']].cumsum(axis=0, skipna=True)
            df_state.to_csv(os.path.join(config.base_data_dir, f'Cases_USA_{target_state}.csv'), index=False)
        else:
            print(' *** Error: cannot import data from the Delphi database. Check src/state_data_loader.py')
            sys.exit(1)
Example #8
def get_fluview_data(states, start, end):
    """
    return a dictionary of dataframe with the different epiweeks
    """
    ilinet_raw = {}
    for state in states:
        print("State {}".format(state))
        res = Epidata.fluview(
            regions=state,  #source
            epiweeks=[Epidata.range(start, end)])  #range 2009 to 2016
        if res['result'] == 1:
            print(res['result'], res['message'], len(res['epidata']))
            data = pd.DataFrame(res['epidata'])
            ilinet_raw[state] = data
        else:
            print(res['result'], res['message'])
    return ilinet_raw
Example #9
def pull_data() -> pd.DataFrame:
    """
    Pull HHS data from Epidata API for all states and dates and convert to a DataFrame.

    Returns
    -------
    DataFrame of HHS data.
    """
    today = int(date.today().strftime("%Y%m%d"))
    past_reference_day = int(date(2020, 1, 1).strftime("%Y%m%d"))  # first available date in DB
    all_states = GeoMapper().get_geo_values("state_id")
    responses = pull_data_iteratively(all_states,
                                      Epidata.range(past_reference_day, today))
    all_columns = pd.DataFrame(responses).replace(NAN_VALUES, np.nan)
    all_columns["timestamp"] = pd.to_datetime(all_columns["collection_week"],
                                              format="%Y%m%d")
    return all_columns
Example #10
    def get_influenza_counts_df():
        """Load influenza counts from the CMU Delphi API, return a pandas dataframe"""
        # Retrieves current date, formats it "YYYY-mm-dd", and converts it to epiweek
        today_obj = datetime.today()
        today_str = today_obj.strftime("%Y-%m-%d")
        epiweek = DataLoader.get_approx_epiweek_from_date(today_str)

        # Retrieves national fluview data for each "epiweek" from 2020:
        results = Epidata.fluview(["nat"], [Epidata.range(202001, epiweek)])
        results_df = pd.DataFrame.from_records(
            results["epidata"]).sort_values(by=["epiweek"])
        results_df = results_df[[
            "epiweek", "lag", "num_ili", "num_patients", "num_providers",
            "wili", "ili"
        ]]

        # Convert epiweeks to approximate real date for graphing
        results_df["date"] = results_df["epiweek"].apply(
            DataLoader.get_approx_date_from_epiweek)
        return results_df
Example #11
def get_ili(location, issue, ew1, ew2):
    result = {}
    epiweeks = Epidata.range(ew1, ew2)
    num_weeks = flu.delta_epiweeks(ew1, ew2) + 1
    # try to get unstable, but gracefully fall back to stable
    if issue is not None:
        res = Epidata.fluview(location, epiweeks, issues=issue)
        if res['result'] == 1:
            for row in res['epidata']:
                result[row['epiweek']] = row['wili']
    # check to see if another API call is needed
    if issue is None or res['result'] != 1 or len(res['epidata']) < num_weeks:
        # get stable data
        data = api_fetch(Epidata.fluview(location, epiweeks))
        for row in data:
            epiweek = row['epiweek']
            if epiweek not in result:
                result[epiweek] = row['wili']
    # return a list of weekly data
    return [[result[ew]] for ew in sorted(list(result.keys()))]
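The two fluview calls differ only in the issues argument: passing issues=issue requests values as published in that particular issue (the possibly unrevised, "unstable" data), while omitting it returns the latest revisions (the "stable" data). Side by side:

Epidata.fluview(location, epiweeks, issues=issue)  # as published in `issue`
Epidata.fluview(location, epiweeks)                # latest stable revisions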
Example #12
def get_gft(location, ew1, ew2):
    epiweeks = Epidata.range(ew1, ew2)
    data = api_fetch(Epidata.gft(location, epiweeks))
    return [[1e-3 * row['num']] for row in data]
Example #13
    ,'quidel'       : ['smoothed_pct_negative', 'smoothed_tests_per_device']}
    return d

if __name__ == "__main__":

    todaysEW = fromToday2EpiWeek()
    todayYMD = todayYMD()

    variables = ['geo_value', 'time_value', 'value', 'stderr', 'sample_size']

    fromDataSource2Signal = fromDataSource2Signal()
    fips2name = listPACounties()

    for datasource in ['fb-survey', 'ght', 'doctor-visits', 'google-survey', 'quidel']:
        for signal in fromDataSource2Signal[datasource]:

            dataSet = DS(variables, datasource, signal)
            for county in fips2name:
                sys.stdout.write('\r{:s}--{:s}--{:06d}\r'.format(datasource, signal, county))
                sys.stdout.flush()

                dataFromAPI = Epidata.covidcast(datasource, signal, 'day', 'county',
                                                Epidata.range(20200101, todayYMD), county)
                if dataFromAPI['message'] == "no results":
                    continue

                if dataFromAPI['message'] == "success":
                    for data in dataFromAPI['epidata']:
                        dataSet.appendData(data)
            if dataSet.has_data():
                dataSet.convert2pandasDF().exportDF()
Example #14
def get_twitter(location, ew1, ew2):
    epiweeks = Epidata.range(ew1, ew2)
    data = api_fetch(
        Epidata.twitter(secrets.api.twitter, location, epiweeks=epiweeks))
    return [[row['percent']] for row in data]
Example #15
def get_ght(ew1, ew2):
    epiweeks = Epidata.range(ew1, ew2)
    data = api_fetch(Epidata.ght(secrets.api.ght, 'US', epiweeks, '/m/0cycc'))
    return [[row['value']] for row in data]
Example #16
cur = cnx.cursor(buffered=True)

# Get ground truth
history = {}
regions = [
    "nat", "hhs1", "hhs2", "hhs3", "hhs4", "hhs5", "hhs6", "hhs7", "hhs8",
    "hhs9", "hhs10", "ga", "pa", "dc", "tx", "or"
]
# for 2017-18 season, 201744 is the first ground truth data we get after the competition starts (i.e., users forecasted for it in 201743)
#############################################################
season_start, season_end = 201744, 201820

for r, region in enumerate(regions, start=1):
    history[r] = {}
    rows = Epidata.check(
        Epidata.fluview(region, Epidata.range(season_start, season_end)))
    truth = [(row['epiweek'], row['wili']) for row in rows]
    availableWeeks = [row[0] for row in truth]
    for row in truth:
        (epiweek, wili) = row
        history[r][epiweek] = wili
        print(region, epiweek, wili)

epiweek = availableWeeks[-1]
print("epiweek", epiweek)
if epiweek == 201801:
    forecast_made = 201752
else:
    forecast_made = epiweek - 1

# debug print
print("availableWeeks", availableWeeks)
expected_weeks = epi_utils.delta_epiweeks(season_start, epiweek) + 1
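As in the earlier examples, delta_epiweeks counts the number of week steps between two epiweeks, so the inclusive week count is the delta plus one:

# e.g. delta_epiweeks(201744, 201746) == 2, so the window 201744..201746
# holds 2 + 1 = 3 epiweeks, which is what expected_weeks computes.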
Example #17
"""
Collect actual wili data using the delphi API
"""

from delphi_epidata import Epidata
from datetime import datetime
import pandas as pd
import pymmwr

BASELINE_URL = "https://raw.githubusercontent.com/cdcepi/FluSight-forecasts/master/wILI_Baseline.csv"
current_epiweek = pymmwr.date_to_mmwr_week()

# Range of epiweeks to gather data for
epiweek_start = 199710
epiweek_end = int(str(current_epiweek["year"]) + str(current_epiweek["week"]).zfill(2))

epiweek_range = Epidata.range(epiweek_start, epiweek_end)

regions = ["nat", *["hhs" + str(i) for i in range(1, 11)]]

# NOTE Lag value
# A lag of 0 means that the data for each week collected will be
# as observed at that point in time.
# Passing None as the lag lets us collect the most recent data
# available.
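# For example (a sketch; in the delphi_epidata Python client, fluview
# accepts an optional `lag` keyword):
#   Epidata.fluview("nat", epiweek_range, lag=0)  # values as first observed
#   Epidata.fluview("nat", epiweek_range)         # most recent data available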

df = {
    "epiweek": [],
    "region": [],
    "wili": []
}