Esempio n. 1
0
def _fetch_epidata(
        data_source: str,
        signal: str,  # pylint: disable=W0621
        start_day: date,
        end_day: date,
        geo_type: str,
        geo_value: Union[str, Iterable[str]],
        as_of: date,
        issues: Union[date, tuple, list],
        lag: int,
        time_type: str = "day") -> list:
    """Fetch data from the Epidata API, one request per time step.

    The inclusive range ``start_day``..``end_day`` is walked in steps of one
    day when ``time_type`` is ``"day"`` and of seven days otherwise. Each step
    issues its own ``Epidata.covidcast`` request.

    A ``NoDataWarning`` is emitted for every step whose response message is
    ``"no results"``; a ``RuntimeWarning`` is emitted for every step whose
    message is anything other than ``"success"`` or ``"no results"``.

    :param data_source: Data source name, forwarded to ``Epidata.covidcast``.
    :param signal: Signal name, forwarded to ``Epidata.covidcast``.
    :param start_day: First time value to request (inclusive).
    :param end_day: Last time value to request (inclusive). If it precedes
      ``start_day`` no request is made.
    :param geo_type: Geography type, forwarded to ``Epidata.covidcast``.
    :param geo_value: Geography value(s), forwarded to ``Epidata.covidcast``.
    :param as_of: If not ``None``, converted with ``_date_to_api_string`` and
      forwarded as ``as_of``.
    :param issues: If not ``None``, converted with ``_dates_to_api_strings``
      and forwarded as ``issues``.
    :param lag: Forwarded unchanged as ``lag``.
    :param time_type: ``"day"`` for a one-day step; any other value gives a
      seven-day step. Also forwarded to the API and to the date formatters.
    :returns: A list with one ``pandas.DataFrame`` per step whose response
      carried a non-empty ``"epidata"`` payload, in request order. The list is
      empty when no step returned data. The frames are not concatenated
      here; the caller is expected to combine them.
    """
    as_of_str = _date_to_api_string(as_of,
                                    time_type) if as_of is not None else None
    issues_strs = _dates_to_api_strings(
        issues, time_type) if issues is not None else None

    # The step size does not change between iterations, so compute it once.
    step = timedelta(1) if time_type == "day" else timedelta(7)

    dfs = []
    cur_day = start_day
    while cur_day <= end_day:
        day_str = _date_to_api_string(cur_day, time_type)
        day_data = Epidata.covidcast(data_source,
                                     signal,
                                     time_type=time_type,
                                     geo_type=geo_type,
                                     time_values=day_str,
                                     geo_value=geo_value,
                                     as_of=as_of_str,
                                     issues=issues_strs,
                                     lag=lag)

        # Two possible error conditions: no data or too much data (or any
        # other non-success message, which is reported verbatim).
        message = day_data["message"]
        if message == "no results":
            warnings.warn(
                f"No {data_source} {signal} data found on {day_str} "
                f"for geography '{geo_type}'", NoDataWarning)
        elif message != "success":
            warnings.warn(
                f"Problem obtaining {data_source} {signal} data on {day_str} "
                f"for geography '{geo_type}': {day_data['message']}",
                RuntimeWarning)

        # In the too-much-data case, we continue to try putting the truncated
        # data in our results. In the no-data case, skip this day entirely,
        # since there is no (or an empty) "epidata" in the response.
        if day_data.get("epidata"):
            dfs.append(pd.DataFrame.from_dict(day_data["epidata"]))
        cur_day += step
    return dfs
Esempio n. 2
0
def load_us(states, latest=False):
    """Download JHU-CSSE case and death signals per state and write a CSV each.

    For every entry of ``states`` two covidcast signals are requested from
    the Delphi Epidata API (``confirmed_7dav_incidence_num`` and
    ``deaths_7dav_incidence_num`` of source ``jhu-csse``, state level, daily)
    for the period from 2020-04-01 up to today. The values are stored as the
    ``Confirmed`` and ``Deceased`` columns, ``Recovered`` is set to 0, running
    totals are added as ``Total_*`` columns, and the result is written to
    ``Cases_USA_<state>.csv`` inside ``config.base_data_dir``.

    :param states: Iterable of state identifiers passed to the API as the
      geo value and used in the output file name.
    :param latest: Unused; kept so existing callers keep working.
    :raises SystemExit: If either API response for a state carries no
      ``epidata`` payload (an error message is printed first).
    """
    # Imported locally because the client lives under 'src/', which has to
    # be put on the module search path first. Only add it once.
    import sys
    if 'src/' not in sys.path:
        sys.path.append('src/')
    from datetime import datetime
    from delphi_epidata import Epidata

    start_date = 20200401
    stop_date = int(datetime.today().strftime('%Y%m%d'))

    # Identical for every state, so build it once.
    # NOTE(review): the leading start_date looks redundant with the range
    # that already starts at start_date -- confirm and drop if so.
    time_values = [start_date, Epidata.range(start_date, stop_date)]

    for target_state in states:
        print(f'Processing data for state: {target_state} ' + ' *' * 10)
        print('Start date = ', start_date, ' End date = ', stop_date)

        res_incidence = Epidata.covidcast(
            'jhu-csse', 'confirmed_7dav_incidence_num', 'day', 'state',
            time_values, target_state)
        res_death = Epidata.covidcast(
            'jhu-csse', 'deaths_7dav_incidence_num', 'day', 'state',
            time_values, target_state)

        # The response is a dict that is never empty, so its length says
        # nothing about success; what matters is whether rows came back.
        cases_rows = res_incidence.get('epidata')
        death_rows = res_death.get('epidata')
        if not cases_rows or not death_rows:
            print(' *** Error: Can not import data from Delphi database. Check src/state_data_loader.py')
            # NOTE(review): exits with the default status even though this
            # is an error path; kept as-is for compatibility.
            sys.exit()

        df_jhu_7day = pd.DataFrame(cases_rows)
        df_jhu_7day_deaths = pd.DataFrame(death_rows)

        df_state = pd.DataFrame(columns=['Confirmed', 'Deceased', 'Recovered'])
        df_state['Date'] = pd.to_datetime(df_jhu_7day['time_value'], format='%Y%m%d')
        df_state['Confirmed'] = df_jhu_7day['value']
        # NOTE(review): deaths are matched to cases by row position, not by
        # date; this assumes both responses cover the same days in the same
        # order -- confirm.
        df_state['Deceased'] = df_jhu_7day_deaths['value']
        # Assign the column directly; an in-place fillna on a selected
        # column is chained assignment and may not update the frame.
        df_state['Recovered'] = 0

        # Index the rows by date. This does not reorder them: rows stay in
        # the order the API returned them.
        df_state.index = pd.to_datetime(df_state.Date)
        df_state[['Total_Confirmed', 'Total_Deceased', 'Total_Recovered']] \
            = df_state[['Confirmed', 'Deceased', 'Recovered']].cumsum(axis=0, skipna=True)
        # The index is not written; the date is still present as 'Date'.
        df_state.to_csv(os.path.join(config.base_data_dir, f'Cases_USA_{target_state}.csv'), index=False)
Esempio n. 3
0
     ,'quidel'       :['smoothed_pct_negative','smoothed_tests_per_device']}
    return d

# Script entry point: for every (data source, signal) pair, query the
# covidcast endpoint one county at a time, collect the returned records in a
# DS object and export it if anything was collected.
if __name__ == "__main__":

    # NOTE(review): todaysEW is not referenced again in this block; presumably
    # it is read as a module global elsewhere -- confirm before removing.
    todaysEW = fromToday2EpiWeek()
    # NOTE: this rebinds the name todayYMD from the function to its return
    # value, so the function cannot be called again after this line.
    todayYMD = todayYMD()
    
    # Passed to DS below; presumably the record fields to keep -- confirm
    # against the DS class.
    variables = ['geo_value','time_value','value','stderr','sample_size']

    # NOTE: same rebinding as above -- the function name now refers to the
    # mapping it returned (indexed by data source name below).
    fromDataSource2Signal = fromDataSource2Signal()
    # Iterated below to obtain the county identifiers; the {:06d} format used
    # in the progress line implies that iterating it yields integers.
    fips2name = listPACounties()
    
    for datasource in ['fb-survey','ght','doctor-visits','google-survey','quidel']:
        for signal in fromDataSource2Signal[datasource]:
            
            # One accumulator per (data source, signal) pair.
            dataSet = DS(variables,datasource,signal)
            for county in fips2name:
                # Progress indicator; the carriage returns make each update
                # overwrite the previous one on the same terminal line.
                sys.stdout.write('\r{:s}--{:s}--{:06d}\r'.format(datasource,signal,county))
                sys.stdout.flush()
                
                # One request per county, covering 20200101 through todayYMD.
                dataFromAPI = Epidata.covidcast(datasource,signal,'day','county',Epidata.range(20200101,todayYMD),county)
                if dataFromAPI["message"] == "no results":
                    continue
                
                # Only successful responses contribute records; any other
                # message is skipped without a warning.
                if dataFromAPI['message'] == "success":
                    for data in dataFromAPI['epidata']:
                        dataSet.appendData(data)
            # Export only when dataSet.has_data() reports something to write.
            if dataSet.has_data():
                dataSet.convert2pandasDF().exportDF()
Esempio n. 4
0
def _fetch_single_geo(
        data_source: str,
        signal: str,  # pylint: disable=W0621
        start_day: date,
        end_day: date,
        geo_type: str,
        geo_value: str,
        as_of: date,
        issues: Union[date, tuple, list],
        lag: int) -> Union[pd.DataFrame, None]:
    """Fetch data for a single geo, one request per day.

    signal() wraps this to support fetching data over an iterable of
    geographies, and stacks the resulting data frames.

    Every day in the inclusive range ``start_day``..``end_day`` is requested
    separately from ``Epidata.covidcast``. A ``NoDataWarning`` is emitted for
    a day whose response message is ``"no results"`` and a ``RuntimeWarning``
    for any message other than ``"success"`` or ``"no results"``.

    :param data_source: Data source name; forwarded to the API and stored in
      the ``data_source`` column of the result.
    :param signal: Signal name; forwarded to the API and stored in the
      ``signal`` column of the result.
    :param start_day: First day to request (inclusive).
    :param end_day: Last day to request (inclusive). If it precedes
      ``start_day`` no request is made.
    :param geo_type: Geography type; forwarded to the API and stored in the
      ``geo_type`` column of the result.
    :param geo_value: Geography value forwarded to the API.
    :param as_of: If not ``None``, converted with ``_date_to_api_string`` and
      forwarded as ``as_of``.
    :param issues: If not ``None``, converted with ``_dates_to_api_strings``
      and forwarded as ``issues``.
    :param lag: Forwarded unchanged as ``lag``.
    :returns: The concatenated per-day frames with ``time_value`` and
      ``issue`` parsed as datetimes, the ``direction`` column removed when
      present, and ``geo_type``, ``data_source`` and ``signal`` columns
      added. ``None`` if no day returned data, so signal() can easily filter
      out these entries.
    """
    as_of_str = _date_to_api_string(as_of) if as_of is not None else None
    issues_strs = _dates_to_api_strings(issues) if issues is not None else None

    one_day = timedelta(1)
    dfs = []
    cur_day = start_day
    while cur_day <= end_day:
        day_str = _date_to_api_string(cur_day)

        day_data = Epidata.covidcast(data_source,
                                     signal,
                                     time_type="day",
                                     geo_type=geo_type,
                                     time_values=day_str,
                                     geo_value=geo_value,
                                     as_of=as_of_str,
                                     issues=issues_strs,
                                     lag=lag)

        # Two possible error conditions: no data or too much data (or any
        # other non-success message, which is reported verbatim).
        message = day_data["message"]
        if message == "no results":
            warnings.warn(
                f"No {data_source} {signal} data found on {day_str} "
                f"for geography '{geo_type}'", NoDataWarning)
        elif message != "success":
            warnings.warn(
                f"Problem obtaining {data_source} {signal} data on {day_str} "
                f"for geography '{geo_type}': {day_data['message']}",
                RuntimeWarning)

        # In the too-much-data case, we continue to try putting the truncated
        # data in our results. In the no-data case, skip this day entirely.
        # A present-but-empty "epidata" is skipped as well: it would yield a
        # frame without columns and break the column handling below.
        if day_data.get("epidata"):
            dfs.append(pd.DataFrame.from_dict(day_data["epidata"]))

        cur_day += one_day

    if not dfs:
        return None

    out = pd.concat(dfs)
    # Tolerate responses that do not carry a "direction" column.
    out.drop("direction", axis=1, inplace=True, errors="ignore")
    out["time_value"] = pd.to_datetime(out["time_value"], format="%Y%m%d")
    out["issue"] = pd.to_datetime(out["issue"], format="%Y%m%d")
    out["geo_type"] = geo_type
    out["data_source"] = data_source
    out["signal"] = signal
    return out