def _fetch_epidata(data_source: str,
                   signal: str,  # pylint: disable=W0621
                   start_day: date,
                   end_day: date,
                   geo_type: str,
                   geo_value: Union[str, Iterable[str]],
                   as_of: date,
                   issues: Union[date, tuple, list],
                   lag: int,
                   time_type: str = "day") -> list:
    """Fetch data from the Epidata API, one time value at a time.

    signal() wraps this to support fetching data over a range of dates and
    stacks the resulting data frames.

    :param data_source: Epidata data source name (e.g. "jhu-csse").
    :param signal: signal name within the data source.
    :param start_day: first time value to fetch, inclusive.
    :param end_day: last time value to fetch, inclusive.
    :param geo_type: geography type (e.g. "county", "state").
    :param geo_value: a single geography id or an iterable of ids.
    :param as_of: only fetch data available on or before this date, or None.
    :param issues: only fetch data issued on these date(s), or None.
    :param lag: only fetch data reported with this lag, or None.
    :param time_type: temporal resolution, "day" or "week".
    :returns: list of pd.DataFrame, one per time value that returned data;
        may be empty. (Note: previously annotated as
        ``Union[pd.DataFrame, None]`` and documented as returning None on no
        data, but the function has always returned the ``dfs`` list — the
        annotation and docstring are corrected here to match behavior.)
    """
    as_of_str = _date_to_api_string(as_of, time_type) if as_of is not None else None
    issues_strs = _dates_to_api_strings(issues, time_type) if issues is not None else None

    cur_day = start_day
    dfs = []
    while cur_day <= end_day:
        day_str = _date_to_api_string(cur_day, time_type)
        day_data = Epidata.covidcast(data_source, signal, time_type=time_type,
                                     geo_type=geo_type, time_values=day_str,
                                     geo_value=geo_value, as_of=as_of_str,
                                     issues=issues_strs, lag=lag)

        # Two possible error conditions: no data or too much data.
        if day_data["message"] == "no results":
            warnings.warn(f"No {data_source} {signal} data found on {day_str} "
                          f"for geography '{geo_type}'", NoDataWarning)
        if day_data["message"] not in {"success", "no results"}:
            warnings.warn(f"Problem obtaining {data_source} {signal} data on {day_str} "
                          f"for geography '{geo_type}': {day_data['message']}",
                          RuntimeWarning)

        # In the too-much-data case, we continue to try putting the truncated
        # data in our results. In the no-data case, skip this day entirely,
        # since there is no "epidata" in the response. .get() also skips an
        # empty "epidata" list, so no empty frames are appended.
        if day_data.get("epidata"):
            dfs.append(pd.DataFrame.from_dict(day_data["epidata"]))

        # Weekly signals advance by 7 days; daily signals by 1.
        cur_day += timedelta(1) if time_type == "day" else timedelta(7)

    return dfs
def load_us(states, latest=False):
    """Download 7-day-average JHU CSSE case and death counts for each US state
    and write one ``Cases_USA_<state>.csv`` per state under the configured
    data directory.

    :param states: iterable of state geography codes accepted by the Delphi
        Epidata COVIDcast API (e.g. "pa", "ny").
    :param latest: currently unused; kept for interface compatibility.
    """
    us_covid19_cases_path = os.path.join(config.base_data_dir, config.us_covid19_cases)

    # Local imports: the delphi_epidata client lives under src/ in this repo.
    import sys
    sys.path.append('src/')
    from delphi_epidata import Epidata
    from datetime import datetime

    start_date = 20200401
    # Query window runs through "today", encoded as a YYYYMMDD integer.
    stop_date = int(datetime.today().strftime('%Y%m%d'))

    for target_state in states:
        print(f'Processing data for state: {target_state} ' + ' *' * 10)
        print('Start date = ', start_date, ' End date = ', stop_date)
        res_incidence = Epidata.covidcast(
            'jhu-csse', 'confirmed_7dav_incidence_num', 'day', 'state',
            [start_date, Epidata.range(start_date, stop_date)], target_state)
        res_death = Epidata.covidcast(
            'jhu-csse', 'deaths_7dav_incidence_num', 'day', 'state',
            [start_date, Epidata.range(start_date, stop_date)], target_state)

        df_state = pd.DataFrame(columns=['Confirmed', 'Deceased', 'Recovered'])

        # Bug fix: the API returns a dict that always carries result/message
        # keys, so the old ``len(res) > 0`` test was always true and a failed
        # query crashed with KeyError('epidata'). Check for an actual,
        # non-empty payload instead.
        if res_incidence.get('epidata') and res_death.get('epidata'):
            df_jhu_7day = pd.DataFrame(res_incidence['epidata'])
            df_jhu_7day_deaths = pd.DataFrame(res_death['epidata'])
            df_state['Date'] = pd.to_datetime(df_jhu_7day['time_value'], format='%Y%m%d')
            df_state['Confirmed'] = df_jhu_7day['value']
            df_state['Deceased'] = df_jhu_7day_deaths['value']
            # JHU provides no recovery data here; zero-fill the column.
            # (Direct assignment replaces the old chained-inplace fillna,
            # which is unreliable under pandas copy-on-write.)
            df_state['Recovered'] = df_state['Recovered'].fillna(0)

            # ensures sorting with respect to date
            df_state.index = pd.to_datetime(df_state.Date)
            df_state[['Total_Confirmed', 'Total_Deceased', 'Total_Recovered']] \
                = df_state[['Confirmed', 'Deceased', 'Recovered']].cumsum(axis=0, skipna=True)
            df_state.to_csv(os.path.join(config.base_data_dir,
                                         f'Cases_USA_{target_state}.csv'), index=False)
        else:
            print(' *** Error: Can not import data from Delphi database. Check src/state_data_loader.py')
            # sys.exit raises SystemExit cleanly; the builtin exit() is meant
            # for interactive use only.
            sys.exit(1)
                       # NOTE(review): this line continues a dict literal whose opening
                       # (and the enclosing function's `def` line) lie outside this chunk;
                       # it maps data-source names to their signal lists.
                       ,'quidel' :['smoothed_pct_negative','smoothed_tests_per_device']}
    return d


if __name__ == "__main__":
    # Current date as an epidemiological week and as a YYYYMMDD integer.
    todaysEW = fromToday2EpiWeek()
    todayYMD = todayYMD()  # NOTE(review): rebinds the helper's name to its result
    # Fields to retain from each API record.
    variables = ['geo_value','time_value','value','stderr','sample_size']
    fromDataSource2Signal = fromDataSource2Signal()  # data source -> list of signals
    fips2name = listPACounties()  # county FIPS codes (PA counties, per helper name)
    for datasource in ['fb-survey','ght','doctor-visits','google-survey','quidel']:
        for signal in fromDataSource2Signal[datasource]:
            dataSet = DS(variables,datasource,signal)
            for county in fips2name:
                # One-line console progress indicator (carriage-return rewrites).
                sys.stdout.write('\r{:s}--{:s}--{:06d}\r'.format(datasource,signal,county))
                sys.stdout.flush()
                # Daily county-level values from 2020-01-01 through today.
                dataFromAPI = Epidata.covidcast(datasource,signal,'day','county',Epidata.range(20200101,todayYMD),county)
                if dataFromAPI["message"] == "no results":
                    continue
                if dataFromAPI['message'] == "success":
                    for data in dataFromAPI['epidata']:
                        dataSet.appendData(data)
            # Export one file per (datasource, signal) if anything was collected.
            if dataSet.has_data():
                dataSet.convert2pandasDF().exportDF()
def _fetch_single_geo(data_source: str,
                      signal: str,  # pylint: disable=W0621
                      start_day: date,
                      end_day: date,
                      geo_type: str,
                      geo_value: str,
                      as_of: date,
                      issues: Union[date, tuple, list],
                      lag: int) -> Union[pd.DataFrame, None]:
    """Fetch daily data for a single geography.

    signal() wraps this to support fetching data over an iterable of
    geographies, and stacks the resulting data frames.

    :param data_source: Epidata data source name (e.g. "jhu-csse").
    :param signal: signal name within the data source.
    :param start_day: first day to fetch, inclusive.
    :param end_day: last day to fetch, inclusive.
    :param geo_type: geography type (e.g. "county", "state").
    :param geo_value: a single geography id.
    :param as_of: only fetch data available on or before this date, or None.
    :param issues: only fetch data issued on these date(s), or None.
    :param lag: only fetch data reported with this lag, or None.
    :returns: a DataFrame of all days that returned data, or None if no data
        was found — so signal() can easily filter out these entries.
    """
    as_of_str = _date_to_api_string(as_of) if as_of is not None else None
    issues_strs = _dates_to_api_strings(issues) if issues is not None else None

    cur_day = start_day
    dfs = []
    while cur_day <= end_day:
        day_str = _date_to_api_string(cur_day)
        day_data = Epidata.covidcast(data_source, signal, time_type="day",
                                     geo_type=geo_type, time_values=day_str,
                                     geo_value=geo_value, as_of=as_of_str,
                                     issues=issues_strs, lag=lag)

        # Two possible error conditions: no data or too much data.
        if day_data["message"] == "no results":
            warnings.warn(f"No {data_source} {signal} data found on {day_str} "
                          f"for geography '{geo_type}'", NoDataWarning)
        if day_data["message"] not in {"success", "no results"}:
            warnings.warn(f"Problem obtaining {data_source} {signal} data on {day_str} "
                          f"for geography '{geo_type}': {day_data['message']}",
                          RuntimeWarning)

        # In the too-much-data case, we continue to try putting the truncated
        # data in our results. In the no-data case, skip this day entirely,
        # since there is no "epidata" in the response.
        # Bug fix: use .get() (as _fetch_epidata does) so an "epidata" key
        # holding an empty list is also skipped; previously an empty frame was
        # appended and a fully-empty concat made drop("direction") raise
        # KeyError on a column-less frame.
        if day_data.get("epidata"):
            dfs.append(pd.DataFrame.from_dict(day_data["epidata"]))

        cur_day += timedelta(1)

    if len(dfs) > 0:
        out = pd.concat(dfs)
        # The deprecated "direction" field is not part of this client's output.
        out.drop("direction", axis=1, inplace=True)
        out["time_value"] = pd.to_datetime(out["time_value"], format="%Y%m%d")
        out["issue"] = pd.to_datetime(out["issue"], format="%Y%m%d")
        out["geo_type"] = geo_type
        out["data_source"] = data_source
        out["signal"] = signal
        return out
    return None