def __init__(self, fips, N_samples, t_list, I_initial=1, suppression_policy=None):
    # Caching globally to avoid relatively significant performance overhead
    # of loading for each county.
    global beds_data, population_data
    if not beds_data or not population_data:
        beds_data = DHBeds.local().beds()
        population_data = FIPSPopulation.local().population()

    self.fips = fips
    self.agg_level = AggregationLevel.COUNTY if len(self.fips) == 5 else AggregationLevel.STATE
    self.N_samples = N_samples
    self.I_initial = I_initial
    self.suppression_policy = suppression_policy
    self.t_list = t_list

    if self.agg_level is AggregationLevel.COUNTY:
        self.county_metadata = load_data.load_county_metadata().set_index('fips').loc[fips].to_dict()
        self.state_abbr = us.states.lookup(self.county_metadata['state']).abbr
        self.population = population_data.get_county_level('USA', state=self.state_abbr, fips=self.fips)
        # TODO: Some counties do not have hospitals. Likely need to go to HRR level.
        self.beds = beds_data.get_county_level(self.state_abbr, fips=self.fips) or 0
        self.icu_beds = beds_data.get_county_level(self.state_abbr, fips=self.fips, column='icu_beds') or 0
    else:
        self.state_abbr = us.states.lookup(fips).abbr
        self.population = population_data.get_state_level('USA', state=self.state_abbr)
        self.beds = beds_data.get_state_level(self.state_abbr) or 0
        self.icu_beds = beds_data.get_state_level(self.state_abbr, column='icu_beds') or 0
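# Usage sketch (assumptions flagged): the enclosing class is not shown above,
# so the name ParameterEnsembleGenerator below is hypothetical, and the module
# is assumed to define `beds_data = None` and `population_data = None` at top
# level so the `global` caching in __init__ works.
import numpy as np

t_list = np.linspace(0, 120, 121)  # simulate 120 days in 1-day steps

# A 5-digit FIPS selects county-level aggregation; a 2-digit FIPS selects state.
generator = ParameterEnsembleGenerator(
    fips='06037',  # Los Angeles County, CA
    N_samples=250,
    t_list=t_list,
    I_initial=1,
)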
def build_county_summary(min_date, country="USA", state=None, output_dir=OUTPUT_DIR):
    """Builds county summary json files."""
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(
        AggregationLevel.COUNTY, after=min_date, country=country, state=state
    )

    output_dir = pathlib.Path(output_dir) / "county_summaries"
    _logger.info(f"Outputting to {output_dir}")
    if not output_dir.exists():
        _logger.info(f"{output_dir} does not exist, creating")
        output_dir.mkdir(parents=True)

    counties_by_state = defaultdict(list)
    for country, state, county, fips in timeseries.county_keys():
        counties_by_state[state].append((county, fips))

    for state, counties in counties_by_state.items():
        data = {"counties_with_data": []}
        for county, fips in counties:
            cases = timeseries.get_data(state=state, country=country, fips=fips)
            beds = beds_data.get_county_level(state, fips=fips)
            population = population_data.get_county_level(country, state, fips=fips)
            if population and beds and sum(cases.cases):
                data["counties_with_data"].append(fips)

        output_path = output_dir / f"{state}.summary.json"
        output_path.write_text(json.dumps(data, indent=2))
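# Example invocation, as a sketch: summarize California counties that have bed
# data, population data, and at least one recorded case since mid-March 2020.
# OUTPUT_DIR is whatever the module already defines; the date is illustrative.
import datetime

build_county_summary(min_date=datetime.datetime(2020, 3, 14), state="CA")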
def run_county_level_forecast(
    min_date: datetime.datetime,
    max_date: datetime.datetime,
    output_dir: pathlib.Path,
    country: str = "USA",
    state: str = None,
):
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(
        AggregationLevel.COUNTY, after=min_date, country=country, state=state
    )

    _logger.info(f"Outputting to {output_dir}")
    output_dir.mkdir(parents=True, exist_ok=True)

    counties_by_state = defaultdict(list)
    county_keys = timeseries.county_keys()
    for country, state, county, fips in county_keys:
        counties_by_state[state].append((county, fips))

    pool = get_pool()
    for state, counties in counties_by_state.items():
        _logger.info(f"Running county models for {state}")
        for county, fips in counties:
            args = (
                min_date,
                max_date,
                country,
                state,
                county,
                fips,
                timeseries,
                beds_data,
                population_data,
                output_dir,
            )
            pool.apply_async(
                forecast_each_county,
                args,
                callback=_result_callback_wrapper(f"{county}, {state}: {fips}"),
            )

    pool.close()
    pool.join()
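# The helpers get_pool and _result_callback_wrapper are referenced above but
# not shown. A plausible sketch, assuming get_pool returns a standard
# multiprocessing pool and the callback only logs job completion; the real
# implementations in the repo may differ.
import multiprocessing

def get_pool():
    # Pool size is an assumption; the real helper may choose differently.
    return multiprocessing.Pool(processes=multiprocessing.cpu_count())

def _result_callback_wrapper(label):
    def callback(result):  # apply_async passes the worker's return value here
        _logger.info(f"Finished forecast for {label}")
    return callback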
def run_county_level_forecast(min_date, max_date, country="USA", state=None, output_dir=OUTPUT_DIR):
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(
        AggregationLevel.COUNTY, after=min_date, country=country, state=state
    )

    output_dir = pathlib.Path(output_dir) / "county"
    _logger.info(f"Outputting to {output_dir}")
    # Don't want to replace existing county results when only running the states,
    # so back up the old output directory with a timestamp suffix first.
    if output_dir.exists() and not state:
        backup = output_dir.name + "." + str(int(time.time()))
        output_dir.rename(output_dir.parent / backup)
    output_dir.mkdir(parents=True, exist_ok=True)

    counties_by_state = defaultdict(list)
    county_keys = timeseries.county_keys()
    for country, state, county, fips in county_keys:
        counties_by_state[state].append((county, fips))

    pool = get_pool()
    for state, counties in counties_by_state.items():
        _logger.info(f"Running county models for {state}")
        for county, fips in counties:
            args = (
                min_date,
                max_date,
                country,
                state,
                county,
                fips,
                timeseries,
                beds_data,
                population_data,
                output_dir,
            )
            # forecast_each_county(*args)  # serial execution, useful for debugging
            pool.apply_async(forecast_each_county, args=args)

    pool.close()
    pool.join()
def run_state_level_forecast(
    min_date, max_date, output_dir, country="USA", state=None,
):
    # DH Beds dataset does not have all counties, so using the legacy state
    # level bed data.
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(
        AggregationLevel.STATE, after=min_date, country=country, state=state
    )
    output_dir = pathlib.Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    pool = get_pool()
    for state in timeseries.states:
        args = (
            country,
            state,
            timeseries,
            beds_data,
            population_data,
            min_date,
            max_date,
            output_dir,
        )
        pool.apply_async(
            forecast_each_state,
            args,
            callback=_result_callback_wrapper(f"{state}, {country}"),
        )

    pool.close()
    pool.join()
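# Example driver, as a sketch: the output path and date range are illustrative,
# not taken from the repo.
import datetime
import pathlib

run_state_level_forecast(
    min_date=datetime.datetime(2020, 3, 7),
    max_date=datetime.datetime(2020, 7, 1),
    output_dir=pathlib.Path("results/state"),
)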
def test_dh_beds_loading():
    beds_data = DHBeds.local().beds()
    assert beds_data
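# Run via pytest (e.g. `pytest -k test_dh_beds_loading`). DHBeds.local()
# presumably reads the checked-in dataset from disk, so this is an integration
# check that the bed data loads and is non-empty, not a unit test of parsing.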