    def __init__(self, fips, N_samples, t_list,
                 I_initial=1, suppression_policy=None):

        # Cache these datasets globally to avoid the significant overhead of
        # reloading them for each county. This assumes beds_data and
        # population_data are initialized to None at module scope.
        global beds_data, population_data
        if beds_data is None or population_data is None:
            beds_data = DHBeds.local().beds()
            population_data = FIPSPopulation.local().population()

        self.fips = fips
        self.agg_level = AggregationLevel.COUNTY if len(self.fips) == 5 else AggregationLevel.STATE
        self.N_samples = N_samples
        self.I_initial = I_initial
        self.suppression_policy = suppression_policy
        self.t_list = t_list

        if self.agg_level is AggregationLevel.COUNTY:
            self.county_metadata = load_data.load_county_metadata().set_index('fips').loc[fips].to_dict()
            self.state_abbr = us.states.lookup(self.county_metadata['state']).abbr
            self.population = population_data.get_county_level('USA', state=self.state_abbr, fips=self.fips)
            # TODO: Some counties do not have hospitals. Likely need to go to the HRR level.
            self.beds = beds_data.get_county_level(self.state_abbr, fips=self.fips) or 0
            self.icu_beds = beds_data.get_county_level(self.state_abbr, fips=self.fips, column='icu_beds') or 0
        else:
            self.state_abbr = us.states.lookup(fips).abbr
            self.population = population_data.get_state_level('USA', state=self.state_abbr)
            self.beds = beds_data.get_state_level(self.state_abbr) or 0
            self.icu_beds = beds_data.get_state_level(self.state_abbr, column='icu_beds') or 0
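
The snippet above is a class initializer. A minimal instantiation sketch follows; the class name ParameterEnsembleGenerator and all argument values are hypothetical, not confirmed by the source:

generator = ParameterEnsembleGenerator(   # hypothetical class name
    fips="06037",                         # 5-digit FIPS -> county-level aggregation
    N_samples=250,                        # number of parameter samples to draw
    t_list=list(range(0, 121)),           # simulation days
)
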
def build_county_summary(min_date, country="USA", state=None, output_dir=OUTPUT_DIR):
    """Builds county summary json files."""
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(
        AggregationLevel.COUNTY, after=min_date, country=country, state=state
    )

    output_dir = pathlib.Path(output_dir) / "county_summaries"
    _logger.info(f"Outputting to {output_dir}")
    if not output_dir.exists():
        _logger.info(f"{output_dir} does not exist, creating")
        output_dir.mkdir(parents=True)

    counties_by_state = defaultdict(list)
    # Use a throwaway name for the country key to avoid shadowing the
    # `country` argument, which is used again when fetching data below.
    for _country, state, county, fips in timeseries.county_keys():
        counties_by_state[state].append((county, fips))

    for state, counties in counties_by_state.items():
        data = {"counties_with_data": []}
        for county, fips in counties:
            cases = timeseries.get_data(state=state, country=country, fips=fips)
            beds = beds_data.get_county_level(state, fips=fips)
            population = population_data.get_county_level(country, state, fips=fips)
            if population and beds and sum(cases.cases):
                data["counties_with_data"].append(fips)

        output_path = output_dir / f"{state}.summary.json"
        output_path.write_text(json.dumps(data, indent=2))
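
A minimal invocation sketch for build_county_summary; the date and state are illustrative, and this assumes the module's imports and OUTPUT_DIR are in scope:

import datetime

build_county_summary(min_date=datetime.datetime(2020, 3, 1), state="CA")
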
def run_county_level_forecast(
    min_date: datetime.datetime,
    max_date: datetime.datetime,
    output_dir: pathlib.Path,
    country: str = "USA",
    state: Optional[str] = None,  # assumes `from typing import Optional`
):
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(AggregationLevel.COUNTY,
                                       after=min_date,
                                       country=country,
                                       state=state)

    _logger.info(f"Outputting to {output_dir}")
    output_dir.mkdir(parents=True, exist_ok=True)

    counties_by_state = defaultdict(list)
    # Avoid shadowing the `country` argument with the loop variable.
    for _country, state, county, fips in timeseries.county_keys():
        counties_by_state[state].append((county, fips))

    pool = get_pool()
    for state, counties in counties_by_state.items():
        _logger.info(f"Running county models for {state}")
        for county, fips in counties:
            args = (
                min_date,
                max_date,
                country,
                state,
                county,
                fips,
                timeseries,
                beds_data,
                population_data,
                output_dir,
            )

            pool.apply_async(
                forecast_each_county,
                args,
                callback=_result_callback_wrapper(
                    f"{county}, {state}: {fips}"),
            )

    pool.close()
    pool.join()
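
An illustrative call; the dates and output path are hypothetical. The function fans the per-county work out to the worker pool and blocks until all forecasts finish:

import datetime
import pathlib

run_county_level_forecast(
    min_date=datetime.datetime(2020, 3, 1),
    max_date=datetime.datetime(2020, 7, 1),
    output_dir=pathlib.Path("output/county"),
    state="TX",
)
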
def run_county_level_forecast(min_date,
                              max_date,
                              country="USA",
                              state=None,
                              output_dir=OUTPUT_DIR):
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(AggregationLevel.COUNTY,
                                       after=min_date,
                                       country=country,
                                       state=state)

    output_dir = pathlib.Path(output_dir) / "county"
    _logger.info(f"Outputting to {output_dir}")
    # Only back up existing output on a full run; when running a single
    # state, leave prior results in place.
    if output_dir.exists() and not state:
        backup = output_dir.name + "." + str(int(time.time()))
        output_dir.rename(output_dir.parent / backup)

    output_dir.mkdir(parents=True, exist_ok=True)

    counties_by_state = defaultdict(list)
    # Avoid shadowing the `country` argument with the loop variable.
    for _country, state, county, fips in timeseries.county_keys():
        counties_by_state[state].append((county, fips))

    pool = get_pool()
    for state, counties in counties_by_state.items():
        _logger.info(f"Running county models for {state}")
        for county, fips in counties:
            args = (
                min_date,
                max_date,
                country,
                state,
                county,
                fips,
                timeseries,
                beds_data,
                population_data,
                output_dir,
            )
            # Serial fallback, useful when debugging: forecast_each_county(*args)
            pool.apply_async(forecast_each_county, args=args)

    pool.close()
    pool.join()
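
This older variant defaults output_dir to OUTPUT_DIR and, before a full run (state=None), renames any existing county directory to a timestamped backup (e.g. county.1585000000) instead of overwriting it. A hypothetical full-country call:

import datetime

run_county_level_forecast(
    min_date=datetime.datetime(2020, 3, 1),
    max_date=datetime.datetime(2020, 7, 1),
)
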
def run_state_level_forecast(
    min_date,
    max_date,
    output_dir,
    country="USA",
    state=None,
):
    # The DH beds dataset does not have all counties, so we use the legacy
    # state-level bed data here.
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(AggregationLevel.STATE,
                                       after=min_date,
                                       country=country,
                                       state=state)
    output_dir = pathlib.Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    pool = get_pool()
    for state in timeseries.states:
        args = (
            country,
            state,
            timeseries,
            beds_data,
            population_data,
            min_date,
            max_date,
            output_dir,
        )
        pool.apply_async(
            forecast_each_state,
            args,
            callback=_result_callback_wrapper(f"{state}, {country}"),
        )

    pool.close()
    pool.join()
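
As with the county runner, a state-level run is launched with a date range and an output directory; the values here are illustrative:

import datetime
import pathlib

run_state_level_forecast(
    min_date=datetime.datetime(2020, 3, 1),
    max_date=datetime.datetime(2020, 7, 1),
    output_dir=pathlib.Path("output/state"),
)
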
def test_dh_beds_loading():
    beds_data = DHBeds.local().beds()
    assert beds_data
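
This smoke test only asserts that the DH beds dataset loads and is non-empty. Assuming it lives in a test module on the path, it can be run in isolation with pytest:

pytest -k test_dh_beds_loading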