Example #1
from functools import partial
from multiprocessing import Pool

# DEFAULT_RUN_MODE, ALL_STATES, nyt_dataset and the underscore-prefixed
# helpers are module-level names defined elsewhere in the source repository.
def _run_all(state=None,
             run_mode=DEFAULT_RUN_MODE,
             generate_reports=True,
             output_interval_days=4,
             skip_download=False,
             states_only=False,
             output_dir=None,
             skip_whitelist=False):
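    # state: a single state name to run, or None to fan out over ALL_STATES.
    # states_only: when True, restrict processing to state-level pipelines.
    # skip_download / skip_whitelist: reuse cached data and an existing whitelist.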

    _cache_global_datasets()

    if not skip_download:
        cache_all_data()

    if not skip_whitelist:
        _generate_whitelist()

    if state:
        # Temporarily deprecated since it is not needed: the full model fits
        # have superseded it for now, but we may return to a context where
        # this method is used to measure localized Reff.
        # if not states_only:
        #     _impute_start_dates(state)
        _infer_rt(state, states_only=states_only)
        _run_mle_fits(state, states_only=states_only)
        _run_ensembles(state,
                       ensemble_kwargs=dict(run_mode=run_mode,
                                            generate_report=generate_reports,
                                            covid_timeseries=nyt_dataset),
                       states_only=states_only)
        if generate_reports:
            _generate_state_reports(state)
        _map_outputs(state,
                     output_interval_days,
                     states_only=states_only,
                     output_dir=output_dir,
                     run_mode=run_mode)
    else:
        if states_only:
            f = partial(_run_all,
                        run_mode=run_mode,
                        generate_reports=generate_reports,
                        output_interval_days=output_interval_days,
                        skip_download=True,
                        states_only=True,
                        output_dir=output_dir,
                        skip_whitelist=True)
            p = Pool()
            p.map(f, ALL_STATES)
            p.close()
            p.join()

        else:
            for state_name in ALL_STATES:
                _run_all(state_name,
                         run_mode,
                         generate_reports,
                         output_interval_days,
                         skip_download=True,
                         states_only=False,
                         output_dir=output_dir,
                         skip_whitelist=True)
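
A minimal invocation sketch (the entry-point guard and state name are assumptions for illustration, not from the original code):

if __name__ == "__main__":
    _run_all(state="New York")   # full pipeline for a single state
    _run_all(states_only=True)   # state-level runs for ALL_STATES, in parallel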
Example #2
import logging

import us

def _run_all(state=None):
    exceptions = []
    cache_all_data()

    if state:
        _impute_start_dates(state.title())
        _run_mle_fits(state)
        _run_ensembles(state.title())
        _generate_state_reports(state.title())
    else:
        for state in us.states.STATES:
            try:
                _generate_state_reports(state.name)
            except ValueError as e:
                exceptions.append(e)
    for exception in exceptions:
        logging.critical(exception)
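
The collect-then-report error handling used above, reduced to a self-contained sketch (the task list and exception type are illustrative assumptions):

import logging

def run_tasks(tasks):
    errors = []
    for task in tasks:
        try:
            task()
        except ValueError as e:
            # Record the failure and keep going instead of aborting the batch.
            errors.append(e)
    for e in errors:
        logging.critical(e)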
Example #3
def download_data():
    cache_all_data()
Example #4
from functools import partial
from multiprocessing import Pool

import pandas as pd
def _build_all_for_states(
    states=[],
    run_mode=DEFAULT_RUN_MODE,
    generate_reports=False,
    output_interval_days=4,
    skip_download=False,
    output_dir=None,
    skip_whitelist=False,
    states_only=False,
):
    # prepare data
    _cache_global_datasets()
    if not skip_download:
        cache_all_data()
    if not skip_whitelist:
        _generate_whitelist()

    # do everything for just the states, in parallel
    p = Pool()
    states_only_func = partial(
        _state_only_pipeline,
        run_mode=run_mode,
        generate_reports=generate_reports,
        output_interval_days=output_interval_days,
        output_dir=output_dir,
    )
    p.map(states_only_func, states)

    if states_only:
        root.info("Only executing for states; returning.")
        return

    # build the full list of county FIPS codes for the requested states
    all_county_fips = {}
    for state in states:
        state_county_fips = model_fitter.build_county_list(state)
        county_fips_per_state = {fips: state for fips in state_county_fips}
        all_county_fips.update(county_fips_per_state)

    # run county-level Rt inference
    p.map(infer_rt_module.run_county, all_county_fips.keys())

    # calculate model fit
    root.info(f"executing model for {len(all_county_fips)} counties")
    fitters = p.map(model_fitter._execute_model_for_fips,
                    all_county_fips.keys())

    df = pd.DataFrame([fit.fit_results for fit in fitters if fit])
    df["state"] = df.fips.replace(all_county_fips)
    df["mle_model"] = [fit.mle_model for fit in fitters if fit]
    df.index = df.fips

    state_dfs = [state_df for name, state_df in df.groupby("state")]
    p.map(model_fitter._persist_results_per_state, state_dfs)

    # calculate ensemble
    root.info(f"running ensemble for {len(all_county_fips)} counties")
    ensemble_func = partial(
        _run_county,
        ensemble_kwargs=dict(run_mode=run_mode,
                             generate_report=generate_reports),
    )
    p.map(ensemble_func, all_county_fips.keys())

    # output it all
    output_interval_days = int(output_interval_days)
    _cache_global_datasets()

    root.info(
        f"outputing web results for states and {len(all_county_fips)} counties"
    )
    # This does not parallelize well because the web_ui mapper doesn't
    # serialize efficiently.
    # TODO: Remove intermediate artifacts and parallelize artifact creation better.
    # Approximately 40% of the processing time is spent in this step.
    for state in states:
        web_ui_mapper = WebUIDataAdaptorV1(
            state,
            output_interval_days=output_interval_days,
            run_mode=run_mode,
            jhu_dataset=nyt_dataset,
            cds_dataset=cds_dataset,
            output_dir=output_dir,
        )
        web_ui_mapper.generate_state(all_fips=all_county_fips.keys())
    p.close()
    p.join()

    return
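
The partial-plus-Pool fan-out idiom used in both pipelines above, as a self-contained sketch (the worker function and FIPS values are illustrative assumptions):

from functools import partial
from multiprocessing import Pool

def run_county(fips, run_mode="default", generate_report=False):
    # Stand-in for per-county work such as inference or model fitting.
    return fips, run_mode, generate_report

if __name__ == "__main__":
    county_fips = ["06037", "36061", "17031"]
    func = partial(run_county, run_mode="default", generate_report=True)
    with Pool() as pool:
        results = pool.map(func, county_fips)  # one call per county, across workers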