def test_validate_rt_over_time():
    """
    Check that our Bettencourt R(t) predictions integrate properly to explain
    the growth in new cases over time.

    For every state returned by HistoricalData.get_states() (except "VI") the
    R(t) and new-case functions for days 100..200 are passed through
    adjust_rt_to_match_cases. The adjusted R(t) it returns is then run through
    the same routine a second time, and the ratio reported by that second pass
    must lie strictly between 0.95 and 1.05.

    When MAKE_PLOTS is set, a scatter chart of the first-pass values
    ("Average R(t)" vs "Case Ratio / Integrated R(t)") is written to
    TEST_OUTPUT_DIR with one annotated point per state.
    """
    t_list = np.linspace(100, 200, 200 - 100 + 1)
    results = []
    for state in HistoricalData.get_states():  # alternative short list: ["MI", "FL", "TX", "NY", "CA"]
        if state == "VI":
            continue  # test data corrupt for this state
        (rt, nc, _1, _2, _3) = HistoricalData.get_state_data_for_dates(
            state, t_list, compartments_as_functions=True
        )

        # First pass yields the adjusted R(t); the second pass re-checks it
        (avg, adj, adj_rt) = adjust_rt_to_match_cases(rt, nc, t_list)
        (ignore1, check, ignore2) = adjust_rt_to_match_cases(adj_rt, nc, t_list)
        assert 0.95 < check < 1.05

        results.append((state, avg, adj))

    if not MAKE_PLOTS:
        return

    df = pd.DataFrame(results, columns=["state", "avg", "adj"])
    fig, ax = plt.subplots()
    ax.scatter(df.avg, df.adj)
    for i in df.index:
        ax.annotate(df["state"][i], (df["avg"][i], df["adj"][i]))

    plt.xlabel("Average R(t)")
    plt.ylabel("Case Ratio / Integrated R(t)")
    plt.yscale("log")
    fig.savefig(TEST_OUTPUT_DIR / "test_validate_rt_over_time.pdf", bbox_inches="tight")
    # Release the figure so it does not accumulate across the test session
    plt.close(fig)
def test_reproduce_FL_demographic_shift():
    """
    Florida is the one state where we have median age data. Test that this
    median age time history can be recreated using only R(t) and median age
    history for US as a whole - a process we can use with any state.

    A Demographics instance is seeded with Florida's median age on the first
    day (120) and then evolved day by day up to day 230 with Florida's R(t)
    and the default (no-argument) median age function. When MAKE_PLOTS is set
    the evolved values and the Florida values are plotted together and saved
    to TEST_OUTPUT_DIR.

    NOTE(review): there is no assertion comparing the evolved values with the
    expected ones; the comparison is only visual via the saved chart.
    """
    t_list = np.linspace(120, 230, 230 - 120 + 1)

    expected_f = Demographics.median_age_f("FL")
    default_usa_f = Demographics.median_age_f()
    demo = Demographics(median_age=expected_f(t_list[0]))
    (rt_f, ignore1, ignore2, ignore3, ignore4) = HistoricalData.get_state_data_for_dates(
        "FL", t_list
    )

    # One value per entry of t_list: the initial median plus one per evolve step
    values = [demo.get_median_age()]
    for t in t_list[:-1]:
        demo.evolve_median(rt_f(t), default_usa_f(t))
        values.append(demo.get_median_age())

    if not MAKE_PLOTS:
        return

    fig = plt.figure(facecolor="w", figsize=(10, 6))
    plt.plot(t_list, values, label="results")
    plt.plot(t_list, [expected_f(t) for t in t_list], label="expected")
    plt.legend()
    fig.savefig(TEST_OUTPUT_DIR / "test_reproduce_FL_demographic_shift.pdf")
    # Release the figure so it does not accumulate across the test session
    plt.close(fig)
def test_simulate_iowa_late_august():
    """
    What could happen in Iowa given similar behaviour as in Florida?

    Runs the default NowcastingSEIRModel for 60 days starting at day 244,
    driven by Florida's historical R(t) shifted by `lag` days (minus 0.1) and
    by Florida's median age curve shifted by the same lag and pulled toward 35.
    The results DataFrame is always written to TEST_OUTPUT_DIR as CSV; the
    chart is only saved when MAKE_PLOTS is set.
    """
    lag = 85
    days = 60

    # Time period for the run (again relative to Jan 1, 2020)
    t_list = np.linspace(244, 244 + days, days + 1)
    # Florida window starts lag + 5 days earlier and spans days + 2 * lag days
    fl_start = 244 - (lag + 5)
    fl_t_list = np.linspace(fl_start, fl_start + days + 2 * lag, days + 2 * lag + 1)

    # Need assumptions for R(t) and testing_rate(t) for the future as inputs
    (rt_fl, ignore_0, ignore_1, ignore_2, ignore_3) = HistoricalData.get_state_data_for_dates(
        "FL", fl_t_list
    )
    median_age_fl = Demographics.median_age_f("FL")

    def rt(t):
        return rt_fl(t - lag) - 0.1  # due to mask wearing

    def median_age(t):  # Iowa has younger population
        ma = median_age_fl(t - lag)
        adj = 35.0 + 0.75 * (ma - 35)  # turn 50 into 46
        return adj

    # Create a ModelRun (with specific assumptions) based on a Model
    # (potentially with some trained inputs)
    run = ModelRun(
        NowcastingSEIRModel(),  # creating a default model here with no trained parameters
        5e6,  # N, population of jurisdiction
        t_list,
        None,  # tests
        rt,
        case_median_age_f=median_age,
        initial_compartments={"nC": 1170.0, "H": 500.0, "nD": 18.0},
        auto_initialize_other_compartments=True,  # By running to steady state at initial R(t=t0)
        auto_calibrate=True,  # Override some model constants to ensure continuity with initial compartments
    )

    # Execute the run, results are a DataFrame, fig is Matplotlib Figure, ratios are key metrics
    (results, ratios, fig) = run.execute_dataframe_ratios_fig(plot=MAKE_PLOTS)
    results.to_csv(TEST_OUTPUT_DIR / "test_simulate_iowa_results.csv")

    if MAKE_PLOTS:
        fig.savefig(TEST_OUTPUT_DIR / "test_simulate_iowa.pdf", bbox_inches="tight")
        # Release the figure so it does not accumulate across the test session
        plt.close(fig)
def test_reproduce_TX_late_peak():
    """
    Reproduce behaviour of late peaks for Texas starting from t0 = 170
    - peak H=1050 at t=200
    - peak nD=200 at t=220

    The run itself covers days 150..230 and uses the historical R(t), tests
    and compartments as inputs. This test only produces a chart (no
    assertions) and is skipped entirely unless MAKE_PLOTS is set.

    TODO move over assertions from the Florida case that no longer runs
    TODO filter the data to remove the high value for texas on about t=210
    """
    if not MAKE_PLOTS:
        return

    # Time period for the run (again relative to Jan 1, 2020)
    t_list = np.linspace(150, 230, 230 - 150 + 1)  # Here starts from when FL started to ramp up cases

    # Need assumptions for R(t) and testing_rate(t) for the future as inputs.
    # Here taken from historical data but typically will use R(t) projections.
    # Load Texas: the previous "VA" key contradicted the test name and docstring.
    (rt, nC, tests, H, nD) = HistoricalData.get_state_data_for_dates("TX", t_list)

    # Infer median age function for Texas
    # (does slightly better with Demographics.median_age_f("FL"))
    median_age_f = Demographics.infer_median_age_function(t_list, rt)

    # Create a ModelRun (with specific assumptions) based on a Model
    # (potentially with some trained inputs)
    run = ModelRun(
        NowcastingSEIRModel(),  # creating a default model here with no trained parameters
        20e6,  # N, population of jurisdiction
        t_list,
        tests,
        rt,
        case_median_age_f=median_age_f,
        historical_compartments={"nC": nC, "H": H, "nD": nD},
        auto_initialize_other_compartments=True,  # By running to steady state at initial R(t=t0)
        auto_calibrate=True,  # Override some model constants to ensure continuity with initial compartments
    )

    # Execute the run, results are a DataFrame, fig is Matplotlib Figure, ratios are key metrics
    (results, ratios, fig) = run.execute_dataframe_ratios_fig()
    fig.savefig(TEST_OUTPUT_DIR / "test_reproduce_TX_late_peak.pdf", bbox_inches="tight")
    # Release the figure so it does not accumulate across the test session
    plt.close(fig)
def test_demonstrate_hospitalization_delay_changes():
    """
    Demonstrates that hospitalization peaks are delayed relative to new cases
    by varying number of days depending on how well states were prepared at
    various points in time.

    For each named group of states, one chart is written to TEST_OUTPUT_DIR
    plotting new deaths against hospitalizations shifted by the group's delay
    (log-log axes), with a marker on each state's last point and a dashed
    reference line. The test has no assertions and only produces charts, so it
    is skipped unless MAKE_PLOTS is set.
    """
    # Plot-only test: the guard was previously inverted, so it ran exactly
    # when plots were disabled.
    if not MAKE_PLOTS:
        return

    t_list = np.linspace(0, 230, 230 + 1)
    sets = {
        "early": {"states": ["NY", "NJ", "CT"], "delay": 0},
        "late": {"states": ["FL", "TX", "GA", "CA", "AZ"], "delay": 12},
        "steady": {"states": ["PA", "IL"], "delay": 7},
    }

    for name, s in sets.items():
        fig, ax = plt.subplots()
        for state in s["states"]:
            (rt, nc, tests, h, nd) = HistoricalData.get_state_data_for_dates(
                state, t_list, no_functions=True
            )
            # Shift hospitalizations by the group's delay before pairing them
            # with new deaths
            hdelay = h.shift(s["delay"])
            plt.scatter(hdelay.values, nd.values, marker=".", label=state)
            # Mark the last point of each state's trajectory
            plt.plot([hdelay.values[-1]], [nd.values[-1]], marker="o")

        # Dashed reference line from (1e3, 2e1) to (1e4, 2e2)
        plt.plot([1e3, 1e4], [2e1, 2e2], linestyle="--")
        plt.xlabel("Hospitalizations (delayed %s days)" % s["delay"])
        plt.ylabel("new Deaths")
        plt.yscale("log")
        plt.xscale("log")
        fig.legend()
        fig.savefig(TEST_OUTPUT_DIR / ("test_ratio_evolution_%s_states.pdf" % name))
        # Release the figure before starting the next group
        plt.close(fig)
def test_reproducing_forecast_deaths_from_cases():
    """
    Use forecast cases from Reich Labs and evolve forward in time from present
    using that data determining new deaths. Compare those with values
    contained in forecast.

    For each selected state the historical R(t) is extended with the forecast
    new cases, the model is run from `today` to the last forecast day, and
    (when MAKE_PLOTS is set) two charts are saved to TEST_OUTPUT_DIR: the run
    itself, and a comparison of run deaths with actual and forecast deaths.
    The test contains no assertions; without MAKE_PLOTS it only checks that
    every state runs without raising.
    """
    today = 250  # TODO compute for today

    # Get actuals to compare to
    t_history = np.linspace(today - 30, today, 30 + 1)

    for state in ["FL", "GA", "TX", "DE", "WI"]:
        (rt, nc, ignore_tests, h, nd) = HistoricalData.get_state_data_for_dates(state, t_history)

        # Get latest forecast data
        (forecast_nC, forecast_nD) = ForecastData.get_state_nC_nD_forecast(state)
        now_and_forecast = [(today, nc[today])] + forecast_nC
        latest = now_and_forecast[-1][0]  # Latest day in the forecast
        t_list = np.linspace(today, latest, latest - today + 1)

        # Default model with no trained parameters; also supplies serial_period
        model = NowcastingSEIRModel()

        # Create extended R(t) function using forecasted_cases
        forecast_rt_f = extend_rt_function_with_new_cases_forecast(
            rt,
            model.serial_period,
            now_and_forecast,
        )

        # Create a ModelRun (with specific assumptions) based on a Model
        # (potentially with some trained inputs)
        run = ModelRun(
            model,
            20e6,  # N, population of jurisdiction
            t_list,
            None,  # tests (these are currently ignored so positivity not used)
            forecast_rt_f,
            case_median_age_f=Demographics.infer_median_age_function(t_list, forecast_rt_f),
            initial_compartments={"nC": nc[today], "H": h[today], "nD": nd[today]},
            auto_initialize_other_compartments=True,  # By running to steady state at initial R(t=t0)
            auto_calibrate=True,  # Override some model constants to ensure continuity with initial compartments
        )

        # Execute the run, results are a DataFrame, fig is Matplotlib Figure, ratios are key metrics
        (results, ignore, fig) = run.execute_dataframe_ratios_fig(plot=MAKE_PLOTS)

        # Without plots there is nothing left to do for this state. Use
        # `continue` rather than `return` so the remaining states still run.
        if not MAKE_PLOTS:
            continue

        fig.savefig(
            TEST_OUTPUT_DIR / ("test_reproducing_forecast_deaths_from_cases_%s_run.pdf" % state),
            bbox_inches="tight",
        )
        plt.close(fig)

        # Now compare results against forecasts and for continuity with actuals
        run_nd = results.nD

        fig = plt.figure(facecolor="w", figsize=(10, 7))
        plt.title("Compare deaths with forecast for state = %s" % state)
        plt.scatter(t_history, nd, label="actual data", marker="o")
        plt.scatter(
            [point[0] for point in forecast_nD],
            [point[1] for point in forecast_nD],
            marker="o",
            label="Reich Lab forecast",
        )
        plt.plot(t_list, run_nd.values, label="run results")
        plt.legend()
        fig.savefig(
            TEST_OUTPUT_DIR
            / ("test_reproducing_forecast_deaths_from_cases_%s_compare.pdf" % state),
            bbox_inches="tight",
        )
        plt.close(fig)
def test_multiple_periods_for_select_states():
    """
    Show how the fit evolves over time for one state.

    For each selected state, the model is restarted every 10 days from day 100
    to day 240 and run for up to 30 days (never past `latest`), each time
    initialised from that day's observed new cases, hospitalizations and new
    deaths. When MAKE_PLOTS is set, every run is overlaid (dashed grey) on the
    actual new deaths and hospitalizations / 10 for the state, and one chart
    per state is saved to TEST_OUTPUT_DIR. There are no assertions.
    """
    latest = 250
    selected_states = [
        "AZ", "CT", "FL", "GA", "IA",
        "LA", "MA", "ME", "NE", "MO",
        "MT", "NY", "OR", "TX", "VA",
    ]
    restart_days = range(100, 250, 10)  # 100, 110, ..., 240

    for state in selected_states:
        # Get actuals to compare to
        t_all = np.linspace(100, latest, latest - 100 + 1)
        (rt, nc_all, _tests_all, h_all, nd_all) = HistoricalData.get_state_data_for_dates(
            state, t_all
        )

        if MAKE_PLOTS:
            # Create chart
            fig = plt.figure(facecolor="w", figsize=(12, 7))
            plt.title(f"State = %s" % state)
            plt.plot(t_all, nd_all, label="actual nD")
            plt.plot(t_all, h_all / 10.0, label="actual H/10")

        for today in restart_days:
            # Run for 30 days unless that would go past the latest day
            remaining = latest - today
            duration = remaining if remaining < 30 else 30
            t_list = np.linspace(today, today + duration, duration + 1)

            # Need assumptions for R(t) and testing_rate(t) for the future as inputs.
            # R(t) from the full-window fetch above is the one passed to the run.
            (_rt_window, nc, _tests, h, nd) = HistoricalData.get_state_data_for_dates(
                state, t_list
            )

            # Create a ModelRun (with specific assumptions) based on a Model
            # (potentially with some trained inputs)
            model = NowcastingSEIRModel()  # default model with no trained parameters
            median_age_f = Demographics.infer_median_age_function(t_list, rt)
            observed_today = {"nC": nc[today], "H": h[today], "nD": nd[today]}
            run = ModelRun(
                model,
                20e6,  # N, population of jurisdiction
                t_list,
                None,  # tests (these are currently ignored so positivity not used)
                rt,
                case_median_age_f=median_age_f,
                initial_compartments=observed_today,
                auto_initialize_other_compartments=True,  # By running to steady state at initial R(t=t0)
                auto_calibrate=True,  # Override some model constants to ensure continuity with initial compartments
            )

            # Execute the run; only the results DataFrame is used here
            (results, _ratios, _fig) = run.execute_dataframe_ratios_fig(plot=False)
            (fh0, fd0) = model.get_calibration()

            if not MAKE_PLOTS:
                continue

            run_label = f"t=%d: (%.2f, %.2f)" % (today, fh0, fd0)
            plt.plot(
                t_list,
                results["nD"].values,
                label=run_label,
                linestyle="--",
                color="grey",
            )
            plt.plot(t_list, results["H"].values / 10.0, linestyle="--", color="grey")

        if not MAKE_PLOTS:
            continue

        plt.legend()
        plt.xlim(70, latest)
        plt.yscale("log")
        chart_name = f"test_multiple_periods_for_select_states_%s.pdf" % state
        fig.savefig(TEST_OUTPUT_DIR / chart_name)
        plt.close(fig)
def test_run_new_model_incrementally():
    """
    Demonstrates how to minimally run the new model incrementally from an
    initial set of observables (new cases, hospitalizations, new deaths) and
    with given assumptions (for r(t), test_rate and median_age) into the
    future.

    Florida history for days `start`..`today` is combined with two assumed
    future new-case levels to build an extended R(t), and the model is run
    from `start` to `future`. The test asserts that building and executing the
    run takes less than one second. The chart is saved when MAKE_PLOTS is set.
    """
    # Run into the future using R(t) ramp starting sometime in the past
    (start, today, ramp_end, future) = (200, 230, 280, 320)
    t_list = np.linspace(start, future, future - start + 1)

    # Try changing these to get different possible futures
    nC_ramp_to = 8000.0
    nC_future = 15000.0

    # Need assumptions for R(t) and testing_rate(t) for the future as inputs
    data_tlist = np.linspace(start, today, today - start + 1)
    # This would really be smoothed current values
    (rt, nc, tests, h, nd) = HistoricalData.get_state_data_for_dates("FL", data_tlist)

    # Create extended R(t) function using projected cases sometime in the future
    forecast_rt_f = extend_rt_function_with_new_cases_forecast(
        rt,
        NowcastingSEIRModel().serial_period,
        [(start, nc[start]), (today, nc[today]), (ramp_end, nC_ramp_to), (future, nC_future)],
    )

    # Keep the timer in its own name: it used to overwrite `start`, which broke
    # the `with_history = False` branch below (nc[start] indexed by a datetime).
    started_at = datetime.now()
    with_history = True  # try True or False

    # Create a ModelRun (with specific assumptions) based on a Model
    # (potentially with some trained inputs)
    run = ModelRun(
        NowcastingSEIRModel(),  # creating a default model here with no trained parameters
        20e6,  # N, population of jurisdiction
        t_list,
        None,  # tests (these are currently ignored so positivity not used)
        forecast_rt_f,
        case_median_age_f=Demographics.infer_median_age_function(t_list, forecast_rt_f),
        historical_compartments={"nC": nc, "H": h, "nD": nd} if with_history else None,
        initial_compartments=None
        if with_history
        else {"nC": nc[start], "H": h[start], "nD": nd[start]},
        auto_initialize_other_compartments=True,  # By running to steady state at initial R(t=t0)
        auto_calibrate=True,  # Override some model constants to ensure continuity with initial compartments
    )

    # Execute the run, results are a DataFrame, fig is Matplotlib Figure, ratios are key metrics
    (results, ratios, fig) = run.execute_dataframe_ratios_fig()

    # Should have finished in less than 1 second. total_seconds() reports the
    # whole duration, whereas .seconds is only the seconds component.
    elapsed = (datetime.now() - started_at).total_seconds()

    if MAKE_PLOTS:
        fig.savefig(TEST_OUTPUT_DIR / "test_run_new_model_incrementally.pdf")
        # Release the figure so it does not accumulate across the test session
        plt.close(fig)

    assert elapsed < 1
def test_historical_period_state_deaths_and_hospitalizations():
    """
    Validate model by recreating historical death and hospitalization
    timeseries starting for specific periods in time from initial conditions
    and given correct R(t) as input.

    Every state is run for two periods ("early" and "recent") of `num_days`
    days. The calibration values (run.model.lr_fh[0], run.model.lr_fd[0]) and
    the SMAPE reported by each run are collected, one summary chart per period
    is written to TEST_OUTPUT_DIR, and the mean SMAPE of the "recent" period
    must be below TARGET_SMAPE.

    TODO refactor so can have several separate tests with same structure
    TODO add many tests for one state at different times
    TODO get most recent data from Brett and retest recent
    """
    # For recent tests with states we should have this level of accuracy
    # Note smape in [0.,2.] - best values for recent avg is about .335
    TARGET_SMAPE = 0.35

    states = HistoricalData.get_states()
    starts = {"early": 90, "recent": 155}  # calendar day in 2020 when each test starts
    num_days = 95  # duration of each test

    # Various state outbreaks started at different points in time, overrides starts.early
    early_start = EARLY_OUTBREAK_START_DAY_BY_STATE

    # Keep track of the model calibrations for each state and period
    calibrations = []

    for state in states:
        # Do a test for each period (when) for each state
        for (when, std_start) in starts.items():
            if when == "early" and state in early_start:
                start = early_start[state]
            else:
                start = std_start
            t_list = np.linspace(start, start + num_days, num_days + 1)

            # Get historical data for that state and adjust R(t) for long term bias
            (rt, nc, tests, h, nd) = HistoricalData.get_state_data_for_dates(state, t_list)

            if len(nc) != len(t_list):  # no data returned
                continue

            (average_R, growth_ratio, adj_r_f) = adjust_rt_to_match_cases(
                rt, lambda t: nc[t], t_list
            )

            # Configure and run the model with history for initial constraints
            # and to add actuals to charts. Note the median age function is
            # inferred from the unadjusted rt, while the run uses adj_r_f.
            run = ModelRun(
                NowcastingSEIRModel(),
                20e6,  # N
                t_list,
                tests,
                adj_r_f,
                case_median_age_f=Demographics.infer_median_age_function(t_list, rt),
                historical_compartments={"nC": nc, "H": h, "nD": nd},
                auto_initialize_other_compartments=True,
                auto_calibrate=True,
            )
            (results, ratios, fig) = run.execute_dataframe_ratios_fig(plot=MAKE_PLOTS)

            calibrations.append(
                (state, when, run.model.lr_fh[0], run.model.lr_fd[0], ratios["SMAPE"],)
            )

            if MAKE_PLOTS:
                if when == "recent":
                    # Save figure for each state in the recent period
                    # TODO store test result metrics in file rather than only in chart title string
                    fig.savefig(
                        (
                            TEST_OUTPUT_DIR
                            / (
                                "test_historical_period_%s_period_%s_start=%d_calibration=(%.2f,%.2f->%.2f)_smape=%.2f.pdf"
                                % (
                                    when,
                                    state,
                                    start,
                                    run.model.lr_fh[0],
                                    run.model.lr_fd[0],
                                    run.model.lr_fh[0] * run.model.lr_fd[0],
                                    ratios["SMAPE"],
                                )
                            )
                        ),
                        bbox_inches="tight",
                    )
                # Close the figure for every period; "early" figures were
                # previously left open, one per state.
                plt.close(fig)

    # Create summary chart for each "period" showing calibration and SMAPE of all states
    # NOTE(review): these summary charts are written regardless of MAKE_PLOTS.
    df = pd.DataFrame(calibrations, columns=["state", "when", "fh0", "fd0", "smape"])
    df["markersize"] = (10.0 * df["smape"] + 1.0).astype(int)

    for when in ["early", "recent"]:
        sub = df[df["when"] == when]

        fig, ax = plt.subplots()
        # Shaded square covering 0.5..2.0 on both calibration axes
        rect = plt.Rectangle([0.5, 0.5], 1.5, 1.5, facecolor="g", alpha=0.2)
        ax.add_patch(rect)

        # One marker per state, sized by its SMAPE
        for i in sub.index:
            plt.plot([sub.fh0[i]], [sub.fd0[i]], "o", markersize=sub.markersize[i], color="b")
            ax.annotate(sub["state"][i], (sub["fh0"][i], sub["fd0"][i]))

        plt.xlabel("fh0 (gmean =%.2f)" % stats.gmean(sub.fh0))
        plt.ylabel("fd0 (gmean =%.2f)" % stats.gmean(sub.fd0))
        plt.xscale("log")
        plt.yscale("log")
        plt.title("%s with avg SMAPE=%.3f" % (when, sub["smape"].mean()))
        fig.savefig(
            (TEST_OUTPUT_DIR / ("test_historical_period_%s_state_calibrations.pdf" % when)),
            bbox_inches="tight",
        )
        plt.close(fig)

        # Validate that accuracy of recent period state values is still beating the threshold
        if when == "recent":
            assert sub["smape"].mean() < TARGET_SMAPE
def test_using_outputs_of_case_forecast_to_extend_rt():
    """
    Demonstrates how to use the output of a new case forecast in the future,
    along with historical R(t) function to generate an extended R(t) function
    out into the future.

    For each selected state, historical new cases sampled at days 140, 160,
    180, 200 and 220 stand in for a forecast. The extended R(t) built from
    them is integrated forward one day at a time from the first sample, and
    the integrated new cases on the last sample day must be within 5% of the
    actual value. When MAKE_PLOTS is set, a two-panel chart per state (R(t)
    and new cases) is saved to TEST_OUTPUT_DIR.
    """
    future = 250
    t_list = np.linspace(120, future, future - 120 + 1)
    # Same for every state, so read it once
    serial_period = NowcastingSEIRModel().serial_period

    for state in ["AK", "AZ", "FL", "HI", "IL", "NY", "WI"]:
        # Using each state's data to use as forecast cases below and to roughly
        # check extended R(t) function looks reasonable
        (rt_f, nC_f, ignore2, ignore3, ignore4) = HistoricalData.get_state_data_for_dates(
            state, t_list, compartments_as_functions=True
        )

        # As test take forecasted cases (from day 140 on) from this state's
        # historical data for new cases
        forecasted_cases = [(day, nC_f(day)) for day in [140, 160, 180, 200, 220]]
        start = forecasted_cases[0][0]
        end = forecasted_cases[-1][0]

        # Now generate an extended rt_f function from
        # - the forecasted new cases at various times in the future
        # - the rt_f from Bettencourt for the same source data (this state)
        forecast_rt_f = extend_rt_function_with_new_cases_forecast(
            rt_f, serial_period, forecasted_cases
        )

        # Check result for the final new cases at end of extended R(t):
        # grow the cases day by day with exp((R(t) - 1) / serial_period)
        check_nC = [nC_f(start)]
        check_t = list(range(start, future))
        for t in check_t[1:]:
            check_nC.append(check_nC[-1] * math.exp((forecast_rt_f(t) - 1) / serial_period))

        if MAKE_PLOTS:
            # Plot resulting R(t), cases and compare with Bettencourt R(t), actual cases
            fig = plt.figure(facecolor="w", figsize=(8, 8))
            fig.suptitle(("Does R(t) extrapolation fit cases for %s?" % state))

            plt.subplot(211)
            plt.ylabel("R(t)")
            plt.plot(t_list, [forecast_rt_f(t) for t in t_list], label="piecewise linear R(t)")
            plt.plot(t_list, [rt_f(t) for t in t_list], label="Bettencourt R(t)", linestyle="--")
            # Vertical dashed marker at each forecast sample day
            for (day, _cases) in forecasted_cases:
                plt.plot([day, day], [0.5, 1.5], linestyle="--", color="black")
            plt.legend()

            plt.subplot(212)
            plt.ylabel("New cases")
            plt.plot(check_t, check_nC, label="from piecewise linear R(t)")
            plt.plot(t_list, [nC_f(i) for i in t_list], linestyle="--", label="actual cases")
            plt.yscale("log")
            plt.legend()

            fig.savefig(
                TEST_OUTPUT_DIR
                / ("test_using_outputs_of_case_forecast_to_extend_rt_%s.pdf" % state)
            )
            # Release the figure before moving on to the next state
            plt.close(fig)

        # Check that cases match: check_nC ends at day future - 1, so the entry
        # (future - end) from the end corresponds to day `end`
        nC_ratio = check_nC[-(future - end)] / nC_f(end)
        assert 0.95 < nC_ratio < 1.05