def test_reperform_chainladder_nowcasts():
    """Re-run the chain-ladder nowcast for the end date and for each day in a range.

    The nowcast for ``end_date`` is computed first; its 7-day rolling average
    is written to a hard-coded CSV path and its last value is printed. The
    nowcast is then recomputed for every date from ``start_date`` through
    ``end_date`` (inclusive), printing the last 7-day rolling average each
    time. Nothing is asserted; the output is meant to be inspected manually.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    folder = os.path.join(here, r"../../../")
    start_date = datetime.date(2020, 8, 1)
    end_date = datetime.date(2021, 5, 30)
    beta = 0.0
    method = "L-BFGS-B"

    def get_lagged_values_func(x):
        return get_lagged_values(folder, x)

    def rolling_average(as_of):
        # A fresh GitHubRepository is created for every run, as before.
        corrected, _ = nowcast_cases_per_day(
            as_of,
            get_lagged_values_func,
            get_cases_per_day_from_data_frame,
            GitHubRepository(),
            beta=beta,
            method=method)
        return pd.Series(corrected).rolling(window=7).mean().dropna()

    final_average = rolling_average(end_date)
    final_average.to_csv(r"c:\temp\chainladder.csv")
    print(final_average.iloc[-1])

    day_count = (end_date - start_date).days + 1
    for offset in range(day_count):
        as_of = start_date + datetime.timedelta(days=offset)
        print(rolling_average(as_of).iloc[-1])
def generate_plot_national_cases_per_day_chainladder(repository, statistics_repository, show_only_last, reporting_lag=0):
    """Plot reported daily cases against the chain-ladder nowcast and export a PNG.

    Args:
        repository: Passed through to ``chainladder.nowcast_cases_per_day``.
        statistics_repository: Supplies the daily case series, the lagged-value
            and per-day accessors handed to the nowcast, and the ``folder`` /
            ``country_code`` attributes used to build the output file name.
        show_only_last: Number of trailing observations of each series to plot.
        reporting_lag: Days added to the latest date in the daily series to get
            the nowcast date; also forwarded to the nowcast itself.

    Three lines are drawn: the reported series, the nowcast, and the 7-day
    rolling mean of the nowcast.
    """
    df_daily = statistics_repository.get_cases_per_day_from_file()
    nowcast_date = df_daily.index.unique().max() + datetime.timedelta(days=reporting_lag)
    nowcast_result = chainladder.nowcast_cases_per_day(
        nowcast_date,
        statistics_repository.get_lagged_values,
        statistics_repository.get_cases_per_day_from_data_frame,
        repository,
        beta=0.2,
        reporting_lag=reporting_lag)
    corrected_cases = nowcast_result[0]

    # The nowcast values are aligned with the trailing dates of the daily series.
    df_updated = pd.Series(data=corrected_cases,
                           index=df_daily.index[-len(corrected_cases):])

    actual_tail = df_daily.dropna()[-show_only_last:]
    nowcast_tail = df_updated.dropna()[-show_only_last:]
    rolling_tail = df_updated.rolling(window=7).mean().dropna()[-show_only_last:]

    def as_source(series):
        return ColumnDataSource({"Date": series.index, "Value": series.array})

    # (data source, line styling) in drawing order.
    line_specs = [
        (as_source(actual_tail),
         dict(line_dash="dashed", line_width=3,
              legend_label="Actual (not fully known)", line_color="black")),
        (as_source(nowcast_tail),
         dict(line_dash="dashed", line_width=3,
              legend_label="Nowcast", line_color=Spectral10[2])),
        (as_source(rolling_tail),
         dict(line_width=4,
              legend_label="Nowcast (7-day rolling average)",
              line_color=Spectral10[0])),
    ]

    plot = figure(x_axis_type="datetime", plot_width=800, plot_height=350)
    plot.title.text = "Positive tests - actual vs. nowcast"
    plot.title.align = "center"
    plot.title.text_font_size = "18px"

    for line_source, styling in line_specs:
        plot.line('Date', 'Value', source=line_source, **styling)

    tick_format = "%d/%b"
    plot.xaxis.formatter = DatetimeTickFormatter(days=tick_format,
                                                 months=tick_format,
                                                 hours=tick_format,
                                                 minutes=tick_format)
    plot.yaxis.axis_label = "Positive tests"
    plot.legend.location = "top_left"

    target = statistics_repository.folder + r"plots\{country}\COVID-19_daily_cases_plot.png".format(
        country=statistics_repository.country_code)
    export_png(plot, filename=target)
def update_measures(df_measures, folder, repository, date_to_run=None):
    """Append one row of measures for the newest dataset, if not already present.

    Args:
        df_measures: Existing measures frame indexed by date, or ``None`` / an
            empty frame when no measures have been recorded yet.
        folder: Folder handed to ``get_lagged_values`` for the nowcasts.
        repository: Object exposing ``get_dataset(date)``; also forwarded to
            ``chainladder.nowcast_cases_per_day``.
        date_to_run: Date to compute measures for. When ``None`` the dataset
            for today is used; otherwise the dataset for
            ``date_to_run + REPORTING_LAG`` days is used.

    Returns:
        ``df_measures`` itself when its last row already corresponds to the
        newest dataset; otherwise a new frame with one extra row holding the
        ``net``, ``gross``, ``nowcast_chain`` and ``nowcast_chain_0_2``
        values, indexed by dataset date minus ``REPORTING_LAG`` days.
    """
    lag = datetime.timedelta(days=REPORTING_LAG)
    has_history = df_measures is not None and len(df_measures.index) > 0

    if date_to_run is None:
        df_rki_latest = repository.get_dataset(datetime.datetime.today().date())
    else:
        df_rki_latest = repository.get_dataset(date_to_run + lag)
    dt_rki_file = max(df_rki_latest.index).date()

    # Nothing to do when the newest dataset has already been processed.
    if has_history and df_measures.index[-1].date() + lag == dt_rki_file:
        return df_measures

    df_rivm_previous_day = repository.get_dataset(dt_rki_file - datetime.timedelta(days=1))

    new_row = pd.Series(dtype="float64")
    new_row["net"] = __calculate_measure(df_rki_latest, df_rivm_previous_day, net_increases)
    new_row["gross"] = __calculate_measure(df_rki_latest, df_rivm_previous_day, gross_increases)

    def get_lagged_values_func(x):
        return get_lagged_values(folder, x)

    def rolling_nowcast(beta):
        # Last value of the 7-day rolling mean of the nowcast series.
        corrected_cases_per_day, _ = chainladder.nowcast_cases_per_day(
            dt_rki_file,
            get_lagged_values_func,
            get_cases_per_day_from_data_frame,
            repository,
            beta=beta,
            method="L-BFGS-B",
            reporting_lag=REPORTING_LAG)
        return pd.Series(corrected_cases_per_day).rolling(window=7).mean().dropna().iloc[-1]

    new_row["nowcast_chain"] = rolling_nowcast(0.0)
    new_row["nowcast_chain_0_2"] = rolling_nowcast(0.2)
    new_row.name = (dt_rki_file - lag).strftime("%Y-%m-%d")

    # DataFrame.append was removed in pandas 2.0; pd.concat works on old and
    # new versions. A missing frame is treated like an empty one instead of
    # failing on ``None.copy()``.
    existing = pd.DataFrame() if df_measures is None else df_measures
    df_measures_updated = pd.concat([existing, new_row.to_frame().T])
    df_measures_updated.index = pd.to_datetime(df_measures_updated.index, format="%Y-%m-%d")
    return df_measures_updated
def test_nowcast_powell_vs_lbfgsb():
    """Check that the "Powell" and "L-BFGS-B" methods yield nearly equal nowcasts.

    Both runs use the same date, fixture repository and ``beta=0.0``. The
    largest absolute difference must stay below 0.0003 for the probabilities
    and below 5.0 for the corrected cases per day.
    """
    as_of = datetime.date(2020, 10, 1)
    here = os.path.dirname(os.path.realpath(__file__))
    folder = os.path.join(here, '../../')

    def get_lagged_values_func(x):
        return get_lagged_values(folder, x)

    def run(optimiser):
        return nowcast_cases_per_day(as_of,
                                     get_lagged_values_func,
                                     get_cases_per_day_from_data_frame,
                                     FixtureRepository,
                                     31,
                                     beta=0.0,
                                     method=optimiser)

    cases_powell, probabilities_powell = run("Powell")
    cases_lbfgsb, probabilities_lbfgsb = run("L-BFGS-B")

    probability_gaps = abs(np.array(probabilities_powell) - np.array(probabilities_lbfgsb))
    assert max(probability_gaps) < 0.0003

    case_gaps = abs(np.array(cases_powell) - np.array(cases_lbfgsb))
    assert max(case_gaps) < 5.0
def test_reperform_chainladder_nowcasts():
    """Print same-day nowcasts for beta 0.0 and beta 0.2 over a fixed date range.

    For every date from ``start_date`` through ``end_date`` (inclusive) the
    nowcast is computed with ``beta=0.2`` and then with ``beta=0.0`` against
    one shared caching repository. The last value of each 7-day rolling
    average is printed as ``<beta 0.0>,<beta 0.2>``. Nothing is asserted.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    folder = os.path.join(here, r"../../../")
    today_date = datetime.date(2020, 12, 25)
    start_date = datetime.date(2020, 8, 1)
    end_date = datetime.date(2020, 12, 24)
    method = "L-BFGS-B"
    repo = RkiAndGitHubRepositoryWithCaching(today_date)

    def get_lagged_values_func(x):
        return get_lagged_values(folder, x)

    def rolling_average(as_of, beta):
        corrected, _ = nowcast_cases_per_day(
            as_of,
            get_lagged_values_func,
            get_cases_per_day_from_data_frame,
            repo,
            beta=beta,
            method=method)
        return pd.Series(corrected).rolling(window=7).mean().dropna()

    total_days = (end_date - start_date).days + 1
    for offset in range(total_days):
        as_of = start_date + datetime.timedelta(days=offset)
        # Same call order as before: beta 0.2 first, then beta 0.0.
        avg_0_2 = rolling_average(as_of, 0.2)
        avg_0_0 = rolling_average(as_of, 0.0)
        print(f"{avg_0_0.iloc[-1]},{avg_0_2.iloc[-1]}")
def test_update_nowcasts():
    """Smoke-test a single chain-ladder nowcast run for a fixed date.

    Runs ``chainladder.nowcast_cases_per_day`` with ``beta=0.2`` against a
    ``GitHubRepository`` and only checks that the call completes and returns
    a two-element result; no values are asserted.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    folder = os.path.join(here, r"../../../")
    dt_rki_file = datetime.date(2020, 12, 19)
    rki_repository = GitHubRepository()

    def get_lagged_values_func(x):
        return get_lagged_values(folder, x)

    _corrected_cases_per_day, _probs = chainladder.nowcast_cases_per_day(
        dt_rki_file,
        get_lagged_values_func,
        get_cases_per_day_from_data_frame,
        rki_repository,
        beta=0.2)
def generate_plots_chainladder(repository, statistics_repository, start_date, skip_last, reporting_lag=0):
    """Export two PNG charts comparing nowcasts with the reported case counts.

    Args:
        repository: Passed through to ``chainladder.nowcast_cases_per_day``.
        statistics_repository: Supplies the daily case series, the stored
            measures, the accessors handed to the nowcast, and the ``folder``
            / ``country_code`` attributes used to build the output file names.
        start_date: First index label included in every plotted series.
        skip_last: Number of trailing points of the actual series left out of
            the error chart.
        reporting_lag: Days added to the latest date in the daily series to
            get the nowcast date; also forwarded to the nowcast itself.

    The first chart shows 7-day rolling averages (actual, current nowcast,
    stored same-day nowcast with beta 0.2, gross). The second chart shows the
    difference between each same-day estimate and the actual rolling average.
    """
    tick_format = "%d/%b"

    def as_source(series):
        return ColumnDataSource({"Date": series.index, "Value": series.array})

    def new_figure(title):
        plot = figure(x_axis_type="datetime", plot_width=800, plot_height=350)
        plot.title.text = title
        plot.title.align = "center"
        plot.title.text_font_size = "18px"
        return plot

    def finish_and_export(plot, y_label, path_template):
        plot.xaxis.formatter = DatetimeTickFormatter(days=tick_format,
                                                     months=tick_format,
                                                     hours=tick_format,
                                                     minutes=tick_format)
        plot.yaxis.axis_label = y_label
        plot.legend.location = "top_left"
        export_png(plot,
                   filename=statistics_repository.folder + path_template.format(
                       country=statistics_repository.country_code))

    df_daily = statistics_repository.get_cases_per_day_from_file()
    nowcast_date = df_daily.index.unique().max() + datetime.timedelta(days=reporting_lag)
    nowcast_values = chainladder.nowcast_cases_per_day(
        nowcast_date,
        statistics_repository.get_lagged_values,
        statistics_repository.get_cases_per_day_from_data_frame,
        repository,
        beta=0.2,
        reporting_lag=reporting_lag)[0]

    # The nowcast values are aligned with the trailing dates of the daily series.
    df_updated = pd.Series(data=nowcast_values,
                           index=df_daily.index[-len(nowcast_values):])

    df_measures = statistics_repository.get_measures()
    stored_chain_0_2 = df_measures["nowcast_chain_0_2"]
    stored_chain = df_measures["nowcast_chain"]
    stored_gross = df_measures["gross"]

    data_actual = df_daily.dropna().rolling(window=7).mean().dropna()[start_date:]
    data_nowcast = df_updated.rolling(window=7).mean().dropna()[start_date:]
    data_same_day_0_2 = stored_chain_0_2.dropna()[start_date:]
    data_same_day = stored_chain.dropna()[start_date:]
    data_gross = stored_gross.rolling(window=7).mean().dropna()[start_date:]

    # --- Chart 1: rolling averages -------------------------------------
    source_actual = as_source(data_actual)
    source_nowcast = as_source(data_nowcast)
    source_same_day = as_source(data_same_day_0_2)
    source_gross = as_source(data_gross)

    performance = new_figure("7-day rolling average of positive tests")
    performance.line('Date', 'Value', source=source_actual,
                     line_dash="dashed", line_width=4,
                     legend_label="Actual (not fully known)",
                     line_color='black')
    performance.line('Date', 'Value', source=source_nowcast,
                     line_width=4, legend_label="Nowcast",
                     line_color=Spectral10[0])
    performance.line('Date', 'Value', source=source_same_day,
                     line_dash="solid", line_dash_offset=2, line_width=4,
                     legend_label="Nowcast (same-day)",
                     line_color=Spectral10[2])
    performance.line('Date', 'Value', source=source_gross,
                     line_width=4, legend_label="Gross",
                     line_color=Spectral10[1])
    finish_and_export(performance,
                      "Positive tests",
                      r"plots\{country}\COVID-19_daily_cases_nowcast_performance.png")

    # --- Chart 2: errors versus the actual rolling average --------------
    kept = len(data_actual) - skip_last
    gross_error = (data_gross - data_actual)[:kept]
    chain_0_2_error = (data_same_day_0_2 - data_actual)[:kept]
    chain_error = (data_same_day - data_actual)[:kept]

    source_gross_error = as_source(gross_error)
    source_chain_0_2_error = as_source(chain_0_2_error)
    source_chain_error = as_source(chain_error)

    errors = new_figure("Error in same-day nowcast of 7-day rolling average of positive tests")
    errors.line('Date', 'Value', source=source_gross_error,
                line_width=3, legend_label="Gross",
                line_color=Spectral10[1])
    errors.line('Date', 'Value', source=source_chain_error,
                line_width=3, line_dash="solid",
                legend_label="Nowcast (β=0.0)",
                line_color=Spectral10[9])
    errors.line('Date', 'Value', source=source_chain_0_2_error,
                line_width=3, line_dash="solid",
                legend_label="Nowcast (β=0.2)",
                line_color=Spectral10[2])
    finish_and_export(errors,
                      "Error in positive tests",
                      r"plots\{country}\COVID-19_daily_cases_nowcast_error.png")
def update_measures(df_measures, folder, repository, date_to_run=None):
    """Append one row of national and regional measures for the newest dataset.

    Args:
        df_measures: Existing measures frame indexed by date, or ``None`` / an
            empty frame when no measures have been recorded yet.
        folder: Folder handed to ``forecast_daily_cases`` and
            ``get_lagged_values``.
        repository: Object exposing ``get_dataset(date)``; also forwarded to
            ``chainladder.nowcast_cases_per_day``.
        date_to_run: Date whose dataset is processed; today when ``None``.

    Returns:
        ``df_measures`` itself when its last row already carries the date of
        the newest dataset; otherwise a new frame with one extra row holding
        per-region ``net_*`` / ``gross_*`` values, the national ``net`` and
        ``gross`` values and the four nowcast values, indexed by the dataset
        date.
    """
    # Tolerate a missing or empty history instead of failing on ``index[-1]``.
    has_history = df_measures is not None and len(df_measures.index) > 0
    dt_last_measure_present = df_measures.index[-1].date() if has_history else None

    if date_to_run is None:
        df_rivm_latest = repository.get_dataset(datetime.datetime.today().date())
    else:
        df_rivm_latest = repository.get_dataset(date_to_run)
    dt_rivm_file = max(df_rivm_latest.index).date()

    # Nothing to do when the newest dataset has already been processed.
    if dt_last_measure_present == dt_rivm_file:
        return df_measures

    df_rivm_previous_day = get_rivm_file_historical(dt_rivm_file - datetime.timedelta(days=1))
    ggd_regions = get_ggd_regions()

    new_row = pd.Series(dtype="float64")
    for ggd_region in ggd_regions["Municipal_health_service"]:
        new_row["net_" + ggd_region] = __calculate_measure(
            df_rivm_latest, df_rivm_previous_day, net_increases, ggd_region)
        new_row["gross_" + ggd_region] = __calculate_measure(
            df_rivm_latest, df_rivm_previous_day, gross_increases, ggd_region)
    new_row["net"] = __calculate_measure(df_rivm_latest, df_rivm_previous_day, net_increases)
    new_row["gross"] = __calculate_measure(df_rivm_latest, df_rivm_previous_day, gross_increases)

    new_row["nowcast"] = forecast_daily_cases(
        folder, maximum_lag=14).rolling(window=7).mean().dropna().iloc[-1]
    new_row["nowcast_0_2"] = forecast_daily_cases(
        folder, beta=0.2, maximum_lag=14).rolling(window=7).mean().dropna().iloc[-1]

    def get_lagged_values_func(x):
        return get_lagged_values(folder, x)

    def rolling_nowcast(beta):
        # Last value of the 7-day rolling mean of the chain-ladder nowcast.
        corrected_cases_per_day, _ = chainladder.nowcast_cases_per_day(
            dt_rivm_file,
            get_lagged_values_func,
            get_cases_per_day_from_data_frame,
            repository,
            beta=beta,
            method="L-BFGS-B")
        return pd.Series(corrected_cases_per_day).rolling(window=7).mean().dropna().iloc[-1]

    new_row["nowcast_chain"] = rolling_nowcast(0.0)
    new_row["nowcast_chain_0_2"] = rolling_nowcast(0.2)
    new_row.name = dt_rivm_file.strftime("%Y-%m-%d")

    # DataFrame.append was removed in pandas 2.0; pd.concat works on old and
    # new versions alike.
    existing = pd.DataFrame() if df_measures is None else df_measures
    df_measures_updated = pd.concat([existing, new_row.to_frame().T])
    df_measures_updated.index = pd.to_datetime(df_measures_updated.index, format="%Y-%m-%d")
    return df_measures_updated