def test_reperform_chainladder_nowcasts():
    """Re-run the chain-ladder nowcast for every day in a fixed date range.

    The nowcast is first performed for the final date; the 7-day rolling
    mean of its corrected cases is written to a CSV file and its last value
    printed. Afterwards the nowcast is repeated for each date from
    2020-08-01 through 2021-05-30 (inclusive) and the last 7-day rolling
    mean of every run is printed. The test makes no assertions.
    """
    first_day = datetime.date(2020, 8, 1)
    last_day = datetime.date(2021, 5, 30)
    repo_root = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), r"../../../")
    # Swap "L-BFGS-B" for "Powell" to try the alternative optimizer.
    nowcast_options = {"beta": 0.0, "method": "L-BFGS-B"}

    def lagged_values_for(x):
        # Binds the data folder so the nowcast only supplies the second
        # argument of get_lagged_values.
        return get_lagged_values(repo_root, x)

    def weekly_average(target_date):
        # A fresh GitHubRepository is created for every nowcast run.
        corrected, _probabilities = nowcast_cases_per_day(
            target_date,
            lagged_values_for,
            get_cases_per_day_from_data_frame,
            GitHubRepository(),
            **nowcast_options)
        return pd.Series(corrected).rolling(window=7).mean().dropna()

    final_average = weekly_average(last_day)
    # NOTE(review): hard-coded Windows-style output location.
    final_average.to_csv(r"c:\temp\chainladder.csv")
    print(final_average.iloc[-1])

    day_count = (last_day - first_day).days + 1
    for step in range(day_count):
        target_date = first_day + datetime.timedelta(days=step)
        print(weekly_average(target_date).iloc[-1])
def test_recreate_lagged_values(covid_19_lagged_values):
    """Check that lagged values recreated for 2020-08-08 match the fixture.

    The most recent lagged values (maximum lag 14) are loaded from the
    folder three levels above this file, rolled back to 2020-08-08 with
    ``recreate_lagged_values`` and compared against the
    ``covid_19_lagged_values`` fixture.
    """
    this_dir = os.path.dirname(os.path.realpath(__file__))
    repo_root = os.path.join(this_dir, r"../../../")
    latest_lagged = get_lagged_values(repo_root, maximum_lag=14)

    cutoff = datetime.date(2020, 8, 8)
    recreated = recreate_lagged_values(latest_lagged, cutoff)

    assert_frame_equal(recreated, covid_19_lagged_values)
def test_nowcast_powell_vs_lbfgsb():
    """Check that the "Powell" and "L-BFGS-B" nowcasts agree closely.

    Both optimizers are run for 2020-10-01 with ``beta=0.0``. The largest
    absolute difference must stay below 0.0003 for the probabilities and
    below 5.0 for the corrected cases per day.
    """
    nowcast_date = datetime.date(2020, 10, 1)
    this_dir = os.path.dirname(os.path.realpath(__file__))
    folder = os.path.join(this_dir, '../../')

    def lagged_values_for(x):
        return get_lagged_values(folder, x)

    def run_nowcast(method):
        # FixtureRepository is handed over as-is (not called), and 31 is
        # the fifth positional argument of nowcast_cases_per_day.
        return nowcast_cases_per_day(
            nowcast_date,
            lagged_values_for,
            get_cases_per_day_from_data_frame,
            FixtureRepository,
            31,
            beta=0.0,
            method=method)

    cases_powell, probabilities_powell = run_nowcast("Powell")
    cases_lbfgsb, probabilities_lbfgsb = run_nowcast("L-BFGS-B")

    probability_gaps = np.abs(
        np.array(probabilities_powell) - np.array(probabilities_lbfgsb))
    assert max(probability_gaps) < 0.0003

    case_gaps = np.abs(np.array(cases_powell) - np.array(cases_lbfgsb))
    assert max(case_gaps) < 5.0
def test_reperform_forecasting():
    """Re-run the daily-case forecast for every day in a fixed date range.

    For each date from 2020-08-01 through 2020-10-10 (inclusive) the lagged
    values are recreated for that date, restricted to the window starting
    at 2020-07-01, and combined with the historical daily cases of that
    date into a forecast with ``beta=0.2``. The last 7-day rolling mean of
    each forecast is printed. The test makes no assertions.
    """
    first_date = datetime.date(2020, 7, 1)
    start_date = datetime.date(2020, 8, 1)
    most_recent_date = datetime.date(2020, 10, 10)
    beta = 0.2

    this_dir = os.path.dirname(os.path.realpath(__file__))
    latest_lagged = get_lagged_values(
        os.path.join(this_dir, r"../../../"), 14)
    historical_cases = get_cases_per_day_historical(
        first_date, most_recent_date)

    # The historical list starts at first_date, so skip ahead to start_date.
    offset = (start_date - first_date).days
    day_count = (most_recent_date - start_date).days + 1

    for step in range(day_count):
        run_date = start_date + datetime.timedelta(days=step)
        # Each list entry is indexable; element 1 holds the daily cases.
        daily_cases = historical_cases[offset + step][1]
        lagged = recreate_lagged_values(latest_lagged, run_date)
        lagged = lagged[first_date:run_date]
        forecast = forecast_daily_cases_from_data_frames(
            daily_cases, lagged, beta)
        weekly_average = forecast.rolling(window=7).mean().dropna()
        print(weekly_average.iloc[-1])
def update_files(folder, repository, date_to_run=None):
    """Refresh the stored daily-case and lagged-value CSV files.

    The dataset for ``date_to_run`` is fetched from ``repository``. Only
    when its latest index date is later than the latest date of the stored
    daily cases are ``COVID-19_daily_cases.csv`` and ``COVID-19_lagged.csv``
    rewritten in the ``data/nl`` sub-directory of ``folder``.

    Args:
        folder: Base directory that contains the ``data/nl`` sub-directory.
        repository: Object exposing ``get_dataset(date)``.
        date_to_run: Date handed to ``repository.get_dataset``; defaults to
            today's date.

    Returns:
        None.
    """
    # Imported locally so this function does not rely on a module-level
    # import being present.
    import os

    ds_daily_cases = get_cases_per_day_from_file(folder)

    if date_to_run is None:
        date_to_run = datetime.datetime.today().date()
    df_rivm = repository.get_dataset(date_to_run)

    last_available_date = max(ds_daily_cases.index).date()
    last_available_date_rivm = max(df_rivm.index).date()
    if last_available_date_rivm <= last_available_date:
        # Nothing newer than what is already stored.
        return

    # Build the paths with os.path.join instead of concatenating a
    # backslash-separated suffix: this no longer requires ``folder`` to end
    # with a separator and yields a real directory path on every platform.
    data_dir = os.path.join(folder, "data", "nl")

    ds_daily_cases_updated = get_cases_per_day_from_data_frame(
        df_rivm, last_available_date_rivm)
    ds_daily_cases_updated.sort_index(inplace=True)
    ds_daily_cases_updated.to_csv(
        os.path.join(data_dir, "COVID-19_daily_cases.csv"), header=False)

    # The lagged values are read after the daily cases have been written,
    # matching the original statement order.
    df_lagged = get_lagged_values(folder)
    df_lagged = update_lagged_values(
        df_lagged, ds_daily_cases_updated, last_available_date_rivm)
    df_lagged.to_csv(
        os.path.join(data_dir, "COVID-19_lagged.csv"), header=True)
def forecast_daily_cases(folder, beta=0.0, maximum_lag=np.inf):
    """Forecast daily cases from the data stored under ``folder``.

    Args:
        folder: Directory passed to ``get_cases_per_day_from_file`` and
            ``get_lagged_values``.
        beta: Forwarded unchanged to
            ``forecast_daily_cases_from_data_frames``.
        maximum_lag: Forwarded to ``get_lagged_values``.

    Returns:
        Whatever ``forecast_daily_cases_from_data_frames`` returns for the
        index-sorted daily cases and lagged values.
    """
    daily_cases = get_cases_per_day_from_file(folder)
    daily_cases = daily_cases.sort_index()

    # Work on a copy so the object returned by get_lagged_values is left
    # untouched.
    lagged_values = get_lagged_values(folder, maximum_lag).copy()
    lagged_values = lagged_values.sort_index()

    return forecast_daily_cases_from_data_frames(
        daily_cases, lagged_values, beta)
def _latest_weekly_mean(values):
    """Return the last 7-period rolling mean of a pandas object."""
    return values.rolling(window=7).mean().dropna().iloc[-1]


def update_measures(df_measures, folder, repository, date_to_run=None):
    """Append one row of measures for the latest available dataset date.

    Args:
        df_measures: Existing measures; the last index entry must offer
            ``.date()``.
        folder: Directory passed to ``forecast_daily_cases`` and
            ``get_lagged_values``.
        repository: Object exposing ``get_dataset(date)``; also forwarded
            to ``chainladder.nowcast_cases_per_day``.
        date_to_run: Date handed to ``repository.get_dataset``; defaults to
            today's date.

    Returns:
        ``df_measures`` itself when its last index date already equals the
        latest date in the fetched dataset; otherwise a new DataFrame with
        one extra row and its index converted with ``pd.to_datetime``.
    """
    dt_last_measure_present = df_measures.index[-1].date()

    if date_to_run is None:
        date_to_run = datetime.datetime.today().date()
    df_rivm_latest = repository.get_dataset(date_to_run)

    dt_rivm_file = max(df_rivm_latest.index).date()
    if dt_last_measure_present == dt_rivm_file:
        # Already up to date: hand back the input unchanged.
        return df_measures

    df_rivm_previous_day = get_rivm_file_historical(
        dt_rivm_file - datetime.timedelta(days=1))
    ggd_regions = get_ggd_regions()

    # Keys are inserted in a fixed order because that order determines the
    # position of any column that does not exist in df_measures yet.
    new_row = pd.Series(dtype="float64")
    for ggd_region in ggd_regions["Municipal_health_service"]:
        new_row["net_" + ggd_region] = __calculate_measure(
            df_rivm_latest, df_rivm_previous_day, net_increases, ggd_region)
        new_row["gross_" + ggd_region] = __calculate_measure(
            df_rivm_latest, df_rivm_previous_day, gross_increases,
            ggd_region)

    new_row["net"] = __calculate_measure(
        df_rivm_latest, df_rivm_previous_day, net_increases)
    new_row["gross"] = __calculate_measure(
        df_rivm_latest, df_rivm_previous_day, gross_increases)

    new_row["nowcast"] = _latest_weekly_mean(
        forecast_daily_cases(folder, maximum_lag=14))
    new_row["nowcast_0_2"] = _latest_weekly_mean(
        forecast_daily_cases(folder, beta=0.2, maximum_lag=14))

    def get_lagged_values_func(x):
        # Binds the data folder so the nowcast only supplies the second
        # argument of get_lagged_values.
        return get_lagged_values(folder, x)

    for column, beta in (("nowcast_chain", 0.0),
                         ("nowcast_chain_0_2", 0.2)):
        corrected_cases_per_day, _ = chainladder.nowcast_cases_per_day(
            dt_rivm_file,
            get_lagged_values_func,
            get_cases_per_day_from_data_frame,
            repository,
            beta=beta,
            method="L-BFGS-B")
        new_row[column] = _latest_weekly_mean(
            pd.Series(corrected_cases_per_day))

    new_row.name = dt_rivm_file.strftime("%Y-%m-%d")

    # DataFrame.append was removed in pandas 2.0. Concatenating a one-row
    # frame built from the Series gives the same result: the Series name
    # becomes the row label and the columns are unioned in order of
    # appearance.
    row_frame = new_row.to_frame().T.infer_objects()
    row_frame.index.name = df_measures.index.name
    df_measures_updated = pd.concat([df_measures, row_frame])

    # The new label is a string; normalise the whole index to datetimes.
    df_measures_updated.index = pd.to_datetime(
        df_measures_updated.index, format="%Y-%m-%d")
    return df_measures_updated