Ejemplo n.º 1
0
def test_reperform_chainladder_nowcasts():
    """Re-run the chain-ladder nowcast for a fixed range of dates.

    The nowcast is first computed for ``end_date``; its 7-day rolling mean is
    written to ``chainladder.csv`` in the system temporary directory and the
    most recent value is printed. The nowcast is then recomputed for every
    day from ``start_date`` through ``end_date`` (inclusive), printing the
    most recent 7-day rolling mean each time.

    Nothing is asserted: the test only verifies that the calculations run.
    """
    import tempfile  # local import: only needed for the CSV dump below

    current_path = os.path.dirname(os.path.realpath(__file__))
    folder = os.path.join(current_path, r"../../../")

    start_date = datetime.date(2020, 8, 1)
    end_date = datetime.date(2021, 5, 30)
    beta = 0.0
    method = "L-BFGS-B"

    def get_lagged_values_func(maximum_lag):
        # Bind the data folder so the nowcast only has to supply the lag.
        return get_lagged_values(folder, maximum_lag)

    def rolling_week_average(as_of):
        # Nowcast as of the given date and smooth it with a 7-day mean.
        # A fresh GitHubRepository is created per call, as before.
        corrected_cases_per_day, _ = nowcast_cases_per_day(
            as_of,
            get_lagged_values_func,
            get_cases_per_day_from_data_frame,
            GitHubRepository(),
            beta=beta,
            method=method)
        return pd.Series(corrected_cases_per_day).rolling(
            window=7).mean().dropna()

    avg = rolling_week_average(end_date)
    # Write to the platform's temp directory instead of a hard-coded
    # Windows path, which is not guaranteed to exist.
    avg.to_csv(os.path.join(tempfile.gettempdir(), "chainladder.csv"))
    print(avg.iloc[-1])

    number_of_days = (end_date - start_date).days + 1  # inclusive range
    for i in range(number_of_days):
        dt = start_date + datetime.timedelta(days=i)
        print(rolling_week_average(dt).iloc[-1])
Ejemplo n.º 2
0
def test_recreate_lagged_values(covid_19_lagged_values):
    """Check that lagged values recreated for 2020-08-08 match the fixture.

    The most recent lagged values (maximum lag 14) are loaded from the data
    folder three levels above this file, rolled back to the reference date
    and compared frame-for-frame with ``covid_19_lagged_values``.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))
    data_root = os.path.join(test_dir, r"../../../")
    reference_date = datetime.date(2020, 8, 8)

    latest_lagged = get_lagged_values(data_root, maximum_lag=14)
    recreated = recreate_lagged_values(latest_lagged, reference_date)

    assert_frame_equal(recreated, covid_19_lagged_values)
Ejemplo n.º 3
0
def test_nowcast_powell_vs_lbfgsb():
    """Check that the Powell and L-BFGS-B optimizers give similar nowcasts.

    Both optimizers are run for 2020-10-01 with ``beta=0.0``. The largest
    absolute difference must stay below 0.0003 for the probabilities and
    below 5.0 for the corrected cases per day.
    """
    as_of = datetime.date(2020, 10, 1)
    here = os.path.dirname(os.path.realpath(__file__))
    folder = os.path.join(here, '../../')

    def get_lagged_values_func(maximum_lag):
        return get_lagged_values(folder, maximum_lag)

    # Run Powell first, then L-BFGS-B, keeping the results per optimizer.
    results = {}
    for optimizer in ("Powell", "L-BFGS-B"):
        cases, probabilities = nowcast_cases_per_day(
            as_of,
            get_lagged_values_func,
            get_cases_per_day_from_data_frame,
            FixtureRepository,
            31,
            beta=0.0,
            method=optimizer)
        results[optimizer] = (cases, probabilities)

    powell_cases, powell_probabilities = results["Powell"]
    lbfgsb_cases, lbfgsb_probabilities = results["L-BFGS-B"]

    probability_gaps = abs(
        np.array(powell_probabilities) - np.array(lbfgsb_probabilities))
    assert max(probability_gaps) < 0.0003

    case_gaps = abs(np.array(powell_cases) - np.array(lbfgsb_cases))
    assert max(case_gaps) < 5.0
Ejemplo n.º 4
0
def test_reperform_forecasting():
    """Re-run the forecast for each day from 2020-08-01 to 2020-10-10.

    For every day the lagged values are recreated as of that day, cut to the
    window starting at ``first_date``, and fed into the forecast together
    with the historical daily cases for that day. The most recent 7-day
    rolling mean of each forecast is printed; nothing is asserted.
    """
    beta = 0.2
    first_date = datetime.date(2020, 7, 1)
    most_recent_date = datetime.date(2020, 10, 10)
    start_date = datetime.date(2020, 8, 1)

    base_dir = os.path.dirname(os.path.realpath(__file__))
    data_root = os.path.join(base_dir, r"../../../")
    latest_lagged = get_lagged_values(data_root, 14)
    history = get_cases_per_day_historical(first_date, most_recent_date)

    # The history starts at first_date, so skip ahead to start_date.
    offset = (start_date - first_date).days
    day_count = (most_recent_date - start_date).days + 1  # inclusive range

    for step in range(day_count):
        dt = start_date + datetime.timedelta(days=step)
        daily_cases = history[offset + step][1]
        lagged = recreate_lagged_values(latest_lagged, dt)[first_date:dt]
        forecast = forecast_daily_cases_from_data_frames(
            daily_cases, lagged, beta)
        weekly_mean = forecast.rolling(window=7).mean().dropna()
        print(weekly_mean.iloc[-1])
Ejemplo n.º 5
0
def update_files(folder, repository, date_to_run=None):
    """Refresh the daily-cases and lagged-values CSV files under ``folder``.

    Args:
        folder: Base folder; the CSV file names are appended to it by plain
            string concatenation, so it is expected to end with a separator.
        repository: Object providing ``get_dataset(date)``.
        date_to_run: Date of the dataset to fetch; today when ``None``.

    Returns:
        None. Returns early without writing anything when the fetched
        dataset is not newer than the daily cases already on file.
    """
    stored_cases = get_cases_per_day_from_file(folder)

    if date_to_run is None:
        run_date = datetime.datetime.today().date()
    else:
        run_date = date_to_run
    rivm_data = repository.get_dataset(run_date)

    newest_stored = max(stored_cases.index).date()
    newest_rivm = max(rivm_data.index).date()
    if newest_rivm <= newest_stored:
        # Nothing new to store.
        return

    refreshed_cases = get_cases_per_day_from_data_frame(rivm_data,
                                                        newest_rivm)
    refreshed_cases.sort_index(inplace=True)
    # NOTE(review): the relative paths below use Windows-style separators -
    # confirm the readers of these files rely on the same layout.
    refreshed_cases.to_csv(folder + r"data\nl\COVID-19_daily_cases.csv",
                           header=False)

    lagged = update_lagged_values(get_lagged_values(folder), refreshed_cases,
                                  newest_rivm)
    lagged.to_csv(folder + r"data\nl\COVID-19_lagged.csv", header=True)
Ejemplo n.º 6
0
def forecast_daily_cases(folder, beta=0.0, maximum_lag=np.inf):
    """Forecast daily cases from the files stored under ``folder``.

    Args:
        folder: Passed to ``get_cases_per_day_from_file`` and
            ``get_lagged_values`` to locate the stored data.
        beta: Forwarded unchanged to
            ``forecast_daily_cases_from_data_frames``.
        maximum_lag: Forwarded to ``get_lagged_values``; unbounded by
            default.

    Returns:
        Whatever ``forecast_daily_cases_from_data_frames`` returns for the
        index-sorted inputs.
    """
    daily_cases = get_cases_per_day_from_file(folder)
    daily_cases = daily_cases.sort_index()

    # Copy before sorting so the object returned by the loader is untouched.
    lagged_values = get_lagged_values(folder, maximum_lag)
    lagged_values = lagged_values.copy().sort_index()

    return forecast_daily_cases_from_data_frames(daily_cases, lagged_values,
                                                 beta)
Ejemplo n.º 7
0
def update_measures(df_measures, folder, repository, date_to_run=None):
    """Return ``df_measures`` extended with a row for the newest dataset date.

    Args:
        df_measures: Existing measures frame; its last index entry must
            support ``.date()``.
        folder: Passed through to ``forecast_daily_cases`` and
            ``get_lagged_values``.
        repository: Object providing ``get_dataset(date)``; it is also
            handed to ``chainladder.nowcast_cases_per_day``.
        date_to_run: Date of the dataset to fetch; today when ``None``.

    Returns:
        ``df_measures`` itself when its last index date already equals the
        newest date in the fetched dataset. Otherwise a new frame with one
        extra row holding the net/gross measures (overall and per GGD
        region) and the four nowcast values; the index is converted with
        ``pd.to_datetime``.
    """
    dt_last_measure_present = df_measures.index[-1].date()

    if date_to_run is None:
        date_to_run = datetime.datetime.today().date()
    df_rivm_latest = repository.get_dataset(date_to_run)

    dt_rivm_file = max(df_rivm_latest.index).date()

    if dt_last_measure_present == dt_rivm_file:
        # The newest dataset date is already present; nothing to add.
        return df_measures

    df_rivm_previous_day = get_rivm_file_historical(dt_rivm_file -
                                                    datetime.timedelta(days=1))
    ggd_regions = get_ggd_regions()

    new_row = pd.Series(dtype="float64")
    for ggd_region in ggd_regions["Municipal_health_service"]:
        new_row["net_" + ggd_region] = __calculate_measure(
            df_rivm_latest, df_rivm_previous_day, net_increases, ggd_region)
        new_row["gross_" + ggd_region] = __calculate_measure(
            df_rivm_latest, df_rivm_previous_day, gross_increases, ggd_region)

    new_row["net"] = __calculate_measure(df_rivm_latest, df_rivm_previous_day,
                                         net_increases)
    new_row["gross"] = __calculate_measure(df_rivm_latest,
                                           df_rivm_previous_day,
                                           gross_increases)

    # Most recent 7-day rolling mean of the file-based forecast, per beta.
    new_row["nowcast"] = forecast_daily_cases(
        folder, maximum_lag=14).rolling(window=7).mean().dropna().iloc[-1]
    new_row["nowcast_0_2"] = forecast_daily_cases(
        folder, beta=0.2,
        maximum_lag=14).rolling(window=7).mean().dropna().iloc[-1]

    def get_lagged_values_func(maximum_lag):
        # Bind the data folder so the nowcast only has to supply the lag.
        return get_lagged_values(folder, maximum_lag)

    # Most recent 7-day rolling mean of the chain-ladder nowcast, per beta.
    for column, chain_beta in (("nowcast_chain", 0.0),
                               ("nowcast_chain_0_2", 0.2)):
        corrected_cases_per_day, _ = chainladder.nowcast_cases_per_day(
            dt_rivm_file,
            get_lagged_values_func,
            get_cases_per_day_from_data_frame,
            repository,
            beta=chain_beta,
            method="L-BFGS-B")
        new_row[column] = pd.Series(corrected_cases_per_day).rolling(
            window=7).mean().dropna().iloc[-1]

    new_row.name = dt_rivm_file.strftime("%Y-%m-%d")
    # DataFrame.append was removed in pandas 2.0; concatenating the row as a
    # one-row frame gives the same result and leaves df_measures untouched.
    df_measures_updated = pd.concat([df_measures, new_row.to_frame().T])
    df_measures_updated.index = pd.to_datetime(df_measures_updated.index,
                                               format="%Y-%m-%d")

    return df_measures_updated