Esempio n. 1
0
def test_reperform_chainladder_nowcasts():
    """Re-run the chain-ladder nowcast for every day in a fixed date range.

    First the nowcast for the final day is computed, its 7-day rolling
    average is written to a CSV file and the latest value is printed.
    Afterwards the nowcast is repeated for each day from the first to the
    last day (inclusive) and the latest 7-day average is printed per day.
    The function makes no assertions; it only produces output.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    folder = os.path.join(here, r"../../../")

    first_day = datetime.date(2020, 8, 1)
    last_day = datetime.date(2021, 5, 30)
    beta = 0.0
    method = "L-BFGS-B"

    def lagged_values(x):
        # Bind the data folder so the nowcast only has to supply the argument.
        return get_lagged_values(folder, x)

    def weekly_average(as_of):
        # A fresh GitHubRepository is created for every run, as before.
        cases, _probabilities = nowcast_cases_per_day(
            as_of,
            lagged_values,
            get_cases_per_day_from_data_frame,
            GitHubRepository(),
            beta=beta,
            method=method)
        smoothed = pd.Series(cases).rolling(window=7).mean()
        return smoothed.dropna()

    latest = weekly_average(last_day)
    latest.to_csv(r"c:\temp\chainladder.csv")
    print(latest.iloc[-1])

    number_of_days = (last_day - first_day).days + 1
    for offset in range(number_of_days):
        as_of = first_day + datetime.timedelta(days=offset)
        print(weekly_average(as_of).iloc[-1])
Esempio n. 2
0
def generate_plot_national_cases_per_day_chainladder(repository, statistics_repository, show_only_last, reporting_lag=0):
    """Export a PNG comparing reported daily cases with the chain-ladder nowcast.

    Three lines are drawn: the reported values, the nowcast values and the
    7-day rolling average of the nowcast, each restricted to the trailing
    ``show_only_last`` entries. The image is written below
    ``statistics_repository.folder``.

    Args:
        repository: Passed through to ``chainladder.nowcast_cases_per_day``.
        statistics_repository: Supplies the daily cases, the lagged-value and
            cases-per-day callbacks, the output folder and the country code.
        show_only_last: Number of trailing entries of each series to plot.
        reporting_lag: Days added to the latest date in the daily data to get
            the nowcast date; also forwarded to the nowcast.
    """
    observed = statistics_repository.get_cases_per_day_from_file()
    nowcast_date = observed.index.unique().max() + datetime.timedelta(days=reporting_lag)

    nowcast_result = chainladder.nowcast_cases_per_day(
        nowcast_date,
        statistics_repository.get_lagged_values,
        statistics_repository.get_cases_per_day_from_data_frame,
        repository,
        beta=0.2,
        reporting_lag=reporting_lag,
    )
    corrected = nowcast_result[0]
    # The nowcast values are labelled with the trailing dates of the daily data.
    nowcast = pd.Series(data=corrected, index=observed.index[-len(corrected):])

    trailing = slice(-show_only_last, None)
    series_to_plot = (
        observed.dropna()[trailing],
        nowcast.dropna()[trailing],
        nowcast.rolling(window=7).mean().dropna()[trailing],
    )
    source, source_forecast, source_rolling = [
        ColumnDataSource({"Date": series.index, "Value": series.array})
        for series in series_to_plot
    ]

    plot = figure(x_axis_type="datetime", plot_width=800, plot_height=350)
    plot.title.text = "Positive tests - actual vs. nowcast"
    plot.title.align = "center"
    plot.title.text_font_size = "18px"

    plot.line('Date', 'Value', source=source, line_dash="dashed", line_width=3,
              legend_label="Actual (not fully known)", line_color="black")
    plot.line('Date', 'Value', source=source_forecast, line_dash="dashed", line_width=3,
              legend_label="Nowcast", line_color=Spectral10[2])
    plot.line('Date', 'Value', source=source_rolling, line_width=4,
              legend_label="Nowcast (7-day rolling average)", line_color=Spectral10[0])

    plot.xaxis.formatter = DatetimeTickFormatter(days="%d/%b", months="%d/%b", hours="%d/%b", minutes="%d/%b")
    plot.yaxis.axis_label = "Positive tests"
    plot.legend.location = "top_left"

    relative_path = r"plots\{country}\COVID-19_daily_cases_plot.png".format(
        country=statistics_repository.country_code)
    export_png(plot, filename=statistics_repository.folder + relative_path)
Esempio n. 3
0
def update_measures(df_measures, folder, repository, date_to_run=None):
    """Return ``df_measures`` extended by one row for the newest dataset.

    The newest dataset is fetched from ``repository``; its date is the
    maximum of its index. If the last row of ``df_measures`` shifted by
    ``REPORTING_LAG`` days already equals that date, ``df_measures`` is
    returned unchanged. Otherwise a row with the columns ``net``, ``gross``,
    ``nowcast_chain`` (beta 0.0) and ``nowcast_chain_0_2`` (beta 0.2) is
    added, labelled with the dataset date minus ``REPORTING_LAG`` days.

    Args:
        df_measures: Existing measures indexed by date, or ``None`` / an
            empty frame when no measures exist yet.
        folder: Data folder handed to ``get_lagged_values``.
        repository: Provides ``get_dataset(date)`` and is forwarded to the
            chain-ladder nowcast.
        date_to_run: Date to compute the measures for; when ``None`` the
            dataset for today is requested.

    Returns:
        A new DataFrame with a datetime index, or the original
        ``df_measures`` object if it is already up to date. The input frame
        is never modified.
    """
    has_measures = df_measures is not None and len(df_measures.index) > 0
    # No sentinel date is used for "no measures yet": that avoids mixing
    # datetime/date objects and date arithmetic overflow at the minimum date.
    dt_last_measure_present = df_measures.index[-1].date() if has_measures else None

    if date_to_run is None:
        df_rki_latest = repository.get_dataset(datetime.datetime.today().date())
    else:
        df_rki_latest = repository.get_dataset(date_to_run + datetime.timedelta(days=REPORTING_LAG))

    dt_rki_file = max(df_rki_latest.index).date()

    if has_measures and dt_last_measure_present + datetime.timedelta(days=REPORTING_LAG) == dt_rki_file:
        return df_measures

    df_rki_previous_day = repository.get_dataset(dt_rki_file - datetime.timedelta(days=1))

    new_row = pd.Series(dtype="float64")
    new_row["net"] = __calculate_measure(df_rki_latest, df_rki_previous_day, net_increases)
    new_row["gross"] = __calculate_measure(df_rki_latest, df_rki_previous_day, gross_increases)

    def get_lagged_values_func(x):
        return get_lagged_values(folder, x)

    def nowcast_weekly_average(beta):
        # Latest value of the 7-day rolling mean of the nowcast cases.
        corrected_cases_per_day, _ = chainladder.nowcast_cases_per_day(
            dt_rki_file,
            get_lagged_values_func,
            get_cases_per_day_from_data_frame,
            repository,
            beta=beta,
            method="L-BFGS-B",
            reporting_lag=REPORTING_LAG)
        return pd.Series(corrected_cases_per_day).rolling(window=7).mean().dropna().iloc[-1]

    new_row["nowcast_chain"] = nowcast_weekly_average(0.0)
    new_row["nowcast_chain_0_2"] = nowcast_weekly_average(0.2)

    new_row.name = (dt_rki_file - datetime.timedelta(days=REPORTING_LAG)).strftime("%Y-%m-%d")

    # DataFrame.append was removed in pandas 2.0; pd.concat with a one-row
    # frame produces the same result and also works on older versions.
    new_row_frame = new_row.to_frame().T
    if df_measures is None:
        df_measures_updated = new_row_frame
    else:
        # Keep the index name of the existing frame, as append used to do.
        new_row_frame.index.name = df_measures.index.name
        df_measures_updated = pd.concat([df_measures, new_row_frame])

    df_measures_updated.index = pd.to_datetime(df_measures_updated.index, format="%Y-%m-%d")

    return df_measures_updated
Esempio n. 4
0
def test_nowcast_powell_vs_lbfgsb():
    """Check that the Powell and L-BFGS-B optimisers give matching nowcasts.

    Both runs use the same date, fixture repository and beta of 0.0. The
    largest absolute difference must stay below 0.0003 for the probabilities
    and below 5.0 for the corrected cases per day.
    """
    as_of = datetime.date(2020, 10, 1)
    test_dir = os.path.dirname(os.path.realpath(__file__))
    folder = os.path.join(test_dir, '../../')

    def lagged_values(x):
        return get_lagged_values(folder, x)

    def run_nowcast(method):
        # Only the optimisation method differs between the two runs.
        return nowcast_cases_per_day(
            as_of,
            lagged_values,
            get_cases_per_day_from_data_frame,
            FixtureRepository,
            31,
            beta=0.0,
            method=method)

    cases_powell, probabilities_powell = run_nowcast("Powell")
    cases_lbfgsb, probabilities_lbfgsb = run_nowcast("L-BFGS-B")

    probability_gaps = abs(np.array(probabilities_powell) - np.array(probabilities_lbfgsb))
    assert max(probability_gaps) < 0.0003

    case_gaps = abs(np.array(cases_powell) - np.array(cases_lbfgsb))
    assert max(case_gaps) < 5.0
Esempio n. 5
0
def test_reperform_chainladder_nowcasts():
    """Print the latest 7-day nowcast average per day for beta 0.0 and 0.2.

    For every day in the fixed range (inclusive) the chain-ladder nowcast is
    run twice against one shared caching repository, first with beta 0.2 and
    then with beta 0.0. One comma-separated line is printed per day, with the
    beta 0.0 value first. The function makes no assertions.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    folder = os.path.join(here, r"../../../")

    today_date = datetime.date(2020, 12, 25)
    first_day = datetime.date(2020, 8, 1)
    last_day = datetime.date(2020, 12, 24)
    method = "L-BFGS-B"

    repo = RkiAndGitHubRepositoryWithCaching(today_date)

    def lagged_values(x):
        return get_lagged_values(folder, x)

    def weekly_average(as_of, beta):
        cases, _probabilities = nowcast_cases_per_day(
            as_of,
            lagged_values,
            get_cases_per_day_from_data_frame,
            repo,
            beta=beta,
            method=method)
        smoothed = pd.Series(cases).rolling(window=7).mean()
        return smoothed.dropna()

    number_of_days = (last_day - first_day).days + 1
    for offset in range(number_of_days):
        as_of = first_day + datetime.timedelta(days=offset)
        # Same run order as before: beta 0.2 first, then beta 0.0.
        avg_0_2 = weekly_average(as_of, 0.2)
        avg_0_0 = weekly_average(as_of, 0.0)

        line = "{zero},{zeropointtwo}".format(zero=avg_0_0.iloc[-1],
                                              zeropointtwo=avg_0_2.iloc[-1])
        print(line)
Esempio n. 6
0
def test_update_nowcasts():
    """Smoke test: run the chain-ladder nowcast once for a fixed date.

    The call uses a ``GitHubRepository`` and beta 0.2. The result is unpacked
    into two values but not inspected, so the test only fails if the nowcast
    raises or does not return a pair.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    folder = os.path.join(here, r"../../../")

    as_of = datetime.date(2020, 12, 19)
    rki_repository = GitHubRepository()

    def lagged_values(x):
        return get_lagged_values(folder, x)

    _cases, _probabilities = chainladder.nowcast_cases_per_day(
        as_of,
        lagged_values,
        get_cases_per_day_from_data_frame,
        rki_repository,
        beta=0.2)
Esempio n. 7
0
def generate_plots_chainladder(repository, statistics_repository, start_date, skip_last, reporting_lag=0):
    """Export two PNG plots on the performance of the chain-ladder nowcast.

    The first plot shows 7-day rolling averages of the reported cases, the
    current nowcast, the stored same-day nowcast (beta 0.2) and the gross
    measure. The second plot shows the difference between the gross measure
    and both stored same-day nowcasts on the one hand and the averaged
    reported cases on the other.

    Args:
        repository: Passed through to ``chainladder.nowcast_cases_per_day``.
        statistics_repository: Supplies the daily cases, stored measures,
            nowcast callbacks, the output folder and the country code.
        start_date: Lower bound used to slice every plotted series.
        skip_last: Subtracted from the number of averaged actual values to
            get the number of leading entries kept in the error series.
        reporting_lag: Days added to the latest date in the daily data to get
            the nowcast date; also forwarded to the nowcast.
    """
    date_format = "%d/%b"

    def weekly_mean(series):
        return series.rolling(window=7).mean().dropna()

    def to_source(series):
        return ColumnDataSource({"Date": series.index, "Value": series.array})

    def new_figure(title):
        fig = figure(x_axis_type="datetime", plot_width=800, plot_height=350)
        fig.title.text = title
        fig.title.align = "center"
        fig.title.text_font_size = "18px"
        return fig

    def finish_and_export(fig, y_label, path_template):
        fig.xaxis.formatter = DatetimeTickFormatter(days=date_format, months=date_format,
                                                    hours=date_format, minutes=date_format)
        fig.yaxis.axis_label = y_label
        fig.legend.location = "top_left"
        relative_path = path_template.format(country=statistics_repository.country_code)
        export_png(fig, filename=statistics_repository.folder + relative_path)

    df_daily = statistics_repository.get_cases_per_day_from_file()
    nowcast_date = df_daily.index.unique().max() + datetime.timedelta(days=reporting_lag)

    nowcast_values = chainladder.nowcast_cases_per_day(
        nowcast_date,
        statistics_repository.get_lagged_values,
        statistics_repository.get_cases_per_day_from_data_frame,
        repository,
        beta=0.2,
        reporting_lag=reporting_lag,
    )[0]
    # The nowcast values are labelled with the trailing dates of the daily data.
    nowcast_series = pd.Series(data=nowcast_values, index=df_daily.index[-len(nowcast_values):])

    measures = statistics_repository.get_measures()
    stored_beta_0_2 = measures["nowcast_chain_0_2"]
    stored_beta_0 = measures["nowcast_chain"]
    stored_gross = measures["gross"]

    actual_avg = weekly_mean(df_daily.dropna())[start_date:]
    nowcast_avg = weekly_mean(nowcast_series)[start_date:]
    same_day_beta_0_2 = stored_beta_0_2.dropna()[start_date:]
    same_day_beta_0 = stored_beta_0.dropna()[start_date:]
    gross_avg = weekly_mean(stored_gross)[start_date:]

    source_actual = to_source(actual_avg)
    source_nowcast = to_source(nowcast_avg)
    source_same_day = to_source(same_day_beta_0_2)
    source_gross = to_source(gross_avg)

    performance = new_figure("7-day rolling average of positive tests")
    performance.line('Date', 'Value', source=source_actual, line_dash="dashed", line_width=4,
                     legend_label="Actual (not fully known)", line_color='black')
    performance.line('Date', 'Value', source=source_nowcast, line_width=4,
                     legend_label="Nowcast", line_color=Spectral10[0])
    performance.line('Date', 'Value', source=source_same_day, line_dash="solid", line_dash_offset=2,
                     line_width=4, legend_label="Nowcast (same-day)", line_color=Spectral10[2])
    performance.line('Date', 'Value', source=source_gross, line_width=4,
                     legend_label="Gross", line_color=Spectral10[1])
    finish_and_export(performance, "Positive tests",
                      r"plots\{country}\COVID-19_daily_cases_nowcast_performance.png")

    # Differences against the averaged actual values, cut to the leading entries.
    kept = len(actual_avg) - skip_last
    gross_error = (gross_avg - actual_avg)[:kept]
    beta_0_2_error = (same_day_beta_0_2 - actual_avg)[:kept]
    beta_0_error = (same_day_beta_0 - actual_avg)[:kept]

    source_gross_error = to_source(gross_error)
    source_beta_0_2_error = to_source(beta_0_2_error)
    source_beta_0_error = to_source(beta_0_error)

    errors = new_figure("Error in same-day nowcast of 7-day rolling average of positive tests")
    errors.line('Date', 'Value', source=source_gross_error, line_width=3,
                legend_label="Gross", line_color=Spectral10[1])
    errors.line('Date', 'Value', source=source_beta_0_error, line_width=3, line_dash="solid",
                legend_label="Nowcast (β=0.0)", line_color=Spectral10[9])
    errors.line('Date', 'Value', source=source_beta_0_2_error, line_width=3, line_dash="solid",
                legend_label="Nowcast (β=0.2)", line_color=Spectral10[2])
    finish_and_export(errors, "Error in positive tests",
                      r"plots\{country}\COVID-19_daily_cases_nowcast_error.png")
Esempio n. 8
0
def update_measures(df_measures, folder, repository, date_to_run=None):
    """Return ``df_measures`` extended by one row for the newest dataset.

    The newest dataset is fetched from ``repository``; its date is the
    maximum of its index. If the last row of ``df_measures`` already has that
    date, ``df_measures`` is returned unchanged. Otherwise a row labelled
    with the dataset date is added, containing:

    * ``net_<region>`` / ``gross_<region>`` for every entry of the
      ``Municipal_health_service`` column returned by ``get_ggd_regions()``,
    * the overall ``net`` and ``gross`` measures,
    * ``nowcast`` and ``nowcast_0_2`` from ``forecast_daily_cases``,
    * ``nowcast_chain`` and ``nowcast_chain_0_2`` from the chain-ladder
      nowcast (beta 0.0 and 0.2).

    Args:
        df_measures: Existing measures indexed by date, or ``None`` / an
            empty frame when no measures exist yet.
        folder: Data folder handed to ``forecast_daily_cases`` and
            ``get_lagged_values``.
        repository: Provides ``get_dataset(date)`` and is forwarded to the
            chain-ladder nowcast.
        date_to_run: Date of the dataset to use; when ``None`` the dataset
            for today is requested.

    Returns:
        A new DataFrame with a datetime index, or the original
        ``df_measures`` object if it is already up to date. The input frame
        is never modified.
    """
    # An empty or missing frame has no last row to compare against.
    has_measures = df_measures is not None and len(df_measures.index) > 0
    dt_last_measure_present = df_measures.index[-1].date() if has_measures else None

    if date_to_run is None:
        df_rivm_latest = repository.get_dataset(datetime.datetime.today().date())
    else:
        df_rivm_latest = repository.get_dataset(date_to_run)

    dt_rivm_file = max(df_rivm_latest.index).date()

    if has_measures and dt_last_measure_present == dt_rivm_file:
        return df_measures

    df_rivm_previous_day = get_rivm_file_historical(dt_rivm_file - datetime.timedelta(days=1))
    ggd_regions = get_ggd_regions()

    new_row = pd.Series(dtype="float64")
    for ggd_region in ggd_regions["Municipal_health_service"]:
        new_row["net_" + ggd_region] = __calculate_measure(
            df_rivm_latest, df_rivm_previous_day, net_increases, ggd_region)
        new_row["gross_" + ggd_region] = __calculate_measure(
            df_rivm_latest, df_rivm_previous_day, gross_increases, ggd_region)

    new_row["net"] = __calculate_measure(df_rivm_latest, df_rivm_previous_day, net_increases)
    new_row["gross"] = __calculate_measure(df_rivm_latest, df_rivm_previous_day, gross_increases)

    def latest_weekly_average(series):
        # Latest value of the 7-day rolling mean.
        return series.rolling(window=7).mean().dropna().iloc[-1]

    new_row["nowcast"] = latest_weekly_average(forecast_daily_cases(folder, maximum_lag=14))
    new_row["nowcast_0_2"] = latest_weekly_average(
        forecast_daily_cases(folder, beta=0.2, maximum_lag=14))

    def get_lagged_values_func(x):
        return get_lagged_values(folder, x)

    def chainladder_weekly_average(beta):
        corrected_cases_per_day, _ = chainladder.nowcast_cases_per_day(
            dt_rivm_file,
            get_lagged_values_func,
            get_cases_per_day_from_data_frame,
            repository,
            beta=beta,
            method="L-BFGS-B")
        return latest_weekly_average(pd.Series(corrected_cases_per_day))

    new_row["nowcast_chain"] = chainladder_weekly_average(0.0)
    new_row["nowcast_chain_0_2"] = chainladder_weekly_average(0.2)

    new_row.name = dt_rivm_file.strftime("%Y-%m-%d")

    # DataFrame.append was removed in pandas 2.0; pd.concat with a one-row
    # frame produces the same result and also works on older versions.
    new_row_frame = new_row.to_frame().T
    if df_measures is None:
        df_measures_updated = new_row_frame
    else:
        # Keep the index name of the existing frame, as append used to do.
        new_row_frame.index.name = df_measures.index.name
        df_measures_updated = pd.concat([df_measures, new_row_frame])

    df_measures_updated.index = pd.to_datetime(df_measures_updated.index, format="%Y-%m-%d")

    return df_measures_updated