Beispiel #1
0
def save_MCMC_sampling(df, column, trace, pastdays, interval=0.95, start=0):
    """Write the posterior summary of ``trace['r_t']`` into ``df``.

    Three columns are added (or overwritten) in place:

    * ``{column}_Rt_MCMC_pastdays_{pastdays:03d}``: mean of the samples
      along axis 0;
    * ``{column}_Rt_MCMC_HDI_{pct}_min_pastdays_{pastdays:03d}`` and
      ``..._max_...``: columns 0 and 1 of the interval returned by
      ``pm.stats.hpd`` for ``hdi_prob=interval``.

    Every series goes through ``padnan(..., (start, pastdays))`` before
    being stored (presumably NaN-padding ``start`` entries on the left and
    ``pastdays`` on the right so it lines up with the frame; confirm
    against ``padnan``).

    Args:
        df: DataFrame that receives the new columns (mutated in place).
        column: Prefix used for the generated column names.
        trace: Sampling result indexable by ``'r_t'``.
        pastdays: Second element of the padding tuple; also encoded,
            zero-padded to three digits, in the column names.
        interval: Probability mass of the credible interval.
        start: First element of the padding tuple.

    Returns:
        None.
    """
    # Round instead of truncating: 0.58 * 100 evaluates to 57.99999999999999,
    # so a bare int() would label that interval "57" in the column names.
    interval_frac = int(round(interval * 100))

    samples = trace['r_t']
    pad = (start, pastdays)

    df[f'{column}_Rt_MCMC_pastdays_{pastdays:03d}'] = padnan(
        np.mean(samples, axis=0), pad)

    # credible interval
    sampling_hdi = pm.stats.hpd(samples, hdi_prob=interval)
    df[f'{column}_Rt_MCMC_HDI_{interval_frac}_min_pastdays_{pastdays:03d}'] = padnan(
        sampling_hdi[:, 0], pad)
    df[f'{column}_Rt_MCMC_HDI_{interval_frac}_max_pastdays_{pastdays:03d}'] = padnan(
        sampling_hdi[:, 1], pad)
Beispiel #2
0
def predict_next_value(X, use_last_values=None, search_steps=100):
    """Extrapolate ``X`` one step ahead from a Tikhonov-smoothed copy.

    Only the trailing ``use_last_values`` entries of ``X`` are used (all of
    them when the argument is falsy). ``search_steps`` regularization
    strengths, evenly spaced in ``[0, 10]``, are tried; for each one the
    window is smoothed, extrapolated with ``next_from_taylor`` and scored
    with ``smape`` against the window itself. The strength with the lowest
    score is then used for the returned prediction.

    Args:
        X: 1-D array-like exposing ``shape`` and slicing.
        use_last_values: Size of the trailing window; falsy means the
            whole series.
        search_steps: Number of candidate strengths to evaluate.

    Returns:
        The extrapolated series for the best strength, passed through
        ``padnan`` with ``X.shape[0] - window + 1`` leading positions
        (presumably NaN-filled so the last element is the forecast for the
        step after ``X``; confirm against ``padnan``).
    """
    window = use_last_values if use_last_values else X.shape[0]
    # Leading entries left out of the error score.
    skip = 3

    tail = X[-window:]
    n_steps = len(tail)

    def _smooth_and_extrapolate(strength):
        # Smooth the window with the given strength, then step forward.
        smoother = TikhonovRegularization(timesteps=n_steps, alpha=strength)
        return next_from_taylor(smoother.stat_smooth_data(tail, verbose=False))

    def _score(strength):
        # Shift predictions by one so each lines up with the value it targets.
        shifted = padnan(_smooth_and_extrapolate(strength), (1, 0))
        return smape(tail[skip:], shifted[skip:-1]) * 100

    candidates = np.linspace(0, 10., search_steps)
    errors = [_score(strength) for strength in candidates]
    best = candidates[np.argmin(errors)]

    return padnan(_smooth_and_extrapolate(best), (X.shape[0] - window + 1, 0))
Beispiel #3
0
def RSVD_smooth_data(df,
                     alpha,
                     beta,
                     season_period=7,
                     trend_alpha=100.,
                     difference_degree=2):
    """Deseason ``newCasesByPublishDate`` and store the results in ``df``.

    For each processed column a ``LogSeasonalRegularizer`` is fitted and
    its outputs are written to new ``<col>_deseason*`` columns: the
    deseasoned series, seasonality, trend (``_smoothed``), residuals and
    relative residuals. ``naive.compute_Rt`` is then evaluated on the
    non-NaN trend and stored as ``<col>_deseason_smoothed_Rt``.
    Progress and diagnostics are printed along the way.

    Args:
        df: DataFrame holding the input column; mutated in place.
        alpha: Forwarded to ``naive.compute_Rt``.
        beta: Forwarded to ``naive.compute_Rt``.
        season_period: Passed to the regularizer as both ``season_period``
            and ``max_r``.
        trend_alpha: Passed to the regularizer.
        difference_degree: Passed to the regularizer.

    Returns:
        None.
    """
    columns_before = df.columns

    targets = ['newCasesByPublishDate']
    prettyprint.pprint(targets)

    # (column suffix, attribute of the fitted model) in write order.
    outputs = (
        ('', 'deseasoned'),
        ('_seasonality', 'season_svd'),
        ('_smoothed', 'trend'),
        ('_residuals', 'residuals'),
        ('_relative_residuals', 'relative_residuals'),
    )

    for source_col in targets:
        prefix = source_col + '_deseason'
        print(prefix)

        regularizer = LogSeasonalRegularizer(
            df[source_col],
            season_period=season_period,
            max_r=season_period,
            trend_alpha=trend_alpha,
            difference_degree=difference_degree,
            verbose=True)
        fitted = regularizer.fit()
        print(f'patterns: {fitted.final_r}')

        for suffix, attribute in outputs:
            df[prefix + suffix] = getattr(fitted, attribute)

        # Rt is computed on the trend without NaNs, then padded back on the
        # left by the model's own padding.
        trend_rt = naive.compute_Rt(df[prefix + '_smoothed'].dropna(),
                                    alpha=alpha,
                                    beta=beta)
        df[prefix + '_smoothed_Rt'] = padnan(trend_rt,
                                             (fitted.padding_left, 0))

        prettyprint.pprint(regularizer.adfuller())

        print('new columns generated:')
        added = [name for name in df.columns if name not in columns_before]
        prettyprint.pprint(added)
Beispiel #4
0
def draw_expanded_series(X,
                         draws,
                         season_period,
                         trend_alpha,
                         difference_degree,
                         truncate,
                         alpha,
                         beta,
                         method='future_range',
                         lower_ratio=0.2,
                         upper_ratio=1.2,
                         res_window=None,
                         verbose=False):
    """Extend ``X`` by one step using ``draws`` sampled next values.

    The series is decomposed with ``LogSeasonalRegularizer`` into trend,
    seasonality and relative residuals. The next trend value comes from
    ``predict_next_case`` and the next seasonal value from
    ``predict_next_value``; candidate next observations are then drawn
    according to ``method``.

    Args:
        X: Input series; a ``pandas.Series`` is converted to a NumPy array.
        draws: Number of candidate next values (rows of the result).
        season_period: Passed to the regularizer as ``season_period`` and
            ``max_r``; also the default for ``res_window``.
        trend_alpha: Passed to the regularizer.
        difference_degree: Passed to the regularizer.
        truncate: When truthy, the last element of the decomposition and
            of ``X`` is dropped after the fit.
        alpha: Forwarded to ``Rt.naive.compute_Rt`` and
            ``predict_next_case``.
        beta: Forwarded to ``Rt.naive.compute_Rt`` and
            ``predict_next_case``.
        method: ``'future_range'`` draws the next trend value from a normal
            centred on the prediction (scale equal to the prediction),
            truncated to ``[lower_ratio, upper_ratio]`` times the
            prediction. ``'residuals'`` adds normally distributed noise
            whose relative mean and spread come from the recent relative
            residuals.
        lower_ratio: Lower truncation bound as a multiple of the predicted
            trend value (``'future_range'`` only).
        upper_ratio: Upper truncation bound as a multiple of the predicted
            trend value (``'future_range'`` only).
        res_window: Number of trailing relative residuals used by
            ``'residuals'``; falsy means ``season_period``. With a window
            of 1 the mean is 0 and the spread is the absolute value of the
            last residual.
        verbose: Passed to the regularizer.

    Returns:
        A 2-D array with ``draws`` rows: each row is the (possibly
        truncated) ``X`` followed by one drawn next value.

    Raises:
        AssertionError: If ``method`` is not one of the supported values.
    """
    # Explicit check instead of ``assert``: asserts vanish under
    # ``python -O``, which would let an unknown method run the whole fit and
    # then fail on an unbound ``X_next``. The exception type is kept so
    # existing callers observe the same failure.
    if method not in ('future_range', 'residuals'):
        raise AssertionError(
            f"method must be 'future_range' or 'residuals', got {method!r}")

    # isinstance also covers Series subclasses.
    if isinstance(X, pd.Series):
        X = X.to_numpy()

    # res_window
    if not res_window:
        res_window = season_period

    # deseason:
    lrsvd = LogSeasonalRegularizer(X,
                                   season_period=season_period,
                                   max_r=season_period,
                                   trend_alpha=trend_alpha,
                                   difference_degree=difference_degree,
                                   verbose=verbose)
    m = lrsvd.fit()

    # truncate means that, AFTER deseasoning, we drop the last element:
    # in this way, deseasoning is affected by the additional element in
    # the original series, while we drop the last result as it is in the future
    if truncate:
        sl = np.s_[:-1]
    else:
        sl = np.s_[:]
    T, S, eps_rel = m.trend[sl], m.season_svd[sl], m.relative_residuals[sl]

    _, _, S_hat = LogSeasonalRegularizer.periods_to_matrix(S, season_period)

    # compute Rt on T
    rt = padnan(Rt.naive.compute_Rt(T[m.padding_left:], alpha, beta),
                (m.padding_left, 0))

    # predict next T value
    T_next = predict_next_case(T, rt, alpha, beta)[-1]

    # predict next S value
    # we need the season of tomorrow
    # the season of today is the last column in S_hat
    # hence -> the season of tomorrow is the first column, as seasons are periodic
    S_tomorrow = S_hat[:, 0]
    # predict the next value of S_tomorrow
    S_tomorrow_next = predict_next_value(S_tomorrow, use_last_values=15)[-1]

    # compute the next X value
    if method == 'future_range':
        # draws multiple X_next based on range applied to last T_next
        lower, upper = T_next * lower_ratio, T_next * upper_ratio
        mu, sigma = T_next, T_next
        # truncnorm takes its bounds in units of ``scale`` relative to ``loc``
        possible_T_next = stats.truncnorm((lower - mu) / sigma,
                                          (upper - mu) / sigma,
                                          loc=mu,
                                          scale=sigma)
        X_next = S_tomorrow_next + possible_T_next.rvs((draws, 1))
    else:  # method == 'residuals'
        # draw multiple eps based on eps_rel
        if res_window == 1:
            eps_mean = 0.
            eps_sigma = np.abs(eps_rel[-1])
        else:
            eps_mean = eps_rel[-res_window:].mean()
            eps_sigma = eps_rel[-res_window:].std()
        eps_rel_draw = np.random.normal(loc=eps_mean,
                                        scale=eps_sigma,
                                        size=(draws, 1))
        eps_draw = eps_rel_draw * T_next
        X_next = T_next + S_tomorrow_next + eps_draw

    # expand the original X series and return it
    X_expanded = np.repeat(X[sl].reshape((1, -1)), draws, axis=0)
    X_expanded = np.append(X_expanded, X_next, axis=1)

    return X_expanded
Beispiel #5
0
def predict_next_case(cases, rt, alpha, beta):
    """Predict cases as infectious charge times the extrapolated ``rt``.

    ``cases`` is extended by one trailing slot via ``padnan(cases, (0, 1))``
    and every NaN is replaced with 0 before
    ``Rt.naive.infectious_charge`` is evaluated. The first element of the
    charge is dropped and the remainder is multiplied element-wise by
    ``next_from_taylor(rt)``.

    Args:
        cases: Case series.
        rt: Reproduction-number series; expected to be as long as
            ``cases`` so the product lines up (an assumption, confirm
            with callers).
        alpha: Forwarded to ``Rt.naive.infectious_charge``.
        beta: Forwarded to ``Rt.naive.infectious_charge``.

    Returns:
        The element-wise product described above.
    """
    extended_cases = np.nan_to_num(padnan(cases, (0, 1)), nan=0.)
    charge = Rt.naive.infectious_charge(extended_cases,
                                        alpha=alpha,
                                        beta=beta)
    shifted_charge = charge[1:]
    rt_next = next_from_taylor(rt)
    return shifted_charge * rt_next
Beispiel #6
0
def next_from_taylor(x):
    """Extrapolate the value following each position of ``x``.

    Uses a second-order step built from backward differences: with ``x1``
    and ``x2`` being ``x`` lagged by one and two positions,

        x + (x - x1) + 0.5 * (x - 2 * x1 + x2)
            = 2.5 * x - 2 * x1 + 0.5 * x2

    The lags come from ``padnan(x[:-k], (k, 0))`` (presumably ``k`` NaNs
    are prepended, so the first two results would be NaN; confirm against
    ``padnan``).
    """
    lag_one = padnan(x[:-1], (1, 0))
    lag_two = padnan(x[:-2], (2, 0))
    return 2.5 * x - 2. * lag_one + 0.5 * lag_two