Beispiel #1
0
def plot_anomalies_deltas(ndays=120):
    """Show effect of anomaly correction.

    Plots the 'Delta_orig' (labelled 'Raw') and 'Delta' (labelled
    'Anomalies corrected') columns of the 'Nederland' region data on a
    semi-logarithmic y axis.

    - ndays: number of most recent rows to show (presumably one row per
      day; verify against get_region_data()).
    """

    df, _npop = get_region_data('Nederland', correct_anomalies=True)
    df = df.iloc[-ndays:]
    fig, ax = plt.subplots(tight_layout=True, figsize=(8, 5))

    for col, lab in [('Delta_orig', 'Raw'), ('Delta', 'Anomalies corrected')]:
        ax.semilogy(df[col], label=lab)
    ax.legend()
    tools.set_xaxis_dateformat(ax, maxticks=7)
    title = 'Anomaly correction'
    ax.set_title(title)
    # The canvas-level set_window_title() was deprecated in Matplotlib 3.4
    # and later removed; the manager-level method works on old and new
    # versions alike.
    fig.canvas.manager.set_window_title(title)
    fig.show()
Beispiel #2
0
def plot_cumulative_trends(ndays=100, regions=None, source='r7'):
    """Plot cumulative trends per capita (pull data from global DFS dict).

    - ndays: number of most recent rows to show per region.
    - regions: list of region names (including e.g. 'HR:Zuid',
      'POP:100-200'). If not specified, assume 'Nederland'.
    - source: currently unused by this function; kept so that existing
      callers that pass it keep working.
    """

    if regions is None:
        # The default used to crash on iteration; fall back to the same
        # default region as plot_daily_trends().
        # NOTE(review): assumes select_cases_region() accepts 'Nederland'.
        regions = ['Nederland']

    fig, ax = plt.subplots(figsize=(12, 6))
    # fig.subplots_adjust(top=0.945, bottom=0.085, left=0.09, right=0.83)

    for region in regions:
        df, npop = nl_regions.select_cases_region(DFS['cases'], region)
        df = df.iloc[-ndays:]
        # Scale the cumulative count to cases per 100k inhabitants.
        ax.semilogy(df['Total_reported'] * (1e5 / npop), label=region)

    ax.set_ylabel('Cumulatieve Covid-19 gevallen per 100k')
    tools.set_xaxis_dateformat(ax)
    ax.legend()
    fig.show()
Beispiel #3
0
def plot_doo_dpl_don(ndays=100):
    """Plot distribution DOO DPL DON per file date.

    For each pair of consecutive file dates (the last `ndays` unique values
    of index level 0 of the summary DataFrame), the day-to-day change of the
    summed DOO, DPL and DON columns is computed, together with the DON
    entries of the newest file for 'yesterday' and 'today'. The resulting
    table is plotted and printed.
    """

    summary = get_summary_df()
    file_dates = summary.index.get_level_values(0).unique()[-ndays:]

    base_cols = ['DOO', 'DPL', 'DON']

    # One record per file date:
    # (fdate, dDOO, dDPL, dDON, DON_yesterday, DON_today).
    records = []
    for prev_date, this_date in zip(file_dates[:-1], file_dates[1:]):
        deltas = [
            summary.loc[this_date, col].sum() -
            summary.loc[prev_date, col].sum() for col in base_cols
        ]
        don_yesterday = summary.loc[(this_date, prev_date), 'DON']
        don_today = summary.loc[(this_date, this_date), 'DON']
        records.append([this_date, *deltas, don_yesterday, don_today])

    value_cols = base_cols + ['DON_yesterday', 'DON_today']
    dfd = pd.DataFrame(records, columns=['Date_file'] + value_cols)
    dfd = dfd.set_index('Date_file')

    # The total is accumulated over all five value columns.
    dfd['Total'] = 0
    for col in value_cols:
        dfd['Total'] = dfd['Total'] + dfd[col]

    fig, ax = plt.subplots(tight_layout=True, figsize=(10, 5))
    line_styles = ['-', '--', '-.', ':'] * 3
    for col, style in zip(value_cols + ['Total'], line_styles):
        ax.plot(dfd[col], label=col, linestyle=style)

    ax.legend()
    ax.set_ylabel('Aantal per dag')
    tools.set_xaxis_dateformat(ax)
    fig.show()

    print(dfd)
Beispiel #4
0
def plot_Rt_oscillation():
    """Plot the RIVM R value, its smoothed trend and the residual spectrum.

    Uses global DFS['Rt_rivm'] variable.

    Top panel: the last 120 non-NaN values of the 'R' column, a
    Savitzky-Golay smoothed version (window 15, order 2) and their
    difference. Bottom panel: power spectrum of the windowed difference,
    with frequency expressed in cycles per week.
    """

    fig, axs = plt.subplots(2, 1, tight_layout=True)

    df_Rt_rivm = DFS['Rt_rivm']

    series_Rr = df_Rt_rivm['R'][~df_Rt_rivm['R'].isna()].iloc[-120:]
    Rr = series_Rr.to_numpy()
    Rr_smooth = scipy.signal.savgol_filter(Rr, 15, 2)

    dates = series_Rr.index
    n = len(Rr)

    ax = axs[0]
    ax.plot(dates, Rr, label='Rt (RIVM)')
    ax.plot(dates, Rr_smooth, label='Rt smooth', zorder=-1)
    ax.plot(dates, Rr - Rr_smooth, label='Difference')
    ax.set_ylabel('R')
    ax.set_xlim(dates[0], dates[-1])
    tools.set_xaxis_dateformat(ax)
    plt.xticks(rotation=0)  # undo; doesn't work with subplots
    ax.legend()

    ax = axs[1]
    # window = 1 - np.linspace(-1, 1, len(Rr))**2

    # Tukey window; alpha is chosen such that 14 samples form the flat top.
    window = scipy.signal.windows.tukey(n, alpha=(n - 14) / n)
    n_padded = n * 3 // 7 * 7  # make sure it's a multiple of 1 week
    spectrum = np.fft.rfft((Rr - Rr_smooth) * window, n=n_padded)
    # Bin k corresponds to k/n_padded cycles per sample; the factor 7
    # converts to cycles per week, assuming one sample per day.
    freqs = 7 / n_padded * np.arange(len(spectrum))
    mask = (freqs < 2.4)
    ax.plot(freqs[mask], np.abs(spectrum[mask])**2)
    ax.set_xlabel('Frequency (1/wk)')
    ax.set_ylabel('Power')
    ax.grid()

    # The canvas-level set_window_title() was deprecated in Matplotlib 3.4
    # and later removed; use the figure manager instead.
    fig.canvas.manager.set_window_title('Rt oscillation')

    fig.show()
Beispiel #5
0
def plot_daily_trends(ndays=100,
                      lastday=-1,
                      mun_regexp=None,
                      region_list=None,
                      source='r7',
                      subtitle=None):
    """Plot daily-case trends (pull data from global DFS dict).

    - ndays: number of most recent rows to show per region.
    - lastday: up to this day.
    - source: 'r7' (7-day rolling average), 'raw' (no smoothing), 'sg'
      (Savitsky-Golay smoothed).
    - mun_regexp: regular expression matching municipalities.
    - region_list: list of municipalities (including e.g. 'HR:Zuid',
      'POP:100-200', 'JSON:{...}').
      If mun_regexp and region_list are both specified, then concatenate.
      If neither are specified, assume 'Nederland'.

      JSON is a json-encoded dict with:

      - 'label': short label string
      - 'color': for plotting, optional.
      - 'fmt': format for plotting, e.g. 'o--', optional.
      - 'muns': list of municipality names (not read here; presumably
        consumed by get_region_data()).

    - subtitle: second title line (optional)

    Besides the plot, a table with per-region statistics is printed.
    """

    df_mun = DFS['mun']

    fig, ax = plt.subplots(figsize=(12, 6))
    # Leave room for the extra title line if there is a subtitle.
    fig.subplots_adjust(top=0.945 - 0.03 * (subtitle is not None),
                        bottom=0.1,
                        left=0.09,
                        right=0.83)

    region_list = [] if region_list is None else list(region_list)

    if mun_regexp:
        region_list = [m for m in df_mun.index if re.match(mun_regexp, m)
                       ] + region_list

    if not region_list:
        region_list = ['Nederland']

    # Settings that are the same for every region.
    default_fmt = 'o-' if ndays < 70 else '-'
    psize = 5 if ndays < 30 else 3
    dnc_column = dict(r7='Delta7r', raw='Delta', sg='DeltaSG')[source]
    delta_t = 7
    i0 = dict(raw=-1, r7=-3, sg=-3)[source]

    labels = []  # tuples (y, txt)
    citystats = []  # tuples (Rt, T2, cp100k, cwk, popk, city_name)
    for region in region_list:
        df1, n_inw = get_region_data(region, lastday=lastday)
        df1 = df1.iloc[-ndays:]

        fmt = default_fmt
        color = None
        if region.startswith('JSON:'):
            reg_dict = json.loads(region[5:])
            reg_label = reg_dict['label']
            fmt = reg_dict.get('fmt', default_fmt)
            color = reg_dict.get('color')
        else:
            reg_label = re.sub(r'POP:(.*)-(.*)', r'\1k-\2k inw.', region)
            reg_label = re.sub(r'^[A-Z]+:', '', reg_label)

        ax.semilogy(df1[dnc_column] * 1e5,
                    fmt,
                    color=color,
                    label=reg_label,
                    markersize=psize)

        t_double, Rt = get_t2_Rt(df1[dnc_column], delta_t, i0=i0)
        # Use .iloc for positional access: plain [-1] on a date-indexed
        # Series relies on a deprecated positional fallback.
        citystats.append(
            (np.around(Rt, 2), np.around(t_double, 2),
             np.around(df1['Delta'].iloc[-1] * 1e5, 2),
             int(df1['Delta7r'].iloc[-4] * n_inw * 7 + 0.5),
             int(n_inw / 1e3 + .5), reg_label))

        if abs(t_double) > 60:
            texp = 'Stabiel'
        elif t_double > 0:
            texp = f'×2: {t_double:.3g} d'
        elif t_double < 0:
            texp = f'×½: {-t_double:.2g} d'
        else:
            # t_double is NaN or exactly 0; previously this left texp
            # unbound (or stale from the previous region).
            texp = '?'

        # Dashed line between the two points used for the trend estimate.
        ax.semilogy(df1.index[[i0 - delta_t, i0]],
                    df1[dnc_column].iloc[[i0 - delta_t, i0]] * 1e5,
                    'k--',
                    zorder=-10)

        labels.append(
            (df1[dnc_column].iloc[-1] * 1e5, f' {reg_label} ({texp})'))

    # From here on, df1 refers to the data of the last region in the list.
    _add_restriction_labels(ax,
                            df1.index[0],
                            df1.index[-1],
                            with_ribbons=False,
                            flagmatch='CaseGraph')

    dfc = pd.DataFrame.from_records(
        sorted(citystats),
        columns=['Rt', 'T2', 'C/100k', 'C/wk', 'Pop/k', 'Region'])
    dfc.set_index('Region', inplace=True)
    print(dfc)

    lab_x = df1.index[-1] + pd.Timedelta('1.2 d')
    add_labels(ax, labels, lab_x)

    if source == 'r7':
        ax.axvline(df1.index[-4], color='gray')
        # ax.text(df1.index[-4], 0.3, '3 dagen geleden - extrapolatie', rotation=90)
        title = '7-daags voortschrijdend gemiddelde; laatste 3 dagen zijn een schatting'
    elif source == 'sg':
        ax.axvline(df1.index[-8], color='gray')
        # ax.text(df1.index[-4], 0.3, '3 dagen geleden - extrapolatie', rotation=90)
        title = 'Gefilterde data; laatste 7 dagen zijn minder nauwkeurig'
    else:
        title = 'Dagcijfers'

    ax.set_ylabel('Nieuwe gevallen per 100k per dag')

    #ax.set_ylim(0.05, None)
    ax.set_xlim(None, df1.index[-1] + pd.Timedelta('1 d'))
    from matplotlib.ticker import LogFormatter, FormatStrFormatter
    ax.yaxis.set_major_formatter(FormatStrFormatter('%g'))
    # Monkey-patch to prevent '%e' formatting.
    # NOTE: this patches the LogFormatter class itself, i.e. it affects
    # every LogFormatter in the process, not only this figure.
    LogFormatter._num_to_string = lambda _0, x, _1, _2: ('%g' % x)
    ax.yaxis.set_minor_formatter(LogFormatter(minor_thresholds=(3, 1)))
    #plt.xticks(pd.to_dateTime(['2020-0{i}-01' for i in range(1, 9)]))
    ax.legend()  # loc='lower left')

    tools.set_xaxis_dateformat(ax, yminor=True)

    if subtitle:
        title += f'\n{subtitle}'
        win_xtitle = f', {subtitle}'
    else:
        win_xtitle = ''

    ax.set_title(title)
    # The canvas-level set_window_title() was deprecated in Matplotlib 3.4
    # and later removed; use the figure manager instead.
    fig.canvas.manager.set_window_title(
        f'Case trends (ndays={ndays}){win_xtitle}')
    fig.show()
Beispiel #6
0
def construct_Dfunc(delays, plot=False):
    """Return interpolation functions fD(t) and fdD(t).

    fD(t) is the delay between infection and reporting at reporting time t.
    fdD(t) is its derivative.

    Parameter:

    - delays: tuples (time_report, delay_days)
    - plot: whether to generate a plot.

    Return:

    - fD: interpolation function for D(t) with t in nanoseconds.
    - fdD: interpolation function for dD/dt.
      (taking time in ns but returning dD per day.)
    - delay_str: delay string e.g. '7' or '7-9'
    """

    # Timestamp.value is always in nanoseconds since the epoch, regardless
    # of the internal resolution of the timestamp. (Converting via
    # float(ts.to_datetime64()) only works for nanosecond-unit datetimes.)
    ts0 = [float(pd.to_datetime(x[0]).value) for x in delays]
    Ds0 = [float(x[1]) for x in delays]
    if len(delays) == 1:
        # prevent interp1d complaining: it needs at least two points.
        # Add a second point one second (1e9 ns) later with the same delay.
        ts0 = [ts0[0], ts0[0] + 1e9]
        Ds0 = Ds0 * 2

    # delay function as linear interpolation;
    # nanosecond timestamps as t value.
    fD0 = scipy.interpolate.interp1d(ts0,
                                     Ds0,
                                     kind='linear',
                                     bounds_error=False,
                                     fill_value=(Ds0[0], Ds0[-1]))

    # construct derivative dD/dt, smoothen out:
    # central difference over a 6-day span, sampled once per day.
    day = 1e9 * 86400  # one day in nanoseconds
    ts = np.arange(ts0[0] - 3 * day, ts0[-1] + 3.01 * day, day)
    dDs = (fD0(ts + 3 * day) - fD0(ts - 3 * day)) / 6
    fdD = scipy.interpolate.interp1d(ts,
                                     dDs,
                                     'linear',
                                     bounds_error=False,
                                     fill_value=(dDs[0], dDs[-1]))

    # reconstruct D(t) to be consistent with the smoothened derivative.
    # cumtrapz was renamed to cumulative_trapezoid (SciPy 1.6) and the old
    # name was later removed; support both.
    cumulative_trapezoid = getattr(scipy.integrate, 'cumulative_trapezoid',
                                   None)
    if cumulative_trapezoid is None:
        cumulative_trapezoid = scipy.integrate.cumtrapz
    Ds = cumulative_trapezoid(dDs, ts / day, initial=0) + Ds0[0]
    fD = scipy.interpolate.interp1d(ts,
                                    Ds,
                                    'linear',
                                    bounds_error=False,
                                    fill_value=(Ds[0], Ds[-1]))

    Dmin, Dmax = np.min(Ds0), np.max(Ds0)
    if Dmin == Dmax:
        delay_str = f'{Dmin:.0f}'
    else:
        delay_str = f'{Dmin:.0f}-{Dmax:.0f}'

    if plot:
        fig, ax = plt.subplots(1, 1, figsize=(7, 3), tight_layout=True)
        tsx = np.linspace(ts[0], pd.to_datetime('now').value)
        ax.plot(pd.to_datetime(tsx.astype(int)), fD(tsx))
        ax.set_ylabel('Vertraging (dagen)')
        tools.set_xaxis_dateformat(ax, 'Rapportagedatum')
        # The canvas-level set_window_title() was deprecated in Matplotlib
        # 3.4 and later removed; use the figure manager instead.
        fig.canvas.manager.set_window_title(
            'Vertraging infectiedatum - rapportage')
        fig.show()

    return fD, fdD, delay_str
Beispiel #7
0
def get_dow_correction(dayrange=(-50, -1), verbose=False):
    """Return array with day-of-week correction factors.

    Results are cached per dayrange in _DOW_CORR_CACHE; the cache is
    bypassed (and refreshed) when verbose is set.

    - dayrange: days to consider for DoW correction.
    - verbose: whether to show plots and print diagnostics.

    Return:

    - dow_corr_factor: array (7,) with DoW correction (0=Monday).
      All ones if the correction does not reduce the spread around the
      7-day rolling average sufficiently.
    """

    dayrange = tuple(dayrange)
    if dayrange in _DOW_CORR_CACHE and not verbose:
        return _DOW_CORR_CACHE[dayrange].copy()

    # timestamp index, columns Delta, Delta7r, and others.
    df, _ = get_region_data('Nederland',
                            lastday=dayrange[-1],
                            correct_dow=None)
    # Discard the last rows that have no correct rolling average.
    df = df.iloc[:-4]
    # Work on a copy: columns are added below, and assigning to a slice
    # of another DataFrame triggers chained-assignment warnings.
    df = df.iloc[dayrange[0] - dayrange[1]:].copy()

    # Ratio of the daily value to the 7-day rolling average.
    df['Delta_factor'] = df['Delta'] / df['Delta7r']

    # Collect by day of week (0=Monday); normalize to a mean of 1.
    factor_by_dow = np.zeros(7)
    for i in range(7):
        factor_by_dow[i] = 1 / df.loc[df.index.dayofweek == i,
                                      'Delta_factor'].mean()
    factor_by_dow /= factor_by_dow.mean()

    df['Delta_est_factor'] = factor_by_dow[df.index.dayofweek]
    df['Delta_corrected'] = df['Delta'] * df['Delta_est_factor']

    # Spread around the rolling average, with and without correction.
    rms_dc = (df['Delta_corrected'] / df['Delta7r']).std()
    rms_d = df['Delta_factor'].std()

    if verbose:
        print(
            'DoW effect: deviations from 7-day rolling average.\n'
            f'  Original: RMS={rms_d:.3g}; after correction: RMS={rms_dc:.3g}')

        fig, ax = plt.subplots(tight_layout=True)

        ax.plot(df['Delta_factor'], label='Delta')
        ax.plot(df['Delta_corrected'] / df['Delta7r'], label='Delta_corrected')
        ax.plot(df['Delta_est_factor'], label='Correction factor')

        tools.set_xaxis_dateformat(ax, 'Date')
        ax.legend()
        ax.set_ylabel('Daily cases deviation')

        title = 'Day-of-week correction on daily cases'
        ax.set_title(title)
        # The canvas-level set_window_title() was deprecated in Matplotlib
        # 3.4 and later removed; use the figure manager instead.
        fig.canvas.manager.set_window_title(title)
        fig.show()

    # Require at least a 20% reduction of the spread.
    if rms_dc > 0.8 * rms_d:
        print(
            f'WARNING: DoW correction for dayrange={dayrange} does not seem to work.\n'
            '  Abandoning this correction.')

        factor_by_dow = np.ones(7)

    _DOW_CORR_CACHE[dayrange] = factor_by_dow.copy()
    return factor_by_dow
Beispiel #8
0
def plot_Rt(ndays=100,
            lastday=-1,
            delay=9,
            regions='Nederland',
            source='r7',
            Tc=4.0,
            correct_anomalies=True,
            g_mobility=False,
            mode='show',
            ylim=None):
    """Plot R number based on growth/shrink in daily cases.

    - ndays: number of most recent days to show.
    - lastday: use case data up to this day.
    - delay: assume delay days from infection to positive report.
      alternatively: list of (timestamp, delay) tuples if the delay varies over time.
      The timestamps refer to the date of report. See doc of estimate_Rt_df.
    - source: 'r7' or 'sg' for rolling 7-day average or Savitsky-Golay-
      filtered data.
    - Tc: generation interval time (days).
    - regions: comma-separated string (or list of str);
      'Nederland', 'V:xx' (holiday region), 'P:xx' (province), 'M:xx'
      (municipality).
    - correct_anomalies: whether to correct for known reporting anomalies.
    - g_mobility: include Google mobility data (experimental, not very usable yet).
    - mode: 'show' or 'return_fig'
    - ylim: optional y axis range (ymin, ymax)

    Return: the figure if mode == 'return_fig', otherwise None.

    Raise ValueError if there is nothing to plot or if mode is invalid.
    """

    Rt_rivm = DFS['Rt_rivm']

    fig, ax = plt.subplots(figsize=(10, 5))
    fig.subplots_adjust(top=0.90, bottom=0.11, left=0.09, right=0.92)
    plt.xticks(rotation=-20)

    if ylim:
        ax.set_ylim(*ylim)

    # from rcParams['axes.prop_cycle']
    colors = [
        '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',
        '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'
    ] * 5

    labels = []  # tuples (y, txt)
    if isinstance(regions, str):
        regions = regions.split(',')

    for region, color in zip(regions, colors):

        df1, _npop = get_region_data(region,
                                     lastday=lastday,
                                     correct_anomalies=correct_anomalies)
        source_col = dict(r7='Delta7r', sg='DeltaSG')[source]

        # skip the first 10 days because of zeros
        Rdf = estimate_Rt_df(df1[source_col].iloc[10:], delay=delay, Tc=Tc)
        Rt = Rdf['Rt'].iloc[-ndays:]
        delays = Rdf['delay'].iloc[-ndays:]
        delay_min, delay_max = delays.min(), delays.max()
        if delay_min == delay_max:
            delay_str = f'{delay_min:.2g}'
        else:
            delay_str = f'{delay_min:.2g}-{delay_max:.2g}'

        fmt = 'o'
        psize = 5 if ndays < 30 else 3

        if region.startswith('POP:'):
            label = region[4:] + ' k inw.'
        elif region == 'Nederland':
            label = 'R schatting Nederland'
        else:
            label = re.sub('^[A-Z]+:', '', region)

        # The last 3 points are drawn fainter.
        ax.plot(Rt.iloc[:-3], fmt, label=label, markersize=psize, color=color)
        ax.plot(Rt.iloc[-3:], fmt, markersize=psize, color=color, alpha=0.35)

        # add confidence range (ballpark estimate)
        print(region)

        # Last 3 days are extrapolation, but peek at one extra day for the
        # smooth curve generation.
        # SG filter (13, 2): n=13 (2 weeks) will iron out all weekday effects
        # remaining despite starting from a 7-day average.
        Rt_smooth = scipy.signal.savgol_filter(Rt.iloc[:-2], 13, 2)[:-1]
        Rt_smooth = pd.Series(Rt_smooth, index=Rt.index[:-3])
        print(f'Smooth R: {Rt_smooth.iloc[-1]:.3g} @ {Rt_smooth.index[-1]}')

        if region == 'Nederland':
            # Error: hardcoded estimate 0.05. Because of SG filter, last 6 days
            # are increasingly less accurate.
            Rt_err = np.full(len(Rt_smooth), 0.05)
            Rt_err[-6:] *= np.linspace(1, 1.4, 6)
            ax.fill_between(Rt_smooth.index,
                            Rt_smooth.values - Rt_err,
                            Rt_smooth.values + Rt_err,
                            color=color,
                            alpha=0.15,
                            zorder=-10)

            # This is for posting on Twitter
            Rt_smooth_latest = Rt_smooth.iloc[-1]
            Rt_point_latest = Rt.iloc[-4]
            date_latest = Rt.index[-4].strftime('%d %b')
            slope = (Rt_smooth.iloc[-1] - Rt_smooth.iloc[-4]) / 3
            if abs(Rt_smooth_latest - Rt_point_latest) < 0.015:
                txt = f'R={(Rt_smooth_latest+Rt_point_latest)/2:.2f}'
            else:
                txt = (f'R={Rt_point_latest:.2f} (datapunt), '
                       f'R={Rt_smooth_latest:.2f} (voorlopige trendlijn)')
            print(
                f'Update reproductiegetal Nederland t/m {date_latest}: {txt}.'
                f' Trend: {"+" if slope>=0 else "−"}{abs(slope):.3f} per dag.')

        # Solid trend line, with the final part dashed.
        smooth_line = ax.plot(
            Rt_smooth.iloc[:-5],
            color=color,
            alpha=1,
            zorder=0,
            label=('R trend Nederland' if region == 'Nederland' else None))
        ax.plot(Rt_smooth.iloc[-6:],
                color=color,
                alpha=1,
                zorder=0,
                linestyle='--',
                dashes=(2, 2))
        mpl_cursor(smooth_line)

        # Positional access via .iloc: plain [-1] on a date-indexed Series
        # relies on a deprecated positional fallback.
        labels.append((Rt.iloc[-1], f' {label}'))

    if len(labels) == 0:
        # Figure objects have no close() method; close via pyplot.
        plt.close(fig)
        raise ValueError('No data to plot.')

    # From here on, Rt and delay_str refer to the last region plotted.
    if Rt_rivm is not None:
        tm_lo = Rt.index[0]  # lowest timestamp
        tm_rivm_est = Rt_rivm[Rt_rivm['R'].isna()].index[
            0]  # 1st index with NaN
        # final values
        df_Rt_rivm_final = Rt_rivm.loc[tm_lo:tm_rivm_est, ['R', 'Rt_update']]
        ax.plot(df_Rt_rivm_final.iloc[:-1]['R'], 'k-', label='RIVM')
        ax.plot(df_Rt_rivm_final.iloc[:-1]['Rt_update'],
                'k^',
                markersize=4,
                label='RIVM updates')
        # estimates
        Rt_rivm_est = Rt_rivm.loc[tm_rivm_est -
                                  pd.Timedelta(1, 'd'):Rt.index[-1]]
        # print(Rt_rivm_est)
        ax.fill_between(Rt_rivm_est.index,
                        Rt_rivm_est['Rmin'],
                        Rt_rivm_est['Rmax'],
                        color='k',
                        alpha=0.15,
                        label='RIVM prognose')
        mpl_cursor(None)

    iex = dict(r7=3, sg=7)[source]  # days of extrapolation

    # add_labels(ax, labels, lab_x)
    # marker at 12:00 on final day (index may be a few hours off)
    tm_last_data = Rt.index[-iex - 1]
    t_mark = tm_last_data + pd.Timedelta(12 - tm_last_data.hour, 'h')
    ax.axvline(t_mark, color='gray')
    ax.axhline(1, color='k', linestyle='--')
    # Label the marker with the date it is drawn at. (This used to be
    # hard-coded to index[-4], which only matches the marker for 'r7'.)
    ax.text(t_mark,
            ax.get_ylim()[1],
            tm_last_data.strftime("%d %b "),
            rotation=90,
            horizontalalignment='right',
            verticalalignment='top')

    xnotes = []
    if source != 'r7':
        xnotes.append(source)
    if correct_anomalies:
        anom_date = DFS['anomalies'].index[-1].strftime(
            '%d %b')  # most recent anomaly date
        xnotes.append(f'correctie pos. tests o.a. {anom_date}')
    if xnotes:
        # Leading empty item gives a leading ', ' separator.
        xnotes = ", ".join([""] + xnotes)
    else:
        xnotes = ''

    ax.set_title(
        f'Reproductiegetal o.b.v. positieve tests; laatste {iex} dagen zijn een extrapolatie\n'
        f'(Generatie-interval: {Tc:.3g} dg, rapportagevertraging {delay_str} dg{xnotes})'
    )
    ax.set_ylabel('Reproductiegetal $R_t$')

    # setup the x axis before adding y2 axis.
    tools.set_xaxis_dateformat(ax, maxticks=10)

    # get second y axis: doubling/halving times T2 at R = 2**(Tc/T2).
    ax2 = ax.twinx()
    T2s = np.array(
        [-2, -4, -7, -10, -14, -21, -60, 9999, 60, 21, 14, 10, 7, 4, 2])
    y2ticks = 2**(Tc / T2s)
    y2labels = [f'{t2 if t2 != 9999 else "∞"}' for t2 in T2s]
    ax2.set_yticks(y2ticks)
    ax2.set_yticklabels(y2labels)
    ax2.set_ylim(*ax.get_ylim())
    ax2.set_ylabel('Halverings-/verdubbelingstijd (dagen)')

    xlim = (Rt.index[0] - pd.Timedelta('12 h'),
            Rt.index[-1] + pd.Timedelta('3 d'))
    ax.set_xlim(*xlim)
    _add_restriction_labels(ax, Rt.index[0], Rt.index[-1], flagmatch='RGraph')
    if g_mobility:
        _add_mobility_data_to_R_plot(ax)

    # Matplotlib no longer accepts mixed-case property names such as
    # verticalAlignment; use the lowercase spelling.
    ax.text(0.99,
            0.98,
            '@hk_nien',
            transform=ax.transAxes,
            verticalalignment='top',
            horizontalalignment='right',
            rotation=90)

    ax.legend(loc='upper left')

    if mode == 'show':
        # The canvas-level set_window_title() was deprecated in Matplotlib
        # 3.4 and later removed; use the figure manager instead.
        fig.canvas.manager.set_window_title(
            f'Rt ({", ".join(regions)[:30]}, ndays={ndays})')
        fig.show()

    elif mode == 'return_fig':
        return fig

    else:
        raise ValueError(f'mode={mode!r}')
Beispiel #9
0
        }, inplace=True)

    df.drop(columns=['country_region_code', 'country_region', 'sub_region_1', 'sub_region_2',
           'metro_area', 'iso_3166_2_code', 'census_fips_code'], inplace=True)

    for c in df.columns:
        smooth_data = scipy.signal.savgol_filter(df[c].values, 13, 2, mode='interp')
        df[c] = 1 + 0.01 * smooth_data

    # check whether it's up to date.
    # Mobility data released on 2nd or 3rd of the month?
    today = pd.to_datetime('now')
    if today.month != (df.index[-1].month % 12 + 1) and today.day >= 3:
        print('Google mobility report may be outdated. Call download_g_mobility_data().')

    return df


if __name__ == '__main__':

    # Demo: draw every column of the mobility DataFrame in one figure.
    df = get_g_mobility_data()
    plt.close('all')
    fig, ax = plt.subplots(figsize=(14, 6), tight_layout=True)
    for c in df.columns:
        column_data = df[c]
        ax.plot(df.index, column_data, label=c)
    ax.grid()
    ax.legend()
    tools.set_xaxis_dateformat(ax, maxticks=25)
    fig.show()

Beispiel #10
0
                 regions='HR:Noord,HR:Midden+Zuid')

    Rts = []  # Rt for North, Mid+South

    for region in ['HR:Noord', 'HR:Midden+Zuid']:
        df1, _npop = nlcs.get_region_data(region,
                                          lastday=lastday,
                                          correct_anomalies=True)
        source_col = 'Delta7r'

        # skip the first 10 days because of zeros
        Rt, delay_str = nlcs.estimate_Rt_series(df1[source_col].iloc[10:],
                                                delay=nlcs.DELAY_INF2REP)
        Rt = Rt.iloc[-ndays:]

        Rts.append(Rt)

    Rtdiff = Rts[0] - Rts[1]
    deltaR_smooth = scipy.signal.savgol_filter(Rtdiff.values, 13, 2)
    deltaR_smooth = pd.Series(deltaR_smooth, index=Rtdiff.index)

    fig, ax = plt.subplots(figsize=(10, 4), tight_layout=True)
    ax.plot(deltaR_smooth, label='Verschil Noord vs. Mid+Zuid', color='r')
    ax.set_ylabel(r'$\Delta R_t$')
    ax.axhline(0, color='k', linestyle='--')
    ax.legend()
    nlcs._add_restriction_labels(ax, Rt.index[0], Rt.index[-1])
    tools.set_xaxis_dateformat(ax)

    fig.show()
Beispiel #11
0
# Sum per statistics date and derive the percentage of positive tests.
dft = dft_full.groupby('Date_of_statistics').sum()
dft['perc_positive'] = dft['Tested_positive'] / dft['Tested_with_result'] * 100

# Two stacked panels sharing the x axis, showing the last 50 rows:
# number of tests with result (top), percentage positive (bottom).
fig, axs = plt.subplots(2, 1, figsize=(10, 6), tight_layout=True, sharex=True)
ax = axs[0]

ax.plot(dft['Tested_with_result'].iloc[-50:])
title = 'Dagelijks afgenomen tests bij GGD'
ax.set_title(title)
# The canvas-level set_window_title() was deprecated in Matplotlib 3.4 and
# later removed; use the figure manager instead.
fig.canvas.manager.set_window_title(title)

ax = axs[1]
ax.set_title('Percentage positief')
ax.plot(dft['perc_positive'].iloc[-50:])

tools.set_xaxis_dateformat(axs[0])
tools.set_xaxis_dateformat(axs[1])
fig.show()

#%% east-west

# Named sets of security-region codes. 'all' holds every string-valued
# code found in the data (non-string entries are skipped).
vrcodes = {
    'all': {
        code
        for code in dft_full['Security_region_code'].unique()
        if isinstance(code, str)
    }
}

vrcodes['low_pollen_20210226'] = {
    'VR01', 'VR02', 'VR03', 'VR10', 'VR11', 'VR12', 'VR13', 'VR14', 'VR15',
    'VR16', 'VR17', 'VR18', 'VR19', 'VR24', 'VR25'
}
Beispiel #12
0
def plot_countries_odds_ratios(country_select='all_recent',
                               subtract_eng_bg=True,
                               wiki=False):
    """Generate graph with odds ratios over time on semi log scale.

    UK data is based on population sampling, mostly SGTF (background subtracted).
    UK SGTF data shifted by 14 days to estimate symptom onset.
    Other data is from genomic sequencing ('seq').

    - country_select: selection preset, passed to get_data_countries().
    - subtract_eng_bg: passed to get_data_countries().
    - wiki: if True, omit the 'now' marker and the attribution text, and
      save the figure to 'output/uk_strain_status-{country_select}.png'.

    A table with the fitted slopes per region is printed.
    """

    cdict, meta_df = get_data_countries(country_select,
                                        subtract_eng_bg=subtract_eng_bg)

    fig, ax = _setup_country_fig_ax(cdict)

    markers = iter('o^vs*Do^vs*D' * 4)
    colors = plt.rcParams['axes.prop_cycle']()
    colors = iter([next(colors)['color'] for _ in range(40)])

    tm0 = pd.to_datetime('2020-12-01')
    one_day = pd.Timedelta(1, 'd')

    oddsfit_records = []

    tm_now = pd.to_datetime('now')
    tm_now += pd.Timedelta(12 - tm_now.hour, 'h')  # 12:00 noon

    # Whether country data like DK is in the selection. Compare against the
    # column values: `'DK' in series` would test the index labels instead.
    dk_selected = bool((meta_df['ccode'] == 'DK').any())

    for desc, df in cdict.items():
        meta = meta_df.loc[desc]  # metadata
        if (meta['ccode'] and meta['is_seq']) or not dk_selected:
            # highlight countries with sequence data
            # (only if country data like DK is in the selection)
            plotargs = dict(zorder=0, alpha=0.9, linewidth=2, markersize=6)
        else:
            # SGTF data and sub-national regions
            plotargs = dict(zorder=-10, alpha=0.4, markersize=4)

        odds = f2odds(df['f_b117']).values
        tms = df.index
        xs = np.array((tms - tm0) / one_day)  # days since tm0

        # Fitting on at most 6 weeks; the first data point is never
        # included in the fit.
        ifirst = np.argmax(tms > tms[-1] - pd.Timedelta(42, 'd'))
        ifirst = max(1, ifirst)
        oslope, odds0 = fit_log_odds(xs[ifirst:],
                                     odds[ifirst:],
                                     last_weight=0.33)

        # show fit result
        odds_latest = np.exp(odds0 + oslope * xs[-1])
        tm_latest = tms[-1].strftime("%Y-%m-%d")
        oddsfit_records.append(
            dict(region=desc,
                 date=tm_latest,
                 odds=float('%.4g' % odds_latest),
                 log_slope=float('%.4g' % oslope)))

        xse = np.array([xs[ifirst], xs[-1]])  # expanded x range
        tms_fit = [tms[ifirst], tms[-1]]
        odds_fit = np.exp(oslope * xse + odds0)

        # extrapolate fit to present day. (Clip at odds=15, 21 days after most recent point)
        xs_ext = np.arange((tms[-1] - tm0) / one_day, (tm_now - tm0) / one_day)
        odds_ext = np.exp(oslope * xs_ext + odds0)
        tms_ext = np.array([tm0 + dt for dt in xs_ext * one_day])
        mask_ext = (odds_ext < 15) & (tms_ext <
                                      tms[-1] + pd.Timedelta(21, 'd'))

        # draw the data: markers, solid fit line, dashed extrapolation.
        p = next(markers)
        col = next(colors)
        label = f'{desc} [{oslope:.3f}]'
        ax.semilogy(tms, odds, f'{p}', color=col, label=label, **plotargs)
        ax.semilogy(tms_fit, odds_fit, '-', color=col, **plotargs)
        ax.semilogy(tms_ext[mask_ext],
                    odds_ext[mask_ext],
                    '--',
                    color=col,
                    **plotargs)

        # Dotted line through the data points preceding the fit range.
        # (ifirst >= 1 always holds, so this is unconditional.)
        ax.semilogy(tms[:ifirst + 1],
                    odds[:ifirst + 1],
                    ':',
                    color=col,
                    **plotargs)

    odds_fit_df = pd.DataFrame.from_records(oddsfit_records).set_index(
        'region')
    print(f'Slope fit results:\n{odds_fit_df}')

    if not wiki:
        ymin = ax.get_ylim()[0]
        ax.axvline(tm_now, color='#888888')
        ax.text(tm_now,
                ymin,
                tm_now.strftime('  %d %b'),
                horizontalalignment='right',
                verticalalignment='bottom',
                rotation=90)

    ax.set_ylabel('Odds ratio B.1.1.7/other variants')

    ax.yaxis.set_major_formatter(FormatStrFormatter('%g'))
    # Monkey-patch to prevent '%e' formatting.
    # NOTE: this patches the LogFormatter class itself, i.e. it affects
    # every LogFormatter in the process, not only this figure.
    LogFormatter._num_to_string = lambda _0, x, _1, _2: ('%g' % x)
    ax.yaxis.set_minor_formatter(LogFormatter(minor_thresholds=(2, 1)))

    tools.set_xaxis_dateformat(ax)  # must be before adding a second y axis.

    ax.legend(loc='upper left', bbox_to_anchor=(1.15, 1), fontsize=9)
    ax.set_title('B.1.1.7 presence in positive cases, with $\\log_e$ slopes')
    # The canvas-level set_window_title() was deprecated in Matplotlib 3.4
    # and later removed; use the figure manager instead.
    fig.canvas.manager.set_window_title('B117 in countries/regions')

    if not wiki:
        #     if subtract_eng_bg:
        #         sgtf_subtracted = ' (backgroud positive rate subtracted for England regions)'
        #     else:
        #         sgtf_subtracted = ''

        #     ax.text(1.10, -0.05 + 0.1 * (len(cdict) < 16),
        fig.text(0.99,
                 0.01,
                 '@hk_nien',
                 fontsize=8,
                 horizontalalignment='right',
                 verticalalignment='bottom')

    add_percentage_y2_axis(ax)
    tools.set_xaxis_dateformat(
        ax)  # repeat, otherwise the y2 axis will delete the minor ticks.
    fig.show()
    plt.pause(0.5)

    if wiki:
        fname = f'output/uk_strain_status-{country_select}.png'
        fig.savefig(fname)
        print(f'Wrote {fname}.')
Beispiel #13
0
def simulate_and_plot(Rs,
                      f0,
                      title_prefix='',
                      date_ra=('2020-12-18', '2021-02-15'),
                      n0=1e4,
                      clip_nRo=('2099', '2099', '2099'),
                      R_changes=None,
                      use_r7=True,
                      df_lohi=None,
                      country_select=None):
    """Simulate and plot, given R and initial prevalence.

    Creates a figure with three stacked panels sharing the x axis
    (daily numbers on a log scale, R values, odds ratio B117:other)
    and shows it. Nothing is returned.

    - Rs: sequence; Rs[0] and Rs[1] are shown in the title as R_oud and
      R_B117. Passed on to simulate_cases().
    - f0, n0, date_ra, use_r7: passed on unchanged to simulate_cases().
    - title_prefix: text put in front of the figure title.
    - clip_nRo: optional max dates for (n_NL, R_NL, f_other)
    - R_changes: list of R-changes as tuples (date, R_scaling, label).
      For example: R_changes=[('2021-01-23', 0.8, 'Avondklok')]
      When given, a second simulation without R changes is plotted for
      comparison.
    - df_lohi: optional DataFrame with columns nlo, nhi, Rlo, Rhi, flo, fhi
      to use as confidence intervals. It is not modified.
    - country_select: selection preset (str) for which countries to show.
      See get_data_countries() for details.
    """
    # Local import so that this function does not depend on the
    # module-level import block.
    import itertools

    sim_kwargs = dict(f0=f0, Rs=Rs, date_ra=date_ra, n0=n0, use_r7=use_r7)
    df = simulate_cases(R_changes=R_changes, **sim_kwargs)

    # simulation for 'no interventions'
    df_nointv = simulate_cases(R_changes=None, **sim_kwargs)

    df_R, dfc = get_Rt_cases()
    df_R = df_R.loc[df_R.index >= '2020-12-01']
    dfc = dfc.loc[dfc.index >= '2020-12-25']  # cases

    color_cycle = plt.rcParams['axes.prop_cycle']()
    colors = [next(color_cycle)['color'] for _ in range(10)]

    # Vertical text labels for events; shared by the top two panels.
    # list() so that R_changes may be any sequence of tuples.
    date_labels = [('2020-12-15', 0, 'Lockdown')] + list(R_changes or [])

    fig, axs = plt.subplots(3,
                            1,
                            tight_layout=True,
                            sharex=True,
                            figsize=(9, 10))
    ## top panel: number of cases
    ax = axs[0]
    ax.set_ylabel('Aantal per dag')
    ax.semilogy(df['ni_old'],
                label='Infecties oude variant (simulatie)',
                color=colors[0],
                linestyle='-.')
    ax.semilogy(df['ni_b117'],
                label='Infecties B117 variant (simulatie)',
                color=colors[0],
                linestyle='--')
    ax.semilogy(df['ni_old'] + df['ni_b117'],
                label='Infecties totaal (simulatie)',
                color=colors[0],
                linestyle='-',
                linewidth=2)
    ax.semilogy(df['npos'],
                label='Positieve tests (simulatie)',
                color=colors[1],
                linestyle='-.')

    if df_lohi is not None:
        _fill_between_df(ax,
                         df_lohi,
                         'nlo',
                         'nhi',
                         color=colors[1],
                         alpha=0.15,
                         zorder=-10)

    if R_changes:
        ax.semilogy(df_nointv['npos'],
                    label='P.T. (sim., geen maatregelen)',
                    color=colors[1],
                    linestyle=':')

    select = dfc.index <= clip_nRo[0]
    ax.semilogy(dfc.loc[select, 'Delta7r'] * 17.4e6,
                label='Positieve tests (NL)',
                color=colors[2],
                linestyle='--',
                linewidth=3)
    # first valid point of positive tests
    firstpos = df.loc[~df['npos'].isna()].iloc[0]
    ax.scatter(firstpos.name, firstpos['npos'], color=colors[1], zorder=10)

    npos_min = df['npos'].min()
    ax.set_ylim(npos_min / 3, 20000)
    ax.yaxis.set_minor_formatter(LogFormatter(minor_thresholds=(2, 1)))

    for date, _, label in date_labels:
        ax.text(pd.to_datetime(date),
                npos_min / 2.6,
                label,
                rotation=90,
                horizontalalignment='center')

    ax.grid()
    ax.grid(which='minor', axis='y')
    ax.legend(loc='lower left')

    ## R plot
    ax = axs[1]
    ax.set_ylabel('R')
    ax.plot(df['Rt'], label='$R_t$ (simulatie)', color=colors[1])

    if df_lohi is not None:
        _fill_between_df(ax,
                         df_lohi,
                         'Rlo',
                         'Rhi',
                         color=colors[1],
                         alpha=0.15,
                         zorder=-10)

    if R_changes:
        ax.plot(df_nointv['Rt'],
                label='$R_t$ (sim., geen maatregelen)',
                color=colors[1],
                linestyle=':')
    # .iloc: explicit positional access; integer keys on a date-indexed
    # Series are deprecated in pandas.
    ax.scatter(df.index[0], df['Rt'].iloc[0], color=colors[1], zorder=10)
    dfR1 = df_R.loc[df_R.index <= clip_nRo[1]]
    ax.fill_between(dfR1.index,
                    dfR1['Rlo'],
                    dfR1['Rhi'],
                    color='#0000ff',
                    alpha=0.15,
                    label='$R_t$ (observatie NL)')

    for date, _, label in date_labels:
        ax.text(pd.to_datetime(date),
                0.82,
                label,
                rotation=90,
                horizontalalignment='center')

    ax.grid(zorder=0)
    ax.legend()

    ## odds plot
    ax = axs[2]
    ax.set_ylabel('Verhouding B117:overig (pos. tests)')
    ax.semilogy(f2odds(df['f_b117']),
                label='Nederland (simulatie)',
                color=colors[1])

    if df_lohi is not None:
        # Work on a derived frame so the caller's DataFrame is not modified.
        df_odds = df_lohi.assign(odds_lo=f2odds(df_lohi['flo']),
                                 odds_hi=f2odds(df_lohi['fhi']))
        _fill_between_df(ax,
                         df_odds,
                         'odds_lo',
                         'odds_hi',
                         color=colors[1],
                         alpha=0.15,
                         zorder=-10)

    # Cycle through the markers so that any number of countries works.
    markers = itertools.cycle('o^vs*D')
    cdict, _meta_df = get_data_countries(select=country_select)
    for (country_name, cdf), marker in zip(cdict.items(), markers):
        cdf = cdf.loc[cdf.index <= clip_nRo[2]]
        # Truncate long names to keep the legend compact.
        label = country_name if len(
            country_name) < 25 else country_name[:23] + '...'
        ax.plot(cdf.index,
                f2odds(cdf['f_b117']),
                f'{marker}:',
                linewidth=2,
                label=label,
                zorder=100)

    ax.grid(which='both', axis='y')
    ax.legend(fontsize=10, framealpha=0.9)

    ax.yaxis.set_major_formatter(FormatStrFormatter('%g'))
    # Monkey-patch to prevent '%e' formatting.
    # NOTE: this replaces a private method on the LogFormatter class itself,
    # so it affects every LogFormatter instance in the process.
    LogFormatter._num_to_string = lambda _0, x, _1, _2: ('%g' % x)
    ax.yaxis.set_minor_formatter(LogFormatter(minor_thresholds=(2, 1)))

    title = f'{title_prefix}R_oud={Rs[0]:.2f};  R_B117={Rs[1]:.2f}'
    if R_changes:
        title += f'\n(R wijzigt vanaf {R_changes[0][0]})'
    axs[0].set_title(title)

    # Only the bottom panel gets tick labels.
    for i in range(3):
        tools.set_xaxis_dateformat(axs[i], maxticks=15, ticklabels=(i == 2))
    add_percentage_y2_axis(ax, label='Aandeel B.1.1.7 (%)')

    # repeat to get the weekly ticks
    tools.set_xaxis_dateformat(axs[2], maxticks=15)
    fig.show()
    plt.pause(0.75)
Beispiel #14
0
def get_reporting_delay(df,
                        initial_delay=7,
                        end_trunc=4,
                        start_trunc=5,
                        plot=True,
                        m=18):
    """Estimate delay from DOO (eDOO) to file_date.

    The delay is found by matching the cumulative number of cases by
    date of disease onset against the cumulative number by date of report
    (both based on centered 7-day rolling means).

    Parameters:

    - df: Dataframe with multiindex and DON, DOO, DPL, Dtot columns.
    - initial_delay: assume initial delay (days).
    - end_trunc: how many dates to truncate at the end
      (recent data unreliable; but set it too high and you'll get
       a range error. Lower than 4 is not very meaningful.)
      Zero means no truncation.
    - start_trunc: how many dates to truncate at the beginning
      of the delay data.
    - plot: whether to show plot of the data.
    - m: days to wait for reports to converge (for weekday corrections).

    Return:

    - delays_d: pandas Series with delays (days), Date_file as index.
    """

    # Setup refdata dataframe;
    # index: Date_statistics
    # columns: ..., nDOO: estimated number of new disease onsets.
    # Estimate is corrected for partial reporting but not for day-of-week
    # effects. Statistics based on most recent m days.
    fdates = np.array(sorted(df.index.get_level_values(0).unique()))
    fdrange = ('2020-10-01', fdates[-1])
    doo_corr = DOOCorrection.from_doo_df(
        df,
        date_range=fdrange,
        m=m,
    )
    refdata = doo_corr.create_df_nDOO(df.loc[fdates[-1]])
    if end_trunc > 0:
        # Guard: index[-0:] would select (and blank out) every row.
        refdata.loc[refdata.index[-end_trunc:], 'nDOO'] = np.nan
    refdata = refdata.loc[~refdata['nDOO'].isna()]

    # df_deltas: changes in cumulative Dtot etc. values.
    # The columns 'Dtot' is roughly the 'daily case numbers'.
    columns = ['DOO', 'DON', 'DPL', 'Dtot']
    df_deltas = df[columns].groupby('Date_file').sum().diff()
    df_deltas.iloc[0, :] = 0  # first row of diff() is NaN

    # by date of disease onset; iloc[3:-3] drops the NaN edges of the
    # centered 7-day window.
    by_doo = refdata['nDOO'].rolling(7, center=True).mean().iloc[3:-3]

    by_doo = by_doo.loc[fdates[0] - (pd.Timedelta(initial_delay - 3, 'd')):]
    cum_doo = by_doo.cumsum() - by_doo.iloc[0]

    # by date of report
    by_dor = df_deltas['Dtot'].rolling(7, center=True).mean().iloc[3:-3].copy()
    cum_dor = by_dor.cumsum() - by_dor.iloc[0]

    # Get delay by matching cumulatives: interpolate from cumulative count
    # (x) to onset date as integer nanoseconds (y). Cumulative values
    # outside the known range are clamped to the first/last onset date;
    # fill_value must therefore be given in y units (dates), not counts.
    doo_dates_ns = cum_doo.index.astype(np.int64)
    f_cumdoo2date = scipy.interpolate.interp1d(cum_doo,
                                               doo_dates_ns,
                                               bounds_error=False,
                                               fill_value=(doo_dates_ns[0],
                                                           doo_dates_ns[-1]))

    # np.around(..., -9): round the interpolated nanoseconds to whole seconds.
    delays = pd.Series(
        cum_dor.index -
        pd.to_datetime(np.around(f_cumdoo2date(cum_dor.values), -9)),
        index=cum_dor.index)
    delays = delays.iloc[start_trunc:]
    # delay in days
    delays_d = np.around(delays / pd.Timedelta('1 d'), 2)

    if plot:
        fig, axs = plt.subplots(3,
                                1,
                                tight_layout=True,
                                figsize=(10, 7),
                                sharex=True)

        kwa_doo = dict(linestyle='--')

        ax = axs[0]
        ax.set_title('Aantal positieve tests (7-daags gemiddelde)')
        ax.set_ylabel('Gevallen per dag')
        ax.plot(by_dor, label='versus rapportagedatum')
        ax.plot(by_doo, label='versus 1e ziektedag', **kwa_doo)
        ax.legend()

        ax = axs[1]
        ax.set_title('Cumulatief aantal positieve tests')
        ax.set_ylabel('Cumulatieve gevallen')
        ax.plot(cum_dor, label='versus rapportagedatum')
        ax.plot(cum_doo, label='versus 1e ziektedag', **kwa_doo)
        ax.legend()

        ax = axs[2]
        ax.set_title(
            'Tijd tussen 1e ziektedag en rapportage (versus rapportagedatum)')
        ax.plot(delays_d, label=None)
        ax.set_ylabel('Vertraging (dagen)')

        for ax in axs:
            tools.set_xaxis_dateformat(ax)

        # The canvas-level set_window_title() was removed from Matplotlib;
        # the figure manager provides it in old and new versions.
        fig.canvas.manager.set_window_title('Rapportagevertraging')
        fig.show()

    return delays_d