예제 #1
0
def run_Rt_estimation(grp_numbers, smoothing_window, r_window_size):
    """Estimate Rt per region from the NU case file and store the results.

    Code following online example:
    https://github.com/lo-hfk/epyestim/blob/main/notebooks/covid_tutorial.ipynb
    A smoothing_window of 28 days was found to be most comparable to EpiEstim
    in this case. r_window_size defaults to 3 in epyestim if not specified;
    increasing r_window_size narrows the uncertainty bounds.

    The function reads ``nu_<simdate>.csv`` from the module-level ``exp_dir``
    (``simdate`` is the part of the module-level ``exp_name`` before the first
    underscore), keeps rows dated after 2020-03-01, runs ``covid19.r_covid``
    on ``cases_new_median`` for every requested region that is present in the
    file, writes the stacked estimates to ``rtNU.csv``, plots them via
    ``rt_plot`` and finally merges them back into ``nu_<simdate>.csv``
    (overwriting that file with the date-filtered rows).

    Args:
        grp_numbers: Iterable of region numbers; ``0`` maps to ``"illinois"``,
            any other value ``n`` to ``"covidregion_<n>"``. Regions missing
            from the ``geography_modeled`` column are skipped.
        smoothing_window: Passed through to ``covid19.r_covid``.
        r_window_size: Passed through to ``covid19.r_covid``.

    Returns:
        The Rt frame of the LAST processed region only (kept for backward
        compatibility). NOTE(review): the stacked frame of all regions may
        have been intended - confirm with callers before changing.

    Raises:
        ValueError: If none of the requested regions is present in the file.
    """
    simdate = exp_name.split("_")[0]
    nu_file = os.path.join(exp_dir, f'nu_{simdate}.csv')

    df = pd.read_csv(nu_file)
    df['date'] = pd.to_datetime(df['date'])
    df = df[df['date'] > pd.Timestamp('2020-03-01')]

    # Computed once instead of re-scanning the column for every region.
    available_regions = set(df['geography_modeled'].unique())

    # Collect per-region frames in a list and concatenate once:
    # DataFrame.append was removed in pandas 2.0 and copied on every call.
    rt_frames = []
    for ems_nr in grp_numbers:
        region_suffix = "illinois" if ems_nr == 0 else f'covidregion_{ems_nr}'
        if region_suffix not in available_regions:
            continue

        mdf = df[df['geography_modeled'] == region_suffix]
        mdf = mdf.set_index('date')['cases_new_median']

        # Use default distributions (for covid-19). The returned distributions
        # are not passed to r_covid; the call is kept in case the helper has
        # side effects - TODO confirm and drop if it has none.
        get_distributions(show_plot=False)

        # The last observation is excluded from the estimation input, as in
        # the original code.
        df_rt = covid19.r_covid(mdf[:-1],
                                smoothing_window=smoothing_window,
                                r_window_size=r_window_size)

        df_rt['geography_modeled'] = region_suffix
        df_rt = df_rt.reset_index().rename(columns={'index': 'date',
                                                    'Q0.5': 'rt_median',
                                                    'Q0.025': 'rt_lower',
                                                    'Q0.975': 'rt_upper'})
        df_rt['model_date'] = pd.Timestamp(simdate)
        df_rt = df_rt[['model_date', 'date', 'geography_modeled',
                       'rt_median', 'rt_lower', 'rt_upper']]
        rt_frames.append(df_rt)

    if not rt_frames:
        # Previously this surfaced later as an opaque KeyError/UnboundLocalError.
        raise ValueError(
            f"None of the requested regions {list(grp_numbers)} were found in "
            f"'geography_modeled' of {nu_file}")

    df_rt_all = pd.concat(rt_frames)

    df_rt_all.to_csv(os.path.join(exp_dir, 'rtNU.csv'), index=False)
    rt_plot(df=df_rt_all,
            first_day=last_plot_day - pd.Timedelta(60, 'days'),
            last_day=last_plot_day,
            plotname='rt_by_covidregion_truncated')

    if 'rt_median' in df.columns:
        # Drop stale estimates so the merge below does not create suffixed
        # duplicate columns.
        print("Warning: Overwriting already present Rt estimates")
        df = df.drop(['rt_median', 'rt_lower', 'rt_upper'], axis=1)

    df_with_rt = pd.merge(how='left', left=df, right=df_rt_all,
                          on=['date', 'geography_modeled'])
    df_with_rt.to_csv(nu_file, index=False)
    rt_plot(df=df_rt_all, plotname='estimated_rt_by_covidregion_full')

    return df_rt
예제 #2
0
def run_Rt_estimation(smoothing_window=14, r_window_size=7):
    """Estimate Rt for covid regions 1-11 from the latest line-list file.

    Code following online example:
    https://github.com/lo-hfk/epyestim/blob/main/notebooks/covid_tutorial.ipynb

    Reads ``<LL_file_date>_jg_aggregated_covidregion.csv`` from
    ``<datapath>/covid_IDPH/Cleaned Data`` (``datapath`` is a module-level
    name), keeps rows dated between 2020-04-01 and today, runs
    ``covid19.r_covid`` on the ``cases`` column of every covid region,
    writes the stacked estimates to ``rt_from_LLdata.csv`` in the module-level
    ``plot_path`` and plots them twice via ``rt_plot`` (full range and the
    ``first_plot_day``/``last_plot_day`` window).

    Args:
        smoothing_window: Passed through to ``covid19.r_covid``.
        r_window_size: Passed through to ``covid19.r_covid``.

    Returns:
        The Rt frame of the LAST region (covidregion_11) only, kept for
        backward compatibility. NOTE(review): the stacked frame of all
        regions may have been intended - confirm with callers.
    """
    cleaned_dir = os.path.join(datapath, 'covid_IDPH', 'Cleaned Data')
    LL_file_date = get_latest_LLfiledate(file_path=cleaned_dir)
    df = pd.read_csv(
        os.path.join(cleaned_dir,
                     f'{LL_file_date}_jg_aggregated_covidregion.csv'))
    df['date'] = pd.to_datetime(df['date'])
    df = df[df['date'].between(pd.Timestamp('2020-04-01'),
                               pd.Timestamp.today())]

    # Collect per-region frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and copied the accumulated frame on every call.
    rt_frames = []
    for ems_nr in range(1, 12):
        # Build the integer case series without assigning onto a filtered
        # slice (the original attribute assignment triggered
        # SettingWithCopyWarning).
        region_cases = (df.loc[df['covid_region'] == ems_nr]
                        .set_index('date')['cases']
                        .astype(int))

        # Use default distributions (for covid-19). The returned distributions
        # are not passed to r_covid; the call is kept in case the helper has
        # side effects - TODO confirm and drop if it has none.
        get_distributions(show_plot=False)

        # The last observation is excluded from the estimation input, as in
        # the original code.
        df_rt = covid19.r_covid(region_cases[:-1],
                                smoothing_window=smoothing_window,
                                r_window_size=r_window_size)

        df_rt['geography_modeled'] = f'covidregion_{ems_nr}'
        df_rt = df_rt.reset_index().rename(
            columns={
                'index': 'date',
                'Q0.5': 'rt_median',
                'Q0.025': 'rt_lower',
                'Q0.975': 'rt_upper'
            })
        # .assign returns a new frame, so the parameter columns are not
        # written onto a column-subset view.
        df_rt = df_rt[[
            'date', 'geography_modeled', 'rt_median', 'rt_lower', 'rt_upper'
        ]].assign(smoothing_window=smoothing_window,
                  r_window_size=r_window_size)
        rt_frames.append(df_rt)

    df_rt_all = pd.concat(rt_frames)

    df_rt_all.to_csv(os.path.join(plot_path, 'rt_from_LLdata.csv'),
                     index=False)
    rt_plot(df=df_rt_all, first_day=None, last_day=None)
    rt_plot(df=df_rt_all, first_day=first_plot_day, last_day=last_plot_day)

    return df_rt
예제 #3
0
def run_Rt_estimation(grp_numbers, smoothing_window, r_window_size):
    """Estimate Rt per region and simulation scenario and write one CSV per region.

    Code following online example:
    https://github.com/lo-hfk/epyestim/blob/main/notebooks/covid_tutorial.ipynb
    A smoothing_window of 28 days was found to be most comparable to EpiEstim
    in this case. r_window_size defaults to 3 in epyestim if not specified;
    increasing r_window_size narrows the uncertainty bounds.

    For every entry of ``grp_numbers`` the simulation output is loaded with
    ``load_sim_data`` (using the module-level ``exp_name``), and for every
    distinct ``scen_num`` in it ``covid19.r_covid`` is run on the
    ``new_infected`` series. The stacked per-scenario estimates are written to
    ``rt_trajectories<region_label>.csv`` in the module-level ``exp_dir``.

    Args:
        grp_numbers: Iterable of region numbers; ``0`` selects the ``_All``
            trajectories (labelled ``"illinois"``), any other value ``n``
            selects ``_EMS-<n>`` (labelled ``"covidregion_<n>"``).
        smoothing_window: Passed through to ``covid19.r_covid``.
        r_window_size: Passed through to ``covid19.r_covid``.

    Returns:
        None. Results are only written to disk.
    """
    simdate = exp_name.split("_")[0]

    for ems_nr in grp_numbers:
        if ems_nr == 0:
            region_suffix = '_All'
            region_label = "illinois"
        else:
            region_suffix = f'_EMS-{ems_nr}'
            region_label = f'covidregion_{ems_nr}'
        print(region_suffix)

        df = load_sim_data(exp_name, region_suffix=region_suffix)
        df['date'] = pd.to_datetime(df['date'])

        # Use default distributions (for covid-19). The returned distributions
        # are not passed to r_covid; the call is kept in case the helper has
        # side effects - TODO confirm and drop if it has none.
        get_distributions(show_plot=False)

        # Collect per-scenario frames and concatenate once: DataFrame.append
        # was removed in pandas 2.0 and copied the accumulated frame each call.
        scen_frames = []
        for s, scen in enumerate(df['scen_num'].unique()):
            print(scen)
            mdf = df[df['scen_num'] == scen].set_index('date')['new_infected']
            # The last observation is excluded from the estimation input, as
            # in the original code.
            df_rt = covid19.r_covid(mdf[:-1],
                                    smoothing_window=smoothing_window,
                                    r_window_size=r_window_size)
            df_rt = df_rt.reset_index().rename(
                columns={
                    'index': 'date',
                    'Q0.5': 'rt_median',
                    'Q0.025': 'rt_lower',
                    'Q0.975': 'rt_upper'
                })
            df_rt['model_date'] = pd.Timestamp(simdate)
            df_rt['geography_modeled'] = region_label
            df_rt['scen_num'] = scen
            df_rt['scen_enumerator'] = s
            scen_frames.append(df_rt)

        # pd.concat rejects an empty list; fall back to an empty frame so the
        # (empty) output file is still written as before.
        df_rt_scen = pd.concat(scen_frames) if scen_frames else pd.DataFrame()

        # NOTE(review): the file name has no separator between
        # 'rt_trajectories' and the region label; left unchanged because
        # downstream readers may depend on it.
        df_rt_scen.to_csv(os.path.join(
            exp_dir, 'rt_trajectories' + region_label + '.csv'),
                          index=False)
예제 #4
0
    def test_r_covid_cutoff(self):
        """Check the median R estimate for a series that starts with many zero-case days.

        The daily case counts are indexed from 2020-01-22 for 293 days and
        begin with a long run of zeros. ``r_covid`` is called with the
        Singapore onset-to-reporting delay distribution and only the 0.5
        quantile requested; the ``Q0.5`` column must match the reference
        values to 1 decimal place.
        """
        # Fixed seed so the estimate is reproducible.
        np.random.seed(5764)

        daily_counts = [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 7, 0, 10, 9, 15, 14, 34,
            24, 100, 54, 69, 37, 117, 161, 0, 487, 220, 841, 0, 500, 328, 1047,
            1219, 1281, 899, 1321, 1082, 1020, 914, 1117, 1148, 753, 1093, 683,
            1163, 1059, 779, 899, 595, 557, 596, 1027, 771, 500, 556, 308, 273,
            248, 400, 396, 346, 326, 336, 204, 119, 205, 228, 181, 217, 167,
            103, 100, 143, 179, 119, 112, 88, 76, 28, 51, 66, 81, 44, 54, 39,
            36, 33, 50, 51, 58, 15, 10, 21, 40, 36, 13, 18, 11, 10, 15, 15, 20,
            32, 17, 17, 9, 3, 19, 20, 23, 20, 9, 7, 16, 23, 33, 19, 31, 23, 14,
            23, 33, 13, 35, 8, 49, 18, 22, 44, 52, 58, 69, 62, 35, 62, 137,
            116, 134, 97, 70, 47, 54, 129, 88, 104, 127, 66, 63, 70, 132, 142,
            92, 110, 99, 43, 108, 141, 117, 154, 148, 110, 65, 132, 193, 220,
            210, 180, 138, 66, 130, 181, 181, 161, 182, 152, 105, 187, 274,
            234, 268, 253, 200, 128, 197, 311, 266, 306, 295, 276, 157, 202,
            383, 361, 340, 376, 292, 163, 216, 370, 364, 405, 425, 444, 191,
            245, 469, 405, 528, 465, 475, 257, 315, 514, 530, 488, 0, 0, 1095,
            286, 437, 391, 372, 0, 0, 782, 225, 411, 550, 552, 0, 0, 1548, 700,
            1077, 1172, 1487, 0, 0, 4068, 1445, 2823, 2613, 3105, 0, 0, 8737,
            3008, 5596, 5256, 6634, 0, 0, 17440, 5949, 8616, 9386, 9207, 0, 0,
            21926, 6126, 10073, 10128, 9409, 0, 0, 17309
        ]
        observation_days = pd.date_range('2020-01-22', periods=293)
        case_series = pd.Series(daily_counts, index=observation_days)

        reporting_delay = generate_onset_to_reporting_distribution_singapore()
        estimate = r_covid(confirmed_cases=case_series,
                           delay_distribution=reporting_delay,
                           quantiles=[0.5])

        # Reference medians for the estimate, in date order.
        expected_median = [
            2.59960214, 2.42713596, 2.36801365, 2.44343708, 2.54371424,
            2.59963764, 2.53764468, 2.39790211, 2.25661617, 2.1689964,
            2.13205504, 2.10722693, 2.05398289, 1.96185502, 1.82583515,
            1.67238124, 1.52266781, 1.39083715, 1.28338702, 1.19770325,
            1.12809483, 1.06886891, 1.01726886, 0.96890564, 0.92362263,
            0.88377431, 0.85152062, 0.82804287, 0.81421856, 0.80843819,
            0.80883319, 0.80933912, 0.80606134, 0.79563434, 0.77692084,
            0.7529111, 0.72841375, 0.70834919, 0.69675028, 0.69548764,
            0.69848636, 0.70033662, 0.6964799, 0.68619966, 0.67762849,
            0.67691045, 0.69190406, 0.71940879, 0.75094328, 0.77253703,
            0.77903016, 0.77054792, 0.75845293, 0.74712806, 0.74087147,
            0.73948702, 0.74120346, 0.73805086, 0.72855332, 0.71533214,
            0.70149376, 0.6932034, 0.68991684, 0.68851331, 0.68835197,
            0.68483663, 0.68632654, 0.70099351, 0.72582877, 0.76086942,
            0.79663442, 0.81960689, 0.82568108, 0.82178255, 0.80762545,
            0.79448565, 0.78327463, 0.77209426, 0.76407495, 0.76222464,
            0.7704921, 0.79284348, 0.82959292, 0.86446548, 0.89962207,
            0.9311943, 0.9579237, 0.98441243, 1.0352803, 1.08957905,
            1.11678579, 1.12860319, 1.12464211, 1.11692886, 1.12750097,
            1.15296689, 1.1838994, 1.22107088, 1.23894021, 1.2328738,
            1.21126435, 1.19421602, 1.18040038, 1.17694589, 1.18363591,
            1.19113706, 1.19995364, 1.22028044, 1.26160559, 1.32272207,
            1.38064407, 1.44175695, 1.48599467, 1.52381945, 1.55364961,
            1.57350822, 1.55720066, 1.49823225, 1.40417142, 1.3034397,
            1.21622314, 1.14951094, 1.10627253, 1.07663047, 1.05578566,
            1.03038797, 1.00369567, 0.98196295, 0.97145419, 0.97741591,
            0.99761393, 1.02539234, 1.05030807, 1.06660386, 1.07399873,
            1.08003171, 1.08947449, 1.10413773, 1.11814489, 1.12648959,
            1.13177592, 1.13838639, 1.15194605, 1.17238448, 1.19432446,
            1.20100826, 1.18565284, 1.15206383, 1.11238385, 1.07845761,
            1.05709792, 1.04774574, 1.04638855, 1.04756145, 1.04612918,
            1.0469924, 1.05948666, 1.07980256, 1.11200442, 1.14287345,
            1.16462608, 1.17054932, 1.16668208, 1.15999564, 1.15751063,
            1.15835069, 1.15497534, 1.14132904, 1.11762787, 1.0956717,
            1.08353587, 1.08382907, 1.09196015, 1.10182822, 1.10281747,
            1.091496, 1.07380952, 1.06056518, 1.05967571, 1.06934734,
            1.08446026, 1.09413929, 1.09177153, 1.08346686, 1.07614132,
            1.0780672, 1.08780339, 1.10104586, 1.11195392, 1.10750554,
            1.09311454, 1.07002757, 1.03841093, 1.00912397, 0.98801527,
            0.97950988, 0.97873168, 0.97970125, 0.97381753, 0.95784123,
            0.93552611, 0.91356957, 0.90312199, 0.90995052, 0.93441949,
            0.97278021, 1.01274102, 1.05082948, 1.09286146, 1.14838029,
            1.2297979, 1.34413185, 1.47177478, 1.57404505, 1.62573053,
            1.64020524, 1.65240012, 1.68002175, 1.71706891, 1.7367423,
            1.70830268, 1.64205551, 1.56982027, 1.52576074, 1.5187258,
            1.54104773, 1.5622353, 1.55376551, 1.51205394, 1.45996614,
            1.42411088, 1.4130697, 1.42072672, 1.42542989, 1.40307067,
            1.351099, 1.28347502, 1.22232452, 1.18062028, 1.15925023,
            1.14822265, 1.12199702, 1.07887335, 1.02846493, 0.99358597,
            0.97897602, 0.98055132, 0.99108749
        ]

        median_estimate = estimate['Q0.5']
        assert_array_almost_equal(expected_median, median_estimate, 1)
예제 #5
0
    def test_r_covid(self):
        """Check the date range and median R estimate returned by ``r_covid``.

        The daily case counts are indexed from 2020-02-25 to 2020-11-09.
        ``r_covid`` is called with the Singapore onset-to-reporting delay
        distribution and only the 0.5 quantile requested. The result index
        must run from 2020-03-01 to 2020-11-03 and the ``Q0.5`` column must
        match the reference values to 2 decimal places.
        """
        # Fixed seed so the estimate is reproducible.
        np.random.seed(5764)

        daily_counts = [
            1, 0, 7, 0, 10, 9, 15, 14, 34, 24, 100, 54, 69, 37, 117, 161, 0,
            487, 220, 841, 0, 500, 328, 1047, 1219, 1281, 899, 1321, 1082,
            1020, 914, 1117, 1148, 753, 1093, 683, 1163, 1059, 779, 899, 595,
            557, 596, 1027, 771, 500, 556, 308, 273, 248, 400, 396, 346, 326,
            336, 204, 119, 205, 228, 181, 217, 167, 103, 100, 143, 179, 119,
            112, 88, 76, 28, 51, 66, 81, 44, 54, 39, 36, 33, 50, 51, 58, 15,
            10, 21, 40, 36, 13, 18, 11, 10, 15, 15, 20, 32, 17, 17, 9, 3, 19,
            20, 23, 20, 9, 7, 16, 23, 33, 19, 31, 23, 14, 23, 33, 13, 35, 8,
            49, 18, 22, 44, 52, 58, 69, 62, 35, 62, 137, 116, 134, 97, 70, 47,
            54, 129, 88, 104, 127, 66, 63, 70, 132, 142, 92, 110, 99, 43, 108,
            141, 117, 154, 148, 110, 65, 132, 193, 220, 210, 180, 138, 66, 130,
            181, 181, 161, 182, 152, 105, 187, 274, 234, 268, 253, 200, 128,
            197, 311, 266, 306, 295, 276, 157, 202, 383, 361, 340, 376, 292,
            163, 216, 370, 364, 405, 425, 444, 191, 245, 469, 405, 528, 465,
            475, 257, 315, 514, 530, 488, 0, 0, 1095, 286, 437, 391, 372, 0, 0,
            782, 225, 411, 550, 552, 0, 0, 1548, 700, 1077, 1172, 1487, 0, 0,
            4068, 1445, 2823, 2613, 3105, 0, 0, 8737, 3008, 5596, 5256, 6634,
            0, 0, 17440, 5949, 8616, 9386, 9207, 0, 0, 21926, 6126, 10073,
            10128, 9409, 0, 0, 17309
        ]
        observation_days = pd.date_range('2020-02-25', '2020-11-09')
        case_series = pd.Series(daily_counts, index=observation_days)

        reporting_delay = generate_onset_to_reporting_distribution_singapore()
        estimate = r_covid(confirmed_cases=case_series,
                           delay_distribution=reporting_delay,
                           quantiles=[0.5])

        # The estimate covers a narrower date range than the input series.
        expected_days = pd.date_range('2020-03-01', '2020-11-03')
        self.assertTrue(estimate.index.equals(expected_days))

        # Reference medians for the estimate, in date order.
        expected_median = [
            4.58, 3.39, 2.63, 2.31, 2.25, 2.33, 2.36, 2.31, 2.23, 2.17, 2.14,
            2.12, 2.07, 1.97, 1.83, 1.67, 1.52, 1.39, 1.28, 1.20, 1.13, 1.07,
            1.02, 0.97, 0.92, 0.88, 0.85, 0.83, 0.82, 0.81, 0.81, 0.81, 0.81,
            0.80, 0.78, 0.75, 0.73, 0.71, 0.70, 0.70, 0.70, 0.70, 0.70, 0.69,
            0.68, 0.68, 0.69, 0.72, 0.75, 0.77, 0.78, 0.77, 0.75, 0.74, 0.74,
            0.74, 0.74, 0.74, 0.73, 0.71, 0.70, 0.69, 0.69, 0.69, 0.69, 0.69,
            0.69, 0.70, 0.73, 0.76, 0.79, 0.82, 0.83, 0.82, 0.81, 0.80, 0.78,
            0.77, 0.76, 0.76, 0.76, 0.79, 0.82, 0.87, 0.91, 0.93, 0.96, 1.00,
            1.04, 1.09, 1.11, 1.11, 1.12, 1.12, 1.14, 1.16, 1.19, 1.22, 1.24,
            1.23, 1.22, 1.19, 1.18, 1.18, 1.18, 1.19, 1.19, 1.22, 1.26, 1.32,
            1.38, 1.44, 1.49, 1.53, 1.57, 1.58, 1.56, 1.50, 1.41, 1.31, 1.22,
            1.15, 1.11, 1.08, 1.06, 1.03, 1.00, 0.98, 0.97, 0.98, 1.00, 1.03,
            1.05, 1.06, 1.07, 1.07, 1.09, 1.10, 1.11, 1.12, 1.13, 1.14, 1.15,
            1.17, 1.19, 1.20, 1.19, 1.15, 1.11, 1.07, 1.05, 1.05, 1.04, 1.04,
            1.04, 1.04, 1.06, 1.08, 1.11, 1.15, 1.17, 1.17, 1.17, 1.16, 1.16,
            1.16, 1.15, 1.14, 1.12, 1.10, 1.08, 1.08, 1.09, 1.10, 1.10, 1.09,
            1.08, 1.06, 1.06, 1.07, 1.09, 1.10, 1.09, 1.08, 1.08, 1.07, 1.09,
            1.10, 1.11, 1.11, 1.09, 1.07, 1.04, 1.01, 0.99, 0.98, 0.98, 0.98,
            0.97, 0.96, 0.94, 0.91, 0.91, 0.91, 0.94, 0.97, 1.01, 1.05, 1.09,
            1.15, 1.23, 1.34, 1.47, 1.57, 1.62, 1.64, 1.65, 1.68, 1.72, 1.74,
            1.71, 1.64, 1.57, 1.52, 1.52, 1.54, 1.56, 1.55, 1.51, 1.46, 1.42,
            1.41, 1.42, 1.43, 1.40, 1.35, 1.28, 1.22, 1.18, 1.16, 1.15, 1.12,
            1.08, 1.03, 0.99, 0.98, 0.98, 0.99
        ]

        median_estimate = estimate['Q0.5']
        assert_array_almost_equal(expected_median, median_estimate, 2)