コード例 #1
0
def simulate_posterior(region, params, dates, initial, N = 1000, weekly = False,
                       parI = (1,1), parR = (1,1),parD = (1,1), random_params = False):
    """Simulate from the HMM model.

    Draws N latent trajectories with `transition`, joins each one to the
    observed data on `date` and passes the I, R and D compartments through
    `emission`.

    Args:
        region (str): Region for the data.
        params (pandas.DataFrame): Optimized parameters. The columns `start`
            and `end` are read here; the frame is then handed to `transition`.
            The caller's frame is not modified.
        dates (tuple (2)): Date range of the data.
        initial (dict): Initial values in dict with keys S,E,I,R,D.
        N (int): Number of samples.
        weekly (bool, optional): Weekly time step if True, otherwise daily.
        parI (tuple (2)): Prior parameters for emission model I. By default (1,1).
        parR (tuple (2)): Prior parameters for emission model R. By default (1,1).
        parD (tuple (2)): Prior parameters for emission model D. By default (1,1).
        random_params (bool, optional): Bayesian parameters if True, otherwise single point.

    Returns:
        tuple: `((sim_lat, sim_obs), last_values)`.
            `sim_lat` and `sim_obs` are arrays of shape (5, N, T) whose first
            axis follows the order S,E,I,R,D and where T is the number of rows
            of the merged data. Rows 1-2 are multiplied by `tests` and rows
            3-4 by `cumtests` before returning. Only rows 2-4 (I, R, D) of
            `sim_obs` are filled; rows 0-1 stay zero.
            `last_values` is the per-compartment mean over the N samples of
            the latent state at the last time step, taken before that
            rescaling.
    """
    x = _posterior_data(region, dates, weekly=weekly)\
        .reset_index(drop = True)
    POP = population.get_population(region)
    # keep only parameter segments that start inside the range and clip their
    # end to it; work on an explicit copy so the assignment below is not a
    # write into a slice of the caller's frame
    params = params[params.start <= dates[1]].copy()
    if (params.end > dates[1]).any():
        params.loc[params.end > dates[1], 'end'] = dates[1]
    # one preliminary run, used only to learn the length of the merged series
    latent = transition(POP, initial, params, random_params=random_params)
    Dw = x.merge(latent, how='left', on=['date']).shape[0]
    sim_lat = np.zeros((5,N,Dw))
    sim_obs = np.zeros((5,N,Dw))
    for i in range(N):
        if i == 0 or (i+1) % 100 == 0:
            print('%4d / %d' % (i+1,N))
        # transition
        latent = transition(POP, initial, params, random_params=random_params)
        # clip negative infections; .loc is required here, chained indexing
        # (latent[mask]['I'] = 0) would assign into a temporary copy
        latent.loc[latent.I < 0, 'I'] = 0
        xx = x.merge(latent, how='left', on=['date'])
        # anything that is not a non-negative test count (negative, NaN) becomes 1
        xx['tests'] = xx['tests'].apply(lambda t: t if t >= 0 else 1)
        sim_lat[:,i,:] = xx[['S','E','I','R','D']].to_numpy().T
        # emission
        try:
            sim_obs[2,i,:] = emission(np.abs(xx.I.to_numpy()), xx.tests.to_numpy(), *parI)
        except Exception:
            # dump the offending inputs, then propagate the original error
            print(xx.I)
            print(xx.tests)
            raise
        sim_obs[3,i,:] = emission(xx.R.to_numpy(), xx.cumtests.to_numpy(), *parR)
        sim_obs[4,i,:] = emission(xx.D.to_numpy(), xx.cumtests.to_numpy(), *parD)
    # spare last (must happen before the rescaling below)
    last_values = sim_lat[:,:,-1].mean(axis = 1)
    # denormalize probability
    sim_lat[1:3,:,:] = sim_lat[1:3,:,:] * x.tests.to_numpy()
    sim_lat[3:5,:,:] = sim_lat[3:5,:,:] * x.cumtests.to_numpy()
    sim_obs[1:3,:,:] = sim_obs[1:3,:,:] * x.tests.to_numpy()
    sim_obs[3:5,:,:] = sim_obs[3:5,:,:] * x.cumtests.to_numpy()
    return (sim_lat, sim_obs), last_values
コード例 #2
0
def covid_deaths(save=False, name='img/data/deaths_per100K.png'):
    """Constructs the trace plot of Covid-19 deaths.

    Deaths of CZ, PL, IT and SE between 2020-03-01 and 2021-05-01 are
    normalized per 100K inhabitants, summed per week (Monday to Sunday) and
    drawn as one line per country.

    Args:
        save (bool, optional): Whether to save the figure, defaultly not.
        name (str, optional): Path to save the plot to.
    """
    # get data
    countries = ['CZ', 'PL', 'IT', 'SE']
    xx = pd.concat([
        posterior._posterior_data(country,
                                  (datetime(2020, 3, 1), datetime(2021, 5, 1)))
        for country in countries
    ])
    # population
    POP = {
        country: population.get_population(country)
        for country in countries
    }
    xx['POP'] = xx.region.apply(POP.get)
    # normalize
    xx['deaths100K'] = xx.deaths / xx.POP * 1e5

    # to weekly
    def week_start(d):
        """Return midnight of the Monday that starts the week containing d."""
        ts = pd.Timestamp(d)
        return ts.normalize() - pd.Timedelta(days=ts.weekday())

    # Bucket by the Monday of each week rather than by (year, %W): with %W the
    # days before the first Monday of a year form "week 0", which maps to the
    # same Monday as the last week of the previous year, so the week around
    # New Year would be split into two points drawn at the same date.
    xx['week_start'] = xx.date.apply(week_start)
    xx = xx\
        .groupby(['week_start', 'region'])\
        .aggregate({'deaths100K': 'sum'})\
        .reset_index(drop=False)\
        .rename(columns={'week_start': 'date'})
    # plot
    fig, ax = plt.subplots(figsize=(8, 6))
    for label, df in xx.groupby('region'):
        ax.plot(df.date, df.deaths100K, label=label)
    ax.set_xlabel('Date')
    ax.set_ylabel('Deaths per 100K')
    ax.legend()
    if save: fig.savefig(name)
コード例 #3
0
def _load_data(dates):
    """Load Covid-19 statistics in the appropriate format.

    Weekly data are fetched for every key of `model/regions.json` longer than
    two characters and normalized per 1K inhabitants. The result is kept in
    the module-level `_cache` and reused for as long as the same `dates` are
    requested.

    Args:
        dates (tuple (2)): Date range of the data.

    Returns:
        tuple: `((data_c, data_d, data_r), dateaxis, (regions, regions, regions_r))`.
            `data_c`, `data_d` and `data_r` are 2D arrays (one row per
            region) of confirmed, deaths and recovered per 1K, or None when
            no region contributed. `dateaxis` is the `date` column of the
            first region. `regions_r` lists the regions that have a
            `recovered` column, i.e. the rows of `data_r`.
    """
    global _cache
    # The cache holds a single result; remember which date range produced it
    # so that a request for another range is not served stale data.
    cache_key = tuple(dates)
    if _cache is not None and getattr(_load_data, '_cache_dates', None) == cache_key:
        return _cache
    # regions
    with open('model/regions.json') as fp:
        regions = [k for k in json.load(fp) if len(k) > 2]
    # fetch data
    rows_c, rows_d, rows_r = [], [], []
    dateaxis = None
    regions_r = []
    for reg in regions:
        x = posterior._posterior_data(reg, dates, weekly=True)
        if dateaxis is None:
            dateaxis = x.date
        POP = population.get_population(reg)
        # normalize by population
        x['I1K'] = x.confirmed / POP * 1e3
        x['D1K'] = x.deaths / POP * 1e3
        # confirmed
        rows_c.append(x.I1K.to_numpy().reshape((1, -1)))
        # deaths
        rows_d.append(x.D1K.to_numpy().reshape((1, -1)))
        # recovered (not available for every region)
        if 'recovered' in x:
            x['R1K'] = x.recovered / POP * 1e3
            # store the normalized series, consistently with confirmed/deaths
            rows_r.append(x.R1K.to_numpy().reshape((1, -1)))
            regions_r.append(reg)
    # assemble each matrix once instead of re-copying it for every region
    data_c = np.concatenate(rows_c, axis=0) if rows_c else None
    data_d = np.concatenate(rows_d, axis=0) if rows_d else None
    data_r = np.concatenate(rows_r, axis=0) if rows_r else None
    _cache = (data_c, data_d, data_r), dateaxis, (regions, regions, regions_r)
    _load_data._cache_dates = cache_key
    return _cache
コード例 #4
0
def covid_recovered(save=False, name='img/data/recovered_per100K.png'):
    """Constructs the trace plot of Covid-19 recovered.

    Recovered counts of CZ, PL and IT between 2020-03-01 and 2021-05-01 are
    normalized per 100K inhabitants and drawn as one line per country, at the
    time step returned by `posterior._posterior_data`.

    Args:
        save (bool, optional): Whether to save the figure, defaultly not.
        name (str, optional): Path to save the plot to.
    """
    # get data
    countries = ['CZ', 'PL', 'IT']
    xx = pd.concat([
        posterior._posterior_data(country,
                                  (datetime(2020, 3, 1), datetime(2021, 5, 1)))
        for country in countries
    ])
    # population
    POP = {
        country: population.get_population(country)
        for country in countries
    }
    xx['POP'] = xx.region.apply(POP.get)
    # normalize the recovered counts (this plot is about recovered, not deaths)
    xx['recovered100K'] = xx.recovered / xx.POP * 1e5

    # plot
    fig, ax = plt.subplots(figsize=(8, 6))
    for label, df in xx.groupby('region'):
        # PL is drawn semi-transparent
        alpha = 1 if label != 'PL' else .5
        ax.plot(df.date, df.recovered100K, label=label, alpha=alpha)
    ax.set_xlabel('Date')
    ax.set_ylabel('Recovered per 100K')
    ax.legend()
    if save: fig.savefig(name)
コード例 #5
0
def posterior_objective(params, region, dates, initial, fixparams = None, weekly=False,
                        attributes = 'IRD', parI = (1,1), parR = (1,1), parD = (1,1)):
    """Objective function of the HMM model for optimization.

    A single parameter segment covering the whole date range is run through
    `transition`, joined to the observed data on `date`, and scored with
    `emission_objective` for every requested attribute.

    Args:
        params (list): Optimized parameters.
        region (str): Region for the data.
        dates (tuple (2)): Date range of the data.
        initial (dict): Initial values in dict with keys S,E,I,R,D.
        fixparams (list): Fixed parameters.
        weekly (bool, optional): Weekly time step if True, otherwise daily.
        attributes (str, optional): Attributes used for optimization, any
            combination of 'I', 'R' and 'D'.
        parI (tuple (2)): Prior parameters for emission model I.
        parR (tuple (2)): Prior parameters for emission model R.
        parD (tuple (2)): Prior parameters for emission model D.

    Returns:
        Sum of the emission objectives of the selected attributes divided by
        the number of days in the date range.
    """
    observed = _posterior_data(region, dates, weekly=weekly)
    pop_size = population.get_population(region)
    # single-segment parameter frame spanning the whole range
    a,c,b,d = _parse_params(params, fixparams)
    segment = pd.DataFrame({
        'start': [dates[0]],
        'end': [dates[1]],
        'a': [a],
        'b': [b],
        'c': [c],
        'd': [d],
    })
    n_days = (dates[1] - dates[0]).days + 1
    # latent trajectory, negative infections clipped to zero
    latent = transition(pop_size, initial, segment)
    latent.loc[latent.I < 0,'I'] = 0
    joined = observed.merge(latent, how='left', on=['date'])
    # accumulate the score attribute by attribute
    total = 0
    if 'I' in attributes:
        tests = joined.tests.to_numpy()
        confirmed_ratio = joined.confirmed.to_numpy() / tests
        latent_i = np.abs(joined.I.to_numpy())
        total += emission_objective(confirmed_ratio, latent_i, tests, *parI)
    if 'D' in attributes:
        cumtests = joined.cumtests.to_numpy()
        deaths_ratio = joined.deaths.cumsum().to_numpy() / cumtests
        total += emission_objective(deaths_ratio, joined.D.to_numpy(), cumtests, *parD)
    if 'R' in attributes:
        cumtests = joined.cumtests.to_numpy()
        recovered_ratio = joined.recovered.cumsum().to_numpy() / cumtests
        total += emission_objective(recovered_ratio, joined.R.to_numpy(), cumtests, *parR)
    return total / n_days
コード例 #6
0
ファイル: optimize.py プロジェクト: martinbenes1996/732A64
def run(region, N=1000):
    """Run model simulation.

    Reads the region's entry from `model/regions.json`, overlays it on the
    built-in defaults, fits the parameters with `optimize_spline`, simulates
    N samples with `posterior.simulate_posterior` and stores the outcome via
    `_results.save`.

    Args:
        region (str): Region to run the simulation for. Upper-cased and
            stripped before use.
        N (int, optional): Number of samples.

    Raises:
        KeyError: If the region has no entry in `model/regions.json`.
    """
    region = region.upper().strip()
    print(region)
    # load config; top-level keys of the region entry override the defaults
    # (shallow merge: a nested dict such as 'initial' is replaced as a whole)
    with open("model/regions.json") as fp:
        _config = json.load(fp)
    config = _config[region]
    config = {
        'dates': ('2020-08-01', '2021-03-13'),
        'window': 7,
        'weekly': False,
        'attributes': 'IRD',
        'initial': {
            'E': .1,
            'I': .1,
            'R': 0,
            'D': 0
        },
        'emission': {
            'I': (1, 1),
            'R': (1, 1),
            'D': (1, 1)
        },
        **config
    }
    # value is not used below; the lookup is kept as in the original code
    POP = population.get_population(region)
    # parse
    dates = [datetime.strptime(d, "%Y-%m-%d") for d in config['dates']]
    window = config['window']
    weekly = config['weekly']
    attributes = config['attributes'].upper()
    initial = [
        1 - sum(config['initial'].values()),  # S (remainder of the unit mass)
        config['initial'].get('E', 0),  # E
        config['initial'].get('I', 0),  # I
        config['initial'].get('R', 0),  # R
        config['initial'].get('D', 0),  # D
    ]
    emission = [
        config['emission'].get('I', (1, 1)),
        config['emission'].get('R', (1, 1)),
        config['emission'].get('D', (1, 1)),
    ]
    # optimize
    params = optimize_spline(region,
                             dates,
                             initial=initial,
                             attributes=attributes,
                             emission=emission,
                             window=window,
                             weekly=weekly)
    # simulate result; use the same time step the parameters were fitted on
    (sim_lat,
     sim_obs), last_values = posterior.simulate_posterior(region=region,
                                                          params=params,
                                                          dates=dates,
                                                          N=N,
                                                          initial=initial,
                                                          weekly=weekly,
                                                          parI=emission[0],
                                                          parR=emission[1],
                                                          parD=emission[2])
    # save result
    _results.save((sim_lat, sim_obs), dates, region, params)