Ejemplo n.º 1
0
def generate_and_append_data(data,
                             data_type,
                             truth,
                             age_intervals,
                             gbd_region='Asia, Southeast',
                             country='Thailand',
                             year=2005,
                             sex='male'):
    """Append one simulated observation per age interval to `data`.

    Each observation is the interval-averaged true rate perturbed by
    beta (over-dispersion) and binomial (sampling) noise.
    """
    for lo, hi in age_intervals:
        weights = np.ones(hi + 1 - lo)

        row = {'condition': 'type_2_diabetes',
               'data_type': data_type,
               'gbd_region': gbd_region,
               'region': country,
               'year_start': year,
               'year_end': year,
               'sex': sex,
               'age_start': lo,
               'age_end': hi,
               'age_weights': list(weights),
               'id': len(data)}

        # true rate over the interval, then beta-binomial noise
        # (dispersion and n come from the enclosing module scope)
        true_rate = dismod3.utils.rate_for_range(truth, range(lo, hi + 1),
                                                 weights)
        noisy_rate = mc.rbeta(true_rate * dispersion,
                              (1 - true_rate) * dispersion)
        observed = mc.rbinomial(n, noisy_rate) / n

        row['value'] = observed
        row['standard_error'] = np.sqrt(observed * (1 - observed) / n)

        data.append(row)
Ejemplo n.º 2
0
def simdata_postproc(sp_sub, survey_plan):
    """Simulate a binomial survey dataset from a Gaussian-field evaluation.

    `sp_sub` is the field evaluated at the survey plan locations; the
    inverse-logit maps it to probabilities, and one binomial count is
    drawn per location with sample sizes `survey_plan.n`.
    """
    return pm.rbinomial(survey_plan.n, pm.invlogit(sp_sub))
Ejemplo n.º 3
0
def plot_beta_binomial_funnel(alpha, beta):
    """Funnel plot of simulated beta-binomial rates vs. study size,
    with the observed `schiz` data overlaid."""
    mean_rate = alpha/(alpha+beta)
    rate_draws = mc.rbeta(alpha, beta, size=10000)

    sizes = pl.exp(mc.rnormal(10, 2**-2, size=10000))
    counts = mc.rbinomial(pl.array(sizes, dtype=int), rate_draws)
    observed_rates = counts/sizes

    # white underlay then dashed black line, so the marker is visible
    # on top of the scatter
    for width, style, shade, depth in [(2, '-', 'w', 9),
                                       (1, '--', 'black', 10)]:
        pl.vlines([mean_rate], .1*sizes.min(), 10*sizes.max(),
                  linewidth=width, linestyle=style, color=shade, zorder=depth)

    pl.plot(observed_rates, sizes, 'ko',
            mew=0, alpha=.25)

    pl.semilogy(schiz['r'], schiz['n'], 'ks', mew=1, mec='white', ms=4,
                label='Observed values')

    pl.xlabel('Rate (per PY)')
    pl.ylabel('Study size (PY)')
    pl.xticks([0, .005, .01])
    pl.axis([-.0001, .0101, 50., 1500000])
    pl.title(r'$\alpha=%d$, $\beta=%d$' % (alpha, beta))
Ejemplo n.º 4
0
def generate_and_append_data(data, data_type, truth, age_intervals,
                             gbd_region='Asia, Southeast', country='Thailand', year=2005, sex='male'):
    """Simulate one beta-binomial observation for each age interval and
    append it (as a dict) to the `data` list."""
    for age_start, age_end in age_intervals:
        n_ages = age_end + 1 - age_start

        record = {'condition': 'type_2_diabetes',
                  'data_type': data_type,
                  'gbd_region': gbd_region,
                  'region': country,
                  'year_start': year,
                  'year_end': year,
                  'sex': sex,
                  'age_start': age_start,
                  'age_end': age_end,
                  'age_weights': list(np.ones(n_ages)),
                  'id': len(data)}

        # interval-averaged true rate, perturbed first by beta
        # (over-dispersion) then by binomial sampling noise
        rate = dismod3.utils.rate_for_range(truth, range(age_start, age_end + 1), np.ones(n_ages))
        rate = mc.rbeta(rate * dispersion, (1 - rate) * dispersion)
        value = mc.rbinomial(n, rate) / n

        record['value'] = value
        record['standard_error'] = np.sqrt(value * (1 - value) / n)

        data.append(record)
Ejemplo n.º 5
0
def plot_beta_binomial_funnel(alpha, beta):
    """Draw a funnel plot: beta-binomial simulated rates against study
    size (log scale), plus the observed `schiz` points."""
    true_rate = alpha / (alpha + beta)
    rates_sampled = mc.rbeta(alpha, beta, size=10000)

    study_sizes = pl.exp(mc.rnormal(10, 2**-2, size=10000))
    events = mc.rbinomial(pl.array(study_sizes, dtype=int), rates_sampled)
    observed = events / study_sizes

    # vertical marker at the true rate: white underlay + dashed black line
    y_lo = .1 * study_sizes.min()
    y_hi = 10 * study_sizes.max()
    pl.vlines([true_rate], y_lo, y_hi,
              linewidth=2, linestyle='-', color='w', zorder=9)
    pl.vlines([true_rate], y_lo, y_hi,
              linewidth=1, linestyle='--', color='black', zorder=10)

    pl.plot(observed, study_sizes, 'ko', mew=0, alpha=.25)

    pl.semilogy(schiz['r'], schiz['n'], 'ks', mew=1, mec='white', ms=4,
                label='Observed values')

    pl.xlabel('Rate (per PY)')
    pl.ylabel('Study size (PY)')
    pl.xticks([0, .005, .01])
    pl.axis([-.0001, .0101, 50., 1500000])
    pl.title(r'$\alpha=%d$, $\beta=%d$' % (alpha, beta))
Ejemplo n.º 6
0
def pred(alpha=alpha, beta=beta):
    """Posterior-predictive proportion: beta draw for the rate, then a
    binomial count of n_pred trials, returned as a fraction."""
    rate = mc.rbeta(alpha, beta)
    return mc.rbinomial(n_pred, rate) / float(n_pred)
Ejemplo n.º 7
0
def pred(pi=pi):
    """Predictive proportion: binomial count of n_pred trials at rate pi."""
    count = mc.rbinomial(n_pred, pi)
    return count / float(n_pred)
Ejemplo n.º 8
0
def pred(pi=pi):
    """Simulate a binomial count over n trials with success rate pi."""
    simulated = mc.rbinomial(n, pi)
    return simulated
Ejemplo n.º 9
0
def simdata_postproc(sp_sub, survey_plan, a1, a2):
    """Simulate binomial survey counts from a latent field evaluation,
    using the Stukel generalized inverse-logit link with shapes a1, a2."""
    prob = pm.stukel_invlogit(sp_sub, a1, a2)
    return pm.rbinomial(survey_plan.n, prob)
Ejemplo n.º 10
0
 def p_pred(pi=pi, n=n_nonzero):
     """Predictive binomial proportion; the 1.0e-9 offset keeps the
     success probability strictly positive."""
     successes = mc.rbinomial(n, pi + 1.0e-9)
     return successes / (1.0 * n)
Ejemplo n.º 11
0
 def p_pred(pi=pi, n=n_nonzero):
     """Posterior-predictive proportion; 1.e-9 guards against p == 0."""
     k = mc.rbinomial(n, pi + 1.e-9)
     return k / (n * 1.)
Ejemplo n.º 12
0
 def f(sp_sub, n=n):
     """Binomial draw with success probability invlogit(sp_sub)."""
     prob = pm.invlogit(sp_sub)
     return pm.rbinomial(n=n, p=prob)
Ejemplo n.º 13
0
    
    # NOTE(review): fragment of a larger simulation script -- lon, lat, t,
    # r1, r2, c1, c2, N and gencirc are defined outside this excerpt.
    x = np.vstack((lon,lat,t)).T
    
    # spatial covariates generated from circles of radius r1, r2
    cov1 = gencirc(x,r1)
    cov2 = gencirc(x,r2)
    
    # mean surface: linear combination of the two covariates
    M = c1*cov1+c2*cov2
    S = pm.gp.FullRankCovariance(pm.gp.cov_funs.exponential.aniso_geo_rad, amp=.5, scale=.08, inc=.5, ecc=.5).cholesky(x[:,:2])
    
    # latent Gaussian field plus noise, squashed to probabilities
    # NOTE(review): np.random.normal(N) draws ONE value with mean N;
    # np.random.normal(size=N) may have been intended -- confirm.
    y = pm.rmv_normal_chol(M,S.T)+np.random.normal(N)*.1
    z = pm.flib.invlogit(y)
    
    lo_age = np.ones(N)*2
    up_age = np.ones(N)*10
    n = np.random.randint(10,500,size=N)
    # simulated positive / negative counts per location
    pos = pm.rbinomial(n, z)
    neg = n-pos
    
    data_file = np.rec.fromarrays([pos,neg,lo_age,up_age,lon,lat,t,cov1,cov2],names='pos,neg,lo_age,up_age,lon,lat,t,cov1,cov2')


# where_0 = np.where(M==0)
# where_1 = np.where(M==1)
# where_n1 = np.where(M==-1)
# 
# pl.figure(1)
# pl.clf()
# pl.hist(z[where_0])
# 
# pl.figure(2)
# pl.clf()
Ejemplo n.º 14
0
import numpy
import pymc
from pymc import rbinomial,Binomial,Normal,Gamma
import pylab
import scipy.stats

#numpy.random.seed(15)

Nsubj = 4
Ntrls = 100

# the data: per-subject hit counts (signal trials) and false-alarm
# counts (noise trials)
signal_resp = rbinomial(n=Ntrls, p=0.80, size=Nsubj)
noise_resp  = rbinomial(n=Ntrls, p=0.10, size=Nsubj)

# the model: vague group-level priors on mean d', mean bias, and their
# precisions
prior_md = Normal('prior_md', mu=0.0, tau=0.001, value=0.0)
prior_mc = Normal('prior_mc', mu=0.0, tau=0.001, value=0.0)
prior_taud = Gamma('prior_taud', alpha=0.001, beta=0.001, value=0.01)
prior_tauc = Gamma('prior_tauc', alpha=0.001, beta=0.001, value=0.01)

# per-subject d' and criterion.  Fixed: the original referenced the
# undefined names Pmd/Ptaud and Pmc/Ptauc (NameError); the priors
# defined directly above are clearly what was meant.
dprm = Normal('dprm', mu=prior_md, tau=prior_taud, size=Nsubj, value=[0,0,0,0])
bias = Normal('bias', mu=prior_mc, tau=prior_tauc, size=Nsubj, value=[0,0,0,0])

Phi = scipy.stats.norm.cdf

@pymc.deterministic
def hi(d=dprm, c=bias):
    """Hit rate implied by d' and criterion (equal-variance SDT)."""
    return Phi(+0.5*d - c)

@pymc.deterministic
Ejemplo n.º 15
0
def pred(pi=pi):
    """Binomial simulation of n trials with success probability pi."""
    return mc.rbinomial(n, pi)
Ejemplo n.º 16
0
import pylab as pl
import pymc as mc

import dismod3
import book_graphics

# re-import helpers when iterating interactively (Python 2 builtin reload)
reload(book_graphics)

# set font
book_graphics.set_font()

### @export 'binomial-model-funnel'
# fixed true rate used for every simulated study
pi_binomial_funnel = 0.004

# study sizes: log-normal, centered near e^10 ~= 22,000 person-years
n = pl.exp(mc.rnormal(10, 2 ** -2, size=10000))
# binomial event counts at the fixed rate (sizes rounded to ints)
k = mc.rbinomial(pl.array(n.round(), dtype=int), pi_binomial_funnel)
r = k / n

pl.figure(**book_graphics.half_page_params)
# white underlay so the dashed marker drawn next remains visible
pl.vlines(
    [pi_binomial_funnel],
    0.1 * n.min(),
    10 * n.max(),
    linewidth=2,
    linestyle="-",
    color="w",
    zorder=9,
    label="_nolegend_",
)
pl.vlines(
    [pi_binomial_funnel], 0.1 * n.min(), 10 * n.max(), linewidth=1, linestyle="--", color="k", zorder=10, label="$\pi$"
Ejemplo n.º 17
0
def pred(alpha=alpha, beta=beta, phi=phi):
    """Zero-inflated predictive draw: with probability phi return 0,
    otherwise a beta-binomial proportion of n_pred trials."""
    u = pl.rand()
    if u >= phi:
        rate = mc.rbeta(alpha, beta)
        return mc.rbinomial(n_pred, rate) / float(n_pred)
    return 0
Ejemplo n.º 18
0
def validate_rate_model(rate_type='neg_binom', data_type='epilepsy', replicate=0):
    """Fit a rate model to (possibly synthetic) prevalence data and score
    its predictions on a 25% hold-out set.

    :param rate_type: likelihood for the data model, e.g. 'neg_binom'
    :param data_type: 'epilepsy' uses the loaded data as-is; 'schiz' loads a
        csv; 'binom'/'poisson'/'normal'/'log_normal' replace values with
        synthetic draws; anything else raises TypeError
    :param replicate: offsets the random seed for repeated runs
    :returns: the fitted model, with hold-out metrics attached
    """
    # set random seed for reproducibility
    mc.np.random.seed(1234567 + replicate)
    
    # load data
    model = dismod3.data.load('/home/j/Project/dismod/output/dm-32377/')

    data = model.get_data('p')

    #data = data.ix[:20, :]
    
    # replace data with synthetic data if requested
    if data_type == 'epilepsy':
        # no replacement needed
        pass

    elif data_type == 'schiz':
        import pandas as pd
        data = pd.read_csv('/homes/abie/gbd_dev/gbd/tests/schiz.csv')
    
    elif data_type == 'binom':
        # binomial draws around the empirical mean, huge sample size
        N = 1.e6
        data['effective_sample_size'] = N
        mu = data['value'].mean()
        data['value'] = mc.rbinomial(N, mu, size=len(data.index)) / N

    elif data_type == 'poisson':
        N = 1.e6
        data['effective_sample_size'] = N
        mu = data['value'].mean()
        data['value'] = mc.rpoisson(N*mu, size=len(data.index)) / N

    elif data_type == 'normal':
        # normal noise with sd fixed at 12.5% of the mean
        mu = data['value'].mean()
        sigma = .125*mu
        data['standard_error'] = sigma
        data['value'] = mc.rnormal(mu, sigma**-2, size=len(data.index))

    elif data_type == 'log_normal':
        mu = data['value'].mean()
        sigma = .25
        data['standard_error'] = sigma*mu
        data['value'] = pl.exp(mc.rnormal(pl.log(mu), sigma**-2, size=len(data.index)))

    else:
        # Python 2 raise syntax; kept for compatibility with this codebase
        raise TypeError, 'Unknown data type "%s"' % data_type

    # sample prevalence data: hold out ~25% of rows for testing
    i_test = mc.rbernoulli(.25, size=len(data.index))
    i_nan = pl.isnan(data['effective_sample_size'])
    
    data['lower_ci'] = pl.nan
    data['upper_ci'] = pl.nan
    data.ix[i_nan, 'effective_sample_size'] = 0.
    # NOTE(review): this is sqrt(p*(1-p)) / ess, not sqrt(p*(1-p)/ess) as
    # elsewhere in this file -- confirm which was intended.
    data['standard_error'] = pl.sqrt(data['value']*(1-data['value'])) / data['effective_sample_size']
    data.ix[pl.isnan(data['standard_error']), 'standard_error'] = pl.inf

    # mask the hold-out rows so the fit cannot use them
    data['standard_error'][i_test] = pl.inf
    data['effective_sample_size'][i_test] = 0.

    # keep values strictly positive for log-based likelihoods
    data['value'] = pl.maximum(data['value'], 1.e-12)

    model.input_data = data


    # create model
    # TODO: set parameters in model.parameters['p'] dict
    # then have simple method to create age specific rate model
    #model.parameters['p'] = ...
    #model.vars += dismod3.ism.age_specific_rate(model, 'p')

    model.parameters['p']['parameter_age_mesh'] = [0,100]
    model.parameters['p']['heterogeneity'] = 'Very'
    model.vars['p'] = dismod3.data_model.data_model(
        'p', model, 'p',
        'all', 'total', 'all',
        None, None, None,
        rate_type=rate_type,
        interpolation_method='zero',
        include_covariates=False)
    
    # add upper bound on sigma in log normal model to help convergence
    #if rate_type == 'log_normal':
    #    model.vars['p']['sigma'].parents['upper'] = 1.5

    # add upper bound on sigma, zeta in offset log normal
    #if rate_type == 'offset_log_normal':
    #    model.vars['p']['sigma'].parents['upper'] = .1
    #    model.vars['p']['p_zeta'].value = 5.e-9
    #    model.vars['p']['p_zeta'].parents['upper'] = 1.e-8

    # fit model
    dismod3.fit.fit_asr(model, 'p', iter=20000, thin=10, burn=10000)
    #dismod3.fit.fit_asr(model, 'p', iter=100, thin=1, burn=0)

    # compare estimate to hold-out
    data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    data['lb_pred'] = model.vars['p']['p_pred'].stats()['95% HPD interval'][:,0]
    data['ub_pred'] = model.vars['p']['p_pred'].stats()['95% HPD interval'][:,1]

    import data_simulation
    model.test = data[i_test]
    data = model.test
    data['true'] = data['value']
    data_simulation.add_quality_metrics(data)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'test')
    data_simulation.finalize_results(model)


    return model
Ejemplo n.º 19
0
 def f(sp_sub, n=n):
     """Binomial sample with success probability invlogit(sp_sub)."""
     success_prob = pm.invlogit(sp_sub)
     return pm.rbinomial(n=n, p=success_prob)
Ejemplo n.º 20
0
def validate_rate_model(rate_type='neg_binom',
                        data_type='epilepsy',
                        replicate=0):
    """Fit a rate model and evaluate it against a random 25% hold-out.

    rate_type -- likelihood family for the data model ('neg_binom', ...)
    data_type -- which data to fit: real ('epilepsy', 'schiz') or
                 synthetic ('binom', 'poisson', 'normal', 'log_normal');
                 unknown strings raise TypeError
    replicate -- shifts the RNG seed so replicates differ
    Returns the fitted model with hold-out quality metrics attached.
    """
    # set random seed for reproducibility
    mc.np.random.seed(1234567 + replicate)

    # load data
    model = dismod3.data.load('/home/j/Project/dismod/output/dm-32377/')

    data = model.get_data('p')

    #data = data.ix[:20, :]

    # replace data with synthetic data if requested
    if data_type == 'epilepsy':
        # no replacement needed
        pass

    elif data_type == 'schiz':
        import pandas as pd
        data = pd.read_csv('/homes/abie/gbd_dev/gbd/tests/schiz.csv')

    elif data_type == 'binom':
        # replace values with binomial noise around the empirical mean
        N = 1.e6
        data['effective_sample_size'] = N
        mu = data['value'].mean()
        data['value'] = mc.rbinomial(N, mu, size=len(data.index)) / N

    elif data_type == 'poisson':
        N = 1.e6
        data['effective_sample_size'] = N
        mu = data['value'].mean()
        data['value'] = mc.rpoisson(N * mu, size=len(data.index)) / N

    elif data_type == 'normal':
        # sd pinned at 12.5% of the mean value
        mu = data['value'].mean()
        sigma = .125 * mu
        data['standard_error'] = sigma
        data['value'] = mc.rnormal(mu, sigma**-2, size=len(data.index))

    elif data_type == 'log_normal':
        mu = data['value'].mean()
        sigma = .25
        data['standard_error'] = sigma * mu
        data['value'] = pl.exp(
            mc.rnormal(pl.log(mu), sigma**-2, size=len(data.index)))

    else:
        # Python 2 raise-statement syntax, as used throughout this codebase
        raise TypeError, 'Unknown data type "%s"' % data_type

    # sample prevalence data: ~25% of rows become the test set
    i_test = mc.rbernoulli(.25, size=len(data.index))
    i_nan = pl.isnan(data['effective_sample_size'])

    data['lower_ci'] = pl.nan
    data['upper_ci'] = pl.nan
    data.ix[i_nan, 'effective_sample_size'] = 0.
    # NOTE(review): divides sqrt(p*(1-p)) by ess rather than taking
    # sqrt(p*(1-p)/ess); verify this is the intended standard error.
    data['standard_error'] = pl.sqrt(
        data['value'] * (1 - data['value'])) / data['effective_sample_size']
    data.ix[pl.isnan(data['standard_error']), 'standard_error'] = pl.inf

    # hide the hold-out rows from the likelihood
    data['standard_error'][i_test] = pl.inf
    data['effective_sample_size'][i_test] = 0.

    # avoid exact zeros, which break log-based likelihoods
    data['value'] = pl.maximum(data['value'], 1.e-12)

    model.input_data = data

    # create model
    # TODO: set parameters in model.parameters['p'] dict
    # then have simple method to create age specific rate model
    #model.parameters['p'] = ...
    #model.vars += dismod3.ism.age_specific_rate(model, 'p')

    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.parameters['p']['heterogeneity'] = 'Very'
    model.vars['p'] = dismod3.data_model.data_model(
        'p',
        model,
        'p',
        'all',
        'total',
        'all',
        None,
        None,
        None,
        rate_type=rate_type,
        interpolation_method='zero',
        include_covariates=False)

    # add upper bound on sigma in log normal model to help convergence
    #if rate_type == 'log_normal':
    #    model.vars['p']['sigma'].parents['upper'] = 1.5

    # add upper bound on sigma, zeta in offset log normal
    #if rate_type == 'offset_log_normal':
    #    model.vars['p']['sigma'].parents['upper'] = .1
    #    model.vars['p']['p_zeta'].value = 5.e-9
    #    model.vars['p']['p_zeta'].parents['upper'] = 1.e-8

    # fit model
    dismod3.fit.fit_asr(model, 'p', iter=20000, thin=10, burn=10000)
    #dismod3.fit.fit_asr(model, 'p', iter=100, thin=1, burn=0)

    # compare estimate to hold-out
    data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    data['lb_pred'] = model.vars['p']['p_pred'].stats()['95% HPD interval'][:,
                                                                            0]
    data['ub_pred'] = model.vars['p']['p_pred'].stats()['95% HPD interval'][:,
                                                                            1]

    import data_simulation
    model.test = data[i_test]
    data = model.test
    data['true'] = data['value']
    data_simulation.add_quality_metrics(data)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'test')
    data_simulation.finalize_results(model)

    return model
Ejemplo n.º 21
0
 def f(sp_sub, a, b, n=n):
     # probability from the latent field via inverse-logit
     p = pm.invlogit(sp_sub)
     # per-location beta draw (allele frequency heterogeneity)
     h = pm.rbeta(a, b, size=len(sp_sub))
     # NOTE(review): p_def is computed but never used -- the binomial
     # below samples with p, not p_def. Possible bug; confirm intent
     # before removing (the rbeta call also advances the RNG stream).
     p_def = g6pd.p_fem_def(p, h)
     return pm.rbinomial(n=n, p=p)
Ejemplo n.º 22
0
def simdata_postproc(sp_sub, survey_plan):
    """Convert a latent-field evaluation into simulated binomial survey
    counts with per-location sample sizes from the survey plan."""
    prob = pm.invlogit(sp_sub)
    return pm.rbinomial(survey_plan.n, prob)
Ejemplo n.º 23
0
 def f(sp_sub, a, b, n=n):
     # invlogit maps the latent field value to a probability
     p = pm.invlogit(sp_sub)
     # heterogeneity draws, one per element of sp_sub
     h = pm.rbeta(a,b,size=len(sp_sub))
     # NOTE(review): the return statement uses p, so p_def appears to be
     # dead code here -- verify whether p_def was meant to be passed to
     # rbinomial (removing it would also change the RNG sequence).
     p_def = g6pd.p_fem_def(p,h)
     return pm.rbinomial(n=n, p=p)
Ejemplo n.º 24
0
# NOTE(review): fragment of a larger script -- names, cv, n_data, n_pred,
# on, t, my_st, vals, lon, lat, V are defined outside this excerpt.
# Fill in random covariate columns for all but the last two names.
if len(names)>2:
    for name in names[:-2]:
        cv[name] = np.random.normal(size=n_data+n_pred)*on#np.ones(n_data)
cv['m'] = np.ones(n_data+n_pred)*on
cv['t'] = t*on
    
# spatiotemporal covariance function for the latent field
C = pm.gp.FullRankCovariance(my_st, amp=1, scale=1, inc=np.pi/4, ecc=.3,st=.1, sd=.5, tlc=.2, sf = .1)

dm = np.vstack((lon,lat,t)).T

C_eval = C(dm,dm)

# latent field: covariate mean + GP draw + iid nugget noise
f = pm.rmv_normal_cov(np.sum([cv[name]*vals[name] for name in names],axis=0), C_eval) + np.random.normal(size=n_data+n_pred)*np.sqrt(V)
p = pm.flib.invlogit(f)
# fixed sample size per location; binomial positives / negatives
ns = 100
pos = pm.rbinomial(ns, p)
neg = ns - pos

# Python 2 print statement
print p

# split simulated rows into a fitting set and a prediction set, save as csv
ra_data = np.rec.fromarrays((pos[:n_data], neg[:n_data], lon[:n_data], lat[:n_data]) + tuple([cv[name][:n_data] for name in names]), names=['pos','neg','lon','lat']+names)
pl.rec2csv(ra_data,'test_data.csv')

ra_pred = np.rec.fromarrays((pos[n_data:], neg[n_data:], lon[n_data:], lat[n_data:]) + tuple([cv[name][n_data:] for name in names]), names=['pos','neg','lon','lat']+names)
pl.rec2csv(ra_pred,'test_pred.csv')

# run the external inference driver on the saved data
os.system('infer cov_test test_db test_data.csv -t 10 -n 8 -i 100000')
# os.system('cov-test-predict test test_pred.csv 1000 100')
# 
# # ra_data = pl.csv2rec('test_data.csv')
# # ra_pred = pl.csv2rec('test_pred.csv')
Ejemplo n.º 25
0
 def p_pred(pi=pi_latent, n=n_nonzero):
     """Predictive binomial proportion from the latent rate."""
     successes = mc.rbinomial(n, pi)
     return successes / (1. * n)
Ejemplo n.º 26
0
import pylab as pl
import pymc as mc

import dismod3
import book_graphics
# refresh the plotting helper module (Python 2 builtin reload)
reload(book_graphics)

# set font
book_graphics.set_font()

### @export 'binomial-model-funnel'
# fixed true prevalence used for all simulated studies
pi_binomial_funnel = .004

# study sizes drawn log-normally, centered near e^10 person-years
n = pl.exp(mc.rnormal(10, 2**-2, size=10000))
# binomial event counts at the fixed rate (integer-rounded sizes)
k = mc.rbinomial(pl.array(n.round(), dtype=int), pi_binomial_funnel)
r = k / n

pl.figure(**book_graphics.half_page_params)
# white underlay line so the dashed marker drawn afterwards stands out
pl.vlines([pi_binomial_funnel],
          .1 * n.min(),
          10 * n.max(),
          linewidth=2,
          linestyle='-',
          color='w',
          zorder=9,
          label='_nolegend_')
pl.vlines([pi_binomial_funnel],
          .1 * n.min(),
          10 * n.max(),
          linewidth=1,
Ejemplo n.º 27
0
 def p_pred(pi=pi_latent, n=n_nonzero):
     """Binomial predictive draw, returned as a float proportion."""
     return mc.rbinomial(n, pi) / (n * 1.0)
Ejemplo n.º 28
0
def pred(alpha=alpha, beta=beta, phi=phi):
    """With probability phi emit an exact zero; otherwise return a
    beta-binomial proportion of n_pred trials."""
    inflate_to_zero = pl.rand() < phi
    if inflate_to_zero:
        return 0
    return mc.rbinomial(n_pred, mc.rbeta(alpha, beta)) / float(n_pred)
Ejemplo n.º 29
0
    ###############################
    # Simulate data.
    ###############################

    # How many datapoints?
    n = 250

    # Put down a random scattering of data locations on the unit sphere.
    X = spherical.well_spaced_mesh(n)

    # Generate some binomial data. Prevalence is going to be high at the equator, low at the poles.
    p_true = np.exp(-X[:,2]**2*5)

    # Number sampled and number positive.
    N = 100
    k = pm.rbinomial(N, p_true)
    
    ################################
    # Fit the model.
    ################################
    M = pm.MCMC(make_model(N,k,X,cholmod,spherical),db='hdf5')
    # keep only the unobserved scalar parameters for the adaptive update
    scalar_variables = filter(lambda x:not x.observed, [M.m, M.amp, M.kappa])
    if len(scalar_variables)>0:    
        M.use_step_method(pm.AdaptiveMetropolis, scalar_variables)
    # Comment to use the default AdaptiveMetropolis step method.
    # GMRFMetropolis kind of scales better to high dimensions, but may mix worse in low.
    M.use_step_method(pymc_objects.GMRFMetropolis, M.S, M.likelihood_string, M.M, M.Q, M.likelihood_variables, n_sweeps=100)

    # draw 1000 samples, burn 0, thin by 10
    M.isample(1000,0,10)
    
    ################################
Ejemplo n.º 30
0
 def f(sp_sub, a, n=n):
     """Binomial draw using the Stukel inverse-logit link with shape
     parameters unpacked from a."""
     prob = pm.stukel_invlogit(sp_sub, *a)
     return pm.rbinomial(n=n, p=prob)
Ejemplo n.º 31
0
def deaths_sim(n=n, p=theta):
    """Simulate death counts: a binomial draw of n trials at rate p."""
    simulated_deaths = pm.rbinomial(n, p)
    return simulated_deaths
Ejemplo n.º 32
0
def create_test_rates(rate_function_str='(age/100.0)**2', rate_type='prevalence data',
                      age_list=None, num_subjects=1000):
    """Create and save simulated Rate records for a test disease.

    rate_function_str -- either a Python expression in `age` (evaluated
        with eval -- trusted input only) or an Nx2 array of (age, rate)
        pairs to interpolate
    rate_type -- stored on each Rate record (e.g. 'prevalence data')
    age_list -- ages to simulate; defaults to 20 random ages in [0, 90]
    num_subjects -- denominator for each simulated rate
    Returns (rate_list, rate_function).
    """
    import dismod3.models as models

    if not age_list:
        #age_list = range(0,101,10)
        age_list = np.random.random_integers(0,90,20)

    # shared fields for every simulated Rate record
    params = {}
    params['disease'], flag = models.Disease.objects.get_or_create(name='Test Disease')
    params['region'], flag = models.Region.objects.get_or_create(name='World')
    params['rate_type'] = rate_type
    params['sex'] = 'total'
    params['country'] = 'Canada'
    params['epoch_start'] = 2000
    params['epoch_end'] = 2000
    

    rate_list = []

    # TODO: make this safe and robust
    # WARNING(review): eval on a string argument -- only call with
    # trusted, test-generated input.
    if isinstance(rate_function_str, str):
        rate_function = eval('lambda age: %s'%rate_function_str)
    else:
        from scipy.interpolate import interp1d

        rf_vals = np.array(rate_function_str) # it is actually an Nx2 array
        rate_function = interp1d(rf_vals[:,0], rf_vals[:,1], kind='cubic')


    # tabulate the true rate at every integer age 0..100
    rate_vec = np.array([rate_function(a) for a in range(101)])
    
    for a in age_list:
        #params['age_start'] = a-5
        params['age_start'] = a

        #params['age_end'] = params['age_start']
        #params['age_end'] = a+5
        # random interval end, so intervals overlap and vary in width
        params['age_end'] = np.random.random_integers(a, 100)

        params['denominator'] = num_subjects
        params['numerator'] = 0
        
        new_rate = models.Rate(**params)
        new_rate.params['Notes'] = 'Simulated data, created using function %s' % rate_function_str
        # coin-flip covariate (old Python and-or conditional idiom)
        new_rate.params['Urbanicity'] = (np.random.randn() > 0) and 'Urban' or 'Rural'
        
        new_rate.save()

        # population-weighted average of the true rate over the interval
        p = probabilistic_utils.rate_for_range(rate_vec, new_rate.age_start, new_rate.age_end, new_rate.population())

        # adjust p to make data heterogeneous according to 'Urbanicity' covariate
        if new_rate.params['Urbanicity'] == 'Urban':
            p *= 1.5

        #multiplicative_noise = 1.
        #multiplicative_noise = 1 + 0.1*np.random.randn()
        #new_rate.numerator = multiplicative_noise * \
        #                     new_rate.denominator * probabilistic_utils.rate_for_range(rate_vec, new_rate.age_start, new_rate.age_end, new_rate.population())
        new_rate.numerator = mc.rbinomial(new_rate.denominator, p)

        new_rate.save()

        
        rate_list.append(new_rate)

    return rate_list, rate_function