Example #1
1
def logit_normal_draw(cf_mean, std, N, J):
    std = pl.array(std)
    if mc.__version__ == '2.0rc2': # version on Omak 
        X = [mc.invlogit(mc.rnormal(mu=cf_mean, tau=std**-2)) for n in range(N)]
        Y = pl.array(X)
    else: 
        X = mc.rnormal(mu=cf_mean, tau=std**-2, size=(N,J))
        Y = mc.invlogit(X)
    return Y
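A minimal usage sketch of logit_normal_draw, assuming pylab is imported as pl and PyMC 2 as mc (as the snippet itself requires); the argument values are invented for illustration:

import pylab as pl
import pymc as mc

# J=3 cause fractions in logit space, drawn N=100 times
draws = logit_normal_draw(cf_mean=pl.zeros(3), std=[.5, .5, .5], N=100, J=3)
print(draws.shape)  # (100, 3); every entry lies in (0, 1) after the invlogit transform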
Example #2
0
def logit_normal_draw(cf_mean, std, N, J):
    std = pl.array(std)
    if mc.__version__ == '2.0rc2':  # version on Omak
        X = [
            mc.invlogit(mc.rnormal(mu=cf_mean, tau=std**-2)) for n in range(N)
        ]
        Y = pl.array(X)
    else:
        X = mc.rnormal(mu=cf_mean, tau=std**-2, size=(N, J))
        Y = mc.invlogit(X)
    return Y
Example #3
0
def ages_and_data(N_exam, f_samp, correction_factor_array, age_lims):
    """Called by pred_samps. Simulates ages of survey participants and data given f."""

    N_samp = len(f_samp)
    N_age_samps = correction_factor_array.shape[1]

    # Get samples for the age distribution at the observation points.
    age_distribution = []
    for i in xrange(N_samp):
        l = age_lims[i]
        age_distribution.append(S_trace[np.random.randint(S_trace.shape[0]), 0,
                                        l[0]:l[1] + 1])
        age_distribution[-1] /= np.sum(age_distribution[-1])

    # Draw age for each individual, draw an age-correction profile for each location,
    # compute probability of positive for each individual, see how many individuals are
    # positive.
    A = []
    pos = []
    for s in xrange(N_samp):
        A.append(
            np.array(pm.rcategorical(age_distribution[s], size=N_exam[s]),
                     dtype=int) + age_lims[s][0])
        P_samp = pm.invlogit(f_samp[s].ravel(
        )) * correction_factor_array[:, np.random.randint(N_age_samps)][A[-1]]
        pos.append(pm.rbernoulli(P_samp))

    return A, pos, age_distribution
Example #4
0
def generate_synthetic_data(truth, key, d):
    """ create simulated data"""
    a0 = d['age_start']
    a1 = d['age_end']
    age_weights = d['age_weights']
        
    d.update(condition='type_2_diabetes',
             year_start=y,
             year_end=y)

    p0 = dismod3.utils.rate_for_range(truth[key], range(a0, a1 + 1), np.ones(a1 + 1 - a0)/(a1+1-a0))
    p0 = dismod3.utils.trim(p0, 1.e-6, 1. - 1.e-6)

    # TODO: make beta dispersion study level (instead of datum level)
    # p1 = mc.rbeta(p0 * dispersion, (1 - p0) * dispersion)
    p1 = p0

    # TODO: add additional covariates
    if key.find('prevalence') != -1:
        if random.random() < .1:
            d['self-reported'] = True
            p1 = mc.invlogit(mc.logit(p1) - .2)
        else:
            d['self-reported'] = False
    
    #p2 = mc.rbinomial(n, p1) / n
    p2 = float(p1)
    
    d['value'] = p2
    d['standard_error'] = .0001

    return d
Example #5
0
def simdata_postproc(sp_sub, survey_plan):
    """
    This function should take a value for the Gaussian random field in the submodel 
    sp_sub, evaluated at the survey plan locations, and return a simulated dataset.
    """
    p = pm.invlogit(sp_sub)
    n = survey_plan.n
    return pm.rbinomial(n, p)
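A minimal usage sketch of simdata_postproc, assuming NumPy as np and PyMC 2 as pm; SurveyPlan is a hypothetical stand-in for any object exposing an array attribute n of examination counts, which is all the function uses:

import numpy as np
import pymc as pm

class SurveyPlan(object):
    n = np.array([50, 80, 120])          # number examined at each survey location

sp_sub = np.array([-2.0, -1.0, 0.5])     # field values at the survey-plan locations
print(simdata_postproc(sp_sub, SurveyPlan()))  # simulated counts of positives per location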
Example #6
0
def sim_data(N,
             true_cf=[[.3, .6, .1], [.3, .5, .2]],
             true_std=[[.2, .05, .05], [.3, 0.1, 0.1]],
             sum_to_one=True):
    """ 
    Create an NxTxJ matrix of simulated data (T is determined by the length 
    of true_cf, J by the length of the elements of true_cf). 

    true_cf - a list of lists of true cause fractions (each must sum to one)
    true_std - a list of lists of the standard deviations corresponding to the true csmf's 
             for each time point. Can either be a list of length J inside a list of length
             1 (in this case, the same standard deviation is used for all time points) or 
             can be T lists of length J (in this case, a separate standard deviation 
             is specified and used for each time point). 
    """

    if sum_to_one == True:
        assert pl.allclose(pl.sum(true_cf, 1),
                           1), 'The sum of elements of true_cf must equal 1'
    T = len(true_cf)
    J = len(true_cf[0])

    ## if only one std provided, duplicate for all time points
    if len(true_std) == 1 and len(true_cf) > 1:
        true_std = [true_std[0] for i in range(len(true_cf))]

    ## transform the mean and std to logit space
    transformed_std = []
    for t in range(T):
        pi_i = pl.array(true_cf[t])
        sigma_pi_i = pl.array(true_std[t])
        transformed_std.append(
            ((1 / (pi_i * (pi_i - 1)))**2 * sigma_pi_i**2)**0.5)

    ## find minimum standard deviation (by cause across time) and draw from this
    min = pl.array(transformed_std).min(0)
    common_perturbation = [
        pl.ones([T, J]) * mc.rnormal(mu=0, tau=min**-2) for n in range(N)
    ]

    ## draw from remaining variation
    tau = pl.array(transformed_std)**2 - min**2
    tau[tau == 0] = 0.000001
    additional_perturbation = [
        [mc.rnormal(mu=0, tau=tau[t]**-1) for t in range(T)] for n in range(N)
    ]

    result = pl.zeros([N, T, J])
    for n in range(N):
        result[n, :, :] = [
            mc.invlogit(
                mc.logit(true_cf[t]) + common_perturbation[n][t] +
                additional_perturbation[n][t]) for t in range(T)
        ]

    return result
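A minimal usage sketch of sim_data, assuming pylab as pl and PyMC 2 as mc are importable as above; the cause fractions and standard deviations are simply the function's documented defaults:

data = sim_data(N=10,
                true_cf=[[.3, .6, .1], [.3, .5, .2]],
                true_std=[[.2, .05, .05], [.3, .1, .1]])
print(data.shape)  # (10, 2, 3): N draws x T time points x J causes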
Example #7
def known_age_corr_likelihoods_f(pos, A, fac_array, f_mesh, nug, type=None):
    """
    Computes spline representations over P_mesh for the likelihood 
    of N_pos | N_exam, A
    """

    # TODO: Optimize large-N case using CLT of some kind.

    # Allocate work and output arrays.
    N_recs = len(A)

    likelihoods = empty((N_recs, len(f_mesh)))
    likes_now = empty((fac_array.shape[1], len(f_mesh)), dtype=float128)
    splreps = []

    p1 = invlogit(f_mesh)

    # For each record
    for i in xrange(N_recs):
        posi = pos[i]
        Ai = A[i]
        spi = np.sum(posi)
        negi = 1. - posi

        if type is None:
            if len(Ai) < 100:
                fn = outer_small
            else:
                fn = outer_large
        elif type == 's':
            fn = outer_small
        else:
            fn = outer_large

        likelihoods[i, :] = fn(p1, fac_array, Ai, spi, posi, negi, likes_now)

        # Clean out occasional infinities on the edges.
        good_indices = where(1 - isinf(likelihoods[i, :]))[0]

        # Compute spline representations.
        this_splrep = interp.splrep(x=f_mesh[good_indices],
                                    y=likelihoods[i, good_indices].squeeze())

        def this_fun(x,
                     sp=this_splrep,
                     Pml=f_mesh[good_indices].min(),
                     Pmh=f_mesh[good_indices].max()):
            out = np.atleast_1d(interp.splev(x, sp))
            if np.any(x < Pml) or np.any(x > Pmh):
                out[np.where(x < Pml)] = -np.Inf
                out[np.where(x > Pmh)] = -np.Inf
            return out.reshape(np.shape(x))

        splreps.append(this_fun)
    return splreps
Example #8
def p_wells(base_fx=base_fx,
            batch_fx=batch_fx,
            plate_fx=plate_fx,
            batchrow_fx=batchrow_fx,
            batchcol_fx=batchcol_fx,
            treatment_fx=treatment_fx):
    # use this ordering to make everything turn into an ArrayContainer
    return invlogit(treatment_fx[treatment_idxs] + 
                    base_fx +
                    batch_fx[batch_idxs] +
                    plate_fx[plate_idxs] +
                    batchrow_fx[batchrow_idxs] +
                    batchcol_fx[batchcol_idxs])
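An illustrative check of the additive logit-scale model used in p_wells; the effect values below are made up, not taken from any fitted model:

import numpy as np

effects = 0.4 + (-1.2) + 0.3 + 0.1 + (-0.2)   # treatment + base + batch + plate + row/col effects
p = 1.0 / (1.0 + np.exp(-effects))            # equivalent to invlogit(effects)
print(round(p, 3))                            # 0.354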
Example #9
0
def PR_samps(mesh, Ms, Cs, Vs, ind, facs):
    """
    Converts a mean function, covariance function, nugget and array of correction factors
    to a sample for the average of parasite rate over a given spatiotemporal mesh.
    """
    nm = mesh.shape[0]        
    samps = np.empty((len(ind), nm))
    for i in ind:
        C = Cs[i](mesh, mesh)
        C[::nm+1] += Vs[i]
        samps[i,:] = pm.invlogit(pm.mv_normal_cov(Ms[i](mesh), C).ravel()) * facs[A[i]]

    return np.mean(samps,axis=1)
Example #10
0
def known_age_corr_likelihoods_f(pos, A, fac_array, f_mesh, nug, type=None):
    """
    Computes spline representations over P_mesh for the likelihood 
    of N_pos | N_exam, A
    """

    # TODO: Optimize large-N case using CLT of some kind.

    # Allocate work and output arrays.
    N_recs = len(A)

    likelihoods = empty((N_recs, len(f_mesh)))
    likes_now = empty((fac_array.shape[1], len(f_mesh)), dtype=float128)
    splreps = []
    
    p1 = invlogit(f_mesh)
    
    # For each record
    for i in xrange(N_recs):
        posi = pos[i]
        Ai = A[i]
        spi = np.sum(posi)
        negi = 1.-posi

        if type is None:
            if len(Ai) < 100:
                fn = outer_small
            else:
                fn = outer_large
        elif type=='s':
            fn = outer_small
        else:
            fn = outer_large

        likelihoods[i,:] = fn(p1, fac_array, Ai, spi, posi, negi, likes_now)

        # Clean out occasional infinities on the edges.
        good_indices = where(1-isinf(likelihoods[i,:]))[0]

        # Compute spline representations.
        this_splrep = interp.splrep(x=f_mesh[good_indices], y=likelihoods[i,good_indices].squeeze())
        def this_fun(x, sp=this_splrep, Pml=f_mesh[good_indices].min(), Pmh=f_mesh[good_indices].max()):
            out = np.atleast_1d(interp.splev(x, sp))
            if np.any(x<Pml) or np.any(x>Pmh):
                out[np.where(x<Pml)] = -np.Inf
                out[np.where(x>Pmh)] = -np.Inf
            return out.reshape(np.shape(x))

        splreps.append(this_fun)        
    return splreps
Example #11
0
def PR_samps(mesh, Ms, Cs, Vs, ind, facs):
    """
    Converts a mean function, covariance function, nugget and array of correction factors
    to a sample for the average of parasite rate over a given spatiotemporal mesh.
    """
    nm = mesh.shape[0]
    samps = np.empty((len(ind), nm))
    for i in ind:
        C = Cs[i](mesh, mesh)
        C[::nm + 1] += Vs[i]
        samps[i, :] = pm.invlogit(pm.mv_normal_cov(Ms[i](mesh),
                                                   C).ravel()) * facs[A[i]]

    return np.mean(samps, axis=1)
Example #12
0
def sim_data(N, true_cf=[[.3, .6, .1],
                           [.3, .5, .2]],
             true_std=[[.2, .05, .05], 
                       [.3, 0.1, 0.1]],
             sum_to_one=True):
    """ 
    Create an NxTxJ matrix of simulated data (T is determined by the length 
    of true_cf, J by the length of the elements of true_cf). 

    true_cf - a list of lists of true cause fractions (each must sum to one)
    true_std - a list of lists of the standard deviations corresponding to the true csmf's 
             for each time point. Can either be a list of length J inside a list of length
             1 (in this case, the same standard deviation is used for all time points) or 
             can be T lists of length J (in this case, a separate standard deviation 
             is specified and used for each time point). 
    """

    if sum_to_one == True: 
        assert pl.allclose(pl.sum(true_cf, 1), 1), 'The sum of elements of true_cf must equal 1' 
    T = len(true_cf)
    J = len(true_cf[0])
    
    ## if only one std provided, duplicate for all time points 
    if len(true_std)==1 and len(true_cf)>1: 
        true_std = [true_std[0] for i in range(len(true_cf))]    

    ## transform the mean and std to logit space
    transformed_std = []
    for t in range(T): 
        pi_i = pl.array(true_cf[t])
        sigma_pi_i = pl.array(true_std[t])
        transformed_std.append( ((1/(pi_i*(pi_i-1)))**2 * sigma_pi_i**2)**0.5 )
            
    ## find minimum standard deviation (by cause across time) and draw from this 
    min = pl.array(transformed_std).min(0)
    common_perturbation = [pl.ones([T,J])*mc.rnormal(mu=0, tau=min**-2) for n in range(N)]
    
    ## draw from remaining variation 
    tau=pl.array(transformed_std)**2 - min**2
    tau[tau==0] = 0.000001
    additional_perturbation = [[mc.rnormal(mu=0, tau=tau[t]**-1) for t in range(T)] for n in range(N)]

    result = pl.zeros([N, T, J])
    for n in range(N):
        result[n, :, :] = [mc.invlogit(mc.logit(true_cf[t]) + common_perturbation[n][t] + additional_perturbation[n][t]) for t in range(T)]

    return result
Example #13
0
    def mortality(self, key="all-cause_mortality", data=None):
        """ Calculate the all-cause mortality rate for the
        region and sex of disease_model, and return it
        in an array corresponding to age_mesh

        Parameters
        ----------
        key : str, optional
          of the form 'all-cause_mortality+gbd_region+year+sex'
        data: list, optional
          the data list to extract all-cause mortality from
        """
        if self.params.get("initial_value", {}).has_key(key):
            return self.get_initial_value(key)

        if not data:
            data = self.filter_data("all-cause_mortality data")

        if len(data) == 0:
            return NEARLY_ZERO * np.ones(len(self.get_estimate_age_mesh()))
        else:
            M, C = uninformative_prior_gp(c=-1.0, scale=300.0)
            age = []
            val = []
            V = []
            for d in data:
                scale = self.extract_units(d)
                a0 = d.get("age_start", MISSING)
                a1 = d.get("age_end", MISSING)
                y = self.value_per_1(d)
                se = self.se_per_1(d)

                if se == MISSING:
                    se = 0.01
                if MISSING in [a0, a1, y]:
                    continue

                age.append(0.5 * (a0 + a1))
                val.append(y + 0.00001)
                V.append(se ** 2.0)

            if len(data) > 0:
                gp.observe(M, C, age, mc.logit(val), V)

            normal_approx_vals = mc.invlogit(M(self.get_estimate_age_mesh()))
            self.set_initial_value(key, normal_approx_vals)
            return self.get_initial_value(key)
Example #14
0
def f_ifr_factory(df_ifr, logit_shift):
    """Create age-interpolating IFR function

    Parameters
    ----------
    df_ifr : pd.DataFrame with columns for age_mid, lowest_ifr
    logit_shift : float, shift of value in logit-space

    Results
    -------
    returns function that maps from age to IFR
    """
    return scipy.interpolate.interp1d(df_ifr.age_mid.values,
                                      pm.invlogit(df_ifr.lowest_ifr +
                                                  logit_shift),
                                      kind='linear',
                                      fill_value='extrapolate')
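A minimal usage sketch of f_ifr_factory, assuming pandas as pd is importable alongside the numpy/scipy/pm modules the function already relies on; the ages and the logit-scale lowest_ifr values below are invented for illustration:

import pandas as pd

df_ifr = pd.DataFrame({'age_mid': [5., 25., 55., 85.],
                       'lowest_ifr': [-9., -7.5, -4., -2.]})  # assumed to be on the logit scale
f_ifr = f_ifr_factory(df_ifr, logit_shift=0.3)
print(f_ifr([20., 60.]))  # interpolated (and, if needed, extrapolated) IFR at ages 20 and 60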
Example #15
0
def normal_approx(asrf):
    """
    This 'normal approximation' of the age-specific rate function is
    formed by using each rate to produce an estimate of the
    age-specific rate, and then saying that the logit of the true
    rate function is a gaussian process and these age-specific rates
    are observations of this gaussian process.

    This is less valid and less accurate than using mcmc or map on the
    vars produced by the model_rate_list method below, but maybe it
    will be faster.
    """
    M,C = uninformative_prior_gp()

    # use prior to set rate near zero as requested
    for prior_str in asrf.fit.get('priors', '').split('\n'):
        prior = prior_str.split()
        if len(prior) > 0 and prior[0] == 'zero':
            age_start = int(prior[1])
            age_end = int(prior[2])

            gp.observe(M, C, range(age_start, age_end+1), [-10.], [0.])
               
    for r in asrf.rates.all():
        mesh, obs, V = logit_rate_from_range(r)

        # make sure that there is something to observe
        if mesh == []:
            continue
        
        # uncomment the following line to make more inferences than
        # are valid from the data
        #gp.observe(M, C, mesh, obs, V)

        # uncomment the following 2 lines to make fewer inferences than
        # possible: it may be better to waste information than have
        # false confidence
        ii = len(mesh)/2
        gp.observe(M, C, [mesh[ii]], [obs[ii]], [V[ii]])

    x = asrf.fit['out_age_mesh']
    na_rate = mc.invlogit(M(x))
    asrf.fit['normal_approx'] = list(na_rate)
    asrf.save()

    return M, C
Example #16
0
    def mu_age_p(logit_C0=logit_C0, i=rate['i']['mu_age'], r=rate['r']['mu_age'], f=rate['f']['mu_age']):

        # for acute conditions, it is silly to use ODE solver to
        # derive prevalence, and it can be approximated with a simple
        # transformation of incidence
        if r.min() > 5.99:
            return i / (r + m_all + f)

        C0 = mc.invlogit(logit_C0)

        x = np.hstack((i, r, f, 1-C0, C0))
        y = fun.forward(0, x)

        susceptible = y[:N]
        condition = y[N:]

        p = condition / (susceptible + condition)
        p[np.isnan(p)] = 0.
        return p
Example #17
0
File: ism.py Project: peterhm/gbd
    def mu_age_p(logit_C0=logit_C0, i=rate["i"]["mu_age"], r=rate["r"]["mu_age"], f=rate["f"]["mu_age"]):

        # for acute conditions, it is silly to use ODE solver to
        # derive prevalence, and it can be approximated with a simple
        # transformation of incidence
        if r.min() > 5.99:
            return i / (r + m_all + f)

        C0 = mc.invlogit(logit_C0)

        x = pl.hstack((i, r, f, 1 - C0, C0))
        y = fun.forward(0, x)

        susceptible = y[:N]
        condition = y[N:]

        p = condition / (susceptible + condition)
        p[pl.isnan(p)] = 0.0
        return p
Example #18
0
def reduce_realizations(filename, reduce_fns, slices, a_lo, a_hi, n_per):
    """
    Generates n_per * len(filename.root.realizations) realizations, 
    on the space-time slice defined by slices (a tuple of three slices)
    and reduces them according to the functions in reduce_fns, which should
    be a list of Python functions of the form
    
    reduce(this_PR_chunk, product_sofar=None)
    
    and incorporate this_PR_chunk into product_sofar in the desired
    way. It should be robust to the product_sofar=None case, of course.
    a_lo and a_hi are the limits of the age range.
    """
    slices = tuple(slices)
    hf = tb.openFile(filename)
    hr = hf.root
    n_realizations = len(hr.realizations)
    products = dict(zip(reduce_fns, [None] * len(reduce_fns)))

    N_facs = int(1e5)

    # Get nugget variance and age-correction factors
    V = hr.PyMCsamples.col('V')[:]
    facs = mbgw.correction_factors.age_corr_factors_from_limits(
        a_lo, a_hi, N_facs)

    for i in xrange(n_realizations):
        # Pull out parasite rate chunk
        tot_slice = (slice(i, i + 1, 1), ) + slices
        f_chunk = hr.realizations[tot_slice].squeeze()
        for j in xrange(n_per):
            chunk = f_chunk + np.random.normal(
                loc=0, scale=np.sqrt(V[i]), size=f_chunk.shape)
            chunk = pm.invlogit(chunk)
            chunk *= facs[np.random.randint(N_facs, size=np.prod(chunk.shape))]
            chunk = chunk.reshape(f_chunk.shape)

            for f in reduce_fns:
                product_sofar = products[f]
                products[f] = f(chunk, product_sofar)

    return products
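A minimal sketch of one reduce_fns entry, matching the reduce(this_PR_chunk, product_sofar=None) form described in the docstring above; the name sum_reduce is illustrative:

def sum_reduce(this_PR_chunk, product_sofar=None):
    # Accumulate a running sum of the parasite-rate chunks.
    if product_sofar is None:
        return this_PR_chunk.copy()
    return product_sofar + this_PR_chunk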
Example #19
0
    def mu_age_p(logit_C0=logit_C0,
                 i=rate['i']['mu_age'],
                 r=rate['r']['mu_age'],
                 f=rate['f']['mu_age']):

        # for acute conditions, it is silly to use ODE solver to
        # derive prevalence, and it can be approximated with a simple
        # transformation of incidence
        if r.min() > 5.99:
            return i / (r + m_all + f)

        C0 = float(mc.invlogit(logit_C0))

        susceptible = np.zeros(len(ages))
        condition = np.zeros(len(ages))
        dismod_mr.model.ode.ode_function(susceptible, condition, num_step,
                                         ages, m_all, i, r, f, 1 - C0, C0)

        p = condition / (susceptible + condition)
        p[np.isnan(p)] = 0.
        return p
Example #20
def reduce_realizations(filename, reduce_fns, slices, a_lo, a_hi, n_per):
    """
    Generates n_per * len(filename.root.realizations) realizations, 
    on the space-time slice defined by slices (a tuple of three slices)
    and reduces them according to the functions in reduce_fns, which should
    be a list of Python functions of the form
    
    reduce(this_PR_chunk, product_sofar=None)
    
    and incorporate this_PR_chunk into product_sofar in the desired
    way. It should be robust to the product_sofar=None case, of course.
    a_lo and a_hi are the limits of the age range.
    """
    slices = tuple(slices)
    hf = tb.openFile(filename)
    hr = hf.root
    n_realizations = len(hr.realizations)
    products = dict(zip(reduce_fns, [None]*len(reduce_fns)))
    
    N_facs = int(1e5)
    
    # Get nugget variance and age-correction factors
    V = hr.PyMCsamples.col('V')[:]
    facs = mbgw.correction_factors.age_corr_factors_from_limits(a_lo, a_hi, N_facs)
    
    for i in xrange(n_realizations):
        # Pull out parasite rate chunk
        tot_slice = (slice(i,i+1,1),) + slices
        f_chunk = hr.realizations[tot_slice].squeeze()
        for j in xrange(n_per):
            chunk = f_chunk + np.random.normal(loc=0, scale=np.sqrt(V[i]), size=f_chunk.shape)
            chunk = pm.invlogit(chunk)
            chunk *= facs[np.random.randint(N_facs, size=np.prod(chunk.shape))]
            chunk = chunk.reshape(f_chunk.shape)
            
            for f in reduce_fns:
                product_sofar = products[f]
                products[f] = f(chunk, product_sofar)
    
    return products
Example #21
0
def ages_and_data(N_exam, f_samp, correction_factor_array, age_lims):
    """Called by pred_samps. Simulates ages of survey participants and data given f."""
    
    N_samp = len(f_samp)
    N_age_samps = correction_factor_array.shape[1]
    
    # Get samples for the age distribution at the observation points.
    age_distribution = []
    for i in xrange(N_samp):
        l = age_lims[i]
        age_distribution.append(S_trace[np.random.randint(S_trace.shape[0]),0,l[0]:l[1]+1])
        age_distribution[-1] /= np.sum(age_distribution[-1])
    
    # Draw age for each individual, draw an age-correction profile for each location,
    # compute probability of positive for each individual, see how many individuals are
    # positive.
    A = []
    pos = []
    for s in xrange(N_samp):
        A.append(np.array(pm.rcategorical(age_distribution[s], size=N_exam[s]),dtype=int) + age_lims[s][0])
        P_samp = pm.invlogit(f_samp[s].ravel())*correction_factor_array[:,np.random.randint(N_age_samps)][A[-1]]
        pos.append(pm.rbernoulli(P_samp))
    
    return A, pos, age_distribution
Example #22
 def this_fun(x, p2=p2, p3=p3, negi=negi, posi=posi, Ai=Ai):
     p1 = np.log(invlogit(x))
     return p1 * spi + p3 + cfh(p1, p2, negi)
Example #23
0
def fit_emp_prior(dm, param_type):
    """ Generate an empirical prior distribution for a single disease parameter

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      The object containing all the data, (hyper)-priors, and additional
      information (like input and output age-mesh).

    param_type : str, one of 'incidence', 'prevalence', 'remission', 'excess-mortality'
      The disease parameter to work with

    Notes
    -----
    The results of this fit are stored in the disease model's params
    hash for use when fitting multiple parameter types together

    Example
    -------
    $ python2.5 gbd_fit.py 175 -t incidence -p 'zero 0 4, zero 41 100, smooth 25' # takes 7m to run
    """

    data = [d for d in dm.data if clean(d['data_type']).find(param_type) != -1]

    # don't do anything if there is no data for this parameter type
    if len(data) == 0:
        return
    
    dm.fit_initial_estimate(param_type, data)

    dm.vars = setup(dm, param_type, data)
    
    # fit the model
    dm.map = mc.MAP(dm.vars)
    try:
        dm.map.fit(method='fmin_powell', iterlim=500, tol=.00001, verbose=1)
    except KeyboardInterrupt:
        print 'User halted optimization routine before optimal value found'
    
    # save the results in the param_hash
    dm.clear_empirical_prior()
    prior_vals = dict(
        alpha=list(dm.vars['region_coeffs'].value),
        beta=list(dm.vars['study_coeffs'].value),
        gamma=list(dm.vars['age_coeffs'].value),
        sigma=float(dm.vars['dispersion'].value))
    dm.set_empirical_prior(param_type, prior_vals)

    dispersion = prior_vals['sigma']
    for r in dismod3.gbd_regions:
        for y in dismod3.gbd_years:
            for s in dismod3.gbd_sexes:
                key = dismod3.gbd_key_for(param_type, r, y, s)
                logit_mu = predict_logit_rate(regional_covariates(key), **prior_vals)
                mu = mc.invlogit(logit_mu)
                dm.set_initial_value(key, mu)
                dm.set_mcmc('emp_prior_mean', key, mu)
                dm.set_mcmc('emp_prior_lower_ui', key, mc.invlogit(logit_mu - 1.96*dispersion))
                dm.set_mcmc('emp_prior_upper_ui', key, mc.invlogit(logit_mu + 1.96*dispersion))

    key = dismod3.gbd_key_for(param_type, 'world', 1997, 'total')
    logit_mu = predict_logit_rate(regional_covariates(key), **prior_vals)
    mu = mc.invlogit(logit_mu)
    dm.set_initial_value(key, mu)
    dm.set_mcmc('emp_prior_mean', key, mu)
    dm.set_mcmc('emp_prior_lower_ui', key, mc.invlogit(logit_mu - 1.96*dispersion))
    dm.set_mcmc('emp_prior_upper_ui', key, mc.invlogit(logit_mu + 1.96*dispersion))
Example #24
0
def theta(a=alpha, b=beta):
    """theta = logit^{−1}(a+b)"""
    return pymc.invlogit(a + b * x)
Example #25
# <codecell>

### hyperpriors
d = mc.Normal('d', 0., 1.e-6, value=0.)
tau = mc.Gamma('tau', 1.e-3, 1.e-3, value=1.)

sigma = mc.Lambda('sigma', lambda tau=tau: tau**-.5)
delta_new = mc.Normal('delta_new', d, tau, value=0.)


### priors
mu = [mc.Normal('mu_%d'%i, 0., 1.e-5, value=0.) for i in range(N)]
delta = [mc.Normal('delta_%d'%i, d, tau, value=0.) for i in range(N)]

p_c = mc.Lambda('p_c', lambda mu=mu: mc.invlogit(mu))
p_t = mc.Lambda('p_t', lambda mu=mu, delta=delta: mc.invlogit(array(mu)+delta))


### likelihood
r_c = mc.Binomial('r_c', n_c_obs, p_c, value=r_c_obs, observed=True)
r_t = mc.Binomial('r_t', n_t_obs, p_t, value=r_t_obs, observed=True)

# <markdowncell>

# BUGS uses Gibbs steps automatically, so in their example it only takes 10000 steps of MCMC after a 1000-step burn-in for this model.
# 
# PyMC only uses Gibbs steps if you set them up yourself; it uses Metropolis steps by default.  So 10000 steps
# go by more quickly, but the chain takes longer to converge to the stationary distribution.

# <codecell>
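# A minimal sketch of sampling the model above with PyMC 2, assigning the default
# Metropolis step method explicitly via use_step_method (iteration counts are illustrative):

m = mc.MCMC([d, tau, sigma, delta_new, p_c, p_t, r_c, r_t] + mu + delta)
m.use_step_method(mc.Metropolis, d)   # explicit; mc.MCMC would choose Metropolis for d anyway
m.sample(iter=11000, burn=1000)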
Example #26
0
File: mcmc.py Project: thouska/pymc
Msurf = zeros(data.shape)
E2surf = zeros(data.shape)

# Get E[v] and E[v**2] over the entire posterior
for i in xrange(n):
    # Reset all variables to their values at frame i of the trace
    DuffySampler.remember(0, i)
    # Evaluate the observed mean
    store_africa_val(DuffySampler.sp_sub_b.M_obs.value, dpred, africa)
    Msurf_b, Vsurf_b = pm.gp.point_eval(DuffySampler.sp_sub_b.M_obs.value, DuffySampler.sp_sub_b.C_obs.value, dpred)
    Msurf_s, Vsurf_s = pm.gp.point_eval(DuffySampler.sp_sub_s.M_obs.value, DuffySampler.sp_sub_s.C_obs.value, dpred)
    Vsurf_b += DuffySampler.V_b.value
    Vsurf_s += DuffySampler.V_s.value

    freq_b = pm.invlogit(Msurf_b + pm.rnormal(0, 1) * np.sqrt(Vsurf_b))
    freq_s = pm.invlogit(Msurf_s + pm.rnormal(0, 1) * np.sqrt(Vsurf_s))

    samp_i = (freq_b * freq_s + (1 - freq_b) * DuffySampler.p1.value) ** 2

    Msurf[where_unmasked] += samp_i / float(n)
    # Evaluate the observed covariance with one argument
    E2surf[where_unmasked] += samp_i ** 2 / float(n)

# Get the posterior variance and standard deviation
Vsurf = E2surf - Msurf ** 2
SDsurf = sqrt(Vsurf)

Msurf = ma.masked_array(Msurf, mask=covariate_raster.root.mask[:])
SDsurf = ma.masked_array(SDsurf, mask=covariate_raster.root.mask[:])
covariate_raster.close()
Example #27
0
def fit_without_confrontation(id, region, sex, year):
    """ Fit posterior of specified region/sex/year for specified model
    without trying to integrate conflicting sources of data

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years
    """

    ## load model
    dm = dismod3.load_disease_model(id)

    ## separate out prevalence and relative-risk data
    prev_data = [
        d for d in dm.data
        if dm.relevant_to(d, 'prevalence', region, year, sex)
    ]
    rr_data = [
        d for d in dm.data
        if dm.relevant_to(d, 'relative-risk', region, year, sex)
    ]
    dm.data = [d for d in dm.data if not d in prev_data and not d in rr_data]

    ### setup the generic disease model (without prevalence data)
    import dismod3.gbd_disease_model as model
    keys = dismod3.utils.gbd_keys(region_list=[region],
                                  year_list=[year],
                                  sex_list=[sex])
    dm.calc_effective_sample_size(dm.data)
    dm.vars = model.setup(dm, keys)

    ## override the birth prevalence prior, based on the withheld prevalence data
    logit_C_0 = dm.vars[dismod3.utils.gbd_key_for('bins', region, year,
                                                  sex)]['initial']['logit_C_0']
    assert len(prev_data) == 1, 'should be a single prevalence datum'
    d = prev_data[0]

    mu_logit_C_0 = mc.logit(dm.value_per_1(d) + dismod3.settings.NEARLY_ZERO)
    lb, ub = dm.bounds_per_1(d)
    sigma_logit_C_0 = (mc.logit(ub + dismod3.settings.NEARLY_ZERO) -
                       mc.logit(lb + dismod3.settings.NEARLY_ZERO)) / (2 *
                                                                       1.96)
    print 'mu_C_0_pri:', mc.invlogit(mu_logit_C_0)
    print 'ui_C_0_pri:', lb, ub

    # override the excess-mortality, based on the relative-risk data
    mu_rr = 1.01 * np.ones(dismod3.settings.MAX_AGE)
    sigma_rr = .01 * np.ones(dismod3.settings.MAX_AGE)
    for d in rr_data:
        mu_rr[d['age_start']:(d['age_end'] + 1)] = dm.value_per_1(d)
        sigma_rr[d['age_start']:(d['age_end'] + 1)] = dm.se_per_1(d)
    print 'mu_rr:', mu_rr.round(2)
    #print 'sigma_rr:', sigma_rr.round(2)

    log_f = dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, year,
                                              sex)]['age_coeffs']
    log_f_mesh = log_f.parents['gamma_mesh']
    param_mesh = log_f.parents['param_mesh']

    m_all = dm.vars[dismod3.utils.gbd_key_for('all-cause_mortality', region,
                                              year, sex)]
    mu_log_f = np.log((mu_rr - 1) * m_all)
    sigma_log_f = 1 / ((mu_rr - 1) * m_all) * sigma_rr * m_all
    print 'mu_log_f:', mu_log_f.round(2)[param_mesh]
    print 'sigma_log_f:', sigma_log_f.round(2)[param_mesh]

    ### fit the model using Monte Carlo simulation (shoehorned into the MCMC framework of PyMC)
    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.use_step_method(SampleFromNormal,
                            logit_C_0,
                            mu=mu_logit_C_0,
                            tau=sigma_logit_C_0**-2)
    dm.mcmc.use_step_method(SampleFromNormal,
                            log_f_mesh,
                            mu=mu_log_f[param_mesh],
                            tau=sigma_log_f[param_mesh]**-2)
    for stoch in dm.mcmc.stochastics:
        dm.mcmc.use_step_method(mc.NoStepper, stoch)
    dm.mcmc.sample(1000, verbose=dismod3.settings.ON_SGE)

    #print 'mu_C_0_post:', mc.invlogit(logit_C_0.stats()['mean']).round(2)
    #print 'ui_C_0_post:', mc.invlogit(logit_C_0.stats()['95% HPD interval']).round(2)
    #print 'mu_rr_post:', dm.vars[dismod3.utils.gbd_key_for('relative-risk', region, year, sex)]['rate_stoch'].stats()['mean'].round(2)
    print 'mu_log_f_mesh_post:', log_f_mesh.stats()['mean'].round(2)
    print 'mu_f_post:', dm.vars[dismod3.utils.gbd_key_for(
        'excess-mortality', region, year,
        sex)]['rate_stoch'].stats()['mean'].round(2)

    for k in keys:
        t, r, y, s = dismod3.utils.type_region_year_sex_from_key(k)

        if t in [
                'incidence', 'prevalence', 'remission', 'excess-mortality',
                'mortality', 'prevalence_x_excess-mortality'
        ]:
            dismod3.neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k])

        elif t in ['relative-risk', 'duration', 'incidence_x_duration']:
            dismod3.normal_model.store_mcmc_fit(dm, k, dm.vars[k])

    from fit_posterior import save_country_level_posterior
    if str(year) == '2005':  # also generate 2010 estimates
        save_country_level_posterior(dm, region, 2010, sex,
                                     ['prevalence', 'remission'])
    save_country_level_posterior(
        dm, region, year, sex, ['prevalence', 'remission']
    )  #'prevalence incidence remission excess-mortality duration mortality relative-risk'.split())

    # save results (do this last, because it removes things from the disease model that the plotting functions, etc., might need)
    keys = dismod3.utils.gbd_keys(region_list=[region],
                                  year_list=[year],
                                  sex_list=[sex])
    dm.save('dm-%d-posterior-%s-%s-%s.json' % (dm.id, region, sex, year),
            keys_to_save=keys)

    return dm
Example #28
0
 def C_0(logit_C_0=logit_C_0):
     return mc.invlogit(logit_C_0)
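In PyMC 2 a function like C_0 is typically registered as a deterministic node; a minimal sketch, assuming logit_C_0 is an existing stochastic and mc is PyMC 2:

@mc.deterministic
def C_0(logit_C_0=logit_C_0):
    return mc.invlogit(logit_C_0)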
Example #29
0
def make_model(lon,lat,africa,n,datatype,
                genaa,genab,genbb,gen00,gena0,genb0,gena1,genb1,gen01,gen11,
                pheab,phea,pheb,
                phe0,prom0,promab,
                aphea,aphe0,
                bpheb,bphe0):
    
    logp_mesh = np.vstack((lon,lat)).T*np.pi/180.
    
    # Probability of mutation in the promoter region, given that the other thing is a.
    p1 = pm.Uniform('p1', 0, .04, value=.01)
            
    # Spatial submodels
    spatial_b_vars = make_gp_submodel('b',logp_mesh,africa,with_africa_covariate=True)
    spatial_s_vars = make_gp_submodel('0',logp_mesh)
    sp_sub_b = spatial_b_vars['sp_sub']
    sp_sub_s = spatial_s_vars['sp_sub']
    
    # Loop over data clusters, adding nugget and applying link function.
    tilde_fs_d = []
    p0_d = []
    tilde_fb_d = []
    pb_d = []
    V_b = spatial_b_vars['V']
    V_s = spatial_s_vars['V']            
    data_d = []    

    for i in xrange(len(n)):        
        this_fb =sp_sub_b.f_eval[i]
        this_fs = sp_sub_s.f_eval[i]

        # Nuggeted field in this cluster
        tilde_fb_d.append(pm.Normal('tilde_fb_%i'%i, this_fb, 1./V_b, value=np.random.normal(), trace=False))
        tilde_fs_d.append(pm.Normal('tilde_fs_%i'%i, this_fs, 1./V_s, value=np.random.normal(), trace=False))
            
        # The frequencies.
        p0 = pm.Lambda('pb_%i'%i,lambda lt=tilde_fb_d[-1]: pm.invlogit(lt),trace=False)
        pb = pm.Lambda('p0_%i'%i,lambda lt=tilde_fs_d[-1]: pm.invlogit(lt),trace=False)
        
        # The likelihoods
        if datatype[i]=='prom':
            cur_obs = [prom0[i], promab[i]]
            # Need to have either b and 0 or a and 1 on both chromosomes
            p = pm.Lambda('p_%i'%i, lambda pb=pb, p0=p0, p1=p1: (pb*p0+(1-pb)*p1)**2, trace=False)
            n = np.sum(cur_obs)
            data_d.append(pm.Binomial('data_%i'%i, p=p, n=n, value=prom0[i], observed=True))
            
        elif datatype[i]=='aphe':
            cur_obs = [aphea[i], aphe0[i]]
            n = np.sum(cur_obs)
            # Need to have (a and not 1) on either chromosome, or not (not (a and not 1) on both chromosomes)
            p = pm.Lambda('p_%i'%i, lambda pb=pb, p0=p0, p1=p1: 1-(1-(1-pb)*(1-p1))**2, trace=False)
            data_d.append(pm.Binomial('data_%i'%i, p=p, n=n, value=aphea[i], observed=True))
            
        elif datatype[i]=='bphe':
            cur_obs = [bpheb[i], bphe0[i]]
            n = np.sum(cur_obs)
            # Need to have (b and not 0) on either chromosome
            p = pm.Lambda('p_%i'%i, lambda pb=pb, p0=p0, p1=p1: 1-(1-pb*(1-p0))**2, trace=False)
            data_d.append(pm.Binomial('data_%i'%i, p=p, n=n, value=aphea[i], observed=True))            
            
        elif datatype[i]=='phe':
            cur_obs = np.array([pheab[i],phea[i],pheb[i],phe0[i]])
            n = np.sum(cur_obs)
            p = pm.Lambda('p_%i'%i, lambda pb=pb, p0=p0, p1=p1: np.array([\
                g_freqs['ab'](pb,p0,p1),
                g_freqs['a0'](pb,p0,p1)+g_freqs['a1'](pb,p0,p1)+g_freqs['aa'](pb,p0,p1),
                g_freqs['b0'](pb,p0,p1)+g_freqs['b1'](pb,p0,p1)+g_freqs['bb'](pb,p0,p1),
                g_freqs['00'](pb,p0,p1)+g_freqs['01'](pb,p0,p1)+g_freqs['11'](pb,p0,p1)]), trace=False)
            np.testing.assert_almost_equal(p.value.sum(), 1)
            data_d.append(pm.Multinomial('data_%i'%i, p=p, n=n, value=cur_obs, observed=True))
            
        elif datatype[i]=='gen':
            cur_obs = np.array([genaa[i],genab[i],gena0[i],gena1[i],genbb[i],genb0[i],genb1[i],gen00[i],gen01[i],gen11[i]])
            n = np.sum(cur_obs)
            p = pm.Lambda('p_%i'%i, lambda pb=pb, p0=p0, p1=p1, g_freqs=g_freqs: \
                np.array([g_freqs[key](pb,p0,p1) for key in ['aa','ab','a0','a1','bb','b0','b1','00','01','11']]), trace=False)
            np.testing.assert_almost_equal(p.value.sum(), 1)
            data_d.append(pm.Multinomial('data_%i'%i, p=p, n=n, value=cur_obs, observed=True))
            
    # The fields plus the nugget, in convenient vector form
    @pm.deterministic
    def tilde_fb(tilde_fb_d = tilde_fb_d):
        """Concatenated version of tilde_fb, for postprocessing & Gibbs sampling purposes"""
        return np.hstack(tilde_fb_d)

    @pm.deterministic
    def tilde_fs(tilde_fs_d = tilde_fs_d):
        """Concatenated version of tilde_fs, for postprocessing & Gibbs sampling purposes"""
        return np.hstack(tilde_fs_d)

    return locals()
Example #30
0
def generate_disease_data(condition, cov):
    """ Generate csv files with gold-standard disease data,
    and somewhat good, somewhat dense disease data, as might be expected from a
    condition that is carefully studied in the literature
    """

    age_len = dismod3.MAX_AGE
    ages = np.arange(age_len, dtype='float')

    # incidence rate
    i0 = .005 + .02 * mc.invlogit((ages - 44) / 3)
    #i0 = np.maximum(0., .001 * (-.125 + np.ones_like(ages) + (ages / age_len)**2.))

    # remission rate
    #r = 0. * ages
    r = .1 * np.ones_like(ages)

    # excess-mortality rate
    #f_init = .085 * (ages / 100) ** 2.5
    SMR = 3. * np.ones_like(ages) - ages / age_len

    # all-cause mortality-rate
    mort = dismod3.get_disease_model('all-cause_mortality')

    #age_intervals = [[a, a+9] for a in range(0, dismod3.MAX_AGE-4, 10)] + [[0, 100] for ii in range(1)]
    age_intervals = [[a, a] for a in range(0, dismod3.MAX_AGE, 1)]

    # TODO:  take age structure from real data
    sparse_intervals = dict([[
        region,
        random.sample(age_intervals,
                      (ii**3 * len(age_intervals)) / len(countries_for)**3 / 1)
    ] for ii, region in enumerate(countries_for)])
    dense_intervals = dict(
        [[region, random.sample(age_intervals,
                                len(age_intervals) / 2)]
         for ii, region in enumerate(countries_for)])

    gold_data = []
    noisy_data = []

    for ii, region in enumerate(sorted(countries_for)):
        if region == 'world':
            continue

        print region
        sys.stdout.flush()

        # introduce unexplained regional variation
        #i = i0 * (1 + float(ii) / 21)

        # or not
        i = i0

        for year in [1990, 2005]:
            for sex in ['male', 'female']:

                param_type = 'all-cause_mortality'
                key = dismod3.gbd_key_for(param_type, region, year, sex)
                m_all_cause = mort.mortality(key, mort.data)

                # calculate excess-mortality rate from smr
                f = (SMR - 1.) * m_all_cause

                ## compartmental model (bins S, C, D, M)
                import scipy.linalg
                from dismod3 import NEARLY_ZERO
                from dismod3.utils import trim

                SCDM = np.zeros([4, age_len])
                p = np.zeros(age_len)
                m = np.zeros(age_len)

                SCDM[0, 0] = 1.
                SCDM[1, 0] = 0.
                SCDM[2, 0] = 0.
                SCDM[3, 0] = 0.

                p[0] = SCDM[1, 0] / (SCDM[0, 0] + SCDM[1, 0] + NEARLY_ZERO)
                m[0] = trim(m_all_cause[0] - f[0] * p[0], NEARLY_ZERO,
                            1 - NEARLY_ZERO)

                for a in range(age_len - 1):
                    A = [[-i[a] - m[a], r[a], 0., 0.],
                         [i[a], -r[a] - m[a] - f[a], 0., 0.],
                         [m[a], m[a], 0., 0.], [0., f[a], 0., 0.]]

                    SCDM[:, a + 1] = np.dot(scipy.linalg.expm(A), SCDM[:, a])

                    p[a + 1] = SCDM[1, a + 1] / (SCDM[0, a + 1] +
                                                 SCDM[1, a + 1] + NEARLY_ZERO)
                    m[a + 1] = m_all_cause[a + 1] - f[a + 1] * p[a + 1]

                # duration = E[time in bin C]
                hazard = r + m + f
                pr_not_exit = np.exp(-hazard)
                X = np.empty(len(hazard))
                X[-1] = 1 / hazard[-1]
                for ii in reversed(range(len(X) - 1)):
                    X[ii] = (pr_not_exit[ii] *
                             (X[ii + 1] + 1)) + (1 / hazard[ii] *
                                                 (1 - pr_not_exit[ii]) -
                                                 pr_not_exit[ii])

                country = countries_for[region][0]
                params = dict(age_intervals=age_intervals,
                              condition=condition,
                              gbd_region=region,
                              country=country,
                              year=year,
                              sex=sex,
                              effective_sample_size=1000)

                params['age_intervals'] = [[0, 99]]
                generate_and_append_data(gold_data, 'prevalence data', p,
                                         **params)
                generate_and_append_data(gold_data, 'incidence data', i,
                                         **params)
                generate_and_append_data(gold_data, 'excess-mortality data', f,
                                         **params)
                generate_and_append_data(gold_data, 'remission data', r,
                                         **params)
                generate_and_append_data(gold_data, 'duration data', X,
                                         **params)

                # TODO: use this approach to age standardize all gold data, and then change it to get iX as a direct sum
                params['age_intervals'] = [[0, 99]]
                iX = i * X * (1 - p) * regional_population(key)
                generate_and_append_data(gold_data, 'incidence_x_duration', iX,
                                         **params)

                params['effective_sample_size'] = 1000
                params['cov'] = 0.
                params['age_intervals'] = age_intervals
                generate_and_append_data(noisy_data, 'prevalence data', p,
                                         **params)
                generate_and_append_data(noisy_data, 'excess-mortality data',
                                         f, **params)
                generate_and_append_data(noisy_data, 'remission data', r,
                                         **params)
                generate_and_append_data(noisy_data, 'incidence data', i,
                                         **params)

    col_names = sorted(data_dict_for_csv(gold_data[0]).keys())

    f_file = open(OUTPUT_PATH + '%s_gold.tsv' % condition, 'w')
    csv_f = csv.writer(f_file, dialect='excel-tab')
    csv_f.writerow(col_names)
    for d in gold_data:
        dd = data_dict_for_csv(d)
        csv_f.writerow([dd[c] for c in col_names])
    f_file.close()

    f_name = OUTPUT_PATH + '%s_data.tsv' % condition
    f_file = open(f_name, 'w')
    csv_f = csv.writer(f_file, dialect='excel-tab')
    csv_f.writerow(col_names)

    for d in noisy_data:
        dd = data_dict_for_csv(d)
        csv_f.writerow([dd[c] for c in col_names])
    f_file.close()

    # upload data file
    from dismod3.disease_json import dismod_server_login, twc, DISMOD_BASE_URL
    dismod_server_login()
    twc.go(DISMOD_BASE_URL + 'dismod/data/upload/')
    twc.formvalue(1, 'tab_separated_values', open(f_name).read())

    # TODO: find or set the model number for this model, set the
    # expert priors and covariates, merge the covariate data into the
    # model, and add the "ground truth" to the disease json

    try:
        url = twc.submit()
    except Exception, e:
        print e
Example #31
0
pl.plot(X, Y, 'ks', label='Observed', mec='w', mew=1)

XX = sm.add_constant(X)
X_pred = pl.arange(65)
XX_pred = sm.add_constant(X_pred)

model = sm.OLS(Y, XX)
results = model.fit()
Y_pred = model.predict(XX_pred)

pl.plot(X_pred, Y_pred, 'k-', linewidth=2, label='Predicted by OLS')

Y = mc.logit(df['Parameter Value'].__array__())
model = sm.OLS(Y, XX)
results = model.fit()
Y_pred = model.predict(XX_pred)

pl.plot(X_pred,
        mc.invlogit(Y_pred),
        'k--',
        linewidth=2,
        label='Predicted by logit-transformed OLS')

pl.xlabel('Age (Years)')
pl.ylabel('Seroprevalence (Per 1)')
pl.legend(loc='lower right', fancybox=True, shadow=True)
pl.axis([-5, 55, 0, 1.2])
pl.grid()

pl.savefig('vzv_forest.pdf')
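An illustrative check of the transform used above (the values are made up): fitting OLS on logit-transformed observations and mapping predictions back with invlogit keeps the predicted seroprevalence inside (0, 1), which the untransformed OLS fit does not guarantee.

import numpy as np

y = np.array([0.05, 0.20, 0.60])
logit_y = np.log(y / (1 - y))         # what mc.logit computes
back = 1 / (1 + np.exp(-logit_y))     # what mc.invlogit computes
print(np.allclose(back, y))           # True: the transform round-trips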
Example #32
def assessRealizationCovariance(filename,
                                Rel,
                                Month,
                                paramfileINDEX,
                                TemporalStartMonth=None,
                                TemporalEndMonth=None,
                                conditioned=False,
                                flipVertical="FALSE",
                                SPACE=True,
                                TIME=True):

    # deal with system arguments
    #filename = sys.argv[1]
    #Rel = int(sys.argv[2])
    #Month = int(sys.argv[3])
    #conditioned = sys.argv[4]
    #flipVertical = sys.argv[5]
    #paramfileINDEX = int(sys.argv[6])
    #TemporalStartMonth = int(sys.argv[7])
    #TemporalEndMonth = int(sys.argv[8])
    #SPACE = sys.argv[9]
    #TIME = sys.argv[10]

    ## if filename is a string, assume it's a path and import the hdf5 file (otherwise, the assumption is that we are passing the 'hr' root of an hdf5 realisation file)
    if type(filename) is str:
        hf = tb.openFile(filename)
        hr = hf.root

    if type(filename) is not str:
        hr = filename

    # define path to R param file
    mbgw_root = __root__ = mbgw.__path__[0]
    r_paramfile_path = mbgw_root + '/joint_simulation/CONDSIMalgorithm/ParamFile_uncond_' + str(
        paramfileINDEX) + '.R'

    ###CHECK SPATIAL COVARIANCE AND BASIC FEATURE OF A SINGLE MONTH
    if SPACE is True:

        # define basic parameters
        slices = [
            slice(None, None, None),
            slice(None, None, None),
            slice(Month, Month + 1, None)
        ]

        slices = tuple(slices)
        n_realizations = 1
        n_rows = len(hr.lat_axis)
        n_cols = len(hr.lon_axis)
        N_facs = int(1e5)

        # Pull out parasite rate chunk (i.e. import n months of block)
        slices = tuple(slices)
        tot_slice = (slice(Rel, Rel + 1, None), ) + slices

        n_months = tot_slice[3].stop - tot_slice[3].start
        f_chunk = np.zeros(1 * n_cols * n_rows * n_months).reshape(
            1, n_rows, n_cols, n_months)
        subsetmonth = 0

        #print tot_slice
        #print f_chunk[:,:,:,subsetmonth]

        for mm in xrange(tot_slice[3].start, tot_slice[3].stop):
            f_chunk[:, :, :,
                    subsetmonth] = hr.realizations[tot_slice[0], tot_slice[1],
                                                   tot_slice[2], mm]
            subsetmonth = subsetmonth + 1
        #f_chunk = f_chunk[::-1,:,::-1,:].T[:,:,:,0]
        f_chunk = f_chunk.squeeze()
        f_chunk[f_chunk == -9999] = nan
        inv_f_chunk = pm.invlogit(f_chunk.squeeze().T)
        inv_f_chunk = inv_f_chunk.reshape(shape(f_chunk))

        #from IPython.Debugger import Pdb
        #Pdb(color_scheme='Linux').set_trace()

        # calculate empirical covariance function in N-S direction
        gridIN = cp.deepcopy(f_chunk).squeeze()

        if conditioned is False: meanIN = 0
        if conditioned is True:
            meanIN = hr.PyMCsamples.col("m_const")[Rel] + (
                hr.PyMCsamples.col("t_coef")[Rel] * hr.t_axis[Month])
        cellWidth = 5 / 6378.137
        covDict = getGridCovarianceInY(gridIN, meanIN, cellWidth)

        # obtain theoretical covariance function from input MCMC parameter values: pymc method
        C = hr.group0.C[Rel]
        xplot = covDict['RadDist']
        yplot1 = C([[0, 0, 0]],
                   np.vstack(
                       (np.zeros(len(xplot)), xplot, np.zeros(len(xplot)))).T)
        yplot1 = np.asarray(yplot1).squeeze()

        #        # obtain theoretical covariance function from input MCMC paramater values: R method
        #        Scale=hr.PyMCsamples.col("scale")[Rel]
        #        amp=hr.PyMCsamples.col("amp")[Rel]
        #        inc=hr.PyMCsamples.col("inc")[Rel]
        #        ecc=hr.PyMCsamples.col("ecc")[Rel]
        #        t_lim_corr=hr.PyMCsamples.col("t_lim_corr")[Rel]
        #        scale_t=hr.PyMCsamples.col("scale_t")[Rel]
        #        sin_frac=hr.PyMCsamples.col("sin_frac")[Rel]

        #        CfromR=temptestcovPY(xplot,np.zeros(len(xplot)),np.zeros(len(xplot)),Scale,amp,inc,ecc,t_lim_corr,scale_t,sin_frac,r_paramfile_path)
        #        yplot = CfromR[0,:]

        # plot

        Slag_emp = covDict['RadDist']
        Slag_mod = xplot
        Scov_emp = covDict['E_cov']
        Scov_mod = yplot1

    ###CHECK TEMPORAL COVARIANCE

    if TIME is True:

        # if the start and end months are None, or if they are invalid, reset to the maximum temporal extent
        if ((TemporalEndMonth is None) |
            (TemporalEndMonth >= hr.realizations.shape[3])):
            TemporalEndMonth = hr.realizations.shape[3]
        if ((TemporalStartMonth is None) | (TemporalStartMonth >=
                                            (hr.realizations.shape[3] - 1))):
            TemporalStartMonth = 0

        # define basic parameters
        slices = [
            slice(None, None, None),
            slice(None, None, None),
            slice(TemporalStartMonth, TemporalEndMonth, None)
        ]

        slices = tuple(slices)
        n_realizations = 1
        n_rows = len(hr.lat_axis)
        n_cols = len(hr.lon_axis)
        N_facs = int(1e5)

        # Pull out parasite rate chunk (i.e. import n months of block)
        slices = tuple(slices)
        tot_slice = (slice(Rel, Rel + 1, None), ) + slices

        n_months = tot_slice[3].stop - tot_slice[3].start
        f_chunk = np.zeros(1 * n_cols * n_rows * n_months).reshape(
            1, n_rows, n_cols, n_months)
        subsetmonth = 0

        for mm in xrange(tot_slice[3].start, tot_slice[3].stop):
            f_chunk[:, :, :,
                    subsetmonth] = hr.realizations[tot_slice[0], tot_slice[1],
                                                   tot_slice[2], mm]
            subsetmonth = subsetmonth + 1
        #f_chunk = f_chunk[::-1,:,::-1,:].T[:,:,:,0]
        f_chunk = f_chunk.squeeze()
        f_chunk[f_chunk == -9999] = nan

        # calculate and plot empirical temporal covariance
        gridIN = cp.deepcopy(f_chunk).squeeze()

        if conditioned is False: meanIN = 0
        if conditioned is True:
            meanIN = hr.PyMCsamples.col("m_const")[Rel] + (
                hr.PyMCsamples.col("t_coef")[Rel] *
                hr.t_axis[TemporalStartMonth:TemporalEndMonth + 1:1])

        covDict = getGridCovarianceInT(gridIN, meanIN)

        # obtain theoretical covariance function from input MCMC parameter values: pymc method
        C = hr.group0.C[Rel]
        xplot = covDict['yearDist']
        yplot = C([[0, 0, 0]],
                  np.vstack(
                      (np.zeros(len(xplot)), np.zeros(len(xplot)), xplot)).T)
        yplot = np.asarray(yplot).squeeze()

        #        # obtain theoretical covariance function from input MCMC paramater values: R method
        #        Scale=hr.PyMCsamples.col("scale")[Rel]
        #        amp=hr.PyMCsamples.col("amp")[Rel]
        #        inc=hr.PyMCsamples.col("inc")[Rel]
        #        ecc=hr.PyMCsamples.col("ecc")[Rel]
        #        t_lim_corr=hr.PyMCsamples.col("t_lim_corr")[Rel]
        #        scale_t=hr.PyMCsamples.col("scale_t")[Rel]
        #        sin_frac=hr.PyMCsamples.col("sin_frac")[Rel]

        #        CfromR=temptestcovPY(np.zeros(len(xplot)),np.zeros(len(xplot)),xplot,Scale,amp,inc,ecc,t_lim_corr,scale_t,sin_frac,r_paramfile_path)
        #        yplot2 = CfromR[0,:]

        # plot

        Tlag_emp = covDict['yearDist']
        Tlag_mod = xplot
        Tcov_emp = covDict['E_cov']
        Tcov_mod = yplot

        retDict = {
            'Slag_emp': Slag_emp,
            'Slag_mod': Slag_mod,
            'Scov_emp': Scov_emp,
            'Scov_mod': Scov_mod,
            'Tlag_emp': Tlag_emp,
            'Tlag_mod': Tlag_mod,
            'Tcov_emp': Tcov_emp,
            'Tcov_mod': Tcov_mod
        }
        return (retDict)

    retDict = {
        'Slag_emp': Slag_emp,
        'Slag_mod': Slag_mod,
        'Scov_emp': Scov_emp,
        'Scov_mod': Scov_mod
    }
    return (retDict)
Example #33
0
File: model.py Project: zkxshg/pymc
def make_model(lon, lat, africa, n, datatype, genaa, genab, genbb, gen00,
               gena0, genb0, gena1, genb1, gen01, gen11, pheab, phea, pheb,
               phe0, prom0, promab, aphea, aphe0, bpheb, bphe0):

    logp_mesh = np.vstack((lon, lat)).T * np.pi / 180.

    # Probability of mutation in the promoter region, given that the other thing is a.
    p1 = pm.Uniform('p1', 0, .04, value=.01)

    # Spatial submodels
    spatial_b_vars = make_gp_submodel('b',
                                      logp_mesh,
                                      africa,
                                      with_africa_covariate=True)
    spatial_s_vars = make_gp_submodel('0', logp_mesh)
    sp_sub_b = spatial_b_vars['sp_sub']
    sp_sub_s = spatial_s_vars['sp_sub']

    # Loop over data clusters, adding nugget and applying link function.
    tilde_fs_d = []
    p0_d = []
    tilde_fb_d = []
    pb_d = []
    V_b = spatial_b_vars['V']
    V_s = spatial_s_vars['V']
    data_d = []

    for i in xrange(len(n)):
        this_fb = sp_sub_b.f_eval[i]
        this_fs = sp_sub_s.f_eval[i]

        # Nuggeted field in this cluster
        tilde_fb_d.append(
            pm.Normal('tilde_fb_%i' % i,
                      this_fb,
                      1. / V_b,
                      value=np.random.normal(),
                      trace=False))
        tilde_fs_d.append(
            pm.Normal('tilde_fs_%i' % i,
                      this_fs,
                      1. / V_s,
                      value=np.random.normal(),
                      trace=False))

        # The frequencies.
        p0 = pm.Lambda('pb_%i' % i,
                       lambda lt=tilde_fb_d[-1]: pm.invlogit(lt),
                       trace=False)
        pb = pm.Lambda('p0_%i' % i,
                       lambda lt=tilde_fs_d[-1]: pm.invlogit(lt),
                       trace=False)

        # The likelihoods
        if datatype[i] == 'prom':
            cur_obs = [prom0[i], promab[i]]
            # Need to have either b and 0 or a and 1 on both chromosomes
            p = pm.Lambda('p_%i' % i,
                          lambda pb=pb, p0=p0, p1=p1: (pb * p0 +
                                                       (1 - pb) * p1)**2,
                          trace=False)
            n = np.sum(cur_obs)
            data_d.append(
                pm.Binomial('data_%i' % i,
                            p=p,
                            n=n,
                            value=prom0[i],
                            observed=True))

        elif datatype[i] == 'aphe':
            cur_obs = [aphea[i], aphe0[i]]
            n = np.sum(cur_obs)
            # Need to have (a and not 1) on either chromosome, or not (not (a and not 1) on both chromosomes)
            p = pm.Lambda('p_%i' % i,
                          lambda pb=pb, p0=p0, p1=p1: 1 - (1 - (1 - pb) *
                                                           (1 - p1))**2,
                          trace=False)
            data_d.append(
                pm.Binomial('data_%i' % i,
                            p=p,
                            n=n,
                            value=aphea[i],
                            observed=True))

        elif datatype[i] == 'bphe':
            cur_obs = [bpheb[i], bphe0[i]]
            n = np.sum(cur_obs)
            # Need to have (b and not 0) on either chromosome
            p = pm.Lambda('p_%i' % i,
                          lambda pb=pb, p0=p0, p1=p1: 1 - (1 - pb *
                                                           (1 - p0))**2,
                          trace=False)
            data_d.append(
                pm.Binomial('data_%i' % i,
                            p=p,
                            n=n,
                            value=bpheb[i],
                            observed=True))

        elif datatype[i] == 'phe':
            cur_obs = np.array([pheab[i], phea[i], pheb[i], phe0[i]])
            n = np.sum(cur_obs)
            p = pm.Lambda('p_%i'%i, lambda pb=pb, p0=p0, p1=p1: np.array([\
                g_freqs['ab'](pb,p0,p1),
                g_freqs['a0'](pb,p0,p1)+g_freqs['a1'](pb,p0,p1)+g_freqs['aa'](pb,p0,p1),
                g_freqs['b0'](pb,p0,p1)+g_freqs['b1'](pb,p0,p1)+g_freqs['bb'](pb,p0,p1),
                g_freqs['00'](pb,p0,p1)+g_freqs['01'](pb,p0,p1)+g_freqs['11'](pb,p0,p1)]), trace=False)
            np.testing.assert_almost_equal(p.value.sum(), 1)
            data_d.append(
                pm.Multinomial('data_%i' % i,
                               p=p,
                               n=n,
                               value=cur_obs,
                               observed=True))

        elif datatype[i] == 'gen':
            cur_obs = np.array([
                genaa[i], genab[i], gena0[i], gena1[i], genbb[i], genb0[i],
                genb1[i], gen00[i], gen01[i], gen11[i]
            ])
            n = np.sum(cur_obs)
            p = pm.Lambda('p_%i'%i, lambda pb=pb, p0=p0, p1=p1, g_freqs=g_freqs: \
                np.array([g_freqs[key](pb,p0,p1) for key in ['aa','ab','a0','a1','bb','b0','b1','00','01','11']]), trace=False)
            np.testing.assert_almost_equal(p.value.sum(), 1)
            data_d.append(
                pm.Multinomial('data_%i' % i,
                               p=p,
                               n=n,
                               value=cur_obs,
                               observed=True))

    # The fields plus the nugget, in convenient vector form
    @pm.deterministic
    def tilde_fb(tilde_fb_d=tilde_fb_d):
        """Concatenated version of tilde_fb, for postprocessing & Gibbs sampling purposes"""
        return np.hstack(tilde_fb_d)

    @pm.deterministic
    def tilde_fs(tilde_fs_d=tilde_fs_d):
        """Concatenated version of tilde_fs, for postprocessing & Gibbs sampling purposes"""
        return np.hstack(tilde_fs_d)

    return locals()
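The 'prom' likelihood above maps the two latent Gaussian fields to allele frequencies through the inverse-logit link and squares the per-chromosome probability of the Duffy-negative phenotype. A minimal numeric sketch of that calculation, with made-up values for the nuggeted fields and for p1 (none of these numbers come from a model run):

import numpy as np

def invlogit(x):
    return 1.0 / (1.0 + np.exp(-x))

tilde_fb, tilde_fs = 0.3, -1.2          # hypothetical nuggeted field values
pb, p0 = invlogit(tilde_fb), invlogit(tilde_fs)
p1 = 0.01                               # hypothetical promoter-mutation probability

# One chromosome is Duffy-negative if it carries (b and 0) or (a and 1);
# both chromosomes must be, hence the square.
p_prom = (pb * p0 + (1 - pb) * p1) ** 2
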
Example #34
0
            if sex == 'male':
                offset += .5
            if year == 2005:
                offset += .5
            if region == 'Asia, South':
                offset -= .1
            if region == 'Asia, East':
                offset -= .2
            if region == 'Europe, Central':
                offset += .3

            gdp = Covariate.objects.get(iso3=country, year=year).value
            offset += .3 * gdp

            # incidence rate
            i = .012 * mc.invlogit((ages - 44) / 3) * (1 + offset)

            # remission rate
            r = 0. * ages

            # excess-mortality rate
            f = .085 * (ages / 100)**2.5

            # all-cause mortality-rate
            mort_data = [
                d for d in mort.data
                if d['data_type'] == 'all-cause mortality data' and d['region']
                == region and d['sex'] == sex and d['year_start'] == year
            ]
            m_all_cause = mort.mortality('all_cause', mort_data)
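The incidence rate above is a logistic ramp in age scaled by a covariate offset accumulated from sex, year, region and GDP terms. A small self-contained sketch of the same construction with an assumed offset value:

import numpy as np

def invlogit(x):
    return 1.0 / (1.0 + np.exp(-x))

ages = np.arange(101, dtype=float)
offset = .5 + .3 * 1.2                  # e.g. male (+.5) plus an assumed GDP term
i = .012 * invlogit((ages - 44) / 3) * (1 + offset)
# i rises sigmoidally, with the inflection near age 44
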
Example #35
0
File: vzv.py Project: aflaxman/gbd
pl.plot(X, Y, 'ks', label='Observed', mec='w', mew=1)


XX = sm.add_constant(X)
X_pred = pl.arange(65)
XX_pred = sm.add_constant(X_pred)


model = sm.OLS(Y, XX)
results = model.fit()
Y_pred = results.predict(XX_pred)

pl.plot(X_pred, Y_pred, 'k-', linewidth=2, label='Predicted by OLS')


Y = mc.logit(df['Parameter Value'].__array__())
model = sm.OLS(Y, XX)
results = model.fit()
Y_pred = results.predict(XX_pred)

pl.plot(X_pred, mc.invlogit(Y_pred), 'k--', linewidth=2, label='Predicted by logit-transformed OLS')


pl.xlabel('Age (Years)')
pl.ylabel('Seroprevalence (Per 1)')
pl.legend(loc='lower right', fancybox=True, shadow=True)
pl.axis([-5, 55, 0, 1.2])
pl.grid()

pl.savefig('vzv_forest.pdf')
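The second fit above regresses logit-transformed seroprevalence on age and back-transforms the predictions with the inverse logit, which keeps the fitted curve inside (0, 1), unlike the plain OLS fit. A sketch of the same idea on simulated data (the ages and seroprevalences below are synthetic, not the VZV observations):

import numpy as np
import statsmodels.api as sm

def logit(p):
    return np.log(p / (1 - p))

def invlogit(x):
    return 1.0 / (1.0 + np.exp(-x))

age = np.arange(1., 50.)
sero = np.clip(invlogit((age - 10) / 5) * .95 + np.random.normal(0, .02, len(age)), .01, .99)

XX = sm.add_constant(age)
fit = sm.OLS(logit(sero), XX).fit()
age_pred = np.arange(65, dtype=float)
sero_pred = invlogit(fit.predict(sm.add_constant(age_pred)))   # stays in (0, 1)
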
Example #36
0
 def rate_stoch(logit_rate=logit_rate):
     return interpolate(param_mesh, mc.invlogit(logit_rate), est_mesh)
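rate_stoch above interpolates inverse-logit-transformed knot values from a coarse parameter mesh onto the estimation mesh. A rough equivalent using np.interp in place of the interpolate helper; the knot ages and values below are invented:

import numpy as np

def invlogit(x):
    return 1.0 / (1.0 + np.exp(-x))

param_mesh = np.array([0., 20., 40., 60., 80., 100.])     # hypothetical knot ages
logit_rate = np.array([-6., -5., -3.5, -2.5, -2., -2.])   # hypothetical knot values
est_mesh = np.arange(101, dtype=float)

rate = np.interp(est_mesh, param_mesh, invlogit(logit_rate))
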
Example #37
0
 
 safe_name = name.replace('.','_')
 
 P_prime_now = pm.Beta('P_prime_%s'%safe_name,3.,3.)
 p_vec_now = pm.MvNormalChol('p_vec_%s'%safe_name, p_mean, cholfac)
 p_vec_list.append(p_vec_now)
 P_prime_list.append(P_prime_now)
 
 b = pm.lam_dtrm('b', lambda p_vec = p_vec_now: 1./exp(p_vec[0]))
 
 if methods[name] == 'Microscopy':        
     
     # alpha, s and c depend on p_vec[1:4]
     c = pm.lam_dtrm('c', lambda p_vec = p_vec_now: 1./exp(p_vec[1]))
     alph = pm.lam_dtrm('alph', lambda p_vec = p_vec_now: exp(p_vec[2]))
     s = pm.lam_dtrm('s', lambda p_vec = p_vec_now: pm.invlogit(p_vec[3]))
 
 elif methods[name] == 'RDT':
     
     # alpha, s and c depend on p_vec[4:7]
     c = pm.lam_dtrm('c', lambda p_vec = p_vec_now: 1./exp(p_vec[4]))
     alph = pm.lam_dtrm('alph', lambda p_vec = p_vec_now: exp(p_vec[5]))
     s = pm.lam_dtrm('s', lambda p_vec = p_vec_now: pm.invlogit(p_vec[6]))        
     
 @pm.dtrm
 def this_F(c=c, alph=alph, a=age_bin_ctrs[name], s=s):
     """
     The function F, which gives detection probability.
     """
     out = empty(len(a))
     out[where(a<alph)] = 1.
Example #38
0
def theta(a=alpha, b=beta, d=dose):
    """theta = inv_logit(a+b)"""
    return pm.invlogit(a+b*d)
Example #39
0
    subsetmonth = 0
    for mm in xrange(n_months):
        chunk[:, :, :,
              subsetmonth] = hr.realizations[tot_slice[0], tot_slice[1],
                                             tot_slice[2], mm]
        subsetmonth = subsetmonth + 1
    chunk = chunk.squeeze()

    holdshape = chunk.shape
    chunk = chunk.ravel()

    # optionally, add nugget, inverse logit, and age correct
    if ADDNUGGET is True:
        chunk = chunk + np.random.normal(
            loc=0, scale=np.sqrt(V[ii]), size=np.prod(chunk.shape))
    if BACKTRANSFORM is True: chunk = pm.invlogit(chunk)
    if AGECORRECT is True:
        chunk *= facs[np.random.randint(N_facs, size=np.prod(chunk.shape))]

    chunk = chunk.reshape(holdshape).squeeze()

    # aggregate through time
    chunkTMEAN = np.atleast_2d(np.mean(chunk, -1))

    # add this realisation to output block
    annualmean_block[:, :, ii] = chunkTMEAN

# get posterior mean and std of predicted maps
annualmean_mean = np.atleast_2d(np.mean(annualmean_block, -1))
annualmean_std = np.atleast_2d(np.std(annualmean_block, -1))
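Each realisation above is post-processed by optionally adding a Gaussian nugget on the logit scale, back-transforming with the inverse logit, applying a randomly drawn age-correction factor, and then averaging over months. The same pipeline in miniature on a dummy block (the nugget variance and correction factors are assumptions):

import numpy as np

def invlogit(x):
    return 1.0 / (1.0 + np.exp(-x))

chunk = np.random.normal(size=(10, 10, 12))        # hypothetical logit-scale block
V_ii = .2                                          # assumed nugget variance
facs = np.random.uniform(.8, 1., size=100000)      # assumed age-correction factors

flat = chunk.ravel()
flat = flat + np.random.normal(0, np.sqrt(V_ii), flat.size)   # add nugget
flat = invlogit(flat)                                         # back-transform
flat *= facs[np.random.randint(len(facs), size=flat.size)]    # age correct
annual_mean = flat.reshape(chunk.shape).mean(-1)              # aggregate over months
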
Example #40
0
 def this_fun(x, p2=p2, p3=p3,negi=negi, posi=posi, Ai=Ai):
     p1 = np.log(invlogit(x))
     return p1*spi + p3 + cfh(p1,p2,negi)
Example #41
0
 def f(sp_sub, a, b, n=n):
     p = pm.invlogit(sp_sub)
     h = pm.rbeta(a, b, size=len(sp_sub))
     p_def = g6pd.p_fem_def(p, h)
     return pm.rbinomial(n=n, p=p)
Example #42
0
    # Pull out relevant section of hdf5 f block
    tot_slice = (slice(ii,ii+1,None),slice(None,None,None),slice(None,None,None),slice(startMonth,endMonth,None))  
    chunk = np.zeros(1*n_cols*n_rows*n_months).reshape(1,n_rows,n_cols,n_months)
    subsetmonth=0 
    for mm in xrange(n_months):
        chunk[:,:,:,subsetmonth] = hr.realizations[tot_slice[0],tot_slice[1],tot_slice[2],mm]
        subsetmonth=subsetmonth+1
    chunk = chunk.squeeze()

    holdshape = chunk.shape
    chunk = chunk.ravel()
    
    # optionally, add nugget, inverse logit, and age correct
    if ADDNUGGET is True: chunk = chunk + np.random.normal(loc=0, scale=np.sqrt(V[ii]), size=np.prod(chunk.shape))
    if BACKTRANSFORM is True: chunk = pm.invlogit(chunk)
    if AGECORRECT is True: chunk *= facs[np.random.randint(N_facs, size=np.prod(chunk.shape))]

    chunk = chunk.reshape(holdshape).squeeze()
   
    # aggregate through time
    chunkTMEAN = np.atleast_2d(np.mean(chunk,-1))
        
    # add this realisation to output block
    annualmean_block[:,:,ii]=chunkTMEAN

# get posterior mean and std of predicted maps
annualmean_mean = np.atleast_2d(np.mean(annualmean_block,-1))
annualmean_std = np.atleast_2d(np.std(annualmean_block,-1))

print 'surface mean of annual mean is '+str(np.mean(annualmean_mean))
Example #43
0
 def f(sp_sub, x, a, b):
     p = pm.invlogit(sp_sub(x))
     h = pm.rbeta(a,b,size=len(p))
     return g6pd.p_fem_def(p,h)
Example #44
0
def generate_disease_data(condition, cov):
    """ Generate csv files with gold-standard disease data,
    and somewhat good, somewhat dense disease data, as might be expected from a
    condition that is carefully studied in the literature
    """
    
    age_len = dismod3.MAX_AGE
    ages = np.arange(age_len, dtype='float')

    # incidence rate
    i0 = .005 + .02 * mc.invlogit((ages - 44) / 3)
    #i0 = np.maximum(0., .001 * (-.125 + np.ones_like(ages) + (ages / age_len)**2.))

    # remission rate
    #r = 0. * ages
    r = .1 * np.ones_like(ages)

    # excess-mortality rate
    #f_init = .085 * (ages / 100) ** 2.5
    SMR = 3. * np.ones_like(ages) - ages / age_len

    # all-cause mortality-rate
    mort = dismod3.get_disease_model('all-cause_mortality')

    #age_intervals = [[a, a+9] for a in range(0, dismod3.MAX_AGE-4, 10)] + [[0, 100] for ii in range(1)]
    age_intervals = [[a, a] for a in range(0, dismod3.MAX_AGE, 1)]
    
    # TODO:  take age structure from real data
    sparse_intervals = dict([[region, random.sample(age_intervals, (ii**3 * len(age_intervals)) / len(countries_for)**3 / 1)] for ii, region in enumerate(countries_for)])
    dense_intervals = dict([[region, random.sample(age_intervals, len(age_intervals)/2)] for ii, region in enumerate(countries_for)])

    gold_data = []
    noisy_data = []
            
    for ii, region in enumerate(sorted(countries_for)):
        if region == 'world':
            continue
        
        print region
        sys.stdout.flush()

        # introduce unexplained regional variation
        #i = i0 * (1 + float(ii) / 21)

        # or not
        i = i0
        
        for year in [1990, 2005]:
            for sex in ['male', 'female']:

                param_type = 'all-cause_mortality'
                key = dismod3.gbd_key_for(param_type, region, year, sex)
                m_all_cause = mort.mortality(key, mort.data)

                # calculate excess-mortality rate from smr
                f = (SMR - 1.) * m_all_cause


                ## compartmental model (bins S, C, D, M)
                import scipy.linalg
                from dismod3 import NEARLY_ZERO
                from dismod3.utils import trim

                SCDM = np.zeros([4, age_len])
                p = np.zeros(age_len)
                m = np.zeros(age_len)

                SCDM[0,0] = 1.
                SCDM[1,0] = 0.
                SCDM[2,0] = 0.
                SCDM[3,0] = 0.

                p[0] = SCDM[1,0] / (SCDM[0,0] + SCDM[1,0] + NEARLY_ZERO)
                m[0] = trim(m_all_cause[0] - f[0] * p[0], NEARLY_ZERO, 1-NEARLY_ZERO)

                for a in range(age_len - 1):
                    A = [[-i[a]-m[a],  r[a]          , 0., 0.],
                         [ i[a]     , -r[a]-m[a]-f[a], 0., 0.],
                         [      m[a],       m[a]     , 0., 0.],
                         [        0.,            f[a], 0., 0.]]

                    SCDM[:,a+1] = np.dot(scipy.linalg.expm(A), SCDM[:,a])

                    p[a+1] = SCDM[1,a+1] / (SCDM[0,a+1] + SCDM[1,a+1] + NEARLY_ZERO)
                    m[a+1] = m_all_cause[a+1] - f[a+1] * p[a+1]


                # duration = E[time in bin C]
                hazard = r + m + f
                pr_not_exit = np.exp(-hazard)
                X = np.empty(len(hazard))
                X[-1] = 1 / hazard[-1]
                for ii in reversed(range(len(X)-1)):
                    X[ii] = (pr_not_exit[ii] * (X[ii+1] + 1)) + (1 / hazard[ii] * (1 - pr_not_exit[ii]) - pr_not_exit[ii])

                country = countries_for[region][0]
                params = dict(age_intervals=age_intervals, condition=condition, gbd_region=region,
                              country=country, year=year, sex=sex, effective_sample_size=1000)

                params['age_intervals'] = [[0,99]]
                generate_and_append_data(gold_data, 'prevalence data', p, **params)
                generate_and_append_data(gold_data, 'incidence data', i, **params)
                generate_and_append_data(gold_data, 'excess-mortality data', f, **params)
                generate_and_append_data(gold_data, 'remission data', r, **params)
                generate_and_append_data(gold_data, 'duration data', X, **params)

                # TODO: use this approach to age standardize all gold data, and then change it to get iX as a direct sum
                params['age_intervals'] = [[0,99]]
                iX = i * X * (1-p) * regional_population(key)
                generate_and_append_data(gold_data, 'incidence_x_duration', iX, **params)
                

                params['effective_sample_size'] = 1000
                params['cov'] = 0.
                params['age_intervals'] = age_intervals
                generate_and_append_data(noisy_data, 'prevalence data', p, **params)
                generate_and_append_data(noisy_data, 'excess-mortality data', f, **params)
                generate_and_append_data(noisy_data, 'remission data', r, **params)
                generate_and_append_data(noisy_data, 'incidence data', i, **params)



    col_names = sorted(data_dict_for_csv(gold_data[0]).keys())

    f_file = open(OUTPUT_PATH + '%s_gold.tsv' % condition, 'w')
    csv_f = csv.writer(f_file, dialect='excel-tab')
    csv_f.writerow(col_names)
    for d in gold_data:
        dd = data_dict_for_csv(d)
        csv_f.writerow([dd[c] for c in col_names])
    f_file.close()

    f_name = OUTPUT_PATH + '%s_data.tsv' % condition
    f_file = open(f_name, 'w')
    csv_f = csv.writer(f_file, dialect='excel-tab')
    csv_f.writerow(col_names)

    for d in noisy_data:
        dd = data_dict_for_csv(d)
        csv_f.writerow([dd[c] for c in col_names])
    f_file.close()

    # upload data file
    from dismod3.disease_json import dismod_server_login, twc, DISMOD_BASE_URL
    dismod_server_login()
    twc.go(DISMOD_BASE_URL + 'dismod/data/upload/')
    twc.formvalue(1, 'tab_separated_values', open(f_name).read())

    # TODO: find or set the model number for this model, set the
    # expert priors and covariates, merge the covariate data into the
    # model, and add the "ground truth" to the disease json

    try:
        url = twc.submit()
    except Exception, e:
        print e
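The compartmental section above advances the (S, C, D, M) state vector one age step at a time using the matrix exponential of the transition-rate matrix, and recovers prevalence from the S and C compartments. A stripped-down sketch of a single step with arbitrary constant rates:

import numpy as np
import scipy.linalg

i, r, f, m = .01, .1, .02, .008      # assumed incidence, remission, excess and background mortality
A = np.array([[-i - m,  r,          0., 0.],
              [ i,     -r - m - f,  0., 0.],
              [ m,      m,          0., 0.],
              [ 0.,     f,          0., 0.]])

SCDM = np.array([1., 0., 0., 0.])                  # everyone starts susceptible
SCDM = np.dot(scipy.linalg.expm(A), SCDM)          # state after one age step
p = SCDM[1] / (SCDM[0] + SCDM[1])                  # prevalence among the living
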
Example #45
0
 def f(sp_sub, n=n):
     return pm.rbinomial(n=n,p=pm.invlogit(sp_sub))
Example #46
0
# Get E[v] and E[v**2] over the entire posterior
for i in xrange(n):
    # Reset all variables to their values at frame i of the trace
    DuffySampler.remember(0, i)
    # Evaluate the observed mean
    store_africa_val(DuffySampler.sp_sub_b.M_obs.value, dpred, africa)
    Msurf_b, Vsurf_b = pm.gp.point_eval(DuffySampler.sp_sub_b.M_obs.value,
                                        DuffySampler.sp_sub_b.C_obs.value,
                                        dpred)
    Msurf_s, Vsurf_s = pm.gp.point_eval(DuffySampler.sp_sub_s.M_obs.value,
                                        DuffySampler.sp_sub_s.C_obs.value,
                                        dpred)
    Vsurf_b += DuffySampler.V_b.value
    Vsurf_s += DuffySampler.V_s.value

    freq_b = pm.invlogit(Msurf_b + pm.rnormal(0, 1) * np.sqrt(Vsurf_b))
    freq_s = pm.invlogit(Msurf_s + pm.rnormal(0, 1) * np.sqrt(Vsurf_s))

    samp_i = (freq_b * freq_s + (1 - freq_b) * DuffySampler.p1.value)**2

    # Accumulate the first and second moments of the sampled surface
    Msurf[where_unmasked] += samp_i / float(n)
    E2surf[where_unmasked] += samp_i**2 / float(n)

# Get the posterior variance and standard deviation
Vsurf = E2surf - Msurf**2
SDsurf = np.sqrt(Vsurf)

Msurf = np.ma.masked_array(Msurf, mask=covariate_raster.root.mask[:])
SDsurf = np.ma.masked_array(SDsurf, mask=covariate_raster.root.mask[:])
covariate_raster.close()
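The loop above estimates the posterior mean and variance of the predicted surface by accumulating the first and second moments of the per-frame samples and using Var[v] = E[v**2] - E[v]**2. The same moment trick in miniature with synthetic samples:

import numpy as np

samples = np.random.beta(2, 5, size=(1000, 50))    # hypothetical per-frame surface samples
n = samples.shape[0]

Msurf = samples.sum(0) / float(n)                  # E[v]
E2surf = (samples ** 2).sum(0) / float(n)          # E[v**2]
Vsurf = E2surf - Msurf ** 2
SDsurf = np.sqrt(Vsurf)
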
Example #47
0
 def f(sp_sub, a, b, n=n):
     p = pm.invlogit(sp_sub)
     h = pm.rbeta(a,b,size=len(sp_sub))
     p_def = g6pd.p_fem_def(p,h)
     return pm.rbinomial(n=n, p=p)
Example #48
0
def theta(a=alpha, b=beta):
    return pymc.invlogit(a + b * x)
Example #49
0
 def f(sp_sub, x):
     p = pm.invlogit(sp_sub(x))
     return p**2
Example #50
0
 def rate_stoch(mu=mu):
     return mc.invlogit(mu)
Example #51
0
 def p(S=S):
     """The success probability."""
     return pm.invlogit(S)
Example #52
0
def examineRealization(filename,
                       Rel,
                       Month,
                       paramfileINDEX,
                       TemporalStartMonth=None,
                       TemporalEndMonth=None,
                       conditioned=False,
                       flipVertical="FALSE",
                       SPACE=True,
                       TIME=True):

    # deal with system arguments
    #filename = sys.argv[1]
    #Rel = int(sys.argv[2])
    #Month = int(sys.argv[3])
    #conditioned = sys.argv[4]
    #flipVertical = sys.argv[5]
    #paramfileINDEX = int(sys.argv[6])
    #TemporalStartMonth = int(sys.argv[7])
    #TemporalEndMonth = int(sys.argv[8])
    #SPACE = sys.argv[9]
    #TIME = sys.argv[10]

    ## if filename is a string, assume it's a path and import the hdf5 file (otherwise, the assumption is we are passing the 'hr' root of an hdf5 realisation file)
    if type(filename) is str:
        hf = tb.openFile(filename)
        hr = hf.root

    if type(filename) is not str:
        hr = filename

    # define path to R param file
    mbgw_root = __root__ = mbgw.__path__[0]
    r_paramfile_path = mbgw_root + '/joint_simulation/CONDSIMalgorithm/ParamFile_uncond_' + str(
        paramfileINDEX) + '.R'

    # initialise plot window
    nplots = 0
    if SPACE is True: nplots = nplots + 5
    if TIME is True: nplots = nplots + 1
    r.X11(width=3.3 * nplots, height=4)
    r.par(mfrow=(1, nplots))

    ###CHECK SPATIAL COVARIANCE AND BASIC FEATURE OF A SINGLE MONTH
    if SPACE is True:

        # define basic parameters
        slices = [
            slice(None, None, None),
            slice(None, None, None),
            slice(Month, Month + 1, None)
        ]

        slices = tuple(slices)
        n_realizations = 1
        n_rows = len(hr.lat_axis)
        n_cols = len(hr.lon_axis)
        N_facs = int(1e5)

        # Pull out parasite rate chunk (i.e. import n months of block)
        slices = tuple(slices)
        tot_slice = (slice(Rel, Rel + 1, None), ) + slices

        n_months = tot_slice[3].stop - tot_slice[3].start
        f_chunk = np.zeros(1 * n_cols * n_rows * n_months).reshape(
            1, n_rows, n_cols, n_months)
        subsetmonth = 0

        #print tot_slice
        #print f_chunk[:,:,:,subsetmonth]

        for mm in xrange(tot_slice[3].start, tot_slice[3].stop):
            f_chunk[:, :, :,
                    subsetmonth] = hr.realizations[tot_slice[0], tot_slice[1],
                                                   tot_slice[2], mm]
            subsetmonth = subsetmonth + 1
        #f_chunk = f_chunk[::-1,:,::-1,:].T[:,:,:,0]
        f_chunk = f_chunk.squeeze()
        f_chunk[f_chunk == -9999] = np.nan

        # plot this grid
        plotMapPY(f_chunk.squeeze(), flipVertical=flipVertical)
        r.title(main="logit")

        inv_f_chunk = pm.invlogit(f_chunk.squeeze().T)
        inv_f_chunk = inv_f_chunk.reshape(f_chunk.shape)
        plotMapPY(inv_f_chunk, flipVertical=flipVertical)
        r.title(main="inv logit")

        #from IPython.Debugger import Pdb
        #Pdb(color_scheme='Linux').set_trace()

        # compare global variance to parameter draw
        observedVar = round(np.var(f_chunk[np.isnan(f_chunk) == False]), 10)
        theoreticalVar = ((hr.PyMCsamples.col('amp')[Rel])**2)
        varString = 'observedVar: ' + str(
            observedVar) + ';  amp^2: ' + str(theoreticalVar)
        print varString

        # plot histogram
        junk = r.hist(f_chunk[np.isnan(f_chunk) == False],
                      main=varString,
                      xlab="",
                      ylab="")
        junk = r.hist(pm.invlogit(f_chunk[np.isnan(f_chunk) == False]),
                      xlab="",
                      ylab="",
                      main="")

        # calculate and plot empirical covariance function in N-S direction
        gridIN = cp.deepcopy(f_chunk).squeeze()

        if conditioned is False: meanIN = 0
        if conditioned is True:
            meanIN = hr.PyMCsamples.col("m_const")[Rel] + (
                hr.PyMCsamples.col("t_coef")[Rel] * hr.t_axis[Month])
        cellWidth = 5 / 6378.137
        covDict = getGridCovarianceInY(gridIN, meanIN, cellWidth)

        # obtain theoretical covariance function from input MCMC parameter values: pymc method
        C = hr.group0.C[Rel]
        xplot = covDict['RadDist']
        yplot1 = C([[0, 0, 0]],
                   np.vstack(
                       (np.zeros(len(xplot)), xplot, np.zeros(len(xplot)))).T)
        yplot1 = np.asarray(yplot1).squeeze()

        # obtain theoretical covariance function from input MCMC parameter values: R method
        Scale = hr.PyMCsamples.col("scale")[Rel]
        amp = hr.PyMCsamples.col("amp")[Rel]
        inc = hr.PyMCsamples.col("inc")[Rel]
        ecc = hr.PyMCsamples.col("ecc")[Rel]
        t_lim_corr = hr.PyMCsamples.col("t_lim_corr")[Rel]
        scale_t = hr.PyMCsamples.col("scale_t")[Rel]
        sin_frac = hr.PyMCsamples.col("sin_frac")[Rel]

        CfromR = temptestcovPY(xplot, np.zeros(len(xplot)),
                               np.zeros(len(xplot)), Scale, amp, inc, ecc,
                               t_lim_corr, scale_t, sin_frac, r_paramfile_path)
        yplot = CfromR[0, :]

        # plot

        ymax = max(np.max(covDict['E_cov']), np.max(xplot), np.max(yplot))
        ymin = min(np.min(covDict['E_cov']), np.min(xplot), np.min(yplot))

        r.plot(covDict['RadDist'],
               covDict['E_cov'],
               xlab="radians",
               ylab="C",
               main=str(paramfileINDEX),
               ylim=(ymin, ymax))
        r.lines(xplot, yplot1, col=2)
        r.lines(xplot, yplot, col=3)

    ###CHECK TEMPORAL COVARIANCE

    if TIME is True:

        # if the start and end months are None or invalid, reset to the maximum temporal extent
        if ((TemporalEndMonth is None) |
            (TemporalEndMonth >= hr.realizations.shape[3])):
            TemporalEndMonth = hr.realizations.shape[3]
        if ((TemporalStartMonth is None) | (TemporalStartMonth >=
                                            (hr.realizations.shape[3] - 1))):
            TemporalStartMonth = 0

        # define basic parameters
        slices = [
            slice(None, None, None),
            slice(None, None, None),
            slice(TemporalStartMonth, TemporalEndMonth, None)
        ]

        slices = tuple(slices)
        n_realizations = 1
        n_rows = len(hr.lat_axis)
        n_cols = len(hr.lon_axis)
        N_facs = int(1e5)

        # Pull out parasite rate chunk (i.e. import n months of block)
        slices = tuple(slices)
        tot_slice = (slice(Rel, Rel + 1, None), ) + slices

        n_months = tot_slice[3].stop - tot_slice[3].start
        f_chunk = np.zeros(1 * n_cols * n_rows * n_months).reshape(
            1, n_rows, n_cols, n_months)
        subsetmonth = 0

        for mm in xrange(tot_slice[3].start, tot_slice[3].stop):
            f_chunk[:, :, :,
                    subsetmonth] = hr.realizations[tot_slice[0], tot_slice[1],
                                                   tot_slice[2], mm]
            subsetmonth = subsetmonth + 1
        #f_chunk = f_chunk[::-1,:,::-1,:].T[:,:,:,0]
        f_chunk = f_chunk.squeeze()
        f_chunk[f_chunk == -9999] = np.nan

        # calculate and plot empirical temporal covariance
        gridIN = cp.deepcopy(f_chunk).squeeze()

        if conditioned is False: meanIN = 0
        if conditioned is True:
            meanIN = hr.PyMCsamples.col("m_const")[Rel] + (
                hr.PyMCsamples.col("t_coef")[Rel] *
                hr.t_axis[TemporalStartMonth:TemporalEndMonth + 1:1])

        covDict = getGridCovarianceInT(gridIN, meanIN)

        # obtain theoretical covariance function from input MCMC parameter values: pymc method
        C = hr.group0.C[Rel]
        xplot = covDict['yearDist']
        yplot = C([[0, 0, 0]],
                  np.vstack(
                      (np.zeros(len(xplot)), np.zeros(len(xplot)), xplot)).T)
        yplot = np.asarray(yplot).squeeze()

        # obtain theoretical covariance function from input MCMC parameter values: R method
        Scale = hr.PyMCsamples.col("scale")[Rel]
        amp = hr.PyMCsamples.col("amp")[Rel]
        inc = hr.PyMCsamples.col("inc")[Rel]
        ecc = hr.PyMCsamples.col("ecc")[Rel]
        t_lim_corr = hr.PyMCsamples.col("t_lim_corr")[Rel]
        scale_t = hr.PyMCsamples.col("scale_t")[Rel]
        sin_frac = hr.PyMCsamples.col("sin_frac")[Rel]

        CfromR = temptestcovPY(np.zeros(len(xplot)), np.zeros(len(xplot)),
                               xplot, Scale, amp, inc, ecc, t_lim_corr,
                               scale_t, sin_frac, r_paramfile_path)
        yplot2 = CfromR[0, :]

        # plot

        ymax = max(np.max(covDict['E_cov']), np.max(yplot), np.max(yplot2))
        ymin = min(np.min(covDict['E_cov']), np.min(yplot), np.min(yplot2), 0)

        r.plot(covDict['yearDist'],
               covDict['E_cov'],
               xlab="lag (years)",
               ylab="C",
               main=str(paramfileINDEX),
               ylim=(ymin, ymax))
        r.lines(xplot, yplot, col=2)
        r.lines(xplot, yplot2, col=3)
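Both the spatial and the temporal branches above compare an empirical lag covariance of the simulated field against the covariance function implied by the MCMC parameter draw. A minimal sketch of the empirical side for a one-dimensional zero-mean series, with an exponential model curve for comparison (all parameters here are arbitrary):

import numpy as np

x = np.random.normal(size=500)                     # hypothetical zero-mean field values
max_lag = 20
emp_cov = np.array([np.mean(x[:len(x) - k] * x[k:]) for k in range(max_lag)])

amp, scale = 1., 5.                                # assumed covariance parameters
model_cov = amp ** 2 * np.exp(-np.arange(max_lag) / scale)
# plotting emp_cov against model_cov is the analogue of the r.plot / r.lines calls above
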
Example #53
0
 def f(sp_sub, x):
     p = pm.invlogit(sp_sub(x))
     return p**2
Example #54
0
def theta(a=alpha, b=beta):
    """theta = logit^{-1}(a+b)"""
    return pymc.invlogit(a + b * x)
Example #55
0
 def f(sp_sub, x, a, b):
     p = pm.invlogit(sp_sub(x))
     h = pm.rbeta(a, b, size=len(p))
     return g6pd.p_fem_def(p, h)
Example #56
0
def y(logit_p=logit_p, value=df[11]):
    return pm.bernoulli_like(df[11], pm.invlogit(logit_p))
Example #57
0
 def f(sp_sub, n=n):
     return pm.rbinomial(n=n, p=pm.invlogit(sp_sub))
Example #58
0
 def C_0(logit_C_0=logit_C_0):
     return mc.invlogit(logit_C_0)
Example #59
0
                  default='50',
                  help='thinning ratio of MCMC process')
parser.add_option('-v',
                  '--verbose',
                  default='0',
                  help='level of verbosity (0 = none, 1 = some, etc...)')

(options, args) = parser.parse_args()

age_len = dismod3.MAX_AGE
ages = np.arange(age_len, dtype='float')

print 'defining model transition parameters'

# incidence rate
i = .012 * mc.invlogit((ages - 44) / 3)

# remission rate
r = 0. * ages

# case-fatality rate
f = .085 * (ages / 100)**2.5

# all-cause mortality-rate
m = np.array([
    0.03266595, 0.01114646, 0.00450302, 0.00226896, 0.00143311, 0.00109108,
    0.00094584, 0.00087981, 0.00083913, 0.0008073, 0.00078515, 0.00077967,
    0.00079993, 0.00085375, 0.00094349, 0.00106717, 0.00121825, 0.00138438,
    0.00154968, 0.00170171, 0.0018332, 0.00194182, 0.00202949, 0.00210058,
    0.00215954, 0.00221083, 0.00225905, 0.00230878, 0.00236425, 0.00242902,
    0.00250614, 0.00259834, 0.00270792, 0.00283638, 0.00298377, 0.00314906,