def logit_normal_draw(cf_mean, std, N, J):
    std = pl.array(std)
    if mc.__version__ == '2.0rc2':  # version on Omak
        X = [mc.invlogit(mc.rnormal(mu=cf_mean, tau=std**-2)) for n in range(N)]
        Y = pl.array(X)
    else:
        X = mc.rnormal(mu=cf_mean, tau=std**-2, size=(N, J))
        Y = mc.invlogit(X)
    return Y
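# A minimal usage sketch (hypothetical values, not from the original code;
# assumes pl is pylab and mc is PyMC 2, as in the surrounding snippets):
# either branch returns an N x J array on the probability scale.
cf_mean = pl.array([-1., 0., 1.])   # logit-space means, J = 3
std = [.1, .2, .3]                  # logit-space standard deviations
draws = logit_normal_draw(cf_mean, std, N=100, J=3)
assert draws.shape == (100, 3)
assert ((draws > 0) & (draws < 1)).all()   # invlogit maps into (0, 1)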
def ages_and_data(N_exam, f_samp, correction_factor_array, age_lims):
    """Called by pred_samps. Simulates ages of survey participants and data given f."""
    N_samp = len(f_samp)
    N_age_samps = correction_factor_array.shape[1]

    # Get samples for the age distribution at the observation points.
    age_distribution = []
    for i in xrange(N_samp):
        l = age_lims[i]
        age_distribution.append(S_trace[np.random.randint(S_trace.shape[0]), 0, l[0]:l[1] + 1])
        age_distribution[-1] /= np.sum(age_distribution[-1])

    # Draw age for each individual, draw an age-correction profile for each location,
    # compute probability of positive for each individual, see how many individuals are
    # positive.
    A = []
    pos = []
    for s in xrange(N_samp):
        A.append(np.array(pm.rcategorical(age_distribution[s], size=N_exam[s]), dtype=int) + age_lims[s][0])
        P_samp = pm.invlogit(f_samp[s].ravel()) * correction_factor_array[:, np.random.randint(N_age_samps)][A[-1]]
        pos.append(pm.rbernoulli(P_samp))

    return A, pos, age_distribution
def generate_synthetic_data(truth, key, d):
    """ Create simulated data"""
    a0 = d['age_start']
    a1 = d['age_end']
    age_weights = d['age_weights']

    d.update(condition='type_2_diabetes',
             year_start=y,
             year_end=y)

    p0 = dismod3.utils.rate_for_range(truth[key], range(a0, a1 + 1),
                                      np.ones(a1 + 1 - a0) / (a1 + 1 - a0))
    p0 = dismod3.utils.trim(p0, 1.e-6, 1. - 1.e-6)

    # TODO: make beta dispersion study level (instead of datum level)
    # p1 = mc.rbeta(p0 * dispersion, (1 - p0) * dispersion)
    p1 = p0

    # TODO: add additional covariates
    if key.find('prevalence') != -1:
        if random.random() < .1:
            d['self-reported'] = True
            p1 = mc.invlogit(mc.logit(p1) - .2)
        else:
            d['self-reported'] = False

    # p2 = mc.rbinomial(n, p1) / n
    p2 = float(p1)

    d['value'] = p2
    d['standard_error'] = .0001

    return d
def simdata_postproc(sp_sub, survey_plan):
    """
    This function should take a value for the Gaussian random field in the
    submodel sp_sub, evaluated at the survey plan locations, and return a
    simulated dataset.
    """
    p = pm.invlogit(sp_sub)
    n = survey_plan.n
    return pm.rbinomial(n, p)
def sim_data(N,
             true_cf=[[.3, .6, .1], [.3, .5, .2]],
             true_std=[[.2, .05, .05], [.3, 0.1, 0.1]],
             sum_to_one=True):
    """
    Create an NxTxJ matrix of simulated data (T is determined by the length
    of true_cf, J by the length of the elements of true_cf).

    true_cf - a list of lists of true cause fractions (each must sum to one)
    true_std - a list of lists of the standard deviations corresponding to
        the true csmf's for each time point. Can either be a list of length J
        inside a list of length 1 (in this case, the same standard deviation
        is used for all time points) or can be T lists of length J (in this
        case, a separate standard deviation is specified and used for each
        time point).
    """
    if sum_to_one:
        assert pl.allclose(pl.sum(true_cf, 1), 1), 'The sum of elements of true_cf must equal 1'
    T = len(true_cf)
    J = len(true_cf[0])

    ## if only one std provided, duplicate for all time points
    if len(true_std) == 1 and len(true_cf) > 1:
        true_std = [true_std[0] for i in range(len(true_cf))]

    ## transform the mean and std to logit space
    transformed_std = []
    for t in range(T):
        pi_i = pl.array(true_cf[t])
        sigma_pi_i = pl.array(true_std[t])
        # delta-method scale for logit: |d logit(p)/dp| = 1/(p*(1-p));
        # the sign flip from using (pi_i - 1) is removed by the squaring
        transformed_std.append(((1 / (pi_i * (pi_i - 1)))**2 * sigma_pi_i**2)**0.5)

    ## find minimum standard deviation (by cause across time) and draw from this
    min_std = pl.array(transformed_std).min(0)
    common_perturbation = [pl.ones([T, J]) * mc.rnormal(mu=0, tau=min_std**-2)
                           for n in range(N)]

    ## draw from remaining variation
    tau = pl.array(transformed_std)**2 - min_std**2
    tau[tau == 0] = 0.000001
    additional_perturbation = [[mc.rnormal(mu=0, tau=tau[t]**-1) for t in range(T)]
                               for n in range(N)]

    result = pl.zeros([N, T, J])
    for n in range(N):
        result[n, :, :] = [mc.invlogit(mc.logit(true_cf[t])
                                       + common_perturbation[n][t]
                                       + additional_perturbation[n][t])
                           for t in range(T)]

    return result
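# A quick check of sim_data (hypothetical, not from the original code): the
# default true_cf has T=2 time points and J=3 causes, so N draws should come
# back as an N x 2 x 3 array of values in (0, 1).
data = sim_data(10)
assert data.shape == (10, 2, 3)
assert ((data > 0) & (data < 1)).all()
# Rows are logit-normal draws, so they need not sum exactly to one.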
def known_age_corr_likelihoods_f(pos, A, fac_array, f_mesh, nug, type=None):
    """Computes spline representations over P_mesh for the likelihood of N_pos | N_exam, A"""
    # TODO: Optimize large-N case using CLT of some kind.

    # Allocate work and output arrays.
    N_recs = len(A)
    likelihoods = empty((N_recs, len(f_mesh)))
    likes_now = empty((fac_array.shape[1], len(f_mesh)), dtype=float128)
    splreps = []
    p1 = invlogit(f_mesh)

    # For each record
    for i in xrange(N_recs):
        posi = pos[i]
        Ai = A[i]
        spi = np.sum(posi)
        negi = 1. - posi

        if type is None:
            if len(Ai) < 100:
                fn = outer_small
            else:
                fn = outer_large
        elif type == 's':
            fn = outer_small
        else:
            fn = outer_large

        likelihoods[i, :] = fn(p1, fac_array, Ai, spi, posi, negi, likes_now)

        # Clean out occasional infinities on the edges.
        good_indices = where(1 - isinf(likelihoods[i, :]))[0]

        # Compute spline representations.
        this_splrep = interp.splrep(x=f_mesh[good_indices],
                                    y=likelihoods[i, good_indices].squeeze())

        def this_fun(x, sp=this_splrep,
                     Pml=f_mesh[good_indices].min(),
                     Pmh=f_mesh[good_indices].max()):
            out = np.atleast_1d(interp.splev(x, sp))
            if np.any(x < Pml) or np.any(x > Pmh):
                out[np.where(x < Pml)] = -np.Inf
                out[np.where(x > Pmh)] = -np.Inf
            return out.reshape(np.shape(x))

        splreps.append(this_fun)

    return splreps
def p_wells(base_fx=base_fx,
            batch_fx=batch_fx,
            plate_fx=plate_fx,
            batchrow_fx=batchrow_fx,
            batchcol_fx=batchcol_fx,
            treatment_fx=treatment_fx):
    # use this ordering to make everything turn into an ArrayContainer
    return invlogit(treatment_fx[treatment_idxs] + base_fx +
                    batch_fx[batch_idxs] + plate_fx[plate_idxs] +
                    batchrow_fx[batchrow_idxs] + batchcol_fx[batchcol_idxs])
def PR_samps(mesh, Ms, Cs, Vs, ind, facs):
    """
    Converts a mean function, covariance function, nugget and array of
    correction factors to a sample for the average of parasite rate over a
    given spatiotemporal mesh.
    """
    nm = mesh.shape[0]
    samps = np.empty((len(ind), nm))
    for i in ind:
        C = Cs[i](mesh, mesh)
        C[::nm + 1] += Vs[i]  # add the nugget variance down the diagonal
        samps[i, :] = pm.invlogit(pm.mv_normal_cov(Ms[i](mesh), C).ravel()) * facs[A[i]]
    return np.mean(samps, axis=1)
def mortality(self, key="all-cause_mortality", data=None):
    """ Calculate the all-cause mortality rate for the
    region and sex of disease_model, and return it
    in an array corresponding to age_mesh

    Parameters
    ----------
    key : str, optional
      of the form 'all-cause_mortality+gbd_region+year+sex'
    data: list, optional
      the data list to extract all-cause mortality from
    """
    if self.params.get("initial_value", {}).has_key(key):
        return self.get_initial_value(key)

    if not data:
        data = self.filter_data("all-cause_mortality data")

    if len(data) == 0:
        return NEARLY_ZERO * np.ones(len(self.get_estimate_age_mesh()))
    else:
        M, C = uninformative_prior_gp(c=-1.0, scale=300.0)
        age = []
        val = []
        V = []
        for d in data:
            scale = self.extract_units(d)
            a0 = d.get("age_start", MISSING)
            a1 = d.get("age_end", MISSING)
            y = self.value_per_1(d)
            se = self.se_per_1(d)

            if se == MISSING:
                se = 0.01
            if MISSING in [a0, a1, y]:
                continue

            age.append(0.5 * (a0 + a1))
            val.append(y + 0.00001)
            V.append(se ** 2.0)

        if len(data) > 0:
            gp.observe(M, C, age, mc.logit(val), V)

        normal_approx_vals = mc.invlogit(M(self.get_estimate_age_mesh()))
        self.set_initial_value(key, normal_approx_vals)
        return self.get_initial_value(key)
def f_ifr_factory(df_ifr, logit_shift):
    """Create age-interpolating IFR function

    Parameters
    ----------
    df_ifr : pd.DataFrame with columns for age_mid, lowest_ifr
    logit_shift : float, shift of value in logit-space

    Returns
    -------
    returns function that maps from age to IFR
    """
    return scipy.interpolate.interp1d(df_ifr.age_mid.values,
                                      pm.invlogit(df_ifr.lowest_ifr + logit_shift),
                                      kind='linear',
                                      fill_value='extrapolate')
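# A hedged usage sketch for f_ifr_factory (the column values below are made
# up for illustration; lowest_ifr is assumed to already be on the logit
# scale, since the factory applies invlogit to it directly).
import pandas as pd

df_ifr = pd.DataFrame({'age_mid': [5., 25., 55., 85.],
                       'lowest_ifr': [-9., -7., -4.5, -2.]})
f_ifr = f_ifr_factory(df_ifr, logit_shift=0.5)
print(f_ifr(60.))   # interpolated IFR at age 60
print(f_ifr(95.))   # linear extrapolation beyond the last age_mid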
def normal_approx(asrf):
    """
    This 'normal approximation' of the age-specific rate function is
    formed by using each rate to produce an estimate of the
    age-specific rate, and then saying that the logit of the true
    rate function is a gaussian process and these age-specific rates
    are observations of this gaussian process.

    This is less valid and less accurate than using mcmc or map on
    the vars produced by the model_rate_list method below, but maybe
    it will be faster.
    """
    M, C = uninformative_prior_gp()

    # use prior to set rate near zero as requested
    for prior_str in asrf.fit.get('priors', '').split('\n'):
        prior = prior_str.split()
        if len(prior) > 0 and prior[0] == 'zero':
            age_start = int(prior[1])
            age_end = int(prior[2])
            gp.observe(M, C, range(age_start, age_end + 1), [-10.], [0.])

    for r in asrf.rates.all():
        mesh, obs, V = logit_rate_from_range(r)

        # make sure that there is something to observe
        if mesh == []:
            continue

        # uncomment the following line to make more inferences than
        # are valid from the data
        #gp.observe(M, C, mesh, obs, V)

        # uncomment the following 2 lines to make fewer inferences than
        # possible: it may be better to waste information than have
        # false confidence
        ii = len(mesh)/2
        gp.observe(M, C, [mesh[ii]], [obs[ii]], [V[ii]])

    x = asrf.fit['out_age_mesh']
    na_rate = mc.invlogit(M(x))
    asrf.fit['normal_approx'] = list(na_rate)
    asrf.save()

    return M, C
def mu_age_p(logit_C0=logit_C0,
             i=rate['i']['mu_age'],
             r=rate['r']['mu_age'],
             f=rate['f']['mu_age']):
    # for acute conditions, it is silly to use ODE solver to
    # derive prevalence, and it can be approximated with a simple
    # transformation of incidence
    if r.min() > 5.99:
        return i / (r + m_all + f)

    C0 = mc.invlogit(logit_C0)
    x = np.hstack((i, r, f, 1 - C0, C0))
    y = fun.forward(0, x)

    susceptible = y[:N]
    condition = y[N:]

    p = condition / (susceptible + condition)
    p[np.isnan(p)] = 0.
    return p
def reduce_realizations(filename, reduce_fns, slices, a_lo, a_hi, n_per):
    """
    Generates n_per * len(filename.root.realizations) realizations,
    on the space-time slice defined by slice (a tuple of three
    slices) and reduces them according to the function reduce.
    Reduce_fns should be a list of Python functions of the form

        reduce(this_PR_chunk, product_sofar=None)

    and incorporate this_realization into product_sofar in the
    desired way. It should be robust to the product_sofar=None case,
    of course. a_lo and a_hi are the limits of the age range.
    """
    slices = tuple(slices)
    hf = tb.openFile(filename)
    hr = hf.root
    n_realizations = len(hr.realizations)
    products = dict(zip(reduce_fns, [None] * len(reduce_fns)))
    N_facs = int(1e5)

    # Get nugget variance and age-correction factors
    V = hr.PyMCsamples.col('V')[:]
    facs = mbgw.correction_factors.age_corr_factors_from_limits(a_lo, a_hi, N_facs)

    for i in xrange(n_realizations):
        # Pull out parasite rate chunk
        tot_slice = (slice(i, i + 1, 1),) + slices
        f_chunk = hr.realizations[tot_slice].squeeze()
        for j in xrange(n_per):
            chunk = f_chunk + np.random.normal(loc=0, scale=np.sqrt(V[i]),
                                               size=f_chunk.shape)
            chunk = pm.invlogit(chunk)
            chunk *= facs[np.random.randint(N_facs, size=np.prod(chunk.shape))]
            chunk = chunk.reshape(f_chunk.shape)

            for f in reduce_fns:
                product_sofar = products[f]
                products[f] = f(chunk, product_sofar)

    return products
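# A minimal sketch of a conforming reducer (hypothetical, not from the
# original module): it keeps a running per-pixel sum and is robust to the
# product_sofar=None first call, as the docstring requires.
def running_sum(this_PR_chunk, product_sofar=None):
    if product_sofar is None:
        # First call: start the accumulator from this chunk.
        return this_PR_chunk.copy()
    # Later calls: fold the new realization into the running total.
    return product_sofar + this_PR_chunk

# products = reduce_realizations('realizations.hdf5', [running_sum],
#                                (slice(None), slice(None), slice(0, 12)),
#                                a_lo=2, a_hi=10, n_per=5)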
def mu_age_p(logit_C0=logit_C0,
             i=rate['i']['mu_age'],
             r=rate['r']['mu_age'],
             f=rate['f']['mu_age']):
    # for acute conditions, it is silly to use ODE solver to
    # derive prevalence, and it can be approximated with a simple
    # transformation of incidence
    if r.min() > 5.99:
        return i / (r + m_all + f)

    C0 = float(mc.invlogit(logit_C0))

    susceptible = np.zeros(len(ages))
    condition = np.zeros(len(ages))
    dismod_mr.model.ode.ode_function(susceptible, condition, num_step, ages,
                                     m_all, i, r, f, 1 - C0, C0)

    p = condition / (susceptible + condition)
    p[np.isnan(p)] = 0.
    return p
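# The shortcut branch above rests on the steady state of the two-compartment
# model: dC/dt = i*S - (r + m + f)*C gives p = C/(S+C) = i/(i + r + m + f)
# at equilibrium, and with fast remission the i in the denominator is
# negligible. A toy numerical check with made-up rates (not from the
# original code):
i, r, f, m_all = .01, 6., .1, .02   # r > 5.99, so the shortcut branch applies
p_approx = i / (r + m_all + f)
p_exact = i / (i + r + m_all + f)
assert abs(p_approx - p_exact) / p_exact < .01   # the two agree within 1% here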
def this_fun(x, p2=p2, p3=p3, negi=negi, posi=posi, Ai=Ai):
    p1 = np.log(invlogit(x))
    return p1 * spi + p3 + cfh(p1, p2, negi)
def fit_emp_prior(dm, param_type):
    """ Generate an empirical prior distribution for a single disease parameter

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      The object containing all the data, (hyper)-priors, and additional
      information (like input and output age-mesh).

    param_type : str, one of 'incidence', 'prevalence', 'remission', 'excess-mortality'
      The disease parameter to work with

    Notes
    -----
    The results of this fit are stored in the disease model's params
    hash for use when fitting multiple parameter types together

    Example
    -------
    $ python2.5 gbd_fit.py 175 -t incidence -p 'zero 0 4, zero 41 100, smooth 25'  # takes 7m to run
    """
    data = [d for d in dm.data if clean(d['data_type']).find(param_type) != -1]

    # don't do anything if there is no data for this parameter type
    if len(data) == 0:
        return

    dm.fit_initial_estimate(param_type, data)

    dm.vars = setup(dm, param_type, data)

    # fit the model
    dm.map = mc.MAP(dm.vars)
    try:
        dm.map.fit(method='fmin_powell', iterlim=500, tol=.00001, verbose=1)
    except KeyboardInterrupt:
        print 'User halted optimization routine before optimal value found'

    # save the results in the param_hash
    dm.clear_empirical_prior()
    prior_vals = dict(
        alpha=list(dm.vars['region_coeffs'].value),
        beta=list(dm.vars['study_coeffs'].value),
        gamma=list(dm.vars['age_coeffs'].value),
        sigma=float(dm.vars['dispersion'].value))
    dm.set_empirical_prior(param_type, prior_vals)

    dispersion = prior_vals['sigma']
    for r in dismod3.gbd_regions:
        for y in dismod3.gbd_years:
            for s in dismod3.gbd_sexes:
                key = dismod3.gbd_key_for(param_type, r, y, s)
                logit_mu = predict_logit_rate(regional_covariates(key), **prior_vals)
                mu = mc.invlogit(logit_mu)
                dm.set_initial_value(key, mu)
                dm.set_mcmc('emp_prior_mean', key, mu)
                dm.set_mcmc('emp_prior_lower_ui', key, mc.invlogit(logit_mu - 1.96 * dispersion))
                dm.set_mcmc('emp_prior_upper_ui', key, mc.invlogit(logit_mu + 1.96 * dispersion))

    key = dismod3.gbd_key_for(param_type, 'world', 1997, 'total')
    logit_mu = predict_logit_rate(regional_covariates(key), **prior_vals)
    mu = mc.invlogit(logit_mu)
    dm.set_initial_value(key, mu)
    dm.set_mcmc('emp_prior_mean', key, mu)
    dm.set_mcmc('emp_prior_lower_ui', key, mc.invlogit(logit_mu - 1.96 * dispersion))
    dm.set_mcmc('emp_prior_upper_ui', key, mc.invlogit(logit_mu + 1.96 * dispersion))
def theta(a=alpha, b=beta):
    """theta = logit^{-1}(a + b*x)"""
    return pymc.invlogit(a + b * x)
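# For context, a hedged sketch of how a deterministic like theta typically
# feeds a likelihood in PyMC 2 (x and y are assumed observed arrays; the
# priors are illustrative, not from the original model):
import pymc

alpha = pymc.Normal('alpha', mu=0., tau=1.e-4)
beta = pymc.Normal('beta', mu=0., tau=1.e-4)

@pymc.deterministic
def theta(a=alpha, b=beta):
    """theta = logit^{-1}(a + b*x)"""
    return pymc.invlogit(a + b * x)

# Bernoulli observations with success probability theta
obs = pymc.Bernoulli('obs', p=theta, value=y, observed=True)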
# <codecell>

### hyperpriors
d = mc.Normal('d', 0., 1.e-6, value=0.)
tau = mc.Gamma('tau', 1.e-3, 1.e-3, value=1.)
sigma = mc.Lambda('sigma', lambda tau=tau: tau**-.5)
delta_new = mc.Normal('delta_new', d, tau, value=0.)

### priors
mu = [mc.Normal('mu_%d' % i, 0., 1.e-5, value=0.) for i in range(N)]
delta = [mc.Normal('delta_%d' % i, d, tau, value=0.) for i in range(N)]
p_c = mc.Lambda('p_c', lambda mu=mu: mc.invlogit(mu))
p_t = mc.Lambda('p_t', lambda mu=mu, delta=delta: mc.invlogit(array(mu) + delta))

### likelihood
r_c = mc.Binomial('r_c', n_c_obs, p_c, value=r_c_obs, observed=True)
r_t = mc.Binomial('r_t', n_t_obs, p_t, value=r_t_obs, observed=True)

# <markdowncell>

# BUGS uses Gibbs steps automatically, so it only takes 10000 steps of MCMC
# after a 1000-step burn-in for this model in their example.
#
# PyMC only uses Gibbs steps if you set them up yourself, and it uses
# Metropolis steps by default. So 10000 steps go by more quickly, but the
# chain takes longer to converge to the stationary distribution.

# <codecell>
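# (A hedged sketch, not from the original notebook: one plausible next cell
# runs the sampler with a longer chain than BUGS' 10000/1000 to give the
# Metropolis steps time to converge; the iteration counts are illustrative.)
m = mc.MCMC([d, tau, sigma, delta_new, mu, delta, p_c, p_t, r_c, r_t])
m.sample(iter=20000, burn=10000)
mc.Matplot.plot(d)   # inspect the trace and autocorrelation of d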
Msurf = zeros(data.shape)
E2surf = zeros(data.shape)

# Get E[v] and E[v**2] over the entire posterior
for i in xrange(n):
    # Reset all variables to their values at frame i of the trace
    DuffySampler.remember(0, i)

    # Evaluate the observed mean
    store_africa_val(DuffySampler.sp_sub_b.M_obs.value, dpred, africa)
    Msurf_b, Vsurf_b = pm.gp.point_eval(DuffySampler.sp_sub_b.M_obs.value,
                                        DuffySampler.sp_sub_b.C_obs.value, dpred)
    Msurf_s, Vsurf_s = pm.gp.point_eval(DuffySampler.sp_sub_s.M_obs.value,
                                        DuffySampler.sp_sub_s.C_obs.value, dpred)
    Vsurf_b += DuffySampler.V_b.value
    Vsurf_s += DuffySampler.V_s.value

    freq_b = pm.invlogit(Msurf_b + pm.rnormal(0, 1) * np.sqrt(Vsurf_b))
    freq_s = pm.invlogit(Msurf_s + pm.rnormal(0, 1) * np.sqrt(Vsurf_s))

    samp_i = (freq_b * freq_s + (1 - freq_b) * DuffySampler.p1.value) ** 2

    Msurf[where_unmasked] += samp_i / float(n)
    # Evaluate the observed covariance with one argument
    E2surf[where_unmasked] += samp_i ** 2 / float(n)

# Get the posterior variance and standard deviation
Vsurf = E2surf - Msurf ** 2
SDsurf = sqrt(Vsurf)

Msurf = ma.masked_array(Msurf, mask=covariate_raster.root.mask[:])
SDsurf = ma.masked_array(SDsurf, mask=covariate_raster.root.mask[:])
covariate_raster.close()
def fit_without_confrontation(id, region, sex, year):
    """ Fit posterior of specified region/sex/year for specified model
    without trying to integrate conflicting sources of data

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years
    """
    ## load model
    dm = dismod3.load_disease_model(id)

    ## separate out prevalence and relative-risk data
    prev_data = [d for d in dm.data if dm.relevant_to(d, 'prevalence', region, year, sex)]
    rr_data = [d for d in dm.data if dm.relevant_to(d, 'relative-risk', region, year, sex)]
    dm.data = [d for d in dm.data if not d in prev_data and not d in rr_data]

    ### setup the generic disease model (without prevalence data)
    import dismod3.gbd_disease_model as model

    keys = dismod3.utils.gbd_keys(region_list=[region],
                                  year_list=[year],
                                  sex_list=[sex])
    dm.calc_effective_sample_size(dm.data)
    dm.vars = model.setup(dm, keys)

    ## override the birth prevalence prior, based on the withheld prevalence data
    logit_C_0 = dm.vars[dismod3.utils.gbd_key_for('bins', region, year, sex)]['initial']['logit_C_0']
    assert len(prev_data) == 1, 'should be a single prevalence datum'
    d = prev_data[0]
    mu_logit_C_0 = mc.logit(dm.value_per_1(d) + dismod3.settings.NEARLY_ZERO)
    lb, ub = dm.bounds_per_1(d)
    sigma_logit_C_0 = (mc.logit(ub + dismod3.settings.NEARLY_ZERO)
                       - mc.logit(lb + dismod3.settings.NEARLY_ZERO)) / (2 * 1.96)
    print 'mu_C_0_pri:', mc.invlogit(mu_logit_C_0)
    print 'ui_C_0_pri:', lb, ub

    # override the excess-mortality, based on the relative-risk data
    mu_rr = 1.01 * np.ones(dismod3.settings.MAX_AGE)
    sigma_rr = .01 * np.ones(dismod3.settings.MAX_AGE)
    for d in rr_data:
        mu_rr[d['age_start']:(d['age_end'] + 1)] = dm.value_per_1(d)
        sigma_rr[d['age_start']:(d['age_end'] + 1)] = dm.se_per_1(d)
    print 'mu_rr:', mu_rr.round(2)
    #print 'sigma_rr:', sigma_rr.round(2)

    log_f = dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, year, sex)]['age_coeffs']
    log_f_mesh = log_f.parents['gamma_mesh']
    param_mesh = log_f.parents['param_mesh']

    m_all = dm.vars[dismod3.utils.gbd_key_for('all-cause_mortality', region, year, sex)]
    mu_log_f = np.log((mu_rr - 1) * m_all)
    sigma_log_f = 1 / ((mu_rr - 1) * m_all) * sigma_rr * m_all
    print 'mu_log_f:', mu_log_f.round(2)[param_mesh]
    print 'sigma_log_f:', sigma_log_f.round(2)[param_mesh]

    ### fit the model using Monte Carlo simulation (shoehorned into the MCMC framework of PyMC)
    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.use_step_method(SampleFromNormal, logit_C_0,
                            mu=mu_logit_C_0, tau=sigma_logit_C_0**-2)
    dm.mcmc.use_step_method(SampleFromNormal, log_f_mesh,
                            mu=mu_log_f[param_mesh], tau=sigma_log_f[param_mesh]**-2)
    for stoch in dm.mcmc.stochastics:
        dm.mcmc.use_step_method(mc.NoStepper, stoch)
    dm.mcmc.sample(1000, verbose=dismod3.settings.ON_SGE)

    #print 'mu_C_0_post:', mc.invlogit(logit_C_0.stats()['mean']).round(2)
    #print 'ui_C_0_post:', mc.invlogit(logit_C_0.stats()['95% HPD interval']).round(2)
    #print 'mu_rr_post:', dm.vars[dismod3.utils.gbd_key_for('relative-risk', region, year, sex)]['rate_stoch'].stats()['mean'].round(2)
    print 'mu_log_f_mesh_post:', log_f_mesh.stats()['mean'].round(2)
    print 'mu_f_post:', dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, year, sex)]['rate_stoch'].stats()['mean'].round(2)

    for k in keys:
        t, r, y, s = dismod3.utils.type_region_year_sex_from_key(k)

        if t in ['incidence', 'prevalence', 'remission', 'excess-mortality',
                 'mortality', 'prevalence_x_excess-mortality']:
            dismod3.neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k])

        elif t in ['relative-risk', 'duration', 'incidence_x_duration']:
            dismod3.normal_model.store_mcmc_fit(dm, k, dm.vars[k])

    from fit_posterior import save_country_level_posterior
    if str(year) == '2005':  # also generate 2010 estimates
        save_country_level_posterior(dm, region, 2010, sex, ['prevalence', 'remission'])
    save_country_level_posterior(dm, region, year, sex, ['prevalence', 'remission'])
    #'prevalence incidence remission excess-mortality duration mortality relative-risk'.split())

    # save results (do this last, because it removes things from the disease
    # model that plotting functions, etc., might need)
    keys = dismod3.utils.gbd_keys(region_list=[region],
                                  year_list=[year],
                                  sex_list=[sex])
    dm.save('dm-%d-posterior-%s-%s-%s.json' % (dm.id, region, sex, year),
            keys_to_save=keys)
    return dm
def C_0(logit_C_0=logit_C_0):
    return mc.invlogit(logit_C_0)
def make_model(lon, lat, africa, n, datatype,
               genaa, genab, genbb, gen00, gena0, genb0, gena1, genb1, gen01, gen11,
               pheab, phea, pheb,
               phe0, prom0, promab,
               aphea, aphe0,
               bpheb, bphe0):
    logp_mesh = np.vstack((lon, lat)).T * np.pi / 180.

    # Probability of mutation in the promoter region, given that the other thing is a.
    p1 = pm.Uniform('p1', 0, .04, value=.01)

    # Spatial submodels
    spatial_b_vars = make_gp_submodel('b', logp_mesh, africa, with_africa_covariate=True)
    spatial_s_vars = make_gp_submodel('0', logp_mesh)
    sp_sub_b = spatial_b_vars['sp_sub']
    sp_sub_s = spatial_s_vars['sp_sub']

    # Loop over data clusters, adding nugget and applying link function.
    tilde_fs_d = []
    p0_d = []
    tilde_fb_d = []
    pb_d = []
    V_b = spatial_b_vars['V']
    V_s = spatial_s_vars['V']
    data_d = []

    for i in xrange(len(n)):
        this_fb = sp_sub_b.f_eval[i]
        this_fs = sp_sub_s.f_eval[i]

        # Nuggeted field in this cluster
        tilde_fb_d.append(pm.Normal('tilde_fb_%i' % i, this_fb, 1. / V_b,
                                    value=np.random.normal(), trace=False))
        tilde_fs_d.append(pm.Normal('tilde_fs_%i' % i, this_fs, 1. / V_s,
                                    value=np.random.normal(), trace=False))

        # The frequencies. (Note: the original assigned these to swapped
        # variable names; pb comes from the b-field and p0 from the 0-field,
        # matching the PyMC names and how they are used in the likelihoods below.)
        pb = pm.Lambda('pb_%i' % i, lambda lt=tilde_fb_d[-1]: pm.invlogit(lt), trace=False)
        p0 = pm.Lambda('p0_%i' % i, lambda lt=tilde_fs_d[-1]: pm.invlogit(lt), trace=False)

        # The likelihoods
        if datatype[i] == 'prom':
            cur_obs = [prom0[i], promab[i]]
            # Need to have either b and 0 or a and 1 on both chromosomes
            p = pm.Lambda('p_%i' % i,
                          lambda pb=pb, p0=p0, p1=p1: (pb * p0 + (1 - pb) * p1)**2,
                          trace=False)
            n = np.sum(cur_obs)
            data_d.append(pm.Binomial('data_%i' % i, p=p, n=n, value=prom0[i], observed=True))

        elif datatype[i] == 'aphe':
            cur_obs = [aphea[i], aphe0[i]]
            n = np.sum(cur_obs)
            # Need to have (a and not 1) on either chromosome, or not (not (a and not 1) on both chromosomes)
            p = pm.Lambda('p_%i' % i,
                          lambda pb=pb, p0=p0, p1=p1: 1 - (1 - (1 - pb) * (1 - p1))**2,
                          trace=False)
            data_d.append(pm.Binomial('data_%i' % i, p=p, n=n, value=aphea[i], observed=True))

        elif datatype[i] == 'bphe':
            cur_obs = [bpheb[i], bphe0[i]]
            n = np.sum(cur_obs)
            # Need to have (b and not 0) on either chromosome
            p = pm.Lambda('p_%i' % i,
                          lambda pb=pb, p0=p0, p1=p1: 1 - (1 - pb * (1 - p0))**2,
                          trace=False)
            # Note: the original passed value=aphea[i] here, which looks like
            # a copy-paste slip; the observed b-phenotype count is bpheb[i].
            data_d.append(pm.Binomial('data_%i' % i, p=p, n=n, value=bpheb[i], observed=True))

        elif datatype[i] == 'phe':
            cur_obs = np.array([pheab[i], phea[i], pheb[i], phe0[i]])
            n = np.sum(cur_obs)
            p = pm.Lambda('p_%i' % i, lambda pb=pb, p0=p0, p1=p1: np.array([
                g_freqs['ab'](pb, p0, p1),
                g_freqs['a0'](pb, p0, p1) + g_freqs['a1'](pb, p0, p1) + g_freqs['aa'](pb, p0, p1),
                g_freqs['b0'](pb, p0, p1) + g_freqs['b1'](pb, p0, p1) + g_freqs['bb'](pb, p0, p1),
                g_freqs['00'](pb, p0, p1) + g_freqs['01'](pb, p0, p1) + g_freqs['11'](pb, p0, p1)]),
                trace=False)
            np.testing.assert_almost_equal(p.value.sum(), 1)
            data_d.append(pm.Multinomial('data_%i' % i, p=p, n=n, value=cur_obs, observed=True))

        elif datatype[i] == 'gen':
            cur_obs = np.array([genaa[i], genab[i], gena0[i], gena1[i], genbb[i],
                                genb0[i], genb1[i], gen00[i], gen01[i], gen11[i]])
            n = np.sum(cur_obs)
            p = pm.Lambda('p_%i' % i, lambda pb=pb, p0=p0, p1=p1, g_freqs=g_freqs: np.array(
                [g_freqs[key](pb, p0, p1)
                 for key in ['aa', 'ab', 'a0', 'a1', 'bb', 'b0', 'b1', '00', '01', '11']]),
                trace=False)
            np.testing.assert_almost_equal(p.value.sum(), 1)
            data_d.append(pm.Multinomial('data_%i' % i, p=p, n=n, value=cur_obs, observed=True))

    # The fields plus the nugget, in convenient vector form
    @pm.deterministic
    def tilde_fb(tilde_fb_d=tilde_fb_d):
        """Concatenated version of tilde_fb, for postprocessing & Gibbs sampling purposes"""
        return np.hstack(tilde_fb_d)

    @pm.deterministic
    def tilde_fs(tilde_fs_d=tilde_fs_d):
        """Concatenated version of tilde_fs, for postprocessing & Gibbs sampling purposes"""
        return np.hstack(tilde_fs_d)

    return locals()
def generate_disease_data(condition, cov):
    """ Generate csv files with gold-standard disease data,
    and somewhat good, somewhat dense disease data, as might be expected from
    a condition that is carefully studied in the literature
    """
    age_len = dismod3.MAX_AGE
    ages = np.arange(age_len, dtype='float')

    # incidence rate
    i0 = .005 + .02 * mc.invlogit((ages - 44) / 3)
    #i0 = np.maximum(0., .001 * (-.125 + np.ones_like(ages) + (ages / age_len)**2.))

    # remission rate
    #r = 0. * ages
    r = .1 * np.ones_like(ages)

    # excess-mortality rate
    #f_init = .085 * (ages / 100) ** 2.5
    SMR = 3. * np.ones_like(ages) - ages / age_len

    # all-cause mortality-rate
    mort = dismod3.get_disease_model('all-cause_mortality')

    #age_intervals = [[a, a+9] for a in range(0, dismod3.MAX_AGE-4, 10)] + [[0, 100] for ii in range(1)]
    age_intervals = [[a, a] for a in range(0, dismod3.MAX_AGE, 1)]

    # TODO: take age structure from real data
    sparse_intervals = dict([[region, random.sample(age_intervals, (ii**3 * len(age_intervals)) / len(countries_for)**3 / 1)]
                             for ii, region in enumerate(countries_for)])
    dense_intervals = dict([[region, random.sample(age_intervals, len(age_intervals) / 2)]
                            for ii, region in enumerate(countries_for)])

    gold_data = []
    noisy_data = []

    for ii, region in enumerate(sorted(countries_for)):
        if region == 'world':
            continue

        print region
        sys.stdout.flush()

        # introduce unexplained regional variation
        #i = i0 * (1 + float(ii) / 21)

        # or not
        i = i0

        for year in [1990, 2005]:
            for sex in ['male', 'female']:
                param_type = 'all-cause_mortality'
                key = dismod3.gbd_key_for(param_type, region, year, sex)
                m_all_cause = mort.mortality(key, mort.data)

                # calculate excess-mortality rate from smr
                f = (SMR - 1.) * m_all_cause

                ## compartmental model (bins S, C, D, M)
                import scipy.linalg
                from dismod3 import NEARLY_ZERO
                from dismod3.utils import trim

                SCDM = np.zeros([4, age_len])
                p = np.zeros(age_len)
                m = np.zeros(age_len)

                SCDM[0, 0] = 1.
                SCDM[1, 0] = 0.
                SCDM[2, 0] = 0.
                SCDM[3, 0] = 0.

                p[0] = SCDM[1, 0] / (SCDM[0, 0] + SCDM[1, 0] + NEARLY_ZERO)
                m[0] = trim(m_all_cause[0] - f[0] * p[0], NEARLY_ZERO, 1 - NEARLY_ZERO)

                for a in range(age_len - 1):
                    A = [[-i[a] - m[a], r[a], 0., 0.],
                         [i[a], -r[a] - m[a] - f[a], 0., 0.],
                         [m[a], m[a], 0., 0.],
                         [0., f[a], 0., 0.]]

                    SCDM[:, a + 1] = np.dot(scipy.linalg.expm(A), SCDM[:, a])

                    p[a + 1] = SCDM[1, a + 1] / (SCDM[0, a + 1] + SCDM[1, a + 1] + NEARLY_ZERO)
                    m[a + 1] = m_all_cause[a + 1] - f[a + 1] * p[a + 1]

                # duration = E[time in bin C]
                hazard = r + m + f
                pr_not_exit = np.exp(-hazard)
                X = np.empty(len(hazard))
                X[-1] = 1 / hazard[-1]
                for ii in reversed(range(len(X) - 1)):
                    X[ii] = (pr_not_exit[ii] * (X[ii + 1] + 1)) + (1 / hazard[ii] * (1 - pr_not_exit[ii]) - pr_not_exit[ii])

                country = countries_for[region][0]
                params = dict(age_intervals=age_intervals, condition=condition,
                              gbd_region=region, country=country, year=year,
                              sex=sex, effective_sample_size=1000)

                params['age_intervals'] = [[0, 99]]
                generate_and_append_data(gold_data, 'prevalence data', p, **params)
                generate_and_append_data(gold_data, 'incidence data', i, **params)
                generate_and_append_data(gold_data, 'excess-mortality data', f, **params)
                generate_and_append_data(gold_data, 'remission data', r, **params)
                generate_and_append_data(gold_data, 'duration data', X, **params)

                # TODO: use this approach to age standardize all gold data, and then change it to get iX as a direct sum
                params['age_intervals'] = [[0, 99]]
                iX = i * X * (1 - p) * regional_population(key)
                generate_and_append_data(gold_data, 'incidence_x_duration', iX, **params)

                params['effective_sample_size'] = 1000
                params['cov'] = 0.
                params['age_intervals'] = age_intervals
                generate_and_append_data(noisy_data, 'prevalence data', p, **params)
                generate_and_append_data(noisy_data, 'excess-mortality data', f, **params)
                generate_and_append_data(noisy_data, 'remission data', r, **params)
                generate_and_append_data(noisy_data, 'incidence data', i, **params)

    col_names = sorted(data_dict_for_csv(gold_data[0]).keys())

    f_file = open(OUTPUT_PATH + '%s_gold.tsv' % condition, 'w')
    csv_f = csv.writer(f_file, dialect='excel-tab')
    csv_f.writerow(col_names)
    for d in gold_data:
        dd = data_dict_for_csv(d)
        csv_f.writerow([dd[c] for c in col_names])
    f_file.close()

    f_name = OUTPUT_PATH + '%s_data.tsv' % condition
    f_file = open(f_name, 'w')
    csv_f = csv.writer(f_file, dialect='excel-tab')
    csv_f.writerow(col_names)
    for d in noisy_data:
        dd = data_dict_for_csv(d)
        csv_f.writerow([dd[c] for c in col_names])
    f_file.close()

    # upload data file
    from dismod3.disease_json import dismod_server_login, twc, DISMOD_BASE_URL
    dismod_server_login()
    twc.go(DISMOD_BASE_URL + 'dismod/data/upload/')
    twc.formvalue(1, 'tab_separated_values', open(f_name).read())

    # TODO: find or set the model number for this model, set the
    # expert priors and covariates, merge the covariate data into the
    # model, and add the "ground truth" to the disease json

    try:
        url = twc.submit()
    except Exception, e:
        print e
pl.plot(X, Y, 'ks', label='Observed', mec='w', mew=1)

XX = sm.add_constant(X)
X_pred = pl.arange(65)
XX_pred = sm.add_constant(X_pred)

model = sm.OLS(Y, XX)
results = model.fit()
Y_pred = model.predict(XX_pred)
pl.plot(X_pred, Y_pred, 'k-', linewidth=2, label='Predicted by OLS')

Y = mc.logit(df['Parameter Value'].__array__())
model = sm.OLS(Y, XX)
results = model.fit()
Y_pred = model.predict(XX_pred)
pl.plot(X_pred, mc.invlogit(Y_pred), 'k--', linewidth=2,
        label='Predicted by logit-transformed OLS')

pl.xlabel('Age (Years)')
pl.ylabel('Seroprevalence (Per 1)')
pl.legend(loc='lower right', fancybox=True, shadow=True)
pl.axis([-5, 55, 0, 1.2])
pl.grid()
pl.savefig('vzv_forest.pdf')
def assessRealizationCovariance(filename, Rel, Month, paramfileINDEX,
                                TemporalStartMonth=None, TemporalEndMonth=None,
                                conditioned=False, flipVertical="FALSE",
                                SPACE=True, TIME=True):
    # deal with system arguments
    #filename = sys.argv[1]
    #Rel = int(sys.argv[2])
    #Month = int(sys.argv[3])
    #conditioned = sys.argv[4]
    #flipVertical = sys.argv[5]
    #paramfileINDEX = int(sys.argv[6])
    #TemporalStartMonth = int(sys.argv[7])
    #TemporalEndMonth = int(sys.argv[8])
    #SPACE = sys.argv[9]
    #TIME = sys.argv[10]

    ## if filename is a string, assume it's a path and import the hdf5 file
    ## (otherwise, the assumption is we are passing the 'hr' root of an hdf5
    ## realisation file)
    if type(filename) is str:
        hf = tb.openFile(filename)
        hr = hf.root
    if type(filename) is not str:
        hr = filename

    # define path to R param file
    mbgw_root = __root__ = mbgw.__path__[0]
    r_paramfile_path = mbgw_root + '/joint_simulation/CONDSIMalgorithm/ParamFile_uncond_' + str(paramfileINDEX) + '.R'

    ### CHECK SPATIAL COVARIANCE AND BASIC FEATURES OF A SINGLE MONTH
    if SPACE is True:

        # define basic parameters
        slices = [slice(None, None, None), slice(None, None, None), slice(Month, Month + 1, None)]
        slices = tuple(slices)
        n_realizations = 1
        n_rows = len(hr.lat_axis)
        n_cols = len(hr.lon_axis)
        N_facs = int(1e5)

        # Pull out parasite rate chunk (i.e. import n months of block)
        slices = tuple(slices)
        tot_slice = (slice(Rel, Rel + 1, None),) + slices
        n_months = tot_slice[3].stop - tot_slice[3].start
        f_chunk = np.zeros(1 * n_cols * n_rows * n_months).reshape(1, n_rows, n_cols, n_months)
        subsetmonth = 0
        #print tot_slice
        #print f_chunk[:,:,:,subsetmonth]
        for mm in xrange(tot_slice[3].start, tot_slice[3].stop):
            f_chunk[:, :, :, subsetmonth] = hr.realizations[tot_slice[0], tot_slice[1], tot_slice[2], mm]
            subsetmonth = subsetmonth + 1
        #f_chunk = f_chunk[::-1,:,::-1,:].T[:,:,:,0]
        f_chunk = f_chunk.squeeze()
        f_chunk[f_chunk == -9999] = nan

        inv_f_chunk = pm.invlogit(f_chunk.squeeze().T)
        inv_f_chunk = inv_f_chunk.reshape(shape(f_chunk))

        #from IPython.Debugger import Pdb
        #Pdb(color_scheme='Linux').set_trace()

        # calculate empirical covariance function in N-S direction
        gridIN = cp.deepcopy(f_chunk).squeeze()
        if conditioned is False:
            meanIN = 0
        if conditioned is True:
            meanIN = hr.PyMCsamples.col("m_const")[Rel] + (hr.PyMCsamples.col("t_coef")[Rel] * hr.t_axis[Month])
        cellWidth = 5 / 6378.137
        covDict = getGridCovarianceInY(gridIN, meanIN, cellWidth)

        # obtain theoretical covariance function from input MCMC parameter values: pymc method
        C = hr.group0.C[Rel]
        xplot = covDict['RadDist']
        yplot1 = C([[0, 0, 0]], np.vstack((np.zeros(len(xplot)), xplot, np.zeros(len(xplot)))).T)
        yplot1 = np.asarray(yplot1).squeeze()

        # # obtain theoretical covariance function from input MCMC parameter values: R method
        # Scale=hr.PyMCsamples.col("scale")[Rel]
        # amp=hr.PyMCsamples.col("amp")[Rel]
        # inc=hr.PyMCsamples.col("inc")[Rel]
        # ecc=hr.PyMCsamples.col("ecc")[Rel]
        # t_lim_corr=hr.PyMCsamples.col("t_lim_corr")[Rel]
        # scale_t=hr.PyMCsamples.col("scale_t")[Rel]
        # sin_frac=hr.PyMCsamples.col("sin_frac")[Rel]
        # CfromR=temptestcovPY(xplot,np.zeros(len(xplot)),np.zeros(len(xplot)),Scale,amp,inc,ecc,t_lim_corr,scale_t,sin_frac,r_paramfile_path)
        # yplot = CfromR[0,:]

        # plot
        Slag_emp = covDict['RadDist']
        Slag_mod = xplot
        Scov_emp = covDict['E_cov']
        Scov_mod = yplot1

    ### CHECK TEMPORAL COVARIANCE
    if TIME is True:

        # if start and end months are None, or if they are non-valid, reset to maximum temporal extents
        if ((TemporalEndMonth is None) | (TemporalEndMonth >= hr.realizations.shape[3])):
            TemporalEndMonth = hr.realizations.shape[3]
        if ((TemporalStartMonth is None) | (TemporalStartMonth >= (hr.realizations.shape[3] - 1))):
            TemporalStartMonth = 0

        # define basic parameters
        slices = [slice(None, None, None), slice(None, None, None), slice(TemporalStartMonth, TemporalEndMonth, None)]
        slices = tuple(slices)
        n_realizations = 1
        n_rows = len(hr.lat_axis)
        n_cols = len(hr.lon_axis)
        N_facs = int(1e5)

        # Pull out parasite rate chunk (i.e. import n months of block)
        slices = tuple(slices)
        tot_slice = (slice(Rel, Rel + 1, None),) + slices
        n_months = tot_slice[3].stop - tot_slice[3].start
        f_chunk = np.zeros(1 * n_cols * n_rows * n_months).reshape(1, n_rows, n_cols, n_months)
        subsetmonth = 0
        for mm in xrange(tot_slice[3].start, tot_slice[3].stop):
            f_chunk[:, :, :, subsetmonth] = hr.realizations[tot_slice[0], tot_slice[1], tot_slice[2], mm]
            subsetmonth = subsetmonth + 1
        #f_chunk = f_chunk[::-1,:,::-1,:].T[:,:,:,0]
        f_chunk = f_chunk.squeeze()
        f_chunk[f_chunk == -9999] = nan

        # calculate and plot empirical temporal covariance
        gridIN = cp.deepcopy(f_chunk).squeeze()
        if conditioned is False:
            meanIN = 0
        if conditioned is True:
            meanIN = hr.PyMCsamples.col("m_const")[Rel] + (hr.PyMCsamples.col("t_coef")[Rel] * hr.t_axis[TemporalStartMonth:TemporalEndMonth + 1:1])
        covDict = getGridCovarianceInT(gridIN, meanIN)

        # obtain theoretical covariance function from input MCMC parameter values: pymc method
        C = hr.group0.C[Rel]
        xplot = covDict['yearDist']
        yplot = C([[0, 0, 0]], np.vstack((np.zeros(len(xplot)), np.zeros(len(xplot)), xplot)).T)
        yplot = np.asarray(yplot).squeeze()

        # # obtain theoretical covariance function from input MCMC parameter values: R method
        # Scale=hr.PyMCsamples.col("scale")[Rel]
        # amp=hr.PyMCsamples.col("amp")[Rel]
        # inc=hr.PyMCsamples.col("inc")[Rel]
        # ecc=hr.PyMCsamples.col("ecc")[Rel]
        # t_lim_corr=hr.PyMCsamples.col("t_lim_corr")[Rel]
        # scale_t=hr.PyMCsamples.col("scale_t")[Rel]
        # sin_frac=hr.PyMCsamples.col("sin_frac")[Rel]
        # CfromR=temptestcovPY(np.zeros(len(xplot)),np.zeros(len(xplot)),xplot,Scale,amp,inc,ecc,t_lim_corr,scale_t,sin_frac,r_paramfile_path)
        # yplot2 = CfromR[0,:]

        # plot
        Tlag_emp = covDict['yearDist']
        Tlag_mod = xplot
        Tcov_emp = covDict['E_cov']
        Tcov_mod = yplot

        retDict = {'Slag_emp': Slag_emp, 'Slag_mod': Slag_mod,
                   'Scov_emp': Scov_emp, 'Scov_mod': Scov_mod,
                   'Tlag_emp': Tlag_emp, 'Tlag_mod': Tlag_mod,
                   'Tcov_emp': Tcov_emp, 'Tcov_mod': Tcov_mod}
        return retDict

    # if TIME is False, return the spatial diagnostics only
    retDict = {'Slag_emp': Slag_emp, 'Slag_mod': Slag_mod,
               'Scov_emp': Scov_emp, 'Scov_mod': Scov_mod}
    return retDict
if sex == 'male':
    offset += .5
if year == 2005:
    offset += .5
if region == 'Asia, South':
    offset -= .1
if region == 'Asia, East':
    offset -= .2
if region == 'Europe, Central':
    offset += .3

gdp = Covariate.objects.get(iso3=country, year=year).value
offset += .3 * gdp

# incidence rate
i = .012 * mc.invlogit((ages - 44) / 3) * (1 + offset)

# remission rate
r = 0. * ages

# excess-mortality rate
f = .085 * (ages / 100)**2.5

# all-cause mortality-rate
mort_data = [d for d in mort.data
             if d['data_type'] == 'all-cause mortality data'
             and d['region'] == region
             and d['sex'] == sex
             and d['year_start'] == year]
m_all_cause = mort.mortality('all_cause', mort_data)
def rate_stoch(logit_rate=logit_rate):
    return interpolate(param_mesh, mc.invlogit(logit_rate), est_mesh)
safe_name = name.replace('.', '_')
P_prime_now = pm.Beta('P_prime_%s' % safe_name, 3., 3.)
p_vec_now = pm.MvNormalChol('p_vec_%s' % safe_name, p_mean, cholfac)

p_vec_list.append(p_vec_now)
P_prime_list.append(P_prime_now)

b = pm.lam_dtrm('b', lambda p_vec=p_vec_now: 1. / exp(p_vec[0]))

if methods[name] == 'Microscopy':
    # alpha, s and c depend on p_vec[1:4]
    c = pm.lam_dtrm('c', lambda p_vec=p_vec_now: 1. / exp(p_vec[1]))
    alph = pm.lam_dtrm('alph', lambda p_vec=p_vec_now: exp(p_vec[2]))
    s = pm.lam_dtrm('s', lambda p_vec=p_vec_now: pm.invlogit(p_vec[3]))
elif methods[name] == 'RDT':
    # alpha, s and c depend on p_vec[4:7]
    c = pm.lam_dtrm('c', lambda p_vec=p_vec_now: 1. / exp(p_vec[4]))
    alph = pm.lam_dtrm('alph', lambda p_vec=p_vec_now: exp(p_vec[5]))
    s = pm.lam_dtrm('s', lambda p_vec=p_vec_now: pm.invlogit(p_vec[6]))

@pm.dtrm
def this_F(c=c, alph=alph, a=age_bin_ctrs[name], s=s):
    """
    The function F, which gives detection probability.
    """
    out = empty(len(a))
    out[where(a < alph)] = 1.
def theta(a=alpha, b=beta, d=dose):
    """theta = inv_logit(a + b*d)"""
    return pm.invlogit(a + b * d)
def f(sp_sub, a, b, n=n):
    p = pm.invlogit(sp_sub)
    h = pm.rbeta(a, b, size=len(sp_sub))
    p_def = g6pd.p_fem_def(p, h)
    # Note: the original drew the binomial from p, leaving p_def unused; the
    # female-deficiency frequency p_def appears to be the intended probability.
    return pm.rbinomial(n=n, p=p_def)
# Pull out relevant section of hdf5 f block
tot_slice = (slice(ii, ii + 1, None), slice(None, None, None),
             slice(None, None, None), slice(startMonth, endMonth, None))
chunk = np.zeros(1 * n_cols * n_rows * n_months).reshape(1, n_rows, n_cols, n_months)
subsetmonth = 0
for mm in xrange(n_months):
    chunk[:, :, :, subsetmonth] = hr.realizations[tot_slice[0], tot_slice[1], tot_slice[2], mm]
    subsetmonth = subsetmonth + 1
chunk = chunk.squeeze()
holdshape = chunk.shape
chunk = chunk.ravel()

# optionally, add nugget, inverse logit, and age correct
if ADDNUGGET is True:
    chunk = chunk + np.random.normal(loc=0, scale=np.sqrt(V[ii]), size=np.prod(chunk.shape))
if BACKTRANSFORM is True:
    chunk = pm.invlogit(chunk)
if AGECORRECT is True:
    chunk *= facs[np.random.randint(N_facs, size=np.prod(chunk.shape))]
chunk = chunk.reshape(holdshape).squeeze()

# aggregate through time
chunkTMEAN = np.atleast_2d(np.mean(chunk, -1))

# add this realisation to output block
annualmean_block[:, :, ii] = chunkTMEAN

# get posterior mean and std of predicted maps
annualmean_mean = np.atleast_2d(np.mean(annualmean_block, -1))
annualmean_std = np.atleast_2d(np.std(annualmean_block, -1))

print 'surface mean of annual mean is ' + str(np.mean(annualmean_mean))
def f(sp_sub, x, a, b):
    p = pm.invlogit(sp_sub(x))
    h = pm.rbeta(a, b, size=len(p))
    return g6pd.p_fem_def(p, h)
def f(sp_sub, n=n):
    return pm.rbinomial(n=n, p=pm.invlogit(sp_sub))
def f(sp_sub, a, b, n=n):
    p = pm.invlogit(sp_sub)
    h = pm.rbeta(a, b, size=len(sp_sub))
    p_def = g6pd.p_fem_def(p, h)
    return pm.rbinomial(n=n, p=p_def)
def theta(a=alpha, b=beta):
    return pymc.invlogit(a + b * x)
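# theta above is the inverse-logit link of a linear predictor inside a PyMC
# logistic regression.  A minimal sketch of the surrounding model, with
# hypothetical data x, y (the alpha/beta priors are assumptions, not from the source):
import numpy as np
import pymc

x = np.array([-2., -1., 0., 1., 2.])
y = np.array([0, 0, 1, 1, 1])
alpha = pymc.Normal('alpha', mu=0., tau=1e-2)
beta = pymc.Normal('beta', mu=0., tau=1e-2)

@pymc.deterministic
def theta(a=alpha, b=beta):
    return pymc.invlogit(a + b * x)

obs = pymc.Bernoulli('obs', p=theta, value=y, observed=True)
M = pymc.MCMC([alpha, beta, theta, obs])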
def f(sp_sub, x):
    p = pm.invlogit(sp_sub(x))
    return p**2
def rate_stoch(mu=mu):
    return mc.invlogit(mu)
def p(S=S):
    """The success probability."""
    return pm.invlogit(S)
def examineRealization(filename, Rel, Month, paramfileINDEX,
                       TemporalStartMonth=None, TemporalEndMonth=None,
                       conditioned=False, flipVertical="FALSE",
                       SPACE=True, TIME=True):

    # deal with system arguments
    #filename = sys.argv[1]
    #Rel = int(sys.argv[2])
    #Month = int(sys.argv[3])
    #conditioned = sys.argv[4]
    #flipVertical = sys.argv[5]
    #paramfileINDEX = int(sys.argv[6])
    #TemporalStartMonth = int(sys.argv[7])
    #TemporalEndMonth = int(sys.argv[8])
    #SPACE = sys.argv[9]
    #TIME = sys.argv[10]

    # if filename is a string, assume it's a path and import the hdf5 file
    # (otherwise, assume we are passing the 'hr' root of an hdf5 realisation file)
    if type(filename) is str:
        hf = tb.openFile(filename)
        hr = hf.root
    else:
        hr = filename

    # define path to R param file
    mbgw_root = __root__ = mbgw.__path__[0]
    r_paramfile_path = mbgw_root + '/joint_simulation/CONDSIMalgorithm/ParamFile_uncond_' + str(paramfileINDEX) + '.R'

    # initialise plot window
    nplots = 0
    if SPACE is True:
        nplots = nplots + 5
    if TIME is True:
        nplots = nplots + 1
    r.X11(width=3.3 * nplots, height=4)
    r.par(mfrow=(1, nplots))

    ### CHECK SPATIAL COVARIANCE AND BASIC FEATURES OF A SINGLE MONTH
    if SPACE is True:

        # define basic parameters
        slices = [slice(None, None, None),
                  slice(None, None, None),
                  slice(Month, Month + 1, None)]
        slices = tuple(slices)
        n_realizations = 1
        n_rows = len(hr.lat_axis)
        n_cols = len(hr.lon_axis)
        N_facs = int(1e5)

        # Pull out parasite rate chunk (i.e. import n months of block)
        tot_slice = (slice(Rel, Rel + 1, None),) + slices
        n_months = tot_slice[3].stop - tot_slice[3].start
        f_chunk = np.zeros(1 * n_cols * n_rows * n_months).reshape(1, n_rows, n_cols, n_months)
        subsetmonth = 0
        for mm in xrange(tot_slice[3].start, tot_slice[3].stop):
            f_chunk[:, :, :, subsetmonth] = hr.realizations[tot_slice[0], tot_slice[1], tot_slice[2], mm]
            subsetmonth = subsetmonth + 1
        #f_chunk = f_chunk[::-1,:,::-1,:].T[:,:,:,0]
        f_chunk = f_chunk.squeeze()
        f_chunk[f_chunk == -9999] = nan

        # plot this grid
        plotMapPY(f_chunk.squeeze(), flipVertical=flipVertical)
        r.title(main="logit")
        inv_f_chunk = pm.invlogit(f_chunk.squeeze().T)
        inv_f_chunk = inv_f_chunk.reshape(shape(f_chunk))
        plotMapPY(inv_f_chunk, flipVertical=flipVertical)
        r.title(main="inv logit")

        # compare global variance to parameter draw
        observedVar = round(np.var(f_chunk[np.isnan(f_chunk) == False]), 10)
        theoreticalVar = (hr.PyMCsamples.col('amp')[Rel])**2
        varString = 'observedVar = :' + str(observedVar) + '; amp^2 =: ' + str(theoreticalVar)
        print varString

        # plot histograms on the logit and natural scales
        junk = r.hist(f_chunk[np.isnan(f_chunk) == False], main=varString, xlab="", ylab="")
        junk = r.hist(pm.invlogit(f_chunk[np.isnan(f_chunk) == False]), xlab="", ylab="", main="")

        # calculate and plot empirical covariance function in N-S direction
        gridIN = cp.deepcopy(f_chunk).squeeze()
        if conditioned is False:
            meanIN = 0
        if conditioned is True:
            meanIN = hr.PyMCsamples.col("m_const")[Rel] + (hr.PyMCsamples.col("t_coef")[Rel] * hr.t_axis[Month])
        cellWidth = 5 / 6378.137
        covDict = getGridCovarianceInY(gridIN, meanIN, cellWidth)

        # obtain theoretical covariance function from input MCMC parameter values: pymc method
        C = hr.group0.C[Rel]
        xplot = covDict['RadDist']
        yplot1 = C([[0, 0, 0]], np.vstack((np.zeros(len(xplot)), xplot, np.zeros(len(xplot)))).T)
        yplot1 = np.asarray(yplot1).squeeze()

        # obtain theoretical covariance function from input MCMC parameter values: R method
        Scale = hr.PyMCsamples.col("scale")[Rel]
        amp = hr.PyMCsamples.col("amp")[Rel]
        inc = hr.PyMCsamples.col("inc")[Rel]
        ecc = hr.PyMCsamples.col("ecc")[Rel]
        t_lim_corr = hr.PyMCsamples.col("t_lim_corr")[Rel]
        scale_t = hr.PyMCsamples.col("scale_t")[Rel]
        sin_frac = hr.PyMCsamples.col("sin_frac")[Rel]
        CfromR = temptestcovPY(xplot, np.zeros(len(xplot)), np.zeros(len(xplot)),
                               Scale, amp, inc, ecc, t_lim_corr, scale_t, sin_frac,
                               r_paramfile_path)
        yplot = CfromR[0, :]

        # plot
        ymax = max(np.max(covDict['E_cov']), np.max(xplot), np.max(yplot))
        ymin = min(np.min(covDict['E_cov']), np.min(xplot), np.min(yplot))
        r.plot(covDict['RadDist'], covDict['E_cov'], xlab="radians", ylab="C",
               main=str(paramfileINDEX), ylim=(ymin, ymax))
        r.lines(xplot, yplot1, col=2)
        r.lines(xplot, yplot, col=3)

    ### CHECK TEMPORAL COVARIANCE
    if TIME is True:

        # if start and end months are None, or non-valid, reset to the maximum temporal extent
        if ((TemporalEndMonth is None) | (TemporalEndMonth >= hr.realizations.shape[3])):
            TemporalEndMonth = hr.realizations.shape[3]
        if ((TemporalStartMonth is None) | (TemporalStartMonth >= (hr.realizations.shape[3] - 1))):
            TemporalStartMonth = 0

        # define basic parameters
        slices = [slice(None, None, None),
                  slice(None, None, None),
                  slice(TemporalStartMonth, TemporalEndMonth, None)]
        slices = tuple(slices)
        n_realizations = 1
        n_rows = len(hr.lat_axis)
        n_cols = len(hr.lon_axis)
        N_facs = int(1e5)

        # Pull out parasite rate chunk (i.e. import n months of block)
        tot_slice = (slice(Rel, Rel + 1, None),) + slices
        n_months = tot_slice[3].stop - tot_slice[3].start
        f_chunk = np.zeros(1 * n_cols * n_rows * n_months).reshape(1, n_rows, n_cols, n_months)
        subsetmonth = 0
        for mm in xrange(tot_slice[3].start, tot_slice[3].stop):
            f_chunk[:, :, :, subsetmonth] = hr.realizations[tot_slice[0], tot_slice[1], tot_slice[2], mm]
            subsetmonth = subsetmonth + 1
        #f_chunk = f_chunk[::-1,:,::-1,:].T[:,:,:,0]
        f_chunk = f_chunk.squeeze()
        f_chunk[f_chunk == -9999] = nan

        # calculate and plot empirical temporal covariance
        gridIN = cp.deepcopy(f_chunk).squeeze()
        if conditioned is False:
            meanIN = 0
        if conditioned is True:
            meanIN = hr.PyMCsamples.col("m_const")[Rel] + (hr.PyMCsamples.col("t_coef")[Rel] * hr.t_axis[TemporalStartMonth:TemporalEndMonth + 1:1])
        covDict = getGridCovarianceInT(gridIN, meanIN)

        # obtain theoretical covariance function from input MCMC parameter values: pymc method
        C = hr.group0.C[Rel]
        xplot = covDict['yearDist']
        yplot = C([[0, 0, 0]], np.vstack((np.zeros(len(xplot)), np.zeros(len(xplot)), xplot)).T)
        yplot = np.asarray(yplot).squeeze()

        # obtain theoretical covariance function from input MCMC parameter values: R method
        Scale = hr.PyMCsamples.col("scale")[Rel]
        amp = hr.PyMCsamples.col("amp")[Rel]
        inc = hr.PyMCsamples.col("inc")[Rel]
        ecc = hr.PyMCsamples.col("ecc")[Rel]
        t_lim_corr = hr.PyMCsamples.col("t_lim_corr")[Rel]
        scale_t = hr.PyMCsamples.col("scale_t")[Rel]
        sin_frac = hr.PyMCsamples.col("sin_frac")[Rel]
        CfromR = temptestcovPY(np.zeros(len(xplot)), np.zeros(len(xplot)), xplot,
                               Scale, amp, inc, ecc, t_lim_corr, scale_t, sin_frac,
                               r_paramfile_path)
        yplot2 = CfromR[0, :]

        # plot
        ymax = max(np.max(covDict['E_cov']), np.max(yplot), np.max(yplot2))
        ymin = min(np.min(covDict['E_cov']), np.min(yplot), np.min(yplot2), 0)
        r.plot(covDict['yearDist'], covDict['E_cov'], xlab="lag (years)", ylab="C",
               main=str(paramfileINDEX), ylim=(ymin, ymax))
        r.lines(xplot, yplot, col=2)
        r.lines(xplot, yplot2, col=3)
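# The SPACE branch above checks that a realisation's empirical variance is
# close to amp**2.  A minimal sketch of the same sanity check on simulated
# white noise (amp and the grid size are illustrative, not from the source):
import numpy as np

amp = 1.5
field = np.random.normal(0., amp, size=(100, 100))  # stand-in for one realisation
observedVar = np.var(field[~np.isnan(field)])
print 'observedVar =', round(observedVar, 4), '; amp^2 =', amp**2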
def theta(a=alpha, b=beta):
    """theta = logit^{-1}(a+b)"""
    return pymc.invlogit(a + b * x)
def f(sp_sub, x, a, b):
    p = pm.invlogit(sp_sub(x))
    h = pm.rbeta(a, b, size=len(p))
    return g6pd.p_fem_def(p, h)
def y(logit_p=logit_p, value=df[11]):
    return pm.bernoulli_like(value, pm.invlogit(logit_p))
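# The function above is the body of a PyMC observed stochastic: the data are
# bound through `value` and the Bernoulli log-likelihood is returned.  A
# hedged sketch with made-up data (y_obs and the prior on logit_p are illustrative):
import numpy as np
import pymc as pm

y_obs = np.array([0, 1, 1, 0, 1])
logit_p = pm.Normal('logit_p', mu=0., tau=1.)

@pm.observed
def y(logit_p=logit_p, value=y_obs):
    return pm.bernoulli_like(value, pm.invlogit(logit_p))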
                  default='50',
                  help='thinning ratio of MCMC process')
parser.add_option('-v', '--verbose',
                  default='0',
                  help='level of verbosity (0 = none, 1 = some, etc...)')
(options, args) = parser.parse_args()

age_len = dismod3.MAX_AGE
ages = np.arange(age_len, dtype='float')

print 'defining model transition parameters'

# incidence rate
i = .012 * mc.invlogit((ages - 44) / 3)

# remission rate
r = 0. * ages

# case-fatality rate
f = .085 * (ages / 100)**2.5

# all-cause mortality-rate
m = np.array([
    0.03266595, 0.01114646, 0.00450302, 0.00226896, 0.00143311,
    0.00109108, 0.00094584, 0.00087981, 0.00083913, 0.0008073,
    0.00078515, 0.00077967, 0.00079993, 0.00085375, 0.00094349,
    0.00106717, 0.00121825, 0.00138438, 0.00154968, 0.00170171,
    0.0018332, 0.00194182, 0.00202949, 0.00210058, 0.00215954,
    0.00221083, 0.00225905, 0.00230878, 0.00236425, 0.00242902,
    0.00250614, 0.00259834, 0.00270792, 0.00283638, 0.00298377,
    0.00314906,