def logit_rate_from_range(rate):
    """ calculate age-specific rates and variances in logit space from a Rate model object """
    logit_mesh = np.arange(rate.age_start, rate.age_end + 1)
    pop_vals = np.array(rate.population())

    n = (rate.numerator + 1.*NEARLY_ZERO) * pop_vals / np.sum(pop_vals)
    d = (rate.denominator + 2.*NEARLY_ZERO) * pop_vals / np.sum(pop_vals)

    logit_rate = mc.logit(np.minimum(n/d, 1. - NEARLY_ZERO))
    logit_V = (logit_rate - mc.logit(n/d + (n/d)*(1. - n/d)/np.sqrt(d)))**2.

    # filter out the points where the denominator is very close to zero
    good_mesh = []
    good_rate = []
    good_V = []
    for ii in range(len(logit_mesh)):
        if n[ii] > 0. and n[ii] < d[ii] and d[ii] > .01:
            good_mesh.append(logit_mesh[ii])
            good_rate.append(logit_rate[ii])
            good_V.append(logit_V[ii])

    return good_mesh, good_rate, good_V
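# The logit_V approximation above can be sanity-checked against the standard
# delta-method result: for x successes out of n trials,
# Var(logit(x/n)) ~= 1/(n*p*(1-p)) = 1/x + 1/(n-x). A minimal numpy-only
# sketch (the counts below are hypothetical, not from the Rate object above):
import numpy as np

def logit_variance_delta(x, n):
    """Delta-method approximation to Var(logit(x/n))."""
    x = np.asarray(x, dtype=float)
    n = np.asarray(n, dtype=float)
    return 1. / x + 1. / (n - x)

# e.g. 30 events out of 100 trials: 1/30 + 1/70 ~= 0.048
print(logit_variance_delta(30, 100))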
def generate_synthetic_data(truth, key, d):
    """ create simulated data """
    a0 = d['age_start']
    a1 = d['age_end']
    age_weights = d['age_weights']

    # NOTE: `y` comes from the enclosing scope in the original script
    d.update(condition='type_2_diabetes',
             year_start=y,
             year_end=y)

    p0 = dismod3.utils.rate_for_range(truth[key],
                                      range(a0, a1 + 1),
                                      np.ones(a1 + 1 - a0) / (a1 + 1 - a0))
    p0 = dismod3.utils.trim(p0, 1.e-6, 1. - 1.e-6)

    # TODO: make beta dispersion study-level (instead of datum-level)
    # p1 = mc.rbeta(p0 * dispersion, (1 - p0) * dispersion)
    p1 = p0

    # TODO: add additional covariates
    if key.find('prevalence') != -1:
        if random.random() < .1:
            d['self-reported'] = True
            p1 = mc.invlogit(mc.logit(p1) - .2)
        else:
            d['self-reported'] = False

    # p2 = mc.rbinomial(n, p1) / n
    p2 = float(p1)

    d['value'] = p2
    d['standard_error'] = .0001

    return d
def sim_cod_data(N, cf_rec):
    """ Create an NxJ matrix of simulated data (J is the number of causes
    and is determined by the length of cf_mean).

    N - the number of simulations
    cf_rec - a recarray containing:
        cause - a list of causes
        est - the estimates of the cause fractions
        lower - the lower bound of the cause fractions
        upper - the upper bound of the cause fractions
    """
    # logit the mean and bounds, and approximate the standard deviation in logit space
    cf_mean = mc.logit(cf_rec.est)
    cf_lower = mc.logit(cf_rec.lower)
    cf_upper = mc.logit(cf_rec.upper)
    std = (cf_upper - cf_lower) / (2 * 1.96)
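# sim_cod_data is truncated above, ending once the logit-space standard
# deviation is computed. A minimal sketch of one plausible continuation,
# assuming independent normal draws in logit space that are then
# inverse-transformed (an assumption for illustration, not the original code):
import numpy as np

def sim_cod_draws(N, cf_mean, std):
    # hypothetical continuation: N draws per cause, normal in logit space
    draws = np.random.normal(loc=cf_mean, scale=std, size=(N, len(cf_mean)))
    return 1. / (1. + np.exp(-draws))  # invlogit back to cause-fraction space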
def sim_data(N,
             true_cf=[[.3, .6, .1], [.3, .5, .2]],
             true_std=[[.2, .05, .05], [.3, 0.1, 0.1]],
             sum_to_one=True):
    """ Create an NxTxJ matrix of simulated data (T is determined by the
    length of true_cf, J by the length of the elements of true_cf).

    true_cf - a list of lists of true cause fractions (each must sum to one)
    true_std - a list of lists of the standard deviations corresponding to
        the true csmf's for each time point. Can either be a list of length J
        inside a list of length 1 (in this case, the same standard deviation
        is used for all time points) or can be T lists of length J (in this
        case, a separate standard deviation is specified and used for each
        time point).
    """
    if sum_to_one:
        assert pl.allclose(pl.sum(true_cf, 1), 1), \
            'The sum of elements of true_cf must equal 1'

    T = len(true_cf)
    J = len(true_cf[0])

    ## if only one std provided, duplicate for all time points
    if len(true_std) == 1 and len(true_cf) > 1:
        true_std = [true_std[0] for i in range(len(true_cf))]

    ## transform the mean and std to logit space
    transformed_std = []
    for t in range(T):
        pi_i = pl.array(true_cf[t])
        sigma_pi_i = pl.array(true_std[t])
        # delta method: d/dpi logit(pi) = 1/(pi*(1-pi)); the sign cancels when squared
        transformed_std.append(((1 / (pi_i * (pi_i - 1)))**2 * sigma_pi_i**2)**0.5)

    ## find minimum standard deviation (by cause, across time) and draw from this
    min_std = pl.array(transformed_std).min(0)
    common_perturbation = [pl.ones([T, J]) * mc.rnormal(mu=0, tau=min_std**-2)
                           for n in range(N)]

    ## draw from remaining variation
    tau = pl.array(transformed_std)**2 - min_std**2
    tau[tau == 0] = 0.000001
    additional_perturbation = [[mc.rnormal(mu=0, tau=tau[t]**-1) for t in range(T)]
                               for n in range(N)]

    result = pl.zeros([N, T, J])
    for n in range(N):
        result[n, :, :] = [mc.invlogit(mc.logit(true_cf[t])
                                       + common_perturbation[n][t]
                                       + additional_perturbation[n][t])
                           for t in range(T)]

    return result
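# A quick shape and sanity check for sim_data; this assumes the module's
# pl (pylab) and mc (pymc) imports are in place, and the means should only
# be roughly equal to true_cf, since invlogit of a perturbed logit is not
# mean-preserving:
# X = sim_data(1000)                  # defaults: T=2 time points, J=3 causes
# X.shape                             # -> (1000, 2, 3)
# pl.mean(X, axis=0).round(2)         # roughly [[.3, .6, .1], [.3, .5, .2]]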
def mortality(self, key="all-cause_mortality", data=None):
    """ Calculate the all-cause mortality rate for the region and sex of
    disease_model, and return it in an array corresponding to age_mesh.

    Parameters
    ----------
    key : str, optional
      of the form 'all-cause_mortality+gbd_region+year+sex'
    data : list, optional
      the data list to extract all-cause mortality from
    """
    if self.params.get("initial_value", {}).has_key(key):
        return self.get_initial_value(key)

    if not data:
        data = self.filter_data("all-cause_mortality data")

    if len(data) == 0:
        return NEARLY_ZERO * np.ones(len(self.get_estimate_age_mesh()))
    else:
        M, C = uninformative_prior_gp(c=-1.0, scale=300.0)
        age = []
        val = []
        V = []
        for d in data:
            scale = self.extract_units(d)
            a0 = d.get("age_start", MISSING)
            a1 = d.get("age_end", MISSING)
            y = self.value_per_1(d)
            se = self.se_per_1(d)

            if se == MISSING:
                se = 0.01
            if MISSING in [a0, a1, y]:
                continue

            age.append(0.5 * (a0 + a1))
            val.append(y + 0.00001)
            V.append(se ** 2.0)

        if len(data) > 0:
            gp.observe(M, C, age, mc.logit(val), V)

        normal_approx_vals = mc.invlogit(M(self.get_estimate_age_mesh()))
        self.set_initial_value(key, normal_approx_vals)
        return self.get_initial_value(key)
def make_model(lon, lat, input_data, covariate_keys, pos, neg, cpus=1):
    """ This function is required by the generic MBG code. """

    # Uniquify data locations
    data_mesh, logp_mesh, fi, ui, ti = uniquify(lon, lat)

    # Create the mean & its evaluation at the data locations.
    m = pm.Uninformative("m", 0)

    @pm.deterministic
    def M(m=m):
        return pm.gp.Mean(mean_fn, m=m)

    # The partial sill.
    amp = pm.Exponential("amp", 0.1, value=1.0)

    # The range parameter.
    scale = pm.Exponential("scale", 0.1, value=0.08)

    # This parameter controls the degree of differentiability of the field.
    diff_degree = pm.Uniform("diff_degree", 0.01, 3)

    # The nugget variance.
    V = pm.Gamma("V", 4, 40, value=0.1)
    tau = 1.0 / V

    # Create the covariance & its evaluation at the data locations.
    @pm.deterministic(trace=True)
    def C(amp=amp, scale=scale, diff_degree=diff_degree):
        """A covariance function created from the current parameter values."""
        return pm.gp.FullRankCovariance(pm.gp.cov_funs.matern.euclidean,
                                        amp=amp, scale=scale, diff_degree=diff_degree)

    # The Gaussian process submodel
    sp_sub = pm.gp.GPSubmodel("sp_sub", M, C, logp_mesh)

    # Add the nugget process
    eps_p_f = pm.Normal("eps_p_f", sp_sub.f_eval[fi], tau,
                        value=pm.logit((pos + 1.0) / (pos + neg + 2.0)))

    # Probability of 'success'
    p = pm.Lambda("s", lambda lt=eps_p_f: invlogit(lt), trace=False)

    # The data have the 'observed' flag set to True.
    d = pm.Binomial("d", pos + neg, p, value=pos, observed=True)

    return locals()
def values_from(dm, d, min_val=1.e-5, max_se=.1):
    """ Extract the normalized values from a piece of data

    Parameters
    ----------
    dm : disease model
    d : data dict
    min_val : float, optional
      the value to use instead of zero, since logit cannot model true zero
    max_se : float, optional
      the standard error to use for data with missing or zero standard error
    """
    est_mesh = dm.get_estimate_age_mesh()

    # get the index vector and weight vector for the age range
    age_indices = indices_for_range(est_mesh, d['age_start'], d['age_end'])
    age_weights = d.get('age_weights', np.ones(len(age_indices)))

    # ensure all rate data is valid
    d_val = dm.value_per_1(d)
    if d_val < 0 or d_val > 1:
        debug('WARNING: data %d not in range (0,1)' % d['id'])
        raise ValueError
    elif d_val == 0.:
        d_val = min_val / 10.  # TODO: determine if this is an acceptable way to deal with zero
    elif d_val == 1.:
        d_val = 1. - min_val / 10.
    logit_val = mc.logit(d_val)

    d_se = dm.se_per_1(d)
    if d_se == MISSING:
        d_se = max_se  # TODO: determine if this is an acceptable way to deal with missing
    elif d_se == 0.:
        d_se = max_se
    logit_se = (1 / d_val + 1 / (1 - d_val)) * d_se

    return age_indices, age_weights, logit_val, logit_se
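# The logit_se line above is the delta method in one step: with
# g(p) = logit(p), g'(p) = 1/(p*(1-p)) = 1/p + 1/(1-p), so
# se(logit(p)) ~= |g'(p)| * se(p) = (1/d_val + 1/(1-d_val)) * d_se.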
def fit_without_confrontation(id, region, sex, year):
    """ Fit posterior of specified region/sex/year for specified model,
    without trying to integrate conflicting sources of data

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years
    """
    ## load model
    dm = dismod3.load_disease_model(id)

    ## separate out prevalence and relative-risk data
    prev_data = [d for d in dm.data if dm.relevant_to(d, 'prevalence', region, year, sex)]
    rr_data = [d for d in dm.data if dm.relevant_to(d, 'relative-risk', region, year, sex)]
    dm.data = [d for d in dm.data if not d in prev_data and not d in rr_data]

    ### setup the generic disease model (without prevalence data)
    import dismod3.gbd_disease_model as model

    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex])
    dm.calc_effective_sample_size(dm.data)
    dm.vars = model.setup(dm, keys)

    ## override the birth prevalence prior, based on the withheld prevalence data
    logit_C_0 = dm.vars[dismod3.utils.gbd_key_for('bins', region, year, sex)]['initial']['logit_C_0']
    assert len(prev_data) == 1, 'should be a single prevalence datum'
    d = prev_data[0]
    mu_logit_C_0 = mc.logit(dm.value_per_1(d) + dismod3.settings.NEARLY_ZERO)
    lb, ub = dm.bounds_per_1(d)
    sigma_logit_C_0 = (mc.logit(ub + dismod3.settings.NEARLY_ZERO)
                       - mc.logit(lb + dismod3.settings.NEARLY_ZERO)) / (2 * 1.96)
    print 'mu_C_0_pri:', mc.invlogit(mu_logit_C_0)
    print 'ui_C_0_pri:', lb, ub

    # override the excess-mortality, based on the relative-risk data
    mu_rr = 1.01 * np.ones(dismod3.settings.MAX_AGE)
    sigma_rr = .01 * np.ones(dismod3.settings.MAX_AGE)
    for d in rr_data:
        mu_rr[d['age_start']:(d['age_end'] + 1)] = dm.value_per_1(d)
        sigma_rr[d['age_start']:(d['age_end'] + 1)] = dm.se_per_1(d)
    print 'mu_rr:', mu_rr.round(2)
    #print 'sigma_rr:', sigma_rr.round(2)

    log_f = dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, year, sex)]['age_coeffs']
    log_f_mesh = log_f.parents['gamma_mesh']
    param_mesh = log_f.parents['param_mesh']

    m_all = dm.vars[dismod3.utils.gbd_key_for('all-cause_mortality', region, year, sex)]
    mu_log_f = np.log((mu_rr - 1) * m_all)
    sigma_log_f = 1 / ((mu_rr - 1) * m_all) * sigma_rr * m_all
    print 'mu_log_f:', mu_log_f.round(2)[param_mesh]
    print 'sigma_log_f:', sigma_log_f.round(2)[param_mesh]

    ### fit the model using Monte Carlo simulation (shoehorned into the MCMC framework of PyMC)
    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.use_step_method(SampleFromNormal, logit_C_0,
                            mu=mu_logit_C_0, tau=sigma_logit_C_0**-2)
    dm.mcmc.use_step_method(SampleFromNormal, log_f_mesh,
                            mu=mu_log_f[param_mesh], tau=sigma_log_f[param_mesh]**-2)
    for stoch in dm.mcmc.stochastics:
        dm.mcmc.use_step_method(mc.NoStepper, stoch)
    dm.mcmc.sample(1000, verbose=dismod3.settings.ON_SGE)

    #print 'mu_C_0_post:', mc.invlogit(logit_C_0.stats()['mean']).round(2)
    #print 'ui_C_0_post:', mc.invlogit(logit_C_0.stats()['95% HPD interval']).round(2)
    #print 'mu_rr_post:', dm.vars[dismod3.utils.gbd_key_for('relative-risk', region, year, sex)]['rate_stoch'].stats()['mean'].round(2)
    print 'mu_log_f_mesh_post:', log_f_mesh.stats()['mean'].round(2)
    print 'mu_f_post:', dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, year, sex)]['rate_stoch'].stats()['mean'].round(2)

    for k in keys:
        t, r, y, s = dismod3.utils.type_region_year_sex_from_key(k)

        if t in ['incidence', 'prevalence', 'remission', 'excess-mortality',
                 'mortality', 'prevalence_x_excess-mortality']:
            dismod3.neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k])
        elif t in ['relative-risk', 'duration', 'incidence_x_duration']:
            dismod3.normal_model.store_mcmc_fit(dm, k, dm.vars[k])

    from fit_posterior import save_country_level_posterior
    if str(year) == '2005':  # also generate 2010 estimates
        save_country_level_posterior(dm, region, 2010, sex, ['prevalence', 'remission'])
    save_country_level_posterior(dm, region, year, sex, ['prevalence', 'remission'])
    #'prevalence incidence remission excess-mortality duration mortality relative-risk'.split())

    # save results (do this last, because it removes things from the disease
    # model that the plotting functions, etc., might need)
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex])
    dm.save('dm-%d-posterior-%s-%s-%s.json' % (dm.id, region, sex, year), keys_to_save=keys)

    return dm
def transform_bin_data(pos, neg):
    return pm.logit((pos + 1.) / (pos + neg + 2.))
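# transform_bin_data is the empirical logit with add-one (Laplace) smoothing:
# the +1/+2 keeps the ratio strictly inside (0, 1), so pos == 0 or neg == 0
# gives a large but finite value rather than -inf/+inf. A small numpy-only
# check with illustrative counts:
import numpy as np

pos = np.array([0, 5, 10])
neg = np.array([10, 5, 0])
p_smooth = (pos + 1.) / (pos + neg + 2.)      # [1/12, 1/2, 11/12]
print(np.log(p_smooth / (1. - p_smooth)))     # ~[-2.40, 0.00, 2.40], all finite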
# ===========================
# = Create likelihood layer =
# ===========================
eps_p_f_list = []
N_pos_list = []

# Obtain the spline representation of the log of the Monte Carlo-integrated
# likelihood function at each datapoint. The nodes are at .01, .02, ..., .98, .99.
junk, splreps = age_corr_likelihoods(all_pts, 10000, np.arange(.01, 1., .01), norun_name)
for i in xrange(len(splreps)):
    splreps[i] = list(splreps[i])

# Don't worry, these are just reasonable initial values...
val_now = pm.logit(np.array(all_pts.PF + 1, dtype=float) / (all_pts.EXAMINED + 2))
if with_stukel:
    val_now = pm.stukel_logit(np.array(all_pts.PF + 1, dtype=float) / (all_pts.EXAMINED + 2),
                              a1.value, a2.value)

if data_mesh.shape[0] % chunk == 0:
    additional_index = 0
else:
    additional_index = 1

for i in xrange(0, data_mesh.shape[0] / chunk + additional_index):
    this_slice = slice(chunk * i, min((i + 1) * chunk, data_mesh.shape[0]))

    # epsilon plus f, given f.
    @pm.stochastic(trace=False, dtype=np.float)
    def eps_p_f_now(value=val_now[this_slice], f=f_eval, V=V, this_slice=this_slice):
        return pm.normal_like(value, f[fi][this_slice], 1. / V)
print "Trying again: %s" % msg init_OK = False gc.collect() # =========================== # = Create likelihood layer = # =========================== eps_p_f_list = [] N_pos_list = [] # Don't worry, these are just reasonable initial values... if with_stukel: val_now = pm.stukel_logit((pos + 1.0) / (pos + neg + 2.0), a1.value, a2.value) else: val_now = pm.logit((pos + 1.0) / (pos + neg + 2.0)) if data_mesh.shape[0] % chunk == 0: additional_index = 0 else: additional_index = 1 for i in xrange(0, data_mesh.shape[0] / chunk + additional_index): this_slice = slice(chunk * i, min((i + 1) * chunk, data_mesh.shape[0])) # epsilon plus f, given f. @pm.stochastic(trace=False, dtype=np.float) def eps_p_f_now(value=val_now[this_slice], f=sp_sub.f_eval, V=V, sl=this_slice): return pm.normal_like(value, f[fi][sl], 1.0 / V)
def set_birth_prev(value):
    model.vars['logit_C0'].value = mc.logit(pl.maximum(1.e-9, value))
def plot_all_priors(model, data=None, unique=True, model_kwargs=None):
    """ plot the priors of an HDDM model

    Input:
        data <DataFrame> - data to be plotted against the priors
        unique <bool> - whether to drop duplicate values from each column
            of data before plotting it
    """
    # set limits for plots
    lb = {'v': -10, 'dc(1)': -5, 'z': 0.001, 'z_std': 0}
    ub = {'a': 4, 't': 1, 'v': 10, 'z': 1, 'sz': 1, 'st': 1, 'sv': 15,
          'p_outlier': 1, 'z_trans(1)': 1, 'z(1)': 1, 'dc(1)': 5,
          'a_std': 5, 'v_std': 5, 'z_std': 0.5, 't_std': 5, 'dc_std': 5}

    # plot all priors
    n_rows = 4
    n_cols = 5
    for n_subjs in [1]:  #,2]:
        # create a model
        # h_data, _ = hddm.generate.gen_rand_data(subjs=n_subjs, size=2)
        # if model_kwargs is None:
        #     model_kwargs = {}
        # h = model(h_data, include='all', **model_kwargs)
        # h = model

        fig = plt.figure()
        plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1, hspace=.7)
        counter = 0
        for name, node_row in model.iter_group_nodes():
            if not name in ub:  # only those listed
                continue
            if 'var' in name or 'p_outlier' in name:
                continue
            if 'trans' in name:
                trans = True
                name = name.replace('_trans', '')
            else:
                trans = False
            counter += 1
            node = node_row['node']
            print(name)
            print(node.logp)

            # plot a single prior
            ax = plt.subplot(n_rows, n_cols, counter)
            ax.set_yticklabels([])

            # generate pdf
            xlim = np.arange(lb.get(name, 0.001), ub[name], 0.01)
            pdf = np.zeros(len(xlim))
            # assume that the logp has the prior?
            for i in range(len(pdf)):
                if not trans:
                    node.value = xlim[i]
                    pdf[i] = np.exp(node.logp)
                else:
                    node.value = pm.logit(xlim[i])
                    pdf[i] = np.exp(node.logp) * 10

            # plot the pdf
            plt.plot(xlim, pdf)
            plt.xlabel(name)
            sns.despine(offset=2, trim=True)

            # # Hide the right and top spines
            # ax.spines['right'].set_visible(False)
            # ax.spines['top'].set_visible(False)
            #
            # # Only show ticks on the left and bottom spines
            # ax.yaxis.set_ticks_position('left')
            # ax.xaxis.set_ticks_position('bottom')

    # add suptitle
    plt.suptitle('HDDM priors')

    # save the figure
    plt.savefig(os.path.join(mypath, 'priorPlot.pdf'))
def mu(invlogit_mu=rate_stoch):
    return mc.logit(invlogit_mu)
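# mu above reads like the body of a PyMC2 deterministic; the decorator is not
# shown in the excerpt. A sketch of how it would typically be declared,
# assuming the surrounding code's PyMC2 conventions:
# @mc.deterministic
# def mu(invlogit_mu=rate_stoch):
#     return mc.logit(invlogit_mu)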
# Obtain the spline representation of the log of the Monte Carlo-integrated
# likelihood function at each datapoint. The nodes are at .01, .02, ..., .98, .99.
splrep_fname = hashlib.sha1(lo_age.tostring() + up_age.tostring()
                            + pos.tostring() + neg.tostring()).hexdigest() + '.pickle'
if splrep_fname in os.listdir('.'):
    splreps = cPickle.loads(file(splrep_fname).read())
else:
    junk, splreps = age_corr_likelihoods(lo_age, up_age, pos, neg, 10000,
                                         np.arange(.01, 1., .01),
                                         a_pred, P_trace, S_trace, F_trace)
    file(splrep_fname, 'w').write(cPickle.dumps(splreps))
for i in xrange(len(splreps)):
    splreps[i] = list(splreps[i])

# Don't worry, these are just reasonable initial values...
if with_stukel:
    val_now = pm.stukel_logit((pos + 1.) / (pos + neg + 2.), a1.value, a2.value)
else:
    val_now = pm.logit((pos + 1.) / (pos + neg + 2.))

if data_mesh.shape[0] % chunk == 0:
    additional_index = 0
else:
    additional_index = 1

for i in xrange(0, data_mesh.shape[0] / chunk + additional_index):
    this_slice = slice(chunk * i, min((i + 1) * chunk, data_mesh.shape[0]))

    # epsilon plus f, given f.
    @pm.stochastic(trace=False, dtype=np.float)
    def eps_p_f_now(value=val_now[this_slice], f=sp_sub.f_eval, V=V, sl=this_slice):
        return pm.normal_like(value, f[fi][sl], 1. / V)
    eps_p_f_now.__name__ = "eps_p_f%i" % i
def plot_all_priors(model, data=None, unique=True, model_kwargs=None):
    """ plot the priors of an HDDM model

    Input:
        data <DataFrame> - data to be plotted against the priors
        unique <bool> - whether to drop duplicate values from each column
            of data before plotting it
    """
    # set limits for plots
    lb = {'v': -10}
    ub = {'a': 4, 't': 1, 'v': 10, 'z': 1, 'sz': 1, 'st': 1, 'sv': 15,
          'p_outlier': 1}

    # plot all priors
    n_rows = 2
    n_cols = 4
    for n_subjs in [1, 2]:
        # create a model
        h_data, _ = hddm.generate.gen_rand_data(subjs=n_subjs, size=2)
        if model_kwargs is None:
            model_kwargs = {}
        h = model(h_data, include='all', **model_kwargs)

        fig = plt.figure()
        counter = 0
        for name, node_row in h.iter_group_nodes():
            if 'var' in name:
                continue
            if 'trans' in name:
                trans = True
                name = name.replace('_trans', '')
            else:
                trans = False
            counter += 1
            node = node_row['node']

            # plot a single prior
            ax = plt.subplot(n_rows, n_cols, counter)

            # if data is given then plot it
            if data is not None:
                try:
                    if unique:
                        t_data = data[name].dropna().unique()
                    else:
                        t_data = data[name].dropna().values
                    # if name == 'v':
                    #     t_data = np.concatenate((t_data, -t_data))
                    ax.hist(t_data, 20, normed=True)
                except KeyError:
                    pass

            # generate pdf
            xlim = arange(lb.get(name, 0.001), ub[name], 0.01)
            pdf = np.zeros(len(xlim))
            for i in range(len(pdf)):
                if not trans:
                    node.value = xlim[i]
                    pdf[i] = np.exp(node.logp)
                else:
                    node.value = pm.logit(xlim[i])
                    pdf[i] = np.exp(node.logp) * 10

            # plot the pdf
            plt.plot(xlim, pdf)
            plt.title(name)

        # add suptitle
        if n_subjs > 1:
            plt.suptitle('Group model')
        else:
            plt.suptitle('Subject model')
def initial_guess(treatment):
    return np.median(logit((pos_counts[treatment_ids == treatment] + 1).astype(float)
                           / (total_counts[treatment_ids == treatment] + 2)))
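# A toy call of initial_guess, assuming module-level arrays like the ones it
# closes over (all names and values here are hypothetical stand-ins for the
# module's real data, and logit could be, e.g., scipy.special.logit):
# pos_counts    = np.array([3, 5, 0, 8])
# total_counts  = np.array([10, 10, 10, 10])
# treatment_ids = np.array([0, 0, 1, 1])
# initial_guess(0)  # -> median of logit(4./12) and logit(6./12)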
def make_model(N, k, X, backend, manifold):
    """ A standard spatial logistic regression.
        - N: Number sampled at each location
        - k: Number positive at each location
        - X: x,y,z coords of each location
        - backend: The linear algebra backend. So far, this has to be 'cholmod'.
        - manifold: The manifold to work on. So far, this has to be 'spherical'.
    """
    # Make the Delaunay triangulation.
    neighbors, triangles, trimap, b = manifold.triangulate_sphere(X)

    # Uncomment to visualize the triangulation.
    # manifold.plot_triangulation(X, neighbors)

    # Generate the C, Ctilde and G matrices in SciPy 'lil' format.
    triangle_areas = [manifold.triangle_area(X, t) for t in triangles]
    Ctilde = manifold.Ctilde(X, triangles, triangle_areas)
    C = manifold.C(X, triangles, triangle_areas)
    G = manifold.G(X, triangles, triangle_areas)

    # Convert to SciPy 'csc' format for efficient use by the CHOLMOD backend.
    C = backend.into_matrix_type(C)
    Ctilde = backend.into_matrix_type(Ctilde)
    G = backend.into_matrix_type(G)

    # Kappa is the scale parameter. It's a free variable.
    kappa = pm.Exponential('kappa', 1, value=3)

    # Fix the value of alpha.
    alpha = 2.

    # amp is the overall amplitude. It's a free variable that will probably
    # be highly confounded with kappa.
    amp = pm.Exponential('amp', .0001, value=100)

    # A constant mean.
    m = pm.Uninformative('m', value=0)

    @pm.deterministic(trace=False)
    def M(m=m, n=len(X)):
        """The mean vector"""
        return np.ones(n) * m

    @pm.deterministic(trace=False)
    def Q(kappa=kappa, alpha=alpha, amp=amp, Ctilde=Ctilde, G=G, backend=backend):
        "The precision matrix."
        out = operators.mod_frac_laplacian_precision(Ctilde, G, kappa, alpha,
                                                     backend) / np.asscalar(amp)**2
        return out

    # Do all the precomputation you can based on the sparsity pattern alone.
    # Note that if alpha is made free, this needs to be free also, as the
    # sparsity pattern will be changeable.
    pattern_products = backend.pattern_to_products(Q.value)

    @pm.deterministic(trace=False)
    def precision_products(Q=Q, p=pattern_products):
        "All the analysis of the precision matrix that the backend needs to do MVN computations."
        try:
            return backend.precision_to_products(Q, **p)
        except backend.NonPositiveDefiniteError:
            return None

    # The random field.
    empirical_S = pm.logit((k + 1) / (N + 2.))
    S = pymc_objects.SparseMVN('S', M, precision_products, backend, value=empirical_S)

    @pm.deterministic(trace=False)
    def p(S=S):
        """The success probability."""
        return pm.invlogit(S)

    # The data.
    data = pm.Binomial('data', n=N, p=p, value=k, observed=True)

    # A Fortran representation of the likelihood, to allow for fast Metropolis
    # steps without querying data.logp.
    likelihood_variables = np.vstack((np.resize(N, k.shape), k)).T
    likelihood_string = """
    lkp = dexp({X})/(1.0D0+dexp({X}))
    lkp = lv(i,2)*dlog(lkp) + (lv(i,1)-lv(i,2))*dlog(1.0D0-lkp)
    """

    return locals()
def pripred_check(m=m, amp=amp, V=V):
    p_above = scipy.stats.distributions.norm.cdf(m - pm.logit(threshold_val),
                                                 0, np.sqrt(amp**2 + V))
    if p_above <= max_p_above:
        return 0.
    else:
        return -np.inf
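# pripred_check is a prior-predictive constraint: under the prior the field
# at a point is N(m, amp**2 + V), so norm.cdf(m - logit(threshold_val), 0, sd)
# is the probability that the field exceeds the threshold on the logit scale,
# and parameter values for which that probability exceeds max_p_above are
# rejected. A numeric sketch with illustrative values:
import numpy as np
from scipy import stats

m, amp, V = -1.0, 1.0, 0.5
logit_threshold = np.log(.9 / .1)  # threshold_val = .9, say
p_above = stats.norm.cdf(m - logit_threshold, 0, np.sqrt(amp**2 + V))
print(p_above)  # ~0.005, well under a max_p_above of, e.g., .1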
def setup(dm, key, data_list, rate_stoch=None, emp_prior={}):
    """ Generate the PyMC variables for a beta-binomial model of a single rate function

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)
    data_list : list of data dicts
      the observed data to use in the beta-binomial likelihood function
    rate_stoch : pymc.Stochastic, optional
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimation_age_mesh()).
      This is used to link beta-binomial stochs into a larger model,
      for example.
    emp_prior : dict, optional
      the empirical prior dictionary, retrieved from the disease model
      if appropriate by::

        >>> t, r, y, s = type_region_year_sex_from_key(key)
        >>> emp_prior = dm.get_empirical_prior(t)

    Returns
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      beta-binomial model. vars['rate_stoch'] is of particular
      relevance; this is what is used to link the beta-binomial model
      into more complicated models, like the generic disease model.

    Details
    -------
    The beta-binomial model parameters are the following:
      * the mean age-specific rate function
      * dispersion of this mean
      * the p_i value for each data observation that has a standard
        error (data observations that do not have standard errors
        recorded are fit as observations of the beta r.v., while
        observations with standard errors recorded have a latent
        variable for the beta, and an observed binomial r.v.)
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    if np.any(np.diff(est_mesh) != 1):
        raise ValueError, "ERROR: Gaps in estimation age mesh must all equal 1"

    # set up age-specific rate function, if it does not yet exist
    if not rate_stoch:
        param_mesh = dm.get_param_age_mesh()

        if emp_prior.has_key("mu"):
            initial_value = emp_prior["mu"]
        else:
            initial_value = dm.get_initial_value(key)

        # find the logit of the initial values, which is a little bit
        # of work because initial values are sampled from the est_mesh,
        # but the logit_initial_values are needed on the param_mesh
        logit_initial_value = mc.logit(interpolate(est_mesh, initial_value, param_mesh))

        logit_rate = mc.Normal("logit(%s)" % key,
                               mu=-5.0 * np.ones(len(param_mesh)),
                               tau=1.0e-2,
                               value=logit_initial_value)
        # logit_rate = [mc.Normal('logit(%s)_%d' % (key, a), mu=-5., tau=1.e-2) for a in param_mesh]
        vars["logit_rate"] = logit_rate

        @mc.deterministic(name=key)
        def rate_stoch(logit_rate=logit_rate):
            return interpolate(param_mesh, mc.invlogit(logit_rate), est_mesh)

    if emp_prior.has_key("mu"):
        @mc.potential(name="empirical_prior_%s" % key)
        def emp_prior_potential(f=rate_stoch,
                                mu=emp_prior["mu"],
                                tau=1.0 / np.array(emp_prior["se"]) ** 2):
            return mc.normal_like(f, mu, tau)
        vars["empirical_prior"] = emp_prior_potential

    vars["rate_stoch"] = rate_stoch

    # create stochastic variable for over-dispersion "random effect"
    mu_od = emp_prior.get("dispersion", 0.001)
    dispersion = mc.Gamma("dispersion_%s" % key, alpha=10.0, beta=10.0 / mu_od)
    vars["dispersion"] = dispersion

    @mc.deterministic(name="alpha_%s" % key)
    def alpha(rate=rate_stoch, dispersion=dispersion):
        return rate / dispersion ** 2

    @mc.deterministic(name="beta_%s" % key)
    def beta(rate=rate_stoch, dispersion=dispersion):
        return (1.0 - rate) / dispersion ** 2

    vars["alpha"] = alpha
    vars["beta"] = beta

    # create potentials for priors
    vars["priors"] = generate_prior_potentials(dm.get_priors(key), est_mesh,
                                               rate_stoch, dispersion)

    # create latent and observed stochastics for data
    vars["data"] = data_list
    vars["ab"] = []
    vars["latent_p"] = []
    vars["observations"] = []
    for d in data_list:
        # set up observed stochs for all relevant data
        id = d["id"]

        if d["value"] == MISSING:
            print "WARNING: data %d missing value" % id
            continue

        # ensure all rate data is valid
        d_val = dm.value_per_1(d)
        d_se = dm.se_per_1(d)
        if d_val < 0 or d_val > 1:
            print "WARNING: data %d not in range [0,1]" % id
            continue

        if d["age_start"] < est_mesh[0] or d["age_end"] > est_mesh[-1]:
            raise ValueError, "Data %d is outside of estimation range---([%d, %d] is not inside [%d, %d])" % (
                d["id"], d["age_start"], d["age_end"], est_mesh[0], est_mesh[-1])

        age_indices = indices_for_range(est_mesh, d["age_start"], d["age_end"])
        age_weights = d["age_weights"]

        @mc.deterministic(name="a_%d^%s" % (id, key))
        def a_i(alpha=alpha, age_indices=age_indices, age_weights=age_weights):
            return rate_for_range(alpha, age_indices, age_weights)

        @mc.deterministic(name="b_%d^%s" % (id, key))
        def b_i(beta=beta, age_indices=age_indices, age_weights=age_weights):
            return rate_for_range(beta, age_indices, age_weights)

        vars["ab"] += [a_i, b_i]

        if d_se > 0:
            # if the data has a standard error, model it as a realization
            # of a beta-binomial r.v.
            latent_p_i = mc.Beta("latent_p_%d^%s" % (id, key),
                                 alpha=a_i, beta=b_i,
                                 value=trim(d_val, NEARLY_ZERO, 1 - NEARLY_ZERO))
            vars["latent_p"].append(latent_p_i)

            denominator = d_val * (1 - d_val) / d_se ** 2.0
            numerator = d_val * denominator
            obs_binomial = mc.Binomial("data_%d^%s" % (id, key),
                                       value=numerator, n=denominator,
                                       p=latent_p_i, observed=True)
            vars["observations"].append(obs_binomial)
        else:
            # if the data is a point estimate with no uncertainty
            # recorded, model it as a realization of a beta r.v.
            obs_p_i = mc.Beta("latent_p_%d" % id,
                              value=trim(d_val, NEARLY_ZERO, 1 - NEARLY_ZERO),
                              alpha=a_i, beta=b_i, observed=True)
            vars["observations"].append(obs_p_i)

    return vars
key = dismod3.gbd_key_for('%s', region, year, sex)

if clean(region) == 'north_america_high_income':
    regional_offset = 0.
else:
    regional_offset = -.5

time_offset = (int(year) - 1997) / 10.

if clean(sex) == 'male':
    sex_offset = .1
else:
    sex_offset = 0.

# incidence rate: the regional, time, and sex offsets are added on the
# log-odds scale, so each acts multiplicatively on the odds of the baseline
# rate (e.g. regional_offset = -.5 multiplies the odds by exp(-.5) ~= 0.61)
i = mc.invlogit(mc.logit(.012 * mc.invlogit((ages - 44) / 3))
                + regional_offset + time_offset + sex_offset)
truth[key % 'incidence'] = i

# remission rate
r = 0. * ages
truth[key % 'remission'] = r

# excess-mortality rate
f = .085 * (ages / 100) ** 2.5
truth[key % 'excess-mortality'] = f

## compartmental model (bins S, C, D, M)
SCDM = np.zeros([4, age_len])
SCDM[0, 0] = 1.

for a in range(age_len - 1):
def make_model(lon, lat, input_data, covariate_keys, pos, neg):
    """ This function is required by the generic MBG code. """

    # How many nuggeted field points to handle with each step method
    grainsize = 10

    # Unique data locations
    data_mesh, logp_mesh, fi, ui, ti = uniquify(lon, lat)

    s_hat = (pos + 1.) / (pos + neg + 2.)

    # The partial sill.
    amp = pm.Exponential('amp', .1, value=1.4)

    # The range parameter. Units are RADIANS.
    # 1 radian = the radius of the earth, about 6378.1 km
    scale = pm.Exponential('scale', .1, value=.07)

    @pm.potential
    def scale_constraint(scale=scale):
        if scale > .5:
            return -np.inf
        else:
            return 0

    # This parameter controls the degree of differentiability of the field.
    diff_degree = pm.Uniform('diff_degree', .01, 3, value=0.5, observed=True)

    # The nugget variance.
    V = pm.Exponential('V', .1, value=1)
    # @pm.potential
    # def V_constraint(V=V):
    #     if V < .1:
    #         return -np.inf
    #     else:
    #         return 0

    a0 = pm.Normal('a0', 0, .1, value=0, observed=True)
    # a1 limits mixing.
    a1 = pm.Normal('a1', 0, .1, value=0, observed=True)
    a = pm.Lambda('a', lambda a0=a0, a1=a1: [a0, a1])

    m = pm.Uninformative('m', value=-13)

    @pm.deterministic(trace=False)
    def M(m=m):
        return pm.gp.Mean(mean_fn, m=m)

    if constrained:
        @pm.potential
        def pripred_check(m=m, amp=amp, V=V, a=a):
            p_above = scipy.stats.distributions.norm.cdf(
                m - pm.stukel_logit(threshold_val, *a), 0, np.sqrt(amp**2 + V))
            if p_above <= max_p_above:
                return 0.
            else:
                return -np.inf

    # Create the covariance & its evaluation at the data locations.
    facdict = dict([(k, 1.e6) for k in covariate_keys])
    facdict['m'] = 0

    @pm.deterministic(trace=False)
    def C(amp=amp, scale=scale, diff_degree=diff_degree, ck=covariate_keys,
          id=input_data, ui=ui, facdict=facdict):
        """A covariance function created from the current parameter values."""
        eval_fn = CovarianceWithCovariates(pm.gp.matern.geo_rad, id, ck, ui, fac=facdict)
        return pm.gp.FullRankCovariance(eval_fn, amp=amp, scale=scale,
                                        diff_degree=diff_degree)

    sp_sub = pm.gp.GPSubmodel('sp_sub', M, C, logp_mesh, tally_f=False)

    # Make f start somewhere a bit sane
    sp_sub.f_eval.value = sp_sub.f_eval.value - np.mean(sp_sub.f_eval.value)

    # Loop over data clusters
    eps_p_f_d = []
    s_d = []
    data_d = []

    for i in xrange(len(pos) / grainsize + 1):
        sl = slice(i * grainsize, (i + 1) * grainsize, None)
        if len(pos[sl]) > 0:
            # Nuggeted field in this cluster
            eps_p_f_d.append(pm.Normal('eps_p_f_%i' % i, sp_sub.f_eval[fi[sl]], 1. / V,
                                       value=pm.logit(s_hat[sl]), trace=False))

            # The allele frequency
            s_d.append(pm.Lambda('s_%i' % i,
                                 lambda lt=eps_p_f_d[-1], a=a: pm.flib.stukel_invlogit(lt, *a),
                                 trace=False))

            # The observed allele frequencies
            data_d.append(pm.Binomial('data_%i' % i, pos[sl] + neg[sl], s_d[-1],
                                      value=pos[sl], observed=True))

    # The field plus the nugget
    @pm.deterministic
    def eps_p_f(eps_p_fd=eps_p_f_d):
        """Concatenated version of eps_p_f, for postprocessing & Gibbs sampling purposes"""
        return np.hstack(eps_p_fd)

    return locals()
Y = df['Parameter Value'].__array__()
X = .5 * (df['Age Start'] + df['Age End']).__array__()

pl.plot(X, Y, 'ks', label='Observed', mec='w', mew=1)

XX = sm.add_constant(X)
X_pred = pl.arange(65)
XX_pred = sm.add_constant(X_pred)

model = sm.OLS(Y, XX)
results = model.fit()

# predict from the fitted results (model.predict expects params as its first argument)
Y_pred = results.predict(XX_pred)
pl.plot(X_pred, Y_pred, 'k-', linewidth=2, label='Predicted by OLS')

Y = mc.logit(df['Parameter Value'].__array__())
model = sm.OLS(Y, XX)
results = model.fit()

Y_pred = results.predict(XX_pred)
pl.plot(X_pred, mc.invlogit(Y_pred), 'k--', linewidth=2,
        label='Predicted by logit-transformed OLS')

pl.xlabel('Age (Years)')
pl.ylabel('Seroprevalence (Per 1)')
pl.legend(loc='lower right', fancybox=True, shadow=True)
pl.axis([-5, 55, 0, 1.2])
pl.grid()
pl.savefig('vzv_forest.pdf')
def make_model(lon, lat, covariate_values, pos, neg, cpus=1):
    """ This function is required by the generic MBG code. """

    # How many nuggeted field points to handle with each step method
    grainsize = 10

    # Non-unique data locations
    data_mesh = combine_spatial_inputs(lon, lat)

    s_hat = (pos + 1.) / (pos + neg + 2.)

    # Uniquify the data locations.
    locs = [(lon[0], lat[0])]
    fi = [0]
    ui = [0]
    for i in xrange(1, len(lon)):
        loc = (lon[i], lat[i])
        # If repeat location, add observation
        if loc in locs:
            fi.append(locs.index(loc))
        # Otherwise, new obs
        else:
            locs.append(loc)
            fi.append(max(fi) + 1)
            ui.append(i)
    fi = np.array(fi)
    ti = [np.where(fi == i)[0] for i in xrange(max(fi) + 1)]
    ui = np.asarray(ui)

    lon = np.array(locs)[:, 0]
    lat = np.array(locs)[:, 1]

    # Unique data locations
    logp_mesh = combine_spatial_inputs(lon, lat)

    # Create the mean & its evaluation at the data locations.
    M, M_eval = trivial_means(logp_mesh)

    init_OK = False
    while not init_OK:
        try:
            # Space-time component
            sp_sub = ibd_covariance_submodel()
            covariate_dict, C_eval = cd_and_C_eval(covariate_values, sp_sub['C'],
                                                   data_mesh, ui)

            # The field evaluated at the uniquified data locations
            f = pm.MvNormalCov('f', M_eval, C_eval)

            # Make f start somewhere a bit sane
            f.value = f.value - np.mean(f.value)

            # Loop over data clusters
            eps_p_f_d = []
            s_d = []
            data_d = []

            for i in xrange(len(pos) / grainsize + 1):
                sl = slice(i * grainsize, (i + 1) * grainsize, None)

                # Nuggeted field in this cluster
                eps_p_f_d.append(pm.Normal('eps_p_f_%i' % i, f[fi[sl]], 1. / sp_sub['V'],
                                           value=pm.logit(s_hat[sl]), trace=False))

                # The allele frequency
                s_d.append(pm.Lambda('s_%i' % i,
                                     lambda lt=eps_p_f_d[-1]: invlogit(lt),
                                     trace=False))

                # The observed allele frequencies
                data_d.append(pm.Binomial('data_%i' % i, pos[sl] + neg[sl], s_d[-1],
                                          value=pos[sl], observed=True))

            # The field plus the nugget
            @pm.deterministic
            def eps_p_f(eps_p_fd=eps_p_f_d):
                """Concatenated version of eps_p_f, for postprocessing & Gibbs sampling purposes"""
                return np.concatenate(eps_p_fd)

            init_OK = True
        except pm.ZeroProbability, msg:
            print 'Trying again: %s' % msg
            init_OK = False
            gc.collect()
def plot_all_priors(model, data=None, unique=True, model_kwargs=None):
    """ plot the priors of an HDDM model

    Input:
        data <DataFrame> - data to be plotted against the priors
        unique <bool> - whether to drop duplicate values from each column
            of data before plotting it
    """
    # set limits for plots
    lb = {'v': -10}
    ub = {'a': 4, 't': 1, 'v': 10, 'z': 1, 'sz': 1, 'st': 1, 'sv': 15,
          'p_outlier': 1}

    # plot all priors
    n_rows = 4
    n_cols = 2
    for n_subjs in [1]:  #,2]:
        # create a model
        h_data, _ = hddm.generate.gen_rand_data(subjs=n_subjs, size=2)
        if model_kwargs is None:
            model_kwargs = {}
        h = model(h_data, include='all', **model_kwargs)

        fig = plt.figure()
        plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1, hspace=.7)
        counter = 0
        for name, node_row in h.iter_group_nodes():
            if 'var' in name or 'p_outlier' in name:
                continue
            if 'trans' in name:
                trans = True
                name = name.replace('_trans', '')
            else:
                trans = False
            counter += 1
            node = node_row['node']

            # plot a single prior
            ax = plt.subplot(n_rows, n_cols, counter)
            ax.set_yticklabels([])

            # if data is given then plot it
            if data is not None:
                try:
                    if unique:
                        t_data = data[name].dropna().unique()
                    else:
                        t_data = data[name].dropna().values
                    # if name == 'v':
                    #     t_data = np.concatenate((t_data, -t_data))
                    ax.hist(t_data, 20, normed=True)
                except KeyError:
                    pass

            # generate pdf
            xlim = np.arange(lb.get(name, 0.001), ub[name], 0.01)
            pdf = np.zeros(len(xlim))
            for i in range(len(pdf)):
                if not trans:
                    node.value = xlim[i]
                    pdf[i] = np.exp(node.logp)
                else:
                    node.value = pm.logit(xlim[i])
                    pdf[i] = np.exp(node.logp) * 10

            # plot the pdf
            plt.plot(xlim, pdf)
            plt.title(name)

        # add suptitle
        if n_subjs > 1:
            plt.suptitle('Group model')
        else:
            plt.suptitle('HDDM Informative model')
### @export 'more-remission'
reload(book_graphics)

for i, k_i in enumerate(model.parameters[t]['parameter_age_mesh']):
    model.vars['f']['gamma'][i].value = pl.log(k_i * .005 + .001)

book_graphics.plot_age_patterns(model, types='i r m f p'.split(),
                                xticks=[0, 50, 100],
                                yticks=dict(i=[0, .01, .02], r=[0, .05, .1],
                                            m=[0, .2, .4], f=[0, .3, .6],
                                            p=[0, .01, .02]),
                                panel='a')
pl.subplots_adjust(wspace=.5)
pl.savefig('book/graphics/more-excess-mortality.pdf')

# <codecell>

### @export 'birth_prevalence'
p_0 = .015
model.vars['logit_C0'].value = mc.logit(p_0)

p = model.vars['p']['mu_age'].value
print """
For a condition with prevalence of %.1f\\%% at age $0$, these rates yield a
prevalence age pattern which is highly nonlinear, dipping to a minimum of
%.1f\\%% at age %d, and then increasing back up to %.1f\\%% at the oldest ages.
""" % (p_0 * 100, p.min() * 100, p.argmin(), p[-1] * 100)

book_graphics.plot_age_patterns(model, types='i r m f p'.split(),
                                xticks=[0, 50, 100],
                                yticks=dict(i=[0, .01, .02], r=[0, .05, .1],
                                            m=[0, .2, .4], f=[0, .3, .6],
                                            p=[.01, .015, .02]),
                                panel='b')
pl.savefig('book/graphics/birth-prevalence.pdf')

# <codecell>