def model_gen():
    """Build the PyMC nodes for a power-law fit ``measured ~ a * Re**b``.

    Nodes created:
      * ``stdev``   -- noise scale, normal(mu=400, sd=400) truncated to [0, Inf)
      * ``a``       -- prefactor, normal(mu=1, sd=30) truncated to [0, Inf)
      * ``b``       -- exponent, uniform on [0.05, 2.0]
      * ``results`` -- observed normal likelihood on ``measured``

    ``ReData``, ``measured`` and ``Inf`` are read from the enclosing module.

    Returns:
        list: ``[stdev, a, b, results]``, in that order.
    """
    varlist = []

    stdev = pymc.TruncatedNormal('stdev', mu=400, tau=1.0 / (400**2), a=0, b=Inf)
    varlist.append(stdev)

    @pymc.deterministic
    def precision(stdev=stdev):
        # The likelihood below takes a precision (tau), not a standard deviation.
        return 1.0 / (stdev**2)

    a = pymc.TruncatedNormal('a', mu=1, tau=1.0 / (30**2), a=0, b=Inf)
    b = pymc.Uniform('b', lower=.05, upper=2.0)
    varlist.append(a)
    varlist.append(b)

    @pymc.deterministic
    def nonlinear(Re=ReData, value=measured, a=a, b=b, observed=True):
        # Use the declared parent ``Re`` instead of reaching for the global
        # ``ReData``. ``value`` and ``observed`` are unused here; they are
        # kept only so the node's signature stays unchanged.
        return a * Re**b

    results = pymc.Normal('results', mu=nonlinear, tau=precision,
                          value=measured, observed=True)
    varlist.append(results)
    return varlist
def model(x, f):
    # Builds a two-pulse PyMC model and returns every local name via locals().
    #
    # NOTE: because locals() is the return value, the local variable names
    # below (including ``x``, ``f`` and ``y_err``) are part of this function's
    # output; do not rename them.
    # ``f`` is not referenced in the body, and ``x`` is only shadowed by the
    # ``x=time`` default of the nested deterministic.
    # Reads from the enclosing module: sigma0, _t_initial, _t_final, time,
    # signal, sum_mu, sum_tau, sum_a, sum_b, diff_tau, diff_a, diff_b, one_pulse.

    # PRIORS
    y_err = sigma0
    # print (_t_initial,_t_final, one_x_offset_init)
    # First pulse offset is bounded above by the time at which the signal peaks.
    one_x_offset = pymc.Uniform("one_x_offset", _t_initial, time[np.argmax(signal)], value=_t_initial)
    two_x_offset = pymc.Uniform("two_x_offset", _t_initial, _t_final, value=_t_final)
    sum_of_amps = pymc.TruncatedNormal("sum_amps", mu=sum_mu, tau=sum_tau, a=sum_a, b=sum_b, value=sum_mu)  #sigma/mu is the n=1 std deviation in units of n=1 amplitude
    diff_of_amps = pymc.TruncatedNormal("diff_amps", mu=0, tau=diff_tau, a=diff_a, b=diff_b, value=0)
    # The two amplitudes are recovered from their sum and difference.
    one_x_amplitude = (sum_of_amps+diff_of_amps)/2
    two_x_amplitude = (sum_of_amps-diff_of_amps)/2

    # MODEL
    @pymc.deterministic(plot=False)
    def mod_two_pulse(x=time, one_x_offset=one_x_offset, two_x_offset=two_x_offset, one_x_amplitude=one_x_amplitude, two_x_amplitude=two_x_amplitude):
        # Sum of two one_pulse() shapes evaluated on the same time axis.
        return one_pulse(x, x_offset=one_x_offset, amplitude=one_x_amplitude)+\
            one_pulse(x, x_offset=two_x_offset, amplitude=two_x_amplitude)

    #likelihoodsy
    # Observed normal likelihood; tau is the precision 1 / y_err**2.
    y = pymc.Normal("y", mu=mod_two_pulse, tau= 1.0/y_err**2, value=signal, observed=True)
    return locals()
def test_aesara_switch_broadcast_edge_cases_2(self):
    """Check the gradient of the model logp w.r.t. ``mu`` at ``mu = 0``.

    Known issue 2: https://github.com/pymc-devs/pymc/issues/4417
    """
    # fmt: off
    observations = np.array([
        1.35202174, -0.83690274, 1.11175166, 1.29000367, 0.21282749,
        0.84430966, 0.24841369, 0.81803141, 0.20550244, -0.45016253,
    ])
    # fmt: on

    with pm.Model() as model:
        mu = pm.Normal("mu", 0, 5)
        # Registered on the model under the name "obs"; no Python handle is needed.
        pm.TruncatedNormal("obs", mu=mu, sigma=1, lower=-1, upper=2, observed=observations)

    mu_value_var = model.rvs_to_values[mu]
    dlogp_fn = model.dlogp([mu_value_var])
    gradient_at_zero = dlogp_fn({"mu": 0})

    npt.assert_allclose(gradient_at_zero, 2.499424682024436, rtol=1e-5)
def make_model(self):
    # Builds a pymc.Model from every local name defined in this function.
    #
    # NOTE: pymc.Model(locals()) receives all locals (including ``self``,
    # ``scores``, ``n_games`` ...), so the local names are effectively part of
    # the model definition; do not rename or add locals casually.
    # Reads from self: get_data(), n_teams, prob_func, scale, inital_theta,
    # marginal_mat. ``dot`` is expected to be in module scope.
    scores,team_idx = self.get_data()
    n_teams = self.n_teams
    n_games = len(scores)
    prob_func = self.prob_func
    scale = self.scale #Just set as a constant now, too lazy
    # NOTE(review): the constant above is immediately replaced by a stochastic
    # node with the same name; np.power(1/5.0,2) evaluates to 0.04.
    scale = pymc.TruncatedNormal( 'scale',mu = self.scale, tau = np.power(1/5.0,2),value=self.scale ,a = 0,b = 10)
    #expo = pymc.Uniform( 'expo',0.45,1,value=0.5)
    expo = 0.5
    #need to put in initial seeding stuff
    theta_i = pymc.Normal('theta_i', mu = 0, tau = np.power(1/3.0,2), value=self.inital_theta )

    @pymc.deterministic()
    def theta(beta=theta_i):
        # Centre the raw parameters: subtract their mean over n_teams.
        return beta - sum(beta)/(1.0*n_teams)

    @pymc.stochastic(observed=True)
    def games_played(value=scores ,sp=scale,alpha = theta,pow=expo):
        # Observed node: sums prob_func over all games, using the two columns
        # of ``value`` and dot(team_idx, alpha) as the per-game predictor.
        # NOTE(review): presumably prob_func returns log-probabilities; confirm.
        return sum(prob_func(dot(team_idx, alpha), value[:,0], value[:,1], sp, pow))

    @pymc.deterministic()
    def marginal_delta(beta = theta):
        # Linear map of the centred parameters through self.marginal_mat.
        return np.dot(self.marginal_mat,beta)

    return pymc.Model(locals())
def makeModelPrior(self, manager, parts):
    """Attach the concentration and mass priors to ``parts``.

    Sets ``parts.cdelta`` (either the fixed ``options.concentration`` or a
    deterministic ``10**log10concentration``), copies ``options.delta`` to
    both ``manager.massdelta`` and ``parts.massdelta``, and creates the
    uniform ``parts.scaledmdelta`` prior together with the rescaled
    ``parts.mdelta`` deterministic. ``massscale`` is read from module scope.
    """
    options = manager.options

    if options.concentration is not None:
        # A concentration was supplied: use it as a constant.
        parts.cdelta = options.concentration
    else:
        log10_lower = np.log10(1.)
        log10_upper = np.log10(10.)
        # Positional order is (name, mu, tau, a, b) -- the third value is tau!
        parts.log10concentration = pymc.TruncatedNormal(
            'log10concentration', 0.6, 1. / 0.116**2, log10_lower, log10_upper)

        @pymc.deterministic
        def cdelta(log10concentration=parts.log10concentration):
            return 10**log10concentration

        parts.cdelta = cdelta

    manager.massdelta = options.delta
    parts.massdelta = options.delta

    scaled_low = options.masslow / massscale
    scaled_high = options.masshigh / massscale
    parts.scaledmdelta = pymc.Uniform('scaledmdelta', scaled_low, scaled_high)

    @pymc.deterministic
    def mdelta(scaledmdelta=parts.scaledmdelta):
        # Undo the scaling applied to the prior bounds.
        return massscale * scaledmdelta

    parts.mdelta = mdelta
class likelihood_model:
    """PyMC node container for a signal + background counting model.

    The names ``s``, ``muT``, ``b``, ``sigmas``, ``sigmab`` and ``self`` are
    not defined in this class body; they must be resolvable from an enclosing
    scope (NOTE(review): presumably this class is declared inside a method --
    confirm against the surrounding file).
    """

    # Stochastic variables for signal and background event rates.
    # Plain normals would allow negative rates, so both are truncated at zero.
    # ``1.0 /`` keeps the precision a float even if the sigmas are integers
    # (under Python 2, ``1/sigmas**2`` would truncate to 0 in that case).
    signal_rate = pymc.TruncatedNormal('signal_rate', mu=s*muT,
                                       tau=1.0/sigmas**2, a=0, b=np.inf)
    background_rate = pymc.TruncatedNormal('background_rate', mu=b,
                                           tau=1.0/sigmab**2, a=0, b=np.inf)

    # Deterministic variable (simply the sum of the signal and background rates)
    total_rate = pymc.LinearCombination('total_rate', [1, 1],
                                        [signal_rate, background_rate])

    # Stochastic variable for number of observed events
    observed_events = pymc.Poisson('observed_events', mu=total_rate)

    # Deterministic variable for the test statistic
    @pymc.deterministic()
    def qCLs(n=observed_events):
        # QCLs returns a pair; only the first element is used here.
        q, _chi2B = self.QCLs(n, s)
        return q
def model(x, f):
    """Build a two-pulse PyMC model and return all local names.

    Priors: two uniform time offsets and two truncated-normal amplitudes
    centred on 1. The likelihood ``y`` is an observed normal on ``signal``.

    ``f`` is not referenced in the body (the pulse shape comes from the
    module-level ``f_model``), and ``x`` is only shadowed by the ``x=time``
    default of the nested deterministic.

    Returns:
        dict: ``locals()`` -- the local variable names are therefore part of
        the return value and must not be renamed.
    """
    one_x_offset = pymc.Uniform("one_x_offset", _t_initial, _t_peak, value=_t_initial)
    two_x_offset = pymc.Uniform("two_x_offset", _t_initial, _t_final, value=_t_final)
    # Both amplitudes share the same prior: mean 1, truncated symmetrically at
    # 1 +/- ampl_limit * sigma_ampl.
    one_amplitude = pymc.TruncatedNormal("one_amplitude", mu=1, tau=tau, a=1 - ampl_limit * sigma_ampl, b=1 + ampl_limit * sigma_ampl, value=1)
    two_amplitude = pymc.TruncatedNormal("two_amplitude", mu=1, tau=tau, a=1 - ampl_limit * sigma_ampl, b=1 + ampl_limit * sigma_ampl, value=1)

    @pymc.deterministic(plot=False)
    def mod_two_pulse(x=time, one_x_offset=one_x_offset, two_x_offset=two_x_offset, one_amplitude=one_amplitude, two_amplitude=two_amplitude):
        # Sum of two f_model() pulses evaluated on the same time axis.
        return f_model(x, x_offset=one_x_offset, amplitude=one_amplitude) +\
            f_model(x, x_offset=two_x_offset, amplitude=two_amplitude)

    y = pymc.Normal("y", mu=mod_two_pulse, tau=tau_rms, value=signal, observed=True)
    return locals()
def model_gen():
    """Build the PyMC nodes for an offset power-law fit.

    The mean of the likelihood is ``a * Re**b + z`` where
    ``a = fakeA * maxRe**(-b)``.

    Nodes created:
      * ``stdev`` -- noise scale, normal(mu=400, sd=400) truncated to [0, Inf)
      * ``a``     -- stochastic named ``'a'`` (Python name ``fakeA``),
                     normal(mu=1, sd=50) truncated to [0, Inf)
      * ``b``     -- exponent, uniform on [0.05, 2.0]
      * ``zero``  -- additive offset, normal(mu=0, sd=400)
      * ``results`` -- observed normal likelihood on ``measured``

    ``ReData``, ``measured``, ``maxRe`` and ``Inf`` are read from the
    enclosing module.

    Returns:
        list: ``[stdev, fakeA, a, b, z, results]``, in that order.
    """
    varlist = []

    stdev = pymc.TruncatedNormal('stdev', mu=400, tau=1.0 / (400**2), a=0, b=Inf)
    varlist.append(stdev)

    @pymc.deterministic
    def precision(stdev=stdev):
        # The likelihood below takes a precision (tau), not a standard deviation.
        return 1.0 / (stdev**2)

    fakeA = pymc.TruncatedNormal('a', mu=1, tau=1.0 / (50**2), a=0, b=Inf)
    b = pymc.Uniform('b', lower=.05, upper=2.0)
    # Rescale the sampled prefactor by maxRe**(-b).
    a = fakeA * maxRe**(-b)
    z = pymc.Normal('zero', mu=0, tau=1.0 / (400**2))
    varlist.append(fakeA)
    varlist.append(a)
    varlist.append(b)
    varlist.append(z)

    @pymc.deterministic
    def nonlinear(Re=ReData, value=measured, a=a, b=b, z=z, observed=True):
        # Use the declared parent ``Re`` instead of reaching for the global
        # ``ReData``. ``value`` and ``observed`` are unused here; they are
        # kept only so the node's signature stays unchanged.
        return (a * Re**b) + z

    results = pymc.Normal('results', mu=nonlinear, tau=precision,
                          value=measured, observed=True)
    varlist.append(results)
    return varlist
def model_gen():
    """Build the PyMC nodes of a one-factor model for the observed returns.

    The mean of each return is ``factor * loading`` and each dimension has
    its own residual standard deviation. ``observations``, ``dimensions``,
    ``shape``, ``data`` and the numpy names (``zeros``, ``ones``, ``Inf``,
    ``newaxis``) are read from the enclosing module.

    Returns:
        list: ``[factorloadings, residualsds, factormagnitudes, returns]``.
    """
    factor_scores = pymc.Normal(
        "factormagnitudes",
        mu=zeros(observations),
        tau=ones(observations),
    )

    # Only the first loading is constrained to be non-negative.
    lower_bounds = ones(dimensions) * -Inf
    lower_bounds[0] = 0.0
    factor_loadings = pymc.TruncatedNormal(
        "factorloadings",
        mu=ones(dimensions),
        tau=ones(dimensions) * (1**-2),
        a=lower_bounds,
        b=Inf,
    )

    residual_sds = pymc.Gamma(
        "residualsds",
        alpha=ones(dimensions) * 1,
        beta=ones(dimensions) * .5,
    )

    @pymc.deterministic
    def returnPrecisions(stdev=residual_sds):
        # Broadcast the per-dimension precision across ``shape`` and flatten.
        return (ones(shape) * (stdev**-2)[:, newaxis]).ravel()

    @pymc.deterministic
    def meanReturns(factors=factor_scores, loadings=factor_loadings):
        # Outer product loadings x factors, flattened.
        return (factors[newaxis, :] * loadings[:, newaxis]).ravel()

    returns = pymc.Normal(
        "returns",
        mu=meanReturns,
        tau=returnPrecisions,
        observed=True,
        value=data.ravel(),
    )

    return [factor_loadings, residual_sds, factor_scores, returns]
def generate_pymc_(self, params, q0=None):
    '''
    Creates PyMC objects for each entry of ``self.params``.

    Each entry maps a name to a list whose first item is the distribution
    name (case-insensitive) followed by its arguments:

        'Normal'          : [name, mu, variance]
        'Uniform'         : [name, lower, upper]
        'DiscreteUniform' : [name, lower, upper]
        'TruncatedNormal' : [name, mu, variance, a, b]
        'KDE'             : [name, <pymc_database>, <param_names>]

    NOTE: the second argument for normal distributions is VARIANCE; it is
    converted to a precision (tau = 1 / variance) before being passed on.

    Prior option: an arbitrary prior distribution derived from a set of
    samples (e.g., a previous mcmc run) can be passed with the following
    syntax:

        {<name> : ['KDE', <pymc_database>, <param_names>]}

    where <name> is the name of the distribution (e.g., 'prior' or
    'joint_dist'), <pymc_database> is the pymc database containing the
    samples from which the prior distribution will be estimated, and
    <param_names> are the children parameter names corresponding to the
    dimension of the desired sample array. This method will use all samples
    of the Markov chain contained in <pymc_database> for all traces named in
    <param_names>. Gaussian kernel-density estimation is used to derive the
    joint parameter distribution, which is then treated as a prior in
    subsequent mcmc analyses using the current class instance. The
    parameters named in <param_names> will be traced as will the
    multivariate distribution named <name>.

    Args:
        params: accepted for interface compatibility. NOTE(review): the body
            iterates ``self.params`` and never reads this argument.
        q0: optional mapping of initial values keyed by parameter name. For
            a 'KDE' entry the whole ``q0`` object is assigned as the value.

    Returns:
        tuple: ``(parents, pymc_mod, pymc_mod_order)`` -- a dict of nodes by
        name, the flat list of all created nodes, and the list of top-level
        names in iteration order.

    Raises:
        KeyError: if a distribution name is not supported.
    '''
    pymc_mod = []
    pymc_mod_order = []
    parents = dict()

    # Iterate through self.params, assign prior distributions
    for key, args in self.params.iteritems():

        # Distribution name should be first entry in [key]
        dist = args[0].lower()

        if dist == 'kde':
            kde = multivariate_kde_from_samples(args[1], args[2])
            kde_rv, rvs = self._create_kde_stochastic(kde, key, args[2])
            if q0 is not None:
                kde_rv.value = q0
            RV = [kde_rv]
            for rv_key, rv_value in rvs.iteritems():
                parents[rv_key] = rv_value
                RV.append(rv_value)
        else:
            # Pick the constructor and its keyword arguments once, then add
            # the optional starting value in a single place.
            if dist == 'normal':
                factory = pymc.Normal
                kwargs = dict(mu=args[1], tau=1. / args[2])
            elif dist == 'uniform':
                factory = pymc.Uniform
                kwargs = dict(lower=args[1], upper=args[2])
            elif dist == 'discreteuniform':
                factory = pymc.DiscreteUniform
                kwargs = dict(lower=args[1], upper=args[2])
            elif dist == 'truncatednormal':
                factory = pymc.TruncatedNormal
                kwargs = dict(mu=args[1], tau=1. / args[2],
                              a=args[3], b=args[4])
            else:
                raise KeyError('The distribution "' + dist +
                               '" is not supported.')

            if q0 is not None:
                kwargs['value'] = q0[key]
            RV = [factory(key, **kwargs)]

        parents[key] = RV[0]
        pymc_mod_order.append(key)
        pymc_mod += RV

    return parents, pymc_mod, pymc_mod_order
def makeModelPrior(self, manager, parts):
    """Attach the concentration, mass and scale-radius nodes to ``parts``.

    * ``parts.concentration`` is the fixed ``options.concentration`` when one
      is given, otherwise a deterministic ``10**log10concentration`` driven
      by a truncated-normal prior on ``parts.log10concentration``.
    * ``parts.mass_15mpc`` is uniform in the mass, or (when
      ``options.logprior`` is set) ``10**logmass`` with a uniform prior on
      ``parts.logmass_15mpc``.
    * ``parts.r_scale`` is computed from mass, concentration and
      ``parts.zcluster`` with ``nfwutils.RsMassInsideR(..., 1.5)``.

    ``parts.zcluster`` must already be set before this is called.
    """
    options = manager.options

    if options.concentration is None:
        # Positional order is (name, mu, tau, a, b) -- the third value is tau!
        parts.log10concentration = pymc.TruncatedNormal(
            'log10concentration', 0.6, 1. / 0.116**2,
            np.log10(1.), np.log10(10.))

        @pymc.deterministic
        def concentration(log10concentration=parts.log10concentration):
            return 10**log10concentration

        parts.concentration = concentration
    else:
        parts.concentration = options.concentration

    if options.logprior:
        parts.logmass_15mpc = pymc.Uniform('logmass_15mpc',
                                           np.log10(options.masslow),
                                           np.log10(options.masshigh))

        @pymc.deterministic(name='mass_15mpc')
        def mass_15mpc(logmass=parts.logmass_15mpc):
            return 10**logmass

        parts.mass_15mpc = mass_15mpc
    else:
        parts.mass_15mpc = pymc.Uniform('mass_15mpc', options.masslow,
                                        options.masshigh)

    @pymc.deterministic
    def r_scale(mass=parts.mass_15mpc,
                concentration=parts.concentration,
                zcluster=parts.zcluster):
        # A ValueError from the conversion marks this parameter combination
        # as impossible for the sampler.
        try:
            rs = nfwutils.RsMassInsideR(mass, concentration, zcluster, 1.5)
        except ValueError:
            raise pymc.ZeroProbability
        return rs

    # An earlier, disabled variant of this method used a uniform ``m200``
    # prior with nfwutils.rscaleConstM(mass, concentration, zcluster, 200.).
    # Store the node so it is not discarded when this method returns.
    parts.r_scale = r_scale
# NOTE(review): this is a fragment -- ``data``, ``Bdr``, ``inv_var``,
# ``X_mat``, ``num_sessions`` and ``deterministic`` are defined outside the
# visible code.

# Collect, for every session, the coefficient node of the provider (dr) that
# took part in it. ``session_num`` is not used.
session_betas = []
for session_num, session_provider in enumerate(data.dr_id):
    session_betas.append(Bdr[int(session_provider)])

# the Betas to use for each session (which correspond to the
# dr that participated in them).
SB = pymc.Container(session_betas)

###
# setup the cut-off point parameters (lambda's)
# for this we will use truncated normals
#lambda_inv_var = 1e-5
lambdas = [pymc.Normal('lambda_0', 0, inv_var)]
for i in xrange(3):
    # Each cut-off is truncated below at the previous one, which keeps the
    # four lambdas ordered.
    lambdas.append(
        pymc.TruncatedNormal('lambda_%s' % (i + 1), (i + 1), inv_var, lambdas[i], numpy.inf))

lambdas = pymc.Container(lambdas)

#-------------------- model ------------------#
@deterministic()
def y_hat(X=X_mat, session_betas=SB):
    # y_hat = x_i * beta_i
    # where beta_i are coefficients corresponding
    # to the dr who participated in session i.
    # NOTE(review): no ``return`` is visible in this fragment -- the function
    # appears to be cut off here; confirm against the full file.
    out = numpy.zeros(num_sessions)
    for i, x_i in enumerate(X):
        beta_i = session_betas[i]
        #out = out + numpy.dot(x_i, beta_i)
        #print numpy.dot(x_i, beta_i)[0][0]
        out[i] = numpy.dot(x_i, beta_i)[0]
def pdf(trace, keys, labels=None, color='0.2', facecolor='C0', line_alpha=1.0,
        face_alpha=0.9, plot_prior=False, params=None, xylim=None,
        figsize=None, ylabel=None, nbins_x=3, nbins_y=6, fname='pdfs.png',
        truth=None):
    '''
    Plots the probability distribution function of the parameters defined by
    "trace", "keys" and "labels" along with their associated chains obtained
    from MCMC sampling.

    One row is drawn per key: the chain on the left, a Gaussian-KDE estimate
    of its density on the right. Keys for which ``trace(key)`` raises
    KeyError are reported and skipped.

    Args:
        trace: callable returning the chain for a key (sliceable with [:]).
        keys: parameter names to plot.
        labels: axis labels, parallel to ``keys`` (defaults to ``keys``).
        color, facecolor, line_alpha, face_alpha: either a single value used
            for every key or a list parallel to ``keys``; a shorter list is
            cycled.
        plot_prior: if true, overlay a sampled prior for keys whose
            ``params[key][0]`` is 'TruncatedNormal'.
        params: dict of prior definitions, only read when ``plot_prior``.
        xylim: optional dict of per-key limits applied to the parameter axis
            of both panels.
        figsize: figure size; defaults to ``[10, 10 * len(keys) / 3]``.
        ylabel: label for the density axis (default 'Probability density').
        nbins_x, nbins_y: tick-locator bin counts.
        fname: output image path.
        truth: optional dict of true values marked on the density panels.

    Returns:
        The current matplotlib figure.

    Raises:
        TypeError: if ``truth`` is given but is not a dict.
    '''
    def _style_for(option, index):
        # A list supplies one entry per key; any other value applies to all.
        if isinstance(option, list):
            return option[index % len(option)]
        return option

    print('plotting parameter chains/pdfs...')

    # set up labels if not provided
    if labels is None:
        labels = keys

    # handle extra keys that are not in trace
    i_keep = []
    for i, k in enumerate(keys):
        try:
            trace(k)
        except KeyError:
            print('param <%s> is not in trace; skipping this pdf plot.' % k)
        else:
            i_keep.append(i)
    keys = [keys[i] for i in i_keep]
    labels = [labels[i] for i in i_keep]

    # plot
    if figsize is None:
        fig = plt.figure(figsize=[10, 10 * len(keys) / 3])
    else:
        fig = plt.figure(figsize=figsize)

    ax_right = []
    ax_left = []
    for i, key in enumerate(keys):
        # Resolve the per-key style without overwriting the arguments, so a
        # list still supplies a distinct entry on every iteration. Index by
        # the key's original position so skipped keys do not shift styles.
        key_facecolor = _style_for(facecolor, i_keep[i])
        key_color = _style_for(color, i_keep[i])
        key_face_alpha = _style_for(face_alpha, i_keep[i])
        key_line_alpha = _style_for(line_alpha, i_keep[i])

        chain = trace(key)[:]

        # define left and right axes (left = chains, right = pdfs)
        ax_left += [fig.add_subplot(len(keys), 2, i * 2 + 1)]
        ax_right += [fig.add_subplot(len(keys), 2, i * 2 + 2)]

        # plot left
        ax_left[i].plot(chain, color=key_color, alpha=key_line_alpha,
                        linewidth=1)
        ax_left[i].set_ylabel(labels[i])
        ax_left[i].set_xlabel('Chain iteration')
        ax_left[i].locator_params(nbins=nbins_x, axis='x')
        ax_left[i].locator_params(nbins=nbins_y, axis='y')

        # plot right
        x = np.linspace(min(chain), max(chain), 1000)
        y = gaussian_kde(chain).pdf(x)
        ax_right[i].fill_between(x, np.tile(0, y.shape), y,
                                 facecolor=key_facecolor,
                                 alpha=key_face_alpha)
        ax_right[i].plot(x, y, color=key_color)
        ax_right[i].set_xlabel(labels[i])
        if ylabel is None:
            ax_right[i].set_ylabel('Probability density')
        else:
            ax_right[i].set_ylabel(ylabel)
        ax_right[i].locator_params(nbins=nbins_x, axis='x')
        ax_right[i].locator_params(nbins=nbins_y, axis='y')

        # plot prior as dotted line if requested
        if plot_prior:
            print('plot priror = True')
            print(params)
            if params is not None:
                print('params != None = True')
                if params[key][0] == 'TruncatedNormal':
                    print('truncatednorm = True')
                    # Draw from the prior with a throwaway sampler and
                    # smooth the draws for plotting.
                    predictive = pymc.TruncatedNormal('predictive',
                                                      params[key][1],
                                                      params[key][2],
                                                      params[key][3],
                                                      params[key][4])
                    model = pymc.Model({"pred": predictive})
                    mcmc = pymc.MCMC(model)
                    mcmc.sample(10000, 1000)
                    samples = mcmc.trace('predictive')[:]
                    print(samples)
                    kde = sm.nonparametric.KDEUnivariate(samples)
                    kde.fit()
                    x_prior = kde.support
                    y_prior = kde.density
                    ax_right[i].plot(x_prior, y_prior, '--', color='k')

        if truth is not None:
            if isinstance(truth, dict):
                ax_right[i].plot(truth[key], 0., 'k^')
            else:
                raise TypeError('truth must be dictionary w/ params as keys')

        # set parameter axis limits if provided; otherwise anchor the
        # density axis at zero
        if xylim is not None and key in xylim:
            ax_right[i].set_xlim(xylim[key])
            ax_left[i].set_ylim(xylim[key])
        else:
            # ``bottom`` is the supported keyword (``ymin`` was removed from
            # newer matplotlib releases).
            ax_right[i].set_ylim(bottom=0)
        ax_left[i].set_xlim([0, len(chain)])

    fig.tight_layout()
    plt.savefig(fname, dpi=300)
    return plt.gcf()
def complete_model(self):
    # Builds the full PyMC2 model (gas, metals, helium and stellar priors,
    # derived flux predictions and three observed likelihoods) and returns
    # every local name via locals().
    #
    # NOTE: because locals() is the return value, the local variable names
    # below are part of this method's output; do not rename them.

    # TODO Priors data should go into configuration file

    # Gas parameters
    # The third positional argument is written as error**-2 throughout.
    ne = pymc2.TruncatedNormal('ne', self.obj_data['nSII'], self.obj_data['nSII_error']**-2, a=50.0, b=1000.0)
    cHbeta = pymc2.TruncatedNormal('cHbeta', 0.15, 0.05**-2, a=0.0, b=3.0)
    T_low = pymc2.TruncatedNormal('T_low', self.obj_data['TSIII'], self.obj_data['TSIII_error']**-2, a=7000.0, b=20000.0)

    # Metals abundances
    S2_abund = pymc2.Uniform('S2_abund', 0.000001, 0.001)
    S3_abund = pymc2.Uniform('S3_abund', 0.000001, 0.001)
    O2_abund = pymc2.Uniform('O2_abund', 0.000001, 0.001)
    O3_abund = pymc2.Uniform('O3_abund', 0.000001, 0.001)
    N2_abund = pymc2.Uniform('N2_abund', 0.000001, 0.001)
    Ar3_abund = pymc2.Uniform('Ar3_abund', 0.000001, 0.001)
    Ar4_abund = pymc2.Uniform('Ar4_abund', 0.000001, 0.001)

    # Helium parameters
    He1_abund = pymc2.Uniform('He1_abund', 0.050, 0.15)
    tau = pymc2.TruncatedNormal('tau', 0.75, 0.5**-2, a=0.0, b=7.0)
    # NOTE(review): 'cHbeta' is created a second time with identical
    # arguments; this rebinding is the node used by the functions below.
    cHbeta = pymc2.TruncatedNormal('cHbeta', 0.15, 0.05**-2, a=0.0, b=3.0)
    T_He = pymc2.TruncatedNormal('T_He', self.obj_data['TSIII'], self.obj_data['TSIII_error']**-2, a=7000.0, b=20000.0, value=14500.0)

    #Stellar parameters
    Av_star = pymc2.Uniform('Av_star', 0.0, 5.00)
    sigma_star = pymc2.Uniform('sigma_star', 0.0, 5.00)
    # z_star = pymc2.Uniform('z_star', self.z_min_ssp_limit, self.z_max_ssp_limit)
    # One uniform coefficient per base, bounded by the pre-fit limits.
    ssp_coefs = [
        pymc2.Uniform('ssp_coefs_%i' % i, self.sspPrefit_Limits[i][0], self.sspPrefit_Limits[i][1])
        for i in self.range_bases
    ]

    @pymc2.deterministic()
    def calc_Thigh(Te=T_low):
        # Linear relation giving the high temperature from T_low.
        return (1.0807 * Te / 10000.0 - 0.0846) * 10000.0

    @pymc2.deterministic()
    def calc_abund_dict(He1_abund=He1_abund, S2_abund=S2_abund, S3_abund=S3_abund, O2_abund=O2_abund, O3_abund=O3_abund, N2_abund=N2_abund, Ar3_abund=Ar3_abund, Ar4_abund=Ar4_abund):
        # Writes the current abundances into the shared self.abund_iter_dict
        # (mutated in place) and returns that same dict.
        # NOTE(review): the 'H1' entry is assigned He1_abund -- confirm this
        # is intended.
        self.abund_iter_dict['H1'] = He1_abund
        self.abund_iter_dict['He1'] = He1_abund
        self.abund_iter_dict['S2'] = S2_abund
        self.abund_iter_dict['S3'] = S3_abund
        self.abund_iter_dict['O2'] = O2_abund
        self.abund_iter_dict['O3'] = O3_abund
        self.abund_iter_dict['N2'] = N2_abund
        self.abund_iter_dict['Ar3'] = Ar3_abund
        self.abund_iter_dict['Ar4'] = Ar4_abund
        return self.abund_iter_dict

    @pymc2.deterministic
    def calc_colExcit_fluxes(abund_dict=calc_abund_dict, T_low=T_low, T_High=calc_Thigh, ne=ne, cHbeta=cHbeta):
        colExcit_fluxes = self.calculate_colExcit_flux(
            T_low, T_High, ne, cHbeta, abund_dict,
            self.obj_data['colLine_waves'],
            self.obj_data['colLine_ions'],
            self.obj_data['colLine_flambda'])
        return colExcit_fluxes

    @pymc2.deterministic
    def calc_nebular_cont(z_star=self.z_object, cHbeta=self.cHbeta, Te=self.TSIII, He1_abund=He1_abund, He2_abund=0.0, Halpha_Flux=self.f_HalphaNorm):
        # NOTE(review): cHbeta and Te come from attributes of self here, not
        # from the cHbeta / T_low nodes defined above -- confirm intent.
        neb_flux_norm = self.nebular_Cont(self.input_wave, z_star, cHbeta, Te, He1_abund, He2_abund, Halpha_Flux)
        return neb_flux_norm

    @pymc2.deterministic
    def calc_continuum(z_star=self.z_object, Av_star=Av_star, sigma_star=sigma_star, ssp_coefs=ssp_coefs, nebular_flux=calc_nebular_cont):
        # Weighted sum of the SED grid plus the nebular continuum.
        ssp_grid_i = self.physical_SED_model(self.onBasesWave, self.input_wave, self.onBasesFluxNorm, Av_star, z_star, sigma_star, self.Rv_model)
        fit_continuum = ssp_grid_i.dot(ssp_coefs) + nebular_flux
        return fit_continuum

    @pymc2.deterministic
    def calc_recomb_fluxes(abund_dict=calc_abund_dict, T_He=T_He, ne=ne, cHbeta=cHbeta, tau=tau):
        recomb_fluxes = self.calculate_recomb_fluxes(
            T_He, ne, cHbeta, tau, abund_dict,
            self.obj_data['recombLine_labes'],
            self.obj_data['recombLine_ions'],
            self.obj_data['recombLine_flambda'])
        return recomb_fluxes

    #QUESTION Issues with more than one likelihood
    @pymc2.stochastic(observed=True)  # Likelihood
    def likelihood_ssp(value=self.input_continuum, fit_continuum=calc_continuum, sigmaContinuum=self.input_continuum_er):
        # Returns -chi**2 / 2 of the masked model continuum against ``value``.
        calc_continuum_masked = fit_continuum * self.obj_data['int_mask']
        chi_F = sum(
            square(calc_continuum_masked - value) / square(sigmaContinuum))
        return -chi_F / 2

    @pymc2.stochastic(observed=True)  # Likelihood
    def likelihood_recomb(value=self.recomb_fluxes, H_He_TheoFlux=calc_recomb_fluxes, sigmaLines=self.recomb_err):
        # Returns -chi**2 / 2 of the predicted recombination fluxes.
        chi_F = sum(square(H_He_TheoFlux - value) / square(sigmaLines))
        return -chi_F / 2

    @pymc2.stochastic(observed=True)  # Likelihood
    def likelihood_colExcited(value=self.colExc_fluxes, theo_metal_fluzes=calc_colExcit_fluxes, sigmaLines=self.colExc_fluxes):
        # NOTE(review): sigmaLines defaults to self.colExc_fluxes, the same
        # attribute as ``value`` -- confirm an error array was not intended.
        chi_F = sum(square(theo_metal_fluzes - value) / square(sigmaLines))
        return -chi_F / 2

    return locals()
import numpy as np
from scipy.stats import truncnorm as tn
import pymc

# Earlier scipy-based check of a truncated normal, kept for reference:
#mu = 25.0
#sigma = 11.25
#a = 1.0
#b = 650.0
#vals = tn(a=a, b=b, loc=mu, scale=sigma)
#plt.hist(vals.rvs(100000), bins=50)
#plt.xlim(0, 100)
#plt.show()

# Number of prior draws per parameter.
N = 10000


def _prior_draws(distribution, name, n_draws, **prior_kwargs):
    """Return ``n_draws`` initial values, one from each freshly built node."""
    return [distribution(name, **prior_kwargs).value for _ in xrange(n_draws)]


Vcmax = _prior_draws(pymc.TruncatedNormal, 'Vcmax25', N,
                     mu=100.0, tau=1.0/61.25**2, a=0.0, b=650.0)
Jfac = _prior_draws(pymc.TruncatedNormal, 'Jfac', N,
                    mu=1.8, tau=1.0/0.5**2, a=0.0, b=5.0)
Rdfac = _prior_draws(pymc.Uniform, 'Rdfac', N,
                     lower=0.005, upper=0.05)
Eaj = _prior_draws(pymc.TruncatedNormal, 'Eaj', N,
                   mu=40000.0, tau=1.0/10000.0**2, a=0.0, b=199999.9)
Eav = _prior_draws(pymc.TruncatedNormal, 'Eav', N,
                   mu=60000.0, tau=1.0/10000.0**2, a=0.0, b=199999.9)
def make_model(data, mi_mean_min, mi_mean_max, GF_mean_min, GF_mean_max, constant_proliferation = False):
    # Builds a hierarchical step-function model with one set of nodes per
    # ``ID`` group in ``data`` and returns every local name via locals().
    #
    # Parameters:
    #   data: DataFrame with (at least) the columns 'ID', 'pos', 'time',
    #       'm', 'PCNA' and 'SOX2' (all are indexed below).
    #   mi_mean_min, mi_mean_max: bounds of the uniform priors on the
    #       population-level ``mi`` values.
    #   GF_mean_min, GF_mean_max: bounds of the uniform priors on the
    #       population-level ``GF`` values.
    #   constant_proliferation: if true, the right-hand population values
    #       reuse the left-hand nodes and every switchpoint is fixed at 0.0.
    #
    # NOTE: because locals() is the return value, the local variable names
    # are part of this function's output; do not rename them.
    # ``outgrowth`` is read from module scope.

    # Per-ID containers, filled in the loop below.
    # SOX2_mean_left / SOX2_mean_right are never filled in this function.
    values_SOX2 = {}
    values_m = {}
    values_nonPCNA = {}
    switchpoint = {}
    mi_left = {}
    GF_left = {}
    SOX2_mean_left = {}
    mi_right = {}
    GF_right = {}
    SOX2_mean_right = {}
    cells_SOX2_float = {}
    cells_nonPCNA = {}
    cells_m = {}

    ls = 50.0 # length of section
    l = pd.read_csv('../../data/cell_length_data.csv')['cell_length'].mean() # length of cell

    def step_function(x, switchpoint, left_value, right_value):
        ''' This function should return something in the same format as the passed array

          Specifically, it produces an output that has an array of the same size of the experimental data
          but whose contents are the lower average until the switchpoint, and the upper average past the switchpoint.
          For all purposes, this builds the model to which we want to compare the data.
        '''
        return sp.where(x<=switchpoint, left_value, right_value)

    def ma(array, fill_value):
        # Mask NaN entries of ``array`` and record ``fill_value`` on the result.
        return sp.ma.masked_array(array, sp.isnan(array), fill_value = fill_value)

    #data = data.dropna(how='all', subset = ['m', 'PCNA', 'SOX2'])
    # I'll drop all nan because of the potential bug with the binomials (see my question on stackoverflow)
    data = data.dropna(how='all', subset = ['m', 'PCNA', 'SOX2'])
    data = data.sort_values(['ID', 'pos'])

    # priors for global mean values
    # define priors for left side of step function
    mi_left_pop= pymc.Uniform('mi_left_pop', lower = mi_mean_min, upper = mi_mean_max, value = 0.02)
    GF_left_pop = pymc.Uniform('GF_left_pop', lower = GF_mean_min, upper = GF_mean_max, value = 0.8)

    # define priors for right side of step function
    if constant_proliferation:
        mi_right_pop = mi_left_pop
        GF_right_pop = GF_left_pop
    else:
        mi_right_pop = pymc.Uniform('mi_right_pop', lower = mi_mean_min, upper = mi_mean_max, value = 0.04)
        GF_right_pop = pymc.Uniform('GF_right_pop', lower = GF_mean_min, upper = GF_mean_max, value = 0.9)

    # stepsizes
    @pymc.deterministic(name='step_mi', plot=True)
    def step_mi(mi_left = mi_left_pop, mi_right = mi_right_pop):
        return mi_right - mi_left

    @pymc.deterministic(name='step_GF', plot=True)
    def step_GF(GF_left = GF_left_pop, GF_right = GF_right_pop):
        return GF_right - GF_left

    # prior distribution for sigma beeing uniformly distributed
    GF_sigma_inter = pymc.Uniform('GF_sigma_inter', lower = 0.001, upper = 0.2)
    mi_sigma_inter = pymc.Uniform('mi_sigma_inter', lower = 0.001, upper = 0.2)

    # switchpoint
    # These two names only exist when constant_proliferation is false.
    if not constant_proliferation:
        # The upper bound is looked up in ``outgrowth`` using the 'time'
        # value of the first row.
        switchpoint_pop = pymc.Uniform('switchpoint_pop', lower = -2000, upper = outgrowth[data['time'].iloc[0]], value = -500)
        switchpoint_sigma_inter = pymc.Uniform('switchpoint_sigma_inter', lower=1.0, upper=400.0, value = 50)

    for ID, IDdata in data.groupby('ID'):
        # Observed data for this ID, with NaNs masked.
        values_SOX2[ID] = ma(IDdata['SOX2'], 35.5)
        values_nonPCNA[ID] = ma(IDdata['SOX2'] - IDdata['PCNA'], 3.5)
        values_m[ID] = ma(IDdata['m'], 1.5)

        # Model definition

        #priors
        # switchpoint[ID]: for all observables
        if constant_proliferation:
            switchpoint[ID] = 0.0
        else:
            switchpoint[ID] = pymc.Normal('switchpoint_{0}'.format(ID), mu = switchpoint_pop, tau = 1/switchpoint_sigma_inter**2, value = -500, plot = False)

        # number of SOX2 cells
        SOX2_mean = sp.mean(values_SOX2[ID])
        SOX2_std = sp.std(values_SOX2[ID])

        # define priors for left side of step function
        mi_left[ID] = pymc.TruncatedNormal('mi_left_{0}'.format(ID), mu = mi_left_pop, tau = 1.0 / mi_sigma_inter**2, a = 0.0, b = 1.0, value = 0.02, plot = False)
        GF_left[ID] = pymc.TruncatedNormal('GF_left_{0}'.format(ID), mu = GF_left_pop, tau = 1.0 / GF_sigma_inter**2, a = 0.0, b = 1.0, value = 0.5, plot = False)

        # define priors for right side of step function
        mi_right[ID] = pymc.TruncatedNormal('mi_right_{0}'.format(ID), mu = mi_right_pop, tau = 1.0 / mi_sigma_inter**2, a = 0.0, b = 1.0, value = 0.02, plot = False)
        GF_right[ID] = pymc.TruncatedNormal('GF_right_{0}'.format(ID), mu = GF_right_pop, tau = 1.0 / GF_sigma_inter**2, a = 0.0, b = 1.0, value = 0.5, plot = False)

        # step functions
        # Per-ID values are bound through default arguments, so each node
        # keeps the objects of its own loop iteration.
        @pymc.deterministic(name='mi_{}'.format(ID))
        def mi(positions = sp.array(IDdata['pos']), switchpoint = switchpoint[ID], left_value = mi_left[ID], right_value = mi_right[ID]):
            return step_function(positions, switchpoint, left_value, right_value)

        @pymc.deterministic(name='GF_{}'.format(ID))
        def GF(positions = sp.array(IDdata['pos']), switchpoint = switchpoint[ID], left_value = GF_left[ID], right_value = GF_right[ID]):
            return step_function(positions, switchpoint, left_value, right_value)

        # The defaults capture the float SOX2_mean computed above before the
        # name is rebound to this node; both sides use the same value.
        @pymc.deterministic(name='SOX2_mean_{}'.format(ID))
        def SOX2_mean(positions = sp.array(IDdata['pos']), switchpoint = switchpoint[ID], left_value = SOX2_mean , right_value = SOX2_mean):
            return step_function(positions, switchpoint, left_value, right_value)

        #likelihoods
        cells_SOX2_float[ID] = pymc.Normal('cells_SOX2_float_{0}'.format(ID), mu=SOX2_mean, tau = 1/SOX2_std**2, value = values_SOX2[ID], plot = False, observed = True)

        @pymc.deterministic(name='cells_SOX2_{}'.format(ID))
        def cells_SOX2(csf = cells_SOX2_float[ID]):
            # Rounded counts, used as the binomial ``n`` below.
            return sp.around(csf)

        cells_nonPCNA[ID] = pymc.Binomial('cells_nonPCNA_{0}'.format(ID), n = cells_SOX2, p = (1.0 - GF), value = values_nonPCNA[ID], observed = True, plot = False )

        @pymc.deterministic(name='cells_PCNA_{}'.format(ID))
        def cells_PCNA(cnp = cells_nonPCNA[ID], cs = cells_SOX2):
            return cs - cnp

        @pymc.deterministic(name='cells_PCNA_section_{}'.format(ID))
        def cells_PCNA_section(cp = cells_PCNA, ls = ls, l = l):
            # Scale by section length over mean cell length.
            return cp * ls / l

        cells_m[ID] = pymc.Binomial('cells_m_{0}'.format(ID), n = cells_PCNA_section, p = mi, value = values_m[ID], observed = True, plot = False)

    # Wrap the per-ID dicts in pymc containers before returning.
    # NOTE(review): values_SOX2 is wrapped twice in a row -- looks like an
    # accidental duplicate line; confirm.
    values_SOX2 = pymc.Container(values_SOX2)
    values_SOX2 = pymc.Container(values_SOX2)
    values_m = pymc.Container(values_m)
    values_nonPCNA = pymc.Container(values_nonPCNA)
    switchpoint = pymc.Container(switchpoint)
    mi_left = pymc.Container(mi_left)
    GF_left = pymc.Container(GF_left)
    SOX2_mean_left = pymc.Container(SOX2_mean_left)
    mi_right = pymc.Container(mi_right)
    GF_right = pymc.Container(GF_right)
    SOX2_mean_right = pymc.Container(SOX2_mean_right)
    cells_SOX2_float = pymc.Container(cells_SOX2_float)
    cells_nonPCNA = pymc.Container(cells_nonPCNA)
    cells_m = pymc.Container(cells_m)

    return locals()
def main(mcmc_args=None):
    '''Build the Bayesian parasitoid model and run (or resume) an MCMC chain.

    Arguments:
        - mcmc_args -- None to start the interactive menu, or a sequence:
            * length 3: (nsamples, burn, filename) starts a new chain,
            * length 2: (filename, nsamples) resumes a chain from an hdf5 file.
          Any other length falls through to the interactive menu.

    Returns None. Samples are written to an hdf5 database through pymc.'''

    print('Setting up parameters and priors...')

    params = Params()
    # Set up location here with command line arguments in a list.
    params.cmd_line_chg(['--kalbar'])
    # Sanity check that the expected site was selected.
    assert params.site_name + 'fields.txt' == 'data/kalbarfields.txt'
    # Set parameters specific to Bayesian runs
    params.PLOT = False
    params.OUTPUT = False

    # This sends a message to CalcSol on whether or not to use CUDA
    globalvars.cuda = bool(params.CUDA)

    # get wind data and day labels
    wind_data, days = PM.get_wind_data(*params.get_wind_params())
    params.ndays = len(days)

    # reduce domain
    params.domain_info = (10000.0, 400)  # 25 m sided cells
    domain_res = params.domain_info[0] / params.domain_info[1]
    cell_area = domain_res**2

    locinfo = LocInfo(params.dataset, params.coord, params.domain_info)

    ######################################################################
    #####                        Model Priors                        #####
    ######################################################################
    lam = pm.Beta("lam", 5, 1, value=0.95)
    f_a1 = pm.TruncatedNormal("f_a1", 6, 0.3, 0, 9, value=6)
    f_a2 = pm.TruncatedNormal("f_a2", 20, 0.3, 15, 24, value=20)
    # alpha,beta parameterization
    f_b1_p = pm.Gamma("fb1_p", 2, 1, value=1.5, trace=False, plot=False)

    @pm.deterministic(trace=True, plot=True)
    def f_b1(f_b1_p=f_b1_p):
        return f_b1_p + 1

    f_b2_p = pm.Gamma("fb2_p", 2, 1, value=1.5, trace=False, plot=False)

    @pm.deterministic(trace=True, plot=True)
    def f_b2(f_b2_p=f_b2_p):
        return f_b2_p + 1

    g_aw = pm.Gamma("g_aw", 2.2, 1, value=1.0)
    g_bw = pm.Gamma("g_bw", 5, 1, value=3.8)
    # flight diffusion parameters. note: mean is average over flight advection
    sig_x = pm.Gamma("sig_x", 26, 0.15, value=180)
    sig_y = pm.Gamma("sig_y", 15, 0.15, value=150)
    corr_p = pm.Beta("corr_p", 5, 5, value=0.5, trace=False, plot=False)

    @pm.deterministic(trace=True, plot=True)
    def corr(corr_p=corr_p):
        return corr_p * 2 - 1

    # local spread parameters
    sig_x_l = pm.Gamma("sig_xl", 2, 0.08, value=10)
    sig_y_l = pm.Gamma("sig_yl", 2, 0.14, value=10)
    corr_l_p = pm.Beta("corr_l_p", 5, 5, value=0.5, trace=False, plot=False)

    @pm.deterministic(trace=True, plot=True)
    def corr_l(corr_l_p=corr_l_p):
        return corr_l_p * 2 - 1

    mu_r = pm.Normal("mu_r", 1., 1, value=1)
    n_periods = pm.Poisson("n_periods", 30, value=30)
    #alpha_pow = prev. time exponent in ParasitoidModel.h_flight_prob
    # presence to oviposition/emergence factor
    xi = pm.Gamma("xi", 1, 1, value=0.75)
    # per-wasp prob of observing emergence in release field grid given max
    # leaf collection. this is dependent on the size of the cell surrounding
    # the grid point ...not much to be done about this.
    em_obs_prob = pm.Beta("em_obs_prob", 1, 1, value=0.05)
    # probability of observing a wasp present in the grid cell given max leaf
    # sampling
    grid_obs_prob = pm.Beta("grid_obs_prob", 1, 1, value=0.005)
    #card_obs_prob = pm.Beta("card_obs_prob",1,1,value=0.5) # probability of
    # observing a wasp present in the grid cell given max leaf sampling

    #### Data collection model background for sentinel fields ####
    # Need to fix linear units for area. Meters would be best.
    # Effective collection area (constant between fields) is very uncertain
    with warnings.catch_warnings():
        # squelch a warning based on pymc coding we don't need to worry about
        warnings.simplefilter("ignore", RuntimeWarning)
        A_collected = pm.TruncatedNormal(
            "A_collected", 2500, 1 / 2500, 0,
            min(locinfo.field_sizes.values()) * cell_area,
            value=2500)  # in m**2
    # Each field has its own binomial probability.
    # Probabilities are likely to be small, and pm.Beta cannot handle small
    # parameter values.
    # NOTE(review): the original comment continued "So we will use
    # TruncatedNormal again", but the code below builds pm.Beta variables --
    # confirm which is intended.
    N = len(locinfo.sent_ids)
    sent_obs_probs = np.empty(N, dtype=object)
    # fix beta for the Beta distribution
    sent_beta = 40
    # mean of Beta distribution will be A_collected/field size
    for n, key in enumerate(locinfo.sent_ids):
        field_area = locinfo.field_sizes[key] * cell_area
        sent_obs_probs[n] = pm.Beta(
            "sent_obs_probs_{}".format(key),
            A_collected / field_area * sent_beta /
            (1 - A_collected / field_area),
            sent_beta,
            value=0.1 * 3600 / field_area)

    sent_obs_probs = pm.Container(sent_obs_probs)

    # Maximum a posteriori estimates have consistently returned a value near
    # zero for sprd_factor. So we will comment these sections.
    # if params.dataset == 'kalbar':
    #     # factor for kalbar initial spread
    #     sprd_factor = pm.Uniform("sprd_factor",0,1,value=0.3)
    # else:
    #     sprd_factor = None
    sprd_factor = None

    #### Collect variables and setup block update ####
    params_ary = pm.Container(
        np.array([
            g_aw, g_bw, f_a1, f_b1, f_a2, f_b2, sig_x, sig_y, corr, sig_x_l,
            sig_y_l, corr_l, lam, n_periods, mu_r
        ], dtype=object))
    # The stochastic variables in this list (and the stochastics behind the
    # deterministic ones) should be block updated in order to avoid the large
    # computational expense of evaluating the model multiple times for each
    # MCMC iteration. To do this, starting step variances must be defined
    # for each variable. This is done via a scaling dict.
    stoc_vars = [
        g_aw, g_bw, f_a1, f_b1_p, f_a2, f_b2_p, sig_x, sig_y, corr_p, sig_x_l,
        sig_y_l, corr_l_p, lam, n_periods, mu_r
    ]
    step_scales = {
        g_aw: 0.04,
        g_bw: 0.08,
        f_a1: 0.25,
        f_b1_p: 0.05,
        f_a2: 0.25,
        f_b2_p: 0.05,
        sig_x: 2,
        sig_y: 2,
        corr_p: 0.0005,
        sig_x_l: 2,
        sig_y_l: 2,
        corr_l_p: 0.0005,
        lam: 0.0005,
        n_periods: 1,
        mu_r: 0.005
    }

    print('Getting initial model values...')

    ######################################################################
    #####                          Run Model                         #####
    ######################################################################
    @pm.deterministic(plot=False, trace=False)
    def pop_model(params=params, params_ary=params_ary, locinfo=locinfo,
                  wind_data=wind_data, days=days, sprd_factor=sprd_factor):
        '''This function acts as an interface between PyMC and the model.
        Not only does it run the model, but it provides an emergence potential
        based on the population model result projected forward from feasible
        oviposition dates. To modify how this projection happens, edit
        popdensity_to_emergence. Returned values from this function should be
        nearly ready to compare to data.
        '''
        modeltic = time.time()
        ### Alter params with stochastic variables ###

        # g wind function parameters
        params.g_params = tuple(params_ary[0:2])
        # f time of day function parameters
        params.f_params = tuple(params_ary[2:6])
        # Diffusion coefficients
        params.Dparams = tuple(params_ary[6:9])
        params.Dlparams = tuple(params_ary[9:12])
        # Probability of any flight during the day under ideal circumstances
        params.lam = params_ary[12]

        # TRY BOTH SCALINGS - VARYING mu_r and n_periods
        # scaling flight advection to wind advection
        # number of time periods (based on interp_num) in one flight
        # (if interp_num = 30, this is # of minutes)
        params.n_periods = params_ary[13]
        params.mu_r = params_ary[14]

        ### PHASE ONE ###
        # First, get spread probability for each day as a coo sparse matrix
        max_shape = np.array([0, 0])
        pm_args = [(days[0], wind_data, *params.get_model_params(),
                    params.r_start)]
        pm_args.extend([(day, wind_data, *params.get_model_params())
                        for day in days[1:params.ndays]])

        ##### Kalbar wind started recording a day late. Spread the population
        #####   locally before running full model.
        if sprd_factor is not None:
            res = params.domain_info[0] / params.domain_info[1]
            mean_drift = np.array([-25., 15.])
            xdrift_int = int(mean_drift[0] // res)
            xdrift_r = mean_drift[0] % res
            ydrift_int = int(mean_drift[1] // res)
            ydrift_r = mean_drift[1] % res
            longsprd = PM.get_mvn_cdf_values(
                res, np.array([xdrift_r, ydrift_r]),
                PM.Dmat(params_ary[6], params_ary[7], params_ary[8]))
            shrtsprd = PM.get_mvn_cdf_values(
                res, np.array([0., 0.]),
                PM.Dmat(params_ary[9], params_ary[10], params_ary[11]))

            mlen = int(
                max(longsprd.shape[0], shrtsprd.shape[0]) +
                max(abs(xdrift_int), abs(ydrift_int)) * 2)
            sprd = np.zeros((mlen, mlen))
            lbds = [
                int(mlen // 2 - longsprd.shape[0] // 2),
                int(mlen // 2 + longsprd.shape[0] // 2 + 1)
            ]
            sprd[lbds[0] - ydrift_int:lbds[1] - ydrift_int,
                 lbds[0] + xdrift_int:lbds[1] + xdrift_int] = \
                longsprd * sprd_factor
            sbds = [
                int(mlen // 2 - shrtsprd.shape[0] // 2),
                int(mlen // 2 + shrtsprd.shape[0] // 2 + 1)
            ]
            sprd[sbds[0]:sbds[1], sbds[0]:sbds[1]] += \
                shrtsprd * (1 - sprd_factor)

            # put any missing probability mass in the center cell
            sprd[int(sprd.shape[0] // 2), int(sprd.shape[0] // 2)] += \
                max(0, 1 - sprd.sum())
            pmf_list = [sparse.coo_matrix(sprd)]
        else:
            pmf_list = []

        ###################### Get pmf_list from multiprocessing
        pmf_list.extend(pool.starmap(PM.prob_mass, pm_args))

        for pmf in pmf_list:
            for dim in range(2):
                if pmf.shape[dim] > max_shape[dim]:
                    max_shape[dim] = pmf.shape[dim]

        r_spread = []  # holds the one-day spread for each release day.

        # Loop-invariant; defined before the loop so that it is bound for
        # get_populations below even when params.r_dur is 0.
        dom_len = params.domain_info[1] * 2 + 1

        # Reshape the prob. mass function of each release day into solution
        # form
        for ii in range(params.r_dur):
            offset = params.domain_info[1] - pmf_list[ii].shape[0] // 2
            r_spread.append(
                sparse.coo_matrix(
                    (pmf_list[ii].data,
                     (pmf_list[ii].row + offset, pmf_list[ii].col + offset)),
                    shape=(dom_len, dom_len)).tocsr())

        ### PHASE TWO ###
        # Pass the probability list, pmf_list, and other info to convolution
        #   solver. This will return the finished population model.
        with Capturing() as output:
            if sprd_factor is not None:
                # extend day count by one
                days_ext = [days[0] - 1]
                days_ext.extend(days)
                modelsol = get_populations(r_spread, pmf_list, days_ext,
                                           params.ndays + 1, dom_len,
                                           max_shape, params.r_dur,
                                           params.r_number, params.r_mthd())
                # remove the first one and start where wind started.
                modelsol = modelsol[1:]
            else:
                modelsol = get_populations(r_spread, pmf_list, days,
                                           params.ndays, dom_len, max_shape,
                                           params.r_dur, params.r_number,
                                           params.r_mthd())

        # modelsol now holds the model results for this run as CSR sparse
        # arrays

        # get emergence potential (measured in expected number of wasps
        #   previously present whose oviposition would result in emergence on
        #   the given date) from the model result
        release_emerg, sentinel_emerg = popdensity_to_emergence(
            modelsol, locinfo)

        # get the expected wasp populations at grid points on sample days
        grid_counts = popdensity_grid(modelsol, locinfo)

        # get the expected wasp populations in cardinal directions
        # (disabled)
        # card_counts = popdensity_card(modelsol,locinfo,params.domain_info)

        ## For the lists release_emerg and sentinel_emerg:
        ##    Each list entry corresponds to a data collection day (one array)
        ##    In each array:
        ##    Each column corresponds to an emergence observation day
        ##        (as in data)
        ##    Each row corresponds to a grid point or sentinel field,
        ##        respectively
        ## For the array grid_counts:
        ##    Each column corresponds to an observation day
        ##    Each row corresponds to a grid point
        ## For the list card_counts:
        ##    Each list entry corresponds to a sampling day (one array)
        ##    Each column corresponds to a step in a cardinal direction
        ##    Each row corresponds to a cardinal direction
        # print('{:03.1f} sec./model at {}'.format(time.time() - modeltic,
        #     time.strftime("%H:%M:%S %d/%m/%Y")),end='\r')
        # sys.stdout.flush()
        return (release_emerg, sentinel_emerg, grid_counts)  #,card_counts)

    print('Parsing model output and connecting to Bayesian model...')

    ######################################################################
    #####                   Connect Model to Data                    #####
    ######################################################################

    ### Parse the results of pop_model into separate deterministic
    ### variables ###

    # Get Poisson probabilities for sentinel field emergence. Parameters:
    #   xi is constant, emerg is a list of ndarrays, betas is a 1D array of
    #   field probabilities
    Ncollections = len(locinfo.sent_DataFrames)
    sent_poi_rates = []
    for ii in range(Ncollections):
        s_ndays = len(locinfo.sent_DataFrames[ii]['datePR'].unique())
        sent_poi_rates.append(
            pm.Lambda('sent_poi_rate_{}'.format(ii),
                      lambda xi=xi, ndays=s_ndays, betas=sent_obs_probs,
                      emerg_model=pop_model[1][ii]:
                      xi * emerg_model * np.tile(betas, (ndays, 1)).T,
                      trace=False))
    sent_poi_rates = pm.Container(sent_poi_rates)

    # Return Poisson probabilities for release field grid emergence.
    #   Parameters: xi is constant, emerg is a list of ndarrays. collection
    #   effort is specified in locinfo.
    Ncollections = len(locinfo.release_DataFrames)
    rel_poi_rates = []
    for ii in range(Ncollections):
        r_effort = locinfo.release_collection[ii]  # fraction of max collection
        r_ndays = len(locinfo.release_DataFrames[ii]['datePR'].unique())
        rel_poi_rates.append(
            pm.Lambda('rel_poi_rate_{}'.format(ii),
                      lambda xi=xi, ndays=r_ndays, r_effort=r_effort,
                      beta=em_obs_prob, emerg_model=pop_model[0][ii]:
                      xi * emerg_model * np.tile(r_effort * beta,
                                                 (ndays, 1)).T,
                      trace=False))
    rel_poi_rates = pm.Container(rel_poi_rates)

    @pm.deterministic(plot=False, trace=False)
    def grid_poi_rates(locinfo=locinfo, beta=grid_obs_prob,
                       obs_model=pop_model[2]):
        '''Return Poisson probabilities for grid sampling
        obs_model is an ndarray, sampling effort is specified in locinfo.'''
        return beta * locinfo.grid_samples * obs_model

    # Poisson probabilities for cardinal direction sampling (disabled).
    #   obs_model is a list of ndarrays, sampling effort is assumed constant
    # card_poi_rates = []
    # for ii,obs in enumerate(pop_model[3]):
    #     card_poi_rates.append(pm.Lambda('card_poi_rate_{}'.format(ii),
    #         lambda beta=card_obs_prob, obs=obs: beta*obs))
    # card_poi_rates = pm.Container(card_poi_rates)

    # Given the expected wasp densities from pop_model, actual wasp densities
    #   are modeled as a thinned Poisson random variable about that mean.
    # Each wasp in the area then has a small probability of being seen.

    ### Connect sentinel emergence data to model ###
    N_sent_collections = len(locinfo.sent_DataFrames)
    # Create list of collection variables
    sent_collections = []
    for ii in range(N_sent_collections):
        # Apparently, pymc does not play well with 2D array parameters
        sent_collections.append(
            np.empty(sent_poi_rates[ii].value.shape, dtype=object))
        for n in range(sent_collections[ii].shape[0]):
            for m in range(sent_collections[ii].shape[1]):
                sent_collections[ii][n, m] = pm.Poisson(
                    "sent_em_obs_{}_{}_{}".format(ii, n, m),
                    sent_poi_rates[ii][n, m],
                    value=float(locinfo.sentinel_emerg[ii][n, m]),
                    observed=True)
    sent_collections = pm.Container(sent_collections)

    ### Connect release-field emergence data to model ###
    N_release_collections = len(locinfo.release_DataFrames)
    # Create list of collection variables
    rel_collections = []
    for ii in range(N_release_collections):
        rel_collections.append(
            np.empty(rel_poi_rates[ii].value.shape, dtype=object))
        for n in range(rel_collections[ii].shape[0]):
            for m in range(rel_collections[ii].shape[1]):
                rel_collections[ii][n, m] = pm.Poisson(
                    "rel_em_obs_{}_{}_{}".format(ii, n, m),
                    rel_poi_rates[ii][n, m],
                    value=float(locinfo.release_emerg[ii][n, m]),
                    observed=True)
    rel_collections = pm.Container(rel_collections)

    ### Connect grid sampling data to model ###
    grid_obs = np.empty(grid_poi_rates.value.shape, dtype=object)
    for n in range(grid_obs.shape[0]):
        for m in range(grid_obs.shape[1]):
            grid_obs[n, m] = pm.Poisson("grid_obs_{}_{}".format(n, m),
                                        grid_poi_rates[n, m],
                                        value=float(locinfo.grid_obs[n, m]),
                                        observed=True)
    grid_obs = pm.Container(grid_obs)

    ### Connect cardinal direction data to model (disabled) ###
    # N_card_collections = len(locinfo.card_obs_DataFrames)
    # # Create list of sampling variables
    # card_collections = []
    # for ii in range(N_card_collections):
    #     card_collections.append(np.empty(card_poi_rates[ii].value.shape,
    #                                      dtype=object))
    #     for n in range(card_collections[ii].shape[0]):
    #         for m in range(card_collections[ii].shape[1]):
    #             card_collections[ii][n,m] = pm.Poisson(
    #                 "card_obs_{}_{}_{}".format(ii,n,m),
    #                 card_poi_rates[ii][n,m],
    #                 value=locinfo.card_obs[ii][n,m],
    #                 observed=True, plot=False)
    # card_collections = pm.Container(card_collections)

    ######################################################################
    #####                   Collect Model and Run                    #####
    ######################################################################

    ### Collect model ###
    # sprd_factor is only part of the model when it is an actual variable.
    model_vars = [
        lam, f_a1, f_a2, f_b1_p, f_b2_p, f_b1, f_b2, g_aw, g_bw, sig_x,
        sig_y, corr_p, corr, sig_x_l, sig_y_l, corr_l_p, corr_l, n_periods,
        mu_r
    ]
    if sprd_factor is not None:
        model_vars.append(sprd_factor)
    model_vars.extend([
        grid_obs_prob, xi, em_obs_prob, A_collected, sent_obs_probs,
        params_ary, pop_model, grid_poi_rates, rel_poi_rates, sent_poi_rates,
        grid_obs, rel_collections, sent_collections
    ])
    Bayes_model = pm.Model(model_vars)

    ### Helpers shared by the scripted and interactive run paths ###

    def _with_h5_suffix(fname):
        '''Strip whitespace and append '.h5' unless already present.'''
        fname = fname.strip()
        return fname if fname.endswith('.h5') else fname + '.h5'

    def _use_block_update(mcmc):
        '''Block-update stoc_vars with AdaptiveMetropolis; return mcmc.'''
        mcmc.use_step_method(pm.AdaptiveMetropolis, stoc_vars,
                             scales=step_scales, interval=500,
                             shrink_if_necessary=True)
        return mcmc

    def _new_chain(fname):
        '''Create a sampler writing to a new/appended hdf5 database.'''
        return _use_block_update(
            pm.MCMC(Bayes_model, db='hdf5', dbname=fname, dbmode='a',
                    dbcomplevel=0))

    def _resume_chain(fname):
        '''Create a sampler on top of an existing hdf5 database file.'''
        return _use_block_update(
            pm.MCMC(Bayes_model, db=pm.database.hdf5.load(fname)))

    def _sample_and_commit(mcmc, *sample_args):
        '''Run mcmc.sample(*sample_args) and commit; close the db on error.'''
        try:
            tic = time.time()
            print('Sampling...')
            mcmc.sample(*sample_args)
            # sampling finished.  commit to database and continue
            print('Sampling finished.')
            print('Time elapsed: {}'.format(time.time() - tic))
            print('Saving...')
            #mcmc.save_state()
            mcmc.commit()
        except BaseException:
            # Like the original bare except, this also catches
            # KeyboardInterrupt so the database is closed before re-raising.
            print('Exception: database closing...')
            mcmc.db.close()
            raise

    ### Run if parameters were passed in ###
    if mcmc_args is not None:
        if len(mcmc_args) == 3:
            # New run
            nsamples = int(mcmc_args[0])
            burn = int(mcmc_args[1])
            fname = _with_h5_suffix(mcmc_args[2])
            mcmc = _new_chain(fname)
            _sample_and_commit(mcmc, nsamples, burn)
            print('Closing...')
            mcmc.db.close()
            return
        elif len(mcmc_args) == 2:
            # Resume run
            fname = mcmc_args[0]
            nsamples = int(mcmc_args[1])
            fname = _with_h5_suffix(fname)
            if not os.path.isfile(fname):
                print('File not found: {}'.format(fname))
                return
            mcmc = _resume_chain(fname)  # database loaded.
            _sample_and_commit(mcmc, nsamples)
            print('Closing...')
            mcmc.db.close()
            return

    ######################################################################
    #####                   Start Interactive Menu                   #####
    ######################################################################
    print('--------------- MCMC MAIN MENU ---------------')
    print(" 'new': Start a new MCMC chain from the beginning.")
    print("'cont': Continue a previous MCMC chain from an hdf5 file.")
    #print("'plot': Plot traces/distribution from an hdf5 file.")
    print("'quit': Quit.")
    cmd = input('Enter: ')
    cmd = cmd.strip().lower()
    if cmd == 'new':
        print('\n\n')
        print('--------------- New MCMC Chain ---------------')
        while True:
            val = input("Enter number of realizations or 'quit' to quit:")
            val = val.strip()
            if val in ('q', 'quit'):
                return
            try:
                nsamples = int(val)
                val2 = input("Enter number of realizations to discard:")
                val2 = val2.strip()
                if val2 in ('q', 'quit'):
                    return
                burn = int(val2)
            except ValueError:
                print('Unrecognized input.')
                continue
            fname = input("Enter filename to save or 'back' to cancel:")
            fname = fname.strip()
            if fname in ('q', 'quit'):
                return
            if fname in ('b', 'back'):
                continue
            # Only append the suffix when missing, as the other paths do
            # (previously 'x.h5' became 'x.h5.h5' here).
            fname = _with_h5_suffix(fname)
            break  # BREAK LOOP AND RUN MCMC WITH GIVEN VALUES
        ##### RUN FIRST MCMC HERE #####
        mcmc = _new_chain(fname)
        _sample_and_commit(mcmc, nsamples, burn)
    elif cmd == 'cont':
        # Load db and continue
        print('\n')
        while True:
            fname = input("Enter path to database to load, or 'q' to quit:")
            fname = fname.strip()
            if fname.lower() in ('q', 'quit'):
                return
            fname = _with_h5_suffix(fname)
            if os.path.isfile(fname):
                mcmc = _resume_chain(fname)
                break  # database loaded
            print('File not found.')
    elif cmd == 'plot':
        # Plotting from the main menu is not implemented. Return instead of
        # falling through to the MCMC loop, where no sampler exists yet.
        print("'plot' is not implemented.")
        return
    elif cmd == 'quit' or cmd == 'q':
        return
    else:
        print('Command not recognized.')
        print('Quitting....')
        return

    ##### MCMC Loop #####
    # This should be reached only by cmd == 'new' or 'cont' with a database.
    # It resumes sampling of a previously sampled chain.
    print('\n')
    while True:
        print('--------------- MCMC ---------------')
        print(" 'report': generate report on traces")
        print("'inspect': launch IPython to inspect state")
        print(" 'run': conduct further sampling")
        print(" 'quit': Quit")
        cmd = input('Enter: ')
        cmd = cmd.strip()
        cmd = cmd.lower()
        if cmd == 'inspect':
            try:
                import IPython
                IPython.embed()
            except ImportError:
                print('IPython not found.')
            except BaseException:
                print('Exception: database closing...')
                mcmc.db.close()
                raise
        elif cmd == 'run':
            val = input("Enter number of realizations or 'back':")
            val = val.strip()
            if val == 'back' or val == 'b':
                continue
            try:
                nsamples = int(val)
            except ValueError:
                print('Unrecognized input.')
                continue
            # Run chain
            _sample_and_commit(mcmc, nsamples)
        elif cmd == 'report':
            try:
                import Bayes_Plot
                # Use the sampler's own database handle: a separate `db`
                # name only exists when the chain was loaded via 'cont'.
                Bayes_Plot.plot_traces(db=mcmc.db)
                print('Gelman-Rubin statistics')
                gr = pm.gelman_rubin(mcmc)
                print(gr)
                with open('./diagnostics/gelman-rubin.txt', 'w') as f:
                    f.write('Variable R_hat\n')
                    f.write('---------------------\n')
                    for key, val in gr.items():
                        f.write(key + ': {}\n'.format(val))
            except BaseException:
                print('Exception: database closing...')
                mcmc.db.close()
                raise
        elif cmd == 'quit' or cmd == 'q':
            mcmc.db.close()
            print('Database closed.')
            break
        else:
            print('Command not recognized.')
-0.8762523, 0.47377688, 0.76516415, 0.27890419, -0.07819642, -0.13399348, 0.82877293, 0.22308624, 0.7485783, -0.14700254, -1.03145657, 0.85641097, 0.43396285, 0.47901653, 0.80137086, 0.33566812, 0.71443253, -1.57590815, -0.24090179, -2.0128344, 0.34503324, 0.12944091, -1.5327008, 0.06363034, 0.21042021, -0.81425636, 0.20209279, -1.48130423, -1.04983523, 0.16001774, -0.75239072, 0.33427956, -0.10224921, 0.26463561, -1.09374674, -0.72749811, -0.54892116, -1.89631844, -0.94393545, -0.2521341, 0.26840341, 0.23563219, 0.35333094 ]) # Model: the data are truncated-normally distributed with unknown upper bound. mu = pm.Normal('mu', 0, .01, value=0) tau = pm.Exponential('tau', .01, value=1) cutoff = pm.Exponential('cutoff', 1, value=1.3) D = pm.TruncatedNormal('D', mu, tau, -np.inf, cutoff, value=data, observed=True) M = pm.MCMC([mu, tau, cutoff, D]) # Use a TruncatedMetropolis step method that will never propose jumps below D's maximum value. M.use_step_method(TruncatedMetropolis, cutoff, D.value.max(), np.inf) # Get a handle to the step method handling cutoff to investigate its behavior. S = M.step_method_dict[cutoff][0] M.isample(10000, 0, 10)
def run(self):
    """Build the unfolding model from this object's inputs and sample it.

    Validates the input, optionally fluctuates the data (when
    ``self.rndseed >= 0``), builds the truth prior and the background /
    object-systematic nuisance parameters, samples the model (through the
    project's emcee wrapper or a MAP fit followed by PyMC MCMC), and stores
    the results on the instance:

    * ``self.trace`` -- one trace per truth bin,
    * ``self.nuisancestrace`` -- traces of the sampled nuisance parameters,
      keyed by systematic name.

    Returns None.
    """
    self.validateinput()
    data = self.data
    data = self.fluctuate(data) if self.rndseed >= 0 else data

    # unpack background dictionaries
    # Snapshot the keys as lists: they are iterated several times below and
    # every pass must see the same order.
    backgroundkeys = list(self.backgroundsyst.keys())
    backgrounds = array([self.background[key] for key in backgroundkeys])
    backgroundnormsysts = array(
        [self.backgroundsyst[key] for key in backgroundkeys])

    # unpack object systematics dictionary
    objsystkeys = list(self.objsyst['signal'].keys())
    signalobjsysts = array(
        [self.objsyst['signal'][key] for key in objsystkeys])
    backgroundobjsysts = array([])
    if len(objsystkeys) > 0 and len(backgroundkeys) > 0:
        backgroundobjsysts = array([
            [self.objsyst['background'][syst][bckg] for syst in objsystkeys]
            for bckg in backgroundkeys
        ])
    recodim = len(data)
    resmat = self.response
    truthdim = len(resmat)

    import priors
    truth = priors.wrapper(priorname=self.prior,
                           low=self.lower, up=self.upper,
                           other_args=self.priorparams)

    # One nuisance per background: a negative error selects a free
    # normalisation in [0, 3]; otherwise a unit Gaussian truncated from below
    # at -1/err, which is held fixed (observed) when err is not positive.
    bckgnuisances = []
    for name, err in zip(backgroundkeys, backgroundnormsysts):
        if err < 0.:
            bckgnuisances.append(
                mc.Uniform('norm_%s' % name, value=1., lower=0., upper=3.))
        else:
            is_free = err > 0.0
            bckgnuisances.append(
                mc.TruncatedNormal('gaus_%s' % name,
                                   value=0., mu=0., tau=1.0,
                                   a=(-1.0 / err if is_free else -inf),
                                   b=inf,
                                   observed=not is_free))
    bckgnuisances = mc.Container(bckgnuisances)

    # Object systematics are fixed at self.systfixsigma when it is non-zero.
    fix_objsysts = self.systfixsigma != 0
    objnuisances = [
        mc.Normal('gaus_%s' % name, value=self.systfixsigma,
                  mu=0., tau=1.0, observed=bool(fix_objsysts))
        for name in objsystkeys
    ]
    objnuisances = mc.Container(objnuisances)

    # define potential to constrain truth spectrum
    if self.regularization:
        truthpot = self.regularization.getpotential(truth)

    # This is where the FBU method is actually implemented
    @mc.deterministic(plot=False)
    def unfold(truth=truth, bckgnuisances=bckgnuisances,
               objnuisances=objnuisances):
        smearbckg = 1.
        if len(backgroundobjsysts) > 0:
            smearbckg = smearbckg + dot(objnuisances, backgroundobjsysts)
        smearedbackgrounds = backgrounds * smearbckg
        bckgnormerr = array([
            (-1. + nuis) / nuis if berr < 0. else berr
            for berr, nuis in zip(backgroundnormsysts, bckgnuisances)
        ])
        bckg = dot(1. + bckgnuisances * bckgnormerr, smearedbackgrounds)
        reco = dot(truth, resmat)
        smear = 1. + dot(objnuisances, signalobjsysts)
        out = bckg + reco * smear
        return out

    unfolded = mc.Poisson('unfolded', mu=unfold, value=data,
                          observed=True, size=recodim)
    allnuisances = mc.Container(bckgnuisances + objnuisances)
    modelelements = [unfolded, unfold, truth, allnuisances]
    if self.regularization:
        modelelements += [truthpot]
    model = mc.Model(modelelements)

    if self.use_emcee:
        from emcee_sampler import sample_emcee
        # Floor division keeps the per-walker counts integral on Python 3
        # (identical to the old `/` for ints on Python 2).
        # NOTE(review): assumes nMCMC/nBurn/nwalkers are integers -- confirm.
        mcmc = sample_emcee(model, nwalkers=self.nwalkers,
                            samples=self.nMCMC // self.nwalkers,
                            burn=self.nBurn // self.nwalkers,
                            thin=self.nThin)
    else:
        map_ = mc.MAP(model)
        map_.fit()
        mcmc = mc.MCMC(model)
        mcmc.use_step_method(mc.AdaptiveMetropolis, truth + allnuisances)
        mcmc.sample(self.nMCMC, burn=self.nBurn, thin=self.nThin)

    # mc.Matplot.plot(mcmc)

    # `range` works on both Python 2 and 3 (`xrange` is Python 2 only).
    self.trace = [
        mcmc.trace('truth%d' % ibin)[:] for ibin in range(truthdim)
    ]
    self.nuisancestrace = {}
    for name, err in zip(backgroundkeys, backgroundnormsysts):
        if err < 0.:
            self.nuisancestrace[name] = mcmc.trace('norm_%s' % name)[:]
        if err > 0.:
            self.nuisancestrace[name] = mcmc.trace('gaus_%s' % name)[:]
    for name in objsystkeys:
        if self.systfixsigma == 0.:
            self.nuisancestrace[name] = mcmc.trace('gaus_%s' % name)[:]

    if self.monitoring:
        import monitoring
        monitoring.plot(self.name + '_monitoring',
                        data, backgrounds, resmat, self.trace,
                        self.nuisancestrace, self.lower, self.upper)
def GenerateTrace(emulators, exp_Ys, exp_Yerrs, prior, id_, iter,
                  output_filename, burnin=1000):
    """
    The main function to generate pandas trace file after comparing the
    emulator with experimental value.
    Uses pymc2 as it is found to be faster.

    Arguments:
        emulators: dict mapping a model name to its emulators; names are
            processed in sorted order.
        exp_Ys, exp_Yerrs: experimental values and their errors, iterated in
            lockstep with the (transposed) emulator lists.
        prior: table iterated with ``iterrows()``; each row provides "Type",
            "Min", "Max" and, for non-uniform rows, "Mean" and "SD".
        id_: integer added to the random seed and used in the output name.
        iter: number of MCMC samples to draw.
        output_filename: prefix of the hdf5 file that is written.
        burnin: number of initial samples to discard.

    Returns:
        (hdf5 file name, list of model names ordered by model index).
    """
    pymc.numpy.random.seed(random.randint(0, 1000) + id_)
    n_models = len(emulators)

    emulators_list = []
    id_to_model_names = []
    parameters = []
    for i, ename in enumerate(sorted(emulators.keys())):
        id_to_model_names.append(ename)
        emulators_list.append(emulators[ename])

        # Each model gets its own copy of the priors; copies after the first
        # are suffixed with the model index to keep variable names unique.
        ind_parameters = []
        for name, row in prior.iterrows():
            var_name = name if i == 0 else '%s_%d' % (name, i)
            lower, upper = float(row["Min"]), float(row["Max"])
            if row["Type"] == "Uniform":
                ind_parameters.append(
                    pymc.Uniform(var_name, lower, upper,
                                 value=0.5 * (lower + upper)))
            else:
                mean = float(row["Mean"])
                ind_parameters.append(
                    pymc.TruncatedNormal(var_name,
                                         mu=mean,
                                         tau=1.0 / float(row["SD"])**2,
                                         a=lower,
                                         b=upper,
                                         value=mean))
        parameters.append(ind_parameters)

    # transpose emulator_list
    emulators_list = list(map(list, zip(*emulators_list)))

    single_model = n_models == 1
    if single_model:
        model_choice = 0
    else:
        model_choice = pymc.DiscreteUniform('ModelChoice', lower=0,
                                            upper=n_models - 1)

    for emu, exp_Y, exp_Yerr in zip(emulators_list, exp_Ys, exp_Yerrs):
        exp_cov = np.diag(np.square(exp_Yerr))

        # Parents are bound through default arguments, so every iteration
        # creates an observed node tied to its own emulator and data.
        @pymc.stochastic(observed=True)
        def emulator_result(value=exp_Y, x=parameters, exp_cov=exp_cov,
                            emulator=emu, mc=model_choice):
            mean, var = emulator[mc].Predict(np.array(x[mc]).reshape(1, -1))
            return np.array(
                mvn.logpdf(value, np.squeeze(mean),
                           np.squeeze(var) + exp_cov))

    # model = pymc.Model(parameters)

    # prepare for MCMC
    new_output_filename = "%s_%d.h5" % (output_filename, id_)
    # Decide on the number of models rather than by comparing model_choice
    # with 0: with several models model_choice is a PyMC variable, not an int.
    mcmc_input = parameters if single_model else parameters + [model_choice]
    mcmc = pymc.MCMC(mcmc_input, dbname=new_output_filename, db="hdf5",
                     dbmode="w")

    # draw `iter` samples from the posterior and discard the first `burnin`
    # so that only the steady-state part of the chain is kept
    mcmc.sample(iter, burn=burnin)
    mcmc.db.close()
    return new_output_filename, id_to_model_names
def make_model(observed_sed, e_observed_sed):
    """Return all prior and likelihood objects of the SED model.

    Arguments:
        observed_sed: observed magnitudes; the model SED is built in the
            order (r, H-alpha, i, J).
        e_observed_sed: uncertainties of ``observed_sed``.

    Returns:
        ``locals()`` -- a dict holding the two arguments and every PyMC
        object defined below, keyed by variable name. Do not add helper
        variables at function level: they would end up in the returned dict.
    """
    # Prior: mass (Kroupa 2001)
    # The nested `logp` and `random` functions are picked up by the
    # pymc.stochastic decorator; their names must not change.
    @pymc.stochastic()
    def logM(value=np.array([np.log10(0.5)]),
             a=np.log10(0.1), b=np.log10(7)):

        def logp(value, a, b):
            if value > b or value < a:
                # Stay within the model limits (a,b).
                # np.inf, not np.Inf: the alias was removed in NumPy 2.0.
                return -np.inf
            else:
                mass = 10 ** value
                if mass < 0.5:
                    return np.log(mass ** -1.3)  # Kroupa (2001)
                else:
                    return np.log(0.5 * mass ** -2.3)  # Kroupa (2001)

        def random(a, b):
            val = (b - a) * np.random.rand() + a
            return np.array([val])

    # Prior: age (uniform in the logarithm)
    logT = pymc.Uniform("logT", np.array([5]), np.array([8]))

    # Prior: accretion rate (uniform in the logarithm)
    logMacc = pymc.Uniform("logMacc", np.array([-15]), np.array([-2]))

    # Prior: disc truncation radius (Rin = 5 +/- 2 R, Gullbring et al. 1998)
    Rin = pymc.TruncatedNormal("Rin", mu=np.array([5.0]), tau=2.0 ** -2,
                               a=1.01, b=9e99)

    # Prior: distance (d = 760 +/- 5 pc, Sung 1997)
    d = pymc.TruncatedNormal("d", mu=np.array([760.0]), tau=5.0 ** -2,
                             a=700, b=9e99)

    # Prior: extinction (logA0 = -0.27 +/- 0.46, Rebull et al. 2002)
    logA0 = pymc.Normal("logA0", mu=np.array([-0.27]), tau=0.46 ** -2)

    # Likelihood: intrinsic SED
    @pymc.deterministic()
    def SED_intrinsic(logM=logM, logT=logT):
        r = siess_Mr(logM, logT)  # IPHAS r' as a function of (mass, age)
        i = siess_Mi(logM, logT)  # IPHAS i
        j = siess_Mj(logM, logT)  # 2MASS J
        ha = r - rminHa_intrinsic(r - i)  # IPHAS H-alpha
        return np.array([r[0], ha[0], i[0], j[0]])

    # Likelihood: H-alpha excess luminosity
    @pymc.deterministic()
    def logLacc(logM=logM, logT=logT, logMacc=logMacc, Rin=Rin):
        logR = siess_logR(logM, logT)  # Radius as a function of (mass, age)
        return 7.496 + logM + logMacc - logR + np.log10(1 - 1 / Rin)

    logLha = pymc.Normal("logLha", mu=(0.64 * logLacc - 2.12),
                         tau=0.43 ** -2)

    # Likelihood: H-alpha equivalent width (EW).
    @pymc.deterministic()
    def logEW(logLha=logLha, SED_intrinsic=SED_intrinsic):
        Lha = 10 ** logLha  # Excess luminosity
        # Continuum
        Lha_con = 0.316 * 10 ** (-0.4 * (SED_intrinsic[1] + 0.03))
        ew = -95.0 * Lha / Lha_con  # Equivalent width.
        return np.log10(-ew)

    # Likelihood: apparent SED
    @pymc.deterministic()
    def SED_apparent(d=d, logA0=logA0, SED_intr=SED_intrinsic, logEW=logEW):
        dismod = 5.0 * np.log10(d) - 5.0  # Distance modulus.
        A0 = 10.0 ** logA0  # Extinction parameter
        ri_intr = np.array([SED_intr[0] - SED_intr[2]])  # Intrinsic (r'-i')
        # Correct the intrinsic magnitudes for extinction and H-alpha
        # emission:
        r = SED_intr[0] + dismod + r_offset(ri_intr, A0, logEW)
        ha = SED_intr[1] + dismod + ha_offset(ri_intr, A0, logEW)
        i = SED_intr[2] + dismod + i_offset(ri_intr, A0, logEW)
        j = SED_intr[3] + dismod + 0.276 * A0
        return np.array([r[0], ha[0], i[0], j[0]])

    # Likelihood: observed SED
    @pymc.stochastic(observed=True)
    def SED_observed(value=observed_sed, SED_apparent=SED_apparent):
        e_calib = np.array([0.1, 0.1, 0.1, 0.1])  # Absolute uncertainty term
        # Use the node's own `value` (initialised from observed_sed) instead
        # of reaching around it to the enclosing argument.
        D2 = sum((value - SED_apparent) ** 2 /
                 (e_observed_sed ** 2 + e_calib ** 2))
        logp = -D2 / 2.0
        return logp

    return locals()  # Return all model components defined above
import pymc
import numpy as np

# Prior on the unknown Binomial probability: a normal with mean 0 and unit
# precision, truncated to [minv, maxv] and started at `init`.
init = 0.5
minv = 0.0
maxv = 1.0
theta = pymc.TruncatedNormal(
    'theta',
    mu=0,
    tau=1.0,
    a=minv,
    b=maxv,
    value=init,
)

# Binomial likelihood for the data: an observed count of 75 with n=100.
d = pymc.Binomial('d', p=theta, n=100, value=75, observed=True)
def set_priors(self, df):
    """Create the default prior distributions.

    Truncated-normal priors use sigma = range / 4, where "range" is the
    interval quoted in the comment next to each prior. One Vcmax25 prior is
    created for every unique value found in ``df["Leaf"]``.

    Returns the tuple
    ``(Vcvals, Jfac, Rdfac, Eaj, Eav, Ear, delSj, delSv)``.
    """
    def truncated(name, centre, sigma, lower, upper):
        # PyMC takes the precision tau = 1 / sigma**2 rather than sigma.
        return pymc.TruncatedNormal(name, mu=centre, tau=1.0 / sigma**2,
                                    a=lower, b=upper)

    # mu=25, range=(5-50)
    Vcvals = []
    for leaf in np.unique(df["Leaf"]):
        Vcvals.append(truncated('Vcmax25_%d' % (leaf), 25.0, 11.25,
                                0.0, 650.0))

    # mu=1.8, range=(0.8-2.8)
    Jfac = truncated('Jfac', 1.8, 0.5, 0.0, 5.0)

    # broad prior
    Rdfac = pymc.Uniform('Rdfac', lower=0.005, upper=0.05)

    # mu=40000, range=(20000-60000)
    Eaj = truncated('Eaj', 40000.0, 10000.0, 0.0, 199999.9)

    # mu=60000, range=(40000-80000)
    Eav = truncated('Eav', 60000.0, 10000.0, 0.0, 199999.9)

    # mu=34000, range=(20000-60000)
    Ear = truncated('Ear', 34000.0, 10000.0, 0.0, 199999.9)

    # mu=640, range=(620-660)
    delSj = truncated('delSj', 640.0, 10.0, 300.0, 800.0)

    # mu=640, range=(620-660)
    delSv = truncated('delSv', 640.0, 10.0, 300.0, 800.0)

    # Disabled alternative: lognormal priors (kept for reference only).
    #
    # log_mu = np.log(25.0)
    # log_sigma = np.log(11.25)
    # log_tau = 1.0/log_sigma**2
    # Vcvals = [pymc.Lognormal('Vcmax25_%d' % (i), mu=log_mu, tau=log_tau)
    #           for i in np.unique(df["Leaf"])]
    #
    # log_mu = np.log(1.8)
    # log_sigma = np.log(0.5)
    # log_tau = 1.0/log_sigma**2
    # Jfac = pymc.Lognormal('Jfac', mu=log_mu, tau=log_tau)
    #
    # Rdfac = pymc.Uniform('Rdfac', lower=0.005, upper=0.05)
    #
    # log_mu = np.log(40000.0)
    # log_sigma = np.log(20000.0)
    # log_tau = 1.0/log_sigma**2
    # Eaj = pymc.Lognormal('Eaj', mu=log_mu, tau=log_tau)
    #
    # log_mu = np.log(60000.0)
    # log_sigma = np.log(20000.0)
    # log_tau = 1.0/log_sigma**2
    # Eav = pymc.Lognormal('Eav', mu=log_mu, tau=log_tau)
    #
    # log_mu = np.log(34000)
    # log_sigma = np.log(15000.0)
    # log_tau = 1.0/log_sigma**2
    # Ear = pymc.Lognormal('Ear', mu=log_mu, tau=log_tau)
    #
    # log_mu = np.log(640.0)
    # log_sigma = np.log(50.0)
    # log_tau = 1.0/log_sigma**2
    # delSj = pymc.Lognormal('delSj', mu=log_mu, tau=log_tau)
    #
    # log_mu = np.log(640.0)
    # log_sigma = np.log(50.0)
    # log_tau = 1.0/log_sigma**2
    # delSv = pymc.Lognormal('delSv', mu=log_mu, tau=log_tau)

    return Vcvals, Jfac, Rdfac, Eaj, Eav, Ear, delSj, delSv
def main(RUNFLAG, outname):
    """Build the Bayesian parasitoid model and run a MAP / normal-approx fit.

    Parameters
    ----------
    RUNFLAG
        Selects the run mode:
        * ``'MAP_RUN'`` -- compute the maximum a posteriori estimate once.
        * any other non-None value -- treated as the hdf5 database name for
          a normal approximation; the database is closed afterwards.
        * ``None`` -- start the interactive text menu.
    outname
        File name for the text report. When None, each run method falls
        back to its own default name.

    Notes
    -----
    ``prior_eps`` maps each stochastic to the step size handed to
    ``pm.MAP`` / ``pm.NormApprox``. Local variable names are left as they
    are because the interactive mode can drop into ``IPython.embed()``,
    where they are visible to the user.
    """
    print('Setting up parameters and priors...')

    params = Params()
    # Set up location here with command line arguments in a list.
    params.cmd_line_chg(['--kalbar'])
    assert params.site_name + 'fields.txt' == 'data/kalbarfields.txt'
    # Set parameters specific to Bayesian runs
    params.PLOT = False
    params.OUTPUT = False

    # This sends a message to CalcSol on whether or not to use CUDA
    globalvars.cuda = bool(params.CUDA)

    # get wind data and day labels
    wind_data, days = PM.get_wind_data(*params.get_wind_params())
    params.ndays = len(days)

    # reduce domain
    params.domain_info = (10000.0, 200)  # 50 m sided cells
    domain_res = params.domain_info[0] / params.domain_info[1]
    cell_area = domain_res**2

    locinfo = LocInfo(params.dataset, params.coord, params.domain_info)

    prior_eps = {}

    #### Model priors ####
    lam = pm.Beta("lam", 5, 1, value=0.95)
    prior_eps[lam] = 0.01
    f_a1 = pm.TruncatedNormal("f_a1", 6, 0.3, 0, 9, value=6)
    prior_eps[f_a1] = 0.1
    f_a2 = pm.TruncatedNormal("f_a2", 20, 0.3, 15, 24, value=20)
    prior_eps[f_a2] = 0.1
    # alpha,beta parameterization
    f_b1_p = pm.Gamma("fb1_p", 2, 1, value=1.5, trace=False, plot=False)
    prior_eps[f_b1_p] = 0.05

    @pm.deterministic(trace=True, plot=True)
    def f_b1(f_b1_p=f_b1_p):
        return f_b1_p + 1

    f_b2_p = pm.Gamma("fb2_p", 2, 1, value=1.5, trace=False, plot=False)
    prior_eps[f_b2_p] = 0.05

    @pm.deterministic(trace=True, plot=True)
    def f_b2(f_b2_p=f_b2_p):
        return f_b2_p + 1

    g_aw = pm.Gamma("g_aw", 2.2, 1, value=1.0)
    prior_eps[g_aw] = 0.05
    g_bw = pm.Gamma("g_bw", 5, 1, value=3.8)
    prior_eps[g_bw] = 0.1

    # flight diffusion parameters. note: mean is average over flight advection
    sig_x = pm.Gamma("sig_x", 26, 0.15, value=180)
    prior_eps[sig_x] = 1
    sig_y = pm.Gamma("sig_y", 15, 0.15, value=150)
    prior_eps[sig_y] = 1
    corr_p = pm.Beta("corr_p", 5, 5, value=0.5, trace=False, plot=False)
    prior_eps[corr_p] = 0.01

    @pm.deterministic(trace=True, plot=True)
    def corr(corr_p=corr_p):
        # Map the Beta variable from (0, 1) onto (-1, 1).
        return corr_p * 2 - 1

    # local spread parameters
    sig_x_l = pm.Gamma("sig_xl", 2, 0.08, value=10)
    prior_eps[sig_x_l] = 1
    sig_y_l = pm.Gamma("sig_yl", 2, 0.14, value=10)
    prior_eps[sig_y_l] = 1
    corr_l_p = pm.Beta("corr_l_p", 5, 5, value=0.5, trace=False, plot=False)
    prior_eps[corr_l_p] = 0.005

    @pm.deterministic(trace=True, plot=True)
    def corr_l(corr_l_p=corr_l_p):
        # Map the Beta variable from (0, 1) onto (-1, 1).
        return corr_l_p * 2 - 1

    # pymc.MAP can only take float values, so we vary mu_r and set n_periods.
    mu_r = pm.Normal("mu_r", 1., 1, value=1)
    prior_eps[mu_r] = 0.05
    params.n_periods = 30
    # alpha_pow = prev. time exponent in ParasitoidModel.h_flight_prob
    # presence to oviposition/emergence factor
    xi = pm.Gamma("xi", 1, 1, value=0.75)
    prior_eps[xi] = 0.05

    #### Observation probabilities. ####
    # per-wasp prob of observing emergence in release field grid given max
    # leaf collection. This is dependent on the size of the cell surrounding
    # the grid point, but there's not much to be done about this. Just
    # remember to interpret this number based on grid coarseness.
    em_obs_prob = pm.Beta("em_obs_prob", 1, 1, value=0.05)
    prior_eps[em_obs_prob] = 0.0005
    # probability of observing a wasp present in the grid cell given max
    # leaf sampling
    grid_obs_prob = pm.Beta("grid_obs_prob", 1, 1, value=0.005)
    prior_eps[grid_obs_prob] = 0.0005
    # card_obs_prob = pm.Beta("card_obs_prob",1,1,value=0.5)
    # probability of observing a wasp present in the grid cell given max
    # leaf sampling

    #### Data collection model background for sentinel fields ####
    # Need to fix linear units for area. Meters would be best.
    # Effective collection area (constant between fields) is very uncertain
    with warnings.catch_warnings():
        # squelch a warning based on pymc coding we don't need to worry about
        warnings.simplefilter("ignore", RuntimeWarning)
        A_collected = pm.TruncatedNormal(
            "A_collected", 2500, 1 / 2500, 0,
            min(locinfo.field_sizes.values()) * cell_area,
            value=2500)  # in m**2
    prior_eps[A_collected] = 10
    # Each field has its own binomial probability.
    # Probabilities are likely to be small, and pm.Beta cannot handle small
    # parameter values. So we will use TruncatedNormal again.
    N = len(locinfo.sent_ids)
    sent_obs_probs = np.empty(N, dtype=object)
    # fix beta for the Beta distribution
    sent_beta = 40
    # mean of Beta distribution will be A_collected/field size
    ## Loop over fields ##
    for n, key in enumerate(locinfo.sent_ids):
        sent_obs_probs[n] = pm.Beta(
            "sent_obs_probs_{}".format(key),
            A_collected / (locinfo.field_sizes[key] * cell_area) * sent_beta /
            (1 - A_collected / (locinfo.field_sizes[key] * cell_area)),
            sent_beta,
            value=0.1 * 3600 / (locinfo.field_sizes[key] * cell_area))
        prior_eps[sent_obs_probs[n]] = 0.0005

    sent_obs_probs = pm.Container(sent_obs_probs)

    #### Collect variables ####
    # Positions in this array are relied on by pop_model (slices 0:2, 2:6,
    # 6:9, 9:12 and indices 12, 13), so the order must not change.
    params_ary = pm.Container(
        np.array([
            g_aw, g_bw, f_a1, f_b1, f_a2, f_b2, sig_x, sig_y, corr, sig_x_l,
            sig_y_l, corr_l, lam, mu_r
        ], dtype=object))

    if params.dataset == 'kalbar':
        # factor for kalbar initial spread
        sprd_factor = pm.Uniform("sprd_factor", 0, 1, value=0.1)
        prior_eps[sprd_factor] = 0.01
    else:
        sprd_factor = None

    print('Getting initial model values...')

    #### Run model ####
    @pm.deterministic(plot=False, trace=False)
    def pop_model(params=params, params_ary=params_ary, locinfo=locinfo,
                  wind_data=wind_data, days=days, sprd_factor=sprd_factor):
        '''This function acts as an interface between PyMC and the model.
        Not only does it run the model, but it provides an emergence potential
        based on the population model result projected forward from feasible
        oviposition dates. To modify how this projection happens, edit
        popdensity_to_emergence. Returned values from this function should be
        nearly ready to compare to data.
        '''
        modeltic = time.time()

        ### Alter params with stochastic variables ###

        # g wind function parameters
        params.g_params = tuple(params_ary[0:2])
        # f time of day function parameters
        params.f_params = tuple(params_ary[2:6])
        # Diffusion coefficients
        params.Dparams = tuple(params_ary[6:9])
        params.Dlparams = tuple(params_ary[9:12])
        # Probability of any flight during the day under ideal circumstances
        params.lam = params_ary[12]
        # scaling flight advection to wind advection
        params.mu_r = params_ary[13]

        ### PHASE ONE ###
        # First, get spread probability for each day as a coo sparse matrix
        max_shape = np.array([0, 0])
        pm_args = [(days[0], wind_data, *params.get_model_params(),
                    params.r_start)]
        pm_args.extend([(day, wind_data, *params.get_model_params())
                        for day in days[1:params.ndays]])

        ##### Kalbar wind started recording a day late. Spread the population
        ##### locally before running full model.
        if params.dataset == 'kalbar':
            res = params.domain_info[0] / params.domain_info[1]
            mean_drift = np.array([-25., 15.])
            xdrift_int = int(mean_drift[0] // res)
            xdrift_r = mean_drift[0] % res
            ydrift_int = int(mean_drift[1] // res)
            ydrift_r = mean_drift[1] % res
            longsprd = PM.get_mvn_cdf_values(
                res, np.array([xdrift_r, ydrift_r]),
                PM.Dmat(params_ary[6], params_ary[7], params_ary[8]))
            shrtsprd = PM.get_mvn_cdf_values(
                res, np.array([0., 0.]),
                PM.Dmat(params_ary[9], params_ary[10], params_ary[11]))

            mlen = int(
                max(longsprd.shape[0], shrtsprd.shape[0]) +
                max(abs(xdrift_int), abs(ydrift_int)) * 2)
            sprd = np.zeros((mlen, mlen))
            lbds = [
                int(mlen // 2 - longsprd.shape[0] // 2),
                int(mlen // 2 + longsprd.shape[0] // 2 + 1)
            ]
            sprd[lbds[0] - ydrift_int:lbds[1] - ydrift_int,
                 lbds[0] + xdrift_int:lbds[1] + xdrift_int] = \
                longsprd * sprd_factor
            sbds = [
                int(mlen // 2 - shrtsprd.shape[0] // 2),
                int(mlen // 2 + shrtsprd.shape[0] // 2 + 1)
            ]
            sprd[sbds[0]:sbds[1], sbds[0]:sbds[1]] += \
                shrtsprd * (1 - sprd_factor)
            # Disabled alternative:
            # pmf_list = [sparse.coo_matrix(PM.get_mvn_cdf_values(
            #     params.domain_info[0]/params.domain_info[1],
            #     np.array([0.,0.]),
            #     PM.Dmat(sprd_factor*params_ary[9],
            #             sprd_factor*params_ary[10],params_ary[11])))]
            # Put any probability mass not yet accounted for on the centre
            # cell.
            sprd[int(sprd.shape[0] // 2), int(sprd.shape[0] // 2)] += \
                max(0, 1 - sprd.sum())
            pmf_list = [sparse.coo_matrix(sprd)]
        else:
            pmf_list = []

        ###################### Get pmf_list from multiprocessing
        # NOTE(review): `pool` is not defined in this function; presumably a
        # module-level multiprocessing pool -- confirm.
        pmf_list.extend(pool.starmap(PM.prob_mass, pm_args))
        ######################
        for pmf in pmf_list:
            for dim in range(2):
                if pmf.shape[dim] > max_shape[dim]:
                    max_shape[dim] = pmf.shape[dim]

        r_spread = []  # holds the one-day spread for each release day.

        # Loop-invariant, and needed after the loop by get_populations, so it
        # is computed once here instead of inside the loop body.
        dom_len = params.domain_info[1] * 2 + 1

        # Reshape the prob. mass function of each release day into solution
        # form
        for ii in range(params.r_dur):
            offset = params.domain_info[1] - pmf_list[ii].shape[0] // 2
            r_spread.append(
                sparse.coo_matrix(
                    (pmf_list[ii].data,
                     (pmf_list[ii].row + offset, pmf_list[ii].col + offset)),
                    shape=(dom_len, dom_len)).tocsr())

        ### PHASE TWO ###
        # Pass the probability list, pmf_list, and other info to convolution
        # solver. This will return the finished population model.
        with Capturing() as output:
            if params.dataset == 'kalbar':
                # extend day count by one
                days_ext = [days[0] - 1]
                days_ext.extend(days)
                modelsol = get_populations(r_spread, pmf_list, days_ext,
                                           params.ndays + 1, dom_len,
                                           max_shape, params.r_dur,
                                           params.r_number, params.r_mthd())
                # remove the first one and start where wind started.
                modelsol = modelsol[1:]
            else:
                modelsol = get_populations(r_spread, pmf_list, days,
                                           params.ndays, dom_len, max_shape,
                                           params.r_dur, params.r_number,
                                           params.r_mthd())

        # modelsol now holds the model results for this run as CSR sparse
        # arrays

        # get emergence potential (measured in expected number of wasps
        # previously present whose oviposition would result in emergence on
        # the given date) from the model result
        release_emerg, sentinel_emerg = popdensity_to_emergence(
            modelsol, locinfo)

        # get the expected wasp populations at grid points on sample days
        grid_counts = popdensity_grid(modelsol, locinfo)

        # get the expected wasp populations in cardinal directions
        # (disabled)
        # card_counts = popdensity_card(modelsol,locinfo,params.domain_info)

        ## For the lists release_emerg and sentinel_emerg:
        ##    Each list entry corresponds to a data collection day (one array)
        ##    In each array:
        ##    Each column corresponds to an emergence observation day (as in
        ##        data)
        ##    Each row corresponds to a grid point or sentinel field,
        ##        respectively
        ## For the array grid_counts:
        ##    Each column corresponds to an observation day
        ##    Each row corresponds to a grid point
        ## For the list card_counts:
        ##    Each list entry corresponds to a sampling day (one array)
        ##    Each column corresponds to a step in a cardinal direction
        ##    Each row corresponds to a cardinal direction
        print('{:03.1f} sec./model at {}'.format(
            time.time() - modeltic, time.strftime("%H:%M:%S %d/%m/%Y")),
              end='\r')
        sys.stdout.flush()
        return (release_emerg, sentinel_emerg, grid_counts)  # ,card_counts)

    print('Parsing model output and connecting to Bayesian model...')

    ### Parse the results of pop_model into separate deterministic variables ###
    # Get Poisson probabilities for sentinel field emergence. Parameters:
    # xi is constant, emerg is a list of ndarrays, betas is a 1D array of
    # field probabilities
    Ncollections = len(locinfo.sent_DataFrames)
    sent_poi_rates = []
    for ii in range(Ncollections):
        s_ndays = len(locinfo.sent_DataFrames[ii]['datePR'].unique())
        # All per-iteration values are bound as lambda defaults so each
        # Lambda keeps its own ii-specific parents.
        sent_poi_rates.append(
            pm.Lambda('sent_poi_rate_{}'.format(ii),
                      lambda xi=xi, ndays=s_ndays, betas=sent_obs_probs,
                      emerg_model=pop_model[1][ii]:
                      xi * emerg_model * np.tile(betas, (ndays, 1)).T,
                      trace=False))
    sent_poi_rates = pm.Container(sent_poi_rates)

    # Return Poisson probabilities for release field grid emergence.
    # Parameters: xi is constant, emerg is a list of ndarrays. collection
    # effort is specified in locinfo.
    Ncollections = len(locinfo.release_DataFrames)
    rel_poi_rates = []
    for ii in range(Ncollections):
        r_effort = locinfo.release_collection[ii]  # fraction of max collection
        r_ndays = len(locinfo.release_DataFrames[ii]['datePR'].unique())
        rel_poi_rates.append(
            pm.Lambda('rel_poi_rate_{}'.format(ii),
                      lambda xi=xi, ndays=r_ndays, r_effort=r_effort,
                      beta=em_obs_prob, emerg_model=pop_model[0][ii]:
                      xi * emerg_model * np.tile(r_effort * beta,
                                                 (ndays, 1)).T,
                      trace=False))
    rel_poi_rates = pm.Container(rel_poi_rates)

    @pm.deterministic(plot=False, trace=False)
    def grid_poi_rates(locinfo=locinfo, beta=grid_obs_prob,
                       obs_model=pop_model[2]):
        '''Return Poisson probabilities for grid sampling
        obs_model is an ndarray, sampling effort is specified in locinfo.'''
        return beta * locinfo.grid_samples * obs_model

    # Return Poisson probabilities for cardinal direction sampling
    # obs_model is a list of ndarrays, sampling effort is assumed constant
    # (disabled)
    # card_poi_rates = []
    # for ii,obs in enumerate(pop_model[3]):
    #     card_poi_rates.append(pm.Lambda('card_poi_rate_{}'.format(ii),
    #         lambda beta=card_obs_prob, obs=obs: beta*obs))
    # card_poi_rates = pm.Container(card_poi_rates)

    # Given the expected wasp densities from pop_model, actual wasp densities
    # are modeled as a thinned Poisson random variable about that mean.
    # Each wasp in the area then has a small probability of being seen.

    ### Connect sentinel emergence data to model ###
    N_sent_collections = len(locinfo.sent_DataFrames)
    # Create list of collection variables
    sent_collections = []
    for ii in range(N_sent_collections):
        # Apparently, pymc does not play well with 2D array parameters
        sent_collections.append(
            np.empty(sent_poi_rates[ii].value.shape, dtype=object))
        for n in range(sent_collections[ii].shape[0]):
            for m in range(sent_collections[ii].shape[1]):
                sent_collections[ii][n, m] = pm.Poisson(
                    "sent_em_obs_{}_{}_{}".format(ii, n, m),
                    sent_poi_rates[ii][n, m],
                    value=float(locinfo.sentinel_emerg[ii][n, m]),
                    observed=True)
    sent_collections = pm.Container(sent_collections)

    ### Connect release-field emergence data to model ###
    N_release_collections = len(locinfo.release_DataFrames)
    # Create list of collection variables
    rel_collections = []
    for ii in range(N_release_collections):
        rel_collections.append(
            np.empty(rel_poi_rates[ii].value.shape, dtype=object))
        for n in range(rel_collections[ii].shape[0]):
            for m in range(rel_collections[ii].shape[1]):
                rel_collections[ii][n, m] = pm.Poisson(
                    "rel_em_obs_{}_{}_{}".format(ii, n, m),
                    rel_poi_rates[ii][n, m],
                    value=float(locinfo.release_emerg[ii][n, m]),
                    observed=True)
    rel_collections = pm.Container(rel_collections)

    ### Connect grid sampling data to model ###
    grid_obs = np.empty(grid_poi_rates.value.shape, dtype=object)
    for n in range(grid_obs.shape[0]):
        for m in range(grid_obs.shape[1]):
            grid_obs[n, m] = pm.Poisson("grid_obs_{}_{}".format(n, m),
                                        grid_poi_rates[n, m],
                                        value=float(locinfo.grid_obs[n, m]),
                                        observed=True)
    grid_obs = pm.Container(grid_obs)

    ### Connect cardinal direction data to model ### (disabled)
    # N_card_collections = len(locinfo.card_obs_DataFrames)
    # # Create list of sampling variables
    # card_collections = []
    # for ii in range(N_card_collections):
    #     card_collections.append(np.empty(card_poi_rates[ii].value.shape,
    #                                      dtype=object))
    #     for n in range(card_collections[ii].shape[0]):
    #         for m in range(card_collections[ii].shape[1]):
    #             card_collections[ii][n,m] = pm.Poisson(
    #                 "card_obs_{}_{}_{}".format(ii,n,m),
    #                 card_poi_rates[ii][n,m],
    #                 value=locinfo.card_obs[ii][n,m],
    #                 observed=True, plot=False)
    # card_collections = pm.Container(card_collections)

    ### Collect model ###
    # The node list is the same for every dataset except that kalbar also
    # carries sprd_factor, inserted directly after mu_r.
    model_nodes = [
        lam, f_a1, f_a2, f_b1_p, f_b2_p, f_b1, f_b2, g_aw, g_bw, sig_x, sig_y,
        corr_p, corr, sig_x_l, sig_y_l, corr_l_p, corr_l, mu_r
    ]
    if params.dataset == 'kalbar':
        model_nodes.append(sprd_factor)
    model_nodes.extend([
        grid_obs_prob, xi, em_obs_prob, A_collected, sent_obs_probs,
        params_ary, pop_model, grid_poi_rates, rel_poi_rates, sent_poi_rates,
        grid_obs, rel_collections, sent_collections
    ])
    Bayes_model = pm.Model(model_nodes)

    ######################################################################
    #####              Run Methods and Interactive Menu              #####
    ######################################################################

    def MAP_run(outname=None):
        '''Find Maximum a posteriori distribution'''
        tic = time.time()
        M = pm.MAP(Bayes_model, prior_eps)
        print('Fitting....')
        M.fit()

        # Return statistics
        print('Estimate complete. Time elapsed: {}'.format(time.time() - tic))
        print('Free stochastic variables: {}'.format(M.len))
        print('Joint log-probability of model: {}'.format(M.logp))
        print('Max joint log-probability of model: {}'.format(M.logp_at_max))
        print('Maximum log-likelihood: {}'.format(M.lnL))
        print("Akaike's Information Criterion {}".format(M.AIC), flush=True)
        print('---------------Variable estimates---------------')
        for var in Bayes_model.stochastics:
            print('{} = {}'.format(var, var.value))

        # Save result to file
        if outname is None:
            outname = 'Max_aPosteriori_Estimate.txt'
        with open(outname, 'w') as fobj:
            fobj.write('Time elapsed: {}\n'.format(time.time() - tic))
            fobj.write('Free stochastic variables: {}\n'.format(M.len))
            fobj.write('Joint log-probability of model: {}\n'.format(M.logp))
            fobj.write('Max joint log-probability of model: {}\n'.format(
                M.logp_at_max))
            fobj.write('Maximum log-likelihood: {}\n'.format(M.lnL))
            fobj.write("Akaike's Information Criterion {}\n".format(M.AIC))
            fobj.write('---------------Variable estimates---------------\n')
            for var in Bayes_model.stochastics:
                fobj.write('{} = {}\n'.format(var, var.value))
        print('Result saved to {}.'.format(outname))
        return M

    def norm_run(fname, outname=None):
        '''Find normal approximation'''
        # Initialised before the try so the except handler can tell whether
        # the NormApprox object (and therefore its database) was ever created.
        M = None
        try:
            tic = time.time()
            M = pm.NormApprox(Bayes_model, eps=prior_eps, db='hdf5',
                              dbname=fname, dbmode='a', dbcomplevel=0)
            print('Fitting....')
            M.fit()

            # Return statistics
            print('Estimate complete. Time elapsed: {}'.format(time.time() -
                                                                tic))
            print('Free stochastic variables: {}'.format(M.len))
            print('Joint log-probability of model: {}'.format(M.logp))
            print('Max joint log-probability of model: {}'.format(
                M.logp_at_max))
            print("Akaike's Information Criterion {}".format(M.AIC),
                  flush=True)
            print('---------------Variable estimates---------------')
            print('Estimated means: ')
            # Bayes_model is the model built above; the previous name used
            # here (bio_model) is not defined anywhere in this function.
            for var in Bayes_model.stochastics:
                print('{} = {}'.format(var, M.mu[var]))
            print('Estimated variances: ')
            for var in Bayes_model.stochastics:
                print('{} = {}'.format(var, M.C[var]))

            # Save result to file
            if outname is None:
                outname = "Normal_approx.txt"
            with open(outname, 'w') as fobj:
                fobj.write('Time elapsed: {}\n'.format(time.time() - tic))
                fobj.write('Free stochastic variables: {}\n'.format(M.len))
                fobj.write('Joint log-probability of model: {}\n'.format(
                    M.logp))
                fobj.write('Max joint log-probability of model: {}\n'.format(
                    M.logp_at_max))
                fobj.write("Akaike's Information Criterion {}\n".format(M.AIC))
                fobj.write(
                    '---------------Variable estimates---------------\n')
                fobj.write('Estimated means: \n')
                for var in Bayes_model.stochastics:
                    fobj.write('{} = {}\n'.format(var, M.mu[var]))
                fobj.write('Estimated variances: \n')
                for var in Bayes_model.stochastics:
                    fobj.write('{} = {}\n'.format(var, M.C[var]))
            print('These results have been saved to {}.'.format(outname))
        except Exception as e:
            print(e)
            if M is not None:
                # Only close a database that was actually opened; otherwise
                # the original exception would be masked by an
                # UnboundLocalError.
                print('Exception: database closing...')
                M.db.close()
                print('Database closed.')
            raise

        return M

    # Parse run type
    if RUNFLAG == 'MAP_RUN':
        M = MAP_run(outname)
    elif RUNFLAG is not None:
        M = norm_run(RUNFLAG, outname)
        M.db.close()
    else:
        print(
            '----- Maximum a posteriori estimates & Normal approximations -----'
        )
        while True:
            print(" 'map': Calculate maximum a posteriori estimate")
            print("'norm': Calculate normal approximation")
            print("'quit': Quit.")
            cmd = input('Enter: ')
            cmd = cmd.strip()
            cmd = cmd.lower()
            if cmd == 'map':
                M = MAP_run(outname)
                # Option to enter IPython
                cmd_py = input('Enter IPython y/[n]:')
                cmd_py = cmd_py.strip()
                cmd_py = cmd_py.lower()
                if cmd_py == 'y' or cmd_py == 'yes':
                    import IPython
                    IPython.embed()
            elif cmd == 'norm':
                fname = input("Enter database name or 'back' to cancel:")
                fname = fname.strip()
                if fname == 'q' or fname == 'quit':
                    return
                elif fname == 'b' or fname == 'back':
                    continue
                elif fname[-3:] != '.h5':
                    fname = fname + '.h5'
                M = norm_run(fname, outname)
                try:
                    print(
                        'For covariances, enter IPython and request a covariance'
                        +
                        ' matrix by passing variables in the following syntax:\n'
                        + 'M.C[var1,var2,...,varn]\n' +
                        'Example: M.C[f_a1,f_a2] gives the covariance matrix of\n'
                        + ' f_a1 and f_a2.')
                    # Option to enter IPython
                    cmd_py = input('Enter IPython y/[n]:')
                    cmd_py = cmd_py.strip()
                    cmd_py = cmd_py.lower()
                    if cmd_py == 'y' or cmd_py == 'yes':
                        import IPython
                        IPython.embed()
                    M.db.close()
                    print('Database closed.')
                except Exception as e:
                    print(e)
                    print('Exception: database closing...')
                    M.db.close()
                    print('Database closed.')
                    raise
            elif cmd == 'quit' or cmd == 'q':
                return
            else:
                print('Command not recognized.')