def inference(self, iter_=5000, burn=1000):
    """Build the topic model over ``self.bw`` and sample it with MCMC.

    For each of the ``self.D`` documents a Dirichlet(``self.alpha``) topic
    distribution and one categorical topic assignment per entry of
    ``self.bw[d]`` are created; for each of the ``self.K`` topics a
    Dirichlet(``self.beta``) distribution is created.  Every entry
    ``self.bw[d][w]`` is an observed categorical draw from the distribution
    selected by its topic assignment.  The sampler is stored on
    ``self.mcmc``.

    Args:
        iter_: Value forwarded to ``MCMC.sample`` as ``iter``.
        burn: Value forwarded to ``MCMC.sample`` as ``burn``.
    """
    # Per-document topic proportions.
    theta = pm.Container([
        pm.CompletedDirichlet(
            "theta_%s" % d,
            pm.Dirichlet("ptheta_%s" % d, theta=self.alpha))
        for d in range(self.D)
    ])

    # Per-topic distributions over the observed categories.
    phi = pm.Container([
        pm.CompletedDirichlet(
            "phi_%s" % k,
            pm.Dirichlet("pphi_%s" % k, theta=self.beta))
        for k in range(self.K)
    ])

    # One vector of topic assignments per document, randomly initialised.
    z_d = pm.Container([
        pm.Categorical(
            "z_%s" % d,
            p=theta[d],
            value=np.random.randint(self.K, size=len(self.bw[d])),
            size=len(self.bw[d]))
        for d in range(self.D)
    ])

    # The topic lookup must be a node of the graph.  Reading the assignment
    # with ``get_value()`` while building the model would pin every word to
    # its *initial* topic and disconnect the observations from ``z_d``.
    w_z = pm.Container([
        pm.Categorical(
            "w_%s_%s" % (d, w),
            p=pm.Lambda(
                "phi_z_%s_%s" % (d, w),
                lambda z=z_d[d][w], phi=phi: phi[z]),
            value=self.bw[d][w],
            observed=True)
        for d in range(self.D)
        for w in range(len(self.bw[d]))
    ])

    model = pm.Model([theta, phi, z_d, w_z])
    self.mcmc = pm.MCMC(model)
    self.mcmc.sample(iter=iter_, burn=burn)
def create_pymc_model(x_mat, y_vec, prior_sigma=5.0):
    """Build a PyMC logistic-regression model.

    One Normal(0, 1 / prior_sigma**2) weight node and one observed node
    holding the matching column of ``x_mat`` are created per feature column.
    The Bernoulli success probability is ``sigmoid(x . w)`` where ``sigmoid``
    is provided elsewhere in the module.

    Args:
        x_mat: Two-dimensional design matrix (rows are samples).
        y_vec: Iterable of labels; ``-1`` is mapped to 0 and every other
            value to 1.
        prior_sigma: Standard deviation of the weight prior.

    Returns:
        A ``pymc.Model`` containing the prediction node, the weight and
        feature containers, and the observed label node.
    """
    tau_mc = 1. / (prior_sigma**2)
    _, d = np.shape(x_mat)

    w_nodes = []
    x_nodes = []
    for i in np.arange(0, d):
        w_nodes.append(pymc.Normal('w' + str(i + 1) + '_mc', 0.0, tau_mc))
        x_nodes.append(
            pymc.Normal('x' + str(i + 1) + '_mc', 0.0, 1.0,
                        value=x_mat[:, i], observed=True))
    w_mc = np.array(w_nodes, dtype=object)
    x_mc = np.array(x_nodes, dtype=object)

    @pymc.deterministic
    def pred_mu(w_mc=w_mc, x_mc=x_mc):
        return sigmoid(np.dot(x_mc, np.transpose(w_mc)))

    # A list comprehension (rather than ``map``) keeps this a real integer
    # array on Python 3, where ``np.array(map(...))`` wraps the iterator in a
    # 0-d object array.
    labels = np.array([0 if val == -1 else +1 for val in y_vec])
    y_mc = pymc.Bernoulli('y_mc', p=pred_mu, value=labels, observed=True)

    return pymc.Model(
        [pred_mu, pymc.Container(w_mc), pymc.Container(x_mc), y_mc])
def get_z_data(self, p, p_pos, q):
    """Build a two-topic LDA model and return one ``WordCountVecRV`` per document.

    Args:
        p: Number of documents.
        p_pos: Not used by this method.
        q: Total number of unique words across all documents.

    Returns:
        A list of ``p`` ``WordCountVecRV`` objects, each tied to the LDA
        model and carrying ``doc_idx`` / ``num_unique_words`` metadata.
    """
    K = 2  # Num topics
    M = p  # Num documents
    N = q  # Total num of unique words across all documents
    alpha = 1.0  # Concentration of the per-document Dirichlet over topics
    beta = 1.0  # Concentration of the per-topic Dirichlet over words

    phi = pymc.Container([
        pymc.CompletedDirichlet(
            name="phi_" + str(k),
            D=pymc.Dirichlet(name="phi_temp_" + str(k),
                             theta=beta * numpy.ones(N)),
        ) for k in range(K)
    ])
    theta = pymc.Container([
        pymc.CompletedDirichlet(
            name="theta_" + str(m),
            D=pymc.Dirichlet(name="theta_temp_" + str(m),
                             theta=alpha * numpy.ones(K)),
        ) for m in range(M)
    ])
    z = pymc.Container([
        pymc.Categorical(name="z_" + str(m), p=theta[m], size=N)
        for m in range(M)
    ])
    # The separator between the two indices keeps node names unique; without
    # it (m=1, n=10) and (m=11, n=0) would both be called "phi_z_110".
    w = pymc.Container([
        pymc.Categorical(
            name="w_" + str(m) + "_" + str(n),
            p=pymc.Lambda(
                "phi_z_" + str(m) + "_" + str(n),
                lambda z_in=z[m][n], phi_in=phi: phi_in[z_in],
            ),
        ) for m in range(M) for n in range(N)
    ])
    lda = pymc.Model([w, z, theta, phi])

    z_rvs = []
    for m in range(M):
        metadata = {"doc_idx": m, "num_unique_words": N}
        # Note: w_0_0 is just a dummy argument that must be present in the
        # pymc.Model.
        z_rvs.append(WordCountVecRV(model=lda, name="w_0_0", metadata=metadata))
    return z_rvs
def __init__(self, corpus, K=10, iterations=1000, burn=100):
    """Build an LDA model for ``corpus`` and immediately fit it with MCMC.

    Args:
        corpus: Object exposing ``wordCount``, ``documentCount`` and
            ``observations`` (one sequence of word ids per document).
        K: Number of topics.
        iterations: Value forwarded to ``MCMC.sample`` as ``iter``.
        burn: Value forwarded to ``MCMC.sample`` as ``burn``.
    """
    print("Building model ...")
    self.K = K
    self.V = corpus.wordCount + 1
    self.M = corpus.documentCount
    self.alpha = np.ones(self.K)
    self.beta = np.ones(self.V)
    self.corpus = corpus
    self.observations = np.array(corpus.observations)

    # Topic -> word distributions.
    self.phi = np.empty(self.K, dtype=object)
    for i in range(self.K):
        self.phi[i] = pm.CompletedDirichlet(
            "Phi[%i]" % i, pm.Dirichlet("phi[%i]" % i, theta=self.beta))
    self.phi = pm.Container(self.phi)

    # Document -> topic distributions.
    self.theta = np.empty(self.M, dtype=object)
    for i in range(self.M):
        self.theta[i] = pm.CompletedDirichlet(
            "Theta[%i]" % i, pm.Dirichlet("theta[%i]" % i, theta=self.alpha))
    self.theta = pm.Container(self.theta)

    # One vector-valued assignment node per document.  Allocate exactly one
    # slot per document: ``self.observations.shape`` is 2-D whenever all
    # documents have the same length, which would not match the
    # one-node-per-document assignments below.
    self.z = np.empty(self.M, dtype=object)
    for i in range(self.M):
        doc_len = len(self.observations[i])
        self.z[i] = pm.Categorical(
            "z[%i]" % i,
            size=doc_len,
            p=self.theta[i],
            value=np.random.randint(self.K, size=doc_len))
    self.z = pm.Container(self.z)

    # Observed words, each drawn from the topic chosen by its assignment.
    self.w = []
    for i in range(self.M):
        self.w.append([])
        for j in range(len(self.observations[i])):
            self.w[i].append(
                pm.Categorical(
                    "w[%i][%i]" % (i, j),
                    p=pm.Lambda(
                        "phi[z[%i][%i]]" % (i, j),
                        lambda z=self.z[i][j], phi=self.phi: phi[z]),
                    value=self.observations[i][j],
                    observed=True))
    self.w = pm.Container(self.w)

    self.mcmc = pm.MCMC(pm.Model([self.theta, self.phi, self.z, self.w]))
    print("Fitting model ...")
    self.mcmc.sample(iter=iterations, burn=burn)
def setup_inference(self):
    """Build the multi-wavelength Fourier-series model and store it on ``self``.

    Creates ``1 + 2 * self.N`` Normal coefficients per wavelength, one
    wavelength node per entry of ``self.wavelengths`` and a noise-scale node,
    and ties them to ``self.ydata`` through a Normal likelihood whose mean is
    the Fourier series evaluated at ``self.xdata``.

    Side effects: sets ``self.vector_fourier_series``, ``self.model1``,
    ``self.setup`` (True) and ``self.mcmc_uptodate`` (False).

    Returns:
        The dictionary of model nodes and data, also stored as
        ``self.model1``.
    """
    wavelength_number = len(self.wavelengths)
    t = 1. / 5.**2

    # Coefficient layout: index ``k * wavelength_number + w`` holds the k-th
    # coefficient of wavelength ``w`` (k = 0 constant, odd k cosine, even k
    # sine).
    C = pymc.Container([
        pymc.Normal("c_%i_%i" % (i, x), mu=0, tau=t)
        for i in range(1 + 2 * self.N)
        for x in range(wavelength_number)
    ])

    # Noise scale with log-probability -log|value|.
    @pymc.stochastic(observed=False)
    def sigma(value=1):
        return -np.log(abs(value))

    qw_sigs = pymc.Container([
        pymc.HalfCauchy("qw_sigs_%i" % x, beta=10, alpha=1)
        for x in range(wavelength_number)
    ])

    if self.wavelength_sd_defined:
        qw = pymc.Container([
            pymc.distributions.Lognormal(
                'qw_%i' % x,
                mu=self.wavelengths[x],
                tau=1. / self.wavelength_sd[x] ** 2)
            for x in range(wavelength_number)
        ])
    else:
        # NOTE(review): this evaluates as (1 / wavelength) * 0.125 because of
        # operator precedence - confirm that this is the intended precision.
        qw = pymc.Container([
            pymc.distributions.Normal(
                'qw_%i' % x,
                mu=self.wavelengths[x],
                tau=1. / self.wavelengths[x]*.125)
            for x in range(wavelength_number)
        ])

    def fourier_series(C, N, QW, x, wavelength_number):
        """Evaluate the summed Fourier series of every wavelength at ``x``."""
        xs = np.asarray(x)
        v = np.zeros(xs.shape, dtype=float)
        for w in range(wavelength_number):
            # The constant term is added once per wavelength.  It used to be
            # added to the whole vector once per sample, which scaled it by
            # len(x).
            v = v + C[w]
            for i in range(1, N + 1):
                angle = 2 * np.pi / QW[w] * i * xs
                v = (v
                     + C[(2 * i - 1) * wavelength_number + w] * np.cos(angle)
                     + C[(2 * i) * wavelength_number + w] * np.sin(angle))
        return v

    self.vector_fourier_series = np.vectorize(fourier_series)

    # Define the form of the model and likelihood
    @pymc.deterministic
    def y_model(C=C, x=self.xdata, qw=qw, nn=self.N,
                wavelength_number=wavelength_number):
        return fourier_series(C, nn, qw, x, wavelength_number)

    y = pymc.Normal('y', mu=y_model, tau=1. / sigma ** 2,
                    observed=True, value=self.ydata)

    # package the full model in a dictionary
    self.model1 = dict(C=C, qw=qw, sigma=sigma, qw_sigs=qw_sigs,
                       y_model=y_model, y=y,
                       x_values=self.xdata, y_values=self.ydata)
    self.setup = True
    self.mcmc_uptodate = False
    return self.model1
def __init__(self, input, sampler, db='ram', eps=.001, diff_order=5, verbose=0, tune_interval=10):
    """Initialise the MAP machinery around an existing sampler.

    The nodes handed to ``MAP.__init__`` are the nodes of ``input`` plus the
    sampler's nodes, minus the sampler's own stochastics.  Afterwards every
    stochastic of this object whose Markov blanket contains at least one of
    the sampler's stochastics is recorded in
    ``self.stochastics_to_integrate``.

    Args:
        input: Anything ``pm.Container`` accepts.
        sampler: Object exposing ``nodes`` and ``stochastics``.
        db: Accepted but not used in this method.
        eps: Forwarded to ``MAP.__init__``.
        diff_order: Forwarded to ``MAP.__init__``.
        verbose: Stored on ``self.verbose``.
        tune_interval: Stored on ``self.tune_interval``.
    """
    wrapped = pm.Container(input)
    map_nodes = (wrapped.nodes | sampler.nodes) - sampler.stochastics
    MAP.__init__(self, input=map_nodes, eps=eps, diff_order=diff_order)

    self.tune_interval = tune_interval
    self.verbose = verbose
    self.sampler = sampler

    # Figure out which stochastics' log-probabilities need to be averaged.
    self.stochastics_to_integrate = set()
    for stoch in self.stochastics:
        blanket = stoch.markov_blanket
        touches_sampler = False
        for sampled in sampler.stochastics:
            if sampled in blanket:
                touches_sampler = True
        if touches_sampler:
            self.stochastics_to_integrate.add(stoch)
def linear_setup(df, ind_cols, dep_col):
    '''
    Build a multi-variable linear regression as a PyMC model.

    Inputs: pandas Data Frame, list of strings for the independent
    variables, single string for the dependent variable
    Output: PyMC Model
    '''
    n_features = len(ind_cols)

    # Intercept and the "err" node, which is passed to the likelihood below
    # as its third positional argument.
    b0 = pymc.Normal("b0", 0, 0.0001)
    err = pymc.Uniform("err", 0, 500)

    # Object arrays holding one coefficient node and one observed-feature
    # node per independent column.
    b = np.empty(n_features, dtype=object)
    x = np.empty(n_features, dtype=object)

    for position in range(n_features):
        b[position] = pymc.Normal("b%d" % (position + 1), 0, 0.0001)

    for position, column in enumerate(ind_cols):
        observed_values = np.array(df[column])
        x[position] = pymc.Normal("x%d" % (position + 1), 0, 1,
                                  value=observed_values, observed=True)

    # Linear predictor: intercept plus the dot product of coefficients and
    # feature vectors.
    @pymc.deterministic
    def y_pred(b0=b0, b=b, x=x):
        return b0 + b.dot(x)

    target = np.array(df[dep_col])
    y = pymc.Normal("y", y_pred, err, value=target, observed=True)

    nodes = [b0, pymc.Container(b), err, pymc.Container(x), y, y_pred]
    return pymc.Model(nodes)
def __setup_sigma(self):
    """Create one Uniform(0.01, 0.3) sigma node per equivalence class.

    The nodes (initial value 0.15) are the sigma parameters of the
    Logit-normal distribution and are stored, wrapped in a container, on
    ``self.sigma``.
    """
    sigma_nodes = []
    for j in xrange(0, self.num_equiv):
        node = pymc.Uniform('sigma_%s' % j, lower=0.01, upper=0.3, value=0.15)
        sigma_nodes.append(node)
    self.sigma = pymc.Container(sigma_nodes)
def __setup_eqv(self):
    """Assign every classifier a categorical equivalence-class node.

    Classifier ``i`` gets a Categorical node with probabilities
    ``self.theta[i]`` and a random initial class drawn from
    ``[0, self.num_equiv)``.  The nodes are stored on ``self.eqv``.
    """
    class_nodes = []
    for i in xrange(0, self.num_classifiers):
        start_class = numpy.random.randint(0, self.num_equiv)
        class_nodes.append(
            pymc.Categorical('categ_%s' % i,
                             p=self.theta[i],
                             value=start_class))
    self.eqv = pymc.Container(class_nodes)
def __setup_obs(self):
    """Create the observed Normal node of every classifier.

    Observation ``i`` is ``self.logit`` of the i-th entry of
    ``self.observations``; its mean and precision are those of the
    equivalence class currently selected by ``self.eqv[i]``.  The nodes are
    stored on ``self.obs``.
    """
    # ``self.mu`` and ``self.sigma`` are passed as Lambda parents so that the
    # lambdas receive their current *values* and the graph records the
    # dependency.  Closing over ``self`` instead would hand back PyMC node
    # objects and hide the dependency from the sampler.
    self.obs = pymc.Container([
        pymc.Normal(
            'obs_%s' % i,
            mu=pymc.Lambda(
                'omu_%s' % i,
                lambda cls=self.eqv[i], mu=self.mu: mu[cls]),
            tau=pymc.Lambda(
                'otau_%s' % i,
                lambda cls=self.eqv[i], sigma=self.sigma:
                    1.0 / (sigma[cls] ** 2)),
            value=self.logit(acc),
            observed=True)
        for i, acc in enumerate(self.observations)
    ])
def __setup_mu(self):
    """Create one Normal mu node per equivalence class.

    Each node is the mu parameter of the Logit-Normal distribution, with
    mean ``self.mu_star``, precision ``1 / self.sigma_star**2`` and initial
    value 0.6.  The nodes are stored on ``self.mu``.
    """
    mu_nodes = []
    for j in xrange(0, self.num_equiv):
        precision = 1.0 / (self.sigma_star**2)
        mu_nodes.append(
            pymc.Normal('mu_%s' % j,
                        mu=self.mu_star,
                        tau=precision,
                        value=0.6))
    self.mu = pymc.Container(mu_nodes)
def linear_setup(df, ind_cols, dep_col):
    '''
    Build a multi-variable linear regression as a PyMC model.

    Inputs: pandas Data Frame, list of independent features, outcome var
    Output: PyMC Model
    '''
    # Non-informative prior for the intercept.
    b0 = pm.Normal('b0', 0, 0.0001)
    # ``err`` is used as the precision of ``y`` below, so it must be
    # positive.  A zero-centred Normal prior draws a negative initial value
    # about half of the time, which the likelihood cannot accept; the
    # half-normal keeps the same shape on the positive half-line.
    err = pm.HalfNormal('err', tau=0.0001)

    # initialize NumPy arrays for b and x with same size as no of covariates
    b = np.empty(len(ind_cols), dtype=object)
    x = np.empty(len(ind_cols), dtype=object)

    # Non-informative priors for each coefficient
    for i in range(len(b)):
        b[i] = pm.Normal('b' + str(i + 1), 0, 0.0001)

    # One observed node per covariate, holding that column's data
    for i, col in enumerate(ind_cols):
        x[i] = pm.Normal('x' + str(i + 1), 0, 1,
                         value=np.array(df[col]), observed=True)

    # Linear predictor: intercept plus the dot product of coefficients and
    # covariates.
    @pm.deterministic
    def y_pred(b0=b0, b=b, x=x):
        return b0 + b.dot(x)

    # Modeling observed y values
    y = pm.Normal('y', y_pred, err,
                  value=np.array(df[dep_col]), observed=True)

    # Returning the required model
    return pm.Model([b0, pm.Container(b), err, pm.Container(x), y, y_pred])
def wrapper(priorname='', low=[], up=[], other_args={}, optimized=False):
    """Build one prior node per bin and return them in a PyMC container.

    The prior class is looked up first in ``priors``, then as an attribute of
    ``pymc``; if neither has it, a warning is printed and
    ``pymc.DiscreteUniform`` is used.

    Args:
        priorname: Name of the prior to use.
        low: Lower bound of every bin (read only, never modified).
        up: Upper bound of every bin (read only, never modified).
        other_args: Extra keyword arguments for the prior; they override the
            defaults ``name``, ``value``, ``lower`` and ``upper``.
        optimized: Not used by this function.

    Returns:
        A ``pymc.Container`` holding the nodes ``truth0``, ``truth1``, ...
    """
    if priorname in priors:
        priormethod = priors[priorname]
    elif hasattr(pymc, priorname):
        priormethod = getattr(pymc, priorname)
    else:
        # Single-argument call form: prints the same text on Python 2 and 3.
        print('WARNING: prior name not found! Falling back to DiscreteUniform...')
        priormethod = pymc.DiscreteUniform

    truthprior = []
    for index, (l, u) in enumerate(zip(low, up)):
        args = dict(name='truth%d' % index,
                    value=l + (u - l) / 2,
                    lower=l,
                    upper=u)
        # Caller-supplied arguments take precedence over the defaults.
        args.update(other_args)
        truthprior.append(priormethod(**args))
    return pymc.Container(truthprior)
def __init__(self, F, G, V, W, m_0, C_0, Y_vals = None):
    """
    D = DLM(F, G, V, W, m_0, C_0[, Y_vals])

    Returns special NormalSubmodel instance representing the dynamic linear
    model formed by F, G, V and W.

    Resulting probability model:

        theta[0] | m_0, C_0 ~ N(m_0, C_0)
        theta[t] | theta[t-1], G[t], W[t] ~ N(G[t] theta[t-1], W[t]), t = 1..T
        Y[t] | theta[t], F[t], V[t] ~ N(F[t] theta[t], V[t]), t = 0..T

    Arguments F and G should be dictionaries keyed by name of component.
        F[comp] and G[comp] should be lists.
        F[comp][t] should be the design vector of component 'comp' at time t.
        G[comp][t] should be the system matrix.

    Argument W should be either a number between 0 and 1 or a dictionary of
    lists keyed by name of component.
        If a dictionary of lists, W[comp][t] should be the system covariance
        or variance at time t.
        If a scalar, W should be the discount factor for the DLM (not
        implemented yet: NotImplementedError is raised).

    Arguments V and Y_vals, if given, should be lists.
        V[t] should be the observation covariance or variance at time t.
        Y_vals[t] should give the value of output Y at time t.

    Arguments m_0 and C_0 should be dictionaries keyed by name of component.
        m_0[comp] should be the mean of theta[comp][0].
        C_0[comp] should be the covariance or variance of theta[comp][0].

    Note: if multiple components are correlated in W or V, they should be
    made into a single component.

    D.comp is a handle to a list.
        D.comp[t] is a Stochastic representing the value of system state
        'theta' sliced according to component 'comp' at time t.

    D.theta is a dictionary of lists analogous to F, G, V and W.

    D.Y is a list. D.Y[t] is a Stochastic representing the value of the
    output 'Y' at time t.
    """
    self.comps = F.keys()
    self.F = dict_to_recarray(F)
    self.G = dict_to_recarray(G)
    self.V = pymc.ListContainer(V)

    if np.isscalar(W):
        self.discount = True
        self.delta = W
    else:
        self.W = dict_to_recarray(W)
        self.discount = False
        self.delta = None

    if self.discount:
        # ``NotImplemented`` is a comparison sentinel, not an exception;
        # raising it only produces a confusing TypeError.
        raise NotImplementedError("Have yet to code up the discount factor.")

    self.m_0 = dict_to_recarray(m_0)
    self.C_0 = dict_to_recarray(C_0)
    self.T = len(self.V)

    theta = {}
    theta_mean = {}
    Y_mean = []
    Y = []

    # ==============
    # = Make theta =
    # ==============
    for comp in self.comps:
        # Is diagonal the covariance or variance?
        first_W = self.W[comp][0]
        if isinstance(first_W, pymc.Variable):
            diag = isvector(first_W.value)
        else:
            diag = isvector(first_W)

        # Normal variates if diagonal, MV normal otherwise.
        state_dist = pymc.Normal if diag else pymc.MvNormal

        theta[comp] = [state_dist('%s[0]' % comp, m_0[comp], C_0[comp])]
        theta_mean[comp] = []
        for t in range(1, self.T):
            # theta_mean[comp] starts at t = 1, hence the t-1 index below.
            theta_mean[comp].append(
                pymc.LinearCombination('%s_mean[%i]' % (comp, t),
                                       [G[comp][t-1]],
                                       [theta[comp][t-1]]))
            theta[comp].append(
                state_dist('%s[%i]' % (comp, t),
                           theta_mean[comp][t-1],
                           W[comp][t-1]))

    self.theta = dict_to_recarray(theta)
    self.theta_mean = dict_to_recarray(theta_mean)

    # ==========
    # = Make Y =
    # ==========
    Y_diag = isvector(self.V.value[0])
    # Normal variates if diagonal, MV normal otherwise.
    obs_dist = pymc.Normal if Y_diag else pymc.MvNormal

    for t in range(self.T):
        x_coef = []
        y_coef = []
        for comp in self.comps:
            x_coef.append(self.F[comp][t])
            y_coef.append(theta[comp][t])

        Y_mean.append(pymc.LinearCombination('Y_mean[%i]' % t, x_coef, y_coef))
        Y.append(obs_dist('Y[%i]' % t, Y_mean[t], V[t]))

        # If data provided, use it.
        if Y_vals is not None:
            Y[t].value = Y_vals[t]
            Y[t].observed = True

    self.Y_mean = pymc.Container(np.array(Y_mean))
    self.Y = pymc.Container(np.array(Y))

    # No sense creating a NormalSubmodel here... just stay a ListContainer.
    NormalSubmodel.__init__(self, [F,G,W,V,m_0,C_0,Y,theta,theta_mean,Y_mean])
# Priors: one intercept and one coefficient per feature column.
beta = pm.Normal("beta", 0, 0.0001)
alpha = np.empty(d, dtype=object)
for i in range(d):
    alpha[i] = pm.Normal('alpha_%i' % i, 0, 0.0001)


# Linear predictor for the training data.
@pm.deterministic
def linear_regress(x=x_data, alpha=alpha, beta=beta):
    return x.dot(alpha) + beta


y = pm.Normal('y', linear_regress, prec, value=y_data, observed=True)

# Assemble the model and draw posterior samples.
model = pm.Model([y, std, prec, pm.Container(alpha), beta])
mcmc = pm.MCMC(model)
mcmc.sample(iter=100000, burn=50000, thin=10)

# Posterior means of the coefficients and of the intercept.
ae = np.array(
    [np.mean(mcmc.trace('alpha_%i' % i)[:], axis=0) for i in range(d)],
    dtype=float)
be = np.mean(mcmc.trace('beta')[:], axis=0)

# Two blank lines, then predictions for the held-out data next to the
# targets and the squared error.
for _ in range(2):
    print()
yh = xt.dot(ae) + be
print('Yh Yt MSE')
for i in range(yt.shape[0]):
    residual = yh[i] - yt[i]
    print(yh[i], yt[i], residual**2)
def main(mcmc_args=None): print('Setting up parameters and priors...') params = Params() # Set up location here with command line arguments in a list. params.cmd_line_chg(['--kalbar']) assert params.site_name + 'fields.txt' == 'data/kalbarfields.txt' # Set parameters specific to Bayesian runs params.PLOT = False params.OUTPUT = False # This sends a message to CalcSol on whether or not to use CUDA if params.CUDA: globalvars.cuda = True else: globalvars.cuda = False # get wind data and day labels wind_data, days = PM.get_wind_data(*params.get_wind_params()) params.ndays = len(days) # reduce domain params.domain_info = (10000.0, 400) #25 m sided cells domain_res = params.domain_info[0] / params.domain_info[1] cell_area = domain_res**2 locinfo = LocInfo(params.dataset, params.coord, params.domain_info) ###################################################################### ##### Model Priors ##### ###################################################################### lam = pm.Beta("lam", 5, 1, value=0.95) f_a1 = pm.TruncatedNormal("f_a1", 6, 0.3, 0, 9, value=6) f_a2 = pm.TruncatedNormal("f_a2", 20, 0.3, 15, 24, value=20) f_b1_p = pm.Gamma("fb1_p", 2, 1, value=1.5, trace=False, plot=False) #alpha,beta parameterization @pm.deterministic(trace=True, plot=True) def f_b1(f_b1_p=f_b1_p): return f_b1_p + 1 f_b2_p = pm.Gamma("fb2_p", 2, 1, value=1.5, trace=False, plot=False) @pm.deterministic(trace=True, plot=True) def f_b2(f_b2_p=f_b2_p): return f_b2_p + 1 g_aw = pm.Gamma("g_aw", 2.2, 1, value=1.0) g_bw = pm.Gamma("g_bw", 5, 1, value=3.8) # flight diffusion parameters. 
note: mean is average over flight advection sig_x = pm.Gamma("sig_x", 26, 0.15, value=180) sig_y = pm.Gamma("sig_y", 15, 0.15, value=150) corr_p = pm.Beta("corr_p", 5, 5, value=0.5, trace=False, plot=False) @pm.deterministic(trace=True, plot=True) def corr(corr_p=corr_p): return corr_p * 2 - 1 # local spread paramters sig_x_l = pm.Gamma("sig_xl", 2, 0.08, value=10) sig_y_l = pm.Gamma("sig_yl", 2, 0.14, value=10) corr_l_p = pm.Beta("corr_l_p", 5, 5, value=0.5, trace=False, plot=False) @pm.deterministic(trace=True, plot=True) def corr_l(corr_l_p=corr_l_p): return corr_l_p * 2 - 1 mu_r = pm.Normal("mu_r", 1., 1, value=1) n_periods = pm.Poisson("n_periods", 30, value=30) #alpha_pow = prev. time exponent in ParasitoidModel.h_flight_prob xi = pm.Gamma("xi", 1, 1, value=0.75) # presence to oviposition/emergence factor em_obs_prob = pm.Beta("em_obs_prob", 1, 1, value=0.05) # per-wasp prob of # observing emergence in release field grid given max leaf collection # this is dependent on the size of the cell surrounding the grid point # ...not much to be done about this. grid_obs_prob = pm.Beta("grid_obs_prob", 1, 1, value=0.005) # probability of # observing a wasp present in the grid cell given max leaf sampling #card_obs_prob = pm.Beta("card_obs_prob",1,1,value=0.5) # probability of # observing a wasp present in the grid cell given max leaf sampling #### Data collection model background for sentinel fields #### # Need to fix linear units for area. Meters would be best. # Effective collection area (constant between fields) is very uncertain with warnings.catch_warnings(): # squelsh a warning based on pymc coding we don't need to worry about warnings.simplefilter("ignore", RuntimeWarning) A_collected = pm.TruncatedNormal("A_collected", 2500, 1 / 2500, 0, min(locinfo.field_sizes.values()) * cell_area, value=2500) # in m**2 # Each field has its own binomial probability. # Probabilities are likely to be small, and pm.Beta cannot handle small # parameter values. 
So we will use TruncatedNormal again. N = len(locinfo.sent_ids) sent_obs_probs = np.empty(N, dtype=object) # fix beta for the Beta distribution sent_beta = 40 # mean of Beta distribution will be A_collected/field size for n, key in enumerate(locinfo.sent_ids): sent_obs_probs[n] = pm.Beta( "sent_obs_probs_{}".format(key), A_collected / (locinfo.field_sizes[key] * cell_area) * sent_beta / (1 - A_collected / (locinfo.field_sizes[key] * cell_area)), sent_beta, value=0.1 * 3600 / (locinfo.field_sizes[key] * cell_area)) sent_obs_probs = pm.Container(sent_obs_probs) # Max a Posterirori estimates have consistantly returned a value near zero # for sprd_factor. So we will comment these sections. # if params.dataset == 'kalbar': # # factor for kalbar initial spread # sprd_factor = pm.Uniform("sprd_factor",0,1,value=0.3) # else: # sprd_factor = None sprd_factor = None #### Collect variables and setup block update #### params_ary = pm.Container( np.array([ g_aw, g_bw, f_a1, f_b1, f_a2, f_b2, sig_x, sig_y, corr, sig_x_l, sig_y_l, corr_l, lam, n_periods, mu_r ], dtype=object)) # The stochastic variables in this list (and the stochastics behind the # deterministic ones) should be block updated in order to avoid the large # computational expense of evaluating the model multiple times for each # MCMC iteration. To do this, starting step variances must be definied # for each variable. This is done via a scaling dict. 
stoc_vars = [ g_aw, g_bw, f_a1, f_b1_p, f_a2, f_b2_p, sig_x, sig_y, corr_p, sig_x_l, sig_y_l, corr_l_p, lam, n_periods, mu_r ] step_scales = { g_aw: 0.04, g_bw: 0.08, f_a1: 0.25, f_b1_p: 0.05, f_a2: 0.25, f_b2_p: 0.05, sig_x: 2, sig_y: 2, corr_p: 0.0005, sig_x_l: 2, sig_y_l: 2, corr_l_p: 0.0005, lam: 0.0005, n_periods: 1, mu_r: 0.005 } print('Getting initial model values...') ###################################################################### ##### Run Model ##### ###################################################################### @pm.deterministic(plot=False, trace=False) def pop_model(params=params, params_ary=params_ary, locinfo=locinfo, wind_data=wind_data, days=days, sprd_factor=sprd_factor): '''This function acts as an interface between PyMC and the model. Not only does it run the model, but it provides an emergence potential based on the population model result projected forward from feasible oviposition dates. To modify how this projection happens, edit popdensity_to_emergence. Returned values from this function should be nearly ready to compare to data. 
''' modeltic = time.time() ### Alter params with stochastic variables ### # g wind function parameters params.g_params = tuple(params_ary[0:2]) # f time of day function parameters params.f_params = tuple(params_ary[2:6]) # Diffusion coefficients params.Dparams = tuple(params_ary[6:9]) params.Dlparams = tuple(params_ary[9:12]) # Probability of any flight during the day under ideal circumstances params.lam = params_ary[12] # TRY BOTH SCALINGS - VARYING mu_r and n_periods # scaling flight advection to wind advection # number of time periods (based on interp_num) in one flight params.n_periods = params_ary[ 13] # if interp_num = 30, this is # of minutes params.mu_r = params_ary[14] ### PHASE ONE ### # First, get spread probability for each day as a coo sparse matrix max_shape = np.array([0, 0]) pm_args = [(days[0], wind_data, *params.get_model_params(), params.r_start)] pm_args.extend([(day, wind_data, *params.get_model_params()) for day in days[1:params.ndays]]) ##### Kalbar wind started recording a day late. Spread the population ##### locally before running full model. 
if sprd_factor is not None: res = params.domain_info[0] / params.domain_info[1] mean_drift = np.array([-25., 15.]) xdrift_int = int(mean_drift[0] // res) xdrift_r = mean_drift[0] % res ydrift_int = int(mean_drift[1] // res) ydrift_r = mean_drift[1] % res longsprd = PM.get_mvn_cdf_values( res, np.array([xdrift_r, ydrift_r]), PM.Dmat(params_ary[6], params_ary[7], params_ary[8])) shrtsprd = PM.get_mvn_cdf_values( res, np.array([0., 0.]), PM.Dmat(params_ary[9], params_ary[10], params_ary[11])) mlen = int( max(longsprd.shape[0], shrtsprd.shape[0]) + max(abs(xdrift_int), abs(ydrift_int)) * 2) sprd = np.zeros((mlen, mlen)) lbds = [ int(mlen // 2 - longsprd.shape[0] // 2), int(mlen // 2 + longsprd.shape[0] // 2 + 1) ] sprd[lbds[0] - ydrift_int:lbds[1] - ydrift_int, lbds[0] + xdrift_int:lbds[1] + xdrift_int] = longsprd * sprd_factor sbds = [ int(mlen // 2 - shrtsprd.shape[0] // 2), int(mlen // 2 + shrtsprd.shape[0] // 2 + 1) ] sprd[sbds[0]:sbds[1], sbds[0]:sbds[1]] += shrtsprd * (1 - sprd_factor) sprd[int(sprd.shape[0] // 2), int(sprd.shape[0] // 2)] += max(0, 1 - sprd.sum()) pmf_list = [sparse.coo_matrix(sprd)] else: pmf_list = [] ###################### Get pmf_list from multiprocessing pmf_list.extend(pool.starmap(PM.prob_mass, pm_args)) for pmf in pmf_list: for dim in range(2): if pmf.shape[dim] > max_shape[dim]: max_shape[dim] = pmf.shape[dim] r_spread = [] # holds the one-day spread for each release day. # Reshape the prob. mass function of each release day into solution form for ii in range(params.r_dur): offset = params.domain_info[1] - pmf_list[ii].shape[0] // 2 dom_len = params.domain_info[1] * 2 + 1 r_spread.append( sparse.coo_matrix( (pmf_list[ii].data, (pmf_list[ii].row + offset, pmf_list[ii].col + offset)), shape=(dom_len, dom_len)).tocsr()) ### PHASE TWO ### # Pass the probability list, pmf_list, and other info to convolution solver. # This will return the finished population model. 
with Capturing() as output: if sprd_factor is not None: # extend day count by one days_ext = [days[0] - 1] days_ext.extend(days) modelsol = get_populations(r_spread, pmf_list, days_ext, params.ndays + 1, dom_len, max_shape, params.r_dur, params.r_number, params.r_mthd()) # remove the first one and start where wind started. modelsol = modelsol[1:] else: modelsol = get_populations(r_spread, pmf_list, days, params.ndays, dom_len, max_shape, params.r_dur, params.r_number, params.r_mthd()) # modelsol now holds the model results for this run as CSR sparse arrays # get emergence potential (measured in expected number of wasps previously # present whose oviposition would result in emergence on the given date) # from the model result release_emerg, sentinel_emerg = popdensity_to_emergence( modelsol, locinfo) # get the expected wasp populations at grid points on sample days grid_counts = popdensity_grid(modelsol, locinfo) # get the expected wasp populations in cardinal directions '''card_counts = popdensity_card(modelsol,locinfo,params.domain_info)''' ## For the lists release_emerg and sentinel_emerg: ## Each list entry corresponds to a data collection day (one array) ## In each array: ## Each column corresponds to an emergence observation day (as in data) ## Each row corresponds to a grid point or sentinel field, respectively ## For the array grid_counts: ## Each column corresponds to an observation day ## Each row corresponds to a grid point ## For the list card_counts: ## Each list entry corresponds to a sampling day (one array) ## Each column corresponds to a step in a cardinal direction ## Each row corresponds to a cardinal direction # print('{:03.1f} sec./model at {}'.format(time.time() - modeltic, # time.strftime("%H:%M:%S %d/%m/%Y")),end='\r') # sys.stdout.flush() return (release_emerg, sentinel_emerg, grid_counts) #,card_counts) print('Parsing model output and connecting to Bayesian model...') ###################################################################### 
##### Connect Model to Data ##### ###################################################################### ### Parse the results of pop_model into separate deterministic variables ### '''Get Poisson probabilities for sentinal field emergence. Parameters: xi is constant, emerg is a list of ndarrays, betas is a 1D array of field probabilities''' Ncollections = len(locinfo.sent_DataFrames) sent_poi_rates = [] for ii in range(Ncollections): s_ndays = len(locinfo.sent_DataFrames[ii]['datePR'].unique()) sent_poi_rates.append( pm.Lambda('sent_poi_rate_{}'.format(ii), lambda xi=xi, ndays=s_ndays, betas=sent_obs_probs, emerg_model=pop_model[1][ii]: xi * emerg_model * np.tile( betas, (ndays, 1)).T, trace=False)) sent_poi_rates = pm.Container(sent_poi_rates) '''Return Poisson probabilities for release field grid emergence. Parameters: xi is constant, emerg is a list of ndarrays. collection effort is specified in locinfo.''' Ncollections = len(locinfo.release_DataFrames) rel_poi_rates = [] for ii in range(Ncollections): r_effort = locinfo.release_collection[ii] #fraction of max collection r_ndays = len(locinfo.release_DataFrames[ii]['datePR'].unique()) rel_poi_rates.append( pm.Lambda('rel_poi_rate_{}'.format(ii), lambda xi=xi, ndays=r_ndays, r_effort=r_effort, beta= em_obs_prob, emerg_model=pop_model[0][ii]: xi * emerg_model * np.tile(r_effort * beta, (ndays, 1)).T, trace=False)) rel_poi_rates = pm.Container(rel_poi_rates) @pm.deterministic(plot=False, trace=False) def grid_poi_rates(locinfo=locinfo, beta=grid_obs_prob, obs_model=pop_model[2]): '''Return Poisson probabilities for grid sampling obs_model is an ndarray, sampling effort is specified in locinfo.''' return beta * locinfo.grid_samples * obs_model '''Return Poisson probabilities for cardinal direction sampling obs_model is a list of ndarrays, sampling effort is assumed constant''' ''' card_poi_rates = [] for ii,obs in enumerate(pop_model[3]): card_poi_rates.append(pm.Lambda('card_poi_rate_{}'.format(ii), lambda 
beta=card_obs_prob, obs=obs: beta*obs)) card_poi_rates = pm.Container(card_poi_rates) ''' # Given the expected wasp densities from pop_model, actual wasp densities # are modeled as a thinned Poisson random variable about that mean. # Each wasp in the area then has a small probability of being seen. ### Connect sentinel emergence data to model ### N_sent_collections = len(locinfo.sent_DataFrames) # Create list of collection variables sent_collections = [] for ii in range(N_sent_collections): # Apparently, pymc does not play well with 2D array parameters sent_collections.append( np.empty(sent_poi_rates[ii].value.shape, dtype=object)) for n in range(sent_collections[ii].shape[0]): for m in range(sent_collections[ii].shape[1]): sent_collections[ii][n, m] = pm.Poisson( "sent_em_obs_{}_{}_{}".format(ii, n, m), sent_poi_rates[ii][n, m], value=float(locinfo.sentinel_emerg[ii][n, m]), observed=True) sent_collections = pm.Container(sent_collections) ### Connect release-field emergence data to model ### N_release_collections = len(locinfo.release_DataFrames) # Create list of collection variables rel_collections = [] for ii in range(N_release_collections): rel_collections.append( np.empty(rel_poi_rates[ii].value.shape, dtype=object)) for n in range(rel_collections[ii].shape[0]): for m in range(rel_collections[ii].shape[1]): rel_collections[ii][n, m] = pm.Poisson( "rel_em_obs_{}_{}_{}".format(ii, n, m), rel_poi_rates[ii][n, m], value=float(locinfo.release_emerg[ii][n, m]), observed=True) rel_collections = pm.Container(rel_collections) ### Connect grid sampling data to model ### grid_obs = np.empty(grid_poi_rates.value.shape, dtype=object) for n in range(grid_obs.shape[0]): for m in range(grid_obs.shape[1]): grid_obs[n, m] = pm.Poisson("grid_obs_{}_{}".format(n, m), grid_poi_rates[n, m], value=float(locinfo.grid_obs[n, m]), observed=True) grid_obs = pm.Container(grid_obs) ### Connect cardinal direction data to model ### ''' N_card_collections = len(locinfo.card_obs_DataFrames) # 
Create list of sampling variables card_collections = [] for ii in range(N_card_collections): card_collections.append(np.empty(card_poi_rates[ii].value.shape, dtype=object)) for n in range(card_collections[ii].shape[0]): for m in range(card_collections[ii].shape[1]): card_collections[ii][n,m] = pm.Poisson( "card_obs_{}_{}_{}".format(ii,n,m), card_poi_rates[ii][n,m], value=locinfo.card_obs[ii][n,m], observed=True, plot=False) card_collections = pm.Container(card_collections) ''' ###################################################################### ##### Collect Model and Run ##### ###################################################################### ### Collect model ### if sprd_factor is not None: Bayes_model = pm.Model([ lam, f_a1, f_a2, f_b1_p, f_b2_p, f_b1, f_b2, g_aw, g_bw, sig_x, sig_y, corr_p, corr, sig_x_l, sig_y_l, corr_l_p, corr_l, n_periods, mu_r, sprd_factor, grid_obs_prob, xi, em_obs_prob, A_collected, sent_obs_probs, params_ary, pop_model, grid_poi_rates, rel_poi_rates, sent_poi_rates, grid_obs, rel_collections, sent_collections ]) else: Bayes_model = pm.Model([ lam, f_a1, f_a2, f_b1_p, f_b2_p, f_b1, f_b2, g_aw, g_bw, sig_x, sig_y, corr_p, corr, sig_x_l, sig_y_l, corr_l_p, corr_l, n_periods, mu_r, grid_obs_prob, xi, em_obs_prob, A_collected, sent_obs_probs, params_ary, pop_model, grid_poi_rates, rel_poi_rates, sent_poi_rates, grid_obs, rel_collections, sent_collections ]) ### Run if parameters were passed in ### if mcmc_args is not None: if len(mcmc_args) == 3: # New run nsamples = int(mcmc_args[0]) burn = int(mcmc_args[1]) fname = mcmc_args[2] if fname[-3:] != '.h5': fname += '.h5' mcmc = pm.MCMC(Bayes_model, db='hdf5', dbname=fname, dbmode='a', dbcomplevel=0) mcmc.use_step_method(pm.AdaptiveMetropolis, stoc_vars, scales=step_scales, interval=500, shrink_if_necessary=True) try: tic = time.time() print('Sampling...') mcmc.sample(nsamples, burn) # sampling finished. 
commit to database and continue print('Sampling finished.') print('Time elapsed: {}'.format(time.time() - tic)) print('Saving...') #mcmc.save_state() mcmc.commit() print('Closing...') mcmc.db.close() except: print('Exception: database closing...') mcmc.db.close() raise return elif len(mcmc_args) == 2: # Resume run fname = mcmc_args[0] nsamples = int(mcmc_args[1]) fname = fname.strip() if fname[-3:] != '.h5': fname += '.h5' if os.path.isfile(fname): db = pm.database.hdf5.load(fname) mcmc = pm.MCMC(Bayes_model, db=db) mcmc.use_step_method(pm.AdaptiveMetropolis, stoc_vars, scales=step_scales, interval=500, shrink_if_necessary=True) # database loaded. else: print('File not found: {}'.format(fname)) return try: tic = time.time() print('Sampling...') mcmc.sample(nsamples) # sampling finished. commit to database and continue print('Sampling finished.') print('Time elapsed: {}'.format(time.time() - tic)) print('Saving...') #mcmc.save_state() mcmc.commit() print('Closing...') mcmc.db.close() except: print('Exception: database closing...') mcmc.db.close() raise return ###################################################################### ##### Start Interactive Menu ##### ###################################################################### print('--------------- MCMC MAIN MENU ---------------') print(" 'new': Start a new MCMC chain from the beginning.") print("'cont': Continue a previous MCMC chain from an hdf5 file.") #print("'plot': Plot traces/distribution from an hdf5 file.") print("'quit': Quit.") cmd = input('Enter: ') cmd = cmd.strip().lower() if cmd == 'new': print('\n\n') print('--------------- New MCMC Chain ---------------') while True: val = input("Enter number of realizations or 'quit' to quit:") val = val.strip() if val == 'q' or val == 'quit': return else: try: nsamples = int(val) val2 = input("Enter number of realizations to discard:") val2 = val2.strip() if val2 == 'q' or val2 == 'quit': return else: burn = int(val2) fname = input( "Enter filename to save 
or 'back' to cancel:") fname = fname.strip() if fname == 'q' or fname == 'quit': return elif fname == 'b' or fname == 'back': continue else: fname = fname + '.h5' break # BREAK LOOP AND RUN MCMC WITH GIVEN VALUES except ValueError: print('Unrecognized input.') continue ##### RUN FIRST MCMC HERE ##### mcmc = pm.MCMC(Bayes_model, db='hdf5', dbname=fname, dbmode='a', dbcomplevel=0) mcmc.use_step_method(pm.AdaptiveMetropolis, stoc_vars, scales=step_scales, interval=500, shrink_if_necessary=True) try: tic = time.time() print('Sampling...') mcmc.sample(nsamples, burn) # sampling finished. commit to database and continue print('Sampling finished.') print('Time elapsed: {}'.format(time.time() - tic)) print('Saving...') #mcmc.save_state() mcmc.commit() except: print('Exception: database closing...') mcmc.db.close() raise elif cmd == 'cont': # Load db and continue print('\n') while True: fname = input("Enter path to database to load, or 'q' to quit:") fname = fname.strip() if fname.lower() == 'q' or fname.lower() == 'quit': return else: if fname[-3:] != '.h5': fname += '.h5' if os.path.isfile(fname): db = pm.database.hdf5.load(fname) mcmc = pm.MCMC(Bayes_model, db=db) mcmc.use_step_method(pm.AdaptiveMetropolis, stoc_vars, scales=step_scales, interval=500, shrink_if_necessary=True) break # database loaded else: print('File not found.') #continue elif cmd == 'plot': # Get filename and pass to plotting routine. pass # return elif cmd == 'quit' or cmd == 'q': return else: print('Command not recognized.') print('Quitting....') return ##### MCMC Loop ##### # This should be reached only by cmd == 'new' or 'cont' with a database. # It resumes sampling of a previously sampled chain. 
print('\n') while True: print('--------------- MCMC ---------------') print(" 'report': generate report on traces") print("'inspect': launch IPython to inspect state") print(" 'run': conduct further sampling") print(" 'quit': Quit") cmd = input('Enter: ') cmd = cmd.strip() cmd = cmd.lower() if cmd == 'inspect': try: import IPython IPython.embed() except ImportError: print('IPython not found.') except: print('Exception: database closing...') mcmc.db.close() raise elif cmd == 'run': val = input("Enter number of realizations or 'back':") val = val.strip() if val == 'back' or val == 'b': continue else: try: nsamples = int(val) except ValueError: print('Unrecognized input.') continue # Run chain try: tic = time.time() print('Sampling...') mcmc.sample(nsamples) # sampling finished. commit to database and continue print('Sampling finished.') print('Time elapsed: {}'.format(time.time() - tic)) print('Saving...') #mcmc.save_state() mcmc.commit() except: print('Exception: database closing...') mcmc.db.close() raise elif cmd == 'report': try: import Bayes_Plot Bayes_Plot.plot_traces(db=db) print('Gelman-Rubin statistics') gr = pm.gelman_rubin(mcmc) print(gr) with open('./diagnostics/gelman-rubin.txt', 'w') as f: f.write('Variable R_hat\n') f.write('---------------------\n') for key, val in gr.items(): f.write(key + ': {}\n'.format(val)) except: print('Exception: database closing...') mcmc.db.close() raise elif cmd == 'quit' or cmd == 'q': mcmc.db.close() print('Database closed.') break else: print('Command not recognized.')
%matplotlib inline
from pymc.Matplot import plot as mcplot
# Plot the trace named "p" from an `mcmc` object that is not created in this
# snippet -- presumably the sampler from an earlier notebook cell.
mcplot(mcmc.trace("p"),common_scale=False)
# A simple demo of the Dirichlet-Multinomial conjugate pair.
# NOTE(review): `D`, `n_class` and `data` are used below but not defined in
# this snippet; presumably they come from an earlier cell -- confirm.
N = 5 # dimension
beta = np.ones(N)
# Flat Dirichlet prior over the class proportions; `cmu` is its completed
# (full-length) counterpart, used as the Multinomial probability vector.
mu=pm.Dirichlet("mu", theta=beta)
cmu = pm.CompletedDirichlet("cmu", D=mu)
n = pm.Multinomial('n', n=D, p=cmu, value=n_class, observed=True)
alpha = np.ones(N)
# One Dirichlet (and its completed counterpart) per index i in range(N).
theta = pm.Container([pm.Dirichlet("theta_%s" % i,theta=alpha) \
                      for i in range(N)])
ctheta = pm.Container([pm.CompletedDirichlet("ctheta_%s" % i, D=theta[i]) for i in range(N)])
# Observed counts per index i; the un-completed Dirichlet theta[i] is passed
# directly as the probability parent.
c = pm.Container([pm.Multinomial("c_%s" % i, n=n_class[i], p=theta[i]\
                                 ,value = data[i], observed=True)\
                  for i in range(N)])
@pm.deterministic
def precision(mu=cmu, theta=ctheta):
    # Sum over i of mu[0][i] * theta[i][0][i]; the extra [0] index suggests
    # the completed Dirichlet values are stored as a single row -- TODO confirm.
    return np.sum([mu[0][i]*theta[i][0][i] for i in range(N)])
# Sample the model; this rebinds `mcmc`, replacing the object plotted above.
mcmc = pm.MCMC([n,mu,theta,c,precision])
mcmc.sample(25000)
%matplotlib inline
from pymc.Matplot import plot as mcplot
# and now make the beta matrix, which stacks beta # coefficients (one per doctor) -- the n_clusters is # because this is how many parameters we have (we # substract one, but then add an intercept) Bdr = [] for provider_i in xrange(num_providers): Bdr.append(pymc.Normal('beta-dr-%s' % provider_i, mu=beta, tau=inv_var)) # construct a vector of betas |sessions| long session_betas = [] for session_num, session_provider in enumerate(data.dr_id): session_betas.append(Bdr[int(session_provider)]) # the Betas to use for each session (which correspond to the # dr that participated in them). SB = pymc.Container(session_betas) ### # setup the cut-off point parameters (lambda's) # for this we will use truncated normals #lambda_inv_var = 1e-5 lambdas = [pymc.Normal('lambda_0', 0, inv_var)] for i in xrange(3): lambdas.append( pymc.TruncatedNormal('lambda_%s' % (i + 1), (i + 1), inv_var, lambdas[i], numpy.inf)) lambdas = pymc.Container(lambdas) #-------------------- model ------------------# @deterministic()
def make_model(data, mi_mean_min, mi_mean_max, GF_mean_min, GF_mean_max, constant_proliferation = False):
    """Build a hierarchical step-function PyMC model and return its namespace.

    For every group of rows sharing an 'ID', the quantities ``mi`` and ``GF``
    are modelled as step functions of 'pos' (one value left of a switchpoint,
    another value right of it).  Per-ID parameters are drawn around
    population-level priors.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns 'ID', 'pos', 'm', 'PCNA', 'SOX2' and 'time'
        (all are accessed below).
    mi_mean_min, mi_mean_max : float
        Bounds of the uniform priors on the population-level ``mi`` values.
    GF_mean_min, GF_mean_max : float
        Bounds of the uniform priors on the population-level ``GF`` values.
    constant_proliferation : bool, optional
        If true, the right-hand population values are the same nodes as the
        left-hand ones and every per-ID switchpoint is fixed to 0.0.

    Returns
    -------
    dict
        ``locals()`` of this function, i.e. every name bound below.  Local
        variable names are therefore part of the returned interface.

    Notes
    -----
    Reads '../../data/cell_length_data.csv' (column 'cell_length') and uses a
    name ``outgrowth`` that is not defined in this function, so it must exist
    at module level when ``constant_proliferation`` is false.
    """
    # Per-ID storage, keyed by ID; wrapped in pymc.Container at the end.
    values_SOX2 = {}
    values_m = {}
    values_nonPCNA = {}
    switchpoint = {}
    mi_left = {}
    GF_left = {}
    SOX2_mean_left = {}
    mi_right = {}
    GF_right = {}
    SOX2_mean_right = {}
    cells_SOX2_float = {}
    cells_nonPCNA = {}
    cells_m = {}

    ls = 50.0 # length of section
    l = pd.read_csv('../../data/cell_length_data.csv')['cell_length'].mean() # length of cell

    def step_function(x, switchpoint, left_value, right_value):
        '''
        Return an array shaped like ``x`` whose entries are ``left_value``
        where ``x <= switchpoint`` and ``right_value`` elsewhere.

        This is the model curve against which the experimental data are
        compared.
        '''
        return sp.where(x<=switchpoint, left_value, right_value)

    def ma(array, fill_value):
        # Mask the NaN entries of `array` and attach `fill_value` to the mask.
        return sp.ma.masked_array(array, sp.isnan(array), fill_value = fill_value)

    #data = data.dropna(how='all', subset = ['m', 'PCNA', 'SOX2'])
    # Drop rows in which all of 'm', 'PCNA' and 'SOX2' are NaN, because of a
    # potential bug with the binomials (see the author's Stack Overflow
    # question).
    data = data.dropna(how='all', subset = ['m', 'PCNA', 'SOX2'])
    data = data.sort_values(['ID', 'pos'])

    # priors for global mean values
    # define priors for left side of step function
    mi_left_pop= pymc.Uniform('mi_left_pop', lower = mi_mean_min, upper = mi_mean_max, value = 0.02)
    GF_left_pop = pymc.Uniform('GF_left_pop', lower = GF_mean_min, upper = GF_mean_max, value = 0.8)

    # define priors for right side of step function
    if constant_proliferation:
        # No step: the right-hand side reuses the left-hand nodes.
        mi_right_pop = mi_left_pop
        GF_right_pop = GF_left_pop
    else:
        mi_right_pop = pymc.Uniform('mi_right_pop', lower = mi_mean_min, upper = mi_mean_max, value = 0.04)
        GF_right_pop = pymc.Uniform('GF_right_pop', lower = GF_mean_min, upper = GF_mean_max, value = 0.9)

    # step sizes (right minus left) at population level
    @pymc.deterministic(name='step_mi', plot=True)
    def step_mi(mi_left = mi_left_pop, mi_right = mi_right_pop):
        return mi_right - mi_left

    @pymc.deterministic(name='step_GF', plot=True)
    def step_GF(GF_left = GF_left_pop, GF_right = GF_right_pop):
        return GF_right - GF_left

    # uniformly distributed priors for the between-ID standard deviations
    GF_sigma_inter = pymc.Uniform('GF_sigma_inter', lower = 0.001, upper = 0.2)
    mi_sigma_inter = pymc.Uniform('mi_sigma_inter', lower = 0.001, upper = 0.2)

    # switchpoint
    if not constant_proliferation:
        # Upper bound is looked up in the module-level `outgrowth`, keyed by
        # the 'time' value of the first row.
        switchpoint_pop = pymc.Uniform('switchpoint_pop', lower = -2000, upper = outgrowth[data['time'].iloc[0]], value = -500)
        switchpoint_sigma_inter = pymc.Uniform('switchpoint_sigma_inter', lower=1.0, upper=400.0, value = 50)

    for ID, IDdata in data.groupby('ID'):
        # Observed values for this ID with NaNs masked.
        values_SOX2[ID] = ma(IDdata['SOX2'], 35.5)
        values_nonPCNA[ID] = ma(IDdata['SOX2'] - IDdata['PCNA'], 3.5)
        values_m[ID] = ma(IDdata['m'], 1.5)

        # Model definition

        #priors
        # switchpoint[ID]: for all observables
        if constant_proliferation:
            switchpoint[ID] = 0.0
        else:
            switchpoint[ID] = pymc.Normal('switchpoint_{0}'.format(ID), mu = switchpoint_pop, tau = 1/switchpoint_sigma_inter**2, value = -500, plot = False)

        # number of SOX2 cells
        SOX2_mean = sp.mean(values_SOX2[ID])
        SOX2_std = sp.std(values_SOX2[ID])

        # define priors for left side of step function
        mi_left[ID] = pymc.TruncatedNormal('mi_left_{0}'.format(ID), mu = mi_left_pop, tau = 1.0 / mi_sigma_inter**2, a = 0.0, b = 1.0, value = 0.02, plot = False)
        GF_left[ID] = pymc.TruncatedNormal('GF_left_{0}'.format(ID), mu = GF_left_pop, tau = 1.0 / GF_sigma_inter**2, a = 0.0, b = 1.0, value = 0.5, plot = False)

        # define priors for right side of step function
        mi_right[ID] = pymc.TruncatedNormal('mi_right_{0}'.format(ID), mu = mi_right_pop, tau = 1.0 / mi_sigma_inter**2, a = 0.0, b = 1.0, value = 0.02, plot = False)
        GF_right[ID] = pymc.TruncatedNormal('GF_right_{0}'.format(ID), mu = GF_right_pop, tau = 1.0 / GF_sigma_inter**2, a = 0.0, b = 1.0, value = 0.5, plot = False)

        # step functions
        # Default arguments bind this iteration's data and nodes, so each
        # deterministic stays tied to its own ID.
        @pymc.deterministic(name='mi_{}'.format(ID))
        def mi(positions = sp.array(IDdata['pos']), switchpoint = switchpoint[ID], left_value = mi_left[ID], right_value = mi_right[ID]):
            return step_function(positions, switchpoint, left_value, right_value)

        @pymc.deterministic(name='GF_{}'.format(ID))
        def GF(positions = sp.array(IDdata['pos']), switchpoint = switchpoint[ID], left_value = GF_left[ID], right_value = GF_right[ID]):
            return step_function(positions, switchpoint, left_value, right_value)

        # The defaults capture the scalar SOX2_mean computed above; the name
        # SOX2_mean is then rebound to this deterministic.  Left and right
        # values are identical, so the result is constant in position.
        @pymc.deterministic(name='SOX2_mean_{}'.format(ID))
        def SOX2_mean(positions = sp.array(IDdata['pos']), switchpoint = switchpoint[ID], left_value = SOX2_mean , right_value = SOX2_mean):
            return step_function(positions, switchpoint, left_value, right_value)

        #likelihoods
        cells_SOX2_float[ID] = pymc.Normal('cells_SOX2_float_{0}'.format(ID), mu=SOX2_mean, tau = 1/SOX2_std**2, value = values_SOX2[ID], plot = False, observed = True)

        # Rounded SOX2 counts, used as the binomial `n` below.
        @pymc.deterministic(name='cells_SOX2_{}'.format(ID))
        def cells_SOX2(csf = cells_SOX2_float[ID]):
            return sp.around(csf)

        cells_nonPCNA[ID] = pymc.Binomial('cells_nonPCNA_{0}'.format(ID), n = cells_SOX2, p = (1.0 - GF), value = values_nonPCNA[ID], observed = True, plot = False )

        @pymc.deterministic(name='cells_PCNA_{}'.format(ID))
        def cells_PCNA(cnp = cells_nonPCNA[ID], cs = cells_SOX2):
            return cs - cnp

        # Scale the PCNA count by section length over cell length.
        @pymc.deterministic(name='cells_PCNA_section_{}'.format(ID))
        def cells_PCNA_section(cp = cells_PCNA, ls = ls, l = l):
            return cp * ls / l

        cells_m[ID] = pymc.Binomial('cells_m_{0}'.format(ID), n = cells_PCNA_section, p = mi, value = values_m[ID], observed = True, plot = False)

    # Wrap the per-ID dictionaries so PyMC can discover the nodes inside.
    values_SOX2 = pymc.Container(values_SOX2)
    # NOTE(review): the next line repeats the previous one and looks
    # redundant -- confirm and remove.
    values_SOX2 = pymc.Container(values_SOX2)
    values_m = pymc.Container(values_m)
    values_nonPCNA = pymc.Container(values_nonPCNA)
    switchpoint = pymc.Container(switchpoint)
    mi_left = pymc.Container(mi_left)
    GF_left = pymc.Container(GF_left)
    SOX2_mean_left = pymc.Container(SOX2_mean_left)
    mi_right = pymc.Container(mi_right)
    GF_right = pymc.Container(GF_right)
    SOX2_mean_right = pymc.Container(SOX2_mean_right)
    cells_SOX2_float = pymc.Container(cells_SOX2_float)
    cells_nonPCNA = pymc.Container(cells_nonPCNA)
    cells_m = pymc.Container(cells_m)

    return locals()
# Collect the records of one user from the tab-separated input file.
# Each line is "<uid>\t<item>\t<item>..."; each item is space-separated, with
# two integers in fields 0-1 and a comma-separated integer list in field 4.
data_tp, data_sp = [], []
try:
    for line in fileinput.input("../../data/stationary.txt"):
        part = line.strip().split("\t")
        uid, items = part[0], part[1:]
        if uid != "460029901722027":
            continue
        for item in items:
            fields = item.split(" ")  # split once, reuse for both parts
            data_tp.append([int(i) for i in fields[0:2]])
            data_sp.append([int(i) for i in fields[4].split(",")])
finally:
    # Release the fileinput state even if a line fails to parse.
    fileinput.close()

data_tp, data_sp = np.array(data_tp), np.array(data_sp)
# print(x) with a single argument behaves the same on Python 2 and 3.
print(data_tp)
print(data_sp)

# Two-state mixture: every observation gets a latent state, and each state
# has its own pair of discrete-uniform parameters (stime, ftime) on 0..23.
prior = pm.Dirichlet('prior', np.array([50.0,50.0]))
state = pm.Container([pm.Categorical('state_%i' % i, p=prior) for i in range(len(data_tp))])
stime = pm.Container([pm.DiscreteUniform('stime_%i' % i, lower=0, upper=23) for i in range(2)])
ftime = pm.Container([pm.DiscreteUniform('ftime_%i' % i, lower=0, upper=23) for i in range(2)])


@pm.deterministic(plot=False)
def mu_s(state=state, stime=stime):
    # Pick stime[0] for observations in state 0, stime[1] otherwise.
    return np.array([stime[0] if s == 0 else stime[1] for s in state])


@pm.deterministic(plot=False)
def mu_f(state=state, ftime=ftime):
    # The parent must be named `ftime` so the body reads the sampled values
    # handed in by PyMC.  Previously the parameter was called `stime` and the
    # body picked up the module-level `ftime` Container of Stochastic objects.
    return np.array([ftime[0] if s == 0 else ftime[1] for s in state])


# Column 0 of data_tp is observed around mu_s, column 1 around mu_f.
obs_s = pm.Normal('obs_s', mu=mu_s, tau=0.1, value=data_tp[:,0], observed=True)
obs_f = pm.Normal('obs_f', mu=mu_f, tau=0.1, value=data_tp[:,1], observed=True)

model = pm.Model([prior, state, stime, ftime, obs_s, obs_f])
mcmc = pm.MCMC(model)
mcmc.sample(100)

print(state.value)
# Formatted explicitly so the output is "a b" on both Python 2 and 3.
print("%s %s" % (stime[0].value, ftime[0].value))
# Observed data: failure counts and the matching observation time lengths.
y = [5, 1, 5, 14, 3, 19, 1, 1, 4, 22]
t = [94, 16, 63, 126, 5, 31, 1, 1, 2, 10]

# Hyperparameters
alpha = 1.8
gam = 0.01
delta = 1.0

Nobs = len(y)

# Shared Gamma-distributed hyperparameter for the per-observation rates.
beta = pymc.Gamma('beta', alpha=delta, beta=gam, value=1.0)

# One Gamma rate per observation.  (A single vector-valued Gamma and an
# np.empty(dtype=object) fill loop were earlier, commented-out alternatives.)
_rate_nodes = []
for i in range(Nobs):
    _rate_nodes.append(
        pymc.Gamma('lamb_%i' % i, alpha=alpha, beta=beta, value=1.0))
lamb = pymc.Container(np.asarray(_rate_nodes))


@pymc.deterministic
def poi_mu(lamb=lamb, t=t):
    # Element-wise product of the rates and the observation times.
    scaled = lamb * t
    return scaled

# A hand-written stochastic `data_gen(poi_mu, y)` returning
# -np.sum(poi_mu) + np.sum(np.log(poi_mu)*y) was sketched here but left
# commented out.
def dict_to_recarray(dict):
    """Wrap ``dict`` in a ``pymc.Container`` and return the container.

    Despite the function name, no record array is built here: the argument
    is handed to ``pymc.Container`` unchanged.
    """
    container = pymc.Container(dict)
    return container
def run(self):
    """Build the unfolding model, sample it and store the resulting traces.

    Reads the configuration held on ``self`` (data, response, backgrounds,
    systematics, prior and sampler settings).  On return ``self.trace``
    holds one trace per truth bin and ``self.nuisancestrace`` maps nuisance
    names to their traces.  When ``self.monitoring`` is set, the
    ``monitoring`` module is asked to plot the result.
    """
    self.validateinput()

    # Observed spectrum, replaced by a fluctuated copy when a seed is set.
    data = self.data
    if self.rndseed >= 0:
        data = self.fluctuate(data)

    # Background yields and their normalisation uncertainties, in key order.
    bkg_names = self.backgroundsyst.keys()
    bkg_yields = array([self.background[bkg] for bkg in bkg_names])
    bkg_norm_errs = array([self.backgroundsyst[bkg] for bkg in bkg_names])

    # Object systematics for signal and, when both lists are non-empty,
    # for every background.
    syst_names = self.objsyst['signal'].keys()
    sig_systs = array([self.objsyst['signal'][syst] for syst in syst_names])
    bkg_systs = array([])
    if len(syst_names) > 0 and len(bkg_names) > 0:
        per_background = []
        for bkg in bkg_names:
            per_background.append(
                [self.objsyst['background'][syst][bkg] for syst in syst_names])
        bkg_systs = array(per_background)

    n_reco = len(data)
    response = self.response
    n_truth = len(response)

    import priors
    truth = priors.wrapper(priorname=self.prior,
                           low=self.lower,
                           up=self.upper,
                           other_args=self.priorparams)

    # One nuisance per background: a free normalisation when the quoted
    # error is negative, otherwise a truncated unit Gaussian that is held
    # fixed (observed) when the error is exactly zero.
    bckgnuisances = []
    for name, err in zip(bkg_names, bkg_norm_errs):
        if err < 0.:
            nuisance = mc.Uniform('norm_%s' % name,
                                  value=1., lower=0., upper=3.)
        else:
            constrained = err > 0.0
            nuisance = mc.TruncatedNormal(
                'gaus_%s' % name,
                value=0., mu=0., tau=1.0,
                a=(-1.0 / err if constrained else -inf),
                b=inf,
                observed=(not constrained))
        bckgnuisances.append(nuisance)
    bckgnuisances = mc.Container(bckgnuisances)

    # One unit Gaussian per object systematic, fixed at systfixsigma when
    # that value is non-zero.
    fix_systs = bool(self.systfixsigma != 0)
    objnuisances = mc.Container([
        mc.Normal('gaus_%s' % name,
                  value=self.systfixsigma, mu=0., tau=1.0,
                  observed=fix_systs)
        for name in syst_names
    ])

    # Optional potential constraining the truth spectrum.
    if self.regularization:
        truthpot = self.regularization.getpotential(truth)

    # Expected reco-level spectrum for a given truth spectrum and set of
    # nuisance values.
    @mc.deterministic(plot=False)
    def unfold(truth=truth,
               bckgnuisances=bckgnuisances,
               objnuisances=objnuisances):
        if len(bkg_systs) > 0:
            smearbckg = 1. + dot(objnuisances, bkg_systs)
        else:
            smearbckg = 1.

        norm_errs = []
        for berr, nuis in zip(bkg_norm_errs, bckgnuisances):
            if berr < 0.:
                norm_errs.append((-1. + nuis) / nuis)
            else:
                norm_errs.append(berr)

        bckg = dot(1. + bckgnuisances * array(norm_errs),
                   bkg_yields * smearbckg)
        signal = dot(truth, response) * (1. + dot(objnuisances, sig_systs))
        return bckg + signal

    unfolded = mc.Poisson('unfolded', mu=unfold, value=data,
                          observed=True, size=n_reco)
    allnuisances = mc.Container(bckgnuisances + objnuisances)

    modelelements = [unfolded, unfold, truth, allnuisances]
    if self.regularization:
        modelelements.append(truthpot)
    model = mc.Model(modelelements)

    if self.use_emcee:
        from emcee_sampler import sample_emcee
        mcmc = sample_emcee(model,
                            nwalkers=self.nwalkers,
                            samples=self.nMCMC / self.nwalkers,
                            burn=self.nBurn / self.nwalkers,
                            thin=self.nThin)
    else:
        # Fit a MAP estimate first, then sample with adaptive Metropolis.
        map_ = mc.MAP(model)
        map_.fit()
        mcmc = mc.MCMC(model)
        mcmc.use_step_method(mc.AdaptiveMetropolis, truth + allnuisances)
        mcmc.sample(self.nMCMC, burn=self.nBurn, thin=self.nThin)

    # Collect the traces of the truth bins and of every free nuisance.
    self.trace = [mcmc.trace('truth%d' % ibin)[:] for ibin in xrange(n_truth)]

    self.nuisancestrace = {}
    for name, err in zip(bkg_names, bkg_norm_errs):
        if err < 0.:
            self.nuisancestrace[name] = mcmc.trace('norm_%s' % name)[:]
        elif err > 0.:
            self.nuisancestrace[name] = mcmc.trace('gaus_%s' % name)[:]
    if self.systfixsigma == 0.:
        for name in syst_names:
            self.nuisancestrace[name] = mcmc.trace('gaus_%s' % name)[:]

    if self.monitoring:
        import monitoring
        monitoring.plot(self.name + '_monitoring', data, bkg_yields,
                        response, self.trace, self.nuisancestrace,
                        self.lower, self.upper)
def generate_MCMC_model(specobj, templates, offset=False, shiftout='velocity', v0=0, multitemps=False, copy=False):
    """
    Build a ``pymc.MCMC`` object that fits a shifted (and optionally offset)
    template to a spectrum:

        data = Normal(tau=ivar, center=A*template(x-shift)+offset)

    Parameters
    ----------
    specobj
        Object providing ``x``, ``flux`` and ``ivar`` arrays; copies are
        taken before use.
    templates
        Sequence of template arrays; all must have the same shape.
    offset
        Truthy to let the offset vary between ``fmin + tmin`` and
        ``fmax + tmax``; otherwise both bounds are ``float(offset)``.
        NOTE(review): a non-zero numeric offset is truthy and therefore
        takes the free-variate branch -- confirm that is intended.
    shiftout : str
        Selects the reported shift variable: any string containing 'vel',
        exactly 'z', or any string containing 'pix'.
    v0
        Initial value of the pixel shift; left at PyMC's own initial value
        when None.
    multitemps : bool
        If true, fit a linear combination of all templates; otherwise fit a
        single template selected by a discrete index.
    copy
        Forwarded to ``array(templates, copy=copy)``.

    Returns
    -------
    pymc.MCMC
        The sampler, with the (possibly patched) inverse variance attached
        as ``m.ivar``.

    Raises
    ------
    ValueError
        If the templates have differing shapes or ``shiftout`` is not
        recognized.
    """
    import pymc

    x, flux, ivar = specobj.x.copy(), specobj.flux.copy(), specobj.ivar.copy()
    templates = array(templates, copy=copy)
    if any([t.shape != templates[0].shape for t in templates]):
        raise ValueError("templates don't match")
    ntempix = templates[0].shape[0]
    npix = x.shape[0]

    #TODO: match pixels with alignment instead of just assuming middle
    tx = arange(ntempix)
    sx = arange(npix) + (ntempix - npix) / 2 #spectrum x-value in terms of template coordinates

    tmax, tmin = templates.max(), templates.min()
    fmax, fmin = flux.max(), flux.min()
    imax, imin = ivar.max(), ivar.min()
    xmax, xmin = x.max(), x.min()
    # ivar0 is only referenced by the commented-out `ivar` deterministic below.
    ivar0 = imin / npix / 100 #A small value to un-weight bad data points
    maxlshift = maxrshift = (ntempix - npix) / 2 #TODO:fix for non-symmetric

    if offset:
        loff, uoff = (fmin + tmin, fmax + tmax) #TODO:fix
    else:
        loff = uoff = float(offset)
    # bool(offset) is evaluated on the caller's argument before the name
    # `offset` is rebound to the stochastic.
    offset = pymc.Uniform('offset', loff, uoff, trace=bool(offset), plot=bool(offset))
    pixshift = pymc.Uniform('pixshift', -round(npix / 2), round(npix / 2), trace=True, plot=False)
    if v0 is not None:
        pixshift.value = v0

    # Derived variable that reports the shift in the requested units.
    svar = None
    if 'vel' in shiftout:
        svar = pymc.Lambda('vel', lambda pixshift=pixshift: pixshift_to_vel(pixshift, x, zout=False, logify=True, lincheck=False))
    elif shiftout == 'z':
        svar = pymc.Lambda('z', lambda pixshift=pixshift: pixshift_to_vel(pixshift, x, zout=True, logify=True, lincheck=False))
    elif 'pix' in shiftout:
        svar = pixshift
    else:
        raise ValueError('unrecognized shiftout')
    svar.plot = True
    svar.trace = True

    elems = {'offset': offset, 'pixshift': pixshift}
    if svar is not pixshift:
        elems[svar.__name__] = svar

    if multitemps:
        #TODO:smarter initial
        # One amplitude per template; all start at 0.1 except the first (1).
        A = pymc.Container([pymc.Uniform('A%i' % i, 0, fmax / np.max(t)) for i, t in enumerate(templates)])
        elems['A'] = A
        for e in A:
            e.plot = False
            e.value = 0.1
        A[0].value = 1

        @pymc.deterministic(trace=True, plot=False)
        def modelflux(A=A, offset=offset, pixshift=pixshift):
            #TODO:caching of some kind ?
            # Amplitude-weighted sum of the templates along axis 1.
            temp = sum((A * templates.T), 1) #TODO:test
            #r = int(round(pixshift))
            # Interpolate the combined template onto the shifted spectrum grid.
            shifted = interp(sx - pixshift + 1, tx, temp) #TODO:why +1 ?
            #shifted = roll(temp,r)
            # if r > 0:
            #     # shifted[:r]=flux[:r] #do something smarter here for the edges
            #     shifted[:r]=shifted[r]
            # elif r < 0:
            #     # shifted[r:]=flux[r:] #do something smarter here for the edges
            #     shifted[r:]=shifted[r]
            return shifted + offset

        elems['modelflux'] = modelflux
    else:
        # Lower and upper bound are both 1, with the value set to 1.
        A = pymc.Uniform('A', 1, 1, value=1) #TODO:smarter setting
        elems['A'] = A
        # Index of the template in use, starting at the first one.
        templatei = pymc.DiscreteUniform('templatei', 0, len(templates) - 1, trace=True, plot=False)
        templatei.value = 0
        elems['templatei'] = templatei

        @pymc.deterministic(trace=True, plot=False)
        def modelflux(A=A, offset=offset, pixshift=pixshift, templatei=templatei, templates=templates):
            """
            The flux expected from the template parameters
            """
            #TODO:caching of some kind ?
            temp = templates[templatei]
            # Local 1-based grid; shadows the 0-based outer `tx`.
            tx = arange(len(temp)) + 1
            #r = int(round(pixshift))
            shifted = interp(sx - pixshift + 1, tx, temp) #TODO:why +1 ?
            #shifted = roll(temp,r)
            #Rendered unnecessary by ivar variable
            # if r > 0:
            #     # shifted[:r]=flux[:r] #do something smarter here for the edges
            #     shifted[:r]=shifted[r]
            # elif r < 0:
            #     # shifted[r:]=flux[r:] #do something smarter here for the edges
            #     shifted[r:]=shifted[r]
            return A * shifted + offset

        elems['modelflux'] = modelflux

    # Defined but not added to `elems` (the registration below is commented
    # out), so it is not among the nodes passed to pymc.MCMC.
    @pymc.potential
    def pixelcutoff(pixshift=pixshift):
        #TODO:rethink
        lcut = np.exp(-(pixshift - maxlshift) / npix)
        rcut = np.exp((pixshift - maxrshift) / npix)
        return lcut * rcut
    #elems['pixelcutoff']=pixelcutoff

    # @pymc.deterministic(trace=True,plot=False)
    # def ivar(pixshift=pixshift,ivararr=ivar,ivar0=ivar0):
    #     """
    #     The inverse variance is adjusted to be very small for points that are
    #     off the edge
    #     """
    #     r = int(round(pixshift))
    #     if r > 0:
    #         ivars = ivararr.copy()
    #         ivars[:r] = ivar0
    #     elif r < 0:
    #         ivars = ivararr.copy()
    #         ivars[r:] = ivar0
    #     else:
    #         ivars = ivararr #leave alone
    #     return ivars
    # elems['ivar']=ivar

    #fluxvar = pymc.Poisson('flux',mu=modelflux,observed=True,value=flux)
    # Replace non-finite or non-positive inverse variances by a value 1000
    # times smaller than the smallest valid one.
    dmask = isfinite(ivar) & (ivar > 0)
    ivar[~dmask] = np.min(ivar[dmask]) / 1000 #TODO:test
    fluxvar = pymc.Normal('flux', mu=modelflux, tau=ivar, observed=True, value=flux)
    elems['fluxvar'] = fluxvar

    m = pymc.MCMC(elems)
    m.ivar = ivar
    return m
def __init__(self, snobj, filters=None, inc_var=False, **args):
    '''Create an MCMC sampler based on a sn object.

    The specified filters are fit using the model that is currently
    selected.  Uniform priors are assumed for the parameters unless
    overridden by assigning pymc Stochastics through **args.

    Parameters
    ----------
    snobj
        Object providing ``data`` (a mapping keyed by filter) and ``model``.
    filters
        Filters to fit; defaults to all keys of ``snobj.data``.
    inc_var : bool
        If true, an InverseGamma stochastic named 'taus' with one entry per
        filter is added to the per-point variance; otherwise that term is
        zero.
    **args
        Entries whose key matches a model parameter name are used as that
        parameter's stochastic and removed; whatever remains is forwarded to
        ``pymc.MCMC.__init__``.

    Raises
    ------
    AttributeError
        If a model parameter has no default prior here and none is supplied
        through **args.
    '''
    self.sn = snobj
    if filters is None:
        filters = list(self.sn.data.keys())

    # Configure the selected model for the requested filters.
    self.model = snobj.model
    self.model.args = {}
    self.model._fbands = filters
    self.model.setup()

    params = []
    paramnames = list(self.model.parameters.keys())
    # First, setup stochastics for our parameters
    for param in paramnames:
        if param in args:
            # Caller-supplied stochastic: use it as is and drop it from args
            # so it is not forwarded to pymc.MCMC.__init__ below.
            params.append(args[param])
            del args[param]
            continue
        if param == 'dm15':
            params.append(pymc.Uniform('dm15', 0.7, 2.0))
        elif param == 'st':
            params.append(pymc.Uniform('st', 0.25, 1.22))
        elif param == 'Tmax':
            # Allow Tmax from 30 before the first to 30 after the last MJD.
            t0 = min([self.sn.data[f].MJD.min() for f in self.sn.data])
            t1 = max([self.sn.data[f].MJD.max() for f in self.sn.data])
            params.append(pymc.Uniform('Tmax', t0 - 30, t1 + 30))
        elif param == 'EBVhost':
            params.append(pymc.Uniform('EBVhost', 0, 10.))
        elif param == 'DM':
            params.append(pymc.Uniform('DM', 0, 100))
        elif param.find('max') > 0:
            # Any other name containing 'max' after its first character.
            params.append(pymc.Uniform(str(param), 10., 30.))
        else:
            raise AttributeError(
                "Error, parameter %s not recognized. Update MCMC package" %
                (param))
        # Start from the model's current value, or its guess if unset.
        if self.model.parameters[param] is None:
            params[-1].value = self.model.guess(param)
        else:
            params[-1].value = self.model.parameters[param]
    params = pymc.Container(params)

    # now setup intrinsic variances for each filter
    if inc_var:
        vars = pymc.InverseGamma('taus', alpha=0.5, beta=0.1**2, value=np.random.uniform(0, 0.1**2, size=len(filters)))
    else:
        vars = np.array([0.0] * len(filters))

    # The data stochastic that maps parameters to observations
    @pymc.data
    @pymc.stochastic
    def model(params=params, vars=vars, paramnames=paramnames, filters=filters, value=1.0):
        # Set the parameters in the model
        for i, param in enumerate(paramnames):
            if debug:
                print("setting ", param, " to ", params[i])
            self.model.parameters[param] = params[i]
        logp = 0
        numpts = 0
        for i, f in enumerate(filters):
            mod, err, mask = self.model(f, self.sn.data[f].MJD)
            # Use only points valid in both the model mask and the data mask.
            m = mask * self.sn.data[f].mask
            if not np.sometrue(m):
                continue
            numpts += np.sum(m)
            # Precision = 1 / (intrinsic variance + e_mag squared).
            tau = np.power(vars[i] + np.power(self.sn.data[f].e_mag, 2), -1)
            logp += pymc.normal_like(self.sn.data[f].mag[m], mod[m], tau[m])
        #if numpts < len(paramnames):
        #   return -np.inf
        return logp

    # All local names are handed to PyMC as the model definition.  The code
    # below reads self.params, self.paramnames and self.vars, which
    # presumably come from this call -- TODO confirm; renaming locals above
    # would then break it.
    pymc.MCMC.__init__(self, locals(), **args)

    # Setup the step methods
    # 1) params will be AdaptiveMetropolis, so we need to setup initial
    #    scales. If the model has been fit, use error, otherwise guess.
    def_scales = {
        'Tmax': 0.5**2,
        'st': 0.001**2,
        'dm15': 0.001**2,
        'max': 0.01**2,
        'DM': 0.01**2,
        'EBVhost': 0.01**2
    }
    scales = {}
    for i, par in enumerate(self.paramnames):
        if par in self.model.errors and self.model.errors[par] > 0:
            scales[self.params[i]] = self.model.errors[par]
        else:
            if par in def_scales:
                scales[self.params[i]] = def_scales[par]
            elif par[0] == "T" and par[-3:] == "max":
                scales[self.params[i]] = def_scales['Tmax']
            elif par[-3:] == "max":
                scales[self.params[i]] = def_scales['max']
            else:
                # Fallback: a tenth of the current value.
                scales[self.params[i]] = self.params[i].value / 10.
    self.use_step_method(pymc.AdaptiveMetropolis, self.params, scales=scales, delay=1000, interval=1000)
    if inc_var:
        # Separate adaptive step for the variances; every scale is 0.005**2.
        self.use_step_method(
            pymc.AdaptiveMetropolis, [self.vars],
            scales={self.vars: self.vars.value * 0 + 0.005**2})
def setup_inference(self):
    """Build the PyMC nodes of the Fourier-series model and store them.

    Reads ``self.wavelengths``, ``self.N``, ``self.xdata``, ``self.ydata``,
    ``self.wavelength_sd_defined`` (and ``self.wavelength_sd`` when it is
    set) plus the optional constraint lists ``self.vergence``,
    ``self.asymmetry_likelihoods``, ``self.interlimb_likelihoods`` and
    ``self.axial_trace_likelihoods``.

    Side effects: sets ``self.model1`` (dict of nodes and data),
    ``self.model_e`` (``pymc.Model`` of the core nodes),
    ``self.vector_fourier_series``, ``self.setup = True`` and
    ``self.mcmc_uptodate = False``.

    Returns
    -------
    bool
        Always True.
    """
    # Smallest log-probability the hand-written likelihoods may return
    # (numeric value kept exactly as before).
    logp_floor = float(-1.7876931348623157e+308)

    #depending on the number of wavelengths
    wavelength_number = len(self.wavelengths)

    # Coefficients: c_0 plus 2*N cos/sin coefficients per wavelength, all
    # with a Normal(0, sd=5) prior.
    coeff_tau = 1. / 5.**2
    n_coeffs = 1 + 2 * self.N * wavelength_number
    C = pymc.Container([
        pymc.Normal("c_%i" % k, mu=0, tau=coeff_tau) for k in range(n_coeffs)
    ])

    # Noise scale with log-probability -log|value|.
    @pymc.stochastic(observed=False)
    def sigma(value=1):
        return -np.log(abs(value))

    # NOTE(review): sigma3 is defined but never referenced in this method.
    @pymc.stochastic(observed=False)
    def sigma3(value=1):
        return -np.log(abs(value))

    qw_sigs = pymc.Container([
        pymc.HalfCauchy("qw_sigs_%i" % x, beta=10, alpha=1)
        for x in range(wavelength_number)
    ])

    # Wavelength priors centred on the supplied wavelengths.
    if self.wavelength_sd_defined:
        qw = pymc.Container([
            pymc.distributions.Lognormal(
                'qw_%i' % x,
                mu=self.wavelengths[x],
                tau=1. / self.wavelength_sd[x]**2)
            for x in range(wavelength_number)
        ])
    else:
        qw = pymc.Container([
            pymc.distributions.TruncatedNormal(
                'qw_%i' % x,
                mu=self.wavelengths[x],
                tau=1. / self.wavelengths[x] / 3.,
                a=0,
                b=np.inf)
            for x in range(wavelength_number)
        ])

    def fourier_series(C, N, QW, x, wavelength_number):
        # Evaluate c0 + sum over wavelengths w and harmonics i of
        # C[2i-1+2Nw]*cos(2*pi*i*x/QW[w]) + C[2i+2Nw]*sin(2*pi*i*x/QW[w]).
        v = np.array(x)
        v.fill(0.0)
        v = v.astype('float')
        for ii in range(len(x)):
            v[ii] += C[0]
            for w in range(wavelength_number):
                for i in range(1, N + 1):
                    v[ii] = v[ii] + C[(2*i-1)+2*N*w]*np.cos(2*np.pi/QW[w] * i * (x[ii])) + \
                        C[(2*i)+2*N*w]*np.sin(2*np.pi/QW[w] * i * (x[ii]))
        return v

    self.vector_fourier_series = np.vectorize(fourier_series)

    # Define the form of the model and likelihood
    @pymc.deterministic
    def y_model(C=C, x=self.xdata, qw=qw, nn=self.N,
                wavelength_number=wavelength_number):
        return fourier_series(C, nn, qw, x, wavelength_number)

    y = pymc.Normal('y', mu=y_model, tau=1. / sigma**2, observed=True,
                    value=self.ydata)

    # package the full model in a dictionary
    self.model1 = dict(C=C, qw=qw, sigma=sigma, qw_sigs=qw_sigs,
                       y_model=y_model, y=y,
                       x_values=self.xdata, y_values=self.ydata)
    self.model_e = pymc.Model([C, qw, sigma, y])

    if len(self.vergence) > 0:
        # Column 0 of self.vergence is where the series is evaluated,
        # column 1 is the observed sign (+1 / -1).
        @pymc.deterministic
        def vergence_values(c=C, qw=qw, y=np.array(self.vergence)[:, 0]):
            return np.sign(fourier_series2(c, qw, y))

        @pymc.stochastic(observed=True)
        def vergence(value=np.array(self.vergence)[:, 1], mu=vergence_values):
            loglike = 0.
            loglike += pymc.distributions.normal_like((mu[value == 1]), mu=1, tau=1.)
            loglike += pymc.distributions.normal_like((mu[value == -1]), mu=-1, tau=1.)
            if loglike < logp_floor:
                return logp_floor
            return loglike

        self.model1.update({'vergence': vergence})

    if len(self.asymmetry_likelihoods) > 0:
        @pymc.deterministic
        def y_model_asym(c=C, qw=qw):
            # Median of arctan(series), in degrees, over +/- the largest
            # wavelength.
            x = np.linspace(-np.max(qw), np.max(qw))
            v = np.rad2deg(np.arctan(fourier_series2(c, qw, x)))
            m = np.median(v)
            return m

        @pymc.stochastic(observed=True)
        def y_asym(mu=y_model_asym,
                   value=self.asymmetry_likelihoods[0],
                   tau=1. / self.asymmetry_sigma**2):
            loglike = pymc.distributions.normal_like(x=value, mu=mu, tau=tau)
            return loglike * 10

        self.model1.update({'y_asym': y_asym})

    if len(self.interlimb_likelihoods) > 0:
        @pymc.deterministic
        def y_model_interlimb(c=C, qw=qw):
            # Range (max - min) of arctan(series), in degrees, over +/- the
            # largest wavelength.
            x = np.linspace(-np.max(qw), np.max(qw))
            v = np.rad2deg(np.arctan(fourier_series2(c, qw, x)))
            d = np.max(v) - np.min(v)
            return d

        @pymc.stochastic(observed=True)
        def y_interlimb(mu=y_model_interlimb,
                        value=self.interlimb_likelihoods[0],
                        tau=1. / self.interlimb_sigma**2):
            # Use the resolved parent value `mu`, as y_asym does.  The old
            # code passed the y_model_interlimb node object itself.
            loglike = pymc.distributions.normal_like(x=value, mu=mu, tau=tau)
            return loglike * 10

        self.model1.update({'y_interlimb': y_interlimb})

    if len(self.axial_trace_likelihoods) > 0:
        # Search window: the observed positions padded by the first
        # wavelength on both sides, sampled at 300 points.
        d = self.wavelengths[0]
        x_at = np.linspace(
            np.min(self.axial_trace_likelihoods) - d,
            np.max(self.axial_trace_likelihoods) + d, 300)

        @pymc.stochastic(observed=False)
        def at_sigma(value=1):
            return -np.log(abs(value))

        @pymc.deterministic
        def z_model_axial_t(c=C, wl=qw, z_at=x_at):
            return np.array(fourier_series_x_intercepts(c, wl, z_at))

        @pymc.stochastic(observed=True)
        def z_at(mu=z_model_axial_t, sigma=at_sigma,
                 value=self.axial_trace_likelihoods):
            loglike = 0.
            mu = np.array(mu)
            # No intercepts at all: reject this state.
            if not mu.size:
                return logp_floor
            for v in value:
                if mu.shape:
                    dif = np.sort(np.abs(mu - v))
                    # If there are two hinges for the same axial trace,
                    # penalise this.  The size check avoids an IndexError
                    # when only one intercept was found.
                    if dif.size > 1 and dif[1] < sigma:
                        loglike += -99999
                    m = mu[(np.abs(mu - v)).argmin()]
                else:
                    m = mu
                loglike += pymc.distributions.normal_like(
                    x=v, mu=m, tau=1. / sigma**2)
            if loglike < logp_floor:
                return logp_floor
            return loglike

        self.model1.update({'z_at': z_at, 'at_sigma': at_sigma})

    self.setup = True
    self.mcmc_uptodate = False
    return True
import spacepy.plot as spp  # for the styles
import numpy as np
import pymc as pm

# Dimensions of the toy LDA problem.
K = 2  # number of topics
V = 4  # number of words
D = 3  # number of documents

# One row per document.
data = np.array([
    [1, 1, 1, 1],
    [1, 1, 1, 1],
    [0, 0, 0, 0],
])

# Flat (all-ones) Dirichlet concentration vectors.
alpha = np.ones(K)
beta = np.ones(V)

# One completed Dirichlet per document, parameterised by alpha.
theta = pm.Container([
    pm.CompletedDirichlet(
        "theta_%s" % doc,
        pm.Dirichlet("ptheta_%s" % doc, theta=alpha),
    )
    for doc in range(D)
])

# One completed Dirichlet per topic, parameterised by beta.
phi = pm.Container([
    pm.CompletedDirichlet(
        "phi_%s" % topic,
        pm.Dirichlet("pphi_%s" % topic, theta=beta),
    )
    for topic in range(K)
])

# Length of each document row.
Wd = list(map(len, data))

# Categorical topic assignments per document, randomly initialised.
z = pm.Container([
    pm.Categorical(
        'z_%i' % doc,
        p=theta[doc],
        size=Wd[doc],
        value=np.random.randint(K, size=Wd[doc]),
    )
    for doc in range(D)
])
# Encode the documents against the dictionary and show the result.
nr_assoc_word = numpy_array(word_in_dict, list_lists)
print(nr_assoc_word)

# Problem dimensions derived from the encoded corpus.
nr_doc = len(nr_assoc_word)
nr_words_doc = list(map(len, nr_assoc_word))
nr_words = len(word_in_dict)
nr_topics = 3

# Flat (all-ones) Dirichlet concentration vectors.
alpha = np.ones(nr_topics)
beta = np.ones(nr_words)

# One completed Dirichlet per document; print each initial value.
theta = pm.Container([
    pm.CompletedDirichlet(
        "theta_%s" % doc_idx,
        pm.Dirichlet("theta1_%s" % doc_idx, theta=alpha),
    )
    for doc_idx in range(nr_doc)
])
for d in range(nr_doc):
    print(theta[d].value)

# One completed Dirichlet per topic; print each initial value.
phi = pm.Container([
    pm.CompletedDirichlet(
        "phi_%s" % topic_idx,
        pm.Dirichlet("phi1_%s" % topic_idx, theta=beta),
    )
    for topic_idx in range(nr_topics)
])
for i in range(nr_topics):
    print(phi[i].value)
def setup_inference_mixture(self):
    """Assemble the PyMC model for the sign-mixture Fourier-series fit.

    Reads ``self.wavelengths``, ``self.N``, ``self.xdata``, ``self.ydata``,
    ``self.wavelength_sd_defined`` and (when that flag is true)
    ``self.wavelength_sd``.  Stores the model pieces in ``self.model1``, the
    ``pymc.Model`` in ``self.model_e``, the vectorised series function in
    ``self.vector_fourier_series``, and sets ``self.setup`` /
    ``self.mcmc_uptodate``.

    Returns:
        True, always.
    """
    # The number of coefficient groups depends on the number of wavelengths.
    n_wavelengths = len(self.wavelengths)

    # Coefficient priors.  c_0 is centred on the mean of the y data; the
    # remaining 2*N coefficients per wavelength are centred on zero.  Each
    # coefficient is named after its position in the list.
    coeff_tau = 1. / 5.**2
    data_mean = np.mean(self.ydata)
    coeff_priors = [pymc.Normal("c_%i" % (0), mu=data_mean, tau=coeff_tau)]
    for _wl in range(n_wavelengths):
        for _ in range(2 * self.N):
            coeff_priors.append(
                pymc.Normal("c_%i" % (len(coeff_priors)), mu=0,
                            tau=coeff_tau))
    C = pymc.Container(coeff_priors)

    # One binary indicator per data point; a value of 0 flips the sign of
    # the modelled value at that point (see fourier_series below).
    indicator_priors = [
        pymc.DiscreteUniform('i_%i' % pt, lower=0, upper=1)
        for pt in range(len(self.xdata))
    ]
    i_ = pymc.Container(indicator_priors)

    @pymc.stochastic(observed=False)
    def sigma(value=1):
        return -np.log(abs(value))

    @pymc.stochastic(observed=False)
    def sigma3(value=1):
        return -np.log(abs(value))

    qw_sigs = pymc.Container([
        pymc.HalfCauchy("qw_sigs_%i" % k, beta=10, alpha=1)
        for k in range(n_wavelengths)
    ])

    # Wavelength priors: Lognormal when standard deviations were supplied,
    # otherwise a Normal whose tau is derived from the wavelength itself.
    if self.wavelength_sd_defined:
        wavelength_priors = [
            pymc.distributions.Lognormal(
                'qw_%i' % k,
                mu=self.wavelengths[k],
                tau=1. / self.wavelength_sd[k] ** 2)
            for k in range(n_wavelengths)
        ]
    else:
        wavelength_priors = [
            pymc.distributions.Normal(
                'qw_%i' % k,
                mu=self.wavelengths[k],
                tau=1. / self.wavelengths[k] / 3.)
            for k in range(n_wavelengths)
        ]
    qw = pymc.Container(wavelength_priors)

    def fourier_series(C, N, QW, x, wavelength_number, i_):
        # Float array of zeros with the same shape as x.
        v = np.zeros(np.shape(x), dtype='float')
        for pt in range(len(x)):
            total = v[pt] + C[0]
            for w in range(wavelength_number):
                base = 2 * np.pi / QW[w]
                for harmonic in range(1, N + 1):
                    phase = base * harmonic * (x[pt])
                    cos_coeff = C[(2 * harmonic - 1) + 2 * N * w]
                    sin_coeff = C[(2 * harmonic) + 2 * N * w]
                    total = (total + cos_coeff * np.cos(phase) +
                             sin_coeff * np.sin(phase))
            # Indicator 0 means this point takes the negated series value.
            v[pt] = -total if i_[pt] == 0 else total
        return v

    self.vector_fourier_series = np.vectorize(fourier_series)

    # Define the form of the model and likelihood
    @pymc.deterministic
    def y_model(C=C,
                x=self.xdata,
                qw=qw,
                nn=self.N,
                wavelength_number=n_wavelengths,
                i_=i_):
        return fourier_series(C, nn, qw, x, wavelength_number, i_)

    y = pymc.Normal('y',
                    mu=y_model,
                    tau=1. / sigma**2,
                    observed=True,
                    value=self.ydata)

    # package the full model in a dictionary
    self.model1 = {
        'C': C,
        'qw': qw,
        'sigma': sigma,
        'qw_sigs': qw_sigs,
        'y_model': y_model,
        'y': y,
        'x_values': self.xdata,
        'y_values': self.ydata,
        'i_': i_,
    }
    self.model_e = pymc.Model([C, qw, sigma, y])

    self.setup = True
    self.mcmc_uptodate = False
    return True
def main(RUNFLAG, outname):
    """Build the Bayesian parasitoid model and run a MAP / normal-approx fit.

    Sets up priors, wraps the population model in a PyMC deterministic,
    connects the model output to the observed data through Poisson
    likelihoods, and then runs the requested estimation.

    Args:
        RUNFLAG: ``'MAP_RUN'`` runs a maximum a posteriori fit.  Any other
            non-None value is used as the hdf5 database name for a normal
            approximation run.  ``None`` starts an interactive menu.
        outname: Name of the text file results are written to.  When None,
            each run type falls back to its own default file name.

    Returns:
        None.
    """
    print('Setting up parameters and priors...')

    params = Params()
    # Set up location here with command line arguments in a list.
    params.cmd_line_chg(['--kalbar'])
    assert params.site_name + 'fields.txt' == 'data/kalbarfields.txt'
    # Set parameters specific to Bayesian runs
    params.PLOT = False
    params.OUTPUT = False

    # This sends a message to CalcSol on whether or not to use CUDA
    globalvars.cuda = bool(params.CUDA)

    # get wind data and day labels
    wind_data, days = PM.get_wind_data(*params.get_wind_params())
    params.ndays = len(days)

    # reduce domain
    params.domain_info = (10000.0, 200)  # 50 m sided cells
    domain_res = params.domain_info[0] / params.domain_info[1]
    cell_area = domain_res**2

    locinfo = LocInfo(params.dataset, params.coord, params.domain_info)

    # Per-variable step sizes, passed as ``eps`` to pm.MAP / pm.NormApprox.
    prior_eps = {}

    #### Model priors ####
    lam = pm.Beta("lam", 5, 1, value=0.95)
    prior_eps[lam] = 0.01
    f_a1 = pm.TruncatedNormal("f_a1", 6, 0.3, 0, 9, value=6)
    prior_eps[f_a1] = 0.1
    f_a2 = pm.TruncatedNormal("f_a2", 20, 0.3, 15, 24, value=20)
    prior_eps[f_a2] = 0.1
    # alpha,beta parameterization
    f_b1_p = pm.Gamma("fb1_p", 2, 1, value=1.5, trace=False, plot=False)
    prior_eps[f_b1_p] = 0.05

    @pm.deterministic(trace=True, plot=True)
    def f_b1(f_b1_p=f_b1_p):
        return f_b1_p + 1

    f_b2_p = pm.Gamma("fb2_p", 2, 1, value=1.5, trace=False, plot=False)
    prior_eps[f_b2_p] = 0.05

    @pm.deterministic(trace=True, plot=True)
    def f_b2(f_b2_p=f_b2_p):
        return f_b2_p + 1

    g_aw = pm.Gamma("g_aw", 2.2, 1, value=1.0)
    prior_eps[g_aw] = 0.05
    g_bw = pm.Gamma("g_bw", 5, 1, value=3.8)
    prior_eps[g_bw] = 0.1

    # flight diffusion parameters. note: mean is average over flight advection
    sig_x = pm.Gamma("sig_x", 26, 0.15, value=180)
    prior_eps[sig_x] = 1
    sig_y = pm.Gamma("sig_y", 15, 0.15, value=150)
    prior_eps[sig_y] = 1
    corr_p = pm.Beta("corr_p", 5, 5, value=0.5, trace=False, plot=False)
    prior_eps[corr_p] = 0.01

    @pm.deterministic(trace=True, plot=True)
    def corr(corr_p=corr_p):
        # Rescale from [0, 1] to [-1, 1].
        return corr_p * 2 - 1

    # local spread parameters
    sig_x_l = pm.Gamma("sig_xl", 2, 0.08, value=10)
    prior_eps[sig_x_l] = 1
    sig_y_l = pm.Gamma("sig_yl", 2, 0.14, value=10)
    prior_eps[sig_y_l] = 1
    corr_l_p = pm.Beta("corr_l_p", 5, 5, value=0.5, trace=False, plot=False)
    prior_eps[corr_l_p] = 0.005

    @pm.deterministic(trace=True, plot=True)
    def corr_l(corr_l_p=corr_l_p):
        # Rescale from [0, 1] to [-1, 1].
        return corr_l_p * 2 - 1

    # pymc.MAP can only take float values, so we vary mu_r and set n_periods.
    mu_r = pm.Normal("mu_r", 1., 1, value=1)
    prior_eps[mu_r] = 0.05
    params.n_periods = 30
    # alpha_pow = prev. time exponent in ParasitoidModel.h_flight_prob
    # presence to oviposition/emergence factor
    xi = pm.Gamma("xi", 1, 1, value=0.75)
    prior_eps[xi] = 0.05

    #### Observation probabilities. ####
    # per-wasp prob of observing emergence in release field grid given max
    # leaf collection.  This is dependent on the size of the cell surrounding
    # the grid point, but there's not much to be done about this.  Just
    # remember to interpret this number based on grid coarseness.
    em_obs_prob = pm.Beta("em_obs_prob", 1, 1, value=0.05)
    prior_eps[em_obs_prob] = 0.0005
    # probability of observing a wasp present in the grid cell given max leaf
    # sampling
    grid_obs_prob = pm.Beta("grid_obs_prob", 1, 1, value=0.005)
    prior_eps[grid_obs_prob] = 0.0005
    # card_obs_prob = pm.Beta("card_obs_prob",1,1,value=0.5) # probability of
    # observing a wasp present in the grid cell given max leaf sampling

    #### Data collection model background for sentinel fields ####
    # Need to fix linear units for area. Meters would be best.
    # Effective collection area (constant between fields) is very uncertain
    with warnings.catch_warnings():
        # squelch a warning based on pymc coding we don't need to worry about
        warnings.simplefilter("ignore", RuntimeWarning)
        A_collected = pm.TruncatedNormal(
            "A_collected", 2500, 1 / 2500, 0,
            min(locinfo.field_sizes.values()) * cell_area,
            value=2500)  # in m**2
    prior_eps[A_collected] = 10

    # Each field has its own binomial probability.
    # NOTE(review): an earlier comment here said TruncatedNormal would be
    # used because pm.Beta cannot handle small parameter values, but the code
    # below uses pm.Beta -- confirm which is intended.
    N = len(locinfo.sent_ids)
    sent_obs_probs = np.empty(N, dtype=object)
    # fix beta for the Beta distribution
    sent_beta = 40
    # mean of Beta distribution will be A_collected/field size:
    # alpha = p*beta/(1-p) gives mean alpha/(alpha+beta) = p.
    ## Loop over fields ##
    for n, key in enumerate(locinfo.sent_ids):
        field_area = locinfo.field_sizes[key] * cell_area
        sent_obs_probs[n] = pm.Beta(
            "sent_obs_probs_{}".format(key),
            A_collected / field_area * sent_beta /
            (1 - A_collected / field_area),
            sent_beta,
            value=0.1 * 3600 / field_area)
        prior_eps[sent_obs_probs[n]] = 0.0005
    sent_obs_probs = pm.Container(sent_obs_probs)

    #### Collect variables ####
    # pop_model indexes into this array by position, so the order matters.
    params_ary = pm.Container(
        np.array([
            g_aw, g_bw, f_a1, f_b1, f_a2, f_b2, sig_x, sig_y, corr, sig_x_l,
            sig_y_l, corr_l, lam, mu_r
        ],
                 dtype=object))

    if params.dataset == 'kalbar':
        # factor for kalbar initial spread
        sprd_factor = pm.Uniform("sprd_factor", 0, 1, value=0.1)
        prior_eps[sprd_factor] = 0.01
    else:
        sprd_factor = None

    print('Getting initial model values...')

    #### Run model ####
    @pm.deterministic(plot=False, trace=False)
    def pop_model(params=params,
                  params_ary=params_ary,
                  locinfo=locinfo,
                  wind_data=wind_data,
                  days=days,
                  sprd_factor=sprd_factor):
        '''This function acts as an interface between PyMC and the model.
        Not only does it run the model, but it provides an emergence
        potential based on the population model result projected forward
        from feasible oviposition dates. To modify how this projection
        happens, edit popdensity_to_emergence. Returned values from this
        function should be nearly ready to compare to data.
        '''
        modeltic = time.time()

        ### Alter params with stochastic variables ###
        # g wind function parameters
        params.g_params = tuple(params_ary[0:2])
        # f time of day function parameters
        params.f_params = tuple(params_ary[2:6])
        # Diffusion coefficients
        params.Dparams = tuple(params_ary[6:9])
        params.Dlparams = tuple(params_ary[9:12])
        # Probability of any flight during the day under ideal circumstances
        params.lam = params_ary[12]
        # scaling flight advection to wind advection
        params.mu_r = params_ary[13]

        ### PHASE ONE ###
        # First, get spread probability for each day as a coo sparse matrix
        max_shape = np.array([0, 0])
        pm_args = [(days[0], wind_data, *params.get_model_params(),
                    params.r_start)]
        pm_args.extend([(day, wind_data, *params.get_model_params())
                        for day in days[1:params.ndays]])

        ##### Kalbar wind started recording a day late. Spread the population
        ##### locally before running full model.
        if params.dataset == 'kalbar':
            res = params.domain_info[0] / params.domain_info[1]
            mean_drift = np.array([-25., 15.])
            xdrift_int = int(mean_drift[0] // res)
            xdrift_r = mean_drift[0] % res
            ydrift_int = int(mean_drift[1] // res)
            ydrift_r = mean_drift[1] % res
            longsprd = PM.get_mvn_cdf_values(
                res, np.array([xdrift_r, ydrift_r]),
                PM.Dmat(params_ary[6], params_ary[7], params_ary[8]))
            shrtsprd = PM.get_mvn_cdf_values(
                res, np.array([0., 0.]),
                PM.Dmat(params_ary[9], params_ary[10], params_ary[11]))
            mlen = int(
                max(longsprd.shape[0], shrtsprd.shape[0]) +
                max(abs(xdrift_int), abs(ydrift_int)) * 2)
            sprd = np.zeros((mlen, mlen))
            lbds = [
                int(mlen // 2 - longsprd.shape[0] // 2),
                int(mlen // 2 + longsprd.shape[0] // 2 + 1)
            ]
            sprd[lbds[0] - ydrift_int:lbds[1] - ydrift_int,
                 lbds[0] + xdrift_int:lbds[1] +
                 xdrift_int] = longsprd * sprd_factor
            sbds = [
                int(mlen // 2 - shrtsprd.shape[0] // 2),
                int(mlen // 2 + shrtsprd.shape[0] // 2 + 1)
            ]
            sprd[sbds[0]:sbds[1],
                 sbds[0]:sbds[1]] += shrtsprd * (1 - sprd_factor)
            # Disabled alternative:
            # pmf_list = [sparse.coo_matrix(PM.get_mvn_cdf_values(
            #     params.domain_info[0]/params.domain_info[1],
            #     np.array([0.,0.]),
            #     PM.Dmat(sprd_factor*params_ary[9],
            #             sprd_factor*params_ary[10],params_ary[11])))]
            # Put any probability mass not yet accounted for at the centre.
            sprd[int(sprd.shape[0] // 2),
                 int(sprd.shape[0] // 2)] += max(0, 1 - sprd.sum())
            pmf_list = [sparse.coo_matrix(sprd)]
        else:
            pmf_list = []

        ###################### Get pmf_list from multiprocessing
        pmf_list.extend(pool.starmap(PM.prob_mass, pm_args))
        ######################

        # Track the largest pmf extent along each dimension.
        for pmf in pmf_list:
            for dim in range(2):
                if pmf.shape[dim] > max_shape[dim]:
                    max_shape[dim] = pmf.shape[dim]

        r_spread = []  # holds the one-day spread for each release day.
        # dom_len does not depend on the release day, so compute it once; it
        # is also needed after the loop by get_populations.
        dom_len = params.domain_info[1] * 2 + 1
        # Reshape the prob. mass function of each release day into solution
        # form
        for ii in range(params.r_dur):
            offset = params.domain_info[1] - pmf_list[ii].shape[0] // 2
            r_spread.append(
                sparse.coo_matrix(
                    (pmf_list[ii].data, (pmf_list[ii].row + offset,
                                         pmf_list[ii].col + offset)),
                    shape=(dom_len, dom_len)).tocsr())

        ### PHASE TWO ###
        # Pass the probability list, pmf_list, and other info to convolution
        # solver. This will return the finished population model.
        with Capturing() as output:
            if params.dataset == 'kalbar':
                # extend day count by one
                days_ext = [days[0] - 1]
                days_ext.extend(days)
                modelsol = get_populations(r_spread, pmf_list, days_ext,
                                           params.ndays + 1, dom_len,
                                           max_shape, params.r_dur,
                                           params.r_number, params.r_mthd())
                # remove the first one and start where wind started.
                modelsol = modelsol[1:]
            else:
                modelsol = get_populations(r_spread, pmf_list, days,
                                           params.ndays, dom_len, max_shape,
                                           params.r_dur, params.r_number,
                                           params.r_mthd())

        # modelsol now holds the model results for this run as CSR sparse
        # arrays

        # get emergence potential (measured in expected number of wasps
        # previously present whose oviposition would result in emergence on
        # the given date) from the model result
        release_emerg, sentinel_emerg = popdensity_to_emergence(
            modelsol, locinfo)

        # get the expected wasp populations at grid points on sample days
        grid_counts = popdensity_grid(modelsol, locinfo)

        # get the expected wasp populations in cardinal directions
        # (disabled)
        # card_counts = popdensity_card(modelsol,locinfo,params.domain_info)

        ## For the lists release_emerg and sentinel_emerg:
        ##    Each list entry corresponds to a data collection day (one array)
        ##    In each array:
        ##    Each column corresponds to an emergence observation day (as in
        ##      data)
        ##    Each row corresponds to a grid point or sentinel field,
        ##      respectively
        ## For the array grid_counts:
        ##    Each column corresponds to an observation day
        ##    Each row corresponds to a grid point
        ## For the list card_counts:
        ##    Each list entry corresponds to a sampling day (one array)
        ##    Each column corresponds to a step in a cardinal direction
        ##    Each row corresponds to a cardinal direction
        print('{:03.1f} sec./model at {}'.format(
            time.time() - modeltic, time.strftime("%H:%M:%S %d/%m/%Y")),
              end='\r')
        sys.stdout.flush()
        return (release_emerg, sentinel_emerg, grid_counts)  # ,card_counts)

    print('Parsing model output and connecting to Bayesian model...')

    ### Parse the results of pop_model into separate deterministic variables ###
    # Get Poisson probabilities for sentinel field emergence.
    # Parameters: xi is constant, emerg is a list of ndarrays,
    # betas is a 1D array of field probabilities
    Ncollections = len(locinfo.sent_DataFrames)
    sent_poi_rates = []
    for ii in range(Ncollections):
        s_ndays = len(locinfo.sent_DataFrames[ii]['datePR'].unique())
        # Loop values are bound as lambda defaults so each Lambda keeps its
        # own collection rather than the last one seen by the loop.
        sent_poi_rates.append(
            pm.Lambda('sent_poi_rate_{}'.format(ii),
                      lambda xi=xi, ndays=s_ndays, betas=sent_obs_probs,
                      emerg_model=pop_model[1][ii]: xi * emerg_model * np.
                      tile(betas, (ndays, 1)).T,
                      trace=False))
    sent_poi_rates = pm.Container(sent_poi_rates)

    # Return Poisson probabilities for release field grid emergence.
    # Parameters: xi is constant, emerg is a list of ndarrays.
    # collection effort is specified in locinfo.
    Ncollections = len(locinfo.release_DataFrames)
    rel_poi_rates = []
    for ii in range(Ncollections):
        r_effort = locinfo.release_collection[ii]  # fraction of max collection
        r_ndays = len(locinfo.release_DataFrames[ii]['datePR'].unique())
        rel_poi_rates.append(
            pm.Lambda('rel_poi_rate_{}'.format(ii),
                      lambda xi=xi, ndays=r_ndays, r_effort=r_effort,
                      beta=em_obs_prob, emerg_model=pop_model[0][ii]: xi *
                      emerg_model * np.tile(r_effort * beta, (ndays, 1)).T,
                      trace=False))
    rel_poi_rates = pm.Container(rel_poi_rates)

    @pm.deterministic(plot=False, trace=False)
    def grid_poi_rates(locinfo=locinfo,
                       beta=grid_obs_prob,
                       obs_model=pop_model[2]):
        '''Return Poisson probabilities for grid sampling
        obs_model is an ndarray, sampling effort is specified in locinfo.'''
        return beta * locinfo.grid_samples * obs_model

    # Disabled: Poisson probabilities for cardinal direction sampling.
    # obs_model is a list of ndarrays, sampling effort is assumed constant
    # card_poi_rates = []
    # for ii,obs in enumerate(pop_model[3]):
    #     card_poi_rates.append(pm.Lambda('card_poi_rate_{}'.format(ii),
    #         lambda beta=card_obs_prob, obs=obs: beta*obs))
    # card_poi_rates = pm.Container(card_poi_rates)

    # Given the expected wasp densities from pop_model, actual wasp densities
    # are modeled as a thinned Poisson random variable about that mean.
    # Each wasp in the area then has a small probability of being seen.

    ### Connect sentinel emergence data to model ###
    N_sent_collections = len(locinfo.sent_DataFrames)
    # Create list of collection variables
    sent_collections = []
    for ii in range(N_sent_collections):
        # Apparently, pymc does not play well with 2D array parameters
        sent_collections.append(
            np.empty(sent_poi_rates[ii].value.shape, dtype=object))
        for n in range(sent_collections[ii].shape[0]):
            for m in range(sent_collections[ii].shape[1]):
                sent_collections[ii][n, m] = pm.Poisson(
                    "sent_em_obs_{}_{}_{}".format(ii, n, m),
                    sent_poi_rates[ii][n, m],
                    value=float(locinfo.sentinel_emerg[ii][n, m]),
                    observed=True)
    sent_collections = pm.Container(sent_collections)

    ### Connect release-field emergence data to model ###
    N_release_collections = len(locinfo.release_DataFrames)
    # Create list of collection variables
    rel_collections = []
    for ii in range(N_release_collections):
        rel_collections.append(
            np.empty(rel_poi_rates[ii].value.shape, dtype=object))
        for n in range(rel_collections[ii].shape[0]):
            for m in range(rel_collections[ii].shape[1]):
                rel_collections[ii][n, m] = pm.Poisson(
                    "rel_em_obs_{}_{}_{}".format(ii, n, m),
                    rel_poi_rates[ii][n, m],
                    value=float(locinfo.release_emerg[ii][n, m]),
                    observed=True)
    rel_collections = pm.Container(rel_collections)

    ### Connect grid sampling data to model ###
    grid_obs = np.empty(grid_poi_rates.value.shape, dtype=object)
    for n in range(grid_obs.shape[0]):
        for m in range(grid_obs.shape[1]):
            grid_obs[n, m] = pm.Poisson("grid_obs_{}_{}".format(n, m),
                                        grid_poi_rates[n, m],
                                        value=float(locinfo.grid_obs[n, m]),
                                        observed=True)
    grid_obs = pm.Container(grid_obs)

    ### Connect cardinal direction data to model ### (disabled)
    # N_card_collections = len(locinfo.card_obs_DataFrames)
    # # Create list of sampling variables
    # card_collections = []
    # for ii in range(N_card_collections):
    #     card_collections.append(np.empty(card_poi_rates[ii].value.shape,
    #                                      dtype=object))
    #     for n in range(card_collections[ii].shape[0]):
    #         for m in range(card_collections[ii].shape[1]):
    #             card_collections[ii][n,m] = pm.Poisson(
    #                 "card_obs_{}_{}_{}".format(ii,n,m),
    #                 card_poi_rates[ii][n,m],
    #                 value=locinfo.card_obs[ii][n,m],
    #                 observed=True, plot=False)
    # card_collections = pm.Container(card_collections)

    ### Collect model ###
    # The variable list is the same for every dataset except that kalbar adds
    # sprd_factor right after mu_r.
    model_vars = [
        lam, f_a1, f_a2, f_b1_p, f_b2_p, f_b1, f_b2, g_aw, g_bw, sig_x, sig_y,
        corr_p, corr, sig_x_l, sig_y_l, corr_l_p, corr_l, mu_r
    ]
    if params.dataset == 'kalbar':
        model_vars.append(sprd_factor)
    model_vars.extend([
        grid_obs_prob, xi, em_obs_prob, A_collected, sent_obs_probs,
        params_ary, pop_model, grid_poi_rates, rel_poi_rates, sent_poi_rates,
        grid_obs, rel_collections, sent_collections
    ])
    Bayes_model = pm.Model(model_vars)

    ######################################################################
    #####              Run Methods and Interactive Menu              #####
    ######################################################################

    def MAP_run(outname=None):
        '''Find Maximum a posteriori distribution'''
        tic = time.time()
        M = pm.MAP(Bayes_model, prior_eps)
        print('Fitting....')
        M.fit()

        # Return statistics
        print('Estimate complete. Time elapsed: {}'.format(time.time() - tic))
        print('Free stochastic variables: {}'.format(M.len))
        print('Joint log-probability of model: {}'.format(M.logp))
        print('Max joint log-probability of model: {}'.format(M.logp_at_max))
        print('Maximum log-likelihood: {}'.format(M.lnL))
        print("Akaike's Information Criterion {}".format(M.AIC), flush=True)
        print('---------------Variable estimates---------------')
        for var in Bayes_model.stochastics:
            print('{} = {}'.format(var, var.value))

        # Save result to file
        if outname is None:
            outname = 'Max_aPosteriori_Estimate.txt'
        with open(outname, 'w') as fobj:
            fobj.write('Time elapsed: {}\n'.format(time.time() - tic))
            fobj.write('Free stochastic variables: {}\n'.format(M.len))
            fobj.write('Joint log-probability of model: {}\n'.format(M.logp))
            fobj.write('Max joint log-probability of model: {}\n'.format(
                M.logp_at_max))
            fobj.write('Maximum log-likelihood: {}\n'.format(M.lnL))
            fobj.write("Akaike's Information Criterion {}\n".format(M.AIC))
            fobj.write('---------------Variable estimates---------------\n')
            for var in Bayes_model.stochastics:
                fobj.write('{} = {}\n'.format(var, var.value))
        print('Result saved to {}.'.format(outname))
        return M

    def norm_run(fname, outname=None):
        '''Find normal approximation'''
        # Stays None if NormApprox construction itself fails, so the handler
        # below does not touch a database that was never opened.
        M = None
        try:
            tic = time.time()
            M = pm.NormApprox(Bayes_model,
                              eps=prior_eps,
                              db='hdf5',
                              dbname=fname,
                              dbmode='a',
                              dbcomplevel=0)
            print('Fitting....')
            M.fit()

            # Return statistics
            print('Estimate complete. Time elapsed: {}'.format(time.time() -
                                                               tic))
            print('Free stochastic variables: {}'.format(M.len))
            print('Joint log-probability of model: {}'.format(M.logp))
            print('Max joint log-probability of model: {}'.format(
                M.logp_at_max))
            print("Akaike's Information Criterion {}".format(M.AIC),
                  flush=True)
            print('---------------Variable estimates---------------')
            print('Estimated means: ')
            for var in Bayes_model.stochastics:
                print('{} = {}'.format(var, M.mu[var]))
            print('Estimated variances: ')
            for var in Bayes_model.stochastics:
                print('{} = {}'.format(var, M.C[var]))

            # Save result to file
            if outname is None:
                outname = "Normal_approx.txt"
            with open(outname, 'w') as fobj:
                fobj.write('Time elapsed: {}\n'.format(time.time() - tic))
                fobj.write('Free stochastic variables: {}\n'.format(M.len))
                fobj.write('Joint log-probability of model: {}\n'.format(
                    M.logp))
                fobj.write('Max joint log-probability of model: {}\n'.format(
                    M.logp_at_max))
                fobj.write("Akaike's Information Criterion {}\n".format(M.AIC))
                fobj.write(
                    '---------------Variable estimates---------------\n')
                fobj.write('Estimated means: \n')
                for var in Bayes_model.stochastics:
                    fobj.write('{} = {}\n'.format(var, M.mu[var]))
                fobj.write('Estimated variances: \n')
                for var in Bayes_model.stochastics:
                    fobj.write('{} = {}\n'.format(var, M.C[var]))
            print('These results have been saved to {}.'.format(outname))
        except Exception as e:
            print(e)
            if M is not None:
                print('Exception: database closing...')
                M.db.close()
                print('Database closed.')
            raise
        return M

    # Parse run type
    if RUNFLAG == 'MAP_RUN':
        M = MAP_run(outname)
    elif RUNFLAG is not None:
        M = norm_run(RUNFLAG, outname)
        M.db.close()
    else:
        print(
            '----- Maximum a posteriori estimates & Normal approximations -----'
        )
        while True:
            print(" 'map': Calculate maximum a posteriori estimate")
            print("'norm': Calculate normal approximation")
            print("'quit': Quit.")
            cmd = input('Enter: ').strip().lower()
            if cmd == 'map':
                M = MAP_run(outname)
                # Option to enter IPython
                cmd_py = input('Enter IPython y/[n]:').strip().lower()
                if cmd_py in ('y', 'yes'):
                    import IPython
                    IPython.embed()
            elif cmd == 'norm':
                fname = input("Enter database name or 'back' to cancel:")
                fname = fname.strip()
                if fname in ('q', 'quit'):
                    return
                elif fname in ('b', 'back'):
                    continue
                elif not fname.endswith('.h5'):
                    fname = fname + '.h5'
                M = norm_run(fname, outname)
                try:
                    print(
                        'For covariances, enter IPython and request a covariance'
                        +
                        ' matrix by passing variables in the following syntax:\n'
                        + 'M.C[var1,var2,...,varn]\n' +
                        'Example: M.C[f_a1,f_a2] gives the covariance matrix of\n'
                        + ' f_a1 and f_a2.')
                    # Option to enter IPython
                    cmd_py = input('Enter IPython y/[n]:').strip().lower()
                    if cmd_py in ('y', 'yes'):
                        import IPython
                        IPython.embed()
                    M.db.close()
                    print('Database closed.')
                except Exception as e:
                    print(e)
                    print('Exception: database closing...')
                    M.db.close()
                    print('Database closed.')
                    raise
            elif cmd in ('quit', 'q'):
                return
            else:
                print('Command not recognized.')