def pymc_linear_fit(data1, data2, data1err=None, data2err=None,
                    print_results=False, intercept=True, nsample=5000,
                    burn=1000, thin=10, return_MC=False, guess=None,
                    ignore_nans=True, progress_bar=True):
    import numpy as np
    old_errsettings = np.geterr()
    import pymc  # pymc breaks np error settings
    np.seterr(**old_errsettings)

    if ignore_nans:
        data1, data2, data1err, data2err = remove_nans(data1, data2,
                                                       data1err, data2err)

    if guess is None:
        guess = (0, 0)

    xmu = pymc.distributions.Uninformative(name='x_observed', value=0)
    if data1err is None:
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True,
                                          value=data1, tau=1, trace=False)
    else:
        xtau = pymc.distributions.Uninformative(name='x_tau',
                                                value=1.0/data1err**2,
                                                observed=True, trace=False)
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True,
                                          value=data1, tau=xtau, trace=False)

    d = {'slope': pymc.distributions.Uninformative(name='slope',
                                                   value=guess[0]),
         }
    if intercept:
        d['intercept'] = pymc.distributions.Uninformative(name='intercept',
                                                          value=guess[1])

        @pymc.deterministic(trace=False)
        def model(x=xdata, slope=d['slope'], intercept=d['intercept']):
            return x * slope + intercept
    else:
        @pymc.deterministic(trace=False)
        def model(x=xdata, slope=d['slope']):
            return x * slope
    d['f'] = model

    if data2err is None:
        ydata = pymc.distributions.Normal('y', mu=model, observed=True,
                                          value=data2, tau=1, trace=False)
    else:
        ytau = pymc.distributions.Uninformative(name='y_tau',
                                                value=1.0/data2err**2,
                                                observed=True, trace=False)
        ydata = pymc.distributions.Normal('y', mu=model, observed=True,
                                          value=data2, tau=ytau, trace=False)
    d['y'] = ydata

    MC = pymc.MCMC(d)
    MC.sample(nsample, burn=burn, thin=thin, progress_bar=progress_bar)

    MCs = MC.stats()
    m, em = MCs['slope']['mean'], MCs['slope']['standard deviation']
    if intercept:
        b, eb = MCs['intercept']['mean'], MCs['intercept']['standard deviation']

    if print_results:
        print "MCMC Best fit y = %g x" % (m),
        if intercept:
            print " + %g" % (b)
        else:
            print ""
        print "m = %g +/- %g" % (m, em)
        if intercept:
            print "b = %g +/- %g" % (b, eb)
        print "Chi^2 = %g, N = %i" % (((data2 - (data1 * m))**2).sum(),
                                      data1.shape[0] - 1)

    if return_MC:
        return MC
    if intercept:
        return m, b
    else:
        return m
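# --- Hedged usage sketch (not from the original source; synthetic data).
# Assumes the module's remove_nans helper is available alongside the
# function above; the numbers here are purely illustrative.
import numpy as np
np.random.seed(42)
x = np.linspace(0, 10, 50)
y = 2.5 * x + 1.0 + np.random.randn(50)   # true slope 2.5, intercept 1.0
m, b = pymc_linear_fit(x, y, data2err=np.ones(50), nsample=20000, burn=5000)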
mut_region_length = config.getint('Input', 'mut_region_length')
expname = config.get('Input', 'expname')  # e.g. MscS mut1; describes the experiment without the different batch numbers.

# The run number and filename stem for the trace database are passed as
# command line arguments. Ideally the config file would also be passed on
# the CLI, which enables some rudimentary error checking that everything
# is consistent.
parser = argparse.ArgumentParser()
parser.add_argument('runnum', help='Run number appended to the database filename.')
parser.add_argument('savefn', help='Filename stem of the database file to save traces to.')
#parser.add_argument('cfg_fn', help='Filename of the pymc config file for the run.')
args = parser.parse_args()

# Make sure the generic_energy_matrix module and this script are
# looking at the same data.
# cli_cfg = os.path.abspath(args.cfg_fn)
# module_cfg = os.path.abspath(generic_energy_matrix.cfg_fn)
# if cli_cfg != module_cfg:
#     raise ValueError("generic_energy_matrix module %s and script cli %s are not consistent" % (module_cfg, cli_cfg))

#print args.db_fn
fulldbname = '/home/wireland/mscS4-8-15/results/' + str(args.savefn) + str(args.runnum) + '.sql'
#fulldbname = '/home/wireland/lassoresults/MCMC/MCMCtest3_' + str(args.runnum) + '.sql'
M = pymc.MCMC(generic_energy_matrix_nonunique, db='sqlite', dbname=fulldbname)
M.use_step_method(stepper.GaugePreservingStepper,
                  generic_energy_matrix_nonunique.emat)

f = open('/home/wireland/mscS4-8-15/runsdetails/' + str(args.savefn) + str(args.runnum) + '.txt', 'w')
f.writelines(['dbname = ' + fulldbname + '\n',
              'mut_region_start = ' + str(mut_region_start) + '\n',
              'mut_region_length = ' + str(mut_region_length) + '\n',
              'exp_name = ' + str(expname)])
f.close()

M.sample(30000, thin=10)
#        return p.evaluate(a)

    @pm.stochastic(dtype=float, observed=True)
    def loglike(value=1.0, fg=fg, gamma=gamma):
        f = fg[0]
        g = fg[1:]
        return gamma * 20. * f + gamma * (min(0., g[0]))
        # return gamma * f + \
        #     np.sum(np.log(1.0 / (1.0 + np.exp(-gamma*10. * g))))

    return locals()


if __name__ == '__main__':
    model = make_model()
    mcmc = pm.MCMC(model)
    mcmc.use_step_method(ps.RandomWalk, model['a'], proposal_sd=0.002)
    smc = SteadyPaceSMC(mcmc, num_particles=400, num_mcmc=5, verbose=4,
                        gamma_is_an_exponent=True, ess_reduction=0.9,
                        adapt_proposal_step=True, mpi=mpi)
    smc.initialize(0.1)
    results = []
    for gamma in np.linspace(0., 20, 200)[1:]:
        smc.move_to(gamma)
        pa = smc.get_particle_approximation().gather()
        if mpi.COMM_WORLD.Get_rank() == 0:
Tleaf=df["Tleaf"], Jmax=Jmax, Vcmax=Vcmax, Rd=Rd) return An y = pymc.Normal('y', mu=farquhar_wrapper, tau=1.0 / obs_sigma**2, value=obs, observed=True) N = 100000 model = pymc.Model([y, df, Vcmax, Jmax, Rd]) M = pymc.MCMC(model) M.sample(iter=N, burn=N * 0.1, thin=10) Vcmax = M.stats()["Vcmax"]['mean'] Jmax = M.stats()["Jmax"]['mean'] Rd = M.stats()["Rd"]['mean'] (An, Anc, Anj) = F.calc_photosynthesis(Ci=df["Ci"], Tleaf=df["Tleaf"], Jmax=Jmax, Vcmax=Vcmax, Rd=Rd) rmse = np.sqrt(((obs - An)**2).mean(0)) print("RMSE: %.4f" % (rmse)) # Get the fits Vcmax = M.trace('Vcmax').gettrace()
analysis_power = observed_power_spectrum[fftfreq >= 0][1:-1]

# Get a simple estimate of the power law index
estimate = rn_utils.do_simple_fit(analysis_frequencies, analysis_power)
c_estimate = estimate[0]
m_estimate = estimate[1]

# Plot the power law spectrum
power_fit = c_estimate * analysis_frequencies**(-m_estimate)

# Define the MCMC model
this_model = pymcmodels.single_power_law(analysis_frequencies,
                                         analysis_power, m_estimate)

# Set up the MCMC model
M1 = pymc.MCMC(this_model)

# Run the sampler
M1.sample(iter=50000, burn=1000, thin=10, progress_bar=False)

# Get the power law index and save the results
pli = M1.trace("power_law_index")[:]
s_pli = rn_utils.summary_stats(pli, 0.05, bins=40)
pi95 = rn_utils.credible_interval(pli, ci=0.95)

cli = M1.trace("power_law_norm")[:]
s_cli = rn_utils.summary_stats(cli, 0.05, bins=40)
ci95 = rn_utils.credible_interval(cli, ci=0.95)

bayes_mean_fit = np.exp(s_cli["mean"]) * (analysis_frequencies ** -s_pli["mean"])
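# --- Hedged sketch (assumption; the pymcmodels source is not shown here).
# From how the traces are used above ("power_law_index", "power_law_norm",
# and the np.exp() applied to the norm), the model factory plausibly builds
# uniform priors on the index and log-normalization and an exponential
# likelihood for the periodogram ordinates (each periodogram value of a
# noise process is exponentially distributed about the true spectrum).
def single_power_law_sketch(frequencies, power, index_estimate):
    power_law_index = pymc.Uniform('power_law_index', lower=0.0, upper=6.0,
                                   value=index_estimate)
    power_law_norm = pymc.Uniform('power_law_norm', lower=-100.0, upper=100.0,
                                  value=np.log(np.mean(power)))

    @pymc.deterministic
    def rate(a=power_law_norm, p=power_law_index, f=frequencies):
        # Exponential rate = 1 / spectrum, so the mean equals the spectrum.
        return 1.0 / (np.exp(a) * f ** (-p))

    spectrum = pymc.Exponential('spectrum', beta=rate, value=power,
                                observed=True)
    return locals()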
    def setUpClass(self):
        self.S = pymc.MCMC(disaster_model,
                           db='pickle',
                           dbname=os.path.join(testdir, 'Disaster.pickle'),
                           dbmode='w')
        self.S.use_step_method(pymc.Metropolis, self.S.early_mean, tally=True)
    def setUpClass(self):
        self.S = pymc.MCMC(disaster_model, db='base')
alpha_of_beta_after_map, beta_of_beta_after_map = alpha_of_beta.value, beta_of_beta.value
alpha_of_beta_after_map, beta_of_beta_after_map

# In[34]:

a_list = []
b_list = []
alpha_of_beta.value, beta_of_beta.value = alpha_of_beta.random(), beta_of_beta.random()
alpha_of_beta_random, beta_of_beta_random = alpha_of_beta.value, beta_of_beta.value
a_list.append(alpha_of_beta_random)
b_list.append(beta_of_beta_random)
print alpha_of_beta_random, beta_of_beta_random
bin_beta_MCMC = mc.MCMC(bin_beta_model)

# In[35]:

bin_beta_MCMC.sample(iter=40000, burn=20000, thin=1)
alpha_of_beta_after_mcmc, beta_of_beta_after_mcmc = alpha_of_beta.value, beta_of_beta.value
a_list.append(alpha_of_beta_after_mcmc)
b_list.append(beta_of_beta_after_mcmc)
alpha_of_beta_after_mcmc, beta_of_beta_after_mcmc

# In[36]:

mc.Matplot.plot(bin_beta_MCMC)
y = pymc.Normal('y', mu=y_model, tau=1.0 / (sigma**2.0), value=ydata,
                observed=True)
DelayLin_model = dict(kappa=kappa, tau=tau, Omega=Omega, sigma=sigma,
                      y_model=y_model, y=y)
MDL = pymc.MCMC(DelayLin_model)
MDL.sample(niter, burn)

kappa_trace = MDL.trace('kappa')[:]
tau_trace = MDL.trace('tau')[:]
Omega_trace = MDL.trace('Omega')[:]
kappa_est = np.median(kappa_trace)
tau_est = np.median(tau_trace)
Omega_est = np.median(Omega_trace)

a_est = ((2 * np.pi) * Omega_est /
         np.tan((2 * np.pi) * Omega_est * tau_est)) - epsilon
b_est = -(2 * np.pi) * Omega_est / np.sin((2 * np.pi) * Omega_est * tau_est)
Power_est = func_DelayLin(w=w, kappa=kappa_est, a=a_est, b=b_est,
                          tau=tau_est)
import pymc
import numpy as np
import matplotlib.pyplot as plt

import model  # local module providing create_model (assumed; used below)

#---------------------------- Run Time Params --------------------------------#
# Probably going to try and use - flag conventions with
# __init__(self, *args, **kwargs)

#---------------------------- Load Data --------------------------------------#
data_matrix = np.loadtxt("../data/Y_alpha0.1_K5_N20.txt", delimiter=',')
num_people = 20
num_groups = 5
alpha = np.ones(num_groups).ravel() * 0.1
#B = np.eye(num_groups)*0.85
#B = B + np.random.random(size=[num_groups,num_groups])*0.1
B = np.eye(num_groups) * 0.8
B = B + np.ones([num_groups, num_groups]) * 0.2 - np.eye(num_groups) * 0.2

#---------------------------- Setup Model -----------------------------------#
raw_model = model.create_model(data_matrix, num_people, num_groups, alpha, B)
#model_instance = pymc.Model(raw_model)

#---------------------------- Call MAP to initialize MCMC -------------------#
#pymc.MAP(model_instance).fit(method='fmin_powell')
print '---------- Finished Running MAP to Set MCMC Initial Values ----------'

#---------------------------- Run MCMC --------------------------------------#
print '--------------------------- Starting MCMC ---------------------------'
M = pymc.MCMC(raw_model)
M.sample(100000, 50000, thin=5, verbose=0)
def main():
    print('Starting MCMC simulation and fit.')
    global params

    # Check if a path to the config file is provided and if the file exists.
    if len(sys.argv) > 1:
        if os.path.isfile(sys.argv[1]) == True:
            params['NAME_CONFIGFILE_TEMPLATE'] = sys.argv[1]
            params['RUN_SCRIPT'] = 'RUN_HELPER.sh'
        else:
            print('* ERROR:', sys.argv[1], 'does not exist.')
        if len(sys.argv) > 2:
            if os.path.isfile(sys.argv[2]) == True:
                params['RUN_SCRIPT'] = sys.argv[2]
            else:
                print('* ERROR: Optional run script', sys.argv[2],
                      'does not exist.')
                print('Aborting.')
                exit()
    else:
        print('* ERROR: No command line argument for configfile provided.')

    if params['NAME_CONFIGFILE_TEMPLATE'] == '':
        print('Usage: python3', sys.argv[0],
              '<PATH/TO/CONFIGFILE> [<PATH/TO/RUN/SCRIPT>]')
        print('Aborting.')
        exit()

    name = create_testcase_name(TESTED_VARIABLES, params)
    db_name = name + '.pickle'

    parse_pymc_from_config_file(params)
    sample_iterations = params['ITERATIONS']
    sample_burns = params['BURNS']
    print('Number of sample iterations: {}.'.format(sample_iterations))
    print('Number of sample burns: {}.'.format(sample_burns))

    targetValues = [params['T_NORMAL'], params['T_TUMOR'], params['T_VESSEL']]
    print('Target values: [T_normal, T_tumor, T_vessel]')
    print('Target values for this dataset: {}.'.format(targetValues))

    # Apply MCMC sampler.
    MDL = pymc.MCMC(fitSimulation(targetValues), db='pickle', dbname=db_name)
    MDL.sample(iter=sample_iterations, burn=sample_burns)
    print()

    # Extract and plot results.
    temperatures = MDL.stats()['callScaFES']['mean']
    lambda_bt = MDL.stats()['lambda_bt']['mean']
    pymc.Matplot.plot(MDL)
    print()
    print('T_final: [T_normal, T_tumor, T_vessel]')
    print('T_final:', temperatures)
    print()
    print('lambda_bt:', lambda_bt)

    graph = pymc.graph.graph(MDL)
    graph.write_png('graph.png')
    print()
    print('Number of ScaFES calls:', count)
    print()

    T_normal = MDL.trace('callScaFES')[:, 0]
    T_tumor = MDL.trace('callScaFES')[:, 1]
    T_vessel = MDL.trace('callScaFES')[:, 2]
    lambda_bt = MDL.trace('lambda_bt')[:]
    l2_norm = np.linalg.norm(np.subtract(MDL.trace('callScaFES')[:],
                                         targetValues), 2, axis=1)
    iterations = l2_norm.shape[0]

    nc_file = create_mcmc_netcdf_file('pymc_' + name + '.nc', iterations)
    save_vector_to_mcmc_file(nc_file, lambda_bt, 'lambda_bt')
    save_vector_to_mcmc_file(nc_file, l2_norm, 'L2-norm')
    save_vector_to_mcmc_file(nc_file, T_normal, 'T_normal')
    save_vector_to_mcmc_file(nc_file, T_tumor, 'T_tumor')
    save_vector_to_mcmc_file(nc_file, T_vessel, 'T_vessel')
    write_ini_file_to_nc_file(nc_file, params['NAME_CONFIGFILE_TEMPLATE'])
    close_nc_file(nc_file)

    MDL.db.close()
    print()
    print('Done.')
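# --- Hedged sketch (assumption; fitSimulation is not shown in this file).
# From its usage above, fitSimulation(targetValues) plausibly returns a dict
# of pymc nodes: a prior on lambda_bt and a deterministic named 'callScaFES'
# that runs the simulation and returns the three tissue temperatures. The
# prior bounds and run_scafes_simulation helper below are hypothetical.
def fitSimulation_sketch(targetValues):
    lambda_bt = pymc.Uniform('lambda_bt', lower=0.0, upper=1.0)

    @pymc.deterministic(name='callScaFES')
    def callScaFES(lambda_bt=lambda_bt):
        # Placeholder: the real code would write a config, invoke ScaFES via
        # RUN_SCRIPT, and read back [T_normal, T_tumor, T_vessel].
        return run_scafes_simulation(lambda_bt)  # hypothetical helper

    y = pymc.Normal('y', mu=callScaFES, tau=1.0, value=targetValues,
                    observed=True)
    return locals()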
        pod = 1. - stats.norm.cdf((np.log(ai) - lmd) / beta)
        pinrange = stats.norm.cdf(2.0619, loc=ai, scale=sigmae) - \
            stats.norm.cdf(1.8243, loc=ai, scale=sigmae)
        if value == 0:
            p = (1. - pod) + pod * (1. - pinrange)
            return np.log(p)
        else:
            return np.log(pod * pinrange)

    def random(ai):
        pod = 1. - stats.norm.cdf((np.log(ai) - lmd) / beta)
        pinrange = stats.norm.cdf(2.0619, loc=ai, scale=sigmae) - \
            stats.norm.cdf(1.8243, loc=ai, scale=sigmae)
        if np.random.rand() <= pod:
            return stats.bernoulli.rvs(p=pinrange, size=1)
        else:
            return 0

    return locals()


M = pymc.MCMC(make_pymc_model(), db='pickle', dbname='soliman_pymc.pickle')
# graph = pymc.graph.graph(M)
# graph.write_png('soliman2014_pymc.png')
# import os
# os.system('eog soliman2014_pymc.png')
nchain = 4
niter = int(1e6)
nburn = niter // 2  # integer division so burn is an int
nthin = 2
for i in range(nchain):
    M.sample(iter=niter, burn=nburn, thin=nthin)
pymc.Matplot.plot(M)
M.db.close()
def pymc_linear_fit_mixture(data1, data2, data1err=None, data2err=None,
                            print_results=False, intercept=True, nsample=5000,
                            burn=1000, thin=10, return_MC=False, guess=None,
                            verbose=0):
    """
    ***NOT TESTED***
    ***MAY IGNORE X ERRORS***

    Use pymc to fit a line to data with outliers, assuming the outliers come
    from a broad, uniform distribution that covers all the data.

    The model doesn't work exactly right; it over-rejects "outliers" because
    'bady' is an unobserved stochastic parameter that can therefore move.
    Maybe it should be implemented as a "potential" or a mixture model, e.g.
    as in Hogg 2010 / VanderPlas' astroML example:
    http://astroml.github.com/book_figures/chapter8/fig_outlier_rejection.html
    """
    import pymc

    if guess is None:
        guess = (0, 0)

    xmu = pymc.distributions.Uninformative(name='x_observed', value=0)
    if data1err is None:
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True,
                                          value=data1, tau=1, trace=False)
    else:
        xtau = pymc.distributions.Uninformative(name='x_tau',
                                                value=1.0/data1err**2,
                                                observed=True, trace=False)
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True,
                                          value=data1, tau=xtau, trace=False)

    d = {'slope': pymc.distributions.Uninformative(name='slope',
                                                   value=guess[0]),
         #d['badvals'] = pymc.distributions.Binomial('bad',len(data2),0.5,value=[False]*len(data2))
         #d['badx'] = pymc.distributions.Uniform('badx',min(data1-data1err),max(data1+data1err),value=data1)
         #'badvals':pymc.distributions.DiscreteUniform('bad',0,1,value=[False]*len(data2)),
         #'bady':pymc.distributions.Uniform('bady',min(data2-data2err),max(data2+data2err),value=data2),
         }

    # Set up the "mixture model" from Hogg et al. 2010:
    # uniform prior on Pb, the fraction of bad points
    Pb = pymc.Uniform('Pb', 0, 1.0, value=0.1)
    # uniform prior on Yb, the centroid of the outlier distribution
    Yb = pymc.Uniform('Yb', -10000, 10000, value=0)
    # uniform prior on log(sigmab), the spread of the outlier distribution
    log_sigmab = pymc.Uniform('log_sigmab', -10, 10, value=5)

    @pymc.deterministic
    def sigmab(log_sigmab=log_sigmab):
        return np.exp(log_sigmab)

    MixtureNormal = pymc.stochastic_from_dist('mixturenormal',
                                              logp=mixture_likelihood,
                                              dtype=np.float, mv=True)

    if intercept:
        d['intercept'] = pymc.distributions.Uninformative(name='intercept',
                                                          value=guess[1])

        @pymc.deterministic(trace=False)
        def model(x=xdata, slope=d['slope'], intercept=d['intercept']):
            return (x * slope + intercept)
    else:
        @pymc.deterministic(trace=False)
        def model(x=xdata, slope=d['slope']):
            return x * slope
    d['f'] = model

    dyi = data2err if data2err is not None else np.ones(data2.shape)

    y_mixture = MixtureNormal('y_mixture', model=model, dyi=dyi,
                              Pb=Pb, Yb=Yb, sigmab=sigmab,
                              observed=True, value=data2)
    d['y'] = y_mixture

    #if data2err is None:
    #    ydata = pymc.distributions.Normal('y', mu=model, observed=True,
    #                                      value=data2, tau=1, trace=False)
    #else:
    #    ytau = pymc.distributions.Uninformative(name='y_tau',
    #                                            value=1.0/data2err**2,
    #                                            observed=True, trace=False)
    #    ydata = pymc.distributions.Normal('y', mu=model, observed=True,
    #                                      value=data2, tau=ytau, trace=False)
    #d['y'] = ydata

    MC = pymc.MCMC(d)
    MC.sample(nsample, burn=burn, thin=thin, verbose=verbose)
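# --- Hedged sketch (assumption; the mixture_likelihood referenced above is
# not shown in this snippet). Following Hogg et al. 2010 (eq. 17), each point
# is either "good" (Gaussian about the line with width dyi) or "bad"
# (Gaussian about Yb with width sqrt(dyi**2 + sigmab**2)), mixed with
# weight Pb. Named _sketch to avoid shadowing the real function.
def mixture_likelihood_sketch(value, model, dyi, Pb, Yb, sigmab):
    good = (1.0 - Pb) / np.sqrt(2 * np.pi * dyi**2) * \
        np.exp(-0.5 * (value - model)**2 / dyi**2)
    bad = Pb / np.sqrt(2 * np.pi * (dyi**2 + sigmab**2)) * \
        np.exp(-0.5 * (value - Yb)**2 / (dyi**2 + sigmab**2))
    return np.sum(np.log(good + bad))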
def pymc_linear_fit_withoutliers(data1, data2, data1err=None, data2err=None,
                                 print_results=False, intercept=True,
                                 nsample=5000, burn=1000, thin=10,
                                 return_MC=False, guess=None, verbose=0,
                                 ignore_nans=True, progress_bar=True):
    """
    Use pymc to fit a line to data with outliers, assuming the outliers come
    from a broad, uniform distribution that covers all the data.

    The model doesn't work exactly right; it over-rejects "outliers" because
    'bady' is an unobserved stochastic parameter that can therefore move.
    Maybe it should be implemented as a "potential" or a mixture model, e.g.
    as in Hogg 2010 / VanderPlas' astroML example:
    http://astroml.github.com/book_figures/chapter8/fig_outlier_rejection.html
    """
    import pymc

    if ignore_nans:
        data1, data2, data1err, data2err = remove_nans(data1, data2,
                                                       data1err, data2err)

    if guess is None:
        guess = (0, 0)

    xmu = pymc.distributions.Uninformative(name='x_observed', value=0)
    if data1err is None:
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True,
                                          value=data1, tau=1, trace=False)
    else:
        xtau = pymc.distributions.Uninformative(name='x_tau',
                                                value=1.0/data1err**2,
                                                observed=True, trace=False)
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True,
                                          value=data1, tau=xtau, trace=False)

    d = {'slope': pymc.distributions.Uninformative(name='slope',
                                                   value=guess[0]),
         #d['badvals'] = pymc.distributions.Binomial('bad',len(data2),0.5,value=[False]*len(data2))
         #d['badx'] = pymc.distributions.Uniform('badx',min(data1-data1err),max(data1+data1err),value=data1)
         'badvals': pymc.distributions.DiscreteUniform('bad', 0, 1,
                                                       value=[False]*len(data2)),
         'bady': pymc.distributions.Uniform('bady',
                                            min(data2 - data2err),
                                            max(data2 + data2err),
                                            value=data2),
         }
    if intercept:
        d['intercept'] = pymc.distributions.Uninformative(name='intercept',
                                                          value=guess[1])

        @pymc.deterministic(trace=False)
        def model(x=xdata, slope=d['slope'], intercept=d['intercept'],
                  badvals=d['badvals'], bady=d['bady']):
            return (x * slope + intercept) * (True - badvals) + badvals * bady
    else:
        @pymc.deterministic(trace=False)
        def model(x=xdata, slope=d['slope'], badvals=d['badvals'],
                  bady=d['bady']):
            return x * slope * (True - badvals) + badvals * bady
    d['f'] = model

    if data2err is None:
        ydata = pymc.distributions.Normal('y', mu=model, observed=True,
                                          value=data2, tau=1, trace=False)
    else:
        ytau = pymc.distributions.Uninformative(name='y_tau',
                                                value=1.0/data2err**2,
                                                observed=True, trace=False)
        ydata = pymc.distributions.Normal('y', mu=model, observed=True,
                                          value=data2, tau=ytau, trace=False)
    d['y'] = ydata

    MC = pymc.MCMC(d)
    MC.sample(nsample, burn=burn, thin=thin, verbose=verbose,
              progress_bar=progress_bar)

    MCs = MC.stats()
    m, em = MCs['slope']['mean'], MCs['slope']['standard deviation']
    if intercept:
        b, eb = MCs['intercept']['mean'], MCs['intercept']['standard deviation']

    if print_results:
        print "MCMC Best fit y = %g x" % (m),
        if intercept:
            print " + %g" % (b)
        else:
            print ""
        print "m = %g +/- %g" % (m, em)
        if intercept:
            print "b = %g +/- %g" % (b, eb)
        print "Chi^2 = %g, N = %i" % (((data2 - (data1 * m))**2).sum(),
                                      data1.shape[0] - 1)

    if return_MC:
        return MC
    if intercept:
        return m, b
    else:
        return m
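# --- Hedged usage sketch (not from the original source; synthetic data).
# Note that data2err must be given here, since the 'bady' prior bounds are
# computed from it.
import numpy as np
np.random.seed(0)
x = np.linspace(0, 10, 40)
y = 1.5 * x + 2.0 + np.random.randn(40)
y[::10] += 20.0                      # inject a few outliers
m, b = pymc_linear_fit_withoutliers(x, y, data2err=np.ones(40),
                                    nsample=20000, burn=10000)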
    -0.8762523, 0.47377688, 0.76516415, 0.27890419, -0.07819642, -0.13399348,
    0.82877293, 0.22308624, 0.7485783, -0.14700254, -1.03145657, 0.85641097,
    0.43396285, 0.47901653, 0.80137086, 0.33566812, 0.71443253, -1.57590815,
    -0.24090179, -2.0128344, 0.34503324, 0.12944091, -1.5327008, 0.06363034,
    0.21042021, -0.81425636, 0.20209279, -1.48130423, -1.04983523, 0.16001774,
    -0.75239072, 0.33427956, -0.10224921, 0.26463561, -1.09374674, -0.72749811,
    -0.54892116, -1.89631844, -0.94393545, -0.2521341, 0.26840341, 0.23563219,
    0.35333094
])

# Model: the data are truncated-normally distributed with unknown upper bound.
mu = pm.Normal('mu', 0, .01, value=0)
tau = pm.Exponential('tau', .01, value=1)
cutoff = pm.Exponential('cutoff', 1, value=1.3)
D = pm.TruncatedNormal('D', mu, tau, -np.inf, cutoff, value=data,
                       observed=True)

M = pm.MCMC([mu, tau, cutoff, D])
# Use a TruncatedMetropolis step method that will never propose jumps below
# D's maximum value.
M.use_step_method(TruncatedMetropolis, cutoff, D.value.max(), np.inf)
# Get a handle to the step method handling cutoff to investigate its behavior.
S = M.step_method_dict[cutoff][0]

M.isample(10000, 0, 10)
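# --- Hedged sketch of the TruncatedMetropolis step method assumed above
# (it is not shown in this snippet; this reconstruction is modeled on the
# custom step method example in the PyMC2 documentation and would normally
# be defined before the M.use_step_method call).
class TruncatedMetropolis(pm.Metropolis):
    def __init__(self, stochastic, low_bound, up_bound, *args, **kwargs):
        self.low_bound = low_bound
        self.up_bound = up_bound
        pm.Metropolis.__init__(self, stochastic, *args, **kwargs)

    def propose(self):
        # Propose from a truncated normal, so values outside
        # [low_bound, up_bound] (here: below D's maximum) never occur.
        tau = 1. / (self.adaptive_scale_factor * self.proposal_sd) ** 2
        self.stochastic.value = pm.rtruncated_normal(
            self.stochastic.value, tau, self.low_bound, self.up_bound)

    def hastings_factor(self):
        # The asymmetric proposal requires a Hastings correction.
        tau = 1. / (self.adaptive_scale_factor * self.proposal_sd) ** 2
        cur_val = self.stochastic.value
        last_val = self.stochastic.last_value
        lp_for = pm.truncated_normal_like(cur_val, last_val, tau,
                                          self.low_bound, self.up_bound)
        lp_bak = pm.truncated_normal_like(last_val, cur_val, tau,
                                          self.low_bound, self.up_bound)
        return lp_bak - lp_for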
lifetime_[censor] = 10 - birth[censor]  # we only see this part of their lives

# This begins the model
alpha = pm.Uniform("alpha", 0, 20)  # let's just use uninformative priors
beta = pm.Uniform("beta", 0, 20)

@pm.observed
def survival(value=lifetime_, alpha=alpha, beta=beta):
    # Weibull log-likelihood with right censoring: uncensored points
    # contribute the full log-density, censored points only the log-survival
    # term, so -(value/beta)**alpha must sit outside the (1 - censor) factor.
    return np.sum((1 - censor) * (np.log(alpha / beta) +
                                  (alpha - 1) * np.log(value / beta))
                  - (value / beta) ** alpha)

mcmc = pm.MCMC([alpha, beta, survival])
mcmc.sample(50000, 30000)

alpha_samples = mcmc.trace('alpha')[:]
beta_samples = mcmc.trace('beta')[:]

# Histogram of the samples:
plt.hist(alpha_samples, normed=True)
plt.show()
plt.hist(beta_samples, normed=True)
plt.show()

# The Weibull median is beta * ln(2)**(1/alpha).
medianlifetime_samples = beta_samples * (np.log(2) ** (1 / alpha_samples))
plt.hist(medianlifetime_samples, normed=True)
    def approximate_map(self, individual_subjs=True, minimizer='Powell',
                        use_basin=False, fall_to_simplex=True, cycles=1,
                        debug=False, minimizer_kwargs=None, basin_kwargs=None):
        """Set model to its approximate MAP.

        :Arguments:
            individual_subjs : bool <default=True>
                Optimize each subject individually.
            minimizer : str <default='Powell'>
                Optimize using Powell. See scipy.optimize.minimize.
                Another choice might be 'Nelder-Mead'.
            use_basin : bool <default=True>
                Use basin hopping optimization to avoid local minima.
            fall_to_simplex : bool <default=True>
                Whether MAP should try the simplex algorithm if the Powell
                method fails.
            cycles : int <default=1>
                How many times to optimize the model. Since lower level nodes
                depend on higher level nodes, they might be estimated
                differently in a second pass.
            minimizer_kwargs : dict <default={}>
                Keyword arguments passed to minimizer. See
                scipy.optimize.minimize for options.
            basin_kwargs : dict <default={}>
                Keyword arguments passed to basinhopping. See
                scipy.optimize.basinhopping for options.
            debug : bool <default=False>
                Whether to print current values and neg logp at each
                iteration.
        """
        ###############################
        # In order to find the MAP of a hierarchical model one needs
        # to integrate over the subj nodes. Since this is difficult we
        # optimize the generations iteratively on the generation below.

        # We only need this to get at the generations.
        # TODO: Find out how to get this from pymc.utils.find_generations()
        m = pm.MCMC(self.nodes_db.node)
        generations_unsorted = m.generations
        generations_unsorted.append(self.get_observeds().node)
        # Filter out empty generations
        generations_unsorted = [gen for gen in generations_unsorted
                                if len(gen) != 0]
        # Sort generations according to the order of nodes_db
        generations = []
        for gen in generations_unsorted:
            generations.append([row.node for name, row
                                in self.nodes_db.iterrows()
                                if name in [node.__name__ for node in gen]])

        for cyc in range(cycles):
            for i in range(len(generations) - 1, 0, -1):
                if self.is_group_model and individual_subjs and \
                        (i == len(generations) - 1):
                    self._approximate_map_subj(
                        fall_to_simplex=fall_to_simplex,
                        minimizer=minimizer,
                        use_basin=use_basin,
                        debug=debug,
                        minimizer_kwargs=minimizer_kwargs,
                        basin_kwargs=basin_kwargs)
                    continue
                # Optimize the generation at i-1 evaluated over the
                # generation at i
                self._partial_optimize(generations[i - 1], generations[i],
                                       fall_to_simplex=fall_to_simplex,
                                       minimizer=minimizer,
                                       use_basin=use_basin, debug=debug,
                                       minimizer_kwargs=minimizer_kwargs,
                                       basin_kwargs=basin_kwargs)

        # Update map in nodes_db
        self.nodes_db['map'] = np.NaN
        for name, value in self.values.iteritems():
            try:
                self.nodes_db['map'].ix[name] = value
            # Some values can be series which we'll just ignore
            except (AttributeError, ValueError):
                pass
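    # --- Hedged usage sketch (not from the original source). approximate_map
    # is a method on a kabuki-style hierarchical model; the model class below
    # is hypothetical and only illustrates the intended call pattern:
    #
    #   model = SomeHierarchicalModel(data)      # hypothetical subclass
    #   model.approximate_map(minimizer='Powell', cycles=2)
    #   # The MAP values then seed the MCMC chains before sampling.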
    def run_SPXcentered(self, sigma_x, n_subjs, size, mu_value,
                        mu_step_method, seed):
        """Run a single SPXcentered test."""
        #init basic mcmc
        if np.isscalar(mu_value):
            n_conds = 1
        else:
            n_conds = len(mu_value)

        max_tries = 5
        iter = 10000    #100000
        burnin = 5000   #90000
        nodes, t_values = self.create_hierarchical_model(
            sigma_x=sigma_x, n_subjs=n_subjs, size=size, mu_value=mu_value,
            seed=seed)
        mcmc = pm.MCMC(nodes)
        [mcmc.use_step_method(mu_step_method, node) for node in nodes['mu']]

        #init mcmc with SPX step method
        nodes_spx, t_values = self.create_hierarchical_model(
            sigma_x=sigma_x, n_subjs=n_subjs, size=size, mu_value=mu_value,
            seed=seed)
        mcmc_spx = pm.MCMC(nodes_spx)
        mcmc_spx.use_step_method(kabuki.steps.SPXcentered,
                                 loc=nodes_spx['mu'],
                                 scale=nodes_spx['sigma'],
                                 loc_step_method=mu_step_method)

        #init mcmc with spx on vec model
        nodes_vpx, t_values = self.create_hierarchical_model(
            sigma_x=sigma_x, n_subjs=n_subjs, size=size, mu_value=mu_value,
            seed=seed, vec=True)
        mcmc_vpx = pm.MCMC(nodes_vpx)
        mcmc_vpx.use_step_method(kabuki.steps.SPXcentered,
                                 loc=nodes_vpx['mu'],
                                 scale=nodes_vpx['sigma'],
                                 loc_step_method=mu_step_method)

        #run all the models until they converge to the same values
        i_try = 0
        while i_try < max_tries:
            print("~~~~~ trying for the %d time ~~~~~~" % (i_try + 1))

            #run spx mcmc
            i_t = time()
            mcmc_spx.sample(iter, burnin)
            print("spx sampling took %.2f seconds" % (time() - i_t))
            stats = dict([('mu%d spx' % x, mcmc_spx.mu[x].stats())
                          for x in range(n_conds)])

            #run vpx mcmc
            i_t = time()
            mcmc_vpx.sample(iter, burnin)
            print("vpx sampling took %.2f seconds" % (time() - i_t))
            stats.update(dict([('mu%d vpx' % x, mcmc_vpx.mu[x].stats())
                               for x in range(n_conds)]))

            #run basic mcmc
            i_t = time()
            mcmc.sample(iter, burnin)
            print("basic sampling took %.2f seconds" % (time() - i_t))
            stats.update(dict([('mu%d basic' % x, mcmc.mu[x].stats())
                               for x in range(n_conds)]))

            df = DataFrame(stats, index=['mean', 'standard deviation']).T
            df = df.rename(columns={'mean': 'mean',
                                    'standard deviation': 'std'})
            print(df)

            #check if all the results are close enough
            try:
                # integer division so range() gets an int
                for i in range(len(df) // 3):
                    np.testing.assert_allclose(df[(3*i + 0):(3*i + 1)],
                                               df[(3*i + 1):(3*i + 2)],
                                               atol=0.1, rtol=0.01)
                    np.testing.assert_allclose(df[(3*i + 1):(3*i + 2)],
                                               df[(3*i + 2):(3*i + 3)],
                                               atol=0.1, rtol=0.01)
                    np.testing.assert_allclose(df[(3*i + 2):(3*i + 3)],
                                               df[(3*i + 0):(3*i + 1)],
                                               atol=0.1, rtol=0.01)
                break
            #if not, add more runs
            except AssertionError:
                print("Failed to reach agreement. trying again")
                i_try += 1

        assert (i_try < max_tries), \
            "could not replicate values using different mcmc samplers"
    def setUpClass(self):
        self.S = pymc.MCMC(disaster_model,
                           db='txt',
                           dbname=os.path.join(testdir, 'Disaster.txt'),
                           dbmode='w')
    def run_SliceStep(self, sigma_x, n_subjs, size, mu_value, seed,
                      left=None, max_tries=5):
        #init basic mcmc
        if np.isscalar(mu_value):
            n_conds = 1
        else:
            n_conds = len(mu_value)

        iter = 10000    #100000
        burnin = 5000   #90000

        #init basic mcmc
        nodes, t_values = self.create_hierarchical_model(
            sigma_x=sigma_x, n_subjs=n_subjs, size=size, mu_value=mu_value,
            seed=seed)
        mcmc = pm.MCMC(nodes)
        [mcmc.use_step_method(kabuki.steps.kNormalNormal, node)
         for node in nodes['mu']]

        #init mcmc with slice step
        nodes_s, t_values = self.create_hierarchical_model(
            sigma_x=sigma_x, n_subjs=n_subjs, size=size, mu_value=mu_value,
            seed=seed)
        mcmc_s = pm.MCMC(nodes_s)
        [mcmc_s.use_step_method(kabuki.steps.kNormalNormal, node)
         for node in nodes_s['mu']]
        mcmc_s.use_step_method(kabuki.steps.SliceStep, nodes_s['sigma'],
                               width=3, left=left)

        #run all the models until they converge to the same values
        i_try = 0
        stats = {}
        while i_try < max_tries:
            print("~~~~~ trying for the %d time ~~~~~~" % (i_try + 1))

            #run slice mcmc
            i_t = time()
            mcmc_s.sample(iter, burnin)
            print("slice sampling took %.2f seconds" % (time() - i_t))
            stats.update(dict([('mu%d S' % x, mcmc_s.mu[x].stats())
                               for x in range(n_conds)]))

            #run basic mcmc
            i_t = time()
            mcmc.sample(iter, burnin)
            print("basic sampling took %.2f seconds" % (time() - i_t))
            stats.update(dict([('mu%d basic' % x, mcmc.mu[x].stats())
                               for x in range(n_conds)]))

            df = DataFrame(stats, index=['mean', 'standard deviation']).T
            df = df.rename(columns={'mean': 'mean',
                                    'standard deviation': 'std'})
            print(df)

            #check if all the results are close enough
            try:
                # integer division so range() gets an int
                for i in range(len(df) // 2):
                    np.testing.assert_allclose(df[(2*i + 0):(2*i + 1)],
                                               df[(2*i + 1):(2*i + 2)],
                                               atol=0.1, rtol=0.01)
                break
            #if not, add more runs
            except AssertionError:
                print("Failed to reach agreement In:")
                print(df[(2*i):(2*(i + 1))])
                print("trying again")
                i_try += 1

        assert (i_try < max_tries), \
            "could not replicate values using different mcmc samplers"
        return mcmc, mcmc_s
    def setUpClass(self):
        self.S = pymc.MCMC(disaster_model, db='ram')
        self.S.use_step_method(pymc.Metropolis, self.S.early_mean, tally=True)
def mk_multi_bayes(tree, chars, nregime, qidx, pi="Equal", seglen=0.02,
                   stepsize=0.05):
    """
    Create a Bayesian multi-mk model. The user specifies which regime models
    to use and the Bayesian model finds the switchpoints.

    Args:
        tree (Node): Root node of tree.
        chars (dict): Dict mapping tip labels to discrete character states.
          Character states must be in the form of [0, 1, 2, ...]
        nregime (int): The number of distinct regimes to test. Set to 1 for
          an Mk model, set to greater than 1 for a multi-regime Mk model.
        qidx (np.array): Index specifying the model to test.

            columns:
                0, 1, 2 - index axes of q
                3 - index of params

            This scheme allows flexible specification of models. E.g.:

            Symmetric mk2:
                params = [0.2]
                qidx = [[0, 0, 1, 0],
                        [0, 1, 0, 0]]

            Asymmetric mk2:
                params = [0.2, 0.6]
                qidx = [[0, 0, 1, 0],
                        [0, 1, 0, 1]]

            NOTE: The qidx corresponding to the first q matrix (first column
            0) is always the root regime.
        pi (str or np.array): Option to weight the root node by given values.
          Either a string containing the method or an array of weights.
          Weights should be given in order.

          Accepted methods of weighting root:
            Equal: flat prior
            Equilibrium: Prior equal to stationary distribution of Q matrix
            Fitzjohn: Root states weighted by how well they explain the data
              at the tips.
        seglen (float): Size of segments to break tree into. The smaller
          this value, the more "fine-grained" the analysis will be.
          Optional, defaults to 2% of the root-to-tip length.
        stepsize (float): Maximum size of steps for switchpoints to take.
          Optional, defaults to 5% of the root-to-tip length.
    """
    if type(chars) == dict:
        data = chars.copy()
        chars = [chars[l] for l in [n.label for n in tree.leaves()]]
    else:
        data = dict(zip([n.label for n in tree.leaves()], chars))
    # Preparations
    nchar = len(set(chars))
    nparam = len(set([n[-1] for n in qidx]))
    # This model has 2 components: Q parameters and switchpoints.
    # They are combined in a custom likelihood function.

    ###########################################################################
    # Switchpoint:
    ###########################################################################
    # Modeling the movement of the regime shift(s) is the tricky part.
    # Regime shifts will only be allowed to happen at a node.
    seg_map = tree_map(tree, seglen)
    switch = [None] * (nregime - 1)
    for regime in range(nregime - 1):
        switch[regime] = make_switchpoint_stoch(
            seg_map, name=str("switch_{}".format(regime)))

    ###########################################################################
    # Qparams:
    ###########################################################################
    # Each Q parameter gets an exponential prior.
    Qparams = [None] * nparam
    for i in range(nparam):
        Qparams[i] = pymc.Exponential(name=str("Qparam_{}".format(i)),
                                      beta=1.0, value=0.1 * (i + 1))

    ###########################################################################
    # Likelihood
    ###########################################################################
    # The likelihood function
    l = cyexpokit.make_mklnl_func(tree, data, nchar, nregime, qidx)

    @pymc.deterministic
    def likelihood(q=Qparams, s=switch, name="likelihood"):
        return l(np.array(q),
                 np.array([x[0].ni for x in s], dtype=np.intp),
                 np.array([x[1] for x in s]))

    @pymc.potential
    def multi_mklik(lnl=likelihood):
        if not (np.isnan(lnl)):
            return lnl
        else:
            return -np.inf

    mod = pymc.MCMC(locals())
    for s in switch:
        mod.use_step_method(SwitchpointMetropolis, s, tree, seg_map,
                            stepsize=stepsize, seglen=seglen)
    return mod
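# --- Hedged usage sketch (hypothetical objects; the tree loader and tip
# states depend on the surrounding package, e.g. an ivy-style Node):
#
#   tree = read_tree("mytree.newick")               # hypothetical loader
#   chars = {leaf.label: state for leaf, state in tip_states}
#   qidx = np.array([[0, 0, 1, 0], [0, 1, 0, 0],    # root regime, symmetric mk2
#                    [1, 0, 1, 1], [1, 1, 0, 1]])   # second regime
#   mod = mk_multi_bayes(tree, chars, nregime=2, qidx=qidx)
#   mod.sample(10000, burn=1000, thin=5)
#   switch_trace = mod.trace("switch_0")[:]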
    def test(self):
        S = pymc.MCMC(input=BinaryTestModel)
        S.sample(1000, 500)
        f = S.fair.trace()
        assert (1.0 * f.sum() / len(f) > .5)
def test_RAM1d():
    """
    Test the RobustAdaptiveMetro step from my_pymc_steps.py.

    :param gaussian_data1d: A fixture function that generates some data to
        use for testing.
    """
    data = gaussian_data1d()

    # Setup PyMC model
    @pymc.stochastic
    def Mu(value=data.mean()):
        """ Normal parameters (mu,variance). """
        return 0.0  # Uniform prior, so log(prior) is just zero.

    @pymc.stochastic(observed=True)
    def normal_data(value=data, Mu=Mu):
        """ Data generated from a normal distribution. """
        data_mean = np.mean(value)
        ndata = value.size
        # The normalization term should use np.log, not np.sqrt; it is
        # constant in Mu either way, so the sampler is unaffected.
        loglik = -ndata / 2.0 * np.log(2.0 * np.pi * true_variance) - \
            0.5 * ndata * (data_mean - Mu) ** 2 / true_variance
        return loglik

    # Now let's run the MCMC sampler and make sure the RAM algorithm behaves
    # as expected.
    mcmc = pymc.MCMC({'Mu': Mu, 'normal_data': normal_data})  # The MCMC sampler

    covar_guess = data.var() / data.size
    target_rate = 0.40

    # Make sure we use the RAM step to update the normal mean.
    mcmc.use_step_method(RobustAdaptiveMetro, Mu, target_rate,
                         proposal_covar=covar_guess,
                         proposal_distribution='T')

    # Before we start the MCMC sampler, let's run some tests to make sure
    # that the RAM step is properly initialized.
    RAM = mcmc.step_method_dict[Mu][0]
    assert RAM._dim == 1
    assert RAM._proposal_distribution == 'T'
    assert np.abs(RAM._cholesky_factor - np.sqrt(data.var() / data.size)) < 1e-5

    niter = 100000
    nburn = 10000
    mcmc.sample(niter, burn=nburn)
    mu_draws = mcmc.trace('Mu')[:]

    assert RAM._current_iter == niter
    assert RAM._accepted + RAM._rejected == niter

    # Make sure acceptance rate is within 2% of the target rate
    acceptance_rate = RAM._accepted / float(RAM._current_iter)
    assert abs(acceptance_rate - target_rate) / abs(target_rate) < 0.02

    # Compare the histogram of the draws obtained from the RAM algorithm
    # with the expected distribution
    xgrid = np.linspace(data.mean() - 5.0 * np.std(data) / np.sqrt(data.size),
                        data.mean() + 5.0 * np.std(data) / np.sqrt(data.size))
    post_var = true_variance / data.size
    post_mean = data.mean()
    post_pdf = 1.0 / np.sqrt(2.0 * np.pi * post_var) * \
        np.exp(-0.5 * (xgrid - post_mean) ** 2 / post_var)

    plt.subplot(111)
    plt.hist(mu_draws, bins=25, normed=True)
    plt.plot(xgrid, post_pdf, 'r', lw=2)
    plt.title("Normal Model: Test of Metropolis step method")
    plt.xlabel("Mean")
    plt.ylabel("PDF")
    plt.show()
        c_estimate = estimate[0]
        m_estimate = estimate[1]

        # Keep the least squares fit
        lstsqr_fit[i, j, 0] = c_estimate
        lstsqr_fit[i, j, 1] = m_estimate

        # Plot the power law spectrum
        power_fit = c_estimate * analysis_frequencies ** (-m_estimate)

        # Define the model we are going to use
        single_power_law_model = pymcmodels.single_power_law(
            analysis_frequencies, analysis_power, m_estimate)

        # Set up the MCMC model
        M1 = pymc.MCMC(single_power_law_model)

        # Run the sampler
        M1.sample(iter=iterations, burn=burn, thin=thin, progress_bar=False)

        # Get the power law index and the normalization
        pli = M1.trace("power_law_index")[:]
        bayes_mean[i, j, 1] = np.mean(pli)
        cli = M1.trace("power_law_norm")[:]
        bayes_mean[i, j, 0] = np.mean(cli)

        # Get various properties of the power law index marginal
        # distribution and save them
        ci_keep68[i, j, :] = rn_utils.credible_interval(pli, ci=0.68)

        for k in range(0, 100):
def test_RAM2d():
    data = gaussian_data2d()

    # Setup PyMC model
    @pymc.stochastic
    def Mu(value=data.mean(axis=0)):
        """ Normal parameters (mu,variance). """
        return 0.0  # Uniform prior, so log(prior) is just zero.

    @pymc.stochastic(observed=True)
    def normal_data(value=data, Mu=Mu):
        """ Data generated from a normal distribution. """
        data_mean = value.mean(axis=0)
        ndata = value.shape[0]
        zcent = data_mean - Mu
        loglik = -0.5 * ndata * np.dot(zcent.T,
                                       np.linalg.inv(true_covar).dot(zcent))
        return loglik

    mcmc = pymc.MCMC({'Mu': Mu, 'normal_data': normal_data})
    covar_guess = np.cov(data, rowvar=0) / data.shape[0]
    target_rate = 0.4

    # Make sure we use the RAM step to update the normal mean.
    mcmc.use_step_method(RobustAdaptiveMetro, Mu,
                         proposal_covar=covar_guess,
                         target_rate=target_rate)

    # Before we start the MCMC sampler, let's run some tests to make sure
    # that the RAM step is properly initialized.
    RAM = mcmc.step_method_dict[Mu][0]
    assert RAM._dim == 2
    assert RAM._proposal_distribution == 'Normal'

    # Now let's run the MCMC sampler and make sure the RAM algorithm behaves
    # as expected.
    niter = 100000
    nburn = 50000
    mcmc.sample(niter, nburn)

    assert RAM._current_iter == niter
    mu_draws = mcmc.trace('Mu')[:]  # trace by name, as in the 1-d test

    # Make sure acceptance rate is within 2% of the target rate
    acceptance_rate = RAM._accepted / float(RAM._current_iter)
    assert abs(acceptance_rate - target_rate) / abs(target_rate) < 0.02

    # Compare covariance matrix of proposals with posterior covariance
    post_covar = true_covar / data.shape[0]
    covar_n = RAM._cholesky_factor.T.dot(RAM._cholesky_factor)
    eigenval_n, eigenvect_n = linalg.eig(covar_n)
    eigenval_n = np.diagflat(eigenval_n)
    # Matrix square-root of proposal covariance
    covroot_n = np.dot(eigenvect_n.T, np.sqrt(eigenval_n).dot(eigenvect_n))
    covroot_n = covroot_n.real

    eigenval, eigenvect = linalg.eig(post_covar)
    eigenval_inv = np.diagflat(1.0 / eigenval)
    covroot_inv = np.dot(eigenvect.T, np.sqrt(eigenval_inv).dot(eigenvect))
    covroot_inv = covroot_inv.real

    evals, evects = linalg.eig(covroot_n.dot(covroot_inv))
    # Compute the 'suboptimality factor'. This should be unity if the two
    # matrices are proportional.
    subopt_factor = evals.size * np.sum(1.0 / evals ** 2) / \
        (np.sum(1.0 / evals)) ** 2
    # Test for proportionality of proposal covariance and posterior covariance
    assert subopt_factor < 1.01

    # Now make nice plots comparing sampled values with the true posterior
    data_mean = data.mean(axis=0)
    xgrid = np.linspace(data_mean[0] - 4.0 * np.sqrt(post_covar[0, 0]),
                        data_mean[0] + 4.0 * np.sqrt(post_covar[0, 0]), 100)
    ygrid = np.linspace(data_mean[1] - 4.0 * np.sqrt(post_covar[1, 1]),
                        data_mean[1] + 4.0 * np.sqrt(post_covar[1, 1]), 100)
    X, Y = np.meshgrid(xgrid, ygrid)
    true_pdf = mlab.bivariate_normal(X, Y, np.sqrt(post_covar[0, 0]),
                                     np.sqrt(post_covar[1, 1]),
                                     data_mean[0], data_mean[1],
                                     post_covar[0, 1])

    plt.subplot(211)
    post_pdf = 1.0 / np.sqrt(2.0 * np.pi * post_covar[0, 0]) * \
        np.exp(-0.5 * (xgrid - data_mean[0]) ** 2 / post_covar[0, 0])
    plt.hist(mu_draws[:, 0], bins=25, normed=True)
    plt.plot(xgrid, post_pdf, 'r', lw=2)
    plt.title("Bivariate Normal Model: Test of Robust Adaptive Metropolis "
              "step method")
    plt.xlabel("Mean 1")
    plt.ylabel("PDF")

    plt.subplot(212)
    post_pdf = 1.0 / np.sqrt(2.0 * np.pi * post_covar[1, 1]) * \
        np.exp(-0.5 * (ygrid - data_mean[1]) ** 2 / post_covar[1, 1])
    plt.hist(mu_draws[:, 1], bins=25, normed=True)
    plt.plot(ygrid, post_pdf, 'r', lw=2)
    plt.xlabel("Mean 2")
    plt.ylabel("PDF")
    plt.show()

    plt.figure()
    plt.plot(mu_draws[:, 0], mu_draws[:, 1], '.', ms=2)
    plt.contour(X, Y, true_pdf, linewidths=5)
    plt.title('Test of Robust Adaptive Metropolis Algorithms: '
              'Bivariate Normal Model')
    plt.ylabel('Mean 2')
    plt.xlabel('Mean 1')
    plt.show()
def get_Bayes(measurements=[], chunksize=5, Ndp=5, iter=50000, burn=5000):
    sc = pymc.Uniform('sc', 0.1, 2.0, value=0.24)
    tau = pymc.Uniform('tau', 0.0, 1.0, value=0.5)

    concinit = 1.0
    conclo = 0.1
    conchi = 10.0
    concentration = pymc.Uniform('concentration', lower=conclo, upper=conchi,
                                 value=concinit)

    # The stick-breaking construction: requires Ndp beta draws dependent on
    # the concentration, before the probability mass function is actually
    # constructed.
    #betas = pymc.Beta('betas', alpha=1, beta=concentration, size=Ndp)
    betas = pymc.Beta('betas', alpha=1, beta=1, size=Ndp - 1)

    @pymc.deterministic
    def pmf(betas=betas):
        "Construct a probability mass function for the truncated Dirichlet process"
        # prod = lambda x: np.exp(np.sum(np.log(x)))  # Slow but more accurate(?)
        prod = np.prod
        value = [u * prod(1.0 - betas[:i]) for i, u in enumerate(betas)]
        value.append(1.0 - sum(value[:]))  # force value to sum to 1
        return value

    # The cluster assignments: each data point's estimated cluster ID.
    # Remove idinit to allow clusterid to be randomly initialized:
    Ndata = len(measurements)
    idinit = np.zeros(Ndata, dtype=np.int64)
    clusterid = pymc.Categorical('clusterid', p=pmf, size=Ndata, value=idinit)

    @pymc.deterministic(name='clustermean')
    def clustermean(clusterid=clusterid, sc=sc, Ndp=Ndp):
        return sc * np.arange(1, Ndp + 1)[clusterid]

    @pymc.deterministic(name='clusterprec')
    def clusterprec(clusterid=clusterid, sc=sc, tau=tau, Ndp=Ndp):
        return 1.0 / (sc * sc * tau * tau * (np.arange(1, Ndp + 1)[clusterid]))

    y = pymc.Normal('y', mu=clustermean, tau=clusterprec, observed=True,
                    value=measurements)

    # For predictive posterior simulation
    @pymc.deterministic(name='y_sim')
    def y_sim(value=[0], sc=sc, tau=tau, clusterid=clusterid, Ndp=Ndp):
        n = np.arange(1, Ndp + 1)[np.random.choice(clusterid)]
        return np.random.normal(loc=sc * n, scale=sc * tau * n)

    m = pymc.Model({"scale": sc,
                    "tau": tau,
                    "betas": betas,
                    "clusterid": clusterid,
                    "normal": y,
                    "pred": y_sim})

    sc_samples = []
    modes = []
    simulations = []
    for i in range(0, chunksize):
        mc = pymc.MCMC(m)
        # Use the function arguments rather than hard-coded literals.
        mc.sample(iter=iter, burn=burn)
        plot(mc)

        sc_sample = mc.trace('sc')[:]
        sc_samples.append(sc_sample)
        simulation = mc.trace('y_sim')[:]
        simulations.append(simulation)

        plt.hist(measurements, 50, fc='gray', histtype='stepfilled',
                 alpha=0.3, normed=False)
        plt.hist(simulation, 30, fc='blue', histtype='stepfilled',
                 alpha=0.3, normed=True)

        hist, edges = np.histogram(measurements, bins=100,
                                   range=[np.min(measurements) - 0.25,
                                          np.max(measurements) + 0.25])
        argm = hist.argmax()
        modes.append((edges[argm] + edges[argm + 1]) / 2)

    if chunksize <= 1:
        gr = np.nan
    else:
        # Assign the diagnostic (the original discarded the return value).
        gr = pymc.gelman_rubin(sc_samples)

    dic = {'gelman_rubin': gr,
           'modes': modes,
           'simulations': simulations,
           'sc_samples': sc_samples}
    return dic
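# --- Hedged usage sketch (not from the original source; synthetic,
# two-cluster data). Assumes the imports this function relies on are in
# place: numpy as np, pymc, matplotlib.pyplot as plt, and
# pymc.Matplot.plot bound to the name plot.
measurements = np.concatenate([np.random.normal(0.24, 0.03, 200),
                               np.random.normal(0.48, 0.06, 100)])
result = get_Bayes(list(measurements), chunksize=4, Ndp=5,
                   iter=20000, burn=5000)
print(result['modes'])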
    cutoff_b = 27.0
    print cutoff_b
    masked_values = np.ma.masked_values(full_data_day_wind_speed, value=None)
    print masked_values.mask.sum()
    print masked_values.data.max()
    wind_speed_day = TruncatedNormal('dws', mu, tau, a=cutoff_a, b=cutoff_b,
                                     value=masked_values, observed=True)
    return locals()


M_missing_day = pm.MCMC(day_missing_model())
M_missing_day.sample(iter=no_of_iterations, burn=burn, thin=thin)
missing_values = np.mean(np.array(M_missing_day.trace('dws')[-50:-1]), axis=0)
print missing_values.dtype
# print len(missing_values)
# print len(full_data_day.index)
# full_data_day.to_csv('/media/kiruba/New Volume/ACCUWA_Data/weather_station/KSNDMC/Tubgere_prior.csv')
masked_values = np.ma.masked_values(full_data_day_wind_speed, value=None)
masked_values[masked_values.mask] = missing_values
full_data_day['corrected_wind_speed'] = np.array(masked_values.data.tolist())
full_data_day['corrected_wind_speed'] = full_data_day['corrected_wind_speed'].astype(int)
# full_data_day.to_csv('/media/kiruba/New Volume/ACCUWA_Data/weather_station/KSNDMC/Tubgere_after.csv')


def night_missing_model():
    # Concatenate Poisson means
    out = np.zeros(len(stormsNumbers))
    out[:s] = e
    out[s:] = l
    return out


storms = pm.Poisson('storms', mu=rate, value=stormsNumbers, observed=True)

storms_model = pm.Model([storms, early_mean, late_mean, rate])
strmsM = pm.MCMC(storms_model)
strmsM.sample(iter=40000, burn=1000, thin=20)

plt.hist(strmsM.trace('late_mean')[:], edgecolor="k")
general.set_grid_to_plot()
plt.savefig(general.folderPath2 + "exp2_late_mean.png")
plt.clf()

plt.hist(strmsM.trace('early_mean')[:], edgecolor="k")
general.set_grid_to_plot()
plt.savefig(general.folderPath2 + "exp2_early_mean.png")
plt.clf()

plt.hist(strmsM.trace('switchpoint')[:], edgecolor="k")
general.set_grid_to_plot()
plt.savefig(general.folderPath2 + "exp2_switchpoint.png")
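# --- Hedged sketch (assumption; the snippet above starts inside the rate
# deterministic, so the priors are not shown). A standard switchpoint setup
# consistent with the trace names used above would look like:
#
#   switchpoint = pm.DiscreteUniform('switchpoint', lower=0,
#                                    upper=len(stormsNumbers) - 1)
#   early_mean = pm.Exponential('early_mean', beta=1.0)
#   late_mean = pm.Exponential('late_mean', beta=1.0)
#
#   @pm.deterministic(plot=False)
#   def rate(s=switchpoint, e=early_mean, l=late_mean):
#       ...  # body shown above: early mean before s, late mean after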
def run_mcmc(gp, img, compare_img, transverse_sigma=1.0, motion_angle=0.0):
    """Estimate PSF using Markov Chain Monte Carlo

    gp - Gaussian priors - array of N objects with attributes a, b, sigma
    img - image to apply PSF to
    compare_img - comparison image
    transverse_sigma - prior
    motion_angle - prior

    Model a Point Spread Function consisting of the sum of N collinear
    Gaussians, blurred in the transverse direction and the result rotated.
    Each of the collinear Gaussians is parameterized by a (amplitude),
    b (center), and sigma (std. deviation). The Point Spread Function is
    applied to the image img and the result compared with the image
    compare_img.
    """
    print "gp.shape", gp.shape
    print "gp", gp
    motion_angle = np.deg2rad(motion_angle)
    motion_angle = pm.VonMises("motion_angle", motion_angle, 1.0,
                               value=motion_angle)
    transverse_sigma = pm.Exponential("transverse_sigma", 1.0,
                                      value=transverse_sigma)

    N = gp.shape[0]
    mixing_coeffs = pm.Exponential("mixing_coeffs", 1.0, size=N)
    #mixing_coeffs.set_value(gp['a'])
    mixing_coeffs.value = gp['a']
    longitudinal_sigmas = pm.Exponential("longitudinal_sigmas", 1.0, size=N)
    #longitudinal_sigmas.set_value(gp['sigma'])
    longitudinal_sigmas.value = gp['sigma']

    b = np.array(sorted(gp['b']), dtype=float)
    cut_points = (b[1:] + b[:-1]) * 0.5
    long_means = [None] * b.shape[0]
    print long_means
    left_mean = pm.Gamma("left_mean", 1.0, 2.5 * gp['sigma'][0])
    long_means[0] = cut_points[0] - left_mean
    right_mean = pm.Gamma("right_mean", 1.0, 2.5 * gp['sigma'][-1])
    long_means[-1] = cut_points[-1] + right_mean
    for ix in range(1, N - 1):
        long_means[ix] = pm.Uniform("mid%d_mean" % ix,
                                    lower=cut_points[ix - 1],
                                    upper=cut_points[ix])
    print "cut_points", cut_points
    print "long_means", long_means

    #longitudinal_means = pm.Normal("longitudinal_means", 0.0, 0.04, size=N)
    #longitudinal_means.value = gp['b']

    dtype = np.dtype([('a', np.float), ('b', np.float), ('sigma', np.float)])

    @pm.deterministic
    def psf(mixing_coeffs=mixing_coeffs,
            longitudinal_sigmas=longitudinal_sigmas,
            longitudinal_means=long_means,
            transverse_sigma=transverse_sigma,
            motion_angle=motion_angle):
        gp = np.ones((N, ), dtype=dtype)
        gp['a'] = mixing_coeffs
        gp['b'] = longitudinal_means
        gp['sigma'] = longitudinal_sigmas
        motion_angle_deg = np.rad2deg(motion_angle)
        if True:
            print "gp: a", mixing_coeffs
            print "    b", longitudinal_means
            print "    s", longitudinal_sigmas
            print "tr-sigma", transverse_sigma, "angle=", motion_angle_deg
        return generate_sum_gauss(gp, transverse_sigma, motion_angle_deg)

    @pm.deterministic
    def image_fitness(psf=psf, img=img, compare_img=compare_img):
        img_convolved = ndimage.convolve(img, psf)
        img_diff = img_convolved.astype(int) - compare_img
        return img_diff.std()

    if False:
        trial_psf = generate_sum_gauss(gp, 2.0, 50.0, plot_unrot_kernel=True,
                                       plot_rot_kernel=True, verbose=True)
        print "trial_psf", trial_psf.min(), trial_psf.mean(), \
            trial_psf.max(), trial_psf.std()
        obs_psf = pm.Uniform("obs_psf", lower=-1.0, upper=1.0,
                             doc="Point Spread Function", value=trial_psf,
                             observed=True, verbose=False)

    print "image_fitness value started at", image_fitness.value
    known_fitness = pm.Exponential("fitness", image_fitness + 0.001,
                                   value=0.669, observed=True)

    #mcmc = pm.MCMC([motion_angle, transverse_sigma, mixing_coeffs,
    #                longitudinal_sigmas, longitudinal_means, image_fitness,
    #                known_fitness], verbose=2)
    mcmc = pm.MCMC([motion_angle, transverse_sigma, mixing_coeffs,
                    longitudinal_sigmas, image_fitness, known_fitness,
                    left_mean, right_mean] + long_means, verbose=2)
    pm.graph.dag(mcmc, format='png')
    plt.show()

    #mcmc.sample(20000, 1000)
    mcmc.sample(2000)

    motion_angle_samples = mcmc.trace("motion_angle")[:]
    transverse_sigma_samples = mcmc.trace("transverse_sigma")[:]
    image_fitness_samples = mcmc.trace("image_fitness")[:]
    best_fit = np.percentile(image_fitness_samples, 1.0)
    best_fit_selection = image_fitness_samples < best_fit
    print mcmc.db.trace_names
    for k in [k for k in mcmc.stats().keys() if k != "known_fitness"]:
        #samples = mcmc.trace(k)[:]
        samples = mcmc.trace(k).gettrace()
        print samples.shape
        selected_samples = samples[best_fit_selection]
        print k, samples.mean(axis=0), samples.std(axis=0), \
            selected_samples.mean(axis=0), selected_samples.std(axis=0)

    ax = plt.subplot(211)
    plt.hist(motion_angle_samples, histtype='stepfilled', bins=25, alpha=0.85,
             label="posterior of $p_\\theta$", color="#A60628", normed=True)
    plt.legend(loc="upper right")
    plt.title("Posterior distributions of $p_\\theta$, $p_\\sigma$")

    ax = plt.subplot(212)
    plt.hist(transverse_sigma_samples, histtype='stepfilled', bins=25,
             alpha=0.85, label="posterior of $p_\\sigma$", color="#467821",
             normed=True)
    plt.legend(loc="upper right")
    plt.show()

    for k, v in mcmc.stats().iteritems():
        print k, v

    # deprecated? use discrepancy...
    print mcmc.goodness()
    mcmc.write_csv("out.csv")

    pm.Matplot.plot(mcmc)
    plt.show()