def test_plot(self):
    """Plot the sampled model, or skip the test when plotting is disabled.

    Raises ``nose.SkipTest`` when the ``PLOT`` flag is falsy; otherwise
    ``plot`` is called on ``self.M`` with ``path=DIR`` and ``verbose=0``.
    """
    if PLOT:
        # Plot samples
        plot(self.M, path=DIR, verbose=0)
    else:
        raise nose.SkipTest
def test_plot(self):
    """Plot the sampled model, or skip the test when plotting is disabled.

    Raises ``nose.SkipTest`` when the ``PLOT`` flag is falsy; otherwise
    ``plot`` is called on ``self.M`` with its default arguments.
    """
    if PLOT:
        # Plot samples
        plot(self.M)
    else:
        raise nose.SkipTest
def fit_model():
    """Sample ``disaster_model`` and report two of its traces.

    Runs 10000 iterations (burn 1000, thin 10), prints the full
    ``switchpoint`` trace and the result of ``hist`` on the ``late_mean``
    trace, then hands the sampler to ``plot``.
    """
    sampler = MCMC(disaster_model)
    sampler.sample(iter=10000, burn=1000, thin=10)

    switchpoint_trace = sampler.trace('switchpoint')[:]
    print('switchpoint: ', switchpoint_trace)

    late_mean_hist = hist(sampler.trace('late_mean')[:])
    print('hist: ', late_mean_hist)
    # show()
    plot(sampler)
def plot(self):
    """Fit a MAP estimate, plot the sampler and save the model-fit figure.

    Prints the BIC of a ``pm.MAP`` fit of ``self.mc``, passes ``self.mc`` to
    the module-level ``plot`` function, then draws the confirmed cases
    against the mean and the 2.5/97.5 quantile band of ``mortalitysim`` and
    saves the figure as ``<last component of self.dir>_fit.png``.

    ``months_list``, ``confirmed_cases`` and ``mortalitysim`` are read from
    the enclosing scope; they are not defined in this method.
    """
    mc_map = pm.MAP(self.mc)
    mc_map.fit(tol=.01)  # iterlim = 250,
    print("BIC score: {}".format(mc_map.BIC))
    # Inside a method body the bare name ``plot`` is looked up in the module
    # globals, so this is the module-level plotting function, not this method.
    plot(self.mc)

    # set years and months
    years = mdates.YearLocator()  # every year
    months = mdates.MonthLocator()  # every month
    years_fmt = mdates.DateFormatter('%Y')

    fig, ax = plt.subplots()

    # Summarise the simulated series once instead of once per use.
    sim_stats = mortalitysim.stats()
    sim_quantiles = sim_stats['quantiles']

    # plot the data
    ax.plot(months_list, confirmed_cases, 'o', mec='black', color='black',
            label='confirmed cases')
    ax.plot(months_list, sim_stats['mean'], color='red', linewidth=1,
            label='MIH (mean)')
    y_min = sim_quantiles[2.5]
    y_max = sim_quantiles[97.5]
    ax.fill_between(months_list, y_min, y_max, color='r', alpha=0.3,
                    label='BPL (95% CI)')

    # format the ticks
    ax.xaxis.set_major_locator(years)
    ax.xaxis.set_major_formatter(years_fmt)
    ax.xaxis.set_minor_locator(months)

    # set the axis limit
    datemin = min(months_list) - 1
    datemax = max(months_list) + 1
    ax.set_xlim(datemin, datemax)

    # format the coords message box
    def price(x):
        return '$%1.2f' % x

    ax.format_xdata = mdates.DateFormatter('%Y-%m-%d')
    ax.format_ydata = price
    ax.grid(True)

    # rotates and right aligns the x labels, and moves the bottom of the
    # axes up to make room for them
    fig.autofmt_xdate()

    # some extra plot formatting
    ax.legend(loc='best')
    plt.style.use('ggplot')
    plt.rc('font', size=12)
    plt.rc('lines', linewidth=2)
    plt.title("Plague model fit to laboratory confirmed cases")
    plt.xlabel('Time in months')
    plt.ylabel('Number of infecteds')
    plt.legend()
    # NOTE(review): only backslash separators are split here, so on POSIX
    # paths the whole of self.dir becomes the file-name prefix -- confirm.
    plt.savefig(self.dir.split("\\")[-1] + '_fit.png')
def run1():
    """Fit the built-in example data with outlier rejection and plot the fit.

    Builds the sampler with ``pymc_linear_fit_withoutliers``, draws a further
    100000 samples, saves the data/fit figure to ``test.pdf`` and finally
    passes the sampler to ``plot``.
    """
    # fake data, one row per point: [x, y, yerr, xerr]
    data = np.array([[201, 592, 61, 9], [244, 401, 25, 4], [47, 583, 58, 11],
                     [287, 402, 15, 7], [203, 495, 21, 5], [58, 173, 15, 9],
                     [210, 479, 27, 4], [202, 504, 14, 4], [198, 510, 30, 11],
                     [158, 416, 16, 7], [165, 393, 14, 5], [201, 442, 25, 5],
                     [157, 317, 52, 5], [131, 311, 16, 6], [166, 400, 34, 6],
                     [160, 337, 31, 5], [186, 423, 42, 9], [125, 334, 26, 8],
                     [218, 533, 16, 6], [146, 344, 22, 5], [150, 300, 23, 10],
                     [270, 620, 40, 15]])

    # rename columns
    xdata, ydata = data[:, 0], data[:, 1]
    xerr, yerr = data[:, 3], data[:, 2]

    # perform MCMC
    MC = pymc_linear_fit_withoutliers(xdata, ydata, data1err=xerr,
                                      data2err=yerr, return_MC=True)
    MC.sample(100000, burn=1000, verbose=0)

    # Summarise the chains once; no sampling happens below, so every use
    # would otherwise recompute the same statistics.
    stats = MC.stats()
    slope_stats = stats['slope']
    intercept_stats = stats['intercept']
    xgrid = np.linspace(20, 300)

    # show the results
    plt.figure()

    # plot the confidence levels
    low25 = xgrid * slope_stats['quantiles'][2.5] + intercept_stats['quantiles'][2.5]
    top97 = xgrid * slope_stats['quantiles'][97.5] + intercept_stats['quantiles'][97.5]
    plt.fill_between(xgrid, low25, top97, color='k', alpha=0.1,
                     label='2.5/97.5 quartile')

    # plot the average results
    plt.plot(xgrid, xgrid * slope_stats['mean'] + intercept_stats['mean'],
             color='k', linewidth=1, label='Average fit')

    # plot data
    plt.errorbar(xdata, ydata, xerr=xerr, yerr=yerr, color='b', label='data',
                 fmt='o')

    # show likely outliers
    outlier_mask = MC.badvals.value.astype('bool')
    plt.plot(xdata[outlier_mask], ydata[outlier_mask], 'rs',
             label='likely outliers')
    plt.xlim(20, 300)
    plt.legend(shadow=True, fancybox=True, scatterpoints=1, numpoints=1,
               loc='upper left')
    plt.savefig('test.pdf')
    plt.close()

    # MCMC plot
    plot(MC)
def run_mc(self, nsample=10000, interactive=False):
    """Run the model using MCMC and plot the result.

    :param nsample: number of iterations passed to the sampler
    :param interactive: use ``isample`` instead of ``sample`` when true

    The sampler is stored on ``self.M``; burn-in (1000) and thinning (10)
    are fixed.
    """
    from pymc.Matplot import plot
    from pymc import MCMC

    self.M = MCMC(self)
    run_sampler = self.M.isample if interactive else self.M.sample
    run_sampler(iter=nsample, burn=1000, thin=10)
    plot(self.M)
def run_mc(self, nsample=10000, interactive=False, doplot=False, verbose=0):
    """Run the model using MCMC, optionally plotting the result.

    :param nsample: number of iterations passed to the sampler
    :param interactive: use ``isample`` instead of ``sample`` when true
    :param doplot: call ``pymc.Matplot.plot`` on the sampler afterwards
    :param verbose: verbosity forwarded to the sampler

    The sampler is stored on ``self.M``; burn-in (1000) and thinning (10)
    are fixed.
    """
    from pymc import MCMC

    self.M = MCMC(self)
    run_sampler = self.M.isample if interactive else self.M.sample
    run_sampler(iter=nsample, burn=1000, thin=10, verbose=verbose)

    if not doplot:
        return
    # Imported lazily so that plotting support is only needed on request.
    from pymc.Matplot import plot
    plot(self.M)
def test_simple(self):
    """Check that ``pymc.geweke`` yields one score per interval for 'a'."""
    n_intervals = 20
    geweke_scores = pymc.geweke(S, intervals=n_intervals)
    assert_equal(len(geweke_scores['a']), n_intervals)

    # Plot diagnostics (if plotting is available); an ImportError raised
    # anywhere in this step is ignored.
    try:
        from pymc.Matplot import geweke_plot
        geweke_plot(geweke_scores, path=DIR, verbose=0)
    except ImportError:
        pass
def test_simple(self):
    """Check that ``pymc.geweke`` (maxlag=5) yields one score per interval."""
    n_intervals = 20
    geweke_scores = pymc.geweke(S, intervals=n_intervals, maxlag=5)
    assert_equal(len(geweke_scores['a']), n_intervals)

    # Plot diagnostics (if plotting is available); an ImportError raised
    # anywhere in this step is ignored.
    try:
        from pymc.Matplot import geweke_plot
        geweke_plot(geweke_scores, path=DIR)
    except ImportError:
        pass
def analizeMwm():
    """Sample the ``dmwm`` model and inspect its traces.

    Prints the masked view of ``x`` and ``dmwm.disasters_array``, runs 10000
    iterations (burn 1000, thin 10), prints the ``switchpoint`` trace, calls
    ``hist`` on the ``late_mean`` trace and passes the sampler to ``plot``.
    """
    masked = np.ma.masked_equal(x, value=None)
    print("m v: ", masked)
    print("dmwm da: ", dmwm.disasters_array)

    sampler = MCMC(dmwm)
    sampler.sample(iter=10000, burn=1000, thin=10)

    switchpoint_trace = sampler.trace('switchpoint')[:]
    print("Mwm t: ", switchpoint_trace)

    late_mean_trace = sampler.trace('late_mean')[:]
    hist(late_mean_trace)
    # show()
    plot(sampler)
def run_mc(self, nsample=10000, interactive=False, doplot=False, verbose=0):
    """Run the model using MCMC, optionally plotting the result.

    :param nsample: number of iterations passed to the sampler
    :param interactive: use ``isample`` instead of ``sample`` when true
    :param doplot: call ``pymc.Matplot.plot`` on the sampler afterwards
    :param verbose: verbosity forwarded to the sampler

    The sampler is stored on ``self.M``; burn-in (1000) and thinning (10)
    are fixed.
    """
    from pymc import MCMC

    self.M = MCMC(self)
    run_sampler = self.M.isample if interactive else self.M.sample
    run_sampler(iter=nsample, burn=1000, thin=10, verbose=verbose)

    if not doplot:
        return
    # Imported lazily so that plotting support is only needed on request.
    from pymc.Matplot import plot
    plot(self.M)
def test_simple(self):
    """Check the Geweke scores of 'a': 20 of them, fewer than 2 beyond 1.96."""
    geweke_scores = pymc.geweke(S, intervals=20)
    a_scores = geweke_scores['a']
    assert_equal(len(a_scores), 20)

    # If the model has converged, 95% the scores should lie
    # within 2 standard deviations of zero, under standard normal model
    abs_scores = np.abs(np.array(a_scores)[:, 1])
    n_outside = sum(abs_scores > 1.96)
    assert n_outside < 2

    # Plot diagnostics (if plotting is available); an ImportError raised
    # anywhere in this step is ignored.
    try:
        from pymc.Matplot import geweke_plot
        geweke_plot(geweke_scores, path=DIR, verbose=0)
    except ImportError:
        pass
def test_simple(self):
    """Check the Geweke scores of 'a': 20 of them, fewer than 2 beyond 1.96."""
    geweke_scores = pymc.geweke(S, intervals=20)
    a_scores = geweke_scores['a']
    assert_equal(len(a_scores), 20)

    # If the model has converged, 95% the scores should lie
    # within 2 standard deviations of zero, under standard normal model
    abs_scores = np.abs(np.array(a_scores)[:, 1])
    n_outside = sum(abs_scores > 1.96)
    assert n_outside < 2

    # Plot diagnostics (if plotting is available); an ImportError raised
    # anywhere in this step is ignored.
    try:
        from pymc.Matplot import geweke_plot
        geweke_plot(geweke_scores, path=DIR, verbose=0)
    except ImportError:
        pass
def analizeM():
    """Sample the ``dm`` model, inspect it and switch a step method.

    Runs 10000 iterations (burn 1000, thin 10), prints the ``switchpoint``
    trace, plots the sampler, prints the step methods assigned to the three
    model variables and finally assigns ``Metropolis`` with
    ``proposal_sd=2.`` to ``dm.late_mean``.
    """
    sampler = MCMC(dm)
    print("M: ", sampler)
    sampler.sample(iter=10000, burn=1000, thin=10)

    switchpoint_trace = sampler.trace('switchpoint')[:]
    print("M t: ", switchpoint_trace)
    hist(sampler.trace('late_mean')[:])
    # show()
    plot(sampler)
    # show()

    assigned = sampler.step_method_dict
    print("M smd dm sp: ", assigned[dm.switchpoint])
    print("M smd dm em: ", assigned[dm.early_mean])
    print("M smd dm lm: ", assigned[dm.late_mean])

    sampler.use_step_method(Metropolis, dm.late_mean, proposal_sd=2.)
def main():
    """Sample the hierarchical-prior coin model and plot it as PDF.

    Prints the values of ``kappa`` and ``mu`` before and after sampling
    (10000 iterations, burn 9000, thin 2), prints the model variables and
    passes the sampler to ``plot`` with ``format='pdf'``.
    """
    Amu = 2
    Bmu = 2
    Skappa = 1
    Rkappa = 0.1
    data = [(12, 6), (7, 2)]

    model = pymc.MCMC(hierarchical_prior.set_flips(data, Amu, Bmu, Skappa, Rkappa))

    # Single %-formatted strings print the same text under Python 2 and 3;
    # the original print statements are a SyntaxError on Python 3.
    print("PRIOR KAPPA/MU %s %s" % (model.kappa.value, model.mu.value))
    model.sample(iter=10000, burn=9000, thin=2)
    print("POST KAPPA/MU %s %s" % (model.kappa.value, model.mu.value))
    print("MODEL %s" % (model.variables,))

    plot(model, format='pdf')
#perform MCMC MC = pymc_linear_fit_withoutliers(xdata, ydata, data1err=xerr, data2err=yerr, return_MC=True) MC.sample(100000, burn=1000, verbose=0) #show the results fig = plt.figure() #plot the confidence levels low25 = np.linspace(20,300)*MC.stats()['slope']['quantiles'][2.5] + MC.stats()['intercept']['quantiles'][2.5] top97 = np.linspace(20,300)*MC.stats()['slope']['quantiles'][97.5] + MC.stats()['intercept']['quantiles'][97.5] plt.fill_between(np.linspace(20,300), low25, top97, color='k', alpha=0.1, label='2.5/97.5 quartile') #plot the average results plt.plot(np.linspace(20,300), np.linspace(20,300)*MC.stats()['slope']['mean'] + MC.stats()['intercept']['mean'], color='k', linewidth=1, label='Average fit') #plot data plt.errorbar(xdata, ydata, xerr=xerr, yerr=yerr, color='b', label='data', fmt='o') #show likely outliers plt.plot(xdata[MC.badvals.value.astype('bool')], ydata[MC.badvals.value.astype('bool')], 'rs', label='likely outliers') plt.xlim(20, 300) plt.legend(shadow=True, fancybox=True, scatterpoints=1, numpoints=1, loc='upper left') plt.savefig('test.pdf') plt.close() #MCMC plot plot(MC)
import mean_std
import pymc
from pymc.Matplot import plot
from pylab import show

# now, use MCMC sampling
model = pymc.MCMC(mean_std)
model.sample(iter=10000)

# report the summary statistics, then draw and display the plots
print(model.stats())
plot(model)
show()
def pymc_linear_fit_withoutliers(data1, data2, data1err=None, data2err=None,
                                 print_results=True, intercept=True,
                                 nsample=50000, burn=5000, thin=2,
                                 return_MC=False, guess=None, verbose=0):
    """
    Use pymc to fit a line to data with outliers, assuming outliers
    come from a broad, uniform distribution that cover all the data.

    :param data1: xdata
    :param data2: ydata
    :param data1err: x errors
    :param data2err: y errors
    :param print_results: whether or not to print out the results
    :param intercept: whether or not to fit for intercept
    :param nsample: number of samples
    :param burn: number of burn-in samples
    :param thin: thinning value
    :param return_MC: whether or not to return the pymc MCMC instance
    :param guess: initial guesses for slope and intercept
    :param verbose: verbosity level of MCMC sampler

    :return: the MCMC instance when ``return_MC`` is true; otherwise the mean
        slope and intercept ``(m, b)``, or just ``m`` when ``intercept`` is
        false.

    The sampler is always passed to ``plot`` after sampling.
    """
    if guess is None:
        guess = (0, 0)

    xmu = pymc.distributions.Uninformative(name='x_observed', value=0)

    if data1err is None:
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True,
                                          value=data1, tau=1, trace=False)
    else:
        xtau = pymc.distributions.Uninformative(name='x_tau',
                                                value=1.0 / data1err ** 2,
                                                observed=True, trace=False)
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True,
                                          value=data1, tau=xtau, trace=False)

    # Range of the uniform outlier distribution. Without y errors the range
    # is the data range itself; the original computed data2 - None here and
    # raised TypeError even though data2err=None is supported further down.
    if data2err is None:
        bady_lower, bady_upper = np.min(data2), np.max(data2)
    else:
        bady_lower = np.min(data2 - data2err)
        bady_upper = np.max(data2 + data2err)

    d = {'slope': pymc.distributions.Uninformative(name='slope', value=guess[0],
                                                   doc='Slope of the straight line'),
         'badvals': pymc.distributions.DiscreteUniform('bad', lower=0, upper=1,
                                                       value=[False] * len(data2),
                                                       doc='Outliers'),
         'bady': pymc.distributions.Uniform('bady', bady_lower, bady_upper,
                                            value=data2)}

    # Each point follows the line where badvals is 0 and takes the free
    # value ``bady`` where badvals is 1.
    if intercept:
        d['intercept'] = pymc.distributions.Uninformative(name='intercept',
                                                          value=guess[1])

        @pymc.deterministic(trace=False)
        def model(x=xdata, slope=d['slope'], intercept=d['intercept'],
                  badvals=d['badvals'], bady=d['bady']):
            return (x * slope + intercept) * (1 - badvals) + badvals * bady
    else:
        @pymc.deterministic(trace=False)
        def model(x=xdata, slope=d['slope'], badvals=d['badvals'],
                  bady=d['bady']):
            return x * slope * (1 - badvals) + badvals * bady

    d['f'] = model

    if data2err is None:
        ydata = pymc.distributions.Normal('y', mu=model, observed=True,
                                          value=data2, tau=1, trace=False)
    else:
        ytau = pymc.distributions.Uninformative(name='y_tau',
                                                value=1.0 / data2err ** 2,
                                                observed=True, trace=False)
        ydata = pymc.distributions.Normal('y', mu=model, observed=True,
                                          value=data2, tau=ytau, trace=False)
    d['y'] = ydata

    MC = pymc.MCMC(d)
    MC.sample(nsample, burn=burn, thin=thin, verbose=verbose)

    # generate plots
    plot(MC)

    MCs = MC.stats()
    m, em = MCs['slope']['mean'], MCs['slope']['standard deviation']
    if intercept:
        b, eb = MCs['intercept']['mean'], MCs['intercept']['standard deviation']

    if print_results:
        # Build each line as one string so the same text is printed under
        # Python 2 and Python 3.
        best_fit = "MCMC Best fit y = %g x" % m
        if intercept:
            best_fit += "  + %g" % b
        print(best_fit)
        print("m = %g +/- %g" % (m, em))
        if intercept:
            print("b = %g +/- %g" % (b, eb))
        # Residuals are taken against the fitted line including its
        # intercept; the original always used data1 * m alone.
        fitted = data1 * m + b if intercept else data1 * m
        print("Chi^2 = %g, N = %i" % (((data2 - fitted) ** 2).sum(),
                                      data1.shape[0] - 1))

    if return_MC:
        return MC
    if intercept:
        return m, b
    return m
ent = 0 accept = dbstate[dbstate.keys()[ent]]['_accepted'] reject = dbstate[dbstate.keys()[ent]]['_rejected'] acceptancerate_theta = accept / (reject+accept) if args.verbose: print ' with theta = (k,L*,N) acceptance rate =',acceptancerate_theta print ' No. of accepted steps = '+str(accept) print ' No. of rejected steps = '+str(reject) MM.db.close() if args.verbose: print ' - Saved chains to ',dbname #------------------------------------------------------------------------------------------------------------- if args.verbose: print ' - Plot logNobjuniverse, k and logLstar samples' plot(MM) # plotting results if not os.path.isdir('./balff_plots/'): if args.verbose: print ' - Did not find "./balff_plots/" so creating it' os.mkdir('./balff_plots/') thetafile = './balff_plots/'+dbname.split('balff_output/')[-1].replace('.pickle','_theta.png') if args.verbose: print ' - Moving theta_2.png to',thetafile out = commands.getoutput('mv theta_2.png '+thetafile) if args.verbose: print '\n - Printing Summary' statval = 1 # resetting stat indicator ssval = MM.stats() # checking that stats can be created if (ssval['theta'] == None): statval = 0 if statval == 1:
#!/usr/bin/env python
import numpy
import two_normal_model
from pymc import MCMC
from pymc.Matplot import plot

# do posterior sampling
m = MCMC(two_normal_model)
m.sample(iter=100000, burn=1000)
print(m.stats())

# write the full trace of each parameter to "<name>.trace"
traced_parameters = ('mean1', 'mean2', 'std_dev', 'theta')
for parameter in traced_parameters:
    trace_values = m.trace(parameter)[:]
    numpy.savetxt("%s.trace" % parameter, trace_values)

# draw some pictures
plot(m)
"""
A wrapper for the main tomography script.
"""
# Author : Sangeeta Bhatia
import pymc as pm
import tomography as tm
from pymc.Matplot import plot
from os import rename
import sys

# The output prefix comes from the command line. Check it before the
# simulation so a missing argument does not abort the run after all the
# sampling work has been done.
if len(sys.argv) < 2:
    sys.exit("usage: %s OUTPUT_PREFIX" % sys.argv[0])
fname = sys.argv[1]

runs = int(input("Enter the number of iterations for the MCMC simulation: "))
burnin = int(input("Enter the burn in for the MCMC simulation: "))
thin = int(input("Enter the thining variable for the MCMC simulation: "))

S = pm.MCMC(tm)
S.sample(runs, burnin, thin)
stats = S.stats()
S.write_csv("summary.csv", variables=['Q', 'tau'])
plot(S)  # Automatically saves the output - one .png file for each variable.

# Finally, prefix the generated files with the requested name
sname = fname + '-summary.csv'
qname = fname + '-Q.png'
tname = fname + '-tau.png'
rename("summary.csv", sname)
rename("Q.png", qname)
rename("tau.png", tname)
from pymc import MCMC
from pymc.Matplot import plot
import numpy as np
import small_model as model

A = MCMC(model)
A.sample(iter=5000)
plot(A, suffix='-gamma')

# Summarise the chains once; nothing is sampled below, so every further
# A.stats() call would recompute the same values.
stats = A.stats()

# Parenthesised single-expression prints behave the same on Python 2 and 3.
print('%s prior' % model.prior)
print([(x, stats[x]['mean']) for x in stats])

error = ((1 - stats['ABp']['mean']) * 400
         + stats['CAp']['mean'] * 600
         + stats['CBp']['mean'] * 1000
         - 200)
print('Error: %s' % error)
def run1():
    """Fit the built-in example data with outlier rejection and plot the fit.

    Builds the sampler with ``pymc_linear_fit_withoutliers``, draws a further
    100000 samples, saves the data/fit figure to ``test.pdf`` and finally
    passes the sampler to ``plot``.
    """
    # fake data, one row per point: [x, y, yerr, xerr]
    data = np.array([[201, 592, 61, 9], [244, 401, 25, 4], [47, 583, 58, 11],
                     [287, 402, 15, 7], [203, 495, 21, 5], [58, 173, 15, 9],
                     [210, 479, 27, 4], [202, 504, 14, 4], [198, 510, 30, 11],
                     [158, 416, 16, 7], [165, 393, 14, 5], [201, 442, 25, 5],
                     [157, 317, 52, 5], [131, 311, 16, 6], [166, 400, 34, 6],
                     [160, 337, 31, 5], [186, 423, 42, 9], [125, 334, 26, 8],
                     [218, 533, 16, 6], [146, 344, 22, 5], [150, 300, 23, 10],
                     [270, 620, 40, 15]])

    # rename columns
    xdata, ydata = data[:, 0], data[:, 1]
    xerr, yerr = data[:, 3], data[:, 2]

    # perform MCMC
    MC = pymc_linear_fit_withoutliers(xdata, ydata, data1err=xerr,
                                      data2err=yerr, return_MC=True)
    MC.sample(100000, burn=1000, verbose=0)

    # Summarise the chains once; no sampling happens below, so every use
    # would otherwise recompute the same statistics.
    stats = MC.stats()
    slope_stats = stats['slope']
    intercept_stats = stats['intercept']
    xgrid = np.linspace(20, 300)

    # show the results
    plt.figure()

    # plot the confidence levels
    low25 = xgrid * slope_stats['quantiles'][2.5] + intercept_stats['quantiles'][2.5]
    top97 = xgrid * slope_stats['quantiles'][97.5] + intercept_stats['quantiles'][97.5]
    plt.fill_between(xgrid, low25, top97, color='k', alpha=0.1,
                     label='2.5/97.5 quartile')

    # plot the average results
    plt.plot(xgrid, xgrid * slope_stats['mean'] + intercept_stats['mean'],
             color='k', linewidth=1, label='Average fit')

    # plot data
    plt.errorbar(xdata, ydata, xerr=xerr, yerr=yerr, color='b', label='data',
                 fmt='o')

    # show likely outliers
    outlier_mask = MC.badvals.value.astype('bool')
    plt.plot(xdata[outlier_mask], ydata[outlier_mask], 'rs',
             label='likely outliers')
    plt.xlim(20, 300)
    plt.legend(shadow=True, fancybox=True, scatterpoints=1, numpoints=1,
               loc='upper left')
    plt.savefig('test.pdf')
    plt.close()

    # MCMC plot
    plot(MC)
import model
import pymc
import networkx as nx
import matplotlib.pyplot as plt
from pymc import MCMC
from pymc.Matplot import plot

# Sampler settings, kept together so they are easy to adjust.
sampling_options = dict(iter=10000, burn=1000, thin=10)

M = MCMC(model)
M.sample(**sampling_options)
plot(M, path='./plots/test_soft_evidence')

#g = pymc.graph.graph(pymc.Model(model), path='.')
#g.write('model.dot')
#G = nx.drawing.nx_agraph.read_dot('model.dot')
#nx.draw(G)
#plt.draw()
#hist(M.trace('late_mean')[:])
#show()
class MCMCRunManager(object):
    """Manages a single MCMC result in disk.

    All artefacts of a run live under one root directory, in the
    sub-directories ``model``, ``db``, ``plots``, ``stats`` and ``data``;
    a ``DONE`` file in the root is what ``is_done`` checks for.
    """

    def __init__(self, root_dir, name=None, backend='pickle'):
        """Compute (and create, via ``ensure_dir``) the directory layout.

        :param root_dir: directory of the run, or its parent when ``name``
            is given
        :param name: run name; defaults to the base name of ``root_dir``
        :param backend: name of the ``pymc.database`` backend module
        """
        super(MCMCRunManager, self).__init__()
        # root-dir and name
        if name is None:
            self.name = op.basename(root_dir)
        else:
            # self.name is needed for every file name built below; the
            # original left it unset on this path and raised AttributeError.
            self.name = name
            root_dir = op.join(root_dir, name)
        self.root_dir = ensure_dir(root_dir)
        # Models storage
        self.model_dir = ensure_dir(op.join(root_dir, 'model'))
        self.model_pickle = op.join(self.model_dir, '%s.pickle' % self.name)
        self.model_txt = op.join(self.model_dir, '%s.py' % self.name)
        self.model_dot = op.join(self.model_dir, '%s.dot' % self.name)
        self.model_png = op.join(self.model_dir, '%s.png' % self.name)
        # Traces storage
        self.db_dir = ensure_dir(op.join(self.root_dir, 'db'))
        self.backend = backend
        self.db_file = op.join(self.db_dir, '%s.pymc.%s' % (self.name, self.backend))
        self._db = None
        # Plots storage
        self.plots_dir = ensure_dir(op.join(self.root_dir, 'plots'))
        # Stats storage
        self.stats_dir = ensure_dir(op.join(self.root_dir, 'stats'))
        # Data storage - only if we really want to keep provenance clear
        self.data_dir = ensure_dir(op.join(self.root_dir, 'data'))
        self.data_file = op.join(self.data_dir, 'data.pickle')
        # Done file
        self.done_file = op.join(self.root_dir, 'DONE')

    def is_done(self):
        """Return True when the DONE marker file exists."""
        return op.isfile(self.done_file)

    def save_model_txt(self, txt):
        """Write the model source text to ``self.model_txt``."""
        with open(self.model_txt, 'w') as writer:
            writer.write(txt)

    def load_model_txt(self):
        """Return the stored model source text, or None if it cannot be read."""
        try:
            with open(self.model_txt) as reader:
                return reader.read()
        except Exception:
            # Best effort: any failure to read is reported as "no model text".
            return None

    def save_data(self, data, overwrite=True):
        """Store ``data`` in ``self.data_file`` through the HDF5 helper."""
        save_perturbation_record_data_to_hdf5(data, self.data_file, overwrite=overwrite)

    def load_data(self):
        """Return the stored data, or None if it cannot be loaded."""
        try:
            return load_perturbation_record_data_from_hdf5(self.data_file)
        except Exception:
            # Best effort: any failure to load is reported as "no data".
            return None

    def save_pymc_model_dict(self, model_dict):
        """Serialize ``model_dict`` with dill; prints a hint and re-raises on failure."""
        try:
            import dill
            # Pickle streams are binary, so the file must be opened in
            # binary mode (text mode breaks on Python 3 and on Windows).
            with open(self.model_pickle, 'wb') as writer:
                dill.dump(model_dict, writer, dill.HIGHEST_PROTOCOL)
        except Exception:
            print('Move to PyMC3 and hope that theano allows serialization...')
            raise  # No way to pickle these nasty fortrans, does PyMC allow to serialize models like this?

    def pymc_db(self):
        """Return the loaded pymc database, reusing a live cached instance."""
        # WARNING: weakrefed cache
        traces = self._db() if self._db is not None else None
        if traces is None:
            backend = getattr(pymc.database, self.backend)
            traces = backend.load(self.db_file)
            self._db = weakref.ref(traces)
        return traces

    def traces(self, varname):
        """Returns a numpy array with the traces for the variable, one squeezed row per chain."""
        # TODO: Q&D to eliminate delays on reading, rethink...
        cache_file = op.join(self.db_dir, '%s.%s' % (varname, 'pickle'))
        if not op.isfile(cache_file):
            # We could instead combine all the chains into a long one with chain=None
            # See e.g. https://github.com/pymc-devs/pymc/issues/144
            traces = np.array([
                self.pymc_db().trace(varname, chain=chain)[:].squeeze()
                for chain in range(self.num_chains())
            ])
            joblib.dump(traces, cache_file, compress=3)
            return traces
        return joblib.load(cache_file)

    def pymctraces(self, varname):
        """Print the database's ``_traces`` mapping and return the entry for ``varname``."""
        traces = self.pymc_db()._traces
        print(traces)
        return traces[varname]

    def varnames(self):
        """Return the trace names of the database, cached on disk with joblib."""
        # TODO: Q&D to eliminate delays on reading, rethink
        cache_file = op.join(self.db_dir, 'tracenames.pickled')
        if not op.isfile(cache_file):
            trace_names = self.pymc_db().trace_names
            joblib.dump(trace_names, cache_file, compress=3)
            return trace_names
        return joblib.load(cache_file)

    def num_chains(self):
        """Return the length of ``self.varnames()``."""
        # NOTE(review): this is the number of entries in trace_names, used by
        # traces() as the number of chains -- confirm that is what the
        # backend stores there.
        return len(self.varnames())

    def group_plots_in_dirs(self):
        """Symlink every PNG in the plots directory into a per-kind sub-directory."""

        def ensure_symlink(dest_dir, file_name, plot_file):
            dest_file = op.join(dest_dir, file_name)
            # Relative target: the plot sits one level above dest_dir.
            plot_file = op.join('../%s' % op.basename(plot_file))
            if not op.islink(dest_file):
                ensure_dir(dest_dir)
                os.symlink(plot_file, dest_file)

        for plot_file in glob(op.join(self.plots_dir, '*.png')):
            file_name = op.basename(plot_file)
            if 'group=' in file_name:
                ensure_symlink(op.join(self.plots_dir, 'posteriors-group'),
                               file_name, plot_file)  # Assume only one group per model ATM
            elif 'summary__' in file_name:
                ensure_symlink(op.join(self.plots_dir, 'summaries'), file_name, plot_file)
            elif 'fly=' in file_name:
                # The original tested the constant string 'fly=' (always
                # true), so unrelated plots were linked into 'posteriors-fly='.
                fly_name = file_name.partition('fly=')[2].partition('_')[0]
                ensure_symlink(
                    op.join(self.plots_dir, 'posteriors-fly=%s' % fly_name),
                    file_name, plot_file)

    def sample(self, model, mapstart=False, step_methods=None, iters=80000, burn=20000,
               num_chains=4, doplot=True, showplots=False, force=False, progress_bar=False):
        """Sample ``num_chains`` chains of ``model``, plotting and saving stats per chain.

        :param model: model passed to ``pymc.Model`` / ``pymc.MCMC``
        :param mapstart: first try a MAP fit and continue from its variables
        :param step_methods: optional iterable of
            ``(variable, step_method, kwargs)`` given to ``use_step_method``
        :param iters: iterations per chain
        :param burn: burn-in per chain
        :param num_chains: number of consecutive ``sample`` calls
        :param doplot: write trace and summary plots for each chain
        :param showplots: also call ``plt.show()`` after plotting
        :param force: re-run even when the run is already marked as done
        :param progress_bar: forwarded to the sampler
        :return: ``self.db_file`` when the run is already done and ``force``
            is false
        """
        print('MCMC for %s' % self.name)
        if self.is_done():
            if not force:
                print('\tAlready done, skipping...')
                return self.db_file
            else:
                print('\tWARNING: recomputing, there might be spurious files from previous runs...')  # Not a good idea

        # Let's graph the model
        graph = pymc.graph.dag(pymc.Model(model), name=self.name, path=self.model_dir)
        graph.write_png(op.join(self.model_dir, self.name + '.png'))

        start = time.time()

        if mapstart:
            # See http://stronginference.com/post/burn-in-and-other-mcmc-folklore
            # BUT WARNING, WOULD THIS MAKE MULTIPLE CHAIN START BE OVERLY CORRELATED?
            try:
                from pymc import MAP
                print('\tFinding MAP estimates...')
                M = MAP(model)
                M.fit()
                model = M.variables
                print('\tMAP estimates found...')
            except Exception as e:
                print('\tMAP Failed...', str(e))

        # Instantiate model
        M = pymc.MCMC(model, db=self.backend, dbname=self.db_file, name=self.name)

        # Tune step methods
        if step_methods is not None:
            for var, step_method, sm_kwargs in step_methods:
                M.use_step_method(step_method, var, **sm_kwargs)

        # Sample!
        for chain in range(num_chains):
            print('\tChain %d of %d' % (chain + 1, num_chains))
            M.sample(iter=iters, burn=burn, progress_bar=progress_bar)
            try:
                if doplot:
                    # Summaries for the chain
                    plot(M, suffix='__' + self.name + '__chain=%d' % chain,
                         path=self.plots_dir, verbose=0)
                    summary_plot(M, name='summary__' + self.name + '__chain=%d' % chain,
                                 path=self.plots_dir + '/')  # TODO: report no op.join (+'/') bug to pymc people
                    self.group_plots_in_dirs()
                    if showplots:
                        plt.show()
                chain_stats = M.stats(chain=chain)
                with open(op.join(self.stats_dir, 'stats__chain=%d' % chain), 'w') as writer:
                    pprint(chain_stats, writer)
            except Exception as e:
                # Plotting and summarizing are best effort; sampling goes on.
                print('\tError plotting or summarizing')
                print(str(e))
ent = 0 accept = dbstate[dbstate.keys()[ent]]['_accepted'] reject = dbstate[dbstate.keys()[ent]]['_rejected'] acceptancerate_theta = accept / (reject + accept) if args.verbose: print ' with theta = (k,L*,N) acceptance rate =', acceptancerate_theta print ' No. of accepted steps = ' + str(accept) print ' No. of rejected steps = ' + str(reject) MM.db.close() if args.verbose: print ' - Saved chains to ', dbname #------------------------------------------------------------------------------------------------------------- if args.verbose: print ' - Plot logNobjuniverse, k and logLstar samples' plot(MM) # plotting results if not os.path.isdir('./balff_plots/'): if args.verbose: print ' - Did not find "./balff_plots/" so creating it' os.mkdir('./balff_plots/') thetafile = './balff_plots/' + dbname.split('balff_output/')[-1].replace( '.pickle', '_theta.png') if args.verbose: print ' - Moving theta_2.png to', thetafile out = commands.getoutput('mv theta_2.png ' + thetafile) if args.verbose: print '\n - Printing Summary' statval = 1 # resetting stat indicator ssval = MM.stats() # checking that stats can be created if (ssval['theta'] == None): statval = 0
def get_Bayes(measurements=[], chunksize=5, Ndp=5, iter=50000, burn=5000):
    """Sample a truncated Dirichlet-process mixture model ``chunksize`` times.

    :param measurements: observed values (the default list is never mutated)
    :param chunksize: number of independent MCMC runs
    :param Ndp: number of mixture components
    :param iter: iterations per MCMC run
    :param burn: burn-in per MCMC run
    :return: dict with keys ``'gelman_rubin'`` (NaN when ``chunksize <= 1``),
        ``'modes'``, ``'simulations'`` and ``'sc_samples'``

    Each run is also passed to ``plot`` and adds two histograms to the
    current matplotlib figure.
    """
    sc = pymc.Uniform('sc', 0.1, 2.0, value=0.24)
    tau = pymc.Uniform('tau', 0.0, 1.0, value=0.5)

    concinit = 1.0
    conclo = 0.1
    conchi = 10.0
    concentration = pymc.Uniform('concentration', lower=conclo, upper=conchi,
                                 value=concinit)

    # The stick-breaking construction: requires Ndp beta draws dependent on the
    # concentration, before the probability mass function is actually constructed.
    #betas = pymc.Beta('betas', alpha=1, beta=concentration, size=Ndp)
    betas = pymc.Beta('betas', alpha=1, beta=1, size=Ndp - 1)

    @pymc.deterministic
    def pmf(betas=betas):
        "Construct a probability mass function for the truncated Dirichlet process"
        # prod = lambda x: np.exp(np.sum(np.log(x)))  # Slow but more accurate(?)
        prod = np.prod
        # A comprehension unpacks the (index, value) pairs; the original
        # mapped a two-argument lambda over enumerate(), which raises
        # TypeError because each pair arrives as a single argument.
        value = [u * prod(1.0 - betas[:i]) for i, u in enumerate(betas)]
        value.append(1.0 - sum(value))  # force value to sum to 1
        return value

    # The cluster assignments: each data point's estimated cluster ID.
    # Remove idinit to allow clusterid to be randomly initialized:
    Ndata = len(measurements)
    idinit = np.zeros(Ndata, dtype=np.int64)
    clusterid = pymc.Categorical('clusterid', p=pmf, size=Ndata, value=idinit)

    @pymc.deterministic(name='clustermean')
    def clustermean(clusterid=clusterid, sc=sc, Ndp=Ndp):
        return sc * np.arange(1, Ndp + 1)[clusterid]

    @pymc.deterministic(name='clusterprec')
    def clusterprec(clusterid=clusterid, sc=sc, tau=tau, Ndp=Ndp):
        return 1.0 / (sc * sc * tau * tau * (np.arange(1, Ndp + 1)[clusterid]))

    y = pymc.Normal('y', mu=clustermean, tau=clusterprec, observed=True,
                    value=measurements)

    ## for posterior predictive simulation
    @pymc.deterministic(name='y_sim')
    def y_sim(value=[0], sc=sc, tau=tau, clusterid=clusterid, Ndp=Ndp):
        n = np.arange(1, Ndp + 1)[np.random.choice(clusterid)]
        return np.random.normal(loc=sc * n, scale=sc * tau * n)

    m = pymc.Model({
        "scale": sc,
        "tau": tau,
        "betas": betas,
        "clusterid": clusterid,
        "normal": y,
        "pred": y_sim
    })

    sc_samples = []
    modes = []
    simulations = []
    for i in range(0, chunksize):
        mc = pymc.MCMC(m)
        # Honour the caller's iter/burn; the original hard-coded
        # iter=50000, burn=10000 and silently ignored both parameters.
        mc.sample(iter=iter, burn=burn)
        plot(mc)

        sc_sample = mc.trace('sc')[:]
        sc_samples.append(sc_sample)
        simulation = mc.trace('y_sim')[:]
        simulations.append(simulation)

        plt.hist(measurements, 50, fc='gray', histtype='stepfilled', alpha=0.3,
                 normed=False)
        plt.hist(simulation, 30, fc='blue', histtype='stepfilled', alpha=0.3,
                 normed=True)

        # Mode estimate: centre of the fullest of 100 bins over the measurements.
        counts, edges = np.histogram(
            measurements, bins=100,
            range=[np.min(measurements) - 0.25, np.max(measurements) + 0.25])
        argm = counts.argmax()
        modes.append((edges[argm] + edges[argm + 1]) / 2)

    if chunksize <= 1:
        gr = np.nan
    else:
        # The original discarded this result, leaving ``gr`` undefined and
        # raising NameError when the result dict was built.
        gr = pymc.gelman_rubin(sc_samples)

    dic = {
        'gelman_rubin': gr,
        'modes': modes,
        'simulations': simulations,
        'sc_samples': sc_samples
    }
    return dic
color='k', linewidth=1, label='Average fit') #plot data plt.errorbar(xdata, ydata, xerr=xerr, yerr=yerr, color='b', label='data', fmt='o') #show likely outliers plt.plot(xdata[MC.badvals.value.astype('bool')], ydata[MC.badvals.value.astype('bool')], 'rs', label='likely outliers') plt.xlim(20, 300) plt.legend(shadow=True, fancybox=True, scatterpoints=1, numpoints=1, loc='upper left') plt.savefig('test.pdf') plt.close() #MCMC plot plot(MC)
# Ph21 Set 5
# Aritra Biswas
# coin_mcmc.py
# Run MCMC on coin_model.py
import coin_model
from pymc import MCMC
from pymc.Matplot import plot

M = MCMC(coin_model)
M.sample(iter=10000, burn=0, thin=1)

# Blank separator line; print("") emits the same empty line under Python 2
# and 3, whereas a bare ``print`` does nothing on Python 3.
print("")
plot(M)
M.pheads.summary()
def bayesian_model():
    """Interactively sample ``disastermodel`` and return the sampler.

    Runs ``isample`` for 10000 iterations (burn 1000, thin 10), prints the
    full trace of ``'l'`` and passes the sampler to ``plot``.

    :return: the ``MCMC`` instance
    """
    model = MCMC(disastermodel)
    model.isample(iter=10000, burn=1000, thin=10)
    # Parenthesised single-expression print: same output on Python 2 and 3;
    # the original print statement is a SyntaxError on Python 3.
    print(model.trace('l')[:])
    plot(model)
    return model