def bayesian_regression(self, Methodology):
    # Assumes module-level imports: from collections import OrderedDict,
    # from numpy import polyfit, from pymc import MCMC
    fit_dict = OrderedDict()
    fit_dict['methodology'] = r'Inference $\chi^{2}$ model'

    # Initial guess for the fit from an ordinary least-squares line
    Np_lsf = polyfit(self.x_array, self.y_array, 1)
    m_0, n_0 = Np_lsf[0], Np_lsf[1]

    # Build the chi-square model dictionary and sample it
    MCMC_dict = self.lr_ChiSq(self.x_array, self.y_array, m_0, n_0)
    myMCMC = MCMC(MCMC_dict)
    myMCMC.sample(iter=10000, burn=1000)

    # Posterior means and standard deviations for slope (m) and intercept (n)
    fit_dict['m'] = myMCMC.stats()['m']['mean']
    fit_dict['n'] = myMCMC.stats()['n']['mean']
    fit_dict['m_error'] = myMCMC.stats()['m']['standard deviation']
    fit_dict['n_error'] = myMCMC.stats()['n']['standard deviation']

    return fit_dict
def dotheMCMC(x):
    '''
    Performs the Markov Chain Monte Carlo analysis to find the global average
    of film runtimes and the deviation from that average for different
    countries, languages and genres.

    Parameters
    ----------
    x: tuple
        x[0]: integer
            the year in which the films to be analysed were released.
        x[1]: pandas dataframe
            the dataframe containing all the movies released that year

    Returns
    -------
    stats: a pyMC2 stats dictionary
        this contains the results of the MCMC, i.e. the average, standard
        deviation and 95% confidence interval for each category and for the
        global average.
    year: integer
        the year that was analysed, identical to x[0]
    representedCountries: dictionary of arrays
        a dictionary of two elements: "same" and "diff". dict['same'] and
        dict['diff'] each contains an array of two-element lists. Each pair
        is the name of a country and the number of times that country appears
        in the group dataframe, for overlapping and non-overlapping
        writer/director respectively. The array is ordered by the number of
        appearances from smallest to largest.
    representedLanguages: dictionary of arrays
        as representedCountries but for languages
    representedGenres: dictionary of arrays
        as representedCountries but for genres
    numRepresented: integer
        the total number of country, language and genre categories
        represented that year
    '''
    # get the parameters needed to initialize the model
    year, group = x[0], x[1]
    representedCountries = get_represented(group, countries, 'Cou_')
    representedLanguages = get_represented(group, languages, 'Lan_')
    representedGenres = get_represented(group, genres, 'Gen_')
    numRepresented = representedCountries['same'].shape[0] + \
                     representedCountries['diff'].shape[0]
    numRepresented += representedLanguages['same'].shape[0] + \
                      representedLanguages['diff'].shape[0]
    numRepresented += representedGenres['same'].shape[0] + \
                      representedGenres['diff'].shape[0]

    # initialize the model in a pyMC object, then perform the MCMC
    mc = MCMC(film_model_by_year(str(year), group, representedCountries,
                                 representedLanguages, representedGenres,
                                 numRepresented))
    mc.sample(iter=300000, burn=75000, progress_bar=False)
    return {'stats': mc.stats(), 'year': year, 'countries': representedCountries,
            'languages': representedLanguages, 'genres': representedGenres,
            'num': numRepresented}
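A minimal usage sketch of consuming the returned dictionary, which follows the structure described in the docstring above. The dataframe `df` and its `'year'` column are assumptions about the surrounding pipeline:

```python
# Hypothetical driver: map dotheMCMC over per-year groups of a movie
# dataframe df (assumed to have a 'year' column).
for year, group in df.groupby('year'):
    result = dotheMCMC((year, group))
    # each entry in 'countries' is a [name, count] pair, smallest count last
    # index -1 therefore gives the most-represented country
    most_common = result['countries']['same'][-1]
    print year, result['num'], most_common
```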
def test_stats_after_reload(self):
    db = database.pickle.load('MCMC.pickle')
    M2 = MCMC(DisasterModel, db=db)
    M2.stats()
    db.close()
    os.remove('MCMC.pickle')
# value for each parameter, as well as the 95% confidence interval. The
# *plot* function takes the model (or a single parameter) as an argument:

Matplot.plot(M)
plt.show()

# ### Making inferences about model parameters ###

# The *stats()* method provides an interface to the statistics of our posterior,
# in the form of a dictionary. For example, let's find the predicted ratio between
# the effective sizes of the disk and the bulge, and let's also explore how
# confidently we can determine the effective surface brightness of the bulge.

print 'R_effective (bulge) / R_effective (disk) =', \
    M.stats()['r_e_B']['mean'] / M.stats()['r_e_D']['mean']
print 'Effective surface brightness of the bulge: \n', \
    ' Best-fit value:', M.stats()['M_e_B']['mean'], \
    '\n 95% Confidence interval:', M.stats()['M_e_B']['quantiles'][2.5], \
    'to', M.stats()['M_e_B']['quantiles'][97.5]

# ### Visualizing specific realizations of our model ###

# The *trace()* method presents the values of a variable for all of the saved
# Markov chain steps. Let's plot several of these traces and see how
# the model changes with different parameter values.

for i in range(50):
    plt.plot(M.r.value, M.trace('SB')[i], c='gray', alpha=.25)
plt.scatter(M.r.value, M.mags.value, c='r')
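For reference, each entry of the pyMC2 `stats()` dictionary is itself a dictionary of summary statistics. A short inspection sketch, reusing the node name `'r_e_B'` from the snippet above (the listed keys and quantile levels are pyMC2's defaults):

```python
s = M.stats()['r_e_B']
print sorted(s.keys())
# ['95% HPD interval', 'mc error', 'mean', 'n', 'quantiles', 'standard deviation']
print s['quantiles'][50]  # posterior median; quantile keys: 2.5, 25, 50, 75, 97.5
```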
def test_stats_after_reload(self):
    db = database.pickle.load("MCMC.pickle")
    M2 = MCMC(disaster_model, db=db)
    M2.stats()
    db.close()
    os.remove("MCMC.pickle")
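Both reload tests assume an `MCMC.pickle` trace database already exists on disk. A minimal sketch of the companion save step, using pyMC2's pickle database backend (the model name `disaster_model` is reused from the test above):

```python
from pymc import MCMC
import disaster_model

# Sample with the pickle backend so the traces persist to disk
M = MCMC(disaster_model, db='pickle', dbname='MCMC.pickle')
M.sample(iter=10000, burn=1000)
M.db.close()  # flush the traces to MCMC.pickle
```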
from pymc import MCMC
import numpy as np
from pythonMCMC import pymcCrater
from pymc.Matplot import plot
from pylab import hist, show, draw

M = MCMC(pymcCrater)
M.sample(iter=10000, burn=700, thin=5)

print M.trace('lnlike')[:]
print M.stats()

plot(M)
show()
print """Beta distribution with alpha=%.4f and beta=%.4f yields mu=%.4f and sigma^2=%.4f """ % (M.alpha, M.beta, B.mean(), B.var()) # ## (b) Draw samples from the posterior M.sample(20000, burn=2000, thin=20) # ## (c) Check convergence of MCMC by plotting traces fig, axs = plt.subplots(1, 3, figsize=(12, 4)); for i in range(3): axs[i].plot(M.avg.trace[:, i]); axs[i].set_title('Player %u' % i); axs[0].set_ylabel('Batting average'); axs[1].set_xlabel('Sample'); # ## (d) Posterior mean and 95% CI for each player avg_mcmc_mean = M.stats()['avg']['mean'] avg_mcmc_ci = M.stats()['avg']['95% HPD interval'] print print 'MCMC mean for each player' for m, ci in zip(avg_mcmc_mean, avg_mcmc_ci): print 'Mean: %.4f\tCI: (%.4f, %.4f)' % (m, ci[0], ci[1]) # transform confidence intervals for plotting avg_mcmc_ci[:, 0] = avg_mcmc_mean - avg_mcmc_ci[:, 0] avg_mcmc_ci[:, 1] = avg_mcmc_ci[:, 1] - avg_mcmc_mean # ## (e) Full-season batting average versus MLE from April df_full = pd.read_csv('laa_2011_full.txt', sep='\t') avg_mle_full = df_full.H / df_full.AB.astype(float)
import coal_disaster
from pymc import MCMC
from pylab import hist, show
from pymc.Matplot import plot, pyplot

__author__ = 'auroua'

M = MCMC(coal_disaster)
print M.switchpoint.value
M.sample(iter=10000, burn=1000, thin=10)

# print len(M.trace('switchpoint')[:])
# hist(M.trace('late_mean')[:])
# show()

plot(M)
M.stats()
#!/usr/bin/env python
import numpy

import two_normal_model
from pymc import MCMC
from pymc.Matplot import plot

# do posterior sampling
m = MCMC(two_normal_model)
m.sample(iter=100000, burn=1000)
print(m.stats())

# save each parameter's trace to a text file for later inspection
for p in ['mean1', 'mean2', 'std_dev', 'theta']:
    numpy.savetxt("%s.trace" % p, m.trace(p)[:])

# draw some pictures
plot(m)
from pymc import MCMC
from pymc.Matplot import plot
import numpy as np

import small_model as model

A = MCMC(model)
A.sample(iter=5000)
plot(A, suffix='-gamma')

print '%s prior' % model.prior
print [(x, A.stats()[x]['mean']) for x in A.stats()]

# expected loss computed from the posterior mean transition probabilities
error = (1 - A.stats()['ABp']['mean']) * 400 + \
    A.stats()['CAp']['mean'] * 600 + \
    A.stats()['CBp']['mean'] * 1000 - 200
print 'Error: %s' % error
import model
from pymc import MCMC
import pprint

# Run sampling for 40000 iterations, with a burn-in of 5000 iterations and
# thinning for every 10 iterations.
M = MCMC(model)
M.sample(iter=40000, burn=5000, thin=10)

# Refer to sample_output.txt for example of posterior sampling summary.
pprint.pprint(M.stats())