# Here is a transcript of a Python session where I do the analysis of
# the data set in this directory.  I've edited it extensively, so
# it's possible that some of the commands will not work as written,
# but hopefully this is close.
# The session appears to have been run under "ipython --pylab", where
# numpy and matplotlib names (loadtxt, plot, errorbar, ...) are
# available unqualified; the imports below reproduce that environment.
from pylab import *
import numpy as np
import scipy.stats as ss
import emcee
import multilevel as m
# Load up the true parameters, just so we have access later.  The file
# holds the flat parameter vector; nteachers_from_length recovers the
# number of teachers from its length.
p0 = loadtxt('true-params.dat')
nteachers = m.nteachers_from_length(p0.shape[0])
p0 = m.to_params(p0, nteachers)
# Load the student data.
students = loadtxt('students.dat.gz', dtype=[('score', float), ('teacher', int)])
# Set up the posterior object.
lnpost = m.Posterior(students['score'], students['teacher'])
# Initialize a sampler using 200 walkers, in 48 dimensions
# (m.params_length(nteachers)==48), with the given posterior function.
sampler = emcee.EnsembleSampler(200, 48, lnpost)
# Set up the initial sample in a small ball about p0.
pts = np.random.multivariate_normal(mean=p0.view(float).reshape((-1,)), cov=1e-3*eye(48), size=200)
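# (p0 is a structured array; the .view(float).reshape((-1,)) turns it
# back into the flat 48-element vector that the sampler works in.)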
# Run for a little while, just to make sure it works:
result = sampler.run_mcmc(pts, 500, thin=10)
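# (In emcee 2.x, run_mcmc returns a tuple whose first element is the
# final position of all the walkers; we use result[0] below to resume
# the chain where it left off.)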
# Examine how the mean of the ensemble of points has changed over each
# iteration for each variable (normalized to the overall mean and
# standard deviation).
for k in range(48):
    mu = mean(sampler.chain[:,:,k])
    sigma = std(mean(sampler.chain[:,:,k], axis=0))
    plot((mean(sampler.chain[:,:,k], axis=0)-mu)/sigma)
# Just to check that the sampler has a good acceptance fraction---not
# too high (near 1), not too low (near 0). In this case it was near
# 0.1, which is fine.
sampler.acceptance_fraction
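# (acceptance_fraction is one value per walker; a quick summary is the
# average over the ensemble.)
print(mean(sampler.acceptance_fraction))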
# Now that we've had a bit of burn-in time for the sampler to settle
# down, go ahead and reset, and run for a significant length.
sampler.reset()
result = sampler.run_mcmc(result[0], 500, thin=10)
# Look again, see that the sampler is not really converging very
# fast---the means don't seem to wander randomly, but rather wander in
# a directed fashion.
for k in range(48):
    mu = mean(sampler.chain[:,:,k])
    sigma = std(mean(sampler.chain[:,:,k], axis=0))
    plot((mean(sampler.chain[:,:,k], axis=0)-mu)/sigma)
# Confirm that the autocorrelation length (acor) is very long.
sampler.acor
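# A rough rule of thumb: the number of effectively independent samples
# is about the number of stored iterations divided by the longest
# autocorrelation length, so we want many acor lengths' worth of chain.
print(sampler.chain.shape[1] / max(sampler.acor))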
sampler.reset()
# Run for a very long time, recording a lot fewer samples (only every
# 100 steps of the chain).
result = sampler.run_mcmc(result[0], 5000, thin=100)
for k in range(48):
    mu = mean(sampler.chain[:,:,k])
    sigma = std(mean(sampler.chain[:,:,k], axis=0))
    plot((mean(sampler.chain[:,:,k], axis=0)-mu)/sigma)
# Looking better....
sampler.acor
# Go just a bit further.
result = sampler.run_mcmc(result[0], 1000, thin=100)
sampler.acor
# And now things look pretty good.
for k in range(48):
    mu = mean(sampler.chain[:,:,k])
    sigma = std(mean(sampler.chain[:,:,k], axis=0))
    plot((mean(sampler.chain[:,:,k], axis=0)-mu)/sigma)
# Make it possible to access the chain of samples using the column
# names
pchain = m.to_params(sampler.flatchain, nteachers)
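# For example, pchain['mu'].flatten() is now the array of posterior
# samples of the overall mean mu, one entry per stored sample.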
# Plot the average mu_j and sigma_j from the chain (in blue), and then
# the true mu_j and sigma_j (in black).
errorbar(range(nteachers), mean(squeeze(pchain['mu_teacher']), axis=0), yerr=mean(squeeze(pchain['sigma_teacher']), axis=0), fmt='.b')
errorbar(range(nteachers), squeeze(p0['mu_teacher']), yerr=squeeze(p0['sigma_teacher']), fmt='.k')
axis(xmin=-1, xmax=nteachers)
xlabel(r'Teacher')
ylabel(r'$\mu_j \pm \sigma_j$')
savefig('teachers.pdf')
# You can get the plotutils package from
# http://github.com/farr/plotutils ; it does some nifty things for
# you, like automatically choose the histogram bin width according to
# the amount of data (more data = finer bin width, so you can resolve
# more features of the distribution).
import plotutils.plotutils as pu
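# For illustration only---this is roughly the idea, not necessarily
# plotutils' actual rule: the Freedman-Diaconis prescription scales the
# bin width as n^(-1/3), so more samples automatically give finer bins.
def fd_bin_count(data):
    data = asarray(data)
    iqr = percentile(data, 75) - percentile(data, 25)
    width = 2.0*iqr/len(data)**(1.0/3.0)  # Freedman-Diaconis bin width
    return max(1, int(ceil((data.max() - data.min())/width)))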
# Now make a plot of the expected distribution of teacher means
xs = linspace(p0['mu'] - 3.0*p0['sigma'], p0['mu']+3.0*p0['sigma'], 1000)
# This is the true distribution: N(mu_j, sigma_j) with the true values
# of mu_j and sigma_j.
plot(xs, ss.norm.pdf(xs, loc=p0['mu'], scale=p0['sigma']), '-k')
# Now for each sample of mu_j and sigma_j in the chain, we'll compute
# the sample distribution N(mu_j, sigma_j), evaluate it at the xs, and
# average all those values together. (Make sure you understand how
# this is different from N(<mu_j>, <sigma_j>), which is the single
# distribution evaluated at the average mu_j and sigma_j. This sort
# of thing, by the way, is why people like Bayesian techniques that
# sample the posterior distribution---you don't have to just plot
# things like parameters; you can plot any *function* of the
# parameters you are interested in.)
ys = zeros(xs.shape)
for mu, sigma in zip(pchain['mu'].flatten(), pchain['sigma'].flatten()):
    ys += ss.norm.pdf(xs, loc=mu, scale=sigma)
# Normalize by the number of posterior samples (12000 here: 200
# walkers times 60 stored steps).
ys /= len(pchain['mu'].flatten())
# Here's the plot
plot(xs, ys, '-b')
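# For contrast, overplot the single distribution N(<mu>, <sigma>) at
# the posterior-mean parameters (dashed green); note that it is not
# the same as the blue posterior-averaged curve above.
plot(xs, ss.norm.pdf(xs, loc=mean(pchain['mu']), scale=mean(pchain['sigma'])), '--g')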
xlabel(r'$\mu_j$')
ylabel(r'$p\left( \mu_j \right)$')
savefig('muj.pdf')
# Now just plot some posterior distributions of the mu and sigma
# parameters.
pu.plot_histogram_posterior(pchain['mu'].flatten(), normed=True, histtype='step')
axis(ymax=0.06)
axvline(p0['mu'], color='k')
xlabel(r'$\mu$')
ylabel(r'$p(\mu)$')
savefig('mu.pdf')
pu.plot_histogram_posterior(pchain['sigma'].flatten(), normed=True, histtype='step')
axis(ymax=0.09)
axvline(p0['sigma'], color='k')
xlabel(r'$\sigma$')
ylabel(r'$p(\sigma)$')
savefig('sigma.pdf')