# HybridMonteCarlo.py
# forked from timestocome/DeepLearning
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
#!/python
import numpy as np
from theano import function, shared
from theano import tensor as TT
import theano
# adapted from original source code
# http://deeplearning.net/tutorial/
# Models physical kinetic and potential energy in a system of particles
# obtain new velocity from a Gaussian distribution
# perform n leapfrog steps to get to new state for X
# accept or reject new X
##################################################################################
# if possible run on GPU rather than CPU
##################################################################################
# Prefer the GPU when available; fall back silently to whatever device
# theano already configured.
GPU = True
if GPU:
    print ("Trying to run under a GPU. ")
    try:
        theano.config.device = 'gpu'
    except Exception:
        # theano forbids changing the device after initialization; keep the
        # one already set. (Was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit.)
        pass
    theano.config.floatX = 'float32'
else:
    print ("Running with a CPU. ")
###################################################################################
#
###################################################################################
sharedX = (lambda X, name:shared(np.asarray(X, dtype=theano.config.floatX), name=name))
def kinetic_energy(vel):
    """Per-row kinetic energy of ``vel`` with unit mass: 0.5 * ||v||^2.

    Expects a matrix (one row per particle/chain); reduces over axis 1 and
    returns a vector with one energy per row.
    """
    squared_speed = (vel ** 2).sum(axis=1)
    return 0.5 * squared_speed
def hamiltonian(pos, vel, energy_fn):
    """Total energy (mass = 1): potential at ``pos`` plus kinetic energy of ``vel``.

    ``energy_fn`` maps a position matrix to a per-row potential energy vector.
    """
    potential = energy_fn(pos)
    kinetic = kinetic_energy(vel)
    return potential + kinetic
def metropolis_hastings_accept(energy_prev, energy_next, s_rng):
    """Symbolic Metropolis-Hastings accept test.

    Accepts (True) where exp(E_prev - E_next) >= u with u ~ Uniform(0, 1),
    one independent draw per entry of ``energy_prev``.
    """
    energy_drop = energy_prev - energy_next
    threshold = s_rng.uniform(size=energy_prev.shape)
    return (TT.exp(energy_drop) - threshold) >= 0
# return position, velocity after n steps
def simulate_dynamics(initial_pos, initial_vel, stepsize, n_steps, energy_fn):
    """Build the symbolic graph for ``n_steps`` leapfrog steps of Hamiltonian dynamics.

    Uses the leapfrog scheme: velocities live on half-step offsets from the
    positions, which makes the integrator time-reversible (needed for a valid
    HMC proposal). Returns the symbolic pair (final_pos, final_vel).

    Parameters
    ----------
    initial_pos : symbolic matrix of starting positions (one row per chain
        — assumed from the axis-1 reductions used elsewhere; confirm).
    initial_vel : symbolic matrix of starting velocities, same shape.
    stepsize : symbolic scalar leapfrog step size.
    n_steps : number of leapfrog steps (used as ``n_steps - 1`` scan iterations
        plus the explicit first/last half-steps below).
    energy_fn : callable mapping positions to a per-row potential energy.
    """
    # using scan perform one step
    # returns matrix of new positions, velocities, and dictionary of updates
    def leapfrog(pos, vel, step):
        # from pos(t) and vel(t-stepsize//2), compute vel(t+stepsize//2)
        dE_dpos = TT.grad(energy_fn(pos).sum(), pos)
        new_vel = vel - step * dE_dpos
        # from vel(t+stepsize//2) compute pos(t+stepsize)
        new_pos = pos + step * new_vel
        # empty dict: leapfrog draws no random numbers, so no scan updates
        return [new_pos, new_vel], {}
    # compute velocity at time-step: t + stepsize//2
    initial_energy = energy_fn(initial_pos)
    dE_dpos = TT.grad(initial_energy.sum(), initial_pos)
    vel_half_step = initial_vel - 0.5 * stepsize * dE_dpos
    # compute position at time-step: t + stepsize
    pos_full_step = initial_pos + stepsize * vel_half_step
    # perform leapfrog updates: the scan op is used to repeatedly compute
    # vel(t + (m-1/2)*stepsize) and pos(t + m*stepsize) for m in [2,n_steps].
    (all_pos, all_vel), scan_updates = theano.scan(
        leapfrog,
        outputs_info=[
            dict(initial=pos_full_step),
            dict(initial=vel_half_step),
        ],
        non_sequences=[stepsize],
        n_steps=n_steps - 1)
    # scan returns the whole trajectory; only the end state is needed
    final_pos = all_pos[-1]
    final_vel = all_vel[-1]
    # NOTE: Scan always returns an updates dictionary, in case the
    # scanned function draws samples from a RandomStream. These
    # updates must then be used when compiling the Theano function, to
    # avoid drawing the same random numbers each time the function is
    # called. In this case however, we consciously ignore
    # "scan_updates" because we know it is empty.
    assert not scan_updates
    # The last velocity returned by scan is vel(t +
    # (n_steps - 1 / 2) * stepsize) We therefore perform one more half-step
    # to return vel(t + n_steps * stepsize)
    energy = energy_fn(final_pos)
    final_vel = final_vel - 0.5 * stepsize * TT.grad(energy.sum(), final_pos)
    # return new proposal state
    return final_pos, final_vel
def hmc_move(s_rng, positions, energy_fn, stepsize, n_steps):
    """One symbolic HMC transition: sample momentum, integrate, accept/reject.

    Returns the pair (accept, final_pos): a symbolic boolean vector with one
    accept flag per chain, and the proposal positions at the end of the
    leapfrog trajectory.
    """
    # fresh momentum drawn from a standard Gaussian, one value per entry
    proposal_vel = s_rng.normal(size=positions.shape)

    # integrate Hamiltonian dynamics to obtain the proposal state
    proposal_pos, end_vel = simulate_dynamics(
        initial_pos=positions,
        initial_vel=proposal_vel,
        stepsize=stepsize,
        n_steps=n_steps,
        energy_fn=energy_fn,
    )

    # Metropolis-Hastings test on the joint (position, momentum) energies
    accept = metropolis_hastings_accept(
        energy_prev=hamiltonian(positions, proposal_vel, energy_fn),
        energy_next=hamiltonian(proposal_pos, end_vel, energy_fn),
        s_rng=s_rng,
    )
    return accept, proposal_pos
def hmc_updates(positions, stepsize, avg_acceptance_rate, final_pos, accept,
                target_acceptance_rate, stepsize_inc, stepsize_dec,
                stepsize_min, stepsize_max, avg_acceptance_slowness):
    """Build the shared-variable update list applied after each HMC move.

    Returns [(positions, new), (stepsize, new), (avg_acceptance_rate, new)]
    suitable for the ``updates`` argument of ``theano.function``.
    """
    # lift the per-chain accept flags to final_pos's rank so switch broadcasts
    broadcast_accept = accept.dimshuffle(0, *(('x',) * (final_pos.ndim - 1)))
    # accepted chains jump to the proposal; rejected chains stay put
    pos_update = TT.switch(broadcast_accept, final_pos, positions)
    ## STEPSIZE UPDATES ##
    # grow the stepsize while acceptance is above target (sampler is too
    # conservative and can afford better mixing); shrink it when below
    # (sampler is too "noisy"); then clamp to the legal range
    raw_stepsize = TT.switch(avg_acceptance_rate > target_acceptance_rate,
                             stepsize * stepsize_inc, stepsize * stepsize_dec)
    stepsize_update = TT.clip(raw_stepsize, stepsize_min, stepsize_max)
    # exponential moving average of the acceptance rate; upcast so the mean
    # of the accept flags is computed in a floating dtype
    mean_dtype = theano.scalar.upcast(accept.dtype, avg_acceptance_rate.dtype)
    rate_update = TT.add(
        avg_acceptance_slowness * avg_acceptance_rate,
        (1.0 - avg_acceptance_slowness) * accept.mean(dtype=mean_dtype))
    return [(positions, pos_update),
            (stepsize, stepsize_update),
            (avg_acceptance_rate, rate_update)]
# create Theano graph for Hybrid Monte Carlo
class HMC_sampler(object):
    """Holds the shared state and compiled step function of an HMC chain.

    Instances are built through ``new_from_shared_positions``; the attributes
    are exactly the keyword arguments that constructor passes in (positions,
    stepsize, simulate, ...).
    """

    def __init__(self, **kwargs):
        # stash every keyword argument as an attribute of the same name
        for key, value in kwargs.items():
            setattr(self, key, value)

    @classmethod
    def new_from_shared_positions(
        cls,
        shared_positions,
        energy_fn,
        initial_stepsize=0.01,
        target_acceptance_rate=.9,
        n_steps=20,
        stepsize_dec=0.98,
        stepsize_min=0.001,
        stepsize_max=0.25,
        stepsize_inc=1.02,
        # used in geometric avg. 1.0 would be not moving at all
        avg_acceptance_slowness=0.9,
        seed=12345
    ):
        """Build the HMC graph around ``shared_positions`` and compile it.

        ``shared_positions`` is a theano shared matrix updated in place on
        every ``simulate``/``draw`` call; ``energy_fn`` maps positions to a
        per-row potential energy.
        """
        # shared state: adaptive stepsize and moving-average acceptance rate
        step_shared = sharedX(initial_stepsize, 'hmc_stepsize')
        rate_shared = sharedX(target_acceptance_rate, 'avg_acceptance_rate')
        rng_streams = TT.shared_randomstreams.RandomStreams(seed)

        # symbolic graph for one `n_steps` HMC transition
        accept, final_pos = hmc_move(
            rng_streams, shared_positions, energy_fn, step_shared, n_steps)

        # updates applied to the shared variables on every `simulate` call
        simulate_updates = hmc_updates(
            shared_positions,
            step_shared,
            rate_shared,
            final_pos=final_pos,
            accept=accept,
            stepsize_min=stepsize_min,
            stepsize_max=stepsize_max,
            stepsize_inc=stepsize_inc,
            stepsize_dec=stepsize_dec,
            target_acceptance_rate=target_acceptance_rate,
            avg_acceptance_slowness=avg_acceptance_slowness)

        # compile: no inputs/outputs, all work happens through the updates
        simulate = function([], [], updates=simulate_updates)

        # bundle everything into a sampler object
        return cls(
            positions=shared_positions,
            stepsize=step_shared,
            stepsize_min=stepsize_min,
            stepsize_max=stepsize_max,
            avg_acceptance_rate=rate_shared,
            target_acceptance_rate=target_acceptance_rate,
            s_rng=rng_streams,
            _updates=simulate_updates,
            simulate=simulate)

    def draw(self, **kwargs):
        """Advance the chain one compiled step and return a copy of the positions."""
        self.simulate()
        return self.positions.get_value(borrow=False)
#########################################################################################
def sampler_on_nd_gaussian(sampler_cls, burnin, n_samples, dim=10):
    """Smoke-test an HMC sampler against a ``dim``-dimensional Gaussian target.

    Builds a random (symmetric, unit-diagonal) covariance, runs ``burnin``
    warm-up transitions, collects ``n_samples`` draws from ``batchsize``
    parallel chains, prints empirical vs. target moments, and returns the
    sampler for further inspection.
    """
    batchsize = 3
    rng = np.random.RandomState(42)
    # Define a covariance and mu for a gaussian
    mu = np.array(rng.rand(dim) * 10, dtype=theano.config.floatX)
    cov = np.array(rng.rand(dim, dim), dtype=theano.config.floatX)
    cov = (cov + cov.T) / 2.           # symmetrize
    cov[np.arange(dim), np.arange(dim)] = 1.0   # unit diagonal
    cov_inv = np.linalg.inv(cov)

    # Define energy function for a multi-variate Gaussian:
    # E(x) = 0.5 * (x - mu)^T cov^-1 (x - mu), evaluated per row
    def gaussian_energy(x):
        return 0.5 * (theano.tensor.dot((x - mu), cov_inv) * (x - mu)).sum(axis=1)

    # Declared shared random variable for positions
    position = rng.randn(batchsize, dim).astype(theano.config.floatX)
    position = theano.shared(position)
    # Create HMC sampler
    sampler = sampler_cls(position, gaussian_energy, initial_stepsize=1e-3, stepsize_max=0.5)

    # Burn-in: advance the chain without keeping the draws. (The original
    # built a throwaway list of burn-in samples; a plain loop avoids the
    # wasted memory and the unused variable.)
    for _ in range(burnin):
        sampler.draw()

    # `n_samples`: result is a 3D tensor of dim [n_samples, batchsize, dim]
    _samples = np.asarray([sampler.draw() for _ in range(n_samples)])
    # Flatten to [n_samples * batchsize, dim]
    samples = _samples.T.reshape(dim, -1).T

    print('****** TARGET VALUES ******')
    print('target mean:', mu)
    print('target cov:\n', cov)
    print('****** EMPIRICAL MEAN/COV USING HMC ******')
    print('empirical mean: ', samples.mean(axis=0))
    print('empirical_cov:\n', np.cov(samples.T))
    print('****** HMC INTERNALS ******')
    print('final stepsize', sampler.stepsize.get_value())
    print('final acceptance_rate', sampler.avg_acceptance_rate.get_value())
    return sampler
def test_hmc():
    """End-to-end check: on a 5-d Gaussian the sampler's stepsize adapts
    into the legal range and the acceptance rate lands near its target."""
    hmc = sampler_on_nd_gaussian(HMC_sampler.new_from_shared_positions,
                                 burnin=1000, n_samples=1000, dim=5)
    rate_error = abs(hmc.avg_acceptance_rate.get_value() -
                     hmc.target_acceptance_rate)
    assert rate_error < .1
    assert hmc.stepsize_min <= hmc.stepsize.get_value() <= hmc.stepsize_max
#################################################################################################
if __name__ == "__main__":
    # Guarded script entry: run the (slow, ~2000-transition) smoke test only
    # when executed directly, not when this module is imported.
    test_hmc()