forked from kratarth1203/NeuralNet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
rnndbn.py
453 lines (385 loc) · 17.8 KB
/
rnndbn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
# Author: Kratarth Goel
# BITS Pilani Goa Campus (2014)
# RNN-DBN for polyphonic music generation
# for any further clarifications visit
# for the ICANN 2014 paper or email me @ kratarthgoel@gmail.com
# This code is based on the one writen by Nicolas Boulanger-Lewandowski
# University of Montreal (2012)
# RNN-RBM deep learning tutorial
# More information at http://deeplearning.net/tutorial/rnnrbm.html
import os
import sys
import numpy as np
import zipfile
try:
import urllib.request as urllib # for backwards compatibility
except ImportError:
import urllib2 as urllib
try:
from midi.utils import midiread, midiwrite
except ImportError:
raise ImportError("Need GPL licensed midi utils",
"Can be downloaded by doing the following in the script dir",
"wget http://www.iro.umontreal.ca/~lisa/deep/midi.zip; unzip midi.zip")
import theano
from theano import tensor
from theano.tensor.shared_randomstreams import RandomStreams
import cPickle as pickle
#Don't use python long as this doesn't work on 32 bit computers.
np.random.seed(0xbeef)
rng = RandomStreams(seed=np.random.randint(1 << 30))
theano.config.warn.subtensor_merge_bug = False
def download(url, server_fname, local_fname=None, progress_update_percentage=5,
bypass_certificate_check=False):
"""
An internet download utility modified from
http://stackoverflow.com/questions/22676/
how-do-i-download-a-file-over-http-using-python/22776#22776
"""
if bypass_certificate_check:
import ssl
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
u = urllib.urlopen(url, context=ctx)
else:
u = urllib.urlopen(url)
if local_fname is None:
local_fname = server_fname
full_path = local_fname
meta = u.info()
with open(full_path, 'wb') as f:
try:
file_size = int(meta.get("Content-Length"))
except TypeError:
print("WARNING: Cannot get file size, displaying bytes instead!")
file_size = 100
print("Downloading: %s Bytes: %s" % (server_fname, file_size))
file_size_dl = 0
block_sz = int(1E7)
p = 0
while True:
buffer = u.read(block_sz)
if not buffer:
break
file_size_dl += len(buffer)
f.write(buffer)
if (file_size_dl * 100. / file_size) > p:
status = r"%10d [%3.2f%%]" % (file_size_dl, file_size_dl *
100. / file_size)
print(status)
p += progress_update_percentage
def fetch_nottingham():
url = "http://www.iro.umontreal.ca/~lisa/deep/data/Nottingham.zip"
data_path = "Nottingham.zip"
if not os.path.exists(data_path):
download(url, data_path)
key_range = (21, 109)
dt = 0.3
all_data = []
with zipfile.ZipFile(data_path, "r") as f:
for name in f.namelist():
if ".mid" not in name:
# Skip README
continue
data = midiread(f, key_range, dt).piano_roll.astype(
theano.config.floatX)
all_data.extend(data)
return key_range, dt, all_data
def build_rbm(v, W, bv, bh, k):
'''Construct a k-step Gibbs chain starting at v for an RBM.
v : Theano vector or matrix
If a matrix, multiple chains will be run in parallel (batch).
W : Theano matrix
Weight matrix of the RBM.
bv : Theano vector
Visible bias vector of the RBM.
bh : Theano vector
Hidden bias vector of the RBM.
k : scalar or Theano scalar
Length of the Gibbs chain.
Return a (v_sample, cost, monitor, updates) tuple:
v_sample : Theano vector or matrix with the same shape as `v`
Corresponds to the generated sample(s).
cost : Theano scalar
Expression whose gradient with respect to W, bv, bh is the CD-k approximation
to the log-likelihood of `v` (training example) under the RBM.
The cost is averaged in the batch case.
monitor: Theano scalar
Pseudo log-likelihood (also averaged in the batch case).
updates: dictionary of Theano variable -> Theano variable
The `updates` object returned by scan.'''
def gibbs_step(v):
mean_h = tensor.nnet.sigmoid(tensor.dot(v, W) + bh)
h = rng.binomial(size=mean_h.shape, n=1, p=mean_h,
dtype=theano.config.floatX)
mean_v = tensor.nnet.sigmoid(tensor.dot(h, W.T) + bv)
v = rng.binomial(size=mean_v.shape, n=1, p=mean_v,
dtype=theano.config.floatX)
return mean_v, v
chain, updates = theano.scan(lambda v: gibbs_step(v)[1], outputs_info=[v],
n_steps=k)
v_sample = chain[-1]
mean_v = gibbs_step(v_sample)[0]
monitor = tensor.xlogx.xlogy0(v, mean_v) + tensor.xlogx.xlogy0(1 - v, 1 - mean_v)
monitor = monitor.sum() / v.shape[0]
def free_energy(v):
return -(v * bv).sum() - tensor.log(1 + tensor.exp(tensor.dot(v, W) + bh)).sum()
cost = (free_energy(v) - free_energy(v_sample)) / v.shape[0]
return v_sample, cost, monitor, updates
def shared_normal(num_rows, num_cols, scale=1):
'''Initialize a matrix shared variable with normally distributed
elements.'''
return theano.shared(np.random.normal(
scale=scale, size=(num_rows, num_cols)).astype(theano.config.floatX))
def shared_zeros(*shape):
'''Initialize a vector shared variable with zero elements.'''
return theano.shared(np.zeros(shape, dtype=theano.config.floatX))
def build_rnnrbm(n_visible, n_hidden, n_hidden_recurrent):
'''Construct a symbolic RNN-RBM and initialize parameters.
n_visible : integer
Number of visible units.
n_hidden : integer
Number of hidden units of the conditional RBMs.
n_hidden_recurrent : integer
Number of hidden units of the RNN.
Return a (v, v_sample, cost1, monitor1, params1, updates_train1,cost2, monitor2, params2, updates_train2, v_t,
updates_generate) tuple:
v : Theano matrix
Symbolic variable holding an input sequence (used during training)
v_sample : Theano matrix
Symbolic variable holding the negative particles for CD log-likelihood
gradient estimation (used during training)
cost1(2) : Theano scalar
Expression whose gradient (considering v_sample constant) corresponds to the
LL gradient of the RNN-RBM1(2) i.e. the visible layer and the first hidden layer of the DBN
(used during training)
monitor1(2) : Theano scalar
Frame-level pseudo-likelihood (useful for monitoring during training) for RNN_RBM1(2)
params1(2) : tuple of Theano shared variables
The parameters of the RNN-RBM1(2) model to be optimized during training.
updates_train1(2) : dictionary of Theano variable -> Theano variable
Update object that should be passed to theano.function when compiling the
training function for the RNN-RBM1(2).
v_t : Theano matrix
Symbolic variable holding a generated sequence (used during sampling)
updates_generate : dictionary of Theano variable -> Theano variable
Update object that should be passed to theano.function when compiling the
generation function.'''
W1 = shared_normal(n_visible, n_hidden, 0.01)
bv = shared_zeros(n_visible)
bh1 = shared_zeros(n_hidden)
Wuh1 = shared_normal(n_hidden_recurrent, n_hidden, 0.0001)
Wuv = shared_normal(n_hidden_recurrent, n_visible, 0.0001)
Wvu = shared_normal(n_visible, n_hidden_recurrent, 0.0001)
Wuu = shared_normal(n_hidden_recurrent, n_hidden_recurrent, 0.0001)
bu = shared_zeros(n_hidden_recurrent)
params1 = W1, bv, bh1, Wuh1, Wuv, Wvu, Wuu, bu # learned parameters as shared
# variables for RNN_RBM1
W2 = shared_normal(n_hidden, n_hidden, 0.01)
bh2 = shared_zeros(n_hidden)
Wuh2 = shared_normal(n_hidden_recurrent, n_hidden, 0.0001)
params2 = W2, bh2, bh1, Wuh2, Wuh1 # learned parameters as shared
# variables for RNN-RBM2
v = tensor.matrix() # a training sequence
lin_output = tensor.dot(v, W1) + bh1
activation = theano.tensor.nnet.sigmoid
h = activation(lin_output)
u0 = tensor.zeros((n_hidden_recurrent,)) # initial value for the RNN hidden
# units
# deterministic recurrence to compute the variable
# biases bv_t , bh1_t at each time step.
def recurrence1(v_t, u_tm1):
bv_t = bv + tensor.dot(u_tm1, Wuv)
bh1_t = bh1 + tensor.dot(u_tm1, Wuh1)
u_t = tensor.tanh(bu + tensor.dot(v_t, Wvu) + tensor.dot(u_tm1, Wuu))
return [u_t, bv_t, bh1_t]
# If `h_t` is given, deterministic recurrence to compute the variable
# biases bh1_t, bh2_t at each time step. If `h_t` is None, same recurrence
# but with a separate Gibbs chain at each time step to sample (generate)
# of the top layer RBM from the RNN-DBN. The resulting sample v_t is returned
# in order to be passed down to the sequence history.
def recurrence2(v_t,h_t, u_tm1):
bh1_t = bh1 + tensor.dot(u_tm1, Wuh1)
bh2_t = bh2 + tensor.dot(u_tm1, Wuh2)
generate = h_t is None
if generate:
h_t, _, _, updates = build_rbm(tensor.zeros((n_hidden,)), W2, bh1_t,
bh2_t, k=25)
u_t = tensor.tanh(bu + tensor.dot(v_t, Wvu) + tensor.dot(u_tm1, Wuu))
return ([u_t, h_t], updates) if generate else [u_t, bh1_t, bh2_t]
# function used for generation of a sample from the RNN_DBN.
# Starting with the sampling if the first hidden layer by
# Gibbs Sampling in the top layer RBM of the RNN_DBN, which involves
# generation of the RBM parameters that depend upon the RNN.
# This is followed by generation of the visible layer sample.
def generate(u_tm1):
bh1_t = bh1 + tensor.dot(u_tm1, Wuh1)
bh2_t = bh2 + tensor.dot(u_tm1, Wuh2)
h_t, _, _, updates = build_rbm(tensor.zeros((n_hidden,)), W2, bh1_t,
bh2_t, k=25)
lin_v_t = tensor.dot(h_t, W1.T) + bv
mean_v = activation(lin_v_t)
v_t = rng.binomial(size=mean_v.shape, n=1, p=mean_v,
dtype=theano.config.floatX)
u_t = tensor.tanh(bu + tensor.dot(v_t, Wvu) + tensor.dot(u_tm1, Wuu))
return ([u_t,v_t], updates)
# For training, the deterministic recurrence is used to compute all the
# {bv_t, bh_t, 1 <= t <= T} given v. Conditional RBMs can then be trained
# in batches using those parameters.
(u_t, bv_t, bh1_t), updates_train1 = theano.scan(
lambda v_t, u_tm1, *_: recurrence1(v_t, u_tm1),
sequences=v, outputs_info=[u0, None, None], non_sequences=params1)
v_sample, cost1, monitor1, updates_rbm1 = build_rbm(v, W1, bv_t[:], bh1_t[:],
k=15)
updates_train1.update(updates_rbm1)
(u_t, bh1_t, bh2_t), updates_train2 = theano.scan(
lambda v_t, h_t, u_tm1, *_: recurrence2(v_t , h_t, u_tm1),
sequences=[v,h], outputs_info=[u0, None, None], non_sequences=params2)
h1_sample, cost2, monitor2, updates_rbm2 = build_rbm(h, W2, bh1_t[:], bh2_t[:],
k=15)
updates_train2.update(updates_rbm2)
# symbolic loop for sequence generation
(u_t,v_t), updates_generate = theano.scan(
lambda u_tm1,*_ : generate(u_tm1), outputs_info = [u0,None],
non_sequences = params2, n_steps=200)
return (v, v_sample, cost1, monitor1, params1, updates_train1, h, h1_sample,cost2, monitor2, params2, updates_train2, v_t,
updates_generate)
'''
return (v, v_sample, cost1, monitor1, params1, updates_train1, v_t,
updates_generate)
'''
class RnnRbm:
'''Simple class to train an RNN-RBM from MIDI files and to generate sample
sequences.'''
def __init__(self, n_hidden=150, n_hidden_recurrent=100, lr=0.001,
r=(21, 109), dt=0.3):
'''Constructs and compiles Theano functions for training and sequence
generation.
n_hidden : integer
Number of hidden units of the conditional RBMs.
n_hidden_recurrent : integer
Number of hidden units of the RNN.
lr : float
Learning rate
r : (integer, integer) tuple
Specifies the pitch range of the piano-roll in MIDI note numbers, including
r[0] but not r[1], such that r[1]-r[0] is the number of visible units of the
RBM at a given time step. The default (21, 109) corresponds to the full range
of piano (88 notes).
dt : float
Sampling period when converting the MIDI files into piano-rolls, or
equivalently the time difference between consecutive time steps.'''
self.r = r
self.dt = dt
(v, v_sample, cost1, monitor1, params1, updates_train1, h, h1_sample , cost2, monitor2, params2, updates_train2, v_t,
updates_generate) = build_rnnrbm(r[1] - r[0], n_hidden,
n_hidden_recurrent)
'''
(v, v_sample, cost1, monitor1, params1, updates_train1,v_t,
updates_generate) = build_rnnrbm(r[1] - r[0], n_hidden,
n_hidden_recurrent)
'''
gradient1 = tensor.grad(cost1, params1, consider_constant=[v_sample])
updates_train1.update(((p, p - lr * g) for p, g in zip(params1,
gradient1)))
gradient2 = tensor.grad(cost2, params2, consider_constant=[h1_sample])
updates_train2.update(((p, p - lr * g) for p, g in zip(params2,
gradient2)))
self.train_function1 = theano.function([v], monitor1,
updates=updates_train1)
self.train_function2 = theano.function([v], monitor2,
updates=updates_train2)
self.generate_function = theano.function([], v_t,
updates=updates_generate)
def train_RNNRBM1(self, dataset, batch_size=100, num_epochs=200):
'''Train the RNN-RBM via stochastic gradient descent (SGD) using MIDI
files converted to piano-rolls.
files : list of strings
List of MIDI files that will be loaded as piano-rolls for training.
batch_size : integer
Training sequences will be split into subsequences of at most this size
before applying the SGD updates.
num_epochs : integer
Number of epochs (pass over the training set) performed. The user can
safely interrupt training with Ctrl+C at any time.'''
try:
for epoch in xrange(num_epochs):
np.random.shuffle(dataset)
costs1 = []
for s, sequence in enumerate(dataset):
for i in xrange(0, len(sequence), batch_size):
cost1 = self.train_function1(sequence[i:i + batch_size])
costs1.append(cost1)
print('Epoch %i/%i' % (epoch + 1, num_epochs))
print(np.mean(costs1))
sys.stdout.flush()
except KeyboardInterrupt:
print('Interrupted by user.')
def train_RNNRBM2(self, dataset, batch_size=100, num_epochs=200):
'''Train the RNN-RBM via stochastic gradient descent (SGD) using MIDI
files converted to piano-rolls.
files : list of strings
List of MIDI files that will be loaded as piano-rolls for training.
batch_size : integer
Training sequences will be split into subsequences of at most this size
before applying the SGD updates.
num_epochs : integer
Number of epochs (pass over the training set) performed. The user can
safely interrupt training with Ctrl+C at any time.'''
try:
for epoch in xrange(num_epochs):
np.random.shuffle(dataset)
costs2 = []
for s, sequence in enumerate(dataset):
for i in xrange(0, len(sequence), batch_size):
cost2 = self.train_function2(sequence[i:i + batch_size])
costs2.append(cost2)
print('For 2nd layer Epoch %i/%i' % (epoch + 1, num_epochs))
print(np.mean(costs2))
sys.stdout.flush()
except KeyboardInterrupt:
print('Interrupted by user.')
def generate(self):
'''Generate a sample sequence, plot the resulting piano-roll and save
it as a MIDI file.
filename : string
A MIDI file will be created at this location.
show : boolean
If True, a piano-roll of the generated sequence will be shown.'''
piano_roll = self.generate_function()
return piano_roll
def train_rnnrbm(model, dataset, batch_size=100, num_epochs=200):
model.train_RNNRBM1(dataset,
batch_size=batch_size, num_epochs=num_epochs)
model.train_RNNRBM2(dataset,
batch_size=batch_size, num_epochs=num_epochs)
return model
if __name__ == '__main__':
key_range, dt, dataset = fetch_nottingham()
# coarse model persistance
if not os.path.exists('saved_rnndbn.pkl'):
model = RnnRbm()
model = train_rnnrbm(model, dataset)
cur = sys.getrecursionlimit()
sys.setrecursionlimit(40000)
with open('saved_rnndbn.pkl', mode='w') as f:
pickle.dump(model, f, -1)
sys.setrecursionlimit(cur)
with open('saved_rnndbn.pkl', mode='r') as f:
model = pickle.load(f)
import matplotlib
matplotlib.use('Agg')
sample_files = ["sample1.mid", "sample2.mid"]
for s in sample_files:
piano_roll = model.generate()
midiwrite(s, piano_roll, key_range, dt)
extent = (0, dt * len(piano_roll)) + key_range
import matplotlib.pyplot as plt
plt.imshow(piano_roll.T, origin='lower', aspect='auto',
interpolation='nearest', cmap="gray_r",
extent=extent)
plt.xlabel('time (s)')
plt.ylabel('MIDI note number')
plt.title('generated piano-roll')
plt.savefig(s[:-4] + '.png')