gru.py
"""
Implements a Vanilla RNN
This code was highly inspired by Mohammad Pezeshki
author : Eloi Zablocki
"""
import numpy as np
import theano
from theano import tensor
from blocks import initialization
from blocks.bricks.recurrent import GatedRecurrent
from blocks.bricks import Tanh, Linear, Sigmoid
from blocks.graph import ComputationGraph
from blocks.bricks.cost import SquaredError
from blocks.algorithms import GradientDescent, Scale, CompositeRule, StepClipping
from blocks.extensions import FinishAfter, Printing
from blocks.extensions.monitoring import TrainingDataMonitoring
from blocks.main_loop import MainLoop
from blocks.model import Model
from blocks.filter import VariableFilter
from blocks.roles import WEIGHT, BIAS
from fuel.datasets import IterableDataset
from fuel.streams import DataStream
from datasets import single_bouncing_ball, save_as_gif
floatX = theano.config.floatX
# Parameters
n_u = 225  # input vector size (one flattened 15x15 frame)
n_y = 225  # output vector size
n_h = 500  # number of hidden units
iteration = 300 # number of epochs of gradient descent
print "Building Model"
# Symbolic variables; sequences are laid out as (time, batch, features)
x = tensor.tensor3('x', dtype=floatX)
target = tensor.tensor3('target', dtype=floatX)
# Build the model
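# (input frames -> Linear -> GRU -> Linear -> Sigmoid; the final Sigmoid keeps
#  each predicted pixel in [0, 1])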
linear = Linear(input_dim=n_u, output_dim=n_h, name="first_layer")
rnn = GatedRecurrent(dim=n_h, activation=Tanh())
linear2 = Linear(input_dim=n_h, output_dim=n_y, name="output_layer")
sigm = Sigmoid()
x_transform = linear.apply(x)
h = rnn.apply(x_transform)
predict = sigm.apply(linear2.apply(h))
# Generation-time graph: apply the GRU one step at a time from an explicit
# initial hidden state of shape (batch, n_h)
h_initial = tensor.tensor3('h_initial', dtype=floatX)
h_testing = rnn.apply(x_transform, h_initial, iterate=False)
y_hat_testing = linear2.apply(h_testing)
y_hat_testing = sigm.apply(y_hat_testing)
y_hat_testing.name = 'y_hat_testing'
# Cost: squared error between the predicted and target frames
cost = SquaredError().apply(predict, target)
# Parameter initialization: small isotropic Gaussian weights, zero biases
for brick in (rnn, linear, linear2):
    brick.weights_init = initialization.IsotropicGaussian(0.01)
    brick.biases_init = initialization.Constant(0)
    brick.initialize()
cg = ComputationGraph(cost)
print(VariableFilter(roles=[WEIGHT, BIAS])(cg.variables))
# Training process
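# Gradient descent where each step is first clipped to a maximum norm of 10,
# then scaled by a fixed learning rate of 4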
algorithm = GradientDescent(cost=cost, params=cg.parameters, step_rule=CompositeRule([StepClipping(10.0), Scale(4)]))
monitor_cost = TrainingDataMonitoring([cost], prefix="train", after_epoch=True)
print "Model built"
############
# TEST
############
# Build input and target sequences: the target is the input shifted forward by
# one frame, so the network learns next-frame prediction
inputs = single_bouncing_ball(10, 10, 200, 15, 2)
outputs = np.zeros(inputs.shape, dtype=floatX)
outputs[:, 0:-1, :, :] = inputs[:, 1:, :, :]
print(inputs.dtype)
print(outputs.dtype)
print('Building DataStream ...')
dataset = IterableDataset({'x': inputs, 'target': outputs})
stream = DataStream(dataset)
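# Each dataset element is one full sequence; the main loop below trains for
# `iteration` epochs over the stream (see FinishAfter)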
model = Model(cost)
main_loop = MainLoop(data_stream=stream, algorithm=algorithm, extensions=[monitor_cost, FinishAfter(after_n_epochs=iteration), Printing()], model=model)
print('Starting training ...')
main_loop.run()
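# Compile two generation functions:
#  - generate1 runs the trained network over a whole seed sequence and returns
#    the predictions and hidden states for every time step
#  - generate2 performs a single recurrent step from a given hidden state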
generate1 = theano.function([x], [predict, h])
generate2 = theano.function([x, h_initial], [y_hat_testing, h_testing])
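# Seed the generator with the first 20 frames of the first sequence (batch of 1)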
initial_seq = inputs[0, :20, 0:1, :]
current_output, current_hidden = generate1(initial_seq)
# Keep only the last time step as the starting point for free-running generation
current_output, current_hidden = current_output[-1:], current_hidden[-1:]
generated_seq = initial_seq[:, 0]
next_input = current_output
prev_state = current_hidden
print(np.shape(next_input))
print(np.shape(prev_state))
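# Closed-loop generation: feed the model's own prediction back in as the next
# input for 200 steps, collecting each generated frame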
for i in range(200):
    current_output, current_hidden = generate2(next_input, prev_state)
    next_input = current_output
    prev_state = current_hidden
    generated_seq = np.vstack((generated_seq, current_output[:, 0]))
print(generated_seq.shape)
# Reshape the flat 225-pixel frames back into square images and save as a GIF
side = int(np.sqrt(generated_seq.shape[1]))
save_as_gif(generated_seq.reshape(generated_seq.shape[0], side, side), "results/gru.gif")