# rnntests.py
#
# From https://m.reddit.com/r/MachineLearning/comments/3sok8k/tensorflow_basic_rnn_example_with_variable_length/
# Just runs an untrained, randomly initialized network:
#   a) basics
#   b) test cwrnn (clockwork RNN)
import argparse

import numpy as np
import matplotlib.pylab as pl

import tensorflow as tf
# Pre-1.0 TensorFlow import paths. CWRNNCell is not part of stock TensorFlow
# and requires a fork that provides a clockwork RNN cell.
from tensorflow.models.rnn import rnn, rnn_cell, seq2seq
from tensorflow.models.rnn.rnn_cell import BasicRNNCell, BasicLSTMCell, LSTMCell, CWRNNCell


def get_seq_input_data(n_steps, batch_size, seq_width):
    # Impulse input: zeros everywhere except a +1 pulse at t=0 and a -1 pulse
    # halfway through the sequence.
    # seq_input_data = np.random.rand(n_steps, batch_size, seq_width).astype('float32')
    seq_input_data = np.zeros((n_steps, batch_size, seq_width)).astype('float32')
    seq_input_data[0, :, :] = 1.
    seq_input_data[n_steps // 2, :, :] = -1.
    return seq_input_data
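
# A tiny, hypothetical sanity check of the impulse input above; it is not
# called anywhere, but running it prints the pulse layout for a small case.
def _show_impulse_input_example():
    data = get_seq_input_data(4, 1, 1)
    # prints [ 1.  0. -1.  0.]: +1 at t=0, -1 at t=n_steps//2
    print data[:, 0, 0]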

# sketch for a possible seq2seq-based class (unused):
# class seqrnn():
#     outputs, states = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
#         loop_function=loop if infer else None, scope='rnnlm')


class srnn():
    def __init__(self, args):
        self.size = args.rnn_size
        self.n_steps = args.n_steps
        self.batch_size = args.batch_size
        self.input_dim = args.input_dim
        self.num_layers = args.num_layers
        initializer = tf.random_uniform_initializer(-0.8, 0.8)
        # initializer = tf.zeros_initializer((size*2,1), dtype=tf.float32)
        # sequence we will provide at runtime
        self.seq_input = tf.placeholder(tf.float32, [self.n_steps, self.batch_size, self.input_dim])
        # what timestep we want to stop at
        self.early_stop = tf.placeholder(tf.int32)
        # inputs for rnn need to be a list, one item per timestep; we split
        # the input along the time axis and reshape each piece, because split
        # keeps the (now singleton) split dimension by default
        self.inputs = [tf.reshape(i, (self.batch_size, self.input_dim)) for i in tf.split(0, self.n_steps, self.seq_input)]
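        # Shape illustration (hypothetical sizes n_steps=3, batch_size=2,
        # input_dim=4):
        #   tf.split(0, 3, seq_input) -> three tensors of shape (1, 2, 4)
        #   tf.reshape(t, (2, 4))     -> (2, 4), dropping the singleton time axis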
        # result = tf.placeholder(tf.float32, [n_steps, batch_size, seq_width])
        self.result = tf.placeholder(tf.float32, [None, self.input_dim])
        if args.cell_type == "srnn":
            cell = BasicRNNCell(self.size)
        elif args.cell_type == "lstm":
            cell = BasicLSTMCell(self.size, forget_bias=1.0)
        elif args.cell_type == "lstmp":
            cell = LSTMCell(self.size, self.input_dim, initializer=initializer)
        elif args.cell_type == "cw":
            # clockwork RNN with module periods 1, 4, 16, 64
            cell = CWRNNCell(self.size, [1, 4, 16, 64])
        else:
            raise ValueError("unknown cell_type: %s" % args.cell_type)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * self.num_layers)
        # initial_state = cell.zero_state(batch_size, tf.float32)
        self.initial_state = tf.random_uniform([self.batch_size, self.cell.state_size], -0.1, 0.1)
        # variables live under scope RNN -> BasicRNNCell -> get_variable("Matrix", "Bias")
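        # Note on state_size: for BasicRNNCell it equals rnn_size, for
        # BasicLSTMCell it is 2*rnn_size (cell plus hidden), and MultiRNNCell
        # concatenates the per-layer states, so state_size scales with
        # num_layers.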
        # network type
        if args.rnn_type == "rnn":
            self.outputs, self.states = rnn.rnn(self.cell, self.inputs,
                                                initial_state=self.initial_state,
                                                sequence_length=self.early_stop)
        elif args.rnn_type == "seq2seq":
            # no loop_function: the decoder is fed self.inputs directly
            # instead of feeding its own outputs back in
            self.outputs, self.states = seq2seq.rnn_decoder(self.inputs,
                                                            self.initial_state,
                                                            self.cell,
                                                            loop_function=None)
        else:
            raise ValueError("unknown rnn_type: %s" % args.rnn_type)
        # output layer: project the hidden states down to input_dim so the
        # reshape to [batch_size, n_steps, input_dim] below is consistent
        self.final_state = self.states[-1]
        self.W_o = tf.Variable(tf.random_normal([self.size, self.input_dim], stddev=0.01))
        self.b_o = tf.Variable(tf.random_normal([self.input_dim], stddev=0.01))
        print "type(outputs)", type(self.outputs)
        # concatenate the per-timestep outputs into one [batch*steps, size] matrix
        self.output_cat = tf.reshape(tf.concat(1, self.outputs), [-1, self.size])
        self.output = tf.nn.xw_plus_b(self.output_cat, self.W_o, self.b_o)
        # self.final_state = states[-1]
        self.output2 = tf.reshape(self.output, [self.batch_size, self.n_steps, self.input_dim])
        # add noise, then transpose back to time-major layout
        self.output2 = self.output2 + tf.random_normal([self.batch_size, self.n_steps, self.input_dim], stddev=0.05)
        self.output2 = tf.transpose(self.output2, [1, 0, 2])
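

# A minimal, hypothetical construction example (not called anywhere): builds
# the model with the same values as the argparse defaults in __main__ below.
def _build_default_model():
    args = argparse.Namespace(rnn_size=4, rnn_type="rnn", cell_type="srnn",
                              batch_size=1, n_steps=200, input_dim=2,
                              num_layers=1)
    return srnn(args)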


def run(model, args):
    if model is None:
        return
    # create the initialize op; this needs to be run by the session, otherwise
    # you get errors about uninitialized variables
    iop = tf.initialize_all_variables()
    session = tf.Session()
    session.run(iop)
    seq_input_data = get_seq_input_data(args.n_steps, args.batch_size, args.input_dim)
    # prev_state = session.run(model.cell.zero_state(args.batch_size, tf.float32))
    prev_state = session.run(tf.random_uniform([args.batch_size, model.cell.state_size], -1., 1.))
    # inspect the trainable variables
    tvars = tf.trainable_variables()
    for tvar in tvars:
        print tvar.name
        print session.run(tvar)
    if args.cell_type == "srnn":
        # perturb the recurrent weight matrix in place; the variable name
        # below only exists for the default srnn cell inside a MultiRNNCell
        tf.get_variable_scope().reuse_variables()
        Wr = tf.get_variable("RNN/MultiRNNCell/Cell0/BasicRNNCell/Linear/Matrix")
        session.run(Wr.assign(Wr + 0.1))
        print "Wr", session.run(Wr)
    # run the network three times, carrying the final state over as the next
    # initial state
    allouts = []
    allstates = []
    allhiddens = []
    for i in range(3):
        print "pstate", prev_state
        # define our feeds: early_stop can be varied, but seq_input needs to
        # match the shape that was defined earlier
        feed = {model.early_stop: args.n_steps,
                model.seq_input: seq_input_data,
                model.initial_state: prev_state}
        outs, fstate, hidden = session.run([model.output, model.final_state, model.output_cat], feed_dict=feed)
        print "session return types", type(outs), type(fstate), type(hidden)
        prev_state = fstate
        print "fstate", fstate
        allouts.append(outs)
        allstates.append(fstate)
        allhiddens.append(hidden)
    # the rnn output is a list, one item per timestep; items at
    # t > early_stop are all zeros
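    # e.g. feeding model.early_stop: args.n_steps // 2 (a hypothetical
    # variation, not exercised here) would leave every output for
    # t >= n_steps/2 at zero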
    # print outs
    print type(outs)
    print len(outs)
    print type(outs[0])
    print outs[0].shape
    print "allouts", len(allouts)
    # plot the input, then outputs, final states and hidden activations of
    # all three runs
    pl.subplot(411)
    pl.plot(seq_input_data[:, 0, :])
    for i, out in enumerate(allouts):
        print out.shape
        pl.subplot(412)
        pl.plot(out)
        pl.subplot(413)
        pl.plot(allstates[i])
        pl.subplot(414)
        print allhiddens[i].shape
        pl.plot(allhiddens[i])
    pl.show()
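

# Example invocation (hypothetical flag values; see the argparse flags below):
#   python rnntests.py --cell_type lstm --n_steps 100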
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--rnn_size", type=int, default=4)
    parser.add_argument("--rnn_type", type=str, default="rnn", help="either: rnn or seq2seq")
    parser.add_argument("--cell_type", type=str, default="srnn", help="srnn, lstm, lstmp, cw")
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--n_steps", type=int, default=200)
    parser.add_argument("--input_dim", type=int, default=2)
    parser.add_argument("--num_layers", type=int, default=1)
    args = parser.parse_args()
    np.random.seed(1)
    model = srnn(args)
    run(model, args)