forked from RobRomijnders/LSTM_cpd
/
LSTM_cpd_main_bidirec.py
268 lines (225 loc) · 9.97 KB
/
LSTM_cpd_main_bidirec.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
# -*- coding: utf-8 -*-
"""
Change Point Detection with dummy dataset
This project amounts as much to change point detection as to pattern localization.
In generate_noise() we generate either one signal or a combination of many signals.
In the second case, there's also an option to introduce a pattern at the changepoint
The LSTM will target a label, being 0 for the base class and 1 for the target class
Made by Rob Romijnders
romijndersrob@gmail.com
Made on June 3 2016
Convention for the data formats
X is a matrix in R^{N x D}
y is a vector in R^{N,}, not to be confused with {N,1}
@author: rob
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops
from tensorflow.python.ops import clip_ops
from scipy import signal
from tensorflow.models.rnn import seq2seq
from tensorflow.models.rnn import rnn, rnn_cell
"""Hyperparameters"""
hidden_size = 60 #hidden size of the LSTM
output_size = 2 # For later expansion
batch_size = 64 # batch_size
seq_len = 50 # How long do you want the vectors with integers to add be?
drop_out = 0.8 # Drop out
num_layers = 2 # Number of RNN layers
max_iterations=500 # Number of iterations to train with
plot_every = 100 # How often you want terminal output?
ratio = 0.8 # Ratio for train val split
lr_rate = 0.005 # learning rate
N = 10000 #How many point you want to generate?
#Filter characteristics
fs = 500.0 #sample frequency
cutoff1 = 100.0
cutoff0 = 200.0
# Create a pattern for the transition between one signal and the other
# Note that also without this pattern, we achieve good performance
pattern = np.array([0.0, 0.5, 1.0, 0.8, 0.6, 0.4, 0.2, -0.2, -0.3, -0.4, -0.6, -0.8, -1.0, -0.5, 0.0])
pattern_len = len(pattern)
def generate_noise(D, N, add_pattern=True):
    """Generate filtered-noise time series for the changepoint detection.

    Every series starts as white Gaussian noise. Roughly half of the series
    get a change point: the part before the change point is the noise
    low-pass filtered at ``cutoff1`` and the part after it is the same noise
    low-pass filtered at ``cutoff0``. The other series are filtered with a
    single, randomly chosen one of the two filters.

    The filter settings (``fs``, ``cutoff0``, ``cutoff1``) and the transition
    shape (``pattern``, ``pattern_len``) are read from module level.

    Input
    - D            length of every series (must be even and larger than
                   2 * pattern_len)
    - N            number of series to generate
    - add_pattern  when True (default), overwrite the samples around the
                   change point with ``pattern``

    Output
    - X     matrix in R^{N x D} with the time series
    - y     vector in R^{N,} (not {N,1}); 1 if the series has a change
            point, 0 otherwise
    - mark  vector in R^{N,}; index of the change point, 0 when there is none
    """
    # Check if we have even D, so we can split the array in future
    assert D % 2 == 0, 'We need even number of dimensions'
    # The change point is drawn from [pattern_len, D - pattern_len); make sure
    # that interval is non-empty instead of failing inside the sampling loop.
    if D <= 2 * pattern_len:
        raise ValueError('D must be larger than 2*pattern_len (%d), got %d'
                         % (2 * pattern_len, D))

    ratioP = 0.5  # balance between the two filters for series without a change point
    X = np.random.randn(N, D)
    y = np.zeros(N)
    mark = np.zeros(N)

    # Generate the coefficients of the two 4th-order low-pass filters
    filters = {}
    filters['b1'], filters['a1'] = signal.butter(4, 2.0 * cutoff1 / fs, btype='lowpass')
    filters['b0'], filters['a0'] = signal.butter(4, 2.0 * cutoff0 / fs, btype='lowpass')

    # range (not xrange) so the loop runs on both Python 2 and Python 3
    for i in range(N):
        if np.random.rand() > 0.5:  # Half of the samples will have a changepoint, other half won't
            Dcut = np.random.randint(pattern_len, D - pattern_len)
            signalA = signal.filtfilt(filters['b1'], filters['a1'], X[i])
            signalB = signal.filtfilt(filters['b0'], filters['a0'], X[i])
            # Concatenate the two signals at the change point
            X[i] = np.concatenate((signalA[:Dcut], signalB[Dcut:]), axis=0)
            if add_pattern:
                # Centre the pattern on the change point
                Dstart = int(Dcut - pattern_len / 2)
                X[i, Dstart:Dstart + pattern_len] = pattern
            y[i] = 1  # The target label
            mark[i] = Dcut
        else:
            # No change point: filter the whole series with one random filter
            mode = int(np.random.rand() > ratioP)
            X[i] = signal.filtfilt(filters['b' + str(mode)], filters['a' + str(mode)], X[i])
            y[i] = 0  # The target label
    return X, y, mark
"""Set up the data"""
# We speak of
#   data / X_*  : the time series
#   labels / y_*: the labels, 1 for a series with a change point, 0 without
#   mark*       : the location of the change point (0 when there is none)
data, labels, mark = generate_noise(seq_len, N)

# Shuffle the data. The same permutation is applied to all three arrays so
# that every row keeps its own label and mark.
ind = np.random.permutation(N)
data, labels, mark = data[ind], labels[ind], mark[ind]

# Split into a training and a validation part
ind_cut = int(ratio * N)
X_train, X_val = data[:ind_cut], data[ind_cut:]
y_train, y_val = labels[:ind_cut], labels[ind_cut:]
mark_train, mark_val = mark[:ind_cut], mark[ind_cut:]

# NOTE: from here on N is the number of TRAINING samples, D the series length
N, D = X_train.shape
Nval = X_val.shape[0]
data = None  # we don't need to keep a reference to this big matrix anymore
"""Plot some Data"""
# Show a few random training series: left column without a change point
# (label 0), right column with a change point (label 1).
Nplot = 5  # Number of rows in the figure, i.e. examples shown per class
count_neg = count_pos = 0
f, axarr = plt.subplots(Nplot, 2)
# Keep drawing random training samples until both columns are full
while not (count_pos >= Nplot and count_neg >= Nplot):
    ind = np.random.randint(N)
    if y_train[ind] == 0:
        if count_neg < Nplot:
            axarr[count_neg, 0].plot(X_train[ind])
            count_neg += 1
    elif y_train[ind] == 1:
        if count_pos < Nplot:
            axarr[count_pos, 1].plot(X_train[ind])
            count_pos += 1
plt.show()
with tf.name_scope("Placeholders") as scope:
    # One placeholder per time step, each holding that step's value for the whole batch
    inputs = []
    for _ in range(seq_len):
        inputs.append(tf.placeholder(tf.float32, shape=[batch_size, 1]))
    target = tf.placeholder(tf.int64, shape=[batch_size], name='Target')
    keep_prob = tf.placeholder("float", name='Drop_Out_keep_probability')
    marks = tf.placeholder(tf.int64, shape=[batch_size], name='Marks')

with tf.name_scope("Cell_fw") as scope:
    # Forward direction: one LSTM cell, stacked num_layers times, with dropout on the output
    cell_fw = rnn_cell.DropoutWrapper(
        rnn_cell.MultiRNNCell([rnn_cell.BasicLSTMCell(hidden_size)] * num_layers),
        output_keep_prob=keep_prob)
    initial_state_fw = cell_fw.zero_state(batch_size, tf.float32)

with tf.name_scope("Cell_bw") as scope:
    # Backward direction: same construction as the forward cell
    cell_bw = rnn_cell.DropoutWrapper(
        rnn_cell.MultiRNNCell([rnn_cell.BasicLSTMCell(hidden_size)] * num_layers),
        output_keep_prob=keep_prob)
    initial_state_bw = cell_bw.zero_state(batch_size, tf.float32)

with tf.name_scope("RNN") as scope:
    # Run both directions over the sequence; `outputs` has one entry per time step
    outputs, _, _ = rnn.bidirectional_rnn(
        cell_fw, cell_bw, inputs,
        initial_state_fw=initial_state_fw,
        initial_state_bw=initial_state_bw,
        dtype=tf.float32)
    # Stack all time steps along axis 0 so one matmul can score every step
    outputs_tensor = tf.concat(0, outputs)
    # Output at the last time step, used for the classification head
    final = outputs[-1]

with tf.name_scope("Mark") as scope:
    # Localisation head: one score per time step, turned into a distribution
    # over the seq_len positions of each series
    W_m = tf.Variable(tf.random_normal([2*hidden_size, 1], stddev=0.01))
    b_m = tf.Variable(tf.random_normal([1], stddev=0.01))
    h_m = tf.matmul(outputs_tensor, W_m) + b_m
    h_mark = tf.reshape(h_m, (seq_len, batch_size))
    h_markt = tf.transpose(h_mark)  # now batch_size x seq_len
    sm_mark = tf.nn.softmax(h_markt)
    cost_mark = tf.nn.sparse_softmax_cross_entropy_with_logits(h_markt, marks)
    loss_mark = tf.reduce_mean(cost_mark)

with tf.name_scope("Output") as scope:
    # Classification head: map the final output to the class logits
    W_o = tf.Variable(tf.random_normal([2*hidden_size, output_size], stddev=0.01))
    b_o = tf.Variable(tf.random_normal([output_size], stddev=0.01))
    prediction = tf.matmul(final, W_o) + b_o

with tf.name_scope("Optimization") as scope:
    # Cross entropy between the true label and the softmax over the logits;
    # the classification loss and the localisation loss are minimised jointly
    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(prediction, target)
    loss = tf.reduce_mean(cost)
    optimizer = tf.train.RMSPropOptimizer(lr_rate, 0.2)
    train_op = optimizer.minimize(loss + loss_mark)

with tf.name_scope("Evaluating_accuracy") as scope:
    # Fraction of the batch whose arg-max class equals the target
    correct_prediction = tf.equal(tf.argmax(prediction, 1), target)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# Fire up session
sess = tf.Session()
sess.run(tf.initialize_all_variables())

# Collect information
# For now, we take old-fashioned numpy approach. In future, we might
# use TensorBoard for this part
# One row per report; columns are
#   0: train cost   1: val cost   2: train accuracy   3: val accuracy
#   4: train mark cost   5: val mark cost
perf_collect = np.zeros((max_iterations // plot_every, 6))
step = 0

# k runs from 1 up to AND INCLUDING max_iterations, so that the report at
# k == max_iterations is made and the last row of perf_collect gets filled.
for k in range(1, max_iterations + 1):
    # Draw a random mini-batch from the training set
    batch_ind = np.random.choice(N, batch_size, replace=False)
    y = y_train[batch_ind]
    # Split the batch into seq_len arrays of shape (batch_size, 1), one per input placeholder
    X = np.split(X_train[batch_ind], seq_len, axis=1)

    # Create the dictionary of inputs to feed into sess.run
    train_dict = dict(zip(inputs, X))
    train_dict.update({target: y, marks: mark_train[batch_ind], keep_prob: drop_out})

    # Perform an update on the parameters
    _, cost_train, acc_train, cost_mark_train = sess.run(
        [train_op, loss, accuracy, loss_mark], feed_dict=train_dict)

    if k % plot_every == 0:  # Output information
        # Evaluate on a random validation mini-batch, with dropout disabled
        batch_ind_sub = np.random.choice(Nval, batch_size, replace=False)
        X_val_sub = X_val[batch_ind_sub]
        y_val_sub = y_val[batch_ind_sub]
        X_val_subl = np.split(X_val_sub, seq_len, axis=1)
        val_dict = dict(zip(inputs, X_val_subl))  # create validation dictionary
        val_dict.update({target: y_val_sub, marks: mark_val[batch_ind_sub], keep_prob: 1.0})

        # Compute the costs on the validation batch; soft_mark is the predicted
        # distribution over change point positions
        cost_val, acc_val, cost_mark_val, soft_mark = sess.run(
            [loss, accuracy, loss_mark, sm_mark], feed_dict=val_dict)

        perf_collect[step] = (cost_train, cost_val, acc_train, acc_val,
                              cost_mark_train, cost_mark_val)
        print('At %.0f/%.0f Cost %.4f/%.4f Accuracy %.3f/%.3f, Cost mark %.4f/%.4f'%(k,max_iterations,cost_train,cost_val,acc_train,acc_val, cost_mark_train,cost_mark_val))
        step += 1
# Learning curves: classification cost on the training and the validation batch,
# one point per report
plt.figure()
for column, curve_name in enumerate(('train cost', 'val cost')):
    plt.plot(perf_collect[:, column], label=curve_name)
plt.legend()
plt.show()
"""Plot some Data"""
# Plot series from the last validation batch together with the predicted
# distribution over change point positions: left column without a change
# point (label 0), right column with a change point (label 1).
Nplot = 7  # Number of rows in the figure, i.e. at most this many examples per class

# Pick the examples WITHOUT replacement, separately per class. Random draws
# with replacement from a batch this small would frequently show the same
# series twice, and would never finish if the batch holds fewer than Nplot
# series of one class.
neg_rows = np.random.permutation(np.flatnonzero(y_val_sub == 0))[:Nplot]
pos_rows = np.random.permutation(np.flatnonzero(y_val_sub == 1))[:Nplot]
count_neg = len(neg_rows)
count_pos = len(pos_rows)

f, axarr = plt.subplots(Nplot, 2)
for row, ind in enumerate(neg_rows):
    axarr[row, 0].plot(X_val_sub[ind])
    axarr[row, 0].plot(soft_mark[ind])
for row, ind in enumerate(pos_rows):
    axarr[row, 1].plot(X_val_sub[ind])
    axarr[row, 1].plot(soft_mark[ind])
plt.show()