-
Notifications
You must be signed in to change notification settings - Fork 0
/
RNNTimeSeries.py
497 lines (384 loc) · 19.9 KB
/
RNNTimeSeries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
import os
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime as dt
from collections import UserDict
from TimeSeriesTensor import TimeSeriesTensor
# Using TensorFlow backend.
# so need to pip install tensorflow
from keras.models import Model, Sequential
from keras.layers import GRU, Dense, RepeatVector, TimeDistributed, Flatten, Input
from keras.callbacks import EarlyStopping
# http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html
# http://scikit-learn.org/stable/install.html
from sklearn.preprocessing import MinMaxScaler
# this does not work. skip it at first
# https://blog.csdn.net/liangzuojiayi/article/details/78183783
# https://ipython.org/ipython-doc/3/interactive/tutorial.html
# https://docs.microsoft.com/en-us/visualstudio/python/interactive-repl-ipython
# need ipython to support ?
# skip it at first
# % %matplotlib inline
#https://pandas.pydata.org/
#https://pandas.pydata.org/pandas-docs/stable/options.html
pd.options.display.float_format = '{:,.2f}'.format
np.set_printoptions(precision=2)
# load_data.py
# load_data.py
def load_data(data_dir='data/'):
    """Load the GEFCom2014 energy load data as an hourly-indexed DataFrame.

    data_dir: directory containing energy.csv (default 'data/', matching
        the original hard-coded path, so existing callers are unaffected).

    Returns a DataFrame indexed by a complete hourly timestamp range; any
    missing time periods in the raw file show up as rows of NaN, which
    makes gaps easy to spot (there are none in this dataset).
    """
    energy = pd.read_csv(os.path.join(data_dir, 'energy.csv'), parse_dates=['timestamp'])
    # Reindex onto a full hourly range between the minimum and maximum
    # timestamps so every time point has a record.
    energy.index = energy['timestamp']
    energy = energy.reindex(pd.date_range(min(energy['timestamp']),
                                          max(energy['timestamp']),
                                          freq='H'))
    # The timestamp column is now redundant with the index.
    energy = energy.drop('timestamp', axis=1)
    return energy
# mape.py
def mape(predictions, actuals):
    """Mean absolute percentage error between two aligned pandas Series."""
    absolute_percentage_errors = (predictions - actuals).abs() / actuals
    return absolute_percentage_errors.mean()
# create_evalatuion_df.py
def create_evaluation_df(predictions, test_inputs, H, scaler):
    """Build a long-format dataframe pairing each h-step-ahead prediction
    with its actual value, both mapped back to the original scale.

    predictions: array-like of shape (samples, H) in scaled space.
    test_inputs: TimeSeriesTensor whose 'target' entry holds the scaled
        actuals with shape (samples, H) -- assumed; confirm against caller.
    H: forecast horizon (number of steps ahead).
    scaler: fitted scaler whose inverse_transform undoes the (0, 1) scaling.
    """
    # One column per horizon step: t+1 ... t+H.
    eval_df = pd.DataFrame(predictions, columns=['t+'+str(t) for t in range(1, H+1)])
    # (Removed a leftover debug print of the full dataframe.)
    eval_df['timestamp'] = test_inputs.dataframe.index
    # Melt to long format: one row per (timestamp, horizon step).
    eval_df = pd.melt(eval_df, id_vars='timestamp', value_name='prediction', var_name='h')
    # transpose-then-ravel emits all t+1 actuals first, then t+2, ...,
    # matching the column-by-column order melt produced above.
    eval_df['actual'] = np.transpose(test_inputs['target']).ravel()
    # Undo the scaling so errors are reported in original units.
    eval_df[['prediction', 'actual']] = scaler.inverse_transform(eval_df[['prediction', 'actual']])
    return eval_df
# Define the funtion to make single sequence prediction
# based on scoring encoder-decoder
# Make a single-sequence prediction with the scoring encoder-decoder:
# encode once, then decode greedily one step at a time, feeding each
# prediction back in as the next decoder input.
def predict_single_sequence(single_input_seq, horizon, n_features, encoder_model, decoder_model):
    """Forecast `horizon` steps ahead for one input sequence.

    single_input_seq: array of shape (1, T, n_features) -- one sample.
    horizon: number of future steps to predict.
    n_features: number of features per time step.
    encoder_model: model mapping the input sequence to the initial state.
    decoder_model: model mapping [decoder_input, state] to [yhat, new state].

    Returns an array of shape (horizon, n_features).
    """
    # Encode the input sequence into the decoder's initial state.
    states_value = encoder_model.predict(single_input_seq)
    # The decoder's first input is the last observed time step (time t).
    dec_input = np.zeros((1, 1, n_features))
    # BUG FIX: use [0, 0, :] rather than [0, 0, 0] so all features are
    # copied, not just the first (the original only worked for n_features=1).
    dec_input[0, 0, :] = single_input_seq[0, -1, :]
    output = list()
    for t in range(horizon):
        # Predict the next value and the updated hidden state.
        yhat, h = decoder_model.predict([dec_input] + [states_value])
        output.append(yhat[0, 0, :])
        # BUG FIX: the original assigned `state = [h]` to an unused local,
        # so the decoder state was never propagated between steps and every
        # step decoded from the initial encoder state.
        states_value = h
        # Feed this prediction back in as the next decoder input.
        dec_input[0, 0, :] = yhat[0, 0, :]
    return np.array(output)
# Define the funtion to make multiple sequence prediction
# based on scoring encoder-decoder
# Make predictions for a whole batch of sequences by running the
# single-sequence scoring encoder-decoder on each sample in turn.
def predict_multi_sequence(input_seq_multi, horizon, n_features, encoder_model, decoder_model):
    """Forecast `horizon` steps for every sample in `input_seq_multi`.

    input_seq_multi: array of shape (samples, T, n_features).
    Returns an array of shape (samples, horizon, n_features).
    """
    all_predictions = []
    # shape[0] is the number of samples in the batch.
    for sample_idx in range(input_seq_multi.shape[0]):
        # Slice keeps the leading batch dimension: shape (1, T, n_features).
        one_sequence = input_seq_multi[sample_idx: sample_idx + 1]
        one_prediction = predict_single_sequence(
            one_sequence, horizon, n_features, encoder_model, decoder_model)
        all_predictions.append(one_prediction)
    return np.array(all_predictions)
if __name__ == "__main__":
    # Prerequisite: download the data folders and run extract_data.py
    # first to generate data/energy.csv.
    energy = load_data()
    print (energy.head())
    # Train on data before valid_start_dt, validate on
    # [valid_start_dt, test_start_dt), hold out test_start_dt onward.
    valid_start_dt = '2014-09-01 00:00:00'
    test_start_dt = '2014-11-01 00:00:00'
    # T: number of lagged hours fed to the encoder (t-5 .. t).
    T = 6
    # HORIZON: number of future steps to forecast (t+1 .. t+3).
    HORIZON = 3
    # Training set contains only the 'load' feature.
    train = energy.copy()[energy.index < valid_start_dt][['load']]
    # Scale to (0, 1). Both scalers are fit on the training set only so no
    # information leaks from the validation or test periods into training.
    # y_scaler is kept separate so predictions can later be inverted back
    # to original units; here it is fit on the same train[['load']] data.
    y_scaler = MinMaxScaler()
    y_scaler.fit(train[['load']])
    X_scaler = MinMaxScaler()
    train[['load']] = X_scaler.fit_transform(train)
    # Tensor layout: the encoder sees the T lags t-5..t and the decoder is
    # teacher-forced with the HORIZON steps t..t+2.
    tensor_structure = {'encoder_input':(range(-T+1, 1), ['load']), 'decoder_input':(range(0, HORIZON), ['load'])}
    # TimeSeriesTensor shifts the series into (samples, time steps, features)
    # numpy arrays for Keras; incomplete samples are dropped (its default).
    train_inputs = TimeSeriesTensor(train, 'load', HORIZON, tensor_structure)
    print (train_inputs.dataframe.head())
    # The validation slice starts T-1 hours before valid_start_dt so its
    # first sample has a complete encoder look-back window.
    look_back_dt = dt.datetime.strptime(valid_start_dt, '%Y-%m-%d %H:%M:%S') - dt.timedelta(hours=T-1)
    valid = energy.copy()[(energy.index >=look_back_dt) & (energy.index < test_start_dt)][['load']]
    valid[['load']] = X_scaler.transform(valid)
    valid_inputs = TimeSeriesTensor(valid, 'load', HORIZON, tensor_structure)
    # valid_inputs is dict-like: keys 'target', 'encoder_input',
    # 'decoder_input' access the different arrays.
    print (valid_inputs.dataframe.head())
    # Training hyperparameters (not tuned -- TODO: hyperparameter search).
    BATCH_SIZE = 32
    LATENT_DIM = 5
    EPOCHS = 50
# define training encoder
# really different from lecture's example
# https://keras.io/getting-started/sequential-model-guide/#specifying-the-input-shape
# specify input with None None indicates that any positive integer may be expected).
# ? encoder input is a tuple
# (seqence_length , input kength)
# ? no using rolling feature of normal flow
encoder_input = Input(shape=(None, 1))
# using GUR is differnt from RNN
# https://keras.io/layers/recurrent/
# return_state: Boolean. Whether to return the last state in addition to the output.
encoder = GRU(LATENT_DIM, return_state=True)
# proviate input and extra state different from output
encoder_output, state_h = encoder(encoder_input)
# ? how does this line work ? it looks like accessing dictionary by key
# https://openhome.cc/Gossip/CodeData/PythonTutorial/ContainerFlowComprehensionPy3.html
# it should be initial as list dadastrucutre i guees
# this will output an object
print (state_h)
encoder_states = [state_h]
print (encoder_states)
# define training decoder
decoder_input = Input(shape=(None, 1))
# why setup return sequences ture at the second layer?
# yes, based on coding it does make sense
# why latent_dim is 6 . i think it should be 3
# latent_dim is the dimention of GUR , notinput
decoder_GRU = GRU(LATENT_DIM, return_state=True, return_sequences=True)
# ise _ to get output states
# https://keras.io/layers/recurrent/
# initial_state should be a list of tensors
decoder_output, _ = decoder_GRU(decoder_input, initial_state=encoder_states)
# https://keras.io/getting-started/functional-api-guide/#all-models-are-callable-just-like-layers
# https://keras.io/layers/wrappers/
# https://blog.csdn.net/u012193416/article/details/79477220
# https://machinelearningmastery.com/timedistributed-layer-for-long-short-term-memory-networks-in-python/
# ? not very understand about syntax
# ? might be following lecture 4 to recode it better
decoder_dense = TimeDistributed(Dense(1))
decoder_output = decoder_dense(decoder_output)
# also GRU
# ? not do sequentail and model add here
# ? why go on thos way, why not basically going same as model add as lecture 4
# https://github.com/say543/RNNForTimeSeriesForecastTutorial/blob/master/4_multi_step_encoder_decoder_simple.ipynb
# model class API
# https://keras.io/models/model/#model-class-api
# https://keras.io/getting-started/functional-api-guide/
# here using multiple input, sinlge output (providing output is for dense)
# Note that by calling a model you aren't just reusing the architecture of the model, you are also reusing its weights.
# [] should be forming a list
model = Model([encoder_input, decoder_input], decoder_output)
# optimization function
# https://github.com/say543/RNNForTimeSeriesForecastTutorial/blob/master/slides/RNN%20For%20Time%20Series%20Forecasting%20Tutorial.pdf
# mse : Mean-squared-error
# how to select optimization ?
# i used to learn that is it provided by Maximal likelihood optimization
# https://keras.io/getting-started/sequential-model-guide/#compilation
# having adagrad, studying in th future
model.compile(optimizer='RMSprop', loss='mse')
# output model
# i borrow from
# https://github.com/say543/RNNForTimeSeriesForecastTutorial/blob/master/4_multi_step_encoder_decoder_simple.ipynb
# ? not sure how to read it
# https://keras.io/models/about-keras-models/#about-keras-models
model.summary()
# introducing early stop
# ? why need to have early stop
earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5)
# get label data for train data and valid data
# no user dictionary so change it
train_target = train_inputs['target'].reshape(train_inputs['target'].shape[0], train_inputs['target'].shape[1], 1)
valid_target = valid_inputs['target'].reshape(valid_inputs['target'].shape[0], valid_inputs['target'].shape[1], 1)
# train
# why needs to have both decoder inpput and encoder input as inputs
# ? i think having decoder input is good enough
# no user dictionary so change it
model.fit([train_inputs['encoder_input'], train_inputs['decoder_input']],
train_target,
batch_size=BATCH_SIZE,
epochs=EPOCHS,
validation_data=([valid_inputs['encoder_input'], valid_inputs['decoder_input']], valid_target),
callbacks=[earlystop],
verbose=1)
#model.fit([train_inputs.dataframe.encoder_input, train_inputs.dataframe.decoder_input],
# train_target,
# batch_size=BATCH_SIZE,
# epochs=EPOCHS,
# validation_data=([valid_inputs.dataframe.encoder_input, valid_inputs.dataframe.decoder_input], valid_target),
# callbacks=[earlystop],
# verbose=1)
# implement inference model
# ? not sure why this is different from training model, should it use previous model to do
# build ingerence encoder model
# ? why using encoder input / encoder states again
# encoder_input seems only providng dimention
# ? how about encoder_states, is it output from training output for reuse or just dimention
# for debug
# cannot tell from output, only know they are two objects
# https://keras.io/layers/recurrent/
# initial_state should be a list of tensors
## print(encoder_input)
## print(encoder_states)
# https://keras.io/layers/recurrent/
# initial_state should be a list of tensors
encoder_model = Model(encoder_input, encoder_states)
# build ingerence decoder model
decoder_state_input_h = Input(shape=(LATENT_DIM,))
# form a list since initial_state needs a list
decoder_states_input = [decoder_state_input_h]
# reuse decoder_GRU's archutecture
decoder_output, state_h = decoder_GRU(decoder_input, initial_state=decoder_states_input)
decoder_states = [state_h]
decoder_output = decoder_dense(decoder_output)
# ? why adding here, do not understand also using +
# looks like list operation
# for debug
# it will be a list having two tensors as elements
# print ([decoder_input] + decoder_states_input)
# basically the same as this i guess ?
# ? wrong this will say unhashable list becasue decoder_states_input / decoder_states are list already
# decoder_model = Model([decoder_input, decoder_states_input], [decoder_output, decoder_states])
# so using list combine operation
# https://stackoverflow.com/questions/1720421/how-to-concatenate-two-lists-in-python
decoder_model = Model([decoder_input] + decoder_states_input, [decoder_output] + decoder_states)
#################################################################
# example of single sequence prediction
##################################################################
# predict_single_sequence will use encoder_model / decoding_model as global parameter
# so passing it
#print(predict_single_sequence(valid_inputs['encoder_input'][0:1], HORIZON, 1))
# ? does 1 mean input size
print(predict_single_sequence(valid_inputs['encoder_input'][0:1], HORIZON, 1, encoder_model, decoder_model))
#################################################################
# example of output sequence prediction
# using single sequence prediction as a subroutine
##################################################################
look_back_dt = dt.datetime.strptime(test_start_dt, '%Y-%m-%d %H:%M:%S') - dt.timedelta(hours=T-1)
# energy is a input file
test = energy.copy()[test_start_dt:][['load']]
# http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html
# ? y_scaler search this code above. why it goes this way?
test[['load']] = y_scaler.transform(test)
test_inputs = TimeSeriesTensor(test, 'load', HORIZON, tensor_structure)
# predict_single_sequence will use encoder_model / decoding_model as global parameter
# so passing it
# example of multiple sequence prediction based on validation data
test_predictions_all = predict_multi_sequence(test_inputs['encoder_input'], HORIZON, 1, encoder_model, decoder_model)
# output shape
# if should be
# number of encoder input samples, [t+1, t+2, t+3] , size of each t output
print(test_predictions_all.shape)
#reshape
# https://stackoverflow.com/questions/10200268/what-does-shape-do-in-for-i-in-rangey-shape0
# reshape
# from number of encoder input samples, [t+1, t+2, t+3] , size of each t output
# to
# number of encoder input samples, [t+1, t+2, t+3]
test_predictions_all_eval = test_predictions_all.reshape(test_predictions_all.shape[0], test_predictions_all.shape[1])
print(test_predictions_all_eval.shape)
# create a new table having prediction data and actual data together
eval_df = create_evaluation_df(test_predictions_all_eval, test_inputs, HORIZON, y_scaler)
print (eval_df.head())
# ? why needs to output mean
eval_df['APE'] = (eval_df['prediction'] - eval_df['actual']).abs() / eval_df['actual']
eval_df.groupby('h')['APE'].mean()
# calculate based on error funtion
print(mape(eval_df['prediction'], eval_df['actual']))
# plot output
plot_df = eval_df[(eval_df.timestamp<'2014-11-08') & (eval_df.h=='t+1')][['timestamp', 'actual']]
for t in range(1, HORIZON+1):
plot_df['t+'+str(t)] = eval_df[(eval_df.timestamp<'2014-11-08') & (eval_df.h=='t+'+str(t))]['prediction'].values
fig = plt.figure(figsize=(15, 8))
ax = plt.plot(plot_df['timestamp'], plot_df['actual'], color='red', linewidth=4.0)
ax = fig.add_subplot(111)
ax.plot(plot_df['timestamp'], plot_df['t+1'], color='blue', linewidth=4.0, alpha=0.75)
ax.plot(plot_df['timestamp'], plot_df['t+2'], color='blue', linewidth=3.0, alpha=0.5)
ax.plot(plot_df['timestamp'], plot_df['t+3'], color='blue', linewidth=2.0, alpha=0.25)
plt.xlabel('timestamp', fontsize=12)
plt.ylabel('load', fontsize=12)
ax.legend(loc='best')
plt.show()