/
ChainValidation.py
128 lines (103 loc) · 4.18 KB
/
ChainValidation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from __future__ import print_function
import csv
import numpy as np
import sys
import matplotlib.pyplot as plt
from itertools import cycle
from pybrain.datasets import SequentialDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import LSTMLayer
from pybrain.supervised import RPropMinusTrainer
from pybrain.tools.validation import ModuleValidator, Validator
from sys import stdout
# Load the normalised open-price series (one float per row).
data = np.genfromtxt('normopen.csv', delimiter=",", dtype=float)

# `lp` controls how many experiments are run in a single invocation so the
# results can be written out together. For timing a single run, keep it at 1.
for lp in range(0, 1, 1):
    # Experiment parameters, read interactively (Python 2 `raw_input`).
    kfolds = int(raw_input("Input a number for the kfolds in cross validation here: "))
    trdatf = int(raw_input("The range of training data here(start) "))
    trdats = int(raw_input("The range of training data here(end) "))
    tedatf = int(raw_input("The range of testing data here(start) "))
    tedats = int(raw_input("The range of testing data here(end) "))

    # Trim the training slice so it divides evenly into kfolds+1 folds.
    dat = data[trdatf:trdats]
    num = len(dat) - len(dat) % (kfolds + 1)
    cv_data = np.split(dat[0:num], kfolds + 1)

    hypernet = []   # best network found for each candidate hidden-layer size
    hypereval = []  # its corresponding cross-validation MSE
    hnum = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]  # hidden-layer sizes to try

    for hidden in hnum:
        # BUG FIX: reset per-size bookkeeping. Previously eval_err/modnet
        # accumulated across all hidden sizes, so the index of the minimum
        # could point at a model from an earlier size.
        eval_err = []
        modnet = []
        # 1 input, `hidden` LSTM units, 1 output.
        net = buildNetwork(1, hidden, 1, hiddenclass=LSTMLayer,
                           outputbias=False, recurrent=True)
        for i in range(kfolds + 1):
            test_data = cv_data[i]
            # BUG FIX: exclude the held-out fold from the training data.
            # The original extended over every fold, including fold i,
            # so the "test" fold was always part of the training set.
            train_data = []
            for j in range(kfolds + 1):
                if j != i:
                    train_data.extend(cv_data[j])
            print("test///", test_data)

            # Each target is the next sample in the series (cycle() wraps
            # the final sample back to the first so lengths match).
            train_ds = SequentialDataSet(1, 1)
            for sample, next_sample in zip(train_data, cycle(train_data[1:])):
                train_ds.addSample(sample, next_sample)
            test_ds = SequentialDataSet(1, 1)
            for sample, next_sample in zip(test_data, cycle(test_data[1:])):
                test_ds.addSample(sample, next_sample)

            # Train with resilient backprop, reporting progress per cycle.
            trainer = RPropMinusTrainer(net, dataset=train_ds)
            train_errors = []  # saved for inspection/plotting later
            EPOCHS_PER_CYCLE = 10
            CYCLES = 10
            EPOCHS = EPOCHS_PER_CYCLE * CYCLES
            for a in xrange(CYCLES):
                trainer.trainEpochs(EPOCHS_PER_CYCLE)
                train_errors.append(trainer.testOnData())
                epoch = (a + 1) * EPOCHS_PER_CYCLE
                print("\r epoch {}/{}".format(epoch, EPOCHS), end="")
                stdout.flush()
            print("final error for training =", train_errors[-1])

            # Held-out MSE for this fold.
            err_tst = ModuleValidator.validate(Validator.MSE, net, dataset=test_ds)
            eval_err.append(err_tst)
            modnet.append(net)
            print("test_Err", err_tst)

        print(eval_err)
        # Keep the fold-model with the lowest held-out error for this size.
        pmin = eval_err.index(min(eval_err))
        print(pmin)
        hypernet.append(modnet[pmin])
        hypereval.append(min(eval_err))

    # BUG FIX: select across hidden sizes by min(hypereval); the original
    # compared against min(eval_err), the per-size list from the last loop.
    hypermin = hypereval.index(min(hypereval))
    net = hypernet[hypermin]
    print("number of hidden layers", hnum[hypermin])

    # Evaluate the chosen network on the unseen test range.
    dat = data[tedatf:tedats]
    ds = SequentialDataSet(1, 1)
    for sample, next_sample in zip(dat, cycle(dat[1:])):
        ds.addSample(sample, next_sample)
    print("put into practice:", ModuleValidator.validate(Validator.MSE, net, dataset=ds))

    # Plot predictions against targets for the test range.
    pred = [net.activate(sample) for sample, target in ds.getSequenceIterator(0)]
    b = ds.getSequence(0)
    ax1 = plt.subplot(1, 1, 1)
    ax1.plot(b[1], label='tar')
    ax1.plot(pred, label='pre')
    ax1.legend(loc=1, ncol=2, shadow=True)
    plt.title('Stock Predicting Result')
    plt.xlabel('Time')
    plt.ylabel('Return of The Stock')
    plt.show()