-
Notifications
You must be signed in to change notification settings - Fork 0
/
mnist-main.py
115 lines (101 loc) · 4.09 KB
/
mnist-main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import numpy as np
from loader import MNIST
from nnCostFunction import nnCostFunction
from randInitializeWeights import randInitializeWeights
from computeNumericalGradient import unRolling
from predict import predict
from shuffle import shuffle
# Load the MNIST training and test sets through the project-local loader.
data = MNIST()
data.load_training()
data.load_testing()
x_train = data.train_images
y_train = data.train_labels
x_test = data.test_images
y_test = data.test_labels

# Flatten every training image into a 784-element row and turn the labels
# into an (n, 1) column so they broadcast against the class indices below.
x_train = np.reshape(x_train, (len(x_train), 784))
y_train = np.reshape(y_train, (len(y_train), 1))

# One-hot encode the labels into an (n, 10) float matrix: entry [i, j] is 1
# exactly when y_train[i] == j. Comparing the (n, 1) column against
# arange(10) broadcasts to (n, 10) in a single vectorized step, replacing the
# previous nested Python loop over every (sample, class) pair.
y_train_fix = (y_train == np.arange(10)).astype(float)
# Build 6 cross-validation splits. Split k holds out rows
# [k*10000, (k+1)*10000) as the validation fold and trains on the remaining
# rows up to index 60000.
#   list_x_val[k]     - validation images
#   list_y_val[k]     - validation labels as an (m, 1) column
#   list_y_val_fix[k] - validation labels, one-hot encoded
#   list_x_train[k]   - training images
#   list_y_train[k]   - training labels, one-hot encoded
FOLD_SIZE = 10000
TOTAL_SAMPLES = 60000

list_x_val = []
list_y_val = []
list_x_train = []
list_y_train = []
list_y_val_fix = []
for fold in range(6):
    lo = fold * FOLD_SIZE
    hi = lo + FOLD_SIZE

    # Held-out slice for this split.
    list_x_val.append(x_train[lo:hi])
    list_y_val.append(y_train[lo:hi])
    list_y_val_fix.append(y_train_fix[lo:hi])

    # Everything before and after the held-out slice becomes training data.
    list_x_train.append(
        np.concatenate([x_train[:lo], x_train[hi:TOTAL_SAMPLES]]))
    list_y_train.append(
        np.concatenate([y_train_fix[:lo], y_train_fix[hi:TOTAL_SAMPLES]]))
# Network and training hyperparameters.
# The mini-batch buffers (x, y) and the fold counter (index) are assigned by
# the training loop itself before they are read, so no placeholder values are
# created here.
hidden_layer = 150  # number of units in each of the two hidden layers
alpha = 0.01        # gradient-descent step size
epoch = 148         # passes over the training data per fold
mini_batch = 80     # samples per gradient step
l = 50              # value passed to nnCostFunction as `lamda`
# Train one network per cross-validation split, then report its costs and
# its accuracy on the held-out fold.
# NOTE(review): the loop nesting below was reconstructed from the statement
# order (per-batch update -> per-epoch shuffle -> per-fold reporting);
# confirm it matches the intended structure.
for index in range(6):
    x_train = list_x_train[index]
    y_train = list_y_train[index]

    # Fresh weights for every fold so the folds are trained independently.
    theta1 = randInitializeWeights(784, hidden_layer)
    theta2 = randInitializeWeights(hidden_layer, hidden_layer)
    theta3 = randInitializeWeights(hidden_layer, 10)
    print('training... ' + str(index + 1))

    # Boundaries of theta1 / theta2 inside the flat parameter vector:
    # theta1 has hidden_layer * 785 entries, theta2 hidden_layer *
    # (hidden_layer + 1), and theta3 takes whatever remains.
    theta1_end = 785 * hidden_layer
    theta2_end = (786 + hidden_layer) * hidden_layer

    for i in range(epoch):
        # Reshuffle once per epoch so mini-batches differ between epochs.
        x_train, y_train = shuffle(x_train, y_train)
        # Floor division: a trailing partial batch is skipped.
        for k in range(len(x_train) // mini_batch):
            x = x_train[k * mini_batch:(k + 1) * mini_batch]
            y = y_train[k * mini_batch:(k + 1) * mini_batch]
            # results[0] is the cost, results[1:4] the gradients for
            # theta1, theta2 and theta3.
            results = nnCostFunction(theta1, theta2, theta3, x, y, lamda=l)

            # Plain gradient-descent step on the unrolled parameters.
            grad = np.concatenate([unRolling(results[1]),
                                   unRolling(results[2]),
                                   unRolling(results[3])])
            theta = np.concatenate([unRolling(theta1),
                                    unRolling(theta2),
                                    unRolling(theta3)])
            theta = theta - alpha * grad

            # Roll the flat vector back into the three weight matrices.
            theta1 = np.reshape(theta[:theta1_end], (hidden_layer, 785))
            theta2 = np.reshape(theta[theta1_end:theta2_end],
                                (hidden_layer, hidden_layer + 1))
            theta3 = np.reshape(theta[theta2_end:], (10, hidden_layer + 1))

    # Final costs for this fold. The training cost is only evaluated here:
    # the earlier extra evaluation fed a print that is commented out and its
    # result was overwritten before being read.
    j_val = nnCostFunction(
        theta1, theta2, theta3,
        list_x_val[index], list_y_val_fix[index], lamda=l)[0]
    j_train = nnCostFunction(theta1, theta2, theta3,
                             x_train, y_train, lamda=l)[0]
    print('coss val = ' + str(j_val) + ' coss train = ' + str(j_train)
          + ' lamda = ' + str(l))

    # Persist the trained weights and both costs. Each goes to its own file:
    # writing all three to the same path left only the last value on disk and
    # discarded the weights. atleast_1d is needed because savetxt only
    # accepts 1-D or 2-D input.
    theta = np.concatenate([unRolling(theta1),
                            unRolling(theta2),
                            unRolling(theta3)])
    np.savetxt('theta-val-version 4- {}.txt'.format(index), theta,
               delimiter=',')
    np.savetxt('j_val-version 4- {}.txt'.format(index),
               np.atleast_1d(j_val), delimiter=',')
    np.savetxt('j_train-version 4- {}.txt'.format(index),
               np.atleast_1d(j_train), delimiter=',')

    # Accuracy on the held-out fold: the predicted class is the index of the
    # largest output in each row of predict()'s result.
    a4 = predict(theta1, theta2, theta3, list_x_val[index])
    predicted = np.argmax(np.asarray(a4), axis=1)
    labels = np.ravel(list_y_val[index])
    count = int(np.sum(predicted == labels))
    print('val - ' + str(index) + ' ' + str(100.0 * count / len(labels)))