# logistic_regression.py
# make our code python2 and python3 compatible
from __future__ import print_function, absolute_import, division
# use appropriate matplotlib backend for ipython notebook
import matplotlib
matplotlib.use('Agg')
from IPython.core.pylabtools import figsize
figsize(12, 4)
# figures and plot library
from matplotlib import pyplot as plt
# you don't need to care about this part
import os
import sys
os.environ['THEANO_FLAGS'] = "device=cpu,optimizer=fast_run"
DATA_DIR = os.path.join('/res', 'data')
# path to our libraries source code
sys.path.append(os.path.join('/res', 'src'))
import scipy.io as sio
import numpy as np
# computation libraries
import theano
from theano import tensor as T
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import h5py # for loading data
# some utilities I wrote for you to make plotting easy
from utils import plot_images, Progbar, plot_confusion_matrix, plot_weights
dataset = h5py.File(os.path.join(DATA_DIR, 'mnist.h5'), 'r')
for key, value in dataset.items():
    print('Name:%s, Shape:%s, Dtype:%s' % (key, value.shape, value.dtype))
# Load the training data
X_train = dataset['X_train'][:]
y_train = dataset['y_train'][:]
# Load validation data
X_valid = dataset['X_valid'][:]
y_valid = dataset['y_valid'][:]
# Load test data
X_test = dataset['X_test'][:]
y_test = dataset['y_test'][:]
# randomly pick 16 images from the training data
random_choices = np.random.choice(np.arange(X_train.shape[0]),
                                  size=16, replace=False)
X_sampled = X_train[random_choices]
y_samples = y_train[random_choices]
# start plotting
plt.figure()
_ = plot_images(X_sampled)
plt.show()
print(y_samples)
# start plotting
plt.figure()
plt.subplot(1, 3, 1)
plt.title("Training set statistics")
plt.hist(y_train, bins=10)
plt.subplot(1, 3, 2)
plt.title("Validation set statistics")
plt.hist(y_valid, bins=10)
plt.subplot(1, 3, 3)
plt.title("Test set statistics")
plt.hist(y_test, bins=10)
plt.show()
def glorot_uniform(shape, gain=1.0):
    """Glorot (Xavier) uniform initializer: samples uniformly with a scale
    chosen from the fan-in and fan-out of the weight tensor."""
    if len(shape) < 2:
        shape = (1,) + tuple(shape)
    n1, n2 = shape[:2]
    receptive_field_size = np.prod(shape[2:])
    std = gain * np.sqrt(2.0 / ((n1 + n2) * receptive_field_size))
    a = 0.0 - np.sqrt(3) * std
    b = 0.0 + np.sqrt(3) * std
    return np.cast['float32'](
        np.random.uniform(low=a, high=b, size=shape))
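# Illustrative sanity check only (not part of the model; `_demo_W` is just a
# demo name): for a (784, 10) weight matrix the sampled values should have a
# standard deviation close to sqrt(2 / (fan_in + fan_out)) = sqrt(2 / 794).
_demo_W = glorot_uniform((784, 10))
print('Glorot sample std: %.4f (expected ~%.4f)'
      % (_demo_W.std(), np.sqrt(2.0 / (784 + 10))))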
# our features are stored in a tensor (nb_samples, nb_row, nb_col)
X = T.tensor3(name='X', dtype='float32')
y_true = T.ivector(name='y_true')  # our labels are an integer vector (the digits 0, 1, ..., 9)
# Our parameters
nb_features = np.prod(X_train.shape[1:]) # 784
nb_classes = 10 # 10 different digits
W_init = glorot_uniform((nb_features, nb_classes))
W = theano.shared(W_init, name='W')
b = theano.shared(np.zeros(shape=(nb_classes,), dtype='float32'), name='bias')
# the activation is just a linear combination of features and parameters (weights)
# don't forget to add the bias
activation = T.dot(T.flatten(X, outdim=2), W) + b
# softmax "squashes" the activations into a probability (confidence value) for each digit
y_pred = T.nnet.softmax(activation)
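# Illustrative only: what T.nnet.softmax computes, in plain NumPy, for a
# single row of activations (the names here are just for the demo).
_scores = np.array([2.0, 1.0, 0.1])
_probs = np.exp(_scores) / np.sum(np.exp(_scores))
print('softmax([2.0, 1.0, 0.1]) =', _probs)  # ~[0.66, 0.24, 0.10], sums to 1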
# We use categorical_crossentropy as objective function
cost = T.mean(T.nnet.categorical_crossentropy(y_pred, y_true))
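# Illustrative only: for a single sample, categorical cross-entropy is
# -log(probability assigned to the true class); the cost above is its mean
# over the batch. A confident correct prediction gives a small loss, a
# confident wrong one a large loss.
_p = np.array([0.66, 0.24, 0.10])  # e.g. the softmax output above
print('loss if true class is 0: %.3f' % -np.log(_p[0]))  # ~0.416
print('loss if true class is 2: %.3f' % -np.log(_p[2]))  # ~2.303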
# gradient descent: Theano derives the gradients of the cost w.r.t. W and b symbolically
W_gradient, b_gradient = T.grad(cost=cost, wrt=[W, b])
# we have to cast the learning rate to float32 to keep the weights' dtype consistent
learning_rate = theano.shared(np.cast['float32'](0.1), name='learning_rate')
update = [(W, W - W_gradient * learning_rate),
          (b, b - b_gradient * learning_rate)]
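# Illustrative only: one gradient-descent step in plain NumPy on the toy cost
# f(w) = w**2 (gradient 2*w); each update pair above does exactly this for
# W and b on the cross-entropy cost.
_w, _lr = 3.0, 0.1
_w = _w - _lr * (2.0 * _w)
print('toy SGD step: w moved from 3.00 to %.2f (the minimum is at 0)' % _w)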
# create functions for training and for making predictions
f_train = theano.function(inputs=[X, y_true], outputs=cost,
                          updates=update,
                          allow_input_downcast=True)
f_predict = theano.function(inputs=[X], outputs=y_pred,
                            allow_input_downcast=True)
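# Illustrative usage: f_predict returns one probability row per image, so the
# predicted digit is the argmax of each row. The model is still untrained at
# this point, so expect near-chance predictions until the training loop below.
_probs16 = f_predict(X_sampled)  # shape (16, 10), each row sums to 1
print('predicted digits (untrained):', np.argmax(_probs16, axis=-1))
print('true digits:', y_samples)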
NB_EPOCH = 2
BATCH_SIZE = 128
LEARNING_RATE = 0.1
learning_rate.set_value(np.cast['float32'](LEARNING_RATE))
training_history = []
valid_history = []
for epoch in range(NB_EPOCH):
    prog = Progbar(target=X_train.shape[0])
    n = 0
    history = []
    while n < X_train.shape[0]:
        start = n
        end = min(n + BATCH_SIZE, X_train.shape[0])
        c = f_train(X_train[start:end], y_train[start:end])
        prog.title = 'Epoch: %.2d, Cost: %.4f' % (epoch + 1, c)
        prog.add(end - start)
        n += BATCH_SIZE
        history.append(c)
    # end of epoch, start validating
    y = np.argmax(f_predict(X_valid), axis=-1)
    accuracy = accuracy_score(y_valid, y)
    print('Validation accuracy:', accuracy)
    # save history
    training_history.append(np.mean(history))
    valid_history.append(accuracy)
y = np.argmax(f_predict(X_test), axis=-1)
accuracy = accuracy_score(y_test, y)
print('Test accuracy:', accuracy)
print('Classification report:')
print(classification_report(y_test, y))
plt.figure()
plot_confusion_matrix(confusion_matrix(y_test, y),
                      labels=range(10))  # the classes are the digits 0-9
plt.show()
plt.figure()
plt.plot(training_history, c='b', label="Training cost")
plt.plot(valid_history, c='r', label="Validation accuracy")
plt.legend()
plt.show()
plt.figure()
plt.subplot(2, 1, 1)
plot_weights(W_init, keep_aspect=False)  # weights at initialization (random)
plt.subplot(2, 1, 2)
plot_weights(W.get_value(), keep_aspect=False)  # weights after training
plt.show()