class LSTM:
    """A single LSTM cell processing one time step for a mini-batch.

    Parameters are packed so each gate is computed with one matrix multiply:
        Wx: input-to-hidden weights   # assumes shape (D, 4H) — TODO confirm
        Wh: hidden-to-hidden weights  # assumes shape (H, 4H) — TODO confirm
        b:  bias                      # assumes shape (4H,) — TODO confirm
    Gate pre-activations are laid out along the last axis in the order
    f, g, i, o (see _slice).
    """

    def __init__(self, Wx, Wh, b):
        self.params = [Wx, Wh, b]
        # Gradient buffers matching each parameter; overwritten in backward().
        self.grads = [np.zeros_like(Wx), np.zeros_like(Wh), np.zeros_like(b)]
        # Forward-pass intermediates needed by backward(); set in forward().
        self.cache = None
        # One shared Sigmoid instance is reused for all three sigmoid gates.
        # Its internal state is clobbered between calls, but that is harmless
        # here: backward() computes the sigmoid derivatives directly from the
        # cached gate activations and never calls self.sigmoid.backward().
        self.sigmoid = Sigmoid()

    def _slice(self, A, H):
        # Split the packed pre-activation matrix into the four per-gate
        # blocks: forget, cell candidate, input, output.
        f = A[:, :H]
        g = A[:, H:2 * H]
        i = A[:, 2 * H:3 * H]
        o = A[:, 3 * H:]
        return f, g, i, o

    def forward(self, x, h_prev, c_prev):
        """Run one LSTM step.

        Args:
            x: input batch.
            h_prev: previous hidden state, shape (N, H).
            c_prev: previous cell state.

        Returns:
            (c_next, h_next) — note: cell state first, hidden state second.
        """
        Wx, Wh, b = self.params
        N, H = h_prev.shape
        # All four gate pre-activations in one shot.
        A = np.dot(x, Wx) + np.dot(h_prev, Wh) + b
        f, g, i, o = self._slice(A, H)
        f = self.sigmoid.forward(f)   # forget gate
        g = np.tanh(g)                # candidate cell update
        i = self.sigmoid.forward(i)   # input gate
        o = self.sigmoid.forward(o)   # output gate
        c_next = f * c_prev + g * i
        h_next = o * np.tanh(c_next)
        self.cache = (x, h_prev, c_prev, (i, f, g, o), c_next)
        return c_next, h_next

    def backward(self, dh_next, dc_next):
        """Backprop one step; fills self.grads and returns (dx, dh_prev, dc_prev)."""
        Wx, Wh, b = self.params
        x, h_prev, c_prev, gates, c_next = self.cache
        i, f, g, o = gates
        tanh_c_next = np.tanh(c_next)
        # Total gradient into c_next: direct dc_next plus the path through
        # h_next = o * tanh(c_next).
        ds = dc_next + (dh_next * o) * (1 - tanh_c_next**2)
        dc_prev = ds * f
        # Gradients w.r.t. the post-activation gate values.
        di = ds * g
        df = ds * c_prev
        do = dh_next * tanh_c_next
        dg = ds * i
        # Chain through each gate's nonlinearity using the cached outputs:
        # sigma'(z) = y(1-y), tanh'(z) = 1 - y^2.
        di *= i * (1 - i)
        df *= f * (1 - f)
        do *= o * (1 - o)
        dg *= (1 - g**2)
        # Repack in the same f, g, i, o column order used by _slice.
        dA = np.hstack((df, dg, di, do))
        dWh = np.dot(h_prev.T, dA)
        dWx = np.dot(x.T, dA)
        db = dA.sum(axis=0)
        # In-place copy so external references to self.grads stay valid.
        self.grads[0][...] = dWx
        self.grads[1][...] = dWh
        self.grads[2][...] = db
        dx = np.dot(dA, Wx.T)
        dh_prev = np.dot(dA, Wh.T)
        return dx, dh_prev, dc_prev
class TestSigmoid(unittest.TestCase):
    """Shape-preservation checks for Sigmoid on random (10, 4) input."""

    def setUp(self):
        self.sigmoid = Sigmoid()
        self.x = np.random.randn(10, 4)

    def test_forward(self):
        # forward must keep the input shape unchanged
        result = self.sigmoid.forward(self.x)
        self.assertEqual((10, 4), result.shape)

    def test_backward(self):
        # prime the cache with a forward pass, then check that backward
        # keeps the upstream-gradient shape
        self.sigmoid.forward(self.x)
        upstream = np.random.randn(10, 4)
        grad = self.sigmoid.backward(upstream)
        self.assertEqual((10, 4), grad.shape)
class NeuralNetwork:
    """Fully connected feed-forward network trained with plain backprop.

    Hidden layout is controlled by ``hidden_width``:
      * a sequence/array gives each hidden layer its width explicitly,
      * a scalar gives every hidden layer the same width,
      * None interpolates between input and output widths.
    """

    def __init__(self, input_width, output_width, hidden_width=None,
                 depth=3, learning_rate=.1, activation=Sigmoid):
        # Local import: `collections.Sequence` was removed in Python 3.10;
        # the ABC lives in collections.abc.
        import collections.abc

        self.network = []
        self.learning_rate = learning_rate
        # Bug fix: the `activation` parameter was silently ignored and
        # Sigmoid() was always instantiated; honor the caller's choice.
        self.activation = activation()
        last_width = input_width
        for layer_idx in range(depth - 2):
            if isinstance(hidden_width, (collections.abc.Sequence, np.ndarray)):
                width = hidden_width[layer_idx]
            elif isinstance(hidden_width, (int, float)):
                width = hidden_width
            else:
                # Default: interpolate between input and output widths.
                width = np.abs(input_width - output_width) / (depth - 1)
            # Bug fix: np.random.normal's `size` must be integral; the
            # interpolated default (and any float hidden_width) is a float.
            width = int(width)
            scale = last_width**-.5  # ~ 1/sqrt(fan_in) initialization
            layer = np.random.normal(scale=scale, size=(width, last_width))
            self.network.append(layer)
            last_width = width
        scale = last_width**-.5
        self.network.append(
            np.random.normal(scale=scale, size=(output_width, last_width)))

    def predict(self, inputs):
        """Forward pass: repeatedly apply activation(layer @ inputs)."""
        for layer in self.network:
            inputs = self.activation.forward(np.dot(layer, inputs))
        return inputs

    def cost_f(self, predictions, targets):
        # Raw error signal; its (targets - predictions) sign makes the
        # `+=` update in train() a descent step.
        return targets - predictions

    def train(self, features, labels):
        """One gradient step on a single (features, labels) example/batch."""
        outputs = [np.array(features, ndmin=2).T]
        for layer in self.network:
            outputs.append(self.activation.forward(np.dot(layer, outputs[-1])))
        labels = np.array(labels, ndmin=2).T
        errors = self.cost_f(outputs[-1], labels)
        for l_idx, layer in enumerate(self.network[::-1]):
            # NOTE(review): activation.backward is fed the layer's *outputs*;
            # this assumes the activation derives its gradient from forward
            # outputs — confirm against the Sigmoid implementation.
            p_deltas = errors * self.activation.backward(outputs[-l_idx - 1])
            errors = np.dot(layer.T, errors)
            layer += self.learning_rate * np.dot(p_deltas, outputs[-l_idx - 2].T)
class TestSigmoid(unittest.TestCase):
    """Value tests for Sigmoid against precomputed reference matrices."""

    def setUp(self):
        self.sigmoid = Sigmoid()

    def test_forward(self):
        x = np.array([[1.0, -0.5], [-2.0, 3.0]])
        assert_almost_equal(
            np.array([[0.73105858, 0.37754067], [0.11920292, 0.95257413]]),
            self.sigmoid.forward(x))

    def test_backward(self):
        x = np.array([[1.0, -0.5], [-2.0, 3.0]])
        self.sigmoid.forward(x)
        dout = 1
        # Bug fix: backward was applied twice (backward(backward(dout)))
        # and the old reference values had the squaring baked in
        # (e.g. 0.19661193**2 == 0.0386563). A single backward pass with
        # dout=1 must return sigma'(x) = y * (1 - y).
        assert_almost_equal(
            np.array([[0.19661193, 0.23500371], [0.10499359, 0.04517666]]),
            self.sigmoid.backward(dout))
def acc(w1, b1, w2, b2, x, t):
    """Print and return the fraction of predictions within 0.5 of the target.

    Runs the 2-layer network (affine -> sigmoid -> affine) defined by the
    given weights over ``x`` and compares against ``t``.

    Bug fix: the denominator was hard-coded to 64; it is now len(t) so the
    function works for any dataset size. Also returns the accuracy
    (previously returned None), which is backward-compatible.
    """
    affine1 = Affine(w1, b1)
    affine2 = Affine(w2, b2)
    sigmoid = Sigmoid()
    hidden = sigmoid.forward(affine1.forward(x))
    predictions = affine2.forward(hidden)
    correct = sum(
        1 for i in range(len(t)) if abs(predictions[i][0] - t[i][0]) < 0.5)
    accuracy = correct / len(t)
    print(accuracy)
    return accuracy
def main():
    """Train a 2-16-1 network to learn c = a XOR b for a, b in 0..7.

    Prints the loss each epoch, plots the loss curve, and reports the
    training-set accuracy at the end.
    """
    # The original hard-coded 64-row tables are exactly the truth table of
    # bitwise XOR over 0..7; build identical arrays programmatically.
    pairs = [(a, b) for a in range(8) for b in range(8)]
    train_data = np.array(pairs)
    label_data = np.array([[a ^ b] for a, b in pairs])

    learn_rate = 0.15
    epoch = 600000
    loss_list = []
    w1, b1, w2, b2 = init(input_size=2, hidden_size=16, output_size=1)
    for i in range(epoch):
        # Layer objects are rebuilt each step because w1/b1/w2/b2 are
        # rebound (not mutated in place) by the updates below.
        affine1 = Affine(w1, b1)
        affine2 = Affine(w2, b2)
        sigmoid = Sigmoid()
        loss = MSE()
        # Forward: affine -> sigmoid -> affine -> MSE.
        x1 = affine1.forward(train_data)
        y1 = sigmoid.forward(x1)
        x2 = affine2.forward(y1)
        ls = loss.mean_square_error(x2, label_data)
        print(ls)
        loss_list.append(ls)
        # Backward pass and SGD parameter updates.
        dout = loss.backward(x2, label_data)
        dx = affine2.backward(dout)
        w2 = w2 - learn_rate * affine2.dw
        b2 = b2 - learn_rate * affine2.db
        dy1 = sigmoid.backward(dx)
        dx = affine1.backward(dy1)
        b1 = b1 - learn_rate * affine1.db
        w1 = w1 - learn_rate * affine1.dw
    plt.plot(loss_list)
    plt.show()
    acc(w1, b1, w2, b2, train_data, label_data)
def get_output_layer(self, h):
    """Squash the hidden state with a sigmoid and apply the output projection.

    Args:
        h: hidden-state array; its last dimension must match self.W.

    Returns:
        sigmoid(h) @ self.W + self.b
    """
    activated = Sigmoid().forward(h)
    return np.dot(activated, self.W) + self.b
for i in range(image_number // batch_size): train_image = np.zeros((784, batch_size)) train_label = np.zeros((10, batch_size)) for j in range(batch_size): index = index_list[i * batch_size + j] inputx = (img[index] - 128) / 256.0 inputx = inputx.reshape((784, 1)) label = np.zeros([10, 1]) label[labels[index]] = 1 train_image[:, j:j + 1] = inputx train_label[:, j:j + 1] = label dense.forward(train_image) sigmoid.forward(dense.end) loss.forward(sigmoid.end) loss.backward(train_label) sigmoid.backward(loss.grad) dense.backward(sigmoid.grad) print("--------------") count = 0 for i in range(10000): inputx = (test_imgs[i] - 128) / 256.0 inputx = inputx.reshape((784, 1)) dense.forward(inputx) sigmoid.forward(dense.end) loss.forward(sigmoid.end) if (loss.end.argmax() == test_label[i]):