import copy

import numpy as np


def test_update(self):
    for vector_size in range(1, 10):
        adam = Adam()
        gnn = GraphNeuralNetwork(vector_size)

        # A step on a freshly constructed network (assumed to start
        # with zero gradients) must leave the parameters unchanged.
        # Deep-copy the parameters first: comparing against an alias
        # of the same dict would pass trivially.
        expected = copy.deepcopy(gnn.params)
        adam.update(gnn)
        for key in expected:
            np.testing.assert_array_equal(expected[key], gnn.params[key])

        gnn.grads["W"] = np.random.rand(vector_size, vector_size)
        gnn.grads["A"] = np.random.rand(vector_size)
        gnn.grads["b"] = np.random.rand(1)

        # Reference Adam state, tracked alongside the optimizer under
        # test.
        v = {}
        m = {}
        for key, grad in gnn.grads.items():
            v[key] = np.zeros_like(grad)
            m[key] = np.zeros_like(grad)
        params = copy.deepcopy(gnn.params)

        for i in range(1, 100):
            gnn.grads["W"] = np.random.rand(vector_size, vector_size)
            gnn.grads["A"] = np.random.rand(vector_size)
            gnn.grads["b"] = np.random.rand(1)
            adam.update(gnn)

            for key, param in params.items():
                # Recompute the bias-corrected moment estimates by
                # hand and apply the Adam update rule.
                m[key] = adam.beta1 * m[key] + (1 - adam.beta1) * gnn.grads[key]
                v[key] = adam.beta2 * v[key] + (1 - adam.beta2) * gnn.grads[key] ** 2
                m_hat = m[key] / (1 - adam.beta1 ** i)
                v_hat = v[key] / (1 - adam.beta2 ** i)
                params[key] = param - adam.lr * m_hat / (np.sqrt(v_hat) + 1.0e-8)

                # The optimizer must agree with the reference to six
                # decimal places.
                np.testing.assert_allclose(
                    params[key], gnn.params[key], rtol=0, atol=1.0e-6)
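The Adam class this test exercises is not shown here; the test only relies on the attributes adam.lr, adam.beta1, adam.beta2 and a single update(gnn) entry point that reads gnn.grads and updates gnn.params in place. Below is a minimal sketch consistent with that interface and with the bias-corrected update rule the test recomputes. The default hyperparameters, the epsilon constant, and the lazy allocation of the moment buffers are assumptions, not the original implementation.

import numpy as np


class Adam:
    # Minimal sketch, assuming only the interface the test above uses.
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999, eps=1.0e-8):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        self.t = 0      # timestep, used for bias correction
        self.m = None   # first-moment estimates, keyed like grads
        self.v = None   # second-moment estimates

    def update(self, model):
        # Lazily allocate the moment buffers on the first call.
        if self.m is None:
            self.m = {k: np.zeros_like(g) for k, g in model.grads.items()}
            self.v = {k: np.zeros_like(g) for k, g in model.grads.items()}
        self.t += 1
        for key, grad in model.grads.items():
            self.m[key] = self.beta1 * self.m[key] + (1 - self.beta1) * grad
            self.v[key] = self.beta2 * self.v[key] + (1 - self.beta2) * grad ** 2
            m_hat = self.m[key] / (1 - self.beta1 ** self.t)  # bias-corrected
            v_hat = self.v[key] / (1 - self.beta2 ** self.t)
            model.params[key] -= self.lr * m_hat / (np.sqrt(v_hat) + self.eps)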
import numpy as np


class Net:
    def __init__(self):
        self.params = dict()  # trainable parameters only
        self.grads = dict()   # gradients, same keys as params
        self.cache = dict()   # forward activations, kept for backprop
        self.params['W1'] = args.weight_init_std * np.random.randn(784, 256)
        self.params['B1'] = np.zeros(256)
        self.params['W2'] = args.weight_init_std * np.random.randn(256, 256)
        self.params['B2'] = np.zeros(256)
        self.params['W3'] = args.weight_init_std * np.random.randn(256, 10)
        self.params['B3'] = np.zeros(10)
        self.optimizer = Adam()

    def forward(self, input, target):
        z1 = np.dot(input, self.params['W1']) + self.params['B1']
        x2 = self.relu(z1)
        z2 = np.dot(x2, self.params['W2']) + self.params['B2']
        x3 = self.relu(z2)
        z3 = np.dot(x3, self.params['W3']) + self.params['B3']
        output = self.softmax(z3)

        # Cache the activations for backward().  They live in their
        # own dict so the optimizer only ever sees trainable
        # parameters.
        self.cache['X1'] = input
        self.cache['Z1'] = z1
        self.cache['X2'] = x2
        self.cache['Z2'] = z2
        self.cache['X3'] = x3
        self.cache['Z3'] = z3
        self.cache['Y'] = output
        self.cache['T'] = target
        return output, self.loss(output, target)

    def backward(self):
        # The gradient of cross-entropy w.r.t. the softmax logits is
        # simply (y - t); see
        # https://stats.stackexchange.com/questions/235528/backpropagation-with-softmax-cross-entropy
        delta3 = (self.cache['Y'] - self.cache['T']) / args.batch_size
        self.grads['W3'] = np.dot(self.cache['X3'].T, delta3)
        self.grads['B3'] = np.sum(delta3, axis=0)

        delta2 = np.dot(delta3, self.params['W3'].T)
        delta2 *= np.where(self.cache['Z2'] > 0, 1, 0)  # ReLU derivative
        self.grads['W2'] = np.dot(self.cache['X2'].T, delta2)
        self.grads['B2'] = np.sum(delta2, axis=0)

        delta1 = np.dot(delta2, self.params['W2'].T)
        delta1 *= np.where(self.cache['Z1'] > 0, 1, 0)
        self.grads['W1'] = np.dot(self.cache['X1'].T, delta1)
        self.grads['B1'] = np.sum(delta1, axis=0)

        self.optimizer.update(self.params, self.grads)

    def loss(self, output, target):
        # Cross entropy.
        if output.ndim == 1:
            target = target.reshape(1, target.size)
            output = output.reshape(1, output.size)
        # If the targets are one-hot vectors, convert them to class
        # indices.
        if target.size == output.size:
            target = target.argmax(axis=1)
        batch_size = output.shape[0]
        return -np.sum(
            np.log(output[np.arange(batch_size), target] + 1e-7)) / batch_size

    def accuracy(self, output, target):
        return np.sum(np.argmax(output, axis=1) == np.argmax(
            target, axis=1)) / output.shape[0]

    def softmax(self, x):
        if x.ndim == 2:
            x = x.T
            x = x - np.max(x, axis=0)  # guard against overflow
            y = np.exp(x) / np.sum(np.exp(x), axis=0)
            return y.T
        x = x - np.max(x)  # guard against overflow
        return np.exp(x) / np.sum(np.exp(x))

    def relu(self, x):
        return np.maximum(0, x)

    def relu6(self, x):
        return np.minimum(6, np.maximum(0, x))
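Because backward() implements the chain rule by hand, it is easy to cross-check against a numerical gradient. The sketch below does that with a central finite difference; numerical_grad is a hypothetical helper, not part of the original code, and the dummy batch reuses the args.batch_size global the class already depends on. Note that backward() also applies an optimizer step as a side effect, so the numerical gradient is computed before the analytic one is read out.

import numpy as np


def numerical_grad(network, x, t, key, idx, eps=1.0e-4):
    # Hypothetical helper: central finite difference of the loss with
    # respect to a single entry of network.params[key].
    orig = network.params[key][idx]
    network.params[key][idx] = orig + eps
    _, loss_plus = network.forward(x, t)
    network.params[key][idx] = orig - eps
    _, loss_minus = network.forward(x, t)
    network.params[key][idx] = orig  # restore the weight
    return (loss_plus - loss_minus) / (2 * eps)


x = np.random.rand(args.batch_size, 784)
t = np.eye(10)[np.random.randint(0, 10, size=args.batch_size)]  # one-hot

network = Net()
num = numerical_grad(network, x, t, 'W1', (0, 0))
network.forward(x, t)
network.backward()  # fills network.grads (and takes one Adam step)
print(num, network.grads['W1'][0, 0])  # should agree to ~4-5 decimals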
train_loss_list = []
train_acc_list = []
test_acc_list = []

iters_num = 10000
batch_size = 100
train_size = x_train.shape[0]
iter_per_epoch = max(train_size // batch_size, 1)

net = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
# optim = SGD(net.params, lr=0.1, momentum=0.9)
# optim = AdaGrad(net.params)
optim = Adam(net.params)

for i in range(iters_num):
    # Draw a random mini-batch.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = net.gradient(x_batch, t_batch)
    net.params = optim.update(net.params, grad)

    loss = net.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Evaluate on the full train/test sets once per epoch.
    if i % iter_per_epoch == 0:
        train_acc = net.accuracy(x_train, t_train)
        test_acc = net.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))
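This loop uses yet another Adam calling convention: the constructor receives the parameter dict and update(params, grads) returns the updated dict, which the loop assigns back to net.params. A minimal sketch consistent with that usage follows; it applies the same update rule as the sketch after the test above, and it assumes the constructor argument is used only to size the moment buffers and that update mutates and returns the same dict.

import numpy as np


class Adam:
    # Dict-based sketch matching how the loop above calls it.
    def __init__(self, params, lr=0.001, beta1=0.9, beta2=0.999, eps=1.0e-8):
        self.lr, self.beta1, self.beta2, self.eps = lr, beta1, beta2, eps
        self.t = 0
        self.m = {k: np.zeros_like(p) for k, p in params.items()}
        self.v = {k: np.zeros_like(p) for k, p in params.items()}

    def update(self, params, grads):
        self.t += 1
        for key in grads:
            self.m[key] = self.beta1 * self.m[key] + (1 - self.beta1) * grads[key]
            self.v[key] = self.beta2 * self.v[key] + (1 - self.beta2) * grads[key] ** 2
            m_hat = self.m[key] / (1 - self.beta1 ** self.t)
            v_hat = self.v[key] / (1 - self.beta2 ** self.t)
            params[key] -= self.lr * m_hat / (np.sqrt(v_hat) + self.eps)
        return params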