def numerical_gradient(self, x, t):
    # Wrap the loss so the module-level numerical_gradient sees it as a function of the weights
    loss_w = lambda w: self.loss(x, t)

    grads = {}
    # Three weight/bias pairs: W1..W3, b1..b3
    for idx in (1, 2, 3):
        grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)])
        grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)])
    return grads
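# All of the methods in this section call a module-level numerical_gradient(f, x)
# helper that is not shown here. A minimal sketch, assuming the usual
# central-difference formulation over a NumPy array (h = 1e-4 is an assumed step size):
import numpy as np

def numerical_gradient(f, x):
    h = 1e-4
    grad = np.zeros_like(x)

    # Visit every element of x, nudging it up and down by h
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp = x[idx]

        x[idx] = tmp + h
        fxh1 = f(x)                     # f(x + h)

        x[idx] = tmp - h
        fxh2 = f(x)                     # f(x - h)

        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp                    # restore the original value
        it.iternext()

    return grad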
def numerical_gradient(self, x, t):
    func_loss = lambda W: self.loss(x, t)

    grads = {}
    grads['W1'] = numerical_gradient(func_loss, self.params['W1'])
    grads['B1'] = numerical_gradient(func_loss, self.params['B1'])
    grads['W2'] = numerical_gradient(func_loss, self.params['W2'])
    grads['B2'] = numerical_gradient(func_loss, self.params['B2'])
    return grads
def numerical_gradient(self, x, t):
    loss_W = lambda W: self.loss(x, t)

    grads = {}
    grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
    grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
    grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
    grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
    return grads
def numerical_gradient(self, x, t):
    # numerical_gradient takes a function f as its input, so we define a dummy
    # function of the weights that simply evaluates the loss for (x, t)
    loss_W = lambda W: self.loss(x, t)

    grads = {}
    grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
    grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
    grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
    grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
    return grads
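# Once the gradient dictionary has been computed, a plain SGD update could look
# like the sketch below. `network`, `x_batch`, `t_batch`, and `learning_rate` are
# assumptions, standing in for whatever two-layer class defines the method above.
learning_rate = 0.1

grads = network.numerical_gradient(x_batch, t_batch)
for key in ('W1', 'b1', 'W2', 'b2'):
    network.params[key] -= learning_rate * grads[key]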
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    # Take step_num gradient-descent steps of size lr, starting from init_x
    x = init_x
    for i in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x
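# Usage sketch: minimize f(x0, x1) = x0**2 + x1**2 with gradient_descent,
# assuming the central-difference numerical_gradient sketched above is in scope.
import numpy as np

def function_2(x):
    return np.sum(x ** 2)

init_x = np.array([-3.0, 4.0])
result = gradient_descent(function_2, init_x, lr=0.1, step_num=100)
print(result)   # should end up very close to [0.0, 0.0]
# Note: because x -= lr * grad updates the array in place, init_x itself is modified.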
def numerical_gradient(self, x, t):
    # numerical_gradient takes a function f as its input, so we define a dummy
    # function of the weights that evaluates the loss (in training mode) for (x, t)
    loss_W = lambda W: self.loss(x, t, train_flag=True)

    grads = {}
    # Calculate the numerical gradient layer by layer:
    # indices 1 .. hidden_layer_num + 1 cover the hidden layers and the output layer
    for idx in range(1, self.hidden_layer_num + 2):
        grads['W' + str(idx)] = numerical_gradient(
            loss_W, self.params['W' + str(idx)])
        grads['b' + str(idx)] = numerical_gradient(
            loss_W, self.params['b' + str(idx)])

        # gamma/beta exist only when batch normalization is used,
        # and there is no BatchNorm layer after the output layer
        if self.use_batchNorm and idx != self.hidden_layer_num + 1:
            grads['gamma' + str(idx)] = numerical_gradient(
                loss_W, self.params['gamma' + str(idx)])
            grads['beta' + str(idx)] = numerical_gradient(
                loss_W, self.params['beta' + str(idx)])

    return grads
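# Numerical gradients like these are typically used for gradient checking, i.e.
# comparing them against gradients from backpropagation. A sketch, assuming the
# network also exposes a backprop-based gradient() method with the same key layout
# and that a small batch x_batch, t_batch is available:
import numpy as np

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

for key in grad_numerical:
    # Mean absolute difference per parameter; very small values (e.g. ~1e-10 .. 1e-7)
    # suggest the backprop implementation agrees with the numerical one.
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ':' + str(diff))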