def numerical_gradient(self, x, t):
    loss_W = lambda W: self.loss(x, t)

    grads = {}
    grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
    grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
    grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
    grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

    return grads
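#%%
# Hedged illustration (not from the original code): why the lambda trick in the
# method above works. loss_W ignores its argument W; the gradient is still
# correct because numerical_gradient perturbs the parameter array *in place*,
# and the loss reads that same array when it is re-evaluated. The toy
# parameters, toy_loss, and the 1-D numerical_gradient(f, x) helper (defined
# elsewhere in this file) are assumptions made for this self-contained sketch;
# real network weights are 2-D and need the any-shape variant shown further below.
toy_params = {'W1': np.array([1.0, 2.0, 3.0]),
              'b1': np.array([0.5, -0.5])}

def toy_loss():
    # stand-in for self.loss(x, t): sum of squares of all parameters
    return np.sum(toy_params['W1'] ** 2) + np.sum(toy_params['b1'] ** 2)

loss_W = lambda W: toy_loss()   # W is ignored, exactly as in the method above
toy_grads = {key: numerical_gradient(loss_W, toy_params[key]) for key in toy_params}
print(toy_grads)   # each gradient is ~2 * parameter, as expected for a sum of squares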
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    x = init_x
    print_step = max(step_num // 10, 1)  # print roughly 10 times; guard against step_num < 10

    for i in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad
        if i % print_step == 0:
            print("step {}: {}".format(i, x))

    return x
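#%%
# Hedged usage sketch for the printing variant above (assumes
# function_2(x) = x[0]**2 + x[1]**2 as used elsewhere in this file, and numpy
# imported as np). With lr=0.1 the result should end up close to the minimum
# at (0, 0); progress is printed every tenth step.
init_x = np.array([-3.0, 4.0])
print(gradient_descent(function_2, init_x, lr=0.1, step_num=100))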
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    x = init_x
    x_history = []

    for i in range(step_num):
        x_history.append(x.copy())
        grad = numerical_gradient(f, x)
        x -= lr * grad

    return x, np.array(x_history)
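#%%
# Hedged sketch for the history-tracking variant above: run 20 steps from
# (-3.0, 4.0) and inspect the recorded trajectory; x_history stacks one
# (x0, x1) row per step. function_2 is assumed to be the same test function
# as above; the plot is optional.
import matplotlib.pyplot as plt

init_x = np.array([-3.0, 4.0])
x, x_history = gradient_descent(function_2, init_x, lr=0.1, step_num=20)
print(x_history.shape)               # (20, 2): one point per step
print(x_history[0], x_history[-1])   # first vs. last recorded point

plt.plot(x_history[:, 0], x_history[:, 1], 'o-')
plt.xlabel("x0")
plt.ylabel("x1")
plt.show()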
def numerical_gradient(self, x, t): """求梯度(数值微分) Parameters ---------- x : 输入数据 t : 教师标签 Returns ------- 具有各层的梯度的字典变量 grads['W1']、grads['W2']、...是各层的权重 grads['b1']、grads['b2']、...是各层的偏置 """ loss_W = lambda W: self.loss(x, t) grads = {} for idx in range(1, self.hidden_layer_num + 2): grads['W' + str(idx)] = numerical_gradient( loss_W, self.params['W' + str(idx)]) grads['b' + str(idx)] = numerical_gradient( loss_W, self.params['b' + str(idx)]) return grads
    for idx in range(x.size):
        tmp_val = x[idx]

        # compute f(x+h)
        x[idx] = tmp_val + h
        fxh1 = f(x)

        # compute f(x-h)
        x[idx] = tmp_val - h
        fxh2 = f(x)

        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val  # restore the original value

    return grad

#%%
print(numerical_gradient(function_2, np.array([3.0, 4.0])))
print(numerical_gradient(function_2, np.array([0.0, 2.0])))
print(numerical_gradient(function_2, np.array([3.0, 0.0])))

#%%
matrix = np.random.randint(1, 10, size=(3, 2, 4))
for index, m in enumerate(matrix):
    print(index)
    print(m.shape)
    print(m)
    print()

#%%
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    x = init_x