def gradient(self, x, d):
    # forward
    self.loss(x, d, train_flg=True)

    # backward
    dout = 1
    dout = self.last_layer.backward(dout)

    layers = list(self.layers.values())
    layers.reverse()
    for layer in layers:
        dout = layer.backward(dout)

    # collect the gradients
    grads = {}
    for idx in range(1, self.hidden_layer_num + 2):
        grads['W' + str(idx)] = (self.layers['Affine' + str(idx)].dW
                                 + self.weight_decay_lambda * self.params['W' + str(idx)])
        grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db

        if self.use_batchnorm and idx != self.hidden_layer_num + 1:
            grads['gamma' + str(idx)] = self.layers['BatchNorm' + str(idx)].dgamma
            grads['beta' + str(idx)] = self.layers['BatchNorm' + str(idx)].dbeta

    return grads
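# Note (not from the source): the extra term added to dW above is the gradient
# of the L2 weight-decay penalty (weight_decay_lambda / 2) * sum(W ** 2), whose
# derivative with respect to W is weight_decay_lambda * W. A minimal sketch of
# just that term, assuming NumPy arrays:
import numpy as np

def weight_decay_grad(W, weight_decay_lambda):
    # gradient of (weight_decay_lambda / 2) * np.sum(W ** 2) with respect to W
    return weight_decay_lambda * W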
def gradient(self, x, t): """勾配を求める(誤差逆伝播法) Parameters ---------- x : 入力データ t : 教師ラベル Returns ------- 各層の勾配を持ったディクショナリ変数 grads['W1']、grads['W2']、...は各層の重み grads['b1']、grads['b2']、...は各層のバイアス """ # forward self.loss(x, t) # backward dout = 1 dout = self.last_layer.backward(dout) layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) # 設定 grads = {} for idx in range(1, self.hidden_layer_num + 2): grads['W' + str(idx)] = self.layers['Affine' + str( idx)].d_W + self.weight_decay_lambda * self.layers['Affine' + str(idx)].W grads['b' + str(idx)] = self.layers['Affine' + str(idx)].d_b return grads
def gradient(self, x, d):
    # forward
    self.loss(x, d)

    # backward
    dout = 1
    dout = self.last_layer.backward(dout)

    layers = list(self.layers.values())
    layers.reverse()
    for layer in layers:
        dout = layer.backward(dout)

    # collect the gradients
    grad = {}
    grad['W1'], grad['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
    grad['W2'], grad['b2'] = self.layers['Conv2'].dW, self.layers['Conv2'].db
    grad['W3'], grad['b3'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
    grad['W4'], grad['b4'] = self.layers['Affine2'].dW, self.layers['Affine2'].db

    return grad
def gradient(self, x_batch, t_batch):
    # forward
    self.loss(x_batch, t_batch, train_flag=True)

    # backward
    dout = 1
    dout = self.last_layer.backward(d_y=dout)

    layers = list(self.layers.values())  # self.layers is a dict, not a list!
    layers.reverse()
    for layer in layers:
        dout = layer.backward(dout)

    grads = {}
    for idx in range(1, len(self.hidden_size_list) + 2):
        grads['W' + str(idx)] = (self.layers['Affine' + str(idx)].d_W
                                 + self.weight_decay_lambda * self.layers['Affine' + str(idx)].W)
        grads['b' + str(idx)] = self.layers['Affine' + str(idx)].d_b

    # calculate gradients of gamma & beta
    # (the last Affine layer has no BatchNorm in front of it)
    if self.use_batchnorm:
        for idx in range(1, self.hidden_layer_num + 1):
            grads['gamma' + str(idx)] = self.layers['BatchNorm' + str(idx)].dgamma
            grads['beta' + str(idx)] = self.layers['BatchNorm' + str(idx)].dbeta

    return grads
def gradient(self, x, t): """기울기를 구한다 Parameters ---------- x : 입력 데이터 t : 정답 레이블 Returns ------- 각 층의 기울기를 담은 사전(dictionary) 변수 grads['W1']、grads['W2']、... 각 층의 가중치 grads['b1']、grads['b2']、... 각 층의 편향 """ # forward loss = self.loss(x, t, train_flg=True) # backward dout = 1 dout = self.last_layer.backward(dout) layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) # 결과 저장 grads = {} # hidden_layer_num + 1 만큼 (출력층 Affine 포함) for idx in range(1, self.hidden_layer_num + 2): # 각 Affine 층의 가중치 매개변수에 가중치 감소의 미분값을 더해준다. grads['W' + str(idx)] = self.layers['Affine' + str( idx)].dW + self.weight_decay_lambda * self.params['W' + str(idx)] grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db # < BatchNormalization 계층 사용한다면 매개변수 갱신해준다.> if self.use_batchnorm and idx != self.hidden_layer_num + 1: grads['gamma' + str(idx)] = self.layers['BatchNorm' + str(idx)].dgamma grads['beta' + str(idx)] = self.layers['BatchNorm' + str(idx)].dbeta return grads
def gradient(self, x, d):
    # forward
    self.loss(x, d)

    # backward
    dout = 1
    dout = self.last_layer.backward(dout)

    layers = list(self.layers.values())
    layers.reverse()
    for layer in layers:
        dout = layer.backward(dout)

    # collect the gradients
    grad = {}
    for idx in range(1, self.hidden_layer_num + 2):
        grad['W' + str(idx)] = self.layers['Affine' + str(idx)].dW
        grad['b' + str(idx)] = self.layers['Affine' + str(idx)].db

    return grad
def gradient(self, x, t):
    # forward
    self.loss(x, t)

    # backward
    dout = 1
    dout = self.lastLayer.backward(dout)

    # simply call each layer's backward() method in reverse order
    layers = list(self.layers.values())
    layers.reverse()
    for layer in layers:
        dout = layer.backward(dout)

    # store the results
    grads = {}
    grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
    grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db

    return grads
def gradient(self, x_batch, t_batch):
    # forward
    logging.info('Forward Start...')
    self.loss(x_batch, t_batch)
    logging.info('Forward End.')

    # backward
    logging.info('Backward Start...')
    dout = 1
    logging.info('Loss Layer> {}'.format(self.last_layer))
    dout = self.last_layer.backward(d_y=dout)

    layers = list(self.layers.values())  # self.layers is a dict, not a list!
    layers.reverse()
    for layer in layers:
        dout = layer.backward(dout)
        logging.info('Backward Layer> {}'.format(layer))
    logging.info('Backward End.')

    grad = {}
    for idx in range(1, len(self.hidden_size_list) + 2):
        grad['W' + str(idx)] = self.layers['Affine' + str(idx)].d_W
        grad['b' + str(idx)] = self.layers['Affine' + str(idx)].d_b

    return grad
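# Gradient-check sketch (not from the source): backprop gradients such as the
# ones computed above are commonly verified against numerically estimated
# gradients on a tiny batch. This assumes `network` exposes both gradient()
# and a numerical_gradient() method with the same signature and matching dict
# keys; x_batch and t_batch are hypothetical mini-batch arrays.
import numpy as np

def gradient_check(network, x_batch, t_batch):
    grad_backprop = network.gradient(x_batch, t_batch)
    grad_numerical = network.numerical_gradient(x_batch, t_batch)
    for key in grad_backprop:
        # average absolute difference per parameter; should be close to zero
        diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
        print(key + ': ' + str(diff))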