class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        # Initialize the weights
        W1 = 0.01 * np.random.randn(I, H)
        W2 = 0.01 * np.random.randn(H, O)
        # Initialize the biases
        b1 = np.zeros(H)
        b2 = np.zeros(O)

        # Create the layers
        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        # Initialize the weights and biases
        W1 = 0.01 * np.random.randn(I, H)
        b1 = np.zeros(H)
        W2 = 0.01 * np.random.randn(H, O)
        b2 = np.zeros(O)

        # Create the layers
        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists.
        # The gradient arrays are always overwritten in place (a deep copy of the values),
        # so grouping the gradients only has to be done once, here at construction time.
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = 0.5 * (h0 + h1)
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
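A minimal smoke test for SimpleCBOW (an illustrative sketch; the shapes and one-hot encoding are assumptions, chosen to match how the class indexes contexts[:, 0] and contexts[:, 1]):

# Hypothetical usage: vocabulary of 7 words, one context word on each side,
# a batch of 3 samples, everything one-hot encoded.
import numpy as np

vocab_size, hidden_size = 7, 5
model = SimpleCBOW(vocab_size, hidden_size)

contexts = np.eye(vocab_size, dtype='f')[np.array([[0, 2], [1, 3], [2, 4]])]  # shape (3, 2, V)
target = np.eye(vocab_size, dtype='f')[np.array([1, 2, 3])]                   # shape (3, V)

loss = model.forward(contexts, target)   # scalar cross-entropy loss
model.backward()                         # fills model.grads in place
# the word embeddings learned so far live in model.word_vecs (shape V x H)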
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        # initialize weights and biases
        W1 = 0.01 * np.random.randn(I, H)
        b1 = np.zeros(H)
        W2 = 0.01 * np.random.randn(H, O)
        b2 = np.zeros(O)

        # create layers
        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        # combine all weights and grads into lists
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        W1 = 0.01 * np.random.randn(input_size, hidden_size)
        b1 = np.zeros(hidden_size)
        W2 = 0.01 * np.random.randn(hidden_size, output_size)
        b2 = np.zeros(output_size)

        self.layers = [
            Affine(W1, b1),
            Sigmoid(),
            Affine(W2, b2)
        ]
        self.loss_layer = SoftmaxWithLoss()

        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        # Initialize the weights and biases
        W1 = 0.01 * np.random.randn(I, H)
        b1 = np.zeros(H)
        W2 = 0.01 * np.random.randn(H, O)
        b2 = np.zeros(O)

        # Create the layers
        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        # dx coming out of the softmax backward pass has shape (30, 3):
        # only the entries at the correct-label indices come out negative
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
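A minimal training-step sketch for the list-style TwoLayerNet above (the data shapes are assumptions: x is a (batch, input_size) array and t holds integer class labels, which the book's SoftmaxWithLoss accepts):

import numpy as np

model = TwoLayerNet(input_size=2, hidden_size=10, output_size=3)
x = np.random.randn(30, 2)               # toy batch of 30 samples
t = np.random.randint(0, 3, size=30)     # toy integer labels

learning_rate = 1.0
loss = model.forward(x, t)    # forward pass, returns the scalar loss
model.backward()              # backward pass, fills model.grads in place
for param, grad in zip(model.params, model.grads):
    param -= learning_rate * grad   # plain SGD update, done in place
print(loss)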
class Network:
    def __init__(self):
        self.__input_size = 28 ** 2       # MNIST data is 28 x 28 pixels.
        self.__hidden_size = 50           # Hidden layer size.
        self.__output_size = 10           # Output is a one-hot array for digits 0 to 9.
        self.__weight_init_std = 0.01     # Standard deviation for initial weights.

        # Initialize weights and biases.
        self.params = {}
        self.params['W1'] = self.__weight_init_std * np.random.randn(self.__input_size, self.__hidden_size)
        self.params['b1'] = np.zeros(self.__hidden_size)
        self.params['W2'] = self.__weight_init_std * np.random.randn(self.__hidden_size, self.__output_size)
        self.params['b2'] = np.zeros(self.__output_size)

        # Generate layers.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    # x: input data, t: teacher data
    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # return values
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
        return grads

    def repr(self):
        print("Input -> Affine(W1+b1) -> Affine(W2+b2) -> SoftMax")
        self.__set_format__("{:.2f}")
        for key in self.params.keys():
            print(key + str(self.params[key].shape))
            print(self.params[key])

    def __set_format__(self, format_):
        float_formatter = format_.format
        np.set_printoptions(formatter={'float_kind': float_formatter})
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Initialize the weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # Create the layers
        self.layers = OrderedDict()  # ordered dict, so layers are traversed in insertion order
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = ReLU()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()  # output layer

    def predict(self, x):
        """Inference. x: input"""
        # Propagate x forward, updating it layer by layer
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Loss function. x: input data, t: teacher data"""
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        """Classification accuracy"""
        # Inference; the return value is unnormalized scores
        y = self.predict(x)
        # Convert the unnormalized scores to the index of the maximum value
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            # If the teacher data is one-hot, convert it to indices as well
            t = np.argmax(t, axis=1)
        # Accuracy
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def gradient(self, x, t):
        """Compute the gradients of all parameters"""
        # Forward pass
        self.loss(x, t)

        # Backward pass
        dout = 1  # not actually used when the output layer is softmax + cross-entropy
        dout = self.lastLayer.backward(dout=1)  # output layer

        # Propagate dout backwards through the layers
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # Collect dW and db into grads
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
        return grads
class SimpleConvNet:
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        """
        input_dim : shape of the input (channels, image height, image width)
        conv_param : convolution settings, dict,
            e.g. {'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1}
        hidden_size : number of nodes in the hidden layer
        output_size : number of nodes in the output layer
        weight_init_std : standard deviation used to initialize the weights W
        """
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2 * filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2))

        # Initialize the weights
        self.params = {}
        std = weight_init_std
        self.params['W1_1'] = std * np.random.randn(16, 1, 3, 3)
        self.params['b1_1'] = np.zeros(16)
        self.params['W1_3'] = std * np.random.randn(16, 16, 3, 3)
        self.params['b1_3'] = np.zeros(16)
        self.params['W2_1'] = std * np.random.randn(32, 16, 3, 3)
        self.params['b2_1'] = np.zeros(32)
        self.params['W2_3'] = std * np.random.randn(32, 32, 3, 3)
        self.params['b2_3'] = np.zeros(32)
        self.params['W3_1'] = std * np.random.randn(64, 32, 3, 3)
        self.params['b3_1'] = np.zeros(64)
        self.params['W3_3'] = std * np.random.randn(64, 64, 3, 3)
        self.params['b3_3'] = np.zeros(64)
        self.params['W4_1'] = std * np.random.randn(64 * 4 * 4, hidden_size)
        self.params['b4_1'] = np.zeros(hidden_size)
        self.params['W5_1'] = std * np.random.randn(hidden_size, output_size)
        self.params['b5_1'] = np.zeros(output_size)

        # Create the layers
        self.layers = OrderedDict()
        self.layers['Conv1_1'] = Convolution(self.params['W1_1'], self.params['b1_1'], 1, 1)
        self.layers['ReLU1_2'] = ReLU()
        self.layers['Conv1_3'] = Convolution(self.params['W1_3'], self.params['b1_3'], 1, 1)
        self.layers['ReLU1_4'] = ReLU()
        self.layers['Pool1_5'] = MaxPooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Conv2_1'] = Convolution(self.params['W2_1'], self.params['b2_1'], 1, 1)
        self.layers['ReLU2_2'] = ReLU()
        self.layers['Conv2_3'] = Convolution(self.params['W2_3'], self.params['b2_3'], 1, 2)
        self.layers['ReLU2_4'] = ReLU()
        self.layers['Pool2_5'] = MaxPooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Conv3_1'] = Convolution(self.params['W3_1'], self.params['b3_1'], 1, 1)
        self.layers['ReLU3_2'] = ReLU()
        self.layers['Conv3_3'] = Convolution(self.params['W3_3'], self.params['b3_3'], 1, 1)
        self.layers['ReLU3_4'] = ReLU()
        self.layers['Pool3_5'] = MaxPooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine4_1'] = Affine(self.params['W4_1'], self.params['b4_1'])
        self.layers['ReLU4_2'] = ReLU()
        self.layers['Dropout4_3'] = Dropout(0.5)
        self.layers['Affine5_1'] = Affine(self.params['W5_1'], self.params['b5_1'])
        self.layers['Dropout5_2'] = Dropout(0.5)

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Loss function. x: input data, t: teacher data"""
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        acc = 0.0
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i * batch_size:(i + 1) * batch_size]
            tt = t[i * batch_size:(i + 1) * batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)
        return acc / x.shape[0]

    def gradient(self, x, t):
        """Compute the gradients by backpropagation.

        Parameters
        ----------
        x : input data
        t : teacher data

        Returns
        -------
        A dictionary holding the gradients of each layer:
        grads['W1'], grads['W2'], ... are the layer weights,
        grads['b1'], grads['b2'], ... are the layer biases.
        """
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # Collect the gradients
        grads = {}
        grads['W1_1'], grads['b1_1'] = self.layers['Conv1_1'].dW, self.layers['Conv1_1'].db
        grads['W1_3'], grads['b1_3'] = self.layers['Conv1_3'].dW, self.layers['Conv1_3'].db
        grads['W2_1'], grads['b2_1'] = self.layers['Conv2_1'].dW, self.layers['Conv2_1'].db
        grads['W2_3'], grads['b2_3'] = self.layers['Conv2_3'].dW, self.layers['Conv2_3'].db
        grads['W3_1'], grads['b3_1'] = self.layers['Conv3_1'].dW, self.layers['Conv3_1'].db
        grads['W3_3'], grads['b3_3'] = self.layers['Conv3_3'].dW, self.layers['Conv3_3'].db
        grads['W4_1'], grads['b4_1'] = self.layers['Affine4_1'].dW, self.layers['Affine4_1'].db
        grads['W5_1'], grads['b5_1'] = self.layers['Affine5_1'].dW, self.layers['Affine5_1'].db
        return grads
class SimpleConvNet:
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2 * filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2))

        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0],
                                                              filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                           conv_param['stride'], conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        acc = 0.0
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i * batch_size:(i + 1) * batch_size]
            tt = t[i * batch_size:(i + 1) * batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)
        return acc / x.shape[0]

    def gradient(self, x, t):
        # Forward pass
        self.loss(x, t)

        # Backward pass
        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Conv1'].dW
        grads['b1'] = self.layers['Conv1'].db
        grads['W2'] = self.layers['Affine1'].dW
        grads['b2'] = self.layers['Affine1'].db
        grads['W3'] = self.layers['Affine2'].dW
        grads['b3'] = self.layers['Affine2'].db
        return grads
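For the default settings the shape arithmetic in __init__ works out as follows (a worked example of the formulas above, not additional API):

# 28x28 input, 5x5 filters, pad 0, stride 1, 30 filters, followed by 2x2 max pooling
input_size, filter_size, pad, stride, filter_num = 28, 5, 0, 1, 30
conv_output_size = (input_size - filter_size + 2 * pad) / stride + 1   # (28 - 5)/1 + 1 = 24
pool_output_size = int(filter_num * (conv_output_size / 2) ** 2)       # 30 * 12 * 12 = 4320
# so W2 has shape (4320, hidden_size): the pooled feature map is flattened before Affine1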
class SimpleConvNet:
    def __init__(self, dim_in=(1, 28, 28),
                 par={'num_filter': 30, 'size_filter': 5, 'pad': 0, 'stride': 1},
                 s_hidden=100, s_out=10, std_w_init=0.01):
        n_f = par['num_filter']
        s_f = par['size_filter']
        pad = par['pad']
        stride = par['stride']
        size_in = dim_in[1]
        size_out_conv = int((size_in + 2 * pad - s_f) / stride) + 1
        size_out_pool = int(n_f * (size_out_conv / 2) ** 2)

        self.params = {}
        self.params['W1'] = std_w_init * np.random.randn(n_f, dim_in[0], s_f, s_f)
        self.params['b1'] = np.zeros(n_f)
        self.params['W2'] = std_w_init * np.random.randn(size_out_pool, s_hidden)
        self.params['b2'] = np.zeros(s_hidden)
        self.params['W3'] = std_w_init * np.random.randn(s_hidden, s_out)
        self.params['b3'] = np.zeros(s_out)

        self.layers = OrderedDict()
        self.layers['Conv'] = Convolution(self.params['W1'], self.params['b1'], stride, pad)
        self.layers['Relu1'] = Relu()
        self.layers['Pool'] = Pooling(2, 2, 2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        loss = self.last_layer.forward(y, t)
        return loss

    def accuracy(self, x, t):
        y = self.predict(x)
        pred = y.argmax(axis=1)
        if t.ndim != 1:
            t = t.argmax(axis=1)
        return np.sum(pred == t) / float(pred.size)

    def gradient(self, x, t):
        self.loss(x, t)

        dout = 1
        dout = self.last_layer.backward(dout)
        for layer in reversed(self.layers.values()):
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Conv'].dW
        grads['b1'] = self.layers['Conv'].db
        grads['W2'] = self.layers['Affine1'].dW
        grads['b2'] = self.layers['Affine1'].db
        grads['W3'] = self.layers['Affine2'].dW
        grads['b3'] = self.layers['Affine2'].db
        return grads
class SimpleConvNet:
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                 hidden_size=100, output_size=10, weight_init_std=0.01,
                 weight_decay_lambda=0.01):
        """
        input_dim : shape of the input (channels, image height, image width)
        conv_param : convolution settings, dict,
            e.g. {'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1}
        hidden_size : number of nodes in the hidden layer
        output_size : number of nodes in the output layer
        weight_init_std : standard deviation used to initialize the weights W
        """
        self.hidden_layer_num = 3
        self.weight_decay_lambda = weight_decay_lambda

        # filter_num = conv_param['filter_num']
        # filter_size = conv_param['filter_size']
        # filter_pad = conv_param['pad']
        # filter_stride = conv_param['stride']
        filter_num = 30
        filter_size = 5
        filter_pad = 0
        filter_stride = 1
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2 * filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2))

        # Initialize the weights
        self.params = {}
        std = weight_init_std
        # W1 becomes the weights of the convolution filters
        self.params['W1'] = std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        # b1 becomes the biases of the convolution filters
        self.params['b1'] = np.zeros(filter_num)
        # self.params['W2'] = std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        # self.params['W3'] = std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)
        # Use the He initial values
        self.params['W2'] = np.random.randn(pool_output_size, hidden_size) * he(pool_output_size)
        self.params['W3'] = np.random.randn(hidden_size, output_size) * he(hidden_size)

        # Create the layers
        self.layers = OrderedDict()
        # self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
        #                                    conv_param['stride'], conv_param['pad'])
        # W1 is the convolution filter weight, b1 the convolution filter bias
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], 1, 0)
        self.layers['ReLU1'] = ReLU()
        self.layers['Pool1'] = MaxPooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['ReLU2'] = ReLU()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Loss function. x: input data, t: teacher data"""
        y = self.predict(x)

        # Compute the loss including weight decay
        lmd = self.weight_decay_lambda
        weight_decay = 0
        for idx in range(1, self.hidden_layer_num + 1):
            W = self.params['W' + str(idx)]
            # For every weight matrix W, accumulate 1/2 * lambda * sum(w_ij^2)
            weight_decay += 0.5 * lmd * np.sum(W**2)

        return self.last_layer.forward(y, t) + weight_decay

    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        acc = 0.0
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i * batch_size:(i + 1) * batch_size]
            tt = t[i * batch_size:(i + 1) * batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)
        return acc / x.shape[0]

    def gradient(self, x, t):
        """Compute the gradients by backpropagation.

        Parameters
        ----------
        x : input data
        t : teacher data

        Returns
        -------
        A dictionary holding the gradients of each layer:
        grads['W1'], grads['W2'], ... are the layer weights,
        grads['b1'], grads['b2'], ... are the layer biases.
        """
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # Collect dW and db into grads, taking weight decay into account
        lmd = self.weight_decay_lambda
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
        grads['W2'], grads['b2'] = (self.layers['Affine1'].dW + lmd * self.layers['Affine1'].W,
                                    self.layers['Affine1'].db)
        grads['W3'], grads['b3'] = (self.layers['Affine2'].dW + lmd * self.layers['Affine2'].W,
                                    self.layers['Affine2'].db)
        return grads

    def save_params(self, file_name="CNNparams.pkl"):
        params = {}
        # for key, val in self.params.items():
        #     params[key] = val
        print("W1Start")
        params['W1'] = self.params['W1']
        print("b1Start")
        params['b1'] = self.params['b1']
        print("W2Start")
        params['W2'] = self.params['W2']
        print("b2Start")
        params['b2'] = self.params['b2']
        print("W3Start")
        params['W3'] = self.params['W3']
        print("b3Start")
        params['b3'] = self.params['b3']
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="CNNparams.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        # for key, val in params.items():
        #     self.params[key] = val
        self.params['W1'] = params['W1']
        self.params['b1'] = params['b1']
        self.params['W2'] = params['W2']
        self.params['b2'] = params['b2']
        self.params['W3'] = params['W3']
        self.params['b3'] = params['b3']

    def make_layers(self):
        # Re-create the layers from the current parameters
        self.layers = OrderedDict()
        # W1 is the convolution filter weight, b1 the convolution filter bias
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], 1, 0)
        self.layers['ReLU1'] = ReLU()
        self.layers['Pool1'] = MaxPooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['ReLU2'] = ReLU()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
        self.last_layer = SoftmaxWithLoss()
class SimpleConvNet:
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        # Compute the output size of the convolution and pooling layers
        conv_output_size = (input_size - filter_size + 2 * filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2))

        # Initialize the weight parameters (1: convolution layer, 2: fully connected, 3: fully connected)
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0],
                                                              filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # Create the layers
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                           conv_param['stride'], conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # Collect the gradients
        grads = {}
        grads['W1'] = self.layers['Conv1'].dW
        grads['b1'] = self.layers['Conv1'].db
        grads['W2'] = self.layers['Affine1'].dW
        grads['b2'] = self.layers['Affine1'].db
        grads['W3'] = self.layers['Affine2'].dW
        grads['b3'] = self.layers['Affine2'].db
        return grads
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Initialize the weights
        # self.params = {"W1": np.random.randn(input_size, hidden_size) / np.sqrt(input_size),
        #                "b1": np.zeros(hidden_size),
        #                "W2": np.random.randn(hidden_size, output_size) / np.sqrt(hidden_size),
        #                "b2": np.zeros(output_size)}
        self.params = {"W1": weight_init_std * np.random.randn(input_size, hidden_size),
                       "b1": np.zeros(hidden_size),
                       "W2": weight_init_std * np.random.randn(hidden_size, output_size),
                       "b2": np.zeros(output_size)}

        # Create the layers
        self.layers = OrderedDict()
        self.layers["Affine1"] = Affine(self.params["W1"], self.params["b1"])
        self.layers["ReLU1"] = ReLU()
        self.layers["Affine2"] = Affine(self.params["W2"], self.params["b2"])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)
        grads = {'W1': numerical_gradient(loss_W, self.params['W1']),
                 'b1': numerical_gradient(loss_W, self.params['b1']),
                 'W2': numerical_gradient(loss_W, self.params['W2']),
                 'b2': numerical_gradient(loss_W, self.params['b2'])}
        return grads

    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        grads = {'W1': self.layers["Affine1"].dW, 'b1': self.layers["Affine1"].db,
                 'W2': self.layers["Affine2"].dW, 'b2': self.layers["Affine2"].db}
        return grads
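Since this variant exposes both numerical_gradient and gradient, a gradient check is a natural follow-up. A minimal sketch (x_train/t_train are assumed to be preloaded MNIST arrays with one-hot labels; they are not defined in these snippets):

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
x_batch, t_batch = x_train[:3], t_train[:3]      # a tiny batch keeps the numerical pass cheap

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))   # should be tiny, e.g. on the order of 1e-10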
class DPLMultiLayerNet:
    """Fully connected multi-layer neural network.

    Parameters
    ----------
    input_size : input size (784 for MNIST)
    hidden_size_list : list of the numbers of neurons in the hidden layers (e.g. [100, 100, 100])
    output_size : output size (10 for MNIST)
    activation : 'relu' or 'sigmoid' or 'through'
    weight_init_std : standard deviation of the weights (e.g. 0.01)
        If 'relu' or 'he' is given, the "He initial values" are used.
        If 'sigmoid' or 'xavier' is given, the "Xavier initial values" are used.
        If 'linear' or 'through' is given, the "Linear initial values" are used.
        DL weights: normal random numbers; DPL weights: absolute values of normal random numbers.
    """

    def __init__(self, input_size, hidden_size_list, output_size, batch_size=1,
                 activation='sigmoid', weight_init_std='linear', dpl='dpl'):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.batch_size = batch_size
        self.dpl = dpl
        self.params = {}
        self.layers = OrderedDict()

        # Initialize the weights
        self.__init_weight(weight_init_std, dpl)

        # Create the layers
        self.__init_wb(activation, dpl)
        # print("layers:", self.layers)
        self.update_path = update_path(self.layers)

    def __init_wb(self, activation, dpl):
        """Create the layers.

        activation : 'relu' or 'sigmoid' or 'through'
        """
        activation_layer = {'sigmoid': DPLSigmoid, 'relu': DPLRelu, 'through': Through}
        # print("_init_wb activation", activation, "dpl", dpl)
        self.layers['Path1'] = FirstPath(self.params['W1'], self.params['b1'], self.batch_size)
        self.layers['Activation_function1'] = activation_layer[activation]()
        for idx in range(2, self.hidden_layer_num + 1):
            # print("idx:", idx)
            self.layers['Path' + str(idx)] = DPLPath(self.params['W' + str(idx)],
                                                     self.params['b' + str(idx)])
            self.layers['Activation_function' + str(idx)] = activation_layer[activation]()

        idx = self.hidden_layer_num + 1
        self.layers['Path' + str(idx)] = LastPath(self.params['W' + str(idx)],
                                                  self.params['b' + str(idx)], self.batch_size)
        self.last_layer = SoftmaxWithLoss()

    def __init_weight(self, weight_init_std, dpl):
        """Set the initial values of the weights.

        Parameters
        ----------
        weight_init_std : standard deviation of the weights (e.g. 0.01)
            If 'relu' or 'he' is given, the "He initial values" are used.
            If 'sigmoid' or 'xavier' is given, the "Xavier initial values" are used.
            If 'linear' or 'through' is given, the "Linear initial values" are used.
            DL weights: normal random numbers; DPL weights: absolute values of normal random numbers.
        """
        all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]
        # print("all_size_list:", all_size_list)
        for idx in range(1, len(all_size_list)):
            # print("idx:", idx)
            scale = weight_init_std
            if str(weight_init_std).lower() in ('relu', 'he'):
                scale = np.sqrt(2.0 / all_size_list[idx - 1])   # recommended initial value when using ReLU
            elif str(weight_init_std).lower() in ('sigmoid', 'xavier'):
                scale = np.sqrt(1.0 / all_size_list[idx - 1])   # recommended initial value when using sigmoid
            elif str(weight_init_std).lower() in ('linear', 'through'):
                scale = 1.0 / all_size_list[idx - 1]            # recommended initial value when using DPL

            rand = np.random.randn(all_size_list[idx - 1], all_size_list[idx])
            # if idx == 1: scale/self.batch_size
            if dpl == 'dpl':
                rand = np.fabs(rand)
            self.params['W' + str(idx)] = scale * rand
            self.params['b' + str(idx)] = np.zeros(all_size_list[idx])
        # print("param:", self.params)

    def set_batch(self, x, t):
        self.x = x
        self.t = t
        self.batch_size = x.shape[0]

    def predict(self):
        x = self.x
        for layer in self.layers.values():
            x = layer.forward(x)
        # print("Predict x:", x.shape, "self.x", self.x.shape)
        return x

    def DPLpredict(self):
        self.update_path.update()
        x = self.x
        for layer in self.layers.values():
            x = layer.DPLforward(x)
        # print("DPLPredict x:", x.shape, "self.x", self.x.shape)
        return x

    def loss(self):
        """Compute the loss.

        Returns
        -------
        Value of the loss function.
        """
        if self.dpl == 'dpl':
            y = self.DPLpredict()
        else:
            y = self.predict()
        # print("loss y:", y.shape, "t", self.t.shape)
        return self.last_layer.forward(y, self.t)

    def accuracy(self):
        y = self.predict()
        y = np.argmax(y, axis=1)
        # print("accuracy y:", y.shape, "t:", self.t.shape)
        t = self.t
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(self.x.shape[0])
        return accuracy

    def numerical_gradient(self):
        """Compute the gradients by numerical differentiation.

        Returns
        -------
        A dictionary holding the gradients of each layer:
        grads['W1'], grads['W2'], ... are the layer weights,
        grads['b1'], grads['b2'], ... are the layer biases.
        """
        loss_W = lambda W: self.loss()

        grads = {}
        for idx in range(1, self.hidden_layer_num + 2):
            grads['W' + str(idx)] = numerical_gradient(loss_W, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(loss_W, self.params['b' + str(idx)])
        return grads

    def gradient(self):
        """Compute the gradients by backpropagation.

        Returns
        -------
        A dictionary holding the gradients of each layer:
        grads['W1'], grads['W2'], ... are the layer weights,
        grads['b1'], grads['b2'], ... are the layer biases.
        """
        # forward
        self.loss()

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        if self.dpl == 'dpl':
            for layer in layers:
                # Fix: confirmed that the plain backward did not work after DPLforward
                dout = layer.DPLbackward(dout)
        else:
            for layer in layers:
                dout = layer.backward(dout)

        # Collect the gradients
        grads = {}
        for idx in range(1, self.hidden_layer_num + 2):
            grads['W' + str(idx)] = self.layers['Path' + str(idx)].dW
            grads['b' + str(idx)] = self.layers['Path' + str(idx)].db
        return grads
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Initialize the weights
        self.params = {}
        # self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        # self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
        # Use the He initial values
        self.params['W1'] = np.random.randn(input_size, hidden_size) * he(input_size)
        self.params['W2'] = np.random.randn(hidden_size, output_size) * he(hidden_size)

        # Create the layers
        self.layers = OrderedDict()  # ordered dict, so layers are traversed in insertion order
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = ReLU()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()  # output layer

    def predict(self, x):
        """Inference. x: input"""
        # Propagate x forward, updating it layer by layer
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Loss function. x: input data, t: teacher data"""
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        """Classification accuracy"""
        # Inference; the return value is unnormalized scores
        y = self.predict(x)
        # Convert the unnormalized scores to the index of the maximum value
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            # If the teacher data is one-hot, convert it to indices as well
            t = np.argmax(t, axis=1)
        # Accuracy
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def gradient(self, x, t):
        """Compute the gradients of all parameters"""
        # Forward pass
        self.loss(x, t)

        # Backward pass
        dout = 1  # not actually used when the output layer is softmax + cross-entropy
        dout = self.lastLayer.backward(dout=1)  # output layer

        # Propagate dout backwards through the layers
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # Collect dW and db into grads
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
        return grads

    def save_params(self, file_name="params.pkl"):
        params = {}
        # for key, val in self.params.items():
        #     params[key] = val
        print("W1Start")
        params['W1'] = self.params['W1']
        print("b1Start")
        params['b1'] = self.params['b1']
        print("W2Start")
        params['W2'] = self.params['W2']
        print("b2Start")
        params['b2'] = self.params['b2']
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        # for key, val in params.items():
        #     self.params[key] = val
        self.params['W1'] = params['W1']
        self.params['b1'] = params['b1']
        self.params['W2'] = params['W2']
        self.params['b2'] = params['b2']


def he(n1):
    """Helper for the He initial values; returns the apparent standard deviation."""
    return np.sqrt(2 / n1)
class TwoLayerNet:
    def __init__(self, inputLayerSize, hiddenLayerSize, ouputLayerSize, distributionScale=0.01):
        # Initialize weights
        self.params = {}
        self.params['w1'] = distributionScale * np.random.randn(inputLayerSize, hiddenLayerSize)
        self.params['b1'] = np.zeros(hiddenLayerSize)
        self.params['w2'] = distributionScale * np.random.randn(hiddenLayerSize, ouputLayerSize)
        self.params['b2'] = np.zeros(ouputLayerSize)

        # Create layers
        self.layers = OrderedDict()
        self.layers['affine1'] = Affine(self.params['w1'], self.params['b1'])
        self.layers['relu1'] = Relu()
        self.layers['affine2'] = Affine(self.params['w2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def getLoss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def getAccuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def getGradient(self, x, t):
        # forward
        self.getLoss(x, t)

        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        gradients = {}
        gradients['w1'], gradients['b1'] = self.layers['affine1'].dw, self.layers['affine1'].db
        gradients['w2'], gradients['b2'] = self.layers['affine2'].dw, self.layers['affine2'].db
        return gradients

    # Numerical gradient for recalculation
    def getNumericalGradient(self, x, t):
        loss = lambda W: self.getLoss(x, t)

        gradients = {}
        gradients['w1'] = numericalGradient(loss, self.params['w1'])
        gradients['b1'] = numericalGradient(loss, self.params['b1'])
        gradients['w2'] = numericalGradient(loss, self.params['w2'])
        gradients['b2'] = numericalGradient(loss, self.params['b2'])
        return gradients
class MultiLayerNexExtend:
    """Fully connected multi-layer neural network with Weight Decay, Dropout,
    and Batch Normalization."""

    def __init__(self, input_size, hidden_size_list, output_size,
                 activation="relu", weight_init_std="relu", weight_decay_lambda=0,
                 use_dropout=False, dropout_ratio=0.5, use_batchnorm=False):
        """
        :param input_size: size of the input
        :param hidden_size_list: list of the numbers of neurons in the hidden layers
        :param output_size: size of the output
        :param activation: "relu" or "sigmoid"
        :param weight_init_std: standard deviation of the weights;
            "relu" or "he" selects the He initial values,
            "sigmoid" or "xavier" selects the Xavier initial values
        :param weight_decay_lambda: strength of Weight Decay (L2 norm)
        :param use_dropout: whether to use Dropout
        :param dropout_ratio: Dropout ratio
        :param use_batchnorm: whether to use Batch Normalization
        """
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.use_dropout = use_dropout
        self.weight_decay_lambda = weight_decay_lambda
        self.use_batchnorm = use_batchnorm
        self.params = {}

        # Initialize the weights
        self.__init_weight(weight_init_std)

        # Create the layers
        activation_layer = {"sigmoid": Sigmoid, "relu": ReLU}
        self.layers = OrderedDict()
        for idx in range(1, self.hidden_layer_num + 1):
            self.layers["Affine" + str(idx)] = Affine(self.params["W" + str(idx)],
                                                      self.params["b" + str(idx)])
            if self.use_batchnorm:
                self.params["gamma" + str(idx)] = np.ones(hidden_size_list[idx - 1])
                self.params["beta" + str(idx)] = np.zeros(hidden_size_list[idx - 1])
                self.layers['BatchNorm' + str(idx)] = BatchNormalization(
                    self.params['gamma' + str(idx)], self.params['beta' + str(idx)])
            self.layers["Activation_function" + str(idx)] = activation_layer[activation]()
            if self.use_dropout:
                self.layers["Dropout" + str(idx)] = Dropout(dropout_ratio)

        idx = self.hidden_layer_num + 1
        self.layers["Affine" + str(idx)] = Affine(self.params["W" + str(idx)],
                                                  self.params["b" + str(idx)])
        self.last_layer = SoftmaxWithLoss()

    def __init_weight(self, weight_init_std):
        """Set the initial values of the weights."""
        all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]
        for idx in range(1, len(all_size_list)):
            scale = weight_init_std
            if str(weight_init_std).lower() in ("relu", "he"):
                scale = np.sqrt(2.0 / all_size_list[idx - 1])
            elif str(weight_init_std).lower() in ("sigmoid", "xavier"):
                scale = np.sqrt(1.0 / all_size_list[idx - 1])
            self.params["W" + str(idx)] = scale * np.random.randn(all_size_list[idx - 1],
                                                                  all_size_list[idx])
            self.params["b" + str(idx)] = np.zeros(all_size_list[idx])

    def predict(self, x, train_flg=False):
        for key, layer in self.layers.items():
            if "Dropout" in key or "BatchNorm" in key:
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)
        return x

    def loss(self, x, t, train_flg=False):
        """Compute the loss.

        :param x: input data
        :param t: true labels
        :param train_flg: whether the model is in training mode
        """
        y = self.predict(x, train_flg)

        weight_decay = 0
        for idx in range(1, self.hidden_layer_num + 2):
            W = self.params["W" + str(idx)]
            weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)

        return self.last_layer.forward(y, t) + weight_decay

    def accuracy(self, X, T):
        Y = self.predict(X, train_flg=False)
        Y = np.argmax(Y, axis=1)
        if T.ndim != 1:
            T = np.argmax(T, axis=1)
        accuracy = np.sum(Y == T) / float(X.shape[0])
        return accuracy

    def numerical_gradient(self, X, T):
        loss_W = lambda W: self.loss(X, T, train_flg=True)

        grads = {}
        for idx in range(1, self.hidden_layer_num + 2):
            grads["W" + str(idx)] = numerical_gradient(loss_W, self.params["W" + str(idx)])
            grads["b" + str(idx)] = numerical_gradient(loss_W, self.params["b" + str(idx)])

            if self.use_batchnorm and idx != self.hidden_layer_num + 1:
                grads['gamma' + str(idx)] = numerical_gradient(loss_W, self.params['gamma' + str(idx)])
                grads['beta' + str(idx)] = numerical_gradient(loss_W, self.params['beta' + str(idx)])
        return grads

    def gradient(self, x, t):
        # forward
        self.loss(x, t, train_flg=True)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # Collect the gradients
        grads = {}
        for idx in range(1, self.hidden_layer_num + 2):
            grads["W" + str(idx)] = self.layers["Affine" + str(idx)].dW + \
                self.weight_decay_lambda * self.params["W" + str(idx)]
            grads["b" + str(idx)] = self.layers["Affine" + str(idx)].db

            if self.use_batchnorm and idx != self.hidden_layer_num + 1:
                grads['gamma' + str(idx)] = self.layers['BatchNorm' + str(idx)].dgamma
                grads['beta' + str(idx)] = self.layers['BatchNorm' + str(idx)].dbeta
        return grads
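A minimal usage sketch for the extended multi-layer net above (the data shapes are assumptions; x_train/t_train stand in for preloaded MNIST arrays and are not defined in these snippets):

network = MultiLayerNexExtend(input_size=784, hidden_size_list=[100, 100, 100], output_size=10,
                              weight_decay_lambda=0.1, use_dropout=True, dropout_ratio=0.2,
                              use_batchnorm=True)
x_batch, t_batch = x_train[:128], t_train[:128]

grads = network.gradient(x_batch, t_batch)        # weight decay is already folded into dW
lr = 0.01
for key in grads.keys():
    network.params[key] -= lr * grads[key]        # SGD; gamma/beta are updated too when batchnorm is on
print(network.loss(x_batch, t_batch))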
# coding: utf-8
import numpy as np
import sys
sys.path.append('../../')
from common.layers import SoftmaxWithLoss

softmaxWithLoss = SoftmaxWithLoss()

# ---------------------------------------
x = np.array([[0.3, 0.2, 0.5]])
t = np.array([[0, 1, 0]])
out = softmaxWithLoss.forward(x, t)
print(out)
dx = softmaxWithLoss.backward(1)
print(dx)

# ---------------------------------------
x = np.array([[0.01, 0.99, 0.0]])
t = np.array([[0, 1, 0]])
out = softmaxWithLoss.forward(x, t)
print(out)
dx = softmaxWithLoss.backward(1)
print(dx)
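A cross-check for the script above, done by hand. This is a sketch under the assumption that common.layers.SoftmaxWithLoss computes the cross-entropy of softmax(x) against t and that its backward pass returns (softmax(x) - t) / batch_size, as in the book's reference implementation:

import numpy as np

def softmax(a):
    a = a - a.max(axis=1, keepdims=True)   # shift for numerical stability
    e = np.exp(a)
    return e / e.sum(axis=1, keepdims=True)

x = np.array([[0.3, 0.2, 0.5]])
t = np.array([[0, 1, 0]])
y = softmax(x)
loss = -np.sum(t * np.log(y + 1e-7)) / x.shape[0]
dx = (y - t) / x.shape[0]
print(loss)   # should match softmaxWithLoss.forward(x, t)
print(dx)     # should match softmaxWithLoss.backward(1)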
class DeepConvnet:
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param_1={'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_2={'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_3={'filter_num': 32, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_4={'filter_num': 32, 'filter_size': 3, 'pad': 2, 'stride': 1},
                 conv_param_5={'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_6={'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 hidden_size=50, output_size=10):
        pre_node_nums = np.array([1*3*3, 16*3*3, 16*3*3, 32*3*3, 32*3*3, 64*3*3, 64*4*4, hidden_size])
        weight_init_scale = np.sqrt(2.0 / pre_node_nums)

        # weights init
        self.params = {}
        pre_channel_num = input_dim[0]
        for idx, conv_param in enumerate([conv_param_1, conv_param_2, conv_param_3,
                                          conv_param_4, conv_param_5, conv_param_6]):
            self.params['w'+str(idx+1)] = weight_init_scale[idx] * \
                np.random.randn(conv_param['filter_num'], pre_channel_num,
                                conv_param['filter_size'], conv_param['filter_size'])
            self.params['b'+str(idx+1)] = np.zeros(conv_param['filter_num'])
            pre_channel_num = conv_param['filter_num']
        self.params['w7'] = weight_init_scale[6] * np.random.randn(64*4*4, hidden_size)
        self.params['b7'] = np.zeros(hidden_size)
        self.params['w8'] = weight_init_scale[7] * np.random.randn(hidden_size, output_size)
        self.params['b8'] = np.zeros(output_size)

        # gen layers
        self.layers = []
        self.layers.append(Convolution(self.params['w1'], self.params['b1'],
                                       conv_param_1['stride'], conv_param_1['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['w2'], self.params['b2'],
                                       conv_param_2['stride'], conv_param_2['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Convolution(self.params['w3'], self.params['b3'],
                                       conv_param_3['stride'], conv_param_3['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['w4'], self.params['b4'],
                                       conv_param_4['stride'], conv_param_4['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Convolution(self.params['w5'], self.params['b5'],
                                       conv_param_5['stride'], conv_param_5['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['w6'], self.params['b6'],
                                       conv_param_6['stride'], conv_param_6['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Affine(self.params['w7'], self.params['b7']))
        self.layers.append(Relu())
        self.layers.append(Dropout(0.5))
        self.layers.append(Affine(self.params['w8'], self.params['b8']))
        self.layers.append(Dropout(0.5))

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x, train_flg=False):
        for layer in self.layers:
            if isinstance(layer, Dropout):
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x, train_flg=True)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        acc = 0.0
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i*batch_size:(i+1)*batch_size]
            tt = t[i*batch_size:(i+1)*batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)
        return acc / x.shape[0]

    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        tmp_layers = self.layers.copy()
        tmp_layers.reverse()
        for layer in tmp_layers:
            dout = layer.backward(dout)

        # settings
        grads = {}
        for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)):
            grads['w'+str(i+1)] = self.layers[layer_idx].dw
            grads['b'+str(i+1)] = self.layers[layer_idx].db
        return grads

    def save_params(self, file_name='params.pkl'):
        params = {}
        for key, val in self.params.items():
            params[key] = val
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name='params.pkl'):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val

        for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)):
            self.layers[layer_idx].w = self.params['w'+str(i+1)]
            self.layers[layer_idx].b = self.params['b'+str(i+1)]
class DeepConvNet: """認識率99%以上の高精度なConvNet ネットワーク構成は下記の通り conv - relu - conv- relu - pool - conv - relu - conv- relu - pool - conv - relu - conv- relu - pool - affine - relu - dropout - affine - dropout - softmax """ def __init__(self, input_dim=(1, 28, 28), conv_param_1 = {'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1}, conv_param_2 = {'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1}, conv_param_3 = {'filter_num':32, 'filter_size':3, 'pad':1, 'stride':1}, conv_param_4 = {'filter_num':32, 'filter_size':3, 'pad':2, 'stride':1}, conv_param_5 = {'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1}, conv_param_6 = {'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1}, hidden_size=50, output_size=10): # 重みの初期化=========== # 各層のニューロンひとつあたりが、前層のニューロンといくつのつながりがあるか(TODO:自動で計算する) pre_node_nums = np.array([1*3*3, 16*3*3, 16*3*3, 32*3*3, 32*3*3, 64*3*3, 64*4*4, hidden_size]) weight_init_scales = np.sqrt(2.0 / pre_node_nums) # ReLUを使う場合に推奨される初期値 self.params = {} pre_channel_num = input_dim[0] for idx, conv_param in enumerate([conv_param_1, conv_param_2, conv_param_3, conv_param_4, conv_param_5, conv_param_6]): self.params['W' + str(idx+1)] = weight_init_scales[idx] * np.random.randn(conv_param['filter_num'], pre_channel_num, conv_param['filter_size'], conv_param['filter_size']) self.params['b' + str(idx+1)] = np.zeros(conv_param['filter_num']) pre_channel_num = conv_param['filter_num'] self.params['W7'] = weight_init_scales[6] * np.random.randn(64*4*4, hidden_size) self.params['b7'] = np.zeros(hidden_size) self.params['W8'] = weight_init_scales[7] * np.random.randn(hidden_size, output_size) self.params['b8'] = np.zeros(output_size) # レイヤの生成=========== self.layers = [] self.layers.append(Convolution(self.params['W1'], self.params['b1'], conv_param_1['stride'], conv_param_1['pad'])) self.layers.append(Relu()) self.layers.append(Convolution(self.params['W2'], self.params['b2'], conv_param_2['stride'], conv_param_2['pad'])) self.layers.append(Relu()) self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2)) self.layers.append(Convolution(self.params['W3'], self.params['b3'], conv_param_3['stride'], conv_param_3['pad'])) self.layers.append(Relu()) self.layers.append(Convolution(self.params['W4'], self.params['b4'], conv_param_4['stride'], conv_param_4['pad'])) self.layers.append(Relu()) self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2)) self.layers.append(Convolution(self.params['W5'], self.params['b5'], conv_param_5['stride'], conv_param_5['pad'])) self.layers.append(Relu()) self.layers.append(Convolution(self.params['W6'], self.params['b6'], conv_param_6['stride'], conv_param_6['pad'])) self.layers.append(Relu()) self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2)) self.layers.append(Affine(self.params['W7'], self.params['b7'])) self.layers.append(Relu()) self.layers.append(Dropout(0.5)) self.layers.append(Affine(self.params['W8'], self.params['b8'])) self.layers.append(Dropout(0.5)) self.last_layer = SoftmaxWithLoss() def predict(self, x, train_flg=False): for layer in self.layers: if isinstance(layer, Dropout): x = layer.forward(x, train_flg) else: x = layer.forward(x) return x def loss(self, x, t): y = self.predict(x, train_flg=True) return self.last_layer.forward(y, t) def accuracy(self, x, t, batch_size=100): if t.ndim != 1 : t = np.argmax(t, axis=1) acc = 0.0 for i in range(int(x.shape[0] / batch_size)): tx = x[i*batch_size:(i+1)*batch_size] tt = t[i*batch_size:(i+1)*batch_size] y = self.predict(tx, train_flg=False) y = np.argmax(y, axis=1) acc += np.sum(y == tt) return 
acc / x.shape[0] def gradient(self, x, t): # forward self.loss(x, t) # backward dout = 1 dout = self.last_layer.backward(dout) tmp_layers = self.layers.copy() tmp_layers.reverse() for layer in tmp_layers: dout = layer.backward(dout) # 設定 grads = {} for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)): grads['W' + str(i+1)] = self.layers[layer_idx].dW grads['b' + str(i+1)] = self.layers[layer_idx].db return grads def save_params(self, file_name="params.pkl"): params = {} for key, val in self.params.items(): params[key] = val with open(file_name, 'wb') as f: pickle.dump(params, f) def load_params(self, file_name="params.pkl"): with open(file_name, 'rb') as f: params = pickle.load(f) for key, val in params.items(): self.params[key] = val for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)): self.layers[layer_idx].W = self.params['W' + str(i+1)] self.layers[layer_idx].b = self.params['b' + str(i+1)]
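A minimal train/save/load sketch for the DeepConvNet above (the data is an assumption: x_train as (N, 1, 28, 28) images and t_train as integer labels, as elsewhere in these snippets; neither is defined here):

network = DeepConvNet()
x_batch, t_batch = x_train[:100], t_train[:100]

grads = network.gradient(x_batch, t_batch)
for key in grads.keys():
    network.params[key] -= 0.01 * grads[key]      # plain SGD step; the layers share these arrays

network.save_params("params.pkl")                 # pickle the current parameters
network.load_params("params.pkl")                 # restore them and rebind the layer weights
print(network.accuracy(x_train[:1000], t_train[:1000]))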
class NeuralNetwork():
    def __init__(self, n_features, n_output, n_hidden=30,
                 l2=0.0, l1=0.0, epochs=50, eta=0.001, decrease_const=0.0,
                 shuffle=True, n_minibatches=1, random_state=None):
        np.random.seed(random_state)
        self.n_features = n_features
        self.n_hidden = n_hidden
        self.n_output = n_output
        self.l2 = l2
        self.l1 = l1
        self.epochs = epochs
        self.eta = eta
        self.decrease_const = decrease_const
        self.shuffle = shuffle
        self.n_minibatches = n_minibatches

        self.params = {}
        self._init_weights()

        self.layers = {}
        self.layers['Affine_1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Sigmoid'] = Sigmoid()
        self.layers['Affine_2'] = Affine(self.params['W2'], self.params['b2'])
        self.last_layer = SoftmaxWithLoss()

        self._loss = []
        self._iter_t = 0

    def _init_weights(self):
        ls_nodes = [self.n_features, self.n_hidden, self.n_output]
        scale_1 = np.sqrt(1.0 / ls_nodes[0])
        scale_2 = np.sqrt(1.0 / ls_nodes[1])
        self.params['W1'] = scale_1 * np.random.randn(ls_nodes[0], ls_nodes[1])
        self.params['b1'] = np.zeros(ls_nodes[1])
        self.params['W2'] = scale_2 * np.random.randn(ls_nodes[1], ls_nodes[2])
        self.params['b2'] = np.zeros(ls_nodes[2])

    def predict(self, X):
        for layer in self.layers.values():
            X = layer.forward(X)
        y_hat = X
        return y_hat

    def _calc_loss(self, X, t):
        y_hat = self.predict(X)
        W1, W2 = self.params['W1'], self.params['W2']
        l2_term, l1_term = 0.0, 0.0
        l2_term += 0.5 * self.l2 * np.sum(W1**2)
        l2_term += 0.5 * self.l2 * np.sum(W2**2)
        l1_term += 0.5 * self.l1 * np.abs(W1).sum()
        l1_term += 0.5 * self.l1 * np.abs(W2).sum()
        loss = self.last_layer.forward(y_hat, t) + l2_term + l1_term
        return loss

    def accuracy(self, X, t):
        y_hat = self.predict(X)
        y = np.argmax(y_hat, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(X.shape[0])
        return accuracy

    def _encode_labels(self, y, n_labels):
        onehot = np.zeros((y.shape[0], n_labels))
        for idx, val in enumerate(y):
            onehot[idx, val] = 1.0
        return onehot

    def fit(self, X, y, print_progress=False):
        X_data, y_data = X.copy(), y.copy()
        y_enc = self._encode_labels(y, self.n_output)
        self._loss = []

        for i in range(self.epochs):
            self.eta /= (1 + self.decrease_const * i)

            if print_progress:
                sys.stderr.write('\rEpoch: {}/{}'.format(i + 1, self.epochs))
                sys.stderr.flush()

            if self.shuffle:
                idx = np.random.permutation(y_data.shape[0])
                X_data, y_enc = X_data[idx], y_enc[idx]

            batches = np.array_split(range(y_data.shape[0]), self.n_minibatches)
            for batch in batches:
                # forward
                X_batch, y_batch = X_data[batch], y_enc[batch]
                loss = self._calc_loss(X_batch, y_batch)
                self._loss.append(loss)

                # backward
                delta = 1
                delta = self.last_layer.backward(delta)
                layers = list(self.layers.values())
                layers.reverse()
                for layer in layers:
                    delta = layer.backward(delta)

                # gradients (L2 and L1 regularization terms are added here)
                grads = {}
                W1 = self.layers['Affine_1'].W
                W2 = self.layers['Affine_2'].W
                grads['W1'] = self.layers['Affine_1'].dW + self.l2 * W1 + self.l1 * np.sign(W1)
                grads['b1'] = self.layers['Affine_1'].db
                grads['W2'] = self.layers['Affine_2'].dW + self.l2 * W2 + self.l1 * np.sign(W2)
                grads['b2'] = self.layers['Affine_2'].db

                self._update_grads(self.params, grads)
        return self

    """
    # SGD
    def _update_grads(self, params, grads):
        for key in params.keys():
            params[key] -= self.eta * grads[key]
    """

    # Adam
    def _update_grads(self, params, grads):
        beta1, beta2 = 0.9, 0.999
        eps = 1e-8
        # Keep the first/second moment estimates as instance state so that they
        # accumulate across mini-batches instead of being re-created on every call.
        if not hasattr(self, '_m'):
            self._m = {key: np.zeros_like(val) for key, val in params.items()}
            self._v = {key: np.zeros_like(val) for key, val in params.items()}
        self._iter_t += 1
        t = self._iter_t
        for key in params.keys():
            # Adam update rule factored by (1 - beta)
            self._m[key] += (1 - beta1) * (grads[key] - self._m[key])
            self._v[key] += (1 - beta2) * (grads[key]**2 - self._v[key])
            # Bias-corrected moment estimates
            m_hat = self._m[key] / (1 - beta1**t)
            v_hat = self._v[key] / (1 - beta2**t)
            params[key] -= self.eta * m_hat / (np.sqrt(v_hat) + eps)

    @property
    def loss_(self):
        return self._loss
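For reference, the update that the Adam version of _update_grads above is meant to implement (Kingma & Ba, 2015), with gradient $g_t$, step size $\eta$ and small constant $\epsilon$:

$$m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t, \qquad v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2$$
$$\hat{m}_t = \frac{m_t}{1 - \beta_1^t}, \qquad \hat{v}_t = \frac{v_t}{1 - \beta_2^t}, \qquad \theta_t = \theta_{t-1} - \eta\,\frac{\hat{m}_t}{\sqrt{\hat{v}_t} + \epsilon}$$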
class SimpleConvNet:
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2 * filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2))

        # Initialize the weights
        self.params = {'W1': weight_init_std *
                       np.random.randn(filter_num, input_dim[0], filter_size, filter_size),
                       'b1': np.zeros(filter_num),
                       'W2': weight_init_std *
                       np.random.randn(pool_output_size, hidden_size),
                       'b2': np.zeros(hidden_size),
                       'W3': weight_init_std *
                       np.random.randn(hidden_size, output_size),
                       'b3': np.zeros(output_size)}

        # Create the layers
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                           conv_param['stride'], conv_param['pad'])
        self.layers['Relu1'] = ReLU()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = ReLU()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Compute the loss. x is the input data, t the teacher labels."""
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        acc = 0.0
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i * batch_size:(i + 1) * batch_size]
            tt = t[i * batch_size:(i + 1) * batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)
        return acc / x.shape[0]

    def numerical_gradient(self, x, t):
        loss_w = lambda w: self.loss(x, t)

        grads = {}
        for idx in (1, 2, 3):
            grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)])
        return grads

    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # Collect the gradients
        grads = {'W1': self.layers['Conv1'].dW, 'b1': self.layers['Conv1'].db,
                 'W2': self.layers['Affine1'].dW, 'b2': self.layers['Affine1'].db,
                 'W3': self.layers['Affine2'].dW, 'b3': self.layers['Affine2'].db}
        return grads

    def save_params(self, file_name="params.pkl"):
        params = {}
        for key, val in self.params.items():
            params[key] = val
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val

        for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
            self.layers[key].W = self.params['W' + str(i + 1)]
            self.layers[key].b = self.params['b' + str(i + 1)]
class DeepConvNet: """ 识别率为99%以上的高精度的ConvNet 网络结构如下所示 conv - relu - conv- relu - pool - conv - relu - conv- relu - pool - conv - relu - conv- relu - pool - affine - relu - dropout - affine - dropout - softmax """ def __init__(self, input_dim=(1, 28, 28), conv_param_1=None, conv_param_2=None, conv_param_3=None, conv_param_4=None, conv_param_5=None, conv_param_6=None, hidden_size=50, output_size=10): # 第一个卷积层输入1x28x28,输出16x28x28 if conv_param_1 is None: conv_param_1 = { 'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1 } # 第二个卷积层输入16x28x28,输出16x28x28 if conv_param_2 is None: conv_param_2 = { 'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1 } # 第二个卷积层之后接最大池化层,池化层大小为2x2,步长为2,即高、宽减半 # 第三个卷积层输入16x14x14,输出32x14x14 if conv_param_3 is None: conv_param_3 = { 'filter_num': 32, 'filter_size': 3, 'pad': 1, 'stride': 1 } # 第四个卷积层输入32x14x14,但由于pad2个,因此输出32x16x16 if conv_param_4 is None: conv_param_4 = { 'filter_num': 32, 'filter_size': 3, 'pad': 2, 'stride': 1 } # 第四个卷积层之后接最大池化层,池化层大小为2x2,步长为2,即高、宽减半 # 第五个卷积层输入32x8x8,输出64x8x8 if conv_param_5 is None: conv_param_5 = { 'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1 } # 第五个卷积层输入64x8x8,输出64x8x8 if conv_param_6 is None: conv_param_6 = { 'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1 } """ 卷积层的每个节点只与前一层的filter_size个节点连接, 即本层卷积层的卷积核 高x宽有多少,就和前一层的多少个节点连接。 如果有多个通道,那还要乘上通道数(深度) 这里的所有卷积层都用3x3的大小 各层输出如下: 卷积层1: 16 28 28 卷积层2 | 池化层1: 16 28 28 | 16 14 14 卷积层3: 32 14 14 卷积层4 | 池化层2: 32 16 16 | 32 8 8 卷积层5: 64 8 8 卷积层6: 64 8 8 | 64 4 4 """ pre_node_nums = np.array([ 1 * 3 * 3, # 卷积层1:前一层(输入层)通道数(深度)为1 16 * 3 * 3, # 卷积层2:前一层(卷积层1)通道数(深度)为16 16 * 3 * 3, # 卷积层3:前一层(卷积层2)通道数(深度)为16 32 * 3 * 3, # 卷积层4:前一层(卷积层3)通道数(深度)为32 32 * 3 * 3, # 卷积层5:前一层(卷积层4)通道数(深度)为32 64 * 3 * 3, # 卷积层6:前一层(卷积层5)通道数(深度)为64 # 隐藏层:前一层(池化层),池化层接全连接层需要拉直成一维数组, # 因此隐藏层与前一层(池化层)的连接数为池化层的输出节点总数 64 * 4 * 4, # 输出层:前一层(隐藏层),全连接与前一层全部节点相连,即隐藏层大小 hidden_size ]) # 权重初始化时的标准差。由于使用ReLU激活函数,因此使用He初始化方式 weight_init_scales = np.sqrt(2.0 / pre_node_nums) """初始化权重参数和偏置""" self.params = {} pre_channel_num = input_dim[0] # 记录上一层的通道数(即滤波器的通道数) for idx, conv_param in enumerate([ conv_param_1, conv_param_2, conv_param_3, conv_param_4, conv_param_5, conv_param_6 ]): # 卷积层滤波器的形状:滤波器个数、通道数、高度、宽度 self.params['W'+str(idx+1)] = weight_init_scales[idx] *\ np.random.randn( conv_param['filter_num'], pre_channel_num, conv_param['filter_size'], conv_param['filter_size']) self.params['b' + str(idx + 1)] = np.zeros( conv_param['filter_num']) pre_channel_num = conv_param['filter_num'] # 更新上一层的通道数 self.params['W7'] = weight_init_scales[6] * np.random.randn( 64 * 4 * 4, hidden_size) self.params['b7'] = np.zeros(hidden_size) self.params['W8'] = weight_init_scales[7] * np.random.randn( hidden_size, output_size) self.params['b8'] = np.zeros(output_size) """ 构造神经网络: 书上没有用到之前用的有序字典,其实我觉得很好用,所以就实现了有序字典版本 Conv1->ReLU1->Conv2->ReLU2->Pool1-> Conv3->ReLU3->Conv4->ReLU4->Pool2-> Conv5->ReLU5->Conv6->ReLU6->Pool3-> Affine1(Hidden Layer1)->ReLU7->Dropout1-> Affine2(Output Layer1)->Dropout2------->SoftmaxWithLoss """ self.layers = OrderedDict() self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], stride=conv_param_1['stride'], pad=conv_param_1['pad']) self.layers['ReLU1'] = ReLU() self.layers['Conv2'] = Convolution(self.params['W2'], self.params['b2'], stride=conv_param_2['stride'], pad=conv_param_2['pad']) self.layers['ReLU2'] = ReLU() self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2, pad=0) self.layers['Conv3'] = Convolution(self.params['W3'], self.params['b3'], 
stride=conv_param_3['stride'], pad=conv_param_3['pad']) self.layers['ReLU3'] = ReLU() self.layers['Conv4'] = Convolution(self.params['W4'], self.params['b4'], stride=conv_param_4['stride'], pad=conv_param_4['pad']) self.layers['ReLU4'] = ReLU() self.layers['Pool2'] = Pooling(pool_h=2, pool_w=2, stride=2, pad=0) self.layers['Conv5'] = Convolution(self.params['W5'], self.params['b5'], stride=conv_param_5['stride'], pad=conv_param_5['pad']) self.layers['ReLU5'] = ReLU() self.layers['Conv6'] = Convolution(self.params['W6'], self.params['b6'], stride=conv_param_6['stride'], pad=conv_param_6['pad']) self.layers['ReLU6'] = ReLU() self.layers['Pool3'] = Pooling(pool_h=2, pool_w=2, stride=2, pad=0) self.layers['Affine1'] = Affine(self.params['W7'], self.params['b7']) self.layers['ReLU7'] = ReLU() self.layers['Dropout1'] = Dropout(dropout_ratio=0.5) self.layers['Affine2'] = Affine(self.params['W8'], self.params['b8']) self.layers['Dropout2'] = Dropout(dropout_ratio=0.5) self.last_layer = SoftmaxWithLoss() def predict(self, x, train_flag=False): # 逐层前向传播,预测输入x的输出 # 如果是Dropout层,需要将train_flag参数传入 for layer in self.layers.values(): if isinstance(layer, Dropout): x = layer.forward(x, train_flag) else: x = layer.forward(x) return x def loss(self, x, true_label): # 计算损失值。这里只计算了交叉熵误差,也可以加上L2正则化项 y = self.predict(x, train_flag=True) total_loss = self.last_layer.forward(y, true_label) return total_loss def accuracy(self, x, true_label, batch_size=100): """ 计算输入x的预测准确率。使用batch处理加速运算 :param x: 输入数据 :param true_label: 真实标签 :param batch_size: 批处理数据量 :return: 准确率 """ # 如果真实标签是one-hot编码,先提取成一维数组:一行代表一个真实值 if true_label.ndim != 1: true_label = np.argmax(true_label, axis=1) correct_cnt = 0.0 # 书上原本代码没有处理剩余的这部分数据。 # 在这里加上iters这个变量,用于控制循环次数 if x.shape[0] % batch_size: iters = int(x.shape[0] / batch_size) + 1 else: iters = int(x.shape[0] / batch_size) for i in range(iters): # 获取一个batch的数据和对应的真实标签 temp_x = x[i * batch_size:(i + 1) * batch_size] temp_true_label = true_label[i * batch_size:(i + 1) * batch_size] # 预测这个batch的数据的输出 y = self.predict(temp_x) y = np.argmax(y, axis=1) # 统计每个batch的预测正确数 correct_cnt += np.sum(y == temp_true_label) acc = correct_cnt / x.shape[0] # 计算准确率 return acc def gradient(self, x, true_label): # 先前向传播计算中间值 self.loss(x, true_label) # 逐层反向传播 dout = 1 dout = self.last_layer.backward(dout) layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) # 反向传播结束后从各层提取梯度 grads = {} for i in range(1, 7): grads['W' + str(i)] = self.layers['Conv' + str(i)].dW grads['b' + str(i)] = self.layers['Conv' + str(i)].db grads['W7'] = self.layers['Affine1'].dW grads['b7'] = self.layers['Affine1'].db grads['W8'] = self.layers['Affine2'].dW grads['b8'] = self.layers['Affine2'].db return grads def save_params(self): # 持久化训练好的参数 file_path = './data/params.pkl' params = {} for key, val in self.params.items(): params[key] = val with open(file_path, 'wb') as f: pickle.dump(params, f) def load_params(self): # 加载参数 file_path = './data/params.pkl' with open(file_path, 'rb') as f: params = pickle.load(f) for key, val in params.items(): self.params[key] = val for i in range(1, 7): self.layers['Conv' + str(i)].W = self.params['W' + str(i)] self.layers['Conv' + str(i)].b = self.params['b' + str(i)] self.layers['Affine1'].W = self.params['W7'] self.layers['Affine1'].b = self.params['b7'] self.layers['Affine2'].W = self.params['W8'] self.layers['Affine2'].b = self.params['b8']
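The pre_node_nums / weight_init_scales pair in DeepConvNet above implements He initialization: each weight tensor is drawn with standard deviation sqrt(2 / n), where n is the number of input connections per node, which is the recommended scale for ReLU activations. A small sketch reproducing those scales:

import numpy as np

hidden_size = 50
pre_node_nums = np.array([1 * 3 * 3, 16 * 3 * 3, 16 * 3 * 3, 32 * 3 * 3,
                          32 * 3 * 3, 64 * 3 * 3, 64 * 4 * 4, hidden_size])
weight_init_scales = np.sqrt(2.0 / pre_node_nums)   # He initialization std for each layer
print(np.round(weight_init_scales, 4))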
class TwoLayerNet: def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01): # initialize weights self.params = {} self.params['W1'] = weight_init_std * np.random.randn( input_size, hidden_size) self.params['b1'] = np.zeros(hidden_size) self.params['W2'] = weight_init_std * np.random.randn( hidden_size, output_size) self.params['b2'] = np.zeros(output_size) # create layers self.layers = OrderedDict() self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1']) self.layers['Relu1'] = Relu() self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2']) self.lastLayer = SoftmaxWithLoss() def predict(self, x): for layer in self.layers.values(): x = layer.forward(x) return x # x: input data, t: teacher labels def loss(self, x, t): y = self.predict(x) return self.lastLayer.forward(y, t) def accuracy(self, x, t): y = self.predict(x) y = np.argmax(y, axis=1) if t.ndim != 1: t = np.argmax(t, axis=1) accuracy = np.sum(y == t) / float(x.shape[0]) return accuracy # x: input data, t: teacher labels def numerical_gradient(self, x, t): loss_W = lambda W: self.loss(x, t) grads = {} grads['W1'] = numerical_gradient(loss_W, self.params['W1']) grads['b1'] = numerical_gradient(loss_W, self.params['b1']) grads['W2'] = numerical_gradient(loss_W, self.params['W2']) grads['b2'] = numerical_gradient(loss_W, self.params['b2']) return grads def gradient(self, x, t): # forward self.loss(x, t) # backward dout = 1 dout = self.lastLayer.backward(dout) layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) # collect gradients grads = {} grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers[ 'Affine1'].db grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers[ 'Affine2'].db return grads
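A minimal gradient-check sketch for the TwoLayerNet above (hypothetical usage; it assumes the numerical_gradient helper referenced inside the class is importable from the same codebase). Comparing the numerical and backpropagation gradients on a few dummy samples should give differences close to zero:

import numpy as np

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
x = np.random.rand(3, 784)                  # 3 dummy inputs
t = np.eye(10)[np.random.choice(10, 3)]     # 3 dummy one-hot labels

grad_numerical = network.numerical_gradient(x, t)
grad_backprop = network.gradient(x, t)

for key in grad_numerical:
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key, diff)                        # expected to be tiny, e.g. on the order of 1e-10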
class ConvNet: def __init__(self, input_dim=(1, 28, 28), use_conv2=True, use_affine2=True, conv_param={ 'filter_num': 128, 'filter_size': 3, 'pad': 1, 'stride': 1 }, pool_param={ 'pool_size': 2, 'pad': 1, 'stride': 2 }, conv_param2={ 'filter_num2': 128, 'filter_size2': 3, 'pad2': 1, 'stride2': 1 }, pool_param2={ 'pool_size2': 2, 'pad2': 1, 'stride2': 2 }, hidden_size=128, hidden_size2=128, output_size=15, weight_init_std=0.01, use_batchnorm_C1=False, use_batchnorm_C2=False, use_batchnorm_A1=False, use_batchnorm_A2=False, use_dropout_A1=False, dropout_ratio_A1=0.5, use_dropout_A2=False, dropout_ratio_A2=0.5, use_succession=False, data_num=1, prediction_mode=False): filter_num = conv_param['filter_num'] filter_size = conv_param['filter_size'] filter_pad = conv_param['pad'] filter_stride = conv_param['stride'] pool_size = pool_param['pool_size'] pool_pad = pool_param['pad'] pool_stride = pool_param['stride'] filter_num2 = conv_param2['filter_num2'] filter_size2 = conv_param2['filter_size2'] filter_pad2 = conv_param2['pad2'] filter_stride2 = conv_param2['stride2'] pool_size2 = pool_param2['pool_size2'] pool_pad2 = pool_param2['pad2'] pool_stride2 = pool_param2['stride2'] input_size = input_dim[1] conv_output_size = (input_size + 2 * filter_pad - filter_size ) // filter_stride + 1 # 畳み込み後のサイズ(H,W共通) pool_output_size = (conv_output_size + 2 * pool_pad - pool_size) // pool_stride + 1 # プーリング後のサイズ(H,W共通) pool_output_pixel = filter_num * pool_output_size * pool_output_size # プーリング後のピクセル総数 input_size2 = pool_output_size conv_output_size2 = (input_size2 + 2 * filter_pad2 - filter_size2 ) // filter_stride2 + 1 # 畳み込み後のサイズ(H,W共通) pool_output_size2 = (conv_output_size2 + 2 * pool_pad2 - pool_size2 ) // pool_stride2 + 1 # プーリング後のサイズ(H,W共通) pool_output_pixel2 = filter_num2 * pool_output_size2 * pool_output_size2 # プーリング後のピクセル総数 self.use_conv2 = use_conv2 self.use_affine2 = use_affine2 self.use_batchnorm_C1 = use_batchnorm_C1 self.use_batchnorm_C2 = use_batchnorm_C2 self.use_batchnorm_A1 = use_batchnorm_A1 self.use_batchnorm_A2 = use_batchnorm_A2 self.use_dropout_A1 = use_dropout_A1 self.use_dropout_A2 = use_dropout_A2 self.dropout_ratio_A1 = dropout_ratio_A1 self.dropout_ratio_A2 = dropout_ratio_A2 self.use_succession = use_succession self.data_num = data_num self.prediction_mode = prediction_mode # if W1 == []: self.params = {} self.paramsB = {} std = weight_init_std if self.use_succession: #----------重みをpickleから代入-------------- with open("params_" + str(self.data_num) + ".pickle", "rb") as f: params_s = pickle.load(f) with open("params_BN" + str(self.data_num) + ".pickle", "rb") as f: params_BN = pickle.load(f) # self.params = {} # self.paramsB = {} self.params['W1'] = params_s['W1'] # W1は畳み込みフィルターの重みになる self.params['b1'] = params_s['b1'] if self.use_batchnorm_C1: self.paramsB["BC1_moving_mean"] = params_BN["BC1_moving_mean"] self.paramsB["BC1_moving_var"] = params_BN["BC1_moving_var"] if self.use_conv2: self.params['W1_2'] = params_s['W1_2'] self.params['b1_2'] = params_s['b1_2'] if self.use_batchnorm_C2: self.paramsB["BC2_moving_mean"] = params_BN[ "BC2_moving_mean"] self.paramsB["BC2_moving_var"] = params_BN[ "BC2_moving_var"] self.params['W2'] = params_s['W2'] self.params['b2'] = params_s['b2'] if self.use_batchnorm_A1: self.paramsB["BA1_moving_mean"] = params_BN["BA1_moving_mean"] self.paramsB["BA1_moving_var"] = params_BN["BA1_moving_var"] if self.use_affine2: self.params['W2_2'] = params_s['W2_2'] self.params['b2_2'] = params_s['b2_2'] if self.use_batchnorm_A2: self.paramsB["BA2_moving_mean"] = 
params_BN[ "BA2_moving_mean"] self.paramsB["BA2_moving_var"] = params_BN[ "BA2_moving_var"] self.params['W3'] = params_s['W3'] self.params['b3'] = params_s['b3'] #----------重みをpickleから代入-------------- else: # 重みの初期化 #----第1層Conv---- self.params['W1'] = std * np.random.randn( filter_num, input_dim[0], filter_size, filter_size) # W1は畳み込みフィルターの重みになる self.params['b1'] = np.zeros(filter_num) #b1は畳み込みフィルターのバイアスになる #----第2層Conv---- if self.use_conv2: self.params['W1_2'] = std * np.random.randn( filter_num2, filter_num, filter_size2, filter_size2) #-----追加------ self.params['b1_2'] = np.zeros(filter_num2) #-----追加------ #----第3層Affine---- self.params['W2'] = std * np.random.randn( pool_output_pixel2, hidden_size) else: self.params['W2'] = std * np.random.randn( pool_output_pixel, hidden_size) self.params['b2'] = np.zeros(hidden_size) #----第4層Affine---- if self.use_affine2: self.params['W2_2'] = std * np.random.randn( hidden_size, hidden_size2) #-----追加------ self.params['b2_2'] = np.zeros(hidden_size2) #-----追加------ #----第5層出力---- self.params['W3'] = std * np.random.randn( hidden_size2, output_size) #--変更-- else: self.params['W3'] = std * np.random.randn( hidden_size, output_size) #--変更-- self.params['b3'] = np.zeros(output_size) # レイヤの生成 self.layers = OrderedDict() #----第1層Conv---- self.layers['Conv1'] = Convolution( self.params['W1'], self.params['b1'], conv_param['stride'], conv_param['pad']) # W1が畳み込みフィルターの重み, b1が畳み込みフィルターのバイアスになる if self.use_batchnorm_C1: print(conv_output_size) print(conv_output_size ^ 2) batch_num = conv_output_size * conv_output_size if self.prediction_mode: self.layers['BatchNormalization_C1'] = BatchNormalization( np.ones(batch_num, filter_num), np.zeros(filter_num), moving_mean=self.paramsB["BC1_moving_mean"], moving_var=self.paramsB["BC1_moving_var"]) else: self.layers['BatchNormalization_C1'] = BatchNormalization( np.ones(batch_num), np.zeros(batch_num), DataNum=self.data_num, LayerNum="C1") self.paramsB["BC1_moving_mean"] = self.layers[ 'BatchNormalization_C1'].moving_mean self.paramsB["BC1_moving_var"] = self.layers[ 'BatchNormalization_C1'].moving_var self.layers['ReLU1'] = ReLU() self.layers['Pool1'] = MaxPooling(pool_h=pool_size, pool_w=pool_size, stride=pool_stride, pad=pool_pad) #----第2層Conv---- if self.use_conv2: self.layers['Conv1_2'] = Convolution( self.params['W1_2'], self.params['b1_2'], conv_param2['stride2'], conv_param2['pad2']) #-----追加------ if self.use_batchnorm_C2: batch_num2 = conv_output_size2 * conv_output_size2 * filter_num2 if self.prediction_mode: self.layers['BatchNormalization_C2'] = BatchNormalization( np.ones(batch_num), np.zeros(batch_num), moving_mean=self.paramsB["BC2_moving_mean"], moving_var=self.paramsB["BC12moving_var"]) else: self.layers['BatchNormalization_C2'] = BatchNormalization( np.ones(batch_num), np.zeros(batch_num), DataNum=self.data_num, LayerNum="C2") self.paramsB["BC2_moving_mean"] = self.layers[ 'BatchNormalization_C2'].moving_mean self.paramsB["BC2_moving_var"] = self.layers[ 'BatchNormalization_C2'].moving_var self.layers['ReLU1_2'] = ReLU() #-----追加------ self.layers['Pool1_2'] = MaxPooling(pool_h=pool_size2, pool_w=pool_size2, stride=pool_stride2, pad=pool_pad2) #-----追加------ #----第3層Affine---- self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2']) if self.use_batchnorm_A1: if self.prediction_mode: self.layers['BatchNormalization_A1'] = BatchNormalization( np.ones(hidden_size), np.zeros(hidden_size), moving_mean=self.paramsB["BA1_moving_mean"], moving_var=self.paramsB["BA1_moving_var"]) else: 
self.layers['BatchNormalization_A1'] = BatchNormalization( np.ones(hidden_size), np.zeros(hidden_size), DataNum=self.data_num, LayerNum="A1") self.paramsB["BA1_moving_mean"] = self.layers[ 'BatchNormalization_A1'].moving_mean self.paramsB["BA1_moving_var"] = self.layers[ 'BatchNormalization_A1'].moving_var if self.use_dropout_A1: self.layers['DropoutA1'] = Dropout(self.dropout_ratio_A1) self.layers['ReLU2'] = ReLU() # ----第4層Affine---- if self.use_affine2: self.layers['Affine2'] = Affine( self.params['W2_2'], self.params['b2_2']) #-----追加------ if self.use_batchnorm_A2: if self.prediction_mode: self.layers['BatchNormalization_A2'] = BatchNormalization( np.ones(hidden_size2), np.zeros(hidden_size2), moving_mean=self.paramsB["BA2_moving_mean"], moving_var=self.paramsB["BA2_moving_var"]) else: self.layers['BatchNormalization_A2'] = BatchNormalization( np.ones(hidden_size2), np.zeros(hidden_size2), DataNum=self.data_num, LayerNum="A2") self.paramsB["BA2_moving_mean"] = self.layers[ 'BatchNormalization_A2'].moving_mean self.paramsB["BA2_moving_var"] = self.layers[ 'BatchNormalization_A2'].moving_var if self.use_dropout_A2: self.layers['DropoutA2'] = Dropout(self.dropout_ratio_A2) self.layers['ReLU3'] = ReLU() #-----追加------ #----第5層出力---- self.layers['Affine3'] = Affine(self.params['W3'], self.params['b3']) self.last_layer = SoftmaxWithLoss() # print('input size',input_size) # print('conv_output_size',conv_output_size) # print('pool_output_size',pool_output_size) # print('pool_output_pixel',pool_output_pixel) # print('input size2',input_size2) # print('conv_output_size2',conv_output_size2) # print('pool_output_size2',pool_output_size2) # print('pool_output_pixel2',pool_output_pixel2) def predict(self, x, train_flg=False): for key, layer in self.layers.items(): if "Dropout" in key or "BatchNorm" in key: x = layer.forward(x, train_flg) else: x = layer.forward(x) return x def loss(self, x, t, train_flg=False): y = self.predict(x, train_flg) return self.last_layer.forward(y, t) def accuracy(self, x, t, batch_size=100): if t.ndim != 1: t = np.argmax(t, axis=1) acc = 0.0 for i in range(int(x.shape[0] / batch_size)): tx = x[i * batch_size:(i + 1) * batch_size] tt = t[i * batch_size:(i + 1) * batch_size] y = self.predict(tx, train_flg=False) y = np.argmax(y, axis=1) acc += np.sum(y == tt) return acc / x.shape[0] def gradient(self, x, t): # forward self.loss(x, t, train_flg=True) # backward dout = 1 dout = self.last_layer.backward(dout) layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) # 設定 grads = {} grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers[ 'Conv1'].db if self.use_conv2: grads['W1_2'], grads['b1_2'] = self.layers[ 'Conv1_2'].dW, self.layers['Conv1_2'].db #-----追加------ grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers[ 'Affine1'].db if self.use_affine2: grads['W2_2'], grads['b2_2'] = self.layers[ 'Affine2'].dW, self.layers['Affine2'].db #-----追加------ grads['W3'], grads['b3'] = self.layers['Affine3'].dW, self.layers[ 'Affine3'].db if self.prediction_mode == False: if self.use_batchnorm_A1: self.paramsB["BA1_moving_mean"] = self.layers[ 'BatchNormalization_A1'].moving_mean self.paramsB["BA1_moving_var"] = self.layers[ 'BatchNormalization_A1'].moving_var if self.use_batchnorm_A2: self.paramsB["BA2_moving_mean"] = self.layers[ 'BatchNormalization_A2'].moving_mean self.paramsB["BA2_moving_var"] = self.layers[ 'BatchNormalization_A2'].moving_var if self.use_batchnorm_C1: self.paramsB["BC1_moving_mean"] = self.layers[ 
'BatchNormalization_C1'].moving_mean self.paramsB["BC1_moving_var"] = self.layers[ 'BatchNormalization_C1'].moving_var if self.use_batchnorm_C2: self.paramsB["BC2_moving_mean"] = self.layers[ 'BatchNormalization_C2'].moving_mean self.paramsB["BC2_moving_var"] = self.layers[ 'BatchNormalization_C2'].moving_var return grads
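A minimal sketch of the shape arithmetic in ConvNet.__init__ above, using its default hyperparameters (28x28 input, 3x3 convolutions with pad 1 and stride 1, 2x2 pooling with pad 1 and stride 2), to show how pool_output_pixel2 is derived:

input_size = 28
conv_output_size = (input_size + 2 * 1 - 3) // 1 + 1             # 28
pool_output_size = (conv_output_size + 2 * 1 - 2) // 2 + 1       # 15
conv_output_size2 = (pool_output_size + 2 * 1 - 3) // 1 + 1      # 15
pool_output_size2 = (conv_output_size2 + 2 * 1 - 2) // 2 + 1     # 8 (integer division)
pool_output_pixel2 = 128 * pool_output_size2 * pool_output_size2 # 8192 input rows for W2
print(conv_output_size, pool_output_size, conv_output_size2, pool_output_size2, pool_output_pixel2)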
class SimpleConvNet: def __init__(self, input_dim=(1, 28, 28), conv_param=None, hidden_size=100, output_size=10, weight_init_std=0.01, regularizer_lambda=0.1): # 卷积层的默认参数:默认情况下滤波器个数为30个,大小为5x5,不填充,步长1 if conv_param is None: conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1} filter_num = conv_param['filter_num'] filter_size = conv_param['filter_size'] filter_pad = conv_param['pad'] filter_stride = conv_param['stride'] input_size = input_dim[1] # 输入层的矩阵大小:单通道下二维矩阵的宽/高 conv_output_size = int((input_size + 2 * filter_pad - filter_size) / filter_stride + 1) # 卷积层输出的单个特征图的大小 # 最大池化层的输出大小:池化后保持特征图个数不变,由于使用的是2x2的最大 # 池化层,因此宽/高都变为原来的一半。 # 总的输出元素个数为:特征图个数 * (卷积层输出 / 2) * (卷积层输出 / 2) # 因为这里的简单CNN中池化层后面接全连接层, # 需要将池化层的节点拉平成一个一维数组 pool_output_size = int(filter_num * (conv_output_size / 2) ** 2) self.regularizer_lambda = regularizer_lambda # 正则化强度 # 初始化神经网络各层的参数:卷积层、(池化层)、全连接层、全连接层 # 其中池化层没有需要训练的参数,因此不需要初始化。 self.params = {} # 第一层(卷积层):滤波器的参数(权重参数) + 偏置参数 # 滤波器的参数有4个:滤波器个数、通道数、高、宽 self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size) # 卷积层的偏置参数:一个滤波器需要一个偏置,因此一共filter_num个偏置 self.params['b1'] = np.zeros(filter_num) # 全连接层(在这里是一个隐藏层)权重参数: # 输入节点数为池化层的所有节点个数,输出为隐藏层大小 self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size) self.params['b2'] = np.zeros(hidden_size) # 全连接层(在这里是输出层)权重参数: # 输入节点数为隐藏层的所有节点个数,输出为输出层大小 self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size) self.params['b3'] = np.zeros(output_size) # 构造神经网络: # 卷积层、激活层(ReLU层)、最大池化层、 # 仿射层(隐藏层)、激活层(ReLU层)、仿射层(输出层) self.layers = OrderedDict() self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], conv_param['stride'], conv_param['pad']) self.layers['ReLU1'] = ReLU() self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2) self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2']) self.layers['ReLU2'] = ReLU() self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3']) # 最后加入一层SoftmaxWithLoss层用于计算交叉熵误差,帮助训练神经网络 self.last_layer = SoftmaxWithLoss() def predict(self, x): # 逐层前向传播,预测输入x的输出 for layer in self.layers.values(): x = layer.forward(x) return x def loss(self, x, true_label): """ 计算损失。书上原本只计算了交叉熵误差,我在这里加上L2正则化 :param x: :param true_label: :return: """ # 计算交叉熵误差 y = self.predict(x) total_loss = self.last_layer.forward(y, true_label) # 计算L2正则化项。不知为何加入L2正则化之后损失收敛不了,一直递增 # 但是训练是正常进行的,准确率也很稳定 regularizer = 0 for idx in (1, 2, 3): W = self.params['W'+str(idx)] regularizer += 0.5 * self.regularizer_lambda * np.sum(W ** 2) total_loss += regularizer return total_loss def accuracy(self, x, true_label, batch_size=100): """ 计算输入x的预测准确率。使用batch处理加速运算 :param x: 输入数据 :param true_label: 真实标签 :param batch_size: 批处理数据量 :return: 准确率 """ # 如果真实标签是one-hot编码,先提取成一维数组:一行代表一个真实值 if true_label.ndim != 1: true_label = np.argmax(true_label, axis=1) correct_cnt = 0.0 # 书上原本代码没有处理剩余的这部分数据。 # 在这里加上iters这个变量,用于控制循环次数 if x.shape[0] % batch_size: iters = int(x.shape[0] / batch_size) + 1 else: iters = int(x.shape[0] / batch_size) for i in range(iters): # 获取一个batch的数据和对应的真实标签 temp_x = x[i * batch_size: (i+1) * batch_size] temp_true_label = true_label[i * batch_size: (i+1) * batch_size] # 预测这个batch的数据的输出 y = self.predict(temp_x) y = np.argmax(y, axis=1) # 统计每个batch的预测正确数 correct_cnt += np.sum(y == temp_true_label) acc = correct_cnt / x.shape[0] # 计算准确率 return acc def numerical_gradient(self, x, true_label): # 数值方法计算梯度 loss_func = lambda _: self.loss(x, true_label) grads = {} for idx in range(1, 4): 
grads['W'+str(idx)] = ng(loss_func, self.params['W'+str(idx)]) grads['b'+str(idx)] = ng(loss_func, self.params['b'+str(idx)]) return grads def gradient(self, x, true_label): """ 反向传播计算梯度 :param x: :param true_label: :return: """ # 先前向传播计算中间值 self.loss(x, true_label) """反向传播""" dout = 1 dout = self.last_layer.backward(dout) # 反向传播经过输出层的激活函数 # 逐层反向传播 layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) # 反向传播结束后从各层提取梯度 grads = {} grads['W1'] = self.layers['Conv1'].dW grads['b1'] = self.layers['Conv1'].db grads['W2'] = self.layers['Affine1'].dW grads['b2'] = self.layers['Affine1'].db grads['W3'] = self.layers['Affine2'].dW grads['b3'] = self.layers['Affine2'].db return grads def save_params(self): # 持久化训练好的参数 file_path = './data/params.pkl' with open(file_path, 'wb') as f: pickle.dump(self.params, f) def load_params(self): # 加载参数 file_path = './data/params.pkl' with open(file_path, 'rb') as f: params = pickle.load(f) for key, val in params.items(): # 直接将params赋值给self.params的话, # 改变params也会改变self.params,不安全 self.params[key] = val
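A likely reason the regularized loss above keeps increasing while accuracy stays stable is that gradient() returns only the cross-entropy gradients, so the 0.5 * lambda * ||W||^2 term added in loss() never affects the weight updates. A minimal, hypothetical sketch of the missing step, reusing the attribute names defined above:

def gradient_with_l2(network, x, true_label):
    # hypothetical helper, not part of the class above: backprop gradients plus the
    # L2 penalty's gradient, d/dW [0.5 * lambda * ||W||^2] = lambda * W
    grads = network.gradient(x, true_label)
    for idx in (1, 2, 3):
        key = 'W' + str(idx)
        grads[key] = grads[key] + network.regularizer_lambda * network.params[key]
    return grads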
class SimpleConvNet: """ 1st hidden layer: Convolution -> ReLU -> Pooling 2nd hidden layer: Affine -> ReLU (fully-connected network, 완전연결층) 출력층: Affine -> SoftmaxWithLoss """ def __init__(self, input_dim=(1, 28, 28), conv_param={ 'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1 }, hidden_size=100, output_size=10, weight_init_std=0.01): filter_num = conv_param['filter_num'] filter_size = conv_param['filter_size'] filter_pad = conv_param['pad'] filter_stride = conv_param['stride'] input_size = input_dim[1] conv_output_size = (input_size - filter_size + 2 * filter_pad) / filter_stride + 1 pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2)) """ 인스턴스 초기화 - CNN 구성, 변수들 초기화""" # CNN layer에서 필요한 파라미터들 self.params = dict() self.params['W1'] = weight_init_std * \ np.random.randn(filter_num, input_dim[0], filter_size, filter_size) self.params['b1'] = np.zeros(filter_num) self.params['W2'] = weight_init_std * \ np.random.randn(pool_output_size, hidden_size) self.params['b2'] = np.zeros(hidden_size) self.params['W3'] = weight_init_std * \ np.random.randn(hidden_size, output_size) self.params['b3'] = np.zeros(output_size) # CNN layer(계층) 생성, 연결 self.layers = OrderedDict() self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], conv_param['stride'], conv_param['pad']) self.layers['Relu1'] = Relu() self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2) self.layers['Relu2'] = Relu() self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3']) self.last_layer = SoftmaxWithLoss() def predict(self): for layer in self.layers.values(): x = layer.forward(x) return x def loss(self, x, t): y = self.predict(x) return self.last_layer.forward(y, t) def gradient(self, x, t): # 순전파 self.loss(x, t) # 역전파 dout = 1 dout = self.last_layer.backward(dout) layers = list(self.last_layer.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) # 결과 저장 grads = {} grads['W1'] = self.layers['Conv1'].dW grads['b1'] = self.layers['Conv1'].db grads['W2'] = self.layers['Affine1'].dW grads['b2'] = self.layers['Affine1'].db grads['W3'] = self.layers['Affine2'].dW grads['b3'] = self.layers['Affine2'].db return grads
class SimpleConvNet: """CNN""" def __init__(self, input_dim=(1, 28, 28), conv_param={ 'filter_num': 30, "filter_size": 5, 'pad': 0, 'stride': 1 }, hidden_size=100, output_size=10, weight_init_std=0.01): """ :param input_dim:输入数据的维度:(通道,高,长) :param conv_param:卷积层的超参数(字典)。字典的关键字如下: filter_num―滤波器的数量 filter_size―滤波器的大小 stride―步幅 pad―填充 :param hidden_size:隐藏层(全连接)的神经元数量 :param output_size:输出层(全连接)的神经元数量 :param weight_init_std:初始化时权重的标准差 """ filter_num = conv_param['filter_num'] # 滤波器数量 filter_size = conv_param['filter_size'] # 滤波器大小 filter_pad = conv_param['pad'] # 滤波器填充 filter_stride = conv_param['stride'] # 滤波器步幅 input_size = input_dim[1] conv_output_size = (input_size - filter_size + 2 * filter_pad) / \ filter_stride + 1 pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2)) # 权重参数初始化 self.params = { 'W1': weight_init_std * np.random.randn( filter_num, input_dim[0], filter_size, filter_size), # 卷积层权重 'b1': np.zeros(filter_num), # 卷积层偏置 'W2': weight_init_std * np.random.randn(pool_output_size, hidden_size), 'b2': np.zeros(hidden_size), 'W3': weight_init_std * np.random.randn(hidden_size, output_size), 'b3': np.zeros(output_size) } # 生成必要的层 self.layers = OrderedDict() self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], conv_param['stride'], conv_param['pad']) self.layers['Relu1'] = Relu() self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2) self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2']) self.layers['Relu2'] = Relu() self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3']) self.last_layer = SoftmaxWithLoss() def predict(self, x): """预测""" for layer in self.layers.values(): x = layer.forward(x) return x def loss(self, x, t): """损失函数""" y = self.predict(x) return self.last_layer.forward(y, t) def gradient(self, x, t): """方向传播发求梯度""" self.loss(x, t) dout = 1 dout = self.last_layer.backward(dout) layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) grads = { 'W1': self.layers['Conv1'].dW, 'b1': self.layers['Conv1'].db, 'W2': self.layers['Affine1'].dW, 'b2': self.layers['Affine1'].db, 'W3': self.layers['Affine2'].dW, 'b3': self.layers['Affine2'].db } return grads def save_params(self, file_name="params.pkl"): """保存权重参数""" params = {} for key, val in self.params.items(): params[key] = val with open(file_name, 'wb') as f: pickle.dump(params, f) def load_params(self, file_name="params.pkl"): """读取权重参数""" with open(file_name, 'rb') as f: params = pickle.load(f) for key, val in params.items(): self.params[key] = val for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']): self.layers[key].W = self.params['W' + str(i + 1)] self.layers[key].b = self.params['b' + str(i + 1)] def accuracy(self, x, t, batch_size=100): """计算精度""" if t.ndim != 1: t = np.argmax(t, axis=1) acc = 0.0 for i in range(int(x.shape[0] / batch_size)): tx = x[i * batch_size:(i + 1) * batch_size] tt = t[i * batch_size:(i + 1) * batch_size] y = self.predict(tx) y = np.argmax(y, axis=1) acc += np.sum(y == tt) return acc / x.shape[0]
class SimpleConvNet: """ X > Convolution > activation function > pooling : Convolution > activation function > pooling = 1개의 활성곱 레이어/ 1 hidden layer : there can be multiples of layers 1st hidden layer: Convolution (W,b) -> ReLU -> Pooling 2nd hidden layer: Affine (W,b) -> ReLU (fully-connected network, 완전 연결층) 출력층: Affine (W,b) -> SoftmaxWithLoss # batch_normalization을 넣는다고 하면 또 다른 파라미터 (gamma, beta)가 있다 # 파라미터가 많아지면 gradient를 계산하는 시간이 길어진다 """ def __init__(self, input_dim = (1, 28, 28), conv_params = {'filter_num':30,'filter_size': 5, 'pad': 0, 'stride':1}, hidden_size = 100, output_size = 10, weight_init_std = 0.01): """ 인스턴스 초기화 (변수들의 초기값을 줌) - CNN 구성, 변수들 초기화 input_dim: 입력 데이터 차원, MINIST인 경우(1, 28, 28) conv_param: Convolution 레이어의 파라미터(filter, bias)를 생성하기 위해 필요한 값들 필터 개수 (filter_num), 필터 크기(filter_size = filter_height = filter_width), 패딩 개수(pad), 보폭(stride) hidden_size: Affine 계층에서 사용할 뉴런의 개수 -> W 행렬의 크기 output_size: 출력값의 원소의 개수. MNIST인 경우 10 weight_init_std: 가중치(weight) 행렬을 난수로 초기화 할 때 사용할 표준편차 """ filter_num = conv_params['filter_num'] filter_size = conv_params['filter_size'] filter_pad = conv_params['pad'] filter_stride = conv_params['stride'] input_size = input_dim[1] conv_output_size = (input_size - filter_size + 2 * filter_pad) / \ filter_stride + 1 pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2)) # CNN Layer에서 필요한 파라미터들 self.params = dict() self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size) self.params['b1'] = np.zeros(filter_num) self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size) self.params['b2'] = np.zeros(hidden_size) self.params['W'] = weight_init_std * np.random.randn(hidden_size, output_size) self.params['b3'] = np.zeros(output_size) # CNN Layer(계층) 생성, 연결 self.layers = OrderedDict() # 방법 1 __init__(self,W,b) 라고 주고, self.W = W, self.b = b 를 선언 # self.W = W # 난수로 생성하려고 해도 데이터의 크기(size)를 알아야 필터를 생성할 수 있다 # self.b = b # bias의 크기는 필터의 크기와 같다. 마찬가지로 난수로 생성해도 크기를 알아야한다 => dimension 결정 # 방법 2 # input_dim = (1, 28, 28) = MNIST를 위한 클래스 # dimension을 주도록 설정 + 필터갯수가 있도록 설정해줘야한다 # convolution 할 때 필터를 몇번 만들 것인가 -> 난수로 만들어서 넣어줄 수 있다 # key값 self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], conv_params['stride'], conv_params['pad']) # W와 b를 선언 self.layers['ReLu1'] = Relu() # x -> Convolution에서 전해주는 값 self.layers['Pool1'] = Pooling(pool_h = 2, pool_w =2, stride =2) self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2']) self.layers['Relu2'] = Relu() self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3']) self.last_layer = SoftmaxWithLoss() def predict(self, x): """ network의 목적: 예측하는 것 """ for layer in self.layers.vlaues(): x = layer.forward(x) return x def loss(self, x, t): """ 순반향 전파가 모두 끝나고 손실 계산 -> 이 손실을 꺼꾸로 보내면서 gradient를 계산 """ y = self.predict(x) return self.last_layer.forward(y, t) def accuracy(self): pass def gradient(self, x, t): # 순전파 self.loss(x,t) # 역전파 dout = 1 dout = self.last_layer.backward(dout) layers = list(self.layers.vlaues()) layers.reverse() for layer in layers: dout = layer.backward(dout) #결과저장 grads = {} grads['W1'] = self.layers['Conv1'].dW grads['b1'] = self.layers['Conv1'].db grads['W2'] = self.layers['Affine1'].dW grads['b2'] = self.layers['Affine1'].db grads['W3'] = self.layers['Affine2'].dW grads['b3'] = self.layers['Affine2'].db