class TwoLayerNet: def __init__(self, input_size: int, hidden_size: int, output_size: int): W1 = 0.01 * np.random.randn(input_size, hidden_size) b1 = np.zeros(hidden_size) W2 = 0.01 * np.random.randn(hidden_size, output_size) b2 = np.zeros(output_size) self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)] self.loss_layer = SoftmaxWithLoss() self.params, self.grads = [], [] for layer in self.layers: self.params += layer.params self.grads += layer.grads def predict(self, x: np.ndarray) -> np.ndarray: for layer in self.layers: x = layer.forward(x) return x def forward(self, x: np.ndarray, t: np.ndarray) -> np.ndarray: score = self.predict(x) loss = self.loss_layer.forward(score, t) return loss def backward(self, dout: int = 1) -> np.ndarray: dout = self.loss_layer.backward(dout) for layer in reversed(self.layers): dout = layer.backward(dout) return dout
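# A minimal training-loop sketch for a net like the one above. It assumes the Affine/Sigmoid/
# SoftmaxWithLoss layers expose matching params/grads lists (as the class expects) and that
# SoftmaxWithLoss accepts one-hot targets; the toy data, learning rate, and plain SGD update
# are illustrative only, not part of the original code.
import numpy as np

x = np.random.randn(100, 4)                       # hypothetical batch: 100 samples, 4 features
t = np.eye(3)[np.random.randint(0, 3, size=100)]  # hypothetical one-hot targets, 3 classes
net = TwoLayerNet(input_size=4, hidden_size=10, output_size=3)
lr = 0.1
for epoch in range(300):
    loss = net.forward(x, t)
    net.backward()                                # fills net.grads in place
    for p, g in zip(net.params, net.grads):
        p -= lr * g                               # in-place update keeps the layers' references valid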
class SimpleCBOW: def __init__(self, vocab_size, hidden_size): V, H = vocab_size, hidden_size W_in = 0.01 * np.random.randn(V, H).astype('f') W_out = 0.01 * np.random.randn(H, V).astype('f') self.in_layer0 = MatMul(W_in) self.in_layer1 = MatMul(W_in) self.out_layer = MatMul(W_out) self.loss_layer = SoftmaxWithLoss() layers = [self.in_layer0, self.in_layer1, self.out_layer] self.params, self.grads = [], [] for layer in layers: self.params += layer.params self.grads += layer.grads self.word_vecs = W_in def forward(self, contexts, target): h0 = self.in_layer0.forward(contexts[:, 0]) h1 = self.in_layer1.forward(contexts[:, 1]) h = (h0 + h1) * 0.5 score = self.out_layer.forward(h) loss = self.loss_layer.forward(score, target) return loss def backward(self, dout=1): ds = self.loss_layer.backward(dout) da = self.out_layer.backward(ds) da *= 0.5 self.in_layer1.backward(da) self.in_layer0.backward(da) return None
class TwoLayersNet: def __init__(self, input_size, hidden_size, output_size): I, H, O = input_size, hidden_size, output_size w1 = np.random.randn(I, H) * 0.01 b1 = np.zeros(H) #np.random.randn(H) w2 = np.random.randn(H, O) * 0.01 b2 = np.zeros(O) #np.random.randn(O) self.layers = [Affine(w1, b1), Sigmoid(), Affine(w2, b2)] self.loss_layer = SoftmaxWithLoss() self.params, self.grads = [], [] for l in self.layers: self.params += l.params self.grads += l.grads # Parameters and gradients are gathered only here, so each layer must update its gradients in place (never rebind the arrays they reference). def predict(self, x): for l in self.layers: x = l.forward(x) return x def forward(self, x, t): score = self.predict(x) #print('t:', t) # test #print('score:', score) # test loss = self.loss_layer.forward(score, t) return loss def backward(self, dl=1): dl = self.loss_layer.backward(dl) for l in self.layers[::-1]: dl = l.backward(dl) return dl
class TwoLayerNet: def __init__(self, input_size, hidden_size, output_size): I, H, O = input_size, hidden_size, output_size # Initialize weights and biases W1 = 0.01 * np.random.randn(I, H) b1 = np.random.randn(H) W2 = 0.01 * np.random.randn(H, O) b2 = np.random.randn(O) # Create the layers self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)] self.loss_layer = SoftmaxWithLoss() # Collect all weights and gradients into lists self.params, self.grads = [], [] for layer in self.layers: self.params += layer.params self.grads += layer.grads def predict(self, x): for layer in self.layers: x = layer.forward(x) return x def forward(self, x, t): score = self.predict(x) loss = self.loss_layer.forward(score, t) return loss def backward(self, dout=1): dout = self.loss_layer.backward(dout) for layer in reversed(self.layers): dout = layer.backward(dout) return dout
class SimpleSkipGram: def __init__(self, vocab_size, hidden_size): # weights w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f') w_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f') # layers self.in_layer = MatMul(w_in) self.out_layer = MatMul(w_out) # collect weights and gradients layers = [self.in_layer, self.out_layer] self.params, self.grads = [], [] for l in layers: self.params += l.params self.grads += l.grads # loss self.loss_layer0 = SoftmaxWithLoss() self.loss_layer1 = SoftmaxWithLoss() # In the limit each context word gets probability 0.5. # softmax -> *2 -> loss would only subtract a constant ln 2, so it is probably equivalent. # distributed word representations self.word_vecs = w_in def forward(self, contexts, target): h = self.in_layer.forward(target) s = self.out_layer.forward(h) l0 = self.loss_layer0.forward(s, contexts[:, 0]) l1 = self.loss_layer1.forward(s, contexts[:, 1]) loss = l0 + l1 return loss def backward(self, dl=1): ds0 = self.loss_layer0.backward(dl) ds1 = self.loss_layer1.backward(dl) ds = ds0 + ds1 dh = self.out_layer.backward(ds) self.in_layer.backward(dh)
class TwoLayersNet(): def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01): self.params = {} self.params['w1'] = np.random.randn(input_size, hidden_size) * weight_init_std self.params['b1'] = np.zeros(hidden_size) self.params['w2'] = np.random.randn(hidden_size, output_size) * weight_init_std self.params['b2'] = np.zeros(output_size) self.layers = OrderedDict() self.layers['Affine1'] = Affine(self.params['w1'], self.params['b1']) self.layers['Relu1'] = Relu() self.layers['Affine2'] = Affine(self.params['w2'], self.params['b2']) self.lastlayer = SoftmaxWithLoss() def predict(self, x): for layer in self.layers.values(): x = layer.forward(x) return x def loss(self, x, t): y = self.predict(x) return self.lastlayer.forward(y, t) def accuracy(self, x, t): y = self.predict(x) y = np.argmax(y, axis=1) t = np.argmax(t, axis=1) return np.sum(y == t) / float(y.shape[0]) def gradient(self, x, t): dout = 1 self.loss(x, t) dout = self.lastlayer.backward(dout) layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) grads = {} grads['w1'] = self.layers['Affine1'].dw grads['b1'] = self.layers['Affine1'].db grads['w2'] = self.layers['Affine2'].dw grads['b2'] = self.layers['Affine2'].db return grads
class SimpleCBOW: def __init__(self, vocab_size, hidden_size): # weights w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f') w_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f') # layers self.in_layer0 = MatMul(w_in) self.in_layer1 = MatMul(w_in) self.out_layer = MatMul(w_out) # collect weights and gradients layers = [self.in_layer0, self.in_layer1, self.out_layer] self.params, self.grads = [], [] for l in layers: self.params += l.params self.grads += l.grads # loss self.loss_layer = SoftmaxWithLoss() # distributed word representations self.word_vecs = w_in def forward(self, contexts, target): h0 = self.in_layer0.forward(contexts[:, 0]) h1 = self.in_layer1.forward(contexts[:, 1]) h = (h0 + h1) * 0.5 score = self.out_layer.forward(h) loss = self.loss_layer.forward(score, target) return loss def backward(self, dl=1): ds = self.loss_layer.backward(dl) da = self.out_layer.backward(ds) da *= 0.5 self.in_layer0.backward(da) self.in_layer1.backward(da)
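# A usage sketch for the SimpleCBOW model above, assuming contexts and target are one-hot encoded
# (shapes (batch, 2, vocab_size) and (batch, vocab_size)) and that SoftmaxWithLoss accepts one-hot
# targets; the random batch and the SGD step are illustrative only.
import numpy as np

vocab_size, hidden_size, batch = 7, 5, 3
model = SimpleCBOW(vocab_size, hidden_size)
contexts = np.eye(vocab_size)[np.random.randint(0, vocab_size, size=(batch, 2))].astype('f')
target = np.eye(vocab_size)[np.random.randint(0, vocab_size, size=batch)].astype('f')
loss = model.forward(contexts, target)
model.backward()
for p, g in zip(model.params, model.grads):
    p -= 0.1 * g                                  # the learned word vectors live in model.word_vecs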
class TwoLayerNet: def __init__(self, input_size, hidden_size, output_size): I, H, O = input_size, hidden_size, output_size # initialize weights and biases W1 = 0.01 * cp.random.randn(I, H) b1 = cp.zeros(H) W2 = 0.01 * cp.random.randn(H, O) b2 = cp.zeros(O) # create the layers self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)] self.loss_layer = SoftmaxWithLoss() # collect all weights and gradients into lists self.params, self.grads = [], [] for layer in self.layers: self.params += layer.params self.grads += layer.grads def predict(self, x): for layer in self.layers: x = layer.forward(x) return x def forward(self, x, t): score = self.predict(x) loss = self.loss_layer.forward(score, t) return loss def backward(self, dout=1): dout = self.loss_layer.backward(dout) for layer in reversed(self.layers): dout = layer.backward(dout) return dout
class SimpleCBOW: def __init__(self, vocab_size: int, hidden_size: int) -> None: W_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype(float) W_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype(float) self.in_layer0 = MatMul(W_in) self.in_layer1 = MatMul(W_in) self.out_layer = MatMul(W_out) self.loss_layer = SoftmaxWithLoss() layers = [ self.in_layer0, self.in_layer1, self.out_layer, self.loss_layer ] self.params = [] self.grads = [] for layer in layers: self.params += layer.params self.grads += layer.grads self.word_vecs = W_in def forward(self, contexts: np.ndarray, target: np.ndarray) -> float: h0 = self.in_layer0.forward(contexts[:, 0]) h1 = self.in_layer1.forward(contexts[:, 1]) h = (h0 + h1) * 0.5 score = self.out_layer.forward(h) loss = self.loss_layer.forward(score, target) return loss def backward(self, dout: int = 1) -> None: ds = self.loss_layer.backward(dout) da = self.out_layer.backward(ds) da *= 0.5 self.in_layer1.backward(da) self.in_layer0.backward(da) return None
class TwoLayerNet: def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01): self.params = {} self.params['W1'] = weight_init_std * np.random.randn( input_size, hidden_size) self.params['b1'] = np.zeros(hidden_size) self.params['W2'] = weight_init_std * np.random.randn( hidden_size, output_size) self.params['b2'] = np.zeros(output_size) # f = open("./db/param_result/784x50x10-0.99162.json", 'r') # self.params = json.load(f) # f.close() # for key in ('W1', 'b1', 'W2', 'b2'): # self.params[key] = np.array(self.params[key]) # Create each layer object self.layers = OrderedDict() self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1']) self.layers['Relu1'] = Relu() self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2']) self.last_layer = SoftmaxWithLoss() def predict(self, x): for layer in self.layers.values(): x = layer.forward(x) return x def loss(self, x, t): y = self.predict(x) return self.last_layer.forward(y, t) def accuracy(self, x, t): y = self.predict(x) y = np.argmax(y, axis=1) if t.ndim != 1: t = np.argmax(t, axis=1) acc = np.sum(y == t) / float(x.shape[0]) return acc def numerical_gradient(self, x, t): loss_W = lambda W: self.loss(x, t) grads = {} grads['W1'] = numerical_gradient(loss_W, self.params['W1']) grads['b1'] = numerical_gradient(loss_W, self.params['b1']) grads['W2'] = numerical_gradient(loss_W, self.params['W2']) grads['b2'] = numerical_gradient(loss_W, self.params['b2']) return grads def gradient(self, x, t): # forward self.loss(x, t) # backward dout = 1 dout = self.last_layer.backward(dout) layers_list = list(self.layers.values()) layers_list.reverse() for layer in layers_list: dout = layer.backward(dout) grads = {} grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers[ 'Affine1'].db grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers[ 'Affine2'].db return grads
class MultiLayerNet: """ 多层神经网络 """ def __init__(self, input_size, hidden_size_list, output_size, activation='relu', weight_init_std='relu', weight_decay_lambda=0, use_dropout=False, dropout_ration=0.5, use_batchnorm=False): """ :param input_size: 输入层大小 :param hidden_size_list: 隐藏层神经元数量的列表 (e.g. [50, 50, 50]) :param output_size: 输出层大小 :param activation: 激活函数 ('relu' or 'sigmod') :param weight_init_std: 指定权重标准差 (e.g. 0.01) 指定 'relu' 或 'he': 使用 "He 初始值" 指定 'sigmoid' 或 'xavier: 使用 "Xavier 初始值" :param weight_decay_lambda: Weight Decay (L2 范数) 的强度 :param use_dropout: 是否使用 Dropout :param dropout_ration: Dropout 比例 :param use_batchnorm: 是否使用 Batch Normalization """ self.input_size = input_size self.output_size = output_size self.hidden_size_list = hidden_size_list self.hidden_layer_num = len(hidden_size_list) self.activation = activation self.weight_decay_lambda = weight_decay_lambda self.use_dropout = use_dropout self.use_batchnorm = use_batchnorm self.dropout_ration = dropout_ration self.weight_init_std = weight_init_std self.params = {} # 初始化权重 self.__init_weight() # 初始化层 self.__init_layer() def __init_layer(self): activation_layer = {'sigmoid': Sigmoid, 'relu': Relu} self.layers = OrderedDict() for idx in range(1, self.hidden_layer_num + 1): self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)]) if self.use_batchnorm: self.params['gamma' + str(idx)] = np.ones(self.hidden_size_list[idx - 1]) self.params['beta' + str(idx)] = np.zeros(self.hidden_size_list[idx - 1]) self.layers['BatchNorm' + str(idx)] = BatchNormalization(self.params['gamma' + str(idx)], self.params['beta' + str(idx)]) self.layers['activation_function' + str(idx)] = activation_layer[self.activation]() if self.use_dropout: self.layers['Dropout' + str(idx)] = Dropout(self.dropout_ration) idx += 1 self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)]) self.last_layer = SoftmaxWithLoss() def __init_weight(self): all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size] for idx in range(1, len(all_size_list)): scale = self.weight_init_std if str(self.weight_init_std).lower() in ('relu', 'he'): scale = np.sqrt(2.0 / all_size_list[idx - 1]) elif str(self.weight_init_std).lower() in ('sigmoid', 'xavier'): scale = np.sqrt(1.0 / all_size_list[idx - 1]) self.params['W' + str(idx)] = scale * np.random.randn(all_size_list[idx - 1], all_size_list[idx]) self.params['b' + str(idx)] = np.zeros(all_size_list[idx]) def predict(self, x, train_flag=False): for key, layer in self.layers.items(): if "Dropout" in key or "BatchNorm" in key: # Dropout[0-9]* or BatchNorm[0-9]* x = layer.forward(x, train_flag) else: x = layer.forward(x) return x def loss(self, x, t, train_flag=False): y = self.predict(x, train_flag) weight_decay = 0 for idx in range(1, self.hidden_layer_num + 2): W = self.params['W' + str(idx)] weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2) return self.last_layer.forward(y, t) + weight_decay def accuracy(self, x, t): y = self.predict(x, train_flag=False) y = np.argmax(y, axis=1) if t.ndim != 1: t = np.argmax(t, axis=1) acc = np.sum(y == t) / float(x.shape[0]) return acc def gradient(self, x, t): # forward self.loss(x, t, train_flag=True) # backward dout = 1 dout = self.last_layer.backward(dout) layers_list = list(self.layers.values()) layers_list.reverse() for layer in layers_list: dout = layer.backward(dout) grads = {} for idx in range(1, self.hidden_layer_num + 2): grads['W' + str(idx)] = self.layers['Affine' + 
str(idx)].dW + self.weight_decay_lambda * self.params['W' + str(idx)] grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db if self.use_batchnorm and idx != self.hidden_layer_num + 1: grads['gamma' + str(idx)] = self.layers['BatchNorm' + str(idx)].dgamma grads['beta' + str(idx)] = self.layers['BatchNorm' + str(idx)].dbeta return grads def set_dropout(self, flag): self.use_dropout = flag self.__init_weight() self.__init_layer()
class MulLayerNet: """ Parameters ---------- input_size : input size (784 for MNIST) hidden_size_list : list of hidden-layer neuron counts (e.g. [100, 100, 100]) output_size : output size (10 for MNIST) activation : 'relu' or 'sigmoid' weight_init_std : standard deviation of the weights (e.g. 0.01); 'relu' or 'he' selects the "He initialization", 'sigmoid' or 'xavier' selects the "Xavier initialization" weight_decay_lambda : strength of Weight Decay (L2 norm) """ def __init__(self, input_size, output_size, hidden_size_list, activation='relu', weight_init_std='relu', weight_decay_lambda=0): self.input_size = input_size self.hidden_size_list = hidden_size_list self.output_size = output_size self.activation = activation self.weight_init_std = weight_init_std self.weight_decay_lambda = weight_decay_lambda self._init_weight() self._init_layers() def _init_weight(self): self.params = {} all_layers = [self.input_size ] + self.hidden_size_list + [self.output_size] weight_init_std = self.weight_init_std for i in range(1, len(self.hidden_size_list) + 2): if weight_init_std == 'relu' or weight_init_std == 'he': scalar = np.sqrt(2 / all_layers[i - 1]) elif weight_init_std == 'sigmoid' or weight_init_std == 'xavier': scalar = np.sqrt(1 / all_layers[i - 1]) else: scalar = weight_init_std self.params['W%d' % i] = np.random.randn(all_layers[i - 1], all_layers[i]) * scalar self.params['b%d' % i] = np.zeros(all_layers[i], dtype='float') def _init_layers(self): self.layers = OrderedDict() all_layers = [self.input_size ] + self.hidden_size_list + [self.output_size] activation_dict = {'relu': Relu, 'sigmoid': Sigmoid} for i in range(1, len(self.hidden_size_list) + 2): self.layers['Affine%d' % i] = Affine(self.params['W%d' % i], self.params['b%d' % i]) if i != len(self.hidden_size_list) + 1: self.layers['Activation%d' % i] = activation_dict[self.activation]() self.last_layers = SoftmaxWithLoss() def predict(self, x): for layer in self.layers.values(): x = layer.forward(x) return x def loss(self, x, t): y = self.predict(x) weight_decay = 0 for param_index in range(1, len(self.hidden_size_list) + 2): param = self.params['W%d' % param_index] weight_decay += .5 * self.weight_decay_lambda * np.sum(param**2) return self.last_layers.forward(y, t) + weight_decay def accuracy(self, x, t): p = self.predict(x) y = np.argmax(p, axis=1) if t.ndim != 1: t = np.argmax(t, axis=1) return np.sum(y == t, dtype='float') / float(y.shape[0]) ''' Compute the gradients of the loss with respect to the parameters. ''' def gradient(self, x, t): self.loss(x, t) dout = 1. dout = self.last_layers.backward(dout) layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) grads = {} for i in range(1, len(self.hidden_size_list) + 2): grads['W%d' % i] = self.layers[ 'Affine%d' % i].dW + self.weight_decay_lambda * self.layers['Affine%d' % i].W grads['b%d' % i] = self.layers['Affine%d' % i].db return grads
class DeepConvNet: def __init__(self, input_dim, conv_params=[ { 'filter_num': 32, 'filter_size': 9, 'pad': 0, 'stride': 3 }, { 'filter_num': 64, 'filter_size': 5, 'pad': 2, 'stride': 1 }, { 'filter_num': 128, 'filter_size': 7, 'pad': 0, 'stride': 1 }, ], hidden_size=128, dropout_ratio=[0.2, 0.5], output_size=5): self.params = {} self.layers = {} pre_shape = input_dim for idx, conv_param in enumerate(conv_params): # init parameters self.params['W' + str(idx + 1)] = init_he(pre_shape[0] * conv_param['filter_size']**2) *\ np.random.randn( conv_param['filter_num'], pre_shape[0], conv_param['filter_size'], conv_param['filter_size']) self.params['b' + str(idx + 1)] = np.zeros( conv_param['filter_num']) # set layers self.layers['Conv' + str(idx + 1)] = Convolution( self.params['W' + str(idx + 1)], self.params['b' + str(idx + 1)], conv_param['stride'], conv_param['pad']) self.layers['Relu' + str(idx + 1)] = Relu() # calc output image size of conv layers pre_shape = self.layers['Conv' + str(idx + 1)].output_size(pre_shape) idx = len(conv_params) # init parameters and set layers Affine self.params['W' + str(idx + 1)] = init_he(pre_shape[0] * pre_shape[1]**2) *\ np.random.randn(pre_shape[0] * pre_shape[1]**2, hidden_size) self.params['b' + str(idx + 1)] = np.zeros(hidden_size) self.layers['Affine' + str(idx + 1)] = Affine( self.params['W' + str(idx + 1)], self.params['b' + str(idx + 1)]) self.layers['Relu' + str(idx + 1)] = Relu() idx += 1 # init parameters and set layers output self.params['W' + str(idx + 1)] = init_he(hidden_size) * np.random.randn( hidden_size, output_size) self.params['b' + str(idx + 1)] = np.zeros(output_size) self.layers['Affine' + str(idx + 1)] = Affine( self.params['W' + str(idx + 1)], self.params['b' + str(idx + 1)]) # set loss function layer self.loss_layer = SoftmaxWithLoss() def predict(self, x, train_flg=False): for layer in self.layers.values(): if isinstance(layer, Dropout): x = layer.forward(x, train_flg) else: x = layer.forward(x) return x def loss(self, x, t): y = self.predict(x, train_flg=True) return self.loss_layer.forward(y, t) def accuracy(self, x, t, batch_size=100): if t.ndim != 1: t = np.argmax(t, axis=1) acc = 0.0 for i in range(int(x.shape[0] / batch_size)): tx = x[i * batch_size:(i + 1) * batch_size] tt = t[i * batch_size:(i + 1) * batch_size] y = self.predict(tx, train_flg=False) y = np.argmax(y, axis=1) acc += np.sum(y == tt) return acc / x.shape[0] def gradient(self, x, t): # forward self.loss(x, t) # backward dout = 1 dout = self.loss_layer.backward(dout) tmp_layers = list(self.layers.values()) tmp_layers.reverse() for layer in tmp_layers: dout = layer.backward(dout) # setting grads = {} for i, layer_name in enumerate(self.get_layer_names()): grads['W' + str(i + 1)] = self.layers[layer_name].dW grads['b' + str(i + 1)] = self.layers[layer_name].db return grads def save_params(self, file_name="params.pkl"): params = {} for key, val in self.params.items(): params[key] = val with open(file_name, 'wb') as f: pickle.dump(params, f) def load_params(self, file_name="params.pkl"): with open(file_name, 'rb') as f: params = pickle.load(f) for key, val in params.items(): self.params[key] = val for i, layer_name in enumerate(self.get_layer_names()): self.layers[layer_name].W = self.params['W' + str(i + 1)] self.layers[layer_name].b = self.params['b' + str(i + 1)] def get_layer_names(self): lst = [] for layer_name in self.layers.keys(): if 'Conv' in layer_name or 'Affine' in layer_name: lst.append(layer_name) return np.array(lst)
class TwoLayerNet: def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01): # Initialize weights. self.params = {} self.params['W1'] = weight_init_std * \ np.random.randn(input_size, hidden_size) self.params['b1'] = np.zeros(hidden_size) self.params['W2'] = weight_init_std * \ np.random.randn(hidden_size, output_size) self.params['b2'] = np.zeros(output_size) # Generate layers. self.layers = OrderedDict() # Ordered dictionary self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1']) self.layers['Relu'] = Relu() self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2']) self.lastLayer = SoftmaxWithLoss() # Forward propagation. def predict(self, x): for layer in self.layers.values(): x = layer.forward(x) return x def loss(self, x, t): y = self.predict(x) return self.lastLayer.forward(y, t) # Calculate accuracy. def accuracy(self, x, t): y = self.predict(x) y = np.argmax(y, axis=1) t = np.argmax(t, axis=1) accuracy = np.sum(y == t) / float(x.shape[0]) return accuracy # # Numerical method to calculate gradient. # def numerical_gradient(self, x, t): # loss_W = lambda W: self.loss(x, t) # grads = {} # grads['W1'] = numerical_gradient(loss_W, self.params['W1']) # grads['b1'] = numerical_gradient(loss_W, self.params['b1']) # grads['W2'] = numerical_gradient(loss_W, self.params['W2']) # grads['b2'] = numerical_gradient(loss_W, self.params['b2']) # return grads # BP method to calculate gradient. def gradient(self, x, t): # FP. self.loss(x, t) # BP. dout = 1 dout = self.lastLayer.backward(dout) # Reverse the order of elements in list layers. layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) # Settings. grads = {} grads['W1'] = self.layers['Affine1'].dW grads['b1'] = self.layers['Affine1'].db grads['W2'] = self.layers['Affine2'].dW grads['b2'] = self.layers['Affine2'].db return grads
import sys import os import numpy as np from pathlib import Path try: sys.path.append(os.path.join(Path(os.getcwd()).parent, 'lib')) from layers import SoftmaxWithLoss from common import softmax import twolayernet as network except ImportError: print('Library Module Can Not Be Found') # 1. load training/test data _x, _t = np.array([2.6, 3.9, 5.6]), np.array([0, 0, 1]) # 2. hyperparameter # 3. initialize layer layer = SoftmaxWithLoss() # Test loss = layer.forward(_x, _t) dout = layer.backward(1) print(loss, dout) # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ def forward_propagation(x): y = softmax(x) return y network.forward_propagation = forward_propagation loss = network.loss(_x, _t) print(loss)
class SimpleConvNet: """简单ConvNet conv - relu - pool - affine - relu - affine - softmax """ def __init__(self, input_dim=(1, 28, 28), conv_param={ 'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1 }, hidden_size=100, output_size=10, weight_init_std=0.01): filter_num = conv_param['filter_num'] filter_size = conv_param['filter_size'] filter_pad = conv_param['pad'] filter_stride = conv_param['stride'] input_size = input_dim[1] conv_output_size = (input_size - filter_size + 2 * filter_pad) / filter_stride + 1 pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2)) # 权重初始化 self.params = {} self.params['W1'] = weight_init_std * \ np.random.randn(filter_num, input_dim[0], filter_size, filter_size) self.params['b1'] = np.zeros(filter_num) self.params['W2'] = weight_init_std * \ np.random.randn(pool_output_size, hidden_size) self.params['b2'] = np.zeros(hidden_size) self.params['W3'] = weight_init_std * \ np.random.randn(hidden_size, output_size) self.params['b3'] = np.zeros(output_size) # 层生成 self.layers = OrderedDict() self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], conv_param['stride'], conv_param['pad']) self.layers['Relu1'] = Relu() self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2) self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2']) self.layers['Relu2'] = Relu() self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3']) self.last_layer = SoftmaxWithLoss() def predict(self, x): for layer in self.layers.values(): x = layer.forward(x) return x def loss(self, x, t): y = self.predict(x) return self.last_layer.forward(y, t) def accuracy(self, x, t, batch_size=100): if t.ndim != 1: t = np.argmax(t, axis=1) acc = 0.0 for i in range(int(x.shape[0] / batch_size)): tx = x[i * batch_size:(i + 1) * batch_size] tt = t[i * batch_size:(i + 1) * batch_size] y = self.predict(tx) y = np.argmax(y, axis=1) acc += np.sum(y == tt) return acc / x.shape[0] def numerical_gradient(self, x, t): loss_w = lambda w: self.loss(x, t) grads = {} for idx in (1, 2, 3): grads['W' + str(idx)] = numerical_gradient( loss_w, self.params['W' + str(idx)]) grads['b' + str(idx)] = numerical_gradient( loss_w, self.params['b' + str(idx)]) return grads def gradient(self, x, t): # forward self.loss(x, t) # backward dout = 1 dout = self.last_layer.backward(dout) layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) # 設定 grads = {} grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers[ 'Conv1'].db grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers[ 'Affine1'].db grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers[ 'Affine2'].db return grads def save_params(self, file_name="params.pkl"): params = {} for key, val in self.params.items(): params[key] = val with open(file_name, 'wb') as f: pickle.dump(params, f) def load_params(self, file_name="params.pkl"): with open(file_name, 'rb') as f: params = pickle.load(f) for key, val in params.items(): self.params[key] = val for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']): self.layers[key].W = self.params['W' + str(i + 1)] self.layers[key].b = self.params['b' + str(i + 1)]
class TwoLayerNet(object): def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01): # Initialize weights self.params = {} self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size) # self.params['W1'] = np.ones((input_size, hidden_size)) self.params['b1'] = np.zeros(hidden_size) self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size) # self.params['W2'] = np.ones((hidden_size, output_size)) self.params['b2'] = np.zeros(output_size) # Create layers self.layers = OrderedDict() self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1']) self.layers['Relu1'] = Relu() self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2']) self.lastLayer = SoftmaxWithLoss() def predict(self, x): for layer in self.layers.values(): x = layer.forward(x) return x # x: input data, t: labels def loss(self, x, t): y = self.predict(x) return self.lastLayer.forward(y, t) def accuracy(self, x, t): y = self.predict(x) y = np.argmax(y, axis=1) if t.ndim != 1: t = np.argmax(t, axis=1) accuracy = np.sum(y == t) / float(x.shape[0]) return accuracy def numerical_gradient(self, x, t): loss_W = lambda W: self.loss(x, t) grads = {} grads['W1'] = numerical_gradient(loss_W, self.params['W1']) grads['b1'] = numerical_gradient(loss_W, self.params['b1']) grads['W2'] = numerical_gradient(loss_W, self.params['W2']) grads['b2'] = numerical_gradient(loss_W, self.params['b2']) return grads def gradient(self, x, t): # forward self.loss(x, t) # backward dout = 1 dout = self.lastLayer.backward(dout) layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) # Collect gradients grads = {} grads['W1'] = self.layers['Affine1'].dW grads['b1'] = self.layers['Affine1'].db grads['W2'] = self.layers['Affine2'].dW grads['b2'] = self.layers['Affine2'].db return grads
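# A gradient-check sketch for the class above: it compares backpropagation against numerical
# differentiation on a tiny random batch. It assumes the numerical_gradient helper used inside the
# class is importable and that SoftmaxWithLoss accepts one-hot targets; sizes and data are illustrative.
import numpy as np

net = TwoLayerNet(input_size=6, hidden_size=4, output_size=3)
x = np.random.randn(5, 6)
t = np.eye(3)[np.random.randint(0, 3, size=5)]
grad_backprop = net.gradient(x, t)
grad_numerical = net.numerical_gradient(x, t)
for key in ('W1', 'b1', 'W2', 'b2'):
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key, diff)                              # should be tiny (around 1e-10) if backprop is correct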
class MultiLayerNet: """ Parameters ---------- input_size : hidden_size_list : (e.g. [100, 100, 100]) output_size : activation : 'relu' or 'sigmoid' weight_init_std : 权重初始化标准差(e.g. 0.01) 'relu'「He的初始化」 'sigmoid'「Xavier的初始化」 weight_decay_lambda : Weight Decay(L2 Norm) use_dropout: 是否使用Dropout层 drop_out_ratio: dropout的比例 use_batchnorm: 是否使用BatchNormalization层 """ def __init__(self, input_size, hidden_size_list, output_size, activation='relu', weight_init_std='relu', weight_decay_lambda=0, use_dropout=False, dropout_ratio=0.5, use_batchnorm=False): self.input_size = input_size self.output_size = output_size self.hidden_size_list = hidden_size_list self.hidden_layer_num = len(hidden_size_list) self.weight_decay_lambda = weight_decay_lambda self.use_dropout = use_dropout self.use_batchnorm = use_batchnorm self.params = {} # 权重初始化 self.__init_weight(weight_init_std) # 生成层 activation_layer = {'sigmoid': Sigmoid, 'relu': Relu} self.layers = OrderedDict() for idx in range(1, self.hidden_layer_num + 1): self.layers['Affine' + str(idx)] = Affine( self.params['W' + str(idx)], self.params['b' + str(idx)]) if self.use_batchnorm: self.params['gamma' + str(idx)] = np.ones( hidden_size_list[idx - 1]) self.params['beta' + str(idx)] = np.zeros( hidden_size_list[idx - 1]) self.layers['BatchNorm' + str(idx)] = BatchNormalization( self.params['gamma' + str(idx)], self.params['beta' + str(idx)]) self.layers['Activation_function' + str(idx)] = activation_layer[activation]() if self.use_dropout: self.layers['Dropout' + str(idx)] = Dropout(dropout_ratio) idx = self.hidden_layer_num + 1 self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)]) self.last_layer = SoftmaxWithLoss() def __init_weight(self, weight_init_std): """权重初始值设定 Parameters ---------- weight_init_std : 权重初始化标准差(e.g. 
0.01) 'relu'「He的初始化」 np.sqrt(2/n) 'sigmoid'「Xavier的初始化」 np.sqrt(1/n) """ all_size_list = [self.input_size ] + self.hidden_size_list + [self.output_size] for idx in range(1, len(all_size_list)): scale = weight_init_std if str(weight_init_std).lower() in ('relu', 'he'): scale = np.sqrt(2.0 / all_size_list[idx - 1]) # ReLU使用的随机化标准差 elif str(weight_init_std).lower() in ('sigmoid', 'xavier'): scale = np.sqrt(1.0 / all_size_list[idx - 1]) # Sigmoid使用的随机化标准差 self.params['W' + str(idx)] = scale * np.random.randn( all_size_list[idx - 1], all_size_list[idx]) self.params['b' + str(idx)] = np.zeros(all_size_list[idx]) def predict(self, x, train_flg=False): for key, layer in self.layers.items(): if "Dropout" in key or "BatchNorm" in key: x = layer.forward(x, train_flg) else: x = layer.forward(x) return x def loss(self, x, t, train_flg=False): y = self.predict(x, train_flg) weight_decay = 0 for idx in range(1, self.hidden_layer_num + 2): W = self.params['W' + str(idx)] weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2) return self.last_layer.forward(y, t) + weight_decay def accuracy(self, x, t): y = self.predict(x, train_flg=False) y = np.argmax(y, axis=1) if t.ndim != 1: t = np.argmax(t, axis=1) accuracy = np.sum(y == t) / float(x.shape[0]) return accuracy def numerical_gradient(self, x, t): loss_W = lambda W: self.loss(x, t, train_flg=True) grads = {} for idx in range(1, self.hidden_layer_num + 2): grads['W' + str(idx)] = numerical_gradient( loss_W, self.params['W' + str(idx)]) grads['b' + str(idx)] = numerical_gradient( loss_W, self.params['b' + str(idx)]) if self.use_batchnorm and idx != self.hidden_layer_num + 1: grads['gamma' + str(idx)] = numerical_gradient( loss_W, self.params['gamma' + str(idx)]) grads['beta' + str(idx)] = numerical_gradient( loss_W, self.params['beta' + str(idx)]) return grads def gradient(self, x, t): # forward self.loss(x, t, train_flg=True) # backward dout = 1 dout = self.last_layer.backward(dout) layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) grads = {} for idx in range(1, self.hidden_layer_num + 2): grads['W' + str(idx)] = self.layers['Affine' + str( idx)].dW + self.weight_decay_lambda * self.layers['Affine' + str(idx)].W grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db if self.use_batchnorm and idx != self.hidden_layer_num + 1: grads['gamma' + str(idx)] = self.layers['BatchNorm' + str(idx)].dgamma grads['beta' + str(idx)] = self.layers['BatchNorm' + str(idx)].dbeta return grads # """拡張版の全結合による多層ニューラルネットワーク # Weiht Decay、Dropout、Batch Normalizationの機能を持つ # Parameters # ---------- # input_size : 入力サイズ(MNISTの場合は784) # hidden_size_list : 隠れ層のニューロンの数のリスト(e.g. [100, 100, 100]) # output_size : 出力サイズ(MNISTの場合は10) # activation : 'relu' or 'sigmoid' # weight_init_std : 重みの標準偏差を指定(e.g. 
class TwoLayerNet(DeepLearn): def __init__(self, hidden_size, weight_init_std=0.01): super().__init__() self.params = dict() self.params['W1'] = weight_init_std * np.random.randn( self.x_train.shape[1], hidden_size) self.params['b1'] = np.zeros(hidden_size) self.params['W2'] = weight_init_std * np.random.randn( hidden_size, self.t_train.shape[1]) self.params['b2'] = np.zeros(self.t_train.shape[1]) # Create layers self.layers = OrderedDict() self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1']) self.layers['Relu'] = Relu() self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2']) self.last_layer = SoftmaxWithLoss() def predict(self, x): # w1, w2 = self.params['W1'], self.params['W2'] # b1, b2 = self.params['b1'], self.params['b2'] # # a1 = np.dot(x, w1) + b1 # z1 = fun.sigmoid(a1) # a2 = np.dot(z1, w2) + b2 # return fun.softmax(a2) for layer in self.layers.values(): x = layer.forward(x) return x def cross_entropy_loss(self, x, t): y = self.predict(x) return self.last_layer.forward(y, t) def numerical_gradient(self, x, t): """Numerical differentiation: slow and approximate, but simple to implement.""" loss_w = lambda _: self.cross_entropy_loss(x, t) grads = dict() grads['W1'] = fun.numerical_gradient(loss_w, self.params['W1']) grads['b1'] = fun.numerical_gradient(loss_w, self.params['b1']) grads['W2'] = fun.numerical_gradient(loss_w, self.params['W2']) grads['b2'] = fun.numerical_gradient(loss_w, self.params['b2']) return grads def gradient(self, x, t): """Backpropagation: analytic gradients, much faster.""" # forward self.cross_entropy_loss(x, t) # backward dout = self.last_layer.backward(1) layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) grads = dict() grads['W1'] = self.layers['Affine1'].dW grads['b1'] = self.layers['Affine1'].db grads['W2'] = self.layers['Affine2'].dW grads['b2'] = self.layers['Affine2'].db return grads def test(self): self.train_acc_list.append(self.accuracy(self.x_train, self.t_train)) self.test_acc_list.append(self.accuracy(self.x_test, self.t_test)) def start(self, iters_num=10000, batch_size=100, learning_rate=0.1, epoch=0, record=False, numerical=False): for i in range(iters_num): print('learn:', i) batch_mask = np.random.choice(self.x_train.shape[0], batch_size) x_batch = self.x_train[batch_mask] t_batch = self.t_train[batch_mask] print('computing gradients') if numerical: grad = self.numerical_gradient(x_batch, t_batch) else: grad = self.gradient(x_batch, t_batch) print('updating weights') for k in ('W1', 'b1', 'W2', 'b2'): self.params[k] -= learning_rate * grad[k] if epoch and (i + 1) % epoch == 0: self.test() if record: loss = self.cross_entropy_loss(x_batch, t_batch) yield loss
class SimpleConvNet: def __init__(self, input_dim=(1, 28, 28), # (C, W, H) filter_num=30, filter_size=5, filter_pad=0, filter_stride=1, hidden_size=100, output_size=10, weight_init_std=0.01 ): # input(N, C, W, H) # -> Conv(N, FN, conv_out_h, conv_out_w) -> ReLu # -> Pooling(N, FN , pool_out_h, pool_out_w) # -> Affine[flattens the input](N, hidden_layer) -> ReLu # -> Affine(N, output_layer) -> SoftMax # input_size is determined dynamically (a square input is assumed) input_size = input_dim[1] conv_output_size = (input_size + 2 * filter_pad - filter_size) / filter_stride + 1 # FN * pool_out_h * pool_out_w pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2)) self.params = {} # Conv # (input_size, C, W, H) -> (N, FilterNum, out_h, out_w) self.params['W1'] = \ weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size) self.params['b1'] = np.zeros(filter_num) # ReLu # Pool # Affine self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size) self.params['b2'] = np.zeros(hidden_size) # Relu # Affine self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size) self.params['b3'] = np.zeros(output_size) self.layers = OrderedDict() self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], filter_stride, filter_pad) self.layers['ReLu1'] = ReLu() self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2) self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2']) self.layers['ReLu2'] = ReLu() self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3']) self.last_layer = SoftmaxWithLoss() def predict(self, x): for layer in self.layers.values(): x = layer.forward(x) return x def loss(self, x, t): pred_y = self.predict(x) return self.last_layer.forward(pred_y, t) def gradient(self, x, t): self.loss(x, t) dout = 1 dout = self.last_layer.backward(dout) layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) grads = { 'W1': self.layers['Conv1'].dW, 'b1': self.layers['Conv1'].db, 'W2': self.layers['Affine1'].dW, 'b2': self.layers['Affine1'].db, 'W3': self.layers['Affine2'].dW, 'b3': self.layers['Affine2'].db } return grads def accuracy(self, x: np.ndarray, t: np.ndarray): pred_y = self.predict(x) y = np.argmax(pred_y, axis=1) if t.ndim != 1: # for one-hot labels t = np.argmax(t, axis=1) accuracy = np.sum(y == t) / x.shape[0] return accuracy
class MultiLayernet: def __init__(self, input_size, hidden_size_list, output_size, activation='relu', weight_init_std='relu', weight_decay_lambda=0): self.input_size = input_size self.hidden_size_list = hidden_size_list self.output_size = output_size self.hidden_layer_num = len(hidden_size_list) self.weight_decay_lambda = weight_decay_lambda self.params = {} # Initialize weights self.__init_weight(weight_init_std) # Generate layers activation_layer = {'sigmoid': Sigmoid, 'relu': ReLU} self.layers = OrderedDict() for idx in range(1, self.hidden_layer_num + 1): self.layers['Affine' + str(idx)] = Affine( self.params['W' + str(idx)], self.params['b' + str(idx)]) self.layers['Activation_function' + str(idx)] = \ activation_layer[activation]() idx = self.hidden_layer_num + 1 self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)]) self.last_layer = SoftmaxWithLoss() def __init_weight(self, weight_init_std): all_size_list = \ [self.input_size] + self.hidden_size_list + [self.output_size] for idx in range(1, len(all_size_list)): scale = weight_init_std if str(weight_init_std).lower() in ('relu', 'he'): scale = np.sqrt(2.0 / all_size_list[idx - 1]) elif str(weight_init_std).lower() in ('sigmoid', 'xavier'): scale = np.sqrt(1.0 / all_size_list[idx - 1]) self.params['W' + str(idx)] = scale * np.random.randn( all_size_list[idx - 1], all_size_list[idx]) self.params['b' + str(idx)] = np.zeros(all_size_list[idx]) def predict(self, x): for layer in self.layers.values(): x = layer.forward(x) return x def loss(self, x, t): y = self.predict(x) weight_decay = 0 for idx in range(1, self.hidden_layer_num + 2): W = self.params['W' + str(idx)] weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2) return self.last_layer.forward(y, t) + weight_decay def accuracy(self, x, t): y = self.predict(x) y = np.argmax(y, axis=1) if t.ndim != 1: t = np.argmax(t, axis=1) return np.sum(y == t) / float(x.shape[0]) def gradient(self, x, t): # Forward self.loss(x, t) # Backward dout = 1 dout = self.last_layer.backward(dout) layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) # Settings grads = {} for idx in range(1, self.hidden_layer_num + 2): grads['W' + str(idx)] = \ self.layers['Affine' + str(idx)].dW \ + self.weight_decay_lambda * self.layers['Affine' + str(idx)].W grads['b' + str(idx)] = \ self.layers['Affine' + str(idx)].db return grads
class NLayerNet: def __init__(self, layer_num, input_size, output_size, hidden_size, weight_init_std=0.01): # Initialize weights self.layer_num = layer_num self.input_size = input_size self.hidden_size = hidden_size self.output_size = output_size self.weight_init_std = weight_init_std self.weights = [] self._make_weights() self.bias = [] self._make_bias() self._make_layers() def _make_layers(self): self.layers = [] for i in range(self.layer_num): self.layers.append(Affine(self.weights[i], self.bias[i])) if i == self.layer_num - 1: pass else: self.layers.append(Relu()) self.lastLayer = SoftmaxWithLoss() def _make_weights(self): """ Build the list of weight matrices. """ for i in range(self.layer_num): if i == 0: #input -> hidden self.weights.append( self.weight_init_std * np.random.randn(self.input_size, self.hidden_size)) elif i == self.layer_num - 1: #hidden -> output self.weights.append( self.weight_init_std * np.random.randn(self.hidden_size, self.output_size)) else: #hidden -> hidden self.weights.append( self.weight_init_std * np.random.randn(self.hidden_size, self.hidden_size)) def _make_bias(self): """ Build the list of bias vectors. """ for i in range(self.layer_num): if i == self.layer_num - 1: self.bias.append(np.zeros(self.output_size)) else: self.bias.append(np.zeros(self.hidden_size)) def predict(self, x): """ Predict. :param x: input_size matrix :return: output_size matrix """ for layer in self.layers: x = layer.forward(x) return x def loss(self, x, t): """Compute the value of the loss function. :param x: image data :param t: ground-truth labels :return: loss value """ y = self.predict(x) return self.lastLayer.forward(y, t) def accuracy(self, x, t): """Compute recognition accuracy. :param x: input data :param t: labels :return: accuracy """ y = self.predict(x) y = np.argmax(y, axis=1) t = np.argmax(t, axis=1) accuracy = np.sum(y == t) / float(x.shape[0]) return accuracy def gradient(self, x, t): self.loss(x, t) # backward dout = 1 dout = self.lastLayer.backward(dout) for layer in reversed(self.layers): dout = layer.backward(dout) # return weight_grads = [] bias_grads = [] for i in range(self.layer_num): Affine_layer = self.layers[2 * i] weight_grads.append(Affine_layer.dW) bias_grads.append(Affine_layer.db) return weight_grads, bias_grads
class TwoLayerNet: def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01): self.params = {} self.params['W1'] = weight_init_std*np.random.randn(input_size, hidden_size) self.params['W2'] = weight_init_std*np.random.randn(hidden_size, output_size) self.params['b1'] = np.zeros(hidden_size) self.params['b2'] = np.zeros(output_size) self.layers = OrderedDict() self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1']) self.layers['Relu'] = Relu() self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2']) self.lastLayer = SoftmaxWithLoss() def predict(self, x): for layer in self.layers.values(): x = layer.forward(x) return x def loss(self, x, t): y = self.predict(x) return self.lastLayer.forward(y, t) def accuracy(self, x, t): y = self.predict(x) y = np.argmax(y, axis=1) t = np.argmax(t, axis=1) return np.sum(y==t)/float(t.shape[0]) def numerical_gradient(self, x, t): loss_w = lambda W : self.loss(x, t) grads = {} grads['W1'] = numerical_gradient(loss_w, self.params['W1']) grads['b1'] = numerical_gradient(loss_w, self.params['b1']) grads['W2'] = numerical_gradient(loss_w, self.params['W2']) grads['b2'] = numerical_gradient(loss_w, self.params['b2']) return grads def gradient(self, x, t): self.loss(x, t) dout = 1.0 dout = self.lastLayer.backward(dout) layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) grads = {} grads['W1'] = self.layers['Affine1'].dW grads['b1'] = self.layers['Affine1'].db grads['W2'] = self.layers['Affine2'].dW grads['b2'] = self.layers['Affine2'].db return grads
class MultiLayerNet: def __init__(self, input_size, hidden_size_list, output_size, activation='relu', weight_init_std="relu", weight_decay_lambda=0, use_batchnorm=False): self.input_size = input_size self.hidden_layer_list = hidden_size_list self.output_size = output_size self.weight_decay_lambda = weight_decay_lambda self.use_batchnorm = use_batchnorm activations = {'relu':Relu, 'sigmoid': Sigmoid} self.activation_func = activations[activation] layer_size_list = [input_size] + hidden_size_list + [output_size] self._init_weight_params(layer_size_list, weight_init_std) self._init_layers(layer_size_list) def _init_weight_params(self, layer_size_list, weight_init_std): self.params = {} for i in range(1, len(layer_size_list)): front_layer_size = layer_size_list[i-1] back_layer_size = layer_size_list[i] scale = weight_init_std if str(weight_init_std).lower() in ('relu', 'he'): scale = np.sqrt(2.0 / front_layer_size) # recommended initial scale when using ReLU elif str(weight_init_std).lower() in ('sigmoid', 'xavier'): scale = np.sqrt(1.0 / front_layer_size) # recommended initial scale when using sigmoid self.params['W'+str(i)] = scale * \ np.random.randn(front_layer_size, back_layer_size) self.params['b'+str(i)] = np.zeros(back_layer_size) def _init_layers(self, layer_size_list): self.layers = OrderedDict() for i in range(1, len(layer_size_list)-1): self.layers['Affine'+str(i)] = Affine( self.params['W' + str(i)], self.params['b'+str(i)]) if self.use_batchnorm: self.params['gamma'+str(i)] = np.ones(layer_size_list[i]) self.params['beta'+str(i)] = np.zeros(layer_size_list[i]) self.layers['BatchNorm'+str(i)] = BatchNormalization( self.params['gamma'+str(i)], self.params['beta'+str(i)]) self.layers['Activation'+str(i)] = self.activation_func() output_layer_idx = len(layer_size_list)-1 self.layers['Affine'+str(output_layer_idx)] = Affine( self.params['W'+str(output_layer_idx)], self.params['b'+str(output_layer_idx)]) self.last_layer = SoftmaxWithLoss() def predict(self, x): for layer in self.layers.values(): x = layer.forward(x) return x def loss(self, x, t): y = self.predict(x) weight_decay = 0 for idx in range(1, len(self.hidden_layer_list) + 2): W = self.params['W' + str(idx)] weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2) loss_val = self.last_layer.forward(y, t) + weight_decay return loss_val def accuracy(self, x, t): y = self.predict(x) y = np.argmax(y, axis=1) if t.ndim != 1: t = np.argmax(t, axis=1) accuracy = np.sum(y == t) / float(x.shape[0]) return accuracy def numerical_gradient(self, x, t): def loss_w(W): return self.loss(x, t) grads = {} layer_count = len(self.hidden_layer_list) + 1 for i in range(1, layer_count+1): grads['W'+str(i)] = numerical_gradient(loss_w, self.params['W'+str(i)]) grads['b'+str(i)] = numerical_gradient(loss_w, self.params['b'+str(i)]) if self.use_batchnorm and i < layer_count: grads['gamma'+str(i)] = numerical_gradient(loss_w, self.params['gamma'+str(i)]) grads['beta'+str(i)] = numerical_gradient(loss_w, self.params['beta'+str(i)]) return grads def gradient(self, x, t): # forward self.loss(x, t) # backward dout = 1 dout = self.last_layer.backward(dout) layers = list(self.layers.values()) layers.reverse() for layer in layers: dout = layer.backward(dout) # Collect gradients grads = {} layer_count = len(self.hidden_layer_list) + 1 for idx in range(1, layer_count+1): grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW + self.weight_decay_lambda*self.params['W' + str(idx)] grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db if self.use_batchnorm and idx < layer_count: grads['gamma'+str(idx)] = 
self.layers['BatchNorm'+str(idx)].dgamma grads['beta'+str(idx)] = self.layers['BatchNorm'+str(idx)].dbeta return grads