class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        W1 = 0.01 * np.random.randn(I, H)
        b1 = np.zeros(H)
        W2 = 0.01 * np.random.randn(H, O)
        b2 = np.zeros(O)

        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
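# Hypothetical usage sketch (not part of the original code): train the
# TwoLayerNet above on random toy data with plain SGD over the shared
# params/grads lists. Assumes numpy is imported as np and that
# Affine/Sigmoid/SoftmaxWithLoss come from the same layer module the class uses.
model = TwoLayerNet(input_size=2, hidden_size=10, output_size=3)
x = np.random.randn(30, 2)                    # 30 samples, 2 features
t = np.eye(3)[np.random.randint(0, 3, 30)]    # one-hot targets
learning_rate = 0.1
for epoch in range(100):
    loss = model.forward(x, t)        # forward pass through all layers
    model.backward()                  # fills model.grads in place
    for param, grad in zip(model.params, model.grads):
        param -= learning_rate * grad  # in-place update keeps layer references valid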
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        W_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        W_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

        self.input_layer0 = MatMul(W_in)
        self.input_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        layers = [self.input_layer0, self.input_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.input_layer0.forward(contexts[:, 0, :])
        h1 = self.input_layer1.forward(contexts[:, 1, :])
        h = (h0 + h1) / 2
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da /= 2
        self.input_layer0.backward(da)
        self.input_layer1.backward(da)
        return None
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer0 = SoftmaxWithLoss()
        self.loss_layer1 = SoftmaxWithLoss()

        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l1 = self.loss_layer0.forward(s, contexts[:, 0])
        l2 = self.loss_layer1.forward(s, contexts[:, 1])
        loss = l1 + l2
        return loss

    def backward(self, dout=1):
        dl1 = self.loss_layer0.backward(dout)
        dl2 = self.loss_layer1.backward(dout)
        ds = dl1 + dl2
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
        return None
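# Hypothetical usage sketch for SimpleSkipGram (not in the original source):
# build a tiny batch by hand, with both contexts and target encoded as one-hot
# vectors, since the MatMul input layer multiplies the target directly by W_in.
vocab_size, hidden_size = 7, 5
model = SimpleSkipGram(vocab_size, hidden_size)

target_ids = np.array([1, 2, 3])                  # centre words for a batch of 3
context_ids = np.array([[0, 2], [1, 3], [2, 4]])  # left/right context words
target = np.eye(vocab_size)[target_ids].astype('f')      # shape (3, 7)
contexts = np.eye(vocab_size)[context_ids].astype('f')   # shape (3, 2, 7)

loss = model.forward(contexts, target)   # sum of the losses for both contexts
model.backward()                         # gradients land in model.grads
word_vecs = model.word_vecs              # each row of W_in is a word vector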
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        self.params = {
            'W1': weight_init_std * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': weight_init_std * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size)
        }

        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        return np.sum(y == t) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        def loss_w(w):
            return self.loss(x, t)

        return {
            'W1': numerical_gradient(loss_w, self.params['W1']),
            'b1': numerical_gradient(loss_w, self.params['b1']),
            'W2': numerical_gradient(loss_w, self.params['W2']),
            'b2': numerical_gradient(loss_w, self.params['b2'])
        }

    def gradient(self, x, t):
        self.loss(x, t)

        dout = 1
        dout = self.lastLayer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        return {
            'W1': self.layers['Affine1'].dW,
            'b1': self.layers['Affine1'].db,
            'W2': self.layers['Affine2'].dW,
            'b2': self.layers['Affine2'].db
        }
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        # distribute diff to h0/h1 equally
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize the weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Create the layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Keep the word distributed representations as a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
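# A minimal, hand-rolled training sketch for the SimpleCBOW above (an assumption,
# not part of the original code): one-hot contexts/target and plain SGD over the
# shared params/grads lists. Note that W_in appears twice in model.params (it is
# shared by both input layers), so its two gradients are applied one after the other.
vocab_size, hidden_size = 7, 5
model = SimpleCBOW(vocab_size, hidden_size)

context_ids = np.array([[0, 2], [1, 3], [2, 4]])          # (batch, 2) context word ids
target_ids = np.array([1, 2, 3])                          # centre word ids
contexts = np.eye(vocab_size)[context_ids].astype('f')    # (3, 2, 7) one-hot
target = np.eye(vocab_size)[target_ids].astype('f')       # (3, 7) one-hot

learning_rate = 0.1
for epoch in range(1000):
    loss = model.forward(contexts, target)
    model.backward()
    for param, grad in zip(model.params, model.grads):
        param -= learning_rate * grad
word_vecs = model.word_vecs   # learned distributed representations of the words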
class TwoLayerNet:
    """Two-layer fully connected network: Affine -> ReLU -> Affine -> SoftmaxWithLoss."""

    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=0.01):
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(
            input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(
            hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads

    def gradient(self, x, t):
        self.loss(x, t)

        dout = 1
        dout = self.lastLayer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db
        return grads
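# Hypothetical gradient check for the TwoLayerNet above (not from the original
# code): compare the backprop gradients with the numerical ones on a tiny batch.
# Assumes numerical_gradient and the layer classes are imported as the class expects.
network = TwoLayerNet(input_size=4, hidden_size=5, output_size=3)
x_batch = np.random.rand(3, 4)
t_batch = np.eye(3)[np.random.randint(0, 3, 3)]   # one-hot labels

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

for key in grad_numerical:
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ':' + str(diff))   # should be close to zero for every parameter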
class SimpleCNN:
    """Simple CNN: Conv -> ReLU -> Pool -> Affine -> ReLU -> Affine -> SoftmaxWithLoss."""

    def __init__(self,
                 input_dim=(1, 28, 28),
                 conv_param={
                     'filter_num': 30,
                     'filter_size': 5,
                     'pad': 0,
                     'stride': 1
                 },
                 hidden_size=100,
                 output_size=10,
                 weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        # conv_output_size: (28 - 5 + 0) / 1 + 1 = 24
        conv_output_size = (input_size - filter_size +
                            2 * filter_pad) / filter_stride + 1
        # pool_output_size: 30 * 12 * 12 = 4320
        pool_output_size = int(filter_num * (conv_output_size / 2) *
                               (conv_output_size / 2))

        self.params = {}
        # W1.shape: (30, 1, 5, 5)
        self.params['W1'] = weight_init_std * \
            np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        # b1.shape: (30,)
        self.params['b1'] = np.zeros(filter_num)
        # output of convolution: (N, 30, 24, 24)
        # W2.shape: (4320, 100)
        self.params['W2'] = weight_init_std * \
            np.random.randn(pool_output_size, hidden_size)
        # b2.shape: (100,)
        self.params['b2'] = np.zeros(hidden_size)
        # output of Affine1: (N, 100)
        self.params['W3'] = weight_init_std * \
            np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'],
                                           self.params['b1'], filter_stride,
                                           filter_pad)
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        for key, layer in self.layers.items():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Conv1'].dW
        grads['b1'] = self.layers['Conv1'].db
        grads['W2'] = self.layers['Affine1'].dW
        grads['b2'] = self.layers['Affine1'].db
        grads['W3'] = self.layers['Affine2'].dW
        grads['b3'] = self.layers['Affine2'].db
        return grads

    def accuracy(self, x, t, batch_size=100):
        # if labels are one-hot vectors, convert them to class indices
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        acc = 0.0
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i * batch_size:(i + 1) * batch_size]
            tt = t[i * batch_size:(i + 1) * batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)
        return acc / x.shape[0]

    def save_params(self, file_name="params.pkl"):
        params = {}
        for key, val in self.params.items():
            params[key] = val
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name='params.pkl'):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val

        for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
            self.layers[key].W = self.params['W' + str(i + 1)]
            self.layers[key].b = self.params['b' + str(i + 1)]
class TwoLayer(object):
    '''
    >>> from common.function import softmax
    >>> n = TwoLayer(2, 10, 3)
    >>> output = softmax(n.predict(np.array([[1, 2]])))
    >>> abs(np.sum(output) - 1.0) < 0.0001
    True
    >>> output = softmax(n.predict(np.array([[1, 2], [3, 4]])))
    >>> np.all(abs(np.sum(output, axis=1) - 1.0) < 0.0001)
    True
    '''

    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=0.01):
        self.params = {}
        self.params['w1'] = weight_init_std * np.random.randn(
            input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['w2'] = weight_init_std * np.random.randn(
            hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['w1'], self.params['b1'])
        self.layers['Relu'] = Relu()
        self.layers['Affine2'] = Affine(self.params['w2'], self.params['b2'])
        self.output_layer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.output_layer.forward(y, t)

    def accuracy(self, x, t):
        predicted_label = self.predict(x).argmax(axis=1)
        if t.ndim != 1:
            test_label = t.argmax(axis=1)
        else:
            test_label = t
        return float(np.sum(predicted_label == test_label)) / x.shape[0]

    def numerical_gradient(self, x, t):
        loss_func = lambda w: self.loss(x, t)
        grads = {}
        for k in self.params:
            grads[k] = numerical_grad(loss_func, self.params[k])
        return grads

    def grad(self, x, t):
        self.loss(x, t)

        dout = self.output_layer.backward()
        for layer in reversed(self.layers.values()):
            dout = layer.backward(dout)

        grads = {}
        grads['w1'] = self.layers['Affine1'].dw
        grads['b1'] = self.layers['Affine1'].db
        grads['w2'] = self.layers['Affine2'].dw
        grads['b2'] = self.layers['Affine2'].db
        return grads
class SimpleCNN(object):
    '''
    Structure: conv->relu->pooling(max)->affine->relu->affine->softmax_with_loss
    '''

    def __init__(self,
                 input_dim=(1, 10, 10),
                 conv_param={
                     'filter_num': 30,
                     'filter_size': 5,
                     'pad': 0,
                     'stride': 1
                 },
                 hidden_size=10,
                 output_size=10,
                 weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        pad = conv_param['pad']
        stride = conv_param['stride']

        input_size = input_dim[1]
        conv_output_size = 1 + (input_size - filter_size + 2 * pad) // stride
        pool_output_size = int(filter_num * (conv_output_size / 2) *
                               (conv_output_size / 2))

        self.params = {}
        self.params['w1'] = weight_init_std * np.random.randn(
            filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['w2'] = weight_init_std * np.random.randn(
            pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['w3'] = weight_init_std * np.random.randn(
            hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['w1'],
                                           self.params['b1'],
                                           conv_param['stride'],
                                           conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(2, 2, stride=2)
        self.layers['Affine1'] = Affine(self.params['w2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['w3'], self.params['b3'])
        self.output_layer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.output_layer.forward(y, t)

    def accuracy(self, x, t):
        predicted_label = self.predict(x).argmax(axis=1)
        if t.ndim != 1:
            test_label = t.argmax(axis=1)
        else:
            test_label = t
        return float(np.sum(predicted_label == test_label)) / x.shape[0]

    def numerical_gradient(self, x, t):
        loss_func = lambda w: self.loss(x, t)
        grads = {}
        for k in self.params:
            grads[k] = numerical_grad(loss_func, self.params[k])
        return grads

    def grad(self, x, t):
        self.loss(x, t)

        dout = self.output_layer.backward()
        for layer in reversed(self.layers.values()):
            dout = layer.backward(dout)

        grads = {}
        grads['w1'] = self.layers['Conv1'].dw
        grads['b1'] = self.layers['Conv1'].db
        grads['w2'] = self.layers['Affine1'].dw
        grads['b2'] = self.layers['Affine1'].db
        grads['w3'] = self.layers['Affine2'].dw
        grads['b3'] = self.layers['Affine2'].db
        return grads
class SimpleConvNet:
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param={'filter_num': 30, 'filter_size': 5,
                             'pad': 0, 'stride': 1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size +
                            2 * filter_pad) / filter_stride + 1
        pool_output_size = int(
            filter_num * (conv_output_size/2) * (conv_output_size/2))

        self.params = {
            'W1': weight_init_std * np.random.randn(filter_num, input_dim[0],
                                                    filter_size, filter_size),
            'b1': np.zeros(filter_num),
            'W2': weight_init_std * np.random.randn(pool_output_size, hidden_size),
            'b2': np.zeros(hidden_size),
            'W3': weight_init_std * np.random.randn(hidden_size, output_size),
            'b3': np.zeros(output_size)
        }

        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(
            self.params['W1'], self.params['b1'],
            conv_param['stride'], conv_param['pad'])
        self.layers['Relu'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def gradient(self, x, t):
        self.loss(x, t)

        dout = 1
        dout = self.last_layer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        grads = {
            'W1': self.layers['Conv1'].dW,
            'b1': self.layers['Conv1'].db,
            'W2': self.layers['Affine1'].dW,
            'b2': self.layers['Affine1'].db,
            'W3': self.layers['Affine2'].dW,
            'b3': self.layers['Affine2'].db,
        }
        return grads

    def save_params(self, file_name="params.pkl"):
        params = {}
        for key, val in self.params.items():
            params[key] = val
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val

        for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
            self.layers[key].W = self.params['W' + str(i+1)]
            self.layers[key].b = self.params['b' + str(i+1)]

    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        acc = 0.0
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i*batch_size:(i+1)*batch_size]
            tt = t[i*batch_size:(i+1)*batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)
        return acc / x.shape[0]
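# Hypothetical smoke test for the SimpleConvNet above (not in the original
# source): one forward/backward pass on random data followed by a single SGD
# step, then the weights are pickled. Assumes the Convolution/Pooling layers
# accept NCHW input and that SoftmaxWithLoss accepts one-hot targets, as the
# class definition implies.
net = SimpleConvNet(input_dim=(1, 28, 28),
                    conv_param={'filter_num': 30, 'filter_size': 5,
                                'pad': 0, 'stride': 1},
                    hidden_size=100, output_size=10)

x = np.random.rand(8, 1, 28, 28)                 # batch of 8 fake grayscale images
t = np.eye(10)[np.random.randint(0, 10, 8)]      # one-hot class labels

loss_before = net.loss(x, t)
grads = net.gradient(x, t)                 # backprop through every layer
for key in net.params:
    net.params[key] -= 0.01 * grads[key]   # in-place update keeps layer weights in sync
loss_after = net.loss(x, t)                # should typically drop after the step
net.save_params('params.pkl')              # weights can be restored with load_params()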