class TwoLayersNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size
        w1 = np.random.randn(I, H) * 0.01
        b1 = np.zeros(H)  # np.random.randn(H)
        w2 = np.random.randn(H, O) * 0.01
        b2 = np.zeros(O)  # np.random.randn(O)

        self.layers = [Affine(w1, b1), Sigmoid(), Affine(w2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        self.params, self.grads = [], []
        for l in self.layers:
            self.params += l.params
            self.grads += l.grads
        # Gradients are collected only here -> each layer must update its
        # gradients in place, without rebinding the referenced arrays.

    def predict(self, x):
        for l in self.layers:
            x = l.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dl=1):
        dl = self.loss_layer.backward(dl)
        for l in self.layers[::-1]:
            dl = l.backward(dl)
        return dl
class TwoLayerNet:
    def __init__(self, input_size: int, hidden_size: int, output_size: int):
        W1 = 0.01 * np.random.randn(input_size, hidden_size)
        b1 = np.zeros(hidden_size)
        W2 = 0.01 * np.random.randn(hidden_size, output_size)
        b2 = np.zeros(output_size)

        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x: np.ndarray) -> np.ndarray:
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x: np.ndarray, t: np.ndarray) -> np.ndarray:
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout: int = 1) -> np.ndarray:
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
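# Usage sketch (assumptions): `x_batch` and `t_batch` are a mini-batch of
# inputs and one-hot labels, and Affine/Sigmoid/SoftmaxWithLoss follow the
# params/grads list convention used above. A plain SGD step updates each
# array in place, so the references shared with the layers stay valid.
model = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
loss = model.forward(x_batch, t_batch)
model.backward()    # fills model.grads in place
for param, grad in zip(model.params, model.grads):
    param -= 0.1 * grad    # in-place update, learning rate 0.1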
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
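# Usage sketch (assumptions): one-hot `contexts` with shape (N, 2, V) --
# window size 1 -- and one-hot `target` with shape (N, V). After training,
# each row of model.word_vecs (an alias of W_in) is the distributed
# representation of one vocabulary word.
model = SimpleCBOW(vocab_size=7, hidden_size=5)
loss = model.forward(contexts, target)
model.backward()    # fills model.grads; update via an optimizer as above
for word_id in range(7):
    print(word_id, model.word_vecs[word_id])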
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        # Initialize weights and biases
        W1 = 0.01 * np.random.randn(I, H)
        b1 = np.random.randn(H)
        W2 = 0.01 * np.random.randn(H, O)
        b2 = np.random.randn(O)

        # Generate layers
        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights into lists
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
def __init__(self, input_size, hidden_size_list, output_size,
             activation='relu', weight_init_std='relu',
             weight_decay_lambda=0):
    self.input_size = input_size
    self.hidden_size_list = hidden_size_list
    self.hidden_layer_num = len(hidden_size_list)
    self.weight_decay_lambda = weight_decay_lambda
    self.params = {}

    # Initialize weights
    self.__init_weight(weight_init_std)

    # Generate layers
    activation_layer = {'sigmoid': Sigmoid, 'relu': ReLU}
    self.layers = OrderedDict()
    for idx in range(1, self.hidden_layer_num + 1):
        self.layers['Affine' + str(idx)] = Affine(
            self.params['W' + str(idx)], self.params['b' + str(idx)])
        self.layers['Activation_function' + str(idx)] = \
            activation_layer[activation]()

    idx = self.hidden_layer_num + 1
    self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                              self.params['b' + str(idx)])
    self.last_layer = SoftmaxWithLoss()
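# A minimal sketch of the __init_weight helper called above -- its body is
# not shown in this snippet, so this is an assumption modeled on the usual
# book-style API: the strings 'relu'/'he' select He initialization,
# 'sigmoid'/'xavier' select Xavier initialization, and a numeric value is
# used directly as the standard deviation.
def __init_weight(self, weight_init_std):
    all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]
    for idx in range(1, len(all_size_list)):
        scale = weight_init_std
        if str(weight_init_std).lower() in ('relu', 'he'):
            scale = np.sqrt(2.0 / all_size_list[idx - 1])   # He init for ReLU
        elif str(weight_init_std).lower() in ('sigmoid', 'xavier'):
            scale = np.sqrt(1.0 / all_size_list[idx - 1])   # Xavier init for sigmoid
        self.params['W' + str(idx)] = scale * np.random.randn(
            all_size_list[idx - 1], all_size_list[idx])
        self.params['b' + str(idx)] = np.zeros(all_size_list[idx])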
def _make_layers(self):
    self.layers = []
    for i in range(self.layer_num):
        self.layers.append(Affine(self.weights[i], self.bias[i]))
        if i != self.layer_num - 1:  # no activation after the last Affine
            self.layers.append(Relu())
    self.lastLayer = SoftmaxWithLoss()
def _init_layers(self):
    self.layers = OrderedDict()
    activation_dict = {'relu': Relu, 'sigmoid': Sigmoid}
    for i in range(1, len(self.hidden_size_list) + 1):
        self.layers['Affine%d' % i] = Affine(self.params['W%d' % i],
                                             self.params['b%d' % i])
        self.layers['Activation%d' % i] = activation_dict[self.activation]()

    # Final Affine layer: no activation before the softmax.
    i = len(self.hidden_size_list) + 1
    self.layers['Affine%d' % i] = Affine(self.params['W%d' % i],
                                         self.params['b%d' % i])
    self.last_layer = SoftmaxWithLoss()
def __init__(self, input_size: int, hidden_size: int, output_size: int):
    W1 = 0.01 * np.random.randn(input_size, hidden_size)
    b1 = np.zeros(hidden_size)
    W2 = 0.01 * np.random.randn(hidden_size, output_size)
    b2 = np.zeros(output_size)

    self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
    self.loss_layer = SoftmaxWithLoss()

    self.params, self.grads = [], []
    for layer in self.layers:
        self.params += layer.params
        self.grads += layer.grads
def __init__(self,
             input_dim=(1, 28, 28),  # (C, W, H)
             filter_num=30, filter_size=5, filter_pad=0, filter_stride=1,
             hidden_size=100, output_size=10, weight_init_std=0.01):
    # Architecture:
    #   input (N, C, W, H)
    #   -> Conv (N, FN, conv_out_h, conv_out_w) -> ReLu
    #   -> Pooling (N, FN, pool_out_h, pool_out_w)
    #   -> Affine (flattens its input) (N, hidden_size) -> ReLu
    #   -> Affine (N, output_size) -> Softmax

    # input_size is derived dynamically (square input assumed)
    input_size = input_dim[1]
    conv_output_size = (input_size + 2 * filter_pad - filter_size) / filter_stride + 1
    # FN * pool_out_h * pool_out_w
    pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2))

    self.params = {}
    # Conv: (N, C, W, H) -> (N, filter_num, out_h, out_w)
    self.params['W1'] = weight_init_std * np.random.randn(
        filter_num, input_dim[0], filter_size, filter_size)
    self.params['b1'] = np.zeros(filter_num)
    # ReLu, Pool (no parameters)
    # Affine
    self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
    self.params['b2'] = np.zeros(hidden_size)
    # ReLu (no parameters)
    # Affine
    self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
    self.params['b3'] = np.zeros(output_size)

    self.layers = OrderedDict()
    self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                       filter_stride, filter_pad)
    self.layers['ReLu1'] = ReLu()
    self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
    self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
    self.layers['ReLu2'] = ReLu()
    self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
    self.last_layer = SoftmaxWithLoss()
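# Worked size check for the defaults above (MNIST-shaped input (1, 28, 28)):
# conv_output_size = (28 + 2*0 - 5)/1 + 1 = 24, and after 2x2 pooling with
# stride 2 the feature map is 12x12, so pool_output_size = 30 * 12 * 12 = 4320,
# which is exactly the fan-in of W2.
assert (28 + 2 * 0 - 5) / 1 + 1 == 24
assert int(30 * (24 / 2) * (24 / 2)) == 30 * 12 * 12 == 4320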
class TwoLayersNet:
    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=0.01):
        self.params = {}
        self.params['w1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['w2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['w1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['w2'], self.params['b2'])
        self.lastlayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.lastlayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        return np.sum(y == t) / float(y.shape[0])

    def gradient(self, x, t):
        dout = 1
        self.loss(x, t)
        dout = self.lastlayer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        grads = {}
        grads['w1'] = self.layers['Affine1'].dw
        grads['b1'] = self.layers['Affine1'].db
        grads['w2'] = self.layers['Affine2'].dw
        grads['b2'] = self.layers['Affine2'].db
        return grads
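# Usage sketch (assumptions): mini-batch `x_batch` with one-hot `t_batch`;
# shows the dict-style update that pairs with gradient() above. Updating
# net.params[key] in place keeps the arrays shared with each Affine layer.
net = TwoLayersNet(input_size=784, hidden_size=50, output_size=10)
grads = net.gradient(x_batch, t_batch)
for key in ('w1', 'b1', 'w2', 'b2'):
    net.params[key] -= 0.1 * grads[key]    # SGD, learning rate 0.1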
def __init__(self, input_size, hidden_size, output_size):
    I, H, O = input_size, hidden_size, output_size
    w1 = np.random.randn(I, H) * 0.01
    b1 = np.zeros(H)  # np.random.randn(H)
    w2 = np.random.randn(H, O) * 0.01
    b2 = np.zeros(O)  # np.random.randn(O)

    self.layers = [Affine(w1, b1), Sigmoid(), Affine(w2, b2)]
    self.loss_layer = SoftmaxWithLoss()

    self.params, self.grads = [], []
    for l in self.layers:
        self.params += l.params
        self.grads += l.grads
    # Gradients are collected only here -> each layer must update its
    # gradients in place, without rebinding the referenced arrays.
def __init__(self, input_size, hidden_size_list, output_size,
             activation='relu', weight_init_std='relu',
             weight_decay_lambda=0, use_dropout=False,
             dropout_ratio=0.5, use_batchnorm=False):
    self.input_size = input_size
    self.output_size = output_size
    self.hidden_size_list = hidden_size_list
    self.hidden_layer_num = len(hidden_size_list)
    self.weight_decay_lambda = weight_decay_lambda
    self.use_dropout = use_dropout
    self.use_batchnorm = use_batchnorm
    self.params = {}

    # Initialize weights
    self.__init_weight(weight_init_std)

    # Generate layers
    activation_layer = {'sigmoid': Sigmoid, 'relu': Relu}
    self.layers = OrderedDict()
    for idx in range(1, self.hidden_layer_num + 1):
        self.layers['Affine' + str(idx)] = Affine(
            self.params['W' + str(idx)], self.params['b' + str(idx)])
        if self.use_batchnorm:
            self.params['gamma' + str(idx)] = np.ones(hidden_size_list[idx - 1])
            self.params['beta' + str(idx)] = np.zeros(hidden_size_list[idx - 1])
            self.layers['BatchNorm' + str(idx)] = BatchNormalization(
                self.params['gamma' + str(idx)], self.params['beta' + str(idx)])
        self.layers['Activation_function' + str(idx)] = activation_layer[activation]()
        if self.use_dropout:
            self.layers['Dropout' + str(idx)] = Dropout(dropout_ratio)

    idx = self.hidden_layer_num + 1
    self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                              self.params['b' + str(idx)])
    self.last_layer = SoftmaxWithLoss()
def __init__(self, input_size, hidden_size, output_size):
    I, H, O = input_size, hidden_size, output_size
    W1 = 0.01 * np.random.randn(I, H)
    b1 = np.zeros(H)
    W2 = 0.01 * np.random.randn(H, O)
    b2 = np.zeros(O)

    self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
    self.loss_layer = SoftmaxWithLoss()

    self.params, self.grads = [], []
    for layer in self.layers:
        self.params += layer.params
        self.grads += layer.grads
def initialize(input_size, hidden_size, output_size, init_weight=0.01,
               init_params=None):
    # NOTE: mutates the module-level globals `params` (dict) and `layers`
    # (list); `hidden_size` is a list of layer widths.
    hidden_count = len(hidden_size)
    if init_params is None:
        params['w1'] = init_weight * np.random.randn(input_size, hidden_size[0])
        params['b1'] = np.zeros(hidden_size[0])
        for idx in range(1, hidden_count):
            params[f'w{idx+1}'] = init_weight * np.random.randn(
                hidden_size[idx - 1], hidden_size[idx])
            params[f'b{idx+1}'] = np.zeros(hidden_size[idx])
        params[f'w{hidden_count+1}'] = init_weight * np.random.randn(
            hidden_size[hidden_count - 1], output_size)
        params[f'b{hidden_count+1}'] = np.zeros(output_size)
    else:
        globals()['params'] = init_params  # rebind the global to reuse given params

    layers.append(Affine(params['w1'], params['b1']))
    layers.append(ReLU())
    for idx in range(1, hidden_count):
        layers.append(Affine(params[f'w{idx+1}'], params[f'b{idx+1}']))
        layers.append(ReLU())
    layers.append(
        Affine(params[f'w{hidden_count+1}'], params[f'b{hidden_count+1}']))
    layers.append(SoftmaxWithLoss())
def __init__(self, hidden_size, weight_init_std=0.01):
    super().__init__()
    self.params = dict()
    self.params['W1'] = weight_init_std * np.random.randn(
        self.x_train.shape[1], hidden_size)
    self.params['b1'] = np.zeros(hidden_size)
    self.params['W2'] = weight_init_std * np.random.randn(
        hidden_size, self.t_train.shape[1])
    self.params['b2'] = np.zeros(self.t_train.shape[1])

    # Generate layers
    self.layers = OrderedDict()
    self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
    self.layers['Relu'] = Relu()
    self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
    self.last_layer = SoftmaxWithLoss()
def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
    # Initialize weights
    self.params = {}
    self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
    # self.params['W1'] = np.ones((input_size, hidden_size))
    self.params['b1'] = np.zeros(hidden_size)
    self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
    # self.params['W2'] = np.ones((hidden_size, output_size))
    self.params['b2'] = np.zeros(output_size)

    # Generate layers
    self.layers = OrderedDict()
    self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
    self.layers['Relu1'] = Relu()
    self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
    self.lastLayer = SoftmaxWithLoss()
def __init__(self, input_dim=(1, 28, 28),
             conv_param={'filter_num': 30, 'filter_size': 5,
                         'pad': 0, 'stride': 1},
             hidden_size=100, output_size=10, weight_init_std=0.01):
    filter_num = conv_param['filter_num']
    filter_size = conv_param['filter_size']
    filter_pad = conv_param['pad']
    filter_stride = conv_param['stride']
    input_size = input_dim[1]
    conv_output_size = (input_size - filter_size + 2 * filter_pad) / filter_stride + 1
    pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2))

    # Initialize weights
    self.params = {}
    self.params['W1'] = weight_init_std * \
        np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
    self.params['b1'] = np.zeros(filter_num)
    self.params['W2'] = weight_init_std * \
        np.random.randn(pool_output_size, hidden_size)
    self.params['b2'] = np.zeros(hidden_size)
    self.params['W3'] = weight_init_std * \
        np.random.randn(hidden_size, output_size)
    self.params['b3'] = np.zeros(output_size)

    # Generate layers
    self.layers = OrderedDict()
    self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                       conv_param['stride'], conv_param['pad'])
    self.layers['Relu1'] = Relu()
    self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
    self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
    self.layers['Relu2'] = Relu()
    self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
    self.last_layer = SoftmaxWithLoss()
def __init__(self, input_size, hidden_size, output_size):
    I, H, O = input_size, hidden_size, output_size

    # Initialize weights and biases
    W1 = 0.01 * np.random.randn(I, H)
    b1 = np.random.randn(H)
    W2 = 0.01 * np.random.randn(H, O)
    b2 = np.random.randn(O)

    # Generate layers
    self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
    self.loss_layer = SoftmaxWithLoss()

    # Collect all weights into lists
    self.params, self.grads = [], []
    for layer in self.layers:
        self.params += layer.params
        self.grads += layer.grads
def __init__(self, vocab_size, hidden_size):
    V, H = vocab_size, hidden_size
    W_in = 0.01 * np.random.randn(V, H).astype('f')
    W_out = 0.01 * np.random.randn(H, V).astype('f')

    self.in_layer0 = MatMul(W_in)
    self.in_layer1 = MatMul(W_in)
    self.out_layer = MatMul(W_out)
    self.loss_layer = SoftmaxWithLoss()

    layers = [self.in_layer0, self.in_layer1, self.out_layer]
    self.params, self.grads = [], []
    for layer in layers:
        self.params += layer.params
        self.grads += layer.grads
    self.word_vecs = W_in
def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
    # Initialize weights.
    self.params = {}
    self.params['W1'] = weight_init_std * \
        np.random.randn(input_size, hidden_size)
    self.params['b1'] = np.zeros(hidden_size)
    self.params['W2'] = weight_init_std * \
        np.random.randn(hidden_size, output_size)
    self.params['b2'] = np.zeros(output_size)

    # Generate layers.
    self.layers = OrderedDict()  # ordered dictionary
    self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
    self.layers['Relu'] = Relu()
    self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
    self.lastLayer = SoftmaxWithLoss()
def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
    self.params = {}
    self.params['w1'] = weight_init_std * np.random.randn(input_size, hidden_size)
    self.params['b1'] = np.zeros(hidden_size)
    self.params['w2'] = weight_init_std * np.random.randn(hidden_size, output_size)
    self.params['b2'] = np.zeros(output_size)

    self.layers = OrderedDict()
    self.layers['Affine1'] = Affine(self.params['w1'], self.params['b1'])
    self.layers['Relu1'] = Relu()
    self.layers['Affine2'] = Affine(self.params['w2'], self.params['b2'])
    self.lastlayer = SoftmaxWithLoss()
def __init__(self, input_size, hidden_size, output_size):
    I, H, O = input_size, hidden_size, output_size

    # Initialize weights and biases (cupy arrays, for GPU execution)
    W1 = 0.01 * cp.random.randn(I, H)
    b1 = cp.zeros(H)
    W2 = 0.01 * cp.random.randn(H, O)
    b2 = cp.zeros(O)

    # Create layers
    self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
    self.loss_layer = SoftmaxWithLoss()

    # Collect all weights and gradients into flat lists
    self.params, self.grads = [], []
    for layer in self.layers:
        self.params += layer.params
        self.grads += layer.grads
def __init_layer(self):
    activation_layer = {'sigmoid': Sigmoid, 'relu': Relu}
    self.layers = OrderedDict()
    for idx in range(1, self.hidden_layer_num + 1):
        self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                                  self.params['b' + str(idx)])
        if self.use_batchnorm:
            self.params['gamma' + str(idx)] = np.ones(self.hidden_size_list[idx - 1])
            self.params['beta' + str(idx)] = np.zeros(self.hidden_size_list[idx - 1])
            self.layers['BatchNorm' + str(idx)] = BatchNormalization(
                self.params['gamma' + str(idx)], self.params['beta' + str(idx)])
        self.layers['activation_function' + str(idx)] = activation_layer[self.activation]()
        if self.use_dropout:
            self.layers['Dropout' + str(idx)] = Dropout(self.dropout_ration)

    idx = self.hidden_layer_num + 1
    self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                              self.params['b' + str(idx)])
    self.last_layer = SoftmaxWithLoss()
def initialize(input_size, hidden_size, output_size, init_weight=0.01):
    # NOTE: mutates the module-level globals `params` (dict) and `layers` (list).
    params['w1'] = init_weight * np.random.randn(input_size, hidden_size)
    params['b1'] = np.zeros(hidden_size)
    params['w2'] = init_weight * np.random.randn(hidden_size, output_size)
    params['b2'] = np.zeros(output_size)

    layers.append(Affine(params['w1'], params['b1']))
    layers.append(ReLU())
    layers.append(Affine(params['w2'], params['b2']))
    layers.append(SoftmaxWithLoss())
def __init__(self, vocab_size: int, hidden_size: int) -> None:
    W_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype(float)
    W_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype(float)

    self.in_layer0 = MatMul(W_in)
    self.in_layer1 = MatMul(W_in)
    self.out_layer = MatMul(W_out)
    self.loss_layer = SoftmaxWithLoss()

    layers = [self.in_layer0, self.in_layer1, self.out_layer, self.loss_layer]
    self.params = []
    self.grads = []
    for layer in layers:
        self.params += layer.params
        self.grads += layer.grads
    self.word_vecs = W_in
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        # Weights
        w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        w_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

        # Layers
        self.in_layer = MatMul(w_in)
        self.out_layer = MatMul(w_out)

        # Collect weights and gradients
        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for l in layers:
            self.params += l.params
            self.grads += l.grads

        # Loss: one SoftmaxWithLoss per context word.
        self.loss_layer0 = SoftmaxWithLoss()
        self.loss_layer1 = SoftmaxWithLoss()
        # In the limit each context word gets probability 0.5; a single
        # softmax -> *2 -> loss would only subtract a constant ln 2, so it
        # should be equivalent.

        # Distributed representations of the words
        self.word_vecs = w_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l0 = self.loss_layer0.forward(s, contexts[:, 0])
        l1 = self.loss_layer1.forward(s, contexts[:, 1])
        loss = l0 + l1
        return loss

    def backward(self, dl=1):
        ds0 = self.loss_layer0.backward(dl)
        ds1 = self.loss_layer1.backward(dl)
        ds = ds0 + ds1
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
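# Usage sketch (assumptions): the same one-hot layout as SimpleCBOW --
# `target` with shape (N, V) is the input word, and contexts[:, 0] /
# contexts[:, 1] are the two words to predict. The two losses are simply
# summed, and backward() adds the two upstream gradients before out_layer.
model = SimpleSkipGram(vocab_size=7, hidden_size=5)
loss = model.forward(contexts, target)    # l0 + l1
model.backward()                          # fills model.grads in place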
def __init__(self, vocab_size, hidden_size):
    # Weights
    w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
    w_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

    # Layers
    self.in_layer0 = MatMul(w_in)
    self.in_layer1 = MatMul(w_in)
    self.out_layer = MatMul(w_out)

    # Collect weights and gradients
    layers = [self.in_layer0, self.in_layer1, self.out_layer]
    self.params, self.grads = [], []
    for l in layers:
        self.params += l.params
        self.grads += l.grads

    # Loss
    self.loss_layer = SoftmaxWithLoss()

    # Distributed representations of the words
    self.word_vecs = w_in
def initialize(input_size, hidden_size, output_size, init_weight=0.01,
               init_params=None):
    # NOTE: mutates the module-level globals `params` (dict) and `layers` (list).
    if init_params is None:
        params['w1'] = init_weight * np.random.randn(input_size, hidden_size)
        params['b1'] = np.zeros(hidden_size)
        params['w2'] = init_weight * np.random.randn(hidden_size, output_size)
        params['b2'] = np.zeros(output_size)
    else:
        globals()['params'] = init_params  # rebind the global to reuse given params

    layers.append(Affine(params['w1'], params['b1']))
    layers.append(ReLU())
    layers.append(Affine(params['w2'], params['b2']))
    layers.append(SoftmaxWithLoss())
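# Usage sketch (assumption): this functional variant keeps `params` and
# `layers` as module-level globals in the same module as initialize(),
# e.g. `params = {}` and `layers = []` declared at the top of the file.
# A prediction then threads x through every layer except the final
# SoftmaxWithLoss.
params, layers = {}, []
initialize(input_size=784, hidden_size=50, output_size=10)
x = np.random.randn(2, 784)    # dummy mini-batch of 2 samples
for layer in layers[:-1]:      # skip the loss layer when predicting
    x = layer.forward(x)
print(x.shape)                 # (2, 10)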
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        # Weights
        w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        w_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

        # Layers
        self.in_layer0 = MatMul(w_in)
        self.in_layer1 = MatMul(w_in)
        self.out_layer = MatMul(w_out)

        # Collect weights and gradients
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for l in layers:
            self.params += l.params
            self.grads += l.grads

        # Loss
        self.loss_layer = SoftmaxWithLoss()

        # Distributed representations of the words
        self.word_vecs = w_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dl=1):
        ds = self.loss_layer.backward(dl)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer0.backward(da)
        self.in_layer1.backward(da)
def __init__(self, vocab_size, hidden_size):
    # Weights
    w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
    w_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

    # Layers
    self.in_layer = MatMul(w_in)
    self.out_layer = MatMul(w_out)

    # Collect weights and gradients
    layers = [self.in_layer, self.out_layer]
    self.params, self.grads = [], []
    for l in layers:
        self.params += l.params
        self.grads += l.grads

    # Loss: one SoftmaxWithLoss per context word.
    self.loss_layer0 = SoftmaxWithLoss()
    self.loss_layer1 = SoftmaxWithLoss()
    # In the limit each context word gets probability 0.5; a single
    # softmax -> *2 -> loss would only subtract a constant ln 2, so it
    # should be equivalent.

    # Distributed representations of the words
    self.word_vecs = w_in