import numpy as np


class RNN:
    def __init__(self, wx, wh, b):
        self.matmul_x = MatMul(wx)
        self.matmul_h = MatMul(wh)
        self.params = self.matmul_x.params + self.matmul_h.params + [b]
        self.grads = self.matmul_x.grads + self.matmul_h.grads + [np.zeros_like(b)]
        self.h = None

    def forward(self, x, h_prev):
        b = self.params[2]
        t = self.matmul_h.forward(h_prev) + self.matmul_x.forward(x) + b
        h = np.tanh(t)
        self.h = h
        return h

    def backward(self, dh):
        dt = dh * (1 - self.h ** 2)  # tanh derivative: 1 - tanh(t)^2
        db = np.sum(dt, axis=0)
        dh_prev = self.matmul_h.backward(dt)
        dx = self.matmul_x.backward(dt)
        self.grads[2][...] = db
        return dx, dh_prev
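# The classes in this file assume MatMul and SoftmaxWithLoss layers exposing
# .params / .grads lists. Minimal sketches of both follow, matching the common
# "layers hold params and grads" convention; the original implementations may
# differ in detail. A small shape-check of the RNN step comes after.
class MatMul:
    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.x = None

    def forward(self, x):
        W, = self.params
        self.x = x
        return np.dot(x, W)

    def backward(self, dout):
        W, = self.params
        dx = np.dot(dout, W.T)
        dW = np.dot(self.x.T, dout)
        self.grads[0][...] = dW  # overwrite in place so shared grad lists update
        return dx


class SoftmaxWithLoss:
    def __init__(self):
        self.params, self.grads = [], []
        self.y = None  # softmax output
        self.t = None  # target label indices

    def forward(self, x, t):
        x = x - x.max(axis=1, keepdims=True)  # stabilize exp
        self.y = np.exp(x) / np.exp(x).sum(axis=1, keepdims=True)
        self.t = t
        return -np.log(self.y[np.arange(len(t)), t] + 1e-7).mean()

    def backward(self, dout=1):
        dx = self.y.copy()
        dx[np.arange(len(self.t)), self.t] -= 1
        return dx * dout / len(self.t)


# Quick shape check of one RNN step (sizes are illustrative).
D, H, N = 3, 4, 2  # input size, hidden size, batch size
rnn = RNN(np.random.randn(D, H).astype('f'),
          np.random.randn(H, H).astype('f'),
          np.zeros(H, dtype='f'))
h = rnn.forward(np.random.randn(N, D).astype('f'), np.zeros((N, H), dtype='f'))
dx, dh_prev = rnn.backward(np.ones_like(h))  # dx: (N, D), dh_prev: (N, H)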
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5  # undo the averaging of the two context vectors
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
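# Usage sketch for SimpleCBOW: the in-layers do a plain matrix product, so
# contexts must be one-hot encoded with shape (N, 2, V); target holds label
# indices for the SoftmaxWithLoss sketch above. All sizes and ids here are
# illustrative.
V = 7
model = SimpleCBOW(vocab_size=V, hidden_size=5)
context_ids = np.array([[0, 2], [1, 3]])
contexts = np.eye(V, dtype='f')[context_ids]  # (N, 2, V) one-hot
target = np.array([1, 2])                     # (N,) label indices
loss = model.forward(contexts, target)
model.backward()
# model.grads now holds dW_in (once per context slot) and dW_out.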
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        # weights
        w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        w_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

        # layers
        self.in_layer = MatMul(w_in)
        self.out_layer = MatMul(w_out)

        # collect all weights and gradients
        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for l in layers:
            self.params += l.params
            self.grads += l.grads

        # loss
        self.loss_layer0 = SoftmaxWithLoss()
        self.loss_layer1 = SoftmaxWithLoss()
        # In the limit, each context word gets probability 0.5.
        # A softmax -> *2 -> loss arrangement would just subtract a uniform
        # ln 2 from the loss, so it should come out the same.

        # distributed representations of the words
        self.word_vecs = w_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l0 = self.loss_layer0.forward(s, contexts[:, 0])
        l1 = self.loss_layer1.forward(s, contexts[:, 1])
        loss = l0 + l1
        return loss

    def backward(self, dl=1):
        ds0 = self.loss_layer0.backward(dl)
        ds1 = self.loss_layer1.backward(dl)
        ds = ds0 + ds1
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
        return None
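# Numeric check of the comment above: when the two context words differ, the
# best a single shared score vector can do is put probability 0.5 on each, so
# the summed loss bottoms out at 2 * ln 2. Uses the SoftmaxWithLoss sketch
# earlier; the scores are illustrative.
s = np.array([[10.0, 10.0, -10.0]])            # softmax ~ [0.5, 0.5, ~0]
l0 = SoftmaxWithLoss().forward(s, np.array([0]))
l1 = SoftmaxWithLoss().forward(s, np.array([1]))
print(l0 + l1, 2 * np.log(2))                  # both ~ 1.3863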
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        # weights
        w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        w_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

        # layers
        self.in_layer0 = MatMul(w_in)
        self.in_layer1 = MatMul(w_in)
        self.out_layer = MatMul(w_out)

        # collect all weights and gradients
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for l in layers:
            self.params += l.params
            self.grads += l.grads

        # loss
        self.loss_layer = SoftmaxWithLoss()

        # distributed representations of the words
        self.word_vecs = w_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dl=1):
        ds = self.loss_layer.backward(dl)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer0.backward(da)
        self.in_layer1.backward(da)
        return None
class SimpleCBOW:
    def __init__(self, vocab_size: int, hidden_size: int) -> None:
        W_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype(float)
        W_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype(float)

        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        layers = [
            self.in_layer0, self.in_layer1, self.out_layer, self.loss_layer
        ]
        self.params = []
        self.grads = []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts: np.ndarray, target: np.ndarray) -> float:
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout: int = 1) -> None:
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
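# Finite-difference spot check of dW_out for the typed variant above. This is
# a sketch that relies on the MatMul / SoftmaxWithLoss sketches earlier; the
# element (0, 1), sizes, and eps are arbitrary choices.
np.random.seed(0)
V, H = 5, 3
model = SimpleCBOW(vocab_size=V, hidden_size=H)
contexts = np.eye(V)[np.array([[0, 2], [1, 3]])]  # (N, 2, V) one-hot, float64
target = np.array([1, 2])
model.forward(contexts, target)
model.backward()
analytic = model.grads[2][0, 1]  # grads order: [dW_in, dW_in, dW_out]
W_out = model.out_layer.params[0]
eps, orig = 1e-4, W_out[0, 1]
W_out[0, 1] = orig + eps
loss_plus = model.forward(contexts, target)
W_out[0, 1] = orig - eps
loss_minus = model.forward(contexts, target)
W_out[0, 1] = orig  # restore the perturbed weight
numeric = (loss_plus - loss_minus) / (2 * eps)
print(analytic, numeric)  # should agree to several decimal places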