import numpy as np

# Embedding and NegativeSamplingLoss are assumed to be defined elsewhere
# (the usual word2vec embedding layer and negative-sampling loss).
class SkipGram:
    def __init__(self, vocab_size, hidden_size, window_size, corpus):
        # weights
        w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        w_out = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')

        # layers: one input Embedding, one NegativeSamplingLoss
        # per context position
        self.embed_layer = Embedding(w_in)
        self.ns_loss_layers = [
            NegativeSamplingLoss(w_out, corpus)
            for _ in range(2 * window_size)
        ]

        # collect all weights and gradients
        layers = [self.embed_layer] + self.ns_loss_layers
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # distributed representations of the words
        self.word_vecs = w_in

    def forward(self, contexts, target):
        h = self.embed_layer.forward(target)
        loss = sum(
            layer.forward(h, contexts[:, i])
            for i, layer in enumerate(self.ns_loss_layers)
        )
        return loss

    def backward(self, dl=1):
        dh = sum(layer.backward(dl) for layer in self.ns_loss_layers)
        self.embed_layer.backward(dh)
        return None
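As a rough usage sketch, a single training step could look like the following. The corpus, contexts, and target arrays here are hypothetical toy data, and the plain SGD loop stands in for whatever optimizer the surrounding code actually uses; only the SkipGram interface above is taken as given.

import numpy as np

# Hypothetical toy sizes and data, purely for illustration.
vocab_size, hidden_size, window_size = 7, 5, 1
corpus = np.array([0, 1, 2, 3, 4, 1, 5, 6])

model = SkipGram(vocab_size, hidden_size, window_size, corpus)

# For window_size=1, contexts has 2*window_size columns,
# one per context position around each target word.
contexts = np.array([[0, 2], [1, 3], [2, 4]])  # shape (N, 2*window_size)
target = np.array([1, 2, 3])                   # shape (N,)

loss = model.forward(contexts, target)
model.backward()

# Plain SGD update as a stand-in for a real optimizer.
lr = 0.1
for param, grad in zip(model.params, model.grads):
    param -= lr * grad

word_vecs = model.word_vecs  # (vocab_size, hidden_size) word embeddings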
class EmbeddingDot:
    def __init__(self, w):
        self.embed = Embedding(w)
        self.params = self.embed.params
        self.grads = self.embed.grads
        self.cache = None

    def forward(self, h, idx):
        # pick the output-side word vectors for idx, then take the
        # row-wise dot product with the hidden vectors h
        w_idx = self.embed.forward(idx)
        s = np.sum(h * w_idx, axis=1)
        self.cache = (h, w_idx)
        return s

    def backward(self, ds):
        # ds has shape (N,); reshape to (N, 1) so it broadcasts
        # across the hidden dimension
        ds = ds.reshape(ds.shape[0], 1)
        h, w_idx = self.cache
        dw_idx = ds * h
        self.embed.backward(dw_idx)
        dh = ds * w_idx
        return dh
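EmbeddingDot delegates the table lookup to an Embedding layer. For reference, here is a minimal sketch of the interface that the code above assumes: forward is a row lookup into the weight matrix, and backward scatter-adds the incoming gradient back into those rows.

import numpy as np

# Minimal sketch of the Embedding layer assumed by EmbeddingDot.
class Embedding:
    def __init__(self, w):
        self.params = [w]
        self.grads = [np.zeros_like(w)]
        self.idx = None

    def forward(self, idx):
        w, = self.params
        self.idx = idx
        return w[idx]  # shape (N, hidden_size)

    def backward(self, dout):
        dw, = self.grads
        dw[...] = 0
        # scatter-add so that repeated word ids accumulate gradients
        np.add.at(dw, self.idx, dout)
        return None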
from typing import List


class EmbeddingDot:
    def __init__(self, W: np.ndarray) -> None:
        self.embed = Embedding(W)
        self.params = self.embed.params
        self.grads = self.embed.grads
        self.cache = None

    def forward(self, h: np.ndarray, idx: List[int]) -> np.ndarray:
        target_W = self.embed.forward(idx)
        out = np.sum(target_W * h, axis=1)
        self.cache = (h, target_W)
        return out

    def backward(self, dout: np.ndarray) -> np.ndarray:
        h, target_W = self.cache
        dout = dout.reshape(dout.shape[0], 1)
        dtarget_W = dout * h
        self.embed.backward(dtarget_W)
        dh = dout * target_W
        return dh
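A quick shape check with arbitrary toy sizes (batch 3, vocab 10, hidden 4; assumes the Embedding sketch above) confirms that forward maps a batch of hidden vectors to one score per sample, and backward restores the (N, hidden_size) shape:

import numpy as np

np.random.seed(0)
W = 0.01 * np.random.randn(10, 4).astype('f')
layer = EmbeddingDot(W)

h = np.random.randn(3, 4).astype('f')  # hidden vectors for a batch of 3
idx = [0, 3, 1]                        # target word ids

score = layer.forward(h, idx)
print(score.shape)  # (3,)

dh = layer.backward(np.ones(3, dtype='f'))
print(dh.shape)     # (3, 4)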