def __init__(self, vocab_size, wordvec_size, hidden_size):
    V, D, H = vocab_size, wordvec_size, hidden_size
    rn = np.random.randn

    # Weight initialization (Xavier-style scaling for the LSTM and Affine weights)
    embed_W = (rn(V, D) / 100).astype('f')
    lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
    lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
    lstm_b = np.zeros(4 * H).astype('f')
    affine_W = (rn(H, V) / np.sqrt(H)).astype('f')
    affine_b = np.zeros(V).astype('f')

    # Build the layers
    self.embed = TimeEmbedding(embed_W)
    self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True)
    self.affine = TimeAffine(affine_W, affine_b)

    # Collect all parameters and gradients
    self.params, self.grads = [], []
    for layer in (self.embed, self.lstm, self.affine):
        self.params += layer.params
        self.grads += layer.grads
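# A shape-only sketch of how the LSTM weights above are laid out. This is an
# illustration, not part of the original model: the 4 * H columns of lstm_Wx
# pack the four LSTM gates side by side, one H-wide slab per gate (the exact
# gate order depends on the LSTM implementation). Sizes here are hypothetical.
import numpy as np

D, H = 100, 100
lstm_Wx = (np.random.randn(D, 4 * H) / np.sqrt(D)).astype('f')

gate_Ws = np.hsplit(lstm_Wx, 4)  # four (D, H) slabs, one per gate
assert all(W.shape == (D, H) for W in gate_Ws)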
def forward(self, xs):
    Wx, Wh, b = self.params
    N, T, D = xs.shape
    H = Wh.shape[0]

    self.layers = []
    hs = np.empty((N, T, H), dtype='f')

    # Initialize the hidden and cell states on the first call, or on every
    # call when the layer is not stateful
    if not self.stateful or self.h is None:
        self.h = np.zeros((N, H), dtype='f')
    if not self.stateful or self.c is None:
        self.c = np.zeros((N, H), dtype='f')

    for t in range(T):
        layer = LSTM(*self.params)
        self.h, self.c = layer.forward(xs[:, t, :], self.h, self.c)
        hs[:, t, :] = self.h
        self.layers.append(layer)

    return hs
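# A minimal, self-contained sketch of the stateful pattern used above (and by
# the GRU/RNN variants below): a toy tanh cell stands in for LSTM, and all
# names here are illustrative. When stateful, the hidden state left by one
# forward call seeds the next, which is what lets truncated BPTT span
# successive mini-batches.
import numpy as np

class ToyTimeRNN:
    def __init__(self, Wx, Wh, stateful=True):
        self.Wx, self.Wh, self.stateful = Wx, Wh, stateful
        self.h = None

    def forward(self, xs):
        N, T, D = xs.shape
        H = self.Wh.shape[0]
        hs = np.empty((N, T, H), dtype='f')
        if not self.stateful or self.h is None:
            self.h = np.zeros((N, H), dtype='f')  # fresh state
        for t in range(T):
            self.h = np.tanh(xs[:, t, :] @ self.Wx + self.h @ self.Wh)
            hs[:, t, :] = self.h
        return hs

N, T, D, H = 2, 5, 3, 4
layer = ToyTimeRNN(np.random.randn(D, H), np.random.randn(H, H))
h1 = layer.forward(np.random.randn(N, T, D))
h2 = layer.forward(np.random.randn(N, T, D))  # continues from h1's last step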
def __init__(self, vocab_size, wordvec_size, hidden_size):
    V, D, H = vocab_size, wordvec_size, hidden_size
    H = H * 2  # because this layer receives the matrix that combines the forward and backward directions
    rn = np.random.randn

    embed_W = (rn(V, D) / 100).astype('f')
    lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
    lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
    lstm_b = np.zeros(4 * H).astype('f')
    affine_W = (rn(2 * H, V) / np.sqrt(H)).astype('f')
    affine_b = np.zeros(V).astype('f')

    self.embed = TimeEmbedding(embed_W)
    self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True)
    self.attention = TimeAttention()
    self.affine = TimeAffine(affine_W, affine_b)
    layers = [self.embed, self.lstm, self.attention, self.affine]

    self.params, self.grads = [], []
    for layer in layers:
        self.params += layer.params
        self.grads += layer.grads
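# Why affine_W takes 2 * H inputs: in an attention decoder, the context
# vector produced by TimeAttention is concatenated with the LSTM hidden state
# before the final Affine layer. A shape-only numpy sketch with hypothetical
# sizes:
import numpy as np

N, T, H, V = 2, 4, 6, 50
dec_hs = np.random.randn(N, T, H).astype('f')  # decoder LSTM outputs
ctx = np.random.randn(N, T, H).astype('f')     # attention context vectors
out = np.concatenate((ctx, dec_hs), axis=2)    # (N, T, 2*H)

affine_W = (np.random.randn(2 * H, V) / np.sqrt(H)).astype('f')
scores = out.reshape(N * T, 2 * H) @ affine_W  # (N*T, V) vocabulary scores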
def get_negative_sample(self, target):
    batch_size = target.shape[0]

    if not GPU:
        negative_sample = np.zeros((batch_size, self.sample_size), dtype=np.int32)

        for i in range(batch_size):
            # Zero out the positive example's probability and renormalize,
            # so the target itself is never drawn
            p = self.word_p.copy()
            target_idx = target[i]
            p[target_idx] = 0
            p /= p.sum()
            negative_sample[i, :] = np.random.choice(self.vocab_size,
                                                     size=self.sample_size,
                                                     replace=False, p=p)
    else:
        # On GPU (cupy), prioritize speed: sample with replacement and
        # without excluding the target, so a negative sample may
        # occasionally coincide with the positive example
        negative_sample = np.random.choice(self.vocab_size,
                                           size=(batch_size, self.sample_size),
                                           replace=True, p=self.word_p)

    return negative_sample
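# A self-contained demo of the CPU branch above: zeroing the target's
# probability and renormalizing guarantees the positive example is never
# drawn as a negative sample. The toy distribution is hypothetical.
import numpy as np

word_p = np.array([0.5, 0.3, 0.2])  # toy unigram distribution
target_idx = 0

p = word_p.copy()
p[target_idx] = 0
p /= p.sum()  # [0.0, 0.6, 0.4]

sample = np.random.choice(3, size=2, replace=False, p=p)
assert target_idx not in sample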
def __init__(self, vocab_size, wordvec_size, hidden_size):
    V, D, H = vocab_size, wordvec_size, hidden_size
    rn = np.random.randn

    embed_W = (rn(V, D) / 100).astype('f')
    lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
    lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
    lstm_b = np.zeros(4 * H).astype('f')

    self.embed = TimeEmbedding(embed_W)
    self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=False)

    self.params = self.embed.params + self.lstm.params
    self.grads = self.embed.grads + self.lstm.grads
    self.hs = None
def forward(self, xs):
    N, T, _ = xs.shape
    H, _ = self.Wh.shape

    self.layers = []
    hs = np.empty((N, T, H), dtype='f')

    if not self.stateful or self.h is None:
        self.h = np.zeros((N, H), dtype='f')

    for t in range(T):
        layer = GRU(self.Wx, self.Wh)
        self.h = layer.forward(xs[:, t, :], self.h)
        hs[:, t, :] = self.h
        self.layers.append(layer)

    return hs
def forward(self, h, target):
    batch_size = target.shape[0]
    negative_sample = self.sampler.get_negative_sample(target)

    # Forward pass for the positive example (label 1)
    score = self.embed_dot_layers[0].forward(h, target)
    correct_label = np.ones(batch_size, dtype=np.int32)
    loss = self.loss_layers[0].forward(score, correct_label)

    # Forward pass for the negative examples (label 0)
    negative_label = np.zeros(batch_size, dtype=np.int32)
    for i in range(self.sample_size):
        negative_target = negative_sample[:, i]
        score = self.embed_dot_layers[1 + i].forward(h, negative_target)
        loss += self.loss_layers[1 + i].forward(score, negative_label)

    return loss
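# The idea behind the 1/0 labels above, in plain numpy: each (hidden, word)
# pair is scored by a dot product and squashed with a sigmoid; the positive
# example is trained toward label 1 and each sampled negative toward label 0
# via binary cross-entropy. All arrays here are hypothetical stand-ins.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

h = np.random.randn(2, 4)      # batch of hidden vectors
w_pos = np.random.randn(2, 4)  # embeddings of the correct words
w_neg = np.random.randn(2, 4)  # embeddings of one sampled negative word

score_pos = np.sum(h * w_pos, axis=1)
score_neg = np.sum(h * w_neg, axis=1)

eps = 1e-7  # avoids log(0)
loss = -np.log(sigmoid(score_pos) + eps) - np.log(1.0 - sigmoid(score_neg) + eps)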
def forward(self, xs):
    Wx, _, _ = self.params
    N, T, _ = xs.shape
    _, H = Wx.shape

    self.layers = []
    hs = np.empty((N, T, H), dtype='f')

    if not self.stateful or self.h is None:
        self.h = np.zeros((N, H), dtype='f')

    for t in range(T):
        layer = RNN(*self.params)
        self.h = layer.forward(xs[:, t, :], self.h)
        hs[:, t, :] = self.h
        self.layers.append(layer)

    return hs
def __init__(self, corpus, power, sample_size):
    self.sample_size = sample_size
    self.vocab_size = None
    self.word_p = None

    # Count word-id frequencies (requires: from collections import Counter)
    counts = Counter()
    for word_id in corpus:
        counts[word_id] += 1

    vocab_size = len(counts)
    self.vocab_size = vocab_size

    self.word_p = np.zeros(vocab_size)
    for i in range(vocab_size):
        self.word_p[i] = counts[i]

    # Smooth the unigram distribution and normalize
    self.word_p = np.power(self.word_p, power)
    self.word_p /= np.sum(self.word_p)
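# What the power parameter (typically 0.75, as in word2vec) does: raising the
# counts to a power below 1 flattens the distribution slightly, so rare words
# are sampled as negatives a bit more often. A toy demo:
import numpy as np
from collections import Counter

corpus = [0, 1, 1, 2, 2, 2, 2]  # toy word-id corpus
counts = Counter(corpus)
word_p = np.array([counts[i] for i in range(len(counts))], dtype='f')

raw = word_p / word_p.sum()        # [0.143, 0.286, 0.571]
smoothed = np.power(word_p, 0.75)
smoothed /= smoothed.sum()         # ~[0.181, 0.305, 0.513]: word 0 gains mass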