import numpy as np
# TimeEmbedding, TimeLSTM, TimeAffine and TimeSoftmaxWithLoss are assumed to
# come from the book's shared time-layer module (e.g. common/time_layers.py).


def __init__(self, vocab_size=10000, wordvec_size=100, hidden_size=100):
    V, D, H = vocab_size, wordvec_size, hidden_size
    rn = np.random.randn

    # Initialize weights
    embed_W = (rn(V, D) / 100).astype('f')
    lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
    lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
    lstm_b = np.zeros(4 * H).astype('f')
    affine_W = (rn(H, V) / np.sqrt(H)).astype('f')
    affine_b = np.zeros(V).astype('f')

    # Create the layers
    self.layers = [
        TimeEmbedding(embed_W),
        TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True),
        TimeAffine(affine_W, affine_b)
    ]
    self.loss_layer = TimeSoftmaxWithLoss()
    self.lstm_layer = self.layers[1]

    # Collect all weights and gradients into flat lists
    self.params, self.grads = [], []
    for layer in self.layers:
        self.params += layer.params
        self.grads += layer.grads
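
# A minimal, self-contained sketch (not part of the model code) showing why the
# LSTM matrices above are scaled by 1/sqrt(fan_in): with that "Xavier-style"
# scale, a pre-activation x @ W keeps roughly unit variance regardless of D.
import numpy as np

D, H = 100, 100
x = np.random.randn(1000, D).astype('f')                   # dummy batch of inputs
W = (np.random.randn(D, 4 * H) / np.sqrt(D)).astype('f')   # same scaling as lstm_Wx
print(np.var(x @ W))  # ~1.0; without the 1/sqrt(D) factor it would be ~D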
def normalize(x):
    # Scale x to unit L2 norm, row-wise for a 2D array. Note: modifies x in place.
    if x.ndim == 2:
        s = np.sqrt((x * x).sum(axis=1))
        x /= s.reshape((s.shape[0], 1))
    elif x.ndim == 1:
        s = np.sqrt((x * x).sum())
        x /= s
    return x
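
# Usage sketch for normalize (assumes NumPy only). Because it divides the input
# array in place, pass a copy if the original values are still needed.
import numpy as np

W = np.random.randn(5, 3).astype('f')
W_unit = normalize(W.copy())
print(np.linalg.norm(W_unit, axis=1))  # every row norm is now 1.0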
def cos_similarity(x, y, eps=1e-8):
    '''Compute the cosine similarity of two vectors.

    :param x: vector
    :param y: vector
    :param eps: small value to prevent division by zero
    :return: cosine similarity of x and y
    '''
    nx = x / (np.sqrt(np.sum(x ** 2)) + eps)
    ny = y / (np.sqrt(np.sum(y ** 2)) + eps)
    return np.dot(nx, ny)
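
# Usage sketch: cosine similarity ranges from -1 (opposite direction) to 1
# (same direction), and eps keeps a zero vector from raising a division error.
import numpy as np

a = np.array([1.0, 2.0, 3.0])
b = np.array([2.0, 4.0, 6.0])
print(cos_similarity(a, b))            # ~1.0, parallel vectors
print(cos_similarity(a, np.zeros(3)))  # 0.0, no crash thanks to eps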
def __init__(self, vocab_size, wordvec_size, hidden_size):
    V, D, H = vocab_size, wordvec_size, hidden_size
    rn = np.random.randn

    # Initialize weights
    embed_W = (rn(V, D) / 100).astype('f')
    lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
    lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
    lstm_b = np.zeros(4 * H).astype('f')

    self.embed = TimeEmbedding(embed_W)
    # stateful=False: the encoder processes each source sequence independently
    self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=False)

    self.params = self.embed.params + self.lstm.params
    self.grads = self.embed.grads + self.lstm.grads
    self.hs = None
def __init__(self, vocab_size, wordvec_size, hidden_size):
    V, D, H = vocab_size, wordvec_size, hidden_size
    rn = np.random.randn

    # Initialize weights
    embed_W = (rn(V, D) / 100).astype('f')
    lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
    lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
    lstm_b = np.zeros(4 * H).astype('f')
    affine_W = (rn(H, V) / np.sqrt(H)).astype('f')
    affine_b = np.zeros(V).astype('f')

    self.embed = TimeEmbedding(embed_W)
    # stateful=True: the decoder keeps the hidden state handed over from the encoder
    self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True)
    self.affine = TimeAffine(affine_W, affine_b)

    self.params, self.grads = [], []
    for layer in (self.embed, self.lstm, self.affine):
        self.params += layer.params
        self.grads += layer.grads
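
# Hypothetical sketch of the params/grads aggregation pattern used by both the
# Encoder and Decoder above: every layer exposes `params` and `grads` lists,
# and the container simply concatenates them, so an optimizer can update the
# whole model through one flat pair of lists. DummyLayer is illustrative only.
import numpy as np

class DummyLayer:
    def __init__(self, shape):
        self.params = [np.random.randn(*shape).astype('f')]
        self.grads = [np.zeros(shape, dtype='f')]

layers = [DummyLayer((4, 3)), DummyLayer((3, 2))]
params, grads = [], []
for layer in layers:
    params += layer.params
    grads += layer.grads
print(len(params), len(grads))  # 2 2 -- one flat list of arrays per kind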
def clip_grads(grads, max_norm):
    # Compute the global L2 norm over all gradients
    total_norm = 0
    for grad in grads:
        total_norm += np.sum(grad ** 2)
    total_norm = np.sqrt(total_norm)

    # Rescale every gradient in place if the norm exceeds the threshold
    rate = max_norm / (total_norm + 1e-6)
    if rate < 1:
        for grad in grads:
            grad *= rate
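
# Usage sketch: global-norm clipping rescales all gradients by one common
# factor, preserving their directions while capping the combined L2 norm at
# max_norm (a standard remedy for exploding gradients in RNN training).
import numpy as np

grads = [np.full((2, 2), 10.0), np.full((3,), 10.0)]
clip_grads(grads, max_norm=5.0)
total = np.sqrt(sum(np.sum(g ** 2) for g in grads))
print(total)  # ~5.0; both arrays were scaled in place by the same rate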
def update(self, params, grads):
    if self.m is None:
        # Lazily initialize the first- and second-moment accumulators
        self.m, self.v = [], []
        for param in params:
            self.m.append(np.zeros_like(param))
            self.v.append(np.zeros_like(param))

    self.iter += 1

    for i in range(len(params)):
        # Exponential moving averages of the gradient and its square
        self.m[i] = self.beta1 * self.m[i] + (1 - self.beta1) * grads[i]
        self.v[i] = self.beta2 * self.v[i] + (1 - self.beta2) * (grads[i] ** 2)

        # Bias-corrected moment estimates
        m_i_hat = self.m[i] / (1 - self.beta1 ** self.iter)
        v_i_hat = self.v[i] / (1 - self.beta2 ** self.iter)

        params[i] -= self.lr * m_i_hat / (np.sqrt(v_i_hat) + 1e-8)
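
# A minimal sketch of the class this update method belongs to. The defaults
# (lr=0.001, beta1=0.9, beta2=0.999) are the usual Adam hyperparameters and an
# assumption here, not taken from the surrounding code.
import numpy as np

class Adam:
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None
        self.v = None

Adam.update = update  # attach the update method defined above

# Toy check: minimize 0.5 * x^2, whose gradient is x itself.
opt = Adam(lr=0.1)
x = [np.array([5.0])]
for _ in range(200):
    opt.update(x, [x[0].copy()])  # pass the current gradient; x updated in place
print(x[0])  # near 0 after a few hundred steps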