def cos_similarity(x, y, eps=1e-8):
    """Compute the cosine similarity between two vectors.

    :param x: vector
    :param y: vector
    :param eps: small constant to avoid division by zero
    :return: cosine similarity (scalar)
    """
    norm_x = np.sqrt(np.sum(x ** 2)) + eps
    norm_y = np.sqrt(np.sum(y ** 2)) + eps
    return np.dot(x / norm_x, y / norm_y)
def backward(self, dc):
    """Backward pass of the weighted-sum (context vector) layer.

    dc has shape (N, H); the gradient flows back through the sum over
    the time axis (broadcast dc across T) and the elementwise product.
    Returns (dhs, da): gradients w.r.t. the hidden states and weights.
    """
    hs, ar = self.cache
    batch, timesteps, hidden = hs.shape
    # Undo the sum over axis=1 by repeating dc along the time axis.
    dt = dc.reshape(batch, 1, hidden).repeat(timesteps, axis=1)
    dhs = ar * dt                    # gradient w.r.t. hs
    da = np.sum(hs * dt, axis=2)     # undo the broadcast of a over H
    return dhs, da
def backward(self, dout):
    """Backward pass of a fully-connected (affine) layer.

    Writes dW and db into self.grads in place and returns the gradient
    with respect to the layer input x.
    """
    W = self.params[0]
    # Gradients of y = x @ W + b.
    dW = np.dot(self.x.T, dout)
    db = dout.sum(axis=0)
    dx = np.dot(dout, W.T)
    self.grads[0][...] = dW
    self.grads[1][...] = db
    return dx
def softmax(x):
    """Numerically stable softmax for 1-D or 2-D (batched) input."""
    if x.ndim == 2:
        # Subtract the row-wise max before exponentiating to avoid overflow.
        shifted = x - x.max(axis=1, keepdims=True)
        e = np.exp(shifted)
        return e / e.sum(axis=1, keepdims=True)
    if x.ndim == 1:
        e = np.exp(x - np.max(x))
        return e / e.sum()
    # Other ranks fall through unchanged, matching the original contract.
    return x
def forward(self, hs, a):
    """Weighted sum of hidden states hs (N, T, H) with weights a (N, T).

    Relies on broadcasting instead of an explicit repeat along H.
    Returns the context vector c of shape (N, H).
    """
    batch, timesteps, _ = hs.shape
    ar = a.reshape(batch, timesteps, 1)  # broadcasts over the H axis
    c = (hs * ar).sum(axis=1)
    self.cache = (hs, ar)
    return c
def clip_grads(grads, max_norm):
    """Rescale grads in place so their global L2 norm is at most max_norm."""
    total_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    rate = max_norm / (total_norm + 1e-6)
    if rate < 1:
        # Scale every gradient array in place by the same factor.
        for g in grads:
            g *= rate
def backward(self, da):
    """Backward pass of the attention-weight layer.

    da: gradient w.r.t. the attention weights, shape (N, T).
    Returns (dhs, dh): gradients w.r.t. the encoder hidden states and
    the (broadcast) decoder hidden state.
    """
    hs, hr = self.cache
    batch, timesteps, hidden = hs.shape
    ds = self.softmax.backward(da)
    # Undo the sum over the H axis by repeating ds along it.
    dt = ds.reshape(batch, timesteps, 1).repeat(hidden, axis=2)
    dhs = hr * dt
    # hr was broadcast over T in forward, so its gradient sums over time.
    dh = np.sum(hs * dt, axis=1)
    return dhs, dh
def forward(self, hs, h):
    """Compute attention weights from encoder states hs (N, T, H) and
    the decoder state h (N, H).

    Scores are the dot products hs[:, t] . h, normalized with softmax.
    """
    batch, timesteps, hidden = hs.shape
    hr = h.reshape(batch, 1, hidden)  # broadcasts over the T axis
    scores = (hs * hr).sum(axis=2)
    a = self.softmax.forward(scores)
    self.cache = (hs, hr)
    return a
def cross_entropy_error(y, t):
    """Cross-entropy loss averaged over the batch.

    :param y: predicted probabilities, shape (batch, classes) or (classes,)
    :param t: labels — class indices or one-hot vectors matching y
    :return: scalar loss
    """
    if y.ndim == 1:
        # BUG FIX: the original called reshape without assigning the result
        # (reshape is not in-place), so 1-D inputs were never promoted to a
        # batch of one and t.argmax(axis=1) crashed for 1-D one-hot labels.
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # If the teacher data is one-hot, convert it to class indices.
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    # Clip to avoid log(0).
    log_y = np.log(np.clip(y[np.arange(batch_size), t], 1e-7, 1))
    return -np.sum(log_y) / batch_size
def ppmi(C, verbose=False, eps=1e-8):
    """Positive Pointwise Mutual Information matrix from a co-occurrence matrix.

    :param C: co-occurrence matrix (column sums are used as word counts,
              which assumes C is symmetric — as produced by a standard
              co-occurrence builder)
    :param verbose: print progress roughly every 1% of entries
    :param eps: small constant keeping log2 defined for zero counts
    :return: PPMI matrix, float32, same shape as C
    """
    M = np.zeros_like(C, dtype=np.float32)
    N = np.sum(C)
    S = np.sum(C, axis=0)
    total = C.shape[0] * C.shape[1]
    # BUG FIX: total // 100 is 0 for matrices with fewer than 100 entries,
    # which made `cnt % (total // 100)` raise ZeroDivisionError when
    # verbose=True. Clamp the reporting step to at least 1.
    step = max(total // 100, 1)
    cnt = 0

    for i in range(C.shape[0]):
        for j in range(C.shape[1]):
            pmi = np.log2(C[i, j] * N / (S[i] * S[j]) + eps)
            M[i, j] = max(0, pmi)

            if verbose:
                cnt += 1
                if cnt % step == 0:
                    print('%.1f%% done' % (100 * cnt / total))
    return M
def backward(self, dscore):
    """Backward pass of the (peeky) decoder.

    In forward, the encoder state h was concatenated to both the LSTM
    input and the Affine input, so its gradient arrives via two routes
    (dhs0 and dhs1) plus the LSTM's own carried gradient dh.
    """
    H = self.cache
    daffine = self.affine.backward(dscore)
    # Split off the gradient of the concatenated h from the affine input.
    dhs0, dlstm_out = daffine[:, :, :H], daffine[:, :, H:]
    dlstm_in = self.lstm.backward(dlstm_out)
    dhs1, dembed = dlstm_in[:, :, :H], dlstm_in[:, :, H:]
    self.embed.backward(dembed)
    dhs = dhs0 + dhs1
    dh = self.lstm.dh + np.sum(dhs, axis=1)
    return dh
def backward(self, dh_next):
    """Backward pass of a single tanh RNN cell.

    Forward was h_next = tanh(x Wx + h_prev Wh + b); uses the identity
    d tanh(u)/du = 1 - tanh(u)^2. Writes dWx, dWh, db into self.grads
    and returns (dx, dh_prev).
    """
    Wx, Wh, _ = self.params
    x, h_prev, h_next = self.cache

    # Gradient through tanh.
    dt = dh_next * (1.0 - h_next ** 2)

    self.grads[0][...] = np.dot(x.T, dt)       # dWx
    self.grads[1][...] = np.dot(h_prev.T, dt)  # dWh
    self.grads[2][...] = np.sum(dt, axis=0)    # db

    dx = np.dot(dt, Wx.T)
    dh_prev = np.dot(dt, Wh.T)
    return dx, dh_prev
def backward(self, dout):
    """Backward pass of the time-distributed affine layer.

    Flattens the batch and time axes into one so each gradient is a
    single 2-D matmul, then restores the (N, T, D) shape of dx.
    """
    N, T, D = self.x.shape
    W = self.params[0]

    dout2d = dout.reshape(N * T, -1)
    x2d = self.x.reshape(N * T, D)

    self.grads[0][...] = np.dot(x2d.T, dout2d)   # dW
    self.grads[1][...] = np.sum(dout2d, axis=0)  # db

    dx = np.dot(dout2d, W.T).reshape(N, T, D)
    return dx
def __init__(self, corpus, power, sample_size):
    """Build a unigram distribution raised to `power` for negative sampling.

    :param corpus: sequence of word ids; ids are assumed to be the
                   contiguous range 0..vocab_size-1 (TODO confirm with caller)
    :param power: exponent applied to each count (e.g. 0.75) to flatten
                  the distribution
    :param sample_size: number of negative samples drawn per target
    """
    self.sample_size = sample_size

    counts = collections.Counter(corpus)
    self.vocab_size = len(counts)

    # counts[i] is 0 for any id absent from the corpus.
    probs = np.array([counts[i] for i in range(self.vocab_size)], dtype=np.float64)
    probs = np.power(probs, power)
    self.word_p = probs / np.sum(probs)
def forward(self, xs, ts):
    """Softmax + cross-entropy loss over a time series of scores.

    :param xs: scores, shape (N, T, V)
    :param ts: teacher labels, shape (N, T) as indices or (N, T, V) one-hot
    :return: scalar loss averaged over non-ignored time steps
    """
    N, T, V = xs.shape

    if ts.ndim == 3:
        # Convert one-hot teacher labels to class indices.
        ts = ts.argmax(axis=2)

    # Positions whose label equals ignore_label contribute no loss.
    mask = (ts != self.ignore_label).reshape(N * T)

    # Collapse the batch and time axes into one.
    flat_xs = xs.reshape(N * T, V)
    flat_ts = ts.reshape(N * T)

    ys = softmax(flat_xs)
    ls = np.log(np.clip(ys[np.arange(N * T), flat_ts], 1e-6, 1))
    ls *= mask  # zero out the ignored positions
    loss = -np.sum(ls) / mask.sum()

    self.cache = (flat_ts, ys, mask, (N, T, V))
    return loss
def backward(self, dout):
    """Backward pass of softmax.

    Uses the Jacobian identity dx = y * (dout - sum(y * dout, axis=1)),
    where y = self.out is the cached forward output.
    """
    y = self.out
    weighted = y * dout
    dx = weighted - y * weighted.sum(axis=1, keepdims=True)
    return dx
def forward(self, h, idx):
    """Score each hidden vector against one selected embedding row.

    Looks up the embedding rows for idx, then returns the per-example
    dot product with h (used by negative sampling).
    """
    target_W = self.embed.forward(idx)
    scores = (target_W * h).sum(axis=1)
    self.cache = (h, target_W)
    return scores