def forward(self):
    """Run the LSTM forward pass over every time step of the input.

    Reads the input ``self.x.d`` of shape ``(N, T, L)`` and the parameters
    ``self.W.d``, ``self.U.d`` and ``self.b.d``. Writes the outputs to
    ``self.y.d`` (shape ``(N, T, M)``), the last output to ``self.h.d``, the
    cell states to ``self.c`` and the activated gates to ``self.A``.

    When ``self.stateful.d`` is truthy and a previous state exists, the last
    hidden and cell states of the previous call seed this call; otherwise
    both start from zeros.
    """
    _, M = self.shape
    N, T, _ = self.x.d.shape

    if self.h.d is None or not self.stateful.d:
        self.h_prev = np.zeros((N, M), dtype=self.dtype)
    else:
        self.h_prev = self.h.d

    if self.c is None or not self.stateful.d:
        self.c = np.empty((N, T, M), dtype=self.dtype)
        self.c_prev = np.zeros((N, M), dtype=self.dtype)
    else:
        # Copy rather than keep a view: ``self.c[:, -1]`` is overwritten at
        # the last time step below, which would silently replace the saved
        # previous cell state with the new one.
        self.c_prev = self.c[:, -1].copy()
        # A stateful call may arrive with a different sequence length; a
        # stale buffer would either overflow or leave an outdated last slot.
        if self.c.shape != (N, T, M):
            self.c = np.empty((N, T, M), dtype=self.dtype)

    if self.A is None or self.A.shape != (N, T, 4 * M):
        # Gate layout along the last axis: [f, g, i, o].
        self.A = np.empty((N, T, 4 * M), dtype=self.dtype)

    # The input projection does not depend on the recurrence, so it is
    # computed once for all time steps.
    x = self.x.d @ self.W.d
    y = np.empty((N, T, M), dtype=self.dtype)
    for t in range(T):
        h = self.h_prev if t == 0 else y[:, t - 1]
        a = x[:, t] + h @ self.U.d + self.b.d
        a[:, :M] = sigmoid(a[:, :M])  # f
        a[:, M:2 * M] = np.tanh(a[:, M:2 * M])  # g
        a[:, 2 * M:3 * M] = sigmoid(a[:, 2 * M:3 * M])  # i
        a[:, 3 * M:] = sigmoid(a[:, 3 * M:])  # o
        self.A[:, t] = a
        f, g, i, o = a[:, :M], a[:, M:2 * M], a[:, 2 * M:3 * M], a[:, 3 * M:]
        c = self.c_prev if t == 0 else self.c[:, t - 1]
        self.c[:, t] = f * c + g * i
        y[:, t] = o * np.tanh(self.c[:, t])
    self.y.d = y
    self.h.d = y[:, -1]
def cbow(vocab_size, window_size=5, hidden_size=100, sample_size=5, batch_size=100):
    """Build, wire up and initialize a CBOW model with negative sampling.

    One embedding branch feeds ``sample_size + 1`` scoring branches. The
    scoring branches share one weight variable, and their label inputs are
    filled with constant ones (first branch) or zeros (the rest) and frozen.
    Returns the rebuilt model after initializing weights with ``std=0.01``.
    """
    embedding_spec = [
        ("input", 2 * window_size, vocab_size),
        ("embeddingmean", hidden_size),
    ]
    scoring_spec = [("embeddingdot", vocab_size), "sigmoid_cross_entropy"]

    hidden = branch(embedding_spec)[-1]
    losses = []
    for _ in range(sample_size + 1):
        losses.append(branch(scoring_spec, hidden)[-1].loss)  # type:ignore
    model = Model(losses)

    # The EmbeddingDot layers share a single weight.
    shared = model.weights[1].variable
    for weight in model.weights[2:]:
        weight.set_variable(shared)
    len(shared.parameters)  # type:ignore

    # Assign the target labels for the positive and negative examples. They
    # are never updated afterwards, so `frozen` is set to `True`.
    positive_label = model.data_input_variables[2]
    positive_label.data = np.ones(batch_size, dtype=np.int32)
    positive_label.frozen = True
    for negative_label in model.data_input_variables[4::2]:
        negative_label.data = np.zeros(batch_size, dtype=np.int32)
        negative_label.frozen = True

    # Build the model again.
    model.build()
    # Initialize the weights.
    model.init(std=0.01)
    return model
def convert_one_hot(corpus, vocab_size):
    """Convert an array of word ids into its one-hot representation.

    Args:
        corpus: Integer array of word ids of any rank (e.g. ``(N,)`` or
            ``(N, C)``).
        vocab_size: Number of words; the length of the one-hot axis.

    Returns:
        An ``int32`` array of shape ``corpus.shape + (vocab_size,)`` with a 1
        at each word id along the last axis and 0 elsewhere.

    Raises:
        IndexError: If a word id lies outside ``[-vocab_size, vocab_size)``.
    """
    one_hot = np.zeros(corpus.shape + (vocab_size,), dtype=np.int32)
    # Skip the indexed write for an empty corpus so that an empty array of
    # any dtype still yields an empty result.
    if corpus.size:
        # View the freshly allocated (contiguous) result as one row per word
        # so a single indexed assignment sets every 1 at once.
        rows = one_hot.reshape(corpus.size, vocab_size)
        rows[np.arange(corpus.size), corpus.reshape(-1)] = 1
    return one_hot
def __init__(self, corpus, power=0.75):
    """Build the sampling distribution from word frequencies in ``corpus``.

    Sets ``self.vocab_size`` to the number of distinct ids in ``corpus`` and
    ``self.probability`` to the per-id counts raised to ``power`` and
    normalized to sum to one. Ids are looked up as ``0 .. vocab_size - 1``.
    """
    word_counts = collections.Counter(corpus)
    n_words = len(word_counts)
    self.vocab_size = n_words

    weights = np.zeros(n_words)
    for word_id in range(n_words):
        weights[word_id] = word_counts[word_id]

    # Raise the raw counts to ``power``, then normalize.
    weights = np.power(weights, power)
    self.probability = weights / np.sum(weights)
def backward(self):
    """Route the output gradient back to the arg-max position of each window.

    Scatters ``self.y.g`` into a zero matrix at the indices stored in
    ``self.arg_max``, then folds that matrix into the input layout with
    ``col2im`` and stores the result in ``self.x.g``.
    """
    PH, PW = self.shape[3:5]
    window_size = PH * PW

    # Channels-last so the flattened order matches ``self.arg_max``.
    grad_out = self.y.g.transpose(0, 2, 3, 1)

    # One row per output element; only the arg-max column gets the gradient.
    scattered = np.zeros((grad_out.size, window_size))
    row_ids = np.arange(self.arg_max.size)
    scattered[row_ids, self.arg_max.flatten()] = grad_out.flatten()

    # Collapse to one row per (sample, out_row, out_col) position.
    n_positions = grad_out.shape[0] * grad_out.shape[1] * grad_out.shape[2]
    dx_2d = scattered.reshape(n_positions, -1)
    self.x.g = col2im(
        dx_2d, self.x.d.shape, PH, PW, self.stride.d, self.padding.d
    )
def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
    """Fold a 2-D column matrix back into a 4-D image batch.

    Args:
        col: Matrix reshapeable to ``(N, out_h, out_w, C, filter_h, filter_w)``.
        input_shape: Target shape ``(N, C, H, W)``.
        filter_h: Filter height.
        filter_w: Filter width.
        stride: Step between neighbouring windows.
        pad: Padding that was applied on each spatial side.

    Returns:
        Array of shape ``(N, C, H, W)`` in which overlapping window entries
        are summed.
    """
    N, C, H, W = input_shape
    padded_h = H + 2 * pad
    padded_w = W + 2 * pad
    out_h = (padded_h - filter_h) // stride + 1
    out_w = (padded_w - filter_w) // stride + 1

    patches = col.reshape(N, out_h, out_w, C, filter_h, filter_w)
    patches = patches.transpose(0, 3, 4, 5, 1, 2)

    # Extra ``stride - 1`` rows/columns keep every strided slice in bounds.
    img = np.zeros((N, C, padded_h + stride - 1, padded_w + stride - 1))
    for dy in range(filter_h):
        rows = slice(dy, dy + stride * out_h, stride)
        for dx in range(filter_w):
            cols = slice(dx, dx + stride * out_w, stride)
            img[:, :, rows, cols] += patches[:, :, dy, dx, :, :]

    # Strip the padding (and the slack added above).
    return img[:, :, pad : H + pad, pad : W + pad]
def forward(self):
    """Run the simple (tanh) RNN forward pass over every time step.

    Reads the input ``self.x.d`` of shape ``(N, T, L)`` and the parameters
    ``self.W.d``, ``self.U.d`` and ``self.b.d``. Writes the outputs to
    ``self.y.d`` (shape ``(N, T, M)``) and the last output to ``self.h.d``.
    The initial hidden state is the previous ``self.h.d`` when
    ``self.stateful.d`` is truthy and a previous state exists, else zeros.
    """
    M = self.shape[1]
    N, T = self.x.d.shape[:2]

    carry_state = self.h.d is not None and self.stateful.d
    if carry_state:
        self.h_prev = self.h.d
    else:
        self.h_prev = np.zeros((N, M), dtype=self.dtype)

    # Input projection for all time steps at once.
    projected = self.x.d @ self.W.d
    outputs = np.empty((N, T, M), dtype=self.dtype)
    hidden = self.h_prev
    for step in range(T):
        outputs[:, step] = np.tanh(projected[:, step] + hidden @ self.U.d + self.b.d)
        # Read the state back from the output buffer so the recurrence sees
        # the value exactly as stored (in ``self.dtype``).
        hidden = outputs[:, step]

    self.y.d = outputs
    self.h.d = outputs[:, -1]
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """Unfold a 4-D image batch into a 2-D matrix of filter windows.

    Args:
        input_data: Array of shape ``(N, C, H, W)``.
        filter_h: Filter height.
        filter_w: Filter width.
        stride: Step between neighbouring windows.
        pad: Zero padding added to each spatial side.

    Returns:
        Matrix of shape ``(N * out_h * out_w, C * filter_h * filter_w)``, one
        row per window position.
    """
    N, C, H, W = input_data.shape
    out_h = (H + 2 * pad - filter_h) // stride + 1
    out_w = (W + 2 * pad - filter_w) // stride + 1

    spatial_pad = (pad, pad)
    padded = np.pad(
        input_data, [(0, 0), (0, 0), spatial_pad, spatial_pad], "constant"
    )

    patches = np.zeros((N, C, filter_h, filter_w, out_h, out_w))
    for dy in range(filter_h):
        rows = slice(dy, dy + stride * out_h, stride)
        for dx in range(filter_w):
            cols = slice(dx, dx + stride * out_w, stride)
            patches[:, :, dy, dx, :, :] = padded[:, :, rows, cols]

    # Move the window position to the front, then flatten each window.
    return patches.transpose(0, 4, 5, 1, 2, 3).reshape(N * out_h * out_w, -1)
def create_co_matrix(corpus, vocab_size, window_size=1):
    """Count how often each pair of word ids co-occurs within a window.

    Args:
        corpus: Sequence of word ids.
        vocab_size: Number of distinct word ids; the matrix side length.
        window_size: How many positions to look at on each side of a word.

    Returns:
        ``int32`` matrix of shape ``(vocab_size, vocab_size)`` where entry
        ``[a, b]`` is the number of times ``b`` appears within
        ``window_size`` positions of ``a``.
    """
    n_tokens = len(corpus)
    co_matrix = np.zeros((vocab_size, vocab_size), dtype=np.int32)

    for position, word_id in enumerate(corpus):
        for offset in range(1, window_size + 1):
            # Look ``offset`` steps to the left and to the right, skipping
            # neighbours that fall outside the corpus.
            for neighbour in (position - offset, position + offset):
                if 0 <= neighbour < n_tokens:
                    co_matrix[word_id, corpus[neighbour]] += 1

    return co_matrix
def get_negative_sample(self, target, sample_size, replace=False):
    """Draw negative word ids for each target from ``self.probability``.

    Args:
        target: Array of target word ids; its first axis is the batch.
        sample_size: Number of negative ids to draw per target.
        replace: When true, draw the whole batch in one call with
            replacement and without excluding the targets. When false, draw
            per row without replacement and with the row's target excluded.

    Returns:
        Array of shape ``(batch_size, sample_size)`` of sampled word ids.
    """
    batch_size = target.shape[0]

    if replace:
        # Fast when `replace` is True.
        return np.random.choice(
            self.vocab_size,
            (batch_size, sample_size),
            replace=True,
            p=self.probability,
        )

    samples = np.zeros((batch_size, sample_size), dtype=np.int32)
    for row in range(batch_size):
        # Zero out the target's probability and renormalize so the target
        # itself can never be drawn as a negative.
        weights = self.probability.copy()
        weights[target[row]] = 0
        weights /= weights.sum()
        samples[row, :] = np.random.choice(
            self.vocab_size, size=sample_size, replace=False, p=weights
        )
    return samples
def most_similar(query, word_to_id, id_to_word, word_matrix, top=5):
    """Print the words whose vectors are most similar to ``query``.

    Args:
        query: Word to look up.
        word_to_id: Mapping from word to id.
        id_to_word: Mapping from id to word.
        word_matrix: Word vectors indexed by id.
        top: Stop after this many results have been printed.

    Prints a "not found" message and returns if ``query`` is unknown.
    Otherwise prints the query followed by the other words in order of
    decreasing similarity, as computed by ``cos_similarity``.
    """
    if query not in word_to_id:
        print("%s is not found" % query)
        return

    print("\n[query] " + query)
    query_vec = word_matrix[word_to_id[query]]

    vocab_size = len(id_to_word)
    similarity = np.zeros(vocab_size)
    for word_id in range(vocab_size):
        similarity[word_id] = cos_similarity(word_matrix[word_id], query_vec)

    # Negate before sorting so the most similar words come first.
    shown = 0
    for word_id in (-1 * similarity).argsort():
        word = id_to_word[word_id]
        if word == query:
            continue
        print(" %s: %s" % (word, similarity[word_id]))
        shown += 1
        if shown >= top:
            return
# レイヤパラメータを設定します。 from ivory.common.context import np # isort:skip w = rnn.W.variable.data = np.random.randn(L, M) # type:ignore u = rnn.U.variable.data = np.random.randn(M, M) # type:ignore b = rnn.b.variable.data = np.random.randn(M) # type:ignore # ランダムな入力を作成します。 x = np.random.randn(N, T, L) t = np.random.randint(0, M, (N, T)) model.set_data(x, t) # 順伝搬します。 model.forward() print(rnn.y.d[:, :2]) # バッチ数分の内積値が出力されました。上記が正しいか、確かめます。 y = np.zeros(M) for xt in x[0, :2]: y = np.tanh(xt @ w + y @ u + b) print(y) print() y = np.zeros(M) for xt in x[1, :2]: y = np.tanh(xt @ w + y @ u + b) print(y) # 隠れ状態$\mathbf{h}$は最後の出力を保持します。 print(rnn.h.d) # type:ignore # 逆伝搬を検証するために、数値微分による勾配確認を行います。 model.forward() model.backward() for v in model.grad_variables:
def zeros(self) -> "Parameter":
    """Set ``self.init`` to a zero-filling initializer and return ``self``.

    The initializer reads ``self.shape`` and ``self.layer.dtype`` when it is
    called, not when this method runs.
    """

    def make_zeros():
        return np.zeros(self.shape, dtype=self.layer.dtype)

    self.init = make_zeros
    return self