def forward(self, x, t):
    self.t = t
    self.y = softmax(x)

    # If the teacher labels are one-hot vectors, convert them to class indices
    if self.t.size == self.y.size:
        self.t = self.t.argmax(axis=1)

    loss = cross_entropy_error(self.y, self.t)
    return loss
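
# The helpers below are a minimal sketch of what `softmax` and
# `cross_entropy_error` are assumed to do here (batch input, integer labels);
# they are illustrative stand-ins, not the canonical implementations from the
# surrounding codebase.
import numpy as np

def softmax(x):
    # Subtract the row-wise max before exponentiating for numerical stability.
    x = x - x.max(axis=-1, keepdims=True)
    e = np.exp(x)
    return e / e.sum(axis=-1, keepdims=True)

def cross_entropy_error(y, t):
    # y: (N, C) predicted probabilities, t: (N,) integer class labels.
    if y.ndim == 1:
        y = y.reshape(1, -1)
        t = np.asarray(t).reshape(1)
    batch_size = y.shape[0]
    # The small epsilon avoids log(0).
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size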
def predict(network, x):
    w1, w2, w3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']

    # Three affine layers: sigmoid activations on the hidden layers,
    # softmax on the output layer.
    a1 = np.dot(x, w1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, w2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, w3) + b3
    y = softmax(a3)

    return y
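
# Usage sketch: build a small network dict with random parameters and run one
# forward pass. The layer sizes (2-3-3-2) are arbitrary for illustration, and
# `sigmoid` is assumed to be the usual logistic function.
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

rng = np.random.default_rng(0)
network = {
    'W1': rng.standard_normal((2, 3)), 'b1': np.zeros(3),
    'W2': rng.standard_normal((3, 3)), 'b2': np.zeros(3),
    'W3': rng.standard_normal((3, 2)), 'b3': np.zeros(2),
}
x = np.array([[1.0, 0.5]])
y = predict(network, x)   # probabilities over the 2 output classes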
def generate(self, start_id, skip_ids=None, sample_size=100):
    word_ids = [start_id]
    x = start_id
    while len(word_ids) < sample_size:
        # Feed the last word id as a (1, 1) mini-batch and get vocab scores.
        x = np.array(x).reshape(1, 1)
        score = self.predict(x).flatten()
        p = softmax(score).flatten()

        # Sample the next word id from the predicted distribution,
        # skipping any ids listed in skip_ids.
        sampled = np.random.choice(len(p), size=1, p=p)
        if (skip_ids is None) or (sampled not in skip_ids):
            x = sampled
            word_ids.append(int(x))

    return word_ids
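
# Usage sketch (not runnable on its own): generate a word-id sequence from a
# seed word and decode it. `model`, `word_to_id`, and `id_to_word` are
# hypothetical names assumed to come from the surrounding training code.
# start_id = word_to_id['you']
# skip_ids = [word_to_id[w] for w in ('N', '<unk>', '$')]
# word_ids = model.generate(start_id, skip_ids)
# print(' '.join(id_to_word[i] for i in word_ids))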
def forward(self, xs, ts):
    N, T, V = xs.shape

    if ts.ndim == 3:  # if the teacher labels are one-hot vectors
        ts = ts.argmax(axis=2)

    mask = (ts != self.ignore_label)

    # Collapse the batch and time dimensions into one axis (reshape)
    xs = xs.reshape(N * T, V)
    ts = ts.reshape(N * T)
    mask = mask.reshape(N * T)

    ys = softmax(xs)
    ls = np.log(ys[np.arange(N * T), ts])
    ls *= mask  # zero out the loss wherever the label equals ignore_label
    loss = -np.sum(ls)
    loss /= mask.sum()

    self.cache = (ts, ys, mask, (N, T, V))
    return loss
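
# Shape check, as a sketch: with N=2 sequences of T=3 steps over a V=4 word
# vocabulary, the loss is the cross-entropy averaged over every position whose
# label is not ignore_label. The wrapper class is a hypothetical stand-in for
# the full time-series softmax-with-loss layer.
class _TimeSoftmaxWithLossSketch:
    def __init__(self):
        self.ignore_label = -1

rng = np.random.default_rng(0)
xs = rng.standard_normal((2, 3, 4))     # (N, T, V) scores
ts = np.array([[0, 1, 2], [3, -1, 1]])  # -1 marks a position to ignore
layer = _TimeSoftmaxWithLossSketch()
print(forward(layer, xs, ts))           # scalar loss over the 5 valid labels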
def forward(self, x):
    self.out = softmax(x)
    return self.out
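
# Usage sketch: the Softmax layer turns raw scores into row-wise probability
# distributions and caches the output on `self` for the backward pass. The
# empty class is a hypothetical stand-in providing the `self` the method expects.
class _SoftmaxSketch:
    pass

layer = _SoftmaxSketch()
scores = np.array([[0.3, 2.9, 4.0]])
probs = forward(layer, scores)   # each row of probs sums to 1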
def loss(self, x, t):
    # Forward pass: raw scores -> probabilities -> cross-entropy loss.
    z = self.predict(x)
    y = softmax(z)
    loss = cross_entropy_error(y, t)
    return loss
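
# Usage sketch: a minimal, hypothetical network exposing `predict` so that
# `loss` can be exercised; the real class and its trained parameters come from
# the surrounding code.
class _SimpleNetSketch:
    def __init__(self, input_size, output_size, seed=0):
        rng = np.random.default_rng(seed)
        self.W = 0.01 * rng.standard_normal((input_size, output_size))
        self.b = np.zeros(output_size)

    def predict(self, x):
        return np.dot(x, self.W) + self.b

net = _SimpleNetSketch(2, 3)
x = np.array([[0.6, 0.9]])
t = np.array([2])            # integer class label
print(loss(net, x, t))       # call the function above with the sketch as self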