def backward(self, dout=1):
    batch_size = self.t.shape[0]

    dx = self.y.copy()
    dx[np.arange(batch_size), self.t] -= 1
    dx *= dout
    dx = dx / batch_size

    return dx
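# A minimal sketch (not from the original source) of what the backward pass
# above computes: the gradient of the batch-averaged cross-entropy loss with
# respect to the softmax input is (y - one_hot(t)) / batch_size.  The values
# below are illustrative only.
import numpy as np

y = np.array([[0.1, 0.7, 0.2]])   # softmax output for a single sample
t = np.array([1])                 # index of the correct class
dx = y.copy()
dx[np.arange(1), t] -= 1          # subtract 1 at the correct class
dx /= 1                           # divide by batch_size (= 1 here)
print(dx)                         # [[ 0.1 -0.3  0.2]]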
def backward(self, dout=1):
    ts, ys, mask, (N, T, V) = self.cache

    dx = ys
    dx[np.arange(N * T), ts] -= 1
    dx *= dout
    dx /= mask.sum()
    dx *= mask[:, np.newaxis]
    dx = dx.reshape((N, T, V))

    return dx
def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # if the labels are given as one-hot vectors, convert them to class indices
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
def backward(self, dout=1):
    ts, ys, mask, (N, T, V) = self.cache

    dx = ys
    dx[np.arange(N * T), ts] -= 1
    dx *= dout
    dx /= mask.sum()
    dx *= mask[:, np.newaxis]  # where the label is ignore_label, the gradient is 0
    dx = dx.reshape((N, T, V))

    return dx
def cross_entropy_error(y, t): """ Cross entropy error. One of the loss functions. """ if y.ndim == 1: t = t.reshape(1, t.size) y = y.reshape(1, y.size) # convert to index of correct label when training data is one-hot-vector # (教師データが one-hot-vector の場合、正解ラベルのインデックスに変換 if t.size == y.size: t = t.argmax(axis=1) batch_size = y.shape[0] return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
def forward(self, xs, ts):
    N, T, V = xs.shape

    if ts.ndim == 3:
        ts = ts.argmax(axis=2)

    mask = (ts != self.ignore_label)

    xs = xs.reshape(N * T, V)
    ts = ts.reshape(N * T)
    mask = mask.reshape(N * T)

    ys = softmax(xs)
    ls = np.log(ys[np.arange(N * T), ts])
    ls *= mask
    loss = -np.sum(ls)
    loss /= mask.sum()

    self.cache = (ts, ys, mask, (N, T, V))
    return loss
def forward(self, xs, ts):
    N, T, V = xs.shape

    if ts.ndim == 3:  # labels given as one-hot vectors
        ts = ts.argmax(axis=2)

    mask = (ts != self.ignore_label)

    # merge the batch and time-series dimensions (reshape)
    xs = xs.reshape(N * T, V)
    ts = ts.reshape(N * T)
    mask = mask.reshape(N * T)

    ys = softmax(xs)
    ls = np.log(ys[np.arange(N * T), ts])
    ls *= mask  # set the loss to 0 for data whose label is ignore_label
    loss = -np.sum(ls)
    loss /= mask.sum()

    self.cache = (ts, ys, mask, (N, T, V))
    return loss
def forward(self, xs, ts):
    N, T, V = xs.shape

    # when the training labels are one-hot vectors
    if ts.ndim == 3:
        ts = ts.argmax(axis=2)

    mask = (ts != self.ignore_label)

    # merge the batch and time-series dimensions (reshape)
    xs = xs.reshape(N * T, V)
    ts = ts.reshape(N * T)
    mask = mask.reshape(N * T)

    ys = softmax(xs)
    ls = np.log(ys[np.arange(N * T), ts])
    # when the label is ignore_label, the loss is 0
    ls *= mask
    loss = -np.sum(ls)
    loss /= mask.sum()

    self.cache = (ts, ys, mask, (N, T, V))
    return loss
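# A minimal usage sketch, assuming the forward/backward above belong to a layer
# class named TimeSoftmaxWithLoss whose ignore_label defaults to -1 (the class
# name and default are assumptions here, not shown in this section).
import numpy as np

np.random.seed(0)
N, T, V = 2, 3, 5                       # batch size, time steps, vocabulary size
xs = np.random.randn(N, T, V)           # unnormalized scores for each time step
ts = np.random.randint(0, V, (N, T))    # integer label at each time step
ts[0, -1] = -1                          # a padded position, excluded from the loss

layer = TimeSoftmaxWithLoss()
loss = layer.forward(xs, ts)
dxs = layer.backward()
print(loss, dxs.shape)                  # scalar loss and a gradient of shape (N, T, V)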
                    batch_size=batch_size, max_grad=max_grad)

        correct_num = 0
        for i in range(len(x_test)):
            question, correct = x_test[[i]], t_test[[i]]
            verbose = i < 10
            correct_num += eval_seq2seq(model, question, correct,
                                        id_to_char, verbose)

        acc = float(correct_num) / len(x_test)
        acc_list.append(acc)
        print('val acc %.3f%%' % (acc * 100))

    return acc_list


# Plot the results as a graph
acc_normal = train_eval(x_train, x_test, is_peeky=False)
acc_rev = train_eval(x_train_rev, x_test_rev, is_peeky=False)
acc_pk = train_eval(x_train_rev, x_test_rev, is_peeky=True)

x_normal = np.arange(len(acc_normal))
x_reverse = np.arange(len(acc_rev))
x_pk = np.arange(len(acc_pk))

plt.plot(x_normal, acc_normal, marker='o', label="normal")
plt.plot(x_reverse, acc_rev, marker='v', label="reverse")
plt.plot(x_pk, acc_pk, marker='^', label="peeky+rev")
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.ylim(0, 1.0)
plt.legend()
plt.savefig('train_seq2seq.png')