Example #1
    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        dx = self.y.copy()
        dx[np.arange(batch_size), self.t] -= 1  # y - one_hot(t) at the correct labels
        dx *= dout
        dx = dx / batch_size  # average the gradient over the batch
        return dx
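For context, here is a minimal sketch of the forward pass this backward pairs with; the softmax helper and the SoftmaxWithLoss class wiring are assumptions for illustration, not part of the example above.

import numpy as np

def softmax(x):
    x = x - x.max(axis=1, keepdims=True)  # shift for numerical stability
    e = np.exp(x)
    return e / e.sum(axis=1, keepdims=True)

class SoftmaxWithLoss:
    def forward(self, x, t):
        self.t = t           # integer class labels, shape (N,)
        self.y = softmax(x)  # probabilities, shape (N, V)
        batch_size = x.shape[0]
        # mean cross-entropy over the correct-class probabilities
        return -np.sum(np.log(self.y[np.arange(batch_size), t] + 1e-7)) / batch_size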
Example #2
    def backward(self, dout=1):
        ts, ys, mask, (N, T, V) = self.cache

        dx = ys
        dx[np.arange(N * T), ts] -= 1
        dx *= dout
        dx /= mask.sum()
        dx *= mask[:, np.newaxis]

        dx = dx.reshape((N, T, V))

        return dx
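A small demo of the fancy-indexing trick used above (values invented for illustration): subtracting 1 at the correct-label positions turns the softmax output into y - one_hot(t), which is the gradient of cross-entropy with respect to the logits.

import numpy as np

y = np.array([[0.1, 0.8, 0.1],
              [0.3, 0.2, 0.5]])  # softmax outputs for two samples
t = np.array([1, 2])             # correct class indices

dx = y.copy()
dx[np.arange(len(t)), t] -= 1    # one (row, label) position per sample
# dx is now [[0.1, -0.2, 0.1],
#            [0.3,  0.2, -0.5]], i.e. y - one_hot(t)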
Example #3
def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # convert one-hot teacher data to indices of the correct labels
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]

    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
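A quick usage sketch (input values are invented): with integer labels the function indexes the correct-class probabilities directly, and a one-hot t of the same size as y is first converted by the argmax branch.

import numpy as np

y = np.array([[0.1, 0.8, 0.1],
              [0.3, 0.2, 0.5]])          # softmax outputs, batch of 2
t = np.array([1, 2])                     # integer labels
print(cross_entropy_error(y, t))         # -(log 0.8 + log 0.5) / 2 ~= 0.458

t_onehot = np.array([[0, 1, 0],
                     [0, 0, 1]])         # the same labels in one-hot form
print(cross_entropy_error(y, t_onehot))  # same result via the argmax branch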
Example #4
    def backward(self, dout=1):
        ts, ys, mask, (N, T, V) = self.cache

        dx = ys
        dx[np.arange(N * T), ts] -= 1
        dx *= dout
        dx /= mask.sum()
        # zero the gradient for data matching ignore_label
        dx *= mask[:, np.newaxis]

        dx = dx.reshape((N, T, V))

        return dx
Example #5
def cross_entropy_error(y, t):
    """
    Cross entropy error.
    One of the loss functions.
    """
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # convert to indices of the correct labels when the teacher data is one-hot vectors
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]

    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
Example #6
    def forward(self, xs, ts):
        N, T, V = xs.shape

        if ts.ndim == 3:
            ts = ts.argmax(axis=2)

        mask = (ts != self.ignore_label)
        xs = xs.reshape(N * T, V)
        ts = ts.reshape(N * T)
        mask = mask.reshape(N * T)

        ys = softmax(xs)
        ls = np.log(ys[np.arange(N * T), ts])
        ls *= mask
        loss = -np.sum(ls)
        loss /= mask.sum()

        self.cache = (ts, ys, mask, (N, T, V))
        return loss
Example #7
    def forward(self, xs, ts):
        N, T, V = xs.shape

        if ts.ndim == 3:  # when the teacher labels are one-hot vectors
            ts = ts.argmax(axis=2)

        mask = (ts != self.ignore_label)

        # merge the batch and time-series dimensions (reshape)
        xs = xs.reshape(N * T, V)
        ts = ts.reshape(N * T)
        mask = mask.reshape(N * T)

        ys = softmax(xs)
        ls = np.log(ys[np.arange(N * T), ts])
        ls *= mask  # set the loss to 0 for data matching ignore_label
        loss = -np.sum(ls)
        loss /= mask.sum()

        self.cache = (ts, ys, mask, (N, T, V))
        return loss
Example #8
    def forward(self, xs, ts):
        N, T, V = xs.shape

        # when the training labels are one-hot vectors
        if ts.ndim == 3:
            ts = ts.argmax(axis=2)

        mask = (ts != self.ignore_label)

        # flatten the batch and time dimensions together
        xs = xs.reshape(N * T, V)
        ts = ts.reshape(N * T)
        mask = mask.reshape(N * T)

        ys = softmax(xs)
        ls = np.log(ys[np.arange(N * T), ts])
        # the loss is 0 for data matching ignore_label
        ls *= mask
        loss = -np.sum(ls)
        loss /= mask.sum()

        self.cache = (ts, ys, mask, (N, T, V))
        return loss
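To show how these forward/backward pairs fit together, here is a hedged, self-contained sketch of a time-series softmax-with-loss layer in the same style; the class name TimeSoftmaxWithLoss, the softmax helper, and the dummy data are assumptions for illustration.

import numpy as np

def softmax(x):
    x = x - x.max(axis=1, keepdims=True)  # shift for numerical stability
    e = np.exp(x)
    return e / e.sum(axis=1, keepdims=True)

class TimeSoftmaxWithLoss:
    def __init__(self, ignore_label=-1):
        self.ignore_label = ignore_label
        self.cache = None

    def forward(self, xs, ts):
        N, T, V = xs.shape
        if ts.ndim == 3:  # one-hot labels
            ts = ts.argmax(axis=2)
        mask = (ts != self.ignore_label)
        # flatten the batch and time dimensions together
        xs, ts, mask = xs.reshape(N * T, V), ts.reshape(N * T), mask.reshape(N * T)
        ys = softmax(xs)
        ls = np.log(ys[np.arange(N * T), ts]) * mask
        loss = -np.sum(ls) / mask.sum()
        self.cache = (ts, ys, mask, (N, T, V))
        return loss

    def backward(self, dout=1):
        ts, ys, mask, (N, T, V) = self.cache
        dx = ys
        dx[np.arange(N * T), ts] -= 1
        dx *= dout / mask.sum()
        dx *= mask[:, np.newaxis]  # zero the gradient at ignore_label positions
        return dx.reshape((N, T, V))

# Dummy shapes: batch N=2, time steps T=3, vocabulary V=4 (random values)
layer = TimeSoftmaxWithLoss()
xs = np.random.randn(2, 3, 4)
ts = np.random.randint(0, 4, size=(2, 3))
print(layer.forward(xs, ts))    # scalar loss
print(layer.backward().shape)   # (2, 3, 4)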
Example #9
                    batch_size=batch_size,
                    max_grad=max_grad)
        correct_num = 0
        for i in range(len(x_test)):
            question, correct = x_test[[i]], t_test[[i]]
            verbose = i < 10
            correct_num += eval_seq2seq(model, question, correct, id_to_char,
                                        verbose)
        acc = float(correct_num) / len(x_test)
        acc_list.append(acc)
        print('val acc %.3f%%' % (acc * 100))
    return acc_list


# Plot the results as a graph
acc_normal = train_eval(x_train, x_test, is_peeky=False)
acc_rev = train_eval(x_train_rev, x_test_rev, is_peeky=False)
acc_pk = train_eval(x_train_rev, x_test_rev, is_peeky=True)
x_normal = np.arange(len(acc_normal))
x_reverse = np.arange(len(acc_rev))
x_pk = np.arange(len(acc_pk))
plt.plot(x_normal, acc_normal, marker='o', label="normal")
plt.plot(x_reverse, acc_rev, marker='v', label="reverse")
plt.plot(x_pk, acc_pk, marker='^', label="peeky+rev")

plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.ylim(0, 1.0)
plt.legend()
plt.savefig('train_seq2seq.png')
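This last snippet is truncated at the top (it begins inside a call) and relies on names defined elsewhere. Under the conventions of the surrounding examples it presumably needs at least the following; the module path for eval_seq2seq is a guess, not confirmed by the snippet itself.

import numpy as np
import matplotlib.pyplot as plt
# assumed project-level import (the exact path is an assumption):
# from common.util import eval_seq2seq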