Example 1
 def check_inconsistent_input_size(
         self, h_data, c_data, xs_data, ws_data, bs_data):
     h = _wrap_variable(h_data)
     c = _wrap_variable(c_data)
     xs = _wrap_variable(xs_data)
     ws = _wrap_variable(ws_data)
     bs = _wrap_variable(bs_data)
     with self.assertRaises(ValueError):
         functions.n_step_lstm(
             self.n_layers, self.dropout, h, c, ws, bs, xs)
Example 2
 def check_inconsistent_input_size(self, h_data, c_data, xs_data, ws_data,
                                   bs_data):
     h = _wrap_variable(h_data)
     c = _wrap_variable(c_data)
     xs = _wrap_variable(xs_data)
     ws = _wrap_variable(ws_data)
     bs = _wrap_variable(bs_data)
     with self.assertRaises(ValueError):
         functions.n_step_lstm(self.n_layers, self.dropout, h, c, ws, bs,
                               xs)
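
The two preceding examples rely on a _wrap_variable test helper that is not shown in the snippets. A minimal sketch, assuming it simply wraps arrays (and nested sequences of arrays) in chainer.Variable, might look like this:

import chainer

def _wrap_variable(x):
    # Hypothetical helper: recursively wrap ndarrays in chainer.Variable,
    # preserving the nested list structure of ws and bs.
    if isinstance(x, (list, tuple)):
        return [_wrap_variable(v) for v in x]
    return chainer.Variable(x)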
Example 3
 def forward(self, inputs, device):
     h, c, ws, bs, xs = self.process_inputs(inputs)
     if h.array.dtype == numpy.float64:
         with chainer.using_config('use_cudnn', 'never'):
             out = F.n_step_lstm(self.n_layers, 0.0, h, c, ws, bs, xs)
     else:
         out = F.n_step_lstm(self.n_layers, 0.0, h, c, ws, bs, xs)
     # out is (hy, cy, ys); flatten it into a single tuple of Variables.
     rets = []
     rets.append(out[0])
     rets.append(out[1])
     for i in range(len(out[2])):
         rets.append(out[2][i])
     return tuple(rets)
Example 4
    def check_forward(self, h_data, c_data, xs_data, ws_data, bs_data):
        h = chainer.Variable(h_data)
        c = chainer.Variable(c_data)
        xs = [chainer.Variable(x) for x in xs_data]
        ws = [[chainer.Variable(w) for w in ws] for ws in ws_data]
        bs = [[chainer.Variable(b) for b in bs] for bs in bs_data]
        hy, cy, ys = functions.n_step_lstm(self.n_layers, self.dropout, h, c,
                                           ws, bs, xs)

        e_hy = self.hx.copy()
        e_cy = self.cx.copy()
        for ind in range(self.length):
            x = self.xs[ind]
            batch = x.shape[0]
            for layer in range(self.n_layers):
                w = self.ws[layer]
                b = self.bs[layer]
                h_prev = e_hy[layer, :batch]
                c_prev = e_cy[layer, :batch]
                # Weights 0-3 act on the input x and weights 4-7 on the previous
                # hidden state; the gate order is input, forget, cell candidate, output.
                i = sigmoid(x.dot(w[0].T) + h_prev.dot(w[4].T) + b[0] + b[4])
                f = sigmoid(x.dot(w[1].T) + h_prev.dot(w[5].T) + b[1] + b[5])
                c_bar = numpy.tanh(
                    x.dot(w[2].T) + h_prev.dot(w[6].T) + b[2] + b[6])
                o = sigmoid(x.dot(w[3].T) + h_prev.dot(w[7].T) + b[3] + b[7])
                e_c = (f * c_prev + i * c_bar)
                e_h = o * numpy.tanh(e_c)
                e_hy[layer, :batch] = e_h
                e_cy[layer, :batch] = e_c

                x = e_h

            testing.assert_allclose(ys[ind].data, x, rtol=1e-4, atol=1e-4)

        testing.assert_allclose(hy.data, e_hy, rtol=1e-4, atol=1e-4)
        testing.assert_allclose(cy.data, e_cy, rtol=1e-4, atol=1e-4)
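
The reference computation above uses a sigmoid helper that the snippet does not define. A minimal NumPy version (an assumption, not necessarily the original test utility):

import numpy

def sigmoid(x):
    # Elementwise logistic function used by the hand-written LSTM reference.
    return 1.0 / (1.0 + numpy.exp(-x))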
Example 5
    def test_forward_dropout_count(self):
        y_counts = [0] * self.length
        h_counts = [0] * self.n_layers
        c_counts = [0] * self.n_layers

        for _ in range(self.n_tests):
            hy1, cy1, ys1 = lstm_without_dropout(self.n_layers, self.dropout,
                                                 self.hx, self.cx, self.ws,
                                                 self.bs, self.xs)
            with chainer.using_config('use_cudnn', self.use_cudnn):
                hy2, cy2, ys2 = functions.n_step_lstm(self.n_layers,
                                                      self.dropout, self.hx,
                                                      self.cx, self.ws,
                                                      self.bs, self.xs)

            for i in range(self.length):
                y_counts[i] += count_close(ys1[i].data, ys2[i].data)

            for i in range(self.n_layers):
                h_counts[i] += count_close(hy1[i].data, hy2[i].data)
                c_counts[i] += count_close(cy1[i].data, cy2[i].data)

        total = self.batch * self.n_tests
        for i in range(self.length):
            self.assert_count(
                y_counts[i],
                total * (1 - self.dropout)**((self.n_layers - 1) * (i + 1)))
        for i in range(self.n_layers):
            self.assert_count(h_counts[i],
                              total * (1 - self.dropout)**(self.length * i))
            self.assert_count(c_counts[i],
                              total * (1 - self.dropout)**(self.length * i))
Example 6
    def test_forward_dropout_count(self):
        y_counts = [0] * self.length
        h_counts = [0] * self.n_layers
        c_counts = [0] * self.n_layers

        for _ in range(self.n_tests):
            hy1, cy1, ys1 = lstm_without_dropout(
                self.n_layers, self.dropout, self.hx, self.cx, self.ws,
                self.bs, self.xs)
            hy2, cy2, ys2 = functions.n_step_lstm(
                self.n_layers, self.dropout, self.hx, self.cx, self.ws,
                self.bs, self.xs, train=True, use_cudnn=self.use_cudnn)

            for i in range(self.length):
                y_counts[i] += count_close(ys1[i].data, ys2[i].data)

            for i in range(self.n_layers):
                h_counts[i] += count_close(hy1[i].data, hy2[i].data)
                c_counts[i] += count_close(cy1[i].data, cy2[i].data)

        total = self.batch * self.n_tests
        for i in range(self.length):
            self.assert_count(
                y_counts[i],
                total * (1 - self.dropout) ** ((self.n_layers - 1) * (i + 1)))
        for i in range(self.n_layers):
            self.assert_count(
                h_counts[i], total * (1 - self.dropout) ** (self.length * i))
            self.assert_count(
                c_counts[i], total * (1 - self.dropout) ** (self.length * i))
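
The two dropout-counting tests above depend on count_close and assert_count helpers that are not shown. A plausible sketch of count_close, under the assumption that it counts batch rows whose values still agree between the dropout-free and dropout runs:

import numpy

def count_close(x, y, rtol=1e-4, atol=1e-4):
    # Hypothetical helper: number of rows where the two outputs match elementwise.
    return int(numpy.isclose(x, y, rtol=rtol, atol=atol).all(axis=1).sum())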
Example 7
 def forward(self, train):
     h = chainer.Variable(self.hx)
     c = chainer.Variable(self.cx)
     xs = [chainer.Variable(x) for x in self.xs]
     ws = [[chainer.Variable(w) for w in ws] for ws in self.ws]
     bs = [[chainer.Variable(b) for b in bs] for bs in self.bs]
     with chainer.using_config('train', train):
         return functions.n_step_lstm(self.n_layers, self.dropout, h, c, ws,
                                      bs, xs)
Example 8
def run_with_n_step_lstm(xs, h, c, w, b):
    xs = F.transpose_sequence(xs)
    print(w.shape)
    # Split the fused weight matrix into its input-side and hidden-side halves,
    # then into the four per-gate blocks, giving the eight matrices per layer
    # that n_step_lstm expects.
    wx, wh = F.split_axis(w, 2, 1)
    ws = F.split_axis(wx, 4, 0) + F.split_axis(wh, 4, 0)
    # n_step_lstm adds two bias vectors per gate, so halve the fused bias and
    # reuse the same four halves for both the input and hidden sides.
    b = b / 2
    bs = F.split_axis(b, 4, 0) * 2
    print(bs)
    h, _, _ = F.n_step_lstm(1, 0.0, h, c, ws, bs, xs)
    return h
Example 9
 def call_forward(self, train):
     hx = _wrap_variable(_to_gpu(self.hx))
     cx = _wrap_variable(_to_gpu(self.cx))
     xs = _wrap_variable(_to_gpu(self.xs))
     ws = _wrap_variable(_to_gpu(self.ws))
     bs = _wrap_variable(_to_gpu(self.bs))
     with chainer.using_config('enable_backprop', train), \
             chainer.using_config('train', train):
         return functions.n_step_lstm(
             self.n_layers, self.dropout, hx, cx, ws, bs, xs)
Example 10
 def forward(self, train):
     h = chainer.Variable(self.hx)
     c = chainer.Variable(self.cx)
     xs = [chainer.Variable(x) for x in self.xs]
     ws = [[chainer.Variable(w) for w in ws]
           for ws in self.ws]
     bs = [[chainer.Variable(b) for b in bs]
           for bs in self.bs]
     with chainer.using_config('train', train):
         return functions.n_step_lstm(
             self.n_layers, self.dropout, h, c, ws, bs, xs)
Example 11
 def forward(self, train):
     volatile = not train
     h = chainer.Variable(self.hx, volatile=volatile)
     c = chainer.Variable(self.cx, volatile=volatile)
     xs = [chainer.Variable(x, volatile=volatile) for x in self.xs]
     ws = [[chainer.Variable(w, volatile=volatile) for w in ws]
           for ws in self.ws]
     bs = [[chainer.Variable(b, volatile=volatile) for b in bs]
           for bs in self.bs]
     return functions.n_step_lstm(
         self.n_layers, self.dropout, h, c, ws, bs, xs,
         train=train, use_cudnn=self.use_cudnn)
Example 12
 def forward(self, train):
     volatile = not train
     h = chainer.Variable(self.hx, volatile=volatile)
     c = chainer.Variable(self.cx, volatile=volatile)
     xs = [chainer.Variable(x, volatile=volatile) for x in self.xs]
     ws = [[chainer.Variable(w, volatile=volatile) for w in ws]
           for ws in self.ws]
     bs = [[chainer.Variable(b, volatile=volatile) for b in bs]
           for bs in self.bs]
     return functions.n_step_lstm(
         self.n_layers, self.dropout, h, c, ws, bs, xs,
         train=train, use_cudnn=self.use_cudnn)
Example 13
 def f(*inputs):
     (hx, cx), inputs = _split(inputs, 2)
     ws = []
     for i in range(self.n_layers):
         weights, inputs = _split(inputs, 8)
         ws.append(weights)
     bs = []
     for i in range(self.n_layers):
         biases, inputs = _split(inputs, 8)
         bs.append(biases)
     xs = inputs
     hy, cy, ys = functions.n_step_lstm(self.n_layers, self.dropout, hx,
                                        cx, ws, bs, xs)
     return (hy, cy) + ys
Example 14
 def f(*inputs):
     (hx, cx), inputs = _split(inputs, 2)
     ws = []
     for i in range(self.n_layers):
         weights, inputs = _split(inputs, 8)
         ws.append(weights)
     bs = []
     for i in range(self.n_layers):
         biases, inputs = _split(inputs, 8)
         bs.append(biases)
     xs = inputs
     hy, cy, ys = functions.n_step_lstm(
         self.n_layers, self.dropout, hx, cx, ws, bs, xs)
     return (hy, cy) + ys
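
Examples 13 and 14 use a _split helper that is not shown; from its call sites it appears to peel a fixed number of elements off a flat tuple of inputs. A minimal sketch under that assumption:

def _split(inputs, pos):
    # Hypothetical helper: first pos items and the remaining tail of a flat tuple.
    return inputs[:pos], inputs[pos:]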
Example 15
    def __call__(self, hx, cx, xs, flag_train, args):
        if hx is None:
            hx = self.init_hx(xs)
        if cx is None:
            cx = self.init_hx(xs)

        # hx and cx are tensors of shape (n_layers, minibatch size, output dim)
        # xs is a (sequence length, minibatch size, output dim) tensor
        # Note: chainF.n_step_lstm() does not apply dropout to the first input layer
        hy, cy, ys = chainF.n_step_lstm(self.n_layers, self.dropout_rate, hx,
                                        cx, self.ws, self.bs, xs)
        # hy and cy come back with shape (n_layers, minibatch size, output dim)
        # ys holds only the last hidden layer, so it is a tuple of length
        # (sequence length) whose elements have shape (minibatch size, output dim)
        # For convenience, use stack to turn the tuple into a single chainer.Variable,
        # i.e. a (sequence length, minibatch size, output dim) tensor
        hlist = chainF.stack(ys)
        return hy, cy, hlist
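
The comments above describe how chainF.stack turns the per-timestep tuple ys into a single Variable. A small, self-contained shape check with made-up sizes, illustrating the same transformation:

import numpy
import chainer.functions as F

length, batch, units = 5, 8, 16
ys = tuple(numpy.zeros((batch, units), dtype=numpy.float32) for _ in range(length))
hlist = F.stack(ys)
print(hlist.shape)  # (5, 8, 16): (sequence length, minibatch size, output dimension)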
Example 16
    def check_forward(
            self, h_data, c_data, xs_data, ws_data, bs_data, volatile):
        h = chainer.Variable(h_data, volatile=volatile)
        c = chainer.Variable(c_data, volatile=volatile)
        xs = [chainer.Variable(x, volatile=volatile) for x in xs_data]
        ws = [[chainer.Variable(w, volatile=volatile) for w in ws]
              for ws in ws_data]
        bs = [[chainer.Variable(b, volatile=volatile) for b in bs]
              for bs in bs_data]
        hy, cy, ys = functions.n_step_lstm(
            self.n_layers, self.dropout, h, c, ws, bs, xs,
            use_cudnn=self.use_cudnn)

        e_hy = self.hx.copy()
        e_cy = self.cx.copy()
        for ind in range(self.length):
            x = self.xs[ind]
            batch = x.shape[0]
            for layer in range(self.n_layers):
                w = self.ws[layer]
                b = self.bs[layer]
                h_prev = e_hy[layer, :batch]
                c_prev = e_cy[layer, :batch]
                i = sigmoid(x.dot(w[0].T) + h_prev.dot(w[4].T) + b[0] + b[4])
                f = sigmoid(x.dot(w[1].T) + h_prev.dot(w[5].T) + b[1] + b[5])
                c_bar = numpy.tanh(
                    x.dot(w[2].T) + h_prev.dot(w[6].T) + b[2] + b[6])
                o = sigmoid(x.dot(w[3].T) + h_prev.dot(w[7].T) + b[3] + b[7])
                e_c = (f * c_prev + i * c_bar)
                e_h = o * numpy.tanh(e_c)
                e_hy[layer, :batch] = e_h
                e_cy[layer, :batch] = e_c

                x = e_h

            testing.assert_allclose(
                ys[ind].data, x, rtol=1e-4, atol=1e-4)

        testing.assert_allclose(hy.data, e_hy, rtol=1e-4, atol=1e-4)
        testing.assert_allclose(cy.data, e_cy, rtol=1e-4, atol=1e-4)
Example 17
    def check_forward(
            self, h_data, c_data, xs_data, ws_data, bs_data):
        h = _wrap_variable(h_data)
        c = _wrap_variable(c_data)
        xs = _wrap_variable(xs_data)
        ws = _wrap_variable(ws_data)
        bs = _wrap_variable(bs_data)
        hy, cy, ys = functions.n_step_lstm(
            self.n_layers, self.dropout, h, c, ws, bs, xs)

        e_hy = self.hx.copy()
        e_cy = self.cx.copy()
        for ind in range(self.length):
            x = self.xs[ind]
            batch = x.shape[0]
            for layer in range(self.n_layers):
                w = self.ws[layer]
                b = self.bs[layer]
                h_prev = e_hy[layer, :batch]
                c_prev = e_cy[layer, :batch]
                i = sigmoid(x.dot(w[0].T) + h_prev.dot(w[4].T) + b[0] + b[4])
                f = sigmoid(x.dot(w[1].T) + h_prev.dot(w[5].T) + b[1] + b[5])
                c_bar = numpy.tanh(
                    x.dot(w[2].T) + h_prev.dot(w[6].T) + b[2] + b[6])
                o = sigmoid(x.dot(w[3].T) + h_prev.dot(w[7].T) + b[3] + b[7])
                e_c = (f * c_prev + i * c_bar)
                e_h = o * numpy.tanh(e_c)
                e_hy[layer, :batch] = e_h
                e_cy[layer, :batch] = e_c

                x = e_h

            testing.assert_allclose(
                ys[ind].data, x, rtol=1e-4, atol=1e-4)

        testing.assert_allclose(hy.data, e_hy, rtol=1e-4, atol=1e-4)
        testing.assert_allclose(cy.data, e_cy, rtol=1e-4, atol=1e-4)