def check_inconsistent_input_size(
        self, h_data, c_data, xs_data, ws_data, bs_data):
    h = _wrap_variable(h_data)
    c = _wrap_variable(c_data)
    xs = _wrap_variable(xs_data)
    ws = _wrap_variable(ws_data)
    bs = _wrap_variable(bs_data)
    with self.assertRaises(ValueError):
        functions.n_step_lstm(
            self.n_layers, self.dropout, h, c, ws, bs, xs)
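# `_wrap_variable` is a helper defined outside this section.  A minimal
# sketch of what it presumably does, recursively wrapping ndarrays and
# nested lists of ndarrays in chainer.Variable, is shown below; the exact
# original implementation is an assumption.
def _wrap_variable(x):
    if isinstance(x, (list, tuple)):
        return [_wrap_variable(xi) for xi in x]
    return chainer.Variable(x)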
def forward(self, inputs, device):
    h, c, ws, bs, xs = self.process_inputs(inputs)
    if h.array.dtype == numpy.float64:
        # Run the float64 case with cuDNN disabled; other dtypes go
        # through the default configuration.
        with chainer.using_config('use_cudnn', 'never'):
            out = F.n_step_lstm(self.n_layers, 0.0, h, c, ws, bs, xs)
    else:
        out = F.n_step_lstm(self.n_layers, 0.0, h, c, ws, bs, xs)
    rets = []
    rets.append(out[0])
    rets.append(out[1])
    for i in range(len(out[2])):
        rets.append(out[2][i])
    return tuple(rets)
def check_forward(self, h_data, c_data, xs_data, ws_data, bs_data):
    h = chainer.Variable(h_data)
    c = chainer.Variable(c_data)
    xs = [chainer.Variable(x) for x in xs_data]
    ws = [[chainer.Variable(w) for w in ws] for ws in ws_data]
    bs = [[chainer.Variable(b) for b in bs] for bs in bs_data]
    hy, cy, ys = functions.n_step_lstm(
        self.n_layers, self.dropout, h, c, ws, bs, xs)

    # Reference computation: gate order is input, forget, cell candidate,
    # output; w[0:4]/b[0:4] act on the layer input and w[4:8]/b[4:8] on
    # the previous hidden state.
    e_hy = self.hx.copy()
    e_cy = self.cx.copy()
    for ind in range(self.length):
        x = self.xs[ind]
        batch = x.shape[0]
        for layer in range(self.n_layers):
            w = self.ws[layer]
            b = self.bs[layer]
            h_prev = e_hy[layer, :batch]
            c_prev = e_cy[layer, :batch]
            i = sigmoid(x.dot(w[0].T) + h_prev.dot(w[4].T) + b[0] + b[4])
            f = sigmoid(x.dot(w[1].T) + h_prev.dot(w[5].T) + b[1] + b[5])
            c_bar = numpy.tanh(
                x.dot(w[2].T) + h_prev.dot(w[6].T) + b[2] + b[6])
            o = sigmoid(x.dot(w[3].T) + h_prev.dot(w[7].T) + b[3] + b[7])
            e_c = (f * c_prev + i * c_bar)
            e_h = o * numpy.tanh(e_c)
            e_hy[layer, :batch] = e_h
            e_cy[layer, :batch] = e_c
            x = e_h

        testing.assert_allclose(ys[ind].data, x, rtol=1e-4, atol=1e-4)

    testing.assert_allclose(hy.data, e_hy, rtol=1e-4, atol=1e-4)
    testing.assert_allclose(cy.data, e_cy, rtol=1e-4, atol=1e-4)
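# The reference loops above use a `sigmoid` helper that is not defined in
# this section.  Any numerically equivalent definition works; a minimal
# numpy sketch is:
def sigmoid(x):
    return 1.0 / (1.0 + numpy.exp(-x))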
def test_forward_dropout_count(self):
    y_counts = [0] * self.length
    h_counts = [0] * self.n_layers
    c_counts = [0] * self.n_layers

    for _ in range(self.n_tests):
        hy1, cy1, ys1 = lstm_without_dropout(
            self.n_layers, self.dropout, self.hx, self.cx,
            self.ws, self.bs, self.xs)
        with chainer.using_config('use_cudnn', self.use_cudnn):
            hy2, cy2, ys2 = functions.n_step_lstm(
                self.n_layers, self.dropout, self.hx, self.cx,
                self.ws, self.bs, self.xs)
        for i in range(self.length):
            y_counts[i] += count_close(ys1[i].data, ys2[i].data)
        for i in range(self.n_layers):
            h_counts[i] += count_close(hy1[i].data, hy2[i].data)
            c_counts[i] += count_close(cy1[i].data, cy2[i].data)

    total = self.batch * self.n_tests
    for i in range(self.length):
        self.assert_count(
            y_counts[i],
            total * (1 - self.dropout) ** ((self.n_layers - 1) * (i + 1)))
    for i in range(self.n_layers):
        self.assert_count(
            h_counts[i], total * (1 - self.dropout) ** (self.length * i))
        self.assert_count(
            c_counts[i], total * (1 - self.dropout) ** (self.length * i))
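# `count_close` is another helper assumed by the dropout-count tests.  A
# plausible sketch, counting how many samples (rows) of the two arrays agree
# elementwise within a small tolerance, is given below; the tolerance value
# and the exact reduction are assumptions.
def count_close(x, y, atol=1e-4):
    assert x.shape == y.shape
    return int((numpy.abs(x - y).max(axis=1) < atol).sum())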
def test_forward_dropout_count(self):
    y_counts = [0] * self.length
    h_counts = [0] * self.n_layers
    c_counts = [0] * self.n_layers

    for _ in range(self.n_tests):
        hy1, cy1, ys1 = lstm_without_dropout(
            self.n_layers, self.dropout, self.hx, self.cx,
            self.ws, self.bs, self.xs)
        # Older Chainer API: train/use_cudnn are keyword arguments here
        # instead of being set through chainer.using_config.
        hy2, cy2, ys2 = functions.n_step_lstm(
            self.n_layers, self.dropout, self.hx, self.cx,
            self.ws, self.bs, self.xs,
            train=True, use_cudnn=self.use_cudnn)
        for i in range(self.length):
            y_counts[i] += count_close(ys1[i].data, ys2[i].data)
        for i in range(self.n_layers):
            h_counts[i] += count_close(hy1[i].data, hy2[i].data)
            c_counts[i] += count_close(cy1[i].data, cy2[i].data)

    total = self.batch * self.n_tests
    for i in range(self.length):
        self.assert_count(
            y_counts[i],
            total * (1 - self.dropout) ** ((self.n_layers - 1) * (i + 1)))
    for i in range(self.n_layers):
        self.assert_count(
            h_counts[i], total * (1 - self.dropout) ** (self.length * i))
        self.assert_count(
            c_counts[i], total * (1 - self.dropout) ** (self.length * i))
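# `lstm_without_dropout` is a reference implementation that is not shown in
# this section.  Below is a minimal numpy sketch of what such a reference
# could look like, reusing the gate layout of the check_forward loops above
# and the `sigmoid` helper sketched earlier; returning plain chainer.Variable
# objects and ignoring the dropout argument are assumptions, not the
# original code.
def lstm_without_dropout(n_layers, dropout, hx, cx, ws, bs, xs):
    hx = hx.copy()
    cx = cx.copy()
    ys = []
    for x in xs:
        batch = x.shape[0]
        for layer in range(n_layers):
            w, b = ws[layer], bs[layer]
            h_prev = hx[layer, :batch]
            c_prev = cx[layer, :batch]
            i = sigmoid(x.dot(w[0].T) + h_prev.dot(w[4].T) + b[0] + b[4])
            f = sigmoid(x.dot(w[1].T) + h_prev.dot(w[5].T) + b[1] + b[5])
            c_bar = numpy.tanh(
                x.dot(w[2].T) + h_prev.dot(w[6].T) + b[2] + b[6])
            o = sigmoid(x.dot(w[3].T) + h_prev.dot(w[7].T) + b[3] + b[7])
            c_new = f * c_prev + i * c_bar
            h_new = o * numpy.tanh(c_new)
            hx[layer, :batch] = h_new
            cx[layer, :batch] = c_new
            x = h_new
        ys.append(chainer.Variable(x))
    return chainer.Variable(hx), chainer.Variable(cx), ys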
def forward(self, train):
    h = chainer.Variable(self.hx)
    c = chainer.Variable(self.cx)
    xs = [chainer.Variable(x) for x in self.xs]
    ws = [[chainer.Variable(w) for w in ws] for ws in self.ws]
    bs = [[chainer.Variable(b) for b in bs] for bs in self.bs]
    with chainer.using_config('train', train):
        return functions.n_step_lstm(
            self.n_layers, self.dropout, h, c, ws, bs, xs)
def run_with_n_step_lstm(xs, h, c, w, b):
    xs = F.transpose_sequence(xs)
    print(w.shape)
    # Split the fused weight along axis 1 into its input half and its
    # recurrent half, then split each half into the four per-gate matrices,
    # giving the eight weight matrices n_step_lstm expects per layer.
    wx, wh = F.split_axis(w, 2, 1)
    ws = F.split_axis(wx, 4, 0) + F.split_axis(wh, 4, 0)
    # Halve the fused bias and reuse the same halves for the input and
    # recurrent bias terms, so each pair sums back to the original bias.
    b = b / 2
    bs = F.split_axis(b, 4, 0) * 2
    print(bs)
    h, _, _ = F.n_step_lstm(1, 0.0, h, c, ws, bs, xs)
    return h
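# For reference, the splitting above assumes a fused weight of shape
# (4 * units, in_size + units) with in_size == units and a fused bias of
# length 4 * units.  A small numpy sketch of the same bookkeeping (the unit
# count and random values are made up for illustration):
import numpy

units = 3
w_fused = numpy.random.rand(4 * units, 2 * units).astype(numpy.float32)
b_fused = numpy.random.rand(4 * units).astype(numpy.float32)

wx, wh = numpy.split(w_fused, 2, axis=1)   # input half, recurrent half
ws = numpy.split(wx, 4, axis=0) + numpy.split(wh, 4, axis=0)   # eight (units, units) blocks
bs = numpy.split(b_fused / 2, 4, axis=0) * 2   # each pair sums back to the fused bias
assert all(block.shape == (units, units) for block in ws)
assert numpy.allclose(bs[0] + bs[4], b_fused[:units])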
def call_forward(self, train):
    hx = _wrap_variable(_to_gpu(self.hx))
    cx = _wrap_variable(_to_gpu(self.cx))
    xs = _wrap_variable(_to_gpu(self.xs))
    ws = _wrap_variable(_to_gpu(self.ws))
    bs = _wrap_variable(_to_gpu(self.bs))
    with chainer.using_config('enable_backprop', train), \
            chainer.using_config('train', train):
        return functions.n_step_lstm(
            self.n_layers, self.dropout, hx, cx, ws, bs, xs)
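# `_to_gpu` is assumed to move (possibly nested lists of) numpy arrays onto
# the current GPU device.  A minimal sketch under that assumption, using
# chainer.backends.cuda:
from chainer.backends import cuda

def _to_gpu(x):
    if x is None:
        return None
    if isinstance(x, (list, tuple)):
        return [_to_gpu(xi) for xi in x]
    return cuda.to_gpu(x)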
def forward(self, train):
    # Pre-v2 Chainer API: Variable takes a ``volatile`` flag and
    # n_step_lstm takes ``train``/``use_cudnn`` keyword arguments.
    volatile = not train
    h = chainer.Variable(self.hx, volatile=volatile)
    c = chainer.Variable(self.cx, volatile=volatile)
    xs = [chainer.Variable(x, volatile=volatile) for x in self.xs]
    ws = [[chainer.Variable(w, volatile=volatile) for w in ws]
          for ws in self.ws]
    bs = [[chainer.Variable(b, volatile=volatile) for b in bs]
          for bs in self.bs]
    return functions.n_step_lstm(
        self.n_layers, self.dropout, h, c, ws, bs, xs,
        train=train, use_cudnn=self.use_cudnn)
def f(*inputs):
    (hx, cx), inputs = _split(inputs, 2)
    ws = []
    for i in range(self.n_layers):
        weights, inputs = _split(inputs, 8)
        ws.append(weights)
    bs = []
    for i in range(self.n_layers):
        biases, inputs = _split(inputs, 8)
        bs.append(biases)
    xs = inputs
    hy, cy, ys = functions.n_step_lstm(
        self.n_layers, self.dropout, hx, cx, ws, bs, xs)
    return (hy, cy) + ys
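# `_split` is a small helper assumed to peel the first `size` elements off a
# tuple of inputs and return them together with the remainder, which matches
# how it is unpacked above:
def _split(inputs, size):
    return inputs[:size], inputs[size:]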
def __call__(self, hx, cx, xs, flag_train, args):
    if hx is None:
        hx = self.init_hx(xs)
    if cx is None:
        cx = self.init_hx(xs)

    # hx and cx are tensors of shape (n_layers, batch size, output dim).
    # xs is a tensor of shape (sequence length, batch size, output dim).
    # Note: chainF.n_step_lstm() does not apply dropout to the first
    # input layer.
    hy, cy, ys = chainF.n_step_lstm(
        self.n_layers, self.dropout_rate, hx, cx, self.ws, self.bs, xs)
    # hy and cy come back with shape (n_layers, batch size, output dim).
    # ys contains only the last hidden layer: a tuple of length
    # "sequence length" whose elements have shape (batch size, output dim).
    # For convenience, use stack to turn the tuple into a single
    # chainer.Variable, a tensor of shape
    # (sequence length, batch size, output dim).
    hlist = chainF.stack(ys)
    return hy, cy, hlist
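# `init_hx` is not shown here.  A plausible sketch, assuming it builds an
# all-zero initial state of shape (n_layers, batch size, hidden dim) with
# the same array module as the inputs; the attribute name `hidden_dim` and
# the use of len(xs) as the batch size are assumptions:
def init_hx(self, xs):
    batch = len(xs)
    xp = chainer.backend.get_array_module(xs[0])
    return chainer.Variable(
        xp.zeros((self.n_layers, batch, self.hidden_dim),
                 dtype=numpy.float32))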
def check_forward(
        self, h_data, c_data, xs_data, ws_data, bs_data, volatile):
    h = chainer.Variable(h_data, volatile=volatile)
    c = chainer.Variable(c_data, volatile=volatile)
    xs = [chainer.Variable(x, volatile=volatile) for x in xs_data]
    ws = [[chainer.Variable(w, volatile=volatile) for w in ws]
          for ws in ws_data]
    bs = [[chainer.Variable(b, volatile=volatile) for b in bs]
          for bs in bs_data]
    hy, cy, ys = functions.n_step_lstm(
        self.n_layers, self.dropout, h, c, ws, bs, xs,
        use_cudnn=self.use_cudnn)

    e_hy = self.hx.copy()
    e_cy = self.cx.copy()
    for ind in range(self.length):
        x = self.xs[ind]
        batch = x.shape[0]
        for layer in range(self.n_layers):
            w = self.ws[layer]
            b = self.bs[layer]
            h_prev = e_hy[layer, :batch]
            c_prev = e_cy[layer, :batch]
            i = sigmoid(x.dot(w[0].T) + h_prev.dot(w[4].T) + b[0] + b[4])
            f = sigmoid(x.dot(w[1].T) + h_prev.dot(w[5].T) + b[1] + b[5])
            c_bar = numpy.tanh(
                x.dot(w[2].T) + h_prev.dot(w[6].T) + b[2] + b[6])
            o = sigmoid(x.dot(w[3].T) + h_prev.dot(w[7].T) + b[3] + b[7])
            e_c = (f * c_prev + i * c_bar)
            e_h = o * numpy.tanh(e_c)
            e_hy[layer, :batch] = e_h
            e_cy[layer, :batch] = e_c
            x = e_h

        testing.assert_allclose(
            ys[ind].data, x, rtol=1e-4, atol=1e-4)

    testing.assert_allclose(hy.data, e_hy, rtol=1e-4, atol=1e-4)
    testing.assert_allclose(cy.data, e_cy, rtol=1e-4, atol=1e-4)
def check_forward(
        self, h_data, c_data, xs_data, ws_data, bs_data):
    h = _wrap_variable(h_data)
    c = _wrap_variable(c_data)
    xs = _wrap_variable(xs_data)
    ws = _wrap_variable(ws_data)
    bs = _wrap_variable(bs_data)
    hy, cy, ys = functions.n_step_lstm(
        self.n_layers, self.dropout, h, c, ws, bs, xs)

    e_hy = self.hx.copy()
    e_cy = self.cx.copy()
    for ind in range(self.length):
        x = self.xs[ind]
        batch = x.shape[0]
        for layer in range(self.n_layers):
            w = self.ws[layer]
            b = self.bs[layer]
            h_prev = e_hy[layer, :batch]
            c_prev = e_cy[layer, :batch]
            i = sigmoid(x.dot(w[0].T) + h_prev.dot(w[4].T) + b[0] + b[4])
            f = sigmoid(x.dot(w[1].T) + h_prev.dot(w[5].T) + b[1] + b[5])
            c_bar = numpy.tanh(
                x.dot(w[2].T) + h_prev.dot(w[6].T) + b[2] + b[6])
            o = sigmoid(x.dot(w[3].T) + h_prev.dot(w[7].T) + b[3] + b[7])
            e_c = (f * c_prev + i * c_bar)
            e_h = o * numpy.tanh(e_c)
            e_hy[layer, :batch] = e_h
            e_cy[layer, :batch] = e_c
            x = e_h

        testing.assert_allclose(
            ys[ind].data, x, rtol=1e-4, atol=1e-4)

    testing.assert_allclose(hy.data, e_hy, rtol=1e-4, atol=1e-4)
    testing.assert_allclose(cy.data, e_cy, rtol=1e-4, atol=1e-4)