def check_forward(self, c_prev1_data, c_prev2_data, x1_data, x2_data):
    """Run ``functions.slstm`` forward and compare it with a NumPy reference.

    The four ``*_data`` arguments are wrapped in ``chainer.Variable`` and
    passed to the function under test. The reference values are computed
    from the fixture arrays stored on the instance (``self.c_prev1``,
    ``self.c_prev2``, ``self.x1``, ``self.x2``), not from the arguments.
    """
    variables = [
        chainer.Variable(data)
        for data in (c_prev1_data, c_prev2_data, x1_data, x2_data)
    ]
    c, h = functions.slstm(*variables)
    for output in (c, h):
        self.assertEqual(output.data.dtype, numpy.float32)

    def split_gates(x):
        # Column pairs (k, k + 4) for k = 0..3 are read as the a, i, f and o
        # inputs, in that order.
        return [x[:, [k, k + 4]] for k in range(4)]

    # Compute expected out
    a1, i1, f1, o1 = split_gates(self.x1)
    a2, i2, f2, o2 = split_gates(self.x2)

    c_expect = (
        _sigmoid(i1) * numpy.tanh(a1)
        + _sigmoid(i2) * numpy.tanh(a2)
        + _sigmoid(f1) * self.c_prev1
        + _sigmoid(f2) * self.c_prev2
    )
    h_expect = _sigmoid(o1 + o2) * numpy.tanh(c_expect)

    gradient_check.assert_allclose(c_expect, c.data)
    gradient_check.assert_allclose(h_expect, h.data)
def check_forward(self, c_prev1_data, c_prev2_data, x1_data, x2_data):
    """Run ``functions.slstm`` forward and print whether it matches NumPy.

    The arguments are wrapped in ``chainer.Variable`` and passed to
    ``functions.slstm``. The reference cell and hidden values are computed
    from the arrays stored on ``self`` and compared with
    ``numpy.allclose``; the boolean results are printed, not asserted, so
    this method never fails on a mismatch.
    """
    c, h = functions.slstm(
        chainer.Variable(c_prev1_data),
        chainer.Variable(c_prev2_data),
        chainer.Variable(x1_data),
        chainer.Variable(x2_data),
    )

    # Compute expected out
    ref_x1 = self.x1
    a1_in = ref_x1[:, [0, 4]]
    i1_in = ref_x1[:, [1, 5]]
    f1_in = ref_x1[:, [2, 6]]
    o1_in = ref_x1[:, [3, 7]]

    ref_x2 = self.x2
    a2_in = ref_x2[:, [0, 4]]
    i2_in = ref_x2[:, [1, 5]]
    f2_in = ref_x2[:, [2, 6]]
    o2_in = ref_x2[:, [3, 7]]

    # Same left-to-right summation order as a single chained expression.
    input_term = (_sigmoid(i1_in) * numpy.tanh(a1_in)
                  + _sigmoid(i2_in) * numpy.tanh(a2_in))
    c_expect = (input_term
                + _sigmoid(f1_in) * self.c_prev1
                + _sigmoid(f2_in) * self.c_prev2)
    h_expect = _sigmoid(o1_in + o2_in) * numpy.tanh(c_expect)

    print("state = ", numpy.allclose(c_expect, c.data))
    print("hidden = ", numpy.allclose(h_expect, h.data))
def __call__(self, tree):
    """Compose the subtree rooted at ``tree`` and return its memory cell.

    Chains of single-child nodes are skipped first. A leaf is turned into a
    hidden vector from its word (or from the average over its
    ``'/'``-separated tokens when ``self.is_leaf_as_chunk`` is set) and
    gets a zero cell of shape ``(1, self.mem_units)``. Any other node is
    unpacked into exactly two children, which are composed recursively and
    merged with ``F.slstm`` using gates computed from the children's
    vectors and attention vectors.

    The hidden vector is stored in ``data['vector']`` of the node reached
    after skipping single-child chains.

    Args:
        tree: node exposing ``children``, ``is_leaf()``, ``is_root()``,
            ``get_word()`` and a ``data`` mapping.

    Returns:
        The memory cell ``c`` for the node.

    Raises:
        ValueError: if a two-child node is composed while ``self.comp_type``
            is neither ``Composition.tree_attention_lstm`` nor
            ``Composition.attention_slstm``.
    """
    # Skip chains of single-child nodes.
    while len(tree.children) == 1 and not tree.is_leaf():
        tree = tree.children[0]

    if tree.is_leaf():
        word = tree.get_word()
        if self.is_leaf_as_chunk:
            # Average the hidden projections of the '/'-separated tokens.
            tokens = word.split('/')
            vector = None
            for tok in tokens:
                hidden = self.embed2hidden(self.get_word_vec(tok))
                if vector is None:
                    vector = hidden
                else:
                    vector += hidden
            vector /= len(tokens)
        else:
            vector = self.embed2hidden(self.get_word_vec(word))
        c = Variable(np.zeros((1, self.mem_units), dtype=np.float32))
    else:
        left_tree, right_tree = tree.children
        leftc = self(left_tree)
        rightc = self(right_tree)

        # The recursive calls stored each vector on the node reached after
        # skipping single-child chains, so descend the same way to read it.
        while len(left_tree.children) == 1 and not left_tree.is_leaf():
            left_tree = left_tree.children[0]
        while len(right_tree.children) == 1 and not right_tree.is_leaf():
            right_tree = right_tree.children[0]
        left_vec = left_tree.data['vector']
        right_vec = right_tree.data['vector']

        # Composition by tree LSTM.
        left_attention_vec = self.calc_attention(left_tree)
        right_attention_vec = self.calc_attention(right_tree)
        concat = F.concat(
            (left_vec, right_vec, left_attention_vec, right_attention_vec))

        u_l = self.updatel(concat)
        u_r = self.updater(concat)
        i_l = self.inputl(concat)
        i_r = self.inputr(concat)

        if self.comp_type == Composition.tree_attention_lstm:
            concatl = F.concat((left_vec, left_attention_vec))
            concatr = F.concat((right_vec, right_attention_vec))
            # NOTE(review): the left forget gate reads the right-hand
            # features and vice versa; kept as found, confirm this is
            # intentional.
            f_l = self.forgetl(concatr)
            f_r = self.forgetr(concatl)
        elif self.comp_type == Composition.attention_slstm:
            f_l = self.forgetl(concat)
            f_r = self.forgetr(concat)
        else:
            # Without this branch the forget gates would stay unbound and
            # the failure would surface later as an UnboundLocalError.
            raise ValueError(
                'unsupported composition type: {!r}'.format(self.comp_type))

        o_l = self.outputl(concat)
        o_r = self.outputr(concat)

        l_v = F.concat((u_l, i_l, f_l, o_l))
        r_v = F.concat((u_r, i_r, f_r, o_r))
        c, vector = F.slstm(leftc, rightc, l_v, r_v)

    tree.data['vector'] = vector
    # NOTE(review): ``tree`` is the node reached after skipping
    # single-child chains, so this check applies to that node.
    if tree.is_root():
        self.calc_attention(tree)
    return c
def check_forward(self, inputs, backend_config):
    """Compare ``functions.slstm`` with the CPU reference implementation.

    Args:
        inputs: the four arrays ``(c_prev1, c_prev2, x1, x2)``.
        backend_config: backend configuration; used as a context manager
            around the forward call, and its ``use_cuda`` flag decides
            whether the inputs are moved to the GPU first.
    """
    # The unpacking requires exactly four inputs; the names themselves are
    # not used afterwards.
    c_prev1, c_prev2, x1, x2 = inputs
    c_expect, h_expect = self.forward_cpu(inputs)

    device_inputs = (
        cuda.to_gpu(inputs) if backend_config.use_cuda else inputs)
    variables = [chainer.Variable(array) for array in device_inputs]

    with backend_config:
        c, h = functions.slstm(*variables)
        for output in (c, h):
            assert output.data.dtype == self.dtype

    tolerances = self.check_forward_options
    testing.assert_allclose(c_expect, c.data, **tolerances)
    testing.assert_allclose(h_expect, h.data, **self.check_forward_options)
def check_forward(self, inputs, backend_config):
    """Check the forward outputs of ``functions.slstm``.

    The expected cell and hidden values come from ``self.forward_cpu``.
    The inputs are optionally transferred with ``cuda.to_gpu`` (when
    ``backend_config.use_cuda`` is true), wrapped in ``chainer.Variable``
    and evaluated inside the ``backend_config`` context. Output dtypes
    must equal ``self.dtype`` and values must match within
    ``self.check_forward_options``.
    """
    # Unpacking enforces a four-element ``inputs``.
    c_prev1, c_prev2, x1, x2 = inputs
    expected = self.forward_cpu(inputs)
    c_expect, h_expect = expected

    arrays = inputs
    if backend_config.use_cuda:
        arrays = cuda.to_gpu(arrays)
    wrapped = []
    for array in arrays:
        wrapped.append(chainer.Variable(array))

    with backend_config:
        c, h = functions.slstm(*wrapped)
        assert c.data.dtype == self.dtype
        assert h.data.dtype == self.dtype

    for reference, output in ((c_expect, c), (h_expect, h)):
        testing.assert_allclose(
            reference, output.data, **self.check_forward_options)
def expr_for_tree(self, tree, decorate=False):
    """Recursively build the ``(c, h)`` pair for ``tree``.

    * Leaf: returns a zero array of shape ``(1, self.hdim)`` together with
      the embedding of the label's id (``self.w2i``, falling back to 0).
    * One child (asserted to be a leaf): applies ``F.lstm`` to the child's
      pair after projecting with ``self.WU``.
    * Two children (asserted): merges both pairs with ``F.slstm`` after
      projecting with ``self.W1`` and ``self.W2``.

    When ``decorate`` is true, the resulting pair is also stored on the
    non-leaf node as ``tree._e``.
    """
    if tree.isleaf():
        initial_c = zeros((1, self.hdim))
        word_id = self.w2i.get(tree.label, 0)
        return initial_c, self.embed(makevar(word_id))

    if len(tree.children) == 1:
        only_child = tree.children[0]
        assert only_child.isleaf()
        child_c, child_e = self.expr_for_tree(only_child)
        c, h = F.lstm(child_c, self.WU(child_e))
    else:
        assert len(tree.children) == 2, tree.children[0]
        left_c, left_e = self.expr_for_tree(tree.children[0], decorate)
        right_c, right_e = self.expr_for_tree(tree.children[1], decorate)
        c, h = F.slstm(left_c, right_c, self.W1(left_e), self.W2(right_e))

    if decorate:
        tree._e = (c, h)
    return c, h
def forward(self, inputs, device):
    """Apply ``functions.slstm`` to the four entries of ``inputs``.

    ``inputs`` is unpacked as ``(c1, c2, x1, x2)``. ``device`` is accepted
    but not used here. The result of ``functions.slstm`` is returned
    unchanged.
    """
    first_c, second_c, first_x, second_x = inputs
    return functions.slstm(first_c, second_c, first_x, second_x)
def __call__(self, train, x_batch, y_batch=None):
    """Run the tree-structured encoder with cross attention on a batch.

    Each position of the input is passed through ``h_lstm`` and placed in
    the leaf slots of an implicit binary tree stored as flat lists. Parents
    are then built bottom-up with ``F.slstm``. Every node output is split
    in two along axis 0; each half attends over the other half's nodes via
    ``__attend_fast`` and ``m_lstm``. The two attended summaries are
    concatenated and classified.

    Args:
        train: enables dropout and the loss computation; it is also passed
            on as ``volatile=not train``.
        x_batch: sequence of equally long sequences, indexed as
            ``x_batch[k][l]`` for sample ``k`` and position ``l``.
            NOTE(review): all tree slots are only filled when the sequence
            length is a power of two, and the batch size is expected to be
            even - confirm against callers.
        y_batch: labels; only used when ``train`` is true.

    Returns:
        ``(preds, accum_loss, y)``: the argmax predictions as a list, the
        softmax cross-entropy loss (``None`` when not training) and the
        output of ``l_y``.
    """
    model = self
    n_units = self.__n_units
    mod = self.__mod
    gpu = self.__gpu
    batch_size = len(x_batch)
    # Floor division keeps indices and shapes integral on Python 3 as well;
    # on Python 2 ints it is identical to ``/``.
    half = batch_size // 2
    x_len = len(x_batch[0])
    depth = int(log(x_len, 2)) + 1
    self.reset_state()

    n_nodes = 2 ** depth - 1
    list_a = [[] for _ in range(n_nodes)]
    list_c = [[] for _ in range(n_nodes)]

    # Leaves: one h_lstm step per input position.
    for pos in range(x_len):
        x_data = mod.array([x_batch[k][pos] for k in range(batch_size)])
        x_data = Variable(x_data, volatile=not train)
        x_data = model.h_lstm(F.dropout(x_data, ratio=0.2, train=train))
        list_a[x_len - 1 + pos] = x_data
        list_c[x_len - 1 + pos] = model.h_lstm.c

    # Internal nodes, deepest level first: siblings s and s + 1 are merged
    # into their parent at (s - 1) // 2.
    for d in reversed(range(1, depth)):
        for s in range(2 ** d - 1, 2 ** (d + 1) - 1, 2):
            left = model.h_x(F.dropout(list_a[s], ratio=0.2, train=train))
            right = model.h_h(
                F.dropout(list_a[s + 1], ratio=0.2, train=train))
            c, h = F.slstm(list_c[s], list_c[s + 1], left, right)
            parent = (s - 1) // 2
            list_a[parent] = h
            list_c[parent] = c

    # Split every node output into the two halves of the batch.
    list_p = []
    list_h = []
    for node_out in list_a:
        first_half, second_half = F.split_axis(node_out, 2, axis=0)[:2]
        list_p.append(first_half)
        list_h.append(second_half)

    def stack(nodes):
        # Stack node vectors along a new axis 1.
        return F.concat(
            [F.reshape(v, (half, 1, n_units)) for v in nodes], axis=1)

    def attend_pass(memory_nodes, query_nodes):
        # Attend each non-root query node over all memory nodes, feed the
        # results through m_lstm, then attend the last m_lstm output over
        # the earlier ones.
        memory = stack(memory_nodes)
        outputs = []
        for d in reversed(range(1, depth)):
            for s in range(2 ** d - 1, 2 ** (d + 1) - 1):
                attended = self.__attend_fast(
                    memory, query_nodes[s], half, train)
                out = model.m_lstm(
                    F.dropout(attended, ratio=0.2, train=train))
                outputs.append(out)
        history = stack(outputs[:-1])
        return self.__attend_fast(history, out, half, train)

    hs = attend_pass(list_p, list_h)
    model.m_lstm.reset_state()
    hs1 = attend_pass(list_h, list_p)

    hs = F.relu(model.h_l1(F.concat([hs, hs1], axis=1)))
    y = model.l_y(F.dropout(hs, ratio=0.2, train=train))
    preds = mod.argmax(y.data, 1).tolist()

    accum_loss = None
    if train:
        if gpu >= 0:
            y_batch = cuda.to_gpu(y_batch)
        lbl = Variable(y_batch, volatile=not train)
        accum_loss = F.softmax_cross_entropy(y, lbl)
    return preds, accum_loss, y