def forward(self, *inputs):
    batch = len(inputs) // 6
    lefts = inputs[0:batch]
    rights = inputs[batch:batch * 2]
    dests = inputs[batch * 2:batch * 3]
    labels = inputs[batch * 3:batch * 4]
    sequences = inputs[batch * 4:batch * 5]
    leaf_labels = inputs[batch * 5:batch * 6]

    inds = numpy.argsort([-len(l) for l in lefts])
    # Sort all arrays in descending order and transpose them
    lefts = F.transpose_sequence([lefts[i] for i in inds])
    rights = F.transpose_sequence([rights[i] for i in inds])
    dests = F.transpose_sequence([dests[i] for i in inds])
    labels = F.transpose_sequence([labels[i] for i in inds])
    sequences = F.transpose_sequence([sequences[i] for i in inds])
    leaf_labels = F.transpose_sequence([leaf_labels[i] for i in inds])

    batch = len(inds)
    maxlen = len(sequences)

    loss = 0
    count = 0
    correct = 0

    # A thin stack needs at most 2 * maxlen slots per example.
    stack = self.xp.zeros(
        (batch, maxlen * 2, self.n_units), self.xp.float32)

    # Push leaf embeddings and classify each leaf word.
    for i, (word, label) in enumerate(zip(sequences, leaf_labels)):
        batch = word.shape[0]
        es = self.leaf(word)
        ds = self.xp.full((batch,), i, self.xp.int32)
        y = self.label(es)
        loss += F.softmax_cross_entropy(y, label, normalize=False) * batch
        count += batch
        predict = self.xp.argmax(y.array, axis=1)
        correct += (predict == label.array).sum()

        stack = thin_stack.thin_stack_set(stack, ds, es)

    # Combine child vectors and classify each internal node.
    for left, right, dest, label in zip(lefts, rights, dests, labels):
        l, stack = thin_stack.thin_stack_get(stack, left)
        r, stack = thin_stack.thin_stack_get(stack, right)
        o = self.node(l, r)
        y = self.label(o)
        batch = l.shape[0]
        loss += F.softmax_cross_entropy(y, label, normalize=False) * batch
        count += batch
        predict = self.xp.argmax(y.array, axis=1)
        correct += (predict == label.array).sum()

        stack = thin_stack.thin_stack_set(stack, dest, o)

    loss /= count
    reporter.report({'loss': loss}, self)
    reporter.report({'total': count}, self)
    reporter.report({'correct': correct}, self)
    return loss
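# For reference: the thin-stack operations used above amount to fancy
# indexing into one preallocated (batch, 2 * maxlen, units) buffer that is
# reused in place. A minimal NumPy sketch of that semantics follows; the
# names mirror thin_stack.thin_stack_set / thin_stack_get, but this
# stand-in ignores autograd, which the real functions handle.
import numpy as np


def sketch_thin_stack_set(stack, indices, values):
    # Write one row per batch element; the same ndarray is returned.
    stack[np.arange(len(indices)), indices] = values
    return stack


def sketch_thin_stack_get(stack, indices):
    # Read one row per batch element; the stack is returned alongside so
    # the caller keeps threading the same buffer through the loop.
    return stack[np.arange(len(indices)), indices], stack


stack = np.zeros((2, 6, 4), np.float32)  # (batch, 2 * maxlen, units)
leaf = np.ones((2, 4), np.float32)
stack = sketch_thin_stack_set(stack, np.array([0, 0]), leaf)
got, stack = sketch_thin_stack_get(stack, np.array([0, 0]))
assert (got == leaf).all()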
def _compose(self, batch):
    batch_size = len(batch) // 5
    # -- Store data
    lefts = batch[0:batch_size]
    rights = batch[batch_size:batch_size * 2]
    dests = batch[batch_size * 2:batch_size * 3]
    opes = batch[batch_size * 3:batch_size * 4]
    words = batch[batch_size * 4:batch_size * 5]

    # -- Sort all arrays in descending order and transpose them
    inds = np.argsort([-len(l) for l in lefts])
    # The root of each tree ends up at stack slot 2 * n_words - 2.
    root_inds = [len(words[i]) * 2 - 2 for i in inds]
    inds_reverse = [0] * batch_size
    for i, ind in enumerate(inds):
        inds_reverse[ind] = i
    lefts = F.transpose_sequence([lefts[i] for i in inds])
    rights = F.transpose_sequence([rights[i] for i in inds])
    dests = F.transpose_sequence([dests[i] for i in inds])
    opes = F.transpose_sequence([opes[i] for i in inds])
    words = F.transpose_sequence([words[i] for i in inds])

    # -- Store max length of sentence
    maxlen = len(words)

    # -- Calculate compositional vectors
    if self.comp:
        stack = self.xp.zeros((batch_size, maxlen * 2, self.d * 2), 'f')
    else:
        stack = self.xp.zeros((batch_size, maxlen * 2, self.d), 'f')

    # Push leaf vectors, then combine them bottom-up.
    for i, word in enumerate(words):
        batch = word.shape[0]
        es = self._leaf(word)
        ds = self.xp.full((batch,), i, 'i')
        stack = TS.thin_stack_set(stack, ds, es)
    for left, right, dest, ope in zip(lefts, rights, dests, opes):
        l, stack = TS.thin_stack_get(stack, left)
        r, stack = TS.thin_stack_get(stack, right)
        o = self._node(l, r, ope.data)
        stack = TS.thin_stack_set(stack, dest, o)

    # Pick up the root vector of each tree and restore the original order.
    lasts_ = stack[self.xp.arange(batch_size, dtype=self.xp.int32), root_inds]
    lasts = F.concat(
        [F.expand_dims(lasts_[i], axis=0) for i in inds_reverse], axis=0)
    return lasts
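# _compose assumes each tree arrives as parallel transition arrays: the
# leaf loop pushes word i to stack slot i, every internal node reads the
# slots named in lefts/rights and writes to the fresh slot in dests, so
# the root lands at slot 2 * n_words - 2 (cf. root_inds above). Below is
# a hypothetical encoding of the tree ((w0 w1) w2) for a 3-word sentence;
# the concrete arrays are illustrative, not taken from any dataset format.
words = [0, 1, 2]   # token ids; the leaf loop fills slots 0..2
lefts = [0, 3]      # first merge reads slot 0, second reads slot 3
rights = [1, 2]     # ...paired with slots 1 and 2
dests = [3, 4]      # results go to the next free slots, 3 and 4
assert dests[-1] == 2 * len(words) - 2  # root slot matches root_inds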
def check_forward(self, s_data, i_data, x_data):
    xp = backend.get_array_module(s_data)
    s = chainer.Variable(s_data)
    i = chainer.Variable(i_data)
    x = chainer.Variable(x_data)
    t = thin_stack.thin_stack_set(s, i, x)
    testing.assert_allclose(
        t.array[xp.arange(len(i_data)), i_data], x_data)

    # Thin stack reuses the same ndarray.
    self.assertIs(s_data, t.array)
def check_backward(self, s_data, i_data, x_data, gt_data):
    # We cannot use the check_backward utility because a thin stack
    # reuses its ndarray.
    gt_old = gt_data.copy()
    s = chainer.Variable(s_data)
    i = chainer.Variable(i_data)
    x = chainer.Variable(x_data)
    t = thin_stack.thin_stack_set(s, i, x)
    t.grad = gt_data
    t.backward()

    for j, ind in enumerate(i_data):
        testing.assert_allclose(x.grad[j], gt_old[j, ind])
        for k in range(self.shape[1]):
            if k == ind:
                testing.assert_allclose(s.grad[j, k], 0)
            else:
                testing.assert_allclose(s.grad[j, k], gt_old[j, k])

    self.assertIsNone(i.grad)

    # Thin stack reuses the same gradient array.
    self.assertIs(s.grad, t.grad)
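# The backward test pins down the gradient routing of thin_stack_set: the
# incoming gradient rows at the written indices flow to x and are zeroed
# in the stack gradient, which reuses the very same ndarray. A rough NumPy
# sketch of that rule (a hypothetical helper, not the library's code):
import numpy as np


def sketch_thin_stack_set_backward(indices, g_out):
    # Gradient w.r.t. x: the rows that thin_stack_set wrote.
    g_x = g_out[np.arange(len(indices)), indices].copy()
    # Gradient w.r.t. the stack: the rest, with the written rows zeroed;
    # g_out is modified and returned, mirroring assertIs(s.grad, t.grad).
    g_out[np.arange(len(indices)), indices] = 0
    return g_out, g_x


g_out = np.random.rand(2, 5, 3).astype(np.float32)
indices = np.array([1, 3])
g_old = g_out.copy()
g_stack, g_x = sketch_thin_stack_set_backward(indices, g_out)
assert (g_x == g_old[np.arange(2), indices]).all()
assert (g_stack[np.arange(2), indices] == 0).all()
assert g_stack is g_out  # the same gradient array is reused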