def epoch_predict(env, args, model, loader):
    """Predict in one epoch"""
    model.eval()
    arcs, rels, probs = [], [], []
    for words, feats in loader():
        # ignore the first token of each sentence
        tmp_words = layers.pad(words[:, 1:],
                               paddings=[0, 0, 1, 0],
                               pad_value=args.pad_index)
        mask = tmp_words != args.pad_index
        lens = nn.reduce_sum(mask, -1)
        s_arc, s_rel = model(words, feats)
        arc_preds, rel_preds = decode(args, s_arc, s_rel, mask)
        arcs.extend(
            layers.split(nn.masked_select(arc_preds, mask),
                         lens.numpy().tolist()))
        rels.extend(
            layers.split(nn.masked_select(rel_preds, mask),
                         lens.numpy().tolist()))
        if args.prob:
            arc_probs = nn.index_sample(layers.softmax(s_arc, -1),
                                        layers.unsqueeze(arc_preds, -1))
            probs.extend(
                layers.split(
                    nn.masked_select(layers.squeeze(arc_probs, axes=[-1]),
                                     mask),
                    lens.numpy().tolist()))
    arcs = [seq.numpy().tolist() for seq in arcs]
    rels = [env.REL.vocab[seq.numpy().tolist()] for seq in rels]
    probs = [[round(p, 3) for p in seq.numpy().tolist()] for seq in probs]
    return arcs, rels, probs
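
# A minimal NumPy sketch (illustration only, not part of the parser) of the
# masked_select + split pattern used above: flatten the predictions over the
# real tokens, then cut the flat vector back into per-sentence pieces using
# the row sums of the mask.
import numpy as np

preds = np.array([[3, 1, 0, 0],
                  [2, 4, 5, 0]])
mask = np.array([[True, True, False, False],
                 [True, True, True, False]])
flat = preds[mask]                               # [3 1 2 4 5]
lens = mask.sum(-1)                              # [2 3]
per_sent = np.split(flat, np.cumsum(lens)[:-1])  # [array([3, 1]), array([2, 4, 5])]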
def __call__(self, arc_preds, rel_preds, arc_golds, rel_golds, mask):
    """Update the metric counters with one batch of predictions."""
    arc_mask = nn.masked_select(arc_preds == arc_golds, mask)
    rel_mask = layers.logical_and(
        nn.masked_select(rel_preds == rel_golds, mask), arc_mask)
    self.total += len(arc_mask)
    self.correct_arcs += np.sum(arc_mask.numpy()).item()
    self.correct_rels += np.sum(rel_mask.numpy()).item()
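
# Hedged sketch of what these counters measure: `correct_arcs / total` is the
# unlabeled attachment score (UAS) and `correct_rels / total` the labeled
# attachment score (LAS); a relation only counts as correct when the head is
# also correct. The values below are illustrative, not from the class above.
import numpy as np

arc_preds, arc_golds = np.array([2, 0, 3, 1]), np.array([2, 0, 1, 1])
rel_preds, rel_golds = np.array([5, 1, 2, 7]), np.array([5, 3, 2, 7])
arc_hit = arc_preds == arc_golds                 # head correct
rel_hit = (rel_preds == rel_golds) & arc_hit     # head AND label correct
uas = arc_hit.mean()                             # 0.75
las = rel_hit.mean()                             # 0.50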
def loss_function(s_arc, s_rel, arcs, rels, mask):
    """Loss function"""
    arcs = nn.masked_select(arcs, mask)
    rels = nn.masked_select(rels, mask)
    s_arc = nn.masked_select(s_arc, mask)
    s_rel = nn.masked_select(s_rel, mask)
    # keep only the relation scores at the gold head of each token
    s_rel = nn.index_sample(s_rel, layers.unsqueeze(arcs, 1))
    arc_loss = layers.cross_entropy(layers.softmax(s_arc), arcs)
    rel_loss = layers.cross_entropy(layers.softmax(s_rel), rels)
    loss = layers.reduce_mean(arc_loss + rel_loss)
    return loss
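
# Hedged NumPy sketch of the index_sample step above: s_rel scores every
# (token, candidate head, label) triple, but the label loss only needs the
# label scores at each token's gold head. Shapes and names here are
# illustrative assumptions, not taken from the model.
import numpy as np

n_tokens, n_heads, n_rels = 3, 4, 5
s_rel = np.random.rand(n_tokens, n_heads, n_rels)
arcs = np.array([2, 0, 3])                 # gold head per token
picked = s_rel[np.arange(n_tokens), arcs]  # shape [n_tokens, n_rels]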
def pack_padded_sequence(self, x, mask, pad_index):
    """
    Packs a padded sequence x.

    Args:
        x: input matrix
        mask: mask matrix
        pad_index: pad index

    Returns:
        new_x: output
        batch_sizes: the number of alive sequences at each step
        sorted_indices: the indices of x sorted by length

    >>> x
    [
        [5, 6, 7, 0],
        [1, 2, 3, 4],
        [8, 9, 0, 0]
    ]
    >>> mask
    [
        [True, True, True, False],
        [True, True, True, True],
        [True, True, False, False]
    ]
    >>> self.pack_padded_sequence(x, mask, 0)
    [1, 5, 8, 2, 6, 9, 3, 7, 4]
    """
    # sentence lengths
    mask = layers.cast(mask, 'int64')
    lens = layers.reduce_sum(mask, dim=-1)
    # sort by sentence length in descending order
    _, sorted_indices = layers.argsort(lens, descending=True)
    sorted_x = layers.index_select(x, sorted_indices)
    sorted_mask = layers.index_select(mask, sorted_indices)
    # transpose to time-major layout
    t_x = layers.transpose(sorted_x, perm=[1, 0, 2])
    t_mask = layers.transpose(sorted_mask, perm=[1, 0])
    # keep only the tokens that are alive at each step
    new_x = nn.masked_select(t_x, t_mask)
    # number of alive sequences per step
    batch_sizes = layers.reduce_sum(t_mask, -1)
    # drop steps where every sequence has already ended
    batch_sizes = nn.masked_select(batch_sizes, batch_sizes != 0)
    return new_x, batch_sizes.numpy().tolist(), sorted_indices
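
# A pure-NumPy sketch of the packing illustrated in the docstring above
# (illustration only; the method itself runs on Paddle tensors).
import numpy as np

x = np.array([[5, 6, 7, 0], [1, 2, 3, 4], [8, 9, 0, 0]])
mask = x != 0
order = np.argsort(-mask.sum(-1))    # longest sentence first: [1, 0, 2]
t_x, t_mask = x[order].T, mask[order].T
packed = t_x[t_mask]                 # [1 5 8 2 6 9 3 7 4]
batch_sizes = t_mask.sum(-1)         # sequences alive per step: [3 3 2 1]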
def forward(self, x):
    """Forward network"""
    mask = layers.reduce_any(x != self.pad_index, -1)
    lens = nn.reduce_sum(mask, -1)
    masked_x = nn.masked_select(x, mask)
    h, _ = self.transformer(masked_x)
    feat_embed = nn.pad_sequence_paddle(
        layers.split(h, lens.numpy().tolist(), dim=0), self.pad_index)
    return feat_embed
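
# Hedged NumPy sketch of the split + pad-back step that closes the forward
# pass above: the encoder output for the flattened words is cut into
# per-sentence chunks and right-padded back into a rectangular batch
# (assuming that is what nn.pad_sequence_paddle does; the name is taken from
# the code, the semantics are an assumption).
import numpy as np

h = np.arange(5 * 3).reshape(5, 3)   # 5 word vectors of dimension 3
lens = [2, 3]                        # words per sentence
chunks = np.split(h, np.cumsum(lens)[:-1])
T = max(lens)
padded = np.stack([np.pad(c, ((0, T - len(c)), (0, 0))) for c in chunks])
# padded.shape == (2, 3, 3): [batch, max_len, dim]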
def epoch_predict(env, args, model, loader):
    """Predict in one epoch"""
    connections, deprels, probabilities = [], [], []
    pad_index = args.pad_index
    bos_index = args.bos_index
    eos_index = args.eos_index
    for batch, inputs in enumerate(loader(), start=1):
        if args.encoding_model.startswith("ernie"):
            words = inputs[0]
            connection_prob, deprel_prob, words = model(words)
        else:
            words, feats = inputs
            connection_prob, deprel_prob, words = model(words, feats)
        mask = layers.logical_and(
            layers.logical_and(words != pad_index, words != bos_index),
            words != eos_index,
        )
        lens = nn.reduce_sum(mask, -1)
        connection_predicts, deprel_predicts = decode(args, connection_prob,
                                                      deprel_prob, mask)
        connections.extend(
            layers.split(nn.masked_select(connection_predicts, mask),
                         lens.numpy().tolist()))
        deprels.extend(
            layers.split(nn.masked_select(deprel_predicts, mask),
                         lens.numpy().tolist()))
        if args.prob:
            arc_probs = nn.index_sample(
                layers.softmax(connection_prob, -1),
                layers.unsqueeze(connection_predicts, -1))
            probabilities.extend(
                layers.split(
                    nn.masked_select(layers.squeeze(arc_probs, axes=[-1]),
                                     mask),
                    lens.numpy().tolist(),
                ))
    connections = [seq.numpy().tolist() for seq in connections]
    deprels = [env.REL.vocab[seq.numpy().tolist()] for seq in deprels]
    probabilities = [[round(p, 3) for p in seq.numpy().tolist()]
                     for seq in probabilities]
    return connections, deprels, probabilities
def forward(self, x):
    """Forward network"""
    mask = layers.reduce_any(x != self.pad_index, -1)
    lens = nn.reduce_sum(mask, -1)
    masked_x = nn.masked_select(x, mask)
    char_mask = masked_x != self.pad_index
    emb = self.embed(masked_x)
    _, (h, _) = self.lstm(emb, char_mask, self.pad_index)
    h = layers.concat(layers.unstack(h), axis=-1)
    feat_embed = nn.pad_sequence_paddle(
        layers.split(h, lens.numpy().tolist(), dim=0), self.pad_index)
    return feat_embed
def flat_words(self, words):
    """Flatten word pieces and record each word's last-piece position."""
    pad_index = self.args.pad_index
    lens = nn.reduce_sum(words != pad_index, dim=-1)
    # index of the last piece of each word in the flattened sequence;
    # fully padded words are nudged forward so the index stays valid
    position = layers.cumsum(lens + layers.cast((lens == 0), "int32"),
                             axis=1) - 1
    flat_words = nn.masked_select(words, words != pad_index)
    flat_words = nn.pad_sequence_paddle(
        layers.split(flat_words,
                     layers.reduce_sum(lens, -1).numpy().tolist(), dim=0),
        pad_index)
    max_len = flat_words.shape[1]
    position = nn.mask_fill(position, position >= max_len, max_len - 1)
    return flat_words, position
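
# Hedged NumPy sketch of the `position` arithmetic above: the cumulative sum
# of per-word piece counts gives, for every word, the index of its last
# piece in the flattened sequence, and the (lens == 0) term keeps fully
# padded words pointing at a valid slot instead of -1.
import numpy as np

lens = np.array([[2, 1, 3, 0]])                  # word pieces per word
position = np.cumsum(lens + (lens == 0), axis=1) - 1
# position == [[1, 2, 5, 6]]; the real code then clips 6 to max_len - 1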