def forward(self, words, feats):
    """Forward network"""
    batch_size, seq_len = words.shape
    # get the mask and lengths of given batch
    mask = words != self.pad_index
    ext_words = words
    # set the indices larger than num_embeddings to unk_index
    if hasattr(self, 'pretrained'):
        ext_mask = words >= self.word_embed.weight.shape[0]
        ext_words = nn.mask_fill(words, ext_mask, self.unk_index)

    # get outputs from embedding layers
    word_embed = self.word_embed(ext_words)
    if hasattr(self, 'pretrained'):
        word_embed += self.pretrained(words)
    feat_embed = self.feat_embed(feats)
    word_embed, feat_embed = self.embed_dropout(word_embed, feat_embed)
    # concatenate the word and feat representations
    # embed.size = (batch, seq_len, n_embed * 2)
    embed = layers.concat((word_embed, feat_embed), axis=-1)

    if self.args.encoding_model == "lstm":
        x, _ = self.lstm(embed, mask, self.pad_index)
        x = self.lstm_dropout(x)
    else:
        _, x = self.transformer(words, word_emb=embed)

    # apply MLPs to the encoder output states
    arc_h = self.mlp_arc_h(x)
    arc_d = self.mlp_arc_d(x)
    rel_h = self.mlp_rel_h(x)
    rel_d = self.mlp_rel_d(x)

    # get arc and rel scores from the bilinear attention
    # [batch_size, seq_len, seq_len]
    s_arc = self.arc_attn(arc_d, arc_h)
    # [batch_size, seq_len, seq_len, n_rels]
    s_rel = layers.transpose(self.rel_attn(rel_d, rel_h), perm=(0, 2, 3, 1))
    # mask out the arc scores of padding positions with -1e5
    s_arc_mask = nn.unsqueeze(layers.logical_not(mask), 1)
    s_arc = nn.mask_fill(s_arc, s_arc_mask, -1e5)

    return s_arc, s_rel
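
# ---------------------------------------------------------------------------
# Illustration only (not part of the model): a minimal NumPy sketch of the
# bilinear arc scoring that `self.arc_attn` is assumed to implement, in the
# style of Dozat & Manning's biaffine parser.  The names `arc_d`, `arc_h`,
# `W_arc` and `b_arc` are illustrative, not this repository's API; shapes
# follow the comments above (s_arc is [batch_size, seq_len, seq_len]).
# `self.rel_attn` is assumed to work the same way, with one output channel
# per relation label.
# ---------------------------------------------------------------------------
import numpy as np


def biaffine_arc_scores_sketch(arc_d, arc_h, W_arc, b_arc):
    """arc_d, arc_h: [batch, seq_len, n_mlp]; W_arc: [n_mlp, n_mlp]; b_arc: [n_mlp].

    Returns s_arc of shape [batch, seq_len, seq_len], where s_arc[b, i, j]
    scores token j as the head of token i.
    """
    # bilinear term: arc_d @ W_arc @ arc_h^T for every batch element
    bilinear = np.einsum('bim,mn,bjn->bij', arc_d, W_arc, arc_h)
    # linear bias on the head representations, broadcast over dependents
    head_bias = np.einsum('bjn,n->bj', arc_h, b_arc)[:, None, :]
    return bilinear + head_bias
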
def flat_words(self, words):
    """Flatten the pieces of each word into one padded sequence per sentence.

    Returns the flattened, re-padded ids together with the position of the
    last piece of every word inside that flattened sequence.
    """
    pad_index = self.args.pad_index
    # number of non-pad pieces in each word
    lens = nn.reduce_sum(words != pad_index, dim=-1)
    # index of the last piece of each word inside the flattened sentence; the
    # (lens == 0) term keeps the positions increasing over fully padded slots
    position = layers.cumsum(lens + layers.cast((lens == 0), "int32"), axis=1) - 1
    # gather all non-pad pieces, split them back per sentence and re-pad
    flat_words = nn.masked_select(words, words != pad_index)
    flat_words = nn.pad_sequence_paddle(
        layers.split(flat_words, layers.reduce_sum(lens, -1).numpy().tolist(), pad_index))
    max_len = flat_words.shape[1]
    # clamp the positions of padded slots so they stay inside the sequence
    position = nn.mask_fill(position, position >= max_len, max_len - 1)
    return flat_words, position
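
# ---------------------------------------------------------------------------
# Illustration only (not part of the model): a NumPy sketch of what
# `flat_words` computes, assuming `words` has shape [batch, seq_len, fix_len]
# (each word padded to a fixed number of pieces) and `pad_index == 0`.
# ---------------------------------------------------------------------------
import numpy as np


def flat_words_sketch(words, pad_index=0):
    batch_size = words.shape[0]
    # number of non-pad pieces in each word: [batch, seq_len]
    lens = (words != pad_index).sum(-1)
    # index of the last piece of each word inside the flattened sentence; the
    # (lens == 0) term keeps positions increasing over fully padded slots
    position = np.cumsum(lens + (lens == 0), axis=1) - 1
    # gather the non-pad pieces of each sentence and re-pad to a dense matrix
    pieces = [words[b][words[b] != pad_index] for b in range(batch_size)]
    max_len = max(len(p) for p in pieces)
    flat = np.full((batch_size, max_len), pad_index, dtype=words.dtype)
    for b, p in enumerate(pieces):
        flat[b, :len(p)] = p
    # clamp positions of padded slots so they stay inside the matrix
    return flat, np.minimum(position, max_len - 1)


# Example: two sentences, fix_len = 2, id 0 = pad.
# >>> words = np.array([[[3, 4], [5, 0], [0, 0]],
# ...                   [[6, 0], [0, 0], [0, 0]]])
# >>> flat_words_sketch(words)
# (array([[3, 4, 5],
#         [6, 0, 0]]),
#  array([[1, 2, 2],
#         [0, 1, 2]]))
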
def forward(self, input, adj):
    """Forward network"""
    # linear projection of the node features: [batch, N, out_features]
    h = layers.fc(input, size=self.out_features, num_flatten_dims=2)
    _, N, _ = h.shape
    # attention logits e[i, j] = LeakyReLU(a1·h_i + a2·h_j), built by
    # broadcasting the two [batch, N, 1] projections against each other
    middle_result1 = layers.expand(layers.matmul(h, self.a1), expand_times=(1, 1, N))
    middle_result2 = layers.transpose(
        layers.expand(layers.matmul(h, self.a2), expand_times=(1, 1, N)), perm=[0, 2, 1])
    e = layers.leaky_relu(middle_result1 + middle_result2, self.alpha)
    # mask out node pairs that are not connected in the graph, then normalize
    adj = layers.cast(adj, dtype='int32')
    attention = nn.mask_fill(e, adj == 0.0, -1e9)
    attention = layers.softmax(attention, axis=2)
    attention = layers.dropout(attention, self.dropout)
    # aggregate the neighbours' features with the attention weights
    h_prime = layers.matmul(attention, h)

    if self.concat:
        return layers.elu(h_prime)
    else:
        return h_prime
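
# ---------------------------------------------------------------------------
# Illustration only (not part of the layer): a NumPy sketch of the graph-
# attention step above for a single graph (no batch dimension).  `W`, `a1`,
# `a2` and `alpha` mirror the layer's parameters but are passed in explicitly;
# dropout and the final ELU are omitted for brevity.
# ---------------------------------------------------------------------------
import numpy as np


def gat_layer_sketch(x, adj, W, a1, a2, alpha=0.2):
    """x: [N, in_features]; adj: [N, N] 0/1 adjacency;
    W: [in_features, out_features]; a1, a2: [out_features, 1]."""
    h = x @ W                                # [N, out_features]
    s = (h @ a1) + (h @ a2).T                # s[i, j] = a1·h_i + a2·h_j
    e = np.where(s > 0, s, alpha * s)        # LeakyReLU with negative slope alpha
    e = np.where(adj > 0, e, -1e9)           # keep only edges present in the graph
    # row-wise softmax over neighbours, numerically stabilised
    attention = np.exp(e - e.max(axis=1, keepdims=True))
    attention /= attention.sum(axis=1, keepdims=True)
    return attention @ h                     # [N, out_features]
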