Example #1
    def forward(self, words, feats):
        """Forward network"""
        batch_size, seq_len = words.shape
        # get the mask and lengths of given batch
        mask = words != self.pad_index
        ext_words = words
        # set the indices larger than num_embeddings to unk_index
        if hasattr(self, 'pretrained'):
            ext_mask = words >= self.word_embed.weight.shape[0]
            ext_words = nn.mask_fill(words, ext_mask, self.unk_index)

        # get outputs from embedding layers
        word_embed = self.word_embed(ext_words)
        if hasattr(self, 'pretrained'):
            word_embed += self.pretrained(words)
        feat_embed = self.feat_embed(feats)
        word_embed, feat_embed = self.embed_dropout(word_embed, feat_embed)
        # concatenate the word and feat representations
        # embed.size = (batch, seq_len, n_embed * 2)
        embed = layers.concat((word_embed, feat_embed), axis=-1)

        if self.args.encoding_model == "lstm":
            x, _ = self.lstm(embed, mask, self.pad_index)
            x = self.lstm_dropout(x)
        else:
            _, x = self.transformer(words, word_emb=embed)

        # apply MLPs to the encoder (BiLSTM/transformer) output states
        arc_h = self.mlp_arc_h(x)
        arc_d = self.mlp_arc_d(x)
        rel_h = self.mlp_rel_h(x)
        rel_d = self.mlp_rel_d(x)

        # get arc and rel scores from the bilinear attention
        # [batch_size, seq_len, seq_len]
        s_arc = self.arc_attn(arc_d, arc_h)
        # [batch_size, seq_len, seq_len, n_rels]
        s_rel = layers.transpose(self.rel_attn(rel_d, rel_h),
                                 perm=(0, 2, 3, 1))
        # set the scores that exceed the length of each sentence to -1e5
        s_arc_mask = nn.unsqueeze(layers.logical_not(mask), 1)
        s_arc = nn.mask_fill(s_arc, s_arc_mask, -1e5)
        return s_arc, s_rel
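This and the following examples lean on a helper nn.mask_fill(input, mask, value) that comes from the project's own nn module rather than from Paddle itself. Judging only from how it is called here, it overwrites the entries of input selected by mask with value; a minimal sketch of that behaviour (an assumption, not the project's actual implementation) could look like this:

import paddle.fluid.layers as layers


def mask_fill(input, mask, value):
    # Hypothetical stand-in for the nn.mask_fill helper used above:
    # keep unmasked entries, replace masked ones with `value`.
    mask = layers.cast(mask, dtype=input.dtype)
    return input * (1 - mask) + mask * value

In Example #1 it is used twice: once to remap out-of-vocabulary word ids to unk_index, and once to push the arc scores of padded positions down to -1e5 so they cannot win the argmax.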
Example #2
    def flat_words(self, words):
        """Flatten sub-word pieces into a single padded sequence per sentence."""
        pad_index = self.args.pad_index
        # number of non-pad pieces in each word
        lens = nn.reduce_sum(words != pad_index, dim=-1)
        # index of each word's last piece in the flattened sequence
        # (the cast term keeps all-pad rows from reusing the previous word's index)
        position = layers.cumsum(lens + layers.cast((lens == 0), "int32"),
                                 axis=1) - 1
        # drop the padding, then re-pad the remaining pieces sentence by sentence
        flat_words = nn.masked_select(words, words != pad_index)
        flat_words = nn.pad_sequence_paddle(
            layers.split(flat_words,
                         layers.reduce_sum(lens, -1).numpy().tolist(),
                         pad_index))
        # clip positions that fall beyond the padded length
        max_len = flat_words.shape[1]
        position = nn.mask_fill(position, position >= max_len, max_len - 1)
        return flat_words, position
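To make the intent of flat_words concrete, here is a tiny NumPy illustration of the same flattening (the shapes, ids, and pad value are made up for the example; only the arithmetic mirrors the method above):

import numpy as np

pad = 0
# one sentence, three words, each split into up to three sub-word pieces
words = np.array([[[11, 12, pad],
                   [13, pad, pad],
                   [14, 15, 16]]])

lens = (words != pad).sum(-1)                         # [[2, 1, 3]]
position = np.cumsum(lens + (lens == 0), axis=1) - 1  # [[1, 2, 5]]
flat = words[words != pad]                            # [11 12 13 14 15 16]

position marks where each word's last piece lands in the flattened sequence (12, 13, and 16 here); the Paddle version then re-pads flat per sentence and clips any position that exceeds the padded length.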
Example #3
    def forward(self, input, adj):
        """Forward pass of the graph attention layer."""
        # linear projection of the node features: [batch, N, out_features]
        h = layers.fc(input, size=self.out_features, num_flatten_dims=2)

        _, N, _ = h.shape
        # additive attention logits e_ij = LeakyReLU(h_i*a1 + h_j*a2),
        # built by expanding the two halves over rows and columns
        middle_result1 = layers.expand(layers.matmul(h, self.a1),
                                       expand_times=(1, 1, N))
        middle_result2 = layers.transpose(layers.expand(
            layers.matmul(h, self.a2), expand_times=(1, 1, N)),
                                          perm=[0, 2, 1])
        e = layers.leaky_relu(middle_result1 + middle_result2, self.alpha)
        # mask out non-edges before the softmax so they get near-zero weight
        adj = layers.cast(adj, dtype='int32')
        attention = nn.mask_fill(e, adj == 0.0, -1e9)
        attention = layers.softmax(attention, axis=2)
        attention = layers.dropout(attention, self.dropout)
        # aggregate neighbour features with the attention weights
        h_prime = layers.matmul(attention, h)
        if self.concat:
            return layers.elu(h_prime)
        else:
            return h_prime
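The expand/transpose pair in the middle of Example #3 is the usual trick for building every pairwise logit e_ij = LeakyReLU(h_i*a1 + h_j*a2) without materialising concatenated node pairs. A NumPy sketch of the same broadcast (shapes chosen only for illustration):

import numpy as np

N, out_features = 4, 8
h = np.random.rand(1, N, out_features)   # projected node features
a1 = np.random.rand(out_features, 1)     # first half of the attention vector
a2 = np.random.rand(out_features, 1)     # second half

# (h @ a1) is [1, N, 1]; adding the transpose of (h @ a2) broadcasts it
# into the full [1, N, N] matrix whose (i, j) entry is h_i*a1 + h_j*a2
e = (h @ a1) + (h @ a2).transpose(0, 2, 1)
assert e.shape == (1, N, N)

After the LeakyReLU, masking the non-edges with -1e9 and applying the row-wise softmax turns these logits into the attention weights used to aggregate neighbour features.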